• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * encoding.c : implements the encoding conversion functions needed for XML
3  *
4  * Related specs:
5  * rfc2044        (UTF-8 and UTF-16) F. Yergeau Alis Technologies
6  * rfc2781        UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
7  * [ISO-10646]    UTF-8 and UTF-16 in Annexes
8  * [ISO-8859-1]   ISO Latin-1 characters codes.
9  * [UNICODE]      The Unicode Consortium, "The Unicode Standard --
10  *                Worldwide Character Encoding -- Version 1.0", Addison-
11  *                Wesley, Volume 1, 1991, Volume 2, 1992.  UTF-8 is
12  *                described in Unicode Technical Report #4.
13  * [US-ASCII]     Coded Character Set--7-bit American Standard Code for
14  *                Information Interchange, ANSI X3.4-1986.
15  *
16  * See Copyright for the status of this software.
17  *
18  * daniel@veillard.com
19  *
20  * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
21  */
22 
23 #define IN_LIBXML
24 #include "libxml.h"
25 
26 #include <string.h>
27 #include <limits.h>
28 
29 #ifdef HAVE_CTYPE_H
30 #include <ctype.h>
31 #endif
32 #ifdef HAVE_STDLIB_H
33 #include <stdlib.h>
34 #endif
35 #ifdef LIBXML_ICONV_ENABLED
36 #ifdef HAVE_ERRNO_H
37 #include <errno.h>
38 #endif
39 #endif
40 #include <libxml/encoding.h>
41 #include <libxml/xmlmemory.h>
42 #ifdef LIBXML_HTML_ENABLED
43 #include <libxml/HTMLparser.h>
44 #endif
45 #include <libxml/globals.h>
46 #include <libxml/xmlerror.h>
47 
48 #include "buf.h"
49 #include "enc.h"
50 
/*
 * File-scope handler pointers for the two UTF-16 byte orders.  They start
 * out as NULL; nothing in the visible part of the file assigns them
 * (presumably set during handler registration -- TODO confirm).
 */
51 static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
52 static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
53 
/*
 * One entry of the encoding alias table: @alias is the string compared
 * against the (upper-cased) lookup key in xmlGetEncodingAlias(), @name is
 * the encoding name returned on a match.  Both strings are released with
 * xmlFree() by xmlCleanupEncodingAliases().
 */
54 typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
55 typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
56 struct _xmlCharEncodingAlias {
57     const char *name;
58     const char *alias;
59 };
60 
/*
 * The alias table itself: the array, the number of entries in use (loop
 * bound for lookup and cleanup) and a second counter that is reset together
 * with it (presumably the allocated capacity -- TODO confirm).
 */
61 static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
62 static int xmlCharEncodingAliasesNb = 0;
63 static int xmlCharEncodingAliasesMax = 0;
64 
/*
 * With iconv or ICU enabled, DEBUG_ENCODING can be switched on by changing
 * the "#if 0" below.  Without either of them, and only when
 * LIBXML_ISO8859X_ENABLED is defined, the ISO-8859-x registration routine
 * is forward-declared here.
 */
65 #if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
66 #if 0
67 #define DEBUG_ENCODING  /* Define this to get encoding traces */
68 #endif
69 #else
70 #ifdef LIBXML_ISO8859X_ENABLED
71 static void xmlRegisterCharEncodingHandlersISO8859x (void);
72 #endif
73 #endif
74 
/*
 * Byte-order flag consulted by the UTF-16 converters: when non-zero they
 * access 16-bit units natively for little-endian data and byte by byte for
 * big-endian data, and the other way round when zero.  Defaults to 1.
 */
75 static int xmlLittleEndian = 1;
76 
77 /**
78  * xmlEncodingErrMemory:
79  * @extra:  extra information
80  *
81  * Handle an out of memory condition
82  */
83 static void
xmlEncodingErrMemory(const char * extra)84 xmlEncodingErrMemory(const char *extra)
85 {
86     __xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra);
87 }
88 
89 /**
90  * xmlErrEncoding:
91  * @error:  the error number
92  * @msg:  the error message
93  *
94  * n encoding error
95  */
96 static void LIBXML_ATTR_FORMAT(2,0)
xmlEncodingErr(xmlParserErrors error,const char * msg,const char * val)97 xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val)
98 {
99     __xmlRaiseError(NULL, NULL, NULL, NULL, NULL,
100                     XML_FROM_I18N, error, XML_ERR_FATAL,
101                     NULL, 0, val, NULL, NULL, 0, 0, msg, val);
102 }
103 
104 #ifdef LIBXML_ICU_ENABLED
105 static uconv_t*
openIcuConverter(const char * name,int toUnicode)106 openIcuConverter(const char* name, int toUnicode)
107 {
108   UErrorCode status = U_ZERO_ERROR;
109   uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
110   if (conv == NULL)
111     return NULL;
112 
113   conv->pivot_source = conv->pivot_buf;
114   conv->pivot_target = conv->pivot_buf;
115 
116   conv->uconv = ucnv_open(name, &status);
117   if (U_FAILURE(status))
118     goto error;
119 
120   status = U_ZERO_ERROR;
121   if (toUnicode) {
122     ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,
123                         NULL, NULL, NULL, &status);
124   }
125   else {
126     ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,
127                         NULL, NULL, NULL, &status);
128   }
129   if (U_FAILURE(status))
130     goto error;
131 
132   status = U_ZERO_ERROR;
133   conv->utf8 = ucnv_open("UTF-8", &status);
134   if (U_SUCCESS(status))
135     return conv;
136 
137 error:
138   if (conv->uconv)
139     ucnv_close(conv->uconv);
140   xmlFree(conv);
141   return NULL;
142 }
143 
144 static void
closeIcuConverter(uconv_t * conv)145 closeIcuConverter(uconv_t *conv)
146 {
147   if (conv != NULL) {
148     ucnv_close(conv->uconv);
149     ucnv_close(conv->utf8);
150     xmlFree(conv);
151   }
152 }
153 #endif /* LIBXML_ICU_ENABLED */
154 
155 /************************************************************************
156  *									*
157  *		Conversions To/From UTF8 encoding			*
158  *									*
159  ************************************************************************/
160 
/**
 * asciiToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ASCII chars
 * @inlen:  the length of @in
 *
 * Copy a block of ASCII chars to a UTF-8 block.  Every ASCII byte maps to
 * itself, so input consumed and output produced always advance together.
 * Conversion stops early while fewer than six bytes of @out remain free.
 *
 * Returns the number of bytes written, or -1 if a byte >= 0x80 is found.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 * Both are also updated when -1 is returned.
 */
static int
asciiToUTF8(unsigned char* out, int *outlen,
            const unsigned char* in, int *inlen) {
    const int capacity = *outlen;
    const int available = *inlen;
    int n = 0;          /* bytes copied: consumed == produced */

    while ((n < available) && (n + 5 < capacity)) {
        unsigned char byte = in[n];

        if (byte >= 0x80) {
            /* not ASCII: report what was converted so far */
            *outlen = n;
            *inlen = n;
            return(-1);
        }
        out[n] = byte;
        n++;
    }

    *outlen = n;
    *inlen = n;
    return(n);
}
205 
206 #ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toascii:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars, or NULL for an
 *       initialization call
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
 * block of chars out.
 *
 * Returns the number of bytes written on success (0 for an initialization
 *     call), -2 if the input is malformed or contains a character that
 *     does not fit in ASCII, or -1 if @out, @outlen or @inlen is NULL.
 * The value of @inlen after return is the number of octets consumed; a
 *     multi-byte sequence cut off by the end of @in is left unconsumed.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8Toascii(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char* processed = in;
    const unsigned char* outend;
    const unsigned char* outstart = out;
    const unsigned char* instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    int trailing;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        /* initialization: nothing to emit */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    inend = in + (*inlen);
    outend = out + (*outlen);
    while (in < inend) {
        d = *in++;
        if (d < 0x80) {
            c = d;
            trailing = 0;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            goto bad_input;
        } else if (d < 0xE0) {
            c = d & 0x1F;
            trailing = 1;
        } else if (d < 0xF0) {
            c = d & 0x0F;
            trailing = 2;
        } else if (d < 0xF8) {
            c = d & 0x07;
            trailing = 3;
        } else {
            /* no chance for this in ASCII */
            goto bad_input;
        }

        /* sequence split across buffers: stop and wait for more input */
        if (inend - in < trailing)
            break;

        for ( ; trailing > 0; trailing--) {
            d = *in++;
            /*
             * A byte that is not 10xxxxxx cannot continue a sequence.
             * Reject it instead of emitting a half-decoded value, the
             * same way UTF8Toisolat1() does.
             */
            if ((d & 0xC0) != 0x80)
                goto bad_input;
            c = (c << 6) | (d & 0x3F);
        }

        /* c now holds a single UCS-4 value */
        if (c >= 0x80)
            goto bad_input;         /* not representable in ASCII */
        if (out >= outend)
            break;
        *out++ = (unsigned char) c;
        processed = in;
    }
    *outlen = (int) (out - outstart);
    *inlen = (int) (processed - instart);
    return(*outlen);

bad_input:
    *outlen = (int) (out - outstart);
    *inlen = (int) (processed - instart);
    return(-2);
}
290 #endif /* LIBXML_OUTPUT_ENABLED */
291 
/**
 * isolat1ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO Latin 1 chars
 * @inlen:  the length of @in
 *
 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
 * block of chars out.  Bytes below 0x80 are copied unchanged, every other
 * byte becomes a two-byte sequence.  Conversion stops at the first input
 * byte whose encoding no longer fits into @out.
 *
 * Returns the number of bytes written if success, or -1 if any argument
 *     is NULL.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
isolat1ToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    int room;
    int avail;
    int produced = 0;
    int consumed = 0;

    if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);

    room = *outlen;
    avail = *inlen;

    while (consumed < avail) {
        unsigned char ch = in[consumed];

        if (ch < 0x80) {
            /* plain ASCII: one output byte */
            if (produced >= room)
                break;
            out[produced++] = ch;
        } else {
            /* 0x80..0xFF: two output bytes, 110000xx 10xxxxxx */
            if (room - produced < 2)
                break;
            out[produced++] = ((ch >> 6) & 0x1F) | 0xC0;
            out[produced++] = (ch & 0x3F) | 0x80;
        }
        consumed++;
    }

    *outlen = produced;
    *inlen = consumed;
    return(*outlen);
}
340 
/**
 * UTF8ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-8 chars, or NULL for an
 *        initialization call
 * @inlenb:  the length of @inb in bytes
 *
 * Pass-through "conversion": copies as many bytes as fit, i.e. the smaller
 * of *@inlenb and *@outlen.  The data is not checked for UTF-8 validity
 * and a multi-byte sequence may be cut at the copy boundary.
 *
 * Returns the number of bytes written (0 for an initialization call), or
 *     -1 if @out, @outlen or @inlenb is NULL or the byte count is negative.
 *     On success *@outlen and *@inlenb are both set to the count copied.
 */
static int
UTF8ToUTF8(unsigned char* out, int *outlen,
           const unsigned char* inb, int *inlenb)
{
    int count;

    if ((out == NULL) || (outlen == NULL) || (inlenb == NULL))
        return(-1);

    if (inb == NULL) {
        /* initialization call: nothing to emit */
        *outlen = 0;
        *inlenb = 0;
        return(0);
    }

    count = (*inlenb < *outlen) ? *inlenb : *outlen;
    if (count < 0)
        return(-1);

    /*
     * FIXME: conversion functions are supposed to guarantee valid UTF-8,
     * which a raw copy cannot do.  Preferably this converter should not
     * be used at all.
     */
    memcpy(out, inb, count);

    *outlen = count;
    *inlenb = count;
    return(*outlen);
}
387 
388 
389 #ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toisolat1:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars, or NULL for an
 *       initialization call
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
 * block of chars out.
 *
 * Returns the number of bytes written if success (0 for an initialization
 *     call), -2 if the input is malformed or holds a character above
 *     0xFF, or -1 if @out, @outlen or @inlen is NULL.
 * The value of @inlen after return is the number of octets consumed; a
 *     sequence cut off by the end of @in is left unconsumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
UTF8Toisolat1(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    int written = 0;    /* bytes stored in @out */
    int consumed = 0;   /* input bytes belonging to converted characters */
    int srclen;
    int dstlen;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        /* initialization: nothing to do */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    srclen = *inlen;
    dstlen = *outlen;

    while (consumed < srclen) {
        int pos = consumed;
        unsigned int lead = in[pos++];
        unsigned int code;
        int extra;

        if (lead < 0x80) {
            code = lead;
            extra = 0;
        } else if (lead < 0xC0) {
            goto failed;            /* trailing byte in leading position */
        } else if (lead < 0xE0) {
            code = lead & 0x1F;
            extra = 1;
        } else if (lead < 0xF0) {
            code = lead & 0x0F;
            extra = 2;
        } else if (lead < 0xF8) {
            code = lead & 0x07;
            extra = 3;
        } else {
            goto failed;            /* no chance for this in IsoLat1 */
        }

        /* incomplete sequence at the end of the buffer: stop here */
        if (srclen - pos < extra)
            break;

        while (extra > 0) {
            unsigned int cont = in[pos++];

            if ((cont & 0xC0) != 0x80)
                goto failed;
            code = (code << 6) | (cont & 0x3F);
            extra--;
        }

        /* code is now a single UCS-4 value */
        if (code > 0xFF)
            goto failed;            /* no chance for this in IsoLat1 */
        if (written >= dstlen)
            break;
        out[written++] = (unsigned char) code;
        consumed = pos;
    }

    *outlen = written;
    *inlen = consumed;
    return(*outlen);

failed:
    *outlen = written;
    *inlen = consumed;
    return(-2);
}
479 #endif /* LIBXML_OUTPUT_ENABLED */
480 
/**
 * UTF16LEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16LE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16LE code units in and try to convert it to an UTF-8
 * block of chars out.  The input is decoded byte by byte (low byte first),
 * so the result depends neither on the host byte order nor on the
 * alignment of @inb.  A trailing odd byte and a high surrogate whose
 * partner has not arrived yet are left unconsumed.  Conversion stops early
 * while fewer than six bytes of @out remain free.
 *
 * NOTE(review): a low surrogate that is not preceded by a high surrogate is
 * not rejected; it is encoded as a three-byte sequence.
 *
 * Returns the number of bytes written, or -2 if a high surrogate is not
 *     followed by a low surrogate.  Returns 0 without converting anything
 *     when *@outlen is 0.
 *     The value of *@inlenb after return is the number of octets consumed,
 *     the value of *@outlen the number of octets produced.
 */
static int
UTF16LEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    const unsigned char* in = inb;
    const unsigned char* processed = inb;
    const unsigned char* inend;
    unsigned int c, d;
    int nbytes;
    int bits;

    if (*outlen == 0) {
        *inlenb = 0;
        return(0);
    }

    /* only whole 16-bit units are looked at; a negative length means none */
    nbytes = *inlenb;
    if (nbytes < 0)
        nbytes = 0;
    nbytes -= nbytes % 2;
    inend = inb + nbytes;

    /*
     * The "+ 5" guard keeps at least six bytes free on entry, so the (at
     * most four) bytes written per character below always fit.
     */
    while ((in < inend) && (out - outstart + 5 < *outlen)) {
        c = (unsigned int) in[0] | ((unsigned int) in[1] << 8);
        in += 2;

        if ((c & 0xFC00) == 0xD800) {    /* high surrogate */
            if (in >= inend)             /* pair split across buffers */
                break;
            d = (unsigned int) in[0] | ((unsigned int) in[1] << 8);
            in += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = (int) (out - outstart);
                *inlenb = (int) (processed - inb);
                return(-2);
            }
            c = (((c & 0x03FF) << 10) | (d & 0x03FF)) + 0x10000;
        }

        /* c is a single UCS-4 value: emit the lead byte ... */
        if (c < 0x80) {
            *out++ = (unsigned char) c;
            bits = -6;
        } else if (c < 0x800) {
            *out++ = (unsigned char) (((c >> 6) & 0x1F) | 0xC0);
            bits = 0;
        } else if (c < 0x10000) {
            *out++ = (unsigned char) (((c >> 12) & 0x0F) | 0xE0);
            bits = 6;
        } else {
            *out++ = (unsigned char) (((c >> 18) & 0x07) | 0xF0);
            bits = 12;
        }
        /* ... followed by the continuation bytes */
        for ( ; bits >= 0; bits -= 6)
            *out++ = (unsigned char) (((c >> bits) & 0x3F) | 0x80);

        processed = in;
    }
    *outlen = (int) (out - outstart);
    *inlenb = (int) (processed - inb);
    return(*outlen);
}
573 
574 #ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16LE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars, or NULL for an
 *       initialization call
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
 * block of chars out.  No byte order mark is produced.  Code units are
 * stored byte by byte (low byte first), so the result depends neither on
 * the host byte order nor on the alignment of @outb.
 *
 * Returns the number of bytes written (0 for an initialization call),
 *     -1 if @outb, @outlen or @inlen is NULL, or -2 if the input is
 *     malformed or encodes a value above 0x10FFFF.
 *     The value of *@inlen after return is the number of octets consumed,
 *     the value of *@outlen the number of octets produced; a sequence cut
 *     off by the end of @in or not fitting into @outb is left unconsumed.
 */
static int
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    unsigned char* outend;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    unsigned int hi, lo;
    int trailing;

    /* UTF16LE encoding has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    inend = in + *inlen;
    /* only whole 16-bit units of the output buffer are usable */
    outend = outb + (*outlen / 2) * 2;

    while (in < inend) {
        d = *in++;
        if (d < 0x80) {
            c = d;
            trailing = 0;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            goto bad_input;
        } else if (d < 0xE0) {
            c = d & 0x1F;
            trailing = 1;
        } else if (d < 0xF0) {
            c = d & 0x0F;
            trailing = 2;
        } else if (d < 0xF8) {
            c = d & 0x07;
            trailing = 3;
        } else {
            /* no chance for this in UTF-16 */
            goto bad_input;
        }

        /* sequence split across buffers: stop and wait for more input */
        if (inend - in < trailing)
            break;

        for ( ; trailing > 0; trailing--) {
            d = *in++;
            /* anything but 10xxxxxx cannot continue the sequence */
            if ((d & 0xC0) != 0x80)
                goto bad_input;
            c = (c << 6) | (d & 0x3F);
        }

        /* c is a single UCS-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            *out++ = (unsigned char) (c & 0xFF);
            *out++ = (unsigned char) (c >> 8);
        } else if (c < 0x110000) {
            /* needs a surrogate pair: two code units */
            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            *out++ = (unsigned char) (hi & 0xFF);
            *out++ = (unsigned char) (hi >> 8);
            *out++ = (unsigned char) (lo & 0xFF);
            *out++ = (unsigned char) (lo >> 8);
        } else {
            /* beyond the Unicode range: cannot be expressed in UTF-16 */
            goto bad_input;
        }
        processed = in;
    }
    *outlen = (int) (out - outb);
    *inlen = (int) (processed - instart);
    return(*outlen);

bad_input:
    *outlen = (int) (out - outb);
    *inlen = (int) (processed - instart);
    return(-2);
}
683 
/**
 * UTF8ToUTF16:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars, or NULL for the
 *       initialization call
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
 * block of chars out.  The initialization call (@in == NULL) writes the
 * two bytes FF FE as Byte Order Mark when at least two bytes are available
 * and nothing otherwise; every other call is delegated to UTF8ToUTF16LE().
 *
 * Returns the number of bytes written, or whatever UTF8ToUTF16LE() returns
 *     for a conversion call.
 */
static int
UTF8ToUTF16(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    int bomlen = 0;

    if (in != NULL)
        return (UTF8ToUTF16LE(outb, outlen, in, inlen));

    /*
     * initialization, add the Byte Order Mark for UTF-16LE
     */
    if (*outlen >= 2) {
        outb[0] = 0xFF;
        outb[1] = 0xFE;
        bomlen = 2;
#ifdef DEBUG_ENCODING
        xmlGenericError(xmlGenericErrorContext,
		    "Added FFFE Byte Order Mark\n");
#endif
    }
    *outlen = bomlen;
    *inlen = 0;
    return(bomlen);
}
722 #endif /* LIBXML_OUTPUT_ENABLED */
723 
/**
 * UTF16BEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16BE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16BE code units in and try to convert it to an UTF-8
 * block of chars out.  The input is decoded byte by byte (high byte
 * first), so the result depends neither on the host byte order nor on the
 * alignment of @inb.  A trailing odd byte and a high surrogate whose
 * partner has not arrived yet are left unconsumed.  Conversion stops early
 * while fewer than six bytes of @out remain free.
 *
 * NOTE(review): a low surrogate that is not preceded by a high surrogate is
 * not rejected; it is encoded as a three-byte sequence.
 *
 * Returns the number of bytes written, or -2 if a high surrogate is not
 *     followed by a low surrogate.  Returns 0 without converting anything
 *     when *@outlen is 0.
 * The value of *@inlenb after return is the number of octets consumed,
 *     the value of *@outlen the number of octets produced.
 */
static int
UTF16BEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    const unsigned char* in = inb;
    const unsigned char* processed = inb;
    const unsigned char* inend;
    unsigned int c, d;
    int nbytes;
    int bits;

    if (*outlen == 0) {
        *inlenb = 0;
        return(0);
    }

    /* only whole 16-bit units are looked at; a negative length means none */
    nbytes = *inlenb;
    if (nbytes < 0)
        nbytes = 0;
    nbytes -= nbytes % 2;
    inend = inb + nbytes;

    /*
     * The "+ 5" guard keeps at least six bytes free on entry, so the (at
     * most four) bytes written per character below always fit.
     */
    while ((in < inend) && (out - outstart + 5 < *outlen)) {
        c = ((unsigned int) in[0] << 8) | (unsigned int) in[1];
        in += 2;

        if ((c & 0xFC00) == 0xD800) {    /* high surrogate */
            if (in >= inend)             /* pair split across buffers */
                break;
            d = ((unsigned int) in[0] << 8) | (unsigned int) in[1];
            in += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = (int) (out - outstart);
                *inlenb = (int) (processed - inb);
                return(-2);
            }
            c = (((c & 0x03FF) << 10) | (d & 0x03FF)) + 0x10000;
        }

        /* c is a single UCS-4 value: emit the lead byte ... */
        if (c < 0x80) {
            *out++ = (unsigned char) c;
            bits = -6;
        } else if (c < 0x800) {
            *out++ = (unsigned char) (((c >> 6) & 0x1F) | 0xC0);
            bits = 0;
        } else if (c < 0x10000) {
            *out++ = (unsigned char) (((c >> 12) & 0x0F) | 0xE0);
            bits = 6;
        } else {
            *out++ = (unsigned char) (((c >> 18) & 0x07) | 0xF0);
            bits = 12;
        }
        /* ... followed by the continuation bytes */
        for ( ; bits >= 0; bits -= 6)
            *out++ = (unsigned char) (((c >> bits) & 0x3F) | 0x80);

        processed = in;
    }
    *outlen = (int) (out - outstart);
    *inlenb = (int) (processed - inb);
    return(*outlen);
}
816 
817 #ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16BE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars, or NULL for an
 *       initialization call
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
 * block of chars out.  No byte order mark is produced.  Code units are
 * stored byte by byte (high byte first), so the result depends neither on
 * the host byte order nor on the alignment of @outb.
 *
 * Returns the number of bytes written (0 for an initialization call),
 *     -1 if @outb, @outlen or @inlen is NULL, or -2 if the input is
 *     malformed or encodes a value above 0x10FFFF.
 *     The value of *@inlen after return is the number of octets consumed,
 *     the value of *@outlen the number of octets produced (in bytes on
 *     every path, including the -2 case); a sequence cut off by the end of
 *     @in or not fitting into @outb is left unconsumed.
 */
static int
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    unsigned char* outend;
    const unsigned char* processed = in;
    const unsigned char *const instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    unsigned int hi, lo;
    int trailing;

    /* UTF-16BE has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }

    inend = in + *inlen;
    /* only whole 16-bit units of the output buffer are usable */
    outend = outb + (*outlen / 2) * 2;

    while (in < inend) {
        d = *in++;
        if (d < 0x80) {
            c = d;
            trailing = 0;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            goto bad_input;
        } else if (d < 0xE0) {
            c = d & 0x1F;
            trailing = 1;
        } else if (d < 0xF0) {
            c = d & 0x0F;
            trailing = 2;
        } else if (d < 0xF8) {
            c = d & 0x07;
            trailing = 3;
        } else {
            /* no chance for this in UTF-16 */
            goto bad_input;
        }

        /* sequence split across buffers: stop and wait for more input */
        if (inend - in < trailing)
            break;

        for ( ; trailing > 0; trailing--) {
            d = *in++;
            /* anything but 10xxxxxx cannot continue the sequence */
            if ((d & 0xC0) != 0x80)
                goto bad_input;
            c = (c << 6) | (d & 0x3F);
        }

        /* c is a single UCS-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            *out++ = (unsigned char) (c >> 8);
            *out++ = (unsigned char) (c & 0xFF);
        } else if (c < 0x110000) {
            /* needs a surrogate pair: two code units */
            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            *out++ = (unsigned char) (hi >> 8);
            *out++ = (unsigned char) (hi & 0xFF);
            *out++ = (unsigned char) (lo >> 8);
            *out++ = (unsigned char) (lo & 0xFF);
        } else {
            /* beyond the Unicode range: cannot be expressed in UTF-16 */
            goto bad_input;
        }
        processed = in;
    }
    *outlen = (int) (out - outb);
    *inlen = (int) (processed - instart);
    return(*outlen);

bad_input:
    /* byte count, matching the success path and UTF8ToUTF16LE() */
    *outlen = (int) (out - outb);
    *inlen = (int) (processed - instart);
    return(-2);
}
923 #endif /* LIBXML_OUTPUT_ENABLED */
924 
925 /************************************************************************
926  *									*
927  *		Generic encoding handling routines			*
928  *									*
929  ************************************************************************/
930 
931 /**
932  * xmlDetectCharEncoding:
933  * @in:  a pointer to the first bytes of the XML entity, must be at least
934  *       2 bytes long (at least 4 if encoding is UTF4 variant).
935  * @len:  pointer to the length of the buffer
936  *
937  * Guess the encoding of the entity using the first bytes of the entity content
938  * according to the non-normative appendix F of the XML-1.0 recommendation.
939  *
940  * Returns one of the XML_CHAR_ENCODING_... values.
941  */
942 xmlCharEncoding
xmlDetectCharEncoding(const unsigned char * in,int len)943 xmlDetectCharEncoding(const unsigned char* in, int len)
944 {
945     if (in == NULL)
946         return(XML_CHAR_ENCODING_NONE);
947     if (len >= 4) {
948 	if ((in[0] == 0x00) && (in[1] == 0x00) &&
949 	    (in[2] == 0x00) && (in[3] == 0x3C))
950 	    return(XML_CHAR_ENCODING_UCS4BE);
951 	if ((in[0] == 0x3C) && (in[1] == 0x00) &&
952 	    (in[2] == 0x00) && (in[3] == 0x00))
953 	    return(XML_CHAR_ENCODING_UCS4LE);
954 	if ((in[0] == 0x00) && (in[1] == 0x00) &&
955 	    (in[2] == 0x3C) && (in[3] == 0x00))
956 	    return(XML_CHAR_ENCODING_UCS4_2143);
957 	if ((in[0] == 0x00) && (in[1] == 0x3C) &&
958 	    (in[2] == 0x00) && (in[3] == 0x00))
959 	    return(XML_CHAR_ENCODING_UCS4_3412);
960 	if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
961 	    (in[2] == 0xA7) && (in[3] == 0x94))
962 	    return(XML_CHAR_ENCODING_EBCDIC);
963 	if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
964 	    (in[2] == 0x78) && (in[3] == 0x6D))
965 	    return(XML_CHAR_ENCODING_UTF8);
966 	/*
967 	 * Although not part of the recommendation, we also
968 	 * attempt an "auto-recognition" of UTF-16LE and
969 	 * UTF-16BE encodings.
970 	 */
971 	if ((in[0] == 0x3C) && (in[1] == 0x00) &&
972 	    (in[2] == 0x3F) && (in[3] == 0x00))
973 	    return(XML_CHAR_ENCODING_UTF16LE);
974 	if ((in[0] == 0x00) && (in[1] == 0x3C) &&
975 	    (in[2] == 0x00) && (in[3] == 0x3F))
976 	    return(XML_CHAR_ENCODING_UTF16BE);
977     }
978     if (len >= 3) {
979 	/*
980 	 * Errata on XML-1.0 June 20 2001
981 	 * We now allow an UTF8 encoded BOM
982 	 */
983 	if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
984 	    (in[2] == 0xBF))
985 	    return(XML_CHAR_ENCODING_UTF8);
986     }
987     /* For UTF-16 we can recognize by the BOM */
988     if (len >= 2) {
989 	if ((in[0] == 0xFE) && (in[1] == 0xFF))
990 	    return(XML_CHAR_ENCODING_UTF16BE);
991 	if ((in[0] == 0xFF) && (in[1] == 0xFE))
992 	    return(XML_CHAR_ENCODING_UTF16LE);
993     }
994     return(XML_CHAR_ENCODING_NONE);
995 }
996 
997 /**
998  * xmlCleanupEncodingAliases:
999  *
1000  * Unregisters all aliases
1001  */
1002 void
xmlCleanupEncodingAliases(void)1003 xmlCleanupEncodingAliases(void) {
1004     int i;
1005 
1006     if (xmlCharEncodingAliases == NULL)
1007 	return;
1008 
1009     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1010 	if (xmlCharEncodingAliases[i].name != NULL)
1011 	    xmlFree((char *) xmlCharEncodingAliases[i].name);
1012 	if (xmlCharEncodingAliases[i].alias != NULL)
1013 	    xmlFree((char *) xmlCharEncodingAliases[i].alias);
1014     }
1015     xmlCharEncodingAliasesNb = 0;
1016     xmlCharEncodingAliasesMax = 0;
1017     xmlFree(xmlCharEncodingAliases);
1018     xmlCharEncodingAliases = NULL;
1019 }
1020 
1021 /**
1022  * xmlGetEncodingAlias:
1023  * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1024  *
1025  * Lookup an encoding name for the given alias.
1026  *
1027  * Returns NULL if not found, otherwise the original name
1028  */
1029 const char *
xmlGetEncodingAlias(const char * alias)1030 xmlGetEncodingAlias(const char *alias) {
1031     int i;
1032     char upper[100];
1033 
1034     if (alias == NULL)
1035 	return(NULL);
1036 
1037     if (xmlCharEncodingAliases == NULL)
1038 	return(NULL);
1039 
1040     for (i = 0;i < 99;i++) {
1041         upper[i] = toupper(alias[i]);
1042 	if (upper[i] == 0) break;
1043     }
1044     upper[i] = 0;
1045 
1046     /*
1047      * Walk down the list looking for a definition of the alias
1048      */
1049     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1050 	if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1051 	    return(xmlCharEncodingAliases[i].name);
1052 	}
1053     }
1054     return(NULL);
1055 }
1056 
1057 /**
1058  * xmlAddEncodingAlias:
1059  * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1060  * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1061  *
1062  * Registers an alias @alias for an encoding named @name. Existing alias
1063  * will be overwritten.
1064  *
1065  * Returns 0 in case of success, -1 in case of error
1066  */
1067 int
xmlAddEncodingAlias(const char * name,const char * alias)1068 xmlAddEncodingAlias(const char *name, const char *alias) {
1069     int i;
1070     char upper[100];
1071 
1072     if ((name == NULL) || (alias == NULL))
1073 	return(-1);
1074 
1075     for (i = 0;i < 99;i++) {
1076         upper[i] = toupper(alias[i]);
1077 	if (upper[i] == 0) break;
1078     }
1079     upper[i] = 0;
1080 
1081     if (xmlCharEncodingAliases == NULL) {
1082 	xmlCharEncodingAliasesNb = 0;
1083 	xmlCharEncodingAliasesMax = 20;
1084 	xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1085 	      xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1086 	if (xmlCharEncodingAliases == NULL)
1087 	    return(-1);
1088     } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
1089 	xmlCharEncodingAliasesMax *= 2;
1090 	xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1091 	      xmlRealloc(xmlCharEncodingAliases,
1092 		         xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1093     }
1094     /*
1095      * Walk down the list looking for a definition of the alias
1096      */
1097     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1098 	if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1099 	    /*
1100 	     * Replace the definition.
1101 	     */
1102 	    xmlFree((char *) xmlCharEncodingAliases[i].name);
1103 	    xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
1104 	    return(0);
1105 	}
1106     }
1107     /*
1108      * Add the definition
1109      */
1110     xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
1111     xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
1112     xmlCharEncodingAliasesNb++;
1113     return(0);
1114 }
1115 
1116 /**
1117  * xmlDelEncodingAlias:
1118  * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1119  *
1120  * Unregisters an encoding alias @alias
1121  *
1122  * Returns 0 in case of success, -1 in case of error
1123  */
1124 int
xmlDelEncodingAlias(const char * alias)1125 xmlDelEncodingAlias(const char *alias) {
1126     int i;
1127 
1128     if (alias == NULL)
1129 	return(-1);
1130 
1131     if (xmlCharEncodingAliases == NULL)
1132 	return(-1);
1133     /*
1134      * Walk down the list looking for a definition of the alias
1135      */
1136     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1137 	if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1138 	    xmlFree((char *) xmlCharEncodingAliases[i].name);
1139 	    xmlFree((char *) xmlCharEncodingAliases[i].alias);
1140 	    xmlCharEncodingAliasesNb--;
1141 	    memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1142 		    sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1143 	    return(0);
1144 	}
1145     }
1146     return(-1);
1147 }
1148 
1149 /**
1150  * xmlParseCharEncoding:
1151  * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1152  *
1153  * Compare the string to the encoding schemes already known. Note
1154  * that the comparison is case insensitive accordingly to the section
1155  * [XML] 4.3.3 Character Encoding in Entities.
1156  *
1157  * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1158  * if not recognized.
1159  */
1160 xmlCharEncoding
xmlParseCharEncoding(const char * name)1161 xmlParseCharEncoding(const char* name)
1162 {
1163     const char *alias;
1164     char upper[500];
1165     int i;
1166 
1167     if (name == NULL)
1168 	return(XML_CHAR_ENCODING_NONE);
1169 
1170     /*
1171      * Do the alias resolution
1172      */
1173     alias = xmlGetEncodingAlias(name);
1174     if (alias != NULL)
1175 	name = alias;
1176 
1177     for (i = 0;i < 499;i++) {
1178         upper[i] = toupper(name[i]);
1179 	if (upper[i] == 0) break;
1180     }
1181     upper[i] = 0;
1182 
1183     if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1184     if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1185     if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1186 
1187     /*
1188      * NOTE: if we were able to parse this, the endianness of UTF16 is
1189      *       already found and in use
1190      */
1191     if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1192     if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1193 
1194     if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1195     if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1196     if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1197 
1198     /*
1199      * NOTE: if we were able to parse this, the endianness of UCS4 is
1200      *       already found and in use
1201      */
1202     if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1203     if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1204     if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1205 
1206 
1207     if (!strcmp(upper,  "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1208     if (!strcmp(upper,  "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1209     if (!strcmp(upper,  "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1210 
1211     if (!strcmp(upper,  "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1212     if (!strcmp(upper,  "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1213     if (!strcmp(upper,  "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1214 
1215     if (!strcmp(upper,  "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1216     if (!strcmp(upper,  "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1217     if (!strcmp(upper,  "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1218     if (!strcmp(upper,  "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1219     if (!strcmp(upper,  "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1220     if (!strcmp(upper,  "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1221     if (!strcmp(upper,  "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1222 
1223     if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1224     if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1225     if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1226 
1227 #ifdef DEBUG_ENCODING
1228     xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1229 #endif
1230     return(XML_CHAR_ENCODING_ERROR);
1231 }
1232 
1233 /**
1234  * xmlGetCharEncodingName:
1235  * @enc:  the encoding
1236  *
1237  * The "canonical" name for XML encoding.
1238  * C.f. http://www.w3.org/TR/REC-xml#charencoding
1239  * Section 4.3.3  Character Encoding in Entities
1240  *
1241  * Returns the canonical name for the given encoding
1242  */
1243 
1244 const char*
xmlGetCharEncodingName(xmlCharEncoding enc)1245 xmlGetCharEncodingName(xmlCharEncoding enc) {
1246     switch (enc) {
1247         case XML_CHAR_ENCODING_ERROR:
1248 	    return(NULL);
1249         case XML_CHAR_ENCODING_NONE:
1250 	    return(NULL);
1251         case XML_CHAR_ENCODING_UTF8:
1252 	    return("UTF-8");
1253         case XML_CHAR_ENCODING_UTF16LE:
1254 	    return("UTF-16");
1255         case XML_CHAR_ENCODING_UTF16BE:
1256 	    return("UTF-16");
1257         case XML_CHAR_ENCODING_EBCDIC:
1258             return("EBCDIC");
1259         case XML_CHAR_ENCODING_UCS4LE:
1260             return("ISO-10646-UCS-4");
1261         case XML_CHAR_ENCODING_UCS4BE:
1262             return("ISO-10646-UCS-4");
1263         case XML_CHAR_ENCODING_UCS4_2143:
1264             return("ISO-10646-UCS-4");
1265         case XML_CHAR_ENCODING_UCS4_3412:
1266             return("ISO-10646-UCS-4");
1267         case XML_CHAR_ENCODING_UCS2:
1268             return("ISO-10646-UCS-2");
1269         case XML_CHAR_ENCODING_8859_1:
1270 	    return("ISO-8859-1");
1271         case XML_CHAR_ENCODING_8859_2:
1272 	    return("ISO-8859-2");
1273         case XML_CHAR_ENCODING_8859_3:
1274 	    return("ISO-8859-3");
1275         case XML_CHAR_ENCODING_8859_4:
1276 	    return("ISO-8859-4");
1277         case XML_CHAR_ENCODING_8859_5:
1278 	    return("ISO-8859-5");
1279         case XML_CHAR_ENCODING_8859_6:
1280 	    return("ISO-8859-6");
1281         case XML_CHAR_ENCODING_8859_7:
1282 	    return("ISO-8859-7");
1283         case XML_CHAR_ENCODING_8859_8:
1284 	    return("ISO-8859-8");
1285         case XML_CHAR_ENCODING_8859_9:
1286 	    return("ISO-8859-9");
1287         case XML_CHAR_ENCODING_2022_JP:
1288             return("ISO-2022-JP");
1289         case XML_CHAR_ENCODING_SHIFT_JIS:
1290             return("Shift-JIS");
1291         case XML_CHAR_ENCODING_EUC_JP:
1292             return("EUC-JP");
1293 	case XML_CHAR_ENCODING_ASCII:
1294 	    return(NULL);
1295     }
1296     return(NULL);
1297 }
1298 
1299 /************************************************************************
1300  *									*
1301  *			Char encoding handlers				*
1302  *									*
1303  ************************************************************************/
1304 
1305 
1306 /* the size should be growable, but it's not a big deal ... */
1307 #define MAX_ENCODING_HANDLERS 50
1308 static xmlCharEncodingHandlerPtr *handlers = NULL;
1309 static int nbCharEncodingHandler = 0;
1310 
1311 /*
1312  * The default is UTF-8 for XML, that's also the default used for the
1313  * parser internals, so the default encoding handler is NULL
1314  */
1315 
1316 static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
1317 
1318 /**
1319  * xmlNewCharEncodingHandler:
1320  * @name:  the encoding name, in UTF-8 format (ASCII actually)
1321  * @input:  the xmlCharEncodingInputFunc to read that encoding
1322  * @output:  the xmlCharEncodingOutputFunc to write that encoding
1323  *
1324  * Create and registers an xmlCharEncodingHandler.
1325  *
1326  * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1327  */
1328 xmlCharEncodingHandlerPtr
xmlNewCharEncodingHandler(const char * name,xmlCharEncodingInputFunc input,xmlCharEncodingOutputFunc output)1329 xmlNewCharEncodingHandler(const char *name,
1330                           xmlCharEncodingInputFunc input,
1331                           xmlCharEncodingOutputFunc output) {
1332     xmlCharEncodingHandlerPtr handler;
1333     const char *alias;
1334     char upper[500];
1335     int i;
1336     char *up = NULL;
1337 
1338     /*
1339      * Do the alias resolution
1340      */
1341     alias = xmlGetEncodingAlias(name);
1342     if (alias != NULL)
1343 	name = alias;
1344 
1345     /*
1346      * Keep only the uppercase version of the encoding.
1347      */
1348     if (name == NULL) {
1349         xmlEncodingErr(XML_I18N_NO_NAME,
1350 		       "xmlNewCharEncodingHandler : no name !\n", NULL);
1351 	return(NULL);
1352     }
1353     for (i = 0;i < 499;i++) {
1354         upper[i] = toupper(name[i]);
1355 	if (upper[i] == 0) break;
1356     }
1357     upper[i] = 0;
1358     up = xmlMemStrdup(upper);
1359     if (up == NULL) {
1360         xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1361 	return(NULL);
1362     }
1363 
1364     /*
1365      * allocate and fill-up an handler block.
1366      */
1367     handler = (xmlCharEncodingHandlerPtr)
1368               xmlMalloc(sizeof(xmlCharEncodingHandler));
1369     if (handler == NULL) {
1370         xmlFree(up);
1371         xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1372 	return(NULL);
1373     }
1374     memset(handler, 0, sizeof(xmlCharEncodingHandler));
1375     handler->input = input;
1376     handler->output = output;
1377     handler->name = up;
1378 
1379 #ifdef LIBXML_ICONV_ENABLED
1380     handler->iconv_in = NULL;
1381     handler->iconv_out = NULL;
1382 #endif
1383 #ifdef LIBXML_ICU_ENABLED
1384     handler->uconv_in = NULL;
1385     handler->uconv_out = NULL;
1386 #endif
1387 
1388     /*
1389      * registers and returns the handler.
1390      */
1391     xmlRegisterCharEncodingHandler(handler);
1392 #ifdef DEBUG_ENCODING
1393     xmlGenericError(xmlGenericErrorContext,
1394 	    "Registered encoding handler for %s\n", name);
1395 #endif
1396     return(handler);
1397 }
1398 
1399 /**
1400  * xmlInitCharEncodingHandlers:
1401  *
1402  * Initialize the char encoding support, it registers the default
1403  * encoding supported.
1404  * NOTE: while public, this function usually doesn't need to be called
1405  *       in normal processing.
1406  */
1407 void
xmlInitCharEncodingHandlers(void)1408 xmlInitCharEncodingHandlers(void) {
1409     unsigned short int tst = 0x1234;
1410     unsigned char *ptr = (unsigned char *) &tst;
1411 
1412     if (handlers != NULL) return;
1413 
1414     handlers = (xmlCharEncodingHandlerPtr *)
1415         xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
1416 
1417     if (*ptr == 0x12) xmlLittleEndian = 0;
1418     else if (*ptr == 0x34) xmlLittleEndian = 1;
1419     else {
1420         xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1421 	               "Odd problem at endianness detection\n", NULL);
1422     }
1423 
1424     if (handlers == NULL) {
1425         xmlEncodingErrMemory("xmlInitCharEncodingHandlers : out of memory !\n");
1426 	return;
1427     }
1428     xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8);
1429 #ifdef LIBXML_OUTPUT_ENABLED
1430     xmlUTF16LEHandler =
1431           xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
1432     xmlUTF16BEHandler =
1433           xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
1434     xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16);
1435     xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
1436     xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
1437     xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii);
1438 #ifdef LIBXML_HTML_ENABLED
1439     xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml);
1440 #endif
1441 #else
1442     xmlUTF16LEHandler =
1443           xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL);
1444     xmlUTF16BEHandler =
1445           xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, NULL);
1446     xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL);
1447     xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL);
1448     xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL);
1449     xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL);
1450 #endif /* LIBXML_OUTPUT_ENABLED */
1451 #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
1452 #ifdef LIBXML_ISO8859X_ENABLED
1453     xmlRegisterCharEncodingHandlersISO8859x ();
1454 #endif
1455 #endif
1456 
1457 }
1458 
1459 /**
1460  * xmlCleanupCharEncodingHandlers:
1461  *
1462  * Cleanup the memory allocated for the char encoding support, it
1463  * unregisters all the encoding handlers and the aliases.
1464  */
1465 void
xmlCleanupCharEncodingHandlers(void)1466 xmlCleanupCharEncodingHandlers(void) {
1467     xmlCleanupEncodingAliases();
1468 
1469     if (handlers == NULL) return;
1470 
1471     for (;nbCharEncodingHandler > 0;) {
1472         nbCharEncodingHandler--;
1473 	if (handlers[nbCharEncodingHandler] != NULL) {
1474 	    if (handlers[nbCharEncodingHandler]->name != NULL)
1475 		xmlFree(handlers[nbCharEncodingHandler]->name);
1476 	    xmlFree(handlers[nbCharEncodingHandler]);
1477 	}
1478     }
1479     xmlFree(handlers);
1480     handlers = NULL;
1481     nbCharEncodingHandler = 0;
1482     xmlDefaultCharEncodingHandler = NULL;
1483 }
1484 
1485 /**
1486  * xmlRegisterCharEncodingHandler:
1487  * @handler:  the xmlCharEncodingHandlerPtr handler block
1488  *
1489  * Register the char encoding handler, surprising, isn't it ?
1490  */
1491 void
xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler)1492 xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1493     if (handlers == NULL) xmlInitCharEncodingHandlers();
1494     if ((handler == NULL) || (handlers == NULL)) {
1495         xmlEncodingErr(XML_I18N_NO_HANDLER,
1496 		"xmlRegisterCharEncodingHandler: NULL handler !\n", NULL);
1497         goto free_handler;
1498     }
1499 
1500     if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
1501         xmlEncodingErr(XML_I18N_EXCESS_HANDLER,
1502 	"xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n",
1503 	               "MAX_ENCODING_HANDLERS");
1504         goto free_handler;
1505     }
1506     handlers[nbCharEncodingHandler++] = handler;
1507     return;
1508 
1509 free_handler:
1510     if (handler != NULL) {
1511         if (handler->name != NULL) {
1512             xmlFree(handler->name);
1513         }
1514         xmlFree(handler);
1515     }
1516 }
1517 
1518 /**
1519  * xmlGetCharEncodingHandler:
1520  * @enc:  an xmlCharEncoding value.
1521  *
1522  * Search in the registered set the handler able to read/write that encoding.
1523  *
1524  * Returns the handler or NULL if not found
1525  */
1526 xmlCharEncodingHandlerPtr
xmlGetCharEncodingHandler(xmlCharEncoding enc)1527 xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1528     xmlCharEncodingHandlerPtr handler;
1529 
1530     if (handlers == NULL) xmlInitCharEncodingHandlers();
1531     switch (enc) {
1532         case XML_CHAR_ENCODING_ERROR:
1533 	    return(NULL);
1534         case XML_CHAR_ENCODING_NONE:
1535 	    return(NULL);
1536         case XML_CHAR_ENCODING_UTF8:
1537 	    return(NULL);
1538         case XML_CHAR_ENCODING_UTF16LE:
1539 	    return(xmlUTF16LEHandler);
1540         case XML_CHAR_ENCODING_UTF16BE:
1541 	    return(xmlUTF16BEHandler);
1542         case XML_CHAR_ENCODING_EBCDIC:
1543             handler = xmlFindCharEncodingHandler("EBCDIC");
1544             if (handler != NULL) return(handler);
1545             handler = xmlFindCharEncodingHandler("ebcdic");
1546             if (handler != NULL) return(handler);
1547             handler = xmlFindCharEncodingHandler("EBCDIC-US");
1548             if (handler != NULL) return(handler);
1549             handler = xmlFindCharEncodingHandler("IBM-037");
1550             if (handler != NULL) return(handler);
1551 	    break;
1552         case XML_CHAR_ENCODING_UCS4BE:
1553             handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1554             if (handler != NULL) return(handler);
1555             handler = xmlFindCharEncodingHandler("UCS-4");
1556             if (handler != NULL) return(handler);
1557             handler = xmlFindCharEncodingHandler("UCS4");
1558             if (handler != NULL) return(handler);
1559 	    break;
1560         case XML_CHAR_ENCODING_UCS4LE:
1561             handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1562             if (handler != NULL) return(handler);
1563             handler = xmlFindCharEncodingHandler("UCS-4");
1564             if (handler != NULL) return(handler);
1565             handler = xmlFindCharEncodingHandler("UCS4");
1566             if (handler != NULL) return(handler);
1567 	    break;
1568         case XML_CHAR_ENCODING_UCS4_2143:
1569 	    break;
1570         case XML_CHAR_ENCODING_UCS4_3412:
1571 	    break;
1572         case XML_CHAR_ENCODING_UCS2:
1573             handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1574             if (handler != NULL) return(handler);
1575             handler = xmlFindCharEncodingHandler("UCS-2");
1576             if (handler != NULL) return(handler);
1577             handler = xmlFindCharEncodingHandler("UCS2");
1578             if (handler != NULL) return(handler);
1579 	    break;
1580 
1581 	    /*
1582 	     * We used to keep ISO Latin encodings native in the
1583 	     * generated data. This led to so many problems that
1584 	     * this has been removed. One can still change this
1585 	     * back by registering no-ops encoders for those
1586 	     */
1587         case XML_CHAR_ENCODING_8859_1:
1588 	    handler = xmlFindCharEncodingHandler("ISO-8859-1");
1589 	    if (handler != NULL) return(handler);
1590 	    break;
1591         case XML_CHAR_ENCODING_8859_2:
1592 	    handler = xmlFindCharEncodingHandler("ISO-8859-2");
1593 	    if (handler != NULL) return(handler);
1594 	    break;
1595         case XML_CHAR_ENCODING_8859_3:
1596 	    handler = xmlFindCharEncodingHandler("ISO-8859-3");
1597 	    if (handler != NULL) return(handler);
1598 	    break;
1599         case XML_CHAR_ENCODING_8859_4:
1600 	    handler = xmlFindCharEncodingHandler("ISO-8859-4");
1601 	    if (handler != NULL) return(handler);
1602 	    break;
1603         case XML_CHAR_ENCODING_8859_5:
1604 	    handler = xmlFindCharEncodingHandler("ISO-8859-5");
1605 	    if (handler != NULL) return(handler);
1606 	    break;
1607         case XML_CHAR_ENCODING_8859_6:
1608 	    handler = xmlFindCharEncodingHandler("ISO-8859-6");
1609 	    if (handler != NULL) return(handler);
1610 	    break;
1611         case XML_CHAR_ENCODING_8859_7:
1612 	    handler = xmlFindCharEncodingHandler("ISO-8859-7");
1613 	    if (handler != NULL) return(handler);
1614 	    break;
1615         case XML_CHAR_ENCODING_8859_8:
1616 	    handler = xmlFindCharEncodingHandler("ISO-8859-8");
1617 	    if (handler != NULL) return(handler);
1618 	    break;
1619         case XML_CHAR_ENCODING_8859_9:
1620 	    handler = xmlFindCharEncodingHandler("ISO-8859-9");
1621 	    if (handler != NULL) return(handler);
1622 	    break;
1623 
1624 
1625         case XML_CHAR_ENCODING_2022_JP:
1626             handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1627             if (handler != NULL) return(handler);
1628 	    break;
1629         case XML_CHAR_ENCODING_SHIFT_JIS:
1630             handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1631             if (handler != NULL) return(handler);
1632             handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1633             if (handler != NULL) return(handler);
1634             handler = xmlFindCharEncodingHandler("Shift_JIS");
1635             if (handler != NULL) return(handler);
1636 	    break;
1637         case XML_CHAR_ENCODING_EUC_JP:
1638             handler = xmlFindCharEncodingHandler("EUC-JP");
1639             if (handler != NULL) return(handler);
1640 	    break;
1641 	default:
1642 	    break;
1643     }
1644 
1645 #ifdef DEBUG_ENCODING
1646     xmlGenericError(xmlGenericErrorContext,
1647 	    "No handler found for encoding %d\n", enc);
1648 #endif
1649     return(NULL);
1650 }
1651 
1652 /**
1653  * xmlFindCharEncodingHandler:
1654  * @name:  a string describing the char encoding.
1655  *
1656  * Search in the registered set the handler able to read/write that encoding.
1657  *
1658  * Returns the handler or NULL if not found
1659  */
1660 xmlCharEncodingHandlerPtr
xmlFindCharEncodingHandler(const char * name)1661 xmlFindCharEncodingHandler(const char *name) {
1662     const char *nalias;
1663     const char *norig;
1664     xmlCharEncoding alias;
1665 #ifdef LIBXML_ICONV_ENABLED
1666     xmlCharEncodingHandlerPtr enc;
1667     iconv_t icv_in, icv_out;
1668 #endif /* LIBXML_ICONV_ENABLED */
1669 #ifdef LIBXML_ICU_ENABLED
1670     xmlCharEncodingHandlerPtr encu;
1671     uconv_t *ucv_in, *ucv_out;
1672 #endif /* LIBXML_ICU_ENABLED */
1673     char upper[100];
1674     int i;
1675 
1676     if (handlers == NULL) xmlInitCharEncodingHandlers();
1677     if (name == NULL) return(xmlDefaultCharEncodingHandler);
1678     if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
1679 
1680     /*
1681      * Do the alias resolution
1682      */
1683     norig = name;
1684     nalias = xmlGetEncodingAlias(name);
1685     if (nalias != NULL)
1686 	name = nalias;
1687 
1688     /*
1689      * Check first for directly registered encoding names
1690      */
1691     for (i = 0;i < 99;i++) {
1692         upper[i] = toupper(name[i]);
1693 	if (upper[i] == 0) break;
1694     }
1695     upper[i] = 0;
1696 
1697     if (handlers != NULL) {
1698         for (i = 0;i < nbCharEncodingHandler; i++) {
1699             if (!strcmp(upper, handlers[i]->name)) {
1700 #ifdef DEBUG_ENCODING
1701                 xmlGenericError(xmlGenericErrorContext,
1702                         "Found registered handler for encoding %s\n", name);
1703 #endif
1704                 return(handlers[i]);
1705             }
1706         }
1707     }
1708 
1709 #ifdef LIBXML_ICONV_ENABLED
1710     /* check whether iconv can handle this */
1711     icv_in = iconv_open("UTF-8", name);
1712     icv_out = iconv_open(name, "UTF-8");
1713     if (icv_in == (iconv_t) -1) {
1714         icv_in = iconv_open("UTF-8", upper);
1715     }
1716     if (icv_out == (iconv_t) -1) {
1717 	icv_out = iconv_open(upper, "UTF-8");
1718     }
1719     if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1720 	    enc = (xmlCharEncodingHandlerPtr)
1721 	          xmlMalloc(sizeof(xmlCharEncodingHandler));
1722 	    if (enc == NULL) {
1723 	        iconv_close(icv_in);
1724 	        iconv_close(icv_out);
1725 		return(NULL);
1726 	    }
1727             memset(enc, 0, sizeof(xmlCharEncodingHandler));
1728 	    enc->name = xmlMemStrdup(name);
1729 	    enc->input = NULL;
1730 	    enc->output = NULL;
1731 	    enc->iconv_in = icv_in;
1732 	    enc->iconv_out = icv_out;
1733 #ifdef DEBUG_ENCODING
1734             xmlGenericError(xmlGenericErrorContext,
1735 		    "Found iconv handler for encoding %s\n", name);
1736 #endif
1737 	    return enc;
1738     } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
1739 	    xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1740 		    "iconv : problems with filters for '%s'\n", name);
1741     }
1742 #endif /* LIBXML_ICONV_ENABLED */
1743 #ifdef LIBXML_ICU_ENABLED
1744     /* check whether icu can handle this */
1745     ucv_in = openIcuConverter(name, 1);
1746     ucv_out = openIcuConverter(name, 0);
1747     if (ucv_in != NULL && ucv_out != NULL) {
1748 	    encu = (xmlCharEncodingHandlerPtr)
1749 	           xmlMalloc(sizeof(xmlCharEncodingHandler));
1750 	    if (encu == NULL) {
1751                 closeIcuConverter(ucv_in);
1752                 closeIcuConverter(ucv_out);
1753 		return(NULL);
1754 	    }
1755             memset(encu, 0, sizeof(xmlCharEncodingHandler));
1756 	    encu->name = xmlMemStrdup(name);
1757 	    encu->input = NULL;
1758 	    encu->output = NULL;
1759 	    encu->uconv_in = ucv_in;
1760 	    encu->uconv_out = ucv_out;
1761 #ifdef DEBUG_ENCODING
1762             xmlGenericError(xmlGenericErrorContext,
1763 		    "Found ICU converter handler for encoding %s\n", name);
1764 #endif
1765 	    return encu;
1766     } else if (ucv_in != NULL || ucv_out != NULL) {
1767             closeIcuConverter(ucv_in);
1768             closeIcuConverter(ucv_out);
1769 	    xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1770 		    "ICU converter : problems with filters for '%s'\n", name);
1771     }
1772 #endif /* LIBXML_ICU_ENABLED */
1773 
1774 #ifdef DEBUG_ENCODING
1775     xmlGenericError(xmlGenericErrorContext,
1776 	    "No handler found for encoding %s\n", name);
1777 #endif
1778 
1779     /*
1780      * Fallback using the canonical names
1781      */
1782     alias = xmlParseCharEncoding(norig);
1783     if (alias != XML_CHAR_ENCODING_ERROR) {
1784         const char* canon;
1785         canon = xmlGetCharEncodingName(alias);
1786         if ((canon != NULL) && (strcmp(name, canon))) {
1787 	    return(xmlFindCharEncodingHandler(canon));
1788         }
1789     }
1790 
1791     /* If "none of the above", give up */
1792     return(NULL);
1793 }
1794 
1795 /************************************************************************
1796  *									*
1797  *		ICONV based generic conversion functions		*
1798  *									*
1799  ************************************************************************/
1800 
#ifdef LIBXML_ICONV_ENABLED
/**
 * xmlIconvWrapper:
 * @cd:		iconv converter data structure
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of input bytes
 * @inlen:  the length of @in
 *
 * Runs one iconv() call over the given buffers and maps its outcome to
 * the return codes below.
 *
 * Returns 0 if success, or
 *     -1 by lack of space (also when a pointer argument is NULL), or
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
 *        the result of transformation can't fit into the encoding we want), or
 *     -3 if there the last byte can't form a single output char, and for
 *        any other failure.
 *
 * After the iconv() call @inlen has been reduced by the number of input
 * octets left unconverted and @outlen by the output space left unused,
 * i.e. they hold the octets consumed and produced. When a pointer
 * argument is NULL only @outlen (if not NULL) is touched, and set to 0.
 */
static int
xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
                const unsigned char *in, int *inlen) {
    const char *src = (const char *) in;
    char *dst = (char *) out;
    size_t srcLeft, dstLeft;
    size_t rc;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
        if (outlen != NULL)
            *outlen = 0;
        return(-1);
    }

    srcLeft = *inlen;
    dstLeft = *outlen;
    rc = iconv(cd, (ICONV_CONST char **) &src, &srcLeft, &dst, &dstLeft);
    *inlen -= srcLeft;
    *outlen -= dstLeft;

    /* Everything consumed and no error reported: done. */
    if ((srcLeft == 0) && (rc != (size_t) -1))
        return 0;

#ifdef EILSEQ
    if (errno == EILSEQ)
        return -2;
#endif
#ifdef E2BIG
    if (errno == E2BIG)
        return -1;
#endif
    /* EINVAL and every other errno value share the same code. */
    return -3;
}
#endif /* LIBXML_ICONV_ENABLED */
1860 
1861 /************************************************************************
1862  *									*
1863  *		ICU based generic conversion functions		*
1864  *									*
1865  ************************************************************************/
1866 
#ifdef LIBXML_ICU_ENABLED
/**
 * xmlUconvWrapper:
 * @cd: ICU uconverter data structure
 * @toUnicode : non-zero to convert from the encoding to UTF-8,
 *              0 to convert from UTF-8 to the encoding
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of input bytes
 * @inlen:  the length of @in
 * @flush: if true, indicates end of input
 *
 * Converts one chunk through ucnv_convertEx(), going through the UTF-16
 * pivot buffer stored in @cd.
 *
 * Returns 0 if success, or
 *     -1 by lack of space (also returned when a required pointer is NULL,
 *        in which case @outlen, if not NULL, is set to 0), or
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
 *        the result of transformation can't fit into the encoding we want), or
 *     -3 if there the last byte can't form a single output char, or for
 *        any other ICU error.
 *
 * Once the conversion has been attempted, the value of @inlen after return
 * is the number of octets consumed and the value of @outlen after return
 * is the number of octets produced.
 */
static int
xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
                const unsigned char *in, int *inlen, int flush) {
    UErrorCode status = U_ZERO_ERROR;
    const char *src;
    char *dst;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
        if (outlen != NULL)
            *outlen = 0;
        return(-1);
    }

    src = (const char *) in;
    dst = (char *) out;

    /*
     * toUnicode:  encoding => UTF-16 => UTF-8
     * otherwise:  UTF-8 => UTF-16 => encoding
     * Only the target/source converters differ between both directions.
     */
    ucnv_convertEx(toUnicode ? cd->utf8 : cd->uconv,
                   toUnicode ? cd->uconv : cd->utf8,
                   &dst, dst + *outlen,
                   &src, src + *inlen,
                   cd->pivot_buf, &cd->pivot_source, &cd->pivot_target,
                   cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &status);

    *inlen = src - (const char *) in;
    *outlen = dst - (char *) out;

    if (!U_SUCCESS(status)) {
        switch (status) {
            case U_BUFFER_OVERFLOW_ERROR:
                return(-1);
            case U_INVALID_CHAR_FOUND:
            case U_ILLEGAL_CHAR_FOUND:
                return(-2);
            default:
                return(-3);
        }
    }

    /* reset pivot buf if this is the last call for input (flush==TRUE) */
    if (flush) {
        cd->pivot_target = cd->pivot_buf;
        cd->pivot_source = cd->pivot_buf;
    }
    return(0);
}
#endif /* LIBXML_ICU_ENABLED */
1928 
1929 /************************************************************************
1930  *									*
1931  *		The real API used by libxml for on-the-fly conversion	*
1932  *									*
1933  ************************************************************************/
1934 
1935 /**
1936  * xmlEncInputChunk:
1937  * @handler:  encoding handler
1938  * @out:  a pointer to an array of bytes to store the result
1939  * @outlen:  the length of @out
1940  * @in:  a pointer to an array of input bytes
1941  * @inlen:  the length of @in
1942  * @flush:  flush (ICU-related)
1943  *
1944  * Returns 0 if success, or
1945  *     -1 by lack of space, or
1946  *     -2 if the transcoding fails (for *in is not valid utf8 string or
1947  *        the result of transformation can't fit into the encoding we want), or
1948  *     -3 if there the last byte can't form a single output char.
1949  *
1950  * The value of @inlen after return is the number of octets consumed
1951  *     as the return value is 0, else unpredictable.
1952  * The value of @outlen after return is the number of octets produced.
1953  */
1954 static int
xmlEncInputChunk(xmlCharEncodingHandler * handler,unsigned char * out,int * outlen,const unsigned char * in,int * inlen,int flush)1955 xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
1956                  int *outlen, const unsigned char *in, int *inlen, int flush) {
1957     int ret;
1958     (void)flush;
1959 
1960     if (handler->input != NULL) {
1961         ret = handler->input(out, outlen, in, inlen);
1962         if (ret > 0)
1963            ret = 0;
1964     }
1965 #ifdef LIBXML_ICONV_ENABLED
1966     else if (handler->iconv_in != NULL) {
1967         ret = xmlIconvWrapper(handler->iconv_in, out, outlen, in, inlen);
1968     }
1969 #endif /* LIBXML_ICONV_ENABLED */
1970 #ifdef LIBXML_ICU_ENABLED
1971     else if (handler->uconv_in != NULL) {
1972         ret = xmlUconvWrapper(handler->uconv_in, 1, out, outlen, in, inlen,
1973                               flush);
1974     }
1975 #endif /* LIBXML_ICU_ENABLED */
1976     else {
1977         *outlen = 0;
1978         *inlen = 0;
1979         ret = -2;
1980     }
1981 
1982     return(ret);
1983 }
1984 
1985 /**
1986  * xmlEncOutputChunk:
1987  * @handler:  encoding handler
1988  * @out:  a pointer to an array of bytes to store the result
1989  * @outlen:  the length of @out
1990  * @in:  a pointer to an array of input bytes
1991  * @inlen:  the length of @in
1992  *
1993  * Returns 0 if success, or
1994  *     -1 by lack of space, or
1995  *     -2 if the transcoding fails (for *in is not valid utf8 string or
1996  *        the result of transformation can't fit into the encoding we want), or
1997  *     -3 if there the last byte can't form a single output char.
1998  *     -4 if no output function was found.
1999  *
2000  * The value of @inlen after return is the number of octets consumed
2001  *     as the return value is 0, else unpredictable.
2002  * The value of @outlen after return is the number of octets produced.
2003  */
2004 static int
xmlEncOutputChunk(xmlCharEncodingHandler * handler,unsigned char * out,int * outlen,const unsigned char * in,int * inlen)2005 xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
2006                   int *outlen, const unsigned char *in, int *inlen) {
2007     int ret;
2008 
2009     if (handler->output != NULL) {
2010         ret = handler->output(out, outlen, in, inlen);
2011         if (ret > 0)
2012            ret = 0;
2013     }
2014 #ifdef LIBXML_ICONV_ENABLED
2015     else if (handler->iconv_out != NULL) {
2016         ret = xmlIconvWrapper(handler->iconv_out, out, outlen, in, inlen);
2017     }
2018 #endif /* LIBXML_ICONV_ENABLED */
2019 #ifdef LIBXML_ICU_ENABLED
2020     else if (handler->uconv_out != NULL) {
2021         ret = xmlUconvWrapper(handler->uconv_out, 0, out, outlen, in, inlen,
2022                               1);
2023     }
2024 #endif /* LIBXML_ICU_ENABLED */
2025     else {
2026         *outlen = 0;
2027         *inlen = 0;
2028         ret = -4;
2029     }
2030 
2031     return(ret);
2032 }
2033 
2034 /**
2035  * xmlCharEncFirstLineInt:
2036  * @handler:	char encoding transformation data structure
2037  * @out:  an xmlBuffer for the output.
2038  * @in:  an xmlBuffer for the input
2039  * @len:  number of bytes to convert for the first line, or -1
2040  *
2041  * Front-end for the encoding handler input function, but handle only
2042  * the very first line, i.e. limit itself to 45 chars.
2043  *
2044  * Returns the number of byte written if success, or
2045  *     -1 general error
2046  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2047  *        the result of transformation can't fit into the encoding we want), or
2048  */
2049 int
xmlCharEncFirstLineInt(xmlCharEncodingHandler * handler,xmlBufferPtr out,xmlBufferPtr in,int len)2050 xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2051                        xmlBufferPtr in, int len) {
2052     int ret;
2053     int written;
2054     int toconv;
2055 
2056     if (handler == NULL) return(-1);
2057     if (out == NULL) return(-1);
2058     if (in == NULL) return(-1);
2059 
2060     /* calculate space available */
2061     written = out->size - out->use - 1; /* count '\0' */
2062     toconv = in->use;
2063     /*
2064      * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2065      * 45 chars should be sufficient to reach the end of the encoding
2066      * declaration without going too far inside the document content.
2067      * on UTF-16 this means 90bytes, on UCS4 this means 180
2068      * The actual value depending on guessed encoding is passed as @len
2069      * if provided
2070      */
2071     if (len >= 0) {
2072         if (toconv > len)
2073             toconv = len;
2074     } else {
2075         if (toconv > 180)
2076             toconv = 180;
2077     }
2078     if (toconv * 2 >= written) {
2079         xmlBufferGrow(out, toconv * 2);
2080 	written = out->size - out->use - 1;
2081     }
2082 
2083     ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2084                            in->content, &toconv, 0);
2085     xmlBufferShrink(in, toconv);
2086     out->use += written;
2087     out->content[out->use] = 0;
2088     if (ret == -1) ret = -3;
2089 
2090 #ifdef DEBUG_ENCODING
2091     switch (ret) {
2092         case 0:
2093 	    xmlGenericError(xmlGenericErrorContext,
2094 		    "converted %d bytes to %d bytes of input\n",
2095 	            toconv, written);
2096 	    break;
2097         case -1:
2098 	    xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2099 	            toconv, written, in->use);
2100 	    break;
2101         case -2:
2102 	    xmlGenericError(xmlGenericErrorContext,
2103 		    "input conversion failed due to input error\n");
2104 	    break;
2105         case -3:
2106 	    xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2107 	            toconv, written, in->use);
2108 	    break;
2109 	default:
2110 	    xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
2111     }
2112 #endif /* DEBUG_ENCODING */
2113     /*
2114      * Ignore when input buffer is not on a boundary
2115      */
2116     if (ret == -3) ret = 0;
2117     if (ret == -1) ret = 0;
2118     return(written ? written : ret);
2119 }
2120 
2121 /**
2122  * xmlCharEncFirstLine:
2123  * @handler:	char encoding transformation data structure
2124  * @out:  an xmlBuffer for the output.
2125  * @in:  an xmlBuffer for the input
2126  *
2127  * Front-end for the encoding handler input function, but handle only
2128  * the very first line, i.e. limit itself to 45 chars.
2129  *
2130  * Returns the number of byte written if success, or
2131  *     -1 general error
2132  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2133  *        the result of transformation can't fit into the encoding we want), or
2134  */
2135 int
xmlCharEncFirstLine(xmlCharEncodingHandler * handler,xmlBufferPtr out,xmlBufferPtr in)2136 xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2137                  xmlBufferPtr in) {
2138     return(xmlCharEncFirstLineInt(handler, out, in, -1));
2139 }
2140 
2141 /**
2142  * xmlCharEncFirstLineInput:
2143  * @input: a parser input buffer
2144  * @len:  number of bytes to convert for the first line, or -1
2145  *
2146  * Front-end for the encoding handler input function, but handle only
2147  * the very first line. Point is that this is based on autodetection
2148  * of the encoding and once that first line is converted we may find
2149  * out that a different decoder is needed to process the input.
2150  *
2151  * Returns the number of byte written if success, or
2152  *     -1 general error
2153  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2154  *        the result of transformation can't fit into the encoding we want), or
2155  */
2156 int
xmlCharEncFirstLineInput(xmlParserInputBufferPtr input,int len)2157 xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len)
2158 {
2159     int ret;
2160     size_t written;
2161     size_t toconv;
2162     int c_in;
2163     int c_out;
2164     xmlBufPtr in;
2165     xmlBufPtr out;
2166 
2167     if ((input == NULL) || (input->encoder == NULL) ||
2168         (input->buffer == NULL) || (input->raw == NULL))
2169         return (-1);
2170     out = input->buffer;
2171     in = input->raw;
2172 
2173     toconv = xmlBufUse(in);
2174     if (toconv == 0)
2175         return (0);
2176     written = xmlBufAvail(out) - 1; /* count '\0' */
2177     /*
2178      * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2179      * 45 chars should be sufficient to reach the end of the encoding
2180      * declaration without going too far inside the document content.
2181      * on UTF-16 this means 90bytes, on UCS4 this means 180
2182      * The actual value depending on guessed encoding is passed as @len
2183      * if provided
2184      */
2185     if (len >= 0) {
2186         if (toconv > (unsigned int) len)
2187             toconv = len;
2188     } else {
2189         if (toconv > 180)
2190             toconv = 180;
2191     }
2192     if (toconv * 2 >= written) {
2193         xmlBufGrow(out, toconv * 2);
2194         written = xmlBufAvail(out) - 1;
2195     }
2196     if (written > 360)
2197         written = 360;
2198 
2199     c_in = toconv;
2200     c_out = written;
2201     ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2202                            xmlBufContent(in), &c_in, 0);
2203     xmlBufShrink(in, c_in);
2204     xmlBufAddLen(out, c_out);
2205     if (ret == -1)
2206         ret = -3;
2207 
2208     switch (ret) {
2209         case 0:
2210 #ifdef DEBUG_ENCODING
2211             xmlGenericError(xmlGenericErrorContext,
2212                             "converted %d bytes to %d bytes of input\n",
2213                             c_in, c_out);
2214 #endif
2215             break;
2216         case -1:
2217 #ifdef DEBUG_ENCODING
2218             xmlGenericError(xmlGenericErrorContext,
2219                          "converted %d bytes to %d bytes of input, %d left\n",
2220                             c_in, c_out, (int)xmlBufUse(in));
2221 #endif
2222             break;
2223         case -3:
2224 #ifdef DEBUG_ENCODING
2225             xmlGenericError(xmlGenericErrorContext,
2226                         "converted %d bytes to %d bytes of input, %d left\n",
2227                             c_in, c_out, (int)xmlBufUse(in));
2228 #endif
2229             break;
2230         case -2: {
2231             char buf[50];
2232             const xmlChar *content = xmlBufContent(in);
2233 
2234 	    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2235 		     content[0], content[1],
2236 		     content[2], content[3]);
2237 	    buf[49] = 0;
2238 	    xmlEncodingErr(XML_I18N_CONV_FAILED,
2239 		    "input conversion failed due to input error, bytes %s\n",
2240 		           buf);
2241         }
2242     }
2243     /*
2244      * Ignore when input buffer is not on a boundary
2245      */
2246     if (ret == -3) ret = 0;
2247     if (ret == -1) ret = 0;
2248     return(c_out ? c_out : ret);
2249 }
2250 
2251 /**
2252  * xmlCharEncInput:
2253  * @input: a parser input buffer
2254  * @flush: try to flush all the raw buffer
2255  *
2256  * Generic front-end for the encoding handler on parser input
2257  *
2258  * Returns the number of byte written if success, or
2259  *     -1 general error
2260  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2261  *        the result of transformation can't fit into the encoding we want), or
2262  */
2263 int
xmlCharEncInput(xmlParserInputBufferPtr input,int flush)2264 xmlCharEncInput(xmlParserInputBufferPtr input, int flush)
2265 {
2266     int ret;
2267     size_t written;
2268     size_t toconv;
2269     int c_in;
2270     int c_out;
2271     xmlBufPtr in;
2272     xmlBufPtr out;
2273 
2274     if ((input == NULL) || (input->encoder == NULL) ||
2275         (input->buffer == NULL) || (input->raw == NULL))
2276         return (-1);
2277     out = input->buffer;
2278     in = input->raw;
2279 
2280     toconv = xmlBufUse(in);
2281     if (toconv == 0)
2282         return (0);
2283     if ((toconv > 64 * 1024) && (flush == 0))
2284         toconv = 64 * 1024;
2285     written = xmlBufAvail(out);
2286     if (written > 0)
2287         written--; /* count '\0' */
2288     if (toconv * 2 >= written) {
2289         xmlBufGrow(out, toconv * 2);
2290         written = xmlBufAvail(out);
2291         if (written > 0)
2292             written--; /* count '\0' */
2293     }
2294     if ((written > 128 * 1024) && (flush == 0))
2295         written = 128 * 1024;
2296 
2297     c_in = toconv;
2298     c_out = written;
2299     ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2300                            xmlBufContent(in), &c_in, flush);
2301     xmlBufShrink(in, c_in);
2302     xmlBufAddLen(out, c_out);
2303     if (ret == -1)
2304         ret = -3;
2305 
2306     switch (ret) {
2307         case 0:
2308 #ifdef DEBUG_ENCODING
2309             xmlGenericError(xmlGenericErrorContext,
2310                             "converted %d bytes to %d bytes of input\n",
2311                             c_in, c_out);
2312 #endif
2313             break;
2314         case -1:
2315 #ifdef DEBUG_ENCODING
2316             xmlGenericError(xmlGenericErrorContext,
2317                          "converted %d bytes to %d bytes of input, %d left\n",
2318                             c_in, c_out, (int)xmlBufUse(in));
2319 #endif
2320             break;
2321         case -3:
2322 #ifdef DEBUG_ENCODING
2323             xmlGenericError(xmlGenericErrorContext,
2324                         "converted %d bytes to %d bytes of input, %d left\n",
2325                             c_in, c_out, (int)xmlBufUse(in));
2326 #endif
2327             break;
2328         case -2: {
2329             char buf[50];
2330             const xmlChar *content = xmlBufContent(in);
2331 
2332 	    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2333 		     content[0], content[1],
2334 		     content[2], content[3]);
2335 	    buf[49] = 0;
2336 	    xmlEncodingErr(XML_I18N_CONV_FAILED,
2337 		    "input conversion failed due to input error, bytes %s\n",
2338 		           buf);
2339         }
2340     }
2341     /*
2342      * Ignore when input buffer is not on a boundary
2343      */
2344     if (ret == -3)
2345         ret = 0;
2346     return (c_out? c_out : ret);
2347 }
2348 
2349 /**
2350  * xmlCharEncInFunc:
2351  * @handler:	char encoding transformation data structure
2352  * @out:  an xmlBuffer for the output.
2353  * @in:  an xmlBuffer for the input
2354  *
2355  * Generic front-end for the encoding handler input function
2356  *
2357  * Returns the number of byte written if success, or
2358  *     -1 general error
2359  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2360  *        the result of transformation can't fit into the encoding we want), or
2361  */
2362 int
xmlCharEncInFunc(xmlCharEncodingHandler * handler,xmlBufferPtr out,xmlBufferPtr in)2363 xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
2364                  xmlBufferPtr in)
2365 {
2366     int ret;
2367     int written;
2368     int toconv;
2369 
2370     if (handler == NULL)
2371         return (-1);
2372     if (out == NULL)
2373         return (-1);
2374     if (in == NULL)
2375         return (-1);
2376 
2377     toconv = in->use;
2378     if (toconv == 0)
2379         return (0);
2380     written = out->size - out->use -1; /* count '\0' */
2381     if (toconv * 2 >= written) {
2382         xmlBufferGrow(out, out->size + toconv * 2);
2383         written = out->size - out->use - 1;
2384     }
2385     ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2386                            in->content, &toconv, 1);
2387     xmlBufferShrink(in, toconv);
2388     out->use += written;
2389     out->content[out->use] = 0;
2390     if (ret == -1)
2391         ret = -3;
2392 
2393     switch (ret) {
2394         case 0:
2395 #ifdef DEBUG_ENCODING
2396             xmlGenericError(xmlGenericErrorContext,
2397                             "converted %d bytes to %d bytes of input\n",
2398                             toconv, written);
2399 #endif
2400             break;
2401         case -1:
2402 #ifdef DEBUG_ENCODING
2403             xmlGenericError(xmlGenericErrorContext,
2404                          "converted %d bytes to %d bytes of input, %d left\n",
2405                             toconv, written, in->use);
2406 #endif
2407             break;
2408         case -3:
2409 #ifdef DEBUG_ENCODING
2410             xmlGenericError(xmlGenericErrorContext,
2411                         "converted %d bytes to %d bytes of input, %d left\n",
2412                             toconv, written, in->use);
2413 #endif
2414             break;
2415         case -2: {
2416             char buf[50];
2417 
2418 	    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2419 		     in->content[0], in->content[1],
2420 		     in->content[2], in->content[3]);
2421 	    buf[49] = 0;
2422 	    xmlEncodingErr(XML_I18N_CONV_FAILED,
2423 		    "input conversion failed due to input error, bytes %s\n",
2424 		           buf);
2425         }
2426     }
2427     /*
2428      * Ignore when input buffer is not on a boundary
2429      */
2430     if (ret == -3)
2431         ret = 0;
2432     return (written? written : ret);
2433 }
2434 
#ifdef LIBXML_OUTPUT_ENABLED
/**
 * xmlCharEncOutput:
 * @output: a parser output buffer
 * @init: is this an initialization call without data
 *
 * Generic front-end for the encoding handler on parser output
 * a first call with @init == 1 has to be made first to initiate the
 * output in case of non-stateless encoding needing to initiate their
 * state or the output (like the BOM in UTF16).
 * In case of UTF8 sequence conversion errors for the given encoder,
 * the content will be automatically remapped to a CharRef sequence.
 * Data is taken from the buffer of @output, at most 64 KiB per pass,
 * and the encoded bytes are appended to its conversion buffer.
 *
 * Returns the number of byte written if success, or
 *     -1 general error (NULL argument/encoder/buffers, or the encoder
 *        has no output function)
 *     -2 if the transcoding fails (for *in is not valid utf8 string or
 *        the result of transformation can't fit into the encoding we want)
 *     -3 if nothing could be written by lack of space or because the
 *        input ends with a truncated sequence
 */
int
xmlCharEncOutput(xmlOutputBufferPtr output, int init)
{
    int ret;
    size_t written;
    int writtentot = 0;
    size_t toconv;
    int c_in;
    int c_out;
    xmlBufPtr in;
    xmlBufPtr out;

    if ((output == NULL) || (output->encoder == NULL) ||
        (output->buffer == NULL) || (output->conv == NULL))
        return (-1);
    out = output->conv;
    in = output->buffer;

retry:

    written = xmlBufAvail(out);
    if (written > 0)
        written--; /* count '\0' */

    /*
     * First specific handling of the initialization call
     */
    if (init) {
        c_in = 0;
        c_out = written;
        /* TODO: Check return value. */
        xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
                          NULL, &c_in);
        xmlBufAddLen(out, c_out);
#ifdef DEBUG_ENCODING
        xmlGenericError(xmlGenericErrorContext,
                "initialized encoder\n");
#endif
        return(c_out);
    }

    /*
     * Conversion itself.
     */
    toconv = xmlBufUse(in);
    if (toconv == 0)
        return (writtentot);
    if (toconv > 64 * 1024)
        toconv = 64 * 1024;
    if (toconv * 4 >= written) {
        xmlBufGrow(out, toconv * 4);
        /*
         * written is unsigned: guard the decrement so that a buffer which
         * could not be grown yields 0 instead of wrapping around.
         */
        written = xmlBufAvail(out);
        if (written > 0)
            written--; /* count '\0' */
    }
    if (written > 256 * 1024)
        written = 256 * 1024;

    c_in = toconv;
    c_out = written;
    ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
                            xmlBufContent(in), &c_in);
    xmlBufShrink(in, c_in);
    xmlBufAddLen(out, c_out);
    writtentot += c_out;
    if (ret == -1) {
        if (c_out > 0) {
            /* Can be a limitation of iconv or uconv */
            goto retry;
        }
        ret = -3;
    }

    /*
     * Attempt to handle error cases
     */
    switch (ret) {
        case 0:
#ifdef DEBUG_ENCODING
            xmlGenericError(xmlGenericErrorContext,
                    "converted %d bytes to %d bytes of output\n",
                    c_in, c_out);
#endif
            break;
        case -1:
#ifdef DEBUG_ENCODING
            xmlGenericError(xmlGenericErrorContext,
                    "output conversion failed by lack of space\n");
#endif
            break;
        case -3:
#ifdef DEBUG_ENCODING
            xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
                    c_in, c_out, (int) xmlBufUse(in));
#endif
            break;
        case -4:
            xmlEncodingErr(XML_I18N_NO_OUTPUT,
                           "xmlCharEncOutFunc: no output function !\n", NULL);
            ret = -1;
            break;
        case -2: {
            xmlChar charref[20];
            int len = (int) xmlBufUse(in);
            xmlChar *content = xmlBufContent(in);
            int cur, charrefLen;

            /* Decode the character the encoder could not represent. */
            cur = xmlGetUTF8Char(content, &len);
            if (cur <= 0)
                break;

#ifdef DEBUG_ENCODING
            xmlGenericError(xmlGenericErrorContext,
                    "handling output conversion error\n");
            xmlGenericError(xmlGenericErrorContext,
                    "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
                    content[0], content[1],
                    content[2], content[3]);
#endif
            /*
             * Removes the UTF8 sequence, and replace it by a charref
             * and continue the transcoding phase, hoping the error
             * did not mangle the encoder state.
             */
            charrefLen = snprintf((char *) &charref[0], sizeof(charref),
                             "&#%d;", cur);
            xmlBufShrink(in, len);
            xmlBufGrow(out, charrefLen * 4);
            /* Guarded as above: never let the available size wrap. */
            written = xmlBufAvail(out);
            if (written > 0)
                written--; /* count '\0' */
            c_out = written;
            c_in = charrefLen;
            ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
                                    charref, &c_in);

            if ((ret < 0) || (c_in != charrefLen)) {
                char buf[50];

                snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
                         content[0], content[1],
                         content[2], content[3]);
                buf[49] = 0;
                xmlEncodingErr(XML_I18N_CONV_FAILED,
                    "output conversion failed due to conv error, bytes %s\n",
                               buf);
                if (xmlBufGetAllocationScheme(in) != XML_BUFFER_ALLOC_IMMUTABLE)
                    content[0] = ' ';
                break;
            }

            xmlBufAddLen(out, c_out);
            writtentot += c_out;
            goto retry;
        }
    }
    return(writtentot ? writtentot : ret);
}
#endif
2607 
2608 /**
2609  * xmlCharEncOutFunc:
2610  * @handler:	char encoding transformation data structure
2611  * @out:  an xmlBuffer for the output.
2612  * @in:  an xmlBuffer for the input
2613  *
2614  * Generic front-end for the encoding handler output function
2615  * a first call with @in == NULL has to be made firs to initiate the
2616  * output in case of non-stateless encoding needing to initiate their
2617  * state or the output (like the BOM in UTF16).
2618  * In case of UTF8 sequence conversion errors for the given encoder,
2619  * the content will be automatically remapped to a CharRef sequence.
2620  *
2621  * Returns the number of byte written if success, or
2622  *     -1 general error
2623  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2624  *        the result of transformation can't fit into the encoding we want), or
2625  */
2626 int
xmlCharEncOutFunc(xmlCharEncodingHandler * handler,xmlBufferPtr out,xmlBufferPtr in)2627 xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2628                   xmlBufferPtr in) {
2629     int ret;
2630     int written;
2631     int writtentot = 0;
2632     int toconv;
2633 
2634     if (handler == NULL) return(-1);
2635     if (out == NULL) return(-1);
2636 
2637 retry:
2638 
2639     written = out->size - out->use;
2640 
2641     if (written > 0)
2642 	written--; /* Gennady: count '/0' */
2643 
2644     /*
2645      * First specific handling of in = NULL, i.e. the initialization call
2646      */
2647     if (in == NULL) {
2648         toconv = 0;
2649         /* TODO: Check return value. */
2650         xmlEncOutputChunk(handler, &out->content[out->use], &written,
2651                           NULL, &toconv);
2652         out->use += written;
2653         out->content[out->use] = 0;
2654 #ifdef DEBUG_ENCODING
2655 	xmlGenericError(xmlGenericErrorContext,
2656 		"initialized encoder\n");
2657 #endif
2658         return(0);
2659     }
2660 
2661     /*
2662      * Conversion itself.
2663      */
2664     toconv = in->use;
2665     if (toconv == 0)
2666 	return(0);
2667     if (toconv * 4 >= written) {
2668         xmlBufferGrow(out, toconv * 4);
2669 	written = out->size - out->use - 1;
2670     }
2671     ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2672                             in->content, &toconv);
2673     xmlBufferShrink(in, toconv);
2674     out->use += written;
2675     writtentot += written;
2676     out->content[out->use] = 0;
2677     if (ret == -1) {
2678         if (written > 0) {
2679             /* Can be a limitation of iconv or uconv */
2680             goto retry;
2681         }
2682         ret = -3;
2683     }
2684 
2685     /*
2686      * Attempt to handle error cases
2687      */
2688     switch (ret) {
2689         case 0:
2690 #ifdef DEBUG_ENCODING
2691 	    xmlGenericError(xmlGenericErrorContext,
2692 		    "converted %d bytes to %d bytes of output\n",
2693 	            toconv, written);
2694 #endif
2695 	    break;
2696         case -1:
2697 #ifdef DEBUG_ENCODING
2698 	    xmlGenericError(xmlGenericErrorContext,
2699 		    "output conversion failed by lack of space\n");
2700 #endif
2701 	    break;
2702         case -3:
2703 #ifdef DEBUG_ENCODING
2704 	    xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2705 	            toconv, written, in->use);
2706 #endif
2707 	    break;
2708         case -4:
2709 	    xmlEncodingErr(XML_I18N_NO_OUTPUT,
2710 		           "xmlCharEncOutFunc: no output function !\n", NULL);
2711 	    ret = -1;
2712             break;
2713         case -2: {
2714 	    xmlChar charref[20];
2715 	    int len = in->use;
2716 	    const xmlChar *utf = (const xmlChar *) in->content;
2717 	    int cur, charrefLen;
2718 
2719 	    cur = xmlGetUTF8Char(utf, &len);
2720 	    if (cur <= 0)
2721                 break;
2722 
2723 #ifdef DEBUG_ENCODING
2724             xmlGenericError(xmlGenericErrorContext,
2725                     "handling output conversion error\n");
2726             xmlGenericError(xmlGenericErrorContext,
2727                     "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2728                     in->content[0], in->content[1],
2729                     in->content[2], in->content[3]);
2730 #endif
2731             /*
2732              * Removes the UTF8 sequence, and replace it by a charref
2733              * and continue the transcoding phase, hoping the error
2734              * did not mangle the encoder state.
2735              */
2736             charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2737                              "&#%d;", cur);
2738             xmlBufferShrink(in, len);
2739             xmlBufferGrow(out, charrefLen * 4);
2740 	    written = out->size - out->use - 1;
2741             toconv = charrefLen;
2742             ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2743                                     charref, &toconv);
2744 
2745 	    if ((ret < 0) || (toconv != charrefLen)) {
2746 		char buf[50];
2747 
2748 		snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2749 			 in->content[0], in->content[1],
2750 			 in->content[2], in->content[3]);
2751 		buf[49] = 0;
2752 		xmlEncodingErr(XML_I18N_CONV_FAILED,
2753 		    "output conversion failed due to conv error, bytes %s\n",
2754 			       buf);
2755 		if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE)
2756 		    in->content[0] = ' ';
2757 	        break;
2758 	    }
2759 
2760             out->use += written;
2761             writtentot += written;
2762             out->content[out->use] = 0;
2763             goto retry;
2764 	}
2765     }
2766     return(writtentot ? writtentot : ret);
2767 }
2768 
2769 /**
2770  * xmlCharEncCloseFunc:
2771  * @handler:	char encoding transformation data structure
2772  *
2773  * Generic front-end for encoding handler close function
2774  *
2775  * Returns 0 if success, or -1 in case of error
2776  */
2777 int
xmlCharEncCloseFunc(xmlCharEncodingHandler * handler)2778 xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2779     int ret = 0;
2780     int tofree = 0;
2781     int i, handler_in_list = 0;
2782 
2783     /* Avoid unused variable warning if features are disabled. */
2784     (void) handler_in_list;
2785 
2786     if (handler == NULL) return(-1);
2787     if (handler->name == NULL) return(-1);
2788     if (handlers != NULL) {
2789         for (i = 0;i < nbCharEncodingHandler; i++) {
2790             if (handler == handlers[i]) {
2791 	        handler_in_list = 1;
2792 		break;
2793 	    }
2794 	}
2795     }
2796 #ifdef LIBXML_ICONV_ENABLED
2797     /*
2798      * Iconv handlers can be used only once, free the whole block.
2799      * and the associated icon resources.
2800      */
2801     if ((handler_in_list == 0) &&
2802         ((handler->iconv_out != NULL) || (handler->iconv_in != NULL))) {
2803         tofree = 1;
2804 	if (handler->iconv_out != NULL) {
2805 	    if (iconv_close(handler->iconv_out))
2806 		ret = -1;
2807 	    handler->iconv_out = NULL;
2808 	}
2809 	if (handler->iconv_in != NULL) {
2810 	    if (iconv_close(handler->iconv_in))
2811 		ret = -1;
2812 	    handler->iconv_in = NULL;
2813 	}
2814     }
2815 #endif /* LIBXML_ICONV_ENABLED */
2816 #ifdef LIBXML_ICU_ENABLED
2817     if ((handler_in_list == 0) &&
2818         ((handler->uconv_out != NULL) || (handler->uconv_in != NULL))) {
2819         tofree = 1;
2820 	if (handler->uconv_out != NULL) {
2821 	    closeIcuConverter(handler->uconv_out);
2822 	    handler->uconv_out = NULL;
2823 	}
2824 	if (handler->uconv_in != NULL) {
2825 	    closeIcuConverter(handler->uconv_in);
2826 	    handler->uconv_in = NULL;
2827 	}
2828     }
2829 #endif
2830     if (tofree) {
2831         /* free up only dynamic handlers iconv/uconv */
2832         if (handler->name != NULL)
2833             xmlFree(handler->name);
2834         handler->name = NULL;
2835         xmlFree(handler);
2836     }
2837 #ifdef DEBUG_ENCODING
2838     if (ret)
2839         xmlGenericError(xmlGenericErrorContext,
2840 		"failed to close the encoding handler\n");
2841     else
2842         xmlGenericError(xmlGenericErrorContext,
2843 		"closed the encoding handler\n");
2844 #endif
2845 
2846     return(ret);
2847 }
2848 
2849 /**
2850  * xmlByteConsumed:
2851  * @ctxt: an XML parser context
2852  *
2853  * This function provides the current index of the parser relative
2854  * to the start of the current entity. This function is computed in
2855  * bytes from the beginning starting at zero and finishing at the
2856  * size in byte of the file if parsing a file. The function is
2857  * of constant cost if the input is UTF-8 but can be costly if run
2858  * on non-UTF-8 input.
2859  *
2860  * Returns the index in bytes from the beginning of the entity or -1
2861  *         in case the index could not be computed.
2862  */
2863 long
xmlByteConsumed(xmlParserCtxtPtr ctxt)2864 xmlByteConsumed(xmlParserCtxtPtr ctxt) {
2865     xmlParserInputPtr in;
2866 
2867     if (ctxt == NULL) return(-1);
2868     in = ctxt->input;
2869     if (in == NULL)  return(-1);
2870     if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2871         unsigned int unused = 0;
2872 	xmlCharEncodingHandler * handler = in->buf->encoder;
2873         /*
2874 	 * Encoding conversion, compute the number of unused original
2875 	 * bytes from the input not consumed and subtract that from
2876 	 * the raw consumed value, this is not a cheap operation
2877 	 */
2878         if (in->end - in->cur > 0) {
2879 	    unsigned char convbuf[32000];
2880 	    const unsigned char *cur = (const unsigned char *)in->cur;
2881 	    int toconv = in->end - in->cur, written = 32000;
2882 
2883 	    int ret;
2884 
2885             do {
2886                 toconv = in->end - cur;
2887                 written = 32000;
2888                 ret = xmlEncOutputChunk(handler, &convbuf[0], &written,
2889                                         cur, &toconv);
2890                 if (ret < 0) {
2891                     if (written > 0)
2892                         ret = -2;
2893                     else
2894                         return(-1);
2895                 }
2896                 unused += written;
2897                 cur += toconv;
2898             } while (ret == -2);
2899 	}
2900 	if (in->buf->rawconsumed < unused)
2901 	    return(-1);
2902 	return(in->buf->rawconsumed - unused);
2903     }
2904     return(in->consumed + (in->cur - in->base));
2905 }
2906 
2907 #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
2908 #ifdef LIBXML_ISO8859X_ENABLED
2909 
/**
 * UTF8ToISO8859x:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 * @xlattable: the 2-level transcoding table
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
 * block of chars out. Every complete UTF-8 sequence yields exactly one
 * output byte and no more than @outlen bytes are ever written to @out.
 *
 * Returns the number of bytes written if success, -1 if a required
 *     argument is NULL or if @out has no room for the next character,
 *     -2 if the transcoding fails (malformed UTF-8 or a character that
 *     has no mapping in @xlattable), or -3 if the input stops in the
 *     middle of a multi-byte sequence.
 * The value of @inlen after return is the number of octets consumed,
 *     counting complete converted sequences only; it is left untouched
 *     when a required argument is NULL.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8ToISO8859x(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned char const *xlattable) {
    const unsigned char* outstart = out;
    const unsigned char* outend;
    const unsigned char* inend;
    const unsigned char* instart = in;
    const unsigned char* processed = in;
    int ret = 0;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (xlattable == NULL))
        return(-1);
    if (in == NULL) {
        /*
        * initialization nothing to do
        */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    outend = out + (*outlen);
    inend = in + (*inlen);
    while (in < inend) {
        unsigned char d;

        if (out >= outend) {
            /* output full: report lack of space, keep the progress */
            ret = -1;
            goto done;
        }
        d = *in++;
        if (d < 0x80) {
            *out++ = d;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            ret = -2;
            goto done;
        } else if (d < 0xE0) {
            unsigned char c;

            if (!(in < inend)) {
                /* trailing byte not in input buffer */
                ret = -3;
                goto done;
            }
            c = *in++;
            if ((c & 0xC0) != 0x80) {
                /* not a trailing byte */
                ret = -2;
                goto done;
            }
            c = c & 0x3F;
            d = d & 0x1F;
            /* first level picks a 64-entry block, second the char */
            d = xlattable [48 + c + xlattable [d] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                goto done;
            }
            *out++ = d;
        } else if (d < 0xF0) {
            unsigned char c1;
            unsigned char c2;

            if (inend - in < 2) {
                /* trailing bytes not in input buffer */
                ret = -3;
                goto done;
            }
            c1 = *in++;
            if ((c1 & 0xC0) != 0x80) {
                /* not a trailing byte (c1) */
                ret = -2;
                goto done;
            }
            c2 = *in++;
            if ((c2 & 0xC0) != 0x80) {
                /* not a trailing byte (c2) */
                ret = -2;
                goto done;
            }
            c1 = c1 & 0x3F;
            c2 = c2 & 0x3F;
            d = d & 0x0F;
            /* three lookups: lead nibble, then c1, then c2 */
            d = xlattable [48 + c2 + xlattable [48 + c1 +
                        xlattable [32 + d] * 64] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                goto done;
            }
            *out++ = d;
        } else {
            /* cannot transcode >= U+010000 */
            ret = -2;
            goto done;
        }
        /* only now is the whole sequence accounted as consumed */
        processed = in;
    }

done:
    *outlen = (int) (out - outstart);
    *inlen = (int) (processed - instart);
    return((ret < 0) ? ret : *outlen);
}
3028 
/**
 * ISO8859xToUTF8
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO 8859-* chars
 * @inlen:  the length of @in
 * @unicodetable: 128 entries giving the code point of bytes 0x80..0xFF,
 *                0 meaning that the byte is undefined
 *
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
 * block of chars out. Conversion stops early, without error, when @out
 * is about to run out of room: a byte >= 0x80 is only converted while
 * at least three output bytes are free.
 *
 * Returns the number of bytes written if success, or -1 if an argument
 *     is NULL or if an undefined input byte is met
 * The value of @inlen after return is the number of octets consumed
 * The value of @outlen after return is the number of octets produced.
 */
static int
ISO8859xToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned short const *unicodetable) {
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;
    const unsigned char* instop;
    unsigned int c;
    int tail;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (in == NULL) || (unicodetable == NULL))
        return(-1);
    outend = out + *outlen;
    inend = in + *inlen;
    instop = inend;

    /*
     * Compare distances rather than computing "outend - 2", which
     * would point before the buffer when less than 2 bytes are given.
     */
    while ((in < inend) && (outend - out > 2)) {
        if (*in >= 0x80) {
            c = unicodetable [*in - 0x80];
            if (c == 0) {
                /* undefined code point */
                *outlen = (int) (out - outstart);
                *inlen = (int) (in - instart);
                return (-1);
            }
            if (c < 0x800) {
                *out++ = ((c >>  6) & 0x1F) | 0xC0;
                *out++ = (c & 0x3F) | 0x80;
            } else {
                *out++ = ((c >>  12) & 0x0F) | 0xE0;
                *out++ = ((c >>  6) & 0x3F) | 0x80;
                *out++ = (c & 0x3F) | 0x80;
            }
            ++in;
        }
        /* never copy more ASCII bytes than the output can take */
        if (instop - in > outend - out) instop = in + (outend - out);
        /* bound check first: *in must not be read at instop */
        while ((in < instop) && (*in < 0x80)) {
            *out++ = *in++;
        }
    }
    /*
     * The main loop gives up while up to two output bytes may still
     * be free; spend them on plain ASCII input if there is any.
     */
    for (tail = 0; tail < 2; tail++) {
        if ((in >= inend) || (out >= outend) || (*in >= 0x80))
            break;
        *out++ = *in++;
    }
    *outlen = (int) (out - outstart);
    *inlen = (int) (in - instart);
    return (*outlen);
}
3094 
3095 
3096 /************************************************************************
3097  * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding                *
3098  ************************************************************************/
3099 
/*
 * ISO-8859-2 to Unicode: entry [b - 0x80] is the code point of byte b
 * (0x80..0xFF), in the layout ISO8859xToUTF8() indexes. A value of 0
 * would mark an undefined byte; this table has none.
 */
static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
    0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
    0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
    0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
    0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
    0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
    0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
    0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
    0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
    0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
};
3118 
/*
 * Unicode to ISO-8859-2, in the two-level layout UTF8ToISO8859x()
 * indexes: bytes 0..31 give the block number for a 2-byte UTF-8 lead,
 * bytes 32..47 the block number for a 3-byte lead, and 64-byte blocks
 * follow from offset 48. A result of 0 means "no mapping".
 * The string literal is exactly as long as the array, so no
 * terminating NUL is stored.
 */
static unsigned char const xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
    "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
    "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
    "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
    "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
    "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
    "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
    "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
    "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
};
3148 
/*
 * ISO-8859-3 to Unicode: entry [b - 0x80] is the code point of byte b
 * (0x80..0xFF), in the layout ISO8859xToUTF8() indexes. Entries equal
 * to 0x0000 mark bytes that are undefined in this character set.
 */
static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
    0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
    0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
    0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
    0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
    0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
    0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
};
3167 
/*
 * Unicode to ISO-8859-3, in the two-level layout UTF8ToISO8859x()
 * indexes: bytes 0..31 give the block number for a 2-byte UTF-8 lead,
 * bytes 32..47 the block number for a 3-byte lead, and 64-byte blocks
 * follow from offset 48. A result of 0 means "no mapping".
 * The string literal is exactly as long as the array, so no
 * terminating NUL is stored.
 */
static unsigned char const xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
    "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
    "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
    "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
};
3201 
/*
 * ISO-8859-4 to Unicode: entry [b - 0x80] is the code point of byte b
 * (0x80..0xFF), in the layout ISO8859xToUTF8() indexes. A value of 0
 * would mark an undefined byte; this table has none.
 */
static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
    0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
    0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
    0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
    0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
    0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
};
3220 
/*
 * Unicode to ISO-8859-4, in the two-level layout UTF8ToISO8859x()
 * indexes: bytes 0..31 give the block number for a 2-byte UTF-8 lead,
 * bytes 32..47 the block number for a 3-byte lead, and 64-byte blocks
 * follow from offset 48. A result of 0 means "no mapping".
 * The string literal is exactly as long as the array, so no
 * terminating NUL is stored.
 */
static unsigned char const xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
    "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
    "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
    "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
    "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
    "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
    "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
};
3250 
/*
 * ISO-8859-5 to Unicode: entry [b - 0x80] is the code point of byte b
 * (0x80..0xFF), in the layout ISO8859xToUTF8() indexes. A value of 0
 * would mark an undefined byte; this table has none.
 */
static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
    0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
    0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
    0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
    0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
    0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
    0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
    0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
    0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
    0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
    0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
    0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
};
3269 
/*
 * Unicode to ISO-8859-5, in the two-level layout UTF8ToISO8859x()
 * indexes: bytes 0..31 give the block number for a 2-byte UTF-8 lead,
 * bytes 32..47 the block number for a 3-byte lead (which goes through
 * one more intermediate block), and 64-byte blocks follow from
 * offset 48. A result of 0 means "no mapping".
 * The string literal is exactly as long as the array, so no
 * terminating NUL is stored.
 */
static unsigned char const xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
    "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
3299 
/*
 * ISO-8859-6 to Unicode: entry [b - 0x80] is the code point of byte b
 * (0x80..0xFF), in the layout ISO8859xToUTF8() indexes. Entries equal
 * to 0x0000 mark bytes that are undefined in this character set.
 */
static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
    0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
    0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
    0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
    0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
    0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
    0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
};
3318 
/*
 * Unicode to ISO-8859-6, in the two-level layout UTF8ToISO8859x()
 * indexes: bytes 0..31 give the block number for a 2-byte UTF-8 lead,
 * bytes 32..47 the block number for a 3-byte lead, and 64-byte blocks
 * follow from offset 48. A result of 0 means "no mapping".
 * The string literal is exactly as long as the array, so no
 * terminating NUL is stored.
 */
static unsigned char const xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
    "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
3344 
/*
 * ISO-8859-7 to Unicode: entry [b - 0x80] is the code point of byte b
 * (0x80..0xFF), in the layout ISO8859xToUTF8() indexes. Entries equal
 * to 0x0000 mark bytes that are undefined in this character set.
 */
static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
    0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
    0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
    0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
    0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
    0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
    0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
    0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
    0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
    0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
    0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
};
3363 
/*
 * xmltranscodetable_ISO8859_7:
 *
 * Reverse (UTF-8 to ISO-8859-7) lookup data: 48 leading bytes followed by
 * 7 blocks of 64 bytes, matching the declared size 48 + 7 * 64. The string
 * initializer fills the array exactly, so no terminating NUL is stored.
 * NOTE(review): the leading bytes look like block selectors and the blocks
 * like per-trailing-byte result tables, with 0x00 meaning "no mapping";
 * the actual indexing is done by UTF8ToISO8859x (defined elsewhere in this
 * file) -- confirm there before editing any byte by hand.
 */
static unsigned char const xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
    "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
    "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
    "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
3397 
/*
 * xmlunicodetable_ISO8859_8:
 *
 * Unicode code points for the upper half of ISO-8859-8, one 16-bit entry
 * per byte value, 128 entries in total.
 * NOTE(review): presumably indexed with (byte - 0x80) by ISO8859xToUTF8,
 * with 0x0000 marking byte values that have no assigned character --
 * confirm against that helper, which is defined elsewhere in this file.
 */
static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
    0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
    0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
    0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
    0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
    0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
    0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
};
3416 
/*
 * xmltranscodetable_ISO8859_8:
 *
 * Reverse (UTF-8 to ISO-8859-8) lookup data: 48 leading bytes followed by
 * 7 blocks of 64 bytes, matching the declared size 48 + 7 * 64. The string
 * initializer fills the array exactly, so no terminating NUL is stored.
 * NOTE(review): the leading bytes look like block selectors and the blocks
 * like per-trailing-byte result tables, with 0x00 meaning "no mapping";
 * the actual indexing is done by UTF8ToISO8859x (defined elsewhere in this
 * file) -- confirm there before editing any byte by hand.
 */
static unsigned char const xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
    "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
    "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
3450 
/*
 * xmlunicodetable_ISO8859_9:
 *
 * Unicode code points for the upper half of ISO-8859-9, one 16-bit entry
 * per byte value, 128 entries in total. Every entry in this table is
 * non-zero.
 * NOTE(review): presumably indexed with (byte - 0x80) by ISO8859xToUTF8 --
 * confirm against that helper, which is defined elsewhere in this file.
 */
static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
    0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
    0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
};
3469 
/*
 * xmltranscodetable_ISO8859_9:
 *
 * Reverse (UTF-8 to ISO-8859-9) lookup data: 48 leading bytes followed by
 * 5 blocks of 64 bytes, matching the declared size 48 + 5 * 64. The string
 * initializer fills the array exactly, so no terminating NUL is stored.
 * NOTE(review): the leading bytes look like block selectors and the blocks
 * like per-trailing-byte result tables, with 0x00 meaning "no mapping";
 * the actual indexing is done by UTF8ToISO8859x (defined elsewhere in this
 * file) -- confirm there before editing any byte by hand.
 */
static unsigned char const xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
    "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
3495 
/*
 * xmlunicodetable_ISO8859_10:
 *
 * Unicode code points for the upper half of ISO-8859-10, one 16-bit entry
 * per byte value, 128 entries in total. Every entry in this table is
 * non-zero.
 * NOTE(review): presumably indexed with (byte - 0x80) by ISO8859xToUTF8 --
 * confirm against that helper, which is defined elsewhere in this file.
 */
static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
    0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
    0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
    0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
    0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
    0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
};
3514 
/*
 * xmltranscodetable_ISO8859_10:
 *
 * Reverse (UTF-8 to ISO-8859-10) lookup data: 48 leading bytes followed by
 * 7 blocks of 64 bytes, matching the declared size 48 + 7 * 64. The string
 * initializer fills the array exactly, so no terminating NUL is stored.
 * NOTE(review): the leading bytes look like block selectors and the blocks
 * like per-trailing-byte result tables, with 0x00 meaning "no mapping";
 * the actual indexing is done by UTF8ToISO8859x (defined elsewhere in this
 * file) -- confirm there before editing any byte by hand.
 */
static unsigned char const xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
    "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
    "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
    "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
    "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
    "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
    "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
};
3548 
/*
 * xmlunicodetable_ISO8859_11:
 *
 * Unicode code points for the upper half of ISO-8859-11, one 16-bit entry
 * per byte value, 128 entries in total.
 * NOTE(review): presumably indexed with (byte - 0x80) by ISO8859xToUTF8,
 * with 0x0000 marking byte values that have no assigned character --
 * confirm against that helper, which is defined elsewhere in this file.
 */
static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
    0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
    0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
    0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
    0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
    0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
    0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
    0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
    0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
    0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
    0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
    0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
};
3567 
/*
 * xmltranscodetable_ISO8859_11:
 *
 * Reverse (UTF-8 to ISO-8859-11) lookup data: 48 leading bytes followed by
 * 6 blocks of 64 bytes, matching the declared size 48 + 6 * 64. The string
 * initializer fills the array exactly, so no terminating NUL is stored.
 * NOTE(review): the leading bytes look like block selectors and the blocks
 * like per-trailing-byte result tables, with 0x00 meaning "no mapping";
 * the actual indexing is done by UTF8ToISO8859x (defined elsewhere in this
 * file) -- confirm there before editing any byte by hand.
 */
static unsigned char const xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
    "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
    "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
3597 
/*
 * xmlunicodetable_ISO8859_13:
 *
 * Unicode code points for the upper half of ISO-8859-13, one 16-bit entry
 * per byte value, 128 entries in total. Every entry in this table is
 * non-zero.
 * NOTE(review): presumably indexed with (byte - 0x80) by ISO8859xToUTF8 --
 * confirm against that helper, which is defined elsewhere in this file.
 */
static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
    0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
    0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
    0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
    0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
    0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
    0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
    0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
    0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
    0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
    0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
};
3616 
/*
 * xmltranscodetable_ISO8859_13:
 *
 * Reverse (UTF-8 to ISO-8859-13) lookup data: 48 leading bytes followed by
 * 7 blocks of 64 bytes, matching the declared size 48 + 7 * 64. The string
 * initializer fills the array exactly, so no terminating NUL is stored.
 * NOTE(review): the leading bytes look like block selectors and the blocks
 * like per-trailing-byte result tables, with 0x00 meaning "no mapping";
 * the actual indexing is done by UTF8ToISO8859x (defined elsewhere in this
 * file) -- confirm there before editing any byte by hand.
 */
static unsigned char const xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
    "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
    "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
    "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
    "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
    "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
    "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
    "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
    "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
};
3650 
/*
 * xmlunicodetable_ISO8859_14:
 *
 * Unicode code points for the upper half of ISO-8859-14, one 16-bit entry
 * per byte value, 128 entries in total. Every entry in this table is
 * non-zero.
 * NOTE(review): presumably indexed with (byte - 0x80) by ISO8859xToUTF8 --
 * confirm against that helper, which is defined elsewhere in this file.
 */
static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
    0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
    0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
    0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
};
3669 
/*
 * xmltranscodetable_ISO8859_14:
 *
 * Reverse (UTF-8 to ISO-8859-14) lookup data: 48 leading bytes followed by
 * 10 blocks of 64 bytes, matching the declared size 48 + 10 * 64. The
 * string initializer fills the array exactly, so no terminating NUL is
 * stored.
 * NOTE(review): the leading bytes look like block selectors and the blocks
 * like per-trailing-byte result tables, with 0x00 meaning "no mapping";
 * the actual indexing is done by UTF8ToISO8859x (defined elsewhere in this
 * file) -- confirm there before editing any byte by hand.
 */
static unsigned char const xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
    "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
    "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
    "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
    "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
};
3715 
/*
 * xmlunicodetable_ISO8859_15:
 *
 * Unicode code points for the upper half of ISO-8859-15, one 16-bit entry
 * per byte value, 128 entries in total. Every entry in this table is
 * non-zero.
 * NOTE(review): presumably indexed with (byte - 0x80) by ISO8859xToUTF8 --
 * confirm against that helper, which is defined elsewhere in this file.
 */
static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
    0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
    0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
};
3734 
/*
 * xmltranscodetable_ISO8859_15:
 *
 * Reverse (UTF-8 to ISO-8859-15) lookup data: 48 leading bytes followed by
 * 6 blocks of 64 bytes, matching the declared size 48 + 6 * 64. The string
 * initializer fills the array exactly, so no terminating NUL is stored.
 * NOTE(review): the leading bytes look like block selectors and the blocks
 * like per-trailing-byte result tables, with 0x00 meaning "no mapping";
 * the actual indexing is done by UTF8ToISO8859x (defined elsewhere in this
 * file) -- confirm there before editing any byte by hand.
 */
static unsigned char const xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
    "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
    "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
};
3764 
/*
 * xmlunicodetable_ISO8859_16:
 *
 * Unicode code points for the upper half of ISO-8859-16, one 16-bit entry
 * per byte value, 128 entries in total. Every entry in this table is
 * non-zero.
 * NOTE(review): presumably indexed with (byte - 0x80) by ISO8859xToUTF8 --
 * confirm against that helper, which is defined elsewhere in this file.
 */
static unsigned short const xmlunicodetable_ISO8859_16 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
    0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
    0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
    0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
    0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
    0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
    0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
};
3783 
/*
 * xmltranscodetable_ISO8859_16:
 *
 * Reverse (UTF-8 to ISO-8859-16) lookup data: 48 leading bytes followed by
 * 9 blocks of 64 bytes, matching the declared size 48 + 9 * 64. The string
 * initializer fills the array exactly, so no terminating NUL is stored.
 * NOTE(review): the leading bytes look like block selectors and the blocks
 * like per-trailing-byte result tables, with 0x00 meaning "no mapping";
 * the actual indexing is done by UTF8ToISO8859x (defined elsewhere in this
 * file) -- confirm there before editing any byte by hand.
 */
static unsigned char const xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
    "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
    "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
    "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
    "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
};
3825 
3826 
3827 /*
3828  * auto-generated functions for ISO-8859-2 .. ISO-8859-16
3829  */
3830 
ISO8859_2ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3831 static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3832     const unsigned char* in, int *inlen) {
3833     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3834 }
UTF8ToISO8859_2(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3835 static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3836     const unsigned char* in, int *inlen) {
3837     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3838 }
3839 
ISO8859_3ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3840 static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3841     const unsigned char* in, int *inlen) {
3842     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3843 }
UTF8ToISO8859_3(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3844 static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3845     const unsigned char* in, int *inlen) {
3846     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3847 }
3848 
ISO8859_4ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3849 static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3850     const unsigned char* in, int *inlen) {
3851     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3852 }
UTF8ToISO8859_4(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3853 static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3854     const unsigned char* in, int *inlen) {
3855     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3856 }
3857 
ISO8859_5ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3858 static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3859     const unsigned char* in, int *inlen) {
3860     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3861 }
UTF8ToISO8859_5(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3862 static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3863     const unsigned char* in, int *inlen) {
3864     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3865 }
3866 
ISO8859_6ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3867 static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3868     const unsigned char* in, int *inlen) {
3869     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3870 }
UTF8ToISO8859_6(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3871 static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3872     const unsigned char* in, int *inlen) {
3873     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3874 }
3875 
ISO8859_7ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3876 static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3877     const unsigned char* in, int *inlen) {
3878     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3879 }
UTF8ToISO8859_7(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3880 static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3881     const unsigned char* in, int *inlen) {
3882     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3883 }
3884 
ISO8859_8ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3885 static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3886     const unsigned char* in, int *inlen) {
3887     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3888 }
UTF8ToISO8859_8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3889 static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3890     const unsigned char* in, int *inlen) {
3891     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3892 }
3893 
ISO8859_9ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3894 static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3895     const unsigned char* in, int *inlen) {
3896     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3897 }
UTF8ToISO8859_9(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3898 static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3899     const unsigned char* in, int *inlen) {
3900     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3901 }
3902 
ISO8859_10ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3903 static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3904     const unsigned char* in, int *inlen) {
3905     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3906 }
UTF8ToISO8859_10(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3907 static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3908     const unsigned char* in, int *inlen) {
3909     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
3910 }
3911 
ISO8859_11ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3912 static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
3913     const unsigned char* in, int *inlen) {
3914     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
3915 }
UTF8ToISO8859_11(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3916 static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
3917     const unsigned char* in, int *inlen) {
3918     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
3919 }
3920 
ISO8859_13ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3921 static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3922     const unsigned char* in, int *inlen) {
3923     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
3924 }
UTF8ToISO8859_13(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3925 static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3926     const unsigned char* in, int *inlen) {
3927     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
3928 }
3929 
ISO8859_14ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3930 static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3931     const unsigned char* in, int *inlen) {
3932     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
3933 }
UTF8ToISO8859_14(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3934 static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3935     const unsigned char* in, int *inlen) {
3936     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
3937 }
3938 
ISO8859_15ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3939 static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3940     const unsigned char* in, int *inlen) {
3941     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
3942 }
UTF8ToISO8859_15(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3943 static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3944     const unsigned char* in, int *inlen) {
3945     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
3946 }
3947 
ISO8859_16ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3948 static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3949     const unsigned char* in, int *inlen) {
3950     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
3951 }
UTF8ToISO8859_16(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3952 static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3953     const unsigned char* in, int *inlen) {
3954     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
3955 }
3956 
3957 static void
xmlRegisterCharEncodingHandlersISO8859x(void)3958 xmlRegisterCharEncodingHandlersISO8859x (void) {
3959     xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2);
3960     xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3);
3961     xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4);
3962     xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5);
3963     xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6);
3964     xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7);
3965     xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8);
3966     xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9);
3967     xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10);
3968     xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11);
3969     xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13);
3970     xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14);
3971     xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15);
3972     xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16);
3973 }
3974 
3975 #endif
3976 #endif
3977 
3978