/*
 * encoding.c : implements the encoding conversion functions needed for XML
 *
 * Related specs:
 * rfc2044        (UTF-8 and UTF-16) F. Yergeau Alis Technologies
 * rfc2781        UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
 * [ISO-10646]    UTF-8 and UTF-16 in Annexes
 * [ISO-8859-1]   ISO Latin-1 characters codes.
 * [UNICODE]      The Unicode Consortium, "The Unicode Standard --
 *                Worldwide Character Encoding -- Version 1.0", Addison-
 *                Wesley, Volume 1, 1991, Volume 2, 1992.  UTF-8 is
 *                described in Unicode Technical Report #4.
 * [US-ASCII]     Coded Character Set--7-bit American Standard Code for
 *                Information Interchange, ANSI X3.4-1986.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 *
 * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
 */
22 
23 #define IN_LIBXML
24 #include "libxml.h"
25 
26 #include <string.h>
27 #include <limits.h>
28 
29 #ifdef HAVE_CTYPE_H
30 #include <ctype.h>
31 #endif
32 #ifdef HAVE_STDLIB_H
33 #include <stdlib.h>
34 #endif
35 #ifdef LIBXML_ICONV_ENABLED
36 #ifdef HAVE_ERRNO_H
37 #include <errno.h>
38 #endif
39 #endif
40 #include <libxml/encoding.h>
41 #include <libxml/xmlmemory.h>
42 #ifdef LIBXML_HTML_ENABLED
43 #include <libxml/HTMLparser.h>
44 #endif
45 #include <libxml/globals.h>
46 #include <libxml/xmlerror.h>
47 
48 #include "buf.h"
49 #include "enc.h"
50 
51 static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
52 static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
53 
typedef struct _xmlCharEncodingAlias xmlCharEncodingAlias;
typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
/*
 * One entry of the encoding alias table: associates an alias string with
 * the encoding name it stands for.
 */
struct _xmlCharEncodingAlias {
    const char *name;	/* encoding name handed back by an alias lookup */
    const char *alias;	/* alias string the lookup key is compared against */
};
60 
61 static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
62 static int xmlCharEncodingAliasesNb = 0;
63 static int xmlCharEncodingAliasesMax = 0;
64 
65 #if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
66 #if 0
67 #define DEBUG_ENCODING  /* Define this to get encoding traces */
68 #endif
69 #else
70 #ifdef LIBXML_ISO8859X_ENABLED
71 static void xmlRegisterCharEncodingHandlersISO8859x (void);
72 #endif
73 #endif
74 
75 static int xmlLittleEndian = 1;
76 
77 /**
78  * xmlEncodingErrMemory:
79  * @extra:  extra informations
80  *
81  * Handle an out of memory condition
82  */
83 static void
xmlEncodingErrMemory(const char * extra)84 xmlEncodingErrMemory(const char *extra)
85 {
86     __xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra);
87 }
88 
89 /**
90  * xmlErrEncoding:
91  * @error:  the error number
92  * @msg:  the error message
93  *
94  * n encoding error
95  */
96 static void LIBXML_ATTR_FORMAT(2,0)
xmlEncodingErr(xmlParserErrors error,const char * msg,const char * val)97 xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val)
98 {
99     __xmlRaiseError(NULL, NULL, NULL, NULL, NULL,
100                     XML_FROM_I18N, error, XML_ERR_FATAL,
101                     NULL, 0, val, NULL, NULL, 0, 0, msg, val);
102 }
103 
104 #ifdef LIBXML_ICU_ENABLED
105 static uconv_t*
openIcuConverter(const char * name,int toUnicode)106 openIcuConverter(const char* name, int toUnicode)
107 {
108   UErrorCode status = U_ZERO_ERROR;
109   uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
110   if (conv == NULL)
111     return NULL;
112 
113   conv->pivot_source = conv->pivot_buf;
114   conv->pivot_target = conv->pivot_buf;
115 
116   conv->uconv = ucnv_open(name, &status);
117   if (U_FAILURE(status))
118     goto error;
119 
120   status = U_ZERO_ERROR;
121   if (toUnicode) {
122     ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,
123                         NULL, NULL, NULL, &status);
124   }
125   else {
126     ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,
127                         NULL, NULL, NULL, &status);
128   }
129   if (U_FAILURE(status))
130     goto error;
131 
132   status = U_ZERO_ERROR;
133   conv->utf8 = ucnv_open("UTF-8", &status);
134   if (U_SUCCESS(status))
135     return conv;
136 
137 error:
138   if (conv->uconv)
139     ucnv_close(conv->uconv);
140   xmlFree(conv);
141   return NULL;
142 }
143 
144 static void
closeIcuConverter(uconv_t * conv)145 closeIcuConverter(uconv_t *conv)
146 {
147   if (conv != NULL) {
148     ucnv_close(conv->uconv);
149     ucnv_close(conv->utf8);
150     xmlFree(conv);
151   }
152 }
153 #endif /* LIBXML_ICU_ENABLED */
154 
155 /************************************************************************
156  *									*
157  *		Conversions To/From UTF8 encoding			*
158  *									*
159  ************************************************************************/
160 
/**
 * asciiToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ASCII chars
 * @inlen:  the length of @in
 *
 * Take a block of ASCII chars in and try to convert it to an UTF-8
 * block of chars out. ASCII maps 1:1 onto UTF-8, so exactly as many
 * bytes are copied as fit in both buffers.
 *
 * Returns the number of bytes written, or -1 if an argument is NULL or
 *     a byte >= 0x80 is met.
 * The value of @inlen after return is the number of octets consumed
 *     (on error: the octets before the offending byte).
 * The value of @outlen after return is the number of octets produced.
 */
static int
asciiToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    int count;
    int i;

    if ((out == NULL) || (outlen == NULL) || (in == NULL) || (inlen == NULL))
        return(-1);

    /* One output byte per input byte: the smaller buffer is the limit. */
    count = (*inlen < *outlen) ? *inlen : *outlen;

    for (i = 0; i < count; i++) {
        if (in[i] >= 0x80) {
            /* not ASCII: report what was converted so far */
            *outlen = i;
            *inlen = i;
            return(-1);
        }
        out[i] = in[i];
    }

    /* i stays 0 when either length is zero or negative */
    *outlen = i;
    *inlen = i;
    return(i);
}
205 
206 #ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toascii:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
 * block of chars out.
 *
 * Returns the number of bytes written, -2 if the transcoding fails
 *     (malformed UTF-8 or a character outside ASCII), or -1 if @out,
 *     @outlen or @inlen is NULL. A NULL @in is an initialization call:
 *     both lengths are set to 0 and 0 is returned.
 * The value of @inlen after return is the number of octets consumed;
 *     a sequence cut short by the end of @in is left unconsumed.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8Toascii(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char* processed = in;
    const unsigned char* outend;
    const unsigned char* outstart = out;
    const unsigned char* instart = in;
    const unsigned char* inend;
    unsigned int c, d;
    int trailing;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);
    while (in < inend) {
        d = *in++;
        if      (d < 0x80)  { c = d; trailing = 0; }
        else if (d < 0xC0)  goto invalid;   /* trailing byte in leading position */
        else if (d < 0xE0)  { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0)  { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8)  { c = d & 0x07; trailing = 3; }
        else                goto invalid;   /* no chance for this in Ascii */

        /* sequence continues past the input: keep it for the next call */
        if (inend - in < trailing)
            break;

        for ( ; trailing > 0; trailing--) {
            d = *in++;
            /*
             * A non-continuation byte makes the sequence malformed; fail
             * instead of emitting the partially decoded value.
             */
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c = (c << 6) | (d & 0x3F);
        }

        /* assertion: c is a single UCS-4 value */
        if (c >= 0x80)
            goto invalid;                   /* no chance for this in Ascii */
        if (out >= outend)
            break;
        *out++ = c;
        processed = in;
    }
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(*outlen);

invalid:
    *outlen = out - outstart;
    *inlen = processed - instart;
    return(-2);
}
290 #endif /* LIBXML_OUTPUT_ENABLED */
291 
292 /**
293  * isolat1ToUTF8:
294  * @out:  a pointer to an array of bytes to store the result
295  * @outlen:  the length of @out
296  * @in:  a pointer to an array of ISO Latin 1 chars
297  * @inlen:  the length of @in
298  *
299  * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
300  * block of chars out.
301  * Returns the number of bytes written if success, or -1 otherwise
302  * The value of @inlen after return is the number of octets consumed
303  *     if the return value is positive, else unpredictable.
304  * The value of @outlen after return is the number of octets consumed.
305  */
306 int
isolat1ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)307 isolat1ToUTF8(unsigned char* out, int *outlen,
308               const unsigned char* in, int *inlen) {
309     unsigned char* outstart = out;
310     const unsigned char* base = in;
311     unsigned char* outend;
312     const unsigned char* inend;
313     const unsigned char* instop;
314 
315     if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
316 	return(-1);
317 
318     outend = out + *outlen;
319     inend = in + (*inlen);
320     instop = inend;
321 
322     while ((in < inend) && (out < outend - 1)) {
323 	if (*in >= 0x80) {
324 	    *out++ = (((*in) >>  6) & 0x1F) | 0xC0;
325             *out++ = ((*in) & 0x3F) | 0x80;
326 	    ++in;
327 	}
328 	if ((instop - in) > (outend - out)) instop = in + (outend - out);
329 	while ((in < instop) && (*in < 0x80)) {
330 	    *out++ = *in++;
331 	}
332     }
333     if ((in < inend) && (out < outend) && (*in < 0x80)) {
334         *out++ = *in++;
335     }
336     *outlen = out - outstart;
337     *inlen = in - base;
338     return(*outlen);
339 }
340 
/**
 * UTF8ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-8 chars
 * @inlenb:  the length of @inb in bytes
 *
 * No op copy operation for UTF8 handling: copies as many bytes as fit
 * in both buffers, without looking at their content.
 *
 * Returns the number of bytes written, or -1 if @out, @outlen or
 *     @inlenb is NULL or the smaller length is negative. A NULL @inb is
 *     an initialization call: both lengths are set to 0 and 0 is returned.
 *     On success *outlen and *inlenb are both set to the number of
 *     bytes copied.
 */
static int
UTF8ToUTF8(unsigned char* out, int *outlen,
           const unsigned char* inb, int *inlenb)
{
    int ncopy;

    if ((out == NULL) || (outlen == NULL) || (inlenb == NULL))
        return(-1);

    if (inb == NULL) {
        /* inb == NULL means output is initialized. */
        *outlen = 0;
        *inlenb = 0;
        return(0);
    }

    ncopy = (*inlenb < *outlen) ? *inlenb : *outlen;
    if (ncopy < 0)
        return(-1);

    memcpy(out, inb, ncopy);

    *inlenb = ncopy;
    *outlen = ncopy;
    return(ncopy);
}
382 
383 
384 #ifdef LIBXML_OUTPUT_ENABLED
385 /**
386  * UTF8Toisolat1:
387  * @out:  a pointer to an array of bytes to store the result
388  * @outlen:  the length of @out
389  * @in:  a pointer to an array of UTF-8 chars
390  * @inlen:  the length of @in
391  *
392  * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
393  * block of chars out.
394  *
395  * Returns the number of bytes written if success, -2 if the transcoding fails,
396            or -1 otherwise
397  * The value of @inlen after return is the number of octets consumed
398  *     if the return value is positive, else unpredictable.
399  * The value of @outlen after return is the number of octets consumed.
400  */
401 int
UTF8Toisolat1(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)402 UTF8Toisolat1(unsigned char* out, int *outlen,
403               const unsigned char* in, int *inlen) {
404     const unsigned char* processed = in;
405     const unsigned char* outend;
406     const unsigned char* outstart = out;
407     const unsigned char* instart = in;
408     const unsigned char* inend;
409     unsigned int c, d;
410     int trailing;
411 
412     if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
413     if (in == NULL) {
414         /*
415 	 * initialization nothing to do
416 	 */
417 	*outlen = 0;
418 	*inlen = 0;
419 	return(0);
420     }
421     inend = in + (*inlen);
422     outend = out + (*outlen);
423     while (in < inend) {
424 	d = *in++;
425 	if      (d < 0x80)  { c= d; trailing= 0; }
426 	else if (d < 0xC0) {
427 	    /* trailing byte in leading position */
428 	    *outlen = out - outstart;
429 	    *inlen = processed - instart;
430 	    return(-2);
431         } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
432         else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
433         else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
434 	else {
435 	    /* no chance for this in IsoLat1 */
436 	    *outlen = out - outstart;
437 	    *inlen = processed - instart;
438 	    return(-2);
439 	}
440 
441 	if (inend - in < trailing) {
442 	    break;
443 	}
444 
445 	for ( ; trailing; trailing--) {
446 	    if (in >= inend)
447 		break;
448 	    if (((d= *in++) & 0xC0) != 0x80) {
449 		*outlen = out - outstart;
450 		*inlen = processed - instart;
451 		return(-2);
452 	    }
453 	    c <<= 6;
454 	    c |= d & 0x3F;
455 	}
456 
457 	/* assertion: c is a single UTF-4 value */
458 	if (c <= 0xFF) {
459 	    if (out >= outend)
460 		break;
461 	    *out++ = c;
462 	} else {
463 	    /* no chance for this in IsoLat1 */
464 	    *outlen = out - outstart;
465 	    *inlen = processed - instart;
466 	    return(-2);
467 	}
468 	processed = in;
469     }
470     *outlen = out - outstart;
471     *inlen = processed - instart;
472     return(*outlen);
473 }
474 #endif /* LIBXML_OUTPUT_ENABLED */
475 
/**
 * UTF16LEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16LE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16LE code units in and try to convert it to an
 * UTF-8 block of chars out. The input is read byte by byte, so @inb
 * needs no particular alignment and the result does not depend on the
 * endianness of the machine.
 *
 * A character is only written when it fits completely in @out. A high
 * surrogate whose partner is not in the buffer yet and a trailing odd
 * byte are left unconsumed for the next call.
 *
 * Returns the number of bytes written, -1 if an argument is NULL, or -2
 *     if the transcoding fails (high surrogate not followed by a low
 *     surrogate).
 *     The value of *inlenb after return is the number of octets consumed.
 *     The value of *outlen after return is the number of octets produced.
 */
static int
UTF16LEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char *outstart = out;
    unsigned char *outend;
    const unsigned char *in = inb;  /* start of the next unconverted char */
    const unsigned char *next;
    const unsigned char *inend;
    unsigned int c, d;
    int nbytes, needed;

    if ((out == NULL) || (outlen == NULL) || (inb == NULL) || (inlenb == NULL))
        return(-1);

    outend = out + ((*outlen > 0) ? *outlen : 0);
    nbytes = (*inlenb > 0) ? *inlenb : 0;
    /* an odd trailing byte is half a code unit: ignore it */
    inend = inb + (nbytes - (nbytes % 2));

    while (inend - in >= 2) {
        c = in[0] | ((unsigned int) in[1] << 8);
        next = in + 2;

        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (inend - next < 2)
                break;                   /* low surrogate not available yet */
            d = next[0] | ((unsigned int) next[1] << 8);
            next += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = (int) (out - outstart);
                *inlenb = (int) (in - inb);
                return(-2);
            }
            c = (((c & 0x03FF) << 10) | (d & 0x03FF)) + 0x10000;
        }

        /* assertion: c is a single UCS-4 value */
        if      (c <    0x80) needed = 1;
        else if (c <   0x800) needed = 2;
        else if (c < 0x10000) needed = 3;
        else                  needed = 4;

        /* never emit a truncated UTF-8 sequence */
        if (outend - out < needed)
            break;

        switch (needed) {
            case 1:
                *out++ = (unsigned char) c;
                break;
            case 2:
                *out++ = (unsigned char) (((c >>  6) & 0x1F) | 0xC0);
                *out++ = (unsigned char) (( c        & 0x3F) | 0x80);
                break;
            case 3:
                *out++ = (unsigned char) (((c >> 12) & 0x0F) | 0xE0);
                *out++ = (unsigned char) (((c >>  6) & 0x3F) | 0x80);
                *out++ = (unsigned char) (( c        & 0x3F) | 0x80);
                break;
            default:
                *out++ = (unsigned char) (((c >> 18) & 0x07) | 0xF0);
                *out++ = (unsigned char) (((c >> 12) & 0x3F) | 0x80);
                *out++ = (unsigned char) (((c >>  6) & 0x3F) | 0x80);
                *out++ = (unsigned char) (( c        & 0x3F) | 0x80);
                break;
        }
        in = next;
    }

    *outlen = (int) (out - outstart);
    *inlenb = (int) (in - inb);
    return(*outlen);
}
563 
564 #ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16LE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
 * block of chars out. No byte order mark is produced. The output is
 * stored byte by byte, so @outb needs no particular alignment and the
 * result does not depend on the endianness of the machine.
 *
 * Returns the number of bytes written, -1 if @outb, @outlen or @inlen is
 *     NULL, or -2 if the transcoding failed (malformed UTF-8). A NULL @in
 *     is an initialization call: both lengths are set to 0 and 0 is
 *     returned.
 *     The value of *inlen after return is the number of octets consumed,
 *     the value of *outlen the number of octets produced.
 */
static int
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char *out = outb;
    unsigned char *outend;
    const unsigned char *processed = in;
    const unsigned char *const instart = in;
    const unsigned char *inend;
    unsigned int c, d;
    unsigned int hi, lo;
    int trailing;

    /* UTF16LE encoding has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* only whole 16 bit units can be stored */
    outend = outb + (*outlen / 2) * 2;

    while (in < inend) {
        d = *in++;
        if      (d < 0x80)  { c = d; trailing = 0; }
        else if (d < 0xC0)  goto invalid;   /* trailing byte in leading position */
        else if (d < 0xE0)  { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0)  { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8)  { c = d & 0x07; trailing = 3; }
        else                goto invalid;   /* no chance for this in UTF-16 */

        /* sequence continues past the input: keep it for the next call */
        if (inend - in < trailing)
            break;

        for ( ; trailing > 0; trailing--) {
            d = *in++;
            /* malformed sequence: fail instead of emitting a bogus value */
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c = (c << 6) | (d & 0x3F);
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            *out++ = (unsigned char) (c & 0xFF);
            *out++ = (unsigned char) ((c >> 8) & 0xFF);
        } else if (c < 0x110000) {
            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            *out++ = (unsigned char) (hi & 0xFF);
            *out++ = (unsigned char) (hi >> 8);
            *out++ = (unsigned char) (lo & 0xFF);
            *out++ = (unsigned char) (lo >> 8);
        } else {
            /* beyond U+10FFFF: stop here, leaving the char unconsumed */
            break;
        }
        processed = in;
    }
    *outlen = (int) (out - outb);
    *inlen = (int) (processed - instart);
    return(*outlen);

invalid:
    *outlen = (int) (out - outb);
    *inlen = (int) (processed - instart);
    return(-2);
}
673 
674 /**
675  * UTF8ToUTF16:
676  * @outb:  a pointer to an array of bytes to store the result
677  * @outlen:  the length of @outb
678  * @in:  a pointer to an array of UTF-8 chars
679  * @inlen:  the length of @in
680  *
681  * Take a block of UTF-8 chars in and try to convert it to an UTF-16
682  * block of chars out.
683  *
684  * Returns the number of bytes written, or -1 if lack of space, or -2
685  *     if the transcoding failed.
686  */
687 static int
UTF8ToUTF16(unsigned char * outb,int * outlen,const unsigned char * in,int * inlen)688 UTF8ToUTF16(unsigned char* outb, int *outlen,
689             const unsigned char* in, int *inlen)
690 {
691     if (in == NULL) {
692 	/*
693 	 * initialization, add the Byte Order Mark for UTF-16LE
694 	 */
695         if (*outlen >= 2) {
696 	    outb[0] = 0xFF;
697 	    outb[1] = 0xFE;
698 	    *outlen = 2;
699 	    *inlen = 0;
700 #ifdef DEBUG_ENCODING
701             xmlGenericError(xmlGenericErrorContext,
702 		    "Added FFFE Byte Order Mark\n");
703 #endif
704 	    return(2);
705 	}
706 	*outlen = 0;
707 	*inlen = 0;
708 	return(0);
709     }
710     return (UTF8ToUTF16LE(outb, outlen, in, inlen));
711 }
712 #endif /* LIBXML_OUTPUT_ENABLED */
713 
/**
 * UTF16BEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16BE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16BE code units in and try to convert it to an
 * UTF-8 block of chars out. The input is read byte by byte, so @inb
 * needs no particular alignment and the result does not depend on the
 * endianness of the machine.
 *
 * A character is only written when it fits completely in @out, so the
 * output never ends in a truncated UTF-8 sequence. A high surrogate
 * whose partner is not in the buffer yet and a trailing odd byte are
 * left unconsumed for the next call, as in UTF16LEToUTF8().
 *
 * Returns the number of bytes written, -1 if an argument is NULL, or -2
 *     if the transcoding fails (high surrogate not followed by a low
 *     surrogate).
 * The value of *inlenb after return is the number of octets consumed.
 * The value of *outlen after return is the number of octets produced.
 */
static int
UTF16BEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char *outstart = out;
    unsigned char *outend;
    const unsigned char *in = inb;  /* start of the next unconverted char */
    const unsigned char *next;
    const unsigned char *inend;
    unsigned int c, d;
    int nbytes, needed;

    if ((out == NULL) || (outlen == NULL) || (inb == NULL) || (inlenb == NULL))
        return(-1);

    outend = out + ((*outlen > 0) ? *outlen : 0);
    nbytes = (*inlenb > 0) ? *inlenb : 0;
    /* an odd trailing byte is half a code unit: ignore it */
    inend = inb + (nbytes - (nbytes % 2));

    while (inend - in >= 2) {
        c = ((unsigned int) in[0] << 8) | in[1];
        next = in + 2;

        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (inend - next < 2)
                break;                   /* low surrogate not available yet */
            d = ((unsigned int) next[0] << 8) | next[1];
            next += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = (int) (out - outstart);
                *inlenb = (int) (in - inb);
                return(-2);
            }
            c = (((c & 0x03FF) << 10) | (d & 0x03FF)) + 0x10000;
        }

        /* assertion: c is a single UCS-4 value */
        if      (c <    0x80) needed = 1;
        else if (c <   0x800) needed = 2;
        else if (c < 0x10000) needed = 3;
        else                  needed = 4;

        /* never emit a truncated UTF-8 sequence */
        if (outend - out < needed)
            break;

        switch (needed) {
            case 1:
                *out++ = (unsigned char) c;
                break;
            case 2:
                *out++ = (unsigned char) (((c >>  6) & 0x1F) | 0xC0);
                *out++ = (unsigned char) (( c        & 0x3F) | 0x80);
                break;
            case 3:
                *out++ = (unsigned char) (((c >> 12) & 0x0F) | 0xE0);
                *out++ = (unsigned char) (((c >>  6) & 0x3F) | 0x80);
                *out++ = (unsigned char) (( c        & 0x3F) | 0x80);
                break;
            default:
                *out++ = (unsigned char) (((c >> 18) & 0x07) | 0xF0);
                *out++ = (unsigned char) (((c >> 12) & 0x3F) | 0x80);
                *out++ = (unsigned char) (((c >>  6) & 0x3F) | 0x80);
                *out++ = (unsigned char) (( c        & 0x3F) | 0x80);
                break;
        }
        in = next;
    }

    *outlen = (int) (out - outstart);
    *inlenb = (int) (in - inb);
    return(*outlen);
}
805 
806 #ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16BE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
 * block of chars out. No byte order mark is produced. The output is
 * stored byte by byte, so @outb needs no particular alignment and the
 * result does not depend on the endianness of the machine.
 *
 * Returns the number of bytes written, -1 if @outb, @outlen or @inlen is
 *     NULL, or -2 if the transcoding failed (malformed UTF-8). A NULL @in
 *     is an initialization call: both lengths are set to 0 and 0 is
 *     returned.
 *     The value of *inlen after return is the number of octets consumed,
 *     the value of *outlen the number of octets produced (in bytes, on
 *     the error paths too).
 */
static int
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char *out = outb;
    unsigned char *outend;
    const unsigned char *processed = in;
    const unsigned char *const instart = in;
    const unsigned char *inend;
    unsigned int c, d;
    unsigned int hi, lo;
    int trailing;

    /* UTF-16BE has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* only whole 16 bit units can be stored */
    outend = outb + (*outlen / 2) * 2;

    while (in < inend) {
        d = *in++;
        if      (d < 0x80)  { c = d; trailing = 0; }
        else if (d < 0xC0)  goto invalid;   /* trailing byte in leading position */
        else if (d < 0xE0)  { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0)  { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8)  { c = d & 0x07; trailing = 3; }
        else                goto invalid;   /* no chance for this in UTF-16 */

        /* sequence continues past the input: keep it for the next call */
        if (inend - in < trailing)
            break;

        for ( ; trailing > 0; trailing--) {
            d = *in++;
            /* malformed sequence: fail instead of emitting a bogus value */
            if ((d & 0xC0) != 0x80)
                goto invalid;
            c = (c << 6) | (d & 0x3F);
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            *out++ = (unsigned char) ((c >> 8) & 0xFF);
            *out++ = (unsigned char) (c & 0xFF);
        } else if (c < 0x110000) {
            if (outend - out < 4)
                break;
            c -= 0x10000;
            hi = 0xD800 | (c >> 10);
            lo = 0xDC00 | (c & 0x03FF);
            *out++ = (unsigned char) (hi >> 8);
            *out++ = (unsigned char) (hi & 0xFF);
            *out++ = (unsigned char) (lo >> 8);
            *out++ = (unsigned char) (lo & 0xFF);
        } else {
            /* beyond U+10FFFF: stop here, leaving the char unconsumed */
            break;
        }
        processed = in;
    }
    *outlen = (int) (out - outb);
    *inlen = (int) (processed - instart);
    return(*outlen);

invalid:
    /* report the produced length in bytes, like the success path */
    *outlen = (int) (out - outb);
    *inlen = (int) (processed - instart);
    return(-2);
}
912 #endif /* LIBXML_OUTPUT_ENABLED */
913 
914 /************************************************************************
915  *									*
916  *		Generic encoding handling routines			*
917  *									*
918  ************************************************************************/
919 
920 /**
921  * xmlDetectCharEncoding:
922  * @in:  a pointer to the first bytes of the XML entity, must be at least
923  *       2 bytes long (at least 4 if encoding is UTF4 variant).
924  * @len:  pointer to the length of the buffer
925  *
926  * Guess the encoding of the entity using the first bytes of the entity content
927  * according to the non-normative appendix F of the XML-1.0 recommendation.
928  *
929  * Returns one of the XML_CHAR_ENCODING_... values.
930  */
931 xmlCharEncoding
xmlDetectCharEncoding(const unsigned char * in,int len)932 xmlDetectCharEncoding(const unsigned char* in, int len)
933 {
934     if (in == NULL)
935         return(XML_CHAR_ENCODING_NONE);
936     if (len >= 4) {
937 	if ((in[0] == 0x00) && (in[1] == 0x00) &&
938 	    (in[2] == 0x00) && (in[3] == 0x3C))
939 	    return(XML_CHAR_ENCODING_UCS4BE);
940 	if ((in[0] == 0x3C) && (in[1] == 0x00) &&
941 	    (in[2] == 0x00) && (in[3] == 0x00))
942 	    return(XML_CHAR_ENCODING_UCS4LE);
943 	if ((in[0] == 0x00) && (in[1] == 0x00) &&
944 	    (in[2] == 0x3C) && (in[3] == 0x00))
945 	    return(XML_CHAR_ENCODING_UCS4_2143);
946 	if ((in[0] == 0x00) && (in[1] == 0x3C) &&
947 	    (in[2] == 0x00) && (in[3] == 0x00))
948 	    return(XML_CHAR_ENCODING_UCS4_3412);
949 	if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
950 	    (in[2] == 0xA7) && (in[3] == 0x94))
951 	    return(XML_CHAR_ENCODING_EBCDIC);
952 	if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
953 	    (in[2] == 0x78) && (in[3] == 0x6D))
954 	    return(XML_CHAR_ENCODING_UTF8);
955 	/*
956 	 * Although not part of the recommendation, we also
957 	 * attempt an "auto-recognition" of UTF-16LE and
958 	 * UTF-16BE encodings.
959 	 */
960 	if ((in[0] == 0x3C) && (in[1] == 0x00) &&
961 	    (in[2] == 0x3F) && (in[3] == 0x00))
962 	    return(XML_CHAR_ENCODING_UTF16LE);
963 	if ((in[0] == 0x00) && (in[1] == 0x3C) &&
964 	    (in[2] == 0x00) && (in[3] == 0x3F))
965 	    return(XML_CHAR_ENCODING_UTF16BE);
966     }
967     if (len >= 3) {
968 	/*
969 	 * Errata on XML-1.0 June 20 2001
970 	 * We now allow an UTF8 encoded BOM
971 	 */
972 	if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
973 	    (in[2] == 0xBF))
974 	    return(XML_CHAR_ENCODING_UTF8);
975     }
976     /* For UTF-16 we can recognize by the BOM */
977     if (len >= 2) {
978 	if ((in[0] == 0xFE) && (in[1] == 0xFF))
979 	    return(XML_CHAR_ENCODING_UTF16BE);
980 	if ((in[0] == 0xFF) && (in[1] == 0xFE))
981 	    return(XML_CHAR_ENCODING_UTF16LE);
982     }
983     return(XML_CHAR_ENCODING_NONE);
984 }
985 
986 /**
987  * xmlCleanupEncodingAliases:
988  *
989  * Unregisters all aliases
990  */
991 void
xmlCleanupEncodingAliases(void)992 xmlCleanupEncodingAliases(void) {
993     int i;
994 
995     if (xmlCharEncodingAliases == NULL)
996 	return;
997 
998     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
999 	if (xmlCharEncodingAliases[i].name != NULL)
1000 	    xmlFree((char *) xmlCharEncodingAliases[i].name);
1001 	if (xmlCharEncodingAliases[i].alias != NULL)
1002 	    xmlFree((char *) xmlCharEncodingAliases[i].alias);
1003     }
1004     xmlCharEncodingAliasesNb = 0;
1005     xmlCharEncodingAliasesMax = 0;
1006     xmlFree(xmlCharEncodingAliases);
1007     xmlCharEncodingAliases = NULL;
1008 }
1009 
1010 /**
1011  * xmlGetEncodingAlias:
1012  * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1013  *
1014  * Lookup an encoding name for the given alias.
1015  *
1016  * Returns NULL if not found, otherwise the original name
1017  */
1018 const char *
xmlGetEncodingAlias(const char * alias)1019 xmlGetEncodingAlias(const char *alias) {
1020     int i;
1021     char upper[100];
1022 
1023     if (alias == NULL)
1024 	return(NULL);
1025 
1026     if (xmlCharEncodingAliases == NULL)
1027 	return(NULL);
1028 
1029     for (i = 0;i < 99;i++) {
1030         upper[i] = toupper(alias[i]);
1031 	if (upper[i] == 0) break;
1032     }
1033     upper[i] = 0;
1034 
1035     /*
1036      * Walk down the list looking for a definition of the alias
1037      */
1038     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1039 	if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1040 	    return(xmlCharEncodingAliases[i].name);
1041 	}
1042     }
1043     return(NULL);
1044 }
1045 
1046 /**
1047  * xmlAddEncodingAlias:
1048  * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1049  * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1050  *
1051  * Registers an alias @alias for an encoding named @name. Existing alias
1052  * will be overwritten.
1053  *
1054  * Returns 0 in case of success, -1 in case of error
1055  */
1056 int
xmlAddEncodingAlias(const char * name,const char * alias)1057 xmlAddEncodingAlias(const char *name, const char *alias) {
1058     int i;
1059     char upper[100];
1060 
1061     if ((name == NULL) || (alias == NULL))
1062 	return(-1);
1063 
1064     for (i = 0;i < 99;i++) {
1065         upper[i] = toupper(alias[i]);
1066 	if (upper[i] == 0) break;
1067     }
1068     upper[i] = 0;
1069 
1070     if (xmlCharEncodingAliases == NULL) {
1071 	xmlCharEncodingAliasesNb = 0;
1072 	xmlCharEncodingAliasesMax = 20;
1073 	xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1074 	      xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1075 	if (xmlCharEncodingAliases == NULL)
1076 	    return(-1);
1077     } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
1078 	xmlCharEncodingAliasesMax *= 2;
1079 	xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1080 	      xmlRealloc(xmlCharEncodingAliases,
1081 		         xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1082     }
1083     /*
1084      * Walk down the list looking for a definition of the alias
1085      */
1086     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1087 	if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1088 	    /*
1089 	     * Replace the definition.
1090 	     */
1091 	    xmlFree((char *) xmlCharEncodingAliases[i].name);
1092 	    xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
1093 	    return(0);
1094 	}
1095     }
1096     /*
1097      * Add the definition
1098      */
1099     xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
1100     xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
1101     xmlCharEncodingAliasesNb++;
1102     return(0);
1103 }
1104 
1105 /**
1106  * xmlDelEncodingAlias:
1107  * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1108  *
1109  * Unregisters an encoding alias @alias
1110  *
1111  * Returns 0 in case of success, -1 in case of error
1112  */
1113 int
xmlDelEncodingAlias(const char * alias)1114 xmlDelEncodingAlias(const char *alias) {
1115     int i;
1116 
1117     if (alias == NULL)
1118 	return(-1);
1119 
1120     if (xmlCharEncodingAliases == NULL)
1121 	return(-1);
1122     /*
1123      * Walk down the list looking for a definition of the alias
1124      */
1125     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1126 	if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1127 	    xmlFree((char *) xmlCharEncodingAliases[i].name);
1128 	    xmlFree((char *) xmlCharEncodingAliases[i].alias);
1129 	    xmlCharEncodingAliasesNb--;
1130 	    memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1131 		    sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1132 	    return(0);
1133 	}
1134     }
1135     return(-1);
1136 }
1137 
1138 /**
1139  * xmlParseCharEncoding:
1140  * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1141  *
1142  * Compare the string to the encoding schemes already known. Note
1143  * that the comparison is case insensitive accordingly to the section
1144  * [XML] 4.3.3 Character Encoding in Entities.
1145  *
1146  * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1147  * if not recognized.
1148  */
1149 xmlCharEncoding
xmlParseCharEncoding(const char * name)1150 xmlParseCharEncoding(const char* name)
1151 {
1152     const char *alias;
1153     char upper[500];
1154     int i;
1155 
1156     if (name == NULL)
1157 	return(XML_CHAR_ENCODING_NONE);
1158 
1159     /*
1160      * Do the alias resolution
1161      */
1162     alias = xmlGetEncodingAlias(name);
1163     if (alias != NULL)
1164 	name = alias;
1165 
1166     for (i = 0;i < 499;i++) {
1167         upper[i] = toupper(name[i]);
1168 	if (upper[i] == 0) break;
1169     }
1170     upper[i] = 0;
1171 
1172     if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1173     if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1174     if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1175 
1176     /*
1177      * NOTE: if we were able to parse this, the endianness of UTF16 is
1178      *       already found and in use
1179      */
1180     if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1181     if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1182 
1183     if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1184     if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1185     if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1186 
1187     /*
1188      * NOTE: if we were able to parse this, the endianness of UCS4 is
1189      *       already found and in use
1190      */
1191     if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1192     if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1193     if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1194 
1195 
1196     if (!strcmp(upper,  "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1197     if (!strcmp(upper,  "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1198     if (!strcmp(upper,  "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1199 
1200     if (!strcmp(upper,  "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1201     if (!strcmp(upper,  "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1202     if (!strcmp(upper,  "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1203 
1204     if (!strcmp(upper,  "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1205     if (!strcmp(upper,  "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1206     if (!strcmp(upper,  "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1207     if (!strcmp(upper,  "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1208     if (!strcmp(upper,  "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1209     if (!strcmp(upper,  "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1210     if (!strcmp(upper,  "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1211 
1212     if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1213     if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1214     if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1215 
1216 #ifdef DEBUG_ENCODING
1217     xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1218 #endif
1219     return(XML_CHAR_ENCODING_ERROR);
1220 }
1221 
1222 /**
1223  * xmlGetCharEncodingName:
1224  * @enc:  the encoding
1225  *
1226  * The "canonical" name for XML encoding.
1227  * C.f. http://www.w3.org/TR/REC-xml#charencoding
1228  * Section 4.3.3  Character Encoding in Entities
1229  *
1230  * Returns the canonical name for the given encoding
1231  */
1232 
1233 const char*
xmlGetCharEncodingName(xmlCharEncoding enc)1234 xmlGetCharEncodingName(xmlCharEncoding enc) {
1235     switch (enc) {
1236         case XML_CHAR_ENCODING_ERROR:
1237 	    return(NULL);
1238         case XML_CHAR_ENCODING_NONE:
1239 	    return(NULL);
1240         case XML_CHAR_ENCODING_UTF8:
1241 	    return("UTF-8");
1242         case XML_CHAR_ENCODING_UTF16LE:
1243 	    return("UTF-16");
1244         case XML_CHAR_ENCODING_UTF16BE:
1245 	    return("UTF-16");
1246         case XML_CHAR_ENCODING_EBCDIC:
1247             return("EBCDIC");
1248         case XML_CHAR_ENCODING_UCS4LE:
1249             return("ISO-10646-UCS-4");
1250         case XML_CHAR_ENCODING_UCS4BE:
1251             return("ISO-10646-UCS-4");
1252         case XML_CHAR_ENCODING_UCS4_2143:
1253             return("ISO-10646-UCS-4");
1254         case XML_CHAR_ENCODING_UCS4_3412:
1255             return("ISO-10646-UCS-4");
1256         case XML_CHAR_ENCODING_UCS2:
1257             return("ISO-10646-UCS-2");
1258         case XML_CHAR_ENCODING_8859_1:
1259 	    return("ISO-8859-1");
1260         case XML_CHAR_ENCODING_8859_2:
1261 	    return("ISO-8859-2");
1262         case XML_CHAR_ENCODING_8859_3:
1263 	    return("ISO-8859-3");
1264         case XML_CHAR_ENCODING_8859_4:
1265 	    return("ISO-8859-4");
1266         case XML_CHAR_ENCODING_8859_5:
1267 	    return("ISO-8859-5");
1268         case XML_CHAR_ENCODING_8859_6:
1269 	    return("ISO-8859-6");
1270         case XML_CHAR_ENCODING_8859_7:
1271 	    return("ISO-8859-7");
1272         case XML_CHAR_ENCODING_8859_8:
1273 	    return("ISO-8859-8");
1274         case XML_CHAR_ENCODING_8859_9:
1275 	    return("ISO-8859-9");
1276         case XML_CHAR_ENCODING_2022_JP:
1277             return("ISO-2022-JP");
1278         case XML_CHAR_ENCODING_SHIFT_JIS:
1279             return("Shift-JIS");
1280         case XML_CHAR_ENCODING_EUC_JP:
1281             return("EUC-JP");
1282 	case XML_CHAR_ENCODING_ASCII:
1283 	    return(NULL);
1284     }
1285     return(NULL);
1286 }
1287 
1288 /************************************************************************
1289  *									*
1290  *			Char encoding handlers				*
1291  *									*
1292  ************************************************************************/
1293 
1294 
/*
 * Registry of the char encoding handlers.
 *
 * MAX_ENCODING_HANDLERS is the number of slots allocated for the table by
 * xmlInitCharEncodingHandlers(); xmlRegisterCharEncodingHandler() refuses
 * new entries once that many handlers are registered.
 * handlers is the table itself (NULL until initialized) and
 * nbCharEncodingHandler the number of slots currently in use.
 */
/* the size should be growable, but it's not a big deal ... */
#define MAX_ENCODING_HANDLERS 50
static xmlCharEncodingHandlerPtr *handlers = NULL;
static int nbCharEncodingHandler = 0;

/*
 * The default is UTF-8 for XML, that's also the default used for the
 * parser internals, so the default encoding handler is NULL.
 * This is the value xmlFindCharEncodingHandler() returns when it is given
 * a NULL or empty encoding name.
 */

static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
1306 
1307 /**
1308  * xmlNewCharEncodingHandler:
1309  * @name:  the encoding name, in UTF-8 format (ASCII actually)
1310  * @input:  the xmlCharEncodingInputFunc to read that encoding
1311  * @output:  the xmlCharEncodingOutputFunc to write that encoding
1312  *
1313  * Create and registers an xmlCharEncodingHandler.
1314  *
1315  * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1316  */
1317 xmlCharEncodingHandlerPtr
xmlNewCharEncodingHandler(const char * name,xmlCharEncodingInputFunc input,xmlCharEncodingOutputFunc output)1318 xmlNewCharEncodingHandler(const char *name,
1319                           xmlCharEncodingInputFunc input,
1320                           xmlCharEncodingOutputFunc output) {
1321     xmlCharEncodingHandlerPtr handler;
1322     const char *alias;
1323     char upper[500];
1324     int i;
1325     char *up = NULL;
1326 
1327     /*
1328      * Do the alias resolution
1329      */
1330     alias = xmlGetEncodingAlias(name);
1331     if (alias != NULL)
1332 	name = alias;
1333 
1334     /*
1335      * Keep only the uppercase version of the encoding.
1336      */
1337     if (name == NULL) {
1338         xmlEncodingErr(XML_I18N_NO_NAME,
1339 		       "xmlNewCharEncodingHandler : no name !\n", NULL);
1340 	return(NULL);
1341     }
1342     for (i = 0;i < 499;i++) {
1343         upper[i] = toupper(name[i]);
1344 	if (upper[i] == 0) break;
1345     }
1346     upper[i] = 0;
1347     up = xmlMemStrdup(upper);
1348     if (up == NULL) {
1349         xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1350 	return(NULL);
1351     }
1352 
1353     /*
1354      * allocate and fill-up an handler block.
1355      */
1356     handler = (xmlCharEncodingHandlerPtr)
1357               xmlMalloc(sizeof(xmlCharEncodingHandler));
1358     if (handler == NULL) {
1359         xmlFree(up);
1360         xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1361 	return(NULL);
1362     }
1363     memset(handler, 0, sizeof(xmlCharEncodingHandler));
1364     handler->input = input;
1365     handler->output = output;
1366     handler->name = up;
1367 
1368 #ifdef LIBXML_ICONV_ENABLED
1369     handler->iconv_in = NULL;
1370     handler->iconv_out = NULL;
1371 #endif
1372 #ifdef LIBXML_ICU_ENABLED
1373     handler->uconv_in = NULL;
1374     handler->uconv_out = NULL;
1375 #endif
1376 
1377     /*
1378      * registers and returns the handler.
1379      */
1380     xmlRegisterCharEncodingHandler(handler);
1381 #ifdef DEBUG_ENCODING
1382     xmlGenericError(xmlGenericErrorContext,
1383 	    "Registered encoding handler for %s\n", name);
1384 #endif
1385     return(handler);
1386 }
1387 
1388 /**
1389  * xmlInitCharEncodingHandlers:
1390  *
1391  * Initialize the char encoding support, it registers the default
1392  * encoding supported.
1393  * NOTE: while public, this function usually doesn't need to be called
1394  *       in normal processing.
1395  */
1396 void
xmlInitCharEncodingHandlers(void)1397 xmlInitCharEncodingHandlers(void) {
1398     unsigned short int tst = 0x1234;
1399     unsigned char *ptr = (unsigned char *) &tst;
1400 
1401     if (handlers != NULL) return;
1402 
1403     handlers = (xmlCharEncodingHandlerPtr *)
1404         xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
1405 
1406     if (*ptr == 0x12) xmlLittleEndian = 0;
1407     else if (*ptr == 0x34) xmlLittleEndian = 1;
1408     else {
1409         xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1410 	               "Odd problem at endianness detection\n", NULL);
1411     }
1412 
1413     if (handlers == NULL) {
1414         xmlEncodingErrMemory("xmlInitCharEncodingHandlers : out of memory !\n");
1415 	return;
1416     }
1417     xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8);
1418 #ifdef LIBXML_OUTPUT_ENABLED
1419     xmlUTF16LEHandler =
1420           xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
1421     xmlUTF16BEHandler =
1422           xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
1423     xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16);
1424     xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
1425     xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
1426     xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii);
1427 #ifdef LIBXML_HTML_ENABLED
1428     xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml);
1429 #endif
1430 #else
1431     xmlUTF16LEHandler =
1432           xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL);
1433     xmlUTF16BEHandler =
1434           xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, NULL);
1435     xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL);
1436     xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL);
1437     xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL);
1438     xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL);
1439 #endif /* LIBXML_OUTPUT_ENABLED */
1440 #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
1441 #ifdef LIBXML_ISO8859X_ENABLED
1442     xmlRegisterCharEncodingHandlersISO8859x ();
1443 #endif
1444 #endif
1445 
1446 }
1447 
1448 /**
1449  * xmlCleanupCharEncodingHandlers:
1450  *
1451  * Cleanup the memory allocated for the char encoding support, it
1452  * unregisters all the encoding handlers and the aliases.
1453  */
1454 void
xmlCleanupCharEncodingHandlers(void)1455 xmlCleanupCharEncodingHandlers(void) {
1456     xmlCleanupEncodingAliases();
1457 
1458     if (handlers == NULL) return;
1459 
1460     for (;nbCharEncodingHandler > 0;) {
1461         nbCharEncodingHandler--;
1462 	if (handlers[nbCharEncodingHandler] != NULL) {
1463 	    if (handlers[nbCharEncodingHandler]->name != NULL)
1464 		xmlFree(handlers[nbCharEncodingHandler]->name);
1465 	    xmlFree(handlers[nbCharEncodingHandler]);
1466 	}
1467     }
1468     xmlFree(handlers);
1469     handlers = NULL;
1470     nbCharEncodingHandler = 0;
1471     xmlDefaultCharEncodingHandler = NULL;
1472 }
1473 
1474 /**
1475  * xmlRegisterCharEncodingHandler:
1476  * @handler:  the xmlCharEncodingHandlerPtr handler block
1477  *
1478  * Register the char encoding handler, surprising, isn't it ?
1479  */
1480 void
xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler)1481 xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1482     if (handlers == NULL) xmlInitCharEncodingHandlers();
1483     if ((handler == NULL) || (handlers == NULL)) {
1484         xmlEncodingErr(XML_I18N_NO_HANDLER,
1485 		"xmlRegisterCharEncodingHandler: NULL handler !\n", NULL);
1486 	return;
1487     }
1488 
1489     if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
1490         xmlEncodingErr(XML_I18N_EXCESS_HANDLER,
1491 	"xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n",
1492 	               "MAX_ENCODING_HANDLERS");
1493 	return;
1494     }
1495     handlers[nbCharEncodingHandler++] = handler;
1496 }
1497 
1498 /**
1499  * xmlGetCharEncodingHandler:
1500  * @enc:  an xmlCharEncoding value.
1501  *
1502  * Search in the registered set the handler able to read/write that encoding.
1503  *
1504  * Returns the handler or NULL if not found
1505  */
1506 xmlCharEncodingHandlerPtr
xmlGetCharEncodingHandler(xmlCharEncoding enc)1507 xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1508     xmlCharEncodingHandlerPtr handler;
1509 
1510     if (handlers == NULL) xmlInitCharEncodingHandlers();
1511     switch (enc) {
1512         case XML_CHAR_ENCODING_ERROR:
1513 	    return(NULL);
1514         case XML_CHAR_ENCODING_NONE:
1515 	    return(NULL);
1516         case XML_CHAR_ENCODING_UTF8:
1517 	    return(NULL);
1518         case XML_CHAR_ENCODING_UTF16LE:
1519 	    return(xmlUTF16LEHandler);
1520         case XML_CHAR_ENCODING_UTF16BE:
1521 	    return(xmlUTF16BEHandler);
1522         case XML_CHAR_ENCODING_EBCDIC:
1523             handler = xmlFindCharEncodingHandler("EBCDIC");
1524             if (handler != NULL) return(handler);
1525             handler = xmlFindCharEncodingHandler("ebcdic");
1526             if (handler != NULL) return(handler);
1527             handler = xmlFindCharEncodingHandler("EBCDIC-US");
1528             if (handler != NULL) return(handler);
1529             handler = xmlFindCharEncodingHandler("IBM-037");
1530             if (handler != NULL) return(handler);
1531 	    break;
1532         case XML_CHAR_ENCODING_UCS4BE:
1533             handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1534             if (handler != NULL) return(handler);
1535             handler = xmlFindCharEncodingHandler("UCS-4");
1536             if (handler != NULL) return(handler);
1537             handler = xmlFindCharEncodingHandler("UCS4");
1538             if (handler != NULL) return(handler);
1539 	    break;
1540         case XML_CHAR_ENCODING_UCS4LE:
1541             handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1542             if (handler != NULL) return(handler);
1543             handler = xmlFindCharEncodingHandler("UCS-4");
1544             if (handler != NULL) return(handler);
1545             handler = xmlFindCharEncodingHandler("UCS4");
1546             if (handler != NULL) return(handler);
1547 	    break;
1548         case XML_CHAR_ENCODING_UCS4_2143:
1549 	    break;
1550         case XML_CHAR_ENCODING_UCS4_3412:
1551 	    break;
1552         case XML_CHAR_ENCODING_UCS2:
1553             handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1554             if (handler != NULL) return(handler);
1555             handler = xmlFindCharEncodingHandler("UCS-2");
1556             if (handler != NULL) return(handler);
1557             handler = xmlFindCharEncodingHandler("UCS2");
1558             if (handler != NULL) return(handler);
1559 	    break;
1560 
1561 	    /*
1562 	     * We used to keep ISO Latin encodings native in the
1563 	     * generated data. This led to so many problems that
1564 	     * this has been removed. One can still change this
1565 	     * back by registering no-ops encoders for those
1566 	     */
1567         case XML_CHAR_ENCODING_8859_1:
1568 	    handler = xmlFindCharEncodingHandler("ISO-8859-1");
1569 	    if (handler != NULL) return(handler);
1570 	    break;
1571         case XML_CHAR_ENCODING_8859_2:
1572 	    handler = xmlFindCharEncodingHandler("ISO-8859-2");
1573 	    if (handler != NULL) return(handler);
1574 	    break;
1575         case XML_CHAR_ENCODING_8859_3:
1576 	    handler = xmlFindCharEncodingHandler("ISO-8859-3");
1577 	    if (handler != NULL) return(handler);
1578 	    break;
1579         case XML_CHAR_ENCODING_8859_4:
1580 	    handler = xmlFindCharEncodingHandler("ISO-8859-4");
1581 	    if (handler != NULL) return(handler);
1582 	    break;
1583         case XML_CHAR_ENCODING_8859_5:
1584 	    handler = xmlFindCharEncodingHandler("ISO-8859-5");
1585 	    if (handler != NULL) return(handler);
1586 	    break;
1587         case XML_CHAR_ENCODING_8859_6:
1588 	    handler = xmlFindCharEncodingHandler("ISO-8859-6");
1589 	    if (handler != NULL) return(handler);
1590 	    break;
1591         case XML_CHAR_ENCODING_8859_7:
1592 	    handler = xmlFindCharEncodingHandler("ISO-8859-7");
1593 	    if (handler != NULL) return(handler);
1594 	    break;
1595         case XML_CHAR_ENCODING_8859_8:
1596 	    handler = xmlFindCharEncodingHandler("ISO-8859-8");
1597 	    if (handler != NULL) return(handler);
1598 	    break;
1599         case XML_CHAR_ENCODING_8859_9:
1600 	    handler = xmlFindCharEncodingHandler("ISO-8859-9");
1601 	    if (handler != NULL) return(handler);
1602 	    break;
1603 
1604 
1605         case XML_CHAR_ENCODING_2022_JP:
1606             handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1607             if (handler != NULL) return(handler);
1608 	    break;
1609         case XML_CHAR_ENCODING_SHIFT_JIS:
1610             handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1611             if (handler != NULL) return(handler);
1612             handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1613             if (handler != NULL) return(handler);
1614             handler = xmlFindCharEncodingHandler("Shift_JIS");
1615             if (handler != NULL) return(handler);
1616 	    break;
1617         case XML_CHAR_ENCODING_EUC_JP:
1618             handler = xmlFindCharEncodingHandler("EUC-JP");
1619             if (handler != NULL) return(handler);
1620 	    break;
1621 	default:
1622 	    break;
1623     }
1624 
1625 #ifdef DEBUG_ENCODING
1626     xmlGenericError(xmlGenericErrorContext,
1627 	    "No handler found for encoding %d\n", enc);
1628 #endif
1629     return(NULL);
1630 }
1631 
1632 /**
1633  * xmlFindCharEncodingHandler:
1634  * @name:  a string describing the char encoding.
1635  *
1636  * Search in the registered set the handler able to read/write that encoding.
1637  *
1638  * Returns the handler or NULL if not found
1639  */
1640 xmlCharEncodingHandlerPtr
xmlFindCharEncodingHandler(const char * name)1641 xmlFindCharEncodingHandler(const char *name) {
1642     const char *nalias;
1643     const char *norig;
1644     xmlCharEncoding alias;
1645 #ifdef LIBXML_ICONV_ENABLED
1646     xmlCharEncodingHandlerPtr enc;
1647     iconv_t icv_in, icv_out;
1648 #endif /* LIBXML_ICONV_ENABLED */
1649 #ifdef LIBXML_ICU_ENABLED
1650     xmlCharEncodingHandlerPtr encu;
1651     uconv_t *ucv_in, *ucv_out;
1652 #endif /* LIBXML_ICU_ENABLED */
1653     char upper[100];
1654     int i;
1655 
1656     if (handlers == NULL) xmlInitCharEncodingHandlers();
1657     if (name == NULL) return(xmlDefaultCharEncodingHandler);
1658     if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
1659 
1660     /*
1661      * Do the alias resolution
1662      */
1663     norig = name;
1664     nalias = xmlGetEncodingAlias(name);
1665     if (nalias != NULL)
1666 	name = nalias;
1667 
1668     /*
1669      * Check first for directly registered encoding names
1670      */
1671     for (i = 0;i < 99;i++) {
1672         upper[i] = toupper(name[i]);
1673 	if (upper[i] == 0) break;
1674     }
1675     upper[i] = 0;
1676 
1677     if (handlers != NULL) {
1678         for (i = 0;i < nbCharEncodingHandler; i++) {
1679             if (!strcmp(upper, handlers[i]->name)) {
1680 #ifdef DEBUG_ENCODING
1681                 xmlGenericError(xmlGenericErrorContext,
1682                         "Found registered handler for encoding %s\n", name);
1683 #endif
1684                 return(handlers[i]);
1685             }
1686         }
1687     }
1688 
1689 #ifdef LIBXML_ICONV_ENABLED
1690     /* check whether iconv can handle this */
1691     icv_in = iconv_open("UTF-8", name);
1692     icv_out = iconv_open(name, "UTF-8");
1693     if (icv_in == (iconv_t) -1) {
1694         icv_in = iconv_open("UTF-8", upper);
1695     }
1696     if (icv_out == (iconv_t) -1) {
1697 	icv_out = iconv_open(upper, "UTF-8");
1698     }
1699     if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1700 	    enc = (xmlCharEncodingHandlerPtr)
1701 	          xmlMalloc(sizeof(xmlCharEncodingHandler));
1702 	    if (enc == NULL) {
1703 	        iconv_close(icv_in);
1704 	        iconv_close(icv_out);
1705 		return(NULL);
1706 	    }
1707             memset(enc, 0, sizeof(xmlCharEncodingHandler));
1708 	    enc->name = xmlMemStrdup(name);
1709 	    enc->input = NULL;
1710 	    enc->output = NULL;
1711 	    enc->iconv_in = icv_in;
1712 	    enc->iconv_out = icv_out;
1713 #ifdef DEBUG_ENCODING
1714             xmlGenericError(xmlGenericErrorContext,
1715 		    "Found iconv handler for encoding %s\n", name);
1716 #endif
1717 	    return enc;
1718     } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
1719 	    xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1720 		    "iconv : problems with filters for '%s'\n", name);
1721     }
1722 #endif /* LIBXML_ICONV_ENABLED */
1723 #ifdef LIBXML_ICU_ENABLED
1724     /* check whether icu can handle this */
1725     ucv_in = openIcuConverter(name, 1);
1726     ucv_out = openIcuConverter(name, 0);
1727     if (ucv_in != NULL && ucv_out != NULL) {
1728 	    encu = (xmlCharEncodingHandlerPtr)
1729 	           xmlMalloc(sizeof(xmlCharEncodingHandler));
1730 	    if (encu == NULL) {
1731                 closeIcuConverter(ucv_in);
1732                 closeIcuConverter(ucv_out);
1733 		return(NULL);
1734 	    }
1735             memset(encu, 0, sizeof(xmlCharEncodingHandler));
1736 	    encu->name = xmlMemStrdup(name);
1737 	    encu->input = NULL;
1738 	    encu->output = NULL;
1739 	    encu->uconv_in = ucv_in;
1740 	    encu->uconv_out = ucv_out;
1741 #ifdef DEBUG_ENCODING
1742             xmlGenericError(xmlGenericErrorContext,
1743 		    "Found ICU converter handler for encoding %s\n", name);
1744 #endif
1745 	    return encu;
1746     } else if (ucv_in != NULL || ucv_out != NULL) {
1747             closeIcuConverter(ucv_in);
1748             closeIcuConverter(ucv_out);
1749 	    xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1750 		    "ICU converter : problems with filters for '%s'\n", name);
1751     }
1752 #endif /* LIBXML_ICU_ENABLED */
1753 
1754 #ifdef DEBUG_ENCODING
1755     xmlGenericError(xmlGenericErrorContext,
1756 	    "No handler found for encoding %s\n", name);
1757 #endif
1758 
1759     /*
1760      * Fallback using the canonical names
1761      */
1762     alias = xmlParseCharEncoding(norig);
1763     if (alias != XML_CHAR_ENCODING_ERROR) {
1764         const char* canon;
1765         canon = xmlGetCharEncodingName(alias);
1766         if ((canon != NULL) && (strcmp(name, canon))) {
1767 	    return(xmlFindCharEncodingHandler(canon));
1768         }
1769     }
1770 
1771     /* If "none of the above", give up */
1772     return(NULL);
1773 }
1774 
1775 /************************************************************************
1776  *									*
1777  *		ICONV based generic conversion functions		*
1778  *									*
1779  ************************************************************************/
1780 
#ifdef LIBXML_ICONV_ENABLED
/**
 * xmlIconvWrapper:
 * @cd:		iconv converter data structure
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of bytes to convert
 * @inlen:  the length of @in
 *
 * Runs one iconv() conversion step from @in to @out using @cd.
 *
 * Returns 0 if success, or
 *     -1 by lack of space (or if one of the pointer arguments is NULL), or
 *     -2 if the transcoding fails (the input is not valid in the source
 *        encoding or the result can't be represented in the target
 *        encoding), or
 *     -3 if the input ends with an incomplete sequence, or on any other
 *        error.
 *
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 * When one of the pointer arguments is NULL, @outlen (if not NULL) is set
 * to 0 and @inlen is left untouched.
 */
static int
xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
                const unsigned char *in, int *inlen) {
    size_t icv_inlen, icv_outlen;
    const char *icv_in = (const char *) in;
    char *icv_out = (char *) out;
    size_t ret;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
        if (outlen != NULL) *outlen = 0;
        return(-1);
    }
    icv_inlen = *inlen;
    icv_outlen = *outlen;
    ret = iconv(cd, (ICONV_CONST char **) &icv_in, &icv_inlen, &icv_out, &icv_outlen);
    /* iconv() leaves the remaining byte counts in the length arguments */
    *inlen -= icv_inlen;
    *outlen -= icv_outlen;
    /*
     * iconv() returns a size_t and signals failure with (size_t) -1, so
     * the result is kept in a size_t instead of being narrowed to int.
     */
    if ((icv_inlen != 0) || (ret == (size_t) -1)) {
#ifdef EILSEQ
        if (errno == EILSEQ) {
            return -2;
        } else
#endif
#ifdef E2BIG
        if (errno == E2BIG) {
            return -1;
        } else
#endif
#ifdef EINVAL
        if (errno == EINVAL) {
            return -3;
        } else
#endif
        {
            return -3;
        }
    }
    return 0;
}
#endif /* LIBXML_ICONV_ENABLED */
1840 
1841 /************************************************************************
1842  *									*
1843  *		ICU based generic conversion functions		*
1844  *									*
1845  ************************************************************************/
1846 
1847 #ifdef LIBXML_ICU_ENABLED
1848 /**
1849  * xmlUconvWrapper:
1850  * @cd: ICU uconverter data structure
1851  * @toUnicode : non-zero if toUnicode. 0 otherwise.
1852  * @out:  a pointer to an array of bytes to store the result
1853  * @outlen:  the length of @out
1854  * @in:  a pointer to an array of ISO Latin 1 chars
1855  * @inlen:  the length of @in
1856  * @flush: if true, indicates end of input
1857  *
1858  * Returns 0 if success, or
1859  *     -1 by lack of space, or
1860  *     -2 if the transcoding fails (for *in is not valid utf8 string or
1861  *        the result of transformation can't fit into the encoding we want), or
1862  *     -3 if there the last byte can't form a single output char.
1863  *
1864  * The value of @inlen after return is the number of octets consumed
1865  *     as the return value is positive, else unpredictable.
1866  * The value of @outlen after return is the number of ocetes consumed.
1867  */
1868 static int
xmlUconvWrapper(uconv_t * cd,int toUnicode,unsigned char * out,int * outlen,const unsigned char * in,int * inlen,int flush)1869 xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
1870                 const unsigned char *in, int *inlen, int flush) {
1871     const char *ucv_in = (const char *) in;
1872     char *ucv_out = (char *) out;
1873     UErrorCode err = U_ZERO_ERROR;
1874 
1875     if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
1876         if (outlen != NULL) *outlen = 0;
1877         return(-1);
1878     }
1879 
1880     if (toUnicode) {
1881         /* encoding => UTF-16 => UTF-8 */
1882         ucnv_convertEx(cd->utf8, cd->uconv, &ucv_out, ucv_out + *outlen,
1883                        &ucv_in, ucv_in + *inlen, cd->pivot_buf,
1884                        &cd->pivot_source, &cd->pivot_target,
1885                        cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err);
1886     } else {
1887         /* UTF-8 => UTF-16 => encoding */
1888         ucnv_convertEx(cd->uconv, cd->utf8, &ucv_out, ucv_out + *outlen,
1889                        &ucv_in, ucv_in + *inlen, cd->pivot_buf,
1890                        &cd->pivot_source, &cd->pivot_target,
1891                        cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, flush, &err);
1892     }
1893     *inlen = ucv_in - (const char*) in;
1894     *outlen = ucv_out - (char *) out;
1895     if (U_SUCCESS(err)) {
1896         /* reset pivot buf if this is the last call for input (flush==TRUE) */
1897         if (flush)
1898             cd->pivot_source = cd->pivot_target = cd->pivot_buf;
1899         return 0;
1900     }
1901     if (err == U_BUFFER_OVERFLOW_ERROR)
1902         return -1;
1903     if (err == U_INVALID_CHAR_FOUND || err == U_ILLEGAL_CHAR_FOUND)
1904         return -2;
1905     return -3;
1906 }
1907 #endif /* LIBXML_ICU_ENABLED */
1908 
1909 /************************************************************************
1910  *									*
1911  *		The real API used by libxml for on-the-fly conversion	*
1912  *									*
1913  ************************************************************************/
1914 
1915 static int
xmlEncInputChunk(xmlCharEncodingHandler * handler,unsigned char * out,int * outlen,const unsigned char * in,int * inlen,int flush)1916 xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
1917                  int *outlen, const unsigned char *in, int *inlen, int flush) {
1918     int ret;
1919     (void)flush;
1920 
1921     if (handler->input != NULL) {
1922         ret = handler->input(out, outlen, in, inlen);
1923     }
1924 #ifdef LIBXML_ICONV_ENABLED
1925     else if (handler->iconv_in != NULL) {
1926         ret = xmlIconvWrapper(handler->iconv_in, out, outlen, in, inlen);
1927     }
1928 #endif /* LIBXML_ICONV_ENABLED */
1929 #ifdef LIBXML_ICU_ENABLED
1930     else if (handler->uconv_in != NULL) {
1931         ret = xmlUconvWrapper(handler->uconv_in, 1, out, outlen, in, inlen,
1932                               flush);
1933     }
1934 #endif /* LIBXML_ICU_ENABLED */
1935     else {
1936         *outlen = 0;
1937         *inlen = 0;
1938         ret = -2;
1939     }
1940 
1941     return(ret);
1942 }
1943 
1944 /* Returns -4 if no output function was found. */
1945 static int
xmlEncOutputChunk(xmlCharEncodingHandler * handler,unsigned char * out,int * outlen,const unsigned char * in,int * inlen)1946 xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
1947                   int *outlen, const unsigned char *in, int *inlen) {
1948     int ret;
1949 
1950     if (handler->output != NULL) {
1951         ret = handler->output(out, outlen, in, inlen);
1952     }
1953 #ifdef LIBXML_ICONV_ENABLED
1954     else if (handler->iconv_out != NULL) {
1955         ret = xmlIconvWrapper(handler->iconv_out, out, outlen, in, inlen);
1956     }
1957 #endif /* LIBXML_ICONV_ENABLED */
1958 #ifdef LIBXML_ICU_ENABLED
1959     else if (handler->uconv_out != NULL) {
1960         ret = xmlUconvWrapper(handler->uconv_out, 0, out, outlen, in, inlen,
1961                               TRUE);
1962     }
1963 #endif /* LIBXML_ICU_ENABLED */
1964     else {
1965         *outlen = 0;
1966         *inlen = 0;
1967         ret = -4;
1968     }
1969 
1970     return(ret);
1971 }
1972 
1973 /**
1974  * xmlCharEncFirstLineInt:
1975  * @handler:	char enconding transformation data structure
1976  * @out:  an xmlBuffer for the output.
1977  * @in:  an xmlBuffer for the input
1978  * @len:  number of bytes to convert for the first line, or -1
1979  *
1980  * Front-end for the encoding handler input function, but handle only
1981  * the very first line, i.e. limit itself to 45 chars.
1982  *
1983  * Returns the number of byte written if success, or
1984  *     -1 general error
1985  *     -2 if the transcoding fails (for *in is not valid utf8 string or
1986  *        the result of transformation can't fit into the encoding we want), or
1987  */
1988 int
xmlCharEncFirstLineInt(xmlCharEncodingHandler * handler,xmlBufferPtr out,xmlBufferPtr in,int len)1989 xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
1990                        xmlBufferPtr in, int len) {
1991     int ret;
1992     int written;
1993     int toconv;
1994 
1995     if (handler == NULL) return(-1);
1996     if (out == NULL) return(-1);
1997     if (in == NULL) return(-1);
1998 
1999     /* calculate space available */
2000     written = out->size - out->use - 1; /* count '\0' */
2001     toconv = in->use;
2002     /*
2003      * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2004      * 45 chars should be sufficient to reach the end of the encoding
2005      * declaration without going too far inside the document content.
2006      * on UTF-16 this means 90bytes, on UCS4 this means 180
2007      * The actual value depending on guessed encoding is passed as @len
2008      * if provided
2009      */
2010     if (len >= 0) {
2011         if (toconv > len)
2012             toconv = len;
2013     } else {
2014         if (toconv > 180)
2015             toconv = 180;
2016     }
2017     if (toconv * 2 >= written) {
2018         xmlBufferGrow(out, toconv * 2);
2019 	written = out->size - out->use - 1;
2020     }
2021 
2022     ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2023                            in->content, &toconv, 0);
2024     xmlBufferShrink(in, toconv);
2025     out->use += written;
2026     out->content[out->use] = 0;
2027     if (ret == -1) ret = -3;
2028 
2029 #ifdef DEBUG_ENCODING
2030     switch (ret) {
2031         case 0:
2032 	    xmlGenericError(xmlGenericErrorContext,
2033 		    "converted %d bytes to %d bytes of input\n",
2034 	            toconv, written);
2035 	    break;
2036         case -1:
2037 	    xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2038 	            toconv, written, in->use);
2039 	    break;
2040         case -2:
2041 	    xmlGenericError(xmlGenericErrorContext,
2042 		    "input conversion failed due to input error\n");
2043 	    break;
2044         case -3:
2045 	    xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
2046 	            toconv, written, in->use);
2047 	    break;
2048 	default:
2049 	    xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
2050     }
2051 #endif /* DEBUG_ENCODING */
2052     /*
2053      * Ignore when input buffer is not on a boundary
2054      */
2055     if (ret == -3) ret = 0;
2056     if (ret == -1) ret = 0;
2057     return(ret);
2058 }
2059 
2060 /**
2061  * xmlCharEncFirstLine:
2062  * @handler:	char enconding transformation data structure
2063  * @out:  an xmlBuffer for the output.
2064  * @in:  an xmlBuffer for the input
2065  *
2066  * Front-end for the encoding handler input function, but handle only
2067  * the very first line, i.e. limit itself to 45 chars.
2068  *
2069  * Returns the number of byte written if success, or
2070  *     -1 general error
2071  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2072  *        the result of transformation can't fit into the encoding we want), or
2073  */
2074 int
xmlCharEncFirstLine(xmlCharEncodingHandler * handler,xmlBufferPtr out,xmlBufferPtr in)2075 xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2076                  xmlBufferPtr in) {
2077     return(xmlCharEncFirstLineInt(handler, out, in, -1));
2078 }
2079 
2080 /**
2081  * xmlCharEncFirstLineInput:
2082  * @input: a parser input buffer
2083  * @len:  number of bytes to convert for the first line, or -1
2084  *
2085  * Front-end for the encoding handler input function, but handle only
2086  * the very first line. Point is that this is based on autodetection
2087  * of the encoding and once that first line is converted we may find
2088  * out that a different decoder is needed to process the input.
2089  *
2090  * Returns the number of byte written if success, or
2091  *     -1 general error
2092  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2093  *        the result of transformation can't fit into the encoding we want), or
2094  */
2095 int
xmlCharEncFirstLineInput(xmlParserInputBufferPtr input,int len)2096 xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len)
2097 {
2098     int ret;
2099     size_t written;
2100     size_t toconv;
2101     int c_in;
2102     int c_out;
2103     xmlBufPtr in;
2104     xmlBufPtr out;
2105 
2106     if ((input == NULL) || (input->encoder == NULL) ||
2107         (input->buffer == NULL) || (input->raw == NULL))
2108         return (-1);
2109     out = input->buffer;
2110     in = input->raw;
2111 
2112     toconv = xmlBufUse(in);
2113     if (toconv == 0)
2114         return (0);
2115     written = xmlBufAvail(out) - 1; /* count '\0' */
2116     /*
2117      * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
2118      * 45 chars should be sufficient to reach the end of the encoding
2119      * declaration without going too far inside the document content.
2120      * on UTF-16 this means 90bytes, on UCS4 this means 180
2121      * The actual value depending on guessed encoding is passed as @len
2122      * if provided
2123      */
2124     if (len >= 0) {
2125         if (toconv > (unsigned int) len)
2126             toconv = len;
2127     } else {
2128         if (toconv > 180)
2129             toconv = 180;
2130     }
2131     if (toconv * 2 >= written) {
2132         xmlBufGrow(out, toconv * 2);
2133         written = xmlBufAvail(out) - 1;
2134     }
2135     if (written > 360)
2136         written = 360;
2137 
2138     c_in = toconv;
2139     c_out = written;
2140     ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2141                            xmlBufContent(in), &c_in, 0);
2142     xmlBufShrink(in, c_in);
2143     xmlBufAddLen(out, c_out);
2144     if (ret == -1)
2145         ret = -3;
2146 
2147     switch (ret) {
2148         case 0:
2149 #ifdef DEBUG_ENCODING
2150             xmlGenericError(xmlGenericErrorContext,
2151                             "converted %d bytes to %d bytes of input\n",
2152                             c_in, c_out);
2153 #endif
2154             break;
2155         case -1:
2156 #ifdef DEBUG_ENCODING
2157             xmlGenericError(xmlGenericErrorContext,
2158                          "converted %d bytes to %d bytes of input, %d left\n",
2159                             c_in, c_out, (int)xmlBufUse(in));
2160 #endif
2161             break;
2162         case -3:
2163 #ifdef DEBUG_ENCODING
2164             xmlGenericError(xmlGenericErrorContext,
2165                         "converted %d bytes to %d bytes of input, %d left\n",
2166                             c_in, c_out, (int)xmlBufUse(in));
2167 #endif
2168             break;
2169         case -2: {
2170             char buf[50];
2171             const xmlChar *content = xmlBufContent(in);
2172 
2173 	    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2174 		     content[0], content[1],
2175 		     content[2], content[3]);
2176 	    buf[49] = 0;
2177 	    xmlEncodingErr(XML_I18N_CONV_FAILED,
2178 		    "input conversion failed due to input error, bytes %s\n",
2179 		           buf);
2180         }
2181     }
2182     /*
2183      * Ignore when input buffer is not on a boundary
2184      */
2185     if (ret == -3) ret = 0;
2186     if (ret == -1) ret = 0;
2187     return(ret);
2188 }
2189 
2190 /**
2191  * xmlCharEncInput:
2192  * @input: a parser input buffer
2193  * @flush: try to flush all the raw buffer
2194  *
2195  * Generic front-end for the encoding handler on parser input
2196  *
2197  * Returns the number of byte written if success, or
2198  *     -1 general error
2199  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2200  *        the result of transformation can't fit into the encoding we want), or
2201  */
2202 int
xmlCharEncInput(xmlParserInputBufferPtr input,int flush)2203 xmlCharEncInput(xmlParserInputBufferPtr input, int flush)
2204 {
2205     int ret;
2206     size_t written;
2207     size_t toconv;
2208     int c_in;
2209     int c_out;
2210     xmlBufPtr in;
2211     xmlBufPtr out;
2212 
2213     if ((input == NULL) || (input->encoder == NULL) ||
2214         (input->buffer == NULL) || (input->raw == NULL))
2215         return (-1);
2216     out = input->buffer;
2217     in = input->raw;
2218 
2219     toconv = xmlBufUse(in);
2220     if (toconv == 0)
2221         return (0);
2222     if ((toconv > 64 * 1024) && (flush == 0))
2223         toconv = 64 * 1024;
2224     written = xmlBufAvail(out);
2225     if (written > 0)
2226         written--; /* count '\0' */
2227     if (toconv * 2 >= written) {
2228         xmlBufGrow(out, toconv * 2);
2229         written = xmlBufAvail(out);
2230         if (written > 0)
2231             written--; /* count '\0' */
2232     }
2233     if ((written > 128 * 1024) && (flush == 0))
2234         written = 128 * 1024;
2235 
2236     c_in = toconv;
2237     c_out = written;
2238     ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
2239                            xmlBufContent(in), &c_in, flush);
2240     xmlBufShrink(in, c_in);
2241     xmlBufAddLen(out, c_out);
2242     if (ret == -1)
2243         ret = -3;
2244 
2245     switch (ret) {
2246         case 0:
2247 #ifdef DEBUG_ENCODING
2248             xmlGenericError(xmlGenericErrorContext,
2249                             "converted %d bytes to %d bytes of input\n",
2250                             c_in, c_out);
2251 #endif
2252             break;
2253         case -1:
2254 #ifdef DEBUG_ENCODING
2255             xmlGenericError(xmlGenericErrorContext,
2256                          "converted %d bytes to %d bytes of input, %d left\n",
2257                             c_in, c_out, (int)xmlBufUse(in));
2258 #endif
2259             break;
2260         case -3:
2261 #ifdef DEBUG_ENCODING
2262             xmlGenericError(xmlGenericErrorContext,
2263                         "converted %d bytes to %d bytes of input, %d left\n",
2264                             c_in, c_out, (int)xmlBufUse(in));
2265 #endif
2266             break;
2267         case -2: {
2268             char buf[50];
2269             const xmlChar *content = xmlBufContent(in);
2270 
2271 	    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2272 		     content[0], content[1],
2273 		     content[2], content[3]);
2274 	    buf[49] = 0;
2275 	    xmlEncodingErr(XML_I18N_CONV_FAILED,
2276 		    "input conversion failed due to input error, bytes %s\n",
2277 		           buf);
2278         }
2279     }
2280     /*
2281      * Ignore when input buffer is not on a boundary
2282      */
2283     if (ret == -3)
2284         ret = 0;
2285     return (c_out? c_out : ret);
2286 }
2287 
2288 /**
2289  * xmlCharEncInFunc:
2290  * @handler:	char encoding transformation data structure
2291  * @out:  an xmlBuffer for the output.
2292  * @in:  an xmlBuffer for the input
2293  *
2294  * Generic front-end for the encoding handler input function
2295  *
2296  * Returns the number of byte written if success, or
2297  *     -1 general error
2298  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2299  *        the result of transformation can't fit into the encoding we want), or
2300  */
2301 int
xmlCharEncInFunc(xmlCharEncodingHandler * handler,xmlBufferPtr out,xmlBufferPtr in)2302 xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
2303                  xmlBufferPtr in)
2304 {
2305     int ret;
2306     int written;
2307     int toconv;
2308 
2309     if (handler == NULL)
2310         return (-1);
2311     if (out == NULL)
2312         return (-1);
2313     if (in == NULL)
2314         return (-1);
2315 
2316     toconv = in->use;
2317     if (toconv == 0)
2318         return (0);
2319     written = out->size - out->use -1; /* count '\0' */
2320     if (toconv * 2 >= written) {
2321         xmlBufferGrow(out, out->size + toconv * 2);
2322         written = out->size - out->use - 1;
2323     }
2324     ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
2325                            in->content, &toconv, 1);
2326     xmlBufferShrink(in, toconv);
2327     out->use += written;
2328     out->content[out->use] = 0;
2329     if (ret == -1)
2330         ret = -3;
2331 
2332     switch (ret) {
2333         case 0:
2334 #ifdef DEBUG_ENCODING
2335             xmlGenericError(xmlGenericErrorContext,
2336                             "converted %d bytes to %d bytes of input\n",
2337                             toconv, written);
2338 #endif
2339             break;
2340         case -1:
2341 #ifdef DEBUG_ENCODING
2342             xmlGenericError(xmlGenericErrorContext,
2343                          "converted %d bytes to %d bytes of input, %d left\n",
2344                             toconv, written, in->use);
2345 #endif
2346             break;
2347         case -3:
2348 #ifdef DEBUG_ENCODING
2349             xmlGenericError(xmlGenericErrorContext,
2350                         "converted %d bytes to %d bytes of input, %d left\n",
2351                             toconv, written, in->use);
2352 #endif
2353             break;
2354         case -2: {
2355             char buf[50];
2356 
2357 	    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2358 		     in->content[0], in->content[1],
2359 		     in->content[2], in->content[3]);
2360 	    buf[49] = 0;
2361 	    xmlEncodingErr(XML_I18N_CONV_FAILED,
2362 		    "input conversion failed due to input error, bytes %s\n",
2363 		           buf);
2364         }
2365     }
2366     /*
2367      * Ignore when input buffer is not on a boundary
2368      */
2369     if (ret == -3)
2370         ret = 0;
2371     return (written? written : ret);
2372 }
2373 
2374 #ifdef LIBXML_OUTPUT_ENABLED
2375 /**
2376  * xmlCharEncOutput:
2377  * @output: a parser output buffer
2378  * @init: is this an initialization call without data
2379  *
2380  * Generic front-end for the encoding handler on parser output
2381  * a first call with @init == 1 has to be made first to initiate the
2382  * output in case of non-stateless encoding needing to initiate their
2383  * state or the output (like the BOM in UTF16).
2384  * In case of UTF8 sequence conversion errors for the given encoder,
2385  * the content will be automatically remapped to a CharRef sequence.
2386  *
2387  * Returns the number of byte written if success, or
2388  *     -1 general error
2389  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2390  *        the result of transformation can't fit into the encoding we want), or
2391  */
2392 int
xmlCharEncOutput(xmlOutputBufferPtr output,int init)2393 xmlCharEncOutput(xmlOutputBufferPtr output, int init)
2394 {
2395     int ret;
2396     size_t written;
2397     size_t writtentot = 0;
2398     size_t toconv;
2399     int c_in;
2400     int c_out;
2401     xmlBufPtr in;
2402     xmlBufPtr out;
2403 
2404     if ((output == NULL) || (output->encoder == NULL) ||
2405         (output->buffer == NULL) || (output->conv == NULL))
2406         return (-1);
2407     out = output->conv;
2408     in = output->buffer;
2409 
2410 retry:
2411 
2412     written = xmlBufAvail(out);
2413     if (written > 0)
2414         written--; /* count '\0' */
2415 
2416     /*
2417      * First specific handling of the initialization call
2418      */
2419     if (init) {
2420         c_in = 0;
2421         c_out = written;
2422         /* TODO: Check return value. */
2423         xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2424                           NULL, &c_in);
2425         xmlBufAddLen(out, c_out);
2426 #ifdef DEBUG_ENCODING
2427 	xmlGenericError(xmlGenericErrorContext,
2428 		"initialized encoder\n");
2429 #endif
2430         return(0);
2431     }
2432 
2433     /*
2434      * Conversion itself.
2435      */
2436     toconv = xmlBufUse(in);
2437     if (toconv == 0)
2438         return (0);
2439     if (toconv > 64 * 1024)
2440         toconv = 64 * 1024;
2441     if (toconv * 4 >= written) {
2442         xmlBufGrow(out, toconv * 4);
2443         written = xmlBufAvail(out) - 1;
2444     }
2445     if (written > 256 * 1024)
2446         written = 256 * 1024;
2447 
2448     c_in = toconv;
2449     c_out = written;
2450     ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2451                             xmlBufContent(in), &c_in);
2452     xmlBufShrink(in, c_in);
2453     xmlBufAddLen(out, c_out);
2454     writtentot += c_out;
2455     if (ret == -1) {
2456         if (c_out > 0) {
2457             /* Can be a limitation of iconv or uconv */
2458             goto retry;
2459         }
2460         ret = -3;
2461     }
2462 
2463     /*
2464      * Attempt to handle error cases
2465      */
2466     switch (ret) {
2467         case 0:
2468 #ifdef DEBUG_ENCODING
2469 	    xmlGenericError(xmlGenericErrorContext,
2470 		    "converted %d bytes to %d bytes of output\n",
2471 	            c_in, c_out);
2472 #endif
2473 	    break;
2474         case -1:
2475 #ifdef DEBUG_ENCODING
2476 	    xmlGenericError(xmlGenericErrorContext,
2477 		    "output conversion failed by lack of space\n");
2478 #endif
2479 	    break;
2480         case -3:
2481 #ifdef DEBUG_ENCODING
2482 	    xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2483 	            c_in, c_out, (int) xmlBufUse(in));
2484 #endif
2485 	    break;
2486         case -4:
2487             xmlEncodingErr(XML_I18N_NO_OUTPUT,
2488                            "xmlCharEncOutFunc: no output function !\n", NULL);
2489             ret = -1;
2490             break;
2491         case -2: {
2492 	    xmlChar charref[20];
2493 	    int len = (int) xmlBufUse(in);
2494             xmlChar *content = xmlBufContent(in);
2495 	    int cur, charrefLen;
2496 
2497 	    cur = xmlGetUTF8Char(content, &len);
2498 	    if (cur <= 0)
2499                 break;
2500 
2501 #ifdef DEBUG_ENCODING
2502             xmlGenericError(xmlGenericErrorContext,
2503                     "handling output conversion error\n");
2504             xmlGenericError(xmlGenericErrorContext,
2505                     "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2506                     content[0], content[1],
2507                     content[2], content[3]);
2508 #endif
2509             /*
2510              * Removes the UTF8 sequence, and replace it by a charref
2511              * and continue the transcoding phase, hoping the error
2512              * did not mangle the encoder state.
2513              */
2514             charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2515                              "&#%d;", cur);
2516             xmlBufShrink(in, len);
2517             xmlBufGrow(out, charrefLen * 4);
2518             c_out = xmlBufAvail(out) - 1;
2519             c_in = charrefLen;
2520             ret = xmlEncOutputChunk(output->encoder, xmlBufEnd(out), &c_out,
2521                                     charref, &c_in);
2522 
2523 	    if ((ret < 0) || (c_in != charrefLen)) {
2524 		char buf[50];
2525 
2526 		snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2527 			 content[0], content[1],
2528 			 content[2], content[3]);
2529 		buf[49] = 0;
2530 		xmlEncodingErr(XML_I18N_CONV_FAILED,
2531 		    "output conversion failed due to conv error, bytes %s\n",
2532 			       buf);
2533 		if (xmlBufGetAllocationScheme(in) != XML_BUFFER_ALLOC_IMMUTABLE)
2534 		    content[0] = ' ';
2535                 break;
2536 	    }
2537 
2538             xmlBufAddLen(out, c_out);
2539             writtentot += c_out;
2540             goto retry;
2541 	}
2542     }
2543     return(ret);
2544 }
2545 #endif
2546 
2547 /**
2548  * xmlCharEncOutFunc:
2549  * @handler:	char enconding transformation data structure
2550  * @out:  an xmlBuffer for the output.
2551  * @in:  an xmlBuffer for the input
2552  *
2553  * Generic front-end for the encoding handler output function
2554  * a first call with @in == NULL has to be made firs to initiate the
2555  * output in case of non-stateless encoding needing to initiate their
2556  * state or the output (like the BOM in UTF16).
2557  * In case of UTF8 sequence conversion errors for the given encoder,
2558  * the content will be automatically remapped to a CharRef sequence.
2559  *
2560  * Returns the number of byte written if success, or
2561  *     -1 general error
2562  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2563  *        the result of transformation can't fit into the encoding we want), or
2564  */
2565 int
xmlCharEncOutFunc(xmlCharEncodingHandler * handler,xmlBufferPtr out,xmlBufferPtr in)2566 xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2567                   xmlBufferPtr in) {
2568     int ret;
2569     int written;
2570     int writtentot = 0;
2571     int toconv;
2572     int output = 0;
2573 
2574     if (handler == NULL) return(-1);
2575     if (out == NULL) return(-1);
2576 
2577 retry:
2578 
2579     written = out->size - out->use;
2580 
2581     if (written > 0)
2582 	written--; /* Gennady: count '/0' */
2583 
2584     /*
2585      * First specific handling of in = NULL, i.e. the initialization call
2586      */
2587     if (in == NULL) {
2588         toconv = 0;
2589         /* TODO: Check return value. */
2590         xmlEncOutputChunk(handler, &out->content[out->use], &written,
2591                           NULL, &toconv);
2592         out->use += written;
2593         out->content[out->use] = 0;
2594 #ifdef DEBUG_ENCODING
2595 	xmlGenericError(xmlGenericErrorContext,
2596 		"initialized encoder\n");
2597 #endif
2598         return(0);
2599     }
2600 
2601     /*
2602      * Conversion itself.
2603      */
2604     toconv = in->use;
2605     if (toconv == 0)
2606 	return(0);
2607     if (toconv * 4 >= written) {
2608         xmlBufferGrow(out, toconv * 4);
2609 	written = out->size - out->use - 1;
2610     }
2611     ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2612                             in->content, &toconv);
2613     xmlBufferShrink(in, toconv);
2614     out->use += written;
2615     writtentot += written;
2616     out->content[out->use] = 0;
2617     if (ret == -1) {
2618         if (written > 0) {
2619             /* Can be a limitation of iconv or uconv */
2620             goto retry;
2621         }
2622         ret = -3;
2623     }
2624 
2625     if (ret >= 0) output += ret;
2626 
2627     /*
2628      * Attempt to handle error cases
2629      */
2630     switch (ret) {
2631         case 0:
2632 #ifdef DEBUG_ENCODING
2633 	    xmlGenericError(xmlGenericErrorContext,
2634 		    "converted %d bytes to %d bytes of output\n",
2635 	            toconv, written);
2636 #endif
2637 	    break;
2638         case -1:
2639 #ifdef DEBUG_ENCODING
2640 	    xmlGenericError(xmlGenericErrorContext,
2641 		    "output conversion failed by lack of space\n");
2642 #endif
2643 	    break;
2644         case -3:
2645 #ifdef DEBUG_ENCODING
2646 	    xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2647 	            toconv, written, in->use);
2648 #endif
2649 	    break;
2650         case -4:
2651 	    xmlEncodingErr(XML_I18N_NO_OUTPUT,
2652 		           "xmlCharEncOutFunc: no output function !\n", NULL);
2653 	    ret = -1;
2654             break;
2655         case -2: {
2656 	    xmlChar charref[20];
2657 	    int len = in->use;
2658 	    const xmlChar *utf = (const xmlChar *) in->content;
2659 	    int cur, charrefLen;
2660 
2661 	    cur = xmlGetUTF8Char(utf, &len);
2662 	    if (cur <= 0)
2663                 break;
2664 
2665 #ifdef DEBUG_ENCODING
2666             xmlGenericError(xmlGenericErrorContext,
2667                     "handling output conversion error\n");
2668             xmlGenericError(xmlGenericErrorContext,
2669                     "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2670                     in->content[0], in->content[1],
2671                     in->content[2], in->content[3]);
2672 #endif
2673             /*
2674              * Removes the UTF8 sequence, and replace it by a charref
2675              * and continue the transcoding phase, hoping the error
2676              * did not mangle the encoder state.
2677              */
2678             charrefLen = snprintf((char *) &charref[0], sizeof(charref),
2679                              "&#%d;", cur);
2680             xmlBufferShrink(in, len);
2681             xmlBufferGrow(out, charrefLen * 4);
2682 	    written = out->size - out->use - 1;
2683             toconv = charrefLen;
2684             ret = xmlEncOutputChunk(handler, &out->content[out->use], &written,
2685                                     charref, &toconv);
2686 
2687 	    if ((ret < 0) || (toconv != charrefLen)) {
2688 		char buf[50];
2689 
2690 		snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2691 			 in->content[0], in->content[1],
2692 			 in->content[2], in->content[3]);
2693 		buf[49] = 0;
2694 		xmlEncodingErr(XML_I18N_CONV_FAILED,
2695 		    "output conversion failed due to conv error, bytes %s\n",
2696 			       buf);
2697 		if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE)
2698 		    in->content[0] = ' ';
2699 	        break;
2700 	    }
2701 
2702             out->use += written;
2703             writtentot += written;
2704             out->content[out->use] = 0;
2705             goto retry;
2706 	}
2707     }
2708     return(ret);
2709 }
2710 
2711 /**
2712  * xmlCharEncCloseFunc:
2713  * @handler:	char enconding transformation data structure
2714  *
2715  * Generic front-end for encoding handler close function
2716  *
2717  * Returns 0 if success, or -1 in case of error
2718  */
2719 int
xmlCharEncCloseFunc(xmlCharEncodingHandler * handler)2720 xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2721     int ret = 0;
2722     int tofree = 0;
2723     int i, handler_in_list = 0;
2724 
2725     if (handler == NULL) return(-1);
2726     if (handler->name == NULL) return(-1);
2727     if (handlers != NULL) {
2728         for (i = 0;i < nbCharEncodingHandler; i++) {
2729             if (handler == handlers[i]) {
2730 	        handler_in_list = 1;
2731 		break;
2732 	    }
2733 	}
2734     }
2735 #ifdef LIBXML_ICONV_ENABLED
2736     /*
2737      * Iconv handlers can be used only once, free the whole block.
2738      * and the associated icon resources.
2739      */
2740     if ((handler_in_list == 0) &&
2741         ((handler->iconv_out != NULL) || (handler->iconv_in != NULL))) {
2742         tofree = 1;
2743 	if (handler->iconv_out != NULL) {
2744 	    if (iconv_close(handler->iconv_out))
2745 		ret = -1;
2746 	    handler->iconv_out = NULL;
2747 	}
2748 	if (handler->iconv_in != NULL) {
2749 	    if (iconv_close(handler->iconv_in))
2750 		ret = -1;
2751 	    handler->iconv_in = NULL;
2752 	}
2753     }
2754 #endif /* LIBXML_ICONV_ENABLED */
2755 #ifdef LIBXML_ICU_ENABLED
2756     if ((handler_in_list == 0) &&
2757         ((handler->uconv_out != NULL) || (handler->uconv_in != NULL))) {
2758         tofree = 1;
2759 	if (handler->uconv_out != NULL) {
2760 	    closeIcuConverter(handler->uconv_out);
2761 	    handler->uconv_out = NULL;
2762 	}
2763 	if (handler->uconv_in != NULL) {
2764 	    closeIcuConverter(handler->uconv_in);
2765 	    handler->uconv_in = NULL;
2766 	}
2767     }
2768 #endif
2769     if (tofree) {
2770         /* free up only dynamic handlers iconv/uconv */
2771         if (handler->name != NULL)
2772             xmlFree(handler->name);
2773         handler->name = NULL;
2774         xmlFree(handler);
2775     }
2776 #ifdef DEBUG_ENCODING
2777     if (ret)
2778         xmlGenericError(xmlGenericErrorContext,
2779 		"failed to close the encoding handler\n");
2780     else
2781         xmlGenericError(xmlGenericErrorContext,
2782 		"closed the encoding handler\n");
2783 #endif
2784 
2785     return(ret);
2786 }
2787 
2788 /**
2789  * xmlByteConsumed:
2790  * @ctxt: an XML parser context
2791  *
2792  * This function provides the current index of the parser relative
2793  * to the start of the current entity. This function is computed in
2794  * bytes from the beginning starting at zero and finishing at the
2795  * size in byte of the file if parsing a file. The function is
2796  * of constant cost if the input is UTF-8 but can be costly if run
2797  * on non-UTF-8 input.
2798  *
2799  * Returns the index in bytes from the beginning of the entity or -1
2800  *         in case the index could not be computed.
2801  */
2802 long
xmlByteConsumed(xmlParserCtxtPtr ctxt)2803 xmlByteConsumed(xmlParserCtxtPtr ctxt) {
2804     xmlParserInputPtr in;
2805 
2806     if (ctxt == NULL) return(-1);
2807     in = ctxt->input;
2808     if (in == NULL)  return(-1);
2809     if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2810         unsigned int unused = 0;
2811 	xmlCharEncodingHandler * handler = in->buf->encoder;
2812         /*
2813 	 * Encoding conversion, compute the number of unused original
2814 	 * bytes from the input not consumed and substract that from
2815 	 * the raw consumed value, this is not a cheap operation
2816 	 */
2817         if (in->end - in->cur > 0) {
2818 	    unsigned char convbuf[32000];
2819 	    const unsigned char *cur = (const unsigned char *)in->cur;
2820 	    int toconv = in->end - in->cur, written = 32000;
2821 
2822 	    int ret;
2823 
2824             do {
2825                 toconv = in->end - cur;
2826                 written = 32000;
2827                 ret = xmlEncOutputChunk(handler, &convbuf[0], &written,
2828                                         cur, &toconv);
2829                 if (ret < 0) {
2830                     if (written > 0)
2831                         ret = -2;
2832                     else
2833                         return(-1);
2834                 }
2835                 unused += written;
2836                 cur += toconv;
2837             } while (ret == -2);
2838 	}
2839 	if (in->buf->rawconsumed < unused)
2840 	    return(-1);
2841 	return(in->buf->rawconsumed - unused);
2842     }
2843     return(in->consumed + (in->cur - in->base));
2844 }
2845 
2846 #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
2847 #ifdef LIBXML_ISO8859X_ENABLED
2848 
/**
 * UTF8ToISO8859x:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 * @xlattable: the 2-level transcoding table
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
 * block of chars out. At most @outlen bytes are written: when @out is
 * full the conversion stops, without error, in front of the first
 * character that no longer fits.
 *
 * Returns the number of bytes written if success (0 when @in is NULL),
 *     -2 if the transcoding fails (malformed UTF-8 or a character that is
 *     not in the target character set), -3 if the input ends inside a
 *     multi-byte sequence, or -1 if an argument is invalid.
 * The value of @inlen after return is the number of octets consumed,
 *     counting only completely converted characters.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8ToISO8859x(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned char const *xlattable) {
    const unsigned char* outstart = out;
    const unsigned char* outend;
    const unsigned char* inend;
    const unsigned char* instart = in;
    const unsigned char* processed = in;
    int ret = 0;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (xlattable == NULL))
        return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);
    while (in < inend) {
        unsigned char d;

        /*
         * Every character produces exactly one output byte, so a single
         * check per iteration keeps all the writes below inside @out.
         */
        if (out >= outend)
            break;

        d = *in++;
        if (d < 0x80) {
            *out++ = d;
        } else if (d < 0xC0) {
            /* trailing byte in leading position */
            ret = -2;
            goto done;
        } else if (d < 0xE0) {
            unsigned char c;

            if (!(in < inend)) {
                /* trailing byte not in input buffer */
                ret = -3;
                goto done;
            }
            c = *in++;
            if ((c & 0xC0) != 0x80) {
                /* not a trailing byte */
                ret = -2;
                goto done;
            }
            c = c & 0x3F;
            d = d & 0x1F;
            /* the index entry selects a 64-byte block, c the leaf in it */
            d = xlattable [48 + c + xlattable [d] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                goto done;
            }
            *out++ = d;
        } else if (d < 0xF0) {
            unsigned char c1;
            unsigned char c2;

            if (inend - in < 2) {
                /* trailing bytes not in input buffer */
                ret = -3;
                goto done;
            }
            c1 = *in++;
            if ((c1 & 0xC0) != 0x80) {
                /* not a trailing byte (c1) */
                ret = -2;
                goto done;
            }
            c2 = *in++;
            if ((c2 & 0xC0) != 0x80) {
                /* not a trailing byte (c2) */
                ret = -2;
                goto done;
            }
            c1 = c1 & 0x3F;
            c2 = c2 & 0x3F;
            d = d & 0x0F;
            /* lead -> block for c1 -> block for c2 -> target byte */
            d = xlattable [48 + c2 + xlattable [48 + c1 +
                        xlattable [32 + d] * 64] * 64];
            if (d == 0) {
                /* not in character set */
                ret = -2;
                goto done;
            }
            *out++ = d;
        } else {
            /* cannot transcode >= U+010000 */
            ret = -2;
            goto done;
        }
        processed = in;
    }

done:
    /* report progress up to the last completely converted character */
    *outlen = (int) (out - outstart);
    *inlen = (int) (processed - instart);
    return((ret < 0) ? ret : *outlen);
}
2967 
/**
 * ISO8859xToUTF8
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO 8859-* chars
 * @inlen:  the length of @in
 * @unicodetable:  the code points for the bytes 0x80..0xFF, indexed by
 *                 (byte - 0x80); an entry of 0 marks an undefined byte
 *
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
 * block of chars out. Conversion stops early, without error, when @out
 * cannot be guaranteed to hold the next character.
 *
 * Returns the number of bytes written if success, or -1 if an argument
 *     is NULL or an undefined byte is met in the input.
 * The value of @inlen after return is the number of octets consumed
 * The value of @outlen after return is the number of octets produced.
 */
static int
ISO8859xToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned short const *unicodetable) {
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;
    const unsigned char* instop;
    unsigned int c;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (in == NULL) || (unicodetable == NULL))
        return(-1);
    outend = out + *outlen;
    inend = in + *inlen;
    instop = inend;

    /*
     * Main loop: only entered while three bytes of output are free, the
     * most a single input byte can expand to. The room is computed as a
     * difference so that no pointer in front of @out is ever formed.
     */
    while ((in < inend) && (outend - out > 2)) {
        if (*in >= 0x80) {
            c = unicodetable [*in - 0x80];
            if (c == 0) {
                /* undefined code point */
                *outlen = (int) (out - outstart);
                *inlen = (int) (in - instart);
                return (-1);
            }
            if (c < 0x800) {
                *out++ = ((c >>  6) & 0x1F) | 0xC0;
                *out++ = (c & 0x3F) | 0x80;
            } else {
                *out++ = ((c >>  12) & 0x0F) | 0xE0;
                *out++ = ((c >>  6) & 0x3F) | 0x80;
                *out++ = (c & 0x3F) | 0x80;
            }
            ++in;
        }
        /* never copy more ASCII bytes than there is output room left */
        if (instop - in > outend - out) instop = in + (outend - out);
        /* the bound is tested first so that *in is never read past it */
        while ((in < instop) && (*in < 0x80)) {
            *out++ = *in++;
        }
    }
    /* with one or two bytes of room left, ASCII bytes still fit */
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ =  *in++;
    }
    if ((in < inend) && (out < outend) && (*in < 0x80)) {
        *out++ =  *in++;
    }
    *outlen = (int) (out - outstart);
    *inlen = (int) (in - instart);
    return (*outlen);
}
3033 
3034 
3035 /************************************************************************
3036  * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding                *
3037  ************************************************************************/
3038 
/*
 * ISO-8859-2 to Unicode: one 16-bit code point per byte value 0x80..0xFF,
 * stored at index (byte - 0x80), which is how ISO8859xToUTF8() indexes its
 * @unicodetable argument. This table has no 0x0000 (undefined) entry.
 * NOTE(review): the code handing this table to the converter is outside
 * this part of the file -- confirm against the per-charset wrappers.
 */
3039 static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
3040     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3041     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3042     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3043     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3044     0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
3045     0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
3046     0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
3047     0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
3048     0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
3049     0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
3050     0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
3051     0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
3052     0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
3053     0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
3054     0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
3055     0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
3056 };
3057 
/*
 * Unicode to ISO-8859-2: two-level table in the layout UTF8ToISO8859x()
 * reads through @xlattable -- a 48-byte index (entries 0..31 picked by the
 * low 5 bits of a 2-byte lead, entries 32..47 by the low 4 bits of a
 * 3-byte lead) followed by 6 blocks of 64 bytes; a 0 leaf means "not in
 * the character set". The literal is exactly 48 + 6 * 64 bytes long, so
 * the array stores no terminating NUL.
 * NOTE(review): the code handing this table to the converter is outside
 * this part of the file -- confirm against the per-charset wrappers.
 */
3058 static unsigned char const xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
3059     "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
3060     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3061     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3062     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3063     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3064     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3065     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3066     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3067     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3068     "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
3069     "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
3070     "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
3071     "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
3072     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3073     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
3074     "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3075     "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
3076     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3077     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3078     "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
3079     "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
3080     "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
3081     "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
3082     "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
3083     "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
3084     "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
3085     "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
3086 };
3087 
/*
 * ISO-8859-3 to Unicode: one 16-bit code point per byte value 0x80..0xFF,
 * stored at index (byte - 0x80), which is how ISO8859xToUTF8() indexes its
 * @unicodetable argument. A 0x0000 entry is a byte that converter treats
 * as an undefined code point.
 * NOTE(review): the code handing this table to the converter is outside
 * this part of the file -- confirm against the per-charset wrappers.
 */
3088 static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
3089     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3090     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3091     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3092     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3093     0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
3094     0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
3095     0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
3096     0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
3097     0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
3098     0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3099     0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
3100     0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
3101     0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
3102     0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3103     0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
3104     0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
3105 };
3106 
/*
 * Unicode to ISO-8859-3: two-level table in the layout UTF8ToISO8859x()
 * reads through @xlattable -- a 48-byte index (entries 0..31 picked by the
 * low 5 bits of a 2-byte lead, entries 32..47 by the low 4 bits of a
 * 3-byte lead) followed by 7 blocks of 64 bytes; a 0 leaf means "not in
 * the character set". The literal is exactly 48 + 7 * 64 bytes long, so
 * the array stores no terminating NUL.
 * NOTE(review): the code handing this table to the converter is outside
 * this part of the file -- confirm against the per-charset wrappers.
 */
3107 static unsigned char const xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
3108     "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
3109     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3110     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3111     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3112     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3113     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3114     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3115     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3116     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3117     "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
3118     "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
3119     "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
3120     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
3121     "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
3122     "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3123     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3124     "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
3125     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3126     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3127     "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3128     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3129     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3130     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3131     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3132     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
3133     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
3134     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
3135     "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3136     "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3137     "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3138     "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
3139 };
3140 
/*
 * ISO-8859-4 to Unicode: one 16-bit code point per byte value 0x80..0xFF,
 * stored at index (byte - 0x80), which is how ISO8859xToUTF8() indexes its
 * @unicodetable argument. This table has no 0x0000 (undefined) entry.
 * NOTE(review): the code handing this table to the converter is outside
 * this part of the file -- confirm against the per-charset wrappers.
 */
3141 static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
3142     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3143     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3144     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3145     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3146     0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
3147     0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
3148     0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
3149     0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
3150     0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3151     0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
3152     0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3153     0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
3154     0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3155     0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
3156     0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3157     0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
3158 };
3159 
/*
 * Unicode to ISO-8859-4: two-level table in the layout UTF8ToISO8859x()
 * reads through @xlattable -- a 48-byte index (entries 0..31 picked by the
 * low 5 bits of a 2-byte lead, entries 32..47 by the low 4 bits of a
 * 3-byte lead) followed by 6 blocks of 64 bytes; a 0 leaf means "not in
 * the character set". The literal is exactly 48 + 6 * 64 bytes long, so
 * the array stores no terminating NUL.
 * NOTE(review): the code handing this table to the converter is outside
 * this part of the file -- confirm against the per-charset wrappers.
 */
3160 static unsigned char const xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
3161     "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
3162     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3163     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3164     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3165     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3166     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3167     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3168     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3169     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3170     "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
3171     "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
3172     "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3173     "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3174     "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
3175     "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
3176     "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
3177     "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
3178     "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
3179     "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
3180     "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3181     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
3182     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3183     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3184     "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
3185     "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
3186     "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
3187     "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
3188 };
3189 
/*
 * ISO-8859-5 to Unicode: one 16-bit code point per byte value 0x80..0xFF,
 * stored at index (byte - 0x80), which is how ISO8859xToUTF8() indexes its
 * @unicodetable argument. This table has no 0x0000 (undefined) entry.
 * NOTE(review): the code handing this table to the converter is outside
 * this part of the file -- confirm against the per-charset wrappers.
 */
3190 static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
3191     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3192     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3193     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3194     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3195     0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
3196     0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
3197     0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
3198     0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
3199     0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
3200     0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
3201     0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
3202     0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
3203     0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
3204     0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
3205     0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
3206     0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
3207 };
3208 
/*
 * Unicode to ISO-8859-5: two-level table in the layout UTF8ToISO8859x()
 * reads through @xlattable -- a 48-byte index (entries 0..31 picked by the
 * low 5 bits of a 2-byte lead, entries 32..47 by the low 4 bits of a
 * 3-byte lead) followed by 6 blocks of 64 bytes; a 0 leaf means "not in
 * the character set". The literal is exactly 48 + 6 * 64 bytes long, so
 * the array stores no terminating NUL.
 * NOTE(review): the code handing this table to the converter is outside
 * this part of the file -- confirm against the per-charset wrappers.
 */
3209 static unsigned char const xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
3210     "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3211     "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3212     "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3213     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3214     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3215     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3216     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3217     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3218     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3219     "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
3220     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3221     "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
3222     "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3223     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3224     "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3225     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3226     "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
3227     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3228     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3229     "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3230     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3231     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3232     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3233     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3234     "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3235     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3236     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3237 };
3238 
/*
 * ISO-8859-6 to Unicode: one 16-bit code point per byte value 0x80..0xFF,
 * stored at index (byte - 0x80), which is how ISO8859xToUTF8() indexes its
 * @unicodetable argument. A 0x0000 entry is a byte that converter treats
 * as an undefined code point.
 * NOTE(review): the code handing this table to the converter is outside
 * this part of the file -- confirm against the per-charset wrappers.
 */
3239 static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
3240     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3241     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3242     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3243     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3244     0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
3245     0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
3246     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3247     0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
3248     0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
3249     0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
3250     0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
3251     0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3252     0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
3253     0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
3254     0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3255     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
3256 };
3257 
/*
 * Unicode to ISO-8859-6: two-level table in the layout UTF8ToISO8859x()
 * reads through @xlattable -- a 48-byte index (entries 0..31 picked by the
 * low 5 bits of a 2-byte lead, entries 32..47 by the low 4 bits of a
 * 3-byte lead) followed by 5 blocks of 64 bytes; a 0 leaf means "not in
 * the character set". The literal is exactly 48 + 5 * 64 bytes long, so
 * the array stores no terminating NUL.
 * NOTE(review): the code handing this table to the converter is outside
 * this part of the file -- confirm against the per-charset wrappers.
 */
3258 static unsigned char const xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
3259     "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3260     "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
3261     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3262     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3263     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3264     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3265     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3266     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3267     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3268     "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
3269     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3270     "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3271     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3272     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3273     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3274     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
3275     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
3276     "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3277     "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
3278     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3279     "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3280     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3281     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3282 };
3283 
/*
 * ISO-8859-7 to Unicode: one 16-bit code point per byte value 0x80..0xFF,
 * stored at index (byte - 0x80), which is how ISO8859xToUTF8() indexes its
 * @unicodetable argument. A 0x0000 entry is a byte that converter treats
 * as an undefined code point.
 * NOTE(review): the code handing this table to the converter is outside
 * this part of the file -- confirm against the per-charset wrappers.
 */
3284 static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
3285     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3286     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3287     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3288     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3289     0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
3290     0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
3291     0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
3292     0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
3293     0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
3294     0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
3295     0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
3296     0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
3297     0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
3298     0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
3299     0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
3300     0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
3301 };
3302 
/*
 * Unicode to ISO-8859-7: two-level table in the layout UTF8ToISO8859x()
 * reads through @xlattable -- a 48-byte index (entries 0..31 picked by the
 * low 5 bits of a 2-byte lead, entries 32..47 by the low 4 bits of a
 * 3-byte lead) followed by 7 blocks of 64 bytes; a 0 leaf means "not in
 * the character set". The literal is exactly 48 + 7 * 64 bytes long, so
 * the array stores no terminating NUL.
 * NOTE(review): the code handing this table to the converter is outside
 * this part of the file -- confirm against the per-charset wrappers.
 */
3303 static unsigned char const xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
3304     "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
3305     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3306     "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3307     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3308     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3309     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3310     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3311     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3312     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3313     "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
3314     "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
3315     "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3316     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3317     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3318     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3319     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3320     "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
3321     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3322     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3323     "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3324     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3325     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3326     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3327     "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
3328     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3329     "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3330     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3331     "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
3332     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3333     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3334     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3335 };
3336 
/*
 * xmlunicodetable_ISO8859_8:
 * 128-entry table of 16-bit values handed to ISO8859xToUTF8 by
 * ISO8859_8ToUTF8.  Presumably entry i is the Unicode code point for
 * ISO-8859-8 byte (0x80 + i), with 0x0000 marking an unassigned byte --
 * confirm against ISO8859xToUTF8.
 */
static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
    0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
    0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
    0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
    0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
    0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
    0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
};
3355 
/*
 * xmltranscodetable_ISO8859_8:
 * Byte table of 48 + 7 * 64 entries.  The size expression reads as a
 * 48-byte index header followed by seven 64-byte blocks; the exact lookup
 * scheme lives in UTF8ToISO8859x (not shown here) -- confirm there.
 * The string literal supplies exactly 496 bytes, so no terminating NUL
 * is stored in the array.
 */
static unsigned char const xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
    "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
    "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
3389 
/*
 * xmlunicodetable_ISO8859_9:
 * 128-entry table of 16-bit values.  Presumably entry i is the Unicode
 * code point for ISO-8859-9 byte (0x80 + i), consumed by ISO8859xToUTF8
 * -- confirm against that helper.
 */
static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
    0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
    0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
};
3408 
/*
 * xmltranscodetable_ISO8859_9:
 * Byte table of 48 + 5 * 64 entries.  The size expression reads as a
 * 48-byte index header followed by five 64-byte blocks; the exact lookup
 * scheme lives in UTF8ToISO8859x (not shown here) -- confirm there.
 * The string literal supplies exactly 368 bytes, so no terminating NUL
 * is stored in the array.
 */
static unsigned char const xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
    "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
3434 
/*
 * xmlunicodetable_ISO8859_10:
 * 128-entry table of 16-bit values.  Presumably entry i is the Unicode
 * code point for ISO-8859-10 byte (0x80 + i), consumed by ISO8859xToUTF8
 * -- confirm against that helper.
 */
static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
    0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
    0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
    0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
    0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
    0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
    0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
    0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
    0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
    0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
    0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
    0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
};
3453 
/*
 * xmltranscodetable_ISO8859_10:
 * Byte table of 48 + 7 * 64 entries.  The size expression reads as a
 * 48-byte index header followed by seven 64-byte blocks; the exact lookup
 * scheme lives in UTF8ToISO8859x (not shown here) -- confirm there.
 * The string literal supplies exactly 496 bytes, so no terminating NUL
 * is stored in the array.
 */
static unsigned char const xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
    "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
    "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
    "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
    "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
    "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
    "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
    "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
    "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
    "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
    "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
};
3487 
/*
 * xmlunicodetable_ISO8859_11:
 * 128-entry table of 16-bit values.  Presumably entry i is the Unicode
 * code point for ISO-8859-11 byte (0x80 + i), with 0x0000 marking an
 * unassigned byte, consumed by ISO8859xToUTF8 -- confirm against that
 * helper.
 */
static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
    0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
    0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
    0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
    0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
    0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
    0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
    0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
    0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
    0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
    0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
    0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
};
3506 
/*
 * xmltranscodetable_ISO8859_11:
 * Byte table of 48 + 6 * 64 entries.  The size expression reads as a
 * 48-byte index header followed by six 64-byte blocks; the exact lookup
 * scheme lives in UTF8ToISO8859x (not shown here) -- confirm there.
 * The string literal supplies exactly 432 bytes, so no terminating NUL
 * is stored in the array.
 */
static unsigned char const xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
    "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
    "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
    "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
    "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
};
3536 
/*
 * xmlunicodetable_ISO8859_13:
 * 128-entry table of 16-bit values.  Presumably entry i is the Unicode
 * code point for ISO-8859-13 byte (0x80 + i), consumed by ISO8859xToUTF8
 * -- confirm against that helper.
 */
static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
    0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
    0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
    0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
    0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
    0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
    0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
    0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
    0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
    0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
    0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
};
3555 
/*
 * xmltranscodetable_ISO8859_13:
 * Byte table of 48 + 7 * 64 entries.  The size expression reads as a
 * 48-byte index header followed by seven 64-byte blocks; the exact lookup
 * scheme lives in UTF8ToISO8859x (not shown here) -- confirm there.
 * The string literal supplies exactly 496 bytes, so no terminating NUL
 * is stored in the array.
 */
static unsigned char const xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
    "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
    "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
    "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
    "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
    "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
    "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
    "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
    "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
};
3589 
/*
 * xmlunicodetable_ISO8859_14:
 * 128-entry table of 16-bit values.  Presumably entry i is the Unicode
 * code point for ISO-8859-14 byte (0x80 + i), consumed by ISO8859xToUTF8
 * -- confirm against that helper.
 */
static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
    0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
    0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
    0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
};
3608 
/*
 * xmltranscodetable_ISO8859_14:
 * Byte table of 48 + 10 * 64 entries.  The size expression reads as a
 * 48-byte index header followed by ten 64-byte blocks; the exact lookup
 * scheme lives in UTF8ToISO8859x (not shown here) -- confirm there.
 * The string literal supplies exactly 688 bytes, so no terminating NUL
 * is stored in the array.
 */
static unsigned char const xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
    "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
    "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
    "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
    "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
};
3654 
/*
 * xmlunicodetable_ISO8859_15:
 * 128-entry table of 16-bit values.  Presumably entry i is the Unicode
 * code point for ISO-8859-15 byte (0x80 + i), consumed by ISO8859xToUTF8
 * -- confirm against that helper.
 */
static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
    0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
    0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
    0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
    0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
    0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
    0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
};
3673 
/*
 * xmltranscodetable_ISO8859_15:
 * Byte table of 48 + 6 * 64 entries.  The size expression reads as a
 * 48-byte index header followed by six 64-byte blocks; the exact lookup
 * scheme lives in UTF8ToISO8859x (not shown here) -- confirm there.
 * The string literal supplies exactly 432 bytes, so no terminating NUL
 * is stored in the array.
 */
static unsigned char const xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
    "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
    "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
    "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
    "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
    "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
};
3703 
/*
 * xmlunicodetable_ISO8859_16:
 * 128-entry table of 16-bit values.  Presumably entry i is the Unicode
 * code point for ISO-8859-16 byte (0x80 + i), consumed by ISO8859xToUTF8
 * -- confirm against that helper.
 */
static unsigned short const xmlunicodetable_ISO8859_16 [128] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
    0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
    0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
    0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
    0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
    0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
    0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
    0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
    0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
    0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
    0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
    0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
    0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
};
3722 
/*
 * xmltranscodetable_ISO8859_16:
 * Byte table of 48 + 9 * 64 entries.  The size expression reads as a
 * 48-byte index header followed by nine 64-byte blocks; the exact lookup
 * scheme lives in UTF8ToISO8859x (not shown here) -- confirm there.
 * The string literal supplies exactly 624 bytes, so no terminating NUL
 * is stored in the array.
 */
static unsigned char const xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
    "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
    "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
    "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
    "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
    "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
    "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
    "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
    "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
    "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
    "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
    "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
    "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
};
3764 
3765 
/*
 * auto-generated functions for ISO-8859-2 .. ISO-8859-16
 */
3769 
ISO8859_2ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3770 static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3771     const unsigned char* in, int *inlen) {
3772     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3773 }
UTF8ToISO8859_2(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3774 static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3775     const unsigned char* in, int *inlen) {
3776     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3777 }
3778 
ISO8859_3ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3779 static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3780     const unsigned char* in, int *inlen) {
3781     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3782 }
UTF8ToISO8859_3(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3783 static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3784     const unsigned char* in, int *inlen) {
3785     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3786 }
3787 
ISO8859_4ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3788 static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3789     const unsigned char* in, int *inlen) {
3790     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3791 }
UTF8ToISO8859_4(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3792 static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3793     const unsigned char* in, int *inlen) {
3794     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3795 }
3796 
ISO8859_5ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3797 static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3798     const unsigned char* in, int *inlen) {
3799     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3800 }
UTF8ToISO8859_5(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3801 static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3802     const unsigned char* in, int *inlen) {
3803     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3804 }
3805 
ISO8859_6ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3806 static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3807     const unsigned char* in, int *inlen) {
3808     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3809 }
UTF8ToISO8859_6(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3810 static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3811     const unsigned char* in, int *inlen) {
3812     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3813 }
3814 
ISO8859_7ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3815 static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3816     const unsigned char* in, int *inlen) {
3817     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3818 }
UTF8ToISO8859_7(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3819 static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3820     const unsigned char* in, int *inlen) {
3821     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3822 }
3823 
ISO8859_8ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3824 static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3825     const unsigned char* in, int *inlen) {
3826     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3827 }
UTF8ToISO8859_8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3828 static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3829     const unsigned char* in, int *inlen) {
3830     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3831 }
3832 
ISO8859_9ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3833 static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3834     const unsigned char* in, int *inlen) {
3835     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3836 }
UTF8ToISO8859_9(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3837 static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3838     const unsigned char* in, int *inlen) {
3839     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3840 }
3841 
ISO8859_10ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3842 static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3843     const unsigned char* in, int *inlen) {
3844     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3845 }
UTF8ToISO8859_10(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3846 static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3847     const unsigned char* in, int *inlen) {
3848     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
3849 }
3850 
ISO8859_11ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3851 static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
3852     const unsigned char* in, int *inlen) {
3853     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
3854 }
UTF8ToISO8859_11(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3855 static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
3856     const unsigned char* in, int *inlen) {
3857     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
3858 }
3859 
ISO8859_13ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3860 static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3861     const unsigned char* in, int *inlen) {
3862     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
3863 }
UTF8ToISO8859_13(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3864 static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3865     const unsigned char* in, int *inlen) {
3866     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
3867 }
3868 
ISO8859_14ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3869 static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3870     const unsigned char* in, int *inlen) {
3871     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
3872 }
UTF8ToISO8859_14(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3873 static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3874     const unsigned char* in, int *inlen) {
3875     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
3876 }
3877 
ISO8859_15ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3878 static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3879     const unsigned char* in, int *inlen) {
3880     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
3881 }
UTF8ToISO8859_15(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3882 static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3883     const unsigned char* in, int *inlen) {
3884     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
3885 }
3886 
ISO8859_16ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3887 static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3888     const unsigned char* in, int *inlen) {
3889     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
3890 }
UTF8ToISO8859_16(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3891 static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3892     const unsigned char* in, int *inlen) {
3893     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
3894 }
3895 
3896 static void
xmlRegisterCharEncodingHandlersISO8859x(void)3897 xmlRegisterCharEncodingHandlersISO8859x (void) {
3898     xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2);
3899     xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3);
3900     xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4);
3901     xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5);
3902     xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6);
3903     xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7);
3904     xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8);
3905     xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9);
3906     xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10);
3907     xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11);
3908     xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13);
3909     xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14);
3910     xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15);
3911     xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16);
3912 }
3913 
3914 #endif
3915 #endif
3916 
3917 #define bottom_encoding
3918 #include "elfgcchack.h"
3919