• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * encoding.c : implements the encoding conversion functions needed for XML
3  *
4  * Related specs:
5  * rfc2044        (UTF-8 and UTF-16) F. Yergeau Alis Technologies
6  * rfc2781        UTF-16, an encoding of ISO 10646, P. Hoffman, F. Yergeau
7  * [ISO-10646]    UTF-8 and UTF-16 in Annexes
8  * [ISO-8859-1]   ISO Latin-1 characters codes.
9  * [UNICODE]      The Unicode Consortium, "The Unicode Standard --
10  *                Worldwide Character Encoding -- Version 1.0", Addison-
11  *                Wesley, Volume 1, 1991, Volume 2, 1992.  UTF-8 is
12  *                described in Unicode Technical Report #4.
13  * [US-ASCII]     Coded Character Set--7-bit American Standard Code for
14  *                Information Interchange, ANSI X3.4-1986.
15  *
16  * See Copyright for the status of this software.
17  *
18  * daniel@veillard.com
19  *
20  * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
21  */
22 
23 #define IN_LIBXML
24 #include "libxml.h"
25 
26 #include <string.h>
27 
28 #ifdef HAVE_CTYPE_H
29 #include <ctype.h>
30 #endif
31 #ifdef HAVE_STDLIB_H
32 #include <stdlib.h>
33 #endif
34 #ifdef LIBXML_ICONV_ENABLED
35 #ifdef HAVE_ERRNO_H
36 #include <errno.h>
37 #endif
38 #endif
39 #include <libxml/encoding.h>
40 #include <libxml/xmlmemory.h>
41 #ifdef LIBXML_HTML_ENABLED
42 #include <libxml/HTMLparser.h>
43 #endif
44 #include <libxml/globals.h>
45 #include <libxml/xmlerror.h>
46 
47 static xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
48 static xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
49 
/*
 * One entry of the encoding alias table.
 * xmlGetEncodingAlias() compares @alias against the upper-cased query and
 * hands back @name; xmlCleanupEncodingAliases() releases both strings
 * with xmlFree().
 */
typedef struct _xmlCharEncodingAlias {
    const char *name;	/* encoding name returned by a successful lookup */
    const char *alias;	/* alias string matched with strcmp() */
} xmlCharEncodingAlias;
typedef xmlCharEncodingAlias *xmlCharEncodingAliasPtr;
56 
57 static xmlCharEncodingAliasPtr xmlCharEncodingAliases = NULL;
58 static int xmlCharEncodingAliasesNb = 0;
59 static int xmlCharEncodingAliasesMax = 0;
60 
61 #if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
62 #if 0
63 #define DEBUG_ENCODING  /* Define this to get encoding traces */
64 #endif
65 #else
66 #ifdef LIBXML_ISO8859X_ENABLED
67 static void xmlRegisterCharEncodingHandlersISO8859x (void);
68 #endif
69 #endif
70 
71 static int xmlLittleEndian = 1;
72 
73 /**
74  * xmlEncodingErrMemory:
75  * @extra:  extra informations
76  *
77  * Handle an out of memory condition
78  */
79 static void
xmlEncodingErrMemory(const char * extra)80 xmlEncodingErrMemory(const char *extra)
81 {
82     __xmlSimpleError(XML_FROM_I18N, XML_ERR_NO_MEMORY, NULL, NULL, extra);
83 }
84 
85 /**
86  * xmlErrEncoding:
87  * @error:  the error number
88  * @msg:  the error message
89  *
90  * n encoding error
91  */
92 static void
xmlEncodingErr(xmlParserErrors error,const char * msg,const char * val)93 xmlEncodingErr(xmlParserErrors error, const char *msg, const char *val)
94 {
95     __xmlRaiseError(NULL, NULL, NULL, NULL, NULL,
96                     XML_FROM_I18N, error, XML_ERR_FATAL,
97                     NULL, 0, val, NULL, NULL, 0, 0, msg, val);
98 }
99 
100 #ifdef LIBXML_ICU_ENABLED
101 static uconv_t*
openIcuConverter(const char * name,int toUnicode)102 openIcuConverter(const char* name, int toUnicode)
103 {
104   UErrorCode status = U_ZERO_ERROR;
105   uconv_t *conv = (uconv_t *) xmlMalloc(sizeof(uconv_t));
106   if (conv == NULL)
107     return NULL;
108 
109   conv->uconv = ucnv_open(name, &status);
110   if (U_FAILURE(status))
111     goto error;
112 
113   status = U_ZERO_ERROR;
114   if (toUnicode) {
115     ucnv_setToUCallBack(conv->uconv, UCNV_TO_U_CALLBACK_STOP,
116                         NULL, NULL, NULL, &status);
117   }
118   else {
119     ucnv_setFromUCallBack(conv->uconv, UCNV_FROM_U_CALLBACK_STOP,
120                         NULL, NULL, NULL, &status);
121   }
122   if (U_FAILURE(status))
123     goto error;
124 
125   status = U_ZERO_ERROR;
126   conv->utf8 = ucnv_open("UTF-8", &status);
127   if (U_SUCCESS(status))
128     return conv;
129 
130 error:
131   if (conv->uconv)
132     ucnv_close(conv->uconv);
133   xmlFree(conv);
134   return NULL;
135 }
136 
137 static void
closeIcuConverter(uconv_t * conv)138 closeIcuConverter(uconv_t *conv)
139 {
140   if (conv != NULL) {
141     ucnv_close(conv->uconv);
142     ucnv_close(conv->utf8);
143     xmlFree(conv);
144   }
145 }
146 #endif /* LIBXML_ICU_ENABLED */
147 
148 /************************************************************************
149  *									*
150  *		Conversions To/From UTF8 encoding			*
151  *									*
152  ************************************************************************/
153 
/**
 * asciiToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ASCII chars
 * @inlen:  the length of @in
 *
 * Take a block of ASCII chars in and copy it to an UTF-8 block of
 * chars out. Copying stops early while fewer than 6 bytes of @out
 * remain unused (the loop keeps a 5 byte slack).
 *
 * Returns the number of bytes written, or -1 if a byte >= 0x80 is met.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
static int
asciiToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const int capacity = *outlen;
    const int available = *inlen;
    int done = 0;	/* bytes copied so far == bytes consumed so far */

    while ((done < available) && (done + 5 < capacity)) {
        unsigned int ch = in[done];

        if (ch >= 0x80) {
            /* not an ASCII char: report what was converted before it */
            *outlen = done;
            *inlen = done;
            return(-1);
        }
        out[done] = ch;
        done++;
    }

    *outlen = done;
    *inlen = done;
    return(done);
}
198 
199 #ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toascii:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ASCII
 * block of chars out.
 *
 * Only single byte sequences can be converted. A complete multi-byte
 * sequence is always a failure: it is either a non-ASCII character, an
 * overlong encoding of an ASCII one, or malformed. A multi-byte sequence
 * cut short by the end of @in is left unconsumed so that the caller can
 * supply the missing bytes.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 * fails, or -1 if @out, @outlen or @inlen is NULL. A NULL @in is an
 * initialization call: both lengths are set to 0 and 0 is returned.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8Toascii(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char* outstart = out;
    const unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;
    unsigned int d;
    int trailing;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + (*inlen);
    outend = out + (*outlen);

    /* @in always points just past the last byte that was converted */
    while (in < inend) {
        d = *in;
        if (d < 0x80) {
            if (out >= outend)
                break;
            *out++ = d;
            in++;
            continue;
        }

        /* stray continuation byte or invalid lead byte */
        if ((d < 0xC0) || (d >= 0xF8))
            goto failed;

        if (d < 0xE0)
            trailing = 1;
        else if (d < 0xF0)
            trailing = 2;
        else
            trailing = 3;

        /* sequence split by the end of the buffer: wait for more input */
        if (inend - in - 1 < trailing)
            break;

        /* complete multi-byte sequence: no chance for this in ASCII */
        goto failed;
    }
    *outlen = out - outstart;
    *inlen = in - instart;
    return(*outlen);

failed:
    *outlen = out - outstart;
    *inlen = in - instart;
    return(-2);
}
283 #endif /* LIBXML_OUTPUT_ENABLED */
284 
/**
 * isolat1ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO Latin 1 chars
 * @inlen:  the length of @in
 *
 * Take a block of ISO Latin 1 chars in and convert it to an UTF-8
 * block of chars out. Bytes below 0x80 are copied, every other byte
 * becomes a two byte sequence. Conversion stops at the first character
 * that does not fit in the remaining space of @out.
 *
 * Returns the number of bytes written if success, or -1 if one of the
 * arguments is NULL.
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
int
isolat1ToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char *dst = out;
    const unsigned char *dstend;
    const unsigned char *src = in;
    const unsigned char *srcend;

    if ((out == NULL) || (in == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);

    dstend = out + *outlen;
    srcend = in + (*inlen);

    for ( ; src < srcend; src++) {
        unsigned int ch = *src;

        if (ch < 0x80) {
            if (dstend - dst < 1)
                break;
            *dst++ = ch;
        } else {
            /* needs room for the lead byte and one continuation byte */
            if (dstend - dst < 2)
                break;
            *dst++ = ((ch >> 6) & 0x1F) | 0xC0;
            *dst++ = (ch & 0x3F) | 0x80;
        }
    }

    *outlen = dst - out;
    *inlen = src - in;
    return(*outlen);
}
333 
/**
 * UTF8ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-8 chars
 * @inlenb:  the length of @inb in bytes
 *
 * No op copy operation for UTF8 handling: copies as many bytes as fit,
 * i.e. the smaller of *@outlen and *@inlenb.
 *
 * Returns the number of bytes written, or -1 if an argument is NULL or
 *     the number of bytes to copy is negative.
 *     On success both *@outlen and *@inlenb are set to the number of
 *     bytes copied.
 */
static int
UTF8ToUTF8(unsigned char* out, int *outlen,
           const unsigned char* inb, int *inlenb)
{
    int count;

    if ((out == NULL) || (inb == NULL) || (outlen == NULL) || (inlenb == NULL))
        return(-1);

    count = (*inlenb < *outlen) ? *inlenb : *outlen;
    if (count < 0)
        return(-1);

    memcpy(out, inb, count);

    *inlenb = count;
    *outlen = count;
    return(count);
}
369 
370 
371 #ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8Toisolat1:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
 * block of chars out.
 *
 * Returns the number of bytes written if success, -2 if the transcoding
 *     fails, or -1 if @out, @outlen or @inlen is NULL. A NULL @in is an
 *     initialization call: both lengths are set to 0 and 0 is returned.
 * The value of @inlen after return is the number of octets consumed,
 *     counting only characters that were completely converted.
 * The value of @outlen after return is the number of octets produced.
 */
int
UTF8Toisolat1(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    const unsigned char *src = in;		/* read cursor */
    const unsigned char *committed = in;	/* end of last converted char */
    const unsigned char *srcend;
    unsigned char *dst = out;
    const unsigned char *dstend;
    int failed = 0;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    srcend = in + (*inlen);
    dstend = out + (*outlen);

    while (src < srcend) {
        unsigned int lead = *src++;
        unsigned int value;
        int extra;

        if (lead < 0x80) {
            value = lead;
            extra = 0;
        } else if ((lead < 0xC0) || (lead >= 0xF8)) {
            /* trailing byte in leading position, or invalid lead byte */
            failed = 1;
            break;
        } else if (lead < 0xE0) {
            value = lead & 0x1F;
            extra = 1;
        } else if (lead < 0xF0) {
            value = lead & 0x0F;
            extra = 2;
        } else {
            value = lead & 0x07;
            extra = 3;
        }

        /* sequence cut by the end of the input: stop without consuming it */
        if (srcend - src < extra)
            break;

        while (extra > 0) {
            unsigned int cont = *src++;

            if ((cont & 0xC0) != 0x80) {
                failed = 1;
                break;
            }
            value = (value << 6) | (cont & 0x3F);
            extra--;
        }
        if (failed)
            break;

        /* no chance for this in IsoLat1 */
        if (value > 0xFF) {
            failed = 1;
            break;
        }
        if (dst >= dstend)
            break;
        *dst++ = value;
        committed = src;
    }

    *outlen = dst - out;
    *inlen = committed - in;
    if (failed)
        return(-2);
    return(*outlen);
}
461 #endif /* LIBXML_OUTPUT_ENABLED */
462 
/**
 * UTF16LEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16LE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16LE code units in and try to convert it to an
 * UTF-8 block of chars out.
 *
 * The input is read one byte at a time, so @inb needs no particular
 * alignment and the result does not depend on the byte order of the
 * host. A character is only consumed when its whole UTF-8 form fits in
 * @out. A trailing odd byte, or a high surrogate whose low surrogate
 * has not arrived yet, is left unconsumed.
 *
 * Returns the number of bytes written, -1 if an argument is NULL, or -2
 *     if the transcoding fails (a high surrogate is not followed by a
 *     low surrogate).
 *     The value of *@inlenb after return is the number of octets consumed.
 *     The value of *@outlen after return is the number of octets produced.
 */
static int
UTF16LEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    const unsigned char* outend;
    const unsigned char* in = inb;	/* start of the next unconverted char */
    const unsigned char* next;
    const unsigned char* inend;
    unsigned int c, d;

    if ((out == NULL) || (outlen == NULL) || (inb == NULL) || (inlenb == NULL))
        return(-1);

    outend = out + *outlen;
    inend = inb + *inlenb;
    while (inend - in >= 2) {
        c = in[0] | ((unsigned int) in[1] << 8);
        next = in + 2;

        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            /* pair split across two buffers: wait for the second half */
            if (inend - next < 2)
                break;
            d = next[0] | ((unsigned int) next[1] << 8);
            next += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = out - outstart;
                *inlenb = in - inb;
                return(-2);
            }
            c = (((c & 0x03FF) << 10) | (d & 0x03FF)) + 0x10000;
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x80) {
            if (outend - out < 1)
                break;
            *out++ = c;
        } else if (c < 0x800) {
            if (outend - out < 2)
                break;
            *out++ = ((c >>  6) & 0x1F) | 0xC0;
            *out++ = (c & 0x3F) | 0x80;
        } else if (c < 0x10000) {
            if (outend - out < 3)
                break;
            *out++ = ((c >> 12) & 0x0F) | 0xE0;
            *out++ = ((c >>  6) & 0x3F) | 0x80;
            *out++ = (c & 0x3F) | 0x80;
        } else {
            if (outend - out < 4)
                break;
            *out++ = ((c >> 18) & 0x07) | 0xF0;
            *out++ = ((c >> 12) & 0x3F) | 0x80;
            *out++ = ((c >>  6) & 0x3F) | 0x80;
            *out++ = (c & 0x3F) | 0x80;
        }
        in = next;
    }
    *outlen = out - outstart;
    *inlenb = in - inb;
    return(*outlen);
}
550 
551 #ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16LE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
 * block of chars out. No Byte Order Mark is produced.
 *
 * The output is stored one byte at a time, so @outb needs no particular
 * alignment and the result does not depend on the byte order of the
 * host. A character is only consumed when all of its code units fit in
 * @outb. A sequence cut short by the end of @in is left unconsumed.
 *
 * Returns the number of bytes written, -1 if @outb, @outlen or @inlen is
 *     NULL, or -2 if the transcoding failed (invalid lead byte, invalid
 *     continuation byte, or a value above 0x10FFFF). A NULL @in is an
 *     initialization call: both lengths are set to 0 and 0 is returned.
 *     The value of *@inlen after return is the number of octets consumed.
 *     The value of *@outlen after return is the number of octets produced.
 */
static int
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    const unsigned char* outend;
    const unsigned char *const instart = in;
    const unsigned char* next;
    const unsigned char* inend;
    unsigned int c, d;
    int trailing;

    /* UTF16LE encoding has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* only whole 16-bit units can be stored */
    outend = outb + (*outlen / 2) * 2;

    /* @in always points at the start of the next unconverted char */
    while (in < inend) {
        next = in;
        d = *next++;
        if      (d < 0x80)  { c = d; trailing = 0; }
        else if (d < 0xC0)  goto failed;  /* trailing byte in leading position */
        else if (d < 0xE0)  { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0)  { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8)  { c = d & 0x07; trailing = 3; }
        else                goto failed;  /* no chance for this in UTF-16 */

        if (inend - next < trailing)
            break;

        for ( ; trailing > 0; trailing--) {
            d = *next++;
            if ((d & 0xC0) != 0x80)
                goto failed;
            c = (c << 6) | (d & 0x3F);
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            *out++ = c & 0xFF;
            *out++ = (c >> 8) & 0xFF;
        } else if (c < 0x110000) {
            if (outend - out < 4)
                break;
            c -= 0x10000;
            d = 0xD800 | (c >> 10);		/* high surrogate */
            *out++ = d & 0xFF;
            *out++ = (d >> 8) & 0xFF;
            d = 0xDC00 | (c & 0x03FF);	/* low surrogate */
            *out++ = d & 0xFF;
            *out++ = (d >> 8) & 0xFF;
        } else {
            goto failed;
        }
        in = next;
    }
    *outlen = out - outb;
    *inlen = in - instart;
    return(*outlen);

failed:
    *outlen = out - outb;
    *inlen = in - instart;
    return(-2);
}
660 
/**
 * UTF8ToUTF16:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
 * block of chars out.
 *
 * A NULL @in is the initialization call: when @outb has room for at
 * least 2 bytes the Byte Order Mark 0xFF 0xFE is stored and 2 is
 * returned, otherwise nothing is stored and 0 is returned. Any other
 * call is forwarded to UTF8ToUTF16LE().
 *
 * Returns the number of bytes written, -1 if @outb, @outlen or @inlen is
 *     NULL, or whatever UTF8ToUTF16LE() returns for the conversion.
 */
static int
UTF8ToUTF16(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    /* the initialization path below dereferences all three pointers */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL))
        return(-1);

    if (in == NULL) {
        /*
         * initialization, add the Byte Order Mark for UTF-16LE
         */
        if (*outlen >= 2) {
            outb[0] = 0xFF;
            outb[1] = 0xFE;
            *outlen = 2;
            *inlen = 0;
#ifdef DEBUG_ENCODING
            xmlGenericError(xmlGenericErrorContext,
		    "Added FFFE Byte Order Mark\n");
#endif
            return(2);
        }
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    return (UTF8ToUTF16LE(outb, outlen, in, inlen));
}
699 #endif /* LIBXML_OUTPUT_ENABLED */
700 
/**
 * UTF16BEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @inb:  a pointer to an array of UTF-16BE passed as a byte array
 * @inlenb:  the length of @inb in bytes
 *
 * Take a block of UTF-16BE code units in and try to convert it to an
 * UTF-8 block of chars out.
 *
 * The input is read one byte at a time, so @inb needs no particular
 * alignment and the result does not depend on the byte order of the
 * host. A character is only consumed when its whole UTF-8 form fits in
 * @out, so the output never ends with a truncated sequence. A trailing
 * odd byte, or a high surrogate whose low surrogate has not arrived
 * yet, is left unconsumed.
 *
 * Returns the number of bytes written, -1 if an argument is NULL, or -2
 *     if the transcoding fails (a high surrogate is not followed by a
 *     low surrogate).
 * The value of *@inlenb after return is the number of octets consumed.
 * The value of *@outlen after return is the number of octets produced.
 */
static int
UTF16BEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    const unsigned char* outend;
    const unsigned char* in = inb;	/* start of the next unconverted char */
    const unsigned char* next;
    const unsigned char* inend;
    unsigned int c, d;

    if ((out == NULL) || (outlen == NULL) || (inb == NULL) || (inlenb == NULL))
        return(-1);

    outend = out + *outlen;
    inend = inb + *inlenb;
    while (inend - in >= 2) {
        c = ((unsigned int) in[0] << 8) | in[1];
        next = in + 2;

        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            /* pair split across two buffers: wait for the second half */
            if (inend - next < 2)
                break;
            d = ((unsigned int) next[0] << 8) | next[1];
            next += 2;
            if ((d & 0xFC00) != 0xDC00) {
                *outlen = out - outstart;
                *inlenb = in - inb;
                return(-2);
            }
            c = (((c & 0x03FF) << 10) | (d & 0x03FF)) + 0x10000;
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x80) {
            if (outend - out < 1)
                break;
            *out++ = c;
        } else if (c < 0x800) {
            if (outend - out < 2)
                break;
            *out++ = ((c >>  6) & 0x1F) | 0xC0;
            *out++ = (c & 0x3F) | 0x80;
        } else if (c < 0x10000) {
            if (outend - out < 3)
                break;
            *out++ = ((c >> 12) & 0x0F) | 0xE0;
            *out++ = ((c >>  6) & 0x3F) | 0x80;
            *out++ = (c & 0x3F) | 0x80;
        } else {
            if (outend - out < 4)
                break;
            *out++ = ((c >> 18) & 0x07) | 0xF0;
            *out++ = ((c >> 12) & 0x3F) | 0x80;
            *out++ = ((c >>  6) & 0x3F) | 0x80;
            *out++ = (c & 0x3F) | 0x80;
        }
        in = next;
    }
    *outlen = out - outstart;
    *inlenb = in - inb;
    return(*outlen);
}
792 
793 #ifdef LIBXML_OUTPUT_ENABLED
/**
 * UTF8ToUTF16BE:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
 * block of chars out. No Byte Order Mark is produced.
 *
 * The output is stored one byte at a time, so @outb needs no particular
 * alignment and the result does not depend on the byte order of the
 * host. A character is only consumed when all of its code units fit in
 * @outb. A sequence cut short by the end of @in is left unconsumed.
 *
 * Returns the number of bytes written, -1 if @outb, @outlen or @inlen is
 *     NULL, or -2 if the transcoding failed (invalid lead byte, invalid
 *     continuation byte, or a value above 0x10FFFF). A NULL @in is an
 *     initialization call: both lengths are set to 0 and 0 is returned.
 *     The value of *@inlen after return is the number of octets consumed.
 *     The value of *@outlen after return is the number of octets
 *     produced, on the error paths as well.
 */
static int
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    const unsigned char* outend;
    const unsigned char *const instart = in;
    const unsigned char* next;
    const unsigned char* inend;
    unsigned int c, d;
    int trailing;

    /* UTF-16BE has no BOM */
    if ((outb == NULL) || (outlen == NULL) || (inlen == NULL)) return(-1);
    if (in == NULL) {
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    inend = in + *inlen;
    /* only whole 16-bit units can be stored */
    outend = outb + (*outlen / 2) * 2;

    /* @in always points at the start of the next unconverted char */
    while (in < inend) {
        next = in;
        d = *next++;
        if      (d < 0x80)  { c = d; trailing = 0; }
        else if (d < 0xC0)  goto failed;  /* trailing byte in leading position */
        else if (d < 0xE0)  { c = d & 0x1F; trailing = 1; }
        else if (d < 0xF0)  { c = d & 0x0F; trailing = 2; }
        else if (d < 0xF8)  { c = d & 0x07; trailing = 3; }
        else                goto failed;  /* no chance for this in UTF-16 */

        if (inend - next < trailing)
            break;

        for ( ; trailing > 0; trailing--) {
            d = *next++;
            if ((d & 0xC0) != 0x80)
                goto failed;
            c = (c << 6) | (d & 0x3F);
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x10000) {
            if (outend - out < 2)
                break;
            *out++ = (c >> 8) & 0xFF;
            *out++ = c & 0xFF;
        } else if (c < 0x110000) {
            if (outend - out < 4)
                break;
            c -= 0x10000;
            d = 0xD800 | (c >> 10);		/* high surrogate */
            *out++ = (d >> 8) & 0xFF;
            *out++ = d & 0xFF;
            d = 0xDC00 | (c & 0x03FF);	/* low surrogate */
            *out++ = (d >> 8) & 0xFF;
            *out++ = d & 0xFF;
        } else {
            goto failed;
        }
        in = next;
    }
    *outlen = out - outb;
    *inlen = in - instart;
    return(*outlen);

failed:
    /* byte count, like the success path */
    *outlen = out - outb;
    *inlen = in - instart;
    return(-2);
}
899 #endif /* LIBXML_OUTPUT_ENABLED */
900 
901 /************************************************************************
902  *									*
903  *		Generic encoding handling routines			*
904  *									*
905  ************************************************************************/
906 
907 /**
908  * xmlDetectCharEncoding:
909  * @in:  a pointer to the first bytes of the XML entity, must be at least
910  *       2 bytes long (at least 4 if encoding is UTF4 variant).
911  * @len:  pointer to the length of the buffer
912  *
913  * Guess the encoding of the entity using the first bytes of the entity content
914  * according to the non-normative appendix F of the XML-1.0 recommendation.
915  *
916  * Returns one of the XML_CHAR_ENCODING_... values.
917  */
918 xmlCharEncoding
xmlDetectCharEncoding(const unsigned char * in,int len)919 xmlDetectCharEncoding(const unsigned char* in, int len)
920 {
921     if (in == NULL)
922         return(XML_CHAR_ENCODING_NONE);
923     if (len >= 4) {
924 	if ((in[0] == 0x00) && (in[1] == 0x00) &&
925 	    (in[2] == 0x00) && (in[3] == 0x3C))
926 	    return(XML_CHAR_ENCODING_UCS4BE);
927 	if ((in[0] == 0x3C) && (in[1] == 0x00) &&
928 	    (in[2] == 0x00) && (in[3] == 0x00))
929 	    return(XML_CHAR_ENCODING_UCS4LE);
930 	if ((in[0] == 0x00) && (in[1] == 0x00) &&
931 	    (in[2] == 0x3C) && (in[3] == 0x00))
932 	    return(XML_CHAR_ENCODING_UCS4_2143);
933 	if ((in[0] == 0x00) && (in[1] == 0x3C) &&
934 	    (in[2] == 0x00) && (in[3] == 0x00))
935 	    return(XML_CHAR_ENCODING_UCS4_3412);
936 	if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
937 	    (in[2] == 0xA7) && (in[3] == 0x94))
938 	    return(XML_CHAR_ENCODING_EBCDIC);
939 	if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
940 	    (in[2] == 0x78) && (in[3] == 0x6D))
941 	    return(XML_CHAR_ENCODING_UTF8);
942 	/*
943 	 * Although not part of the recommendation, we also
944 	 * attempt an "auto-recognition" of UTF-16LE and
945 	 * UTF-16BE encodings.
946 	 */
947 	if ((in[0] == 0x3C) && (in[1] == 0x00) &&
948 	    (in[2] == 0x3F) && (in[3] == 0x00))
949 	    return(XML_CHAR_ENCODING_UTF16LE);
950 	if ((in[0] == 0x00) && (in[1] == 0x3C) &&
951 	    (in[2] == 0x00) && (in[3] == 0x3F))
952 	    return(XML_CHAR_ENCODING_UTF16BE);
953     }
954     if (len >= 3) {
955 	/*
956 	 * Errata on XML-1.0 June 20 2001
957 	 * We now allow an UTF8 encoded BOM
958 	 */
959 	if ((in[0] == 0xEF) && (in[1] == 0xBB) &&
960 	    (in[2] == 0xBF))
961 	    return(XML_CHAR_ENCODING_UTF8);
962     }
963     /* For UTF-16 we can recognize by the BOM */
964     if (len >= 2) {
965 	if ((in[0] == 0xFE) && (in[1] == 0xFF))
966 	    return(XML_CHAR_ENCODING_UTF16BE);
967 	if ((in[0] == 0xFF) && (in[1] == 0xFE))
968 	    return(XML_CHAR_ENCODING_UTF16LE);
969     }
970     return(XML_CHAR_ENCODING_NONE);
971 }
972 
973 /**
974  * xmlCleanupEncodingAliases:
975  *
976  * Unregisters all aliases
977  */
978 void
xmlCleanupEncodingAliases(void)979 xmlCleanupEncodingAliases(void) {
980     int i;
981 
982     if (xmlCharEncodingAliases == NULL)
983 	return;
984 
985     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
986 	if (xmlCharEncodingAliases[i].name != NULL)
987 	    xmlFree((char *) xmlCharEncodingAliases[i].name);
988 	if (xmlCharEncodingAliases[i].alias != NULL)
989 	    xmlFree((char *) xmlCharEncodingAliases[i].alias);
990     }
991     xmlCharEncodingAliasesNb = 0;
992     xmlCharEncodingAliasesMax = 0;
993     xmlFree(xmlCharEncodingAliases);
994     xmlCharEncodingAliases = NULL;
995 }
996 
997 /**
998  * xmlGetEncodingAlias:
999  * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1000  *
1001  * Lookup an encoding name for the given alias.
1002  *
1003  * Returns NULL if not found, otherwise the original name
1004  */
1005 const char *
xmlGetEncodingAlias(const char * alias)1006 xmlGetEncodingAlias(const char *alias) {
1007     int i;
1008     char upper[100];
1009 
1010     if (alias == NULL)
1011 	return(NULL);
1012 
1013     if (xmlCharEncodingAliases == NULL)
1014 	return(NULL);
1015 
1016     for (i = 0;i < 99;i++) {
1017         upper[i] = toupper(alias[i]);
1018 	if (upper[i] == 0) break;
1019     }
1020     upper[i] = 0;
1021 
1022     /*
1023      * Walk down the list looking for a definition of the alias
1024      */
1025     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1026 	if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1027 	    return(xmlCharEncodingAliases[i].name);
1028 	}
1029     }
1030     return(NULL);
1031 }
1032 
1033 /**
1034  * xmlAddEncodingAlias:
1035  * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1036  * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1037  *
1038  * Registers an alias @alias for an encoding named @name. Existing alias
1039  * will be overwritten.
1040  *
1041  * Returns 0 in case of success, -1 in case of error
1042  */
1043 int
xmlAddEncodingAlias(const char * name,const char * alias)1044 xmlAddEncodingAlias(const char *name, const char *alias) {
1045     int i;
1046     char upper[100];
1047 
1048     if ((name == NULL) || (alias == NULL))
1049 	return(-1);
1050 
1051     for (i = 0;i < 99;i++) {
1052         upper[i] = toupper(alias[i]);
1053 	if (upper[i] == 0) break;
1054     }
1055     upper[i] = 0;
1056 
1057     if (xmlCharEncodingAliases == NULL) {
1058 	xmlCharEncodingAliasesNb = 0;
1059 	xmlCharEncodingAliasesMax = 20;
1060 	xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1061 	      xmlMalloc(xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1062 	if (xmlCharEncodingAliases == NULL)
1063 	    return(-1);
1064     } else if (xmlCharEncodingAliasesNb >= xmlCharEncodingAliasesMax) {
1065 	xmlCharEncodingAliasesMax *= 2;
1066 	xmlCharEncodingAliases = (xmlCharEncodingAliasPtr)
1067 	      xmlRealloc(xmlCharEncodingAliases,
1068 		         xmlCharEncodingAliasesMax * sizeof(xmlCharEncodingAlias));
1069     }
1070     /*
1071      * Walk down the list looking for a definition of the alias
1072      */
1073     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1074 	if (!strcmp(xmlCharEncodingAliases[i].alias, upper)) {
1075 	    /*
1076 	     * Replace the definition.
1077 	     */
1078 	    xmlFree((char *) xmlCharEncodingAliases[i].name);
1079 	    xmlCharEncodingAliases[i].name = xmlMemStrdup(name);
1080 	    return(0);
1081 	}
1082     }
1083     /*
1084      * Add the definition
1085      */
1086     xmlCharEncodingAliases[xmlCharEncodingAliasesNb].name = xmlMemStrdup(name);
1087     xmlCharEncodingAliases[xmlCharEncodingAliasesNb].alias = xmlMemStrdup(upper);
1088     xmlCharEncodingAliasesNb++;
1089     return(0);
1090 }
1091 
1092 /**
1093  * xmlDelEncodingAlias:
1094  * @alias:  the alias name as parsed, in UTF-8 format (ASCII actually)
1095  *
1096  * Unregisters an encoding alias @alias
1097  *
1098  * Returns 0 in case of success, -1 in case of error
1099  */
1100 int
xmlDelEncodingAlias(const char * alias)1101 xmlDelEncodingAlias(const char *alias) {
1102     int i;
1103 
1104     if (alias == NULL)
1105 	return(-1);
1106 
1107     if (xmlCharEncodingAliases == NULL)
1108 	return(-1);
1109     /*
1110      * Walk down the list looking for a definition of the alias
1111      */
1112     for (i = 0;i < xmlCharEncodingAliasesNb;i++) {
1113 	if (!strcmp(xmlCharEncodingAliases[i].alias, alias)) {
1114 	    xmlFree((char *) xmlCharEncodingAliases[i].name);
1115 	    xmlFree((char *) xmlCharEncodingAliases[i].alias);
1116 	    xmlCharEncodingAliasesNb--;
1117 	    memmove(&xmlCharEncodingAliases[i], &xmlCharEncodingAliases[i + 1],
1118 		    sizeof(xmlCharEncodingAlias) * (xmlCharEncodingAliasesNb - i));
1119 	    return(0);
1120 	}
1121     }
1122     return(-1);
1123 }
1124 
1125 /**
1126  * xmlParseCharEncoding:
1127  * @name:  the encoding name as parsed, in UTF-8 format (ASCII actually)
1128  *
1129  * Compare the string to the encoding schemes already known. Note
1130  * that the comparison is case insensitive accordingly to the section
1131  * [XML] 4.3.3 Character Encoding in Entities.
1132  *
1133  * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
1134  * if not recognized.
1135  */
1136 xmlCharEncoding
xmlParseCharEncoding(const char * name)1137 xmlParseCharEncoding(const char* name)
1138 {
1139     const char *alias;
1140     char upper[500];
1141     int i;
1142 
1143     if (name == NULL)
1144 	return(XML_CHAR_ENCODING_NONE);
1145 
1146     /*
1147      * Do the alias resolution
1148      */
1149     alias = xmlGetEncodingAlias(name);
1150     if (alias != NULL)
1151 	name = alias;
1152 
1153     for (i = 0;i < 499;i++) {
1154         upper[i] = toupper(name[i]);
1155 	if (upper[i] == 0) break;
1156     }
1157     upper[i] = 0;
1158 
1159     if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
1160     if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
1161     if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
1162 
1163     /*
1164      * NOTE: if we were able to parse this, the endianness of UTF16 is
1165      *       already found and in use
1166      */
1167     if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
1168     if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
1169 
1170     if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1171     if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
1172     if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
1173 
1174     /*
1175      * NOTE: if we were able to parse this, the endianness of UCS4 is
1176      *       already found and in use
1177      */
1178     if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1179     if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
1180     if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
1181 
1182 
1183     if (!strcmp(upper,  "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
1184     if (!strcmp(upper,  "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
1185     if (!strcmp(upper,  "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
1186 
1187     if (!strcmp(upper,  "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
1188     if (!strcmp(upper,  "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
1189     if (!strcmp(upper,  "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
1190 
1191     if (!strcmp(upper,  "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
1192     if (!strcmp(upper,  "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
1193     if (!strcmp(upper,  "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
1194     if (!strcmp(upper,  "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
1195     if (!strcmp(upper,  "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
1196     if (!strcmp(upper,  "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
1197     if (!strcmp(upper,  "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
1198 
1199     if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
1200     if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
1201     if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
1202 
1203 #ifdef DEBUG_ENCODING
1204     xmlGenericError(xmlGenericErrorContext, "Unknown encoding %s\n", name);
1205 #endif
1206     return(XML_CHAR_ENCODING_ERROR);
1207 }
1208 
1209 /**
1210  * xmlGetCharEncodingName:
1211  * @enc:  the encoding
1212  *
1213  * The "canonical" name for XML encoding.
1214  * C.f. http://www.w3.org/TR/REC-xml#charencoding
1215  * Section 4.3.3  Character Encoding in Entities
1216  *
1217  * Returns the canonical name for the given encoding
1218  */
1219 
1220 const char*
xmlGetCharEncodingName(xmlCharEncoding enc)1221 xmlGetCharEncodingName(xmlCharEncoding enc) {
1222     switch (enc) {
1223         case XML_CHAR_ENCODING_ERROR:
1224 	    return(NULL);
1225         case XML_CHAR_ENCODING_NONE:
1226 	    return(NULL);
1227         case XML_CHAR_ENCODING_UTF8:
1228 	    return("UTF-8");
1229         case XML_CHAR_ENCODING_UTF16LE:
1230 	    return("UTF-16");
1231         case XML_CHAR_ENCODING_UTF16BE:
1232 	    return("UTF-16");
1233         case XML_CHAR_ENCODING_EBCDIC:
1234             return("EBCDIC");
1235         case XML_CHAR_ENCODING_UCS4LE:
1236             return("ISO-10646-UCS-4");
1237         case XML_CHAR_ENCODING_UCS4BE:
1238             return("ISO-10646-UCS-4");
1239         case XML_CHAR_ENCODING_UCS4_2143:
1240             return("ISO-10646-UCS-4");
1241         case XML_CHAR_ENCODING_UCS4_3412:
1242             return("ISO-10646-UCS-4");
1243         case XML_CHAR_ENCODING_UCS2:
1244             return("ISO-10646-UCS-2");
1245         case XML_CHAR_ENCODING_8859_1:
1246 	    return("ISO-8859-1");
1247         case XML_CHAR_ENCODING_8859_2:
1248 	    return("ISO-8859-2");
1249         case XML_CHAR_ENCODING_8859_3:
1250 	    return("ISO-8859-3");
1251         case XML_CHAR_ENCODING_8859_4:
1252 	    return("ISO-8859-4");
1253         case XML_CHAR_ENCODING_8859_5:
1254 	    return("ISO-8859-5");
1255         case XML_CHAR_ENCODING_8859_6:
1256 	    return("ISO-8859-6");
1257         case XML_CHAR_ENCODING_8859_7:
1258 	    return("ISO-8859-7");
1259         case XML_CHAR_ENCODING_8859_8:
1260 	    return("ISO-8859-8");
1261         case XML_CHAR_ENCODING_8859_9:
1262 	    return("ISO-8859-9");
1263         case XML_CHAR_ENCODING_2022_JP:
1264             return("ISO-2022-JP");
1265         case XML_CHAR_ENCODING_SHIFT_JIS:
1266             return("Shift-JIS");
1267         case XML_CHAR_ENCODING_EUC_JP:
1268             return("EUC-JP");
1269 	case XML_CHAR_ENCODING_ASCII:
1270 	    return(NULL);
1271     }
1272     return(NULL);
1273 }
1274 
1275 /************************************************************************
1276  *									*
1277  *			Char encoding handlers				*
1278  *									*
1279  ************************************************************************/
1280 
1281 
1282 /* the size should be growable, but it's not a big deal ... */
1283 #define MAX_ENCODING_HANDLERS 50
1284 static xmlCharEncodingHandlerPtr *handlers = NULL;
1285 static int nbCharEncodingHandler = 0;
1286 
1287 /*
1288  * The default is UTF-8 for XML, that's also the default used for the
1289  * parser internals, so the default encoding handler is NULL
1290  */
1291 
1292 static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
1293 
1294 /**
1295  * xmlNewCharEncodingHandler:
1296  * @name:  the encoding name, in UTF-8 format (ASCII actually)
1297  * @input:  the xmlCharEncodingInputFunc to read that encoding
1298  * @output:  the xmlCharEncodingOutputFunc to write that encoding
1299  *
1300  * Create and registers an xmlCharEncodingHandler.
1301  *
1302  * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
1303  */
1304 xmlCharEncodingHandlerPtr
xmlNewCharEncodingHandler(const char * name,xmlCharEncodingInputFunc input,xmlCharEncodingOutputFunc output)1305 xmlNewCharEncodingHandler(const char *name,
1306                           xmlCharEncodingInputFunc input,
1307                           xmlCharEncodingOutputFunc output) {
1308     xmlCharEncodingHandlerPtr handler;
1309     const char *alias;
1310     char upper[500];
1311     int i;
1312     char *up = NULL;
1313 
1314     /*
1315      * Do the alias resolution
1316      */
1317     alias = xmlGetEncodingAlias(name);
1318     if (alias != NULL)
1319 	name = alias;
1320 
1321     /*
1322      * Keep only the uppercase version of the encoding.
1323      */
1324     if (name == NULL) {
1325         xmlEncodingErr(XML_I18N_NO_NAME,
1326 		       "xmlNewCharEncodingHandler : no name !\n", NULL);
1327 	return(NULL);
1328     }
1329     for (i = 0;i < 499;i++) {
1330         upper[i] = toupper(name[i]);
1331 	if (upper[i] == 0) break;
1332     }
1333     upper[i] = 0;
1334     up = xmlMemStrdup(upper);
1335     if (up == NULL) {
1336         xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1337 	return(NULL);
1338     }
1339 
1340     /*
1341      * allocate and fill-up an handler block.
1342      */
1343     handler = (xmlCharEncodingHandlerPtr)
1344               xmlMalloc(sizeof(xmlCharEncodingHandler));
1345     if (handler == NULL) {
1346         xmlFree(up);
1347         xmlEncodingErrMemory("xmlNewCharEncodingHandler : out of memory !\n");
1348 	return(NULL);
1349     }
1350     handler->input = input;
1351     handler->output = output;
1352     handler->name = up;
1353 
1354 #ifdef LIBXML_ICONV_ENABLED
1355     handler->iconv_in = NULL;
1356     handler->iconv_out = NULL;
1357 #endif
1358 #ifdef LIBXML_ICU_ENABLED
1359     handler->uconv_in = NULL;
1360     handler->uconv_out = NULL;
1361 #endif
1362 
1363     /*
1364      * registers and returns the handler.
1365      */
1366     xmlRegisterCharEncodingHandler(handler);
1367 #ifdef DEBUG_ENCODING
1368     xmlGenericError(xmlGenericErrorContext,
1369 	    "Registered encoding handler for %s\n", name);
1370 #endif
1371     return(handler);
1372 }
1373 
1374 /**
1375  * xmlInitCharEncodingHandlers:
1376  *
1377  * Initialize the char encoding support, it registers the default
1378  * encoding supported.
1379  * NOTE: while public, this function usually doesn't need to be called
1380  *       in normal processing.
1381  */
1382 void
xmlInitCharEncodingHandlers(void)1383 xmlInitCharEncodingHandlers(void) {
1384     unsigned short int tst = 0x1234;
1385     unsigned char *ptr = (unsigned char *) &tst;
1386 
1387     if (handlers != NULL) return;
1388 
1389     handlers = (xmlCharEncodingHandlerPtr *)
1390         xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
1391 
1392     if (*ptr == 0x12) xmlLittleEndian = 0;
1393     else if (*ptr == 0x34) xmlLittleEndian = 1;
1394     else {
1395         xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1396 	               "Odd problem at endianness detection\n", NULL);
1397     }
1398 
1399     if (handlers == NULL) {
1400         xmlEncodingErrMemory("xmlInitCharEncodingHandlers : out of memory !\n");
1401 	return;
1402     }
1403     xmlNewCharEncodingHandler("UTF-8", UTF8ToUTF8, UTF8ToUTF8);
1404 #ifdef LIBXML_OUTPUT_ENABLED
1405     xmlUTF16LEHandler =
1406           xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
1407     xmlUTF16BEHandler =
1408           xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
1409     xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16);
1410     xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
1411     xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
1412     xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii);
1413 #ifdef LIBXML_HTML_ENABLED
1414     xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml);
1415 #endif
1416 #else
1417     xmlUTF16LEHandler =
1418           xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL);
1419     xmlUTF16BEHandler =
1420           xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, NULL);
1421     xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL);
1422     xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL);
1423     xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL);
1424     xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL);
1425 #endif /* LIBXML_OUTPUT_ENABLED */
1426 #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
1427 #ifdef LIBXML_ISO8859X_ENABLED
1428     xmlRegisterCharEncodingHandlersISO8859x ();
1429 #endif
1430 #endif
1431 
1432 }
1433 
1434 /**
1435  * xmlCleanupCharEncodingHandlers:
1436  *
1437  * Cleanup the memory allocated for the char encoding support, it
1438  * unregisters all the encoding handlers and the aliases.
1439  */
1440 void
xmlCleanupCharEncodingHandlers(void)1441 xmlCleanupCharEncodingHandlers(void) {
1442     xmlCleanupEncodingAliases();
1443 
1444     if (handlers == NULL) return;
1445 
1446     for (;nbCharEncodingHandler > 0;) {
1447         nbCharEncodingHandler--;
1448 	if (handlers[nbCharEncodingHandler] != NULL) {
1449 	    if (handlers[nbCharEncodingHandler]->name != NULL)
1450 		xmlFree(handlers[nbCharEncodingHandler]->name);
1451 	    xmlFree(handlers[nbCharEncodingHandler]);
1452 	}
1453     }
1454     xmlFree(handlers);
1455     handlers = NULL;
1456     nbCharEncodingHandler = 0;
1457     xmlDefaultCharEncodingHandler = NULL;
1458 }
1459 
1460 /**
1461  * xmlRegisterCharEncodingHandler:
1462  * @handler:  the xmlCharEncodingHandlerPtr handler block
1463  *
1464  * Register the char encoding handler, surprising, isn't it ?
1465  */
1466 void
xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler)1467 xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
1468     if (handlers == NULL) xmlInitCharEncodingHandlers();
1469     if (handler == NULL) {
1470         xmlEncodingErr(XML_I18N_NO_HANDLER,
1471 		"xmlRegisterCharEncodingHandler: NULL handler !\n", NULL);
1472 	return;
1473     }
1474 
1475     if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
1476         xmlEncodingErr(XML_I18N_EXCESS_HANDLER,
1477 	"xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n",
1478 	               "MAX_ENCODING_HANDLERS");
1479 	return;
1480     }
1481     handlers[nbCharEncodingHandler++] = handler;
1482 }
1483 
1484 /**
1485  * xmlGetCharEncodingHandler:
1486  * @enc:  an xmlCharEncoding value.
1487  *
1488  * Search in the registered set the handler able to read/write that encoding.
1489  *
1490  * Returns the handler or NULL if not found
1491  */
1492 xmlCharEncodingHandlerPtr
xmlGetCharEncodingHandler(xmlCharEncoding enc)1493 xmlGetCharEncodingHandler(xmlCharEncoding enc) {
1494     xmlCharEncodingHandlerPtr handler;
1495 
1496     if (handlers == NULL) xmlInitCharEncodingHandlers();
1497     switch (enc) {
1498         case XML_CHAR_ENCODING_ERROR:
1499 	    return(NULL);
1500         case XML_CHAR_ENCODING_NONE:
1501 	    return(NULL);
1502         case XML_CHAR_ENCODING_UTF8:
1503 	    return(NULL);
1504         case XML_CHAR_ENCODING_UTF16LE:
1505 	    return(xmlUTF16LEHandler);
1506         case XML_CHAR_ENCODING_UTF16BE:
1507 	    return(xmlUTF16BEHandler);
1508         case XML_CHAR_ENCODING_EBCDIC:
1509             handler = xmlFindCharEncodingHandler("EBCDIC");
1510             if (handler != NULL) return(handler);
1511             handler = xmlFindCharEncodingHandler("ebcdic");
1512             if (handler != NULL) return(handler);
1513 	    break;
1514         case XML_CHAR_ENCODING_UCS4BE:
1515             handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1516             if (handler != NULL) return(handler);
1517             handler = xmlFindCharEncodingHandler("UCS-4");
1518             if (handler != NULL) return(handler);
1519             handler = xmlFindCharEncodingHandler("UCS4");
1520             if (handler != NULL) return(handler);
1521 	    break;
1522         case XML_CHAR_ENCODING_UCS4LE:
1523             handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
1524             if (handler != NULL) return(handler);
1525             handler = xmlFindCharEncodingHandler("UCS-4");
1526             if (handler != NULL) return(handler);
1527             handler = xmlFindCharEncodingHandler("UCS4");
1528             if (handler != NULL) return(handler);
1529 	    break;
1530         case XML_CHAR_ENCODING_UCS4_2143:
1531 	    break;
1532         case XML_CHAR_ENCODING_UCS4_3412:
1533 	    break;
1534         case XML_CHAR_ENCODING_UCS2:
1535             handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
1536             if (handler != NULL) return(handler);
1537             handler = xmlFindCharEncodingHandler("UCS-2");
1538             if (handler != NULL) return(handler);
1539             handler = xmlFindCharEncodingHandler("UCS2");
1540             if (handler != NULL) return(handler);
1541 	    break;
1542 
1543 	    /*
1544 	     * We used to keep ISO Latin encodings native in the
1545 	     * generated data. This led to so many problems that
1546 	     * this has been removed. One can still change this
1547 	     * back by registering no-ops encoders for those
1548 	     */
1549         case XML_CHAR_ENCODING_8859_1:
1550 	    handler = xmlFindCharEncodingHandler("ISO-8859-1");
1551 	    if (handler != NULL) return(handler);
1552 	    break;
1553         case XML_CHAR_ENCODING_8859_2:
1554 	    handler = xmlFindCharEncodingHandler("ISO-8859-2");
1555 	    if (handler != NULL) return(handler);
1556 	    break;
1557         case XML_CHAR_ENCODING_8859_3:
1558 	    handler = xmlFindCharEncodingHandler("ISO-8859-3");
1559 	    if (handler != NULL) return(handler);
1560 	    break;
1561         case XML_CHAR_ENCODING_8859_4:
1562 	    handler = xmlFindCharEncodingHandler("ISO-8859-4");
1563 	    if (handler != NULL) return(handler);
1564 	    break;
1565         case XML_CHAR_ENCODING_8859_5:
1566 	    handler = xmlFindCharEncodingHandler("ISO-8859-5");
1567 	    if (handler != NULL) return(handler);
1568 	    break;
1569         case XML_CHAR_ENCODING_8859_6:
1570 	    handler = xmlFindCharEncodingHandler("ISO-8859-6");
1571 	    if (handler != NULL) return(handler);
1572 	    break;
1573         case XML_CHAR_ENCODING_8859_7:
1574 	    handler = xmlFindCharEncodingHandler("ISO-8859-7");
1575 	    if (handler != NULL) return(handler);
1576 	    break;
1577         case XML_CHAR_ENCODING_8859_8:
1578 	    handler = xmlFindCharEncodingHandler("ISO-8859-8");
1579 	    if (handler != NULL) return(handler);
1580 	    break;
1581         case XML_CHAR_ENCODING_8859_9:
1582 	    handler = xmlFindCharEncodingHandler("ISO-8859-9");
1583 	    if (handler != NULL) return(handler);
1584 	    break;
1585 
1586 
1587         case XML_CHAR_ENCODING_2022_JP:
1588             handler = xmlFindCharEncodingHandler("ISO-2022-JP");
1589             if (handler != NULL) return(handler);
1590 	    break;
1591         case XML_CHAR_ENCODING_SHIFT_JIS:
1592             handler = xmlFindCharEncodingHandler("SHIFT-JIS");
1593             if (handler != NULL) return(handler);
1594             handler = xmlFindCharEncodingHandler("SHIFT_JIS");
1595             if (handler != NULL) return(handler);
1596             handler = xmlFindCharEncodingHandler("Shift_JIS");
1597             if (handler != NULL) return(handler);
1598 	    break;
1599         case XML_CHAR_ENCODING_EUC_JP:
1600             handler = xmlFindCharEncodingHandler("EUC-JP");
1601             if (handler != NULL) return(handler);
1602 	    break;
1603 	default:
1604 	    break;
1605     }
1606 
1607 #ifdef DEBUG_ENCODING
1608     xmlGenericError(xmlGenericErrorContext,
1609 	    "No handler found for encoding %d\n", enc);
1610 #endif
1611     return(NULL);
1612 }
1613 
1614 /**
1615  * xmlFindCharEncodingHandler:
1616  * @name:  a string describing the char encoding.
1617  *
1618  * Search in the registered set the handler able to read/write that encoding.
1619  *
1620  * Returns the handler or NULL if not found
1621  */
1622 xmlCharEncodingHandlerPtr
xmlFindCharEncodingHandler(const char * name)1623 xmlFindCharEncodingHandler(const char *name) {
1624     const char *nalias;
1625     const char *norig;
1626     xmlCharEncoding alias;
1627 #ifdef LIBXML_ICONV_ENABLED
1628     xmlCharEncodingHandlerPtr enc;
1629     iconv_t icv_in, icv_out;
1630 #endif /* LIBXML_ICONV_ENABLED */
1631 #ifdef LIBXML_ICU_ENABLED
1632     xmlCharEncodingHandlerPtr enc;
1633     uconv_t *ucv_in, *ucv_out;
1634 #endif /* LIBXML_ICU_ENABLED */
1635     char upper[100];
1636     int i;
1637 
1638     if (handlers == NULL) xmlInitCharEncodingHandlers();
1639     if (name == NULL) return(xmlDefaultCharEncodingHandler);
1640     if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
1641 
1642     /*
1643      * Do the alias resolution
1644      */
1645     norig = name;
1646     nalias = xmlGetEncodingAlias(name);
1647     if (nalias != NULL)
1648 	name = nalias;
1649 
1650     /*
1651      * Check first for directly registered encoding names
1652      */
1653     for (i = 0;i < 99;i++) {
1654         upper[i] = toupper(name[i]);
1655 	if (upper[i] == 0) break;
1656     }
1657     upper[i] = 0;
1658 
1659     for (i = 0;i < nbCharEncodingHandler; i++)
1660         if (!strcmp(upper, handlers[i]->name)) {
1661 #ifdef DEBUG_ENCODING
1662             xmlGenericError(xmlGenericErrorContext,
1663 		    "Found registered handler for encoding %s\n", name);
1664 #endif
1665 	    return(handlers[i]);
1666 	}
1667 
1668 #ifdef LIBXML_ICONV_ENABLED
1669     /* check whether iconv can handle this */
1670     icv_in = iconv_open("UTF-8", name);
1671     icv_out = iconv_open(name, "UTF-8");
1672     if (icv_in == (iconv_t) -1) {
1673         icv_in = iconv_open("UTF-8", upper);
1674     }
1675     if (icv_out == (iconv_t) -1) {
1676 	icv_out = iconv_open(upper, "UTF-8");
1677     }
1678     if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
1679 	    enc = (xmlCharEncodingHandlerPtr)
1680 	          xmlMalloc(sizeof(xmlCharEncodingHandler));
1681 	    if (enc == NULL) {
1682 	        iconv_close(icv_in);
1683 	        iconv_close(icv_out);
1684 		return(NULL);
1685 	    }
1686 	    enc->name = xmlMemStrdup(name);
1687 	    enc->input = NULL;
1688 	    enc->output = NULL;
1689 	    enc->iconv_in = icv_in;
1690 	    enc->iconv_out = icv_out;
1691 #ifdef DEBUG_ENCODING
1692             xmlGenericError(xmlGenericErrorContext,
1693 		    "Found iconv handler for encoding %s\n", name);
1694 #endif
1695 	    return enc;
1696     } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
1697 	    xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1698 		    "iconv : problems with filters for '%s'\n", name);
1699     }
1700 #endif /* LIBXML_ICONV_ENABLED */
1701 #ifdef LIBXML_ICU_ENABLED
1702     /* check whether icu can handle this */
1703     ucv_in = openIcuConverter(name, 1);
1704     ucv_out = openIcuConverter(name, 0);
1705     if (ucv_in != NULL && ucv_out != NULL) {
1706 	    enc = (xmlCharEncodingHandlerPtr)
1707 	          xmlMalloc(sizeof(xmlCharEncodingHandler));
1708 	    if (enc == NULL) {
1709                 closeIcuConverter(ucv_in);
1710                 closeIcuConverter(ucv_out);
1711 		return(NULL);
1712 	    }
1713 	    enc->name = xmlMemStrdup(name);
1714 	    enc->input = NULL;
1715 	    enc->output = NULL;
1716 	    enc->uconv_in = ucv_in;
1717 	    enc->uconv_out = ucv_out;
1718 #ifdef DEBUG_ENCODING
1719             xmlGenericError(xmlGenericErrorContext,
1720 		    "Found ICU converter handler for encoding %s\n", name);
1721 #endif
1722 	    return enc;
1723     } else if (ucv_in != NULL || ucv_out != NULL) {
1724             closeIcuConverter(ucv_in);
1725             closeIcuConverter(ucv_out);
1726 	    xmlEncodingErr(XML_ERR_INTERNAL_ERROR,
1727 		    "ICU converter : problems with filters for '%s'\n", name);
1728     }
1729 #endif /* LIBXML_ICU_ENABLED */
1730 
1731 #ifdef DEBUG_ENCODING
1732     xmlGenericError(xmlGenericErrorContext,
1733 	    "No handler found for encoding %s\n", name);
1734 #endif
1735 
1736     /*
1737      * Fallback using the canonical names
1738      */
1739     alias = xmlParseCharEncoding(norig);
1740     if (alias != XML_CHAR_ENCODING_ERROR) {
1741         const char* canon;
1742         canon = xmlGetCharEncodingName(alias);
1743         if ((canon != NULL) && (strcmp(name, canon))) {
1744 	    return(xmlFindCharEncodingHandler(canon));
1745         }
1746     }
1747 
1748     /* If "none of the above", give up */
1749     return(NULL);
1750 }
1751 
1752 /************************************************************************
1753  *									*
1754  *		ICONV based generic conversion functions		*
1755  *									*
1756  ************************************************************************/
1757 
#ifdef LIBXML_ICONV_ENABLED
/**
 * xmlIconvWrapper:
 * @cd:		iconv converter data structure
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of bytes in the source encoding of @cd
 * @inlen:  the length of @in
 *
 * Runs one iconv() conversion step and maps its outcome onto the
 * return codes used by the encoding handlers.
 *
 * Returns 0 if success, or
 *     -1 by lack of space (or if one of the arguments is NULL), or
 *     -2 if the transcoding fails (the input is not valid in the source
 *        encoding or cannot be represented in the target encoding), or
 *     -3 if the input ends with an incomplete sequence, or on any other
 *        failure.
 *
 * Once iconv() has been called, the value of @inlen after return is the
 * number of octets consumed and the value of @outlen is the number of
 * octets produced. When an argument is NULL, @outlen is set to 0 and
 * @inlen is left untouched.
 */
static int
xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
                const unsigned char *in, int *inlen) {
    size_t icv_inlen, icv_outlen;
    const char *icv_in = (const char *) in;
    char *icv_out = (char *) out;
    size_t ret;		/* iconv() returns a size_t, (size_t) -1 on error */

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
        if (outlen != NULL) *outlen = 0;
        return(-1);
    }
    icv_inlen = *inlen;
    icv_outlen = *outlen;
    ret = iconv(cd, (ICONV_CONST char **) &icv_in, &icv_inlen, &icv_out, &icv_outlen);
    /* iconv() leaves the remaining byte counts in icv_inlen/icv_outlen. */
    *inlen -= icv_inlen;
    *outlen -= icv_outlen;
    if ((icv_inlen != 0) || (ret == (size_t) -1)) {
#ifdef EILSEQ
        if (errno == EILSEQ)
            return -2;
#endif
#ifdef E2BIG
        if (errno == E2BIG)
            return -1;
#endif
        /* EINVAL (truncated input) and anything else. */
        return -3;
    }
    return 0;
}
#endif /* LIBXML_ICONV_ENABLED */
1817 
1818 /************************************************************************
1819  *									*
1820  *		ICU based generic conversion functions	         	*
1821  *									*
1822  ************************************************************************/
1823 
#ifdef LIBXML_ICU_ENABLED
/**
 * xmlUconvWrapper:
 * @cd: ICU uconverter data structure
 * @toUnicode : non-zero if toUnicode. 0 otherwise.
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of input bytes
 * @inlen:  the length of @in
 *
 * Converts between the encoding of @cd and UTF-8 with ucnv_convertEx(),
 * going through UTF-16: towards UTF-8 when @toUnicode is non-zero, from
 * UTF-8 otherwise.
 *
 * Returns 0 if success, or
 *     -1 by lack of space (or if one of the arguments is NULL), or
 *     -2 if the transcoding fails (invalid or illegal character found), or
 *     -3 on any other ICU error, e.g. a truncated character.
 *
 * After the conversion call, the value of @inlen is the number of octets
 * consumed and the value of @outlen is the number of octets written.
 * When an argument is NULL, @outlen is set to 0 and @inlen is left
 * untouched.
 */
static int
xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
                const unsigned char *in, int *inlen) {
    const char *src = (const char *) in;
    char *dst = (char *) out;
    const char *srcLimit;
    char *dstLimit;
    UErrorCode status = U_ZERO_ERROR;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
        if (outlen != NULL) *outlen = 0;
        return(-1);
    }

    srcLimit = src + *inlen;
    dstLimit = dst + *outlen;

    /*
     * TODO(jungshik)
     * 1. is ucnv_convert(To|From)Algorithmic better?
     * 2. had we better use an explicit pivot buffer?
     * 3. error returned comes from 'fromUnicode' only even
     *    when toUnicode is true !
     */
    if (toUnicode) {
        /* encoding => UTF-16 => UTF-8 */
        ucnv_convertEx(cd->utf8, cd->uconv, &dst, dstLimit,
                       &src, srcLimit, NULL, NULL, NULL, NULL,
                       0, TRUE, &status);
    } else {
        /* UTF-8 => UTF-16 => encoding */
        ucnv_convertEx(cd->uconv, cd->utf8, &dst, dstLimit,
                       &src, srcLimit, NULL, NULL, NULL, NULL,
                       0, TRUE, &status);
    }

    /* Both cursors were advanced by ICU: report how far they moved. */
    *inlen = src - (const char*) in;
    *outlen = dst - (char *) out;

    if (U_SUCCESS(status))
        return 0;

    switch (status) {
        case U_BUFFER_OVERFLOW_ERROR:
            return -1;
        case U_INVALID_CHAR_FOUND:
        case U_ILLEGAL_CHAR_FOUND:
            return -2;
        default:
            /* U_TRUNCATED_CHAR_FOUND and every other error */
            return -3;
    }
}
#endif /* LIBXML_ICU_ENABLED */
1886 
1887 /************************************************************************
1888  *									*
1889  *		The real API used by libxml for on-the-fly conversion	*
1890  *									*
1891  ************************************************************************/
1892 
1893 /**
1894  * xmlCharEncFirstLine:
1895  * @handler:	char enconding transformation data structure
1896  * @out:  an xmlBuffer for the output.
1897  * @in:  an xmlBuffer for the input
1898  *
1899  * Front-end for the encoding handler input function, but handle only
1900  * the very first line, i.e. limit itself to 45 chars.
1901  *
1902  * Returns the number of byte written if success, or
1903  *     -1 general error
1904  *     -2 if the transcoding fails (for *in is not valid utf8 string or
1905  *        the result of transformation can't fit into the encoding we want), or
1906  */
1907 int
xmlCharEncFirstLine(xmlCharEncodingHandler * handler,xmlBufferPtr out,xmlBufferPtr in)1908 xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
1909                  xmlBufferPtr in) {
1910     int ret = -2;
1911     int written;
1912     int toconv;
1913 
1914     if (handler == NULL) return(-1);
1915     if (out == NULL) return(-1);
1916     if (in == NULL) return(-1);
1917 
1918     /* calculate space available */
1919     written = out->size - out->use;
1920     toconv = in->use;
1921     /*
1922      * echo '<?xml version="1.0" encoding="UCS4"?>' | wc -c => 38
1923      * 45 chars should be sufficient to reach the end of the encoding
1924      * declaration without going too far inside the document content.
1925      * on UTF-16 this means 90bytes, on UCS4 this means 180
1926      */
1927     if (toconv > 180)
1928 	toconv  = 180;
1929     if (toconv * 2 >= written) {
1930         xmlBufferGrow(out, toconv);
1931 	written = out->size - out->use - 1;
1932     }
1933 
1934     if (handler->input != NULL) {
1935 	ret = handler->input(&out->content[out->use], &written,
1936 	                     in->content, &toconv);
1937 	xmlBufferShrink(in, toconv);
1938 	out->use += written;
1939 	out->content[out->use] = 0;
1940     }
1941 #ifdef LIBXML_ICONV_ENABLED
1942     else if (handler->iconv_in != NULL) {
1943 	ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
1944 	                      &written, in->content, &toconv);
1945 	xmlBufferShrink(in, toconv);
1946 	out->use += written;
1947 	out->content[out->use] = 0;
1948 	if (ret == -1) ret = -3;
1949     }
1950 #endif /* LIBXML_ICONV_ENABLED */
1951 #ifdef LIBXML_ICU_ENABLED
1952     else if (handler->uconv_in != NULL) {
1953 	ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use],
1954 	                      &written, in->content, &toconv);
1955 	xmlBufferShrink(in, toconv);
1956 	out->use += written;
1957 	out->content[out->use] = 0;
1958 	if (ret == -1) ret = -3;
1959     }
1960 #endif /* LIBXML_ICU_ENABLED */
1961 #ifdef DEBUG_ENCODING
1962     switch (ret) {
1963         case 0:
1964 	    xmlGenericError(xmlGenericErrorContext,
1965 		    "converted %d bytes to %d bytes of input\n",
1966 	            toconv, written);
1967 	    break;
1968         case -1:
1969 	    xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
1970 	            toconv, written, in->use);
1971 	    break;
1972         case -2:
1973 	    xmlGenericError(xmlGenericErrorContext,
1974 		    "input conversion failed due to input error\n");
1975 	    break;
1976         case -3:
1977 	    xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of input, %d left\n",
1978 	            toconv, written, in->use);
1979 	    break;
1980 	default:
1981 	    xmlGenericError(xmlGenericErrorContext,"Unknown input conversion failed %d\n", ret);
1982     }
1983 #endif /* DEBUG_ENCODING */
1984     /*
1985      * Ignore when input buffer is not on a boundary
1986      */
1987     if (ret == -3) ret = 0;
1988     if (ret == -1) ret = 0;
1989     return(ret);
1990 }
1991 
1992 /**
1993  * xmlCharEncInFunc:
1994  * @handler:	char encoding transformation data structure
1995  * @out:  an xmlBuffer for the output.
1996  * @in:  an xmlBuffer for the input
1997  *
1998  * Generic front-end for the encoding handler input function
1999  *
2000  * Returns the number of byte written if success, or
2001  *     -1 general error
2002  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2003  *        the result of transformation can't fit into the encoding we want), or
2004  */
2005 int
xmlCharEncInFunc(xmlCharEncodingHandler * handler,xmlBufferPtr out,xmlBufferPtr in)2006 xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
2007                  xmlBufferPtr in)
2008 {
2009     int ret = -2;
2010     int written;
2011     int toconv;
2012 
2013     if (handler == NULL)
2014         return (-1);
2015     if (out == NULL)
2016         return (-1);
2017     if (in == NULL)
2018         return (-1);
2019 
2020     toconv = in->use;
2021     if (toconv == 0)
2022         return (0);
2023     written = out->size - out->use;
2024     if (toconv * 2 >= written) {
2025         xmlBufferGrow(out, out->size + toconv * 2);
2026         written = out->size - out->use - 1;
2027     }
2028     if (handler->input != NULL) {
2029         ret = handler->input(&out->content[out->use], &written,
2030                              in->content, &toconv);
2031         xmlBufferShrink(in, toconv);
2032         out->use += written;
2033         out->content[out->use] = 0;
2034     }
2035 #ifdef LIBXML_ICONV_ENABLED
2036     else if (handler->iconv_in != NULL) {
2037         ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
2038                               &written, in->content, &toconv);
2039         xmlBufferShrink(in, toconv);
2040         out->use += written;
2041         out->content[out->use] = 0;
2042         if (ret == -1)
2043             ret = -3;
2044     }
2045 #endif /* LIBXML_ICONV_ENABLED */
2046 #ifdef LIBXML_ICU_ENABLED
2047     else if (handler->uconv_in != NULL) {
2048         ret = xmlUconvWrapper(handler->uconv_in, 1, &out->content[out->use],
2049                               &written, in->content, &toconv);
2050         xmlBufferShrink(in, toconv);
2051         out->use += written;
2052         out->content[out->use] = 0;
2053         if (ret == -1)
2054             ret = -3;
2055     }
2056 #endif /* LIBXML_ICU_ENABLED */
2057     switch (ret) {
2058         case 0:
2059 #ifdef DEBUG_ENCODING
2060             xmlGenericError(xmlGenericErrorContext,
2061                             "converted %d bytes to %d bytes of input\n",
2062                             toconv, written);
2063 #endif
2064             break;
2065         case -1:
2066 #ifdef DEBUG_ENCODING
2067             xmlGenericError(xmlGenericErrorContext,
2068                          "converted %d bytes to %d bytes of input, %d left\n",
2069                             toconv, written, in->use);
2070 #endif
2071             break;
2072         case -3:
2073 #ifdef DEBUG_ENCODING
2074             xmlGenericError(xmlGenericErrorContext,
2075                         "converted %d bytes to %d bytes of input, %d left\n",
2076                             toconv, written, in->use);
2077 #endif
2078             break;
2079         case -2: {
2080             char buf[50];
2081 
2082 	    snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2083 		     in->content[0], in->content[1],
2084 		     in->content[2], in->content[3]);
2085 	    buf[49] = 0;
2086 	    xmlEncodingErr(XML_I18N_CONV_FAILED,
2087 		    "input conversion failed due to input error, bytes %s\n",
2088 		           buf);
2089         }
2090     }
2091     /*
2092      * Ignore when input buffer is not on a boundary
2093      */
2094     if (ret == -3)
2095         ret = 0;
2096     return (written? written : ret);
2097 }
2098 
2099 /**
2100  * xmlCharEncOutFunc:
2101  * @handler:	char enconding transformation data structure
2102  * @out:  an xmlBuffer for the output.
2103  * @in:  an xmlBuffer for the input
2104  *
2105  * Generic front-end for the encoding handler output function
2106  * a first call with @in == NULL has to be made firs to initiate the
2107  * output in case of non-stateless encoding needing to initiate their
2108  * state or the output (like the BOM in UTF16).
2109  * In case of UTF8 sequence conversion errors for the given encoder,
2110  * the content will be automatically remapped to a CharRef sequence.
2111  *
2112  * Returns the number of byte written if success, or
2113  *     -1 general error
2114  *     -2 if the transcoding fails (for *in is not valid utf8 string or
2115  *        the result of transformation can't fit into the encoding we want), or
2116  */
2117 int
xmlCharEncOutFunc(xmlCharEncodingHandler * handler,xmlBufferPtr out,xmlBufferPtr in)2118 xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
2119                   xmlBufferPtr in) {
2120     int ret = -2;
2121     int written;
2122     int writtentot = 0;
2123     int toconv;
2124     int output = 0;
2125 
2126     if (handler == NULL) return(-1);
2127     if (out == NULL) return(-1);
2128 
2129 retry:
2130 
2131     written = out->size - out->use;
2132 
2133     if (written > 0)
2134 	written--; /* Gennady: count '/0' */
2135 
2136     /*
2137      * First specific handling of in = NULL, i.e. the initialization call
2138      */
2139     if (in == NULL) {
2140         toconv = 0;
2141 	if (handler->output != NULL) {
2142 	    ret = handler->output(&out->content[out->use], &written,
2143 				  NULL, &toconv);
2144 	    if (ret >= 0) { /* Gennady: check return value */
2145 		out->use += written;
2146 		out->content[out->use] = 0;
2147 	    }
2148 	}
2149 #ifdef LIBXML_ICONV_ENABLED
2150 	else if (handler->iconv_out != NULL) {
2151 	    ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
2152 				  &written, NULL, &toconv);
2153 	    out->use += written;
2154 	    out->content[out->use] = 0;
2155 	}
2156 #endif /* LIBXML_ICONV_ENABLED */
2157 #ifdef LIBXML_ICU_ENABLED
2158 	else if (handler->uconv_out != NULL) {
2159 	    ret = xmlUconvWrapper(handler->uconv_out, 0,
2160                               &out->content[out->use],
2161  				              &written, NULL, &toconv);
2162 	    out->use += written;
2163 	    out->content[out->use] = 0;
2164 	}
2165 #endif /* LIBXML_ICU_ENABLED */
2166 #ifdef DEBUG_ENCODING
2167 	xmlGenericError(xmlGenericErrorContext,
2168 		"initialized encoder\n");
2169 #endif
2170         return(0);
2171     }
2172 
2173     /*
2174      * Conversion itself.
2175      */
2176     toconv = in->use;
2177     if (toconv == 0)
2178 	return(0);
2179     if (toconv * 4 >= written) {
2180         xmlBufferGrow(out, toconv * 4);
2181 	written = out->size - out->use - 1;
2182     }
2183     if (handler->output != NULL) {
2184 	ret = handler->output(&out->content[out->use], &written,
2185 	                      in->content, &toconv);
2186 	if (written > 0) {
2187 	    xmlBufferShrink(in, toconv);
2188 	    out->use += written;
2189 	    writtentot += written;
2190 	}
2191 	out->content[out->use] = 0;
2192     }
2193 #ifdef LIBXML_ICONV_ENABLED
2194     else if (handler->iconv_out != NULL) {
2195 	ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
2196 	                      &written, in->content, &toconv);
2197 	xmlBufferShrink(in, toconv);
2198 	out->use += written;
2199 	writtentot += written;
2200 	out->content[out->use] = 0;
2201 	if (ret == -1) {
2202 	    if (written > 0) {
2203 		/*
2204 		 * Can be a limitation of iconv
2205 		 */
2206 		goto retry;
2207 	    }
2208 	    ret = -3;
2209 	}
2210     }
2211 #endif /* LIBXML_ICONV_ENABLED */
2212 #ifdef LIBXML_ICU_ENABLED
2213     else if (handler->uconv_out != NULL) {
2214 	ret = xmlUconvWrapper(handler->uconv_out, 0,
2215                               &out->content[out->use],
2216 	                      &written, in->content, &toconv);
2217 	xmlBufferShrink(in, toconv);
2218 	out->use += written;
2219 	writtentot += written;
2220 	out->content[out->use] = 0;
2221 	if (ret == -1) {
2222 	    if (written > 0) {
2223 		/*
2224 		 * Can be a limitation of iconv
2225 		 */
2226 		goto retry;
2227 	    }
2228 	    ret = -3;
2229 	}
2230     }
2231 #endif /* LIBXML_ICU_ENABLED */
2232     else {
2233 	xmlEncodingErr(XML_I18N_NO_OUTPUT,
2234 		       "xmlCharEncOutFunc: no output function !\n", NULL);
2235 	return(-1);
2236     }
2237 
2238     if (ret >= 0) output += ret;
2239 
2240     /*
2241      * Attempt to handle error cases
2242      */
2243     switch (ret) {
2244         case 0:
2245 #ifdef DEBUG_ENCODING
2246 	    xmlGenericError(xmlGenericErrorContext,
2247 		    "converted %d bytes to %d bytes of output\n",
2248 	            toconv, written);
2249 #endif
2250 	    break;
2251         case -1:
2252 #ifdef DEBUG_ENCODING
2253 	    xmlGenericError(xmlGenericErrorContext,
2254 		    "output conversion failed by lack of space\n");
2255 #endif
2256 	    break;
2257         case -3:
2258 #ifdef DEBUG_ENCODING
2259 	    xmlGenericError(xmlGenericErrorContext,"converted %d bytes to %d bytes of output %d left\n",
2260 	            toconv, written, in->use);
2261 #endif
2262 	    break;
2263         case -2: {
2264 	    int len = in->use;
2265 	    const xmlChar *utf = (const xmlChar *) in->content;
2266 	    int cur;
2267 
2268 	    cur = xmlGetUTF8Char(utf, &len);
2269 	    if (cur > 0) {
2270 		xmlChar charref[20];
2271 
2272 #ifdef DEBUG_ENCODING
2273 		xmlGenericError(xmlGenericErrorContext,
2274 			"handling output conversion error\n");
2275 		xmlGenericError(xmlGenericErrorContext,
2276 			"Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
2277 			in->content[0], in->content[1],
2278 			in->content[2], in->content[3]);
2279 #endif
2280 		/*
2281 		 * Removes the UTF8 sequence, and replace it by a charref
2282 		 * and continue the transcoding phase, hoping the error
2283 		 * did not mangle the encoder state.
2284 		 */
2285 		snprintf((char *) &charref[0], sizeof(charref), "&#%d;", cur);
2286 		xmlBufferShrink(in, len);
2287 		xmlBufferAddHead(in, charref, -1);
2288 
2289 		goto retry;
2290 	    } else {
2291 		char buf[50];
2292 
2293 		snprintf(&buf[0], 49, "0x%02X 0x%02X 0x%02X 0x%02X",
2294 			 in->content[0], in->content[1],
2295 			 in->content[2], in->content[3]);
2296 		buf[49] = 0;
2297 		xmlEncodingErr(XML_I18N_CONV_FAILED,
2298 		    "output conversion failed due to conv error, bytes %s\n",
2299 			       buf);
2300 		if (in->alloc != XML_BUFFER_ALLOC_IMMUTABLE)
2301 		    in->content[0] = ' ';
2302 	    }
2303 	    break;
2304 	}
2305     }
2306     return(ret);
2307 }
2308 
2309 /**
2310  * xmlCharEncCloseFunc:
2311  * @handler:	char enconding transformation data structure
2312  *
2313  * Generic front-end for encoding handler close function
2314  *
2315  * Returns 0 if success, or -1 in case of error
2316  */
2317 int
xmlCharEncCloseFunc(xmlCharEncodingHandler * handler)2318 xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
2319     int ret = 0;
2320     if (handler == NULL) return(-1);
2321     if (handler->name == NULL) return(-1);
2322 #ifdef LIBXML_ICONV_ENABLED
2323     /*
2324      * Iconv handlers can be used only once, free the whole block.
2325      * and the associated icon resources.
2326      */
2327     if ((handler->iconv_out != NULL) || (handler->iconv_in != NULL)) {
2328 	if (handler->name != NULL)
2329 	    xmlFree(handler->name);
2330 	handler->name = NULL;
2331 	if (handler->iconv_out != NULL) {
2332 	    if (iconv_close(handler->iconv_out))
2333 		ret = -1;
2334 	    handler->iconv_out = NULL;
2335 	}
2336 	if (handler->iconv_in != NULL) {
2337 	    if (iconv_close(handler->iconv_in))
2338 		ret = -1;
2339 	    handler->iconv_in = NULL;
2340 	}
2341 	xmlFree(handler);
2342     }
2343 #endif /* LIBXML_ICONV_ENABLED */
2344 #ifdef LIBXML_ICU_ENABLED
2345     if ((handler->uconv_out != NULL) || (handler->uconv_in != NULL)) {
2346 	if (handler->name != NULL)
2347 	    xmlFree(handler->name);
2348 	handler->name = NULL;
2349 	if (handler->uconv_out != NULL) {
2350 	    closeIcuConverter(handler->uconv_out);
2351 	    handler->uconv_out = NULL;
2352 	}
2353 	if (handler->uconv_in != NULL) {
2354 	    closeIcuConverter(handler->uconv_in);
2355 	    handler->uconv_in = NULL;
2356 	}
2357 	xmlFree(handler);
2358     }
2359 #endif
2360 #ifdef DEBUG_ENCODING
2361     if (ret)
2362         xmlGenericError(xmlGenericErrorContext,
2363 		"failed to close the encoding handler\n");
2364     else
2365         xmlGenericError(xmlGenericErrorContext,
2366 		"closed the encoding handler\n");
2367 #endif
2368 
2369     return(ret);
2370 }
2371 
2372 /**
2373  * xmlByteConsumed:
2374  * @ctxt: an XML parser context
2375  *
2376  * This function provides the current index of the parser relative
2377  * to the start of the current entity. This function is computed in
2378  * bytes from the beginning starting at zero and finishing at the
2379  * size in byte of the file if parsing a file. The function is
2380  * of constant cost if the input is UTF-8 but can be costly if run
2381  * on non-UTF-8 input.
2382  *
2383  * Returns the index in bytes from the beginning of the entity or -1
2384  *         in case the index could not be computed.
2385  */
2386 long
xmlByteConsumed(xmlParserCtxtPtr ctxt)2387 xmlByteConsumed(xmlParserCtxtPtr ctxt) {
2388     xmlParserInputPtr in;
2389 
2390     if (ctxt == NULL) return(-1);
2391     in = ctxt->input;
2392     if (in == NULL)  return(-1);
2393     if ((in->buf != NULL) && (in->buf->encoder != NULL)) {
2394         unsigned int unused = 0;
2395 	xmlCharEncodingHandler * handler = in->buf->encoder;
2396         /*
2397 	 * Encoding conversion, compute the number of unused original
2398 	 * bytes from the input not consumed and substract that from
2399 	 * the raw consumed value, this is not a cheap operation
2400 	 */
2401         if (in->end - in->cur > 0) {
2402 	    unsigned char convbuf[32000];
2403 	    const unsigned char *cur = (const unsigned char *)in->cur;
2404 	    int toconv = in->end - in->cur, written = 32000;
2405 
2406 	    int ret;
2407 
2408 	    if (handler->output != NULL) {
2409 	        do {
2410 		    toconv = in->end - cur;
2411 		    written = 32000;
2412 		    ret = handler->output(&convbuf[0], &written,
2413 				      cur, &toconv);
2414 		    if (ret == -1) return(-1);
2415 		    unused += written;
2416 		    cur += toconv;
2417 		} while (ret == -2);
2418 #ifdef LIBXML_ICONV_ENABLED
2419 	    } else if (handler->iconv_out != NULL) {
2420 	        do {
2421 		    toconv = in->end - cur;
2422 		    written = 32000;
2423 		    ret = xmlIconvWrapper(handler->iconv_out, &convbuf[0],
2424 	                      &written, cur, &toconv);
2425 		    if (ret < 0) {
2426 		        if (written > 0)
2427 			    ret = -2;
2428 			else
2429 			    return(-1);
2430 		    }
2431 		    unused += written;
2432 		    cur += toconv;
2433 		} while (ret == -2);
2434 #endif
2435 #ifdef LIBXML_ICU_ENABLED
2436 	    } else if (handler->uconv_out != NULL) {
2437 	        do {
2438 		    toconv = in->end - cur;
2439 		    written = 32000;
2440 		    ret = xmlUconvWrapper(handler->uconv_out, 0, &convbuf[0],
2441 	                      &written, cur, &toconv);
2442 		    if (ret < 0) {
2443 		        if (written > 0)
2444 			    ret = -2;
2445 			else
2446 			    return(-1);
2447 		    }
2448 		    unused += written;
2449 		    cur += toconv;
2450 		} while (ret == -2);
2451             } else {
2452 	        /* could not find a converter */
2453 	        return(-1);
2454 	    }
2455 	}
2456 	if (in->buf->rawconsumed < unused)
2457 	    return(-1);
2458 	return(in->buf->rawconsumed - unused);
2459     }
2460     return(in->consumed + (in->cur - in->base));
2461 }
2462 #endif
2463 
2464 #if !defined(LIBXML_ICONV_ENABLED) && !defined(LIBXML_ICU_ENABLED)
2465 #ifdef LIBXML_ISO8859X_ENABLED
2466 
/**
 * UTF8ToISO8859x:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 * @xlattable: the 2-level transcoding table
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO 8859-*
 * block of chars out.  Conversion stops, without error, when @out is full.
 * A call with @in == NULL is an initialization call and does nothing.
 *
 * Returns the number of bytes written if success, or
 *     -1 if @out, @outlen, @inlen or @xlattable is NULL, or
 *     -2 if the transcoding fails (invalid UTF-8 or character not in
 *        the target character set), or
 *     -3 if @in ends in the middle of a multi-byte sequence.
 * The value of @inlen after return is the number of octets consumed,
 *     always a whole number of characters; on error it is the offset of
 *     the character which could not be converted.
 * The value of @outlen after return is the number of octets produced.
 */
static int
UTF8ToISO8859x(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned char const *xlattable) {
    const unsigned char* outstart = out;
    const unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;
    /* end of the last character fully converted and stored */
    const unsigned char* processed = in;
    int ret = 0;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (xlattable == NULL))
        return(-1);
    if (in == NULL) {
        /*
        * initialization nothing to do
        */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    outend = out + ((*outlen > 0) ? *outlen : 0);
    inend = in + (*inlen);
    while (in < inend) {
        unsigned char d = *in++;

        if (d >= 0x80) {
            if (d < 0xC0) {
                /* trailing byte in leading position */
                ret = -2;
                break;
            } else if (d < 0xE0) {
                unsigned char c;

                if (in >= inend) {
                    /* trailing byte not in input buffer */
                    ret = -3;
                    break;
                }
                c = *in++;
                if ((c & 0xC0) != 0x80) {
                    /* not a trailing byte */
                    ret = -2;
                    break;
                }
                /* entries 0..31 select the 64-byte block for this lead */
                d = xlattable [48 + (c & 0x3F) + xlattable [d & 0x1F] * 64];
            } else if (d < 0xF0) {
                unsigned char c1;
                unsigned char c2;

                if (inend - in < 2) {
                    /* trailing bytes not in input buffer */
                    ret = -3;
                    break;
                }
                c1 = *in++;
                c2 = *in++;
                if (((c1 & 0xC0) != 0x80) || ((c2 & 0xC0) != 0x80)) {
                    /* not a trailing byte (c1 or c2) */
                    ret = -2;
                    break;
                }
                /* entries 32..47 select the block for this lead */
                d = xlattable [48 + (c2 & 0x3F) +
                        xlattable [48 + (c1 & 0x3F) +
                            xlattable [32 + (d & 0x0F)] * 64] * 64];
            } else {
                /* cannot transcode >= U+010000 */
                ret = -2;
                break;
            }
            if (d == 0) {
                /* not in character set */
                ret = -2;
                break;
            }
        }
        if (out >= outend) {
            /* output full: leave the rest of the input for the next call */
            break;
        }
        *out++ = d;
        processed = in;
    }
    *outlen = out - outstart;
    *inlen = processed - instart;
    return((ret < 0) ? ret : *outlen);
}
2583 
/**
 * ISO8859xToUTF8
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of ISO 8859-* chars
 * @inlen:  the length of @in
 * @unicodetable: 128 entries giving the Unicode value of bytes
 *                0x80..0xFF, 0 marking an undefined byte
 *
 * Take a block of ISO 8859-* chars in and try to convert it to an UTF-8
 * block of chars out.  Conversion stops, without error, as soon as the
 * next character does not fit completely in @out.
 *
 * Returns the number of bytes written if success, or -1 if an argument
 *     is NULL or if a byte has no mapping in @unicodetable
 * The value of @inlen after return is the number of octets consumed
 * The value of @outlen after return is the number of octets produced.
 */
static int
ISO8859xToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen,
              unsigned short const *unicodetable) {
    unsigned char* outstart = out;
    unsigned char* outend;
    const unsigned char* instart = in;
    const unsigned char* inend;

    if ((out == NULL) || (outlen == NULL) || (inlen == NULL) ||
        (in == NULL) || (unicodetable == NULL))
        return(-1);
    outend = out + ((*outlen > 0) ? *outlen : 0);
    inend = in + *inlen;

    /* the input is only read after checking that a byte is left */
    while (in < inend) {
        unsigned int c = *in;

        if (c < 0x80) {
            if (outend - out < 1)
                break;
            *out++ = (unsigned char) c;
        } else {
            c = unicodetable [c - 0x80];
            if (c == 0) {
                /* undefined code point */
                *outlen = out - outstart;
                *inlen = in - instart;
                return (-1);
            }
            if (c < 0x800) {
                if (outend - out < 2)
                    break;
                *out++ = (unsigned char) (((c >>  6) & 0x1F) | 0xC0);
                *out++ = (unsigned char) ((c & 0x3F) | 0x80);
            } else {
                /* a table entry is 16 bits wide: at most 3 bytes */
                if (outend - out < 3)
                    break;
                *out++ = (unsigned char) (((c >> 12) & 0x0F) | 0xE0);
                *out++ = (unsigned char) (((c >>  6) & 0x3F) | 0x80);
                *out++ = (unsigned char) ((c & 0x3F) | 0x80);
            }
        }
        ++in;
    }
    *outlen = out - outstart;
    *inlen = in - instart;
    return (*outlen);
}
2650 
2651 
2652 /************************************************************************
2653  * Lookup tables for ISO-8859-2..ISO-8859-16 transcoding                *
2654  ************************************************************************/
2655 
/*
 * Byte-to-Unicode map with 128 entries.  Judging by its name and by the
 * way ISO8859xToUTF8() indexes its @unicodetable argument (byte - 0x80,
 * a value of 0 meaning "undefined"), entry i presumably holds the Unicode
 * value of ISO-8859-2 byte 0x80 + i; the place where the table is handed
 * to that function is not visible here -- TODO confirm.
 */
2656 static unsigned short const xmlunicodetable_ISO8859_2 [128] = {
2657     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2658     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2659     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2660     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2661     0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7,
2662     0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b,
2663     0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7,
2664     0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c,
2665     0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7,
2666     0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e,
2667     0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7,
2668     0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df,
2669     0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7,
2670     0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f,
2671     0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7,
2672     0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9,
2673 };
2674 
/*
 * Multi-level lookup table: 48 index bytes followed by six 64-byte blocks.
 * Presumably the @xlattable handed to UTF8ToISO8859x() for ISO-8859-2 (the
 * call site is not visible here -- TODO confirm).  That function reads
 * entries 0..31 with the low 5 bits of a 2-byte UTF-8 lead byte and entries
 * 32..47 with the low 4 bits of a 3-byte lead byte, then walks the 64-byte
 * blocks starting at offset 48 with the low 6 bits of each trailing byte;
 * a final value of 0 means the character is not in the target set.
 */
2675 static unsigned char const xmltranscodetable_ISO8859_2 [48 + 6 * 64] = {
2676     "\x00\x00\x01\x05\x02\x04\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
2677     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2678     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2679     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2680     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2681     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2682     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2683     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2684     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2685     "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
2686     "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
2687     "\x00\x00\xc3\xe3\xa1\xb1\xc6\xe6\x00\x00\x00\x00\xc8\xe8\xcf\xef"
2688     "\xd0\xf0\x00\x00\x00\x00\x00\x00\xca\xea\xcc\xec\x00\x00\x00\x00"
2689     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2690     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc5\xe5\x00\x00\xa5\xb5\x00"
2691     "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
2692     "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\xb2\x00\xbd\x00\x00"
2693     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2694     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2695     "\x00\xa3\xb3\xd1\xf1\x00\x00\xd2\xf2\x00\x00\x00\x00\x00\x00\x00"
2696     "\xd5\xf5\x00\x00\xc0\xe0\x00\x00\xd8\xf8\xa6\xb6\x00\x00\xaa\xba"
2697     "\xa9\xb9\xde\xfe\xab\xbb\x00\x00\x00\x00\x00\x00\x00\x00\xd9\xf9"
2698     "\xdb\xfb\x00\x00\x00\x00\x00\x00\x00\xac\xbc\xaf\xbf\xae\xbe\x00"
2699     "\x00\xc1\xc2\x00\xc4\x00\x00\xc7\x00\xc9\x00\xcb\x00\xcd\xce\x00"
2700     "\x00\x00\x00\xd3\xd4\x00\xd6\xd7\x00\x00\xda\x00\xdc\xdd\x00\xdf"
2701     "\x00\xe1\xe2\x00\xe4\x00\x00\xe7\x00\xe9\x00\xeb\x00\xed\xee\x00"
2702     "\x00\x00\x00\xf3\xf4\x00\xf6\xf7\x00\x00\xfa\x00\xfc\xfd\x00\x00"
2703 };
2704 
/*
 * xmlunicodetable_ISO8859_3:
 *
 * 128 16-bit entries; going by its name, the ISO-8859-3 to Unicode table
 * for the upper half of the byte range. The first 32 entries are the
 * identity values 0x0080..0x009f. Several entries are 0x0000, which
 * appears to mark bytes that have no assigned character.
 * NOTE(review): presumably indexed by (byte - 0x80) by the conversion
 * code, which is outside this excerpt -- confirm.
 */
2705 static unsigned short const xmlunicodetable_ISO8859_3 [128] = {
2706     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2707     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2708     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2709     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2710     0x00a0, 0x0126, 0x02d8, 0x00a3, 0x00a4, 0x0000, 0x0124, 0x00a7,
2711     0x00a8, 0x0130, 0x015e, 0x011e, 0x0134, 0x00ad, 0x0000, 0x017b,
2712     0x00b0, 0x0127, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x0125, 0x00b7,
2713     0x00b8, 0x0131, 0x015f, 0x011f, 0x0135, 0x00bd, 0x0000, 0x017c,
2714     0x00c0, 0x00c1, 0x00c2, 0x0000, 0x00c4, 0x010a, 0x0108, 0x00c7,
2715     0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
2716     0x0000, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x0120, 0x00d6, 0x00d7,
2717     0x011c, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x016c, 0x015c, 0x00df,
2718     0x00e0, 0x00e1, 0x00e2, 0x0000, 0x00e4, 0x010b, 0x0109, 0x00e7,
2719     0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
2720     0x0000, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x0121, 0x00f6, 0x00f7,
2721     0x011d, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x016d, 0x015d, 0x02d9,
2722 };
2723 
/*
 * xmltranscodetable_ISO8859_3:
 *
 * Byte lookup table sized as 48 + 7 * 64, which suggests a 48-byte index
 * header followed by seven 64-byte blocks. The string literal supplies
 * exactly that many bytes, so no terminating NUL is stored in the array.
 * NOTE(review): presumably the reverse (Unicode/UTF-8 to ISO-8859-3)
 * mapping, with header bytes selecting a block and a 0 result meaning
 * "not representable"; the lookup code is outside this excerpt -- confirm.
 */
2724 static unsigned char const xmltranscodetable_ISO8859_3 [48 + 7 * 64] = {
2725     "\x04\x00\x01\x06\x02\x05\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00"
2726     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2727     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2728     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2729     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2730     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2731     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2732     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2733     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2734     "\xa0\x00\x00\xa3\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\x00"
2735     "\xb0\x00\xb2\xb3\xb4\xb5\x00\xb7\xb8\x00\x00\x00\x00\xbd\x00\x00"
2736     "\x00\x00\x00\x00\x00\x00\x00\x00\xc6\xe6\xc5\xe5\x00\x00\x00\x00"
2737     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd8\xf8\xab\xbb"
2738     "\xd5\xf5\x00\x00\xa6\xb6\xa1\xb1\x00\x00\x00\x00\x00\x00\x00\x00"
2739     "\xa9\xb9\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2740     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2741     "\x00\x00\x00\x00\x00\x00\x00\x00\xa2\xff\x00\x00\x00\x00\x00\x00"
2742     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2743     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2744     "\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2745     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2746     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2747     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2748     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2749     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe\xaa\xba"
2750     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00"
2751     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaf\xbf\x00\x00\x00"
2752     "\xc0\xc1\xc2\x00\xc4\x00\x00\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2753     "\x00\xd1\xd2\xd3\xd4\x00\xd6\xd7\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
2754     "\xe0\xe1\xe2\x00\xe4\x00\x00\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2755     "\x00\xf1\xf2\xf3\xf4\x00\xf6\xf7\x00\xf9\xfa\xfb\xfc\x00\x00\x00"
2756 };
2757 
/*
 * xmlunicodetable_ISO8859_4:
 *
 * 128 16-bit entries; going by its name, the ISO-8859-4 to Unicode table
 * for the upper half of the byte range. The first 32 entries are the
 * identity values 0x0080..0x009f, and every entry is non-zero.
 * NOTE(review): presumably indexed by (byte - 0x80) by the conversion
 * code, which is outside this excerpt -- confirm.
 */
2758 static unsigned short const xmlunicodetable_ISO8859_4 [128] = {
2759     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2760     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2761     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2762     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2763     0x00a0, 0x0104, 0x0138, 0x0156, 0x00a4, 0x0128, 0x013b, 0x00a7,
2764     0x00a8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00ad, 0x017d, 0x00af,
2765     0x00b0, 0x0105, 0x02db, 0x0157, 0x00b4, 0x0129, 0x013c, 0x02c7,
2766     0x00b8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014a, 0x017e, 0x014b,
2767     0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
2768     0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x012a,
2769     0x0110, 0x0145, 0x014c, 0x0136, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
2770     0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x0168, 0x016a, 0x00df,
2771     0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
2772     0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x012b,
2773     0x0111, 0x0146, 0x014d, 0x0137, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
2774     0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x0169, 0x016b, 0x02d9,
2775 };
2776 
/*
 * xmltranscodetable_ISO8859_4:
 *
 * Byte lookup table sized as 48 + 6 * 64, which suggests a 48-byte index
 * header followed by six 64-byte blocks. The string literal supplies
 * exactly that many bytes, so no terminating NUL is stored in the array.
 * NOTE(review): presumably the reverse (Unicode/UTF-8 to ISO-8859-4)
 * mapping, with header bytes selecting a block and a 0 result meaning
 * "not representable"; the lookup code is outside this excerpt -- confirm.
 */
2777 static unsigned char const xmltranscodetable_ISO8859_4 [48 + 6 * 64] = {
2778     "\x00\x00\x01\x05\x02\x03\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00"
2779     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2780     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2781     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2782     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2783     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2784     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2785     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2786     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2787     "\xa0\x00\x00\x00\xa4\x00\x00\xa7\xa8\x00\x00\x00\x00\xad\x00\xaf"
2788     "\xb0\x00\x00\x00\xb4\x00\x00\x00\xb8\x00\x00\x00\x00\x00\x00\x00"
2789     "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
2790     "\xd0\xf0\xaa\xba\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
2791     "\x00\x00\xab\xbb\x00\x00\x00\x00\xa5\xb5\xcf\xef\x00\x00\xc7\xe7"
2792     "\x00\x00\x00\x00\x00\x00\xd3\xf3\xa2\x00\x00\xa6\xb6\x00\x00\x00"
2793     "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xbd\xbf\xd2\xf2\x00\x00"
2794     "\x00\x00\x00\x00\x00\x00\xa3\xb3\x00\x00\x00\x00\x00\x00\x00\x00"
2795     "\xa9\xb9\x00\x00\x00\x00\xac\xbc\xdd\xfd\xde\xfe\x00\x00\x00\x00"
2796     "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xae\xbe\x00"
2797     "\x00\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
2798     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\xb2\x00\x00\x00\x00"
2799     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2800     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2801     "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\x00"
2802     "\x00\x00\x00\x00\xd4\xd5\xd6\xd7\xd8\x00\xda\xdb\xdc\x00\x00\xdf"
2803     "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\x00"
2804     "\x00\x00\x00\x00\xf4\xf5\xf6\xf7\xf8\x00\xfa\xfb\xfc\x00\x00\x00"
2805 };
2806 
/*
 * xmlunicodetable_ISO8859_5:
 *
 * 128 16-bit entries; going by its name, the ISO-8859-5 to Unicode table
 * for the upper half of the byte range. The first 32 entries are the
 * identity values 0x0080..0x009f; most of the remainder lies in the
 * 0x0401..0x045f range, and every entry is non-zero.
 * NOTE(review): presumably indexed by (byte - 0x80) by the conversion
 * code, which is outside this excerpt -- confirm.
 */
2807 static unsigned short const xmlunicodetable_ISO8859_5 [128] = {
2808     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2809     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2810     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2811     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2812     0x00a0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
2813     0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x00ad, 0x040e, 0x040f,
2814     0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
2815     0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
2816     0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
2817     0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
2818     0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
2819     0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f,
2820     0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
2821     0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f,
2822     0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
2823     0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x00a7, 0x045e, 0x045f,
2824 };
2825 
/*
 * xmltranscodetable_ISO8859_5:
 *
 * Byte lookup table sized as 48 + 6 * 64, which suggests a 48-byte index
 * header followed by six 64-byte blocks. The string literal supplies
 * exactly that many bytes, so no terminating NUL is stored in the array.
 * NOTE(review): presumably the reverse (Unicode/UTF-8 to ISO-8859-5)
 * mapping, with header bytes selecting a block and a 0 result meaning
 * "not representable"; the lookup code is outside this excerpt -- confirm.
 */
2826 static unsigned char const xmltranscodetable_ISO8859_5 [48 + 6 * 64] = {
2827     "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2828     "\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2829     "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2830     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2831     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2832     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2833     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2834     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2835     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2836     "\xa0\x00\x00\x00\x00\x00\x00\xfd\x00\x00\x00\x00\x00\xad\x00\x00"
2837     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2838     "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\x00\xae\xaf"
2839     "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
2840     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2841     "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
2842     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2843     "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\xfe\xff"
2844     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2845     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2846     "\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2847     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2848     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2849     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2850     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2851     "\x00\x00\x00\x00\x00\x00\xf0\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2852     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2853     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2854 };
2855 
/*
 * xmlunicodetable_ISO8859_6:
 *
 * 128 16-bit entries; going by its name, the ISO-8859-6 to Unicode table
 * for the upper half of the byte range. The first 32 entries are the
 * identity values 0x0080..0x009f. Many entries are 0x0000, which appears
 * to mark bytes that have no assigned character.
 * NOTE(review): presumably indexed by (byte - 0x80) by the conversion
 * code, which is outside this excerpt -- confirm.
 */
2856 static unsigned short const xmlunicodetable_ISO8859_6 [128] = {
2857     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2858     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2859     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2860     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2861     0x00a0, 0x0000, 0x0000, 0x0000, 0x00a4, 0x0000, 0x0000, 0x0000,
2862     0x0000, 0x0000, 0x0000, 0x0000, 0x060c, 0x00ad, 0x0000, 0x0000,
2863     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2864     0x0000, 0x0000, 0x0000, 0x061b, 0x0000, 0x0000, 0x0000, 0x061f,
2865     0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
2866     0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
2867     0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
2868     0x0638, 0x0639, 0x063a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2869     0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
2870     0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
2871     0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2872     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2873 };
2874 
/*
 * xmltranscodetable_ISO8859_6:
 *
 * Byte lookup table sized as 48 + 5 * 64, which suggests a 48-byte index
 * header followed by five 64-byte blocks. The string literal supplies
 * exactly that many bytes, so no terminating NUL is stored in the array.
 * NOTE(review): presumably the reverse (Unicode/UTF-8 to ISO-8859-6)
 * mapping, with header bytes selecting a block and a 0 result meaning
 * "not representable"; the lookup code is outside this excerpt -- confirm.
 */
2875 static unsigned char const xmltranscodetable_ISO8859_6 [48 + 5 * 64] = {
2876     "\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2877     "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x04\x00\x00\x00\x00\x00\x00"
2878     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2879     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2880     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2881     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2882     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2883     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2884     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2885     "\xa0\x00\x00\x00\xa4\x00\x00\x00\x00\x00\x00\x00\x00\xad\x00\x00"
2886     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2887     "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2888     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2889     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2890     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2891     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x00\x00\x00"
2892     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xbb\x00\x00\x00\xbf"
2893     "\x00\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2894     "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\x00"
2895     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2896     "\xf0\xf1\xf2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2897     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2898     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2899 };
2900 
/*
 * xmlunicodetable_ISO8859_7:
 *
 * 128 16-bit entries; going by its name, the ISO-8859-7 to Unicode table
 * for the upper half of the byte range. The first 32 entries are the
 * identity values 0x0080..0x009f. A few entries are 0x0000, which appears
 * to mark bytes that have no assigned character.
 * NOTE(review): presumably indexed by (byte - 0x80) by the conversion
 * code, which is outside this excerpt -- confirm.
 */
2901 static unsigned short const xmlunicodetable_ISO8859_7 [128] = {
2902     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2903     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2904     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2905     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2906     0x00a0, 0x2018, 0x2019, 0x00a3, 0x0000, 0x0000, 0x00a6, 0x00a7,
2907     0x00a8, 0x00a9, 0x0000, 0x00ab, 0x00ac, 0x00ad, 0x0000, 0x2015,
2908     0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x0384, 0x0385, 0x0386, 0x00b7,
2909     0x0388, 0x0389, 0x038a, 0x00bb, 0x038c, 0x00bd, 0x038e, 0x038f,
2910     0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
2911     0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
2912     0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
2913     0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af,
2914     0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7,
2915     0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf,
2916     0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7,
2917     0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, 0x0000,
2918 };
2919 
/*
 * xmltranscodetable_ISO8859_7:
 *
 * Byte lookup table sized as 48 + 7 * 64, which suggests a 48-byte index
 * header followed by seven 64-byte blocks. The string literal supplies
 * exactly that many bytes, so no terminating NUL is stored in the array.
 * NOTE(review): presumably the reverse (Unicode/UTF-8 to ISO-8859-7)
 * mapping, with header bytes selecting a block and a 0 result meaning
 * "not representable"; the lookup code is outside this excerpt -- confirm.
 */
2920 static unsigned char const xmltranscodetable_ISO8859_7 [48 + 7 * 64] = {
2921     "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x06"
2922     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2923     "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2924     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2925     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2926     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2927     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2928     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2929     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2930     "\xa0\x00\x00\xa3\x00\x00\xa6\xa7\xa8\xa9\x00\xab\xac\xad\x00\x00"
2931     "\xb0\xb1\xb2\xb3\x00\x00\x00\xb7\x00\x00\x00\xbb\x00\xbd\x00\x00"
2932     "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2933     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2934     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2935     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2936     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2937     "\x00\x00\x00\x00\x00\xaf\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00"
2938     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2939     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2940     "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2941     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2942     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2943     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2944     "\x00\x00\x00\x00\xb4\xb5\xb6\x00\xb8\xb9\xba\x00\xbc\x00\xbe\xbf"
2945     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
2946     "\xd0\xd1\x00\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
2947     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
2948     "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\x00"
2949     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2950     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2951     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2952 };
2953 
/*
 * xmlunicodetable_ISO8859_8:
 *
 * 128 16-bit entries; going by its name, the ISO-8859-8 to Unicode table
 * for the upper half of the byte range. The first 32 entries are the
 * identity values 0x0080..0x009f. Many entries are 0x0000, which appears
 * to mark bytes that have no assigned character.
 * NOTE(review): presumably indexed by (byte - 0x80) by the conversion
 * code, which is outside this excerpt -- confirm.
 */
2954 static unsigned short const xmlunicodetable_ISO8859_8 [128] = {
2955     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
2956     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
2957     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
2958     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
2959     0x00a0, 0x0000, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
2960     0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
2961     0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
2962     0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x0000,
2963     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2964     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2965     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
2966     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
2967     0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
2968     0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
2969     0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
2970     0x05e8, 0x05e9, 0x05ea, 0x0000, 0x0000, 0x200e, 0x200f, 0x0000,
2971 };
2972 
/*
 * xmltranscodetable_ISO8859_8:
 *
 * Byte lookup table sized as 48 + 7 * 64, which suggests a 48-byte index
 * header followed by seven 64-byte blocks. The string literal supplies
 * exactly that many bytes, so no terminating NUL is stored in the array.
 * NOTE(review): presumably the reverse (Unicode/UTF-8 to ISO-8859-8)
 * mapping, with header bytes selecting a block and a 0 result meaning
 * "not representable"; the lookup code is outside this excerpt -- confirm.
 */
2973 static unsigned char const xmltranscodetable_ISO8859_8 [48 + 7 * 64] = {
2974     "\x02\x00\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2975     "\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00"
2976     "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2977     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2978     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2979     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2980     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2981     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
2982     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
2983     "\xa0\x00\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\x00\xab\xac\xad\xae\xaf"
2984     "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\x00\xbb\xbc\xbd\xbe\x00"
2985     "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2986     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2987     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2988     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2989     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2990     "\x00\x00\x00\x00\x00\x00\x00\xaa\x00\x00\x00\x00\x00\x00\x00\x00"
2991     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2992     "\x00\x00\x00\x00\x00\x00\x00\xba\x00\x00\x00\x00\x00\x00\x00\x00"
2993     "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2994     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2995     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2996     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
2997     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xfd\xfe"
2998     "\x00\x00\x00\x00\x00\x00\x00\xdf\x00\x00\x00\x00\x00\x00\x00\x00"
2999     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3000     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3001     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3002     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3003     "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\x00\x00\x00\x00\x00"
3004     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3005 };
3006 
/*
 * xmlunicodetable_ISO8859_9:
 *
 * 128 16-bit entries; going by its name, the ISO-8859-9 to Unicode table
 * for the upper half of the byte range. All but six entries are the
 * identity value 0x0080 + index; the exceptions are 0x011e, 0x0130,
 * 0x015e, 0x011f, 0x0131 and 0x015f. Every entry is non-zero.
 * NOTE(review): presumably indexed by (byte - 0x80) by the conversion
 * code, which is outside this excerpt -- confirm.
 */
3007 static unsigned short const xmlunicodetable_ISO8859_9 [128] = {
3008     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3009     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3010     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3011     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3012     0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
3013     0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3014     0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
3015     0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
3016     0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3017     0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3018     0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3019     0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
3020     0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3021     0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3022     0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3023     0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff,
3024 };
3025 
/*
 * xmltranscodetable_ISO8859_9:
 *
 * Byte lookup table sized as 48 + 5 * 64, which suggests a 48-byte index
 * header followed by five 64-byte blocks. The string literal supplies
 * exactly that many bytes, so no terminating NUL is stored in the array.
 * NOTE(review): presumably the reverse (Unicode/UTF-8 to ISO-8859-9)
 * mapping, with header bytes selecting a block and a 0 result meaning
 * "not representable"; the lookup code is outside this excerpt -- confirm.
 */
3026 static unsigned char const xmltranscodetable_ISO8859_9 [48 + 5 * 64] = {
3027     "\x00\x00\x01\x02\x03\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3028     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3029     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3030     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3031     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3032     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3033     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3034     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3035     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3036     "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3037     "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3038     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3039     "\x00\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\x00\x00\xdf"
3040     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3041     "\x00\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\x00\x00\xff"
3042     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3043     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd0\xf0"
3044     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3045     "\xdd\xfd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3046     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3047     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xde\xfe"
3048     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3049     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3050 };
3051 
/*
 * xmlunicodetable_ISO8859_10:
 *
 * 128 16-bit entries; going by its name, the ISO-8859-10 to Unicode table
 * for the upper half of the byte range. The first 32 entries are the
 * identity values 0x0080..0x009f, and every entry is non-zero.
 * NOTE(review): presumably indexed by (byte - 0x80) by the conversion
 * code, which is outside this excerpt -- confirm.
 */
3052 static unsigned short const xmlunicodetable_ISO8859_10 [128] = {
3053     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3054     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3055     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3056     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3057     0x00a0, 0x0104, 0x0112, 0x0122, 0x012a, 0x0128, 0x0136, 0x00a7,
3058     0x013b, 0x0110, 0x0160, 0x0166, 0x017d, 0x00ad, 0x016a, 0x014a,
3059     0x00b0, 0x0105, 0x0113, 0x0123, 0x012b, 0x0129, 0x0137, 0x00b7,
3060     0x013c, 0x0111, 0x0161, 0x0167, 0x017e, 0x2015, 0x016b, 0x014b,
3061     0x0100, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x012e,
3062     0x010c, 0x00c9, 0x0118, 0x00cb, 0x0116, 0x00cd, 0x00ce, 0x00cf,
3063     0x00d0, 0x0145, 0x014c, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0168,
3064     0x00d8, 0x0172, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3065     0x0101, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x012f,
3066     0x010d, 0x00e9, 0x0119, 0x00eb, 0x0117, 0x00ed, 0x00ee, 0x00ef,
3067     0x00f0, 0x0146, 0x014d, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0169,
3068     0x00f8, 0x0173, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x0138,
3069 };
3070 
/*
 * xmltranscodetable_ISO8859_10:
 *
 * Byte lookup table sized as 48 + 7 * 64, which suggests a 48-byte index
 * header followed by seven 64-byte blocks. The string literal supplies
 * exactly that many bytes, so no terminating NUL is stored in the array.
 * NOTE(review): presumably the reverse (Unicode/UTF-8 to ISO-8859-10)
 * mapping, with header bytes selecting a block and a 0 result meaning
 * "not representable"; the lookup code is outside this excerpt -- confirm.
 */
3071 static unsigned char const xmltranscodetable_ISO8859_10 [48 + 7 * 64] = {
3072     "\x00\x00\x01\x06\x02\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3073     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3074     "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3075     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3076     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3077     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3078     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3079     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3080     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3081     "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\x00\x00\x00\x00\xad\x00\x00"
3082     "\xb0\x00\x00\x00\x00\x00\x00\xb7\x00\x00\x00\x00\x00\x00\x00\x00"
3083     "\xc0\xe0\x00\x00\xa1\xb1\x00\x00\x00\x00\x00\x00\xc8\xe8\x00\x00"
3084     "\xa9\xb9\xa2\xb2\x00\x00\xcc\xec\xca\xea\x00\x00\x00\x00\x00\x00"
3085     "\x00\x00\xa3\xb3\x00\x00\x00\x00\xa5\xb5\xa4\xb4\x00\x00\xc7\xe7"
3086     "\x00\x00\x00\x00\x00\x00\xa6\xb6\xff\x00\x00\xa8\xb8\x00\x00\x00"
3087     "\x00\x00\x00\x00\x00\xd1\xf1\x00\x00\x00\xaf\xbf\xd2\xf2\x00\x00"
3088     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3089     "\xaa\xba\x00\x00\x00\x00\xab\xbb\xd7\xf7\xae\xbe\x00\x00\x00\x00"
3090     "\x00\x00\xd9\xf9\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\xbc\x00"
3091     "\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3092     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3093     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3094     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3095     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3096     "\x00\x00\x00\x00\x00\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3097     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3098     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3099     "\x00\xc1\xc2\xc3\xc4\xc5\xc6\x00\x00\xc9\x00\xcb\x00\xcd\xce\xcf"
3100     "\xd0\x00\x00\xd3\xd4\xd5\xd6\x00\xd8\x00\xda\xdb\xdc\xdd\xde\xdf"
3101     "\x00\xe1\xe2\xe3\xe4\xe5\xe6\x00\x00\xe9\x00\xeb\x00\xed\xee\xef"
3102     "\xf0\x00\x00\xf3\xf4\xf5\xf6\x00\xf8\x00\xfa\xfb\xfc\xfd\xfe\x00"
3103 };
3104 
/*
 * xmlunicodetable_ISO8859_11:
 *
 * 128 16-bit entries; going by its name, the ISO-8859-11 to Unicode table
 * for the upper half of the byte range. The first 32 entries are the
 * identity values 0x0080..0x009f; most of the remainder lies in the
 * 0x0e01..0x0e5b range. Eight entries are 0x0000, which appears to mark
 * bytes that have no assigned character.
 * NOTE(review): presumably indexed by (byte - 0x80) by the conversion
 * code, which is outside this excerpt -- confirm.
 */
3105 static unsigned short const xmlunicodetable_ISO8859_11 [128] = {
3106     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3107     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3108     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3109     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3110     0x00a0, 0x0e01, 0x0e02, 0x0e03, 0x0e04, 0x0e05, 0x0e06, 0x0e07,
3111     0x0e08, 0x0e09, 0x0e0a, 0x0e0b, 0x0e0c, 0x0e0d, 0x0e0e, 0x0e0f,
3112     0x0e10, 0x0e11, 0x0e12, 0x0e13, 0x0e14, 0x0e15, 0x0e16, 0x0e17,
3113     0x0e18, 0x0e19, 0x0e1a, 0x0e1b, 0x0e1c, 0x0e1d, 0x0e1e, 0x0e1f,
3114     0x0e20, 0x0e21, 0x0e22, 0x0e23, 0x0e24, 0x0e25, 0x0e26, 0x0e27,
3115     0x0e28, 0x0e29, 0x0e2a, 0x0e2b, 0x0e2c, 0x0e2d, 0x0e2e, 0x0e2f,
3116     0x0e30, 0x0e31, 0x0e32, 0x0e33, 0x0e34, 0x0e35, 0x0e36, 0x0e37,
3117     0x0e38, 0x0e39, 0x0e3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0e3f,
3118     0x0e40, 0x0e41, 0x0e42, 0x0e43, 0x0e44, 0x0e45, 0x0e46, 0x0e47,
3119     0x0e48, 0x0e49, 0x0e4a, 0x0e4b, 0x0e4c, 0x0e4d, 0x0e4e, 0x0e4f,
3120     0x0e50, 0x0e51, 0x0e52, 0x0e53, 0x0e54, 0x0e55, 0x0e56, 0x0e57,
3121     0x0e58, 0x0e59, 0x0e5a, 0x0e5b, 0x0000, 0x0000, 0x0000, 0x0000,
3122 };
3123 
/*
 * xmltranscodetable_ISO8859_11:
 *
 * Byte lookup table sized as 48 + 6 * 64, which suggests a 48-byte index
 * header followed by six 64-byte blocks. The string literal supplies
 * exactly that many bytes, so no terminating NUL is stored in the array.
 * NOTE(review): presumably the reverse (Unicode/UTF-8 to ISO-8859-11)
 * mapping, with header bytes selecting a block and a 0 result meaning
 * "not representable"; the lookup code is outside this excerpt -- confirm.
 */
3124 static unsigned char const xmltranscodetable_ISO8859_11 [48 + 6 * 64] = {
3125     "\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3126     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3127     "\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3128     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3129     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3130     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3131     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3132     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3133     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3134     "\xa0\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3135     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3136     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3137     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3138     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3139     "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x05\x00\x00\x00\x00\x00\x00"
3140     "\x00\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
3141     "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
3142     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3143     "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\x00\x00\x00\x00\xdf"
3144     "\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3145     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3146     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3147     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3148     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3149     "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\x00\x00\x00\x00"
3150     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3151     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3152 };
3153 
/*
 * xmlunicodetable_ISO8859_13:
 *
 * 128 16-bit entries; going by its name, the ISO-8859-13 to Unicode table
 * for the upper half of the byte range. The first 32 entries are the
 * identity values 0x0080..0x009f, and every entry is non-zero.
 * NOTE(review): presumably indexed by (byte - 0x80) by the conversion
 * code, which is outside this excerpt -- confirm.
 */
3154 static unsigned short const xmlunicodetable_ISO8859_13 [128] = {
3155     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3156     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3157     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3158     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3159     0x00a0, 0x201d, 0x00a2, 0x00a3, 0x00a4, 0x201e, 0x00a6, 0x00a7,
3160     0x00d8, 0x00a9, 0x0156, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00c6,
3161     0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x201c, 0x00b5, 0x00b6, 0x00b7,
3162     0x00f8, 0x00b9, 0x0157, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00e6,
3163     0x0104, 0x012e, 0x0100, 0x0106, 0x00c4, 0x00c5, 0x0118, 0x0112,
3164     0x010c, 0x00c9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012a, 0x013b,
3165     0x0160, 0x0143, 0x0145, 0x00d3, 0x014c, 0x00d5, 0x00d6, 0x00d7,
3166     0x0172, 0x0141, 0x015a, 0x016a, 0x00dc, 0x017b, 0x017d, 0x00df,
3167     0x0105, 0x012f, 0x0101, 0x0107, 0x00e4, 0x00e5, 0x0119, 0x0113,
3168     0x010d, 0x00e9, 0x017a, 0x0117, 0x0123, 0x0137, 0x012b, 0x013c,
3169     0x0161, 0x0144, 0x0146, 0x00f3, 0x014d, 0x00f5, 0x00f6, 0x00f7,
3170     0x0173, 0x0142, 0x015b, 0x016b, 0x00fc, 0x017c, 0x017e, 0x2019,
3171 };
3172 
/*
 * Byte table (48 + 7 * 64 = 496 bytes) handed to UTF8ToISO8859x() by
 * UTF8ToISO8859_13().
 * The initializer is 31 adjacent string literals of 16 escaped bytes each,
 * which fills the array exactly, so no terminating NUL is stored.
 * NOTE(review): the size expression suggests a 48-byte index followed by
 * 64-byte blocks; the actual lookup scheme is in UTF8ToISO8859x() --
 * confirm there.
 */
3173 static unsigned char const xmltranscodetable_ISO8859_13 [48 + 7 * 64] = {
3174     "\x00\x00\x01\x04\x06\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3175     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3176     "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3177     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3178     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3179     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3180     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3181     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3182     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3183     "\xa0\x00\xa2\xa3\xa4\x00\xa6\xa7\x00\xa9\x00\xab\xac\xad\xae\x00"
3184     "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\x00\xbb\xbc\xbd\xbe\x00"
3185     "\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3186     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3187     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3188     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3189     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3190     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\xb4\xa1\xa5\x00"
3191     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3192     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3193     "\x00\x00\x00\x00\xc4\xc5\xaf\x00\x00\xc9\x00\x00\x00\x00\x00\x00"
3194     "\x00\x00\x00\xd3\x00\xd5\xd6\xd7\xa8\x00\x00\x00\xdc\x00\x00\xdf"
3195     "\x00\x00\x00\x00\xe4\xe5\xbf\x00\x00\xe9\x00\x00\x00\x00\x00\x00"
3196     "\x00\x00\x00\xf3\x00\xf5\xf6\xf7\xb8\x00\x00\x00\xfc\x00\x00\x00"
3197     "\x00\xd9\xf9\xd1\xf1\xd2\xf2\x00\x00\x00\x00\x00\xd4\xf4\x00\x00"
3198     "\x00\x00\x00\x00\x00\x00\xaa\xba\x00\x00\xda\xfa\x00\x00\x00\x00"
3199     "\xd0\xf0\x00\x00\x00\x00\x00\x00\x00\x00\xdb\xfb\x00\x00\x00\x00"
3200     "\x00\x00\xd8\xf8\x00\x00\x00\x00\x00\xca\xea\xdd\xfd\xde\xfe\x00"
3201     "\xc2\xe2\x00\x00\xc0\xe0\xc3\xe3\x00\x00\x00\x00\xc8\xe8\x00\x00"
3202     "\x00\x00\xc7\xe7\x00\x00\xcb\xeb\xc6\xe6\x00\x00\x00\x00\x00\x00"
3203     "\x00\x00\xcc\xec\x00\x00\x00\x00\x00\x00\xce\xee\x00\x00\xc1\xe1"
3204     "\x00\x00\x00\x00\x00\x00\xcd\xed\x00\x00\x00\xcf\xef\x00\x00\x00"
3205 };
3206 
/*
 * 128-entry table handed to ISO8859xToUTF8() by ISO8859_14ToUTF8().
 * NOTE(review): entries look like the Unicode code points for input bytes
 * 0x80..0xFF in order (index 0 holds 0x0080) -- confirm against
 * ISO8859xToUTF8().
 */
3207 static unsigned short const xmlunicodetable_ISO8859_14 [128] = {
3208     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3209     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3210     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3211     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3212     0x00a0, 0x1e02, 0x1e03, 0x00a3, 0x010a, 0x010b, 0x1e0a, 0x00a7,
3213     0x1e80, 0x00a9, 0x1e82, 0x1e0b, 0x1ef2, 0x00ad, 0x00ae, 0x0178,
3214     0x1e1e, 0x1e1f, 0x0120, 0x0121, 0x1e40, 0x1e41, 0x00b6, 0x1e56,
3215     0x1e81, 0x1e57, 0x1e83, 0x1e60, 0x1ef3, 0x1e84, 0x1e85, 0x1e61,
3216     0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3217     0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3218     0x0174, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x1e6a,
3219     0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x0176, 0x00df,
3220     0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3221     0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3222     0x0175, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x1e6b,
3223     0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x0177, 0x00ff,
3224 };
3225 
/*
 * Byte table (48 + 10 * 64 = 688 bytes) handed to UTF8ToISO8859x() by
 * UTF8ToISO8859_14().
 * The initializer is 43 adjacent string literals of 16 escaped bytes each,
 * which fills the array exactly, so no terminating NUL is stored.
 * NOTE(review): the size expression suggests a 48-byte index followed by
 * 64-byte blocks; the actual lookup scheme is in UTF8ToISO8859x() --
 * confirm there.
 */
3226 static unsigned char const xmltranscodetable_ISO8859_14 [48 + 10 * 64] = {
3227     "\x00\x00\x01\x09\x04\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3228     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3229     "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3230     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3231     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3232     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3233     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3234     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3235     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3236     "\xa0\x00\x00\xa3\x00\x00\x00\xa7\x00\xa9\x00\x00\x00\xad\xae\x00"
3237     "\x00\x00\x00\x00\x00\x00\xb6\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3238     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3239     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3240     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3241     "\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x05\x06\x00\x00\x00\x00"
3242     "\x00\x00\xa1\xa2\x00\x00\x00\x00\x00\x00\xa6\xab\x00\x00\x00\x00"
3243     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb0\xb1"
3244     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3245     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3246     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\xa5\x00\x00\x00\x00"
3247     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3248     "\xb2\xb3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3249     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3250     "\xa8\xb8\xaa\xba\xbd\xbe\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3251     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3252     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3253     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3254     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3255     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3256     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3257     "\x00\x00\xac\xbc\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3258     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3259     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3260     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3261     "\x00\x00\x00\x00\xd0\xf0\xde\xfe\xaf\x00\x00\x00\x00\x00\x00\x00"
3262     "\xb4\xb5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3263     "\x00\x00\x00\x00\x00\x00\xb7\xb9\x00\x00\x00\x00\x00\x00\x00\x00"
3264     "\xbb\xbf\x00\x00\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3265     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3266     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3267     "\x00\xd1\xd2\xd3\xd4\xd5\xd6\x00\xd8\xd9\xda\xdb\xdc\xdd\x00\xdf"
3268     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3269     "\x00\xf1\xf2\xf3\xf4\xf5\xf6\x00\xf8\xf9\xfa\xfb\xfc\xfd\x00\xff"
3270 };
3271 
/*
 * 128-entry table handed to ISO8859xToUTF8() by ISO8859_15ToUTF8().
 * NOTE(review): entries look like the Unicode code points for input bytes
 * 0x80..0xFF in order (index 0 holds 0x0080) -- confirm against
 * ISO8859xToUTF8().
 */
3272 static unsigned short const xmlunicodetable_ISO8859_15 [128] = {
3273     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3274     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3275     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3276     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3277     0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7,
3278     0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
3279     0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7,
3280     0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf,
3281     0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
3282     0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3283     0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
3284     0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
3285     0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
3286     0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3287     0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
3288     0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
3289 };
3290 
/*
 * Byte table (48 + 6 * 64 = 432 bytes) handed to UTF8ToISO8859x() by
 * UTF8ToISO8859_15().
 * The initializer is 27 adjacent string literals of 16 escaped bytes each,
 * which fills the array exactly, so no terminating NUL is stored.
 * NOTE(review): the size expression suggests a 48-byte index followed by
 * 64-byte blocks; the actual lookup scheme is in UTF8ToISO8859x() --
 * confirm there.
 */
3291 static unsigned char const xmltranscodetable_ISO8859_15 [48 + 6 * 64] = {
3292     "\x00\x00\x01\x05\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3293     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3294     "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3295     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3296     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3297     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3298     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3299     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3300     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3301     "\xa0\xa1\xa2\xa3\x00\xa5\x00\xa7\x00\xa9\xaa\xab\xac\xad\xae\xaf"
3302     "\xb0\xb1\xb2\xb3\x00\xb5\xb6\xb7\x00\xb9\xba\xbb\x00\x00\x00\xbf"
3303     "\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3304     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3305     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3306     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3307     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3308     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3309     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3310     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3311     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3312     "\x00\x00\xbc\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3313     "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3314     "\x00\x00\x00\x00\x00\x00\x00\x00\xbe\x00\x00\x00\x00\xb4\xb8\x00"
3315     "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3316     "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
3317     "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3318     "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
3319 };
3320 
/*
 * 128-entry table handed to ISO8859xToUTF8() by ISO8859_16ToUTF8().
 * NOTE(review): entries look like the Unicode code points for input bytes
 * 0x80..0xFF in order (index 0 holds 0x0080) -- confirm against
 * ISO8859xToUTF8().
 */
3321 static unsigned short const xmlunicodetable_ISO8859_16 [128] = {
3322     0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
3323     0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
3324     0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
3325     0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
3326     0x00a0, 0x0104, 0x0105, 0x0141, 0x20ac, 0x201e, 0x0160, 0x00a7,
3327     0x0161, 0x00a9, 0x0218, 0x00ab, 0x0179, 0x00ad, 0x017a, 0x017b,
3328     0x00b0, 0x00b1, 0x010c, 0x0142, 0x017d, 0x201d, 0x00b6, 0x00b7,
3329     0x017e, 0x010d, 0x0219, 0x00bb, 0x0152, 0x0153, 0x0178, 0x017c,
3330     0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0106, 0x00c6, 0x00c7,
3331     0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
3332     0x0110, 0x0143, 0x00d2, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x015a,
3333     0x0170, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0118, 0x021a, 0x00df,
3334     0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x0107, 0x00e6, 0x00e7,
3335     0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
3336     0x0111, 0x0144, 0x00f2, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x015b,
3337     0x0171, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0119, 0x021b, 0x00ff,
3338 };
3339 
/*
 * Byte table (48 + 9 * 64 = 624 bytes) handed to UTF8ToISO8859x() by
 * UTF8ToISO8859_16().
 * The initializer is 39 adjacent string literals of 16 escaped bytes each,
 * which fills the array exactly, so no terminating NUL is stored.
 * NOTE(review): the size expression suggests a 48-byte index followed by
 * 64-byte blocks; the actual lookup scheme is in UTF8ToISO8859x() --
 * confirm there.
 */
3340 static unsigned char const xmltranscodetable_ISO8859_16 [48 + 9 * 64] = {
3341     "\x00\x00\x01\x08\x02\x03\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00"
3342     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3343     "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3344     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3345     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3346     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3347     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3348     "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
3349     "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
3350     "\xa0\x00\x00\x00\x00\x00\x00\xa7\x00\xa9\x00\xab\x00\xad\x00\x00"
3351     "\xb0\xb1\x00\x00\x00\x00\xb6\xb7\x00\x00\x00\xbb\x00\x00\x00\x00"
3352     "\x00\x00\xc3\xe3\xa1\xa2\xc5\xe5\x00\x00\x00\x00\xb2\xb9\x00\x00"
3353     "\xd0\xf0\x00\x00\x00\x00\x00\x00\xdd\xfd\x00\x00\x00\x00\x00\x00"
3354     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3355     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3356     "\x00\xa3\xb3\xd1\xf1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3357     "\xd5\xf5\xbc\xbd\x00\x00\x00\x00\x00\x00\xd7\xf7\x00\x00\x00\x00"
3358     "\xa6\xa8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3359     "\xd8\xf8\x00\x00\x00\x00\x00\x00\xbe\xac\xae\xaf\xbf\xb4\xb8\x00"
3360     "\x06\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3361     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3362     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3363     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3364     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3365     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3366     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xa4\x00\x00\x00"
3367     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3368     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3369     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xb5\xa5\x00"
3370     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3371     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3372     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3373     "\x00\x00\x00\x00\x00\x00\x00\x00\xaa\xba\xde\xfe\x00\x00\x00\x00"
3374     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3375     "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
3376     "\xc0\xc1\xc2\x00\xc4\x00\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
3377     "\x00\x00\xd2\xd3\xd4\x00\xd6\x00\x00\xd9\xda\xdb\xdc\x00\x00\xdf"
3378     "\xe0\xe1\xe2\x00\xe4\x00\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
3379     "\x00\x00\xf2\xf3\xf4\x00\xf6\x00\x00\xf9\xfa\xfb\xfc\x00\x00\xff"
3380 };
3381 
3382 
3383 /*
3384  * auto-generated functions for ISO-8859-2 .. ISO-8859-16
3385  */
3386 
ISO8859_2ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3387 static int ISO8859_2ToUTF8 (unsigned char* out, int *outlen,
3388     const unsigned char* in, int *inlen) {
3389     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_2);
3390 }
UTF8ToISO8859_2(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3391 static int UTF8ToISO8859_2 (unsigned char* out, int *outlen,
3392     const unsigned char* in, int *inlen) {
3393     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_2);
3394 }
3395 
ISO8859_3ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3396 static int ISO8859_3ToUTF8 (unsigned char* out, int *outlen,
3397     const unsigned char* in, int *inlen) {
3398     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_3);
3399 }
UTF8ToISO8859_3(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3400 static int UTF8ToISO8859_3 (unsigned char* out, int *outlen,
3401     const unsigned char* in, int *inlen) {
3402     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_3);
3403 }
3404 
ISO8859_4ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3405 static int ISO8859_4ToUTF8 (unsigned char* out, int *outlen,
3406     const unsigned char* in, int *inlen) {
3407     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_4);
3408 }
UTF8ToISO8859_4(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3409 static int UTF8ToISO8859_4 (unsigned char* out, int *outlen,
3410     const unsigned char* in, int *inlen) {
3411     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_4);
3412 }
3413 
ISO8859_5ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3414 static int ISO8859_5ToUTF8 (unsigned char* out, int *outlen,
3415     const unsigned char* in, int *inlen) {
3416     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_5);
3417 }
UTF8ToISO8859_5(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3418 static int UTF8ToISO8859_5 (unsigned char* out, int *outlen,
3419     const unsigned char* in, int *inlen) {
3420     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_5);
3421 }
3422 
ISO8859_6ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3423 static int ISO8859_6ToUTF8 (unsigned char* out, int *outlen,
3424     const unsigned char* in, int *inlen) {
3425     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_6);
3426 }
UTF8ToISO8859_6(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3427 static int UTF8ToISO8859_6 (unsigned char* out, int *outlen,
3428     const unsigned char* in, int *inlen) {
3429     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_6);
3430 }
3431 
ISO8859_7ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3432 static int ISO8859_7ToUTF8 (unsigned char* out, int *outlen,
3433     const unsigned char* in, int *inlen) {
3434     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_7);
3435 }
UTF8ToISO8859_7(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3436 static int UTF8ToISO8859_7 (unsigned char* out, int *outlen,
3437     const unsigned char* in, int *inlen) {
3438     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_7);
3439 }
3440 
ISO8859_8ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3441 static int ISO8859_8ToUTF8 (unsigned char* out, int *outlen,
3442     const unsigned char* in, int *inlen) {
3443     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_8);
3444 }
UTF8ToISO8859_8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3445 static int UTF8ToISO8859_8 (unsigned char* out, int *outlen,
3446     const unsigned char* in, int *inlen) {
3447     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_8);
3448 }
3449 
ISO8859_9ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3450 static int ISO8859_9ToUTF8 (unsigned char* out, int *outlen,
3451     const unsigned char* in, int *inlen) {
3452     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_9);
3453 }
UTF8ToISO8859_9(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3454 static int UTF8ToISO8859_9 (unsigned char* out, int *outlen,
3455     const unsigned char* in, int *inlen) {
3456     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_9);
3457 }
3458 
ISO8859_10ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3459 static int ISO8859_10ToUTF8 (unsigned char* out, int *outlen,
3460     const unsigned char* in, int *inlen) {
3461     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_10);
3462 }
UTF8ToISO8859_10(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3463 static int UTF8ToISO8859_10 (unsigned char* out, int *outlen,
3464     const unsigned char* in, int *inlen) {
3465     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_10);
3466 }
3467 
ISO8859_11ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3468 static int ISO8859_11ToUTF8 (unsigned char* out, int *outlen,
3469     const unsigned char* in, int *inlen) {
3470     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_11);
3471 }
UTF8ToISO8859_11(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3472 static int UTF8ToISO8859_11 (unsigned char* out, int *outlen,
3473     const unsigned char* in, int *inlen) {
3474     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_11);
3475 }
3476 
ISO8859_13ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3477 static int ISO8859_13ToUTF8 (unsigned char* out, int *outlen,
3478     const unsigned char* in, int *inlen) {
3479     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_13);
3480 }
UTF8ToISO8859_13(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3481 static int UTF8ToISO8859_13 (unsigned char* out, int *outlen,
3482     const unsigned char* in, int *inlen) {
3483     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_13);
3484 }
3485 
ISO8859_14ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3486 static int ISO8859_14ToUTF8 (unsigned char* out, int *outlen,
3487     const unsigned char* in, int *inlen) {
3488     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_14);
3489 }
UTF8ToISO8859_14(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3490 static int UTF8ToISO8859_14 (unsigned char* out, int *outlen,
3491     const unsigned char* in, int *inlen) {
3492     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_14);
3493 }
3494 
ISO8859_15ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3495 static int ISO8859_15ToUTF8 (unsigned char* out, int *outlen,
3496     const unsigned char* in, int *inlen) {
3497     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_15);
3498 }
UTF8ToISO8859_15(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3499 static int UTF8ToISO8859_15 (unsigned char* out, int *outlen,
3500     const unsigned char* in, int *inlen) {
3501     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_15);
3502 }
3503 
ISO8859_16ToUTF8(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3504 static int ISO8859_16ToUTF8 (unsigned char* out, int *outlen,
3505     const unsigned char* in, int *inlen) {
3506     return ISO8859xToUTF8 (out, outlen, in, inlen, xmlunicodetable_ISO8859_16);
3507 }
UTF8ToISO8859_16(unsigned char * out,int * outlen,const unsigned char * in,int * inlen)3508 static int UTF8ToISO8859_16 (unsigned char* out, int *outlen,
3509     const unsigned char* in, int *inlen) {
3510     return UTF8ToISO8859x (out, outlen, in, inlen, xmltranscodetable_ISO8859_16);
3511 }
3512 
3513 static void
xmlRegisterCharEncodingHandlersISO8859x(void)3514 xmlRegisterCharEncodingHandlersISO8859x (void) {
3515     xmlNewCharEncodingHandler ("ISO-8859-2", ISO8859_2ToUTF8, UTF8ToISO8859_2);
3516     xmlNewCharEncodingHandler ("ISO-8859-3", ISO8859_3ToUTF8, UTF8ToISO8859_3);
3517     xmlNewCharEncodingHandler ("ISO-8859-4", ISO8859_4ToUTF8, UTF8ToISO8859_4);
3518     xmlNewCharEncodingHandler ("ISO-8859-5", ISO8859_5ToUTF8, UTF8ToISO8859_5);
3519     xmlNewCharEncodingHandler ("ISO-8859-6", ISO8859_6ToUTF8, UTF8ToISO8859_6);
3520     xmlNewCharEncodingHandler ("ISO-8859-7", ISO8859_7ToUTF8, UTF8ToISO8859_7);
3521     xmlNewCharEncodingHandler ("ISO-8859-8", ISO8859_8ToUTF8, UTF8ToISO8859_8);
3522     xmlNewCharEncodingHandler ("ISO-8859-9", ISO8859_9ToUTF8, UTF8ToISO8859_9);
3523     xmlNewCharEncodingHandler ("ISO-8859-10", ISO8859_10ToUTF8, UTF8ToISO8859_10);
3524     xmlNewCharEncodingHandler ("ISO-8859-11", ISO8859_11ToUTF8, UTF8ToISO8859_11);
3525     xmlNewCharEncodingHandler ("ISO-8859-13", ISO8859_13ToUTF8, UTF8ToISO8859_13);
3526     xmlNewCharEncodingHandler ("ISO-8859-14", ISO8859_14ToUTF8, UTF8ToISO8859_14);
3527     xmlNewCharEncodingHandler ("ISO-8859-15", ISO8859_15ToUTF8, UTF8ToISO8859_15);
3528     xmlNewCharEncodingHandler ("ISO-8859-16", ISO8859_16ToUTF8, UTF8ToISO8859_16);
3529 }
3530 
3531 #endif
3532 #endif
3533 
3534 #define bottom_encoding
3535 #include "elfgcchack.h"
3536