• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #include <iconv.h>
2 #include <errno.h>
3 #include <wchar.h>
4 #include <string.h>
5 #include <stdlib.h>
6 #include <limits.h>
7 #include <stdint.h>
8 #include <pthread.h>
9 #include "locale_impl.h"
10 #ifndef __LITEOS__
11 #ifdef FEATURE_ICU_LOCALE
12 #include <info/device_api_version.h>
13 #endif
14 #endif
15 
/* Special charmap type codes. Each code is the byte stored right after a
 * charmap's alias list in charmaps[]; codes above 0200 have no character
 * table following them (see find_charmap). For the UTF-16/UCS-2 and UTF-32
 * codes the low bits double as the byte-order selector handed to
 * get_16/put_16 (bit 0) and get_32/put_32 (bits 0-1). */
#define UTF_32BE    0300
#define UTF_16LE    0301
#define UTF_16BE    0302
#define UTF_32LE    0303
#define UCS2BE      0304
#define UCS2LE      0305
#define WCHAR_T     0306
#define US_ASCII    0307
#define UTF_8       0310
#define UTF_16      0312
#define UTF_32      0313
#define UCS2        0314
#define EUC_JP      0320
#define SHIFT_JIS   0321
#define ISO2022_JP  0322
#define GB18030     0330
#define GBK         0331
#define GB2312      0332
#define BIG5        0340
#define EUC_KR      0350
#ifndef __LITEOS__
#ifdef FEATURE_ICU_LOCALE
/* ICU status codes that set_errno() translates into errno values. */
#define ICU_IVALID_CHAR_ERROR 10
#define ICU_TRUNCATED_CHAR_ERROR 11
#define ICU_ILLEGAL_CHAR_ERROR 12
#define ICU_BUFFER_OVERFLOW_ERROR 15
/* Callback reasons at or below this value are cleared by the "ignore" callbacks. */
#define ICU_SKIP_THRESHOLD 2
/* Bit positions inside stateful_cd.sign. */
#define TYPE_FLAG_POS 1
#define TO_IGNORE_FLAG_POS 2
#define FROM_IGNORE_FLAG_POS 3
#define TO_TRANSLIT_FLAG_POS 4
#define FROM_TRANSLIT_FLAG_POS 5
/* Number of elements in the pivot buffer used by iconv_icu(). */
#define ICU_CHUNK_SIZE 1024
#endif
#endif
51 /* Definitions of charmaps. Each charmap consists of:
52  * 1. Empty-string-terminated list of null-terminated aliases.
53  * 2. Special type code or number of elided quads of entries.
54  * 3. Character table (size determined by field 2), consisting
55  *    of 5 bytes for every 4 characters, interpreted as 10-bit
56  *    indices into the legacy_chars table. */
57 
58 static const unsigned char charmaps[] =
59 "utf8\0char\0\0\310"
60 "wchart\0\0\306"
61 "ucs2be\0\0\304"
62 "ucs2le\0\0\305"
63 "utf16be\0\0\302"
64 "utf16le\0\0\301"
65 "ucs4be\0utf32be\0\0\300"
66 "ucs4le\0utf32le\0\0\303"
67 "ascii\0usascii\0iso646\0iso646us\0\0\307"
68 "utf16\0\0\312"
69 "ucs4\0utf32\0\0\313"
70 "ucs2\0\0\314"
71 "eucjp\0\0\320"
72 "shiftjis\0sjis\0cp932\0\0\321"
73 "iso2022jp\0\0\322"
74 "gb18030\0\0\330"
75 "gbk\0\0\331"
76 "gb2312\0\0\332"
77 "big5\0bigfive\0cp950\0big5hkscs\0\0\340"
78 "euckr\0ksc5601\0ksx1001\0cp949\0\0\350"
79 #include "codepages.h"
80 ;
81 
82 #ifndef __LITEOS__
83 #ifdef FEATURE_ICU_LOCALE
/* Alias table for the ICU backend. Each entry is a list of null-terminated
 * aliases (in the normalised form fuzzycmp() compares against: lowercase
 * letters and digits only), closed by an empty string, followed by the
 * null-terminated converter name that is handed to ICU. */
static const unsigned char icu_name_maps[] =
"utf8\0char\0\0UTF-8\0"
"utf7\0\0UTF-7\0"
"ucs2\0utf16\0ucs2be\0utf16be\0\0UTF-16BE\0"
"ucs2le\0utf16le\0\0UTF-16LE\0"
"ucs4\0utf32\0ucs4be\0utf32be\0\0UTF-32BE\0"
"wchart\0ucs4le\0utf32le\0\0UTF-32LE\0"
"ascii\0usascii\0""20127\0iso646\0iso646us\0\0US-ASCII\0"
"eucjp\0eucjp2007\0\0euc-jp-2007\0"
"shiftjis\0sjis\0cp932\0ibm943p15a2003\0\0ibm-943_P15A-2003\0"
"gb18030\0\0gb18030\0"
"gbk\0""54936\0windows9362000\0\0windows-936-2000\0"
"gb2312\0""52936\0ibm1383p1101999\0\0ibm-1383_P110-1999\0"
"big5\0""950\0bigfive\0cp950\0windows9502000\0\0windows-950-2000\0"
"big5hk\0big5hkscs\0""951\0ibm1375p1002008\0\0ibm-1375_P100-2008\0"
"euckr\0ibm970p110p1102006u2\0\0ibm-970_P110_P110-2006_U2\0"
"ksc5601\0ksx1001\0cp949\0windows9492000\0\0windows-949-2000\0"
"iso88591\0latin1\0\0ISO-8859-1\0"
"iso88592\0ibm912p1001995\0\0ibm-912_P100-1995\0"
"iso88593\0ibm913p1002000\0\0ibm-913_P100-2000\0"
"iso88594\0ibm914p1001995\0\0ibm-914_P100-1995\0"
"iso88595\0ibm915p1001995\0\0ibm-915_P100-1995\0"
"iso88596\0ibm1089p1001995\0\0ibm-1089_P100-1995\0"
"iso88597\0ibm9005x1102007\0\0ibm-9005_X110-2007\0"
"iso88598\0ibm5012p1001999\0\0ibm-5012_P100-1999\0"
"iso88599\0ibm920p1001995\0\0ibm-920_P100-1995\0"
"iso885910\0iso8859101998\0\0iso-8859_10-1998\0"
"iso885911\0iso8859112001\0\0iso-8859_11-2001\0"
"tis620\0windows8742000\0\0windows-874-2000\0"
"iso885913\0ibm921p1001995\0\0ibm-921_P100-1995\0"
"iso885914\0iso8859141998\0\0iso-8859_14-1998\0"
"iso885915\0latin9\0ibm923p1001998\0\0ibm-923_P100-1998\0"
"cp1250\0windows1250\0ibm5346p1001998\0\0ibm-5346_P100-1998\0"
"cp1251\0windows1251\0ibm5347p1001998\0\0ibm-5347_P100-1998\0"
"cp1252\0windows1252\0ibm5348p1001997\0\0ibm-5348_P100-1997\0"
"cp1253\0windows1253\0ibm5349p1001998\0\0ibm-5349_P100-1998\0"
"cp1254\0windows1254\0ibm5350p1001998\0\0ibm-5350_P100-1998\0"
"cp1255\0windows1255\0ibm9447p1002002\0\0ibm-9447_P100-2002\0"
"cp1256\0windows1256\0ibm9448x1002005\0\0ibm-9448_X100-2005\0"
"cp1257\0windows1257\0ibm9449p1002002\0\0ibm-9449_P100-2002\0"
"cp1258\0windows1258\0ibm5354p1001998\0\0ibm-5354_P100-1998\0"
"koi8r\0ibm878p1001996\0\0ibm-878_P100-1996\0"
"koi8u\0ibm1168p1002002\0\0ibm-1168_P100-2002\0"
"cp437\0ibm437p1001995\0\0ibm-437_P100-1995\0"
"cp850\0ibm850p1001995\0\0ibm-850_P100-1995\0"
"cp866\0ibm866p1001995\0\0ibm-866_P100-1995\0"
"ibm1047\0cp1047\0ibm1047p1001995\0\0ibm-1047_P100-1995\0"
;
133 #endif
134 #endif
135 
136 /* Table of characters that appear in legacy 8-bit codepages,
137  * limited to 1024 slots (10 bit indices). The first 256 entries
138  * are elided since those characters are obviously all included. */
139 static const unsigned short legacy_chars[] = {
140 #include "legacychars.h"
141 };
142 
143 static const unsigned short jis0208[84][94] = {
144 #include "jis0208.h"
145 };
146 
147 static const unsigned short gb18030[126][190] = {
148 #include "gb18030.h"
149 };
150 
151 static const unsigned short big5[89][157] = {
152 #include "big5.h"
153 };
154 
155 static const unsigned short hkscs[] = {
156 #include "hkscs.h"
157 };
158 
159 static const unsigned short ksc[93][94] = {
160 #include "ksc.h"
161 };
162 
163 static const unsigned short rev_jis[] = {
164 #include "revjis.h"
165 };
166 
/*
 * Compare a user-supplied charset name against a normalised table name.
 *
 * a: name to test; letters are compared case-insensitively and every
 *    character that is neither an ASCII letter nor an ASCII digit is skipped.
 * b: table name, expected to hold only lowercase letters and digits.
 *
 * Returns 0 when the names match and nonzero otherwise.
 */
static int fuzzycmp(const unsigned char *a, const unsigned char *b)
{
	for (; *a && *b; a++, b++) {
		/* Skip punctuation. The range checks use >= so that the characters
		 * just past 'z' / 'Z' ('{', '[') and just past '9' (':') are
		 * treated as punctuation as well, not as alphanumerics. */
		while (*a && (*a|32U)-'a'>=26 && *a-'0'>=10U) a++;
		if ((*a|32U) != *b) return 1;
	}
	return *a != *b;
}
175 
find_charmap(const void * name)176 static size_t find_charmap(const void *name)
177 {
178 	const unsigned char *s;
179 	if (!*(char *)name) name=charmaps; /* "utf8" */
180 	for (s=charmaps; *s; ) {
181 		if (!fuzzycmp(name, s)) {
182 			for (; *s; s+=strlen((void *)s)+1);
183 			return s+1-charmaps;
184 		}
185 		s += strlen((void *)s)+1;
186 		if (!*s) {
187 			if (s[1] > 0200) s+=2;
188 			else s+=2+(64U-s[1])*5;
189 		}
190 	}
191 	return -1;
192 }
193 
194 #ifndef __LITEOS__
195 #ifdef FEATURE_ICU_LOCALE
find_icu_map(const void * query_name)196 static const unsigned char* find_icu_map(const void *query_name)
197 {
198     if (!*(char *)query_name) {
199         query_name = icu_name_maps;
200     }
201 
202     const unsigned char *icu_name = icu_name_maps;
203     while (*icu_name) {
204         if (!fuzzycmp(query_name, icu_name)) {
205             while (*icu_name) {
206                 icu_name += strlen((void *)icu_name) + 1;  //find nearly \0\0
207             }
208             return icu_name + 1;
209         }
210         icu_name += strlen((void *)icu_name) + 1;  // skip \0
211         if (!*icu_name) {  // skip \0\0
212             icu_name++;
213             while (*icu_name) {icu_name++;}
214             icu_name++;
215         }
216     }
217     return NULL;
218 }
219 #endif
220 #endif
221 
/* Heap-allocated conversion descriptor. iconv() tells it apart from the
 * packed integer descriptors built by combine_to_from() by the low bit of
 * the iconv_t value, which is clear for pointers to this struct. */
struct stateful_cd {
#ifndef __LITEOS__
#ifdef FEATURE_ICU_LOCALE
	unsigned sign;              /* flag bits, see the *_FLAG_POS macros */
	const unsigned char* to;    /* ICU name of the target charset */
	const unsigned char* from;  /* ICU name of the source charset */
#endif
#endif
	iconv_t base_cd;            /* packed descriptor from combine_to_from() */
	unsigned state;             /* decoder state (detected byte order, ISO-2022-JP mode) */
};
233 
/* Pack two charmaps[] offsets into a fake pointer: source offset from bit 16
 * upward, target offset in bits 1-15, and bit 0 set to mark the value as a
 * packed descriptor instead of a struct stateful_cd pointer. */
static iconv_t combine_to_from(size_t t, size_t f)
{
	return (void *)(f<<16 | t<<1 | 1);
}
238 
/* Recover the source charmap offset from a packed descriptor. */
static size_t extract_from(iconv_t cd)
{
	return (size_t)cd >> 16;
}
243 
/* Recover the target charmap offset (bits 1-15) from a packed descriptor. */
static size_t extract_to(iconv_t cd)
{
	return (size_t)cd >> 1 & 0x7fff;
}
248 
249 #ifndef __LITEOS__
250 #ifdef FEATURE_ICU_LOCALE
set_type_flag(unsigned * value)251 static void set_type_flag(unsigned* value) {*value = (1 << TYPE_FLAG_POS) | *value;}
set_to_ignore_flag(unsigned * value)252 static void set_to_ignore_flag(unsigned* value) {*value = (1 << TO_IGNORE_FLAG_POS) | *value;}
set_from_ignore_flag(unsigned * value)253 static void set_from_ignore_flag(unsigned* value) {*value = (1 << FROM_IGNORE_FLAG_POS) | *value;}
set_to_translit_flag(unsigned * value)254 static void set_to_translit_flag(unsigned* value) {*value = (1 << TO_TRANSLIT_FLAG_POS) | *value;}
set_from_translit_flag(unsigned * value)255 static void set_from_translit_flag(unsigned* value) {*value = (1 << FROM_TRANSLIT_FLAG_POS) | *value;}
get_type_flag(unsigned value)256 static bool get_type_flag(unsigned value) {return (value >> TYPE_FLAG_POS) & 1;}
get_to_ignore_flag(unsigned value)257 static bool get_to_ignore_flag(unsigned value) {return (value >> TO_IGNORE_FLAG_POS) & 1;}
get_from_ignore_flag(unsigned value)258 static bool get_from_ignore_flag(unsigned value) {return (value >> FROM_IGNORE_FLAG_POS) & 1;}
get_to_translit_flag(unsigned value)259 static bool get_to_translit_flag(unsigned value) {return (value >> TO_TRANSLIT_FLAG_POS) & 1;}
get_from_translit_flag(unsigned value)260 static bool get_from_translit_flag(unsigned value) {return (value >> FROM_TRANSLIT_FLAG_POS) & 1;}
261 
/*
 * Split an iconv_open() charset argument into its base name and its
 * "//IGNORE" / "//TRANSLIT" suffixes, then resolve the base name.
 *
 * ins:     charset argument as passed by the caller.
 * sign:    flag word; the ignore/translit bit for the given direction is
 *          set for every suffix that is present.
 * res:     receives the ICU converter name, or NULL if the base name is
 *          unknown.
 * is_from: true when ins is the source charset, false for the target.
 *
 * Returns false only when memory for the temporary base name cannot be
 * allocated; an unknown charset is reported through *res == NULL.
 */
static bool deal_with_tail(const char* ins, unsigned* sign, const unsigned char** res, bool is_from)
{
	const char *ignore_pos = strstr(ins, "//IGNORE");
	const char *translit_pos = strstr(ins, "//TRANSLIT");

	if (ignore_pos) {
		if (is_from) {
			set_from_ignore_flag(sign);
		} else {
			set_to_ignore_flag(sign);
		}
	}
	if (translit_pos) {
		if (is_from) {
			set_from_translit_flag(sign);
		} else {
			set_to_translit_flag(sign);
		}
	}

	/* No suffix: the argument is the base name, no copy needed. */
	if (!ignore_pos && !translit_pos) {
		*res = find_icu_map(ins);
		return true;
	}

	/* Cut at whichever suffix comes first, so that combined forms such as
	 * "UTF-8//TRANSLIT//IGNORE" do not leave a suffix in the looked-up name. */
	const char *cut = ignore_pos;
	if (!cut || (translit_pos && translit_pos < cut)) {
		cut = translit_pos;
	}

	size_t base_len = (size_t)(cut - ins);
	char *base = malloc(base_len + 1);
	if (!base) {return false;}
	memcpy(base, ins, base_len);
	base[base_len] = '\0';

	*res = find_icu_map(base);
	free(base);
	return true;
}
290 
291 bool icu_locale_enable = false;
292 
293 pthread_mutex_t icu_init_mutex = PTHREAD_MUTEX_INITIALIZER;
294 
295 /**
296 * @Description: The set_icu_enable function is used to set the internal implementation of iconv to the implementation of the ICU library.
297 * The iconv internal implementation may have been set to the ICU library implementation before the function was executed. In this case,
298 * the function also returns success.
299 * @return:If the function call is successful, the returned value will be zero; otherwise, the returned value will be a non-zero error code.
300 */
301 
set_iconv_icu_enable()302 int set_iconv_icu_enable()
303 {
304 	pthread_mutex_lock(&icu_init_mutex);
305 	if (!icuuc_handle_init()) {
306 		pthread_mutex_unlock(&icu_init_mutex);
307 		return ICU_SYMBOL_LOAD_ERROR;
308 	}
309 
310 	icu_locale_enable = true;
311 	pthread_mutex_unlock(&icu_init_mutex);
312 	return ICU_ZERO_ERROR;
313 }
314 
315 #endif
316 #endif
317 
iconv_open(const char * to,const char * from)318 iconv_t iconv_open(const char *to, const char *from)
319 {
320     struct stateful_cd *scd;
321 
322 #ifndef __LITEOS__
323 #ifdef FEATURE_ICU_LOCALE
324     bool is_basic_open = false;
325 
326     for (const char* s = "iso885916\0iso2022jp\0\0"; *s;) {  // icu not support
327         if (!fuzzycmp((void*)to, (void*)s) || !fuzzycmp((void*)from, (void*)s)) {
328             is_basic_open = true;
329         }
330         s += strlen(s) + 1;
331     }
332 
333     // icu open
334     if (!is_basic_open && icu_locale_enable) {
335         scd = malloc(sizeof *scd);
336         if (!scd) {return (iconv_t)-1;}
337         scd->sign = 0;
338         scd->state = 0;
339 
340         if (!deal_with_tail(to, &scd->sign, &scd->to, false)) {return (iconv_t)-1;}
341         if (!deal_with_tail(from, &scd->sign, &scd->from, true)) {return (iconv_t)-1;}
342 
343         if (!scd->to || !scd->from) {
344             errno = EINVAL;
345             free(scd);
346             return (iconv_t)-1;
347         }
348 
349         set_type_flag(&scd->sign);
350         return (iconv_t)scd;
351     }
352 #endif
353 #endif
354 
355     // basic open
356     size_t f, t;
357 	if ((t = find_charmap(to))==-1
358 	 || (f = find_charmap(from))==-1
359 	 || (charmaps[t] >= 0330)) {
360 		errno = EINVAL;
361 		return (iconv_t)-1;
362 	}
363 	iconv_t cd = combine_to_from(t, f);
364 
365 	switch (charmaps[f]) {
366 	case UTF_16:
367 	case UTF_32:
368 	case UCS2:
369 	case ISO2022_JP:
370 		scd = malloc(sizeof *scd);
371 		if (!scd) return (iconv_t)-1;
372         memset(scd, 0, sizeof(*scd));
373 		scd->base_cd = cd;
374 		scd->state = 0;
375 		cd = (iconv_t)scd;
376 	}
377 
378 	return cd;
379 }
380 
/* Read a 16-bit value from s. Bit 0 of e selects the byte order:
 * 0 = big-endian, 1 = little-endian. */
static unsigned get_16(const unsigned char *s, int e)
{
	e &= 1;
	return s[e]<<8 | s[1-e];
}
386 
/* Store the low 16 bits of c at s. Bit 0 of e selects the byte order:
 * 0 = big-endian, 1 = little-endian. */
static void put_16(unsigned char *s, unsigned c, int e)
{
	e &= 1;
	s[e] = c>>8;
	s[1-e] = c;
}
393 
/* Read a 32-bit value from s. The low two bits of e select the byte order:
 * 0 = big-endian, 3 = little-endian. */
static unsigned get_32(const unsigned char *s, int e)
{
	e &= 3;
	/* Widen the top byte to unsigned before shifting it into bits 24-31. */
	return (s[e]+0U)<<24 | s[e^1]<<16 | s[e^2]<<8 | s[e^3];
}
399 
/* Store the 32-bit value c at s. The low two bits of e select the byte
 * order: 0 = big-endian, 3 = little-endian. */
static void put_32(unsigned char *s, unsigned c, int e)
{
	e &= 3;
	s[e^0] = c>>24;
	s[e^1] = c>>16;
	s[e^2] = c>>8;
	s[e^3] = c;
}
408 
/* Adapt as needed: UTF-8 codec entry points used by iconv(), which switches
 * the calling thread's locale to UTF8_LOCALE while it runs. */
#define mbrtowc_utf8 mbrtowc
#define wctomb_utf8 wctomb
412 
legacy_map(const unsigned char * map,unsigned c)413 static unsigned legacy_map(const unsigned char *map, unsigned c)
414 {
415 	if (c < 4*map[-1]) return c;
416 	unsigned x = c - 4*map[-1];
417 	x = map[x*5/4]>>2*x%8 | map[x*5/4+1]<<8-2*x%8 & 1023;
418 	return x < 256 ? x : legacy_chars[x-256];
419 }
420 
uni_to_jis(unsigned c)421 static unsigned uni_to_jis(unsigned c)
422 {
423 	unsigned nel = sizeof rev_jis / sizeof *rev_jis;
424 	unsigned d, j, i, b = 0;
425 	for (;;) {
426 		i = nel/2;
427 		j = rev_jis[b+i];
428 		d = jis0208[j/256][j%256];
429 		if (d==c) return j + 0x2121;
430 		else if (nel == 1) return 0;
431 		else if (c < d)
432 			nel /= 2;
433 		else {
434 			b += i;
435 			nel -= nel/2;
436 		}
437 	}
438 }
439 
440 #ifndef __LITEOS__
441 #ifdef FEATURE_ICU_LOCALE
ucnv_from_u_callback_ignore(const void * context,void * fromUArgs,const void * codeUnits,int32_t length,int32_t codePoint,int reason,int * err)442 static void ucnv_from_u_callback_ignore(
443     const void* context,
444     void* fromUArgs,
445     const void* codeUnits,
446     int32_t length,
447     int32_t codePoint,
448     int reason,
449     int* err)
450 {
451     if (reason <= ICU_SKIP_THRESHOLD) {
452         *err = ICU_ZERO_ERROR;
453     }
454 }
455 
/* "From Unicode" error callback that does nothing, so the error status that
 * triggered it is left in place for the caller to see. */
static void ucnv_from_u_callback_stop(const void* context, ...) { }
457 
ucnv_to_u_callback_ignore(const void * context,void * toUArgs,const void * codeUnits,int32_t length,int reason,int * err)458 static void ucnv_to_u_callback_ignore(
459     const void* context,
460     void* toUArgs,
461     const void* codeUnits,
462     int32_t length,
463     int reason,
464     int* err)
465 {
466     if (reason <= ICU_SKIP_THRESHOLD) {
467         *err = ICU_ZERO_ERROR;
468     }
469 }
470 
/* "To Unicode" error callback that does nothing, so the error status that
 * triggered it is left in place for the caller to see. */
static void ucnv_to_u_callback_stop(const void* context, ...) { }
472 
set_errno(int errCode)473 static void set_errno(int errCode)
474 {
475     if (errCode == ICU_ZERO_ERROR) {
476         errno = 0;
477     } else if (errCode == ICU_BUFFER_OVERFLOW_ERROR) {
478         errno = E2BIG;
479     } else if (errCode == ICU_IVALID_CHAR_ERROR ||
480                errCode == ICU_TRUNCATED_CHAR_ERROR ||
481                errCode == ICU_ILLEGAL_CHAR_ERROR) {
482         errno = EILSEQ;
483     } else {
484         errno = EINVAL;
485     }
486 }
487 
iconv_icu(unsigned sign,const unsigned char * to,const unsigned char * from,char ** restrict in,size_t * restrict inb,char ** restrict out,size_t * restrict outb)488 static size_t iconv_icu(unsigned sign, const unsigned char* to, const unsigned char* from,
489 char **restrict in, size_t *restrict inb, char **restrict out, size_t *restrict outb)
490 {
491     int errCode = ICU_ZERO_ERROR;
492 
493     void* conv_in = g_icu_opt_func.ucnv_open((void*)from, &errCode);
494     if (get_from_ignore_flag(sign)) {
495         g_icu_opt_func.ucnv_setToUCallBack(conv_in, ucnv_to_u_callback_ignore, NULL, NULL, NULL, &errCode);
496     } else if (!get_from_translit_flag(sign)) {
497         g_icu_opt_func.ucnv_setFromUCallBack(conv_in, ucnv_to_u_callback_stop, NULL, NULL, NULL, &errCode);
498     }
499 
500     void* conv_out = g_icu_opt_func.ucnv_open((void*)to, &errCode);
501     if (get_to_ignore_flag(sign)) {
502         g_icu_opt_func.ucnv_setFromUCallBack(conv_out, ucnv_from_u_callback_ignore, NULL, NULL, NULL, &errCode);
503     } else if (!get_to_translit_flag(sign)) {
504         g_icu_opt_func.ucnv_setFromUCallBack(conv_out, ucnv_from_u_callback_stop, NULL, NULL, NULL, &errCode);
505     }
506 
507 	u_char pivot_buffer[ICU_CHUNK_SIZE];
508 	u_char *pivot, *pivot2;
509 	char *mytarget;
510 	const char *source_limit;
511 	const char *target_limit;
512 	int32_t target_length = 0;
513 	source_limit = *in + *inb;
514 	pivot = pivot2 = pivot_buffer;
515 	mytarget = *out;
516     target_limit = *out + *outb;
517 	g_icu_opt_func.ucnv_convertEx(conv_out, conv_in, &mytarget, target_limit, (const char **)in, source_limit,
518 						pivot_buffer, &pivot, &pivot2, pivot_buffer + ICU_CHUNK_SIZE, false, true, &errCode);
519 	target_length = (int32_t)(mytarget - *out);
520     if (errCode > ICU_ZERO_ERROR) {
521         set_errno(errCode);
522         return (size_t)-1;
523     } else {
524         errCode = ICU_ZERO_ERROR;
525     }
526     g_icu_opt_func.ucnv_close(conv_in);
527 	g_icu_opt_func.ucnv_close(conv_out);
528 
529     *out += target_length;
530     *outb -= target_length;
531     *in += *inb;
532     *inb -= *inb;
533     set_errno(errCode);
534 
535     return (size_t)errCode;
536 }
537 #endif
538 #endif
539 
iconv(iconv_t cd,char ** restrict in,size_t * restrict inb,char ** restrict out,size_t * restrict outb)540 size_t iconv(iconv_t cd, char **restrict in, size_t *restrict inb, char **restrict out, size_t *restrict outb)
541 {
542     if (!in || !*in || !*inb) {
543         return 0;
544     }
545 
546     size_t x=0;
547 	struct stateful_cd *scd=0;
548 	if (!((size_t)cd & 1)) {
549 		scd = (void *)cd;
550 		cd = scd->base_cd;
551 #ifndef __LITEOS__
552 #ifdef FEATURE_ICU_LOCALE
553         if (get_type_flag(scd->sign)) {
554             return iconv_icu(scd->sign, scd->to, scd->from, in, inb, out, outb);
555         }
556 #endif
557 #endif
558 	}
559 	unsigned to = extract_to(cd);
560 	unsigned from = extract_from(cd);
561 	const unsigned char *map = charmaps+from+1;
562 	const unsigned char *tomap = charmaps+to+1;
563 	mbstate_t st = {0};
564 	wchar_t wc;
565 	unsigned c, d;
566 	size_t k, l;
567 	int err;
568 	unsigned char type = map[-1];
569 	unsigned char totype = tomap[-1];
570 	locale_t *ploc = &CURRENT_LOCALE, loc = *ploc;
571 
572 	*ploc = UTF8_LOCALE;
573 
574 	for (; *inb; *in+=l, *inb-=l) {
575 		c = *(unsigned char *)*in;
576 		l = 1;
577 
578 		switch (type) {
579 		case UTF_8:
580 			if (c < 128) break;
581 			l = mbrtowc_utf8(&wc, *in, *inb, &st);
582 			if (l == (size_t)-1) goto ilseq;
583 			if (l == (size_t)-2) goto starved;
584 			c = wc;
585 			break;
586 		case US_ASCII:
587 			if (c >= 128) goto ilseq;
588 			break;
589 		case WCHAR_T:
590 			l = sizeof(wchar_t);
591 			if (*inb < l) goto starved;
592 			c = *(wchar_t *)*in;
593 			if (0) {
594 		case UTF_32BE:
595 		case UTF_32LE:
596 			l = 4;
597 			if (*inb < 4) goto starved;
598 			c = get_32((void *)*in, type);
599 			}
600 			if (c-0xd800u < 0x800u || c >= 0x110000u) goto ilseq;
601 			break;
602 		case UCS2BE:
603 		case UCS2LE:
604 		case UTF_16BE:
605 		case UTF_16LE:
606 			l = 2;
607 			if (*inb < 2) goto starved;
608 			c = get_16((void *)*in, type);
609 			if ((unsigned)(c-0xdc00) < 0x400) goto ilseq;
610 			if ((unsigned)(c-0xd800) < 0x400) {
611 				if (type-UCS2BE < 2U) goto ilseq;
612 				l = 4;
613 				if (*inb < 4) goto starved;
614 				d = get_16((void *)(*in + 2), type);
615 				if ((unsigned)(d-0xdc00) >= 0x400) goto ilseq;
616 				c = ((c-0xd7c0)<<10) + (d-0xdc00);
617 			}
618 			break;
619 		case UCS2:
620 		case UTF_16:
621 			l = 0;
622 			if (!scd->state) {
623 				if (*inb < 2) goto starved;
624 				c = get_16((void *)*in, 0);
625 				scd->state = type==UCS2
626 					? c==0xfffe ? UCS2LE : UCS2BE
627 					: c==0xfffe ? UTF_16LE : UTF_16BE;
628 				if (c == 0xfffe || c == 0xfeff)
629 					l = 2;
630 			}
631 			type = scd->state;
632 			continue;
633 		case UTF_32:
634 			l = 0;
635 			if (!scd->state) {
636 				if (*inb < 4) goto starved;
637 				c = get_32((void *)*in, 0);
638 				scd->state = c==0xfffe0000 ? UTF_32LE : UTF_32BE;
639 				if (c == 0xfffe0000 || c == 0xfeff)
640 					l = 4;
641 			}
642 			type = scd->state;
643 			continue;
644 		case SHIFT_JIS:
645 			if (c < 128) break;
646 			if (c-0xa1 <= 0xdf-0xa1) {
647 				c += 0xff61-0xa1;
648 				break;
649 			}
650 			l = 2;
651 			if (*inb < 2) goto starved;
652 			d = *((unsigned char *)*in + 1);
653 			if (c-129 <= 159-129) c -= 129;
654 			else if (c-224 <= 239-224) c -= 193;
655 			else goto ilseq;
656 			c *= 2;
657 			if (d-64 <= 158-64) {
658 				if (d==127) goto ilseq;
659 				if (d>127) d--;
660 				d -= 64;
661 			} else if (d-159 <= 252-159) {
662 				c++;
663 				d -= 159;
664 			}
665 			c = jis0208[c][d];
666 			if (!c) goto ilseq;
667 			break;
668 		case EUC_JP:
669 			if (c < 128) break;
670 			l = 2;
671 			if (*inb < 2) goto starved;
672 			d = *((unsigned char *)*in + 1);
673 			if (c==0x8e) {
674 				c = d;
675 				if (c-0xa1 > 0xdf-0xa1) goto ilseq;
676 				c += 0xff61 - 0xa1;
677 				break;
678 			}
679 			c -= 0xa1;
680 			d -= 0xa1;
681 			if (c >= 84 || d >= 94) goto ilseq;
682 			c = jis0208[c][d];
683 			if (!c) goto ilseq;
684 			break;
685 		case ISO2022_JP:
686 			if (c >= 128) goto ilseq;
687 			if (c == '\033') {
688 				l = 3;
689 				if (*inb < 3) goto starved;
690 				c = *((unsigned char *)*in + 1);
691 				d = *((unsigned char *)*in + 2);
692 				if (c != '(' && c != '$') goto ilseq;
693 				switch (128*(c=='$') + d) {
694 				case 'B': scd->state=0; continue;
695 				case 'J': scd->state=1; continue;
696 				case 'I': scd->state=4; continue;
697 				case 128+'@': scd->state=2; continue;
698 				case 128+'B': scd->state=3; continue;
699 				}
700 				goto ilseq;
701 			}
702 			switch (scd->state) {
703 			case 1:
704 				if (c=='\\') c = 0xa5;
705 				if (c=='~') c = 0x203e;
706 				break;
707 			case 2:
708 			case 3:
709 				l = 2;
710 				if (*inb < 2) goto starved;
711 				d = *((unsigned char *)*in + 1);
712 				c -= 0x21;
713 				d -= 0x21;
714 				if (c >= 84 || d >= 94) goto ilseq;
715 				c = jis0208[c][d];
716 				if (!c) goto ilseq;
717 				break;
718 			case 4:
719 				if (c-0x60 < 0x1f) goto ilseq;
720 				if (c-0x21 < 0x5e) c += 0xff61-0x21;
721 				break;
722 			}
723 			break;
724 		case GB2312:
725 			if (c < 128) break;
726 			if (c < 0xa1) goto ilseq;
727 		case GBK:
728 		case GB18030:
729 			if (c < 128) break;
730 			c -= 0x81;
731 			if (c >= 126) goto ilseq;
732 			l = 2;
733 			if (*inb < 2) goto starved;
734 			d = *((unsigned char *)*in + 1);
735 			if (d < 0xa1 && type == GB2312) goto ilseq;
736 			if (d-0x40>=191 || d==127) {
737 				if (d-'0'>9 || type != GB18030)
738 					goto ilseq;
739 				l = 4;
740 				if (*inb < 4) goto starved;
741 				c = (10*c + d-'0') * 1260;
742 				d = *((unsigned char *)*in + 2);
743 				if (d-0x81>126) goto ilseq;
744 				c += 10*(d-0x81);
745 				d = *((unsigned char *)*in + 3);
746 				if (d-'0'>9) goto ilseq;
747 				c += d-'0';
748 				c += 128;
749 				for (d=0; d<=c; ) {
750 					k = 0;
751 					for (int i=0; i<126; i++)
752 						for (int j=0; j<190; j++)
753 							if (gb18030[i][j]-d <= c-d)
754 								k++;
755 					d = c+1;
756 					c += k;
757 				}
758 				break;
759 			}
760 			d -= 0x40;
761 			if (d>63) d--;
762 			c = gb18030[c][d];
763 			break;
764 		case BIG5:
765 			if (c < 128) break;
766 			l = 2;
767 			if (*inb < 2) goto starved;
768 			d = *((unsigned char *)*in + 1);
769 			if (d-0x40>=0xff-0x40 || d-0x7f<0xa1-0x7f) goto ilseq;
770 			d -= 0x40;
771 			if (d > 0x3e) d -= 0x22;
772 			if (c-0xa1>=0xfa-0xa1) {
773 				if (c-0x87>=0xff-0x87) goto ilseq;
774 				if (c < 0xa1) c -= 0x87;
775 				else c -= 0x87 + (0xfa-0xa1);
776 				c = (hkscs[4867+(c*157+d)/16]>>(c*157+d)%16)%2<<17
777 					| hkscs[c*157+d];
778 				/* A few HKSCS characters map to pairs of UCS
779 				 * characters. These are mapped to surrogate
780 				 * range in the hkscs table then hard-coded
781 				 * here. Ugly, yes. */
782 				if (c/256 == 0xdc) {
783 					union {
784 						char c[8];
785 						wchar_t wc[2];
786 					} tmp;
787 					char *ptmp = tmp.c;
788 					size_t tmpx = iconv(combine_to_from(to, find_charmap("utf8")),
789 						&(char *){"\303\212\314\204"
790 						"\303\212\314\214"
791 						"\303\252\314\204"
792 						"\303\252\314\214"
793 						+c%256}, &(size_t){4},
794 						&ptmp, &(size_t){sizeof tmp});
795 					size_t tmplen = ptmp - tmp.c;
796 					if (tmplen > *outb) goto toobig;
797 					if (tmpx) x++;
798 					memcpy(*out, &tmp, tmplen);
799 					*out += tmplen;
800 					*outb -= tmplen;
801 					continue;
802 				}
803 				if (!c) goto ilseq;
804 				break;
805 			}
806 			c -= 0xa1;
807 			c = big5[c][d]|(c==0x27&&(d==0x3a||d==0x3c||d==0x42))<<17;
808 			if (!c) goto ilseq;
809 			break;
810 		case EUC_KR:
811 			if (c < 128) break;
812 			l = 2;
813 			if (*inb < 2) goto starved;
814 			d = *((unsigned char *)*in + 1);
815 			c -= 0xa1;
816 			d -= 0xa1;
817 			if (c >= 93 || d >= 94) {
818 				c += (0xa1-0x81);
819 				d += 0xa1;
820 				if (c > 0xc6-0x81 || c==0xc6-0x81 && d>0x52)
821 					goto ilseq;
822 				if (d-'A'<26) d = d-'A';
823 				else if (d-'a'<26) d = d-'a'+26;
824 				else if (d-0x81<0xff-0x81) d = d-0x81+52;
825 				else goto ilseq;
826 				if (c < 0x20) c = 178*c + d;
827 				else c = 178*0x20 + 84*(c-0x20) + d;
828 				c += 0xac00;
829 				for (d=0xac00; d<=c; ) {
830 					k = 0;
831 					for (int i=0; i<93; i++)
832 						for (int j=0; j<94; j++)
833 							if (ksc[i][j]-d <= c-d)
834 								k++;
835 					d = c+1;
836 					c += k;
837 				}
838 				break;
839 			}
840 			c = ksc[c][d];
841 			if (!c) goto ilseq;
842 			break;
843 		default:
844 			if (!c) break;
845 			c = legacy_map(map, c);
846 			if (!c) goto ilseq;
847 		}
848 
849 		switch (totype) {
850 		case WCHAR_T:
851 			if (*outb < sizeof(wchar_t)) goto toobig;
852 			*(wchar_t *)*out = c;
853 			*out += sizeof(wchar_t);
854 			*outb -= sizeof(wchar_t);
855 			break;
856 		case UTF_8:
857 			if (*outb < 4) {
858 				char tmp[4];
859 				k = wctomb_utf8(tmp, c);
860 				if (*outb < k) goto toobig;
861 				memcpy(*out, tmp, k);
862 			} else k = wctomb_utf8(*out, c);
863             /* This failure condition should be unreachable, but
864              * is included to prevent decoder bugs from translating
865              * into advancement outside the output buffer range. */
866             if (k>4) goto ilseq;
867 			*out += k;
868 			*outb -= k;
869 			break;
870 		case US_ASCII:
871 			if (c > 0x7f) subst: x++, c='*';
872 		default:
873 			if (*outb < 1) goto toobig;
874 			if (c<256 && c==legacy_map(tomap, c)) {
875 			revout:
876 				if (*outb < 1) goto toobig;
877 				*(*out)++ = c;
878 				*outb -= 1;
879 				break;
880 			}
881 			d = c;
882 			for (c=4*totype; c<256; c++) {
883 				if (d == legacy_map(tomap, c)) {
884 					goto revout;
885 				}
886 			}
887 			goto subst;
888 		case SHIFT_JIS:
889 			if (c < 128) goto revout;
890 			if (c == 0xa5) {
891 				x++;
892 				c = '\\';
893 				goto revout;
894 			}
895 			if (c == 0x203e) {
896 				x++;
897 				c = '~';
898 				goto revout;
899 			}
900 			if (c-0xff61 <= 0xdf-0xa1) {
901 				c += 0xa1 - 0xff61;
902 				goto revout;
903 			}
904 			c = uni_to_jis(c);
905 			if (!c) goto subst;
906 			if (*outb < 2) goto toobig;
907 			d = c%256;
908 			c = c/256;
909 			*(*out)++ = (c+1)/2 + (c<95 ? 112 : 176);
910 			*(*out)++ = c%2 ? d + 31 + d/96 : d + 126;
911 			*outb -= 2;
912 			break;
913 		case EUC_JP:
914 			if (c < 128) goto revout;
915 			if (c-0xff61 <= 0xdf-0xa1) {
916 				c += 0x0e00 + 0x21 - 0xff61;
917 			} else {
918 				c = uni_to_jis(c);
919 			}
920 			if (!c) goto subst;
921 			if (*outb < 2) goto toobig;
922 			*(*out)++ = c/256 + 0x80;
923 			*(*out)++ = c%256 + 0x80;
924 			*outb -= 2;
925 			break;
926 		case ISO2022_JP:
927 			if (c < 128) goto revout;
928 			if (c-0xff61 <= 0xdf-0xa1 || c==0xa5 || c==0x203e) {
929 				if (*outb < 7) goto toobig;
930 				*(*out)++ = '\033';
931 				*(*out)++ = '(';
932 				if (c==0xa5) {
933 					*(*out)++ = 'J';
934 					*(*out)++ = '\\';
935 				} else if (c==0x203e) {
936 					*(*out)++ = 'J';
937 					*(*out)++ = '~';
938 				} else {
939 					*(*out)++ = 'I';
940 					*(*out)++ = c-0xff61+0x21;
941 				}
942 				*(*out)++ = '\033';
943 				*(*out)++ = '(';
944 				*(*out)++ = 'B';
945 				*outb -= 7;
946 				break;
947 			}
948 			c = uni_to_jis(c);
949 			if (!c) goto subst;
950 			if (*outb < 8) goto toobig;
951 			*(*out)++ = '\033';
952 			*(*out)++ = '$';
953 			*(*out)++ = 'B';
954 			*(*out)++ = c/256;
955 			*(*out)++ = c%256;
956 			*(*out)++ = '\033';
957 			*(*out)++ = '(';
958 			*(*out)++ = 'B';
959 			*outb -= 8;
960 			break;
961 		case UCS2:
962 			totype = UCS2BE;
963 		case UCS2BE:
964 		case UCS2LE:
965 		case UTF_16:
966 		case UTF_16BE:
967 		case UTF_16LE:
968 			if (c < 0x10000 || totype-UCS2BE < 2U) {
969 				if (c >= 0x10000) c = 0xFFFD;
970 				if (*outb < 2) goto toobig;
971 				put_16((void *)*out, c, totype);
972 				*out += 2;
973 				*outb -= 2;
974 				break;
975 			}
976 			if (*outb < 4) goto toobig;
977 			c -= 0x10000;
978 			put_16((void *)*out, (c>>10)|0xd800, totype);
979 			put_16((void *)(*out + 2), (c&0x3ff)|0xdc00, totype);
980 			*out += 4;
981 			*outb -= 4;
982 			break;
983 		case UTF_32:
984 			totype = UTF_32BE;
985 		case UTF_32BE:
986 		case UTF_32LE:
987 			if (*outb < 4) goto toobig;
988 			put_32((void *)*out, c, totype);
989 			*out += 4;
990 			*outb -= 4;
991 			break;
992 		}
993 	}
994 	*ploc = loc;
995 	return x;
996 ilseq:
997 	err = EILSEQ;
998 	x = -1;
999 	goto end;
1000 toobig:
1001 	err = E2BIG;
1002 	x = -1;
1003 	goto end;
1004 starved:
1005 	err = EINVAL;
1006 	x = -1;
1007 end:
1008 	errno = err;
1009 	*ploc = loc;
1010 	return x;
1011 }
1012