• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Conversion module for ARIB-STD-B24.
2    Copyright (C) 1998-2014 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4 
5    The GNU C Library is free software; you can redistribute it and/or
6    modify it under the terms of the GNU Lesser General Public
7    License as published by the Free Software Foundation; either
8    version 2.1 of the License, or (at your option) any later version.
9 
10    The GNU C Library is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    Lesser General Public License for more details.
14 
15    You should have received a copy of the GNU Lesser General Public
16    License along with the GNU C Library; if not, see
17    <http://www.gnu.org/licenses/>.  */
18 
19 /*
20  * Conversion module for the character encoding
21  * defined in ARIB STD-B24 Volume 1, Part 2, Chapter 7.
22  *    http://www.arib.or.jp/english/html/overview/doc/6-STD-B24v5_2-1p3-E1.pdf
23  *    http://www.arib.or.jp/english/html/overview/sb_ej.html
24  *    https://sites.google.com/site/unicodesymbols/Home/japanese-tv-symbols/
25  * It is based on ISO-2022, and used in Japanese digital television.
26  *
27  * Note 1: "mosaic" characters are not supported in this module.
28  * Note 2: Control characters (for subtitles) are discarded.
29  */
30 
31 #include <assert.h>
32 #include <dlfcn.h>
33 #include <gconv.h>
34 #include <stdint.h>
35 #include <stdlib.h>
36 #include <string.h>
37 
38 #include "jis0201.h"
39 #include "jis0208.h"
40 #include "jisx0213.h"
41 
42 /* Definitions used in the body of the `gconv' function.  */
43 #define CHARSET_NAME		"ARIB-STD-B24//"
44 #define DEFINE_INIT		1
45 #define DEFINE_FINI		1
46 #define ONE_DIRECTION		0
47 #define FROM_LOOP		from_aribb24_loop
48 #define TO_LOOP			to_aribb24_loop
49 #define FROM_LOOP_MIN_NEEDED_FROM 1
50 #define FROM_LOOP_MAX_NEEDED_FROM 1
51 #define FROM_LOOP_MIN_NEEDED_TO 4
52 #define FROM_LOOP_MAX_NEEDED_TO (4 * 4)
53 #define TO_LOOP_MIN_NEEDED_FROM 4
54 #define TO_LOOP_MAX_NEEDED_FROM 4
55 #define TO_LOOP_MIN_NEEDED_TO 1
56 #define TO_LOOP_MAX_NEEDED_TO 7
57 
58 #define PREPARE_LOOP \
59   __mbstate_t saved_state;						      \
60   __mbstate_t *statep = data->__statep;					      \
61   status = __GCONV_OK;
62 
63 /* Since we might have to reset the input pointer we must be able to save
64    and restore the state.  */
65 #define SAVE_RESET_STATE(Save) \
66   {									      \
67     if (Save)								      \
68       saved_state = *statep;						      \
69     else								      \
70       *statep = saved_state;						      \
71   }
72 
73 /* During UCS-4 to ARIB-STD-B24 conversion, the state contains the last
74    two bytes to be output, in .prev member. */
75 
76 /* Since this is a stateful encoding we have to provide code which resets
77    the output state to the initial state.  This has to be done during the
78    flushing.  */
79 #define EMIT_SHIFT_TO_INIT \
80   {									      \
81     if (!FROM_DIRECTION)						      \
82       status = out_buffered((struct state_to *) data->__statep,		      \
83 			    &outbuf, outend);				      \
84     /* we don't have to emit anything, just reset the state.  */	      \
85     memset (data->__statep, '\0', sizeof (*data->__statep));		      \
86   }
87 
88 
89 /* This makes obvious what everybody knows: 0x1b is the Esc character.  */
90 #define ESC 0x1b
91 /* other control characters */
92 #define SS2 0x19
93 #define SS3 0x1d
94 #define LS0 0x0f
95 #define LS1 0x0e
96 
97 #define LS2 0x6e
98 #define LS3 0x6f
99 #define LS1R 0x7e
100 #define LS2R 0x7d
101 #define LS3R 0x7c
102 
103 #define LF 0x0a
104 #define CR 0x0d
105 #define BEL 0x07
106 #define BS 0x08
107 #define COL 0x90
108 #define CDC 0x92
109 #define MACRO_CTRL 0x95
110 #define CSI 0x9b
111 #define TIME 0x9d
112 
113 /* code sets */
114 enum g_set
115 {
116   KANJI_set = '\x42',         /* 2Byte set */
117   ASCII_set = '\x40',
118   ASCII_x_set = '\x4a',
119   HIRAGANA_set = '\x30',
120   KATAKANA_set = '\x31',
121   MOSAIC_A_set = '\x32',
122   MOSAIC_B_set = '\x33',
123   MOSAIC_C_set = '\x34',
124   MOSAIC_D_set = '\x35',
125   PROP_ASCII_set = '\x36',
126   PROP_HIRA_set = '\x37',
127   PROP_KATA_set = '\x38',
128   JIS0201_KATA_set = '\x49',
129   JISX0213_1_set = '\x39',    /* 2Byte set */
130   JISX0213_2_set = '\x3a',    /* 2Byte set */
131   EXTRA_SYMBOLS_set = '\x3b', /* 2Byte set */
132 
133   DRCS0_set = 0x40 | 0x80,    /* 2Byte set */
134   DRCS1_set = 0x41 | 0x80,
135   DRCS15_set = 0x4f | 0x80,
136   MACRO_set = 0x70 | 0x80,
137 };
138 
139 
140 /* First define the conversion function from ARIB-STD-B24 to UCS-4.  */
141 
/* Input-parser modes for the ARIB-STD-B24 -> UCS-4 direction.  The
   current mode is stored in the `mode' bit-field of struct state_from
   and selects which branch of BODY handles the next input byte.  */
142 enum mode_e
143 {
144   NORMAL,		/* expecting a control code or the first byte of a char */
145   ESCAPE,		/* previous byte was ESC */
146   G_SEL_1B,		/* after ESC 0x28..0x2b: final byte of a 1-byte set */
147   G_SEL_MB,		/* after ESC 0x24 0x28..0x2b: final byte of a 2-byte set */
148   CTRL_SEQ,		/* consuming the parameters of a control sequence */
149   DESIGNATE_MB,		/* after ESC 0x24 */
150   DRCS_SEL_1B,		/* after 0x20 in G_SEL_1B: DRCS1..15 / macro final byte */
151   DRCS_SEL_MB,		/* after 0x20 in G_SEL_MB: DRCS0 final byte */
152   MB_2ND,		/* expecting the second byte of a 2-byte char */
153 };
154 
155 /*
156  * __GCONV_INPUT_INCOMPLETE is never used in this conversion, thus
157  * we can re-use mbstate_t.__value and .__count:3 for the other purpose.
158  */
/* Decoder state for the ARIB-STD-B24 -> UCS-4 direction, overlaid on
   __mbstate_t (see INIT_PARAMS / UPDATE_PARAMS).  A state whose g[0] is 0
   is treated as "uninitialized" and replaced by def_state_from.  */
159 struct state_from {
160   /* __count */
161   uint8_t cnt:3;	/* for use in skeleton.c. always 0 */
162   uint8_t pad0:1;
163   uint8_t gl:2;		/* idx of the G-set invoked into GL */
164   uint8_t gr:2;		/*  ... to GR */
165   uint8_t ss:2;		/* SS state. 0: no shift, 2:SS2, 3:SS3 */
166   uint8_t gidx:2;	/* G-set (0..3) currently being designated */
167   uint8_t mode:4;	/* current input mode. see enum mode_e. */
168   uint8_t skip;		/* [CTRL_SEQ] # of char to skip */
169   uint8_t prev;		/* previously input char [in MB_2ND] or,*/
170 			/* the control code being processed. [CTRL_SEQ (.skip == 0)] */
171 
172   /* __value */
173   uint8_t g[4];		/* code set for G0..G3 */
174 } __attribute__((packed));
175 
176 static const struct state_from def_state_from = {
177   .cnt = 0,
178   .gl = 0,
179   .gr = 2,
180   .ss = 0,
181   .gidx = 0,
182   .mode = NORMAL,
183   .skip = 0,
184   .prev = '\0',
185   .g[0] = KANJI_set,
186   .g[1] = ASCII_set,
187   .g[2] = HIRAGANA_set,
188   .g[3] = KATAKANA_set,
189 };
190 
191 #define EXTRA_LOOP_DECLS	, __mbstate_t *statep
192 #define EXTRA_LOOP_ARGS		, statep
193 
194 #define INIT_PARAMS \
195   struct state_from st = *((struct state_from *)statep);		      \
196   if (st.g[0] == 0)							      \
197     st = def_state_from;
198 
199 #define UPDATE_PARAMS		*statep = *((__mbstate_t *)&st)
200 
201 #define LOOP_NEED_FLAGS
202 
203 #define MIN_NEEDED_INPUT	FROM_LOOP_MIN_NEEDED_FROM
204 #define MAX_NEEDED_INPUT	FROM_LOOP_MAX_NEEDED_FROM
205 #define MIN_NEEDED_OUTPUT	FROM_LOOP_MIN_NEEDED_TO
206 #define MAX_NEEDED_OUTPUT	FROM_LOOP_MAX_NEEDED_TO
207 #define LOOPFCT			FROM_LOOP
208 
209 /* tables and functions used in BODY */
210 
211 static const uint16_t kata_punc[] = {
212   0x30fd, 0x30fe, 0x30fc, 0x3002, 0x300c, 0x300d, 0x3001, 0x30fb
213 };
214 
215 static const uint16_t hira_punc[] = {
216   0x309d, 0x309e
217 };
218 
219 static const uint16_t nonspacing_symbol[] = {
220   0x0301, 0x0300, 0x0308, 0x0302, 0x0304, 0x0332
221 };
222 
223 static const uint32_t extra_kanji[] = {
224   /* row 85 */
225   /* col 0..15 */
226   0, 0x3402, 0x20158, 0x4efd, 0x4eff, 0x4f9a, 0x4fc9, 0x509c,
227   0x511e, 0x51bc, 0x351f, 0x5307, 0x5361, 0x536c, 0x8a79, 0x20bb7,
228   /* col. 16..31 */
229   0x544d, 0x5496, 0x549c, 0x54a9, 0x550e, 0x554a, 0x5672, 0x56e4,
230   0x5733, 0x5734, 0xfa10, 0x5880, 0x59e4, 0x5a23, 0x5a55, 0x5bec,
231   /* col. 32..47 */
232   0xfa11, 0x37e2, 0x5eac, 0x5f34, 0x5f45, 0x5fb7, 0x6017, 0xfa6b,
233   0x6130, 0x6624, 0x66c8, 0x66d9, 0x66fa, 0x66fb, 0x6852, 0x9fc4,
234   /* col. 48..63 */
235   0x6911, 0x693b, 0x6a45, 0x6a91, 0x6adb, 0x233cc, 0x233fe, 0x235c4,
236   0x6bf1, 0x6ce0, 0x6d2e, 0xfa45, 0x6dbf, 0x6dca, 0x6df8, 0xfa46,
237   /* col. 64..79 */
238   0x6f5e, 0x6ff9, 0x7064, 0xfa6c, 0x242ee, 0x7147, 0x71c1, 0x7200,
239   0x739f, 0x73a8, 0x73c9, 0x73d6, 0x741b, 0x7421, 0xfa4a, 0x7426,
240   /* col. 80..96 */
241   0x742a, 0x742c, 0x7439, 0x744b, 0x3eda, 0x7575, 0x7581, 0x7772,
242   0x4093, 0x78c8, 0x78e0, 0x7947, 0x79ae, 0x9fc6, 0x4103, 0,
243 
244   /* row 86 */
245   /* col 0..15 */
246   0, 0x9fc5, 0x79da, 0x7a1e, 0x7b7f, 0x7c31, 0x4264, 0x7d8b,
247   0x7fa1, 0x8118, 0x813a, 0xfa6d, 0x82ae, 0x845b, 0x84dc, 0x84ec,
248   /* col. 16..31 */
249   0x8559, 0x85ce, 0x8755, 0x87ec, 0x880b, 0x88f5, 0x2ec6, 0x8af6,
250   0x8dce, 0x8fbb, 0x8ff6, 0x90dd, 0x9127, 0x912d, 0x91b2, 0x9233,
251   /* col. 32..43 */
252   0x9288, 0x9321, 0x9348, 0x9592, 0x96de, 0x9903, 0x9940, 0x9ad9,
253   0x9bd6, 0x9dd7, 0x9eb4, 0x9eb5
254 };
255 
256 static const uint32_t extra_symbols[5][96] = {
257   /* row 90 */
258   {
259     /* col 0..15 */
260     0, 0x26cc, 0x26cd, 0x2757, 0x26cf, 0x26d0, 0x26d1, 0,
261     0x26d2, 0x26d5, 0x26d3, 0x26d4, 0, 0, 0, 0,
262     /* col 16..31 */
263     0x1f17f, 0x1f18a, 0, 0, 0x26d6, 0x26d7, 0x26d8, 0x26d9,
264     0x26da, 0x26db, 0x26dc, 0x26dd, 0x26de, 0x26df, 0x26e0, 0x26e1,
265     /* col 32..47 */
266     0x2b55, 0x3248, 0x3249, 0x324a, 0x324b, 0x324c, 0x324d, 0x324e,
267     0x324f, 0, 0, 0, 0, 0x2491, 0x2492, 0x2493,
268     /* col 48..63 */
269     0x1f14a, 0x1f14c, 0x1f13F, 0x1f146, 0x1f14b, 0x1f210, 0x1f211, 0x1f212,
270     0x1f213, 0x1f142, 0x1f214, 0x1f215, 0x1f216, 0x1f14d, 0x1f131, 0x1f13d,
271     /* col 64..79 */
272     0x2b1b, 0x2b24, 0x1f217, 0x1f218, 0x1f219, 0x1f21a, 0x1f21b, 0x26bf,
273     0x1f21c, 0x1f21d, 0x1f21e, 0x1f21f, 0x1f220, 0x1f221, 0x1f222, 0x1f223,
274     /* col 80..95 */
275     0x1f224, 0x1f225, 0x1f14e, 0x3299, 0x1f200, 0, 0, 0,
276     0, 0, 0, 0, 0, 0, 0, 0
277   },
278   /* row 91 */
279   {
280     /* col 0..15 */
281     0, 0x26e3, 0x2b56, 0x2b57, 0x2b58, 0x2b59, 0x2613, 0x328b,
282     0x3012, 0x26e8, 0x3246, 0x3245, 0x26e9, 0x0fd6, 0x26ea, 0x26eb,
283     /* col 16..31 */
284     0x26ec, 0x2668, 0x26ed, 0x26ee, 0x26ef, 0x2693, 0x2708, 0x26f0,
285     0x26f1, 0x26f2, 0x26f3, 0x26f4, 0x26f5, 0x1f157, 0x24b9, 0x24c8,
286     /* col 32..47 */
287     0x26f6, 0x1f15f, 0x1f18b, 0x1f18d, 0x1f18c, 0x1f179, 0x26f7, 0x26f8,
288     0x26f9, 0x26fa, 0x1f17b, 0x260e, 0x26fb, 0x26fc, 0x26fd, 0x26fe,
289     /* col 48..63 */
290     0x1f17c, 0x26ff,
291   },
292   /* row 92 */
293   {
294     /* col 0..15 */
295     0, 0x27a1, 0x2b05, 0x2b06, 0x2b07, 0x2b2f, 0x2b2e, 0x5e74,
296     0x6708, 0x65e5, 0x5186, 0x33a1, 0x33a5, 0x339d, 0x33a0, 0x33a4,
297     /* col 16..31 */
298     0x1f100, 0x2488, 0x2489, 0x248a, 0x248b, 0x248c, 0x248d, 0x248e,
299     0x248f, 0x2490, 0xe290, 0xe291, 0xe292, 0xe293, 0xe294, 0xe295,
300     /* col 32..47 */
301     0x1f101, 0x1f102, 0x1f103, 0x1f104, 0x1f105, 0x1f106, 0x1f107, 0x1f108,
302     0x1f109, 0x1f10a, 0x3233, 0x3236, 0x3232, 0x3231, 0x3239, 0x3244,
303     /* col 48..63 */
304     0x25b6, 0x25c0, 0x3016, 0x3017, 0x27d0, 0x00b2, 0x00b3, 0x1f12d,
305     0xe2a5, 0xe2a6, 0xe2a7, 0xe2a8, 0xe2a9, 0xe2aa, 0xe2ab, 0xe2ac,
306     /* col 64..79 */
307     0xe2ad, 0xe2ae, 0xe2af, 0xe2b0, 0xe2b1, 0xe2b2, 0xe2b3, 0xe2b4,
308     0xe2b5, 0xe2b6, 0xe2b7, 0xe2b8, 0xe2b9, 0xe2ba, 0xe2bb, 0xe2bc,
309     /* col 80..95 */
310     0xe2bd, 0xe2be, 0xe2bf, 0xe2c0, 0xe2c1, 0xe2c2, 0x1f12c, 0x1f12b,
311     0x3247, 0x1f190, 0x1f226, 0x213b, 0, 0, 0, 0
312   },
313   /* row 93 */
314   {
315     /* col 0..15 */
316     0, 0x322a, 0x322b, 0x322c, 0x322d, 0x322e, 0x322f, 0x3230,
317     0x3237, 0x337e, 0x337d, 0x337c, 0x337b, 0x2116, 0x2121, 0x3036,
318     /* col 16..31 */
319     0x26be, 0x1f240, 0x1f241, 0x1f242, 0x1f243, 0x1f244, 0x1f245, 0x1f246,
320     0x1f247, 0x1f248, 0x1f12a, 0x1f227, 0x1f228, 0x1f229, 0x1f214, 0x1f22a,
321     /* col 32..47 */
322     0x1f22b, 0x1f22c, 0x1f22d, 0x1f22e, 0x1f22f, 0x1f230, 0x1f231, 0x2113,
323     0x338f, 0x3390, 0x33ca, 0x339e, 0x33a2, 0x3371, 0, 0,
324     /* col 48..63 */
325     0x00bd, 0x2189, 0x2153, 0x2154, 0x00bc, 0x00be, 0x2155, 0x2156,
326     0x2157, 0x2158, 0x2159, 0x215a, 0x2150, 0x215b, 0x2151, 0x2152,
327     /* col 64..79 */
328     0x2600, 0x2601, 0x2602, 0x2603, 0x2616, 0x2617, 0x26c9, 0x26ca,
329     0x2666, 0x2665, 0x2663, 0x2660, 0x26cb, 0x2a00, 0x203c, 0x2049,
330     /* col 80..95 */
331     0x26c5, 0x2614, 0x26c6, 0x26c4, 0x26c7, 0x26a1, 0x26c8, 0,
332     0x269e, 0x269f, 0x266c, 0x260e, 0, 0, 0, 0
333   },
334   /* row 94 */
335   {
336     /* col 0..15 */
337     0, 0x2160, 0x2161, 0x2162, 0x2163, 0x2164, 0x2165, 0x2166,
338     0x2167, 0x2168, 0x2169, 0x216a, 0x216b, 0x2470, 0x2471, 0x2472,
339     /* col 16..31 */
340     0x2473, 0x2474, 0x2475, 0x2476, 0x2477, 0x2478, 0x2479, 0x247a,
341     0x247b, 0x247c, 0x247d, 0x247e, 0x247f, 0x3251, 0x3252, 0x3253,
342     /* col 32..47 */
343     0x3254, 0x1f110, 0x1f111, 0x1f112, 0x1f113, 0x1f114, 0x1f115, 0x1f116,
344     0x1f117, 0x1f118, 0x1f119, 0x1f11a, 0x1f11b, 0x1f11c, 0x1f11d, 0x1f11e,
345     /* col 48..63 */
346     0x1f11f, 0x1f120, 0x1f121, 0x1f122, 0x1f123, 0x1f124, 0x1f125, 0x1f126,
347     0x1f127, 0x1f128, 0x1f129, 0x3255, 0x3256, 0x3257, 0x3258, 0x3259,
348     /* col 64..79 */
349     0x325a, 0x2460, 0x2461, 0x2462, 0x2463, 0x2464, 0x2465, 0x2466,
350     0x2467, 0x2468, 0x2469, 0x246a, 0x246b, 0x246c, 0x246d, 0x246e,
351     /* col 80..95 */
352     0x246f, 0x2776, 0x2777, 0x2778, 0x2779, 0x277a, 0x277b, 0x277c,
353     0x277d, 0x277e, 0x277f, 0x24eb, 0x24ec, 0x325b, 0, 0
354   },
355 };
356 
357 struct mchar_entry {
358   uint32_t len;
359   uint32_t to[4];
360 };
361 
362 /* list of transliterations. */
363 
364 /* small/subscript-ish KANJI. map to the normal sized version */
365 static const struct mchar_entry ext_sym_smallk[] = {
366   {.len = 1, .to = { 0x6c0f }},
367   {.len = 1, .to = { 0x526f }},
368   {.len = 1, .to = { 0x5143 }},
369   {.len = 1, .to = { 0x6545 }},
370   {.len = 1, .to = { 0x52ed }},
371   {.len = 1, .to = { 0x65b0 }},
372 };
373 
374 /* symbols of music instruments */
375 static const struct mchar_entry ext_sym_music[] = {
376   {.len = 4, .to = { 0x0028, 0x0076, 0x006e, 0x0029 }},
377   {.len = 4, .to = { 0x0028, 0x006f, 0x0062, 0x0029 }},
378   {.len = 4, .to = { 0x0028, 0x0063, 0x0062, 0x0029 }},
379   {.len = 3, .to = { 0x0028, 0x0063, 0x0065 }},
380   {.len = 3, .to = { 0x006d, 0x0062, 0x0029 }},
381   {.len = 4, .to = { 0x0028, 0x0068, 0x0070, 0x0029 }},
382   {.len = 4, .to = { 0x0028, 0x0062, 0x0072, 0x0029 }},
383   {.len = 3, .to = { 0x0028, 0x0070, 0x0029 }},
384 
385   {.len = 3, .to = { 0x0028, 0x0073, 0x0029 }},
386   {.len = 4, .to = { 0x0028, 0x006d, 0x0073, 0x0029 }},
387   {.len = 3, .to = { 0x0028, 0x0074, 0x0029 }},
388   {.len = 4, .to = { 0x0028, 0x0062, 0x0073, 0x0029 }},
389   {.len = 3, .to = { 0x0028, 0x0062, 0x0029 }},
390   {.len = 4, .to = { 0x0028, 0x0074, 0x0062, 0x0029 }},
391   {.len = 4, .to = { 0x0028, 0x0076, 0x0070, 0x0029 }},
392   {.len = 4, .to = { 0x0028, 0x0064, 0x0073, 0x0029 }},
393 
394   {.len = 4, .to = { 0x0028, 0x0061, 0x0067, 0x0029 }},
395   {.len = 4, .to = { 0x0028, 0x0065, 0x0067, 0x0029 }},
396   {.len = 4, .to = { 0x0028, 0x0076, 0x006f, 0x0029 }},
397   {.len = 4, .to = { 0x0028, 0x0066, 0x006c, 0x0029 }},
398   {.len = 3, .to = { 0x0028, 0x006b, 0x0065 }},
399   {.len = 2, .to = { 0x0079, 0x0029 }},
400   {.len = 3, .to = { 0x0028, 0x0073, 0x0061 }},
401   {.len = 2, .to = { 0x0078, 0x0029 }},
402 
403   {.len = 3, .to = { 0x0028, 0x0073, 0x0079 }},
404   {.len = 2, .to = { 0x006e, 0x0029 }},
405   {.len = 3, .to = { 0x0028, 0x006f, 0x0072 }},
406   {.len = 2, .to = { 0x0067, 0x0029 }},
407   {.len = 3, .to = { 0x0028, 0x0070, 0x0065 }},
408   {.len = 2, .to = { 0x0072, 0x0029 }},
409 };
410 
411 
412 int
b24_char_conv(int set,unsigned char c1,unsigned char c2,uint32_t * out)413 b24_char_conv (int set, unsigned char c1, unsigned char c2, uint32_t *out)
414 {
415   int len;
416   uint32_t ch;
417 
418   if (set > DRCS0_set && set <= DRCS15_set)
419     set = DRCS0_set;
420 
421   switch (set)
422     {
423       case ASCII_set:
424       case ASCII_x_set:
425       case PROP_ASCII_set:
426 	if (c1 == 0x7e)
427 	  *out = 0x203e;
428 	else if (c1 == 0x5c)
429 	  *out = 0xa5;
430 	else
431 	  *out = c1;
432 	return 1;
433 
434       case KATAKANA_set:
435       case PROP_KATA_set:
436 	if (c1 <= 0x76)
437 	  *out = 0x3080 + c1;
438 	else
439 	  *out = kata_punc[c1 - 0x77];
440 	return 1;
441 
442       case HIRAGANA_set:
443       case PROP_HIRA_set:
444 	if (c1 <= 0x73)
445 	  *out = 0x3020 + c1;
446 	else if (c1 == 0x77 || c1 == 0x78)
447 	  *out = hira_punc[c1 - 0x77];
448 	else if (c1 >= 0x79)
449 	  *out = kata_punc[c1 - 0x77];
450 	else
451 	  return 0;
452 	return 1;
453 
454       case JIS0201_KATA_set:
455 	if (c1 > 0x5f)
456 	  return 0;
457 	*out = 0xff40 + c1;
458 	return 1;
459 
460       case EXTRA_SYMBOLS_set:
461 	if (c1 == 0x75 || (c1 == 0x76 && (c2 - 0x20) <=43))
462 	  {
463 	    *out = extra_kanji[(c1 - 0x75) * 96 + (c2 - 0x20)];
464 	    return 1;
465 	  }
466 	/* fall through */
467       case KANJI_set:
468 	/* check extra symbols */
469 	if (c1 >= 0x7a && c1 <= 0x7e)
470 	  {
471 	    const struct mchar_entry *entry;
472 
473 	    c1 -= 0x20;
474 	    c2 -= 0x20;
475 	    if (c1 == 0x5c && c2 >= 0x1a && c2 <= 0x1f)
476 	      entry = &ext_sym_smallk[c2 - 0x1a];
477 	    else if (c1 == 0x5c && c2 >= 0x38 && c2 <= 0x55)
478 	      entry = &ext_sym_music[c2 - 0x38];
479 	    else
480 	      entry = NULL;
481 
482 	    if (entry)
483 	      {
484 		int i;
485 
486 		for (i = 0; i < entry->len; i++)
487 		  out[i] = entry->to[i];
488 		return i;
489 	      }
490 
491 	    *out = extra_symbols[c1 - 0x5a][c2];
492 	    if (*out == 0)
493 	      return 0;
494 
495 	    return 1;
496 	  }
497 	/* Some ARIB strings somehow require that
498 	 * EXTRA_SYMBOLS codepoints fallback to KANJI_set.
499 	 * so just fall through here.
500 	 */
501 
502 	/* non-JISX0213 modification. (combining chars) */
503 	if (c1 == 0x22 && c2 == 0x7e)
504 	  {
505 	    *out = 0x20dd;
506 	    return 1;
507 	  }
508 	else if (c1 == 0x21 && c2 >= 0x2d && c2 <= 0x32)
509 	  {
510 	    *out = nonspacing_symbol[c2 - 0x2d];
511 	    return 1;
512 	  }
513 	/* fall through */
514       case JISX0213_1_set:
515       case JISX0213_2_set:
516 	len = 1;
517 	ch = jisx0213_to_ucs4(c1 | (set == JISX0213_2_set ? 0x0200 : 0x0100),
518 			      c2);
519 	if (ch == 0)
520 	  return 0;
521 	if (ch < 0x80)
522 	  {
523 	    len = 2;
524 	    out[0] = __jisx0213_to_ucs_combining[ch - 1][0];
525 	    out[1] = __jisx0213_to_ucs_combining[ch - 1][1];
526 	  }
527 	else
528 	  *out = ch;
529 	return len;
530 
531       case MOSAIC_A_set:
532       case MOSAIC_B_set:
533       case MOSAIC_C_set:
534       case MOSAIC_D_set:
535       case DRCS0_set:
536       case MACRO_set:
537 	*out = __UNKNOWN_10646_CHAR;
538 	return 1;
539 
540       default:
541 	break;
542     }
543 
544   return 0;
545 }
546 
/* Loop body for the ARIB-STD-B24 -> UCS-4 direction.  It is expanded by
   <iconv/loop.c> once per input byte and dispatches on st.mode
   (enum mode_e).  Every branch either consumes the byte and `continue's,
   or leaves the loop through `break' / STANDARD_FROM_LOOP_ERR_HANDLER.
   In the MB_2ND branch the shift state is committed only after the
   output-space check, so that a __GCONV_FULL_OUTPUT return can be resumed
   with the same trail byte.  */
#define BODY \
  { \
    uint32_t ch = *inptr; \
 \
    /* NUL aborts any pending sequence and is dropped.  */ \
    if (ch == 0) \
      { \
        st.mode = NORMAL; \
        ++ inptr; \
        continue; \
      } \
    if (__glibc_unlikely (st.mode == CTRL_SEQ)) \
      { \
        /* Parameters of control sequences are consumed without output.  */ \
        if (st.skip) \
          { \
            --st.skip; \
            if (st.skip == 0) \
              st.mode = NORMAL; \
            if (ch < 0x40 || ch > 0x7f) \
              STANDARD_FROM_LOOP_ERR_HANDLER (1); \
          } \
        else if (st.prev == MACRO_CTRL) \
          { \
            if (ch == MACRO_CTRL) \
              st.skip = 1; \
            else if (ch == LF || ch == CR) \
              { \
                st = def_state_from; \
                put32 (outptr, ch); \
                outptr += 4; \
              } \
          } \
        else if (st.prev == CSI && (ch == 0x5b || ch == 0x5c || ch == 0x6f)) \
          st.mode = NORMAL; \
        else if (st.prev == TIME || st.prev == CSI) \
          { \
            if (ch == 0x20 || (st.prev == TIME && ch == 0x28)) \
              st.skip = 1; \
            else if (!((st.prev == TIME && ch == 0x29) \
                       || ch == 0x3b || (ch >= 0x30 && ch <= 0x39))) \
              { \
                st.mode = NORMAL; \
                STANDARD_FROM_LOOP_ERR_HANDLER (1); \
              } \
          } \
        else if (st.prev == COL || st.prev == CDC) \
          { \
            if (ch == 0x20) \
              st.skip = 1; \
            else \
              { \
                st.mode = NORMAL; \
                if (ch < 0x40 || ch > 0x7f) \
                  STANDARD_FROM_LOOP_ERR_HANDLER (1); \
              } \
          } \
        ++ inptr; \
        continue; \
      } \
 \
    /* LF is passed through and resets the whole decoder state.  */ \
    if (__glibc_unlikely (ch == LF)) \
      { \
        st = def_state_from; \
        put32 (outptr, ch); \
        outptr += 4; \
        ++ inptr; \
        continue; \
      } \
 \
    if (__glibc_unlikely (st.mode == ESCAPE)) \
      { \
        if (ch == LS2 || ch == LS3) \
          { \
            st.mode = NORMAL; \
            st.gl = (ch == LS2) ? 2 : 3; \
            st.ss = 0; \
          } \
        else if (ch == LS1R || ch == LS2R || ch == LS3R) \
          { \
            st.mode = NORMAL; \
            st.gr = (ch == LS1R) ? 1 : (ch == LS2R) ? 2 : 3; \
            st.ss = 0; \
          } \
        else if (ch == 0x24) \
          st.mode = DESIGNATE_MB; \
        else if (ch >= 0x28 && ch <= 0x2b) \
          { \
            st.mode = G_SEL_1B; \
            st.gidx = ch - 0x28; \
          } \
        else \
          { \
            st.mode = NORMAL; \
            STANDARD_FROM_LOOP_ERR_HANDLER (1); \
          } \
        ++ inptr; \
        continue; \
      } \
 \
    if (__glibc_unlikely (st.mode == DESIGNATE_MB)) \
      { \
        /* ESC 0x24 F designates a 2-byte set directly to G0.  */ \
        if (ch == KANJI_set || ch == JISX0213_1_set || ch == JISX0213_2_set \
            || ch == EXTRA_SYMBOLS_set) \
          { \
            st.mode = NORMAL; \
            st.g[0] = ch; \
          } \
        else if (ch >= 0x28 && ch <= 0x2b) \
          { \
            st.mode = G_SEL_MB; \
            st.gidx = ch - 0x28; \
          } \
        else \
          { \
            st.mode = NORMAL; \
            STANDARD_FROM_LOOP_ERR_HANDLER (1); \
          } \
        ++ inptr; \
        continue; \
      } \
 \
    if (__glibc_unlikely (st.mode == G_SEL_1B)) \
      { \
        if (ch == ASCII_set || ch == ASCII_x_set || ch == JIS0201_KATA_set \
            || (ch >= 0x30 && ch <= 0x38)) \
          { \
            st.g[st.gidx] = ch; \
            st.mode = NORMAL; \
          } \
        else if (ch == 0x20) \
          st.mode = DRCS_SEL_1B; \
        else \
          { \
            st.mode = NORMAL; \
            STANDARD_FROM_LOOP_ERR_HANDLER (1); \
          } \
        ++ inptr; \
        continue; \
      } \
 \
    if (__glibc_unlikely (st.mode == G_SEL_MB)) \
      { \
        if (ch == KANJI_set || ch == JISX0213_1_set || ch == JISX0213_2_set \
            || ch == EXTRA_SYMBOLS_set) \
          { \
            st.g[st.gidx] = ch; \
            st.mode = NORMAL; \
          } \
        else if (ch == 0x20) \
          st.mode = DRCS_SEL_MB; \
        else \
          { \
            st.mode = NORMAL; \
            STANDARD_FROM_LOOP_ERR_HANDLER (1); \
          } \
        ++ inptr; \
        continue; \
      } \
 \
    if (__glibc_unlikely (st.mode == DRCS_SEL_1B)) \
      { \
        st.mode = NORMAL; \
        /* DRCS and macro sets are stored with bit 7 set (see enum g_set).  */ \
        if (ch == 0x70 || (ch >= 0x41 && ch <= 0x4f)) \
          st.g[st.gidx] = ch | 0x80; \
        else \
          STANDARD_FROM_LOOP_ERR_HANDLER (1); \
        ++ inptr; \
        continue; \
      } \
 \
    if (__glibc_unlikely (st.mode == DRCS_SEL_MB)) \
      { \
        st.mode = NORMAL; \
        if (ch == 0x40) \
          st.g[st.gidx] = ch | 0x80; \
        else \
          STANDARD_FROM_LOOP_ERR_HANDLER (1); \
        ++ inptr; \
        continue; \
      } \
 \
    if (st.mode == MB_2ND) \
      { \
        int gidx; \
        int i, len; \
        uint32_t out[MAX_NEEDED_OUTPUT]; \
 \
        gidx = (st.ss) ? st.ss : (ch & 0x80) ? st.gr : st.gl; \
        /* The trail byte must be a graphic char (not C0/C1), must not \
           be in GR while single-shifted, and must come from the same \
           half (GL/GR) as the lead byte.  */ \
        if (__glibc_unlikely (!(ch & 0x60) \
                              || (st.ss > 0 && (ch & 0x80)) \
                              || (st.prev & 0x80) != (ch & 0x80))) \
          { \
            st.mode = NORMAL; \
            st.ss = 0; \
            STANDARD_FROM_LOOP_ERR_HANDLER (1); \
          } \
        len = b24_char_conv (st.g[gidx], (st.prev & 0x7f), (ch & 0x7f), out); \
        if (len == 0) \
          { \
            st.mode = NORMAL; \
            st.ss = 0; \
            STANDARD_FROM_LOOP_ERR_HANDLER (1); \
          } \
        if (outptr + 4 * len > outend) \
          { \
            /* Keep the MB_2ND mode, the lead byte and the shift state: \
               this trail byte has not been consumed and will be seen \
               again when the conversion is resumed.  */ \
            result = __GCONV_FULL_OUTPUT; \
            break; \
          } \
        st.mode = NORMAL; \
        st.ss = 0; \
        for (i = 0; i < len; i++) \
          { \
            if (irreversible \
                && __builtin_expect (out[i] == __UNKNOWN_10646_CHAR, 0)) \
              ++ *irreversible; \
            put32 (outptr, out[i]); \
            outptr += 4; \
          } \
        ++ inptr; \
        continue; \
      } \
 \
    if (st.mode == NORMAL) \
      { \
        int gidx, set; \
 \
        if (__glibc_unlikely (!(ch & 0x60))) /* C0/C1 */ \
          { \
            if (ch == ESC) \
              st.mode = ESCAPE; \
            else if (ch == SS2) \
              st.ss = 2; \
            else if (ch == SS3) \
              st.ss = 3; \
            else if (ch == LS0) \
              { \
                st.ss = 0; \
                st.gl = 0; \
              } \
            else if (ch == LS1) \
              { \
                st.ss = 0; \
                st.gl = 1; \
              } \
            else if (ch == BEL || ch == BS || ch == CR) \
              { \
                st.ss = 0; \
                put32 (outptr, ch); \
                outptr += 4; \
              } \
            else if (ch == 0x09 || ch == 0x0b || ch == 0x0c || ch == 0x18 \
                     || ch == 0x1e || ch == 0x1f || (ch >= 0x80 && ch <= 0x8a) \
                     || ch == 0x99 || ch == 0x9a) \
              { \
                /* do nothing. just skip */ \
              } \
            else if (ch == 0x16 || ch == 0x8b || ch == 0x91 || ch == 0x93 \
                     || ch == 0x94 || ch == 0x97 || ch == 0x98) \
              { \
                /* control code followed by one parameter byte */ \
                st.mode = CTRL_SEQ; \
                st.skip = 1; \
              } \
            else if (ch == 0x1c) \
              { \
                /* control code followed by two parameter bytes */ \
                st.mode = CTRL_SEQ; \
                st.skip = 2; \
              } \
            else if (ch == COL || ch == CDC || ch == MACRO_CTRL \
                     || ch == CSI || ch == TIME) \
              { \
                /* variable-length sequence; parsed in CTRL_SEQ by .prev */ \
                st.mode = CTRL_SEQ; \
                st.skip = 0; \
                st.prev = ch; \
              } \
            else \
              STANDARD_FROM_LOOP_ERR_HANDLER (1); \
 \
            ++ inptr; \
            continue; \
          } \
 \
        if (__glibc_unlikely ((ch & 0x7f) == 0x20 || ch == 0x7f)) \
          { \
            st.ss = 0; \
            put32 (outptr, ch); \
            outptr += 4; \
            ++ inptr; \
            continue; \
          } \
        if (__glibc_unlikely (ch == 0xff)) \
          { \
            st.ss = 0; \
            put32 (outptr, __UNKNOWN_10646_CHAR); \
            if (irreversible) \
              ++ *irreversible; \
            outptr += 4; \
            ++ inptr; \
            continue; \
          } \
 \
        if (__glibc_unlikely (st.ss > 0 && (ch & 0x80))) \
          STANDARD_FROM_LOOP_ERR_HANDLER (1); \
 \
        gidx = (st.ss) ? st.ss : (ch & 0x80) ? st.gr : st.gl; \
        set = st.g[gidx]; \
        if (set == DRCS0_set || set == KANJI_set || set == JISX0213_1_set \
            || set == JISX0213_2_set || set == EXTRA_SYMBOLS_set) \
          { \
            /* 2-byte set: remember the lead byte and wait for the \
               trail byte; a pending single shift stays in effect.  */ \
            st.mode = MB_2ND; \
            st.prev = ch; \
          } \
        else \
          { \
            uint32_t out; \
 \
            st.ss = 0; \
            if (b24_char_conv (set, (ch & 0x7f), 0, &out) == 0) \
              STANDARD_FROM_LOOP_ERR_HANDLER (1); \
            if (out == __UNKNOWN_10646_CHAR && irreversible) \
              ++ *irreversible; \
            put32 (outptr, out); \
            outptr += 4; \
          } \
        ++ inptr; \
        continue; \
      } \
  }
866 #include <iconv/loop.c>
867 
868 
869 /* Next, define the other direction, from UCS-4 to ARIB-STD-B24.  */
870 
871 /* As MIN_INPUT is 4 (> 1), .cnt & .value must be put aside for skeleton.c.
872  * To reduce the size of the state and fit into mbstate_t,
873  * put constraints on G-set that can be locking-shift'ed to GL/GR.
874  * GL is limited to invoke G0/G1, GR to G2/G3. i.e. LS2,LS3, LS1R are not used.
875  * G0 is fixed to KANJI, G1 to ASCII.
876  * G2 can be either HIRAGANA/JISX0213_{1,2},
877  * G3 can be either KATAKANA/JISX0201_KATA/EXTRA_SYMBOLS.
878  * JISX0213_{1,2},EXTRA_SYMBOLS are invoked into GR by SS2/SS3
879  * if it is not already invoked to GR.
880  * plus, charset is referenced by an index instead of its designation char.
881  */
882 enum gset_idx {
883   KANJI_idx,
884   ASCII_idx,
885   HIRAGANA_idx,
886   KATAKANA_idx,
887   JIS0201_KATA_idx,
888   JISX0213_1_idx,
889   JISX0213_2_idx,
890   EXTRA_SYMBOLS_idx,
891 };
892 
/* Encoder state for the UCS-4 -> ARIB-STD-B24 direction, overlaid on
   __mbstate_t.  A state whose g2 is 0 (KANJI_idx, never a valid G2
   designation here) is treated as uninitialized and replaced by
   def_state_to; see INIT_PARAMS.  */
893 struct state_to {
894   /* __count */
895   uint32_t cnt:3;	/* for use in skeleton.c.*/
896   uint32_t gl:1;	/* 0: GL<-G0, 1: GL<-G1 */
897   uint32_t gr:1;	/* 0: GR<-G2, 1: GR<-G3 */
898   uint32_t g2:3;	/* Gset idx (enum gset_idx) which is designated to G2 */
899   uint32_t g3:3;	/* same to G3 */
900   uint32_t prev:21;	/* previously input, combining char (for JISX0213) */
901 
902   /* __value */
903   uint32_t __value;	/* used in skeleton.c */
904 } __attribute__((packed));
905 
/* Initial encoder state: GL invokes G0, GR invokes G2, HIRAGANA is
   designated to G2 and KATAKANA to G3, nothing is buffered.  Also used
   by out_ascii to reset the state after NUL or LF.  */
906 static const struct state_to def_state_to = {
907   .cnt = 0,
908   .gl = 0,
909   .gr = 0,
910   .g2 = HIRAGANA_idx,
911   .g3 = KATAKANA_idx,
912   .prev = 0,
913   .__value = 0
914 };
915 
/* Pass the conversion state pointer through to the loop function.  */
916 #define EXTRA_LOOP_DECLS	, __mbstate_t *statep
917 #define EXTRA_LOOP_ARGS		, statep
918 
/* Load a local working copy `st' of the encoder state from *statep.
   g2 == 0 (KANJI_idx) never occurs in an initialised state, so it is
   used to detect a fresh state, which is replaced by def_state_to.
   NOTE(review): no UPDATE_PARAMS is defined in this section; confirm
   how changes made to the local `st' are stored back to *statep.  */
919 #define INIT_PARAMS \
920   struct state_to st = *((struct state_to *) statep);			      \
921   if (st.g2 == 0)							      \
922     st = def_state_to;							      \
923 
/* Reload `st' from *statep, applying the same fresh-state check as
   INIT_PARAMS.  */
924 #define REINIT_PARAMS \
925   do									      \
926     {									      \
927       st = *((struct state_to *) statep);				      \
928       if (st.g2 == 0)							      \
929 	st = def_state_to;						      \
930     }									      \
931   while (0)
932 
933 #define LOOP_NEED_FLAGS
934 
/* Parameters of the UCS-4 -> ARIB loop, consumed by the inclusion of
   iconv/loop.c below.  */
935 #define MIN_NEEDED_INPUT	TO_LOOP_MIN_NEEDED_FROM
936 #define MAX_NEEDED_INPUT	TO_LOOP_MAX_NEEDED_FROM
937 #define MIN_NEEDED_OUTPUT	TO_LOOP_MIN_NEEDED_TO
938 #define MAX_NEEDED_OUTPUT	TO_LOOP_MAX_NEEDED_TO
939 #define LOOPFCT			TO_LOOP
940 
941 /* tables and functions used in BODY */
942 
943 /* Composition tables for each of the relevant combining characters.  */
/* Each entry pairs the code of a base character (`base') with the code
   of the precomposed character (`composed') obtained when that base is
   followed by a given combining character.  Entries are grouped by
   combining character; COMP_TABLE_IDX_xxxx / COMP_TABLE_LEN_xxxx give the
   start index and length of the group for U+xxxx.  BODY compares `base'
   against (st.prev & 0x7f7f) and emits `composed' through out_jisx0213
   with JISX0213_1_idx.  */
944 static const struct
945 {
946   uint16_t base;
947   uint16_t composed;
948 } comp_table_data[] =
949 {
950 #define COMP_TABLE_IDX_02E5 0
951 #define COMP_TABLE_LEN_02E5 1
952   { 0x2b64, 0x2b65 }, /* 0x12B65 = 0x12B64 U+02E5 */
953 #define COMP_TABLE_IDX_02E9 (COMP_TABLE_IDX_02E5 + COMP_TABLE_LEN_02E5)
954 #define COMP_TABLE_LEN_02E9 1
955   { 0x2b60, 0x2b66 }, /* 0x12B66 = 0x12B60 U+02E9 */
956 #define COMP_TABLE_IDX_0300 (COMP_TABLE_IDX_02E9 + COMP_TABLE_LEN_02E9)
957 #define COMP_TABLE_LEN_0300 5
958   { 0x295c, 0x2b44 }, /* 0x12B44 = 0x1295C U+0300 */
959   { 0x2b38, 0x2b48 }, /* 0x12B48 = 0x12B38 U+0300 */
960   { 0x2b37, 0x2b4a }, /* 0x12B4A = 0x12B37 U+0300 */
961   { 0x2b30, 0x2b4c }, /* 0x12B4C = 0x12B30 U+0300 */
962   { 0x2b43, 0x2b4e }, /* 0x12B4E = 0x12B43 U+0300 */
963 #define COMP_TABLE_IDX_0301 (COMP_TABLE_IDX_0300 + COMP_TABLE_LEN_0300)
964 #define COMP_TABLE_LEN_0301 4
965   { 0x2b38, 0x2b49 }, /* 0x12B49 = 0x12B38 U+0301 */
966   { 0x2b37, 0x2b4b }, /* 0x12B4B = 0x12B37 U+0301 */
967   { 0x2b30, 0x2b4d }, /* 0x12B4D = 0x12B30 U+0301 */
968   { 0x2b43, 0x2b4f }, /* 0x12B4F = 0x12B43 U+0301 */
969 #define COMP_TABLE_IDX_309A (COMP_TABLE_IDX_0301 + COMP_TABLE_LEN_0301)
970 #define COMP_TABLE_LEN_309A 14
971   { 0x242b, 0x2477 }, /* 0x12477 = 0x1242B U+309A */
972   { 0x242d, 0x2478 }, /* 0x12478 = 0x1242D U+309A */
973   { 0x242f, 0x2479 }, /* 0x12479 = 0x1242F U+309A */
974   { 0x2431, 0x247a }, /* 0x1247A = 0x12431 U+309A */
975   { 0x2433, 0x247b }, /* 0x1247B = 0x12433 U+309A */
976   { 0x252b, 0x2577 }, /* 0x12577 = 0x1252B U+309A */
977   { 0x252d, 0x2578 }, /* 0x12578 = 0x1252D U+309A */
978   { 0x252f, 0x2579 }, /* 0x12579 = 0x1252F U+309A */
979   { 0x2531, 0x257a }, /* 0x1257A = 0x12531 U+309A */
980   { 0x2533, 0x257b }, /* 0x1257B = 0x12533 U+309A */
981   { 0x253b, 0x257c }, /* 0x1257C = 0x1253B U+309A */
982   { 0x2544, 0x257d }, /* 0x1257D = 0x12544 U+309A */
983   { 0x2548, 0x257e }, /* 0x1257E = 0x12548 U+309A */
984   { 0x2675, 0x2678 }, /* 0x12678 = 0x12675 U+309A */
985 };
986 
/* Non-spacing (combining) UCS-4 characters and the 2-byte code that is
   emitted for them through out_kanji: { UCS-4, code }.

   The entries MUST stay in ascending UCS-4 order.  The lookup in the
   to-direction BODY takes the first and last keys as the bounds of a
   range pre-check and stops scanning at the first key larger than the
   input.  With U+20DD listed first, that pre-check could never succeed
   and none of these mappings was ever used.  */
static const uint32_t ucs4_to_nonsp_kanji[][2] = {
  {0x0300, 0x212e}, {0x0301, 0x212d}, {0x0302, 0x2130}, {0x0304, 0x2131},
  {0x0308, 0x212f}, {0x0332, 0x2132}, {0x20dd, 0x227e}
};
991 
/* UCS-4 code points and the 2-byte code emitted for them through
   out_extsym: { UCS-4, code }.
   The table is kept in ascending UCS-4 order because find_extsym_idx
   searches it with bsearch().  Two keys (U+260E and U+1F214) are listed
   twice with different codes; which of the two entries bsearch() returns
   for them is unspecified.  */
992 static const uint32_t ucs4_to_extsym[][2] = {
993   {0x00b2, 0x7c55}, {0x00b3, 0x7c56}, {0x00bc, 0x7d54}, {0x00bd, 0x7d50},
994   {0x00be, 0x7d55}, {0x0fd6, 0x7b2d}, {0x203c, 0x7d6e}, {0x2049, 0x7d6f},
995   {0x2113, 0x7d47}, {0x2116, 0x7d2d}, {0x2121, 0x7d2e}, {0x213b, 0x7c7b},
996   {0x2150, 0x7d5c}, {0x2151, 0x7d5e}, {0x2152, 0x7d5f}, {0x2153, 0x7d52},
997   {0x2154, 0x7d53}, {0x2155, 0x7d56}, {0x2156, 0x7d57}, {0x2157, 0x7d58},
998   {0x2158, 0x7d59}, {0x2159, 0x7d5a}, {0x215a, 0x7d5b}, {0x215b, 0x7d5d},
999   {0x2160, 0x7e21}, {0x2161, 0x7e22}, {0x2162, 0x7e23}, {0x2163, 0x7e24},
1000   {0x2164, 0x7e25}, {0x2165, 0x7e26}, {0x2166, 0x7e27}, {0x2167, 0x7e28},
1001   {0x2168, 0x7e29}, {0x2169, 0x7e2a}, {0x216a, 0x7e2b}, {0x216b, 0x7e2c},
1002   {0x2189, 0x7d51}, {0x2460, 0x7e61}, {0x2461, 0x7e62}, {0x2462, 0x7e63},
1003   {0x2463, 0x7e64}, {0x2464, 0x7e65}, {0x2465, 0x7e66}, {0x2466, 0x7e67},
1004   {0x2467, 0x7e68}, {0x2468, 0x7e69}, {0x2469, 0x7e6a}, {0x246a, 0x7e6b},
1005   {0x246b, 0x7e6c}, {0x246c, 0x7e6d}, {0x246d, 0x7e6e}, {0x246e, 0x7e6f},
1006   {0x246f, 0x7e70}, {0x2470, 0x7e2d}, {0x2471, 0x7e2e}, {0x2472, 0x7e2f},
1007   {0x2473, 0x7e30}, {0x2474, 0x7e31}, {0x2475, 0x7e32}, {0x2476, 0x7e33},
1008   {0x2477, 0x7e34}, {0x2478, 0x7e35}, {0x2479, 0x7e36}, {0x247a, 0x7e37},
1009   {0x247b, 0x7e38}, {0x247c, 0x7e39}, {0x247d, 0x7e3a}, {0x247e, 0x7e3b},
1010   {0x247f, 0x7e3c}, {0x2488, 0x7c31}, {0x2489, 0x7c32}, {0x248a, 0x7c33},
1011   {0x248b, 0x7c34}, {0x248c, 0x7c35}, {0x248d, 0x7c36}, {0x248e, 0x7c37},
1012   {0x248f, 0x7c38}, {0x2490, 0x7c39}, {0x2491, 0x7a4d}, {0x2492, 0x7a4e},
1013   {0x2493, 0x7a4f}, {0x24b9, 0x7b3e}, {0x24c8, 0x7b3f}, {0x24eb, 0x7e7b},
1014   {0x24ec, 0x7e7c}, {0x25b6, 0x7c50}, {0x25c0, 0x7c51}, {0x2600, 0x7d60},
1015   {0x2601, 0x7d61}, {0x2602, 0x7d62}, {0x2603, 0x7d73}, {0x260e, 0x7b4b},
1016   {0x260e, 0x7d7b}, {0x2613, 0x7b26}, {0x2614, 0x7d71}, {0x2616, 0x7d64},
1017   {0x2617, 0x7d65}, {0x2660, 0x7d6b}, {0x2663, 0x7d6a}, {0x2665, 0x7d69},
1018   {0x2666, 0x7d68}, {0x2668, 0x7b31}, {0x266c, 0x7d7a}, {0x2693, 0x7b35},
1019   {0x269e, 0x7d78}, {0x269f, 0x7d79}, {0x26a1, 0x7d75}, {0x26be, 0x7d30},
1020   {0x26bf, 0x7a67}, {0x26c4, 0x7d63}, {0x26c5, 0x7d70}, {0x26c6, 0x7d72},
1021   {0x26c7, 0x7d74}, {0x26c8, 0x7d76}, {0x26c9, 0x7d66}, {0x26ca, 0x7d67},
1022   {0x26cb, 0x7d6c}, {0x26cc, 0x7a21}, {0x26cd, 0x7a22}, {0x26cf, 0x7a24},
1023   {0x26d0, 0x7a25}, {0x26d1, 0x7a26}, {0x26d2, 0x7a28}, {0x26d3, 0x7a2a},
1024   {0x26d4, 0x7a2b}, {0x26d5, 0x7a29}, {0x26d6, 0x7a34}, {0x26d7, 0x7a35},
1025   {0x26d8, 0x7a36}, {0x26d9, 0x7a37}, {0x26da, 0x7a38}, {0x26db, 0x7a39},
1026   {0x26dc, 0x7a3a}, {0x26dd, 0x7a3b}, {0x26de, 0x7a3c}, {0x26df, 0x7a3d},
1027   {0x26e0, 0x7a3e}, {0x26e1, 0x7a3f}, {0x26e3, 0x7b21}, {0x26e8, 0x7b29},
1028   {0x26e9, 0x7b2c}, {0x26ea, 0x7b2e}, {0x26eb, 0x7b2f}, {0x26ec, 0x7b30},
1029   {0x26ed, 0x7b32}, {0x26ee, 0x7b33}, {0x26ef, 0x7b34}, {0x26f0, 0x7b37},
1030   {0x26f1, 0x7b38}, {0x26f2, 0x7b39}, {0x26f3, 0x7b3a}, {0x26f4, 0x7b3b},
1031   {0x26f5, 0x7b3c}, {0x26f6, 0x7b40}, {0x26f7, 0x7b46}, {0x26f8, 0x7b47},
1032   {0x26f9, 0x7b48}, {0x26fa, 0x7b49}, {0x26fb, 0x7b4c}, {0x26fc, 0x7b4d},
1033   {0x26fd, 0x7b4e}, {0x26fe, 0x7b4f}, {0x26ff, 0x7b51}, {0x2762, 0x7a23},
1034   {0x2776, 0x7e71}, {0x2777, 0x7e72}, {0x2778, 0x7e73}, {0x2779, 0x7e74},
1035   {0x277a, 0x7e75}, {0x277b, 0x7e76}, {0x277c, 0x7e77}, {0x277d, 0x7e78},
1036   {0x277e, 0x7e79}, {0x277f, 0x7e7a}, {0x27a1, 0x7c21}, {0x27d0, 0x7c54},
1037   {0x2a00, 0x7d6d}, {0x2b05, 0x7c22}, {0x2b06, 0x7c23}, {0x2b07, 0x7c24},
1038   {0x2b1b, 0x7a60}, {0x2b24, 0x7a61}, {0x2b2e, 0x7c26}, {0x2b2f, 0x7c25},
1039   {0x2b55, 0x7a40}, {0x2b56, 0x7b22}, {0x2b57, 0x7b23}, {0x2b58, 0x7b24},
1040   {0x2b59, 0x7b25}, {0x3012, 0x7b28}, {0x3016, 0x7c52}, {0x3017, 0x7c53},
1041   {0x3036, 0x7d2f}, {0x322a, 0x7d21}, {0x322b, 0x7d22}, {0x322c, 0x7d23},
1042   {0x322d, 0x7d24}, {0x322e, 0x7d25}, {0x322f, 0x7d26}, {0x3230, 0x7d27},
1043   {0x3231, 0x7c4d}, {0x3232, 0x7c4c}, {0x3233, 0x7c4a}, {0x3236, 0x7c4b},
1044   {0x3237, 0x7d28}, {0x3239, 0x7c4e}, {0x3244, 0x7c4f}, {0x3245, 0x7b2b},
1045   {0x3246, 0x7b2a}, {0x3247, 0x7c78}, {0x3248, 0x7a41}, {0x3249, 0x7a42},
1046   {0x324a, 0x7a43}, {0x324b, 0x7a44}, {0x324c, 0x7a45}, {0x324d, 0x7a46},
1047   {0x324e, 0x7a47}, {0x324f, 0x7a48}, {0x3251, 0x7e3d}, {0x3252, 0x7e3e},
1048   {0x3253, 0x7e3f}, {0x3254, 0x7e40}, {0x3255, 0x7e5b}, {0x3256, 0x7e5c},
1049   {0x3257, 0x7e5d}, {0x3258, 0x7e5e}, {0x3259, 0x7e5f}, {0x325a, 0x7e60},
1050   {0x325b, 0x7e7d}, {0x328b, 0x7b27}, {0x3299, 0x7a73}, {0x3371, 0x7d4d},
1051   {0x337b, 0x7d2c}, {0x337c, 0x7d2b}, {0x337d, 0x7d2a}, {0x337e, 0x7d29},
1052   {0x338f, 0x7d48}, {0x3390, 0x7d49}, {0x339d, 0x7c2d}, {0x339e, 0x7d4b},
1053   {0x33a0, 0x7c2e}, {0x33a1, 0x7c2b}, {0x33a2, 0x7d4c}, {0x33a4, 0x7c2f},
1054   {0x33a5, 0x7c2c}, {0x33ca, 0x7d4a}, {0x3402, 0x7521}, {0x351f, 0x752a},
1055   {0x37e2, 0x7541}, {0x3eda, 0x7574}, {0x4093, 0x7578}, {0x4103, 0x757e},
1056   {0x4264, 0x7626}, {0x4efd, 0x7523}, {0x4eff, 0x7524}, {0x4f9a, 0x7525},
1057   {0x4fc9, 0x7526}, {0x509c, 0x7527}, {0x511e, 0x7528}, {0x5186, 0x7c2a},
1058   {0x51bc, 0x7529}, {0x5307, 0x752b}, {0x5361, 0x752c}, {0x536c, 0x752d},
1059   {0x544d, 0x7530}, {0x5496, 0x7531}, {0x549c, 0x7532}, {0x54a9, 0x7533},
1060   {0x550e, 0x7534}, {0x554a, 0x7535}, {0x5672, 0x7536}, {0x56e4, 0x7537},
1061   {0x5733, 0x7538}, {0x5734, 0x7539}, {0x5880, 0x753b}, {0x59e4, 0x753c},
1062   {0x5a23, 0x753d}, {0x5a55, 0x753e}, {0x5bec, 0x753f}, {0x5e74, 0x7c27},
1063   {0x5eac, 0x7542}, {0x5f34, 0x7543}, {0x5f45, 0x7544}, {0x5fb7, 0x7545},
1064   {0x6017, 0x7546}, {0x6130, 0x7548}, {0x65e5, 0x7c29}, {0x6624, 0x7549},
1065   {0x66c8, 0x754a}, {0x66d9, 0x754b}, {0x66fa, 0x754c}, {0x66fb, 0x754d},
1066   {0x6708, 0x7c28}, {0x6852, 0x754e}, {0x6911, 0x7550}, {0x693b, 0x7551},
1067   {0x6a45, 0x7552}, {0x6a91, 0x7553}, {0x6adb, 0x7554}, {0x6bf1, 0x7558},
1068   {0x6ce0, 0x7559}, {0x6d2e, 0x755a}, {0x6dbf, 0x755c}, {0x6dca, 0x755d},
1069   {0x6df8, 0x755e}, {0x6f5e, 0x7560}, {0x6ff9, 0x7561}, {0x7064, 0x7562},
1070   {0x7147, 0x7565}, {0x71c1, 0x7566}, {0x7200, 0x7567}, {0x739f, 0x7568},
1071   {0x73a8, 0x7569}, {0x73c9, 0x756a}, {0x73d6, 0x756b}, {0x741b, 0x756c},
1072   {0x7421, 0x756d}, {0x7426, 0x756f}, {0x742a, 0x7570}, {0x742c, 0x7571},
1073   {0x7439, 0x7572}, {0x744b, 0x7573}, {0x7575, 0x7575}, {0x7581, 0x7576},
1074   {0x7772, 0x7577}, {0x78c8, 0x7579}, {0x78e0, 0x757a}, {0x7947, 0x757b},
1075   {0x79ae, 0x757c}, {0x79da, 0x7622}, {0x7a1e, 0x7623}, {0x7b7f, 0x7624},
1076   {0x7c31, 0x7625}, {0x7d8b, 0x7627}, {0x7fa1, 0x7628}, {0x8118, 0x7629},
1077   {0x813a, 0x762a}, {0x82ae, 0x762c}, {0x845b, 0x762d}, {0x84dc, 0x762e},
1078   {0x84ec, 0x762f}, {0x8559, 0x7630}, {0x85ce, 0x7631}, {0x8755, 0x7632},
1079   {0x87ec, 0x7633}, {0x880b, 0x7634}, {0x88f5, 0x7635}, {0x89d2, 0x7636},
1080   {0x8a79, 0x752e}, {0x8af6, 0x7637}, {0x8dce, 0x7638}, {0x8fbb, 0x7639},
1081   {0x8ff6, 0x763a}, {0x90dd, 0x763b}, {0x9127, 0x763c}, {0x912d, 0x763d},
1082   {0x91b2, 0x763e}, {0x9233, 0x763f}, {0x9288, 0x7640}, {0x9321, 0x7641},
1083   {0x9348, 0x7642}, {0x9592, 0x7643}, {0x96de, 0x7644}, {0x9903, 0x7645},
1084   {0x9940, 0x7646}, {0x9ad9, 0x7647}, {0x9bd6, 0x7648}, {0x9dd7, 0x7649},
1085   {0x9eb4, 0x764a}, {0x9eb5, 0x764b}, {0x9fc4, 0x754f}, {0x9fc5, 0x7621},
1086   {0x9fc6, 0x757d}, {0xfa10, 0x753a}, {0xfa11, 0x7540}, {0xfa45, 0x755b},
1087   {0xfa46, 0x755f}, {0xfa4a, 0x756e}, {0xfa6b, 0x7547}, {0xfa6c, 0x7563},
1088   {0xfa6d, 0x762b}, {0x1f100, 0x7c30}, {0x1f101, 0x7c40}, {0x1f102, 0x7c41},
1089   {0x1f103, 0x7c42}, {0x1f104, 0x7c43}, {0x1f105, 0x7c44}, {0x1f106, 0x7c45},
1090   {0x1f107, 0x7c46}, {0x1f108, 0x7c47}, {0x1f109, 0x7c48}, {0x1f10a, 0x7c49},
1091   {0x1f110, 0x7e41}, {0x1f111, 0x7e42}, {0x1f112, 0x7e43}, {0x1f113, 0x7e44},
1092   {0x1f114, 0x7e45}, {0x1f115, 0x7e46}, {0x1f116, 0x7e47}, {0x1f117, 0x7e48},
1093   {0x1f118, 0x7e49}, {0x1f119, 0x7e4a}, {0x1f11a, 0x7e4b}, {0x1f11b, 0x7e4c},
1094   {0x1f11c, 0x7e4d}, {0x1f11d, 0x7e4e}, {0x1f11e, 0x7e4f}, {0x1f11f, 0x7e50},
1095   {0x1f120, 0x7e51}, {0x1f121, 0x7e52}, {0x1f122, 0x7e53}, {0x1f123, 0x7e54},
1096   {0x1f124, 0x7e55}, {0x1f125, 0x7e56}, {0x1f126, 0x7e57}, {0x1f127, 0x7e58},
1097   {0x1f128, 0x7e59}, {0x1f129, 0x7e5a}, {0x1f12a, 0x7d3a}, {0x1f12b, 0x7c77},
1098   {0x1f12c, 0x7c76}, {0x1f12d, 0x7c57}, {0x1f131, 0x7a5e}, {0x1f13d, 0x7a5f},
1099   {0x1f13f, 0x7a52}, {0x1f142, 0x7a59}, {0x1f146, 0x7a53}, {0x1f14a, 0x7a50},
1100   {0x1f14b, 0x7a54}, {0x1f14c, 0x7a51}, {0x1f14d, 0x7a5d}, {0x1f14e, 0x7a72},
1101   {0x1f157, 0x7b3d}, {0x1f15f, 0x7b41}, {0x1f179, 0x7b45}, {0x1f17b, 0x7b4a},
1102   {0x1f17c, 0x7b50}, {0x1f17f, 0x7a30}, {0x1f18a, 0x7a31}, {0x1f18b, 0x7b42},
1103   {0x1f18c, 0x7b44}, {0x1f18d, 0x7b43}, {0x1f190, 0x7c79}, {0x1f200, 0x7a74},
1104   {0x1f210, 0x7a55}, {0x1f211, 0x7a56}, {0x1f212, 0x7a57}, {0x1f213, 0x7a58},
1105   {0x1f214, 0x7a5a}, {0x1f214, 0x7d3e}, {0x1f215, 0x7a5b}, {0x1f216, 0x7a5c},
1106   {0x1f217, 0x7a62}, {0x1f218, 0x7a63}, {0x1f219, 0x7a64}, {0x1f21a, 0x7a65},
1107   {0x1f21b, 0x7a66}, {0x1f21c, 0x7a68}, {0x1f21d, 0x7a69}, {0x1f21e, 0x7a6a},
1108   {0x1f21f, 0x7a6b}, {0x1f220, 0x7a6c}, {0x1f221, 0x7a6d}, {0x1f222, 0x7a6e},
1109   {0x1f223, 0x7a6f}, {0x1f224, 0x7a70}, {0x1f225, 0x7a71}, {0x1f226, 0x7c7a},
1110   {0x1f227, 0x7d3b}, {0x1f228, 0x7d3c}, {0x1f229, 0x7d3d}, {0x1f22a, 0x7d3f},
1111   {0x1f22b, 0x7d40}, {0x1f22c, 0x7d41}, {0x1f22d, 0x7d42}, {0x1f22e, 0x7d43},
1112   {0x1f22f, 0x7d44}, {0x1f230, 0x7d45}, {0x1f231, 0x7d46}, {0x1f240, 0x7d31},
1113   {0x1f241, 0x7d32}, {0x1f242, 0x7d33}, {0x1f243, 0x7d34}, {0x1f244, 0x7d35},
1114   {0x1f245, 0x7d36}, {0x1f246, 0x7d37}, {0x1f247, 0x7d38}, {0x1f248, 0x7d39},
1115   {0x1f6e7, 0x7b36}, {0x20158, 0x7522}, {0x20bb7, 0x752f}, {0x233cc, 0x7555},
1116   {0x233fe, 0x7556}, {0x235c4, 0x7557}, {0x242ee, 0x7564}
1117 };
1118 
1119 static int
out_ascii(struct state_to * st,uint32_t ch,unsigned char ** outptr,const unsigned char * outend)1120 out_ascii (struct state_to *st, uint32_t ch,
1121 	   unsigned char **outptr, const unsigned char *outend)
1122 {
1123   size_t esc_seqs;
1124   unsigned char *op = *outptr;
1125 
1126   esc_seqs = 0;
1127   if ((ch & 0x60) && st->gl == 0 && ch != 0x20 && ch != 0x7f && ch != 0xa0)
1128     ++ esc_seqs;
1129 
1130   if (__glibc_unlikely (op + esc_seqs + 1 > outend))
1131     return __GCONV_FULL_OUTPUT;
1132 
1133   if (esc_seqs > 0)
1134     {
1135       *op++ = LS1;
1136       st->gl = 1;
1137     }
1138   *op++ = ch & 0xff;
1139   if (ch == 0 || ch == LF)
1140     *st = def_state_to;
1141   *outptr = op;
1142   return __GCONV_OK;
1143 }
1144 
1145 static int
out_jisx0201(struct state_to * st,uint32_t ch,unsigned char ** outptr,const unsigned char * outend)1146 out_jisx0201 (struct state_to *st, uint32_t ch,
1147 	      unsigned char **outptr, const unsigned char *outend)
1148 {
1149   size_t esc_seqs;
1150   unsigned char *op = *outptr;
1151 
1152   esc_seqs = 0;
1153   if (st->g3 != JIS0201_KATA_idx)
1154     esc_seqs += 3;
1155   if (st->gr == 0) /* need LS3R */
1156     esc_seqs += 2;
1157 
1158   if (__glibc_unlikely (op + esc_seqs + 1 > outend))
1159     return __GCONV_FULL_OUTPUT;
1160 
1161   if (esc_seqs >= 3)
1162     {
1163       /* need charset designation */
1164       *op++ = ESC;
1165       *op++ = '\x2b'; /* designate single byte charset to G3 */
1166       *op++ = JIS0201_KATA_set;
1167       st->g3 = JIS0201_KATA_idx;
1168     }
1169   if (esc_seqs == 2 || esc_seqs == 5)
1170     {
1171       *op++ = ESC;
1172       *op++ = LS3R;
1173       st->gr = 1;
1174     }
1175   *op++ = ch & 0xff;
1176   *outptr = op;
1177   return __GCONV_OK;
1178 }
1179 
1180 static int
out_katakana(struct state_to * st,unsigned char ch,unsigned char ** outptr,const unsigned char * outend)1181 out_katakana (struct state_to *st, unsigned char ch,
1182 	      unsigned char **outptr, const unsigned char *outend)
1183 {
1184   size_t esc_seqs;
1185   unsigned char *op = *outptr;
1186 
1187   esc_seqs = 0;
1188   if (st->g3 != KATAKANA_idx)
1189     esc_seqs += 3;
1190   if (st->gr == 0) /* need LS3R */
1191     esc_seqs += 2;
1192 
1193   if (__glibc_unlikely (op + esc_seqs + 1 > outend))
1194     return __GCONV_FULL_OUTPUT;
1195 
1196   if (esc_seqs >= 3)
1197     {
1198       /* need charset designation */
1199       *op++ = ESC;
1200       *op++ = '\x2b'; /* designate single byte charset to G3 */
1201       *op++ = KATAKANA_set;
1202       st->g3 = KATAKANA_idx;
1203     }
1204   if (esc_seqs == 2 || esc_seqs == 5)
1205     {
1206       *op++ = ESC;
1207       *op++ = LS3R;
1208       st->gr = 1;
1209     }
1210   *op++ = ch | 0x80;
1211   *outptr = op;
1212   return __GCONV_OK;
1213 }
1214 
1215 static int
out_hiragana(struct state_to * st,unsigned char ch,unsigned char ** outptr,const unsigned char * outend)1216 out_hiragana (struct state_to *st, unsigned char ch,
1217 	      unsigned char **outptr, const unsigned char *outend)
1218 {
1219   size_t esc_seqs;
1220   unsigned char *op = *outptr;
1221 
1222   esc_seqs = 0;
1223   if (st->g2 != HIRAGANA_idx)
1224     esc_seqs += 3;
1225   if (st->gr == 1) /* need LS2R */
1226     esc_seqs += 2;
1227 
1228   if (__glibc_unlikely (op + esc_seqs + 1 > outend))
1229     return __GCONV_FULL_OUTPUT;
1230 
1231   if (esc_seqs >= 3)
1232     {
1233       /* need charset designation */
1234       *op++ = ESC;
1235       *op++ = '\x2a'; /* designate single byte charset to G2 */
1236       *op++ = HIRAGANA_set;
1237       st->g2 = HIRAGANA_idx;
1238     }
1239   if (esc_seqs == 2 || esc_seqs == 5)
1240     {
1241       *op++ = ESC;
1242       *op++ = LS2R;
1243       st->gr = 0;
1244     }
1245   *op++ = ch | 0x80;
1246   *outptr = op;
1247   return __GCONV_OK;
1248 }
1249 
1250 static int
is_kana_punc(uint32_t ch)1251 is_kana_punc (uint32_t ch)
1252 {
1253   int i;
1254   size_t len;
1255 
1256   len = NELEMS (hira_punc);
1257   for (i = 0; i < len; i++)
1258     if (ch == hira_punc[i])
1259       return i;
1260 
1261   len = NELEMS (kata_punc);
1262   for (i = 0; i < len; i++)
1263     if (ch == kata_punc[i])
1264       return i + NELEMS (hira_punc);
1265   return -1;
1266 }
1267 
1268 static int
out_kana_punc(struct state_to * st,int idx,unsigned char ** outptr,const unsigned char * outend)1269 out_kana_punc (struct state_to *st, int idx,
1270 	       unsigned char **outptr, const unsigned char *outend)
1271 {
1272   size_t len = NELEMS (hira_punc);
1273 
1274   if (idx < len)
1275     return out_hiragana (st, 0x77 + idx, outptr, outend);
1276   idx -= len;
1277   if (idx >= 2)
1278     {
1279       /* common punc. symbols shared by katakana/hiragana */
1280       /* guess which is used currently */
1281       if (st->gr == 0 && st->g2 == HIRAGANA_idx)
1282 	return out_hiragana (st, 0x77 + idx, outptr, outend);
1283       else if (st->gr == 1 && st->g3 == KATAKANA_idx)
1284 	return out_katakana (st, 0x77 + idx, outptr, outend);
1285       else if (st->g2 == HIRAGANA_idx && st->g3 != KATAKANA_idx)
1286 	return out_hiragana (st, 0x77 + idx, outptr, outend);
1287       /* fall through */
1288     }
1289   return out_katakana (st, 0x77 + idx, outptr, outend);
1290 }
1291 
1292 static int
out_kanji(struct state_to * st,uint32_t ch,unsigned char ** outptr,const unsigned char * outend)1293 out_kanji (struct state_to *st, uint32_t ch,
1294 	   unsigned char **outptr, const unsigned char *outend)
1295 {
1296   size_t esc_seqs;
1297   unsigned char *op = *outptr;
1298 
1299   esc_seqs = 0;
1300   if (st->gl)
1301     ++ esc_seqs;
1302 
1303   if (__glibc_unlikely (op + esc_seqs + 2 > outend))
1304     return __GCONV_FULL_OUTPUT;
1305 
1306   if (st->gl)
1307     {
1308       *op++ = LS0;
1309       st->gl = 0;
1310     }
1311   *op++ = (ch >> 8) & 0x7f;
1312   *op++ = ch & 0x7f;
1313   *outptr = op;
1314   return __GCONV_OK;
1315 }
1316 
1317 /* convert JISX0213_{1,2} to ARIB-STD-B24 */
1318 /* assert(set_idx == JISX0213_1_idx || set_idx == JISX0213_2_idx); */
1319 static int
out_jisx0213(struct state_to * st,uint32_t ch,int set_idx,unsigned char ** outptr,const unsigned char * outend)1320 out_jisx0213 (struct state_to *st, uint32_t ch, int set_idx,
1321 	      unsigned char **outptr, const unsigned char *outend)
1322 {
1323   size_t esc_seqs;
1324   unsigned char *op = *outptr;
1325 
1326   esc_seqs = 0;
1327   if (st->g2 != set_idx)
1328     esc_seqs += 4; /* designate to G2 */
1329   if (st->gr) /* if GR does not designate G2 */
1330     esc_seqs ++; /* SS3 */
1331 
1332   if (__glibc_unlikely (op + esc_seqs + 2 > outend))
1333     return __GCONV_FULL_OUTPUT;
1334 
1335   if (esc_seqs >= 4)
1336     {
1337       /* need charset designation */
1338       *op++ = ESC;
1339       *op++ = '\x24'; /* designate multibyte charset */
1340       *op++ = '\x2a'; /* to G2 */
1341       *op++ = (set_idx == JISX0213_1_idx) ? JISX0213_1_set : JISX0213_2_set;
1342       st->g2 = JISX0213_1_idx;
1343     }
1344   if (st->gr)
1345     *op++ = SS2; /* GR designates G3 now. insert SS2 */
1346   else
1347     ch |= 0x8080; /* use GR(G2) */
1348   *op++ = (ch >> 8) & 0xff;
1349   *op++ = ch & 0xff;
1350   *outptr = op;
1351   return __GCONV_OK;
1352 }
1353 
1354 static int
out_extsym(struct state_to * st,uint32_t ch,unsigned char ** outptr,const unsigned char * outend)1355 out_extsym (struct state_to *st, uint32_t ch,
1356 	    unsigned char **outptr, const unsigned char *outend)
1357 {
1358   size_t esc_seqs;
1359   unsigned char *op = *outptr;
1360 
1361   esc_seqs = 0;
1362   if (st->g3 != EXTRA_SYMBOLS_idx)
1363     esc_seqs += 4;
1364   if (st->gr == 0) /* if GR designates G2, use SS3 */
1365     ++ esc_seqs;
1366 
1367   if (__glibc_unlikely (op + esc_seqs + 2 > outend))
1368     return __GCONV_FULL_OUTPUT;
1369 
1370   if (esc_seqs >= 4)
1371     {
1372       /* need charset designation */
1373       *op++ = ESC;
1374       *op++ = '\x24'; /* designate multibyte charset */
1375       *op++ = '\x2b'; /* to G3 */
1376       *op++ = EXTRA_SYMBOLS_set;
1377       st->g3 = EXTRA_SYMBOLS_idx;
1378     }
1379   if (st->gr == 0)
1380     *op++ = SS3;
1381   else
1382     ch |= 0x8080;
1383   *op++ = (ch >> 8) & 0xff;
1384   *op++ = ch & 0xff;
1385   *outptr = op;
1386   return __GCONV_OK;
1387 }
1388 
1389 static int
out_buffered(struct state_to * st,unsigned char ** outptr,const unsigned char * outend)1390 out_buffered (struct state_to *st,
1391 	      unsigned char **outptr, const unsigned char *outend)
1392 {
1393   int r;
1394 
1395   if (st->prev == 0)
1396     return __GCONV_OK;
1397 
1398   if (st->prev >> 16)
1399     r = out_jisx0213 (st, st->prev & 0x7f7f, JISX0213_1_idx, outptr, outend);
1400   else if ((st->prev & 0x7f00) == 0x2400)
1401     r = out_hiragana (st, st->prev, outptr, outend);
1402   else if ((st->prev & 0x7f00) == 0x2500)
1403     r = out_katakana (st, st->prev, outptr, outend);
1404   else /* should not be reached */
1405     r = out_kanji (st, st->prev, outptr, outend);
1406 
1407   st->prev = 0;
1408   return r;
1409 }
1410 
/* bsearch()/qsort() comparator for arrays whose elements start with a
   uint32_t key.  Returns a negative, zero or positive value when the key
   at A is less than, equal to or greater than the key at B.
   The result is built from comparisons rather than a subtraction: the
   unsigned difference converted to int has the wrong sign whenever the
   two keys are 2^31 or more apart.  */
static int
cmp_u32 (const void *a, const void *b)
{
  const uint32_t lhs = *(const uint32_t *) a;
  const uint32_t rhs = *(const uint32_t *) b;

  return (lhs > rhs) - (lhs < rhs);
}
1416 
1417 static int
find_extsym_idx(uint32_t ch)1418 find_extsym_idx (uint32_t ch)
1419 {
1420   const uint32_t (*p)[2];
1421 
1422   p = bsearch (&ch, ucs4_to_extsym,
1423 	       NELEMS (ucs4_to_extsym), sizeof (ucs4_to_extsym[0]), cmp_u32);
1424   return p ? (p - ucs4_to_extsym) : -1;
1425 }
1426 
/* Body of the UCS-4 -> ARIB-STD-B24 conversion loop (expanded inside
   iconv/loop.c).  It converts the UCS-4 character at inptr, trying in
   this order:
     1. composition with the base character buffered in st.prev;
     2. ASCII, the accepted control codes (NUL, LF, CR, BS) and NBSP;
     3. the JIS X 0201 table (1-byte katakana, yen sign, overline);
     4. kana punctuation available in the 1-byte kana sets;
     5. non-spacing marks listed in ucs4_to_nonsp_kanji;
     6. JIS X 0208 (hiragana, katakana, kanji);
     7. the extra symbols of ucs4_to_extsym;
     8. remaining JIS X 0213 characters.
   A character that may start a combining sequence is not written at
   once but kept in st.prev until the next character is seen.  Whenever
   an output routine fails, the input pointer is not advanced and the
   buffered character is kept, so the same input can be retried.  */
#define BODY \
  {									      \
    uint32_t ch, jch;							      \
    unsigned char buf[2];						      \
    int r;								      \
									      \
    ch = get32 (inptr);							      \
    if (st.prev != 0)							      \
      {									      \
	/* Attempt to combine the last character with this one.  */	      \
	unsigned int idx;						      \
	unsigned int len;						      \
	uint32_t held = st.prev;					      \
									      \
	if (ch == 0x02e5)						      \
	  idx = COMP_TABLE_IDX_02E5, len = COMP_TABLE_LEN_02E5;		      \
	else if (ch == 0x02e9)						      \
	  idx = COMP_TABLE_IDX_02E9, len = COMP_TABLE_LEN_02E9;		      \
	else if (ch == 0x0300)						      \
	  idx = COMP_TABLE_IDX_0300, len = COMP_TABLE_LEN_0300;		      \
	else if (ch == 0x0301)						      \
	  idx = COMP_TABLE_IDX_0301, len = COMP_TABLE_LEN_0301;		      \
	else if (ch == 0x309a)						      \
	  idx = COMP_TABLE_IDX_309A, len = COMP_TABLE_LEN_309A;		      \
	else								      \
	  idx = 0, len = 0;						      \
									      \
	for (;len > 0; ++idx, --len)					      \
	  if (comp_table_data[idx].base == (st.prev & 0x7f7f))		      \
	    break;							      \
									      \
	if (len > 0)							      \
	  {								      \
	    /* Output the combined character.  */			      \
	    /* We know the combined character is in JISX0213 plane 1 */	      \
	    r = out_jisx0213 (&st, comp_table_data[idx].composed,	      \
				JISX0213_1_idx, &outptr, outend);	      \
	    /* Forget the base only once the composed character has	      \
	       really been written; on failure it must still be there	      \
	       when this input character is retried.  */		      \
	    if (r == __GCONV_OK)					      \
	      st.prev = 0;						      \
	    goto next;							      \
	  }								      \
									      \
	/* not a combining character */					      \
	/* Output the buffered character. */				      \
	/* We know it is in JISX0208(HIRA/KATA) or in JISX0213 plane 1. */    \
	r = out_buffered (&st, &outptr, outend);			      \
	if (r != __GCONV_OK)						      \
	  {								      \
	    /* Nothing was written: keep the character buffered.  */	      \
	    st.prev = held;						      \
	    result = r;							      \
	    break;							      \
	  }								      \
	/* fall through & output the current character (ch). */		      \
     }									      \
									      \
    /* ASCII or C0/C1 or NBSP */					      \
    if (ch <= 0xa0)							      \
      {									      \
	if ((ch & 0x60) || ch == 0 || ch == LF || ch == CR || ch == BS)	      \
	  r = out_ascii (&st, ch, &outptr, outend);			      \
	else								      \
	  STANDARD_TO_LOOP_ERR_HANDLER (4);				      \
	goto next;							      \
      }									      \
									      \
    /* half-width KATAKANA */						      \
    if (ucs4_to_jisx0201 (ch, buf) != __UNKNOWN_10646_CHAR)		      \
      {									      \
	if (__glibc_unlikely (buf[0] < 0x80)) /* yen sign or overline */      \
	  r = out_ascii (&st, buf[0], &outptr, outend);			      \
	else								      \
	  r = out_jisx0201 (&st, buf[0], &outptr, outend);		      \
	goto next;							      \
      }									      \
									      \
    /* check kana punct. symbols (prefer 1-Byte charset over KANJI_set) */    \
    r = is_kana_punc (ch);						      \
    if (r >= 0)								      \
      {									      \
	r = out_kana_punc (&st, r, &outptr, outend);			      \
	goto next;							      \
      }									      \
									      \
    /* Non-spacing marks written with out_kanji.  The (7-entry) table is     \
       scanned completely so that the lookup does not depend on the	      \
       order of its entries.  */					      \
    {									      \
      size_t i;								      \
									      \
      for (i = 0; i < NELEMS (ucs4_to_nonsp_kanji); i++)		      \
	if (ch == ucs4_to_nonsp_kanji[i][0])				      \
	  {								      \
	    r = out_kanji (&st, ucs4_to_nonsp_kanji[i][1],		      \
			   &outptr, outend);				      \
	    goto next;							      \
	  }								      \
    }									      \
									      \
    jch = ucs4_to_jisx0213 (ch);					      \
									      \
    if (ucs4_to_jisx0208 (ch, buf, 2) != __UNKNOWN_10646_CHAR)		      \
      {									      \
	if (jch & 0x0080)						      \
	  {								      \
	    /* A possible match in comp_table_data.  Buffer it.  */	      \
									      \
	    /* We know it's a JISX 0213 plane 1 character.  */		      \
	    assert ((jch & 0x8000) == 0);				      \
									      \
	    st.prev = jch & 0x7f7f;					      \
	    r = __GCONV_OK;						      \
	    goto next;							      \
	  }								      \
	/* check HIRAGANA/KATAKANA (prefer 1-Byte charset over KANJI_set) */  \
	if (buf[0] == 0x24)						      \
	  r = out_hiragana (&st, buf[1], &outptr, outend);		      \
	else if (buf[0] == 0x25)					      \
	  r = out_katakana (&st, buf[1], &outptr, outend);		      \
	else if (jch == 0x227e || (jch >= 0x212d && jch <= 0x2132))	      \
	  /* these codes are the ones out_kanji is given for the	      \
	     non-spacing marks above, so use JISX0213 plane 1 here */	      \
	  r = out_jisx0213 (&st, jch, JISX0213_1_idx, &outptr, outend);	      \
	else								      \
	  r = out_kanji (&st, jch, &outptr, outend);			      \
	goto next;							      \
      }									      \
									      \
    if (jch & 0x0080)							      \
      {									      \
	/* Possible start of a combining sequence that is not in	      \
	   JISX0208: buffer it, marked with bit 16 so that out_buffered     \
	   writes it through out_jisx0213.  */				      \
	st.prev = (jch & 0x7f7f) | 0x10000;				      \
	r = __GCONV_OK;							      \
	goto next;							      \
      }									      \
									      \
    /* KANJI shares some chars with EXTRA_SYMBOLS, but prefer extra symbols*/ \
    r = find_extsym_idx (ch);						      \
    if (r >= 0)								      \
      {									      \
	ch = ucs4_to_extsym[r][1];					      \
	r = out_extsym (&st, ch, &outptr, outend);			      \
	goto next;							      \
      }									      \
									      \
    if (jch != 0)							      \
      {									      \
	r = out_jisx0213 (&st, jch & 0x7f7f,				      \
			  (jch & 0x8000) ? JISX0213_2_idx : JISX0213_1_idx,   \
			  &outptr, outend);				      \
	goto next;							      \
      }									      \
									      \
    UNICODE_TAG_HANDLER (ch, 4);					      \
    STANDARD_TO_LOOP_ERR_HANDLER (4);					      \
									      \
next:									      \
    if (r != __GCONV_OK)						      \
      {									      \
	result = r;							      \
	break;								      \
      }									      \
    inptr += 4;								      \
  }
1587 #include <iconv/loop.c>
1588 
1589 /* Now define the toplevel functions.  */
1590 #include <iconv/skeleton.c>
1591