1 /* Conversion module for ARIB-STD-B24.
2 Copyright (C) 1998-2014 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <http://www.gnu.org/licenses/>. */
18
19 /*
20 * Conversion module for the character encoding
21 * defined in ARIB STD-B24 Volume 1, Part 2, Chapter 7.
22 * http://www.arib.or.jp/english/html/overview/doc/6-STD-B24v5_2-1p3-E1.pdf
23 * http://www.arib.or.jp/english/html/overview/sb_ej.html
24 * https://sites.google.com/site/unicodesymbols/Home/japanese-tv-symbols/
 * It is based on ISO-2022, and used in Japanese digital television.
26 *
27 * Note 1: "mosaic" characters are not supported in this module.
28 * Note 2: Control characters (for subtitles) are discarded.
29 */
30
31 #include <assert.h>
32 #include <dlfcn.h>
33 #include <gconv.h>
34 #include <stdint.h>
35 #include <stdlib.h>
36 #include <string.h>
37
38 #include "jis0201.h"
39 #include "jis0208.h"
40 #include "jisx0213.h"
41
/* Configuration consumed by the generic `gconv' skeleton.  */
#define CHARSET_NAME			"ARIB-STD-B24//"
#define DEFINE_INIT			1
#define DEFINE_FINI			1
#define ONE_DIRECTION			0

/* Names of the two conversion loops generated below.  */
#define FROM_LOOP			from_aribb24_loop
#define TO_LOOP				to_aribb24_loop

/* ARIB-STD-B24 -> UCS-4: the input is consumed one byte per step; a
   step writes at least one and at most four 4-byte UCS-4 values.  */
#define FROM_LOOP_MIN_NEEDED_FROM	1
#define FROM_LOOP_MAX_NEEDED_FROM	1
#define FROM_LOOP_MIN_NEEDED_TO		4
#define FROM_LOOP_MAX_NEEDED_TO		(4 * 4)

/* UCS-4 -> ARIB-STD-B24: one 4-byte UCS-4 value is consumed per step;
   a step writes between 1 and 7 output bytes.  */
#define TO_LOOP_MIN_NEEDED_FROM		4
#define TO_LOOP_MAX_NEEDED_FROM		4
#define TO_LOOP_MIN_NEEDED_TO		1
#define TO_LOOP_MAX_NEEDED_TO		7
57
/* Locals set up before the conversion loops run: a pointer to the
   conversion state, a slot for a saved copy of it (used by
   SAVE_RESET_STATE), and an initial OK status.  */
#define PREPARE_LOOP \
  __mbstate_t *statep = data->__statep;					      \
  __mbstate_t saved_state;						      \
  status = __GCONV_OK;
62
/* Since we might have to reset the input pointer we must be able to
   save and restore the state.  */
/* SAVE_RESET_STATE (1) stores a copy of *statep in saved_state;
   SAVE_RESET_STATE (0) writes that copy back to *statep.  */
#define SAVE_RESET_STATE(Save) \
  {									      \
    if (!(Save))							      \
      *statep = saved_state;						      \
    else								      \
      saved_state = *statep;						      \
  }
72
73 /* During UCS-4 to ARIB-STD-B24 conversion, the state contains the last
74 two bytes to be output, in .prev member. */
75
76 /* Since this is a stateful encoding we have to provide code which resets
77 the output state to the initial state. This has to be done during the
78 flushing. */
/* Flush-time reset.  In the UCS-4 -> ARIB direction out_buffered () is
   called on the state first (defined elsewhere in this file; presumably
   it writes the character still held in the state -- confirm there).
   The state is then zeroed; no shift sequence is written here.
   NOTE(review): the state is cleared even when out_buffered () returns
   a status other than __GCONV_OK -- confirm that this is intended.  */
#define EMIT_SHIFT_TO_INIT \
  {									      \
    if (!FROM_DIRECTION)						      \
      status = out_buffered ((struct state_to *) data->__statep,	      \
			     &outbuf, outend);				      \
    /* Nothing else to emit; an all-zero state is the initial state.  */     \
    memset (data->__statep, '\0', sizeof (*data->__statep));		      \
  }
87
88
/* C0 control codes that change the decoder state.  */
#define ESC		0x1b	/* Starts an escape sequence.  */
#define SS2		0x19	/* Single shift to G2.  */
#define SS3		0x1d	/* Single shift to G3.  */
#define LS0		0x0f	/* Locking shift: G0 into GL.  */
#define LS1		0x0e	/* Locking shift: G1 into GL.  */

/* Final bytes of the locking shifts that are introduced by ESC.  */
#define LS2		0x6e	/* G2 into GL.  */
#define LS3		0x6f	/* G3 into GL.  */
#define LS1R		0x7e	/* G1 into GR.  */
#define LS2R		0x7d	/* G2 into GR.  */
#define LS3R		0x7c	/* G3 into GR.  */

/* Control codes that are passed through to the output.  */
#define LF		0x0a
#define CR		0x0d
#define BEL		0x07
#define BS		0x08

/* C1 control codes that introduce a variable-length control sequence.  */
#define COL		0x90
#define CDC		0x92
#define MACRO_CTRL	0x95
#define CSI		0x9b
#define TIME		0x9d
112
/* Graphic code sets.  The value of each ordinary set is the final byte
   that designates it; DRCS and macro sets use their final byte with
   bit 7 set so that they cannot collide with the ordinary sets.  */
enum g_set
{
  /* 2-byte sets.  */
  KANJI_set = 0x42,
  JISX0213_1_set = 0x39,
  JISX0213_2_set = 0x3a,
  EXTRA_SYMBOLS_set = 0x3b,

  /* 1-byte sets.  */
  ASCII_set = 0x40,
  ASCII_x_set = 0x4a,
  HIRAGANA_set = 0x30,
  KATAKANA_set = 0x31,
  MOSAIC_A_set = 0x32,
  MOSAIC_B_set = 0x33,
  MOSAIC_C_set = 0x34,
  MOSAIC_D_set = 0x35,
  PROP_ASCII_set = 0x36,
  PROP_HIRA_set = 0x37,
  PROP_KATA_set = 0x38,
  JIS0201_KATA_set = 0x49,

  /* DRCS and macro sets.  Only DRCS0 is a 2-byte set.  */
  DRCS0_set = 0x80 | 0x40,
  DRCS1_set = 0x80 | 0x41,
  DRCS15_set = 0x80 | 0x4f,
  MACRO_set = 0x80 | 0x70,
};
138
139
140 /* First define the conversion function from ARIB-STD-B24 to UCS-4. */
141
/* Input modes of the ARIB-STD-B24 decoder (stored in state_from.mode).  */
enum mode_e
{
  NORMAL,	/* Expecting a graphic character or a control code.  */
  ESCAPE,	/* ESC seen.  */
  G_SEL_1B,	/* ESC 0x28..0x2b seen: designating a 1-byte set.  */
  G_SEL_MB,	/* ESC 0x24 0x28..0x2b seen: designating a 2-byte set.  */
  CTRL_SEQ,	/* Skipping the parameters of a control sequence.  */
  DESIGNATE_MB,	/* ESC 0x24 seen.  */
  DRCS_SEL_1B,	/* 0x20 seen in G_SEL_1B: a 1-byte DRCS/macro set follows.  */
  DRCS_SEL_MB,	/* 0x20 seen in G_SEL_MB: a 2-byte DRCS set follows.  */
  MB_2ND,	/* First byte of a 2-byte character seen.  */
};
154
/* Decoder state for ARIB-STD-B24 -> UCS-4, overlaid on __mbstate_t.
   __GCONV_INPUT_INCOMPLETE is never used in this direction, so apart
   from the low three bits of __count both __count and __value are free
   for our own bookkeeping.  */
struct state_from
{
  /* Overlays __count.  */
  uint8_t cnt:3;	/* Reserved for skeleton.c; always 0.  */
  uint8_t pad0:1;
  uint8_t gl:2;		/* Index of the G-set invoked into GL.  */
  uint8_t gr:2;		/* Index of the G-set invoked into GR.  */
  uint8_t ss:2;		/* Pending single shift: 0 none, 2 SS2, 3 SS3.  */
  uint8_t gidx:2;	/* G-set index currently being designated.  */
  uint8_t mode:4;	/* Current input mode (enum mode_e).  */
  uint8_t skip;		/* CTRL_SEQ: number of bytes still to skip.  */
  uint8_t prev;		/* MB_2ND: the first byte of the character.
			   CTRL_SEQ with skip == 0: the control code
			   whose sequence is being read.  */

  /* Overlays __value.  */
  uint8_t g[4];		/* Code sets designated to G0..G3.  */
} __attribute__((packed));
175
176 static const struct state_from def_state_from = {
177 .cnt = 0,
178 .gl = 0,
179 .gr = 2,
180 .ss = 0,
181 .gidx = 0,
182 .mode = NORMAL,
183 .skip = 0,
184 .prev = '\0',
185 .g[0] = KANJI_set,
186 .g[1] = ASCII_set,
187 .g[2] = HIRAGANA_set,
188 .g[3] = KATAKANA_set,
189 };
190
/* The decoder loop receives the conversion state as an extra argument.  */
#define EXTRA_LOOP_DECLS	, __mbstate_t *statep
#define EXTRA_LOOP_ARGS		, statep

/* Work on a local copy of the state.  No valid code set has the value
   0, so g[0] == 0 identifies a zeroed state, which is replaced by the
   defaults.  */
#define INIT_PARAMS \
  struct state_from st = *((struct state_from *) statep);		      \
  if (st.g[0] == 0)							      \
    st = def_state_from;

/* Write the local copy back into the caller's state.  */
#define UPDATE_PARAMS		*statep = *((__mbstate_t *) &st)

#define LOOP_NEED_FLAGS

#define LOOPFCT			FROM_LOOP
#define MIN_NEEDED_INPUT	FROM_LOOP_MIN_NEEDED_FROM
#define MAX_NEEDED_INPUT	FROM_LOOP_MAX_NEEDED_FROM
#define MIN_NEEDED_OUTPUT	FROM_LOOP_MIN_NEEDED_TO
#define MAX_NEEDED_OUTPUT	FROM_LOOP_MAX_NEEDED_TO
208
209 /* tables and functions used in BODY */
210
/* UCS-4 values for codes 0x77..0x7e of the katakana sets (indexed by
   code - 0x77).  The hiragana sets share the entries for 0x79..0x7e.  */
static const uint16_t kata_punc[] =
{
  0x30fd,	/* 0x77 */
  0x30fe,	/* 0x78 */
  0x30fc,	/* 0x79 */
  0x3002,	/* 0x7a */
  0x300c,	/* 0x7b */
  0x300d,	/* 0x7c */
  0x3001,	/* 0x7d */
  0x30fb	/* 0x7e */
};
214
/* UCS-4 values for codes 0x77 and 0x78 of the hiragana sets (indexed
   by code - 0x77).  */
static const uint16_t hira_punc[] =
{
  0x309d,	/* 0x77 */
  0x309e	/* 0x78 */
};
218
/* Combining characters used for KANJI codes 0x212d..0x2132 (indexed by
   second byte - 0x2d), replacing the JIS X 0213 mapping.  */
static const uint16_t nonspacing_symbol[] =
{
  0x0301,	/* 0x212d */
  0x0300,	/* 0x212e */
  0x0308,	/* 0x212f */
  0x0302,	/* 0x2130 */
  0x0304,	/* 0x2131 */
  0x0332	/* 0x2132 */
};
222
/* Additional kanji of the EXTRA_SYMBOLS set: all 96 cells of row 85
   followed by cells 0..43 of row 86.  The index is
   (row - 85) * 96 + cell; a value of 0 marks an unassigned cell.  */
static const uint32_t extra_kanji[] =
{
  /* Row 85, cells 0..15.  */
  0,       0x3402,  0x20158, 0x4efd,  0x4eff,  0x4f9a,  0x4fc9,  0x509c,
  0x511e,  0x51bc,  0x351f,  0x5307,  0x5361,  0x536c,  0x8a79,  0x20bb7,
  /* Row 85, cells 16..31.  */
  0x544d,  0x5496,  0x549c,  0x54a9,  0x550e,  0x554a,  0x5672,  0x56e4,
  0x5733,  0x5734,  0xfa10,  0x5880,  0x59e4,  0x5a23,  0x5a55,  0x5bec,
  /* Row 85, cells 32..47.  */
  0xfa11,  0x37e2,  0x5eac,  0x5f34,  0x5f45,  0x5fb7,  0x6017,  0xfa6b,
  0x6130,  0x6624,  0x66c8,  0x66d9,  0x66fa,  0x66fb,  0x6852,  0x9fc4,
  /* Row 85, cells 48..63.  */
  0x6911,  0x693b,  0x6a45,  0x6a91,  0x6adb,  0x233cc, 0x233fe, 0x235c4,
  0x6bf1,  0x6ce0,  0x6d2e,  0xfa45,  0x6dbf,  0x6dca,  0x6df8,  0xfa46,
  /* Row 85, cells 64..79.  */
  0x6f5e,  0x6ff9,  0x7064,  0xfa6c,  0x242ee, 0x7147,  0x71c1,  0x7200,
  0x739f,  0x73a8,  0x73c9,  0x73d6,  0x741b,  0x7421,  0xfa4a,  0x7426,
  /* Row 85, cells 80..95.  */
  0x742a,  0x742c,  0x7439,  0x744b,  0x3eda,  0x7575,  0x7581,  0x7772,
  0x4093,  0x78c8,  0x78e0,  0x7947,  0x79ae,  0x9fc6,  0x4103,  0,

  /* Row 86, cells 0..15.  */
  0,       0x9fc5,  0x79da,  0x7a1e,  0x7b7f,  0x7c31,  0x4264,  0x7d8b,
  0x7fa1,  0x8118,  0x813a,  0xfa6d,  0x82ae,  0x845b,  0x84dc,  0x84ec,
  /* Row 86, cells 16..31.  */
  0x8559,  0x85ce,  0x8755,  0x87ec,  0x880b,  0x88f5,  0x2ec6,  0x8af6,
  0x8dce,  0x8fbb,  0x8ff6,  0x90dd,  0x9127,  0x912d,  0x91b2,  0x9233,
  /* Row 86, cells 32..43.  */
  0x9288,  0x9321,  0x9348,  0x9592,  0x96de,  0x9903,  0x9940,  0x9ad9,
  0x9bd6,  0x9dd7,  0x9eb4,  0x9eb5
};
255
/* UCS-4 values of the additional symbols in rows 90..94, indexed as
   [row - 90][cell].  A value of 0 marks an unassigned cell.  */
static const uint32_t extra_symbols[5][96] =
{
  /* Row 90.  */
  {
    /* Cells 0..15.  */
    0,       0x26cc,  0x26cd,  0x2757,  0x26cf,  0x26d0,  0x26d1,  0,
    0x26d2,  0x26d5,  0x26d3,  0x26d4,  0,       0,       0,       0,
    /* Cells 16..31.  */
    0x1f17f, 0x1f18a, 0,       0,       0x26d6,  0x26d7,  0x26d8,  0x26d9,
    0x26da,  0x26db,  0x26dc,  0x26dd,  0x26de,  0x26df,  0x26e0,  0x26e1,
    /* Cells 32..47.  */
    0x2b55,  0x3248,  0x3249,  0x324a,  0x324b,  0x324c,  0x324d,  0x324e,
    0x324f,  0,       0,       0,       0,       0x2491,  0x2492,  0x2493,
    /* Cells 48..63.  */
    0x1f14a, 0x1f14c, 0x1f13f, 0x1f146, 0x1f14b, 0x1f210, 0x1f211, 0x1f212,
    0x1f213, 0x1f142, 0x1f214, 0x1f215, 0x1f216, 0x1f14d, 0x1f131, 0x1f13d,
    /* Cells 64..79.  */
    0x2b1b,  0x2b24,  0x1f217, 0x1f218, 0x1f219, 0x1f21a, 0x1f21b, 0x26bf,
    0x1f21c, 0x1f21d, 0x1f21e, 0x1f21f, 0x1f220, 0x1f221, 0x1f222, 0x1f223,
    /* Cells 80..95.  */
    0x1f224, 0x1f225, 0x1f14e, 0x3299,  0x1f200, 0,       0,       0,
    0,       0,       0,       0,       0,       0,       0,       0
  },
  /* Row 91.  Cells past 49 are left zero-initialized.  */
  {
    /* Cells 0..15.  */
    0,       0x26e3,  0x2b56,  0x2b57,  0x2b58,  0x2b59,  0x2613,  0x328b,
    0x3012,  0x26e8,  0x3246,  0x3245,  0x26e9,  0x0fd6,  0x26ea,  0x26eb,
    /* Cells 16..31.  */
    0x26ec,  0x2668,  0x26ed,  0x26ee,  0x26ef,  0x2693,  0x2708,  0x26f0,
    0x26f1,  0x26f2,  0x26f3,  0x26f4,  0x26f5,  0x1f157, 0x24b9,  0x24c8,
    /* Cells 32..47.  */
    0x26f6,  0x1f15f, 0x1f18b, 0x1f18d, 0x1f18c, 0x1f179, 0x26f7,  0x26f8,
    0x26f9,  0x26fa,  0x1f17b, 0x260e,  0x26fb,  0x26fc,  0x26fd,  0x26fe,
    /* Cells 48..49.  */
    0x1f17c, 0x26ff,
  },
  /* Row 92.  */
  {
    /* Cells 0..15.  */
    0,       0x27a1,  0x2b05,  0x2b06,  0x2b07,  0x2b2f,  0x2b2e,  0x5e74,
    0x6708,  0x65e5,  0x5186,  0x33a1,  0x33a5,  0x339d,  0x33a0,  0x33a4,
    /* Cells 16..31.  */
    0x1f100, 0x2488,  0x2489,  0x248a,  0x248b,  0x248c,  0x248d,  0x248e,
    0x248f,  0x2490,  0xe290,  0xe291,  0xe292,  0xe293,  0xe294,  0xe295,
    /* Cells 32..47.  */
    0x1f101, 0x1f102, 0x1f103, 0x1f104, 0x1f105, 0x1f106, 0x1f107, 0x1f108,
    0x1f109, 0x1f10a, 0x3233,  0x3236,  0x3232,  0x3231,  0x3239,  0x3244,
    /* Cells 48..63.  */
    0x25b6,  0x25c0,  0x3016,  0x3017,  0x27d0,  0x00b2,  0x00b3,  0x1f12d,
    0xe2a5,  0xe2a6,  0xe2a7,  0xe2a8,  0xe2a9,  0xe2aa,  0xe2ab,  0xe2ac,
    /* Cells 64..79.  */
    0xe2ad,  0xe2ae,  0xe2af,  0xe2b0,  0xe2b1,  0xe2b2,  0xe2b3,  0xe2b4,
    0xe2b5,  0xe2b6,  0xe2b7,  0xe2b8,  0xe2b9,  0xe2ba,  0xe2bb,  0xe2bc,
    /* Cells 80..95.  */
    0xe2bd,  0xe2be,  0xe2bf,  0xe2c0,  0xe2c1,  0xe2c2,  0x1f12c, 0x1f12b,
    0x3247,  0x1f190, 0x1f226, 0x213b,  0,       0,       0,       0
  },
  /* Row 93.  */
  {
    /* Cells 0..15.  */
    0,       0x322a,  0x322b,  0x322c,  0x322d,  0x322e,  0x322f,  0x3230,
    0x3237,  0x337e,  0x337d,  0x337c,  0x337b,  0x2116,  0x2121,  0x3036,
    /* Cells 16..31.  */
    0x26be,  0x1f240, 0x1f241, 0x1f242, 0x1f243, 0x1f244, 0x1f245, 0x1f246,
    0x1f247, 0x1f248, 0x1f12a, 0x1f227, 0x1f228, 0x1f229, 0x1f214, 0x1f22a,
    /* Cells 32..47.  */
    0x1f22b, 0x1f22c, 0x1f22d, 0x1f22e, 0x1f22f, 0x1f230, 0x1f231, 0x2113,
    0x338f,  0x3390,  0x33ca,  0x339e,  0x33a2,  0x3371,  0,       0,
    /* Cells 48..63.  */
    0x00bd,  0x2189,  0x2153,  0x2154,  0x00bc,  0x00be,  0x2155,  0x2156,
    0x2157,  0x2158,  0x2159,  0x215a,  0x2150,  0x215b,  0x2151,  0x2152,
    /* Cells 64..79.  */
    0x2600,  0x2601,  0x2602,  0x2603,  0x2616,  0x2617,  0x26c9,  0x26ca,
    0x2666,  0x2665,  0x2663,  0x2660,  0x26cb,  0x2a00,  0x203c,  0x2049,
    /* Cells 80..95.  */
    0x26c5,  0x2614,  0x26c6,  0x26c4,  0x26c7,  0x26a1,  0x26c8,  0,
    0x269e,  0x269f,  0x266c,  0x260e,  0,       0,       0,       0
  },
  /* Row 94.  */
  {
    /* Cells 0..15.  */
    0,       0x2160,  0x2161,  0x2162,  0x2163,  0x2164,  0x2165,  0x2166,
    0x2167,  0x2168,  0x2169,  0x216a,  0x216b,  0x2470,  0x2471,  0x2472,
    /* Cells 16..31.  */
    0x2473,  0x2474,  0x2475,  0x2476,  0x2477,  0x2478,  0x2479,  0x247a,
    0x247b,  0x247c,  0x247d,  0x247e,  0x247f,  0x3251,  0x3252,  0x3253,
    /* Cells 32..47.  */
    0x3254,  0x1f110, 0x1f111, 0x1f112, 0x1f113, 0x1f114, 0x1f115, 0x1f116,
    0x1f117, 0x1f118, 0x1f119, 0x1f11a, 0x1f11b, 0x1f11c, 0x1f11d, 0x1f11e,
    /* Cells 48..63.  */
    0x1f11f, 0x1f120, 0x1f121, 0x1f122, 0x1f123, 0x1f124, 0x1f125, 0x1f126,
    0x1f127, 0x1f128, 0x1f129, 0x3255,  0x3256,  0x3257,  0x3258,  0x3259,
    /* Cells 64..79.  */
    0x325a,  0x2460,  0x2461,  0x2462,  0x2463,  0x2464,  0x2465,  0x2466,
    0x2467,  0x2468,  0x2469,  0x246a,  0x246b,  0x246c,  0x246d,  0x246e,
    /* Cells 80..95.  */
    0x246f,  0x2776,  0x2777,  0x2778,  0x2779,  0x277a,  0x277b,  0x277c,
    0x277d,  0x277e,  0x277f,  0x24eb,  0x24ec,  0x325b,  0,       0
  },
};
356
/* A transliteration of one ARIB character into a short UCS-4 sequence.  */
struct mchar_entry
{
  uint32_t len;		/* Number of valid values in TO (at most 4).  */
  uint32_t to[4];	/* The replacement UCS-4 values.  */
};
361
362 /* list of transliterations. */
363
364 /* small/subscript-ish KANJI. map to the normal sized version */
365 static const struct mchar_entry ext_sym_smallk[] = {
366 {.len = 1, .to = { 0x6c0f }},
367 {.len = 1, .to = { 0x526f }},
368 {.len = 1, .to = { 0x5143 }},
369 {.len = 1, .to = { 0x6545 }},
370 {.len = 1, .to = { 0x52ed }},
371 {.len = 1, .to = { 0x65b0 }},
372 };
373
374 /* symbols of music instruments */
375 static const struct mchar_entry ext_sym_music[] = {
376 {.len = 4, .to = { 0x0028, 0x0076, 0x006e, 0x0029 }},
377 {.len = 4, .to = { 0x0028, 0x006f, 0x0062, 0x0029 }},
378 {.len = 4, .to = { 0x0028, 0x0063, 0x0062, 0x0029 }},
379 {.len = 3, .to = { 0x0028, 0x0063, 0x0065 }},
380 {.len = 3, .to = { 0x006d, 0x0062, 0x0029 }},
381 {.len = 4, .to = { 0x0028, 0x0068, 0x0070, 0x0029 }},
382 {.len = 4, .to = { 0x0028, 0x0062, 0x0072, 0x0029 }},
383 {.len = 3, .to = { 0x0028, 0x0070, 0x0029 }},
384
385 {.len = 3, .to = { 0x0028, 0x0073, 0x0029 }},
386 {.len = 4, .to = { 0x0028, 0x006d, 0x0073, 0x0029 }},
387 {.len = 3, .to = { 0x0028, 0x0074, 0x0029 }},
388 {.len = 4, .to = { 0x0028, 0x0062, 0x0073, 0x0029 }},
389 {.len = 3, .to = { 0x0028, 0x0062, 0x0029 }},
390 {.len = 4, .to = { 0x0028, 0x0074, 0x0062, 0x0029 }},
391 {.len = 4, .to = { 0x0028, 0x0076, 0x0070, 0x0029 }},
392 {.len = 4, .to = { 0x0028, 0x0064, 0x0073, 0x0029 }},
393
394 {.len = 4, .to = { 0x0028, 0x0061, 0x0067, 0x0029 }},
395 {.len = 4, .to = { 0x0028, 0x0065, 0x0067, 0x0029 }},
396 {.len = 4, .to = { 0x0028, 0x0076, 0x006f, 0x0029 }},
397 {.len = 4, .to = { 0x0028, 0x0066, 0x006c, 0x0029 }},
398 {.len = 3, .to = { 0x0028, 0x006b, 0x0065 }},
399 {.len = 2, .to = { 0x0079, 0x0029 }},
400 {.len = 3, .to = { 0x0028, 0x0073, 0x0061 }},
401 {.len = 2, .to = { 0x0078, 0x0029 }},
402
403 {.len = 3, .to = { 0x0028, 0x0073, 0x0079 }},
404 {.len = 2, .to = { 0x006e, 0x0029 }},
405 {.len = 3, .to = { 0x0028, 0x006f, 0x0072 }},
406 {.len = 2, .to = { 0x0067, 0x0029 }},
407 {.len = 3, .to = { 0x0028, 0x0070, 0x0065 }},
408 {.len = 2, .to = { 0x0072, 0x0029 }},
409 };
410
411
412 int
b24_char_conv(int set,unsigned char c1,unsigned char c2,uint32_t * out)413 b24_char_conv (int set, unsigned char c1, unsigned char c2, uint32_t *out)
414 {
415 int len;
416 uint32_t ch;
417
418 if (set > DRCS0_set && set <= DRCS15_set)
419 set = DRCS0_set;
420
421 switch (set)
422 {
423 case ASCII_set:
424 case ASCII_x_set:
425 case PROP_ASCII_set:
426 if (c1 == 0x7e)
427 *out = 0x203e;
428 else if (c1 == 0x5c)
429 *out = 0xa5;
430 else
431 *out = c1;
432 return 1;
433
434 case KATAKANA_set:
435 case PROP_KATA_set:
436 if (c1 <= 0x76)
437 *out = 0x3080 + c1;
438 else
439 *out = kata_punc[c1 - 0x77];
440 return 1;
441
442 case HIRAGANA_set:
443 case PROP_HIRA_set:
444 if (c1 <= 0x73)
445 *out = 0x3020 + c1;
446 else if (c1 == 0x77 || c1 == 0x78)
447 *out = hira_punc[c1 - 0x77];
448 else if (c1 >= 0x79)
449 *out = kata_punc[c1 - 0x77];
450 else
451 return 0;
452 return 1;
453
454 case JIS0201_KATA_set:
455 if (c1 > 0x5f)
456 return 0;
457 *out = 0xff40 + c1;
458 return 1;
459
460 case EXTRA_SYMBOLS_set:
461 if (c1 == 0x75 || (c1 == 0x76 && (c2 - 0x20) <=43))
462 {
463 *out = extra_kanji[(c1 - 0x75) * 96 + (c2 - 0x20)];
464 return 1;
465 }
466 /* fall through */
467 case KANJI_set:
468 /* check extra symbols */
469 if (c1 >= 0x7a && c1 <= 0x7e)
470 {
471 const struct mchar_entry *entry;
472
473 c1 -= 0x20;
474 c2 -= 0x20;
475 if (c1 == 0x5c && c2 >= 0x1a && c2 <= 0x1f)
476 entry = &ext_sym_smallk[c2 - 0x1a];
477 else if (c1 == 0x5c && c2 >= 0x38 && c2 <= 0x55)
478 entry = &ext_sym_music[c2 - 0x38];
479 else
480 entry = NULL;
481
482 if (entry)
483 {
484 int i;
485
486 for (i = 0; i < entry->len; i++)
487 out[i] = entry->to[i];
488 return i;
489 }
490
491 *out = extra_symbols[c1 - 0x5a][c2];
492 if (*out == 0)
493 return 0;
494
495 return 1;
496 }
497 /* Some ARIB strings somehow require that
498 * EXTRA_SYMBOLS codepoints fallback to KANJI_set.
499 * so just fall through here.
500 */
501
502 /* non-JISX0213 modification. (combining chars) */
503 if (c1 == 0x22 && c2 == 0x7e)
504 {
505 *out = 0x20dd;
506 return 1;
507 }
508 else if (c1 == 0x21 && c2 >= 0x2d && c2 <= 0x32)
509 {
510 *out = nonspacing_symbol[c2 - 0x2d];
511 return 1;
512 }
513 /* fall through */
514 case JISX0213_1_set:
515 case JISX0213_2_set:
516 len = 1;
517 ch = jisx0213_to_ucs4(c1 | (set == JISX0213_2_set ? 0x0200 : 0x0100),
518 c2);
519 if (ch == 0)
520 return 0;
521 if (ch < 0x80)
522 {
523 len = 2;
524 out[0] = __jisx0213_to_ucs_combining[ch - 1][0];
525 out[1] = __jisx0213_to_ucs_combining[ch - 1][1];
526 }
527 else
528 *out = ch;
529 return len;
530
531 case MOSAIC_A_set:
532 case MOSAIC_B_set:
533 case MOSAIC_C_set:
534 case MOSAIC_D_set:
535 case DRCS0_set:
536 case MACRO_set:
537 *out = __UNKNOWN_10646_CHAR;
538 return 1;
539
540 default:
541 break;
542 }
543
544 return 0;
545 }
546
/* Body of the ARIB-STD-B24 -> UCS-4 loop.  One input byte is examined
   per iteration; ST (the local copy of the decoder state) says how the
   byte is to be interpreted.  Control sequences are consumed without
   producing output.  A 2-byte character is converted when its second
   byte arrives (mode MB_2ND).  If the output buffer cannot take the
   whole result of a 2-byte character, the MB_2ND state is restored
   before leaving the loop, so that a later call resumes with the same
   second byte.  */
#define BODY \
  { \
    uint32_t ch = *inptr; \
 \
    /* NUL is dropped and ends any sequence in progress.  */ \
    if (ch == 0) \
      { \
        st.mode = NORMAL; \
        ++ inptr; \
        continue; \
      } \
    if (__glibc_unlikely (st.mode == CTRL_SEQ)) \
      { \
        if (st.skip) \
          { \
            --st.skip; \
            if (st.skip == 0) \
              st.mode = NORMAL; \
            if (ch < 0x40 || ch > 0x7f) \
              STANDARD_FROM_LOOP_ERR_HANDLER (1); \
          } \
        else if (st.prev == MACRO_CTRL) \
          { \
            if (ch == MACRO_CTRL) \
              st.skip = 1; \
            else if (ch == LF || ch == CR) { \
              st = def_state_from; \
              put32(outptr, ch); \
              outptr += 4; \
            } \
          } \
        else if (st.prev == CSI && (ch == 0x5b || ch == 0x5c || ch == 0x6f)) \
          st.mode = NORMAL; \
        else if (st.prev == TIME || st.prev == CSI) \
          { \
            if (ch == 0x20 || (st.prev == TIME && ch == 0x28)) \
              st.skip = 1; \
            else if (!((st.prev == TIME && ch == 0x29) \
                       || ch == 0x3b || (ch >= 0x30 && ch <= 0x39))) \
              { \
                st.mode = NORMAL; \
                STANDARD_FROM_LOOP_ERR_HANDLER (1); \
              } \
          } \
        else if (st.prev == COL || st.prev == CDC) \
          { \
            if (ch == 0x20) \
              st.skip = 1; \
            else \
              { \
                st.mode = NORMAL; \
                if (ch < 0x40 || ch > 0x7f) \
                  STANDARD_FROM_LOOP_ERR_HANDLER (1); \
              } \
          } \
        ++ inptr; \
        continue; \
      } \
 \
    /* A line feed resets the decoder to its default state.  */ \
    if (__glibc_unlikely (ch == LF)) \
      { \
        st = def_state_from; \
        put32 (outptr, ch); \
        outptr += 4; \
        ++ inptr; \
        continue; \
      } \
 \
    if (__glibc_unlikely (st.mode == ESCAPE)) \
      { \
        if (ch == LS2 || ch == LS3) \
          { \
            st.mode = NORMAL; \
            st.gl = (ch == LS2) ? 2 : 3; \
            st.ss = 0; \
          } \
        else if (ch == LS1R || ch == LS2R || ch == LS3R) \
          { \
            st.mode = NORMAL; \
            st.gr = (ch == LS1R) ? 1 : (ch == LS2R) ? 2 : 3; \
            st.ss = 0; \
          } \
        else if (ch == 0x24) \
          st.mode = DESIGNATE_MB; \
        else if (ch >= 0x28 && ch <= 0x2b) \
          { \
            st.mode = G_SEL_1B; \
            st.gidx = ch - 0x28; \
          } \
        else \
          { \
            st.mode = NORMAL; \
            STANDARD_FROM_LOOP_ERR_HANDLER (1); \
          } \
        ++ inptr; \
        continue; \
      } \
 \
    if (__glibc_unlikely (st.mode == DESIGNATE_MB)) \
      { \
        if (ch == KANJI_set || ch == JISX0213_1_set || ch == JISX0213_2_set \
            || ch == EXTRA_SYMBOLS_set) \
          { \
            st.mode = NORMAL; \
            st.g[0] = ch; \
          } \
        else if (ch >= 0x28 && ch <= 0x2b) \
          { \
            st.mode = G_SEL_MB; \
            st.gidx = ch - 0x28; \
          } \
        else \
          { \
            st.mode = NORMAL; \
            STANDARD_FROM_LOOP_ERR_HANDLER (1); \
          } \
        ++ inptr; \
        continue; \
      } \
 \
    if (__glibc_unlikely (st.mode == G_SEL_1B)) \
      { \
        if (ch == ASCII_set || ch == ASCII_x_set || ch == JIS0201_KATA_set \
            || (ch >= 0x30 && ch <= 0x38)) \
          { \
            st.g[st.gidx] = ch; \
            st.mode = NORMAL; \
          } \
        else if (ch == 0x20) \
          st.mode = DRCS_SEL_1B; \
        else \
          { \
            st.mode = NORMAL; \
            STANDARD_FROM_LOOP_ERR_HANDLER (1); \
          } \
        ++ inptr; \
        continue; \
      } \
 \
    if (__glibc_unlikely (st.mode == G_SEL_MB)) \
      { \
        if (ch == KANJI_set || ch == JISX0213_1_set || ch == JISX0213_2_set \
            || ch == EXTRA_SYMBOLS_set) \
          { \
            st.g[st.gidx] = ch; \
            st.mode = NORMAL; \
          } \
        else if (ch == 0x20) \
          st.mode = DRCS_SEL_MB; \
        else \
          { \
            st.mode = NORMAL; \
            STANDARD_FROM_LOOP_ERR_HANDLER (1); \
          } \
        ++ inptr; \
        continue; \
      } \
 \
    if (__glibc_unlikely (st.mode == DRCS_SEL_1B)) \
      { \
        st.mode = NORMAL; \
        if (ch == 0x70 || (ch >= 0x41 && ch <= 0x4f)) \
          st.g[st.gidx] = ch | 0x80; \
        else \
          STANDARD_FROM_LOOP_ERR_HANDLER (1); \
        ++ inptr; \
        continue; \
      } \
 \
    if (__glibc_unlikely (st.mode == DRCS_SEL_MB)) \
      { \
        st.mode = NORMAL; \
        if (ch == 0x40) \
          st.g[st.gidx] = ch | 0x80; \
        else \
          STANDARD_FROM_LOOP_ERR_HANDLER (1); \
        ++ inptr; \
        continue; \
      } \
 \
    if (st.mode == MB_2ND) \
      { \
        int gidx; \
        int i, len; \
        /* Single-shift state on entry; needed for the check below and \
           to restore the state when the output buffer is full.  */ \
        int pending_ss = st.ss; \
        uint32_t out[MAX_NEEDED_OUTPUT]; \
 \
        gidx = (st.ss) ? st.ss : (ch & 0x80) ? st.gr : st.gl; \
        st.mode = NORMAL; \
        st.ss = 0; \
        if (__glibc_unlikely (!(ch & 0x60))) /* C0/C1 */ \
          STANDARD_FROM_LOOP_ERR_HANDLER (1); \
        if (__glibc_unlikely (pending_ss > 0 && (ch & 0x80))) \
          STANDARD_FROM_LOOP_ERR_HANDLER (1); \
        if (__glibc_unlikely ((st.prev & 0x80) != (ch & 0x80))) \
          STANDARD_FROM_LOOP_ERR_HANDLER (1); \
        len = b24_char_conv(st.g[gidx], (st.prev & 0x7f), (ch & 0x7f), out); \
        if (len == 0) \
          STANDARD_FROM_LOOP_ERR_HANDLER (1); \
        if (outptr + 4 * len > outend) \
          { \
            /* The second byte has not been consumed: keep waiting for \
               it, otherwise the next call would take it for a first \
               byte.  */ \
            st.mode = MB_2ND; \
            st.ss = pending_ss; \
            result = __GCONV_FULL_OUTPUT; \
            break; \
          } \
        for (i = 0; i < len; i++) \
          { \
            if (irreversible \
                && __builtin_expect (out[i] == __UNKNOWN_10646_CHAR, 0)) \
              ++ *irreversible; \
            put32 (outptr, out[i]); \
            outptr += 4; \
          } \
        ++ inptr; \
        continue; \
      } \
 \
    if (st.mode == NORMAL) \
      { \
        int gidx, set; \
 \
        if (__glibc_unlikely (!(ch & 0x60))) /* C0/C1 */ \
          { \
            if (ch == ESC) \
              st.mode = ESCAPE; \
            else if (ch == SS2) \
              st.ss = 2; \
            else if (ch == SS3) \
              st.ss = 3; \
            else if (ch == LS0) \
              { \
                st.ss = 0; \
                st.gl = 0; \
              } \
            else if (ch == LS1) \
              { \
                st.ss = 0; \
                st.gl = 1; \
              } \
            else if (ch == BEL || ch == BS || ch == CR) \
              { \
                st.ss = 0; \
                put32 (outptr, ch); \
                outptr += 4; \
              } \
            else if (ch == 0x09 || ch == 0x0b || ch == 0x0c || ch == 0x18 \
                     || ch == 0x1e || ch == 0x1f || (ch >= 0x80 && ch <= 0x8a)\
                     || ch == 0x99 || ch == 0x9a) \
              { \
                /* do nothing. just skip */ \
              } \
            else if (ch == 0x16 || ch == 0x8b || ch == 0x91 || ch == 0x93 \
                     || ch == 0x94 || ch == 0x97 || ch == 0x98) \
              { \
                /* Control code with one parameter byte.  */ \
                st.mode = CTRL_SEQ; \
                st.skip = 1; \
              } \
            else if (ch == 0x1c) \
              { \
                /* Control code with two parameter bytes.  */ \
                st.mode = CTRL_SEQ; \
                st.skip = 2; \
              } \
            else if (ch == COL || ch == CDC || ch == MACRO_CTRL \
                     || ch == CSI ||ch == TIME) \
              { \
                /* Variable-length sequence; remember which one.  */ \
                st.mode = CTRL_SEQ; \
                st.skip = 0; \
                st.prev = ch; \
              } \
            else \
              STANDARD_FROM_LOOP_ERR_HANDLER (1); \
 \
            ++ inptr; \
            continue; \
          } \
 \
        if (__glibc_unlikely ((ch & 0x7f) == 0x20 || ch == 0x7f)) \
          { \
            st.ss = 0; \
            put32 (outptr, ch); \
            outptr += 4; \
            ++ inptr; \
            continue; \
          } \
        if (__glibc_unlikely (ch == 0xff)) \
          { \
            st.ss = 0; \
            put32 (outptr, __UNKNOWN_10646_CHAR); \
            if (irreversible) \
              ++ *irreversible; \
            outptr += 4; \
            ++ inptr; \
            continue; \
          } \
 \
        if (__glibc_unlikely (st.ss > 0 && (ch & 0x80))) \
          STANDARD_FROM_LOOP_ERR_HANDLER (1); \
 \
        gidx = (st.ss) ? st.ss : (ch & 0x80) ? st.gr : st.gl; \
        set = st.g[gidx]; \
        if (set == DRCS0_set || set == KANJI_set || set == JISX0213_1_set \
            || set == JISX0213_2_set || set == EXTRA_SYMBOLS_set) \
          { \
            /* 2-byte set: wait for the second byte.  */ \
            st.mode = MB_2ND; \
            st.prev = ch; \
          } \
        else \
          { \
            uint32_t out; \
 \
            st.ss = 0; \
            if (b24_char_conv(set, (ch & 0x7f), 0, &out) == 0) \
              STANDARD_FROM_LOOP_ERR_HANDLER (1); \
            if (out == __UNKNOWN_10646_CHAR && irreversible) \
              ++ *irreversible; \
            put32 (outptr, out); \
            outptr += 4; \
          } \
        ++ inptr; \
        continue; \
      } \
  }
866 #include <iconv/loop.c>
867
868
869 /* Next, define the other direction, from UCS-4 to ARIB-STD-B24. */
870
871 /* As MIN_INPUT is 4 (> 1), .cnt & .value must be put aside for skeleton.c.
872 * To reduce the size of the state and fit into mbstate_t,
873 * put constraints on G-set that can be locking-shift'ed to GL/GR.
874 * GL is limited to invoke G0/G1, GR to G2/G3. i.e. LS2,LS3, LS1R are not used.
875 * G0 is fixed to KANJI, G1 to ASCII.
876 * G2 can be either HIRAGANA/JISX0213_{1,2},
877 * G3 can be either KATAKANA/JISX0201_KATA/EXTRA_SYMBOLS.
878 * JISX0213_{1,2},EXTRA_SYMBOLS are invoked into GR by SS2/SS3
879 * if it is not already invoked to GR.
880 * plus, charset is referenced by an index instead of its designation char.
881 */
/* Compact code-set indices used by the encoder state in place of the
   designation bytes.  */
enum gset_idx
{
  KANJI_idx = 0,
  ASCII_idx = 1,
  HIRAGANA_idx = 2,
  KATAKANA_idx = 3,
  JIS0201_KATA_idx = 4,
  JISX0213_1_idx = 5,
  JISX0213_2_idx = 6,
  EXTRA_SYMBOLS_idx = 7,
};
892
/* Encoder state for UCS-4 -> ARIB-STD-B24, overlaid on __mbstate_t.  */
struct state_to
{
  /* Overlays __count.  */
  uint32_t cnt:3;	/* Reserved for skeleton.c.  */
  uint32_t gl:1;	/* Set invoked into GL: 0 = G0, 1 = G1.  */
  uint32_t gr:1;	/* Set invoked into GR: 0 = G2, 1 = G3.  */
  uint32_t g2:3;	/* gset_idx of the set designated to G2.  */
  uint32_t g3:3;	/* gset_idx of the set designated to G3.  */
  uint32_t prev:21;	/* Previous input character, held back because a
			   combining character may follow (JIS X 0213).  */

  /* Overlays __value.  */
  uint32_t __value;	/* Used by skeleton.c.  */
} __attribute__((packed));
905
906 static const struct state_to def_state_to = {
907 .cnt = 0,
908 .gl = 0,
909 .gr = 0,
910 .g2 = HIRAGANA_idx,
911 .g3 = KATAKANA_idx,
912 .prev = 0,
913 .__value = 0
914 };
915
/* The encoder loop receives the conversion state as an extra argument.  */
#define EXTRA_LOOP_DECLS	, __mbstate_t *statep
#define EXTRA_LOOP_ARGS		, statep

/* Work on a local copy of the state.  G2 never holds KANJI (index 0),
   so g2 == 0 identifies a zeroed state, which is replaced by the
   defaults.  */
#define INIT_PARAMS \
  struct state_to st = *((struct state_to *) statep);			      \
  if (st.g2 == 0)							      \
    st = def_state_to;

/* Reload the local copy from *statep, with the same defaulting.  */
#define REINIT_PARAMS \
  do									      \
    {									      \
      st = *((struct state_to *) statep);				      \
      if (st.g2 == 0)							      \
	st = def_state_to;						      \
    }									      \
  while (0)

#define LOOP_NEED_FLAGS

#define LOOPFCT			TO_LOOP
#define MIN_NEEDED_INPUT	TO_LOOP_MIN_NEEDED_FROM
#define MAX_NEEDED_INPUT	TO_LOOP_MAX_NEEDED_FROM
#define MIN_NEEDED_OUTPUT	TO_LOOP_MIN_NEEDED_TO
#define MAX_NEEDED_OUTPUT	TO_LOOP_MAX_NEEDED_TO
940
941 /* tables and functions used in BODY */
942
/* Start index and length, within comp_table_data, of the entries that
   belong to each combining character.  */
#define COMP_TABLE_IDX_02E5 0
#define COMP_TABLE_LEN_02E5 1
#define COMP_TABLE_IDX_02E9 (COMP_TABLE_IDX_02E5 + COMP_TABLE_LEN_02E5)
#define COMP_TABLE_LEN_02E9 1
#define COMP_TABLE_IDX_0300 (COMP_TABLE_IDX_02E9 + COMP_TABLE_LEN_02E9)
#define COMP_TABLE_LEN_0300 5
#define COMP_TABLE_IDX_0301 (COMP_TABLE_IDX_0300 + COMP_TABLE_LEN_0300)
#define COMP_TABLE_LEN_0301 4
#define COMP_TABLE_IDX_309A (COMP_TABLE_IDX_0301 + COMP_TABLE_LEN_0301)
#define COMP_TABLE_LEN_309A 14

/* Composition table.  Each entry gives the code of a base character
   and the code of the character composed from that base and the
   combining character of its section.  */
static const struct
{
  uint16_t base;
  uint16_t composed;
} comp_table_data[] =
{
  /* U+02E5 */
  { 0x2b64, 0x2b65 },
  /* U+02E9 */
  { 0x2b60, 0x2b66 },
  /* U+0300 */
  { 0x295c, 0x2b44 },
  { 0x2b38, 0x2b48 },
  { 0x2b37, 0x2b4a },
  { 0x2b30, 0x2b4c },
  { 0x2b43, 0x2b4e },
  /* U+0301 */
  { 0x2b38, 0x2b49 },
  { 0x2b37, 0x2b4b },
  { 0x2b30, 0x2b4d },
  { 0x2b43, 0x2b4f },
  /* U+309A */
  { 0x242b, 0x2477 },
  { 0x242d, 0x2478 },
  { 0x242f, 0x2479 },
  { 0x2431, 0x247a },
  { 0x2433, 0x247b },
  { 0x252b, 0x2577 },
  { 0x252d, 0x2578 },
  { 0x252f, 0x2579 },
  { 0x2531, 0x257a },
  { 0x2533, 0x257b },
  { 0x253b, 0x257c },
  { 0x2544, 0x257d },
  { 0x2548, 0x257e },
  { 0x2675, 0x2678 },
};
986
/* Combining characters and their 2-byte KANJI codes, as
   { UCS-4, code } pairs.  */
static const uint32_t ucs4_to_nonsp_kanji[][2] =
{
  { 0x20dd, 0x227e },
  { 0x0300, 0x212e },
  { 0x0301, 0x212d },
  { 0x0302, 0x2130 },
  { 0x0304, 0x2131 },
  { 0x0308, 0x212f },
  { 0x0332, 0x2132 }
};
991
992 static const uint32_t ucs4_to_extsym[][2] = {
993 {0x00b2, 0x7c55}, {0x00b3, 0x7c56}, {0x00bc, 0x7d54}, {0x00bd, 0x7d50},
994 {0x00be, 0x7d55}, {0x0fd6, 0x7b2d}, {0x203c, 0x7d6e}, {0x2049, 0x7d6f},
995 {0x2113, 0x7d47}, {0x2116, 0x7d2d}, {0x2121, 0x7d2e}, {0x213b, 0x7c7b},
996 {0x2150, 0x7d5c}, {0x2151, 0x7d5e}, {0x2152, 0x7d5f}, {0x2153, 0x7d52},
997 {0x2154, 0x7d53}, {0x2155, 0x7d56}, {0x2156, 0x7d57}, {0x2157, 0x7d58},
998 {0x2158, 0x7d59}, {0x2159, 0x7d5a}, {0x215a, 0x7d5b}, {0x215b, 0x7d5d},
999 {0x2160, 0x7e21}, {0x2161, 0x7e22}, {0x2162, 0x7e23}, {0x2163, 0x7e24},
1000 {0x2164, 0x7e25}, {0x2165, 0x7e26}, {0x2166, 0x7e27}, {0x2167, 0x7e28},
1001 {0x2168, 0x7e29}, {0x2169, 0x7e2a}, {0x216a, 0x7e2b}, {0x216b, 0x7e2c},
1002 {0x2189, 0x7d51}, {0x2460, 0x7e61}, {0x2461, 0x7e62}, {0x2462, 0x7e63},
1003 {0x2463, 0x7e64}, {0x2464, 0x7e65}, {0x2465, 0x7e66}, {0x2466, 0x7e67},
1004 {0x2467, 0x7e68}, {0x2468, 0x7e69}, {0x2469, 0x7e6a}, {0x246a, 0x7e6b},
1005 {0x246b, 0x7e6c}, {0x246c, 0x7e6d}, {0x246d, 0x7e6e}, {0x246e, 0x7e6f},
1006 {0x246f, 0x7e70}, {0x2470, 0x7e2d}, {0x2471, 0x7e2e}, {0x2472, 0x7e2f},
1007 {0x2473, 0x7e30}, {0x2474, 0x7e31}, {0x2475, 0x7e32}, {0x2476, 0x7e33},
1008 {0x2477, 0x7e34}, {0x2478, 0x7e35}, {0x2479, 0x7e36}, {0x247a, 0x7e37},
1009 {0x247b, 0x7e38}, {0x247c, 0x7e39}, {0x247d, 0x7e3a}, {0x247e, 0x7e3b},
1010 {0x247f, 0x7e3c}, {0x2488, 0x7c31}, {0x2489, 0x7c32}, {0x248a, 0x7c33},
1011 {0x248b, 0x7c34}, {0x248c, 0x7c35}, {0x248d, 0x7c36}, {0x248e, 0x7c37},
1012 {0x248f, 0x7c38}, {0x2490, 0x7c39}, {0x2491, 0x7a4d}, {0x2492, 0x7a4e},
1013 {0x2493, 0x7a4f}, {0x24b9, 0x7b3e}, {0x24c8, 0x7b3f}, {0x24eb, 0x7e7b},
1014 {0x24ec, 0x7e7c}, {0x25b6, 0x7c50}, {0x25c0, 0x7c51}, {0x2600, 0x7d60},
1015 {0x2601, 0x7d61}, {0x2602, 0x7d62}, {0x2603, 0x7d73}, {0x260e, 0x7b4b},
1016 {0x260e, 0x7d7b}, {0x2613, 0x7b26}, {0x2614, 0x7d71}, {0x2616, 0x7d64},
1017 {0x2617, 0x7d65}, {0x2660, 0x7d6b}, {0x2663, 0x7d6a}, {0x2665, 0x7d69},
1018 {0x2666, 0x7d68}, {0x2668, 0x7b31}, {0x266c, 0x7d7a}, {0x2693, 0x7b35},
1019 {0x269e, 0x7d78}, {0x269f, 0x7d79}, {0x26a1, 0x7d75}, {0x26be, 0x7d30},
1020 {0x26bf, 0x7a67}, {0x26c4, 0x7d63}, {0x26c5, 0x7d70}, {0x26c6, 0x7d72},
1021 {0x26c7, 0x7d74}, {0x26c8, 0x7d76}, {0x26c9, 0x7d66}, {0x26ca, 0x7d67},
1022 {0x26cb, 0x7d6c}, {0x26cc, 0x7a21}, {0x26cd, 0x7a22}, {0x26cf, 0x7a24},
1023 {0x26d0, 0x7a25}, {0x26d1, 0x7a26}, {0x26d2, 0x7a28}, {0x26d3, 0x7a2a},
1024 {0x26d4, 0x7a2b}, {0x26d5, 0x7a29}, {0x26d6, 0x7a34}, {0x26d7, 0x7a35},
1025 {0x26d8, 0x7a36}, {0x26d9, 0x7a37}, {0x26da, 0x7a38}, {0x26db, 0x7a39},
1026 {0x26dc, 0x7a3a}, {0x26dd, 0x7a3b}, {0x26de, 0x7a3c}, {0x26df, 0x7a3d},
1027 {0x26e0, 0x7a3e}, {0x26e1, 0x7a3f}, {0x26e3, 0x7b21}, {0x26e8, 0x7b29},
1028 {0x26e9, 0x7b2c}, {0x26ea, 0x7b2e}, {0x26eb, 0x7b2f}, {0x26ec, 0x7b30},
1029 {0x26ed, 0x7b32}, {0x26ee, 0x7b33}, {0x26ef, 0x7b34}, {0x26f0, 0x7b37},
1030 {0x26f1, 0x7b38}, {0x26f2, 0x7b39}, {0x26f3, 0x7b3a}, {0x26f4, 0x7b3b},
1031 {0x26f5, 0x7b3c}, {0x26f6, 0x7b40}, {0x26f7, 0x7b46}, {0x26f8, 0x7b47},
1032 {0x26f9, 0x7b48}, {0x26fa, 0x7b49}, {0x26fb, 0x7b4c}, {0x26fc, 0x7b4d},
1033 {0x26fd, 0x7b4e}, {0x26fe, 0x7b4f}, {0x26ff, 0x7b51}, {0x2762, 0x7a23},
1034 {0x2776, 0x7e71}, {0x2777, 0x7e72}, {0x2778, 0x7e73}, {0x2779, 0x7e74},
1035 {0x277a, 0x7e75}, {0x277b, 0x7e76}, {0x277c, 0x7e77}, {0x277d, 0x7e78},
1036 {0x277e, 0x7e79}, {0x277f, 0x7e7a}, {0x27a1, 0x7c21}, {0x27d0, 0x7c54},
1037 {0x2a00, 0x7d6d}, {0x2b05, 0x7c22}, {0x2b06, 0x7c23}, {0x2b07, 0x7c24},
1038 {0x2b1b, 0x7a60}, {0x2b24, 0x7a61}, {0x2b2e, 0x7c26}, {0x2b2f, 0x7c25},
1039 {0x2b55, 0x7a40}, {0x2b56, 0x7b22}, {0x2b57, 0x7b23}, {0x2b58, 0x7b24},
1040 {0x2b59, 0x7b25}, {0x3012, 0x7b28}, {0x3016, 0x7c52}, {0x3017, 0x7c53},
1041 {0x3036, 0x7d2f}, {0x322a, 0x7d21}, {0x322b, 0x7d22}, {0x322c, 0x7d23},
1042 {0x322d, 0x7d24}, {0x322e, 0x7d25}, {0x322f, 0x7d26}, {0x3230, 0x7d27},
1043 {0x3231, 0x7c4d}, {0x3232, 0x7c4c}, {0x3233, 0x7c4a}, {0x3236, 0x7c4b},
1044 {0x3237, 0x7d28}, {0x3239, 0x7c4e}, {0x3244, 0x7c4f}, {0x3245, 0x7b2b},
1045 {0x3246, 0x7b2a}, {0x3247, 0x7c78}, {0x3248, 0x7a41}, {0x3249, 0x7a42},
1046 {0x324a, 0x7a43}, {0x324b, 0x7a44}, {0x324c, 0x7a45}, {0x324d, 0x7a46},
1047 {0x324e, 0x7a47}, {0x324f, 0x7a48}, {0x3251, 0x7e3d}, {0x3252, 0x7e3e},
1048 {0x3253, 0x7e3f}, {0x3254, 0x7e40}, {0x3255, 0x7e5b}, {0x3256, 0x7e5c},
1049 {0x3257, 0x7e5d}, {0x3258, 0x7e5e}, {0x3259, 0x7e5f}, {0x325a, 0x7e60},
1050 {0x325b, 0x7e7d}, {0x328b, 0x7b27}, {0x3299, 0x7a73}, {0x3371, 0x7d4d},
1051 {0x337b, 0x7d2c}, {0x337c, 0x7d2b}, {0x337d, 0x7d2a}, {0x337e, 0x7d29},
1052 {0x338f, 0x7d48}, {0x3390, 0x7d49}, {0x339d, 0x7c2d}, {0x339e, 0x7d4b},
1053 {0x33a0, 0x7c2e}, {0x33a1, 0x7c2b}, {0x33a2, 0x7d4c}, {0x33a4, 0x7c2f},
1054 {0x33a5, 0x7c2c}, {0x33ca, 0x7d4a}, {0x3402, 0x7521}, {0x351f, 0x752a},
1055 {0x37e2, 0x7541}, {0x3eda, 0x7574}, {0x4093, 0x7578}, {0x4103, 0x757e},
1056 {0x4264, 0x7626}, {0x4efd, 0x7523}, {0x4eff, 0x7524}, {0x4f9a, 0x7525},
1057 {0x4fc9, 0x7526}, {0x509c, 0x7527}, {0x511e, 0x7528}, {0x5186, 0x7c2a},
1058 {0x51bc, 0x7529}, {0x5307, 0x752b}, {0x5361, 0x752c}, {0x536c, 0x752d},
1059 {0x544d, 0x7530}, {0x5496, 0x7531}, {0x549c, 0x7532}, {0x54a9, 0x7533},
1060 {0x550e, 0x7534}, {0x554a, 0x7535}, {0x5672, 0x7536}, {0x56e4, 0x7537},
1061 {0x5733, 0x7538}, {0x5734, 0x7539}, {0x5880, 0x753b}, {0x59e4, 0x753c},
1062 {0x5a23, 0x753d}, {0x5a55, 0x753e}, {0x5bec, 0x753f}, {0x5e74, 0x7c27},
1063 {0x5eac, 0x7542}, {0x5f34, 0x7543}, {0x5f45, 0x7544}, {0x5fb7, 0x7545},
1064 {0x6017, 0x7546}, {0x6130, 0x7548}, {0x65e5, 0x7c29}, {0x6624, 0x7549},
1065 {0x66c8, 0x754a}, {0x66d9, 0x754b}, {0x66fa, 0x754c}, {0x66fb, 0x754d},
1066 {0x6708, 0x7c28}, {0x6852, 0x754e}, {0x6911, 0x7550}, {0x693b, 0x7551},
1067 {0x6a45, 0x7552}, {0x6a91, 0x7553}, {0x6adb, 0x7554}, {0x6bf1, 0x7558},
1068 {0x6ce0, 0x7559}, {0x6d2e, 0x755a}, {0x6dbf, 0x755c}, {0x6dca, 0x755d},
1069 {0x6df8, 0x755e}, {0x6f5e, 0x7560}, {0x6ff9, 0x7561}, {0x7064, 0x7562},
1070 {0x7147, 0x7565}, {0x71c1, 0x7566}, {0x7200, 0x7567}, {0x739f, 0x7568},
1071 {0x73a8, 0x7569}, {0x73c9, 0x756a}, {0x73d6, 0x756b}, {0x741b, 0x756c},
1072 {0x7421, 0x756d}, {0x7426, 0x756f}, {0x742a, 0x7570}, {0x742c, 0x7571},
1073 {0x7439, 0x7572}, {0x744b, 0x7573}, {0x7575, 0x7575}, {0x7581, 0x7576},
1074 {0x7772, 0x7577}, {0x78c8, 0x7579}, {0x78e0, 0x757a}, {0x7947, 0x757b},
1075 {0x79ae, 0x757c}, {0x79da, 0x7622}, {0x7a1e, 0x7623}, {0x7b7f, 0x7624},
1076 {0x7c31, 0x7625}, {0x7d8b, 0x7627}, {0x7fa1, 0x7628}, {0x8118, 0x7629},
1077 {0x813a, 0x762a}, {0x82ae, 0x762c}, {0x845b, 0x762d}, {0x84dc, 0x762e},
1078 {0x84ec, 0x762f}, {0x8559, 0x7630}, {0x85ce, 0x7631}, {0x8755, 0x7632},
1079 {0x87ec, 0x7633}, {0x880b, 0x7634}, {0x88f5, 0x7635}, {0x89d2, 0x7636},
1080 {0x8a79, 0x752e}, {0x8af6, 0x7637}, {0x8dce, 0x7638}, {0x8fbb, 0x7639},
1081 {0x8ff6, 0x763a}, {0x90dd, 0x763b}, {0x9127, 0x763c}, {0x912d, 0x763d},
1082 {0x91b2, 0x763e}, {0x9233, 0x763f}, {0x9288, 0x7640}, {0x9321, 0x7641},
1083 {0x9348, 0x7642}, {0x9592, 0x7643}, {0x96de, 0x7644}, {0x9903, 0x7645},
1084 {0x9940, 0x7646}, {0x9ad9, 0x7647}, {0x9bd6, 0x7648}, {0x9dd7, 0x7649},
1085 {0x9eb4, 0x764a}, {0x9eb5, 0x764b}, {0x9fc4, 0x754f}, {0x9fc5, 0x7621},
1086 {0x9fc6, 0x757d}, {0xfa10, 0x753a}, {0xfa11, 0x7540}, {0xfa45, 0x755b},
1087 {0xfa46, 0x755f}, {0xfa4a, 0x756e}, {0xfa6b, 0x7547}, {0xfa6c, 0x7563},
1088 {0xfa6d, 0x762b}, {0x1f100, 0x7c30}, {0x1f101, 0x7c40}, {0x1f102, 0x7c41},
1089 {0x1f103, 0x7c42}, {0x1f104, 0x7c43}, {0x1f105, 0x7c44}, {0x1f106, 0x7c45},
1090 {0x1f107, 0x7c46}, {0x1f108, 0x7c47}, {0x1f109, 0x7c48}, {0x1f10a, 0x7c49},
1091 {0x1f110, 0x7e41}, {0x1f111, 0x7e42}, {0x1f112, 0x7e43}, {0x1f113, 0x7e44},
1092 {0x1f114, 0x7e45}, {0x1f115, 0x7e46}, {0x1f116, 0x7e47}, {0x1f117, 0x7e48},
1093 {0x1f118, 0x7e49}, {0x1f119, 0x7e4a}, {0x1f11a, 0x7e4b}, {0x1f11b, 0x7e4c},
1094 {0x1f11c, 0x7e4d}, {0x1f11d, 0x7e4e}, {0x1f11e, 0x7e4f}, {0x1f11f, 0x7e50},
1095 {0x1f120, 0x7e51}, {0x1f121, 0x7e52}, {0x1f122, 0x7e53}, {0x1f123, 0x7e54},
1096 {0x1f124, 0x7e55}, {0x1f125, 0x7e56}, {0x1f126, 0x7e57}, {0x1f127, 0x7e58},
1097 {0x1f128, 0x7e59}, {0x1f129, 0x7e5a}, {0x1f12a, 0x7d3a}, {0x1f12b, 0x7c77},
1098 {0x1f12c, 0x7c76}, {0x1f12d, 0x7c57}, {0x1f131, 0x7a5e}, {0x1f13d, 0x7a5f},
1099 {0x1f13f, 0x7a52}, {0x1f142, 0x7a59}, {0x1f146, 0x7a53}, {0x1f14a, 0x7a50},
1100 {0x1f14b, 0x7a54}, {0x1f14c, 0x7a51}, {0x1f14d, 0x7a5d}, {0x1f14e, 0x7a72},
1101 {0x1f157, 0x7b3d}, {0x1f15f, 0x7b41}, {0x1f179, 0x7b45}, {0x1f17b, 0x7b4a},
1102 {0x1f17c, 0x7b50}, {0x1f17f, 0x7a30}, {0x1f18a, 0x7a31}, {0x1f18b, 0x7b42},
1103 {0x1f18c, 0x7b44}, {0x1f18d, 0x7b43}, {0x1f190, 0x7c79}, {0x1f200, 0x7a74},
1104 {0x1f210, 0x7a55}, {0x1f211, 0x7a56}, {0x1f212, 0x7a57}, {0x1f213, 0x7a58},
1105 {0x1f214, 0x7a5a}, {0x1f214, 0x7d3e}, {0x1f215, 0x7a5b}, {0x1f216, 0x7a5c},
1106 {0x1f217, 0x7a62}, {0x1f218, 0x7a63}, {0x1f219, 0x7a64}, {0x1f21a, 0x7a65},
1107 {0x1f21b, 0x7a66}, {0x1f21c, 0x7a68}, {0x1f21d, 0x7a69}, {0x1f21e, 0x7a6a},
1108 {0x1f21f, 0x7a6b}, {0x1f220, 0x7a6c}, {0x1f221, 0x7a6d}, {0x1f222, 0x7a6e},
1109 {0x1f223, 0x7a6f}, {0x1f224, 0x7a70}, {0x1f225, 0x7a71}, {0x1f226, 0x7c7a},
1110 {0x1f227, 0x7d3b}, {0x1f228, 0x7d3c}, {0x1f229, 0x7d3d}, {0x1f22a, 0x7d3f},
1111 {0x1f22b, 0x7d40}, {0x1f22c, 0x7d41}, {0x1f22d, 0x7d42}, {0x1f22e, 0x7d43},
1112 {0x1f22f, 0x7d44}, {0x1f230, 0x7d45}, {0x1f231, 0x7d46}, {0x1f240, 0x7d31},
1113 {0x1f241, 0x7d32}, {0x1f242, 0x7d33}, {0x1f243, 0x7d34}, {0x1f244, 0x7d35},
1114 {0x1f245, 0x7d36}, {0x1f246, 0x7d37}, {0x1f247, 0x7d38}, {0x1f248, 0x7d39},
1115 {0x1f6e7, 0x7b36}, {0x20158, 0x7522}, {0x20bb7, 0x752f}, {0x233cc, 0x7555},
1116 {0x233fe, 0x7556}, {0x235c4, 0x7557}, {0x242ee, 0x7564}
1117 };
1118
1119 static int
out_ascii(struct state_to * st,uint32_t ch,unsigned char ** outptr,const unsigned char * outend)1120 out_ascii (struct state_to *st, uint32_t ch,
1121 unsigned char **outptr, const unsigned char *outend)
1122 {
1123 size_t esc_seqs;
1124 unsigned char *op = *outptr;
1125
1126 esc_seqs = 0;
1127 if ((ch & 0x60) && st->gl == 0 && ch != 0x20 && ch != 0x7f && ch != 0xa0)
1128 ++ esc_seqs;
1129
1130 if (__glibc_unlikely (op + esc_seqs + 1 > outend))
1131 return __GCONV_FULL_OUTPUT;
1132
1133 if (esc_seqs > 0)
1134 {
1135 *op++ = LS1;
1136 st->gl = 1;
1137 }
1138 *op++ = ch & 0xff;
1139 if (ch == 0 || ch == LF)
1140 *st = def_state_to;
1141 *outptr = op;
1142 return __GCONV_OK;
1143 }
1144
1145 static int
out_jisx0201(struct state_to * st,uint32_t ch,unsigned char ** outptr,const unsigned char * outend)1146 out_jisx0201 (struct state_to *st, uint32_t ch,
1147 unsigned char **outptr, const unsigned char *outend)
1148 {
1149 size_t esc_seqs;
1150 unsigned char *op = *outptr;
1151
1152 esc_seqs = 0;
1153 if (st->g3 != JIS0201_KATA_idx)
1154 esc_seqs += 3;
1155 if (st->gr == 0) /* need LS3R */
1156 esc_seqs += 2;
1157
1158 if (__glibc_unlikely (op + esc_seqs + 1 > outend))
1159 return __GCONV_FULL_OUTPUT;
1160
1161 if (esc_seqs >= 3)
1162 {
1163 /* need charset designation */
1164 *op++ = ESC;
1165 *op++ = '\x2b'; /* designate single byte charset to G3 */
1166 *op++ = JIS0201_KATA_set;
1167 st->g3 = JIS0201_KATA_idx;
1168 }
1169 if (esc_seqs == 2 || esc_seqs == 5)
1170 {
1171 *op++ = ESC;
1172 *op++ = LS3R;
1173 st->gr = 1;
1174 }
1175 *op++ = ch & 0xff;
1176 *outptr = op;
1177 return __GCONV_OK;
1178 }
1179
1180 static int
out_katakana(struct state_to * st,unsigned char ch,unsigned char ** outptr,const unsigned char * outend)1181 out_katakana (struct state_to *st, unsigned char ch,
1182 unsigned char **outptr, const unsigned char *outend)
1183 {
1184 size_t esc_seqs;
1185 unsigned char *op = *outptr;
1186
1187 esc_seqs = 0;
1188 if (st->g3 != KATAKANA_idx)
1189 esc_seqs += 3;
1190 if (st->gr == 0) /* need LS3R */
1191 esc_seqs += 2;
1192
1193 if (__glibc_unlikely (op + esc_seqs + 1 > outend))
1194 return __GCONV_FULL_OUTPUT;
1195
1196 if (esc_seqs >= 3)
1197 {
1198 /* need charset designation */
1199 *op++ = ESC;
1200 *op++ = '\x2b'; /* designate single byte charset to G3 */
1201 *op++ = KATAKANA_set;
1202 st->g3 = KATAKANA_idx;
1203 }
1204 if (esc_seqs == 2 || esc_seqs == 5)
1205 {
1206 *op++ = ESC;
1207 *op++ = LS3R;
1208 st->gr = 1;
1209 }
1210 *op++ = ch | 0x80;
1211 *outptr = op;
1212 return __GCONV_OK;
1213 }
1214
1215 static int
out_hiragana(struct state_to * st,unsigned char ch,unsigned char ** outptr,const unsigned char * outend)1216 out_hiragana (struct state_to *st, unsigned char ch,
1217 unsigned char **outptr, const unsigned char *outend)
1218 {
1219 size_t esc_seqs;
1220 unsigned char *op = *outptr;
1221
1222 esc_seqs = 0;
1223 if (st->g2 != HIRAGANA_idx)
1224 esc_seqs += 3;
1225 if (st->gr == 1) /* need LS2R */
1226 esc_seqs += 2;
1227
1228 if (__glibc_unlikely (op + esc_seqs + 1 > outend))
1229 return __GCONV_FULL_OUTPUT;
1230
1231 if (esc_seqs >= 3)
1232 {
1233 /* need charset designation */
1234 *op++ = ESC;
1235 *op++ = '\x2a'; /* designate single byte charset to G2 */
1236 *op++ = HIRAGANA_set;
1237 st->g2 = HIRAGANA_idx;
1238 }
1239 if (esc_seqs == 2 || esc_seqs == 5)
1240 {
1241 *op++ = ESC;
1242 *op++ = LS2R;
1243 st->gr = 0;
1244 }
1245 *op++ = ch | 0x80;
1246 *outptr = op;
1247 return __GCONV_OK;
1248 }
1249
1250 static int
is_kana_punc(uint32_t ch)1251 is_kana_punc (uint32_t ch)
1252 {
1253 int i;
1254 size_t len;
1255
1256 len = NELEMS (hira_punc);
1257 for (i = 0; i < len; i++)
1258 if (ch == hira_punc[i])
1259 return i;
1260
1261 len = NELEMS (kata_punc);
1262 for (i = 0; i < len; i++)
1263 if (ch == kata_punc[i])
1264 return i + NELEMS (hira_punc);
1265 return -1;
1266 }
1267
1268 static int
out_kana_punc(struct state_to * st,int idx,unsigned char ** outptr,const unsigned char * outend)1269 out_kana_punc (struct state_to *st, int idx,
1270 unsigned char **outptr, const unsigned char *outend)
1271 {
1272 size_t len = NELEMS (hira_punc);
1273
1274 if (idx < len)
1275 return out_hiragana (st, 0x77 + idx, outptr, outend);
1276 idx -= len;
1277 if (idx >= 2)
1278 {
1279 /* common punc. symbols shared by katakana/hiragana */
1280 /* guess which is used currently */
1281 if (st->gr == 0 && st->g2 == HIRAGANA_idx)
1282 return out_hiragana (st, 0x77 + idx, outptr, outend);
1283 else if (st->gr == 1 && st->g3 == KATAKANA_idx)
1284 return out_katakana (st, 0x77 + idx, outptr, outend);
1285 else if (st->g2 == HIRAGANA_idx && st->g3 != KATAKANA_idx)
1286 return out_hiragana (st, 0x77 + idx, outptr, outend);
1287 /* fall through */
1288 }
1289 return out_katakana (st, 0x77 + idx, outptr, outend);
1290 }
1291
1292 static int
out_kanji(struct state_to * st,uint32_t ch,unsigned char ** outptr,const unsigned char * outend)1293 out_kanji (struct state_to *st, uint32_t ch,
1294 unsigned char **outptr, const unsigned char *outend)
1295 {
1296 size_t esc_seqs;
1297 unsigned char *op = *outptr;
1298
1299 esc_seqs = 0;
1300 if (st->gl)
1301 ++ esc_seqs;
1302
1303 if (__glibc_unlikely (op + esc_seqs + 2 > outend))
1304 return __GCONV_FULL_OUTPUT;
1305
1306 if (st->gl)
1307 {
1308 *op++ = LS0;
1309 st->gl = 0;
1310 }
1311 *op++ = (ch >> 8) & 0x7f;
1312 *op++ = ch & 0x7f;
1313 *outptr = op;
1314 return __GCONV_OK;
1315 }
1316
1317 /* convert JISX0213_{1,2} to ARIB-STD-B24 */
1318 /* assert(set_idx == JISX0213_1_idx || set_idx == JISX0213_2_idx); */
1319 static int
out_jisx0213(struct state_to * st,uint32_t ch,int set_idx,unsigned char ** outptr,const unsigned char * outend)1320 out_jisx0213 (struct state_to *st, uint32_t ch, int set_idx,
1321 unsigned char **outptr, const unsigned char *outend)
1322 {
1323 size_t esc_seqs;
1324 unsigned char *op = *outptr;
1325
1326 esc_seqs = 0;
1327 if (st->g2 != set_idx)
1328 esc_seqs += 4; /* designate to G2 */
1329 if (st->gr) /* if GR does not designate G2 */
1330 esc_seqs ++; /* SS3 */
1331
1332 if (__glibc_unlikely (op + esc_seqs + 2 > outend))
1333 return __GCONV_FULL_OUTPUT;
1334
1335 if (esc_seqs >= 4)
1336 {
1337 /* need charset designation */
1338 *op++ = ESC;
1339 *op++ = '\x24'; /* designate multibyte charset */
1340 *op++ = '\x2a'; /* to G2 */
1341 *op++ = (set_idx == JISX0213_1_idx) ? JISX0213_1_set : JISX0213_2_set;
1342 st->g2 = JISX0213_1_idx;
1343 }
1344 if (st->gr)
1345 *op++ = SS2; /* GR designates G3 now. insert SS2 */
1346 else
1347 ch |= 0x8080; /* use GR(G2) */
1348 *op++ = (ch >> 8) & 0xff;
1349 *op++ = ch & 0xff;
1350 *outptr = op;
1351 return __GCONV_OK;
1352 }
1353
1354 static int
out_extsym(struct state_to * st,uint32_t ch,unsigned char ** outptr,const unsigned char * outend)1355 out_extsym (struct state_to *st, uint32_t ch,
1356 unsigned char **outptr, const unsigned char *outend)
1357 {
1358 size_t esc_seqs;
1359 unsigned char *op = *outptr;
1360
1361 esc_seqs = 0;
1362 if (st->g3 != EXTRA_SYMBOLS_idx)
1363 esc_seqs += 4;
1364 if (st->gr == 0) /* if GR designates G2, use SS3 */
1365 ++ esc_seqs;
1366
1367 if (__glibc_unlikely (op + esc_seqs + 2 > outend))
1368 return __GCONV_FULL_OUTPUT;
1369
1370 if (esc_seqs >= 4)
1371 {
1372 /* need charset designation */
1373 *op++ = ESC;
1374 *op++ = '\x24'; /* designate multibyte charset */
1375 *op++ = '\x2b'; /* to G3 */
1376 *op++ = EXTRA_SYMBOLS_set;
1377 st->g3 = EXTRA_SYMBOLS_idx;
1378 }
1379 if (st->gr == 0)
1380 *op++ = SS3;
1381 else
1382 ch |= 0x8080;
1383 *op++ = (ch >> 8) & 0xff;
1384 *op++ = ch & 0xff;
1385 *outptr = op;
1386 return __GCONV_OK;
1387 }
1388
1389 static int
out_buffered(struct state_to * st,unsigned char ** outptr,const unsigned char * outend)1390 out_buffered (struct state_to *st,
1391 unsigned char **outptr, const unsigned char *outend)
1392 {
1393 int r;
1394
1395 if (st->prev == 0)
1396 return __GCONV_OK;
1397
1398 if (st->prev >> 16)
1399 r = out_jisx0213 (st, st->prev & 0x7f7f, JISX0213_1_idx, outptr, outend);
1400 else if ((st->prev & 0x7f00) == 0x2400)
1401 r = out_hiragana (st, st->prev, outptr, outend);
1402 else if ((st->prev & 0x7f00) == 0x2500)
1403 r = out_katakana (st, st->prev, outptr, outend);
1404 else /* should not be reached */
1405 r = out_kanji (st, st->prev, outptr, outend);
1406
1407 st->prev = 0;
1408 return r;
1409 }
1410
/* Comparison callback for bsearch: orders two objects by the uint32_t
   stored at the start of each.  Only that first uint32_t is read, so B
   may point at a larger element whose first member is the key.

   Returns a negative value, zero or a positive value if *A is less
   than, equal to or greater than *B.  The result is computed from two
   comparisons rather than a subtraction, so it has the right sign for
   every pair of 32 bit values.  */
static int
cmp_u32 (const void *a, const void *b)
{
  const uint32_t lhs = *(const uint32_t *) a;
  const uint32_t rhs = *(const uint32_t *) b;

  return (lhs > rhs) - (lhs < rhs);
}
1416
1417 static int
find_extsym_idx(uint32_t ch)1418 find_extsym_idx (uint32_t ch)
1419 {
1420 const uint32_t (*p)[2];
1421
1422 p = bsearch (&ch, ucs4_to_extsym,
1423 NELEMS (ucs4_to_extsym), sizeof (ucs4_to_extsym[0]), cmp_u32);
1424 return p ? (p - ucs4_to_extsym) : -1;
1425 }
1426
/* Loop body: read one 32 bit character from INPTR and write its
   ARIB-STD-B24 encoding.

   A character that may be the base of a combining sequence is not
   written at once but parked in st.prev; the next character either
   combines with it (one composed JIS X 0213 character is written) or
   causes it to be flushed first.  Candidate encodings are then tried in
   this order: single byte ASCII/control, JIS X 0201, kana punctuation,
   the ucs4_to_nonsp_kanji table, JIS X 0208 (with hiragana and katakana
   rows sent through the one byte sets), extra symbols, JIS X 0213.

   Every path that produced a status in R ends at `next', which stops
   the loop on anything but __GCONV_OK and otherwise consumes the four
   input bytes.  st.prev is cleared only after the pending character has
   actually been written, so a __GCONV_FULL_OUTPUT does not lose it.  */
#define BODY \
  { \
    uint32_t ch, jch; \
    unsigned char buf[2]; \
    int r; \
    \
    ch = get32 (inptr); \
    if (st.prev != 0) \
      { \
        /* Attempt to combine the last character with this one. */ \
        unsigned int idx; \
        unsigned int len; \
        \
        if (ch == 0x02e5) \
          idx = COMP_TABLE_IDX_02E5, len = COMP_TABLE_LEN_02E5; \
        else if (ch == 0x02e9) \
          idx = COMP_TABLE_IDX_02E9, len = COMP_TABLE_LEN_02E9; \
        else if (ch == 0x0300) \
          idx = COMP_TABLE_IDX_0300, len = COMP_TABLE_LEN_0300; \
        else if (ch == 0x0301) \
          idx = COMP_TABLE_IDX_0301, len = COMP_TABLE_LEN_0301; \
        else if (ch == 0x309a) \
          idx = COMP_TABLE_IDX_309A, len = COMP_TABLE_LEN_309A; \
        else \
          idx = 0, len = 0; \
        \
        for (;len > 0; ++idx, --len) \
          if (comp_table_data[idx].base == (st.prev & 0x7f7f)) \
            break; \
        \
        if (len > 0) \
          { \
            /* Output the combined character. */ \
            /* We know the combined character is in JISX0213 plane 1 */ \
            r = out_jisx0213 (&st, comp_table_data[idx].composed, \
                              JISX0213_1_idx, &outptr, outend); \
            /* Keep the buffered base if nothing could be written, so */ \
            /* that the retry still sees it. */ \
            if (r == __GCONV_OK) \
              st.prev = 0; \
            goto next; \
          } \
        \
        /* not a combining character */ \
        /* Output the buffered character. */ \
        /* We know it is in JISX0208(HIRA/KATA) or in JISX0213 plane 1. */ \
        r = out_buffered (&st, &outptr, outend); \
        if (r != __GCONV_OK) \
          { \
            result = r; \
            break; \
          } \
        /* fall through & output the current character (ch). */ \
      } \
    \
    /* ASCII or C0/C1 or NBSP */ \
    if (ch <= 0xa0) \
      { \
        if ((ch & 0x60) || ch == 0 || ch == LF || ch == CR || ch == BS) \
          r = out_ascii (&st, ch, &outptr, outend); \
        else \
          STANDARD_TO_LOOP_ERR_HANDLER (4); \
        goto next; \
      } \
    \
    /* half-width KATAKANA */ \
    if (ucs4_to_jisx0201 (ch, buf) != __UNKNOWN_10646_CHAR) \
      { \
        if (__glibc_unlikely (buf[0] < 0x80)) /* yen sign or overline */ \
          r = out_ascii (&st, buf[0], &outptr, outend); \
        else \
          r = out_jisx0201 (&st, buf[0], &outptr, outend); \
        goto next; \
      } \
    \
    /* check kana punct. symbols (prefer 1-Byte charset over KANJI_set) */ \
    r = is_kana_punc (ch); \
    if (r >= 0) \
      { \
        r = out_kana_punc (&st, r, &outptr, outend); \
        goto next; \
      } \
    \
    if (ch >= ucs4_to_nonsp_kanji[0][0] && \
        ch <= ucs4_to_nonsp_kanji[NELEMS (ucs4_to_nonsp_kanji) - 1][0]) \
      { \
        int i; \
        \
        for (i = 0; i < NELEMS (ucs4_to_nonsp_kanji); i++) \
          { \
            if (ch < ucs4_to_nonsp_kanji[i][0]) \
              break; \
            else if (ch == ucs4_to_nonsp_kanji[i][0]) \
              { \
                r = out_kanji (&st, ucs4_to_nonsp_kanji[i][1], \
                               &outptr, outend); \
                goto next; \
              } \
          } \
      } \
    \
    jch = ucs4_to_jisx0213 (ch); \
    \
    if (ucs4_to_jisx0208 (ch, buf, 2) != __UNKNOWN_10646_CHAR) \
      { \
        if (jch & 0x0080) \
          { \
            /* A possible match in comp_table_data.  Buffer it. */ \
            \
            /* We know it's a JISX 0213 plane 1 character. */ \
            assert ((jch & 0x8000) == 0); \
            \
            st.prev = jch & 0x7f7f; \
            r = __GCONV_OK; \
            goto next; \
          } \
        /* check HIRAGANA/KATAKANA (prefer 1-Byte charset over KANJI_set) */ \
        if (buf[0] == 0x24) \
          r = out_hiragana (&st, buf[1], &outptr, outend); \
        else if (buf[0] == 0x25) \
          r = out_katakana (&st, buf[1], &outptr, outend); \
        else if (jch == 0x227e || (jch >= 0x212d && jch <= 0x2132)) \
          r = out_jisx0213 (&st, jch, JISX0213_1_idx, &outptr, outend); \
        else \
          r = out_kanji (&st, jch, &outptr, outend); \
        goto next; \
      } \
    \
    if (jch & 0x0080) \
      { \
        /* Possible combining base outside JISX0208: bit 16 marks it */ \
        /* so that out_buffered sends it out as JISX0213 plane 1. */ \
        st.prev = (jch & 0x7f7f) | 0x10000; \
        r = __GCONV_OK; \
        goto next; \
      } \
    \
    /* KANJI shares some chars with EXTRA_SYMBOLS, but prefer extra symbols*/ \
    r = find_extsym_idx (ch); \
    if (r >= 0) \
      { \
        ch = ucs4_to_extsym[r][1]; \
        r = out_extsym (&st, ch, &outptr, outend); \
        goto next; \
      } \
    \
    if (jch != 0) \
      { \
        r = out_jisx0213 (&st, jch & 0x7f7f, \
                          (jch & 0x8000) ? JISX0213_2_idx : JISX0213_1_idx, \
                          &outptr, outend); \
        goto next; \
      } \
    \
    UNICODE_TAG_HANDLER (ch, 4); \
    STANDARD_TO_LOOP_ERR_HANDLER (4); \
    \
next: \
    if (r != __GCONV_OK) \
      { \
        result = r; \
        break; \
      } \
    inptr += 4; \
  }
1587 #include <iconv/loop.c>
1588
1589 /* Now define the toplevel functions. */
1590 #include <iconv/skeleton.c>
1591