/*
 * _codecs_hk.c: Codecs collection for encodings from Hong Kong
 *
 * Written by Hye-Shik Chang <perky@FreeBSD.org>
 */
6 
7 #define USING_IMPORTED_MAPS
8 
9 #include "cjkcodecs.h"
10 #include "mappings_hk.h"
11 
12 /*
13  * BIG5HKSCS codec
14  */
15 
16 static const encode_map *big5_encmap = NULL;
17 static const decode_map *big5_decmap = NULL;
18 
CODEC_INIT(big5hkscs)19 CODEC_INIT(big5hkscs)
20 {
21     static int initialized = 0;
22 
23     if (!initialized && IMPORT_MAP(tw, big5, &big5_encmap, &big5_decmap))
24         return -1;
25     initialized = 1;
26     return 0;
27 }
28 
29 /*
30  * There are four possible pair unicode -> big5hkscs maps as in HKSCS 2004:
31  *  U+00CA U+0304 -> 8862  (U+00CA alone is mapped to 8866)
32  *  U+00CA U+030C -> 8864
33  *  U+00EA U+0304 -> 88a3  (U+00EA alone is mapped to 88a7)
34  *  U+00EA U+030C -> 88a5
35  * These are handled by not mapping tables but a hand-written code.
36  */
37 static const DBCHAR big5hkscs_pairenc_table[4] = {0x8862, 0x8864, 0x88a3, 0x88a5};
38 
ENCODER(big5hkscs)39 ENCODER(big5hkscs)
40 {
41     while (inleft > 0) {
42         ucs4_t c = **inbuf;
43         DBCHAR code;
44         Py_ssize_t insize;
45 
46         if (c < 0x80) {
47             REQUIRE_OUTBUF(1)
48             **outbuf = (unsigned char)c;
49             NEXT(1, 1)
50             continue;
51         }
52 
53         DECODE_SURROGATE(c)
54         insize = GET_INSIZE(c);
55 
56         REQUIRE_OUTBUF(2)
57 
58         if (c < 0x10000) {
59             TRYMAP_ENC(big5hkscs_bmp, code, c) {
60                 if (code == MULTIC) {
61                     if (inleft >= 2 &&
62                         ((c & 0xffdf) == 0x00ca) &&
63                         (((*inbuf)[1] & 0xfff7) == 0x0304)) {
64                         code = big5hkscs_pairenc_table[
65                             ((c >> 4) |
66                              ((*inbuf)[1] >> 3)) & 3];
67                         insize = 2;
68                     }
69                     else if (inleft < 2 &&
70                              !(flags & MBENC_FLUSH))
71                         return MBERR_TOOFEW;
72                     else {
73                         if (c == 0xca)
74                             code = 0x8866;
75                         else /* c == 0xea */
76                             code = 0x88a7;
77                     }
78                 }
79             }
80             else TRYMAP_ENC(big5, code, c);
81             else return 1;
82         }
83         else if (c < 0x20000)
84             return insize;
85         else if (c < 0x30000) {
86             TRYMAP_ENC(big5hkscs_nonbmp, code, c & 0xffff);
87             else return insize;
88         }
89         else
90             return insize;
91 
92         OUT1(code >> 8)
93         OUT2(code & 0xFF)
94         NEXT(insize, 2)
95     }
96 
97     return 0;
98 }
99 
/*
 * Convert a Big5-HKSCS lead/trail byte pair into a linear index: lead bytes
 * are counted from 0x87, trail bytes from 0x40, and each lead byte spans
 * (0xfe - 0x40 + 1) trail positions.  BH2S(0x87, 0x40) is therefore 0.
 */
#define BH2S(c1, c2) (((c1) - 0x87) * (0xfe - 0x40 + 1) + ((c2) - 0x40))
101 
DECODER(big5hkscs)102 DECODER(big5hkscs)
103 {
104     while (inleft > 0) {
105         unsigned char c = IN1;
106         ucs4_t decoded;
107 
108         REQUIRE_OUTBUF(1)
109 
110         if (c < 0x80) {
111             OUT1(c)
112             NEXT(1, 1)
113             continue;
114         }
115 
116         REQUIRE_INBUF(2)
117 
118         if (0xc6 > c || c > 0xc8 || (c < 0xc7 && IN2 < 0xa1)) {
119             TRYMAP_DEC(big5, **outbuf, c, IN2) {
120                 NEXT(2, 1)
121                 continue;
122             }
123         }
124 
125         TRYMAP_DEC(big5hkscs, decoded, c, IN2)
126         {
127             int s = BH2S(c, IN2);
128             const unsigned char *hintbase;
129 
130             assert(0x87 <= c && c <= 0xfe);
131             assert(0x40 <= IN2 && IN2 <= 0xfe);
132 
133             if (BH2S(0x87, 0x40) <= s && s <= BH2S(0xa0, 0xfe)) {
134                     hintbase = big5hkscs_phint_0;
135                     s -= BH2S(0x87, 0x40);
136             }
137             else if (BH2S(0xc6,0xa1) <= s && s <= BH2S(0xc8,0xfe)){
138                     hintbase = big5hkscs_phint_12130;
139                     s -= BH2S(0xc6, 0xa1);
140             }
141             else if (BH2S(0xf9,0xd6) <= s && s <= BH2S(0xfe,0xfe)){
142                     hintbase = big5hkscs_phint_21924;
143                     s -= BH2S(0xf9, 0xd6);
144             }
145             else
146                     return MBERR_INTERNAL;
147 
148             if (hintbase[s >> 3] & (1 << (s & 7))) {
149                     WRITEUCS4(decoded | 0x20000)
150                     NEXT_IN(2)
151             }
152             else {
153                     OUT1(decoded)
154                     NEXT(2, 1)
155             }
156             continue;
157         }
158 
159         switch ((c << 8) | IN2) {
160         case 0x8862: WRITE2(0x00ca, 0x0304); break;
161         case 0x8864: WRITE2(0x00ca, 0x030c); break;
162         case 0x88a3: WRITE2(0x00ea, 0x0304); break;
163         case 0x88a5: WRITE2(0x00ea, 0x030c); break;
164         default: return 2;
165         }
166 
167         NEXT(2, 2) /* all decoded code points are pairs, above. */
168     }
169 
170     return 0;
171 }
172 
173 
174 BEGIN_MAPPINGS_LIST
175   MAPPING_DECONLY(big5hkscs)
176   MAPPING_ENCONLY(big5hkscs_bmp)
177   MAPPING_ENCONLY(big5hkscs_nonbmp)
178 END_MAPPINGS_LIST
179 
180 BEGIN_CODECS_LIST
181   CODEC_STATELESS_WINIT(big5hkscs)
182 END_CODECS_LIST
183 
184 I_AM_A_MODULE_FOR(hk)
185