/*
 * _codecs_hk.c: Codecs collection for encodings from Hong Kong
 *
 * Written by Hye-Shik Chang <perky@FreeBSD.org>
 */
6 
7 #define USING_IMPORTED_MAPS
8 
9 #include "cjkcodecs.h"
10 #include "mappings_hk.h"
11 
12 /*
13  * BIG5HKSCS codec
14  */
15 
16 static const encode_map *big5_encmap = NULL;
17 static const decode_map *big5_decmap = NULL;
18 
CODEC_INIT(big5hkscs)19 CODEC_INIT(big5hkscs)
20 {
21     static int initialized = 0;
22 
23     if (!initialized && IMPORT_MAP(tw, big5, &big5_encmap, &big5_decmap))
24         return -1;
25     initialized = 1;
26     return 0;
27 }
28 
/*
 * HKSCS 2004 defines four Unicode pair -> big5hkscs mappings:
 *  U+00CA U+0304 -> 8862  (U+00CA alone is mapped to 8866)
 *  U+00CA U+030C -> 8864
 *  U+00EA U+0304 -> 88a3  (U+00EA alone is mapped to 88a7)
 *  U+00EA U+030C -> 88a5
 * These are handled not by the mapping tables but by hand-written code.
 */
37 static const DBCHAR big5hkscs_pairenc_table[4] = {0x8862, 0x8864, 0x88a3, 0x88a5};
38 
ENCODER(big5hkscs)39 ENCODER(big5hkscs)
40 {
41     while (*inpos < inlen) {
42         Py_UCS4 c = INCHAR1;
43         DBCHAR code;
44         Py_ssize_t insize;
45 
46         if (c < 0x80) {
47             REQUIRE_OUTBUF(1);
48             **outbuf = (unsigned char)c;
49             NEXT(1, 1);
50             continue;
51         }
52 
53         insize = 1;
54         REQUIRE_OUTBUF(2);
55 
56         if (c < 0x10000) {
57             if (TRYMAP_ENC(big5hkscs_bmp, code, c)) {
58                 if (code == MULTIC) {
59                     Py_UCS4 c2;
60                     if (inlen - *inpos >= 2)
61                         c2 = INCHAR2;
62                     else
63                         c2 = 0;
64 
65                     if (inlen - *inpos >= 2 &&
66                         ((c & 0xffdf) == 0x00ca) &&
67                         ((c2 & 0xfff7) == 0x0304)) {
68                         code = big5hkscs_pairenc_table[
69                             ((c >> 4) |
70                              (c2 >> 3)) & 3];
71                         insize = 2;
72                     }
73                     else if (inlen - *inpos < 2 &&
74                              !(flags & MBENC_FLUSH))
75                         return MBERR_TOOFEW;
76                     else {
77                         if (c == 0xca)
78                             code = 0x8866;
79                         else /* c == 0xea */
80                             code = 0x88a7;
81                     }
82                 }
83             }
84             else if (TRYMAP_ENC(big5, code, c))
85                 ;
86             else
87                 return 1;
88         }
89         else if (c < 0x20000)
90             return insize;
91         else if (c < 0x30000) {
92             if (TRYMAP_ENC(big5hkscs_nonbmp, code, c & 0xffff))
93                 ;
94             else
95                 return insize;
96         }
97         else
98             return insize;
99 
100         OUTBYTE1(code >> 8);
101         OUTBYTE2(code & 0xFF);
102         NEXT(insize, 2);
103     }
104 
105     return 0;
106 }
107 
/*
 * BH2S(c1, c2): flatten a two-byte sequence into a linear index.
 * Lead bytes are counted from 0x87 and trail bytes from 0x40; each lead
 * byte spans (0xfe - 0x40 + 1) trail positions, so BH2S(0x87, 0x40) is 0.
 * Each argument is expanded exactly once.
 */
#define BH2S(c1, c2) \
    ((((c1) - 0x87) * (0xfe - 0x40 + 1)) + ((c2) - 0x40))
109 
DECODER(big5hkscs)110 DECODER(big5hkscs)
111 {
112     while (inleft > 0) {
113         unsigned char c = INBYTE1;
114         Py_UCS4 decoded;
115 
116         if (c < 0x80) {
117             OUTCHAR(c);
118             NEXT_IN(1);
119             continue;
120         }
121 
122         REQUIRE_INBUF(2);
123 
124         if (0xc6 > c || c > 0xc8 || (c < 0xc7 && INBYTE2 < 0xa1)) {
125             if (TRYMAP_DEC(big5, decoded, c, INBYTE2)) {
126                 OUTCHAR(decoded);
127                 NEXT_IN(2);
128                 continue;
129             }
130         }
131 
132         if (TRYMAP_DEC(big5hkscs, decoded, c, INBYTE2))
133         {
134             int s = BH2S(c, INBYTE2);
135             const unsigned char *hintbase;
136 
137             assert(0x87 <= c && c <= 0xfe);
138             assert(0x40 <= INBYTE2 && INBYTE2 <= 0xfe);
139 
140             if (BH2S(0x87, 0x40) <= s && s <= BH2S(0xa0, 0xfe)) {
141                     hintbase = big5hkscs_phint_0;
142                     s -= BH2S(0x87, 0x40);
143             }
144             else if (BH2S(0xc6,0xa1) <= s && s <= BH2S(0xc8,0xfe)){
145                     hintbase = big5hkscs_phint_12130;
146                     s -= BH2S(0xc6, 0xa1);
147             }
148             else if (BH2S(0xf9,0xd6) <= s && s <= BH2S(0xfe,0xfe)){
149                     hintbase = big5hkscs_phint_21924;
150                     s -= BH2S(0xf9, 0xd6);
151             }
152             else
153                     return MBERR_INTERNAL;
154 
155             if (hintbase[s >> 3] & (1 << (s & 7))) {
156                     OUTCHAR(decoded | 0x20000);
157                     NEXT_IN(2);
158             }
159             else {
160                     OUTCHAR(decoded);
161                     NEXT_IN(2);
162             }
163             continue;
164         }
165 
166         switch ((c << 8) | INBYTE2) {
167         case 0x8862: OUTCHAR2(0x00ca, 0x0304); break;
168         case 0x8864: OUTCHAR2(0x00ca, 0x030c); break;
169         case 0x88a3: OUTCHAR2(0x00ea, 0x0304); break;
170         case 0x88a5: OUTCHAR2(0x00ea, 0x030c); break;
171         default: return 1;
172         }
173 
174         NEXT_IN(2); /* all decoded code points are pairs, above. */
175     }
176 
177     return 0;
178 }
179 
180 
/*
 * Mapping tables listed for this module: big5hkscs through MAPPING_DECONLY,
 * big5hkscs_bmp and big5hkscs_nonbmp through MAPPING_ENCONLY.
 * NOTE(review): presumably DECONLY/ENCONLY publish only the decode or only
 * the encode side of a table -- confirm against cjkcodecs.h.
 */
181 BEGIN_MAPPINGS_LIST
182   MAPPING_DECONLY(big5hkscs)
183   MAPPING_ENCONLY(big5hkscs_bmp)
184   MAPPING_ENCONLY(big5hkscs_nonbmp)
185 END_MAPPINGS_LIST
186 
/*
 * Codecs listed for this module: big5hkscs only.
 * NOTE(review): CODEC_STATELESS_WINIT presumably wires up the ENCODER,
 * DECODER and CODEC_INIT definitions of the same name -- confirm against
 * cjkcodecs.h.
 */
187 BEGIN_CODECS_LIST
188   CODEC_STATELESS_WINIT(big5hkscs)
189 END_CODECS_LIST
190 
/* NOTE(review): presumably emits the module boilerplate for "hk" -- confirm. */
191 I_AM_A_MODULE_FOR(hk)
192