#
# genmap_ja_codecs.py: Japanese Codecs Map Generator
#
# Original Author: Hye-Shik Chang <perky@FreeBSD.org>
# Modified Author: Dong-hee Na <donghee.na92@gmail.com>
#
import os

from genmap_support import *

# (first, last) byte ranges handed to DecodeMapWriter.update_decode_map().
JISX0208_C1 = (0x21, 0x74)
JISX0208_C2 = (0x21, 0x7e)
JISX0212_C1 = (0x22, 0x6d)
JISX0212_C2 = (0x21, 0x7e)
JISX0213_C1 = (0x21, 0x7e)
JISX0213_C2 = (0x21, 0x7e)
CP932P0_C1 = (0x81, 0x81)  # patches between shift-jis and cp932
CP932P0_C2 = (0x5f, 0xca)
CP932P1_C1 = (0x87, 0x87)  # CP932 P1
CP932P1_C2 = (0x40, 0x9c)
CP932P2_C1 = (0xed, 0xfc)  # CP932 P2
CP932P2_C2 = (0x40, 0xfc)

MAPPINGS_JIS0208 = 'http://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS/JIS0208.TXT'
MAPPINGS_JIS0212 = 'http://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS/JIS0212.TXT'
MAPPINGS_CP932 = 'http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP932.TXT'
MAPPINGS_JISX0213_2004 = 'http://wakaba-web.hp.infoseek.co.jp/table/jisx0213-2004-std.txt'


def loadmap_jisx0213(fo):
    """Parse a JIS X 0213 mapping table into nested decode maps.

    Each non-empty line (after stripping a trailing ``#`` comment) must have
    at least two whitespace-separated columns:

    * column 0: ``<level digit><one separator char><hex location>``; the
      level must be 3 or 4.
    * column 1: either a single code point (``U+XXXX`` style, one ``+``) or
      a pair ``U+XXXX+YYYY`` (body + modifier, level 3 only).

    Args:
        fo: An iterable of text lines (e.g. an open text file).

    Returns:
        A 5-tuple ``(decmap3, decmap4, decmap3_2, decmap4_2, decmap3_pair)``.
        The first four map ``{loc >> 8: {loc & 0xff: value}}`` where the
        value is the code point for the BMP maps, or the code point minus
        0x20000 for the ``_2`` maps.  ``decmap3_pair`` maps
        ``{body: {loc >> 8: {loc & 0xff: modifier}}}``.

    Raises:
        ValueError: If a field is not valid hexadecimal/decimal, if a single
            code point has an unsupported level or lies outside the BMP and
            U+2xxxx ranges, or if a pair entry is not level 3.
    """
    decmap3, decmap4 = {}, {}  # maps to BMP for level 3 and 4
    decmap3_2, decmap4_2 = {}, {}  # maps to U+2xxxx for level 3 and 4
    decmap3_pair = {}  # maps to BMP-pair for level 3
    # level -> (BMP map, U+2xxxx map)
    planes = {3: (decmap3, decmap3_2), 4: (decmap4, decmap4_2)}

    for line in fo:
        row = line.split('#', 1)[0].split()
        if len(row) < 2:
            continue

        # int() instead of eval(): the fields come from an external text
        # file and must never be executed as Python expressions.
        loc = int(row[0][2:], 16)
        level = int(row[0][0])
        m = None
        if len(row[1].split('+')) == 2:  # single unicode
            uni = int(row[1][2:], 16)
            bmp_map, emp_map = planes.get(level, (None, None))
            if uni < 0x10000:
                m = bmp_map
            elif 0x20000 <= uni < 0x30000:
                uni -= 0x20000
                m = emp_map
        else:  # pair
            uniprefix = int(row[1][2:6], 16)  # body
            uni = int(row[1][7:11], 16)  # modifier
            if level != 3:
                raise ValueError("invalid map")
            m = decmap3_pair.setdefault(uniprefix, {})

        # Validate before touching the map so that an unsupported entry is
        # reported as ValueError rather than AttributeError on None.
        if m is None:
            raise ValueError("invalid map")
        m.setdefault(loc >> 8, {})[loc & 0xff] = uni

    return decmap3, decmap4, decmap3_2, decmap4_2, decmap3_pair


def main():
    """Load the Japanese mapping tables and write the generated C headers.

    Writes ``mappings_jp.h`` and ``mappings_jisx0213_pair.h`` into the
    current working directory, printing progress messages along the way.

    Raises:
        SystemExit: If the loaded JIS X 0213 table does not map 0x2124 to
            U+FF0C.
        ValueError: If a JIS X 0213 plane 1 entry conflicts with the
            JIS X 0208 entry at the same location.
    """
    jisx0208file = open_mapping_file('python-mappings/JIS0208.TXT', MAPPINGS_JIS0208)
    jisx0212file = open_mapping_file('python-mappings/JIS0212.TXT', MAPPINGS_JIS0212)
    cp932file = open_mapping_file('python-mappings/CP932.TXT', MAPPINGS_CP932)
    jisx0213file = open_mapping_file('python-mappings/jisx0213-2004-std.txt', MAPPINGS_JISX0213_2004)

    print("Loading Mapping File...")

    # The JIS0208 file is loaded twice with different native-code columns.
    sjisdecmap = loadmap(jisx0208file, natcol=0, unicol=2)
    jisx0208decmap = loadmap(jisx0208file, natcol=1, unicol=2)
    jisx0212decmap = loadmap(jisx0212file)
    cp932decmap = loadmap(cp932file)
    jis3decmap, jis4decmap, jis3_2_decmap, jis4_2_decmap, jis3_pairdecmap = loadmap_jisx0213(jisx0213file)

    if jis3decmap[0x21][0x24] != 0xff0c:
        raise SystemExit('Please adjust your JIS X 0213 map using jisx0213-2000-std.txt.diff')

    sjisencmap, cp932encmap = {}, {}
    jisx0208_0212encmap = {}
    for c1, m in sjisdecmap.items():
        for c2, code in m.items():
            sjisencmap.setdefault(code >> 8, {})[code & 0xff] = c1 << 8 | c2
    for c1, m in cp932decmap.items():
        for c2, code in m.items():
            # The first native code seen for a code point wins.
            cp932encmap.setdefault(code >> 8, {}).setdefault(code & 0xff, c1 << 8 | c2)
    # Drop CP932 encode entries identical to the Shift-JIS ones so that only
    # the differences remain.  Iterate over snapshots because the maps are
    # mutated inside the loop.
    for c1, m in list(cp932encmap.items()):
        for c2, code in list(m.items()):
            if c1 in sjisencmap and c2 in sjisencmap[c1] and sjisencmap[c1][c2] == code:
                del cp932encmap[c1][c2]
                if not cp932encmap[c1]:
                    del cp932encmap[c1]

    jisx0213pairdecmap = {}
    jisx0213pairencmap = []
    for unibody, m1 in jis3_pairdecmap.items():
        for c1, m2 in m1.items():
            for c2, modifier in m2.items():
                jisx0213pairencmap.append((unibody, modifier, c1 << 8 | c2))
                jisx0213pairdecmap.setdefault(c1, {})[c2] = unibody << 16 | modifier

    # Twinmap for both of JIS X 0208 (MSB unset) and JIS X 0212 (MSB set)
    for c1, m in jisx0208decmap.items():
        for c2, code in m.items():
            jisx0208_0212encmap.setdefault(code >> 8, {})[code & 0xff] = c1 << 8 | c2

    for c1, m in jisx0212decmap.items():
        for c2, code in m.items():
            row = jisx0208_0212encmap.setdefault(code >> 8, {})
            if (code & 0xff) in row:
                # Code point already claimed; report it, then overwrite.
                print("OOPS!!!", (code))
            row[code & 0xff] = 0x8000 | c1 << 8 | c2

    jisx0213bmpencmap = {}
    # Snapshots again: entries shared with JIS X 0208 are removed from
    # jis3decmap while iterating.
    for c1, m in list(jis3decmap.items()):
        for c2, code in list(m.items()):
            if c1 in jisx0208decmap and c2 in jisx0208decmap[c1]:
                if code in jis3_pairdecmap:
                    # Create the row first; it may not exist yet when this
                    # is the first code point seen with this high byte.
                    jisx0213bmpencmap.setdefault(code >> 8, {})
                    jisx0213bmpencmap[code >> 8][code & 0xff] = (0,)  # pair
                    jisx0213pairencmap.append((code, 0, c1 << 8 | c2))
                elif jisx0208decmap[c1][c2] == code:
                    del jis3decmap[c1][c2]
                    if not jis3decmap[c1]:
                        del jis3decmap[c1]
                else:
                    raise ValueError("Difference between JIS X 0208 and JIS X 0213 Plane 1 is found.")
            else:
                jisx0213bmpencmap.setdefault(code >> 8, {})
                if code not in jis3_pairdecmap:
                    jisx0213bmpencmap[code >> 8][code & 0xff] = c1 << 8 | c2
                else:
                    jisx0213bmpencmap[code >> 8][code & 0xff] = (0,)  # pair
                    jisx0213pairencmap.append((code, 0, c1 << 8 | c2))

    for c1, m in jis4decmap.items():
        for c2, code in m.items():
            jisx0213bmpencmap.setdefault(code >> 8, {})[code & 0xff] = 0x8000 | c1 << 8 | c2

    jisx0213empencmap = {}
    for c1, m in jis3_2_decmap.items():
        for c2, code in m.items():
            jisx0213empencmap.setdefault(code >> 8, {})[code & 0xff] = c1 << 8 | c2
    for c1, m in jis4_2_decmap.items():
        for c2, code in m.items():
            jisx0213empencmap.setdefault(code >> 8, {})[code & 0xff] = 0x8000 | c1 << 8 | c2

    with open("mappings_jp.h", "w") as fp:
        print_autogen(fp, os.path.basename(__file__))
        print("Generating JIS X 0208 decode map...")
        writer = DecodeMapWriter(fp, "jisx0208", jisx0208decmap)
        writer.update_decode_map(JISX0208_C1, JISX0208_C2)
        writer.generate()

        print("Generating JIS X 0212 decode map...")
        writer = DecodeMapWriter(fp, "jisx0212", jisx0212decmap)
        writer.update_decode_map(JISX0212_C1, JISX0212_C2)
        writer.generate()

        print("Generating JIS X 0208 && JIS X 0212 encode map...")
        writer = EncodeMapWriter(fp, "jisxcommon", jisx0208_0212encmap)
        writer.generate()

        print("Generating CP932 Extension decode map...")
        writer = DecodeMapWriter(fp, "cp932ext", cp932decmap)
        writer.update_decode_map(CP932P0_C1, CP932P0_C2)
        writer.update_decode_map(CP932P1_C1, CP932P1_C2)
        writer.update_decode_map(CP932P2_C1, CP932P2_C2)
        writer.generate()

        print("Generating CP932 Extension encode map...")
        writer = EncodeMapWriter(fp, "cp932ext", cp932encmap)
        writer.generate()

        print("Generating JIS X 0213 Plane 1 BMP decode map...")
        writer = DecodeMapWriter(fp, "jisx0213_1_bmp", jis3decmap)
        writer.update_decode_map(JISX0213_C1, JISX0213_C2)
        writer.generate()

        print("Generating JIS X 0213 Plane 2 BMP decode map...")
        writer = DecodeMapWriter(fp, "jisx0213_2_bmp", jis4decmap)
        writer.update_decode_map(JISX0213_C1, JISX0213_C2)
        writer.generate()

        print("Generating JIS X 0213 BMP encode map...")
        writer = EncodeMapWriter(fp, "jisx0213_bmp", jisx0213bmpencmap)
        writer.generate()

        print("Generating JIS X 0213 Plane 1 EMP decode map...")
        writer = DecodeMapWriter(fp, "jisx0213_1_emp", jis3_2_decmap)
        writer.update_decode_map(JISX0213_C1, JISX0213_C2)
        writer.generate()

        print("Generating JIS X 0213 Plane 2 EMP decode map...")
        writer = DecodeMapWriter(fp, "jisx0213_2_emp", jis4_2_decmap)
        writer.update_decode_map(JISX0213_C1, JISX0213_C2)
        writer.generate()

        print("Generating JIS X 0213 EMP encode map...")
        writer = EncodeMapWriter(fp, "jisx0213_emp", jisx0213empencmap)
        writer.generate()

    with open('mappings_jisx0213_pair.h', 'w') as fp:
        print_autogen(fp, os.path.basename(__file__))
        fp.write(f"#define JISX0213_ENCPAIRS {len(jisx0213pairencmap)}\n")
        fp.write("""\
#ifdef EXTERN_JISX0213_PAIR
static const struct widedbcs_index *jisx0213_pair_decmap;
static const struct pair_encodemap *jisx0213_pair_encmap;
#else
""")

        print("Generating JIS X 0213 unicode-pair decode map...")
        writer = DecodeMapWriter(fp, "jisx0213_pair", jisx0213pairdecmap)
        writer.update_decode_map(JISX0213_C1, JISX0213_C2)
        writer.generate(wide=True)

        print("Generating JIS X 0213 unicode-pair encode map...")
        # Emit the (body, modifier, native code) tuples in sorted order.
        jisx0213pairencmap.sort()
        fp.write("static const struct pair_encodemap jisx0213_pair_encmap[JISX0213_ENCPAIRS] = {\n")
        filler = BufferedFiller()
        for body, modifier, jis in jisx0213pairencmap:
            filler.write('{', '0x%04x%04x,' % (body, modifier), '0x%04x' % jis, '},')
        filler.printout(fp)
        fp.write("};\n")
        fp.write("#endif\n")

    print("Done!")


if __name__ == '__main__':
    main()