#
# genmap_support.py: Multibyte Codec Map Generator
#
# Original Author: Hye-Shik Chang <perky@FreeBSD.org>
# Modified Author: Dong-hee Na <donghee.na92@gmail.com>
#


class BufferedFiller:
    """Pack string tokens into output lines no wider than ``column``.

    Tokens are concatenated without separators.  A token is never split:
    when appending it would exceed ``column`` characters, the current line
    is completed first.  ``len(filler)`` is the total number of tokens
    written so far (it is not reset by ``printout``).
    """

    def __init__(self, column=78):
        self.column = column
        self.buffered = []  # completed lines waiting for printout()
        self.cline = []     # tokens of the line currently being assembled
        self.clen = 0       # character length of the current line
        self.count = 0      # total number of tokens ever written

    def write(self, *data):
        """Append each token in ``data``, wrapping lines as needed.

        Raises ValueError if a single token is longer than ``column``.
        """
        for s in data:
            if len(s) > self.column:
                raise ValueError("token is too long")
            if len(s) + self.clen > self.column:
                self.flush()
            self.clen += len(s)
            self.cline.append(s)
            self.count += 1

    def flush(self):
        """Move the line being assembled (if any) to the completed lines."""
        if not self.cline:
            return
        self.buffered.append(''.join(self.cline))
        self.clen = 0
        self.cline.clear()

    def printout(self, fp):
        """Write every buffered line to ``fp`` and empty the buffer."""
        self.flush()
        for line in self.buffered:
            fp.write(f'{line}\n')
        self.buffered.clear()

    def __len__(self):
        return self.count


class DecodeMapWriter:
    """Emit C source for a decode table and its 256-entry index table.

    ``decode_map`` is a dict of dicts: first-byte -> {second-byte -> value}.
    ``update_decode_map`` annotates the inner dicts in place with the keys
    ``prefix``, ``'min'``, ``'max'`` and ``'midx'``.
    """

    filler_class = BufferedFiller

    def __init__(self, fp, prefix, decode_map):
        self.fp = fp
        self.prefix = prefix
        self.decode_map = decode_map
        self.filler = self.filler_class()

    def update_decode_map(self, c1range, c2range, onlymask=(), wide=0):
        """Queue table entries for the given inclusive byte ranges.

        c1range, c2range -- (low, high) pairs, both ends inclusive.
        onlymask -- if non-empty, only first bytes contained in it are used.
        wide -- accepted for interface compatibility; not used here.
        """
        c2values = range(c2range[0], c2range[1] + 1)

        for c1 in range(c1range[0], c1range[1] + 1):
            if c1 not in self.decode_map or (onlymask and c1 not in onlymask):
                continue
            c2map = self.decode_map[c1]
            rc2values = [n for n in c2values if n in c2map]
            if not rc2values:
                continue

            low, high = rc2values[0], rc2values[-1]
            c2map[self.prefix] = True
            c2map['min'] = low
            c2map['max'] = high
            # Offset of this row inside the flat table: tokens written so far.
            c2map['midx'] = len(self.filler)

            for v in range(low, high + 1):
                if v in c2map:
                    self.filler.write('%d,' % c2map[v])
                else:
                    # 'U' is emitted for holes inside the [min, max] span.
                    self.filler.write('U,')

    def generate(self, wide=False):
        """Write the flat table and the index table to ``self.fp``.

        wide -- selects ``Py_UCS4``/``widedbcs_index`` instead of
        ``ucs2_t``/``dbcs_index`` in the emitted declarations.
        """
        elemtype = "Py_UCS4" if wide else "ucs2_t"
        indextype = "widedbcs_index" if wide else "dbcs_index"
        prefix = self.prefix

        self.fp.write(
            f"static const {elemtype} __{prefix}_decmap"
            f"[{len(self.filler)}] = {{\n")
        self.filler.printout(self.fp)
        self.fp.write("};\n\n")

        self.fp.write(
            f"static const struct {indextype} {prefix}_decmap[256] = {{\n")
        for i in range(256):
            row = self.decode_map.get(i)
            if row is not None and prefix in row:
                self.filler.write(
                    "{", "__%s_decmap" % prefix, "+", "%d" % row['midx'],
                    ",", "%d," % row['min'], "%d" % row['max'], "},")
            else:
                self.filler.write("{", "0,", "0,", "0", "},")
        self.filler.printout(self.fp)
        self.fp.write("};\n\n")


class EncodeMapWriter:
    """Emit C source for an encode table and its 256-entry index table.

    ``encode_map`` is a dict of dicts: high-byte -> {low-byte -> value},
    where a value is an int or a tuple.  ``buildmap`` annotates the inner
    dicts in place with ``prefix``, ``'min'``, ``'max'`` and ``'midx'``.
    """

    filler_class = BufferedFiller
    elemtype = 'DBCHAR'
    indextype = 'struct unim_index'

    def __init__(self, fp, prefix, encode_map):
        self.fp = fp
        self.prefix = prefix
        self.encode_map = encode_map
        self.filler = self.filler_class()

    def generate(self):
        """Build the table and write it out."""
        self.buildmap()
        self.printmap()

    def buildmap(self):
        """Queue table entries for every row present in ``encode_map``.

        Raises ValueError if a mapped value is neither an int nor a tuple.
        """
        for c1 in range(0, 256):
            if c1 not in self.encode_map:
                continue
            c2map = self.encode_map[c1]
            # Only integer keys are code points.  The string keys added
            # below (prefix, 'min', 'max', 'midx') may already be present
            # if this map was annotated before; sorting them together with
            # ints would raise TypeError.
            rc2values = sorted(k for k in c2map if isinstance(k, int))
            if not rc2values:
                continue

            low, high = rc2values[0], rc2values[-1]
            c2map[self.prefix] = True
            c2map['min'] = low
            c2map['max'] = high
            # Offset of this row inside the flat table: tokens written so far.
            c2map['midx'] = len(self.filler)

            for v in range(low, high + 1):
                if v not in c2map:
                    self.write_nochar()
                    continue
                value = c2map[v]
                if isinstance(value, int):
                    self.write_char(value)
                elif isinstance(value, tuple):
                    self.write_multic(value)
                else:
                    raise ValueError(
                        f"unsupported mapping value {value!r} "
                        f"at ({c1:#x}, {v:#x})")

    def write_nochar(self):
        """Emit the placeholder for an unmapped slot."""
        self.filler.write('N,')

    def write_multic(self, point):
        """Emit the placeholder for a tuple-valued slot."""
        self.filler.write('M,')

    def write_char(self, point):
        """Emit an integer value."""
        self.filler.write(str(point) + ',')

    def printmap(self):
        """Write the flat table and the index table to ``self.fp``."""
        prefix = self.prefix
        self.fp.write(
            f"static const {self.elemtype} __{prefix}_encmap"
            f"[{len(self.filler)}] = {{\n")
        self.filler.printout(self.fp)
        self.fp.write("};\n\n")
        self.fp.write(
            f"static const {self.indextype} {prefix}_encmap[256] = {{\n")

        for i in range(256):
            row = self.encode_map.get(i)
            if row is not None and prefix in row:
                self.filler.write(
                    "{", "__%s_encmap" % prefix, "+", "%d" % row['midx'],
                    ",", "%d," % row['min'], "%d" % row['max'], "},")
            else:
                self.filler.write("{", "0,", "0,", "0", "},")
        self.filler.printout(self.fp)
        self.fp.write("};\n\n")


def open_mapping_file(path, source):
    """Open ``path`` for reading; exit with a hint naming ``source`` on failure."""
    try:
        f = open(path)
    except OSError:
        raise SystemExit(f'{source} is needed')
    return f


def print_autogen(fo, source):
    """Write the "auto-generated, do not edit" banner line to ``fo``."""
    fo.write(f'// AUTO-GENERATED FILE FROM {source}: DO NOT EDIT\n')


def loadmap(fo, natcol=0, unicol=1, sbcs=0):
    """Parse a whitespace-separated mapping file into a two-level dict.

    fo -- seekable text file object; it is rewound before reading.
    natcol, unicol -- column indexes of the native code and mapped value.
    sbcs -- if true, codes below 0x100 are kept as well (under key 0).

    Text after ``#`` is ignored, as are lines with fewer than two fields.
    Returns {code >> 8: {code & 0xff: value}}.
    """
    print("Loading from", fo)
    fo.seek(0, 0)
    decmap = {}
    for line in fo:
        fields = line.split('#', 1)[0].split()
        if len(fields) < 2:
            continue

        # NOTE(security): eval() executes arbitrary expressions found in the
        # mapping file.  Only feed this function trusted mapping files.
        row = [eval(e) for e in fields]
        loc, uni = row[natcol], row[unicol]
        if loc >= 0x100 or sbcs:
            decmap.setdefault(loc >> 8, {})[loc & 0xff] = uni

    return decmap