• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #
2 # genmap_support.py: Multibyte Codec Map Generator
3 #
4 # Original Author:  Hye-Shik Chang <perky@FreeBSD.org>
5 # Modified Author:  Dong-hee Na <donghee.na92@gmail.com>
6 #
7 
8 
class BufferedFiller:
    """Pack string tokens into output lines of at most ``column`` characters.

    Tokens are never split: a token that would push the current line past
    ``column`` starts a new line instead.  ``len(filler)`` is the total
    number of tokens written so far; it is not reset by ``flush()`` or
    ``printout()``.
    """

    def __init__(self, column=78):
        self.column = column    # maximum width of one output line
        self.buffered = []      # completed lines waiting for printout()
        self.cline = []         # tokens of the line being assembled
        self.clen = 0           # total character length of ``cline``
        self.count = 0          # number of tokens accepted by write()

    def write(self, *data):
        """Append each token in ``data``, wrapping to a new line as needed.

        Raises ValueError if a single token is longer than ``column``.
        Tokens preceding the offending one stay written.
        """
        limit = self.column
        for token in data:
            width = len(token)
            if width > limit:
                raise ValueError("token is too long")
            # The token does not fit on the current line: close that line.
            if self.clen + width > limit:
                self.flush()
            self.cline.append(token)
            self.clen += width
            self.count += 1

    def flush(self):
        """Move the line under construction (if any) to the finished lines."""
        if self.cline:
            self.buffered.append(''.join(self.cline))
            self.clen = 0
            self.cline.clear()

    def printout(self, fp):
        """Write every buffered line to ``fp``, one per line, then drop them."""
        self.flush()
        for text in self.buffered:
            fp.write(f'{text}\n')
        self.buffered.clear()

    def __len__(self):
        """Return the number of tokens written so far."""
        return self.count
42 
43 
class DecodeMapWriter:
    """Write C decode tables for a two-level (c1 -> c2 -> value) decode map.

    ``decode_map`` maps a first-byte value to a dict that maps second-byte
    values to integers.  ``update_decode_map()`` queues table elements and
    stores bookkeeping entries in those inner dicts; ``generate()`` writes
    the element array and a 256-entry index array to ``fp``.
    """

    filler_class = BufferedFiller

    def __init__(self, fp, prefix, decode_map):
        self.fp = fp
        self.prefix = prefix
        self.decode_map = decode_map
        self.filler = self.filler_class()

    def update_decode_map(self, c1range, c2range, onlymask=(), wide=0):
        """Queue table elements for rows of ``decode_map`` inside the ranges.

        ``c1range`` and ``c2range`` are inclusive ``(first, last)`` pairs.
        If ``onlymask`` is non-empty, only c1 values contained in it are
        processed.  ``wide`` is accepted but not used here.

        For each processed row that has at least one c2 value in range, the
        row dict gains the keys ``self.prefix`` (True), ``'min'``, ``'max'``
        (smallest / largest in-range c2 present) and ``'midx'`` (token count
        of the filler before this row's elements are written).
        """
        c2_span = range(c2range[0], c2range[1] + 1)

        for lead in range(c1range[0], c1range[1] + 1):
            if lead not in self.decode_map:
                continue
            if onlymask and lead not in onlymask:
                continue

            row = self.decode_map[lead]
            present = [trail for trail in c2_span if trail in row]
            if not present:
                continue

            low, high = present[0], present[-1]
            row[self.prefix] = True
            row['min'] = low
            row['max'] = high
            row['midx'] = len(self.filler)

            # One element per c2 between min and max; gaps are written as 'U'.
            for trail in range(low, high + 1):
                self.filler.write('%d,' % row[trail] if trail in row else 'U,')

    def generate(self, wide=False):
        """Write the element array and the 256-entry index array to ``fp``.

        ``wide`` selects ``Py_UCS4`` / ``struct widedbcs_index`` instead of
        ``ucs2_t`` / ``struct dbcs_index`` as the C types.
        """
        if wide:
            self.fp.write(f"static const Py_UCS4 __{self.prefix}_decmap[{len(self.filler)}] = {{\n")
        else:
            self.fp.write(f"static const ucs2_t __{self.prefix}_decmap[{len(self.filler)}] = {{\n")

        self.filler.printout(self.fp)
        self.fp.write("};\n\n")

        if wide:
            self.fp.write(f"static const struct widedbcs_index {self.prefix}_decmap[256] = {{\n")
        else:
            self.fp.write(f"static const struct dbcs_index {self.prefix}_decmap[256] = {{\n")

        for lead in range(256):
            # Rows not tagged with this writer's prefix get an all-zero entry.
            if lead not in self.decode_map or self.prefix not in self.decode_map[lead]:
                self.filler.write("{", "0,", "0,", "0", "},")
                continue

            entry = self.decode_map[lead]
            self.filler.write("{", "__%s_decmap" % self.prefix, "+", "%d" % entry['midx'],
                              ",", "%d," % entry['min'], "%d" % entry['max'], "},")
        self.filler.printout(self.fp)
        self.fp.write("};\n\n")
101 
102 
class EncodeMapWriter:
    """Write C encode tables for a two-level (c1 -> c2 -> value) encode map.

    ``encode_map`` maps a first-level key in ``range(256)`` to a dict that
    maps integer second-level keys to either an int or a tuple.
    ``generate()`` writes an element array followed by a 256-entry index
    array to ``fp``.
    """

    filler_class = BufferedFiller
    # C type of one element of the generated __<prefix>_encmap array.
    elemtype = 'DBCHAR'
    # C type of one entry of the generated <prefix>_encmap index array.
    indextype = 'struct unim_index'

    def __init__(self, fp, prefix, encode_map):
        self.fp = fp
        self.prefix = prefix
        self.encode_map = encode_map
        self.filler = self.filler_class()

    def generate(self):
        """Build the element table, then write both tables to ``fp``."""
        self.buildmap()
        self.printmap()

    def buildmap(self):
        """Queue one element per c2 slot for every populated row.

        Each processed row dict gains the keys ``self.prefix`` (True),
        ``'min'``, ``'max'`` (smallest / largest integer key) and ``'midx'``
        (token count of the filler before the row's elements are written).

        Raises ValueError if a mapped value is neither an int nor a tuple.
        """
        for c1 in range(256):
            if c1 not in self.encode_map:
                continue
            c2map = self.encode_map[c1]
            # Only integer keys are code values.  The row may already hold
            # the str bookkeeping keys written below (by an earlier pass or
            # by another writer sharing the map); sorting those together
            # with the ints would raise TypeError.
            rc2values = sorted(k for k in c2map if isinstance(k, int))
            if not rc2values:
                continue

            c2map[self.prefix] = True
            c2map['min'] = rc2values[0]
            c2map['max'] = rc2values[-1]
            c2map['midx'] = len(self.filler)

            for v in range(rc2values[0], rc2values[-1] + 1):
                if v not in c2map:
                    self.write_nochar()
                elif isinstance(c2map[v], int):
                    self.write_char(c2map[v])
                elif isinstance(c2map[v], tuple):
                    self.write_multic(c2map[v])
                else:
                    raise ValueError(
                        f"unsupported encode map value at "
                        f"({c1:#x}, {v:#x}): {c2map[v]!r}")

    def write_nochar(self):
        """Queue the placeholder element for an unmapped slot."""
        self.filler.write('N,')

    def write_multic(self, point):
        """Queue the placeholder element for a tuple-valued slot.

        ``point`` is not used by this implementation.
        """
        self.filler.write('M,')

    def write_char(self, point):
        """Queue the element for an int-valued slot."""
        self.filler.write(str(point) + ',')

    def printmap(self):
        """Write the element array and the 256-entry index array to ``fp``."""
        self.fp.write(f"static const {self.elemtype} __{self.prefix}_encmap[{len(self.filler)}] = {{\n")
        self.filler.printout(self.fp)
        self.fp.write("};\n\n")
        self.fp.write(f"static const {self.indextype} {self.prefix}_encmap[256] = {{\n")

        for i in range(256):
            if i in self.encode_map and self.prefix in self.encode_map[i]:
                self.filler.write("{", "__%s_encmap" % self.prefix, "+",
                                  "%d" % self.encode_map[i]['midx'], ",",
                                  "%d," % self.encode_map[i]['min'],
                                  "%d" % self.encode_map[i]['max'], "},")
            else:
                # Rows not tagged with this writer's prefix get a zero entry.
                self.filler.write("{", "0,", "0,", "0", "},")
        self.filler.printout(self.fp)
        self.fp.write("};\n\n")
169 
170 
def open_mapping_file(path, source):
    """Open ``path`` in the default text mode and return the file object.

    If the file cannot be opened (IOError), exit via SystemExit with the
    message "<source> is needed".  Closing the returned file is left to the
    caller.
    """
    try:
        return open(path)
    except IOError:
        raise SystemExit(f'{source} is needed')
177 
178 
def print_autogen(fo, source):
    """Write a one-line "auto-generated, do not edit" C++-style comment to ``fo``."""
    banner = f'// AUTO-GENERATED FILE FROM {source}: DO NOT EDIT\n'
    fo.write(banner)
181 
182 
def loadmap(fo, natcol=0, unicol=1, sbcs=0):
    """Parse a whitespace-separated mapping file into a two-level dict.

    ``fo`` is rewound and read line by line.  Text after the first ``#`` on
    a line is dropped, and lines with fewer than two remaining fields are
    skipped.  Column ``natcol`` is the native code and column ``unicol`` is
    the mapped value.

    Codes below 0x100 are ignored unless ``sbcs`` is true.  The result maps
    ``code >> 8`` to a dict that maps ``code & 0xff`` to the mapped value;
    a later line overrides an earlier one for the same code.
    """
    print("Loading from", fo)
    fo.seek(0, 0)
    decmap = {}
    for raw in fo:
        fields = raw.split('#', 1)[0].split()
        if len(fields) < 2:
            continue

        # NOTE(review): every field is passed to eval(), so the mapping file
        # must be trusted input; a crafted file can run arbitrary code.
        row = [eval(e) for e in fields]
        loc, uni = row[natcol], row[unicol]
        if loc >= 0x100 or sbcs:
            decmap.setdefault(loc >> 8, {})[loc & 0xff] = uni

    return decmap
199