• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python
2
3from __future__ import print_function, division, absolute_import
4
5import io, os.path, sys, re
6import logging
# Log lines are rendered as "LEVEL: message"; INFO and above are shown.
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')

# Expect the UCD XML path, optionally followed by the path to hb-common.h.
if not 2 <= len(sys.argv) <= 3:
    print("usage: ./gen-ucd-table ucd.nounihan.grouped.xml [/path/to/hb-common.h]", file=sys.stderr)
    sys.exit(1)
12
13# https://github.com/harfbuzz/packtab
14import packTab
15import packTab.ucdxml
16
# Header to scan for HB_SCRIPT_* definitions: the second command-line
# argument when given, otherwise 'hb-common.h' relative to the working dir.
hb_common_h = sys.argv[2] if len(sys.argv) >= 3 else 'hb-common.h'

logging.info('Loading UCDXML...')
# ucdxml is the loaded document; ucd is the repertoire derived from it and
# is iterated below with enumerate(), the index serving as the code point.
ucdxml = packTab.ucdxml.load_ucdxml(sys.argv[1])
ucd = packTab.ucdxml.ucdxml_get_repertoire(ucdxml)
22
logging.info('Preparing data tables...')

# Per-code-point columns; the list index is the position of the record in ucd.
gc = []     # value of the 'gc' attribute
ccc = []    # 'ccc' attribute converted to int
bmg = []    # 'bmg' attribute (hex) minus the record's own index, 0 if empty
sc = []     # value of the 'sc' attribute
for cp, rec in enumerate(ucd):
    gc.append(rec['gc'])
    ccc.append(int(rec['ccc']))
    mirror = rec['bmg']
    bmg.append(int(mirror, 16) - cp if mirror else 0)
    sc.append(rec['sc'])

# dm: index -> tuple of decomposition code points, restricted to canonical
#     decompositions ('dt' == 'can') with an explicit mapping ('dm' != '#').
#     The 11172 indices starting at 0xAC00 are left out.
# ce: indices whose 'Comp_Ex' attribute is 'Y'.
dm = {}
ce = set()
for cp, rec in enumerate(ucd):
    if rec['Comp_Ex'] == 'Y':
        ce.add(cp)
    if rec['dm'] == '#' or rec['dt'] != 'can':
        continue
    if 0xAC00 <= cp < 0xAC00 + 11172:
        continue
    dm[cp] = tuple(int(h, 16) for h in rec['dm'].split())
36
# Every canonical decomposition must expand to exactly one or two code
# points.  Test the length directly: an empty tuple is falsy, so checking
# the truthiness of the offending tuples themselves would let it through.
assert all(len(v) in (1, 2) for v in dm.values())

# Singleton decompositions: the distinct targets, sorted.
dm1 = sorted(set(v for v in dm.values() if len(v) == 1))
# Targets are split by plane (value >> 16).  Only planes 0 and 2 are
# handled, and each target is emitted as its low 16 bits.
assert all((v[0] >> 16) in (0,2) for v in dm1)
dm1_p0_array = ['0x%04Xu' % (v[0] & 0xFFFF) for v in dm1 if (v[0] >> 16) == 0]
dm1_p2_array = ['0x%04Xu' % (v[0] & 0xFFFF) for v in dm1 if (v[0] >> 16) == 2]
# Index 0 is reserved for "no decomposition" (see dm_order below), so
# singleton indices start at 1.
dm1_order = {v:i+1 for i,v in enumerate(dm1)}

# Pair decompositions.  Each entry is (key, pair): key is the pair extended
# with a third element -- the decomposing code point itself, or 0 when that
# code point is in ce or has a non-zero ccc.  Entries are sorted by key.
dm2 = sorted((v+(i if i not in ce and not ccc[i] else 0,), v)
             for i,v in dm.items() if len(v) == 2)

# True when a key qualifies for the 32-bit encoding: first element below
# 0x800, second in 0x0300..0x037F, third with the masked bits clear.
# NOTE(review): the third mask (0xFFF0C000) leaves bits 16-19 unchecked;
# confirm this is intended for the 14-bit field the macro name suggests.
filt = lambda v: ((v[0] & 0xFFFFF800) == 0x0000 and
                  (v[1] & 0xFFFFFF80) == 0x0300 and
                  (v[2] & 0xFFF0C000) == 0x0000)
dm2_u32_array = [v for v in dm2 if filt(v[0])]
dm2_u64_array = [v for v in dm2 if not filt(v[0])]
# The split must not reorder anything: dm2_order below indexes dm2 as if it
# were the u32 entries followed by the u64 entries.
assert dm2_u32_array + dm2_u64_array == dm2
dm2_u32_array = ["HB_CODEPOINT_ENCODE3_11_7_14 (0x%04Xu, 0x%04Xu, 0x%04Xu)" % v[0] for v in dm2_u32_array]
dm2_u64_array = ["HB_CODEPOINT_ENCODE3 (0x%04Xu, 0x%04Xu, 0x%04Xu)" % v[0] for v in dm2_u64_array]

# Pair indices continue after the reserved 0 and all singleton entries.
l = 1 + len(dm1_p0_array) + len(dm1_p2_array)
dm2_order = {v[1]:i+l for i,v in enumerate(dm2)}

# Combined mapping from a decomposition tuple (or None) to its index.
dm_order = {None: 0}
dm_order.update(dm1_order)
dm_order.update(dm2_order)
62
# General category abbreviations in the order that defines their numeric
# index; gc_order maps both index -> abbreviation and abbreviation -> index.
_GC_ABBREVIATIONS = (
    'Cc', 'Cf', 'Cn', 'Co', 'Cs', 'Ll', 'Lm', 'Lo', 'Lt', 'Lu',
    'Mc', 'Me', 'Mn', 'Nd', 'Nl', 'No', 'Pc', 'Pd', 'Pe', 'Pf',
    'Pi', 'Po', 'Ps', 'Sc', 'Sk', 'Sm', 'So', 'Zl', 'Zp', 'Zs',
)
gc_order = {}
for index, abbrev in enumerate(_GC_ABBREVIATIONS):
    gc_order[index] = abbrev
    gc_order[abbrev] = index
69
# Scripts, numbered in the order their HB_SCRIPT_* lines appear in the
# header.  sc_order maps both tag -> index and index -> tag; sc_array holds
# the matching HB_SCRIPT_* identifier at each index.
sc_order = dict()
sc_array = []
# Captures the HB_SCRIPT_* name and the four characters of its HB_TAG.
sc_re = re.compile(r"\b(HB_SCRIPT_[_A-Z]*).*HB_TAG [(]'(.)','(.)','(.)','(.)'[)]")
# Read the header through a context manager so the file handle is closed
# as soon as scanning finishes, rather than whenever it is collected.
with open(hb_common_h) as header:
    for line in header:
        m = sc_re.search(line)
        if not m: continue
        name = m.group(1)
        tag = ''.join(m.group(i) for i in range(2, 6))
        i = len(sc_array)
        sc_order[tag] = i
        sc_order[i] = tag
        sc_array.append(name)
82
# Values passed as the `compression` argument of packTab.pack_table below;
# one table variant is generated for each.
DEFAULT, COMPACT, SLOPPY = 1, 3, 5
86
87
logging.info('Generating output...')

# Banner comment recording how the table was produced.
for banner_line in (
    "/* == Start of generated table == */",
    "/*",
    " * The following table is generated by running:",
    " *",
    " *   ./gen-ucd-table.py ucd.nounihan.grouped.xml",
    " *",
):
    print(banner_line)
print(" * on file with this description:", ucdxml.description)
print(" */")
print()

# Include guard (closed at the end of the output) and the required header.
for prologue_line in (
    "#ifndef HB_UCD_TABLE_HH",
    "#define HB_UCD_TABLE_HH",
    "",
    '#include "hb.hh"',
    "",
):
    print(prologue_line)
103
# Descriptors for the packed tables generated per compression level:
# (name, data, default value, mapping handed to pack_table or None).
datasets = [
    ('gc',  gc,  'Cn',   gc_order),
    ('ccc', ccc, 0,      None),
    ('bmg', bmg, 0,      None),
    ('sc',  sc,  'Zzzz', sc_order),
    ('dm',  dm,  None,   dm_order),
]

# Flat arrays shared by every compression variant.  Each Python list is
# replaced by the first element of what addArray returns for it.
code = packTab.Code('_hb_ucd')
sc_array, _ = code.addArray('hb_script_t', 'sc_map', sc_array)
dm1_p0_array, _ = code.addArray('uint16_t', 'dm1_p0_map', dm1_p0_array)
dm1_p2_array, _ = code.addArray('uint16_t', 'dm1_p2_map', dm1_p2_array)
dm2_u32_array, _ = code.addArray('uint32_t', 'dm2_u32_map', dm2_u32_array)
dm2_u64_array, _ = code.addArray('uint64_t', 'dm2_u64_map', dm2_u64_array)
code.print_c(linkage='static inline')
119
def _fill_placeholders(values, placeholder):
    """Overwrite placeholder entries of *values* in place with a neighbour.

    A forward pass copies from the preceding entry, then a reverse pass
    copies from the following entry.  A copy never crosses an index that is
    a multiple of 128.
    """
    count = len(values)
    for pos in range(count):
        if pos % 128 != 0 and values[pos] == placeholder:
            values[pos] = values[pos - 1]
    for pos in reversed(range(count - 1)):
        if (pos + 1) % 128 != 0 and values[pos] == placeholder:
            values[pos] = values[pos + 1]


# Preprocessor line that opens each variant's branch; any level not listed
# here (SLOPPY) falls into the final '#else'.
_BRANCH_DIRECTIVE = {
    DEFAULT: '#ifndef HB_OPTIMIZE_SIZE',
    COMPACT: '#elif !defined(HB_NO_UCD_UNASSIGNED)',
}

for compression in (DEFAULT, COMPACT, SLOPPY):
    logging.info('  Compression=%d:' % compression)
    print()
    print(_BRANCH_DIRECTIVE.get(compression, '#else'))
    print()

    if compression == SLOPPY:
        # gc and sc are modified in place, and datasets holds references to
        # these same lists, so the tables packed below see the filled-in
        # values.  SLOPPY is the last level, so earlier variants are unaffected.
        _fill_placeholders(gc, 'Cn')
        _fill_placeholders(sc, 'Zzzz')

    code = packTab.Code('_hb_ucd')
    for name, data, default, mapping in datasets:
        sol = packTab.pack_table(data, default, mapping=mapping, compression=compression)
        logging.info('      Dataset=%-8s FullCost=%d' % (name, sol.fullCost))
        sol.genCode(code, name)
    code.print_c(linkage='static inline')

    print()

# Close the #ifndef / #elif / #else chain opened inside the loop.
print('#endif')
print()
159
# Close the HB_UCD_TABLE_HH include guard and mark the end of the output.
for footer_line in (
    "",
    "#endif /* HB_UCD_TABLE_HH */",
    "",
    "/* == End of generated table == */",
):
    print(footer_line)
logging.info('Done.')
165