#!/usr/bin/env python
"""Generate the packed Unicode Character Database tables as a C++ header.

usage: ./gen-ucd-table.py ucd.nounihan.grouped.xml [/path/to/hb-common.h]

The script loads a UCDXML file through packTab, derives per-code-point
datasets (general category, combining class, mirroring delta, script and
canonical decomposition), and prints the header guarded by HB_UCD_TABLE_HH
to stdout.  Progress is logged to stderr via the logging module.
"""

from __future__ import print_function, division, absolute_import

import io
import os.path
import sys
import re
import logging
logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.INFO)

if len(sys.argv) not in (2, 3):
    # The script name matches the one printed in the generated header below.
    print("usage: ./gen-ucd-table.py ucd.nounihan.grouped.xml [/path/to/hb-common.h]", file=sys.stderr)
    sys.exit(1)

# Imported only after the argument check so that the usage message is shown
# even when packTab is not installed.
# https://github.com/harfbuzz/packtab
import packTab
import packTab.ucdxml


def _fits_u32(entry):
    """Return True if the (first, second, composed) triple passes the masks
    used to select entries for the HB_CODEPOINT_ENCODE3_11_7_14 array.

    first must be below 0x800, second must lie in 0x0300..0x037F, and
    composed must have none of the bits in 0xFFF0C000 set.
    """
    # NOTE(review): the third mask leaves bits 16-19 unchecked; confirm this
    # is intended against the HB_CODEPOINT_ENCODE3_11_7_14 macro.
    return ((entry[0] & 0xFFFFF800) == 0x0000 and
            (entry[1] & 0xFFFFFF80) == 0x0300 and
            (entry[2] & 0xFFF0C000) == 0x0000)


def _fill_unassigned(values, unassigned, block_size=128):
    """Overwrite `unassigned` entries with a neighbour's value, in place.

    The forward pass copies the previous entry into every unassigned slot that
    is not the first of its `block_size`-aligned block; the backward pass then
    copies the next entry into every still-unassigned slot that is not the last
    of its block.  Values therefore never cross a block boundary, and a block
    that is entirely unassigned is left untouched.

    The list is mutated rather than copied because callers hold references to
    it (see `datasets`).
    """
    # Index loops are required: each step may read a value written by the
    # previous step.
    for i in range(len(values)):
        if (i % block_size) and values[i] == unassigned:
            values[i] = values[i - 1]
    for i in range(len(values) - 2, -1, -1):
        if ((i + 1) % block_size) and values[i] == unassigned:
            values[i] = values[i + 1]


logging.info('Loading UCDXML...')
ucdxml = packTab.ucdxml.load_ucdxml(sys.argv[1])
ucd = packTab.ucdxml.ucdxml_get_repertoire(ucdxml)

hb_common_h = 'hb-common.h' if len(sys.argv) < 3 else sys.argv[2]

logging.info('Preparing data tables...')

# One entry per item of the repertoire; the list index is used as the code point.
gc = [u['gc'] for u in ucd]
ccc = [int(u['ccc']) for u in ucd]
# Mirroring is stored as the delta from the code point to the value of its
# 'bmg' attribute; 0 when the attribute is empty.
bmg = [int(u['bmg'], 16) - cp if u['bmg'] else 0 for cp, u in enumerate(ucd)]
#gc_ccc_non0 = set((cat,klass) for cat,klass in zip(gc,ccc) if klass)
#gc_bmg_non0 = set((cat,mirr) for cat,mirr in zip(gc, bmg) if mirr)

sc = [u['sc'] for u in ucd]

# Canonical decompositions only (dt == 'can'), keyed by code point.  The 11172
# code points starting at U+AC00 are skipped (presumably the Hangul syllables,
# which are handled elsewhere -- confirm against the consumer of this table).
dm = {cp: tuple(int(v, 16) for v in u['dm'].split())
      for cp, u in enumerate(ucd)
      if u['dm'] != '#' and u['dt'] == 'can' and not (0xAC00 <= cp < 0xAC00 + 11172)}
# Code points flagged Comp_Ex == 'Y'.
ce = {cp for cp, u in enumerate(ucd) if u['Comp_Ex'] == 'Y'}

# Every decomposition must have exactly one or two code points.
assert not any(v for v in dm.values() if len(v) not in (1, 2))

# Single-code-point decompositions, split by plane (only planes 0 and 2 are
# expected).  Because dm1 is sorted, all plane-0 entries precede plane-2 ones,
# so the indices in dm1_order run through the p0 array and then the p2 array.
dm1 = sorted(set(v for v in dm.values() if len(v) == 1))
assert all((v[0] >> 16) in (0, 2) for v in dm1)
dm1_p0_array = ['0x%04Xu' % (v[0] & 0xFFFF) for v in dm1 if (v[0] >> 16) == 0]
dm1_p2_array = ['0x%04Xu' % (v[0] & 0xFFFF) for v in dm1 if (v[0] >> 16) == 2]
dm1_order = {v: i + 1 for i, v in enumerate(dm1)}

# Two-code-point decompositions as ((first, second, composed), (first, second)).
# `composed` is the source code point, or 0 when that code point is in `ce` or
# has a non-zero combining class.
dm2 = sorted((v + (cp if cp not in ce and not ccc[cp] else 0,), v)
             for cp, v in dm.items() if len(v) == 2)

dm2_u32_array = [v for v in dm2 if _fits_u32(v[0])]
dm2_u64_array = [v for v in dm2 if not _fits_u32(v[0])]
# The index scheme below relies on every u32 entry sorting before every u64 one.
assert dm2_u32_array + dm2_u64_array == dm2
dm2_u32_array = ["HB_CODEPOINT_ENCODE3_11_7_14 (0x%04Xu, 0x%04Xu, 0x%04Xu)" % v[0] for v in dm2_u32_array]
dm2_u64_array = ["HB_CODEPOINT_ENCODE3 (0x%04Xu, 0x%04Xu, 0x%04Xu)" % v[0] for v in dm2_u64_array]

# Index 0 means "no decomposition"; dm1 entries follow, then dm2 entries.
dm2_base = 1 + len(dm1_p0_array) + len(dm1_p2_array)
dm2_order = {v[1]: i + dm2_base for i, v in enumerate(dm2)}

dm_order = {None: 0}
dm_order.update(dm1_order)
dm_order.update(dm2_order)

# Bidirectional mapping between general-category names and their indices.
gc_order = dict()
for index, name in enumerate(('Cc', 'Cf', 'Cn', 'Co', 'Cs', 'Ll', 'Lm', 'Lo', 'Lt', 'Lu',
                              'Mc', 'Me', 'Mn', 'Nd', 'Nl', 'No', 'Pc', 'Pd', 'Pe', 'Pf',
                              'Pi', 'Po', 'Ps', 'Sc', 'Sk', 'Sm', 'So', 'Zl', 'Zp', 'Zs',)):
    gc_order[index] = name
    gc_order[name] = index

# Bidirectional mapping between four-letter script tags and their indices,
# in the order the HB_SCRIPT_* entries appear in hb-common.h.
sc_order = dict()
sc_array = []
sc_re = re.compile(r"\b(HB_SCRIPT_[_A-Z]*).*HB_TAG [(]'(.)','(.)','(.)','(.)'[)]")
# Use a context manager so the header file is closed deterministically.
with open(hb_common_h) as hb_common_file:
    for line in hb_common_file:
        m = sc_re.search(line)
        if not m:
            continue
        name = m.group(1)
        tag = ''.join(m.group(i) for i in range(2, 6))
        index = len(sc_array)
        sc_order[tag] = index
        sc_order[index] = tag
        sc_array.append(name)

# Values passed as the `compression` argument of packTab.pack_table.
DEFAULT = 1
COMPACT = 3
SLOPPY = 5


logging.info('Generating output...')
print("/* == Start of generated table == */")
print("/*")
print(" * The following table is generated by running:")
print(" *")
print(" * ./gen-ucd-table.py ucd.nounihan.grouped.xml")
print(" *")
print(" * on file with this description:", ucdxml.description)
print(" */")
print()
print("#ifndef HB_UCD_TABLE_HH")
print("#define HB_UCD_TABLE_HH")
print()
print('#include "hb.hh"')
print()

# Arrays shared by all compression variants.
code = packTab.Code('_hb_ucd')
sc_array, _ = code.addArray('hb_script_t', 'sc_map', sc_array)
dm1_p0_array, _ = code.addArray('uint16_t', 'dm1_p0_map', dm1_p0_array)
dm1_p2_array, _ = code.addArray('uint16_t', 'dm1_p2_map', dm1_p2_array)
dm2_u32_array, _ = code.addArray('uint32_t', 'dm2_u32_map', dm2_u32_array)
dm2_u64_array, _ = code.addArray('uint64_t', 'dm2_u64_map', dm2_u64_array)
code.print_c(linkage='static inline')

# (name, data, default value, value->index mapping) for each packed table.
datasets = [
    ('gc', gc, 'Cn', gc_order),
    ('ccc', ccc, 0, None),
    ('bmg', bmg, 0, None),
    ('sc', sc, 'Zzzz', sc_order),
    ('dm', dm, None, dm_order),
]

# Emit one set of tables per compression level, selected at compile time by
# an #ifndef / #elif / #else chain.
for compression in (DEFAULT, COMPACT, SLOPPY):
    logging.info(' Compression=%d:', compression)
    print()
    if compression == DEFAULT:
        print('#ifndef HB_OPTIMIZE_SIZE')
    elif compression == COMPACT:
        print('#elif !defined(HB_NO_UCD_UNASSIGNED)')
    else:
        print('#else')
    print()

    if compression == SLOPPY:
        # Replace unassigned entries with neighbouring values inside each
        # 128-entry block.  This mutates the lists referenced by `datasets`,
        # which is safe only because SLOPPY is the last variant generated.
        _fill_unassigned(gc, 'Cn')
        _fill_unassigned(sc, 'Zzzz')

    code = packTab.Code('_hb_ucd')

    for name, data, default, mapping in datasets:
        sol = packTab.pack_table(data, default, mapping=mapping, compression=compression)
        logging.info(' Dataset=%-8s FullCost=%d', name, sol.fullCost)
        sol.genCode(code, name)

    code.print_c(linkage='static inline')

    print()

print('#endif')
print()

print()
print("#endif /* HB_UCD_TABLE_HH */")
print()
print("/* == End of generated table == */")
logging.info('Done.')