#!/usr/bin/env python3

"""usage: ./gen-indic-table.py IndicSyllabicCategory.txt IndicPositionalCategory.txt Blocks.txt

Input files:
* https://unicode.org/Public/UCD/latest/ucd/IndicSyllabicCategory.txt
* https://unicode.org/Public/UCD/latest/ucd/IndicPositionalCategory.txt
* https://unicode.org/Public/UCD/latest/ucd/Blocks.txt
"""

import sys

if len (sys.argv) != 4:
    sys.exit (__doc__)

# Code points that are kept even though their block is not in ALLOWED_BLOCKS
# membership is tested per code point (NO-BREAK SPACE and DOTTED CIRCLE).
ALLOWED_SINGLES = [0x00A0, 0x25CC]
# Only code points whose Blocks.txt block is listed here end up in the table.
ALLOWED_BLOCKS = [
    'Basic Latin',
    'Latin-1 Supplement',
    'Devanagari',
    'Bengali',
    'Gurmukhi',
    'Gujarati',
    'Oriya',
    'Tamil',
    'Telugu',
    'Kannada',
    'Malayalam',
    'Myanmar',
    'Khmer',
    'Vedic Extensions',
    'General Punctuation',
    'Superscripts and Subscripts',
    'Devanagari Extended',
    'Myanmar Extended-B',
    'Myanmar Extended-A',
]

# Parse the three input files.  For each file we keep its first two lines
# (echoed into the generated header comment) and a dict mapping every code
# point to the value of the second ';'-separated field.
headers = []
unicode_data = []
for path in sys.argv[1:]:
    data = {}
    with open (path, encoding='utf-8') as f:
        headers.append ([f.readline () for _ in range (2)])
        for line in f:

            # Strip trailing comments.
            j = line.find ('#')
            if j >= 0:
                line = line[:j]

            fields = [x.strip () for x in line.split (';')]
            if len (fields) == 1:
                # No ';' on the line: blank or comment-only.
                continue

            # First field is either a single code point or a "start..end" range.
            bounds = fields[0].split ('..')
            start = int (bounds[0], 16)
            if len (bounds) == 1:
                end = start
            else:
                end = int (bounds[1], 16)

            t = fields[1]

            for u in range (start, end + 1):
                data[u] = t
    unicode_data.append (data)

# Merge data into one dict:
# combined[u] = [syllabic category, positional category, block]
defaults = ('Other', 'Not_Applicable', 'No_Block')
combined = {}
for i,d in enumerate (unicode_data):
    for u,v in d.items ():
        if u not in combined:
            # Blocks.txt (index 2) only annotates code points that already
            # have a category or position; it never introduces new entries.
            if i == 2:
                continue
            combined[u] = list (defaults)
        combined[u][i] = v
combined = {k:v for k,v in combined.items() if k in ALLOWED_SINGLES or v[2] in ALLOWED_BLOCKS}


# Convert categories & positions types

# Category names per shaper, in the order their OT_* macros are emitted.
categories = {
    'indic' : [
        'X',
        'C',
        'V',
        'N',
        'H',
        'ZWNJ',
        'ZWJ',
        'M',
        'SM',
        'A',
        'VD',
        'PLACEHOLDER',
        'DOTTEDCIRCLE',
        'RS',
        'MPst',
        'Repha',
        'Ra',
        'CM',
        'Symbol',
        'CS',
    ],
    'khmer' : [
        'VAbv',
        'VBlw',
        'VPre',
        'VPst',

        'Robatic',
        'Xgroup',
        'Ygroup',
    ],
    'myanmar' : [
        'VAbv',
        'VBlw',
        'VPre',
        'VPst',

        'IV',
        'As',
        'DB',
        'GB',
        'MH',
        'MR',
        'MW',
        'MY',
        'PT',
        'VS',
        'ML',
    ],
}

# Unicode Indic_Syllabic_Category value -> shaper category.
category_map = {
    'Other'                       : 'X',
    'Avagraha'                    : 'Symbol',
    'Bindu'                       : 'SM',
    'Brahmi_Joining_Number'       : 'PLACEHOLDER', # Don't care.
    'Cantillation_Mark'           : 'A',
    'Consonant'                   : 'C',
    'Consonant_Dead'              : 'C',
    'Consonant_Final'             : 'CM',
    'Consonant_Head_Letter'       : 'C',
    'Consonant_Initial_Postfixed' : 'C', # TODO
    'Consonant_Killer'            : 'M', # U+17CD only.
    'Consonant_Medial'            : 'CM',
    'Consonant_Placeholder'       : 'PLACEHOLDER',
    'Consonant_Preceding_Repha'   : 'Repha',
    'Consonant_Prefixed'          : 'X', # Don't care.
    'Consonant_Subjoined'         : 'CM',
    'Consonant_Succeeding_Repha'  : 'CM',
    'Consonant_With_Stacker'      : 'CS',
    'Gemination_Mark'             : 'SM', # https://github.com/harfbuzz/harfbuzz/issues/552
    'Invisible_Stacker'           : 'H',
    'Joiner'                      : 'ZWJ',
    'Modifying_Letter'            : 'X',
    'Non_Joiner'                  : 'ZWNJ',
    'Nukta'                       : 'N',
    'Number'                      : 'PLACEHOLDER',
    'Number_Joiner'               : 'PLACEHOLDER', # Don't care.
    'Pure_Killer'                 : 'M', # Is like a vowel matra.
    'Register_Shifter'            : 'RS',
    'Syllable_Modifier'           : 'SM',
    'Tone_Letter'                 : 'X',
    'Tone_Mark'                   : 'N',
    'Virama'                      : 'H',
    'Visarga'                     : 'SM',
    'Vowel'                       : 'V',
    'Vowel_Dependent'             : 'M',
    'Vowel_Independent'           : 'V',
}
# Unicode Indic_Positional_Category value -> shaper position.
position_map = {
    'Not_Applicable'           : 'END',

    'Left'                     : 'PRE_C',
    'Top'                      : 'ABOVE_C',
    'Bottom'                   : 'BELOW_C',
    'Right'                    : 'POST_C',

    # These should resolve to the position of the last part of the split sequence.
    'Bottom_And_Right'         : 'POST_C',
    'Left_And_Right'           : 'POST_C',
    'Top_And_Bottom'           : 'BELOW_C',
    'Top_And_Bottom_And_Left'  : 'BELOW_C',
    'Top_And_Bottom_And_Right' : 'POST_C',
    'Top_And_Left'             : 'ABOVE_C',
    'Top_And_Left_And_Right'   : 'POST_C',
    'Top_And_Right'            : 'POST_C',

    'Overstruck'               : 'AFTER_MAIN',
    'Visual_order_left'        : 'PRE_M',
}

# Per-code-point category overrides, applied after category_map.
category_overrides = {

    # These are the variation-selectors. They only appear in the Myanmar grammar
    # but are not Myanmar-specific
    0xFE00: 'VS',
    0xFE01: 'VS',
    0xFE02: 'VS',
    0xFE03: 'VS',
    0xFE04: 'VS',
    0xFE05: 'VS',
    0xFE06: 'VS',
    0xFE07: 'VS',
    0xFE08: 'VS',
    0xFE09: 'VS',
    0xFE0A: 'VS',
    0xFE0B: 'VS',
    0xFE0C: 'VS',
    0xFE0D: 'VS',
    0xFE0E: 'VS',
    0xFE0F: 'VS',

    # These appear in the OT Myanmar spec, but are not Myanmar-specific
    0x2015: 'PLACEHOLDER',
    0x2022: 'PLACEHOLDER',
    0x25FB: 'PLACEHOLDER',
    0x25FC: 'PLACEHOLDER',
    0x25FD: 'PLACEHOLDER',
    0x25FE: 'PLACEHOLDER',


    # Indic

    0x0930: 'Ra', # Devanagari
    0x09B0: 'Ra', # Bengali
    0x09F0: 'Ra', # Bengali
    0x0A30: 'Ra', # Gurmukhi; No Reph
    0x0AB0: 'Ra', # Gujarati
    0x0B30: 'Ra', # Oriya
    0x0BB0: 'Ra', # Tamil; No Reph
    0x0C30: 'Ra', # Telugu; Reph formed only with ZWJ
    0x0CB0: 'Ra', # Kannada
    0x0D30: 'Ra', # Malayalam; No Reph, Logical Repha

    # The following act more like the Bindus.
    0x0953: 'SM',
    0x0954: 'SM',

    # U+0A40 GURMUKHI VOWEL SIGN II may be preceded by U+0A02 GURMUKHI SIGN BINDI.
    0x0A40: 'MPst',

    # The following act like consonants.
    0x0A72: 'C',
    0x0A73: 'C',
    0x1CF5: 'C',
    0x1CF6: 'C',

    # TODO: The following should only be allowed after a Visarga.
    # For now, just treat them like regular tone marks.
    0x1CE2: 'A',
    0x1CE3: 'A',
    0x1CE4: 'A',
    0x1CE5: 'A',
    0x1CE6: 'A',
    0x1CE7: 'A',
    0x1CE8: 'A',

    # TODO: The following should only be allowed after some of
    # the nasalization marks, maybe only for U+1CE9..U+1CF1.
    # For now, just treat them like tone marks.
    0x1CED: 'A',

    # The following take marks in standalone clusters, similar to Avagraha.
    0xA8F2: 'Symbol',
    0xA8F3: 'Symbol',
    0xA8F4: 'Symbol',
    0xA8F5: 'Symbol',
    0xA8F6: 'Symbol',
    0xA8F7: 'Symbol',
    0x1CE9: 'Symbol',
    0x1CEA: 'Symbol',
    0x1CEB: 'Symbol',
    0x1CEC: 'Symbol',
    0x1CEE: 'Symbol',
    0x1CEF: 'Symbol',
    0x1CF0: 'Symbol',
    0x1CF1: 'Symbol',

    0x0A51: 'M', # https://github.com/harfbuzz/harfbuzz/issues/524

    # According to ScriptExtensions.txt, these Grantha marks may also be used in Tamil,
    # so the Indic shaper needs to know their categories.
    0x11301: 'SM',
    0x11302: 'SM',
    0x11303: 'SM',
    0x1133B: 'N',
    0x1133C: 'N',

    0x0AFB: 'N', # https://github.com/harfbuzz/harfbuzz/issues/552
    0x0B55: 'N', # https://github.com/harfbuzz/harfbuzz/issues/2849

    0x09FC: 'PLACEHOLDER', # https://github.com/harfbuzz/harfbuzz/pull/1613
    0x0C80: 'PLACEHOLDER', # https://github.com/harfbuzz/harfbuzz/pull/623
    0x0D04: 'PLACEHOLDER', # https://github.com/harfbuzz/harfbuzz/pull/3511

    0x25CC: 'DOTTEDCIRCLE',


    # Khmer

    0x179A: 'Ra',

    0x17CC: 'Robatic',
    0x17C9: 'Robatic',
    0x17CA: 'Robatic',

    0x17C6: 'Xgroup',
    0x17CB: 'Xgroup',
    0x17CD: 'Xgroup',
    0x17CE: 'Xgroup',
    0x17CF: 'Xgroup',
    0x17D0: 'Xgroup',
    0x17D1: 'Xgroup',

    0x17C7: 'Ygroup',
    0x17C8: 'Ygroup',
    0x17DD: 'Ygroup',
    0x17D3: 'Ygroup', # Just guessing. Uniscribe doesn't categorize it.

    0x17D9: 'PLACEHOLDER', # https://github.com/harfbuzz/harfbuzz/issues/2384


    # Myanmar

    # https://docs.microsoft.com/en-us/typography/script-development/myanmar#analyze

    0x104E: 'C', # The spec says C, IndicSyllabicCategory says Consonant_Placeholder

    0x1004: 'Ra',
    0x101B: 'Ra',
    0x105A: 'Ra',

    0x1032: 'A',
    0x1036: 'A',

    0x103A: 'As',

    #0x1040: 'D0', # XXX The spec says D0, but Uniscribe doesn't seem to do.

    0x103E: 'MH',
    0x1060: 'ML',
    0x103C: 'MR',
    0x103D: 'MW',
    0x1082: 'MW',
    0x103B: 'MY',
    0x105E: 'MY',
    0x105F: 'MY',

    0x1063: 'PT',
    0x1064: 'PT',
    0x1069: 'PT',
    0x106A: 'PT',
    0x106B: 'PT',
    0x106C: 'PT',
    0x106D: 'PT',
    0xAA7B: 'PT',

    0x1038: 'SM',
    0x1087: 'SM',
    0x1088: 'SM',
    0x1089: 'SM',
    0x108A: 'SM',
    0x108B: 'SM',
    0x108C: 'SM',
    0x108D: 'SM',
    0x108F: 'SM',
    0x109A: 'SM',
    0x109B: 'SM',
    0x109C: 'SM',

    0x104A: 'PLACEHOLDER',
}
# Per-code-point position overrides, applied last (after matra repositioning).
position_overrides = {

    0x0A51: 'BELOW_C', # https://github.com/harfbuzz/harfbuzz/issues/524

    0x0B01: 'BEFORE_SUB', # Oriya Bindu is BeforeSub in the spec.
}

def matra_pos_left(u, block):
    """Return the position for a matra whose mapped position is PRE_C.

    The result is 'PRE_M' regardless of code point `u` or `block`.
    """
    return "PRE_M"
def matra_pos_right(u, block):
    """Return the position for a matra whose mapped position is POST_C.

    The answer depends on the Unicode block name `block` and, for Telugu
    and Kannada, on the code point `u`.  Unlisted blocks get 'AFTER_SUB'.
    """
    if block == 'Devanagari': return 'AFTER_SUB'
    if block == 'Bengali':    return 'AFTER_POST'
    if block == 'Gurmukhi':   return 'AFTER_POST'
    if block == 'Gujarati':   return 'AFTER_POST'
    if block == 'Oriya':      return 'AFTER_POST'
    if block == 'Tamil':      return 'AFTER_POST'
    if block == 'Telugu':     return 'BEFORE_SUB' if u <= 0x0C42 else 'AFTER_SUB'
    if block == 'Kannada':    return 'BEFORE_SUB' if u < 0x0CC3 or u > 0x0CD6 else 'AFTER_SUB'
    if block == 'Malayalam':  return 'AFTER_POST'
    return 'AFTER_SUB'
def matra_pos_top(u, block):
    """Return the position for a matra whose mapped position is ABOVE_C.

    The answer depends only on the Unicode block name `block`; `u` is
    accepted for signature parity.  Unlisted blocks get 'AFTER_SUB'.
    """
    # BENG and MLYM don't have top matras.
    if block == 'Devanagari': return 'AFTER_SUB'
    if block == 'Gurmukhi':   return 'AFTER_POST' # Deviate from spec
    if block == 'Gujarati':   return 'AFTER_SUB'
    if block == 'Oriya':      return 'AFTER_MAIN'
    if block == 'Tamil':      return 'AFTER_SUB'
    if block == 'Telugu':     return 'BEFORE_SUB'
    if block == 'Kannada':    return 'BEFORE_SUB'
    return 'AFTER_SUB'
def matra_pos_bottom(u, block):
    """Return the position for a matra whose mapped position is BELOW_C.

    The answer depends only on the Unicode block name `block`; `u` is
    accepted for signature parity.  Unlisted blocks get 'AFTER_SUB'.
    """
    if block == 'Devanagari': return 'AFTER_SUB'
    if block == 'Bengali':    return 'AFTER_SUB'
    if block == 'Gurmukhi':   return 'AFTER_POST'
    if block == 'Gujarati':   return 'AFTER_POST'
    if block == 'Oriya':      return 'AFTER_SUB'
    if block == 'Tamil':      return 'AFTER_POST'
    if block == 'Telugu':     return 'BEFORE_SUB'
    if block == 'Kannada':    return 'BEFORE_SUB'
    if block == 'Malayalam':  return 'AFTER_POST'
    return "AFTER_SUB"
def indic_matra_position(u, pos, block): # Reposition matra
    """Map a matra's generic position to its per-script reordering position.

    `pos` must be one of 'PRE_C', 'POST_C', 'ABOVE_C' or 'BELOW_C'; it
    selects which matra_pos_* helper is consulted with `u` and `block`.

    Raises AssertionError for any other `pos`.
    """
    if pos == 'PRE_C':   return matra_pos_left(u, block)
    if pos == 'POST_C':  return matra_pos_right(u, block)
    if pos == 'ABOVE_C': return matra_pos_top(u, block)
    if pos == 'BELOW_C': return matra_pos_bottom(u, block)
    # Raised explicitly (rather than `assert False`) so that the check is
    # not stripped under `python -O`.
    raise AssertionError ("unexpected matra position %r for U+%04X" % (pos, u))

def position_to_category(pos):
    """Return the vowel category (VPre/VAbv/VBlw/VPst) for a matra position.

    Raises AssertionError if `pos` is not one of 'PRE_C', 'ABOVE_C',
    'BELOW_C' or 'POST_C'.
    """
    if pos == 'PRE_C':   return 'VPre'
    if pos == 'ABOVE_C': return 'VAbv'
    if pos == 'BELOW_C': return 'VBlw'
    if pos == 'POST_C':  return 'VPst'
    # Explicit raise so the guard survives `python -O`.
    raise AssertionError ("unexpected matra position %r" % (pos,))


# From here on `defaults` holds the *mapped* default (category, position, block).
defaults = (category_map[defaults[0]], position_map[defaults[1]], defaults[2])

# indic_data[u] = (shaper category, shaper position, block name)
indic_data = {}
for k, (cat, pos, block) in combined.items():
    cat = category_map[cat]
    pos = position_map[pos]
    indic_data[k] = (cat, pos, block)

# Category overrides may introduce code points that were filtered out above;
# their block is looked up directly in the Blocks.txt data.
for k,new_cat in category_overrides.items():
    (_, pos, _) = indic_data.get(k, defaults)
    indic_data[k] = (new_cat, pos, unicode_data[2][k])

# We only expect position for certain types
positioned_categories = ('CM', 'SM', 'RS', 'H', 'M', 'MPst')
for k, (cat, pos, block) in indic_data.items():
    if cat not in positioned_categories:
        pos = 'END'
        indic_data[k] = (cat, pos, block)

# Position overrides are more complicated

# Keep in sync with CONSONANT_FLAGS in the shaper
consonant_categories = ('C', 'CS', 'Ra','CM', 'V', 'PLACEHOLDER', 'DOTTEDCIRCLE')
matra_categories = ('M', 'MPst')
smvd_categories = ('SM', 'VD', 'A', 'Symbol')
for k, (cat, pos, block) in indic_data.items():
    if cat in consonant_categories:
        pos = 'BASE_C'
    elif cat in matra_categories:
        if block.startswith (('Khmer', 'Myanmar')):
            # Khmer/Myanmar matras: the position is folded into the category.
            cat = position_to_category(pos)
        else:
            pos = indic_matra_position(k, pos, block)
    elif cat in smvd_categories:
        pos = 'SMVD'
    indic_data[k] = (cat, pos, block)

for k,new_pos in position_overrides.items():
    (cat, _, _) = indic_data.get(k, defaults)
    indic_data[k] = (cat, new_pos, unicode_data[2][k])


# Count how many characters use each category / position / block value.
# Each counter is seeded with the default value so that the defaults always
# receive a short alias below, even if no character uses them.
values = [{default: 1} for default in defaults]
for vv in indic_data.values():
    for i,v in enumerate(vv):
        values[i][v] = values[i].get (v, 0) + 1




# Move the outliers NO-BREAK SPACE and DOTTED CIRCLE out
singles = {}
for u in ALLOWED_SINGLES:
    singles[u] = indic_data[u]
    del indic_data[u]

# NOTE(review): whitespace inside the string literals printed below is
# reproduced exactly as received; if runs of spaces were collapsed upstream of
# this file, the generated C is still valid but its layout may differ from a
# previously generated table -- confirm against the checked-in output.
print ("/* == Start of generated table == */")
print ("/*")
print (" * The following table is generated by running:")
print (" *")
print (" * ./gen-indic-table.py IndicSyllabicCategory.txt IndicPositionalCategory.txt Blocks.txt")
print (" *")
print (" * on files with these headers:")
print (" *")
for h in headers:
    for l in h:
        print (" * %s" % (l.strip()))
print (" */")
print ()
print ('#include "hb.hh"')
print ()
print ('#ifndef HB_NO_OT_SHAPE')
print ()
print ('#include "hb-ot-shaper-indic.hh"')
print ()
print ('#pragma GCC diagnostic push')
print ('#pragma GCC diagnostic ignored "-Wunused-macros"')
print ()

# Print categories
for shaper in categories:
    print ('#include "hb-ot-shaper-%s-machine.hh"' % shaper)
print ()
# First shaper to list a category defines OT_<cat>; later shapers listing the
# same name only emit a static_assert that their value agrees.
done = {}
for shaper, shaper_cats in categories.items():
    print ('/* %s */' % shaper)
    v = shaper[0].upper()
    for cat in shaper_cats:
        if cat not in done:
            print ("#define OT_%s %s_Cat(%s)" % (cat, v, cat))
            done[cat] = v
        else:
            print ('static_assert (OT_%s == %s_Cat(%s), "");' % (cat, v, cat))
print ()

# Shorten values
# short[0]: category -> alias, short[1]: position -> alias.
short = [{
    "Repha":        'Rf',
    "PLACEHOLDER":  'GB',
    "DOTTEDCIRCLE": 'DC',
    "VPst":         'VR',
    "VPre":         'VL',
    "Robatic":      'Rt',
    "Xgroup":       'Xg',
    "Ygroup":       'Yg',
    "As":           'As',
},{
    "END":          'X',
    "BASE_C":       'C',
    "ABOVE_C":      'T',
    "BELOW_C":      'B',
    "POST_C":       'R',
    "PRE_C":        'L',
    "PRE_M":        'LM',
    "AFTER_MAIN":   'A',
    "AFTER_SUB":    'AS',
    "BEFORE_SUB":   'BS',
    "AFTER_POST":   'AP',
    "SMVD":         'SM',
}]
# Reverse maps (alias -> value), used to detect alias collisions.
all_shorts = [{},{}]

# Add some of the values, to make them more readable, and to avoid duplicates

for i in range (2):
    for v,s in short[i].items ():
        all_shorts[i][s] = v

what = ["OT", "POS"]
what_short = ["_OT", "_POS"]
# Each entry: (short macro name, long macro name, usage count as str, value).
cat_defs = []
for i in range (2):
    vv = sorted (values[i].keys ())
    for v in vv:
        v_no_and = v.replace ('_And_', '_')
        if v in short[i]:
            s = short[i][v]
        else:
            # Derive an alias from the upper-case ASCII letters of the name.
            s = ''.join ([c for c in v_no_and if 'A' <= c <= 'Z'])
            if s in all_shorts[i]:
                raise Exception ("Duplicate short value alias", v, all_shorts[i][s])
            all_shorts[i][s] = v
            short[i][v] = s
        cat_defs.append ((what_short[i] + '_' + s, what[i] + '_' + (v.upper () if i else v), str (values[i][v]), v))

maxlen_s = max (len (c[0]) for c in cat_defs)
maxlen_l = max (len (c[1]) for c in cat_defs)
maxlen_n = max (len (c[2]) for c in cat_defs)
for s in what_short:
    print ()
    for c in [c for c in cat_defs if s in c[0]]:
        print ("#define %s %s /* %s chars; %s */" %
            (c[0].ljust (maxlen_s), c[1].ljust (maxlen_l), c[2].rjust (maxlen_n), c[3]))
print ()
print ('#pragma GCC diagnostic pop')
print ()
print ("#define INDIC_COMBINE_CATEGORIES(S,M) ((S) | ((M) << 8))")
print ()
print ("#define _(S,M) INDIC_COMBINE_CATEGORIES (%s_##S, %s_##M)" % tuple(what_short))
print ()
print ()

# Running totals across all print_block() calls, used for the occupancy figure.
total = 0
used = 0
last_block = None
def print_block (block, start, end, data):
    """Print table entries for code points start..end (inclusive).

    `start` and `end + 1` must be multiples of 8; a row label is printed
    before every eighth entry.  Code points missing from `data` are printed
    with the module-level `defaults`.  When `block` is truthy and differs
    from the previously printed block, a block-name comment is printed first.

    Updates the globals `total` (entries printed), `used` (entries present
    in `data`) and `last_block`.
    """
    global total, used, last_block
    if block and block != last_block:
        print ()
        print ()
        print (" /* %s */" % block)
    num = 0
    assert start % 8 == 0
    assert (end+1) % 8 == 0
    for u in range (start, end+1):
        if u % 8 == 0:
            print ()
            print (" /* %04X */" % u, end="")
        if u in data:
            num += 1
        d = data.get (u, defaults)
        print ("%9s" % ("_(%s,%s)," % (short[0][d[0]], short[1][d[1]])), end="")

    total += end - start + 1
    used += num
    if block:
        last_block = block

uu = sorted (indic_data)

last = -100000
offset = 0
# starts[i]..ends[i]-1 is the i-th contiguous code point range in the table.
starts = []
ends = []
print ("static const uint16_t indic_table[] = {")
for u in uu:
    if u <= last:
        # Already emitted as part of a previous run.
        continue
    block = indic_data[u][2]

    # Extend the run while consecutive code points stay in the same block,
    # then round out to 8-aligned boundaries.
    start = u//8*8
    end = start+1
    # Membership is tested on the dict (same keys as `uu`) to keep this O(1).
    while end in indic_data and block == indic_data[end][2]:
        end += 1
    end = (end-1)//8*8 + 7

    if start != last + 1:
        if start - last <= 1+16*2:
            # Small gap: pad with default entries instead of starting a new range.
            print_block (None, last+1, start-1, indic_data)
        else:
            if last >= 0:
                ends.append (last + 1)
                offset += ends[-1] - starts[-1]
            print ()
            print ()
            print ("#define indic_offset_0x%04xu %d" % (start, offset))
            starts.append (start)

    print_block (block, start, end, indic_data)
    last = end
ends.append (last + 1)
offset += ends[-1] - starts[-1]
print ()
print ()
occupancy = used * 100. / total
page_bits = 12
print ("}; /* Table items: %d; occupancy: %d%% */" % (offset, occupancy))
print ()
print ("uint16_t")
print ("hb_indic_get_categories (hb_codepoint_t u)")
print ("{")
print (" switch (u >> %d)" % page_bits)
print (" {")
# One `case` per 2**page_bits-sized page touched by a range boundary or a single.
pages = {u>>page_bits for u in starts+ends+list (singles.keys ())}
for p in sorted(pages):
    print (" case 0x%0Xu:" % p)
    for u,d in singles.items ():
        if p != u>>page_bits: continue
        print (" if (unlikely (u == 0x%04Xu)) return _(%s,%s);" % (u, short[0][d[0]], short[1][d[1]]))
    for (start,end) in zip (starts, ends):
        if p not in [start>>page_bits, end>>page_bits]: continue
        offset_macro = "indic_offset_0x%04xu" % start
        print (" if (hb_in_range<hb_codepoint_t> (u, 0x%04Xu, 0x%04Xu)) return indic_table[u - 0x%04Xu + %s];" % (start, end-1, start, offset_macro))
    print (" break;")
    print ("")
print (" default:")
print (" break;")
print (" }")
print (" return _(X,X);")
print ("}")
print ()
print ("#undef _")
print ("#undef INDIC_COMBINE_CATEGORIES")
for i in range (2):
    print ()
    vv = sorted (values[i].keys ())
    for v in vv:
        print ("#undef %s_%s" %
            (what_short[i], short[i][v]))
print ()
print ('#endif')
print ()
print ("/* == End of generated table == */")

# Maintain at least 50% occupancy in the table
if occupancy < 50:
    raise Exception ("Table too sparse, please investigate: ", occupancy)