#!/usr/bin/env python3

"""usage: ./gen-indic-table.py IndicSyllabicCategory.txt IndicPositionalCategory.txt Blocks.txt

Input files:
* https://unicode.org/Public/UCD/latest/ucd/IndicSyllabicCategory.txt
* https://unicode.org/Public/UCD/latest/ucd/IndicPositionalCategory.txt
* https://unicode.org/Public/UCD/latest/ucd/Blocks.txt
"""

# NOTE: the module docstring above doubles as the usage message printed by
# sys.exit(), so it must stay exactly as it is.
#
# Overview of the pipeline implemented below:
#   1. load_ucd_file()     parses each of the three UCD files into
#                          {code point: property value}.
#   2. merge_properties()  joins them into {code point: [category, position,
#                          block]} and keeps only the allowed blocks/singles.
#   3. build_indic_data()  maps UCD names to shaper names and applies the
#                          hand-maintained overrides.
#   4. main()              prints the generated C table to stdout.

import sys

# Code points that are kept even though their block is not in ALLOWED_BLOCKS
# (NO-BREAK SPACE and DOTTED CIRCLE).  They are emitted as special cases in
# the generated switch rather than as table rows.
ALLOWED_SINGLES = [0x00A0, 0x25CC]
ALLOWED_BLOCKS = [
    'Basic Latin',
    'Latin-1 Supplement',
    'Devanagari',
    'Bengali',
    'Gurmukhi',
    'Gujarati',
    'Oriya',
    'Tamil',
    'Telugu',
    'Kannada',
    'Malayalam',
    'Myanmar',
    'Khmer',
    'Vedic Extensions',
    'General Punctuation',
    'Superscripts and Subscripts',
    'Devanagari Extended',
    'Myanmar Extended-B',
    'Myanmar Extended-A',
    'Myanmar Extended-C',
]


# Convert categories & positions types

# Category names per shaper.  The key is also used to build the name of the
# included "hb-ot-shaper-<key>-machine.hh" header, and its upper-cased first
# letter prefixes the <X>_Cat() macro in the generated #defines.
categories = {
    'indic': [
        'X',
        'C',
        'V',
        'N',
        'H',
        'ZWNJ',
        'ZWJ',
        'M',
        'SM',
        'A',
        'VD',
        'PLACEHOLDER',
        'DOTTEDCIRCLE',
        'RS',
        'MPst',
        'Repha',
        'Ra',
        'CM',
        'Symbol',
        'CS',
        'SMPst',
    ],
    'khmer': [
        'VAbv',
        'VBlw',
        'VPre',
        'VPst',

        'Robatic',
        'Xgroup',
        'Ygroup',
    ],
    'myanmar': [
        'VAbv',
        'VBlw',
        'VPre',
        'VPst',

        'IV',
        'As',
        'DB',
        'GB',
        'MH',
        'MR',
        'MW',
        'MY',
        'PT',
        'VS',
        'ML',
    ],
}

# Indic_Syllabic_Category value -> shaper category.
category_map = {
    'Other': 'X',
    'Avagraha': 'Symbol',
    'Bindu': 'SM',
    'Brahmi_Joining_Number': 'PLACEHOLDER',  # Don't care.
    'Cantillation_Mark': 'A',
    'Consonant': 'C',
    'Consonant_Dead': 'C',
    'Consonant_Final': 'CM',
    'Consonant_Head_Letter': 'C',
    'Consonant_Initial_Postfixed': 'C',  # TODO
    'Consonant_Killer': 'M',  # U+17CD only.
    'Consonant_Medial': 'CM',
    'Consonant_Placeholder': 'PLACEHOLDER',
    'Consonant_Preceding_Repha': 'Repha',
    'Consonant_Prefixed': 'X',  # Don't care.
    'Consonant_Subjoined': 'CM',
    'Consonant_Succeeding_Repha': 'CM',
    'Consonant_With_Stacker': 'CS',
    'Gemination_Mark': 'SM',  # https://github.com/harfbuzz/harfbuzz/issues/552
    'Invisible_Stacker': 'H',
    'Joiner': 'ZWJ',
    'Modifying_Letter': 'X',
    'Non_Joiner': 'ZWNJ',
    'Nukta': 'N',
    'Number': 'PLACEHOLDER',
    'Number_Joiner': 'PLACEHOLDER',  # Don't care.
    'Pure_Killer': 'M',  # Is like a vowel matra.
    'Register_Shifter': 'RS',
    'Syllable_Modifier': 'SM',
    'Tone_Letter': 'X',
    'Tone_Mark': 'N',
    'Virama': 'H',
    'Visarga': 'SM',
    'Vowel': 'V',
    'Vowel_Dependent': 'M',
    'Vowel_Independent': 'V',
}

# Indic_Positional_Category value -> shaper position.
position_map = {
    'Not_Applicable': 'END',

    'Left': 'PRE_C',
    'Top': 'ABOVE_C',
    'Bottom': 'BELOW_C',
    'Right': 'POST_C',

    # These should resolve to the position of the last part of the split sequence.
    'Bottom_And_Right': 'POST_C',
    'Left_And_Right': 'POST_C',
    'Top_And_Bottom': 'BELOW_C',
    'Top_And_Bottom_And_Left': 'BELOW_C',
    'Top_And_Bottom_And_Right': 'POST_C',
    'Top_And_Left': 'ABOVE_C',
    'Top_And_Left_And_Right': 'POST_C',
    'Top_And_Right': 'POST_C',

    'Overstruck': 'AFTER_MAIN',
    # The UCD spells this value "Visual_Order_Left".  The historical,
    # differently-cased key is kept so nothing that relied on it breaks.
    'Visual_Order_Left': 'PRE_M',
    'Visual_order_left': 'PRE_M',
}

# Raw UCD defaults for code points missing from a property file, and the same
# triple expressed in shaper terms (category, position, block).
UCD_DEFAULTS = ('Other', 'Not_Applicable', 'No_Block')
DEFAULTS = (category_map[UCD_DEFAULTS[0]],
            position_map[UCD_DEFAULTS[1]],
            UCD_DEFAULTS[2])

category_overrides = {

    # These are the variation-selectors. They only appear in the Myanmar grammar
    # but are not Myanmar-specific
    0xFE00: 'VS',
    0xFE01: 'VS',
    0xFE02: 'VS',
    0xFE03: 'VS',
    0xFE04: 'VS',
    0xFE05: 'VS',
    0xFE06: 'VS',
    0xFE07: 'VS',
    0xFE08: 'VS',
    0xFE09: 'VS',
    0xFE0A: 'VS',
    0xFE0B: 'VS',
    0xFE0C: 'VS',
    0xFE0D: 'VS',
    0xFE0E: 'VS',
    0xFE0F: 'VS',

    # These appear in the OT Myanmar spec, but are not Myanmar-specific
    0x2015: 'PLACEHOLDER',
    0x2022: 'PLACEHOLDER',
    0x25FB: 'PLACEHOLDER',
    0x25FC: 'PLACEHOLDER',
    0x25FD: 'PLACEHOLDER',
    0x25FE: 'PLACEHOLDER',


    # Indic

    0x0930: 'Ra',  # Devanagari
    0x09B0: 'Ra',  # Bengali
    0x09F0: 'Ra',  # Bengali
    0x0A30: 'Ra',  # Gurmukhi No Reph
    0x0AB0: 'Ra',  # Gujarati
    0x0B30: 'Ra',  # Oriya
    0x0BB0: 'Ra',  # Tamil No Reph
    0x0C30: 'Ra',  # Telugu Reph formed only with ZWJ
    0x0CB0: 'Ra',  # Kannada
    0x0D30: 'Ra',  # Malayalam No Reph, Logical Repha

    # The following act more like the Bindus.
    0x0953: 'SM',
    0x0954: 'SM',

    # U+0A40 GURMUKHI VOWEL SIGN II may be preceded by U+0A02 GURMUKHI SIGN BINDI.
    0x0A40: 'MPst',

    # The following act like consonants.
    0x0A72: 'C',
    0x0A73: 'C',
    0x1CF5: 'C',
    0x1CF6: 'C',

    # TODO: The following should only be allowed after a Visarga.
    # For now, just treat them like regular tone marks.
    0x1CE2: 'A',
    0x1CE3: 'A',
    0x1CE4: 'A',
    0x1CE5: 'A',
    0x1CE6: 'A',
    0x1CE7: 'A',
    0x1CE8: 'A',

    # TODO: The following should only be allowed after some of
    # the nasalization marks, maybe only for U+1CE9..U+1CF1.
    # For now, just treat them like tone marks.
    0x1CED: 'A',

    # The following take marks in standalone clusters, similar to Avagraha.
    0xA8F2: 'Symbol',
    0xA8F3: 'Symbol',
    0xA8F4: 'Symbol',
    0xA8F5: 'Symbol',
    0xA8F6: 'Symbol',
    0xA8F7: 'Symbol',
    0x1CE9: 'Symbol',
    0x1CEA: 'Symbol',
    0x1CEB: 'Symbol',
    0x1CEC: 'Symbol',
    0x1CEE: 'Symbol',
    0x1CEF: 'Symbol',
    0x1CF0: 'Symbol',
    0x1CF1: 'Symbol',

    0x0A51: 'M',  # https://github.com/harfbuzz/harfbuzz/issues/524

    # According to ScriptExtensions.txt, these Grantha marks may also be used in Tamil,
    # so the Indic shaper needs to know their categories.
    0x11301: 'SM',
    0x11302: 'SM',
    0x11303: 'SM',
    0x1133B: 'N',
    0x1133C: 'N',

    0x0AFB: 'N',  # https://github.com/harfbuzz/harfbuzz/issues/552
    0x0B55: 'N',  # https://github.com/harfbuzz/harfbuzz/issues/2849

    0x09FC: 'PLACEHOLDER',  # https://github.com/harfbuzz/harfbuzz/pull/1613
    0x0C80: 'PLACEHOLDER',  # https://github.com/harfbuzz/harfbuzz/pull/623
    0x0D04: 'PLACEHOLDER',  # https://github.com/harfbuzz/harfbuzz/pull/3511

    0x25CC: 'DOTTEDCIRCLE',


    # Khmer

    0x179A: 'Ra',

    0x17CC: 'Robatic',
    0x17C9: 'Robatic',
    0x17CA: 'Robatic',

    0x17C6: 'Xgroup',
    0x17CB: 'Xgroup',
    0x17CD: 'Xgroup',
    0x17CE: 'Xgroup',
    0x17CF: 'Xgroup',
    0x17D0: 'Xgroup',
    0x17D1: 'Xgroup',

    0x17C7: 'Ygroup',
    0x17C8: 'Ygroup',
    0x17DD: 'Ygroup',
    0x17D3: 'Ygroup',  # Just guessing. Uniscribe doesn't categorize it.

    0x17D9: 'PLACEHOLDER',  # https://github.com/harfbuzz/harfbuzz/issues/2384


    # Myanmar

    # https://docs.microsoft.com/en-us/typography/script-development/myanmar#analyze

    0x104E: 'C',  # The spec says C, IndicSyllabicCategory says Consonant_Placeholder

    0x1004: 'Ra',
    0x101B: 'Ra',
    0x105A: 'Ra',

    0x1032: 'A',
    0x1036: 'A',

    0x103A: 'As',

    #0x1040: 'D0', # XXX The spec says D0, but Uniscribe doesn't seem to do.

    0x103E: 'MH',
    0x1060: 'ML',
    0x103C: 'MR',
    0x103D: 'MW',
    0x1082: 'MW',
    0x103B: 'MY',
    0x105E: 'MY',
    0x105F: 'MY',

    0x1063: 'PT',
    0x1064: 'PT',
    0x1069: 'PT',
    0x106A: 'PT',
    0x106B: 'PT',
    0x106C: 'PT',
    0x106D: 'PT',
    0xAA7B: 'PT',

    0x1038: 'SM',
    0x1087: 'SM',
    0x1088: 'SM',
    0x1089: 'SM',
    0x108A: 'SM',
    0x108B: 'SM',
    0x108C: 'SM',
    0x108D: 'SM',
    0x108F: 'SM',
    0x109A: 'SM',
    0x109B: 'SM',
    0x109C: 'SM',

    0x104A: 'PLACEHOLDER',
}
position_overrides = {

    0x0A51: 'BELOW_C',  # https://github.com/harfbuzz/harfbuzz/issues/524

    0x0B01: 'BEFORE_SUB',  # Oriya Bindu is BeforeSub in the spec.
}


def matra_pos_left(u, block):
    """Return the shaper position of a left (pre-base) matra."""
    return "PRE_M"


def matra_pos_right(u, block):
    """Return the shaper position of a right matra *u* in *block*."""
    if block == 'Devanagari': return 'AFTER_SUB'
    if block == 'Bengali': return 'AFTER_POST'
    if block == 'Gurmukhi': return 'AFTER_POST'
    if block == 'Gujarati': return 'AFTER_POST'
    if block == 'Oriya': return 'AFTER_POST'
    if block == 'Tamil': return 'AFTER_POST'
    if block == 'Telugu': return 'BEFORE_SUB' if u <= 0x0C42 else 'AFTER_SUB'
    if block == 'Kannada': return 'BEFORE_SUB' if u < 0x0CC3 or u > 0x0CD6 else 'AFTER_SUB'
    if block == 'Malayalam': return 'AFTER_POST'
    return 'AFTER_SUB'


def matra_pos_top(u, block):
    """Return the shaper position of a top matra *u* in *block*."""
    # BENG and MLYM don't have top matras.
    if block == 'Devanagari': return 'AFTER_SUB'
    if block == 'Gurmukhi': return 'AFTER_POST'  # Deviate from spec
    if block == 'Gujarati': return 'AFTER_SUB'
    if block == 'Oriya': return 'AFTER_MAIN'
    if block == 'Tamil': return 'AFTER_SUB'
    if block == 'Telugu': return 'BEFORE_SUB'
    if block == 'Kannada': return 'BEFORE_SUB'
    return 'AFTER_SUB'


def matra_pos_bottom(u, block):
    """Return the shaper position of a bottom matra *u* in *block*."""
    if block == 'Devanagari': return 'AFTER_SUB'
    if block == 'Bengali': return 'AFTER_SUB'
    if block == 'Gurmukhi': return 'AFTER_POST'
    if block == 'Gujarati': return 'AFTER_POST'
    if block == 'Oriya': return 'AFTER_SUB'
    if block == 'Tamil': return 'AFTER_POST'
    if block == 'Telugu': return 'BEFORE_SUB'
    if block == 'Kannada': return 'BEFORE_SUB'
    if block == 'Malayalam': return 'AFTER_POST'
    return "AFTER_SUB"


def indic_matra_position(u, pos, block):  # Reposition matra
    """Translate a generic matra position into the block-specific one.

    *pos* must be one of PRE_C, POST_C, ABOVE_C or BELOW_C; anything else
    raises AssertionError.
    """
    if pos == 'PRE_C': return matra_pos_left(u, block)
    if pos == 'POST_C': return matra_pos_right(u, block)
    if pos == 'ABOVE_C': return matra_pos_top(u, block)
    if pos == 'BELOW_C': return matra_pos_bottom(u, block)
    # Raised explicitly (rather than `assert False`) so the check also fires
    # under `python -O` instead of silently returning None.
    raise AssertionError("unexpected matra position %r for U+%04X" % (pos, u))


def position_to_category(pos):
    """Map a matra position to the Khmer/Myanmar vowel category name.

    Raises AssertionError for any position other than the four handled here.
    """
    if pos == 'PRE_C': return 'VPre'
    if pos == 'ABOVE_C': return 'VAbv'
    if pos == 'BELOW_C': return 'VBlw'
    if pos == 'POST_C': return 'VPst'
    # Explicit raise for the same reason as in indic_matra_position().
    raise AssertionError("unexpected matra position %r" % (pos,))


def parse_ucd_lines(lines):
    """Parse UCD-style `start[..end] ; value  # comment` records.

    *lines* is any iterable of text lines.  Everything from '#' onwards is
    dropped and lines without a ';' are skipped.  Returns a dict mapping every
    code point of every range to the (stripped) second field.
    """
    data = {}
    for line in lines:
        comment_at = line.find('#')
        if comment_at >= 0:
            line = line[:comment_at]

        fields = [x.strip() for x in line.split(';')]
        if len(fields) == 1:
            continue

        bounds = fields[0].split('..')
        start = int(bounds[0], 16)
        end = start if len(bounds) == 1 else int(bounds[1], 16)

        value = fields[1]
        for u in range(start, end + 1):
            data[u] = value
    return data


def load_ucd_file(path):
    """Read one UCD file; return (first two lines, parsed property dict).

    The first two lines are kept verbatim; main() echoes them in the comment
    at the top of the generated output.  The file is closed before returning.
    """
    with open(path, encoding='utf-8') as f:
        header = [f.readline() for _ in range(2)]
        return header, parse_ucd_lines(f)


def merge_properties(unicode_data):
    """Merge the three property dicts into {code point: [cat, pos, block]}.

    *unicode_data* is [syllabic categories, positional categories, blocks].
    Code points that only appear in the blocks dict are ignored.  The result
    is restricted to ALLOWED_SINGLES and code points in ALLOWED_BLOCKS.
    """
    combined = {}
    for i, d in enumerate(unicode_data):
        for u, v in d.items():
            if u not in combined:
                if i == 2:
                    # Blocks alone do not make a code point interesting.
                    continue
                combined[u] = list(UCD_DEFAULTS)
            combined[u][i] = v
    return {k: v for k, v in combined.items()
            if k in ALLOWED_SINGLES or v[2] in ALLOWED_BLOCKS}


def build_indic_data(combined, blocks):
    """Turn merged UCD properties into {code point: (cat, pos, block)}.

    *combined* is the output of merge_properties(); *blocks* maps code points
    to block names and must contain every key of category_overrides and
    position_overrides (a KeyError is raised otherwise).
    """
    indic_data = {}
    for k, (cat, pos, block) in combined.items():
        cat = category_map[cat]
        if cat == 'SM' and pos == 'Not_Applicable':
            cat = 'SMPst'
        pos = position_map[pos]
        indic_data[k] = (cat, pos, block)

    for k, new_cat in category_overrides.items():
        (cat, pos, _) = indic_data.get(k, DEFAULTS)
        indic_data[k] = (new_cat, pos, blocks[k])

    # We only expect position for certain types
    positioned_categories = ('CM', 'SM', 'RS', 'H', 'M', 'MPst')
    for k, (cat, pos, block) in indic_data.items():
        if cat not in positioned_categories:
            indic_data[k] = (cat, 'END', block)

    # Position overrides are more complicated

    # Keep in sync with CONSONANT_FLAGS in the shaper
    consonant_categories = ('C', 'CS', 'Ra', 'CM', 'V', 'PLACEHOLDER', 'DOTTEDCIRCLE')
    matra_categories = ('M', 'MPst')
    smvd_categories = ('SM', 'SMPst', 'VD', 'A', 'Symbol')
    for k, (cat, pos, block) in indic_data.items():
        if cat in consonant_categories:
            pos = 'BASE_C'
        elif cat in matra_categories:
            if block.startswith(('Khmer', 'Myanmar')):
                cat = position_to_category(pos)
            else:
                pos = indic_matra_position(k, pos, block)
        elif cat in smvd_categories:
            pos = 'SMVD'
        indic_data[k] = (cat, pos, block)

    for k, new_pos in position_overrides.items():
        (cat, pos, _) = indic_data.get(k, DEFAULTS)
        indic_data[k] = (cat, new_pos, blocks[k])

    return indic_data


def main(argv):
    """Generate the table from the three files named in *argv* and print it.

    Exits with the usage message unless *argv* has exactly four entries.
    Raises Exception if two values shorten to the same alias or if the
    generated table is less than 50% occupied.
    """
    if len(argv) != 4:
        sys.exit(__doc__)

    headers = []
    unicode_data = []
    for path in argv[1:]:
        header, data = load_ucd_file(path)
        headers.append(header)
        unicode_data.append(data)

    combined = merge_properties(unicode_data)
    indic_data = build_indic_data(combined, unicode_data[2])

    # Per-column usage counts; the defaults are seeded so they always get a
    # #define even if no code point uses them.
    values = [{_: 1} for _ in DEFAULTS]
    for vv in indic_data.values():
        for i, v in enumerate(vv):
            values[i][v] = values[i].get(v, 0) + 1

    # Move the outliers NO-BREAK SPACE and DOTTED CIRCLE out
    singles = {}
    for u in ALLOWED_SINGLES:
        singles[u] = indic_data[u]
        del indic_data[u]

    print("/* == Start of generated table == */")
    print("/*")
    print(" * The following table is generated by running:")
    print(" *")
    print(" * ./gen-indic-table.py IndicSyllabicCategory.txt IndicPositionalCategory.txt Blocks.txt")
    print(" *")
    print(" * on files with these headers:")
    print(" *")
    for h in headers:
        for l in h:
            print(" * %s" % (l.strip()))
    print(" */")
    print()
    print('#include "hb.hh"')
    print()
    print('#ifndef HB_NO_OT_SHAPE')
    print()
    print('#include "hb-ot-shaper-indic.hh"')
    print()
    print('#pragma GCC diagnostic push')
    print('#pragma GCC diagnostic ignored "-Wunused-macros"')
    print()

    # Print categories
    for shaper in categories:
        print('#include "hb-ot-shaper-%s-machine.hh"' % shaper)
    print()
    done = {}
    for shaper, shaper_cats in categories.items():
        print('/* %s */' % shaper)
        for cat in shaper_cats:
            v = shaper[0].upper()
            if cat not in done:
                print("#define OT_%s %s_Cat(%s)" % (cat, v, cat))
                done[cat] = v
            else:
                # Category shared between shapers: check the values agree.
                print('static_assert (OT_%s == %s_Cat(%s), "");' % (cat, v, cat))
    print()

    # Shorten values
    short = [{
        "Repha": 'Rf',
        "PLACEHOLDER": 'GB',
        "DOTTEDCIRCLE": 'DC',
        "SMPst": 'SP',
        "VPst": 'VR',
        "VPre": 'VL',
        "Robatic": 'Rt',
        "Xgroup": 'Xg',
        "Ygroup": 'Yg',
        "As": 'As',
    }, {
        "END": 'X',
        "BASE_C": 'C',
        "ABOVE_C": 'T',
        "BELOW_C": 'B',
        "POST_C": 'R',
        "PRE_C": 'L',
        "PRE_M": 'LM',
        "AFTER_MAIN": 'A',
        "AFTER_SUB": 'AS',
        "BEFORE_SUB": 'BS',
        "AFTER_POST": 'AP',
        "SMVD": 'SM',
    }]
    all_shorts = [{}, {}]

    # Add some of the values, to make them more readable, and to avoid duplicates

    for i in range(2):
        for v, s in short[i].items():
            all_shorts[i][s] = v

    what = ["OT", "POS"]
    what_short = ["_OT", "_POS"]
    cat_defs = []
    for i in range(2):
        for v in sorted(values[i].keys()):
            v_no_and = v.replace('_And_', '_')
            if v in short[i]:
                s = short[i][v]
            else:
                # Default alias: the upper-case ASCII letters of the name.
                s = ''.join([c for c in v_no_and if ord('A') <= ord(c) <= ord('Z')])
                if s in all_shorts[i]:
                    raise Exception("Duplicate short value alias", v, all_shorts[i][s])
                all_shorts[i][s] = v
                short[i][v] = s
            cat_defs.append((what_short[i] + '_' + s,
                             what[i] + '_' + (v.upper() if i else v),
                             str(values[i][v]),
                             v))

    maxlen_s = max([len(c[0]) for c in cat_defs])
    maxlen_l = max([len(c[1]) for c in cat_defs])
    maxlen_n = max([len(c[2]) for c in cat_defs])
    for s in what_short:
        print()
        for c in [c for c in cat_defs if s in c[0]]:
            print("#define %s %s /* %s chars; %s */" %
                  (c[0].ljust(maxlen_s), c[1].ljust(maxlen_l), c[2].rjust(maxlen_n), c[3]))
    print()
    print('#pragma GCC diagnostic pop')
    print()
    print("#define INDIC_COMBINE_CATEGORIES(S,M) ((S) | ((M) << 8))")
    print()
    print("#define _(S,M) INDIC_COMBINE_CATEGORIES (%s_##S, %s_##M)" % tuple(what_short))
    print()
    print()

    total = 0
    used = 0
    last_block = None

    def print_block(block, start, end, data):
        """Print table rows for code points start..end (both 8-aligned)."""
        nonlocal total, used, last_block
        if block and block != last_block:
            print()
            print()
            print(" /* %s */" % block)
        num = 0
        assert start % 8 == 0
        assert (end + 1) % 8 == 0
        for u in range(start, end + 1):
            if u % 8 == 0:
                print()
                print(" /* %04X */" % u, end="")
            if u in data:
                num += 1
            d = data.get(u, DEFAULTS)
            print("%9s" % ("_(%s,%s)," % (short[0][d[0]], short[1][d[1]])), end="")

        total += end - start + 1
        used += num
        if block:
            last_block = block

    uu = sorted(indic_data)

    last = -100000
    offset = 0
    starts = []
    ends = []
    print("static const uint16_t indic_table[] = {")
    for u in uu:
        if u <= last:
            continue
        block = indic_data[u][2]

        start = u // 8 * 8
        end = start + 1
        # Membership is tested on the dict (same keys as `uu`) so each probe
        # is O(1) instead of a scan of the sorted list.
        while end in indic_data and block == indic_data[end][2]:
            end += 1
        end = (end - 1) // 8 * 8 + 7

        if start != last + 1:
            if start - last <= 1 + 16 * 2:
                # Small gap: pad with default entries instead of starting a
                # new table segment.
                print_block(None, last + 1, start - 1, indic_data)
            else:
                if last >= 0:
                    ends.append(last + 1)
                    offset += ends[-1] - starts[-1]
                print()
                print()
                print("#define indic_offset_0x%04xu %d" % (start, offset))
                starts.append(start)

        print_block(block, start, end, indic_data)
        last = end
    ends.append(last + 1)
    offset += ends[-1] - starts[-1]
    print()
    print()
    occupancy = used * 100. / total
    page_bits = 12
    print("}; /* Table items: %d; occupancy: %d%% */" % (offset, occupancy))
    print()
    print("uint16_t")
    print("hb_indic_get_categories (hb_codepoint_t u)")
    print("{")
    print(" switch (u >> %d)" % page_bits)
    print(" {")
    pages = {u >> page_bits for u in starts + ends + list(singles.keys())}
    for p in sorted(pages):
        print(" case 0x%0Xu:" % p)
        for u, d in singles.items():
            if p != u >> page_bits: continue
            print(" if (unlikely (u == 0x%04Xu)) return _(%s,%s);" % (u, short[0][d[0]], short[1][d[1]]))
        for (start, end) in zip(starts, ends):
            if p not in [start >> page_bits, end >> page_bits]: continue
            offset_name = "indic_offset_0x%04xu" % start
            print(" if (hb_in_range<hb_codepoint_t> (u, 0x%04Xu, 0x%04Xu)) return indic_table[u - 0x%04Xu + %s];" % (start, end - 1, start, offset_name))
        print(" break;")
        print("")
    print(" default:")
    print(" break;")
    print(" }")
    print(" return _(X,X);")
    print("}")
    print()
    print("#undef _")
    print("#undef INDIC_COMBINE_CATEGORIES")
    for i in range(2):
        print()
        for v in sorted(values[i].keys()):
            print("#undef %s_%s" %
                  (what_short[i], short[i][v]))
    print()
    print('#endif')
    print()
    print("/* == End of generated table == */")

    # Maintain at least 50% occupancy in the table
    if occupancy < 50:
        raise Exception("Table too sparse, please investigate: ", occupancy)


if __name__ == '__main__':
    main(sys.argv)