• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python3
2
3"""usage: ./gen-indic-table.py IndicSyllabicCategory.txt IndicPositionalCategory.txt Blocks.txt
4
5Input files:
6* https://unicode.org/Public/UCD/latest/ucd/IndicSyllabicCategory.txt
7* https://unicode.org/Public/UCD/latest/ucd/IndicPositionalCategory.txt
8* https://unicode.org/Public/UCD/latest/ucd/Blocks.txt
9"""
10
11import sys
12
# Exactly three input files are required; otherwise exit, printing the
# module docstring (the usage text) as the message.
if len (sys.argv) != 4:
	sys.exit (__doc__)

# Code points that are kept even when their block is not in ALLOWED_BLOCKS.
ALLOWED_SINGLES = [0x00A0, 0x25CC]
# Block names (third input file) whose code points are kept in the table.
ALLOWED_BLOCKS = [
	'Basic Latin',
	'Latin-1 Supplement',
	'Devanagari',
	'Bengali',
	'Gurmukhi',
	'Gujarati',
	'Oriya',
	'Tamil',
	'Telugu',
	'Kannada',
	'Malayalam',
	'Myanmar',
	'Khmer',
	'Vedic Extensions',
	'General Punctuation',
	'Superscripts and Subscripts',
	'Devanagari Extended',
	'Myanmar Extended-B',
	'Myanmar Extended-A',
]
38
def _parse_ucd_lines (lines):
	"""Parse `start[..end] ; value [# comment]` lines into {codepoint: value}.

	Everything from the first '#' on a line is ignored, and lines without a
	';' separator (blank or comment-only lines) are skipped.  A range is
	expanded so that every code point in it maps to the same value.
	"""
	table = {}
	for line in lines:
		# Drop the trailing comment, if any.
		content = line.split ('#', 1)[0]

		fields = [x.strip () for x in content.split (';')]
		if len (fields) == 1:
			continue

		bounds = fields[0].split ('..')
		start = int (bounds[0], 16)
		end = start if len (bounds) == 1 else int (bounds[1], 16)

		value = fields[1]
		for u in range (start, end + 1):
			table[u] = value
	return table

# headers[i] holds the first two lines of input file i (echoed into the
# generated output); unicode_data[i] maps code point -> property value.
headers = []
unicode_data = []
for path in sys.argv[1:]:
	# `with` guarantees the file is closed once it has been parsed.
	with open (path, encoding='utf-8') as f:
		headers.append ([f.readline () for _ in range (2)])
		unicode_data.append (_parse_ucd_lines (f))
66
# Merge data into one dict:
# combined[u] is a three-item list [category, position, block].
defaults = ('Other', 'Not_Applicable', 'No_Block')
combined = {}
for column, table in enumerate (unicode_data):
	# The third table (blocks) only annotates code points that the first
	# two tables already introduced; it never adds new entries.
	may_add = column != 2
	for codepoint, value in table.items ():
		if codepoint not in combined:
			if not may_add:
				continue
			combined[codepoint] = list (defaults)
		combined[codepoint][column] = value
# Keep only the explicitly allowed code points and blocks.
combined = {
	codepoint: props
	for codepoint, props in combined.items ()
	if codepoint in ALLOWED_SINGLES or props[2] in ALLOWED_BLOCKS
}
78
79
80# Convert categories & positions types
81
# Category names per shaper.  Each key names a shaper: it is used to print an
# include of "hb-ot-shaper-<key>-machine.hh", and each listed category is
# printed as "#define OT_<cat> <K>_Cat(<cat>)", where <K> is the upper-cased
# first letter of the key.  A category already printed for an earlier shaper
# gets a static_assert instead of a second #define.
categories = {
  'indic' : [
    'X',
    'C',
    'V',
    'N',
    'H',
    'ZWNJ',
    'ZWJ',
    'M',
    'SM',
    'A',
    'VD',
    'PLACEHOLDER',
    'DOTTEDCIRCLE',
    'RS',
    'MPst',
    'Repha',
    'Ra',
    'CM',
    'Symbol',
    'CS',
  ],
  'khmer' : [
    'VAbv',
    'VBlw',
    'VPre',
    'VPst',

    'Robatic',
    'Xgroup',
    'Ygroup',
  ],
  'myanmar' : [
    'VAbv',
    'VBlw',
    'VPre',
    'VPst',

    'IV',
    'As',
    'DB',
    'GB',
    'MH',
    'MR',
    'MW',
    'MY',
    'PT',
    'VS',
    'ML',
  ],
}
134
# Maps the property values read from the first input file
# (IndicSyllabicCategory.txt per the usage text) to shaper category names.
# Every value that occurs in the kept data must have an entry here, because
# the conversion indexes this dict directly.
category_map = {
  'Other'			: 'X',
  'Avagraha'			: 'Symbol',
  'Bindu'			: 'SM',
  'Brahmi_Joining_Number'	: 'PLACEHOLDER', # Don't care.
  'Cantillation_Mark'		: 'A',
  'Consonant'			: 'C',
  'Consonant_Dead'		: 'C',
  'Consonant_Final'		: 'CM',
  'Consonant_Head_Letter'	: 'C',
  'Consonant_Initial_Postfixed'	: 'C', # TODO
  'Consonant_Killer'		: 'M', # U+17CD only.
  'Consonant_Medial'		: 'CM',
  'Consonant_Placeholder'	: 'PLACEHOLDER',
  'Consonant_Preceding_Repha'	: 'Repha',
  'Consonant_Prefixed'		: 'X', # Don't care.
  'Consonant_Subjoined'		: 'CM',
  'Consonant_Succeeding_Repha'	: 'CM',
  'Consonant_With_Stacker'	: 'CS',
  'Gemination_Mark'		: 'SM', # https://github.com/harfbuzz/harfbuzz/issues/552
  'Invisible_Stacker'		: 'H',
  'Joiner'			: 'ZWJ',
  'Modifying_Letter'		: 'X',
  'Non_Joiner'			: 'ZWNJ',
  'Nukta'			: 'N',
  'Number'			: 'PLACEHOLDER',
  'Number_Joiner'		: 'PLACEHOLDER', # Don't care.
  'Pure_Killer'			: 'M', # Is like a vowel matra.
  'Register_Shifter'		: 'RS',
  'Syllable_Modifier'		: 'SM',
  'Tone_Letter'			: 'X',
  'Tone_Mark'			: 'N',
  'Virama'			: 'H',
  'Visarga'			: 'SM',
  'Vowel'			: 'V',
  'Vowel_Dependent'		: 'M',
  'Vowel_Independent'		: 'V',
}
# Maps the property values read from the second input file
# (IndicPositionalCategory.txt per the usage text) to position names.
# Every value that occurs in the kept data must have an entry here, because
# the conversion indexes this dict directly.
position_map = {
  'Not_Applicable'		: 'END',

  'Left'			: 'PRE_C',
  'Top'				: 'ABOVE_C',
  'Bottom'			: 'BELOW_C',
  'Right'			: 'POST_C',

  # These should resolve to the position of the last part of the split sequence.
  'Bottom_And_Right'		: 'POST_C',
  'Left_And_Right'		: 'POST_C',
  'Top_And_Bottom'		: 'BELOW_C',
  'Top_And_Bottom_And_Left'	: 'BELOW_C',
  'Top_And_Bottom_And_Right'	: 'POST_C',
  'Top_And_Left'		: 'ABOVE_C',
  'Top_And_Left_And_Right'	: 'POST_C',
  'Top_And_Right'		: 'POST_C',

  'Overstruck'			: 'AFTER_MAIN',
  'Visual_order_left'		: 'PRE_M',
}
194
# Per-codepoint category overrides, applied after category_map has been used.
# An override replaces only the category; the position already computed for
# the code point (or the default one) is kept.  Every key must also be present
# in the parsed block data, since its block name is looked up there directly.
category_overrides = {

  # These are the variation-selectors. They only appear in the Myanmar grammar
  # but are not Myanmar-specific
  0xFE00: 'VS',
  0xFE01: 'VS',
  0xFE02: 'VS',
  0xFE03: 'VS',
  0xFE04: 'VS',
  0xFE05: 'VS',
  0xFE06: 'VS',
  0xFE07: 'VS',
  0xFE08: 'VS',
  0xFE09: 'VS',
  0xFE0A: 'VS',
  0xFE0B: 'VS',
  0xFE0C: 'VS',
  0xFE0D: 'VS',
  0xFE0E: 'VS',
  0xFE0F: 'VS',

  # These appear in the OT Myanmar spec, but are not Myanmar-specific
  0x2015: 'PLACEHOLDER',
  0x2022: 'PLACEHOLDER',
  0x25FB: 'PLACEHOLDER',
  0x25FC: 'PLACEHOLDER',
  0x25FD: 'PLACEHOLDER',
  0x25FE: 'PLACEHOLDER',


  # Indic

  0x0930: 'Ra', # Devanagari
  0x09B0: 'Ra', # Bengali
  0x09F0: 'Ra', # Bengali
  0x0A30: 'Ra', # Gurmukhi 	No Reph
  0x0AB0: 'Ra', # Gujarati
  0x0B30: 'Ra', # Oriya
  0x0BB0: 'Ra', # Tamil 	No Reph
  0x0C30: 'Ra', # Telugu 	Reph formed only with ZWJ
  0x0CB0: 'Ra', # Kannada
  0x0D30: 'Ra', # Malayalam 	No Reph, Logical Repha

  # The following act more like the Bindus.
  0x0953: 'SM',
  0x0954: 'SM',

  # U+0A40 GURMUKHI VOWEL SIGN II may be preceded by U+0A02 GURMUKHI SIGN BINDI.
  0x0A40: 'MPst',

  # The following act like consonants.
  0x0A72: 'C',
  0x0A73: 'C',
  0x1CF5: 'C',
  0x1CF6: 'C',

  # TODO: The following should only be allowed after a Visarga.
  # For now, just treat them like regular tone marks.
  0x1CE2: 'A',
  0x1CE3: 'A',
  0x1CE4: 'A',
  0x1CE5: 'A',
  0x1CE6: 'A',
  0x1CE7: 'A',
  0x1CE8: 'A',

  # TODO: The following should only be allowed after some of
  # the nasalization marks, maybe only for U+1CE9..U+1CF1.
  # For now, just treat them like tone marks.
  0x1CED: 'A',

  # The following take marks in standalone clusters, similar to Avagraha.
  0xA8F2: 'Symbol',
  0xA8F3: 'Symbol',
  0xA8F4: 'Symbol',
  0xA8F5: 'Symbol',
  0xA8F6: 'Symbol',
  0xA8F7: 'Symbol',
  0x1CE9: 'Symbol',
  0x1CEA: 'Symbol',
  0x1CEB: 'Symbol',
  0x1CEC: 'Symbol',
  0x1CEE: 'Symbol',
  0x1CEF: 'Symbol',
  0x1CF0: 'Symbol',
  0x1CF1: 'Symbol',

  0x0A51: 'M', # https://github.com/harfbuzz/harfbuzz/issues/524

  # According to ScriptExtensions.txt, these Grantha marks may also be used in Tamil,
  # so the Indic shaper needs to know their categories.
  0x11301: 'SM',
  0x11302: 'SM',
  0x11303: 'SM',
  0x1133B: 'N',
  0x1133C: 'N',

  0x0AFB: 'N', # https://github.com/harfbuzz/harfbuzz/issues/552
  0x0B55: 'N', # https://github.com/harfbuzz/harfbuzz/issues/2849

  0x09FC: 'PLACEHOLDER', # https://github.com/harfbuzz/harfbuzz/pull/1613
  0x0C80: 'PLACEHOLDER', # https://github.com/harfbuzz/harfbuzz/pull/623
  0x0D04: 'PLACEHOLDER', # https://github.com/harfbuzz/harfbuzz/pull/3511

  0x25CC: 'DOTTEDCIRCLE',


  # Khmer

  0x179A: 'Ra',

  0x17CC: 'Robatic',
  0x17C9: 'Robatic',
  0x17CA: 'Robatic',

  0x17C6: 'Xgroup',
  0x17CB: 'Xgroup',
  0x17CD: 'Xgroup',
  0x17CE: 'Xgroup',
  0x17CF: 'Xgroup',
  0x17D0: 'Xgroup',
  0x17D1: 'Xgroup',

  0x17C7: 'Ygroup',
  0x17C8: 'Ygroup',
  0x17DD: 'Ygroup',
  0x17D3: 'Ygroup', # Just guessing. Uniscribe doesn't categorize it.

  0x17D9: 'PLACEHOLDER', # https://github.com/harfbuzz/harfbuzz/issues/2384


  # Myanmar

  # https://docs.microsoft.com/en-us/typography/script-development/myanmar#analyze

  0x104E: 'C', # The spec says C, IndicSyllableCategory says Consonant_Placeholder

  0x1004: 'Ra',
  0x101B: 'Ra',
  0x105A: 'Ra',

  0x1032: 'A',
  0x1036: 'A',

  0x103A: 'As',

  #0x1040: 'D0', # XXX The spec says D0, but Uniscribe doesn't seem to do.

  0x103E: 'MH',
  0x1060: 'ML',
  0x103C: 'MR',
  0x103D: 'MW',
  0x1082: 'MW',
  0x103B: 'MY',
  0x105E: 'MY',
  0x105F: 'MY',

  0x1063: 'PT',
  0x1064: 'PT',
  0x1069: 'PT',
  0x106A: 'PT',
  0x106B: 'PT',
  0x106C: 'PT',
  0x106D: 'PT',
  0xAA7B: 'PT',

  0x1038: 'SM',
  0x1087: 'SM',
  0x1088: 'SM',
  0x1089: 'SM',
  0x108A: 'SM',
  0x108B: 'SM',
  0x108C: 'SM',
  0x108D: 'SM',
  0x108F: 'SM',
  0x109A: 'SM',
  0x109B: 'SM',
  0x109C: 'SM',

  0x104A: 'PLACEHOLDER',
}
# Per-codepoint position overrides, applied last (after the category-driven
# position fix-ups), so they win over any computed position.  As with the
# category overrides, each key must be present in the parsed block data.
position_overrides = {

  0x0A51: 'BELOW_C', # https://github.com/harfbuzz/harfbuzz/issues/524

  0x0B01: 'BEFORE_SUB', # Oriya Bindu is BeforeSub in the spec.
}
382
def matra_pos_left(u, block):
  """Return the position for a left-side matra.

  Neither the code point `u` nor the `block` name influences the result.
  """
  return 'PRE_M'
def matra_pos_right(u, block):
  """Return the position for a right-side matra `u` in `block`.

  Telugu and Kannada depend on the code point; the other listed blocks map
  to a fixed value; everything else (Devanagari included) is 'AFTER_SUB'.
  """
  if block == 'Telugu':
    return 'BEFORE_SUB' if u <= 0x0C42 else 'AFTER_SUB'
  if block == 'Kannada':
    # Only U+0CC3..U+0CD6 go after the subjoined forms.
    return 'AFTER_SUB' if 0x0CC3 <= u <= 0x0CD6 else 'BEFORE_SUB'
  if block in ('Bengali', 'Gurmukhi', 'Gujarati', 'Oriya', 'Tamil', 'Malayalam'):
    return 'AFTER_POST'
  return 'AFTER_SUB'
def matra_pos_top(u, block):
  """Return the position for a top matra in `block`; `u` is not consulted."""
  # BENG and MLYM don't have top matras.
  exceptions = {
    'Gurmukhi': 'AFTER_POST', # Deviate from spec
    'Oriya': 'AFTER_MAIN',
    'Telugu': 'BEFORE_SUB',
    'Kannada': 'BEFORE_SUB',
  }
  # Devanagari, Gujarati, Tamil and any unlisted block share the default.
  return exceptions.get(block, 'AFTER_SUB')
def matra_pos_bottom(u, block):
  """Return the position for a bottom matra in `block`; `u` is not consulted."""
  if block in ('Gurmukhi', 'Gujarati', 'Tamil', 'Malayalam'):
    return 'AFTER_POST'
  if block in ('Telugu', 'Kannada'):
    return 'BEFORE_SUB'
  # Devanagari, Bengali, Oriya and any unlisted block.
  return 'AFTER_SUB'
def indic_matra_position(u, pos, block): # Reposition matra
  """Return the shaper position for matra `u`, given its generic `pos`.

  `pos` must be one of 'PRE_C', 'POST_C', 'ABOVE_C' or 'BELOW_C'; the
  answer is delegated to the matching matra_pos_* helper for `block`.

  Raises AssertionError for any other `pos`.  The exception is raised
  explicitly (rather than via an `assert` statement) so that the check is
  still performed when Python runs with -O, instead of returning None.
  """
  if pos == 'PRE_C':	return matra_pos_left(u, block)
  if pos == 'POST_C':	return matra_pos_right(u, block)
  if pos == 'ABOVE_C':	return matra_pos_top(u, block)
  if pos == 'BELOW_C':	return matra_pos_bottom(u, block)
  raise AssertionError('unexpected matra position %r for U+%04X' % (pos, u))
423
def position_to_category(pos):
  """Translate a generic matra position into a vowel category name.

  'PRE_C' -> 'VPre', 'ABOVE_C' -> 'VAbv', 'BELOW_C' -> 'VBlw',
  'POST_C' -> 'VPst'.

  Raises AssertionError for any other `pos`.  The exception is raised
  explicitly (rather than via an `assert` statement) so that the check is
  still performed when Python runs with -O, instead of returning None.
  """
  if pos == 'PRE_C':	return 'VPre'
  if pos == 'ABOVE_C':	return 'VAbv'
  if pos == 'BELOW_C':	return 'VBlw'
  if pos == 'POST_C':	return 'VPst'
  raise AssertionError('unexpected matra position %r' % (pos,))
430
431
# Express the default record in shaper terms too.
defaults = (category_map[defaults[0]], position_map[defaults[1]], defaults[2])

# indic_data[u] = (category, position, block), converted from the raw values.
indic_data = {
  u: (category_map[raw_cat], position_map[raw_pos], blk)
  for u, (raw_cat, raw_pos, blk) in combined.items()
}

# Category overrides replace the category only; the position is kept and the
# block name is taken straight from the parsed block data.
for u, forced_cat in category_overrides.items():
  _, kept_pos, _ = indic_data.get(u, defaults)
  indic_data[u] = (forced_cat, kept_pos, unicode_data[2][u])

# We only expect position for certain types
positioned_categories = ('CM', 'SM', 'RS', 'H', 'M', 'MPst')
for u, (cat, _pos, blk) in indic_data.items():
  if cat in positioned_categories:
    continue
  indic_data[u] = (cat, 'END', blk)

# Position overrides are more complicated

# Keep in sync with CONSONANT_FLAGS in the shaper
consonant_categories = ('C', 'CS', 'Ra','CM', 'V', 'PLACEHOLDER', 'DOTTEDCIRCLE')
matra_categories = ('M', 'MPst')
smvd_categories = ('SM', 'VD', 'A', 'Symbol')
for u, (cat, pos, blk) in indic_data.items():
  if cat in consonant_categories:
    pos = 'BASE_C'
  elif cat in matra_categories:
    # Khmer/Myanmar fold the position into the category; the other
    # scripts keep the category and refine the position per block.
    if blk.startswith(('Khmer', 'Myanmar')):
      cat = position_to_category(pos)
    else:
      pos = indic_matra_position(u, pos, blk)
  elif cat in smvd_categories:
    pos = 'SMVD'
  indic_data[u] = (cat, pos, blk)

# Explicit position overrides come last and replace the position only.
for u, forced_pos in position_overrides.items():
  kept_cat, _, _ = indic_data.get(u, defaults)
  indic_data[u] = (kept_cat, forced_pos, unicode_data[2][u])
472
473
# Tally how often each category (index 0), position (1) and block (2) occurs.
# Each default value starts at 1, so it is always present in its tally.
values = [{default: 1} for default in defaults]
for record in indic_data.values():
  for column, value in enumerate(record):
    tally = values[column]
    tally[value] = tally.get (value, 0) + 1


# Move the outliers NO-BREAK SPACE and DOTTED CIRCLE out
singles = {u: indic_data.pop (u) for u in ALLOWED_SINGLES}
487
# Banner comment: how the table was generated, then the input file headers.
for text in (
	"/* == Start of generated table == */",
	"/*",
	" * The following table is generated by running:",
	" *",
	" *   ./gen-indic-table.py IndicSyllabicCategory.txt IndicPositionalCategory.txt Blocks.txt",
	" *",
	" * on files with these headers:",
	" *",
):
	print (text)
for file_header in headers:
	for header_line in file_header:
		print (" * %s" % (header_line.strip()))
print (" */")
print ()
# Includes and the feature guard, each followed by a blank line.
for text in (
	'#include "hb.hh"',
	'#ifndef HB_NO_OT_SHAPE',
	'#include "hb-ot-shaper-indic.hh"',
):
	print (text)
	print ()
print ('#pragma GCC diagnostic push')
print ('#pragma GCC diagnostic ignored "-Wunused-macros"')
print ()
510
# Print categories
for shaper in categories:
  print ('#include "hb-ot-shaper-%s-machine.hh"' % shaper)
print ()
# done[cat] records the prefix under which `cat` was first #defined.
done = {}
for shaper, shaper_cats in categories.items():
  print ('/* %s */' % shaper)
  prefix = shaper[0].upper()
  for cat in shaper_cats:
    if cat in done:
      # Already defined for an earlier shaper: only check that both agree.
      print ('static_assert (OT_%s == %s_Cat(%s), "");' % (cat, prefix, cat))
      continue
    print ("#define OT_%s %s_Cat(%s)" % (cat, prefix, cat))
    done[cat] = prefix
print ()
526
# Shorten values
# short[0] aliases categories, short[1] aliases positions.
short = [
	{
		"Repha": 'Rf',
		"PLACEHOLDER": 'GB',
		"DOTTEDCIRCLE": 'DC',
		"VPst": 'VR',
		"VPre": 'VL',
		"Robatic": 'Rt',
		"Xgroup": 'Xg',
		"Ygroup": 'Yg',
		"As": 'As',
	},
	{
		"END": 'X',
		"BASE_C": 'C',
		"ABOVE_C": 'T',
		"BELOW_C": 'B',
		"POST_C": 'R',
		"PRE_C": 'L',
		"PRE_M": 'LM',
		"AFTER_MAIN": 'A',
		"AFTER_SUB": 'AS',
		"BEFORE_SUB": 'BS',
		"AFTER_POST": 'AP',
		"SMVD": 'SM',
	},
]

# Add some of the values, to make them more readable, and to avoid duplicates

# all_shorts is the reverse mapping (alias -> long name), seeded from `short`.
all_shorts = [{alias: name for name, alias in table.items ()} for table in short]

what = ["OT", "POS"]
what_short = ["_OT", "_POS"]
cat_defs = []
for i, (long_prefix, short_prefix) in enumerate (zip (what, what_short)):
	for v in sorted (values[i]):
		s = short[i].get (v)
		if s is None:
			# No hand-picked alias: use the upper-case letters of the name.
			s = ''.join (c for c in v.replace ('_And_', '_') if 'A' <= c <= 'Z')
			if s in all_shorts[i]:
				raise Exception ("Duplicate short value alias", v, all_shorts[i][s])
			all_shorts[i][s] = v
			short[i][v] = s
		# Position names are upper-cased; category names are used as-is.
		long_name = long_prefix + '_' + (v.upper () if i else v)
		cat_defs.append ((short_prefix + '_' + s, long_name, str (values[i][v]), v))
576
# Column widths so that the emitted #define lines align.
maxlen_s = max (len (c[0]) for c in cat_defs)
maxlen_l = max (len (c[1]) for c in cat_defs)
maxlen_n = max (len (c[2]) for c in cat_defs)
for s in what_short:
	print ()
	for short_name, long_name, count, original in cat_defs:
		# Select the definitions whose short name contains this prefix.
		if s not in short_name:
			continue
		print ("#define %s %s /* %s chars; %s */" %
			(short_name.ljust (maxlen_s), long_name.ljust (maxlen_l), count.rjust (maxlen_n), original))
print ()
print ('#pragma GCC diagnostic pop')
print ()
print ("#define INDIC_COMBINE_CATEGORIES(S,M) ((S) | ((M) << 8))")
print ()
print ("#define _(S,M) INDIC_COMBINE_CATEGORIES (%s_##S, %s_##M)" % tuple(what_short))
print ()
print ()
593
# Running totals maintained by print_block: number of table slots printed,
# number of those slots backed by real data, and the last named block.
total = 0
used = 0
last_block = None
def print_block (block, start, end, data):
	"""Print the table entries for code points `start`..`end` (inclusive).

	`start` must be a multiple of 8 and `end` one less than a multiple of 8;
	eight entries are printed per row, each row prefixed with a comment
	giving its first code point.  Code points missing from `data` are
	printed using the module-level `defaults`.  A block heading is printed
	first when `block` is truthy and differs from the previous named block.

	Updates the globals `total`, `used` and `last_block`.
	"""
	global total, used, last_block
	if block and block != last_block:
		print ()
		print ()
		print ("  /* %s */" % block)
	assert start % 8 == 0
	assert (end+1) % 8 == 0
	codepoints = range (start, end+1)
	for u in codepoints:
		if not u % 8:
			# Start a new row.
			print ()
			print ("  /* %04X */" % u, end="")
		entry = data.get (u, defaults)
		cell = "_(%s,%s)," % (short[0][entry[0]], short[1][entry[1]])
		print ("%9s" % cell, end="")

	total += end - start + 1
	used += sum (1 for u in codepoints if u in data)
	if block:
		last_block = block
619
# Emit the table body.  Code points are visited in ascending order and
# printed in 8-aligned runs; small gaps between runs are padded with default
# entries, while larger gaps start a new sub-range with its own offset macro.
uu = sorted (indic_data)

last = -100000
num = 0
offset = 0
starts = []
ends = []
print ("static const uint16_t indic_table[] = {")
for u in uu:
	if u <= last:
		# Already covered by the previously printed run.
		continue
	block = indic_data[u][2]

	start = u//8*8
	end = start+1
	# `uu` holds exactly the keys of indic_data, so test membership on the
	# dict (constant time) rather than scanning the sorted list each step.
	while end in indic_data and block == indic_data[end][2]:
		end += 1
	# Round up to the end of the 8-entry row.
	end = (end-1)//8*8 + 7

	if start != last + 1:
		if start - last <= 1+16*2:
			# Small gap: pad it with default entries.
			print_block (None, last+1, start-1, indic_data)
		else:
			# Large gap: close the previous sub-range (if any) and open a
			# new one at the current table offset.
			if last >= 0:
				ends.append (last + 1)
				offset += ends[-1] - starts[-1]
			print ()
			print ()
			print ("#define indic_offset_0x%04xu %d" % (start, offset))
			starts.append (start)

	print_block (block, start, end, indic_data)
	last = end
# Close the final sub-range.
ends.append (last + 1)
offset += ends[-1] - starts[-1]
print ()
print ()
occupancy = used * 100. / total
page_bits = 12
print ("}; /* Table items: %d; occupancy: %d%% */" % (offset, occupancy))
print ()
# Emit the lookup function: a switch on the page (u >> page_bits) whose cases
# test the single code points and the table sub-ranges touching that page.
print ("uint16_t")
print ("hb_indic_get_categories (hb_codepoint_t u)")
print ("{")
print ("  switch (u >> %d)" % page_bits)
print ("  {")
pages = {u >> page_bits for u in starts + ends + list (singles)}
for p in sorted (pages):
	print ("    case 0x%0Xu:" % p)
	for u, d in singles.items ():
		if u >> page_bits != p: continue
		print ("      if (unlikely (u == 0x%04Xu)) return _(%s,%s);" % (u, short[0][d[0]], short[1][d[1]]))
	for first, limit in zip (starts, ends):
		# A sub-range is listed under the page of either of its bounds.
		if p != first >> page_bits and p != limit >> page_bits: continue
		offset_macro = "indic_offset_0x%04xu" % first
		print ("      if (hb_in_range<hb_codepoint_t> (u, 0x%04Xu, 0x%04Xu)) return indic_table[u - 0x%04Xu + %s];" % (first, limit-1, first, offset_macro))
	print ("      break;")
	print ("")
print ("    default:")
print ("      break;")
print ("  }")
print ("  return _(X,X);")
print ("}")
print ()
print ("#undef _")
print ("#undef INDIC_COMBINE_CATEGORIES")
# Undefine every short alias that was #defined for categories and positions.
for short_prefix, tally, aliases in zip (what_short, values, short):
	print ()
	for v in sorted (tally):
		print ("#undef %s_%s" %
			(short_prefix, aliases[v]))
print ()
print ('#endif')
print ()
print ("/* == End of generated table == */")

# Maintain at least 50% occupancy in the table
if occupancy < 50:
	raise Exception ("Table too sparse, please investigate: ", occupancy)
700