• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python3
2
3"""usage: ./gen-indic-table.py IndicSyllabicCategory.txt IndicPositionalCategory.txt Blocks.txt
4
5Input files:
6* https://unicode.org/Public/UCD/latest/ucd/IndicSyllabicCategory.txt
7* https://unicode.org/Public/UCD/latest/ucd/IndicPositionalCategory.txt
8* https://unicode.org/Public/UCD/latest/ucd/Blocks.txt
9"""
10
11import sys
12
# Exactly three input files must be named on the command line; otherwise
# print the usage text (the module docstring) and stop.
if len (sys.argv[1:]) != 3:
	sys.exit (__doc__)
15
# Individual code points that are kept no matter which block they belong to.
ALLOWED_SINGLES = [
	0x00A0,
	0x25CC,
]

# Block names, spelled as in Blocks.txt, whose code points are kept.
ALLOWED_BLOCKS = [
	'Basic Latin', 'Latin-1 Supplement',
	'Devanagari', 'Bengali', 'Gurmukhi', 'Gujarati', 'Oriya',
	'Tamil', 'Telugu', 'Kannada', 'Malayalam',
	'Myanmar', 'Khmer',
	'Vedic Extensions',
	'General Punctuation', 'Superscripts and Subscripts',
	'Devanagari Extended',
	'Myanmar Extended-B', 'Myanmar Extended-A', 'Myanmar Extended-C',
]
39
# Parse the three input files named on the command line.
#
# headers[i]      -- the first two lines of file i, echoed later in the
#                    generated banner.
# unicode_data[i] -- maps each code point listed in file i to the value of
#                    the second ';'-separated field on its line.
#
# Each file is opened in a `with` block so that its handle is closed as soon
# as it has been read, instead of staying open for the rest of the run.
headers = []
unicode_data = []
for path in sys.argv[1:]:
	table = {}
	with open (path, encoding='utf-8') as f:
		headers.append ([f.readline () for _ in range (2)])

		for line in f:

			# Drop a trailing '#' comment, if any.
			j = line.find ('#')
			if j >= 0:
				line = line[:j]

			fields = [x.strip () for x in line.split (';')]
			if len (fields) == 1:
				# Blank or comment-only line: no ';' separator present.
				continue

			# First field is either a single code point or "START..END".
			bounds = fields[0].split ('..')
			start = int (bounds[0], 16)
			end = int (bounds[1], 16) if len (bounds) > 1 else start

			t = fields[1]

			for u in range (start, end + 1):
				table[u] = t
	unicode_data.append (table)
67
# Merge data into one dict, keyed by code point. Each value is a three-item
# list with one slot per input file, pre-filled from `defaults`.
defaults = ('Other', 'Not_Applicable', 'No_Block')
combined = {}
for column, table in enumerate (unicode_data):
	for u, value in table.items ():
		if u not in combined:
			# The third file only labels code points already introduced
			# by one of the first two; it never adds new entries.
			if column == 2:
				continue
			combined[u] = list (defaults)
		combined[u][column] = value

# Keep only the allowed single code points and members of allowed blocks.
combined = {
	u: row
	for u, row in combined.items ()
	if u in ALLOWED_SINGLES or row[2] in ALLOWED_BLOCKS
}
79
80
81# Convert categories & positions types
82
# Category names per shaper. The printing code further down emits one
# '#include "hb-ot-shaper-<shaper>-machine.hh"' per key and one OT_<name>
# define (or static_assert, for a name already defined) per entry, in the
# order given here.
categories = {
  'indic' : (
    'X C V N H ZWNJ ZWJ M SM A VD '
    'PLACEHOLDER DOTTEDCIRCLE RS MPst Repha Ra CM Symbol CS SMPst'
  ).split (),
  'khmer' : (
    'VAbv VBlw VPre VPst '
    'Robatic Xgroup Ygroup'
  ).split (),
  'myanmar' : (
    'VAbv VBlw VPre VPst '
    'IV As DB GB MH MR MW MY PT VS ML'
  ).split (),
}
136
# Maps each value found in the first input file (plus the 'Other' default)
# to the category name used in the generated table.
category_map = dict (
  Other				= 'X',
  Avagraha			= 'Symbol',
  Bindu				= 'SM',
  Brahmi_Joining_Number		= 'PLACEHOLDER', # Don't care.
  Cantillation_Mark		= 'A',
  Consonant			= 'C',
  Consonant_Dead		= 'C',
  Consonant_Final		= 'CM',
  Consonant_Head_Letter		= 'C',
  Consonant_Initial_Postfixed	= 'C', # TODO
  Consonant_Killer		= 'M', # U+17CD only.
  Consonant_Medial		= 'CM',
  Consonant_Placeholder		= 'PLACEHOLDER',
  Consonant_Preceding_Repha	= 'Repha',
  Consonant_Prefixed		= 'X', # Don't care.
  Consonant_Subjoined		= 'CM',
  Consonant_Succeeding_Repha	= 'CM',
  Consonant_With_Stacker	= 'CS',
  Gemination_Mark		= 'SM', # https://github.com/harfbuzz/harfbuzz/issues/552
  Invisible_Stacker		= 'H',
  Joiner			= 'ZWJ',
  Modifying_Letter		= 'X',
  Non_Joiner			= 'ZWNJ',
  Nukta				= 'N',
  Number			= 'PLACEHOLDER',
  Number_Joiner			= 'PLACEHOLDER', # Don't care.
  Pure_Killer			= 'M', # Is like a vowel matra.
  Register_Shifter		= 'RS',
  Syllable_Modifier		= 'SM',
  Tone_Letter			= 'X',
  Tone_Mark			= 'N',
  Virama			= 'H',
  Visarga			= 'SM',
  Vowel				= 'V',
  Vowel_Dependent		= 'M',
  Vowel_Independent		= 'V',
)
# Maps each value found in the second input file (plus the 'Not_Applicable'
# default) to the position name used in the generated table.
position_map = {'Not_Applicable': 'END'}

# Single-sided positions.
position_map.update (
  Left		= 'PRE_C',
  Top		= 'ABOVE_C',
  Bottom	= 'BELOW_C',
  Right		= 'POST_C',
)

# These should resolve to the position of the last part of the split sequence.
position_map.update (
  Bottom_And_Right		= 'POST_C',
  Left_And_Right		= 'POST_C',
  Top_And_Bottom		= 'BELOW_C',
  Top_And_Bottom_And_Left	= 'BELOW_C',
  Top_And_Bottom_And_Right	= 'POST_C',
  Top_And_Left			= 'ABOVE_C',
  Top_And_Left_And_Right	= 'POST_C',
  Top_And_Right			= 'POST_C',
)

position_map.update (
  Overstruck		= 'AFTER_MAIN',
  Visual_order_left	= 'PRE_M',
)
196
# Per-code-point categories that replace whatever category_map produced.
# Runs of code points sharing one category are written with dict.fromkeys;
# entries that carry their own remark are spelled out individually.
category_overrides = {

  # These are the variation-selectors (U+FE00..U+FE0F). They only appear in
  # the Myanmar grammar but are not Myanmar-specific
  **dict.fromkeys (range (0xFE00, 0xFE0F + 1), 'VS'),

  # These appear in the OT Myanmar spec, but are not Myanmar-specific
  **dict.fromkeys ((0x2015, 0x2022, 0x25FB, 0x25FC, 0x25FD, 0x25FE), 'PLACEHOLDER'),


  # Indic

  0x0930: 'Ra', # Devanagari
  0x09B0: 'Ra', # Bengali
  0x09F0: 'Ra', # Bengali
  0x0A30: 'Ra', # Gurmukhi 	No Reph
  0x0AB0: 'Ra', # Gujarati
  0x0B30: 'Ra', # Oriya
  0x0BB0: 'Ra', # Tamil 	No Reph
  0x0C30: 'Ra', # Telugu 	Reph formed only with ZWJ
  0x0CB0: 'Ra', # Kannada
  0x0D30: 'Ra', # Malayalam 	No Reph, Logical Repha

  # The following act more like the Bindus.
  **dict.fromkeys ((0x0953, 0x0954), 'SM'),

  # U+0A40 GURMUKHI VOWEL SIGN II may be preceded by U+0A02 GURMUKHI SIGN BINDI.
  0x0A40: 'MPst',

  # The following act like consonants.
  **dict.fromkeys ((0x0A72, 0x0A73, 0x1CF5, 0x1CF6), 'C'),

  # TODO: The following (U+1CE2..U+1CE8) should only be allowed after a Visarga.
  # For now, just treat them like regular tone marks.
  **dict.fromkeys (range (0x1CE2, 0x1CE8 + 1), 'A'),

  # TODO: The following should only be allowed after some of
  # the nasalization marks, maybe only for U+1CE9..U+1CF1.
  # For now, just treat them like tone marks.
  0x1CED: 'A',

  # The following take marks in standalone clusters, similar to Avagraha.
  **dict.fromkeys (range (0xA8F2, 0xA8F7 + 1), 'Symbol'),
  **dict.fromkeys ((0x1CE9, 0x1CEA, 0x1CEB, 0x1CEC), 'Symbol'),
  **dict.fromkeys ((0x1CEE, 0x1CEF, 0x1CF0, 0x1CF1), 'Symbol'),

  0x0A51: 'M', # https://github.com/harfbuzz/harfbuzz/issues/524

  # According to ScriptExtensions.txt, these Grantha marks may also be used in Tamil,
  # so the Indic shaper needs to know their categories.
  **dict.fromkeys ((0x11301, 0x11302, 0x11303), 'SM'),
  **dict.fromkeys ((0x1133B, 0x1133C), 'N'),

  0x0AFB: 'N', # https://github.com/harfbuzz/harfbuzz/issues/552
  0x0B55: 'N', # https://github.com/harfbuzz/harfbuzz/issues/2849

  0x09FC: 'PLACEHOLDER', # https://github.com/harfbuzz/harfbuzz/pull/1613
  0x0C80: 'PLACEHOLDER', # https://github.com/harfbuzz/harfbuzz/pull/623
  0x0D04: 'PLACEHOLDER', # https://github.com/harfbuzz/harfbuzz/pull/3511

  0x25CC: 'DOTTEDCIRCLE',


  # Khmer

  0x179A: 'Ra',

  **dict.fromkeys ((0x17CC, 0x17C9, 0x17CA), 'Robatic'),

  **dict.fromkeys ((0x17C6, 0x17CB, 0x17CD, 0x17CE, 0x17CF, 0x17D0, 0x17D1), 'Xgroup'),

  **dict.fromkeys ((0x17C7, 0x17C8, 0x17DD), 'Ygroup'),
  0x17D3: 'Ygroup', # Just guessing. Uniscribe doesn't categorize it.

  0x17D9: 'PLACEHOLDER', # https://github.com/harfbuzz/harfbuzz/issues/2384


  # Myanmar

  # https://docs.microsoft.com/en-us/typography/script-development/myanmar#analyze

  0x104E: 'C', # The spec says C, IndicSyllableCategory says Consonant_Placeholder

  **dict.fromkeys ((0x1004, 0x101B, 0x105A), 'Ra'),

  **dict.fromkeys ((0x1032, 0x1036), 'A'),

  0x103A: 'As',

  # U+1040 is deliberately left alone: the spec says D0, but Uniscribe
  # doesn't seem to do that.

  0x103E: 'MH',
  0x1060: 'ML',
  0x103C: 'MR',
  **dict.fromkeys ((0x103D, 0x1082), 'MW'),
  **dict.fromkeys ((0x103B, 0x105E, 0x105F), 'MY'),

  **dict.fromkeys ((0x1063, 0x1064), 'PT'),
  **dict.fromkeys (range (0x1069, 0x106D + 1), 'PT'),
  0xAA7B: 'PT',

  0x1038: 'SM',
  **dict.fromkeys (range (0x1087, 0x108D + 1), 'SM'),
  0x108F: 'SM',
  **dict.fromkeys ((0x109A, 0x109B, 0x109C), 'SM'),

  0x104A: 'PLACEHOLDER',
}
# Per-code-point positions, applied after all other position resolution.
position_overrides = {}
position_overrides[0x0A51] = 'BELOW_C' # https://github.com/harfbuzz/harfbuzz/issues/524
position_overrides[0x0B01] = 'BEFORE_SUB' # Oriya Bindu is BeforeSub in the spec.
384
def matra_pos_left(u, block):
  """Return the position for a left ('PRE_C') matra.

  The answer is 'PRE_M' for every code point `u` and every `block`; both
  parameters exist only to match the signature of the sibling helpers.
  """
  return 'PRE_M'
def matra_pos_right(u, block):
  """Return the position for a right ('POST_C') matra `u` in `block`.

  Telugu and Kannada depend on the code point itself; six blocks use
  'AFTER_POST'; Devanagari and every other block use 'AFTER_SUB'.
  """
  if block == 'Telugu':
    return 'BEFORE_SUB' if u <= 0x0C42 else 'AFTER_SUB'
  if block == 'Kannada':
    # Only U+0CC3..U+0CD6 go after the subjoined forms.
    return 'AFTER_SUB' if 0x0CC3 <= u <= 0x0CD6 else 'BEFORE_SUB'
  after_post_blocks = ('Bengali', 'Gurmukhi', 'Gujarati', 'Oriya', 'Tamil', 'Malayalam')
  return 'AFTER_POST' if block in after_post_blocks else 'AFTER_SUB'
def matra_pos_top(u, block):
  """Return the position for a top ('ABOVE_C') matra in `block`.

  `u` is accepted for signature parity with the sibling helpers and is
  not consulted.
  """
  # BENG and MLYM don't have top matras.
  # Devanagari, Gujarati, Tamil and every unlisted block use 'AFTER_SUB'.
  exceptions = {
    'Gurmukhi': 'AFTER_POST', # Deviate from spec
    'Oriya':    'AFTER_MAIN',
    'Telugu':   'BEFORE_SUB',
    'Kannada':  'BEFORE_SUB',
  }
  return exceptions.get(block, 'AFTER_SUB')
def matra_pos_bottom(u, block):
  """Return the position for a bottom ('BELOW_C') matra in `block`.

  `u` is accepted for signature parity with the sibling helpers and is
  not consulted.
  """
  if block in ('Telugu', 'Kannada'):
    return 'BEFORE_SUB'
  if block in ('Gurmukhi', 'Gujarati', 'Tamil', 'Malayalam'):
    return 'AFTER_POST'
  # Devanagari, Bengali, Oriya and every other block.
  return 'AFTER_SUB'
def indic_matra_position(u, pos, block): # Reposition matra
  """Return the final position for matra `u` of `block`.

  `pos` is the side-based position ('PRE_C', 'POST_C', 'ABOVE_C' or
  'BELOW_C'); the per-side helper decides the result.

  Raises AssertionError for any other `pos`. The error is raised
  explicitly rather than via `assert (False)`, so it still fires under
  `python -O` instead of the function quietly returning None.
  """
  if pos == 'PRE_C':	return matra_pos_left(u, block)
  if pos == 'POST_C':	return matra_pos_right(u, block)
  if pos == 'ABOVE_C':	return matra_pos_top(u, block)
  if pos == 'BELOW_C':	return matra_pos_bottom(u, block)
  raise AssertionError ("no matra position rule for %r (U+%04X, %s)" % (pos, u, block))
425
def position_to_category(pos):
  """Return the vowel category ('VPre', 'VAbv', 'VBlw', 'VPst') for `pos`.

  `pos` must be one of 'PRE_C', 'ABOVE_C', 'BELOW_C' or 'POST_C'.

  Raises AssertionError for any other value. The error is raised
  explicitly rather than via `assert(False)`, so it still fires under
  `python -O` instead of the function quietly returning None.
  """
  if pos == 'PRE_C':	return 'VPre'
  if pos == 'ABOVE_C':	return 'VAbv'
  if pos == 'BELOW_C':	return 'VBlw'
  if pos == 'POST_C':	return 'VPst'
  raise AssertionError ("no vowel category for position %r" % (pos,))
432
433
# Re-express the defaults as (table category, table position, block).
raw_cat, raw_pos, no_block = defaults
defaults = (category_map[raw_cat], position_map[raw_pos], no_block)

# Translate every merged entry into table categories and positions.
indic_data = {}
for u, (syllabic, positional, block) in combined.items():
  table_cat = category_map[syllabic]
  # An 'SM' whose position is still the 'Not_Applicable' default gets the
  # separate 'SMPst' category.
  if table_cat == 'SM' and positional == 'Not_Applicable':
    table_cat = 'SMPst'
  indic_data[u] = (table_cat, position_map[positional], block)

# Apply category overrides: the position is kept (or defaulted for a code
# point not seen so far) and the block is looked up in the blocks data.
for u, forced_cat in category_overrides.items():
  kept_pos = indic_data.get(u, defaults)[1]
  indic_data[u] = (forced_cat, kept_pos, unicode_data[2][u])
447
# We only expect position for certain types; every other category has its
# position reset to 'END'.
positioned_categories = ('CM', 'SM', 'RS', 'H', 'M', 'MPst')
indic_data = {
  u: (cat, pos if cat in positioned_categories else 'END', block)
  for u, (cat, pos, block) in indic_data.items()
}
454
# Position overrides are more complicated

# Keep in sync with CONSONANT_FLAGS in the shaper
consonant_categories = ('C', 'CS', 'Ra','CM', 'V', 'PLACEHOLDER', 'DOTTEDCIRCLE')
matra_categories = ('M', 'MPst')
smvd_categories = ('SM', 'SMPst', 'VD', 'A', 'Symbol')

def _resolve_category_and_position(u, cat, pos, block):
  """Return the final (category, position) pair for one table entry."""
  if cat in consonant_categories:
    return cat, 'BASE_C'
  if cat in matra_categories:
    # Khmer and Myanmar blocks turn the position into a vowel category;
    # all other blocks keep the category and reposition the matra.
    if block.startswith(('Khmer', 'Myanmar')):
      return position_to_category(pos), pos
    return cat, indic_matra_position(u, pos, block)
  if cat in smvd_categories:
    return cat, 'SMVD'
  return cat, pos

for u, (cat, pos, block) in indic_data.items():
  indic_data[u] = _resolve_category_and_position(u, cat, pos, block) + (block,)

# Explicit per-code-point positions win over everything computed above.
for u, forced_pos in position_overrides.items():
  kept_cat = indic_data.get(u, defaults)[0]
  indic_data[u] = (kept_cat, forced_pos, unicode_data[2][u])
476
477
# values[i] counts how often each value occurs in field i of the table
# entries; each default value starts at 1 so that it is always present.
values = [{default: 1} for default in defaults]
for entry in indic_data.values():
  for counts, field in zip(values, entry):
    counts[field] = counts.get (field, 0) + 1
482
483
484
485
# Move the outliers NO-BREAK SPACE and DOTTED CIRCLE out of the main table
# and into their own dict.
singles = {u: indic_data.pop (u) for u in ALLOWED_SINGLES}
491
# Banner comment (including the first two lines of every input file),
# followed by the includes and pragmas that open the generated source.
preamble = [
	"/* == Start of generated table == */",
	"/*",
	" * The following table is generated by running:",
	" *",
	" *   ./gen-indic-table.py IndicSyllabicCategory.txt IndicPositionalCategory.txt Blocks.txt",
	" *",
	" * on files with these headers:",
	" *",
]
preamble += [" * %s" % (l.strip()) for h in headers for l in h]
preamble += [
	" */",
	"",
	'#include "hb.hh"',
	"",
	'#ifndef HB_NO_OT_SHAPE',
	"",
	'#include "hb-ot-shaper-indic.hh"',
	"",
	'#pragma GCC diagnostic push',
	'#pragma GCC diagnostic ignored "-Wunused-macros"',
	"",
]
print ("\n".join (preamble))
514
# Print categories: one machine header include per shaper, then one define
# per category name. A name already defined by an earlier shaper gets a
# static_assert against the existing define instead of a second define.
for shaper in categories:
  print ('#include "hb-ot-shaper-%s-machine.hh"' % shaper)
print ()
done = {}
for shaper, shaper_cats in categories.items():
  print ('/* %s */' % shaper)
  prefix = shaper[0].upper()
  for cat in shaper_cats:
    if cat in done:
      print ('static_assert (OT_%s == %s_Cat(%s), "");' % (cat, prefix, cat))
      continue
    print ("#define OT_%s %s_Cat(%s)" % (cat, prefix, cat))
    done[cat] = prefix
print ()
530
# Shorten values: short[0] abbreviates categories, short[1] positions.
# Names missing here receive a computed abbreviation later on.
short = [
	dict (
		Repha		= 'Rf',
		PLACEHOLDER	= 'GB',
		DOTTEDCIRCLE	= 'DC',
		SMPst		= 'SP',
		VPst		= 'VR',
		VPre		= 'VL',
		Robatic		= 'Rt',
		Xgroup		= 'Xg',
		Ygroup		= 'Yg',
		As		= 'As',
	),
	dict (
		END		= 'X',
		BASE_C		= 'C',
		ABOVE_C		= 'T',
		BELOW_C		= 'B',
		POST_C		= 'R',
		PRE_C		= 'L',
		PRE_M		= 'LM',
		AFTER_MAIN	= 'A',
		AFTER_SUB	= 'AS',
		BEFORE_SUB	= 'BS',
		AFTER_POST	= 'AP',
		SMVD		= 'SM',
	),
]

# Add some of the values, to make them more readable, and to avoid duplicates:
# all_shorts[i] is the reverse lookup (abbreviation -> full name) of short[i].
all_shorts = [{s: v for v, s in table.items ()} for table in short]
564
# Build one (short macro, long name, occurrence count, original value) tuple
# per category (i == 0) and per position (i == 1), in sorted value order.
what = ["OT", "POS"]
what_short = ["_OT", "_POS"]
cat_defs = []
for i, (long_prefix, short_prefix) in enumerate (zip (what, what_short)):
	aliases, taken, counts = short[i], all_shorts[i], values[i]
	for v in sorted (counts):
		s = aliases.get (v)
		if s is None:
			# No hand-picked alias: use the upper-case ASCII letters of
			# the name, after collapsing '_And_' to '_'.
			s = ''.join (c for c in v.replace ('_And_', '_') if 'A' <= c <= 'Z')
			if s in taken:
				raise Exception ("Duplicate short value alias", v, taken[s])
			taken[s] = v
			aliases[v] = s
		# Position names are upper-cased in the long form; categories are not.
		long_name = v.upper () if i else v
		cat_defs.append ((short_prefix + '_' + s, long_prefix + '_' + long_name, str (counts[v]), v))
581
# Column widths, so that the defines line up.
maxlen_s, maxlen_l, maxlen_n = [max (len (c[col]) for c in cat_defs) for col in range (3)]

# One group of defines per prefix, each preceded by a blank line.
for s in what_short:
	print ()
	for short_name, long_name, count, original in cat_defs:
		if s not in short_name:
			continue
		print ("#define %s %s /* %s chars; %s */" %
			(short_name.ljust (maxlen_s), long_name.ljust (maxlen_l), count.rjust (maxlen_n), original))

for line in (
	"",
	'#pragma GCC diagnostic pop',
	"",
	"#define INDIC_COMBINE_CATEGORIES(S,M) ((S) | ((M) << 8))",
	"",
	"#define _(S,M) INDIC_COMBINE_CATEGORIES (%s_##S, %s_##M)" % tuple(what_short),
	"",
	"",
):
	print (line)
598
# Running totals maintained by print_block: table slots emitted, slots that
# hold real data, and the last block name for which a heading was printed.
total = 0
used = 0
last_block = None
def print_block (block, start, end, data):
	"""Print the table cells for code points `start`..`end` (inclusive).

	`start` must be a multiple of 8 and `end` one less than a multiple of
	8, so that cells come in rows of eight, each row led by a comment with
	its first code point. Code points missing from `data` are printed with
	the module-level `defaults`. When `block` is truthy and differs from
	the previously printed block, a heading comment is printed first.

	Updates the module-level counters `total`, `used` and `last_block`.
	"""
	global total, used, last_block
	if block and block != last_block:
		print ("\n\n  /* %s */" % block)
	assert start % 8 == 0
	assert (end+1) % 8 == 0
	cat_short, pos_short = short
	for u in range (start, end+1):
		if u % 8 == 0:
			print ("\n  /* %04X */" % u, end="")
		cat, pos = data.get (u, defaults)[:2]
		print ("%9s" % ("_(%s,%s)," % (cat_short[cat], pos_short[pos])), end="")

	total += end - start + 1
	used += sum (1 for u in range (start, end+1) if u in data)
	if block:
		last_block = block
624
# Emit the table body. Code points are visited in ascending order and
# grouped into 8-aligned runs; nearby runs are joined by default-filled
# padding, while distant runs start a new range with its own offset define.
uu = sorted (indic_data)

last = -100000
offset = 0
starts = []
ends = []
print ("static const uint16_t indic_table[] = {")
for u in uu:
	if u <= last:
		# Already covered by the previously printed run.
		continue
	block = indic_data[u][2]

	# Round the start down to a multiple of 8, extend the run across
	# consecutive code points of the same block, then round the end up to
	# the last cell of its row of eight.
	start = u//8*8
	end = start+1
	# Membership is tested on the dict: it holds exactly the keys of `uu`
	# but answers in constant time, unlike a search through the list.
	while end in indic_data and block == indic_data[end][2]:
		end += 1
	end = (end-1)//8*8 + 7

	if start != last + 1:
		if start - last <= 1+16*2:
			# Small gap: pad it with default entries.
			print_block (None, last+1, start-1, indic_data)
		else:
			# Large gap: close the previous range (there is none before
			# the first run) and open a new one.
			if last >= 0:
				ends.append (last + 1)
				offset += ends[-1] - starts[-1]
			print ()
			print ()
			print ("#define indic_offset_0x%04xu %d" % (start, offset))
			starts.append (start)

	print_block (block, start, end, indic_data)
	last = end
# Close the final range.
ends.append (last + 1)
offset += ends[-1] - starts[-1]
print ()
print ()
occupancy = used * 100. / total
page_bits = 12
print ("}; /* Table items: %d; occupancy: %d%% */" % (offset, occupancy))
# Emit hb_indic_get_categories(): a switch on the page (u >> page_bits)
# whose cases test the single code points and the table ranges that touch
# that page.
print ()
for line in (
	"uint16_t",
	"hb_indic_get_categories (hb_codepoint_t u)",
	"{",
	"  switch (u >> %d)" % page_bits,
	"  {",
):
	print (line)
pages = {u >> page_bits for u in starts + ends + list (singles)}
for p in sorted (pages):
	print ("    case 0x%0Xu:" % p)
	for u, d in singles.items ():
		if u >> page_bits == p:
			print ("      if (unlikely (u == 0x%04Xu)) return _(%s,%s);" % (u, short[0][d[0]], short[1][d[1]]))
	for start, end in zip (starts, ends):
		if p in (start >> page_bits, end >> page_bits):
			offset = "indic_offset_0x%04xu" % start
			print ("      if (hb_in_range<hb_codepoint_t> (u, 0x%04Xu, 0x%04Xu)) return indic_table[u - 0x%04Xu + %s];" % (start, end-1, start, offset))
	print ("      break;")
	print ("")
for line in (
	"    default:",
	"      break;",
	"  }",
	"  return _(X,X);",
	"}",
	"",
	"#undef _",
	"#undef INDIC_COMBINE_CATEGORIES",
):
	print (line)

# Undefine every short macro defined earlier, grouped by prefix.
for i in range (2):
	print ()
	for v in sorted (values[i]):
		print ("#undef %s_%s" % (what_short[i], short[i][v]))
print ()
print ('#endif')
print ()
print ("/* == End of generated table == */")
701
# Maintain at least 50% occupancy in the table: fail the run when fewer
# than half of the emitted slots hold real data.
too_sparse = occupancy < 50
if too_sparse:
	raise Exception ("Table too sparse, please investigate: ", occupancy)
705