• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Override values For Indic_Syllabic_Category
2# Not derivable
3# Initial version based on Unicode 7.0 by Andrew Glass 2014-03-17
4# Updated for Unicode 10.0 by Andrew Glass 2017-07-25
5# Updated for Unicode 12.1 by Andrew Glass 2019-05-24
6# Updated for Unicode 13.0 by Andrew Glass 2020-07-28
7# Updated for Unicode 14.0 by Andrew Glass 2021-09-25
8# Updated for Unicode 15.0 by Andrew Glass 2022-09-16
9
10# ================================================
11# OVERRIDES TO ASSIGNED VALUES
12# ================================================
13
14# Indic_Syllabic_Category=Bindu
15193A          ; Bindu  # Mn       LIMBU SIGN KEMPHRENG
16AA29          ; Bindu  # Mn       CHAM VOWEL SIGN AA
1710A0D         ; Bindu  # Mn       KHAROSHTHI SIGN DOUBLE RING BELOW
18
19# ================================================
20
21# Indic_Syllabic_Category=Consonant
2219C1..19C7    ; Consonant # Lo   [7] NEW TAI LUE LETTER FINAL V..NEW TAI LUE LETTER FINAL B # Reassigned to avoid clustering with a base consonant
2325CC          ; Consonant # So       DOTTED CIRCLE # Reassigned to allow it to cluster as a generic base
24
25# ================================================
26
27# Indic_Syllabic_Category=Consonant_Dead
280F7F          ; Consonant_Dead    # Mc       TIBETAN SIGN RNAM BCAD # reassigned so that visarga can form an independent cluster, but see #19
29
30# ================================================
31
32# Indic_Syllabic_Category=Consonant_Final_Modifier
331C36          ; Consonant_Final_Modifier  # Mn   LEPCHA SIGN RAN
34
35# ================================================
36
37# Indic_Syllabic_Category=Gemination_Mark
3811134         ; Gemination_Mark  # Mc      CHAKMA MAAYYAA
39
40# ================================================
41
42# Indic_Syllabic_Category=Nukta
430F71          ; Nukta            # Mn       TIBETAN VOWEL SIGN AA # Reassigned to get this before an above vowel, but see #22
441BF2..1BF3    ; Nukta            # Mc   [2] BATAK PANGOLAT..BATAK PANONGONAN # see USE issue #20
45
46# ================================================
47
48# Indic_Syllabic_Category=Tone_Mark
491A7B..1A7C    ; Tone_Mark         # Mn   [2] TAI THAM SIGN MAI SAM..TAI THAM SIGN KHUEN-LUE KARAN
501A7F          ; Tone_Mark         # Mn       TAI THAM COMBINING CRYPTOGRAMMIC DOT
51
52# ================================================
53
54# Indic_Syllabic_Category=Vowel_Independent
55AAB1          ; Vowel_Independent # Lo       TAI VIET VOWEL AA
56AABA          ; Vowel_Independent # Lo       TAI VIET VOWEL UA
57AABD          ; Vowel_Independent # Lo       TAI VIET VOWEL AN
58
59# ================================================
60# ================================================
61# VALUES NOT ASSIGNED IN Indic_Syllabic_Category
62# ================================================
63# ================================================
64
65# Indic_Syllabic_Category=Consonant
660800..0815    ; Consonant # Lo   [22] SAMARITAN LETTER ALAF..SAMARITAN LETTER TAAF
670840..0858    ; Consonant # Lo   [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN
680F00..0F01    ; Consonant # Lo    [2] TIBETAN SYLLABLE OM..TIBETAN MARK GTER YIG MGO TRUNCATED
690F04..0F06    ; Consonant # Po    [3] TIBETAN MARK INITIAL YIG MGO MDUN MA..TIBETAN MARK CARET YIG MGO PHUR SHAD MA
701800          ; Consonant # Po        MONGOLIAN BIRGA # Reassigned so that legacy Birga + MFVS sequences still work
711807          ; Consonant # Po        MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER
72180A          ; Consonant # Po        MONGOLIAN NIRUGU
731820..1878    ; Consonant # Lo   [88] MONGOLIAN LETTER A..MONGOLIAN LETTER CHA WITH TWO DOTS
741843          ; Consonant # Lm        MONGOLIAN LETTER TODO LONG VOWEL SIGN
752D30..2D67    ; Consonant # Lo   [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO
762D6F          ; Consonant # Lm        TIFINAGH MODIFIER LETTER LABIALIZATION MARK
7710570..1057A  ; Consonant # Lo   [11] VITHKUQI CAPITAL LETTER A..VITHKUQI CAPITAL LETTER GA
781057C..1058A  ; Consonant # Lo   [15] VITHKUQI CAPITAL LETTER HA..VITHKUQI CAPITAL LETTER RE
791058C..10592  ; Consonant # Lo    [7] VITHKUQI CAPITAL LETTER SE..VITHKUQI CAPITAL LETTER XE
8010594..10595  ; Consonant # Lo    [2] VITHKUQI CAPITAL LETTER Y..VITHKUQI CAPITAL LETTER ZE
8110597..105A1  ; Consonant # Lo   [11] VITHKUQI SMALL LETTER A..VITHKUQI SMALL LETTER GA
82105A3..105B1  ; Consonant # Lo   [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE
83105B3..105B9  ; Consonant # Lo    [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE
84105BB..105BC  ; Consonant # Lo    [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE
8510AC0..10AC7  ; Consonant # Lo    [8] MANICHAEAN LETTER ALEPH..MANICHAEAN LETTER WAW
8610AC9..10AE4  ; Consonant # Lo   [28] MANICHAEAN LETTER ZAYIN..MANICHAEAN LETTER TAW
8710D00..10D23  ; Consonant # Lo   [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA
8810E80..10EA9  ; Consonant # Lo   [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET
8910EB0..10EB1  ; Consonant # Lo    [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE
9010F30..10F45  ; Consonant # Lo   [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN
9110F70..10F81  ; Consonant # Lo   [18] OLD UYGHUR LETTER ALEPH..OLD UYGHUR LETTER LESH
92111DA         ; Consonant # Lo        SHARADA EKAM
93#HIEROGLYPHS to be moved to new category
9413000..1342F  ; Consonant # Lo [1072] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH V011D
95#For the Begin and End segment to be handled fully correctly, the cluster model needs to be modified.
9613437..13438  ; Consonant # Cf    [2] EGYPTIAN HIEROGLYPH BEGIN SEGMENT..EGYPTIAN HIEROGLYPH END SEGMENT
9713441..13446  ; Consonant # Lo    [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN
9816B00..16B2F  ; Consonant # Lo   [48] PAHAWH HMONG VOWEL KEEB..PAHAWH HMONG CONSONANT CAU
9916F00..16F4A  ; Consonant # Lo   [75] MIAO LETTER PA..MIAO LETTER RTE
10016FE4         ; Consonant # Mn        KHITAN SMALL SCRIPT FILLER          # Avoids Mn pushing this into VOWEL class
10118B00..18CD5  ; Consonant # Lo  [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5
1021BC00..1BC6A  ; Consonant # Lo  [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M
1031BC70..1BC7C  ; Consonant # Lo   [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK
1041BC80..1BC88  ; Consonant # Lo    [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL
1051BC90..1BC99  ; Consonant # Lo   [10] DUPLOYAN AFFIX LOW ACUTE..DUPLOYAN AFFIX LOW ARROW
1061E100..1E12C  ; Consonant # Lo   [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W
1071E137..1E13D  ; Consonant # Lm    [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER
1081E14E         ; Consonant # Lo        NYIAKENG PUACHUE HMONG LOGOGRAM NYAJ
1091E14F         ; Consonant # So        NYIAKENG PUACHUE HMONG CIRCLED CA
1101E290..1E2AD  ; Consonant # Lo   [30] TOTO LETTER PA..TOTO LETTER A
1111E2C0..1E2EB  ; Consonant # Lo   [44] WANCHO LETTER AA..WANCHO LETTER YIH
1121E4D0..1E4EA  ; Consonant # Lo   [27] NAG MUNDARI LETTER O..NAG MUNDARI LETTER ELL
1131E4EB         ; Consonant # Lm        NAG MUNDARI SIGN OJOD
1141E900..1E921  ; Consonant # Lu   [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA
1151E922..1E943  ; Consonant # Ll   [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA
1161E94B         ; Consonant # Lm        ADLAM NASALIZATION MARK
117
118# ================================================
119
120# Indic_Syllabic_Category=Consonant_Placeholder
1211880..1884 ; Consonant_Placeholder # Lo   [5] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER ALI GALI INVERTED UBADAMA
122
123# ================================================
124
125# Indic_Syllabic_Category=Gemination_Mark
12610D27         ; Gemination_Mark   # Mn       HANIFI ROHINGYA SIGN TASSI
127
128# ================================================
129
130# Indic_Syllabic_Category=Modifying_Letter
131FE00..FE0F    ; Modifying_Letter  # Mn  [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16 # Need to treat them as isolated bases so they don't merge with a cluster in invalid scenarios
13216F50         ; Modifying_Letter  # Lo       MIAO LETTER NASALIZATION
133
134# ================================================
135
136# Indic_Syllabic_Category=Nukta
1370859..085B    ; Nukta            # Mn   [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK
1380F39          ; Nukta            # Mn       TIBETAN MARK TSA -PHRU # NOW IN UNICODE 10.0
1391885..1886    ; Nukta            # Mn   [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA
14018A9          ; Nukta            # Mn       MONGOLIAN LETTER ALI GALI DAGALGA
14110AE5..10AE6  ; Nukta            # Mn   [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW
14216F4F         ; Nukta            # Mn       MIAO SIGN CONSONANT MODIFIER BAR
1431BC9D..1BC9E  ; Nukta            # Mn   [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK
1441E944..1E94A  ; Nukta            # Mn   [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA
14510F82..10F85  ; Nukta            # Mn   [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW
146
147# ================================================
148
149# Indic_Syllabic_Category=Number
15010D30..10D39  ; Number              # Nd  [10] HANIFI ROHINGYA DIGIT ZERO..HANIFI ROHINGYA DIGIT NINE
15110F51..10F54  ; Number              # No   [4] SOGDIAN NUMBER ONE..SOGDIAN NUMBER ONE HUNDRED
15216AC0..16AC9  ; Number              # Nd  [10] TANGSA DIGIT ZERO..TANGSA DIGIT NINE
1531E140..1E149  ; Number              # Nd  [10] NYIAKENG PUACHUE HMONG DIGIT ZERO..NYIAKENG PUACHUE HMONG DIGIT NINE
1541E2F0..1E2F9  ; Number              # Nd  [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE
1551E4F0..1E4F9  ; Number              # Nd  [10] NAG MUNDARI DIGIT ZERO..NAG MUNDARI DIGIT NINE
1561E950..1E959  ; Number              # Nd  [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE
157
158# ================================================
159
160# Indic_Syllabic_Category=Tone_Mark
16107EB..07F3    ; Tone_Mark           # Mn   [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE
16207FD          ; Tone_Mark           # Mn       NKO DANTAYALAN
1630F86..0F87    ; Tone_Mark           # Mn   [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS
16417CF          ; Tone_Mark           # Mn       KHMER SIGN AHSDA
16510D24..10D26  ; Tone_Mark           # Mn   [3] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TANA
16610F46..10F50  ; Tone_Mark           # Mn  [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW
16716B30..16B36  ; Tone_Mark           # Mn   [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM
16816F8F..16F92  ; Tone_Mark           # Mn   [4] MIAO TONE RIGHT..MIAO TONE BELOW
1691E130..1E136  ; Tone_Mark           # Mn   [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D
1701E2AE         ; Tone_Mark           # Mn       TOTO SIGN RISING TONE
1711E2EC..1E2EF  ; Tone_Mark           # Mn   [4] WANCHO TONE TUP..WANCHO TONE KOINI
172
173# ================================================
174
175# Indic_Syllabic_Category=Virama
1762D7F          ; Virama              # Mn       TIFINAGH CONSONANT JOINER
177#HIEROGLYPHS to be moved to new category
17813430..13436  ; Virama              # Cf   [7] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH OVERLAY MIDDLE
17913439..1343B  ; Virama              # Cf   [3] EGYPTIAN HIEROGLYPH INSERT AT MIDDLE..EGYPTIAN HIEROGLYPH INSERT AT BOTTOM
180
181# ================================================
182
183# Indic_Syllabic_Category=Vowel_Independent
184AAB1          ; Vowel_Independent   # Lo       TAI VIET VOWEL AA
185AABA          ; Vowel_Independent   # Lo       TAI VIET VOWEL UA
186AABD          ; Vowel_Independent   # Lo       TAI VIET VOWEL AN
187
188# ================================================
189
190# Indic_Syllabic_Category=Vowel_Dependent
1910B55          ; Vowel_Dependent     # Mn       ORIYA SIGN OVERLINE
19210EAB..10EAC  ; Vowel_Dependent     # Mn   [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
19316F51..16F87  ; Vowel_Dependent     # Mc  [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI
1941E4EC..1E4EF  ; Vowel_Dependent     # Mn   [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH
195
196# ================================================
197
198# Indic_Syllabic_Category=Cantillation_Mark
199
2001CF8..1CF9    ; Cantillation_Mark   # Mn   [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE
201#HIEROGLYPHS to be moved to new category
20213440         ; Cantillation_Mark   # Mn       EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY
20313447..13455  ; Cantillation_Mark   # Mn  [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED
204
205# ================================================
206
207# Indic_Syllabic_Category=Symbol_Modifier
2081B6B..1B73    ; Symbol_Modifier     # Mn   [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG
209
210# ================================================
211# ================================================
212# PROPERTIES NOT ASSIGNED IN Indic_Syllabic_Category
213# ================================================
214# ================================================
215
216# USE, Extended_Syllabic_Category=Hieroglyph
217# 13000..1342F ; Hieroglyph          # Lo [1072] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH V011D
218# 13441..13446 ; Hieroglyph          # Lo    [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN
219
220# ================================================
221
222# USE, Extended_Syllabic_Category=Hieroglyph_Joiner
223# 13430..13436 ; Hieroglyph_Joiner   # Cf    [7] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH OVERLAY MIDDLE
224# 13439..1343B ; Hieroglyph_Joiner   # Cf    [3] EGYPTIAN HIEROGLYPH INSERT AT MIDDLE..EGYPTIAN HIEROGLYPH INSERT AT BOTTOM
225
226# ================================================
227
228# USE, Extended_Syllabic_Category=Hieroglyph_Mark_Begin
229# 005B        ; Hieroglyph_Mark_Begin  # Ps  LEFT SQUARE BRACKET
230# 007B        ; Hieroglyph_Mark_Begin  # Ps  LEFT CURLY BRACKET
231# 27E6        ; Hieroglyph_Mark_Begin  # Ps  MATHEMATICAL LEFT WHITE SQUARE BRACKET
232# 27E8        ; Hieroglyph_Mark_Begin  # Ps  MATHEMATICAL LEFT ANGLE BRACKET
233# 2E22        ; Hieroglyph_Mark_Begin  # Ps  TOP LEFT HALF BRACKET
234# 2E24        ; Hieroglyph_Mark_Begin  # Ps  BOTTOM LEFT HALF BRACKET
235
236# ================================================
237
238# USE, Extended_Syllabic_Category=Hieroglyph_Mark_End
239# 005D        ; Hieroglyph_Mark_End    # Pe  RIGHT SQUARE BRACKET
240# 007D        ; Hieroglyph_Mark_End    # Pe  RIGHT CURLY BRACKET
241# 27E7        ; Hieroglyph_Mark_End    # Pe  MATHEMATICAL RIGHT WHITE SQUARE BRACKET
242# 27E9        ; Hieroglyph_Mark_End    # Pe  MATHEMATICAL RIGHT ANGLE BRACKET
243# 2E23        ; Hieroglyph_Mark_End    # Pe  TOP RIGHT HALF BRACKET
244# 2E25        ; Hieroglyph_Mark_End    # Pe  BOTTOM RIGHT HALF BRACKET
245
246# ================================================
247
248# USE, Extended_Syllabic_Category=Hieroglyph_Segment_Begin
249# 13437        ; Hieroglyph_Segment_Begin  # Cf  EGYPTIAN HIEROGLYPH BEGIN SEGMENT
250
251# ================================================
252
253# USE, Extended_Syllabic_Category=Hieroglyph_Segment_End
254# 13438        ; Hieroglyph_Segment_End    # Cf  EGYPTIAN HIEROGLYPH END SEGMENT
255
256# ================================================
257
258# USE, Extended_Syllabic_Category=Hieroglyph_Mirror
259# 13440        ; Hieroglyph_Mirror    # Mn       EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY
260
261# ================================================
262
263# USE, Extended_Syllabic_Category=Hieroglyph_Modifier
264# 13447..13455 ; Hieroglyph_Modifier    # Mn  [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED
265
266# ================================================
267
268# eof
269