• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# © 2016 and later: Unicode, Inc. and others.
2# License & terms of use: http://www.unicode.org/copyright.html#License
3#
4# File: Latin_InterIndic.txt
5# Generated from CLDR
6#
7
8# Latin-InterIndic
9#:: NFD;
# Each variable below names one InterIndic character and assigns it a code
# point in the \uE001-\uE083 range. The rules later in the file refer to these
# variables rather than to the raw code points.
10#\uE000 reserved
11#consonants
12$chandrabindu=\uE001;
13$anusvara=\uE002;
14$visarga=\uE003;
15#\uE004 reserved
16# w←vowel→ represents the stand-alone form
17$wa=\uE005;
18$waa=\uE006;
19$wi=\uE007;
20$wii=\uE008;
21$wu=\uE009;
22$wuu=\uE00A;
23$wr=\uE00B;
24$wl=\uE00C;
25$wce=\uE00D; # LETTER CANDRA E
26$wse=\uE00E; # LETTER SHORT E
27$we=\uE00F;  # ए LETTER E
28$wai=\uE010;
29$wco=\uE011; # LETTER CANDRA O
30$wso=\uE012; # LETTER SHORT O
31$wo=\uE013;  # ओ LETTER O
32$wau=\uE014;
# Consonant letters: $ka through $ha occupy the contiguous run \uE015-\uE039.
33$ka=\uE015;
34$kha=\uE016;
35$ga=\uE017;
36$gha=\uE018;
37$nga=\uE019;
38$ca=\uE01A;
39$cha=\uE01B;
40$ja=\uE01C;
41$jha=\uE01D;
42$nya=\uE01E;
43$tta=\uE01F;
44$ttha=\uE020;
45$dda=\uE021;
46$ddha=\uE022;
47$nna=\uE023;
48$ta=\uE024;
49$tha=\uE025;
50$da=\uE026;
51$dha=\uE027;
52$na=\uE028;
53$ena=\uE029; #compatibility
54$pa=\uE02A;
55$pha=\uE02B;
56$ba=\uE02C;
57$bha=\uE02D;
58$ma=\uE02E;
59$ya=\uE02F;
60$ra=\uE030;
61$rra=\uE031;
62$la=\uE032;
63$lla=\uE033;
64$ela=\uE034; #compatibility
65$va=\uE035;
# NOTE: $vva is assigned out of sequence (\uE081), i.e. outside \uE015-\uE039.
66$vva=\uE081;
67$sha=\uE036;
68$ssa=\uE037;
69$sa=\uE038;
70$ha=\uE039;
71#\u093A Reserved
72#\u093B Reserved
73$nukta=\uE03C;
74$avagraha=\uE03D; # SIGN AVAGRAHA
# Dependent vowel signs occupy \uE03E-\uE04C, followed by the virama at \uE04D.
75# ←vowel→ represents the dependent form
76$aa=\uE03E;
77$i=\uE03F;
78$ii=\uE040;
79$u=\uE041;
80$uu=\uE042;
81$rh=\uE043;
82$rrh=\uE044;
83$ce=\uE045; #VOWEL SIGN CANDRA E
84$se=\uE046; #VOWEL SIGN SHORT E
85$e=\uE047;
86$ai=\uE048;
87$co=\uE049; # VOWEL SIGN CANDRA O
88$so=\uE04A; # VOWEL SIGN SHORT O
89$o=\uE04B;  # ो
90$au=\uE04C;
91$virama=\uE04D;
92# \u094E Reserved
93# \u094F Reserved
94$om = \uE050; # OM
95# \u0951→;        # UNMAPPED STRESS SIGN UDATTA
96# \u0952→;        # UNMAPPED STRESS SIGN ANUDATTA
97# \u0953→;        # UNMAPPED GRAVE ACCENT
98# \u0954→;        # UNMAPPED ACUTE ACCENT
99$lm = \uE055;#  Telugu Length Mark
100$ailm=\uE056;#  AI Length Mark
101$aulm=\uE057;#  AU Length Mark
102#urdu compatibility forms
103$uka=\uE058;
104$ukha=\uE059;
105$ugha=\uE05A;
106$ujha=\uE05B;
107$uddha=\uE05C;
108$udha=\uE05D;
109$ufa=\uE05E;
110$uya=\uE05F;
# $wrr and $wll are the targets of the context-free r\u0325\u0304 and
# l\u0325\u0304 rules; $lh and $llh are the targets of the l\u0325 and
# l\u0325\u0304 rules that apply after a consonant, \u0314 or virama.
111$wrr=\uE060;
112$wll=\uE061;
113$lh=\uE062;
114$llh=\uE063;
115$danda=\uE064;
116$doubleDanda=\uE065;
117$zero=\uE066;     # DIGIT ZERO
118$one=\uE067;      # DIGIT ONE
119$two=\uE068;      # DIGIT TWO
120$three=\uE069;    # DIGIT THREE
121$four=\uE06A;     # DIGIT FOUR
122$five=\uE06B;     # DIGIT FIVE
123$six=\uE06C;      # DIGIT SIX
124$seven=\uE06D;    # DIGIT SEVEN
125$eight=\uE06E;    # DIGIT EIGHT
126$nine=\uE06F;     # DIGIT NINE
# Target of the glottal-stop (ʔ) rule; assigned out of sequence at \uE082.
127$dgs=\uE082;
# $ecp0-$ecpF name the sixteen code points \uE070-\uE07F. No rule visible in
# this file references them.
128# For all other scripts
129$ecp0=\uE070;
130$ecp1=\uE071;
131$ecp2=\uE072;
132$ecp3=\uE073;
133$ecp4=\uE074;
134$ecp5=\uE075;
135$ecp6=\uE076;
136$ecp7=\uE077;
137$ecp8=\uE078;
138$ecp9=\uE079;
139$ecpA=\uE07A;
140$ecpB=\uE07B;
141$ecpC=\uE07C;
142$ecpD=\uE07D;
143$ecpE=\uE07E;
144$ecpF=\uE07F;
145# Khanda-ta
146$kta=\uE083;
147# ॰→; # nothing in Latin maps to InterIndic ABBREVIATION SIGN
# Character sets used as rule contexts.
# $depVowelAbove = $aa, $i, $ii and $ce through $au;
# $depVowelBelow = $u, $uu, $rh, $rrh. They select which sign a '~' becomes.
148$depVowelAbove=[\uE03E-\uE040\uE045-\uE04C];
149$depVowelBelow=[\uE041-\uE044];
# $endThing: a virama immediately before one of these is removed.
150$endThing=[$danda$doubleDanda];
151# $x was originally called '§'; $z was '%'
# NOTE(review): $x is defined but not referenced by any rule visible in this file.
152$x=[$virama$aa$ai$au$ii$i$uu$u$rrh$rh$lh$e$o$se$ce$so$co];
# $z: the lowercase ASCII letters other than a, e, i, o, u.
153$z=[bcdfghjklmnpqrstvwxyz];
# $consonants: the InterIndic run $ka-$ha, the Latin letters in $z, and the
# ka-ha letter ranges of nine Indic scripts.
# NOTE(review): $vva (\uE081) falls outside [$ka-$ha] and so is not in this
# set - confirm whether that is intended.
154$consonants=[[$ka-$ha]$z[क-ह][ক-হ][ਕ-ਹ][ક-હ][କ-ହ][க-ஹ][క-హ][ಕ-ಹ][ക-ഹ]];
# Conversion rules start here. Rule order matters: at each position the first
# rule that matches is applied, so longer patterns are listed before their
# own prefixes (e.g. `ai` before `a`).
#
# Direct mappings from a combining mark (alone or on m / h) to InterIndic signs.
155\u0315 → $avagraha;
156\u0303→$chandrabindu$anusvara;
157m\u0310→$chandrabindu;
158h\u0323→$visarga;
# A bare x expands to the sequence ka + virama + sa.
159x→$ka$virama$sa;
160# convert to independent forms at start of word or syllable:
161# dependent forms for roundtrip
# A vowel prefixed with \u0314 maps to its dependent sign; these rules carry
# no context, so they apply wherever the sequence occurs.
162\u0314a\u0304→$aa;
163\u0314ai→$ai;
164\u0314au→$au;
165\u0314ii→$ii;
166\u0314i\u0304→$ii;
167\u0314i→$i;
168\u0314u\u0304→$uu;
169\u0314u→$u;
170\u0314r\u0325\u0304→$rrh;
171\u0314r\u0325→$rh;
172\u0314l\u0325\u0304→$llh;
173\u0314lh→$lh;
174\u0314l\u0325→$lh;
175\u0314e\u0304→$e;
176\u0314o\u0304→$o;
# \u0314 followed by a plain a produces no output at all.
177\u0314a→;
178\u0314e\u0306→$ce;
179\u0314o\u0306→$co;
180\u0314e→$se;
181\u0314o→$so;
182# preceded by consonants
# `$consonants {` is a before-context: the vowel is rewritten to its dependent
# sign only when the preceding character is in $consonants, and that preceding
# character itself is left unchanged.
183$consonants{ a\u0304→$aa;
184$consonants{ ai→$ai;
185$consonants{ au→$au;
186$consonants{ ii→$ii;
187$consonants{ i\u0304→$ii;
188$consonants{ i→$i;
189$consonants{ u\u0304→$uu;
190$consonants{ u→$u;
191$consonants{ r\u0325\u0304→$rrh;
192$consonants{ r\u0325a→$rh;
193$consonants{ r\u0325→$rh;
194$consonants{ l\u0325\u0304→$llh;
195$consonants{ lh→$lh;
196$consonants{ l\u0325→$lh;
197$consonants{ e\u0304→$e;
198$consonants{ o\u0304→$o;
199$consonants{ e\u0306→$ce;
200$consonants{ o\u0306→$co;
201$consonants{ e→$se;
202$consonants{ o→$so;
# With no context, a vowel maps to its stand-alone (w-prefixed) form.
203# e.g. keai -→ {ka}{e}{wai}; k'ai -→ {ka}{wai}; (ai) -→ ({wai})
204a\u0304→$waa;
205ai→$wai;
206au→$wau;
207i\u0304→$wii;
208i→$wi;
209u\u0304→$wuu;
210u→$wu;
211r\u0325\u0304→$wrr;
212r\u0325→$wr;
213l\u0325\u0304→$wll;
214lh→$wl;
215l\u0325→$wl;
216e\u0304→$we;
217o\u0304→$wo;
218a→$wa;
219e\u0306→$wce;
220o\u0306→$wco;
221e→$wse;
# '' is the rule-syntax escape for one literal apostrophe: 'om maps to $om.
222''om→$om;
223o→$wso;
# In `a } b → c`, b is an after-context: it must follow a for the rule to
# match but is not consumed. In a result, `|` sets where matching resumes,
# so `$na|$virama` lets later rules see the virama again.
224# rules for anusvara
# Exceptions listed first: n before r\u0325, l\u0325 or "na" stays a plain
# $na + virama instead of becoming an anusvara.
225n}r\u0325           → $na|$virama;
226n}l\u0325           → $na|$virama;
227n}na                → $na|$virama;
# A nasal directly followed by one of the listed letters becomes $anusvara.
228n\u0307}[kg]        → $anusvara;
229n\u0307}n\u0307     → $anusvara;
230n\u0304}[cj]        → $anusvara;
231n\u0304}n\u0303     → $anusvara;
232n\u0323}[tdn]\u0323 → $anusvara;
233n}[tdn]             → $anusvara;
234m}[pbm]             → $anusvara;
235n}[ylvshr]          → $anusvara;
# m with \u0307 is always $anusvara, whatever follows.
236m\u0307             → $anusvara;
# Each consonant rule below emits the InterIndic consonant plus a virama, with
# the cursor placed before the virama so that the `$virama <vowel>` rules
# further down can merge a following vowel into it.
237#urdu compatibility
238q→$uka|$virama;
239k\u0331h\u0331→$ukha |$virama;
240g\u0307→ $ugha | $virama;
241z → $ujha |$virama;
242f → $ufa|$virama;
# Unlike its neighbours, this rule emits $kta with no virama.
243t\u0331→$kta;
244# dev
245y\u0307→$uya|$virama;
246l\u0331→$ela|$virama;
247n\u0331→$ena|$virama;
248n\u0307→$nga|$virama;
249n\u0303→$nya|$virama;
250n\u0323→$nna|$virama;
251t\u0323h→$ttha|$virama;
252t\u0323→$tta|$virama;
253r\u0323h→$udha|$virama;
254r\u0323→$uddha|$virama;
255d\u0323h→$ddha|$virama;
256d\u0323→$dda|$virama;
# Plain letters: each digraph (e.g. kh) is listed before its single-letter
# prefix (k) so that the longer sequence is matched first.
257kh→$kha|$virama;
258k→$ka|$virama;
259gh→$gha|$virama;
260g→$ga|$virama;
261ch→$cha|$virama;
262c→$ca|$virama;
263jh→$jha|$virama;
264j→$ja|$virama;
265ny→$nya|$virama;
266tth→$ttha|$virama;
267ddh→$ddha|$virama;
268th→$tha|$virama;
269t→$ta|$virama;
270dh→$dha|$virama;
271d→$da|$virama;
272n→$na|$virama;
273ph→$pha|$virama;
274p→$pa|$virama;
275bh→$bha|$virama;
276b→$ba|$virama;
277m→$ma|$virama;
278y→$ya|$virama;
279r\u0331→$rra|$virama;
280r→$ra|$virama;
281l\u0323→$lla|$virama;
282l→$la|$virama;
283v→$va|$virama;
284w\u0307→$vva|$virama;
285w→$va|$virama;
286sh→$sha|$virama;
287ss→$ssa|$virama;
288s\u0323→$ssa|$virama;
289s\u0301→$sha|$virama;
290s→$sa|$virama;
291h→$ha|$virama;
# A full stop becomes a danda; a danda followed by another full stop becomes
# a double danda.
292'.'→$danda;
293$danda'.'→$doubleDanda;
# A '~' after a dependent vowel becomes $anusvara or $chandrabindu depending
# on which set the preceding vowel sign belongs to.
294$depVowelAbove{'~'→$anusvara;
295$depVowelBelow{'~'→$chandrabindu;
# These rules consume the virama left behind by a consonant rule together with
# the vowel that follows it.
296# convert to dependent forms after consonant with no vowel:
297# e.g. kai -→ {ka}{virama}ai -→ {ka}{ai}
298#$virama aa→$aa;
299$virama a\u0304→$aa;
300$virama ai→$ai;
301$virama au→$au;
302$virama ii→$ii;
303$virama i\u0304→$ii;
304$virama i→$i;
305#$virama uu→$uu;
306$virama u\u0304→$uu;
307$virama u→$u;
308#$virama rrh→$rrh;
309$virama r\u0325\u0304→$rrh;
310#$virama rh→$rh;
311$virama r\u0325a→$rh;
312$virama r\u0325→$rh;
313$virama l\u0325\u0304→$llh;
314$virama lh→$lh;
315$virama l\u0325→$lh;
316$virama e\u0304→$e;
317$virama o\u0304→$o;
# A virama followed by a plain a is removed together with the a, leaving the
# bare consonant.
318$virama a→;
319$virama e\u0306→$ce;
320$virama o\u0306→$co;
321$virama e→$se;
322$virama o→$so;
# '' matches one literal apostrophe. The virama and apostrophe are both
# consumed, and the vowel is emitted in its stand-alone (w-prefixed) form.
323# otherwise convert independent forms when separated by ': k'ai -→ {ka}{virama}{wai}
324#$virama''aa→$waa;
325$virama''a\u0304→$waa;
326$virama''ai→$wai;
327$virama''au→$wau;
328#$virama''ii→$wii;
329$virama''i\u0304→$wii;
330$virama''i→$wi;
331#$virama''uu→$wuu;
332$virama''u\u0304→$wuu;
333$virama''u→$wu;
334#$virama''rrh→$wrr;
335$virama''r\u0325\u0304→$wrr;
336#$virama''rh→$wr;
337$virama''r\u0325→$wr;
338$virama''l\u0325\u0304→$wll;
339#$virama''lh→$wl;
340$virama''l\u0325→$wl;
341$virama''e\u0304→$we;
342$virama''o\u0304→$wo;
343$virama''a→$wa;
344$virama''e\u0306→$wce;
345$virama''o\u0306→$wco;
346$virama''e→$wse;
347$virama''o→$wso;
# Same apostrophe-prefixed vowels when no virama precedes the apostrophe.
348# no virama
349''a\u0304→$waa;
350''ai→$wai;
351''au→$wau;
352''i\u0304→$wii;
353''i→$wi;
354''u\u0304→$wuu;
355''u→$wu;
356''r\u0325\u0304→$wrr;
357''r\u0325→$wr;
358''l\u0325\u0304→$wll;
359''l\u0325→$wl;
360''e\u0304→$we;
361''o\u0304→$wo;
362''a→$wa;
363''e\u0306→$wce;
364''o\u0306→$wco;
365''e→$wse;
366''o→$wso;
# Leftover viramas: one followed by a Latin consonant ($z) or a space is kept
# as-is (the follower is after-context only and is not consumed); one followed
# by a danda or double danda is removed.
367$virama } [$z] → $virama;
368$virama } ' ' → $virama ;
369$virama}$endThing→;
370ʔ→$dgs; # Glottal Stop
# ASCII digits map one-to-one onto the InterIndic digit variables.
3710→$zero;
3721→$one;
3732→$two;
3743→$three;
3754→$four;
3765→$five;
3776→$six;
3787→$seven;
3798→$eight;
3809→$nine;
# Any apostrophe not consumed by an earlier rule is deleted.
381''→;
382#:: NFC (NFD) ;
383
384