• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# © 2016 and later: Unicode, Inc. and others.
2# License & terms of use: http://www.unicode.org/copyright.html
3# Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
4#
5# File: ThaiLogical_Latin.txt
6# Generated from CLDR
7#
8
9# Thai-Latin
10# This set of rules follows ISO 11940
11#     see http://homepage.mac.com/sirbinks/pdf/Thai.r2.pdf
12# except that the standard does not mention an implicit vowel, so we use o\u0323
13#
14# The transcription is fairly ugly, so we ought to also do the UNGEGN version
15#     see: http://www.eki.ee/wgrs/rom1_th.pdf
16# and probably make that the main variant.
17#
18# Note: this is an internal file. The NFD/NFC is handled externally, in the index
19# The insertion of spaces between words, the reversal of the vowels
20# and the conversion of space to semicolon are done *outside* of these rules.
21# So as far as these rules are concerned, the vowels are in logical order!
22# insert implicit vowel (and remove it going the other way)
23# COMMENTED out: the implicit vowel positions cannot be predicted algorithmically
24#$consonant = [ก-ฮ];
25#$vowel = [ะ-\u0E3Aเ-ไ\u0E47];
26#{ ( $consonant ) } [^$vowel \uE000] → | $1 \uE000 ;
27#\uE000 → o\u0323 ;
28# ← o\u0323 ;
# Helper sets keyed on the canonical combining class (ccc):
#   $notAbove - any character whose ccc is neither 0 nor "above"
#   $notBelow - any character whose ccc is neither 0 nor "below"
# The backward rules use ($notAbove*) to step over marks that may stand between a base
# letter and its \u0304 / \u0309 accent. In the rules visible here $notBelow is only
# mentioned inside a comment; no active rule references it.
29$notAbove = [^\p{ccc=0}\p{ccc=above}] ;
30$notBelow = [^\p{ccc=0}\p{ccc=below}] ;
31# Consonants
32# Warning: the 'h's need to be handled carefully!
33# What we really want to say is the following, but we can't
34# $notHAccent = !($notAbove*   \u0304 | $notBelow*   \u0323) ;
35# Since the only accents we care about that could cause problems are free-standing accents below, we use instead:
# Sets used to build the after-context of the plain "kh" / "ph" / "ch" / "th" rules:
#   $freeStandingBelow - U+0325 (unquoted white space inside [...] is ignored in UnicodeSet patterns)
#   $hAccent           - U+0304 and U+0323, the accents the HO HIP / HO NOKHUK rules put on 'h'
#   $notHAccent0       - a single character that belongs to neither of the two sets above
#   $notHAccent1       - U+0325 followed by a single character that is not in $hAccent
36$freeStandingBelow = [\u0325  ];
37$hAccent =  [   \u0304     \u0323];
38$notHAccent0 = [^$freeStandingBelow$hAccent];
39$notHAccent1 = $freeStandingBelow [^$hAccent];
# The two Thai "h" letters.
# HO HIP is split into a forward-only rule (→) and a backward-only rule (←). Going backward,
# marks matched by ($notAbove*) may sit between the 'h' and its \u0304; they are captured as $1
# and written back after the cursor (|), presumably so that they are transliterated in turn.
# HO NOKHUK needs no such handling here and is a plain two-way rule (↔).
40ห → h\u0304 ; # THAI CHARACTER HO HIP
41ห | $1 ← h ($notAbove*)    \u0304; # backward case, account for reordering
42ฮ ↔ h\u0323 ; # THAI CHARACTER HO NOKHUK
# Stops that have aspirated variants: the k, p, c and t families.
# Inside each family the accented "<letter><accent>h" forms are listed first, then the plain
# digraph (kh, ph, ch, th), then the bare letter, so every longer Latin sequence appears ahead
# of its own prefix. Do not reorder these rules.
# The plain digraph rules carry an after-context (the part following '}'): they match only when
# the text after the 'h' is $notHAccent0 or $notHAccent1, which keeps an 'h' that carries one of
# the $hAccent marks from being consumed as part of the digraph. The $notHAccent1 variant is
# backward-only (←); the $notHAccent0 variant is two-way (↔).
43ข ↔ k\u0304h ; # THAI CHARACTER KHO KHAI
44ฃ ↔ k\u0323\u0304h ; # THAI CHARACTER KHO KHUAT
45ฅ ↔ kʹh ; # THAI CHARACTER KHO KHON
46ฆ ↔ k\u0323h ; # THAI CHARACTER KHO RAKHANG
47ค ← kh } $notHAccent1 ; # THAI CHARACTER KHO KHWAI
48ค ↔ kh } $notHAccent0 ; # THAI CHARACTER KHO KHWAI
49ก ↔ k ; # THAI CHARACTER KO KAI
50ภ ↔ p\u0323h ; # THAI CHARACTER PHO SAMPHAO
51ผ ↔ p\u0304h ; # THAI CHARACTER PHO PHUNG
52พ ← ph } $notHAccent1 ; # THAI CHARACTER PHO PHAN
53พ ↔ ph } $notHAccent0 ; # THAI CHARACTER PHO PHAN
54ป ↔ p ; # THAI CHARACTER PO PLA
55ฉ ↔ c\u0304h ; # THAI CHARACTER CHO CHING
56ฌ ↔ c\u0323h ; # THAI CHARACTER CHO CHOE
57ช ← ch } $notHAccent1 ; # THAI CHARACTER CHO CHANG
58ช ↔ ch } $notHAccent0 ; # THAI CHARACTER CHO CHANG
59จ ↔ c ; # THAI CHARACTER CHO CHAN
60ฐ ↔ t\u0323\u0304h ; # THAI CHARACTER THO THAN
61ฑ ↔ t\u0331h ; # THAI CHARACTER THO NANGMONTHO
62ฒ ↔ tʹh ; # THAI CHARACTER THO PHUTHAO
63ถ ↔ t\u0304h ; # THAI CHARACTER THO THUNG
64ธ ↔ t\u0323h ; # THAI CHARACTER THO THONG
65ท ← th } $notHAccent1 ; # THAI CHARACTER THO THAHAN
66ท ↔ th } $notHAccent0 ; # THAI CHARACTER THO THAHAN
67#Note: TO PATAK deviates from ISO since t-dotunder + h would be ambiguous. So it uses vertical tick.
68ฏ ↔ t\u0329 ; # THAI CHARACTER TO PATAK
69ต ↔ t ; # THAI CHARACTER TO TAO
# Remaining consonants.
# Letters whose romanization ends in \u0304 (FO FA, SO SALA, SO SUA) get an additional
# backward-only rule that accepts intervening ($notAbove*) marks before the \u0304, captures
# them as $1 and writes them back after the cursor (|).
# Longer Latin forms are listed before their prefixes (e.g. n\u0323 before n, f\u0304 before f,
# every accented s form before the bare s on the last line); keep that order.
70# since there is no singleton g (generated), don't worry about that.
71ง ↔ ng ; # THAI CHARACTER NGO NGU
72ณ ↔ n\u0323 ; # THAI CHARACTER NO NEN
73น ↔ n ; # THAI CHARACTER NO NU
74ญ ↔ y\u0323  ; # THAI CHARACTER YO YING
75ฎ ↔ d\u0323 ; # THAI CHARACTER DO CHADA
76ด ↔ d ; # THAI CHARACTER DO DEK
77บ ↔ b ; # THAI CHARACTER BO BAIMAI
78ฝ ↔ f\u0304 ; # THAI CHARACTER FO FA
79ฝ | $1 ← f ($notAbove*)    \u0304; # backward case, account for reordering
80ม ↔ m ; # THAI CHARACTER MO MA
81ย ↔ y ; # THAI CHARACTER YO YAK
82ร ↔ r ; # THAI CHARACTER RO RUA
83ฤ ↔ v ; # THAI CHARACTER RU
84ฦ ↔ ł ; # THAI CHARACTER LU
85ว ↔ w ; # THAI CHARACTER WO WAEN
86ศ ↔ s\u0323\u0304 ; # THAI CHARACTER SO SALA***
87ศ | $1 ← s    \u0323 ($notAbove*)    \u0304; # backward case, account for reordering
88ษ ↔ s\u0304ʹ ; # THAI CHARACTER SO RUSI
89ส → s\u0304 ; # THAI CHARACTER SO SUA***
90ส | $1 ← s ($notAbove*)    \u0304; # backward case, account for reordering
91ฬ ↔ l\u0323 ; # THAI CHARACTER LO CHULA
92ล ↔ l ; # THAI CHARACTER LO LING
93ฟ ↔ f ; # THAI CHARACTER FO FAN
94อ ↔ x ; # THAI CHARACTER O ANG
95ซ ↔ s ; # THAI CHARACTER SO SO
96# vowels
# Vowel signs romanized on the bases a, i and u.
# Each form that ends in \u0304 or \u0309 has a backward-only companion rule that accepts
# intervening ($notAbove*) marks before that accent, captures them as $1 and writes them back
# after the cursor (|). Accented forms precede the bare vowel they start with (a, u), so the
# longer Latin sequence is listed first.
97\u0E31 ↔ a\u0323 ; # THAI CHARACTER MAI HAN-AKAT
98า → a\u0304 ; # THAI CHARACTER SARA AA
99า | $1 ← a ($notAbove*)    \u0304; # backward case, account for reordering
100# We deviate from ISO for SARA AM for disambiguation
101ำ → a  \u0309; # THAI CHARACTER SARA AM
102ำ | $1 ← a ($notAbove*)  \u0309 ; # backward case, account for reordering
103ะ ↔ a ; # THAI CHARACTER SARA A
104\u0E35 ↔ i\u0304 ; # THAI CHARACTER SARA II
105\u0E35 | $1 ← i ($notAbove*)    \u0304  ; # backward case, account for reordering
106\u0E37 ↔ u\u0323\u0304 ; # THAI CHARACTER SARA UEE
107\u0E37 | $1 ← u   \u0323 ($notAbove*)    \u0304  ; # backward case, account for reordering
108\u0E36 ↔ u\u0323 ; # THAI CHARACTER SARA UE
109\u0E39 ↔ u\u0304 ; # THAI CHARACTER SARA UU
110\u0E39 | $1 ← u  ($notAbove*)    \u0304  ; # backward case, account for reordering
111\u0E38 ↔ u ; # THAI CHARACTER SARA U
# Leading vowels, tone marks and other signs. All rules here are simple two-way (↔) mappings.
# The Thai combining marks U+0E47..U+0E4D map to a single Latin combining mark each, except
# YAMAKKAN (U+0E4E), which maps to the quoted literal '~'.
112ฯ ↔ ‡ ; # THAI CHARACTER PAIYANNOI
# The BAHT rule below is disabled (commented out); XXX is a placeholder, not a real mapping.
113# ฿ ↔ XXX ; # THAI CURRENCY SYMBOL BAHT
114เ ↔ e ; # THAI CHARACTER SARA E
115แ ↔ æ ; # THAI CHARACTER SARA AE
116โ ↔ o ; # THAI CHARACTER SARA O
117ใ ↔ ı ; # THAI CHARACTER SARA AI MAIMUAN
118ไ ↔ i\u0323 ; # THAI CHARACTER SARA AI MAIMALAI
119ๅ ↔ ɨ ; # THAI CHARACTER LAKKHANGYAO
120\u0E47 ↔ \u0306 ; # THAI CHARACTER MAITAIKHU
121\u0E48 ↔ \u0300 ; # THAI CHARACTER MAI EK
122\u0E49 ↔ \u0302 ; # THAI CHARACTER MAI THO
123\u0E4A ↔ \u0301 ; # THAI CHARACTER MAI TRI
124\u0E4B ↔ \u030C ; # THAI CHARACTER MAI CHATTAWA
125\u0E4C ↔ \u0312 ; # THAI CHARACTER THANTHAKHAT
126\u0E4E ↔ '~' ; # THAI CHARACTER YAMAKKAN
127# We deviate from ISO for disambiguation
128\u0E4D ↔  \u030A ; # THAI CHARACTER NIKHAHIT
129๏ ↔ '§' ; # THAI CHARACTER FONGMAN
# Thai digits map one-to-one, in both directions, to the ASCII digits 0-9.
130๐ ↔ 0 ; # THAI DIGIT ZERO
131๑ ↔ 1 ; # THAI DIGIT ONE
132๒ ↔ 2 ; # THAI DIGIT TWO
133๓ ↔ 3 ; # THAI DIGIT THREE
134๔ ↔ 4 ; # THAI DIGIT FOUR
135๕ ↔ 5 ; # THAI DIGIT FIVE
136๖ ↔ 6 ; # THAI DIGIT SIX
137๗ ↔ 7 ; # THAI DIGIT SEVEN
138๘ ↔ 8 ; # THAI DIGIT EIGHT
139๙ ↔ 9 ; # THAI DIGIT NINE
# Thai punctuation, followed by two rules that are deliberately placed late in the file.
# '||' is quoted so that the two bars are taken as literal text rather than as cursor markers.
140๚ ↔ '||' ; # THAI CHARACTER ANGKHANKHU
141๛ ↔ » ; # THAI CHARACTER KHOMUT
142ๆ ↔ « ; # THAI CHARACTER MAIYAMOK
143# moved down to make shorter first
# NOTE(review): the line above presumably explains why SARA I (bare "i") sits here, after the
# longer i\u0304 and i\u0323 rules earlier in the file - confirm with the CLDR source history.
144#Note: PHINTHU deviates from ISO since underring causes canonical problems. So it uses spacing tick below.
145\u0E3A ↔ ˌ ; # THAI CHARACTER PHINTHU
146\u0E34 ↔ i ; # THAI CHARACTER SARA I
147# fallbacks
# Backward-only (←) rules for Latin letters that no rule visible above matches on their own:
# g, bare h, j, q and z. Each is replaced by k, c or s, and the replacement is written after the
# cursor (|), presumably so that the ordinary k / c / s rules then turn it into a Thai letter.
148| k ← g ;
149| k ← h ;
150| c ← j ;
151| k ← q ;
152| s ← z ;
# The parenthesised form names a transform for the backward direction only: Latin input is
# lower-cased when converting Latin to Thai, and nothing extra is applied going Thai to Latin.
153:: (lower);
154
155