• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1<?xml version="1.0" encoding="UTF-8" ?>
2<!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
3<!--
4Copyright © 1991-2013 Unicode, Inc.
5CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
6For terms of use, see http://www.unicode.org/copyright.html
7-->
8<supplementalData>
9	<version number="$Revision: 12139 $"/>
10	<transforms>
11		<transform source="ThaiLogical" target="Latin" direction="both" visibility="internal">
12			<tRule><![CDATA[
13# Thai-Latin
14# This set of rules follows ISO 11940
15#     see http://homepage.mac.com/sirbinks/pdf/Thai.r2.pdf
16# except that ISO 11940 does not mention an implicit vowel, so we use ọ
17#
18# The transcription is fairly ugly, so we ought to also do the UNGEGN version
19#     see: http://www.eki.ee/wgrs/rom1_th.pdf
20# and probably make that the main variant.
21#
22# Note: this is an internal file. The NFD/NFC is handled externally, in the index
23# The insertion of spaces between words, the reversal of the vowels
24# and the conversion of space to semicolon are done *outside* of these rules.
25# So as far as these rules are concerned, the vowels are in logical order!
26# insert implicit vowel (and remove it going the other way)
27# COMMENTED out: the implicit vowel positions cannot be predicted algorithmically
28#$consonant = [ก-ฮ];
29#$vowel = [ะ-ฺเ-ไ็];
30#{ ( $consonant ) } [^$vowel \uE000] → | $1 \uE000 ;
31#\uE000 → ọ ;
32# ← ọ ;
33$notAbove = [^\p{ccc=0}\p{ccc=above}] ;
34$notBelow = [^\p{ccc=0}\p{ccc=below}] ;
35# Consonants
36# Warning: the 'h's need to be handled carefully!
37# What we really want to say is the following, but we can't
38# $notHAccent = !($notAbove*   ̄ | $notBelow*   ̣) ;
39# Since the only accents we care about that could cause problems are free-standing accents below, we use instead:
40$freeStandingBelow = [̥  ];
41$hAccent =  [   ̄     ̣];
42$notHAccent0 = [^$freeStandingBelow$hAccent];
43$notHAccent1 = $freeStandingBelow [^$hAccent];
44ห → h̄ ; # THAI CHARACTER HO HIP
45ห | $1 ← h ($notAbove*)    ̄; # backward case, account for reordering
46ฮ ↔ ḥ ; # THAI CHARACTER HO NOKHUK
47ข ↔ k̄h ; # THAI CHARACTER KHO KHAI
48ฃ ↔ ḳ̄h ; # THAI CHARACTER KHO KHUAT
49ฅ ↔ kʹh ; # THAI CHARACTER KHO KHON
50ฆ ↔ ḳh ; # THAI CHARACTER KHO RAKHANG
51ค ← kh } $notHAccent1 ; # THAI CHARACTER KHO KHWAI
52ค ↔ kh } $notHAccent0 ; # THAI CHARACTER KHO KHWAI
53ก ↔ k ; # THAI CHARACTER KO KAI
54ภ ↔ p̣h ; # THAI CHARACTER PHO SAMPHAO
55ผ ↔ p̄h ; # THAI CHARACTER PHO PHUNG
56พ ← ph } $notHAccent1 ; # THAI CHARACTER PHO PHAN
57พ ↔ ph } $notHAccent0 ; # THAI CHARACTER PHO PHAN
58ป ↔ p ; # THAI CHARACTER PO PLA
59ฉ ↔ c̄h ; # THAI CHARACTER CHO CHING
60ฌ ↔ c̣h ; # THAI CHARACTER CHO CHOE
61ช ← ch } $notHAccent1 ; # THAI CHARACTER CHO CHANG
62ช ↔ ch } $notHAccent0 ; # THAI CHARACTER CHO CHANG
63จ ↔ c ; # THAI CHARACTER CHO CHAN
64ฐ ↔ ṭ̄h ; # THAI CHARACTER THO THAN
65ฑ ↔ ṯh ; # THAI CHARACTER THO NANGMONTHO
66ฒ ↔ tʹh ; # THAI CHARACTER THO PHUTHAO
67ถ ↔ t̄h ; # THAI CHARACTER THO THUNG
68ธ ↔ ṭh ; # THAI CHARACTER THO THONG
69ท ← th } $notHAccent1 ; # THAI CHARACTER THO THAHAN
70ท ↔ th } $notHAccent0 ; # THAI CHARACTER THO THAHAN
71# Note: TO PATAK deviates from ISO since t-dotunder + h would be ambiguous. So it uses a vertical tick.
72ฏ ↔ t̩ ; # THAI CHARACTER TO PATAK
73ต ↔ t ; # THAI CHARACTER TO TAO
74# since there is no singleton g (generated), don't worry about that.
75ง ↔ ng ; # THAI CHARACTER NGO NGU
76ณ ↔ ṇ ; # THAI CHARACTER NO NEN
77น ↔ n ; # THAI CHARACTER NO NU
78ญ ↔ ỵ  ; # THAI CHARACTER YO YING
79ฎ ↔ ḍ ; # THAI CHARACTER DO CHADA
80ด ↔ d ; # THAI CHARACTER DO DEK
81บ ↔ b ; # THAI CHARACTER BO BAIMAI
82ฝ ↔ f̄ ; # THAI CHARACTER FO FA
83ฝ | $1 ← f ($notAbove*)    ̄; # backward case, account for reordering
84ม ↔ m ; # THAI CHARACTER MO MA
85ย ↔ y ; # THAI CHARACTER YO YAK
86ร ↔ r ; # THAI CHARACTER RO RUA
87ฤ ↔ v ; # THAI CHARACTER RU
88ฦ ↔ ł ; # THAI CHARACTER LU
89ว ↔ w ; # THAI CHARACTER WO WAEN
90ศ ↔ ṣ̄ ; # THAI CHARACTER SO SALA***
91ศ | $1 ← s    ̣ ($notAbove*)    ̄; # backward case, account for reordering
92ษ ↔ s̄ʹ ; # THAI CHARACTER SO RUSI
93ส → s̄ ; # THAI CHARACTER SO SUA***
94ส | $1 ← s ($notAbove*)    ̄; # backward case, account for reordering
95ฬ ↔ ḷ ; # THAI CHARACTER LO CHULA
96ล ↔ l ; # THAI CHARACTER LO LING
97ฟ ↔ f ; # THAI CHARACTER FO FAN
98อ ↔ x ; # THAI CHARACTER O ANG
99ซ ↔ s ; # THAI CHARACTER SO SO
100# vowels
101ั ↔ ạ ; # THAI CHARACTER MAI HAN-AKAT
102า → ā ; # THAI CHARACTER SARA AA
103า | $1 ← a ($notAbove*)    ̄; # backward case, account for reordering
104# We deviate from ISO for SARA AM for disambiguation
105ำ → a  ̉; # THAI CHARACTER SARA AM
106ำ | $1 ← a ($notAbove*)  ̉ ; # backward case, account for reordering
107ะ ↔ a ; # THAI CHARACTER SARA A
108ี ↔ ī ; # THAI CHARACTER SARA II
109ี | $1 ← i ($notAbove*)    ̄  ; # backward case, account for reordering
110ื ↔ ụ̄ ; # THAI CHARACTER SARA UEE
111ื | $1 ← u   ̣ ($notAbove*)    ̄  ; # backward case, account for reordering
112ึ ↔ ụ ; # THAI CHARACTER SARA UE
113ู ↔ ū ; # THAI CHARACTER SARA UU
114ู | $1 ← u  ($notAbove*)    ̄  ; # backward case, account for reordering
115ุ ↔ u ; # THAI CHARACTER SARA U
116ฯ ↔ ‡ ; # THAI CHARACTER PAIYANNOI
117# ฿ ↔ XXX ; # THAI CURRENCY SYMBOL BAHT
118เ ↔ e ; # THAI CHARACTER SARA E
119แ ↔ æ ; # THAI CHARACTER SARA AE
120โ ↔ o ; # THAI CHARACTER SARA O
121ใ ↔ ı ; # THAI CHARACTER SARA AI MAIMUAN
122ไ ↔ ị ; # THAI CHARACTER SARA AI MAIMALAI
123ๅ ↔ ɨ ; # THAI CHARACTER LAKKHANGYAO
124็ ↔ ̆ ; # THAI CHARACTER MAITAIKHU
125่ ↔ ̀ ; # THAI CHARACTER MAI EK
126้ ↔ ̂ ; # THAI CHARACTER MAI THO
127๊ ↔ ́ ; # THAI CHARACTER MAI TRI
128๋ ↔ ̌ ; # THAI CHARACTER MAI CHATTAWA
129์ ↔ ̒ ; # THAI CHARACTER THANTHAKHAT
130๎ ↔ '~' ; # THAI CHARACTER YAMAKKAN
131# We deviate from ISO for disambiguation
132ํ ↔  ̊ ; # THAI CHARACTER NIKHAHIT
133๏ ↔ '§' ; # THAI CHARACTER FONGMAN
134๐ ↔ 0 ; # THAI DIGIT ZERO
135๑ ↔ 1 ; # THAI DIGIT ONE
136๒ ↔ 2 ; # THAI DIGIT TWO
137๓ ↔ 3 ; # THAI DIGIT THREE
138๔ ↔ 4 ; # THAI DIGIT FOUR
139๕ ↔ 5 ; # THAI DIGIT FIVE
140๖ ↔ 6 ; # THAI DIGIT SIX
141๗ ↔ 7 ; # THAI DIGIT SEVEN
142๘ ↔ 8 ; # THAI DIGIT EIGHT
143๙ ↔ 9 ; # THAI DIGIT NINE
144๚ ↔ '||' ; # THAI CHARACTER ANGKHANKHU
145๛ ↔ » ; # THAI CHARACTER KHOMUT
146ๆ ↔ « ; # THAI CHARACTER MAIYAMOK
147# moved down to make shorter first
148#Note: PHINTHU deviates from ISO since underring causes canonical problems. So it uses spacing tick below.
149ฺ ↔ ˌ ; # THAI CHARACTER PHINTHU
150ิ ↔ i ; # THAI CHARACTER SARA I
151# fallbacks: reverse-only rules (←) for Latin letters not consumed by an earlier rule; the cursor (|) is placed before the replacement letter so that the replacement is itself matched again by the rules above
152| k ← g ;
153| k ← h ;
154| c ← j ;
155| k ← q ;
156| s ← z ;
157:: (lower); # empty forward part, so this applies Lower in the reverse (Latin to Thai) direction only
158			]]></tRule>
159		</transform>
160	</transforms>
161</supplementalData>
162