<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
<!--
Copyright © 1991-2013 Unicode, Inc.
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
For terms of use, see http://www.unicode.org/copyright.html
-->
<supplementalData>
	<version number="$Revision: 12139 $"/>
	<transforms>
		<transform source="ThaiLogical" target="Latin" direction="both" visibility="internal">
			<tRule><![CDATA[
# Thai-Latin
# This set of rules follows ISO 11940
# see http://homepage.mac.com/sirbinks/pdf/Thai.r2.pdf
# except that it does not mention an implicit vowel, so we use ọ
#
# The transcription is fairly ugly, so we ought to also do the UNGEGN version
# see: http://www.eki.ee/wgrs/rom1_th.pdf
# and probably make that the main variant.
#
# Note: this is an internal file. The NFD/NFC is handled externally, in the index
# The insertion of spaces between words, the reversal of the vowels
# and the conversion of space to semicolon are done *outside* of these rules.
# So as far as these rules are concerned, the vowels are in logical order!
#
# Rule order matters throughout this file: longer Latin sequences (e.g. k̄h, kh) are
# listed before their prefixes (k), and the catch-all fallbacks come last.
# insert implicit vowel (and remove it going the other way)
# COMMENTED out: the implicit vowel positions cannot be predicted algorithmically
#$consonant = [ก-ฮ];
#$vowel = [ะ-ฺเ-ไ็];
#{ ( $consonant ) } [^$vowel \uE000] → | $1 \uE000 ;
#\uE000 → ọ ;
# ← ọ ;
# Helper sets for the backward (Latin → ThaiLogical) rules.
# $notAbove matches any character whose canonical combining class is neither 0 nor "above".
# The backward rules use ($notAbove*) to skip such marks when they sit between a base
# letter and the above-mark that belongs to it ("account for reordering").
$notAbove = [^\p{ccc=0}\p{ccc=above}] ;
# NOTE(review): $notBelow is only mentioned in the commented-out $notHAccent sketch below;
# no active rule in this file references it.
$notBelow = [^\p{ccc=0}\p{ccc=below}] ;
# Consonants
# Warning: the 'h's need to be handled carefully!
# What we really want to say is the following, but we can't
# $notHAccent = !($notAbove* ̄ | $notBelow* ̣) ;
# Since the only accents we care about that could cause problems are free-standing accents below, we use instead:
$freeStandingBelow = [̥ ];
# $hAccent: presumably the two combining marks that distinguish h̄ (HO HIP) and ḥ (HO NOKHUK) from a bare h.
$hAccent = [ ̄ ̣];
# $notHAccent0: one character that is neither a free-standing below accent nor an h-accent.
$notHAccent0 = [^$freeStandingBelow$hAccent];
# $notHAccent1: a free-standing below accent followed by one character that is not an h-accent.
$notHAccent1 = $freeStandingBelow [^$hAccent];
# The unaccented digraphs kh / ph / ch / th below each use these two sets as trailing context:
# the text after "}" must follow the digraph on the Latin side but is not consumed, so the
# digraph is only turned into its Thai letter when its "h" does not carry an h-accent.
#
# Backward rules of the form  X | $1 ← base ($notAbove*) mark  match the base letter, any
# intervening non-above marks (captured as $1) and the final mark. They emit X and place the
# cursor ("|") in front of the re-emitted $1 so that those marks are then converted by the
# rules for the marks themselves.
ห → h̄ ; # THAI CHARACTER HO HIP
ห | $1 ← h ($notAbove*) ̄; # backward case, account for reordering
ฮ ↔ ḥ ; # THAI CHARACTER HO NOKHUK
ข ↔ k̄h ; # THAI CHARACTER KHO KHAI
ฃ ↔ ḳ̄h ; # THAI CHARACTER KHO KHUAT
ฅ ↔ kʹh ; # THAI CHARACTER KHO KHON
ฆ ↔ ḳh ; # THAI CHARACTER KHO RAKHANG
ค ← kh } $notHAccent1 ; # THAI CHARACTER KHO KHWAI
ค ↔ kh } $notHAccent0 ; # THAI CHARACTER KHO KHWAI
ก ↔ k ; # THAI CHARACTER KO KAI
ภ ↔ p̣h ; # THAI CHARACTER PHO SAMPHAO
ผ ↔ p̄h ; # THAI CHARACTER PHO PHUNG
พ ← ph } $notHAccent1 ; # THAI CHARACTER PHO PHAN
พ ↔ ph } $notHAccent0 ; # THAI CHARACTER PHO PHAN
ป ↔ p ; # THAI CHARACTER PO PLA
ฉ ↔ c̄h ; # THAI CHARACTER CHO CHING
ฌ ↔ c̣h ; # THAI CHARACTER CHO CHOE
ช ← ch } $notHAccent1 ; # THAI CHARACTER CHO CHANG
ช ↔ ch } $notHAccent0 ; # THAI CHARACTER CHO CHANG
จ ↔ c ; # THAI CHARACTER CHO CHAN
ฐ ↔ ṭ̄h ; # THAI CHARACTER THO THAN
ฑ ↔ ṯh ; # THAI CHARACTER THO NANGMONTHO
ฒ ↔ tʹh ; # THAI CHARACTER THO PHUTHAO
ถ ↔ t̄h ; # THAI CHARACTER THO THUNG
ธ ↔ ṭh ; # THAI CHARACTER THO THONG
ท ← th } $notHAccent1 ; # THAI CHARACTER THO THAHAN
ท ↔ th } $notHAccent0 ; # THAI CHARACTER THO THAHAN
#Note: TO PATAK deviates from ISO since t-dotunder + h would be ambiguous. So it uses vertical tick.
ฏ ↔ t̩ ; # THAI CHARACTER TO PATAK
ต ↔ t ; # THAI CHARACTER TO TAO
# since there is no singleton g (generated), don't worry about that.
ง ↔ ng ; # THAI CHARACTER NGO NGU
ณ ↔ ṇ ; # THAI CHARACTER NO NEN
น ↔ n ; # THAI CHARACTER NO NU
ญ ↔ ỵ ; # THAI CHARACTER YO YING
ฎ ↔ ḍ ; # THAI CHARACTER DO CHADA
ด ↔ d ; # THAI CHARACTER DO DEK
บ ↔ b ; # THAI CHARACTER BO BAIMAI
ฝ ↔ f̄ ; # THAI CHARACTER FO FA
ฝ | $1 ← f ($notAbove*) ̄; # backward case, account for reordering
ม ↔ m ; # THAI CHARACTER MO MA
ย ↔ y ; # THAI CHARACTER YO YAK
ร ↔ r ; # THAI CHARACTER RO RUA
ฤ ↔ v ; # THAI CHARACTER RU
ฦ ↔ ł ; # THAI CHARACTER LU
ว ↔ w ; # THAI CHARACTER WO WAEN
ศ ↔ ṣ̄ ; # THAI CHARACTER SO SALA***
ศ | $1 ← s ̣ ($notAbove*) ̄; # backward case, account for reordering
ษ ↔ s̄ʹ ; # THAI CHARACTER SO RUSI
ส → s̄ ; # THAI CHARACTER SO SUA***
ส | $1 ← s ($notAbove*) ̄; # backward case, account for reordering
ฬ ↔ ḷ ; # THAI CHARACTER LO CHULA
ล ↔ l ; # THAI CHARACTER LO LING
ฟ ↔ f ; # THAI CHARACTER FO FAN
อ ↔ x ; # THAI CHARACTER O ANG
ซ ↔ s ; # THAI CHARACTER SO SO
# vowels
ั ↔ ạ ; # THAI CHARACTER MAI HAN-AKAT
า → ā ; # THAI CHARACTER SARA AA
า | $1 ← a ($notAbove*) ̄; # backward case, account for reordering
# We deviate from ISO for SARA AM for disambiguation
ำ → a ̉; # THAI CHARACTER SARA AM
ำ | $1 ← a ($notAbove*) ̉ ; # backward case, account for reordering
ะ ↔ a ; # THAI CHARACTER SARA A
ี ↔ ī ; # THAI CHARACTER SARA II
ี | $1 ← i ($notAbove*) ̄ ; # backward case, account for reordering
ื ↔ ụ̄ ; # THAI CHARACTER SARA UEE
ื | $1 ← u ̣ ($notAbove*) ̄ ; # backward case, account for reordering
ึ ↔ ụ ; # THAI CHARACTER SARA UE
ู ↔ ū ; # THAI CHARACTER SARA UU
ู | $1 ← u ($notAbove*) ̄ ; # backward case, account for reordering
ุ ↔ u ; # THAI CHARACTER SARA U
ฯ ↔ ‡ ; # THAI CHARACTER PAIYANNOI
# ฿ ↔ XXX ; # THAI CURRENCY SYMBOL BAHT
เ ↔ e ; # THAI CHARACTER SARA E
แ ↔ æ ; # THAI CHARACTER SARA AE
โ ↔ o ; # THAI CHARACTER SARA O
ใ ↔ ı ; # THAI CHARACTER SARA AI MAIMUAN
ไ ↔ ị ; # THAI CHARACTER SARA AI MAIMALAI
ๅ ↔ ɨ ; # THAI CHARACTER LAKKHANGYAO
็ ↔ ̆ ; # THAI CHARACTER MAITAIKHU
่ ↔ ̀ ; # THAI CHARACTER MAI EK
้ ↔ ̂ ; # THAI CHARACTER MAI THO
๊ ↔ ́ ; # THAI CHARACTER MAI TRI
๋ ↔ ̌ ; # THAI CHARACTER MAI CHATTAWA
์ ↔ ̒ ; # THAI CHARACTER THANTHAKHAT
๎ ↔ '~' ; # THAI CHARACTER YAMAKKAN
# We deviate from ISO for disambiguation
ํ ↔ ̊ ; # THAI CHARACTER NIKHAHIT
๏ ↔ '§' ; # THAI CHARACTER FONGMAN
๐ ↔ 0 ; # THAI DIGIT ZERO
๑ ↔ 1 ; # THAI DIGIT ONE
๒ ↔ 2 ; # THAI DIGIT TWO
๓ ↔ 3 ; # THAI DIGIT THREE
๔ ↔ 4 ; # THAI DIGIT FOUR
๕ ↔ 5 ; # THAI DIGIT FIVE
๖ ↔ 6 ; # THAI DIGIT SIX
๗ ↔ 7 ; # THAI DIGIT SEVEN
๘ ↔ 8 ; # THAI DIGIT EIGHT
๙ ↔ 9 ; # THAI DIGIT NINE
๚ ↔ '||' ; # THAI CHARACTER ANGKHANKHU
๛ ↔ » ; # THAI CHARACTER KHOMUT
ๆ ↔ « ; # THAI CHARACTER MAIYAMOK
# moved down to make shorter first
#Note: PHINTHU deviates from ISO since underring causes canonical problems. So it uses spacing tick below.
ฺ ↔ ˌ ; # THAI CHARACTER PHINTHU
ิ ↔ i ; # THAI CHARACTER SARA I
# fallbacks
# Backward-only rules: each rewrites a Latin letter (g, h, j, q, z) to a letter that the
# rules above handle (k, c, s). The leading "|" leaves the cursor in front of the
# replacement so that it is converted to Thai by those rules.
| k ← g ;
| k ← h ;
| c ← j ;
| k ← q ;
| s ← z ;
# The parenthesised form names a transform for the backward (Latin → ThaiLogical)
# direction only: the Latin text is lowercased; nothing is applied in the forward direction.
:: (lower);
			]]></tRule>
		</transform>
	</transforms>
</supplementalData>