# © 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html
# Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
#
# File: ThaiLogical_Latin.txt
# Generated from CLDR
#

# Thai-Latin
# This set of rules follows ISO 11940
# see http://homepage.mac.com/sirbinks/pdf/Thai.r2.pdf
# except that that document does not mention an implicit vowel, so we use o\u0323
# (NOTE: the rules that insert/remove o\u0323 are commented out further down,
#  so no implicit vowel is currently produced by this file.)
#
# The transcription is fairly ugly, so we ought to also do the UNGEGN version
# see: http://www.eki.ee/wgrs/rom1_th.pdf
# and probably make that the main variant.
#
# Note: this is an internal file. The NFD/NFC is handled externally, in the index
# The insertion of spaces between words, the reversal of the vowels
# and the conversion of space to semicolon are done *outside* of these rules.
# So as far as these rules are concerned, the vowels are in logical order!
# insert implicit vowel (and remove it going the other way)
# COMMENTED out: the implicit vowel positions cannot be predicted algorithmically
#$consonant = [ก-ฮ];
#$vowel = [ะ-\u0E3Aเ-ไ\u0E47];
#{ ( $consonant ) } [^$vowel \uE000] → | $1 \uE000 ;
#\uE000 → o\u0323 ;
# ← o\u0323 ;
# Helper sets keyed on canonical combining class (ccc):
# $notAbove = any character whose ccc is neither 0 nor "above".
# It is used in the reverse rules to skip marks that sit between a base letter
# and a following above-mark such as \u0304.
$notAbove = [^\p{ccc=0}\p{ccc=above}] ;
# $notBelow = any character whose ccc is neither 0 nor "below".
# It is not referenced by any active rule in this file; it appears only in the
# commented-out $notHAccent sketch in the consonant section.
$notBelow = [^\p{ccc=0}\p{ccc=below}] ;
# Consonants
# Warning: the 'h's need to be handled carefully!
# Why the care is needed: h\u0304 and h\u0323 are letters in their own right
# (HO HIP and HO NOKHUK below), so Latin "kh" followed by one of those marks
# must be read as k + accented h, not as the plain digraph kh. The same applies
# to ph, ch and th.
# What we really want to say is the following, but we can't
# $notHAccent = !($notAbove* \u0304 | $notBelow* \u0323) ;
# Since the only accents we care about that could cause problems are free-standing accents below, we use instead:
# $freeStandingBelow: the single mark U+0325.
$freeStandingBelow = [\u0325 ];
# $hAccent: the two marks that turn a Latin h into HO HIP (\u0304) or HO NOKHUK (\u0323).
$hAccent = [ \u0304 \u0323];
# $notHAccent0: one character that is neither U+0325 nor an h-accent.
$notHAccent0 = [^$freeStandingBelow$hAccent];
# $notHAccent1: U+0325 followed by one character that is not an h-accent.
$notHAccent1 = $freeStandingBelow [^$hAccent];
ห → h\u0304 ; # THAI CHARACTER HO HIP
# In the reverse rule, ($notAbove*) captures any marks found between the base
# letter and \u0304; they are written back after the Thai letter as $1, and the
# | leaves the cursor in front of them so the remaining rules still see them.
ห | $1 ← h ($notAbove*) \u0304; # backward case, account for reordering
ฮ ↔ h\u0323 ; # THAI CHARACTER HO NOKHUK
# Each group below lists the marked digraphs first, then the plain digraph
# guarded by the $notHAccent after-context, then the bare letter.
# Keep this order: it determines which Latin form is matched first.
ข ↔ k\u0304h ; # THAI CHARACTER KHO KHAI
ฃ ↔ k\u0323\u0304h ; # THAI CHARACTER KHO KHUAT
ฅ ↔ kʹh ; # THAI CHARACTER KHO KHON
ฆ ↔ k\u0323h ; # THAI CHARACTER KHO RAKHANG
ค ← kh } $notHAccent1 ; # THAI CHARACTER KHO KHWAI
ค ↔ kh } $notHAccent0 ; # THAI CHARACTER KHO KHWAI
ก ↔ k ; # THAI CHARACTER KO KAI
ภ ↔ p\u0323h ; # THAI CHARACTER PHO SAMPHAO
ผ ↔ p\u0304h ; # THAI CHARACTER PHO PHUNG
พ ← ph } $notHAccent1 ; # THAI CHARACTER PHO PHAN
พ ↔ ph } $notHAccent0 ; # THAI CHARACTER PHO PHAN
ป ↔ p ; # THAI CHARACTER PO PLA
ฉ ↔ c\u0304h ; # THAI CHARACTER CHO CHING
ฌ ↔ c\u0323h ; # THAI CHARACTER CHO CHOE
ช ← ch } $notHAccent1 ; # THAI CHARACTER CHO CHANG
ช ↔ ch } $notHAccent0 ; # THAI CHARACTER CHO CHANG
จ ↔ c ; # THAI CHARACTER CHO CHAN
ฐ ↔ t\u0323\u0304h ; # THAI CHARACTER THO THAN
ฑ ↔ t\u0331h ; # THAI CHARACTER THO NANGMONTHO
ฒ ↔ tʹh ; # THAI CHARACTER THO PHUTHAO
ถ ↔ t\u0304h ; # THAI CHARACTER THO THUNG
ธ ↔ t\u0323h ; # THAI CHARACTER THO THONG
ท ← th } $notHAccent1 ; # THAI CHARACTER THO THAHAN
ท ↔ th } $notHAccent0 ; # THAI CHARACTER THO THAHAN
# Note: TO PATAK deviates from ISO since t-dotunder + h would be ambiguous
# (t\u0323h is already THO THONG above). So it uses vertical tick.
ฏ ↔ t\u0329 ; # THAI CHARACTER TO PATAK
ต ↔ t ; # THAI CHARACTER TO TAO
# since there is no singleton g (generated), don't worry about that.
# Remaining consonants
ง ↔ ng ; # THAI CHARACTER NGO NGU
ณ ↔ n\u0323 ; # THAI CHARACTER NO NEN
น ↔ n ; # THAI CHARACTER NO NU
ญ ↔ y\u0323 ; # THAI CHARACTER YO YING
ฎ ↔ d\u0323 ; # THAI CHARACTER DO CHADA
ด ↔ d ; # THAI CHARACTER DO DEK
บ ↔ b ; # THAI CHARACTER BO BAIMAI
ฝ ↔ f\u0304 ; # THAI CHARACTER FO FA
# In the "backward case" rules, ($notAbove*) captures any marks found between
# the base letter and the final above-mark; they are written back after the
# Thai character as $1, with | leaving the cursor in front of them.
ฝ | $1 ← f ($notAbove*) \u0304; # backward case, account for reordering
ม ↔ m ; # THAI CHARACTER MO MA
ย ↔ y ; # THAI CHARACTER YO YAK
ร ↔ r ; # THAI CHARACTER RO RUA
ฤ ↔ v ; # THAI CHARACTER RU
ฦ ↔ ł ; # THAI CHARACTER LU
ว ↔ w ; # THAI CHARACTER WO WAEN
ศ ↔ s\u0323\u0304 ; # THAI CHARACTER SO SALA***
ศ | $1 ← s \u0323 ($notAbove*) \u0304; # backward case, account for reordering
# SO RUSI (s\u0304ʹ) must stay ahead of SO SUA (s\u0304): it is the longer Latin form.
ษ ↔ s\u0304ʹ ; # THAI CHARACTER SO RUSI
ส → s\u0304 ; # THAI CHARACTER SO SUA***
ส | $1 ← s ($notAbove*) \u0304; # backward case, account for reordering
ฬ ↔ l\u0323 ; # THAI CHARACTER LO CHULA
ล ↔ l ; # THAI CHARACTER LO LING
ฟ ↔ f ; # THAI CHARACTER FO FAN
อ ↔ x ; # THAI CHARACTER O ANG
ซ ↔ s ; # THAI CHARACTER SO SO
# vowels
\u0E31 ↔ a\u0323 ; # THAI CHARACTER MAI HAN-AKAT
า → a\u0304 ; # THAI CHARACTER SARA AA
า | $1 ← a ($notAbove*) \u0304; # backward case, account for reordering
# We deviate from ISO for SARA AM for disambiguation
ำ → a \u0309; # THAI CHARACTER SARA AM
ำ | $1 ← a ($notAbove*) \u0309 ; # backward case, account for reordering
ะ ↔ a ; # THAI CHARACTER SARA A
\u0E35 ↔ i\u0304 ; # THAI CHARACTER SARA II
\u0E35 | $1 ← i ($notAbove*) \u0304 ; # backward case, account for reordering
\u0E37 ↔ u\u0323\u0304 ; # THAI CHARACTER SARA UEE
\u0E37 | $1 ← u \u0323 ($notAbove*) \u0304 ; # backward case, account for reordering
\u0E36 ↔ u\u0323 ; # THAI CHARACTER SARA UE
\u0E39 ↔ u\u0304 ; # THAI CHARACTER SARA UU
\u0E39 | $1 ← u ($notAbove*) \u0304 ; # backward case, account for reordering
\u0E38 ↔ u ; # THAI CHARACTER SARA U
ฯ ↔ ‡ ; # THAI CHARACTER PAIYANNOI
# No mapping is defined for the baht sign; the line below is a disabled placeholder.
# ฿ ↔ XXX ; # THAI CURRENCY SYMBOL BAHT
เ ↔ e ; # THAI CHARACTER SARA E
แ ↔ æ ; # THAI CHARACTER SARA AE
โ ↔ o ; # THAI CHARACTER SARA O
ใ ↔ ı ; # THAI CHARACTER SARA AI MAIMUAN
ไ ↔ i\u0323 ; # THAI CHARACTER SARA AI MAIMALAI
ๅ ↔ ɨ ; # THAI CHARACTER LAKKHANGYAO
# tone marks and other signs, each mapped to a single combining mark or symbol
\u0E47 ↔ \u0306 ; # THAI CHARACTER MAITAIKHU
\u0E48 ↔ \u0300 ; # THAI CHARACTER MAI EK
\u0E49 ↔ \u0302 ; # THAI CHARACTER MAI THO
\u0E4A ↔ \u0301 ; # THAI CHARACTER MAI TRI
\u0E4B ↔ \u030C ; # THAI CHARACTER MAI CHATTAWA
\u0E4C ↔ \u0312 ; # THAI CHARACTER THANTHAKHAT
\u0E4E ↔ '~' ; # THAI CHARACTER YAMAKKAN
# We deviate from ISO for disambiguation
\u0E4D ↔ \u030A ; # THAI CHARACTER NIKHAHIT
๏ ↔ '§' ; # THAI CHARACTER FONGMAN
# digits
๐ ↔ 0 ; # THAI DIGIT ZERO
๑ ↔ 1 ; # THAI DIGIT ONE
๒ ↔ 2 ; # THAI DIGIT TWO
๓ ↔ 3 ; # THAI DIGIT THREE
๔ ↔ 4 ; # THAI DIGIT FOUR
๕ ↔ 5 ; # THAI DIGIT FIVE
๖ ↔ 6 ; # THAI DIGIT SIX
๗ ↔ 7 ; # THAI DIGIT SEVEN
๘ ↔ 8 ; # THAI DIGIT EIGHT
๙ ↔ 9 ; # THAI DIGIT NINE
๚ ↔ '||' ; # THAI CHARACTER ANGKHANKHU
๛ ↔ » ; # THAI CHARACTER KHOMUT
ๆ ↔ « ; # THAI CHARACTER MAIYAMOK
# moved down to make shorter first
# NOTE(review): "moved down" presumably refers to the rules below being placed
# after the longer i-based forms (i\u0304, i\u0323) instead of with the other
# vowels; confirm before reordering them.
# Note: PHINTHU deviates from ISO since underring causes canonical problems. So it uses spacing tick below.
\u0E3A ↔ ˌ ; # THAI CHARACTER PHINTHU
\u0E34 ↔ i ; # THAI CHARACTER SARA I
# fallbacks
# Reverse direction only: a Latin letter left over after none of the rules
# above matched is rewritten to a near equivalent. The leading | leaves the
# cursor before the replacement, so the replacement is matched again.
| k ← g ;
| k ← h ;
| c ← j ;
| k ← q ;
| s ← z ;
# The parenthesised ID applies only in the reverse (Latin to Thai) direction;
# presumably it lowercases the input before these rules run. Confirm against
# the ICU transform-rule documentation.
:: (lower);
