# © 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html
# Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
#
# File: my_my_FONIPA.txt
# Generated from CLDR
#

# Pronunciation rules for Burmese.
#
# The following rules are lexical and heuristic: lexical in the sense
# that they generate phoneme strings which may further undergo
# post-lexical phonological processes, in particular voicing, to
# result in actual surface forms; heuristic in the sense that they try
# to resolve ambiguities, especially around reduced vowels, in a
# systematic way that may be incorrect in many situations. Vowel
# reduction depends on many factors, such as morphemic structure,
# which are not available here.

#
# Definitions
#

# Dependent vowel signs.
$vs_AA        = \u102B;
$vs_aa        = \u102C;
$vs_i         = \u102D;
$vs_ii        = \u102E;
$vs_u         = \u102F;
$vs_uu        = \u1030;
$vs_e         = \u1031;
$vs_ai        = \u1032;

# Various signs.
$anusvara     = \u1036;
$visarga      = \u1038;
$virama       = \u1039;
$asat         = \u103A;

# Dependent (medial) consonant signs.
$med_y        = \u103B;
$med_r        = \u103C;
$med_w        = \u103D;
$med_h        = \u103E;

# Independent letters and letter-like punctuation symbols.
$independent  = [\u1000-\u102A \u103F \u104C-\u104F \u1050-\u1055];

# Combining marks that the rules below attach to output vowels as tone
# diacritics.
$creaky       = \u0330;
$high         = \u0301;
$low          = \u0300;

# The three tone marks together with ɴ, ʔ and ə.
$coda         = [$creaky $high $low ɴ ʔ ə];  # TODO: remove if unused

#
# Preprocessing
#

::NFC;

# U+102B TALL AA and U+102C AA are pronounced identically, so fold the
# former into the latter.
$vs_AA → $vs_aa;

# Unstack kinzi (င\u103A plus U+1039 VIRAMA) into plain င\u103A.
# Hmm, what would happen if the syllable ending in kinzi had non-low tone?
င\u103A $virama → င\u103A;

# Unstack everything else, i.e. turn each remaining U+1039 VIRAMA into
# U+103A ASAT.
$virama → $asat;

# Unstack U+103F GREAT SA.
ဿ → သ\u103Aသ;

# Insert a syllable boundary marker /./ before every independent letter.
::Null;
[^.$] { } $independent ([\u1037\u103B-\u103E])* [^\u103A] → \.;

# Supply the default inherent vowel: /a\u0330/ at the end, /ə/
# everywhere else (i.e. in front of a syllable boundary marker).
::Null;
([\u1000-\u1021\u103F] [\u103B-\u103E]*) } [$]  → $1 a $creaky;
([\u1000-\u1021\u103F] [\u103B-\u103E]*) } \.   → $1 ə;

# Allow for additional coda consonants.
#
# This only covers a few of the cases in which full coda consonants
# can appear in loanwords. The general situation is somewhat rare and
# is more easily dealt with in a formalism that can impose structural
# constraints on syllables more easily.
::Null;
$asat ($visarga)? [\u1000-\u102A] { $asat → ;

# Deal with ၎င\u103Aး early.
၎င\u103Aး → lə\.ɡa $high ʊ\u032Fɴ;

#
# Rhymes
#
::Null;

# Final consonant + asat with no written vowel sign.
က\u103A                          → ɛʔ;
ဂ\u103A                          → ɛʔ;  # in မဂ\u1039ဂဇင\u103Aး ~ မဂ\u103Aဂဇင\u103Aး /mɛʔ.ɡə.zɪ\u0301ɴ/
င\u1037\u103A                    → ɪ $creaky ɴ;
င\u103Aး                         → ɪ $high ɴ;
င\u103A                          → ɪ $low ɴ;
စ\u103A                          → ɪʔ;  # maybe sometimes /eɪ\u032Fʔ/
ဉ\u1037\u103A                    → ɪ $creaky ɴ;
ဉ\u103Aး                         → ɪ $high ɴ;
ဉ\u103A                          → ɪ $low ɴ;
ည\u1037\u103A                    → ɛ $creaky;
ည\u103Aး                         → ɛ $high;
ည\u103A                          → ɛ $low;
ဏ\u1037\u103A                    → a $creaky ɴ;
ဏ\u103Aး                         → a $high ɴ;
ဏ\u103A                          → a $low ɴ;
တ\u103A                          → aʔ;
န\u1037\u103A                    → a $creaky ɴ;
န\u103Aး                         → a $high ɴ;
န\u103A                          → a $low ɴ;
ပ\u103A                          → aʔ;
မ\u1037\u103A                    → a $creaky ɴ;
မ\u103Aး                         → a $high ɴ;
မ\u103A                          → a $low ɴ;
ယ\u1037\u103A                    → ɛ $creaky;
ယ\u103Aး                         → ɛ $high;
ယ\u103A                          → ɛ $low;
သ\u103A                          → aʔ;

# Rhymes written with $vs_aa.
$vs_aa ဉ\u1037\u103A             → ɪ $creaky ɴ;
$vs_aa ဉ\u103Aး                  → ɪ $high ɴ;
$vs_aa ဉ\u103A                   → ɪ $low ɴ;
$vs_aa တ\u103A                   → aʔ;
$vs_aa ဏ\u1037\u103A             → a $creaky ɴ;
$vs_aa ဏ\u103Aး                  → a $high ɴ;
$vs_aa ဏ\u103A                   → a $low ɴ;
$vs_aa န\u1037\u103A             → a $creaky ɴ;
$vs_aa န\u103Aး                  → a $high ɴ;
$vs_aa န\u103A                   → a $low ɴ;
$vs_aa ပ\u103A                   → aʔ;  # in ကလာပ\u103Aစည\u103Aး /kə.laʔ.sɛ\u0301/ (club cell)
$vs_aa ယ\u1037\u103A             → ɛ $creaky;
$vs_aa ယ\u103Aး                  → ɛ $high;
$vs_aa ယ\u103A                   → ɛ $low;
$vs_aa \u1037                    → a $creaky;  # redundant creaky tone
$vs_aa း                         → a $high;
$vs_aa                           → a $low;

# Rhymes written with $vs_i alone.
$vs_i က\u103A                    → eɪ\u032Fʔ;
$vs_i စ\u103A                    → eɪ\u032Fʔ;
$vs_i တ\u103A                    → eɪ\u032Fʔ;
$vs_i န\u1037\u103A              → e $creaky ɪ\u032Fɴ;
$vs_i န\u103Aး                   → e $high ɪ\u032Fɴ;
$vs_i န\u103A                    → e $low ɪ\u032Fɴ;
$vs_i ပ\u103A                    → eɪ\u032Fʔ;
$vs_i မ\u1037\u103A              → e $creaky ɪ\u032Fɴ;
$vs_i မ\u103Aး                   → e $high ɪ\u032Fɴ;
$vs_i မ\u103A                    → e $low ɪ\u032Fɴ;

# Rhymes written with $vs_i followed by $vs_u.
$vs_i $vs_u က\u103A              → aɪ\u032Fʔ;
$vs_i $vs_u င\u1037\u103A        → a $creaky ɪ\u032Fɴ;
$vs_i $vs_u င\u103Aး             → a $high ɪ\u032Fɴ;
$vs_i $vs_u င\u103A              → a $low ɪ\u032Fɴ;
$vs_i $vs_u ဏ\u1037\u103A        → a $creaky ɪ\u032Fɴ;
$vs_i $vs_u ဏ\u103Aး             → a $high ɪ\u032Fɴ;
$vs_i $vs_u ဏ\u103A              → a $low ɪ\u032Fɴ;
$vs_i $vs_u ယ\u1037\u103A        → o $creaky;
$vs_i $vs_u ယ\u103Aး             → o $high;
$vs_i $vs_u ယ\u103A              → o $low;  # in က\u102D\u102Fယ\u103A /kò/
$vs_i $vs_u \u1037               → o $creaky;
$vs_i $vs_u း                    → o $high;
$vs_i $vs_u                      → o $low;

# $vs_i with anusvara, then the fallback for bare $vs_i.
$vs_i $anusvara \u1037           → e $creaky ɪ\u032Fɴ;
$vs_i $anusvara း                → e $high ɪ\u032Fɴ;
$vs_i $anusvara                  → e $low ɪ\u032Fɴ;
$vs_i                            → i $creaky;

# Rhymes written with $vs_ii.
$vs_ii \u1037                    → i $creaky;  # this does not usually occur
$vs_ii း                         → i $high;
$vs_ii                           → i $low;

# Rhymes written with $vs_u, ending in the fallback for bare $vs_u.
$vs_u က\u103A                    → oʊ\u032Fʔ;
$vs_u ဂ\u103A                    → oʊ\u032Fʔ;
$vs_u ဏ\u1037\u103A              → o $creaky ʊ\u032Fɴ;
$vs_u ဏ\u103Aး                   → o $high ʊ\u032Fɴ;
$vs_u ဏ\u103A                    → o $low ʊ\u032Fɴ;
$vs_u တ\u103A                    → oʊ\u032Fʔ;
$vs_u န\u1037\u103A              → o $creaky ʊ\u032Fɴ;
$vs_u န\u103Aး                   → o $high ʊ\u032Fɴ;
$vs_u န\u103A                    → o $low ʊ\u032Fɴ;
$vs_u ပ\u103A                    → oʊ\u032Fʔ;
$vs_u မ\u1037\u103A              → o $creaky ʊ\u032Fɴ;
$vs_u မ\u103Aး                   → o $high ʊ\u032Fɴ;
$vs_u မ\u103A                    → o $low ʊ\u032Fɴ;
$vs_u $anusvara \u1037           → o $creaky ʊ\u032Fɴ;
$vs_u $anusvara း                → o $high ʊ\u032Fɴ;
$vs_u $anusvara                  → o $low ʊ\u032Fɴ;
$vs_u                            → u $creaky;

# Rhymes written with $vs_uu.
$vs_uu \u1037                    → u $creaky;  # this does not usually occur
$vs_uu း                         → u $high;
$vs_uu                           → u $low;

# Rhymes written with $vs_e, with or without a following $vs_aa.
$vs_e တ\u103A                    → ɪʔ;
$vs_e $vs_aa က\u103A             → aʊ\u032Fʔ;
$vs_e $vs_aa င\u1037\u103A       → a $creaky ʊ\u032Fɴ;
$vs_e $vs_aa င\u103Aး            → a $high ʊ\u032Fɴ;
$vs_e $vs_aa င\u103A             → a $low ʊ\u032Fɴ;
$vs_e $vs_aa \u1037              → ɔ $creaky;
$vs_e $vs_aa း                   → ɔ $high;  # redundant high tone; this does not usually occur
$vs_e $vs_aa \u103A              → ɔ $low;
$vs_e $vs_aa                     → ɔ $high;
$vs_e \u1037                     → e $creaky;
$vs_e း                          → e $high;
$vs_e                            → e $low;

# Rhymes written with $vs_ai.
$vs_ai \u1037                    → ɛ $creaky;
$vs_ai း                         → ɛ $high;  # redundant high tone; this does not usually occur
$vs_ai                           → ɛ $high;

# Anusvara with no preceding vowel sign.
$anusvara \u1037                 → a $creaky ɴ;
$anusvara း                      → a $high ɴ;
$anusvara                        → a $low ɴ;

# Medial WA directly followed by a final consonant.
$med_w တ\u103A                   → ʊʔ;
$med_w န\u1037\u103A             → ʊ $creaky ɴ;
$med_w န\u103Aး                  → ʊ $high ɴ;
$med_w န\u103A                   → ʊ $low ɴ;
$med_w ပ\u103A                   → ʊʔ;
$med_w မ\u1037\u103A             → ʊ $creaky ɴ;
$med_w မ\u103Aး                  → ʊ $high ɴ;
$med_w မ\u103A                   → ʊ $low ɴ;

#
# Medials
#
::Null;

# Palatalization of the velar stops before MEDIAL YA and MEDIAL RA:
# velar + /j/ ==> modern palatals.
ကျ → t\u0361ɕ;
ချ → t\u0361ɕʰ;
ဂျ → d\u0361ʑ;
ဃျ → d\u0361ʑ;
ကြ → t\u0361ɕ;
ခြ → t\u0361ɕʰ;
ဂြ → d\u0361ʑ;
ဃြ → d\u0361ʑ;

# Remove redundant MEDIAL YA and MEDIAL RA after initial YA.
ယ { [$med_y $med_r] → ;

# Reorder the medials so that U+103E SIGN MEDIAL HA comes before any
# other medials. This is done in three steps, each followed by a
# ::Null; pass break.
#
# Step 1: push U+103E MEDIAL HA before U+103D MEDIAL WA.
\u103D \u103E → \u103E \u103D;
::Null;

# Now MEDIAL WA comes last.
# Produce the palatal ʃ from (SA|LA)+YA+HA.
သျ\u103E → ʃ;
လျ\u103E → ʃ;

# Step 2: push U+103E MEDIAL HA before U+103C MEDIAL RA.
\u103C \u103E → \u103E \u103C;
::Null;

# Step 3: push U+103E MEDIAL HA before U+103B MEDIAL YA.
\u103B \u103E → \u103E \u103B;
::Null;

# Consume MEDIAL HA and apply devoicing.
# Base letter + U+103E MEDIAL HA.
င\u103E → ŋ\u030A;
ဉ\u103E → ɲ\u0325;
ည\u103E → ɲ\u0325;
ဏ\u103E → n\u0325;
န\u103E → n\u0325;
မ\u103E → m\u0325;
ယ\u103E → ʃ;
ရ\u103E → ʃ;
လ\u103E → l\u0325;
ဝ\u103E → w\u0325;
ဠ\u103E → l\u0325;

# Drop any remaining U+103E MEDIAL HA.
\u103E → ;

# Simplify medial cluster /jw/ to /w/, i.e. drop U+103B MEDIAL YA and
# U+103C MEDIAL RA before U+103D MEDIAL WA. # TODO: revisit this
\u103B } \u103D → ;
\u103C } \u103D → ;

# Whatever medials are left become glides.
\u103B → j;
\u103C → j;
\u103D → w;

#
# Initials
#

# Velars
က  → k;
ခ  → kʰ;
ဂ  → ɡ;
ဃ  → ɡ;
င  → ŋ;

# Historic palatals
စ  → s;
ဆ  → sʰ;
ဇ  → z;
ဈ  → z;
ဉ  → ɲ;
ည  → ɲ;

# Alveolars
ဋ  → t;
ဌ  → tʰ;
ဍ  → d;
ဎ  → d;
ဏ  → n;

# Historic dentals ==> alveolars
တ  → t;
ထ  → tʰ;
ဒ  → d;
ဓ  → d;
န  → n;

# Labials
ပ  → p;
ဖ  → pʰ;
ဗ  → b;
ဘ  → b;
မ  → m;

# Other letters
ယ  → j;
ရ  → j;       # historic /r/
လ\u103A → ;   # final, typically not pronounced in native words
လ  → l;
ဝ  → w;
သ  → θ;       # historic /s/ ==> modern dental
ဟ  → h;
ဠ  → l;
အ  → ʔ;

# Independent vowels. For each letter the forms carrying \u1037 or း
# are listed before the bare letter.
ဣ\u1037 → ʔḭ;         # redundant creaky tone; this does not usually occur
ဣး      → ʔí;         # this does not usually occur
ဣ       → ʔḭ;
ဤ\u1037 → ʔḭ;         # this does not usually occur
ဤး      → ʔí;         # this does not usually occur
ဤ       → ʔì;
ဥ\u1037 → ʔṵ;         # redundant creaky tone; this does not usually occur
ဥး      → ʔú;         # this does not usually occur
ဥ       → ʔṵ;
ဦ\u1037 → ʔṵ;         # this does not usually occur
ဦး      → ʔú;
ဦ       → ʔù;
ဧ\u1037 → ʔḛ;         # this does not usually occur
ဧး      → ʔé;
ဧ       → ʔè;
ဩ\u1037 → ʔɔ\u0330;   # this does not usually occur
ဩး      → ʔɔ\u0301;   # redundant high tone; this does not usually occur
ဩ       → ʔɔ\u0301;
ဪ\u1037 → ʔɔ\u0330;   # this does not usually occur
ဪး      → ʔɔ\u0301;   # this does not usually occur
ဪ       → ʔɔ\u0300;

# Various signs
၌ → n\u0325aɪ\u032Fʔ;
၍ → jwḛ;
# ၎င\u103Aး was handled earlier.
၏ → ʔḭ;

#
# Postprocessing
#

# Delete any remaining U+103A ASAT.
$asat → ;

# Delete zero-width space, non-joiner, joiner (U+200B through U+200D).
[\u200B-\u200D] → ;

::NFC;