# © 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html#License
#
# File: my_my_FONIPA.txt
# Generated from CLDR
#

# Pronunciation rules for Burmese.
#
# The following rules are lexical and heuristic: lexical in the sense
# that they generate phoneme strings which may further undergo
# post-lexical phonological processes, in particular voicing, to
# result in actual surface forms; heuristic in the sense that they try
# to resolve ambiguities, especially around reduced vowels, in a
# systematic way that may be incorrect in many situations. Vowel
# reduction depends on many factors, such as morphemic structure,
# which are not available here.
#
# Definitions
#
# Dependent vowel signs
$vs_AA = \u102B;
$vs_aa = \u102C;
$vs_i = \u102D;
$vs_ii = \u102E;
$vs_u = \u102F;
$vs_uu = \u1030;
$vs_e = \u1031;
$vs_ai = \u1032;
# Various signs
$anusvara = \u1036;
$visarga = \u1038;
$virama = \u1039;
$asat = \u103A;
# Dependent (medial) consonant signs
$med_y = \u103B;
$med_r = \u103C;
$med_w = \u103D;
$med_h = \u103E;
# Independent letters and letter-like punctuation symbols
$independent = [\u1000-\u102A \u103F \u104C-\u104F \u1050-\u1055];
# Tone diacritics emitted in the IPA output: U+0330 COMBINING TILDE BELOW,
# U+0301 COMBINING ACUTE ACCENT and U+0300 COMBINING GRAVE ACCENT.
$creaky = \u0330;
$high = \u0301;
$low = \u0300;
#
# Preprocessing
#
# Normalize the input to NFC before any rule is applied.
::NFC;
# Replace U+102B TALL AA with U+102C AA. Their pronunciation is identical.
$vs_AA → $vs_aa;
# Unstack kinzi (င\u103A plus U+1039 VIRAMA) into plain င\u103A.
# This rule must precede the general VIRAMA rule below, which would
# otherwise turn the kinzi's VIRAMA into a second ASAT.
# Open question: what would happen if the syllable ending in kinzi had
# non-low tone?
င\u103A $virama → င\u103A;
# Unstack everything else, i.e. replace U+1039 VIRAMA with U+103A ASAT.
$virama → $asat;
# Unstack U+103F GREAT SA.
ဿ → သ\u103Aသ;
# Insert a syllable boundary marker /./ before every independent letter.
# Each `::Null;` below ends the current group of rules, so that the next
# group runs as a separate pass over the result of the previous one.
::Null;
# Syllable boundary insertion. The key between { and } is empty, so this
# rule only inserts the marker. Nothing is inserted when the preceding
# character is already a marker or the text boundary ([^.$]), nor when the
# independent letter, after any U+1037 or medial signs (U+103B..U+103E),
# is followed by U+103A ASAT.
[^.$] { } $independent ([\u1037\u103B-\u103E])* [^\u103A] → \.;
# Insert default inherent vowel: /a\u0330/ at the end, /ə/ everywhere else.
::Null;
# A letter in [\u1000-\u1021\u103F] plus any medial signs, when directly
# followed by the end of the text, receives /a/ with creaky tone ...
([\u1000-\u1021\u103F] [\u103B-\u103E]*) } [$] → $1 a $creaky;
# ... and when directly followed by a syllable boundary marker receives /ə/.
([\u1000-\u1021\u103F] [\u103B-\u103E]*) } \. → $1 ə;
# Allow for additional coda consonants.
#
# This only covers a few of the cases in which full coda consonants
# can appear in loanwords. The general situation is somewhat rare and
# is more easily dealt with in a formalism that can impose structural
# constraints on syllables more easily.
::Null;
# Delete an ASAT whose letter is itself preceded by ASAT (optionally
# followed by VISARGA); the preceding context is left untouched.
$asat ($visarga)? [\u1000-\u102A] { $asat → ;
# Deal with ၎င\u103Aး early.
၎င\u103Aး → lə\.ɡa $high ʊ\u032Fɴ;
#
# Rhymes
#
# Each rule rewrites a vowel sign and/or a final consonant carrying
# U+103A ASAT into an IPA rhyme. In most groups U+1037 selects $creaky,
# း (VISARGA) selects $high and the unmarked spelling selects $low; the
# exceptions are visible in the individual rules. Rule order matters: within
# each group the longer spellings are listed before the shorter ones, and
# the bare vowel sign comes last.
::Null;
# Rhymes without a vowel sign: final consonant + ASAT.
က\u103A → ɛʔ;
ဂ\u103A → ɛʔ; # in မဂ\u1039ဂဇင\u103Aး ~ မဂ\u103Aဂဇင\u103Aး /mɛʔ.ɡə.zɪ\u0301ɴ/
င\u1037\u103A → ɪ $creaky ɴ;
င\u103Aး → ɪ $high ɴ;
င\u103A → ɪ $low ɴ;
စ\u103A → ɪʔ; # maybe sometimes /eɪ\u032Fʔ/
ဉ\u1037\u103A → ɪ $creaky ɴ;
ဉ\u103Aး → ɪ $high ɴ;
ဉ\u103A → ɪ $low ɴ;
ည\u1037\u103A → ɛ $creaky;
ည\u103Aး → ɛ $high;
ည\u103A → ɛ $low;
ဏ\u1037\u103A → a $creaky ɴ;
ဏ\u103Aး → a $high ɴ;
ဏ\u103A → a $low ɴ;
တ\u103A → aʔ;
န\u1037\u103A → a $creaky ɴ;
န\u103Aး → a $high ɴ;
န\u103A → a $low ɴ;
ပ\u103A → aʔ;
မ\u1037\u103A → a $creaky ɴ;
မ\u103Aး → a $high ɴ;
မ\u103A → a $low ɴ;
ယ\u1037\u103A → ɛ $creaky;
ယ\u103Aး → ɛ $high;
ယ\u103A → ɛ $low;
သ\u103A → aʔ;
# Rhymes starting with U+102C AA (TALL AA was already folded into AA).
$vs_aa ဉ\u1037\u103A → ɪ $creaky ɴ;
$vs_aa ဉ\u103Aး → ɪ $high ɴ;
$vs_aa ဉ\u103A → ɪ $low ɴ;
$vs_aa တ\u103A → aʔ;
$vs_aa ဏ\u1037\u103A → a $creaky ɴ;
$vs_aa ဏ\u103Aး → a $high ɴ;
$vs_aa ဏ\u103A → a $low ɴ;
$vs_aa န\u1037\u103A → a $creaky ɴ;
$vs_aa န\u103Aး → a $high ɴ;
$vs_aa န\u103A → a $low ɴ;
$vs_aa ပ\u103A → aʔ; # in ကလာပ\u103Aစည\u103Aး /kə.laʔ.sɛ\u0301/ (club cell)
$vs_aa ယ\u1037\u103A → ɛ $creaky;
$vs_aa ယ\u103Aး → ɛ $high;
$vs_aa ယ\u103A → ɛ $low;
$vs_aa \u1037 → a $creaky; # redundant creaky tone
$vs_aa း → a $high;
$vs_aa → a $low;
# Rhymes starting with U+102D I. The I + U combinations and I + ANUSVARA
# are listed before the bare I rule at the end of this group.
$vs_i က\u103A → eɪ\u032Fʔ;
$vs_i စ\u103A → eɪ\u032Fʔ;
$vs_i တ\u103A → eɪ\u032Fʔ;
$vs_i န\u1037\u103A → e $creaky ɪ\u032Fɴ;
$vs_i န\u103Aး → e $high ɪ\u032Fɴ;
$vs_i န\u103A → e $low ɪ\u032Fɴ;
$vs_i ပ\u103A → eɪ\u032Fʔ;
$vs_i မ\u1037\u103A → e $creaky ɪ\u032Fɴ;
$vs_i မ\u103Aး → e $high ɪ\u032Fɴ;
$vs_i မ\u103A → e $low ɪ\u032Fɴ;
$vs_i $vs_u က\u103A → aɪ\u032Fʔ;
$vs_i $vs_u င\u1037\u103A → a $creaky ɪ\u032Fɴ;
$vs_i $vs_u င\u103Aး → a $high ɪ\u032Fɴ;
$vs_i $vs_u င\u103A → a $low ɪ\u032Fɴ;
$vs_i $vs_u ဏ\u1037\u103A → a $creaky ɪ\u032Fɴ;
$vs_i $vs_u ဏ\u103Aး → a $high ɪ\u032Fɴ;
$vs_i $vs_u ဏ\u103A → a $low ɪ\u032Fɴ;
$vs_i $vs_u ယ\u1037\u103A → o $creaky;
$vs_i $vs_u ယ\u103Aး → o $high;
$vs_i $vs_u ယ\u103A → o $low; # in က\u102D\u102Fယ\u103A /kò/
$vs_i $vs_u \u1037 → o $creaky;
$vs_i $vs_u း → o $high;
$vs_i $vs_u → o $low;
$vs_i $anusvara \u1037 → e $creaky ɪ\u032Fɴ;
$vs_i $anusvara း → e $high ɪ\u032Fɴ;
$vs_i $anusvara → e $low ɪ\u032Fɴ;
# Bare I is creaky; bare II (below) is low.
$vs_i → i $creaky;
$vs_ii \u1037 → i $creaky; # this does not usually occur
$vs_ii း → i $high;
$vs_ii → i $low;
# Rhymes starting with U+102F U.
$vs_u က\u103A → oʊ\u032Fʔ;
$vs_u ဂ\u103A → oʊ\u032Fʔ;
$vs_u ဏ\u1037\u103A → o $creaky ʊ\u032Fɴ;
$vs_u ဏ\u103Aး → o $high ʊ\u032Fɴ;
$vs_u ဏ\u103A → o $low ʊ\u032Fɴ;
$vs_u တ\u103A → oʊ\u032Fʔ;
$vs_u န\u1037\u103A → o $creaky ʊ\u032Fɴ;
$vs_u န\u103Aး → o $high ʊ\u032Fɴ;
$vs_u န\u103A → o $low ʊ\u032Fɴ;
$vs_u ပ\u103A → oʊ\u032Fʔ;
$vs_u မ\u1037\u103A → o $creaky ʊ\u032Fɴ;
$vs_u မ\u103Aး → o $high ʊ\u032Fɴ;
$vs_u မ\u103A → o $low ʊ\u032Fɴ;
$vs_u $anusvara \u1037 → o $creaky ʊ\u032Fɴ;
$vs_u $anusvara း → o $high ʊ\u032Fɴ;
$vs_u $anusvara → o $low ʊ\u032Fɴ;
# Bare U is creaky; bare UU (below) is low.
$vs_u → u $creaky;
$vs_uu \u1037 → u $creaky; # this does not usually occur
$vs_uu း → u $high;
$vs_uu → u $low;
# Rhymes starting with U+1031 E, including the E + AA combinations.
$vs_e တ\u103A → ɪʔ;
$vs_e $vs_aa က\u103A → aʊ\u032Fʔ;
$vs_e $vs_aa င\u1037\u103A → a $creaky ʊ\u032Fɴ;
$vs_e $vs_aa င\u103Aး → a $high ʊ\u032Fɴ;
$vs_e $vs_aa င\u103A → a $low ʊ\u032Fɴ;
$vs_e $vs_aa \u1037 → ɔ $creaky;
$vs_e $vs_aa း → ɔ $high; # redundant high tone; this does not usually occur
# E + AA is high by default; a directly following ASAT makes it low.
$vs_e $vs_aa \u103A → ɔ $low;
$vs_e $vs_aa → ɔ $high;
$vs_e \u1037 → e $creaky;
$vs_e း → e $high;
$vs_e → e $low;
# Rhymes starting with U+1032 AI; the unmarked spelling is high.
$vs_ai \u1037 → ɛ $creaky;
$vs_ai း → ɛ $high; # redundant high tone; this does not usually occur
$vs_ai → ɛ $high;
# ANUSVARA without a vowel sign.
$anusvara \u1037 → a $creaky ɴ;
$anusvara း → a $high ɴ;
$anusvara → a $low ɴ;
# MEDIAL WA followed directly by a final consonant acts as the vowel.
$med_w တ\u103A → ʊʔ;
$med_w န\u1037\u103A → ʊ $creaky ɴ;
$med_w န\u103Aး → ʊ $high ɴ;
$med_w န\u103A → ʊ $low ɴ;
$med_w ပ\u103A → ʊʔ;
$med_w မ\u1037\u103A → ʊ $creaky ɴ;
$med_w မ\u103Aး → ʊ $high ɴ;
$med_w မ\u103A → ʊ $low ɴ;
#
# Medials
#
::Null;
# Palatalization of the velar stops before MEDIAL YA and MEDIAL RA:
# velar + /j/ ==> modern palatals.
ကျ → t\u0361ɕ;
ချ → t\u0361ɕʰ;
ဂျ → d\u0361ʑ;
ဃျ → d\u0361ʑ;
ကြ → t\u0361ɕ;
ခြ → t\u0361ɕʰ;
ဂြ → d\u0361ʑ;
ဃြ → d\u0361ʑ;
# Remove redundant MEDIAL YA and MEDIAL RA after initial YA.
ယ { [$med_y $med_r] → ;
# Reorder the medials so that U+103E SIGN MEDIAL HA comes before any
# other medials. Each swap is followed by `::Null;` so that the next swap
# sees the already reordered text.
# First, push U+103E MEDIAL HA before U+103D MEDIAL WA.
\u103D \u103E → \u103E \u103D;
::Null;
# Now MEDIAL WA comes last.
# Produce the palatal ʃ from (SA|LA)+YA+HA.
သျ\u103E → ʃ;
လျ\u103E → ʃ;
# Second, push U+103E MEDIAL HA before U+103C MEDIAL RA.
\u103C \u103E → \u103E \u103C;
::Null;
# Finally, push U+103E MEDIAL HA before U+103B MEDIAL YA.
\u103B \u103E → \u103E \u103B;
::Null;
# Consume MEDIAL HA and apply devoicing.
# No `::Null;` occurs in this stretch, so the MEDIAL HA rules, the
# remaining medial rules, the initials, the independent vowels and the
# signs below are all matched together, in the order listed.
#
# Sonorant (or YA/RA) + MEDIAL HA: emit the devoiced consonant, or ʃ for
# YA and RA. MEDIAL HA directly follows the letter here because the
# reordering rules earlier in the file moved it in front of the other medials.
င\u103E → ŋ\u030A;
ဉ\u103E → ɲ\u0325;
ည\u103E → ɲ\u0325;
ဏ\u103E → n\u0325;
န\u103E → n\u0325;
မ\u103E → m\u0325;
ယ\u103E → ʃ;
ရ\u103E → ʃ;
လ\u103E → l\u0325;
ဝ\u103E → w\u0325;
ဠ\u103E → l\u0325;
# Drop any remaining U+103E MEDIAL HA.
\u103E → ;
# Simplify medial cluster /jw/ to /w/, i.e. drop U+103B MEDIAL YA and
# U+103C MEDIAL RA before U+103D MEDIAL WA. # TODO: revisit this
\u103B } \u103D → ;
\u103C } \u103D → ;
# Any MEDIAL YA or MEDIAL RA not dropped above becomes /j/; MEDIAL WA
# becomes /w/.
\u103B → j;
\u103C → j;
\u103D → w;
#
# Initials
#
# Velars
က → k;
ခ → kʰ;
ဂ → ɡ;
ဃ → ɡ;
င → ŋ;
# Historic palatals
စ → s;
ဆ → sʰ;
ဇ → z;
ဈ → z;
ဉ → ɲ;
ည → ɲ;
# Alveolars
ဋ → t;
ဌ → tʰ;
ဍ → d;
ဎ → d;
ဏ → n;
# Historic dentals ==> alveolars
တ → t;
ထ → tʰ;
ဒ → d;
ဓ → d;
န → n;
# Labials
ပ → p;
ဖ → pʰ;
ဗ → b;
ဘ → b;
မ → m;
# Other letters
ယ → j;
ရ → j; # historic /r/
# The LA + ASAT rule has to stay ahead of the plain LA rule so that the
# longer match is tried first.
လ\u103A → ; # final, typically not pronounced in native words
လ → l;
ဝ → w;
သ → θ; # historic /s/ ==> modern dental
ဟ → h;
ဠ → l;
အ → ʔ;
# Independent vowels
# Each vowel letter maps to a glottal stop plus a toned vowel. The variants
# followed by U+1037 or း are listed before the bare letter.
ဣ\u1037 → ʔḭ; # redundant creaky tone; this does not usually occur
ဣး → ʔí; # this does not usually occur
ဣ → ʔḭ;
ဤ\u1037 → ʔḭ; # this does not usually occur
ဤး → ʔí; # this does not usually occur
ဤ → ʔì;
ဥ\u1037 → ʔṵ; # redundant creaky tone; this does not usually occur
ဥး → ʔú; # this does not usually occur
ဥ → ʔṵ;
ဦ\u1037 → ʔṵ; # this does not usually occur
ဦး → ʔú;
ဦ → ʔù;
ဧ\u1037 → ʔḛ; # this does not usually occur
ဧး → ʔé;
ဧ → ʔè;
ဩ\u1037 → ʔɔ\u0330; # this does not usually occur
ဩး → ʔɔ\u0301; # redundant high tone; this does not usually occur
ဩ → ʔɔ\u0301;
ဪ\u1037 → ʔɔ\u0330; # this does not usually occur
ဪး → ʔɔ\u0301; # this does not usually occur
ဪ → ʔɔ\u0300;
# Various signs
# Each sign is replaced by a fixed pronunciation.
၌ → n\u0325aɪ\u032Fʔ;
၍ → jwḛ;
# ၎င\u103Aး was handled earlier.
# Last of the sign rules: ၏ is replaced by a fixed pronunciation.
၏ → ʔḭ;
#
# Postprocessing
#
# Delete any remaining U+103A ASAT.
$asat → ;
# Delete zero-width space, non-joiner, joiner.
[\u200B-\u200D] → ;
# Normalize the generated IPA string to NFC.
::NFC;