<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
<!-- Copyright © 1991-2015 Unicode, Inc.
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
For terms of use, see http://www.unicode.org/copyright.html -->
<supplementalData>
  <version number="$Revision: 12347 $" />
  <transforms>
    <transform source="my" target="my_FONIPA" direction="forward" alias="my-fonipa-t-my">
      <tRule><![CDATA[

# Pronunciation rules for Burmese.
#
# The following rules are lexical and heuristic: lexical in the sense
# that they generate phoneme strings which may further undergo
# post-lexical phonological processes, in particular voicing, to
# result in actual surface forms; heuristic in the sense that they try
# to resolve ambiguities, especially around reduced vowels, in a
# systematic way that may be incorrect in many situations. Vowel
# reduction depends on many factors, such as morphemic structure,
# which are not available here.
#
# Layout notes for maintainers:
#   * Every rule must stay on its own line: '#' starts a comment that
#     runs to the end of the line, so joining lines silently disables
#     every rule that follows a comment.
#   * '::Null;' ends the current pass. Rules after it run over the
#     complete output of the rules before it.
#   * Within a pass, the first matching rule wins, so longer rhymes are
#     listed before their shorter prefixes.

#
# Definitions
#

# Dependent vowel signs
$vs_AA = \u102B;
$vs_aa = \u102C;
$vs_i = \u102D;
$vs_ii = \u102E;
$vs_u = \u102F;
$vs_uu = \u1030;
$vs_e = \u1031;
$vs_ai = \u1032;

# Various signs
$anusvara = \u1036;
$visarga = \u1038;
$virama = \u1039;
$asat = \u103A;

# Dependent (medial) consonant signs
$med_y = \u103B;
$med_r = \u103C;
$med_w = \u103D;
$med_h = \u103E;

# Independent letters and letter-like punctuation symbols
$independent = [\u1000-\u102A \u103F \u104C-\u104F \u1050-\u1055];

# Combining tone marks used in the IPA output.
$creaky = \u0330;
$high = \u0301;
$low = \u0300;

#
# Preprocessing
#

::NFC;

# Replace U+102B TALL AA with U+102C AA. Their pronunciation is identical.
$vs_AA → $vs_aa;

# Unstack kinzi (င် plus U+1039 VIRAMA) into plain င်.
# Hmm, what would happen if the syllable ending in kinzi had non-low tone?
င် $virama → င်;

# Unstack everything else, i.e. replace U+1039 VIRAMA with U+103A ASAT.
$virama → $asat;

# Unstack U+103F GREAT SA.
ဿ → သ်သ;

# Insert a syllable boundary marker /./ before every independent letter.
# '$' as the last item of a set is the text-boundary anchor, so [^.$]
# means "preceded by something other than a period or the start of text".
# The trailing [^\u103A] skips letters that carry ASAT, i.e. codas.
::Null;
[^.$] { } $independent ([\u1037\u103B-\u103E])* [^\u103A] → \.;

# Insert default inherent vowel: /a̰/ at the end, /ə/ everywhere else.
# [$] matches the end of the text; \. is the syllable boundary marker
# inserted by the previous pass.
::Null;
([\u1000-\u1021\u103F] [\u103B-\u103E]*) } [$] → $1 a $creaky;
([\u1000-\u1021\u103F] [\u103B-\u103E]*) } \. → $1 ə;

# Allow for additional coda consonants.
#
# This only covers a few of the cases in which full coda consonants
# can appear in loanwords. The general situation is somewhat rare and
# is more easily dealt with in a formalism that can impose structural
# constraints on syllables more easily.
::Null;
$asat ($visarga)? [\u1000-\u102A] { $asat → ;

# Deal with ၎င်း early.
၎င်း → lə\.ɡa $high ʊ̯ɴ;

#
# Rhymes
#

::Null;

က် → ɛʔ;

ဂ် → ɛʔ; # in မဂ္ဂဇင်း ~ မဂ်ဂဇင်း /mɛʔ.ɡə.zɪ́ɴ/

င့် → ɪ $creaky ɴ;
င်း → ɪ $high ɴ;
င် → ɪ $low ɴ;

စ် → ɪʔ; # maybe sometimes /eɪ̯ʔ/

ဉ့် → ɪ $creaky ɴ;
ဉ်း → ɪ $high ɴ;
ဉ် → ɪ $low ɴ;

ည့် → ɛ $creaky;
ည်း → ɛ $high;
ည် → ɛ $low;

ဏ့် → a $creaky ɴ;
ဏ်း → a $high ɴ;
ဏ် → a $low ɴ;

တ် → aʔ;

န့် → a $creaky ɴ;
န်း → a $high ɴ;
န် → a $low ɴ;

ပ် → aʔ;

မ့် → a $creaky ɴ;
မ်း → a $high ɴ;
မ် → a $low ɴ;

ယ့် → ɛ $creaky;
ယ်း → ɛ $high;
ယ် → ɛ $low;

သ် → aʔ;

$vs_aa ဉ့် → ɪ $creaky ɴ;
$vs_aa ဉ်း → ɪ $high ɴ;
$vs_aa ဉ် → ɪ $low ɴ;
$vs_aa တ် → aʔ;
$vs_aa ဏ့် → a $creaky ɴ;
$vs_aa ဏ်း → a $high ɴ;
$vs_aa ဏ် → a $low ɴ;
$vs_aa န့် → a $creaky ɴ;
$vs_aa န်း → a $high ɴ;
$vs_aa န် → a $low ɴ;
$vs_aa ပ် → aʔ; # in ကလာပ်စည်း /kə.laʔ.sɛ́/ (club cell)
$vs_aa ယ့် → ɛ $creaky;
$vs_aa ယ်း → ɛ $high;
$vs_aa ယ် → ɛ $low;
$vs_aa ့ → a $creaky; # redundant creaky tone
$vs_aa း → a $high;
$vs_aa → a $low;

$vs_i က် → eɪ̯ʔ;
$vs_i စ် → eɪ̯ʔ;
$vs_i တ် → eɪ̯ʔ;
$vs_i န့် → e $creaky ɪ̯ɴ;
$vs_i န်း → e $high ɪ̯ɴ;
$vs_i န် → e $low ɪ̯ɴ;
$vs_i ပ် → eɪ̯ʔ;
$vs_i မ့် → e $creaky ɪ̯ɴ;
$vs_i မ်း → e $high ɪ̯ɴ;
$vs_i မ် → e $low ɪ̯ɴ;
$vs_i $vs_u က် → aɪ̯ʔ;
$vs_i $vs_u င့် → a $creaky ɪ̯ɴ;
$vs_i $vs_u င်း → a $high ɪ̯ɴ;
$vs_i $vs_u င် → a $low ɪ̯ɴ;
$vs_i $vs_u ဏ့် → a $creaky ɪ̯ɴ;
$vs_i $vs_u ဏ်း → a $high ɪ̯ɴ;
$vs_i $vs_u ဏ် → a $low ɪ̯ɴ;
$vs_i $vs_u ယ့် → o $creaky;
$vs_i $vs_u ယ်း → o $high;
$vs_i $vs_u ယ် → o $low; # in ကိုယ် /kò/
$vs_i $vs_u ့ → o $creaky;
$vs_i $vs_u း → o $high;
$vs_i $vs_u → o $low;
$vs_i $anusvara ့ → e $creaky ɪ̯ɴ;
$vs_i $anusvara း → e $high ɪ̯ɴ;
$vs_i $anusvara → e $low ɪ̯ɴ;
$vs_i → i $creaky;

$vs_ii ့ → i $creaky; # this does not usually occur
$vs_ii း → i $high;
$vs_ii → i $low;

$vs_u က် → oʊ̯ʔ;
$vs_u ဂ် → oʊ̯ʔ;
$vs_u ဏ့် → o $creaky ʊ̯ɴ;
$vs_u ဏ်း → o $high ʊ̯ɴ;
$vs_u ဏ် → o $low ʊ̯ɴ;
$vs_u တ် → oʊ̯ʔ;
$vs_u န့် → o $creaky ʊ̯ɴ;
$vs_u န်း → o $high ʊ̯ɴ;
$vs_u န် → o $low ʊ̯ɴ;
$vs_u ပ် → oʊ̯ʔ;
$vs_u မ့် → o $creaky ʊ̯ɴ;
$vs_u မ်း → o $high ʊ̯ɴ;
$vs_u မ် → o $low ʊ̯ɴ;
$vs_u $anusvara ့ → o $creaky ʊ̯ɴ;
$vs_u $anusvara း → o $high ʊ̯ɴ;
$vs_u $anusvara → o $low ʊ̯ɴ;
$vs_u → u $creaky;

$vs_uu ့ → u $creaky; # this does not usually occur
$vs_uu း → u $high;
$vs_uu → u $low;

$vs_e တ် → ɪʔ;
$vs_e $vs_aa က် → aʊ̯ʔ;
$vs_e $vs_aa င့် → a $creaky ʊ̯ɴ;
$vs_e $vs_aa င်း → a $high ʊ̯ɴ;
$vs_e $vs_aa င် → a $low ʊ̯ɴ;
$vs_e $vs_aa ့ → ɔ $creaky;
$vs_e $vs_aa း → ɔ $high; # redundant high tone; this does not usually occur
$vs_e $vs_aa ် → ɔ $low;
$vs_e $vs_aa → ɔ $high;
$vs_e ့ → e $creaky;
$vs_e း → e $high;
$vs_e → e $low;

$vs_ai ့ → ɛ $creaky;
$vs_ai း → ɛ $high; # redundant high tone; this does not usually occur
$vs_ai → ɛ $high;

$anusvara ့ → a $creaky ɴ;
$anusvara း → a $high ɴ;
$anusvara → a $low ɴ;

$med_w တ် → ʊʔ;
$med_w န့် → ʊ $creaky ɴ;
$med_w န်း → ʊ $high ɴ;
$med_w န် → ʊ $low ɴ;
$med_w ပ် → ʊʔ;
$med_w မ့် → ʊ $creaky ɴ;
$med_w မ်း → ʊ $high ɴ;
$med_w မ် → ʊ $low ɴ;

#
# Medials
#

::Null;

# Palatalization of the velar stops before MEDIAL YA and MEDIAL RA:
# velar + /j/ ==> modern palatals.

ကျ → t͡ɕ;
ချ → t͡ɕʰ;
ဂျ → d͡ʑ;
ဃျ → d͡ʑ;

ကြ → t͡ɕ;
ခြ → t͡ɕʰ;
ဂြ → d͡ʑ;
ဃြ → d͡ʑ;

# Remove redundant MEDIAL YA and MEDIAL RA after initial YA.
ယ { [$med_y $med_r] → ;

# Reorder the medials so that U+103E SIGN MEDIAL HA comes before any
# other medials.

# First, push U+103E MEDIAL HA before U+103D MEDIAL WA.
\u103D \u103E → \u103E \u103D;
::Null;
# Now MEDIAL WA comes last.

# Produce the palatal ʃ from (SA|LA)+YA+HA.
သျှ → ʃ;
လျှ → ʃ;

# Second, push U+103E MEDIAL HA before U+103C MEDIAL RA.
\u103C \u103E → \u103E \u103C;
::Null;

# Finally, push U+103E MEDIAL HA before U+103B MEDIAL YA.
\u103B \u103E → \u103E \u103B;
::Null;

# Consume MEDIAL HA and apply devoicing.

ငှ → ŋ̊;
ဉှ → ɲ̥;
ညှ → ɲ̥;
ဏှ → n̥;
နှ → n̥;
မှ → m̥;
ယှ → ʃ;
ရှ → ʃ;
လှ → l̥;
ဝှ → w̥;
ဠှ → l̥;

# Drop any remaining U+103E MEDIAL HA.
\u103E → ;

# Simplify medial cluster /jw/ to /w/, i.e. drop U+103B MEDIAL YA and
# U+103C MEDIAL RA before U+103D MEDIAL WA.
# TODO: revisit this
\u103B } \u103D → ;
\u103C } \u103D → ;

\u103B → j;
\u103C → j;
\u103D → w;

#
# Initials
#

# Velars
က → k;
ခ → kʰ;
ဂ → ɡ;
ဃ → ɡ;
င → ŋ;

# Historic palatals
စ → s;
ဆ → sʰ;
ဇ → z;
ဈ → z;
ဉ → ɲ;
ည → ɲ;

# Alveolars
ဋ → t;
ဌ → tʰ;
ဍ → d;
ဎ → d;
ဏ → n;

# Historic dentals ==> alveolars
တ → t;
ထ → tʰ;
ဒ → d;
ဓ → d;
န → n;

# Labials
ပ → p;
ဖ → pʰ;
ဗ → b;
ဘ → b;
မ → m;

# Other letters
ယ → j;
ရ → j; # historic /r/
လ် → ; # final, typically not pronounced in native words
လ → l;
ဝ → w;
သ → θ; # historic /s/ ==> modern dental
ဟ → h;
ဠ → l;
အ → ʔ;

# Independent vowels

ဣ့ → ʔḭ; # redundant creaky tone; this does not usually occur
ဣး → ʔí; # this does not usually occur
ဣ → ʔḭ;

ဤ့ → ʔḭ; # this does not usually occur
ဤး → ʔí; # this does not usually occur
ဤ → ʔì;

ဥ့ → ʔṵ; # redundant creaky tone; this does not usually occur
ဥး → ʔú; # this does not usually occur
ဥ → ʔṵ;

ဦ့ → ʔṵ; # this does not usually occur
ဦး → ʔú;
ဦ → ʔù;

ဧ့ → ʔḛ; # this does not usually occur
ဧး → ʔé;
ဧ → ʔè;

ဩ့ → ʔɔ̰; # this does not usually occur
ဩး → ʔɔ́; # redundant high tone; this does not usually occur
ဩ → ʔɔ́;

ဪ့ → ʔɔ̰; # this does not usually occur
ဪး → ʔɔ́; # this does not usually occur
ဪ → ʔɔ̀;

# Various signs

၌ → n̥aɪ̯ʔ;
၍ → jwḛ;
# ၎င်း was handled earlier.
၏ → ʔḭ;

#
# Postprocessing
#

# Delete any remaining U+103A ASAT.
$asat → ;

# Delete zero-width space, non-joiner, joiner.
[\u200B-\u200D] → ;

::NFC;

      ]]></tRule>
    </transform>
  </transforms>
</supplementalData>