• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# © 2016 and later: Unicode, Inc. and others.
2# License & terms of use: http://www.unicode.org/copyright.html#License
3#
4# File: my_my_FONIPA.txt
5# Generated from CLDR
6#
7
8# Pronunciation rules for Burmese.
9#
10# The following rules are lexical and heuristic: lexical in the sense
11# that they generate phoneme strings which may further undergo
12# post-lexical phonological processes, in particular voicing, to
13# result in actual surface forms; heuristic in the sense that they try
14# to resolve ambiguities, especially around reduced vowels, in a
15# systematic way that may be incorrect in many situations. Vowel
16# reduction depends on many factors, such as morphemic structure,
17# which are not available here.
18#
19# Definitions: variables naming the Myanmar signs and IPA tone diacritics that the rules refer to.
20#
21# Dependent vowel signs
22$vs_AA = \u102B;  # U+102B MYANMAR VOWEL SIGN TALL AA
23$vs_aa = \u102C;  # U+102C MYANMAR VOWEL SIGN AA
24$vs_i = \u102D;  # U+102D MYANMAR VOWEL SIGN I
25$vs_ii = \u102E;  # U+102E MYANMAR VOWEL SIGN II
26$vs_u = \u102F;  # U+102F MYANMAR VOWEL SIGN U
27$vs_uu = \u1030;  # U+1030 MYANMAR VOWEL SIGN UU
28$vs_e = \u1031;  # U+1031 MYANMAR VOWEL SIGN E
29$vs_ai = \u1032;  # U+1032 MYANMAR VOWEL SIGN AI
30# Various signs
31$anusvara = \u1036;  # U+1036 MYANMAR SIGN ANUSVARA
32$visarga = \u1038;  # U+1038 MYANMAR SIGN VISARGA
33$virama = \u1039;  # U+1039 MYANMAR SIGN VIRAMA
34$asat = \u103A;  # U+103A MYANMAR SIGN ASAT
35# Dependent (medial) consonant signs
36$med_y = \u103B;  # U+103B MYANMAR CONSONANT SIGN MEDIAL YA
37$med_r = \u103C;  # U+103C MYANMAR CONSONANT SIGN MEDIAL RA
38$med_w = \u103D;  # U+103D MYANMAR CONSONANT SIGN MEDIAL WA
39$med_h = \u103E;  # U+103E MYANMAR CONSONANT SIGN MEDIAL HA
40# Independent letters and letter-like punctuation symbols
41$independent = [\u1000-\u102A \u103F \u104C-\u104F \u1050-\u1055];
42$creaky = \u0330;  # U+0330 COMBINING TILDE BELOW, emitted for creaky tone
43$high = \u0301;  # U+0301 COMBINING ACUTE ACCENT, emitted for high tone
44$low = \u0300;  # U+0300 COMBINING GRAVE ACCENT, emitted for low tone
45$coda = [$creaky $high $low ɴ ʔ ə];  # TODO: remove if unused (no rule visible in this file references $coda)
46#
47# Preprocessing: normalization, unstacking, syllable boundaries, inherent vowels.
48#
49::NFC;  # normalize the input to NFC before any rule is matched
50# Replace U+102B TALL AA with U+102C AA. Their pronunciation is identical.
51$vs_AA → $vs_aa;
52# Unstack kinzi (င\u103A plus U+1039 VIRAMA) into plain င\u103A.
53# TODO: it is unclear what should happen if the syllable ending in kinzi has a non-low tone.
54င\u103A $virama → င\u103A;
55# Unstack everything else, i.e. replace U+1039 VIRAMA with U+103A ASAT.
56$virama → $asat;
57# Unstack U+103F GREAT SA.
58ဿ → သ\u103Aသ;
59# Insert a syllable boundary marker /./ before every independent letter. NOTE(review): the contexts appear to skip the text start, positions already preceded by /./, and letters followed (after any U+1037 or medials) by U+103A ASAT; confirm against ICU anchor semantics.
60::Null;  # pass break: the rule below runs on the fully unstacked text
61[^.$] { } $independent ([\u1037\u103B-\u103E])* [^\u103A] → \.;
62# Insert default inherent vowel: /a\u0330/ at the end, /ə/ everywhere else.
63::Null;  # pass break: the rules below rely on the /./ markers inserted above
64([\u1000-\u1021\u103F] [\u103B-\u103E]*) } [$] → $1 a $creaky;
65([\u1000-\u1021\u103F] [\u103B-\u103E]*) } \.  → $1 ə;
66# Allow for additional coda consonants.
67#
68# This only covers a few of the cases in which full coda consonants
69# can appear in loanwords. The general situation is somewhat rare and
70# is more easily dealt with in a formalism that can impose structural
71# constraints on syllables more easily.
72::Null;
73$asat ($visarga)? [\u1000-\u102A] { $asat → ;  # drop an ASAT that follows ASAT (plus optional VISARGA) and one letter
74# Deal with ၎င\u103Aး early, before the rhyme rules can rewrite its င\u103Aး part.
75၎င\u103Aး → lə\.ɡa $high ʊ\u032Fɴ;
76# Rule order matters: a bare sign's rule is listed after the longer patterns that start with that sign.
77# Rhymes: vowel sign + final consonant + tone mark sequences are rewritten to IPA rhymes.
78# In every rule here U+1037 (DOT BELOW) yields $creaky and း (U+1038 VISARGA) yields $high.
79::Null;  # pass break: rhymes are matched on the preprocessed text
80က\u103A → ɛʔ;
81ဂ\u103A → ɛʔ;  # in မဂ\u1039ဂဇင\u103Aး ~ မဂ\u103Aဂဇင\u103Aး /mɛʔ.ɡə.zɪ\u0301ɴ/
82င\u1037\u103A → ɪ $creaky ɴ;
83င\u103Aး → ɪ $high ɴ;
84င\u103A → ɪ $low ɴ;
85စ\u103A → ɪʔ;  # maybe sometimes /eɪ\u032Fʔ/
86ဉ\u1037\u103A → ɪ $creaky ɴ;
87ဉ\u103Aး → ɪ $high ɴ;
88ဉ\u103A → ɪ $low ɴ;
89ည\u1037\u103A → ɛ $creaky;
90ည\u103Aး → ɛ $high;
91ည\u103A → ɛ $low;
92ဏ\u1037\u103A → a $creaky ɴ;
93ဏ\u103Aး → a $high ɴ;
94ဏ\u103A → a $low ɴ;
95တ\u103A → aʔ;
96န\u1037\u103A → a $creaky ɴ;
97န\u103Aး → a $high ɴ;
98န\u103A → a $low ɴ;
99ပ\u103A → aʔ;
100မ\u1037\u103A → a $creaky ɴ;
101မ\u103Aး → a $high ɴ;
102မ\u103A → a $low ɴ;
103ယ\u1037\u103A → ɛ $creaky;
104ယ\u103Aး → ɛ $high;
105ယ\u103A → ɛ $low;
106သ\u103A → aʔ;
107$vs_aa ဉ\u1037\u103A → ɪ $creaky ɴ;
108$vs_aa ဉ\u103Aး → ɪ $high ɴ;
109$vs_aa ဉ\u103A → ɪ $low ɴ;
110$vs_aa တ\u103A → aʔ;
111$vs_aa ဏ\u1037\u103A → a $creaky ɴ;
112$vs_aa ဏ\u103Aး → a $high ɴ;
113$vs_aa ဏ\u103A → a $low ɴ;
114$vs_aa န\u1037\u103A → a $creaky ɴ;
115$vs_aa န\u103Aး → a $high ɴ;
116$vs_aa န\u103A → a $low ɴ;
117$vs_aa ပ\u103A → aʔ;  # in ကလာပ\u103Aစည\u103Aး /kə.laʔ.sɛ\u0301/ (club cell)
118$vs_aa ယ\u1037\u103A → ɛ $creaky;
119$vs_aa ယ\u103Aး → ɛ $high;
120$vs_aa ယ\u103A → ɛ $low;
121$vs_aa \u1037 → a $creaky;  # redundant creaky tone
122$vs_aa း → a $high;
123$vs_aa → a $low;
124$vs_i က\u103A → eɪ\u032Fʔ;
125$vs_i စ\u103A → eɪ\u032Fʔ;
126$vs_i တ\u103A → eɪ\u032Fʔ;
127$vs_i န\u1037\u103A → e $creaky ɪ\u032Fɴ;
128$vs_i န\u103Aး → e $high ɪ\u032Fɴ;
129$vs_i န\u103A → e $low ɪ\u032Fɴ;
130$vs_i ပ\u103A → eɪ\u032Fʔ;
131$vs_i မ\u1037\u103A → e $creaky ɪ\u032Fɴ;
132$vs_i မ\u103Aး → e $high ɪ\u032Fɴ;
133$vs_i မ\u103A → e $low ɪ\u032Fɴ;
134$vs_i $vs_u က\u103A → aɪ\u032Fʔ;
135$vs_i $vs_u င\u1037\u103A → a $creaky ɪ\u032Fɴ;
136$vs_i $vs_u င\u103Aး → a $high ɪ\u032Fɴ;
137$vs_i $vs_u င\u103A → a $low ɪ\u032Fɴ;
138$vs_i $vs_u ဏ\u1037\u103A → a $creaky ɪ\u032Fɴ;
139$vs_i $vs_u ဏ\u103Aး → a $high ɪ\u032Fɴ;
140$vs_i $vs_u ဏ\u103A → a $low ɪ\u032Fɴ;
141$vs_i $vs_u ယ\u1037\u103A → o $creaky;
142$vs_i $vs_u ယ\u103Aး → o $high;
143$vs_i $vs_u ယ\u103A → o $low;  # in က\u102D\u102Fယ\u103A /kò/
144$vs_i $vs_u \u1037 → o $creaky;
145$vs_i $vs_u း → o $high;
146$vs_i $vs_u → o $low;
147$vs_i $anusvara \u1037 → e $creaky ɪ\u032Fɴ;
148$vs_i $anusvara း → e $high ɪ\u032Fɴ;
149$vs_i $anusvara → e $low ɪ\u032Fɴ;
150$vs_i → i $creaky;
151$vs_ii \u1037 → i $creaky;  # this does not usually occur
152$vs_ii း → i $high;
153$vs_ii → i $low;
154$vs_u က\u103A → oʊ\u032Fʔ;
155$vs_u ဂ\u103A → oʊ\u032Fʔ;
156$vs_u ဏ\u1037\u103A → o $creaky ʊ\u032Fɴ;
157$vs_u ဏ\u103Aး → o $high ʊ\u032Fɴ;
158$vs_u ဏ\u103A → o $low ʊ\u032Fɴ;
159$vs_u တ\u103A → oʊ\u032Fʔ;
160$vs_u န\u1037\u103A → o $creaky ʊ\u032Fɴ;
161$vs_u န\u103Aး → o $high ʊ\u032Fɴ;
162$vs_u န\u103A → o $low ʊ\u032Fɴ;
163$vs_u ပ\u103A → oʊ\u032Fʔ;
164$vs_u မ\u1037\u103A → o $creaky ʊ\u032Fɴ;
165$vs_u မ\u103Aး → o $high ʊ\u032Fɴ;
166$vs_u မ\u103A → o $low ʊ\u032Fɴ;
167$vs_u $anusvara \u1037 → o $creaky ʊ\u032Fɴ;
168$vs_u $anusvara း → o $high ʊ\u032Fɴ;
169$vs_u $anusvara → o $low ʊ\u032Fɴ;
170$vs_u → u $creaky;
171$vs_uu \u1037 → u $creaky;  # this does not usually occur
172$vs_uu း → u $high;
173$vs_uu → u $low;
174$vs_e တ\u103A → ɪʔ;
175$vs_e $vs_aa က\u103A → aʊ\u032Fʔ;
176$vs_e $vs_aa င\u1037\u103A → a $creaky ʊ\u032Fɴ;
177$vs_e $vs_aa င\u103Aး → a $high ʊ\u032Fɴ;
178$vs_e $vs_aa င\u103A → a $low ʊ\u032Fɴ;
179$vs_e $vs_aa \u1037 → ɔ $creaky;
180$vs_e $vs_aa း → ɔ $high;  # redundant high tone; this does not usually occur
181$vs_e $vs_aa \u103A → ɔ $low;
182$vs_e $vs_aa → ɔ $high;
183$vs_e \u1037 → e $creaky;
184$vs_e း → e $high;
185$vs_e → e $low;
186$vs_ai \u1037 → ɛ $creaky;
187$vs_ai း → ɛ $high;  # redundant high tone; this does not usually occur
188$vs_ai → ɛ $high;
189$anusvara \u1037 → a $creaky ɴ;
190$anusvara း → a $high ɴ;
191$anusvara → a $low ɴ;
192$med_w တ\u103A → ʊʔ;
193$med_w န\u1037\u103A → ʊ $creaky ɴ;
194$med_w န\u103Aး → ʊ $high ɴ;
195$med_w န\u103A → ʊ $low ɴ;
196$med_w ပ\u103A → ʊʔ;
197$med_w မ\u1037\u103A → ʊ $creaky ɴ;
198$med_w မ\u103Aး → ʊ $high ɴ;
199$med_w မ\u103A → ʊ $low ɴ;
200#
201# Medials: palatalize velars, move MEDIAL HA to the front, devoice, then spell out the remaining medials.
202#
203::Null;  # pass break: medials are handled after all rhymes have been rewritten
204# Palatalization of the velar stops before MEDIAL YA and MEDIAL RA:
205# velar + /j/ ==> modern palatals.
206ကျ → t\u0361ɕ;
207ချ → t\u0361ɕʰ;
208ဂျ → d\u0361ʑ;
209ဃျ → d\u0361ʑ;
210ကြ → t\u0361ɕ;
211ခြ → t\u0361ɕʰ;
212ဂြ → d\u0361ʑ;
213ဃြ → d\u0361ʑ;
214# Remove redundant MEDIAL YA and MEDIAL RA after initial YA.
215ယ { [$med_y $med_r] → ;
216# Reorder the medials so that U+103E MEDIAL HA comes before any
217# other medials.
218# First, push U+103E MEDIAL HA before U+103D MEDIAL WA.
219\u103D \u103E → \u103E \u103D;
220::Null;  # pass break: the rules below see the WA/HA swap applied everywhere
221# Now MEDIAL WA comes last.
222# Produce the palatal ʃ from (SA|LA)+YA+HA.
223သျ\u103E → ʃ;
224လျ\u103E → ʃ;
225# Second, push U+103E MEDIAL HA before U+103C MEDIAL RA.
226\u103C \u103E → \u103E \u103C;
227::Null;  # pass break: the RA/HA swap must be complete before the YA/HA swap
228# Finally, push U+103E MEDIAL HA before U+103B MEDIAL YA.
229\u103B \u103E → \u103E \u103B;
230::Null;  # pass break: MEDIAL HA now directly follows its base letter
231# Consume MEDIAL HA and apply devoicing.
232င\u103E → ŋ\u030A;
233ဉ\u103E → ɲ\u0325;
234ည\u103E → ɲ\u0325;
235ဏ\u103E → n\u0325;
236န\u103E → n\u0325;
237မ\u103E → m\u0325;
238ယ\u103E → ʃ;
239ရ\u103E → ʃ;
240လ\u103E → l\u0325;
241ဝ\u103E → w\u0325;
242ဠ\u103E → l\u0325;
243# Drop any remaining U+103E MEDIAL HA.
244\u103E → ;
245# Simplify medial cluster /jw/ to /w/, i.e. drop U+103B MEDIAL YA and
246# U+103C MEDIAL RA before U+103D MEDIAL WA.  # TODO: revisit this
247\u103B } \u103D → ;
248\u103C } \u103D → ;
249\u103B → j;  # any remaining MEDIAL YA
250\u103C → j;  # any remaining MEDIAL RA
251\u103D → w;  # any remaining MEDIAL WA
252#
253# Initials (no ::Null precedes this section, so these rules share a pass with the MEDIAL HA and remaining-medial rules above)
254#
255# Velars
256က → k;
257ခ → kʰ;
258ဂ → ɡ;
259ဃ → ɡ;
260င → ŋ;
261# Historic palatals
262စ → s;
263ဆ → sʰ;
264ဇ → z;
265ဈ → z;
266ဉ → ɲ;
267ည → ɲ;
268# Alveolars
269ဋ → t;
270ဌ → tʰ;
271ဍ → d;
272ဎ → d;
273ဏ → n;
274# Historic dentals ==> alveolars
275တ → t;
276ထ → tʰ;
277ဒ → d;
278ဓ → d;
279န → n;
280# Labials
281ပ → p;
282ဖ → pʰ;
283ဗ → b;
284ဘ → b;
285မ → m;
286# Other letters
287ယ → j;
288ရ → j;  # historic /r/
289လ\u103A → ;  # final, typically not pronounced in native words; listed before the bare လ rule so it matches first
290လ → l;
291ဝ → w;
292သ → θ;  # historic /s/ ==> modern dental
293ဟ → h;
294ဠ → l;
295အ → ʔ;
296# Independent vowels (each is output with an initial glottal stop /ʔ/)
297ဣ\u1037 → ʔḭ;  # redundant creaky tone; this does not usually occur
298ဣး → ʔí;  # this does not usually occur
299ဣ → ʔḭ;
300ဤ\u1037 → ʔḭ;  # this does not usually occur
301ဤး → ʔí;  # this does not usually occur
302ဤ → ʔì;
303ဥ\u1037 → ʔṵ;  # redundant creaky tone; this does not usually occur
304ဥး → ʔú;  # this does not usually occur
305ဥ → ʔṵ;
306ဦ\u1037 → ʔṵ;  # this does not usually occur
307ဦး → ʔú;
308ဦ → ʔù;
309ဧ\u1037 → ʔḛ;  # this does not usually occur
310ဧး → ʔé;
311ဧ → ʔè;
312ဩ\u1037 → ʔɔ\u0330;  # this does not usually occur
313ဩး → ʔɔ\u0301;  # redundant high tone; this does not usually occur
314ဩ → ʔɔ\u0301;
315ဪ\u1037 → ʔɔ\u0330;  # this does not usually occur
316ဪး → ʔɔ\u0301;  # this does not usually occur
317ဪ → ʔɔ\u0300;
318# Various signs
319၌ → n\u0325aɪ\u032Fʔ;
320၍ → jwḛ;
321# ၎င\u103Aး was handled earlier.
322၏ → ʔḭ;
323#
324# Postprocessing: remove leftover marks and format characters, then renormalize.
325#
326# Delete any remaining U+103A ASAT.
327$asat → ;
328# Delete zero-width space, non-joiner, joiner.
329[\u200B-\u200D] → ;
330::NFC;  # normalize the generated IPA output to NFC
331
332