• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# © 2016 and later: Unicode, Inc. and others.
2# License & terms of use: http://www.unicode.org/copyright.html
3# Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
4#
5# File: my_my_FONIPA.txt
6# Generated from CLDR
7#
8
9# Pronunciation rules for Burmese.
10#
11# The following rules are lexical and heuristic: lexical in the sense
12# that they generate phoneme strings which may further undergo
13# post-lexical phonological processes, in particular voicing, to
14# result in actual surface forms; heuristic in the sense that they try
15# to resolve ambiguities, especially around reduced vowels, in a
16# systematic way that may be incorrect in many situations. Vowel
17# reduction depends on many factors, such as morphemic structure,
18# which are not available here.
19#
20# Definitions: variables naming the code points and character sets used by the rules below.
21#
22# Dependent vowel signs (U+102B..U+1032)
23$vs_AA = \u102B;  # U+102B MYANMAR VOWEL SIGN TALL AA (folded into $vs_aa during preprocessing)
24$vs_aa = \u102C;  # U+102C MYANMAR VOWEL SIGN AA
25$vs_i = \u102D;  # U+102D MYANMAR VOWEL SIGN I
26$vs_ii = \u102E;  # U+102E MYANMAR VOWEL SIGN II
27$vs_u = \u102F;  # U+102F MYANMAR VOWEL SIGN U
28$vs_uu = \u1030;  # U+1030 MYANMAR VOWEL SIGN UU
29$vs_e = \u1031;  # U+1031 MYANMAR VOWEL SIGN E
30$vs_ai = \u1032;  # U+1032 MYANMAR VOWEL SIGN AI
31# Various signs (U+1037 SIGN DOT BELOW has no variable and is written literally as \u1037 in the rules)
32$anusvara = \u1036;  # U+1036 MYANMAR SIGN ANUSVARA
33$visarga = \u1038;  # U+1038 MYANMAR SIGN VISARGA
34$virama = \u1039;  # U+1039 MYANMAR SIGN VIRAMA (stacking mark, replaced during preprocessing)
35$asat = \u103A;  # U+103A MYANMAR SIGN ASAT (marks a final consonant)
36# Dependent (medial) consonant signs
37$med_y = \u103B;  # U+103B MYANMAR CONSONANT SIGN MEDIAL YA
38$med_r = \u103C;  # U+103C MYANMAR CONSONANT SIGN MEDIAL RA
39$med_w = \u103D;  # U+103D MYANMAR CONSONANT SIGN MEDIAL WA
40$med_h = \u103E;  # U+103E MYANMAR CONSONANT SIGN MEDIAL HA
41# Independent letters and letter-like punctuation symbols
42$independent = [\u1000-\u102A \u103F \u104C-\u104F \u1050-\u1055];  # letters U+1000..U+102A, GREAT SA, symbols U+104C..U+104F, letters U+1050..U+1055
43$creaky = \u0330;  # U+0330 COMBINING TILDE BELOW, emitted after a vowel for creaky tone
44$high = \u0301;  # U+0301 COMBINING ACUTE ACCENT, emitted after a vowel for high tone
45$low = \u0300;  # U+0300 COMBINING GRAVE ACCENT, emitted after a vowel for low tone
46$coda = [$creaky $high $low ɴ ʔ ə];  # TODO: remove if unused (no rule visible in this file references $coda)
47#
48# Preprocessing: normalize the input, unstack conjuncts, mark syllable boundaries and supply inherent vowels.
49#
50::NFC;  # work on canonically composed text
51# Replace U+102B TALL AA with U+102C AA. Their pronunciation is identical.
52$vs_AA → $vs_aa;
53# Unstack kinzi (င\u103A plus U+1039 VIRAMA) into plain င\u103A.
54# Open question: what happens if the syllable ending in kinzi has a non-low tone?
55င\u103A $virama → င\u103A;  # must precede the general VIRAMA rule below, which would otherwise leave two ASATs
56# Unstack everything else, i.e. replace U+1039 VIRAMA with U+103A ASAT.
57$virama → $asat;
58# Unstack U+103F GREAT SA.
59ဿ → သ\u103Aသ;  # written out as final SA (with ASAT) followed by initial SA
60# Insert a syllable boundary marker /./ before every independent letter.
61::Null;  # pass boundary: the rules below run on the output of the rules above
62[^.$] { } $independent ([\u1037\u103B-\u103E])* [^\u103A] → \.;  # skipped after an existing "." and before a letter that carries ASAT (a final); NOTE(review): "$" inside the set presumably excludes start of text, confirm against ICU set syntax
63# Insert default inherent vowel: /a\u0330/ at the end, /ə/ everywhere else.
64::Null;  # pass boundary: the boundary markers inserted above are now visible as context
65([\u1000-\u1021\u103F] [\u103B-\u103E]*) } [$] → $1 a $creaky;  # consonant plus optional medials directly before end of text
66([\u1000-\u1021\u103F] [\u103B-\u103E]*) } \.  → $1 ə;  # consonant plus optional medials directly before a boundary marker
67# Allow for additional coda consonants.
68#
69# This only covers a few of the cases in which full coda consonants
70# can appear in loanwords. The general situation is somewhat rare and
71# is more easily dealt with in a formalism that can impose structural
72# constraints on syllables more easily.
73::Null;  # pass boundary
74$asat ($visarga)? [\u1000-\u102A] { $asat → ;  # drop the ASAT of a letter that directly follows another final (ASAT, optional VISARGA), keeping the letter itself
75# Deal with ၎င\u103Aး early.
76၎င\u103Aး → lə\.ɡa $high ʊ\u032Fɴ;  # U+104E SYMBOL AFOREMENTIONED plus င\u103Aး, rewritten as a whole before the rhyme rules can consume င\u103Aး
77#
78# Rhymes: rewrite vowel sign + final consonant (with ASAT) + tone mark sequences into IPA vowel, tone diacritic and coda.
79# Rule order matters: within each group the \u1037 and း variants precede the bare form, and longer patterns precede their prefixes.
80::Null;  # pass boundary: all rhyme rules below form one pass
81က\u103A → ɛʔ;
82ဂ\u103A → ɛʔ;  # in မဂ\u1039ဂဇင\u103Aး ~ မဂ\u103Aဂဇင\u103Aး /mɛʔ.ɡə.zɪ\u0301ɴ/
83င\u1037\u103A → ɪ $creaky ɴ;  # \u1037 (U+1037 SIGN DOT BELOW) selects the creaky tone
84င\u103Aး → ɪ $high ɴ;  # း (VISARGA) selects the high tone
85င\u103A → ɪ $low ɴ;  # no tone mark: low tone
86စ\u103A → ɪʔ;  # maybe sometimes /eɪ\u032Fʔ/
87ဉ\u1037\u103A → ɪ $creaky ɴ;
88ဉ\u103Aး → ɪ $high ɴ;
89ဉ\u103A → ɪ $low ɴ;
90ည\u1037\u103A → ɛ $creaky;
91ည\u103Aး → ɛ $high;
92ည\u103A → ɛ $low;
93ဏ\u1037\u103A → a $creaky ɴ;
94ဏ\u103Aး → a $high ɴ;
95ဏ\u103A → a $low ɴ;
96တ\u103A → aʔ;
97န\u1037\u103A → a $creaky ɴ;
98န\u103Aး → a $high ɴ;
99န\u103A → a $low ɴ;
100ပ\u103A → aʔ;
101မ\u1037\u103A → a $creaky ɴ;
102မ\u103Aး → a $high ɴ;
103မ\u103A → a $low ɴ;
104ယ\u1037\u103A → ɛ $creaky;
105ယ\u103Aး → ɛ $high;
106ယ\u103A → ɛ $low;
107သ\u103A → aʔ;
108$vs_aa ဉ\u1037\u103A → ɪ $creaky ɴ;
109$vs_aa ဉ\u103Aး → ɪ $high ɴ;
110$vs_aa ဉ\u103A → ɪ $low ɴ;
111$vs_aa တ\u103A → aʔ;
112$vs_aa ဏ\u1037\u103A → a $creaky ɴ;
113$vs_aa ဏ\u103Aး → a $high ɴ;
114$vs_aa ဏ\u103A → a $low ɴ;
115$vs_aa န\u1037\u103A → a $creaky ɴ;
116$vs_aa န\u103Aး → a $high ɴ;
117$vs_aa န\u103A → a $low ɴ;
118$vs_aa ပ\u103A → aʔ;  # in ကလာပ\u103Aစည\u103Aး /kə.laʔ.sɛ\u0301/ (club cell)
119$vs_aa ယ\u1037\u103A → ɛ $creaky;
120$vs_aa ယ\u103Aး → ɛ $high;
121$vs_aa ယ\u103A → ɛ $low;
122$vs_aa \u1037 → a $creaky;  # redundant creaky tone
123$vs_aa း → a $high;
124$vs_aa → a $low;  # fallback for AA with no final and no tone mark
125$vs_i က\u103A → eɪ\u032Fʔ;
126$vs_i စ\u103A → eɪ\u032Fʔ;
127$vs_i တ\u103A → eɪ\u032Fʔ;
128$vs_i န\u1037\u103A → e $creaky ɪ\u032Fɴ;
129$vs_i န\u103Aး → e $high ɪ\u032Fɴ;
130$vs_i န\u103A → e $low ɪ\u032Fɴ;
131$vs_i ပ\u103A → eɪ\u032Fʔ;
132$vs_i မ\u1037\u103A → e $creaky ɪ\u032Fɴ;
133$vs_i မ\u103Aး → e $high ɪ\u032Fɴ;
134$vs_i မ\u103A → e $low ɪ\u032Fɴ;
135$vs_i $vs_u က\u103A → aɪ\u032Fʔ;
136$vs_i $vs_u င\u1037\u103A → a $creaky ɪ\u032Fɴ;
137$vs_i $vs_u င\u103Aး → a $high ɪ\u032Fɴ;
138$vs_i $vs_u င\u103A → a $low ɪ\u032Fɴ;
139$vs_i $vs_u ဏ\u1037\u103A → a $creaky ɪ\u032Fɴ;
140$vs_i $vs_u ဏ\u103Aး → a $high ɪ\u032Fɴ;
141$vs_i $vs_u ဏ\u103A → a $low ɪ\u032Fɴ;
142$vs_i $vs_u ယ\u1037\u103A → o $creaky;
143$vs_i $vs_u ယ\u103Aး → o $high;
144$vs_i $vs_u ယ\u103A → o $low;  # in က\u102D\u102Fယ\u103A /kò/
145$vs_i $vs_u \u1037 → o $creaky;
146$vs_i $vs_u း → o $high;
147$vs_i $vs_u → o $low;
148$vs_i $anusvara \u1037 → e $creaky ɪ\u032Fɴ;
149$vs_i $anusvara း → e $high ɪ\u032Fɴ;
150$vs_i $anusvara → e $low ɪ\u032Fɴ;
151$vs_i → i $creaky;  # bare sign I: creaky tone; must follow every longer $vs_i pattern above
152$vs_ii \u1037 → i $creaky;  # this does not usually occur
153$vs_ii း → i $high;
154$vs_ii → i $low;
155$vs_u က\u103A → oʊ\u032Fʔ;
156$vs_u ဂ\u103A → oʊ\u032Fʔ;
157$vs_u ဏ\u1037\u103A → o $creaky ʊ\u032Fɴ;
158$vs_u ဏ\u103Aး → o $high ʊ\u032Fɴ;
159$vs_u ဏ\u103A → o $low ʊ\u032Fɴ;
160$vs_u တ\u103A → oʊ\u032Fʔ;
161$vs_u န\u1037\u103A → o $creaky ʊ\u032Fɴ;
162$vs_u န\u103Aး → o $high ʊ\u032Fɴ;
163$vs_u န\u103A → o $low ʊ\u032Fɴ;
164$vs_u ပ\u103A → oʊ\u032Fʔ;
165$vs_u မ\u1037\u103A → o $creaky ʊ\u032Fɴ;
166$vs_u မ\u103Aး → o $high ʊ\u032Fɴ;
167$vs_u မ\u103A → o $low ʊ\u032Fɴ;
168$vs_u $anusvara \u1037 → o $creaky ʊ\u032Fɴ;
169$vs_u $anusvara း → o $high ʊ\u032Fɴ;
170$vs_u $anusvara → o $low ʊ\u032Fɴ;
171$vs_u → u $creaky;  # bare sign U: creaky tone; must follow every longer $vs_u pattern above
172$vs_uu \u1037 → u $creaky;  # this does not usually occur
173$vs_uu း → u $high;
174$vs_uu → u $low;
175$vs_e တ\u103A → ɪʔ;
176$vs_e $vs_aa က\u103A → aʊ\u032Fʔ;
177$vs_e $vs_aa င\u1037\u103A → a $creaky ʊ\u032Fɴ;
178$vs_e $vs_aa င\u103Aး → a $high ʊ\u032Fɴ;
179$vs_e $vs_aa င\u103A → a $low ʊ\u032Fɴ;
180$vs_e $vs_aa \u1037 → ɔ $creaky;
181$vs_e $vs_aa း → ɔ $high;  # redundant high tone; this does not usually occur
182$vs_e $vs_aa \u103A → ɔ $low;  # here a bare ASAT (no final consonant) is what selects the low tone
183$vs_e $vs_aa → ɔ $high;  # the unmarked E+AA sequence maps to high tone, not low
184$vs_e \u1037 → e $creaky;
185$vs_e း → e $high;
186$vs_e → e $low;
187$vs_ai \u1037 → ɛ $creaky;
188$vs_ai း → ɛ $high;  # redundant high tone; this does not usually occur
189$vs_ai → ɛ $high;  # the unmarked sign AI maps to high tone, not low
190$anusvara \u1037 → a $creaky ɴ;
191$anusvara း → a $high ɴ;
192$anusvara → a $low ɴ;
193$med_w တ\u103A → ʊʔ;  # MEDIAL WA directly before a final is rewritten as the vowel /ʊ/ (this group)
194$med_w န\u1037\u103A → ʊ $creaky ɴ;
195$med_w န\u103Aး → ʊ $high ɴ;
196$med_w န\u103A → ʊ $low ɴ;
197$med_w ပ\u103A → ʊʔ;
198$med_w မ\u1037\u103A → ʊ $creaky ɴ;
199$med_w မ\u103Aး → ʊ $high ɴ;
200$med_w မ\u103A → ʊ $low ɴ;
201#
202# Medials: resolve the medial consonant signs U+103B..U+103E into palatals, devoiced sonorants and glides.
203#
204::Null;  # pass boundary: the rhyme rules above have already been applied
205# Palatalization of the velar stops before MEDIAL YA and MEDIAL RA:
206# velar + /j/ ==> modern palatals.
207ကျ → t\u0361ɕ;
208ချ → t\u0361ɕʰ;
209ဂျ → d\u0361ʑ;
210ဃျ → d\u0361ʑ;
211ကြ → t\u0361ɕ;
212ခြ → t\u0361ɕʰ;
213ဂြ → d\u0361ʑ;
214ဃြ → d\u0361ʑ;
215# Remove redundant MEDIAL YA and MEDIAL RA after initial YA.
216ယ { [$med_y $med_r] → ;  # YA itself is context only and is left for later rules
217# Reorder the medials so that U+103E SIGN MEDIAL HA comes before any
218# other medials.
219# First, push U+103E MEDIAL HA before U+103D MEDIAL WA.
220\u103D \u103E → \u103E \u103D;
221::Null;  # pass boundary: the swap above is complete before the next rules look at the text
222# Now MEDIAL WA comes last.
223# Produce the palatal ʃ from (SA|LA)+YA+HA.
224သျ\u103E → ʃ;
225လျ\u103E → ʃ;
226# Second, push U+103E MEDIAL HA before U+103C MEDIAL RA.
227\u103C \u103E → \u103E \u103C;
228::Null;  # pass boundary
229# Finally, push U+103E MEDIAL HA before U+103B MEDIAL YA.
230\u103B \u103E → \u103E \u103B;
231::Null;  # pass boundary: MEDIAL HA now directly follows its base consonant
232# Consume MEDIAL HA and apply devoicing.
233င\u103E → ŋ\u030A;  # ring above (U+030A) is used here; the other sonorants take ring below (U+0325)
234ဉ\u103E → ɲ\u0325;
235ည\u103E → ɲ\u0325;
236ဏ\u103E → n\u0325;
237န\u103E → n\u0325;
238မ\u103E → m\u0325;
239ယ\u103E → ʃ;
240ရ\u103E → ʃ;
241လ\u103E → l\u0325;
242ဝ\u103E → w\u0325;
243ဠ\u103E → l\u0325;
244# Drop any remaining U+103E MEDIAL HA.
245\u103E → ;
246# Simplify medial cluster /jw/ to /w/, i.e. drop U+103B MEDIAL YA and
247# U+103C MEDIAL RA before U+103D MEDIAL WA.  # TODO: revisit this
248\u103B } \u103D → ;
249\u103C } \u103D → ;
250\u103B → j;  # any MEDIAL YA still present becomes the glide /j/
251\u103C → j;  # any MEDIAL RA still present also becomes /j/
252\u103D → w;  # any MEDIAL WA still present becomes the glide /w/
253#
254# Initials: map each remaining consonant letter, independent vowel and symbol to its IPA value.
255#
256# Velars
257က → k;
258ခ → kʰ;
259ဂ → ɡ;
260ဃ → ɡ;
261င → ŋ;
262# Historic palatals
263စ → s;
264ဆ → sʰ;
265ဇ → z;
266ဈ → z;
267ဉ → ɲ;
268ည → ɲ;
269# Alveolars
270ဋ → t;
271ဌ → tʰ;
272ဍ → d;
273ဎ → d;
274ဏ → n;
275# Historic dentals ==> alveolars
276တ → t;
277ထ → tʰ;
278ဒ → d;
279ဓ → d;
280န → n;
281# Labials
282ပ → p;
283ဖ → pʰ;
284ဗ → b;
285ဘ → b;
286မ → m;
287# Other letters
288ယ → j;
289ရ → j;  # historic /r/
290လ\u103A → ;  # final, typically not pronounced in native words; must precede the plain LA rule below
291လ → l;
292ဝ → w;
293သ → θ;  # historic /s/ ==> modern dental
294ဟ → h;
295ဠ → l;
296အ → ʔ;
297# Independent vowels: each is emitted with a glottal-stop onset /ʔ/; \u1037 and း variants precede the bare letter.
298ဣ\u1037 → ʔḭ;  # redundant creaky tone; this does not usually occur
299ဣး → ʔí;  # this does not usually occur
300ဣ → ʔḭ;
301ဤ\u1037 → ʔḭ;  # this does not usually occur
302ဤး → ʔí;  # this does not usually occur
303ဤ → ʔì;
304ဥ\u1037 → ʔṵ;  # redundant creaky tone; this does not usually occur
305ဥး → ʔú;  # this does not usually occur
306ဥ → ʔṵ;
307ဦ\u1037 → ʔṵ;  # this does not usually occur
308ဦး → ʔú;
309ဦ → ʔù;
310ဧ\u1037 → ʔḛ;  # this does not usually occur
311ဧး → ʔé;
312ဧ → ʔè;
313ဩ\u1037 → ʔɔ\u0330;  # this does not usually occur
314ဩး → ʔɔ\u0301;  # redundant high tone; this does not usually occur
315ဩ → ʔɔ\u0301;
316ဪ\u1037 → ʔɔ\u0330;  # this does not usually occur
317ဪး → ʔɔ\u0301;  # this does not usually occur
318ဪ → ʔɔ\u0300;
319# Various signs (letter-like symbols U+104C..U+104F)
320၌ → n\u0325aɪ\u032Fʔ;  # U+104C MYANMAR SYMBOL LOCATIVE
321၍ → jwḛ;  # U+104D MYANMAR SYMBOL COMPLETED
322# ၎င\u103Aး was handled earlier.
323၏ → ʔḭ;  # U+104F MYANMAR SYMBOL GENITIVE
324#
325# Postprocessing: remove leftover marks and invisible characters, then recompose the output.
326#
327# Delete any remaining U+103A ASAT.
328$asat → ;  # covers any ASAT that no earlier rule consumed
329# Delete zero-width space, non-joiner, joiner.
330[\u200B-\u200D] → ;  # U+200B ZERO WIDTH SPACE, U+200C ZERO WIDTH NON-JOINER, U+200D ZERO WIDTH JOINER
331::NFC;  # recompose base letters with the combining tone diacritics where a precomposed form exists
332
333