• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1<?xml version="1.0" encoding="UTF-8" ?>
2<!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
3<!-- Copyright © 1991-2015 Unicode, Inc.
4CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
5For terms of use, see http://www.unicode.org/copyright.html -->
6<supplementalData>
7	<version number="$Revision: 12347 $" />
8	<transforms>
9		<transform source="my" target="my_FONIPA" direction="forward" alias="my-fonipa-t-my">
10			<tRule><![CDATA[
11
12# Pronunciation rules for Burmese.
13#
14# The following rules are lexical and heuristic: lexical in the sense
15# that they generate phoneme strings which may further undergo
16# post-lexical phonological processes, in particular voicing, to
17# result in actual surface forms; heuristic in the sense that they try
18# to resolve ambiguities, especially around reduced vowels, in a
19# systematic way that may be incorrect in many situations. Vowel
20# reduction depends on many factors, such as morphemic structure,
21# which are not available here.
22
23#
24# Definitions
25#
# Variables for the Myanmar code points and IPA tone diacritics that the
# rules below refer to by name.
26
27# Dependent vowel signs
28$vs_AA = \u102B;  # MYANMAR VOWEL SIGN TALL AA
29$vs_aa = \u102C;  # MYANMAR VOWEL SIGN AA
30$vs_i = \u102D;  # MYANMAR VOWEL SIGN I
31$vs_ii = \u102E;  # MYANMAR VOWEL SIGN II
32$vs_u = \u102F;  # MYANMAR VOWEL SIGN U
33$vs_uu = \u1030;  # MYANMAR VOWEL SIGN UU
34$vs_e = \u1031;  # MYANMAR VOWEL SIGN E
35$vs_ai = \u1032;  # MYANMAR VOWEL SIGN AI
36
37# Various signs
38$anusvara = \u1036;  # MYANMAR SIGN ANUSVARA
39$visarga = \u1038;  # MYANMAR SIGN VISARGA
40$virama = \u1039;  # MYANMAR SIGN VIRAMA (stacks the following consonant)
41$asat = \u103A;  # MYANMAR SIGN ASAT (visible vowel killer)
42
43# Dependent (medial) consonant signs
44$med_y = \u103B;  # MYANMAR CONSONANT SIGN MEDIAL YA
45$med_r = \u103C;  # MYANMAR CONSONANT SIGN MEDIAL RA
46$med_w = \u103D;  # MYANMAR CONSONANT SIGN MEDIAL WA
47$med_h = \u103E;  # MYANMAR CONSONANT SIGN MEDIAL HA
48
49# Independent letters and letter-like punctuation symbols
# Covers U+1000-U+102A (letters, including the independent vowels),
# U+103F GREAT SA, U+104C-U+104F (the symbols ၌ ၍ ၎ ၏) and U+1050-U+1055.
# Used by the syllable-boundary rule in the Preprocessing section.
50$independent = [\u1000-\u102A \u103F \u104C-\u104F \u1050-\u1055];
51
# IPA tone diacritics (combining marks) that the rules emit after a vowel.
52$creaky = \u0330;  # COMBINING TILDE BELOW
53$high = \u0301;  # COMBINING ACUTE ACCENT
54$low = \u0300;  # COMBINING GRAVE ACCENT
55$coda = [$creaky $high $low ɴ ʔ ə];  # TODO: remove if unused (no rule visible in this file references it)
56
57#
58# Preprocessing
59#
# Normalizes the input, rewrites stacked spellings as plain letter
# sequences, marks syllable boundaries and inserts inherent vowels, so
# that the later sections can match simple patterns.
60
61::NFC;  # normalize the input before any rule is applied
62
63# Replace U+102B TALL AA with U+102C AA. Their pronunciation is identical.
64$vs_AA → $vs_aa;
65
66# Unstack kinzi (င် plus U+1039 VIRAMA) into plain င်.
67# TODO: decide what should happen if the syllable ending in kinzi has a non-low tone.
68င် $virama → င်;
69
70# Unstack everything else, i.e. replace U+1039 VIRAMA with U+103A ASAT.
71$virama → $asat;
72
73# Unstack U+103F GREAT SA.
74ဿ → သ်သ;
75
76# Insert a syllable boundary marker /./ before every independent letter.
# The letter may be followed by U+1037 DOT BELOW and medial signs, but the
# next character after those must not be U+103A ASAT. The [^.$] context
# avoids inserting a second marker after an existing one; the $ inside the
# set is presumably the text-boundary anchor (TODO confirm against the
# ICU rule syntax).
77::Null;  # start a new pass over the output of the rules above
78[^.$] { } $independent ([\u1037\u103B-\u103E])* [^\u103A] → \.;
79
80# Insert default inherent vowel: /a̰/ at the end, /ə/ everywhere else.
# Applies to a consonant U+1000-U+1021 (or U+103F) together with any medial
# signs when it is directly followed by the end of the text (first rule)
# or by a /./ boundary marker (second rule).
81::Null;
82([\u1000-\u1021\u103F] [\u103B-\u103E]*) } [$] → $1 a $creaky;
83([\u1000-\u1021\u103F] [\u103B-\u103E]*) } \.  → $1 ə;
84
85# Allow for additional coda consonants.
86#
87# This only covers a few of the cases in which full coda consonants
88# can appear in loanwords. The general situation is somewhat rare and
89# is more easily dealt with in a formalism that can impose structural
90# constraints on syllables more easily.
# Concretely: when a letter directly follows a U+103A ASAT (optionally with
# U+1038 VISARGA in between), the ASAT after that letter is deleted, so the
# letter no longer matches the ASAT-based rhyme patterns below.
91::Null;
92$asat ($visarga)? [\u1000-\u102A] { $asat → ;
93
94# Deal with ၎င်း early, before the rhyme rules below can rewrite its င်း.
95၎င်း → lə\.ɡa $high ʊ̯ɴ;
96
97#
98# Rhymes
99#
# Each rule rewrites an orthographic rhyme (optional vowel sign(s), an
# optional final letter with U+103A ASAT or an anusvara, and an optional
# tone mark) as an IPA vowel, a tone diacritic and, where present, a coda
# (ɴ for nasal finals, ʔ for stop finals; stop finals carry no tone
# diacritic). In most groups U+1037 DOT BELOW gives creaky tone, U+1038
# VISARGA gives high tone and no tone mark gives low tone; the unmarked
# cases that differ are commented inline. Within a group the longer
# patterns are listed before the shorter ones they contain, so the order
# of the rules is significant.
100
101::Null;  # the rhyme rules run as a pass of their own, after preprocessing
102
103က် → ɛʔ;
104
105ဂ် → ɛʔ;  # in မဂ္ဂဇင်း ~ မဂ်ဂဇင်း /mɛʔ.ɡə.zɪ́ɴ/
106
107င့် → ɪ $creaky ɴ;
108င်း → ɪ $high ɴ;
109င် → ɪ $low ɴ;
110
111စ် → ɪʔ;  # maybe sometimes /eɪ̯ʔ/
112
113ဉ့် → ɪ $creaky ɴ;
114ဉ်း → ɪ $high ɴ;
115ဉ် → ɪ $low ɴ;
116
117ည့် → ɛ $creaky;
118ည်း → ɛ $high;
119ည် → ɛ $low;
120
121ဏ့် → a $creaky ɴ;
122ဏ်း → a $high ɴ;
123ဏ် → a $low ɴ;
124
125တ် → aʔ;
126
127န့် → a $creaky ɴ;
128န်း → a $high ɴ;
129န် → a $low ɴ;
130
131ပ် → aʔ;
132
133မ့် → a $creaky ɴ;
134မ်း → a $high ɴ;
135မ် → a $low ɴ;
136
137ယ့် → ɛ $creaky;
138ယ်း → ɛ $high;
139ယ် → ɛ $low;
140
141သ် → aʔ;
142
143$vs_aa ဉ့် → ɪ $creaky ɴ;
144$vs_aa ဉ်း → ɪ $high ɴ;
145$vs_aa ဉ် → ɪ $low ɴ;
146$vs_aa တ် → aʔ;
147$vs_aa ဏ့် → a $creaky ɴ;
148$vs_aa ဏ်း → a $high ɴ;
149$vs_aa ဏ် → a $low ɴ;
150$vs_aa န့် → a $creaky ɴ;
151$vs_aa န်း → a $high ɴ;
152$vs_aa န် → a $low ɴ;
153$vs_aa ပ် → aʔ;  # in ကလာပ်စည်း /kə.laʔ.sɛ́/ (club cell)
154$vs_aa ယ့် → ɛ $creaky;
155$vs_aa ယ်း → ɛ $high;
156$vs_aa ယ် → ɛ $low;
157$vs_aa ့ → a $creaky;  # redundant creaky tone
158$vs_aa း → a $high;
159$vs_aa → a $low;
160
161$vs_i က် → eɪ̯ʔ;
162$vs_i စ် → eɪ̯ʔ;
163$vs_i တ် → eɪ̯ʔ;
164$vs_i န့် → e $creaky ɪ̯ɴ;
165$vs_i န်း → e $high ɪ̯ɴ;
166$vs_i န် → e $low ɪ̯ɴ;
167$vs_i ပ် → eɪ̯ʔ;
168$vs_i မ့် → e $creaky ɪ̯ɴ;
169$vs_i မ်း → e $high ɪ̯ɴ;
170$vs_i မ် → e $low ɪ̯ɴ;
171$vs_i $vs_u က် → aɪ̯ʔ;
172$vs_i $vs_u င့် → a $creaky ɪ̯ɴ;
173$vs_i $vs_u င်း → a $high ɪ̯ɴ;
174$vs_i $vs_u င် → a $low ɪ̯ɴ;
175$vs_i $vs_u ဏ့် → a $creaky ɪ̯ɴ;
176$vs_i $vs_u ဏ်း → a $high ɪ̯ɴ;
177$vs_i $vs_u ဏ် → a $low ɪ̯ɴ;
178$vs_i $vs_u ယ့် → o $creaky;
179$vs_i $vs_u ယ်း → o $high;
180$vs_i $vs_u ယ် → o $low;  # in ကိုယ် /kò/
181$vs_i $vs_u ့ → o $creaky;
182$vs_i $vs_u း → o $high;
183$vs_i $vs_u → o $low;
184$vs_i $anusvara ့ → e $creaky ɪ̯ɴ;
185$vs_i $anusvara း → e $high ɪ̯ɴ;
186$vs_i $anusvara → e $low ɪ̯ɴ;
187$vs_i → i $creaky;  # unmarked short vowel: creaky tone, not low
188
189$vs_ii ့ → i $creaky;  # this does not usually occur
190$vs_ii း → i $high;
191$vs_ii → i $low;
192
193$vs_u က် → oʊ̯ʔ;
194$vs_u ဂ် → oʊ̯ʔ;
195$vs_u ဏ့် → o $creaky ʊ̯ɴ;
196$vs_u ဏ်း → o $high ʊ̯ɴ;
197$vs_u ဏ် → o $low ʊ̯ɴ;
198$vs_u တ် → oʊ̯ʔ;
199$vs_u န့် → o $creaky ʊ̯ɴ;
200$vs_u န်း → o $high ʊ̯ɴ;
201$vs_u န် → o $low ʊ̯ɴ;
202$vs_u ပ် → oʊ̯ʔ;
203$vs_u မ့် → o $creaky ʊ̯ɴ;
204$vs_u မ်း → o $high ʊ̯ɴ;
205$vs_u မ် → o $low ʊ̯ɴ;
206$vs_u $anusvara ့ → o $creaky ʊ̯ɴ;
207$vs_u $anusvara း → o $high ʊ̯ɴ;
208$vs_u $anusvara → o $low ʊ̯ɴ;
209$vs_u → u $creaky;  # unmarked short vowel: creaky tone, not low
210
211$vs_uu ့ → u $creaky;  # this does not usually occur
212$vs_uu း → u $high;
213$vs_uu → u $low;
214
215$vs_e တ် → ɪʔ;
216$vs_e $vs_aa က် → aʊ̯ʔ;
217$vs_e $vs_aa င့် → a $creaky ʊ̯ɴ;
218$vs_e $vs_aa င်း → a $high ʊ̯ɴ;
219$vs_e $vs_aa င် → a $low ʊ̯ɴ;
220$vs_e $vs_aa ့ → ɔ $creaky;
221$vs_e $vs_aa း → ɔ $high;  # redundant high tone; this does not usually occur
222$vs_e $vs_aa ် → ɔ $low;
223$vs_e $vs_aa → ɔ $high;  # unmarked: high tone; low tone is spelled with ASAT (previous rule)
224$vs_e ့ → e $creaky;
225$vs_e း → e $high;
226$vs_e → e $low;
227
228$vs_ai ့ → ɛ $creaky;
229$vs_ai း → ɛ $high;  # redundant high tone; this does not usually occur
230$vs_ai → ɛ $high;  # unmarked: high tone, not low
231
232$anusvara ့ → a $creaky ɴ;
233$anusvara း → a $high ɴ;
234$anusvara → a $low ɴ;
235
# MEDIAL WA directly followed by a final: the rhyme vowel is /ʊ/.
236$med_w တ် → ʊʔ;
237$med_w န့် → ʊ $creaky ɴ;
238$med_w န်း → ʊ $high ɴ;
239$med_w န် → ʊ $low ɴ;
240$med_w ပ် → ʊʔ;
241$med_w မ့် → ʊ $creaky ɴ;
242$med_w မ်း → ʊ $high ɴ;
243$med_w မ် → ʊ $low ɴ;
244
245#
246# Medials
247#
# Rewrites combinations of a base letter with medial signs: first
# palatalization, then the effects of MEDIAL HA, and finally any medial
# that is still left as a glide.
248
249::Null;  # start a new pass: medials are handled after the rhyme pass
250
251# Palatalization of the velar stops before MEDIAL YA and MEDIAL RA:
252# velar + /j/ ==> modern palatals.
253
254ကျ → t͡ɕ;
255ချ → t͡ɕʰ;
256ဂျ → d͡ʑ;
257ဃျ → d͡ʑ;
258
259ကြ → t͡ɕ;
260ခြ → t͡ɕʰ;
261ဂြ → d͡ʑ;
262ဃြ → d͡ʑ;
263
264# Remove redundant MEDIAL YA and MEDIAL RA after initial YA.
265ယ { [$med_y $med_r] → ;
266
267# Reorder the medials so that U+103E SIGN MEDIAL HA comes before any
268# other medials.
# Each swap below is followed by ::Null; so that the next step runs on
# the already reordered text.
269
270# First, push U+103E MEDIAL HA before U+103D MEDIAL WA.
271\u103D \u103E → \u103E \u103D;
272::Null;
273# Now MEDIAL WA comes last.
274
275# Produce the palatal ʃ from (SA|LA)+YA+HA.
# These patterns expect the order YA, HA, so they are placed before the
# step further down that moves MEDIAL HA in front of MEDIAL YA.
276သျှ → ʃ;
277လျှ → ʃ;
278
279# Second, push U+103E MEDIAL HA before U+103C MEDIAL RA.
280\u103C \u103E → \u103E \u103C;
281::Null;
282
283# Finally, push U+103E MEDIAL HA before U+103B MEDIAL YA.
284\u103B \u103E → \u103E \u103B;
285::Null;
286
287# Consume MEDIAL HA and apply devoicing.
# Nasals, LA/LLA and WA become their voiceless counterparts; YA and RA
# with MEDIAL HA become the palatal ʃ.
288
289ငှ → ŋ̊;
290ဉှ → ɲ̥;
291ညှ → ɲ̥;
292ဏှ → n̥;
293နှ → n̥;
294မှ → m̥;
295ယှ → ʃ;
296ရှ → ʃ;
297လှ → l̥;
298ဝှ → w̥;
299ဠှ → l̥;
300
301# Drop any remaining U+103E MEDIAL HA.
302\u103E → ;
303
304# Simplify medial cluster /jw/ to /w/, i.e. drop U+103B MEDIAL YA and
305# U+103C MEDIAL RA before U+103D MEDIAL WA.  # TODO: revisit this
306\u103B } \u103D → ;
307\u103C } \u103D → ;
308
# Any medial that is still left becomes a glide.
309\u103B → j;  # MEDIAL YA
310\u103C → j;  # MEDIAL RA (same output as MEDIAL YA)
311\u103D → w;  # MEDIAL WA
312
313#
314# Initials
315#
# Maps each base consonant letter to its IPA onset. Several letters
# share one output (for example both ဂ and ဃ give ɡ, and both ယ and ရ
# give j).
316
317# Velars
318က → k;
319ခ → kʰ;
320ဂ → ɡ;
321ဃ → ɡ;
322င → ŋ;
323
324# Historic palatals
325စ → s;
326ဆ → sʰ;
327ဇ → z;
328ဈ → z;
329ဉ → ɲ;
330ည → ɲ;
331
332# Alveolars
333ဋ → t;
334ဌ → tʰ;
335ဍ → d;
336ဎ → d;
337ဏ → n;
338
339# Historic dentals ==> alveolars
340တ → t;
341ထ → tʰ;
342ဒ → d;
343ဓ → d;
344န → n;
345
346# Labials
347ပ → p;
348ဖ → pʰ;
349ဗ → b;
350ဘ → b;
351မ → m;
352
353# Other letters
354ယ → j;
355ရ → j;  # historic /r/
# The rule for လ with ASAT is listed before the plain လ rule so that it is
# tried first.
356လ် → ;  # final, typically not pronounced in native words
357လ → l;
358ဝ → w;
359သ → θ;  # historic /s/ ==> modern dental
360ဟ → h;
361ဠ → l;
362အ → ʔ;
363
364# Independent vowels
# Each independent vowel letter is rendered with a glottal-stop onset /ʔ/
# followed by the toned vowel. For every letter the variants carrying a
# tone mark are listed before the bare letter so that they are tried
# first.
365
366ဣ့ → ʔḭ;  # redundant creaky tone; this does not usually occur
367ဣး → ʔí;  # this does not usually occur
368ဣ → ʔḭ;
369
370ဤ့ → ʔḭ;  # this does not usually occur
371ဤး → ʔí;  # this does not usually occur
372ဤ → ʔì;
373
374ဥ့ → ʔṵ;  # redundant creaky tone; this does not usually occur
375ဥး → ʔú;  # this does not usually occur
376ဥ → ʔṵ;
377
378ဦ့ → ʔṵ;  # this does not usually occur
379ဦး → ʔú;
380ဦ → ʔù;
381
382ဧ့ → ʔḛ;  # this does not usually occur
383ဧး → ʔé;
384ဧ → ʔè;
385
386ဩ့ → ʔɔ̰;  # this does not usually occur
387ဩး → ʔɔ́;  # redundant high tone; this does not usually occur
388ဩ → ʔɔ́;
389
390ဪ့ → ʔɔ̰;  # this does not usually occur
391ဪး → ʔɔ́;  # this does not usually occur
392ဪ → ʔɔ̀;
393
394# Various signs
# The letter-like symbols U+104C-U+104F are replaced by a fixed
# pronunciation each.
395
396၌ → n̥aɪ̯ʔ;  # U+104C MYANMAR SYMBOL LOCATIVE
397၍ → jwḛ;  # U+104D MYANMAR SYMBOL COMPLETED
398# ၎င်း (U+104E MYANMAR SYMBOL AFOREMENTIONED plus င်း) was handled earlier, in Preprocessing.
399၏ → ʔḭ;  # U+104F MYANMAR SYMBOL GENITIVE
400
401#
402# Postprocessing
403#
# Removes characters that have no pronunciation of their own and
# normalizes the result.
404
405# Delete any remaining U+103A ASAT.
406$asat → ;
407
408# Delete zero-width space, non-joiner, joiner (U+200B, U+200C, U+200D).
409[\u200B-\u200D] → ;
410
411::NFC;  # normalize the IPA output, including the combining tone marks
412
413			]]></tRule>
414		</transform>
415	</transforms>
416</supplementalData>
417