• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1<?xml version="1.0" encoding="UTF-8" ?>
2<!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
3<!-- Copyright © 1991-2015 Unicode, Inc.
4CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
5For terms of use, see http://www.unicode.org/copyright.html -->
6<supplementalData>
7	<version number="$Revision: 12347 $" />
8	<transforms>
9		<transform source="si" target="si_FONIPA" direction="forward" alias="si-fonipa-t-si">
10			<tRule><![CDATA[
11# Sinhala pronunciation rules
12#
13# Output
14#     k ɡ ŋ ᵑɡ c ɟ ɲ ʈ ɖ ⁿɖ t d n ⁿd p b m ᵐb j r l w ʃ s z h f
15#     ə əː a aː æ æː i iː u uː e eː o oː
16#
17# References
18# [1] Asanka Wasala, Ruvan Weerasinghe, and Kumudu Gamage:
19#     Sinhala Grapheme-to-Phoneme Conversion and Rules for Schwa Epenthesis.
20#     Proceedings of the COLING/ACL 2006 Main Conference Poster Sessions,
21#     pages 890–897. http://www.aclweb.org/anthology/P06-2114
22
23# Simplify ya + yansaya to plain ya after a consonant.
# The left context is consonant + virama (U+0DCA), optionally followed by
# ZWJ (U+200D); only the ya + yansaya part to the right of "{" is rewritten.
24[\u0D9A-\u0DC6] \u0DCA (\u200D)? { ය්‍ය → ය;
25
26# Delete ZWNJ (U+200C) and ZWJ (U+200D) to simplify further processing.
27\u200C → ;
28\u200D → ;
29
30# Insert a schwa after every consonant that is not followed by a dependent vowel
31# or virama.
# The ::Null; starts a new pass, so this rule works on the text produced by
# the rules above. The excluded followers are U+0DCA through U+0DDF plus
# U+0DF2 and U+0DF3; the consonant itself is kept ($1) and ə is appended.
32::Null;
33([\u0D9A-\u0DC6]) } [^\u0DCA-\u0DDF \u0DF2\u0DF3] → $1 ə;
34
35# Pronunciation rules proper.
# Everything from here up to the next ::Null; runs as a single pass over the
# schwa-augmented text. Rules are tried in the order listed, so the two
# digraph rules below come before the plain rules for ප and ස.
36::Null;
37
38# fප is an alternative spelling of ෆ.
39# This occurs e.g. in ඩේවිඩ් කොපර්fපීල්ඩ් (David Copperfield)
40# [see http://bradshawofthefuture.blogspot.com/2013/02/f.html].
41[Ff]ප → f;
42
43# zස is seemingly the only way to unambiguously indicate a voiced /z/ sound.
44# This occurs in e.g. ඇල්zසයිම' රෝගය (Alzheimer's disease)
45# [see https://si.wikipedia.org/wiki/ඇල්zසයිම%27_රෝගය]
46# or in zසීබ්‍රා (zebra) [see https://si.wikipedia.org/wiki/‍zසීබ්‍රා].
47[Zz]ස → z;
48
49ං → ŋ;  # anusvaraya
50o → ŋ;  # the letter o is a common substitution for anusvaraya
51
# ඃ before a consonant doubles that consonant: the pair is rewritten as
# consonant + virama + consonant, and the leading "|" leaves the cursor in
# front of the rewritten text so the consonant rules of this pass still
# convert it. Any other ඃ becomes h.
52ඃ ([\u0D9A-\u0DC6]) → | $1 \u0DCA $1;  # TODO: check which consonants geminate
53ඃ → h;
54
# Independent vowel letters, each mapped to its IPA value.
# NOTE(review): ඍ yields ri while ඎ yields ruː (and the vowel sign ෘ yields ru
# further down) — confirm that the i/u difference is intended.
55අ → a;
56ආ → aː;
57ඇ → æ;
58ඈ → æː;
59ඉ → i;
60ඊ → iː;
61උ → u;
62ඌ → uː;
63ඍ → ri;
64ඎ → ruː;
65ඏ → ilu;
66ඐ → iluː;
67එ → e;
68ඒ → eː;
69ඓ → aj;
70ඔ → o;
71ඕ → oː;
72ඖ → aw;  # TODO: check if this is correct
73
# Consonant letters. Several letters share one output: ක/ඛ → k, ග/ඝ → ɡ,
# ච/ඡ → c, ජ/ඣ/ඦ → ɟ, ට/ඨ → ʈ, ඩ/ඪ → ɖ, ණ/න → n, ත/ථ → t, ද/ධ → d,
# ප/ඵ → p, බ/භ → b, ල/ළ → l, ශ/ෂ → ʃ.
# NOTE(review): ඟ, ඬ, ඳ and ඹ get outputs with a superscript nasal prefix
# (ᵑɡ ⁿɖ ⁿd ᵐb) while ඦ gets plain ɟ — presumably deliberate; confirm.
74ක → k;
75ඛ → k;
76ග → ɡ;
77ඝ → ɡ;
78ඞ → ŋ;
79ඟ → ᵑɡ;
80ච → c;
81ඡ → c;
82ජ → ɟ;
83ඣ → ɟ;
84ඤ → ɲ;
85ඥ → kɲ;  # TODO: double-check
86ඦ → ɟ;
87ට → ʈ;
88ඨ → ʈ;
89ඩ → ɖ;
90ඪ → ɖ;
91ණ → n;
92ඬ → ⁿɖ;
93ත → t;
94ථ → t;
95ද → d;
96ධ → d;
97න → n;
98ඳ → ⁿd;
99ප → p;
100ඵ → p;
101බ → b;
102භ → b;
103ම → m;
104ඹ → ᵐb;
105ය → j;
106ර → r;
107ල → l;
108ව → w;
109ශ → ʃ;
110ෂ → ʃ;
111ස → s;
112හ → h;
113ළ → l;
114ෆ → f;
115
116\u0DCA → ;  # delete virama: it produces no sound of its own
117
# Dependent vowel signs. A consonant followed by one of these signs received
# no schwa in the schwa-insertion pass, so the sign supplies the vowel.
# Four signs (U+0DD2, U+0DD3, U+0DD4, U+0DD6) are written as escapes rather
# than as literal characters.
118ා → aː;
119ැ → æ;
120ෑ → æː;
121\u0DD2 → i;
122\u0DD3 → iː;
123\u0DD4 → u;
124\u0DD6 → uː;
125ෘ → ru;
126ෙ → e;
127ේ → eː;
128ෛ → aj;
129ො → o;
130ෝ → oː;
131ෞ → aw;  # TODO: check if this is correct
132ෟ → lu;
133ෲ → ruː;
134ෳ → luː;
135
136# Heuristics for choosing between /ə/ and /a/. Based on [1].
# Most rules below turn ə into a; Rule #2 clause (c) and Rule #8 turn a into ə.
137
# $c: any consonant emitted by the pronunciation rules. Multi-character
# outputs are wrapped in braces so the set treats each one as a single element.
138$c=[k ɡ ŋ {ᵑɡ} c ɟ ɲ ʈ ɖ {ⁿɖ} t d n {ⁿd} p b m {ᵐb} j r l w ʃ s z h f];
139
# $s: any character that is not a letter; the rules below use it as the
# word-boundary context.
# NOTE(review): this presumably relies on the engine also matching the set at
# the very start and end of the text — confirm.
140$s=[:^L:];
141
142# Rule #1
# A ə that follows one or two word-initial consonants becomes a, with three
# exceptions. The ə → ə rules are intentional no-ops: rules are tried in
# order, so they keep ə away from the two → a rules listed after them.
143::Null;
144$s sw    { ə      → ə;  # exception (a); ව is transcribed as w here, so the cluster is sw, never sv
145$s k     { ə } r  → ə;  # exception (b): word-initial k + ə before r
146$s $c    { ə } $s → ə;  # exception (c): the whole word is one consonant + ə
147$s $c $c { ə      → a;
148$s $c    { ə      → a;
149
150# Rule #2
# Adjusts a vowel that follows consonant + r and precedes a consonant.
# Rules are tried in order, so the h exception must stay above clause (c).
151::Null;
152$c r { ə } $c → a;  # clause (a) and (b): ə becomes a
153$c r { a } h  → a;  # clause (d), exception: a is kept before h
154$c r { a } $c → ə;  # clause (c): a becomes ə before any other consonant
155
156# Rule #3
157# The paper is unclear about what this rule means. The interpretation here
158# assumes that "preceded" in the paper is a typo and should be read "followed".
# As implemented: ə becomes a when it comes right after an h that itself
# follows one of a, e, æ, o or ə.
159::Null;
160[a e æ o ə] h { ə → a;
161
162# Rules #4 through #7
# All five rules share one pass and are tried in order, so the Rule #5
# exception must stay above Rule #5 itself.
163::Null;
164    ə } $c $c     → a;  # Rule #4: ə before two consonants
165    ə } [rbɖʈ] $s → ə;  # Rule #5 exception: ə is kept before word-final r, b, ɖ or ʈ
166    ə } $c     $s → a;  # Rule #5: ə before any other word-final consonant
167    ə } ji     $s → a;  # Rule #6: ə before word-final ji
168k { ə } [rl] u    → a;  # Rule #7: ə between k and ru or lu
169
170# Rule #8
171# Note that the paper doesn't say explicitly that this rule should be
172# anchored at the beginning of a word, but the remarks before the rules
173# seem to imply this.
# Every pattern starts at a word-initial k followed by a + l. The more
# specific patterns are listed before the general "lə" one because rules are
# tried in order.
174::Null;
175$s k { a } l[aeo]ːj   → ə;  # Typo in paper: /j/ was /y/.
176$s k { a } le[mh][ui] → ə;
177$s k { alə } h[ui]    → əle;  # rewrites alə as a unit: kaləh… becomes kəleh…
178$s k { a } lə         → ə;  # any other word-initial kalə… becomes kələ…
179
180# Diphthongs
181::Null;
# Collapse a run of three or more w into ww; the example word is in the
# trailing comment.
182www+ → ww;  # යෞව්වන
# After one of the listed vowels (u, uː and ə are not listed), wu shrinks to w.
183[i {iː} e {eː} æ {æː} o {oː} a {aː}] { wu → w;
184
# əji and iji are rewritten as a whole; after any of the listed vowels
# (i, iː and ə are not listed), ji shrinks to j.
185əji → aj;
186iji → iː;  # perhaps: ij
187[u {uː} e {eː} æ {æː} o {oː} a {aː}] { ji → j;
188			]]></tRule>
189		</transform>
190	</transforms>
191</supplementalData>
192