# © 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html
# Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
#
# File: si_si_FONIPA.txt
# Generated from CLDR
#

# Sinhala pronunciation rules
#
# Output
#   k ɡ ŋ ᵑɡ c ɟ ɲ ʈ ɖ ⁿɖ t d n ⁿd p b m ᵐb j r l w ʃ s z h f
#   ə əː a aː æ æː i iː u uː e eː o oː
#
# References
#   [1] Asanka Wasala, Ruvan Weerasinghe, and Kumudu Gamage:
#       Sinhala Grapheme-to-Phoneme Conversion and Rules for Schwa Epenthesis.
#       Proceedings of the COLING/ACL 2006 Main Conference Poster Sessions,
#       pages 890–897. http://www.aclweb.org/anthology/P06-2114
#
# Layout note: every rule and every comment must sit on its own line. A '#'
# comment runs to the end of the physical line, so joining lines silently
# turns the rules that follow into comment text.

# Simplify ya + yansaya to plain ya after a consonant.
[\u0D9A-\u0DC6] \u0DCA (\u200D)? { ය\u0DCAය → ය;

# Delete ZWNJ and ZWJ to simplify further processing.
\u200C → ;
\u200D → ;

# Insert a schwa after every consonant that is not followed by a dependent vowel
# or virama.
::Null;
([\u0D9A-\u0DC6]) } [^\u0DCA-\u0DDF \u0DF2\u0DF3] → $1 ə;

# Pronunciation rules proper.
::Null;

# fප is an alternative spelling of ෆ.
# This occurs e.g. in ඩේව\u0DD2ඩ\u0DCA කොපර\u0DCAfප\u0DD3ල\u0DCAඩ\u0DCA (David Copperfield)
# [see http://bradshawofthefuture.blogspot.com/2013/02/f.html].
[Ff]ප → f;

# zස is seemingly the only way to unambiguously indicate a voiced /z/ sound.
# This occurs in e.g. ඇල\u0DCAzසය\u0DD2ම' රෝගය (Alzheimer's disease)
# [see https://si.wikipedia.org/wiki/ඇල\u0DCAzසය\u0DD2ම%27_රෝගය]
# or in zස\u0DD3බ\u0DCAරා (zebra) [see https://si.wikipedia.org/wiki/zස\u0DD3බ\u0DCAරා].
# Latin z + ස spells a voiced /z/.
[Zz]ස → z;

# Anusvaraya and visargaya.
ං → ŋ;
# NOTE(review): the left-hand side below appears to be a plain Latin 'o' —
# confirm that this is the intended look-alike character.
o → ŋ;  # common substitution for anusvaraya
# Visargaya before a consonant: rewrite as consonant + virama + consonant and
# leave the cursor ('|') in front of the result so the rules below process it.
ඃ ([\u0D9A-\u0DC6]) → | $1 \u0DCA $1;  # TODO: check which consonants geminate
ඃ → h;

# Independent vowels.
අ → a;
ආ → aː;
ඇ → æ;
ඈ → æː;
ඉ → i;
ඊ → iː;
උ → u;
ඌ → uː;
ඍ → ri;
ඎ → ruː;
ඏ → ilu;
ඐ → iluː;
එ → e;
ඒ → eː;
ඓ → aj;
ඔ → o;
ඕ → oː;
ඖ → aw;  # TODO: check if this is correct

# Consonants. Aspirated letters map to the same phoneme as their plain
# counterparts in this table.
ක → k;
ඛ → k;
ග → ɡ;
ඝ → ɡ;
ඞ → ŋ;
ඟ → ᵑɡ;
ච → c;
ඡ → c;
ජ → ɟ;
ඣ → ɟ;
ඤ → ɲ;
ඥ → kɲ;  # TODO: double-check
ඦ → ɟ;
ට → ʈ;
ඨ → ʈ;
ඩ → ɖ;
ඪ → ɖ;
ණ → n;
ඬ → ⁿɖ;
ත → t;
ථ → t;
ද → d;
ධ → d;
න → n;
ඳ → ⁿd;
ප → p;
ඵ → p;
බ → b;
භ → b;
ම → m;
ඹ → ᵐb;
ය → j;
ර → r;
ල → l;
ව → w;
ශ → ʃ;
ෂ → ʃ;
ස → s;
හ → h;
ළ → l;
ෆ → f;

# Virama and dependent vowel signs.
\u0DCA → ;  # delete virama
ා → aː;
ැ → æ;
ෑ → æː;
\u0DD2 → i;
\u0DD3 → iː;
\u0DD4 → u;
\u0DD6 → uː;
ෘ → ru;
ෙ → e;
ේ → eː;
ෛ → aj;
ො → o;
ෝ → oː;
ෞ → aw;  # TODO: check if this is correct
ෟ → lu;
ෲ → ruː;
ෳ → luː;

# Heuristics for turning /ə/ into /a/. Based on [1].
# $c: any consonant phoneme produced by the table above (multi-character
#     phonemes are written as {…} strings inside the set).
# $s: any non-letter; used as the word-boundary context in the rules below.
$c=[k ɡ ŋ {ᵑɡ} c ɟ ɲ ʈ ɖ {ⁿɖ} t d n {ⁿd} p b m {ᵐb} j r l w ʃ s z h f];
$s=[:^L:];

# Rule #1
::Null;
$s sv { ə → ə;  # exception (a)
$s k { ə } r → ə;  # exception (b)
$s $c { ə } $s → ə;  # exception (c)
$s $c $c { ə → a;
$s $c { ə → a;

# Rule #2
::Null;
$c r { ə } $c → a;  # clause (a) and (b)
$c r { a } h → a;  # clause (d), exception
$c r { a } $c → ə;  # clause (c)

# Rule #3
# The paper is unclear about what this rule means. The interpretation here
# assumes that "preceded" in the paper is a typo and should be read "followed".
# A schwa that follows one of [a e æ o ə] + h is realised as /a/.
::Null;
[a e æ o ə] h { ə → a;

# Rules #4 through #7
::Null;
ə } $c $c → a;  # Rule #4
ə } [rbɖʈ] $s → ə;  # Rule #5 exception
ə } $c $s → a;  # Rule #5
ə } ji $s → a;  # Rule #6
k { ə } [rl] u → a;  # Rule #7

# Rule #8
# Note that the paper doesn't say explicitly that this rule should be
# anchored at the beginning of a word, but the remarks before the rules
# seem to imply this.
::Null;
$s k { a } l[aeo]ːj → ə;  # Typo in paper: /j/ was /y/.
$s k { a } le[mh][ui] → ə;
$s k { alə } h[ui] → əle;
$s k { a } lə → ə;

# Diphthongs
::Null;
www+ → ww;  # යෞව\u0DCAවන
[i {iː} e {eː} æ {æː} o {oː} a {aː}] { wu → w;
əji → aj;
iji → iː;  # perhaps: ij
[u {uː} e {eː} æ {æː} o {oː} a {aː}] { ji → j;