# © 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html#License
#
# File: und_FONIPA_fa.txt
# Generated from CLDR
#
# Purpose: transliterate IPA phonetic transcriptions into Persian (Farsi)
# script. The rules run as a sequence of passes separated by "::" directives:
#   1. ::NFD  - decompose, then strip modifiers and fold superscript letters.
#   2. ::NFC  - recompose, then rewrite a few diphthong sequences.
#   3. ::NULL - main pass: vowels, then consonants.
#   4. ::NULL - cleanup: syllable markers and overly long runs of و.
# Within a pass, rules are tried in the order written, so a more specific
# rule must stay above the general rule that would otherwise shadow it.

# Vowels
# ------
# In these rules, we produce ی و ا both for short and for long vowels.
# This would be wrong for writing Farsi or Arabic, but when transliterating
# foreign words and names, it is strongly preferred to vowel marks.
# Short schwa [ə] and a few other, schwa-like vowels get omitted entirely
# unless at the end of the word, in which case we emit ه whose Farsi
# word-final pronunciation comes close to [ə]. At the beginning of words,
# Farsi speakers prefer to see آ for [ɑ] and a few other similar-sounding
# dark vowels; note that this use of آ is quite different from Arabic.
$IVowel = [i ɪ e {e\u031E}];
$UVowel = [y {ɨ} {ʉ} ɯ u ʏ {ɪ\u0308} {ʊ\u0308} {ɯ\u033D} {ʊ} ø ɤ o {ø\u031E} {ɤ\u031E} {o\u031E} ɔ w {w\u0325} ʍ ʷ];
$AVowel = [ɛ œ ɜ æ ɶ];
$DarkAVowel = [ʌ a ɑ ɒ ɐ ɞ {ä} {ɒ\u0308}]; # آ instead of ا at beginning of words
$SchwaVowel = [ɘ ɵ ə {ɵ\u031E}];
$Click = [ʘ ɋ ǀ ʇ ǃ ʗ ǂ ʄ ǁ ʖ];
# Anything that is not a letter, mark, or number counts as a word boundary.
$Boundary = [^[:L:][:M:][:N:]];

# Pass 1: work on decomposed text so that combining marks can be matched
# and deleted on their own.
::NFD;
# Delete modifier letters, combining marks, tone letters, and tie bars that
# have no representation in the output.
[ʰ ʱ ʼ \u0303 \u0330 \u030B \u0301 \u0304 \u0300 \u030F \u030C \u0302 ˥ ˦ ˧ ˨ ˩ ꜜ ꜛ ↗ ↘ \u0361 \u035C \u032F] → ;
# Fold superscript modifier letters into the corresponding base letters.
ʲ → j;
ᵐ → m;
ⁿ → n;
ᵑ → ŋ;

# Pass 2: recompose, then adjust vowel sequences before the main pass.
::NFC;
# TODO: Diphthongs probably need more work.
# Romanian [sekujesk] → [sekujask], for emitting سیکویاسک not سیکویسک
$UVowel [j $IVowel] [e {e\u031E} $SchwaVowel] → uia;
# Kazakh Аягөз [ɑjɑɡy\u032Fʉz] → [ɑjɑɡiuz], to emit آیاگیوز not آیاگووز
yʉ → iu;

# Pass 3: main IPA → Persian mapping.
::NULL;
# Vowels
# Each vowel class has a word-initial form ($Boundary as left context) that
# is listed before its general form.
$Boundary {$SchwaVowel ː?} → ای;
$SchwaVowel ː → ی;
# Word-final schwa or [e] becomes ه. The right context also excludes ".",
# so a vowel followed by a syllable marker is not treated as word-final.
{[$SchwaVowel e {e\u031E}]} [^[:L:][:M:][:N:][\.]] → ه;
# Any remaining (short, word-internal) schwa is dropped.
$SchwaVowel → ;
$Boundary {$IVowel ː?} → ای;
$IVowel ː? j? → ی;
$Boundary {$UVowel ː?} → او;
$UVowel ː? → و;
$Boundary {$AVowel ː?} → ا;
$AVowel ː? → ا;
$Boundary {$DarkAVowel ː?} → آ;
$DarkAVowel ː? → ا;
# Shadda for long (geminated) consonants
# Length marks after vowels were consumed by the vowel rules above, so any
# ː still present here follows a consonant.
ː → \u0651;
# Affricates
# NOTE(review): U+0361 is deleted in the NFD pass above, so the {t\u0361ʃ}
# alternative looks unreachable by the time this rule runs — confirm.
[{t\u0361ʃ} ʧ] → چ;
# Clicks
[ɡ g ɠ k] $Click → کچ;
[n ɲ]? $Click → نچ;
# Nasal stops
[{m\u0325} m ɱ] → م;
[{n\u033C\u030A} {n\u033C} {n\u0325} n {ɳ\u030A} ɳ {ɲ\u030A} {ɲ\u0325} ɲ] → ن;
[{ŋ\u030A} ŋ {ɴ\u0325} ɴ] k → نک;
[{ŋ\u030A} ŋ {ɴ\u0325} ɴ] [ɡ g]? → نگ;
# Non-nasal stops
[p {p\u032A}] → پ;
[b {b\u032A} ɓ] → ب;
[{d\u033C} d ɗ ᶑ] → د;
[{t\u033C} t] → ت;
[ʈ] → ط;
[ɖ] → ض;
c → چ;
ɟ → دج;
k → ک;
[ɡ g ɠ] → گ;
[q ɢ ʡ ʛ] → ق;
# The glottal stop is dropped.
ʔ → ;
# Sibilant fricatives
s → س;
z → ز;
# NOTE(review): ʄ is also a member of $Click, and the click rules above come
# first, so ʄ appears never to reach this rule — confirm which is intended.
[ʃ ʂ ɕ ʄ] → ش;
[ʒ ʐ ʑ] → ژ;
# Non-sibilant fricatives
[ɸ f] → ف;
[β v] → و;
[{θ\u033C} θ {θ\u0331}] → ث;
[{ð\u033C} ð {ð\u0320}] → ذ;
ç → ش;
# A following I-vowel (and length mark) is absorbed so only one ی is written.
ʝ $IVowel? ː? → ی;
[x χ] → خ;
[ɣ ʁ] → غ;
ħ → ح;
ʕ → ع;
[h ɦ {ʔ\u031E}] → ه;
# Approximants, trills, flaps
ʋ → و;
ʙ → بر;
{r\u031D} → رژ;
[{ɹ\u0325} {ɹ} {ɻ\u030A} {ɻ} {ɾ\u0325} ɾ {ɽ\u030A} ɽ {r\u033C} {r\u0325} r] → ر;
[{ʀ\u0325} ʀ] → غ;
ʜ → ح;
ʢ → ع;
# A following I-vowel (and length mark) is absorbed so only one ی is written.
j $IVowel? ː? → ی;
# Laterals
ɬ → شل;
ɮ → ژل;
# Palatal lateral: written لی unless the next character is already an
# I-vowel, j, or ʝ (which will supply the ی); otherwise falls through to ل.
{[{ʎ\u0325} ʎ]} [^ $IVowel j ʝ] → لی;
[{l\u033C} {l\u0325} l {ɭ\u030A} ɭ {ʎ\u0325} ʎ] → ل;
[ʟ {ʟ\u0320}] → غ;

# Independent pass for misc cleanup.
::NULL;
# Strip off syllable markers
\. → ;
# Sequences of three or more ووو look very confusing; we shorten them.
# Example: Polish Darłowo [darwɔvɔ] would otherwise come out as داروووو
# (four و in a row); this rule shortens it to داروو.
ووو+ → وو;