• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# © 2016 and later: Unicode, Inc. and others.
2# License & terms of use: http://www.unicode.org/copyright.html#License
3#
4# File: und_FONIPA_fa.txt
5# Generated from CLDR
6#
7
8# Vowels
9# ------
10# In these rules, we produce ی و ا both for short and for long vowels.
11# This would be wrong for writing Farsi or Arabic, but when transliterating
12# foreign words and names, it is strongly preferred to vowel marks.
13# Short schwa [ə] and a few other, schwa-like vowels get omitted entirely
14# unless at the end of the word, in which case we emit ه whose Farsi
15# word-final pronunciation comes close to [ə]. At the beginning of words,
16# Farsi speakers prefer to see آ for [ɑ] and a few other similar-sounding
17# dark vowels; note that this use of آ is quite different from Arabic.
# Character classes shared by the rules below. Inside a set, {…} groups a
# multi-character string (typically a base letter plus a combining mark).
# $IVowel: vowels the vowel rules write with ی (ای after a boundary).
18$IVowel = [i ɪ e {e\u031E}];
# $UVowel: vowels and labial glides the vowel rules write with و (او after a boundary).
19$UVowel = [y {ɨ} {ʉ} ɯ u ʏ {ɪ\u0308} {ʊ\u0308} {ɯ\u033D} {ʊ} ø ɤ o {ø\u031E} {ɤ\u031E} {o\u031E} ɔ w {w\u0325} ʍ ʷ];
# $AVowel: vowels the vowel rules write with ا in every position.
20$AVowel = [ɛ œ ɜ æ ɶ];
21$DarkAVowel = [ʌ a ɑ ɒ ɐ ɞ {ä} {ɒ\u0308}];  # آ instead of ا at beginning of words
# $SchwaVowel: schwa-like vowels; their output depends on position (see the vowel rules).
22$SchwaVowel = [ɘ ɵ ə {ɵ\u031E}];
# $Click: letters consumed by the click rules, which emit کچ or نچ.
23$Click = [ʘ ɋ ǀ ʇ ǃ ʗ ǂ ʄ ǁ ʖ];
# $Boundary: any character that is not a letter, mark, or number. Used as the
# left context that identifies a word-initial vowel.
24$Boundary =  [^[:L:][:M:][:N:]];
# Pre-processing pass. Decompose first so that the combining marks listed in
# the deletion rule below appear as separate code points.
25::NFD;
# Delete modifiers that the later rules never consume: aspiration and
# breathy-voice superscripts (ʰ ʱ), the ejective apostrophe (ʼ), combining
# tilde above/below (U+0303, U+0330), tone diacritics (U+030B, U+0301, U+0304,
# U+0300, U+030F, U+030C, U+0302), tone letters (˥ ˦ ˧ ˨ ˩), downstep/upstep
# (ꜜ ꜛ), global rise/fall arrows (↗ ↘), tie bars (U+0361, U+035C) and the
# non-syllabic mark (U+032F).
26[ʰ ʱ ʼ  \u0303  \u0330  \u030B  \u0301  \u0304  \u0300  \u030F  \u030C  \u0302 ˥ ˦ ˧ ˨ ˩ ꜜ ꜛ ↗ ↘  \u0361  \u035C  \u032F] → ;
# Superscript modifier letters are replaced by the corresponding full IPA
# letters, so the consonant rules further down handle them like any other
# j / m / n / ŋ.
27ʲ → j;
28ᵐ → m;
29ⁿ → n;
30ᵑ → ŋ;
# Recompose what is left before the main rules run.
31::NFC;
# Diphthong pass: these rules rewrite IPA into other IPA (not into Arabic
# script); the vowel rules in the following pass then convert the result.
32# TODO: Diphthongs probably need more work.
33# Romanian [sekujesk] → [sekujask], for emitting سیکویاسک not سیکویسک
# Matches a $UVowel, then j or an $IVowel, then e / lowered e / a $SchwaVowel,
# and replaces the whole three-element sequence with the literal IPA "uia".
34$UVowel [j $IVowel] [e {e\u031E} $SchwaVowel] → uia;
35# Kazakh Аягөз [ɑjɑɡy\u032Fʉz] → [ɑjɑɡiuz], to emit آیاگیوز not آیاگووز
# The non-syllabic mark U+032F has already been deleted by the stripping rule
# above, which is why the pattern here is plain "yʉ".
36yʉ → iu;
# ::NULL ends this pass, so the rules below run over the rewritten text from
# the start.
37::NULL;
38# Vowels
# Rule order matters in this section: for each vowel class the more specific
# rule (with boundary context) is listed before the general one. In the
# patterns, { } delimits the text that is replaced; anything outside the
# braces is context only. "ː?" lets a rule also swallow a following length mark.
#
# Schwa-like vowels:
#   - directly after a $Boundary (word-initial), long or short → ای
39$Boundary {$SchwaVowel ː?} → ای;
#   - long, elsewhere → ی
40$SchwaVowel ː → ی;
#   - a schwa-like vowel, e, or lowered e that is followed by a character which
#     is not a letter, mark, number, or syllable dot (i.e. word-final) → ه.
#     Because this rule precedes the $IVowel rules, word-final e becomes ه
#     rather than ی.
41{[$SchwaVowel e {e\u031E}]} [^[:L:][:M:][:N:][\.]] → ه;
#   - any remaining short schwa-like vowel is dropped.
42$SchwaVowel → ;
# I-like vowels: ای word-initially, otherwise ی. The general rule also absorbs
# a following j so that the sequence yields a single ی.
43$Boundary {$IVowel ː?} → ای;
44$IVowel ː? j? → ی;
# U-like vowels and labial glides: او word-initially, otherwise و.
45$Boundary {$UVowel ː?} → او;
46$UVowel ː? → و;
# A-like vowels: ا in both positions (both rules produce the same output).
47$Boundary {$AVowel ː?} → ا;
48$AVowel ː? → ا;
# Dark a-like vowels: آ word-initially, otherwise ا.
49$Boundary {$DarkAVowel ː?} → آ;
50$DarkAVowel ː? → ا;
51# Shadda for long (geminated) consonants
# Length marks after vowels have already been consumed by the rules above, so
# any ː that reaches this rule is turned into the shadda mark U+0651.
52ː → \u0651;
53# Affricates
# NOTE(review): the tie bar U+0361 is deleted by the modifier-stripping rule
# in the pre-processing pass, so the {t\u0361ʃ} alternative here does not appear
# to be reachable; a tied t͡ʃ would arrive as plain "tʃ" and be converted by
# the separate t and ʃ rules. Confirm whether "tʃ" should map to چ.
54[{t\u0361ʃ} ʧ] → چ;
55# Clicks
# A click preceded by a velar stop letter becomes کچ; any other click, with or
# without a preceding n / ɲ (which is absorbed), becomes نچ.
# NOTE(review): $Click contains ʄ, which is also listed in the ش rule under
# "Sibilant fricatives". The second click rule matches a bare ʄ first, so the
# later ش mapping for ʄ looks unreachable — confirm which one is intended.
56[ɡ g ɠ k] $Click → کچ;
57[n ɲ]? $Click → نچ;
58# Nasal stops
# Labial nasals → م; dental/alveolar, retroflex and palatal nasals (including
# the variants with diacritics listed in the set) → ن.
59[{m\u0325} m ɱ] → م;
60[{n\u033C\u030A} {n\u033C} {n\u0325} n {ɳ\u030A} ɳ {ɲ\u030A} {ɲ\u0325} ɲ] → ن;
# Velar/uvular nasals: together with a following k → نک; otherwise → نگ,
# absorbing a following ɡ / g if present so that it is not written twice.
61[{ŋ\u030A} ŋ {ɴ\u0325} ɴ] k → نک;
62[{ŋ\u030A} ŋ {ɴ\u0325} ɴ] [ɡ g]? → نگ;
63# Non-nasal stops
64[p {p\u032A}] → پ;
65[b {b\u032A} ɓ] → ب;
66[{d\u033C} d ɗ ᶑ] → د;
67[{t\u033C} t] → ت;
# Retroflex stops are written with the letters ط and ض.
68[ʈ] → ط;
69[ɖ] → ض;
# Palatal stops: c → چ, ɟ → the two-letter sequence دج.
70c → چ;
71ɟ → دج;
72k → ک;
# Both U+0261 (ɡ) and ASCII g are accepted, as well as the implosive ɠ.
73[ɡ g ɠ] → گ;
74[q ɢ ʡ ʛ] → ق;
# The glottal stop is deleted (empty replacement).
75ʔ → ;
76# Sibilant fricatives
77s → س;
78z → ز;
# NOTE(review): ʄ is also a member of $Click, and the click rules earlier in
# this pass match it first, so the ʄ entry in this set looks unreachable —
# confirm which mapping is intended.
79[ʃ ʂ ɕ ʄ] → ش;
80[ʒ ʐ ʑ] → ژ;
81# Non-sibilant fricatives
82[ɸ f] → ف;
# β and v share the letter و with the $UVowel class.
83[β v] → و;
84[{θ\u033C} θ {θ\u0331}] → ث;
85[{ð\u033C} ð {ð\u0320}] → ذ;
86ç → ش;
# ʝ → ی, absorbing a following $IVowel and length mark so that the sequence
# produces a single ی.
87ʝ $IVowel? ː? → ی;
88[x χ] → خ;
89[ɣ ʁ] → غ;
90ħ → ح;
91ʕ → ع;
92[h ɦ {ʔ\u031E}] → ه;
93# Approximants, trills, flaps
94ʋ → و;
# Sounds without a single-letter equivalent are written as two letters.
95ʙ → بر;
96{r\u031D} → رژ;
# All remaining r-like sounds (with or without the listed diacritics) → ر.
97[{ɹ\u0325} {ɹ} {ɻ\u030A} {ɻ} {ɾ\u0325} ɾ {ɽ\u030A} ɽ {r\u033C} {r\u0325} r] → ر;
98[{ʀ\u0325} ʀ] → غ;
99ʜ → ح;
100ʢ → ع;
# j → ی, absorbing a following $IVowel and length mark (same shape as the ʝ
# rule above).
101j $IVowel? ː? → ی;
102# Laterals
103ɬ → شل;
104ɮ → ژل;
# ʎ (voiced or voiceless) followed by something other than an $IVowel, j or ʝ
# → لی. When one of those does follow, this rule does not match and the general
# lateral rule below writes ʎ as plain ل, leaving the ی to the following sound.
105{[{ʎ\u0325} ʎ]} [^ $IVowel j ʝ] → لی;
106[{l\u033C} {l\u0325} l {ɭ\u030A} ɭ {ʎ\u0325} ʎ] → ل;
107[ʟ {ʟ\u0320}] → غ;
108# Independent pass for misc cleanup.
# ::NULL starts a new pass, so the rules below see the fully converted output
# of the main pass.
109::NULL;
110# Strip off syllable markers
# The dot is kept until now because the word-final ه rule in the vowel section
# excludes it from its right-hand context.
111\. → ;
112# Sequences of three or more ووو look very confusing; we shorten them.
113# Polish Darłowo [darwɔvɔ] would otherwise come out with four و (داروووو); it is shortened to two (داروو).
# "ووو+" matches three or more consecutive و and replaces the run with وو.
114ووو+ → وو;
115
116