• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# © 2016 and later: Unicode, Inc. and others.
2# License & terms of use: http://www.unicode.org/copyright.html
3# Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
4#
5# File: Grek_Latn_UNGEGN.txt
6# Generated from CLDR
7#
8
9# For modern Greek, based on UNGEGN rules.
10# Rules are predicated on running NFD first, and NFC afterwards
11# MINIMAL FILTER GENERATED FOR: Greek-Latin/UNGEGN
12# WARNING: need to add accents to both filters ###
13# :: [\u0301\u0304\u0306\u0308;µ·ÀÂÈÊÌÎÒÔÙÛàâèêìîòôùûĈ-ĉĜ-ĝĤ-ĥĴ-ĵŜ-ŝŴ-ŷǛ-ǜǸ-ǹ\u0300\u0302\u0313-\u0314\u0340\u0342-\u0343\u0345ͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϖϰ-ϵЀЍѐѝḔ-ḕṐ-ṑẀ-ẁẐ-ẑẤ-ậẰ-ằẾ-ệỐ-ộỜ-ờỪ-ừỲ-ỳἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-῍῏-ΐῖ-Ί῝῟-῭ῲ-ῴῶ-ῼΩϷ-\u07FBϹ] ;
14:: [[[:Greek:][:Mn:][:Me:]] [\:-;?·;·]] ; # global filter for the forward direction: only Greek-script characters, combining marks (Mn/Me) and the listed punctuation are touched
15::NFD (NFC) ; # decompose (NFD) before the forward rules run; the reverse direction applies NFC at this position instead
16# Useful variables
17$lower = [[:latin:][:greek:] & [:Ll:]] ; # lowercase letters of the Latin or Greek script
18$upper = [[:latin:][:greek:] & [:Lu:]] ; # uppercase letters of the Latin or Greek script
19$accent = [[:Mn:][:Me:]] ; # any nonspacing or enclosing combining mark
20$macron = \u0304 ; # U+0304 COMBINING MACRON
21$ddot = \u0308 ; # U+0308 COMBINING DIAERESIS
22$lcgvowel = [αεηιουω] ; # lowercase Greek vowels
23$ucgvowel = [ΑΕΗΙΟΥΩ] ; # uppercase Greek vowels
24$gvowel = [$lcgvowel $ucgvowel] ; # Greek vowels of either case
25$lcgvowelC = [$lcgvowel $accent] ; # a lowercase Greek vowel or a combining mark
26$evowel = [aeiouyAEIOUY]; # Latin vowels (y included)
27$vowel = [ $evowel $gvowel] ; # Latin or Greek vowel
28$beforeLower = $accent * $lower ; # right context: optional marks, then a lowercase letter (selects title-case forms such as Ps/Th/Ch)
29$gammaLike = [ΓΚΞΧγκξχϰ] ; # Greek letters before which γ is written n instead of g
30$egammaLike = [GKXCgkxc] ; # Latin counterparts of $gammaLike, used as context by the reverse rules
31$smooth = \u0313 ; # U+0313 COMBINING COMMA ABOVE (smooth breathing)
32$rough = \u0314 ; # U+0314 COMBINING REVERSED COMMA ABOVE (rough breathing)
33$iotasub = \u0345 ; # U+0345 COMBINING GREEK YPOGEGRAMMENI (iota subscript)
34$softener = [βΒγΓδΔζΖλΛμΜνΝρΡ$gvowel] ; # when one of these follows, υ after a/e/i becomes v rather than f
35$under = \u0331; # U+0331 COMBINING MACRON BELOW; tags a Latin letter so the reverse transform can tell otherwise identical outputs apart
36$caron = \u030C; # U+030C COMBINING CARON
37$afterLetter = [:L:] [\'$accent]* ; # left context: a letter plus any apostrophes/marks after it
38$beforeLetter = [\'$accent]* [:L:] ; # right context: any apostrophes/marks, then a letter
39# Fix punctuation
40# preserve original
41\: ↔ \: $under ; # a colon already in the text is tagged with $under to keep it distinct from the colon produced for ·
42\? ↔ \? $under ; # likewise for a literal question mark versus the one produced for ;
43\; ↔ \? ; # Greek question mark (written ;) ↔ ?
44· ↔ \: ; # raised dot ↔ colon
45# Fix any ancient characters that creep in
46\u0342 → \u0301 ; # perispomeni → acute
47\u0302 → \u0301 ; # circumflex → acute
48\u0300 → \u0301 ; # grave → acute
49$smooth → ; # breathings and the iota subscript are dropped (forward only)
50$rough → ;
51$iotasub → ;
52ͺ → ; # the stand-alone (spacing) ypogegrammeni is dropped as well
53# need to have these up here so the rules don't mask
54η ↔ i $under ; # eta shares the letter i with iota; $under keeps the pair reversible
55Η ↔ I $under ;
56Ψ } $beforeLower ↔ Ps ; # title-case form when a lowercase letter follows
57Ψ ↔ PS ;
58ψ ↔ ps ;
59ω ↔ o $under ; # omega shares the letter o with omicron; $under keeps the pair reversible
60Ω ↔  O $under;
61# at beginning or end of word, convert mp to b
62[^[:L:]$accent] { μπ → b ; # word start: preceded by something that is neither a letter nor a mark
63μπ } [^[:L:]$accent] → b ; # word end: followed by something that is neither a letter nor a mark
64[^[:L:]$accent] { [Μμ][Ππ] → B ; # same two positions for the other case combinations (all-lowercase μπ is already taken by the rules above)
65[Μμ][Ππ] } [^[:L:]$accent] → B ;
66μπ ← b ; # reverse: b always expands back to μπ
67Μπ ← B } $beforeLower ; # reverse: title case when a lowercase letter follows
68ΜΠ ← B ;
69# handle diphthongs ending with upsilon
70ου ↔ ou ;
71ΟΥ ↔ OU ;
72Ου ↔ Ou ;
73οΥ ↔ oU ;
74$fmaker = [aeiAEI] $under ? ; # left context a/e/i (optionally tagged with $under), expressed in Latin letters
75$shiftForwardVowels = [[:Mn:]-[\u0308]]; # note: a diaeresis keeps the items separate
76$fmaker { υ ( $shiftForwardVowels )* } $softener → $1 v $under ; # before a $softener: υ becomes v+$under, and its marks ($1) are emitted ahead of the v
77υ $1 ← ( $shiftForwardVowels )* v $under ; # reverse: the marks move back after υ
78$fmaker { υ ( $shiftForwardVowels )* } → $1 f $under; # otherwise: υ becomes f+$under, marks again emitted first
79υ $1 ← ( $shiftForwardVowels )* f $under ;
80$fmaker { Υ } $softener ↔ V $under ; # capital Υ after a/e/i: V+$under before a $softener ...
81$fmaker { Υ ↔ U $under ; # ... and U+$under otherwise
82υ ↔ y ; # any other upsilon
83Υ ↔ Y ;
84# NORMAL
85α ↔ a ;
86Α ↔ A ;
87β ↔ v ;
88Β ↔ V ;
89γ } $gammaLike ↔ n } $egammaLike ; # gamma before a $gammaLike letter is written n; the reverse needs a Latin $egammaLike letter to follow
90γ ↔ g ;
91Γ } $gammaLike ↔ N } $egammaLike ;
92Γ ↔ G ;
93δ ↔ d ;
94Δ ↔ D ;
95ε ↔ e ;
96Ε ↔ E ;
97ζ ↔ z ;
98Ζ ↔ Z ;
99θ ↔ th ;
100Θ } $beforeLower ↔ Th ; # title-case form when a lowercase letter follows
101Θ ↔ TH ;
102ι ↔ i ;
103Ι ↔ I ;
104κ ↔ k ;
105Κ ↔ K ;
106λ ↔ l ;
107Λ ↔ L ;
108μ ↔ m ;
109Μ ↔ M ;
110ν } $gammaLike → n\' ; # forward only: the apostrophe keeps ν + $gammaLike apart from the n produced for γ
111ν ↔ n ;
112Ν } $gammaLike ↔ N\' ;
113Ν ↔ N ;
114ξ ↔ x ;
115Ξ ↔ X ;
116ο ↔ o ;
117Ο ↔ O ;
118π ↔ p ;
119Π ↔ P ;
120ρ ↔ r ;
121Ρ ↔ R ;
122# insert separator before things that turn into s
123[Pp] { } [ςσΣϷϸϺϻ] → \' ; # the apostrophe keeps π followed by a sigma-like letter (p's) distinct from ψ (ps)
124# special S variants
125Ϸ ↔ S\u030C ; # Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L
126ϸ ↔ s\u030C ; #ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L
127Ϻ ↔ S\u0302 ; # Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L
128ϻ ↔ s\u0302 ; # ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L
129# Caron means exception (NOTE(review): the sigma rules below tag the exceptional form with $under, not a caron - confirm)
130# before a letter, initial
131ς } $beforeLetter ↔ s $under } $beforeLetter; # final-form ς in a non-final position is the exception, so it gets $under
132σ } $beforeLetter ↔ s } $beforeLetter;
133# otherwise, after a letter = final
134$afterLetter { σ ↔ $afterLetter { s $under; # non-final-form σ in final position is the exception, so it gets $under
135$afterLetter { ς ↔ $afterLetter { s ;
136# otherwise (isolated) = initial
137ς ↔ s $under;
138σ ↔ s ;
139# [Pp] { Σ ↔ \'S ;
140Σ ↔ S ;
141τ ↔ t ;
142Τ ↔ T ;
143φ ↔ f ;
144Φ ↔ F ;
145χ ↔ ch ;
146Χ } $beforeLower ↔ Ch ; # title-case form when a lowercase letter follows
147Χ ↔ CH ;
148# Completeness for ASCII (reverse-only: Latin letters not otherwise handled are rewritten to Latin text that is; the leading | leaves the cursor before the result so it is transliterated again)
149# $ignore = [[:Mark:]''] * ;
150| ch ← h ;
151| k  ← c ;
152| i  ← j ;
153| k ← q ;
154| b ← u } $vowel ; # u/w before a vowel are treated as b ...
155| b ← w } $vowel ;
156| y ← u ; # ... and as y everywhere else
157| y ← w ;
158| Ch ← H ;
159| K ← C ;
160| I ← J ;
161| K ← Q ;
162| B ← W } $vowel ; # uppercase counterparts of the u/w rules
163| B ← U } $vowel ;
164| Y ← W ;
165| Y ← U ;
166# Completeness for Greek (forward-only: symbol/variant letter forms are replaced by the ordinary letter; the | leaves the cursor before it so the regular rules then apply)
167ϐ → | β ;
168ϑ → | θ ;
169ϒ → | Υ ;
170ϕ → | φ ;
171ϖ → | π ;
172ϰ → | κ ;
173ϱ → | ρ ;
174ϲ → | σ ;
175Ϲ → | Σ; #U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL
176ϳ → j ; # yot goes straight to j, with no re-processing
177ϴ → | Θ ;
178ϵ → | ε ;
179µ → | μ ; # micro sign → Greek mu
180# delete any trailing ' marks used for roundtripping
181← [Ππ] { \' } [Ss] ; # reverse: drop the separator the forward rules put between p and an s
182← [Νν] { \' } $egammaLike ; # reverse: drop the apostrophe that follows n before a Latin g/k/x/c
183::NFC (NFD) ; # recompose (NFC) after the forward rules; the reverse direction applies NFD at this position instead
184# MINIMAL FILTER GENERATED FOR: Latin-Greek/UNGEGN BACKWARD
185:: ([[[:Latin:][:Mn:][:Me:]] ['\:?]]) ; # reverse-direction filter: Latin-script characters, combining marks, apostrophe, colon and question mark
186
187