• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# © 2016 and later: Unicode, Inc. and others.
2# License & terms of use: http://www.unicode.org/copyright.html
3# Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
4#
5# File: Grek_Latn.txt
6# Generated from CLDR
7#
8
9# Rules are predicated on running NFD first, and NFC afterwards
10# :: [\u0000-\u007F \u0370-Ͽ [:Greek:] [:nonspacing mark:]] ;
11# MINIMAL FILTER GENERATED FOR: Greek-Latin
12:: [;µ·ÄËÏÖÜäëïöüÿ-āĒ-ēĪ-īŌ-ōŪ-ūŸǕ-ǜǞ-ǣǬ-ǭȪ-ȭȰ-ȳ\u0304\u0308\u0313-\u0314\u0342-\u0345ͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϗϛϝϟϡϣϥϧϩϫϭϯ-ϵϷ-\u07FBЁЇёїӒ-ӓӚ-ӟӢ-ӧӪ-ӱӴ-ӵӸ-ӹḔ-ḗḠ-ḡḦ-ḧḮ-ḯḸ-ḹṎ-ṓṜ-ṝṺ-ṻẄ-ẅẌ-ẍẗἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-ῌ῏-ΐῖ-Ί῟-Ῥῲ-ῴῶ-ῼΩϹ] ;
13:: NFD (NFC) ; # Greek-Latin direction: decompose (NFD) before the rules run; the parenthesized NFC is the counterpart used by the Latin-Greek direction
14# TEST CASES
15# Ὀλίγοι ἔμφονες πολλῶν ἀφρόνων φοβερώτεροι — Πλάτωνος
16# ᾂ ᾒ ᾢ ᾃ ᾓ ᾣ
17# ᾳ ῃ ῳ ὃ ὄ
18# ὠς ὡς ὢς ὣς
19# Ὠς Ὡς Ὢς Ὣς
20# ὨΣ ὩΣ ὪΣ ὫΣ
21# Ạ, ạ, Ẹ, ẹ, Ọ, ọ
22# Useful variables: character sets and combining marks shared by the rules below.
# Lowercase letters (Ll) of the Latin or Greek script.
23$lower = [[:latin:][:greek:] & [:Ll:]];
# Greek lowercase letters only.
24$glower = [[:greek:] & [:Ll:]];
# Uppercase letters (Lu) of the Latin or Greek script.
25$upper = [[:latin:][:greek:] & [:Lu:]] ;
# Any mark (general category M).
26$accent = [:M:] ;
27# NOTE: restrict to just the Greek & Latin accents that we care about
28# TODO: broaden out once iteration is fixed
# Marks in the range U+0300..U+0345, excluding U+0338.
29$accentMinus = [ [\u0300-\u0345] & [:M:] - [\u0338]] ;
# Combining macron, combining diaeresis, and a set matching either of them.
30$macron = \u0304 ;
31$ddot = \u0308 ;
32$ddotmac = [$ddot$macron];
# Greek vowels: lowercase, uppercase, and both.
33$lcgvowel = [αεηιουω] ;
34$ucgvowel = [ΑΕΗΙΟΥΩ] ;
35$gvowel = [$lcgvowel $ucgvowel] ;
# Lowercase Greek vowels plus any mark.
# NOTE(review): not referenced by any rule visible in this file.
36$lcgvowelC = [$lcgvowel $accent] ;
# Latin vowels; $evowel2 is the subset the breathing rules accept as the
# second letter of a two-vowel sequence.
37$evowel = [aeiouyAEIOUY];
38$evowel2 = [iuyIUY];
# All vowels, Latin and Greek.
39$vowel = [ $evowel $gvowel] ;
# Greek letters before which gamma is written n, and the Latin letters used
# as the matching context in the other direction.
40$gammaLike = [ΓΚΞΧγκξχϰ] ;
41$egammaLike = [GKXCgkxc] ;
# Combining breathing marks (smooth, rough) and the combining iota subscript.
42$smooth = \u0313 ;
43$rough = \u0314 ;
44$iotasub = \u0345 ;
# Latin vowels without i/I.
# NOTE(review): $evowel_i is not referenced by any rule visible in this file.
45$evowel_i = [$evowel-[iI]] ;
# $evowel2 without i/I.
46$evowel2_i = [uyUY];
# Combining macron below; the sigma rules use it to mark exceptional forms.
47$underbar = \u0331;
# Context patterns: a letter followed by any marks or apostrophes, and the
# mirror image (marks or apostrophes followed by a letter).
48$afterLetter = [:L:] [[:M:]\']* ;
49$beforeLetter = [[:M:]\']* [:L:] ;
# Optional marks followed by a lowercase letter.
50$beforeLower = $accent * $lower ;
# Any character that is neither a letter nor a mark.
51$notLetter = [^[:L:][:M:]] ;
# Same code point as $underbar; this name is the one used by the punctuation rules.
52$under = \u0331;
53# Fix punctuation
54# preserve original
# A literal colon or question mark in the Greek text is tagged with $under on
# the Latin side, which keeps it distinct from the colon / question mark that
# the next two rules produce from the middle dot and the semicolon.
55\: ↔ \: $under ;
56\? ↔ \? $under ;
# Semicolon <-> question mark, middle dot <-> colon.
57\; ↔ \? ;
58· ↔ \: ;
59# CIRCUMFLEX: convert greek circumflex to normal one. Could use tilde or inverted breve
60\u0342 ↔ \u0302 ;
61# IOTA: convert iota subscript to iota
62# first make previous alpha long!
# Any mark other than the iota subscript and the macron.
63$accent_minus = [[$accent]-[$iotasub$macron]];
# Alpha followed (after optional other marks) by an iota subscript gets a macron.
# The cursor (|) is placed before the rewritten alpha so matching resumes there;
# the inserted macron is not in $accent_minus, so these two rules cannot match again.
64Α } $accent_minus * $iotasub → | Α $macron ;
65α } $accent_minus * $iotasub → | α $macron ;
66# now convert to uppercase if after uppercase, otherwise to lowercase
67$upper $accent * { $iotasub → I ;
68$iotasub → i ;
# Latin-Greek direction: a Latin vowel with macron (plus optional $accentMinus
# marks) followed by i / I gets the iota subscript back in place of the i / I;
# the cursor is set before the captured vowel so it is then transliterated.
69| $1 $iotasub ← ($evowel $macron $accentMinus *) i ;
70| $1 $iotasub ← ($evowel $macron $accentMinus *) I ;
71# BREATHING
72# Convert rough breathing to h, and move before letters.
73# Make A ` x = → H a x
# Capital Greek vowel carrying the rough breathing and followed by a lowercase
# letter: emit H, then continue (cursor |) with the lowercased vowel, so the
# result is titlecase. An optional captured macron / diaeresis is re-emitted
# after the vowel.
74Α ($macron?) $rough } $beforeLower → H | α $1;
75Ε $rough } $beforeLower → H | ε;
76Η $rough } $beforeLower → H | η ;
77Ι ($ddot?) $rough } $beforeLower → H | ι  $1;
78Ο $rough } $beforeLower → H | ο ;
79Υ $rough } $beforeLower → H | υ ;
80Ω ($ddot?) $rough } $beforeLower → H | ω $1;
81# Make A x ` = → H a x
# Same idea when the breathing sits on a lowercase Greek letter that follows
# the capital vowel; that letter (and its optional mark) is captured as $1.
82Α ($glower $macron?) $rough → H | α $1 ;
83Ε ($glower) $rough → H | ε $1 ;
84Η ($glower) $rough → H | η $1 ;
85Ι ($glower $ddot?) $rough → H | ι $1 ;
86Ο ($glower) $rough → H | ο $1 ;
87Υ ($glower) $rough → H | υ $1 ;
88Ω ($glower  $ddot?) $rough → H | ω $1 ;
89#Otherwise, make x ` into h x and X ` into H X
# The lowercase-only rule comes first; any run containing a capital vowel
# falls through to the second rule and gets H.
90($lcgvowel + $ddotmac? ) $rough → h | $1 ;
91($gvowel + $ddotmac? ) $rough → H | $1 ;
92# Go backwards with H
# Latin-Greek direction: h / H followed by a Latin vowel sequence becomes that
# sequence followed by the rough breathing. The two-vowel shapes are listed
# before the single-vowel shape so that they are tried first.
93| $1 $rough ← h ($evowel $macron $ddot? $evowel2_i $macron?) ;
94| $1 $rough ← h ($evowel $ddot? $evowel2 $macron?) ;
95| $1 $rough ← h ($evowel $macron? $ddot?) ;
96| $1 $rough ← H ([AEIOUY] $macron $ddot? $evowel2_i $macron?) ;
97| $1 $rough ← H ([AEIOUY] $ddot? $evowel2 $macron?) ;
98| $1 $rough ← H ([AEIOUY] $macron? $ddot?) ;
99# titlecase, have to fix individually
100# in the future, we should add &uppercase() to make this easier
# H followed by a lowercase Latin vowel: the capitalization moves onto the
# vowel, which is why each vowel needs its own rule. Same three shapes as above.
101| A $1 $rough ← H a ($macron  $ddot? $evowel2_i $macron?) ;
102| E $1 $rough ← H e ($macron  $ddot? $evowel2_i $macron?) ;
103| I $1 $rough ← H i ($macron  $ddot? $evowel2_i $macron?) ;
104| O $1 $rough ← H o ($macron  $ddot? $evowel2_i $macron?) ;
105| U $1 $rough ← H u ($macron $ddot? $evowel2_i $macron?) ;
106| Y $1 $rough ← H y ($macron $ddot? $evowel2_i $macron?) ;
107| A $1 $rough ← H a ($ddot? $evowel2 $macron?) ;
108| E $1 $rough ← H e ($ddot? $evowel2 $macron?) ;
109| I $1 $rough ← H i ($ddot? $evowel2 $macron?) ;
110| O $1 $rough ← H o ($ddot? $evowel2 $macron?) ;
111| U $1 $rough ← H u ($ddot? $evowel2 $macron?) ;
112| Y $1 $rough ← H y ($ddot? $evowel2 $macron?) ;
113| A $1 $rough ← H a ($macron? $ddot? ) ;
114| E $1 $rough ← H e ($macron? $ddot? ) ;
115| I $1 $rough ← H i ($macron? $ddot? ) ;
116| O $1 $rough ← H o ($macron? $ddot? ) ;
117| U $1 $rough ← H u ($macron? $ddot? ) ;
118| Y $1 $rough ← H y ($macron? $ddot? ) ;
119# Now do smooth
120#delete smooth breathing for Latin
121$smooth → ;
122# insert in Greek
123# the assumption is that all Marks are on letters.
# Latin-Greek direction, only directly after a non-letter (i.e. word-initial):
# add the smooth breathing after r / R unless h, H or a breathing follows,
# after a two-vowel sequence, or after a single vowel that is not followed by
# a possible second vowel. The cursor is set before the captured text so it
# is then transliterated.
124| $1 $smooth ← $notLetter { ([rR]) } [^hH$smooth$rough] ;
125| $1 $smooth ← $notLetter { ($evowel $macron? $evowel2 $macron?) } [^$smooth$rough] ;
126| $1 $smooth ← $notLetter { ($evowel $macron?) } [^$evowel2$smooth$rough] ;
127# TODO: preserve smooth/rough breathing if not
128# on initial vowel sequence
129# need to have these up here so the rules don't mask
130# remove now superfluous macron when returning
# Latin-Greek only: A / a with macron goes back to a plain alpha.
131Α ← A $macron ;
132α ← a $macron ;
# Eta is written as e with macron.
133η ↔ e $macron ;
134Η ↔ E $macron ;
# Letters written as Latin digraphs. A capital followed by a lowercase letter
# produces the titlecase digraph (Ps, Ph); otherwise the all-caps form is used.
135φ ↔ ph ;
136Ψ } $beforeLower ↔ Ps ;
137Ψ ↔ PS ;
138Φ } $beforeLower ↔ Ph ;
139Φ ↔ PH ;
140ψ ↔ ps ;
# Omega is written as o with macron.
141ω ↔ o $macron ;
142Ω ↔  O $macron;
143# NORMAL
# Letter-by-letter mappings. Where a letter has a context-dependent rule, that
# rule is listed ahead of the plain mapping for the same letter so that it is
# tried first.
144α ↔ a ;
145Α ↔ A ;
146β ↔ b ;
147Β ↔ B ;
# Gamma before a $gammaLike letter is written n / N; in the other direction
# n / N before an $egammaLike letter becomes gamma.
148γ } $gammaLike ↔ n } $egammaLike ;
149γ ↔ g ;
150Γ } $gammaLike ↔ N } $egammaLike ;
151Γ ↔ G ;
152δ ↔ d ;
153Δ ↔ D ;
154ε ↔ e ;
155Ε ↔ E ;
156ζ ↔ z ;
157Ζ ↔ Z ;
# Theta: Th before a lowercase letter, TH otherwise.
158θ ↔ th ;
159Θ } $beforeLower ↔ Th ;
160Θ ↔ TH ;
161ι ↔ i ;
162Ι ↔ I ;
163κ ↔ k ;
164Κ ↔ K ;
165λ ↔ l ;
166Λ ↔ L ;
167μ ↔ m ;
168Μ ↔ M ;
# Nu before a $gammaLike letter gets a trailing apostrophe, which keeps it
# distinct from the n produced from gamma above.
# NOTE(review): the lowercase rule is one-directional (→) while the uppercase
# rule is two-directional (↔); confirm whether that difference is intended.
169ν } $gammaLike → n\' ;
170ν ↔ n ;
171Ν } $gammaLike ↔ N\' ;
172Ν ↔ N ;
173ξ ↔ x ;
174Ξ ↔ X ;
175ο ↔ o ;
176Ο ↔ O ;
177π ↔ p ;
178Π ↔ P ;
# Rho with rough breathing is written rh (Rh before a lowercase letter, RH otherwise).
179ρ $rough ↔ rh;
180Ρ $rough } $beforeLower ↔ Rh ;
181Ρ $rough ↔ RH ;
182ρ ↔ r ;
183Ρ ↔ R ;
184# insert separator before things that turn into s
# An apostrophe is inserted between an already-produced P / p and a following
# sigma-like letter; presumably this keeps pi + sigma distinct from the "ps"
# produced from psi.
185[Pp] { } [ςσΣϷϸϺϻ] → \' ;
186# special S variants
187Ϸ ↔ S\u030C ; # Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L
188ϸ ↔ s\u030C ; #ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L
189Ϻ ↔ S\u0302 ; # Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L
190ϻ ↔ s\u0302 ; # ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L
191# underbar means exception
# The expected form (σ before a letter, ς after a letter) maps to a plain s;
# the unexpected form in each position is marked with $underbar so that it
# survives the round trip.
192# before a letter, initial
193ς } $beforeLetter ↔ s $underbar } $beforeLetter;
194σ } $beforeLetter ↔ s } $beforeLetter;
195# otherwise, after a letter = final
196$afterLetter { σ ↔ $afterLetter { s $underbar;
197$afterLetter { ς ↔ $afterLetter { s ;
198# otherwise (isolated) = initial
199ς ↔ s $underbar;
200σ ↔ s ;
201# [Pp] { Σ ↔ \'S ;
202Σ ↔ S ;
203τ ↔ t ;
204Τ ↔ T ;
# Upsilon is u / U directly after a vowel and y / Y elsewhere.
205$vowel {υ } ↔ u ;
206υ ↔ y ;
207$vowel { Υ ↔ U ;
208Υ ↔ Y ;
# Chi: Ch before a lowercase letter, CH otherwise.
209χ ↔ ch ;
210Χ } $beforeLower ↔ Ch ;
211Χ ↔ CH ;
212# Completeness for ASCII
# Any run of marks and apostrophes; used as skippable context in the
# breathing rules at the end of this section.
213$ignore = [[:Mark:]''] * ;
# Latin-Greek only: Latin letters that have no mapping of their own are first
# rewritten to a Latin letter (or digraph) that has one. The cursor (|) is
# placed before the replacement so that it is then transliterated.
# v and w become b before a vowel and u otherwise.
214| k  ← c ;
215| ph ← f ;
216| i  ← j ;
217| k ← q ;
218| b ← v } $vowel ;
219| b ← w } $vowel;
220| u ← v ;
221| u ← w;
222| K ← C ;
223| Ph ← F ;
224| I ← J ;
225| K ← Q ;
226| B ← V  } $vowel ;
227| B ← W  } $vowel ;
228| U ← V ;
229| U ← W ;
# A rough breathing not consumed by the breathing rules earlier in the file
# becomes H when an uppercase letter follows or precedes it (ignoring marks
# and apostrophes), and h otherwise. In the Latin-Greek direction a leftover
# H or h becomes the rough breathing.
230$rough } $ignore [:UppercaseLetter:] → H ;
231$ignore [:UppercaseLetter:] { $rough → H ;
232$rough ← H ;
233$rough ↔ h ;
234# Completeness for Greek
# Greek-Latin only: symbol and variant letter forms are replaced by the
# ordinary Greek letter, with the cursor (|) placed before it so that the
# replacement is then transliterated by the letter rules. The two exceptions
# are ϳ and ͺ, which map directly to Latin j and i.
235ϐ → | β ;
236ϑ → | θ ;
237ϒ → | Υ ;
238ϕ → | φ ;
239ϖ → | π ;
240ϰ → | κ ;
241ϱ → | ρ ;
242ϲ → | σ ;
243Ϲ → | Σ; #U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL
244ϳ → j ;
245ϴ → | Θ ;
246ϵ → | ε ;
247µ → | μ ;
248ͺ → i;
249# delete any trailing ' marks used for roundtripping
# Latin-Greek only, with an empty replacement: the apostrophe is removed when
# it stands between Π / π and S / s, or between Ν / ν and an $egammaLike letter.
250← [Ππ] { \' } [Ss] ;
251← [Νν] { \' } $egammaLike ;
252::NFC (NFD) ; # Greek-Latin direction: recompose (NFC) after the rules; the parenthesized NFD is the counterpart used by the Latin-Greek direction
253# ([\u0000-\u007F [:Latin:] [:Greek:] [:nonspacing mark:]]) ;
254# ([\u0000-\u007F · [:Latin:] [:nonspacing mark:]]) ;
255# MINIMAL FILTER GENERATED FOR: Latin-Greek BACKWARD
256:: ( [':?A-Za-zÀ-ÅÇ-ÏÑ-ÖÙ-Ýà-åç-ïñ-öù-ýÿ-ďĒ-ĥĨ-İĴ-ķĹ-ľŃ-ňŌ-őŔ-ťŨ-žƠ-ơƯ-ưǍ-ǜǞ-ǣǦ-ǰǴ-ǵǸ-țȞ-ȟȦ-ȳ\u0300-\u0337\u0339-\u0345΅-ΆΈ-ΊΌΎ-ΐΪ-ΰϊ-ώϓ-ϔЀ-ЁЃЇЌ-ЎЙйѐ-ёѓїќ-ўѶ-ѷӁ-ӂӐ-ӓӖ-ӗӚ-ӟӢ-ӧӪ-ӵӸ-ӹḀ-ẙẛẠ-ỹἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼ῁-ῄῆ-ΐῖ-Ί῝-΅ῲ-ῴῶ-ῼK-Å] ) ;
257
258