# © 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html
# Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
#
# File: Grek_Latn.txt
# Generated from CLDR
#

# Reading guide (ICU transform rule syntax):
#   "→" rules apply only in the forward (Greek → Latin) direction,
#   "←" rules apply only in the reverse (Latin → Greek) direction,
#   "↔" rules apply in both directions.
#   "{" and "}" separate the text being replaced from its surrounding context.
#   "|" marks where the cursor is left after a replacement, so that the rules
#   can re-process the text that was just produced.
#   Rules are tried in file order: specific rules must stay above general ones.

# Rules are predicated on running NFD first, and NFC afterwards
# :: [\u0000-\u007F \u0370-Ͽ [:Greek:] [:nonspacing mark:]] ;
# MINIMAL FILTER GENERATED FOR: Greek-Latin
:: [;µ·ÄËÏÖÜäëïöüÿ-āĒ-ēĪ-īŌ-ōŪ-ūŸǕ-ǜǞ-ǣǬ-ǭȪ-ȭȰ-ȳ\u0304\u0308\u0313-\u0314\u0342-\u0345ͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϗϛϝϟϡϣϥϧϩϫϭϯ-ϵϷ-\u07FBЁЇёїӒ-ӓӚ-ӟӢ-ӧӪ-ӱӴ-ӵӸ-ӹḔ-ḗḠ-ḡḦ-ḧḮ-ḯḸ-ḹṎ-ṓṜ-ṝṺ-ṻẄ-ẅẌ-ẍẗἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-ῌ῏-ΐῖ-Ί῟-Ῥῲ-ῴῶ-ῼΩϹ] ;
:: NFD (NFC) ;
# TEST CASES
# Ὀλίγοι ἔμφονες πολλῶν ἀφρόνων φοβερώτεροι — Πλάτωνος
# ᾂ ᾒ ᾢ ᾃ ᾓ ᾣ
# ᾳ ῃ ῳ ὃ ὄ
# ὠς ὡς ὢς ὣς
# Ὠς Ὡς Ὢς Ὣς
# ὨΣ ὩΣ ὪΣ ὫΣ
# Ạ, ạ, Ẹ, ẹ, Ọ, ọ
# Useful variables
$lower = [[:latin:][:greek:] & [:Ll:]];
$glower = [[:greek:] & [:Ll:]];
$upper = [[:latin:][:greek:] & [:Lu:]] ;
$accent = [:M:] ;
# NOTE: restrict to just the Greek & Latin accents that we care about
# TODO: broaden out once iteration is fixed
$accentMinus = [ [\u0300-\u0345] & [:M:] - [\u0338]] ;
$macron = \u0304 ;
$ddot = \u0308 ;
$ddotmac = [$ddot$macron];
$lcgvowel = [αεηιουω] ;
$ucgvowel = [ΑΕΗΙΟΥΩ] ;
$gvowel = [$lcgvowel $ucgvowel] ;
# $lcgvowelC is not referenced by any rule in this file.
$lcgvowelC = [$lcgvowel $accent] ;
$evowel = [aeiouyAEIOUY];
$evowel2 = [iuyIUY];
$vowel = [ $evowel $gvowel] ;
# Greek letters before which a gamma is written as n/N, and the Latin letters
# before which n/N is read back as gamma.
$gammaLike = [ΓΚΞΧγκξχϰ] ;
$egammaLike = [GKXCgkxc] ;
$smooth = \u0313 ;
$rough = \u0314 ;
$iotasub = \u0345 ;
# $evowel_i is not referenced by any rule in this file.
$evowel_i = [$evowel-[iI]] ;
$evowel2_i = [uyUY];
$underbar = \u0331;
$afterLetter = [:L:] [[:M:]\']* ;
$beforeLetter = [[:M:]\']* [:L:] ;
$beforeLower = $accent * $lower ;
$notLetter = [^[:L:][:M:]] ;
# Same code point as $underbar; used by the punctuation rules below.
$under = \u0331;
# Fix punctuation
# preserve original
\: ↔ \: $under ;
\? ↔ \? $under ;
\; ↔ \? ;
· ↔ \: ;
# CIRCUMFLEX: convert greek circumflex to normal one. Could use tilde or inverted breve
\u0342 ↔ \u0302 ;
# IOTA: convert iota subscript to iota
# first make previous alpha long!
$accent_minus = [[$accent]-[$iotasub$macron]];
Α } $accent_minus * $iotasub → | Α $macron ;
α } $accent_minus * $iotasub → | α $macron ;
# now convert to uppercase if after uppercase, ow to lowercase
$upper $accent * { $iotasub → I ;
$iotasub → i ;
| $1 $iotasub ← ($evowel $macron $accentMinus *) i ;
| $1 $iotasub ← ($evowel $macron $accentMinus *) I ;
# BREATHING
# Convert rough breathing to h, and move before letters.
# Make A ` x = → H a x
Α ($macron?) $rough } $beforeLower → H | α $1;
Ε $rough } $beforeLower → H | ε;
Η $rough } $beforeLower → H | η ;
Ι ($ddot?) $rough } $beforeLower → H | ι $1;
Ο $rough } $beforeLower → H | ο ;
Υ $rough } $beforeLower → H | υ ;
Ω ($ddot?) $rough } $beforeLower → H | ω $1;
# Make A x ` = → H a x
Α ($glower $macron?) $rough → H | α $1 ;
Ε ($glower) $rough → H | ε $1 ;
Η ($glower) $rough → H | η $1 ;
Ι ($glower $ddot?) $rough → H | ι $1 ;
Ο ($glower) $rough → H | ο $1 ;
Υ ($glower) $rough → H | υ $1 ;
Ω ($glower $ddot?) $rough → H | ω $1 ;
#Otherwise, make x ` into h x and X ` into H X
($lcgvowel + $ddotmac? ) $rough → h | $1 ;
($gvowel + $ddotmac? ) $rough → H | $1 ;
# Go backwards with H
| $1 $rough ← h ($evowel $macron $ddot? $evowel2_i $macron?) ;
| $1 $rough ← h ($evowel $ddot? $evowel2 $macron?) ;
| $1 $rough ← h ($evowel $macron? $ddot?) ;
| $1 $rough ← H ([AEIOUY] $macron $ddot? $evowel2_i $macron?) ;
| $1 $rough ← H ([AEIOUY] $ddot? $evowel2 $macron?) ;
| $1 $rough ← H ([AEIOUY] $macron? $ddot?) ;
# titlecase, have to fix individually
# in the future, we should add &uppercase() to make this easier
| A $1 $rough ← H a ($macron $ddot? $evowel2_i $macron?) ;
| E $1 $rough ← H e ($macron $ddot? $evowel2_i $macron?) ;
| I $1 $rough ← H i ($macron $ddot? $evowel2_i $macron?) ;
| O $1 $rough ← H o ($macron $ddot? $evowel2_i $macron?) ;
| U $1 $rough ← H u ($macron $ddot? $evowel2_i $macron?) ;
| Y $1 $rough ← H y ($macron $ddot? $evowel2_i $macron?) ;
| A $1 $rough ← H a ($ddot? $evowel2 $macron?) ;
| E $1 $rough ← H e ($ddot? $evowel2 $macron?) ;
| I $1 $rough ← H i ($ddot? $evowel2 $macron?) ;
| O $1 $rough ← H o ($ddot? $evowel2 $macron?) ;
| U $1 $rough ← H u ($ddot? $evowel2 $macron?) ;
| Y $1 $rough ← H y ($ddot? $evowel2 $macron?) ;
| A $1 $rough ← H a ($macron? $ddot? ) ;
| E $1 $rough ← H e ($macron? $ddot? ) ;
| I $1 $rough ← H i ($macron? $ddot? ) ;
| O $1 $rough ← H o ($macron? $ddot? ) ;
| U $1 $rough ← H u ($macron? $ddot? ) ;
| Y $1 $rough ← H y ($macron? $ddot? ) ;
# Now do smooth
#delete smooth breathing for Latin
$smooth → ;
# insert in Greek
# the assumption is that all Marks are on letters.
| $1 $smooth ← $notLetter { ([rR]) } [^hH$smooth$rough] ;
| $1 $smooth ← $notLetter { ($evowel $macron? $evowel2 $macron?) } [^$smooth$rough] ;
| $1 $smooth ← $notLetter { ($evowel $macron?) } [^$evowel2$smooth$rough] ;
# TODO: preserve smooth/rough breathing if not
# on initial vowel sequence
# need to have these up here so the rules don't mask
# remove now superfluous macron when returning
Α ← A $macron ;
α ← a $macron ;
η ↔ e $macron ;
Η ↔ E $macron ;
φ ↔ ph ;
Ψ } $beforeLower ↔ Ps ;
Ψ ↔ PS ;
Φ } $beforeLower ↔ Ph ;
Φ ↔ PH ;
ψ ↔ ps ;
ω ↔ o $macron ;
Ω ↔ O $macron;
# NORMAL
α ↔ a ;
Α ↔ A ;
β ↔ b ;
Β ↔ B ;
γ } $gammaLike ↔ n } $egammaLike ;
γ ↔ g ;
Γ } $gammaLike ↔ N } $egammaLike ;
Γ ↔ G ;
δ ↔ d ;
Δ ↔ D ;
ε ↔ e ;
Ε ↔ E ;
ζ ↔ z ;
Ζ ↔ Z ;
θ ↔ th ;
Θ } $beforeLower ↔ Th ;
Θ ↔ TH ;
ι ↔ i ;
Ι ↔ I ;
κ ↔ k ;
Κ ↔ K ;
λ ↔ l ;
Λ ↔ L ;
μ ↔ m ;
Μ ↔ M ;
# nu before a gamma-like letter gets a trailing apostrophe so that it is not
# read back as gamma by the "n } $egammaLike" rules above. Both cases are
# forward-only: the reverse direction maps n/N with the plain rules and then
# drops the marker with "← [Νν] { \' } $egammaLike" near the end of the file.
ν } $gammaLike → n\' ;
ν ↔ n ;
# Was "↔", which also produced the reverse rule "N\' → Ν" and so deleted an
# apostrophe after capital N in any Latin text, unlike lowercase "n'".
# NOTE(review): this file is generated from CLDR; apply the same change to the
# CLDR source transform or it will be lost on regeneration.
Ν } $gammaLike → N\' ;
Ν ↔ N ;
ξ ↔ x ;
Ξ ↔ X ;
ο ↔ o ;
Ο ↔ O ;
π ↔ p ;
Π ↔ P ;
ρ $rough ↔ rh;
Ρ $rough } $beforeLower ↔ Rh ;
Ρ $rough ↔ RH ;
ρ ↔ r ;
Ρ ↔ R ;
# insert separator before things that turn into s
[Pp] { } [ςσΣϷϸϺϻ] → \' ;
# special S variants
Ϸ ↔ S\u030C ; # Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L
ϸ ↔ s\u030C ; #ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L
Ϻ ↔ S\u0302 ; # Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L
ϻ ↔ s\u0302 ; # ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L
# underbar means exception
# before a letter, initial
ς } $beforeLetter ↔ s $underbar } $beforeLetter;
σ } $beforeLetter ↔ s } $beforeLetter;
# otherwise, after a letter = final
$afterLetter { σ ↔ $afterLetter { s $underbar;
$afterLetter { ς ↔ $afterLetter { s ;
# otherwise (isolated) = initial
ς ↔ s $underbar;
σ ↔ s ;
# [Pp] { Σ ↔ \'S ;
Σ ↔ S ;
τ ↔ t ;
Τ ↔ T ;
$vowel {υ } ↔ u ;
υ ↔ y ;
$vowel { Υ ↔ U ;
Υ ↔ Y ;
χ ↔ ch ;
Χ } $beforeLower ↔ Ch ;
Χ ↔ CH ;
# Completeness for ASCII
# Reverse-only rewrites of Latin letters that have no rule of their own; the
# cursor is left before the replacement so the rules above convert it.
$ignore = [[:Mark:]''] * ;
| k ← c ;
| ph ← f ;
| i ← j ;
| k ← q ;
| b ← v } $vowel ;
| b ← w } $vowel;
| u ← v ;
| u ← w;
| K ← C ;
| Ph ← F ;
| I ← J ;
| K ← Q ;
| B ← V } $vowel ;
| B ← W } $vowel ;
| U ← V ;
| U ← W ;
$rough } $ignore [:UppercaseLetter:] → H ;
$ignore [:UppercaseLetter:] { $rough → H ;
$rough ← H ;
$rough ↔ h ;
# Completeness for Greek
# Forward-only: variant letter forms are rewritten to the basic letter and
# then re-processed (cursor left before the replacement).
ϐ → | β ;
ϑ → | θ ;
ϒ → | Υ ;
ϕ → | φ ;
ϖ → | π ;
ϰ → | κ ;
ϱ → | ρ ;
ϲ → | σ ;
Ϲ → | Σ; #U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL
ϳ → j ;
ϴ → | Θ ;
ϵ → | ε ;
µ → | μ ;
ͺ → i;
# delete any trailing ' marks used for roundtripping
← [Ππ] { \' } [Ss] ;
← [Νν] { \' } $egammaLike ;
::NFC (NFD) ;
# ([\u0000-\u007F [:Latin:] [:Greek:] [:nonspacing mark:]]) ;
# ([\u0000-\u007F · [:Latin:] [:nonspacing mark:]]) ;
# MINIMAL FILTER GENERATED FOR: Latin-Greek BACKWARD
:: ( [':?A-Za-zÀ-ÅÇ-ÏÑ-ÖÙ-Ýà-åç-ïñ-öù-ýÿ-ďĒ-ĥĨ-İĴ-ķĹ-ľŃ-ňŌ-őŔ-ťŨ-žƠ-ơƯ-ưǍ-ǜǞ-ǣǦ-ǰǴ-ǵǸ-țȞ-ȟȦ-ȳ\u0300-\u0337\u0339-\u0345΅-ΆΈ-ΊΌΎ-ΐΪ-ΰϊ-ώϓ-ϔЀ-ЁЃЇЌ-ЎЙйѐ-ёѓїќ-ўѶ-ѷӁ-ӂӐ-ӓӖ-ӗӚ-ӟӢ-ӧӪ-ӵӸ-ӹḀ-ẙẛẠ-ỹἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼ῁-ῄῆ-ΐῖ-Ί῝-΅ῲ-ῴῶ-ῼK-Å] ) ;
