# © 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html#License
#
# File: Grek_Latn.txt
# Generated from CLDR
#

# Purpose: bidirectional Greek <-> Latin transliteration rules.
#
# Rule syntax used throughout this file (ICU transform rules):
#   a → b      forward-only rule (Greek to Latin)
#   a ← b      reverse-only rule (Latin to Greek)
#   a ↔ b      rule that applies in both directions
#   x { a } y  a is only matched when preceded by x and followed by y
#   |          cursor position in the output; text after it is processed again
#   $1         text matched by the first parenthesised group
#   :: X (Y)   run transform X in the forward direction, Y in the reverse direction
# Rules are tried in file order, so the order of the rules below is significant.
#
# Rules are predicated on running NFD first, and NFC afterwards
# :: [\u0000-\u007F \u0370-Ͽ [:Greek:] [:nonspacing mark:]] ;
# MINIMAL FILTER GENERATED FOR: Greek-Latin
:: [;µ·ÄËÏÖÜäëïöüÿ-āĒ-ēĪ-īŌ-ōŪ-ūŸǕ-ǜǞ-ǣǬ-ǭȪ-ȭȰ-ȳ\u0304\u0308\u0313-\u0314\u0342-\u0345ͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϗϛϝϟϡϣϥϧϩϫϭϯ-ϵϷ-\u07FBЁЇёїӒ-ӓӚ-ӟӢ-ӧӪ-ӱӴ-ӵӸ-ӹḔ-ḗḠ-ḡḦ-ḧḮ-ḯḸ-ḹṎ-ṓṜ-ṝṺ-ṻẄ-ẅẌ-ẍẗἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-ῌ῏-ΐῖ-Ί῟-Ῥῲ-ῴῶ-ῼΩϹ] ;
# Decompose on the way in (forward); the paired reverse step recomposes.
:: NFD (NFC) ;
# TEST CASES
# Ὀλίγοι ἔμφονες πολλῶν ἀφρόνων φοβερώτεροι — Πλάτωνος
# ᾂ ᾒ ᾢ ᾃ ᾓ ᾣ
# ᾳ ῃ ῳ ὃ ὄ
# ὠς ὡς ὢς ὣς
# Ὠς Ὡς Ὢς Ὣς
# ὨΣ ὩΣ ὪΣ ὫΣ
# Ạ, ạ, Ẹ, ẹ, Ọ, ọ

# ----------------------------------------------------------------------
# Useful variables
# ----------------------------------------------------------------------
# Lowercase / uppercase letters of either script, and Greek-only lowercase.
$lower = [[:latin:][:greek:] & [:Ll:]];
$glower = [[:greek:] & [:Ll:]];
$upper = [[:latin:][:greek:] & [:Lu:]] ;
# Any mark (general category M).
$accent = [:M:] ;
# NOTE: restrict to just the Greek & Latin accents that we care about
# TODO: broaden out once interation is fixed
# Marks in U+0300..U+0345, excluding U+0338.
$accentMinus = [ [\u0300-\u0345] & [:M:] - [\u0338]] ;
$macron = \u0304 ;
$ddot = \u0308 ;
# Either of the two marks above (diaeresis or macron).
$ddotmac = [$ddot$macron];
# Greek vowels: lowercase, uppercase, and both.
$lcgvowel = [αεηιουω] ;
$ucgvowel = [ΑΕΗΙΟΥΩ] ;
$gvowel = [$lcgvowel $ucgvowel] ;
# NOTE(review): $lcgvowelC is not referenced by any rule visible in this file.
$lcgvowelC = [$lcgvowel $accent] ;
# Latin vowels, and the subset used as the second letter of a vowel pair.
$evowel = [aeiouyAEIOUY];
$evowel2 = [iuyIUY];
# Vowels of either script.
$vowel = [ $evowel $gvowel] ;
# Letters before which gamma is written n (see the γ/Γ rules below), and
# their Latin counterparts used as the context in the reverse direction.
$gammaLike = [ΓΚΞΧγκξχϰ] ;
$egammaLike = [GKXCgkxc] ;
# Breathing marks and iota subscript (combining forms, i.e. post-NFD).
$smooth = \u0313 ;
$rough = \u0314 ;
$iotasub = \u0345 ;
# NOTE(review): $evowel_i is not referenced by any rule visible in this file.
$evowel_i = [$evowel-[iI]] ;
# $evowel2 without i/I.
$evowel2_i = [uyUY];
$underbar = \u0331;
# Context helpers: "a letter plus any trailing marks/apostrophes" and
# "any leading marks/apostrophes followed by a letter".
$afterLetter = [:L:] [[:M:]\']* ;
$beforeLetter = [[:M:]\']* [:L:] ;
# Optional marks followed by a lowercase letter; used below to choose
# titlecase output (e.g. Ph) over all-caps output (e.g. PH).
$beforeLower = $accent * $lower ;
# Anything that is neither a letter nor a mark.
$notLetter = [^[:L:][:M:]] ;
# NOTE(review): $under has the same value as $underbar (both U+0331);
# the punctuation rules use $under, the sigma rules use $underbar.
$under = \u0331;

# ----------------------------------------------------------------------
# Fix punctuation
# ----------------------------------------------------------------------
# preserve original
# A colon or question mark already present in the Greek text is tagged with
# $under so that it can be told apart from the converted punctuation below.
\: ↔ \: $under ;
\? ↔ \? $under ;
\; ↔ \? ;
· ↔ \: ;

# CIRCUMFLEX: convert greek circumflex to normal one. Could use tilde or inverted breve
\u0342 ↔ \u0302 ;

# ----------------------------------------------------------------------
# IOTA: convert iota subscript to iota
# ----------------------------------------------------------------------
# first make previous alpha long!
# Any mark except the iota subscript and the macron.
# NOTE(review): this is a different set from $accentMinus defined above,
# despite the near-identical name.
$accent_minus = [[$accent]-[$iotasub$macron]];
Α } $accent_minus * $iotasub → | Α $macron ;
α } $accent_minus * $iotasub → | α $macron ;
# now convert to uppercase if after uppercase, otherwise to lowercase
$upper $accent * { $iotasub → I ;
$iotasub → i ;
# Reverse: an i/I following a vowel that carries a macron becomes an iota subscript.
| $1 $iotasub ← ($evowel $macron $accentMinus *) i ;
| $1 $iotasub ← ($evowel $macron $accentMinus *) I ;

# ----------------------------------------------------------------------
# BREATHING
# ----------------------------------------------------------------------
# Convert rough breathing to h, and move before letters.
# (In the sketches below, ` appears to stand for the rough-breathing mark.)
# Make A ` x = → H a x
# NOTE(review): an optional diaeresis is captured for Ι and Ω but not for Υ,
# both here and in the next group — confirm that this is intended.
Α ($macron?) $rough } $beforeLower → H | α $1;
Ε $rough } $beforeLower → H | ε;
Η $rough } $beforeLower → H | η ;
Ι ($ddot?) $rough } $beforeLower → H | ι $1;
Ο $rough } $beforeLower → H | ο ;
Υ $rough } $beforeLower → H | υ ;
Ω ($ddot?) $rough } $beforeLower → H | ω $1;
# Make A x ` = → H a x
Α ($glower $macron?) $rough → H | α $1 ;
Ε ($glower) $rough → H | ε $1 ;
Η ($glower) $rough → H | η $1 ;
Ι ($glower $ddot?) $rough → H | ι $1 ;
Ο ($glower) $rough → H | ο $1 ;
Υ ($glower) $rough → H | υ $1 ;
Ω ($glower $ddot?) $rough → H | ω $1 ;
#Otherwise, make x ` into h x and X ` into H X
# The lowercase-only rule must come first: $gvowel also matches lowercase vowels.
($lcgvowel + $ddotmac? ) $rough → h | $1 ;
($gvowel + $ddotmac? ) $rough → H | $1 ;
# Go backwards with H
# Reverse: drop the h/H and put the rough breathing after the vowel sequence.
# Longer vowel sequences are listed before shorter ones.
| $1 $rough ← h ($evowel $macron $ddot? $evowel2_i $macron?) ;
| $1 $rough ← h ($evowel $ddot? $evowel2 $macron?) ;
| $1 $rough ← h ($evowel $macron? $ddot?) ;
| $1 $rough ← H ([AEIOUY] $macron $ddot? $evowel2_i $macron?) ;
| $1 $rough ← H ([AEIOUY] $ddot? $evowel2 $macron?) ;
| $1 $rough ← H ([AEIOUY] $macron? $ddot?) ;
# titlecase, have to fix individually
# in the future, we should add &uppercase() to make this easier
# Each rule below turns "H" + lowercase vowel into the uppercase vowel,
# so the capital moves from the H onto the vowel.
| A $1 $rough ← H a ($macron $ddot? $evowel2_i $macron?) ;
| E $1 $rough ← H e ($macron $ddot? $evowel2_i $macron?) ;
| I $1 $rough ← H i ($macron $ddot? $evowel2_i $macron?) ;
| O $1 $rough ← H o ($macron $ddot? $evowel2_i $macron?) ;
| U $1 $rough ← H u ($macron $ddot? $evowel2_i $macron?) ;
| Y $1 $rough ← H y ($macron $ddot? $evowel2_i $macron?) ;
| A $1 $rough ← H a ($ddot? $evowel2 $macron?) ;
| E $1 $rough ← H e ($ddot? $evowel2 $macron?) ;
| I $1 $rough ← H i ($ddot? $evowel2 $macron?) ;
| O $1 $rough ← H o ($ddot? $evowel2 $macron?) ;
| U $1 $rough ← H u ($ddot? $evowel2 $macron?) ;
| Y $1 $rough ← H y ($ddot? $evowel2 $macron?) ;
| A $1 $rough ← H a ($macron? $ddot? ) ;
| E $1 $rough ← H e ($macron? $ddot? ) ;
| I $1 $rough ← H i ($macron? $ddot? ) ;
| O $1 $rough ← H o ($macron? $ddot? ) ;
| U $1 $rough ← H u ($macron? $ddot? ) ;
| Y $1 $rough ← H y ($macron? $ddot? ) ;
# Now do smooth
#delete smooth breathing for Latin
$smooth → ;
# insert in Greek
# the assumption is that all Marks are on letters.
# Reverse: add a smooth breathing to an initial r/R or an initial vowel
# sequence (i.e. one preceded by a non-letter) that does not already carry
# a breathing; r/R followed by h/H is left for the rough-breathing rules.
| $1 $smooth ← $notLetter { ([rR]) } [^hH$smooth$rough] ;
| $1 $smooth ← $notLetter { ($evowel $macron? $evowel2 $macron?) } [^$smooth$rough] ;
| $1 $smooth ← $notLetter { ($evowel $macron?) } [^$evowel2$smooth$rough] ;
# TODO: preserve smooth/rough breathing if not
# on initial vowel sequence

# ----------------------------------------------------------------------
# Multi-character mappings
# ----------------------------------------------------------------------
# need to have these up here so the rules don't mask
# remove now superfluous macron when returning
Α ← A $macron ;
α ← a $macron ;
η ↔ e $macron ;
Η ↔ E $macron ;
φ ↔ ph ;
# Uppercase digraph letters: titlecase before a lowercase letter, else all caps.
Ψ } $beforeLower ↔ Ps ;
Ψ ↔ PS ;
Φ } $beforeLower ↔ Ph ;
Φ ↔ PH ;
ψ ↔ ps ;
ω ↔ o $macron ;
Ω ↔ O $macron;

# ----------------------------------------------------------------------
# NORMAL
# ----------------------------------------------------------------------
α ↔ a ;
Α ↔ A ;
β ↔ b ;
Β ↔ B ;
# Gamma before a $gammaLike letter is written n; the general rule follows.
γ } $gammaLike ↔ n } $egammaLike ;
γ ↔ g ;
Γ } $gammaLike ↔ N } $egammaLike ;
Γ ↔ G ;
δ ↔ d ;
Δ ↔ D ;
ε ↔ e ;
Ε ↔ E ;
ζ ↔ z ;
Ζ ↔ Z ;
θ ↔ th ;
Θ } $beforeLower ↔ Th ;
Θ ↔ TH ;
ι ↔ i ;
Ι ↔ I ;
κ ↔ k ;
Κ ↔ K ;
λ ↔ l ;
Λ ↔ L ;
μ ↔ m ;
Μ ↔ M ;
# Nu before a $gammaLike letter gets a trailing apostrophe so that it is not
# read back as gamma by the rules above.
# NOTE(review): the lowercase rule is forward-only (→) while the uppercase
# rule is bidirectional (↔) — confirm that the asymmetry is intended.
ν } $gammaLike → n\' ;
ν ↔ n ;
Ν } $gammaLike ↔ N\' ;
Ν ↔ N ;
ξ ↔ x ;
Ξ ↔ X ;
ο ↔ o ;
Ο ↔ O ;
π ↔ p ;
Π ↔ P ;
# Rho with rough breathing must be handled before plain rho.
ρ $rough ↔ rh;
Ρ $rough } $beforeLower ↔ Rh ;
Ρ $rough ↔ RH ;
ρ ↔ r ;
Ρ ↔ R ;
# insert separator before things that turn into s
# (keeps pi + sigma distinct from the ps/PS/Ps output of psi)
[Pp] { } [ςσΣϷϸϺϻ] → \' ;
# special S variants
Ϸ ↔ S\u030C ; # Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L
ϸ ↔ s\u030C ; #ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L
Ϻ ↔ S\u0302 ; # Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L
ϻ ↔ s\u0302 ; # ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L
# underbar means exception
# (a sigma form in an unexpected position is written s + $underbar so that
# the reverse direction can restore it)
# before a letter, initial
ς } $beforeLetter ↔ s $underbar } $beforeLetter;
σ } $beforeLetter ↔ s } $beforeLetter;
# otherwise, after a letter = final
$afterLetter { σ ↔ $afterLetter { s $underbar;
$afterLetter { ς ↔ $afterLetter { s ;
# otherwise (isolated) = initial
ς ↔ s $underbar;
σ ↔ s ;
# [Pp] { Σ ↔ \'S ;
Σ ↔ S ;
τ ↔ t ;
Τ ↔ T ;
# Upsilon after a vowel is written u, otherwise y.
# NOTE(review): the lowercase rule closes its context brace ("{υ }") while
# the uppercase rule does not ("{ Υ") — confirm that the two are equivalent.
$vowel {υ } ↔ u ;
υ ↔ y ;
$vowel { Υ ↔ U ;
Υ ↔ Y ;
χ ↔ ch ;
Χ } $beforeLower ↔ Ch ;
Χ ↔ CH ;

# ----------------------------------------------------------------------
# Completeness for ASCII
# ----------------------------------------------------------------------
# Zero or more marks / apostrophes; used as filler in the contexts below.
$ignore = [[:Mark:]''] * ;
# Reverse-only: Latin letters that no rule above produces are first rewritten
# to a Latin spelling that the rules above do handle. The leading | puts the
# cursor before the replacement so that the replacement is processed again.
| k ← c ;
| ph ← f ;
| i ← j ;
| k ← q ;
| b ← v } $vowel ;
| b ← w } $vowel;
| u ← v ;
| u ← w;
| K ← C ;
| Ph ← F ;
| I ← J ;
| K ← Q ;
| B ← V } $vowel ;
| B ← W } $vowel ;
| U ← V ;
| U ← W ;
# Any rough breathing left over: H next to an uppercase letter, otherwise h.
$rough } $ignore [:UppercaseLetter:] → H ;
$ignore [:UppercaseLetter:] { $rough → H ;
$rough ← H ;
$rough ↔ h ;

# ----------------------------------------------------------------------
# Completeness for Greek
# ----------------------------------------------------------------------
# Forward-only: variant letter forms are rewritten to the ordinary letter,
# with the cursor placed before it so that it is processed again.
ϐ → | β ;
ϑ → | θ ;
ϒ → | Υ ;
ϕ → | φ ;
ϖ → | π ;
ϰ → | κ ;
ϱ → | ρ ;
ϲ → | σ ;
Ϲ → | Σ; #U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL
ϳ → j ;
ϴ → | Θ ;
ϵ → | ε ;
µ → | μ ;
ͺ → i;
# delete any trailing ' marks used for roundtripping
# (reverse-only rules with empty output: the apostrophe is removed)
← [Ππ] { \' } [Ss] ;
← [Νν] { \' } $egammaLike ;
# Recompose on the way out (forward); the paired reverse step decomposes.
::NFC (NFD) ;
# ([\u0000-\u007F [:Latin:] [:Greek:] [:nonspacing mark:]]) ;
# ([\u0000-\u007F · [:Latin:] [:nonspacing mark:]]) ;
# MINIMAL FILTER GENERATED FOR: Latin-Greek BACKWARD
:: ( [':?A-Za-zÀ-ÅÇ-ÏÑ-ÖÙ-Ýà-åç-ïñ-öù-ýÿ-ďĒ-ĥĨ-İĴ-ķĹ-ľŃ-ňŌ-őŔ-ťŨ-žƠ-ơƯ-ưǍ-ǜǞ-ǣǦ-ǰǴ-ǵǸ-țȞ-ȟȦ-ȳ\u0300-\u0337\u0339-\u0345΅-ΆΈ-ΊΌΎ-ΐΪ-ΰϊ-ώϓ-ϔЀ-ЁЃЇЌ-ЎЙйѐ-ёѓїќ-ўѶ-ѷӁ-ӂӐ-ӓӖ-ӗӚ-ӟӢ-ӧӪ-ӵӸ-ӹḀ-ẙẛẠ-ỹἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼ῁-ῄῆ-ΐῖ-Ί῝-΅ῲ-ῴῶ-ῼK-Å] ) ;