<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
<!--
Copyright © 1991-2013 Unicode, Inc.
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
For terms of use, see http://www.unicode.org/copyright.html
-->
<supplementalData>
	<version number="$Revision$"/>
	<transforms>
		<transform source="Grek" target="Latn" direction="both" alias="Greek-Latin und-Latn-t-und-grek" backwardAlias="Latin-Greek und-Grek-t-und-latn">
			<tRule><![CDATA[
# Greek <-> Latin transliteration rules (the transform is declared with direction="both").
#
# Reading guide for the rule operators used in this file:
#   →   rule applies only in the forward direction (Greek to Latin)
#   ←   rule applies only in the backward direction (Latin to Greek)
#   ↔   rule applies in both directions
#   {   ends the "before" context;   }   starts the "after" context
#   |   cursor position in the replacement, so the text after it is processed again by later rules
#   $1  text matched by the first parenthesized segment on the matching side
# Rule order is significant: earlier rules win over later ones, so do not reorder.
#
# Rules are predicated on running NFD first, and NFC afterwards
# :: [\u0000-\u007F \u0370-Ͽ [:Greek:] [:nonspacing mark:]] ;
# MINIMAL FILTER GENERATED FOR: Greek-Latin
# NOTE(review): the range Ϸ-\u07FB in the filter below already contains the Cyrillic
# letters that are listed right after it (Ё Ї ё ї ...), so its upper bound may have been
# meant to be \u03FB (ϻ) - confirm against the filter generator before changing it.
:: [;µ·ÄËÏÖÜäëïöüÿ-āĒ-ēĪ-īŌ-ōŪ-ūŸǕ-ǜǞ-ǣǬ-ǭȪ-ȭȰ-ȳ̄̈̓-̔͂-ͅͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϗϛϝϟϡϣϥϧϩϫϭϯ-ϵϷ-\u07FBЁЇёїӒ-ӓӚ-ӟӢ-ӧӪ-ӱӴ-ӵӸ-ӹḔ-ḗḠ-ḡḦ-ḧḮ-ḯḸ-ḹṎ-ṓṜ-ṝṺ-ṻẄ-ẅẌ-ẍẗἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-ῌ῏-ΐῖ-Ί῟-Ῥῲ-ῴῶ-ῼΩϹ] ;
:: NFD (NFC) ;
# TEST CASES
# Ὀλίγοι ἔμφονες πολλῶν ἀφρόνων φοβερώτεροι — Πλάτωνος
# ᾂ ᾒ ᾢ ᾃ ᾓ ᾣ
# ᾳ ῃ ῳ ὃ ὄ
# ὠς ὡς ὢς ὣς
# Ὠς Ὡς Ὢς Ὣς
# ὨΣ ὩΣ ὪΣ ὫΣ
# Ạ, ạ, Ẹ, ẹ, Ọ, ọ
# Useful variables
$lower = [[:latin:][:greek:] & [:Ll:]];
$glower = [[:greek:] & [:Ll:]];
$upper = [[:latin:][:greek:] & [:Lu:]] ;
$accent = [:M:] ;
# NOTE: restrict to just the Greek & Latin accents that we care about
# TODO: broaden out once iteration is fixed
$accentMinus = [ [̀-ͅ] & [:M:] - [̸]] ;
$macron = ̄ ;
$ddot = ̈ ;
$ddotmac = [$ddot$macron];
$lcgvowel = [αεηιουω] ;
$ucgvowel = [ΑΕΗΙΟΥΩ] ;
$gvowel = [$lcgvowel $ucgvowel] ;
# $lcgvowelC is not referenced by any rule in this file
$lcgvowelC = [$lcgvowel $accent] ;
$evowel = [aeiouyAEIOUY];
$evowel2 = [iuyIUY];
$vowel = [ $evowel $gvowel] ;
$gammaLike = [ΓΚΞΧγκξχϰ] ;
$egammaLike = [GKXCgkxc] ;
$smooth = ̓ ;
$rough = ̔ ;
$iotasub = ͅ ;
# $evowel_i is not referenced by any rule in this file ($evowel2_i is)
$evowel_i = [$evowel-[iI]] ;
$evowel2_i = [uyUY];
$underbar = ̱;
$afterLetter = [:L:] [[:M:]\']* ;
$beforeLetter = [[:M:]\']* [:L:] ;
$beforeLower = $accent * $lower ;
$notLetter = [^[:L:][:M:]] ;
# $under appears to hold the same mark as $underbar; it is the one used by the punctuation rules
$under = ̱;
# Fix punctuation
# preserve original
\: ↔ \: $under ;
\? ↔ \? $under ;
\; ↔ \? ;
· ↔ \: ;
# CIRCUMFLEX: convert greek circumflex to normal one. Could use tilde or inverted breve
͂ ↔ ̂ ;
# IOTA: convert iota subscript to iota
# first make previous alpha long!
$accent_minus = [[$accent]-[$iotasub$macron]];
Α } $accent_minus * $iotasub → | Α $macron ;
α } $accent_minus * $iotasub → | α $macron ;
# now convert to uppercase if after uppercase, otherwise to lowercase
$upper $accent * { $iotasub → I ;
$iotasub → i ;
| $1 $iotasub ← ($evowel $macron $accentMinus *) i ;
| $1 $iotasub ← ($evowel $macron $accentMinus *) I ;
# BREATHING
# Convert rough breathing to h, and move before letters.
# Make A ` x = → H a x
Α ($macron?) $rough } $beforeLower → H | α $1;
Ε $rough } $beforeLower → H | ε;
Η $rough } $beforeLower → H | η ;
Ι ($ddot?) $rough } $beforeLower → H | ι $1;
Ο $rough } $beforeLower → H | ο ;
Υ $rough } $beforeLower → H | υ ;
Ω ($ddot?) $rough } $beforeLower → H | ω $1;
# Make A x ` = → H a x
Α ($glower $macron?) $rough → H | α $1 ;
Ε ($glower) $rough → H | ε $1 ;
Η ($glower) $rough → H | η $1 ;
Ι ($glower $ddot?) $rough → H | ι $1 ;
Ο ($glower) $rough → H | ο $1 ;
Υ ($glower) $rough → H | υ $1 ;
Ω ($glower $ddot?) $rough → H | ω $1 ;
#Otherwise, make x ` into h x and X ` into H X
($lcgvowel + $ddotmac? ) $rough → h | $1 ;
($gvowel + $ddotmac? ) $rough → H | $1 ;
# Go backwards with H
| $1 $rough ← h ($evowel $macron $ddot? $evowel2_i $macron?) ;
| $1 $rough ← h ($evowel $ddot? $evowel2 $macron?) ;
| $1 $rough ← h ($evowel $macron? $ddot?) ;
| $1 $rough ← H ([AEIOUY] $macron $ddot? $evowel2_i $macron?) ;
| $1 $rough ← H ([AEIOUY] $ddot? $evowel2 $macron?) ;
| $1 $rough ← H ([AEIOUY] $macron? $ddot?) ;
# titlecase, have to fix individually
# in the future, we should add &uppercase() to make this easier
| A $1 $rough ← H a ($macron $ddot? $evowel2_i $macron?) ;
| E $1 $rough ← H e ($macron $ddot? $evowel2_i $macron?) ;
| I $1 $rough ← H i ($macron $ddot? $evowel2_i $macron?) ;
| O $1 $rough ← H o ($macron $ddot? $evowel2_i $macron?) ;
| U $1 $rough ← H u ($macron $ddot? $evowel2_i $macron?) ;
| Y $1 $rough ← H y ($macron $ddot? $evowel2_i $macron?) ;
| A $1 $rough ← H a ($ddot? $evowel2 $macron?) ;
| E $1 $rough ← H e ($ddot? $evowel2 $macron?) ;
| I $1 $rough ← H i ($ddot? $evowel2 $macron?) ;
| O $1 $rough ← H o ($ddot? $evowel2 $macron?) ;
| U $1 $rough ← H u ($ddot? $evowel2 $macron?) ;
| Y $1 $rough ← H y ($ddot? $evowel2 $macron?) ;
| A $1 $rough ← H a ($macron? $ddot? ) ;
| E $1 $rough ← H e ($macron? $ddot? ) ;
| I $1 $rough ← H i ($macron? $ddot? ) ;
| O $1 $rough ← H o ($macron? $ddot? ) ;
| U $1 $rough ← H u ($macron? $ddot? ) ;
| Y $1 $rough ← H y ($macron? $ddot? ) ;
# Now do smooth
#delete smooth breathing for Latin
$smooth → ;
# insert in Greek
# the assumption is that all Marks are on letters.
| $1 $smooth ← $notLetter { ([rR]) } [^hH$smooth$rough] ;
| $1 $smooth ← $notLetter { ($evowel $macron? $evowel2 $macron?) } [^$smooth$rough] ;
| $1 $smooth ← $notLetter { ($evowel $macron?) } [^$evowel2$smooth$rough] ;
# TODO: preserve smooth/rough breathing if not
# on initial vowel sequence
# need to have these up here so the rules don't mask
# remove now superfluous macron when returning
Α ← A $macron ;
α ← a $macron ;
η ↔ e $macron ;
Η ↔ E $macron ;
φ ↔ ph ;
Ψ } $beforeLower ↔ Ps ;
Ψ ↔ PS ;
Φ } $beforeLower ↔ Ph ;
Φ ↔ PH ;
ψ ↔ ps ;
ω ↔ o $macron ;
Ω ↔ O $macron;
# NORMAL
α ↔ a ;
Α ↔ A ;
β ↔ b ;
Β ↔ B ;
γ } $gammaLike ↔ n } $egammaLike ;
γ ↔ g ;
Γ } $gammaLike ↔ N } $egammaLike ;
Γ ↔ G ;
δ ↔ d ;
Δ ↔ D ;
ε ↔ e ;
Ε ↔ E ;
ζ ↔ z ;
Ζ ↔ Z ;
θ ↔ th ;
Θ } $beforeLower ↔ Th ;
Θ ↔ TH ;
ι ↔ i ;
Ι ↔ I ;
κ ↔ k ;
Κ ↔ K ;
λ ↔ l ;
Λ ↔ L ;
μ ↔ m ;
Μ ↔ M ;
ν } $gammaLike → n\' ;
ν ↔ n ;
Ν } $gammaLike ↔ N\' ;
Ν ↔ N ;
ξ ↔ x ;
Ξ ↔ X ;
ο ↔ o ;
Ο ↔ O ;
π ↔ p ;
Π ↔ P ;
ρ $rough ↔ rh;
Ρ $rough } $beforeLower ↔ Rh ;
Ρ $rough ↔ RH ;
ρ ↔ r ;
Ρ ↔ R ;
# insert separator before things that turn into s
[Pp] { } [ςσΣϷϸϺϻ] → \' ;
# special S variants
Ϸ ↔ Š ; # Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L
ϸ ↔ š ; #ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L
Ϻ ↔ Ŝ ; # Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L
ϻ ↔ ŝ ; # ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L
# underbar means exception
# before a letter, initial
ς } $beforeLetter ↔ s $underbar } $beforeLetter;
σ } $beforeLetter ↔ s } $beforeLetter;
# otherwise, after a letter = final
$afterLetter { σ ↔ $afterLetter { s $underbar;
$afterLetter { ς ↔ $afterLetter { s ;
# otherwise (isolated) = initial
ς ↔ s $underbar;
σ ↔ s ;
# [Pp] { Σ ↔ \'S ;
Σ ↔ S ;
τ ↔ t ;
Τ ↔ T ;
$vowel {υ } ↔ u ;
υ ↔ y ;
$vowel { Υ ↔ U ;
Υ ↔ Y ;
χ ↔ ch ;
Χ } $beforeLower ↔ Ch ;
Χ ↔ CH ;
# Completeness for ASCII
$ignore = [[:Mark:]''] * ;
| k ← c ;
| ph ← f ;
| i ← j ;
| k ← q ;
| b ← v } $vowel ;
| b ← w } $vowel;
| u ← v ;
| u ← w;
| K ← C ;
| Ph ← F ;
| I ← J ;
| K ← Q ;
| B ← V } $vowel ;
| B ← W } $vowel ;
| U ← V ;
| U ← W ;
$rough } $ignore [:UppercaseLetter:] → H ;
$ignore [:UppercaseLetter:] { $rough → H ;
$rough ← H ;
$rough ↔ h ;
# Completeness for Greek
ϐ → | β ;
ϑ → | θ ;
ϒ → | Υ ;
ϕ → | φ ;
ϖ → | π ;
ϰ → | κ ;
ϱ → | ρ ;
ϲ → | σ ;
Ϲ → | Σ; #U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL
ϳ → j ;
ϴ → | Θ ;
ϵ → | ε ;
µ → | μ ;
ͺ → i;
# delete any trailing ' marks used for roundtripping
← [Ππ] { \' } [Ss] ;
← [Νν] { \' } $egammaLike ;
::NFC (NFD) ;
# ([\u0000-\u007F [:Latin:] [:Greek:] [:nonspacing mark:]]) ;
# ([\u0000-\u007F · [:Latin:] [:nonspacing mark:]]) ;
# MINIMAL FILTER GENERATED FOR: Latin-Greek BACKWARD
:: ( [':?A-Za-zÀ-ÅÇ-ÏÑ-ÖÙ-Ýà-åç-ïñ-öù-ýÿ-ďĒ-ĥĨ-İĴ-ķĹ-ľŃ-ňŌ-őŔ-ťŨ-žƠ-ơƯ-ưǍ-ǜǞ-ǣǦ-ǰǴ-ǵǸ-țȞ-ȟȦ-ȳ̀-̷̹-ͅ΅-ΆΈ-ΊΌΎ-ΐΪ-ΰϊ-ώϓ-ϔЀ-ЁЃЇЌ-ЎЙйѐ-ёѓїќ-ўѶ-ѷӁ-ӂӐ-ӓӖ-ӗӚ-ӟӢ-ӧӪ-ӵӸ-ӹḀ-ẙẛẠ-ỹἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼ῁-ῄῆ-ΐῖ-Ί῝-΅ῲ-ῴῶ-ῼK-Å] ) ;
			]]></tRule>
		</transform>
	</transforms>
</supplementalData>