<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
<!--
Copyright © 1991-2013 Unicode, Inc.
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
For terms of use, see http://www.unicode.org/copyright.html
-->
<supplementalData>
  <version number="$Revision: 12263 $"/>
  <transforms>
    <transform source="ja_Latn" target="ru" direction="forward" alias="ru-t-ja-latn">
      <tRule>
# Japanese (Rōmaji) to Russian (Cyrillic) Polivanov transliteration for ICU.
# Can be run in sequence after e.g. Katakana-Latin.
#
# These rules cannot be used to target Bulgarian, Serbian, Tajik, or Ukrainian.
#
# TODO: Cyrillization needs to respect morpheme/Kanji boundaries.
# 中井 becomes Накаи, but 北海道 becomes Хоккайдо. We need boundary
# markup in the input in order to do that properly.
#
# How to read the rules below:
#  * Rules are tried top to bottom at the current cursor position and the
#    first match wins, so longer or more specific patterns must precede the
#    shorter ones they overlap with (e.g. "ts" before "t").
#  * A replacement that starts with "|" leaves the cursor in front of the
#    replacement text, so the intermediate Latin output is matched again by
#    later rules. Any rule whose output is still Latin needs this; without
#    it the Latin text would be skipped and left in the result.
#  * "\~" is a literal tilde. Judging from the kana examples next to those
#    rules it marks a small kana in the input (presumably as produced by
#    Katakana-Latin; confirm against that transform).
#

# Decompose, so that a vowel-length mark is a separate combining character
# after its base letter, and lowercase Latin so the rules only need to list
# lowercase patterns. The result is recomposed by ::NFC(NFD) at the end.
::NFD(NFC);
::[:Latin:] Lower();
#
#

# Combining circumflex and combining macron, the two long-vowel marks.
$lengthMarker = [̂̄];
#
#
# Delete apostrophes. Apostrophes after "n" are consumed below.
# (The "n\'" rule matches at the "n", before the cursor reaches the apostrophe.)

\' → ;
#
#
# Turn long /e:/ into diphthong /ei/.
# Note that /ei/ across a morpheme boundary (e.g. 武井 Takei) becomes эи.

e $lengthMarker → эй ;
#
#
# Turn long /i:/ into two vowels /ii/.
# The two Latin letters are reprocessed, so each goes through "i → и" below.

i $lengthMarker → | i i ;
#
#
# Ignore vowel length everywhere else.

$lengthMarker → ;
#
#
# Vowels.
#
# TODO(mjansche): Enable diphthongs once we have Kanji boundaries.
## ai → ай ;

a → а ;
i\~e → | ye ;
i → и ;
u\~ → в ;  # ウィ etc.
#
## ui → уй ;

u → у ;
e → э ;
o → о ;
#
#
# Consonants.
#
# Digraphs such as "sh", "ch" and "j" are first rewritten to consonant + "y";
# the "y" + vowel part is then turned into я, и, ю, е or ё by the "y" rules.

k → к ;
#
#

sh → | sy ;
s → с ;
#
#

ch → | ty ;
# Geminate spelled "cch": rewrite the first "c" to "t" and reprocess it so
# that it is emitted as т (e.g. maccha → маття). Without the cursor reset
# the Latin "t" would be left untransliterated in the output.
c } ch → | t ;
te\~ → | t ;  # テュ
to\~ → | t ;  # トゥ
tsu\~ → | ts ;  # ツァ, ツィ, etc.
ts → ц ;
t → т ;
#
#

# Drop the tilde in front of "tsu" and reprocess the syllable as plain "tsu".
\~tsu → | tsu ;
#
#

n } [bpm] → м ;  # 群馬 → Гумма
n\' → нъ ;
n → н ;
#
#

h → х ;
fu\~ → | f ;  # フュ
f → ф ;
#
#

m → м ;
#
#

ya → я ;
yi → и ;  # Added for convenience, after sh, ch, j.
yu → ю ;
ye → е ;  # ?? unobserved
yo → ё ;
#
#

r → р ;
#
#

# Only "wa" keeps its consonant; a "w" in any other position is deleted.
wa → ва ;
w → ;
#
#

g → г ;
#
#

j → | zy ;
z → дз ;
#
#

de\~ → | d ;  # デュ
dji\~ → | z ;  # ヂャ, ヂュ, etc.
dj → | j ;  # ヂ
do\~ → | d ;  # ドゥ
dzu\~ → | z ;  # ヅァ, ヅィ, etc.
dz → | z ;  # ヅ
d → д ;
#
#

b → б ;
vu\~ → | v ;  # ヴァ, etc.
v → в ;  # ?? unobserved
#
#

p → п ;
#
#

# Recompose the output (inverse of the ::NFD(NFC) step at the top).
::NFC(NFD);
      </tRule>
    </transform>
  </transforms>
</supplementalData>