# © 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html#License
#
# File: ja_Latn_ru.txt
# Generated from CLDR
#

# Japanese (Rōmaji) to Russian (Cyrillic) Polivanov transliteration for ICU.
# Can be run in sequence after e.g. Katakana-Latin.
#
# These rules cannot be used to target Bulgarian, Serbian, Tajik, or Ukrainian.
#
# TODO: Cyrillization needs to respect morpheme/Kanji boundaries.
# 中井 becomes Накаи, but 北海道 becomes Хоккайдо. We need boundary
# markup in the input in order to do that properly.
#
# Reading guide for the rules below:
#   * Rules are tried in file order at the current position, so longer or more
#     specific patterns must precede the shorter ones they overlap with.
#   * "x } y" matches x only when it is followed by y; y itself is not consumed.
#   * "→ | text" inserts text and rewinds the cursor in front of it, so the
#     inserted Latin letters are transliterated again by later rules. Without
#     the "|" the replacement is final and is never re-scanned.
#   * NOTE(review): "\~" appears to be a marker left by the preceding
#     kana-to-Latin step for small-kana combinations (see the kana examples in
#     the trailing comments) — confirm against that transform.
#

# Decompose first so that macrons/circumflexes become separate combining
# marks, then lowercase all Latin letters so the rules only deal with one case.
::NFD(NFC);
::[:Latin:] Lower();

# Combining circumflex (U+0302) and combining macron (U+0304).
$lengthMarker = [\u0302\u0304];

# Delete apostrophes. Apostrophes after "n" are consumed below.
\' → ;

# Turn long /e:/ into diphthong /ei/.
# Note that /ei/ across a morpheme boundary (e.g. 武井 Takei) becomes эи.
e $lengthMarker → эй ;

# Turn long /i:/ into two vowels /ii/.
i $lengthMarker → | i i ;

# Ignore vowel length everywhere else.
$lengthMarker → ;

# Vowels.
#
# TODO(mjansche): Enable diphthongs once we have Kanji boundaries.
## ai → ай ;
a → а ;
i\~e → | ye ;
i → и ;
u\~ → в ; # ウィ etc.
#
## ui → уй ;
u → у ;
e → э ;
o → о ;

# Consonants.

k → к ;

# "sh" is rewritten to "sy" so that the y-rules below pick the vowel letter.
sh → | sy ;
s → с ;

# "ch" is rewritten to "ty" for the same reason.
ch → | ty ;
# Geminate "cch": the first "c" stands for a doubled "t". The cursor marker is
# required here; without it the Latin "t" would be emitted as-is and never be
# converted to "т" by the "t" rule further down.
c } ch → | t ;
te\~ → | t ; # テュ
to\~ → | t ; # トゥ
tsu\~ → | ts ; # ツァ, ツィ, etc.
ts → ц ;
t → т ;

# Drop a leading "~" in front of "tsu" and transliterate the rest normally.
\~tsu → | tsu ;

n } [bpm] → м ; # 群馬 → Гумма
n\' → нъ ;
n → н ;

h → х ;
fu\~ → | f ; # フュ
f → ф ;

m → м ;

ya → я ;
yi → и ; # Added for convenience, after sh, ch, j.
yu → ю ;
ye → е ; # ?? unobserved
yo → ё ;

r → р ;

# "wa" keeps its consonant; any other "w" is dropped.
wa → ва ;
w → ;

g → г ;

# "j" is rewritten to "zy"; "z" then yields дз and the y-rules pick the vowel.
j → | zy ;
z → дз ;

de\~ → | d ; # デュ
dji\~ → | z ; # ヂャ, ヂュ, etc.
dj → | j ; # ヂ
do\~ → | d ; # ドゥ
dzu\~ → | z ; # ヅァ, ヅィ, etc.
dz → | z ; # ヅ
d → д ;

b → б ;
vu\~ → | v ; # ヴァ, etc.
v → в ; # ?? unobserved

p → п ;

# Recompose the output (e.g. и + combining breve → й, е + diaeresis → ё).
::NFC(NFD);