• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# © 2016 and later: Unicode, Inc. and others.
2# License & terms of use: http://www.unicode.org/copyright.html#License
3#
4# File: ja_Latn_ru.txt
5# Generated from CLDR
6#
7
8# Japanese (Rōmaji) to Russian (Cyrillic) Polivanov transliteration for ICU.
9# Can be run in sequence after e.g. Katakana-Latin.
10#
11# These rules cannot be used to target Bulgarian, Serbian, Tajik, or Ukrainian.
12#
13# TODO: Cyrillization needs to respect morpheme/Kanji boundaries.
14# 中井 becomes Накаи, but 北海道 becomes Хоккайдо.  We need boundary
15# markup in the input in order to do that properly.
16#
# Pre-processing: decompose the input so that a vowel carrying a length
# diacritic becomes a base letter followed by a combining mark, which the
# length rules match via $lengthMarker.
17::NFD(NFC);
# Lowercase Latin-script characters only (the [:Latin:] filter); every rule
# in this file is written against lowercase rōmaji.
18::[:Latin:] Lower();
19#
20#
# U+0302 COMBINING CIRCUMFLEX ACCENT and U+0304 COMBINING MACRON.
21$lengthMarker = [\u0302\u0304];
22#
23#
24# Delete apostrophes.  Apostrophes after "n" are consumed below.
# Rules are tried in file order at each cursor position, so the "n\'" rule
# matches at the "n" and swallows its apostrophe before the cursor ever
# reaches it; only the remaining apostrophes are deleted here.
25\' → ;
26#
27#
28# Turn long /e:/ into diphthong /ei/.
29# Note that /ei/ across a morpheme boundary (e.g. 武井 Takei) becomes эи.
# Emits Cyrillic directly, so the output is not re-scanned.
30e $lengthMarker → эй ;
31#
32#
33# Turn long /i:/ into two vowels /ii/.
# The "|" leaves the cursor in front of the replacement, so both Latin "i"
# are re-scanned and converted by the plain "i" vowel rule.
34i $lengthMarker → | i i ;
35#
36#
37# Ignore vowel length everywhere else.
# Must come after the e/i rules above, which need the marker still present.
38$lengthMarker → ;
39#
40#
41# Vowels.
42#
43# TODO(mjansche): Enable diphthongs once we have Kanji boundaries.
44## ai → ай ;
45a  → а ;
# "\~" is a literal tilde; presumably the marker the upstream kana-to-Latin
# step places before a small kana (TODO confirm against Katakana-Latin).
# "i~e" is rewritten to Latin "ye" and re-scanned ("|"), so the "ye" rule
# produces the final letter.  Must precede the plain "i" rule.
46i\~e → | ye ;
47i  → и ;
# "u~" is turned into в; the vowel that follows is converted separately.
# Must precede the plain "u" rule.
48u\~ → в ;  # ウィ etc.
49#
50## ui → уй ;
51u  → у ;
52e  → э ;
53o  → о ;
54#
55#
56# Consonants.
57#
58k → к ;
59#
60#
# "sh" is rewritten to Latin "sy" and re-scanned ("|"): the "s" rule emits
# с and the following "y" + vowel is handled by the ya/yi/yu/ye/yo rules
# (e.g. "shi" → "syi" → си).  Must precede the plain "s" rule.
61sh → | sy ;
62s → с ;
63#
64#
# T-row rules.  Order matters: the longer, more specific patterns must come
# before the plain "ts" and "t" rules.
#
# "ch" is rewritten to Latin "ty" and re-scanned ("|"): "t" → т, then the
# "y" + vowel rules apply (e.g. "cha" → "tya" → тя).
65ch     → | ty ;
# Geminated "ch" spelled "cch": the first "c" (followed by "ch", which is
# context only and not consumed) becomes "t".  The "|" keeps the cursor in
# front of the replacement so the "t" is re-scanned and emitted as Cyrillic
# т; without it a Latin "t" would be left in the output.
66c } ch → | t ;
# A vowel followed by a literal tilde is dropped so that the consonant
# combines with what follows (e.g. "te~yu" → "tyu" → тю).
67te\~   → | t ;   # テュ
68to\~   → | t ;   # トゥ
69tsu\~  → | ts ;  # ツァ, ツィ, etc.
70ts → ц ;
71t  → т ;
72#
73#
# A tilde-prefixed "tsu" loses the tilde and is re-scanned as ordinary "tsu".
# Presumably this is how a stand-alone small っ/ッ arrives from the upstream
# kana-to-Latin step (TODO confirm).
74\~tsu → | tsu ;
75#
76#
# N rules.  The two specific rules must precede the plain "n" rule.
# "n" directly before b, p or m becomes м; the following consonant is
# context only and is converted by its own rule.
77n } [bpm] → м ;  # 群馬 → Гумма
# "n" plus apostrophe is consumed as a unit and written with a hard sign.
78n\' → нъ ;
79n → н ;
80#
81#
82h → х ;
# "fu~" drops its vowel and tilde; the remaining Latin "f" is re-scanned
# ("|") and combines with the following "y" + vowel (e.g. "fu~yu" → фю).
# Must precede the plain "f" rule.
83fu\~ → | f ;  # フュ
84f → ф ;
85#
86#
87m → м ;
88#
89#
# "y" + vowel maps to a single iotated Cyrillic vowel.  These rules also
# finish the Latin intermediates produced elsewhere in this file
# (sh → sy, ch → ty, j → zy, i~e → ye).  No rule in this section handles a
# "y" that is not followed by one of these five vowels.
90ya → я ;
91yi → и ;  # Added for convenience, after sh, ch, j.
92yu → ю ;
93ye → е ;  # ?? unobserved
94yo → ё ;
95#
96#
97r → р ;
98#
99#
# "wa" is the only w-syllable written with в; it must precede the bare "w"
# rule.
100wa → ва ;
# Any other "w" is deleted, leaving the following vowel to be converted on
# its own (e.g. "wo" → о).
101w → ;
102#
103#
104g → г ;
105#
106#
# "j" is rewritten to Latin "zy" and re-scanned ("|"): "z" → дз, then the
# "y" + vowel rules apply (e.g. "ji" → "zyi" → дзи).
107j → | zy ;
108z → дз ;
109#
110#
# D-row rules.  The longer patterns must precede the plain "d" rule.  Each
# one rewrites to a Latin intermediate and re-scans it ("|"):
#   de~ / do~  drop the vowel and tilde, leaving "d";
#   dji~ / dzu~ drop the vowel and tilde, leaving "z";
#   dj  becomes "j" (then handled by the "j" rule above);
#   dz  becomes "z".
111de\~  → | d ;  # デュ
112dji\~ → | z ;  # ヂャ, ヂュ, etc.
113dj    → | j ;  # ヂ
114do\~  → | d ;  # ドゥ
115dzu\~ → | z ;  # ヅァ, ヅィ, etc.
116dz    → | z ;  # ヅ
117d → д ;
118#
119#
120b → б ;
# "vu~" drops its vowel and tilde; the remaining Latin "v" is re-scanned
# ("|") and converted by the plain "v" rule, which must therefore follow.
121vu\~ → | v ;  # ヴァ, etc.
122v → в ;  # ?? unobserved
123#
124#
125p → п ;
126#
127#
# Post-processing: recompose the output to NFC (the reverse direction uses
# NFD), mirroring the ::NFD(NFC) step at the top of the file.
128::NFC(NFD);
129
130