• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# © 2016 and later: Unicode, Inc. and others.
2# License & terms of use: http://www.unicode.org/copyright.html
3# Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
4#
5# File: ja_Latn_ru.txt
6# Generated from CLDR
7#
8
9# Japanese (Rōmaji) to Russian (Cyrillic) Polivanov transliteration for ICU.
10# Can be run in sequence after e.g. Katakana-Latin.
11#
12# These rules cannot be used to target Bulgarian, Serbian, Tajik, or Ukrainian.
13#
14# TODO: Cyrillization needs to respect morpheme/Kanji boundaries.
15# 中井 becomes Накаи, but 北海道 becomes Хоккайдо.  We need boundary
16# markup in the input in order to do that properly.
17#
18::NFD(NFC);
19::[:Latin:] Lower();
20# The two passes above decompose the input (NFD; NFC is named as the inverse) and lowercase Latin letters, so the rules below see lowercase base letters followed by separate combining marks.
21# Vowel-length markers: U+0302 COMBINING CIRCUMFLEX ACCENT and U+0304 COMBINING MACRON.
22$lengthMarker = [\u0302\u0304];
23#
24#
25# Delete apostrophes.  An apostrophe directly after "n" never reaches this rule: when the cursor is on the "n", the "n\' → нъ" rule further down consumes the apostrophe together with it.
26\' → ;
27#
28#
29# Turn long /e:/ (e followed by a length marker) into the diphthong /ei/, written эй.
30# Note that /ei/ across a morpheme boundary (e.g. 武井 Takei) becomes эи; that spelling has no length marker, so it goes through the plain e and i rules instead.
31e $lengthMarker → эй ;
32#
33#
34# Turn long /i:/ into two vowels /ii/.  The "|" leaves the cursor in front of the replacement, so both letters are then converted by the plain "i" rule (ии).
35i $lengthMarker → | i i ;
36#
37#
38# Ignore vowel length everywhere else: a length marker not consumed together with a preceding e or i above is simply deleted.
39$lengthMarker → ;
40#
41#
42# Vowels.
43# Order matters: the longer patterns (i\~e, u\~) are listed before the single-letter rules that share their first letter, because the first matching rule wins.
44# TODO(mjansche): Enable diphthongs once we have Kanji boundaries.
45## ai → ай ;
46a  → а ;
47i\~e → | ye ;  # Rescanned as "ye", which the y-rules further down turn into е.
48i  → и ;
49u\~ → в ;  # ウィ etc.
50# NOTE(review): "~" appears to mark a following small kana in the romanized input (cf. the kana examples on these rules) — confirm against the upstream romanizer.
51## ui → уй ;
52u  → у ;
53e  → э ;
54o  → о ;
55#
56#
57# Consonants.
58#
59k → к ;
60#
61# "sh" is rewritten to "sy" and rescanned, so shi/sha/shu/sho come out as си/ся/сю/сё via the s rule and the y-rules.
62sh → | sy ;
63s → с ;
64#
65#
66ch     → | ty ;  # Rewritten to "ty" and rescanned: chi/cha/chu/cho come out as ти/тя/тю/тё.
67c } ch → т ;  # First c of a doubled "cch".  There is no "|", so the output is not rescanned and must already be Cyrillic; a Latin "t" here would be left in the result.
68te\~   → | t ;   # テュ
69to\~   → | t ;   # トゥ
70tsu\~  → | ts ;  # ツァ, ツィ, etc.
71ts → ц ;
72t  → т ;
73#
74# A "~" in front of "tsu" is dropped and the text is rescanned as plain "tsu".
75\~tsu → | tsu ;
76#
77#
78n } [bpm] → м ;  # 群馬 → Гумма  (n before b, p or m; "}" makes the following letter context only, so it is not consumed)
79n\' → нъ ;  # n plus apostrophe; the apostrophe is consumed here.
80n → н ;  # Any other n.
81#
82#
83h → х ;
84fu\~ → | f ;  # フュ
85f → ф ;
86# fu\~ above drops the "u\~" and rescans from "f", so ф is followed directly by whatever the next rule produces.
87#
88m → м ;
89#
90# y-syllables.  Several other rules (sh, ch, j, i\~e) rewrite their input to a "…y" sequence and rescan, so their vowels are produced here as well.
91ya → я ;
92yi → и ;  # Added for convenience, after sh, ch, j.
93yu → ю ;
94ye → е ;  # ?? unobserved
95yo → ё ;
96#
97#
98r → р ;
99#
100# "wa" is the only w-syllable kept as в + vowel; it is listed before the bare "w" rule so that it wins.
101wa → ва ;
102w → ;  # Any other w is deleted, leaving whatever follows it.
103#
104#
105g → г ;
106#
107#
108j → | zy ;  # Rewritten to "zy" and rescanned: z gives дз and the y-rules supply the vowel (ji → дзи).
109z → дз ;
110#
111# The d-rules below strip the longer d-spellings down to d, j or z and rescan; plain "d" is last so that it does not take precedence over the longer patterns.
112de\~  → | d ;  # デュ
113dji\~ → | z ;  # ヂャ, ヂュ, etc.
114dj    → | j ;  # ヂ
115do\~  → | d ;  # ドゥ
116dzu\~ → | z ;  # ヅァ, ヅィ, etc.
117dz    → | z ;  # ヅ
118d → д ;
119#
120#
121b → б ;
122vu\~ → | v ;  # ヴァ, etc.
123v → в ;  # ?? unobserved
124#
125#
126p → п ;
127#
128# Final pass: recompose the output to NFC (NFD is named as the inverse), mirroring the ::NFD(NFC) pass at the top of the rules.
129::NFC(NFD);
130
131