• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1<?xml version="1.0" encoding="UTF-8" ?>
2<!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
3<!--
4Copyright © 1991-2013 Unicode, Inc.
5CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
6For terms of use, see http://www.unicode.org/copyright.html
7-->
8<supplementalData>
9	<version number="$Revision: 12263 $"/>
10	<transforms>
11		<transform source="ja_Latn" target="ru" direction="forward" alias="ru-t-ja-latn">
12			<tRule>
13# Japanese (Rōmaji) to Russian (Cyrillic) Polivanov transliteration for ICU.
14# Can be run in sequence after e.g. Katakana-Latin.
15#
16# These rules cannot be used to target Bulgarian, Serbian, Tajik, or Ukrainian.
17#
18# TODO: Cyrillization needs to respect morpheme/Kanji boundaries.
19# 中井 becomes Накаи, but 北海道 becomes Хоккайдо.  We need boundary
20# markup in the input in order to do that properly.
21#
22
23::NFD(NFC);  # Decompose first so vowel-length diacritics become separate combining marks.
24::[:Latin:] Lower();  # Lowercase Latin letters; every rule below matches lowercase only.
25#
26# Combining marks that flag a long vowel (appear to be circumflex U+0302 and macron U+0304; confirm).
27
28$lengthMarker = [̂̄];
29#
30#
31# Delete apostrophes.  Apostrophes after "n" are consumed below.
32
33\' → ;  # Only reached by apostrophes that do not follow n; the n\' rule matches first at the n.
34#
35#
36# Turn long /e:/ into diphthong /ei/.
37# Note that /ei/ across a morpheme boundary (e.g. 武井 Takei) becomes эи.
38
39e $lengthMarker → эй ;
40#
41#
42# Turn long /i:/ into two vowels /ii/.
43
44i $lengthMarker → | i i ;  # Cursor is reset, so both i's are rescanned and handled by the plain i rule.
45#
46#
47# Ignore vowel length everywhere else.
48
49$lengthMarker → ;  # Drops any length marker not consumed by the e and i rules above.
50#
51#
52# Vowels.
53#
54# TODO(mjansche): Enable diphthongs once we have Kanji boundaries.
55## ai → ай ;
56
57a  → а ;
58i\~e → | ye ;  # Rewritten to ye and rescanned so the ye rule emits е; listed before plain i so it wins.
59i  → и ;
60u\~ → в ;  # ウィ etc.  (u plus the ~ marker becomes в; the following vowel is handled normally.)
61#
62## ui → уй ;
63
64u  → у ;
65e  → э ;
66o  → о ;
67#
68#
69# Consonants.
70#
71
72k → к ;
73#
74#
75
76sh → | sy ;  # Rescanned: s → с, then the y+vowel rules apply (e.g. shi → syi → си).
77s → с ;
78#
79#
80
81ch     → | ty ;  # Rescanned: t → т, then the y+vowel rules apply (e.g. chi → tyi → ти).
82c } ch → | t ;   # Geminate "cch": reset the cursor so the t rule emits т; without "|" the Latin t is never rescanned.
83te\~   → | t ;   # テュ
84to\~   → | t ;   # トゥ
85tsu\~  → | ts ;  # ツァ, ツィ, etc.
86ts → ц ;
87t  → т ;
88#
89#
90
91\~tsu → | tsu ;  # Drop the leading ~ marker and rescan the text as ordinary tsu.
92#
93#
94
95n } [bpm] → м ;  # 群馬 → Гумма
96n\' → нъ ;  # n plus apostrophe becomes н with a hard sign; the apostrophe is consumed here.
97n → н ;
98#
99#
100
101h → х ;
102fu\~ → | f ;  # フュ  (u and the ~ marker are dropped; f is rescanned by the plain f rule.)
103f → ф ;
104#
105#
106
107m → м ;
108#
109#
110
111ya → я ;  # y+vowel pairs map to one Cyrillic letter; the sy/ty/zy rewrites above also land here.
112yi → и ;  # Added for convenience, after sh, ch, j.
113yu → ю ;
114ye → е ;  # ?? unobserved
115yo → ё ;
116#
117#
118
119r → р ;
120#
121#
122
123wa → ва ;
124w → ;  # Any w not followed by a is dropped (e.g. wo → о).
125#
126#
127
128g → г ;
129#
130#
131
132j → | zy ;  # Rescanned: z → дз, then the y+vowel rules apply (e.g. ji → zyi → дзи).
133z → дз ;
134#
135#
136
137de\~  → | d ;  # デュ
138dji\~ → | z ;  # ヂャ, ヂュ, etc.
139dj    → | j ;  # ヂ
140do\~  → | d ;  # ドゥ
141dzu\~ → | z ;  # ヅァ, ヅィ, etc.
142dz    → | z ;  # ヅ
143d → д ;  # Plain d; listed after the longer d-initial rules so they get the first chance to match.
144#
145#
146
147b → б ;
148vu\~ → | v ;  # ヴァ, etc.  (u and the ~ marker are dropped; v is rescanned by the plain v rule.)
149v → в ;  # ?? unobserved
150#
151#
152
153p → п ;
154#
155#
156
157::NFC(NFD);  # Recompose the result to NFC, undoing the NFD pass at the top of the rules.
158			</tRule>
159		</transform>
160	</transforms>
161</supplementalData>
162