• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1<?xml version="1.0" encoding="UTF-8" ?>
2<!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
3<!--
4Copyright © 1991-2013 Unicode, Inc.
5CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
6For terms of use, see http://www.unicode.org/copyright.html
7-->
8<supplementalData>
9	<version number="$Revision: 12263 $"/>
10	<transforms>
11		<transform source="Latn" target="Kana" direction="both" alias="Latin-Katakana und-Kana-t-und-latn" backwardAlias="Katakana-Latin und-Latn-t-und-kana">
12			<tRule>
13# note: a global filter is more efficient, but MUST include all source chars
14#:: [\u0000-\u007E 、。 ゙-゜ ァ-ー 。-゚ [:Latin:][:Katakana:] [:nonspacing mark:]] ;
15# MINIMAL FILTER GENERATED FOR: Latin-Katakana
16### WARNING -- must add width filter, both here and below!!! ###
17:: [[ᄀ-ᄒᄚᄡ\u1160-ᅵᆪᆬ-ᆭᆰ-ᆵ←-↓│■○\u3000-。「-」゙-゚ァ-ロワヲ-ヴヷヺ-ー!-~¢-₩][',.A-Za-z~À-ÖØ-öø-ďĒ-ĥĨ-İĴ-ķĹ-ľŃ-ňŌ-őŔ-ťŨ-žƠ-ơƯ-ưǍ-ǜǞ-ǣǦ-ǭǰǴ-ǵǸ-țȞ-ȟȦ-ȳ̄Ӣ-ӣӮ-ӯḀ-ẙẠ-ỹᾱᾹῑῙῡῩK-Å]] ;
18:: [:Latin:] fullwidth-halfwidth ();
19:: NFD (NFC);
20:: Lower ();    # whenever transliterating from cased to uncased script, include this
21# :: NFD () ;   # this would catch the odd cases where a lowercase is not in NFD, but none are important for Japanese
22# Uses modified Hepburn, with small changes to make it unambiguous.
23# | Kunrei-shiki: Hepburn/MHepburn
24# | ------------------------------
25# | si: shi
26# | si ~ya: sha
27# | si ~yu: shu
28# | si ~yo: sho
29# | zi: ji
30# | zi ~ya: ja
31# | zi ~yu: ju
32# | zi ~yo: jo
33# | ti: chi
34# | ti ~ya: cha
35# | ti ~yu: chu
36# | ti ~yo: cho
37# | tu: tsu
38# | di: ji/dji
39# | du: zu/dzu
40# | hu: fu
41# | For foreign words:
42# | -----------------
43# | se ~i si
44# | si ~e she
45# |
46# | ze ~i zi
47# | zi ~e je
48# |
49# | te ~i ti
50# | ti ~e che
51# | te ~u tu
52# |
53# | de ~i di
54# | de ~u du
55# | de ~i di
56# |
57# | he ~u: hu
58# | hu ~a fa
59# | hu ~i fi
60# | hu ~e fe
61# | hu ~o fo
62# Most small forms are generated, but if necessary
63# explicit small forms are given with ~a, ~ya, etc.
64#------------------------------------------------------
65# Variables: character classes and literals shared by the rules below
66$vowel = [aeiou] ; # Latin vowels; right context of the consonant+y rules and captured by the iteration-mark rules
67$consonant = [bcdfghjklmnpqrstvwxyz] ; # used by the generic iteration-mark fallback rule
68$macron = ̄ ; # combining macron; paired with the prolonged sound mark ー
69# Kana classes used as right context by the doubled-consonant (small tsu, ッ) rules
70$kana = [ぁ-ゔ] ; # hiragana range; referenced only by the disabled iteration-mark TODO rule
71$voice = [゙゛]; # voiced sound mark variants
72$semivoice = [゚゜]; # semi-voiced sound mark variants
73$k_start = [カキクケコかきくけこ] ;
74$s_start = [サシスセソさしすせそ] ;
75$j_start = [シし] $voice ; # shi followed by a voice mark, i.e. ji
76$t_start = [タチツテトたちつてと] ;
77$n_start = [ナニヌネノンなにぬねの] ; # note: also contains the syllabic n (ン)
78$h_start = [ハヒヘホはひへほ] ; # fu (フ/ふ) is kept separately in $f_start
79$f_start = [フふ] ;
80$m_start = [マミムメモまみむめも] ;
81$y_start = [ヤユヨやゆよ] ;
82$r_start = [ラリルレロらりるれろ] ;
83$w_start = [ワヰヱヲわゐゑを] ;
84$v_start = [ワヰヱヲ]゙ ; # wa-row katakana followed by the combining voice mark
85$voweled_basekana = [ァ-オカキクケコサシスセソタチッツテトナ-ノハヒフヘホマ-ヲヵヶ] ; # left context of the h- rule
86# if ン is followed by $n_quoter, then it needs an
87# apostrophe after its romaji form to disambiguate it.
88# e.g., ン ア != ナ, so represent as &quot;n'a&quot;, not &quot;na&quot;.
89$n_quoter  =  [ア イ ウ エ オ ナ ニ ヌ ネ ノ ヤ ユ ヨ ン] ;
90$small_y = [ャィュェョ] ; # small kana accepted after an i-row kana; includes small i and small e as well as ya/yu/yo
91$iteration = ゝ ; # referenced only by the disabled iteration-mark TODO rule; the live rules use ヽ literally
92#------------------------------------------------------
93# katakana rules
94# Punctuation
95'.' ↔ 。; # quoted so the period is taken as a literal
96',' ↔ 、;
97# ' ' } [a-z] → ; # delete spaces before latin
98# ' ' ← [^' '゠-ヿ] {} ['゠-ヿ] ; #insert spaces before katakana
99# Iteration Mark
100# Copy previous letter and its marks
101# TODO
102# | $1 $1 ← ($kana [[:M:]$voice$semivoice]?) $iteration
103# Specials for katakana -- not shared with hiragana
104va ↔ ヷ ; # single-character v-row kana; vu itself is handled by the shared rules
105vi ↔ ヸ ;
106ve ↔ ヹ ;
107vo ↔ ヺ ;
108'~ka' ↔ ヵ ; # small ka / small ke use the same tilde prefix as the other small forms
109'~ke' ↔ ヶ ;
110# ~~~ begin shared rules ~~~
111#special: reverse-only rules that finish the tilde + small kana left behind by the "consonant | '~'" rules below (e.g. ビャ → b~ャ → bya)
112ya ← '~'ャ;
113yi ← '~'ィ ;
114yu ← '~'ュ;
115ye ← '~'ェ;
116yo ← '~'ョ;
117#normal: syllable table; order matters because the first matching rule wins
118a ↔ ア ;
119b | '~' ← ヒ ゙} $small_y ; # reverse: emit b, then leave the cursor before the tilde so the small kana is re-scanned
120by } $vowel → ビ | '~y' ; # forward: by+vowel becomes ビ, then ~y+vowel is re-scanned into a small ya/yu/yo
121ba ↔ バ ;
122bi ↔ ビ ;
123bu ↔ ブ ;
124be ↔ ベ ;
125bo ↔ ボ ;
126c } i → | s ; # soft c: rewrite as s and re-scan
127c } e → | s ;
128da ↔ ダ ;
129di ↔ ディ ; # di/du are spelled with de + small vowel; ヂ/ヅ are dji/dzu
130du ↔ デゥ ;
131de ↔ デ ;
132do ↔ ド ;
133dzu ↔ ヅ ;
134dja ← ヂャ ;
135dji'~i' ← ヂィ ; # liu
136dju ← ヂュ ;
137dje ← ヂェ ;
138djo ← ヂョ ;
139dji ↔ ヂ ;
140dj  } $vowel → ヂ | '~y' ;
141# TODO: QUESTION: use ĵĴżŻ instead of dj, dz
142cha ← チャ ;
143chi'~i' ← チィ ; # liu
144chu ← チュ ;
145che ← チェ ;
146cho ← チョ ;
147chi ↔ チ ;
148ch } $vowel → チ | '~y' ; # forward counterpart of the reverse-only cha/chu/che/cho rules above
149e ↔ エ ;
150g | '~' ← ギ} $small_y ;
151gy  } $vowel → ギ | '~y' ;
152ga ↔ ガ ;
153gi ↔ ギ ;
154gu ↔ グ ;
155ge ↔ ゲ ;
156go ↔ ゴ ;
157i ↔ イ ;
158# j  } $vowel → ジ | '~y' ;
159ja ↔ ジャ ; # unlike ch/sh/dj, the j row is listed as explicit two-way rules
160ji'~i' ← ジィ ; # liu
161ju ↔ ジュ ;
162je ↔ ジェ ;
163jo ↔ ジョ ;
164ji ↔ ジ ;
165k | '~' ← キ} $small_y ; # reverse: emit k, then re-scan the tilde + small kana (same pattern for m, n, p, h, r below)
166ky  } $vowel → キ | '~y' ; # forward: ky+vowel becomes キ, then ~y+vowel is re-scanned
167ka ↔ カ ;
168ki ↔ キ ;
169ku ↔ ク ;
170ke ↔ ケ ;
171ko ↔ コ ;
172m | '~' ← ミ} $small_y ;
173my  } $vowel → ミ | '~y' ;
174ma ↔ マ ;
175mi ↔ ミ ;
176mu ↔ ム ;
177me ↔ メ ;
178mo ↔ モ ;
179m } [pbfv] → ン ; # forward only: m before p/b/f/v is written as the syllabic n
180n | '~' ← ニ } $small_y ;
181ny  } $vowel → ニ | '~y' ;
182na ↔ ナ ;
183ni ↔ ニ ;
184nu ↔ ヌ ;
185ne ↔ ネ ;
186no ↔ ノ ;
187o ↔ オ ;
188p | '~' ← ピ } $small_y ;
189py  } $vowel → ピ | '~y' ;
190pa ↔ パ ;
191pi ↔ ピ ;
192pu ↔ プ ;
193pe ↔ ペ ;
194po ↔ ポ ;
195h | '~' ← ヒ } $small_y ;
196hy  } $vowel → ヒ | '~y' ;
197ha ↔ ハ ;
198hi ↔ ヒ ;
199hu ↔ ヘゥ ; # hu is he + small u; plain フ is fu (below)
200he ↔ ヘ ;
201ho ↔ ホ ;
202# f | '~' ← フ } $small_y ;
203# f } $vowel → フ | '~' ;
204fa ↔ ファ ; # the f row is listed explicitly; the generic f rules above are disabled
205fi ↔ フィ ;
206fe ↔ フェ ;
207fo ↔ フォ ;
208fu ↔ フ ;
209r | '~' ← リ } $small_y ;
210ry  } $vowel → リ | '~y' ;
211ra ↔ ラ ;
212ri ↔ リ ;
213ru ↔ ル ;
214re ↔ レ ;
215ro ↔ ロ ;
216za ↔ ザ ;
217zi ↔ ゼィ ; # zi/si are spelled with ze/se + small i; plain ジ/シ are ji/shi
218zu ↔ ズ ;
219ze ↔ ゼ ;
220zo ↔ ゾ ;
221sa ↔ サ ;
222si ↔ セィ ;
223su ↔ ス ;
224se ↔ セ ;
225so ↔ ソ ;
226sha ← シャ ;
227shi'~i' ← シィ ; # liu
228shu ← シュ ;
229she ← シェ ;
230sho ← ショ ;
231shi ↔ シ ;
232sh } $vowel → シ | '~y' ; # forward counterpart of the reverse-only sha/shu/she/sho rules above
233ta ↔ タ ;
234ti ↔ ティ ; # ti/tu are spelled with te + small vowel; plain チ/ツ are chi/tsu
235tu ↔ テゥ ;
236te ↔ テ ;
237to ↔ ト ;
238tsu ↔ ツ ;
239# v  } $vowel → ヴ | '~' ;
240#'v~a' ← ヴァ ; # liu
241#'v~i' ← ヴィ ; # liu
242#'v~e' ← ヴェ ; # liu
243#'v~o' ← ヴォ ; # liu
244vu ↔ ヴ ;
245u ↔ ウ ;
246# w  } $vowel → ウ | '~' ;
247wa ↔ ワ ;
248wi ↔ ヰ ;
249wu → ウ ; # forward only: ウ reverses to u
250we ↔ ヱ ;
251wo ↔ ヲ ;
252ya ↔ ヤ ;
253yi → イ ; # forward only: イ reverses to i
254yu ↔ ユ ;
255ye → エ ; # forward only: エ reverses to e
256yo ↔ ヨ ;
257# double consonants: the first letter of a doubled consonant maps to small tsu (ッ)
258#specials: forward only, for ssh and tch
259s } sh → ッ ;
260t } ch → ッ ;
261#voiced: in reverse, ッ is read as the consonant of the following kana + voice mark
262j } j ↔ ッ } $j_start ;
263b } b ↔ ッ } [$h_start$f_start] $voice;
264d } d ↔ ッ } $t_start $voice;
265g } g ↔ ッ } $k_start $voice;
266p } p ↔ ッ } [$h_start$f_start] $semivoice;
267# v } v ↔ ッ } [ワヰウヱヲう]  $voice ;
268z } z ↔ ッ } $s_start $voice;
269v } v ↔ ッ } $v_start; # NOTE(review): $v_start omits ウ, which the disabled rule above included; ッヴ may not reverse to vvu - confirm
270# normal: listed after the voiced rules, which need the longer right context
271k } k ↔ ッ } $k_start ;
272m } m ↔ ッ } $m_start ;
273n } n ↔ ッ } $n_start ;
274h } h ↔ ッ } $h_start ;
275f } f ↔ ッ } $f_start ;
276r } r ↔ ッ } $r_start ;
277t } t ↔ ッ } $t_start ;
278s } s ↔ ッ } $s_start ;
279w } w  ↔ ッ } $w_start;
280y } y ↔ ッ } $y_start;
281# completeness: forward-only doublings for letters that have no kana row of their own
282x } x → ッ ;
283c } k → ッ ;
284c } c → ッ ;
285c } q → ッ ;
286l } l → ッ ;
287q } q → ッ ;
288# y } y → ッ ;
289# w } w → ッ ;
290# prolonged vowel mark. this indicates a doubling of
291# the preceding vowel sound
292#a ← a { ー ; # liu
293#e ← e { ー ; # liu
294#i ← i { ー ; # liu
295#o ← o { ー ; # liu
296#u ← u { ー ; # liu
297$macron ↔ ー ; # the live rule: a combining macron and the prolonged sound mark map to each other
298# small forms: a tilde prefix marks an explicitly small kana
299'~a' ↔ ァ ;
300'~i' ↔ ィ ;
301'~u' ↔ ゥ ;
302'~e' ↔ ェ ;
303'~o' ↔ ォ ;
304'~tsu' ↔ ッ ; # reverse fallback for a ッ that none of the doubled-consonant rules matched
305'~wa' ↔ ヮ ;
306'~ya' ↔ ャ ;
307'~yi' → ィ ; # forward only: ィ reverses to ~i
308'~yu' ↔ ュ ;
309'~ye' → ェ ; # forward only: ェ reverses to ~e
310'~yo' ↔ ョ ;
311# iteration marks (reverse direction only): ヽ repeats the preceding syllable; with a voice mark the repeat is voiced
312# TODO: make more accurate
313j $1 ← sh (y* $vowel) {ヽ$voice ; # the romanized syllable before ヽ is left context; output is the voiced onset plus the captured y*+vowel
314dj $1 ← ch (y* $vowel) {ヽ$voice ;
315dz $1 ← ts (y* $vowel) {ヽ$voice ;
316g $1 ← k (y* $vowel) {ヽ$voice ;
317z $1 ← s (y* $vowel) {ヽ$voice ;
318d $1 ← t (y* $vowel) {ヽ$voice ;
319b $1 ← h (y* $vowel) {ヽ$voice ; # fixed: was "h $1 ← b"; the voiced counterpart of the h row is the b row, matching every other pair here
320v $1 ← w (y* $vowel) {ヽ$voice ;
321sh $1 ← sh (y* $vowel) {ヽ$voice ; # NOTE(review): same pattern as the "j ← sh" rule above, which is tried first; looks unreachable
322j $1 ← j (y* $vowel) {ヽ$voice ; # an already voiced syllable is repeated as is
323ch $1 ← ch (y* $vowel) {ヽ$voice ; # NOTE(review): same pattern as the "dj ← ch" rule above; looks unreachable
324dj $1 ← dj(y* $vowel) {ヽ$voice ;
325ts $1 ← ts (y* $vowel) {ヽ$voice ; # NOTE(review): same pattern as the "dz ← ts" rule above; looks unreachable
326dz $1 ← dz (y* $vowel) {ヽ$voice ;
327$1 ← ($consonant y* $vowel) {ヽ$voice? ; # fallback: repeat the preceding consonant+vowel unchanged, voice mark optional
328$1 ← (.) {ヽ $voice? ; # otherwise repeat last character
329← ヽ $voice? ; # delete if no characters found
330# h- rule: lengthens vowel if not followed by a vowel.
331# At the point this is applied, latin [cons]?vowel sequences
332# have been converted to katakana in NFD form.
333$voweled_basekana [\u3099 \u309A]? { h → ー ; # no right context needed: h+vowel is consumed by the ha/hi/hu/he/ho rules listed earlier
334# one-way latin- → kana rules. these do not occur in
335# well-formed romaji representing actual japanese text.
336# their purpose is to make all romaji map to kana of
337# some sort.
338# the following are not really necessary, but produce
339# slightly more natural results.
340cy → セィ ;
341dy → ディ ;
342hy → ヒ ;
343sy → セィ ;
344ty → ティ ;
345zy → ゼィ ;
346h → ヘ ;
347# isolated consonants listed here so as not to mask
348# longer rules above.
349ch → チ;
350sh → シ ;
351dz → ヅ ;
352dj → ヂ;
353b → ブ ;
354d → デ ;
355g → グ ;
356k → ク ;
357m → ム ;
358n'' ← ン } $n_quoter ; # reverse only: the doubled quote is a literal apostrophe, giving n' before a vowel, n-row or y-row kana
359n ↔ ン ;
360p → プ ;
361r → ル ;
362s → ス ;
363t → テ ;
364y → イ ;
365z → ズ ;
366v → ヴ ;
367f → フ;
368j  → ジ;
369w → ウ;
370ß → | ss ; # non-ASCII Latin letters are rewritten to ASCII and re-scanned (cursor stays before the output)
371æ → | e ;
372ð → | d ;
373ø → | u ;
374þ → | th ;
375# simple substitutions using backup
376c → | k ;
377l → | r ;
378q → | k ;
379x → | ks ;
380# ~~~ END shared rules ~~~
381#------------------------------------------------------
382# Final cleanup (these rules run after all of the rules above)
383'~' → ; # delete stray tildes; the rule has no context, so any tilde still present is removed
384[:Katakana:] { '' } [:Latin:] → ; # delete stray quotes between letters
385# [ʾ[:Nonspacing Mark:]-[゙-゜]] → ; # delete any non-spacing marks that we didn't use
386:: NFC (NFD) ; # forward: recompose the output; reverse: decompose the input before the rules run
387:: ([[:Katakana:][\u309B\u309C\u30A0\u30FC\uFF70\uFF9E\uFF9F]] halfwidth-fullwidth); # reverse only: widen halfwidth katakana and sound marks first
388# note: a global filter is more efficient, but MUST include all source chars!!
389#:: ([\u0000-\u007E 、。 ゙-゜ ァ-ー 。-゚ [:Latin:][:Katakana:] [:nonspacing mark:]]);
390# MINIMAL FILTER GENERATED FOR: Latin-Katakana BACKWARD
391:: ( [[\ -~¢-£¥-¦¬̄₩。-하-ᅦᅧ-ᅬᅭ-ᅲᅳ-ᅵ│-○][~、-。がぎぐげござじずぜぞだぢづでどば-ぱび-ぴぶ-ぷべ-ぺぼ-ぽゔ゙-゛ゞァ-ヺー-ヾ][\u309B\u309C\u30A0\u30FC\uFF70\uFF9E\uFF9F]] ) ;
392# eof
393			</tRule>
394		</transform>
395	</transforms>
396</supplementalData>
397