<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
<!--
Copyright © 1991-2013 Unicode, Inc.
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
For terms of use, see http://www.unicode.org/copyright.html
-->
<supplementalData>
    <version number="$Revision: 12263 $"/>
    <transforms>
        <transform source="Latn" target="Kana" direction="both" alias="Latin-Katakana und-Kana-t-und-latn" backwardAlias="Katakana-Latin und-Latn-t-und-kana">
            <tRule>
# Global filter.
# A single global filter is the more efficient choice, but it MUST cover
# every source character that the rules below can consume.
# Broad hand-written variant, kept for reference only (disabled):
#:: [\u0000-\u007E 、。 ゙-゜ ァ-ー 。-゚ [:Latin:][:Katakana:] [:nonspacing mark:]] ;
# MINIMAL FILTER GENERATED FOR: Latin-Katakana
### WARNING -- must add width filter, both here and below!!! ###
:: [[ᄀ-ᄒᄚᄡ\u1160-ᅵᆪᆬ-ᆭᆰ-ᆵ←-↓│■○\u3000-。「-」゙-゚ァ-ロワヲ-ヴヷヺ-ー!-~¢-₩][',.A-Za-z~À-ÖØ-öø-ďĒ-ĥĨ-İĴ-ķĹ-ľŃ-ňŌ-őŔ-ťŨ-žƠ-ơƯ-ưǍ-ǜǞ-ǣǦ-ǭǰǴ-ǵǸ-țȞ-ȟȦ-ȳ̄Ӣ-ӣӮ-ӯḀ-ẙẠ-ỹᾱᾹῑῙῡῩK-Å]] ;
# Forward only (the reverse part is empty): fold fullwidth Latin to halfwidth.
:: [:Latin:] fullwidth-halfwidth ();
# Forward pass decomposes (NFD); the reverse pass recomposes (NFC).
:: NFD (NFC);
# Lowercase on the forward pass. Include this whenever transliterating
# from a cased script to an uncased one.
:: Lower ();
# Disabled: a second NFD would catch the odd cases where a lowercase form
# is not in NFD, but none of those matter for Japanese.
# :: NFD () ;
# The romanization is modified Hepburn, with small changes to make it unambiguous.
# Correspondence between Kunrei-shiki spellings (left) and the
# Hepburn / modified-Hepburn spellings used by the rules below (right).
# | Kunrei-shiki: Hepburn/MHepburn
# | ------------------------------
# | si:     shi
# | si ~ya: sha
# | si ~yu: shu
# | si ~yo: sho
# | zi:     ji
# | zi ~ya: ja
# | zi ~yu: ju
# | zi ~yo: jo
# | ti:     chi
# | ti ~ya: cha
# | ti ~yu: chu
# | ti ~yo: cho
# | tu:     tsu
# | di:     ji/dji
# | du:     zu/dzu
# | hu:     fu
# | For foreign words:
# | -----------------
# | se ~i   si
# | si ~e   she
# |
# | ze ~i   zi
# | zi ~e   je
# |
# | te ~i   ti
# | ti ~e   che
# | te ~u   tu
# |
# | de ~i   di
# | de ~u   du
# |
# | he ~u   hu
# | hu ~a   fa
# | hu ~i   fi
# | hu ~e   fe
# | hu ~o   fo
# Most small forms are generated, but if necessary
# explicit small forms are given with ~a, ~ya, etc.
#------------------------------------------------------
# Variables
$vowel = [aeiou] ;
$consonant = [bcdfghjklmnpqrstvwxyz] ;
$macron = ̄ ;
# Variables used for doubled-consonants with tsu.
# Each $x_start set lists the kana whose romaji begins with that consonant;
# the doubled-consonant rules use them as following context for ッ.
$kana = [ぁ-ゔ] ;
$voice = [゙゛];
$semivoice = [゚゜];
$k_start = [カキクケコかきくけこ] ;
$s_start = [サシスセソさしすせそ] ;
$j_start = [シし] $voice ;
$t_start = [タチツテトたちつてと] ;
$n_start = [ナニヌネノンなにぬねの] ;
$h_start = [ハヒヘホはひへほ] ;
$f_start = [フふ] ;
$m_start = [マミムメモまみむめも] ;
$y_start = [ヤユヨやゆよ] ;
$r_start = [ラリルレロらりるれろ] ;
$w_start = [ワヰヱヲわゐゑを] ;
$v_start = [ワヰヱヲ]゙ ;
# Base kana after which a Latin "h" is read as a vowel lengthener (see the h- rule).
$voweled_basekana = [ァ-オカキクケコサシスセソタチッツテトナ-ノハヒフヘホマ-ヲヵヶ] ;
# if ン is followed by $n_quoter, then it needs an
# apostrophe after its romaji form to disambiguate it.
# e.g., ン ア != ナ, so represent as "n'a", not "na".
$n_quoter = [ア イ ウ エ オ ナ ニ ヌ ネ ノ ヤ ユ ヨ ン] ;
$small_y = [ャィュェョ] ;
$iteration = ゝ ;
#------------------------------------------------------
# katakana rules
# Punctuation
'.' ↔ 。;
',' ↔ 、;
# ' ' } [a-z] → ; # delete spaces before latin
# ' ' ← [^' '゠-ヿ] {} ['゠-ヿ] ; # insert spaces before katakana
# Iteration Mark
# Copy previous letter and its marks
# TODO
# | $1 $1 ← ($kana [[:M:]$voice$semivoice]?) $iteration
# Specials for katakana -- not shared with hiragana
va ↔ ヷ ;
vi ↔ ヸ ;
ve ↔ ヹ ;
vo ↔ ヺ ;
'~ka' ↔ ヵ ;
'~ke' ↔ ヶ ;
# ~~~ begin shared rules ~~~
# special: reverse-only, a small kana preceded by a literal tilde
ya ← '~'ャ;
yi ← '~'ィ ;
yu ← '~'ュ;
ye ← '~'ェ;
yo ← '~'ョ;
# normal
# Pattern used for most consonants below:
#   X | '~' ← (i-row kana) } $small_y   reverse: emit X, then re-read a '~' so
#                                        the following small kana comes out as ya/yu/yo
#   Xy } $vowel → (i-row kana) | '~y'   forward: emit the i-row kana, then re-read
#                                        '~y' + vowel as the small kana
a ↔ ア ;
b | '~' ← ヒ ゙} $small_y ;
by } $vowel → ビ | '~y' ;
ba ↔ バ ;
bi ↔ ビ ;
bu ↔ ブ ;
be ↔ ベ ;
bo ↔ ボ ;
# c before i/e is re-read as s
c } i → | s ;
c } e → | s ;
da ↔ ダ ;
di ↔ ディ ;
du ↔ デゥ ;
de ↔ デ ;
do ↔ ド ;
dzu ↔ ヅ ;
dja ← ヂャ ;
dji'~i' ← ヂィ ; # liu
dju ← ヂュ ;
dje ← ヂェ ;
djo ← ヂョ ;
dji ↔ ヂ ;
dj } $vowel → ヂ | '~y' ;
# TODO: QUESTION: use ĵĴżŻ instead of dj, dz
cha ← チャ ;
chi'~i' ← チィ ; # liu
chu ← チュ ;
che ← チェ ;
cho ← チョ ;
chi ↔ チ ;
ch } $vowel → チ | '~y' ;
e ↔ エ ;
g | '~' ← ギ} $small_y ;
gy } $vowel → ギ | '~y' ;
ga ↔ ガ ;
gi ↔ ギ ;
gu ↔ グ ;
ge ↔ ゲ ;
go ↔ ゴ ;
i ↔ イ ;
# j } $vowel → ジ | '~y' ;
ja ↔ ジャ ;
ji'~i' ← ジィ ; # liu
ju ↔ ジュ ;
je ↔ ジェ ;
jo ↔ ジョ ;
ji ↔ ジ ;
k | '~' ← キ} $small_y ;
ky } $vowel → キ | '~y' ;
ka ↔ カ ;
ki ↔ キ ;
ku ↔ ク ;
ke ↔ ケ ;
ko ↔ コ ;
m | '~' ← ミ} $small_y ;
my } $vowel → ミ | '~y' ;
ma ↔ マ ;
mi ↔ ミ ;
mu ↔ ム ;
me ↔ メ ;
mo ↔ モ ;
# forward only: m before p, b, f or v is written ン
m } [pbfv] → ン ;
n | '~' ← ニ } $small_y ;
ny } $vowel → ニ | '~y' ;
na ↔ ナ ;
ni ↔ ニ ;
nu ↔ ヌ ;
ne ↔ ネ ;
no ↔ ノ ;
o ↔ オ ;
p | '~' ← ピ } $small_y ;
py } $vowel → ピ | '~y' ;
pa ↔ パ ;
pi ↔ ピ ;
pu ↔ プ ;
pe ↔ ペ ;
po ↔ ポ ;
h | '~' ← ヒ } $small_y ;
hy } $vowel → ヒ | '~y' ;
ha ↔ ハ ;
hi ↔ ヒ ;
hu ↔ ヘゥ ;
he ↔ ヘ ;
ho ↔ ホ ;
# f | '~' ← フ } $small_y ;
# f } $vowel → フ | '~' ;
fa ↔ ファ ;
fi ↔ フィ ;
fe ↔ フェ ;
fo ↔ フォ ;
fu ↔ フ ;
r | '~' ← リ } $small_y ;
ry } $vowel → リ | '~y' ;
ra ↔ ラ ;
ri ↔ リ ;
ru ↔ ル ;
re ↔ レ ;
ro ↔ ロ ;
za ↔ ザ ;
zi ↔ ゼィ ;
zu ↔ ズ ;
ze ↔ ゼ ;
zo ↔ ゾ ;
sa ↔ サ ;
si ↔ セィ ;
su ↔ ス ;
se ↔ セ ;
so ↔ ソ ;
sha ← シャ ;
shi'~i' ← シィ ; # liu
shu ← シュ ;
she ← シェ ;
sho ← ショ ;
shi ↔ シ ;
# forward: sh + vowel becomes シ followed by the matching small kana
sh } $vowel → シ | '~y' ;
ta ↔ タ ;
ti ↔ ティ ;
tu ↔ テゥ ;
te ↔ テ ;
to ↔ ト ;
tsu ↔ ツ ;
# v } $vowel → ヴ | '~' ;
#'v~a' ← ヴァ ; # liu
#'v~i' ← ヴィ ; # liu
#'v~e' ← ヴェ ; # liu
#'v~o' ← ヴォ ; # liu
vu ↔ ヴ ;
u ↔ ウ ;
# w } $vowel → ウ | '~' ;
wa ↔ ワ ;
wi ↔ ヰ ;
wu → ウ ;
we ↔ ヱ ;
wo ↔ ヲ ;
ya ↔ ヤ ;
yi → イ ;
yu ↔ ユ ;
ye → エ ;
yo ↔ ヨ ;
# double consonants
# The first letter of a doubled consonant is written as small tsu (ッ).
# In the reverse direction ッ takes the consonant of the kana that follows it.
# specials (forward only)
s } sh → ッ ;
t } ch → ッ ;
# voiced: the following kana carries a voice or semivoice mark
j } j ↔ ッ } $j_start ;
b } b ↔ ッ } [$h_start$f_start] $voice;
d } d ↔ ッ } $t_start $voice;
g } g ↔ ッ } $k_start $voice;
p } p ↔ ッ } [$h_start$f_start] $semivoice;
# v } v ↔ ッ } [ワヰウヱヲう] $voice ;
z } z ↔ ッ } $s_start $voice;
v } v ↔ ッ } $v_start;
# normal
k } k ↔ ッ } $k_start ;
m } m ↔ ッ } $m_start ;
n } n ↔ ッ } $n_start ;
h } h ↔ ッ } $h_start ;
f } f ↔ ッ } $f_start ;
r } r ↔ ッ } $r_start ;
t } t ↔ ッ } $t_start ;
s } s ↔ ッ } $s_start ;
w } w ↔ ッ } $w_start;
y } y ↔ ッ } $y_start;
# completeness (forward only)
x } x → ッ ;
c } k → ッ ;
c } c → ッ ;
c } q → ッ ;
l } l → ッ ;
q } q → ッ ;
# y } y → ッ ;
# w } w → ッ ;
# prolonged vowel mark. this indicates a doubling of
# the preceding vowel sound
#a ← a { ー ; # liu
#e ← e { ー ; # liu
#i ← i { ー ; # liu
#o ← o { ー ; # liu
#u ← u { ー ; # liu
$macron ↔ ー ;
# small forms
'~a' ↔ ァ ;
'~i' ↔ ィ ;
'~u' ↔ ゥ ;
'~e' ↔ ェ ;
'~o' ↔ ォ ;
'~tsu' ↔ ッ ;
'~wa' ↔ ヮ ;
'~ya' ↔ ャ ;
'~yi' → ィ ;
'~yu' ↔ ュ ;
'~ye' → ェ ;
'~yo' ↔ ョ ;
# iteration marks
# Reverse only. The key is the iteration mark ヽ; the text before "{" is the
# already-romanized previous syllable, whose vowel part is captured as $1.
# With a voice mark the output is the voiced counterpart of that syllable.
# TODO: make more accurate
j $1 ← sh (y* $vowel) {ヽ$voice ;
dj $1 ← ch (y* $vowel) {ヽ$voice ;
dz $1 ← ts (y* $vowel) {ヽ$voice ;
g $1 ← k (y* $vowel) {ヽ$voice ;
z $1 ← s (y* $vowel) {ヽ$voice ;
d $1 ← t (y* $vowel) {ヽ$voice ;
# Voicing an h-syllable gives b. This rule previously read "h $1 ← b ...",
# the inverse of every sibling rule above. It must stay below the sh/ch
# rules because "h" is also the last letter of those contexts.
b $1 ← h (y* $vowel) {ヽ$voice ;
v $1 ← w (y* $vowel) {ヽ$voice ;
sh $1 ← sh (y* $vowel) {ヽ$voice ;
j $1 ← j (y* $vowel) {ヽ$voice ;
ch $1 ← ch (y* $vowel) {ヽ$voice ;
dj $1 ← dj(y* $vowel) {ヽ$voice ;
ts $1 ← ts (y* $vowel) {ヽ$voice ;
dz $1 ← dz (y* $vowel) {ヽ$voice ;
$1 ← ($consonant y* $vowel) {ヽ$voice? ;
$1 ← (.) {ヽ $voice? ; # otherwise repeat last character
← ヽ $voice? ; # delete if no characters found
# h- rule: lengthens vowel if not followed by a vowel.
# At the point this is applied, latin [cons]?vowel sequences
# have been converted to katakana in NFD form.
$voweled_basekana [\u3099 \u309A]? { h → ー ;
# one-way latin- → kana rules. these do not occur in
# well-formed romaji representing actual japanese text.
# their purpose is to make all romaji map to kana of
# some sort.
# the following are not really necessary, but produce
# slightly more natural results.
cy → セィ ;
dy → ディ ;
hy → ヒ ;
sy → セィ ;
ty → ティ ;
zy → ゼィ ;
h → ヘ ;
# isolated consonants listed here so as not to mask
# longer rules above.
# Forward-only fallbacks for a consonant that is not followed by a vowel.
ch → チ ;
sh → シ ;
dz → ヅ ;
dj → ヂ ;
b → ブ ;
d → デ ;
g → グ ;
k → ク ;
m → ム ;
# Reverse: ン before a kana in $n_quoter is written n plus an apostrophe.
n'' ← ン } $n_quoter ;
n ↔ ン ;
p → プ ;
r → ル ;
s → ス ;
t → テ ;
y → イ ;
z → ズ ;
v → ヴ ;
f → フ ;
j → ジ ;
w → ウ ;
# Non-ASCII Latin letters: replace with an ASCII spelling and re-read it.
ß → | ss ;
æ → | e ;
ð → | d ;
ø → | u ;
þ → | th ;
# Simple substitutions using backup: the replacement is re-read by the rules above.
c → | k ;
l → | r ;
q → | k ;
x → | ks ;
# ~~~ END shared rules ~~~
#------------------------------------------------------
# Final cleanup
'~' → ; # drop stray tildes left between letters
[:Katakana:] { '' } [:Latin:] → ; # drop stray quotes left between letters
# [ʾ[:Nonspacing Mark:]-[゙-゜]] → ; # delete any non-spacing marks that we didn't use
# Forward pass recomposes (NFC); the reverse pass decomposes (NFD).
:: NFC (NFD) ;
# Reverse only: widen halfwidth katakana and the listed marks to fullwidth.
:: ([[:Katakana:][\u309B\u309C\u30A0\u30FC\uFF70\uFF9E\uFF9F]] halfwidth-fullwidth);
# Reverse-direction global filter. A global filter is more efficient,
# but it MUST include all source chars!!
# Broad hand-written variant, kept for reference only (disabled):
#:: ([\u0000-\u007E 、。 ゙-゜ ァ-ー 。-゚ [:Latin:][:Katakana:] [:nonspacing mark:]]);
# MINIMAL FILTER GENERATED FOR: Latin-Katakana BACKWARD
:: ( [[\ -~¢-£¥-¦¬̄₩。-하-ᅦᅧ-ᅬᅭ-ᅲᅳ-ᅵ│-○][~、-。がぎぐげござじずぜぞだぢづでどば-ぱび-ぴぶ-ぷべ-ぺぼ-ぽゔ゙-゛ゞァ-ヺー-ヾ][\u309B\u309C\u30A0\u30FC\uFF70\uFF9E\uFF9F]] ) ;
# eof
            </tRule>
        </transform>
    </transforms>
</supplementalData>