<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
<!--
Copyright © 1991-2013 Unicode, Inc.
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
For terms of use, see http://www.unicode.org/copyright.html
-->
<supplementalData>
    <version number="$Revision: 12263 $"/>
    <transforms>
        <transform source="Arab" target="Latn" direction="both" alias="Arabic-Latin und-Latn-t-und-arab" backwardAlias="Latin-Arabic und-Arab-t-und-latn">
            <tRule><![CDATA[
# Generally follows UNGEGN
# http://www.eki.ee/wgrs/rom1_ar.pdf
# Occasionally deviates in the direction of ISO 233
# http://homepage.mac.com/sirbinks/pdf/Arabic.pdf
# a) where required for disambiguation.
# b) with underdot instead of cedilla for letters like SAD,
# since those are explicitly in Unicode for transliteration.
# c) with extra non-Arabic-language letters, like PEH
#
# Does *not* do assimilation of "al", nor hyphenation.
# While it could be done, we need to determine whether a prefix "al" could
# occur other than as the definite article (since no space is used).
:: [[:Arabic:][:block=ARABIC:][ⁿ،؛؟ـً-ٕ٠-٬۰-۹﷼ښ]] ;
:: NFKD (NFC);
# Marks appended to a Latin letter so that distinct Arabic letters sharing the
# same base romanization still round-trip.
$disambig = ̱ ; # COMBINING MACRON BELOW
$disambig2 = ̰ ; # COMBINING TILDE BELOW
$under = ̣ ; # COMBINING DOT BELOW
$descender = ˌ; # MODIFIER LETTER LOW VERTICAL LINE
# Combining marks whose canonical combining class is neither 0 nor 230 (above);
# used by the TEH MARBUTA reverse rule below.
$notAbove = [[:^ccc=0:] & [:^ccc=230:]];

# non-letters
[:Nd:]{٫}[:Nd:] ↔ [:Nd:]{','}[:Nd:] ; # ARABIC DECIMAL SEPARATOR
[:Nd:]{٬}[:Nd:] ↔ [:Nd:]{'.'}[:Nd:] ; # ARABIC THOUSANDS SEPARATOR
٫ ↔ ',' $disambig ; # ARABIC DECIMAL SEPARATOR
٬ ↔ '.' $disambig ; # ARABIC THOUSANDS SEPARATOR
# ٭ ↔ ; # ARABIC FIVE POINTED STAR // no need to transliterate
، ↔ ',' ; # ARABIC COMMA
؛ ↔ ';' ; # ARABIC SEMICOLON
؟ ↔ '?' ; # ARABIC QUESTION MARK
٪ ↔ '%' ; # ARABIC PERCENT SIGN
۰ ↔ 0 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ZERO
۱ ↔ 1 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ONE
۲ ↔ 2 $disambig ; # EXTENDED ARABIC-INDIC DIGIT TWO
۳ ↔ 3 $disambig ; # EXTENDED ARABIC-INDIC DIGIT THREE
۴ ↔ 4 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FOUR
۵ ↔ 5 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FIVE
۶ ↔ 6 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SIX
۷ ↔ 7 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SEVEN
۸ ↔ 8 $disambig ; # EXTENDED ARABIC-INDIC DIGIT EIGHT
۹ ↔ 9 $disambig ; # EXTENDED ARABIC-INDIC DIGIT NINE
٠ ↔ 0 ; # ARABIC-INDIC DIGIT ZERO
١ ↔ 1 ; # ARABIC-INDIC DIGIT ONE
٢ ↔ 2 ; # ARABIC-INDIC DIGIT TWO
٣ ↔ 3 ; # ARABIC-INDIC DIGIT THREE
٤ ↔ 4 ; # ARABIC-INDIC DIGIT FOUR
٥ ↔ 5 ; # ARABIC-INDIC DIGIT FIVE
٦ ↔ 6 ; # ARABIC-INDIC DIGIT SIX
٧ ↔ 7 ; # ARABIC-INDIC DIGIT SEVEN
٨ ↔ 8 ; # ARABIC-INDIC DIGIT EIGHT
٩ ↔ 9 ; # ARABIC-INDIC DIGIT NINE

# letters
# long vowels
َا↔ ā ; # ARABIC FATHA, ARABIC LETTER ALEF
ُو ↔ ū ; # ARABIC DAMMA, ARABIC LETTER WAW
ِي ↔ ī ; # ARABIC KASRA, ARABIC LETTER YEH
# longer items moved here to prevent masking
ث ↔ t h $disambig ; # ARABIC LETTER THEH
ذ ↔ d h $disambig ; # ARABIC LETTER THAL
ش ↔ s h $disambig ; # ARABIC LETTER SHEEN
ص ↔ s $under ; # ARABIC LETTER SAD
ض ↔ d $under ; # ARABIC LETTER DAD
ط ↔ t $under ; # ARABIC LETTER TAH
ظ ↔ z $under ; # ARABIC LETTER ZAH
غ ↔ g h $disambig ; # ARABIC LETTER GHAIN

# WARNING: special case
# ←t, umlaut, half-ring below→ will be canonically ordered as ←t, half-ring below, umlaut→
# so on the return, we have to skip over (but preserve) the half-ring below (or others like it)
# ةٕ ← ẗ̹ ; # LATIN SMALL LETTER T, COMBINING RIGHT HALF RING BELOW, COMBINING DIAERESIS
ة ↔ t ̈ ; # ARABIC LETTER TEH MARBUTA
ة | $1 ← t ($notAbove+) ̈ ; # ARABIC LETTER TEH MARBUTA

# non-Arabic language
ژ ↔ z h $disambig ; # ARABIC LETTER JEH
ڭ ↔ n $disambig g ; # ARABIC LETTER NG
ۋ ↔ v $disambig ; # ARABIC LETTER VE
ی ↔ y $disambig2 ; # ARABIC LETTER FARSI YEH
ښ ↔ s $descender; # ARABIC LETTER SEEN WITH DOT BELOW AND DOT ABOVE

# Arabic language
ء ↔ ʾ ; # ARABIC LETTER HAMZA
ا ↔ a $under; # ARABIC LETTER ALEF
ب ↔ b ; # ARABIC LETTER BEH
ت ↔ t ; # ARABIC LETTER TEH
ج ↔ j ; # ARABIC LETTER JEEM
ح ↔ h $under ; # ARABIC LETTER HAH
خ ↔ k h $disambig ; # ARABIC LETTER KHAH
د ↔ d ; # ARABIC LETTER DAL
ر ↔ r ; # ARABIC LETTER REH
ز ↔ z ; # ARABIC LETTER ZAIN
س ↔ s ; # ARABIC LETTER SEEN
ع ↔ ʿ ; # ARABIC LETTER AIN
ـ → ; # ARABIC TATWEEL
ف ↔ f ; # ARABIC LETTER FEH
ق ↔ q ; # ARABIC LETTER QAF
ک ↔ k $disambig ; # ARABIC LETTER KEHEH
ك ↔ k ; # ARABIC LETTER KAF
ل ↔ l ; # ARABIC LETTER LAM
م ↔ m ; # ARABIC LETTER MEEM
ن ↔ n ; # ARABIC LETTER NOON
ه ↔ h ; # ARABIC LETTER HEH
و ↔ w ; # ARABIC LETTER WAW
ى ↔ y $disambig ; # ARABIC LETTER ALEF MAKSURA
ي ↔ y ; # ARABIC LETTER YEH
ً ↔ aⁿ ; # ARABIC FATHATAN
ٌ ↔ uⁿ ; # ARABIC DAMMATAN
ٍ ↔ iⁿ ; # ARABIC KASRATAN
َ ↔ a ; # ARABIC FATHA
ُ ↔ u ; # ARABIC DAMMA
ِ ↔ i ; # ARABIC KASRA
ّ ↔ ̃ ; # ARABIC SHADDA
ْ ↔ ̊ ; # ARABIC SUKUN

# special combining marks
ٓ ↔ ̂ ; # ARABIC MADDAH ABOVE
ٔ ↔ ̉ ; # ARABIC HAMZA ABOVE
ٕ ↔ ̹ ; # ARABIC HAMZA BELOW

# Some non-Arabic language (not in UNGEGN)
پ ↔ p ; # ARABIC LETTER PEH
چ ↔ c h $disambig ; # ARABIC LETTER TCHEH
ڤ ↔ v ; # ARABIC LETTER VEH
# ڥ ↔ v $disambig ; # ARABIC LETTER FEH WITH THREE DOTS BELOW
# ڢ ↔ f $disambig ; # ARABIC LETTER FEH WITH DOT MOVED BELOW
گ ↔ g ; # ARABIC LETTER GAF

# fallbacks
# Reverse-direction only: each rule rewrites a Latin letter that has no rule of
# its own above into one that does; the leading "|" puts the cursor before the
# replacement so that it is transliterated again by the rules above.
| s ← c } [eiy];
| k ← c ;
| i ← e ;
| u ← o ;
| ks ← x ;
| n ← ⁿ;
:: (lower) ;
::NFC (NFD);
:: ( [[:Latin:] [%,.0-9;?ʾ-ʿ̂-̄̈-̣̰̊-̱̹;ˌ]] );
            ]]></tRule>
        </transform>
    </transforms>
</supplementalData>