• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1<?xml version="1.0" encoding="UTF-8" ?>
2<!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
3<!--
4Copyright © 1991-2013 Unicode, Inc.
5CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
6For terms of use, see http://www.unicode.org/copyright.html
7-->
8<supplementalData>
9	<version number="$Revision: 12263 $"/>
10	<transforms>
11		<transform source="Arab" target="Latn" direction="both" alias="Arabic-Latin und-Latn-t-und-arab" backwardAlias="Latin-Arabic und-Arab-t-und-latn">
12			<tRule><![CDATA[
13# Generally follows UNGEGN
14#     http://www.eki.ee/wgrs/rom1_ar.pdf
15# Occasionally deviates in the direction of ISO 233
16#     http://homepage.mac.com/sirbinks/pdf/Arabic.pdf
17# a) where required for disambiguation.
18# b) with underdot instead of cedilla for letters like SAD,
19#    since those are explicitly in Unicode for transliteration.
20# c) with extra non-Arabic-language letters, like PEH
21#
22# Does *not* do assimilation of "al", nor hyphenation.
23# While it could be done, we need to determine whether a prefix "al" could
24# occur other than as the definite article (since no space is used).
25:: [[:Arabic:][:block=ARABIC:][‎ⁿ،؛؟ـً-ٕ٠-٬۰-۹﷼ښ]] ;
26:: NFKD (NFC);
27$disambig =  ̱ ;
28$disambig2 =  ̰ ;
29$under =  ̣ ;
30$descender = ˌ;
31$notAbove = [[:^ccc=0:] & [:^ccc=230:]];
32
33# non-letters
34[:Nd:]{٫}[:Nd:] ↔ [:Nd:]{','}[:Nd:] ; # ARABIC DECIMAL SEPARATOR
35[:Nd:]{٬}[:Nd:] ↔ [:Nd:]{'.'}[:Nd:] ; # ARABIC THOUSANDS SEPARATOR
36٫ ↔ ',' $disambig ; # ARABIC DECIMAL SEPARATOR
37٬ ↔ '.' $disambig ; # ARABIC THOUSANDS SEPARATOR
38#  ٭ ↔ ; # ARABIC FIVE POINTED STAR // no need to transliterate
39، ↔ ',' ; # ARABIC COMMA
40؛ ↔ ';' ; # ARABIC SEMICOLON
41؟ ↔ '?' ; # ARABIC QUESTION MARK
42٪ ↔ '%' ; # ARABIC PERCENT SIGN
43۰ ↔ 0 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ZERO
44۱ ↔ 1 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ONE
45۲ ↔ 2 $disambig ; # EXTENDED ARABIC-INDIC DIGIT TWO
46۳ ↔ 3 $disambig ; # EXTENDED ARABIC-INDIC DIGIT THREE
47۴ ↔ 4 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FOUR
48۵ ↔ 5 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FIVE
49۶ ↔ 6 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SIX
50۷ ↔ 7 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SEVEN
51۸ ↔ 8 $disambig ; # EXTENDED ARABIC-INDIC DIGIT EIGHT
52۹ ↔ 9 $disambig ; # EXTENDED ARABIC-INDIC DIGIT NINE
53٠ ↔ 0 ; # ARABIC-INDIC DIGIT ZERO
54١ ↔ 1 ; # ARABIC-INDIC DIGIT ONE
55٢ ↔ 2 ; # ARABIC-INDIC DIGIT TWO
56٣ ↔ 3 ; # ARABIC-INDIC DIGIT THREE
57٤ ↔ 4 ; # ARABIC-INDIC DIGIT FOUR
58٥ ↔ 5 ; # ARABIC-INDIC DIGIT FIVE
59٦ ↔ 6 ; # ARABIC-INDIC DIGIT SIX
60٧ ↔ 7 ; # ARABIC-INDIC DIGIT SEVEN
61٨ ↔ 8 ; # ARABIC-INDIC DIGIT EIGHT
62٩ ↔ 9 ; # ARABIC-INDIC DIGIT NINE
63
64# letters
65# long vowels
66َا↔ ā ; # ARABIC FATHA, ARABIC LETTER ALEF
67ُو ↔ ū ; # ARABIC DAMMA, ARABIC LETTER WAW
68ِي ↔ ī ; # ARABIC KASRA, ARABIC LETTER YEH
69# longer items moved here to prevent masking
70ث ↔ t h $disambig ; # ARABIC LETTER THEH
71ذ ↔ d h $disambig ; # ARABIC LETTER THAL
72ش ↔ s h $disambig ; # ARABIC LETTER SHEEN
73ص ↔ s $under ; # ARABIC LETTER SAD
74ض ↔ d $under ; # ARABIC LETTER DAD
75ط ↔ t $under ; # ARABIC LETTER TAH
76ظ ↔ z $under ; # ARABIC LETTER ZAH
77غ ↔ g h $disambig ; # ARABIC LETTER GHAIN
78
79# WARNING: special case
80# ←t, umlaut, half-ring below→ will be canonically ordered as ←t, half-ring below, umlaut→
81# so on the return, we have to skip over (but preserve) the half-ring below (or others like it)
82# ةٕ ← ẗ̹ ; # LATIN SMALL LETTER T, COMBINING RIGHT HALF RING BELOW, COMBINING DIAERESIS
83ة ↔ t ̈ ; # ARABIC LETTER TEH MARBUTA
84ة | $1 ← t ($notAbove+) ̈ ; # ARABIC LETTER TEH MARBUTA
85
86# non-Arabic language
87ژ ↔ z h $disambig ; # ARABIC LETTER JEH
88ڭ ↔ n $disambig g ; # ARABIC LETTER NG
89ۋ ↔ v $disambig ; # ARABIC LETTER VE
90ی ↔ y $disambig2 ; # ARABIC LETTER FARSI YEH
91ښ ↔ s $descender; # ARABIC LETTER SEEN WITH DOT BELOW AND DOT ABOVE
92
93# Arabic language
94ء ↔ ʾ ; # ARABIC LETTER HAMZA
95ا ↔ a $under; # ARABIC LETTER ALEF
96ب ↔ b ; # ARABIC LETTER BEH
97ت ↔ t ; # ARABIC LETTER TEH
98ج ↔ j ; # ARABIC LETTER JEEM
99ح ↔ h $under ; # ARABIC LETTER HAH
100خ ↔ k h $disambig ; # ARABIC LETTER KHAH
101د ↔ d ; # ARABIC LETTER DAL
102ر ↔ r ; # ARABIC LETTER REH
103ز ↔ z ; # ARABIC LETTER ZAIN
104س ↔ s ; # ARABIC LETTER SEEN
105ع ↔ ʿ ; # ARABIC LETTER AIN
106ـ → ; # ARABIC TATWEEL
107ف ↔ f ; # ARABIC LETTER FEH
108ق ↔ q ; # ARABIC LETTER QAF
109ک ↔ k $disambig ; # ARABIC LETTER KEHEH
110ك ↔ k ; # ARABIC LETTER KAF
111ل ↔ l ; # ARABIC LETTER LAM
112م ↔ m ; # ARABIC LETTER MEEM
113ن ↔ n ; # ARABIC LETTER NOON
114ه ↔ h ; # ARABIC LETTER HEH
115و ↔ w ; # ARABIC LETTER WAW
116ى ↔ y $disambig ; # ARABIC LETTER ALEF MAKSURA
117ي ↔ y ; # ARABIC LETTER YEH
118ً ↔ aⁿ ; # ARABIC FATHATAN
119ٌ ↔ uⁿ ; # ARABIC DAMMATAN
120ٍ ↔ iⁿ ; # ARABIC KASRATAN
121َ ↔ a ; # ARABIC FATHA
122ُ ↔ u ; # ARABIC DAMMA
123ِ ↔ i ; # ARABIC KASRA
124ّ ↔   ̃ ; # ARABIC SHADDA
125ْ ↔   ̊ ; # ARABIC SUKUN
126
127# special combining marks
128ٓ ↔  ̂ ; # ARABIC MADDAH ABOVE
129ٔ ↔  ̉ ; # ARABIC HAMZA ABOVE
130ٕ ↔  ̹ ; # ARABIC HAMZA BELOW
131
132# Some non-Arabic language (not in UNGEGN)
133پ ↔ p ; # ARABIC LETTER PEH
134چ ↔ c h $disambig ; # ARABIC LETTER TCHEH
135ڤ ↔ v ; # ARABIC LETTER VEH
136# ڥ ↔ v $disambig ; # ARABIC LETTER FEH WITH THREE DOTS BELOW
137# ڢ ↔ f $disambig ; # ARABIC LETTER FEH WITH DOT MOVED BELOW
138گ ↔ g ; # ARABIC LETTER GAF
139
140# fallbacks
141| s ← c } [eiy];
142| k ← c ;
143| i ← e ;
144| u ← o ;
145| ks ← x ;
146| n ← ‎ⁿ;
147:: (lower) ;
148::NFC (NFD);
149:: ( [[:Latin:] [%,.0-9;?ʾ-ʿ̂-̄̈-̣̰̊-̱̹;ˌ]] );
150			]]></tRule>
151		</transform>
152	</transforms>
153</supplementalData>
154