• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# © 2016 and later: Unicode, Inc. and others.
2# License & terms of use: http://www.unicode.org/copyright.html
3# Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
4#
5# File: Arab_Latn.txt
6# Generated from CLDR
7#
8
9# Generally follows UNGEGN
10#     http://www.eki.ee/wgrs/rom1_ar.pdf
11# Occasionally deviates in the direction of ISO 233
12#     http://homepage.mac.com/sirbinks/pdf/Arabic.pdf
13# a) where required for disambiguation.
14# b) with underdot instead of cedilla for letters like SAD,
15#    since those are explicitly in Unicode for transliteration.
16# c) with extra non-Arabic-language letters, like PEH
17#
18# Does *not* do assimilation of "al", nor hyphenation.
19# While it could be done, we need to determine whether a prefix "al" could
20# occur other than as the definite article (since no space is used).
# Global filter for the forward (Arabic-to-Latin) direction: only characters in this set are touched.
21:: [[:Arabic:][:block=ARABIC:][‎ⁿ،؛؟ـ\u064B-\u0655٠-٬۰-۹﷼ښ]] ;
# Forward direction: decompose to NFKD before the rules run. Reverse direction: compose to NFC.
22:: NFKD (NFC);
# Combining marks used on the Latin side to keep otherwise identical romanizations distinct.
23$disambig =  \u0331 ; # U+0331 COMBINING MACRON BELOW
24$disambig2 =  \u0330 ; # U+0330 COMBINING TILDE BELOW
25$under =  \u0323 ; # U+0323 COMBINING DOT BELOW
26$descender = ˌ; # U+02CC MODIFIER LETTER LOW VERTICAL LINE
# Characters whose canonical combining class is neither 0 nor 230; used by the TEH MARBUTA reverse rule.
27$notAbove = [[:^ccc=0:] & [:^ccc=230:]];
28# non-letters
# Between two decimal digits ([:Nd:] on both sides) the separators map to a bare ',' or '.'.
# The context rules must come first: the standalone rules further down add $disambig instead.
29[:Nd:]{٫}[:Nd:] ↔ [:Nd:]{','}[:Nd:] ; # ARABIC DECIMAL SEPARATOR
30[:Nd:]{٬}[:Nd:] ↔ [:Nd:]{'.'}[:Nd:] ; # ARABIC THOUSANDS SEPARATOR
31٫ ↔ ',' $disambig ; # ARABIC DECIMAL SEPARATOR
32٬ ↔ '.' $disambig ; # ARABIC THOUSANDS SEPARATOR
33#  ٭ ↔ ; # ARABIC FIVE POINTED STAR // no need to transliterate
34، ↔ ',' ; # ARABIC COMMA
35؛ ↔ ';' ; # ARABIC SEMICOLON
36؟ ↔ '?' ; # ARABIC QUESTION MARK
37٪ ↔ '%' ; # ARABIC PERCENT SIGN
# Extended Arabic-Indic digits carry $disambig so they stay distinct from the
# Arabic-Indic digits below, which map to the bare ASCII digits.
38۰ ↔ 0 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ZERO
39۱ ↔ 1 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ONE
40۲ ↔ 2 $disambig ; # EXTENDED ARABIC-INDIC DIGIT TWO
41۳ ↔ 3 $disambig ; # EXTENDED ARABIC-INDIC DIGIT THREE
42۴ ↔ 4 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FOUR
43۵ ↔ 5 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FIVE
44۶ ↔ 6 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SIX
45۷ ↔ 7 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SEVEN
46۸ ↔ 8 $disambig ; # EXTENDED ARABIC-INDIC DIGIT EIGHT
47۹ ↔ 9 $disambig ; # EXTENDED ARABIC-INDIC DIGIT NINE
48٠ ↔ 0 ; # ARABIC-INDIC DIGIT ZERO
49١ ↔ 1 ; # ARABIC-INDIC DIGIT ONE
50٢ ↔ 2 ; # ARABIC-INDIC DIGIT TWO
51٣ ↔ 3 ; # ARABIC-INDIC DIGIT THREE
52٤ ↔ 4 ; # ARABIC-INDIC DIGIT FOUR
53٥ ↔ 5 ; # ARABIC-INDIC DIGIT FIVE
54٦ ↔ 6 ; # ARABIC-INDIC DIGIT SIX
55٧ ↔ 7 ; # ARABIC-INDIC DIGIT SEVEN
56٨ ↔ 8 ; # ARABIC-INDIC DIGIT EIGHT
57٩ ↔ 9 ; # ARABIC-INDIC DIGIT NINE
58# letters
59# long vowels
# A short-vowel mark followed by its matching letter becomes the Latin vowel plus U+0304 COMBINING MACRON.
60\u064Eا↔ a\u0304 ; # ARABIC FATHA, ARABIC LETTER ALEF
61\u064Fو ↔ u\u0304 ; # ARABIC DAMMA, ARABIC LETTER WAW
62\u0650ي ↔ i\u0304 ; # ARABIC KASRA, ARABIC LETTER YEH
63# longer items moved here to prevent masking
# (each Latin side here starts with a letter that also has its own single-letter rule later in the file,
#  so these rules have to be listed first)
64ث ↔ t h $disambig ; # ARABIC LETTER THEH
65ذ ↔ d h $disambig ; # ARABIC LETTER THAL
66ش ↔ s h $disambig ; # ARABIC LETTER SHEEN
67ص ↔ s $under ; # ARABIC LETTER SAD
68ض ↔ d $under ; # ARABIC LETTER DAD
69ط ↔ t $under ; # ARABIC LETTER TAH
70ظ ↔ z $under ; # ARABIC LETTER ZAH
71غ ↔ g h $disambig ; # ARABIC LETTER GHAIN
72# WARNING: special case
73# ←t, umlaut, half-ring below→ will be canonically ordered as ←t, half-ring below, umlaut→
74# so on the return, we have to skip over (but preserve) the half-ring below (or others like it)
75# ة\u0655 ← t\u0339\u0308 ; # LATIN SMALL LETTER T, COMBINING RIGHT HALF RING BELOW, COMBINING DIAERESIS
76ة ↔ t \u0308 ; # ARABIC LETTER TEH MARBUTA
# Reverse only: t, one or more $notAbove marks, then the diaeresis. Emits TEH MARBUTA and puts the
# captured marks ($1) back after the cursor so the following rules still convert them.
77ة | $1 ← t ($notAbove+) \u0308 ; # ARABIC LETTER TEH MARBUTA
78# non-Arabic language
# Letters used outside the Arabic language; each Latin side carries a distinguishing mark.
79ژ ↔ z h $disambig ; # ARABIC LETTER JEH
80ڭ ↔ n $disambig g ; # ARABIC LETTER NG
81ۋ ↔ v $disambig ; # ARABIC LETTER VE
82ی ↔ y $disambig2 ; # ARABIC LETTER FARSI YEH
83ښ ↔ s $descender; # ARABIC LETTER SEEN WITH DOT BELOW AND DOT ABOVE
84# Arabic language
# One rule per letter or mark. Rules are two-way (↔) unless noted otherwise.
# Several Latin sides here (t, d, s, z, k, h, y) are prefixes of longer Latin sides used by other rules,
# so the relative order of the rules matters.
85ء ↔ ʾ ; # ARABIC LETTER HAMZA
86ا ↔ a $under; # ARABIC LETTER ALEF
87ب ↔ b ; # ARABIC LETTER BEH
88ت ↔ t ; # ARABIC LETTER TEH
89ج ↔ j ; # ARABIC LETTER JEEM
90ح ↔ h $under ; # ARABIC LETTER HAH
91خ ↔ k h $disambig ; # ARABIC LETTER KHAH
92د ↔ d ; # ARABIC LETTER DAL
93ر ↔ r ; # ARABIC LETTER REH
94ز ↔ z ; # ARABIC LETTER ZAIN
95س ↔ s ; # ARABIC LETTER SEEN
96ع ↔ ʿ ; # ARABIC LETTER AIN
97ـ → ; # ARABIC TATWEEL (forward only: replaced by nothing)
98ف ↔ f ; # ARABIC LETTER FEH
99ق ↔ q ; # ARABIC LETTER QAF
100ک ↔ k $disambig ; # ARABIC LETTER KEHEH
101ك ↔ k ; # ARABIC LETTER KAF
102ل ↔ l ; # ARABIC LETTER LAM
103م ↔ m ; # ARABIC LETTER MEEM
104ن ↔ n ; # ARABIC LETTER NOON
105ه ↔ h ; # ARABIC LETTER HEH
106و ↔ w ; # ARABIC LETTER WAW
107ى ↔ y $disambig ; # ARABIC LETTER ALEF MAKSURA
108ي ↔ y ; # ARABIC LETTER YEH
# Tanween marks: Latin vowel followed by superscript n.
109\u064B ↔ aⁿ ; # ARABIC FATHATAN
110\u064C ↔ uⁿ ; # ARABIC DAMMATAN
111\u064D ↔ iⁿ ; # ARABIC KASRATAN
112\u064E ↔ a ; # ARABIC FATHA
113\u064F ↔ u ; # ARABIC DAMMA
114\u0650 ↔ i ; # ARABIC KASRA
# Shadda and sukun map to Latin combining marks (U+0303, U+030A).
115\u0651 ↔   \u0303 ; # ARABIC SHADDA
116\u0652 ↔   \u030A ; # ARABIC SUKUN
117# special combining marks
118\u0653 ↔  \u0302 ; # ARABIC MADDAH ABOVE
119\u0654 ↔  \u0309 ; # ARABIC HAMZA ABOVE
120\u0655 ↔  \u0339 ; # ARABIC HAMZA BELOW
121# Some non-Arabic language (not in UNGEGN)
122پ ↔ p ; # ARABIC LETTER PEH
123چ ↔ c h $disambig ; # ARABIC LETTER TCHEH
124ڤ ↔ v ; # ARABIC LETTER VEH
# The two FEH variants below are commented out, so they are not transliterated by this file.
125# ڥ ↔ v $disambig ; # ARABIC LETTER FEH WITH THREE DOTS BELOW
126# ڢ ↔ f $disambig ; # ARABIC LETTER FEH WITH DOT MOVED BELOW
127گ ↔ g ; # ARABIC LETTER GAF
128# fallbacks
# Reverse-only (←) rewrites for Latin input that has no rule of its own above.
# The leading '|' leaves the cursor before the replacement text, so the replacement is
# itself matched against the rules and converted to Arabic.
129| s ← c } [eiy]; # c before e, i or y is treated as s
130| k ← c ; # any other c is treated as k
131| i ← e ;
132| u ← o ;
133| ks ← x ;
134| n ← ‎ⁿ; # a superscript n not consumed by the tanween rules is treated as n
# Reverse direction only: lowercase the Latin input.
135:: (lower) ;
# Forward direction: compose the output to NFC. Reverse direction: decompose the input to NFD.
136::NFC (NFD);
# Global filter for the reverse (Latin-to-Arabic) direction: only these characters are touched.
137:: ( [[:Latin:] [%,.0-9;?ʾ-ʿ\u0302-\u0304\u0308-\u030A\u0323\u0330-\u0331\u0339;ˌ]] );
138
139