• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# © 2016 and later: Unicode, Inc. and others.
2# License & terms of use: http://www.unicode.org/copyright.html#License
3#
4# File: ar_ar_Latn_BGN.txt
5# Generated from CLDR
6#
7
8#
9########################################################################
10# BGN/PCGN 1956 System
11#
12# This system was adopted by the BGN in 1946 and by the PCGN
13# in 1956 and has been applied in the systematic romanization
14# of geographic names in Bahrain, Egypt, Iraq, Jordan,
15# Kuwait, Lebanon, Libya, Oman, Qatar, Saudi Arabia, Sudan,
16# Syria, Tunisia, the United Arab Emirates, and Yemen, all
17# of which have been covered by published BGN gazetteers.
18#
19# Originally prepared by Michael Everson <everson@evertype.com>
20########################################################################
21#
22# MINIMAL FILTER: Arabic-Latin
23#
24:: [[:arabic:][:block=ARABIC:][ءآابةتثجحخدذرزسشصضطظعغفقكلمنهوىي\u064B\u064C\u064D\u064E\u064F\u0650\u0651\u0652٠١٢٣٤٥٦٧٨٩ٱ]] ; # global filter: only Arabic-script characters, the ARABIC block, and the listed letters, marks and digits are processed
25:: NFKD (NFC) ; # forward direction: decompose with NFKD before the rules run; the parenthesized NFC applies to the reverse direction
26#
27#
28########################################################################
29#
30########################################################################
31#
32# Define All Transformation Variables
33#
34########################################################################
35#
36$alef = ’; # right single quotation mark; emitted by the HAMZA, ALEF WASLA and non-initial ALEF WITH MADDA rules
37$ayin = ‘; # left single quotation mark; emitted by the AIN rules
38$disambig =  \u0331 ; # COMBINING MACRON BELOW; appended where a second Arabic character shares a Latin output (e.g. KEHEH vs KAF, extended vs plain digits)
39#
40#
41# Use this $wordBoundary until bug 2034 is fixed in ICU:
42# http://bugs.icu-project.org/cgi-bin/icu-bugs/transliterate?id=2034;expression=boundary;user=guest
43#
44$wordBoundary =  [^[:L:][:M:][:N:]] ; # any character that is not a letter, mark or number
45#
46#
47########################################################################
48# non-letters
49[:Nd:]{٫}[:Nd:] ↔ [:Nd:]{','}[:Nd:] ; # ARABIC DECIMAL SEPARATOR between two decimal digits (no disambiguator needed in that context)
50[:Nd:]{٬}[:Nd:] ↔ [:Nd:]{'.'}[:Nd:] ; # ARABIC THOUSANDS SEPARATOR between two decimal digits (no disambiguator needed in that context)
51٫ ↔ ',' $disambig ; # ARABIC DECIMAL SEPARATOR elsewhere; $disambig keeps it distinct from ARABIC COMMA
52٬ ↔ '.' $disambig ; # ARABIC THOUSANDS SEPARATOR elsewhere; marked with $disambig
53#  ٭ ↔ ; # ARABIC FIVE POINTED STAR // no need to transliterate
54، ↔ ',' ; # ARABIC COMMA
55؛ ↔ ';' ; # ARABIC SEMICOLON
56؟ ↔ '?' ; # ARABIC QUESTION MARK
57٪ ↔ '%' ; # ARABIC PERCENT SIGN
58۰ ↔ 0 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ZERO ($disambig separates it from ARABIC-INDIC DIGIT ZERO)
59۱ ↔ 1 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ONE
60۲ ↔ 2 $disambig ; # EXTENDED ARABIC-INDIC DIGIT TWO
61۳ ↔ 3 $disambig ; # EXTENDED ARABIC-INDIC DIGIT THREE
62۴ ↔ 4 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FOUR
63۵ ↔ 5 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FIVE
64۶ ↔ 6 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SIX
65۷ ↔ 7 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SEVEN
66۸ ↔ 8 $disambig ; # EXTENDED ARABIC-INDIC DIGIT EIGHT
67۹ ↔ 9 $disambig ; # EXTENDED ARABIC-INDIC DIGIT NINE
68٠ ↔ 0 ; # ARABIC-INDIC DIGIT ZERO
69١ ↔ 1 ; # ARABIC-INDIC DIGIT ONE
70٢ ↔ 2 ; # ARABIC-INDIC DIGIT TWO
71٣ ↔ 3 ; # ARABIC-INDIC DIGIT THREE
72٤ ↔ 4 ; # ARABIC-INDIC DIGIT FOUR
73٥ ↔ 5 ; # ARABIC-INDIC DIGIT FIVE
74٦ ↔ 6 ; # ARABIC-INDIC DIGIT SIX
75٧ ↔ 7 ; # ARABIC-INDIC DIGIT SEVEN
76٨ ↔ 8 ; # ARABIC-INDIC DIGIT EIGHT
77٩ ↔ 9 ; # ARABIC-INDIC DIGIT NINE
78#
79########################################################################
80#
81# Rules moved to front to avoid masking
82#
83########################################################################
84#
85########################################################################
86#
87# BGN Page 8 Rule 5
88#
89# The character sequences ته , كه , ده , and سه may be romanized t·h, k·h,
90# d·h, and s·h in order to differentiate those romanizations from the
91# digraphs th, kh, dh, and sh.
92#
93########################################################################
94#
95ته → t·h ; # ARABIC LETTER TEH + HEH (middle dot keeps it distinct from the digraph th)
96كه → k·h ; # ARABIC LETTER KAF + HEH (distinct from the digraph kh)
97ده → d·h ; # ARABIC LETTER DAL + HEH (distinct from the digraph dh)
98سه → s·h ; # ARABIC LETTER SEEN + HEH (distinct from the digraph sh)
99#
100#
101########################################################################
102#
103# End Rule 5
104#
105########################################################################
106########################################################################
107#
108#
109# BGN Page 8 Rule 9
110#
111# Doubled consonant sounds are represented in Arabic script by placing
112# a shaddah ( \u0651 ) over a consonant character. In romanization the letter
113# should be doubled. [The remainder of this rule deals with the definite
114# article and is lexical.]
115#
116########################################################################
117#
118ب\u0651 → bb ; # ARABIC LETTER BEH + SHADDA
119ت\u0651 → tt ; # ARABIC LETTER TEH + SHADDA
120ث\u0651 → thth ; # ARABIC LETTER THEH + SHADDA
121ج\u0651 → jj ; # ARABIC LETTER JEEM + SHADDA
122ح\u0651 → ḩḩ ; # ARABIC LETTER HAH + SHADDA (cedilla form, same letter as the single HAH rule)
123خ\u0651 → khkh ; # ARABIC LETTER KHAH + SHADDA
124د\u0651 → dd ; # ARABIC LETTER DAL + SHADDA
125ذ\u0651 → dhdh ; # ARABIC LETTER THAL + SHADDA
126ر\u0651 → rr ; # ARABIC LETTER REH + SHADDA
127ز\u0651 → zz ; # ARABIC LETTER ZAIN + SHADDA
128س\u0651 → ss ; # ARABIC LETTER SEEN + SHADDA
129ش\u0651 → shsh ; # ARABIC LETTER SHEEN + SHADDA
130ص\u0651 → şş ; # ARABIC LETTER SAD + SHADDA (cedilla form, same letter as the single SAD rule)
131ض\u0651 → ḑḑ ; # ARABIC LETTER DAD + SHADDA (cedilla form, same letter as the single DAD rule)
132ط\u0651 → ţţ ; # ARABIC LETTER TAH + SHADDA (cedilla form, same letter as the single TAH rule)
133ظ\u0651 → z\u0327z\u0327 ; # ARABIC LETTER ZAH + SHADDA (z + combining cedilla, same as the single ZAH rule)
134ع\u0651 → $ayin $ayin ; # ARABIC LETTER AIN + SHADDA
135غ\u0651 → ghgh ; # ARABIC LETTER GHAIN + SHADDA
136ف\u0651 → ff ; # ARABIC LETTER FEH + SHADDA
137ق\u0651 → qq ; # ARABIC LETTER QAF + SHADDA
138ك\u0651 → kk ; # ARABIC LETTER KAF + SHADDA
139ل\u0651 → ll ; # ARABIC LETTER LAM + SHADDA
140م\u0651 → mm ; # ARABIC LETTER MEEM + SHADDA
141ن\u0651 → nn ; # ARABIC LETTER NOON + SHADDA
142ه\u0651 → hh ; # ARABIC LETTER HEH + SHADDA
143و\u0651 → ww ; # ARABIC LETTER WAW + SHADDA
144[ىي]\u0651 → yy ; # ARABIC LETTER ALEF MAKSURA or YEH + SHADDA (set covers both code points so a dotted YEH is doubled too)
145#
146#
147########################################################################
148#
149# End Rule 9
150#
151########################################################################
152#
153########################################################################
154#
155# Start of Transformations
156#
157########################################################################
158#
159$wordBoundary{ء →  ; # ARABIC LETTER HAMZA preceded by a $wordBoundary character: dropped
160ء → $alef ; # ARABIC LETTER HAMZA elsewhere
161$wordBoundary{ا →  ; # ARABIC LETTER ALEF preceded by a $wordBoundary character: dropped
162ٱ → $alef ; # ARABIC LETTER ALEF WASLA
163$wordBoundary{آ → ā ; # ARABIC LETTER ALEF WITH MADDA ABOVE preceded by a $wordBoundary character
164آ → $alef ā ; # ARABIC LETTER ALEF WITH MADDA ABOVE elsewhere
165ب → b ; # ARABIC LETTER BEH
166ت → t ; # ARABIC LETTER TEH
167ة → h ; # ARABIC LETTER TEH MARBUTA
168ث → th ; # ARABIC LETTER THEH
169ج → j ; # ARABIC LETTER JEEM
170ح → ḩ ; # ARABIC LETTER HAH
171خ → kh ; # ARABIC LETTER KHAH
172د → d ; # ARABIC LETTER DAL
173ذ → dh ; # ARABIC LETTER THAL
174ر → r ; # ARABIC LETTER REH
175ز → z ; # ARABIC LETTER ZAIN
176س → s ; # ARABIC LETTER SEEN
177ش → sh ; # ARABIC LETTER SHEEN
178ص → ş ; # ARABIC LETTER SAD
179ض → ḑ ; # ARABIC LETTER DAD
180ط → ţ ; # ARABIC LETTER TAH
181ظ → z\u0327 ; # ARABIC LETTER ZAH
182ع → $ayin ; # ARABIC LETTER AIN
183غ → gh ; # ARABIC LETTER GHAIN
184ف → f ; # ARABIC LETTER FEH
185ق → q ; # ARABIC LETTER QAF
186ک ↔ k $disambig ; # ARABIC LETTER KEHEH ($disambig separates it from KAF)
187ك ↔ k ; # ARABIC LETTER KAF
188ل → l ; # ARABIC LETTER LAM
189م → m ; # ARABIC LETTER MEEM
190ن → n ; # ARABIC LETTER NOON
191ه → h ; # ARABIC LETTER HEH
192و → w ; # ARABIC LETTER WAW
193[ىي] → y ; # ARABIC LETTER ALEF MAKSURA (bare) or YEH; the set adds YEH, which otherwise has no standalone rule
194\u064Eا → ā ; # ARABIC FATHA + ALEF
195\u064Eى → á ; # ARABIC FATHA + ALEF MAKSURA
196\u064Eي\u0652 → ay ; # ARABIC FATHA + YEH + SUKUN
197\u064Eو\u0652 → aw ; # ARABIC FATHA + WAW + SUKUN
198\u064E → a ; # ARABIC FATHA (must follow the longer FATHA + letter rules above)
199\u0650ي → ī ; # ARABIC KASRA + YEH
200\u0650 → i ; # ARABIC KASRA (must follow KASRA + YEH)
201\u064Fو → ū ; # ARABIC DAMMA + WAW
202\u064F → u ; # ARABIC DAMMA (must follow DAMMA + WAW)
203\u0652 →  ; # ARABIC SUKUN: dropped
204\u064B → aⁿ ; # ARABIC FATHATAN
205\u064D → iⁿ ; # ARABIC KASRATAN
206\u064C → uⁿ ; # ARABIC DAMMATAN
207::NFC (NFD) ; # forward direction: recompose the output with NFC; the parenthesized NFD applies to the reverse direction
208#
209#
210########################################################################
211
212