# © 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html
# Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
#
# File: Arab_Latn.txt
# Generated from CLDR
#

# Generally follows UNGEGN
# http://www.eki.ee/wgrs/rom1_ar.pdf
# Occasionally deviates in the direction of ISO 233
# http://homepage.mac.com/sirbinks/pdf/Arabic.pdf
# a) where required for disambiguation.
# b) with underdot instead of cedilla for letters like SAD,
#    since those are explicitly in Unicode for transliteration.
# c) with extra non-Arabic-language letters, like PEH
#
# Does *not* do assimilation of "al", nor hyphenation.
# While it could be done, we need to determine whether a prefix "al" could
# occur other than as the definite article (since no space is used).

# ---------------------------------------------------------------------------
# Global setup
# ---------------------------------------------------------------------------
# Forward (Arabic → Latin) filter: only characters in this set are touched.
:: [[:Arabic:][:block=ARABIC:][ⁿ،؛؟ـ\u064B-\u0655٠-٬۰-۹﷼ښ]] ;
# Normalization step: NFKD when running forward, NFC when running in reverse.
:: NFKD (NFC);

# Combining marks / modifier letters used on the Latin side so that distinct
# Arabic letters never share a romanization (keeps the mapping reversible).
$disambig = \u0331 ;    # COMBINING MACRON BELOW
$disambig2 = \u0330 ;   # COMBINING TILDE BELOW
$under = \u0323 ;       # COMBINING DOT BELOW
$descender = ˌ ;        # MODIFIER LETTER LOW VERTICAL LINE
# Combining marks whose canonical combining class is neither 0 nor 230;
# used by the TEH MARBUTA reverse rule further down.
$notAbove = [[:^ccc=0:] & [:^ccc=230:]] ;

# ---------------------------------------------------------------------------
# Non-letters: punctuation and digits
# ---------------------------------------------------------------------------
# Between two decimal digits the separators map to the bare Latin character;
# the braces mark the text that is replaced, the [:Nd:] parts are context only.
[:Nd:]{٫}[:Nd:] ↔ [:Nd:]{','}[:Nd:] ; # ARABIC DECIMAL SEPARATOR
[:Nd:]{٬}[:Nd:] ↔ [:Nd:]{'.'}[:Nd:] ; # ARABIC THOUSANDS SEPARATOR
# Anywhere else the separators carry $disambig so they stay distinguishable
# from ARABIC COMMA and from a plain full stop.
٫ ↔ ',' $disambig ; # ARABIC DECIMAL SEPARATOR
٬ ↔ '.' $disambig ; # ARABIC THOUSANDS SEPARATOR
# ٭ ↔ ; # ARABIC FIVE POINTED STAR // no need to transliterate
، ↔ ',' ; # ARABIC COMMA
؛ ↔ ';' ; # ARABIC SEMICOLON
؟ ↔ '?' ; # ARABIC QUESTION MARK
٪ ↔ '%' ; # ARABIC PERCENT SIGN

# Extended Arabic-Indic digits: ASCII digit plus $disambig.
۰ ↔ 0 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ZERO
۱ ↔ 1 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ONE
۲ ↔ 2 $disambig ; # EXTENDED ARABIC-INDIC DIGIT TWO
۳ ↔ 3 $disambig ; # EXTENDED ARABIC-INDIC DIGIT THREE
۴ ↔ 4 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FOUR
۵ ↔ 5 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FIVE
۶ ↔ 6 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SIX
۷ ↔ 7 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SEVEN
۸ ↔ 8 $disambig ; # EXTENDED ARABIC-INDIC DIGIT EIGHT
۹ ↔ 9 $disambig ; # EXTENDED ARABIC-INDIC DIGIT NINE

# Arabic-Indic digits: plain ASCII digit.
٠ ↔ 0 ; # ARABIC-INDIC DIGIT ZERO
١ ↔ 1 ; # ARABIC-INDIC DIGIT ONE
٢ ↔ 2 ; # ARABIC-INDIC DIGIT TWO
٣ ↔ 3 ; # ARABIC-INDIC DIGIT THREE
٤ ↔ 4 ; # ARABIC-INDIC DIGIT FOUR
٥ ↔ 5 ; # ARABIC-INDIC DIGIT FIVE
٦ ↔ 6 ; # ARABIC-INDIC DIGIT SIX
٧ ↔ 7 ; # ARABIC-INDIC DIGIT SEVEN
٨ ↔ 8 ; # ARABIC-INDIC DIGIT EIGHT
٩ ↔ 9 ; # ARABIC-INDIC DIGIT NINE

# ---------------------------------------------------------------------------
# Letters
# ---------------------------------------------------------------------------
# Long vowels: short-vowel mark followed by its letter ↔ vowel + COMBINING MACRON.
# These two-character sources must precede the single-character rules below.
\u064Eا ↔ a\u0304 ; # ARABIC FATHA, ARABIC LETTER ALEF
\u064Fو ↔ u\u0304 ; # ARABIC DAMMA, ARABIC LETTER WAW
\u0650ي ↔ i\u0304 ; # ARABIC KASRA, ARABIC LETTER YEH

# Multi-character Latin targets are listed ahead of the single-letter rules
# so that the shorter rules cannot mask them.
ث ↔ t h $disambig ; # ARABIC LETTER THEH
ذ ↔ d h $disambig ; # ARABIC LETTER THAL
ش ↔ s h $disambig ; # ARABIC LETTER SHEEN
ص ↔ s $under ; # ARABIC LETTER SAD
ض ↔ d $under ; # ARABIC LETTER DAD
ط ↔ t $under ; # ARABIC LETTER TAH
ظ ↔ z $under ; # ARABIC LETTER ZAH
غ ↔ g h $disambig ; # ARABIC LETTER GHAIN

# WARNING: special case
# ←t, umlaut, half-ring below→ will be canonically ordered as ←t, half-ring below, umlaut→
# so on the return, we have to skip over (but preserve) the half-ring below (or others like it)
# ة\u0655 ← t\u0339\u0308 ; # LATIN SMALL LETTER T, COMBINING RIGHT HALF RING BELOW, COMBINING DIAERESIS
ة ↔ t \u0308 ; # ARABIC LETTER TEH MARBUTA
# Reverse only: 't', one or more $notAbove marks, then the diaeresis. The marks
# are captured as $1 and re-emitted after the cursor ('|') so later rules still see them.
ة | $1 ← t ($notAbove+) \u0308 ; # ARABIC LETTER TEH MARBUTA

# Letters not used for the Arabic language
ژ ↔ z h $disambig ; # ARABIC LETTER JEH
ڭ ↔ n $disambig g ; # ARABIC LETTER NG
ۋ ↔ v $disambig ; # ARABIC LETTER VE
ی ↔ y $disambig2 ; # ARABIC LETTER FARSI YEH
ښ ↔ s $descender ; # ARABIC LETTER SEEN WITH DOT BELOW AND DOT ABOVE

# Letters of the Arabic language
ء ↔ ʾ ; # ARABIC LETTER HAMZA
ا ↔ a $under ; # ARABIC LETTER ALEF
ب ↔ b ; # ARABIC LETTER BEH
ت ↔ t ; # ARABIC LETTER TEH
ج ↔ j ; # ARABIC LETTER JEEM
ح ↔ h $under ; # ARABIC LETTER HAH
خ ↔ k h $disambig ; # ARABIC LETTER KHAH
د ↔ d ; # ARABIC LETTER DAL
ر ↔ r ; # ARABIC LETTER REH
ز ↔ z ; # ARABIC LETTER ZAIN
س ↔ s ; # ARABIC LETTER SEEN
ع ↔ ʿ ; # ARABIC LETTER AIN
# Forward only: tatweel is dropped and has no Latin counterpart.
ـ → ; # ARABIC TATWEEL
ف ↔ f ; # ARABIC LETTER FEH
ق ↔ q ; # ARABIC LETTER QAF
ک ↔ k $disambig ; # ARABIC LETTER KEHEH
ك ↔ k ; # ARABIC LETTER KAF
ل ↔ l ; # ARABIC LETTER LAM
م ↔ m ; # ARABIC LETTER MEEM
ن ↔ n ; # ARABIC LETTER NOON
ه ↔ h ; # ARABIC LETTER HEH
و ↔ w ; # ARABIC LETTER WAW
ى ↔ y $disambig ; # ARABIC LETTER ALEF MAKSURA
ي ↔ y ; # ARABIC LETTER YEH

# Vowel marks and other diacritics
\u064B ↔ aⁿ ; # ARABIC FATHATAN
\u064C ↔ uⁿ ; # ARABIC DAMMATAN
\u064D ↔ iⁿ ; # ARABIC KASRATAN
\u064E ↔ a ; # ARABIC FATHA
\u064F ↔ u ; # ARABIC DAMMA
\u0650 ↔ i ; # ARABIC KASRA
\u0651 ↔ \u0303 ; # ARABIC SHADDA
\u0652 ↔ \u030A ; # ARABIC SUKUN

# Special combining marks
\u0653 ↔ \u0302 ; # ARABIC MADDAH ABOVE
\u0654 ↔ \u0309 ; # ARABIC HAMZA ABOVE
\u0655 ↔ \u0339 ; # ARABIC HAMZA BELOW

# Some non-Arabic-language letters (not in UNGEGN)
پ ↔ p ; # ARABIC LETTER PEH
چ ↔ c h $disambig ; # ARABIC LETTER TCHEH
ڤ ↔ v ; # ARABIC LETTER VEH
# ڥ ↔ v $disambig ; # ARABIC LETTER FEH WITH THREE DOTS BELOW
# ڢ ↔ f $disambig ; # ARABIC LETTER FEH WITH DOT MOVED BELOW
گ ↔ g ; # ARABIC LETTER GAF

# ---------------------------------------------------------------------------
# Fallbacks (reverse direction only)
# ---------------------------------------------------------------------------
# Latin letters with no rule of their own are rewritten to a letter that has
# one; the leading '|' leaves the cursor before the replacement so the rules
# above are applied to it on the next pass.
| s ← c } [eiy] ;   # 'c' before e, i or y
| k ← c ;           # any other 'c'
| i ← e ;
| u ← o ;
| ks ← x ;
| n ← ⁿ ;

# ---------------------------------------------------------------------------
# Closing steps
# ---------------------------------------------------------------------------
# Reverse only: lowercase the Latin input.
:: (lower) ;
# NFC when running forward, NFD when running in reverse.
:: NFC (NFD) ;
# Reverse (Latin → Arabic) filter: only characters in this set are touched.
:: ( [[:Latin:] [%,.0-9;?ʾ-ʿ\u0302-\u0304\u0308-\u030A\u0323\u0330-\u0331\u0339;ˌ]] );