# © 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html#License
#
# File: Arab_Latn.txt
# Generated from CLDR
#

# Purpose: rule set for transliterating Arabic script to Latin script (rules
# marked ↔ also run in the Latin-to-Arabic direction; ← rules run only in
# that reverse direction, → rules only in the forward direction).
#
# Generally follows UNGEGN
# http://www.eki.ee/wgrs/rom1_ar.pdf
# Occasionally deviates in the direction of ISO 233
# http://homepage.mac.com/sirbinks/pdf/Arabic.pdf
# a) where required for disambiguation.
# b) with underdot instead of cedilla for letters like SAD,
# since those are explicitly in Unicode for transliteration.
# c) with extra non-Arabic-language letters, like PEH
#
# Does *not* do assimilation of "al", nor hyphenation.
# While it could be done, we need to determine whether a prefix "al" could
# occur other than as the definite article (since no space is used).

# Forward (Arabic-to-Latin) global filter: only characters in this set are
# considered by the rules below when converting from Arabic.
:: [[:Arabic:][:block=ARABIC:][ⁿ،؛؟ـ\u064B-\u0655٠-٬۰-۹﷼ښ]] ;
# Normalization step: NFKD in the forward direction, NFC (the parenthesized
# part) in the reverse direction.
:: NFKD (NFC);

# Variables. The combining marks below are appended to Latin output so that
# distinct Arabic letters sharing a base romanization stay distinguishable
# on the way back.
$disambig = \u0331 ;    # U+0331 COMBINING MACRON BELOW
$disambig2 = \u0330 ;   # U+0330 COMBINING TILDE BELOW
$under = \u0323 ;       # U+0323 COMBINING DOT BELOW
$descender = ˌ;         # U+02CC MODIFIER LETTER LOW VERTICAL LINE
# Characters whose canonical combining class is neither 0 nor 230, i.e.
# combining marks that are not in the "above" class. Used by the
# TEH MARBUTA special case further down.
$notAbove = [[:^ccc=0:] & [:^ccc=230:]];

# non-letters
# The first two rules only fire when the separator sits between two decimal
# digits ({...} marks the text actually replaced; the [:Nd:] on either side
# is context). The two rules after them handle a separator in any other
# position and tag it with $disambig.
[:Nd:]{٫}[:Nd:] ↔ [:Nd:]{','}[:Nd:] ; # ARABIC DECIMAL SEPARATOR
[:Nd:]{٬}[:Nd:] ↔ [:Nd:]{'.'}[:Nd:] ; # ARABIC THOUSANDS SEPARATOR
٫ ↔ ',' $disambig ; # ARABIC DECIMAL SEPARATOR
٬ ↔ '.' $disambig ; # ARABIC THOUSANDS SEPARATOR
# ٭ ↔ ; # ARABIC FIVE POINTED STAR // no need to transliterate
، ↔ ',' ; # ARABIC COMMA
؛ ↔ ';' ; # ARABIC SEMICOLON
؟ ↔ '?' ; # ARABIC QUESTION MARK
٪ ↔ '%' ; # ARABIC PERCENT SIGN
# Extended Arabic-Indic digits carry $disambig so they can be told apart
# from the plain Arabic-Indic digits, which map to bare ASCII digits.
۰ ↔ 0 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ZERO
۱ ↔ 1 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ONE
۲ ↔ 2 $disambig ; # EXTENDED ARABIC-INDIC DIGIT TWO
۳ ↔ 3 $disambig ; # EXTENDED ARABIC-INDIC DIGIT THREE
۴ ↔ 4 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FOUR
۵ ↔ 5 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FIVE
۶ ↔ 6 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SIX
۷ ↔ 7 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SEVEN
۸ ↔ 8 $disambig ; # EXTENDED ARABIC-INDIC DIGIT EIGHT
۹ ↔ 9 $disambig ; # EXTENDED ARABIC-INDIC DIGIT NINE
٠ ↔ 0 ; # ARABIC-INDIC DIGIT ZERO
١ ↔ 1 ; # ARABIC-INDIC DIGIT ONE
٢ ↔ 2 ; # ARABIC-INDIC DIGIT TWO
٣ ↔ 3 ; # ARABIC-INDIC DIGIT THREE
٤ ↔ 4 ; # ARABIC-INDIC DIGIT FOUR
٥ ↔ 5 ; # ARABIC-INDIC DIGIT FIVE
٦ ↔ 6 ; # ARABIC-INDIC DIGIT SIX
٧ ↔ 7 ; # ARABIC-INDIC DIGIT SEVEN
٨ ↔ 8 ; # ARABIC-INDIC DIGIT EIGHT
٩ ↔ 9 ; # ARABIC-INDIC DIGIT NINE
# letters
# long vowels
# A short-vowel mark followed by its matching letter becomes the vowel plus
# U+0304 COMBINING MACRON. These two-character sources are listed ahead of
# the single-character rules for the same marks and letters further down.
\u064Eا↔ a\u0304 ; # ARABIC FATHA, ARABIC LETTER ALEF
\u064Fو ↔ u\u0304 ; # ARABIC DAMMA, ARABIC LETTER WAW
\u0650ي ↔ i\u0304 ; # ARABIC KASRA, ARABIC LETTER YEH
# longer items moved here to prevent masking
# (i.e. multi-character Latin sequences such as "th"+mark or "s"+underdot
# are listed before the plain "t", "s", "d", ... rules that appear later.)
ث ↔ t h $disambig ; # ARABIC LETTER THEH
ذ ↔ d h $disambig ; # ARABIC LETTER THAL
ش ↔ s h $disambig ; # ARABIC LETTER SHEEN
ص ↔ s $under ; # ARABIC LETTER SAD
ض ↔ d $under ; # ARABIC LETTER DAD
ط ↔ t $under ; # ARABIC LETTER TAH
ظ ↔ z $under ; # ARABIC LETTER ZAH
غ ↔ g h $disambig ; # ARABIC LETTER GHAIN
# WARNING: special case
# ←t, umlaut, half-ring below→ will be canonically ordered as ←t, half-ring below, umlaut→
# so on the return, we have to skip over (but preserve) the half-ring below (or others like it)
# ة\u0655 ← t\u0339\u0308 ; # LATIN SMALL LETTER T, COMBINING RIGHT HALF RING BELOW, COMBINING DIAERESIS
ة ↔ t \u0308 ; # ARABIC LETTER TEH MARBUTA
# Reverse-only rule: "t", one or more $notAbove marks (captured as $1), then
# U+0308 yields TEH MARBUTA; the captured marks are written after the cursor
# (|) so they are still processed by the remaining rules.
ة | $1 ← t ($notAbove+) \u0308 ; # ARABIC LETTER TEH MARBUTA
# non-Arabic language
ژ ↔ z h $disambig ; # ARABIC LETTER JEH
ڭ ↔ n $disambig g ; # ARABIC LETTER NG
ۋ ↔ v $disambig ; # ARABIC LETTER VE
ی ↔ y $disambig2 ; # ARABIC LETTER FARSI YEH
ښ ↔ s $descender; # ARABIC LETTER SEEN WITH DOT BELOW AND DOT ABOVE
# Arabic language
ء ↔ ʾ ; # ARABIC LETTER HAMZA
ا ↔ a $under; # ARABIC LETTER ALEF
ب ↔ b ; # ARABIC LETTER BEH
ت ↔ t ; # ARABIC LETTER TEH
ج ↔ j ; # ARABIC LETTER JEEM
ح ↔ h $under ; # ARABIC LETTER HAH
خ ↔ k h $disambig ; # ARABIC LETTER KHAH
د ↔ d ; # ARABIC LETTER DAL
ر ↔ r ; # ARABIC LETTER REH
ز ↔ z ; # ARABIC LETTER ZAIN
س ↔ s ; # ARABIC LETTER SEEN
ع ↔ ʿ ; # ARABIC LETTER AIN
# Forward-only: TATWEEL is removed from the output and nothing maps back to it.
ـ → ; # ARABIC TATWEEL
ف ↔ f ; # ARABIC LETTER FEH
ق ↔ q ; # ARABIC LETTER QAF
ک ↔ k $disambig ; # ARABIC LETTER KEHEH
ك ↔ k ; # ARABIC LETTER KAF
ل ↔ l ; # ARABIC LETTER LAM
م ↔ m ; # ARABIC LETTER MEEM
ن ↔ n ; # ARABIC LETTER NOON
ه ↔ h ; # ARABIC LETTER HEH
و ↔ w ; # ARABIC LETTER WAW
ى ↔ y $disambig ; # ARABIC LETTER ALEF MAKSURA
ي ↔ y ; # ARABIC LETTER YEH
# Tanween marks map to the short vowel followed by superscript n (ⁿ).
\u064B ↔ aⁿ ; # ARABIC FATHATAN
\u064C ↔ uⁿ ; # ARABIC DAMMATAN
\u064D ↔ iⁿ ; # ARABIC KASRATAN
\u064E ↔ a ; # ARABIC FATHA
\u064F ↔ u ; # ARABIC DAMMA
\u0650 ↔ i ; # ARABIC KASRA
# SHADDA and SUKUN map to Latin combining marks (U+0303 COMBINING TILDE and
# U+030A COMBINING RING ABOVE respectively).
\u0651 ↔ \u0303 ; # ARABIC SHADDA
\u0652 ↔ \u030A ; # ARABIC SUKUN
# special combining marks
\u0653 ↔ \u0302 ; # ARABIC MADDAH ABOVE
\u0654 ↔ \u0309 ; # ARABIC HAMZA ABOVE
\u0655 ↔ \u0339 ; # ARABIC HAMZA BELOW
# Some non-Arabic language (not in UNGEGN)
پ ↔ p ; # ARABIC LETTER PEH
چ ↔ c h $disambig ; # ARABIC LETTER TCHEH
ڤ ↔ v ; # ARABIC LETTER VEH
# ڥ ↔ v $disambig ; # ARABIC LETTER FEH WITH THREE DOTS BELOW
# ڢ ↔ f $disambig ; # ARABIC LETTER FEH WITH DOT MOVED BELOW
گ ↔ g ; # ARABIC LETTER GAF
# fallbacks
# Reverse-only (Latin-to-Arabic) rewrites for Latin letters that have no
# rule of their own above. Each one replaces the letter with another Latin
# spelling and leaves the cursor (|) in front of the replacement, so the
# replacement text is run through the rules again.
| s ← c } [eiy];   # "c" before e, i or y is treated as "s" ...
| k ← c ;          # ... and any other "c" as "k"
| i ← e ;
| u ← o ;
| ks ← x ;
| n ← ⁿ;           # a superscript n not consumed by the tanween rules above
# Reverse direction only: lowercase the Latin input (the forward side of this
# line is empty).
:: (lower) ;
# NFC in the forward direction, NFD in the reverse direction.
::NFC (NFD);
# Reverse (Latin-to-Arabic) global filter: Latin-script characters plus the
# punctuation, ASCII digits, modifier letters and combining marks that the
# rules above emit.
:: ( [[:Latin:] [%,.0-9;?ʾ-ʿ\u0302-\u0304\u0308-\u030A\u0323\u0330-\u0331\u0339;ˌ]] );

