########################################################################
#
# MINIMAL FILTER: Persian-Latin
#
# Only characters matched by this filter are touched by the rules below.
#
:: [[:arabic:][:block=ARABIC:][ءآابةتثجحخدذرزسشصضطظعغفقكلمنهویيَُِّْ٠١٢٣٤٥٦٧٨٩پچژگی]] ;
:: NFKD (NFC) ;
#
#
########################################################################
#
########################################################################
#
# Define All Transformation Variables
#
########################################################################
#
$alef = ’;
$ayin = ‘;
# Mark appended to a Latin output so that two source characters sharing
# the same base romanization stay distinguishable in the reverse direction.
$disambig = ̱ ;
#
#
# Use this $wordBoundary until bug 2034 is fixed in ICU:
# http://bugs.icu-project.org/cgi-bin/icu-bugs/transliterate?id=2034;expression=boundary;user=guest
#
$wordBoundary = [^[:L:][:M:][:N:]] ;
#
#
########################################################################
# non-letters
#
# A separator standing between two decimal digits maps to the bare Latin
# punctuation; anywhere else it carries $disambig.
[:Nd:]{٫}[:Nd:] ↔ [:Nd:]{','}[:Nd:] ; # ARABIC DECIMAL SEPARATOR
[:Nd:]{٬}[:Nd:] ↔ [:Nd:]{'.'}[:Nd:] ; # ARABIC THOUSANDS SEPARATOR
٫ ↔ ',' $disambig ; # ARABIC DECIMAL SEPARATOR
٬ ↔ '.' $disambig ; # ARABIC THOUSANDS SEPARATOR
#  ٭ ↔ ; # ARABIC FIVE POINTED STAR // no need to transliterate
، ↔ ',' ; # ARABIC COMMA
؛ ↔ ';' ; # ARABIC SEMICOLON
؟ ↔ '?' ; # ARABIC QUESTION MARK
٪ ↔ '%' ; # ARABIC PERCENT SIGN
# Arabic-Indic digits carry $disambig; the Extended Arabic-Indic digits
# below map to the plain ASCII digits.
٠ ↔ 0 $disambig ; # ARABIC-INDIC DIGIT ZERO
١ ↔ 1 $disambig ; # ARABIC-INDIC DIGIT ONE
٢ ↔ 2 $disambig ; # ARABIC-INDIC DIGIT TWO
٣ ↔ 3 $disambig ; # ARABIC-INDIC DIGIT THREE
٤ ↔ 4 $disambig ; # ARABIC-INDIC DIGIT FOUR
٥ ↔ 5 $disambig ; # ARABIC-INDIC DIGIT FIVE
٦ ↔ 6 $disambig ; # ARABIC-INDIC DIGIT SIX
٧ ↔ 7 $disambig ; # ARABIC-INDIC DIGIT SEVEN
٨ ↔ 8 $disambig ; # ARABIC-INDIC DIGIT EIGHT
٩ ↔ 9 $disambig ; # ARABIC-INDIC DIGIT NINE
۰ ↔ 0 ; # EXTENDED ARABIC-INDIC DIGIT ZERO
۱ ↔ 1 ; # EXTENDED ARABIC-INDIC DIGIT ONE
۲ ↔ 2 ; # EXTENDED ARABIC-INDIC DIGIT TWO
۳ ↔ 3 ; # EXTENDED ARABIC-INDIC DIGIT THREE
۴ ↔ 4 ; # EXTENDED ARABIC-INDIC DIGIT FOUR
۵ ↔ 5 ; # EXTENDED ARABIC-INDIC DIGIT FIVE
۶ ↔ 6 ; # EXTENDED ARABIC-INDIC DIGIT SIX
۷ ↔ 7 ; # EXTENDED ARABIC-INDIC DIGIT SEVEN
۸ ↔ 8 ; # EXTENDED ARABIC-INDIC DIGIT EIGHT
۹ ↔ 9 ; # EXTENDED ARABIC-INDIC DIGIT NINE
#
########################################################################
#
# Rules moved to front to avoid masking
#
# The multi-character rules of Rule 4 and Rule 7 must precede the
# single-letter table, otherwise the single-letter rules would match first.
#
########################################################################
#
########################################################################
#
# BGN Page 89 Rule 4
#
# The character sequences كه , زه , سه , and گه may be romanized k·h, z·h,
# s·h, and g·h in order to differentiate those romanizations from the
# digraphs kh, zh, sh, and gh.
#
# NOTE(review): the first rule is written with ARABIC LETTER KAF, while the
# single-letter table maps ARABIC LETTER KEHEH to plain k; confirm whether
# KEHEH + HEH should be covered here as well.
#
########################################################################
#
كه → k·h ; # ARABIC LETTER KAF + HEH
زه → z·h ; # ARABIC LETTER ZAIN + HEH
سه → s·h ; # ARABIC LETTER SEEN + HEH
گه → g·h ; # ARABIC LETTER GAF + HEH
#
#
########################################################################
#
# End Rule 4
#
########################################################################
#
########################################################################
#
# BGN Page 91 Rule 7
#
# Double consonant sounds are represented in Arabic script by
# placing a shaddah ( ّ ) over a consonant character. In romanization
# the letter should be doubled. [The remainder of this rule deals with
# the definite article and is lexical.]
#
# Each doubled form repeats the romanization that the same letter has in
# the single-letter table further down.
#
########################################################################
#
بّ → bb ; # ARABIC LETTER BEH + SHADDA
پّ → pp ; # ARABIC LETTER PEH + SHADDA
تّ → tt ; # ARABIC LETTER TEH + SHADDA
ثّ → s̄s̄ ; # ARABIC LETTER THEH + SHADDA
جّ → jj ; # ARABIC LETTER JEEM + SHADDA
چّ → chch ; # ARABIC LETTER TCHEH + SHADDA
حّ → ḥḥ ; # ARABIC LETTER HAH + SHADDA
خّ → khkh ; # ARABIC LETTER KHAH + SHADDA
دّ → dd ; # ARABIC LETTER DAL + SHADDA
ذّ → z̄z̄ ; # ARABIC LETTER THAL + SHADDA
رّ → rr ; # ARABIC LETTER REH + SHADDA
زّ → zz ; # ARABIC LETTER ZAIN + SHADDA
ژّ → zhzh ; # ARABIC LETTER JEH + SHADDA
سّ → ss ; # ARABIC LETTER SEEN + SHADDA
شّ → shsh ; # ARABIC LETTER SHEEN + SHADDA
صّ → ṣṣ ; # ARABIC LETTER SAD + SHADDA
ضّ → ẕẕ ; # ARABIC LETTER DAD + SHADDA (single DAD is ẕ in this table)
طّ → ṭṭ ; # ARABIC LETTER TAH + SHADDA
ظّ → ẓẓ ; # ARABIC LETTER ZAH + SHADDA
عّ → $ayin $ayin ; # ARABIC LETTER AIN + SHADDA
غّ → ghgh ; # ARABIC LETTER GHAIN + SHADDA
فّ → ff ; # ARABIC LETTER FEH + SHADDA
قّ → qq ; # ARABIC LETTER QAF + SHADDA
كّ → kk ; # ARABIC LETTER KAF + SHADDA
لّ → ll ; # ARABIC LETTER LAM + SHADDA
مّ → mm ; # ARABIC LETTER MEEM + SHADDA
نّ → nn ; # ARABIC LETTER NOON + SHADDA
هّ → hh ; # ARABIC LETTER HEH + SHADDA
وّ → vv ; # ARABIC LETTER WAW + SHADDA (single WAW is v in this table)
یّ → yy ; # ARABIC LETTER FARSI YEH + SHADDA
#
#
########################################################################
#
# End Rule 7
#
########################################################################
#
########################################################################
#
# Start of Transformations
#
########################################################################
#
# Hamza and alef directly after a word boundary produce no output.
$wordBoundary{ء → ; # ARABIC LETTER HAMZA
ء → $alef ; # ARABIC LETTER HAMZA
$wordBoundary{ا → ; # ARABIC LETTER ALEF
آ → $alef ā ; # ARABIC LETTER ALEF WITH MADDA ABOVE
ب → b ; # ARABIC LETTER BEH
پ → p ; # ARABIC LETTER PEH
ت → t ; # ARABIC LETTER TEH
ة → h ; # ARABIC LETTER TEH MARBUTA
ث → s̄ ; # ARABIC LETTER THEH
ج → j ; # ARABIC LETTER JEEM
چ → ch ; # ARABIC LETTER TCHEH
ح → ḥ ; # ARABIC LETTER HAH
خ → kh ; # ARABIC LETTER KHAH
د → d ; # ARABIC LETTER DAL
ذ → z̄ ; # ARABIC LETTER THAL
ر → r ; # ARABIC LETTER REH
ز → z ; # ARABIC LETTER ZAIN
ژ → zh ; # ARABIC LETTER JEH
س → s ; # ARABIC LETTER SEEN
ش → sh ; # ARABIC LETTER SHEEN
ص → ṣ ; # ARABIC LETTER SAD
ض → ẕ ; # ARABIC LETTER DAD
ط → ṭ ; # ARABIC LETTER TAH
ظ → ẓ ; # ARABIC LETTER ZAH
ع → $ayin ; # ARABIC LETTER AIN
غ → gh ; # ARABIC LETTER GHAIN
ف → f ; # ARABIC LETTER FEH
ق → q ; # ARABIC LETTER QAF
ک ↔ k ; # ARABIC LETTER KEHEH
ك ↔ k $disambig ; # ARABIC LETTER KAF
گ → g ; # ARABIC LETTER GAF
ل → l ; # ARABIC LETTER LAM
م → m ; # ARABIC LETTER MEEM
ن → n ; # ARABIC LETTER NOON
ه → h ; # ARABIC LETTER HEH
و → v ; # ARABIC LETTER WAW
ی → y ; # ARABIC LETTER FARSI YEH
# Vowel marks: each "mark + letter" sequence is listed before the bare
# mark so that the longer sequence is tried first.
َا → ā ; # ARABIC FATHA + ALEF
َی → á ; # ARABIC FATHA + FARSI YEH
َوْ → ow ; # ARABIC FATHA + WAW + SUKUN
َ → a ; # ARABIC FATHA
# NOTE(review): the next rule is written with ARABIC LETTER YEH, whereas the
# FATHA rule above uses FARSI YEH; confirm whether KASRA + FARSI YEH is
# also meant to produce ī.
ِي → ī ; # ARABIC KASRA + YEH
ِ → e ; # ARABIC KASRA
ُو → ū ; # ARABIC DAMMA + WAW
ُ → o ; # ARABIC DAMMA
ْ → ; # ARABIC SUKUN
::NFC (NFD) ;
#
#
########################################################################
]]>