# © 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html
# Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
#
# File: sat_Olck_sat_FONIPA.txt
# Generated from CLDR
#

# Santali (Ol Chiki) → Santali (International Phonetic Alphabet)
#
# Output
# ------
#   m mː n nː ɳ ɳː ɲ ɲː ŋ ŋː
#   p pʰ pʼ b bʰ t tʰ tʼ d dʰ ʈ ʈʰ ɖ ɖʰ c cʰ cʼ k kʰ kʼ ɡ ʔ
#   s sː h
#   d\u0361ʒ
#   ɽ r
#   l lː
#   w wː w\u0303 w\u0303ː
#
#   i iː ĩ ĩː u uː ũ ũː
#   e eː ẽ ẽː ə əː ə\u0303 ə\u0303ː o oː õ õː
#   ɛ ɛː ɛ\u0303 ɛ\u0303ː ɔ ɔː ɔ\u0303 ɔ\u0303ː
#   a aː ã ãː
#
# References
# ----------
# [1] Michael Everson: Final proposal to encode the Ol Chiki script
#     in the UCS. ISO/IEC JTC1/SC2/WG2 Working Group Document N2984R,
#     September 21, 2005. http://std.dkuug.dk/jtc1/sc2/wg2/docs/n2984.pdf
#
# [2] George L. Campbell: Compendium of the World's Languages.
#     Volume 2: Ladakhi to Zuni. ISBN 0-415-20297-3. Taylor & Francis, 2000.
#     Pages 1454 to 1458.
#
# Notes
# -----
# According to [1] (page 3), ᱽ can only follow the four ejective
# consonants ᱵ /pʼ/, ᱡ /cʼ/, ᱫ /tʼ/, and ᱜ /kʼ/; these become
# ᱵᱽ /b/, ᱫᱽ /d/, ᱡᱽ /d\u0361ʒ/, and ᱜᱽ /ɡ/. In online texts, however,
# we have occasionally encountered ᱽ following non-ejective plosives,
# for example after ᱯ /p/. These might possibly be typos. Our rules
# try to be resilient and handle ᱯᱽ as /b/.
#
# According to [1] (page 2), U+1C7C PHAARKAA follows the four “glottal”
# consonants ᱵ /pʼ/, ᱡ /cʼ/, ᱫ /tʼ/, and ᱜ /kʼ/ (these are actually
# ejective, not glottal). In online texts, however, we have frequently
# encountered ᱼ following non-ejective consonants.

# Left-context set used by the consonant rules further down: any letter or
# combining mark. A consonant preceded by a member of this set is not at the
# start of a word.
$inword = [[:L:][:M:]];

# Some online texts use a decomposed form of U+1C7A MU-GAAHLAA TTUDDAAG.
# Recompose both possible orders of the two marks into the single character.
ᱹᱸ → ᱺ ;
ᱸᱹ → ᱺ ;
# Pass boundary: the rules that follow operate on the output of the rules above.
::null();
# To simplify the rules below, enforce a uniform ordering of marks.
# Move the length/voicing marks ᱻ and ᱼ behind the vowel-quality marks
# ᱹ, ᱸ and ᱺ so that later rules only need to match one order.
ᱻᱹ → ᱹᱻ ;
ᱻᱸ → ᱸᱻ ;
ᱻᱺ → ᱺᱻ ;
ᱼᱹ → ᱹᱼ ;
ᱼᱸ → ᱸᱼ ;
ᱼᱺ → ᱺᱼ ;
::null();

# Some online texts use U+1C7C PHAARKAA instead of U+1C7B RELAA for indicating
# long phonemes, presumably because the graphemes look similar in some fonts.
# Since phaarkaa is used for voicing ejectives and plosives (which cannot
# be lengthened), we rewrite phaarkaa to relaa.
[ᱚᱟᱤᱩᱮᱳᱶᱢᱝᱞᱱ] [ᱹᱸᱺ]* {ᱼ} → ᱻ ;
::null();

# Main pass. Within each letter group the longer sequences come first so
# that they win over the bare-letter fallback at the end of the group.

ᱚᱹᱻ → ɔː ;
ᱚᱹ → ɔ ;
ᱚᱸᱻ → ɔ\u0303ː ;
ᱚᱸ → ɔ\u0303 ;
ᱚᱺᱻ → ɔ\u0303ː ;
ᱚᱺ → ɔ\u0303 ;
ᱚᱻ → ɔː ;
ᱚ → ɔ ;

# ᱛ is a plain (non-ejective) stop. Like the other plain stops ᱠ, ᱪ, ᱯ
# and ᱴ it is voiced only when explicitly followed by ᱽ; it must not be
# voiced merely because it occurs inside a word. Position-based voicing
# is reserved for the four ejectives ᱜ, ᱡ, ᱫ and ᱵ.
ᱛᱼ → t ;
ᱛᱷ → tʰ ;
ᱛᱽ → d ;
ᱛ → t ;

# Ejective: voiced inside a word unless marked with ᱼ.
ᱜᱼ → kʼ ;
ᱜᱷ → kʰ ;
ᱜᱽ → ɡ ;
$inword {ᱜ} → ɡ ;
ᱜ → kʼ ;

ᱝᱻ → ŋː ;
ᱝ → ŋ ;

ᱞᱻ → lː ;
ᱞ → l ;

ᱟᱹᱻ → əː ;
ᱟᱹ → ə ;
ᱟᱸᱻ → ãː ;
ᱟᱸ → ã ;
ᱟᱺᱻ → ə\u0303ː ;
ᱟᱺ → ə\u0303 ;
ᱟᱻ → aː ;
ᱟ → a ;

ᱠᱼ → k ;
ᱠᱷ → kʰ ;
ᱠᱽ → ɡ ;
ᱠ → k ;

# Ejective: voiced inside a word unless marked with ᱼ.
ᱡᱼ → cʼ ;
ᱡᱷ → cʰ ;
ᱡᱽ → d\u0361ʒ ;
$inword {ᱡ} → d\u0361ʒ ;
ᱡ → cʼ ;

ᱢᱻ → mː ;
ᱢ → m ;

# According to [1], ᱣ is sometimes /v/ and sometimes /w/.
# TODO: Find out if there is a rule for this.
ᱣᱸ → w\u0303 ;
ᱣ → w ;

ᱤᱹᱻ → iː ;
ᱤᱹ → i ;
ᱤᱸᱻ → ĩː ;
ᱤᱸ → ĩ ;
ᱤᱺᱻ → ĩː ;
ᱤᱺ → ĩ ;
ᱤᱻ → iː ;
ᱤ → i ;

ᱥᱻ → sː ;
ᱥ → s ;

# According to [1], ᱦ is sometimes /h/ and sometimes /ʔ/.
# TODO: Find out if there is a rule for this.
ᱦ → h ;

ᱧᱻ → ɲː ;
ᱧ → ɲ ;

# A relaa after ᱨ is consumed without producing a length mark.
ᱨᱻ → r ;
ᱨ → r ;

ᱩᱹᱻ → uː ;
ᱩᱹ → u ;
ᱩᱸᱻ → ũː ;
ᱩᱸ → ũ ;
ᱩᱺᱻ → ũː ;
ᱩᱺ → ũ ;
ᱩᱻ → uː ;
ᱩ → u ;

ᱪᱼ → c ;
ᱪᱷ → cʰ ;
ᱪᱽ → d\u0361ʒ ;
ᱪ → c ;

# Ejective: voiced inside a word unless marked with ᱼ.
# NOTE(review): the Output list in the file header includes dʰ, but no rule
# produces it; confirm whether ᱫᱷ is meant to yield tʰ (cf. ᱵᱷ → bʰ).
ᱫᱼ → tʼ ;
ᱫᱷ → tʰ ;
ᱫᱽ → d ;
$inword {ᱫ} → d ;
ᱫ → tʼ ;

ᱬᱻ → ɳː ;
ᱬ → ɳ ;

# TODO: Verify how ᱭ should be transcribed in clusters such as ᱵᱷᱭᱨᱚᱵ;
# the output currently produced for that word seems unlikely.
# ᱭ is U+1C6D OL CHIKI LETTER UY, the palatal glide. It must not share its
# output with ᱦ, which is the letter that represents /h/.
ᱭ → j ;

# Longer sequences precede the bare-letter fallback of each group.
ᱮᱹᱻ → ɛː ;
ᱮᱹ → ɛ ;
ᱮᱺᱻ → ɛ\u0303ː ;
ᱮᱺ → ɛ\u0303 ;
ᱮᱸᱻ → ẽː ;
ᱮᱸ → ẽ ;
ᱮᱻ → eː ;
ᱮ → e ;

# Plain stop: voiced only when followed by ᱽ.
ᱯᱼ → p ;
ᱯᱷ → pʰ ;
ᱯᱽ → b ;
ᱯ → p ;

ᱰᱷ → ɖʰ ;
ᱰ → ɖ ;

ᱱᱻ → nː ;
ᱱ → n ;

# A relaa after ᱲ is consumed without producing a length mark.
ᱲᱻ → ɽ ;
ᱲ → ɽ ;

ᱳᱸᱻ → õː ;
ᱳᱸ → õ ;
ᱳᱻ → oː ;
ᱳ → o ;

# Plain stop: voiced only when followed by ᱽ.
ᱴᱼ → ʈ ;
ᱴᱷ → ʈʰ ;
ᱴᱽ → ɖ ;
ᱴ → ʈ ;

# Ejective: voiced inside a word (preceded by a letter or mark) unless
# marked with ᱼ.
ᱵᱼ → pʼ ;
ᱵᱷ → bʰ ;
ᱵᱽ → b ;
$inword {ᱵ} → b ;
ᱵ → pʼ ;

ᱶᱻ → w\u0303ː ;
ᱶ → w\u0303 ;