"""Tests for the script, block and OpenType-tag helpers in fontTools.unicodedata."""
from __future__ import (
    print_function, division, absolute_import, unicode_literals)
from fontTools.misc.py23 import *

from fontTools import unicodedata

import pytest


def test_script():
    """script() returns the expected four-letter script code for a character."""
    assert unicodedata.script("a") == "Latn"
    assert unicodedata.script(unichr(0)) == "Zyyy"
    assert unicodedata.script(unichr(0x0378)) == "Zzzz"
    assert unicodedata.script(unichr(0x10FFFF)) == "Zzzz"

    # these were randomly sampled, one character per script
    assert unicodedata.script(unichr(0x1E918)) == 'Adlm'
    assert unicodedata.script(unichr(0x1170D)) == 'Ahom'
    assert unicodedata.script(unichr(0x145A0)) == 'Hluw'
    assert unicodedata.script(unichr(0x0607)) == 'Arab'
    assert unicodedata.script(unichr(0x056C)) == 'Armn'
    assert unicodedata.script(unichr(0x10B27)) == 'Avst'
    assert unicodedata.script(unichr(0x1B41)) == 'Bali'
    assert unicodedata.script(unichr(0x168AD)) == 'Bamu'
    assert unicodedata.script(unichr(0x16ADD)) == 'Bass'
    assert unicodedata.script(unichr(0x1BE5)) == 'Batk'
    assert unicodedata.script(unichr(0x09F3)) == 'Beng'
    assert unicodedata.script(unichr(0x11C5B)) == 'Bhks'
    assert unicodedata.script(unichr(0x3126)) == 'Bopo'
    assert unicodedata.script(unichr(0x1103B)) == 'Brah'
    assert unicodedata.script(unichr(0x2849)) == 'Brai'
    assert unicodedata.script(unichr(0x1A0A)) == 'Bugi'
    assert unicodedata.script(unichr(0x174E)) == 'Buhd'
    assert unicodedata.script(unichr(0x18EE)) == 'Cans'
    assert unicodedata.script(unichr(0x102B7)) == 'Cari'
    assert unicodedata.script(unichr(0x1053D)) == 'Aghb'
    assert unicodedata.script(unichr(0x11123)) == 'Cakm'
    assert unicodedata.script(unichr(0xAA1F)) == 'Cham'
    assert unicodedata.script(unichr(0xAB95)) == 'Cher'
    assert unicodedata.script(unichr(0x1F0C7)) == 'Zyyy'
    assert unicodedata.script(unichr(0x2C85)) == 'Copt'
    assert unicodedata.script(unichr(0x12014)) == 'Xsux'
    assert unicodedata.script(unichr(0x1082E)) == 'Cprt'
    assert unicodedata.script(unichr(0xA686)) == 'Cyrl'
    assert unicodedata.script(unichr(0x10417)) == 'Dsrt'
    assert unicodedata.script(unichr(0x093E)) == 'Deva'
    assert unicodedata.script(unichr(0x1BC4B)) == 'Dupl'
    assert unicodedata.script(unichr(0x1310C)) == 'Egyp'
    assert unicodedata.script(unichr(0x1051C)) == 'Elba'
    assert unicodedata.script(unichr(0x2DA6)) == 'Ethi'
    assert unicodedata.script(unichr(0x10AD)) == 'Geor'
    assert unicodedata.script(unichr(0x2C52)) == 'Glag'
    assert unicodedata.script(unichr(0x10343)) == 'Goth'
    assert unicodedata.script(unichr(0x11371)) == 'Gran'
    assert unicodedata.script(unichr(0x03D0)) == 'Grek'
    assert unicodedata.script(unichr(0x0AAA)) == 'Gujr'
    assert unicodedata.script(unichr(0x0A4C)) == 'Guru'
    assert unicodedata.script(unichr(0x23C9F)) == 'Hani'
    assert unicodedata.script(unichr(0xC259)) == 'Hang'
    assert unicodedata.script(unichr(0x1722)) == 'Hano'
    assert unicodedata.script(unichr(0x108F5)) == 'Hatr'
    assert unicodedata.script(unichr(0x05C2)) == 'Hebr'
    assert unicodedata.script(unichr(0x1B072)) == 'Hira'
    assert unicodedata.script(unichr(0x10847)) == 'Armi'
    assert unicodedata.script(unichr(0x033A)) == 'Zinh'
    assert unicodedata.script(unichr(0x10B66)) == 'Phli'
    assert unicodedata.script(unichr(0x10B4B)) == 'Prti'
    assert unicodedata.script(unichr(0xA98A)) == 'Java'
    assert unicodedata.script(unichr(0x110B2)) == 'Kthi'
    assert unicodedata.script(unichr(0x0CC6)) == 'Knda'
    assert unicodedata.script(unichr(0x3337)) == 'Kana'
    assert unicodedata.script(unichr(0xA915)) == 'Kali'
    assert unicodedata.script(unichr(0x10A2E)) == 'Khar'
    assert unicodedata.script(unichr(0x17AA)) == 'Khmr'
    assert unicodedata.script(unichr(0x11225)) == 'Khoj'
    assert unicodedata.script(unichr(0x112B6)) == 'Sind'
    assert unicodedata.script(unichr(0x0ED7)) == 'Laoo'
    assert unicodedata.script(unichr(0xAB3C)) == 'Latn'
    assert unicodedata.script(unichr(0x1C48)) == 'Lepc'
    assert unicodedata.script(unichr(0x1923)) == 'Limb'
    assert unicodedata.script(unichr(0x1071D)) == 'Lina'
    assert unicodedata.script(unichr(0x100EC)) == 'Linb'
    assert unicodedata.script(unichr(0xA4E9)) == 'Lisu'
    assert unicodedata.script(unichr(0x10284)) == 'Lyci'
    assert unicodedata.script(unichr(0x10926)) == 'Lydi'
    assert unicodedata.script(unichr(0x11161)) == 'Mahj'
    assert unicodedata.script(unichr(0x0D56)) == 'Mlym'
    assert unicodedata.script(unichr(0x0856)) == 'Mand'
    assert unicodedata.script(unichr(0x10AF0)) == 'Mani'
    assert unicodedata.script(unichr(0x11CB0)) == 'Marc'
    assert unicodedata.script(unichr(0x11D28)) == 'Gonm'
    assert unicodedata.script(unichr(0xABDD)) == 'Mtei'
    assert unicodedata.script(unichr(0x1E897)) == 'Mend'
    assert unicodedata.script(unichr(0x109B0)) == 'Merc'
    assert unicodedata.script(unichr(0x10993)) == 'Mero'
    assert unicodedata.script(unichr(0x16F5D)) == 'Plrd'
    assert unicodedata.script(unichr(0x1160B)) == 'Modi'
    assert unicodedata.script(unichr(0x18A8)) == 'Mong'
    assert unicodedata.script(unichr(0x16A48)) == 'Mroo'
    assert unicodedata.script(unichr(0x1128C)) == 'Mult'
    assert unicodedata.script(unichr(0x105B)) == 'Mymr'
    assert unicodedata.script(unichr(0x108AF)) == 'Nbat'
    assert unicodedata.script(unichr(0x19B3)) == 'Talu'
    assert unicodedata.script(unichr(0x1143D)) == 'Newa'
    assert unicodedata.script(unichr(0x07F4)) == 'Nkoo'
    assert unicodedata.script(unichr(0x1B192)) == 'Nshu'
    assert unicodedata.script(unichr(0x169C)) == 'Ogam'
    assert unicodedata.script(unichr(0x1C56)) == 'Olck'
    assert unicodedata.script(unichr(0x10CE9)) == 'Hung'
    assert unicodedata.script(unichr(0x10316)) == 'Ital'
    assert unicodedata.script(unichr(0x10A93)) == 'Narb'
    assert unicodedata.script(unichr(0x1035A)) == 'Perm'
    assert unicodedata.script(unichr(0x103D5)) == 'Xpeo'
    assert unicodedata.script(unichr(0x10A65)) == 'Sarb'
    assert unicodedata.script(unichr(0x10C09)) == 'Orkh'
    assert unicodedata.script(unichr(0x0B60)) == 'Orya'
    assert unicodedata.script(unichr(0x104CF)) == 'Osge'
    assert unicodedata.script(unichr(0x104A8)) == 'Osma'
    assert unicodedata.script(unichr(0x16B12)) == 'Hmng'
    assert unicodedata.script(unichr(0x10879)) == 'Palm'
    assert unicodedata.script(unichr(0x11AF1)) == 'Pauc'
    assert unicodedata.script(unichr(0xA869)) == 'Phag'
    assert unicodedata.script(unichr(0x10909)) == 'Phnx'
    assert unicodedata.script(unichr(0x10B81)) == 'Phlp'
    assert unicodedata.script(unichr(0xA941)) == 'Rjng'
    assert unicodedata.script(unichr(0x16C3)) == 'Runr'
    assert unicodedata.script(unichr(0x0814)) == 'Samr'
    assert unicodedata.script(unichr(0xA88C)) == 'Saur'
    assert unicodedata.script(unichr(0x111C8)) == 'Shrd'
    assert unicodedata.script(unichr(0x1045F)) == 'Shaw'
    assert unicodedata.script(unichr(0x115AD)) == 'Sidd'
    assert unicodedata.script(unichr(0x1D8C0)) == 'Sgnw'
    assert unicodedata.script(unichr(0x0DB9)) == 'Sinh'
    assert unicodedata.script(unichr(0x110F9)) == 'Sora'
    assert unicodedata.script(unichr(0x11A60)) == 'Soyo'
    assert unicodedata.script(unichr(0x1B94)) == 'Sund'
    assert unicodedata.script(unichr(0xA81F)) == 'Sylo'
    assert unicodedata.script(unichr(0x0740)) == 'Syrc'
    assert unicodedata.script(unichr(0x1714)) == 'Tglg'
    assert unicodedata.script(unichr(0x1761)) == 'Tagb'
    assert unicodedata.script(unichr(0x1965)) == 'Tale'
    assert unicodedata.script(unichr(0x1A32)) == 'Lana'
    assert unicodedata.script(unichr(0xAA86)) == 'Tavt'
    assert unicodedata.script(unichr(0x116A5)) == 'Takr'
    assert unicodedata.script(unichr(0x0B8E)) == 'Taml'
    assert unicodedata.script(unichr(0x1754D)) == 'Tang'
    assert unicodedata.script(unichr(0x0C40)) == 'Telu'
    assert unicodedata.script(unichr(0x07A4)) == 'Thaa'
    assert unicodedata.script(unichr(0x0E42)) == 'Thai'
    assert unicodedata.script(unichr(0x0F09)) == 'Tibt'
    assert unicodedata.script(unichr(0x2D3A)) == 'Tfng'
    assert unicodedata.script(unichr(0x114B0)) == 'Tirh'
    assert unicodedata.script(unichr(0x1038B)) == 'Ugar'
    assert unicodedata.script(unichr(0xA585)) == 'Vaii'
    assert unicodedata.script(unichr(0x118CF)) == 'Wara'
    assert unicodedata.script(unichr(0xA066)) == 'Yiii'
    assert unicodedata.script(unichr(0x11A31)) == 'Zanb'


def test_script_extension():
    """script_extension() returns the expected set of script codes."""
    assert unicodedata.script_extension("a") == {"Latn"}
    assert unicodedata.script_extension(unichr(0)) == {"Zyyy"}
    assert unicodedata.script_extension(unichr(0x0378)) == {"Zzzz"}
    assert unicodedata.script_extension(unichr(0x10FFFF)) == {"Zzzz"}

    assert unicodedata.script_extension("\u0660") == {'Arab', 'Thaa'}
    assert unicodedata.script_extension("\u0964") == {
        'Beng', 'Deva', 'Dogr', 'Gong', 'Gran', 'Gujr', 'Guru', 'Knda',
        'Mahj', 'Mlym', 'Orya', 'Sind', 'Sinh', 'Sylo', 'Takr', 'Taml',
        'Telu', 'Tirh'}


def test_script_name():
    """script_name() maps a script code to its long name.

    An unknown code is expected to raise KeyError unless a ``default`` is
    supplied, in which case the default is expected back.
    """
    assert unicodedata.script_name("Latn") == "Latin"
    assert unicodedata.script_name("Zyyy") == "Common"
    assert unicodedata.script_name("Zzzz") == "Unknown"
    # underscores in long names are replaced by spaces
    assert unicodedata.script_name("Egyp") == "Egyptian Hieroglyphs"

    with pytest.raises(KeyError):
        unicodedata.script_name("QQQQ")
    # Compare against the default explicitly: a bare truthiness check would
    # accept any non-empty return value.
    assert unicodedata.script_name("QQQQ", default="Unknown") == "Unknown"


def test_script_code():
    """script_code() maps a long script name to its four-letter code.

    An unknown name is expected to raise KeyError unless a ``default`` is
    supplied, in which case the default is expected back.
    """
    assert unicodedata.script_code("Latin") == "Latn"
    assert unicodedata.script_code("Common") == "Zyyy"
    assert unicodedata.script_code("Unknown") == "Zzzz"
    # case, whitespace, underscores and hyphens are ignored
    assert unicodedata.script_code("Egyptian Hieroglyphs") == "Egyp"
    assert unicodedata.script_code("Egyptian_Hieroglyphs") == "Egyp"
    assert unicodedata.script_code("egyptianhieroglyphs") == "Egyp"
    assert unicodedata.script_code("Egyptian-Hieroglyphs") == "Egyp"

    with pytest.raises(KeyError):
        unicodedata.script_code("Does not exist")
    assert unicodedata.script_code("Does not exist", default="Zzzz") == "Zzzz"


def test_block():
    """block() returns the expected block name, or "No_Block"."""
    assert unicodedata.block("\x00") == "Basic Latin"
    assert unicodedata.block("\x7F") == "Basic Latin"
    assert unicodedata.block("\x80") == "Latin-1 Supplement"
    assert unicodedata.block("\u1c90") == "Georgian Extended"
    assert unicodedata.block("\u0870") == "No_Block"


def test_ot_tags_from_script():
    """ot_tags_from_script() returns the list of OpenType tags for a script."""
    # simple
    assert unicodedata.ot_tags_from_script("Latn") == ["latn"]
    # script mapped to multiple new and old script tags
    assert unicodedata.ot_tags_from_script("Deva") == ["dev2", "deva"]
    # exceptions
    assert unicodedata.ot_tags_from_script("Hira") == ["kana"]
    # special script codes map to DFLT
    assert unicodedata.ot_tags_from_script("Zinh") == ["DFLT"]
    assert unicodedata.ot_tags_from_script("Zyyy") == ["DFLT"]
    assert unicodedata.ot_tags_from_script("Zzzz") == ["DFLT"]
    # this is invalid or unknown
    assert unicodedata.ot_tags_from_script("Aaaa") == ["DFLT"]


def test_ot_tag_to_script():
    """ot_tag_to_script() maps an OpenType script tag back to a script code.

    "DFLT" and unrecognised tags are expected to give None; the malformed
    tags listed at the end are expected to raise ValueError.
    """
    assert unicodedata.ot_tag_to_script("latn") == "Latn"
    assert unicodedata.ot_tag_to_script("kana") == "Kana"
    assert unicodedata.ot_tag_to_script("DFLT") is None
    assert unicodedata.ot_tag_to_script("aaaa") is None
    assert unicodedata.ot_tag_to_script("beng") == "Beng"
    assert unicodedata.ot_tag_to_script("bng2") == "Beng"
    assert unicodedata.ot_tag_to_script("dev2") == "Deva"
    assert unicodedata.ot_tag_to_script("gjr2") == "Gujr"
    assert unicodedata.ot_tag_to_script("yi ") == "Yiii"
    assert unicodedata.ot_tag_to_script("nko ") == "Nkoo"
    assert unicodedata.ot_tag_to_script("vai ") == "Vaii"
    assert unicodedata.ot_tag_to_script("lao ") == "Laoo"
    # a short tag without trailing-space padding is expected to resolve too
    assert unicodedata.ot_tag_to_script("yi") == "Yiii"

    for invalid_value in ("", " ", "z zz", "zzzzz"):
        with pytest.raises(ValueError, match="invalid OpenType tag"):
            unicodedata.ot_tag_to_script(invalid_value)


def test_script_horizontal_direction():
    """script_horizontal_direction() returns "LTR" or "RTL" for a script code.

    An unknown code is expected to raise KeyError unless a ``default`` is
    supplied, in which case the default is expected back.
    """
    assert unicodedata.script_horizontal_direction("Latn") == "LTR"
    assert unicodedata.script_horizontal_direction("Arab") == "RTL"
    assert unicodedata.script_horizontal_direction("Thaa") == "RTL"

    with pytest.raises(KeyError):
        unicodedata.script_horizontal_direction("Azzz")
    assert unicodedata.script_horizontal_direction("Azzz",
                                                   default="LTR") == "LTR"


if __name__ == "__main__":
    import sys
    sys.exit(pytest.main(sys.argv))