# Copyright (C) 2018 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html

import io as pyio
import json
import os
import unittest

from .. import InFile
from ..comment_stripper import CommentStripper
from ..filtration import Filter

# File stems (file names without directory or ".txt" extension) that every
# test in this module runs its filter against. A filter is correct when it
# matches exactly the stems listed as expected by the test and no others.
EXAMPLE_FILE_STEMS = [
    "af_NA",
    "af_VARIANT",
    "af_ZA_VARIANT",
    "af_ZA",
    "af",
    "ar",
    "ar_SA",
    "ars",
    "bs_BA",
    "bs_Cyrl_BA",
    "bs_Cyrl",
    "bs_Latn_BA",
    "bs_Latn",
    "bs",
    "en_001",
    "en_150",
    "en_DE",
    "en_GB",
    "en_US",
    "root",
    "sr_BA",
    "sr_CS",
    "sr_Cyrl_BA",
    "sr_Cyrl_CS",
    "sr_Cyrl_ME",
    "sr_Cyrl",
    "sr_Latn_BA",
    "sr_Latn_CS",
    "sr_Latn_ME_VARIANT",
    "sr_Latn_ME",
    "sr_Latn",
    "sr_ME",
    "sr",
    "vai_Latn_LR",
    "vai_Latn",
    "vai_LR",
    "vai_Vaii_LR",
    "vai_Vaii",
    "vai",
    "yue",
    "zh_CN",
    "zh_Hans_CN",
    "zh_Hans_HK",
    "zh_Hans_MO",
    "zh_Hans_SG",
    "zh_Hans",
    "zh_Hant_HK",
    "zh_Hant_MO",
    "zh_Hant_TW",
    "zh_Hant",
    "zh_HK",
    "zh_MO",
    "zh_SG",
    "zh_TW",
    "zh"
]


class TestIO(object):
    """I/O helper handed to ``Filter.create_from_json`` by the tests.

    It serves locale dependency data from the ``sample_data`` directory that
    sits next to this test module.
    """

    def __init__(self):
        pass

    def read_locale_deps(self, tree):
        """Load the sample ``LOCALE_DEPS.json`` for ``tree``.

        Returns the parsed JSON, or ``None`` when ``tree`` is not one of
        "brkitr", "locales" or "rbnf".
        """
        if tree not in ("brkitr", "locales", "rbnf"):
            return None
        # "utf-8-sig" drops a leading BOM if present; CommentStripper wraps
        # the file object before it is handed to the JSON parser.
        with pyio.open(os.path.join(
                os.path.dirname(__file__),
                "sample_data",
                tree,
                "LOCALE_DEPS.json"
                ), "r", encoding="utf-8-sig") as f:
            return json.load(CommentStripper(f))


class FiltrationTest(unittest.TestCase):
    """Checks which of EXAMPLE_FILE_STEMS each filter configuration matches."""

    def test_exclude(self):
        """An "exclude" filter is expected to match no file at all."""
        self._check_filter(Filter.create_from_json({
            "filterType": "exclude"
        }, TestIO()), [
        ])

    def test_default_whitelist(self):
        """Without a filterType, a whitelist matches exactly its entries."""
        self._check_filter(Filter.create_from_json({
            "whitelist": [
                "ars",
                "zh_Hans"
            ]
        }, TestIO()), [
            "ars",
            "zh_Hans"
        ])

    def test_default_blacklist(self):
        """Without a filterType, a blacklist matches all but its entries."""
        expected_matches = set(EXAMPLE_FILE_STEMS)
        expected_matches.remove("ars")
        expected_matches.remove("zh_Hans")
        self._check_filter(Filter.create_from_json({
            "blacklist": [
                "ars",
                "zh_Hans"
            ]
        }, TestIO()), expected_matches)

    def test_language_whitelist(self):
        """A language whitelist matches root and every listed language file."""
        self._check_filter(Filter.create_from_json({
            "filterType": "language",
            "whitelist": [
                "af",
                "bs"
            ]
        }, TestIO()), [
            "root",
            "af_NA",
            "af_VARIANT",
            "af_ZA_VARIANT",
            "af_ZA",
            "af",
            "bs_BA",
            "bs_Cyrl_BA",
            "bs_Cyrl",
            "bs_Latn_BA",
            "bs_Latn",
            "bs"
        ])

    def test_language_blacklist(self):
        """A language blacklist drops every file of the listed language."""
        expected_matches = set(EXAMPLE_FILE_STEMS)
        expected_matches.remove("af_NA")
        expected_matches.remove("af_VARIANT")
        expected_matches.remove("af_ZA_VARIANT")
        expected_matches.remove("af_ZA")
        expected_matches.remove("af")
        self._check_filter(Filter.create_from_json({
            "filterType": "language",
            "blacklist": [
                "af"
            ]
        }, TestIO()), expected_matches)

    def test_regex_whitelist(self):
        """A regex whitelist matches stems matching any listed pattern."""
        self._check_filter(Filter.create_from_json({
            "filterType": "regex",
            "whitelist": [
                r"^ar.*$",
                r"^zh$"
            ]
        }, TestIO()), [
            "ar",
            "ar_SA",
            "ars",
            "zh"
        ])

    def test_regex_blacklist(self):
        """A regex blacklist drops stems matching any listed pattern."""
        expected_matches = set(EXAMPLE_FILE_STEMS)
        expected_matches.remove("ar")
        expected_matches.remove("ar_SA")
        expected_matches.remove("ars")
        expected_matches.remove("zh")
        self._check_filter(Filter.create_from_json({
            "filterType": "regex",
            "blacklist": [
                r"^ar.*$",
                r"^zh$"
            ]
        }, TestIO()), expected_matches)

    def test_locale_basic(self):
        """Locale filter with default options (see inline expectations)."""
        self._check_filter(Filter.create_from_json({
            "filterType": "locale",
            "whitelist": [
                # Default scripts:
                # sr => Cyrl
                # vai => Vaii
                # zh => Hans
                "bs_BA",  # is an alias to bs_Latn_BA
                "en_DE",
                "sr",  # Language with no script
                "vai_Latn",  # Language with non-default script
                "zh_Hans"  # Language with default script
            ]
        }, TestIO()), [
            "root",
            # bs: should include the full dependency tree of bs_BA
            "bs_BA",
            "bs_Latn_BA",
            "bs_Latn",
            "bs",
            # en: should include the full dependency tree of en_DE
            "en",
            "en_DE",
            "en_150",
            "en_001",
            # sr: include Cyrl, the default, but not Latn.
            "sr",
            "sr_BA",
            "sr_CS",
            "sr_Cyrl",
            "sr_Cyrl_BA",
            "sr_Cyrl_CS",
            "sr_Cyrl_ME",
            # vai: include Latn but NOT Vaii.
            "vai_Latn",
            "vai_Latn_LR",
            # zh: include Hans but NOT Hant.
            "zh",
            "zh_CN",
            "zh_SG",
            "zh_Hans",
            "zh_Hans_CN",
            "zh_Hans_HK",
            "zh_Hans_MO",
            "zh_Hans_SG"
        ])

    def test_locale_no_children(self):
        """Locale filter with ``includeChildren`` turned off."""
        self._check_filter(Filter.create_from_json({
            "filterType": "locale",
            "includeChildren": False,
            "whitelist": [
                # See comments in test_locale_basic.
                "bs_BA",
                "en_DE",
                "sr",
                "vai_Latn",
                "zh_Hans"
            ]
        }, TestIO()), [
            "root",
            "bs_BA",
            "bs_Latn_BA",
            "bs_Latn",
            "bs",
            "en",
            "en_DE",
            "en_150",
            "en_001",
            "sr",
            "vai_Latn",
            "zh",
            "zh_Hans",
        ])

    def test_locale_include_scripts(self):
        """Locale filter with ``includeScripts`` turned on."""
        self._check_filter(Filter.create_from_json({
            "filterType": "locale",
            "includeScripts": True,
            "whitelist": [
                # See comments in test_locale_basic.
                "bs_BA",
                "en_DE",
                "sr",
                "vai_Latn",
                "zh_Hans"
            ]
        }, TestIO()), [
            "root",
            # bs: includeScripts only works for language-only (without region)
            "bs_BA",
            "bs_Latn_BA",
            "bs_Latn",
            "bs",
            # en: should include the full dependency tree of en_DE
            "en",
            "en_DE",
            "en_150",
            "en_001",
            # sr: include Latn, since no particular script was requested.
            "sr_BA",
            "sr_CS",
            "sr_Cyrl_BA",
            "sr_Cyrl_CS",
            "sr_Cyrl_ME",
            "sr_Cyrl",
            "sr_Latn_BA",
            "sr_Latn_CS",
            "sr_Latn_ME_VARIANT",
            "sr_Latn_ME",
            "sr_Latn",
            "sr_ME",
            "sr",
            # vai: do NOT include Vaii; the script was explicitly requested.
            "vai_Latn_LR",
            "vai_Latn",
            # zh: do NOT include Hant; the script was explicitly requested.
            "zh_CN",
            "zh_SG",
            "zh_Hans_CN",
            "zh_Hans_HK",
            "zh_Hans_MO",
            "zh_Hans_SG",
            "zh_Hans",
            "zh"
        ])

    def test_locale_no_children_include_scripts(self):
        """Locale filter with children off and ``includeScripts`` on."""
        self._check_filter(Filter.create_from_json({
            "filterType": "locale",
            "includeChildren": False,
            "includeScripts": True,
            "whitelist": [
                # See comments in test_locale_basic.
                "bs_BA",
                "en_DE",
                "sr",
                "vai_Latn",
                "zh_Hans"
            ]
        }, TestIO()), [
            "root",
            # bs: includeScripts only works for language-only (without region)
            "bs_BA",
            "bs_Latn_BA",
            "bs_Latn",
            "bs",
            # en: should include the full dependency tree of en_DE
            "en",
            "en_DE",
            "en_150",
            "en_001",
            # sr: include Cyrl and Latn but no other children
            "sr",
            "sr_Cyrl",
            "sr_Latn",
            # vai: include only the requested script
            "vai_Latn",
            # zh: include only the requested script
            "zh",
            "zh_Hans",
        ])

    def test_union(self):
        """A union filter matches whatever any of its sub-filters matches."""
        self._check_filter(Filter.create_from_json({
            "filterType": "union",
            "unionOf": [
                {
                    "whitelist": [
                        "ars",
                        "zh_Hans"
                    ]
                },
                {
                    "filterType": "regex",
                    "whitelist": [
                        r"^bs.*$",
                        r"^zh$"
                    ]
                }
            ]
        }, TestIO()), [
            "ars",
            "zh_Hans",
            "bs_BA",
            "bs_Cyrl_BA",
            "bs_Cyrl",
            "bs_Latn_BA",
            "bs_Latn",
            "bs",
            "zh"
        ])

    def test_hk_deps_normal(self):
        """zh_HK dependencies in the default ("locales") tree."""
        self._check_filter(Filter.create_from_json({
            "filterType": "locale",
            "whitelist": [
                "zh_HK"
            ]
        }, TestIO()), [
            "root",
            "zh_Hant",
            "zh_Hant_HK",
            "zh_HK",
        ])

    def test_hk_deps_rbnf(self):
        """zh_HK dependencies when matching files in the "rbnf" tree."""
        self._check_filter(Filter.create_from_json({
            "filterType": "locale",
            "whitelist": [
                "zh_HK"
            ]
        }, TestIO()), [
            "root",
            "yue",
            "zh_Hant_HK",
            "zh_HK",
        ], "rbnf")

    def test_no_alias_parent_structure(self):
        """zh_HK dependencies when matching files in the "brkitr" tree."""
        self._check_filter(Filter.create_from_json({
            "filterType": "locale",
            "whitelist": [
                "zh_HK"
            ]
        }, TestIO()), [
            "root",
            "zh_HK",
            "zh",
        ], "brkitr")

    def _check_filter(self, filter, expected_matches, tree="locales"):
        """Assert that ``filter`` matches exactly ``expected_matches``.

        Every stem in EXAMPLE_FILE_STEMS is turned into the path
        "<tree>/<stem>.txt" and passed to ``filter.match``; the result must
        equal whether the stem is listed in ``expected_matches``. Entries of
        ``expected_matches`` that are not in EXAMPLE_FILE_STEMS are never
        looked at.

        ``filter`` shadows the builtin of the same name; the parameter name
        is kept so that existing keyword callers continue to work.
        """
        # Build the lookup set once rather than scanning a list per stem.
        expected = frozenset(expected_matches)
        for file_stem in EXAMPLE_FILE_STEMS:
            is_match = filter.match(InFile("%s/%s.txt" % (tree, file_stem)))
            expected_match = file_stem in expected
            # The stem is the failure message so the offending file is named.
            self.assertEqual(is_match, expected_match, file_stem)


# Export the test for the runner.
# unittest.makeSuite() was deprecated in Python 3.11 and removed in 3.13;
# the default loader builds the same suite and is available on all versions.
suite = unittest.defaultTestLoader.loadTestsFromTestCase(FiltrationTest)