# Copyright (C) 2018 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html

"""Unit tests for the filters built by ``Filter.create_from_json``.

Each test builds a filter from a JSON-like dict and checks, for every stem in
``EXAMPLE_FILE_STEMS``, whether ``<tree>/<stem>.txt`` is matched.
"""

import io as pyio
import json
import os
import unittest

from .. import InFile
from ..comment_stripper import CommentStripper
from ..filtration import Filter

# File stems that every test runs its filter against (see
# FiltrationTest._check_filter).
EXAMPLE_FILE_STEMS = [
    "af_NA",
    "af_VARIANT",
    "af_ZA_VARIANT",
    "af_ZA",
    "af",
    "ar",
    "ar_SA",
    "ars",
    "bs_BA",
    "bs_Cyrl_BA",
    "bs_Cyrl",
    "bs_Latn_BA",
    "bs_Latn",
    "bs",
    "en_001",
    "en_150",
    "en_DE",
    "en_GB",
    "en_US",
    "root",
    "sr_BA",
    "sr_CS",
    "sr_Cyrl_BA",
    "sr_Cyrl_CS",
    "sr_Cyrl_ME",
    "sr_Cyrl",
    "sr_Latn_BA",
    "sr_Latn_CS",
    "sr_Latn_ME_VARIANT",
    "sr_Latn_ME",
    "sr_Latn",
    "sr_ME",
    "sr",
    "vai_Latn_LR",
    "vai_Latn",
    "vai_LR",
    "vai_Vaii_LR",
    "vai_Vaii",
    "vai",
    "yue",
    "zh_CN",
    "zh_Hans_CN",
    "zh_Hans_HK",
    "zh_Hans_MO",
    "zh_Hans_SG",
    "zh_Hans",
    "zh_Hant_HK",
    "zh_Hant_MO",
    "zh_Hant_TW",
    "zh_Hant",
    "zh_HK",
    "zh_MO",
    "zh_SG",
    "zh_TW",
    "zh"
]


class TestIO(object):
    """I/O stand-in handed to ``Filter.create_from_json`` by the tests.

    Serves locale dependency data from the ``sample_data`` directory that
    sits next to this test module.
    """

    def read_locale_deps(self, tree):
        """Load ``sample_data/<tree>/LOCALE_DEPS.json``.

        Returns None for any tree other than "brkitr", "locales" or "rbnf".
        The file is passed through CommentStripper before JSON parsing.
        """
        if tree not in ("brkitr", "locales", "rbnf"):
            return None
        deps_path = os.path.join(
            os.path.dirname(__file__),
            "sample_data",
            tree,
            "LOCALE_DEPS.json"
        )
        # "utf-8-sig" drops a leading byte-order mark if the file has one.
        with pyio.open(deps_path, "r", encoding="utf-8-sig") as f:
            return json.load(CommentStripper(f))


class FiltrationTest(unittest.TestCase):
    """Checks each filter type against EXAMPLE_FILE_STEMS."""

    def test_exclude(self):
        """An "exclude" filter matches none of the example files."""
        self._check_filter(Filter.create_from_json({
            "filterType": "exclude"
        }, TestIO()), [
        ])

    def test_default_whitelist(self):
        """With no filterType, a whitelist matches exactly the listed stems."""
        self._check_filter(Filter.create_from_json({
            "whitelist": [
                "ars",
                "zh_Hans"
            ]
        }, TestIO()), [
            "ars",
            "zh_Hans"
        ])

    def test_default_blacklist(self):
        """With no filterType, a blacklist matches all but the listed stems."""
        expected_matches = set(EXAMPLE_FILE_STEMS)
        expected_matches.remove("ars")
        expected_matches.remove("zh_Hans")
        self._check_filter(Filter.create_from_json({
            "blacklist": [
                "ars",
                "zh_Hans"
            ]
        }, TestIO()), expected_matches)

    def test_language_whitelist(self):
        """A language whitelist matches root plus every af/bs locale."""
        self._check_filter(Filter.create_from_json({
            "filterType": "language",
            "whitelist": [
                "af",
                "bs"
            ]
        }, TestIO()), [
            "root",
            "af_NA",
            "af_VARIANT",
            "af_ZA_VARIANT",
            "af_ZA",
            "af",
            "bs_BA",
            "bs_Cyrl_BA",
            "bs_Cyrl",
            "bs_Latn_BA",
            "bs_Latn",
            "bs"
        ])

    def test_language_blacklist(self):
        """A language blacklist on "af" drops every af locale only."""
        expected_matches = set(EXAMPLE_FILE_STEMS)
        expected_matches.remove("af_NA")
        expected_matches.remove("af_VARIANT")
        expected_matches.remove("af_ZA_VARIANT")
        expected_matches.remove("af_ZA")
        expected_matches.remove("af")
        self._check_filter(Filter.create_from_json({
            "filterType": "language",
            "blacklist": [
                "af"
            ]
        }, TestIO()), expected_matches)

    def test_regex_whitelist(self):
        """A regex whitelist matches stems matching any listed pattern."""
        self._check_filter(Filter.create_from_json({
            "filterType": "regex",
            "whitelist": [
                r"^ar.*$",
                r"^zh$"
            ]
        }, TestIO()), [
            "ar",
            "ar_SA",
            "ars",
            "zh"
        ])

    def test_regex_blacklist(self):
        """A regex blacklist drops stems matching any listed pattern."""
        expected_matches = set(EXAMPLE_FILE_STEMS)
        expected_matches.remove("ar")
        expected_matches.remove("ar_SA")
        expected_matches.remove("ars")
        expected_matches.remove("zh")
        self._check_filter(Filter.create_from_json({
            "filterType": "regex",
            "blacklist": [
                r"^ar.*$",
                r"^zh$"
            ]
        }, TestIO()), expected_matches)

    def test_locale_basic(self):
        """A locale filter with default includeChildren/includeScripts."""
        self._check_filter(Filter.create_from_json({
            "filterType": "locale",
            "whitelist": [
                # Default scripts:
                # sr => Cyrl
                # vai => Vaii
                # zh => Hans
                "bs_BA",  # is an alias to bs_Latn_BA
                "en_DE",
                "sr",  # Language with no script
                "vai_Latn",  # Language with non-default script
                "zh_Hans"  # Language with default script
            ]
        }, TestIO()), [
            "root",
            # bs: should include the full dependency tree of bs_BA
            "bs_BA",
            "bs_Latn_BA",
            "bs_Latn",
            "bs",
            # en: should include the full dependency tree of en_DE
            "en",
            "en_DE",
            "en_150",
            "en_001",
            # sr: include Cyrl, the default, but not Latn.
            "sr",
            "sr_BA",
            "sr_CS",
            "sr_Cyrl",
            "sr_Cyrl_BA",
            "sr_Cyrl_CS",
            "sr_Cyrl_ME",
            # vai: include Latn but NOT Vaii.
            "vai_Latn",
            "vai_Latn_LR",
            # zh: include Hans but NOT Hant.
            "zh",
            "zh_CN",
            "zh_SG",
            "zh_Hans",
            "zh_Hans_CN",
            "zh_Hans_HK",
            "zh_Hans_MO",
            "zh_Hans_SG"
        ])

    def test_locale_no_children(self):
        """A locale filter with includeChildren turned off."""
        self._check_filter(Filter.create_from_json({
            "filterType": "locale",
            "includeChildren": False,
            "whitelist": [
                # See comments in test_locale_basic.
                "bs_BA",
                "en_DE",
                "sr",
                "vai_Latn",
                "zh_Hans"
            ]
        }, TestIO()), [
            "root",
            "bs_BA",
            "bs_Latn_BA",
            "bs_Latn",
            "bs",
            "en",
            "en_DE",
            "en_150",
            "en_001",
            "sr",
            "vai_Latn",
            "zh",
            "zh_Hans",
        ])

    def test_locale_include_scripts(self):
        """A locale filter with includeScripts turned on."""
        self._check_filter(Filter.create_from_json({
            "filterType": "locale",
            "includeScripts": True,
            "whitelist": [
                # See comments in test_locale_basic.
                "bs_BA",
                "en_DE",
                "sr",
                "vai_Latn",
                "zh_Hans"
            ]
        }, TestIO()), [
            "root",
            # bs: includeScripts only works for language-only (without region)
            "bs_BA",
            "bs_Latn_BA",
            "bs_Latn",
            "bs",
            # en: should include the full dependency tree of en_DE
            "en",
            "en_DE",
            "en_150",
            "en_001",
            # sr: include Latn, since no particular script was requested.
            "sr_BA",
            "sr_CS",
            "sr_Cyrl_BA",
            "sr_Cyrl_CS",
            "sr_Cyrl_ME",
            "sr_Cyrl",
            "sr_Latn_BA",
            "sr_Latn_CS",
            "sr_Latn_ME_VARIANT",
            "sr_Latn_ME",
            "sr_Latn",
            "sr_ME",
            "sr",
            # vai: do NOT include Vaii; the script was explicitly requested.
            "vai_Latn_LR",
            "vai_Latn",
            # zh: do NOT include Hant; the script was explicitly requested.
            "zh_CN",
            "zh_SG",
            "zh_Hans_CN",
            "zh_Hans_HK",
            "zh_Hans_MO",
            "zh_Hans_SG",
            "zh_Hans",
            "zh"
        ])

    def test_locale_no_children_include_scripts(self):
        """A locale filter with includeChildren off and includeScripts on."""
        self._check_filter(Filter.create_from_json({
            "filterType": "locale",
            "includeChildren": False,
            "includeScripts": True,
            "whitelist": [
                # See comments in test_locale_basic.
                "bs_BA",
                "en_DE",
                "sr",
                "vai_Latn",
                "zh_Hans"
            ]
        }, TestIO()), [
            "root",
            # bs: includeScripts only works for language-only (without region)
            "bs_BA",
            "bs_Latn_BA",
            "bs_Latn",
            "bs",
            # en: should include the full dependency tree of en_DE
            "en",
            "en_DE",
            "en_150",
            "en_001",
            # sr: include Cyrl and Latn but no other children
            "sr",
            "sr_Cyrl",
            "sr_Latn",
            # vai: include only the requested script
            "vai_Latn",
            # zh: include only the requested script
            "zh",
            "zh_Hans",
        ])

    def test_union(self):
        """A union filter matches whatever any of its sub-filters match."""
        self._check_filter(Filter.create_from_json({
            "filterType": "union",
            "unionOf": [
                {
                    "whitelist": [
                        "ars",
                        "zh_Hans"
                    ]
                },
                {
                    "filterType": "regex",
                    "whitelist": [
                        r"^bs.*$",
                        r"^zh$"
                    ]
                }
            ]
        }, TestIO()), [
            "ars",
            "zh_Hans",
            "bs_BA",
            "bs_Cyrl_BA",
            "bs_Cyrl",
            "bs_Latn_BA",
            "bs_Latn",
            "bs",
            "zh"
        ])

    def test_hk_deps_normal(self):
        """zh_HK in the default "locales" tree."""
        self._check_filter(Filter.create_from_json({
            "filterType": "locale",
            "whitelist": [
                "zh_HK"
            ]
        }, TestIO()), [
            "root",
            "zh_Hant",
            "zh_Hant_HK",
            "zh_HK",
        ])

    def test_hk_deps_rbnf(self):
        """zh_HK in the "rbnf" tree, whose sample deps differ from locales."""
        self._check_filter(Filter.create_from_json({
            "filterType": "locale",
            "whitelist": [
                "zh_HK"
            ]
        }, TestIO()), [
            "root",
            "yue",
            "zh_Hant_HK",
            "zh_HK",
        ], "rbnf")

    def test_no_alias_parent_structure(self):
        """zh_HK in the "brkitr" tree."""
        self._check_filter(Filter.create_from_json({
            "filterType": "locale",
            "whitelist": [
                "zh_HK"
            ]
        }, TestIO()), [
            "root",
            "zh_HK",
            "zh",
        ], "brkitr")

    def _check_filter(self, filter, expected_matches, tree="locales"):
        """Assert that *filter* matches exactly the expected example files.

        For every stem in EXAMPLE_FILE_STEMS, ``filter.match`` is called on
        ``InFile("<tree>/<stem>.txt")`` and the result is compared with
        membership of the stem in *expected_matches*. On failure the stem is
        reported as the assertion message.

        Entries of *expected_matches* that do not appear in
        EXAMPLE_FILE_STEMS are never checked.
        """
        # Build the lookup set once instead of scanning a list per stem.
        expected = frozenset(expected_matches)
        for file_stem in EXAMPLE_FILE_STEMS:
            is_match = filter.match(InFile("%s/%s.txt" % (tree, file_stem)))
            expected_match = file_stem in expected
            self.assertEqual(is_match, expected_match, file_stem)


# Export the test for the runner.
# unittest.makeSuite() was deprecated in Python 3.11 and removed in 3.13;
# the default loader collects the same "test*" methods.
suite = unittest.defaultTestLoader.loadTestsFromTestCase(FiltrationTest)