• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
# Copyright (C) 2018 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html
3
4import io as pyio
5import json
6import os
7import unittest
8
9from .. import InFile
10from ..comment_stripper import CommentStripper
11from ..filtration import Filter
12
# File stems that every filter under test is evaluated against.  The checking
# helper in this module turns each stem into "<tree>/<stem>.txt" before asking
# the filter whether it matches.
#
# NOTE(review): several locale tests list "en" among their expected matches,
# but "en" is not part of this list, so that expectation is never exercised.
EXAMPLE_FILE_STEMS = [
    "af_NA",
    "af_VARIANT",
    "af_ZA_VARIANT",
    "af_ZA",
    "af",
    "ar",
    "ar_SA",
    "ars",
    "bs_BA",
    "bs_Cyrl_BA",
    "bs_Cyrl",
    "bs_Latn_BA",
    "bs_Latn",
    "bs",
    "en_001",
    "en_150",
    "en_DE",
    "en_GB",
    "en_US",
    "root",
    "sr_BA",
    "sr_CS",
    "sr_Cyrl_BA",
    "sr_Cyrl_CS",
    "sr_Cyrl_ME",
    "sr_Cyrl",
    "sr_Latn_BA",
    "sr_Latn_CS",
    "sr_Latn_ME_VARIANT",
    "sr_Latn_ME",
    "sr_Latn",
    "sr_ME",
    "sr",
    "vai_Latn_LR",
    "vai_Latn",
    "vai_LR",
    "vai_Vaii_LR",
    "vai_Vaii",
    "vai",
    "yue",
    "zh_CN",
    "zh_Hans_CN",
    "zh_Hans_HK",
    "zh_Hans_MO",
    "zh_Hans_SG",
    "zh_Hans",
    "zh_Hant_HK",
    "zh_Hant_MO",
    "zh_Hant_TW",
    "zh_Hant",
    "zh_HK",
    "zh_MO",
    "zh_SG",
    "zh_TW",
    "zh",
]
70
71
class TestIO(object):
    """I/O helper handed to ``Filter.create_from_json`` by the tests below.

    It serves ``LOCALE_DEPS.json`` fixtures from the ``sample_data``
    directory that sits next to this file.
    """

    # The "Test" name prefix makes pytest-style collectors consider this
    # helper a test class; it holds no tests, so opt out explicitly.
    __test__ = False

    def __init__(self):
        pass

    def read_locale_deps(self, tree):
        """Load the locale-dependency fixture for *tree*.

        Args:
            tree: Name of the resource tree to load the fixture for.

        Returns:
            The parsed JSON content of
            ``sample_data/<tree>/LOCALE_DEPS.json``, or ``None`` when *tree*
            is not one of "brkitr", "locales" or "rbnf".
        """
        if tree not in ("brkitr", "locales", "rbnf"):
            return None
        deps_path = os.path.join(
            os.path.dirname(__file__),
            "sample_data",
            tree,
            "LOCALE_DEPS.json"
        )
        # "utf-8-sig" transparently drops a leading byte-order mark.  The
        # file object is wrapped in CommentStripper before json.load reads
        # it (presumably to remove comments that strict JSON would reject).
        with pyio.open(deps_path, "r", encoding="utf-8-sig") as f:
            return json.load(CommentStripper(f))
86
87
class FiltrationTest(unittest.TestCase):
    """Checks which example file stems each JSON filter configuration matches.

    Every test builds a filter with ``Filter.create_from_json`` and hands it
    to ``_check_filter`` together with the stems expected to match.
    """

    # A bare "exclude" filter is expected to match none of the example stems.
    def test_exclude(self):
        self._check_filter(Filter.create_from_json({
            "filterType": "exclude"
        }, TestIO()), [
        ])

    # With no filterType given, only the listed stems are expected to match.
    def test_default_whitelist(self):
        self._check_filter(Filter.create_from_json({
            "whitelist": [
                "ars",
                "zh_Hans"
            ]
        }, TestIO()), [
            "ars",
            "zh_Hans"
        ])

    # With no filterType given, everything but the listed stems should match.
    def test_default_blacklist(self):
        expected_matches = set(EXAMPLE_FILE_STEMS)
        expected_matches.remove("ars")
        expected_matches.remove("zh_Hans")
        self._check_filter(Filter.create_from_json({
            "blacklist": [
                "ars",
                "zh_Hans"
            ]
        }, TestIO()), expected_matches)

    # Whitelisting a language is expected to match all of its variants in the
    # example list, plus "root".
    def test_language_whitelist(self):
        self._check_filter(Filter.create_from_json({
            "filterType": "language",
            "whitelist": [
                "af",
                "bs"
            ]
        }, TestIO()), [
            "root",
            "af_NA",
            "af_VARIANT",
            "af_ZA_VARIANT",
            "af_ZA",
            "af",
            "bs_BA",
            "bs_Cyrl_BA",
            "bs_Cyrl",
            "bs_Latn_BA",
            "bs_Latn",
            "bs"
        ])

    # Blacklisting a language is expected to drop all of its variants.
    def test_language_blacklist(self):
        expected_matches = set(EXAMPLE_FILE_STEMS)
        expected_matches.remove("af_NA")
        expected_matches.remove("af_VARIANT")
        expected_matches.remove("af_ZA_VARIANT")
        expected_matches.remove("af_ZA")
        expected_matches.remove("af")
        self._check_filter(Filter.create_from_json({
            "filterType": "language",
            "blacklist": [
                "af"
            ]
        }, TestIO()), expected_matches)

    # A stem is expected to match when any whitelisted pattern matches it.
    def test_regex_whitelist(self):
        self._check_filter(Filter.create_from_json({
            "filterType": "regex",
            "whitelist": [
                r"^ar.*$",
                r"^zh$"
            ]
        }, TestIO()), [
            "ar",
            "ar_SA",
            "ars",
            "zh"
        ])

    # A stem is expected to be dropped when any blacklisted pattern matches.
    def test_regex_blacklist(self):
        expected_matches = set(EXAMPLE_FILE_STEMS)
        expected_matches.remove("ar")
        expected_matches.remove("ar_SA")
        expected_matches.remove("ars")
        expected_matches.remove("zh")
        self._check_filter(Filter.create_from_json({
            "filterType": "regex",
            "blacklist": [
                r"^ar.*$",
                r"^zh$"
            ]
        }, TestIO()), expected_matches)

    def test_locale_basic(self):
        self._check_filter(Filter.create_from_json({
            "filterType": "locale",
            "whitelist": [
                # Default scripts:
                # sr => Cyrl
                # vai => Vaii
                # zh => Hans
                "bs_BA",  # is an alias to bs_Latn_BA
                "en_DE",
                "sr",  # Language with no script
                "vai_Latn",  # Language with non-default script
                "zh_Hans"  # Language with default script
            ]
        }, TestIO()), [
            "root",
            # bs: should include the full dependency tree of bs_BA
            "bs_BA",
            "bs_Latn_BA",
            "bs_Latn",
            "bs",
            # en: should include the full dependency tree of en_DE
            "en",
            "en_DE",
            "en_150",
            "en_001",
            # sr: include Cyrl, the default, but not Latn.
            "sr",
            "sr_BA",
            "sr_CS",
            "sr_Cyrl",
            "sr_Cyrl_BA",
            "sr_Cyrl_CS",
            "sr_Cyrl_ME",
            # vai: include Latn but NOT Vaii.
            "vai_Latn",
            "vai_Latn_LR",
            # zh: include Hans but NOT Hant.
            "zh",
            "zh_CN",
            "zh_SG",
            "zh_Hans",
            "zh_Hans_CN",
            "zh_Hans_HK",
            "zh_Hans_MO",
            "zh_Hans_SG"
        ])

    # Same request as test_locale_basic, but with includeChildren disabled:
    # the requested locales and their dependency chains are expected, while
    # child locales such as sr_BA or zh_CN are not.
    def test_locale_no_children(self):
        self._check_filter(Filter.create_from_json({
            "filterType": "locale",
            "includeChildren": False,
            "whitelist": [
                # See comments in test_locale_basic.
                "bs_BA",
                "en_DE",
                "sr",
                "vai_Latn",
                "zh_Hans"
            ]
        }, TestIO()), [
            "root",
            "bs_BA",
            "bs_Latn_BA",
            "bs_Latn",
            "bs",
            "en",
            "en_DE",
            "en_150",
            "en_001",
            "sr",
            "vai_Latn",
            "zh",
            "zh_Hans",
        ])

    def test_locale_include_scripts(self):
        self._check_filter(Filter.create_from_json({
            "filterType": "locale",
            "includeScripts": True,
            "whitelist": [
                # See comments in test_locale_basic.
                "bs_BA",
                "en_DE",
                "sr",
                "vai_Latn",
                "zh_Hans"
            ]
        }, TestIO()), [
            "root",
            # bs: includeScripts only works for language-only (without region)
            "bs_BA",
            "bs_Latn_BA",
            "bs_Latn",
            "bs",
            # en: should include the full dependency tree of en_DE
            "en",
            "en_DE",
            "en_150",
            "en_001",
            # sr: include Latn, since no particular script was requested.
            "sr_BA",
            "sr_CS",
            "sr_Cyrl_BA",
            "sr_Cyrl_CS",
            "sr_Cyrl_ME",
            "sr_Cyrl",
            "sr_Latn_BA",
            "sr_Latn_CS",
            "sr_Latn_ME_VARIANT",
            "sr_Latn_ME",
            "sr_Latn",
            "sr_ME",
            "sr",
            # vai: do NOT include Vaii; the script was explicitly requested.
            "vai_Latn_LR",
            "vai_Latn",
            # zh: do NOT include Hant; the script was explicitly requested.
            "zh_CN",
            "zh_SG",
            "zh_Hans_CN",
            "zh_Hans_HK",
            "zh_Hans_MO",
            "zh_Hans_SG",
            "zh_Hans",
            "zh"
        ])

    def test_locale_no_children_include_scripts(self):
        self._check_filter(Filter.create_from_json({
            "filterType": "locale",
            "includeChildren": False,
            "includeScripts": True,
            "whitelist": [
                # See comments in test_locale_basic.
                "bs_BA",
                "en_DE",
                "sr",
                "vai_Latn",
                "zh_Hans"
            ]
        }, TestIO()), [
            "root",
            # bs: includeScripts only works for language-only (without region)
            "bs_BA",
            "bs_Latn_BA",
            "bs_Latn",
            "bs",
            # en: should include the full dependency tree of en_DE
            "en",
            "en_DE",
            "en_150",
            "en_001",
            # sr: include Cyrl and Latn but no other children
            "sr",
            "sr_Cyrl",
            "sr_Latn",
            # vai: include only the requested script
            "vai_Latn",
            # zh: include only the requested script
            "zh",
            "zh_Hans",
        ])

    # A union is expected to match whatever any of its member filters match.
    def test_union(self):
        self._check_filter(Filter.create_from_json({
            "filterType": "union",
            "unionOf": [
                {
                    "whitelist": [
                        "ars",
                        "zh_Hans"
                    ]
                },
                {
                    "filterType": "regex",
                    "whitelist": [
                        r"^bs.*$",
                        r"^zh$"
                    ]
                }
            ]
        }, TestIO()), [
            "ars",
            "zh_Hans",
            "bs_BA",
            "bs_Cyrl_BA",
            "bs_Cyrl",
            "bs_Latn_BA",
            "bs_Latn",
            "bs",
            "zh"
        ])

    # In the default "locales" tree, zh_HK is expected to pull in zh_Hant_HK
    # and zh_Hant (and root), but not zh.
    def test_hk_deps_normal(self):
        self._check_filter(Filter.create_from_json({
            "filterType": "locale",
            "whitelist": [
                "zh_HK"
            ]
        }, TestIO()), [
            "root",
            "zh_Hant",
            "zh_Hant_HK",
            "zh_HK",
        ])

    # Same filter checked against the "rbnf" tree, where the expected chain
    # goes through yue instead of zh_Hant.
    def test_hk_deps_rbnf(self):
        self._check_filter(Filter.create_from_json({
            "filterType": "locale",
            "whitelist": [
                "zh_HK"
            ]
        }, TestIO()), [
            "root",
            "yue",
            "zh_Hant_HK",
            "zh_HK",
        ], "rbnf")

    # Same filter checked against the "brkitr" tree, where zh_HK is expected
    # to pull in only zh and root.
    def test_no_alias_parent_structure(self):
        self._check_filter(Filter.create_from_json({
            "filterType": "locale",
            "whitelist": [
                "zh_HK"
            ]
        }, TestIO()), [
            "root",
            "zh_HK",
            "zh",
        ], "brkitr")

    def _check_filter(self, filter, expected_matches, tree="locales"):
        """Assert that *filter* matches exactly the expected example stems.

        Each stem in EXAMPLE_FILE_STEMS is turned into "<tree>/<stem>.txt"
        and passed to ``filter.match``; the result must equal whether the
        stem is in *expected_matches*.  Entries of *expected_matches* that
        are not in EXAMPLE_FILE_STEMS are never checked.

        Args:
            filter: Object exposing ``match(InFile)``.  The name shadows the
                builtin but is kept so existing callers are unaffected.
            expected_matches: Iterable of stems that should match.
            tree: Directory component used to build the file name.
        """
        # Build the lookup once; callers pass lists as well as sets.
        expected = frozenset(expected_matches)
        for file_stem in EXAMPLE_FILE_STEMS:
            filename = "%s/%s.txt" % (tree, file_stem)
            is_match = filter.match(InFile(filename))
            # Report the full path so a failure also identifies the tree.
            self.assertEqual(is_match, file_stem in expected, filename)
419
# Export the test for the runner.
# unittest.makeSuite() was deprecated in Python 3.11 and removed in 3.13;
# the default loader builds the same suite with the same "test" prefix.
suite = unittest.defaultTestLoader.loadTestsFromTestCase(FiltrationTest)
422