# Copyright (C) 2018 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html

# Python 2/3 Compatibility (ICU-20299)
# TODO(ICU-20301): Remove this.
from __future__ import print_function

from abc import abstractmethod
from collections import defaultdict
import re
import sys

from . import *
from . import utils
from .request_types import *


# Note: for this to be a proper abstract class, it should extend abc.ABC.
# There is no nice way to do this that works in both Python 2 and 3.
# TODO(ICU-20301): Make this inherit from abc.ABC.
class Filter(object):
    @staticmethod
    def create_from_json(json_data, io):
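        """Factory method: builds a Filter subclass instance from its JSON spec.

        Illustrative spec (assumed shape, matching the keys read below):

            {
                "filterType": "language",
                "whitelist": ["en", "de", "zh"]
            }

        "filterType" defaults to "file-stem" when omitted; an unrecognized
        value prints an error to stderr and yields None.
        """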
        assert io is not None
        if "filterType" in json_data:
            filter_type = json_data["filterType"]
        else:
            filter_type = "file-stem"

        if filter_type == "file-stem":
            return FileStemFilter(json_data)
        elif filter_type == "language":
            return LanguageFilter(json_data)
        elif filter_type == "regex":
            return RegexFilter(json_data)
        elif filter_type == "exclude":
            return ExclusionFilter()
        elif filter_type == "union":
            return UnionFilter(json_data, io)
        elif filter_type == "locale":
            return LocaleFilter(json_data, io)
        else:
            print("Error: Unknown filterType option: %s" % filter_type, file=sys.stderr)
            return None

    def filter(self, request):
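        """Applies this filter to the request's input files.

        Returns [] when request.apply_file_filter(self) returns falsy;
        otherwise asserts that every remaining input file matches this
        filter and returns [request].
        """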
        if not request.apply_file_filter(self):
            return []
        for file in request.all_input_files():
            assert self.match(file)
        return [request]

    @staticmethod
    def _file_to_file_stem(file):
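        """Strips the directory and extension: "locales/de_AT.txt" -> "de_AT"."""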
        start = file.filename.rfind("/")
        limit = file.filename.rfind(".")
        return file.filename[start+1:limit]

    @staticmethod
    def _file_to_subdir(file):
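        """Returns the directory part ("locales/de_AT.txt" -> "locales"),
        or None if the filename contains no "/"."""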
        limit = file.filename.rfind("/")
        if limit == -1:
            return None
        return file.filename[:limit]

    @abstractmethod
    def match(self, file):
        pass


class InclusionFilter(Filter):
    def match(self, file):
        return True


class ExclusionFilter(Filter):
    def match(self, file):
        return False


class WhitelistBlacklistFilter(Filter):
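    """Base class for filters driven by an explicit whitelist or blacklist.

    The JSON spec must contain either a "whitelist" or a "blacklist" array;
    subclasses decide what each entry is compared against.
    """
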
    def __init__(self, json_data):
        if "whitelist" in json_data:
            self.is_whitelist = True
            self.whitelist = json_data["whitelist"]
        else:
            assert "blacklist" in json_data, "Need either whitelist or blacklist: %s" % str(json_data)
            self.is_whitelist = False
            self.blacklist = json_data["blacklist"]

    def match(self, file):
        file_stem = self._file_to_file_stem(file)
        return self._should_include(file_stem)

    @abstractmethod
    def _should_include(self, file_stem):
        pass


class FileStemFilter(WhitelistBlacklistFilter):
    def _should_include(self, file_stem):
        if self.is_whitelist:
            return file_stem in self.whitelist
        else:
            return file_stem not in self.blacklist


class LanguageFilter(WhitelistBlacklistFilter):
    def _should_include(self, file_stem):
        language = file_stem.split("_")[0]
        if language == "root":
            # Always include root.txt
            return True
        if self.is_whitelist:
            return language in self.whitelist
        else:
            return language not in self.blacklist


class RegexFilter(WhitelistBlacklistFilter):
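    """Whitelist/blacklist filter whose entries are regular expressions,
    matched against the file stem with re.match (anchored at the start).
    """
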
    def __init__(self, *args):
        # TODO(ICU-20301): Change this to: super().__init__(*args)
        super(RegexFilter, self).__init__(*args)
        if self.is_whitelist:
            self.whitelist = [re.compile(pat) for pat in self.whitelist]
        else:
            self.blacklist = [re.compile(pat) for pat in self.blacklist]

    def _should_include(self, file_stem):
        if self.is_whitelist:
            for pattern in self.whitelist:
                if pattern.match(file_stem):
                    return True
            return False
        else:
            for pattern in self.blacklist:
                if pattern.match(file_stem):
                    return False
            return True


class UnionFilter(Filter):
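    """Filter that matches when any of its sub-filters matches.

    Illustrative spec (assumed shape, matching the keys read below):

        {
            "filterType": "union",
            "unionOf": [
                {"filterType": "language", "whitelist": ["en"]},
                {"filterType": "regex", "whitelist": ["^zh_Hans"]}
            ]
        }
    """
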
    def __init__(self, json_data, io):
        # Collect the sub-filters.
        self.sub_filters = []
        for filter_json in json_data["unionOf"]:
            self.sub_filters.append(Filter.create_from_json(filter_json, io))

    def match(self, file):
        """Match iff any of the sub-filters match."""
        for filter in self.sub_filters:
            if filter.match(file):
                return True
        return False


LANGUAGE_SCRIPT_REGEX = re.compile(r"^([a-z]{2,3})_[A-Z][a-z]{3}$")
LANGUAGE_ONLY_REGEX = re.compile(r"^[a-z]{2,3}$")

class LocaleFilter(Filter):
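    """Filter that selects locale data files from a list of requested locales.

    A locale matches if it is requested, an ancestor of a requested locale,
    or (optionally) a child or script variant of a requested locale.

    Illustrative spec (assumed shape, matching the keys read below):

        {
            "filterType": "locale",
            "whitelist": ["de", "sr_Latn"],
            "includeChildren": true,
            "includeScripts": false
        }

    "includeChildren" defaults to true; "includeScripts" defaults to false.
    """
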
    def __init__(self, json_data, io):
        self.locales_requested = list(json_data["whitelist"])
        self.include_children = json_data.get("includeChildren", True)
        self.include_scripts = json_data.get("includeScripts", False)

        # Load the dependency graph from disk
        self.dependency_data_by_tree = {
            tree: io.read_locale_deps(tree)
            for tree in utils.ALL_TREES
        }

    def match(self, file):
        tree = self._file_to_subdir(file)
        assert tree is not None
        locale = self._file_to_file_stem(file)

        # A locale is *required* if it is *requested* or an ancestor of a
        # *requested* locale.
        if locale in self._locales_required(tree):
            return True

        # Resolve include_scripts and include_children.
        return self._match_recursive(locale, tree)

    def _match_recursive(self, locale, tree):
        # Base case: return True if we reached a *requested* locale,
        # or False if we ascend out of the locale tree.
        if locale is None:
            return False
        if locale in self.locales_requested:
            return True

        # Check for alternative scripts.
        # This causes sr_Latn to check sr instead of going directly to root.
        if self.include_scripts:
            match = LANGUAGE_SCRIPT_REGEX.match(locale)
            if match and self._match_recursive(match.group(1), tree):
                return True

        # Check if we are a descendant of a *requested* locale.
        if self.include_children:
            parent = self._get_parent_locale(locale, tree)
            if self._match_recursive(parent, tree):
                return True

        # No matches.
        return False

    def _get_parent_locale(self, locale, tree):
        """Gets the parent locale in the given tree, according to dependency data."""
        dependency_data = self.dependency_data_by_tree[tree]
        if "parents" in dependency_data and locale in dependency_data["parents"]:
            return dependency_data["parents"][locale]
        if "aliases" in dependency_data and locale in dependency_data["aliases"]:
            return dependency_data["aliases"][locale]
        if LANGUAGE_ONLY_REGEX.match(locale):
            return "root"
        i = locale.rfind("_")
        if i < 0:
            assert locale == "root", "Invalid locale: %s/%s" % (tree, locale)
            return None
        return locale[:i]

    def _locales_required(self, tree):
        """Returns a generator of all required locales in the given tree."""
        for locale in self.locales_requested:
            while locale is not None:
                yield locale
                locale = self._get_parent_locale(locale, tree)


def apply_filters(requests, config, io):
    """Runs the filters and returns a new list of requests."""
    requests = _apply_file_filters(requests, config, io)
    requests = _apply_resource_filters(requests, config, io)
    return requests


def _apply_file_filters(old_requests, config, io):
    """Filters out entire files."""
    filters = _preprocess_file_filters(old_requests, config, io)
    new_requests = []
    for request in old_requests:
        category = request.category
        if category in filters:
            new_requests += filters[category].filter(request)
        else:
            new_requests.append(request)
    return new_requests


def _preprocess_file_filters(requests, config, io):
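    """Builds a map from request category to the Filter to apply to it.

    Each category defaults to "include" ("exclude" under the additive
    strategy) and can be overridden per category via "featureFilters";
    "*_tree" categories additionally fall back to "localeFilter".

    Illustrative filters config (assumed shape; category names are examples):

        {
            "featureFilters": {
                "conversion_mappings": "exclude",
                "rbnf_tree": {"filterType": "language", "whitelist": ["en"]}
            },
            "localeFilter": {"filterType": "locale", "whitelist": ["en"]}
        }
    """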
    all_categories = set(
        request.category
        for request in requests
    )
    all_categories.remove(None)
    all_categories = list(sorted(all_categories))
    json_data = config.filters_json_data
    filters = {}
    default_filter_json = "exclude" if config.strategy == "additive" else "include"
    for category in all_categories:
        filter_json = default_filter_json
        # Figure out the correct filter to create
        if "featureFilters" in json_data and category in json_data["featureFilters"]:
            filter_json = json_data["featureFilters"][category]
        if filter_json == "include" and "localeFilter" in json_data and category.endswith("_tree"):
            filter_json = json_data["localeFilter"]
        # Resolve the filter JSON into a filter object
        if filter_json == "exclude":
            filters[category] = ExclusionFilter()
        elif filter_json == "include":
            pass  # no-op
        else:
            filters[category] = Filter.create_from_json(filter_json, io)
    if "featureFilters" in json_data:
        for category in json_data["featureFilters"]:
            if category not in all_categories:
                print("Warning: category %s is not known" % category, file=sys.stderr)
    return filters


class ResourceFilterInfo(object):
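    """Collects resource filter rules for one request category and rewrites
    the matching genrb requests to read the generated --filterDir files.
    """
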
    def __init__(self, category, strategy):
        self.category = category
        self.strategy = strategy
        self.filter_tmp_dir = "filters/%s" % category
        self.input_files = None
        self.filter_files = None
        self.rules_by_file = None

    def apply_to_requests(self, all_requests):
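        """Finds the genrb requests for this category, records their input
        files, adds the generated filter files as dependencies, and prepends
        a --filterDir argument pointing at the rule files.
        """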
        # Call this method only once per list of requests.
        assert self.input_files is None
        for request in all_requests:
            if request.category != self.category:
                continue
            if not isinstance(request, AbstractExecutionRequest):
                continue
            if request.tool != IcuTool("genrb"):
                continue
            if not request.input_files:
                continue
            self._set_files(request.input_files)
            request.dep_targets += [self.filter_files[:]]
            arg_str = "--filterDir {TMP_DIR}/%s" % self.filter_tmp_dir
            request.args = "%s %s" % (arg_str, request.args)

        # Make sure we found the target request
        if self.input_files is None:
            print("WARNING: Category not found: %s" % self.category, file=sys.stderr)
            self.input_files = []
            self.filter_files = []
            self.rules_by_file = []

    def _set_files(self, files):
        # Note: The input files to genrb for a certain category should always
        # be the same. For example, there are often two genrb calls: one for
        # --writePoolBundle, and the other for --usePoolBundle. They are both
        # expected to have the same list of input files.
        if self.input_files is not None:
            assert self.input_files == files
            return
        self.input_files = list(files)
        self.filter_files = [
            TmpFile("%s/%s" % (self.filter_tmp_dir, basename))
            for basename in (
                file.filename[file.filename.rfind("/")+1:]
                for file in files
            )
        ]
        if self.strategy == "additive":
            self.rules_by_file = [
                [r"-/", r"+/%%ALIAS", r"+/%%Parent"]
                for _ in range(len(files))
            ]
        else:
            self.rules_by_file = [
                [r"+/"]
                for _ in range(len(files))
            ]

    def add_rules(self, file_filter, rules):
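        """Appends the given rules to every input file accepted by file_filter."""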
        for file, rule_list in zip(self.input_files, self.rules_by_file):
            if file_filter.match(file):
                rule_list += rules

    def make_requests(self):
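        """Creates the requests that write the filter rule files: one
        PrintFileRequest per distinct rule list, plus CopyRequests for the
        remaining filter files that share the same rules.
        """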
        # Map from rule list to filter files with that rule list
        unique_rules = defaultdict(list)
        for filter_file, rules in zip(self.filter_files, self.rules_by_file):
            unique_rules[tuple(rules)].append(filter_file)

        new_requests = []
        i = 0
        for rules, filter_files in unique_rules.items():
            base_filter_file = filter_files[0]
            new_requests += [
                PrintFileRequest(
                    name = "%s_print_%d" % (self.category, i),
                    output_file = base_filter_file,
                    content = self._generate_resource_filter_txt(rules)
                )
            ]
            i += 1
            for filter_file in filter_files[1:]:
                new_requests += [
                    CopyRequest(
                        name = "%s_copy_%d" % (self.category, i),
                        input_file = base_filter_file,
                        output_file = filter_file
                    )
                ]
                i += 1
        return new_requests

    @staticmethod
    def _generate_resource_filter_txt(rules):
        result = "# Caution: This file is automatically generated\n\n"
        result += "\n".join(rules)
        return result


def _apply_resource_filters(all_requests, config, io):
    """Creates filters for looking within resource bundle files."""
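    # An illustrative "resourceFilters" entry (assumed shape, matching the keys
    # read below; category names and rule paths are examples only):
    #
    #     {
    #         "categories": ["locales_tree"],
    #         "files": {"whitelist": ["de"]},
    #         "rules": ["-/", "+/NumberElements"]
    #     }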
    json_data = config.filters_json_data
    if "resourceFilters" not in json_data:
        return all_requests

    collected = {}
    for entry in json_data["resourceFilters"]:
        if "files" in entry:
            file_filter = Filter.create_from_json(entry["files"], io)
        else:
            file_filter = InclusionFilter()
        for category in entry["categories"]:
            # not defaultdict because we need to pass arguments to the constructor
            if category not in collected:
                filter_info = ResourceFilterInfo(category, config.strategy)
                filter_info.apply_to_requests(all_requests)
                collected[category] = filter_info
            else:
                filter_info = collected[category]
            filter_info.add_rules(file_filter, entry["rules"])

    # Add the filter generation requests to the beginning so that by default
    # they are made before genrb gets run (order is required by windirect)
    new_requests = []
    for filter_info in collected.values():
        new_requests += filter_info.make_requests()
    new_requests += all_requests
    return new_requests