• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2021 The Pigweed Authors
2#
3# Licensed under the Apache License, Version 2.0 (the "License"); you may not
4# use this file except in compliance with the License. You may obtain a copy of
5# the License at
6#
7#     https://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12# License for the specific language governing permissions and limitations under
13# the License.
14"""Inclusive language presubmit check."""
15
16import dataclasses
17from pathlib import Path
18import re
19
20from . import presubmit, presubmit_context
21
22# List borrowed from Android:
23# https://source.android.com/setup/contribute/respectful-code
24# inclusive-language: disable
# Each entry is a regular-expression fragment, not a plain string.
# _process_inclusive_language() joins the entries with '|' and matches them
# case-insensitively, so an entry can use regex syntax (character classes,
# optional groups) to cover several spellings at once.
NON_INCLUSIVE_WORDS = [
    r'master',
    r'slave',
    r'red[-\s]?line',
    r'(white|gr[ae]y|black)[-\s]*(list|hat)',
    r'craz(y|ie)',
    r'insane',
    r'crip+led?',
    r'sanity',
    r'sane',
    r'dummy',
    r'grandfather',
    r's?he',
    r'his',
    r'her',
    r'm[ae]n[-\s]*in[-\s]*the[-\s]*middle',
    r'mitm',
    r'first[-\s]?class[-\s]?citizen',
]
44# inclusive-language: enable
45
46# Test: master  # inclusive-language: ignore
47# Test: master
48
49
50def _process_inclusive_language(*words):
51    """Turn word list into one big regex with common inflections."""
52
53    if not words:
54        words = tuple(NON_INCLUSIVE_WORDS)
55
56    all_words = []
57    for entry in words:
58        if isinstance(entry, str):
59            all_words.append(entry)
60        elif isinstance(entry, (list, tuple)):
61            all_words.extend(entry)
62        all_words.extend(x for x in words)
63    all_words = tuple(all_words)
64
65    # Confirm each individual word compiles as a valid regex.
66    for word in all_words:
67        _ = re.compile(word)
68
69    word_boundary = (
70        r'(\b|_|(?<=[a-z])(?=[A-Z])|(?<=[0-9])(?=\w)|(?<=\w)(?=[0-9]))'
71    )
72
73    return re.compile(
74        r"({b})(?i:{w})(e?[sd]{b}|{b})".format(
75            w='|'.join(all_words), b=word_boundary
76        ),
77    )
78
79
# Default pattern: called with no arguments, so it is built from
# NON_INCLUSIVE_WORDS. Used as the default words_regex of presubmit_check().
NON_INCLUSIVE_WORDS_REGEX = _process_inclusive_language()

# If seen, ignore this line and the next.
_IGNORE = 'inclusive-language: ignore'

# Ignore a whole section. Please do not change the order of these lines.
# The scanner switches off at a line containing _DISABLE and back on at a
# line containing _ENABLE, so the assignments below, read top to bottom, leave
# checking enabled. Presumably this matters because this file can itself be
# scanned by the check -- confirm before reordering.
_DISABLE = 'inclusive-language: disable'
_ENABLE = 'inclusive-language: enable'
88
89
@dataclasses.dataclass
class PathMatch:
    """A banned word found in a file's path rather than in its contents."""

    # The matched text, exactly as returned by the regex match.
    word: str

    def __repr__(self) -> str:
        # The repr doubles as the human-readable line printed to the failure
        # summary, so it is a sentence rather than a constructor expression.
        description = f'Found non-inclusive word "{self.word}" in file path'
        return description
96
97
@dataclasses.dataclass
class LineMatch:
    """A banned word found on a specific line of a file's contents."""

    # Line number of the match; the scanner counts lines starting from 1.
    line: int
    # The matched text, exactly as returned by the regex match.
    word: str

    def __repr__(self) -> str:
        # The repr doubles as the human-readable line printed to the failure
        # summary, so it is a sentence rather than a constructor expression.
        description = (
            f'Found non-inclusive word "{self.word}" on line {self.line}'
        )
        return description
105
106
@presubmit.check(name='inclusive_language')
def presubmit_check(
    ctx: presubmit_context.PresubmitContext,
    words_regex=NON_INCLUSIVE_WORDS_REGEX,
):
    """Presubmit check that ensures files do not contain banned words.

    Each path (relative to ctx.root) and each line of each regular file is
    searched with words_regex. At most one match is recorded per path and per
    line. Symlinks and directories only have their path checked. Files that
    cannot be decoded as UTF-8 text have their contents skipped.

    Lines can opt out of the check: a line containing _IGNORE exempts itself
    and the following line, and everything from a line containing _DISABLE up
    to (but not including) a line containing _ENABLE is exempt.

    Args:
        ctx: Presubmit context. Reads dry_run, root and failure_summary_log,
            and replaces ctx.paths with the result of
            presubmit_context.apply_exclusions(ctx).
        words_regex: Compiled pattern of banned words; defaults to the pattern
            built from NON_INCLUSIVE_WORDS.

    Raises:
        presubmit_context.PresubmitFailure: If any banned word was found. The
            findings are written to ctx.failure_summary_log and printed first.
    """

    # No subprocesses are run for inclusive_language so don't perform this check
    # if dry_run is on.
    if ctx.dry_run:
        return

    found_words: dict[Path, list[PathMatch | LineMatch]] = {}

    ctx.paths = presubmit_context.apply_exclusions(ctx)

    for path in ctx.paths:
        match = words_regex.search(str(path.relative_to(ctx.root)))
        if match:
            found_words.setdefault(path, []).append(
                PathMatch(match.group(0))
            )

        if path.is_symlink() or path.is_dir():
            continue

        try:
            # Decode explicitly as UTF-8 so the result does not depend on the
            # locale encoding of the machine running the presubmit.
            with open(path, 'r', encoding='utf-8') as ins:
                enabled = True
                prev = ''
                for i, line in enumerate(ins, start=1):
                    # Both markers are tested on every line, so a line that
                    # contains both ends up enabled.
                    if _DISABLE in line:
                        enabled = False
                    if _ENABLE in line:
                        enabled = True

                    # If we see the ignore line on this or the previous line we
                    # ignore any bad words on this line.
                    ignored = _IGNORE in prev or _IGNORE in line

                    if enabled and not ignored:
                        match = words_regex.search(line)

                        if match:
                            found_words.setdefault(path, []).append(
                                LineMatch(i, match.group(0))
                            )

                    # Not using 'continue' so this line always executes.
                    prev = line

        except UnicodeDecodeError:
            # File is not text, like a gif.
            pass

    if found_words:
        with open(ctx.failure_summary_log, 'w') as outs:
            for i, (path, matches) in enumerate(found_words.items()):
                # Separator line between files, but not before the first one.
                if i:
                    print('=' * 40, file=outs)
                print(path, file=outs)
                for match in matches:
                    print(match, file=outs)

        # NOTE: end=None selects print()'s default newline, so the summary is
        # followed by an extra blank line.
        print(ctx.failure_summary_log.read_text(), end=None)

        print()
        print(
            """
Individual lines can be ignored with "inclusive-language: ignore". Blocks can be
ignored with "inclusive-language: disable" and reenabled with
"inclusive-language: enable".
""".strip()
        )
        # Re-enable just in case: inclusive-language: enable.

        raise presubmit_context.PresubmitFailure
184
185
def inclusive_language_checker(*words):
    """Create banned words checker for the given list of banned words.

    Args:
        *words: Regex fragments (or lists/tuples of them) to search for,
            forwarded unchanged to _process_inclusive_language().

    Returns:
        A function that takes a presubmit context and runs presubmit_check()
        on it with the pattern built from `words`.
    """

    # Build the pattern once, when the checker is created, not on every run.
    regex = _process_inclusive_language(*words)

    def inclusive_language(  # pylint: disable=redefined-outer-name
        ctx: presubmit_context.PresubmitContext,
    ):
        # Call the module-level check by the name it is actually bound to.
        # Nothing in this module binds a global named 'inclusive_language',
        # so looking that key up in globals() raised KeyError.
        # NOTE(review): assumes the object produced by @presubmit.check is
        # callable as (ctx, words_regex), which the previous lookup-based
        # call also relied on -- confirm.
        presubmit_check(ctx, regex)

    return inclusive_language
197