• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2021 The Pigweed Authors
2#
3# Licensed under the Apache License, Version 2.0 (the "License"); you may not
4# use this file except in compliance with the License. You may obtain a copy of
5# the License at
6#
7#     https://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12# License for the specific language governing permissions and limitations under
13# the License.
14"""Inclusive language presubmit check."""
15
16import dataclasses
17from pathlib import Path
18import re
19from typing import Dict, List, Union
20
21from . import presubmit
22
# Word list adapted from Android's respectful-code guidance:
# https://source.android.com/setup/contribute/respectful-code
#
# Every entry is a regular-expression fragment rather than a literal word.
# Order is preserved when the fragments are combined into one pattern.
# inclusive-language: disable
NON_INCLUSIVE_WORDS = [
    # Hierarchy and control terms.
    r'master',
    r'slave',
    # Color-coded terms.
    r'red[-\s]?line',
    r'(white|gr[ae]y|black)[-\s]*(list|hat)',
    # Ableist terms.
    r'craz(y|ie)',
    r'insane',
    r'crip+led?',
    r'sanity',
    r'sane',
    r'dummy',
    # Legacy-clause term.
    r'grandfather',
    # Gendered pronouns.
    r's?he',
    r'his',
    r'her',
    # Gendered or exclusionary technical jargon.
    r'm[ae]n[-\s]*in[-\s]*the[-\s]*middle',
    r'mitm',
    r'first[-\s]?class[-\s]?citizen',
]
# inclusive-language: enable
46
47# Test: master  # inclusive-language: ignore
48# Test: master
49
50
51def _process_inclusive_language(*words):
52    """Turn word list into one big regex with common inflections."""
53
54    if not words:
55        words = tuple(NON_INCLUSIVE_WORDS)
56
57    all_words = []
58    for entry in words:
59        if isinstance(entry, str):
60            all_words.append(entry)
61        elif isinstance(entry, (list, tuple)):
62            all_words.extend(entry)
63        all_words.extend(x for x in words)
64    all_words = tuple(all_words)
65
66    # Confirm each individual word compiles as a valid regex.
67    for word in all_words:
68        _ = re.compile(word)
69
70    word_boundary = (
71        r'(\b|_|(?<=[a-z])(?=[A-Z])|(?<=[0-9])(?=\w)|(?<=\w)(?=[0-9]))'
72    )
73
74    return re.compile(
75        r"({b})(?i:{w})(e?[sd]{b}|{b})".format(
76            w='|'.join(all_words), b=word_boundary
77        ),
78    )
79
80
# Default pattern, built once at import time from NON_INCLUSIVE_WORDS.
NON_INCLUSIVE_WORDS_REGEX = _process_inclusive_language()

# Marker: a line carrying it, and the line right after, are not checked.
_IGNORE = 'inclusive-language: ignore'

# Markers that switch checking off and back on for a whole section. Keep the
# two assignments adjacent and in this order: the first line contains the
# "off" marker itself, so the second must follow to switch checking back on.
_DISABLE = 'inclusive-language: disable'
_ENABLE = 'inclusive-language: enable'
89
90
@dataclasses.dataclass
class PathMatch:
    """A flagged word that was found in a file's path."""

    # The matched text, exactly as it appeared in the path.
    word: str

    def __repr__(self) -> str:
        # This text is what ends up in the failure summary when printed.
        return f'Found non-inclusive word "{self.word}" in file path'
97
98
@dataclasses.dataclass
class LineMatch:
    """A flagged word that was found on a specific line of a file."""

    # Line number of the finding.
    line: int
    # The matched text, exactly as it appeared on that line.
    word: str

    def __repr__(self) -> str:
        # This text is what ends up in the failure summary when printed.
        return f'Found non-inclusive word "{self.word}" on line {self.line}'
106
107
@presubmit.check(name='inclusive_language')
def presubmit_check(
    ctx: presubmit.PresubmitContext,
    words_regex=NON_INCLUSIVE_WORDS_REGEX,
):
    """Presubmit check that ensures files do not contain banned words.

    The path of every entry in ``ctx.paths`` (relative to ``ctx.root``) and
    the text of every file that is not a symlink or directory are searched
    with ``words_regex``. At most one finding is recorded for a path and one
    per line.

    A line is skipped when it, or the line before it, contains the _IGNORE
    marker, and while checking is switched off between the _DISABLE and
    _ENABLE markers.

    Args:
        ctx: Presubmit context; its ``paths``, ``root`` and
            ``failure_summary_log`` attributes are used.
        words_regex: Compiled pattern of banned words.

    Raises:
        presubmit.PresubmitFailure: If any path or line matched. The findings
            are written to ``ctx.failure_summary_log`` and printed first.
    """

    found_words: Dict[Path, List[Union[PathMatch, LineMatch]]] = {}

    for path in ctx.paths:
        # The path is checked too; only the part below ctx.root is searched.
        path_match = words_regex.search(str(path.relative_to(ctx.root)))
        if path_match:
            found_words.setdefault(path, []).append(
                PathMatch(path_match.group(0))
            )

        # Symlinks and directories are not opened.
        if path.is_symlink() or path.is_dir():
            continue

        try:
            # Decode as UTF-8 explicitly. The platform default encoding may
            # differ, which could make a valid text file fail to decode (and
            # be skipped below) or let binary data through as text.
            with open(path, 'r', encoding='utf-8') as ins:
                enabled = True
                prev = ''
                for i, line in enumerate(ins, start=1):
                    # When both markers share a line, _ENABLE wins.
                    if _DISABLE in line:
                        enabled = False
                    if _ENABLE in line:
                        enabled = True

                    # If we see the ignore line on this or the previous line we
                    # ignore any bad words on this line.
                    ignored = _IGNORE in prev or _IGNORE in line

                    if enabled and not ignored:
                        line_match = words_regex.search(line)
                        if line_match:
                            found_words.setdefault(path, []).append(
                                LineMatch(i, line_match.group(0))
                            )

                    # Not using 'continue' so this line always executes.
                    prev = line

        except UnicodeDecodeError:
            # File is not text, like a gif. Findings recorded before the
            # undecodable bytes were reached are kept.
            pass

    if found_words:
        with open(ctx.failure_summary_log, 'w') as outs:
            for i, (path, matches) in enumerate(found_words.items()):
                # Separator between files, but not before the first one.
                if i:
                    print('=' * 40, file=outs)
                print(path, file=outs)
                for match in matches:
                    print(match, file=outs)

        print(ctx.failure_summary_log.read_text())

        print()
        print(
            """
Individual lines can be ignored with "inclusive-language: ignore". Blocks can be
ignored with "inclusive-language: disable" and reenabled with
"inclusive-language: enable".
""".strip()
        )
        # Re-enable just in case: inclusive-language: enable.

        raise presubmit.PresubmitFailure
178
179
def inclusive_language_checker(*words):
    """Create banned words checker for the given list of banned words.

    Args:
        *words: Regular-expression fragments, passed through unchanged to
            _process_inclusive_language.

    Returns:
        A function taking a presubmit context that runs presubmit_check with
        the pattern built from ``words``.
    """

    regex = _process_inclusive_language(*words)

    def inclusive_language(ctx: presubmit.PresubmitContext):
        # Call the module-level check directly. The previous lookup of
        # 'inclusive_language' in globals() named something this module does
        # not define, so it raised KeyError as soon as the checker ran.
        # NOTE(review): assumes the object produced by @presubmit.check can
        # be called with (ctx, regex), as the previous lookup also assumed.
        presubmit_check(ctx, regex)

    return inclusive_language
191