• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2021 The Pigweed Authors
2#
3# Licensed under the Apache License, Version 2.0 (the "License"); you may not
4# use this file except in compliance with the License. You may obtain a copy of
5# the License at
6#
7#     https://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12# License for the specific language governing permissions and limitations under
13# the License.
14"""Inclusive language presubmit check."""
15
16import dataclasses
17from pathlib import Path
18import re
19from typing import Dict, List, Union
20
21from . import presubmit
22
# List borrowed from Android:
# https://source.android.com/setup/contribute/respectful-code
#
# Each entry is a regular-expression fragment rather than a literal string.
# When _process_inclusive_language() is called without arguments it joins these
# fragments into a single case-insensitive alternation and adds the word
# boundaries and optional suffixes around it.
#
# The marker comments directly above and below the list switch the check off
# and back on for these lines (see the handling of _DISABLE and _ENABLE in
# inclusive_language()).
# inclusive-language: disable
NON_INCLUSIVE_WORDS = [
    r'master',
    r'slave',
    r'(white|gr[ae]y|black)\s*(list|hat)',
    r'craz(y|ie)',
    r'insane',
    r'crip+led?',
    r'sanity',
    r'sane',
    r'dummy',
    r'grandfather',
    r's?he',
    r'his',
    r'her',
    r'm[ae]n[-\s]*in[-\s]*the[-\s]*middle',
    r'mitm',
]
# inclusive-language: enable
44
45# Test: master  # inclusive-language: ignore
46# Test: master
47
48
49def _process_inclusive_language(*words):
50    """Turn word list into one big regex with common inflections."""
51
52    if not words:
53        words = tuple(NON_INCLUSIVE_WORDS)
54
55    all_words = []
56    for entry in words:
57        if isinstance(entry, str):
58            all_words.append(entry)
59        elif isinstance(entry, (list, tuple)):
60            all_words.extend(entry)
61        all_words.extend(x for x in words)
62    all_words = tuple(all_words)
63
64    # Confirm each individual word compiles as a valid regex.
65    for word in all_words:
66        _ = re.compile(word)
67
68    word_boundary = (
69        r'(\b|_|(?<=[a-z])(?=[A-Z])|(?<=[0-9])(?=\w)|(?<=\w)(?=[0-9]))')
70
71    return re.compile(
72        r"({b})(?i:{w})(e?[sd]{b}|{b})".format(w='|'.join(all_words),
73                                               b=word_boundary), )
74
75
# Pattern built from the default NON_INCLUSIVE_WORDS list when this module is
# loaded; inclusive_language() uses it as the default for words_regex.
NON_INCLUSIVE_WORDS_REGEX = _process_inclusive_language()

# If seen, ignore this line and the next.
_IGNORE = 'inclusive-language: ignore'

# Ignore a whole section. Please do not change the order of these lines.
# NOTE(review): presumably the order matters because this file is scanned by
# its own check, so the line defining _DISABLE turns checking off and the line
# defining _ENABLE has to follow to turn it back on -- confirm.
_DISABLE = 'inclusive-language: disable'
_ENABLE = 'inclusive-language: enable'
84
85
@dataclasses.dataclass
class PathMatch:
    """Record of a flagged word that was found in a file's path."""

    # The matched text, as returned by the regex match.
    word: str

    def __repr__(self) -> str:
        # Doubles as the human-readable report line printed by the check.
        return f'Found non-inclusive word "{self.word}" in file path'
92
93
@dataclasses.dataclass
class LineMatch:
    """Record of a flagged word that was found on a line of a file."""

    # 1-based line number, as produced by enumerate(..., start=1).
    line: int
    # The matched text, as returned by the regex match.
    word: str

    def __repr__(self) -> str:
        # Doubles as the human-readable report line printed by the check.
        return f'Found non-inclusive word "{self.word}" on line {self.line}'
101
102
@presubmit.Check
def inclusive_language(
    ctx: presubmit.PresubmitContext,
    words_regex=NON_INCLUSIVE_WORDS_REGEX,
):
    """Presubmit check that ensures files do not contain banned words.

    The root-relative path of every entry in ctx.paths is searched with
    words_regex. Entries that are neither symlinks nor directories are then
    read line by line and each line is searched as well. Only the first match
    in a path or on a line is reported.

    Lines containing _IGNORE, and the line directly after them, are skipped.
    Lines from one containing _DISABLE up to one containing _ENABLE are
    skipped too.

    Args:
        ctx: Presubmit context; ctx.paths and ctx.root are read.
        words_regex: Compiled pattern used for the search.

    Raises:
        presubmit.PresubmitFailure: If at least one match was found. The
            matches are printed, grouped by file, before raising.
    """

    found_words: Dict[Path, List[Union[PathMatch, LineMatch]]] = {}

    for path in ctx.paths:
        match = words_regex.search(str(path.relative_to(ctx.root)))
        if match:
            found_words.setdefault(path, []).append(
                PathMatch(match.group(0)))

        if path.is_symlink() or path.is_dir():
            continue

        try:
            # Decode as UTF-8 explicitly. With the locale's default encoding
            # a valid UTF-8 file can fail to decode on some platforms and
            # would then be skipped as if it were binary.
            with open(path, 'r', encoding='utf-8') as ins:
                enabled = True
                prev = ''
                for i, line in enumerate(ins, start=1):
                    # Checked in this order so a line holding both markers
                    # leaves checking switched on.
                    if _DISABLE in line:
                        enabled = False
                    if _ENABLE in line:
                        enabled = True

                    # If we see the ignore line on this or the previous line we
                    # ignore any bad words on this line.
                    ignored = _IGNORE in prev or _IGNORE in line

                    if enabled and not ignored:
                        match = words_regex.search(line)

                        if match:
                            found_words.setdefault(path, []).append(
                                LineMatch(i, match.group(0)))

                    # Not using 'continue' so this line always executes.
                    prev = line

        except UnicodeDecodeError:
            # File is not text, like a gif.
            pass

    for path, matches in found_words.items():
        print('=' * 40)
        print(path)
        for match in matches:
            print(match)

    if found_words:
        print()
        print("""
Individual lines can be ignored with "inclusive-language: ignore". Blocks can be
ignored with "inclusive-language: disable" and reenabled with
"inclusive-language: enable".
""".strip())
        # Re-enable just in case: inclusive-language: enable.

        raise presubmit.PresubmitFailure
166
167
def inclusive_language_checker(*words):
    """Build a check function that flags the given words.

    Args:
        *words: Passed straight through to _process_inclusive_language().

    Returns:
        A function taking a presubmit context that runs the module-level
        inclusive_language check with the pattern compiled from words.
    """

    compiled = _process_inclusive_language(*words)

    def inclusive_language(  # pylint: disable=redefined-outer-name
            ctx: presubmit.PresubmitContext):
        # This inner function shadows the module-level check of the same name,
        # so the outer one is fetched from the module globals at call time.
        module_level_check = globals()['inclusive_language']
        module_level_check(ctx, compiled)

    return inclusive_language
178