# Copyright 2021 The Pigweed Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
"""Inclusive language presubmit check."""

import dataclasses
from pathlib import Path
import re
from typing import Dict, List, Union

from . import presubmit

# List borrowed from Android:
# https://source.android.com/setup/contribute/respectful-code
# inclusive-language: disable
NON_INCLUSIVE_WORDS = [
    r'master',
    r'slave',
    r'(white|gr[ae]y|black)\s*(list|hat)',
    r'craz(y|ie)',
    r'insane',
    r'crip+led?',
    r'sanity',
    r'sane',
    r'dummy',
    r'grandfather',
    r's?he',
    r'his',
    r'her',
    r'm[ae]n[-\s]*in[-\s]*the[-\s]*middle',
    r'mitm',
]
# inclusive-language: enable

# Test: master  # inclusive-language: ignore
# Test: master


def _process_inclusive_language(*words):
    """Turn word list into one big regex with common inflections.

    Args:
        *words: Regex fragments to ban. Each argument is either a single
            string or a list/tuple of strings, which is flattened one level.
            When no arguments are given, NON_INCLUSIVE_WORDS is used.

    Returns:
        A compiled pattern that matches any of the words case-insensitively,
        delimited by word boundaries (including underscores, camelCase
        transitions and letter/digit transitions) and optionally followed by
        an "s", "d", "es" or "ed" suffix.

    Raises:
        TypeError: An entry is neither a string nor a list/tuple.
        ValueError: The flattened word collection is empty.
        re.error: An individual word is not a valid regular expression.
    """

    if not words:
        words = tuple(NON_INCLUSIVE_WORDS)

    # Flatten one level of nesting. Each word is added exactly once; the raw
    # arguments themselves are never appended, since a nested list is not a
    # valid regex fragment and repeated alternatives are redundant.
    all_words = []
    for entry in words:
        if isinstance(entry, str):
            all_words.append(entry)
        elif isinstance(entry, (list, tuple)):
            all_words.extend(entry)
        else:
            raise TypeError(
                f'Expected str, list, or tuple, got {type(entry).__name__}')
    all_words = tuple(all_words)

    # An empty alternation would match the empty string on every line.
    if not all_words:
        raise ValueError('At least one word is required')

    # Confirm each individual word compiles as a valid regex.
    for word in all_words:
        _ = re.compile(word)

    word_boundary = (
        r'(\b|_|(?<=[a-z])(?=[A-Z])|(?<=[0-9])(?=\w)|(?<=\w)(?=[0-9]))')

    return re.compile(
        r"({b})(?i:{w})(e?[sd]{b}|{b})".format(w='|'.join(all_words),
                                               b=word_boundary), )


NON_INCLUSIVE_WORDS_REGEX = _process_inclusive_language()

# If seen, ignore this line and the next.
_IGNORE = 'inclusive-language: ignore'

# Ignore a whole section. Please do not change the order of these lines.
_DISABLE = 'inclusive-language: disable'
_ENABLE = 'inclusive-language: enable'


@dataclasses.dataclass
class PathMatch:
    """A banned word found in a file's path."""

    # The matched text.
    word: str

    def __repr__(self):
        return f'Found non-inclusive word "{self.word}" in file path'


@dataclasses.dataclass
class LineMatch:
    """A banned word found on a specific line of a file."""

    # 1-based line number of the match.
    line: int
    # The matched text.
    word: str

    def __repr__(self):
        return f'Found non-inclusive word "{self.word}" on line {self.line}'


@presubmit.Check
def inclusive_language(
    ctx: presubmit.PresubmitContext,
    words_regex=NON_INCLUSIVE_WORDS_REGEX,
):
    """Presubmit check that ensures files do not contain banned words.

    Both the path of each file (relative to ctx.root) and its contents are
    searched. Only the first match on each line is reported. Lines carrying
    the ignore marker, lines directly after one, and sections between the
    disable and enable markers are skipped.

    Args:
        ctx: Presubmit context; ctx.paths and ctx.root are read.
        words_regex: Compiled pattern used to search paths and lines.

    Raises:
        presubmit.PresubmitFailure: At least one banned word was found.
    """

    found_words: Dict[Path, List[Union[PathMatch, LineMatch]]] = {}

    for path in ctx.paths:
        match = words_regex.search(str(path.relative_to(ctx.root)))
        if match:
            found_words.setdefault(path, []).append(
                PathMatch(match.group(0)))

        if path.is_symlink() or path.is_dir():
            continue

        try:
            with open(path, 'r') as ins:
                enabled = True
                prev = ''
                for i, line in enumerate(ins, start=1):
                    if _DISABLE in line:
                        enabled = False
                    if _ENABLE in line:
                        enabled = True

                    # If we see the ignore line on this or the previous line we
                    # ignore any bad words on this line.
                    ignored = _IGNORE in prev or _IGNORE in line

                    if enabled and not ignored:
                        match = words_regex.search(line)

                        if match:
                            found_words.setdefault(path, []).append(
                                LineMatch(i, match.group(0)))

                    # Not using 'continue' so this line always executes.
                    prev = line

        except UnicodeDecodeError:
            # File is not text, like a gif.
            pass

    for path, matches in found_words.items():
        print('=' * 40)
        print(path)
        for match in matches:
            print(match)

    if found_words:
        print()
        print("""
Individual lines can be ignored with "inclusive-language: ignore". Blocks can be
ignored with "inclusive-language: disable" and reenabled with
"inclusive-language: enable".
""".strip())
        # Re-enable just in case: inclusive-language: enable.

        raise presubmit.PresubmitFailure


def inclusive_language_checker(*words):
    """Create banned words checker for the given list of banned words.

    Args:
        *words: Regex fragments (strings, or lists/tuples of strings) to ban;
            forwarded to _process_inclusive_language.

    Returns:
        A function taking a presubmit context that runs the module-level
        inclusive_language check with the custom pattern.
    """

    regex = _process_inclusive_language(*words)

    def inclusive_language(  # pylint: disable=redefined-outer-name
            ctx: presubmit.PresubmitContext):
        # The local function has the same name as the module-level check, so
        # the module-level one is looked up through globals().
        globals()['inclusive_language'](ctx, regex)

    return inclusive_language