# Copyright 2021 The Pigweed Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
"""Inclusive language presubmit check."""

import dataclasses
from pathlib import Path
import re

from . import presubmit, presubmit_context

# List borrowed from Android:
# https://source.android.com/setup/contribute/respectful-code
# inclusive-language: disable
NON_INCLUSIVE_WORDS = [
    r'master',
    r'slave',
    r'red[-\s]?line',
    r'(white|gr[ae]y|black)[-\s]*(list|hat)',
    r'craz(y|ie)',
    r'insane',
    r'crip+led?',
    r'sanity',
    r'sane',
    r'dummy',
    r'grandfather',
    r's?he',
    r'his',
    r'her',
    r'm[ae]n[-\s]*in[-\s]*the[-\s]*middle',
    r'mitm',
    r'first[-\s]?class[-\s]?citizen',
]
# inclusive-language: enable

# Test: master  # inclusive-language: ignore
# Test: master


def _process_inclusive_language(*words):
    """Turn word list into one big regex with common inflections.

    Args:
        words: Regex fragments to ban. Each positional argument is either a
            single fragment (str) or a list/tuple of fragments, which is
            flattened one level. When no arguments are given,
            NON_INCLUSIVE_WORDS is used.

    Returns:
        A compiled pattern that matches any fragment case-insensitively when
        it is delimited by a word boundary, optionally followed by an
        "s", "d", "es" or "ed" suffix.

    Raises:
        TypeError: If an argument is neither a str nor a list/tuple.
        re.error: If an individual fragment is not a valid regex.
    """

    if not words:
        words = tuple(NON_INCLUSIVE_WORDS)

    all_words = []
    for entry in words:
        if isinstance(entry, str):
            all_words.append(entry)
        elif isinstance(entry, (list, tuple)):
            all_words.extend(entry)
        else:
            # Fail loudly instead of silently dropping an unusable entry.
            raise TypeError(
                'Expected a str, list, or tuple of regex fragments, got '
                f'{type(entry).__name__}'
            )
    all_words = tuple(all_words)

    # Confirm each individual word compiles as a valid regex.
    for word in all_words:
        _ = re.compile(word)

    # Besides \b, treat "_", lower-to-upper case transitions and
    # digit/non-digit transitions as boundaries so that words embedded in
    # snake_case or camelCase identifiers are found too.
    word_boundary = (
        r'(\b|_|(?<=[a-z])(?=[A-Z])|(?<=[0-9])(?=\w)|(?<=\w)(?=[0-9]))'
    )

    return re.compile(
        r"({b})(?i:{w})(e?[sd]{b}|{b})".format(
            w='|'.join(all_words), b=word_boundary
        ),
    )


NON_INCLUSIVE_WORDS_REGEX = _process_inclusive_language()

# If seen, ignore this line and the next.
_IGNORE = 'inclusive-language: ignore'

# Ignore a whole section. Please do not change the order of these lines.
_DISABLE = 'inclusive-language: disable'
_ENABLE = 'inclusive-language: enable'


@dataclasses.dataclass
class PathMatch:
    """A banned word found in a file's path."""

    word: str

    def __repr__(self):
        return f'Found non-inclusive word "{self.word}" in file path'


@dataclasses.dataclass
class LineMatch:
    """A banned word found on a specific (1-based) line of a file."""

    line: int
    word: str

    def __repr__(self):
        return f'Found non-inclusive word "{self.word}" on line {self.line}'


@presubmit.check(name='inclusive_language')
def presubmit_check(
    ctx: presubmit_context.PresubmitContext,
    words_regex=NON_INCLUSIVE_WORDS_REGEX,
):
    """Presubmit check that ensures files do not contain banned words.

    Both the path (relative to ctx.root) and the text of every file in
    ctx.paths are searched. At most one match is reported for the path and
    one per line. Lines can be exempted with the _IGNORE marker (which covers
    the marked line and the one after it) or by wrapping a section in the
    _DISABLE / _ENABLE markers.

    Args:
        ctx: Presubmit context providing the paths, root and summary log.
        words_regex: Compiled pattern of banned words to search for.

    Raises:
        presubmit_context.PresubmitFailure: If any banned word was found. The
            findings are written to ctx.failure_summary_log and printed.
    """

    # No subprocesses are run for inclusive_language so don't perform this check
    # if dry_run is on.
    if ctx.dry_run:
        return

    found_words: dict[Path, list[PathMatch | LineMatch]] = {}

    ctx.paths = presubmit_context.apply_exclusions(ctx)

    for path in ctx.paths:
        match = words_regex.search(str(path.relative_to(ctx.root)))
        if match:
            found_words.setdefault(path, []).append(
                PathMatch(match.group(0))
            )

        # Only the contents of regular files are scanned.
        if path.is_symlink() or path.is_dir():
            continue

        try:
            with open(path, 'r') as ins:
                enabled = True
                prev = ''
                for i, line in enumerate(ins, start=1):
                    # _ENABLE is tested second, so it wins when both markers
                    # appear on the same line.
                    if _DISABLE in line:
                        enabled = False
                    if _ENABLE in line:
                        enabled = True

                    # If we see the ignore line on this or the previous line we
                    # ignore any bad words on this line.
                    ignored = _IGNORE in prev or _IGNORE in line

                    if enabled and not ignored:
                        match = words_regex.search(line)

                        if match:
                            found_words.setdefault(path, []).append(
                                LineMatch(i, match.group(0))
                            )

                    # Not using 'continue' so this line always executes.
                    prev = line

        except UnicodeDecodeError:
            # File is not text, like a gif.
            pass

    if found_words:
        with open(ctx.failure_summary_log, 'w') as outs:
            for i, (path, matches) in enumerate(found_words.items()):
                # Separator between files, but not before the first one.
                if i:
                    print('=' * 40, file=outs)
                print(path, file=outs)
                for match in matches:
                    print(match, file=outs)

        print(ctx.failure_summary_log.read_text(), end=None)

        print()
        print(
            """
Individual lines can be ignored with "inclusive-language: ignore". Blocks can be
ignored with "inclusive-language: disable" and reenabled with
"inclusive-language: enable".
""".strip()
        )
        # Re-enable just in case: inclusive-language: enable.

        raise presubmit_context.PresubmitFailure


def inclusive_language_checker(*words):
    """Create banned words checker for the given list of banned words.

    Args:
        words: Regex fragments (or lists/tuples of fragments) to ban; passed
            straight to _process_inclusive_language.

    Returns:
        A function taking a PresubmitContext that runs presubmit_check with
        the custom word list.
    """

    regex = _process_inclusive_language(*words)

    def inclusive_language(  # pylint: disable=redefined-outer-name
        ctx: presubmit_context.PresubmitContext,
    ):
        # Call the module-level check directly. No module global is named
        # 'inclusive_language', so looking it up in globals() raised KeyError.
        # NOTE(review): assumes the object returned by @presubmit.check can
        # be called as (ctx, regex) -- confirm against presubmit.Check.
        presubmit_check(ctx, regex)

    return inclusive_language