# Copyright 2021 The Pigweed Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
"""Inclusive language presubmit check."""

import dataclasses
from pathlib import Path
import re
from typing import Dict, List, Union

from . import presubmit

# List borrowed from Android:
# https://source.android.com/setup/contribute/respectful-code
# inclusive-language: disable
NON_INCLUSIVE_WORDS = [
    r'master',
    r'slave',
    r'red[-\s]?line',
    r'(white|gr[ae]y|black)[-\s]*(list|hat)',
    r'craz(y|ie)',
    r'insane',
    r'crip+led?',
    r'sanity',
    r'sane',
    r'dummy',
    r'grandfather',
    r's?he',
    r'his',
    r'her',
    r'm[ae]n[-\s]*in[-\s]*the[-\s]*middle',
    r'mitm',
    r'first[-\s]?class[-\s]?citizen',
]
# inclusive-language: enable

# Test: master  # inclusive-language: ignore
# Test: master


def _process_inclusive_language(*words):
    """Turn word list into one big regex with common inflections.

    Args:
        *words: Regex fragments to flag. Each argument is either a single
            string or a list/tuple of strings. When no arguments are given,
            NON_INCLUSIVE_WORDS is used.

    Returns:
        A compiled regex that matches any of the words (case-insensitively)
        when delimited by a boundary on both sides, optionally followed by
        an "s", "d", "es" or "ed" suffix.

    Raises:
        TypeError: An argument is neither a string nor a list/tuple.
        ValueError: The flattened word list is empty.
        re.error: One of the words is not a valid regular expression.
    """

    if not words:
        words = tuple(NON_INCLUSIVE_WORDS)

    # Flatten the arguments: plain strings are taken as-is, lists and tuples
    # contribute each of their elements.
    all_words = []
    for entry in words:
        if isinstance(entry, str):
            all_words.append(entry)
        elif isinstance(entry, (list, tuple)):
            all_words.extend(entry)
        else:
            raise TypeError(
                'Expected a string or a list/tuple of strings, got '
                f'{type(entry).__name__}'
            )
    all_words = tuple(all_words)

    # An empty alternation would match at every boundary and flag every line.
    if not all_words:
        raise ValueError('At least one word is required')

    # Confirm each individual word compiles as a valid regex.
    for word in all_words:
        re.compile(word)

    # Besides the regex \b, an underscore, a lowercase-to-uppercase
    # transition, and a transition between a digit and a word character all
    # count as boundaries, so words embedded in identifiers are found too.
    word_boundary = (
        r'(\b|_|(?<=[a-z])(?=[A-Z])|(?<=[0-9])(?=\w)|(?<=\w)(?=[0-9]))'
    )

    return re.compile(
        r"({b})(?i:{w})(e?[sd]{b}|{b})".format(
            w='|'.join(all_words), b=word_boundary
        ),
    )


NON_INCLUSIVE_WORDS_REGEX = _process_inclusive_language()

# If seen, ignore this line and the next.
_IGNORE = 'inclusive-language: ignore'

# Ignore a whole section. Please do not change the order of these lines.
_DISABLE = 'inclusive-language: disable'
_ENABLE = 'inclusive-language: enable'


@dataclasses.dataclass
class PathMatch:
    """A flagged word found in a file's path."""

    word: str

    def __repr__(self):
        return f'Found non-inclusive word "{self.word}" in file path'


@dataclasses.dataclass
class LineMatch:
    """A flagged word found on a 1-based line number of a file."""

    line: int
    word: str

    def __repr__(self):
        return f'Found non-inclusive word "{self.word}" on line {self.line}'


@presubmit.check(name='inclusive_language')
def presubmit_check(
    ctx: presubmit.PresubmitContext,
    words_regex=NON_INCLUSIVE_WORDS_REGEX,
):
    """Presubmit check that ensures files do not contain banned words.

    Both the path (relative to ctx.root) and the text of every file in
    ctx.paths are searched. Only the first match in a path or on a line is
    recorded. Symlinks and directories have only their path checked, and
    files that cannot be decoded as text are skipped.

    Args:
        ctx: Presubmit context; ctx.paths, ctx.root and
            ctx.failure_summary_log are used.
        words_regex: Compiled regex matching the banned words.

    Raises:
        presubmit.PresubmitFailure: At least one banned word was found.
    """

    found_words: Dict[Path, List[Union[PathMatch, LineMatch]]] = {}

    for path in ctx.paths:
        match = words_regex.search(str(path.relative_to(ctx.root)))
        if match:
            found_words.setdefault(path, []).append(
                PathMatch(match.group(0))
            )

        if path.is_symlink() or path.is_dir():
            continue

        try:
            with open(path, 'r') as ins:
                enabled = True
                prev = ''
                for i, line in enumerate(ins, start=1):
                    # The enable marker is tested second, so it wins when
                    # both markers appear on the same line.
                    if _DISABLE in line:
                        enabled = False
                    if _ENABLE in line:
                        enabled = True

                    # If we see the ignore line on this or the previous line we
                    # ignore any bad words on this line.
                    ignored = _IGNORE in prev or _IGNORE in line

                    if enabled and not ignored:
                        match = words_regex.search(line)

                        if match:
                            found_words.setdefault(path, []).append(
                                LineMatch(i, match.group(0))
                            )

                    # Not using 'continue' so this line always executes.
                    prev = line

        except UnicodeDecodeError:
            # File is not text, like a gif.
            pass

    if found_words:
        # Write the summary to the log first, then echo the log to stdout.
        with open(ctx.failure_summary_log, 'w') as outs:
            for i, (path, matches) in enumerate(found_words.items()):
                if i:
                    print('=' * 40, file=outs)
                print(path, file=outs)
                for match in matches:
                    print(match, file=outs)

        print(ctx.failure_summary_log.read_text(), end=None)

        print()
        print(
            """
Individual lines can be ignored with "inclusive-language: ignore". Blocks can be
ignored with "inclusive-language: disable" and reenabled with
"inclusive-language: enable".
""".strip()
        )
        # Re-enable just in case: inclusive-language: enable.

        raise presubmit.PresubmitFailure


def inclusive_language_checker(*words):
    """Create banned words checker for the given list of banned words.

    Args:
        *words: Regex fragments (strings, or lists/tuples of strings) to
            flag; passed to _process_inclusive_language().

    Returns:
        A function taking a presubmit.PresubmitContext that runs
        presubmit_check() with a regex built from the given words.
    """

    regex = _process_inclusive_language(*words)

    def inclusive_language(  # pylint: disable=redefined-outer-name
        ctx: presubmit.PresubmitContext,
    ):
        # Call the module-level check directly. There is no module-level
        # name 'inclusive_language' to look up; that string is only the
        # name given to the check decorator above.
        # NOTE(review): assumes the object returned by presubmit.check
        # forwards extra positional arguments to the wrapped function.
        presubmit_check(ctx, regex)

    return inclusive_language