#!/usr/bin/env python3
# Copyright 2020 Google LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
################################################################################
"""Check code for common issues before submitting."""

import argparse
import os
import subprocess
import sys
import unittest

import yaml

# Grandparent directory of this file; used as the working directory for the
# checks and as the base for locating the build tests.
_SRC_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))


def _is_project_file(actual_path, expected_filename):
  """Returns True if |actual_path| is an existing projects/<name>/ file.

  Args:
    actual_path: Path of the file to inspect.
    expected_filename: Basename the file must have (e.g. 'build.sh').

  Returns:
    True if the basename of |actual_path| is |expected_filename|, its
    grandparent directory is named 'projects', and the path exists.
  """
  if os.path.basename(actual_path) != expected_filename:
    return False

  grandparent_dir = os.path.dirname(os.path.dirname(actual_path))
  if os.path.basename(grandparent_dir) != 'projects':
    return False

  return os.path.exists(actual_path)


# TODO: Check for -fsanitize=fuzzer in files as well.


def _check_one_lib_fuzzing_engine(build_sh_file):
  """Returns False if |build_sh_file| contains -lFuzzingEngine.

  This is deprecated behavior. $LIB_FUZZING_ENGINE should be used instead so
  that -fsanitize=fuzzer is used.

  Args:
    build_sh_file: Path to check. Anything that is not a project's build.sh is
      accepted without being read.

  Returns:
    True if the file is not a project build.sh or does not use
    -lFuzzingEngine outside of comments, False otherwise.
  """
  if not _is_project_file(build_sh_file, 'build.sh'):
    return True

  with open(build_sh_file) as build_sh:
    # Count from 1 so that the reported number matches what editors show.
    for line_num, line in enumerate(build_sh, start=1):
      # Only look at the part of the line before any '#' comment marker.
      uncommented_code = line.split('#')[0]
      if '-lFuzzingEngine' in uncommented_code:
        print(
            'Error: build.sh contains deprecated "-lFuzzingEngine" on line: {0}. '
            'Please use "$LIB_FUZZING_ENGINE" instead.'.format(line_num))
        return False
  return True


def check_lib_fuzzing_engine(paths):
  """Calls _check_one_lib_fuzzing_engine on each path in |paths|.

  Returns:
    True if the result of every call is True.
  """
  # A list (not a generator) is used so that every path is checked and every
  # error is printed before all() computes the result.
  return all([_check_one_lib_fuzzing_engine(path) for path in paths])


class ProjectYamlChecker:
  """Checks for a project.yaml file."""

  # Sections in a project.yaml and the constant values that they are allowed
  # to have.
  SECTIONS_AND_CONSTANTS = {
      'sanitizers': {'address', 'none', 'memory', 'undefined', 'dataflow'},
      'architectures': {'i386', 'x86_64'},
      'fuzzing_engines': {'afl', 'libfuzzer', 'honggfuzz', 'dataflow', 'none'},
  }

  # Note: this list must be updated when we allow new sections.
  VALID_SECTION_NAMES = [
      'architectures',
      'auto_ccs',
      'blackbox',
      'builds_per_day',
      'coverage_extra_args',
      'disabled',
      'fuzzing_engines',
      'help_url',
      'homepage',
      'language',
      'labels',  # For internal use only, hard to lint as it uses fuzzer names.
      'main_repo',
      'primary_contact',
      'run_tests',
      'sanitizers',
      'selective_unpack',
      'vendor_ccs',
      'view_restrictions',
  ]

  LANGUAGES_SUPPORTED = [
      'c',
      'c++',
      'go',
      'jvm',
      'python',
      'rust',
  ]

  # Note that some projects like boost only have auto-ccs. However, forgetting
  # primary contact is probably a mistake.
  REQUIRED_SECTIONS = ['primary_contact', 'main_repo']

  def __init__(self, filename):
    """Loads |filename| as YAML and initializes the checker state.

    Args:
      filename: Path of the project.yaml file to check.
    """
    self.filename = filename
    with open(filename) as file_handle:
      # An empty YAML document parses to None. Treat it as an empty mapping so
      # that the checks report missing sections instead of raising
      # AttributeError.
      self.data = yaml.safe_load(file_handle) or {}

    self.success = True

  def do_checks(self):
    """Do all project.yaml checks. Return True if they pass."""
    if not isinstance(self.data, dict):
      # Every check below treats the document as a mapping of section names.
      self.error('project.yaml must contain a mapping of sections.')
      return False

    if self.is_disabled():
      return True

    checks = [
        self.check_project_yaml_constants,
        self.check_required_sections,
        self.check_valid_section_names,
        self.check_valid_emails,
        self.check_valid_language,
    ]
    # Run every check (no early exit) so all problems are reported at once.
    for check_function in checks:
      check_function()
    return self.success

  def is_disabled(self):
    """Is this project disabled."""
    return self.data.get('disabled', False)

  def error(self, message):
    """Print an error message and set self.success to False."""
    self.success = False
    print('Error in {filename}: {message}'.format(filename=self.filename,
                                                  message=message))

  def check_project_yaml_constants(self):
    """Check that certain sections only have certain constant values."""
    for section, allowed_constants in self.SECTIONS_AND_CONSTANTS.items():
      if section not in self.data:
        continue
      actual_constants = self.data[section]
      if not isinstance(actual_constants, list):
        # E.g. an empty section (None) or a scalar value. Report it rather
        # than crashing or iterating over the characters of a string.
        self.error(
            '{section} section must be a list.'.format(section=section))
        continue

      for constant in actual_constants:
        if isinstance(constant, str):
          if constant not in allowed_constants:
            # Sort so that the message is the same on every run.
            self.error(('{constant} (in {section} section) is not a valid '
                        'constant ({allowed_constants}).').format(
                            constant=constant,
                            section=section,
                            allowed_constants=', '.join(
                                sorted(allowed_constants))))
        elif isinstance(constant, dict):
          # The only alternative value allowed is the experimental flag, i.e.
          # `constant == {'memory': {'experimental': True}}`. Do not check the
          # experimental flag, but assert that the sanitizer is a valid one.
          # The dict must have exactly one key; an empty dict is rejected too.
          if (len(constant) != 1 or
              next(iter(constant)) not in allowed_constants):
            self.error('Not allowed value in the project.yaml: ' +
                       str(constant))
        else:
          self.error('Not allowed value in the project.yaml: ' + str(constant))

  def check_valid_section_names(self):
    """Check that only valid sections are included."""
    for name in self.data:
      if name not in self.VALID_SECTION_NAMES:
        self.error('{name} is not a valid section name ({valid_names})'.format(
            name=name, valid_names=self.VALID_SECTION_NAMES))

  def check_required_sections(self):
    """Check that all required sections are present."""
    for section in self.REQUIRED_SECTIONS:
      if section not in self.data:
        self.error(section + ' section is missing.')

  def check_valid_emails(self):
    """Check that emails are valid looking."""
    # Get email addresses.
    email_addresses = []
    primary_contact = self.data.get('primary_contact')
    if primary_contact:
      email_addresses.append(primary_contact)
    auto_ccs = self.data.get('auto_ccs')
    if auto_ccs:
      if isinstance(auto_ccs, str):
        # A single address written as a scalar; extending with a string would
        # add each of its characters as a separate "address".
        email_addresses.append(auto_ccs)
      else:
        email_addresses.extend(auto_ccs)

    # Check that email addresses seem normal.
    for email_address in email_addresses:
      if (not isinstance(email_address, str) or '@' not in email_address or
          '.' not in email_address):
        self.error(str(email_address) + ' is an invalid email address.')

  def check_valid_language(self):
    """Check that the language is specified and valid."""
    language = self.data.get('language')
    if not language:
      self.error('Missing "language" attribute in project.yaml.')
    elif language not in self.LANGUAGES_SUPPORTED:
      self.error(
          '"language: {language}" is not supported ({supported}).'.format(
              language=language, supported=self.LANGUAGES_SUPPORTED))


def _check_one_project_yaml(project_yaml_filename):
  """Do checks on the project.yaml file.

  Returns:
    True if |project_yaml_filename| is not a project's project.yaml or if all
    of its checks pass.
  """
  if not _is_project_file(project_yaml_filename, 'project.yaml'):
    return True

  checker = ProjectYamlChecker(project_yaml_filename)
  return checker.do_checks()


def check_project_yaml(paths):
  """Calls _check_one_project_yaml on each path in |paths|.

  Returns:
    True if the result of every call is True.
  """
  return all([_check_one_project_yaml(path) for path in paths])


def do_checks(changed_files):
  """Run all presubmit checks. Return False if any fails."""
  checks = [
      check_license, yapf, lint, check_project_yaml, check_lib_fuzzing_engine
  ]
  # Use a list comprehension here and in other cases where we use all() so that
  # we don't quit early on failure. This is more user-friendly since the more
  # errors we spit out at once, the fewer check-fix-check cycles users need to
  # do.
  return all([check(changed_files) for check in checks])


_CHECK_LICENSE_FILENAMES = ['Dockerfile']
_CHECK_LICENSE_EXTENSIONS = [
    '.bash',
    '.c',
    '.cc',
    '.cpp',
    '.css',
    '.h',
    '.htm',
    '.html',
    '.js',
    '.proto',
    '.py',
    '.sh',
]

_LICENSE_STRING = 'http://www.apache.org/licenses/LICENSE-2.0'


def check_license(paths):
  """Validate license header.

  Args:
    paths: Paths of files to check. Only files named in
      _CHECK_LICENSE_FILENAMES or with an extension in
      _CHECK_LICENSE_EXTENSIONS are read.

  Returns:
    True if every checked file contains _LICENSE_STRING.
  """
  if not paths:
    return True

  success = True
  for path in paths:
    filename = os.path.basename(path)
    extension = os.path.splitext(path)[1]
    if (filename not in _CHECK_LICENSE_FILENAMES and
        extension not in _CHECK_LICENSE_EXTENSIONS):
      continue

    with open(path) as file_handle:
      if _LICENSE_STRING not in file_handle.read():
        print('Missing license header in file %s.' % str(path))
        success = False

  return success


def bool_to_returncode(success):
  """Return 0 if |success|. Otherwise return 1."""
  if success:
    print('Success.')
    return 0

  print('Failed.')
  return 1


def is_nonfuzzer_python(path):
  """Returns True if |path| ends in .py and does not contain '/projects/'."""
  return os.path.splitext(path)[1] == '.py' and '/projects/' not in path


def lint(_=None):
  """Run python's linter on infra. Return False if it fails linting."""

  command = ['python3', '-m', 'pylint', '-j', '0', 'infra']
  returncode = subprocess.run(command, check=False).returncode
  return returncode == 0


def yapf(paths, validate=True):
  """Runs yapf on the non-fuzzer Python files in |paths|.

  Args:
    paths: Candidate file paths; those rejected by is_nonfuzzer_python are
      skipped.
    validate: If True, run yapf with -d instead of -i.

  Returns:
    True if there was nothing to do or yapf exited with status 0.
  """
  paths = [path for path in paths if is_nonfuzzer_python(path)]
  if not paths:
    return True

  validate_argument = '-d' if validate else '-i'
  command = ['yapf', validate_argument, '-p']
  command.extend(paths)

  returncode = subprocess.run(command, check=False).returncode
  return returncode == 0


def get_changed_files():
  """Return a list of absolute paths of files changed in this git branch."""
  branch_commit_hash = subprocess.check_output(
      ['git', 'merge-base', 'FETCH_HEAD', 'origin/HEAD']).strip().decode()

  diff_commands = [
      # Return list of modified files in the commits on this branch.
      ['git', 'diff', '--name-only', branch_commit_hash + '..'],
      # Return list of modified files from uncommitted changes.
      ['git', 'diff', '--name-only']
  ]

  changed_files = set()
  for command in diff_commands:
    file_paths = subprocess.check_output(command).decode().splitlines()
    for file_path in file_paths:
      # Skip anything that is not currently a regular file (e.g. deleted).
      if not os.path.isfile(file_path):
        continue
      changed_files.add(file_path)
  print('Changed files: {changed_files}'.format(
      changed_files=' '.join(changed_files)))
  return [os.path.abspath(f) for f in changed_files]


def run_build_tests():
  """Runs build tests because they can't be run in parallel."""
  suite_list = [
      unittest.TestLoader().discover(os.path.join(_SRC_ROOT, 'infra', 'build'),
                                     pattern='*_test.py'),
  ]
  suite = unittest.TestSuite(suite_list)
  print('Running build tests.')
  result = unittest.TextTestRunner().run(suite)
  return not result.failures and not result.errors


def run_nonbuild_tests(parallel):
  """Run all tests but build tests. Do it in parallel if |parallel|. The reason
  why we exclude build tests is because they use an emulator that prevents them
  from being used in parallel."""
  # We look for all project directories because otherwise pytest won't run tests
  # that are not in valid modules (e.g. "base-images").
  relevant_dirs = set()
  all_files = get_all_files()
  for file_path in all_files:
    directory = os.path.dirname(file_path)
    relevant_dirs.add(directory)

  # Use ignore-glob because ignore doesn't seem to work properly with the way we
  # pass directories to pytest.
  command = [
      'pytest',
      # Test errors with error: "ModuleNotFoundError: No module named 'apt'.
      '--ignore-glob=infra/base-images/base-sanitizer-libs-builder/*',
      '--ignore-glob=infra/build/*',
  ]
  if parallel:
    command.extend(['-n', 'auto'])
  command += list(relevant_dirs)
  print('Running non-build tests.')
  return subprocess.run(command, check=False).returncode == 0


def run_tests(_=None, parallel=False):
  """Runs all unit tests."""
  # Both suites are always run so that all failures are reported together.
  nonbuild_success = run_nonbuild_tests(parallel)
  build_success = run_build_tests()
  return nonbuild_success and build_success


def get_all_files():
  """Returns a list of absolute paths of files in this repo."""
  get_all_files_command = ['git', 'ls-files']
  output = subprocess.check_output(get_all_files_command).decode().splitlines()
  return [os.path.abspath(path) for path in output if os.path.isfile(path)]


def main():
  """Check changes on a branch for common issues before submitting.

  Returns:
    0 if the requested check(s) succeeded, 1 otherwise.
  """
  # Get program arguments.
  parser = argparse.ArgumentParser(description='Presubmit script for oss-fuzz.')
  parser.add_argument('command',
                      choices=['format', 'lint', 'license', 'infra-tests'],
                      nargs='?')
  parser.add_argument('-a',
                      '--all-files',
                      action='store_true',
                      help='Run presubmit check(s) on all files',
                      default=False)
  parser.add_argument('-p',
                      '--parallel',
                      action='store_true',
                      help='Run tests in parallel.',
                      default=False)
  args = parser.parse_args()

  if args.all_files:
    relevant_files = get_all_files()
  else:
    relevant_files = get_changed_files()

  os.chdir(_SRC_ROOT)

  # Do one specific check if the user asked for it.
  if args.command == 'format':
    success = yapf(relevant_files, False)
    return bool_to_returncode(success)

  if args.command == 'lint':
    success = lint()
    return bool_to_returncode(success)

  if args.command == 'license':
    success = check_license(relevant_files)
    return bool_to_returncode(success)

  if args.command == 'infra-tests':
    success = run_tests(relevant_files, parallel=args.parallel)
    return bool_to_returncode(success)

  # Do all the checks (but no tests).
  success = do_checks(relevant_files)

  return bool_to_returncode(success)


if __name__ == '__main__':
  sys.exit(main())