#!/usr/bin/env python3
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
################################################################################
"""Checks code for common issues before submitting."""

import argparse
import os
import subprocess
import sys
import unittest
import yaml

import constants

# Two directory levels above this file. NOTE(review): assumed to be the top of
# the git checkout (lint() targets the "infra" directory relative to it).
_SRC_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))


def _is_project_file(actual_path, expected_filename):
  """Returns True if |actual_path| is named |expected_filename|, exists, and
  lives directly inside a subdirectory of a directory named "projects" (i.e.
  it looks like projects/<project>/<expected_filename>)."""
  if os.path.basename(actual_path) != expected_filename:
    return False

  # The grandparent directory of the file must be called "projects".
  if os.path.basename(os.path.dirname(
      os.path.dirname(actual_path))) != 'projects':
    return False

  return os.path.exists(actual_path)


# TODO: Check for -fsanitize=fuzzer in files as well.


def _check_one_lib_fuzzing_engine(build_sh_file):
  """Returns False if |build_sh_file| contains -lFuzzingEngine.
  This is deprecated behavior. $LIB_FUZZING_ENGINE should be used instead
  so that -fsanitize=fuzzer is used. Files that are not a project's build.sh
  are not inspected and yield True."""
  if not _is_project_file(build_sh_file, 'build.sh'):
    return True

  with open(build_sh_file) as build_sh:
    build_sh_lines = build_sh.readlines()

  # Number lines from 1 so the reported line matches what an editor shows.
  for line_num, line in enumerate(build_sh_lines, start=1):
    # Ignore everything after the first '#' so that comments do not trigger
    # the check.
    uncommented_code = line.split('#')[0]
    if '-lFuzzingEngine' in uncommented_code:
      print('Error: build.sh contains deprecated "-lFuzzingEngine" on '
            'line: {0}. Please use "$LIB_FUZZING_ENGINE" instead.'.format(
                line_num))
      return False
  return True


def check_lib_fuzzing_engine(paths):
  """Calls _check_one_lib_fuzzing_engine on each path in |paths|. Returns True
  if the result of every call is True."""
  # A list (not a generator) is passed to all() on purpose: every path is
  # checked and reported even after the first failure.
  return all([_check_one_lib_fuzzing_engine(path) for path in paths])


class ProjectYamlChecker:
  """Checks for a project.yaml file.

  The individual check_* methods report problems through self.error(), which
  prints the message and sets self.success to False; they return None.
  do_checks() runs all of them and returns the overall result."""

  # Sections in a project.yaml and the constant values that they are allowed
  # to have.
  SECTIONS_AND_CONSTANTS = {
      'sanitizers': constants.SANITIZERS,
      'architectures': constants.ARCHITECTURES,
      'fuzzing_engines': constants.ENGINES,
  }

  # Note: this list must be updated when we allow new sections.
  VALID_SECTION_NAMES = [
      'architectures',
      'auto_ccs',
      'blackbox',
      'builds_per_day',
      'coverage_extra_args',
      'disabled',
      'fuzzing_engines',
      'help_url',
      'homepage',
      'language',
      'labels',  # For internal use only, hard to lint as it uses fuzzer names.
      'main_repo',
      'primary_contact',
      'run_tests',
      'sanitizers',
      'selective_unpack',
      'vendor_ccs',
      'view_restrictions',
  ]

  # Note that some projects like boost only have auto-ccs. However, forgetting
  # primary contact is probably a mistake.
  REQUIRED_SECTIONS = ['primary_contact', 'main_repo']

  def __init__(self, filename):
    """Loads the YAML document stored in |filename| into self.data."""
    self.filename = filename
    with open(filename) as file_handle:
      self.data = yaml.safe_load(file_handle)

    self.success = True

  def do_checks(self):
    """Does all project.yaml checks. Returns True if they pass."""
    # An empty file loads as None and a top-level list/scalar is not a set of
    # sections either. Report that as a normal error instead of crashing on
    # the dict accesses done by the checks below.
    if not isinstance(self.data, dict):
      self.error('project.yaml is empty or is not a mapping of sections.')
      return False

    if self.is_disabled():
      return True

    checks = [
        self.check_project_yaml_constants,
        self.check_required_sections,
        self.check_valid_section_names,
        self.check_valid_emails,
        self.check_valid_language,
        self.check_dataflow,
    ]
    # Run every check so that all problems are reported in a single pass.
    for check_function in checks:
      check_function()
    return self.success

  def is_disabled(self):
    """Returns the value of the "disabled" section (False if it is absent)."""
    return self.data.get('disabled', False)

  def error(self, message):
    """Prints an error message and sets self.success to False."""
    self.success = False
    print('Error in {filename}: {message}'.format(filename=self.filename,
                                                  message=message))

  def check_dataflow(self):
    """Checks that if "dataflow" is specified in "fuzzing_engines", it is also
    specified in "sanitizers", and that if specified in "sanitizers", it is also
    specified in "fuzzing_engines". Reports an error if this is not the case."""
    engines = self.data.get('fuzzing_engines', [])
    dfsan_engines = 'dataflow' in engines
    sanitizers = self.data.get('sanitizers', [])
    dfsan_sanitizers = 'dataflow' in sanitizers

    if dfsan_engines and not dfsan_sanitizers:
      self.error('"dataflow" only specified in "fuzzing_engines" must also be '
                 'specified in "sanitizers" or in neither.')
    elif dfsan_sanitizers and not dfsan_engines:
      self.error('"dataflow" only specified in "sanitizers" must also be '
                 'specified in "fuzzing_engines" or in neither.')

  def check_project_yaml_constants(self):
    """Reports an error for every value in the sections listed in
    SECTIONS_AND_CONSTANTS that is not one of the allowed constants."""
    for section, allowed_constants in self.SECTIONS_AND_CONSTANTS.items():
      if section not in self.data:
        continue
      actual_constants = self.data[section]
      for constant in actual_constants:
        if isinstance(constant, str):
          if constant not in allowed_constants:
            self.error(('{constant} (in {section} section) is not a valid '
                        'constant ({allowed_constants}).').format(
                            constant=constant,
                            section=section,
                            allowed_constants=', '.join(allowed_constants)))
        elif isinstance(constant, dict):
          # The only alternative value allowed is the experimental flag, i.e.
          # `constant == {'memory': {'experimental': True}}`. Do not check the
          # experimental flag, but assert that the sanitizer is a valid one.
          # Exactly one key is required; this also rejects an empty mapping
          # instead of failing while looking up its first key.
          if (len(constant) != 1 or
              next(iter(constant)) not in allowed_constants):
            self.error('Not allowed value in the project.yaml: ' +
                       str(constant))
        else:
          self.error('Not allowed value in the project.yaml: ' + str(constant))

  def check_valid_section_names(self):
    """Reports an error for every section name that is not in
    VALID_SECTION_NAMES."""
    for name in self.data:
      if name not in self.VALID_SECTION_NAMES:
        self.error('{name} is not a valid section name ({valid_names})'.format(
            name=name, valid_names=self.VALID_SECTION_NAMES))

  def check_required_sections(self):
    """Reports an error for every section in REQUIRED_SECTIONS that is missing
    from |self.data|."""
    for section in self.REQUIRED_SECTIONS:
      if section not in self.data:
        self.error(section + ' section is missing.')

  def check_valid_emails(self):
    """Reports an error for every address in "primary_contact" and "auto_ccs"
    that does not look like an email address."""
    # Get email addresses.
    email_addresses = []
    primary_contact = self.data.get('primary_contact')
    if primary_contact:
      email_addresses.append(primary_contact)
    auto_ccs = self.data.get('auto_ccs')
    if auto_ccs:
      email_addresses.extend(auto_ccs)

    # Check that email addresses seem normal. This is only a sanity check:
    # an address must contain both '@' and '.'.
    for email_address in email_addresses:
      if '@' not in email_address or '.' not in email_address:
        self.error(email_address + ' is an invalid email address.')

  def check_valid_language(self):
    """Reports an error if the language is not specified or is not one of
    constants.LANGUAGES."""
    language = self.data.get('language')
    if not language:
      self.error('Missing "language" attribute in project.yaml.')
    elif language not in constants.LANGUAGES:
      self.error(
          '"language: {language}" is not supported ({supported}).'.format(
              language=language, supported=constants.LANGUAGES))


def _check_one_project_yaml(project_yaml_filename):
  """Does checks on the project.yaml file. Returns True on success. Files that
  are not a project's project.yaml are not inspected and yield True."""
  if not _is_project_file(project_yaml_filename, 'project.yaml'):
    return True

  checker = ProjectYamlChecker(project_yaml_filename)
  return checker.do_checks()


def check_project_yaml(paths):
  """Calls _check_one_project_yaml on each path in |paths|. Returns True if the
  result of every call is True."""
  return all([_check_one_project_yaml(path) for path in paths])


def do_checks(changed_files):
  """Runs all presubmit checks. Returns False if any fails."""
  checks = [
      check_license, yapf, lint, check_project_yaml, check_lib_fuzzing_engine
  ]
  # Use a list comprehension here and in other cases where we use all() so that
  # we don't quit early on failure. This is more user-friendly since the more
  # errors we spit out at once, the fewer check-fix-check cycles users need to
  # do.
  return all([check(changed_files) for check in checks])


_CHECK_LICENSE_FILENAMES = ['Dockerfile']
_CHECK_LICENSE_EXTENSIONS = [
    '.bash',
    '.c',
    '.cc',
    '.cpp',
    '.css',
    '.Dockerfile',
    '.h',
    '.htm',
    '.html',
    '.js',
    '.proto',
    '.py',
    '.sh',
]
THIRD_PARTY_DIR_NAME = 'third_party'

_LICENSE_STRING = 'http://www.apache.org/licenses/LICENSE-2.0'


def check_license(paths):
  """Validates license header. Returns False if any checked file in |paths|
  does not contain the license URL. Files under a third_party directory and
  files whose name/extension is not in the lists above are skipped."""
  success = True
  for path in paths:
    path_parts = str(path).split(os.sep)
    if THIRD_PARTY_DIR_NAME in path_parts:
      continue
    filename = os.path.basename(path)
    extension = os.path.splitext(path)[1]
    if (filename not in _CHECK_LICENSE_FILENAMES and
        extension not in _CHECK_LICENSE_EXTENSIONS):
      continue

    with open(path) as file_handle:
      if _LICENSE_STRING not in file_handle.read():
        print('Missing license header in file %s.' % str(path))
        success = False

  return success


def bool_to_returncode(success):
  """Returns 0 if |success|. Otherwise returns 1. Also prints the outcome."""
  if success:
    print('Success.')
    return 0

  print('Failed.')
  return 1


def is_nonfuzzer_python(path):
  """Returns True if |path| ends in .py and does not contain "/projects/"."""
  return os.path.splitext(path)[1] == '.py' and '/projects/' not in path


def lint(_=None):
  """Runs python's linter on infra. Returns False if it fails linting. The
  argument is ignored; it exists so lint can be called like the other
  checks."""

  command = ['python3', '-m', 'pylint', '-j', '0', 'infra']
  returncode = subprocess.run(command, check=False).returncode
  return returncode == 0


def yapf(paths, validate=True):
  """Does yapf on each path in |paths| that is a non-fuzzer Python file. Only
  validates format if |validate|. Otherwise, formats the files in place.
  Returns False if validation or formatting fails."""
  paths = [path for path in paths if is_nonfuzzer_python(path)]
  if not paths:
    return True

  # -d prints a diff without touching the files, -i rewrites them in place.
  validate_argument = '-d' if validate else '-i'
  command = ['yapf', validate_argument, '-p']
  command.extend(paths)

  returncode = subprocess.run(command, check=False).returncode
  return returncode == 0


def get_changed_files():
  """Returns a list of absolute paths of files changed in this git branch.

  `git diff --name-only` prints paths relative to the top of the repository,
  so this must be called with the repository root as the working directory."""
  branch_commit_hash = subprocess.check_output(
      ['git', 'merge-base', 'HEAD', 'origin/HEAD']).strip().decode()

  diff_commands = [
      # Return list of modified files in the commits on this branch.
      ['git', 'diff', '--name-only', branch_commit_hash + '..'],
      # Return list of modified files from uncommitted changes.
      ['git', 'diff', '--name-only']
  ]

  changed_files = set()
  for command in diff_commands:
    file_paths = subprocess.check_output(command).decode().splitlines()
    for file_path in file_paths:
      # Skip paths that are not regular files (e.g. deleted files).
      if not os.path.isfile(file_path):
        continue
      changed_files.add(file_path)
  print('Changed files: {changed_files}'.format(
      changed_files=' '.join(changed_files)))
  return [os.path.abspath(f) for f in changed_files]


def run_build_tests():
  """Runs build tests separately because they can't be run in parallel.
  Returns True if there were no failures and no errors."""
  suite_list = [
      unittest.TestLoader().discover(os.path.join(_SRC_ROOT, 'infra', 'build'),
                                     pattern='*_test.py'),
  ]
  suite = unittest.TestSuite(suite_list)
  print('Running build tests.')
  result = unittest.TextTestRunner().run(suite)
  return not result.failures and not result.errors


def run_nonbuild_tests(parallel):
  """Runs all tests but build tests. Does them in parallel if |parallel|. The
  reason why we exclude build tests is because they use an emulator that
  prevents them from being used in parallel. Returns True if pytest exits
  with code 0."""
  # We look for all project directories because otherwise pytest won't run tests
  # that are not in valid modules (e.g. "base-images").
  relevant_dirs = set()
  all_files = get_all_files()
  for file_path in all_files:
    directory = os.path.dirname(file_path)
    relevant_dirs.add(directory)

  # Use ignore-glob because ignore doesn't seem to work properly with the way we
  # pass directories to pytest.
  command = [
      'pytest',
      '--ignore-glob=infra/build/*',
  ]
  if parallel:
    command.extend(['-n', 'auto'])
  command += list(relevant_dirs)
  print('Running non-build tests.')

  # TODO(metzman): Get rid of this once config_utils stops using it.
  env = os.environ.copy()
  env['CIFUZZ_TEST'] = '1'

  return subprocess.run(command, check=False, env=env).returncode == 0


def run_tests(_=None, parallel=False, build_tests=True, nonbuild_tests=True):
  """Runs all unit tests. Non-build tests run first (if |nonbuild_tests|),
  then build tests (if |build_tests|). Returns True if everything that ran
  succeeded. The first argument is ignored."""
  build_success = True
  nonbuild_success = True
  if nonbuild_tests:
    nonbuild_success = run_nonbuild_tests(parallel)
  else:
    print('Skipping nonbuild tests as specified.')

  if build_tests:
    build_success = run_build_tests()
  else:
    print('Skipping build tests as specified.')

  return nonbuild_success and build_success


def get_all_files():
  """Returns a list of absolute paths of the files that `git ls-files` lists
  for the current working directory."""
  get_all_files_command = ['git', 'ls-files']
  output = subprocess.check_output(get_all_files_command).decode().splitlines()
  return [os.path.abspath(path) for path in output if os.path.isfile(path)]


def main():
  """Check changes on a branch for common issues before submitting. Returns
  the process exit code (0 on success, 1 on failure)."""
  # Get program arguments.
  parser = argparse.ArgumentParser(description='Presubmit script for oss-fuzz.')
  parser.add_argument('command',
                      choices=['format', 'lint', 'license', 'infra-tests'],
                      nargs='?')
  parser.add_argument('-a',
                      '--all-files',
                      action='store_true',
                      help='Run presubmit check(s) on all files',
                      default=False)
  parser.add_argument('-p',
                      '--parallel',
                      action='store_true',
                      help='Run tests in parallel.',
                      default=False)
  parser.add_argument('-s',
                      '--skip-build-tests',
                      action='store_true',
                      help='Skip build tests which are slow and must run '
                      'sequentially.',
                      default=False)
  parser.add_argument('-n',
                      '--skip-nonbuild-tests',
                      action='store_true',
                      help='Only do build tests.',
                      default=False)
  args = parser.parse_args()

  # Switch to the source root before asking git for files. `git diff
  # --name-only` prints paths relative to the top of the repository, so
  # get_changed_files() would otherwise silently drop changed files when this
  # script is started from a subdirectory.
  os.chdir(_SRC_ROOT)

  if args.all_files:
    relevant_files = get_all_files()
  else:
    relevant_files = get_changed_files()

  # Do one specific check if the user asked for it.
  if args.command == 'format':
    success = yapf(relevant_files, False)
    return bool_to_returncode(success)

  if args.command == 'lint':
    success = lint()
    return bool_to_returncode(success)

  if args.command == 'license':
    success = check_license(relevant_files)
    return bool_to_returncode(success)

  if args.command == 'infra-tests':
    success = run_tests(relevant_files,
                        parallel=args.parallel,
                        build_tests=(not args.skip_build_tests),
                        nonbuild_tests=(not args.skip_nonbuild_tests))
    return bool_to_returncode(success)

  # Do all the checks (but no tests).
  success = do_checks(relevant_files)

  return bool_to_returncode(success)


if __name__ == '__main__':
  sys.exit(main())