• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python3
2# Copyright 2020 Google LLC
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8#        http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15#
16################################################################################
17"""Checks code for common issues before submitting."""
18
19import argparse
20import os
21import subprocess
22import sys
23import unittest
24import yaml
25
26import constants
27
# Root of the checkout: the parent of the directory that contains this file.
_SRC_ROOT = os.path.abspath(
    os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir))
29
30
def _is_project_file(actual_path, expected_filename):
  """Returns True if |actual_path| is named |expected_filename|, lives in a
  directory whose parent directory is called "projects" (i.e.
  projects/<project>/<expected_filename>) and exists on disk."""
  project_dir, filename = os.path.split(actual_path)
  parent_dir_name = os.path.basename(os.path.dirname(project_dir))
  # The cheap name checks come first so the filesystem is only consulted for
  # paths that already look like project files.
  return (filename == expected_filename and parent_dir_name == 'projects' and
          os.path.exists(actual_path))
42
43
44# TODO: Check for -fsanitize=fuzzer in files as well.
45
46
def _check_one_lib_fuzzing_engine(build_sh_file):
  """Returns False if |build_sh_file| is a project's build.sh that contains
  -lFuzzingEngine outside of a comment. Returns True otherwise, including when
  |build_sh_file| is not a projects/<project>/build.sh file.
  -lFuzzingEngine is deprecated behavior. $LIB_FUZZING_ENGINE should be used
  instead so that -fsanitize=fuzzer is used."""
  if not _is_project_file(build_sh_file, 'build.sh'):
    return True

  with open(build_sh_file) as build_sh:
    build_sh_lines = build_sh.readlines()
  # Count from 1 so the reported number matches the line number shown by
  # editors (a plain enumerate() would report the first line as line 0).
  for line_num, line in enumerate(build_sh_lines, start=1):
    # Only look at the text before the first '#' so that commented-out uses
    # are not reported.
    uncommented_code = line.split('#')[0]
    if '-lFuzzingEngine' in uncommented_code:
      print(
          'Error: build.sh contains deprecated "-lFuzzingEngine" on line: {0}. '
          'Please use "$LIB_FUZZING_ENGINE" instead.'.format(line_num))
      return False
  return True
64
65
def check_lib_fuzzing_engine(paths):
  """Runs _check_one_lib_fuzzing_engine on every path in |paths|. Returns True
  only if each individual check returned True."""
  # Collect every result before combining them so that all files are checked
  # (and all errors printed) even after the first failure.
  results = []
  for path in paths:
    results.append(_check_one_lib_fuzzing_engine(path))
  return all(results)
70
71
class ProjectYamlChecker:
  """Checks for a project.yaml file.

  Every check_* method reports problems through error(), which prints them and
  sets |self.success| to False. do_checks() runs all of the checks and returns
  |self.success|."""

  # Sections in a project.yaml and the constant values that they are allowed
  # to have.
  SECTIONS_AND_CONSTANTS = {
      'sanitizers': constants.SANITIZERS,
      'architectures': constants.ARCHITECTURES,
      'fuzzing_engines': constants.ENGINES,
  }

  # Note: this list must be updated when we allow new sections.
  VALID_SECTION_NAMES = [
      'architectures',
      'auto_ccs',
      'blackbox',
      'builds_per_day',
      'coverage_extra_args',
      'disabled',
      'fuzzing_engines',
      'help_url',
      'homepage',
      'language',
      'labels',  # For internal use only, hard to lint as it uses fuzzer names.
      'main_repo',
      'primary_contact',
      'run_tests',
      'sanitizers',
      'selective_unpack',
      'vendor_ccs',
      'view_restrictions',
  ]

  # Note that some projects like boost only have auto-ccs. However, forgetting
  # primary contact is probably a mistake.
  REQUIRED_SECTIONS = ['primary_contact', 'main_repo']

  def __init__(self, filename):
    """Loads the YAML document in |filename| into |self.data|."""
    self.filename = filename
    with open(filename) as file_handle:
      # safe_load returns None for an empty document. Fall back to an empty
      # mapping so the checks report the missing sections instead of crashing
      # when they call methods on None.
      self.data = yaml.safe_load(file_handle) or {}

    self.success = True

  def do_checks(self):
    """Does all project.yaml checks. Returns True if they pass. Disabled
    projects are not checked and always pass."""
    if self.is_disabled():
      return True

    checks = [
        self.check_project_yaml_constants,
        self.check_required_sections,
        self.check_valid_section_names,
        self.check_valid_emails,
        self.check_valid_language,
        self.check_dataflow,
    ]
    # Run every check (no early exit) so that all problems are reported at
    # once.
    for check_function in checks:
      check_function()
    return self.success

  def is_disabled(self):
    """Returns the value of the "disabled" section, or False if the project
    does not have one."""
    return self.data.get('disabled', False)

  def error(self, message):
    """Prints an error message and sets self.success to False."""
    self.success = False
    print('Error in {filename}: {message}'.format(filename=self.filename,
                                                  message=message))

  def check_dataflow(self):
    """Checks that if "dataflow" is specified in "fuzzing_engines", it is also
    specified in "sanitizers", and that if specified in "sanitizers", it is also
    specified in "fuzzing_engines". Reports an error if this condition is not
    met."""
    engines = self.data.get('fuzzing_engines', [])
    dfsan_engines = 'dataflow' in engines
    sanitizers = self.data.get('sanitizers', [])
    dfsan_sanitizers = 'dataflow' in sanitizers

    if dfsan_engines and not dfsan_sanitizers:
      self.error('"dataflow" only specified in "fuzzing_engines" must also be '
                 'specified in "sanitizers" or in neither.')
    elif dfsan_sanitizers and not dfsan_engines:
      self.error('"dataflow" only specified in "sanitizers" must also be '
                 'specified in "fuzzing_engines" or in neither.')

  def check_project_yaml_constants(self):
    """Reports an error for every value in the sections listed in
    SECTIONS_AND_CONSTANTS that is not one of the allowed constants."""
    for section, allowed_constants in self.SECTIONS_AND_CONSTANTS.items():
      if section not in self.data:
        continue
      actual_constants = self.data[section]
      for constant in actual_constants:
        if isinstance(constant, str):
          if constant not in allowed_constants:
            self.error(('{constant} (in {section} section) is not a valid '
                        'constant ({allowed_constants}).').format(
                            constant=constant,
                            section=section,
                            allowed_constants=', '.join(allowed_constants)))
        elif isinstance(constant, dict):
          # The only alternative value allowed is the experimental flag, i.e.
          # `constant == {'memory': {'experimental': True}}`. Do not check the
          # experimental flag, but assert that the sanitizer is a valid one.
          # Exactly one key is required: an empty dict has no sanitizer name
          # to validate and must be reported rather than indexed.
          if (len(constant) != 1 or
              next(iter(constant)) not in allowed_constants):
            self.error('Not allowed value in the project.yaml: ' +
                       str(constant))
        else:
          self.error('Not allowed value in the project.yaml: ' + str(constant))

  def check_valid_section_names(self):
    """Reports an error for every top-level section name that is not in
    VALID_SECTION_NAMES."""
    for name in self.data:
      if name not in self.VALID_SECTION_NAMES:
        self.error('{name} is not a valid section name ({valid_names})'.format(
            name=name, valid_names=self.VALID_SECTION_NAMES))

  def check_required_sections(self):
    """Reports an error for every section in REQUIRED_SECTIONS that is missing
    from |self.data|."""
    for section in self.REQUIRED_SECTIONS:
      if section not in self.data:
        self.error(section + ' section is missing.')

  def check_valid_emails(self):
    """Reports an error for every address in "primary_contact" and "auto_ccs"
    that does not look like an email address."""
    # Get email addresses.
    email_addresses = []
    primary_contact = self.data.get('primary_contact')
    if primary_contact:
      email_addresses.append(primary_contact)
    auto_ccs = self.data.get('auto_ccs')
    if auto_ccs:
      email_addresses.extend(auto_ccs)

    # Check that email addresses seem normal. This is only a sanity check:
    # an address must contain both an '@' and a '.'.
    for email_address in email_addresses:
      if '@' not in email_address or '.' not in email_address:
        self.error(email_address + ' is an invalid email address.')

  def check_valid_language(self):
    """Reports an error if the language is not specified or is not one of
    constants.LANGUAGES."""
    language = self.data.get('language')
    if not language:
      self.error('Missing "language" attribute in project.yaml.')
    elif language not in constants.LANGUAGES:
      self.error(
          '"language: {language}" is not supported ({supported}).'.format(
              language=language, supported=constants.LANGUAGES))
225
226
def _check_one_project_yaml(project_yaml_filename):
  """Runs the ProjectYamlChecker checks on |project_yaml_filename| and returns
  their result. Paths that are not a projects/<project>/project.yaml file are
  not checked and count as a success."""
  if _is_project_file(project_yaml_filename, 'project.yaml'):
    return ProjectYamlChecker(project_yaml_filename).do_checks()
  return True
234
235
def check_project_yaml(paths):
  """Runs _check_one_project_yaml on every path in |paths|. Returns True only
  if each individual check returned True."""
  # Collect every result before combining them so that all files are checked
  # (and all errors printed) even after the first failure.
  results = []
  for path in paths:
    results.append(_check_one_project_yaml(path))
  return all(results)
240
241
def do_checks(changed_files):
  """Runs all presubmit checks on |changed_files|. Returns False if any
  fails."""
  # Every check is run before the results are combined, here and in the other
  # places that use all(), so that we don't quit early on failure. This is more
  # user-friendly: the more errors we report at once, the fewer
  # check-fix-check cycles the user needs to do.
  results = []
  for check in (check_license, yapf, lint, check_project_yaml,
                check_lib_fuzzing_engine):
    results.append(check(changed_files))
  return all(results)
252
253
# Text that a file must contain for its license header to count as present.
_LICENSE_STRING = 'http://www.apache.org/licenses/LICENSE-2.0'

# Paths with a component of this name are skipped by the license check.
THIRD_PARTY_DIR_NAME = 'third_party'

# File basenames that are license-checked regardless of their extension.
_CHECK_LICENSE_FILENAMES = ['Dockerfile']

# File extensions that are license-checked.
_CHECK_LICENSE_EXTENSIONS = [
    '.bash',
    '.c',
    '.cc',
    '.cpp',
    '.css',
    '.Dockerfile',
    '.h',
    '.htm',
    '.html',
    '.js',
    '.proto',
    '.py',
    '.sh',
]
273
274
def check_license(paths):
  """Validates license header.

  A file in |paths| is checked when its basename is in
  _CHECK_LICENSE_FILENAMES or its extension is in _CHECK_LICENSE_EXTENSIONS,
  unless one of its path components is THIRD_PARTY_DIR_NAME. Prints a message
  for every checked file that lacks _LICENSE_STRING and returns False if there
  was at least one such file."""
  if not paths:
    return True

  missing_header = False
  for path in paths:
    path_str = str(path)
    if THIRD_PARTY_DIR_NAME in path_str.split(os.sep):
      continue

    needs_license = (os.path.basename(path) in _CHECK_LICENSE_FILENAMES or
                     os.path.splitext(path)[1] in _CHECK_LICENSE_EXTENSIONS)
    if not needs_license:
      continue

    with open(path) as file_handle:
      contents = file_handle.read()
    if _LICENSE_STRING not in contents:
      print('Missing license header in file %s.' % path_str)
      missing_header = True

  return not missing_header
297
298
def bool_to_returncode(success):
  """Prints 'Success.' and returns 0 if |success|. Otherwise prints 'Failed.'
  and returns 1."""
  message, returncode = ('Success.', 0) if success else ('Failed.', 1)
  print(message)
  return returncode
307
308
def is_nonfuzzer_python(path):
  """Returns True if |path| has a .py extension and does not contain
  "/projects/"."""
  _, extension = os.path.splitext(path)
  if extension != '.py':
    return False
  return '/projects/' not in path
312
313
def lint(_=None):
  """Runs pylint on the infra directory. Returns False if pylint exits with a
  non-zero status. The argument is ignored; it only exists so that lint can be
  called like the other checks."""
  pylint_command = ['python3', '-m', 'pylint', '-j', '0', 'infra']
  # check=False: a non-zero exit is reported through the return value instead
  # of an exception.
  completed = subprocess.run(pylint_command, check=False)
  return completed.returncode == 0
320
321
def yapf(paths, validate=True):
  """Runs yapf on the files in |paths| accepted by is_nonfuzzer_python. If
  |validate|, yapf is run with -d; otherwise it is run with -i to format the
  files in place. Returns False if yapf exits with a non-zero status and True
  if it succeeds or there was nothing to run it on."""
  python_paths = [path for path in paths if is_nonfuzzer_python(path)]
  if not python_paths:
    return True

  mode_flag = '-d' if validate else '-i'
  yapf_command = ['yapf', mode_flag, '-p'] + python_paths
  completed = subprocess.run(yapf_command, check=False)
  return completed.returncode == 0
336
337
def get_changed_files():
  """Returns a list of absolute paths of files changed in this git branch,
  including uncommitted changes. Paths reported by git that are not existing
  files are left out."""
  merge_base_command = ['git', 'merge-base', 'HEAD', 'origin/HEAD']
  branch_commit_hash = (
      subprocess.check_output(merge_base_command).strip().decode())

  # Files modified by the commits on this branch.
  committed_diff = ['git', 'diff', '--name-only', branch_commit_hash + '..']
  # Files modified by uncommitted changes.
  uncommitted_diff = ['git', 'diff', '--name-only']

  changed_files = set()
  for diff_command in (committed_diff, uncommitted_diff):
    listed_paths = subprocess.check_output(diff_command).decode().splitlines()
    changed_files.update(
        file_path for file_path in listed_paths if os.path.isfile(file_path))

  print('Changed files: {changed_files}'.format(
      changed_files=' '.join(changed_files)))
  return [os.path.abspath(file_path) for file_path in changed_files]
360
361
def run_build_tests():
  """Runs the *_test.py tests under infra/build with unittest. They are run
  separately because they can't be run in parallel. Returns True if no test
  failed or raised an error."""
  build_dir = os.path.join(_SRC_ROOT, 'infra', 'build')
  discovered = unittest.TestLoader().discover(build_dir, pattern='*_test.py')
  suite = unittest.TestSuite([discovered])
  print('Running build tests.')
  result = unittest.TextTestRunner().run(suite)
  return not (result.failures or result.errors)
372
373
def run_nonbuild_tests(parallel):
  """Runs all tests but build tests with pytest. Does them in parallel if
  |parallel|. Build tests are excluded because they use an emulator that
  prevents them from being used in parallel. Returns True if pytest exits with
  status 0."""
  # Pass every directory that contains a file to pytest because otherwise
  # pytest won't run tests that are not in valid modules (e.g. "base-images").
  relevant_dirs = {os.path.dirname(file_path) for file_path in get_all_files()}

  # Use ignore-glob because ignore doesn't seem to work properly with the way we
  # pass directories to pytest.
  command = ['pytest', '--ignore-glob=infra/build/*']
  if parallel:
    command += ['-n', 'auto']
  command.extend(relevant_dirs)
  print('Running non-build tests.')

  # TODO(metzman): Get rid of this once config_utils stops using it.
  env = dict(os.environ, CIFUZZ_TEST='1')

  completed = subprocess.run(command, check=False, env=env)
  return completed.returncode == 0
402
403
def run_tests(_=None, parallel=False, build_tests=True, nonbuild_tests=True):
  """Runs the non-build tests if |nonbuild_tests| and then the build tests if
  |build_tests|. A skipped group counts as a success. Returns True if every
  group that ran succeeded. The first argument is ignored."""
  if nonbuild_tests:
    nonbuild_success = run_nonbuild_tests(parallel)
  else:
    nonbuild_success = True
    print('Skipping nonbuild tests as specified.')

  if build_tests:
    build_success = run_build_tests()
  else:
    build_success = True
    print('Skipping build tests as specified.')

  return nonbuild_success and build_success
419
420
def get_all_files():
  """Returns a list of absolute paths of the files listed by `git ls-files`
  that exist as files."""
  listing = subprocess.check_output(['git', 'ls-files']).decode()
  return [
      os.path.abspath(path)
      for path in listing.splitlines()
      if os.path.isfile(path)
  ]
426
427
def main():
  """Check changes on a branch for common issues before submitting. Returns
  the process return code: 0 on success and 1 on failure."""
  parser = argparse.ArgumentParser(description='Presubmit script for oss-fuzz.')
  parser.add_argument('command',
                      choices=['format', 'lint', 'license', 'infra-tests'],
                      nargs='?')
  parser.add_argument('-a',
                      '--all-files',
                      action='store_true',
                      help='Run presubmit check(s) on all files')
  parser.add_argument('-p',
                      '--parallel',
                      action='store_true',
                      help='Run tests in parallel.')
  parser.add_argument('-s',
                      '--skip-build-tests',
                      action='store_true',
                      help='Skip build tests which are slow and must run '
                      'sequentially.')
  parser.add_argument('-n',
                      '--skip-nonbuild-tests',
                      action='store_true',
                      help='Only do build tests.')
  args = parser.parse_args()

  # The file list is gathered before changing into the source root.
  relevant_files = get_all_files() if args.all_files else get_changed_files()
  os.chdir(_SRC_ROOT)

  # Do one specific check if the user asked for it. Otherwise do all the
  # checks (but no tests).
  command = args.command
  if command == 'format':
    success = yapf(relevant_files, False)
  elif command == 'lint':
    success = lint()
  elif command == 'license':
    success = check_license(relevant_files)
  elif command == 'infra-tests':
    success = run_tests(relevant_files,
                        parallel=args.parallel,
                        build_tests=(not args.skip_build_tests),
                        nonbuild_tests=(not args.skip_nonbuild_tests))
  else:
    success = do_checks(relevant_files)

  return bool_to_returncode(success)
489
490
if __name__ == '__main__':
  # Exit with the return code computed by main().
  raise SystemExit(main())
493