• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2019 Google LLC
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#      http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14"""Uses bisection to determine which commit a bug was introduced and fixed.
15This module takes a high and a low commit SHA, a repo name, and a bug.
16The module bisects the high and low commit SHA searching for the location
17where the bug was introduced. It also looks for where the bug was fixed.
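This is done with the following steps:

  1. Build the fuzzers at the new commit and run the testcase to record the
     expected result.
  2. Build the fuzzers at the old commit and check that the testcase gives a
     different result there (retrying from an older commit if it does not).
  3. Binary-search the commits in between, building and running the testcase
     at each midpoint, until the commit where the result changes is found.
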
21  NOTE: Needs to be run from root of the OSS-Fuzz source checkout.
22
  Typical usage example:
        python3 infra/bisector.py
          --old_commit 1e403e9259a1abedf108ab86f711ba52c907226d
          --new_commit f79be4f2330f4b89ea2f42e1c44ca998c59a0c0f
          --fuzz_target rules_fuzzer
          --project_name yara
          --test_case_path infra/yara_testcase
          --sanitizer address
          --type regressed
31"""
32
33import argparse
34import collections
35import logging
36import os
37import sys
38import tempfile
39
40import build_specified_commit
41import helper
42import repo_manager
43import utils
44
# Outcome of a bisection: the repo URL and the commit that was identified.
Result = collections.namedtuple('Result', 'repo_url commit')

# Output of a run is only treated as a crash when it contains at least one
# start marker and at least one end marker.
START_MARKERS = ['==ERROR', '==WARNING']
END_MARKERS = ['SUMMARY:']

# Text that precedes the dedup token on its output line.
DEDUP_TOKEN_MARKER = 'DEDUP_TOKEN:'
57
58
class BisectError(Exception):
  """Raised when a bisection cannot be started or completed.

  Attributes:
    repo_url: The repository URL supplied by the code raising the error.
  """

  def __init__(self, message, repo_url):
    self.repo_url = repo_url
    super().__init__(message)
65
66
def main():
  """Finds the commit SHA where an error was initially introduced or fixed.

  Parses the command line, runs the bisection over the requested commit range
  and reports the outcome.

  Returns:
    0 if a commit was found and printed, 1 if no commit was found or the
    bisection ended on old_commit.
  """
  logging.getLogger().setLevel(logging.INFO)
  utils.chdir_to_root()
  parser = argparse.ArgumentParser(
      description='git bisection for finding introduction of bugs')

  parser.add_argument('--project_name',
                      help='The name of the project where the bug occurred.',
                      required=True)
  parser.add_argument('--new_commit',
                      help='The newest commit SHA to be bisected.',
                      required=True)
  parser.add_argument('--old_commit',
                      help='The oldest commit SHA to be bisected.',
                      required=True)
  parser.add_argument('--fuzz_target',
                      help='The name of the fuzzer to be built.',
                      required=True)
  parser.add_argument('--test_case_path',
                      help='The path to test case.',
                      required=True)
  parser.add_argument('--engine',
                      help='The default is "libfuzzer".',
                      default='libfuzzer')
  parser.add_argument('--sanitizer',
                      default='address',
                      help='The default is "address".')
  parser.add_argument('--type',
                      choices=['regressed', 'fixed'],
                      help='The bisection type.',
                      required=True)
  parser.add_argument('--architecture', default='x86_64')
  args = parser.parse_args()

  build_data = build_specified_commit.BuildData(project_name=args.project_name,
                                                engine=args.engine,
                                                sanitizer=args.sanitizer,
                                                architecture=args.architecture)

  result = bisect(args.type, args.old_commit, args.new_commit,
                  args.test_case_path, args.fuzz_target, build_data)
  if not result.commit:
    logging.error('No error was found in commit range %s:%s', args.old_commit,
                  args.new_commit)
    return 1
  if result.commit == args.old_commit:
    # The two literals are concatenated, so the first must end with a space.
    logging.error(
        'Bisection Error: Both the first and the last commits in '
        'the given range have the same behavior, bisection is not possible.')
    return 1
  if args.type == 'regressed':
    print('Error was introduced at commit %s' % result.commit)
  elif args.type == 'fixed':
    print('Error was fixed at commit %s' % result.commit)
  return 0
123
124
def _get_dedup_token(output):
  """Extract the dedup token from fuzzer output.

  Args:
    output: The text to scan, possibly spanning multiple lines.

  Returns:
    The stripped text following the first DEDUP_TOKEN_MARKER on the first line
    that contains it, or None if no line contains the marker.
  """
  for text in output.splitlines():
    _, marker, remainder = text.partition(DEDUP_TOKEN_MARKER)
    if marker:
      return remainder.strip()

  return None
135
136
def _check_for_crash(project_name, fuzz_target, testcase_path):
  """Reproduce the testcase and report the crash it triggers, if any.

  Args:
    project_name: Name of the project whose fuzz target is run.
    fuzz_target: Name of the fuzz target to run.
    testcase_path: Path of the testcase handed to the reproducer.

  Returns:
    The dedup token found in the combined stdout and stderr when the output
    contains both a start marker and an end marker. None if the reproduction
    yielded no return code, if either kind of marker is missing, or if the
    output carries no dedup token.
  """

  def docker_run(args):
    # Only request an interactive container when stdin is a terminal.
    interactive = ['-i'] if sys.stdin.isatty() else []
    return utils.execute(['docker', 'run', '--rm', '--privileged'] +
                         interactive + args)

  logging.info('Checking for crash')
  out, err, return_code = helper.reproduce_impl(
      project=helper.Project(project_name),
      fuzzer_name=fuzz_target,
      valgrind=False,
      env_to_add=[],
      fuzzer_args=[],
      testcase_path=testcase_path,
      run_function=docker_run,
      err_result=(None, None, None))
  if return_code is None:
    return None

  logging.info('stdout =\n%s', out)
  logging.info('stderr =\n%s', err)

  def seen_any(markers):
    # A marker counts if it shows up in either stream.
    # pylint: disable=unsupported-membership-test
    return any(marker in out or marker in err for marker in markers)

  if not (seen_any(START_MARKERS) and seen_any(END_MARKERS)):
    return None

  return _get_dedup_token(out + err)
171
172
# pylint: disable=too-many-locals
# pylint: disable=too-many-arguments
# pylint: disable=too-many-statements
def _bisect(bisect_type, old_commit, new_commit, testcase_path, fuzz_target,
            build_data):
  """Perform the bisect.

  Builds the fuzzers at new_commit and records the testcase result there as
  the expected result, checks that old_commit gives a different result, then
  binary-searches the commits in between for the point where the result
  changes.

  Args:
    bisect_type: The type of the bisect ('regressed' or 'fixed').
    old_commit: The oldest commit in the range.
    new_commit: The newest commit in the range.
    testcase_path: Path of the testcase to reproduce.
    fuzz_target: The name of the fuzz target to run.
    build_data: Build parameters; project_name is read from it and the object
      is passed through to the build.

  Returns:
    A Result holding the main repo URL and the last commit in the search that
    still produced the same result as new_commit.

  Raises:
    ValueError: if the main repo can not be determined.
    BisectError: if old_commit equals new_commit, bisect_type is invalid,
      new_commit or old_commit fails to build, or old_commit keeps giving the
      same result as new_commit.
  """
  # pylint: disable=too-many-branches
  base_builder_repo = build_specified_commit.load_base_builder_repo()

  with tempfile.TemporaryDirectory() as tmp_dir:
    repo_url, repo_path = build_specified_commit.detect_main_repo(
        build_data.project_name, commit=new_commit)
    if not repo_url or not repo_path:
      raise ValueError('Main git repo can not be determined.')

    if old_commit == new_commit:
      raise BisectError('old_commit is the same as new_commit', repo_url)

    # Validate the bisect type up front so that a bad value is rejected before
    # any source copy or build is attempted.
    if bisect_type == 'fixed':
      should_crash = False
    elif bisect_type == 'regressed':
      should_crash = True
    else:
      raise BisectError('Invalid bisect type ' + bisect_type, repo_url)

    # Copy /src from the built Docker container to ensure all dependencies
    # exist. This will be mounted when running them.
    host_src_dir = build_specified_commit.copy_src_from_docker(
        build_data.project_name, tmp_dir)

    bisect_repo_manager = repo_manager.RepoManager(
        os.path.join(host_src_dir, os.path.basename(repo_path)))
    bisect_repo_manager.fetch_all_remotes()

    def build_commit(commit):
      """Builds the fuzzers at |commit|; a falsy result means failure."""
      return build_specified_commit.build_fuzzers_from_commit(
          commit,
          bisect_repo_manager,
          host_src_dir,
          build_data,
          base_builder_repo=base_builder_repo)

    commit_list = bisect_repo_manager.get_commit_list(new_commit, old_commit)

    # new_commit sits at the front of the list and old_commit at the back.
    old_idx = len(commit_list) - 1
    new_idx = 0
    logging.info('Testing against new_commit (%s)', commit_list[new_idx])
    if not build_commit(commit_list[new_idx]):
      raise BisectError('Failed to build new_commit', repo_url)

    # The result at new_commit is the reference every other commit is
    # compared against.
    expected_error = _check_for_crash(build_data.project_name, fuzz_target,
                                      testcase_path)
    logging.info('new_commit result = %s', expected_error)

    if not should_crash and expected_error:
      logging.warning('new_commit crashed but shouldn\'t. '
                      'Continuing to see if stack changes.')

    # The range is only bisectable if old_commit behaves differently from
    # new_commit. Allow one retry from an older starting point.
    range_valid = False
    for _ in range(2):
      logging.info('Testing against old_commit (%s)', commit_list[old_idx])
      if not build_commit(commit_list[old_idx]):
        raise BisectError('Failed to build old_commit', repo_url)

      if _check_for_crash(build_data.project_name, fuzz_target,
                          testcase_path) == expected_error:
        logging.warning('old_commit %s had same result as new_commit %s',
                        old_commit, new_commit)
        # Try again on a slightly older commit.
        old_commit = bisect_repo_manager.get_parent(old_commit, 64)
        if not old_commit:
          break

        commit_list = bisect_repo_manager.get_commit_list(
            new_commit, old_commit)
        old_idx = len(commit_list) - 1
        continue

      range_valid = True
      break

    if not range_valid:
      raise BisectError('old_commit had same result as new_commit', repo_url)

    # Invariant: commit_list[new_idx] gives the expected result, while
    # commit_list[old_idx] does not (or could not be built).
    while old_idx - new_idx > 1:
      curr_idx = (old_idx + new_idx) // 2
      logging.info('Testing against %s (idx=%d)', commit_list[curr_idx],
                   curr_idx)
      if not build_commit(commit_list[curr_idx]):
        # Treat build failures as if we couldn't repro.
        # TODO(ochang): retry nearby commits?
        old_idx = curr_idx
        continue

      current_error = _check_for_crash(build_data.project_name, fuzz_target,
                                       testcase_path)
      logging.info('Current result = %s', current_error)
      if expected_error == current_error:
        new_idx = curr_idx
      else:
        old_idx = curr_idx
    return Result(repo_url, commit_list[new_idx])
282
283
# pylint: disable=too-many-locals
# pylint: disable=too-many-arguments
def bisect(bisect_type, old_commit, new_commit, testcase_path, fuzz_target,
           build_data):
  """Bisects a commit range for the commit that changed a testcase's result.

  Delegates the search to _bisect and, whether or not it succeeds, resets the
  projects/ directory of the OSS-Fuzz checkout afterwards.

  Args:
    bisect_type: The type of the bisect ('regressed' or 'fixed').
    old_commit: The oldest commit in the error regression range.
    new_commit: The newest commit in the error regression range.
    testcase_path: The file path of the test case that triggers the error.
    fuzz_target: The name of the fuzzer to be tested.
    build_data: a class holding all of the input parameters for bisection.

  Returns:
    The Result produced by _bisect, holding the repo URL and the commit found.

  Raises:
    ValueError: when a repo url can't be determined from the project.
    BisectError: when _bisect cannot carry out the bisection.
  """
  cleanup_commands = (
      ['reset', 'projects'],
      ['checkout', 'projects'],
      ['clean', '-fxd', 'projects'],
  )
  try:
    outcome = _bisect(bisect_type, old_commit, new_commit, testcase_path,
                      fuzz_target, build_data)
  finally:
    # Clean up projects/ as _bisect may have modified it.
    oss_fuzz_repo_manager = repo_manager.RepoManager(helper.OSS_FUZZ_DIR)
    for git_args in cleanup_commands:
      oss_fuzz_repo_manager.git(git_args)
  return outcome
314
315
if __name__ == '__main__':
  # Propagate main()'s return value as the process exit status so that callers
  # and scripts can detect a failed bisection.
  sys.exit(main())
318