# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Uses bisection to determine which commit a bug was introduced and fixed.

This module takes a high and a low commit SHA, a repo name, and a bug.
The module bisects the high and low commit SHA searching for the location
where the bug was introduced. It also looks for where the bug was fixed.
This is done with the following steps:

  1. Build the fuzzers at new_commit and run the test case against them to
     record the reference result.
  2. Build the fuzzers at old_commit and check that its result differs from
     the reference result. If it does not, retry once with an older commit
     obtained from the repo manager.
  3. Binary search the commits in between for the point where the result
     changes.

  NOTE: Needs to be run from root of the OSS-Fuzz source checkout.

  Typical usage example:
        python3 infra/bisector.py
          --type regressed
          --old_commit 1e403e9259a1abedf108ab86f711ba52c907226d
          --new_commit f79be4f2330f4b89ea2f42e1c44ca998c59a0c0f
          --fuzz_target rules_fuzzer
          --project_name yara
          --test_case_path infra/yara_testcase
          --sanitizer address
"""

import argparse
import collections
import logging
import os
import sys
import tempfile

import build_specified_commit
import helper
import repo_manager
import utils

# Outcome of a bisection: the URL of the bisected repo and the commit the
# search converged on.
Result = collections.namedtuple('Result', ['repo_url', 'commit'])

# Fuzzer output is only treated as a crash when it contains at least one of
# the START_MARKERS and at least one of the END_MARKERS.
START_MARKERS = [
    '==ERROR',
    '==WARNING',
]

END_MARKERS = [
    'SUMMARY:',
]

# Prefix of the output line that carries the crash's dedup token.
DEDUP_TOKEN_MARKER = 'DEDUP_TOKEN:'


class BisectError(Exception):
  """Bisection error.

  Attributes:
    repo_url: URL of the repo that was being bisected when the error occurred.
  """

  def __init__(self, message, repo_url):
    super().__init__(message)
    self.repo_url = repo_url


def main():
  """Finds the commit SHA where an error was initially introduced.

  Parses the command line, runs the bisection and reports the result.

  Returns:
    0 if a commit was found, 1 if no commit was found or if the bisection
    converged on old_commit.
  """
  logging.getLogger().setLevel(logging.INFO)
  utils.chdir_to_root()
  parser = argparse.ArgumentParser(
      description='git bisection for finding introduction of bugs')

  parser.add_argument('--project_name',
                      help='The name of the project where the bug occurred.',
                      required=True)
  parser.add_argument('--new_commit',
                      help='The newest commit SHA to be bisected.',
                      required=True)
  parser.add_argument('--old_commit',
                      help='The oldest commit SHA to be bisected.',
                      required=True)
  parser.add_argument('--fuzz_target',
                      help='The name of the fuzzer to be built.',
                      required=True)
  parser.add_argument('--test_case_path',
                      help='The path to test case.',
                      required=True)
  parser.add_argument('--engine',
                      help='The default is "libfuzzer".',
                      default='libfuzzer')
  parser.add_argument('--sanitizer',
                      default='address',
                      help='The default is "address".')
  parser.add_argument('--type',
                      choices=['regressed', 'fixed'],
                      help='The bisection type.',
                      required=True)
  parser.add_argument('--architecture', default='x86_64')
  args = parser.parse_args()

  build_data = build_specified_commit.BuildData(project_name=args.project_name,
                                                engine=args.engine,
                                                sanitizer=args.sanitizer,
                                                architecture=args.architecture)

  result = bisect(args.type, args.old_commit, args.new_commit,
                  args.test_case_path, args.fuzz_target, build_data)
  if not result.commit:
    logging.error('No error was found in commit range %s:%s', args.old_commit,
                  args.new_commit)
    return 1
  if result.commit == args.old_commit:
    # The two literals are concatenated, so the first one must end in a space.
    logging.error(
        'Bisection Error: Both the first and the last commits in '
        'the given range have the same behavior, bisection is not possible. ')
    return 1
  if args.type == 'regressed':
    print('Error was introduced at commit %s' % result.commit)
  elif args.type == 'fixed':
    print('Error was fixed at commit %s' % result.commit)
  return 0


def _get_dedup_token(output):
  """Get dedup token.

  Args:
    output: The text to search, scanned line by line.

  Returns:
    The stripped text following the first occurrence of DEDUP_TOKEN_MARKER,
    or None if no line contains the marker.
  """
  for line in output.splitlines():
    token_location = line.find(DEDUP_TOKEN_MARKER)
    if token_location == -1:
      continue

    return line[token_location + len(DEDUP_TOKEN_MARKER):].strip()

  return None


def _check_for_crash(project_name, fuzz_target, testcase_path):
  """Check for crash.

  Runs the test case against the fuzz target through helper.reproduce_impl
  and inspects the captured output.

  Args:
    project_name: The name of the project the fuzz target belongs to.
    fuzz_target: The name of the fuzz target to run.
    testcase_path: The path of the test case to run.

  Returns:
    The dedup token found in the output when the output contains both a start
    marker and an end marker. None when the run yields no return code, when
    either marker is missing, or when no dedup token line is present.
  """

  def docker_run(args):
    """Runs `docker run` with args, returning utils.execute's result."""
    command = ['docker', 'run', '--rm', '--privileged']
    # Only request an interactive container when attached to a terminal.
    if sys.stdin.isatty():
      command.append('-i')

    return utils.execute(command + args)

  logging.info('Checking for crash')
  out, err, return_code = helper.reproduce_impl(
      project=helper.Project(project_name),
      fuzzer_name=fuzz_target,
      valgrind=False,
      env_to_add=[],
      fuzzer_args=[],
      testcase_path=testcase_path,
      run_function=docker_run,
      err_result=(None, None, None))
  # NOTE(review): presumably reproduce_impl hands back err_result when it
  # cannot run the target, hence the None return code - confirm in helper.
  if return_code is None:
    return None

  logging.info('stdout =\n%s', out)
  logging.info('stderr =\n%s', err)

  # pylint: disable=unsupported-membership-test
  has_start_marker = any(
      marker in out or marker in err for marker in START_MARKERS)
  has_end_marker = any(marker in out or marker in err for marker in END_MARKERS)
  if not has_start_marker or not has_end_marker:
    return None

  return _get_dedup_token(out + err)


# pylint: disable=too-many-locals
# pylint: disable=too-many-arguments
# pylint: disable=too-many-statements
def _bisect(bisect_type, old_commit, new_commit, testcase_path, fuzz_target,
            build_data):
  """Perform the bisect.

  Args:
    bisect_type: The type of the bisect ('regressed' or 'fixed').
    old_commit: The oldest commit in the range.
    new_commit: The newest commit in the range.
    testcase_path: The file path of the test case that triggers the error.
    fuzz_target: The name of the fuzzer to be tested.
    build_data: The build parameters (project name, engine, sanitizer, ...).

  Returns:
    A Result holding the repo URL and the commit the search converged on:
    the entry of the commit list furthest from new_commit whose result
    matched the result observed at new_commit.

  Raises:
    ValueError: when the main git repo can't be determined.
    BisectError: when old_commit equals new_commit, when new_commit or
      old_commit fails to build, when bisect_type is invalid, or when
      old_commit shows the same result as new_commit.
  """
  # pylint: disable=too-many-branches
  base_builder_repo = build_specified_commit.load_base_builder_repo()

  with tempfile.TemporaryDirectory() as tmp_dir:
    repo_url, repo_path = build_specified_commit.detect_main_repo(
        build_data.project_name, commit=new_commit)
    if not repo_url or not repo_path:
      raise ValueError('Main git repo can not be determined.')

    if old_commit == new_commit:
      raise BisectError('old_commit is the same as new_commit', repo_url)

    # Copy /src from the built Docker container to ensure all dependencies
    # exist. This will be mounted when running them.
    host_src_dir = build_specified_commit.copy_src_from_docker(
        build_data.project_name, tmp_dir)

    bisect_repo_manager = repo_manager.RepoManager(
        os.path.join(host_src_dir, os.path.basename(repo_path)))
    bisect_repo_manager.fetch_all_remotes()

    commit_list = bisect_repo_manager.get_commit_list(new_commit, old_commit)

    # Index 0 is used as new_commit and the last index as old_commit.
    old_idx = len(commit_list) - 1
    new_idx = 0
    logging.info('Testing against new_commit (%s)', commit_list[new_idx])
    if not build_specified_commit.build_fuzzers_from_commit(
        commit_list[new_idx],
        bisect_repo_manager,
        host_src_dir,
        build_data,
        base_builder_repo=base_builder_repo):
      raise BisectError('Failed to build new_commit', repo_url)

    if bisect_type == 'fixed':
      should_crash = False
    elif bisect_type == 'regressed':
      should_crash = True
    else:
      raise BisectError('Invalid bisect type ' + bisect_type, repo_url)

    # The result at new_commit is the reference every other commit is
    # compared against.
    expected_error = _check_for_crash(build_data.project_name, fuzz_target,
                                      testcase_path)
    logging.info('new_commit result = %s', expected_error)

    if not should_crash and expected_error:
      logging.warning('new_commit crashed but shouldn\'t. '
                      'Continuing to see if stack changes.')

    # Bisection needs the two ends of the range to differ. Give old_commit
    # two attempts: as given, then once more with an older commit.
    range_valid = False
    for _ in range(2):
      logging.info('Testing against old_commit (%s)', commit_list[old_idx])
      if not build_specified_commit.build_fuzzers_from_commit(
          commit_list[old_idx],
          bisect_repo_manager,
          host_src_dir,
          build_data,
          base_builder_repo=base_builder_repo):
        raise BisectError('Failed to build old_commit', repo_url)

      if _check_for_crash(build_data.project_name, fuzz_target,
                          testcase_path) == expected_error:
        logging.warning('old_commit %s had same result as new_commit %s',
                        old_commit, new_commit)
        # Try again on a slightly older commit.
        old_commit = bisect_repo_manager.get_parent(old_commit, 64)
        if not old_commit:
          break

        commit_list = bisect_repo_manager.get_commit_list(
            new_commit, old_commit)
        old_idx = len(commit_list) - 1
        continue

      range_valid = True
      break

    if not range_valid:
      raise BisectError('old_commit had same result as new_commit', repo_url)

    # Invariant: commit_list[new_idx] matches the reference result and
    # commit_list[old_idx] does not (or failed to build).
    while old_idx - new_idx > 1:
      curr_idx = (old_idx + new_idx) // 2
      logging.info('Testing against %s (idx=%d)', commit_list[curr_idx],
                   curr_idx)
      if not build_specified_commit.build_fuzzers_from_commit(
          commit_list[curr_idx],
          bisect_repo_manager,
          host_src_dir,
          build_data,
          base_builder_repo=base_builder_repo):
        # Treat build failures as if we couldn't repro.
        # TODO(ochang): retry nearby commits?
        old_idx = curr_idx
        continue

      current_error = _check_for_crash(build_data.project_name, fuzz_target,
                                       testcase_path)
      logging.info('Current result = %s', current_error)
      if expected_error == current_error:
        new_idx = curr_idx
      else:
        old_idx = curr_idx
    return Result(repo_url, commit_list[new_idx])


# pylint: disable=too-many-locals
# pylint: disable=too-many-arguments
def bisect(bisect_type, old_commit, new_commit, testcase_path, fuzz_target,
           build_data):
  """From a commit range, this function calculates which commit introduced
  (or fixed) a specific error from a fuzz testcase_path.

  Args:
    bisect_type: The type of the bisect ('regressed' or 'fixed').
    old_commit: The oldest commit in the error regression range.
    new_commit: The newest commit in the error regression range.
    testcase_path: The file path of the test case that triggers the error
    fuzz_target: The name of the fuzzer to be tested.
    build_data: a class holding all of the input parameters for bisection.

  Returns:
    A Result namedtuple (repo_url, commit) where commit is the SHA the
    bisection converged on.

  Raises:
    ValueError: when a repo url can't be determined from the project.
    BisectError: when the bisection can't be carried out, e.g. a range end
      fails to build or both ends of the range behave the same.
  """
  try:
    return _bisect(bisect_type, old_commit, new_commit, testcase_path,
                   fuzz_target, build_data)
  finally:
    # Clean up projects/ as _bisect may have modified it.
    oss_fuzz_repo_manager = repo_manager.RepoManager(helper.OSS_FUZZ_DIR)
    oss_fuzz_repo_manager.git(['reset', 'projects'])
    oss_fuzz_repo_manager.git(['checkout', 'projects'])
    oss_fuzz_repo_manager.git(['clean', '-fxd', 'projects'])


if __name__ == '__main__':
  # Propagate main()'s status so callers can detect a failed bisection.
  sys.exit(main())