# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Module to build an image from a specific commit, branch or pull request.

This module allows each of the OSS-Fuzz projects' fuzzers to be built
from a specific point in time. This feature can be used for implementations
like continuous integration fuzzing and bisection to find errors.
"""
import argparse
import bisect
import collections
import datetime
import json
import logging
import os
import re
import shutil
import tempfile

try:
    # distutils was removed in Python 3.12 (PEP 632). Nothing in this module
    # uses it any more; the import is only kept so that existing references
    # to this module's `spawn` attribute keep working where it is available.
    # pylint: disable=unused-import,deprecated-module
    from distutils import spawn
except ImportError:
    spawn = None

import helper
import repo_manager
import retry
import utils

# Parameters that select which fuzzer build to produce for a project.
BuildData = collections.namedtuple(
    'BuildData', ['project_name', 'engine', 'sanitizer', 'architecture'])

# Prefix of the line in a `.git` file that points at the real git directory.
_GIT_DIR_MARKER = 'gitdir: '
# Number of attempts passed to retry.wrap when building a project image.
_IMAGE_BUILD_TRIES = 3


class BaseBuilderRepo:
    """Repo of base-builder images.

    Digests are looked up with `bisect`, so add_digest must be called in
    ascending timestamp order.
    """

    def __init__(self):
        # Parallel lists: digests[i] is the image tagged at timestamps[i].
        self.timestamps = []
        self.digests = []

    def add_digest(self, timestamp, digest):
        """Add a digest.

        Args:
          timestamp: Timestamp of the image. Must not be older than any
            timestamp added before it.
          digest: Digest of the image.
        """
        self.timestamps.append(timestamp)
        self.digests.append(digest)

    def find_digest(self, timestamp):
        """Find the latest image before the given timestamp.

        Args:
          timestamp: Timestamp to search for; must be comparable with the
            timestamps passed to add_digest.

        Returns:
          The digest of the newest image whose timestamp is not after
          `timestamp`, or None (after logging an error) if there is none.
        """
        index = bisect.bisect_right(self.timestamps, timestamp)
        if index > 0:
            return self.digests[index - 1]

        logging.error('Failed to find suitable base-builder.')
        return None


def _replace_gitdir(src_dir, file_path):
    """Replace an absolute gitdir in a `.git` file with a relative path.

    The file is left untouched if its gitdir entry is already relative.

    Args:
      src_dir: Host directory holding the copy of the container's /src.
      file_path: Path of the `.git` file to rewrite in place.
    """
    with open(file_path) as handle:
        lines = handle.readlines()

    new_lines = []
    for line in lines:
        if line.startswith(_GIT_DIR_MARKER):
            absolute_path = line[len(_GIT_DIR_MARKER):].strip()
            if not os.path.isabs(absolute_path):
                # Already relative.
                return

            current_dir = os.path.dirname(file_path)
            # Rebase to /src rather than the host src dir. Only the leading
            # occurrence is replaced so that a repeat of the same text
            # deeper in the path is left alone.
            base_dir = current_dir.replace(src_dir, '/src', 1)
            relative_path = os.path.relpath(absolute_path, base_dir)
            logging.info('Replacing absolute submodule gitdir from %s to %s',
                         absolute_path, relative_path)

            # Keep the line terminated: the original line's newline was
            # stripped above, and without it any following line would be
            # glued onto the gitdir entry.
            line = _GIT_DIR_MARKER + relative_path + '\n'

        new_lines.append(line)

    with open(file_path, 'w') as handle:
        handle.write(''.join(new_lines))


def _make_gitdirs_relative(src_dir):
    """Make the gitdir of every `.git` file below src_dir relative.

    Args:
      src_dir: Host directory holding the copy of the container's /src.
    """
    for root_dir, _, files in os.walk(src_dir):
        # Only `.git` *files* are of interest; `.git` directories show up
        # in the directory list of os.walk and are ignored here.
        if '.git' in files:
            _replace_gitdir(src_dir, os.path.join(root_dir, '.git'))


def _replace_base_builder_digest(dockerfile_path, digest):
    """Replace the base-builder digest in a Dockerfile.

    Every line that starts with FROM (ignoring surrounding whitespace) is
    replaced by a FROM line that pins base-builder to `digest`.

    Args:
      dockerfile_path: Path of the Dockerfile to rewrite in place.
      digest: Digest of the base-builder image to pin.
    """
    with open(dockerfile_path) as handle:
        lines = handle.readlines()

    pinned_from = 'FROM gcr.io/oss-fuzz-base/base-builder@' + digest + '\n'
    new_lines = [
        pinned_from if line.strip().startswith('FROM') else line
        for line in lines
    ]

    with open(dockerfile_path, 'w') as handle:
        handle.write(''.join(new_lines))


def copy_src_from_docker(project_name, host_dir):
    """Copy /src from docker to the host.

    Any existing `src` directory below host_dir is removed first.

    Args:
      project_name: The name of the OSS-Fuzz project whose image is used.
      host_dir: Host directory that receives the `src` directory.

    Returns:
      The path of the copied `src` directory on the host.
    """
    image_name = 'gcr.io/oss-fuzz/' + project_name
    src_dir = os.path.join(host_dir, 'src')
    if os.path.exists(src_dir):
        shutil.rmtree(src_dir, ignore_errors=True)

    # host_dir is mounted as /out, so `cp -r -p /src /out` creates
    # host_dir/src.
    docker_args = [
        '-v',
        host_dir + ':/out',
        image_name,
        'cp',
        '-r',
        '-p',
        '/src',
        '/out',
    ]
    helper.docker_run(docker_args)

    # Submodules can have gitdir entries which point to absolute paths. Make
    # them relative, as otherwise we can't do operations on the checkout on
    # the host.
    _make_gitdirs_relative(src_dir)
    return src_dir


@retry.wrap(_IMAGE_BUILD_TRIES, 2)
def _build_image_with_retries(project_name):
    """Build the image of a project, with retries.

    Args:
      project_name: The name of the OSS-Fuzz project.

    Returns:
      The result of helper.build_image_impl.
    """
    return helper.build_image_impl(helper.Project(project_name))


def get_required_post_checkout_steps(dockerfile_path):
    """Get required post checkout steps (best effort).

    Args:
      dockerfile_path: Path of the project's Dockerfile.

    Returns:
      A list of (workdir, command) tuples, one for each RUN command that
      mentions build.sh together with $SRC or /src and that comes after the
      last RUN line starting with git, svn or hg.
    """
    checkout_pattern = re.compile(r'\s*RUN\s*(git|svn|hg)')

    # If the build.sh is copied from upstream, we need to copy it again after
    # changing the revision to ensure correct building.
    post_run_pattern = re.compile(r'\s*RUN\s*(.*build\.sh.*(\$SRC|/src).*)')

    with open(dockerfile_path) as handle:
        lines = handle.readlines()

    subsequent_run_cmds = []
    for i, line in enumerate(lines):
        if checkout_pattern.match(line):
            # Only steps after the most recent checkout are relevant.
            subsequent_run_cmds = []
            continue

        match = post_run_pattern.match(line)
        if match:
            workdir = helper.workdir_from_lines(lines[:i])
            command = match.group(1)
            subsequent_run_cmds.append((workdir, command))

    return subsequent_run_cmds


def _find_integration_commit(oss_fuzz_repo_manager, projects_dir, before):
    """Find the OSS-Fuzz commit to retry a failed build with.

    Args:
      oss_fuzz_repo_manager: RepoManager of the OSS-Fuzz checkout.
      projects_dir: The projects/<PROJECT> directory, relative to the
        OSS-Fuzz root.
      before: Date of the upstream commit that is being built.

    Returns:
      The hash of the first change to projects_dir before `before`. If
      there is none, the hash of the oldest commit that touched
      projects_dir. An empty string if git reports no commit at all.
    """
    output, _, _ = oss_fuzz_repo_manager.git([
        'log', '--before=' + before.isoformat(), '-n1', '--format=%H',
        projects_dir
    ],
                                             check_result=True)
    found = output.strip()
    if found:
        return found

    logging.info(
        'Could not find first OSS-Fuzz commit prior to upstream commit. '
        'Falling back to oldest integration commit.')

    # Find the oldest commit.
    output, _, _ = oss_fuzz_repo_manager.git(
        ['log', '--reverse', '--format=%H', projects_dir], check_result=True)

    # Empty output has no first line; report that as "not found" rather
    # than failing with an IndexError.
    hashes = output.splitlines()
    return hashes[0].strip() if hashes else ''


# pylint: disable=too-many-locals
def build_fuzzers_from_commit(commit,
                              build_repo_manager,
                              host_src_path,
                              build_data,
                              base_builder_repo=None):
    """Builds an OSS-Fuzz fuzzer at a specific commit SHA.

    If the first build fails, the build is retried once with the
    projects/<PROJECT> directory (and, if base_builder_repo is given, the
    base-builder image) closest to the date of the commit.

    Args:
      commit: The commit SHA to build the fuzzers at.
      build_repo_manager: The OSS-Fuzz project's repo manager to be built at.
      host_src_path: Host path of the source directory that is mounted as
        /src during the build.
      build_data: A struct containing project build information.
      base_builder_repo: A BaseBuilderRepo.

    Returns:
      A truthy value if the build succeeded, a falsy value otherwise.
    """
    oss_fuzz_repo_manager = repo_manager.RepoManager(helper.OSS_FUZZ_DIR)
    num_retry = 1

    def cleanup():
        # Re-copy /src for a clean checkout every time.
        copy_src_from_docker(build_data.project_name,
                             os.path.dirname(host_src_path))
        build_repo_manager.fetch_all_remotes()

    projects_dir = os.path.join('projects', build_data.project_name)
    dockerfile_path = os.path.join(projects_dir, 'Dockerfile')

    for i in range(num_retry + 1):
        build_repo_manager.checkout_commit(commit, clean=False)

        post_checkout_steps = get_required_post_checkout_steps(
            dockerfile_path)
        for workdir, post_checkout_step in post_checkout_steps:
            logging.info('Running post-checkout step `%s` in %s.',
                         post_checkout_step, workdir)
            helper.docker_run([
                '-w',
                workdir,
                '-v',
                host_src_path + ':' + '/src',
                'gcr.io/oss-fuzz/' + build_data.project_name,
                '/bin/bash',
                '-c',
                post_checkout_step,
            ])

        project = helper.Project(build_data.project_name)
        result = helper.build_fuzzers_impl(
            project=project,
            clean=True,
            engine=build_data.engine,
            sanitizer=build_data.sanitizer,
            architecture=build_data.architecture,
            env_to_add=None,
            source_path=host_src_path,
            mount_path='/src')
        if result or i == num_retry:
            break

        # Retry with an OSS-Fuzz builder container that's closer to the
        # project commit date.
        commit_date = build_repo_manager.commit_date(commit)

        # Find first change in the projects/<PROJECT> directory before the
        # project commit date.
        oss_fuzz_commit = _find_integration_commit(oss_fuzz_repo_manager,
                                                   projects_dir, commit_date)
        if not oss_fuzz_commit:
            logging.error('Failed to get oldest integration commit.')
            break

        logging.info('Build failed. Retrying on earlier OSS-Fuzz commit %s.',
                     oss_fuzz_commit)

        # Check out projects/<PROJECT> dir to the commit that was found.
        oss_fuzz_repo_manager.git(['checkout', oss_fuzz_commit, projects_dir],
                                  check_result=True)

        # Also use the closest base-builder we can find.
        if base_builder_repo:
            base_builder_digest = base_builder_repo.find_digest(commit_date)
            if not base_builder_digest:
                return False

            logging.info('Using base-builder with digest %s.',
                         base_builder_digest)
            _replace_base_builder_digest(dockerfile_path,
                                         base_builder_digest)

        # Rebuild image and re-copy src dir since things in /src could have
        # changed.
        if not _build_image_with_retries(build_data.project_name):
            logging.error('Failed to rebuild image.')
            return False

        cleanup()

    cleanup()
    return result


def detect_main_repo(project_name, repo_name=None, commit=None):
    """Checks a docker image for the main repo of an OSS-Fuzz project.

    Note: The default is to use the repo name to detect the main repo.

    Args:
      project_name: The name of the oss-fuzz project.
      repo_name: The name of the main repo in an OSS-Fuzz project.
      commit: A commit SHA that is associated with the main repo.

    Returns:
      A tuple containing (the repo's origin, the repo's path), or
      (None, None) if the repo could not be detected.
    """
    if not repo_name and not commit:
        logging.error(
            'Error: can not detect main repo without a repo_name or a commit.'
        )
        return None, None
    if repo_name and commit:
        logging.info(
            'Both repo name and commit specific. Using repo name for detection.'
        )

    # Change to oss-fuzz main directory so helper.py runs correctly.
    utils.chdir_to_root()
    if not _build_image_with_retries(project_name):
        logging.error('Error: building %s image failed.', project_name)
        return None, None

    docker_image_name = 'gcr.io/oss-fuzz/' + project_name
    command_to_run = [
        'docker', 'run', '--rm', '-t', docker_image_name, 'python3',
        os.path.join('/opt', 'cifuzz', 'detect_repo.py')
    ]
    if repo_name:
        command_to_run.extend(['--repo_name', repo_name])
    else:
        command_to_run.extend(['--example_commit', commit])

    out, _, _ = utils.execute(command_to_run)
    # Both groups use `+`, so a match always has non-empty groups.
    match = re.search(r'\bDetected repo: ([^ ]+) ([^ ]+)', out.rstrip())
    if match:
        return match.group(1), match.group(2)

    logging.error('Failed to detect repo:\n%s', out)
    return None, None


def load_base_builder_repo():
    """Get base-image digests.

    Returns:
      A BaseBuilderRepo filled with the base-builder tags listed by gcloud
      (sorted by timestamp), or None if gcloud is not in PATH.
    """
    # shutil.which replaces distutils.spawn.find_executable, which is no
    # longer available on Python 3.12+.
    gcloud_path = shutil.which('gcloud')
    if not gcloud_path:
        logging.warning('gcloud not found in PATH.')
        return None

    result, _, _ = utils.execute([
        gcloud_path,
        'container',
        'images',
        'list-tags',
        'gcr.io/oss-fuzz-base/base-builder',
        '--format=json',
        '--sort-by=timestamp',
    ],
                                 check_result=True)
    result = json.loads(result)

    repo = BaseBuilderRepo()
    for image in result:
        timestamp = datetime.datetime.fromisoformat(
            image['timestamp']['datetime']).astimezone(datetime.timezone.utc)
        repo.add_digest(timestamp, image['digest'])

    return repo


def main():
    """Main function.

    Raises:
      ValueError: If the main repo of the project can not be determined.
      RuntimeError: If the fuzzers fail to build.
    """
    logging.getLogger().setLevel(logging.INFO)

    parser = argparse.ArgumentParser(
        description='Build fuzzers at a specific commit')
    parser.add_argument(
        '--project_name',
        help='The name of the project where the bug occurred.',
        required=True)
    parser.add_argument('--commit',
                        help='The newest commit SHA to be bisected.',
                        required=True)
    parser.add_argument('--engine',
                        help='The default is "libfuzzer".',
                        default='libfuzzer')
    parser.add_argument('--sanitizer',
                        default='address',
                        help='The default is "address".')
    parser.add_argument('--architecture', default='x86_64')

    args = parser.parse_args()

    repo_url, repo_path = detect_main_repo(args.project_name,
                                           commit=args.commit)

    if not repo_url or not repo_path:
        raise ValueError('Main git repo can not be determined.')

    with tempfile.TemporaryDirectory() as tmp_dir:
        host_src_dir = copy_src_from_docker(args.project_name, tmp_dir)
        build_repo_manager = repo_manager.RepoManager(
            os.path.join(host_src_dir, os.path.basename(repo_path)))
        base_builder_repo = load_base_builder_repo()

        build_data = BuildData(project_name=args.project_name,
                               engine=args.engine,
                               sanitizer=args.sanitizer,
                               architecture=args.architecture)
        if not build_fuzzers_from_commit(args.commit,
                                         build_repo_manager,
                                         host_src_dir,
                                         build_data,
                                         base_builder_repo=base_builder_repo):
            raise RuntimeError('Failed to build.')


if __name__ == '__main__':
    main()