#!/usr/bin/env python3
# Copyright 2019 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
################################################################################
"""Use git bisect to find the Clang/LLVM commit causing a regression."""

import logging
import os
import re
import shutil
import subprocess
import sys

# Matches the verdict line git bisect prints once it has isolated a commit.
# TODO(metzman): Is it necessary to look for "good"?
_CULPRIT_REGEX = re.compile(r'([a-z0-9]{40}) is the first (good|bad) commit')


class BisectError(Exception):
    """Error that was encountered during bisection."""


def execute(command, *args, expect_zero=True, **kwargs):
    """Execute |command| and return the returncode, stdout and stderr.

    |args| and |kwargs| are forwarded to subprocess.Popen, except that stdout
    and stderr are always captured through pipes. Output is decoded as UTF-8;
    undecodable bytes are replaced rather than raising.

    Raises subprocess.CalledProcessError if |expect_zero| is true and the
    command exits with a nonzero status.
    """
    kwargs['stdout'] = subprocess.PIPE
    kwargs['stderr'] = subprocess.PIPE
    logging.debug('Running command: "%s"', str(command))
    with subprocess.Popen(command, *args, **kwargs) as process:
        stdout, stderr = process.communicate()
        retcode = process.returncode
    # Compiler and test output is not guaranteed to be valid UTF-8, so do not
    # let a stray byte abort the whole bisection.
    stdout = stdout.decode('utf-8', errors='replace')
    stderr = stderr.decode('utf-8', errors='replace')
    logging.info('Command: "%s" returned: %d.\nStdout: %s.\nStderr: %s',
                 str(command), retcode, stdout, stderr)
    if expect_zero and retcode != 0:
        raise subprocess.CalledProcessError(retcode,
                                            command,
                                            output=stdout,
                                            stderr=stderr)
    return retcode, stdout, stderr


def search_bisect_output(output):
    """Search |output| for a message indicating the culprit commit has been
    found. Return the 40 character commit hash, or None if there is no such
    message."""
    # Use search rather than match so that the verdict is found even when it
    # is not the very first thing in |output|.
    match = _CULPRIT_REGEX.search(output)
    return match.group(1) if match is not None else None


class GitRepo:
    """Class for executing commands on a git repo."""

    def __init__(self, repo_dir):
        self.repo_dir = repo_dir

    def do_command(self, git_subcommand):
        """Execute a |git_subcommand| (a list of strings). Return the
        (returncode, stdout, stderr) tuple produced by execute."""
        command = ['git', '-C', self.repo_dir] + git_subcommand
        return execute(command)

    def test_commit(self, test_command):
        """Build LLVM at the currently checked out commit, then run
        |test_command|. If the returncode is 0 run 'git bisect good',
        otherwise run 'git bisect bad'. Return None if bisect didn't finish
        yet. Return the culprit commit if it did."""
        build_clang(self.repo_dir)
        # |test_command| is a user supplied shell snippet, so it is
        # intentionally run through the shell.
        retcode, _, _ = execute(test_command, shell=True, expect_zero=False)
        label = 'good' if retcode == 0 else 'bad'
        _, stdout, _ = self.do_bisect_command(label)
        return search_bisect_output(stdout)

    def bisect(self, good_commit, bad_commit, test_command):
        """Do git bisect assuming |good_commit| is good, |bad_commit| is bad
        and |test_command| is an oracle. Return the culprit commit."""
        culprit = self.bisect_start(good_commit, bad_commit, test_command)
        while culprit is None:
            culprit = self.test_commit(test_command)
        return culprit

    def bisect_start(self, good_commit, bad_commit, test_command):
        """Start doing git bisect. Return the culprit commit if git already
        reports one after both endpoints are marked, otherwise None."""
        self.do_bisect_command('start')
        # Do bad commit first since it is more likely to be recent.
        self.test_start_commit(bad_commit, 'bad', test_command)
        return self.test_start_commit(good_commit, 'good', test_command)

    def do_bisect_command(self, subcommand):
        """Execute a git bisect |subcommand| (string) and return the
        result."""
        return self.do_command(['bisect', subcommand])

    def test_start_commit(self, commit, label, test_command):
        """Use |test_command| to test the first good or bad |commit|
        (depending on |label|), then mark it with 'git bisect |label|'.

        Return the culprit commit if the bisect output already names one,
        otherwise None. Raise BisectError if |test_command| disagrees with
        |label|."""
        assert label in ('good', 'bad'), label
        self.do_command(['checkout', commit])
        build_clang(self.repo_dir)
        retcode, _, _ = execute(test_command, shell=True, expect_zero=False)
        if label == 'good' and retcode != 0:
            raise BisectError(
                'Test command "%s" returns %d on first good commit %s' %
                (test_command, retcode, commit))
        if label == 'bad' and retcode == 0:
            raise BisectError(
                'Test command "%s" returns %d on first bad commit %s' %
                (test_command, retcode, commit))

        _, stdout, _ = self.do_bisect_command(label)
        return search_bisect_output(stdout)


def _require_env(name):
    """Return the value of environment variable |name|. Raise BisectError if
    it is not set."""
    value = os.getenv(name)
    if value is None:
        raise BisectError('Environment variable %s must be set.' % name)
    return value


def get_clang_build_env():
    """Get an environment for building Clang: a copy of the current
    environment without CXXFLAGS and CFLAGS."""
    env = os.environ.copy()
    for variable in ['CXXFLAGS', 'CFLAGS']:
        env.pop(variable, None)
    return env


def install_clang_build_deps():
    """Install dependencies necessary to build clang."""
    execute([
        'apt-get', 'install', '-y', 'build-essential', 'make', 'cmake',
        'ninja-build', 'git', 'subversion', 'g++-multilib'
    ])


def clone_with_retries(repo, local_path, num_retries=10):
    """Clone |repo| to |local_path| if it doesn't exist already. Try up to
    |num_retries| times. Raise BisectError if every attempt fails."""
    if os.path.isdir(local_path):
        return
    for _ in range(num_retries):
        # Remove whatever a failed previous attempt left behind.
        if os.path.isdir(local_path):
            shutil.rmtree(local_path)
        retcode, _, _ = execute(['git', 'clone', repo, local_path],
                                expect_zero=False)
        if retcode == 0:
            return
    raise BisectError('Could not checkout %s.' % repo)


def get_clang_target_arch():
    """Get target architecture we want clang to target when we build it.
    Raise BisectError if the machine reported by 'uname -m' is unsupported."""
    _, arch, _ = execute(['uname', '-m'])
    if 'x86_64' in arch:
        return 'X86'
    if 'aarch64' in arch:
        return 'AArch64'
    raise BisectError('Unsupported target: %s.' % arch.strip())


def prepare_build(llvm_project_path):
    """Prepare to build clang: create $WORK/llvm-build if needed and run cmake
    in it against |llvm_project_path|. Return the build directory."""
    llvm_build_dir = os.path.join(_require_env('WORK'), 'llvm-build')
    os.makedirs(llvm_build_dir, exist_ok=True)
    execute([
        'cmake', '-G', 'Ninja', '-DLIBCXX_ENABLE_SHARED=OFF',
        '-DLIBCXX_ENABLE_STATIC_ABI_LIBRARY=ON',
        '-DLIBCXXABI_ENABLE_SHARED=OFF', '-DCMAKE_BUILD_TYPE=Release',
        '-DLLVM_ENABLE_PROJECTS=libcxx;libcxxabi;compiler-rt;clang',
        '-DLLVM_TARGETS_TO_BUILD=' + get_clang_target_arch(),
        os.path.join(llvm_project_path, 'llvm')
    ],
            env=get_clang_build_env(),
            cwd=llvm_build_dir)
    return llvm_build_dir


def build_clang(llvm_project_path):
    """Build and install Clang from the sources in |llvm_project_path|."""
    # TODO(metzman): Merge Python checkout and build code with
    # checkout_build_install_llvm.sh.
    # TODO(metzman): Look into speeding this process using ccache.
    # TODO(metzman): Make this program capable of handling MSAN and i386 Clang
    # regressions.
    llvm_build_dir = prepare_build(llvm_project_path)
    execute(['ninja', '-C', llvm_build_dir, 'install'],
            env=get_clang_build_env())


def find_culprit_commit(test_command, good_commit, bad_commit):
    """Returns the culprit LLVM commit that introduced a bug revealed by
    running |test_command|. Uses git bisect and treats |good_commit| as the
    latest known good commit and |bad_commit| as the first known bad
    commit."""
    llvm_project_path = os.path.join(_require_env('SRC'), 'llvm-project')
    clone_with_retries('https://github.com/llvm/llvm-project.git',
                       llvm_project_path)
    git_repo = GitRepo(llvm_project_path)
    result = git_repo.bisect(good_commit, bad_commit, test_command)
    print('Culprit commit', result)
    return result


def main():
    # pylint: disable=line-too-long
    """Finds the culprit LLVM commit that introduced a clang regression.
    Can be tested using this command in a libsodium shell:
    python3 bisect_clang.py "cd /src/libsodium; make clean; cd -; compile && /out/secret_key_auth_fuzzer -runs=100" \
                      f7e52fbdb5a7af8ea0808e98458b497125a5eca1 \
                      8288453f6aac05080b751b680455349e09d49825
    """
    # pylint: enable=line-too-long
    # TODO(metzman): Check CFLAGS for things like -fsanitize=fuzzer-no-link.
    # TODO(metzman): Allow test_command to be optional and for just build.sh to
    # be used instead.
    if len(sys.argv) < 4:
        print('Usage: %s <test_command> <good_commit> <bad_commit>' %
              sys.argv[0],
              file=sys.stderr)
        return 1
    test_command = sys.argv[1]
    # TODO(metzman): Add in more automation so that the script can
    # automatically determine the commits used in last Clang roll.
    good_commit = sys.argv[2]
    bad_commit = sys.argv[3]
    # TODO(metzman): Make verbosity configurable.
    logging.getLogger().setLevel(logging.DEBUG)
    install_clang_build_deps()
    find_culprit_commit(test_command, good_commit, bad_commit)
    return 0


if __name__ == '__main__':
    sys.exit(main())