#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright 2020 The ChromiumOS Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Fetches and submits the artifacts from ChromeOS toolchain's crash bucket.
"""

import argparse
import glob
import json
import logging
import os
import os.path
import shutil
import subprocess
import sys

import chroot


def get_artifacts(pattern):
    """Lists the objects matching `pattern` using `gsutil.py ls`.

    Args:
        pattern: A gs:// URL pattern handed to `gsutil.py ls` verbatim.

    Returns:
        A sorted list of the gs:// URLs that gsutil printed.

    Raises:
        subprocess.CalledProcessError: If `gsutil.py` exits non-zero.
    """
    results = subprocess.check_output(
        ["gsutil.py", "ls", pattern], stderr=subprocess.STDOUT, encoding="utf-8"
    )
    # stderr is folded into stdout so that a CalledProcessError carries
    # gsutil's diagnostics. The flip side is that warnings (and blank lines)
    # can be interleaved with the listing, so only keep real object URLs;
    # callers index into the URL's path components.
    stripped = (line.strip() for line in results.splitlines())
    return sorted(line for line in stripped if line.startswith("gs://"))


def get_crash_reproducers(working_dir):
    """Finds (source, script) reproducer pairs directly inside `working_dir`.

    A source is a file ending in .c, .cc or .cpp. Its script is the file with
    the same path but a .sh extension. Sources without a script are logged
    with a warning and skipped.

    Args:
        working_dir: Directory to search (not searched recursively).

    Returns:
        A list of (source_path, script_path) tuples, ordered by source path.
    """
    results = []
    # Sort so the submission order does not depend on directory order.
    for src in sorted(glob.glob("%s/*.c*" % working_dir)):
        stem, extension = os.path.splitext(src)
        if extension not in (".c", ".cc", ".cpp"):
            continue
        script = stem + ".sh"
        if not os.path.exists(script):
            logging.warning("could not find the matching script of %s", src)
        else:
            results.append((src, script))
    return results


def submit_crash_to_forcey(
    forcey: str, temporary_directory: str, buildbucket_id: str, url: str
) -> None:
    """Downloads one crash tarball and submits each reproducer in it.

    The tarball is copied to `temporary_directory/buildbucket_id`, extracted
    there, and `forcey reduce` is run once per reproducer pair found.

    Args:
        forcey: Path to the client binary to invoke.
        temporary_directory: Root directory for downloaded artifacts.
        buildbucket_id: Name of the per-build subdirectory to use.
        url: gs:// URL of the .tar.xz artifact to fetch.

    Raises:
        subprocess.CalledProcessError: If any subprocess exits non-zero.
    """
    dest_dir = os.path.join(temporary_directory, buildbucket_id)
    dest_file = os.path.join(dest_dir, os.path.basename(url))
    # `tar` below runs with cwd=dest_dir, so make sure it exists rather than
    # relying on the download step to create it as a side effect.
    os.makedirs(dest_dir, exist_ok=True)
    logging.info("Downloading and submitting %r...", url)
    subprocess.check_output(
        ["gsutil.py", "cp", url, dest_file], stderr=subprocess.STDOUT
    )
    subprocess.check_output(["tar", "-xJf", dest_file], cwd=dest_dir)
    for src, script in get_crash_reproducers(dest_dir):
        subprocess.check_output(
            [
                forcey,
                "reduce",
                "-wait=false",
                "-note",
                "%s:%s" % (url, src),
                "-sh_file",
                script,
                "-src_file",
                src,
            ]
        )


def main(argv):
    """Submits every not-yet-submitted crash artifact and records progress.

    Args:
        argv: Command-line arguments, excluding the program name.
    """
    chroot.VerifyOutsideChroot()
    logging.basicConfig(
        format="%(asctime)s: %(levelname)s: %(filename)s:%(lineno)d: %(message)s",
        level=logging.INFO,
    )
    cur_dir = os.path.dirname(os.path.abspath(__file__))
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--4c", dest="forcey", required=True, help="Path to a 4c client binary"
    )
    parser.add_argument(
        "--state_file",
        default=os.path.join(cur_dir, "chromeos-state.json"),
        help="The path to the state file.",
    )
    parser.add_argument(
        "--nocleanup",
        action="store_false",
        dest="cleanup",
        help="Keep temporary files created after the script finishes.",
    )
    opts = parser.parse_args(argv)

    state_file = os.path.abspath(opts.state_file)
    os.makedirs(os.path.dirname(state_file), exist_ok=True)
    temporary_directory = "/tmp/bisect_clang_crashes"
    os.makedirs(temporary_directory, exist_ok=True)
    urls = get_artifacts(
        "gs://chromeos-toolchain-artifacts/clang-crash-diagnoses"
        "/**/*clang_crash_diagnoses.tar.xz"
    )
    logging.info("%d crash URLs found", len(urls))

    # Maps buildbucket ID (the URL's second-to-last path component) to the
    # artifact URL that was submitted for it.
    visited = {}
    if os.path.exists(state_file):
        buildbucket_ids = {url.split("/")[-2] for url in urls}
        with open(state_file, encoding="utf-8") as f:
            data = json.load(f)
            # Forget builds whose artifacts are no longer listed, so the
            # state file does not grow without bound.
            visited = {k: v for k, v in data.items() if k in buildbucket_ids}
        logging.info(
            "Successfully loaded %d previously-submitted crashes", len(visited)
        )

    succeeded = False
    try:
        for url in urls:
            buildbucket_id = url.split("/")[-2]
            # Skip the builds that have been processed
            if buildbucket_id in visited:
                continue
            submit_crash_to_forcey(
                forcey=opts.forcey,
                temporary_directory=temporary_directory,
                buildbucket_id=buildbucket_id,
                url=url,
            )
            visited[buildbucket_id] = url

        succeeded = True
    finally:
        # Runs on success and on any exception (including KeyboardInterrupt);
        # an in-flight exception keeps propagating once the state is saved.
        if succeeded:
            logging.info("Persisting state...")
        else:
            # This is best-effort. If the machine powers off or similar, we'll
            # just resubmit the same crashes, which is suboptimal, but
            # otherwise acceptable.
            logging.error(
                "Something went wrong; attempting to save our work..."
            )

        # Write to a sibling file and swap it in, so an interrupted write
        # cannot leave a truncated state file behind.
        tmp_state_file = state_file + ".tmp"
        with open(tmp_state_file, "w", encoding="utf-8") as f:
            json.dump(visited, f, indent=2)
        os.replace(tmp_state_file, state_file)

        logging.info("State successfully persisted")

    # Only reached when every submission succeeded; after a failure the
    # downloaded artifacts are left in place.
    if opts.cleanup:
        shutil.rmtree(temporary_directory)


if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))