1# Copyright 2020 Google Inc. 2# 3# Licensed under the Apache License, Version 2.0 (the "License"); 4# you may not use this file except in compliance with the License. 5# You may obtain a copy of the License at 6# 7# http://www.apache.org/licenses/LICENSE-2.0 8# 9# Unless required by applicable law or agreed to in writing, software 10# distributed under the License is distributed on an "AS IS" BASIS, 11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12# See the License for the specific language governing permissions and 13# limitations under the License. 14# 15################################################################################ 16"""Utility module for Google Cloud Build scripts.""" 17import base64 18import collections 19import os 20import six.moves.urllib.parse as urlparse 21import sys 22import time 23 24import requests 25 26import google.auth 27import googleapiclient.discovery 28from oauth2client.service_account import ServiceAccountCredentials 29 30BUILD_TIMEOUT = 12 * 60 * 60 31 32# Needed for reading public target.list.* files. 33GCS_URL_BASENAME = 'https://storage.googleapis.com/' 34 35GCS_UPLOAD_URL_FORMAT = '/{0}/{1}/{2}' 36 37# Where corpus backups can be downloaded from. 38CORPUS_BACKUP_URL = ('/{project}-backup.clusterfuzz-external.appspot.com/' 39 'corpus/libFuzzer/{fuzzer}/latest.zip') 40 41# Cloud Builder has a limit of 100 build steps and 100 arguments for each step. 
# Number of fuzz targets whose corpora are fetched by a single download step,
# to stay under Cloud Build's 100-steps / 100-arguments-per-step limits.
CORPUS_DOWNLOAD_BATCH_SIZE = 100

TARGETS_LIST_BASENAME = 'targets.list'

# Per-engine upload configuration: which GCS bucket builds are uploaded to and
# which sanitizers/architectures the engine supports.
EngineInfo = collections.namedtuple(
    'EngineInfo',
    ['upload_bucket', 'supported_sanitizers', 'supported_architectures'])

ENGINE_INFO = {
    'libfuzzer':
        EngineInfo(upload_bucket='clusterfuzz-builds',
                   supported_sanitizers=['address', 'memory', 'undefined'],
                   supported_architectures=['x86_64', 'i386']),
    'afl':
        EngineInfo(upload_bucket='clusterfuzz-builds-afl',
                   supported_sanitizers=['address'],
                   supported_architectures=['x86_64']),
    'honggfuzz':
        EngineInfo(upload_bucket='clusterfuzz-builds-honggfuzz',
                   supported_sanitizers=['address'],
                   supported_architectures=['x86_64']),
    'dataflow':
        EngineInfo(upload_bucket='clusterfuzz-builds-dataflow',
                   supported_sanitizers=['dataflow'],
                   supported_architectures=['x86_64']),
    'none':
        EngineInfo(upload_bucket='clusterfuzz-builds-no-engine',
                   supported_sanitizers=['address'],
                   supported_architectures=['x86_64']),
}


def get_targets_list_filename(sanitizer):
  """Returns the targets list filename for |sanitizer|.

  E.g. 'targets.list.address' for the 'address' sanitizer.
  """
  return TARGETS_LIST_BASENAME + '.' + sanitizer


def get_targets_list_url(bucket, project, sanitizer):
  """Returns the GCS path of the targets list for |project| and |sanitizer|
  in |bucket| (path only, no scheme/host)."""
  filename = get_targets_list_filename(sanitizer)
  url = GCS_UPLOAD_URL_FORMAT.format(bucket, project, filename)
  return url


def get_upload_bucket(engine, architecture, testing):
  """Returns the upload bucket for |engine| and |architecture|. Returns the
  testing bucket if |testing|.

  Raises:
    KeyError: If |engine| is not a known engine in ENGINE_INFO.
  """
  bucket = ENGINE_INFO[engine].upload_bucket
  # x86_64 is the default; other architectures get a suffixed bucket.
  if architecture != 'x86_64':
    bucket += '-' + architecture
  if testing:
    bucket += '-testing'
  return bucket


def _get_targets_list(project_name, testing):
  """Returns the list of fuzz target names for |project_name|, or None if the
  targets list could not be fetched."""
  # libFuzzer ASan 'x86_84' is the default configuration, get list of targets
  # from it.
  bucket = get_upload_bucket('libfuzzer', 'x86_64', testing)
  url = get_targets_list_url(bucket, project_name, 'address')

  url = urlparse.urljoin(GCS_URL_BASENAME, url)
  response = requests.get(url)
  # Fixed: use the direct '!=' comparison instead of 'not ... == 200'.
  if response.status_code != 200:
    sys.stderr.write('Failed to get list of targets from "%s".\n' % url)
    sys.stderr.write('Status code: %d \t\tText:\n%s\n' %
                     (response.status_code, response.text))
    return None

  return response.text.split()


# pylint: disable=no-member
def get_signed_url(path, method='PUT', content_type=''):
  """Returns a signed GCS URL for |path|, valid for BUILD_TIMEOUT seconds.

  Args:
    path: GCS object path, e.g. '/bucket/object'.
    method: HTTP method the URL authorizes (default 'PUT').
    content_type: Content-Type header the request must use, if any.

  Signs locally with the key file named by GOOGLE_APPLICATION_CREDENTIALS when
  set; otherwise delegates signing to the IAM credentials signBlob API using
  the App Engine default service account of the current project.
  """
  timestamp = int(time.time() + BUILD_TIMEOUT)
  # V2 signed URL string-to-sign: method, (empty) MD5, content type,
  # expiration, resource path.
  blob = f'{method}\n\n{content_type}\n{timestamp}\n{path}'

  service_account_path = os.environ.get('GOOGLE_APPLICATION_CREDENTIALS')
  if service_account_path:
    # Fixed: reuse |service_account_path| instead of re-reading the
    # environment variable.
    creds = ServiceAccountCredentials.from_json_keyfile_name(
        service_account_path)
    client_id = creds.service_account_email
    # sign_blob() returns (key_id, signature); we only need the signature.
    signature = base64.b64encode(creds.sign_blob(blob)[1])
  else:
    credentials, project = google.auth.default()
    iam = googleapiclient.discovery.build('iamcredentials',
                                          'v1',
                                          credentials=credentials,
                                          cache_discovery=False)
    client_id = project + '@appspot.gserviceaccount.com'
    service_account = f'projects/-/serviceAccounts/{client_id}'
    response = iam.projects().serviceAccounts().signBlob(
        name=service_account,
        body={
            'delegates': [],
            'payload': base64.b64encode(blob.encode('utf-8')).decode('utf-8'),
        }).execute()
    signature = response['signedBlob']

  values = {
      'GoogleAccessId': client_id,
      'Expires': timestamp,
      'Signature': signature,
  }
  return f'https://storage.googleapis.com{path}?{urlparse.urlencode(values)}'


def download_corpora_steps(project_name, testing):
  """Returns GCB steps for downloading corpora backups for the given project,
  or None if the project has no fuzz targets.
  """
  fuzz_targets = _get_targets_list(project_name, testing)
  if not fuzz_targets:
    sys.stderr.write('No fuzz targets found for project "%s".\n' % project_name)
    return None

  steps = []
  # Split fuzz targets into batches of CORPUS_DOWNLOAD_BATCH_SIZE.
  for i in range(0, len(fuzz_targets), CORPUS_DOWNLOAD_BATCH_SIZE):
    download_corpus_args = []
    for binary_name in fuzz_targets[i:i + CORPUS_DOWNLOAD_BATCH_SIZE]:
      # ClusterFuzz qualifies target names with a '<project>_' prefix.
      qualified_name = binary_name
      qualified_name_prefix = '%s_' % project_name
      if not binary_name.startswith(qualified_name_prefix):
        qualified_name = qualified_name_prefix + binary_name

      url = get_signed_url(CORPUS_BACKUP_URL.format(project=project_name,
                                                    fuzzer=qualified_name),
                           method='GET')

      corpus_archive_path = os.path.join('/corpus', binary_name + '.zip')
      download_corpus_args.append('%s %s' % (corpus_archive_path, url))

    steps.append({
        'name': 'gcr.io/oss-fuzz-base/base-runner',
        'entrypoint': 'download_corpus',
        'args': download_corpus_args,
        'volumes': [{
            'name': 'corpus',
            'path': '/corpus'
        }],
    })

  return steps


def http_upload_step(data, signed_url, content_type):
  """Returns a GCB step to upload |data| to |signed_url| via GCS HTTP API."""
  step = {
      'name':
          'gcr.io/cloud-builders/curl',
      'args': [
          '-H',
          'Content-Type: ' + content_type,
          '-X',
          'PUT',
          '-d',
          data,
          signed_url,
      ],
  }
  return step


def gsutil_rm_rf_step(url):
  """Returns a GCB step to recursively delete the object with given GCS url.

  The step always succeeds ('|| exit 0') so a missing object does not fail
  the build.
  """
  step = {
      'name': 'gcr.io/cloud-builders/gsutil',
      'entrypoint': 'sh',
      'args': [
          '-c',
          'gsutil -m rm -rf %s || exit 0' % url,
      ],
  }
  return step


def get_pull_test_images_steps(test_image_suffix):
  """Returns steps to pull testing versions of base-images and tag them so that
  they are used in builds."""
  images = [
      'gcr.io/oss-fuzz-base/base-builder',
      'gcr.io/oss-fuzz-base/base-builder-swift',
      'gcr.io/oss-fuzz-base/base-builder-jvm',
      'gcr.io/oss-fuzz-base/base-builder-go',
      'gcr.io/oss-fuzz-base/base-builder-python',
      'gcr.io/oss-fuzz-base/base-builder-rust',
  ]
  steps = []
  for image in images:
    test_image = image + '-' + test_image_suffix
    steps.append({
        'name': 'gcr.io/cloud-builders/docker',
        'args': [
            'pull',
            test_image,
        ],
        'waitFor': '-'  # Start this immediately, don't wait for previous step.
    })

    # This step is hacky but gives us great flexibility. OSS-Fuzz has hardcoded
    # references to gcr.io/oss-fuzz-base/base-builder (in dockerfiles, for
    # example) and gcr.io/oss-fuzz-base/base-runner (in this build code). But
    # the testing versions of those images are called e.g.
    # gcr.io/oss-fuzz-base/base-builder-testing and
    # gcr.io/oss-fuzz-base/base-runner-testing. How can we get the build to use
    # the testing images instead of the real ones? By doing this step: tagging
    # the test image with the non-test version, so that the test version is used
    # instead of pulling the real one.
    steps.append({
        'name': 'gcr.io/cloud-builders/docker',
        'args': ['tag', test_image, image],
    })
  return steps


def get_srcmap_step_id():
  """Returns the id for the srcmap step."""
  return 'srcmap'


def project_image_steps(name,
                        image,
                        language,
                        branch=None,
                        test_image_suffix=None):
  """Returns GCB steps to build OSS-Fuzz project image.

  Args:
    name: OSS-Fuzz project name (directory under oss-fuzz/projects).
    image: Docker image tag to build.
    language: Fuzzing language, exported as FUZZING_LANGUAGE.
    branch: Optional oss-fuzz branch to clone instead of the default.
    test_image_suffix: If set, pull/tag testing base images first.
  """
  clone_step = {
      'args': [
          'clone', 'https://github.com/google/oss-fuzz.git', '--depth', '1'
      ],
      'name': 'gcr.io/cloud-builders/git',
  }
  if branch:
    # Do this to support testing other branches.
    clone_step['args'].extend(['--branch', branch])

  steps = [clone_step]
  if test_image_suffix:
    steps.extend(get_pull_test_images_steps(test_image_suffix))

  srcmap_step_id = get_srcmap_step_id()
  steps += [{
      'name': 'gcr.io/cloud-builders/docker',
      'args': [
          'build',
          '-t',
          image,
          '.',
      ],
      'dir': 'oss-fuzz/projects/' + name,
  }, {
      'name': image,
      'args': [
          'bash', '-c',
          'srcmap > /workspace/srcmap.json && cat /workspace/srcmap.json'
      ],
      'env': [
          'OSSFUZZ_REVISION=$REVISION_ID',
          'FUZZING_LANGUAGE=%s' % language,
      ],
      'id': srcmap_step_id
  }]

  return steps