# Copyright 2020 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
################################################################################
"""Cloud functions for build scheduling."""

from collections import namedtuple
import logging
import os
import re
import yaml

from github import Github
from google.api_core import exceptions
from google.cloud import ndb
from google.cloud import scheduler_v1

import build_and_run_coverage
import build_project
from datastore_entities import GithubCreds
from datastore_entities import Project

VALID_PROJECT_NAME = re.compile(r'^[a-zA-Z0-9_-]+$')
DEFAULT_BUILDS_PER_DAY = 1
MAX_BUILDS_PER_DAY = 4
COVERAGE_SCHEDULE = '0 6 * * *'
FUZZING_BUILD_TOPIC = 'request-build'
COVERAGE_BUILD_TOPIC = 'request-coverage-build'

# Per-project data gathered from the repository: the cron schedule derived
# from project.yaml plus the raw text of project.yaml and the Dockerfile.
ProjectMetadata = namedtuple(
    'ProjectMetadata', 'schedule project_yaml_contents dockerfile_contents')


class ProjectYamlError(Exception):
  """Error in project.yaml format."""


def create_scheduler(cloud_scheduler_client, project_name, schedule, tag,
                     topic):
  """Creates a scheduler job for a new project.

  The job is named '<project_name>-scheduler-<tag>' and publishes the encoded
  project name to the given Pub/Sub topic on the given schedule.

  Args:
    cloud_scheduler_client: Cloud Scheduler client used to create the job.
    project_name: Name of the project the job is created for.
    schedule: Schedule string stored in the job's 'schedule' field.
    tag: Suffix that distinguishes the jobs of one project.
    topic: Name of the Pub/Sub topic inside the GCP_PROJECT project.
  """
  project_id = os.environ.get('GCP_PROJECT')
  location_id = os.environ.get('FUNCTION_REGION')
  parent = cloud_scheduler_client.location_path(project_id, location_id)
  job = {
      'name': parent + '/jobs/' + project_name + '-scheduler-' + tag,
      'pubsub_target': {
          'topic_name': 'projects/' + project_id + '/topics/' + topic,
          'data': project_name.encode()
      },
      'schedule': schedule
  }

  cloud_scheduler_client.create_job(parent, job)


def delete_scheduler(cloud_scheduler_client, project_name, tag):
  """Deletes the scheduler job of a project that was removed.

  Args:
    cloud_scheduler_client: Cloud Scheduler client used to delete the job.
    project_name: Name of the project whose job is deleted.
    tag: Suffix that distinguishes the jobs of one project.
  """
  project_id = os.environ.get('GCP_PROJECT')
  location_id = os.environ.get('FUNCTION_REGION')
  name = cloud_scheduler_client.job_path(project_id, location_id,
                                         project_name + '-scheduler-' + tag)
  cloud_scheduler_client.delete_job(name)


def update_scheduler(cloud_scheduler_client,
                     project,
                     schedule,
                     tag,
                     topic=FUZZING_BUILD_TOPIC):
  """Updates the schedule of an existing scheduler job.

  Args:
    cloud_scheduler_client: Cloud Scheduler client used to update the job.
    project: Object whose `name` attribute identifies the project.
    schedule: New schedule string.
    tag: Suffix that distinguishes the jobs of one project.
    topic: Name of the Pub/Sub topic placed in the job description. Defaults to
      the fuzzing build topic, which is what was previously hard-coded.
  """
  project_id = os.environ.get('GCP_PROJECT')
  location_id = os.environ.get('FUNCTION_REGION')
  parent = cloud_scheduler_client.location_path(project_id, location_id)
  job = {
      'name': parent + '/jobs/' + project.name + '-scheduler-' + tag,
      'pubsub_target': {
          'topic_name': 'projects/' + project_id + '/topics/' + topic,
          'data': project.name.encode()
      },
      'schedule': schedule,
  }

  # Only the 'schedule' path is listed in the mask passed to update_job.
  update_mask = {'paths': ['schedule']}
  cloud_scheduler_client.update_job(job, update_mask)


def delete_project(cloud_scheduler_client, project):
  """Deletes the scheduler jobs of the given project, then its entity.

  The entity is left in place when a scheduler deletion fails with an API
  error other than NotFound.

  Args:
    cloud_scheduler_client: Cloud Scheduler client used to delete the jobs.
    project: Project entity to delete.
  """
  logging.info('Deleting project %s', project.name)
  for tag in (build_project.FUZZING_BUILD_TYPE,
              build_and_run_coverage.COVERAGE_BUILD_TYPE):
    try:
      delete_scheduler(cloud_scheduler_client, project.name, tag)
    except exceptions.NotFound:
      # Already deleted.
      continue
    except exceptions.GoogleAPICallError as error:
      logging.error('Scheduler deletion for %s failed with %s', project.name,
                    error)
      return

  project.key.delete()


# pylint: disable=too-many-branches
def sync_projects(cloud_scheduler_client, projects):
  """Syncs the stored Project entities and scheduler jobs with `projects`.

  Stored projects missing from `projects` are deleted, projects that are not
  stored yet get their scheduler jobs and entity created, and stored projects
  whose schedule, project.yaml or Dockerfile differ are updated.

  Args:
    cloud_scheduler_client: Cloud Scheduler client used for all job changes.
    projects: Dict mapping project name to ProjectMetadata.
  """
  for project in Project.query():
    if project.name not in projects:
      delete_project(cloud_scheduler_client, project)

  existing_projects = {project.name for project in Project.query()}
  for project_name, project_metadata in projects.items():
    if project_name in existing_projects:
      continue

    # NOTE(review): if the first create_scheduler call succeeds and the second
    # fails, no Project entity is stored, so the next sync attempts both
    # creations again - confirm that an already-existing job is tolerated.
    try:
      create_scheduler(cloud_scheduler_client, project_name,
                       project_metadata.schedule,
                       build_project.FUZZING_BUILD_TYPE, FUZZING_BUILD_TOPIC)
      create_scheduler(cloud_scheduler_client, project_name, COVERAGE_SCHEDULE,
                       build_and_run_coverage.COVERAGE_BUILD_TYPE,
                       COVERAGE_BUILD_TOPIC)
      Project(name=project_name,
              schedule=project_metadata.schedule,
              project_yaml_contents=project_metadata.project_yaml_contents,
              dockerfile_contents=project_metadata.dockerfile_contents).put()
    except exceptions.GoogleAPICallError as error:
      logging.error('Scheduler creation for %s failed with %s', project_name,
                    error)

  for project in Project.query():
    if project.name not in projects:
      continue

    logging.info('Setting up project %s', project.name)
    project_metadata = projects[project.name]
    project_changed = False
    if project.schedule != project_metadata.schedule:
      try:
        logging.info('Schedule changed.')
        update_scheduler(cloud_scheduler_client, project,
                         project_metadata.schedule,
                         build_project.FUZZING_BUILD_TYPE)
        # The stored schedule is only updated once the job update succeeded.
        project.schedule = project_metadata.schedule
        project_changed = True
      except exceptions.GoogleAPICallError as error:
        logging.error('Updating scheduler for %s failed with %s', project.name,
                      error)
    if project.project_yaml_contents != project_metadata.project_yaml_contents:
      project.project_yaml_contents = project_metadata.project_yaml_contents
      project_changed = True

    if project.dockerfile_contents != project_metadata.dockerfile_contents:
      project.dockerfile_contents = project_metadata.dockerfile_contents
      project_changed = True

    if project_changed:
      project.put()


def _has_docker_file(project_contents):
  """Checks if project has a Dockerfile."""
  return any(
      content_file.name == 'Dockerfile' for content_file in project_contents)


def get_project_metadata(project_contents):
  """Builds the ProjectMetadata for one project directory.

  The schedule is derived from the 'builds_per_day' key of project.yaml,
  falling back to DEFAULT_BUILDS_PER_DAY when the key is absent.

  Args:
    project_contents: Iterable of content files of the project directory; each
      item provides `name` and `decoded_content`.

  Returns:
    A ProjectMetadata tuple. `dockerfile_contents` is None when the directory
    has no Dockerfile.

  Raises:
    ProjectYamlError: If project.yaml is missing, cannot be parsed, is not a
      mapping, or has an invalid 'builds_per_day' value.
  """
  # Initialised up front so that a missing file is reported as a
  # ProjectYamlError (handled per project by get_projects) rather than as an
  # UnboundLocalError that aborts the whole sync.
  project_yaml_contents = None
  dockerfile_contents = None
  for content_file in project_contents:
    if content_file.name == 'project.yaml':
      project_yaml_contents = content_file.decoded_content.decode('utf-8')

    if content_file.name == 'Dockerfile':
      dockerfile_contents = content_file.decoded_content.decode('utf-8')

  if project_yaml_contents is None:
    raise ProjectYamlError('project.yaml is missing')

  try:
    project_yaml = yaml.safe_load(project_yaml_contents)
  except yaml.YAMLError as error:
    raise ProjectYamlError('project.yaml is not valid YAML: %s' %
                           error) from error

  # An empty file loads as None and a scalar/list has no .get().
  if not isinstance(project_yaml, dict):
    raise ProjectYamlError('project.yaml does not contain a mapping')

  builds_per_day = project_yaml.get('builds_per_day', DEFAULT_BUILDS_PER_DAY)
  if not isinstance(builds_per_day, int) or builds_per_day not in range(
      1, MAX_BUILDS_PER_DAY + 1):
    raise ProjectYamlError('Parameter is not an integer in range [1-4]')

  # Starting at 6:00 am, next build schedules are added at 'interval' slots
  # Example for interval 2, hours = [6, 18] and schedule = '0 6,18 * * *'
  interval = 24 // builds_per_day
  hours = [hour % 24 for hour in range(6, 30, interval)]
  schedule = '0 ' + ','.join(str(hour) for hour in hours) + ' * * *'

  return ProjectMetadata(schedule, project_yaml_contents, dockerfile_contents)


def get_projects(repo):
  """Gets the project list from the git repository.

  Entries of the 'projects' directory are skipped when they are not
  directories, when their name does not match VALID_PROJECT_NAME, or when they
  contain no Dockerfile. Projects whose project.yaml is rejected are logged
  and left out.

  Args:
    repo: Repository object providing `get_contents(path)`.

  Returns:
    Dict mapping project name to ProjectMetadata.
  """
  projects = {}
  contents = repo.get_contents('projects')
  for content_file in contents:
    if content_file.type != 'dir' or not VALID_PROJECT_NAME.match(
        content_file.name):
      continue

    project_contents = repo.get_contents(content_file.path)
    if not _has_docker_file(project_contents):
      continue

    try:
      projects[content_file.name] = get_project_metadata(project_contents)
    except ProjectYamlError as error:
      logging.error(
          'Incorrect format for project.yaml file of %s with error %s',
          content_file.name, error)

  return projects


def get_github_creds():
  """Retrieves GitHub client credentials.

  Returns:
    The first GithubCreds entity returned by the query.

  Raises:
    RuntimeError: If no GithubCreds entity exists.
  """
  git_creds = GithubCreds.query().get()
  if git_creds is None:
    raise RuntimeError('Git credentials not available.')
  return git_creds


def sync(event, context):
  """Syncs projects with cloud datastore.

  Args:
    event: Unused.
    context: Unused.
  """
  del event, context  # Unused.

  with ndb.Client().context():
    git_creds = get_github_creds()
    github_client = Github(git_creds.client_id, git_creds.client_secret)
    repo = github_client.get_repo('google/oss-fuzz')
    projects = get_projects(repo)
    cloud_scheduler_client = scheduler_v1.CloudSchedulerClient()
    sync_projects(cloud_scheduler_client, projects)