• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2020 Google Inc.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#      http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14#
15################################################################################
16"""Cloud functions for build scheduling."""
17
18from collections import namedtuple
19import logging
20import os
21import re
22import yaml
23
24from github import Github
25from google.api_core import exceptions
26from google.cloud import ndb
27from google.cloud import scheduler_v1
28
29import build_and_run_coverage
30import build_project
31from datastore_entities import GithubCreds
32from datastore_entities import Project
33
# Project directory names may only contain letters, digits, '_' and '-'.
VALID_PROJECT_NAME = re.compile(r'^[a-zA-Z0-9_-]+$')

# Default and upper bound for the `builds_per_day` value in project.yaml.
DEFAULT_BUILDS_PER_DAY = 1
MAX_BUILDS_PER_DAY = 4

# Cron expression used for coverage build scheduler jobs.
COVERAGE_SCHEDULE = '0 6 * * *'

# Pub/Sub topic names targeted by the scheduler jobs.
FUZZING_BUILD_TOPIC = 'request-build'
COVERAGE_BUILD_TOPIC = 'request-coverage-build'

# Per-project data gathered from the repository: the fuzzing build cron
# schedule plus the raw text of project.yaml and the Dockerfile.
ProjectMetadata = namedtuple(
    typename='ProjectMetadata',
    field_names='schedule project_yaml_contents dockerfile_contents')
43
44
class ProjectYamlError(Exception):
  """Raised when a project's project.yaml content is not acceptable."""
47
48
def create_scheduler(cloud_scheduler_client, project_name, schedule, tag,
                     topic):
  """Creates a scheduler job that publishes the project name to a topic.

  The GCP project and location are read from the GCP_PROJECT and
  FUNCTION_REGION environment variables.

  Args:
    cloud_scheduler_client: Client exposing `location_path` and `create_job`.
    project_name: Name of the project; also sent (encoded) as the message
      data.
    schedule: Cron expression for the job.
    tag: Suffix distinguishing the job, giving '<project_name>-scheduler-<tag>'.
    topic: Name of the Pub/Sub topic the job publishes to.
  """
  gcp_project = os.environ.get('GCP_PROJECT')
  region = os.environ.get('FUNCTION_REGION')
  location = cloud_scheduler_client.location_path(gcp_project, region)

  job_name = location + '/jobs/' + project_name + '-scheduler-' + tag
  topic_name = 'projects/' + gcp_project + '/topics/' + topic
  pubsub_target = {'topic_name': topic_name, 'data': project_name.encode()}

  cloud_scheduler_client.create_job(location, {
      'name': job_name,
      'pubsub_target': pubsub_target,
      'schedule': schedule,
  })
65
66
def delete_scheduler(cloud_scheduler_client, project_name, tag):
  """Deletes the scheduler job named '<project_name>-scheduler-<tag>'.

  The GCP project and location are read from the GCP_PROJECT and
  FUNCTION_REGION environment variables.

  Args:
    cloud_scheduler_client: Client exposing `job_path` and `delete_job`.
    project_name: Name of the project whose job is removed.
    tag: Suffix identifying which of the project's jobs to remove.
  """
  gcp_project = os.environ.get('GCP_PROJECT')
  region = os.environ.get('FUNCTION_REGION')
  job_id = project_name + '-scheduler-' + tag
  job_name = cloud_scheduler_client.job_path(gcp_project, region, job_id)
  cloud_scheduler_client.delete_job(job_name)
74
75
def update_scheduler(cloud_scheduler_client, project, schedule, tag):
  """Sets a new cron schedule on a project's existing scheduler job.

  The job passed to the client carries the full job description, but the
  update mask lists only 'schedule'.

  Args:
    cloud_scheduler_client: Client exposing `location_path` and `update_job`.
    project: Object whose `name` attribute identifies the project.
    schedule: New cron expression for the job.
    tag: Suffix identifying which of the project's jobs to update.
  """
  gcp_project = os.environ.get('GCP_PROJECT')
  region = os.environ.get('FUNCTION_REGION')
  location = cloud_scheduler_client.location_path(gcp_project, region)

  job_name = location + '/jobs/' + project.name + '-scheduler-' + tag
  pubsub_target = {
      'topic_name': 'projects/' + gcp_project + '/topics/request-build',
      'data': project.name.encode(),
  }
  job = {
      'name': job_name,
      'pubsub_target': pubsub_target,
      'schedule': schedule,
  }

  cloud_scheduler_client.update_job(job, {'paths': ['schedule']})
92
93
def delete_project(cloud_scheduler_client, project):
  """Removes a project's scheduler jobs and then its datastore entity.

  The entity is only deleted once both the fuzzing and the coverage job are
  gone. If deleting a job fails for any reason other than the job not
  existing, the failure is logged and the entity is left in place.

  Args:
    cloud_scheduler_client: Client passed through to delete_scheduler.
    project: Stored project; its `name` and `key` attributes are used.
  """
  logging.info('Deleting project %s', project.name)
  build_types = (build_project.FUZZING_BUILD_TYPE,
                 build_and_run_coverage.COVERAGE_BUILD_TYPE)
  for build_type in build_types:
    try:
      delete_scheduler(cloud_scheduler_client, project.name, build_type)
    except exceptions.NotFound:
      # The job is already gone, so there is nothing left to remove.
      pass
    except exceptions.GoogleAPICallError as error:
      logging.error('Scheduler deletion for %s failed with %s', project.name,
                    error)
      return

  project.key.delete()
110
111
def _delete_removed_projects(cloud_scheduler_client, projects):
  """Deletes every stored project whose name is not a key of |projects|."""
  for project in Project.query():
    if project.name not in projects:
      delete_project(cloud_scheduler_client, project)


def _create_new_project(cloud_scheduler_client, project_name,
                        project_metadata):
  """Creates the scheduler jobs and datastore entity for a new project.

  An earlier sync may have created one or both jobs and then failed before
  the entity was stored. Such leftover jobs are reused instead of being
  treated as an error, otherwise the project could never be created.

  Raises:
    exceptions.GoogleAPICallError: If a scheduler call fails for a reason
      other than the job already existing. The entity is not stored then.
  """
  project = Project(
      name=project_name,
      schedule=project_metadata.schedule,
      project_yaml_contents=project_metadata.project_yaml_contents,
      dockerfile_contents=project_metadata.dockerfile_contents)

  try:
    create_scheduler(cloud_scheduler_client, project_name,
                     project_metadata.schedule,
                     build_project.FUZZING_BUILD_TYPE, FUZZING_BUILD_TOPIC)
  except exceptions.AlreadyExists:
    # The leftover job may carry an outdated schedule; bring it in line with
    # the schedule that is about to be stored.
    logging.info('Fuzzing scheduler for %s already exists.', project_name)
    update_scheduler(cloud_scheduler_client, project,
                     project_metadata.schedule,
                     build_project.FUZZING_BUILD_TYPE)

  try:
    create_scheduler(cloud_scheduler_client, project_name, COVERAGE_SCHEDULE,
                     build_and_run_coverage.COVERAGE_BUILD_TYPE,
                     COVERAGE_BUILD_TOPIC)
  except exceptions.AlreadyExists:
    # Coverage jobs are always created with COVERAGE_SCHEDULE, so an existing
    # job needs no adjustment.
    logging.info('Coverage scheduler for %s already exists.', project_name)

  project.put()


def _update_existing_project(cloud_scheduler_client, project,
                             project_metadata):
  """Brings a stored project in line with |project_metadata|.

  A schedule change is only recorded if the scheduler job was updated
  successfully; changes to the project.yaml and Dockerfile contents are
  recorded regardless.
  """
  logging.info('Setting up project %s', project.name)
  project_changed = False

  if project.schedule != project_metadata.schedule:
    logging.info('Schedule changed.')
    try:
      update_scheduler(cloud_scheduler_client, project,
                       project_metadata.schedule,
                       build_project.FUZZING_BUILD_TYPE)
    except exceptions.GoogleAPICallError as error:
      logging.error('Updating scheduler for %s failed with %s', project.name,
                    error)
    else:
      project.schedule = project_metadata.schedule
      project_changed = True

  if project.project_yaml_contents != project_metadata.project_yaml_contents:
    project.project_yaml_contents = project_metadata.project_yaml_contents
    project_changed = True

  if project.dockerfile_contents != project_metadata.dockerfile_contents:
    project.dockerfile_contents = project_metadata.dockerfile_contents
    project_changed = True

  if project_changed:
    project.put()


def sync_projects(cloud_scheduler_client, projects):
  """Sync projects with cloud datastore.

  Runs three passes: stored projects missing from |projects| are deleted,
  projects not yet stored get their scheduler jobs and entity created, and
  stored projects are updated where their schedule, project.yaml or
  Dockerfile contents differ.

  Args:
    cloud_scheduler_client: Client used for all scheduler job operations.
    projects: Dict mapping project name to ProjectMetadata.
  """
  _delete_removed_projects(cloud_scheduler_client, projects)

  existing_projects = {project.name for project in Project.query()}
  for project_name, project_metadata in projects.items():
    if project_name in existing_projects:
      continue

    try:
      _create_new_project(cloud_scheduler_client, project_name,
                          project_metadata)
    except exceptions.GoogleAPICallError as error:
      logging.error('Scheduler creation for %s failed with %s', project_name,
                    error)

  for project in Project.query():
    if project.name in projects:
      _update_existing_project(cloud_scheduler_client, project,
                               projects[project.name])
168
169
170def _has_docker_file(project_contents):
171  """Checks if project has a Dockerfile."""
172  return any(
173      content_file.name == 'Dockerfile' for content_file in project_contents)
174
175
def get_project_metadata(project_contents):
  """Builds the ProjectMetadata for one project directory.

  The fuzzing build schedule is derived from the `builds_per_day` value in
  project.yaml, falling back to DEFAULT_BUILDS_PER_DAY when it is absent.

  Args:
    project_contents: Iterable of content files exposing `name` and
      `decoded_content` (bytes).

  Returns:
    A ProjectMetadata holding the cron schedule and the text of project.yaml
    and the Dockerfile. The Dockerfile text is None if no Dockerfile is
    present; get_projects skips such projects before calling this function.

  Raises:
    ProjectYamlError: If project.yaml is missing, cannot be parsed, is not a
      mapping, or `builds_per_day` is not an integer in
      [1, MAX_BUILDS_PER_DAY].
  """
  project_yaml_contents = None
  dockerfile_contents = None
  for content_file in project_contents:
    if content_file.name == 'project.yaml':
      project_yaml_contents = content_file.decoded_content.decode('utf-8')
    elif content_file.name == 'Dockerfile':
      dockerfile_contents = content_file.decoded_content.decode('utf-8')

  # Report bad input as ProjectYamlError so that callers can skip just this
  # project instead of failing on an unrelated exception type.
  if project_yaml_contents is None:
    raise ProjectYamlError('project.yaml not found')

  try:
    project_yaml = yaml.safe_load(project_yaml_contents)
  except yaml.YAMLError as error:
    raise ProjectYamlError('Failed to parse project.yaml: %s' %
                           error) from error

  # An empty file loads as None and a scalar or list has no `get`.
  if not isinstance(project_yaml, dict):
    raise ProjectYamlError('project.yaml is not a mapping')

  builds_per_day = project_yaml.get('builds_per_day', DEFAULT_BUILDS_PER_DAY)
  if not isinstance(builds_per_day, int) or builds_per_day not in range(
      1, MAX_BUILDS_PER_DAY + 1):
    raise ProjectYamlError('Parameter is not an integer in range [1-4]')

  # Starting at 6:00 am, next build schedules are added at 'interval' slots
  # Example for interval 2, hours = [6, 18] and schedule = '0 6,18 * * *'
  interval = 24 // builds_per_day
  hours = [hour % 24 for hour in range(6, 30, interval)]
  schedule = '0 ' + ','.join(str(hour) for hour in hours) + ' * * *'

  return ProjectMetadata(schedule, project_yaml_contents, dockerfile_contents)
200
201
def get_projects(repo):
  """Collects metadata for every valid project under 'projects' in |repo|.

  An entry is considered only if it is a directory whose name matches
  VALID_PROJECT_NAME and which contains a Dockerfile. Projects whose
  project.yaml is rejected are logged and left out.

  Args:
    repo: Repository object exposing `get_contents(path)`.

  Returns:
    Dict mapping project name to its ProjectMetadata.
  """
  projects = {}
  for entry in repo.get_contents('projects'):
    is_project_dir = (entry.type == 'dir' and
                      VALID_PROJECT_NAME.match(entry.name))
    if not is_project_dir:
      continue

    project_files = repo.get_contents(entry.path)
    if _has_docker_file(project_files):
      try:
        projects[entry.name] = get_project_metadata(project_files)
      except ProjectYamlError as error:
        logging.error(
            'Incorrect format for project.yaml file of %s with error %s',
            entry.name, error)

  return projects
223
224
def get_github_creds():
  """Returns the first stored GithubCreds entity.

  Raises:
    RuntimeError: If the query yields no entity.
  """
  creds = GithubCreds.query().get()
  if creds is not None:
    return creds
  raise RuntimeError('Git credentials not available.')
231
232
def sync(event, context):
  """Cloud function entry point: syncs datastore with the OSS-Fuzz repo.

  Inside an ndb context, reads the project list from the 'google/oss-fuzz'
  GitHub repository and passes it to sync_projects together with a Cloud
  Scheduler client.

  Args:
    event: Unused.
    context: Unused.
  """
  del event, context  # Unused.

  ndb_client = ndb.Client()
  with ndb_client.context():
    creds = get_github_creds()
    github_client = Github(creds.client_id, creds.client_secret)
    projects = get_projects(github_client.get_repo('google/oss-fuzz'))
    sync_projects(scheduler_v1.CloudSchedulerClient(), projects)
244