1#!/usr/bin/python2 2 3# Copyright (c) 2012 The Chromium OS Authors. All rights reserved. 4# Use of this source code is governed by a BSD-style license that can be 5# found in the LICENSE file. 6 7"""Module used to back up the mysql db and upload to Google Storage. 8 9Usage: 10 backup_mysql_db.py --type=weekly --gs_bucket=gs://my_bucket --keep 10 11 12 gs_bucket may refer to a local location by omitting gs:// and giving a local 13 path if desired for testing. The example usage above creates a dump 14 of the autotest db, uploads it to gs://my_bucket/weekly/dump_file.date and 15 cleans up older dumps if there are more than 10 in that directory. 16""" 17 18from __future__ import absolute_import 19from __future__ import division 20from __future__ import print_function 21 22import datetime 23from distutils import version 24import logging 25import optparse 26import os 27import tempfile 28 29import common 30 31from autotest_lib.client.common_lib import error 32from autotest_lib.client.common_lib import global_config 33from autotest_lib.client.common_lib import utils 34 35from chromite.lib import metrics 36from chromite.lib import ts_mon_config 37from six.moves import range 38 39_ATTEMPTS = 3 40_GSUTIL_BIN = 'gsutil' 41_GS_BUCKET = 'gs://chromeos-lab/backup/database' 42# TODO(scottz): Should we need to ignore more than one database a general 43# function should be designed that lists tables in the database and properly 44# creates the --ignore-table= args to be passed to mysqldump. 45# Tables to ignore when dumping all databases. 46# performance_schema is an internal database that cannot be dumped 47IGNORE_TABLES = ['performance_schema.cond_instances', 48 'performance_schema.events_waits_current', 49 'performance_schema.cond_instances', 50 'performance_schema.events_waits_history', 51 'performance_schema.events_waits_history_long', 52 'performance_schema.events_waits_summary_by_instance', 53 ('performance_schema.' 54 'events_waits_summary_by_thread_by_event_name'), 55 'performance_schema.events_waits_summary_global_by_event_name', 56 'performance_schema.file_instances', 57 'performance_schema.file_summary_by_event_name', 58 'performance_schema.file_summary_by_instance', 59 'performance_schema.mutex_instances', 60 'performance_schema.performance_timers', 61 'performance_schema.rwlock_instances', 62 'performance_schema.setup_consumers', 63 'performance_schema.setup_instruments', 64 'performance_schema.setup_timers', 65 'performance_schema.threads'] 66 67# Conventional mysqldump schedules. 68_DAILY = 'daily' 69_WEEKLY = 'weekly' 70_MONTHLY = 'monthly' 71 72# Dump of server db only 73_SERVER_DB = 'server_db' 74 75# Contrary to a conventional mysql dump which takes O(hours) on large databases, 76# a host dump is the cheapest form of backup possible. We dump the output of a 77# of a mysql command showing all hosts and their pool labels to a text file that 78# is backed up to google storage. 79_ONLY_HOSTS = 'only_hosts' 80_ONLY_SHARDS = 'only_shards' 81_SCHEDULER_TYPES = [_SERVER_DB, _ONLY_HOSTS, _ONLY_SHARDS, 82 _DAILY, _WEEKLY, _MONTHLY] 83 84class BackupError(Exception): 85 """Raised for error occurred during backup.""" 86 87 88class MySqlArchiver(object): 89 """Class that archives the Autotest MySQL DB to Google Storage. 90 91 Vars: 92 gs_dir: The path to the directory in Google Storage that this dump file 93 will be uploaded to. 94 number_to_keep: The number of dumps we should store. 95 """ 96 _AUTOTEST_DB = "chromeos_autotest_db" 97 _SERVER_DB = "chromeos_lab_servers" 98 99 100 def __init__(self, scheduled_type, number_to_keep, gs_bucket): 101 # For conventional scheduled type, we back up all databases. 102 # self._db is only used when scheduled_type is not 103 # conventional scheduled type. 104 self._db = self._get_db_name(scheduled_type) 105 self._gs_dir = '/'.join([gs_bucket, scheduled_type]) 106 self._number_to_keep = number_to_keep 107 self._type = scheduled_type 108 109 110 @classmethod 111 def _get_db_name(cls, scheduled_type): 112 """Get the db name to backup. 113 114 @param scheduled_type: one of _SCHEDULER_TYPES. 115 116 @returns: The name of the db to backup. 117 Or None for backup all dbs. 118 """ 119 if scheduled_type == _SERVER_DB: 120 return cls._SERVER_DB 121 elif scheduled_type in [_ONLY_HOSTS, _ONLY_SHARDS]: 122 return cls._AUTOTEST_DB 123 else: 124 return None 125 126 @staticmethod 127 def _get_user_pass(): 128 """Returns a tuple containing the user/pass to use to access the DB.""" 129 user = global_config.global_config.get_config_value( 130 'CROS', 'db_backup_user') 131 password = global_config.global_config.get_config_value( 132 'CROS', 'db_backup_password') 133 return user, password 134 135 136 def create_mysql_dump(self): 137 """Returns the path to a mysql dump of the current autotest DB.""" 138 user, password = self._get_user_pass() 139 _, filename = tempfile.mkstemp('autotest_db_dump') 140 logging.debug('Dumping mysql database to file %s', filename) 141 extra_dump_args = '' 142 for entry in IGNORE_TABLES: 143 extra_dump_args += '--ignore-table=%s ' % entry 144 if self._type in [_WEEKLY, _MONTHLY]: 145 extra_dump_args += '--dump-slave ' 146 147 if not self._db: 148 extra_dump_args += "--all-databases" 149 db_name = self._db or '' 150 utils.system('set -o pipefail; mysqldump --user=%s ' 151 '--password=%s %s %s| gzip - > %s' % ( 152 user, password, extra_dump_args, db_name, filename)) 153 return filename 154 155 156 def _create_dump_from_query(self, query): 157 """Dumps result of a query into a text file. 158 159 @param query: Query to execute. 160 161 @return: The path to a tempfile containing the response of the query. 162 """ 163 if not self._db: 164 raise BackupError("_create_dump_from_query requires a specific db.") 165 parameters = {'db': self._db, 'query': query} 166 parameters['user'], parameters['password'] = self._get_user_pass() 167 _, parameters['filename'] = tempfile.mkstemp('autotest_db_dump') 168 utils.system( 169 'set -o pipefail; mysql -u %(user)s -p%(password)s ' 170 '%(db)s -e "%(query)s" > %(filename)s' % 171 parameters) 172 return parameters['filename'] 173 174 175 def create_host_dump(self): 176 """Dumps hosts and their labels into a text file. 177 178 @return: The path to a tempfile containing a dump of 179 hosts and their pool labels. 180 """ 181 respect_static_labels = global_config.global_config.get_config_value( 182 'SKYLAB', 'respect_static_labels', type=bool, default=False) 183 template = ('SELECT hosts.hostname, labels.name FROM afe_hosts AS ' 184 'hosts JOIN %(hosts_labels_table)s AS hlt ON ' 185 'hosts.id = hlt.host_id ' 186 'JOIN %(labels_table)s AS labels ' 187 'ON labels.id = hlt.%(column)s ' 188 'WHERE labels.name LIKE \'%%pool%%\';') 189 if respect_static_labels: 190 # HACK: We're not checking the replaced_by_static_label on the 191 # pool label and just hard coding the fact that pool labels are 192 # indeed static labels. Expedience. 193 query = template % { 194 'hosts_labels_table': 'afe_static_hosts_labels', 195 'labels_table': 'afe_static_labels', 196 'column': 'staticlabel_id', 197 } 198 else: 199 query = template % { 200 'hosts_labels_table': 'afe_hosts_labels', 201 'labels_table': 'afe_labels', 202 'column': 'label_id', 203 } 204 return self._create_dump_from_query(query) 205 206 207 def create_shards_dump(self): 208 """Dumps shards and their labels into a text file. 209 210 @return: The path to a tempfile containing a dump of 211 shards and their labels. 212 """ 213 query = ('SELECT hostname, labels.name FROM afe_shards AS shards ' 214 'JOIN afe_shards_labels ' 215 'ON shards.id = afe_shards_labels.shard_id ' 216 'JOIN afe_labels AS labels ' 217 'ON labels.id = afe_shards_labels.label_id;') 218 return self._create_dump_from_query(query) 219 220 221 def dump(self): 222 """Creates a data dump based on the type of schedule. 223 224 @return: The path to a file containing the dump. 225 """ 226 if self._type == _ONLY_HOSTS: 227 return self.create_host_dump() 228 if self._type == _ONLY_SHARDS: 229 return self.create_shards_dump() 230 return self.create_mysql_dump() 231 232 233 def _get_name(self): 234 """Returns the name of the dump as presented to google storage.""" 235 if self._type in [_ONLY_HOSTS, _ONLY_SHARDS]: 236 file_type = 'txt' 237 else: 238 file_type = 'gz' 239 return 'autotest-dump.%s.%s' % ( 240 datetime.datetime.now().strftime('%y.%m.%d'), file_type) 241 242 243 @staticmethod 244 def _retry_run(cmd): 245 """Run the specified |cmd| string, retrying if necessary. 246 247 Args: 248 cmd: The command to run. 249 """ 250 for attempt in range(_ATTEMPTS): 251 try: 252 return utils.system_output(cmd) 253 except error.CmdError: 254 if attempt == _ATTEMPTS - 1: 255 raise 256 else: 257 logging.error('Failed to run %r', cmd) 258 259 260 def upload_to_google_storage(self, dump_file): 261 """Uploads the given |dump_file| to Google Storage. 262 263 @param dump_file: The path to the file containing the dump. 264 """ 265 cmd = '%(gs_util)s cp %(dump_file)s %(gs_dir)s/%(name)s' 266 input_dict = dict(gs_util=_GSUTIL_BIN, dump_file=dump_file, 267 name=self._get_name(), gs_dir=self._gs_dir) 268 cmd = cmd % input_dict 269 logging.debug('Uploading mysql dump to google storage') 270 self._retry_run(cmd) 271 os.remove(dump_file) 272 273 274 def _get_gs_command(self, cmd): 275 """Returns an array representing the command for rm or ls.""" 276 # Helpful code to allow us to test without gs. 277 assert cmd in ['rm', 'ls'] 278 gs_bin = _GSUTIL_BIN 279 if self._gs_dir.startswith('gs://'): 280 cmd_array = [gs_bin, cmd] 281 else: 282 cmd_array = [cmd] 283 284 return cmd_array 285 286 287 def _do_ls(self): 288 """Returns the output of running ls on the gs bucket.""" 289 cmd = self._get_gs_command('ls') + [self._gs_dir] 290 return self._retry_run(' '.join(cmd)) 291 292 293 def cleanup(self): 294 """Cleans up the gs bucket to ensure we don't over archive.""" 295 logging.debug('Cleaning up previously archived dump files.') 296 listing = self._do_ls() 297 ordered_listing = sorted(listing.splitlines(), key=version.LooseVersion) 298 if len(ordered_listing) < self._number_to_keep: 299 logging.debug('Cleanup found nothing to do.') 300 return 301 302 to_remove = ordered_listing[:-self._number_to_keep] 303 rm_cmd = self._get_gs_command('rm') 304 for artifact in to_remove: 305 cmd = ' '.join(rm_cmd + [artifact]) 306 self._retry_run(cmd) 307 308 309def parse_options(): 310 """Parses given options.""" 311 parser = optparse.OptionParser() 312 parser.add_option('--gs_bucket', default=_GS_BUCKET, 313 help='Google storage bucket to store mysql db dumps.') 314 parser.add_option('--keep', default=10, type=int, 315 help='Number of dumps to keep of specified type.') 316 parser.add_option('--type', default=_DAILY, 317 help='The type of mysql dump to store.') 318 parser.add_option('--verbose', default=False, action='store_true', 319 help='Google storage bucket to store mysql db dumps.') 320 options = parser.parse_args()[0] 321 if options.type not in _SCHEDULER_TYPES: 322 parser.error('Type must be either: %s.' % ', '.join(_SCHEDULER_TYPES)) 323 324 return options 325 326 327def main(): 328 """Runs the program.""" 329 options = parse_options() 330 backup_succeeded = False 331 332 with ts_mon_config.SetupTsMonGlobalState(service_name='mysql_db_backup', 333 indirect=True): 334 with metrics.SecondsTimer( 335 'chromeos/autotest/afe_db/backup/durations', 336 fields={'type': options.type}): 337 try: 338 logging.debug('Start db backup: %s', options.type) 339 archiver = MySqlArchiver( 340 options.type, options.keep, options.gs_bucket) 341 dump_file = archiver.dump() 342 logging.debug('Uploading backup: %s', options.type) 343 archiver.upload_to_google_storage(dump_file) 344 archiver.cleanup() 345 logging.debug('Db backup completed: %s', options.type) 346 backup_succeeded = True 347 finally: 348 metrics.Counter( 349 'chromeos/autotest/db/db_backup/completed').increment( 350 fields={'success': backup_succeeded, 351 'type': options.type}) 352 353 354if __name__ == '__main__': 355 main() 356