1#!/usr/bin/python 2 3# Copyright (c) 2012 The Chromium OS Authors. All rights reserved. 4# Use of this source code is governed by a BSD-style license that can be 5# found in the LICENSE file. 6 7"""Module used to back up the mysql db and upload to Google Storage. 8 9Usage: 10 backup_mysql_db.py --type=weekly --gs_bucket=gs://my_bucket --keep 10 11 12 gs_bucket may refer to a local location by omitting gs:// and giving a local 13 path if desired for testing. The example usage above creates a dump 14 of the autotest db, uploads it to gs://my_bucket/weekly/dump_file.date and 15 cleans up older dumps if there are more than 10 in that directory. 16""" 17 18import datetime 19from distutils import version 20import logging 21import optparse 22import os 23import tempfile 24 25import common 26 27from autotest_lib.client.common_lib import error 28from autotest_lib.client.common_lib import global_config, logging_manager, utils 29from autotest_lib.utils import test_importer 30 31 32_ATTEMPTS = 3 33_GSUTIL_BIN = 'gsutil' 34_GS_BUCKET = 'gs://chromeos-lab/backup/database' 35# TODO(scottz): Should we need to ignore more than one database a general 36# function should be designed that lists tables in the database and properly 37# creates the --ignore-table= args to be passed to mysqldump. 38# Tables to ignore when dumping all databases. 39# performance_schema is an internal database that cannot be dumped 40IGNORE_TABLES = ['performance_schema.cond_instances', 41 'performance_schema.events_waits_current', 42 'performance_schema.cond_instances', 43 'performance_schema.events_waits_history', 44 'performance_schema.events_waits_history_long', 45 'performance_schema.events_waits_summary_by_instance', 46 ('performance_schema.' 47 'events_waits_summary_by_thread_by_event_name'), 48 'performance_schema.events_waits_summary_global_by_event_name', 49 'performance_schema.file_instances', 50 'performance_schema.file_summary_by_event_name', 51 'performance_schema.file_summary_by_instance', 52 'performance_schema.mutex_instances', 53 'performance_schema.performance_timers', 54 'performance_schema.rwlock_instances', 55 'performance_schema.setup_consumers', 56 'performance_schema.setup_instruments', 57 'performance_schema.setup_timers', 58 'performance_schema.threads'] 59 60# Conventional mysqldump schedules. 61_DAILY = 'daily' 62_WEEKLY = 'weekly' 63_MONTHLY = 'monthly' 64 65# Back up server db 66_SERVER_DB = 'server_db' 67 68# Contrary to a conventional mysql dump which takes O(hours) on large databases, 69# a host dump is the cheapest form of backup possible. We dump the output of a 70# of a mysql command showing all hosts and their pool labels to a text file that 71# is backed up to google storage. 72_ONLY_HOSTS = 'only_hosts' 73_ONLY_SHARDS = 'only_shards' 74_SCHEDULER_TYPES = [_SERVER_DB, _ONLY_HOSTS, _ONLY_SHARDS, _DAILY, _WEEKLY, _MONTHLY] 75 76class BackupError(Exception): 77 """Raised for error occurred during backup.""" 78 79 80class MySqlArchiver(object): 81 """Class that archives the Autotest MySQL DB to Google Storage. 82 83 Vars: 84 gs_dir: The path to the directory in Google Storage that this dump file 85 will be uploaded to. 86 number_to_keep: The number of dumps we should store. 87 """ 88 _AUTOTEST_DB = "chromeos_autotest_db" 89 _SERVER_DB = "chromeos_lab_servers" 90 91 92 def __init__(self, scheduled_type, number_to_keep, gs_bucket): 93 # For conventional scheduled type, we back up all databases. 94 # self._db is only used when scheduled_type is not 95 # conventional scheduled type. 96 self._db = self._get_db_name(scheduled_type) 97 self._gs_dir = '/'.join([gs_bucket, scheduled_type]) 98 self._number_to_keep = number_to_keep 99 self._type = scheduled_type 100 101 102 @classmethod 103 def _get_db_name(cls, scheduled_type): 104 """Get the db name to backup. 105 106 @param scheduled_type: one of _SCHEDULER_TYPES. 107 108 @returns: The name of the db to backup. 109 Or None for backup all dbs. 110 """ 111 if scheduled_type == _SERVER_DB: 112 return cls._SERVER_DB 113 elif scheduled_type in [_ONLY_HOSTS, _ONLY_SHARDS]: 114 return cls._AUTOTEST_DB 115 else: 116 return None 117 118 @staticmethod 119 def _get_user_pass(): 120 """Returns a tuple containing the user/pass to use to access the DB.""" 121 user = global_config.global_config.get_config_value( 122 'CROS', 'db_backup_user') 123 password = global_config.global_config.get_config_value( 124 'CROS', 'db_backup_password') 125 return user, password 126 127 128 def create_mysql_dump(self): 129 """Returns the path to a mysql dump of the current autotest DB.""" 130 user, password = self._get_user_pass() 131 _, filename = tempfile.mkstemp('autotest_db_dump') 132 logging.debug('Dumping mysql database to file %s', filename) 133 extra_dump_args = '' 134 for entry in IGNORE_TABLES: 135 extra_dump_args += '--ignore-table=%s ' % entry 136 137 if not self._db: 138 extra_dump_args += "--all-databases" 139 db_name = self._db or '' 140 utils.system('set -o pipefail; mysqldump --user=%s ' 141 '--password=%s %s %s| gzip - > %s' % ( 142 user, password, extra_dump_args, db_name, filename)) 143 return filename 144 145 146 def _create_dump_from_query(self, query): 147 """Dumps result of a query into a text file. 148 149 @param query: Query to execute. 150 151 @return: The path to a tempfile containing the response of the query. 152 """ 153 if not self._db: 154 raise BackupError("_create_dump_from_query requires a specific db.") 155 parameters = {'db': self._db, 'query': query} 156 parameters['user'], parameters['password'] = self._get_user_pass() 157 _, parameters['filename'] = tempfile.mkstemp('autotest_db_dump') 158 utils.system( 159 'set -o pipefail; mysql -u %(user)s -p%(password)s ' 160 '%(db)s -e "%(query)s" > %(filename)s' % 161 parameters) 162 return parameters['filename'] 163 164 165 def create_host_dump(self): 166 """Dumps hosts and their labels into a text file. 167 168 @return: The path to a tempfile containing a dump of 169 hosts and their pool labels. 170 """ 171 query = ('SELECT hostname, labels.name FROM afe_hosts AS hosts ' 172 'JOIN afe_hosts_labels ON hosts.id = afe_hosts_labels.host_id ' 173 'JOIN afe_labels AS labels ' 174 'ON labels.id = afe_hosts_labels.label_id ' 175 'WHERE labels.name LIKE \'%%pool%%\';') 176 return self._create_dump_from_query(query) 177 178 179 def create_shards_dump(self): 180 """Dumps shards and their labels into a text file. 181 182 @return: The path to a tempfile containing a dump of 183 shards and their labels. 184 """ 185 query = ('SELECT hostname, labels.name FROM afe_shards AS shards ' 186 'JOIN afe_shards_labels ' 187 'ON shards.id = afe_shards_labels.shard_id ' 188 'JOIN afe_labels AS labels ' 189 'ON labels.id = afe_shards_labels.label_id;') 190 return self._create_dump_from_query(query) 191 192 193 def dump(self): 194 """Creates a data dump based on the type of schedule. 195 196 @return: The path to a file containing the dump. 197 """ 198 if self._type == _ONLY_HOSTS: 199 return self.create_host_dump() 200 if self._type == _ONLY_SHARDS: 201 return self.create_shards_dump() 202 return self.create_mysql_dump() 203 204 205 def _get_name(self): 206 """Returns the name of the dump as presented to google storage.""" 207 if self._type in [_ONLY_HOSTS, _ONLY_SHARDS]: 208 file_type = 'txt' 209 else: 210 file_type = 'gz' 211 return 'autotest-dump.%s.%s' % ( 212 datetime.datetime.now().strftime('%y.%m.%d'), file_type) 213 214 215 @staticmethod 216 def _retry_run(cmd): 217 """Run the specified |cmd| string, retrying if necessary. 218 219 Args: 220 cmd: The command to run. 221 """ 222 for attempt in range(_ATTEMPTS): 223 try: 224 return utils.system_output(cmd) 225 except error.CmdError: 226 if attempt == _ATTEMPTS - 1: 227 raise 228 else: 229 logging.error('Failed to run %r', cmd) 230 231 232 def upload_to_google_storage(self, dump_file): 233 """Uploads the given |dump_file| to Google Storage. 234 235 @param dump_file: The path to the file containing the dump. 236 """ 237 cmd = '%(gs_util)s cp %(dump_file)s %(gs_dir)s/%(name)s' 238 input_dict = dict(gs_util=_GSUTIL_BIN, dump_file=dump_file, 239 name=self._get_name(), gs_dir=self._gs_dir) 240 cmd = cmd % input_dict 241 logging.debug('Uploading mysql dump to google storage') 242 self._retry_run(cmd) 243 os.remove(dump_file) 244 245 246 def _get_gs_command(self, cmd): 247 """Returns an array representing the command for rm or ls.""" 248 # Helpful code to allow us to test without gs. 249 assert cmd in ['rm', 'ls'] 250 gs_bin = _GSUTIL_BIN 251 if self._gs_dir.startswith('gs://'): 252 cmd_array = [gs_bin, cmd] 253 else: 254 cmd_array = [cmd] 255 256 return cmd_array 257 258 259 def _do_ls(self): 260 """Returns the output of running ls on the gs bucket.""" 261 cmd = self._get_gs_command('ls') + [self._gs_dir] 262 return self._retry_run(' '.join(cmd)) 263 264 265 def cleanup(self): 266 """Cleans up the gs bucket to ensure we don't over archive.""" 267 logging.debug('Cleaning up previously archived dump files.') 268 listing = self._do_ls() 269 ordered_listing = sorted(listing.splitlines(), key=version.LooseVersion) 270 if len(ordered_listing) < self._number_to_keep: 271 logging.debug('Cleanup found nothing to do.') 272 return 273 274 to_remove = ordered_listing[:-self._number_to_keep] 275 rm_cmd = self._get_gs_command('rm') 276 for artifact in to_remove: 277 cmd = ' '.join(rm_cmd + [self._gs_dir + '/' + artifact]) 278 self._retry_run(cmd) 279 280 281def parse_options(): 282 """Parses given options.""" 283 parser = optparse.OptionParser() 284 parser.add_option('--gs_bucket', default=_GS_BUCKET, 285 help='Google storage bucket to store mysql db dumps.') 286 parser.add_option('--keep', default=10, type=int, 287 help='Number of dumps to keep of specified type.') 288 parser.add_option('--type', default=_DAILY, 289 help='The type of mysql dump to store.') 290 parser.add_option('--verbose', default=False, action='store_true', 291 help='Google storage bucket to store mysql db dumps.') 292 options = parser.parse_args()[0] 293 if options.type not in _SCHEDULER_TYPES: 294 parser.error('Type must be either: %s.' % ', '.join(_SCHEDULER_TYPES)) 295 296 return options 297 298 299def main(): 300 """Runs the program.""" 301 options = parse_options() 302 logging_manager.configure_logging(test_importer.TestImporterLoggingConfig(), 303 verbose=options.verbose) 304 logging.debug('Start db backup: %s', options.type) 305 archiver = MySqlArchiver(options.type, options.keep, options.gs_bucket) 306 dump_file = archiver.dump() 307 logging.debug('Uploading backup: %s', options.type) 308 archiver.upload_to_google_storage(dump_file) 309 archiver.cleanup() 310 logging.debug('Db backup completed: %s', options.type) 311 312 313if __name__ == '__main__': 314 main() 315