1#!/usr/bin/python 2# Copyright (c) 2012 The Chromium OS Authors. All rights reserved. 3# Use of this source code is governed by a BSD-style license that can be 4# found in the LICENSE file. 5 6import argparse 7import datetime 8import os 9import re 10import sys 11import logging 12 13os.environ['DJANGO_SETTINGS_MODULE'] = 'frontend.settings' 14 15import common 16from autotest_lib.server import utils 17from django.db import connections, transaction 18 19 20# Format Appears as: [Date] [Time] - [Msg Level] - [Message] 21LOGGING_FORMAT = '%(asctime)s - %(levelname)s - %(message)s' 22# This regex makes sure the input is in the format of YYYY-MM-DD (2012-02-01) 23DATE_FORMAT_REGEX = ('^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]' 24 '|3[01])$') 25SELECT_CMD_FORMAT = """ 26SELECT %(table)s.%(primary_key)s FROM %(table)s 27WHERE %(table)s.%(time_column)s <= "%(date)s" 28""" 29SELECT_JOIN_CMD_FORMAT = """ 30SELECT %(table)s.%(primary_key)s FROM %(table)s 31INNER JOIN %(related_table)s 32 ON %(table)s.%(foreign_key)s=%(related_table)s.%(related_primary_key)s 33WHERE %(related_table)s.%(time_column)s <= "%(date)s" 34""" 35SELECT_WITH_INDIRECTION_FORMAT = """ 36SELECT %(table)s.%(primary_key)s FROM %(table)s 37INNER JOIN %(indirection_table)s 38 ON %(table)s.%(foreign_key)s = 39 %(indirection_table)s.%(indirection_primary_key)s 40INNER JOIN %(related_table)s 41 ON %(indirection_table)s.%(indirection_foreign_key)s = 42 %(related_table)s.%(related_primary_key)s 43WHERE %(related_table)s.%(time_column)s <= "%(date)s" 44""" 45DELETE_ROWS_FORMAT = """ 46DELETE FROM %(table)s 47WHERE %(table)s.%(primary_key)s IN (%(rows)s) 48""" 49 50 51AFE_JOB_ID = 'afe_job_id' 52JOB_ID = 'job_id' 53JOB_IDX = 'job_idx' 54TEST_IDX = 'test_idx' 55 56# CAUTION: Make sure only the 'default' connection is used. Otherwise 57# db_cleanup may delete stuff from the global database, which is generally not 58# intended. 59cursor = connections['default'].cursor() 60 61STEP_SIZE = None # Threading this through properly is disgusting. 62 63class ProgressBar(object): 64 TEXT = "{:<40s} [{:<20s}] ({:>9d}/{:>9d})" 65 66 def __init__(self, name, amount): 67 self._name = name 68 self._amount = amount 69 self._cur = 0 70 71 def __enter__(self): 72 return self 73 74 def __exit__(self, a, b, c): 75 sys.stdout.write('\n') 76 sys.stdout.flush() 77 78 def update(self, x): 79 """ 80 Advance the counter by `x`. 81 82 @param x: An integer of how many more elements were processed. 83 """ 84 self._cur += x 85 86 def show(self): 87 """ 88 Display the progress bar on the current line. Repeated invocations 89 "update" the display. 90 """ 91 if self._amount == 0: 92 barlen = 20 93 else: 94 barlen = int(20 * self._cur / float(self._amount)) 95 if barlen: 96 bartext = '=' * (barlen-1) + '>' 97 else: 98 bartext = '' 99 text = self.TEXT.format(self._name, bartext, self._cur, self._amount) 100 sys.stdout.write('\r') 101 sys.stdout.write(text) 102 sys.stdout.flush() 103 104 105def grouper(iterable, n): 106 """ 107 Group the elements of `iterable` into groups of maximum size `n`. 108 109 @param iterable: An iterable. 110 @param n: Max size of returned groups. 111 @returns: Yields iterables of size <= n. 112 113 >>> grouper('ABCDEFG', 3) 114 [['A', 'B', C'], ['D', 'E', 'F'], ['G']] 115 """ 116 args = [iter(iterable)] * n 117 while True: 118 lst = [] 119 try: 120 for itr in args: 121 lst.append(next(itr)) 122 yield lst 123 except StopIteration: 124 if lst: 125 yield lst 126 break 127 128 129def _delete_table_data_before_date(table_to_delete_from, primary_key, 130 related_table, related_primary_key, 131 date, foreign_key=None, 132 time_column="started_time", 133 indirection_table=None, 134 indirection_primary_key=None, 135 indirection_foreign_key=None): 136 """ 137 We want a delete statement that will only delete from one table while 138 using a related table to find the rows to delete. 139 140 An example mysql command: 141 DELETE FROM tko_iteration_result USING tko_iteration_result INNER JOIN 142 tko_tests WHERE tko_iteration_result.test_idx=tko_tests.test_idx AND 143 tko_tests.started_time <= '2012-02-01'; 144 145 There are also tables that require 2 joins to determine which rows we want 146 to delete and we determine these rows by joining the table we want to 147 delete from with an indirection table to the actual jobs table. 148 149 @param table_to_delete_from: Table whose rows we want to delete. 150 @param related_table: Table with the date information we are selecting by. 151 @param foreign_key: Foreign key used in table_to_delete_from to reference 152 the related table. If None, the primary_key is used. 153 @param primary_key: Primary key in the related table. 154 @param date: End date of the information we are trying to delete. 155 @param time_column: Column that we want to use to compare the date to. 156 @param indirection_table: Table we use to link the data we are trying to 157 delete with the table with the date information. 158 @param indirection_primary_key: Key we use to connect the indirection table 159 to the table we are trying to delete rows 160 from. 161 @param indirection_foreign_key: Key we use to connect the indirection table 162 to the table with the date information. 163 """ 164 if not foreign_key: 165 foreign_key = primary_key 166 167 if not related_table: 168 # Deleting from a table directly. 169 variables = dict(table=table_to_delete_from, primary_key=primary_key, 170 time_column=time_column, date=date) 171 sql = SELECT_CMD_FORMAT % variables 172 elif not indirection_table: 173 # Deleting using a single JOIN to get the date information. 174 variables = dict(primary_key=primary_key, table=table_to_delete_from, 175 foreign_key=foreign_key, related_table=related_table, 176 related_primary_key=related_primary_key, 177 time_column=time_column, date=date) 178 sql = SELECT_JOIN_CMD_FORMAT % variables 179 else: 180 # There are cases where we need to JOIN 3 TABLES to determine the rows 181 # we want to delete. 182 variables = dict(primary_key=primary_key, table=table_to_delete_from, 183 indirection_table=indirection_table, 184 foreign_key=foreign_key, 185 indirection_primary_key=indirection_primary_key, 186 related_table=related_table, 187 related_primary_key=related_primary_key, 188 indirection_foreign_key=indirection_foreign_key, 189 time_column=time_column, date=date) 190 sql = SELECT_WITH_INDIRECTION_FORMAT % variables 191 192 logging.debug('SQL: %s', sql) 193 cursor.execute(sql, []) 194 rows = [x[0] for x in cursor.fetchall()] 195 logging.debug(rows) 196 197 if not rows or rows == [None]: 198 with ProgressBar(table_to_delete_from, 0) as pb: 199 pb.show() 200 logging.debug('Noting to delete for %s', table_to_delete_from) 201 return 202 203 with ProgressBar(table_to_delete_from, len(rows)) as pb: 204 for row_keys in grouper(rows, STEP_SIZE): 205 variables['rows'] = ','.join([str(x) for x in row_keys]) 206 sql = DELETE_ROWS_FORMAT % variables 207 logging.debug('SQL: %s', sql) 208 cursor.execute(sql, []) 209 transaction.commit_unless_managed(using='default') 210 pb.update(len(row_keys)) 211 pb.show() 212 213 214def _subtract_days(date, days_to_subtract): 215 """ 216 Return a date (string) that is 'days' before 'date' 217 218 @param date: date (string) we are subtracting from. 219 @param days_to_subtract: days (int) we are subtracting. 220 """ 221 date_obj = datetime.datetime.strptime(date, '%Y-%m-%d') 222 difference = date_obj - datetime.timedelta(days=days_to_subtract) 223 return difference.strftime('%Y-%m-%d') 224 225 226def _delete_all_data_before_date(date): 227 """ 228 Delete all the database data before a given date. 229 230 This function focuses predominately on the data for jobs in tko_jobs. 231 However not all jobs in afe_jobs are also in tko_jobs. 232 233 Therefore we delete all the afe_job and foreign key relations prior to two 234 days before date. Then we do the queries using tko_jobs and these 235 tables to ensure all the related information is gone. Even though we are 236 repeating deletes on these tables, the second delete will be quick and 237 completely thorough in ensuring we clean up all the foreign key 238 dependencies correctly. 239 240 @param date: End date of the information we are trying to delete. 241 @param step: Rows to delete per SQL query. 242 """ 243 # First cleanup all afe_job related data (prior to 2 days before date). 244 # The reason for this is not all afe_jobs may be in tko_jobs. 245 afe_date = _subtract_days(date, 2) 246 logging.info('Cleaning up all afe_job data prior to %s.', afe_date) 247 _delete_table_data_before_date('afe_aborted_host_queue_entries', 248 'queue_entry_id', 249 'afe_jobs', 'id', afe_date, 250 time_column= 'created_on', 251 foreign_key='queue_entry_id', 252 indirection_table='afe_host_queue_entries', 253 indirection_primary_key='id', 254 indirection_foreign_key='job_id') 255 _delete_table_data_before_date('afe_special_tasks', 'id', 256 'afe_jobs', 'id', 257 afe_date, time_column='created_on', 258 foreign_key='queue_entry_id', 259 indirection_table='afe_host_queue_entries', 260 indirection_primary_key='id', 261 indirection_foreign_key='job_id') 262 _delete_table_data_before_date('afe_host_queue_entries', 'id', 263 'afe_jobs', 'id', 264 afe_date, time_column='created_on', 265 foreign_key=JOB_ID) 266 _delete_table_data_before_date('afe_job_keyvals', 'id', 267 'afe_jobs', 'id', 268 afe_date, time_column='created_on', 269 foreign_key=JOB_ID) 270 _delete_table_data_before_date('afe_jobs_dependency_labels', 'id', 271 'afe_jobs', 'id', 272 afe_date, time_column='created_on', 273 foreign_key=JOB_ID) 274 _delete_table_data_before_date('afe_jobs', 'id', 275 None, None, 276 afe_date, time_column='created_on') 277 # Special tasks that aren't associated with an HQE 278 # Since we don't do the queue_entry_id=NULL check, we might wipe out a bit 279 # more than we should, but I doubt anyone will notice or care. 280 _delete_table_data_before_date('afe_special_tasks', 'id', 281 None, None, 282 afe_date, time_column='time_requested') 283 284 # Now go through and clean up all the rows related to tko_jobs prior to 285 # date. 286 logging.info('Cleaning up all data related to tko_jobs prior to %s.', 287 date) 288 _delete_table_data_before_date('tko_test_attributes', 'id', 289 'tko_tests', TEST_IDX, 290 date, foreign_key=TEST_IDX) 291 _delete_table_data_before_date('tko_test_labels_tests', 'id', 292 'tko_tests', TEST_IDX, 293 date, foreign_key= 'test_id') 294 _delete_table_data_before_date('tko_iteration_result', TEST_IDX, 295 'tko_tests', TEST_IDX, 296 date) 297 _delete_table_data_before_date('tko_iteration_perf_value', TEST_IDX, 298 'tko_tests', TEST_IDX, 299 date) 300 _delete_table_data_before_date('tko_iteration_attributes', TEST_IDX, 301 'tko_tests', TEST_IDX, 302 date) 303 _delete_table_data_before_date('tko_job_keyvals', 'id', 304 'tko_jobs', JOB_IDX, 305 date, foreign_key='job_id') 306 _delete_table_data_before_date('afe_aborted_host_queue_entries', 307 'queue_entry_id', 308 'tko_jobs', AFE_JOB_ID, date, 309 foreign_key='queue_entry_id', 310 indirection_table='afe_host_queue_entries', 311 indirection_primary_key='id', 312 indirection_foreign_key='job_id') 313 _delete_table_data_before_date('afe_special_tasks', 'id', 314 'tko_jobs', AFE_JOB_ID, 315 date, foreign_key='queue_entry_id', 316 indirection_table='afe_host_queue_entries', 317 indirection_primary_key='id', 318 indirection_foreign_key='job_id') 319 _delete_table_data_before_date('afe_host_queue_entries', 'id', 320 'tko_jobs', AFE_JOB_ID, 321 date, foreign_key='job_id') 322 _delete_table_data_before_date('afe_job_keyvals', 'id', 323 'tko_jobs', AFE_JOB_ID, 324 date, foreign_key='job_id') 325 _delete_table_data_before_date('afe_jobs_dependency_labels', 'id', 326 'tko_jobs', AFE_JOB_ID, 327 date, foreign_key='job_id') 328 _delete_table_data_before_date('afe_jobs', 'id', 329 'tko_jobs', AFE_JOB_ID, 330 date, foreign_key='id') 331 _delete_table_data_before_date('tko_tests', TEST_IDX, 332 'tko_jobs', JOB_IDX, 333 date, foreign_key=JOB_IDX) 334 _delete_table_data_before_date('tko_jobs', JOB_IDX, 335 None, None, date) 336 337 338def parse_args(): 339 """Parse command line arguments""" 340 parser = argparse.ArgumentParser() 341 parser.add_argument('-v', '--verbose', action='store_true', 342 help='Print SQL commands and results') 343 parser.add_argument('--step', type=int, action='store', 344 default=1000, 345 help='Number of rows to delete at once') 346 parser.add_argument('-c', '--check_server', action='store_true', 347 help='Check if the server should run db clean up.') 348 parser.add_argument('date', help='Keep results newer than') 349 return parser.parse_args() 350 351 352def should_cleanup(): 353 """Check if the server should run db_cleanup. 354 355 Only shard should clean up db. 356 357 @returns: True if it should run db cleanup otherwise False. 358 """ 359 return utils.is_shard() 360 361 362def main(): 363 """main""" 364 args = parse_args() 365 366 level = logging.DEBUG if args.verbose else logging.INFO 367 logging.basicConfig(level=level, format=LOGGING_FORMAT) 368 logging.info('Calling: %s', sys.argv) 369 370 if not re.match(DATE_FORMAT_REGEX, args.date): 371 print 'DATE must be in yyyy-mm-dd format!' 372 return 373 if args.check_server and not should_cleanup(): 374 print 'Only shard can run db cleanup.' 375 return 376 377 global STEP_SIZE 378 STEP_SIZE = args.step 379 _delete_all_data_before_date(args.date) 380 381 382if __name__ == '__main__': 383 main() 384