• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/python
2# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6import argparse
7import datetime
8import os
9import re
10import sys
11import logging
12
13os.environ['DJANGO_SETTINGS_MODULE'] = 'frontend.settings'
14
15import common
16from autotest_lib.server import utils
17from django.db import connections, transaction
18
19
20# Format Appears as: [Date] [Time] - [Msg Level] - [Message]
21LOGGING_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
22# This regex makes sure the input is in the format of YYYY-MM-DD (2012-02-01)
23DATE_FORMAT_REGEX = ('^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]'
24                     '|3[01])$')
25SELECT_CMD_FORMAT = """
26SELECT %(table)s.%(primary_key)s FROM %(table)s
27WHERE %(table)s.%(time_column)s <= "%(date)s"
28"""
29SELECT_JOIN_CMD_FORMAT = """
30SELECT %(table)s.%(primary_key)s FROM %(table)s
31INNER JOIN %(related_table)s
32  ON %(table)s.%(foreign_key)s=%(related_table)s.%(related_primary_key)s
33WHERE %(related_table)s.%(time_column)s <= "%(date)s"
34"""
35SELECT_WITH_INDIRECTION_FORMAT = """
36SELECT %(table)s.%(primary_key)s FROM %(table)s
37INNER JOIN %(indirection_table)s
38  ON %(table)s.%(foreign_key)s =
39     %(indirection_table)s.%(indirection_primary_key)s
40INNER JOIN %(related_table)s
41  ON %(indirection_table)s.%(indirection_foreign_key)s =
42  %(related_table)s.%(related_primary_key)s
43WHERE %(related_table)s.%(time_column)s <= "%(date)s"
44"""
45DELETE_ROWS_FORMAT = """
46DELETE FROM %(table)s
47WHERE %(table)s.%(primary_key)s IN (%(rows)s)
48"""
49
50
51AFE_JOB_ID = 'afe_job_id'
52JOB_ID = 'job_id'
53JOB_IDX = 'job_idx'
54TEST_IDX = 'test_idx'
55
56# CAUTION: Make sure only the 'default' connection is used. Otherwise
57# db_cleanup may delete stuff from the global database, which is generally not
58# intended.
59cursor = connections['default'].cursor()
60
61STEP_SIZE = None  # Threading this through properly is disgusting.
62
63class ProgressBar(object):
64    TEXT = "{:<40s} [{:<20s}] ({:>9d}/{:>9d})"
65
66    def __init__(self, name, amount):
67        self._name = name
68        self._amount = amount
69        self._cur = 0
70
71    def __enter__(self):
72        return self
73
74    def __exit__(self, a, b, c):
75        sys.stdout.write('\n')
76        sys.stdout.flush()
77
78    def update(self, x):
79        """
80        Advance the counter by `x`.
81
82        @param x: An integer of how many more elements were processed.
83        """
84        self._cur += x
85
86    def show(self):
87        """
88        Display the progress bar on the current line.  Repeated invocations
89        "update" the display.
90        """
91        if self._amount == 0:
92            barlen = 20
93        else:
94            barlen = int(20 * self._cur / float(self._amount))
95        if barlen:
96            bartext = '=' * (barlen-1) + '>'
97        else:
98            bartext = ''
99        text = self.TEXT.format(self._name, bartext, self._cur, self._amount)
100        sys.stdout.write('\r')
101        sys.stdout.write(text)
102        sys.stdout.flush()
103
104
105def grouper(iterable, n):
106    """
107    Group the elements of `iterable` into groups of maximum size `n`.
108
109    @param iterable: An iterable.
110    @param n: Max size of returned groups.
111    @returns: Yields iterables of size <= n.
112
113    >>> grouper('ABCDEFG', 3)
114    [['A', 'B', C'], ['D', 'E', 'F'], ['G']]
115    """
116    args = [iter(iterable)] * n
117    while True:
118        lst = []
119        try:
120            for itr in args:
121                lst.append(next(itr))
122            yield lst
123        except StopIteration:
124            if lst:
125                yield lst
126            break
127
128
129def _delete_table_data_before_date(table_to_delete_from, primary_key,
130                                   related_table, related_primary_key,
131                                   date, foreign_key=None,
132                                   time_column="started_time",
133                                   indirection_table=None,
134                                   indirection_primary_key=None,
135                                   indirection_foreign_key=None):
136    """
137    We want a delete statement that will only delete from one table while
138    using a related table to find the rows to delete.
139
140    An example mysql command:
141    DELETE FROM tko_iteration_result USING tko_iteration_result INNER JOIN
142    tko_tests WHERE tko_iteration_result.test_idx=tko_tests.test_idx AND
143    tko_tests.started_time <= '2012-02-01';
144
145    There are also tables that require 2 joins to determine which rows we want
146    to delete and we determine these rows by joining the table we want to
147    delete from with an indirection table to the actual jobs table.
148
149    @param table_to_delete_from: Table whose rows we want to delete.
150    @param related_table: Table with the date information we are selecting by.
151    @param foreign_key: Foreign key used in table_to_delete_from to reference
152                        the related table. If None, the primary_key is used.
153    @param primary_key: Primary key in the related table.
154    @param date: End date of the information we are trying to delete.
155    @param time_column: Column that we want to use to compare the date to.
156    @param indirection_table: Table we use to link the data we are trying to
157                              delete with the table with the date information.
158    @param indirection_primary_key: Key we use to connect the indirection table
159                                    to the table we are trying to delete rows
160                                    from.
161    @param indirection_foreign_key: Key we use to connect the indirection table
162                                    to the table with the date information.
163    """
164    if not foreign_key:
165        foreign_key = primary_key
166
167    if not related_table:
168        # Deleting from a table directly.
169        variables = dict(table=table_to_delete_from, primary_key=primary_key,
170                         time_column=time_column, date=date)
171        sql = SELECT_CMD_FORMAT % variables
172    elif not indirection_table:
173        # Deleting using a single JOIN to get the date information.
174        variables = dict(primary_key=primary_key, table=table_to_delete_from,
175                         foreign_key=foreign_key, related_table=related_table,
176                         related_primary_key=related_primary_key,
177                         time_column=time_column, date=date)
178        sql = SELECT_JOIN_CMD_FORMAT % variables
179    else:
180        # There are cases where we need to JOIN 3 TABLES to determine the rows
181        # we want to delete.
182        variables = dict(primary_key=primary_key, table=table_to_delete_from,
183                         indirection_table=indirection_table,
184                         foreign_key=foreign_key,
185                         indirection_primary_key=indirection_primary_key,
186                         related_table=related_table,
187                         related_primary_key=related_primary_key,
188                         indirection_foreign_key=indirection_foreign_key,
189                         time_column=time_column, date=date)
190        sql = SELECT_WITH_INDIRECTION_FORMAT % variables
191
192    logging.debug('SQL: %s', sql)
193    cursor.execute(sql, [])
194    rows = [x[0] for x in cursor.fetchall()]
195    logging.debug(rows)
196
197    if not rows or rows == [None]:
198        with ProgressBar(table_to_delete_from, 0) as pb:
199            pb.show()
200        logging.debug('Noting to delete for %s', table_to_delete_from)
201        return
202
203    with ProgressBar(table_to_delete_from, len(rows)) as pb:
204        for row_keys in grouper(rows, STEP_SIZE):
205            variables['rows'] = ','.join([str(x) for x in row_keys])
206            sql = DELETE_ROWS_FORMAT % variables
207            logging.debug('SQL: %s', sql)
208            cursor.execute(sql, [])
209            transaction.commit_unless_managed(using='default')
210            pb.update(len(row_keys))
211            pb.show()
212
213
214def _subtract_days(date, days_to_subtract):
215    """
216    Return a date (string) that is 'days' before 'date'
217
218    @param date: date (string) we are subtracting from.
219    @param days_to_subtract: days (int) we are subtracting.
220    """
221    date_obj = datetime.datetime.strptime(date, '%Y-%m-%d')
222    difference = date_obj - datetime.timedelta(days=days_to_subtract)
223    return difference.strftime('%Y-%m-%d')
224
225
226def _delete_all_data_before_date(date):
227    """
228    Delete all the database data before a given date.
229
230    This function focuses predominately on the data for jobs in tko_jobs.
231    However not all jobs in afe_jobs are also in tko_jobs.
232
233    Therefore we delete all the afe_job and foreign key relations prior to two
234    days before date. Then we do the queries using tko_jobs and these
235    tables to ensure all the related information is gone. Even though we are
236    repeating deletes on these tables, the second delete will be quick and
237    completely thorough in ensuring we clean up all the foreign key
238    dependencies correctly.
239
240    @param date: End date of the information we are trying to delete.
241    @param step: Rows to delete per SQL query.
242    """
243    # First cleanup all afe_job related data (prior to 2 days before date).
244    # The reason for this is not all afe_jobs may be in tko_jobs.
245    afe_date = _subtract_days(date, 2)
246    logging.info('Cleaning up all afe_job data prior to %s.', afe_date)
247    _delete_table_data_before_date('afe_aborted_host_queue_entries',
248                                   'queue_entry_id',
249                                   'afe_jobs', 'id', afe_date,
250                                   time_column= 'created_on',
251                                   foreign_key='queue_entry_id',
252                                   indirection_table='afe_host_queue_entries',
253                                   indirection_primary_key='id',
254                                   indirection_foreign_key='job_id')
255    _delete_table_data_before_date('afe_special_tasks', 'id',
256                                   'afe_jobs', 'id',
257                                   afe_date, time_column='created_on',
258                                   foreign_key='queue_entry_id',
259                                   indirection_table='afe_host_queue_entries',
260                                   indirection_primary_key='id',
261                                   indirection_foreign_key='job_id')
262    _delete_table_data_before_date('afe_host_queue_entries', 'id',
263                                   'afe_jobs', 'id',
264                                   afe_date, time_column='created_on',
265                                   foreign_key=JOB_ID)
266    _delete_table_data_before_date('afe_job_keyvals', 'id',
267                                   'afe_jobs', 'id',
268                                   afe_date, time_column='created_on',
269                                   foreign_key=JOB_ID)
270    _delete_table_data_before_date('afe_jobs_dependency_labels', 'id',
271                                   'afe_jobs', 'id',
272                                   afe_date, time_column='created_on',
273                                   foreign_key=JOB_ID)
274    _delete_table_data_before_date('afe_jobs', 'id',
275                                   None, None,
276                                   afe_date, time_column='created_on')
277    # Special tasks that aren't associated with an HQE
278    # Since we don't do the queue_entry_id=NULL check, we might wipe out a bit
279    # more than we should, but I doubt anyone will notice or care.
280    _delete_table_data_before_date('afe_special_tasks', 'id',
281                                   None, None,
282                                   afe_date, time_column='time_requested')
283
284    # Now go through and clean up all the rows related to tko_jobs prior to
285    # date.
286    logging.info('Cleaning up all data related to tko_jobs prior to %s.',
287                  date)
288    _delete_table_data_before_date('tko_test_attributes', 'id',
289                                   'tko_tests', TEST_IDX,
290                                   date, foreign_key=TEST_IDX)
291    _delete_table_data_before_date('tko_test_labels_tests', 'id',
292                                   'tko_tests', TEST_IDX,
293                                   date, foreign_key= 'test_id')
294    _delete_table_data_before_date('tko_iteration_result', TEST_IDX,
295                                   'tko_tests', TEST_IDX,
296                                   date)
297    _delete_table_data_before_date('tko_iteration_perf_value', TEST_IDX,
298                                   'tko_tests', TEST_IDX,
299                                   date)
300    _delete_table_data_before_date('tko_iteration_attributes', TEST_IDX,
301                                   'tko_tests', TEST_IDX,
302                                   date)
303    _delete_table_data_before_date('tko_job_keyvals', 'id',
304                                   'tko_jobs', JOB_IDX,
305                                   date, foreign_key='job_id')
306    _delete_table_data_before_date('afe_aborted_host_queue_entries',
307                                   'queue_entry_id',
308                                   'tko_jobs', AFE_JOB_ID, date,
309                                   foreign_key='queue_entry_id',
310                                   indirection_table='afe_host_queue_entries',
311                                   indirection_primary_key='id',
312                                   indirection_foreign_key='job_id')
313    _delete_table_data_before_date('afe_special_tasks', 'id',
314                                   'tko_jobs', AFE_JOB_ID,
315                                   date, foreign_key='queue_entry_id',
316                                   indirection_table='afe_host_queue_entries',
317                                   indirection_primary_key='id',
318                                   indirection_foreign_key='job_id')
319    _delete_table_data_before_date('afe_host_queue_entries', 'id',
320                                   'tko_jobs', AFE_JOB_ID,
321                                   date, foreign_key='job_id')
322    _delete_table_data_before_date('afe_job_keyvals', 'id',
323                                   'tko_jobs', AFE_JOB_ID,
324                                   date, foreign_key='job_id')
325    _delete_table_data_before_date('afe_jobs_dependency_labels', 'id',
326                                   'tko_jobs', AFE_JOB_ID,
327                                   date, foreign_key='job_id')
328    _delete_table_data_before_date('afe_jobs', 'id',
329                                   'tko_jobs', AFE_JOB_ID,
330                                   date, foreign_key='id')
331    _delete_table_data_before_date('tko_tests', TEST_IDX,
332                                   'tko_jobs', JOB_IDX,
333                                   date, foreign_key=JOB_IDX)
334    _delete_table_data_before_date('tko_jobs', JOB_IDX,
335                                   None, None, date)
336
337
338def parse_args():
339    """Parse command line arguments"""
340    parser = argparse.ArgumentParser()
341    parser.add_argument('-v', '--verbose', action='store_true',
342                        help='Print SQL commands and results')
343    parser.add_argument('--step', type=int, action='store',
344                        default=1000,
345                        help='Number of rows to delete at once')
346    parser.add_argument('-c', '--check_server', action='store_true',
347                        help='Check if the server should run db clean up.')
348    parser.add_argument('date', help='Keep results newer than')
349    return parser.parse_args()
350
351
352def should_cleanup():
353    """Check if the server should run db_cleanup.
354
355    Only shard should clean up db.
356
357    @returns: True if it should run db cleanup otherwise False.
358    """
359    return utils.is_shard()
360
361
362def main():
363    """main"""
364    args = parse_args()
365
366    level = logging.DEBUG if args.verbose else logging.INFO
367    logging.basicConfig(level=level, format=LOGGING_FORMAT)
368    logging.info('Calling: %s', sys.argv)
369
370    if not re.match(DATE_FORMAT_REGEX, args.date):
371        print 'DATE must be in yyyy-mm-dd format!'
372        return
373    if args.check_server and not should_cleanup():
374        print 'Only shard can run db cleanup.'
375        return
376
377    global STEP_SIZE
378    STEP_SIZE = args.step
379    _delete_all_data_before_date(args.date)
380
381
382if __name__ == '__main__':
383    main()
384