• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/python
2# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6import argparse
7import datetime
8import os
9import re
10import sys
11import logging
12
13os.environ['DJANGO_SETTINGS_MODULE'] = 'frontend.settings'
14
15import common
16from django.db import connections, transaction
17
18
19# Format Appears as: [Date] [Time] - [Msg Level] - [Message]
20LOGGING_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
21# This regex makes sure the input is in the format of YYYY-MM-DD (2012-02-01)
22DATE_FORMAT_REGEX = ('^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]'
23                     '|3[01])$')
24SELECT_CMD_FORMAT = """
25SELECT %(table)s.%(primary_key)s FROM %(table)s
26WHERE %(table)s.%(time_column)s <= "%(date)s"
27"""
28SELECT_JOIN_CMD_FORMAT = """
29SELECT %(table)s.%(primary_key)s FROM %(table)s
30INNER JOIN %(related_table)s
31  ON %(table)s.%(foreign_key)s=%(related_table)s.%(related_primary_key)s
32WHERE %(related_table)s.%(time_column)s <= "%(date)s"
33"""
34SELECT_WITH_INDIRECTION_FORMAT = """
35SELECT %(table)s.%(primary_key)s FROM %(table)s
36INNER JOIN %(indirection_table)s
37  ON %(table)s.%(foreign_key)s =
38     %(indirection_table)s.%(indirection_primary_key)s
39INNER JOIN %(related_table)s
40  ON %(indirection_table)s.%(indirection_foreign_key)s =
41  %(related_table)s.%(related_primary_key)s
42WHERE %(related_table)s.%(time_column)s <= "%(date)s"
43"""
44DELETE_ROWS_FORMAT = """
45DELETE FROM %(table)s
46WHERE %(table)s.%(primary_key)s IN (%(rows)s)
47"""
48
49
50AFE_JOB_ID = 'afe_job_id'
51JOB_ID = 'job_id'
52JOB_IDX = 'job_idx'
53TEST_IDX = 'test_idx'
54
55# CAUTION: Make sure only the 'default' connection is used. Otherwise
56# db_cleanup may delete stuff from the global database, which is generally not
57# intended.
58cursor = connections['default'].cursor()
59
60STEP_SIZE = None  # Threading this through properly is disgusting.
61
62class ProgressBar(object):
63    TEXT = "{:<40s} [{:<20s}] ({:>9d}/{:>9d})"
64
65    def __init__(self, name, amount):
66        self._name = name
67        self._amount = amount
68        self._cur = 0
69
70    def __enter__(self):
71        return self
72
73    def __exit__(self, a, b, c):
74        sys.stdout.write('\n')
75        sys.stdout.flush()
76
77    def update(self, x):
78        """
79        Advance the counter by `x`.
80
81        @param x: An integer of how many more elements were processed.
82        """
83        self._cur += x
84
85    def show(self):
86        """
87        Display the progress bar on the current line.  Repeated invocations
88        "update" the display.
89        """
90        if self._amount == 0:
91            barlen = 20
92        else:
93            barlen = int(20 * self._cur / float(self._amount))
94        if barlen:
95            bartext = '=' * (barlen-1) + '>'
96        else:
97            bartext = ''
98        text = self.TEXT.format(self._name, bartext, self._cur, self._amount)
99        sys.stdout.write('\r')
100        sys.stdout.write(text)
101        sys.stdout.flush()
102
103
104def grouper(iterable, n):
105    """
106    Group the elements of `iterable` into groups of maximum size `n`.
107
108    @param iterable: An iterable.
109    @param n: Max size of returned groups.
110    @returns: Yields iterables of size <= n.
111
112    >>> grouper('ABCDEFG', 3)
113    [['A', 'B', C'], ['D', 'E', 'F'], ['G']]
114    """
115    args = [iter(iterable)] * n
116    while True:
117        lst = []
118        try:
119            for itr in args:
120                lst.append(next(itr))
121            yield lst
122        except StopIteration:
123            if lst:
124                yield lst
125            break
126
127
128def _delete_table_data_before_date(table_to_delete_from, primary_key,
129                                   related_table, related_primary_key,
130                                   date, foreign_key=None,
131                                   time_column="started_time",
132                                   indirection_table=None,
133                                   indirection_primary_key=None,
134                                   indirection_foreign_key=None):
135    """
136    We want a delete statement that will only delete from one table while
137    using a related table to find the rows to delete.
138
139    An example mysql command:
140    DELETE FROM tko_iteration_result USING tko_iteration_result INNER JOIN
141    tko_tests WHERE tko_iteration_result.test_idx=tko_tests.test_idx AND
142    tko_tests.started_time <= '2012-02-01';
143
144    There are also tables that require 2 joins to determine which rows we want
145    to delete and we determine these rows by joining the table we want to
146    delete from with an indirection table to the actual jobs table.
147
148    @param table_to_delete_from: Table whose rows we want to delete.
149    @param related_table: Table with the date information we are selecting by.
150    @param foreign_key: Foreign key used in table_to_delete_from to reference
151                        the related table. If None, the primary_key is used.
152    @param primary_key: Primary key in the related table.
153    @param date: End date of the information we are trying to delete.
154    @param time_column: Column that we want to use to compare the date to.
155    @param indirection_table: Table we use to link the data we are trying to
156                              delete with the table with the date information.
157    @param indirection_primary_key: Key we use to connect the indirection table
158                                    to the table we are trying to delete rows
159                                    from.
160    @param indirection_foreign_key: Key we use to connect the indirection table
161                                    to the table with the date information.
162    """
163    if not foreign_key:
164        foreign_key = primary_key
165
166    if not related_table:
167        # Deleting from a table directly.
168        variables = dict(table=table_to_delete_from, primary_key=primary_key,
169                         time_column=time_column, date=date)
170        sql = SELECT_CMD_FORMAT % variables
171    elif not indirection_table:
172        # Deleting using a single JOIN to get the date information.
173        variables = dict(primary_key=primary_key, table=table_to_delete_from,
174                         foreign_key=foreign_key, related_table=related_table,
175                         related_primary_key=related_primary_key,
176                         time_column=time_column, date=date)
177        sql = SELECT_JOIN_CMD_FORMAT % variables
178    else:
179        # There are cases where we need to JOIN 3 TABLES to determine the rows
180        # we want to delete.
181        variables = dict(primary_key=primary_key, table=table_to_delete_from,
182                         indirection_table=indirection_table,
183                         foreign_key=foreign_key,
184                         indirection_primary_key=indirection_primary_key,
185                         related_table=related_table,
186                         related_primary_key=related_primary_key,
187                         indirection_foreign_key=indirection_foreign_key,
188                         time_column=time_column, date=date)
189        sql = SELECT_WITH_INDIRECTION_FORMAT % variables
190
191    logging.debug('SQL: %s', sql)
192    cursor.execute(sql, [])
193    rows = [x[0] for x in cursor.fetchall()]
194    logging.debug(rows)
195
196    if not rows or rows == [None]:
197        with ProgressBar(table_to_delete_from, 0) as pb:
198            pb.show()
199        logging.debug('Noting to delete for %s', table_to_delete_from)
200        return
201
202    with ProgressBar(table_to_delete_from, len(rows)) as pb:
203        for row_keys in grouper(rows, STEP_SIZE):
204            variables['rows'] = ','.join([str(x) for x in row_keys])
205            sql = DELETE_ROWS_FORMAT % variables
206            logging.debug('SQL: %s', sql)
207            cursor.execute(sql, [])
208            transaction.commit_unless_managed(using='default')
209            pb.update(len(row_keys))
210            pb.show()
211
212
213def _subtract_days(date, days_to_subtract):
214    """
215    Return a date (string) that is 'days' before 'date'
216
217    @param date: date (string) we are subtracting from.
218    @param days_to_subtract: days (int) we are subtracting.
219    """
220    date_obj = datetime.datetime.strptime(date, '%Y-%m-%d')
221    difference = date_obj - datetime.timedelta(days=days_to_subtract)
222    return difference.strftime('%Y-%m-%d')
223
224
225def _delete_all_data_before_date(date):
226    """
227    Delete all the database data before a given date.
228
229    This function focuses predominately on the data for jobs in tko_jobs.
230    However not all jobs in afe_jobs are also in tko_jobs.
231
232    Therefore we delete all the afe_job and foreign key relations prior to two
233    days before date. Then we do the queries using tko_jobs and these
234    tables to ensure all the related information is gone. Even though we are
235    repeating deletes on these tables, the second delete will be quick and
236    completely thorough in ensuring we clean up all the foreign key
237    dependencies correctly.
238
239    @param date: End date of the information we are trying to delete.
240    @param step: Rows to delete per SQL query.
241    """
242    # First cleanup all afe_job related data (prior to 2 days before date).
243    # The reason for this is not all afe_jobs may be in tko_jobs.
244    afe_date = _subtract_days(date, 2)
245    logging.info('Cleaning up all afe_job data prior to %s.', afe_date)
246    _delete_table_data_before_date('afe_aborted_host_queue_entries',
247                                   'queue_entry_id',
248                                   'afe_jobs', 'id', afe_date,
249                                   time_column= 'created_on',
250                                   foreign_key='queue_entry_id',
251                                   indirection_table='afe_host_queue_entries',
252                                   indirection_primary_key='id',
253                                   indirection_foreign_key='job_id')
254    _delete_table_data_before_date('afe_special_tasks', 'id',
255                                   'afe_jobs', 'id',
256                                   afe_date, time_column='created_on',
257                                   foreign_key='queue_entry_id',
258                                   indirection_table='afe_host_queue_entries',
259                                   indirection_primary_key='id',
260                                   indirection_foreign_key='job_id')
261    _delete_table_data_before_date('afe_host_queue_entries', 'id',
262                                   'afe_jobs', 'id',
263                                   afe_date, time_column='created_on',
264                                   foreign_key=JOB_ID)
265    _delete_table_data_before_date('afe_job_keyvals', 'id',
266                                   'afe_jobs', 'id',
267                                   afe_date, time_column='created_on',
268                                   foreign_key=JOB_ID)
269    _delete_table_data_before_date('afe_jobs_dependency_labels', 'id',
270                                   'afe_jobs', 'id',
271                                   afe_date, time_column='created_on',
272                                   foreign_key=JOB_ID)
273    _delete_table_data_before_date('afe_jobs', 'id',
274                                   None, None,
275                                   afe_date, time_column='created_on')
276    # Special tasks that aren't associated with an HQE
277    # Since we don't do the queue_entry_id=NULL check, we might wipe out a bit
278    # more than we should, but I doubt anyone will notice or care.
279    _delete_table_data_before_date('afe_special_tasks', 'id',
280                                   None, None,
281                                   afe_date, time_column='time_requested')
282
283    # Now go through and clean up all the rows related to tko_jobs prior to
284    # date.
285    logging.info('Cleaning up all data related to tko_jobs prior to %s.',
286                  date)
287    _delete_table_data_before_date('tko_test_attributes', 'id',
288                                   'tko_tests', TEST_IDX,
289                                   date, foreign_key=TEST_IDX)
290    _delete_table_data_before_date('tko_test_labels_tests', 'id',
291                                   'tko_tests', TEST_IDX,
292                                   date, foreign_key= 'test_id')
293    _delete_table_data_before_date('tko_iteration_result', TEST_IDX,
294                                   'tko_tests', TEST_IDX,
295                                   date)
296    _delete_table_data_before_date('tko_iteration_perf_value', TEST_IDX,
297                                   'tko_tests', TEST_IDX,
298                                   date)
299    _delete_table_data_before_date('tko_iteration_attributes', TEST_IDX,
300                                   'tko_tests', TEST_IDX,
301                                   date)
302    _delete_table_data_before_date('tko_job_keyvals', 'id',
303                                   'tko_jobs', JOB_IDX,
304                                   date, foreign_key='job_id')
305    _delete_table_data_before_date('afe_aborted_host_queue_entries',
306                                   'queue_entry_id',
307                                   'tko_jobs', AFE_JOB_ID, date,
308                                   foreign_key='queue_entry_id',
309                                   indirection_table='afe_host_queue_entries',
310                                   indirection_primary_key='id',
311                                   indirection_foreign_key='job_id')
312    _delete_table_data_before_date('afe_special_tasks', 'id',
313                                   'tko_jobs', AFE_JOB_ID,
314                                   date, foreign_key='queue_entry_id',
315                                   indirection_table='afe_host_queue_entries',
316                                   indirection_primary_key='id',
317                                   indirection_foreign_key='job_id')
318    _delete_table_data_before_date('afe_host_queue_entries', 'id',
319                                   'tko_jobs', AFE_JOB_ID,
320                                   date, foreign_key='job_id')
321    _delete_table_data_before_date('afe_job_keyvals', 'id',
322                                   'tko_jobs', AFE_JOB_ID,
323                                   date, foreign_key='job_id')
324    _delete_table_data_before_date('afe_jobs_dependency_labels', 'id',
325                                   'tko_jobs', AFE_JOB_ID,
326                                   date, foreign_key='job_id')
327    _delete_table_data_before_date('afe_jobs', 'id',
328                                   'tko_jobs', AFE_JOB_ID,
329                                   date, foreign_key='id')
330    _delete_table_data_before_date('tko_tests', TEST_IDX,
331                                   'tko_jobs', JOB_IDX,
332                                   date, foreign_key=JOB_IDX)
333    _delete_table_data_before_date('tko_jobs', JOB_IDX,
334                                   None, None, date)
335
336
337def parse_args():
338    """Parse command line arguments"""
339    parser = argparse.ArgumentParser()
340    parser.add_argument('-v', '--verbose', action='store_true',
341                        help='Print SQL commands and results')
342    parser.add_argument('--step', type=int, action='store',
343                        default=1000,
344                        help='Number of rows to delete at once')
345    parser.add_argument('date', help='Keep results newer than')
346    return parser.parse_args()
347
348
349def main():
350    """main"""
351    args = parse_args()
352
353    level = logging.DEBUG if args.verbose else logging.INFO
354    logging.basicConfig(level=level, format=LOGGING_FORMAT)
355    logging.info('Calling: %s', sys.argv)
356
357    if not re.match(DATE_FORMAT_REGEX, args.date):
358        print 'DATE must be in yyyy-mm-dd format!'
359        return
360
361    global STEP_SIZE
362    STEP_SIZE = args.step
363    _delete_all_data_before_date(args.date)
364
365
366if __name__ == '__main__':
367    main()
368