#!/usr/bin/python2
# Copyright 2017 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""
This is a utility to build a summary of the given directory and save it to a
json file.

usage: utils.py [-h] [-p PATH] [-m MAX_SIZE_KB] [-d]

optional arguments:
  -p PATH         Path to build directory summary.
  -m MAX_SIZE_KB  Maximum result size in KB. Set to 0 to disable result
                  throttling.
  -d              Delete all result summary files in the given path.

The content of the json file looks like:
{'default': {'/D': [{'control': {'/S': 734}},
                    {'debug': {'/D': [{'client.0.DEBUG': {'/S': 5698}},
                                      {'client.0.ERROR': {'/S': 254}},
                                      {'client.0.INFO': {'/S': 1020}},
                                      {'client.0.WARNING': {'/S': 242}}],
                               '/S': 7214}}
                   ],
             '/S': 7948
            }
}
"""

from __future__ import division
from __future__ import print_function

import argparse
import copy
import fnmatch
import glob
import json
import logging
import os
import random
import sys
import time
import traceback

from six.moves import range

try:
    from autotest_lib.client.bin.result_tools import dedupe_file_throttler
    from autotest_lib.client.bin.result_tools import delete_file_throttler
    from autotest_lib.client.bin.result_tools import result_info
    from autotest_lib.client.bin.result_tools import shrink_file_throttler
    from autotest_lib.client.bin.result_tools import throttler_lib
    from autotest_lib.client.bin.result_tools import utils_lib
    from autotest_lib.client.bin.result_tools import zip_file_throttler
except ImportError:
    import dedupe_file_throttler
    import delete_file_throttler
    import result_info
    import shrink_file_throttler
    import throttler_lib
    import utils_lib
    import zip_file_throttler


# Do NOT import other autotest_lib modules here (the result_tools imports
# above fall back to local modules). This module can be executed without
# depending on other autotest modules, which keeps the result-trimming logic
# usable on the server side without pulling in the autotest client module.

DEFAULT_SUMMARY_FILENAME_FMT = 'dir_summary_%d.json'
SUMMARY_FILE_PATTERN = 'dir_summary_*.json'
MERGED_SUMMARY_FILENAME = 'dir_summary_final.json'

# Minimum free disk space that should remain available after saving the
# summary file.
MIN_FREE_DISK_BYTES = 10 * 1024 * 1024

# Autotest uses some state files to track process running state. These files
# are deleted from test results, so they can be ignored in the summary.
FILES_TO_IGNORE = set([
    'control.autoserv.state'
])

# Smallest file size to shrink a file down to.
MIN_FILE_SIZE_LIMIT_BYTE = 10 * 1024


def get_unique_dir_summary_file(path):
    """Get a unique file path to save the directory summary json string.

    @param path: The directory path to save the summary file to.
    @return: A unique path to a summary file in the given directory.
    """
    summary_file = DEFAULT_SUMMARY_FILENAME_FMT % time.time()
    # Make sure the summary file name is unique.
    file_name = os.path.join(path, summary_file)
    if os.path.exists(file_name):
        count = 1
        name, ext = os.path.splitext(summary_file)
        while os.path.exists(file_name):
            file_name = os.path.join(path, '%s_%s%s' % (name, count, ext))
            count += 1
    return file_name
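
# Example (illustrative): with time.time() returning ~1510000000.0, the
# generated name is 'dir_summary_1510000000.json'. If that file already
# exists, 'dir_summary_1510000000_1.json', 'dir_summary_1510000000_2.json',
# etc. are tried until an unused name is found.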


def _preprocess_result_dir_path(path):
    """Verify the result directory path is valid and make sure it ends with `/`.

    @param path: A path to the result directory.
    @return: A verified and processed path to the result directory.
    @raise IOError: If the path doesn't exist.
    @raise ValueError: If the path is not a directory.
    """
    if not os.path.exists(path):
        raise IOError('Path %s does not exist.' % path)

    if not os.path.isdir(path):
        raise ValueError('The given path %s is a file. It must be a '
                         'directory.' % path)

    # Make sure the path ends with `/` so the root key of the summary json is
    # always utils_lib.ROOT_DIR ('').
    if not path.endswith(os.sep):
        path = path + os.sep

    return path
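
# Example (illustrative): _preprocess_result_dir_path('/tmp/results') returns
# '/tmp/results/' when /tmp/results is an existing directory; it raises
# IOError when the path does not exist and ValueError when the path is a
# regular file.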


def _delete_missing_entries(summary_old, summary_new):
    """Delete files/directories that only exist in the old summary.

    When the new summary is final, i.e. built from the final result directory,
    files or directories missing from it are considered deleted and are
    trimmed to size 0.

    @param summary_old: Old directory summary.
    @param summary_new: New directory summary.
    """
    new_files = summary_new.get_file_names()
    old_files = summary_old.get_file_names()
    for name in old_files:
        old_file = summary_old.get_file(name)
        if name not in new_files:
            if old_file.is_dir:
                # Trim sub-directories.
                with old_file.disable_updating_parent_size_info():
                    _delete_missing_entries(old_file, result_info.EMPTY)
                old_file.update_sizes()
            elif name in FILES_TO_IGNORE:
                # Remove the file from the summary as it can be ignored.
                summary_old.remove_file(name)
            else:
                with old_file.disable_updating_parent_size_info():
                    # Before setting the trimmed size to 0, update the
                    # collected size if it's not set yet.
                    if not old_file.is_collected_size_recorded:
                        old_file.collected_size = old_file.trimmed_size
                    old_file.trimmed_size = 0
        elif old_file.is_dir:
            # Recurse into sub-directories. If `name` is a directory in the
            # old summary but a file in the new summary, recurse with an
            # empty summary so the old entry is treated as deleted.
            new_file = summary_new.get_file(name)
            if not new_file.is_dir:
                new_file = result_info.EMPTY
            _delete_missing_entries(old_file, new_file)


def _relocate_summary(result_dir, summary_file, summary):
    """Update the given summary with paths relative to the result_dir.

    @param result_dir: Path to the result directory.
    @param summary_file: Path to the summary file.
    @param summary: A directory summary inside the given result_dir or one of
            its sub-directories.
    @return: An updated summary with paths relative to the result_dir.
    """
    sub_path = os.path.dirname(summary_file).replace(
            result_dir.rstrip(os.sep), '')
    if sub_path == '':
        return summary

    folders = sub_path.split(os.sep)

    # The first folder is always '' because of the leading `/` in sub_path.
    parent = result_info.ResultInfo(
            result_dir, utils_lib.ROOT_DIR, parent_result_info=None)
    root = parent

    # Skip index 0 so that root contains a single top-level folder of
    # utils_lib.ROOT_DIR.
    for i in range(1, len(folders)):
        child = result_info.ResultInfo(
                parent.path, folders[i], parent_result_info=parent)
        if i == len(folders) - 1:
            # Add files in summary to child.
            for info in summary.files:
                child.files.append(info)

        parent.files.append(child)
        parent = child

    parent.update_sizes()
    return root
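
# Example (illustrative paths): with result_dir '/results/' and summary_file
# '/results/sub/dir_summary_1.json', sub_path is '/sub', so the returned root
# is a ResultInfo for '/results/' containing a child 'sub' that holds the
# files of the given summary.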


def merge_summaries(path):
    """Merge all directory summaries in the given path.

    This function calculates the total size of result files collected for the
    test device and the files generated on the drone. It also returns the
    merged directory summary.

    @param path: A path to search for directory summaries.
    @return: A tuple of (client_collected_bytes, merged_summary, files):
            client_collected_bytes: The total size of results collected from
                the DUT. The number can be larger than the total file size of
                the given path, as files can be overwritten or removed.
            merged_summary: The merged directory summary of the given path.
            files: All summary files in the given path, including
                sub-directories.
    """
    path = _preprocess_result_dir_path(path)
    # Find all directory summary files and sort them by modification time.
    summary_files = []
    for root, _, filenames in os.walk(path):
        for filename in fnmatch.filter(filenames, SUMMARY_FILE_PATTERN):
            summary_files.append(os.path.join(root, filename))
    summary_files = sorted(summary_files, key=os.path.getmtime)

    all_summaries = []
    for summary_file in summary_files:
        try:
            summary = result_info.load_summary_json_file(summary_file)
            summary = _relocate_summary(path, summary_file, summary)
            all_summaries.append(summary)
        except (IOError, ValueError) as e:
            utils_lib.LOG('Failed to load summary file %s. Error: %s' %
                          (summary_file, e))

    # Merge all summaries.
    merged_summary = all_summaries[0] if len(all_summaries) > 0 else None
    for summary in all_summaries[1:]:
        merged_summary.merge(summary)
    # After all summaries from the test device (client side) are merged, we
    # can get the total size of result files transferred from the test device.
    # If no directory summary was collected, default client_collected_bytes
    # to 0.
    client_collected_bytes = 0
    if merged_summary:
        client_collected_bytes = merged_summary.collected_size

    # Get the summary of the current directory.
    last_summary = result_info.ResultInfo.build_from_path(path)

    if merged_summary:
        merged_summary.merge(last_summary, is_final=True)
        _delete_missing_entries(merged_summary, last_summary)
    else:
        merged_summary = last_summary

    return client_collected_bytes, merged_summary, summary_files
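
# Illustrative usage (the path is a hypothetical example): the returned tuple
# can be unpacked as
#
#   collected_bytes, merged_summary, summary_files = merge_summaries(
#           '/tmp/results')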


def _throttle_results(summary, max_result_size_KB):
    """Throttle the test results by limiting them to the given maximum size.

    @param summary: A ResultInfo object containing the result summary.
    @param max_result_size_KB: Maximum test result size in KB.
    """
    if throttler_lib.check_throttle_limit(summary, max_result_size_KB):
        utils_lib.LOG(
                'Result size is %s, which is less than %d KB. No need to '
                'throttle.' %
                (utils_lib.get_size_string(summary.trimmed_size),
                 max_result_size_KB))
        return

    args = {'summary': summary,
            'max_result_size_KB': max_result_size_KB}
    args_skip_autotest_log = copy.copy(args)
    args_skip_autotest_log['skip_autotest_log'] = True
    # Apply the throttlers in the following order.
    throttlers = [
            (shrink_file_throttler, copy.copy(args_skip_autotest_log)),
            (zip_file_throttler, copy.copy(args_skip_autotest_log)),
            (shrink_file_throttler, copy.copy(args)),
            (dedupe_file_throttler, copy.copy(args)),
            (zip_file_throttler, copy.copy(args)),
            ]

    # Add another zip_file_throttler to compress the files being shrunk.
    # The threshold is set to half of the DEFAULT_FILE_SIZE_LIMIT_BYTE of
    # shrink_file_throttler.
    new_args = copy.copy(args)
    new_args['file_size_threshold_byte'] = 50 * 1024
    throttlers.append((zip_file_throttler, new_args))

    # If the above throttlers still can't reduce the result size to be under
    # max_result_size_KB, try to delete files with various thresholds,
    # starting at 5MB and lowering to 100KB.
    delete_file_thresholds = [5*1024*1024, 1*1024*1024, 100*1024]
    # Try to keep tgz files first.
    exclude_file_patterns = [r'.*\.tgz']
    for threshold in delete_file_thresholds:
        new_args = copy.copy(args)
        new_args.update({'file_size_threshold_byte': threshold,
                         'exclude_file_patterns': exclude_file_patterns})
        throttlers.append((delete_file_throttler, new_args))
    # Add one more delete_file_throttler that does not skip tgz files.
    new_args = copy.copy(args)
    new_args.update({'file_size_threshold_byte': delete_file_thresholds[-1]})
    throttlers.append((delete_file_throttler, new_args))

    # Run the throttlers in order until the result size is under
    # max_result_size_KB.
    old_size = summary.trimmed_size
    for throttler, args in throttlers:
        try:
            args_without_summary = copy.copy(args)
            del args_without_summary['summary']
            utils_lib.LOG('Applying throttler %s, args: %s' %
                          (throttler.__name__, args_without_summary))
            throttler.throttle(**args)
            if throttler_lib.check_throttle_limit(summary, max_result_size_KB):
                return
        except Exception:
            utils_lib.LOG('Failed to apply throttler %s. Exception: %s' %
                          (throttler, traceback.format_exc()))
        finally:
            new_size = summary.trimmed_size
            if new_size == old_size:
                utils_lib.LOG('Result size was not changed: %s.' % old_size)
            else:
                utils_lib.LOG('Result size was reduced from %s to %s.' %
                              (utils_lib.get_size_string(old_size),
                               utils_lib.get_size_string(new_size)))
                # Track the latest size so the next iteration logs its own
                # delta rather than the cumulative one.
                old_size = new_size
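
# Illustrative usage sketch (the path is a hypothetical example): shrink,
# dedupe, zip and, as a last resort, delete files until the summary's trimmed
# size fits in 10 MB:
#
#   summary = result_info.ResultInfo.build_from_path('/tmp/results/')
#   _throttle_results(summary, 10 * 1024)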


def _setup_logging():
    """Set up logging to direct logs to stdout."""
    # Direct logging to stdout.
    logger = logging.getLogger()
    logger.setLevel(logging.DEBUG)
    handler = logging.StreamHandler(sys.stdout)
    handler.setLevel(logging.DEBUG)
    formatter = logging.Formatter('%(asctime)s %(message)s')
    handler.setFormatter(formatter)
    logger.handlers = []
    logger.addHandler(handler)


def _parse_options():
    """Parse the command line options for the main script.

    @return: An options object containing the parsed arg values.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-p', type=str, dest='path',
                        help='Path to build directory summary.')
    parser.add_argument('-m', type=int, dest='max_size_KB', default=0,
                        help='Maximum result size in KB. Set to 0 to disable '
                        'result throttling.')
    parser.add_argument('-d', action='store_true', dest='delete_summaries',
                        default=False,
                        help='Delete all result summary files in the given '
                        'path.')
    return parser.parse_args()


def execute(path, max_size_KB):
    """Execute the script with the given arguments.

    @param path: Path to build directory summary.
    @param max_size_KB: Maximum result size in KB.
    """
    utils_lib.LOG('Running result_tools/utils on path: %s' % path)
    if max_size_KB > 0:
        utils_lib.LOG('Throttle result size to: %s' %
                      utils_lib.get_size_string(max_size_KB * 1024))

    result_dir = path
    if not os.path.isdir(result_dir):
        result_dir = os.path.dirname(result_dir)
    summary = result_info.ResultInfo.build_from_path(path)
    summary_json = json.dumps(summary)
    summary_file = get_unique_dir_summary_file(result_dir)

    # Make sure there is enough free disk to write the file.
    stat = os.statvfs(path)
    free_space = stat.f_frsize * stat.f_bavail
    if free_space - len(summary_json) < MIN_FREE_DISK_BYTES:
        raise utils_lib.NotEnoughDiskError(
                'Not enough disk space after saving the summary file. '
                'Available free disk: %s bytes. Summary file size: %s bytes.' %
                (free_space, len(summary_json)))

    with open(summary_file, 'w') as f:
        f.write(summary_json)
    utils_lib.LOG('Directory summary of %s is saved to file %s.' %
                  (path, summary_file))

    if max_size_KB > 0 and summary.trimmed_size > 0:
        old_size = summary.trimmed_size
        # Skip throttling with a probability proportional to the allowed
        # size: results already under the limit are always skipped
        # (probability >= 1), while heavily oversized results are throttled
        # most of the time.
        throttle_probability = float(max_size_KB * 1024) / old_size
        if random.random() < throttle_probability:
            utils_lib.LOG(
                    'Skip throttling %s: size=%s, throttle_probability=%s' %
                    (path, old_size, throttle_probability))
        else:
            _throttle_results(summary, max_size_KB)
            if summary.trimmed_size < old_size:
                # Files were throttled; save the updated summary file.
                utils_lib.LOG('Overwrite the summary file: %s' % summary_file)
                result_info.save_summary(summary, summary_file)
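
# Illustrative usage (the path is a hypothetical example): build and save a
# summary of /tmp/results, throttling the results to at most 10 MB:
#
#   execute('/tmp/results', 10 * 1024)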


def _delete_summaries(path):
    """Delete all directory summary files in the given directory.

    This cleans up the directory so no summary files are left behind to
    affect later tests.

    @param path: Path to clean up directory summaries from.
    """
    # Only summary files directly under the `path` need to be cleaned.
    summary_files = glob.glob(os.path.join(path, SUMMARY_FILE_PATTERN))
    for summary in summary_files:
        try:
            os.remove(summary)
        except (IOError, OSError) as e:
            utils_lib.LOG('Failed to delete summary: %s. Error: %s' %
                          (summary, e))


def main():
    """Main entry point of the script."""
    _setup_logging()
    options = _parse_options()
    if options.delete_summaries:
        _delete_summaries(options.path)
    else:
        execute(options.path, options.max_size_KB)


if __name__ == '__main__':
    main()