#!/usr/bin/python3
# Copyright 2017 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""
This is a utility to build a summary of the given directory and save it to a
json file.

usage: utils.py [-h] [-p PATH] [-m MAX_SIZE_KB] [-d]

optional arguments:
  -p PATH         Path to build directory summary.
  -m MAX_SIZE_KB  Maximum result size in KB. Set to 0 to disable result
                  throttling.
  -d              Delete all result summary files in the given path.

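For example (hypothetical results path):
  utils.py -p /usr/local/autotest/results/123-debug -m 20000
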
The content of the json file looks like:
{'default': {'/D': [{'control': {'/S': 734}},
                    {'debug': {'/D': [{'client.0.DEBUG': {'/S': 5698}},
                                      {'client.0.ERROR': {'/S': 254}},
                                      {'client.0.INFO': {'/S': 1020}},
                                      {'client.0.WARNING': {'/S': 242}}],
                              '/S': 7214}}
                    ],
             '/S': 7948
            }
}
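
('/D' holds a directory's entries and '/S' a size in bytes; these key names
are assumed to match the key constants defined in utils_lib.)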
28"""
29
30from __future__ import division
31from __future__ import print_function
32
33import argparse
34import copy
35import fnmatch
36import glob
37import json
38import logging
39import os
40import random
41from six.moves import range
42import sys
43import time
44import traceback
45
try:
    from autotest_lib.client.bin.result_tools import dedupe_file_throttler
    from autotest_lib.client.bin.result_tools import delete_file_throttler
    from autotest_lib.client.bin.result_tools import result_info
    from autotest_lib.client.bin.result_tools import throttler_lib
    from autotest_lib.client.bin.result_tools import utils_lib
    from autotest_lib.client.bin.result_tools import zip_file_throttler
except ImportError:
    import dedupe_file_throttler
    import delete_file_throttler
    import result_info
    import throttler_lib
    import utils_lib
    import zip_file_throttler


# Do NOT import autotest_lib modules here. This module can be executed without
# dependency on other autotest modules. This is to keep the logic of result
# trimming on the server side, instead of depending on the autotest client
# module.

DEFAULT_SUMMARY_FILENAME_FMT = 'dir_summary_%d.json'
SUMMARY_FILE_PATTERN = 'dir_summary_*.json'
MERGED_SUMMARY_FILENAME = 'dir_summary_final.json'

# Minimum disk space that should remain available after saving the summary
# file.
MIN_FREE_DISK_BYTES = 10 * 1024 * 1024

# Autotest uses some state files to track process running state. These files
# are deleted from the test results, so they can be ignored.
FILES_TO_IGNORE = set([
    'control.autoserv.state'
])

# Smallest file size to shrink to.
MIN_FILE_SIZE_LIMIT_BYTE = 10 * 1024

def get_unique_dir_summary_file(path):
    """Get a unique file path to save the directory summary json string.

    @param path: The directory path to save the summary file to.
    """
    summary_file = DEFAULT_SUMMARY_FILENAME_FMT % time.time()
    # Make sure the summary file name is unique.
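    # Illustrative (hypothetical values): for path='/tmp/results' this yields
    # something like '/tmp/results/dir_summary_1600000000.json'; on collision
    # a counter is appended: 'dir_summary_1600000000_1.json', '_2', and so on.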
    file_name = os.path.join(path, summary_file)
    if os.path.exists(file_name):
        count = 1
        name, ext = os.path.splitext(summary_file)
        while os.path.exists(file_name):
            file_name = os.path.join(path, '%s_%s%s' % (name, count, ext))
            count += 1
    return file_name


def _preprocess_result_dir_path(path):
    """Verify the result directory path is valid and ensure it ends with `/`.

    @param path: A path to the result directory.
    @return: A verified and processed path to the result directory.
    @raise IOError: If the path doesn't exist.
    @raise ValueError: If the path is not a directory.
    """
    if not os.path.exists(path):
        raise IOError('Path %s does not exist.' % path)

    if not os.path.isdir(path):
        raise ValueError('The given path %s is a file. It must be a '
                         'directory.' % path)

    # Make sure the path ends with `/` so the root key of the summary json is
    # always utils_lib.ROOT_DIR ('').
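    # e.g. (hypothetical path) '/tmp/results' becomes '/tmp/results/'.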
    if not path.endswith(os.sep):
        path = path + os.sep

    return path


def _delete_missing_entries(summary_old, summary_new):
    """Delete files/directories that exist only in the old summary.

    When the new summary is final, i.e., it's built from the final result
    directory, files or directories missing from it are considered deleted
    and are trimmed to size 0.

    @param summary_old: Old directory summary.
    @param summary_new: New directory summary.
    """
    new_files = summary_new.get_file_names()
    old_files = summary_old.get_file_names()
    for name in old_files:
        old_file = summary_old.get_file(name)
        if name not in new_files:
            if old_file.is_dir:
                # Trim sub-directories.
                with old_file.disable_updating_parent_size_info():
                    _delete_missing_entries(old_file, result_info.EMPTY)
                old_file.update_sizes()
            elif name in FILES_TO_IGNORE:
                # Remove the file from the summary as it can be ignored.
                summary_old.remove_file(name)
            else:
                with old_file.disable_updating_parent_size_info():
                    # Before setting the trimmed size to 0, update the
                    # collected size if it's not set yet.
                    if not old_file.is_collected_size_recorded:
                        old_file.collected_size = old_file.trimmed_size
                    old_file.trimmed_size = 0
        elif old_file.is_dir:
            # `name` exists in both summaries and is a directory in the old
            # one. Recurse into it; if it's a file in the new summary, treat
            # the old directory's contents as deleted.
            new_file = summary_new.get_file(name)
            if not new_file.is_dir:
                new_file = result_info.EMPTY
            _delete_missing_entries(old_file, new_file)


def _relocate_summary(result_dir, summary_file, summary):
    """Update the given summary with the path relative to the result_dir.

    @param result_dir: Path to the result directory.
    @param summary_file: Path to the summary file.
    @param summary: A directory summary inside the given result_dir or its
            sub-directory.
    @return: An updated summary with the path relative to the result_dir.
    """
    sub_path = os.path.dirname(summary_file).replace(
            result_dir.rstrip(os.sep), '')
    if sub_path == '':
        return summary

    folders = sub_path.split(os.sep)
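    # Illustrative: for a summary file at
    # <result_dir>/sub1/sub2/dir_summary_1.json, sub_path is '/sub1/sub2'
    # and folders is ['', 'sub1', 'sub2'].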

    # The first folder is always '' because of the leading `/` in sub_path.
    parent = result_info.ResultInfo(
            result_dir, utils_lib.ROOT_DIR, parent_result_info=None)
    root = parent

    # Start from index 1 to skip that '' entry, so root has only one folder,
    # utils_lib.ROOT_DIR.
    for i in range(1, len(folders)):
        child = result_info.ResultInfo(
                parent.path, folders[i], parent_result_info=parent)
        if i == len(folders) - 1:
            # Add files in summary to child.
            for info in summary.files:
                child.files.append(info)

        parent.files.append(child)
        parent = child

    parent.update_sizes()
    return root


def merge_summaries(path):
    """Merge all directory summaries in the given path.

    This function calculates the total size of result files being collected
    for the test device and the files generated on the drone. It also returns
    the merged directory summary.

    @param path: A path to search for directory summaries.
    @return a tuple of (client_collected_bytes, merged_summary, files):
            client_collected_bytes: The total size of results collected from
                the DUT. The number can be larger than the total file size of
                the given path, as files can be overwritten or removed.
            merged_summary: The merged directory summary of the given path.
            files: All summary files in the given path, including
                sub-directories.
    """
    path = _preprocess_result_dir_path(path)
    # Find all directory summary files and sort them by modification time.
    summary_files = []
    for root, _, filenames in os.walk(path):
        for filename in fnmatch.filter(filenames, SUMMARY_FILE_PATTERN):
            summary_files.append(os.path.join(root, filename))
    summary_files = sorted(summary_files, key=os.path.getmtime)

    all_summaries = []
    for summary_file in summary_files:
        try:
            summary = result_info.load_summary_json_file(summary_file)
            summary = _relocate_summary(path, summary_file, summary)
            all_summaries.append(summary)
        except (IOError, ValueError) as e:
            utils_lib.LOG('Failed to load summary file %s. Error: %s' %
                          (summary_file, e))

    # Merge all summaries.
    merged_summary = all_summaries[0] if all_summaries else None
    for summary in all_summaries[1:]:
        merged_summary.merge(summary)
    # After all summaries from the test device (client side) are merged, we
    # can get the total size of result files being transferred from the test
    # device. If there is no directory summary collected, default
    # client_collected_bytes to 0.
    client_collected_bytes = 0
    if merged_summary:
        client_collected_bytes = merged_summary.collected_size

    # Get the summary of the current directory.
    last_summary = result_info.ResultInfo.build_from_path(path)

    if merged_summary:
        merged_summary.merge(last_summary, is_final=True)
        _delete_missing_entries(merged_summary, last_summary)
    else:
        merged_summary = last_summary

    return client_collected_bytes, merged_summary, summary_files


def _throttle_results(summary, max_result_size_KB):
    """Throttle the test results by limiting to the given maximum size.

    @param summary: A ResultInfo object containing result summary.
    @param max_result_size_KB: Maximum test result size in KB.
    """
    if throttler_lib.check_throttle_limit(summary, max_result_size_KB):
        utils_lib.LOG(
                'Result size is %s, which is less than %d KB. No need to '
                'throttle.' %
                (utils_lib.get_size_string(summary.trimmed_size),
                 max_result_size_KB))
        return

    args = {'summary': summary,
            'max_result_size_KB': max_result_size_KB}
    args_skip_autotest_log = copy.copy(args)
    args_skip_autotest_log['skip_autotest_log'] = True
    # Apply the throttlers in the following order.
    throttlers = [
            (zip_file_throttler, copy.copy(args_skip_autotest_log)),
            (dedupe_file_throttler, copy.copy(args)),
            (zip_file_throttler, copy.copy(args)),
            ]
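    # Presumably: the first zip pass compresses large files while leaving
    # autotest logs readable, dedupe then removes duplicated files, and the
    # second zip pass may compress the remaining large files, logs included.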

    # Add another zip_file_throttler to compress the files being shrunk.
    # The threshold is set to half of the DEFAULT_FILE_SIZE_LIMIT_BYTE of
    # shrink_file_throttler.
    new_args = copy.copy(args)
    new_args['file_size_threshold_byte'] = 50 * 1024
    throttlers.append((zip_file_throttler, new_args))

    # If the above throttlers still can't reduce the result size to be under
    # max_result_size_KB, try to delete files with various thresholds,
    # starting at 5MB then lowering to 100KB.
    delete_file_thresholds = [5*1024*1024, 1*1024*1024, 100*1024]
    # Try to keep tgz files first.
    exclude_file_patterns = [r'.*\.tgz']
    for threshold in delete_file_thresholds:
        new_args = copy.copy(args)
        new_args.update({'file_size_threshold_byte': threshold,
                         'exclude_file_patterns': exclude_file_patterns})
        throttlers.append((delete_file_throttler, new_args))
    # Add one more delete_file_throttler that does not skip tgz files.
    new_args = copy.copy(args)
    new_args.update({'file_size_threshold_byte': delete_file_thresholds[-1]})
    throttlers.append((delete_file_throttler, new_args))

    # Run the throttlers in order until the result size is under
    # max_result_size_KB.
    old_size = summary.trimmed_size
    for throttler, args in throttlers:
        try:
            args_without_summary = copy.copy(args)
            del args_without_summary['summary']
            utils_lib.LOG('Applying throttler %s, args: %s' %
                          (throttler.__name__, args_without_summary))
            throttler.throttle(**args)
            if throttler_lib.check_throttle_limit(summary, max_result_size_KB):
                return
        except Exception:
            utils_lib.LOG('Failed to apply throttler %s. Exception: %s' %
                          (throttler, traceback.format_exc()))
        finally:
            new_size = summary.trimmed_size
            if new_size == old_size:
                utils_lib.LOG('Result size was not changed: %s.' % old_size)
            else:
                utils_lib.LOG('Result size was reduced from %s to %s.' %
                              (utils_lib.get_size_string(old_size),
                               utils_lib.get_size_string(new_size)))
                # Track the size after each throttler so the next iteration
                # logs only its own reduction.
                old_size = new_size


def _setup_logging():
    """Set up logging to direct logs to stdout."""
    logger = logging.getLogger()
    logger.setLevel(logging.DEBUG)
    handler = logging.StreamHandler(sys.stdout)
    handler.setLevel(logging.DEBUG)
    formatter = logging.Formatter('%(asctime)s %(message)s')
    handler.setFormatter(formatter)
    logger.handlers = []
    logger.addHandler(handler)


def _parse_options():
    """Options for the main script.

    @return: An options object containing the parsed arg values.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-p', type=str, dest='path',
                        help='Path to build directory summary.')
    parser.add_argument('-m', type=int, dest='max_size_KB', default=0,
                        help='Maximum result size in KB. Set to 0 to disable '
                        'result throttling.')
    parser.add_argument('-d', action='store_true', dest='delete_summaries',
                        default=False,
                        help='-d to delete all result summary files in the '
                        'given path.')
    return parser.parse_args()


def execute(path, max_size_KB):
    """Execute the script with given arguments.

    @param path: Path to build directory summary.
    @param max_size_KB: Maximum result size in KB.
    """
    utils_lib.LOG('Running result_tools/utils on path: %s' % path)
    utils_lib.LOG('Running result_tools/utils with Python version %s' %
                  sys.version)

    if max_size_KB > 0:
        utils_lib.LOG('Throttle result size to: %s' %
                      utils_lib.get_size_string(max_size_KB * 1024))

    result_dir = path
    if not os.path.isdir(result_dir):
        result_dir = os.path.dirname(result_dir)
    summary = result_info.ResultInfo.build_from_path(path)
    summary_json = json.dumps(summary)
    summary_file = get_unique_dir_summary_file(result_dir)

    # Make sure there is enough free disk to write the file.
    stat = os.statvfs(path)
    free_space = stat.f_frsize * stat.f_bavail
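    # f_bavail counts the blocks available to unprivileged processes, so this
    # is the usable free space in bytes.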
    if free_space - len(summary_json) < MIN_FREE_DISK_BYTES:
        raise utils_lib.NotEnoughDiskError(
                'Not enough disk space after saving the summary file. '
                'Available free disk: %s bytes. Summary file size: %s bytes.' %
                (free_space, len(summary_json)))

    with open(summary_file, 'w') as f:
        f.write(summary_json)
    utils_lib.LOG('Directory summary of %s is saved to file %s.' %
                  (path, summary_file))

    if max_size_KB > 0 and summary.trimmed_size > 0:
        old_size = summary.trimmed_size
        throttle_probability = float(max_size_KB * 1024) / old_size
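        # Illustrative numbers: with max_size_KB=1000 and a 4MB result,
        # throttle_probability is roughly 0.25, so about 1 in 4 such results
        # is left unthrottled.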
        if random.random() < throttle_probability:
            utils_lib.LOG(
                    'Skip throttling %s: size=%s, throttle_probability=%s' %
                    (path, old_size, throttle_probability))
        else:
            _throttle_results(summary, max_size_KB)
            if summary.trimmed_size < old_size:
                # Files are throttled, save the updated summary file.
                utils_lib.LOG('Overwrite the summary file: %s' % summary_file)
                result_info.save_summary(summary, summary_file)


def _delete_summaries(path):
    """Delete all directory summary files in the given directory.

    This is to clean up the directory so no summary files are left behind to
    affect later tests.

    @param path: Path to the directory to clean up.
    """
    # Only summary files directly under the `path` need to be cleaned.
    summary_files = glob.glob(os.path.join(path, SUMMARY_FILE_PATTERN))
    for summary in summary_files:
        try:
            os.remove(summary)
        except IOError as e:
            utils_lib.LOG('Failed to delete summary: %s. Error: %s' %
                          (summary, e))


def main():
    """Main script."""
    _setup_logging()
    options = _parse_options()
    if options.delete_summaries:
        _delete_summaries(options.path)
    else:
        execute(options.path, options.max_size_KB)


if __name__ == '__main__':
    main()