1#!/usr/bin/python3 2# Copyright 2017 The Chromium OS Authors. All rights reserved. 3# Use of this source code is governed by a BSD-style license that can be 4# found in the LICENSE file. 5 6""" 7This is a utility to build a summary of the given directory. and save to a json 8file. 9 10usage: utils.py [-h] [-p PATH] [-m MAX_SIZE_KB] 11 12optional arguments: 13 -p PATH Path to build directory summary. 14 -m MAX_SIZE_KB Maximum result size in KB. Set to 0 to disable result 15 throttling. 16 17The content of the json file looks like: 18{'default': {'/D': [{'control': {'/S': 734}}, 19 {'debug': {'/D': [{'client.0.DEBUG': {'/S': 5698}}, 20 {'client.0.ERROR': {'/S': 254}}, 21 {'client.0.INFO': {'/S': 1020}}, 22 {'client.0.WARNING': {'/S': 242}}], 23 '/S': 7214}} 24 ], 25 '/S': 7948 26 } 27} 28""" 29 30from __future__ import division 31from __future__ import print_function 32 33import argparse 34import copy 35import fnmatch 36import glob 37import json 38import logging 39import os 40import random 41from six.moves import range 42import sys 43import time 44import traceback 45 46try: 47 from autotest_lib.client.bin.result_tools import dedupe_file_throttler 48 from autotest_lib.client.bin.result_tools import delete_file_throttler 49 from autotest_lib.client.bin.result_tools import result_info 50 from autotest_lib.client.bin.result_tools import throttler_lib 51 from autotest_lib.client.bin.result_tools import utils_lib 52 from autotest_lib.client.bin.result_tools import zip_file_throttler 53except ImportError: 54 import dedupe_file_throttler 55 import delete_file_throttler 56 import result_info 57 import throttler_lib 58 import utils_lib 59 import zip_file_throttler 60 61 62# Do NOT import autotest_lib modules here. This module can be executed without 63# dependency on other autotest modules. This is to keep the logic of result 64# trimming on the server side, instead of depending on the autotest client 65# module. 66 67DEFAULT_SUMMARY_FILENAME_FMT = 'dir_summary_%d.json' 68SUMMARY_FILE_PATTERN = 'dir_summary_*.json' 69MERGED_SUMMARY_FILENAME = 'dir_summary_final.json' 70 71# Minimum disk space should be available after saving the summary file. 72MIN_FREE_DISK_BYTES = 10 * 1024 * 1024 73 74# Autotest uses some state files to track process running state. The files are 75# deleted from test results. Therefore, these files can be ignored. 76FILES_TO_IGNORE = set([ 77 'control.autoserv.state' 78]) 79 80# Smallest file size to shrink to. 81MIN_FILE_SIZE_LIMIT_BYTE = 10 * 1024 82 83def get_unique_dir_summary_file(path): 84 """Get a unique file path to save the directory summary json string. 85 86 @param path: The directory path to save the summary file to. 87 """ 88 summary_file = DEFAULT_SUMMARY_FILENAME_FMT % time.time() 89 # Make sure the summary file name is unique. 90 file_name = os.path.join(path, summary_file) 91 if os.path.exists(file_name): 92 count = 1 93 name, ext = os.path.splitext(summary_file) 94 while os.path.exists(file_name): 95 file_name = os.path.join(path, '%s_%s%s' % (name, count, ext)) 96 count += 1 97 return file_name 98 99 100def _preprocess_result_dir_path(path): 101 """Verify the result directory path is valid and make sure it ends with `/`. 102 103 @param path: A path to the result directory. 104 @return: A verified and processed path to the result directory. 105 @raise IOError: If the path doesn't exist. 106 @raise ValueError: If the path is not a directory. 107 """ 108 if not os.path.exists(path): 109 raise IOError('Path %s does not exist.' % path) 110 111 if not os.path.isdir(path): 112 raise ValueError('The given path %s is a file. It must be a ' 113 'directory.' % path) 114 115 # Make sure the path ends with `/` so the root key of summary json is always 116 # utils_lib.ROOT_DIR ('') 117 if not path.endswith(os.sep): 118 path = path + os.sep 119 120 return path 121 122 123def _delete_missing_entries(summary_old, summary_new): 124 """Delete files/directories only exists in old summary. 125 126 When the new summary is final, i.e., it's built from the final result 127 directory, files or directories missing are considered to be deleted and 128 trimmed to size 0. 129 130 @param summary_old: Old directory summary. 131 @param summary_new: New directory summary. 132 """ 133 new_files = summary_new.get_file_names() 134 old_files = summary_old.get_file_names() 135 for name in old_files: 136 old_file = summary_old.get_file(name) 137 if name not in new_files: 138 if old_file.is_dir: 139 # Trim sub-directories. 140 with old_file.disable_updating_parent_size_info(): 141 _delete_missing_entries(old_file, result_info.EMPTY) 142 old_file.update_sizes() 143 elif name in FILES_TO_IGNORE: 144 # Remove the file from the summary as it can be ignored. 145 summary_old.remove_file(name) 146 else: 147 with old_file.disable_updating_parent_size_info(): 148 # Before setting the trimmed size to 0, update the collected 149 # size if it's not set yet. 150 if not old_file.is_collected_size_recorded: 151 old_file.collected_size = old_file.trimmed_size 152 old_file.trimmed_size = 0 153 elif old_file.is_dir: 154 # If `name` is a directory in the old summary, but a file in the new 155 # summary, delete the entry in the old summary. 156 new_file = summary_new.get_file(name) 157 if not new_file.is_dir: 158 new_file = result_info.EMPTY 159 _delete_missing_entries(old_file, new_file) 160 161 162def _relocate_summary(result_dir, summary_file, summary): 163 """Update the given summary with the path relative to the result_dir. 164 165 @param result_dir: Path to the result directory. 166 @param summary_file: Path to the summary file. 167 @param summary: A directory summary inside the given result_dir or its 168 sub-directory. 169 @return: An updated summary with the path relative to the result_dir. 170 """ 171 sub_path = os.path.dirname(summary_file).replace( 172 result_dir.rstrip(os.sep), '') 173 if sub_path == '': 174 return summary 175 176 folders = sub_path.split(os.sep) 177 178 # The first folder is always '' because of the leading `/` in sub_path. 179 parent = result_info.ResultInfo( 180 result_dir, utils_lib.ROOT_DIR, parent_result_info=None) 181 root = parent 182 183 # That makes sure root has only one folder of utils_lib.ROOT_DIR. 184 for i in range(1, len(folders)): 185 child = result_info.ResultInfo( 186 parent.path, folders[i], parent_result_info=parent) 187 if i == len(folders) - 1: 188 # Add files in summary to child. 189 for info in summary.files: 190 child.files.append(info) 191 192 parent.files.append(child) 193 parent = child 194 195 parent.update_sizes() 196 return root 197 198 199def merge_summaries(path): 200 """Merge all directory summaries in the given path. 201 202 This function calculates the total size of result files being collected for 203 the test device and the files generated on the drone. It also returns merged 204 directory summary. 205 206 @param path: A path to search for directory summaries. 207 @return a tuple of (client_collected_bytes, merged_summary, files): 208 client_collected_bytes: The total size of results collected from 209 the DUT. The number can be larger than the total file size of 210 the given path, as files can be overwritten or removed. 211 merged_summary: The merged directory summary of the given path. 212 files: All summary files in the given path, including 213 sub-directories. 214 """ 215 path = _preprocess_result_dir_path(path) 216 # Find all directory summary files and sort them by the time stamp in file 217 # name. 218 summary_files = [] 219 for root, _, filenames in os.walk(path): 220 for filename in fnmatch.filter(filenames, 'dir_summary_*.json'): 221 summary_files.append(os.path.join(root, filename)) 222 summary_files = sorted(summary_files, key=os.path.getmtime) 223 224 all_summaries = [] 225 for summary_file in summary_files: 226 try: 227 summary = result_info.load_summary_json_file(summary_file) 228 summary = _relocate_summary(path, summary_file, summary) 229 all_summaries.append(summary) 230 except (IOError, ValueError) as e: 231 utils_lib.LOG('Failed to load summary file %s Error: %s' % 232 (summary_file, e)) 233 234 # Merge all summaries. 235 merged_summary = all_summaries[0] if len(all_summaries) > 0 else None 236 for summary in all_summaries[1:]: 237 merged_summary.merge(summary) 238 # After all summaries from the test device (client side) are merged, we can 239 # get the total size of result files being transfered from the test device. 240 # If there is no directory summary collected, default client_collected_bytes 241 # to 0. 242 client_collected_bytes = 0 243 if merged_summary: 244 client_collected_bytes = merged_summary.collected_size 245 246 # Get the summary of current directory 247 last_summary = result_info.ResultInfo.build_from_path(path) 248 249 if merged_summary: 250 merged_summary.merge(last_summary, is_final=True) 251 _delete_missing_entries(merged_summary, last_summary) 252 else: 253 merged_summary = last_summary 254 255 return client_collected_bytes, merged_summary, summary_files 256 257 258def _throttle_results(summary, max_result_size_KB): 259 """Throttle the test results by limiting to the given maximum size. 260 261 @param summary: A ResultInfo object containing result summary. 262 @param max_result_size_KB: Maximum test result size in KB. 263 """ 264 if throttler_lib.check_throttle_limit(summary, max_result_size_KB): 265 utils_lib.LOG( 266 'Result size is %s, which is less than %d KB. No need to ' 267 'throttle.' % 268 (utils_lib.get_size_string(summary.trimmed_size), 269 max_result_size_KB)) 270 return 271 272 args = {'summary': summary, 273 'max_result_size_KB': max_result_size_KB} 274 args_skip_autotest_log = copy.copy(args) 275 args_skip_autotest_log['skip_autotest_log'] = True 276 # Apply the throttlers in following order. 277 throttlers = [ 278 (zip_file_throttler, copy.copy(args_skip_autotest_log)), 279 (dedupe_file_throttler, copy.copy(args)), 280 (zip_file_throttler, copy.copy(args)), 281 ] 282 283 # Add another zip_file_throttler to compress the files being shrunk. 284 # The threshold is set to half of the DEFAULT_FILE_SIZE_LIMIT_BYTE of 285 # shrink_file_throttler. 286 new_args = copy.copy(args) 287 new_args['file_size_threshold_byte'] = 50 * 1024 288 throttlers.append((zip_file_throttler, new_args)) 289 290 # If the above throttlers still can't reduce the result size to be under 291 # max_result_size_KB, try to delete files with various threshold, starting 292 # at 5MB then lowering to 100KB. 293 delete_file_thresholds = [5*1024*1024, 1*1024*1024, 100*1024] 294 # Try to keep tgz files first. 295 exclude_file_patterns = ['.*\.tgz'] 296 for threshold in delete_file_thresholds: 297 new_args = copy.copy(args) 298 new_args.update({'file_size_threshold_byte': threshold, 299 'exclude_file_patterns': exclude_file_patterns}) 300 throttlers.append((delete_file_throttler, new_args)) 301 # Add one more delete_file_throttler to not skipping tgz files. 302 new_args = copy.copy(args) 303 new_args.update({'file_size_threshold_byte': delete_file_thresholds[-1]}) 304 throttlers.append((delete_file_throttler, new_args)) 305 306 # Run the throttlers in order until result size is under max_result_size_KB. 307 old_size = summary.trimmed_size 308 for throttler, args in throttlers: 309 try: 310 args_without_summary = copy.copy(args) 311 del args_without_summary['summary'] 312 utils_lib.LOG('Applying throttler %s, args: %s' % 313 (throttler.__name__, args_without_summary)) 314 throttler.throttle(**args) 315 if throttler_lib.check_throttle_limit(summary, max_result_size_KB): 316 return 317 except: 318 utils_lib.LOG('Failed to apply throttler %s. Exception: %s' % 319 (throttler, traceback.format_exc())) 320 finally: 321 new_size = summary.trimmed_size 322 if new_size == old_size: 323 utils_lib.LOG('Result size was not changed: %s.' % old_size) 324 else: 325 utils_lib.LOG('Result size was reduced from %s to %s.' % 326 (utils_lib.get_size_string(old_size), 327 utils_lib.get_size_string(new_size))) 328 329 330def _setup_logging(): 331 """Set up logging to direct logs to stdout.""" 332 # Direct logging to stdout 333 logger = logging.getLogger() 334 logger.setLevel(logging.DEBUG) 335 handler = logging.StreamHandler(sys.stdout) 336 handler.setLevel(logging.DEBUG) 337 formatter = logging.Formatter('%(asctime)s %(message)s') 338 handler.setFormatter(formatter) 339 logger.handlers = [] 340 logger.addHandler(handler) 341 342 343def _parse_options(): 344 """Options for the main script. 345 346 @return: An option object container arg values. 347 """ 348 parser = argparse.ArgumentParser() 349 parser.add_argument('-p', type=str, dest='path', 350 help='Path to build directory summary.') 351 parser.add_argument('-m', type=int, dest='max_size_KB', default=0, 352 help='Maximum result size in KB. Set to 0 to disable ' 353 'result throttling.') 354 parser.add_argument('-d', action='store_true', dest='delete_summaries', 355 default=False, 356 help='-d to delete all result summary files in the ' 357 'given path.') 358 return parser.parse_args() 359 360 361def execute(path, max_size_KB): 362 """Execute the script with given arguments. 363 364 @param path: Path to build directory summary. 365 @param max_size_KB: Maximum result size in KB. 366 """ 367 utils_lib.LOG('Running result_tools/utils on path: %s' % path) 368 utils_lib.LOG('Running result_tools/utils in pyversion %s ' % sys.version) 369 370 if max_size_KB > 0: 371 utils_lib.LOG('Throttle result size to : %s' % 372 utils_lib.get_size_string(max_size_KB * 1024)) 373 374 result_dir = path 375 if not os.path.isdir(result_dir): 376 result_dir = os.path.dirname(result_dir) 377 summary = result_info.ResultInfo.build_from_path(path) 378 summary_json = json.dumps(summary) 379 summary_file = get_unique_dir_summary_file(result_dir) 380 381 # Make sure there is enough free disk to write the file 382 stat = os.statvfs(path) 383 free_space = stat.f_frsize * stat.f_bavail 384 if free_space - len(summary_json) < MIN_FREE_DISK_BYTES: 385 raise utils_lib.NotEnoughDiskError( 386 'Not enough disk space after saving the summary file. ' 387 'Available free disk: %s bytes. Summary file size: %s bytes.' % 388 (free_space, len(summary_json))) 389 390 with open(summary_file, 'w') as f: 391 f.write(summary_json) 392 utils_lib.LOG('Directory summary of %s is saved to file %s.' % 393 (path, summary_file)) 394 395 if max_size_KB > 0 and summary.trimmed_size > 0: 396 old_size = summary.trimmed_size 397 throttle_probability = float(max_size_KB * 1024) / old_size 398 if random.random() < throttle_probability: 399 utils_lib.LOG( 400 'Skip throttling %s: size=%s, throttle_probability=%s' % 401 (path, old_size, throttle_probability)) 402 else: 403 _throttle_results(summary, max_size_KB) 404 if summary.trimmed_size < old_size: 405 # Files are throttled, save the updated summary file. 406 utils_lib.LOG('Overwrite the summary file: %s' % summary_file) 407 result_info.save_summary(summary, summary_file) 408 409 410def _delete_summaries(path): 411 """Delete all directory summary files in the given directory. 412 413 This is to cleanup the directory so no summary files are left behind to 414 affect later tests. 415 416 @param path: Path to cleanup directory summary. 417 """ 418 # Only summary files directly under the `path` needs to be cleaned. 419 summary_files = glob.glob(os.path.join(path, SUMMARY_FILE_PATTERN)) 420 for summary in summary_files: 421 try: 422 os.remove(summary) 423 except IOError as e: 424 utils_lib.LOG('Failed to delete summary: %s. Error: %s' % 425 (summary, e)) 426 427 428def main(): 429 """main script. """ 430 _setup_logging() 431 options = _parse_options() 432 if options.delete_summaries: 433 _delete_summaries(options.path) 434 else: 435 execute(options.path, options.max_size_KB) 436 437 438if __name__ == '__main__': 439 main() 440