1#!/usr/bin/python2 2# Copyright 2017 The Chromium OS Authors. All rights reserved. 3# Use of this source code is governed by a BSD-style license that can be 4# found in the LICENSE file. 5 6""" 7This is a utility to build a summary of the given directory. and save to a json 8file. 9 10usage: utils.py [-h] [-p PATH] [-m MAX_SIZE_KB] 11 12optional arguments: 13 -p PATH Path to build directory summary. 14 -m MAX_SIZE_KB Maximum result size in KB. Set to 0 to disable result 15 throttling. 16 17The content of the json file looks like: 18{'default': {'/D': [{'control': {'/S': 734}}, 19 {'debug': {'/D': [{'client.0.DEBUG': {'/S': 5698}}, 20 {'client.0.ERROR': {'/S': 254}}, 21 {'client.0.INFO': {'/S': 1020}}, 22 {'client.0.WARNING': {'/S': 242}}], 23 '/S': 7214}} 24 ], 25 '/S': 7948 26 } 27} 28""" 29 30from __future__ import division 31from __future__ import print_function 32 33import argparse 34import copy 35import fnmatch 36import glob 37import json 38import logging 39import os 40import random 41from six.moves import range 42import sys 43import time 44import traceback 45 46try: 47 from autotest_lib.client.bin.result_tools import dedupe_file_throttler 48 from autotest_lib.client.bin.result_tools import delete_file_throttler 49 from autotest_lib.client.bin.result_tools import result_info 50 from autotest_lib.client.bin.result_tools import shrink_file_throttler 51 from autotest_lib.client.bin.result_tools import throttler_lib 52 from autotest_lib.client.bin.result_tools import utils_lib 53 from autotest_lib.client.bin.result_tools import zip_file_throttler 54except ImportError: 55 import dedupe_file_throttler 56 import delete_file_throttler 57 import result_info 58 import shrink_file_throttler 59 import throttler_lib 60 import utils_lib 61 import zip_file_throttler 62 63 64# Do NOT import autotest_lib modules here. This module can be executed without 65# dependency on other autotest modules. This is to keep the logic of result 66# trimming on the server side, instead of depending on the autotest client 67# module. 68 69DEFAULT_SUMMARY_FILENAME_FMT = 'dir_summary_%d.json' 70SUMMARY_FILE_PATTERN = 'dir_summary_*.json' 71MERGED_SUMMARY_FILENAME = 'dir_summary_final.json' 72 73# Minimum disk space should be available after saving the summary file. 74MIN_FREE_DISK_BYTES = 10 * 1024 * 1024 75 76# Autotest uses some state files to track process running state. The files are 77# deleted from test results. Therefore, these files can be ignored. 78FILES_TO_IGNORE = set([ 79 'control.autoserv.state' 80]) 81 82# Smallest file size to shrink to. 83MIN_FILE_SIZE_LIMIT_BYTE = 10 * 1024 84 85def get_unique_dir_summary_file(path): 86 """Get a unique file path to save the directory summary json string. 87 88 @param path: The directory path to save the summary file to. 89 """ 90 summary_file = DEFAULT_SUMMARY_FILENAME_FMT % time.time() 91 # Make sure the summary file name is unique. 92 file_name = os.path.join(path, summary_file) 93 if os.path.exists(file_name): 94 count = 1 95 name, ext = os.path.splitext(summary_file) 96 while os.path.exists(file_name): 97 file_name = os.path.join(path, '%s_%s%s' % (name, count, ext)) 98 count += 1 99 return file_name 100 101 102def _preprocess_result_dir_path(path): 103 """Verify the result directory path is valid and make sure it ends with `/`. 104 105 @param path: A path to the result directory. 106 @return: A verified and processed path to the result directory. 107 @raise IOError: If the path doesn't exist. 108 @raise ValueError: If the path is not a directory. 109 """ 110 if not os.path.exists(path): 111 raise IOError('Path %s does not exist.' % path) 112 113 if not os.path.isdir(path): 114 raise ValueError('The given path %s is a file. It must be a ' 115 'directory.' % path) 116 117 # Make sure the path ends with `/` so the root key of summary json is always 118 # utils_lib.ROOT_DIR ('') 119 if not path.endswith(os.sep): 120 path = path + os.sep 121 122 return path 123 124 125def _delete_missing_entries(summary_old, summary_new): 126 """Delete files/directories only exists in old summary. 127 128 When the new summary is final, i.e., it's built from the final result 129 directory, files or directories missing are considered to be deleted and 130 trimmed to size 0. 131 132 @param summary_old: Old directory summary. 133 @param summary_new: New directory summary. 134 """ 135 new_files = summary_new.get_file_names() 136 old_files = summary_old.get_file_names() 137 for name in old_files: 138 old_file = summary_old.get_file(name) 139 if name not in new_files: 140 if old_file.is_dir: 141 # Trim sub-directories. 142 with old_file.disable_updating_parent_size_info(): 143 _delete_missing_entries(old_file, result_info.EMPTY) 144 old_file.update_sizes() 145 elif name in FILES_TO_IGNORE: 146 # Remove the file from the summary as it can be ignored. 147 summary_old.remove_file(name) 148 else: 149 with old_file.disable_updating_parent_size_info(): 150 # Before setting the trimmed size to 0, update the collected 151 # size if it's not set yet. 152 if not old_file.is_collected_size_recorded: 153 old_file.collected_size = old_file.trimmed_size 154 old_file.trimmed_size = 0 155 elif old_file.is_dir: 156 # If `name` is a directory in the old summary, but a file in the new 157 # summary, delete the entry in the old summary. 158 new_file = summary_new.get_file(name) 159 if not new_file.is_dir: 160 new_file = result_info.EMPTY 161 _delete_missing_entries(old_file, new_file) 162 163 164def _relocate_summary(result_dir, summary_file, summary): 165 """Update the given summary with the path relative to the result_dir. 166 167 @param result_dir: Path to the result directory. 168 @param summary_file: Path to the summary file. 169 @param summary: A directory summary inside the given result_dir or its 170 sub-directory. 171 @return: An updated summary with the path relative to the result_dir. 172 """ 173 sub_path = os.path.dirname(summary_file).replace( 174 result_dir.rstrip(os.sep), '') 175 if sub_path == '': 176 return summary 177 178 folders = sub_path.split(os.sep) 179 180 # The first folder is always '' because of the leading `/` in sub_path. 181 parent = result_info.ResultInfo( 182 result_dir, utils_lib.ROOT_DIR, parent_result_info=None) 183 root = parent 184 185 # That makes sure root has only one folder of utils_lib.ROOT_DIR. 186 for i in range(1, len(folders)): 187 child = result_info.ResultInfo( 188 parent.path, folders[i], parent_result_info=parent) 189 if i == len(folders) - 1: 190 # Add files in summary to child. 191 for info in summary.files: 192 child.files.append(info) 193 194 parent.files.append(child) 195 parent = child 196 197 parent.update_sizes() 198 return root 199 200 201def merge_summaries(path): 202 """Merge all directory summaries in the given path. 203 204 This function calculates the total size of result files being collected for 205 the test device and the files generated on the drone. It also returns merged 206 directory summary. 207 208 @param path: A path to search for directory summaries. 209 @return a tuple of (client_collected_bytes, merged_summary, files): 210 client_collected_bytes: The total size of results collected from 211 the DUT. The number can be larger than the total file size of 212 the given path, as files can be overwritten or removed. 213 merged_summary: The merged directory summary of the given path. 214 files: All summary files in the given path, including 215 sub-directories. 216 """ 217 path = _preprocess_result_dir_path(path) 218 # Find all directory summary files and sort them by the time stamp in file 219 # name. 220 summary_files = [] 221 for root, _, filenames in os.walk(path): 222 for filename in fnmatch.filter(filenames, 'dir_summary_*.json'): 223 summary_files.append(os.path.join(root, filename)) 224 summary_files = sorted(summary_files, key=os.path.getmtime) 225 226 all_summaries = [] 227 for summary_file in summary_files: 228 try: 229 summary = result_info.load_summary_json_file(summary_file) 230 summary = _relocate_summary(path, summary_file, summary) 231 all_summaries.append(summary) 232 except (IOError, ValueError) as e: 233 utils_lib.LOG('Failed to load summary file %s Error: %s' % 234 (summary_file, e)) 235 236 # Merge all summaries. 237 merged_summary = all_summaries[0] if len(all_summaries) > 0 else None 238 for summary in all_summaries[1:]: 239 merged_summary.merge(summary) 240 # After all summaries from the test device (client side) are merged, we can 241 # get the total size of result files being transfered from the test device. 242 # If there is no directory summary collected, default client_collected_bytes 243 # to 0. 244 client_collected_bytes = 0 245 if merged_summary: 246 client_collected_bytes = merged_summary.collected_size 247 248 # Get the summary of current directory 249 last_summary = result_info.ResultInfo.build_from_path(path) 250 251 if merged_summary: 252 merged_summary.merge(last_summary, is_final=True) 253 _delete_missing_entries(merged_summary, last_summary) 254 else: 255 merged_summary = last_summary 256 257 return client_collected_bytes, merged_summary, summary_files 258 259 260def _throttle_results(summary, max_result_size_KB): 261 """Throttle the test results by limiting to the given maximum size. 262 263 @param summary: A ResultInfo object containing result summary. 264 @param max_result_size_KB: Maximum test result size in KB. 265 """ 266 if throttler_lib.check_throttle_limit(summary, max_result_size_KB): 267 utils_lib.LOG( 268 'Result size is %s, which is less than %d KB. No need to ' 269 'throttle.' % 270 (utils_lib.get_size_string(summary.trimmed_size), 271 max_result_size_KB)) 272 return 273 274 args = {'summary': summary, 275 'max_result_size_KB': max_result_size_KB} 276 args_skip_autotest_log = copy.copy(args) 277 args_skip_autotest_log['skip_autotest_log'] = True 278 # Apply the throttlers in following order. 279 throttlers = [ 280 (shrink_file_throttler, copy.copy(args_skip_autotest_log)), 281 (zip_file_throttler, copy.copy(args_skip_autotest_log)), 282 (shrink_file_throttler, copy.copy(args)), 283 (dedupe_file_throttler, copy.copy(args)), 284 (zip_file_throttler, copy.copy(args)), 285 ] 286 287 # Add another zip_file_throttler to compress the files being shrunk. 288 # The threshold is set to half of the DEFAULT_FILE_SIZE_LIMIT_BYTE of 289 # shrink_file_throttler. 290 new_args = copy.copy(args) 291 new_args['file_size_threshold_byte'] = 50 * 1024 292 throttlers.append((zip_file_throttler, new_args)) 293 294 # If the above throttlers still can't reduce the result size to be under 295 # max_result_size_KB, try to delete files with various threshold, starting 296 # at 5MB then lowering to 100KB. 297 delete_file_thresholds = [5*1024*1024, 1*1024*1024, 100*1024] 298 # Try to keep tgz files first. 299 exclude_file_patterns = ['.*\.tgz'] 300 for threshold in delete_file_thresholds: 301 new_args = copy.copy(args) 302 new_args.update({'file_size_threshold_byte': threshold, 303 'exclude_file_patterns': exclude_file_patterns}) 304 throttlers.append((delete_file_throttler, new_args)) 305 # Add one more delete_file_throttler to not skipping tgz files. 306 new_args = copy.copy(args) 307 new_args.update({'file_size_threshold_byte': delete_file_thresholds[-1]}) 308 throttlers.append((delete_file_throttler, new_args)) 309 310 # Run the throttlers in order until result size is under max_result_size_KB. 311 old_size = summary.trimmed_size 312 for throttler, args in throttlers: 313 try: 314 args_without_summary = copy.copy(args) 315 del args_without_summary['summary'] 316 utils_lib.LOG('Applying throttler %s, args: %s' % 317 (throttler.__name__, args_without_summary)) 318 throttler.throttle(**args) 319 if throttler_lib.check_throttle_limit(summary, max_result_size_KB): 320 return 321 except: 322 utils_lib.LOG('Failed to apply throttler %s. Exception: %s' % 323 (throttler, traceback.format_exc())) 324 finally: 325 new_size = summary.trimmed_size 326 if new_size == old_size: 327 utils_lib.LOG('Result size was not changed: %s.' % old_size) 328 else: 329 utils_lib.LOG('Result size was reduced from %s to %s.' % 330 (utils_lib.get_size_string(old_size), 331 utils_lib.get_size_string(new_size))) 332 333 334def _setup_logging(): 335 """Set up logging to direct logs to stdout.""" 336 # Direct logging to stdout 337 logger = logging.getLogger() 338 logger.setLevel(logging.DEBUG) 339 handler = logging.StreamHandler(sys.stdout) 340 handler.setLevel(logging.DEBUG) 341 formatter = logging.Formatter('%(asctime)s %(message)s') 342 handler.setFormatter(formatter) 343 logger.handlers = [] 344 logger.addHandler(handler) 345 346 347def _parse_options(): 348 """Options for the main script. 349 350 @return: An option object container arg values. 351 """ 352 parser = argparse.ArgumentParser() 353 parser.add_argument('-p', type=str, dest='path', 354 help='Path to build directory summary.') 355 parser.add_argument('-m', type=int, dest='max_size_KB', default=0, 356 help='Maximum result size in KB. Set to 0 to disable ' 357 'result throttling.') 358 parser.add_argument('-d', action='store_true', dest='delete_summaries', 359 default=False, 360 help='-d to delete all result summary files in the ' 361 'given path.') 362 return parser.parse_args() 363 364 365def execute(path, max_size_KB): 366 """Execute the script with given arguments. 367 368 @param path: Path to build directory summary. 369 @param max_size_KB: Maximum result size in KB. 370 """ 371 utils_lib.LOG('Running result_tools/utils on path: %s' % path) 372 if max_size_KB > 0: 373 utils_lib.LOG('Throttle result size to : %s' % 374 utils_lib.get_size_string(max_size_KB * 1024)) 375 376 result_dir = path 377 if not os.path.isdir(result_dir): 378 result_dir = os.path.dirname(result_dir) 379 summary = result_info.ResultInfo.build_from_path(path) 380 summary_json = json.dumps(summary) 381 summary_file = get_unique_dir_summary_file(result_dir) 382 383 # Make sure there is enough free disk to write the file 384 stat = os.statvfs(path) 385 free_space = stat.f_frsize * stat.f_bavail 386 if free_space - len(summary_json) < MIN_FREE_DISK_BYTES: 387 raise utils_lib.NotEnoughDiskError( 388 'Not enough disk space after saving the summary file. ' 389 'Available free disk: %s bytes. Summary file size: %s bytes.' % 390 (free_space, len(summary_json))) 391 392 with open(summary_file, 'w') as f: 393 f.write(summary_json) 394 utils_lib.LOG('Directory summary of %s is saved to file %s.' % 395 (path, summary_file)) 396 397 if max_size_KB > 0 and summary.trimmed_size > 0: 398 old_size = summary.trimmed_size 399 throttle_probability = float(max_size_KB * 1024) / old_size 400 if random.random() < throttle_probability: 401 utils_lib.LOG( 402 'Skip throttling %s: size=%s, throttle_probability=%s' % 403 (path, old_size, throttle_probability)) 404 else: 405 _throttle_results(summary, max_size_KB) 406 if summary.trimmed_size < old_size: 407 # Files are throttled, save the updated summary file. 408 utils_lib.LOG('Overwrite the summary file: %s' % summary_file) 409 result_info.save_summary(summary, summary_file) 410 411 412def _delete_summaries(path): 413 """Delete all directory summary files in the given directory. 414 415 This is to cleanup the directory so no summary files are left behind to 416 affect later tests. 417 418 @param path: Path to cleanup directory summary. 419 """ 420 # Only summary files directly under the `path` needs to be cleaned. 421 summary_files = glob.glob(os.path.join(path, SUMMARY_FILE_PATTERN)) 422 for summary in summary_files: 423 try: 424 os.remove(summary) 425 except IOError as e: 426 utils_lib.LOG('Failed to delete summary: %s. Error: %s' % 427 (summary, e)) 428 429 430def main(): 431 """main script. """ 432 _setup_logging() 433 options = _parse_options() 434 if options.delete_summaries: 435 _delete_summaries(options.path) 436 else: 437 execute(options.path, options.max_size_KB) 438 439 440if __name__ == '__main__': 441 main() 442