#!/usr/bin/python
# Copyright 2017 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""
This is a utility to build a summary of the given directory and save it to a
json file.

usage: utils.py [-h] [-p PATH] [-m MAX_SIZE_KB]

optional arguments:
  -p PATH         Path to build directory summary.
  -m MAX_SIZE_KB  Maximum result size in KB. Set to 0 to disable result
                  throttling.

The content of the json file looks like:
{'default': {'/D': [{'control': {'/S': 734}},
                    {'debug': {'/D': [{'client.0.DEBUG': {'/S': 5698}},
                                      {'client.0.ERROR': {'/S': 254}},
                                      {'client.0.INFO': {'/S': 1020}},
                                      {'client.0.WARNING': {'/S': 242}}],
                               '/S': 7214}}
                    ],
             '/S': 7948
            }
}
"""

import argparse
import copy
import fnmatch
import glob
import json
import logging
import os
import random
import sys
import time
import traceback

import dedupe_file_throttler
import delete_file_throttler
import result_info
import shrink_file_throttler
import throttler_lib
import utils_lib
import zip_file_throttler


# Do NOT import autotest_lib modules here. This module can be executed without
# dependency on other autotest modules. This is to keep the logic of result
# trimming on the server side, instead of depending on the autotest client
# module.

DEFAULT_SUMMARY_FILENAME_FMT = 'dir_summary_%d.json'
SUMMARY_FILE_PATTERN = 'dir_summary_*.json'
MERGED_SUMMARY_FILENAME = 'dir_summary_final.json'

# Minimum disk space that should still be available after saving the summary
# file.
MIN_FREE_DISK_BYTES = 10 * 1024 * 1024

# Autotest uses some state files to track process running state. The files are
# deleted from test results. Therefore, these files can be ignored.
FILES_TO_IGNORE = set([
        'control.autoserv.state'
])

# Smallest file size to shrink a file to.
MIN_FILE_SIZE_LIMIT_BYTE = 10 * 1024


def get_unique_dir_summary_file(path):
    """Get a unique file path to save the directory summary json string.

    @param path: The directory path to save the summary file to.
    """
    summary_file = DEFAULT_SUMMARY_FILENAME_FMT % time.time()
    # Make sure the summary file name is unique.
    file_name = os.path.join(path, summary_file)
    if os.path.exists(file_name):
        count = 1
        name, ext = os.path.splitext(summary_file)
        while os.path.exists(file_name):
            file_name = os.path.join(path, '%s_%s%s' % (name, count, ext))
            count += 1
    return file_name


def _preprocess_result_dir_path(path):
    """Verify the result directory path is valid and make sure it ends with `/`.

    @param path: A path to the result directory.
    @return: A verified and processed path to the result directory.
    @raise IOError: If the path doesn't exist.
    @raise ValueError: If the path is not a directory.
    """
    if not os.path.exists(path):
        raise IOError('Path %s does not exist.' % path)

    if not os.path.isdir(path):
        raise ValueError('The given path %s is a file. It must be a '
                         'directory.' % path)

    # Make sure the path ends with `/` so the root key of the summary json is
    # always utils_lib.ROOT_DIR ('').
    if not path.endswith(os.sep):
        path = path + os.sep

    return path
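

# The helper below is an illustrative sketch only and is not used by this
# module. It shows how to walk the summary json structure documented in the
# module docstring, assuming the '/D' (directory content) and '/S' (size in
# bytes) key convention shown there. The function name is hypothetical.
def _example_iter_summary(name, node, prefix=''):
    """Yield (path, size) pairs for every entry in a summary dict."""
    path = os.path.join(prefix, name)
    yield path, node.get('/S', 0)
    for child in node.get('/D', []):
        # Each child is a single-entry dict mapping a name to its node.
        for child_name, child_node in child.items():
            for pair in _example_iter_summary(child_name, child_node, path):
                yield pair
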
def _delete_missing_entries(summary_old, summary_new):
    """Delete files/directories that only exist in the old summary.

    When the new summary is final, i.e., it's built from the final result
    directory, files or directories missing from it are considered to be
    deleted, so their trimmed size is set to 0.

    @param summary_old: Old directory summary.
    @param summary_new: New directory summary.
    """
    new_files = summary_new.get_file_names()
    old_files = summary_old.get_file_names()
    for name in old_files:
        old_file = summary_old.get_file(name)
        if name not in new_files:
            if old_file.is_dir:
                # Trim sub-directories.
                with old_file.disable_updating_parent_size_info():
                    _delete_missing_entries(old_file, result_info.EMPTY)
                old_file.update_sizes()
            elif name in FILES_TO_IGNORE:
                # Remove the file from the summary as it can be ignored.
                summary_old.remove_file(name)
            else:
                with old_file.disable_updating_parent_size_info():
                    # Before setting the trimmed size to 0, update the
                    # collected size if it's not set yet.
                    if not old_file.is_collected_size_recorded:
                        old_file.collected_size = old_file.trimmed_size
                    old_file.trimmed_size = 0
        elif old_file.is_dir:
            # If `name` is a directory in the old summary, but a file in the
            # new summary, delete the entry in the old summary.
            new_file = summary_new.get_file(name)
            if not new_file.is_dir:
                new_file = result_info.EMPTY
            _delete_missing_entries(old_file, new_file)


def _relocate_summary(result_dir, summary_file, summary):
    """Update the given summary with the path relative to the result_dir.

    @param result_dir: Path to the result directory.
    @param summary_file: Path to the summary file.
    @param summary: A directory summary inside the given result_dir or its
            sub-directory.
    @return: An updated summary with the path relative to the result_dir.
    """
    sub_path = os.path.dirname(summary_file).replace(
            result_dir.rstrip(os.sep), '')
    if sub_path == '':
        return summary

    folders = sub_path.split(os.sep)

    # The first folder is always '' because of the leading `/` in sub_path.
    parent = result_info.ResultInfo(
            result_dir, utils_lib.ROOT_DIR, parent_result_info=None)
    root = parent

    # Starting the loop at 1 also ensures `root` contains only a single
    # top-level folder, utils_lib.ROOT_DIR.
    for i in range(1, len(folders)):
        child = result_info.ResultInfo(
                parent.path, folders[i], parent_result_info=parent)
        if i == len(folders) - 1:
            # Add files in summary to child.
            for info in summary.files:
                child.files.append(info)

        parent.files.append(child)
        parent = child

    parent.update_sizes()
    return root
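

# A worked example of the relocation above (values are illustrative): with
# result_dir '/results/1-test/' and summary_file
# '/results/1-test/debug/dir_summary_1.json', sub_path is '/debug' and
# folders is ['', 'debug']. The loop then wraps the loaded summary in a
# 'debug' ResultInfo under the ROOT_DIR node, so its sizes are attributed to
# the right sub-directory when summaries are merged.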
def merge_summaries(path):
    """Merge all directory summaries in the given path.

    This function calculates the total size of result files being collected
    for the test device and the files generated on the drone. It also returns
    the merged directory summary.

    @param path: A path to search for directory summaries.
    @return: A tuple of (client_collected_bytes, merged_summary, files):
            client_collected_bytes: The total size of results collected from
                the DUT. The number can be larger than the total file size of
                the given path, as files can be overwritten or removed.
            merged_summary: The merged directory summary of the given path.
            files: All summary files in the given path, including
                sub-directories.
    """
    path = _preprocess_result_dir_path(path)
    # Find all directory summary files and sort them by modification time, so
    # summaries are merged in the order they were created.
    summary_files = []
    for root, _, filenames in os.walk(path):
        for filename in fnmatch.filter(filenames, SUMMARY_FILE_PATTERN):
            summary_files.append(os.path.join(root, filename))
    summary_files = sorted(summary_files, key=os.path.getmtime)

    all_summaries = []
    for summary_file in summary_files:
        try:
            summary = result_info.load_summary_json_file(summary_file)
            summary = _relocate_summary(path, summary_file, summary)
            all_summaries.append(summary)
        except (IOError, ValueError) as e:
            utils_lib.LOG('Failed to load summary file %s. Error: %s' %
                          (summary_file, e))

    # Merge all summaries.
    merged_summary = all_summaries[0] if all_summaries else None
    for summary in all_summaries[1:]:
        merged_summary.merge(summary)
    # After all summaries from the test device (client side) are merged, we
    # can get the total size of result files being transferred from the test
    # device. If there is no directory summary collected, default
    # client_collected_bytes to 0.
    client_collected_bytes = 0
    if merged_summary:
        client_collected_bytes = merged_summary.collected_size

    # Get the summary of the current directory.
    last_summary = result_info.ResultInfo.build_from_path(path)

    if merged_summary:
        merged_summary.merge(last_summary, is_final=True)
        _delete_missing_entries(merged_summary, last_summary)
    else:
        merged_summary = last_summary

    return client_collected_bytes, merged_summary, summary_files
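

# Illustrative usage of merge_summaries (the path is hypothetical):
#
#     collected_bytes, merged_summary, files = merge_summaries(
#             '/usr/local/autotest/results/1-test')
#     utils_lib.LOG('Client collected %s.' %
#                   utils_lib.get_size_string(collected_bytes))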
def _throttle_results(summary, max_result_size_KB):
    """Throttle the test results by limiting them to the given maximum size.

    @param summary: A ResultInfo object containing the result summary.
    @param max_result_size_KB: Maximum test result size in KB.
    """
    if throttler_lib.check_throttle_limit(summary, max_result_size_KB):
        utils_lib.LOG(
                'Result size is %s, which is less than %d KB. No need to '
                'throttle.' %
                (utils_lib.get_size_string(summary.trimmed_size),
                 max_result_size_KB))
        return

    args = {'summary': summary,
            'max_result_size_KB': max_result_size_KB}
    args_skip_autotest_log = copy.copy(args)
    args_skip_autotest_log['skip_autotest_log'] = True
    # Apply the throttlers in the following order.
    throttlers = [
            (shrink_file_throttler, copy.copy(args_skip_autotest_log)),
            (zip_file_throttler, copy.copy(args_skip_autotest_log)),
            (shrink_file_throttler, copy.copy(args)),
            (dedupe_file_throttler, copy.copy(args)),
            (zip_file_throttler, copy.copy(args)),
    ]

    # Add another zip_file_throttler to compress the files being shrunk.
    # The threshold is set to half of the DEFAULT_FILE_SIZE_LIMIT_BYTE of
    # shrink_file_throttler.
    new_args = copy.copy(args)
    new_args['file_size_threshold_byte'] = 50 * 1024
    throttlers.append((zip_file_throttler, new_args))

    # If the above throttlers still can't reduce the result size to be under
    # max_result_size_KB, try to delete files with various thresholds,
    # starting at 5MB then lowering to 100KB.
    delete_file_thresholds = [5*1024*1024, 1*1024*1024, 100*1024]
    # Try to keep tgz files first.
    exclude_file_patterns = [r'.*\.tgz']
    for threshold in delete_file_thresholds:
        new_args = copy.copy(args)
        new_args.update({'file_size_threshold_byte': threshold,
                         'exclude_file_patterns': exclude_file_patterns})
        throttlers.append((delete_file_throttler, new_args))
    # Add one more delete_file_throttler that does not skip tgz files.
    new_args = copy.copy(args)
    new_args.update({'file_size_threshold_byte': delete_file_thresholds[-1]})
    throttlers.append((delete_file_throttler, new_args))

    # Run the throttlers in order until the result size is under
    # max_result_size_KB.
    old_size = summary.trimmed_size
    for throttler, args in throttlers:
        try:
            args_without_summary = copy.copy(args)
            del args_without_summary['summary']
            utils_lib.LOG('Applying throttler %s, args: %s' %
                          (throttler.__name__, args_without_summary))
            throttler.throttle(**args)
            if throttler_lib.check_throttle_limit(summary, max_result_size_KB):
                return
        except Exception:
            utils_lib.LOG('Failed to apply throttler %s. Exception: %s' %
                          (throttler, traceback.format_exc()))
        finally:
            new_size = summary.trimmed_size
            if new_size == old_size:
                utils_lib.LOG('Result size was not changed: %s.' %
                              utils_lib.get_size_string(old_size))
            else:
                utils_lib.LOG('Result size was reduced from %s to %s.' %
                              (utils_lib.get_size_string(old_size),
                               utils_lib.get_size_string(new_size)))
                # Track the latest size so the next iteration logs its own
                # change, not the cumulative one.
                old_size = new_size


def _setup_logging():
    """Set up logging to direct logs to stdout."""
    logger = logging.getLogger()
    logger.setLevel(logging.DEBUG)
    handler = logging.StreamHandler(sys.stdout)
    handler.setLevel(logging.DEBUG)
    formatter = logging.Formatter('%(asctime)s %(message)s')
    handler.setFormatter(formatter)
    logger.handlers = []
    logger.addHandler(handler)


def _parse_options():
    """Parse options for the main script.

    @return: An options object containing the parsed arg values.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-p', type=str, dest='path',
                        help='Path to build directory summary.')
    parser.add_argument('-m', type=int, dest='max_size_KB', default=0,
                        help='Maximum result size in KB. Set to 0 to disable '
                             'result throttling.')
    parser.add_argument('-d', action='store_true', dest='delete_summaries',
                        default=False,
                        help='-d to delete all result summary files in the '
                             'given path.')
    return parser.parse_args()
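

# Each throttler module applied in _throttle_results above is expected to
# expose a `throttle` function accepting the keyword arguments built there.
# A sketch of the assumed interface (not a definitive signature; individual
# throttlers take only the subset of keyword args they support):
#
#     def throttle(summary, max_result_size_KB, skip_autotest_log=False,
#                  file_size_threshold_byte=None, exclude_file_patterns=None):
#         """Reduce files under `summary` until the size limit is met."""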
def execute(path, max_size_KB):
    """Execute the script with given arguments.

    @param path: Path to build directory summary.
    @param max_size_KB: Maximum result size in KB.
    """
    utils_lib.LOG('Running result_tools/utils on path: %s' % path)
    if max_size_KB > 0:
        utils_lib.LOG('Throttle result size to: %s' %
                      utils_lib.get_size_string(max_size_KB * 1024))

    result_dir = path
    if not os.path.isdir(result_dir):
        result_dir = os.path.dirname(result_dir)
    summary = result_info.ResultInfo.build_from_path(path)
    summary_json = json.dumps(summary)
    summary_file = get_unique_dir_summary_file(result_dir)

    # Make sure there is enough free disk to write the summary file.
    stat = os.statvfs(path)
    free_space = stat.f_frsize * stat.f_bavail
    if free_space - len(summary_json) < MIN_FREE_DISK_BYTES:
        raise utils_lib.NotEnoughDiskError(
                'Not enough disk space after saving the summary file. '
                'Available free disk: %s bytes. Summary file size: %s bytes.' %
                (free_space, len(summary_json)))

    with open(summary_file, 'w') as f:
        f.write(summary_json)
    utils_lib.LOG('Directory summary of %s is saved to file %s.' %
                  (path, summary_file))

    if max_size_KB > 0 and summary.trimmed_size > 0:
        old_size = summary.trimmed_size
        throttle_probability = float(max_size_KB * 1024) / old_size
        if random.random() < throttle_probability:
            utils_lib.LOG(
                    'Skip throttling %s: size=%s, throttle_probability=%s' %
                    (path, old_size, throttle_probability))
        else:
            _throttle_results(summary, max_size_KB)
            if summary.trimmed_size < old_size:
                # Files are throttled. Save the updated summary file.
                utils_lib.LOG('Overwrite the summary file: %s' % summary_file)
                result_info.save_summary(summary, summary_file)


def _delete_summaries(path):
    """Delete all directory summary files in the given directory.

    This cleans up the directory so that no summary files are left behind to
    affect later tests.

    @param path: Path to the directory to clean up.
    """
    # Only summary files directly under the `path` need to be cleaned.
    summary_files = glob.glob(os.path.join(path, SUMMARY_FILE_PATTERN))
    for summary in summary_files:
        try:
            os.remove(summary)
        except OSError as e:
            utils_lib.LOG('Failed to delete summary: %s. Error: %s' %
                          (summary, e))


def main():
    """Main script entry point."""
    _setup_logging()
    options = _parse_options()
    if options.delete_summaries:
        _delete_summaries(options.path)
    else:
        execute(options.path, options.max_size_KB)


if __name__ == '__main__':
    main()
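
# Example invocations (the paths and size are illustrative):
#
#     # Build a summary of the given directory and throttle results to ~20MB.
#     utils.py -p /usr/local/autotest/results/1-test -m 20000
#
#     # Delete all result summary files directly under the given path.
#     utils.py -p /usr/local/autotest/results/1-test -d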