# Copyright 2017 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""This throttler tries to remove repeated files sharing the same prefix, for
example, screenshots or dumps in the same folder. The dedupe logic does not
compare the file content; instead, it sorts the files with the same prefix by
modification time and removes the files in the middle part.
"""

import os
import re

import result_info_lib
import throttler_lib
import utils_lib


# Number of files to keep for the oldest files.
OLDEST_FILES_TO_KEEP_COUNT = 2
# Number of files to keep for the newest files.
NEWEST_FILES_TO_KEEP_COUNT = 1

# Files with path matching following patterns should not be deduped.
# Raw strings keep regex escapes such as \d from being treated as (invalid)
# Python string escapes; the resulting pattern text is unchanged.
NO_DEDUPE_FILE_PATTERNS = [
        r'debug/.*',
        r'.*perf.data$',  # Performance test data.
        r'.*/debug/.*',
        r'.*dir_summary_\d+.json',
        ]

# regex pattern to get the prefix of a file.
PREFIX_PATTERN = r'([a-zA-Z_-]*).*'


def _group_by(file_infos, keys):
    """Group the file infos by the given keys.

    The grouping key of each file info is built by joining the values of the
    requested attributes with os.sep, so every attribute value must be a
    string.

    @param file_infos: A list of ResultInfo objects.
    @param keys: A list of names of the attribute to group the file infos by.
    @return: A dictionary of grouped_key: [ResultInfo].
    """
    grouped_infos = {}
    for info in file_infos:
        grouped_key = os.sep.join([getattr(info, key) for key in keys])
        grouped_infos.setdefault(grouped_key, []).append(info)
    return grouped_infos


def _dedupe_files(summary, file_infos, max_result_size_KB):
    """Delete the files in the middle of the given list and update the summary.

    The files are ordered by last modification time. The oldest
    OLDEST_FILES_TO_KEEP_COUNT files and the newest NEWEST_FILES_TO_KEEP_COUNT
    files are kept; the files in between are deleted one by one until the
    result size drops under the limit or no candidate is left.

    Note that file_infos is sorted in place.

    @param summary: A ResultInfo object containing result summary.
    @param file_infos: A list of ResultInfo objects to be de-duplicated.
    @param max_result_size_KB: Maximum test result size in KB.
    """
    # Sort file infos based on the modify date of the file.
    file_infos.sort(
            key=lambda f: result_info_lib.get_last_modification_time(f.path))

    # Use an explicit end index instead of a negative slice bound: `[a:-0]`
    # would select nothing, and a negative bound larger than the list would
    # silently select files that are supposed to be kept.
    delete_end = max(0, len(file_infos) - NEWEST_FILES_TO_KEEP_COUNT)
    file_infos_to_delete = file_infos[OLDEST_FILES_TO_KEEP_COUNT:delete_end]

    for file_info in file_infos_to_delete:
        if throttler_lib.try_delete_file_on_disk(file_info.path):
            file_info.trimmed_size = 0

            # Stop as soon as enough space has been reclaimed, so no more
            # files than necessary are removed.
            if throttler_lib.check_throttle_limit(summary, max_result_size_KB):
                return


def throttle(summary, max_result_size_KB):
    """Throttle the files in summary by de-duplicating files.

    Throttling stops once all files are processed or the result size is
    already reduced to be under the given max_result_size_KB.

    @param summary: A ResultInfo object containing result summary.
    @param max_result_size_KB: Maximum test result size in KB.
    """
    _, grouped_files = throttler_lib.sort_result_files(summary)
    for pattern in throttler_lib.RESULT_THROTTLE_PRIORITY:
        throttable_files = list(throttler_lib.get_throttleable_files(
                grouped_files[pattern], NO_DEDUPE_FILE_PATTERNS))

        # Annotate each file info with the attributes used for grouping.
        for info in throttable_files:
            info.parent_dir = os.path.dirname(info.path)
            info.prefix = re.match(PREFIX_PATTERN, info.name).group(1)

        # Group files for each parent directory and file name prefix.
        grouped_infos = _group_by(throttable_files, ['parent_dir', 'prefix'])

        minimum_count = OLDEST_FILES_TO_KEEP_COUNT + NEWEST_FILES_TO_KEEP_COUNT
        for infos in grouped_infos.values():
            if len(infos) <= minimum_count:
                # No need to dedupe if the count of file is too few.
                continue

            # Remove the files that can be deduped.
            utils_lib.LOG('De-duplicating files in %s with the same prefix of '
                          '"%s"' % (infos[0].parent_dir, infos[0].prefix))
            _dedupe_files(summary, infos, max_result_size_KB)

            if throttler_lib.check_throttle_limit(summary, max_result_size_KB):
                return