1#!/usr/bin/env python3 2# -- coding: utf-8 -- 3# Copyright (c) 2023-2024 Huawei Device Co., Ltd. 4# Licensed under the Apache License, Version 2.0 (the "License"); 5# you may not use this file except in compliance with the License. 6# You may obtain a copy of the License at 7# 8# http://www.apache.org/licenses/LICENSE-2.0 9# 10# Unless required by applicable law or agreed to in writing, software 11# distributed under the License is distributed on an "AS IS" BASIS, 12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13# See the License for the specific language governing permissions and 14# limitations under the License. 15 16import argparse 17import stat 18import sys 19import os 20import statistics 21from typing import Tuple, List, Dict, Optional 22 23 24def fatal(msg: str) -> None: 25 """Print message and exit from the script with code 1""" 26 print(msg, file=sys.stderr) 27 sys.exit(1) 28 29 30def parse_arguments() -> argparse.Namespace: 31 """Parse script arguments""" 32 argparser = argparse.ArgumentParser( 33 description="Collect gc phase puase statistics" 34 ) 35 argparser.add_argument("gc_log_file", type=str, 36 help="File with detailed info gc logs") 37 argparser.add_argument("result_file", type=str, 38 help="Result markdown file with statistics") 39 argparser.add_argument("--phase", nargs='+', type=str, required=True, 40 dest="phase", help="GC phase in detailed info gc logs") 41 return argparser.parse_args() 42 43 44class PhaseStats: 45 TIMES: List[Tuple[str, float]] = [ 46 ("ms", 1.0), ("us", 0.001), ("ns", 0.000001), ("s", 1000.0)] 47 48 def __init__(self): 49 self.phase_thread_times: List[float] = [] 50 self.phase_cpu_times: List[float] = [] 51 52 @staticmethod 53 def str_to_time(time_str: str) -> float: 54 for possible_time in PhaseStats.TIMES: 55 if time_str.endswith(possible_time[0]): 56 return float(time_str[:-len(possible_time[0])]) * possible_time[1] 57 raise ValueError("Unsupported time format: " + time_str) 58 59 @staticmethod 60 def get_times(times_str: str) -> Tuple[float, float]: 61 times = times_str.split("/") 62 if len(times) != 2: 63 raise ValueError("Incorrect time format in log: " + times_str) 64 return PhaseStats.str_to_time(times[0]), PhaseStats.str_to_time(times[1]) 65 66 def add_times(self, thread_time: float, cpu_time: float) -> None: 67 self.phase_thread_times.append(thread_time) 68 self.phase_cpu_times.append(cpu_time) 69 70 def get_count(self) -> int: 71 return len(self.phase_thread_times) 72 73 def get_thread_min(self) -> float: 74 return min(self.phase_thread_times, default=0.0) 75 76 def get_cpu_min(self) -> float: 77 return min(self.phase_cpu_times, default=0.0) 78 79 def get_thread_max(self) -> float: 80 return max(self.phase_thread_times, default=0.0) 81 82 def get_cpu_max(self) -> float: 83 return max(self.phase_cpu_times, default=0.0) 84 85 def get_thread_avg(self) -> float: 86 if len(self.phase_thread_times) == 0: 87 return 0.0 88 return statistics.mean(self.phase_thread_times) 89 90 def get_cpu_avg(self) -> float: 91 if len(self.phase_cpu_times) == 0: 92 return 0.0 93 return statistics.mean(self.phase_cpu_times) 94 95 def get_thread_median(self) -> float: 96 if len(self.phase_thread_times) == 0: 97 return 0.0 98 return statistics.median(self.phase_thread_times) 99 100 def get_cpu_median(self) -> float: 101 if len(self.phase_cpu_times) == 0: 102 return 0.0 103 return statistics.median(self.phase_cpu_times) 104 105 106def change_gc_mode(log_line: str) -> Optional[str]: 107 collect_types = ["YOUNG", "MIXED", "TENURED", "FULL"] 108 for collect_type in collect_types: 109 if log_line.find(f" [{collect_type} ") != -1: 110 return f"({collect_type})" 111 return None 112 113 114def process_log(phases_list: List[str], gc_log_file: str, result_md_file: str) -> None: 115 phases: Dict[str, PhaseStats] = dict() 116 mode: str = "" 117 with open(gc_log_file, 'r') as log_file: 118 for log_line in log_file.readlines(): 119 new_mode = change_gc_mode(log_line) 120 if new_mode is not None: 121 mode = new_mode 122 for phase in phases_list: 123 start_phase = log_line.find(" " + phase + " ") 124 if start_phase == -1: 125 continue 126 thread_time, cpu_time = PhaseStats.get_times( 127 log_line[start_phase + len(phase) + 1:].strip()) 128 phases.setdefault(phase, PhaseStats()).add_times( 129 thread_time, cpu_time) 130 phases.setdefault( 131 phase + f" {mode}", PhaseStats()).add_times(thread_time, cpu_time) 132 break 133 134 with os.fdopen(os.open(result_md_file, os.O_WRONLY | os.O_CREAT, stat.S_IWUSR | stat.S_IRUSR), 'a') as result_file: 135 title_str = "| stats |" 136 delim_table_str = "|----|" 137 count_str = "| count |" 138 avg_str = "| avg |" 139 median_str = "| median |" 140 max_str = "| max |" 141 min_str = "| min |" 142 for phase in sorted(phases.keys()): 143 title_str += f" {phase} |" 144 delim_table_str = f"{delim_table_str}:----:|" 145 count_str += f" {phases.get(phase).get_count()} / {phases.get(phase).get_count()} |" 146 avg_str += f" {phases.get(phase).get_thread_avg()} / {phases.get(phase).get_cpu_avg()} |" 147 median_str += f" {phases.get(phase).get_thread_median()} / {phases.get(phase).get_cpu_median()} |" 148 max_str += f" {phases.get(phase).get_thread_max()} / {phases.get(phase).get_cpu_max()} |" 149 min_str += f" {phases.get(phase).get_thread_min()} / {phases.get(phase).get_cpu_min()} |" 150 result_file.write(f"<details><summary>{gc_log_file}</summary>\n\n") 151 result_file.write(title_str + '\n') 152 result_file.write(delim_table_str + '\n') 153 result_file.write(count_str + '\n') 154 result_file.write(avg_str + '\n') 155 result_file.write(median_str + '\n') 156 result_file.write(max_str + '\n') 157 result_file.write(min_str + '\n') 158 result_file.write("\n</details>\n") 159 160 161def main() -> None: 162 """Main function for gc phase time stats script""" 163 args = parse_arguments() 164 if not os.path.isfile(args.gc_log_file): 165 fatal(args.gc_log_file + ": file with gc logs does not exist") 166 with os.fdopen(os.open(args.result_file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o664), 'w') as result_file: 167 result_file.write("\n_Generated by phase pause stats script_\n\n") 168 process_log(args.phase, args.gc_log_file, args.result_file) 169 170 171if __name__ == "__main__": 172 main() 173