• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python3
2# -- coding: utf-8 --
3# Copyright (c) 2023-2024 Huawei Device Co., Ltd.
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8# http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15
16import argparse
17import stat
18import sys
19import os
20import statistics
21from typing import Tuple, List, Dict, Optional
22
23
24def fatal(msg: str) -> None:
25    """Print message and exit from the script with code 1"""
26    print(msg, file=sys.stderr)
27    sys.exit(1)
28
29
30def parse_arguments() -> argparse.Namespace:
31    """Parse script arguments"""
32    argparser = argparse.ArgumentParser(
33        description="Collect gc phase puase statistics"
34    )
35    argparser.add_argument("gc_log_file", type=str,
36                           help="File with detailed info gc logs")
37    argparser.add_argument("result_file", type=str,
38                           help="Result markdown file with statistics")
39    argparser.add_argument("--phase", nargs='+', type=str, required=True,
40                           dest="phase", help="GC phase in detailed info gc logs")
41    return argparser.parse_args()
42
43
44class PhaseStats:
45    TIMES: List[Tuple[str, float]] = [
46        ("ms", 1.0), ("us", 0.001), ("ns", 0.000001), ("s", 1000.0)]
47
48    def __init__(self):
49        self.phase_thread_times: List[float] = []
50        self.phase_cpu_times: List[float] = []
51
52    @staticmethod
53    def str_to_time(time_str: str) -> float:
54        for possible_time in PhaseStats.TIMES:
55            if time_str.endswith(possible_time[0]):
56                return float(time_str[:-len(possible_time[0])]) * possible_time[1]
57        raise ValueError("Unsupported time format: " + time_str)
58
59    @staticmethod
60    def get_times(times_str: str) -> Tuple[float, float]:
61        times = times_str.split("/")
62        if len(times) != 2:
63            raise ValueError("Incorrect time format in log: " + times_str)
64        return PhaseStats.str_to_time(times[0]), PhaseStats.str_to_time(times[1])
65
66    def add_times(self, thread_time: float, cpu_time: float) -> None:
67        self.phase_thread_times.append(thread_time)
68        self.phase_cpu_times.append(cpu_time)
69
70    def get_count(self) -> int:
71        return len(self.phase_thread_times)
72
73    def get_thread_min(self) -> float:
74        return min(self.phase_thread_times, default=0.0)
75
76    def get_cpu_min(self) -> float:
77        return min(self.phase_cpu_times, default=0.0)
78
79    def get_thread_max(self) -> float:
80        return max(self.phase_thread_times, default=0.0)
81
82    def get_cpu_max(self) -> float:
83        return max(self.phase_cpu_times, default=0.0)
84
85    def get_thread_avg(self) -> float:
86        if len(self.phase_thread_times) == 0:
87            return 0.0
88        return statistics.mean(self.phase_thread_times)
89
90    def get_cpu_avg(self) -> float:
91        if len(self.phase_cpu_times) == 0:
92            return 0.0
93        return statistics.mean(self.phase_cpu_times)
94
95    def get_thread_median(self) -> float:
96        if len(self.phase_thread_times) == 0:
97            return 0.0
98        return statistics.median(self.phase_thread_times)
99
100    def get_cpu_median(self) -> float:
101        if len(self.phase_cpu_times) == 0:
102            return 0.0
103        return statistics.median(self.phase_cpu_times)
104
105
106def change_gc_mode(log_line: str) -> Optional[str]:
107    collect_types = ["YOUNG", "MIXED", "TENURED", "FULL"]
108    for collect_type in collect_types:
109        if log_line.find(f" [{collect_type} ") != -1:
110            return f"({collect_type})"
111    return None
112
113
114def process_log(phases_list: List[str], gc_log_file: str, result_md_file: str) -> None:
115    phases: Dict[str, PhaseStats] = dict()
116    mode: str = ""
117    with open(gc_log_file, 'r') as log_file:
118        for log_line in log_file.readlines():
119            new_mode = change_gc_mode(log_line)
120            if new_mode is not None:
121                mode = new_mode
122            for phase in phases_list:
123                start_phase = log_line.find(" " + phase + " ")
124                if start_phase == -1:
125                    continue
126                thread_time, cpu_time = PhaseStats.get_times(
127                    log_line[start_phase + len(phase) + 1:].strip())
128                phases.setdefault(phase, PhaseStats()).add_times(
129                    thread_time, cpu_time)
130                phases.setdefault(
131                    phase + f" {mode}", PhaseStats()).add_times(thread_time, cpu_time)
132                break
133
134    with os.fdopen(os.open(result_md_file, os.O_WRONLY | os.O_CREAT, stat.S_IWUSR | stat.S_IRUSR), 'a') as result_file:
135        title_str = "| stats |"
136        delim_table_str = "|----|"
137        count_str = "| count |"
138        avg_str = "| avg |"
139        median_str = "| median |"
140        max_str = "| max |"
141        min_str = "| min |"
142        for phase in sorted(phases.keys()):
143            title_str += f" {phase} |"
144            delim_table_str = f"{delim_table_str}:----:|"
145            count_str += f" {phases.get(phase).get_count()} / {phases.get(phase).get_count()} |"
146            avg_str += f" {phases.get(phase).get_thread_avg()} / {phases.get(phase).get_cpu_avg()} |"
147            median_str += f" {phases.get(phase).get_thread_median()} / {phases.get(phase).get_cpu_median()} |"
148            max_str += f" {phases.get(phase).get_thread_max()} / {phases.get(phase).get_cpu_max()} |"
149            min_str += f" {phases.get(phase).get_thread_min()} / {phases.get(phase).get_cpu_min()} |"
150        result_file.write(f"<details><summary>{gc_log_file}</summary>\n\n")
151        result_file.write(title_str + '\n')
152        result_file.write(delim_table_str + '\n')
153        result_file.write(count_str + '\n')
154        result_file.write(avg_str + '\n')
155        result_file.write(median_str + '\n')
156        result_file.write(max_str + '\n')
157        result_file.write(min_str + '\n')
158        result_file.write("\n</details>\n")
159
160
161def main() -> None:
162    """Main function for gc phase time stats script"""
163    args = parse_arguments()
164    if not os.path.isfile(args.gc_log_file):
165        fatal(args.gc_log_file + ": file with gc logs does not exist")
166    with os.fdopen(os.open(args.result_file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o664), 'w') as result_file:
167        result_file.write("\n_Generated by phase pause stats script_\n\n")
168    process_log(args.phase, args.gc_log_file, args.result_file)
169
170
171if __name__ == "__main__":
172    main()
173