• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright (c) 2021-2022 Huawei Device Co., Ltd.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
15
16import sys
17import os
18import itertools
19from typing import NamedTuple
20
21
class GCPauseStats(NamedTuple):
    """Namespace of constants shared by the gc log parser and the report writer"""
    # Short gc type names searched for in log lines, in lookup order
    GC_TYPES = [
        "YOUNG",
        "MIXED",
        "TENURED",
        "FULL",
    ]
    # Text located right before the pause duration in a gc log line
    PAUSE_DETECT_STR = ", paused "
    # Text located right after the pause duration in a gc log line
    TOTAL_DETECT_STR = " total "
    # Names of the collected statistics, in the order of the report rows
    LIST_OF_STATS = [
        "count",
        "min",
        "max",
        "avg",
        "sum",
    ]
28
29
def sort_one_gc_stat(stats: dict, gc_type: str) -> list:
    """Select the stats keys of one gc type, ordered for the pretty table.

    A key is selected when it contains ``gc_type`` as a substring. The
    selected keys are returned by descending "count"; keys with equal
    counts keep the order they have in ``stats``.
    """
    matching_keys = [name for name in stats if gc_type in name]
    matching_keys.sort(key=lambda name: stats[name]["count"], reverse=True)
    return matching_keys
37
38
def sort_gc_stats(stats: dict) -> list:
    """Build the column order of the pretty table.

    Every gc type present in ``stats`` contributes one group of keys
    (see sort_one_gc_stat). Groups are ordered by the descending "count"
    of their first key and then flattened into a single list.
    """
    groups = [
        sort_one_gc_stat(stats, gc_type)
        for gc_type in GCPauseStats.GC_TYPES
        if gc_type in stats
    ]
    ordered_groups = sorted(
        groups, key=lambda group: stats[group[0]]["count"], reverse=True)
    return list(itertools.chain.from_iterable(ordered_groups))
47
48
def save_pause_stats(gc_log_path: str, file_name: str, stats: dict) -> None:
    """Append a markdown table with pause statistics to the result file.

    The table has a "Parameter" column, a "Total" column and one column
    per key returned by sort_gc_stats; it has one row per entry of
    GCPauseStats.LIST_OF_STATS.

    Args:
        gc_log_path: label written above the table in the "GC logs:" line.
        file_name: path of the file the table is appended to.
        stats: mapping from a stats key to its record of statistics;
            it must contain the "Total" key.
    """
    with open(file_name, 'a') as file:
        file.write(f"GC logs: {gc_log_path}\n\n")
        file.write("| Parameter |")
        gc_stats_list = ["Total"] + sort_gc_stats(stats)
        for gc_type in gc_stats_list:
            file.write(f" {gc_type} |")
        file.write("\n|:----|")
        # One alignment cell per header column that was actually written.
        # len(stats) is not reliable here: a key that matches several gc
        # types is listed more than once, and the row widths would diverge.
        for _ in gc_stats_list:
            file.write(":---:|")
        for stat_type in GCPauseStats.LIST_OF_STATS:
            file.write(f"\n| {stat_type} |")
            for trigger_stat in gc_stats_list:
                file.write(f" {stats.get(trigger_stat).get(stat_type)} |")
        file.write("\n\n")
65
66
def get_ms_time(line: str) -> float:
    """Extract the pause duration from a gc log line and convert it to ms.

    The duration is the text between GCPauseStats.PAUSE_DETECT_STR and
    GCPauseStats.TOTAL_DETECT_STR and must end with "ms", "us" or "s".

    Raises:
        ValueError: if the duration has none of the known unit suffixes.
    """
    # "s" stays last because it is also the tail of "ms" and "us"
    unit_scales = (("ms", 1.0), ("us", 0.001), ("s", 1000.0))
    pause_pos = line.find(GCPauseStats.PAUSE_DETECT_STR)
    value_start = pause_pos + len(GCPauseStats.PAUSE_DETECT_STR)
    value_end = line.find(GCPauseStats.TOTAL_DETECT_STR, pause_pos)
    duration = line[value_start:value_end]
    for suffix, to_ms in unit_scales:
        if duration.endswith(suffix):
            return float(duration[:-len(suffix)]) * to_ms
    raise ValueError("Could not detect time format")
77
78
def get_full_type(line: str, cause_start: int, cause_len: int) -> str:
    """Get gc type with cause.

    Args:
        line: text that contains the gc type name.
        cause_start: index in ``line`` where the gc type name starts.
        cause_len: length of the gc type name.

    Returns:
        The text between the nearest '[' at or before ``cause_start`` and
        the nearest ']' at or after the end of the name, brackets excluded.

    Raises:
        ValueError: if the gc type name is not enclosed in square brackets.
    """
    # Bounded searches: a manual backwards scan would run into negative
    # indices, wrap around to the end of the line and pick up an unrelated
    # '[' located after the gc type name.
    open_pos = line.rfind('[', 0, cause_start + 1)
    close_pos = line.find(']', cause_start + cause_len)
    if open_pos == -1 or close_pos == -1:
        raise ValueError("GC type is not enclosed in square brackets")
    return line[open_pos + 1: close_pos]
87
88
def get_gc_type(line: str) -> tuple:
    """Get gc type and gc type with cause.

    The names from GCPauseStats.GC_TYPES are searched in ``line`` in list
    order and the first one found is used.

    Returns:
        A ``(short_type, full_type)`` pair of strings: the matched name and
        the content of the square brackets around it (see get_full_type).

    Raises:
        ValueError: if ``line`` contains none of the known gc types.
    """
    for cause in GCPauseStats.GC_TYPES:
        cause_pos = line.find(cause)
        if cause_pos != -1:
            return cause, get_full_type(line, cause_pos, len(cause))
    raise ValueError("Unsupported gc cause")
96
97
def update_stats(stats: dict, gc_type: str, time_value: float):
    """Add one pause time to the record stored under ``gc_type``.

    A zeroed record is created when ``gc_type`` is not in ``stats`` yet.
    The record is updated in place: "count" and "sum" grow, "avg" is
    recomputed and "min"/"max" are adjusted to include ``time_value``.
    """
    zeroed_record = {"max": 0.0, "min": 0.0, "avg": 0.0, "sum": 0.0, "count": 0}
    record = stats.setdefault(gc_type, zeroed_record)

    new_count = record.get("count") + 1
    new_sum = record.get("sum") + time_value
    new_avg = new_sum / new_count
    # The 0.0 placeholder of a fresh record must not win the minimum
    new_min = time_value if new_count == 1 else min(record.get("min"), time_value)
    new_max = max(record.get("max"), time_value)

    record["max"] = new_max
    record["min"] = new_min
    record["avg"] = new_avg
    record["sum"] = new_sum
    record["count"] = new_count
    stats[gc_type] = record
123
124
def detect_str(line: str) -> tuple:
    """Detect gc info string from log lines.

    Returns:
        An ``(index, length)`` pair of ints: the position of the first
        known gc log prefix found in ``line`` and the length of that
        prefix, or ``(-1, 0)`` when the line has no such prefix.
    """
    # Prefixes used in mobile and host logs, tried in this order
    for prefix in (" I Ark gc  : ", " I/gc: "):
        prefix_pos = line.find(prefix)
        if prefix_pos != -1:
            return prefix_pos, len(prefix)
    return -1, 0
133
134
def update_group_stats(gc_pause_stats: dict, gc_type: str, full_gc_type: str, time_v: float):
    """Update group (Total, short and full gc type) of stats.

    Args:
        gc_pause_stats: mapping from a stats key to its record; updated
            in place.
        gc_type: short gc type name.
        full_gc_type: gc type together with its cause.
        time_v: pause time to record.
    """
    update_stats(gc_pause_stats, "Total", time_v)
    update_stats(gc_pause_stats, gc_type, time_v)
    # When the full name is the same as the short one, both names refer to
    # one record; a second update would count the same pause twice.
    if full_gc_type != gc_type:
        update_stats(gc_pause_stats, full_gc_type, time_v)
140
141
def process_one_log(gc_log_path: str, result_file_path: str, all_stats: dict) -> None:
    """Process one log file.

    Every line that has a gc log prefix followed by a pause marker is
    parsed; its pause time is recorded both in the stats of this file and
    in ``all_stats``. The table for this file is then appended to the
    result file.

    Args:
        gc_log_path: path to the log file to read.
        result_file_path: path to the file the table is appended to.
        all_stats: stats accumulated over all logs; updated in place.

    Raises:
        ValueError: propagated from get_ms_time / get_gc_type when a gc
            line has an unknown time unit or gc type.
    """
    gc_pause_stats = {"Total": {
        "max": 0.0,
        "min": 0.0,
        "avg": 0.0,
        "sum": 0.0,
        "count": 0
    }
    }
    with open(gc_log_path, 'r') as log_file:
        # Iterate the file object directly: no need to hold the whole log
        # in memory just to walk over its lines once
        for f_line in log_file:
            prefix_pos, prefix_len = detect_str(f_line)
            if prefix_pos == -1:
                continue
            gc_info_str = f_line[prefix_pos + prefix_len:]
            # The marker has to be in the part that is parsed below; a
            # marker located before the gc prefix would make get_ms_time
            # slice at wrong positions
            if GCPauseStats.PAUSE_DETECT_STR not in gc_info_str:
                continue
            time_v = get_ms_time(gc_info_str)
            cause_s, full_cause_s = get_gc_type(gc_info_str)
            update_group_stats(gc_pause_stats, cause_s,
                               full_cause_s, time_v)
            update_group_stats(all_stats, cause_s,
                               full_cause_s, time_v)
    save_pause_stats(gc_log_path, result_file_path, gc_pause_stats)
164
165
def main() -> None:
    """Script's entrypoint.

    Command line: one or more log paths followed by the result file path.
    The result file is rewritten from scratch, then one table per existing
    log file is appended, plus a summary table when more than one log was
    processed. Paths that are not files are reported to stderr and skipped.
    With fewer than two arguments the usage text is printed to stderr and
    the process exits with status 2.
    """
    if len(sys.argv) < 3:
        print("Incorrect parameters count", file=sys.stderr)
        print("Usage: ", file=sys.stderr)
        print(
            f"  python3 {sys.argv[0]} <gc_log_1...> <results_path>", file=sys.stderr)
        print("    gc_log_num   -- Path to gc logs or application logs with gc logs", file=sys.stderr)
        print(
            "    results_path -- Path to result file with pause stats", file=sys.stderr)
        print(
            f"Example: python3 {sys.argv[0]} gc_log.txt result.md", file=sys.stderr)
        # sys.exit, not the bare exit(): the latter is injected by the site
        # module for interactive use and is missing e.g. under `python -S`
        sys.exit(2)
    all_gc_stats = {"Total": {
        "max": 0.0,
        "min": 0.0,
        "avg": 0.0,
        "sum": 0.0,
        "count": 0
    }
    }
    result_file_path = os.path.abspath(sys.argv[-1])

    # Start the report from scratch; the tables are appended afterwards
    with open(result_file_path, 'w') as result_file:
        result_file.write("_Generated by gc pause stats script_\n\n")
        result_file.write("All times in ms\n\n")

    gc_log_paths = []
    for log_path in map(os.path.abspath, sys.argv[1:-1]):
        if os.path.isfile(log_path):
            gc_log_paths.append(log_path)
        else:
            print(f"{log_path}: No such log file", file=sys.stderr)

    for log_path in gc_log_paths:
        process_one_log(log_path, result_file_path, all_gc_stats)
    # A summary table only makes sense for several logs
    if len(gc_log_paths) > 1:
        save_pause_stats(
            f"All {len(gc_log_paths)} logs", result_file_path, all_gc_stats)
205
206
# Run the script only when the file is executed directly, not on import
if __name__ == "__main__":
    main()
209