1#!/usr/bin/env python3 2# -*- coding: utf-8 -*- 3# 4# Copyright (c) 2024 Huawei Device Co., Ltd. 5# Licensed under the Apache License, Version 2.0 (the "License"); 6# you may not use this file except in compliance with the License. 7# You may obtain a copy of the License at 8# 9# http://www.apache.org/licenses/LICENSE-2.0 10# 11# Unless required by applicable law or agreed to in writing, software 12# distributed under the License is distributed on an "AS IS" BASIS, 13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14# See the License for the specific language governing permissions and 15# limitations under the License. 16# 17 18import re 19import copy 20import optparse 21import struct 22import os 23import stat 24 25import parse_functions 26 27TRACE_REGEX_ASYNC = "\s*(\d+)\s+(.*?)\|\d+\|[SFC]\s+:(.*?)\s+:(.*?)\s+(.*?)\s+\]\d+\[\s+\)(\d+)\s*\(\s+(\d+?)-(.*?)\s+" 28TRACE_REGEX_SYNC = "\s*\|\d+\|E\s+:(.*?)\s+:(.*?)\s+(.*?)\s+\]\d+\[\s+\)(\d+)\s*\(\s+(\d+?)-(.*?)\s+" 29text_file = "" 30binary_file = "" 31out_file = "" 32 33CONTENT_TYPE_DEFAULT = 0 34CONTENT_TYPE_EVENTS_FORMAT = 1 35CONTENT_TYPE_CMDLINES = 2 36CONTENT_TYPE_TGIDS = 3 37CONTENT_TYPE_CPU_RAW = 4 38CONTENT_TYPE_HEADER_PAGE = 30 39CONTENT_TYPE_PRINTK_FORMATS = 31 40CONTENT_TYPE_KALLSYMS = 32 41 42INT8_DATA_READ_LEN = 1 43INT16_DATA_READ_LEN = 2 44INT32_DATA_READ_LEN = 4 45INT64_DATA_READ_LEN = 8 46 47READ_PAGE_SIZE = 4096 48 49RB_MISSED_FLAGS = (0x1 << 31) | (1 << 30) 50 51BUFFER_TYPE_PADDING = 29 52BUFFER_TYPE_TIME_EXTEND = 30 53BUFFER_TYPE_TIME_STAMP = 31 54 55events_format = {} 56cmd_lines = {} 57tgids = {} 58get_not_found_format = set() 59 60TRACE_TXT_HEADER_FORMAT = """# tracer: nop 61# 62# entries-in-buffer/entries-written: %lu/%lu #P:%d 63# 64# _-----=> irqs-off 65# / _----=> need-resched 66# | / _---=> hardirq/softirq 67# || / _--=> preempt-depth 68# ||| / delay 69# TASK-PID TGID CPU# |||| TIMESTAMP FUNCTION 70# | | | | |||| | | 71""" 72 73 74def parse_options(): 75 global text_file 76 global binary_file 77 global out_file 78 79 usage = "Usage: %prog -t text_file -o out_file or\n%prog -b binary_file -o out_file" 80 desc = "Example: %prog -t my_trace_file.htrace -o my_trace_file.systrace" 81 82 parser = optparse.OptionParser(usage=usage, description=desc) 83 parser.add_option('-t', '--text_file', dest='text_file', 84 help='Name of the text file to be parsed.', metavar='FILE') 85 parser.add_option('-b', '--binary_file', dest='binary_file', 86 help='Name of the binary file to be parsed.', metavar='FILE') 87 parser.add_option('-o', '--out_file', dest='out_file', 88 help='File name after successful parsing.', metavar='FILE') 89 90 options, args = parser.parse_args() 91 92 if options.out_file is not None: 93 out_file = options.out_file 94 else: 95 print("Error: out_file must be specified") 96 exit(-1) 97 if options.text_file is not None: 98 text_file = options.text_file 99 if options.binary_file is not None: 100 binary_file = options.binary_file 101 102 if text_file == '' and binary_file == '': 103 print("Error: You must specify a text or binary file") 104 exit(-1) 105 if text_file != '' and binary_file != '': 106 print("Error: Only one parsed file can be specified") 107 exit(-1) 108 109 110def parse_text_trace_file(): 111 print("start processing text trace file") 112 pattern_async = re.compile(TRACE_REGEX_ASYNC) 113 pattern_sync = re.compile(TRACE_REGEX_SYNC) 114 match_num = 0 115 116 infile_flags = os.O_RDONLY 117 infile_mode = stat.S_IRUSR 118 infile = os.fdopen(os.open(text_file, infile_flags, infile_mode), "r", encoding="utf-8") 119 outfile_flags = os.O_RDWR | os.O_CREAT 120 outfile_mode = stat.S_IRUSR | stat.S_IWUSR 121 outfile = os.fdopen(os.open(out_file, outfile_flags, outfile_mode), "w+", encoding="utf-8") 122 123 for line in infile: 124 reverse_line = line[::-1] 125 trace_match_async = pattern_async.match(reverse_line) 126 trace_match_sync = pattern_sync.match(reverse_line) 127 if trace_match_async: 128 line = line.rstrip(' ') 129 pos = line.rfind(' ') 130 line = "%s%s%s" % (line[:pos], '|', line[pos + 1:]) 131 match_num += 1 132 elif trace_match_sync: 133 line = "%s\n" % (line.rstrip()[:-1]) 134 match_num += 1 135 outfile.write(line) 136 infile.close() 137 outfile.close() 138 print("total matched and modified lines: ", match_num) 139 140 141cpu_raw_read_pos = 0 142TRACE_HEADER_SIZE = 12 143 144 145def parse_trace_header(infile): 146 trace_header = {} 147 trace_header_data = infile.read(TRACE_HEADER_SIZE) 148 trace_header_data_len = len(trace_header_data) 149 if trace_header_data_len == 12: 150 trace_header_tuple = struct.unpack('HBHL', trace_header_data) 151 trace_header["magic_number"] = trace_header_tuple[0] 152 trace_header["file_type"] = trace_header_tuple[1] 153 trace_header["version_number"] = trace_header_tuple[2] 154 trace_header["reserved"] = trace_header_tuple[3] 155 return trace_header 156 157 158def parse_page_header(data): 159 global cpu_raw_read_pos 160 page_header = {} 161 162 data_str = data[cpu_raw_read_pos:cpu_raw_read_pos + INT64_DATA_READ_LEN * 2 + INT8_DATA_READ_LEN] 163 data_str_len = len(data_str) 164 if data_str_len == 17: 165 struct_page_header = struct.unpack('QQB', data_str) 166 cpu_raw_read_pos += INT64_DATA_READ_LEN * 2 + INT8_DATA_READ_LEN 167 page_header["time_stamp"] = struct_page_header[0] 168 169 page_header["length"] = struct_page_header[1] 170 page_header["core_id"] = struct_page_header[2] 171 172 return page_header 173 174 175def parse_event_header(data): 176 global cpu_raw_read_pos 177 event_header = {} 178 179 data_str = data[cpu_raw_read_pos:cpu_raw_read_pos + INT32_DATA_READ_LEN + INT16_DATA_READ_LEN] 180 data_str_len = len(data_str) 181 if data_str_len == 6: 182 struct_event_header = struct.unpack('LH', data_str) 183 event_header["time_stamp_offset"] = struct_event_header[0] 184 event_header["size"] = struct_event_header[1] 185 186 cpu_raw_read_pos += INT32_DATA_READ_LEN + INT16_DATA_READ_LEN 187 188 return event_header 189 190 191TRACE_FLAG_IRQS_OFF = 0x01 192TRACE_FLAG_IRQS_NOSUPPORT = 0x02 193TRACE_FLAG_NEED_RESCHED = 0x04 194TRACE_FLAG_HARDIRQ = 0x08 195TRACE_FLAG_SOFTIRQ = 0x10 196TRACE_FLAG_PREEMPT_RESCHED = 0x20 197TRACE_FLAG_NMI = 0x40 198 199 200def trace_flags_to_str(flags, preempt_count): 201 result = "" 202 irqs_off = '.' 203 if flags & TRACE_FLAG_IRQS_OFF != 0: 204 irqs_off = 'd' 205 elif flags & TRACE_FLAG_IRQS_NOSUPPORT != 0: 206 irqs_off = 'X' 207 result += irqs_off 208 209 need_resched = '.' 210 is_need_resched = flags & TRACE_FLAG_NEED_RESCHED 211 is_preempt_resched = flags & TRACE_FLAG_PREEMPT_RESCHED 212 if is_need_resched != 0 and is_preempt_resched != 0: 213 need_resched = 'N' 214 elif is_need_resched != 0: 215 need_resched = 'n' 216 elif is_preempt_resched != 0: 217 need_resched = 'p' 218 result += need_resched 219 220 nmi_flag = flags & TRACE_FLAG_NMI 221 hard_irq = flags & TRACE_FLAG_HARDIRQ 222 soft_irq = flags & TRACE_FLAG_SOFTIRQ 223 irq_char = '.' 224 if nmi_flag != 0 and hard_irq != 0: 225 irq_char = 'Z' 226 elif nmi_flag != 0: 227 irq_char = 'z' 228 elif hard_irq != 0 and soft_irq != 0: 229 irq_char = 'H' 230 elif hard_irq != 0: 231 irq_char = 'h' 232 elif soft_irq != 0: 233 irq_char = 's' 234 result += irq_char 235 236 if preempt_count != 0: 237 result += "0123456789abcdef"[preempt_count & 0x0F] 238 else: 239 result += "." 240 241 return result 242 243 244COMM_STR_MAX = 16 245PID_STR_MAX = 6 246TGID_STR_MAX = 5 247CPU_STR_MAX = 3 248TS_SECS_MIN = 5 249TS_MICRO_SECS = 6 250 251 252def generate_one_event_str(data, cpu_id, time_stamp, one_event): 253 pid = int.from_bytes(one_event["fields"]["common_pid"], byteorder='little') 254 event_str = "" 255 256 cmd_line = cmd_lines.get(pid, "") 257 if pid == 0: 258 event_str += "<idle>" 259 elif cmd_line != "": 260 event_str += cmd_line 261 else: 262 event_str += "<...>" 263 event_str = event_str.rjust(COMM_STR_MAX) 264 event_str += "-" 265 266 event_str += str(pid).ljust(PID_STR_MAX) 267 268 tgid = tgids.get(pid, "") 269 if tgid != "": 270 event_str += "(" + tgid.rjust(TGID_STR_MAX) + ")" 271 else: 272 event_str += "(-----)" 273 274 event_str += " [" + str(cpu_id).zfill(CPU_STR_MAX) + "] " 275 276 flags = int.from_bytes(one_event["fields"]["common_flags"], byteorder='little') 277 preempt_count = int.from_bytes(one_event["fields"]["common_preempt_count"], byteorder='little') 278 if flags | preempt_count != 0: 279 event_str += trace_flags_to_str(flags, preempt_count) + " " 280 else: 281 event_str += ".... " 282 283 if time_stamp % 1000 >= 500: 284 time_stamp_str = str((time_stamp // 1000) + 1) 285 else: 286 time_stamp_str = str(time_stamp // 1000) 287 ts_secs = time_stamp_str[:-6].rjust(TS_SECS_MIN) 288 ts_micro_secs = time_stamp_str[-6:] 289 event_str += ts_secs + "." + ts_micro_secs + ": " 290 291 parse_result = parse_functions.parse(one_event["print_fmt"], data, one_event) 292 if parse_result is None: 293 get_not_found_format.add(str(one_event["name"])) 294 else: 295 event_str += str(one_event["name"]) + ": " + parse_result 296 297 return event_str 298 299 300format_miss_cnt = 0 301format_miss_set = set() 302trace_event_count_dict = {} # trace event count dict 303trace_event_mem_dict = {} # trace event mem dict 304 305 306def parse_one_event(data, event_id, cpu_id, time_stamp): 307 global format_miss_cnt 308 global format_miss_set 309 global trace_event_count_dict 310 if event_id in trace_event_count_dict: 311 trace_event_count_dict[event_id] += 1 312 else: 313 trace_event_count_dict[event_id] = 1 314 315 event_format = events_format.get(event_id, "") 316 if event_format == "": 317 # current event format is not found in trace file format data. 318 format_miss_cnt += 1 319 format_miss_set.add(event_id) 320 return "" 321 322 fields = event_format["fields"] 323 one_event = {} 324 one_event["id"] = event_id 325 one_event["name"] = event_format["name"] 326 one_event["print_fmt"] = event_format["print_fmt"] 327 one_event["fields"] = {} 328 for field in fields: 329 offset = field["offset"] 330 size = field["size"] 331 one_event["fields"][field["name"]] = data[offset:offset + size] 332 333 return generate_one_event_str(data, cpu_id, time_stamp, one_event) 334 335 336RMQ_ENTRY_ALIGN_MASK = 3 337 338 339def parse_cpu_raw_one_page(data, result): 340 global cpu_raw_read_pos 341 global trace_event_mem_dict 342 end_pos = cpu_raw_read_pos + READ_PAGE_SIZE 343 page_header = parse_page_header(data) 344 345 while cpu_raw_read_pos < end_pos: 346 event_header = parse_event_header(data) 347 if event_header.get("size", 0) == 0: 348 break 349 350 time_stamp = page_header.get("time_stamp", 0) + event_header.get("time_stamp_offset", 0) 351 event_id = struct.unpack('H', data[cpu_raw_read_pos:cpu_raw_read_pos + INT16_DATA_READ_LEN])[0] 352 353 if event_id in trace_event_mem_dict: 354 trace_event_mem_dict[event_id] += event_header.get("size", 0) 355 else: 356 trace_event_mem_dict[event_id] = event_header.get("size", 0) 357 358 one_event_data = data[cpu_raw_read_pos:cpu_raw_read_pos + event_header.get("size", 0)] 359 one_event_result = parse_one_event(one_event_data, event_id, page_header.get("core_id", 0), time_stamp) 360 if one_event_result != "": 361 result.append([time_stamp, one_event_result]) 362 363 evt_size = ((event_header.get("size", 0) + RMQ_ENTRY_ALIGN_MASK) & (~RMQ_ENTRY_ALIGN_MASK)) 364 cpu_raw_read_pos += evt_size 365 cpu_raw_read_pos = end_pos 366 367 368def parse_cpu_raw(data, data_len, result): 369 global cpu_raw_read_pos 370 cpu_raw_read_pos = 0 371 372 while cpu_raw_read_pos < data_len: 373 parse_cpu_raw_one_page(data, result) 374 375 376def parse_events_format_field(field_line): 377 field_info = field_line.split(";") 378 field_info[0] = field_info[0].lstrip() 379 field_info[1] = field_info[1].lstrip() 380 field_info[2] = field_info[2].lstrip() 381 field_info[3] = field_info[3].lstrip() 382 383 field = {} 384 type_name_pos = field_info[0].rfind(" ") 385 field["type"] = field_info[0][len("field:"):type_name_pos] 386 field["name"] = field_info[0][type_name_pos + 1:] 387 field["offset"] = int(field_info[1][len("offset:"):]) 388 field["size"] = int(field_info[2][len("size:"):]) 389 field["signed"] = field_info[3][len("signed:"):] 390 391 return field 392 393 394def parse_events_format(data): 395 name_line_prefix = "name: " 396 id_line_prefix = "ID: " 397 field_line_prefix = "field:" 398 print_fmt_line_prefix = "print fmt: " 399 400 events_format_lines = data.decode('utf-8').split("\n") 401 event_format = {} 402 event_format["fields"] = [] 403 for line in events_format_lines: 404 line = line.lstrip() 405 if line.startswith(name_line_prefix): 406 event_format["name"] = line[len(name_line_prefix):] 407 elif line.startswith(id_line_prefix): 408 event_format["id"] = int(line[len(id_line_prefix):]) 409 elif line.startswith(field_line_prefix): 410 event_format["fields"].append(parse_events_format_field(line)) 411 elif line.startswith(print_fmt_line_prefix): 412 event_format["print_fmt"] = line[len(print_fmt_line_prefix):] 413 events_format[event_format["id"]] = copy.deepcopy(event_format) 414 event_format["fields"].clear() 415 416 417def parse_cmdlines(data): 418 cmd_lines_list = data.decode('utf-8').split("\n") 419 for cmd_line in cmd_lines_list: 420 pos = cmd_line.find(" ") 421 if pos != -1: 422 cmd_lines[int(cmd_line[:pos])] = cmd_line[pos + 1:] 423 424 425def parse_tgids(data): 426 tgids_lines_list = data.decode('utf-8').split("\n") 427 for tgids_line in tgids_lines_list: 428 pos = tgids_line.find(" ") 429 if pos != -1: 430 tgids[int(tgids_line[:pos])] = tgids_line[pos + 1:] 431 432 433def parse_header_page(data): 434 print("in parse_header_page") 435 436 437def parse_printk_formats(data): 438 print("in parse_printk_formats") 439 440 441def parse_kallsyms(data): 442 print("in parse_kallsyms") 443 444 445def parse_trace_base_data(infile, file_size): 446 while infile.tell() < file_size: 447 data_type = struct.unpack('L', infile.read(INT32_DATA_READ_LEN))[0] 448 data_len = struct.unpack('L', infile.read(INT32_DATA_READ_LEN))[0] 449 data = infile.read(data_len) 450 if data_type == CONTENT_TYPE_HEADER_PAGE: 451 parse_header_page(data) 452 elif data_type == CONTENT_TYPE_CMDLINES: 453 parse_cmdlines(data) 454 elif data_type == CONTENT_TYPE_TGIDS: 455 parse_tgids(data) 456 elif data_type == CONTENT_TYPE_EVENTS_FORMAT: 457 parse_events_format(data) 458 elif data_type == CONTENT_TYPE_PRINTK_FORMATS: 459 parse_printk_formats(data) 460 elif data_type == CONTENT_TYPE_KALLSYMS: 461 parse_kallsyms(data) 462 463 464def parse_trace_events_data(infile, file_size, cpu_nums, result): 465 while infile.tell() < file_size: 466 data_type = struct.unpack('L', infile.read(INT32_DATA_READ_LEN))[0] 467 data_len = struct.unpack('L', infile.read(INT32_DATA_READ_LEN))[0] 468 data = infile.read(data_len) 469 470 if data_type >= CONTENT_TYPE_CPU_RAW and data_type < CONTENT_TYPE_CPU_RAW + cpu_nums: 471 parse_cpu_raw(data, data_len, result) 472 473 474def parse_binary_trace_file(): 475 infile_flags = os.O_RDONLY | os.O_BINARY 476 infile_mode = stat.S_IRUSR 477 infile = os.fdopen(os.open(binary_file, infile_flags, infile_mode), 'rb') 478 479 outfile_flags = os.O_RDWR | os.O_CREAT 480 outfile_mode = stat.S_IRUSR | stat.S_IWUSR 481 outfile = os.fdopen(os.open(out_file, outfile_flags, outfile_mode), 'w', encoding="utf-8") 482 483 trace_header = parse_trace_header(infile) 484 cpu_nums = (trace_header.get("reserved", 0) >> 1) & 0b00011111 485 486 outfile.write(TRACE_TXT_HEADER_FORMAT) 487 trace_file_size = os.path.getsize(binary_file) 488 parse_trace_base_data(infile, trace_file_size) 489 infile.seek(TRACE_HEADER_SIZE) 490 491 result = [] 492 parse_trace_events_data(infile, trace_file_size, cpu_nums, result) 493 result = sorted(result, key=lambda x: x[0]) 494 for line in result: 495 outfile.write("{}\n".format(line[1])) 496 497 outfile.close() 498 infile.close() 499 500 for name in get_not_found_format: 501 print("Error: function parse_%s not found" % name) 502 print("Trace format miss count: %d" % format_miss_cnt) 503 print("Trace format id missed set:") 504 print(format_miss_set) 505 506 count_total = sum(trace_event_count_dict.values()) 507 mem_total = sum(trace_event_mem_dict.values()) / 1024 # KB 508 print(f"Trace counter: total count({count_total}), total mem({mem_total:.3f}KB)") 509 for format_id, count in trace_event_count_dict.items(): 510 count_percentage = count / count_total * 100 511 mem = trace_event_mem_dict.get(format_id, 0) / 1024 512 mem_percentage = mem / mem_total * 100 513 print(f"ID {format_id}: count={count}, count percentage={count_percentage:.5f}%, mem={mem:.3f}KB, mem percentage={mem_percentage:.5f}%") 514 515 516def main(): 517 parse_options() 518 519 if text_file != '': 520 parse_text_trace_file() 521 else: 522 parse_binary_trace_file() 523 524 525if __name__ == '__main__': 526 main()