# Copyright (c) 2015-2017 Intel Corporation
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.

import argparse
import builtins
import collections
import os
import re
import sys
import textwrap

import xml.etree.ElementTree as et

# Maps an expanded equation hash to the symbol of the C function already
# emitted for it, so identical equations can share one implementation.
hashed_funcs = {}

# Output stream for the generated C file and its current indentation width.
c_file = None
_c_indent = 0

def c(*args):
    """Write the space-joined arguments to the C file, one output line per
    input line.

    Every line is prefixed with the current C indentation and stripped of
    trailing whitespace before a newline is appended.  An empty string
    produces no output at all.
    """
    padding = ''.rjust(_c_indent)
    joined = ' '.join(str(arg) for arg in args)
    for raw_line in joined.splitlines():
        c_file.write((padding + raw_line).rstrip() + "\n")

# indented, but no trailing newline...
def c_line_start(code):
    """Write *code* to the C file at the current indentation, without
    appending a newline."""
    c_file.write(''.rjust(_c_indent) + code)

def c_raw(code):
    """Write *code* to the C file verbatim (no indentation, no newline)."""
    c_file.write(code)

def c_indent(n):
    """Increase the C file indentation by *n* columns."""
    global _c_indent
    _c_indent = _c_indent + n

def c_outdent(n):
    """Decrease the C file indentation by *n* columns."""
    global _c_indent
    _c_indent = _c_indent - n

# Output stream for the generated header and its current indentation width.
header_file = None
_h_indent = 0

def h(*args):
    """Write the space-joined arguments to the header file, one output line
    per input line, indented by the current header indentation and stripped
    of trailing whitespace."""
    code = ' '.join(map(str, args))
    for line in code.splitlines():
        text = ''.rjust(_h_indent) + line
        header_file.write(text.rstrip() + "\n")

def h_indent(n):
    """Increase the header file indentation by *n* columns."""
    # The global declaration must name _h_indent: declaring _c_indent here
    # made the assignment below target an unbound local.
    global _h_indent
    _h_indent = _h_indent + n

def h_outdent(n):
    """Decrease the header file indentation by *n* columns."""
    global _h_indent
    _h_indent = _h_indent - n


# Emitters for the RPN equation operators.  Each one receives the next free
# temporary id and the operands in pop order (args[0] was on top of the
# stack, so it is the right-hand operand), writes C code through c() and
# returns the next free temporary id.  The result of the operation is always
# stored in the temporary with the highest id that was written.

def emit_fadd(tmp_id, args):
    """Emit a double addition: args[1] + args[0]."""
    c("double tmp{0} = {1} + {2};".format(tmp_id, args[1], args[0]))
    return tmp_id + 1

# Be careful to check for divide by zero...
def emit_fdiv(tmp_id, args):
    """Emit a double division args[1] / args[0] that yields 0 when the
    divisor is zero.  Uses three temporaries."""
    c("double tmp{0} = {1};".format(tmp_id, args[1]))
    c("double tmp{0} = {1};".format(tmp_id + 1, args[0]))
    c("double tmp{0} = tmp{1} ? tmp{2} / tmp{1} : 0;".format(tmp_id + 2, tmp_id + 1, tmp_id))
    return tmp_id + 3

def emit_fmax(tmp_id, args):
    """Emit MAX(args[1], args[0]) on doubles.  Uses three temporaries."""
    c("double tmp{0} = {1};".format(tmp_id, args[1]))
    c("double tmp{0} = {1};".format(tmp_id + 1, args[0]))
    c("double tmp{0} = MAX(tmp{1}, tmp{2});".format(tmp_id + 2, tmp_id, tmp_id + 1))
    return tmp_id + 3

def emit_fmul(tmp_id, args):
    """Emit a double multiplication: args[1] * args[0]."""
    c("double tmp{0} = {1} * {2};".format(tmp_id, args[1], args[0]))
    return tmp_id + 1

def emit_fsub(tmp_id, args):
    """Emit a double subtraction: args[1] - args[0]."""
    c("double tmp{0} = {1} - {2};".format(tmp_id, args[1], args[0]))
    return tmp_id + 1

def emit_read(tmp_id, args):
    """Emit a read of results->accumulator at index args[0], relative to the
    query offset named after the lower-cased args[1]."""
    offset_kind = args[1].lower()
    c("uint64_t tmp{0} = results->accumulator[query->{1}_offset + {2}];".format(tmp_id, offset_kind, args[0]))
    return tmp_id + 1

def emit_uadd(tmp_id, args):
    """Emit a uint64_t addition: args[1] + args[0]."""
    c("uint64_t tmp{0} = {1} + {2};".format(tmp_id, args[1], args[0]))
    return tmp_id + 1

# Be careful to check for divide by zero...
def emit_udiv(tmp_id, args):
    """Emit a uint64_t division args[1] / args[0].

    A literal divisor is divided by directly (and must be non-zero); any
    other divisor gets a run-time zero check that yields 0.  Uses three
    temporaries.
    """
    c("uint64_t tmp{0} = {1};".format(tmp_id, args[1]))
    c("uint64_t tmp{0} = {1};".format(tmp_id + 1, args[0]))
    if args[0].isdigit():
        # Raise explicitly instead of asserting so the check survives -O.
        if int(args[0]) <= 0:
            raise Exception("UDIV by constant zero divisor " + args[0])
        c("uint64_t tmp{0} = tmp{2} / tmp{1};".format(tmp_id + 2, tmp_id + 1, tmp_id))
    else:
        c("uint64_t tmp{0} = tmp{1} ? tmp{2} / tmp{1} : 0;".format(tmp_id + 2, tmp_id + 1, tmp_id))
    return tmp_id + 3

def emit_umul(tmp_id, args):
    """Emit a uint64_t multiplication: args[1] * args[0]."""
    c("uint64_t tmp{0} = {1} * {2};".format(tmp_id, args[1], args[0]))
    return tmp_id + 1

def emit_usub(tmp_id, args):
    """Emit a uint64_t subtraction: args[1] - args[0]."""
    c("uint64_t tmp{0} = {1} - {2};".format(tmp_id, args[1], args[0]))
    return tmp_id + 1

def emit_umin(tmp_id, args):
    """Emit MIN(args[1], args[0]) on uint64_t."""
    c("uint64_t tmp{0} = MIN({1}, {2});".format(tmp_id, args[1], args[0]))
    return tmp_id + 1

def emit_lshft(tmp_id, args):
    """Emit a left shift: args[1] << args[0]."""
    c("uint64_t tmp{0} = {1} << {2};".format(tmp_id, args[1], args[0]))
    return tmp_id + 1

def emit_rshft(tmp_id, args):
    """Emit a right shift: args[1] >> args[0]."""
    c("uint64_t tmp{0} = {1} >> {2};".format(tmp_id, args[1], args[0]))
    return tmp_id + 1

def emit_and(tmp_id, args):
    """Emit a bitwise and: args[1] & args[0]."""
    c("uint64_t tmp{0} = {1} & {2};".format(tmp_id, args[1], args[0]))
    return tmp_id + 1

def emit_ulte(tmp_id, args):
    """Emit the comparison args[1] <= args[0]."""
    c("uint64_t tmp{0} = {1} <= {2};".format(tmp_id, args[1], args[0]))
    return tmp_id + 1

def emit_ult(tmp_id, args):
    """Emit the comparison args[1] < args[0]."""
    c("uint64_t tmp{0} = {1} < {2};".format(tmp_id, args[1], args[0]))
    return tmp_id + 1

def emit_ugte(tmp_id, args):
    """Emit the comparison args[1] >= args[0]."""
    c("uint64_t tmp{0} = {1} >= {2};".format(tmp_id, args[1], args[0]))
    return tmp_id + 1

def emit_ugt(tmp_id, args):
    """Emit the comparison args[1] > args[0]."""
    c("uint64_t tmp{0} = {1} > {2};".format(tmp_id, args[1], args[0]))
    return tmp_id + 1

ops = {}
#             (n operands, emitter)
ops["FADD"] = (2, emit_fadd)
ops["FDIV"] = (2, emit_fdiv)
ops["FMAX"] = (2, emit_fmax)
ops["FMUL"] = (2, emit_fmul)
ops["FSUB"] = (2, emit_fsub)
ops["READ"] = (2, emit_read)
ops["UADD"] = (2, emit_uadd)
ops["UDIV"] = (2, emit_udiv)
ops["UMUL"] = (2, emit_umul)
ops["USUB"] = (2, emit_usub)
ops["UMIN"] = (2, emit_umin)
ops["<<"]   = (2, emit_lshft)
ops[">>"]   = (2, emit_rshft)
ops["AND"]  = (2, emit_and)
ops["UGTE"] = (2, emit_ugte)
ops["UGT"]  = (2, emit_ugt)
ops["ULTE"] = (2, emit_ulte)
ops["ULT"]  = (2, emit_ult)


def brkt(subexp):
    """Parenthesize *subexp* if it contains a space (i.e. looks compound)."""
    if " " in subexp:
        return "(" + subexp + ")"
    else:
        return subexp

# Splicers build a C expression string from two operands given in pop order
# (args[0] is the right-hand operand).

def splice_bitwise_and(args):
    return brkt(args[1]) + " & " + brkt(args[0])

def splice_bitwise_or(args):
    return brkt(args[1]) + " | " + brkt(args[0])

def splice_logical_and(args):
    return brkt(args[1]) + " && " + brkt(args[0])

def splice_umul(args):
    return brkt(args[1]) + " * " + brkt(args[0])

def splice_ult(args):
    return brkt(args[1]) + " < " + brkt(args[0])

def splice_ugte(args):
    return brkt(args[1]) + " >= " + brkt(args[0])

def splice_ulte(args):
    return brkt(args[1]) + " <= " + brkt(args[0])

def splice_ugt(args):
    return brkt(args[1]) + " > " + brkt(args[0])

def splice_lshft(args):
    return brkt(args[1]) + " << " + brkt(args[0])

def splice_equal(args):
    return brkt(args[1]) + " == " + brkt(args[0])

exp_ops = {}
#                 (n operands, splicer)
exp_ops["AND"]  = (2, splice_bitwise_and)
exp_ops["OR"]   = (2, splice_bitwise_or)
exp_ops["UGTE"] = (2, splice_ugte)
exp_ops["ULT"]  = (2, splice_ult)
exp_ops["&&"]   = (2, splice_logical_and)
exp_ops["UMUL"] = (2, splice_umul)
exp_ops["<<"]   = (2, splice_lshft)
exp_ops["=="]   = (2, splice_equal)


# Maps "$Variable" names used in the XML equations to the C expression that
# provides their value.
hw_vars = {}
hw_vars["$EuCoresTotalCount"] = "perf->sys_vars.n_eus"
hw_vars["$VectorEngineTotalCount"] = "perf->sys_vars.n_eus"
hw_vars["$EuSlicesTotalCount"] = "perf->sys_vars.n_eu_slices"
hw_vars["$EuSubslicesTotalCount"] = "perf->sys_vars.n_eu_sub_slices"
hw_vars["$XeCoreTotalCount"] = "perf->sys_vars.n_eu_sub_slices"
hw_vars["$EuDualSubslicesTotalCount"] = "perf->sys_vars.n_eu_sub_slices"
hw_vars["$EuDualSubslicesSlice0123Count"] = "perf->sys_vars.n_eu_slice0123"
hw_vars["$EuThreadsCount"] = "perf->devinfo->num_thread_per_eu"
hw_vars["$VectorEngineThreadsCount"] = "perf->devinfo->num_thread_per_eu"
hw_vars["$SliceMask"] = "perf->sys_vars.slice_mask"
hw_vars["$SliceTotalCount"] = "perf->sys_vars.n_eu_slices"
# subslice_mask is interchangeable with subslice/dual-subslice since Gfx12+
# only has dual subslices which can be assimilated with 16EUs subslices.
hw_vars["$SubsliceMask"] = "perf->sys_vars.subslice_mask"
hw_vars["$DualSubsliceMask"] = "perf->sys_vars.subslice_mask"
hw_vars["$XeCoreMask"] = "perf->sys_vars.subslice_mask"
hw_vars["$GpuTimestampFrequency"] = "perf->devinfo->timestamp_frequency"
hw_vars["$GpuMinFrequency"] = "perf->sys_vars.gt_min_freq"
hw_vars["$GpuMaxFrequency"] = "perf->sys_vars.gt_max_freq"
hw_vars["$SkuRevisionId"] = "perf->devinfo->revision"
hw_vars["$QueryMode"] = "perf->sys_vars.query_mode"
hw_vars["$ComputeEngineTotalCount"] = "perf->devinfo->engine_class_supported_count[INTEL_ENGINE_CLASS_COMPUTE]"
hw_vars["$CopyEngineTotalCount"] = "perf->devinfo->engine_class_supported_count[INTEL_ENGINE_CLASS_COPY]"
hw_vars["$L3BankTotalCount"] = "perf->sys_vars.n_l3_banks"
hw_vars["$L3BankMaxCount"] = "perf->sys_vars.n_l3_banks"
hw_vars["$L3NodeTotalCount"] = "perf->sys_vars.n_l3_nodes"
hw_vars["$SqidiTotalCount"] = "perf->sys_vars.n_sq_idis"

def resolve_variable(name, set, allow_counters):
    """Return the C expression for the "$" variable *name*, or None.

    Lookup order: the hw_vars table, "$GtSlice<N>", "$GtSlice<N>XeCore<M>",
    and finally (only when *allow_counters* is true) another counter of
    *set*, which resolves to a call of that counter's read function.
    """
    if name in hw_vars:
        return hw_vars[name]
    m = re.search(r'\$GtSlice([0-9]+)$', name)
    if m:
        return 'intel_device_info_slice_available(perf->devinfo, {0})'.format(m.group(1))
    m = re.search(r'\$GtSlice([0-9]+)XeCore([0-9]+)$', name)
    if m:
        return 'intel_device_info_subslice_available(perf->devinfo, {0}, {1})'.format(m.group(1), m.group(2))
    if allow_counters and name in set.counter_vars:
        return set.read_funcs[name[1:]] + "(perf, query, results)"
    return None

def _resolve_operand(token, set, allow_counters, kind, source, counter_name):
    """Return *token* with a leading-"$" variable replaced by its C
    expression; raise if the variable cannot be resolved.  *kind* and
    *source* ("equation"/"expression" and its text) only feed the message."""
    if token[0] != "$":
        return token
    resolved_variable = resolve_variable(token, set, allow_counters)
    if resolved_variable is None:
        raise Exception("Failed to resolve variable " + token + " in " + kind + " " + source +
                        " for " + set.name + " :: " + counter_name)
    return resolved_variable

def output_rpn_equation_code(set, counter, equation):
    """Emit the C statements evaluating the RPN *equation* of *counter* and
    the final "return" statement.

    Tokens are pushed on a stack; whenever an operator from `ops` reaches
    the top, its operands are popped, resolved and handed to the emitter,
    and the name of the resulting temporary is pushed back.

    Raises Exception when a variable cannot be resolved or when the
    equation does not reduce to exactly one value.
    """
    c("/* RPN equation: " + equation + " */")
    tokens = equation.split()
    stack = []
    tmp_id = 0

    for token in tokens:
        stack.append(token)
        while stack and stack[-1] in ops:
            op = stack.pop()
            argc, callback = ops[op]
            args = []
            for i in range(0, argc):
                operand = stack.pop()
                args.append(_resolve_operand(operand, set, True, "equation",
                                             equation, counter.get('name')))

            tmp_id = callback(tmp_id, args)

            # The emitter stores its result in the last temporary it wrote.
            stack.append("tmp{0}".format(tmp_id - 1))

    if len(stack) != 1:
        raise Exception("Spurious empty rpn code for " + set.name + " :: " +
                        counter.get('name') + ".\nThis is probably due to some unhandled RPN function, in the equation \"" +
                        equation + "\"")

    # Report the value itself on failure: no operand may have been popped
    # at all when the equation is a single variable.
    value = _resolve_operand(stack[-1], set, True, "equation",
                             equation, counter.get('name'))

    c("\nreturn " + value + ";")

def splice_rpn_expression(set, counter_name, expression):
    """Convert the RPN *expression* into a single infix C expression string.

    Only the operators in `exp_ops` are supported and variables may not
    refer to other counters.  Raises Exception when a variable cannot be
    resolved or when the expression does not reduce to exactly one value.
    """
    tokens = expression.split()
    stack = []

    for token in tokens:
        stack.append(token)
        while stack and stack[-1] in exp_ops:
            op = stack.pop()
            argc, callback = exp_ops[op]
            args = []
            for i in range(0, argc):
                operand = stack.pop()
                args.append(_resolve_operand(operand, set, False, "expression",
                                             expression, counter_name))

            subexp = callback(args)

            stack.append(subexp)

    if len(stack) != 1:
        raise Exception("Spurious empty rpn expression for " + set.name + " :: " +
                        counter_name + ".\nThis is probably due to some unhandled RPN operation, in the expression \"" +
                        expression + "\"")

    return _resolve_operand(stack[-1], set, False, "expression",
                            expression, counter_name)

def _output_equation_function(set, counter, sym, eq_hash, equation, perf_param):
    """Emit the C function *sym* computing *equation*, or a #define aliasing
    *sym* to an already emitted function with the same *eq_hash*."""
    c("\n")
    c("/* {0} :: {1} */".format(set.name, counter.get('name')))

    if eq_hash in hashed_funcs:
        c("#define %s \\" % sym)
        c_indent(3)
        c("%s" % hashed_funcs[eq_hash])
        c_outdent(3)
        return

    ret_type = counter.get('data_type')
    if ret_type == "uint64":
        ret_type = "uint64_t"

    c("static " + ret_type)
    c(sym + "(" + perf_param + ",\n")
    # Align the remaining parameters under the first one.
    c_indent(len(sym) + 1)
    c("const struct intel_perf_query_info *query,\n")
    c("const struct intel_perf_query_result *results)\n")
    c_outdent(len(sym) + 1)

    c("{")
    c_indent(3)
    output_rpn_equation_code(set, counter, equation)
    c_outdent(3)
    c("}")

    hashed_funcs[eq_hash] = sym

def output_counter_read(gen, set, counter):
    """Emit the read function of *counter* (or an alias to an identical
    function emitted earlier)."""
    _output_equation_function(set, counter, counter.read_sym, counter.read_hash,
                              counter.get('equation'),
                              "UNUSED struct intel_perf_config *perf")


def output_counter_max(gen, set, counter):
    """Emit the max function of *counter* if it has a custom max equation
    (or an alias to an identical function emitted earlier)."""
    max_eq = counter.get('max_equation')

    if not counter.has_custom_max_func():
        return

    _output_equation_function(set, counter, counter.max_sym, counter.max_hash,
                              max_eq, "struct intel_perf_config *perf")


c_type_sizes = { "uint32_t": 4, "uint64_t": 8, "float": 4, "double": 8, "bool": 4 }
def sizeof(c_type):
    """Return the size in bytes recorded for *c_type* in c_type_sizes."""
    return c_type_sizes[c_type]

def pot_align(base, pot_alignment):
    """Round *base* up to a multiple of the power-of-two *pot_alignment*."""
    return (base + pot_alignment - 1) & ~(pot_alignment - 1)

semantic_type_map = {
    "duration": "raw",
    "ratio": "event"
    }

def output_availability(set, availability, counter_name):
    """Emit the opening 'if (...) {' for the RPN *availability* expression,
    putting each ' && '-separated term on its own line."""
    expression = splice_rpn_expression(set, counter_name, availability)
    lines = expression.split(' && ')
    n_lines = len(lines)
    if n_lines == 1:
        c("if (" + lines[0] + ") {")
    else:
        c("if (" + lines[0] + " &&")
        c_indent(4)
        for i in range(1, (n_lines - 1)):
            c(lines[i] + " &&")
        c(lines[(n_lines - 1)] + ") {")
        c_outdent(4)


def output_units(unit):
    """Return *unit* upper-cased with spaces replaced by underscores."""
    return unit.replace(' ', '_').upper()


# should a unit be visible in description?
units_map = {
    "bytes" : True,
    "cycles" : True,
    "eu atomic requests to l3 cache lines" : False,
    "eu bytes per l3 cache line" : False,
    "eu requests to l3 cache lines" : False,
    "eu sends to l3 cache lines" : False,
    "events" : True,
    "hz" : True,
    "messages" : True,
    "ns" : True,
    "number" : False,
    "percent" : True,
    "pixels" : True,
    "texels" : True,
    "threads" : True,
    "us" : True,
    "utilization" : False,
    "gbps" : True,
    }


def desc_units(unit):
    """Return the "Unit: <unit>." suffix for a counter description, or ""
    when units_map marks the unit as not shown.

    Raises Exception for a unit that is not listed in units_map.
    """
    val = units_map.get(unit)
    if val is None:
        # str() keeps the message intact even when the attribute is missing.
        raise Exception("Unknown unit: " + str(unit))
    if not val:
        return ""
    if unit == 'hz':
        unit = 'Hz'
    return "Unit: " + unit + "."


counter_key_tuple = collections.namedtuple(
    'counter_key',
    [
        'name',
        'description',
        'symbol_name',
        'mdapi_group',
        'semantic_type',
        'data_type',
        'units',
    ]
)


def counter_key(counter):
    """Build a counter_key tuple from the same-named properties of
    *counter* (read through counter.get())."""
    return counter_key_tuple._make([counter.get(field) for field in counter_key_tuple._fields])


def output_counter_struct(set, counter, idx,
                          name_to_idx, desc_to_idx,
                          symbol_name_to_idx, category_to_idx):
    """Emit entry *idx* of the C counter data table.

    *counter* is accessed by attribute (e.g. a counter_key tuple); the
    *_to_idx tables map its strings to offsets in the emitted string tables.
    """
    data_type = counter.data_type
    data_type_uc = data_type.upper()

    semantic_type = counter.semantic_type
    if semantic_type in semantic_type_map:
        semantic_type = semantic_type_map[semantic_type]

    semantic_type_uc = semantic_type.upper()

    c("[" + str(idx) + "] = {\n")
    c_indent(3)
    c(".name_idx = " + str(name_to_idx[counter.name]) + ",\n")
    c(".desc_idx = " + str(desc_to_idx[counter.description + " " + desc_units(counter.units)]) + ",\n")
    c(".symbol_name_idx = " + str(symbol_name_to_idx[counter.symbol_name]) + ",\n")
    c(".category_idx = " + str(category_to_idx[counter.mdapi_group]) + ",\n")
    c(".type = INTEL_PERF_COUNTER_TYPE_" + semantic_type_uc + ",\n")
    c(".data_type = INTEL_PERF_COUNTER_DATA_TYPE_" + data_type_uc + ",\n")
    c(".units = INTEL_PERF_COUNTER_UNITS_" + output_units(counter.units) + ",\n")
    c_outdent(3)
    c("},\n")


def output_counter_report(set, counter, counter_to_idx, current_offset):
    """Emit the call registering *counter* in the query being built, wrapped
    in its availability check if it has one.

    *current_offset* is aligned to the size of the counter's C type before
    use; the offset just past the counter is returned.
    """
    data_type = counter.get('data_type')
    c_type = data_type

    if "uint" in c_type:
        c_type = c_type + "_t"

    c("\n")

    availability = counter.get('availability')
    if availability:
        output_availability(set, availability, counter.get('name'))
        c_indent(3)

    key = counter_key(counter)
    idx = str(counter_to_idx[key])

    current_offset = pot_align(current_offset, sizeof(c_type))

    # Anything that is not uint64 is registered through the float helper.
    if data_type == 'uint64':
        add_func = "intel_perf_query_add_counter_uint64"
    else:
        add_func = "intel_perf_query_add_counter_float"

    c(add_func + "(query, " + idx + ", " +
      str(current_offset) + ", " +
      set.max_funcs[counter.get('symbol_name')] + "," +
      set.read_funcs[counter.get('symbol_name')] + ");\n")

    if availability:
        c_outdent(3)
        c("}")

    return current_offset + sizeof(c_type)


def str_to_idx_table(strs):
    """Map each string of *strs*, in sorted order, to its offset in a table
    where every string is followed by one terminator character.

    Returns an OrderedDict (empty for empty input).
    """
    # NOTE(review): offsets count characters, not encoded bytes; this looks
    # like it assumes ASCII-only strings -- confirm against the XML inputs.
    str_to_idx = collections.OrderedDict()
    offset = 0
    for s in sorted(strs):
        str_to_idx[s] = offset
        offset += len(s) + 1

    return str_to_idx


def output_str_table(name: str, str_to_idx):
    """Emit the C char array *name* holding every key of *str_to_idx*, each
    followed by a NUL and preceded by a comment giving its offset."""
    c("\n")
    c("static const char " + name + "[] = {\n")
    c_indent(3)
    c("\n".join(f"/* {idx} */ \"{val}\\0\"" for val, idx in str_to_idx.items()))
    c_outdent(3)
    c("};\n")


register_types = {
    'FLEX': 'flex_regs',
    'NOA': 'mux_regs',
    'OA': 'b_counter_regs',
}

def compute_register_lengths(set):
    """Return the total number of <register> children per register kind
    (as named by register_types) over all <register_config> of *set*."""
    register_lengths = {}
    for register_config in set.findall('register_config'):
        t = register_types[register_config.get('type')]
        n_registers = len(register_config.findall('register'))
        register_lengths[t] = register_lengths.get(t, 0) + n_registers

    return register_lengths


def generate_register_configs(set):
    """Emit, for every <register_config> of *set*, the C register table and
    the assignments attaching it to query->config, wrapped in the config's
    availability check if it has one."""
    register_configs = set.findall('register_config')

    for register_config in register_configs:
        t = register_types[register_config.get('type')]

        availability = register_config.get('availability')
        if availability:
            output_availability(set, availability, register_config.get('type') + ' register config')
            c_indent(3)

        registers = register_config.findall('register')
        c("static const struct intel_perf_query_register_prog %s[] = {" % t)
        c_indent(3)
        for register in registers:
            c("{ .reg = %s, .val = %s }," % (register.get('address'), register.get('value')))
        c_outdent(3)
        c("};")
        c("query->config.%s = %s;" % (t, t))
        c("query->config.n_%s = ARRAY_SIZE(%s);" % (t, t))

        if availability:
            c_outdent(3)
            c("}")
        c("\n")


# Wraps a <counter> element from the oa-*.xml files.
class Counter:
    """A <counter> XML element together with the C symbol names and the
    equation hashes derived from it."""

    def __init__(self, set, xml):
        self.xml = xml
        self.set = set
        # Filled in by compute_hashes().
        self.read_hash = None
        self.max_hash = None

        self.read_sym = "{0}__{1}__{2}__read".format(self.set.gen.chipset,
                                                     self.set.underscore_name,
                                                     self.xml.get('underscore_name'))
        self.max_sym = self.build_max_sym()

    def get(self, prop):
        """Return the XML attribute *prop* (None if absent)."""
        return self.xml.get(prop)

    # Compute the hash of a counter's equation by expanding (including all the
    # sub-equations it depends on)
    def compute_hashes(self):
        """Set read_hash (and max_hash when a max equation exists) to the
        equation text with every reference to another counter of the set
        replaced by that counter's own read_hash.  Does nothing once
        read_hash has been computed."""
        if self.read_hash is not None:
            return

        def replace_token(token):
            if token[0] != "$":
                return token
            if token not in self.set.counter_vars:
                return token
            self.set.counter_vars[token].compute_hashes()
            return self.set.counter_vars[token].read_hash

        read_eq = self.xml.get('equation')
        self.read_hash = ' '.join(map(replace_token, read_eq.split()))

        max_eq = self.xml.get('max_equation')
        if max_eq:
            self.max_hash = ' '.join(map(replace_token, max_eq.split()))

    @staticmethod
    def _is_percentage_max(max_eq):
        """Return True when *max_eq* is just a number equal to 100."""
        try:
            return float(max_eq) == 100
        except ValueError:
            return False

    def has_custom_max_func(self):
        """Return True when the counter needs its own generated max
        function: it has a max equation, the equation is not the constant
        100, and every "$" variable in it can be resolved."""
        max_eq = self.xml.get('max_equation')
        if not max_eq:
            return False

        if self._is_percentage_max(max_eq):
            return False

        for token in max_eq.split():
            if token[0] == '$' and resolve_variable(token, self.set, True) is None:
                print("unresolved token " + token)
                return False
        return True

    def build_max_sym(self):
        """Return the C symbol providing the counter's maximum: "NULL"
        without a max equation, a shared percentage helper when the maximum
        is the constant 100, otherwise the name of the generated function.

        Raises Exception when the max equation cannot be resolved.
        """
        max_eq = self.xml.get('max_equation')
        if not max_eq:
            return "NULL"

        if self._is_percentage_max(max_eq):
            if self.xml.get('data_type') == 'uint64':
                return "percentage_max_uint64"
            else:
                return "percentage_max_float"

        # Raise explicitly instead of asserting so the check survives -O.
        if not self.has_custom_max_func():
            raise Exception("Unable to resolve max equation \"" + max_eq + "\"")
        return "{0}__{1}__{2}__max".format(self.set.gen.chipset,
                                           self.set.underscore_name,
                                           self.xml.get('underscore_name'))


# Wraps a <set> element from the oa-*.xml files.
class Set:
    """A <set> XML element and the Counter objects built from its <counter>
    children."""

    def __init__(self, gen, xml):
        self.gen = gen
        self.xml = xml

        # "$<symbol_name>" -> Counter
        self.counter_vars = {}
        # symbol_name -> C symbol of the max / read function
        self.max_funcs = {}
        self.read_funcs = {}

        xml_counters = self.xml.findall("counter")
        self.counters = []
        for xml_counter in xml_counters:
            counter = Counter(self, xml_counter)
            self.counters.append(counter)
            self.counter_vars['$' + counter.get('symbol_name')] = counter
            self.read_funcs[counter.get('symbol_name')] = counter.read_sym
            self.max_funcs[counter.get('symbol_name')] = counter.max_sym

        # Hashes may reference any counter of the set, so compute them only
        # once all counters are registered.
        for counter in self.counters:
            counter.compute_hashes()

    @property
    def hw_config_guid(self):
        return self.xml.get('hw_config_guid')

    @property
    def name(self):
        return self.xml.get('name')

    @property
    def symbol_name(self):
        return self.xml.get('symbol_name')

    @property
    def underscore_name(self):
        return self.xml.get('underscore_name')

    def findall(self, path):
        """Forward to findall() of the wrapped XML element."""
        return self.xml.findall(path)

    def find(self, path):
        """Forward to find() of the wrapped XML element."""
        return self.xml.find(path)


# Wraps an entire oa-*.xml file.
class Gen:
    """A parsed oa-*.xml file: its chipset name (taken from the first <set>)
    and one Set per <set> element."""

    def __init__(self, filename):
        self.filename = filename
        self.xml = et.parse(self.filename)

        first_set = self.xml.find('.//set')
        if first_set is None:
            # Fail with a readable message rather than an AttributeError.
            raise Exception("No <set> element found in " + str(self.filename))
        self.chipset = first_set.get('chipset').lower()
        self.sets = []

        for xml_set in self.xml.findall(".//set"):
            self.sets.append(Set(self, xml_set))


def _emit_file_preambles(header_name):
    """Emit the license banner and the leading declarations/includes of the
    header and of the C file (which includes *header_name*)."""
    license_text = textwrap.dedent("""\
        /* Autogenerated file, DO NOT EDIT manually! generated by {}
         *
         * Copyright (c) 2015 Intel Corporation
         *
         * Permission is hereby granted, free of charge, to any person obtaining a
         * copy of this software and associated documentation files (the "Software"),
         * to deal in the Software without restriction, including without limitation
         * the rights to use, copy, modify, merge, publish, distribute, sublicense,
         * and/or sell copies of the Software, and to permit persons to whom the
         * Software is furnished to do so, subject to the following conditions:
         *
         * The above copyright notice and this permission notice (including the next
         * paragraph) shall be included in all copies or substantial portions of the
         * Software.
         *
         * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
         * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
         * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
         * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
         * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
         * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
         * DEALINGS IN THE SOFTWARE.
         */

        """).format(os.path.basename(__file__))

    h(license_text)
    h(textwrap.dedent("""\
        #pragma once

        struct intel_perf_config;

        """))

    c(license_text)
    c(textwrap.dedent("""\
        #include <stdint.h>
        #include <stdbool.h>

        #include "util/hash_table.h"
        #include "util/ralloc.h"

        """))

    c("#include \"" + header_name + "\"")

    c(textwrap.dedent("""\
        #include "perf/intel_perf.h"
        #include "perf/intel_perf_setup.h"
        """))


def _emit_string_tables(gens):
    """Emit the name/desc/symbol_name/category C string tables covering all
    counters of *gens* and return the four string-to-offset tables."""
    names = builtins.set()
    descs = builtins.set()
    symbol_names = builtins.set()
    categories = builtins.set()
    for gen in gens:
        for metric_set in gen.sets:
            for counter in metric_set.counters:
                names.add(counter.get('name'))
                symbol_names.add(counter.get('symbol_name'))
                descs.add(counter.get('description') + " " + desc_units(counter.get('units')))
                categories.add(counter.get('mdapi_group'))

    name_to_idx = str_to_idx_table(names)
    output_str_table("name", name_to_idx)

    desc_to_idx = str_to_idx_table(descs)
    output_str_table("desc", desc_to_idx)

    symbol_name_to_idx = str_to_idx_table(symbol_names)
    output_str_table("symbol_name", symbol_name_to_idx)

    category_to_idx = str_to_idx_table(categories)
    output_str_table("category", category_to_idx)

    return name_to_idx, desc_to_idx, symbol_name_to_idx, category_to_idx


def _emit_counter_table(gens, name_to_idx, desc_to_idx,
                        symbol_name_to_idx, category_to_idx):
    """Emit the C `counters[]` table with one entry per distinct counter key
    and return the key-to-index mapping."""
    c("\n")
    c("static const struct intel_perf_query_counter_data counters[] = {\n")
    c_indent(3)

    counter_to_idx = collections.OrderedDict()
    idx = 0
    for gen in gens:
        for metric_set in gen.sets:
            for counter in metric_set.counters:
                key = counter_key(counter)
                if key not in counter_to_idx:
                    counter_to_idx[key] = idx
                    output_counter_struct(metric_set, key, idx,
                                          name_to_idx,
                                          desc_to_idx,
                                          symbol_name_to_idx,
                                          category_to_idx)
                    idx += 1

    c_outdent(3)
    c("};\n\n")

    return counter_to_idx


def _emit_add_counter_helpers():
    """Emit the fixed C helpers used by the generated registration code."""
    c(textwrap.dedent("""\
        static void ATTRIBUTE_NOINLINE
        intel_perf_query_add_counter_uint64(struct intel_perf_query_info *query,
                                            int counter_idx, size_t offset,
                                            intel_counter_read_uint64_t oa_counter_max,
                                            intel_counter_read_uint64_t oa_counter_read)
        {
           struct intel_perf_query_counter *dest = &query->counters[query->n_counters++];
           const struct intel_perf_query_counter_data *counter = &counters[counter_idx];

           dest->name = &name[counter->name_idx];
           dest->desc = &desc[counter->desc_idx];
           dest->symbol_name = &symbol_name[counter->symbol_name_idx];
           dest->category = &category[counter->category_idx];

           dest->offset = offset;
           dest->type = counter->type;
           dest->data_type = counter->data_type;
           dest->units = counter->units;
           dest->oa_counter_max_uint64 = oa_counter_max;
           dest->oa_counter_read_uint64 = oa_counter_read;
        }

        static void ATTRIBUTE_NOINLINE
        intel_perf_query_add_counter_float(struct intel_perf_query_info *query,
                                           int counter_idx, size_t offset,
                                           intel_counter_read_float_t oa_counter_max,
                                           intel_counter_read_float_t oa_counter_read)
        {
           struct intel_perf_query_counter *dest = &query->counters[query->n_counters++];
           const struct intel_perf_query_counter_data *counter = &counters[counter_idx];

           dest->name = &name[counter->name_idx];
           dest->desc = &desc[counter->desc_idx];
           dest->symbol_name = &symbol_name[counter->symbol_name_idx];
           dest->category = &category[counter->category_idx];

           dest->offset = offset;
           dest->type = counter->type;
           dest->data_type = counter->data_type;
           dest->units = counter->units;
           dest->oa_counter_max_float = oa_counter_max;
           dest->oa_counter_read_float = oa_counter_read;
        }

        static float ATTRIBUTE_NOINLINE
        percentage_max_float(struct intel_perf_config *perf,
                             const struct intel_perf_query_info *query,
                             const struct intel_perf_query_result *results)
        {
           return 100;
        }

        static uint64_t ATTRIBUTE_NOINLINE
        percentage_max_uint64(struct intel_perf_config *perf,
                              const struct intel_perf_query_info *query,
                              const struct intel_perf_query_result *results)
        {
           return 100;
        }
        """))


def _emit_query_registration(gen, metric_set, counter_to_idx):
    """Emit the C function registering the query of *metric_set*."""
    counters = metric_set.counters

    c("\n")
    c("\nstatic void\n")
    c("{0}_register_{1}_counter_query(struct intel_perf_config *perf)\n".format(gen.chipset, metric_set.underscore_name))
    c("{\n")
    c_indent(3)

    c("struct intel_perf_query_info *query = intel_query_alloc(perf, %u);\n" % len(counters))
    c("\n")
    c("query->name = \"" + metric_set.name + "\";\n")
    c("query->symbol_name = \"" + metric_set.symbol_name + "\";\n")
    c("query->guid = \"" + metric_set.hw_config_guid + "\";\n")

    c("\n")
    c("struct intel_perf_query_counter *counter = query->counters;\n")

    c("\n")
    c("/* Note: we're assuming there can't be any variation in the definition ")
    c(" * of a query between contexts so it's ok to describe a query within a ")
    c(" * global variable which only needs to be initialized once... */")
    c("\nif (!query->data_size) {")
    c_indent(3)

    generate_register_configs(metric_set)

    offset = 0
    for counter in counters:
        offset = output_counter_report(metric_set, counter, counter_to_idx, offset)

    c("\ncounter = &query->counters[query->n_counters - 1];\n")
    c("query->data_size = counter->offset + intel_perf_query_counter_get_size(counter);\n")

    c_outdent(3)
    c("}")

    c("\n_mesa_hash_table_insert(perf->oa_metrics_table, query->guid, query);")

    c_outdent(3)
    c("}\n")


def _emit_gen_registration(gen):
    """Emit the prototype and the definition of the C entry point that
    registers every query of *gen*."""
    h("void intel_oa_register_queries_" + gen.chipset + "(struct intel_perf_config *perf);\n")

    c("\nvoid")
    c("intel_oa_register_queries_" + gen.chipset + "(struct intel_perf_config *perf)")
    c("{")
    c_indent(3)

    for metric_set in gen.sets:
        c("{0}_register_{1}_counter_query(perf);".format(gen.chipset, metric_set.underscore_name))

    c_outdent(3)
    c("}")


def main():
    """Parse the command line and generate the C file and header from the
    given XML metrics files."""
    global c_file
    global header_file

    parser = argparse.ArgumentParser()
    parser.add_argument("--header", help="Header file to write", required=True)
    parser.add_argument("--code", help="C file to write", required=True)
    parser.add_argument("xml_files", nargs='+', help="List of xml metrics files to process")

    args = parser.parse_args()

    # The context manager flushes and closes both outputs, also on error.
    with open(args.code, 'w') as c_file, open(args.header, 'w') as header_file:
        gens = []
        for xml_file in args.xml_files:
            gens.append(Gen(xml_file))

        _emit_file_preambles(os.path.basename(args.header))

        (name_to_idx, desc_to_idx,
         symbol_name_to_idx, category_to_idx) = _emit_string_tables(gens)

        # Print out all equation functions.
        for gen in gens:
            for metric_set in gen.sets:
                for counter in metric_set.counters:
                    output_counter_read(gen, metric_set, counter)
                    output_counter_max(gen, metric_set, counter)

        counter_to_idx = _emit_counter_table(gens, name_to_idx, desc_to_idx,
                                             symbol_name_to_idx, category_to_idx)

        _emit_add_counter_helpers()

        # Print out all metric sets registration functions for each set in each
        # generation.
        for gen in gens:
            for metric_set in gen.sets:
                _emit_query_registration(gen, metric_set, counter_to_idx)

            _emit_gen_registration(gen)


if __name__ == '__main__':
    main()