# Copyright (c) 2015-2017 Intel Corporation
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.

import argparse
import os
import sys
import textwrap

# xml.etree.cElementTree was removed in Python 3.9; fall back to the plain
# ElementTree module (which uses the C accelerator itself when available).
try:
    import xml.etree.cElementTree as et
except ImportError:
    import xml.etree.ElementTree as et

# Per metric set: counter symbol name -> C expression for its maximum value.
max_values = {}
# Per metric set: counter symbol name -> name of the generated C read function.
read_funcs = {}

# Output stream for the generated C file (None disables all c*() output).
c_file = None
# Current indentation, in spaces, applied to lines written by c().
_c_indent = 0

def c(*args):
    """Write code to the C output file, one indented line per input line.

    The arguments are converted with str() and joined with single spaces.
    Every resulting line is prefixed with the current indentation, stripped
    of trailing whitespace and terminated with a newline.  Nothing is written
    when no C output file is open.
    """
    if c_file:
        code = ' '.join(map(str, args))
        for line in code.splitlines():
            text = ''.rjust(_c_indent) + line
            c_file.write(text.rstrip() + "\n")

# indented, but no trailing newline...
def c_line_start(code):
    """Write `code` to the C file at the current indent, without a newline."""
    if c_file:
        c_file.write(''.rjust(_c_indent) + code)

def c_raw(code):
    """Write `code` to the C file verbatim (no indent, no newline added)."""
    if c_file:
        c_file.write(code)

def c_indent(n):
    """Increase the C output indentation by `n` spaces."""
    global _c_indent
    _c_indent = _c_indent + n

def c_outdent(n):
    """Decrease the C output indentation by `n` spaces."""
    global _c_indent
    _c_indent = _c_indent - n

# Output stream for the generated header (None disables all h() output).
header_file = None
# Current indentation, in spaces, applied to lines written by h().
_h_indent = 0

def h(*args):
    """Write code to the header output file, one indented line per input line.

    Mirrors c(): arguments are str()-converted and space-joined, each line is
    indented, right-stripped and newline-terminated.  Nothing is written when
    no header file is open.
    """
    if header_file:
        code = ' '.join(map(str, args))
        for line in code.splitlines():
            text = ''.rjust(_h_indent) + line
            header_file.write(text.rstrip() + "\n")

def h_indent(n):
    """Increase the header output indentation by `n` spaces."""
    # Must name _h_indent here: declaring a different global would make the
    # assignment below create a local and raise UnboundLocalError.
    global _h_indent
    _h_indent = _h_indent + n

def h_outdent(n):
    """Decrease the header output indentation by `n` spaces."""
    global _h_indent
    _h_indent = _h_indent - n


# Emitters for RPN operations.  Each one receives the next free temporary id
# and the popped operands (args[0] was on top of the stack, so it is the
# right-hand operand), writes C declarations for tmp<N> variables via c(), and
# returns the next free temporary id.  The result is always the last tmp
# declared.

def emit_fadd(tmp_id, args):
    """Emit `double tmpN = args[1] + args[0];`."""
    c("double tmp{0} = {1} + {2};".format(tmp_id, args[1], args[0]))
    return tmp_id + 1

# Be careful to check for divide by zero...
def emit_fdiv(tmp_id, args):
    """Emit a double division args[1] / args[0] that yields 0 for a zero divisor."""
    c("double tmp{0} = {1};".format(tmp_id, args[1]))
    c("double tmp{0} = {1};".format(tmp_id + 1, args[0]))
    c("double tmp{0} = tmp{1} ? tmp{2} / tmp{1} : 0;".format(tmp_id + 2, tmp_id + 1, tmp_id))
    return tmp_id + 3

def emit_fmax(tmp_id, args):
    """Emit `double tmpN = MAX(args[1], args[0]);` via two intermediate temporaries."""
    c("double tmp{0} = {1};".format(tmp_id, args[1]))
    c("double tmp{0} = {1};".format(tmp_id + 1, args[0]))
    c("double tmp{0} = MAX(tmp{1}, tmp{2});".format(tmp_id + 2, tmp_id, tmp_id + 1))
    return tmp_id + 3

def emit_fmul(tmp_id, args):
    """Emit `double tmpN = args[1] * args[0];`."""
    c("double tmp{0} = {1} * {2};".format(tmp_id, args[1], args[0]))
    return tmp_id + 1

def emit_fsub(tmp_id, args):
    """Emit `double tmpN = args[1] - args[0];`."""
    c("double tmp{0} = {1} - {2};".format(tmp_id, args[1], args[0]))
    return tmp_id + 1

def emit_read(tmp_id, args):
    """Emit a uint64_t read of accumulator[query-><type>_offset + args[0]].

    args[1] names the offset field prefix and is lower-cased before use.
    """
    counter_type = args[1].lower()
    c("uint64_t tmp{0} = accumulator[query->{1}_offset + {2}];".format(tmp_id, counter_type, args[0]))
    return tmp_id + 1

def emit_uadd(tmp_id, args):
    """Emit `uint64_t tmpN = args[1] + args[0];`."""
    c("uint64_t tmp{0} = {1} + {2};".format(tmp_id, args[1], args[0]))
    return tmp_id + 1

# Be careful to check for divide by zero...
def emit_udiv(tmp_id, args):
    """Emit a uint64_t division args[1] / args[0] that yields 0 for a zero divisor.

    Returns the next free temporary id (three temporaries are declared).
    """
    c("uint64_t tmp{0} = {1};".format(tmp_id, args[1]))
    c("uint64_t tmp{0} = {1};".format(tmp_id + 1, args[0]))
    c("uint64_t tmp{0} = tmp{1} ? tmp{2} / tmp{1} : 0;".format(tmp_id + 2, tmp_id + 1, tmp_id))
    return tmp_id + 3

def emit_umul(tmp_id, args):
    """Emit `uint64_t tmpN = args[1] * args[0];` and return the next free id."""
    c("uint64_t tmp{0} = {1} * {2};".format(tmp_id, args[1], args[0]))
    return tmp_id + 1

def emit_usub(tmp_id, args):
    """Emit `uint64_t tmpN = args[1] - args[0];` and return the next free id."""
    c("uint64_t tmp{0} = {1} - {2};".format(tmp_id, args[1], args[0]))
    return tmp_id + 1

def emit_umin(tmp_id, args):
    """Emit `uint64_t tmpN = MIN(args[1], args[0]);` and return the next free id."""
    c("uint64_t tmp{0} = MIN({1}, {2});".format(tmp_id, args[1], args[0]))
    return tmp_id + 1

# RPN operator -> (n operands, emitter)
ops = {
    "FADD": (2, emit_fadd),
    "FDIV": (2, emit_fdiv),
    "FMAX": (2, emit_fmax),
    "FMUL": (2, emit_fmul),
    "FSUB": (2, emit_fsub),
    "READ": (2, emit_read),
    "UADD": (2, emit_uadd),
    "UDIV": (2, emit_udiv),
    "UMUL": (2, emit_umul),
    "USUB": (2, emit_usub),
    "UMIN": (2, emit_umin),
}

def brkt(subexp):
    """Wrap `subexp` in parentheses if it contains a space, else return it as is."""
    if " " in subexp:
        return "(" + subexp + ")"
    else:
        return subexp

# Splicers build a C expression string from popped RPN operands; args[0] was
# on top of the stack, so it becomes the right-hand operand.

def splice_bitwise_and(args):
    """Return the C expression `args[1] & args[0]`."""
    return brkt(args[1]) + " & " + brkt(args[0])

def splice_logical_and(args):
    """Return the C expression `args[1] && args[0]`."""
    return brkt(args[1]) + " && " + brkt(args[0])

def splice_ult(args):
    """Return the C expression `args[1] < args[0]`."""
    return brkt(args[1]) + " < " + brkt(args[0])

def splice_ugte(args):
    """Return the C expression `args[1] >= args[0]`."""
    return brkt(args[1]) + " >= " + brkt(args[0])

# RPN expression operator -> (n operands, splicer)
exp_ops = {
    "AND": (2, splice_bitwise_and),
    "UGTE": (2, splice_ugte),
    "ULT": (2, splice_ult),
    "&&": (2, splice_logical_and),
}


# Equation variable -> C expression it is replaced with in generated code.
hw_vars = {
    "$EuCoresTotalCount": "brw->perfquery.sys_vars.n_eus",
    "$EuSlicesTotalCount": "brw->perfquery.sys_vars.n_eu_slices",
    "$EuSubslicesTotalCount": "brw->perfquery.sys_vars.n_eu_sub_slices",
    "$EuThreadsCount": "brw->perfquery.sys_vars.eu_threads_count",
    "$SliceMask": "brw->perfquery.sys_vars.slice_mask",
    "$SubsliceMask": "brw->perfquery.sys_vars.subslice_mask",
    "$GpuTimestampFrequency": "brw->perfquery.sys_vars.timestamp_frequency",
    "$GpuMinFrequency": "brw->perfquery.sys_vars.gt_min_freq",
    "$GpuMaxFrequency": "brw->perfquery.sys_vars.gt_max_freq",
    "$SkuRevisionId": "brw->perfquery.sys_vars.revision",
}

def _display_name(counter):
    """Return a printable name for `counter` (an XML element or a plain string)."""
    if hasattr(counter, 'get'):
        return counter.get('name')
    return counter

def output_rpn_equation_code(set, counter, equation, counter_vars):
    """Emit the C body evaluating the RPN `equation`, ending in a return.

    Tokens are pushed on a stack; whenever an operator from `ops` reaches the
    top, its operands are popped, `$` variables are resolved through `hw_vars`
    or (for names in `counter_vars`) into a call of the counter's read
    function, and the operator's emitter declares temporaries whose result
    name is pushed back.

    Raises Exception when a `$` variable cannot be resolved or when the
    equation does not reduce to exactly one value.
    """
    c("/* RPN equation: " + equation + " */")
    tokens = equation.split()
    stack = []
    tmp_id = 0

    for token in tokens:
        stack.append(token)
        while stack and stack[-1] in ops:
            op = stack.pop()
            argc, callback = ops[op]
            args = []
            for i in range(0, argc):
                operand = stack.pop()
                if operand[0] == "$":
                    if operand in hw_vars:
                        operand = hw_vars[operand]
                    elif operand in counter_vars:
                        operand = read_funcs[operand[1:]] + "(brw, query, accumulator)"
                    else:
                        raise Exception("Failed to resolve variable " + operand + " in equation " + equation + " for " + set.get('name') + " :: " + counter.get('name'))
                args.append(operand)

            tmp_id = callback(tmp_id, args)

            # The emitter's result lives in the last temporary it declared.
            stack.append("tmp{0}".format(tmp_id - 1))

    if len(stack) != 1:
        raise Exception("Spurious empty rpn code for " + set.get('name') + " :: " +
                counter.get('name') + ".\nThis is probably due to some unhandled RPN function, in the equation \"" +
                equation + "\"")

    value = stack[-1]

    # An equation may consist of a single variable with no operator.
    if value in hw_vars:
        value = hw_vars[value]
    elif value in counter_vars:
        value = read_funcs[value[1:]] + "(brw, query, accumulator)"

    c("\nreturn " + value + ";")

def splice_rpn_expression(set, counter, expression):
    """Convert the RPN `expression` into a single infix C expression string.

    `counter` is only used in error messages and may be either an XML element
    (its 'name' attribute is used) or a plain name string, which is what
    output_availability() passes.

    Raises Exception when a `$` variable is not in `hw_vars` or when the
    expression does not reduce to exactly one value.
    """
    tokens = expression.split()
    stack = []

    for token in tokens:
        stack.append(token)
        while stack and stack[-1] in exp_ops:
            op = stack.pop()
            argc, callback = exp_ops[op]
            args = []
            for i in range(0, argc):
                operand = stack.pop()
                if operand[0] == "$":
                    if operand in hw_vars:
                        operand = hw_vars[operand]
                    else:
                        raise Exception("Failed to resolve variable " + operand + " in expression " + expression + " for " + set.get('name') + " :: " + _display_name(counter))
                args.append(operand)

            subexp = callback(args)

            stack.append(subexp)

    if len(stack) != 1:
        raise Exception("Spurious empty rpn expression for " + set.get('name') + " :: " +
                _display_name(counter) + ".\nThis is probably due to some unhandled RPN operation, in the expression \"" +
                expression + "\"")

    return stack[-1]

def output_counter_read(set, counter, counter_vars):
    """Emit the static C read function for `counter` and return its symbol name."""
    c("\n")
    c("/* {0} :: {1} */".format(set.get('name'), counter.get('name')))
    ret_type = counter.get('data_type')
    if ret_type == "uint64":
        ret_type = "uint64_t"

    c("static " + ret_type)
    read_sym = "{0}__{1}__{2}__read".format(set.get('chipset').lower(), set.get('underscore_name'), counter.get('underscore_name'))
    c(read_sym + "(struct brw_context *brw,\n")
    # Align the remaining parameters under the first one.
    c_indent(len(read_sym) + 1)
    c("const struct brw_perf_query_info *query,\n")
    c("uint64_t *accumulator)\n")
    c_outdent(len(read_sym) + 1)

    c("{")
    c_indent(3)

    output_rpn_equation_code(set, counter, counter.get('equation'), counter_vars)

    c_outdent(3)
    c("}")

    return read_sym

def output_counter_max(set, counter, counter_vars):
    """Return the C text assigned to the counter's raw_max (with trailing ';').

    A missing max equation yields "0"; a plain numeric one is used verbatim.
    An equation referencing any `$` variable outside `hw_vars` is reported as
    unsupported.  Otherwise a static C max function is emitted and a call to
    it is returned.
    """
    max_eq = counter.get('max_equation')

    if not max_eq:
        return "0; /* undefined */"

    try:
        float(max_eq)
    except ValueError:
        pass
    else:
        return max_eq + ";"

    # We can only report constant maximum values via INTEL_performance_query
    for token in max_eq.split():
        if token[0] == '$' and token not in hw_vars:
            return "0; /* unsupported (varies over time) */"

    c("\n")
    c("/* {0} :: {1} */".format(set.get('name'), counter.get('name')))
    ret_type = counter.get('data_type')
    if ret_type == "uint64":
        ret_type = "uint64_t"

    c("static " + ret_type)
    max_sym = "{0}__{1}__{2}__max".format(set.get('chipset').lower(), set.get('underscore_name'), counter.get('underscore_name'))
    c(max_sym + "(struct brw_context *brw)\n")

    c("{")
    c_indent(3)

    output_rpn_equation_code(set, counter, max_eq, counter_vars)

    c_outdent(3)
    c("}")

    return max_sym + "(brw);"

# Size in bytes used for each C type when laying out counter offsets.
c_type_sizes = { "uint32_t": 4, "uint64_t": 8, "float": 4, "double": 8, "bool": 4 }
def sizeof(c_type):
    """Return the size in bytes recorded for `c_type` in `c_type_sizes`."""
    return c_type_sizes[c_type]

def pot_align(base, pot_alignment):
    """Round `base` up to a multiple of the power-of-two `pot_alignment`."""
    return (base + pot_alignment - 1) & ~(pot_alignment - 1)

# Semantic types that are reported under a different GL counter type name.
semantic_type_map = {
    "duration": "raw",
    "ratio": "event"
    }

def output_availability(set, availability, counter_name):
    """Emit the opening `if (...) {` for the RPN `availability` expression.

    The condition is split at ' && ' so each clause goes on its own line.
    The caller is responsible for emitting the closing brace.
    """
    expression = splice_rpn_expression(set, counter_name, availability)
    lines = expression.split(' && ')
    n_lines = len(lines)
    if n_lines == 1:
        c("if (" + lines[0] + ") {")
    else:
        c("if (" + lines[0] + " &&")
        c_indent(4)
        for i in range(1, (n_lines - 1)):
            c(lines[i] + " &&")
        c(lines[(n_lines - 1)] + ") {")
        c_outdent(4)


def output_counter_report(set, counter, current_offset):
    """Emit the C code registering `counter` in the query.

    The counter is placed at `current_offset` rounded up to its own size.
    Returns the offset just past the counter.
    """
    data_type = counter.get('data_type')
    data_type_uc = data_type.upper()
    c_type = data_type

    if "uint" in c_type:
        c_type = c_type + "_t"

    semantic_type = counter.get('semantic_type')
    if semantic_type in semantic_type_map:
        semantic_type = semantic_type_map[semantic_type]

    semantic_type_uc = semantic_type.upper()

    c("\n")

    availability = counter.get('availability')
    if availability:
        output_availability(set, availability, counter.get('name'))
        c_indent(3)

    c("counter = &query->counters[query->n_counters++];\n")
    c("counter->oa_counter_read_" + data_type + " = " + read_funcs[counter.get('symbol_name')] + ";\n")
    c("counter->name = \"" + counter.get('name') + "\";\n")
    c("counter->desc = \"" + counter.get('description') + "\";\n")
    c("counter->type = GL_PERFQUERY_COUNTER_" + semantic_type_uc + "_INTEL;\n")
    c("counter->data_type = GL_PERFQUERY_COUNTER_DATA_" + data_type_uc + "_INTEL;\n")
    # max_values entries already carry their terminating ';'.
    c("counter->raw_max = " + max_values[counter.get('symbol_name')] + "\n")

    current_offset = pot_align(current_offset, sizeof(c_type))
    c("counter->offset = " + str(current_offset) + ";\n")
    c("counter->size = sizeof(" + c_type + ");\n")

    if availability:
        c_outdent(3)
        c("}")

    return current_offset + sizeof(c_type)


# XML register_config type -> name of the query field holding those registers.
register_types = {
    'FLEX': 'flex_regs',
    'NOA': 'mux_regs',
    'OA': 'b_counter_regs',
}

def compute_register_lengths(set):
    """Return {register field name: total number of <register> children}."""
    register_lengths = {}
    for register_config in set.findall('register_config'):
        t = register_types[register_config.get('type')]
        n_registers = len(register_config.findall('register'))
        register_lengths[t] = register_lengths.get(t, 0) + n_registers

    return register_lengths


def generate_register_configs(set):
    """Emit C code appending every register of every register_config to the query."""
    register_configs = set.findall('register_config')
    for register_config in register_configs:
        t = register_types[register_config.get('type')]

        availability = register_config.get('availability')
        if availability:
            output_availability(set, availability, register_config.get('type') + ' register config')
            c_indent(3)

        for register in register_config.findall('register'):
            c("query->%s[query->n_%s++] = (struct brw_perf_query_register_prog) { .reg = %s, .val = %s };" %
              (t, t, register.get('address'), register.get('value')))

        if availability:
            c_outdent(3)
            c("}")
        c("\n")


def _emit_prologue(chipset):
    """Write the license banner, includes and MIN/MAX macros to the outputs."""
    banner = textwrap.dedent("""\
        /* Autogenerated file, DO NOT EDIT manually! generated by {}
         *
         * Copyright (c) 2015 Intel Corporation
         *
         * Permission is hereby granted, free of charge, to any person obtaining a
         * copy of this software and associated documentation files (the "Software"),
         * to deal in the Software without restriction, including without limitation
         * the rights to use, copy, modify, merge, publish, distribute, sublicense,
         * and/or sell copies of the Software, and to permit persons to whom the
         * Software is furnished to do so, subject to the following conditions:
         *
         * The above copyright notice and this permission notice (including the next
         * paragraph) shall be included in all copies or substantial portions of the
         * Software.
         *
         * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
         * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
         * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
         * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
         * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
         * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
         * DEALINGS IN THE SOFTWARE.
         */

        """).format(os.path.basename(__file__))

    h(banner)
    h(textwrap.dedent("""\
        #pragma once

        struct brw_context;

        """))

    c(banner)
    c(textwrap.dedent("""\
        #include <stdint.h>
        #include <stdbool.h>

        #include "util/hash_table.h"

        """))

    c("#include \"brw_oa_" + chipset + ".h\"")

    c(textwrap.dedent("""\
        #include "brw_context.h"
        #include "brw_performance_query.h"


        #define MIN(a, b) ((a < b) ? (a) : (b))
        #define MAX(a, b) ((a > b) ? (a) : (b))

        """))


def _emit_metric_set(metric_set, chipset):
    """Emit the read/max functions, query info and register function of one set."""
    global max_values
    global read_funcs

    # These tables are per metric set; start each set from scratch.
    max_values = {}
    read_funcs = {}
    counter_vars = {}
    counters = metric_set.findall("counter")
    set_name = metric_set.get('underscore_name')

    # Explicit check rather than assert, so it still runs under "python -O".
    if metric_set.get('chipset').lower() != chipset:
        raise Exception("Metric set {0} is for chipset {1}, expected {2}".format(
            metric_set.get('name'), metric_set.get('chipset'), chipset))

    for counter in counters:
        empty_vars = {}
        read_funcs[counter.get('symbol_name')] = output_counter_read(metric_set, counter, counter_vars)
        max_values[counter.get('symbol_name')] = output_counter_max(metric_set, counter, empty_vars)
        # Only counters already emitted may be referenced by later equations.
        counter_vars["$" + counter.get('symbol_name')] = counter

    c("\n")
    register_lengths = compute_register_lengths(metric_set)
    # items() rather than iteritems(): works on both Python 2 and Python 3.
    for reg_type, reg_length in register_lengths.items():
        c("static struct brw_perf_query_register_prog {0}_{1}_{2}[{3}];".format(chipset,
                                                                                 set_name,
                                                                                 reg_type, reg_length))

    c("\nstatic struct brw_perf_query_counter {0}_{1}_query_counters[{2}];\n".format(chipset, set_name, len(counters)))
    c("static struct brw_perf_query_info " + chipset + "_" + set_name + "_query = {\n")
    c_indent(3)

    c(".kind = OA_COUNTERS,\n")
    c(".name = \"" + metric_set.get('name') + "\",\n")
    c(".guid = \"" + metric_set.get('hw_config_guid') + "\",\n")

    c(".counters = {0}_{1}_query_counters,".format(chipset, set_name))
    c(".n_counters = 0,")
    c(".oa_metrics_set_id = 0, /* determined at runtime, via sysfs */")

    if chipset == "hsw":
        c(textwrap.dedent("""\
            .oa_format = I915_OA_FORMAT_A45_B8_C8,

            /* Accumulation buffer offsets... */
            .gpu_time_offset = 0,
            .a_offset = 1,
            .b_offset = 46,
            .c_offset = 54,
            """))
    else:
        c(textwrap.dedent("""\
            .oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8,

            /* Accumulation buffer offsets... */
            .gpu_time_offset = 0,
            .gpu_clock_offset = 1,
            .a_offset = 2,
            .b_offset = 38,
            .c_offset = 46,
            """))

    for reg_type in register_lengths:
        c(".{0} = {1}_{2}_{3},".format(reg_type, chipset, set_name, reg_type))
        c(".n_{0} = 0, /* Determined at runtime */".format(reg_type))

    c_outdent(3)
    c("};\n")

    c("\nstatic void\n")
    c("register_" + set_name + "_counter_query(struct brw_context *brw)\n")
    c("{\n")
    c_indent(3)

    c("static struct brw_perf_query_info *query = &" + chipset + "_" + set_name + "_query;\n")
    c("struct brw_perf_query_counter *counter;\n")

    c("\n")
    c("/* Note: we're assuming there can't be any variation in the definition ")
    c(" * of a query between contexts so it's ok to describe a query within a ")
    c(" * global variable which only needs to be initialized once... */")
    c("\nif (!query->data_size) {")
    c_indent(3)

    generate_register_configs(metric_set)

    offset = 0
    for counter in counters:
        offset = output_counter_report(metric_set, counter, offset)


    c("\nquery->data_size = counter->offset + counter->size;\n")

    c_outdent(3)
    c("}")

    c("\n_mesa_hash_table_insert(brw->perfquery.oa_metrics_table, query->guid, query);")

    c_outdent(3)
    c("}\n")


def _emit_register_queries(metric_sets, chipset):
    """Emit the public function that calls every set's register function."""
    h("void brw_oa_register_queries_" + chipset + "(struct brw_context *brw);\n")

    c("\nvoid")
    c("brw_oa_register_queries_" + chipset + "(struct brw_context *brw)")
    c("{")
    c_indent(3)

    for metric_set in metric_sets:
        c("register_" + metric_set.get('underscore_name') + "_counter_query(brw);")

    c_outdent(3)
    c("}")


def main():
    """Parse the command line and generate the requested C and header files.

    The XML is parsed before any output file is opened, so a malformed input
    does not truncate existing outputs.  Output files are always closed, even
    when generation fails.
    """
    global c_file
    global header_file

    parser = argparse.ArgumentParser()
    parser.add_argument("xml", help="XML description of metrics")
    parser.add_argument("--header", help="Header file to write")
    parser.add_argument("--code", help="C file to write")
    parser.add_argument("--chipset", help="Chipset to generate code for", required=True)

    args = parser.parse_args()

    chipset = args.chipset.lower()

    tree = et.parse(args.xml)
    metric_sets = tree.findall(".//set")

    try:
        if args.header:
            header_file = open(args.header, 'w')

        if args.code:
            c_file = open(args.code, 'w')

        _emit_prologue(chipset)

        for metric_set in metric_sets:
            _emit_metric_set(metric_set, chipset)

        _emit_register_queries(metric_sets, chipset)
    finally:
        # Close the outputs and disable the writers so nothing can write to a
        # closed stream afterwards.
        if c_file:
            c_file.close()
            c_file = None
        if header_file:
            header_file.close()
            header_file = None


if __name__ == '__main__':
    main()