# Generator script: renders the Mako template below with the Format enum from
# aco_opcodes and prints the resulting C++ header (include guard _ACO_BUILDER_)
# to stdout.

template = """\
/*
 * Copyright (c) 2019 Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 * This file was generated by aco_builder_h.py
 */

#ifndef _ACO_BUILDER_
#define _ACO_BUILDER_

#include "aco_ir.h"

namespace aco {
/* DPP control encodings. Entries prefixed with an underscore are base values
 * that the helper functions below combine with a lane/amount/mask argument. */
enum dpp_ctrl {
    _dpp_quad_perm = 0x000,
    _dpp_row_sl = 0x100,
    _dpp_row_sr = 0x110,
    _dpp_row_rr = 0x120,
    dpp_wf_sl1 = 0x130,
    dpp_wf_rl1 = 0x134,
    dpp_wf_sr1 = 0x138,
    dpp_wf_rr1 = 0x13C,
    dpp_row_mirror = 0x140,
    dpp_row_half_mirror = 0x141,
    dpp_row_bcast15 = 0x142,
    dpp_row_bcast31 = 0x143,
    _dpp_row_share = 0x150,
    _dpp_row_xmask = 0x160,
};

/* Packs four 2-bit lane selectors (each must be < 4) into bits 0..7. */
inline dpp_ctrl
dpp_quad_perm(unsigned lane0, unsigned lane1, unsigned lane2, unsigned lane3)
{
    assert(lane0 < 4 && lane1 < 4 && lane2 < 4 && lane3 < 4);
    return (dpp_ctrl)(lane0 | (lane1 << 2) | (lane2 << 4) | (lane3 << 6));
}

/* ORs amount (1..15) into the _dpp_row_sl base value. */
inline dpp_ctrl
dpp_row_sl(unsigned amount)
{
    assert(amount > 0 && amount < 16);
    return (dpp_ctrl)(((unsigned) _dpp_row_sl) | amount);
}

/* ORs amount (1..15) into the _dpp_row_sr base value. */
inline dpp_ctrl
dpp_row_sr(unsigned amount)
{
    assert(amount > 0 && amount < 16);
    return (dpp_ctrl)(((unsigned) _dpp_row_sr) | amount);
}

/* ORs amount (1..15) into the _dpp_row_rr base value. */
inline dpp_ctrl
dpp_row_rr(unsigned amount)
{
    assert(amount > 0 && amount < 16);
    return (dpp_ctrl)(((unsigned) _dpp_row_rr) | amount);
}

/* ORs lane (0..15) into the _dpp_row_share base value. */
inline dpp_ctrl
dpp_row_share(unsigned lane)
{
    assert(lane < 16);
    return (dpp_ctrl)(((unsigned) _dpp_row_share) | lane);
}

/* ORs mask (0..15) into the _dpp_row_xmask base value. */
inline dpp_ctrl
dpp_row_xmask(unsigned mask)
{
    assert(mask < 16);
    return (dpp_ctrl)(((unsigned) _dpp_row_xmask) | mask);
}

/* Packs three 5-bit masks: and_mask at bit 0, or_mask at bit 5, xor_mask at bit 10. */
inline unsigned
ds_pattern_bitmode(unsigned and_mask, unsigned or_mask, unsigned xor_mask)
{
    assert(and_mask < 32 && or_mask < 32 && xor_mask < 32);
    return and_mask | (or_mask << 5) | (xor_mask << 10);
}

/* Packs a 5-bit mask at bit 0 and a 5-bit delta at bit 5, and sets the 0xc000 bits. */
inline unsigned
ds_pattern_rotate(unsigned delta, unsigned mask)
{
    assert(delta < 32 && mask < 32);
    return mask | (delta << 5) | 0xc000;
}

aco_ptr<Instruction> create_s_mov(Definition dst, Operand src);

enum sendmsg {
   sendmsg_none = 0,
   sendmsg_gs = 2, /* gfx6 to gfx10.3 */
   sendmsg_gs_done = 3, /* gfx6 to gfx10.3 */
   sendmsg_hs_tessfactor = 2, /* gfx11+ */
   sendmsg_dealloc_vgprs = 3, /* gfx11+ */
   sendmsg_save_wave = 4, /* gfx8 to gfx10.3 */
   sendmsg_stall_wave_gen = 5, /* gfx9+ */
   sendmsg_halt_waves = 6, /* gfx9+ */
   sendmsg_ordered_ps_done = 7, /* gfx9+ */
   sendmsg_early_prim_dealloc = 8, /* gfx9 to gfx10 */
   sendmsg_gs_alloc_req = 9, /* gfx9+ */
   sendmsg_get_doorbell = 10, /* gfx9 to gfx10.3 */
   sendmsg_get_ddid = 11, /* gfx10 to gfx10.3 */
   sendmsg_id_mask = 0xf,
};

/* gfx11+ */
enum sendmsg_rtn {
   sendmsg_rtn_get_doorbell = 0,
   sendmsg_rtn_get_ddid = 1,
   sendmsg_rtn_get_tma = 2,
   sendmsg_rtn_get_realtime = 3,
   sendmsg_rtn_save_wave = 4,
   sendmsg_rtn_get_tba = 5,
   sendmsg_rtn_mask = 0xff,
};

enum bperm_swiz {
   bperm_b1_sign = 8,
   bperm_b3_sign = 9,
   bperm_b5_sign = 10,
   bperm_b7_sign = 11,
   bperm_0 = 12,
   bperm_255 = 13,
};

enum class alu_delay_wait {
   NO_DEP = 0,
   VALU_DEP_1 = 1,
   VALU_DEP_2 = 2,
   VALU_DEP_3 = 3,
   VALU_DEP_4 = 4,
   TRANS32_DEP_1 = 5,
   TRANS32_DEP_2 = 6,
   TRANS32_DEP_3 = 7,
   FMA_ACCUM_CYCLE_1 = 8,
   SALU_CYCLE_1 = 9,
   SALU_CYCLE_2 = 10,
   SALU_CYCLE_3 = 11,
};

/* Helper for creating instructions and inserting them into an instruction
 * list: at an iterator position, at the end, or at the beginning. */
class Builder {
public:
   /* Thin wrapper around the created Instruction pointer that converts
    * implicitly to the instruction, to its first definition's Temp, or to an
    * Operand built from that Temp. */
   struct Result {
      Instruction *instr;

      Result(Instruction *instr_) : instr(instr_) {}

      operator Instruction *() const {
         return instr;
      }

      operator Temp() const {
         return instr->definitions[0].getTemp();
      }

      operator Operand() const {
         return Operand((Temp)*this);
      }

      Definition& def(unsigned index) const {
         return instr->definitions[index];
      }

      /* Wraps the raw pointer in a new aco_ptr. */
      aco_ptr<Instruction> get_ptr() const {
        return aco_ptr<Instruction>(instr);
      }

      Instruction * operator * () const {
         return instr;
      }

      Instruction * operator -> () const {
         return instr;
      }
   };

   /* Operand argument type accepting a Temp, an Operand or a Result. */
   struct Op {
      Operand op;
      Op(Temp tmp) : op(tmp) {}
      Op(Operand op_) : op(op_) {}
      Op(Result res) : op((Temp)res) {}
   };

   /* Each entry carries the value of the 64-bit opcode; w64or32() maps it to
    * the 32-bit counterpart when the program's wave size is not 64. */
   enum WaveSpecificOpcode {
      s_cselect = (unsigned) aco_opcode::s_cselect_b64,
      s_cmp_lg = (unsigned) aco_opcode::s_cmp_lg_u64,
      s_and = (unsigned) aco_opcode::s_and_b64,
      s_andn2 = (unsigned) aco_opcode::s_andn2_b64,
      s_or = (unsigned) aco_opcode::s_or_b64,
      s_orn2 = (unsigned) aco_opcode::s_orn2_b64,
      s_not = (unsigned) aco_opcode::s_not_b64,
      s_mov = (unsigned) aco_opcode::s_mov_b64,
      s_wqm = (unsigned) aco_opcode::s_wqm_b64,
      s_and_saveexec = (unsigned) aco_opcode::s_and_saveexec_b64,
      s_or_saveexec = (unsigned) aco_opcode::s_or_saveexec_b64,
      s_andn2_wrexec = (unsigned) aco_opcode::s_andn2_wrexec_b64,
      s_xnor = (unsigned) aco_opcode::s_xnor_b64,
      s_xor = (unsigned) aco_opcode::s_xor_b64,
      s_bcnt1_i32 = (unsigned) aco_opcode::s_bcnt1_i32_b64,
      s_bitcmp1 = (unsigned) aco_opcode::s_bitcmp1_b64,
      s_ff1_i32 = (unsigned) aco_opcode::s_ff1_i32_b64,
      s_flbit_i32 = (unsigned) aco_opcode::s_flbit_i32_b64,
      s_lshl = (unsigned) aco_opcode::s_lshl_b64,
   };

   Program *program;
   bool use_iterator;
   bool start; // only when use_iterator == false
   RegClass lm;

   std::vector<aco_ptr<Instruction>> *instructions;
   std::vector<aco_ptr<Instruction>>::iterator it;
   /* Flags copied onto every definition of the instructions built by the
    * generated per-format methods. */
   bool is_precise = false;
   bool is_sz_preserve = false;
   bool is_inf_preserve = false;
   bool is_nan_preserve = false;
   bool is_nuw = false;

   Builder(Program *pgm) : program(pgm), use_iterator(false), start(false), lm(pgm ? pgm->lane_mask : s2), instructions(nullptr) {}
   Builder(Program *pgm, Block *block) : program(pgm), use_iterator(false), start(false), lm(pgm ? pgm->lane_mask : s2), instructions(&block->instructions) {}
   Builder(Program *pgm, std::vector<aco_ptr<Instruction>> *instrs) : program(pgm), use_iterator(false), start(false), lm(pgm ? pgm->lane_mask : s2), instructions(instrs) {}

   /* Returns a copy of this builder with is_precise set. */
   Builder precise() const {
      Builder res = *this;
      res.is_precise = true;
      return res;
   }

   /* Returns a copy of this builder with is_nuw set. */
   Builder nuw() const {
      Builder res = *this;
      res.is_nuw = true;
      return res;
   }

   /* Retargets the builder at the instruction list of block; the insertion
    * mode flags are left as they are. */
   void moveEnd(Block *block) {
      instructions = &block->instructions;
   }

   /* Detaches the builder: insert() then only returns the pointer. */
   void reset() {
      use_iterator = false;
      start = false;
      instructions = nullptr;
   }

   /* Appends to the instruction list of block from now on. */
   void reset(Block *block) {
      use_iterator = false;
      start = false;
      instructions = &block->instructions;
   }

   /* Appends to instrs from now on. */
   void reset(std::vector<aco_ptr<Instruction>> *instrs) {
      use_iterator = false;
      start = false;
      instructions = instrs;
   }

   /* Inserts before instr_it in instrs from now on. */
   void reset(std::vector<aco_ptr<Instruction>> *instrs, std::vector<aco_ptr<Instruction>>::iterator instr_it) {
      use_iterator = true;
      start = false;
      instructions = instrs;
      it = instr_it;
   }

   /* Inserts instr according to the current mode: before the iterator (which
    * is then advanced past the new element), at the end, or at the beginning
    * when start is set. Nothing is inserted when no list is attached. */
   Result insert(aco_ptr<Instruction> instr) {
      Instruction *instr_ptr = instr.get();
      if (instructions) {
         if (use_iterator) {
            it = instructions->emplace(it, std::move(instr));
            it = std::next(it);
         } else if (!start) {
            instructions->emplace_back(std::move(instr));
         } else {
            instructions->emplace(instructions->begin(), std::move(instr));
         }
      }
      return Result(instr_ptr);
   }

   /* Same as above for a raw pointer, which is wrapped in an aco_ptr only
    * when a list is attached. */
   Result insert(Instruction* instr) {
      if (instructions) {
         if (use_iterator) {
            it = instructions->emplace(it, aco_ptr<Instruction>(instr));
            it = std::next(it);
         } else if (!start) {
            instructions->emplace_back(aco_ptr<Instruction>(instr));
         } else {
            instructions->emplace(instructions->begin(), aco_ptr<Instruction>(instr));
         }
      }
      return Result(instr);
   }

   /* Allocates a new temporary of the given register class from the program. */
   Temp tmp(RegClass rc) {
      return program->allocateTmp(rc);
   }

   Temp tmp(RegType type, unsigned size) {
      return tmp(RegClass(type, size));
   }

   /* Creates a definition for a newly allocated temporary. */
   Definition def(RegClass rc) {
      return Definition(program->allocateTmp(rc));
   }

   Definition def(RegType type, unsigned size) {
      return def(RegClass(type, size));
   }

   /* Creates a definition for a newly allocated temporary, constructed with reg. */
   Definition def(RegClass rc, PhysReg reg) {
      return Definition(tmp(rc), reg);
   }

   /* Returns the 64-bit opcode stored in the enum value when the wave size
    * is 64, otherwise the matching 32-bit opcode. */
   inline aco_opcode w64or32(WaveSpecificOpcode opcode) const {
      if (program->wave_size == 64)
         return (aco_opcode) opcode;

      switch (opcode) {
      case s_cselect:
         return aco_opcode::s_cselect_b32;
      case s_cmp_lg:
         return aco_opcode::s_cmp_lg_u32;
      case s_and:
         return aco_opcode::s_and_b32;
      case s_andn2:
         return aco_opcode::s_andn2_b32;
      case s_or:
         return aco_opcode::s_or_b32;
      case s_orn2:
         return aco_opcode::s_orn2_b32;
      case s_not:
         return aco_opcode::s_not_b32;
      case s_mov:
         return aco_opcode::s_mov_b32;
      case s_wqm:
         return aco_opcode::s_wqm_b32;
      case s_and_saveexec:
         return aco_opcode::s_and_saveexec_b32;
      case s_or_saveexec:
         return aco_opcode::s_or_saveexec_b32;
      case s_andn2_wrexec:
         return aco_opcode::s_andn2_wrexec_b32;
      case s_xnor:
         return aco_opcode::s_xnor_b32;
      case s_xor:
         return aco_opcode::s_xor_b32;
      case s_bcnt1_i32:
         return aco_opcode::s_bcnt1_i32_b32;
      case s_bitcmp1:
         return aco_opcode::s_bitcmp1_b32;
      case s_ff1_i32:
         return aco_opcode::s_ff1_i32_b32;
      case s_flbit_i32:
         return aco_opcode::s_flbit_i32_b32;
      case s_lshl:
         return aco_opcode::s_lshl_b32;
      default:
         unreachable("Unsupported wave specific opcode.");
      }
   }

## One Operand overload and one Definition overload per fixed register name;
## each returns its argument precolored to that register.
% for fixed in ['m0', 'vcc', 'exec', 'scc']:
   Operand ${fixed}(Temp tmp) {
       % if fixed == 'vcc' or fixed == 'exec':
          //vcc_hi and exec_hi can still be used in wave32
          assert(tmp.type() == RegType::sgpr && tmp.bytes() <= 8);
       % endif
       Operand op(tmp);
       op.setPrecolored(aco::${fixed});
       return op;
   }

   Definition ${fixed}(Definition def) {
       % if fixed == 'vcc' or fixed == 'exec':
          //vcc_hi and exec_hi can still be used in wave32
          assert(def.regClass().type() == RegType::sgpr && def.bytes() <= 8);
       % endif
       def.setPrecolored(aco::${fixed});
       return def;
   }

% endfor

   /* Returns a copy of op with the 16bit flag set. */
   Operand set16bit(Operand op) {
       op.set16bit(true);
       return op;
   }

   /* Returns a copy of op with the 24bit flag set. */
   Operand set24bit(Operand op) {
       op.set24bit(true);
       return op;
   }

   /* hand-written helpers */

   /* Returns the temporary of op unchanged unless it is a VGPR, in which case
    * a p_as_uniform into a new SGPR definition of the same size is emitted. */
   Temp as_uniform(Op op)
   {
      assert(op.op.isTemp());
      if (op.op.getTemp().type() == RegType::vgpr)
         return pseudo(aco_opcode::p_as_uniform, def(RegType::sgpr, op.op.size()), op);
      else
         return op.op.getTemp();
   }

   /* Emits dst = tmp * imm for a VGPR tmp. Cheaper forms are tried first
    * (copy, negation, shift, 24-bit multiply, shift plus add/sub, a chain of
    * shifts and adds) before falling back to v_mul_lo_u32. tmpu24/tmpi24 let
    * the caller allow the unsigned/signed 24-bit multiplies. */
   Result v_mul_imm(Definition dst, Temp tmp, uint32_t imm, bool tmpu24=false, bool tmpi24=false)
   {
      assert(tmp.type() == RegType::vgpr);
      /* Assume 24bit if high 8 bits of tmp don't impact the result. */
      if ((imm & 0xff) == 0) {
         tmpu24 = true;
         tmpi24 = true;
      }
      tmpu24 &= imm <= 0xffffffu;
      tmpi24 &= imm <= 0x7fffffu || imm >= 0xff800000u;
      bool has_lshl_add = program->gfx_level >= GFX9;
      /* v_mul_lo_u32 has 1.6x the latency of most VALU on GFX10 (8 vs 5 cycles),
       * compared to 4x the latency on <GFX10. */
      unsigned mul_cost = program->gfx_level >= GFX10 ? 1 : (4 + Operand::c32(imm).isLiteral());
      if (imm == 0) {
         return copy(dst, Operand::zero());
      } else if (imm == 1) {
         return copy(dst, Operand(tmp));
      } else if (imm == 0xffffffff) {
         return vsub32(dst, Operand::zero(), tmp);
      } else if (util_is_power_of_two_or_zero(imm)) {
         return vop2(aco_opcode::v_lshlrev_b32, dst, Operand::c32(ffs(imm) - 1u), tmp);
      } else if (tmpu24) {
        return vop2(aco_opcode::v_mul_u32_u24, dst, Operand::c32(imm), tmp);
      } else if (tmpi24) {
        return vop2(aco_opcode::v_mul_i32_i24, dst, Operand::c32(imm), tmp);
      } else if (util_is_power_of_two_nonzero(imm - 1u)) {
         return vadd32(dst, vop2(aco_opcode::v_lshlrev_b32, def(v1), Operand::c32(ffs(imm - 1u) - 1u), tmp), tmp);
      } else if (mul_cost > 2 && util_is_power_of_two_nonzero(imm + 1u)) {
         return vsub32(dst, vop2(aco_opcode::v_lshlrev_b32, def(v1), Operand::c32(ffs(imm + 1u) - 1u), tmp), tmp);
      }

      unsigned instrs_required = util_bitcount(imm);
      if (!has_lshl_add) {
         instrs_required = util_bitcount(imm) - (imm & 0x1); /* shifts */
         instrs_required += util_bitcount(imm) - 1; /* additions */
      }
      if (instrs_required < mul_cost) {
         Result res(nullptr);
         Temp cur;
         /* One step per set bit of imm, lowest first; the last step writes dst. */
         while (imm) {
            unsigned shift = u_bit_scan(&imm);
            Definition tmp_dst = imm ? def(v1) : dst;

            if (shift && cur.id())
               res = vadd32(Definition(tmp_dst), vop2(aco_opcode::v_lshlrev_b32, def(v1), Operand::c32(shift), tmp), cur);
            else if (shift)
               res = vop2(aco_opcode::v_lshlrev_b32, Definition(tmp_dst), Operand::c32(shift), tmp);
            else if (cur.id())
               res = vadd32(Definition(tmp_dst), tmp, cur);
            else
               tmp_dst = Definition(tmp);

            cur = tmp_dst.getTemp();
         }
         return res;
      }

      Temp imm_tmp = copy(def(s1), Operand::c32(imm));
      return vop3(aco_opcode::v_mul_lo_u32, dst, imm_tmp, tmp);
   }

   /* v_mul_imm with imm truncated to 24 bits and the unsigned 24-bit multiply allowed. */
   Result v_mul24_imm(Definition dst, Temp tmp, uint32_t imm)
   {
      return v_mul_imm(dst, tmp, imm & 0xffffffu, true);
   }

   /* Emits a p_parallelcopy of op into dst. */
   Result copy(Definition dst, Op op) {
      return pseudo(aco_opcode::p_parallelcopy, dst, op);
   }

   /* Emits a 32-bit VALU add. The operands are swapped when b is a constant
    * or not a VGPR; unless post_ra is set, a b that still has no register
    * class or is an SGPR is first copied into a new VGPR. The opcode depends
    * on whether carry_in is defined, on carry_out and on the gfx level. */
   Result vadd32(Definition dst, Op a, Op b, bool carry_out=false, Op carry_in=Op(Operand(s2)), bool post_ra=false) {
      if (b.op.isConstant() || b.op.regClass().type() != RegType::vgpr)
         std::swap(a, b);
      if (!post_ra && (!b.op.hasRegClass() || b.op.regClass().type() == RegType::sgpr))
         b = copy(def(v1), b);

      if (!carry_in.op.isUndefined())
         return vop2(aco_opcode::v_addc_co_u32, Definition(dst), def(lm), a, b, carry_in);
      else if (program->gfx_level >= GFX10 && carry_out)
         return vop3(aco_opcode::v_add_co_u32_e64, Definition(dst), def(lm), a, b);
      else if (program->gfx_level < GFX9 || carry_out)
         return vop2(aco_opcode::v_add_co_u32, Definition(dst), def(lm), a, b);
      else
         return vop2(aco_opcode::v_add_u32, Definition(dst), a, b);
   }

   /* Emits a 32-bit VALU subtract. When b is not a VGPR temporary the
    * operands are swapped and the reversed opcode is used. A carry
    * definition is always written when borrow is defined or on pre-GFX9. */
   Result vsub32(Definition dst, Op a, Op b, bool carry_out=false, Op borrow=Op(Operand(s2)))
   {
      if (!borrow.op.isUndefined() || program->gfx_level < GFX9)
         carry_out = true;

      bool reverse = !b.op.isTemp() || b.op.regClass().type() != RegType::vgpr;
      if (reverse)
         std::swap(a, b);
      if (!b.op.hasRegClass() || b.op.regClass().type() == RegType::sgpr)
         b = copy(def(v1), b);

      aco_opcode op;
      Temp carry;
      if (carry_out) {
         carry = tmp(lm);
         if (borrow.op.isUndefined())
            op = reverse ? aco_opcode::v_subrev_co_u32 : aco_opcode::v_sub_co_u32;
         else
            op = reverse ? aco_opcode::v_subbrev_co_u32 : aco_opcode::v_subb_co_u32;
      } else {
         op = reverse ? aco_opcode::v_subrev_u32 : aco_opcode::v_sub_u32;
      }
      /* On GFX10+ the carry-out forms without borrow use the e64 opcodes. */
      bool vop3 = false;
      if (program->gfx_level >= GFX10 && op == aco_opcode::v_subrev_co_u32) {
        vop3 = true;
        op = aco_opcode::v_subrev_co_u32_e64;
      } else if (program->gfx_level >= GFX10 && op == aco_opcode::v_sub_co_u32) {
        vop3 = true;
        op = aco_opcode::v_sub_co_u32_e64;
      }

      int num_ops = borrow.op.isUndefined() ? 2 : 3;
      int num_defs = carry_out ? 2 : 1;
      aco_ptr<Instruction> sub;
      if (vop3)
        sub.reset(create_instruction(op, Format::VOP3, num_ops, num_defs));
      else
        sub.reset(create_instruction(op, Format::VOP2, num_ops, num_defs));
      sub->operands[0] = a.op;
      sub->operands[1] = b.op;
      if (!borrow.op.isUndefined())
         sub->operands[2] = borrow.op;
      sub->definitions[0] = dst;
      if (carry_out)
         sub->definitions[1] = Definition(carry);

      return insert(std::move(sub));
   }

   /* Emits v_readlane_b32, using the e64 opcode on GFX8 and newer. */
   Result readlane(Definition dst, Op vsrc, Op lane)
   {
      if (program->gfx_level >= GFX8)
         return vop3(aco_opcode::v_readlane_b32_e64, dst, vsrc, lane);
      else
         return vop2(aco_opcode::v_readlane_b32, dst, vsrc, lane);
   }

   /* Emits v_writelane_b32, using the e64 opcode on GFX8 and newer. */
   Result writelane(Definition dst, Op val, Op lane, Op vsrc) {
      if (program->gfx_level >= GFX8)
         return vop3(aco_opcode::v_writelane_b32_e64, dst, val, lane, vsrc);
      else
         return vop2(aco_opcode::v_writelane_b32, dst, val, lane, vsrc);
   }
<%
import itertools
# Each entry: (builder method name, list of Format values that are combined,
# iterable of (num_definitions, num_operands) shapes to generate overloads for).
# An optional fourth element is emitted verbatim as extra_field_setup.
formats = [("pseudo", [Format.PSEUDO], list(itertools.product(range(5), range(6))) + [(8, 1), (1, 8), (2, 6), (3, 6), (1, 7)]),
           ("sop1", [Format.SOP1], [(0, 1), (1, 0), (1, 1), (2, 1), (3, 2)]),
           ("sop2", [Format.SOP2], itertools.product([1, 2], [2, 3])),
           ("sopk", [Format.SOPK], itertools.product([0, 1, 2], [0, 1])),
           ("sopp", [Format.SOPP], [(0, 0), (0, 1)]),
           ("sopc", [Format.SOPC], [(1, 2)]),
           ("smem", [Format.SMEM], [(0, 4), (0, 3), (1, 0), (1, 3), (1, 2), (1, 1), (0, 0)]),
           ("ds", [Format.DS], [(1, 0), (1, 1), (1, 2), (1, 3), (0, 3), (0, 4)]),
           ("ldsdir", [Format.LDSDIR], [(1, 1)]),
           ("mubuf", [Format.MUBUF], [(0, 4), (1, 3), (1, 4)]),
           ("mtbuf", [Format.MTBUF], [(0, 4), (1, 3)]),
           ("mimg", [Format.MIMG], itertools.product([0, 1], [3, 4, 5, 6, 7])),
           ("exp", [Format.EXP], [(0, 4), (0, 5)]),
           ("branch", [Format.PSEUDO_BRANCH], [(0, 0), (0, 1)]),
           ("barrier", [Format.PSEUDO_BARRIER], [(0, 0)]),
           ("reduction", [Format.PSEUDO_REDUCTION], [(3, 3)]),
           ("vop1", [Format.VOP1], [(0, 0), (1, 1), (1, 2), (2, 2)]),
           ("vop1_sdwa", [Format.VOP1, Format.SDWA], [(1, 1)]),
           ("vop2", [Format.VOP2], itertools.product([1, 2], [2, 3])),
           ("vop2_sdwa", [Format.VOP2, Format.SDWA], itertools.product([1, 2], [2, 3])),
           ("vopc", [Format.VOPC], itertools.product([1, 2], [2])),
           ("vopc_sdwa", [Format.VOPC, Format.SDWA], itertools.product([1, 2], [2])),
           ("vop3", [Format.VOP3], [(1, 3), (1, 2), (1, 1), (2, 2)]),
           ("vop3p", [Format.VOP3P], [(1, 2), (1, 3)]),
           ("vopd", [Format.VOPD], [(2, 2), (2, 3), (2, 4), (2, 5), (2, 6)]),
           ("vinterp_inreg", [Format.VINTERP_INREG], [(1, 3)]),
           ("vintrp", [Format.VINTRP], [(1, 2), (1, 3)]),
           ("vop1_dpp", [Format.VOP1, Format.DPP16], [(1, 1)]),
           ("vop2_dpp", [Format.VOP2, Format.DPP16], itertools.product([1, 2], [2, 3])),
           ("vopc_dpp", [Format.VOPC, Format.DPP16], itertools.product([1, 2], [2])),
           ("vop3_dpp", [Format.VOP3, Format.DPP16], [(1, 3), (1, 2), (1, 1), (2, 2)]),
           ("vop3p_dpp", [Format.VOP3P, Format.DPP16], [(1, 2), (1, 3)]),
           ("vop1_dpp8", [Format.VOP1, Format.DPP8], [(1, 1)]),
           ("vop2_dpp8", [Format.VOP2, Format.DPP8], itertools.product([1, 2], [2, 3])),
           ("vopc_dpp8", [Format.VOPC, Format.DPP8], itertools.product([1, 2], [2])),
           ("vop3_dpp8", [Format.VOP3, Format.DPP8], [(1, 3), (1, 2), (1, 1), (2, 2)]),
           ("vop3p_dpp8", [Format.VOP3P, Format.DPP8], [(1, 2), (1, 3)]),
           ("vop1_e64", [Format.VOP1, Format.VOP3], itertools.product([1], [1])),
           ("vop2_e64", [Format.VOP2, Format.VOP3], itertools.product([1, 2], [2, 3])),
           ("vopc_e64", [Format.VOPC, Format.VOP3], itertools.product([1, 2], [2])),
           ("vop1_e64_dpp", [Format.VOP1, Format.VOP3, Format.DPP16], itertools.product([1], [1])),
           ("vop2_e64_dpp", [Format.VOP2, Format.VOP3, Format.DPP16], itertools.product([1, 2], [2, 3])),
           ("vopc_e64_dpp", [Format.VOPC, Format.VOP3, Format.DPP16], itertools.product([1, 2], [2])),
           ("vop1_e64_dpp8", [Format.VOP1, Format.VOP3, Format.DPP8], itertools.product([1], [1])),
           ("vop2_e64_dpp8", [Format.VOP2, Format.VOP3, Format.DPP8], itertools.product([1, 2], [2, 3])),
           ("vopc_e64_dpp8", [Format.VOPC, Format.VOP3, Format.DPP8], itertools.product([1, 2], [2])),
           ("flat", [Format.FLAT], [(0, 3), (1, 2), (1, 3)]),
           ("global", [Format.GLOBAL], [(0, 3), (1, 2), (1, 3)]),
           ("scratch", [Format.SCRATCH], [(0, 3), (1, 2), (1, 3)])]
# Pad entries without extra_field_setup so that every entry unpacks into the
# four names used by the loop below. An entry that already has four elements
# is kept as is.
formats = [(f if len(f) == 4 else f + ('',)) for f in formats]
%>\\
## One builder method per (format entry, shape) pair.
% for name, formats, shapes, extra_field_setup in formats:
    % for num_definitions, num_operands in shapes:
        <%
        args = ['aco_opcode opcode']
        for i in range(num_definitions):
            args.append('Definition def%d' % i)
        for i in range(num_operands):
            args.append('Op op%d' % i)
        for f in formats:
            args += f.get_builder_field_decls()
        %>\\

   Result ${name}(${', '.join(args)})
   {
      Instruction* instr = create_instruction(opcode, (Format)(${'|'.join('(int)Format::%s' % f.name for f in formats)}), ${num_operands}, ${num_definitions});
        % for i in range(num_definitions):
            instr->definitions[${i}] = def${i};
            instr->definitions[${i}].setPrecise(is_precise);
            instr->definitions[${i}].setSZPreserve(is_sz_preserve);
            instr->definitions[${i}].setInfPreserve(is_inf_preserve);
            instr->definitions[${i}].setNaNPreserve(is_nan_preserve);
            instr->definitions[${i}].setNUW(is_nuw);
        % endfor
        % for i in range(num_operands):
            instr->operands[${i}] = op${i}.op;
        % endfor
        % for f in formats:
            % for dest, field_name in zip(f.get_builder_field_dests(), f.get_builder_field_names()):
      instr->${f.get_accessor()}().${dest} = ${field_name};
            % endfor
            ${f.get_builder_initialization(num_operands)}
        % endfor
       ${extra_field_setup}
      return insert(instr);
   }

## sop1/sop2/sopc additionally get an overload taking a WaveSpecificOpcode,
## which forwards the definitions and operands with the opcode resolved by
## w64or32().
    % if name == 'sop1' or name == 'sop2' or name == 'sopc':
        <%
        args[0] = 'WaveSpecificOpcode opcode'
        params = []
        for i in range(num_definitions):
            params.append('def%d' % i)
        for i in range(num_operands):
            params.append('op%d' % i)
        %>\\

   inline Result ${name}(${', '.join(args)})
   {
      return ${name}(w64or32(opcode), ${', '.join(params)});
   }

    % endif
    % endfor
% endfor
};

void hw_init_scratch(Builder& bld, Definition def, Operand scratch_addr, Operand scratch_offset);

} // namespace aco

#endif /* _ACO_BUILDER_ */"""

from aco_opcodes import Format
from mako.template import Template

# Render the template with the Format enum and write the header to stdout.
print(Template(template).render(Format=Format))