# Generator for the ACO instruction builder header.
#
# `template` is a Mako template for a C++ header. At the bottom of this script
# it is rendered with the `opcodes` and `Format` objects imported from
# aco_opcodes, and the resulting text is printed to stdout.
#
# Notes for editing the template:
#  * Lines whose first non-blank character is `%` are Mako control lines.
#  * `<% ... %>` holds Python code executed at render time.
#  * `%>\\` is a single backslash in the template (this is a non-raw Python
#    string); Mako uses it to swallow the following newline.

template = """\
/*
 * Copyright (c) 2019 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * This file was generated by aco_builder_h.py
 */

#ifndef _ACO_BUILDER_
#define _ACO_BUILDER_

#include "aco_ir.h"

namespace aco {
/* DPP control values. Entries with a leading underscore are base values that
 * the dpp_*() helpers below combine with their argument. */
enum dpp_ctrl {
    _dpp_quad_perm = 0x000,
    _dpp_row_sl = 0x100,
    _dpp_row_sr = 0x110,
    _dpp_row_rr = 0x120,
    dpp_wf_sl1 = 0x130,
    dpp_wf_rl1 = 0x134,
    dpp_wf_sr1 = 0x138,
    dpp_wf_rr1 = 0x13C,
    dpp_row_mirror = 0x140,
    dpp_row_half_mirror = 0x141,
    dpp_row_bcast15 = 0x142,
    dpp_row_bcast31 = 0x143,
    _dpp_row_share = 0x150,
    _dpp_row_xmask = 0x160,
};

/* Packs four lane selectors (each must be < 4) into bits [1:0], [3:2], [5:4]
 * and [7:6] of the returned control value. */
inline dpp_ctrl
dpp_quad_perm(unsigned lane0, unsigned lane1, unsigned lane2, unsigned lane3)
{
    assert(lane0 < 4 && lane1 < 4 && lane2 < 4 && lane3 < 4);
    return (dpp_ctrl)(lane0 | (lane1 << 2) | (lane2 << 4) | (lane3 << 6));
}

/* Returns _dpp_row_sl combined with amount, which must be in [1, 15]. */
inline dpp_ctrl
dpp_row_sl(unsigned amount)
{
    assert(amount > 0 && amount < 16);
    return (dpp_ctrl)(((unsigned) _dpp_row_sl) | amount);
}

/* Returns _dpp_row_sr combined with amount, which must be in [1, 15]. */
inline dpp_ctrl
dpp_row_sr(unsigned amount)
{
    assert(amount > 0 && amount < 16);
    return (dpp_ctrl)(((unsigned) _dpp_row_sr) | amount);
}

/* Returns _dpp_row_rr combined with amount, which must be in [1, 15]. */
inline dpp_ctrl
dpp_row_rr(unsigned amount)
{
    assert(amount > 0 && amount < 16);
    return (dpp_ctrl)(((unsigned) _dpp_row_rr) | amount);
}

/* Returns _dpp_row_share combined with lane, which must be < 16. */
inline dpp_ctrl
dpp_row_share(unsigned lane)
{
    assert(lane < 16);
    return (dpp_ctrl)(((unsigned) _dpp_row_share) | lane);
}

/* Returns _dpp_row_xmask combined with mask, which must be < 16. */
inline dpp_ctrl
dpp_row_xmask(unsigned mask)
{
    assert(mask < 16);
    return (dpp_ctrl)(((unsigned) _dpp_row_xmask) | mask);
}

/* Packs three 5-bit masks into one pattern: and_mask in bits [4:0], or_mask in
 * bits [9:5] and xor_mask in bits [14:10]. */
inline unsigned
ds_pattern_bitmode(unsigned and_mask, unsigned or_mask, unsigned xor_mask)
{
    assert(and_mask < 32 && or_mask < 32 && xor_mask < 32);
    return and_mask | (or_mask << 5) | (xor_mask << 10);
}

/* Packs mask into bits [4:0] and delta into bits [9:5], and sets 0xc000. */
inline unsigned
ds_pattern_rotate(unsigned delta, unsigned mask)
{
    assert(delta < 32 && mask < 32);
    return mask | (delta << 5) | 0xc000;
}

aco_ptr<Instruction> create_s_mov(Definition dst, Operand src);

/* Note that some ids are shared between names; the per-entry comments give the
 * hardware generations each name applies to. */
enum sendmsg {
   sendmsg_none = 0,
   sendmsg_gs = 2, /* gfx6 to gfx10.3 */
   sendmsg_gs_done = 3, /* gfx6 to gfx10.3 */
   sendmsg_hs_tessfactor = 2, /* gfx11+ */
   sendmsg_dealloc_vgprs = 3, /* gfx11+ */
   sendmsg_save_wave = 4, /* gfx8 to gfx10.3 */
   sendmsg_stall_wave_gen = 5, /* gfx9+ */
   sendmsg_halt_waves = 6, /* gfx9+ */
   sendmsg_ordered_ps_done = 7, /* gfx9+ */
   sendmsg_early_prim_dealloc = 8, /* gfx9 to gfx10 */
   sendmsg_gs_alloc_req = 9, /* gfx9+ */
   sendmsg_get_doorbell = 10, /* gfx9 to gfx10.3 */
   sendmsg_get_ddid = 11, /* gfx10 to gfx10.3 */
   sendmsg_id_mask = 0xf,
};

/* gfx11+ */
enum sendmsg_rtn {
   sendmsg_rtn_get_doorbell = 0,
   sendmsg_rtn_get_ddid = 1,
   sendmsg_rtn_get_tma = 2,
   sendmsg_rtn_get_realtime = 3,
   sendmsg_rtn_save_wave = 4,
   sendmsg_rtn_get_tba = 5,
   sendmsg_rtn_mask = 0xff,
};

enum bperm_swiz {
   bperm_b1_sign = 8,
   bperm_b3_sign = 9,
   bperm_b5_sign = 10,
   bperm_b7_sign = 11,
   bperm_0 = 12,
   bperm_255 = 13,
};

enum class alu_delay_wait {
   NO_DEP = 0,
   VALU_DEP_1 = 1,
   VALU_DEP_2 = 2,
   VALU_DEP_3 = 3,
   VALU_DEP_4 = 4,
   TRANS32_DEP_1 = 5,
   TRANS32_DEP_2 = 6,
   TRANS32_DEP_3 = 7,
   FMA_ACCUM_CYCLE_1 = 8,
   SALU_CYCLE_1 = 9,
   SALU_CYCLE_2 = 10,
   SALU_CYCLE_3 = 11,
};

/* Helper for creating instructions and, optionally, inserting them into an
 * instruction list. One creation method per instruction format and
 * definition/operand count is generated further down. */
class Builder {
public:
   /* Wraps the instruction returned by a builder method and converts
    * implicitly to the instruction pointer, to the Temp of its first
    * definition, or to an Operand of that Temp. */
   struct Result {
      Instruction *instr;

      Result(Instruction *instr_) : instr(instr_) {}

      operator Instruction *() const {
         return instr;
      }

      /* Temp of definitions[0]; the instruction must have a definition. */
      operator Temp() const {
         return instr->definitions[0].getTemp();
      }

      operator Operand() const {
         return Operand((Temp)*this);
      }

      Definition& def(unsigned index) const {
         return instr->definitions[index];
      }

      /* Wraps the raw pointer in a new aco_ptr.
       * NOTE(review): presumably only meant for instructions that were not
       * inserted into a list by the builder - confirm against callers. */
      aco_ptr<Instruction> get_ptr() const {
        return aco_ptr<Instruction>(instr);
      }

      Instruction * operator * () const {
         return instr;
      }

      Instruction * operator -> () const {
         return instr;
      }
   };

   /* Argument adaptor: lets builder methods accept a Temp, an Operand or the
    * Result of another builder call wherever an operand is expected. */
   struct Op {
      Operand op;
      Op(Temp tmp) : op(tmp) {}
      Op(Operand op_) : op(op_) {}
      Op(Result res) : op((Temp)res) {}
   };

   /* Opcodes that exist in a 32-bit and a 64-bit flavour. Each enumerator
    * holds the value of the 64-bit opcode; w64or32() maps it to the 32-bit
    * one when the program's wave size is not 64. */
   enum WaveSpecificOpcode {
      s_cselect = (unsigned) aco_opcode::s_cselect_b64,
      s_cmp_lg = (unsigned) aco_opcode::s_cmp_lg_u64,
      s_and = (unsigned) aco_opcode::s_and_b64,
      s_andn2 = (unsigned) aco_opcode::s_andn2_b64,
      s_or = (unsigned) aco_opcode::s_or_b64,
      s_orn2 = (unsigned) aco_opcode::s_orn2_b64,
      s_not = (unsigned) aco_opcode::s_not_b64,
      s_mov = (unsigned) aco_opcode::s_mov_b64,
      s_wqm = (unsigned) aco_opcode::s_wqm_b64,
      s_and_saveexec = (unsigned) aco_opcode::s_and_saveexec_b64,
      s_or_saveexec = (unsigned) aco_opcode::s_or_saveexec_b64,
      s_xnor = (unsigned) aco_opcode::s_xnor_b64,
      s_xor = (unsigned) aco_opcode::s_xor_b64,
      s_bcnt1_i32 = (unsigned) aco_opcode::s_bcnt1_i32_b64,
      s_bitcmp1 = (unsigned) aco_opcode::s_bitcmp1_b64,
      s_ff1_i32 = (unsigned) aco_opcode::s_ff1_i32_b64,
      s_flbit_i32 = (unsigned) aco_opcode::s_flbit_i32_b64,
      s_lshl = (unsigned) aco_opcode::s_lshl_b64,
   };

   Program *program;
   bool use_iterator;
   bool start; // only when use_iterator == false
   RegClass lm; /* pgm->lane_mask, or s2 when the builder has no program */

   /* Insertion target; when null, insert() does not store the instruction. */
   std::vector<aco_ptr<Instruction>> *instructions;
   std::vector<aco_ptr<Instruction>>::iterator it;
   /* Applied to every definition of instructions created by the generated
    * builder methods (setPrecise()/setNUW()). */
   bool is_precise = false;
   bool is_nuw = false;

   Builder(Program *pgm) : program(pgm), use_iterator(false), start(false), lm(pgm ? pgm->lane_mask : s2), instructions(nullptr) {}
   Builder(Program *pgm, Block *block) : program(pgm), use_iterator(false), start(false), lm(pgm ? pgm->lane_mask : s2), instructions(&block->instructions) {}
   Builder(Program *pgm, std::vector<aco_ptr<Instruction>> *instrs) : program(pgm), use_iterator(false), start(false), lm(pgm ? pgm->lane_mask : s2), instructions(instrs) {}

   /* Returns a copy of this builder with is_precise set. */
   Builder precise() const {
      Builder res = *this;
      res.is_precise = true;
      return res;
   }

   /* Returns a copy of this builder with is_nuw set. */
   Builder nuw() const {
      Builder res = *this;
      res.is_nuw = true;
      return res;
   }

   /* Retargets the builder to block's instruction list. use_iterator and
    * start are left as they are. */
   void moveEnd(Block *block) {
      instructions = &block->instructions;
   }

   /* The reset() overloads select the insertion target; all of them clear
    * start, and only the iterator overload enables use_iterator. */
   void reset() {
      use_iterator = false;
      start = false;
      instructions = nullptr;
   }

   void reset(Block *block) {
      use_iterator = false;
      start = false;
      instructions = &block->instructions;
   }

   void reset(std::vector<aco_ptr<Instruction>> *instrs) {
      use_iterator = false;
      start = false;
      instructions = instrs;
   }

   void reset(std::vector<aco_ptr<Instruction>> *instrs, std::vector<aco_ptr<Instruction>>::iterator instr_it) {
      use_iterator = true;
      start = false;
      instructions = instrs;
      it = instr_it;
   }

   /* Stores instr in the current instruction list, if there is one:
    *  - use_iterator: before `it`, which then points just past the new entry
    *  - otherwise at the end, or at the beginning when `start` is set
    * Returns a Result referring to the instruction in either case. */
   Result insert(aco_ptr<Instruction> instr) {
      Instruction *instr_ptr = instr.get();
      if (instructions) {
         if (use_iterator) {
            it = instructions->emplace(it, std::move(instr));
            it = std::next(it);
         } else if (!start) {
            instructions->emplace_back(std::move(instr));
         } else {
            instructions->emplace(instructions->begin(), std::move(instr));
         }
      }
      return Result(instr_ptr);
   }

   /* Same as above for a raw pointer; the pointer is only wrapped in an
    * aco_ptr when there is an instruction list to store it in. */
   Result insert(Instruction* instr) {
      if (instructions) {
         if (use_iterator) {
            it = instructions->emplace(it, aco_ptr<Instruction>(instr));
            it = std::next(it);
         } else if (!start) {
            instructions->emplace_back(aco_ptr<Instruction>(instr));
         } else {
            instructions->emplace(instructions->begin(), aco_ptr<Instruction>(instr));
         }
      }
      return Result(instr);
   }

   /* The helpers below allocate from `program`, so they require a builder
    * that was constructed with a non-null Program. */
   Temp tmp(RegClass rc) {
      return program->allocateTmp(rc);
   }

   Temp tmp(RegType type, unsigned size) {
      return tmp(RegClass(type, size));
   }

   Definition def(RegClass rc) {
      return Definition(program->allocateTmp(rc));
   }

   Definition def(RegType type, unsigned size) {
      return def(RegClass(type, size));
   }

   Definition def(RegClass rc, PhysReg reg) {
      return Definition(program->allocateId(rc), reg, rc);
   }

   /* Maps a wave-specific opcode to the real opcode: the enumerator's own
    * (64-bit) value when program->wave_size is 64, the *_b32/_u32 counterpart
    * otherwise. */
   inline aco_opcode w64or32(WaveSpecificOpcode opcode) const {
      if (program->wave_size == 64)
         return (aco_opcode) opcode;

      switch (opcode) {
      case s_cselect:
         return aco_opcode::s_cselect_b32;
      case s_cmp_lg:
         return aco_opcode::s_cmp_lg_u32;
      case s_and:
         return aco_opcode::s_and_b32;
      case s_andn2:
         return aco_opcode::s_andn2_b32;
      case s_or:
         return aco_opcode::s_or_b32;
      case s_orn2:
         return aco_opcode::s_orn2_b32;
      case s_not:
         return aco_opcode::s_not_b32;
      case s_mov:
         return aco_opcode::s_mov_b32;
      case s_wqm:
         return aco_opcode::s_wqm_b32;
      case s_and_saveexec:
         return aco_opcode::s_and_saveexec_b32;
      case s_or_saveexec:
         return aco_opcode::s_or_saveexec_b32;
      case s_xnor:
         return aco_opcode::s_xnor_b32;
      case s_xor:
         return aco_opcode::s_xor_b32;
      case s_bcnt1_i32:
         return aco_opcode::s_bcnt1_i32_b32;
      case s_bitcmp1:
         return aco_opcode::s_bitcmp1_b32;
      case s_ff1_i32:
         return aco_opcode::s_ff1_i32_b32;
      case s_flbit_i32:
         return aco_opcode::s_flbit_i32_b32;
      case s_lshl:
         return aco_opcode::s_lshl_b32;
      default:
         unreachable("Unsupported wave specific opcode.");
      }
   }

% for fixed in ['m0', 'vcc', 'exec', 'scc']:
   Operand ${fixed}(Temp tmp) {
       % if fixed == 'vcc' or fixed == 'exec':
          //vcc_hi and exec_hi can still be used in wave32
          assert(tmp.type() == RegType::sgpr && tmp.bytes() <= 8);
       % endif
       Operand op(tmp);
       op.setFixed(aco::${fixed});
       return op;
   }

   Definition ${fixed}(Definition def) {
       % if fixed == 'vcc' or fixed == 'exec':
          //vcc_hi and exec_hi can still be used in wave32
          assert(def.regClass().type() == RegType::sgpr && def.bytes() <= 8);
       % endif
       def.setFixed(aco::${fixed});
       return def;
   }

% endfor

   Operand set16bit(Operand op) {
       op.set16bit(true);
       return op;
   }

   Operand set24bit(Operand op) {
       op.set24bit(true);
       return op;
   }

   /* hand-written helpers */

   /* Returns the operand's Temp unchanged unless it is a VGPR, in which case
    * a p_as_uniform into a new SGPR temporary of the same size is emitted. */
   Temp as_uniform(Op op)
   {
      assert(op.op.isTemp());
      if (op.op.getTemp().type() == RegType::vgpr)
         return pseudo(aco_opcode::p_as_uniform, def(RegType::sgpr, op.op.size()), op);
      else
         return op.op.getTemp();
   }

   /* Emits dst = tmp * imm for a VGPR tmp, choosing between a copy, a single
    * shift, v_mul_u32_u24 (when bits24 is set), shift plus add/sub, a chain of
    * shifts and adds, or v_mul_lo_u32, based on imm and on mul_cost. */
   Result v_mul_imm(Definition dst, Temp tmp, uint32_t imm, bool bits24=false)
   {
      assert(tmp.type() == RegType::vgpr);
      bool has_lshl_add = program->gfx_level >= GFX9;
      /* v_mul_lo_u32 has 1.6x the latency of most VALU on GFX10 (8 vs 5 cycles),
       * compared to 4x the latency on <GFX10. */
      unsigned mul_cost = program->gfx_level >= GFX10 ? 1 : (4 + Operand::c32(imm).isLiteral());
      if (imm == 0) {
         return copy(dst, Operand::zero());
      } else if (imm == 1) {
         return copy(dst, Operand(tmp));
      } else if (util_is_power_of_two_or_zero(imm)) {
         return vop2(aco_opcode::v_lshlrev_b32, dst, Operand::c32(ffs(imm) - 1u), tmp);
      } else if (bits24) {
        return vop2(aco_opcode::v_mul_u32_u24, dst, Operand::c32(imm), tmp);
      } else if (util_is_power_of_two_nonzero(imm - 1u)) {
         return vadd32(dst, vop2(aco_opcode::v_lshlrev_b32, def(v1), Operand::c32(ffs(imm - 1u) - 1u), tmp), tmp);
      } else if (mul_cost > 2 && util_is_power_of_two_nonzero(imm + 1u)) {
         return vsub32(dst, vop2(aco_opcode::v_lshlrev_b32, def(v1), Operand::c32(ffs(imm + 1u) - 1u), tmp), tmp);
      }

      unsigned instrs_required = util_bitcount(imm);
      if (!has_lshl_add) {
         instrs_required = util_bitcount(imm) - (imm & 0x1); /* shifts */
         instrs_required += util_bitcount(imm) - 1; /* additions */
      }
      if (instrs_required < mul_cost) {
         Result res(nullptr);
         Temp cur;
         /* One term per set bit of imm; u_bit_scan() clears the bit it
          * returns, so the last term is the one that writes dst. */
         while (imm) {
            unsigned shift = u_bit_scan(&imm);
            Definition tmp_dst = imm ? def(v1) : dst;

            if (shift && cur.id())
               res = vadd32(Definition(tmp_dst), vop2(aco_opcode::v_lshlrev_b32, def(v1), Operand::c32(shift), tmp), cur);
            else if (shift)
               res = vop2(aco_opcode::v_lshlrev_b32, Definition(tmp_dst), Operand::c32(shift), tmp);
            else if (cur.id())
               res = vadd32(Definition(tmp_dst), tmp, cur);
            else
               tmp_dst = Definition(tmp);

            cur = tmp_dst.getTemp();
         }
         return res;
      }

      Temp imm_tmp = copy(def(s1), Operand::c32(imm));
      return vop3(aco_opcode::v_mul_lo_u32, dst, imm_tmp, tmp);
   }

   /* v_mul_imm() with bits24 set. */
   Result v_mul24_imm(Definition dst, Temp tmp, uint32_t imm)
   {
      return v_mul_imm(dst, tmp, imm, true);
   }

   /* Emits a p_parallelcopy of op into dst. */
   Result copy(Definition dst, Op op) {
      return pseudo(aco_opcode::p_parallelcopy, dst, op);
   }

   /* Emits a 32-bit VALU addition.
    * The operands are swapped if b is a constant or not a VGPR; unless post_ra
    * is set, a second operand that still is not a VGPR is copied into one.
    * Opcode selection:
    *  - carry_in given:            v_addc_co_u32
    *  - carry_out on GFX10+:       v_add_co_u32_e64
    *  - carry_out or before GFX9:  v_add_co_u32
    *  - otherwise:                 v_add_u32
    * The carry definition, where present, is a new temporary of class lm. */
   Result vadd32(Definition dst, Op a, Op b, bool carry_out=false, Op carry_in=Op(Operand(s2)), bool post_ra=false) {
      if (b.op.isConstant() || b.op.regClass().type() != RegType::vgpr)
         std::swap(a, b);
      if (!post_ra && (!b.op.hasRegClass() || b.op.regClass().type() == RegType::sgpr))
         b = copy(def(v1), b);

      if (!carry_in.op.isUndefined())
         return vop2(aco_opcode::v_addc_co_u32, Definition(dst), def(lm), a, b, carry_in);
      else if (program->gfx_level >= GFX10 && carry_out)
         return vop3(aco_opcode::v_add_co_u32_e64, Definition(dst), def(lm), a, b);
      else if (program->gfx_level < GFX9 || carry_out)
         return vop2(aco_opcode::v_add_co_u32, Definition(dst), def(lm), a, b);
      else
         return vop2(aco_opcode::v_add_u32, Definition(dst), a, b);
   }

   /* Emits a 32-bit VALU subtraction.
    * A carry definition is always produced when a borrow is given or before
    * GFX9. If b is not a VGPR temporary the operands are swapped and the
    * "rev" opcode variant is used instead; a second operand that still is not
    * a VGPR is copied into one. On GFX10+ the carry-out variants without
    * borrow are emitted in their _e64 (VOP3) form. */
   Result vsub32(Definition dst, Op a, Op b, bool carry_out=false, Op borrow=Op(Operand(s2)))
   {
      if (!borrow.op.isUndefined() || program->gfx_level < GFX9)
         carry_out = true;

      bool reverse = !b.op.isTemp() || b.op.regClass().type() != RegType::vgpr;
      if (reverse)
         std::swap(a, b);
      if (!b.op.hasRegClass() || b.op.regClass().type() == RegType::sgpr)
         b = copy(def(v1), b);

      aco_opcode op;
      Temp carry;
      if (carry_out) {
         carry = tmp(lm);
         if (borrow.op.isUndefined())
            op = reverse ? aco_opcode::v_subrev_co_u32 : aco_opcode::v_sub_co_u32;
         else
            op = reverse ? aco_opcode::v_subbrev_co_u32 : aco_opcode::v_subb_co_u32;
      } else {
         op = reverse ? aco_opcode::v_subrev_u32 : aco_opcode::v_sub_u32;
      }
      bool vop3 = false;
      if (program->gfx_level >= GFX10 && op == aco_opcode::v_subrev_co_u32) {
        vop3 = true;
        op = aco_opcode::v_subrev_co_u32_e64;
      } else if (program->gfx_level >= GFX10 && op == aco_opcode::v_sub_co_u32) {
        vop3 = true;
        op = aco_opcode::v_sub_co_u32_e64;
      }

      int num_ops = borrow.op.isUndefined() ? 2 : 3;
      int num_defs = carry_out ? 2 : 1;
      aco_ptr<Instruction> sub;
      if (vop3)
        sub.reset(create_instruction<VALU_instruction>(op, Format::VOP3, num_ops, num_defs));
      else
        sub.reset(create_instruction<VALU_instruction>(op, Format::VOP2, num_ops, num_defs));
      sub->operands[0] = a.op;
      sub->operands[1] = b.op;
      if (!borrow.op.isUndefined())
         sub->operands[2] = borrow.op;
      sub->definitions[0] = dst;
      if (carry_out)
         sub->definitions[1] = Definition(carry);

      return insert(std::move(sub));
   }

   /* Emits v_readlane_b32, using the _e64 (VOP3) opcode on GFX8+. */
   Result readlane(Definition dst, Op vsrc, Op lane)
   {
      if (program->gfx_level >= GFX8)
         return vop3(aco_opcode::v_readlane_b32_e64, dst, vsrc, lane);
      else
         return vop2(aco_opcode::v_readlane_b32, dst, vsrc, lane);
   }

   /* Emits v_writelane_b32, using the _e64 (VOP3) opcode on GFX8+. */
   Result writelane(Definition dst, Op val, Op lane, Op vsrc) {
      if (program->gfx_level >= GFX8)
         return vop3(aco_opcode::v_writelane_b32_e64, dst, val, lane, vsrc);
      else
         return vop2(aco_opcode::v_writelane_b32, dst, val, lane, vsrc);
   }
<%
import itertools
# Each entry is (builder method name, list of Format members, C++ instruction
# struct, iterable of (num_definitions, num_operands) shapes) with an optional
# fifth element holding extra field setup code. Entries without the fifth
# element get an empty string appended below.
formats = [("pseudo", [Format.PSEUDO], 'Pseudo_instruction', list(itertools.product(range(5), range(6))) + [(8, 1), (1, 8), (2, 6), (3, 6), (1, 6)]),
           ("sop1", [Format.SOP1], 'SOP1_instruction', [(0, 1), (1, 0), (1, 1), (2, 1), (3, 2)]),
           ("sop2", [Format.SOP2], 'SOP2_instruction', itertools.product([1, 2], [2, 3])),
           ("sopk", [Format.SOPK], 'SOPK_instruction', itertools.product([0, 1, 2], [0, 1])),
           ("sopp", [Format.SOPP], 'SOPP_instruction', itertools.product([0, 1], [0, 1])),
           ("sopc", [Format.SOPC], 'SOPC_instruction', [(1, 2)]),
           ("smem", [Format.SMEM], 'SMEM_instruction', [(0, 4), (0, 3), (1, 0), (1, 3), (1, 2), (1, 1), (0, 0)]),
           ("ds", [Format.DS], 'DS_instruction', [(1, 1), (1, 2), (1, 3), (0, 3), (0, 4)]),
           ("ldsdir", [Format.LDSDIR], 'LDSDIR_instruction', [(1, 1)]),
           ("mubuf", [Format.MUBUF], 'MUBUF_instruction', [(0, 4), (1, 3)]),
           ("mtbuf", [Format.MTBUF], 'MTBUF_instruction', [(0, 4), (1, 3)]),
           ("mimg", [Format.MIMG], 'MIMG_instruction', itertools.product([0, 1], [3, 4, 5, 6, 7])),
           ("exp", [Format.EXP], 'Export_instruction', [(0, 4), (0, 5)]),
           ("branch", [Format.PSEUDO_BRANCH], 'Pseudo_branch_instruction', itertools.product([1], [0, 1])),
           ("barrier", [Format.PSEUDO_BARRIER], 'Pseudo_barrier_instruction', [(0, 0)]),
           ("reduction", [Format.PSEUDO_REDUCTION], 'Pseudo_reduction_instruction', [(3, 3)]),
           ("vop1", [Format.VOP1], 'VALU_instruction', [(0, 0), (1, 1), (2, 2)]),
           ("vop1_sdwa", [Format.VOP1, Format.SDWA], 'SDWA_instruction', [(1, 1)]),
           ("vop2", [Format.VOP2], 'VALU_instruction', itertools.product([1, 2], [2, 3])),
           ("vop2_sdwa", [Format.VOP2, Format.SDWA], 'SDWA_instruction', itertools.product([1, 2], [2, 3])),
           ("vopc", [Format.VOPC], 'VALU_instruction', itertools.product([1, 2], [2])),
           ("vopc_sdwa", [Format.VOPC, Format.SDWA], 'SDWA_instruction', itertools.product([1, 2], [2])),
           ("vop3", [Format.VOP3], 'VALU_instruction', [(1, 3), (1, 2), (1, 1), (2, 2)]),
           ("vop3p", [Format.VOP3P], 'VALU_instruction', [(1, 2), (1, 3)]),
           ("vopd", [Format.VOPD], 'VOPD_instruction', [(2, 2), (2, 3), (2, 4), (2, 5), (2, 6)]),
           ("vinterp_inreg", [Format.VINTERP_INREG], 'VINTERP_inreg_instruction', [(1, 3)]),
           ("vintrp", [Format.VINTRP], 'VINTRP_instruction', [(1, 2), (1, 3)]),
           ("vop1_dpp", [Format.VOP1, Format.DPP16], 'DPP16_instruction', [(1, 1)]),
           ("vop2_dpp", [Format.VOP2, Format.DPP16], 'DPP16_instruction', itertools.product([1, 2], [2, 3])),
           ("vopc_dpp", [Format.VOPC, Format.DPP16], 'DPP16_instruction', itertools.product([1, 2], [2])),
           ("vop3_dpp", [Format.VOP3, Format.DPP16], 'DPP16_instruction', [(1, 3), (1, 2), (1, 1), (2, 2)]),
           ("vop3p_dpp", [Format.VOP3P, Format.DPP16], 'DPP16_instruction', [(1, 2), (1, 3)]),
           ("vop1_dpp8", [Format.VOP1, Format.DPP8], 'DPP8_instruction', [(1, 1)]),
           ("vop2_dpp8", [Format.VOP2, Format.DPP8], 'DPP8_instruction', itertools.product([1, 2], [2, 3])),
           ("vopc_dpp8", [Format.VOPC, Format.DPP8], 'DPP8_instruction', itertools.product([1, 2], [2])),
           ("vop3_dpp8", [Format.VOP3, Format.DPP8], 'DPP8_instruction', [(1, 3), (1, 2), (1, 1), (2, 2)]),
           ("vop3p_dpp8", [Format.VOP3P, Format.DPP8], 'DPP8_instruction', [(1, 2), (1, 3)]),
           ("vop1_e64", [Format.VOP1, Format.VOP3], 'VALU_instruction', itertools.product([1], [1])),
           ("vop2_e64", [Format.VOP2, Format.VOP3], 'VALU_instruction', itertools.product([1, 2], [2, 3])),
           ("vopc_e64", [Format.VOPC, Format.VOP3], 'VALU_instruction', itertools.product([1, 2], [2])),
           ("vop1_e64_dpp", [Format.VOP1, Format.VOP3, Format.DPP16], 'DPP16_instruction', itertools.product([1], [1])),
           ("vop2_e64_dpp", [Format.VOP2, Format.VOP3, Format.DPP16], 'DPP16_instruction', itertools.product([1, 2], [2, 3])),
           ("vopc_e64_dpp", [Format.VOPC, Format.VOP3, Format.DPP16], 'DPP16_instruction', itertools.product([1, 2], [2])),
           ("vop1_e64_dpp8", [Format.VOP1, Format.VOP3, Format.DPP8], 'DPP8_instruction', itertools.product([1], [1])),
           ("vop2_e64_dpp8", [Format.VOP2, Format.VOP3, Format.DPP8], 'DPP8_instruction', itertools.product([1, 2], [2, 3])),
           ("vopc_e64_dpp8", [Format.VOPC, Format.VOP3, Format.DPP8], 'DPP8_instruction', itertools.product([1, 2], [2])),
           ("flat", [Format.FLAT], 'FLAT_instruction', [(0, 3), (1, 2)]),
           ("global", [Format.GLOBAL], 'FLAT_instruction', [(0, 3), (1, 2)]),
           ("scratch", [Format.SCRATCH], 'FLAT_instruction', [(0, 3), (1, 2)])]
formats = [(f if len(f) == 5 else f + ('',)) for f in formats]
%>\\
% for name, formats, struct, shapes, extra_field_setup in formats:
    % for num_definitions, num_operands in shapes:
        <%
        args = ['aco_opcode opcode']
        for i in range(num_definitions):
            args.append('Definition def%d' % i)
        for i in range(num_operands):
            args.append('Op op%d' % i)
        for f in formats:
            args += f.get_builder_field_decls()
        %>\\

   Result ${name}(${', '.join(args)})
   {
      ${struct} *instr = create_instruction<${struct}>(opcode, (Format)(${'|'.join('(int)Format::%s' % f.name for f in formats)}), ${num_operands}, ${num_definitions});
        % for i in range(num_definitions):
            instr->definitions[${i}] = def${i};
            instr->definitions[${i}].setPrecise(is_precise);
            instr->definitions[${i}].setNUW(is_nuw);
        % endfor
        % for i in range(num_operands):
            instr->operands[${i}] = op${i}.op;
        % endfor
        % for f in formats:
            % for dest, field_name in zip(f.get_builder_field_dests(), f.get_builder_field_names()):
      instr->${dest} = ${field_name};
            % endfor
            ${f.get_builder_initialization(num_operands)}
        % endfor
       ${extra_field_setup}
      return insert(instr);
   }

    % if name == 'sop1' or name == 'sop2' or name == 'sopc':
        <%
        args[0] = 'WaveSpecificOpcode opcode'
        params = []
        for i in range(num_definitions):
            params.append('def%d' % i)
        for i in range(num_operands):
            params.append('op%d' % i)
        %>\\

   inline Result ${name}(${', '.join(args)})
   {
      return ${name}(w64or32(opcode), ${', '.join(params)});
   }

    % endif
    % endfor
% endfor
};

void hw_init_scratch(Builder& bld, Definition def, Operand scratch_addr, Operand scratch_offset);

} // namespace aco

#endif /* _ACO_BUILDER_ */"""

from aco_opcodes import opcodes, Format
from mako.template import Template

# Render the header and write it to stdout.
print(Template(template).render(opcodes=opcodes, Format=Format))