• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1
2template = """\
3/*
4 * Copyright (c) 2019 Valve Corporation
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
23 * IN THE SOFTWARE.
24 *
25 * This file was generated by aco_builder_h.py
26 */
27
28#ifndef _ACO_BUILDER_
29#define _ACO_BUILDER_
30
31#include "aco_ir.h"
32
33namespace aco {
/* DPP control encodings.
 *
 * Enumerators that start with an underscore are base values; the dpp_*()
 * helper functions of the same name (without the underscore) combine them
 * with a per-call parameter in the low bits. All other enumerators are used
 * as-is.
 */
enum dpp_ctrl {
    /* quad permute: 0x000 plus four 2-bit lane selectors */
    _dpp_quad_perm      = 0x000,

    /* row shift / rotate bases: low four bits carry the amount */
    _dpp_row_sl         = 0x100,
    _dpp_row_sr         = 0x110,
    _dpp_row_rr         = 0x120,

    /* fixed encodings without a parameter */
    dpp_wf_sl1          = 0x130,
    dpp_wf_rl1          = 0x134,
    dpp_wf_sr1          = 0x138,
    dpp_wf_rr1          = 0x13C,
    dpp_row_mirror      = 0x140,
    dpp_row_half_mirror = 0x141,
    dpp_row_bcast15     = 0x142,
    dpp_row_bcast31     = 0x143,

    /* row share / xmask bases: low four bits carry the lane or mask */
    _dpp_row_share      = 0x150,
    _dpp_row_xmask      = 0x160,
};
50
51inline dpp_ctrl
52dpp_quad_perm(unsigned lane0, unsigned lane1, unsigned lane2, unsigned lane3)
53{
54    assert(lane0 < 4 && lane1 < 4 && lane2 < 4 && lane3 < 4);
55    return (dpp_ctrl)(lane0 | (lane1 << 2) | (lane2 << 4) | (lane3 << 6));
56}
57
58inline dpp_ctrl
59dpp_row_sl(unsigned amount)
60{
61    assert(amount > 0 && amount < 16);
62    return (dpp_ctrl)(((unsigned) _dpp_row_sl) | amount);
63}
64
65inline dpp_ctrl
66dpp_row_sr(unsigned amount)
67{
68    assert(amount > 0 && amount < 16);
69    return (dpp_ctrl)(((unsigned) _dpp_row_sr) | amount);
70}
71
72inline dpp_ctrl
73dpp_row_rr(unsigned amount)
74{
75    assert(amount > 0 && amount < 16);
76    return (dpp_ctrl)(((unsigned) _dpp_row_rr) | amount);
77}
78
79inline dpp_ctrl
80dpp_row_share(unsigned lane)
81{
82    assert(lane < 16);
83    return (dpp_ctrl)(((unsigned) _dpp_row_share) | lane);
84}
85
86inline dpp_ctrl
87dpp_row_xmask(unsigned mask)
88{
89    assert(mask < 16);
90    return (dpp_ctrl)(((unsigned) _dpp_row_xmask) | mask);
91}
92
/* Packs three 5-bit masks into one pattern value.
 *
 * and_mask occupies bits 0-4, or_mask bits 5-9 and xor_mask bits 10-14.
 * Each mask has to be smaller than 32 (checked by assert).
 */
inline unsigned
ds_pattern_bitmode(unsigned and_mask, unsigned or_mask, unsigned xor_mask)
{
    assert(and_mask < 32);
    assert(or_mask < 32);
    assert(xor_mask < 32);

    const unsigned or_field = or_mask << 5;
    const unsigned xor_field = xor_mask << 10;
    return xor_field | or_field | and_mask;
}
99
/* Builds a rotate pattern value.
 *
 * mask occupies bits 0-4, delta bits 5-9, and the constant 0xc000 is always
 * set on top. Both arguments have to be smaller than 32 (checked by assert).
 */
inline unsigned
ds_pattern_rotate(unsigned delta, unsigned mask)
{
    assert(delta < 32);
    assert(mask < 32);

    const unsigned rotate_mode = 0xc000;
    const unsigned delta_field = delta << 5;
    return rotate_mode | delta_field | mask;
}
106
107aco_ptr<Instruction> create_s_mov(Definition dst, Operand src);
108
/* Message ids together with the hardware generations they are annotated for.
 * Ids 2 and 3 are listed twice because their name differs between the
 * generation ranges given in the comments.
 */
enum sendmsg {
   sendmsg_none = 0,

   /* gfx6 to gfx10.3 */
   sendmsg_gs = 2,
   sendmsg_gs_done = 3,

   /* gfx11+ */
   sendmsg_hs_tessfactor = 2,
   sendmsg_dealloc_vgprs = 3,

   /* gfx8 to gfx10.3 */
   sendmsg_save_wave = 4,

   /* gfx9+ */
   sendmsg_stall_wave_gen = 5,
   sendmsg_halt_waves = 6,
   sendmsg_ordered_ps_done = 7,

   /* gfx9 to gfx10 */
   sendmsg_early_prim_dealloc = 8,

   /* gfx9+ */
   sendmsg_gs_alloc_req = 9,

   /* gfx9 to gfx10.3 */
   sendmsg_get_doorbell = 10,

   /* gfx10 to gfx10.3 */
   sendmsg_get_ddid = 11,

   /* covers the low four bits, i.e. every id listed here */
   sendmsg_id_mask = 0xf,
};
125
126/* gfx11+ */
enum sendmsg_rtn {
   /* request ids, numbered consecutively from 0 */
   sendmsg_rtn_get_doorbell = 0,
   sendmsg_rtn_get_ddid     = 1,
   sendmsg_rtn_get_tma      = 2,
   sendmsg_rtn_get_realtime = 3,
   sendmsg_rtn_save_wave    = 4,
   sendmsg_rtn_get_tba      = 5,

   /* covers the low eight bits */
   sendmsg_rtn_mask         = 0xff,
};
136
/* Special byte-permute selector values (8..13). */
enum bperm_swiz {
   /* sign selectors for bytes 1, 3, 5 and 7 */
   bperm_b1_sign = 8,
   bperm_b3_sign = 9,
   bperm_b5_sign = 10,
   bperm_b7_sign = 11,

   /* constant selectors */
   bperm_0       = 12,
   bperm_255     = 13,
};
145
/* Scoped enumeration of ALU delay/wait codes. The numeric values are spelled
 * out explicitly and run contiguously from 0 to 11.
 */
enum class alu_delay_wait {
   NO_DEP            = 0,

   VALU_DEP_1        = 1,
   VALU_DEP_2        = 2,
   VALU_DEP_3        = 3,
   VALU_DEP_4        = 4,

   TRANS32_DEP_1     = 5,
   TRANS32_DEP_2     = 6,
   TRANS32_DEP_3     = 7,

   FMA_ACCUM_CYCLE_1 = 8,

   SALU_CYCLE_1      = 9,
   SALU_CYCLE_2      = 10,
   SALU_CYCLE_3      = 11,
};
160
161class Builder {
162public:
163   struct Result {
164      Instruction *instr;
165
166      Result(Instruction *instr_) : instr(instr_) {}
167
168      operator Instruction *() const {
169         return instr;
170      }
171
172      operator Temp() const {
173         return instr->definitions[0].getTemp();
174      }
175
176      operator Operand() const {
177         return Operand((Temp)*this);
178      }
179
180      Definition& def(unsigned index) const {
181         return instr->definitions[index];
182      }
183
184      aco_ptr<Instruction> get_ptr() const {
185        return aco_ptr<Instruction>(instr);
186      }
187
188      Instruction * operator * () const {
189         return instr;
190      }
191
192      Instruction * operator -> () const {
193         return instr;
194      }
195   };
196
197   struct Op {
198      Operand op;
199      Op(Temp tmp) : op(tmp) {}
200      Op(Operand op_) : op(op_) {}
201      Op(Result res) : op((Temp)res) {}
202   };
203
204   enum WaveSpecificOpcode {
205      s_cselect = (unsigned) aco_opcode::s_cselect_b64,
206      s_cmp_lg = (unsigned) aco_opcode::s_cmp_lg_u64,
207      s_and = (unsigned) aco_opcode::s_and_b64,
208      s_andn2 = (unsigned) aco_opcode::s_andn2_b64,
209      s_or = (unsigned) aco_opcode::s_or_b64,
210      s_orn2 = (unsigned) aco_opcode::s_orn2_b64,
211      s_not = (unsigned) aco_opcode::s_not_b64,
212      s_mov = (unsigned) aco_opcode::s_mov_b64,
213      s_wqm = (unsigned) aco_opcode::s_wqm_b64,
214      s_and_saveexec = (unsigned) aco_opcode::s_and_saveexec_b64,
215      s_or_saveexec = (unsigned) aco_opcode::s_or_saveexec_b64,
216      s_xnor = (unsigned) aco_opcode::s_xnor_b64,
217      s_xor = (unsigned) aco_opcode::s_xor_b64,
218      s_bcnt1_i32 = (unsigned) aco_opcode::s_bcnt1_i32_b64,
219      s_bitcmp1 = (unsigned) aco_opcode::s_bitcmp1_b64,
220      s_ff1_i32 = (unsigned) aco_opcode::s_ff1_i32_b64,
221      s_flbit_i32 = (unsigned) aco_opcode::s_flbit_i32_b64,
222      s_lshl = (unsigned) aco_opcode::s_lshl_b64,
223   };
224
225   Program *program;
226   bool use_iterator;
227   bool start; // only when use_iterator == false
228   RegClass lm;
229
230   std::vector<aco_ptr<Instruction>> *instructions;
231   std::vector<aco_ptr<Instruction>>::iterator it;
232   bool is_precise = false;
233   bool is_nuw = false;
234
235   Builder(Program *pgm) : program(pgm), use_iterator(false), start(false), lm(pgm ? pgm->lane_mask : s2), instructions(NULL) {}
236   Builder(Program *pgm, Block *block) : program(pgm), use_iterator(false), start(false), lm(pgm ? pgm->lane_mask : s2), instructions(&block->instructions) {}
237   Builder(Program *pgm, std::vector<aco_ptr<Instruction>> *instrs) : program(pgm), use_iterator(false), start(false), lm(pgm ? pgm->lane_mask : s2), instructions(instrs) {}
238
239   Builder precise() const {
240      Builder res = *this;
241      res.is_precise = true;
242      return res;
243   };
244
245   Builder nuw() const {
246      Builder res = *this;
247      res.is_nuw = true;
248      return res;
249   }
250
251   void moveEnd(Block *block) {
252      instructions = &block->instructions;
253   }
254
255   void reset() {
256      use_iterator = false;
257      start = false;
258      instructions = NULL;
259   }
260
261   void reset(Block *block) {
262      use_iterator = false;
263      start = false;
264      instructions = &block->instructions;
265   }
266
267   void reset(std::vector<aco_ptr<Instruction>> *instrs) {
268      use_iterator = false;
269      start = false;
270      instructions = instrs;
271   }
272
273   void reset(std::vector<aco_ptr<Instruction>> *instrs, std::vector<aco_ptr<Instruction>>::iterator instr_it) {
274      use_iterator = true;
275      start = false;
276      instructions = instrs;
277      it = instr_it;
278   }
279
280   Result insert(aco_ptr<Instruction> instr) {
281      Instruction *instr_ptr = instr.get();
282      if (instructions) {
283         if (use_iterator) {
284            it = instructions->emplace(it, std::move(instr));
285            it = std::next(it);
286         } else if (!start) {
287            instructions->emplace_back(std::move(instr));
288         } else {
289            instructions->emplace(instructions->begin(), std::move(instr));
290         }
291      }
292      return Result(instr_ptr);
293   }
294
295   Result insert(Instruction* instr) {
296      if (instructions) {
297         if (use_iterator) {
298            it = instructions->emplace(it, aco_ptr<Instruction>(instr));
299            it = std::next(it);
300         } else if (!start) {
301            instructions->emplace_back(aco_ptr<Instruction>(instr));
302         } else {
303            instructions->emplace(instructions->begin(), aco_ptr<Instruction>(instr));
304         }
305      }
306      return Result(instr);
307   }
308
309   Temp tmp(RegClass rc) {
310      return program->allocateTmp(rc);
311   }
312
313   Temp tmp(RegType type, unsigned size) {
314      return tmp(RegClass(type, size));
315   }
316
317   Definition def(RegClass rc) {
318      return Definition(program->allocateTmp(rc));
319   }
320
321   Definition def(RegType type, unsigned size) {
322      return def(RegClass(type, size));
323   }
324
325   Definition def(RegClass rc, PhysReg reg) {
326      return Definition(program->allocateId(rc), reg, rc);
327   }
328
329   inline aco_opcode w64or32(WaveSpecificOpcode opcode) const {
330      if (program->wave_size == 64)
331         return (aco_opcode) opcode;
332
333      switch (opcode) {
334      case s_cselect:
335         return aco_opcode::s_cselect_b32;
336      case s_cmp_lg:
337         return aco_opcode::s_cmp_lg_u32;
338      case s_and:
339         return aco_opcode::s_and_b32;
340      case s_andn2:
341         return aco_opcode::s_andn2_b32;
342      case s_or:
343         return aco_opcode::s_or_b32;
344      case s_orn2:
345         return aco_opcode::s_orn2_b32;
346      case s_not:
347         return aco_opcode::s_not_b32;
348      case s_mov:
349         return aco_opcode::s_mov_b32;
350      case s_wqm:
351         return aco_opcode::s_wqm_b32;
352      case s_and_saveexec:
353         return aco_opcode::s_and_saveexec_b32;
354      case s_or_saveexec:
355         return aco_opcode::s_or_saveexec_b32;
356      case s_xnor:
357         return aco_opcode::s_xnor_b32;
358      case s_xor:
359         return aco_opcode::s_xor_b32;
360      case s_bcnt1_i32:
361         return aco_opcode::s_bcnt1_i32_b32;
362      case s_bitcmp1:
363         return aco_opcode::s_bitcmp1_b32;
364      case s_ff1_i32:
365         return aco_opcode::s_ff1_i32_b32;
366      case s_flbit_i32:
367         return aco_opcode::s_flbit_i32_b32;
368      case s_lshl:
369         return aco_opcode::s_lshl_b32;
370      default:
371         unreachable("Unsupported wave specific opcode.");
372      }
373   }
374
% for fixed in ['m0', 'vcc', 'exec', 'scc']:
   /* Operand reading tmp, fixed to the ${fixed} register. */
   Operand ${fixed}(Temp tmp) {
       % if fixed in ('vcc', 'exec'):
          //vcc_hi and exec_hi can still be used in wave32
          assert(tmp.type() == RegType::sgpr && tmp.bytes() <= 8);
       % endif
       Operand fixed_op(tmp);
       fixed_op.setFixed(aco::${fixed});
       return fixed_op;
   }

   /* Copy of def, fixed to the ${fixed} register. */
   Definition ${fixed}(Definition def) {
       % if fixed in ('vcc', 'exec'):
          //vcc_hi and exec_hi can still be used in wave32
          assert(def.regClass().type() == RegType::sgpr && def.bytes() <= 8);
       % endif
       def.setFixed(aco::${fixed});
       return def;
   }

% endfor
396
397   Operand set16bit(Operand op) {
398       op.set16bit(true);
399       return op;
400   }
401
402   Operand set24bit(Operand op) {
403       op.set24bit(true);
404       return op;
405   }
406
407   /* hand-written helpers */
408   Temp as_uniform(Op op)
409   {
410      assert(op.op.isTemp());
411      if (op.op.getTemp().type() == RegType::vgpr)
412         return pseudo(aco_opcode::p_as_uniform, def(RegType::sgpr, op.op.size()), op);
413      else
414         return op.op.getTemp();
415   }
416
417   Result v_mul_imm(Definition dst, Temp tmp, uint32_t imm, bool bits24=false)
418   {
419      assert(tmp.type() == RegType::vgpr);
420      bool has_lshl_add = program->gfx_level >= GFX9;
421      /* v_mul_lo_u32 has 1.6x the latency of most VALU on GFX10 (8 vs 5 cycles),
422       * compared to 4x the latency on <GFX10. */
423      unsigned mul_cost = program->gfx_level >= GFX10 ? 1 : (4 + Operand::c32(imm).isLiteral());
424      if (imm == 0) {
425         return copy(dst, Operand::zero());
426      } else if (imm == 1) {
427         return copy(dst, Operand(tmp));
428      } else if (util_is_power_of_two_or_zero(imm)) {
429         return vop2(aco_opcode::v_lshlrev_b32, dst, Operand::c32(ffs(imm) - 1u), tmp);
430      } else if (bits24) {
431        return vop2(aco_opcode::v_mul_u32_u24, dst, Operand::c32(imm), tmp);
432      } else if (util_is_power_of_two_nonzero(imm - 1u)) {
433         return vadd32(dst, vop2(aco_opcode::v_lshlrev_b32, def(v1), Operand::c32(ffs(imm - 1u) - 1u), tmp), tmp);
434      } else if (mul_cost > 2 && util_is_power_of_two_nonzero(imm + 1u)) {
435         return vsub32(dst, vop2(aco_opcode::v_lshlrev_b32, def(v1), Operand::c32(ffs(imm + 1u) - 1u), tmp), tmp);
436      }
437
438      unsigned instrs_required = util_bitcount(imm);
439      if (!has_lshl_add) {
440         instrs_required = util_bitcount(imm) - (imm & 0x1); /* shifts */
441         instrs_required += util_bitcount(imm) - 1; /* additions */
442      }
443      if (instrs_required < mul_cost) {
444         Result res(NULL);
445         Temp cur;
446         while (imm) {
447            unsigned shift = u_bit_scan(&imm);
448            Definition tmp_dst = imm ? def(v1) : dst;
449
450            if (shift && cur.id())
451               res = vadd32(Definition(tmp_dst), vop2(aco_opcode::v_lshlrev_b32, def(v1), Operand::c32(shift), tmp), cur);
452            else if (shift)
453               res = vop2(aco_opcode::v_lshlrev_b32, Definition(tmp_dst), Operand::c32(shift), tmp);
454            else if (cur.id())
455               res = vadd32(Definition(tmp_dst), tmp, cur);
456            else
457               tmp_dst = Definition(tmp);
458
459            cur = tmp_dst.getTemp();
460         }
461         return res;
462      }
463
464      Temp imm_tmp = copy(def(s1), Operand::c32(imm));
465      return vop3(aco_opcode::v_mul_lo_u32, dst, imm_tmp, tmp);
466   }
467
468   Result v_mul24_imm(Definition dst, Temp tmp, uint32_t imm)
469   {
470      return v_mul_imm(dst, tmp, imm, true);
471   }
472
473   Result copy(Definition dst, Op op) {
474      return pseudo(aco_opcode::p_parallelcopy, dst, op);
475   }
476
477   Result vadd32(Definition dst, Op a, Op b, bool carry_out=false, Op carry_in=Op(Operand(s2)), bool post_ra=false) {
478      if (b.op.isConstant() || b.op.regClass().type() != RegType::vgpr)
479         std::swap(a, b);
480      if (!post_ra && (!b.op.hasRegClass() || b.op.regClass().type() == RegType::sgpr))
481         b = copy(def(v1), b);
482
483      if (!carry_in.op.isUndefined())
484         return vop2(aco_opcode::v_addc_co_u32, Definition(dst), def(lm), a, b, carry_in);
485      else if (program->gfx_level >= GFX10 && carry_out)
486         return vop3(aco_opcode::v_add_co_u32_e64, Definition(dst), def(lm), a, b);
487      else if (program->gfx_level < GFX9 || carry_out)
488         return vop2(aco_opcode::v_add_co_u32, Definition(dst), def(lm), a, b);
489      else
490         return vop2(aco_opcode::v_add_u32, Definition(dst), a, b);
491   }
492
493   Result vsub32(Definition dst, Op a, Op b, bool carry_out=false, Op borrow=Op(Operand(s2)))
494   {
495      if (!borrow.op.isUndefined() || program->gfx_level < GFX9)
496         carry_out = true;
497
498      bool reverse = !b.op.isTemp() || b.op.regClass().type() != RegType::vgpr;
499      if (reverse)
500         std::swap(a, b);
501      if (!b.op.hasRegClass() || b.op.regClass().type() == RegType::sgpr)
502         b = copy(def(v1), b);
503
504      aco_opcode op;
505      Temp carry;
506      if (carry_out) {
507         carry = tmp(lm);
508         if (borrow.op.isUndefined())
509            op = reverse ? aco_opcode::v_subrev_co_u32 : aco_opcode::v_sub_co_u32;
510         else
511            op = reverse ? aco_opcode::v_subbrev_co_u32 : aco_opcode::v_subb_co_u32;
512      } else {
513         op = reverse ? aco_opcode::v_subrev_u32 : aco_opcode::v_sub_u32;
514      }
515      bool vop3 = false;
516      if (program->gfx_level >= GFX10 && op == aco_opcode::v_subrev_co_u32) {
517        vop3 = true;
518        op = aco_opcode::v_subrev_co_u32_e64;
519      } else if (program->gfx_level >= GFX10 && op == aco_opcode::v_sub_co_u32) {
520        vop3 = true;
521        op = aco_opcode::v_sub_co_u32_e64;
522      }
523
524      int num_ops = borrow.op.isUndefined() ? 2 : 3;
525      int num_defs = carry_out ? 2 : 1;
526      aco_ptr<Instruction> sub;
527      if (vop3)
528        sub.reset(create_instruction<VALU_instruction>(op, Format::VOP3, num_ops, num_defs));
529      else
530        sub.reset(create_instruction<VALU_instruction>(op, Format::VOP2, num_ops, num_defs));
531      sub->operands[0] = a.op;
532      sub->operands[1] = b.op;
533      if (!borrow.op.isUndefined())
534         sub->operands[2] = borrow.op;
535      sub->definitions[0] = dst;
536      if (carry_out)
537         sub->definitions[1] = Definition(carry);
538
539      return insert(std::move(sub));
540   }
541
542   Result readlane(Definition dst, Op vsrc, Op lane)
543   {
544      if (program->gfx_level >= GFX8)
545         return vop3(aco_opcode::v_readlane_b32_e64, dst, vsrc, lane);
546      else
547         return vop2(aco_opcode::v_readlane_b32, dst, vsrc, lane);
548   }
549   Result writelane(Definition dst, Op val, Op lane, Op vsrc) {
550      if (program->gfx_level >= GFX8)
551         return vop3(aco_opcode::v_writelane_b32_e64, dst, val, lane, vsrc);
552      else
553         return vop2(aco_opcode::v_writelane_b32, dst, val, lane, vsrc);
554   }
555<%
556import itertools
557formats = [("pseudo", [Format.PSEUDO], 'Pseudo_instruction', list(itertools.product(range(5), range(6))) + [(8, 1), (1, 8), (2, 6), (3, 6), (1, 6)]),
558           ("sop1", [Format.SOP1], 'SOP1_instruction', [(0, 1), (1, 0), (1, 1), (2, 1), (3, 2)]),
559           ("sop2", [Format.SOP2], 'SOP2_instruction', itertools.product([1, 2], [2, 3])),
560           ("sopk", [Format.SOPK], 'SOPK_instruction', itertools.product([0, 1, 2], [0, 1])),
561           ("sopp", [Format.SOPP], 'SOPP_instruction', itertools.product([0, 1], [0, 1])),
562           ("sopc", [Format.SOPC], 'SOPC_instruction', [(1, 2)]),
563           ("smem", [Format.SMEM], 'SMEM_instruction', [(0, 4), (0, 3), (1, 0), (1, 3), (1, 2), (1, 1), (0, 0)]),
564           ("ds", [Format.DS], 'DS_instruction', [(1, 1), (1, 2), (1, 3), (0, 3), (0, 4)]),
565           ("ldsdir", [Format.LDSDIR], 'LDSDIR_instruction', [(1, 1)]),
566           ("mubuf", [Format.MUBUF], 'MUBUF_instruction', [(0, 4), (1, 3)]),
567           ("mtbuf", [Format.MTBUF], 'MTBUF_instruction', [(0, 4), (1, 3)]),
568           ("mimg", [Format.MIMG], 'MIMG_instruction', itertools.product([0, 1], [3, 4, 5, 6, 7])),
569           ("exp", [Format.EXP], 'Export_instruction', [(0, 4), (0, 5)]),
570           ("branch", [Format.PSEUDO_BRANCH], 'Pseudo_branch_instruction', itertools.product([1], [0, 1])),
571           ("barrier", [Format.PSEUDO_BARRIER], 'Pseudo_barrier_instruction', [(0, 0)]),
572           ("reduction", [Format.PSEUDO_REDUCTION], 'Pseudo_reduction_instruction', [(3, 3)]),
573           ("vop1", [Format.VOP1], 'VALU_instruction', [(0, 0), (1, 1), (2, 2)]),
574           ("vop1_sdwa", [Format.VOP1, Format.SDWA], 'SDWA_instruction', [(1, 1)]),
575           ("vop2", [Format.VOP2], 'VALU_instruction', itertools.product([1, 2], [2, 3])),
576           ("vop2_sdwa", [Format.VOP2, Format.SDWA], 'SDWA_instruction', itertools.product([1, 2], [2, 3])),
577           ("vopc", [Format.VOPC], 'VALU_instruction', itertools.product([1, 2], [2])),
578           ("vopc_sdwa", [Format.VOPC, Format.SDWA], 'SDWA_instruction', itertools.product([1, 2], [2])),
579           ("vop3", [Format.VOP3], 'VALU_instruction', [(1, 3), (1, 2), (1, 1), (2, 2)]),
580           ("vop3p", [Format.VOP3P], 'VALU_instruction', [(1, 2), (1, 3)]),
581           ("vopd", [Format.VOPD], 'VOPD_instruction', [(2, 2), (2, 3), (2, 4), (2, 5), (2, 6)]),
582           ("vinterp_inreg", [Format.VINTERP_INREG], 'VINTERP_inreg_instruction', [(1, 3)]),
583           ("vintrp", [Format.VINTRP], 'VINTRP_instruction', [(1, 2), (1, 3)]),
584           ("vop1_dpp", [Format.VOP1, Format.DPP16], 'DPP16_instruction', [(1, 1)]),
585           ("vop2_dpp", [Format.VOP2, Format.DPP16], 'DPP16_instruction', itertools.product([1, 2], [2, 3])),
586           ("vopc_dpp", [Format.VOPC, Format.DPP16], 'DPP16_instruction', itertools.product([1, 2], [2])),
587           ("vop3_dpp", [Format.VOP3, Format.DPP16], 'DPP16_instruction', [(1, 3), (1, 2), (1, 1), (2, 2)]),
588           ("vop3p_dpp", [Format.VOP3P, Format.DPP16], 'DPP16_instruction', [(1, 2), (1, 3)]),
589           ("vop1_dpp8", [Format.VOP1, Format.DPP8], 'DPP8_instruction', [(1, 1)]),
590           ("vop2_dpp8", [Format.VOP2, Format.DPP8], 'DPP8_instruction', itertools.product([1, 2], [2, 3])),
591           ("vopc_dpp8", [Format.VOPC, Format.DPP8], 'DPP8_instruction', itertools.product([1, 2], [2])),
592           ("vop3_dpp8", [Format.VOP3, Format.DPP8], 'DPP8_instruction', [(1, 3), (1, 2), (1, 1), (2, 2)]),
593           ("vop3p_dpp8", [Format.VOP3P, Format.DPP8], 'DPP8_instruction', [(1, 2), (1, 3)]),
594           ("vop1_e64", [Format.VOP1, Format.VOP3], 'VALU_instruction', itertools.product([1], [1])),
595           ("vop2_e64", [Format.VOP2, Format.VOP3], 'VALU_instruction', itertools.product([1, 2], [2, 3])),
596           ("vopc_e64", [Format.VOPC, Format.VOP3], 'VALU_instruction', itertools.product([1, 2], [2])),
597           ("vop1_e64_dpp", [Format.VOP1, Format.VOP3, Format.DPP16], 'DPP16_instruction', itertools.product([1], [1])),
598           ("vop2_e64_dpp", [Format.VOP2, Format.VOP3, Format.DPP16], 'DPP16_instruction', itertools.product([1, 2], [2, 3])),
599           ("vopc_e64_dpp", [Format.VOPC, Format.VOP3, Format.DPP16], 'DPP16_instruction', itertools.product([1, 2], [2])),
600           ("vop1_e64_dpp8", [Format.VOP1, Format.VOP3, Format.DPP8], 'DPP8_instruction', itertools.product([1], [1])),
601           ("vop2_e64_dpp8", [Format.VOP2, Format.VOP3, Format.DPP8], 'DPP8_instruction', itertools.product([1, 2], [2, 3])),
602           ("vopc_e64_dpp8", [Format.VOPC, Format.VOP3, Format.DPP8], 'DPP8_instruction', itertools.product([1, 2], [2])),
603           ("flat", [Format.FLAT], 'FLAT_instruction', [(0, 3), (1, 2)]),
604           ("global", [Format.GLOBAL], 'FLAT_instruction', [(0, 3), (1, 2)]),
605           ("scratch", [Format.SCRATCH], 'FLAT_instruction', [(0, 3), (1, 2)])]
606formats = [(f if len(f) == 5 else f + ('',)) for f in formats]
607%>\\
## Generates the per-format Builder methods.
## For every entry of the formats table and every (num_definitions,
## num_operands) shape of that entry, one method named after the entry is
## emitted. Inside the loop `formats` is rebound to the Format list of the
## current entry and shadows the table.
% for name, formats, struct, shapes, extra_field_setup in formats:
    % for num_definitions, num_operands in shapes:
        <%
        # Parameter list: opcode, then the definitions, then the operands,
        # then whatever extra field declarations each format contributes.
        args = ['aco_opcode opcode']
        for i in range(num_definitions):
            args.append('Definition def%d' % i)
        for i in range(num_operands):
            args.append('Op op%d' % i)
        for f in formats:
            args += f.get_builder_field_decls()
        %>\\

   Result ${name}(${', '.join(args)})
   {
      ${struct} *instr = create_instruction<${struct}>(opcode, (Format)(${'|'.join('(int)Format::%s' % f.name for f in formats)}), ${num_operands}, ${num_definitions});
        % for i in range(num_definitions):
            instr->definitions[${i}] = def${i};
            instr->definitions[${i}].setPrecise(is_precise);
            instr->definitions[${i}].setNUW(is_nuw);
        % endfor
        % for i in range(num_operands):
            instr->operands[${i}] = op${i}.op;
        % endfor
        % for f in formats:
            % for dest, field_name in zip(f.get_builder_field_dests(), f.get_builder_field_names()):
      instr->${dest} = ${field_name};
            % endfor
            ${f.get_builder_initialization(num_operands)}
        % endfor
       ${extra_field_setup}
      return insert(instr);
   }

    ## sop1, sop2 and sopc additionally get an overload that accepts a
    ## WaveSpecificOpcode and forwards to the method above via w64or32().
    % if name == 'sop1' or name == 'sop2' or name == 'sopc':
        <%
        # Reuse the parameter list with the opcode type swapped, and collect
        # the definition/operand names that are forwarded unchanged.
        args[0] = 'WaveSpecificOpcode opcode'
        params = []
        for i in range(num_definitions):
            params.append('def%d' % i)
        for i in range(num_operands):
            params.append('op%d' % i)
        %>\\

   inline Result ${name}(${', '.join(args)})
   {
       return ${name}(w64or32(opcode), ${', '.join(params)});
   }

    % endif
    % endfor
% endfor
659};
660
661void hw_init_scratch(Builder& bld, Definition def, Operand scratch_addr, Operand scratch_offset);
662
663} // namespace aco
664
665#endif /* _ACO_BUILDER_ */"""
666
667from aco_opcodes import opcodes, Format
668from mako.template import Template
669
# Render the Mako template with the opcode table and Format class, then write
# the resulting header text to stdout.
rendered_header = Template(template).render(opcodes=opcodes, Format=Format)
print(rendered_header)
671