1 /*
2 * Copyright © 2018 Valve Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 */
24
25 #ifndef ACO_IR_H
26 #define ACO_IR_H
27
28 #include "aco_opcodes.h"
29 #include "aco_shader_info.h"
30 #include "aco_util.h"
31
32 #include "util/compiler.h"
33
34 #include "ac_binary.h"
35 #include "ac_hw_stage.h"
36 #include "amd_family.h"
37 #include <algorithm>
38 #include <bitset>
39 #include <memory>
40 #include <vector>
41
42 typedef struct nir_shader nir_shader;
43
44 namespace aco {
45
/* Bitmask of enabled debug options; bits are defined by the enum below. */
extern uint64_t debug_flags;

/* Individual debug flag bits stored in debug_flags. */
enum {
   DEBUG_VALIDATE_IR = 0x1,
   DEBUG_VALIDATE_RA = 0x2,
   DEBUG_PERFWARN = 0x4,
   DEBUG_FORCE_WAITCNT = 0x8,
   DEBUG_NO_VN = 0x10,
   DEBUG_NO_OPT = 0x20,
   DEBUG_NO_SCHED = 0x40,
   DEBUG_PERF_INFO = 0x80,
   DEBUG_LIVE_INFO = 0x100,
   DEBUG_FORCE_WAITDEPS = 0x200,
   DEBUG_NO_VALIDATE_IR = 0x400,
   DEBUG_NO_SCHED_ILP = 0x800,
   DEBUG_NO_SCHED_VOPD = 0x1000,
};
63
/* Memory storage classes used by the memory model. Each class is a separate
 * bit so that several classes can be combined into a mask. */
enum storage_class : uint8_t {
   storage_none = 0x0, /* no synchronization and can be reordered around aliasing stores */
   storage_buffer = 0x1, /* SSBOs and global memory */
   storage_gds = 0x2,
   storage_image = 0x4,
   storage_shared = 0x8, /* or TCS output */
   storage_vmem_output = 0x10, /* GS or TCS output stores using VMEM */
   storage_task_payload = 0x20, /* Task-Mesh payload */
   storage_scratch = 0x40,
   storage_vgpr_spill = 0x80,
   storage_count = 8, /* number of storage bits above, not counting storage_none */
};
76
/* Ordering/aliasing semantics of a memory access or barrier, combinable as a
 * bitmask. */
enum memory_semantics : uint8_t {
   semantic_none = 0x0,
   /* for loads: don't move any access after this load to before this load (even other loads)
    * for barriers: don't move any access after the barrier to before any
    * atomics/control_barriers/sendmsg_gs_done/position-primitive-export before the barrier */
   semantic_acquire = 0x1,
   /* for stores: don't move any access before this store to after this store
    * for barriers: don't move any access before the barrier to after any
    * atomics/control_barriers/sendmsg_gs_done/position-primitive-export after the barrier */
   semantic_release = 0x2,

   /* the rest are for load/stores/atomics only */
   /* cannot be DCE'd or CSE'd */
   semantic_volatile = 0x4,
   /* does not interact with barriers and assumes this lane is the only lane
    * accessing this memory */
   semantic_private = 0x8,
   /* this operation can be reordered around operations of the same storage.
    * says nothing about barriers */
   semantic_can_reorder = 0x10,
   /* this is an atomic instruction (it may only read or write memory) */
   semantic_atomic = 0x20,
   /* this instruction both reads and writes memory */
   semantic_rmw = 0x40,

   semantic_acqrel = semantic_acquire | semantic_release,
   semantic_atomicrmw = semantic_volatile | semantic_atomic | semantic_rmw,
};
105
/* Synchronization scopes, ordered from narrowest (single invocation) to
 * widest (whole device). */
enum sync_scope : uint8_t {
   scope_invocation = 0,
   scope_subgroup = 1,
   scope_workgroup = 2,
   scope_queuefamily = 3,
   scope_device = 4,
};
113
114 struct memory_sync_info {
memory_sync_infomemory_sync_info115 memory_sync_info() : storage(storage_none), semantics(semantic_none), scope(scope_invocation) {}
116 memory_sync_info(int storage_, int semantics_ = 0, sync_scope scope_ = scope_invocation)
117 : storage((storage_class)storage_), semantics((memory_semantics)semantics_), scope(scope_)
118 {}
119
120 storage_class storage : 8;
121 memory_semantics semantics : 8;
122 sync_scope scope : 8;
123
124 bool operator==(const memory_sync_info& rhs) const
125 {
126 return storage == rhs.storage && semantics == rhs.semantics && scope == rhs.scope;
127 }
128
can_reordermemory_sync_info129 bool can_reorder() const
130 {
131 if (semantics & semantic_acqrel)
132 return false;
133 /* Also check storage so that zero-initialized memory_sync_info can be
134 * reordered. */
135 return (!storage || (semantics & semantic_can_reorder)) && !(semantics & semantic_volatile);
136 }
137 };
138 static_assert(sizeof(memory_sync_info) == 3, "Unexpected padding");
139
/* Float rounding modes, matching the hardware MODE register encoding. */
enum fp_round {
   fp_round_ne = 0, /* round to nearest even */
   fp_round_pi = 1, /* round towards +infinity */
   fp_round_ni = 2, /* round towards -infinity */
   fp_round_tz = 3, /* round towards zero (truncate) */
};
146
/* Denormal handling modes, matching the hardware MODE register encoding. */
enum fp_denorm {
   /* Note that v_rcp_f32, v_exp_f32, v_log_f32, v_sqrt_f32, v_rsq_f32 and
    * v_mad_f32/v_madak_f32/v_madmk_f32/v_mac_f32 always flush denormals. */
   fp_denorm_flush = 0x0, /* flush input and output denormals */
   fp_denorm_keep_in = 0x1, /* keep input denormals only */
   fp_denorm_keep_out = 0x2, /* keep output denormals only */
   fp_denorm_keep = 0x3, /* keep input and output denormals */
};
155
156 struct float_mode {
157 /* matches encoding of the MODE register */
158 union {
159 struct {
160 fp_round round32 : 2;
161 fp_round round16_64 : 2;
162 unsigned denorm32 : 2;
163 unsigned denorm16_64 : 2;
164 };
165 struct {
166 uint8_t round : 4;
167 uint8_t denorm : 4;
168 };
169 uint8_t val = 0;
170 };
171 /* if false, optimizations which may remove infs/nan/-0.0 can be done */
172 bool preserve_signed_zero_inf_nan32 : 1;
173 bool preserve_signed_zero_inf_nan16_64 : 1;
174 /* if false, optimizations which may remove denormal flushing can be done */
175 bool must_flush_denorms32 : 1;
176 bool must_flush_denorms16_64 : 1;
177 bool care_about_round32 : 1;
178 bool care_about_round16_64 : 1;
179
180 /* Returns true if instructions using the mode "other" can safely use the
181 * current one instead. */
canReplacefloat_mode182 bool canReplace(float_mode other) const noexcept
183 {
184 return val == other.val &&
185 (preserve_signed_zero_inf_nan32 || !other.preserve_signed_zero_inf_nan32) &&
186 (preserve_signed_zero_inf_nan16_64 || !other.preserve_signed_zero_inf_nan16_64) &&
187 (must_flush_denorms32 || !other.must_flush_denorms32) &&
188 (must_flush_denorms16_64 || !other.must_flush_denorms16_64) &&
189 (care_about_round32 || !other.care_about_round32) &&
190 (care_about_round16_64 || !other.care_about_round16_64);
191 }
192 };
193
/* An s_waitcnt immediate split into its separate hardware counters. A value
 * of unset_counter means "do not wait on this counter". Member functions are
 * defined out-of-line. */
struct wait_imm {
   static const uint8_t unset_counter = 0xff;

   uint8_t vm;   /* vmcnt */
   uint8_t exp;  /* expcnt */
   uint8_t lgkm; /* lgkmcnt */
   uint8_t vs;   /* vscnt */

   wait_imm();
   wait_imm(uint16_t vm_, uint16_t exp_, uint16_t lgkm_, uint16_t vs_);
   /* decode a packed hardware immediate for the given gfx level */
   wait_imm(enum amd_gfx_level chip, uint16_t packed);

   /* encode into the packed hardware immediate for the given gfx level */
   uint16_t pack(enum amd_gfx_level chip) const;

   /* merge another wait_imm into this one; presumably returns whether this
    * changed — confirm against the out-of-line definition */
   bool combine(const wait_imm& other);

   bool empty() const;

   void print(FILE* output) const;
};
214
/* s_wait_event immediate bits. Note the inverted sense: a set bit disables
 * the corresponding wait. */
enum wait_event_imm : uint16_t {
   /* If this bit is 0, await that the export buffer space has been allocated.
    * In Primitive Ordered Pixel Shading, export ready means that the overlapped waves have exited
    * their ordered sections (by performing the `done` export), and that the current wave may enter
    * its ordered section.
    */
   wait_event_imm_dont_wait_export_ready = 0x1,
};
224
225 constexpr Format
asVOP3(Format format)226 asVOP3(Format format)
227 {
228 return (Format)((uint32_t)Format::VOP3 | (uint32_t)format);
229 };
230
231 constexpr Format
asSDWA(Format format)232 asSDWA(Format format)
233 {
234 assert(format == Format::VOP1 || format == Format::VOP2 || format == Format::VOPC);
235 return (Format)((uint32_t)Format::SDWA | (uint32_t)format);
236 }
237
238 constexpr Format
withoutDPP(Format format)239 withoutDPP(Format format)
240 {
241 return (Format)((uint32_t)format & ~((uint32_t)Format::DPP16 | (uint32_t)Format::DPP8));
242 }
243
244 constexpr Format
withoutVOP3(Format format)245 withoutVOP3(Format format)
246 {
247 return (Format)((uint32_t)format & ~((uint32_t)Format::VOP3));
248 }
249
/* The two hardware register files: scalar (SGPR) and vector (VGPR). */
enum class RegType {
   sgpr,
   vgpr,
};
254
255 struct RegClass {
256
257 enum RC : uint8_t {
258 s1 = 1,
259 s2 = 2,
260 s3 = 3,
261 s4 = 4,
262 s6 = 6,
263 s8 = 8,
264 s16 = 16,
265 v1 = s1 | (1 << 5),
266 v2 = s2 | (1 << 5),
267 v3 = s3 | (1 << 5),
268 v4 = s4 | (1 << 5),
269 v5 = 5 | (1 << 5),
270 v6 = 6 | (1 << 5),
271 v7 = 7 | (1 << 5),
272 v8 = 8 | (1 << 5),
273 /* byte-sized register class */
274 v1b = v1 | (1 << 7),
275 v2b = v2 | (1 << 7),
276 v3b = v3 | (1 << 7),
277 v4b = v4 | (1 << 7),
278 v6b = v6 | (1 << 7),
279 v8b = v8 | (1 << 7),
280 /* these are used for WWM and spills to vgpr */
281 v1_linear = v1 | (1 << 6),
282 v2_linear = v2 | (1 << 6),
283 };
284
285 RegClass() = default;
RegClassRegClass286 constexpr RegClass(RC rc_) : rc(rc_) {}
RegClassRegClass287 constexpr RegClass(RegType type, unsigned size)
288 : rc((RC)((type == RegType::vgpr ? 1 << 5 : 0) | size))
289 {}
290
RCRegClass291 constexpr operator RC() const { return rc; }
292 explicit operator bool() = delete;
293
typeRegClass294 constexpr RegType type() const { return rc <= RC::s16 ? RegType::sgpr : RegType::vgpr; }
is_linear_vgprRegClass295 constexpr bool is_linear_vgpr() const { return rc & (1 << 6); };
is_subdwordRegClass296 constexpr bool is_subdword() const { return rc & (1 << 7); }
bytesRegClass297 constexpr unsigned bytes() const { return ((unsigned)rc & 0x1F) * (is_subdword() ? 1 : 4); }
298 // TODO: use size() less in favor of bytes()
sizeRegClass299 constexpr unsigned size() const { return (bytes() + 3) >> 2; }
is_linearRegClass300 constexpr bool is_linear() const { return rc <= RC::s16 || is_linear_vgpr(); }
as_linearRegClass301 constexpr RegClass as_linear() const { return RegClass((RC)(rc | (1 << 6))); }
as_subdwordRegClass302 constexpr RegClass as_subdword() const { return RegClass((RC)(rc | 1 << 7)); }
303
getRegClass304 static constexpr RegClass get(RegType type, unsigned bytes)
305 {
306 if (type == RegType::sgpr) {
307 return RegClass(type, DIV_ROUND_UP(bytes, 4u));
308 } else {
309 return bytes % 4u ? RegClass(type, bytes).as_subdword() : RegClass(type, bytes / 4u);
310 }
311 }
312
resizeRegClass313 constexpr RegClass resize(unsigned bytes) const
314 {
315 if (is_linear_vgpr()) {
316 assert(bytes % 4u == 0);
317 return get(RegType::vgpr, bytes).as_linear();
318 }
319 return get(type(), bytes);
320 }
321
322 private:
323 RC rc;
324 };
325
/* transitional helper expressions: shorthand constants for the common
 * register classes */
static constexpr RegClass s1{RegClass::s1};
static constexpr RegClass s2{RegClass::s2};
static constexpr RegClass s3{RegClass::s3};
static constexpr RegClass s4{RegClass::s4};
static constexpr RegClass s8{RegClass::s8};
static constexpr RegClass s16{RegClass::s16};
static constexpr RegClass v1{RegClass::v1};
static constexpr RegClass v2{RegClass::v2};
static constexpr RegClass v3{RegClass::v3};
static constexpr RegClass v4{RegClass::v4};
static constexpr RegClass v5{RegClass::v5};
static constexpr RegClass v6{RegClass::v6};
static constexpr RegClass v7{RegClass::v7};
static constexpr RegClass v8{RegClass::v8};
static constexpr RegClass v1b{RegClass::v1b};
static constexpr RegClass v2b{RegClass::v2b};
static constexpr RegClass v3b{RegClass::v3b};
static constexpr RegClass v4b{RegClass::v4b};
static constexpr RegClass v6b{RegClass::v6b};
static constexpr RegClass v8b{RegClass::v8b};
347
348 /**
349 * Temp Class
350 * Each temporary virtual register has a
351 * register class (i.e. size and type)
352 * and SSA id.
353 */
354 struct Temp {
TempTemp355 Temp() noexcept : id_(0), reg_class(0) {}
TempTemp356 constexpr Temp(uint32_t id, RegClass cls) noexcept : id_(id), reg_class(uint8_t(cls)) {}
357
idTemp358 constexpr uint32_t id() const noexcept { return id_; }
regClassTemp359 constexpr RegClass regClass() const noexcept { return (RegClass::RC)reg_class; }
360
bytesTemp361 constexpr unsigned bytes() const noexcept { return regClass().bytes(); }
sizeTemp362 constexpr unsigned size() const noexcept { return regClass().size(); }
typeTemp363 constexpr RegType type() const noexcept { return regClass().type(); }
is_linearTemp364 constexpr bool is_linear() const noexcept { return regClass().is_linear(); }
365
366 constexpr bool operator<(Temp other) const noexcept { return id() < other.id(); }
367 constexpr bool operator==(Temp other) const noexcept { return id() == other.id(); }
368 constexpr bool operator!=(Temp other) const noexcept { return id() != other.id(); }
369
370 private:
371 uint32_t id_ : 24;
372 uint32_t reg_class : 8;
373 };
374
375 /**
376 * PhysReg
377 * Represents the physical register for each
378 * Operand and Definition.
379 */
struct PhysReg {
   constexpr PhysReg() = default;
   explicit constexpr PhysReg(unsigned r) : reg_b(r << 2) {}
   /* dword-granular register index */
   constexpr unsigned reg() const { return reg_b / 4u; }
   /* byte offset within the register */
   constexpr unsigned byte() const { return reg_b % 4u; }
   constexpr operator unsigned() const { return reg(); }
   constexpr bool operator==(PhysReg other) const { return reg_b == other.reg_b; }
   constexpr bool operator!=(PhysReg other) const { return !(reg_b == other.reg_b); }
   constexpr bool operator<(PhysReg other) const { return reg_b < other.reg_b; }
   /* Returns a copy advanced by the given number of bytes. */
   constexpr PhysReg advance(int bytes) const
   {
      PhysReg copy = *this;
      copy.reg_b += bytes;
      return copy;
   }

   /* register position in bytes: reg() * 4 + byte() */
   uint16_t reg_b = 0;
};
398
/* helper expressions for special registers (values are the SGPR encoding
 * indices of these registers) */
static constexpr PhysReg m0{124};
static constexpr PhysReg flat_scr_lo{102}; /* GFX8-GFX9, encoded differently on GFX6-7 */
static constexpr PhysReg flat_scr_hi{103}; /* GFX8-GFX9, encoded differently on GFX6-7 */
static constexpr PhysReg vcc{106};
static constexpr PhysReg vcc_hi{107};
static constexpr PhysReg tba{108}; /* GFX6-GFX8 */
static constexpr PhysReg tma{110}; /* GFX6-GFX8 */
static constexpr PhysReg ttmp0{112};
static constexpr PhysReg ttmp1{113};
static constexpr PhysReg ttmp2{114};
static constexpr PhysReg ttmp3{115};
static constexpr PhysReg ttmp4{116};
static constexpr PhysReg ttmp5{117};
static constexpr PhysReg ttmp6{118};
static constexpr PhysReg ttmp7{119};
static constexpr PhysReg ttmp8{120};
static constexpr PhysReg ttmp9{121};
static constexpr PhysReg ttmp10{122};
static constexpr PhysReg ttmp11{123};
static constexpr PhysReg sgpr_null{125}; /* GFX10+ */
static constexpr PhysReg exec{126};
static constexpr PhysReg exec_lo{126};
static constexpr PhysReg exec_hi{127};
static constexpr PhysReg pops_exiting_wave_id{239}; /* GFX9-GFX10.3 */
static constexpr PhysReg scc{253};
425
426 /**
427 * Operand Class
428 * Initially, each Operand refers to either
429 * a temporary virtual register
430 * or to a constant value
431 * Temporary registers get mapped to physical register during RA
432 * Constant values are inlined into the instruction sequence.
433 */
class Operand final {
public:
   /* Default: an undefined operand, fixed to register index 128 (the
    * hardware inline-constant encoding of 0). */
   constexpr Operand()
       : reg_(PhysReg{128}), isTemp_(false), isFixed_(true), isConstant_(false), isKill_(false),
         isUndef_(true), isFirstKill_(false), constSize(0), isLateKill_(false), is16bit_(false),
         is24bit_(false), signext(false)
   {}

   /* Operand referring to a temporary. A zero SSA id creates an undefined
    * operand instead. */
   explicit Operand(Temp r) noexcept
   {
      data_.temp = r;
      if (r.id()) {
         isTemp_ = true;
      } else {
         isUndef_ = true;
         setFixed(PhysReg{128});
      }
   };
   /* Operand referring to a temporary, pre-assigned to a physical register. */
   explicit Operand(Temp r, PhysReg reg) noexcept
   {
      assert(r.id()); /* Don't allow fixing an undef to a register */
      data_.temp = r;
      isTemp_ = true;
      setFixed(reg);
   };

   /* 8-bit constant */
   static Operand c8(uint8_t v) noexcept
   {
      /* 8-bit constants are only used for copies and copies from any 8-bit
       * constant can be implemented with a SDWA v_mul_u32_u24. So consider all
       * to be inline constants. */
      Operand op;
      op.control_ = 0;
      op.data_.i = v;
      op.isConstant_ = true;
      op.constSize = 0;
      op.setFixed(PhysReg{0u});
      return op;
   };

   /* 16-bit constant */
   static Operand c16(uint16_t v) noexcept
   {
      Operand op;
      op.control_ = 0;
      op.data_.i = v;
      op.isConstant_ = true;
      op.constSize = 1;
      /* the fixed register index encodes the hardware inline constant;
       * index 255 marks a literal that must be emitted separately */
      if (v <= 64)
         op.setFixed(PhysReg{128u + v});
      else if (v >= 0xFFF0) /* [-16 .. -1] */
         op.setFixed(PhysReg{(unsigned)(192 - (int16_t)v)});
      else if (v == 0x3800) /* 0.5 */
         op.setFixed(PhysReg{240});
      else if (v == 0xB800) /* -0.5 */
         op.setFixed(PhysReg{241});
      else if (v == 0x3C00) /* 1.0 */
         op.setFixed(PhysReg{242});
      else if (v == 0xBC00) /* -1.0 */
         op.setFixed(PhysReg{243});
      else if (v == 0x4000) /* 2.0 */
         op.setFixed(PhysReg{244});
      else if (v == 0xC000) /* -2.0 */
         op.setFixed(PhysReg{245});
      else if (v == 0x4400) /* 4.0 */
         op.setFixed(PhysReg{246});
      else if (v == 0xC400) /* -4.0 */
         op.setFixed(PhysReg{247});
      else if (v == 0x3118) /* 1/2 PI */
         op.setFixed(PhysReg{248});
      else /* Literal Constant */
         op.setFixed(PhysReg{255});
      return op;
   }

   /* 32-bit constant */
   static Operand c32(uint32_t v) noexcept { return c32_or_c64(v, false); }

   /* 64-bit constant */
   static Operand c64(uint64_t v) noexcept
   {
      Operand op;
      op.control_ = 0;
      op.isConstant_ = true;
      op.constSize = 3;
      /* data_.i keeps the 32-bit single-precision equivalent for the
       * floating-point inline constants */
      if (v <= 64) {
         op.data_.i = (uint32_t)v;
         op.setFixed(PhysReg{128 + (uint32_t)v});
      } else if (v >= 0xFFFFFFFFFFFFFFF0) { /* [-16 .. -1] */
         op.data_.i = (uint32_t)v;
         op.setFixed(PhysReg{192 - (uint32_t)v});
      } else if (v == 0x3FE0000000000000) { /* 0.5 */
         op.data_.i = 0x3f000000;
         op.setFixed(PhysReg{240});
      } else if (v == 0xBFE0000000000000) { /* -0.5 */
         op.data_.i = 0xbf000000;
         op.setFixed(PhysReg{241});
      } else if (v == 0x3FF0000000000000) { /* 1.0 */
         op.data_.i = 0x3f800000;
         op.setFixed(PhysReg{242});
      } else if (v == 0xBFF0000000000000) { /* -1.0 */
         op.data_.i = 0xbf800000;
         op.setFixed(PhysReg{243});
      } else if (v == 0x4000000000000000) { /* 2.0 */
         op.data_.i = 0x40000000;
         op.setFixed(PhysReg{244});
      } else if (v == 0xC000000000000000) { /* -2.0 */
         op.data_.i = 0xc0000000;
         op.setFixed(PhysReg{245});
      } else if (v == 0x4010000000000000) { /* 4.0 */
         op.data_.i = 0x40800000;
         op.setFixed(PhysReg{246});
      } else if (v == 0xC010000000000000) { /* -4.0 */
         op.data_.i = 0xc0800000;
         op.setFixed(PhysReg{247});
      } else { /* Literal Constant: we don't know if it is a long or double.*/
         op.signext = v >> 63;
         op.data_.i = v & 0xffffffffu;
         op.setFixed(PhysReg{255});
         assert(op.constantValue64() == v &&
                "attempt to create a unrepresentable 64-bit literal constant");
      }
      return op;
   }

   /* 32-bit constant stored as a 32-bit or 64-bit operand */
   static Operand c32_or_c64(uint32_t v, bool is64bit) noexcept
   {
      Operand op;
      op.control_ = 0;
      op.data_.i = v;
      op.isConstant_ = true;
      op.constSize = is64bit ? 3 : 2;
      if (v <= 64)
         op.setFixed(PhysReg{128 + v});
      else if (v >= 0xFFFFFFF0) /* [-16 .. -1] */
         op.setFixed(PhysReg{192 - v});
      else if (v == 0x3f000000) /* 0.5 */
         op.setFixed(PhysReg{240});
      else if (v == 0xbf000000) /* -0.5 */
         op.setFixed(PhysReg{241});
      else if (v == 0x3f800000) /* 1.0 */
         op.setFixed(PhysReg{242});
      else if (v == 0xbf800000) /* -1.0 */
         op.setFixed(PhysReg{243});
      else if (v == 0x40000000) /* 2.0 */
         op.setFixed(PhysReg{244});
      else if (v == 0xc0000000) /* -2.0 */
         op.setFixed(PhysReg{245});
      else if (v == 0x40800000) /* 4.0 */
         op.setFixed(PhysReg{246});
      else if (v == 0xc0800000) /* -4.0 */
         op.setFixed(PhysReg{247});
      else { /* Literal Constant */
         assert(!is64bit && "attempt to create a 64-bit literal constant");
         op.setFixed(PhysReg{255});
      }
      return op;
   }

   /* 32-bit constant forced to the literal encoding, even if an inline
    * constant encoding exists for the value */
   static Operand literal32(uint32_t v) noexcept
   {
      Operand op;
      op.control_ = 0;
      op.data_.i = v;
      op.isConstant_ = true;
      op.constSize = 2;
      op.setFixed(PhysReg{255});
      return op;
   }

   /* undefined operand of the given register class */
   explicit Operand(RegClass type) noexcept
   {
      isUndef_ = true;
      data_.temp = Temp(0, type);
      setFixed(PhysReg{128});
   };
   /* operand fixed to a physical register without a temporary (id 0) */
   explicit Operand(PhysReg reg, RegClass type) noexcept
   {
      data_.temp = Temp(0, type);
      setFixed(reg);
   }

   /* zero constant of the given size in bytes */
   static Operand zero(unsigned bytes = 4) noexcept
   {
      if (bytes == 8)
         return Operand::c64(0);
      else if (bytes == 4)
         return Operand::c32(0);
      else if (bytes == 2)
         return Operand::c16(0);
      assert(bytes == 1);
      return Operand::c8(0);
   }

   /* This is useful over the constructors when you want to take a gfx level
    * for 1/2 PI or an unknown operand size.
    */
   static Operand get_const(enum amd_gfx_level chip, uint64_t val, unsigned bytes)
   {
      if (val == 0x3e22f983 && bytes == 4 && chip >= GFX8) {
         /* 1/2 PI can be an inline constant on GFX8+ */
         Operand op = Operand::c32(val);
         op.setFixed(PhysReg{248});
         return op;
      }

      if (bytes == 8)
         return Operand::c64(val);
      else if (bytes == 4)
         return Operand::c32(val);
      else if (bytes == 2)
         return Operand::c16(val);
      assert(bytes == 1);
      return Operand::c8(val);
   }

   /* Whether "val" can be represented as a constant operand of the given
    * size; zext/sext allow the upper half of a 64-bit value to be implied by
    * zero- or sign-extension of the lower 32 bits. */
   static bool is_constant_representable(uint64_t val, unsigned bytes, bool zext = false,
                                         bool sext = false)
   {
      if (bytes <= 4)
         return true;

      if (zext && (val & 0xFFFFFFFF00000000) == 0x0000000000000000)
         return true;
      uint64_t upper33 = val & 0xFFFFFFFF80000000;
      if (sext && (upper33 == 0xFFFFFFFF80000000 || upper33 == 0))
         return true;

      return val >= 0xFFFFFFFFFFFFFFF0 || val <= 64 || /* [-16 .. 64] */
             val == 0x3FE0000000000000 ||              /* 0.5 */
             val == 0xBFE0000000000000 ||              /* -0.5 */
             val == 0x3FF0000000000000 ||              /* 1.0 */
             val == 0xBFF0000000000000 ||              /* -1.0 */
             val == 0x4000000000000000 ||              /* 2.0 */
             val == 0xC000000000000000 ||              /* -2.0 */
             val == 0x4010000000000000 ||              /* 4.0 */
             val == 0xC010000000000000;                /* -4.0 */
   }

   constexpr bool isTemp() const noexcept { return isTemp_; }

   constexpr void setTemp(Temp t) noexcept
   {
      assert(!isConstant_);
      isTemp_ = true;
      data_.temp = t;
   }

   constexpr Temp getTemp() const noexcept { return data_.temp; }

   constexpr uint32_t tempId() const noexcept { return data_.temp.id(); }

   /* every non-constant operand (temporary or undef) carries a register class */
   constexpr bool hasRegClass() const noexcept { return !isConstant(); }

   constexpr RegClass regClass() const noexcept { return data_.temp.regClass(); }

   constexpr unsigned bytes() const noexcept
   {
      if (isConstant())
         return 1 << constSize;
      else
         return data_.temp.bytes();
   }

   /* size in dwords (constants smaller than a dword count as one) */
   constexpr unsigned size() const noexcept
   {
      if (isConstant())
         return constSize > 2 ? 2 : 1;
      else
         return data_.temp.size();
   }

   constexpr bool isFixed() const noexcept { return isFixed_; }

   constexpr PhysReg physReg() const noexcept { return reg_; }

   constexpr void setFixed(PhysReg reg) noexcept
   {
      /* NOTE(review): after PhysReg's 14-bit truncation, reg can never
       * compare equal to unsigned(-1), so isFixed_ appears to always become
       * true here — confirm intent. */
      isFixed_ = reg != unsigned(-1);
      reg_ = reg;
   }

   constexpr bool isConstant() const noexcept { return isConstant_; }

   /* a constant that does not fit any inline encoding (register index 255) */
   constexpr bool isLiteral() const noexcept { return isConstant() && reg_ == 255; }

   constexpr bool isUndefined() const noexcept { return isUndef_; }

   constexpr uint32_t constantValue() const noexcept { return data_.i; }

   constexpr bool constantEquals(uint32_t cmp) const noexcept
   {
      return isConstant() && constantValue() == cmp;
   }

   /* Reconstructs a 64-bit constant value from the fixed register encoding
    * (the inline-constant index) and the stored low dword. */
   constexpr uint64_t constantValue64() const noexcept
   {
      if (constSize == 3) {
         if (reg_ <= 192)
            return reg_ - 128;
         else if (reg_ <= 208)
            return 0xFFFFFFFFFFFFFFFF - (reg_ - 193);

         switch (reg_) {
         case 240: return 0x3FE0000000000000;
         case 241: return 0xBFE0000000000000;
         case 242: return 0x3FF0000000000000;
         case 243: return 0xBFF0000000000000;
         case 244: return 0x4000000000000000;
         case 245: return 0xC000000000000000;
         case 246: return 0x4010000000000000;
         case 247: return 0xC010000000000000;
         case 255:
            return (signext && (data_.i & 0x80000000u) ? 0xffffffff00000000ull : 0ull) | data_.i;
         }
         unreachable("invalid register for 64-bit constant");
      } else {
         return data_.i;
      }
   }

   /* Value if this were used with vop3/opsel or vop3p. */
   constexpr uint16_t constantValue16(bool opsel) const noexcept
   {
      assert(bytes() == 2 || bytes() == 4);
      if (opsel) {
         if (bytes() == 2 && int16_t(data_.i) >= -16 && int16_t(data_.i) <= 64 && !isLiteral())
            return int16_t(data_.i) >>
                   16; /* 16-bit inline integers are sign-extended, even with fp16 instrs */
         else
            return data_.i >> 16;
      }
      return data_.i;
   }

   constexpr bool isOfType(RegType type) const noexcept
   {
      return hasRegClass() && regClass().type() == type;
   }

   /* Indicates that the killed operand's live range intersects with the
    * instruction's definitions. Unlike isKill() and isFirstKill(), this is
    * not set by liveness analysis. */
   constexpr void setLateKill(bool flag) noexcept { isLateKill_ = flag; }

   constexpr bool isLateKill() const noexcept { return isLateKill_; }

   /* clearing the kill flag also clears first-kill */
   constexpr void setKill(bool flag) noexcept
   {
      isKill_ = flag;
      if (!flag)
         setFirstKill(false);
   }

   constexpr bool isKill() const noexcept { return isKill_ || isFirstKill(); }

   /* setting first-kill implies kill */
   constexpr void setFirstKill(bool flag) noexcept
   {
      isFirstKill_ = flag;
      if (flag)
         setKill(flag);
   }

   /* When there are multiple operands killing the same temporary,
    * isFirstKill() is only returns true for the first one. */
   constexpr bool isFirstKill() const noexcept { return isFirstKill_; }

   constexpr bool isKillBeforeDef() const noexcept { return isKill() && !isLateKill(); }

   constexpr bool isFirstKillBeforeDef() const noexcept { return isFirstKill() && !isLateKill(); }

   constexpr bool operator==(Operand other) const noexcept
   {
      if (other.size() != size())
         return false;
      if (isFixed() != other.isFixed() || isKillBeforeDef() != other.isKillBeforeDef())
         return false;
      if (isFixed() && other.isFixed() && physReg() != other.physReg())
         return false;
      if (isLiteral())
         return other.isLiteral() && other.constantValue() == constantValue();
      else if (isConstant())
         return other.isConstant() && other.physReg() == physReg();
      else if (isUndefined())
         return other.isUndefined() && other.regClass() == regClass();
      else
         return other.isTemp() && other.getTemp() == getTemp();
   }

   constexpr bool operator!=(Operand other) const noexcept { return !operator==(other); }

   constexpr void set16bit(bool flag) noexcept { is16bit_ = flag; }

   constexpr bool is16bit() const noexcept { return is16bit_; }

   constexpr void set24bit(bool flag) noexcept { is24bit_ = flag; }

   constexpr bool is24bit() const noexcept { return is24bit_; }

private:
   /* either the referenced temporary or the low dword of a constant */
   union {
      Temp temp;
      uint32_t i;
      float f;
   } data_ = {Temp(0, s1)};
   PhysReg reg_;
   union {
      struct {
         uint8_t isTemp_ : 1;
         uint8_t isFixed_ : 1;
         uint8_t isConstant_ : 1;
         uint8_t isKill_ : 1;
         uint8_t isUndef_ : 1;
         uint8_t isFirstKill_ : 1;
         uint8_t constSize : 2;
         uint8_t isLateKill_ : 1;
         uint8_t is16bit_ : 1;
         uint8_t is24bit_ : 1;
         uint8_t signext : 1;
      };
      /* can't initialize bit-fields in c++11, so work around using a union */
      uint16_t control_ = 0;
   };
};
860
861 /**
862 * Definition Class
863 * Definitions are the results of Instructions
864 * and refer to temporary virtual registers
865 * which are later mapped to physical registers
866 */
class Definition final {
public:
   constexpr Definition()
       : temp(Temp(0, s1)), reg_(0), isFixed_(0), isKill_(0), isPrecise_(0), isNUW_(0), isNoCSE_(0)
   {}
   /* definition of a temporary with the given SSA id and register class */
   Definition(uint32_t index, RegClass type) noexcept : temp(index, type) {}
   explicit Definition(Temp tmp) noexcept : temp(tmp) {}
   /* definition fixed to a physical register without a temporary (id 0) */
   Definition(PhysReg reg, RegClass type) noexcept : temp(Temp(0, type)) { setFixed(reg); }
   /* definition of a temporary pre-assigned to a physical register */
   Definition(uint32_t tmpId, PhysReg reg, RegClass type) noexcept : temp(Temp(tmpId, type))
   {
      setFixed(reg);
   }

   /* a definition refers to a temporary iff its SSA id is non-zero */
   constexpr bool isTemp() const noexcept { return tempId() > 0; }

   constexpr Temp getTemp() const noexcept { return temp; }

   constexpr uint32_t tempId() const noexcept { return temp.id(); }

   constexpr void setTemp(Temp t) noexcept { temp = t; }

   void swapTemp(Definition& other) noexcept { std::swap(temp, other.temp); }

   constexpr RegClass regClass() const noexcept { return temp.regClass(); }

   constexpr unsigned bytes() const noexcept { return temp.bytes(); }

   constexpr unsigned size() const noexcept { return temp.size(); }

   constexpr bool isFixed() const noexcept { return isFixed_; }

   constexpr PhysReg physReg() const noexcept { return reg_; }

   constexpr void setFixed(PhysReg reg) noexcept
   {
      isFixed_ = 1;
      reg_ = reg;
   }

   constexpr void setKill(bool flag) noexcept { isKill_ = flag; }

   constexpr bool isKill() const noexcept { return isKill_; }

   constexpr void setPrecise(bool precise) noexcept { isPrecise_ = precise; }

   constexpr bool isPrecise() const noexcept { return isPrecise_; }

   /* No Unsigned Wrap */
   constexpr void setNUW(bool nuw) noexcept { isNUW_ = nuw; }

   constexpr bool isNUW() const noexcept { return isNUW_; }

   constexpr void setNoCSE(bool noCSE) noexcept { isNoCSE_ = noCSE; }

   constexpr bool isNoCSE() const noexcept { return isNoCSE_; }

private:
   Temp temp = Temp(0, s1);
   PhysReg reg_;
   union {
      struct {
         uint8_t isFixed_ : 1;
         uint8_t isKill_ : 1;
         uint8_t isPrecise_ : 1;
         uint8_t isNUW_ : 1;
         uint8_t isNoCSE_ : 1;
      };
      /* can't initialize bit-fields in c++11, so work around using a union */
      uint8_t control_ = 0;
   };
};
938
/* Forward declarations for the instruction class hierarchy defined further
 * down in this header. */
struct Block;
struct Instruction;
struct Pseudo_instruction;
struct SOP1_instruction;
struct SOP2_instruction;
struct SOPK_instruction;
struct SOPP_instruction;
struct SOPC_instruction;
struct SMEM_instruction;
struct DS_instruction;
struct LDSDIR_instruction;
struct MTBUF_instruction;
struct MUBUF_instruction;
struct MIMG_instruction;
struct Export_instruction;
struct FLAT_instruction;
struct Pseudo_branch_instruction;
struct Pseudo_barrier_instruction;
struct Pseudo_reduction_instruction;
struct VALU_instruction;
struct VINTERP_inreg_instruction;
struct VINTRP_instruction;
struct VOPD_instruction;
struct DPP16_instruction;
struct DPP8_instruction;
struct SDWA_instruction;
965
/* Base class of all IR instructions. Operands and Definitions live in trailing
 * storage directly behind the instruction (see create_instruction()) and are
 * referenced through relative spans.
 *
 * Sub-format data is reached through checked downcast accessors: each isX()
 * tests the format and the matching x() accessor casts to the sub-struct.
 * Formats that can be combined with other format bits (VOP1/2/3/C, VOP3P,
 * DPP16, DPP8, SDWA) are tested with a bitwise AND; standalone formats are
 * compared for equality.
 */
struct Instruction {
   aco_opcode opcode;
   Format format;
   /* Scratch word for use by individual compiler passes; meaning is pass-defined. */
   uint32_t pass_flags;

   aco::span<Operand> operands;
   aco::span<Definition> definitions;

   constexpr bool usesModifiers() const noexcept;

   /* Whether any operand is fixed to either half of the exec mask. */
   constexpr bool reads_exec() const noexcept
   {
      for (const Operand& op : operands) {
         if (op.isFixed() && (op.physReg() == exec_lo || op.physReg() == exec_hi))
            return true;
      }
      return false;
   }

   /* Whether any definition is fixed to either half of the exec mask. */
   constexpr bool writes_exec() const noexcept
   {
      for (const Definition& def : definitions) {
         if (def.isFixed() && (def.physReg() == exec_lo || def.physReg() == exec_hi))
            return true;
      }
      return false;
   }

   Pseudo_instruction& pseudo() noexcept
   {
      assert(isPseudo());
      return *(Pseudo_instruction*)this;
   }
   const Pseudo_instruction& pseudo() const noexcept
   {
      assert(isPseudo());
      return *(Pseudo_instruction*)this;
   }
   constexpr bool isPseudo() const noexcept { return format == Format::PSEUDO; }
   SOP1_instruction& sop1() noexcept
   {
      assert(isSOP1());
      return *(SOP1_instruction*)this;
   }
   const SOP1_instruction& sop1() const noexcept
   {
      assert(isSOP1());
      return *(SOP1_instruction*)this;
   }
   constexpr bool isSOP1() const noexcept { return format == Format::SOP1; }
   SOP2_instruction& sop2() noexcept
   {
      assert(isSOP2());
      return *(SOP2_instruction*)this;
   }
   const SOP2_instruction& sop2() const noexcept
   {
      assert(isSOP2());
      return *(SOP2_instruction*)this;
   }
   constexpr bool isSOP2() const noexcept { return format == Format::SOP2; }
   SOPK_instruction& sopk() noexcept
   {
      assert(isSOPK());
      return *(SOPK_instruction*)this;
   }
   const SOPK_instruction& sopk() const noexcept
   {
      assert(isSOPK());
      return *(SOPK_instruction*)this;
   }
   constexpr bool isSOPK() const noexcept { return format == Format::SOPK; }
   SOPP_instruction& sopp() noexcept
   {
      assert(isSOPP());
      return *(SOPP_instruction*)this;
   }
   const SOPP_instruction& sopp() const noexcept
   {
      assert(isSOPP());
      return *(SOPP_instruction*)this;
   }
   constexpr bool isSOPP() const noexcept { return format == Format::SOPP; }
   SOPC_instruction& sopc() noexcept
   {
      assert(isSOPC());
      return *(SOPC_instruction*)this;
   }
   const SOPC_instruction& sopc() const noexcept
   {
      assert(isSOPC());
      return *(SOPC_instruction*)this;
   }
   constexpr bool isSOPC() const noexcept { return format == Format::SOPC; }
   SMEM_instruction& smem() noexcept
   {
      assert(isSMEM());
      return *(SMEM_instruction*)this;
   }
   const SMEM_instruction& smem() const noexcept
   {
      assert(isSMEM());
      return *(SMEM_instruction*)this;
   }
   constexpr bool isSMEM() const noexcept { return format == Format::SMEM; }
   DS_instruction& ds() noexcept
   {
      assert(isDS());
      return *(DS_instruction*)this;
   }
   const DS_instruction& ds() const noexcept
   {
      assert(isDS());
      return *(DS_instruction*)this;
   }
   constexpr bool isDS() const noexcept { return format == Format::DS; }
   LDSDIR_instruction& ldsdir() noexcept
   {
      assert(isLDSDIR());
      return *(LDSDIR_instruction*)this;
   }
   const LDSDIR_instruction& ldsdir() const noexcept
   {
      assert(isLDSDIR());
      return *(LDSDIR_instruction*)this;
   }
   constexpr bool isLDSDIR() const noexcept { return format == Format::LDSDIR; }
   MTBUF_instruction& mtbuf() noexcept
   {
      assert(isMTBUF());
      return *(MTBUF_instruction*)this;
   }
   const MTBUF_instruction& mtbuf() const noexcept
   {
      assert(isMTBUF());
      return *(MTBUF_instruction*)this;
   }
   constexpr bool isMTBUF() const noexcept { return format == Format::MTBUF; }
   MUBUF_instruction& mubuf() noexcept
   {
      assert(isMUBUF());
      return *(MUBUF_instruction*)this;
   }
   const MUBUF_instruction& mubuf() const noexcept
   {
      assert(isMUBUF());
      return *(MUBUF_instruction*)this;
   }
   constexpr bool isMUBUF() const noexcept { return format == Format::MUBUF; }
   MIMG_instruction& mimg() noexcept
   {
      assert(isMIMG());
      return *(MIMG_instruction*)this;
   }
   const MIMG_instruction& mimg() const noexcept
   {
      assert(isMIMG());
      return *(MIMG_instruction*)this;
   }
   constexpr bool isMIMG() const noexcept { return format == Format::MIMG; }
   Export_instruction& exp() noexcept
   {
      assert(isEXP());
      return *(Export_instruction*)this;
   }
   const Export_instruction& exp() const noexcept
   {
      assert(isEXP());
      return *(Export_instruction*)this;
   }
   constexpr bool isEXP() const noexcept { return format == Format::EXP; }
   /* FLAT, GLOBAL and SCRATCH all share the FLAT_instruction layout. */
   FLAT_instruction& flat() noexcept
   {
      assert(isFlat());
      return *(FLAT_instruction*)this;
   }
   const FLAT_instruction& flat() const noexcept
   {
      assert(isFlat());
      return *(FLAT_instruction*)this;
   }
   constexpr bool isFlat() const noexcept { return format == Format::FLAT; }
   FLAT_instruction& global() noexcept
   {
      assert(isGlobal());
      return *(FLAT_instruction*)this;
   }
   const FLAT_instruction& global() const noexcept
   {
      assert(isGlobal());
      return *(FLAT_instruction*)this;
   }
   constexpr bool isGlobal() const noexcept { return format == Format::GLOBAL; }
   FLAT_instruction& scratch() noexcept
   {
      assert(isScratch());
      return *(FLAT_instruction*)this;
   }
   const FLAT_instruction& scratch() const noexcept
   {
      assert(isScratch());
      return *(FLAT_instruction*)this;
   }
   constexpr bool isScratch() const noexcept { return format == Format::SCRATCH; }
   Pseudo_branch_instruction& branch() noexcept
   {
      assert(isBranch());
      return *(Pseudo_branch_instruction*)this;
   }
   const Pseudo_branch_instruction& branch() const noexcept
   {
      assert(isBranch());
      return *(Pseudo_branch_instruction*)this;
   }
   constexpr bool isBranch() const noexcept { return format == Format::PSEUDO_BRANCH; }
   Pseudo_barrier_instruction& barrier() noexcept
   {
      assert(isBarrier());
      return *(Pseudo_barrier_instruction*)this;
   }
   const Pseudo_barrier_instruction& barrier() const noexcept
   {
      assert(isBarrier());
      return *(Pseudo_barrier_instruction*)this;
   }
   constexpr bool isBarrier() const noexcept { return format == Format::PSEUDO_BARRIER; }
   Pseudo_reduction_instruction& reduction() noexcept
   {
      assert(isReduction());
      return *(Pseudo_reduction_instruction*)this;
   }
   const Pseudo_reduction_instruction& reduction() const noexcept
   {
      assert(isReduction());
      return *(Pseudo_reduction_instruction*)this;
   }
   constexpr bool isReduction() const noexcept { return format == Format::PSEUDO_REDUCTION; }
   constexpr bool isVOP3P() const noexcept { return (uint16_t)format & (uint16_t)Format::VOP3P; }
   VINTERP_inreg_instruction& vinterp_inreg() noexcept
   {
      assert(isVINTERP_INREG());
      return *(VINTERP_inreg_instruction*)this;
   }
   const VINTERP_inreg_instruction& vinterp_inreg() const noexcept
   {
      assert(isVINTERP_INREG());
      return *(VINTERP_inreg_instruction*)this;
   }
   constexpr bool isVINTERP_INREG() const noexcept { return format == Format::VINTERP_INREG; }
   VOPD_instruction& vopd() noexcept
   {
      assert(isVOPD());
      return *(VOPD_instruction*)this;
   }
   const VOPD_instruction& vopd() const noexcept
   {
      assert(isVOPD());
      return *(VOPD_instruction*)this;
   }
   constexpr bool isVOPD() const noexcept { return format == Format::VOPD; }
   constexpr bool isVOP1() const noexcept { return (uint16_t)format & (uint16_t)Format::VOP1; }
   constexpr bool isVOP2() const noexcept { return (uint16_t)format & (uint16_t)Format::VOP2; }
   constexpr bool isVOPC() const noexcept { return (uint16_t)format & (uint16_t)Format::VOPC; }
   constexpr bool isVOP3() const noexcept { return (uint16_t)format & (uint16_t)Format::VOP3; }
   VINTRP_instruction& vintrp() noexcept
   {
      assert(isVINTRP());
      return *(VINTRP_instruction*)this;
   }
   const VINTRP_instruction& vintrp() const noexcept
   {
      assert(isVINTRP());
      return *(VINTRP_instruction*)this;
   }
   constexpr bool isVINTRP() const noexcept { return format == Format::VINTRP; }
   DPP16_instruction& dpp16() noexcept
   {
      assert(isDPP16());
      return *(DPP16_instruction*)this;
   }
   const DPP16_instruction& dpp16() const noexcept
   {
      assert(isDPP16());
      return *(DPP16_instruction*)this;
   }
   constexpr bool isDPP16() const noexcept { return (uint16_t)format & (uint16_t)Format::DPP16; }
   DPP8_instruction& dpp8() noexcept
   {
      assert(isDPP8());
      return *(DPP8_instruction*)this;
   }
   const DPP8_instruction& dpp8() const noexcept
   {
      assert(isDPP8());
      return *(DPP8_instruction*)this;
   }
   constexpr bool isDPP8() const noexcept { return (uint16_t)format & (uint16_t)Format::DPP8; }
   constexpr bool isDPP() const noexcept { return isDPP16() || isDPP8(); }
   SDWA_instruction& sdwa() noexcept
   {
      assert(isSDWA());
      return *(SDWA_instruction*)this;
   }
   const SDWA_instruction& sdwa() const noexcept
   {
      assert(isSDWA());
      return *(SDWA_instruction*)this;
   }
   constexpr bool isSDWA() const noexcept { return (uint16_t)format & (uint16_t)Format::SDWA; }

   /* Unchecked cast; only valid when isFlatLike() holds. */
   FLAT_instruction& flatlike() { return *(FLAT_instruction*)this; }

   const FLAT_instruction& flatlike() const { return *(FLAT_instruction*)this; }

   constexpr bool isFlatLike() const noexcept { return isFlat() || isGlobal() || isScratch(); }

   VALU_instruction& valu() noexcept
   {
      assert(isVALU());
      return *(VALU_instruction*)this;
   }
   const VALU_instruction& valu() const noexcept
   {
      assert(isVALU());
      return *(VALU_instruction*)this;
   }
   constexpr bool isVALU() const noexcept
   {
      return isVOP1() || isVOP2() || isVOPC() || isVOP3() || isVOP3P() || isVINTERP_INREG() ||
             isVOPD();
   }

   constexpr bool isSALU() const noexcept
   {
      return isSOP1() || isSOP2() || isSOPC() || isSOPK() || isSOPP();
   }

   constexpr bool isVMEM() const noexcept { return isMTBUF() || isMUBUF() || isMIMG(); }

   /* True for non-GDS DS, LDSDIR and VINTRP instructions (defined below). */
   bool accessesLDS() const noexcept;
   /* Implemented out of line; presumably whether the opcode executes on the
    * transcendental unit — confirm against the definition. */
   bool isTrans() const noexcept;
};
static_assert(sizeof(Instruction) == 16, "Unexpected padding");
1309
/* SOPK: scalar instruction with a 16-bit inline immediate. */
struct SOPK_instruction : public Instruction {
   uint16_t imm;
   uint16_t padding;
};
static_assert(sizeof(SOPK_instruction) == sizeof(Instruction) + 4, "Unexpected padding");

/* SOPP: scalar program-control instruction (branches, waitcnt, sendmsg, ...). */
struct SOPP_instruction : public Instruction {
   uint32_t imm;
   /* Branch-target block index; NOTE(review): semantics of non-branch values
    * (e.g. -1 for "no target") are not visible here — confirm at the users. */
   int block;
};
static_assert(sizeof(SOPP_instruction) == sizeof(Instruction) + 8, "Unexpected padding");

/* SOPC: scalar compare, writes SCC. */
struct SOPC_instruction : public Instruction {
   uint32_t padding;
};
static_assert(sizeof(SOPC_instruction) == sizeof(Instruction) + 4, "Unexpected padding");

/* SOP1: scalar ALU with one source operand. */
struct SOP1_instruction : public Instruction {};
static_assert(sizeof(SOP1_instruction) == sizeof(Instruction) + 0, "Unexpected padding");

/* SOP2: scalar ALU with two source operands. */
struct SOP2_instruction : public Instruction {
   uint32_t padding;
};
static_assert(sizeof(SOP2_instruction) == sizeof(Instruction) + 4, "Unexpected padding");
1334
/**
 * Scalar Memory Format:
 * For s_(buffer_)load_dword*:
 * Operand(0): SBASE - SGPR-pair which provides base address
 * Operand(1): Offset - immediate (un)signed offset or SGPR
 * Operand(2) / Definition(0): SDATA - SGPR for read / write result
 * Operand(n-1): SOffset - SGPR offset (Vega only)
 *
 * Having no operands is also valid for instructions such as s_dcache_inv.
 *
 */
struct SMEM_instruction : public Instruction {
   memory_sync_info sync;
   bool glc : 1; /* VI+: globally coherent */
   bool dlc : 1; /* NAVI: device level coherent */
   bool nv : 1;  /* VEGA only: Non-volatile */
   bool disable_wqm : 1;
   uint8_t padding : 4;
};
static_assert(sizeof(SMEM_instruction) == sizeof(Instruction) + 4, "Unexpected padding");
1355
/* Common base of all VALU encodings: per-operand and destination modifier
 * bits. The members alias each other in a union over a single 32-bit word;
 * which names are meaningful depends on the concrete encoding (noted per
 * field). */
struct VALU_instruction : public Instruction {
   union {
      bitfield_array8<uint32_t, 0, 3> neg;    /* VOP3, SDWA, DPP16, v_fma_mix, VINTERP_inreg */
      bitfield_array8<uint32_t, 0, 3> neg_lo; /* VOP3P */

      bitfield_array8<uint32_t, 3, 3> abs;    /* VOP3, SDWA, DPP16, v_fma_mix */
      bitfield_array8<uint32_t, 3, 3> neg_hi; /* VOP3P */

      bitfield_array8<uint32_t, 6, 4> opsel;     /* VOP3, VOPC12(GFX11+), VINTERP_inreg */
      bitfield_uint8<uint32_t, 10, 2> omod;      /* VOP3, SDWA(GFX9+) */
      bitfield_array8<uint32_t, 12, 3> opsel_lo; /* VOP3P */
      bitfield_array8<uint32_t, 15, 3> opsel_hi; /* VOP3P */
      bitfield_bool<uint32_t, 18> clamp;         /* VOP3, VOP3P, SDWA, VINTERP_inreg */
   };

   /* Swaps two operands together with their modifier bits (defined below). */
   void swapOperands(unsigned idx0, unsigned idx1);
};
static_assert(sizeof(VALU_instruction) == sizeof(Instruction) + 4, "Unexpected padding");
1374
/* GFX11+ VINTERP encoding ("in-register" interpolation). */
struct VINTERP_inreg_instruction : public VALU_instruction {
   uint8_t wait_exp : 3;
   uint8_t padding3 : 5;
   uint8_t padding4;
   uint8_t padding5;
   uint8_t padding6;
};
static_assert(sizeof(VINTERP_inreg_instruction) == sizeof(VALU_instruction) + 4,
              "Unexpected padding");

/* Dual-issue VALU: Instruction::opcode is the X opcode, opy the Y opcode. */
struct VOPD_instruction : public VALU_instruction {
   aco_opcode opy;
   uint16_t padding;
};
static_assert(sizeof(VOPD_instruction) == sizeof(VALU_instruction) + 4, "Unexpected padding");
1390
/**
 * Data Parallel Primitives Format:
 * This format can be used for VOP1, VOP2 or VOPC instructions.
 * The swizzle applies to the src0 operand.
 *
 */
struct DPP16_instruction : public VALU_instruction {
   uint16_t dpp_ctrl;           /* lane swizzle/control pattern */
   uint8_t row_mask : 4;
   uint8_t bank_mask : 4;
   bool bound_ctrl : 1;
   uint8_t fetch_inactive : 1;
   uint8_t padding3 : 6;
};
static_assert(sizeof(DPP16_instruction) == sizeof(VALU_instruction) + 4, "Unexpected padding");

/* DPP8: explicit per-lane selection, 8 lanes x 3 bits. */
struct DPP8_instruction : public VALU_instruction {
   uint32_t lane_sel : 24;
   uint32_t fetch_inactive : 1;
   uint32_t padding : 7;
};
static_assert(sizeof(DPP8_instruction) == sizeof(VALU_instruction) + 4, "Unexpected padding");
1413
/**
 * Describes a sub-dword (byte/word) selection of a 32-bit register, as used
 * by SDWA. The encoding packs the access size (in dwords of the `size << 2`
 * field: 1 = byte, 2 = word, 4 = dword), the byte offset within the register
 * and a sign-extension flag into a single byte.
 */
struct SubdwordSel {
   enum sdwa_sel : uint8_t {
      ubyte = 0x4,
      uword = 0x8,
      dword = 0x10,
      sext = 0x20,
      sbyte = ubyte | sext,
      sword = uword | sext,

      ubyte0 = ubyte,
      ubyte1 = ubyte | 1,
      ubyte2 = ubyte | 2,
      ubyte3 = ubyte | 3,
      sbyte0 = sbyte,
      sbyte1 = sbyte | 1,
      sbyte2 = sbyte | 2,
      sbyte3 = sbyte | 3,
      uword0 = uword,
      uword1 = uword | 2,
      sword0 = sword,
      sword1 = sword | 2,
   };

   /* Default-constructed selection is "none"; made constexpr for consistency
    * with the other constructors/accessors. */
   constexpr SubdwordSel() : sel((sdwa_sel)0) {}
   constexpr SubdwordSel(sdwa_sel sel_) : sel(sel_) {}
   /* Builds a selection from size (bytes), byte offset and sign extension. */
   constexpr SubdwordSel(unsigned size, unsigned offset, bool sign_extend)
       : sel((sdwa_sel)((sign_extend ? sext : 0) | size << 2 | offset))
   {}
   constexpr operator sdwa_sel() const { return sel; }
   /* True if any selection is set. */
   constexpr explicit operator bool() const { return sel != 0; }

   /* Access size in bytes (1, 2 or 4). */
   constexpr unsigned size() const { return (sel >> 2) & 0x7; }
   /* Byte offset within the register (0-3). */
   constexpr unsigned offset() const { return sel & 0x3; }
   constexpr bool sign_extend() const { return sel & sext; }
   /* Converts to the hardware SDWA sel field value: 0-3 select a byte,
    * 4-5 select a word, 6 selects the full dword. */
   constexpr unsigned to_sdwa_sel(unsigned reg_byte_offset) const
   {
      reg_byte_offset += offset();
      if (size() == 1)
         return reg_byte_offset;
      else if (size() == 2)
         return 4 + (reg_byte_offset >> 1);
      else
         return 6;
   }

private:
   sdwa_sel sel;
};
1462
/**
 * Sub-Dword Addressing Format:
 * This format can be used for VOP1, VOP2 or VOPC instructions.
 *
 * omod and SGPR/constant operands are only available on GFX9+. For VOPC,
 * the definition doesn't have to be VCC on GFX9+.
 *
 */
struct SDWA_instruction : public VALU_instruction {
   /* these destination modifiers aren't available with VOPC except for
    * clamp on GFX8 */
   SubdwordSel sel[2]; /* sub-dword selection for src0 and src1 */
   SubdwordSel dst_sel;
   uint8_t padding3;
};
static_assert(sizeof(SDWA_instruction) == sizeof(VALU_instruction) + 4, "Unexpected padding");

/* Legacy (pre-GFX11) parameter interpolation instructions. */
struct VINTRP_instruction : public Instruction {
   uint8_t attribute;
   uint8_t component;
   uint16_t padding;
};
static_assert(sizeof(VINTRP_instruction) == sizeof(Instruction) + 4, "Unexpected padding");
1486
/**
 * Local and Global Data Sharing instructions
 * Operand(0): ADDR - VGPR which supplies the address.
 * Operand(1): DATA0 - First data VGPR.
 * Operand(2): DATA1 - Second data VGPR.
 * Operand(n-1): M0 - LDS size.
 * Definition(0): VDST - Destination VGPR when results returned to VGPRs.
 *
 */
struct DS_instruction : public Instruction {
   memory_sync_info sync;
   bool gds; /* operates on GDS instead of LDS */
   uint16_t offset0;
   uint8_t offset1;
   uint8_t padding;
};
static_assert(sizeof(DS_instruction) == sizeof(Instruction) + 8, "Unexpected padding");

/**
 * LDS Direct instructions
 * Operand(0): M0
 * Definition(0): VDST - Destination VGPR
 */
struct LDSDIR_instruction : public Instruction {
   memory_sync_info sync;
   uint8_t attr : 6;
   uint8_t attr_chan : 2;
   uint32_t wait_vdst : 4;
   uint32_t padding : 28;
};
static_assert(sizeof(LDSDIR_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
1518
/**
 * Vector Memory Untyped-buffer Instructions
 * Operand(0): SRSRC - Specifies which SGPR supplies T# (resource constant)
 * Operand(1): VADDR - Address source. Can carry an index and/or offset
 * Operand(2): SOFFSET - SGPR to supply unsigned byte offset. (SGPR, M0, or inline constant)
 * Operand(3) / Definition(0): VDATA - Vector GPR for write result / read data
 *
 */
struct MUBUF_instruction : public Instruction {
   memory_sync_info sync;
   bool offen : 1;           /* Supply an offset from VGPR (VADDR) */
   bool idxen : 1;           /* Supply an index from VGPR (VADDR) */
   bool addr64 : 1;          /* SI, CIK: Address size is 64-bit */
   bool glc : 1;             /* globally coherent */
   bool dlc : 1;             /* NAVI: device level coherent */
   bool slc : 1;             /* system level coherent */
   bool tfe : 1;             /* texture fail enable */
   bool lds : 1;             /* Return read-data to LDS instead of VGPRs */
   uint16_t disable_wqm : 1; /* Require an exec mask without helper invocations */
   uint16_t offset : 12;     /* Unsigned byte offset - 12 bit */
   uint16_t swizzled : 1;
   uint16_t padding0 : 2;
   uint16_t padding1;
};
static_assert(sizeof(MUBUF_instruction) == sizeof(Instruction) + 8, "Unexpected padding");

/**
 * Vector Memory Typed-buffer Instructions
 * Operand(0): SRSRC - Specifies which SGPR supplies T# (resource constant)
 * Operand(1): VADDR - Address source. Can carry an index and/or offset
 * Operand(2): SOFFSET - SGPR to supply unsigned byte offset. (SGPR, M0, or inline constant)
 * Operand(3) / Definition(0): VDATA - Vector GPR for write result / read data
 *
 */
struct MTBUF_instruction : public Instruction {
   memory_sync_info sync;
   uint8_t dfmt : 4;         /* Data Format of data in memory buffer */
   uint8_t nfmt : 3;         /* Numeric format of data in memory */
   bool offen : 1;           /* Supply an offset from VGPR (VADDR) */
   uint16_t idxen : 1;       /* Supply an index from VGPR (VADDR) */
   uint16_t glc : 1;         /* globally coherent */
   uint16_t dlc : 1;         /* NAVI: device level coherent */
   uint16_t slc : 1;         /* system level coherent */
   uint16_t tfe : 1;         /* texture fail enable */
   uint16_t disable_wqm : 1; /* Require an exec mask without helper invocations */
   uint16_t padding : 10;
   uint16_t offset;          /* Unsigned byte offset - 12 bit */
};
static_assert(sizeof(MTBUF_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
1568
/**
 * Vector Memory Image Instructions
 * Operand(0) SRSRC - Scalar GPR that specifies the resource constant.
 * Operand(1): SSAMP - Scalar GPR that specifies sampler constant.
 * Operand(2): VDATA - Vector GPR for write data or zero if TFE/LWE=1.
 * Operand(3): VADDR - Address source. Can carry an offset or an index.
 * Definition(0): VDATA - Vector GPR for read result.
 *
 */
struct MIMG_instruction : public Instruction {
   memory_sync_info sync;
   uint8_t dmask;        /* Data VGPR enable mask */
   uint8_t dim : 3;      /* NAVI: dimensionality */
   bool unrm : 1;        /* Force address to be un-normalized */
   bool dlc : 1;         /* NAVI: device level coherent */
   bool glc : 1;         /* globally coherent */
   bool slc : 1;         /* system level coherent */
   bool tfe : 1;         /* texture fail enable */
   bool da : 1;          /* declare an array */
   bool lwe : 1;         /* LOD warning enable */
   bool r128 : 1;        /* NAVI: Texture resource size */
   bool a16 : 1;         /* VEGA, NAVI: Address components are 16-bits */
   bool d16 : 1;         /* Convert 32-bit data to 16-bit data */
   bool disable_wqm : 1; /* Require an exec mask without helper invocations */
   bool strict_wqm : 1;  /* VADDR is a linear VGPR and additional VGPRs may be copied into it */
   uint8_t padding0 : 1;
   uint8_t padding1;
   uint8_t padding2;
};
static_assert(sizeof(MIMG_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
1599
/**
 * Flat/Scratch/Global Instructions
 * Operand(0): ADDR
 * Operand(1): SADDR
 * Operand(2) / Definition(0): DATA/VDST
 *
 */
struct FLAT_instruction : public Instruction {
   memory_sync_info sync;
   bool slc : 1; /* system level coherent */
   bool glc : 1; /* globally coherent */
   bool dlc : 1; /* NAVI: device level coherent */
   bool lds : 1;
   bool nv : 1;
   bool disable_wqm : 1; /* Require an exec mask without helper invocations */
   uint8_t padding0 : 2;
   int16_t offset; /* Vega/Navi only */
   uint16_t padding1;
};
static_assert(sizeof(FLAT_instruction) == sizeof(Instruction) + 8, "Unexpected padding");

/* Export to a render target, depth buffer or parameter/position attribute. */
struct Export_instruction : public Instruction {
   uint8_t enabled_mask; /* per-component write mask */
   uint8_t dest;         /* export target */
   bool compressed : 1;
   bool done : 1;
   bool valid_mask : 1;
   bool row_en : 1;
   uint8_t padding0 : 4;
   uint8_t padding1;
};
static_assert(sizeof(Export_instruction) == sizeof(Instruction) + 4, "Unexpected padding");

/* Compiler-internal pseudo instruction (copies, phis, parallelcopies, ...). */
struct Pseudo_instruction : public Instruction {
   PhysReg scratch_sgpr; /* might not be valid if it's not needed */
   bool tmp_in_scc;
   uint8_t padding;
};
static_assert(sizeof(Pseudo_instruction) == sizeof(Instruction) + 4, "Unexpected padding");
1639
struct Pseudo_branch_instruction : public Instruction {
   /* target[0] is the block index of the branch target.
    * For conditional branches, target[1] contains the fall-through alternative.
    * A value of 0 means the target has not been initialized (BB0 cannot be a branch target).
    */
   uint32_t target[2];

   /* Indicates that selection control prefers to remove this instruction if possible.
    * This is set when the branch is divergent and always taken, or flattened.
    */
   bool selection_control_remove;
};
static_assert(sizeof(Pseudo_branch_instruction) == sizeof(Instruction) + 12, "Unexpected padding");

/* Compiler-internal barrier with explicit memory semantics and execution scope. */
struct Pseudo_barrier_instruction : public Instruction {
   memory_sync_info sync;
   sync_scope exec_scope;
};
static_assert(sizeof(Pseudo_barrier_instruction) == sizeof(Instruction) + 4, "Unexpected padding");
1659
/* Reduction operations, named <op><bit-size>; grouped by operation. */
enum ReduceOp : uint16_t {
   // clang-format off
   iadd8, iadd16, iadd32, iadd64,
   imul8, imul16, imul32, imul64,
   fadd16, fadd32, fadd64,
   fmul16, fmul32, fmul64,
   imin8, imin16, imin32, imin64,
   imax8, imax16, imax32, imax64,
   umin8, umin16, umin32, umin64,
   umax8, umax16, umax32, umax64,
   fmin16, fmin32, fmin64,
   fmax16, fmax32, fmax64,
   iand8, iand16, iand32, iand64,
   ior8, ior16, ior32, ior64,
   ixor8, ixor16, ixor32, ixor64,
   num_reduce_ops,
   // clang-format on
};

/**
 * Subgroup Reduction Instructions, everything except for the data to be
 * reduced and the result as inserted by setup_reduce_temp().
 * Operand(0): data to be reduced
 * Operand(1): reduce temporary
 * Operand(2): vector temporary
 * Definition(0): result
 * Definition(1): scalar temporary
 * Definition(2): scalar identity temporary (not used to store identity on GFX10)
 * Definition(3): scc clobber
 * Definition(4): vcc clobber
 *
 */
struct Pseudo_reduction_instruction : public Instruction {
   ReduceOp reduce_op;
   uint16_t cluster_size; // must be 0 for scans
};
static_assert(sizeof(Pseudo_reduction_instruction) == sizeof(Instruction) + 4,
              "Unexpected padding");
1698
1699 inline bool
accessesLDS()1700 Instruction::accessesLDS() const noexcept
1701 {
1702 return (isDS() && !ds().gds) || isLDSDIR() || isVINTRP();
1703 }
1704
/* Swaps the operands at idx0 and idx1 together with all of their per-operand
 * VALU modifier bits (neg/abs/opsel and the VOP3P opsel_lo/opsel_hi). */
inline void
VALU_instruction::swapOperands(unsigned idx0, unsigned idx1)
{
   if (this->isSDWA() && idx0 != idx1) {
      /* SDWA only carries sub-dword selections for the first two operands. */
      assert(idx0 < 2 && idx1 < 2);
      std::swap(this->sdwa().sel[0], this->sdwa().sel[1]);
   }
   assert(idx0 < 3 && idx1 < 3);
   std::swap(this->operands[idx0], this->operands[idx1]);
   this->neg[idx0].swap(this->neg[idx1]);
   this->abs[idx0].swap(this->abs[idx1]);
   this->opsel[idx0].swap(this->opsel[idx1]);
   this->opsel_lo[idx0].swap(this->opsel_lo[idx1]);
   this->opsel_hi[idx0].swap(this->opsel_hi[idx1]);
}
1720
/* Per-thread arena from which all instructions are allocated (see
 * create_instruction()). */
extern thread_local aco::monotonic_buffer_resource* instruction_buffer;

struct instr_deleter_functor {
   /* Don't yet free any instructions. They will be de-allocated
    * all at once after compilation finished.
    */
   void operator()(void* p) { return; }
};

/* unique_ptr with a no-op deleter: ownership semantics without per-object
 * deallocation, since instructions live in the arena above. */
template <typename T> using aco_ptr = std::unique_ptr<T, instr_deleter_functor>;
1731
/* Allocates a zero-initialized instruction of type T from the thread-local
 * instruction_buffer, with trailing storage for num_operands Operands followed
 * by num_definitions Definitions. The spans are set up with offsets relative
 * to the span members themselves (NOTE(review): per the offset math below,
 * aco::span appears to store a self-relative 16-bit offset — confirm in
 * aco_util.h), so the instruction must not be relocated afterwards. */
template <typename T>
T*
create_instruction(aco_opcode opcode, Format format, uint32_t num_operands,
                   uint32_t num_definitions)
{
   /* Operands and definitions are laid out directly behind the instruction. */
   std::size_t size =
      sizeof(T) + num_operands * sizeof(Operand) + num_definitions * sizeof(Definition);
   void* data = instruction_buffer->allocate(size, alignof(uint32_t));
   memset(data, 0, size);
   T* inst = (T*)data;

   inst->opcode = opcode;
   inst->format = format;

   /* Offset from the operands member to the end of T, where operands start. */
   uint16_t operands_offset = sizeof(T) - offsetof(Instruction, operands);
   inst->operands = aco::span<Operand>(operands_offset, num_operands);
   /* Definitions start right after the last operand. */
   uint16_t definitions_offset = (char*)inst->operands.end() - (char*)&inst->definitions;
   inst->definitions = aco::span<Definition>(definitions_offset, num_definitions);

   return inst;
}
1753
/* Whether the instruction uses any input/output modifier (DPP/SDWA encoding,
 * neg/abs/opsel, clamp or omod). */
constexpr bool
Instruction::usesModifiers() const noexcept
{
   if (isDPP() || isSDWA())
      return true;

   if (isVOP3P()) {
      const VALU_instruction& vop3p = this->valu();
      /* opsel_hi must be 1 to not be considered a modifier - even for constants */
      return vop3p.opsel_lo || vop3p.clamp || vop3p.neg_lo || vop3p.neg_hi ||
             (vop3p.opsel_hi & BITFIELD_MASK(operands.size())) != BITFIELD_MASK(operands.size());
   } else if (isVALU()) {
      const VALU_instruction& vop3 = this->valu();
      return vop3.opsel || vop3.clamp || vop3.omod || vop3.abs || vop3.neg;
   }
   return false;
}
1771
1772 constexpr bool
is_phi(Instruction * instr)1773 is_phi(Instruction* instr)
1774 {
1775 return instr->opcode == aco_opcode::p_phi || instr->opcode == aco_opcode::p_linear_phi;
1776 }
1777
1778 static inline bool
is_phi(aco_ptr<Instruction> & instr)1779 is_phi(aco_ptr<Instruction>& instr)
1780 {
1781 return is_phi(instr.get());
1782 }
1783
1784 memory_sync_info get_sync_info(const Instruction* instr);
1785
1786 inline bool
is_dead(const std::vector<uint16_t> & uses,const Instruction * instr)1787 is_dead(const std::vector<uint16_t>& uses, const Instruction* instr)
1788 {
1789 if (instr->definitions.empty() || instr->isBranch() || instr->opcode == aco_opcode::p_startpgm ||
1790 instr->opcode == aco_opcode::p_init_scratch ||
1791 instr->opcode == aco_opcode::p_dual_src_export_gfx11)
1792 return false;
1793
1794 if (std::any_of(instr->definitions.begin(), instr->definitions.end(),
1795 [&uses](const Definition& def) { return !def.isTemp() || uses[def.tempId()]; }))
1796 return false;
1797
1798 return !(get_sync_info(instr).semantics & (semantic_volatile | semantic_acqrel));
1799 }
1800
/* Per-GPU-generation capability queries for instruction encodings. */
bool can_use_input_modifiers(amd_gfx_level gfx_level, aco_opcode op, int idx);
bool can_use_opsel(amd_gfx_level gfx_level, aco_opcode op, int idx);
bool instr_is_16bit(amd_gfx_level gfx_level, aco_opcode op);
uint8_t get_gfx11_true16_mask(aco_opcode op);
bool can_use_SDWA(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr, bool pre_ra);
bool can_use_DPP(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr, bool dpp8);
bool can_write_m0(const aco_ptr<Instruction>& instr);
/* updates "instr" and returns the old instruction (or NULL if no update was needed) */
aco_ptr<Instruction> convert_to_SDWA(amd_gfx_level gfx_level, aco_ptr<Instruction>& instr);
aco_ptr<Instruction> convert_to_DPP(amd_gfx_level gfx_level, aco_ptr<Instruction>& instr,
                                    bool dpp8);
bool needs_exec_mask(const Instruction* instr);

/* Comparison-opcode transformations. */
aco_opcode get_ordered(aco_opcode op);
aco_opcode get_unordered(aco_opcode op);
aco_opcode get_inverse(aco_opcode op);
aco_opcode get_swapped(aco_opcode op);
aco_opcode get_f32_cmp(aco_opcode op);
aco_opcode get_vcmpx(aco_opcode op);
unsigned get_cmp_bitsize(aco_opcode op);
bool is_fp_cmp(aco_opcode op);
bool is_cmpx(aco_opcode op);

bool can_swap_operands(aco_ptr<Instruction>& instr, aco_opcode* new_op, unsigned idx0 = 0,
                       unsigned idx1 = 1);

uint32_t get_reduction_identity(ReduceOp op, unsigned idx);

unsigned get_mimg_nsa_dwords(const Instruction* instr);

unsigned get_vopd_opy_start(const Instruction* instr);

unsigned get_operand_size(aco_ptr<Instruction>& instr, unsigned index);

bool should_form_clause(const Instruction* a, const Instruction* b);
1836
/* Flags describing the role of a basic block in the CFG; presumably combined
 * as a bitmask in Block::kind — confirm. */
enum block_kind {
   /* uniform indicates that leaving this block,
    * all active lanes stay active */
   block_kind_uniform = 1 << 0,
   block_kind_top_level = 1 << 1,
   block_kind_loop_preheader = 1 << 2,
   block_kind_loop_header = 1 << 3,
   block_kind_loop_exit = 1 << 4,
   block_kind_continue = 1 << 5,
   block_kind_break = 1 << 6,
   block_kind_continue_or_break = 1 << 7,
   block_kind_branch = 1 << 8,
   block_kind_merge = 1 << 9,
   block_kind_invert = 1 << 10,
   block_kind_discard_early_exit = 1 << 11,
   block_kind_uses_discard = 1 << 12,
   block_kind_resume = 1 << 13,
   block_kind_export_end = 1 << 14,
   block_kind_end_with_regs = 1 << 15,
};
1857
1858 struct RegisterDemand {
1859 constexpr RegisterDemand() = default;
RegisterDemandRegisterDemand1860 constexpr RegisterDemand(const int16_t v, const int16_t s) noexcept : vgpr{v}, sgpr{s} {}
1861 int16_t vgpr = 0;
1862 int16_t sgpr = 0;
1863
1864 constexpr friend bool operator==(const RegisterDemand a, const RegisterDemand b) noexcept
1865 {
1866 return a.vgpr == b.vgpr && a.sgpr == b.sgpr;
1867 }
1868
exceedsRegisterDemand1869 constexpr bool exceeds(const RegisterDemand other) const noexcept
1870 {
1871 return vgpr > other.vgpr || sgpr > other.sgpr;
1872 }
1873
1874 constexpr RegisterDemand operator+(const Temp t) const noexcept
1875 {
1876 if (t.type() == RegType::sgpr)
1877 return RegisterDemand(vgpr, sgpr + t.size());
1878 else
1879 return RegisterDemand(vgpr + t.size(), sgpr);
1880 }
1881
1882 constexpr RegisterDemand operator+(const RegisterDemand other) const noexcept
1883 {
1884 return RegisterDemand(vgpr + other.vgpr, sgpr + other.sgpr);
1885 }
1886
1887 constexpr RegisterDemand operator-(const RegisterDemand other) const noexcept
1888 {
1889 return RegisterDemand(vgpr - other.vgpr, sgpr - other.sgpr);
1890 }
1891
1892 constexpr RegisterDemand& operator+=(const RegisterDemand other) noexcept
1893 {
1894 vgpr += other.vgpr;
1895 sgpr += other.sgpr;
1896 return *this;
1897 }
1898
1899 constexpr RegisterDemand& operator-=(const RegisterDemand other) noexcept
1900 {
1901 vgpr -= other.vgpr;
1902 sgpr -= other.sgpr;
1903 return *this;
1904 }
1905
1906 constexpr RegisterDemand& operator+=(const Temp t) noexcept
1907 {
1908 if (t.type() == RegType::sgpr)
1909 sgpr += t.size();
1910 else
1911 vgpr += t.size();
1912 return *this;
1913 }
1914
1915 constexpr RegisterDemand& operator-=(const Temp t) noexcept
1916 {
1917 if (t.type() == RegType::sgpr)
1918 sgpr -= t.size();
1919 else
1920 vgpr -= t.size();
1921 return *this;
1922 }
1923
updateRegisterDemand1924 constexpr void update(const RegisterDemand other) noexcept
1925 {
1926 vgpr = std::max(vgpr, other.vgpr);
1927 sgpr = std::max(sgpr, other.sgpr);
1928 }
1929 };
1930
/* CFG */
struct Block {
   float_mode fp_mode; /* copied from Program::next_fp_mode when created (see insert_block()) */
   unsigned index;     /* position of this block in Program::blocks */
   unsigned offset = 0; /* presumably the block's offset in the emitted code — confirm units */
   std::vector<aco_ptr<Instruction>> instructions;
   std::vector<unsigned> logical_preds; /* predecessor block indices in the logical CFG */
   std::vector<unsigned> linear_preds;  /* predecessor block indices in the linear CFG */
   std::vector<unsigned> logical_succs; /* successor block indices in the logical CFG */
   std::vector<unsigned> linear_succs;  /* successor block indices in the linear CFG */
   RegisterDemand register_demand = RegisterDemand();
   uint32_t kind = 0;         /* combination of block_kind flags */
   int32_t logical_idom = -1; /* immediate dominator in the logical CFG; -1 until computed */
   int32_t linear_idom = -1;  /* immediate dominator in the linear CFG; -1 until computed */
   uint16_t loop_nest_depth = 0;
   uint16_t divergent_if_logical_depth = 0;
   uint16_t uniform_if_depth = 0;

   /* this information is needed for predecessors to blocks with phis when
    * moving out of ssa */
   bool scc_live_out = false;

   Block() : index(0) {}
};
1955
/*
 * Shader stages as provided in Vulkan by the application. Contrast this to HWStage.
 */
enum class SWStage : uint16_t {
   None = 0,
   VS = 1 << 0,  /* Vertex Shader */
   GS = 1 << 1,  /* Geometry Shader */
   TCS = 1 << 2, /* Tessellation Control aka Hull Shader */
   TES = 1 << 3, /* Tessellation Evaluation aka Domain Shader */
   FS = 1 << 4,  /* Fragment aka Pixel Shader */
   CS = 1 << 5,  /* Compute Shader */
   TS = 1 << 6,  /* Task Shader */
   MS = 1 << 7,  /* Mesh Shader */
   RT = 1 << 8,  /* Raytracing Shader */

   /* Stage combinations merged to run on a single HWStage */
   VS_GS = VS | GS,
   VS_TCS = VS | TCS,
   TES_GS = TES | GS,
};

/* Combine two stage masks into one. */
constexpr SWStage
operator|(SWStage a, SWStage b)
{
   const auto combined = static_cast<uint16_t>(a) | static_cast<uint16_t>(b);
   return static_cast<SWStage>(combined);
}
1982
/*
 * Set of SWStages to be merged into a single shader paired with the
 * HWStage it will run on.
 */
struct Stage {
   constexpr Stage() = default;

   explicit constexpr Stage(ac_hw_stage hw_, SWStage sw_) : sw(sw_), hw(hw_) {}

   /* Check if the given SWStage is included */
   constexpr bool has(SWStage stage) const
   {
      return (static_cast<uint16_t>(sw) & static_cast<uint16_t>(stage));
   }

   /* Number of software stages merged into this shader. */
   unsigned num_sw_stages() const { return util_bitcount(static_cast<uint16_t>(sw)); }

   constexpr bool operator==(const Stage& other) const { return sw == other.sw && hw == other.hw; }

   constexpr bool operator!=(const Stage& other) const { return sw != other.sw || hw != other.hw; }

   /* Mask of merged software stages */
   SWStage sw = SWStage::None;

   /* Active hardware stage */
   ac_hw_stage hw{};
};
2010
/* possible settings of Program::stage; each pairs a set of merged SW stages
 * with the HW stage they execute on */
static constexpr Stage vertex_vs(AC_HW_VERTEX_SHADER, SWStage::VS);
static constexpr Stage fragment_fs(AC_HW_PIXEL_SHADER, SWStage::FS);
static constexpr Stage compute_cs(AC_HW_COMPUTE_SHADER, SWStage::CS);
static constexpr Stage tess_eval_vs(AC_HW_VERTEX_SHADER, SWStage::TES);
/* Mesh shading pipeline */
static constexpr Stage task_cs(AC_HW_COMPUTE_SHADER, SWStage::TS);
static constexpr Stage mesh_ngg(AC_HW_NEXT_GEN_GEOMETRY_SHADER, SWStage::MS);
/* GFX10/NGG */
static constexpr Stage vertex_ngg(AC_HW_NEXT_GEN_GEOMETRY_SHADER, SWStage::VS);
static constexpr Stage vertex_geometry_ngg(AC_HW_NEXT_GEN_GEOMETRY_SHADER, SWStage::VS_GS);
static constexpr Stage tess_eval_ngg(AC_HW_NEXT_GEN_GEOMETRY_SHADER, SWStage::TES);
static constexpr Stage tess_eval_geometry_ngg(AC_HW_NEXT_GEN_GEOMETRY_SHADER, SWStage::TES_GS);
/* GFX9 (and GFX10 if NGG isn't used) */
static constexpr Stage vertex_geometry_gs(AC_HW_LEGACY_GEOMETRY_SHADER, SWStage::VS_GS);
static constexpr Stage vertex_tess_control_hs(AC_HW_HULL_SHADER, SWStage::VS_TCS);
static constexpr Stage tess_eval_geometry_gs(AC_HW_LEGACY_GEOMETRY_SHADER, SWStage::TES_GS);
/* pre-GFX9 */
static constexpr Stage vertex_ls(AC_HW_LOCAL_SHADER,
                                 SWStage::VS); /* vertex before tessellation control */
static constexpr Stage vertex_es(AC_HW_EXPORT_SHADER, SWStage::VS); /* vertex before geometry */
static constexpr Stage tess_control_hs(AC_HW_HULL_SHADER, SWStage::TCS);
static constexpr Stage tess_eval_es(AC_HW_EXPORT_SHADER,
                                    SWStage::TES); /* tessellation evaluation before geometry */
static constexpr Stage geometry_gs(AC_HW_LEGACY_GEOMETRY_SHADER, SWStage::GS);
/* Raytracing */
static constexpr Stage raytracing_cs(AC_HW_COMPUTE_SHADER, SWStage::RT);
2038
/* Properties and limits of the target GPU; presumably filled in by
 * init_program() — confirm. */
struct DeviceInfo {
   uint16_t lds_encoding_granule;
   uint16_t lds_alloc_granule;
   uint32_t lds_limit; /* in bytes */
   bool has_16bank_lds;
   uint16_t physical_sgprs;
   uint16_t physical_vgprs;
   uint16_t vgpr_limit;
   uint16_t sgpr_limit;
   uint16_t sgpr_alloc_granule;
   uint16_t vgpr_alloc_granule;
   unsigned scratch_alloc_granule;
   uint16_t max_waves_per_simd;
   unsigned simd_per_cu;
   bool has_fast_fma32 = false;
   bool has_mac_legacy32 = false;
   bool fused_mad_mix = false;
   bool xnack_enabled = false;
   bool sram_ecc_enabled = false;

   /* valid immediate offset range for scratch/global instructions — assumed in bytes, confirm */
   int16_t scratch_global_offset_min;
   int16_t scratch_global_offset_max;
   unsigned max_nsa_vgprs;
};
2063
/* Coarse marker of how far compilation has progressed (see Program::progress). */
enum class CompilationProgress {
   after_isel,     /* after instruction selection */
   after_spilling, /* after the spiller has run */
   after_ra,       /* after register allocation */
};
2069
/* A shader program under compilation: holds the CFG, per-temporary register
 * classes, target information and state shared between compilation passes. */
class Program final {
public:
   /* memory arena for IR allocations, 64KiB initial block */
   aco::monotonic_buffer_resource m{65536};
   std::vector<Block> blocks;
   /* register class of every temporary, indexed by temp ID; seeded with one
    * entry so that allocated IDs start at 1 (see allocationID) */
   std::vector<RegClass> temp_rc = {s1};
   RegisterDemand max_reg_demand = RegisterDemand();
   ac_shader_config* config;
   struct aco_shader_info info;
   enum amd_gfx_level gfx_level;
   enum radeon_family family;
   DeviceInfo dev;
   unsigned wave_size;
   RegClass lane_mask; /* register class used for lane masks — presumably s1/s2 for wave32/wave64, confirm */
   Stage stage;
   bool needs_exact = false; /* there exists an instruction with disable_wqm = true */
   bool needs_wqm = false;   /* there exists a p_wqm instruction */
   bool has_smem_buffer_or_global_loads = false;
   bool has_pops_overlapped_waves_wait = false;
   bool has_color_exports = false;
   bool is_prolog = false;

   std::vector<uint8_t> constant_data;
   Temp private_segment_buffer;
   Temp scratch_offset;

   uint16_t num_waves = 0;
   uint16_t min_waves = 0;
   unsigned workgroup_size; /* if known; otherwise UINT_MAX */
   bool wgp_mode;

   bool needs_vcc = false;

   CompilationProgress progress;

   bool collect_statistics = false;
   uint32_t statistics[aco_num_statistics];

   /* state inherited by newly created blocks (see insert_block()) */
   float_mode next_fp_mode;
   unsigned next_loop_depth = 0;
   unsigned next_divergent_if_logical_depth = 0;
   unsigned next_uniform_if_depth = 0;

   std::vector<Definition> args_pending_vmem;

   /* For shader part with previous shader part that has lds access. */
   bool pending_lds_access = false;

   /* debug-message output configuration and callback */
   struct {
      FILE* output = stderr;
      bool shorten_messages = false;
      void (*func)(void* private_data, enum aco_compiler_debug_level level, const char* message);
      void* private_data;
   } debug;

   /* Allocates a new temporary ID with register class "rc". IDs must fit
    * into 24 bits (16777215 == 2^24 - 1). */
   uint32_t allocateId(RegClass rc)
   {
      assert(allocationID <= 16777215);
      temp_rc.push_back(rc);
      return allocationID++;
   }

   /* Reserves "amount" consecutive temporary IDs; the new temp_rc entries
    * are default-constructed — presumably filled in by the caller, confirm. */
   void allocateRange(unsigned amount)
   {
      assert(allocationID + amount <= 16777216);
      temp_rc.resize(temp_rc.size() + amount);
      allocationID += amount;
   }

   /* Allocates a new temporary of register class "rc". */
   Temp allocateTmp(RegClass rc) { return Temp(allocateId(rc), rc); }

   /* The ID the next allocateId() call would return, without allocating. */
   uint32_t peekAllocationId() { return allocationID; }

   friend void reindex_ssa(Program* program);
   friend void reindex_ssa(Program* program, std::vector<IDSet>& live_out);

   /* Appends a fresh, default-initialized block to the CFG. */
   Block* create_and_insert_block()
   {
      Block block;
      return insert_block(std::move(block));
   }

   /* Appends "block" to the CFG: assigns its index and copies the current
    * next_* state (fp mode, nesting depths) into it. */
   Block* insert_block(Block&& block)
   {
      block.index = blocks.size();
      block.fp_mode = next_fp_mode;
      block.loop_nest_depth = next_loop_depth;
      block.divergent_if_logical_depth = next_divergent_if_logical_depth;
      block.uniform_if_depth = next_uniform_if_depth;
      blocks.emplace_back(std::move(block));
      return &blocks.back();
   }

private:
   /* next temporary ID to hand out; starts at 1 so ID 0 is never used */
   uint32_t allocationID = 1;
};
2165
/* Result of live_var_analysis(). */
struct live {
   /* live temps out per block */
   std::vector<IDSet> live_out;
   /* register demand (sgpr/vgpr) per instruction per block */
   std::vector<std::vector<RegisterDemand>> register_demand;
};
2172
/* Test-only knobs for the register allocator (see register_allocation()). */
struct ra_test_policy {
   /* Force RA to always use its pessimistic fallback algorithm */
   bool skip_optimistic_path = false;
};
2177
/* One-time global initialization of the compiler — presumably parses debug
 * flags, confirm. */
void init();

void init_program(Program* program, Stage stage, const struct aco_shader_info* info,
                  enum amd_gfx_level gfx_level, enum radeon_family family, bool wgp_mode,
                  ac_shader_config* config);

/* Instruction selection: translate NIR shaders (or build special-purpose
 * shader parts directly) into ACO IR. */
void select_program(Program* program, unsigned shader_count, struct nir_shader* const* shaders,
                    ac_shader_config* config, const struct aco_compiler_options* options,
                    const struct aco_shader_info* info, const struct ac_shader_args* args);
void select_trap_handler_shader(Program* program, struct nir_shader* shader,
                                ac_shader_config* config,
                                const struct aco_compiler_options* options,
                                const struct aco_shader_info* info,
                                const struct ac_shader_args* args);
void select_rt_prolog(Program* program, ac_shader_config* config,
                      const struct aco_compiler_options* options,
                      const struct aco_shader_info* info, const struct ac_shader_args* in_args,
                      const struct ac_shader_args* out_args);
void select_vs_prolog(Program* program, const struct aco_vs_prolog_info* pinfo,
                      ac_shader_config* config, const struct aco_compiler_options* options,
                      const struct aco_shader_info* info, const struct ac_shader_args* args);

void select_ps_epilog(Program* program, void* pinfo, ac_shader_config* config,
                      const struct aco_compiler_options* options,
                      const struct aco_shader_info* info, const struct ac_shader_args* args);

void select_tcs_epilog(Program* program, void* pinfo, ac_shader_config* config,
                       const struct aco_compiler_options* options,
                       const struct aco_shader_info* info, const struct ac_shader_args* args);

void select_ps_prolog(Program* program, void* pinfo, ac_shader_config* config,
                      const struct aco_compiler_options* options,
                      const struct aco_shader_info* info, const struct ac_shader_args* args);

/* Compilation passes operating on a Program. */
void lower_phis(Program* program);
void calc_min_waves(Program* program);
void update_vgpr_sgpr_demand(Program* program, const RegisterDemand new_demand);
live live_var_analysis(Program* program);
std::vector<uint16_t> dead_code_analysis(Program* program);
void dominator_tree(Program* program);
void insert_exec_mask(Program* program);
void value_numbering(Program* program);
void optimize(Program* program);
void optimize_postRA(Program* program);
void setup_reduce_temp(Program* program);
void lower_to_cssa(Program* program, live& live_vars);
void register_allocation(Program* program, std::vector<IDSet>& live_out_per_block,
                         ra_test_policy = {});
void ssa_elimination(Program* program);
void lower_to_hw_instr(Program* program);
void schedule_program(Program* program, live& live_vars);
void schedule_ilp(Program* program);
void schedule_vopd(Program* program);
void spill(Program* program, live& live_vars);
void insert_wait_states(Program* program);
bool dealloc_vgprs(Program* program);
void insert_NOPs(Program* program);
void form_hard_clauses(Program* program);
/* Encode the program into "code" machine-code words. */
unsigned emit_program(Program* program, std::vector<uint32_t>& code,
                      std::vector<struct aco_symbol>* symbols = NULL, bool append_endpgm = true);
/**
 * Returns true if print_asm can disassemble the given program for the current build/runtime
 * configuration
 */
bool check_print_asm_support(Program* program);
bool print_asm(Program* program, std::vector<uint32_t>& binary, unsigned exec_size, FILE* output);
bool validate_ir(Program* program);
bool validate_cfg(Program* program);
bool validate_ra(Program* program);
#ifndef NDEBUG
void perfwarn(Program* program, bool cond, const char* msg, Instruction* instr = NULL);
#else
/* no-op in release builds */
#define perfwarn(program, cond, msg, ...)                                                          \
   do {                                                                                            \
   } while (0)
#endif

/* Statistics collection at various points of compilation (see
 * Program::collect_statistics). */
void collect_presched_stats(Program* program);
void collect_preasm_stats(Program* program);
void collect_postasm_stats(Program* program, const std::vector<uint32_t>& code);
2258
/* Latency/throughput information for one instruction (see get_cycle_info()). */
struct Instruction_cycle_info {
   /* Latency until the result is ready (if not needing a waitcnt) */
   unsigned latency;

   /* How many cycles issuing this instruction takes (i.e. cycles till the next instruction can be
    * issued)*/
   unsigned issue_cycles;
};
2267
Instruction_cycle_info get_cycle_info(const Program& program, const Instruction& instr);

/* Flags for the aco_print_* functions below; combined as a bitmask. */
enum print_flags {
   print_no_ssa = 0x1,
   print_perf_info = 0x2,
   print_kill = 0x4,
   print_live_vars = 0x8,
};
2276
/* IR pretty-printing ("flags" is a mask of print_flags). */
void aco_print_operand(const Operand* operand, FILE* output, unsigned flags = 0);
void aco_print_instr(enum amd_gfx_level gfx_level, const Instruction* instr, FILE* output,
                     unsigned flags = 0);
void aco_print_program(const Program* program, FILE* output, unsigned flags = 0);
void aco_print_program(const Program* program, FILE* output, const live& live_vars,
                       unsigned flags = 0);

void _aco_perfwarn(Program* program, const char* file, unsigned line, const char* fmt, ...);
void _aco_err(Program* program, const char* file, unsigned line, const char* fmt, ...);

#define aco_perfwarn(program, ...) _aco_perfwarn(program, __FILE__, __LINE__, __VA_ARGS__)
#define aco_err(program, ...) _aco_err(program, __FILE__, __LINE__, __VA_ARGS__)

int get_op_fixed_to_def(Instruction* instr);

/* utilities for dealing with register demand */
RegisterDemand get_live_changes(aco_ptr<Instruction>& instr);
RegisterDemand get_temp_registers(aco_ptr<Instruction>& instr);
RegisterDemand get_demand_before(RegisterDemand demand, aco_ptr<Instruction>& instr,
                                 aco_ptr<Instruction>& instr_before);

/* number of sgprs that need to be allocated but might not be addressable as s0-s105 */
uint16_t get_extra_sgprs(Program* program);

/* adjust num_waves for workgroup size and LDS limits */
uint16_t max_suitable_waves(Program* program, uint16_t waves);

/* get the number of sgprs/vgprs that must be allocated to address a number of sgprs/vgprs */
uint16_t get_sgpr_alloc(Program* program, uint16_t addressable_sgprs);
uint16_t get_vgpr_alloc(Program* program, uint16_t addressable_vgprs);

/* return number of addressable sgprs/vgprs for max_waves */
uint16_t get_addr_sgpr_from_waves(Program* program, uint16_t max_waves);
uint16_t get_addr_vgpr_from_waves(Program* program, uint16_t max_waves);

/* whether the program uses scratch memory */
bool uses_scratch(Program* program);
2313
/* Static per-opcode information; every array is indexed by aco_opcode. */
typedef struct {
   /* hardware encoding of each opcode for the given generation */
   const int16_t opcode_gfx7[static_cast<int>(aco_opcode::num_opcodes)];
   const int16_t opcode_gfx9[static_cast<int>(aco_opcode::num_opcodes)];
   const int16_t opcode_gfx10[static_cast<int>(aco_opcode::num_opcodes)];
   const int16_t opcode_gfx11[static_cast<int>(aco_opcode::num_opcodes)];
   const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> can_use_input_modifiers;
   const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> can_use_output_modifiers;
   const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> is_atomic;
   const char* name[static_cast<int>(aco_opcode::num_opcodes)];
   const aco::Format format[static_cast<int>(aco_opcode::num_opcodes)];
   /* sizes used for input/output modifiers and constants */
   const unsigned operand_size[static_cast<int>(aco_opcode::num_opcodes)];
   const instr_class classes[static_cast<int>(aco_opcode::num_opcodes)];
   const uint32_t definitions[static_cast<int>(aco_opcode::num_opcodes)];
   const uint32_t operands[static_cast<int>(aco_opcode::num_opcodes)];
} Info;

/* the single global opcode-information table */
extern const Info instr_info;
2332
2333 } // namespace aco
2334
2335 #endif /* ACO_IR_H */
2336