• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2013 Rob Clark <robdclark@gmail.com>
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #ifndef INSTR_A3XX_H_
7 #define INSTR_A3XX_H_
8 
/* Shorthand for the GCC/Clang attribute that requests a packed layout. */
#define PACKED __attribute__((__packed__))
10 
11 #include <assert.h>
12 #include <stdbool.h>
13 #include <stdint.h>
14 #include <stdio.h>
15 
/* Optional hook called by ir3_assert() when an assertion fails.  It receives
 * the stringified expression, the file, the line and the function name.
 * The symbol is declared weak, so its address may be null when no definition
 * is linked in, and noreturn, so a call to it does not come back.
 */
/* clang-format off */
void ir3_assert_handler(const char *expr, const char *file, int line,
                        const char *func) __attribute__((weak)) __attribute__((__noreturn__));
/* clang-format on */
20 
/* A wrapper for assert() that allows overriding the handling of a failed
 * assert.  This is needed for tools like crashdec, which may want to
 * attempt to disassemble memory that might not actually be valid
 * instructions.
 *
 * On failure, ir3_assert_handler() is called first when a definition of it
 * is present (the symbol is weak, hence the null check); otherwise the plain
 * assert() reports the failure.
 *
 * Note that expr is evaluated a second time by assert() on the failure path,
 * so it should not have side effects.
 */
#define ir3_assert(expr)                                                       \
   do {                                                                        \
      if (!(expr)) {                                                           \
         if (ir3_assert_handler) {                                             \
            ir3_assert_handler(#expr, __FILE__, __LINE__, __func__);           \
         }                                                                     \
         assert(expr);                                                         \
      }                                                                        \
   } while (0)
/* size of largest OPC field of all the instruction categories: */
#define NOPC_BITS 7

/* Pack an instruction category and a per-category opcode number into one
 * value: the opcode number occupies the low NOPC_BITS bits and the category
 * sits directly above them.
 *
 * Both arguments are parenthesized so that an expression passed as either
 * one cannot be re-associated by operator precedence.
 */
#define _OPC(cat, opc) (((cat) << NOPC_BITS) | (opc))
39 
40 /* clang-format off */
41 typedef enum {
42    /* category 0: */
43    OPC_NOP             = _OPC(0, 0),
44    OPC_JUMP            = _OPC(0, 2),
45    OPC_CALL            = _OPC(0, 3),
46    OPC_RET             = _OPC(0, 4),
47    OPC_KILL            = _OPC(0, 5),
48    OPC_END             = _OPC(0, 6),
49    OPC_EMIT            = _OPC(0, 7),
50    OPC_CUT             = _OPC(0, 8),
51    OPC_CHMASK          = _OPC(0, 9),
52    OPC_CHSH            = _OPC(0, 10),
53    OPC_FLOW_REV        = _OPC(0, 11),
54 
55    OPC_BKT             = _OPC(0, 16),
56    OPC_STKS            = _OPC(0, 17),
57    OPC_STKR            = _OPC(0, 18),
58    OPC_XSET            = _OPC(0, 19),
59    OPC_XCLR            = _OPC(0, 20),
60    OPC_GETONE          = _OPC(0, 21),
61    OPC_DBG             = _OPC(0, 22),
62    OPC_SHPS            = _OPC(0, 23),   /* shader prologue start */
63    OPC_SHPE            = _OPC(0, 24),   /* shader prologue end */
64    OPC_GETLAST         = _OPC(0, 25),
65 
66    OPC_PREDT           = _OPC(0, 29),   /* predicated true */
67    OPC_PREDF           = _OPC(0, 30),   /* predicated false */
68    OPC_PREDE           = _OPC(0, 31),   /* predicated end */
69 
70    /* Logical opcodes for different branch instruction variations: */
71    OPC_BR              = _OPC(0, 40),
72    OPC_BRAO            = _OPC(0, 41),
73    OPC_BRAA            = _OPC(0, 42),
74    OPC_BRAC            = _OPC(0, 43),
75    OPC_BANY            = _OPC(0, 44),
76    OPC_BALL            = _OPC(0, 45),
77    OPC_BRAX            = _OPC(0, 46),
78 
79    /* Logical opcode to distinguish kill and demote */
80    OPC_DEMOTE          = _OPC(0, 47),
81 
82    /* category 1: */
83    OPC_MOV             = _OPC(1, 0),
84    OPC_MOVP            = _OPC(1, 1),
85    /* swz, gat, sct */
86    OPC_MOVMSK          = _OPC(1, 3),
87 
88    /* Virtual opcodes for instructions differentiated via a "sub-opcode" that
89     * replaces the repeat field:
90     */
91    OPC_SWZ            = _OPC(1, 4),
92    OPC_GAT            = _OPC(1, 5),
93    OPC_SCT            = _OPC(1, 6),
94 
95    /* Logical opcodes for different variants of mov: */
96    OPC_MOV_IMMED       = _OPC(1, 40),
97    OPC_MOV_CONST       = _OPC(1, 41),
98    OPC_MOV_GPR         = _OPC(1, 42),
99    OPC_MOV_RELGPR      = _OPC(1, 43),
100    OPC_MOV_RELCONST    = _OPC(1, 44),
101 
102    /* Macros that expand to an if statement + move */
103    OPC_BALLOT_MACRO    = _OPC(1, 50),
104    OPC_ANY_MACRO       = _OPC(1, 51),
105    OPC_ALL_MACRO       = _OPC(1, 52),
106    OPC_ELECT_MACRO     = _OPC(1, 53),
107    OPC_READ_COND_MACRO = _OPC(1, 54),
108    OPC_READ_FIRST_MACRO = _OPC(1, 55),
109    OPC_SHPS_MACRO       = _OPC(1, 56),
110    OPC_READ_GETLAST_MACRO = _OPC(1, 57),
111 
112    /* Macros that expand to a loop */
113    OPC_SCAN_MACRO      = _OPC(1, 58),
114    OPC_SCAN_CLUSTERS_MACRO = _OPC(1, 60),
115 
116    /* category 2: */
117    OPC_ADD_F           = _OPC(2, 0),
118    OPC_MIN_F           = _OPC(2, 1),
119    OPC_MAX_F           = _OPC(2, 2),
120    OPC_MUL_F           = _OPC(2, 3),
121    OPC_SIGN_F          = _OPC(2, 4),
122    OPC_CMPS_F          = _OPC(2, 5),
123    OPC_ABSNEG_F        = _OPC(2, 6),
124    OPC_CMPV_F          = _OPC(2, 7),
125    /* 8 - invalid */
126    OPC_FLOOR_F         = _OPC(2, 9),
127    OPC_CEIL_F          = _OPC(2, 10),
128    OPC_RNDNE_F         = _OPC(2, 11),
129    OPC_RNDAZ_F         = _OPC(2, 12),
130    OPC_TRUNC_F         = _OPC(2, 13),
131    /* 14-15 - invalid */
132    OPC_ADD_U           = _OPC(2, 16),
133    OPC_ADD_S           = _OPC(2, 17),
134    OPC_SUB_U           = _OPC(2, 18),
135    OPC_SUB_S           = _OPC(2, 19),
136    OPC_CMPS_U          = _OPC(2, 20),
137    OPC_CMPS_S          = _OPC(2, 21),
138    OPC_MIN_U           = _OPC(2, 22),
139    OPC_MIN_S           = _OPC(2, 23),
140    OPC_MAX_U           = _OPC(2, 24),
141    OPC_MAX_S           = _OPC(2, 25),
142    OPC_ABSNEG_S        = _OPC(2, 26),
143    /* 27 - invalid */
144    OPC_AND_B           = _OPC(2, 28),
145    OPC_OR_B            = _OPC(2, 29),
146    OPC_NOT_B           = _OPC(2, 30),
147    OPC_XOR_B           = _OPC(2, 31),
148    /* 32 - invalid */
149    OPC_CMPV_U          = _OPC(2, 33),
150    OPC_CMPV_S          = _OPC(2, 34),
151    /* 35-47 - invalid */
152    OPC_MUL_U24         = _OPC(2, 48), /* 24b mul into 32b result */
153    OPC_MUL_S24         = _OPC(2, 49), /* 24b mul into 32b result with sign extension */
154    OPC_MULL_U          = _OPC(2, 50),
155    OPC_BFREV_B         = _OPC(2, 51),
156    OPC_CLZ_S           = _OPC(2, 52),
157    OPC_CLZ_B           = _OPC(2, 53),
158    OPC_SHL_B           = _OPC(2, 54),
159    OPC_SHR_B           = _OPC(2, 55),
160    OPC_ASHR_B          = _OPC(2, 56),
161    OPC_BARY_F          = _OPC(2, 57),
162    OPC_MGEN_B          = _OPC(2, 58),
163    OPC_GETBIT_B        = _OPC(2, 59),
164    OPC_SETRM           = _OPC(2, 60),
165    OPC_CBITS_B         = _OPC(2, 61),
166    OPC_SHB             = _OPC(2, 62),
167    OPC_MSAD            = _OPC(2, 63),
168    OPC_FLAT_B          = _OPC(2, 64),
169 
170    /* category 3: */
171    OPC_MAD_U16         = _OPC(3, 0),
172    OPC_MADSH_U16       = _OPC(3, 1),
173    OPC_MAD_S16         = _OPC(3, 2),
174    OPC_MADSH_M16       = _OPC(3, 3),   /* should this be .s16? */
175    OPC_MAD_U24         = _OPC(3, 4),
176    OPC_MAD_S24         = _OPC(3, 5),
177    OPC_MAD_F16         = _OPC(3, 6),
178    OPC_MAD_F32         = _OPC(3, 7),
179    OPC_SEL_B16         = _OPC(3, 8),
180    OPC_SEL_B32         = _OPC(3, 9),
181    OPC_SEL_S16         = _OPC(3, 10),
182    OPC_SEL_S32         = _OPC(3, 11),
183    OPC_SEL_F16         = _OPC(3, 12),
184    OPC_SEL_F32         = _OPC(3, 13),
185    OPC_SAD_S16         = _OPC(3, 14),
186    OPC_SAD_S32         = _OPC(3, 15),
187    OPC_SHRM            = _OPC(3, 16),
188    OPC_SHLM            = _OPC(3, 17),
189    OPC_SHRG            = _OPC(3, 18),
190    OPC_SHLG            = _OPC(3, 19),
191    OPC_ANDG            = _OPC(3, 20),
192    OPC_DP2ACC          = _OPC(3, 21),
193    OPC_DP4ACC          = _OPC(3, 22),
194    OPC_WMM             = _OPC(3, 23),
195    OPC_WMM_ACCU        = _OPC(3, 24),
196 
197    /* category 4: */
198    OPC_RCP             = _OPC(4, 0),
199    OPC_RSQ             = _OPC(4, 1),
200    OPC_LOG2            = _OPC(4, 2),
201    OPC_EXP2            = _OPC(4, 3),
202    OPC_SIN             = _OPC(4, 4),
203    OPC_COS             = _OPC(4, 5),
204    OPC_SQRT            = _OPC(4, 6),
   /* NOTE that these are 8+opc from their highp equivs, so it's possible
    * that the high order bit in the opc field has been repurposed for
    * half-precision use?  But note that other ops (rcp/sin/cos/sqrt)
    * still use the same opc as highp
    */
210    OPC_HRSQ            = _OPC(4, 9),
211    OPC_HLOG2           = _OPC(4, 10),
212    OPC_HEXP2           = _OPC(4, 11),
213 
214    /* category 5: */
215    OPC_ISAM            = _OPC(5, 0),
216    OPC_ISAML           = _OPC(5, 1),
217    OPC_ISAMM           = _OPC(5, 2),
218    OPC_SAM             = _OPC(5, 3),
219    OPC_SAMB            = _OPC(5, 4),
220    OPC_SAML            = _OPC(5, 5),
221    OPC_SAMGQ           = _OPC(5, 6),
222    OPC_GETLOD          = _OPC(5, 7),
223    OPC_CONV            = _OPC(5, 8),
224    OPC_CONVM           = _OPC(5, 9),
225    OPC_GETSIZE         = _OPC(5, 10),
226    OPC_GETBUF          = _OPC(5, 11),
227    OPC_GETPOS          = _OPC(5, 12),
228    OPC_GETINFO         = _OPC(5, 13),
229    OPC_DSX             = _OPC(5, 14),
230    OPC_DSY             = _OPC(5, 15),
231    OPC_GATHER4R        = _OPC(5, 16),
232    OPC_GATHER4G        = _OPC(5, 17),
233    OPC_GATHER4B        = _OPC(5, 18),
234    OPC_GATHER4A        = _OPC(5, 19),
235    OPC_SAMGP0          = _OPC(5, 20),
236    OPC_SAMGP1          = _OPC(5, 21),
237    OPC_SAMGP2          = _OPC(5, 22),
238    OPC_SAMGP3          = _OPC(5, 23),
239    OPC_DSXPP_1         = _OPC(5, 24),
240    OPC_DSYPP_1         = _OPC(5, 25),
241    OPC_RGETPOS         = _OPC(5, 26),
242    OPC_RGETINFO        = _OPC(5, 27),
243    OPC_BRCST_ACTIVE    = _OPC(5, 28),
244    OPC_QUAD_SHUFFLE_BRCST  = _OPC(5, 29),
245    OPC_QUAD_SHUFFLE_HORIZ  = _OPC(5, 30),
246    OPC_QUAD_SHUFFLE_VERT   = _OPC(5, 31),
247    OPC_QUAD_SHUFFLE_DIAG   = _OPC(5, 32),
248    OPC_TCINV               = _OPC(5, 33),
249    /* cat5 meta instructions, placed above the cat5 opc field's size */
250    OPC_DSXPP_MACRO     = _OPC(5, 35),
251    OPC_DSYPP_MACRO     = _OPC(5, 36),
252 
253    /* category 6: */
254    OPC_LDG             = _OPC(6, 0),        /* load-global */
255    OPC_LDL             = _OPC(6, 1),
256    OPC_LDP             = _OPC(6, 2),
257    OPC_STG             = _OPC(6, 3),        /* store-global */
258    OPC_STL             = _OPC(6, 4),
259    OPC_STP             = _OPC(6, 5),
260    OPC_LDIB            = _OPC(6, 6),
261    OPC_G2L             = _OPC(6, 7),
262    OPC_L2G             = _OPC(6, 8),
263    OPC_PREFETCH        = _OPC(6, 9),
264    OPC_LDLW            = _OPC(6, 10),
265    OPC_STLW            = _OPC(6, 11),
266    OPC_RESFMT          = _OPC(6, 14),
267    OPC_RESINFO         = _OPC(6, 15),
268    OPC_ATOMIC_ADD      = _OPC(6, 16),
269    OPC_ATOMIC_SUB      = _OPC(6, 17),
270    OPC_ATOMIC_XCHG     = _OPC(6, 18),
271    OPC_ATOMIC_INC      = _OPC(6, 19),
272    OPC_ATOMIC_DEC      = _OPC(6, 20),
273    OPC_ATOMIC_CMPXCHG  = _OPC(6, 21),
274    OPC_ATOMIC_MIN      = _OPC(6, 22),
275    OPC_ATOMIC_MAX      = _OPC(6, 23),
276    OPC_ATOMIC_AND      = _OPC(6, 24),
277    OPC_ATOMIC_OR       = _OPC(6, 25),
278    OPC_ATOMIC_XOR      = _OPC(6, 26),
279    OPC_LDGB            = _OPC(6, 27),
280    OPC_STGB            = _OPC(6, 28),
281    OPC_STIB            = _OPC(6, 29),
282    OPC_LDC             = _OPC(6, 30),
283    OPC_LDLV            = _OPC(6, 31),
284    OPC_PIPR            = _OPC(6, 32), /* ??? */
285    OPC_PIPC            = _OPC(6, 33), /* ??? */
286    OPC_EMIT2           = _OPC(6, 34), /* ??? */
287    OPC_ENDLS           = _OPC(6, 35), /* ??? */
288    OPC_GETSPID         = _OPC(6, 36), /* SP ID */
289    OPC_GETWID          = _OPC(6, 37), /* wavefront ID */
290    OPC_GETFIBERID      = _OPC(6, 38), /* fiber ID */
291    OPC_SHFL            = _OPC(6, 39),
292 
293    /* Logical opcodes for things that differ in a6xx+ */
294    OPC_STC             = _OPC(6, 40),
295    OPC_RESINFO_B       = _OPC(6, 41),
296    OPC_LDIB_B          = _OPC(6, 42),
297    OPC_STIB_B          = _OPC(6, 43),
298 
299    /* Logical opcodes for different atomic instruction variations: */
300    OPC_ATOMIC_B_ADD      = _OPC(6, 44),
301    OPC_ATOMIC_B_SUB      = _OPC(6, 45),
302    OPC_ATOMIC_B_XCHG     = _OPC(6, 46),
303    OPC_ATOMIC_B_INC      = _OPC(6, 47),
304    OPC_ATOMIC_B_DEC      = _OPC(6, 48),
305    OPC_ATOMIC_B_CMPXCHG  = _OPC(6, 49),
306    OPC_ATOMIC_B_MIN      = _OPC(6, 50),
307    OPC_ATOMIC_B_MAX      = _OPC(6, 51),
308    OPC_ATOMIC_B_AND      = _OPC(6, 52),
309    OPC_ATOMIC_B_OR       = _OPC(6, 53),
310    OPC_ATOMIC_B_XOR      = _OPC(6, 54),
311 
312    OPC_ATOMIC_S_ADD      = _OPC(6, 55),
313    OPC_ATOMIC_S_SUB      = _OPC(6, 56),
314    OPC_ATOMIC_S_XCHG     = _OPC(6, 57),
315    OPC_ATOMIC_S_INC      = _OPC(6, 58),
316    OPC_ATOMIC_S_DEC      = _OPC(6, 59),
317    OPC_ATOMIC_S_CMPXCHG  = _OPC(6, 60),
318    OPC_ATOMIC_S_MIN      = _OPC(6, 61),
319    OPC_ATOMIC_S_MAX      = _OPC(6, 62),
320    OPC_ATOMIC_S_AND      = _OPC(6, 63),
321    OPC_ATOMIC_S_OR       = _OPC(6, 64),
322    OPC_ATOMIC_S_XOR      = _OPC(6, 65),
323 
324    OPC_ATOMIC_G_ADD      = _OPC(6, 66),
325    OPC_ATOMIC_G_SUB      = _OPC(6, 67),
326    OPC_ATOMIC_G_XCHG     = _OPC(6, 68),
327    OPC_ATOMIC_G_INC      = _OPC(6, 69),
328    OPC_ATOMIC_G_DEC      = _OPC(6, 70),
329    OPC_ATOMIC_G_CMPXCHG  = _OPC(6, 71),
330    OPC_ATOMIC_G_MIN      = _OPC(6, 72),
331    OPC_ATOMIC_G_MAX      = _OPC(6, 73),
332    OPC_ATOMIC_G_AND      = _OPC(6, 74),
333    OPC_ATOMIC_G_OR       = _OPC(6, 75),
334    OPC_ATOMIC_G_XOR      = _OPC(6, 76),
335 
336    OPC_LDG_A           = _OPC(6, 77),
337    OPC_STG_A           = _OPC(6, 78),
338 
339    OPC_SPILL_MACRO     = _OPC(6, 79),
340    OPC_RELOAD_MACRO    = _OPC(6, 80),
341 
342    OPC_LDC_K           = _OPC(6, 81),
343    OPC_STSC            = _OPC(6, 82),
344    OPC_LDG_K           = _OPC(6, 83),
345 
346    /* Macros that expand to an stsc at the start of the preamble.
347     * It loads into const file and should not be optimized in any way.
348     */
349    OPC_PUSH_CONSTS_LOAD_MACRO = _OPC(6, 84),
350 
351    OPC_RAY_INTERSECTION = _OPC(6, 90),
352    OPC_RESBASE          = _OPC(6, 91),
353 
354    /* category 7: */
355    OPC_BAR             = _OPC(7, 0),
356    OPC_FENCE           = _OPC(7, 1),
357    OPC_SLEEP           = _OPC(7, 2),
358    OPC_ICINV           = _OPC(7, 3),
359    OPC_DCCLN           = _OPC(7, 4),
360    OPC_DCINV           = _OPC(7, 5),
361    OPC_DCFLU           = _OPC(7, 6),
362 
363    OPC_LOCK            = _OPC(7, 7),
364    OPC_UNLOCK          = _OPC(7, 8),
365 
366    OPC_ALIAS           = _OPC(7, 9),
367 
368    OPC_CCINV           = _OPC(7, 10),
369 
370    /* meta instructions (category 8): */
371 #define OPC_META 8
372    /* placeholder instr to mark shader inputs: */
373    OPC_META_INPUT      = _OPC(OPC_META, 0),
374    /* The "collect" and "split" instructions are used for keeping
375     * track of instructions that write to multiple dst registers
376     * (split) like texture sample instructions, or read multiple
377     * consecutive scalar registers (collect) (bary.f, texture samp)
378     *
379     * A "split" extracts a scalar component from a vecN, and a
380     * "collect" gathers multiple scalar components into a vecN
381     */
382    OPC_META_SPLIT      = _OPC(OPC_META, 2),
383    OPC_META_COLLECT    = _OPC(OPC_META, 3),
384 
385    /* placeholder for texture fetches that run before FS invocation
386     * starts:
387     */
388    OPC_META_TEX_PREFETCH = _OPC(OPC_META, 4),
389 
   /* Parallel copies have multiple destinations, and copy each source
    * to its corresponding destination. This happens "in parallel," meaning
    * that it happens as-if every source is read first and then every
    * destination is stored. These are produced in RA when register shuffling
    * is required, and then lowered away immediately afterwards.
    */
396    OPC_META_PARALLEL_COPY = _OPC(OPC_META, 5),
397    OPC_META_PHI = _OPC(OPC_META, 6),
398    /*
399     * A manually encoded opcode
400     */
401    OPC_META_RAW = _OPC(OPC_META, 7),
402 } opc_t;
403 /* clang-format on */
404 
/* Split a packed opcode value back into its two parts: opc_cat() yields the
 * instruction category stored above the low NOPC_BITS bits, and opc_op()
 * yields the opcode number held in those low bits.
 */
#define opc_cat(opc) ((int)((opc) >> NOPC_BITS))
#define opc_op(opc)  ((unsigned)((opc) & ((1 << NOPC_BITS) - 1)))
407 
408 const char *disasm_a3xx_instr_name(opc_t opc);
409 
/* Operand data types.
 *
 * TYPE_ATOMIC_U64 and TYPE_U8 share the value 6, so the two cannot be told
 * apart from the value alone.
 */
typedef enum {
   TYPE_F16 = 0,
   TYPE_F32 = 1,
   TYPE_U16 = 2,
   TYPE_U32 = 3,
   TYPE_S16 = 4,
   TYPE_S32 = 5,
   TYPE_ATOMIC_U64 = 6, /* Only valid for a7xx atomics */
   TYPE_U8 = 6,         /* same value as TYPE_ATOMIC_U64 */
   TYPE_U8_32 = 7,
} type_t;
421 
422 static inline uint32_t
type_size(type_t type)423 type_size(type_t type)
424 {
425    switch (type) {
426    case TYPE_F32:
427    case TYPE_U32:
428    case TYPE_U8_32:
429    case TYPE_S32:
430       return 32;
431    case TYPE_F16:
432    case TYPE_U16:
433    case TYPE_S16:
434       return 16;
435    case TYPE_U8:
436       return 8;
437    default:
438       ir3_assert(0); /* invalid type */
439       return 0;
440    }
441 }
442 
443 static inline type_t
type_uint_size(unsigned bit_size)444 type_uint_size(unsigned bit_size)
445 {
446    switch (bit_size) {
447    case 8:  return TYPE_U8;
448    case 1:  /* 1b bools are treated as normal half-regs */
449    case 16: return TYPE_U16;
450    case 32: return TYPE_U32;
451    case 64:
452       return TYPE_U32;
453    default:
454       ir3_assert(0); /* invalid size */
455       return (type_t)0;
456    }
457 }
458 
459 static inline type_t
type_float_size(unsigned bit_size)460 type_float_size(unsigned bit_size)
461 {
462    switch (bit_size) {
463    case 16: return TYPE_F16;
464    case 32: return TYPE_F32;
465    default:
466       ir3_assert(0); /* invalid size */
467       return (type_t)0;
468    }
469 }
470 
471 static inline int
type_float(type_t type)472 type_float(type_t type)
473 {
474    return (type == TYPE_F32) || (type == TYPE_F16);
475 }
476 
477 static inline int
type_uint(type_t type)478 type_uint(type_t type)
479 {
480    return (type == TYPE_U32) || (type == TYPE_U16) || (type == TYPE_U8) || (type == TYPE_U8_32);
481 }
482 
483 static inline int
type_sint(type_t type)484 type_sint(type_t type)
485 {
486    return (type == TYPE_S32) || (type == TYPE_S16);
487 }
488 
/* Rounding modes. */
typedef enum {
   ROUND_ZERO = 0,
   ROUND_EVEN = 1,
   ROUND_POS_INF = 2,
   ROUND_NEG_INF = 3,
} round_t;
495 
/* Pack a register number and a component selector into a register id.
 *
 * The component takes the low two bits (anything above them in comp is
 * masked off) and the register number is placed in the bits above.
 *
 * comp:
 *   0 - x
 *   1 - y
 *   2 - z
 *   3 - w
 */
static inline uint32_t
regid(int num, int comp)
{
   const int component = comp & 0x3;
   const int base = num << 2;

   return base | component;
}
507 
/* Sentinel register id (register 63, component 0); VALIDREG() is true for
 * every id other than this one.
 */
#define INVALID_REG     regid(63, 0)
#define VALIDREG(r)     ((r) != INVALID_REG)
/* NOTE(review): COND() is not defined in the visible part of this header and
 * must be supplied by the includer; presumably it yields val only when the
 * condition holds — confirm.
 */
#define CONDREG(r, val) COND(VALIDREG(r), (val))

/* special registers: */
#define REG_A0 61 /* address register */
#define REG_P0 62 /* predicate register */
#define REG_P0_X regid(REG_P0, 0) /* p0.x */

/* Sentinel const-register value: the largest 16-bit unsigned value. */
#define INVALID_CONST_REG UINT16_MAX
518 
/* Descriptor modes for cat5 instructions.
 *
 * With is_bindless_s2en = 1, this determines whether bindless is enabled and
 * if so, how to get the (base, index) pair for both sampler and texture.
 * There is a single base embedded in the instruction, which is always used
 * for the texture.
 */
typedef enum {
   /* Use traditional GL binding model, get texture and sampler index from src3
    * which is presumed to be uniform on a4xx+ (a3xx doesn't have the other
    * modes, but does handle non-uniform indexing).
    */
   CAT5_UNIFORM = 0,

   /* The sampler base comes from the low 3 bits of a1.x, and the sampler
    * and texture index come from src3 which is presumed to be uniform.
    */
   CAT5_BINDLESS_A1_UNIFORM = 1,

   /* The texture and sampler share the same base, and the sampler and
    * texture index come from src3 which is *not* presumed to be uniform.
    */
   CAT5_BINDLESS_NONUNIFORM = 2,

   /* The sampler base comes from the low 3 bits of a1.x, and the sampler
    * and texture index come from src3 which is *not* presumed to be
    * uniform.
    */
   CAT5_BINDLESS_A1_NONUNIFORM = 3,

   /* Use traditional GL binding model, get texture and sampler index
    * from src3 which is *not* presumed to be uniform.
    */
   CAT5_NONUNIFORM = 4,

   /* The texture and sampler share the same base, and the sampler and
    * texture index come from src3 which is presumed to be uniform.
    */
   CAT5_BINDLESS_UNIFORM = 5,

   /* The texture and sampler share the same base, get sampler index from low
    * 4 bits of src3 and texture index from high 4 bits.
    */
   CAT5_BINDLESS_IMM = 6,

   /* The sampler base comes from the low 3 bits of a1.x, and the texture
    * index comes from the next 8 bits of a1.x. The sampler index is an
    * immediate in src3.
    */
   CAT5_BINDLESS_A1_IMM = 7,
} cat5_desc_mode_t;
568 
/* Similar to cat5_desc_mode_t, describes how the descriptor is loaded.
 * Note that the value 3 is not assigned to any mode here.
 */
typedef enum {
   /* Use old GL binding model with an immediate index. */
   CAT6_IMM = 0,

   /* NOTE(review): presumably the old GL binding model with a uniform
    * register index — confirm.
    */
   CAT6_UNIFORM = 1,

   /* NOTE(review): presumably the old GL binding model with a register
    * index that isn't guaranteed to be uniform — confirm.
    */
   CAT6_NONUNIFORM = 2,

   /* Use the bindless model, with an immediate index.
    */
   CAT6_BINDLESS_IMM = 4,

   /* Use the bindless model, with a uniform register index.
    */
   CAT6_BINDLESS_UNIFORM = 5,

   /* Use the bindless model, with a register index that isn't guaranteed
    * to be uniform. This presumably checks if the indices are equal and
    * splits up the load/store, because it works the way you would
    * expect.
    */
   CAT6_BINDLESS_NONUNIFORM = 6,
} cat6_desc_mode_t;
594 
595 static inline bool
is_sat_compatible(opc_t opc)596 is_sat_compatible(opc_t opc)
597 {
598    /* On a6xx saturation doesn't work on cat4 */
599    if (opc_cat(opc) != 2 && opc_cat(opc) != 3)
600       return false;
601 
602    switch (opc) {
603    /* On a3xx and a6xx saturation doesn't work on bary.f/flat.b */
604    case OPC_BARY_F:
605    case OPC_FLAT_B:
606    /* On a6xx saturation doesn't work on sel.* */
607    case OPC_SEL_B16:
608    case OPC_SEL_B32:
609    case OPC_SEL_S16:
610    case OPC_SEL_S32:
611    case OPC_SEL_F16:
612    case OPC_SEL_F32:
613       return false;
614    default:
615       return true;
616    }
617 }
618 
619 static inline bool
is_mad(opc_t opc)620 is_mad(opc_t opc)
621 {
622    switch (opc) {
623    case OPC_MAD_U16:
624    case OPC_MAD_S16:
625    case OPC_MAD_U24:
626    case OPC_MAD_S24:
627    case OPC_MAD_F16:
628    case OPC_MAD_F32:
629       return true;
630    default:
631       return false;
632    }
633 }
634 
635 static inline bool
is_madsh(opc_t opc)636 is_madsh(opc_t opc)
637 {
638    switch (opc) {
639    case OPC_MADSH_U16:
640    case OPC_MADSH_M16:
641       return true;
642    default:
643       return false;
644    }
645 }
646 
647 static inline bool
is_sad(opc_t opc)648 is_sad(opc_t opc)
649 {
650    switch (opc) {
651    case OPC_SAD_S16:
652    case OPC_SAD_S32:
653       return true;
654    default:
655       return false;
656    }
657 }
658 
659 static inline bool
is_local_atomic(opc_t opc)660 is_local_atomic(opc_t opc)
661 {
662    switch (opc) {
663    case OPC_ATOMIC_ADD:
664    case OPC_ATOMIC_SUB:
665    case OPC_ATOMIC_XCHG:
666    case OPC_ATOMIC_INC:
667    case OPC_ATOMIC_DEC:
668    case OPC_ATOMIC_CMPXCHG:
669    case OPC_ATOMIC_MIN:
670    case OPC_ATOMIC_MAX:
671    case OPC_ATOMIC_AND:
672    case OPC_ATOMIC_OR:
673    case OPC_ATOMIC_XOR:
674       return true;
675    default:
676       return false;
677    }
678 }
679 
680 static inline bool
is_global_a3xx_atomic(opc_t opc)681 is_global_a3xx_atomic(opc_t opc)
682 {
683    switch (opc) {
684    case OPC_ATOMIC_S_ADD:
685    case OPC_ATOMIC_S_SUB:
686    case OPC_ATOMIC_S_XCHG:
687    case OPC_ATOMIC_S_INC:
688    case OPC_ATOMIC_S_DEC:
689    case OPC_ATOMIC_S_CMPXCHG:
690    case OPC_ATOMIC_S_MIN:
691    case OPC_ATOMIC_S_MAX:
692    case OPC_ATOMIC_S_AND:
693    case OPC_ATOMIC_S_OR:
694    case OPC_ATOMIC_S_XOR:
695       return true;
696    default:
697       return false;
698    }
699 }
700 
701 static inline bool
is_global_a6xx_atomic(opc_t opc)702 is_global_a6xx_atomic(opc_t opc)
703 {
704    switch (opc) {
705    case OPC_ATOMIC_G_ADD:
706    case OPC_ATOMIC_G_SUB:
707    case OPC_ATOMIC_G_XCHG:
708    case OPC_ATOMIC_G_INC:
709    case OPC_ATOMIC_G_DEC:
710    case OPC_ATOMIC_G_CMPXCHG:
711    case OPC_ATOMIC_G_MIN:
712    case OPC_ATOMIC_G_MAX:
713    case OPC_ATOMIC_G_AND:
714    case OPC_ATOMIC_G_OR:
715    case OPC_ATOMIC_G_XOR:
716       return true;
717    default:
718       return false;
719    }
720 }
721 
722 static inline bool
is_bindless_atomic(opc_t opc)723 is_bindless_atomic(opc_t opc)
724 {
725    switch (opc) {
726    case OPC_ATOMIC_B_ADD:
727    case OPC_ATOMIC_B_SUB:
728    case OPC_ATOMIC_B_XCHG:
729    case OPC_ATOMIC_B_INC:
730    case OPC_ATOMIC_B_DEC:
731    case OPC_ATOMIC_B_CMPXCHG:
732    case OPC_ATOMIC_B_MIN:
733    case OPC_ATOMIC_B_MAX:
734    case OPC_ATOMIC_B_AND:
735    case OPC_ATOMIC_B_OR:
736    case OPC_ATOMIC_B_XOR:
737       return true;
738    default:
739       return false;
740    }
741 }
742 
743 static inline bool
is_atomic(opc_t opc)744 is_atomic(opc_t opc)
745 {
746    return is_local_atomic(opc) || is_global_a3xx_atomic(opc) ||
747           is_global_a6xx_atomic(opc) || is_bindless_atomic(opc);
748 }
749 
750 static inline bool
is_ssbo(opc_t opc)751 is_ssbo(opc_t opc)
752 {
753    switch (opc) {
754    case OPC_RESFMT:
755    case OPC_RESINFO:
756    case OPC_LDGB:
757    case OPC_STGB:
758    case OPC_STIB:
759       return true;
760    default:
761       return false;
762    }
763 }
764 
765 static inline bool
is_isam(opc_t opc)766 is_isam(opc_t opc)
767 {
768    switch (opc) {
769    case OPC_ISAM:
770    case OPC_ISAML:
771    case OPC_ISAMM:
772       return true;
773    default:
774       return false;
775    }
776 }
777 
778 static inline bool
is_cat2_float(opc_t opc)779 is_cat2_float(opc_t opc)
780 {
781    switch (opc) {
782    case OPC_ADD_F:
783    case OPC_MIN_F:
784    case OPC_MAX_F:
785    case OPC_MUL_F:
786    case OPC_SIGN_F:
787    case OPC_CMPS_F:
788    case OPC_ABSNEG_F:
789    case OPC_CMPV_F:
790    case OPC_FLOOR_F:
791    case OPC_CEIL_F:
792    case OPC_RNDNE_F:
793    case OPC_RNDAZ_F:
794    case OPC_TRUNC_F:
795       return true;
796 
797    default:
798       return false;
799    }
800 }
801 
802 static inline bool
is_cat3_float(opc_t opc)803 is_cat3_float(opc_t opc)
804 {
805    switch (opc) {
806    case OPC_MAD_F16:
807    case OPC_MAD_F32:
808    case OPC_SEL_F16:
809    case OPC_SEL_F32:
810       return true;
811    default:
812       return false;
813    }
814 }
815 
816 static inline bool
is_cat3_alt(opc_t opc)817 is_cat3_alt(opc_t opc)
818 {
819    switch (opc) {
820    case OPC_SHLM:
821    case OPC_SHRM:
822    case OPC_SHLG:
823    case OPC_SHRG:
824    case OPC_ANDG:
825       return true;
826    default:
827       return false;
828    }
829 }
830 
831 #endif /* INSTR_A3XX_H_ */
832