/*
 * Copyright (c) 2013 Rob Clark <robdclark@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
23 
24 #ifndef INSTR_A3XX_H_
25 #define INSTR_A3XX_H_
26 
27 #define PACKED __attribute__((__packed__))
28 
29 #include <assert.h>
30 #include <stdbool.h>
31 #include <stdint.h>
32 #include <stdio.h>
33 
/* clang-format off */
/* Optional hook invoked by ir3_assert() on failure.  It is declared weak so
 * that a program which does not provide a definition still links, in which
 * case the symbol's address tests false.  It is declared noreturn.
 */
void ir3_assert_handler(const char *expr, const char *file, int line,
                        const char *func)
   __attribute__((weak, __noreturn__));
/* clang-format on */

/* An assert() wrapper whose failure handling can be overridden.  Tools such
 * as crashdec may try to disassemble memory that does not hold valid
 * instructions, and want to recover from a failed assertion rather than
 * abort.
 *
 * When expr is false, ir3_assert_handler() is called if a definition is
 * linked in; otherwise the failure falls through to the regular assert().
 * Note that expr is evaluated a second time by assert() on that path, so it
 * must not have side effects.
 */
#define ir3_assert(expr)                                                       \
   do {                                                                        \
      if (expr)                                                                \
         break;                                                                \
      if (ir3_assert_handler) {                                                \
         ir3_assert_handler(#expr, __FILE__, __LINE__, __func__);              \
      }                                                                        \
      assert(expr);                                                            \
   } while (0)
/* size of largest OPC field of all the instruction categories: */
#define NOPC_BITS 7

/* Pack an instruction category and a per-category opcode number into one
 * value: the category sits above the low NOPC_BITS bits, the opcode number
 * occupies them.  Both arguments are parenthesized so that compound
 * expressions (e.g. a conditional) expand with the intended precedence.
 */
#define _OPC(cat, opc) (((cat) << NOPC_BITS) | (opc))
57 
/* clang-format off */
/* Instruction opcodes.  Every enumerator is built with _OPC(cat, n): the
 * instruction category is stored above the low NOPC_BITS bits and the
 * per-category opcode number in the low bits (see opc_cat()/opc_op()).
 * Besides the numbered categories 0-7 there is a "meta" category (OPC_META)
 * and, within several categories, groups described below as logical/virtual
 * opcodes and macros.
 */
typedef enum {
   /* category 0: */
   OPC_NOP             = _OPC(0, 0),
   OPC_B               = _OPC(0, 1),
   OPC_JUMP            = _OPC(0, 2),
   OPC_CALL            = _OPC(0, 3),
   OPC_RET             = _OPC(0, 4),
   OPC_KILL            = _OPC(0, 5),
   OPC_END             = _OPC(0, 6),
   OPC_EMIT            = _OPC(0, 7),
   OPC_CUT             = _OPC(0, 8),
   OPC_CHMASK          = _OPC(0, 9),
   OPC_CHSH            = _OPC(0, 10),
   OPC_FLOW_REV        = _OPC(0, 11),

   OPC_BKT             = _OPC(0, 16),
   OPC_STKS            = _OPC(0, 17),
   OPC_STKR            = _OPC(0, 18),
   OPC_XSET            = _OPC(0, 19),
   OPC_XCLR            = _OPC(0, 20),
   OPC_GETONE          = _OPC(0, 21),
   OPC_DBG             = _OPC(0, 22),
   OPC_SHPS            = _OPC(0, 23),   /* shader prologue start */
   OPC_SHPE            = _OPC(0, 24),   /* shader prologue end */
   OPC_GETLAST         = _OPC(0, 25),

   OPC_PREDT           = _OPC(0, 29),   /* predicated true */
   OPC_PREDF           = _OPC(0, 30),   /* predicated false */
   OPC_PREDE           = _OPC(0, 31),   /* predicated end */

   /* Logical opcodes for different branch instruction variations: */
   OPC_BR              = _OPC(0, 40),
   OPC_BRAO            = _OPC(0, 41),
   OPC_BRAA            = _OPC(0, 42),
   OPC_BRAC            = _OPC(0, 43),
   OPC_BANY            = _OPC(0, 44),
   OPC_BALL            = _OPC(0, 45),
   OPC_BRAX            = _OPC(0, 46),

   /* Logical opcode to distinguish kill and demote */
   OPC_DEMOTE          = _OPC(0, 47),

   /* category 1: */
   OPC_MOV             = _OPC(1, 0),
   OPC_MOVP            = _OPC(1, 1),
   /* swz, gat, sct */
   OPC_MOVMSK          = _OPC(1, 3),

   /* Virtual opcodes for instructions differentiated via a "sub-opcode" that
    * replaces the repeat field:
    */
   OPC_SWZ            = _OPC(1, 4),
   OPC_GAT            = _OPC(1, 5),
   OPC_SCT            = _OPC(1, 6),

   /* Logical opcodes for different variants of mov: */
   OPC_MOV_IMMED       = _OPC(1, 40),
   OPC_MOV_CONST       = _OPC(1, 41),
   OPC_MOV_GPR         = _OPC(1, 42),
   OPC_MOV_RELGPR      = _OPC(1, 43),
   OPC_MOV_RELCONST    = _OPC(1, 44),

   /* Macros that expand to an if statement + move */
   OPC_BALLOT_MACRO    = _OPC(1, 50),
   OPC_ANY_MACRO       = _OPC(1, 51),
   OPC_ALL_MACRO       = _OPC(1, 52),
   OPC_ELECT_MACRO     = _OPC(1, 53),
   OPC_READ_COND_MACRO = _OPC(1, 54),
   OPC_READ_FIRST_MACRO = _OPC(1, 55),
   OPC_SWZ_SHARED_MACRO = _OPC(1, 56),
   OPC_SHPS_MACRO       = _OPC(1, 57),

   /* Macros that expand to a loop */
   OPC_SCAN_MACRO      = _OPC(1, 58),
   OPC_SCAN_CLUSTERS_MACRO = _OPC(1, 60),

   /* Macros that expand to an stsc at the start of the preamble.
    * It loads into const file and should not be optimized in any way.
    */
   OPC_PUSH_CONSTS_LOAD_MACRO = _OPC(1, 59),

   /* category 2: */
   OPC_ADD_F           = _OPC(2, 0),
   OPC_MIN_F           = _OPC(2, 1),
   OPC_MAX_F           = _OPC(2, 2),
   OPC_MUL_F           = _OPC(2, 3),
   OPC_SIGN_F          = _OPC(2, 4),
   OPC_CMPS_F          = _OPC(2, 5),
   OPC_ABSNEG_F        = _OPC(2, 6),
   OPC_CMPV_F          = _OPC(2, 7),
   /* 8 - invalid */
   OPC_FLOOR_F         = _OPC(2, 9),
   OPC_CEIL_F          = _OPC(2, 10),
   OPC_RNDNE_F         = _OPC(2, 11),
   OPC_RNDAZ_F         = _OPC(2, 12),
   OPC_TRUNC_F         = _OPC(2, 13),
   /* 14-15 - invalid */
   OPC_ADD_U           = _OPC(2, 16),
   OPC_ADD_S           = _OPC(2, 17),
   OPC_SUB_U           = _OPC(2, 18),
   OPC_SUB_S           = _OPC(2, 19),
   OPC_CMPS_U          = _OPC(2, 20),
   OPC_CMPS_S          = _OPC(2, 21),
   OPC_MIN_U           = _OPC(2, 22),
   OPC_MIN_S           = _OPC(2, 23),
   OPC_MAX_U           = _OPC(2, 24),
   OPC_MAX_S           = _OPC(2, 25),
   OPC_ABSNEG_S        = _OPC(2, 26),
   /* 27 - invalid */
   OPC_AND_B           = _OPC(2, 28),
   OPC_OR_B            = _OPC(2, 29),
   OPC_NOT_B           = _OPC(2, 30),
   OPC_XOR_B           = _OPC(2, 31),
   /* 32 - invalid */
   OPC_CMPV_U          = _OPC(2, 33),
   OPC_CMPV_S          = _OPC(2, 34),
   /* 35-47 - invalid */
   OPC_MUL_U24         = _OPC(2, 48), /* 24b mul into 32b result */
   OPC_MUL_S24         = _OPC(2, 49), /* 24b mul into 32b result with sign extension */
   OPC_MULL_U          = _OPC(2, 50),
   OPC_BFREV_B         = _OPC(2, 51),
   OPC_CLZ_S           = _OPC(2, 52),
   OPC_CLZ_B           = _OPC(2, 53),
   OPC_SHL_B           = _OPC(2, 54),
   OPC_SHR_B           = _OPC(2, 55),
   OPC_ASHR_B          = _OPC(2, 56),
   OPC_BARY_F          = _OPC(2, 57),
   OPC_MGEN_B          = _OPC(2, 58),
   OPC_GETBIT_B        = _OPC(2, 59),
   OPC_SETRM           = _OPC(2, 60),
   OPC_CBITS_B         = _OPC(2, 61),
   OPC_SHB             = _OPC(2, 62),
   OPC_MSAD            = _OPC(2, 63),
   OPC_FLAT_B          = _OPC(2, 64),

   /* category 3: */
   OPC_MAD_U16         = _OPC(3, 0),
   OPC_MADSH_U16       = _OPC(3, 1),
   OPC_MAD_S16         = _OPC(3, 2),
   OPC_MADSH_M16       = _OPC(3, 3),   /* should this be .s16? */
   OPC_MAD_U24         = _OPC(3, 4),
   OPC_MAD_S24         = _OPC(3, 5),
   OPC_MAD_F16         = _OPC(3, 6),
   OPC_MAD_F32         = _OPC(3, 7),
   OPC_SEL_B16         = _OPC(3, 8),
   OPC_SEL_B32         = _OPC(3, 9),
   OPC_SEL_S16         = _OPC(3, 10),
   OPC_SEL_S32         = _OPC(3, 11),
   OPC_SEL_F16         = _OPC(3, 12),
   OPC_SEL_F32         = _OPC(3, 13),
   OPC_SAD_S16         = _OPC(3, 14),
   OPC_SAD_S32         = _OPC(3, 15),
   OPC_SHRM            = _OPC(3, 16),
   OPC_SHLM            = _OPC(3, 17),
   OPC_SHRG            = _OPC(3, 18),
   OPC_SHLG            = _OPC(3, 19),
   OPC_ANDG            = _OPC(3, 20),
   OPC_DP2ACC          = _OPC(3, 21),
   OPC_DP4ACC          = _OPC(3, 22),
   OPC_WMM             = _OPC(3, 23),
   OPC_WMM_ACCU        = _OPC(3, 24),

   /* category 4: */
   OPC_RCP             = _OPC(4, 0),
   OPC_RSQ             = _OPC(4, 1),
   OPC_LOG2            = _OPC(4, 2),
   OPC_EXP2            = _OPC(4, 3),
   OPC_SIN             = _OPC(4, 4),
   OPC_COS             = _OPC(4, 5),
   OPC_SQRT            = _OPC(4, 6),
   /* NOTE that these are 8+opc from their highp equivs, so it's possible
    * that the high order bit in the opc field has been repurposed for
    * half-precision use?  But note that other ops (rcp/lsin/cos/sqrt)
    * still use the same opc as highp
    */
   OPC_HRSQ            = _OPC(4, 9),
   OPC_HLOG2           = _OPC(4, 10),
   OPC_HEXP2           = _OPC(4, 11),

   /* category 5: */
   OPC_ISAM            = _OPC(5, 0),
   OPC_ISAML           = _OPC(5, 1),
   OPC_ISAMM           = _OPC(5, 2),
   OPC_SAM             = _OPC(5, 3),
   OPC_SAMB            = _OPC(5, 4),
   OPC_SAML            = _OPC(5, 5),
   OPC_SAMGQ           = _OPC(5, 6),
   OPC_GETLOD          = _OPC(5, 7),
   OPC_CONV            = _OPC(5, 8),
   OPC_CONVM           = _OPC(5, 9),
   OPC_GETSIZE         = _OPC(5, 10),
   OPC_GETBUF          = _OPC(5, 11),
   OPC_GETPOS          = _OPC(5, 12),
   OPC_GETINFO         = _OPC(5, 13),
   OPC_DSX             = _OPC(5, 14),
   OPC_DSY             = _OPC(5, 15),
   OPC_GATHER4R        = _OPC(5, 16),
   OPC_GATHER4G        = _OPC(5, 17),
   OPC_GATHER4B        = _OPC(5, 18),
   OPC_GATHER4A        = _OPC(5, 19),
   OPC_SAMGP0          = _OPC(5, 20),
   OPC_SAMGP1          = _OPC(5, 21),
   OPC_SAMGP2          = _OPC(5, 22),
   OPC_SAMGP3          = _OPC(5, 23),
   OPC_DSXPP_1         = _OPC(5, 24),
   OPC_DSYPP_1         = _OPC(5, 25),
   OPC_RGETPOS         = _OPC(5, 26),
   OPC_RGETINFO        = _OPC(5, 27),
   OPC_BRCST_ACTIVE    = _OPC(5, 28),
   OPC_QUAD_SHUFFLE_BRCST  = _OPC(5, 29),
   OPC_QUAD_SHUFFLE_HORIZ  = _OPC(5, 30),
   OPC_QUAD_SHUFFLE_VERT   = _OPC(5, 31),
   OPC_QUAD_SHUFFLE_DIAG   = _OPC(5, 32),
   OPC_TCINV               = _OPC(5, 33),
   /* cat5 meta instructions, placed above the cat5 opc field's size */
   OPC_DSXPP_MACRO     = _OPC(5, 35),
   OPC_DSYPP_MACRO     = _OPC(5, 36),

   /* category 6: */
   OPC_LDG             = _OPC(6, 0),        /* load-global */
   OPC_LDL             = _OPC(6, 1),
   OPC_LDP             = _OPC(6, 2),
   OPC_STG             = _OPC(6, 3),        /* store-global */
   OPC_STL             = _OPC(6, 4),
   OPC_STP             = _OPC(6, 5),
   OPC_LDIB            = _OPC(6, 6),
   OPC_G2L             = _OPC(6, 7),
   OPC_L2G             = _OPC(6, 8),
   OPC_PREFETCH        = _OPC(6, 9),
   OPC_LDLW            = _OPC(6, 10),
   OPC_STLW            = _OPC(6, 11),
   OPC_RESFMT          = _OPC(6, 14),
   OPC_RESINFO         = _OPC(6, 15),
   OPC_ATOMIC_ADD      = _OPC(6, 16),
   OPC_ATOMIC_SUB      = _OPC(6, 17),
   OPC_ATOMIC_XCHG     = _OPC(6, 18),
   OPC_ATOMIC_INC      = _OPC(6, 19),
   OPC_ATOMIC_DEC      = _OPC(6, 20),
   OPC_ATOMIC_CMPXCHG  = _OPC(6, 21),
   OPC_ATOMIC_MIN      = _OPC(6, 22),
   OPC_ATOMIC_MAX      = _OPC(6, 23),
   OPC_ATOMIC_AND      = _OPC(6, 24),
   OPC_ATOMIC_OR       = _OPC(6, 25),
   OPC_ATOMIC_XOR      = _OPC(6, 26),
   OPC_LDGB            = _OPC(6, 27),
   OPC_STGB            = _OPC(6, 28),
   OPC_STIB            = _OPC(6, 29),
   OPC_LDC             = _OPC(6, 30),
   OPC_LDLV            = _OPC(6, 31),
   OPC_PIPR            = _OPC(6, 32), /* ??? */
   OPC_PIPC            = _OPC(6, 33), /* ??? */
   OPC_EMIT2           = _OPC(6, 34), /* ??? */
   OPC_ENDLS           = _OPC(6, 35), /* ??? */
   OPC_GETSPID         = _OPC(6, 36), /* SP ID */
   OPC_GETWID          = _OPC(6, 37), /* wavefront ID */
   OPC_GETFIBERID      = _OPC(6, 38), /* fiber ID */

   /* Logical opcodes for things that differ in a6xx+ */
   OPC_STC             = _OPC(6, 40),
   OPC_RESINFO_B       = _OPC(6, 41),
   OPC_LDIB_B          = _OPC(6, 42),
   OPC_STIB_B          = _OPC(6, 43),

   /* Logical opcodes for different atomic instruction variations: */
   OPC_ATOMIC_B_ADD      = _OPC(6, 44),
   OPC_ATOMIC_B_SUB      = _OPC(6, 45),
   OPC_ATOMIC_B_XCHG     = _OPC(6, 46),
   OPC_ATOMIC_B_INC      = _OPC(6, 47),
   OPC_ATOMIC_B_DEC      = _OPC(6, 48),
   OPC_ATOMIC_B_CMPXCHG  = _OPC(6, 49),
   OPC_ATOMIC_B_MIN      = _OPC(6, 50),
   OPC_ATOMIC_B_MAX      = _OPC(6, 51),
   OPC_ATOMIC_B_AND      = _OPC(6, 52),
   OPC_ATOMIC_B_OR       = _OPC(6, 53),
   OPC_ATOMIC_B_XOR      = _OPC(6, 54),

   OPC_ATOMIC_S_ADD      = _OPC(6, 55),
   OPC_ATOMIC_S_SUB      = _OPC(6, 56),
   OPC_ATOMIC_S_XCHG     = _OPC(6, 57),
   OPC_ATOMIC_S_INC      = _OPC(6, 58),
   OPC_ATOMIC_S_DEC      = _OPC(6, 59),
   OPC_ATOMIC_S_CMPXCHG  = _OPC(6, 60),
   OPC_ATOMIC_S_MIN      = _OPC(6, 61),
   OPC_ATOMIC_S_MAX      = _OPC(6, 62),
   OPC_ATOMIC_S_AND      = _OPC(6, 63),
   OPC_ATOMIC_S_OR       = _OPC(6, 64),
   OPC_ATOMIC_S_XOR      = _OPC(6, 65),

   OPC_ATOMIC_G_ADD      = _OPC(6, 66),
   OPC_ATOMIC_G_SUB      = _OPC(6, 67),
   OPC_ATOMIC_G_XCHG     = _OPC(6, 68),
   OPC_ATOMIC_G_INC      = _OPC(6, 69),
   OPC_ATOMIC_G_DEC      = _OPC(6, 70),
   OPC_ATOMIC_G_CMPXCHG  = _OPC(6, 71),
   OPC_ATOMIC_G_MIN      = _OPC(6, 72),
   OPC_ATOMIC_G_MAX      = _OPC(6, 73),
   OPC_ATOMIC_G_AND      = _OPC(6, 74),
   OPC_ATOMIC_G_OR       = _OPC(6, 75),
   OPC_ATOMIC_G_XOR      = _OPC(6, 76),

   OPC_LDG_A           = _OPC(6, 77),
   OPC_STG_A           = _OPC(6, 78),

   OPC_SPILL_MACRO     = _OPC(6, 79),
   OPC_RELOAD_MACRO    = _OPC(6, 80),

   OPC_LDC_K           = _OPC(6, 81),
   OPC_STSC            = _OPC(6, 82),
   OPC_LDG_K           = _OPC(6, 83),

   /* category 7: */
   OPC_BAR             = _OPC(7, 0),
   OPC_FENCE           = _OPC(7, 1),
   OPC_SLEEP           = _OPC(7, 2),
   OPC_ICINV           = _OPC(7, 3),
   OPC_DCCLN           = _OPC(7, 4),
   OPC_DCINV           = _OPC(7, 5),
   OPC_DCFLU           = _OPC(7, 6),

   OPC_LOCK            = _OPC(7, 7),
   OPC_UNLOCK          = _OPC(7, 8),

   OPC_ALIAS           = _OPC(7, 9),

   OPC_CCINV           = _OPC(7, 10),

   /* meta instructions (category 8): */
#define OPC_META 8
   /* placeholder instr to mark shader inputs: */
   OPC_META_INPUT      = _OPC(OPC_META, 0),
   /* The "collect" and "split" instructions are used for keeping
    * track of instructions that write to multiple dst registers
    * (split) like texture sample instructions, or read multiple
    * consecutive scalar registers (collect) (bary.f, texture samp)
    *
    * A "split" extracts a scalar component from a vecN, and a
    * "collect" gathers multiple scalar components into a vecN
    */
   OPC_META_SPLIT      = _OPC(OPC_META, 2),
   OPC_META_COLLECT    = _OPC(OPC_META, 3),

   /* placeholder for texture fetches that run before FS invocation
    * starts:
    */
   OPC_META_TEX_PREFETCH = _OPC(OPC_META, 4),

   /* Parallel copies have multiple destinations, and copy each source
    * to its corresponding destination. This happens "in parallel," meaning
    * that it happens as-if every source is read first and then every
    * destination is stored. These are produced in RA when register
    * shuffling is required, and then lowered away immediately afterwards.
    */
   OPC_META_PARALLEL_COPY = _OPC(OPC_META, 5),
   OPC_META_PHI = _OPC(OPC_META, 6),
   /*
    * A manually encoded opcode
    */
   OPC_META_RAW = _OPC(OPC_META, 7),
} opc_t;
/* clang-format on */
419 
420 #define opc_cat(opc) ((int)((opc) >> NOPC_BITS))
421 #define opc_op(opc)  ((unsigned)((opc) & ((1 << NOPC_BITS) - 1)))
422 
423 const char *disasm_a3xx_instr_name(opc_t opc);
424 
/* Data types an instruction can operate on.  The numeric values are fixed
 * explicitly; see type_size() for the bit width of each type.
 */
typedef enum {
   /* floating point */
   TYPE_F16 = 0,
   TYPE_F32 = 1,
   /* unsigned integer, 16/32 bit */
   TYPE_U16 = 2,
   TYPE_U32 = 3,
   /* signed integer, 16/32 bit */
   TYPE_S16 = 4,
   TYPE_S32 = 5,
   /* 8 bit integer */
   TYPE_U8 = 6,
   TYPE_S8 = 7, /* XXX I assume? */
} type_t;
435 
436 static inline uint32_t
type_size(type_t type)437 type_size(type_t type)
438 {
439    switch (type) {
440    case TYPE_F32:
441    case TYPE_U32:
442    case TYPE_S32:
443       return 32;
444    case TYPE_F16:
445    case TYPE_U16:
446    case TYPE_S16:
447       return 16;
448    case TYPE_U8:
449    case TYPE_S8:
450       return 8;
451    default:
452       ir3_assert(0); /* invalid type */
453       return 0;
454    }
455 }
456 
457 static inline type_t
type_uint_size(unsigned bit_size)458 type_uint_size(unsigned bit_size)
459 {
460    switch (bit_size) {
461    case 8:  return TYPE_U8;
462    case 1:  /* 1b bools are treated as normal half-regs */
463    case 16: return TYPE_U16;
464    case 32: return TYPE_U32;
465    default:
466       ir3_assert(0); /* invalid size */
467       return (type_t)0;
468    }
469 }
470 
471 static inline type_t
type_float_size(unsigned bit_size)472 type_float_size(unsigned bit_size)
473 {
474    switch (bit_size) {
475    case 16: return TYPE_F16;
476    case 32: return TYPE_F32;
477    default:
478       ir3_assert(0); /* invalid size */
479       return (type_t)0;
480    }
481 }
482 
483 static inline int
type_float(type_t type)484 type_float(type_t type)
485 {
486    return (type == TYPE_F32) || (type == TYPE_F16);
487 }
488 
489 static inline int
type_uint(type_t type)490 type_uint(type_t type)
491 {
492    return (type == TYPE_U32) || (type == TYPE_U16) || (type == TYPE_U8);
493 }
494 
495 static inline int
type_sint(type_t type)496 type_sint(type_t type)
497 {
498    return (type == TYPE_S32) || (type == TYPE_S16) || (type == TYPE_S8);
499 }
500 
/* Rounding modes, with explicitly fixed numeric values. */
typedef enum {
   ROUND_ZERO    = 0,
   ROUND_EVEN    = 1,
   ROUND_POS_INF = 2,
   ROUND_NEG_INF = 3,
} round_t;
507 
/* Build a register id from a register number and a component.
 *
 * comp:
 *   0 - x
 *   1 - y
 *   2 - z
 *   3 - w
 *
 * The result is (num << 2) with the low two bits of comp in bits 0-1; comp
 * is masked, num is not range-checked.  The arithmetic is carried out in
 * uint32_t so that a negative or very large num wraps instead of invoking
 * the undefined behavior of a signed left shift.
 */
static inline uint32_t
regid(int num, int comp)
{
   return ((uint32_t)num << 2) | ((uint32_t)comp & 0x3u);
}
519 
/* Register id used as the "no register" marker: register 63, component 0. */
#define INVALID_REG     regid(63, 0)
/* True when r is anything other than INVALID_REG. */
#define VALIDREG(r)     (INVALID_REG != (r))
/* Passes val through COND() guarded by VALIDREG(r).  COND is defined
 * outside this header; presumably it yields val when the condition holds
 * and 0 otherwise (TODO confirm).
 */
#define CONDREG(r, val) COND(VALIDREG(r), (val))
523 
/* special registers (register numbers): */

/* address register */
#define REG_A0 61
/* predicate register */
#define REG_P0 62
527 
/* Branch variants; the trailing comment on each value is the mnemonic. */
typedef enum {
   BRANCH_PLAIN = 0, /* br */
   BRANCH_OR    = 1, /* brao */
   BRANCH_AND   = 2, /* braa */
   BRANCH_CONST = 3, /* brac */
   BRANCH_ANY   = 4, /* bany */
   BRANCH_ALL   = 5, /* ball */
   BRANCH_X     = 6, /* brax ??? */
} brtype_t;
537 
/* Descriptor mode for cat5 instructions.  With is_bindless_s2en = 1, this
 * selects whether bindless is enabled and, if it is, how the (base, index)
 * pair is obtained for both the sampler and the texture.  The instruction
 * embeds a single base, which is always the one used for the texture.
 */
typedef enum {
   /* Traditional GL binding model: texture and sampler index are taken from
    * src3, which is presumed to be uniform on a4xx+ (a3xx doesn't have the
    * other modes, but does handle non-uniform indexing).
    */
   CAT5_UNIFORM = 0,

   /* Sampler base is taken from the low 3 bits of a1.x; sampler and texture
    * index are taken from src3, which is presumed to be uniform.
    */
   CAT5_BINDLESS_A1_UNIFORM = 1,

   /* Texture and sampler share the same base; sampler and texture index are
    * taken from src3, which is *not* presumed to be uniform.
    */
   CAT5_BINDLESS_NONUNIFORM = 2,

   /* Sampler base is taken from the low 3 bits of a1.x; sampler and texture
    * index are taken from src3, which is *not* presumed to be uniform.
    */
   CAT5_BINDLESS_A1_NONUNIFORM = 3,

   /* Traditional GL binding model: texture and sampler index are taken from
    * src3, which is *not* presumed to be uniform.
    */
   CAT5_NONUNIFORM = 4,

   /* Texture and sampler share the same base; sampler and texture index are
    * taken from src3, which is presumed to be uniform.
    */
   CAT5_BINDLESS_UNIFORM = 5,

   /* Texture and sampler share the same base; the sampler index is the low
    * 4 bits of src3 and the texture index is the high 4 bits.
    */
   CAT5_BINDLESS_IMM = 6,

   /* Sampler base is taken from the low 3 bits of a1.x and the texture index
    * from the next 8 bits of a1.x.  The sampler index is an immediate in
    * src3.
    */
   CAT5_BINDLESS_A1_IMM = 7,
} cat5_desc_mode_t;
587 
/* Descriptor mode for cat6 instructions: like cat5_desc_mode_t, it describes
 * how the descriptor is loaded.  Note that the value 3 is not assigned.
 */
typedef enum {
   /* Old GL binding model, immediate index. */
   CAT6_IMM = 0,

   CAT6_UNIFORM = 1,

   CAT6_NONUNIFORM = 2,

   /* Bindless model, immediate index. */
   CAT6_BINDLESS_IMM = 4,

   /* Bindless model, uniform register index. */
   CAT6_BINDLESS_UNIFORM = 5,

   /* Bindless model, with a register index that isn't guaranteed to be
    * uniform.  This presumably checks if the indices are equal and splits
    * up the load/store, because it works the way you would expect.
    */
   CAT6_BINDLESS_NONUNIFORM = 6,
} cat6_desc_mode_t;
613 
614 static inline bool
is_sat_compatible(opc_t opc)615 is_sat_compatible(opc_t opc)
616 {
617    /* On a6xx saturation doesn't work on cat4 */
618    if (opc_cat(opc) != 2 && opc_cat(opc) != 3)
619       return false;
620 
621    switch (opc) {
622    /* On a3xx and a6xx saturation doesn't work on bary.f */
623    case OPC_BARY_F:
624    /* On a6xx saturation doesn't work on sel.* */
625    case OPC_SEL_B16:
626    case OPC_SEL_B32:
627    case OPC_SEL_S16:
628    case OPC_SEL_S32:
629    case OPC_SEL_F16:
630    case OPC_SEL_F32:
631       return false;
632    default:
633       return true;
634    }
635 }
636 
637 static inline bool
is_mad(opc_t opc)638 is_mad(opc_t opc)
639 {
640    switch (opc) {
641    case OPC_MAD_U16:
642    case OPC_MAD_S16:
643    case OPC_MAD_U24:
644    case OPC_MAD_S24:
645    case OPC_MAD_F16:
646    case OPC_MAD_F32:
647       return true;
648    default:
649       return false;
650    }
651 }
652 
653 static inline bool
is_madsh(opc_t opc)654 is_madsh(opc_t opc)
655 {
656    switch (opc) {
657    case OPC_MADSH_U16:
658    case OPC_MADSH_M16:
659       return true;
660    default:
661       return false;
662    }
663 }
664 
665 static inline bool
is_local_atomic(opc_t opc)666 is_local_atomic(opc_t opc)
667 {
668    switch (opc) {
669    case OPC_ATOMIC_ADD:
670    case OPC_ATOMIC_SUB:
671    case OPC_ATOMIC_XCHG:
672    case OPC_ATOMIC_INC:
673    case OPC_ATOMIC_DEC:
674    case OPC_ATOMIC_CMPXCHG:
675    case OPC_ATOMIC_MIN:
676    case OPC_ATOMIC_MAX:
677    case OPC_ATOMIC_AND:
678    case OPC_ATOMIC_OR:
679    case OPC_ATOMIC_XOR:
680       return true;
681    default:
682       return false;
683    }
684 }
685 
686 static inline bool
is_global_a3xx_atomic(opc_t opc)687 is_global_a3xx_atomic(opc_t opc)
688 {
689    switch (opc) {
690    case OPC_ATOMIC_S_ADD:
691    case OPC_ATOMIC_S_SUB:
692    case OPC_ATOMIC_S_XCHG:
693    case OPC_ATOMIC_S_INC:
694    case OPC_ATOMIC_S_DEC:
695    case OPC_ATOMIC_S_CMPXCHG:
696    case OPC_ATOMIC_S_MIN:
697    case OPC_ATOMIC_S_MAX:
698    case OPC_ATOMIC_S_AND:
699    case OPC_ATOMIC_S_OR:
700    case OPC_ATOMIC_S_XOR:
701       return true;
702    default:
703       return false;
704    }
705 }
706 
707 static inline bool
is_global_a6xx_atomic(opc_t opc)708 is_global_a6xx_atomic(opc_t opc)
709 {
710    switch (opc) {
711    case OPC_ATOMIC_G_ADD:
712    case OPC_ATOMIC_G_SUB:
713    case OPC_ATOMIC_G_XCHG:
714    case OPC_ATOMIC_G_INC:
715    case OPC_ATOMIC_G_DEC:
716    case OPC_ATOMIC_G_CMPXCHG:
717    case OPC_ATOMIC_G_MIN:
718    case OPC_ATOMIC_G_MAX:
719    case OPC_ATOMIC_G_AND:
720    case OPC_ATOMIC_G_OR:
721    case OPC_ATOMIC_G_XOR:
722       return true;
723    default:
724       return false;
725    }
726 }
727 
728 static inline bool
is_bindless_atomic(opc_t opc)729 is_bindless_atomic(opc_t opc)
730 {
731    switch (opc) {
732    case OPC_ATOMIC_B_ADD:
733    case OPC_ATOMIC_B_SUB:
734    case OPC_ATOMIC_B_XCHG:
735    case OPC_ATOMIC_B_INC:
736    case OPC_ATOMIC_B_DEC:
737    case OPC_ATOMIC_B_CMPXCHG:
738    case OPC_ATOMIC_B_MIN:
739    case OPC_ATOMIC_B_MAX:
740    case OPC_ATOMIC_B_AND:
741    case OPC_ATOMIC_B_OR:
742    case OPC_ATOMIC_B_XOR:
743       return true;
744    default:
745       return false;
746    }
747 }
748 
749 static inline bool
is_atomic(opc_t opc)750 is_atomic(opc_t opc)
751 {
752    return is_local_atomic(opc) || is_global_a3xx_atomic(opc) ||
753           is_global_a6xx_atomic(opc) || is_bindless_atomic(opc);
754 }
755 
756 static inline bool
is_ssbo(opc_t opc)757 is_ssbo(opc_t opc)
758 {
759    switch (opc) {
760    case OPC_RESFMT:
761    case OPC_RESINFO:
762    case OPC_LDGB:
763    case OPC_STGB:
764    case OPC_STIB:
765       return true;
766    default:
767       return false;
768    }
769 }
770 
771 static inline bool
is_isam(opc_t opc)772 is_isam(opc_t opc)
773 {
774    switch (opc) {
775    case OPC_ISAM:
776    case OPC_ISAML:
777    case OPC_ISAMM:
778       return true;
779    default:
780       return false;
781    }
782 }
783 
784 static inline bool
is_cat2_float(opc_t opc)785 is_cat2_float(opc_t opc)
786 {
787    switch (opc) {
788    case OPC_ADD_F:
789    case OPC_MIN_F:
790    case OPC_MAX_F:
791    case OPC_MUL_F:
792    case OPC_SIGN_F:
793    case OPC_CMPS_F:
794    case OPC_ABSNEG_F:
795    case OPC_CMPV_F:
796    case OPC_FLOOR_F:
797    case OPC_CEIL_F:
798    case OPC_RNDNE_F:
799    case OPC_RNDAZ_F:
800    case OPC_TRUNC_F:
801       return true;
802 
803    default:
804       return false;
805    }
806 }
807 
808 static inline bool
is_cat3_float(opc_t opc)809 is_cat3_float(opc_t opc)
810 {
811    switch (opc) {
812    case OPC_MAD_F16:
813    case OPC_MAD_F32:
814    case OPC_SEL_F16:
815    case OPC_SEL_F32:
816       return true;
817    default:
818       return false;
819    }
820 }
821 
822 #endif /* INSTR_A3XX_H_ */
823