/*
 * Copyright © 2013 Rob Clark <robdclark@gmail.com>
 * SPDX-License-Identifier: MIT
 */

#ifndef INSTR_A3XX_H_
#define INSTR_A3XX_H_

#define PACKED __attribute__((__packed__))

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* clang-format off */
void ir3_assert_handler(const char *expr, const char *file, int line,
                        const char *func) __attribute__((weak)) __attribute__((__noreturn__));
/* clang-format on */

/* A wrapper for assert() that allows overriding the handling of a failed
 * assert. This is needed for tools like crashdec, which may attempt to
 * disassemble memory that might not actually contain valid instructions.
 */
#define ir3_assert(expr) \
   do { \
      if (!(expr)) { \
         if (ir3_assert_handler) { \
            ir3_assert_handler(#expr, __FILE__, __LINE__, __func__); \
         } \
         assert(expr); \
      } \
   } while (0)
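
/* Example (an illustrative sketch, not part of this header): since
 * ir3_assert_handler is declared weak, a tool such as crashdec can supply
 * a strong definition to report the failure instead of aborting outright:
 *
 *    void
 *    ir3_assert_handler(const char *expr, const char *file, int line,
 *                       const char *func)
 *    {
 *       fprintf(stderr, "disasm assert failed: %s (%s:%d, %s)\n",
 *               expr, file, line, func);
 *       exit(1);  // the handler is declared __noreturn__, so it must
 *                 // exit() or longjmp() to a recovery point, never return
 *    }
 */
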
/* size of largest OPC field of all the instruction categories: */
#define NOPC_BITS 7

#define _OPC(cat, opc) (((cat) << NOPC_BITS) | opc)

/* clang-format off */
typedef enum {
   /* category 0: */
   OPC_NOP = _OPC(0, 0),
   OPC_JUMP = _OPC(0, 2),
   OPC_CALL = _OPC(0, 3),
   OPC_RET = _OPC(0, 4),
   OPC_KILL = _OPC(0, 5),
   OPC_END = _OPC(0, 6),
   OPC_EMIT = _OPC(0, 7),
   OPC_CUT = _OPC(0, 8),
   OPC_CHMASK = _OPC(0, 9),
   OPC_CHSH = _OPC(0, 10),
   OPC_FLOW_REV = _OPC(0, 11),

   OPC_BKT = _OPC(0, 16),
   OPC_STKS = _OPC(0, 17),
   OPC_STKR = _OPC(0, 18),
   OPC_XSET = _OPC(0, 19),
   OPC_XCLR = _OPC(0, 20),
   OPC_GETONE = _OPC(0, 21),
   OPC_DBG = _OPC(0, 22),
   OPC_SHPS = _OPC(0, 23), /* shader prologue start */
   OPC_SHPE = _OPC(0, 24), /* shader prologue end */
   OPC_GETLAST = _OPC(0, 25),

   OPC_PREDT = _OPC(0, 29), /* predicated true */
   OPC_PREDF = _OPC(0, 30), /* predicated false */
   OPC_PREDE = _OPC(0, 31), /* predicated end */

   /* Logical opcodes for different branch instruction variations: */
   OPC_BR = _OPC(0, 40),
   OPC_BRAO = _OPC(0, 41),
   OPC_BRAA = _OPC(0, 42),
   OPC_BRAC = _OPC(0, 43),
   OPC_BANY = _OPC(0, 44),
   OPC_BALL = _OPC(0, 45),
   OPC_BRAX = _OPC(0, 46),

   /* Logical opcode to distinguish kill and demote */
   OPC_DEMOTE = _OPC(0, 47),

   /* category 1: */
   OPC_MOV = _OPC(1, 0),
   OPC_MOVP = _OPC(1, 1),
   /* swz, gat, sct */
   OPC_MOVMSK = _OPC(1, 3),

   /* Virtual opcodes for instructions differentiated via a "sub-opcode" that
    * replaces the repeat field:
    */
   OPC_SWZ = _OPC(1, 4),
   OPC_GAT = _OPC(1, 5),
   OPC_SCT = _OPC(1, 6),

   /* Logical opcodes for different variants of mov: */
   OPC_MOV_IMMED = _OPC(1, 40),
   OPC_MOV_CONST = _OPC(1, 41),
   OPC_MOV_GPR = _OPC(1, 42),
   OPC_MOV_RELGPR = _OPC(1, 43),
   OPC_MOV_RELCONST = _OPC(1, 44),

   /* Macros that expand to an if statement + move */
   OPC_BALLOT_MACRO = _OPC(1, 50),
   OPC_ANY_MACRO = _OPC(1, 51),
   OPC_ALL_MACRO = _OPC(1, 52),
   OPC_ELECT_MACRO = _OPC(1, 53),
   OPC_READ_COND_MACRO = _OPC(1, 54),
   OPC_READ_FIRST_MACRO = _OPC(1, 55),
   OPC_SHPS_MACRO = _OPC(1, 56),
   OPC_READ_GETLAST_MACRO = _OPC(1, 57),

   /* Macros that expand to a loop */
   OPC_SCAN_MACRO = _OPC(1, 58),
   OPC_SCAN_CLUSTERS_MACRO = _OPC(1, 60),

   /* category 2: */
   OPC_ADD_F = _OPC(2, 0),
   OPC_MIN_F = _OPC(2, 1),
   OPC_MAX_F = _OPC(2, 2),
   OPC_MUL_F = _OPC(2, 3),
   OPC_SIGN_F = _OPC(2, 4),
   OPC_CMPS_F = _OPC(2, 5),
   OPC_ABSNEG_F = _OPC(2, 6),
   OPC_CMPV_F = _OPC(2, 7),
   /* 8 - invalid */
   OPC_FLOOR_F = _OPC(2, 9),
   OPC_CEIL_F = _OPC(2, 10),
   OPC_RNDNE_F = _OPC(2, 11),
   OPC_RNDAZ_F = _OPC(2, 12),
   OPC_TRUNC_F = _OPC(2, 13),
   /* 14-15 - invalid */
   OPC_ADD_U = _OPC(2, 16),
   OPC_ADD_S = _OPC(2, 17),
   OPC_SUB_U = _OPC(2, 18),
   OPC_SUB_S = _OPC(2, 19),
   OPC_CMPS_U = _OPC(2, 20),
   OPC_CMPS_S = _OPC(2, 21),
   OPC_MIN_U = _OPC(2, 22),
   OPC_MIN_S = _OPC(2, 23),
   OPC_MAX_U = _OPC(2, 24),
   OPC_MAX_S = _OPC(2, 25),
   OPC_ABSNEG_S = _OPC(2, 26),
   /* 27 - invalid */
   OPC_AND_B = _OPC(2, 28),
   OPC_OR_B = _OPC(2, 29),
   OPC_NOT_B = _OPC(2, 30),
   OPC_XOR_B = _OPC(2, 31),
   /* 32 - invalid */
   OPC_CMPV_U = _OPC(2, 33),
   OPC_CMPV_S = _OPC(2, 34),
   /* 35-47 - invalid */
   OPC_MUL_U24 = _OPC(2, 48), /* 24b mul into 32b result */
   OPC_MUL_S24 = _OPC(2, 49), /* 24b mul into 32b result with sign extension */
   OPC_MULL_U = _OPC(2, 50),
   OPC_BFREV_B = _OPC(2, 51),
   OPC_CLZ_S = _OPC(2, 52),
   OPC_CLZ_B = _OPC(2, 53),
   OPC_SHL_B = _OPC(2, 54),
   OPC_SHR_B = _OPC(2, 55),
   OPC_ASHR_B = _OPC(2, 56),
   OPC_BARY_F = _OPC(2, 57),
   OPC_MGEN_B = _OPC(2, 58),
   OPC_GETBIT_B = _OPC(2, 59),
   OPC_SETRM = _OPC(2, 60),
   OPC_CBITS_B = _OPC(2, 61),
   OPC_SHB = _OPC(2, 62),
   OPC_MSAD = _OPC(2, 63),
   OPC_FLAT_B = _OPC(2, 64),

   /* category 3: */
   OPC_MAD_U16 = _OPC(3, 0),
   OPC_MADSH_U16 = _OPC(3, 1),
   OPC_MAD_S16 = _OPC(3, 2),
   OPC_MADSH_M16 = _OPC(3, 3), /* should this be .s16? */
   OPC_MAD_U24 = _OPC(3, 4),
   OPC_MAD_S24 = _OPC(3, 5),
   OPC_MAD_F16 = _OPC(3, 6),
   OPC_MAD_F32 = _OPC(3, 7),
   OPC_SEL_B16 = _OPC(3, 8),
   OPC_SEL_B32 = _OPC(3, 9),
   OPC_SEL_S16 = _OPC(3, 10),
   OPC_SEL_S32 = _OPC(3, 11),
   OPC_SEL_F16 = _OPC(3, 12),
   OPC_SEL_F32 = _OPC(3, 13),
   OPC_SAD_S16 = _OPC(3, 14),
   OPC_SAD_S32 = _OPC(3, 15),
   OPC_SHRM = _OPC(3, 16),
   OPC_SHLM = _OPC(3, 17),
   OPC_SHRG = _OPC(3, 18),
   OPC_SHLG = _OPC(3, 19),
   OPC_ANDG = _OPC(3, 20),
   OPC_DP2ACC = _OPC(3, 21),
   OPC_DP4ACC = _OPC(3, 22),
   OPC_WMM = _OPC(3, 23),
   OPC_WMM_ACCU = _OPC(3, 24),

   /* category 4: */
   OPC_RCP = _OPC(4, 0),
   OPC_RSQ = _OPC(4, 1),
   OPC_LOG2 = _OPC(4, 2),
   OPC_EXP2 = _OPC(4, 3),
   OPC_SIN = _OPC(4, 4),
   OPC_COS = _OPC(4, 5),
   OPC_SQRT = _OPC(4, 6),
   /* NOTE that these are 8+opc from their highp equivs, so it's possible
    * that the high order bit in the opc field has been repurposed for
    * half-precision use. But note that other ops (rcp/sin/cos/sqrt)
    * still use the same opc as highp.
    */
   OPC_HRSQ = _OPC(4, 9),
   OPC_HLOG2 = _OPC(4, 10),
   OPC_HEXP2 = _OPC(4, 11),

   /* category 5: */
   OPC_ISAM = _OPC(5, 0),
   OPC_ISAML = _OPC(5, 1),
   OPC_ISAMM = _OPC(5, 2),
   OPC_SAM = _OPC(5, 3),
   OPC_SAMB = _OPC(5, 4),
   OPC_SAML = _OPC(5, 5),
   OPC_SAMGQ = _OPC(5, 6),
   OPC_GETLOD = _OPC(5, 7),
   OPC_CONV = _OPC(5, 8),
   OPC_CONVM = _OPC(5, 9),
   OPC_GETSIZE = _OPC(5, 10),
   OPC_GETBUF = _OPC(5, 11),
   OPC_GETPOS = _OPC(5, 12),
   OPC_GETINFO = _OPC(5, 13),
   OPC_DSX = _OPC(5, 14),
   OPC_DSY = _OPC(5, 15),
   OPC_GATHER4R = _OPC(5, 16),
   OPC_GATHER4G = _OPC(5, 17),
   OPC_GATHER4B = _OPC(5, 18),
   OPC_GATHER4A = _OPC(5, 19),
   OPC_SAMGP0 = _OPC(5, 20),
   OPC_SAMGP1 = _OPC(5, 21),
   OPC_SAMGP2 = _OPC(5, 22),
   OPC_SAMGP3 = _OPC(5, 23),
   OPC_DSXPP_1 = _OPC(5, 24),
   OPC_DSYPP_1 = _OPC(5, 25),
   OPC_RGETPOS = _OPC(5, 26),
   OPC_RGETINFO = _OPC(5, 27),
   OPC_BRCST_ACTIVE = _OPC(5, 28),
   OPC_QUAD_SHUFFLE_BRCST = _OPC(5, 29),
   OPC_QUAD_SHUFFLE_HORIZ = _OPC(5, 30),
   OPC_QUAD_SHUFFLE_VERT = _OPC(5, 31),
   OPC_QUAD_SHUFFLE_DIAG = _OPC(5, 32),
   OPC_TCINV = _OPC(5, 33),
   /* cat5 meta instructions, placed above the cat5 opc field's size */
   OPC_DSXPP_MACRO = _OPC(5, 35),
   OPC_DSYPP_MACRO = _OPC(5, 36),

   /* category 6: */
   OPC_LDG = _OPC(6, 0), /* load-global */
   OPC_LDL = _OPC(6, 1),
   OPC_LDP = _OPC(6, 2),
   OPC_STG = _OPC(6, 3), /* store-global */
   OPC_STL = _OPC(6, 4),
   OPC_STP = _OPC(6, 5),
   OPC_LDIB = _OPC(6, 6),
   OPC_G2L = _OPC(6, 7),
   OPC_L2G = _OPC(6, 8),
   OPC_PREFETCH = _OPC(6, 9),
   OPC_LDLW = _OPC(6, 10),
   OPC_STLW = _OPC(6, 11),
   OPC_RESFMT = _OPC(6, 14),
   OPC_RESINFO = _OPC(6, 15),
   OPC_ATOMIC_ADD = _OPC(6, 16),
   OPC_ATOMIC_SUB = _OPC(6, 17),
   OPC_ATOMIC_XCHG = _OPC(6, 18),
   OPC_ATOMIC_INC = _OPC(6, 19),
   OPC_ATOMIC_DEC = _OPC(6, 20),
   OPC_ATOMIC_CMPXCHG = _OPC(6, 21),
   OPC_ATOMIC_MIN = _OPC(6, 22),
   OPC_ATOMIC_MAX = _OPC(6, 23),
   OPC_ATOMIC_AND = _OPC(6, 24),
   OPC_ATOMIC_OR = _OPC(6, 25),
   OPC_ATOMIC_XOR = _OPC(6, 26),
   OPC_LDGB = _OPC(6, 27),
   OPC_STGB = _OPC(6, 28),
   OPC_STIB = _OPC(6, 29),
   OPC_LDC = _OPC(6, 30),
   OPC_LDLV = _OPC(6, 31),
   OPC_PIPR = _OPC(6, 32), /* ??? */
   OPC_PIPC = _OPC(6, 33), /* ??? */
   OPC_EMIT2 = _OPC(6, 34), /* ??? */
   OPC_ENDLS = _OPC(6, 35), /* ??? */
   OPC_GETSPID = _OPC(6, 36), /* SP ID */
   OPC_GETWID = _OPC(6, 37), /* wavefront ID */
   OPC_GETFIBERID = _OPC(6, 38), /* fiber ID */
   OPC_SHFL = _OPC(6, 39),

   /* Logical opcodes for things that differ in a6xx+ */
   OPC_STC = _OPC(6, 40),
   OPC_RESINFO_B = _OPC(6, 41),
   OPC_LDIB_B = _OPC(6, 42),
   OPC_STIB_B = _OPC(6, 43),

   /* Logical opcodes for different atomic instruction variations: */
   OPC_ATOMIC_B_ADD = _OPC(6, 44),
   OPC_ATOMIC_B_SUB = _OPC(6, 45),
   OPC_ATOMIC_B_XCHG = _OPC(6, 46),
   OPC_ATOMIC_B_INC = _OPC(6, 47),
   OPC_ATOMIC_B_DEC = _OPC(6, 48),
   OPC_ATOMIC_B_CMPXCHG = _OPC(6, 49),
   OPC_ATOMIC_B_MIN = _OPC(6, 50),
   OPC_ATOMIC_B_MAX = _OPC(6, 51),
   OPC_ATOMIC_B_AND = _OPC(6, 52),
   OPC_ATOMIC_B_OR = _OPC(6, 53),
   OPC_ATOMIC_B_XOR = _OPC(6, 54),

   OPC_ATOMIC_S_ADD = _OPC(6, 55),
   OPC_ATOMIC_S_SUB = _OPC(6, 56),
   OPC_ATOMIC_S_XCHG = _OPC(6, 57),
   OPC_ATOMIC_S_INC = _OPC(6, 58),
   OPC_ATOMIC_S_DEC = _OPC(6, 59),
   OPC_ATOMIC_S_CMPXCHG = _OPC(6, 60),
   OPC_ATOMIC_S_MIN = _OPC(6, 61),
   OPC_ATOMIC_S_MAX = _OPC(6, 62),
   OPC_ATOMIC_S_AND = _OPC(6, 63),
   OPC_ATOMIC_S_OR = _OPC(6, 64),
   OPC_ATOMIC_S_XOR = _OPC(6, 65),

   OPC_ATOMIC_G_ADD = _OPC(6, 66),
   OPC_ATOMIC_G_SUB = _OPC(6, 67),
   OPC_ATOMIC_G_XCHG = _OPC(6, 68),
   OPC_ATOMIC_G_INC = _OPC(6, 69),
   OPC_ATOMIC_G_DEC = _OPC(6, 70),
   OPC_ATOMIC_G_CMPXCHG = _OPC(6, 71),
   OPC_ATOMIC_G_MIN = _OPC(6, 72),
   OPC_ATOMIC_G_MAX = _OPC(6, 73),
   OPC_ATOMIC_G_AND = _OPC(6, 74),
   OPC_ATOMIC_G_OR = _OPC(6, 75),
   OPC_ATOMIC_G_XOR = _OPC(6, 76),

   OPC_LDG_A = _OPC(6, 77),
   OPC_STG_A = _OPC(6, 78),

   OPC_SPILL_MACRO = _OPC(6, 79),
   OPC_RELOAD_MACRO = _OPC(6, 80),

   OPC_LDC_K = _OPC(6, 81),
   OPC_STSC = _OPC(6, 82),
   OPC_LDG_K = _OPC(6, 83),

   /* A macro that expands to an stsc at the start of the preamble.
    * It loads into the const file and should not be optimized in any way.
    */
   OPC_PUSH_CONSTS_LOAD_MACRO = _OPC(6, 84),

   OPC_RAY_INTERSECTION = _OPC(6, 90),
   OPC_RESBASE = _OPC(6, 91),

   /* category 7: */
   OPC_BAR = _OPC(7, 0),
   OPC_FENCE = _OPC(7, 1),
   OPC_SLEEP = _OPC(7, 2),
   OPC_ICINV = _OPC(7, 3),
   OPC_DCCLN = _OPC(7, 4),
   OPC_DCINV = _OPC(7, 5),
   OPC_DCFLU = _OPC(7, 6),

   OPC_LOCK = _OPC(7, 7),
   OPC_UNLOCK = _OPC(7, 8),

   OPC_ALIAS = _OPC(7, 9),

   OPC_CCINV = _OPC(7, 10),

   /* meta instructions (category 8): */
#define OPC_META 8
   /* placeholder instr to mark shader inputs: */
   OPC_META_INPUT = _OPC(OPC_META, 0),
   /* The "collect" and "split" instructions are used for keeping
    * track of instructions that write to multiple dst registers
    * (split) like texture sample instructions, or read multiple
    * consecutive scalar registers (collect) (bary.f, texture samp)
    *
    * A "split" extracts a scalar component from a vecN, and a
    * "collect" gathers multiple scalar components into a vecN
    */
   OPC_META_SPLIT = _OPC(OPC_META, 2),
   OPC_META_COLLECT = _OPC(OPC_META, 3),

   /* placeholder for texture fetches that run before FS invocation
    * starts:
    */
   OPC_META_TEX_PREFETCH = _OPC(OPC_META, 4),

   /* Parallel copies have multiple destinations, and copy each source to
    * its corresponding destination. This happens "in parallel," meaning
    * that it happens as-if every source is read first and then every
    * destination is stored. These are produced in RA when register
    * shuffling is required, and then lowered away immediately afterwards.
    */
   OPC_META_PARALLEL_COPY = _OPC(OPC_META, 5),
   OPC_META_PHI = _OPC(OPC_META, 6),
   /*
    * A manually encoded opcode
    */
   OPC_META_RAW = _OPC(OPC_META, 7),
} opc_t;
/* clang-format on */

#define opc_cat(opc) ((int)((opc) >> NOPC_BITS))
#define opc_op(opc) ((unsigned)((opc) & ((1 << NOPC_BITS) - 1)))
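
/* For example, OPC_MAD_F32 == _OPC(3, 7), so the category and opcode can
 * be recovered from the packed value (compile-time check, assuming C11):
 */
_Static_assert(opc_cat(OPC_MAD_F32) == 3, "opc_cat() recovers the category");
_Static_assert(opc_op(OPC_MAD_F32) == 7, "opc_op() recovers the opcode");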

const char *disasm_a3xx_instr_name(opc_t opc);

typedef enum {
   TYPE_F16 = 0,
   TYPE_F32 = 1,
   TYPE_U16 = 2,
   TYPE_U32 = 3,
   TYPE_S16 = 4,
   TYPE_S32 = 5,
   TYPE_ATOMIC_U64 = 6, /* Only valid for a7xx atomics */
   TYPE_U8 = 6,
   TYPE_U8_32 = 7,
} type_t;

static inline uint32_t
type_size(type_t type)
{
   switch (type) {
   case TYPE_F32:
   case TYPE_U32:
   case TYPE_U8_32:
   case TYPE_S32:
      return 32;
   case TYPE_F16:
   case TYPE_U16:
   case TYPE_S16:
      return 16;
   case TYPE_U8:
      return 8;
   default:
      ir3_assert(0); /* invalid type */
      return 0;
   }
}

static inline type_t
type_uint_size(unsigned bit_size)
{
   switch (bit_size) {
   case 8:  return TYPE_U8;
   case 1:  /* 1b bools are treated as normal half-regs */
   case 16: return TYPE_U16;
   case 32: return TYPE_U32;
   case 64: return TYPE_U32;
   default:
      ir3_assert(0); /* invalid size */
      return (type_t)0;
   }
}

static inline type_t
type_float_size(unsigned bit_size)
{
   switch (bit_size) {
   case 16: return TYPE_F16;
   case 32: return TYPE_F32;
   default:
      ir3_assert(0); /* invalid size */
      return (type_t)0;
   }
}

static inline int
type_float(type_t type)
{
   return (type == TYPE_F32) || (type == TYPE_F16);
}

static inline int
type_uint(type_t type)
{
   return (type == TYPE_U32) || (type == TYPE_U16) || (type == TYPE_U8) ||
          (type == TYPE_U8_32);
}

static inline int
type_sint(type_t type)
{
   return (type == TYPE_S32) || (type == TYPE_S16);
}

typedef enum {
   ROUND_ZERO = 0,
   ROUND_EVEN = 1,
   ROUND_POS_INF = 2,
   ROUND_NEG_INF = 3,
} round_t;

/* comp:
 * 0 - x
 * 1 - y
 * 2 - z
 * 3 - w
 */
static inline uint32_t
regid(int num, int comp)
{
   return (num << 2) | (comp & 0x3);
}
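
/* For example, regid(2, 3) == ((2 << 2) | 3) == 11, i.e. r2.w; comp is
 * masked to two bits, so values 0..3 select .x/.y/.z/.w.
 */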

#define INVALID_REG regid(63, 0)
#define VALIDREG(r) ((r) != INVALID_REG)
#define CONDREG(r, val) COND(VALIDREG(r), (val))

/* special registers: */
#define REG_A0 61 /* address register */
#define REG_P0 62 /* predicate register */
#define REG_P0_X regid(REG_P0, 0) /* p0.x */

#define INVALID_CONST_REG UINT16_MAX

/* With is_bindless_s2en = 1, this determines whether bindless is enabled and
 * if so, how to get the (base, index) pair for both sampler and texture.
 * There is a single base embedded in the instruction, which is always used
 * for the texture.
 */
typedef enum {
   /* Use traditional GL binding model, get texture and sampler index from src3
    * which is presumed to be uniform on a4xx+ (a3xx doesn't have the other
    * modes, but does handle non-uniform indexing).
    */
   CAT5_UNIFORM = 0,

   /* The sampler base comes from the low 3 bits of a1.x, and the sampler
    * and texture index come from src3 which is presumed to be uniform.
    */
   CAT5_BINDLESS_A1_UNIFORM = 1,

   /* The texture and sampler share the same base, and the sampler and
    * texture index come from src3 which is *not* presumed to be uniform.
    */
   CAT5_BINDLESS_NONUNIFORM = 2,

   /* The sampler base comes from the low 3 bits of a1.x, and the sampler
    * and texture index come from src3 which is *not* presumed to be
    * uniform.
    */
   CAT5_BINDLESS_A1_NONUNIFORM = 3,

   /* Use traditional GL binding model, get texture and sampler index
    * from src3 which is *not* presumed to be uniform.
    */
   CAT5_NONUNIFORM = 4,

   /* The texture and sampler share the same base, and the sampler and
    * texture index come from src3 which is presumed to be uniform.
    */
   CAT5_BINDLESS_UNIFORM = 5,

   /* The texture and sampler share the same base, get sampler index from low
    * 4 bits of src3 and texture index from high 4 bits.
    */
   CAT5_BINDLESS_IMM = 6,

   /* The sampler base comes from the low 3 bits of a1.x, and the texture
    * index comes from the next 8 bits of a1.x. The sampler index is an
    * immediate in src3.
    */
   CAT5_BINDLESS_A1_IMM = 7,
} cat5_desc_mode_t;
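
/* Illustrative src3 packing for CAT5_BINDLESS_IMM, per the description
 * above (a sketch; the index names are hypothetical):
 *
 *    src3 = (tex_index << 4) | (samp_index & 0xf);
 */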

/* Similar to cat5_desc_mode_t, describes how the descriptor is loaded.
 */
typedef enum {
   /* Use old GL binding model with an immediate index. */
   CAT6_IMM = 0,

   CAT6_UNIFORM = 1,

   CAT6_NONUNIFORM = 2,

   /* Use the bindless model, with an immediate index.
    */
   CAT6_BINDLESS_IMM = 4,

   /* Use the bindless model, with a uniform register index.
    */
   CAT6_BINDLESS_UNIFORM = 5,

   /* Use the bindless model, with a register index that isn't guaranteed
    * to be uniform. Presumably the hardware checks whether the indices are
    * equal across lanes and splits up the load/store as needed, because it
    * works the way you would expect.
    */
   CAT6_BINDLESS_NONUNIFORM = 6,
} cat6_desc_mode_t;

static inline bool
is_sat_compatible(opc_t opc)
{
   /* On a6xx saturation doesn't work on cat4 */
   if (opc_cat(opc) != 2 && opc_cat(opc) != 3)
      return false;

   switch (opc) {
   /* On a3xx and a6xx saturation doesn't work on bary.f/flat.b */
   case OPC_BARY_F:
   case OPC_FLAT_B:
   /* On a6xx saturation doesn't work on sel.* */
   case OPC_SEL_B16:
   case OPC_SEL_B32:
   case OPC_SEL_S16:
   case OPC_SEL_S32:
   case OPC_SEL_F16:
   case OPC_SEL_F32:
      return false;
   default:
      return true;
   }
}

static inline bool
is_mad(opc_t opc)
{
   switch (opc) {
   case OPC_MAD_U16:
   case OPC_MAD_S16:
   case OPC_MAD_U24:
   case OPC_MAD_S24:
   case OPC_MAD_F16:
   case OPC_MAD_F32:
      return true;
   default:
      return false;
   }
}

static inline bool
is_madsh(opc_t opc)
{
   switch (opc) {
   case OPC_MADSH_U16:
   case OPC_MADSH_M16:
      return true;
   default:
      return false;
   }
}

static inline bool
is_sad(opc_t opc)
{
   switch (opc) {
   case OPC_SAD_S16:
   case OPC_SAD_S32:
      return true;
   default:
      return false;
   }
}

static inline bool
is_local_atomic(opc_t opc)
{
   switch (opc) {
   case OPC_ATOMIC_ADD:
   case OPC_ATOMIC_SUB:
   case OPC_ATOMIC_XCHG:
   case OPC_ATOMIC_INC:
   case OPC_ATOMIC_DEC:
   case OPC_ATOMIC_CMPXCHG:
   case OPC_ATOMIC_MIN:
   case OPC_ATOMIC_MAX:
   case OPC_ATOMIC_AND:
   case OPC_ATOMIC_OR:
   case OPC_ATOMIC_XOR:
      return true;
   default:
      return false;
   }
}

static inline bool
is_global_a3xx_atomic(opc_t opc)
{
   switch (opc) {
   case OPC_ATOMIC_S_ADD:
   case OPC_ATOMIC_S_SUB:
   case OPC_ATOMIC_S_XCHG:
   case OPC_ATOMIC_S_INC:
   case OPC_ATOMIC_S_DEC:
   case OPC_ATOMIC_S_CMPXCHG:
   case OPC_ATOMIC_S_MIN:
   case OPC_ATOMIC_S_MAX:
   case OPC_ATOMIC_S_AND:
   case OPC_ATOMIC_S_OR:
   case OPC_ATOMIC_S_XOR:
      return true;
   default:
      return false;
   }
}

static inline bool
is_global_a6xx_atomic(opc_t opc)
{
   switch (opc) {
   case OPC_ATOMIC_G_ADD:
   case OPC_ATOMIC_G_SUB:
   case OPC_ATOMIC_G_XCHG:
   case OPC_ATOMIC_G_INC:
   case OPC_ATOMIC_G_DEC:
   case OPC_ATOMIC_G_CMPXCHG:
   case OPC_ATOMIC_G_MIN:
   case OPC_ATOMIC_G_MAX:
   case OPC_ATOMIC_G_AND:
   case OPC_ATOMIC_G_OR:
   case OPC_ATOMIC_G_XOR:
      return true;
   default:
      return false;
   }
}

static inline bool
is_bindless_atomic(opc_t opc)
{
   switch (opc) {
   case OPC_ATOMIC_B_ADD:
   case OPC_ATOMIC_B_SUB:
   case OPC_ATOMIC_B_XCHG:
   case OPC_ATOMIC_B_INC:
   case OPC_ATOMIC_B_DEC:
   case OPC_ATOMIC_B_CMPXCHG:
   case OPC_ATOMIC_B_MIN:
   case OPC_ATOMIC_B_MAX:
   case OPC_ATOMIC_B_AND:
   case OPC_ATOMIC_B_OR:
   case OPC_ATOMIC_B_XOR:
      return true;
   default:
      return false;
   }
}

static inline bool
is_atomic(opc_t opc)
{
   return is_local_atomic(opc) || is_global_a3xx_atomic(opc) ||
          is_global_a6xx_atomic(opc) || is_bindless_atomic(opc);
}

static inline bool
is_ssbo(opc_t opc)
{
   switch (opc) {
   case OPC_RESFMT:
   case OPC_RESINFO:
   case OPC_LDGB:
   case OPC_STGB:
   case OPC_STIB:
      return true;
   default:
      return false;
   }
}

static inline bool
is_isam(opc_t opc)
{
   switch (opc) {
   case OPC_ISAM:
   case OPC_ISAML:
   case OPC_ISAMM:
      return true;
   default:
      return false;
   }
}

static inline bool
is_cat2_float(opc_t opc)
{
   switch (opc) {
   case OPC_ADD_F:
   case OPC_MIN_F:
   case OPC_MAX_F:
   case OPC_MUL_F:
   case OPC_SIGN_F:
   case OPC_CMPS_F:
   case OPC_ABSNEG_F:
   case OPC_CMPV_F:
   case OPC_FLOOR_F:
   case OPC_CEIL_F:
   case OPC_RNDNE_F:
   case OPC_RNDAZ_F:
   case OPC_TRUNC_F:
      return true;

   default:
      return false;
   }
}

static inline bool
is_cat3_float(opc_t opc)
{
   switch (opc) {
   case OPC_MAD_F16:
   case OPC_MAD_F32:
   case OPC_SEL_F16:
   case OPC_SEL_F32:
      return true;
   default:
      return false;
   }
}

static inline bool
is_cat3_alt(opc_t opc)
{
   switch (opc) {
   case OPC_SHLM:
   case OPC_SHRM:
   case OPC_SHLG:
   case OPC_SHRG:
   case OPC_ANDG:
      return true;
   default:
      return false;
   }
}

#endif /* INSTR_A3XX_H_ */