1 /*
2 * Copyright (c) 2013 Rob Clark <robdclark@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #ifndef INSTR_A3XX_H_
25 #define INSTR_A3XX_H_
26
/* Shorthand for the GCC/Clang attribute that removes padding from a
 * struct/union layout.
 */
#define PACKED __attribute__((__packed__))
28
29 #include <assert.h>
30 #include <stdbool.h>
31 #include <stdint.h>
32 #include <stdio.h>
33
34 /* clang-format off */
/* Optional hook called by ir3_assert() when an assertion fails.
 *
 * The symbol is declared weak, so a program does not have to provide a
 * definition; ir3_assert() tests the symbol before calling it.  It is
 * also declared noreturn, i.e. an implementation must not return to the
 * caller.
 *
 * expr: the failed expression, as a string
 * file: source file of the failed assertion
 * line: source line of the failed assertion
 * func: name of the function containing the failed assertion
 */
void ir3_assert_handler(const char *expr, const char *file, int line,
                        const char *func) __attribute__((weak)) __attribute__((__noreturn__));
37 /* clang-format on */
38
/* A wrapper for assert() that allows overriding handling of a failed
 * assert.  This is needed for tools like crashdec which can want to
 * attempt to disassemble memory that might not actually be valid
 * instructions.
 *
 * When the expression is false and the weak ir3_assert_handler symbol
 * is defined, the handler is called with the stringified expression and
 * the source location.  Otherwise (or if the handler were to return)
 * the standard assert() is used.
 *
 * NOTE: on failure the expression is evaluated a second time by
 * assert() (unless NDEBUG is defined), so it must not have side
 * effects.
 */
#define ir3_assert(expr) \
   do { \
      if (!(expr)) { \
         if (ir3_assert_handler) { \
            ir3_assert_handler(#expr, __FILE__, __LINE__, __func__); \
         } \
         assert(expr); \
      } \
   } while (0)
/* size of largest OPC field of all the instruction categories: */
#define NOPC_BITS 7

/* Pack an instruction category and a per-category opcode into a single
 * value: the category occupies the bits above NOPC_BITS, the opcode the
 * low NOPC_BITS bits.
 *
 * Both arguments are parenthesized so that argument expressions using
 * operators with lower precedence than '|' (e.g. ?:) expand correctly.
 */
#define _OPC(cat, opc) (((cat) << NOPC_BITS) | (opc))
57
58 /* clang-format off */
/* Opcode identifiers for all instruction categories.
 *
 * Every value is built with _OPC(cat, opc), i.e. the category number is
 * stored in the bits above NOPC_BITS and the opcode within that category
 * in the low NOPC_BITS bits.
 *
 * Besides the opcodes themselves, the enum also contains entries that
 * the comments below describe as "logical"/"virtual" opcodes and macros,
 * as well as the meta instructions of category OPC_META.
 */
typedef enum {
   /* category 0: */
   OPC_NOP = _OPC(0, 0),
   OPC_B = _OPC(0, 1),
   OPC_JUMP = _OPC(0, 2),
   OPC_CALL = _OPC(0, 3),
   OPC_RET = _OPC(0, 4),
   OPC_KILL = _OPC(0, 5),
   OPC_END = _OPC(0, 6),
   OPC_EMIT = _OPC(0, 7),
   OPC_CUT = _OPC(0, 8),
   OPC_CHMASK = _OPC(0, 9),
   OPC_CHSH = _OPC(0, 10),
   OPC_FLOW_REV = _OPC(0, 11),

   OPC_BKT = _OPC(0, 16),
   OPC_STKS = _OPC(0, 17),
   OPC_STKR = _OPC(0, 18),
   OPC_XSET = _OPC(0, 19),
   OPC_XCLR = _OPC(0, 20),
   OPC_GETONE = _OPC(0, 21),
   OPC_DBG = _OPC(0, 22),
   OPC_SHPS = _OPC(0, 23), /* shader prologue start */
   OPC_SHPE = _OPC(0, 24), /* shader prologue end */
   OPC_GETLAST = _OPC(0, 25),

   OPC_PREDT = _OPC(0, 29), /* predicated true */
   OPC_PREDF = _OPC(0, 30), /* predicated false */
   OPC_PREDE = _OPC(0, 31), /* predicated end */

   /* Logical opcodes for different branch instruction variations: */
   OPC_BR = _OPC(0, 40),
   OPC_BRAO = _OPC(0, 41),
   OPC_BRAA = _OPC(0, 42),
   OPC_BRAC = _OPC(0, 43),
   OPC_BANY = _OPC(0, 44),
   OPC_BALL = _OPC(0, 45),
   OPC_BRAX = _OPC(0, 46),

   /* Logical opcode to distinguish kill and demote */
   OPC_DEMOTE = _OPC(0, 47),

   /* category 1: */
   OPC_MOV = _OPC(1, 0),
   OPC_MOVP = _OPC(1, 1),
   /* swz, gat, sct */
   OPC_MOVMSK = _OPC(1, 3),

   /* Virtual opcodes for instructions differentiated via a "sub-opcode" that
    * replaces the repeat field:
    */
   OPC_SWZ = _OPC(1, 4),
   OPC_GAT = _OPC(1, 5),
   OPC_SCT = _OPC(1, 6),

   /* Logical opcodes for different variants of mov: */
   OPC_MOV_IMMED = _OPC(1, 40),
   OPC_MOV_CONST = _OPC(1, 41),
   OPC_MOV_GPR = _OPC(1, 42),
   OPC_MOV_RELGPR = _OPC(1, 43),
   OPC_MOV_RELCONST = _OPC(1, 44),

   /* Macros that expand to an if statement + move */
   OPC_BALLOT_MACRO = _OPC(1, 50),
   OPC_ANY_MACRO = _OPC(1, 51),
   OPC_ALL_MACRO = _OPC(1, 52),
   OPC_ELECT_MACRO = _OPC(1, 53),
   OPC_READ_COND_MACRO = _OPC(1, 54),
   OPC_READ_FIRST_MACRO = _OPC(1, 55),
   OPC_SWZ_SHARED_MACRO = _OPC(1, 56),
   OPC_SHPS_MACRO = _OPC(1, 57),

   /* Macros that expand to a loop */
   OPC_SCAN_MACRO = _OPC(1, 58),
   OPC_SCAN_CLUSTERS_MACRO = _OPC(1, 60),

   /* Macros that expand to an stsc at the start of the preamble.
    * It loads into const file and should not be optimized in any way.
    */
   OPC_PUSH_CONSTS_LOAD_MACRO = _OPC(1, 59),

   /* category 2: */
   OPC_ADD_F = _OPC(2, 0),
   OPC_MIN_F = _OPC(2, 1),
   OPC_MAX_F = _OPC(2, 2),
   OPC_MUL_F = _OPC(2, 3),
   OPC_SIGN_F = _OPC(2, 4),
   OPC_CMPS_F = _OPC(2, 5),
   OPC_ABSNEG_F = _OPC(2, 6),
   OPC_CMPV_F = _OPC(2, 7),
   /* 8 - invalid */
   OPC_FLOOR_F = _OPC(2, 9),
   OPC_CEIL_F = _OPC(2, 10),
   OPC_RNDNE_F = _OPC(2, 11),
   OPC_RNDAZ_F = _OPC(2, 12),
   OPC_TRUNC_F = _OPC(2, 13),
   /* 14-15 - invalid */
   OPC_ADD_U = _OPC(2, 16),
   OPC_ADD_S = _OPC(2, 17),
   OPC_SUB_U = _OPC(2, 18),
   OPC_SUB_S = _OPC(2, 19),
   OPC_CMPS_U = _OPC(2, 20),
   OPC_CMPS_S = _OPC(2, 21),
   OPC_MIN_U = _OPC(2, 22),
   OPC_MIN_S = _OPC(2, 23),
   OPC_MAX_U = _OPC(2, 24),
   OPC_MAX_S = _OPC(2, 25),
   OPC_ABSNEG_S = _OPC(2, 26),
   /* 27 - invalid */
   OPC_AND_B = _OPC(2, 28),
   OPC_OR_B = _OPC(2, 29),
   OPC_NOT_B = _OPC(2, 30),
   OPC_XOR_B = _OPC(2, 31),
   /* 32 - invalid */
   OPC_CMPV_U = _OPC(2, 33),
   OPC_CMPV_S = _OPC(2, 34),
   /* 35-47 - invalid */
   OPC_MUL_U24 = _OPC(2, 48), /* 24b mul into 32b result */
   OPC_MUL_S24 = _OPC(2, 49), /* 24b mul into 32b result with sign extension */
   OPC_MULL_U = _OPC(2, 50),
   OPC_BFREV_B = _OPC(2, 51),
   OPC_CLZ_S = _OPC(2, 52),
   OPC_CLZ_B = _OPC(2, 53),
   OPC_SHL_B = _OPC(2, 54),
   OPC_SHR_B = _OPC(2, 55),
   OPC_ASHR_B = _OPC(2, 56),
   OPC_BARY_F = _OPC(2, 57),
   OPC_MGEN_B = _OPC(2, 58),
   OPC_GETBIT_B = _OPC(2, 59),
   OPC_SETRM = _OPC(2, 60),
   OPC_CBITS_B = _OPC(2, 61),
   OPC_SHB = _OPC(2, 62),
   OPC_MSAD = _OPC(2, 63),
   OPC_FLAT_B = _OPC(2, 64),

   /* category 3: */
   OPC_MAD_U16 = _OPC(3, 0),
   OPC_MADSH_U16 = _OPC(3, 1),
   OPC_MAD_S16 = _OPC(3, 2),
   OPC_MADSH_M16 = _OPC(3, 3), /* should this be .s16? */
   OPC_MAD_U24 = _OPC(3, 4),
   OPC_MAD_S24 = _OPC(3, 5),
   OPC_MAD_F16 = _OPC(3, 6),
   OPC_MAD_F32 = _OPC(3, 7),
   OPC_SEL_B16 = _OPC(3, 8),
   OPC_SEL_B32 = _OPC(3, 9),
   OPC_SEL_S16 = _OPC(3, 10),
   OPC_SEL_S32 = _OPC(3, 11),
   OPC_SEL_F16 = _OPC(3, 12),
   OPC_SEL_F32 = _OPC(3, 13),
   OPC_SAD_S16 = _OPC(3, 14),
   OPC_SAD_S32 = _OPC(3, 15),
   OPC_SHRM = _OPC(3, 16),
   OPC_SHLM = _OPC(3, 17),
   OPC_SHRG = _OPC(3, 18),
   OPC_SHLG = _OPC(3, 19),
   OPC_ANDG = _OPC(3, 20),
   OPC_DP2ACC = _OPC(3, 21),
   OPC_DP4ACC = _OPC(3, 22),
   OPC_WMM = _OPC(3, 23),
   OPC_WMM_ACCU = _OPC(3, 24),

   /* category 4: */
   OPC_RCP = _OPC(4, 0),
   OPC_RSQ = _OPC(4, 1),
   OPC_LOG2 = _OPC(4, 2),
   OPC_EXP2 = _OPC(4, 3),
   OPC_SIN = _OPC(4, 4),
   OPC_COS = _OPC(4, 5),
   OPC_SQRT = _OPC(4, 6),
   /* NOTE that these are 8+opc from their highp equivs, so it's possible
    * that the high order bit in the opc field has been repurposed for
    * half-precision use?  But note that other ops (rcp/sin/cos/sqrt)
    * still use the same opc as highp
    */
   OPC_HRSQ = _OPC(4, 9),
   OPC_HLOG2 = _OPC(4, 10),
   OPC_HEXP2 = _OPC(4, 11),

   /* category 5: */
   OPC_ISAM = _OPC(5, 0),
   OPC_ISAML = _OPC(5, 1),
   OPC_ISAMM = _OPC(5, 2),
   OPC_SAM = _OPC(5, 3),
   OPC_SAMB = _OPC(5, 4),
   OPC_SAML = _OPC(5, 5),
   OPC_SAMGQ = _OPC(5, 6),
   OPC_GETLOD = _OPC(5, 7),
   OPC_CONV = _OPC(5, 8),
   OPC_CONVM = _OPC(5, 9),
   OPC_GETSIZE = _OPC(5, 10),
   OPC_GETBUF = _OPC(5, 11),
   OPC_GETPOS = _OPC(5, 12),
   OPC_GETINFO = _OPC(5, 13),
   OPC_DSX = _OPC(5, 14),
   OPC_DSY = _OPC(5, 15),
   OPC_GATHER4R = _OPC(5, 16),
   OPC_GATHER4G = _OPC(5, 17),
   OPC_GATHER4B = _OPC(5, 18),
   OPC_GATHER4A = _OPC(5, 19),
   OPC_SAMGP0 = _OPC(5, 20),
   OPC_SAMGP1 = _OPC(5, 21),
   OPC_SAMGP2 = _OPC(5, 22),
   OPC_SAMGP3 = _OPC(5, 23),
   OPC_DSXPP_1 = _OPC(5, 24),
   OPC_DSYPP_1 = _OPC(5, 25),
   OPC_RGETPOS = _OPC(5, 26),
   OPC_RGETINFO = _OPC(5, 27),
   OPC_BRCST_ACTIVE = _OPC(5, 28),
   OPC_QUAD_SHUFFLE_BRCST = _OPC(5, 29),
   OPC_QUAD_SHUFFLE_HORIZ = _OPC(5, 30),
   OPC_QUAD_SHUFFLE_VERT = _OPC(5, 31),
   OPC_QUAD_SHUFFLE_DIAG = _OPC(5, 32),
   OPC_TCINV = _OPC(5, 33),
   /* cat5 meta instructions, placed above the cat5 opc field's size */
   OPC_DSXPP_MACRO = _OPC(5, 35),
   OPC_DSYPP_MACRO = _OPC(5, 36),

   /* category 6: */
   OPC_LDG = _OPC(6, 0), /* load-global */
   OPC_LDL = _OPC(6, 1),
   OPC_LDP = _OPC(6, 2),
   OPC_STG = _OPC(6, 3), /* store-global */
   OPC_STL = _OPC(6, 4),
   OPC_STP = _OPC(6, 5),
   OPC_LDIB = _OPC(6, 6),
   OPC_G2L = _OPC(6, 7),
   OPC_L2G = _OPC(6, 8),
   OPC_PREFETCH = _OPC(6, 9),
   OPC_LDLW = _OPC(6, 10),
   OPC_STLW = _OPC(6, 11),
   OPC_RESFMT = _OPC(6, 14),
   OPC_RESINFO = _OPC(6, 15),
   OPC_ATOMIC_ADD = _OPC(6, 16),
   OPC_ATOMIC_SUB = _OPC(6, 17),
   OPC_ATOMIC_XCHG = _OPC(6, 18),
   OPC_ATOMIC_INC = _OPC(6, 19),
   OPC_ATOMIC_DEC = _OPC(6, 20),
   OPC_ATOMIC_CMPXCHG = _OPC(6, 21),
   OPC_ATOMIC_MIN = _OPC(6, 22),
   OPC_ATOMIC_MAX = _OPC(6, 23),
   OPC_ATOMIC_AND = _OPC(6, 24),
   OPC_ATOMIC_OR = _OPC(6, 25),
   OPC_ATOMIC_XOR = _OPC(6, 26),
   OPC_LDGB = _OPC(6, 27),
   OPC_STGB = _OPC(6, 28),
   OPC_STIB = _OPC(6, 29),
   OPC_LDC = _OPC(6, 30),
   OPC_LDLV = _OPC(6, 31),
   OPC_PIPR = _OPC(6, 32), /* ??? */
   OPC_PIPC = _OPC(6, 33), /* ??? */
   OPC_EMIT2 = _OPC(6, 34), /* ??? */
   OPC_ENDLS = _OPC(6, 35), /* ??? */
   OPC_GETSPID = _OPC(6, 36), /* SP ID */
   OPC_GETWID = _OPC(6, 37), /* wavefront ID */
   OPC_GETFIBERID = _OPC(6, 38), /* fiber ID */

   /* Logical opcodes for things that differ in a6xx+ */
   OPC_STC = _OPC(6, 40),
   OPC_RESINFO_B = _OPC(6, 41),
   OPC_LDIB_B = _OPC(6, 42),
   OPC_STIB_B = _OPC(6, 43),

   /* Logical opcodes for different atomic instruction variations: */
   OPC_ATOMIC_B_ADD = _OPC(6, 44),
   OPC_ATOMIC_B_SUB = _OPC(6, 45),
   OPC_ATOMIC_B_XCHG = _OPC(6, 46),
   OPC_ATOMIC_B_INC = _OPC(6, 47),
   OPC_ATOMIC_B_DEC = _OPC(6, 48),
   OPC_ATOMIC_B_CMPXCHG = _OPC(6, 49),
   OPC_ATOMIC_B_MIN = _OPC(6, 50),
   OPC_ATOMIC_B_MAX = _OPC(6, 51),
   OPC_ATOMIC_B_AND = _OPC(6, 52),
   OPC_ATOMIC_B_OR = _OPC(6, 53),
   OPC_ATOMIC_B_XOR = _OPC(6, 54),

   OPC_ATOMIC_S_ADD = _OPC(6, 55),
   OPC_ATOMIC_S_SUB = _OPC(6, 56),
   OPC_ATOMIC_S_XCHG = _OPC(6, 57),
   OPC_ATOMIC_S_INC = _OPC(6, 58),
   OPC_ATOMIC_S_DEC = _OPC(6, 59),
   OPC_ATOMIC_S_CMPXCHG = _OPC(6, 60),
   OPC_ATOMIC_S_MIN = _OPC(6, 61),
   OPC_ATOMIC_S_MAX = _OPC(6, 62),
   OPC_ATOMIC_S_AND = _OPC(6, 63),
   OPC_ATOMIC_S_OR = _OPC(6, 64),
   OPC_ATOMIC_S_XOR = _OPC(6, 65),

   OPC_ATOMIC_G_ADD = _OPC(6, 66),
   OPC_ATOMIC_G_SUB = _OPC(6, 67),
   OPC_ATOMIC_G_XCHG = _OPC(6, 68),
   OPC_ATOMIC_G_INC = _OPC(6, 69),
   OPC_ATOMIC_G_DEC = _OPC(6, 70),
   OPC_ATOMIC_G_CMPXCHG = _OPC(6, 71),
   OPC_ATOMIC_G_MIN = _OPC(6, 72),
   OPC_ATOMIC_G_MAX = _OPC(6, 73),
   OPC_ATOMIC_G_AND = _OPC(6, 74),
   OPC_ATOMIC_G_OR = _OPC(6, 75),
   OPC_ATOMIC_G_XOR = _OPC(6, 76),

   OPC_LDG_A = _OPC(6, 77),
   OPC_STG_A = _OPC(6, 78),

   OPC_SPILL_MACRO = _OPC(6, 79),
   OPC_RELOAD_MACRO = _OPC(6, 80),

   OPC_LDC_K = _OPC(6, 81),
   OPC_STSC = _OPC(6, 82),
   OPC_LDG_K = _OPC(6, 83),

   /* category 7: */
   OPC_BAR = _OPC(7, 0),
   OPC_FENCE = _OPC(7, 1),
   OPC_SLEEP = _OPC(7, 2),
   OPC_ICINV = _OPC(7, 3),
   OPC_DCCLN = _OPC(7, 4),
   OPC_DCINV = _OPC(7, 5),
   OPC_DCFLU = _OPC(7, 6),

   OPC_LOCK = _OPC(7, 7),
   OPC_UNLOCK = _OPC(7, 8),

   OPC_ALIAS = _OPC(7, 9),

   OPC_CCINV = _OPC(7, 10),

   /* meta instructions (category 8): */
#define OPC_META 8
   /* placeholder instr to mark shader inputs: */
   OPC_META_INPUT = _OPC(OPC_META, 0),
   /* The "collect" and "split" instructions are used for keeping
    * track of instructions that write to multiple dst registers
    * (split) like texture sample instructions, or read multiple
    * consecutive scalar registers (collect) (bary.f, texture samp)
    *
    * A "split" extracts a scalar component from a vecN, and a
    * "collect" gathers multiple scalar components into a vecN
    */
   OPC_META_SPLIT = _OPC(OPC_META, 2),
   OPC_META_COLLECT = _OPC(OPC_META, 3),

   /* placeholder for texture fetches that run before FS invocation
    * starts:
    */
   OPC_META_TEX_PREFETCH = _OPC(OPC_META, 4),

   /* Parallel copies have multiple destinations, and copy each destination
    * to its corresponding source. This happens "in parallel," meaning that
    * it happens as-if every source is read first and then every destination
    * is stored. These are produced in RA when register shuffling is
    * required, and then lowered away immediately afterwards.
    */
   OPC_META_PARALLEL_COPY = _OPC(OPC_META, 5),
   OPC_META_PHI = _OPC(OPC_META, 6),
   /*
    * A manually encoded opcode
    */
   OPC_META_RAW = _OPC(OPC_META, 7),
} opc_t;
418 /* clang-format on */
419
/* Split an opc_t value back into its two components:
 *   opc_cat() - the instruction category (the bits above NOPC_BITS), as int
 *   opc_op()  - the opcode within the category (the low NOPC_BITS bits),
 *               as unsigned
 */
#define opc_cat(opc) ((int)((opc) >> NOPC_BITS))
#define opc_op(opc) ((unsigned)((opc) & ((1 << NOPC_BITS) - 1)))
422
/* Returns a name string for the given opcode.  Only declared here; the
 * definition lives outside this header.
 * NOTE(review): presumably the disassembly mnemonic -- confirm against the
 * implementation, including what is returned for unknown opcodes.
 */
const char *disasm_a3xx_instr_name(opc_t opc);
424
/* Data type identifiers.  The helpers in this header classify them by
 * width (type_size()) and by kind (type_float(), type_uint(),
 * type_sint()).
 */
typedef enum {
   TYPE_F16 = 0,
   TYPE_F32 = 1,
   TYPE_U16 = 2,
   TYPE_U32 = 3,
   TYPE_S16 = 4,
   TYPE_S32 = 5,
   TYPE_U8 = 6,
   TYPE_S8 = 7, // XXX I assume?
} type_t;
435
436 static inline uint32_t
type_size(type_t type)437 type_size(type_t type)
438 {
439 switch (type) {
440 case TYPE_F32:
441 case TYPE_U32:
442 case TYPE_S32:
443 return 32;
444 case TYPE_F16:
445 case TYPE_U16:
446 case TYPE_S16:
447 return 16;
448 case TYPE_U8:
449 case TYPE_S8:
450 return 8;
451 default:
452 ir3_assert(0); /* invalid type */
453 return 0;
454 }
455 }
456
457 static inline type_t
type_uint_size(unsigned bit_size)458 type_uint_size(unsigned bit_size)
459 {
460 switch (bit_size) {
461 case 8: return TYPE_U8;
462 case 1: /* 1b bools are treated as normal half-regs */
463 case 16: return TYPE_U16;
464 case 32: return TYPE_U32;
465 default:
466 ir3_assert(0); /* invalid size */
467 return (type_t)0;
468 }
469 }
470
471 static inline type_t
type_float_size(unsigned bit_size)472 type_float_size(unsigned bit_size)
473 {
474 switch (bit_size) {
475 case 16: return TYPE_F16;
476 case 32: return TYPE_F32;
477 default:
478 ir3_assert(0); /* invalid size */
479 return (type_t)0;
480 }
481 }
482
483 static inline int
type_float(type_t type)484 type_float(type_t type)
485 {
486 return (type == TYPE_F32) || (type == TYPE_F16);
487 }
488
489 static inline int
type_uint(type_t type)490 type_uint(type_t type)
491 {
492 return (type == TYPE_U32) || (type == TYPE_U16) || (type == TYPE_U8);
493 }
494
495 static inline int
type_sint(type_t type)496 type_sint(type_t type)
497 {
498 return (type == TYPE_S32) || (type == TYPE_S16) || (type == TYPE_S8);
499 }
500
/* Rounding mode identifiers.
 * NOTE(review): going by the names these select round-toward-zero,
 * round-to-even and rounding toward +/- infinity -- nothing in this
 * header uses them, so confirm against the users.
 */
typedef enum {
   ROUND_ZERO = 0,
   ROUND_EVEN = 1,
   ROUND_POS_INF = 2,
   ROUND_NEG_INF = 3,
} round_t;
507
/* Build a register id from a register number and a component.
 *
 * The component lives in the two low bits (only comp's low two bits are
 * used), the register number in the bits above them.
 *
 * comp:
 *   0 - x
 *   1 - y
 *   2 - z
 *   3 - w
 */
static inline uint32_t
regid(int num, int comp)
{
   const int component = comp & 0x3;

   return (num << 2) | component;
}
519
/* INVALID_REG is the register id of register number 63, component x;
 * VALIDREG() tests a register id against it.
 *
 * CONDREG() relies on a COND() macro that is not defined in this header
 * and has to be provided by the includer.
 */
#define INVALID_REG regid(63, 0)
#define VALIDREG(r) ((r) != INVALID_REG)
#define CONDREG(r, val) COND(VALIDREG(r), (val))

/* special registers: */
#define REG_A0 61 /* address register */
#define REG_P0 62 /* predicate register */
527
/* Branch types; the comment on each value names the corresponding branch
 * mnemonic.
 */
typedef enum {
   BRANCH_PLAIN = 0, /* br */
   BRANCH_OR = 1, /* brao */
   BRANCH_AND = 2, /* braa */
   BRANCH_CONST = 3, /* brac */
   BRANCH_ANY = 4, /* bany */
   BRANCH_ALL = 5, /* ball */
   BRANCH_X = 6, /* brax ??? */
} brtype_t;
537
/* Descriptor mode for cat5 (texture/sampler) instructions.
 *
 * With is_bindless_s2en = 1, this determines whether bindless is enabled and
 * if so, how to get the (base, index) pair for both sampler and texture.
 * There is a single base embedded in the instruction, which is always used
 * for the texture.
 */
typedef enum {
   /* Use traditional GL binding model, get texture and sampler index from src3
    * which is presumed to be uniform on a4xx+ (a3xx doesn't have the other
    * modes, but does handle non-uniform indexing).
    */
   CAT5_UNIFORM = 0,

   /* The sampler base comes from the low 3 bits of a1.x, and the sampler
    * and texture index come from src3 which is presumed to be uniform.
    */
   CAT5_BINDLESS_A1_UNIFORM = 1,

   /* The texture and sampler share the same base, and the sampler and
    * texture index come from src3 which is *not* presumed to be uniform.
    */
   CAT5_BINDLESS_NONUNIFORM = 2,

   /* The sampler base comes from the low 3 bits of a1.x, and the sampler
    * and texture index come from src3 which is *not* presumed to be
    * uniform.
    */
   CAT5_BINDLESS_A1_NONUNIFORM = 3,

   /* Use traditional GL binding model, get texture and sampler index
    * from src3 which is *not* presumed to be uniform.
    */
   CAT5_NONUNIFORM = 4,

   /* The texture and sampler share the same base, and the sampler and
    * texture index come from src3 which is presumed to be uniform.
    */
   CAT5_BINDLESS_UNIFORM = 5,

   /* The texture and sampler share the same base, get sampler index from low
    * 4 bits of src3 and texture index from high 4 bits.
    */
   CAT5_BINDLESS_IMM = 6,

   /* The sampler base comes from the low 3 bits of a1.x, and the texture
    * index comes from the next 8 bits of a1.x. The sampler index is an
    * immediate in src3.
    */
   CAT5_BINDLESS_A1_IMM = 7,
} cat5_desc_mode_t;
587
/* Similar to cat5_desc_mode_t, describes how the descriptor is loaded.
 *
 * Note that the values 3 and 7 are not assigned.
 */
typedef enum {
   /* Use old GL binding model with an immediate index. */
   CAT6_IMM = 0,

   /* NOTE(review): by analogy with the bindless variants below, presumably
    * the old GL binding model with a uniform register index -- confirm.
    */
   CAT6_UNIFORM = 1,

   /* NOTE(review): by analogy with the bindless variants below, presumably
    * the old GL binding model with a register index that isn't guaranteed
    * to be uniform -- confirm.
    */
   CAT6_NONUNIFORM = 2,

   /* Use the bindless model, with an immediate index.
    */
   CAT6_BINDLESS_IMM = 4,

   /* Use the bindless model, with a uniform register index.
    */
   CAT6_BINDLESS_UNIFORM = 5,

   /* Use the bindless model, with a register index that isn't guaranteed
    * to be uniform. This presumably checks if the indices are equal and
    * splits up the load/store, because it works the way you would
    * expect.
    */
   CAT6_BINDLESS_NONUNIFORM = 6,
} cat6_desc_mode_t;
613
614 static inline bool
is_sat_compatible(opc_t opc)615 is_sat_compatible(opc_t opc)
616 {
617 /* On a6xx saturation doesn't work on cat4 */
618 if (opc_cat(opc) != 2 && opc_cat(opc) != 3)
619 return false;
620
621 switch (opc) {
622 /* On a3xx and a6xx saturation doesn't work on bary.f */
623 case OPC_BARY_F:
624 /* On a6xx saturation doesn't work on sel.* */
625 case OPC_SEL_B16:
626 case OPC_SEL_B32:
627 case OPC_SEL_S16:
628 case OPC_SEL_S32:
629 case OPC_SEL_F16:
630 case OPC_SEL_F32:
631 return false;
632 default:
633 return true;
634 }
635 }
636
637 static inline bool
is_mad(opc_t opc)638 is_mad(opc_t opc)
639 {
640 switch (opc) {
641 case OPC_MAD_U16:
642 case OPC_MAD_S16:
643 case OPC_MAD_U24:
644 case OPC_MAD_S24:
645 case OPC_MAD_F16:
646 case OPC_MAD_F32:
647 return true;
648 default:
649 return false;
650 }
651 }
652
653 static inline bool
is_madsh(opc_t opc)654 is_madsh(opc_t opc)
655 {
656 switch (opc) {
657 case OPC_MADSH_U16:
658 case OPC_MADSH_M16:
659 return true;
660 default:
661 return false;
662 }
663 }
664
665 static inline bool
is_local_atomic(opc_t opc)666 is_local_atomic(opc_t opc)
667 {
668 switch (opc) {
669 case OPC_ATOMIC_ADD:
670 case OPC_ATOMIC_SUB:
671 case OPC_ATOMIC_XCHG:
672 case OPC_ATOMIC_INC:
673 case OPC_ATOMIC_DEC:
674 case OPC_ATOMIC_CMPXCHG:
675 case OPC_ATOMIC_MIN:
676 case OPC_ATOMIC_MAX:
677 case OPC_ATOMIC_AND:
678 case OPC_ATOMIC_OR:
679 case OPC_ATOMIC_XOR:
680 return true;
681 default:
682 return false;
683 }
684 }
685
686 static inline bool
is_global_a3xx_atomic(opc_t opc)687 is_global_a3xx_atomic(opc_t opc)
688 {
689 switch (opc) {
690 case OPC_ATOMIC_S_ADD:
691 case OPC_ATOMIC_S_SUB:
692 case OPC_ATOMIC_S_XCHG:
693 case OPC_ATOMIC_S_INC:
694 case OPC_ATOMIC_S_DEC:
695 case OPC_ATOMIC_S_CMPXCHG:
696 case OPC_ATOMIC_S_MIN:
697 case OPC_ATOMIC_S_MAX:
698 case OPC_ATOMIC_S_AND:
699 case OPC_ATOMIC_S_OR:
700 case OPC_ATOMIC_S_XOR:
701 return true;
702 default:
703 return false;
704 }
705 }
706
707 static inline bool
is_global_a6xx_atomic(opc_t opc)708 is_global_a6xx_atomic(opc_t opc)
709 {
710 switch (opc) {
711 case OPC_ATOMIC_G_ADD:
712 case OPC_ATOMIC_G_SUB:
713 case OPC_ATOMIC_G_XCHG:
714 case OPC_ATOMIC_G_INC:
715 case OPC_ATOMIC_G_DEC:
716 case OPC_ATOMIC_G_CMPXCHG:
717 case OPC_ATOMIC_G_MIN:
718 case OPC_ATOMIC_G_MAX:
719 case OPC_ATOMIC_G_AND:
720 case OPC_ATOMIC_G_OR:
721 case OPC_ATOMIC_G_XOR:
722 return true;
723 default:
724 return false;
725 }
726 }
727
728 static inline bool
is_bindless_atomic(opc_t opc)729 is_bindless_atomic(opc_t opc)
730 {
731 switch (opc) {
732 case OPC_ATOMIC_B_ADD:
733 case OPC_ATOMIC_B_SUB:
734 case OPC_ATOMIC_B_XCHG:
735 case OPC_ATOMIC_B_INC:
736 case OPC_ATOMIC_B_DEC:
737 case OPC_ATOMIC_B_CMPXCHG:
738 case OPC_ATOMIC_B_MIN:
739 case OPC_ATOMIC_B_MAX:
740 case OPC_ATOMIC_B_AND:
741 case OPC_ATOMIC_B_OR:
742 case OPC_ATOMIC_B_XOR:
743 return true;
744 default:
745 return false;
746 }
747 }
748
749 static inline bool
is_atomic(opc_t opc)750 is_atomic(opc_t opc)
751 {
752 return is_local_atomic(opc) || is_global_a3xx_atomic(opc) ||
753 is_global_a6xx_atomic(opc) || is_bindless_atomic(opc);
754 }
755
756 static inline bool
is_ssbo(opc_t opc)757 is_ssbo(opc_t opc)
758 {
759 switch (opc) {
760 case OPC_RESFMT:
761 case OPC_RESINFO:
762 case OPC_LDGB:
763 case OPC_STGB:
764 case OPC_STIB:
765 return true;
766 default:
767 return false;
768 }
769 }
770
771 static inline bool
is_isam(opc_t opc)772 is_isam(opc_t opc)
773 {
774 switch (opc) {
775 case OPC_ISAM:
776 case OPC_ISAML:
777 case OPC_ISAMM:
778 return true;
779 default:
780 return false;
781 }
782 }
783
784 static inline bool
is_cat2_float(opc_t opc)785 is_cat2_float(opc_t opc)
786 {
787 switch (opc) {
788 case OPC_ADD_F:
789 case OPC_MIN_F:
790 case OPC_MAX_F:
791 case OPC_MUL_F:
792 case OPC_SIGN_F:
793 case OPC_CMPS_F:
794 case OPC_ABSNEG_F:
795 case OPC_CMPV_F:
796 case OPC_FLOOR_F:
797 case OPC_CEIL_F:
798 case OPC_RNDNE_F:
799 case OPC_RNDAZ_F:
800 case OPC_TRUNC_F:
801 return true;
802
803 default:
804 return false;
805 }
806 }
807
808 static inline bool
is_cat3_float(opc_t opc)809 is_cat3_float(opc_t opc)
810 {
811 switch (opc) {
812 case OPC_MAD_F16:
813 case OPC_MAD_F32:
814 case OPC_SEL_F16:
815 case OPC_SEL_F32:
816 return true;
817 default:
818 return false;
819 }
820 }
821
822 #endif /* INSTR_A3XX_H_ */
823