/*
 * Copyright (c) 2013 Rob Clark <robdclark@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#ifndef INSTR_A3XX_H_
#define INSTR_A3XX_H_

#define PACKED __attribute__((__packed__))

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* clang-format off */
void ir3_assert_handler(const char *expr, const char *file, int line,
                        const char *func) __attribute__((weak)) __attribute__((__noreturn__));
/* clang-format on */

/* A wrapper for assert() that allows overriding the handling of a failed
 * assert. This is needed for tools like crashdec, which may want to
 * attempt to disassemble memory that does not actually contain valid
 * instructions.
 */
#define ir3_assert(expr) \
   do { \
      if (!(expr)) { \
         if (ir3_assert_handler) { \
            ir3_assert_handler(#expr, __FILE__, __LINE__, __func__); \
         } \
         assert(expr); \
      } \
   } while (0)
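
/* Example (hypothetical, illustration only): because ir3_assert_handler is
 * declared as a weak symbol above, a tool can take over handling of failed
 * asserts simply by providing a strong definition in one of its own source
 * files, e.g.:
 *
 *    void
 *    ir3_assert_handler(const char *expr, const char *file, int line,
 *                       const char *func)
 *    {
 *       fprintf(stderr, "%s:%d (%s): assert failed: %s\n",
 *               file, line, func, expr);
 *       exit(1);  // the handler is declared noreturn, so it must not return
 *    }
 *
 * If no such definition is linked in, the weak symbol resolves to NULL and
 * ir3_assert() falls through to the regular assert().
 */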

/* size of largest OPC field of all the instruction categories: */
#define NOPC_BITS 6

#define _OPC(cat, opc) (((cat) << NOPC_BITS) | opc)
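
/* For illustration (numbers worked out here, not authoritative): _OPC()
 * packs the instruction category into the bits above the per-category
 * opcode, so with NOPC_BITS == 6:
 *
 *    _OPC(2, 5)  == (2 << 6) | 5  == 0x85    (cat2, opcode 5: cmps.f)
 *    _OPC(6, 16) == (6 << 6) | 16 == 0x190   (cat6, opcode 16: atomic.add)
 *
 * opc_cat()/opc_op() further below recover the two fields again.
 */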

/* clang-format off */
typedef enum {
   /* category 0: */
   OPC_NOP = _OPC(0, 0),
   OPC_B = _OPC(0, 1),
   OPC_JUMP = _OPC(0, 2),
   OPC_CALL = _OPC(0, 3),
   OPC_RET = _OPC(0, 4),
   OPC_KILL = _OPC(0, 5),
   OPC_END = _OPC(0, 6),
   OPC_EMIT = _OPC(0, 7),
   OPC_CUT = _OPC(0, 8),
   OPC_CHMASK = _OPC(0, 9),
   OPC_CHSH = _OPC(0, 10),
   OPC_FLOW_REV = _OPC(0, 11),

   OPC_BKT = _OPC(0, 16),
   OPC_STKS = _OPC(0, 17),
   OPC_STKR = _OPC(0, 18),
   OPC_XSET = _OPC(0, 19),
   OPC_XCLR = _OPC(0, 20),
   OPC_GETONE = _OPC(0, 21),
   OPC_DBG = _OPC(0, 22),
   OPC_SHPS = _OPC(0, 23), /* shader prologue start */
   OPC_SHPE = _OPC(0, 24), /* shader prologue end */

   OPC_PREDT = _OPC(0, 29), /* predicated true */
   OPC_PREDF = _OPC(0, 30), /* predicated false */
   OPC_PREDE = _OPC(0, 31), /* predicated end */

   /* Logical opcodes for different branch instruction variations: */
   OPC_BR = _OPC(0, 40),
   OPC_BRAO = _OPC(0, 41),
   OPC_BRAA = _OPC(0, 42),
   OPC_BRAC = _OPC(0, 43),
   OPC_BANY = _OPC(0, 44),
   OPC_BALL = _OPC(0, 45),
   OPC_BRAX = _OPC(0, 46),

   /* Logical opcode to distinguish kill and demote */
   OPC_DEMOTE = _OPC(0, 47),

   /* category 1: */
   OPC_MOV = _OPC(1, 0),
   OPC_MOVP = _OPC(1, 1),
   /* swz, gat, sct */
   OPC_MOVMSK = _OPC(1, 3),

   /* Virtual opcodes for instructions differentiated via a "sub-opcode" that
    * replaces the repeat field:
    */
   OPC_SWZ = _OPC(1, 4),
   OPC_GAT = _OPC(1, 5),
   OPC_SCT = _OPC(1, 6),

   /* Logical opcodes for different variants of mov: */
   OPC_MOV_IMMED = _OPC(1, 40),
   OPC_MOV_CONST = _OPC(1, 41),
   OPC_MOV_GPR = _OPC(1, 42),
   OPC_MOV_RELGPR = _OPC(1, 43),
   OPC_MOV_RELCONST = _OPC(1, 44),

   /* Macros that expand to an if statement + move */
   OPC_BALLOT_MACRO = _OPC(1, 50),
   OPC_ANY_MACRO = _OPC(1, 51),
   OPC_ALL_MACRO = _OPC(1, 52),
   OPC_ELECT_MACRO = _OPC(1, 53),
   OPC_READ_COND_MACRO = _OPC(1, 54),
   OPC_READ_FIRST_MACRO = _OPC(1, 55),
   OPC_SWZ_SHARED_MACRO = _OPC(1, 56),

   /* category 2: */
   OPC_ADD_F = _OPC(2, 0),
   OPC_MIN_F = _OPC(2, 1),
   OPC_MAX_F = _OPC(2, 2),
   OPC_MUL_F = _OPC(2, 3),
   OPC_SIGN_F = _OPC(2, 4),
   OPC_CMPS_F = _OPC(2, 5),
   OPC_ABSNEG_F = _OPC(2, 6),
   OPC_CMPV_F = _OPC(2, 7),
   /* 8 - invalid */
   OPC_FLOOR_F = _OPC(2, 9),
   OPC_CEIL_F = _OPC(2, 10),
   OPC_RNDNE_F = _OPC(2, 11),
   OPC_RNDAZ_F = _OPC(2, 12),
   OPC_TRUNC_F = _OPC(2, 13),
   /* 14-15 - invalid */
   OPC_ADD_U = _OPC(2, 16),
   OPC_ADD_S = _OPC(2, 17),
   OPC_SUB_U = _OPC(2, 18),
   OPC_SUB_S = _OPC(2, 19),
   OPC_CMPS_U = _OPC(2, 20),
   OPC_CMPS_S = _OPC(2, 21),
   OPC_MIN_U = _OPC(2, 22),
   OPC_MIN_S = _OPC(2, 23),
   OPC_MAX_U = _OPC(2, 24),
   OPC_MAX_S = _OPC(2, 25),
   OPC_ABSNEG_S = _OPC(2, 26),
   /* 27 - invalid */
   OPC_AND_B = _OPC(2, 28),
   OPC_OR_B = _OPC(2, 29),
   OPC_NOT_B = _OPC(2, 30),
   OPC_XOR_B = _OPC(2, 31),
   /* 32 - invalid */
   OPC_CMPV_U = _OPC(2, 33),
   OPC_CMPV_S = _OPC(2, 34),
   /* 35-47 - invalid */
   OPC_MUL_U24 = _OPC(2, 48), /* 24b mul into 32b result */
   OPC_MUL_S24 = _OPC(2, 49), /* 24b mul into 32b result with sign extension */
   OPC_MULL_U = _OPC(2, 50),
   OPC_BFREV_B = _OPC(2, 51),
   OPC_CLZ_S = _OPC(2, 52),
   OPC_CLZ_B = _OPC(2, 53),
   OPC_SHL_B = _OPC(2, 54),
   OPC_SHR_B = _OPC(2, 55),
   OPC_ASHR_B = _OPC(2, 56),
   OPC_BARY_F = _OPC(2, 57),
   OPC_MGEN_B = _OPC(2, 58),
   OPC_GETBIT_B = _OPC(2, 59),
   OPC_SETRM = _OPC(2, 60),
   OPC_CBITS_B = _OPC(2, 61),
   OPC_SHB = _OPC(2, 62),
   OPC_MSAD = _OPC(2, 63),

   /* category 3: */
   OPC_MAD_U16 = _OPC(3, 0),
   OPC_MADSH_U16 = _OPC(3, 1),
   OPC_MAD_S16 = _OPC(3, 2),
   OPC_MADSH_M16 = _OPC(3, 3), /* should this be .s16? */
   OPC_MAD_U24 = _OPC(3, 4),
   OPC_MAD_S24 = _OPC(3, 5),
   OPC_MAD_F16 = _OPC(3, 6),
   OPC_MAD_F32 = _OPC(3, 7),
   OPC_SEL_B16 = _OPC(3, 8),
   OPC_SEL_B32 = _OPC(3, 9),
   OPC_SEL_S16 = _OPC(3, 10),
   OPC_SEL_S32 = _OPC(3, 11),
   OPC_SEL_F16 = _OPC(3, 12),
   OPC_SEL_F32 = _OPC(3, 13),
   OPC_SAD_S16 = _OPC(3, 14),
   OPC_SAD_S32 = _OPC(3, 15),
   OPC_SHLG_B16 = _OPC(3, 16),

   /* category 4: */
   OPC_RCP = _OPC(4, 0),
   OPC_RSQ = _OPC(4, 1),
   OPC_LOG2 = _OPC(4, 2),
   OPC_EXP2 = _OPC(4, 3),
   OPC_SIN = _OPC(4, 4),
   OPC_COS = _OPC(4, 5),
   OPC_SQRT = _OPC(4, 6),
   /* NOTE that these are 8+opc from their highp equivalents, so it's
    * possible that the high order bit in the opc field has been repurposed
    * for half-precision use? But note that other ops (rcp/sin/cos/sqrt)
    * still use the same opc as highp.
    */
   OPC_HRSQ = _OPC(4, 9),
   OPC_HLOG2 = _OPC(4, 10),
   OPC_HEXP2 = _OPC(4, 11),

   /* category 5: */
   OPC_ISAM = _OPC(5, 0),
   OPC_ISAML = _OPC(5, 1),
   OPC_ISAMM = _OPC(5, 2),
   OPC_SAM = _OPC(5, 3),
   OPC_SAMB = _OPC(5, 4),
   OPC_SAML = _OPC(5, 5),
   OPC_SAMGQ = _OPC(5, 6),
   OPC_GETLOD = _OPC(5, 7),
   OPC_CONV = _OPC(5, 8),
   OPC_CONVM = _OPC(5, 9),
   OPC_GETSIZE = _OPC(5, 10),
   OPC_GETBUF = _OPC(5, 11),
   OPC_GETPOS = _OPC(5, 12),
   OPC_GETINFO = _OPC(5, 13),
   OPC_DSX = _OPC(5, 14),
   OPC_DSY = _OPC(5, 15),
   OPC_GATHER4R = _OPC(5, 16),
   OPC_GATHER4G = _OPC(5, 17),
   OPC_GATHER4B = _OPC(5, 18),
   OPC_GATHER4A = _OPC(5, 19),
   OPC_SAMGP0 = _OPC(5, 20),
   OPC_SAMGP1 = _OPC(5, 21),
   OPC_SAMGP2 = _OPC(5, 22),
   OPC_SAMGP3 = _OPC(5, 23),
   OPC_DSXPP_1 = _OPC(5, 24),
   OPC_DSYPP_1 = _OPC(5, 25),
   OPC_RGETPOS = _OPC(5, 26),
   OPC_RGETINFO = _OPC(5, 27),
   /* cat5 meta instructions, placed above the cat5 opc field's size */
   OPC_DSXPP_MACRO = _OPC(5, 32),
   OPC_DSYPP_MACRO = _OPC(5, 33),

   /* category 6: */
   OPC_LDG = _OPC(6, 0), /* load-global */
   OPC_LDL = _OPC(6, 1),
   OPC_LDP = _OPC(6, 2),
   OPC_STG = _OPC(6, 3), /* store-global */
   OPC_STL = _OPC(6, 4),
   OPC_STP = _OPC(6, 5),
   OPC_LDIB = _OPC(6, 6),
   OPC_G2L = _OPC(6, 7),
   OPC_L2G = _OPC(6, 8),
   OPC_PREFETCH = _OPC(6, 9),
   OPC_LDLW = _OPC(6, 10),
   OPC_STLW = _OPC(6, 11),
   OPC_RESFMT = _OPC(6, 14),
   OPC_RESINFO = _OPC(6, 15),
   OPC_ATOMIC_ADD = _OPC(6, 16),
   OPC_ATOMIC_SUB = _OPC(6, 17),
   OPC_ATOMIC_XCHG = _OPC(6, 18),
   OPC_ATOMIC_INC = _OPC(6, 19),
   OPC_ATOMIC_DEC = _OPC(6, 20),
   OPC_ATOMIC_CMPXCHG = _OPC(6, 21),
   OPC_ATOMIC_MIN = _OPC(6, 22),
   OPC_ATOMIC_MAX = _OPC(6, 23),
   OPC_ATOMIC_AND = _OPC(6, 24),
   OPC_ATOMIC_OR = _OPC(6, 25),
   OPC_ATOMIC_XOR = _OPC(6, 26),
   OPC_LDGB = _OPC(6, 27),
   OPC_STGB = _OPC(6, 28),
   OPC_STIB = _OPC(6, 29),
   OPC_LDC = _OPC(6, 30),
   OPC_LDLV = _OPC(6, 31),
   OPC_PIPR = _OPC(6, 32), /* ??? */
   OPC_PIPC = _OPC(6, 33), /* ??? */
   OPC_EMIT2 = _OPC(6, 34), /* ??? */
   OPC_ENDLS = _OPC(6, 35), /* ??? */
   OPC_GETSPID = _OPC(6, 36), /* SP ID */
   OPC_GETWID = _OPC(6, 37), /* wavefront ID */

   /* Logical opcodes for things that differ in a6xx+ */
   OPC_STC = _OPC(6, 40),
   OPC_RESINFO_B = _OPC(6, 41),
   OPC_LDIB_B = _OPC(6, 42),
   OPC_STIB_B = _OPC(6, 43),

   /* Logical opcodes for different atomic instruction variations: */
   OPC_ATOMIC_B_ADD = _OPC(6, 44),
   OPC_ATOMIC_B_SUB = _OPC(6, 45),
   OPC_ATOMIC_B_XCHG = _OPC(6, 46),
   OPC_ATOMIC_B_INC = _OPC(6, 47),
   OPC_ATOMIC_B_DEC = _OPC(6, 48),
   OPC_ATOMIC_B_CMPXCHG = _OPC(6, 49),
   OPC_ATOMIC_B_MIN = _OPC(6, 50),
   OPC_ATOMIC_B_MAX = _OPC(6, 51),
   OPC_ATOMIC_B_AND = _OPC(6, 52),
   OPC_ATOMIC_B_OR = _OPC(6, 53),
   OPC_ATOMIC_B_XOR = _OPC(6, 54),

   OPC_LDG_A = _OPC(6, 55),
   OPC_STG_A = _OPC(6, 56),

   OPC_SPILL_MACRO = _OPC(6, 57),
   OPC_RELOAD_MACRO = _OPC(6, 58),

   /* category 7: */
   OPC_BAR = _OPC(7, 0),
   OPC_FENCE = _OPC(7, 1),

   /* meta instructions (category -1): */
   /* placeholder instr to mark shader inputs: */
   OPC_META_INPUT = _OPC(-1, 0),
   /* The "collect" and "split" instructions are used for keeping
    * track of instructions that write to multiple dst registers
    * (split) like texture sample instructions, or read multiple
    * consecutive scalar registers (collect) (bary.f, texture samp)
    *
    * A "split" extracts a scalar component from a vecN, and a
    * "collect" gathers multiple scalar components into a vecN
    */
   OPC_META_SPLIT = _OPC(-1, 2),
   OPC_META_COLLECT = _OPC(-1, 3),

   /* placeholder for texture fetches that run before FS invocation
    * starts:
    */
   OPC_META_TEX_PREFETCH = _OPC(-1, 4),

   /* Parallel copies have multiple destinations, and copy each source
    * to its corresponding destination. This happens "in parallel," meaning
    * that it happens as-if every source is read first and then every
    * destination is stored. These are produced in RA when register
    * shuffling is required, and then lowered away immediately afterwards.
    */
   OPC_META_PARALLEL_COPY = _OPC(-1, 5),
   OPC_META_PHI = _OPC(-1, 6),
} opc_t;
/* clang-format on */

#define opc_cat(opc) ((int)((opc) >> NOPC_BITS))
#define opc_op(opc) ((unsigned)((opc) & ((1 << NOPC_BITS) - 1)))
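
/* Worked example (illustrative only): these macros invert _OPC(), e.g.
 * opc_cat(OPC_CMPS_F) == 2 and opc_op(OPC_CMPS_F) == 5. The meta opcodes
 * use category -1, e.g. OPC_META_PHI == _OPC(-1, 6) == -58; since opc_t
 * contains negative values, the right shift in opc_cat() is done on a
 * signed value (arithmetic shift on the compilers used here), giving
 * -58 >> 6 == -1, while opc_op() just masks off the low NOPC_BITS bits,
 * giving 6.
 */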

const char *disasm_a3xx_instr_name(opc_t opc);

typedef enum {
   TYPE_F16 = 0,
   TYPE_F32 = 1,
   TYPE_U16 = 2,
   TYPE_U32 = 3,
   TYPE_S16 = 4,
   TYPE_S32 = 5,
   TYPE_U8 = 6,
   TYPE_S8 = 7, // XXX I assume?
} type_t;

static inline uint32_t
type_size(type_t type)
{
   switch (type) {
   case TYPE_F32:
   case TYPE_U32:
   case TYPE_S32:
      return 32;
   case TYPE_F16:
   case TYPE_U16:
   case TYPE_S16:
      return 16;
   case TYPE_U8:
   case TYPE_S8:
      return 8;
   default:
      ir3_assert(0); /* invalid type */
      return 0;
   }
}

static inline int
type_float(type_t type)
{
   return (type == TYPE_F32) || (type == TYPE_F16);
}

static inline int
type_uint(type_t type)
{
   return (type == TYPE_U32) || (type == TYPE_U16) || (type == TYPE_U8);
}

static inline int
type_sint(type_t type)
{
   return (type == TYPE_S32) || (type == TYPE_S16) || (type == TYPE_S8);
}
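
/* For example (illustrative only): a conversion that narrows a 32-bit
 * signed value to 16 bits would have src type TYPE_S32 and dst type
 * TYPE_S16; type_size() returns 32 and 16 for those respectively,
 * type_sint() is true for both, and type_float()/type_uint() are false.
 */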

typedef enum {
   ROUND_ZERO = 0,
   ROUND_EVEN = 1,
   ROUND_POS_INF = 2,
   ROUND_NEG_INF = 3,
} round_t;

/* comp:
 *   0 - x
 *   1 - y
 *   2 - z
 *   3 - w
 */
static inline uint32_t
regid(int num, int comp)
{
   return (num << 2) | (comp & 0x3);
}

#define INVALID_REG regid(63, 0)
#define VALIDREG(r) ((r) != INVALID_REG)
#define CONDREG(r, val) COND(VALIDREG(r), (val))

/* special registers: */
#define REG_A0 61 /* address register */
#define REG_P0 62 /* predicate register */
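
/* Worked example (numbers for illustration only): regid() packs a register
 * number and component into one value, so r2.w is regid(2, 3) ==
 * (2 << 2) | 3 == 11, and a0.x is regid(REG_A0, 0) == 244. INVALID_REG is
 * regid(63, 0) == 252 (i.e. "r63.x"), which is what VALIDREG() compares
 * against.
 */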

typedef enum {
   BRANCH_PLAIN = 0, /* br */
   BRANCH_OR = 1, /* brao */
   BRANCH_AND = 2, /* braa */
   BRANCH_CONST = 3, /* brac */
   BRANCH_ANY = 4, /* bany */
   BRANCH_ALL = 5, /* ball */
   BRANCH_X = 6, /* brax ??? */
} brtype_t;

/* With is_bindless_s2en = 1, this determines whether bindless is enabled and
 * if so, how to get the (base, index) pair for both sampler and texture.
 * There is a single base embedded in the instruction, which is always used
 * for the texture.
 */
typedef enum {
   /* Use traditional GL binding model, get texture and sampler index
    * from src3 which is not presumed to be uniform. This is
    * backwards-compatible with earlier generations, where this field was
    * always 0 and nonuniform-indexed sampling always worked.
    */
   CAT5_NONUNIFORM = 0,

   /* The sampler base comes from the low 3 bits of a1.x, and the sampler
    * and texture index come from src3 which is presumed to be uniform.
    */
   CAT5_BINDLESS_A1_UNIFORM = 1,

   /* The texture and sampler share the same base, and the sampler and
    * texture index come from src3 which is *not* presumed to be uniform.
    */
   CAT5_BINDLESS_NONUNIFORM = 2,

   /* The sampler base comes from the low 3 bits of a1.x, and the sampler
    * and texture index come from src3 which is *not* presumed to be
    * uniform.
    */
   CAT5_BINDLESS_A1_NONUNIFORM = 3,

   /* Use traditional GL binding model, get texture and sampler index
    * from src3 which is presumed to be uniform.
    */
   CAT5_UNIFORM = 4,

   /* The texture and sampler share the same base, and the sampler and
    * texture index come from src3 which is presumed to be uniform.
    */
   CAT5_BINDLESS_UNIFORM = 5,

   /* The texture and sampler share the same base, get sampler index from low
    * 4 bits of src3 and texture index from high 4 bits.
    */
   CAT5_BINDLESS_IMM = 6,

   /* The sampler base comes from the low 3 bits of a1.x, and the texture
    * index comes from the next 8 bits of a1.x. The sampler index is an
    * immediate in src3.
    */
   CAT5_BINDLESS_A1_IMM = 7,
} cat5_desc_mode_t;

/* Similar to cat5_desc_mode_t, describes how the descriptor is loaded.
 */
typedef enum {
   /* Use old GL binding model with an immediate index. */
   CAT6_IMM = 0,

   CAT6_UNIFORM = 1,

   CAT6_NONUNIFORM = 2,

   /* Use the bindless model, with an immediate index.
    */
   CAT6_BINDLESS_IMM = 4,

   /* Use the bindless model, with a uniform register index.
    */
   CAT6_BINDLESS_UNIFORM = 5,

   /* Use the bindless model, with a register index that isn't guaranteed
    * to be uniform. This presumably checks if the indices are equal and
    * splits up the load/store, because it works the way you would
    * expect.
    */
   CAT6_BINDLESS_NONUNIFORM = 6,
} cat6_desc_mode_t;

static inline bool
is_sat_compatible(opc_t opc)
{
   /* On a6xx saturation doesn't work on cat4 */
   if (opc_cat(opc) != 2 && opc_cat(opc) != 3)
      return false;

   switch (opc) {
   /* On a3xx and a6xx saturation doesn't work on bary.f */
   case OPC_BARY_F:
   /* On a6xx saturation doesn't work on sel.* */
   case OPC_SEL_B16:
   case OPC_SEL_B32:
   case OPC_SEL_S16:
   case OPC_SEL_S32:
   case OPC_SEL_F16:
   case OPC_SEL_F32:
      return false;
   default:
      return true;
   }
}

static inline bool
is_mad(opc_t opc)
{
   switch (opc) {
   case OPC_MAD_U16:
   case OPC_MAD_S16:
   case OPC_MAD_U24:
   case OPC_MAD_S24:
   case OPC_MAD_F16:
   case OPC_MAD_F32:
      return true;
   default:
      return false;
   }
}

static inline bool
is_madsh(opc_t opc)
{
   switch (opc) {
   case OPC_MADSH_U16:
   case OPC_MADSH_M16:
      return true;
   default:
      return false;
   }
}

static inline bool
is_atomic(opc_t opc)
{
   switch (opc) {
   case OPC_ATOMIC_ADD:
   case OPC_ATOMIC_SUB:
   case OPC_ATOMIC_XCHG:
   case OPC_ATOMIC_INC:
   case OPC_ATOMIC_DEC:
   case OPC_ATOMIC_CMPXCHG:
   case OPC_ATOMIC_MIN:
   case OPC_ATOMIC_MAX:
   case OPC_ATOMIC_AND:
   case OPC_ATOMIC_OR:
   case OPC_ATOMIC_XOR:
      return true;
   default:
      return false;
   }
}

static inline bool
is_ssbo(opc_t opc)
{
   switch (opc) {
   case OPC_RESFMT:
   case OPC_RESINFO:
   case OPC_LDGB:
   case OPC_STGB:
   case OPC_STIB:
      return true;
   default:
      return false;
   }
}

static inline bool
is_isam(opc_t opc)
{
   switch (opc) {
   case OPC_ISAM:
   case OPC_ISAML:
   case OPC_ISAMM:
      return true;
   default:
      return false;
   }
}

static inline bool
is_cat2_float(opc_t opc)
{
   switch (opc) {
   case OPC_ADD_F:
   case OPC_MIN_F:
   case OPC_MAX_F:
   case OPC_MUL_F:
   case OPC_SIGN_F:
   case OPC_CMPS_F:
   case OPC_ABSNEG_F:
   case OPC_CMPV_F:
   case OPC_FLOOR_F:
   case OPC_CEIL_F:
   case OPC_RNDNE_F:
   case OPC_RNDAZ_F:
   case OPC_TRUNC_F:
      return true;

   default:
      return false;
   }
}

static inline bool
is_cat3_float(opc_t opc)
{
   switch (opc) {
   case OPC_MAD_F16:
   case OPC_MAD_F32:
   case OPC_SEL_F16:
   case OPC_SEL_F32:
      return true;
   default:
      return false;
   }
}

#endif /* INSTR_A3XX_H_ */