1 /*
2 * Copyright (c) 2013 Rob Clark <robdclark@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #ifndef INSTR_A3XX_H_
25 #define INSTR_A3XX_H_
26
27 #define PACKED __attribute__((__packed__))
28
29 #include <stdint.h>
30 #include <stdio.h>
31 #include <stdbool.h>
32 #include <assert.h>
33
/* Optional hook called by ir3_assert() when an assertion fails.
 *
 * The symbol is weak, so a program that does not provide a definition
 * still links; ir3_assert() tests the symbol's address before calling
 * it.  It is declared noreturn, so a definition must not return.
 *
 * expr - stringified text of the failed expression
 * file - __FILE__ at the ir3_assert() site
 * line - __LINE__ at the ir3_assert() site
 * func - __func__ at the ir3_assert() site
 */
void ir3_assert_handler(const char *expr, const char *file, int line,
		const char *func) __attribute__((weak)) __attribute__ ((__noreturn__));

/* assert() with overridable failure handling.
 *
 * Tools such as crashdec may try to disassemble memory that does not
 * actually hold valid instructions, and need to intercept the failure.
 * When the expression is false the handler (if one is linked in) is
 * called first; otherwise, the plain assert() takes over.  Note that
 * a failing expression is evaluated a second time by assert().
 */
#define ir3_assert(expr) do { \
		if ((expr)) \
			break; \
		if (ir3_assert_handler) { \
			ir3_assert_handler(#expr, __FILE__, __LINE__, __func__); \
		} \
		assert(expr); \
	} while (0)
/* size of largest OPC field of all the instruction categories: */
#define NOPC_BITS 6

/* Combine an instruction category and a per-category opcode number into
 * a single opc_t value: the category sits above the low NOPC_BITS bits,
 * the opcode number occupies them.
 *
 * The category is scaled by multiplication rather than by a left shift
 * because the meta instructions use category -1, and left-shifting a
 * negative value is undefined behaviour in C.  The resulting values are
 * the same ones the shift produced on two's complement targets.  Both
 * arguments are parenthesised so that arbitrary expressions can be
 * passed safely.
 */
#define _OPC(cat, opc)   (((cat) * (1 << NOPC_BITS)) | (opc))

/* Every known opcode, encoded as _OPC(category, opcode-within-category). */
typedef enum {
	/* category 0: */
	OPC_NOP             = _OPC(0, 0),
	OPC_B               = _OPC(0, 1),
	OPC_JUMP            = _OPC(0, 2),
	OPC_CALL            = _OPC(0, 3),
	OPC_RET             = _OPC(0, 4),
	OPC_KILL            = _OPC(0, 5),
	OPC_END             = _OPC(0, 6),
	OPC_EMIT            = _OPC(0, 7),
	OPC_CUT             = _OPC(0, 8),
	OPC_CHMASK          = _OPC(0, 9),
	OPC_CHSH            = _OPC(0, 10),
	OPC_FLOW_REV        = _OPC(0, 11),

	OPC_BKT             = _OPC(0, 16),
	OPC_STKS            = _OPC(0, 17),
	OPC_STKR            = _OPC(0, 18),
	OPC_XSET            = _OPC(0, 19),
	OPC_XCLR            = _OPC(0, 20),
	OPC_GETONE          = _OPC(0, 21),
	OPC_DBG             = _OPC(0, 22),
	OPC_SHPS            = _OPC(0, 23),   /* shader prologue start */
	OPC_SHPE            = _OPC(0, 24),   /* shader prologue end */

	OPC_PREDT           = _OPC(0, 29),   /* predicated true */
	OPC_PREDF           = _OPC(0, 30),   /* predicated false */
	OPC_PREDE           = _OPC(0, 31),   /* predicated end */

	/* category 1: */
	OPC_MOV             = _OPC(1, 0),

	/* category 2: */
	OPC_ADD_F           = _OPC(2, 0),
	OPC_MIN_F           = _OPC(2, 1),
	OPC_MAX_F           = _OPC(2, 2),
	OPC_MUL_F           = _OPC(2, 3),
	OPC_SIGN_F          = _OPC(2, 4),
	OPC_CMPS_F          = _OPC(2, 5),
	OPC_ABSNEG_F        = _OPC(2, 6),
	OPC_CMPV_F          = _OPC(2, 7),
	/* 8 - invalid */
	OPC_FLOOR_F         = _OPC(2, 9),
	OPC_CEIL_F          = _OPC(2, 10),
	OPC_RNDNE_F         = _OPC(2, 11),
	OPC_RNDAZ_F         = _OPC(2, 12),
	OPC_TRUNC_F         = _OPC(2, 13),
	/* 14-15 - invalid */
	OPC_ADD_U           = _OPC(2, 16),
	OPC_ADD_S           = _OPC(2, 17),
	OPC_SUB_U           = _OPC(2, 18),
	OPC_SUB_S           = _OPC(2, 19),
	OPC_CMPS_U          = _OPC(2, 20),
	OPC_CMPS_S          = _OPC(2, 21),
	OPC_MIN_U           = _OPC(2, 22),
	OPC_MIN_S           = _OPC(2, 23),
	OPC_MAX_U           = _OPC(2, 24),
	OPC_MAX_S           = _OPC(2, 25),
	OPC_ABSNEG_S        = _OPC(2, 26),
	/* 27 - invalid */
	OPC_AND_B           = _OPC(2, 28),
	OPC_OR_B            = _OPC(2, 29),
	OPC_NOT_B           = _OPC(2, 30),
	OPC_XOR_B           = _OPC(2, 31),
	/* 32 - invalid */
	OPC_CMPV_U          = _OPC(2, 33),
	OPC_CMPV_S          = _OPC(2, 34),
	/* 35-47 - invalid */
	OPC_MUL_U24         = _OPC(2, 48), /* 24b mul into 32b result */
	OPC_MUL_S24         = _OPC(2, 49), /* 24b mul into 32b result with sign extension */
	OPC_MULL_U          = _OPC(2, 50),
	OPC_BFREV_B         = _OPC(2, 51),
	OPC_CLZ_S           = _OPC(2, 52),
	OPC_CLZ_B           = _OPC(2, 53),
	OPC_SHL_B           = _OPC(2, 54),
	OPC_SHR_B           = _OPC(2, 55),
	OPC_ASHR_B          = _OPC(2, 56),
	OPC_BARY_F          = _OPC(2, 57),
	OPC_MGEN_B          = _OPC(2, 58),
	OPC_GETBIT_B        = _OPC(2, 59),
	OPC_SETRM           = _OPC(2, 60),
	OPC_CBITS_B         = _OPC(2, 61),
	OPC_SHB             = _OPC(2, 62),
	OPC_MSAD            = _OPC(2, 63),

	/* category 3: */
	OPC_MAD_U16         = _OPC(3, 0),
	OPC_MADSH_U16       = _OPC(3, 1),
	OPC_MAD_S16         = _OPC(3, 2),
	OPC_MADSH_M16       = _OPC(3, 3),   /* should this be .s16? */
	OPC_MAD_U24         = _OPC(3, 4),
	OPC_MAD_S24         = _OPC(3, 5),
	OPC_MAD_F16         = _OPC(3, 6),
	OPC_MAD_F32         = _OPC(3, 7),
	OPC_SEL_B16         = _OPC(3, 8),
	OPC_SEL_B32         = _OPC(3, 9),
	OPC_SEL_S16         = _OPC(3, 10),
	OPC_SEL_S32         = _OPC(3, 11),
	OPC_SEL_F16         = _OPC(3, 12),
	OPC_SEL_F32         = _OPC(3, 13),
	OPC_SAD_S16         = _OPC(3, 14),
	OPC_SAD_S32         = _OPC(3, 15),

	/* category 4: */
	OPC_RCP             = _OPC(4, 0),
	OPC_RSQ             = _OPC(4, 1),
	OPC_LOG2            = _OPC(4, 2),
	OPC_EXP2            = _OPC(4, 3),
	OPC_SIN             = _OPC(4, 4),
	OPC_COS             = _OPC(4, 5),
	OPC_SQRT            = _OPC(4, 6),
	/* NOTE that these are 8+opc from their highp equivs, so it's possible
	 * that the high order bit in the opc field has been repurposed for
	 * half-precision use?  But note that other ops (rcp/sin/cos/sqrt)
	 * still use the same opc as highp
	 */
	OPC_HRSQ            = _OPC(4, 9),
	OPC_HLOG2           = _OPC(4, 10),
	OPC_HEXP2           = _OPC(4, 11),

	/* category 5: */
	OPC_ISAM            = _OPC(5, 0),
	OPC_ISAML           = _OPC(5, 1),
	OPC_ISAMM           = _OPC(5, 2),
	OPC_SAM             = _OPC(5, 3),
	OPC_SAMB            = _OPC(5, 4),
	OPC_SAML            = _OPC(5, 5),
	OPC_SAMGQ           = _OPC(5, 6),
	OPC_GETLOD          = _OPC(5, 7),
	OPC_CONV            = _OPC(5, 8),
	OPC_CONVM           = _OPC(5, 9),
	OPC_GETSIZE         = _OPC(5, 10),
	OPC_GETBUF          = _OPC(5, 11),
	OPC_GETPOS          = _OPC(5, 12),
	OPC_GETINFO         = _OPC(5, 13),
	OPC_DSX             = _OPC(5, 14),
	OPC_DSY             = _OPC(5, 15),
	OPC_GATHER4R        = _OPC(5, 16),
	OPC_GATHER4G        = _OPC(5, 17),
	OPC_GATHER4B        = _OPC(5, 18),
	OPC_GATHER4A        = _OPC(5, 19),
	OPC_SAMGP0          = _OPC(5, 20),
	OPC_SAMGP1          = _OPC(5, 21),
	OPC_SAMGP2          = _OPC(5, 22),
	OPC_SAMGP3          = _OPC(5, 23),
	OPC_DSXPP_1         = _OPC(5, 24),
	OPC_DSYPP_1         = _OPC(5, 25),
	OPC_RGETPOS         = _OPC(5, 26),
	OPC_RGETINFO        = _OPC(5, 27),
	/* cat5 meta instructions, placed above the cat5 opc field's size */
	OPC_DSXPP_MACRO     = _OPC(5, 32),
	OPC_DSYPP_MACRO     = _OPC(5, 33),

	/* category 6: */
	OPC_LDG             = _OPC(6, 0),        /* load-global */
	OPC_LDL             = _OPC(6, 1),
	OPC_LDP             = _OPC(6, 2),
	OPC_STG             = _OPC(6, 3),        /* store-global */
	OPC_STL             = _OPC(6, 4),
	OPC_STP             = _OPC(6, 5),
	OPC_LDIB            = _OPC(6, 6),
	OPC_G2L             = _OPC(6, 7),
	OPC_L2G             = _OPC(6, 8),
	OPC_PREFETCH        = _OPC(6, 9),
	OPC_LDLW            = _OPC(6, 10),
	OPC_STLW            = _OPC(6, 11),
	OPC_RESFMT          = _OPC(6, 14),
	OPC_RESINFO         = _OPC(6, 15),
	OPC_ATOMIC_ADD      = _OPC(6, 16),
	OPC_ATOMIC_SUB      = _OPC(6, 17),
	OPC_ATOMIC_XCHG     = _OPC(6, 18),
	OPC_ATOMIC_INC      = _OPC(6, 19),
	OPC_ATOMIC_DEC      = _OPC(6, 20),
	OPC_ATOMIC_CMPXCHG  = _OPC(6, 21),
	OPC_ATOMIC_MIN      = _OPC(6, 22),
	OPC_ATOMIC_MAX      = _OPC(6, 23),
	OPC_ATOMIC_AND      = _OPC(6, 24),
	OPC_ATOMIC_OR       = _OPC(6, 25),
	OPC_ATOMIC_XOR      = _OPC(6, 26),
	OPC_LDGB            = _OPC(6, 27),
	OPC_STGB            = _OPC(6, 28),
	OPC_STIB            = _OPC(6, 29),
	OPC_LDC             = _OPC(6, 30),
	OPC_LDLV            = _OPC(6, 31),

	/* category 7: */
	OPC_BAR             = _OPC(7, 0),
	OPC_FENCE           = _OPC(7, 1),

	/* meta instructions (category -1): */
	/* placeholder instr to mark shader inputs: */
	OPC_META_INPUT      = _OPC(-1, 0),
	/* The "collect" and "split" instructions are used for keeping
	 * track of instructions that write to multiple dst registers
	 * (split) like texture sample instructions, or read multiple
	 * consecutive scalar registers (collect) (bary.f, texture samp)
	 *
	 * A "split" extracts a scalar component from a vecN, and a
	 * "collect" gathers multiple scalar components into a vecN
	 */
	OPC_META_SPLIT      = _OPC(-1, 2),
	OPC_META_COLLECT    = _OPC(-1, 3),

	/* placeholder for texture fetches that run before FS invocation
	 * starts:
	 */
	OPC_META_TEX_PREFETCH = _OPC(-1, 4),

} opc_t;

/* Split an opc_t back into its two parts: opc_cat() yields the category
 * (as a signed int, so meta opcodes can yield a negative category) and
 * opc_op() yields the low NOPC_BITS bits, i.e. the opcode number within
 * that category.
 */
#define opc_cat(opc) ((int)((opc) >> NOPC_BITS))
#define opc_op(opc)  ((unsigned)((opc) & ((1 << NOPC_BITS) - 1)))

/* Returns the name for an opcode; defined outside this header. */
const char *disasm_a3xx_instr_name(opc_t opc);
269
/* Data type codes: float (F), unsigned (U) and signed (S) integers,
 * suffixed with the width in bits.
 */
typedef enum {
	TYPE_F16 = 0,
	TYPE_F32 = 1,
	TYPE_U16 = 2,
	TYPE_U32 = 3,
	TYPE_S16 = 4,
	TYPE_S32 = 5,
	TYPE_U8  = 6,
	TYPE_S8  = 7,  /* XXX I assume? */
} type_t;
280
/* Returns the width in bits (32, 16 or 8) of the given type.  An
 * unrecognised type trips ir3_assert(); if that returns, 0 is returned.
 */
static inline uint32_t type_size(type_t type)
{
	if (type == TYPE_F32 || type == TYPE_U32 || type == TYPE_S32)
		return 32;

	if (type == TYPE_F16 || type == TYPE_U16 || type == TYPE_S16)
		return 16;

	if (type == TYPE_U8 || type == TYPE_S8)
		return 8;

	ir3_assert(0); /* invalid type */
	return 0;
}
300
/* Returns 1 for the floating point types (F16/F32), 0 otherwise. */
static inline int type_float(type_t type)
{
	switch (type) {
	case TYPE_F32:
	case TYPE_F16:
		return 1;
	default:
		return 0;
	}
}
305
/* Returns 1 for the unsigned integer types (U32/U16/U8), 0 otherwise. */
static inline int type_uint(type_t type)
{
	switch (type) {
	case TYPE_U32:
	case TYPE_U16:
	case TYPE_U8:
		return 1;
	default:
		return 0;
	}
}
310
/* Returns 1 for the signed integer types (S32/S16/S8), 0 otherwise. */
static inline int type_sint(type_t type)
{
	switch (type) {
	case TYPE_S32:
	case TYPE_S16:
	case TYPE_S8:
		return 1;
	default:
		return 0;
	}
}
315
316 typedef union PACKED {
317 /* normal gpr or const src register: */
318 struct PACKED {
319 uint32_t comp : 2;
320 uint32_t num : 10;
321 };
322 /* for immediate val: */
323 int32_t iim_val : 11;
324 /* to make compiler happy: */
325 uint32_t dummy32;
326 uint32_t dummy10 : 10;
327 int32_t idummy10 : 10;
328 uint32_t dummy11 : 11;
329 uint32_t dummy12 : 12;
330 uint32_t dummy13 : 13;
331 uint32_t dummy8 : 8;
332 int32_t idummy13 : 13;
333 int32_t idummy8 : 8;
334 } reg_t;
335
/* Packs a register number and a component selector into one id, with
 * the component in the low two bits and the number above it.
 *
 * comp:
 *   0 - x
 *   1 - y
 *   2 - z
 *   3 - w
 *
 * Only the low two bits of comp are used.
 */
static inline uint32_t regid(int num, int comp)
{
	const int component = comp & 0x3;
	const int base = num << 2;

	return base | component;
}

/* Register id used to mean "no register", plus helpers testing for it. */
#define INVALID_REG      regid(63, 0)
#define VALIDREG(r)      ((r) != INVALID_REG)
#define CONDREG(r, val)  COND(VALIDREG(r), (val))
350
351 /* special registers: */
352 #define REG_A0 61 /* address register */
353 #define REG_P0 62 /* predicate register */
354
reg_special(reg_t reg)355 static inline int reg_special(reg_t reg)
356 {
357 return (reg.num == REG_A0) || (reg.num == REG_P0);
358 }
359
/* Branch types; stored in the 3-bit brtype field of instr_cat0_t.  The
 * mnemonic for each type is given alongside.
 */
typedef enum {
	BRANCH_PLAIN = 0,   /* br */
	BRANCH_OR    = 1,   /* brao */
	BRANCH_AND   = 2,   /* braa */
	BRANCH_CONST = 3,   /* brac */
	BRANCH_ANY   = 4,   /* bany */
	BRANCH_ALL   = 5,   /* ball */
	BRANCH_X     = 6,   /* brax ??? */
} brtype_t;
369
370 typedef struct PACKED {
371 /* dword0: */
372 union PACKED {
373 struct PACKED {
374 int16_t immed : 16;
375 uint32_t dummy1 : 16;
376 } a3xx;
377 struct PACKED {
378 int32_t immed : 20;
379 uint32_t dummy1 : 12;
380 } a4xx;
381 struct PACKED {
382 int32_t immed : 32;
383 } a5xx;
384 };
385
386 /* dword1: */
387 uint32_t idx : 5; /* brac.N index */
388 uint32_t brtype : 3; /* branch type, see brtype_t */
389 uint32_t repeat : 3;
390 uint32_t dummy3 : 1;
391 uint32_t ss : 1;
392 uint32_t inv1 : 1;
393 uint32_t comp1 : 2;
394 uint32_t eq : 1;
395 uint32_t opc_hi : 1; /* at least one bit */
396 uint32_t dummy4 : 2;
397 uint32_t inv0 : 1;
398 uint32_t comp0 : 2; /* component for first src */
399 uint32_t opc : 4;
400 uint32_t jmp_tgt : 1;
401 uint32_t sync : 1;
402 uint32_t opc_cat : 3;
403 } instr_cat0_t;
404
405 typedef struct PACKED {
406 /* dword0: */
407 union PACKED {
408 /* for normal src register: */
409 struct PACKED {
410 uint32_t src : 11;
411 /* at least low bit of pad must be zero or it will
412 * look like a address relative src
413 */
414 uint32_t pad : 21;
415 };
416 /* for address relative: */
417 struct PACKED {
418 int32_t off : 10;
419 uint32_t src_rel_c : 1;
420 uint32_t src_rel : 1;
421 uint32_t unknown : 20;
422 };
423 /* for immediate: */
424 int32_t iim_val;
425 uint32_t uim_val;
426 float fim_val;
427 };
428
429 /* dword1: */
430 uint32_t dst : 8;
431 uint32_t repeat : 3;
432 uint32_t src_r : 1;
433 uint32_t ss : 1;
434 uint32_t ul : 1;
435 uint32_t dst_type : 3;
436 uint32_t dst_rel : 1;
437 uint32_t src_type : 3;
438 uint32_t src_c : 1;
439 uint32_t src_im : 1;
440 uint32_t even : 1;
441 uint32_t pos_inf : 1;
442 uint32_t must_be_0 : 2;
443 uint32_t jmp_tgt : 1;
444 uint32_t sync : 1;
445 uint32_t opc_cat : 3;
446 } instr_cat1_t;
447
448 typedef struct PACKED {
449 /* dword0: */
450 union PACKED {
451 struct PACKED {
452 uint32_t src1 : 11;
453 uint32_t must_be_zero1: 2;
454 uint32_t src1_im : 1; /* immediate */
455 uint32_t src1_neg : 1; /* negate */
456 uint32_t src1_abs : 1; /* absolute value */
457 };
458 struct PACKED {
459 uint32_t src1 : 10;
460 uint32_t src1_c : 1; /* relative-const */
461 uint32_t src1_rel : 1; /* relative address */
462 uint32_t must_be_zero : 1;
463 uint32_t dummy : 3;
464 } rel1;
465 struct PACKED {
466 uint32_t src1 : 12;
467 uint32_t src1_c : 1; /* const */
468 uint32_t dummy : 3;
469 } c1;
470 };
471
472 union PACKED {
473 struct PACKED {
474 uint32_t src2 : 11;
475 uint32_t must_be_zero2: 2;
476 uint32_t src2_im : 1; /* immediate */
477 uint32_t src2_neg : 1; /* negate */
478 uint32_t src2_abs : 1; /* absolute value */
479 };
480 struct PACKED {
481 uint32_t src2 : 10;
482 uint32_t src2_c : 1; /* relative-const */
483 uint32_t src2_rel : 1; /* relative address */
484 uint32_t must_be_zero : 1;
485 uint32_t dummy : 3;
486 } rel2;
487 struct PACKED {
488 uint32_t src2 : 12;
489 uint32_t src2_c : 1; /* const */
490 uint32_t dummy : 3;
491 } c2;
492 };
493
494 /* dword1: */
495 uint32_t dst : 8;
496 uint32_t repeat : 2;
497 uint32_t sat : 1;
498 uint32_t src1_r : 1; /* doubles as nop0 if repeat==0 */
499 uint32_t ss : 1;
500 uint32_t ul : 1; /* dunno */
501 uint32_t dst_half : 1; /* or widen/narrow.. ie. dst hrN <-> rN */
502 uint32_t ei : 1;
503 uint32_t cond : 3;
504 uint32_t src2_r : 1; /* doubles as nop1 if repeat==0 */
505 uint32_t full : 1; /* not half */
506 uint32_t opc : 6;
507 uint32_t jmp_tgt : 1;
508 uint32_t sync : 1;
509 uint32_t opc_cat : 3;
510 } instr_cat2_t;
511
512 typedef struct PACKED {
513 /* dword0: */
514 union PACKED {
515 struct PACKED {
516 uint32_t src1 : 11;
517 uint32_t must_be_zero1: 2;
518 uint32_t src2_c : 1;
519 uint32_t src1_neg : 1;
520 uint32_t src2_r : 1; /* doubles as nop1 if repeat==0 */
521 };
522 struct PACKED {
523 uint32_t src1 : 10;
524 uint32_t src1_c : 1;
525 uint32_t src1_rel : 1;
526 uint32_t must_be_zero : 1;
527 uint32_t dummy : 3;
528 } rel1;
529 struct PACKED {
530 uint32_t src1 : 12;
531 uint32_t src1_c : 1;
532 uint32_t dummy : 3;
533 } c1;
534 };
535
536 union PACKED {
537 struct PACKED {
538 uint32_t src3 : 11;
539 uint32_t must_be_zero2: 2;
540 uint32_t src3_r : 1;
541 uint32_t src2_neg : 1;
542 uint32_t src3_neg : 1;
543 };
544 struct PACKED {
545 uint32_t src3 : 10;
546 uint32_t src3_c : 1;
547 uint32_t src3_rel : 1;
548 uint32_t must_be_zero : 1;
549 uint32_t dummy : 3;
550 } rel2;
551 struct PACKED {
552 uint32_t src3 : 12;
553 uint32_t src3_c : 1;
554 uint32_t dummy : 3;
555 } c2;
556 };
557
558 /* dword1: */
559 uint32_t dst : 8;
560 uint32_t repeat : 2;
561 uint32_t sat : 1;
562 uint32_t src1_r : 1; /* doubles as nop0 if repeat==0 */
563 uint32_t ss : 1;
564 uint32_t ul : 1;
565 uint32_t dst_half : 1; /* or widen/narrow.. ie. dst hrN <-> rN */
566 uint32_t src2 : 8;
567 uint32_t opc : 4;
568 uint32_t jmp_tgt : 1;
569 uint32_t sync : 1;
570 uint32_t opc_cat : 3;
571 } instr_cat3_t;
572
/* Classifies a cat3 instruction by opcode: returns false for the 16-bit
 * mad/sel/sad variants listed below (and, as the original author also
 * questioned, sad.s32), true for every other opcode.
 */
static inline bool instr_cat3_full(instr_cat3_t *cat3)
{
	bool full = true;

	switch (_OPC(3, cat3->opc)) {
	case OPC_MAD_F16:
	case OPC_MAD_U16:
	case OPC_MAD_S16:
	case OPC_SEL_B16:
	case OPC_SEL_S16:
	case OPC_SEL_F16:
	case OPC_SAD_S16:
	case OPC_SAD_S32:  /* really?? */
		full = false;
		break;
	default:
		break;
	}

	return full;
}
589
590 typedef struct PACKED {
591 /* dword0: */
592 union PACKED {
593 struct PACKED {
594 uint32_t src : 11;
595 uint32_t must_be_zero1: 2;
596 uint32_t src_im : 1; /* immediate */
597 uint32_t src_neg : 1; /* negate */
598 uint32_t src_abs : 1; /* absolute value */
599 };
600 struct PACKED {
601 uint32_t src : 10;
602 uint32_t src_c : 1; /* relative-const */
603 uint32_t src_rel : 1; /* relative address */
604 uint32_t must_be_zero : 1;
605 uint32_t dummy : 3;
606 } rel;
607 struct PACKED {
608 uint32_t src : 12;
609 uint32_t src_c : 1; /* const */
610 uint32_t dummy : 3;
611 } c;
612 };
613 uint32_t dummy1 : 16; /* seem to be ignored */
614
615 /* dword1: */
616 uint32_t dst : 8;
617 uint32_t repeat : 2;
618 uint32_t sat : 1;
619 uint32_t src_r : 1;
620 uint32_t ss : 1;
621 uint32_t ul : 1;
622 uint32_t dst_half : 1; /* or widen/narrow.. ie. dst hrN <-> rN */
623 uint32_t dummy2 : 5; /* seem to be ignored */
624 uint32_t full : 1; /* not half */
625 uint32_t opc : 6;
626 uint32_t jmp_tgt : 1;
627 uint32_t sync : 1;
628 uint32_t opc_cat : 3;
629 } instr_cat4_t;
630
/* With is_s2en_bindless = 1, this determines whether bindless is enabled
 * and if so, how to get the (base, index) pair for both sampler and
 * texture.  There is a single base embedded in the instruction, which is
 * always used for the texture.
 */
typedef enum {
	/* Traditional GL binding model: texture and sampler index come
	 * from src3, which is not presumed to be uniform.  This is
	 * backwards-compatible with earlier generations, where this field
	 * was always 0 and nonuniform-indexed sampling always worked.
	 */
	CAT5_NONUNIFORM = 0,

	/* Sampler base comes from the low 3 bits of a1.x; sampler and
	 * texture index come from src3, which is presumed to be uniform.
	 */
	CAT5_BINDLESS_A1_UNIFORM = 1,

	/* Texture and sampler share the same base; sampler and texture
	 * index come from src3, which is *not* presumed to be uniform.
	 */
	CAT5_BINDLESS_NONUNIFORM = 2,

	/* Sampler base comes from the low 3 bits of a1.x; sampler and
	 * texture index come from src3, which is *not* presumed to be
	 * uniform.
	 */
	CAT5_BINDLESS_A1_NONUNIFORM = 3,

	/* Traditional GL binding model: texture and sampler index come
	 * from src3, which is presumed to be uniform.
	 */
	CAT5_UNIFORM = 4,

	/* Texture and sampler share the same base; sampler and texture
	 * index come from src3, which is presumed to be uniform.
	 */
	CAT5_BINDLESS_UNIFORM = 5,

	/* Texture and sampler share the same base; the sampler index is
	 * taken from the low 4 bits of src3 and the texture index from the
	 * high 4 bits.
	 */
	CAT5_BINDLESS_IMM = 6,

	/* Sampler base comes from the low 3 bits of a1.x and the texture
	 * index from the next 8 bits of a1.x.  The sampler index is an
	 * immediate in src3.
	 */
	CAT5_BINDLESS_A1_IMM = 7,
} cat5_desc_mode_t;
681
682 typedef struct PACKED {
683 /* dword0: */
684 union PACKED {
685 /* normal case: */
686 struct PACKED {
687 uint32_t full : 1; /* not half */
688 uint32_t src1 : 8;
689 uint32_t src2 : 8;
690 uint32_t dummy1 : 4; /* seem to be ignored */
691 uint32_t samp : 4;
692 uint32_t tex : 7;
693 } norm;
694 /* s2en case: */
695 struct PACKED {
696 uint32_t full : 1; /* not half */
697 uint32_t src1 : 8;
698 uint32_t src2 : 8;
699 uint32_t dummy1 : 2;
700 uint32_t base_hi : 2;
701 uint32_t src3 : 8;
702 uint32_t desc_mode : 3;
703 } s2en_bindless;
704 /* same in either case: */
705 // XXX I think, confirm this
706 struct PACKED {
707 uint32_t full : 1; /* not half */
708 uint32_t src1 : 8;
709 uint32_t src2 : 8;
710 uint32_t pad : 15;
711 };
712 };
713
714 /* dword1: */
715 uint32_t dst : 8;
716 uint32_t wrmask : 4; /* write-mask */
717 uint32_t type : 3;
718 uint32_t base_lo : 1; /* used with bindless */
719 uint32_t is_3d : 1;
720
721 uint32_t is_a : 1;
722 uint32_t is_s : 1;
723 uint32_t is_s2en_bindless : 1;
724 uint32_t is_o : 1;
725 uint32_t is_p : 1;
726
727 uint32_t opc : 5;
728 uint32_t jmp_tgt : 1;
729 uint32_t sync : 1;
730 uint32_t opc_cat : 3;
731 } instr_cat5_t;
732
/* cat6 dword0 view for the src_off case, i.e. "[src1 + off], src2".
 * dword1 is left opaque in this view.
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t mustbe1  : 1;
	int32_t  off      : 13;
	uint32_t src1     : 8;
	uint32_t src1_im  : 1;
	uint32_t src2_im  : 1;
	uint32_t src2     : 8;

	/* dword1: */
	uint32_t dword1;
} instr_cat6a_t;
746
/* cat6 dword0 view for the !src_off case, i.e. "[src1], src2".
 * dword1 is left opaque in this view.
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t mustbe0  : 1;
	uint32_t src1     : 8;
	uint32_t pad      : 5;
	uint32_t ignore0  : 8;
	uint32_t src1_im  : 1;
	uint32_t src2_im  : 1;
	uint32_t src2     : 8;

	/* dword1: */
	uint32_t dword1;
} instr_cat6b_t;
761
/* cat6 dword1 view for the dst_off case.  The high part of the offset
 * (off_high) is carried in dword0; everything else in dword0 is padding
 * in this view.
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t dw0_pad1 : 9;
	int32_t  off_high : 5;
	uint32_t dw0_pad2 : 18;

	/* dword1: */
	uint32_t off      : 8;
	uint32_t mustbe1  : 1;
	uint32_t dst      : 8;
	uint32_t pad1     : 15;
} instr_cat6c_t;
774
/* cat6 dword1 view for the !dst_off case.  dword0 is left opaque in
 * this view.
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t dword0;

	/* dword1: */
	uint32_t dst      : 8;
	uint32_t mustbe0  : 1;
	uint32_t idx      : 8;
	uint32_t pad0     : 15;
} instr_cat6d_t;
785
/* cat6 view used for ldgb and atomics..
 *
 * ldgb:      pad0=0, pad3=1
 * atomic .g: pad0=1, pad3=1
 *        .l: pad0=1, pad3=0
 *
 * NOTE(review): this struct has no field named pad3; the note above
 * presumably refers to the bit now named 'g' - confirm.
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t pad0      : 1;
	uint32_t src3      : 8;
	uint32_t d         : 2;
	uint32_t typed     : 1;
	uint32_t type_size : 2;
	uint32_t src1      : 8;
	uint32_t src1_im   : 1;
	uint32_t src2_im   : 1;
	uint32_t src2      : 8;

	/* dword1: */
	uint32_t dst       : 8;
	uint32_t mustbe0   : 1;
	uint32_t src_ssbo  : 8;
	uint32_t pad2      : 3;   /* type */
	uint32_t g         : 1;
	uint32_t src_ssbo_im : 1;
	uint32_t pad4      : 10;  /* opc/jmp_tgt/sync/opc_cat */
} instr_cat6ldgb_t;
813
/* cat6 view used for stgb: pad0=0, pad3=2 */
typedef struct PACKED {
	/* dword0: */
	uint32_t mustbe1   : 1;   /* ??? */
	uint32_t src1      : 8;
	uint32_t d         : 2;
	uint32_t typed     : 1;
	uint32_t type_size : 2;
	uint32_t pad0      : 9;
	uint32_t src2_im   : 1;
	uint32_t src2      : 8;

	/* dword1: */
	uint32_t src3      : 8;
	uint32_t src3_im   : 1;
	uint32_t dst_ssbo  : 8;
	uint32_t pad2      : 3;   /* type */
	uint32_t pad3      : 2;
	uint32_t pad4      : 10;  /* opc/jmp_tgt/sync/opc_cat */
} instr_cat6stgb_t;
835
836 typedef union PACKED {
837 instr_cat6a_t a;
838 instr_cat6b_t b;
839 instr_cat6c_t c;
840 instr_cat6d_t d;
841 instr_cat6ldgb_t ldgb;
842 instr_cat6stgb_t stgb;
843 struct PACKED {
844 /* dword0: */
845 uint32_t src_off : 1;
846 uint32_t pad1 : 31;
847
848 /* dword1: */
849 uint32_t pad2 : 8;
850 uint32_t dst_off : 1;
851 uint32_t pad3 : 8;
852 uint32_t type : 3;
853 uint32_t g : 1; /* or in some cases it means dst immed */
854 uint32_t pad4 : 1;
855 uint32_t opc : 5;
856 uint32_t jmp_tgt : 1;
857 uint32_t sync : 1;
858 uint32_t opc_cat : 3;
859 };
860 } instr_cat6_t;
861
/* Similar to cat5_desc_mode_t, describes how the descriptor is loaded.
 * Value 3 is not assigned.
 */
typedef enum {
	/* Old GL binding model with an immediate index. */
	CAT6_IMM = 0,

	/* NOTE(review): undocumented in the original; by analogy with the
	 * bindless entries below, presumably the old binding model with a
	 * uniform register index - confirm.
	 */
	CAT6_UNIFORM = 1,

	/* NOTE(review): undocumented in the original; presumably the old
	 * binding model with a possibly non-uniform register index -
	 * confirm.
	 */
	CAT6_NONUNIFORM = 2,

	/* Bindless model, with an immediate index. */
	CAT6_BINDLESS_IMM = 4,

	/* Bindless model, with a uniform register index. */
	CAT6_BINDLESS_UNIFORM = 5,

	/* Bindless model, with a register index that isn't guaranteed to
	 * be uniform.  This presumably checks if the indices are equal and
	 * splits up the load/store, because it works the way you would
	 * expect.
	 */
	CAT6_BINDLESS_NONUNIFORM = 6,
} cat6_desc_mode_t;
887
/**
 * New-style (a6xx) cat6 encoding.
 *
 * For atomic ops (which return a value):
 *
 *    pad1=1, pad3=c, pad5=3
 *    src1    - vecN offset/coords
 *    src2.x  - is actually dest register
 *    src2.y  - is 'data' except for cmpxchg where src2.y is 'compare'
 *              and src2.z is 'data'
 *
 * For stib (which does not return a value):
 *    pad1=0, pad3=c, pad5=2
 *    src1    - vecN offset/coords
 *    src2    - value to store
 *
 * For ldib:
 *    pad1=1, pad3=c, pad5=2
 *    src1    - vecN offset/coords
 *
 * for ldc (load from UBO using descriptor):
 *    pad1=0, pad3=8, pad5=2
 *
 * pad2 and pad5 are only observed to be 0.
 *
 * NOTE(review): the last statement conflicts with the non-zero pad5
 * values listed above; it presumably refers to a different pad field -
 * confirm.
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t pad1      : 1;
	uint32_t base      : 3;
	uint32_t pad2      : 2;
	uint32_t desc_mode : 3;
	uint32_t d         : 2;
	uint32_t typed     : 1;
	uint32_t type_size : 2;
	uint32_t opc       : 5;
	uint32_t pad3      : 5;
	uint32_t src1      : 8;  /* coordinate/offset */

	/* dword1: */
	uint32_t src2      : 8;  /* or the dst for load instructions */
	uint32_t pad4      : 1;  /* mustbe0 ?? */
	uint32_t ssbo      : 8;  /* ssbo/image binding point */
	uint32_t type      : 3;
	uint32_t pad5      : 7;
	uint32_t jmp_tgt   : 1;
	uint32_t sync      : 1;
	uint32_t opc_cat   : 3;
} instr_cat6_a6xx_t;
934
/* Category 7 instruction encoding (two dwords); dword0 is all padding. */
typedef struct PACKED {
	/* dword0: */
	uint32_t pad1     : 32;

	/* dword1: */
	uint32_t pad2     : 12;
	uint32_t ss       : 1;  /* maybe in the encoding, but blob only uses (sy) */
	uint32_t pad3     : 6;
	uint32_t w        : 1;  /* write */
	uint32_t r        : 1;  /* read */
	uint32_t l        : 1;  /* local */
	uint32_t g        : 1;  /* global */
	uint32_t opc      : 4;  /* presumed, but only a couple known OPCs */
	uint32_t jmp_tgt  : 1;  /* (jp) */
	uint32_t sync     : 1;  /* (sy) */
	uint32_t opc_cat  : 3;
} instr_cat7_t;
952
953 typedef union PACKED {
954 instr_cat0_t cat0;
955 instr_cat1_t cat1;
956 instr_cat2_t cat2;
957 instr_cat3_t cat3;
958 instr_cat4_t cat4;
959 instr_cat5_t cat5;
960 instr_cat6_t cat6;
961 instr_cat6_a6xx_t cat6_a6xx;
962 instr_cat7_t cat7;
963 struct PACKED {
964 /* dword0: */
965 uint32_t pad1 : 32;
966
967 /* dword1: */
968 uint32_t pad2 : 12;
969 uint32_t ss : 1; /* cat1-cat4 (cat0??) and cat7 (?) */
970 uint32_t ul : 1; /* cat2-cat4 (and cat1 in blob.. which may be bug??) */
971 uint32_t pad3 : 13;
972 uint32_t jmp_tgt : 1;
973 uint32_t sync : 1;
974 uint32_t opc_cat : 3;
975
976 };
977 } instr_t;
978
/* Returns the repeat field of a cat0-cat4 instruction, or 0 for any
 * other category.
 */
static inline uint32_t instr_repeat(instr_t *instr)
{
	uint32_t rpt = 0;

	switch (instr->opc_cat) {
	case 0:
		rpt = instr->cat0.repeat;
		break;
	case 1:
		rpt = instr->cat1.repeat;
		break;
	case 2:
		rpt = instr->cat2.repeat;
		break;
	case 3:
		rpt = instr->cat3.repeat;
		break;
	case 4:
		rpt = instr->cat4.repeat;
		break;
	default:
		break;
	}

	return rpt;
}
990
/* Returns the sat bit of a cat2/cat3/cat4 instruction; false for any
 * other category.
 */
static inline bool instr_sat(instr_t *instr)
{
	const unsigned cat = instr->opc_cat;

	if (cat == 2)
		return instr->cat2.sat;
	if (cat == 3)
		return instr->cat3.sat;
	if (cat == 4)
		return instr->cat4.sat;

	return false;
}
1000
1001 /* We can probably drop the gpu_id arg, but keeping it for now so we can
1002 * assert if we see something we think should be new encoding on an older
1003 * gpu.
1004 */
is_cat6_legacy(instr_t * instr,unsigned gpu_id)1005 static inline bool is_cat6_legacy(instr_t *instr, unsigned gpu_id)
1006 {
1007 instr_cat6_a6xx_t *cat6 = &instr->cat6_a6xx;
1008
1009 /* At least one of these two bits is pad in all the possible
1010 * "legacy" cat6 encodings, and a analysis of all the pre-a6xx
1011 * cmdstream traces I have indicates that the pad bit is zero
1012 * in all cases. So we can use this to detect new encoding:
1013 */
1014 if ((cat6->pad3 & 0x8) && (cat6->pad5 & 0x2)) {
1015 ir3_assert(gpu_id >= 600);
1016 ir3_assert(instr->cat6.opc == 0);
1017 return false;
1018 }
1019
1020 return true;
1021 }
1022
/* Extracts the per-category opcode number from an instruction.
 *
 * cat0 combines its opc field with the opc_hi bit; cat1 always yields 0;
 * cat6 reads the opcode from whichever of the legacy or a6xx encodings
 * is_cat6_legacy() reports.  gpu_id is only passed through to
 * is_cat6_legacy().
 */
static inline uint32_t instr_opc(instr_t *instr, unsigned gpu_id)
{
	const unsigned cat = instr->opc_cat;

	if (cat == 0)
		return instr->cat0.opc | instr->cat0.opc_hi << 4;
	if (cat == 1)
		return 0;
	if (cat == 2)
		return instr->cat2.opc;
	if (cat == 3)
		return instr->cat3.opc;
	if (cat == 4)
		return instr->cat4.opc;
	if (cat == 5)
		return instr->cat5.opc;
	if (cat == 6) {
		if (is_cat6_legacy(instr, gpu_id))
			return instr->cat6.opc;
		return instr->cat6_a6xx.opc;
	}
	if (cat == 7)
		return instr->cat7.opc;

	return 0;
}
1040
/* True for the cat3 mad.* opcodes (u16/s16/u24/s24/f16/f32). */
static inline bool is_mad(opc_t opc)
{
	return opc == OPC_MAD_U16 ||
		opc == OPC_MAD_S16 ||
		opc == OPC_MAD_U24 ||
		opc == OPC_MAD_S24 ||
		opc == OPC_MAD_F16 ||
		opc == OPC_MAD_F32;
}
1055
/* True for the two madsh opcodes (madsh.u16 and madsh.m16). */
static inline bool is_madsh(opc_t opc)
{
	return opc == OPC_MADSH_U16 || opc == OPC_MADSH_M16;
}
1066
/* True for any of the cat6 OPC_ATOMIC_* opcodes. */
static inline bool is_atomic(opc_t opc)
{
	bool atomic = false;

	switch (opc) {
	case OPC_ATOMIC_ADD:
	case OPC_ATOMIC_SUB:
	case OPC_ATOMIC_XCHG:
	case OPC_ATOMIC_INC:
	case OPC_ATOMIC_DEC:
	case OPC_ATOMIC_CMPXCHG:
	case OPC_ATOMIC_MIN:
	case OPC_ATOMIC_MAX:
	case OPC_ATOMIC_AND:
	case OPC_ATOMIC_OR:
	case OPC_ATOMIC_XOR:
		atomic = true;
		break;
	default:
		break;
	}

	return atomic;
}
1086
/* True for resfmt, resinfo, ldgb, stgb and stib. */
static inline bool is_ssbo(opc_t opc)
{
	return opc == OPC_RESFMT ||
		opc == OPC_RESINFO ||
		opc == OPC_LDGB ||
		opc == OPC_STGB ||
		opc == OPC_STIB;
}
1100
/* True for the isam family of cat5 opcodes (isam, isaml, isamm). */
static inline bool is_isam(opc_t opc)
{
	return opc == OPC_ISAM || opc == OPC_ISAML || opc == OPC_ISAMM;
}
1112
1113
/* True for the cat2 opcodes carrying the _F (float) suffix. */
static inline bool is_cat2_float(opc_t opc)
{
	bool is_float = false;

	switch (opc) {
	case OPC_ADD_F:
	case OPC_MIN_F:
	case OPC_MAX_F:
	case OPC_MUL_F:
	case OPC_SIGN_F:
	case OPC_CMPS_F:
	case OPC_ABSNEG_F:
	case OPC_CMPV_F:
	case OPC_FLOOR_F:
	case OPC_CEIL_F:
	case OPC_RNDNE_F:
	case OPC_RNDAZ_F:
	case OPC_TRUNC_F:
		is_float = true;
		break;
	default:
		break;
	}

	return is_float;
}
1136
/* True for the cat3 float opcodes: mad.f16/f32 and sel.f16/f32. */
static inline bool is_cat3_float(opc_t opc)
{
	return opc == OPC_MAD_F16 ||
		opc == OPC_MAD_F32 ||
		opc == OPC_SEL_F16 ||
		opc == OPC_SEL_F32;
}
1149
1150 #endif /* INSTR_A3XX_H_ */
1151