1 /*
2  * Copyright 2021 Alyssa Rosenzweig
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #include "agx_compiler.h"
7 #include "agx_opcodes.h"
8 
9 /* Binary patches needed for branch offsets */
10 struct agx_branch_fixup {
11    /* Offset into the binary to patch */
12    off_t offset;
13 
14    /* Value to patch with; this will be block->offset */
15    agx_block *block;
16 
17    /* If true, skips to the last instruction of the target block */
18    bool skip_to_end;
19 };
20 
21 static void
22 pack_assert_internal(const agx_instr *I, bool condition, const char *msg)
23 {
24    if (!condition) {
25       fprintf(stderr, "Packing assertion failed for instruction:\n\n");
26       agx_print_instr(I, stderr);
27       fprintf(stderr, "\n%s\n", msg);
28       abort();
29    }
30 }
31 
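/* Wrappers around pack_assert_internal() that stringify the condition; on
 * failure the offending instruction and the condition are printed before
 * aborting, which makes packing bugs easier to diagnose than a bare assert.
 */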
32 #define pack_assert_msg(I, cond, msg)                                          \
33    pack_assert_internal(I, cond, msg ": " #cond)
34 
35 #define pack_assert(I, cond) pack_assert_internal(I, cond, #cond)
36 
37 static void
38 assert_register_is_aligned(const agx_instr *I, agx_index reg)
39 {
40    pack_assert_msg(I, reg.type == AGX_INDEX_REGISTER, "expecting a register");
41 
42    switch (reg.size) {
43    case AGX_SIZE_16:
44       return;
45    case AGX_SIZE_32:
46       pack_assert_msg(I, (reg.value & 1) == 0, "unaligned reg");
47       return;
48    case AGX_SIZE_64:
49       pack_assert_msg(I, (reg.value & 3) == 0, "unaligned reg");
50       return;
51    }
52 
53    unreachable("Invalid register size");
54 }
55 
56 /* Texturing has its own operands */
57 static unsigned
58 agx_pack_sample_coords(const agx_instr *I, agx_index index, bool *flag,
59                        bool *is_16)
60 {
61    /* TODO: Do we have a use case for 16-bit coords? */
62    pack_assert_msg(I, index.size == AGX_SIZE_32, "32-bit coordinates");
63    pack_assert_msg(I, index.value < 0x100, "coordinate register bound");
64 
65    *is_16 = false;
66    *flag = index.discard;
67    return index.value;
68 }
69 
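/* Pack the texture descriptor operands. The flag records how the descriptor
 * is addressed: 0 for an immediate index into the texture state registers,
 * 1 for a 16-bit register index, and 3 for bindless access through a 64-bit
 * uniform base (packed as base.value >> 2) plus a 32-bit index.
 */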
70 static unsigned
71 agx_pack_texture(const agx_instr *I, agx_index base, agx_index index,
72                  unsigned *packed_base, unsigned *flag)
73 {
74    if (base.type == AGX_INDEX_IMMEDIATE) {
75       pack_assert(I, base.value == 0);
76 
77       /* Texture state registers */
78       *packed_base = 0;
79 
80       if (index.type == AGX_INDEX_REGISTER) {
81          pack_assert(I, index.size == AGX_SIZE_16);
82          *flag = 1;
83       } else {
84          pack_assert(I, index.type == AGX_INDEX_IMMEDIATE);
85          *flag = 0;
86       }
87    } else {
88       pack_assert(I, base.type == AGX_INDEX_UNIFORM);
89       pack_assert(I, base.size == AGX_SIZE_64);
90       pack_assert(I, (base.value & 3) == 0);
91       pack_assert(I, index.size == AGX_SIZE_32);
92 
93       /* Bindless */
94       *packed_base = base.value >> 2;
95       *flag = 3;
96    }
97 
98    return index.value;
99 }
100 
101 static unsigned
102 agx_pack_sampler(const agx_instr *I, agx_index index, bool *flag)
103 {
104    if (index.type == AGX_INDEX_REGISTER) {
105       pack_assert(I, index.size == AGX_SIZE_16);
106       *flag = 1;
107    } else {
108       pack_assert(I, index.type == AGX_INDEX_IMMEDIATE);
109       *flag = 0;
110    }
111 
112    return index.value;
113 }
114 
115 static unsigned
116 agx_pack_sample_compare_offset(const agx_instr *I, agx_index index)
117 {
118    if (index.type == AGX_INDEX_NULL)
119       return 0;
120 
121    pack_assert(I, index.size == AGX_SIZE_32);
122    pack_assert(I, index.value < 0x100);
123    assert_register_is_aligned(I, index);
124    return index.value;
125 }
126 
127 static unsigned
128 agx_pack_lod(const agx_instr *I, agx_index index, unsigned *lod_mode)
129 {
130    /* For automatic LOD, the LOD field is unused. Assert as much. */
131    if ((*lod_mode) == AGX_LOD_MODE_AUTO_LOD) {
132       pack_assert(I, index.type == AGX_INDEX_IMMEDIATE);
133       pack_assert(I, index.value == 0);
134       return 0;
135    }
136 
137    if (index.type == AGX_INDEX_UNIFORM) {
138       /* Translate LOD mode from register mode to uniform mode */
139       pack_assert(I,
140                   ((*lod_mode) & BITFIELD_BIT(2)) && "must start as reg mode");
141       *lod_mode = (*lod_mode) & ~BITFIELD_BIT(2);
142       pack_assert(I, index.value < 0x200);
143    } else {
144       /* Otherwise must be registers */
145       pack_assert(I, index.type == AGX_INDEX_REGISTER);
146       pack_assert(I, index.value < 0x100);
147    }
148 
149    return index.value;
150 }
151 
152 static unsigned
153 agx_pack_pbe_source(const agx_instr *I, agx_index index, bool *flag)
154 {
155    pack_assert(I, index.size == AGX_SIZE_16 || index.size == AGX_SIZE_32);
156    assert_register_is_aligned(I, index);
157 
158    *flag = (index.size == AGX_SIZE_32);
159    return index.value;
160 }
161 
162 static unsigned
163 agx_pack_pbe_lod(const agx_instr *I, agx_index index, bool *flag)
164 {
165    pack_assert(I, index.size == AGX_SIZE_16);
166 
167    if (index.type == AGX_INDEX_IMMEDIATE)
168       *flag = true;
169    else if (index.type == AGX_INDEX_REGISTER)
170       *flag = false;
171    else
172       unreachable("Invalid PBE LOD type");
173 
174    return index.value;
175 }
176 
177 /* Load/stores have their own operands */
178 
179 static unsigned
180 agx_pack_memory_reg(const agx_instr *I, agx_index index, bool *flag)
181 {
182    assert_register_is_aligned(I, index);
183 
184    *flag = (index.size >= AGX_SIZE_32);
185    return index.value;
186 }
187 
188 static unsigned
189 agx_pack_memory_base(const agx_instr *I, agx_index index, bool *flag)
190 {
191    pack_assert(I, index.size == AGX_SIZE_64);
192    pack_assert(I, (index.value & 1) == 0);
193 
194    /* Can't seem to access high uniforms from memory instructions */
195    pack_assert(I, index.value < 0x100);
196 
197    if (index.type == AGX_INDEX_UNIFORM) {
198       *flag = 1;
199    } else {
200       pack_assert(I, index.type == AGX_INDEX_REGISTER);
201       *flag = 0;
202    }
203 
204    return index.value;
205 }
206 
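/* Memory offsets are either a 16-bit immediate (flag = 1) or an aligned
 * 32-bit register (flag = 0).
 */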
207 static unsigned
208 agx_pack_memory_index(const agx_instr *I, agx_index index, bool *flag)
209 {
210    if (index.type == AGX_INDEX_IMMEDIATE) {
211       pack_assert(I, index.value < 0x10000);
212       *flag = 1;
213 
214       return index.value;
215    } else {
216       pack_assert(I, index.type == AGX_INDEX_REGISTER);
217       pack_assert(I, index.size == AGX_SIZE_32);
218       pack_assert(I, (index.value & 1) == 0);
219       pack_assert(I, index.value < 0x100);
220 
221       *flag = 0;
222       return index.value;
223    }
224 }
225 
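/* Local memory bases are 16-bit and take one of three forms, distinguished by
 * flags: an immediate zero (flags = 2), a uniform (flags = 1 plus the high
 * bits of the uniform index), or a register (flags = 0).
 */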
226 static uint16_t
227 agx_pack_local_base(const agx_instr *I, agx_index index, unsigned *flags)
228 {
229    pack_assert(I, index.size == AGX_SIZE_16);
230 
231    if (index.type == AGX_INDEX_IMMEDIATE) {
232       pack_assert(I, index.value == 0);
233       *flags = 2;
234       return 0;
235    } else if (index.type == AGX_INDEX_UNIFORM) {
236       *flags = 1 | ((index.value >> 8) << 1);
237       return index.value & BITFIELD_MASK(8);
238    } else {
239       assert_register_is_aligned(I, index);
240       *flags = 0;
241       return index.value;
242    }
243 }
244 
245 static uint16_t
246 agx_pack_local_index(const agx_instr *I, agx_index index, bool *flag)
247 {
248    pack_assert(I, index.size == AGX_SIZE_16);
249 
250    if (index.type == AGX_INDEX_IMMEDIATE) {
251       pack_assert(I, index.value < 0x10000);
252       *flag = 1;
253       return index.value;
254    } else {
255       assert_register_is_aligned(I, index);
256       *flag = 0;
257       return index.value;
258    }
259 }
260 
261 static unsigned
262 agx_pack_atomic_source(const agx_instr *I, agx_index index)
263 {
264    pack_assert_msg(I, index.size == AGX_SIZE_32, "no 64-bit atomics yet");
265    assert_register_is_aligned(I, index);
266    return index.value;
267 }
268 
269 static unsigned
270 agx_pack_atomic_dest(const agx_instr *I, agx_index index, bool *flag)
271 {
272    /* Atomic destinations are optional (e.g. for update with no return) */
273    if (index.type == AGX_INDEX_NULL) {
274       *flag = 0;
275       return 0;
276    }
277 
278    /* But are otherwise registers */
279    pack_assert_msg(I, index.size == AGX_SIZE_32, "no 64-bit atomics yet");
280    assert_register_is_aligned(I, index);
281    *flag = 1;
282    return index.value;
283 }
284 
285 /* ALU goes through a common path */
286 
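/* Destination encoding: bit 0 is the cache flag, bit 1 is set for 32/64-bit
 * destinations and bit 2 for 64-bit, with the register number ORed in shifted
 * left by 2 (alignment keeps the fields from clashing). The bits above the
 * low 8 are placed separately by each caller.
 */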
287 static unsigned
288 agx_pack_alu_dst(const agx_instr *I, agx_index dest)
289 {
290    assert_register_is_aligned(I, dest);
291    unsigned reg = dest.value;
292    enum agx_size size = dest.size;
293    pack_assert(I, reg < 0x100);
294 
295    return (dest.cache ? (1 << 0) : 0) | ((size >= AGX_SIZE_32) ? (1 << 1) : 0) |
296           ((size == AGX_SIZE_64) ? (1 << 2) : 0) | ((reg << 2));
297 }
298 
299 static unsigned
300 agx_pack_alu_src(const agx_instr *I, agx_index src)
301 {
302    unsigned value = src.value;
303    enum agx_size size = src.size;
304 
305    if (src.type == AGX_INDEX_IMMEDIATE) {
306       /* Flags 0 for an 8-bit immediate */
307       pack_assert(I, value < 0x100);
308 
309       return (value & BITFIELD_MASK(6)) | ((value >> 6) << 10);
310    } else if (src.type == AGX_INDEX_UNIFORM) {
311       pack_assert(I, size == AGX_SIZE_16 || size == AGX_SIZE_32);
312       pack_assert(I, value < AGX_NUM_UNIFORMS);
313 
314       return (value & BITFIELD_MASK(6)) |
315              ((value & BITFIELD_BIT(8)) ? (1 << 6) : 0) |
316              ((size == AGX_SIZE_32) ? (1 << 7) : 0) | (0x1 << 8) |
317              (((value >> 6) & BITFIELD_MASK(2)) << 10);
318    } else {
319       assert_register_is_aligned(I, src);
320       pack_assert(I, !(src.cache && src.discard));
321 
322       unsigned hint = src.discard ? 0x3 : src.cache ? 0x2 : 0x1;
323       unsigned size_flag = (size == AGX_SIZE_64)   ? 0x3
324                            : (size == AGX_SIZE_32) ? 0x2
325                            : (size == AGX_SIZE_16) ? 0x0
326                                                    : 0x0;
327 
328       return (value & BITFIELD_MASK(6)) | (hint << 6) | (size_flag << 8) |
329              (((value >> 6) & BITFIELD_MASK(2)) << 10);
330    }
331 }
332 
333 static unsigned
334 agx_pack_cmpsel_src(const agx_instr *I, agx_index src, enum agx_size dest_size)
335 {
336    unsigned value = src.value;
337    ASSERTED enum agx_size size = src.size;
338 
339    if (src.type == AGX_INDEX_IMMEDIATE) {
340       /* Flags 0x4 for an 8-bit immediate */
341       pack_assert(I, value < 0x100);
342 
343       return (value & BITFIELD_MASK(6)) | (0x4 << 6) | ((value >> 6) << 10);
344    } else if (src.type == AGX_INDEX_UNIFORM) {
345       pack_assert(I, size == AGX_SIZE_16 || size == AGX_SIZE_32);
346       pack_assert(I, size == dest_size);
347       pack_assert(I, value < 0x200);
348 
349       return (value & BITFIELD_MASK(6)) | ((value >> 8) << 6) | (0x3 << 7) |
350              (((value >> 6) & BITFIELD_MASK(2)) << 10);
351    } else {
352       pack_assert(I, src.type == AGX_INDEX_REGISTER);
353       pack_assert(I, !(src.cache && src.discard));
354       pack_assert(I, size == AGX_SIZE_16 || size == AGX_SIZE_32);
355       pack_assert(I, size == dest_size);
356       assert_register_is_aligned(I, src);
357 
358       unsigned hint = src.discard ? 0x3 : src.cache ? 0x2 : 0x1;
359 
360       return (value & BITFIELD_MASK(6)) | (hint << 6) |
361              (((value >> 6) & BITFIELD_MASK(2)) << 10);
362    }
363 }
364 
365 static unsigned
366 agx_pack_sample_mask_src(const agx_instr *I, agx_index src)
367 {
368    unsigned value = src.value;
369    unsigned packed_value =
370       (value & BITFIELD_MASK(6)) | (((value >> 6) & BITFIELD_MASK(2)) << 10);
371 
372    if (src.type == AGX_INDEX_IMMEDIATE) {
373       pack_assert(I, value < 0x100);
374       return packed_value | (1 << 7);
375    } else {
376       pack_assert(I, src.type == AGX_INDEX_REGISTER);
377       assert_register_is_aligned(I, src);
378       pack_assert(I, !(src.cache && src.discard));
379 
380       return packed_value;
381    }
382 }
383 
384 static unsigned
385 agx_pack_float_mod(agx_index src)
386 {
387    return (src.abs ? (1 << 0) : 0) | (src.neg ? (1 << 1) : 0);
388 }
389 
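/* True if every non-null source and destination of the instruction is 16-bit */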
390 static bool
391 agx_all_16(agx_instr *I)
392 {
393    agx_foreach_dest(I, d) {
394       if (!agx_is_null(I->dest[d]) && I->dest[d].size != AGX_SIZE_16)
395          return false;
396    }
397 
398    agx_foreach_src(I, s) {
399       if (!agx_is_null(I->src[s]) && I->src[s].size != AGX_SIZE_16)
400          return false;
401    }
402 
403    return true;
404 }
405 
406 /* Generic pack for ALU instructions, which are quite regular */
407 
408 static void
409 agx_pack_alu(struct util_dynarray *emission, agx_instr *I)
410 {
411    struct agx_opcode_info info = agx_opcodes_info[I->op];
412    struct agx_encoding encoding = info.encoding;
413 
414    bool is_f16 = (I->op == AGX_OPCODE_HMUL || I->op == AGX_OPCODE_HFMA ||
415                   I->op == AGX_OPCODE_HADD);
416 
417    pack_assert_msg(I, encoding.exact, "invalid encoding");
418 
419    uint64_t raw = encoding.exact;
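   /* Operand bits that do not fit in the short encoding accumulate in
    * "extend"; if any are set, or raw overflows the short length, the length
    * bit is set, the instruction grows, and the extension bits land in the
    * extra bytes.
    */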
420    uint16_t extend = 0;
421 
422    // TODO: assert saturable
423    if (I->saturate)
424       raw |= (1 << 6);
425 
426    if (info.nr_dests) {
427       pack_assert(I, info.nr_dests == 1);
428       unsigned D = agx_pack_alu_dst(I, I->dest[0]);
429       unsigned extend_offset = (sizeof(extend) * 8) - 4;
430 
431       raw |= (D & BITFIELD_MASK(8)) << 7;
432       extend |= ((D >> 8) << extend_offset);
433 
434       if (info.immediates & AGX_IMMEDIATE_INVERT_COND) {
435          raw |= (uint64_t)(I->invert_cond) << 47;
436       }
437    } else if (info.immediates & AGX_IMMEDIATE_NEST) {
438       raw |= (I->invert_cond << 8);
439       raw |= (I->nest << 11);
440       raw |= (I->icond << 13);
441    }
442 
443    for (unsigned s = 0; s < info.nr_srcs; ++s) {
444       bool is_cmpsel = (s >= 2) && (I->op == AGX_OPCODE_ICMPSEL ||
445                                     I->op == AGX_OPCODE_FCMPSEL);
446 
447       unsigned src = is_cmpsel
448                         ? agx_pack_cmpsel_src(I, I->src[s], I->dest[0].size)
449                         : agx_pack_alu_src(I, I->src[s]);
450 
451       unsigned src_short = (src & BITFIELD_MASK(10));
452       unsigned src_extend = (src >> 10);
453 
454       /* Size bit always zero and so omitted for 16-bit */
455       if (is_f16 && !is_cmpsel)
456          pack_assert(I, (src_short & (1 << 9)) == 0);
457 
458       if (info.is_float || (I->op == AGX_OPCODE_FCMPSEL && !is_cmpsel)) {
459          unsigned fmod = agx_pack_float_mod(I->src[s]);
460          unsigned fmod_offset = is_f16 ? 9 : 10;
461          src_short |= (fmod << fmod_offset);
462       } else if (I->op == AGX_OPCODE_IMAD || I->op == AGX_OPCODE_IADD) {
463          /* Force unsigned for immediates so uadd_sat works properly */
464          bool zext = I->src[s].abs || I->src[s].type == AGX_INDEX_IMMEDIATE;
465          bool extends = I->src[s].size < AGX_SIZE_64;
466 
467          unsigned sxt = (extends && !zext) ? (1 << 10) : 0;
468 
469          unsigned negate_src = (I->op == AGX_OPCODE_IMAD) ? 2 : 1;
470          pack_assert(I, !I->src[s].neg || s == negate_src);
471          src_short |= sxt;
472       }
473 
474       /* Sources come at predictable offsets */
475       unsigned offset = 16 + (12 * s);
476       raw |= (((uint64_t)src_short) << offset);
477 
478       /* Destination and each source get extended in reverse order */
479       unsigned extend_offset = (sizeof(extend) * 8) - ((s + 3) * 2);
480       extend |= (src_extend << extend_offset);
481    }
482 
483    if ((I->op == AGX_OPCODE_IMAD && I->src[2].neg) ||
484        (I->op == AGX_OPCODE_IADD && I->src[1].neg))
485       raw |= (1 << 27);
486 
487    if (info.immediates & AGX_IMMEDIATE_TRUTH_TABLE) {
488       raw |= (I->truth_table & 0x3) << 26;
489       raw |= (uint64_t)(I->truth_table >> 2) << 38;
490    } else if (info.immediates & AGX_IMMEDIATE_SHIFT) {
491       pack_assert(I, I->shift <= 4);
492       raw |= (uint64_t)(I->shift & 1) << 39;
493       raw |= (uint64_t)(I->shift >> 1) << 52;
494    } else if (info.immediates & AGX_IMMEDIATE_BFI_MASK) {
495       raw |= (uint64_t)(I->bfi_mask & 0x3) << 38;
496       raw |= (uint64_t)((I->bfi_mask >> 2) & 0x3) << 50;
497       raw |= (uint64_t)((I->bfi_mask >> 4) & 0x1) << 63;
498    } else if (info.immediates & AGX_IMMEDIATE_SIMD_OP) {
499       raw |= (uint64_t)(I->simd_op & 0x1) << 28;
500       raw |= (uint64_t)((I->simd_op >> 1) & 0x7) << 38;
501       raw |= (uint64_t)((I->simd_op >> 4) & 0x1) << 47;
502    } else if (info.immediates & AGX_IMMEDIATE_SR) {
503       raw |= (uint64_t)(I->sr & 0x3F) << 16;
504       raw |= (uint64_t)(I->sr >> 6) << 26;
505    } else if (info.immediates & AGX_IMMEDIATE_WRITEOUT)
506       raw |= (uint64_t)(I->imm) << 8;
507    else if (info.immediates & AGX_IMMEDIATE_IMM)
508       raw |= (uint64_t)(I->imm) << 16;
509    else if (info.immediates & AGX_IMMEDIATE_ROUND)
510       raw |= (uint64_t)(I->imm) << 26;
511    else if (info.immediates & (AGX_IMMEDIATE_FCOND | AGX_IMMEDIATE_ICOND))
512       raw |= (uint64_t)(I->fcond) << 61;
513 
514    /* Determine length bit */
515    unsigned length = encoding.length_short;
516    if (I->op == AGX_OPCODE_MOV_IMM && I->dest[0].size == AGX_SIZE_16)
517       length -= 2;
518 
519    uint64_t short_mask = BITFIELD64_MASK(8 * length);
520    bool length_bit = (extend || (raw & ~short_mask));
521 
522    if (encoding.extensible && length_bit) {
523       raw |= (1 << 15);
524       length += (length > 8) ? 4 : 2;
525    }
526 
527    /* Pack! */
528    if (length <= sizeof(uint64_t)) {
529       unsigned extend_offset = ((length - sizeof(extend)) * 8);
530 
531       /* XXX: Encode these special cases better */
532       switch (I->op) {
533       case AGX_OPCODE_IADD:
534       case AGX_OPCODE_ICMP_BALLOT:
535       case AGX_OPCODE_ICMP_QUAD_BALLOT:
536       case AGX_OPCODE_FCMP_BALLOT:
537       case AGX_OPCODE_FCMP_QUAD_BALLOT:
538          extend_offset -= 16;
539          break;
540       default:
541          break;
542       }
543 
544       raw |= (uint64_t)extend << extend_offset;
545       memcpy(util_dynarray_grow_bytes(emission, 1, length), &raw, length);
546    } else {
547       /* So far, >8 byte ALU is only to store the extend bits */
548       unsigned extend_offset = (((length - sizeof(extend)) * 8) - 64);
549       unsigned hi = ((uint64_t)extend) << extend_offset;
550 
551       memcpy(util_dynarray_grow_bytes(emission, 1, 8), &raw, 8);
552       memcpy(util_dynarray_grow_bytes(emission, 1, length - 8), &hi,
553              length - 8);
554    }
555 }
556 
557 static void
558 agx_pack_instr(struct util_dynarray *emission, struct util_dynarray *fixups,
559                agx_instr *I, enum u_tristate needs_g13x_coherency)
560 {
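   /* Memory, texture and other irregular instructions are encoded case by
    * case below; anything not handled here falls through to the generic ALU
    * path in agx_pack_alu().
    */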
561    switch (I->op) {
562    case AGX_OPCODE_LD_TILE:
563    case AGX_OPCODE_ST_TILE: {
564       bool load = (I->op == AGX_OPCODE_LD_TILE);
565       unsigned D = agx_pack_alu_dst(I, load ? I->dest[0] : I->src[0]);
566       pack_assert(I, I->mask < 0x10);
567       pack_assert(I, I->pixel_offset < 0x200);
568 
569       agx_index sample_index = load ? I->src[0] : I->src[1];
570       agx_index coords = load ? I->src[1] : I->src[2];
571       pack_assert(I, sample_index.type == AGX_INDEX_REGISTER ||
572                         sample_index.type == AGX_INDEX_IMMEDIATE);
573       pack_assert(I, sample_index.size == AGX_SIZE_16);
574       unsigned St = (sample_index.type == AGX_INDEX_REGISTER) ? 1 : 0;
575       unsigned S = sample_index.value;
576       pack_assert(I, S < 0x100);
577 
578       pack_assert(I, I->explicit_coords == (coords.type == AGX_INDEX_REGISTER));
579       unsigned C = I->explicit_coords ? coords.value : 0;
580 
581       uint64_t raw = agx_opcodes_info[I->op].encoding.exact |
582                      ((uint64_t)(D & BITFIELD_MASK(8)) << 7) | (St << 22) |
583                      ((uint64_t)(I->format) << 24) |
584                      ((uint64_t)(C & BITFIELD_MASK(6)) << 16) |
585                      ((uint64_t)(I->pixel_offset & BITFIELD_MASK(7)) << 28) |
586                      (load || I->explicit_coords ? (1ull << 35) : 0) |
587                      ((uint64_t)(I->mask) << 36) |
588                      ((uint64_t)(I->pixel_offset >> 7) << 40) |
589                      ((uint64_t)(S & BITFIELD_MASK(6)) << 42) |
590                      (I->explicit_coords ? (1ull << 55) : 0) |
591                      ((uint64_t)(S >> 6) << 56) | ((uint64_t)(C >> 6) << 58) |
592                      (((uint64_t)(D >> 8)) << 60);
593 
594       unsigned size = 8;
595       memcpy(util_dynarray_grow_bytes(emission, 1, size), &raw, size);
596       break;
597    }
598 
599    case AGX_OPCODE_SAMPLE_MASK: {
600       unsigned S = agx_pack_sample_mask_src(I, I->src[1]);
601       unsigned T = I->src[0].value;
602       bool Tt = I->src[0].type == AGX_INDEX_IMMEDIATE;
603       pack_assert(I, Tt || I->src[0].type == AGX_INDEX_REGISTER);
604       uint32_t raw = 0xc1 | (Tt ? BITFIELD_BIT(8) : 0) |
605                      ((T & BITFIELD_MASK(6)) << 9) | ((S & 0xff) << 16) |
606                      ((T >> 6) << 24) | ((S >> 8) << 26);
607 
608       unsigned size = 4;
609       memcpy(util_dynarray_grow_bytes(emission, 1, size), &raw, size);
610       break;
611    }
612 
613    case AGX_OPCODE_WAIT: {
614       uint64_t raw =
615          agx_opcodes_info[I->op].encoding.exact | (I->scoreboard << 8);
616 
617       unsigned size = 2;
618       memcpy(util_dynarray_grow_bytes(emission, 1, size), &raw, size);
619       break;
620    }
621 
622    case AGX_OPCODE_ITER:
623    case AGX_OPCODE_ITERPROJ:
624    case AGX_OPCODE_LDCF: {
625       bool flat = (I->op == AGX_OPCODE_LDCF);
626       bool perspective = (I->op == AGX_OPCODE_ITERPROJ);
627       unsigned D = agx_pack_alu_dst(I, I->dest[0]);
628       unsigned channels = (I->channels & 0x3);
629 
630       agx_index src_I = I->src[0];
631       pack_assert(I, src_I.type == AGX_INDEX_IMMEDIATE ||
632                         src_I.type == AGX_INDEX_REGISTER);
633 
634       unsigned cf_I = src_I.value;
635       unsigned cf_J = 0;
636 
637       if (perspective) {
638          agx_index src_J = I->src[1];
639          pack_assert(I, src_J.type == AGX_INDEX_IMMEDIATE);
640          cf_J = src_J.value;
641       }
642 
643       pack_assert(I, cf_I < 0x100);
644       pack_assert(I, cf_J < 0x100);
645 
646       enum agx_interpolation interp = I->interpolation;
647       agx_index sample_index = flat ? agx_null() : I->src[perspective ? 2 : 1];
648 
649       /* Fix up the interpolation enum to distinguish the sample index source */
650       if (interp == AGX_INTERPOLATION_SAMPLE) {
651          if (sample_index.type == AGX_INDEX_REGISTER)
652             interp = AGX_INTERPOLATION_SAMPLE_REGISTER;
653          else
654             pack_assert(I, sample_index.type == AGX_INDEX_IMMEDIATE);
655       } else {
656          sample_index = agx_zero();
657       }
658 
659       bool kill = false;    // TODO: optimize
660       bool forward = false; // TODO: optimize
661 
662       uint64_t raw =
663          0x21 | (flat ? (1 << 7) : 0) | (perspective ? (1 << 6) : 0) |
664          ((D & 0xFF) << 7) | (1ull << 15) | /* XXX */
665          ((cf_I & BITFIELD_MASK(6)) << 16) |
666          ((src_I.type == AGX_INDEX_REGISTER) ? (1 << 23) : 0) |
667          ((cf_J & BITFIELD_MASK(6)) << 24) | (((uint64_t)channels) << 30) |
668          (((uint64_t)sample_index.value) << 32) | (forward ? (1ull << 46) : 0) |
669          (((uint64_t)interp) << 48) | (kill ? (1ull << 52) : 0) |
670          (((uint64_t)(D >> 8)) << 56) | ((uint64_t)(cf_I >> 6) << 58) |
671          ((uint64_t)(cf_J >> 6) << 60);
672 
673       unsigned size = 8;
674       memcpy(util_dynarray_grow_bytes(emission, 1, size), &raw, size);
675       break;
676    }
677 
678    case AGX_OPCODE_ST_VARY: {
679       agx_index index_src = I->src[0];
680       agx_index value = I->src[1];
681 
682       pack_assert(I, index_src.type == AGX_INDEX_IMMEDIATE ||
683                         index_src.type == AGX_INDEX_REGISTER);
684       pack_assert(I, index_src.value < BITFIELD_MASK(8));
685       pack_assert(I, value.type == AGX_INDEX_REGISTER);
686       pack_assert(I, value.size == AGX_SIZE_32);
687 
688       uint64_t raw = 0x11 | (I->last ? (1 << 7) : 0) |
689                      ((value.value & 0x3F) << 9) |
690                      (((uint64_t)(index_src.value & 0x3F)) << 16) |
691                      (index_src.type == AGX_INDEX_IMMEDIATE ? (1 << 23) : 0) |
692                      ((value.value >> 6) << 24) |
693                      ((index_src.value >> 6) << 26) | (0x8u << 28); /* XXX */
694 
695       unsigned size = 4;
696       memcpy(util_dynarray_grow_bytes(emission, 1, size), &raw, size);
697       break;
698    }
699 
700    case AGX_OPCODE_DEVICE_LOAD:
701    case AGX_OPCODE_DEVICE_STORE:
702    case AGX_OPCODE_UNIFORM_STORE: {
703       bool is_device_store = I->op == AGX_OPCODE_DEVICE_STORE;
704       bool is_uniform_store = I->op == AGX_OPCODE_UNIFORM_STORE;
705       bool is_store = is_device_store || is_uniform_store;
706       bool has_base = !is_uniform_store;
707 
708       /* Uniform stores are required to be 16-bit. The encoding that should be
709        * 32-bit annoyingly doesn't work. Fix up the format and size so we can
710        * use scalar 32-bit values in the IR and avoid special casing earlier in
711        * the compiler.
712        */
713       enum agx_format format = is_uniform_store ? AGX_FORMAT_I16 : I->format;
714       agx_index reg = is_store ? I->src[0] : I->dest[0];
715       unsigned mask = I->mask;
716 
717       if (is_uniform_store && reg.size != AGX_SIZE_16) {
718          if (reg.size == AGX_SIZE_64) {
719             assert(mask == 1);
720             mask = BITFIELD_MASK(4);
721          } else {
722             assert(reg.size == AGX_SIZE_32);
723             assert(mask == 1 || mask == 3);
724             mask = BITFIELD_MASK(mask == 3 ? 4 : 2);
725          }
726 
727          reg.size = AGX_SIZE_16;
728       }
729 
730       unsigned offset_src = (has_base ? 1 : 0) + (is_store ? 1 : 0);
731 
732       bool Rt, At = false, Ot;
733       unsigned R = agx_pack_memory_reg(I, reg, &Rt);
734       unsigned A =
735          has_base ? agx_pack_memory_base(I, I->src[is_store ? 1 : 0], &At) : 0;
736       unsigned O = agx_pack_memory_index(I, I->src[offset_src], &Ot);
737       unsigned u1 = is_uniform_store ? 0 : 1; // XXX
738       unsigned u3 = 0;
739       unsigned u4 = is_uniform_store ? 0 : I->coherent ? 7 : 4;
740       unsigned u5 = 0;
741       bool L = true; /* TODO: when would you want short? */
742 
743       pack_assert(I, mask != 0);
744       pack_assert(I, format <= 0x10);
745 
746       uint64_t raw =
747          agx_opcodes_info[I->op].encoding.exact |
748          ((format & BITFIELD_MASK(3)) << 7) | ((R & BITFIELD_MASK(6)) << 10) |
749          ((A & BITFIELD_MASK(4)) << 16) | ((O & BITFIELD_MASK(4)) << 20) |
750          (Ot ? (1 << 24) : 0) | (I->src[offset_src].abs ? (1 << 25) : 0) |
751          (is_uniform_store ? (2 << 25) : 0) | (u1 << 26) | (At << 27) |
752          (u3 << 28) | (I->scoreboard << 30) |
753          (((uint64_t)((O >> 4) & BITFIELD_MASK(4))) << 32) |
754          (((uint64_t)((A >> 4) & BITFIELD_MASK(4))) << 36) |
755          (((uint64_t)((R >> 6) & BITFIELD_MASK(2))) << 40) |
756          (((uint64_t)I->shift) << 42) | (((uint64_t)u4) << 44) |
757          (L ? (1ull << 47) : 0) | (((uint64_t)(format >> 3)) << 48) |
758          (((uint64_t)Rt) << 49) | (((uint64_t)u5) << 50) |
759          (((uint64_t)mask) << 52) | (((uint64_t)(O >> 8)) << 56);
760 
761       unsigned size = L ? 8 : 6;
762       memcpy(util_dynarray_grow_bytes(emission, 1, size), &raw, size);
763       break;
764    }
765 
766    case AGX_OPCODE_LOCAL_LOAD:
767    case AGX_OPCODE_LOCAL_STORE: {
768       bool is_load = I->op == AGX_OPCODE_LOCAL_LOAD;
769       bool L = true; /* TODO: when would you want short? */
770       unsigned At;
771       bool Rt, Ot;
772 
773       unsigned R =
774          agx_pack_memory_reg(I, is_load ? I->dest[0] : I->src[0], &Rt);
775       unsigned A = agx_pack_local_base(I, is_load ? I->src[0] : I->src[1], &At);
776       unsigned O =
777          agx_pack_local_index(I, is_load ? I->src[1] : I->src[2], &Ot);
778 
779       uint64_t raw =
780          agx_opcodes_info[I->op].encoding.exact | (Rt ? BITFIELD64_BIT(8) : 0) |
781          ((R & BITFIELD_MASK(6)) << 9) | (L ? BITFIELD64_BIT(15) : 0) |
782          ((A & BITFIELD_MASK(6)) << 16) | (At << 22) | (I->format << 24) |
783          ((O & BITFIELD64_MASK(6)) << 28) | (Ot ? BITFIELD64_BIT(34) : 0) |
784          (((uint64_t)I->mask) << 36) | (((uint64_t)(O >> 6)) << 48) |
785          (((uint64_t)(A >> 6)) << 58) | (((uint64_t)(R >> 6)) << 60);
786 
787       unsigned size = L ? 8 : 6;
788       memcpy(util_dynarray_grow_bytes(emission, 1, size), &raw, size);
789       break;
790    }
791 
792    case AGX_OPCODE_ATOMIC: {
793       bool At, Ot, Rt;
794       unsigned A = agx_pack_memory_base(I, I->src[1], &At);
795       unsigned O = agx_pack_memory_index(I, I->src[2], &Ot);
796       unsigned R = agx_pack_atomic_dest(I, I->dest[0], &Rt);
797       unsigned S = agx_pack_atomic_source(I, I->src[0]);
798 
799       /* Due to a hardware quirk, there is a bit in the atomic instruction that
800        * differs based on the target GPU. So, if we're packing an atomic, the
801        * shader must be keyed to a particular GPU (either needs_g13x_coherency
802        * or not needs_g13x_coherency). Assert that here.
803        *
804        * needs_g13x_coherency == U_TRISTATE_UNSET is only allowed for shaders
805        * that do not use atomics and are therefore portable across devices.
806        */
807       assert(needs_g13x_coherency != U_TRISTATE_UNSET);
808 
809       uint64_t raw =
810          agx_opcodes_info[I->op].encoding.exact |
811          (((uint64_t)I->atomic_opc) << 6) | ((R & BITFIELD_MASK(6)) << 10) |
812          ((A & BITFIELD_MASK(4)) << 16) | ((O & BITFIELD_MASK(4)) << 20) |
813          (Ot ? (1 << 24) : 0) | (I->src[2].abs ? (1 << 25) : 0) | (At << 27) |
814          (I->scoreboard << 30) |
815          (((uint64_t)((O >> 4) & BITFIELD_MASK(4))) << 32) |
816          (((uint64_t)((A >> 4) & BITFIELD_MASK(4))) << 36) |
817          (((uint64_t)(R >> 6)) << 40) |
818          (needs_g13x_coherency == U_TRISTATE_YES ? BITFIELD64_BIT(45) : 0) |
819          (Rt ? BITFIELD64_BIT(47) : 0) | (((uint64_t)S) << 48) |
820          (((uint64_t)(O >> 8)) << 56);
821 
822       memcpy(util_dynarray_grow_bytes(emission, 1, 8), &raw, 8);
823       break;
824    }
825 
826    case AGX_OPCODE_LOCAL_ATOMIC: {
827       bool L = true; /* TODO: Don't force */
828 
829       unsigned At;
830       bool Rt = false, Ot;
831 
832       bool Ra = I->dest[0].type != AGX_INDEX_NULL;
833       unsigned R = Ra ? agx_pack_memory_reg(I, I->dest[0], &Rt) : 0;
834       unsigned S = agx_pack_atomic_source(I, I->src[0]);
835       unsigned A = agx_pack_local_base(I, I->src[1], &At);
836       unsigned O = agx_pack_local_index(I, I->src[2], &Ot);
837 
838       uint64_t raw =
839          agx_opcodes_info[I->op].encoding.exact | (Rt ? BITFIELD64_BIT(8) : 0) |
840          ((R & BITFIELD_MASK(6)) << 9) | (L ? BITFIELD64_BIT(15) : 0) |
841          ((A & BITFIELD_MASK(6)) << 16) | (At << 22) |
842          (((uint64_t)I->atomic_opc) << 24) | ((O & BITFIELD64_MASK(6)) << 28) |
843          (Ot ? BITFIELD64_BIT(34) : 0) | (Ra ? BITFIELD64_BIT(38) : 0) |
844          (((uint64_t)(O >> 6)) << 48) | (((uint64_t)(A >> 6)) << 58) |
845          (((uint64_t)(R >> 6)) << 60);
846 
847       uint64_t raw2 = S;
848 
849       memcpy(util_dynarray_grow_bytes(emission, 1, 8), &raw, 8);
850       memcpy(util_dynarray_grow_bytes(emission, 1, 2), &raw2, 2);
851       break;
852    }
853 
854    case AGX_OPCODE_TEXTURE_LOAD:
855    case AGX_OPCODE_IMAGE_LOAD:
856    case AGX_OPCODE_TEXTURE_SAMPLE: {
857       pack_assert(I, I->mask != 0);
858       pack_assert(I, I->format <= 0x10);
859 
860       bool Rt, Ct, St, Cs;
861       unsigned Tt;
862       unsigned U;
863       enum agx_lod_mode lod_mode = I->lod_mode;
864 
865       unsigned R = agx_pack_memory_reg(I, I->dest[0], &Rt);
866       unsigned C = agx_pack_sample_coords(I, I->src[0], &Ct, &Cs);
867       unsigned T = agx_pack_texture(I, I->src[2], I->src[3], &U, &Tt);
868       unsigned S = agx_pack_sampler(I, I->src[4], &St);
869       unsigned O = agx_pack_sample_compare_offset(I, I->src[5]);
870       unsigned D = agx_pack_lod(I, I->src[1], &lod_mode);
871 
872       unsigned q1 = I->shadow;
873       unsigned q2 = I->query_lod ? 2 : 0;
874       unsigned q3 = 0xc; // XXX
875       unsigned kill = 0; // helper invocation kill bit
876 
877       /* Set bit 43 for image loads. This seems to make sure that image loads
878        * get the value written by the latest image store, not some other image
879        * store that was already in flight, fixing
880        *
881        *    KHR-GLES31.core.shader_image_load_store.basic-glsl-misc-fs
882        *
883        * Apple seems to set this bit unconditionally for read/write image loads
884        * and never for readonly image loads. Some sort of cache control.
885        */
886       if (I->op == AGX_OPCODE_IMAGE_LOAD) {
887          q3 |= 1;
888 
889          /* Cache bypass for multidie coherency */
890          if (I->coherent) {
891             q3 |= 2;
892          }
893       }
894 
895       uint32_t extend = ((U & BITFIELD_MASK(5)) << 0) | (kill << 5) |
896                         ((I->dim >> 3) << 7) | ((R >> 6) << 8) |
897                         ((C >> 6) << 10) | ((D >> 6) << 12) | ((T >> 6) << 14) |
898                         ((O & BITFIELD_MASK(6)) << 16) | (I->gather << 23) |
899                         (I->offset << 27) | ((S >> 6) << 28) | ((O >> 6) << 30);
900 
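      /* Use the long encoding only when extension bits are actually needed */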
901       bool L = (extend != 0);
902 
903       uint64_t raw =
904          0x31 | ((I->op != AGX_OPCODE_TEXTURE_SAMPLE) ? (1 << 6) : 0) |
905          (Rt ? (1 << 8) : 0) | ((R & BITFIELD_MASK(6)) << 9) |
906          (L ? (1 << 15) : 0) | ((C & BITFIELD_MASK(6)) << 16) |
907          (Ct ? (1 << 22) : 0) | (q1 << 23) | ((D & BITFIELD_MASK(6)) << 24) |
908          (q2 << 30) | (((uint64_t)(T & BITFIELD_MASK(6))) << 32) |
909          (((uint64_t)Tt) << 38) |
910          (((uint64_t)(I->dim & BITFIELD_MASK(3))) << 40) |
911          (((uint64_t)q3) << 43) | (((uint64_t)I->mask) << 48) |
912          (((uint64_t)lod_mode) << 52) |
913          (((uint64_t)(S & BITFIELD_MASK(6))) << 56) | (((uint64_t)St) << 62) |
914          (((uint64_t)I->scoreboard) << 63);
915 
916       memcpy(util_dynarray_grow_bytes(emission, 1, 8), &raw, 8);
917       if (L)
918          memcpy(util_dynarray_grow_bytes(emission, 1, 4), &extend, 4);
919 
920       break;
921    }
922 
923    case AGX_OPCODE_IMAGE_WRITE: {
924       bool Ct, Dt, Rt, Cs;
925       unsigned Tt;
926       unsigned U;
927 
928       unsigned R = agx_pack_pbe_source(I, I->src[0], &Rt);
929       unsigned C = agx_pack_sample_coords(I, I->src[1], &Ct, &Cs);
930       unsigned D = agx_pack_pbe_lod(I, I->src[2], &Dt);
931       unsigned T = agx_pack_texture(I, I->src[3], I->src[4], &U, &Tt);
932       bool rtz = false;
933 
934       pack_assert(I, U < (1 << 5));
935       pack_assert(I, D < (1 << 8));
936       pack_assert(I, R < (1 << 8));
937       pack_assert(I, C < (1 << 8));
938       pack_assert(I, T < (1 << 8));
939       pack_assert(I, Tt < (1 << 2));
940 
941       unsigned coherency = I->coherent ? 0xf : 0x9;
942 
943       uint64_t raw = agx_opcodes_info[I->op].encoding.exact |
944                      (Rt ? (1 << 8) : 0) | ((R & BITFIELD_MASK(6)) << 9) |
945                      ((C & BITFIELD_MASK(6)) << 16) | (Ct ? (1 << 22) : 0) |
946                      ((D & BITFIELD_MASK(6)) << 24) | (Dt ? (1u << 31) : 0) |
947                      (((uint64_t)(T & BITFIELD_MASK(6))) << 32) |
948                      (((uint64_t)Tt) << 38) |
949                      (((uint64_t)I->dim & BITFIELD_MASK(3)) << 40) |
950                      (((uint64_t)coherency) << 43) | (Cs ? (1ull << 47) : 0) |
951                      (((uint64_t)U) << 48) | (rtz ? (1ull << 53) : 0) |
952                      ((I->dim & BITFIELD_BIT(4)) ? (1ull << 55) : 0) |
953                      (((uint64_t)R >> 6) << 56) | (((uint64_t)C >> 6) << 58) |
954                      (((uint64_t)D >> 6) << 60) | (((uint64_t)T >> 6) << 62);
955 
956       if (raw >> 48) {
957          raw |= BITFIELD_BIT(15);
958          memcpy(util_dynarray_grow_bytes(emission, 1, 8), &raw, 8);
959       } else {
960          memcpy(util_dynarray_grow_bytes(emission, 1, 6), &raw, 6);
961       }
962 
963       break;
964    }
965 
966    case AGX_OPCODE_BLOCK_IMAGE_STORE: {
967       enum agx_format F = I->format;
968       pack_assert(I, F < 0x10);
969 
970       unsigned Tt = 0;
971       pack_assert(I, Tt < 0x4);
972 
973       unsigned U;
974       unsigned T = agx_pack_texture(I, I->src[0], I->src[1], &U, &Tt);
975       pack_assert(I, T < 0x100);
976       pack_assert(I, U < (1 << 5));
977 
978       bool Cs = false;
979       bool Ct = I->src[3].discard;
980       unsigned C = I->src[3].value;
981 
982       agx_index offset = I->src[2];
983       pack_assert(I, offset.size == AGX_SIZE_32);
984       assert_register_is_aligned(I, offset);
985       unsigned R = offset.value;
986 
987       bool unk1 = true;
988 
989       /* This bit has weird behaviour with the interaction of the texture state
990        * index and the tilebuffer offset. Probably best not to use it for now.
991        */
992       unsigned unk3 = 1;
993 
994       uint32_t word0 = agx_opcodes_info[I->op].encoding.exact |
995                        (1 << 15) /* we always set length bit for now */ |
996                        ((F & 1) << 8) | ((R & BITFIELD_MASK(6)) << 9) |
997                        ((C & BITFIELD_MASK(6)) << 16) | (Ct ? (1 << 22) : 0) |
998                        (I->explicit_coords ? (1 << 23) : 0) |
999                        (unk1 ? (1u << 31) : 0);
1000 
1001       uint32_t word1 = (T & BITFIELD_MASK(6)) | (Tt << 6) |
1002                        ((I->dim & BITFIELD_MASK(3)) << 8) | (9 << 11) |
1003                        (Cs ? (1 << 15) : 0) | (((uint64_t)U) << 16) |
1004                        ((I->dim & BITFIELD_BIT(3)) ? (1u << 23) : 0) |
1005                        ((R >> 6) << 24) | ((C >> 6) << 26);
1006 
1007       uint32_t word2 = (F >> 1) | (unk3 ? (1 << 3) : 0) | ((T >> 6) << 14);
1008 
1009       memcpy(util_dynarray_grow_bytes(emission, 1, 4), &word0, 4);
1010       memcpy(util_dynarray_grow_bytes(emission, 1, 4), &word1, 4);
1011       memcpy(util_dynarray_grow_bytes(emission, 1, 2), &word2, 2);
1012       break;
1013    }
1014 
1015    case AGX_OPCODE_ZS_EMIT: {
1016       agx_index S = I->src[0];
1017       if (S.type == AGX_INDEX_IMMEDIATE)
1018          pack_assert(I, S.value < BITFIELD_BIT(8));
1019       else
1020          assert_register_is_aligned(I, S);
1021 
1022       agx_index T = I->src[1];
1023       assert_register_is_aligned(I, T);
1024 
1025       pack_assert(I, I->zs >= 1 && I->zs <= 3);
1026 
1027       uint32_t word0 = agx_opcodes_info[I->op].encoding.exact |
1028                        ((S.type == AGX_INDEX_IMMEDIATE) ? (1 << 8) : 0) |
1029                        ((S.value & BITFIELD_MASK(6)) << 9) |
1030                        ((T.value & BITFIELD_MASK(6)) << 16) |
1031                        ((T.value >> 6) << 26) | ((S.value >> 6) << 24) |
1032                        (I->zs << 29);
1033 
1034       memcpy(util_dynarray_grow_bytes(emission, 1, 4), &word0, 4);
1035       break;
1036    }
1037 
1038    case AGX_OPCODE_JMP_EXEC_ANY:
1039    case AGX_OPCODE_JMP_EXEC_NONE:
1040    case AGX_OPCODE_JMP_EXEC_NONE_AFTER: {
1041       /* We don't implement indirect branches */
1042       pack_assert(I, I->target != NULL);
1043 
1044       /* We'll fix the offset later. */
1045       struct agx_branch_fixup fixup = {
1046          .block = I->target,
1047          .offset = emission->size,
1048          .skip_to_end = I->op == AGX_OPCODE_JMP_EXEC_NONE_AFTER,
1049       };
1050 
1051       util_dynarray_append(fixups, struct agx_branch_fixup, fixup);
1052 
1053       /* The rest of the instruction is fixed */
1054       struct agx_opcode_info info = agx_opcodes_info[I->op];
1055       uint64_t raw = info.encoding.exact;
1056       memcpy(util_dynarray_grow_bytes(emission, 1, 6), &raw, 6);
1057       break;
1058    }
1059 
1060    case AGX_OPCODE_DOORBELL: {
1061       pack_assert(I, I->imm < BITFIELD_MASK(8));
1062       struct agx_opcode_info info = agx_opcodes_info[I->op];
1063       uint64_t raw = info.encoding.exact | (I->imm << 40);
1064       memcpy(util_dynarray_grow_bytes(emission, 1, 6), &raw, 6);
1065       break;
1066    }
1067 
1068    case AGX_OPCODE_STACK_UNMAP:
1069    case AGX_OPCODE_STACK_MAP: {
1070       agx_index value = I->op == AGX_OPCODE_STACK_MAP ? I->src[0] : I->dest[0];
1071 
1072       pack_assert(I, value.type == AGX_INDEX_REGISTER);
1073       pack_assert(I, value.size == AGX_SIZE_32);
1074       pack_assert(I, I->imm < BITFIELD_MASK(16));
1075 
1076       unsigned q1 = 0;  // XXX
1077       unsigned q2 = 0;  // XXX
1078       unsigned q3 = 0;  // XXX
1079       unsigned q4 = 16; // XXX
1080       unsigned q5 = 16; // XXX
1081 
1082       struct agx_opcode_info info = agx_opcodes_info[I->op];
1083       uint64_t raw =
1084          info.encoding.exact | (q1 << 8) | ((value.value & 0x3F) << 10) |
1085          ((I->imm & 0xF) << 20) | (1ull << 24) | // XXX
1086          (1ull << 26) |                          // XXX
1087          (q2 << 30) | ((uint64_t)((I->imm >> 4) & 0xF) << 32) |
1088          ((uint64_t)q3 << 37) | ((uint64_t)(value.value >> 6) << 40) |
1089          ((uint64_t)q4 << 42) | (1ull << 47) | // XXX
1090          ((uint64_t)q5 << 48) | ((uint64_t)(I->imm >> 8) << 56);
1091 
1092       memcpy(util_dynarray_grow_bytes(emission, 1, 8), &raw, 8);
1093       break;
1094    }
1095 
1096    case AGX_OPCODE_STACK_LOAD:
1097    case AGX_OPCODE_STACK_STORE: {
1098       enum agx_format format = I->format;
1099       unsigned mask = I->mask;
1100 
1101       bool is_load = I->op == AGX_OPCODE_STACK_LOAD;
1102       bool L = true; /* TODO: when would you want short? */
1103 
1104       pack_assert(I, mask != 0);
1105       pack_assert(I, format <= 0x10);
1106 
1107       bool Rt, Ot;
1108       unsigned R =
1109          agx_pack_memory_reg(I, is_load ? I->dest[0] : I->src[0], &Rt);
1110       unsigned O =
1111          agx_pack_memory_index(I, is_load ? I->src[0] : I->src[1], &Ot);
1112 
1113       unsigned i1 = 1; // XXX
1114       unsigned i2 = 0; // XXX
1115       unsigned i5 = 4; // XXX
1116 
1117       uint64_t raw =
1118          agx_opcodes_info[I->op].encoding.exact |
1119          ((format & BITFIELD_MASK(2)) << 8) | ((R & BITFIELD_MASK(6)) << 10) |
1120          ((O & BITFIELD_MASK(4)) << 20) | (Ot ? (1 << 24) : 0) |
1121          ((uint64_t)i1 << 26) | ((uint64_t)I->scoreboard << 30) |
1122          (((uint64_t)((O >> 4) & BITFIELD_MASK(4))) << 32) |
1123          ((uint64_t)i2 << 36) |
1124          (((uint64_t)((R >> 6) & BITFIELD_MASK(2))) << 40) |
1125          ((uint64_t)i5 << 44) | (L ? (1ull << 47) : 0) |
1126          (((uint64_t)(format >> 2)) << 50) | (((uint64_t)Rt) << 49) |
1127          (((uint64_t)mask) << 52) | (((uint64_t)(O >> 8)) << 56);
1128 
1129       unsigned size = L ? 8 : 6;
1130       memcpy(util_dynarray_grow_bytes(emission, 1, size), &raw, size);
1131       break;
1132    }
1133    case AGX_OPCODE_STACK_ADJUST: {
1134       struct agx_opcode_info info = agx_opcodes_info[I->op];
1135 
1136       unsigned i0 = 0; // XXX
1137       unsigned i1 = 1; // XXX
1138       unsigned i2 = 2; // XXX
1139       unsigned i3 = 0; // XXX
1140       unsigned i4 = 0; // XXX
1141 
1142       uint64_t raw =
1143          info.encoding.exact | ((uint64_t)i0 << 8) | ((uint64_t)i1 << 26) |
1144          ((uint64_t)i2 << 36) | ((uint64_t)i3 << 44) | ((uint64_t)i4 << 50) |
1145          ((I->stack_size & 0xF) << 20) |
1146          ((uint64_t)((I->stack_size >> 4) & 0xF) << 32) | (1ull << 47) | // XXX
1147          ((uint64_t)(I->stack_size >> 8) << 56);
1148 
1149       memcpy(util_dynarray_grow_bytes(emission, 1, 8), &raw, 8);
1150       break;
1151    }
1152 
1153    default:
1154       agx_pack_alu(emission, I);
1155       return;
1156    }
1157 }
1158 
1159 /* Relative branches may be emitted before their targets, so we patch the
1160  * binary to fix up the branch offsets after the main emit */
1161 
1162 static void
1163 agx_fixup_branch(struct util_dynarray *emission, struct agx_branch_fixup fix)
1164 {
1165    /* Branch offset is 2 bytes into the jump instruction */
1166    uint8_t *location = ((uint8_t *)emission->data) + fix.offset + 2;
1167 
1168    off_t target = fix.skip_to_end ? fix.block->last_offset : fix.block->offset;
1169 
1170    /* Offsets are relative to the jump instruction */
1171    int32_t patch = (int32_t)target - (int32_t)fix.offset;
1172 
1173    /* Patch the binary */
1174    memcpy(location, &patch, sizeof(patch));
1175 }
1176 
1177 void
1178 agx_pack_binary(agx_context *ctx, struct util_dynarray *emission)
1179 {
1180    struct util_dynarray fixups;
1181    util_dynarray_init(&fixups, ctx);
1182 
1183    agx_foreach_block(ctx, block) {
1184       /* Relative to the start of the binary, the block begins at the current
1185        * number of bytes emitted */
1186       block->offset = emission->size;
1187 
1188       agx_foreach_instr_in_block(block, ins) {
1189          block->last_offset = emission->size;
1190          agx_pack_instr(emission, &fixups, ins,
1191                         ctx->key->dev.needs_g13x_coherency);
1192       }
1193    }
1194 
1195    util_dynarray_foreach(&fixups, struct agx_branch_fixup, fixup)
1196       agx_fixup_branch(emission, *fixup);
1197 
1198    util_dynarray_fini(&fixups);
1199 
1200    /* Dougall calls the instruction in this footer "trap". Match the blob. */
1201    if (!ctx->key->no_stop || ctx->is_preamble) {
1202       for (unsigned i = 0; i < 8; ++i) {
1203          uint16_t trap = agx_opcodes_info[AGX_OPCODE_TRAP].encoding.exact;
1204          util_dynarray_append(emission, uint16_t, trap);
1205       }
1206    }
1207 }
1208