/*
 * Copyright 2021 Alyssa Rosenzweig
 * SPDX-License-Identifier: MIT
 */

#include "agx_compiler.h"
#include "agx_opcodes.h"

/* Binary patches needed for branch offsets */
struct agx_branch_fixup {
   /* Offset into the binary to patch */
   off_t offset;

   /* Value to patch with; will be block->offset */
   agx_block *block;

   /* If true, skips to the last instruction of the target block */
   bool skip_to_end;
};

static void
pack_assert_internal(const agx_instr *I, bool condition, const char *msg)
{
   if (!condition) {
      printf("Packing assertion failed for instruction:\n\n");
      agx_print_instr(I, stdout);
      printf("\n%s\n", msg);
      abort();
   }
}

#define pack_assert_msg(I, cond, msg)                                          \
   pack_assert_internal(I, cond, msg ": " #cond)

#define pack_assert(I, cond) pack_assert_internal(I, cond, #cond)

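/* Register indices count 16-bit halves: a 32-bit value occupies an aligned
 * register pair and a 64-bit value an aligned quadruple, hence the checks on
 * the low bits of the index below.
 */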
static void
assert_register_is_aligned(const agx_instr *I, agx_index reg)
{
   pack_assert_msg(I, reg.type == AGX_INDEX_REGISTER, "expecting a register");

   switch (reg.size) {
   case AGX_SIZE_16:
      return;
   case AGX_SIZE_32:
      pack_assert_msg(I, (reg.value & 1) == 0, "unaligned reg");
      return;
   case AGX_SIZE_64:
      pack_assert_msg(I, (reg.value & 3) == 0, "unaligned reg");
      return;
   }

   unreachable("Invalid register size");
}

/* Texturing has its own operands */
static unsigned
agx_pack_sample_coords(const agx_instr *I, agx_index index, bool *flag,
                       bool *is_16)
{
   /* TODO: Do we have a use case for 16-bit coords? */
   pack_assert_msg(I, index.size == AGX_SIZE_32, "32-bit coordinates");
   pack_assert_msg(I, index.value < 0x100, "coordinate register bound");

   *is_16 = false;
   *flag = index.discard;
   return index.value;
}

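/* Two addressing modes: an immediate base of 0 selects the bound texture
 * state array (indexed by an immediate or a 16-bit register), while a 64-bit
 * uniform base selects the bindless path, where the uniform presumably holds
 * the descriptor address and the 32-bit index offsets into it. The packed
 * flag distinguishes the cases.
 */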
static unsigned
agx_pack_texture(const agx_instr *I, agx_index base, agx_index index,
                 unsigned *packed_base, unsigned *flag)
{
   if (base.type == AGX_INDEX_IMMEDIATE) {
      pack_assert(I, base.value == 0);

      /* Texture state registers */
      *packed_base = 0;

      if (index.type == AGX_INDEX_REGISTER) {
         pack_assert(I, index.size == AGX_SIZE_16);
         *flag = 1;
      } else {
         pack_assert(I, index.type == AGX_INDEX_IMMEDIATE);
         *flag = 0;
      }
   } else {
      pack_assert(I, base.type == AGX_INDEX_UNIFORM);
      pack_assert(I, base.size == AGX_SIZE_64);
      pack_assert(I, (base.value & 3) == 0);
      pack_assert(I, index.size == AGX_SIZE_32);

      /* Bindless */
      *packed_base = base.value >> 2;
      *flag = 3;
   }

   return index.value;
}

static unsigned
agx_pack_sampler(const agx_instr *I, agx_index index, bool *flag)
{
   if (index.type == AGX_INDEX_REGISTER) {
      pack_assert(I, index.size == AGX_SIZE_16);
      *flag = 1;
   } else {
      pack_assert(I, index.type == AGX_INDEX_IMMEDIATE);
      *flag = 0;
   }

   return index.value;
}

static unsigned
agx_pack_sample_compare_offset(const agx_instr *I, agx_index index)
{
   if (index.type == AGX_INDEX_NULL)
      return 0;

   pack_assert(I, index.size == AGX_SIZE_32);
   pack_assert(I, index.value < 0x100);
   assert_register_is_aligned(I, index);
   return index.value;
}

static unsigned
agx_pack_lod(const agx_instr *I, agx_index index, unsigned *lod_mode)
{
   /* For automatic LOD, the LOD field is unused. Assert as much. */
   if ((*lod_mode) == AGX_LOD_MODE_AUTO_LOD) {
      pack_assert(I, index.type == AGX_INDEX_IMMEDIATE);
      pack_assert(I, index.value == 0);
      return 0;
   }

   if (index.type == AGX_INDEX_UNIFORM) {
      /* Translate LOD mode from register mode to uniform mode */
      pack_assert(I,
                  ((*lod_mode) & BITFIELD_BIT(2)) && "must start as reg mode");
      *lod_mode = (*lod_mode) & ~BITFIELD_BIT(2);
      pack_assert(I, index.value < 0x200);
   } else {
      /* Otherwise must be registers */
      pack_assert(I, index.type == AGX_INDEX_REGISTER);
      pack_assert(I, index.value < 0x100);
   }

   return index.value;
}

static unsigned
agx_pack_pbe_source(const agx_instr *I, agx_index index, bool *flag)
{
   pack_assert(I, index.size == AGX_SIZE_16 || index.size == AGX_SIZE_32);
   assert_register_is_aligned(I, index);

   *flag = (index.size == AGX_SIZE_32);
   return index.value;
}

static unsigned
agx_pack_pbe_lod(const agx_instr *I, agx_index index, bool *flag)
{
   pack_assert(I, index.size == AGX_SIZE_16);

   if (index.type == AGX_INDEX_IMMEDIATE)
      *flag = true;
   else if (index.type == AGX_INDEX_REGISTER)
      *flag = false;
   else
      unreachable("Invalid PBE LOD type");

   return index.value;
}

/* Load/stores have their own operands */

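/* Device memory operands split into a 64-bit base (a register pair or a
 * uniform, distinguished by a flag bit) and an index (a register or a 16-bit
 * immediate, likewise flagged).
 */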
static unsigned
agx_pack_memory_reg(const agx_instr *I, agx_index index, bool *flag)
{
   assert_register_is_aligned(I, index);

   *flag = (index.size >= AGX_SIZE_32);
   return index.value;
}

static unsigned
agx_pack_memory_base(const agx_instr *I, agx_index index, bool *flag)
{
   pack_assert(I, index.size == AGX_SIZE_64);
   pack_assert(I, (index.value & 1) == 0);

   /* Can't seem to access high uniforms from memory instructions */
   pack_assert(I, index.value < 0x100);

   if (index.type == AGX_INDEX_UNIFORM) {
      *flag = 1;
   } else {
      pack_assert(I, index.type == AGX_INDEX_REGISTER);
      *flag = 0;
   }

   return index.value;
}

static unsigned
agx_pack_memory_index(const agx_instr *I, agx_index index, bool *flag)
{
   if (index.type == AGX_INDEX_IMMEDIATE) {
      pack_assert(I, index.value < 0x10000);
      *flag = 1;

      return index.value;
   } else {
      pack_assert(I, index.type == AGX_INDEX_REGISTER);
      pack_assert(I, index.size == AGX_SIZE_32);
      pack_assert(I, (index.value & 1) == 0);
      pack_assert(I, index.value < 0x100);

      *flag = 0;
      return index.value;
   }
}

static uint16_t
agx_pack_local_base(const agx_instr *I, agx_index index, unsigned *flags)
{
   pack_assert(I, index.size == AGX_SIZE_16);

   if (index.type == AGX_INDEX_IMMEDIATE) {
      pack_assert(I, index.value == 0);
      *flags = 2;
      return 0;
   } else if (index.type == AGX_INDEX_UNIFORM) {
      *flags = 1 | ((index.value >> 8) << 1);
      return index.value & BITFIELD_MASK(7);
   } else {
      assert_register_is_aligned(I, index);
      *flags = 0;
      return index.value;
   }
}

static uint16_t
agx_pack_local_index(const agx_instr *I, agx_index index, bool *flag)
{
   pack_assert(I, index.size == AGX_SIZE_16);

   if (index.type == AGX_INDEX_IMMEDIATE) {
      pack_assert(I, index.value < 0x10000);
      *flag = 1;
      return index.value;
   } else {
      assert_register_is_aligned(I, index);
      *flag = 0;
      return index.value;
   }
}

static unsigned
agx_pack_atomic_source(const agx_instr *I, agx_index index)
{
   pack_assert_msg(I, index.size == AGX_SIZE_32, "no 64-bit atomics yet");
   assert_register_is_aligned(I, index);
   return index.value;
}

static unsigned
agx_pack_atomic_dest(const agx_instr *I, agx_index index, bool *flag)
{
   /* Atomic destinations are optional (e.g. for update with no return) */
   if (index.type == AGX_INDEX_NULL) {
      *flag = 0;
      return 0;
   }

   /* But are otherwise registers */
   pack_assert_msg(I, index.size == AGX_SIZE_32, "no 64-bit atomics yet");
   assert_register_is_aligned(I, index);
   *flag = 1;
   return index.value;
}

/* ALU goes through a common path */

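/* A packed ALU operand is a 12-bit field: the low 6 bits of the index inline,
 * type/size/hint flags in the middle bits, and the top two bits of the index
 * at positions 10-11. agx_pack_alu below keeps the low 10 bits in the
 * instruction word and spills bits 10-11 into the 16-bit "extend" word.
 */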
static unsigned
agx_pack_alu_dst(const agx_instr *I, agx_index dest)
{
   assert_register_is_aligned(I, dest);
   unsigned reg = dest.value;
   enum agx_size size = dest.size;
   pack_assert(I, reg < 0x100);

   return (dest.cache ? (1 << 0) : 0) | ((size >= AGX_SIZE_32) ? (1 << 1) : 0) |
          ((size == AGX_SIZE_64) ? (1 << 2) : 0) | ((reg << 2));
}
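
/* For example (illustrative, not from real output): an uncached 32-bit
 * destination at half-register index 10 packs as (1 << 1) | (10 << 2) = 0x2a.
 * A 64-bit destination is 4-aligned, so reg << 2 leaves bit 2 clear for the
 * size flag ORed in there.
 */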

static unsigned
agx_pack_alu_src(const agx_instr *I, agx_index src)
{
   unsigned value = src.value;
   enum agx_size size = src.size;

   if (src.type == AGX_INDEX_IMMEDIATE) {
      /* Flags 0 for an 8-bit immediate */
      pack_assert(I, value < 0x100);

      return (value & BITFIELD_MASK(6)) | ((value >> 6) << 10);
   } else if (src.type == AGX_INDEX_UNIFORM) {
      pack_assert(I, size == AGX_SIZE_16 || size == AGX_SIZE_32);
      pack_assert(I, value < AGX_NUM_UNIFORMS);

      return (value & BITFIELD_MASK(6)) |
             ((value & BITFIELD_BIT(8)) ? (1 << 6) : 0) |
             ((size == AGX_SIZE_32) ? (1 << 7) : 0) | (0x1 << 8) |
             (((value >> 6) & BITFIELD_MASK(2)) << 10);
   } else {
      assert_register_is_aligned(I, src);
      pack_assert(I, !(src.cache && src.discard));

      unsigned hint = src.discard ? 0x3 : src.cache ? 0x2 : 0x1;
      unsigned size_flag = (size == AGX_SIZE_64)   ? 0x3
                           : (size == AGX_SIZE_32) ? 0x2
                           : (size == AGX_SIZE_16) ? 0x0
                                                   : 0x0;

      return (value & BITFIELD_MASK(6)) | (hint << 6) | (size_flag << 8) |
             (((value >> 6) & BITFIELD_MASK(2)) << 10);
   }
}

static unsigned
agx_pack_cmpsel_src(const agx_instr *I, agx_index src, enum agx_size dest_size)
{
   unsigned value = src.value;
   ASSERTED enum agx_size size = src.size;

   if (src.type == AGX_INDEX_IMMEDIATE) {
      /* Flags 0x4 for an 8-bit immediate */
      pack_assert(I, value < 0x100);

      return (value & BITFIELD_MASK(6)) | (0x4 << 6) | ((value >> 6) << 10);
   } else if (src.type == AGX_INDEX_UNIFORM) {
      pack_assert(I, size == AGX_SIZE_16 || size == AGX_SIZE_32);
      pack_assert(I, size == dest_size);
      pack_assert(I, value < 0x200);

      return (value & BITFIELD_MASK(6)) | ((value >> 8) << 6) | (0x3 << 7) |
             (((value >> 6) & BITFIELD_MASK(2)) << 10);
   } else {
      pack_assert(I, src.type == AGX_INDEX_REGISTER);
      pack_assert(I, !(src.cache && src.discard));
      pack_assert(I, size == AGX_SIZE_16 || size == AGX_SIZE_32);
      pack_assert(I, size == dest_size);
      assert_register_is_aligned(I, src);

      unsigned hint = src.discard ? 0x3 : src.cache ? 0x2 : 0x1;

      return (value & BITFIELD_MASK(6)) | (hint << 6) |
             (((value >> 6) & BITFIELD_MASK(2)) << 10);
   }
}

static unsigned
agx_pack_sample_mask_src(const agx_instr *I, agx_index src)
{
   unsigned value = src.value;
   unsigned packed_value =
      (value & BITFIELD_MASK(6)) | (((value >> 6) & BITFIELD_MASK(2)) << 10);

   if (src.type == AGX_INDEX_IMMEDIATE) {
      pack_assert(I, value < 0x100);
      return packed_value | (1 << 7);
   } else {
      pack_assert(I, src.type == AGX_INDEX_REGISTER);
      assert_register_is_aligned(I, src);
      pack_assert(I, !(src.cache && src.discard));

      return packed_value;
   }
}

static unsigned
agx_pack_float_mod(agx_index src)
{
   return (src.abs ? (1 << 0) : 0) | (src.neg ? (1 << 1) : 0);
}

static bool
agx_all_16(agx_instr *I)
{
   agx_foreach_dest(I, d) {
      if (!agx_is_null(I->dest[d]) && I->dest[d].size != AGX_SIZE_16)
         return false;
   }

   agx_foreach_src(I, s) {
      if (!agx_is_null(I->src[s]) && I->src[s].size != AGX_SIZE_16)
         return false;
   }

   return true;
}

/* Generic pack for ALU instructions, which are quite regular */

static void
agx_pack_alu(struct util_dynarray *emission, agx_instr *I)
{
   struct agx_opcode_info info = agx_opcodes_info[I->op];
   bool is_16 = agx_all_16(I) && info.encoding_16.exact;
   struct agx_encoding encoding = is_16 ? info.encoding_16 : info.encoding;

   pack_assert_msg(I, encoding.exact, "invalid encoding");

   uint64_t raw = encoding.exact;
   uint16_t extend = 0;

   // TODO: assert saturable
   if (I->saturate)
      raw |= (1 << 6);

   if (info.nr_dests) {
      pack_assert(I, info.nr_dests == 1);
      unsigned D = agx_pack_alu_dst(I, I->dest[0]);
      unsigned extend_offset = (sizeof(extend) * 8) - 4;

      raw |= (D & BITFIELD_MASK(8)) << 7;
      extend |= ((D >> 8) << extend_offset);

      if (info.immediates & AGX_IMMEDIATE_INVERT_COND) {
         raw |= (uint64_t)(I->invert_cond) << 47;
      }
   } else if (info.immediates & AGX_IMMEDIATE_NEST) {
      raw |= (I->invert_cond << 8);
      raw |= (I->nest << 11);
      raw |= (I->icond << 13);
   }

   for (unsigned s = 0; s < info.nr_srcs; ++s) {
      bool is_cmpsel = (s >= 2) && (I->op == AGX_OPCODE_ICMPSEL ||
                                    I->op == AGX_OPCODE_FCMPSEL);

      unsigned src = is_cmpsel
                        ? agx_pack_cmpsel_src(I, I->src[s], I->dest[0].size)
                        : agx_pack_alu_src(I, I->src[s]);

      unsigned src_short = (src & BITFIELD_MASK(10));
      unsigned src_extend = (src >> 10);

      /* Size bit always zero and so omitted for 16-bit */
      if (is_16 && !is_cmpsel)
         pack_assert(I, (src_short & (1 << 9)) == 0);

      if (info.is_float || (I->op == AGX_OPCODE_FCMPSEL && !is_cmpsel)) {
         unsigned fmod = agx_pack_float_mod(I->src[s]);
         unsigned fmod_offset = is_16 ? 9 : 10;
         src_short |= (fmod << fmod_offset);
      } else if (I->op == AGX_OPCODE_IMAD || I->op == AGX_OPCODE_IADD) {
         /* Force unsigned for immediates so uadd_sat works properly */
         bool zext = I->src[s].abs || I->src[s].type == AGX_INDEX_IMMEDIATE;
         bool extends = I->src[s].size < AGX_SIZE_64;

         unsigned sxt = (extends && !zext) ? (1 << 10) : 0;

         unsigned negate_src = (I->op == AGX_OPCODE_IMAD) ? 2 : 1;
         pack_assert(I, !I->src[s].neg || s == negate_src);
         src_short |= sxt;
      }

      /* Sources come at predictable offsets */
      unsigned offset = 16 + (12 * s);
      raw |= (((uint64_t)src_short) << offset);

      /* Destination and each source get extended in reverse order */
      unsigned extend_offset = (sizeof(extend) * 8) - ((s + 3) * 2);
      extend |= (src_extend << extend_offset);
   }

   if ((I->op == AGX_OPCODE_IMAD && I->src[2].neg) ||
       (I->op == AGX_OPCODE_IADD && I->src[1].neg))
      raw |= (1 << 27);

   if (info.immediates & AGX_IMMEDIATE_TRUTH_TABLE) {
      raw |= (I->truth_table & 0x3) << 26;
      raw |= (uint64_t)(I->truth_table >> 2) << 38;
   } else if (info.immediates & AGX_IMMEDIATE_SHIFT) {
      pack_assert(I, I->shift <= 4);
      raw |= (uint64_t)(I->shift & 1) << 39;
      raw |= (uint64_t)(I->shift >> 1) << 52;
   } else if (info.immediates & AGX_IMMEDIATE_BFI_MASK) {
      raw |= (uint64_t)(I->bfi_mask & 0x3) << 38;
      raw |= (uint64_t)((I->bfi_mask >> 2) & 0x3) << 50;
      raw |= (uint64_t)((I->bfi_mask >> 4) & 0x1) << 63;
   } else if (info.immediates & AGX_IMMEDIATE_SR) {
      raw |= (uint64_t)(I->sr & 0x3F) << 16;
      raw |= (uint64_t)(I->sr >> 6) << 26;
   } else if (info.immediates & AGX_IMMEDIATE_WRITEOUT)
      raw |= (uint64_t)(I->imm) << 8;
   else if (info.immediates & AGX_IMMEDIATE_IMM)
      raw |= (uint64_t)(I->imm) << 16;
   else if (info.immediates & AGX_IMMEDIATE_ROUND)
      raw |= (uint64_t)(I->imm) << 26;
   else if (info.immediates & (AGX_IMMEDIATE_FCOND | AGX_IMMEDIATE_ICOND))
      raw |= (uint64_t)(I->fcond) << 61;

   /* Determine length bit */
   unsigned length = encoding.length_short;
   uint64_t short_mask = BITFIELD64_MASK(8 * length);
   bool length_bit = (extend || (raw & ~short_mask));

   if (encoding.extensible && length_bit) {
      raw |= (1 << 15);
      length += (length > 8) ? 4 : 2;
   }

   /* Pack! */
   if (length <= sizeof(uint64_t)) {
      unsigned extend_offset = ((length - sizeof(extend)) * 8);

      /* XXX: This is a weird special case */
      if (I->op == AGX_OPCODE_IADD)
         extend_offset -= 16;

      raw |= (uint64_t)extend << extend_offset;
      memcpy(util_dynarray_grow_bytes(emission, 1, length), &raw, length);
   } else {
      /* So far, >8 byte ALU is only to store the extend bits */
      unsigned extend_offset = (((length - sizeof(extend)) * 8) - 64);
      unsigned hi = ((uint64_t)extend) << extend_offset;

      memcpy(util_dynarray_grow_bytes(emission, 1, 8), &raw, 8);
      memcpy(util_dynarray_grow_bytes(emission, 1, length - 8), &hi,
             length - 8);
   }
}

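/* Non-ALU instructions (memory, texturing, control flow, ...) are packed
 * case-by-case below; anything without a special case falls through to the
 * generic ALU path.
 */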
static void
agx_pack_instr(struct util_dynarray *emission, struct util_dynarray *fixups,
               agx_instr *I, bool needs_g13x_coherency)
{
   switch (I->op) {
   case AGX_OPCODE_LD_TILE:
   case AGX_OPCODE_ST_TILE: {
      bool load = (I->op == AGX_OPCODE_LD_TILE);
      unsigned D = agx_pack_alu_dst(I, load ? I->dest[0] : I->src[0]);
      pack_assert(I, I->mask < 0x10);
      pack_assert(I, I->pixel_offset < 0x200);

      agx_index sample_index = load ? I->src[0] : I->src[1];
      pack_assert(I, sample_index.type == AGX_INDEX_REGISTER ||
                        sample_index.type == AGX_INDEX_IMMEDIATE);
      pack_assert(I, sample_index.size == AGX_SIZE_16);
      unsigned St = (sample_index.type == AGX_INDEX_REGISTER) ? 1 : 0;
      unsigned S = sample_index.value;
      pack_assert(I, S < 0x100);

      uint64_t raw = agx_opcodes_info[I->op].encoding.exact |
                     ((uint64_t)(D & BITFIELD_MASK(8)) << 7) | (St << 22) |
                     ((uint64_t)(I->format) << 24) |
                     ((uint64_t)(I->pixel_offset & BITFIELD_MASK(7)) << 28) |
                     (load ? (1ull << 35) : 0) | ((uint64_t)(I->mask) << 36) |
                     ((uint64_t)(I->pixel_offset >> 7) << 40) |
                     ((uint64_t)(S & BITFIELD_MASK(6)) << 42) |
                     ((uint64_t)(S >> 6) << 56) | (((uint64_t)(D >> 8)) << 60);

      unsigned size = 8;
      memcpy(util_dynarray_grow_bytes(emission, 1, size), &raw, size);
      break;
   }

   case AGX_OPCODE_SAMPLE_MASK: {
      unsigned S = agx_pack_sample_mask_src(I, I->src[1]);
      unsigned T = I->src[0].value;
      bool Tt = I->src[0].type == AGX_INDEX_IMMEDIATE;
      pack_assert(I, Tt || I->src[0].type == AGX_INDEX_REGISTER);
      uint32_t raw = 0xc1 | (Tt ? BITFIELD_BIT(8) : 0) |
                     ((T & BITFIELD_MASK(6)) << 9) | ((S & 0xff) << 16) |
                     ((T >> 6) << 24) | ((S >> 8) << 26);

      unsigned size = 4;
      memcpy(util_dynarray_grow_bytes(emission, 1, size), &raw, size);
      break;
   }

   case AGX_OPCODE_WAIT: {
      uint64_t raw =
         agx_opcodes_info[I->op].encoding.exact | (I->scoreboard << 8);

      unsigned size = 2;
      memcpy(util_dynarray_grow_bytes(emission, 1, size), &raw, size);
      break;
   }

   case AGX_OPCODE_ITER:
   case AGX_OPCODE_ITERPROJ:
   case AGX_OPCODE_LDCF: {
      bool flat = (I->op == AGX_OPCODE_LDCF);
      bool perspective = (I->op == AGX_OPCODE_ITERPROJ);
      unsigned D = agx_pack_alu_dst(I, I->dest[0]);
      unsigned channels = (I->channels & 0x3);

      agx_index src_I = I->src[0];
      pack_assert(I, src_I.type == AGX_INDEX_IMMEDIATE);

      unsigned cf_I = src_I.value;
      unsigned cf_J = 0;

      if (perspective) {
         agx_index src_J = I->src[1];
         pack_assert(I, src_J.type == AGX_INDEX_IMMEDIATE);
         cf_J = src_J.value;
      }

      pack_assert(I, cf_I < 0x100);
      pack_assert(I, cf_J < 0x100);

      enum agx_interpolation interp = I->interpolation;
      agx_index sample_index = flat ? agx_null() : I->src[perspective ? 2 : 1];

      /* Fix up the interpolation enum to distinguish the sample index source */
      if (interp == AGX_INTERPOLATION_SAMPLE) {
         if (sample_index.type == AGX_INDEX_REGISTER)
            interp = AGX_INTERPOLATION_SAMPLE_REGISTER;
         else
            pack_assert(I, sample_index.type == AGX_INDEX_IMMEDIATE);
      } else {
         sample_index = agx_zero();
      }

      bool kill = false;    // TODO: optimize
      bool forward = false; // TODO: optimize

      uint64_t raw =
         0x21 | (flat ? (1 << 7) : 0) | (perspective ? (1 << 6) : 0) |
         ((D & 0xFF) << 7) | (1ull << 15) | /* XXX */
         ((cf_I & BITFIELD_MASK(6)) << 16) | ((cf_J & BITFIELD_MASK(6)) << 24) |
         (((uint64_t)channels) << 30) | (((uint64_t)sample_index.value) << 32) |
         (forward ? (1ull << 46) : 0) | (((uint64_t)interp) << 48) |
         (kill ? (1ull << 52) : 0) | (((uint64_t)(D >> 8)) << 56) |
         ((uint64_t)(cf_I >> 6) << 58) | ((uint64_t)(cf_J >> 6) << 60);

      unsigned size = 8;
      memcpy(util_dynarray_grow_bytes(emission, 1, size), &raw, size);
      break;
   }

   case AGX_OPCODE_ST_VARY: {
      agx_index index_src = I->src[0];
      agx_index value = I->src[1];

      pack_assert(I, index_src.type == AGX_INDEX_IMMEDIATE);
      pack_assert(I, index_src.value < BITFIELD_MASK(8));
      pack_assert(I, value.type == AGX_INDEX_REGISTER);
      pack_assert(I, value.size == AGX_SIZE_32);

      uint64_t raw =
         0x11 | (I->last ? (1 << 7) : 0) | ((value.value & 0x3F) << 9) |
         (((uint64_t)(index_src.value & 0x3F)) << 16) | (0x80 << 16) | /* XXX */
         ((value.value >> 6) << 24) | ((index_src.value >> 6) << 26) |
         (0x8u << 28); /* XXX */

      unsigned size = 4;
      memcpy(util_dynarray_grow_bytes(emission, 1, size), &raw, size);
      break;
   }

   case AGX_OPCODE_DEVICE_LOAD:
   case AGX_OPCODE_DEVICE_STORE:
   case AGX_OPCODE_UNIFORM_STORE: {
      bool is_device_store = I->op == AGX_OPCODE_DEVICE_STORE;
      bool is_uniform_store = I->op == AGX_OPCODE_UNIFORM_STORE;
      bool is_store = is_device_store || is_uniform_store;
      bool has_base = !is_uniform_store;

      /* Uniform stores are internally packed as 16-bit. Fix up the format,
       * mask, and size so we can use scalar 32-bit values in the IR and avoid
       * special casing earlier in the compiler.
       */
      enum agx_format format = is_uniform_store ? AGX_FORMAT_I16 : I->format;
      agx_index reg = is_store ? I->src[0] : I->dest[0];
      unsigned mask = I->mask;

      if (is_uniform_store) {
         mask = BITFIELD_MASK(agx_size_align_16(reg.size));
         reg.size = AGX_SIZE_16;
      }

      unsigned offset_src = (has_base ? 1 : 0) + (is_store ? 1 : 0);

      bool Rt, At = false, Ot;
      unsigned R = agx_pack_memory_reg(I, reg, &Rt);
      unsigned A =
         has_base ? agx_pack_memory_base(I, I->src[is_store ? 1 : 0], &At) : 0;
      unsigned O = agx_pack_memory_index(I, I->src[offset_src], &Ot);
      unsigned u1 = is_uniform_store ? 0 : 1; // XXX
      unsigned u3 = 0;
      unsigned u4 = is_uniform_store ? 0 : 4; // XXX
      unsigned u5 = 0;
      bool L = true; /* TODO: when would you want short? */

      pack_assert(I, mask != 0);
      pack_assert(I, format <= 0x10);

      uint64_t raw =
         agx_opcodes_info[I->op].encoding.exact |
         ((format & BITFIELD_MASK(3)) << 7) | ((R & BITFIELD_MASK(6)) << 10) |
         ((A & BITFIELD_MASK(4)) << 16) | ((O & BITFIELD_MASK(4)) << 20) |
         (Ot ? (1 << 24) : 0) | (I->src[offset_src].abs ? (1 << 25) : 0) |
         (is_uniform_store ? (2 << 25) : 0) | (u1 << 26) | (At << 27) |
         (u3 << 28) | (I->scoreboard << 30) |
         (((uint64_t)((O >> 4) & BITFIELD_MASK(4))) << 32) |
         (((uint64_t)((A >> 4) & BITFIELD_MASK(4))) << 36) |
         (((uint64_t)((R >> 6) & BITFIELD_MASK(2))) << 40) |
         (((uint64_t)I->shift) << 42) | (((uint64_t)u4) << 44) |
         (L ? (1ull << 47) : 0) | (((uint64_t)(format >> 3)) << 48) |
         (((uint64_t)Rt) << 49) | (((uint64_t)u5) << 50) |
         (((uint64_t)mask) << 52) | (((uint64_t)(O >> 8)) << 56);

      unsigned size = L ? 8 : 6;
      memcpy(util_dynarray_grow_bytes(emission, 1, size), &raw, size);
      break;
   }

   case AGX_OPCODE_LOCAL_LOAD:
   case AGX_OPCODE_LOCAL_STORE: {
      bool is_load = I->op == AGX_OPCODE_LOCAL_LOAD;
      bool L = true; /* TODO: when would you want short? */
      unsigned At;
      bool Rt, Ot;

      unsigned R =
         agx_pack_memory_reg(I, is_load ? I->dest[0] : I->src[0], &Rt);
      unsigned A = agx_pack_local_base(I, is_load ? I->src[0] : I->src[1], &At);
      unsigned O =
         agx_pack_local_index(I, is_load ? I->src[1] : I->src[2], &Ot);

      uint64_t raw =
         agx_opcodes_info[I->op].encoding.exact | (Rt ? BITFIELD64_BIT(8) : 0) |
         ((R & BITFIELD_MASK(6)) << 9) | (L ? BITFIELD64_BIT(15) : 0) |
         ((A & BITFIELD_MASK(6)) << 16) | (At << 22) | (I->format << 24) |
         ((O & BITFIELD64_MASK(6)) << 28) | (Ot ? BITFIELD64_BIT(34) : 0) |
         (((uint64_t)I->mask) << 36) | (((uint64_t)(O >> 6)) << 48) |
         (((uint64_t)(A >> 6)) << 58) | (((uint64_t)(R >> 6)) << 60);

      unsigned size = L ? 8 : 6;
      memcpy(util_dynarray_grow_bytes(emission, 1, size), &raw, size);
      break;
   }

   case AGX_OPCODE_ATOMIC: {
      bool At, Ot, Rt;
      unsigned A = agx_pack_memory_base(I, I->src[1], &At);
      unsigned O = agx_pack_memory_index(I, I->src[2], &Ot);
      unsigned R = agx_pack_atomic_dest(I, I->dest[0], &Rt);
      unsigned S = agx_pack_atomic_source(I, I->src[0]);

      uint64_t raw =
         agx_opcodes_info[I->op].encoding.exact |
         (((uint64_t)I->atomic_opc) << 6) | ((R & BITFIELD_MASK(6)) << 10) |
         ((A & BITFIELD_MASK(4)) << 16) | ((O & BITFIELD_MASK(4)) << 20) |
         (Ot ? (1 << 24) : 0) | (I->src[2].abs ? (1 << 25) : 0) | (At << 27) |
         (I->scoreboard << 30) |
         (((uint64_t)((O >> 4) & BITFIELD_MASK(4))) << 32) |
         (((uint64_t)((A >> 4) & BITFIELD_MASK(4))) << 36) |
         (((uint64_t)(R >> 6)) << 40) |
         (needs_g13x_coherency ? BITFIELD64_BIT(45) : 0) |
         (Rt ? BITFIELD64_BIT(47) : 0) | (((uint64_t)S) << 48) |
         (((uint64_t)(O >> 8)) << 56);

      memcpy(util_dynarray_grow_bytes(emission, 1, 8), &raw, 8);
      break;
   }

   case AGX_OPCODE_LOCAL_ATOMIC: {
      bool L = true; /* TODO: Don't force */

      unsigned At;
      bool Rt = false, Ot;

      bool Ra = I->dest[0].type != AGX_INDEX_NULL;
      unsigned R = Ra ? agx_pack_memory_reg(I, I->dest[0], &Rt) : 0;
      unsigned S = agx_pack_atomic_source(I, I->src[0]);
      unsigned A = agx_pack_local_base(I, I->src[1], &At);
      unsigned O = agx_pack_local_index(I, I->src[2], &Ot);

      uint64_t raw =
         agx_opcodes_info[I->op].encoding.exact | (Rt ? BITFIELD64_BIT(8) : 0) |
         ((R & BITFIELD_MASK(6)) << 9) | (L ? BITFIELD64_BIT(15) : 0) |
         ((A & BITFIELD_MASK(6)) << 16) | (At << 22) |
         (((uint64_t)I->atomic_opc) << 24) | ((O & BITFIELD64_MASK(6)) << 28) |
         (Ot ? BITFIELD64_BIT(34) : 0) | (Ra ? BITFIELD64_BIT(38) : 0) |
         (((uint64_t)(O >> 6)) << 48) | (((uint64_t)(A >> 6)) << 58) |
         (((uint64_t)(R >> 6)) << 60);

      uint64_t raw2 = S;

      memcpy(util_dynarray_grow_bytes(emission, 1, 8), &raw, 8);
      memcpy(util_dynarray_grow_bytes(emission, 1, 2), &raw2, 2);
      break;
   }

   case AGX_OPCODE_TEXTURE_LOAD:
   case AGX_OPCODE_IMAGE_LOAD:
   case AGX_OPCODE_TEXTURE_SAMPLE: {
      pack_assert(I, I->mask != 0);
      pack_assert(I, I->format <= 0x10);

      bool Rt, Ct, St, Cs;
      unsigned Tt;
      unsigned U;
      enum agx_lod_mode lod_mode = I->lod_mode;

      unsigned R = agx_pack_memory_reg(I, I->dest[0], &Rt);
      unsigned C = agx_pack_sample_coords(I, I->src[0], &Ct, &Cs);
      unsigned T = agx_pack_texture(I, I->src[2], I->src[3], &U, &Tt);
      unsigned S = agx_pack_sampler(I, I->src[4], &St);
      unsigned O = agx_pack_sample_compare_offset(I, I->src[5]);
      unsigned D = agx_pack_lod(I, I->src[1], &lod_mode);

      unsigned q1 = I->shadow;
      unsigned q2 = I->query_lod ? 2 : 0;
      unsigned q3 = 12;  // XXX
      unsigned kill = 0; // helper invocation kill bit

      /* Set bit 43 for image loads. This seems to make sure that image loads
       * get the value written by the latest image store, not some other image
       * store that was already in flight, fixing
       *
       *    KHR-GLES31.core.shader_image_load_store.basic-glsl-misc-fs
       *
       * Apple seems to set this bit unconditionally for read/write image loads
       * and never for readonly image loads. Some sort of cache control.
       */
      if (I->op == AGX_OPCODE_IMAGE_LOAD)
         q3 |= 1;

      uint32_t extend = ((U & BITFIELD_MASK(5)) << 0) | (kill << 5) |
                        ((I->dim >> 3) << 7) | ((R >> 6) << 8) |
                        ((C >> 6) << 10) | ((D >> 6) << 12) | ((T >> 6) << 14) |
                        ((O & BITFIELD_MASK(6)) << 16) | (I->gather << 23) |
                        (I->offset << 27) | ((S >> 6) << 28) | ((O >> 6) << 30);

      bool L = (extend != 0);

      uint64_t raw =
         0x31 | ((I->op != AGX_OPCODE_TEXTURE_SAMPLE) ? (1 << 6) : 0) |
         (Rt ? (1 << 8) : 0) | ((R & BITFIELD_MASK(6)) << 9) |
         (L ? (1 << 15) : 0) | ((C & BITFIELD_MASK(6)) << 16) |
         (Ct ? (1 << 22) : 0) | (q1 << 23) | ((D & BITFIELD_MASK(6)) << 24) |
         (q2 << 30) | (((uint64_t)(T & BITFIELD_MASK(6))) << 32) |
         (((uint64_t)Tt) << 38) |
         (((uint64_t)(I->dim & BITFIELD_MASK(3))) << 40) |
         (((uint64_t)q3) << 43) | (((uint64_t)I->mask) << 48) |
         (((uint64_t)lod_mode) << 52) |
         (((uint64_t)(S & BITFIELD_MASK(6))) << 56) | (((uint64_t)St) << 62) |
         (((uint64_t)I->scoreboard) << 63);

      memcpy(util_dynarray_grow_bytes(emission, 1, 8), &raw, 8);
      if (L)
         memcpy(util_dynarray_grow_bytes(emission, 1, 4), &extend, 4);

      break;
   }

   case AGX_OPCODE_IMAGE_WRITE: {
      bool Ct, Dt, Rt, Cs;
      unsigned Tt;
      unsigned U;

      unsigned R = agx_pack_pbe_source(I, I->src[0], &Rt);
      unsigned C = agx_pack_sample_coords(I, I->src[1], &Ct, &Cs);
      unsigned D = agx_pack_pbe_lod(I, I->src[2], &Dt);
      unsigned T = agx_pack_texture(I, I->src[3], I->src[4], &U, &Tt);
      bool rtz = false;

      pack_assert(I, U < (1 << 5));
      pack_assert(I, D < (1 << 8));
      pack_assert(I, R < (1 << 8));
      pack_assert(I, C < (1 << 8));
      pack_assert(I, T < (1 << 8));
      pack_assert(I, Tt < (1 << 2));

      uint64_t raw = agx_opcodes_info[I->op].encoding.exact |
                     (Rt ? (1 << 8) : 0) | ((R & BITFIELD_MASK(6)) << 9) |
                     ((C & BITFIELD_MASK(6)) << 16) | (Ct ? (1 << 22) : 0) |
                     ((D & BITFIELD_MASK(6)) << 24) | (Dt ? (1u << 31) : 0) |
                     (((uint64_t)(T & BITFIELD_MASK(6))) << 32) |
                     (((uint64_t)Tt) << 38) |
                     (((uint64_t)I->dim & BITFIELD_MASK(3)) << 40) |
                     (Cs ? (1ull << 47) : 0) | (((uint64_t)U) << 48) |
                     (rtz ? (1ull << 53) : 0) |
                     ((I->dim & BITFIELD_BIT(4)) ? (1ull << 55) : 0) |
                     (((uint64_t)R >> 6) << 56) | (((uint64_t)C >> 6) << 58) |
                     (((uint64_t)D >> 6) << 60) | (((uint64_t)T >> 6) << 62);

      if (raw >> 48) {
         raw |= BITFIELD_BIT(15);
         memcpy(util_dynarray_grow_bytes(emission, 1, 8), &raw, 8);
      } else {
         memcpy(util_dynarray_grow_bytes(emission, 1, 6), &raw, 6);
      }

      break;
   }

   case AGX_OPCODE_BLOCK_IMAGE_STORE: {
      enum agx_format F = I->format;
      pack_assert(I, F < 0x10);

      unsigned Tt = 0;
      pack_assert(I, Tt < 0x4);

      UNUSED unsigned U;
      unsigned T = agx_pack_texture(I, agx_zero(), I->src[0], &U, &Tt);
      pack_assert(I, T < 0x100);

      bool Cs = false;
      bool Ct = I->src[2].discard;
      unsigned C = I->src[2].value;

      agx_index offset = I->src[1];
      pack_assert(I, offset.size == AGX_SIZE_32);
      assert_register_is_aligned(I, offset);
      unsigned R = offset.value;

      bool unk1 = true;
      unsigned unk3 = 1;

      uint32_t word0 = agx_opcodes_info[I->op].encoding.exact |
                       (1 << 15) /* we always set length bit for now */ |
                       ((F & 1) << 8) | ((R & BITFIELD_MASK(6)) << 9) |
                       ((C & BITFIELD_MASK(6)) << 16) | (Ct ? (1 << 22) : 0) |
                       (unk1 ? (1u << 31) : 0);

      uint32_t word1 = (T & BITFIELD_MASK(6)) | (Tt << 2) |
                       ((I->dim & BITFIELD_MASK(3)) << 8) | (9 << 11) |
                       (Cs ? (1 << 15) : 0) |
                       ((I->dim & BITFIELD_BIT(3)) ? (1u << 23) : 0) |
                       ((R >> 6) << 24) | ((C >> 6) << 26);

      uint32_t word2 = (F >> 1) | (unk3 ? (1 << 3) : 0) | ((T >> 6) << 14);

      memcpy(util_dynarray_grow_bytes(emission, 1, 4), &word0, 4);
      memcpy(util_dynarray_grow_bytes(emission, 1, 4), &word1, 4);
      memcpy(util_dynarray_grow_bytes(emission, 1, 2), &word2, 2);
      break;
   }

   case AGX_OPCODE_ZS_EMIT: {
      agx_index S = I->src[0];
      if (S.type == AGX_INDEX_IMMEDIATE)
         pack_assert(I, S.value < BITFIELD_BIT(8));
      else
         assert_register_is_aligned(I, S);

      agx_index T = I->src[1];
      assert_register_is_aligned(I, T);

      pack_assert(I, I->zs >= 1 && I->zs <= 3);

      uint32_t word0 = agx_opcodes_info[I->op].encoding.exact |
                       ((S.type == AGX_INDEX_IMMEDIATE) ? (1 << 8) : 0) |
                       ((S.value & BITFIELD_MASK(6)) << 9) |
                       ((T.value & BITFIELD_MASK(6)) << 16) |
                       ((T.value >> 6) << 26) | ((S.value >> 6) << 24) |
                       (I->zs << 29);

      memcpy(util_dynarray_grow_bytes(emission, 1, 4), &word0, 4);
      break;
   }

   case AGX_OPCODE_JMP_EXEC_ANY:
   case AGX_OPCODE_JMP_EXEC_NONE:
   case AGX_OPCODE_JMP_EXEC_NONE_AFTER: {
      /* We don't implement indirect branches */
      pack_assert(I, I->target != NULL);

      /* We'll fix the offset later. */
      struct agx_branch_fixup fixup = {
         .block = I->target,
         .offset = emission->size,
         .skip_to_end = I->op == AGX_OPCODE_JMP_EXEC_NONE_AFTER,
      };

      util_dynarray_append(fixups, struct agx_branch_fixup, fixup);

      /* The rest of the instruction is fixed */
      struct agx_opcode_info info = agx_opcodes_info[I->op];
      uint64_t raw = info.encoding.exact;
      memcpy(util_dynarray_grow_bytes(emission, 1, 6), &raw, 6);
      break;
   }

   case AGX_OPCODE_DOORBELL: {
      pack_assert(I, I->imm < BITFIELD_MASK(8));
      struct agx_opcode_info info = agx_opcodes_info[I->op];
      uint64_t raw = info.encoding.exact | (I->imm << 40);
      memcpy(util_dynarray_grow_bytes(emission, 1, 6), &raw, 6);
      break;
   }

   case AGX_OPCODE_STACK_UNMAP:
   case AGX_OPCODE_STACK_MAP: {
      agx_index value = I->op == AGX_OPCODE_STACK_MAP ? I->src[0] : I->dest[0];

      pack_assert(I, value.type == AGX_INDEX_REGISTER);
      pack_assert(I, value.size == AGX_SIZE_32);
      pack_assert(I, I->imm < BITFIELD_MASK(16));

      unsigned q1 = 0;  // XXX
      unsigned q2 = 0;  // XXX
      unsigned q3 = 0;  // XXX
      unsigned q4 = 16; // XXX
      unsigned q5 = 16; // XXX

      struct agx_opcode_info info = agx_opcodes_info[I->op];
      uint64_t raw =
         info.encoding.exact | (q1 << 8) | ((value.value & 0x3F) << 10) |
         ((I->imm & 0xF) << 20) | (1UL << 24) | // XXX
         (1UL << 26) |                          // XXX
         (q2 << 30) | ((uint64_t)((I->imm >> 4) & 0xF) << 32) |
         ((uint64_t)q3 << 37) | ((uint64_t)(value.value >> 6) << 40) |
         ((uint64_t)q4 << 42) | (1UL << 47) | // XXX
         ((uint64_t)q5 << 48) | ((uint64_t)(I->imm >> 8) << 56);

      memcpy(util_dynarray_grow_bytes(emission, 1, 8), &raw, 8);
      break;
   }

   case AGX_OPCODE_STACK_LOAD:
   case AGX_OPCODE_STACK_STORE: {
      enum agx_format format = I->format;
      unsigned mask = I->mask;

      bool is_load = I->op == AGX_OPCODE_STACK_LOAD;
      bool L = true; /* TODO: when would you want short? */

      pack_assert(I, mask != 0);
      pack_assert(I, format <= 0x10);

      bool Rt, Ot;
      unsigned R =
         agx_pack_memory_reg(I, is_load ? I->dest[0] : I->src[0], &Rt);
      unsigned O =
         agx_pack_memory_index(I, is_load ? I->src[0] : I->src[1], &Ot);

      unsigned i1 = 1; // XXX
      unsigned i2 = 0; // XXX
      unsigned i5 = 4; // XXX

      uint64_t raw =
         agx_opcodes_info[I->op].encoding.exact |
         ((format & BITFIELD_MASK(2)) << 8) | ((R & BITFIELD_MASK(6)) << 10) |
         ((O & BITFIELD_MASK(4)) << 20) | (Ot ? (1 << 24) : 0) |
         ((uint64_t)i1 << 26) | ((uint64_t)I->scoreboard << 30) |
         (((uint64_t)((O >> 4) & BITFIELD_MASK(4))) << 32) |
         ((uint64_t)i2 << 36) |
         (((uint64_t)((R >> 6) & BITFIELD_MASK(2))) << 40) |
         ((uint64_t)i5 << 44) | (L ? (1UL << 47) : 0) |
         (((uint64_t)(format >> 2)) << 50) | (((uint64_t)Rt) << 49) |
         (((uint64_t)mask) << 52) | (((uint64_t)(O >> 8)) << 56);

      unsigned size = L ? 8 : 6;
      memcpy(util_dynarray_grow_bytes(emission, 1, size), &raw, size);
      break;
   }
   case AGX_OPCODE_STACK_ADJUST: {
      struct agx_opcode_info info = agx_opcodes_info[I->op];

      unsigned i0 = 0; // XXX
      unsigned i1 = 1; // XXX
      unsigned i2 = 2; // XXX
      unsigned i3 = 0; // XXX
      unsigned i4 = 0; // XXX

      uint64_t raw =
         info.encoding.exact | ((uint64_t)i0 << 8) | ((uint64_t)i1 << 26) |
         ((uint64_t)i2 << 36) | ((uint64_t)i3 << 44) | ((uint64_t)i4 << 50) |
         ((I->stack_size & 0xF) << 20) |
         ((uint64_t)((I->stack_size >> 4) & 0xF) << 32) | (1UL << 47) | // XXX
         ((uint64_t)(I->stack_size >> 8) << 56);

      memcpy(util_dynarray_grow_bytes(emission, 1, 8), &raw, 8);
      break;
   }

   default:
      agx_pack_alu(emission, I);
      return;
   }
}

/* Relative branches may be emitted before their targets, so we patch the
 * binary to fix up the branch offsets after the main emit */
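
/* For instance (illustrative numbers, not from real output): a jump packed at
 * byte offset 10 whose target block lands at byte 30 gets the 32-bit
 * displacement 20 written at byte 12, i.e. two bytes into the instruction.
 */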

static void
agx_fixup_branch(struct util_dynarray *emission, struct agx_branch_fixup fix)
{
   /* Branch offset is 2 bytes into the jump instruction */
   uint8_t *location = ((uint8_t *)emission->data) + fix.offset + 2;

   off_t target = fix.skip_to_end ? fix.block->last_offset : fix.block->offset;

   /* Offsets are relative to the jump instruction */
   int32_t patch = (int32_t)target - (int32_t)fix.offset;

   /* Patch the binary */
   memcpy(location, &patch, sizeof(patch));
}

void
agx_pack_binary(agx_context *ctx, struct util_dynarray *emission)
{
   struct util_dynarray fixups;
   util_dynarray_init(&fixups, ctx);

   agx_foreach_block(ctx, block) {
      /* Relative to the start of the binary, the block begins at the current
       * number of bytes emitted */
      block->offset = emission->size;

      agx_foreach_instr_in_block(block, ins) {
         block->last_offset = emission->size;
         agx_pack_instr(emission, &fixups, ins, ctx->key->needs_g13x_coherency);
      }
   }

   util_dynarray_foreach(&fixups, struct agx_branch_fixup, fixup)
      agx_fixup_branch(emission, *fixup);

   /* Dougall calls the instruction in this footer "trap". Match the blob. */
   for (unsigned i = 0; i < 8; ++i) {
      uint16_t trap = agx_opcodes_info[AGX_OPCODE_TRAP].encoding.exact;
      util_dynarray_append(emission, uint16_t, trap);
   }

   util_dynarray_fini(&fixups);
}