/*
 * Copyright (C) 2021 Alyssa Rosenzweig <alyssa@rosenzweig.io>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "agx_compiler.h"

/* Binary patches needed for branch offsets */
struct agx_branch_fixup {
   /* Offset into the binary to patch */
   off_t offset;

   /* Value to patch with will be block->offset */
   agx_block *block;
};

/* Texturing has its own operands */
static unsigned
agx_pack_sample_coords(agx_index index, bool *flag)
{
   /* TODO: how to encode 16-bit coords? */
   assert(index.size == AGX_SIZE_32);
   assert(index.value < 0x100);

   *flag = index.discard;
   return index.value;
}

static unsigned
agx_pack_texture(agx_index index, unsigned *flag)
{
   /* TODO: indirection */
   assert(index.type == AGX_INDEX_IMMEDIATE);
   *flag = 0;
   return index.value;
}

static unsigned
agx_pack_sampler(agx_index index, bool *flag)
{
   /* TODO: indirection */
   assert(index.type == AGX_INDEX_IMMEDIATE);
   *flag = 0;
   return index.value;
}

static unsigned
agx_pack_sample_offset(agx_index index, bool *flag)
{
   /* TODO: offsets */
   assert(index.type == AGX_INDEX_NULL);
   *flag = 0;
   return 0;
}
static unsigned
agx_pack_lod(agx_index index)
{
   /* Immediate zero */
   if (index.type == AGX_INDEX_IMMEDIATE && index.value == 0)
      return 0;

   /* Otherwise must be a 16-bit float in a register */
   assert(index.type == AGX_INDEX_REGISTER);
   assert(index.size == AGX_SIZE_16);
   assert(index.value < 0x100);

   return index.value;
}
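
/* For illustration (a sketch, assuming the agx_immediate() and
 * agx_register() helpers from agx_compiler.h): agx_immediate(0) packs to 0,
 * while an fp16 LOD living in half-register 4 packs to its register index,
 * 4. Any other operand trips the asserts above. */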

/* Load/stores have their own operands */

static unsigned
agx_pack_memory_reg(agx_index index, bool *flag)
{
   assert(index.size == AGX_SIZE_16 || index.size == AGX_SIZE_32);
   assert(index.size == AGX_SIZE_16 || (index.value & 1) == 0);
   assert(index.value < 0x100);

   *flag = (index.size == AGX_SIZE_32);
   return index.value;
}

static unsigned
agx_pack_memory_base(agx_index index, bool *flag)
{
   assert(index.size == AGX_SIZE_64);
   assert((index.value & 1) == 0);

   if (index.type == AGX_INDEX_UNIFORM) {
      assert(index.value < 0x200);
      *flag = 1;
      return index.value;
   } else {
      assert(index.value < 0x100);
      *flag = 0;
      return index.value;
   }
}

static unsigned
agx_pack_memory_index(agx_index index, bool *flag)
{
   if (index.type == AGX_INDEX_IMMEDIATE) {
      assert(index.value < 0x10000);
      *flag = 1;

      return index.value;
   } else {
      assert(index.type == AGX_INDEX_REGISTER);
      assert((index.value & 1) == 0);
      assert(index.value < 0x100);

      *flag = 0;
      return index.value;
   }
}
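
/* Worked example for the packers above (a sketch inferred from the code,
 * not from documentation): a load whose 64-bit base address sits in the
 * uniform file at index 4 and whose index is an immediate gets 4 with
 * *flag = 1 from agx_pack_memory_base() (selecting the uniform file) and
 * the raw immediate with *flag = 1 from agx_pack_memory_index(), i.e. a
 * constant offset from a uniform pointer. */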

/* ALU goes through a common path */

static unsigned
agx_pack_alu_dst(agx_index dest)
{
   assert(dest.type == AGX_INDEX_REGISTER);
   unsigned reg = dest.value;
   enum agx_size size = dest.size;
   assert(reg < 0x100);

   /* RA invariant: alignment of half-reg */
   if (size >= AGX_SIZE_32)
      assert((reg & 1) == 0);

   return
      (dest.cache ? (1 << 0) : 0) |
      ((size >= AGX_SIZE_32) ? (1 << 1) : 0) |
      ((size == AGX_SIZE_64) ? (1 << 2) : 0) |
      ((reg << 2));
}
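
/* Inferred destination layout (from the packing above, not from any
 * official reference): bit 0 is the cache hint, bit 1 is set for 32/64-bit
 * destinations, bit 2 doubles as the 64-bit flag (the register LSB there
 * is known zero by the alignment invariant), and the half-register index
 * occupies bits 2 and up. For example, a cached 32-bit destination in
 * half-registers 4-5 packs as (1 << 0) | (1 << 1) | (4 << 2) = 0x13. */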

static unsigned
agx_pack_alu_src(agx_index src)
{
   unsigned value = src.value;
   enum agx_size size = src.size;

   if (src.type == AGX_INDEX_IMMEDIATE) {
      /* Flags 0 for an 8-bit immediate */
      assert(value < 0x100);

      return
         (value & BITFIELD_MASK(6)) |
         ((value >> 6) << 10);
   } else if (src.type == AGX_INDEX_UNIFORM) {
      assert(size == AGX_SIZE_16 || size == AGX_SIZE_32);
      assert(value < 0x200);

      return
         (value & BITFIELD_MASK(6)) |
         ((value >> 8) << 6) |
         ((size == AGX_SIZE_32) ? (1 << 7) : 0) |
         (0x1 << 8) |
         (((value >> 6) & BITFIELD_MASK(2)) << 10);
   } else {
      assert(src.type == AGX_INDEX_REGISTER);
      assert(!(src.cache && src.discard));

      unsigned hint = src.discard ? 0x3 : src.cache ? 0x2 : 0x1;
      unsigned size_flag =
         (size == AGX_SIZE_64) ? 0x3 :
         (size == AGX_SIZE_32) ? 0x2 :
         (size == AGX_SIZE_16) ? 0x0 : 0x0;

      return
         (value & BITFIELD_MASK(6)) |
         (hint << 6) |
         (size_flag << 8) |
         (((value >> 6) & BITFIELD_MASK(2)) << 10);
   }
}
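
/* In each case the low 6 bits of the operand index land in bits 0-5 and
 * the top 2 bits are returned in bits 10-11, which agx_pack_alu() peels
 * off into the extension word. For illustration (inferred from the code
 * above): a plain 32-bit register source with half-register index 64
 * packs as 0 | (0x1 << 6) | (0x2 << 8) | (1 << 10) = 0x640. */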

static unsigned
agx_pack_cmpsel_src(agx_index src, enum agx_size dest_size)
{
   unsigned value = src.value;
   ASSERTED enum agx_size size = src.size;

   if (src.type == AGX_INDEX_IMMEDIATE) {
      /* Flags 0x4 for an 8-bit immediate */
      assert(value < 0x100);

      return
         (value & BITFIELD_MASK(6)) |
         (0x4 << 6) |
         ((value >> 6) << 10);
   } else if (src.type == AGX_INDEX_UNIFORM) {
      assert(size == AGX_SIZE_16 || size == AGX_SIZE_32);
      assert(size == dest_size);
      assert(value < 0x200);

      return
         (value & BITFIELD_MASK(6)) |
         ((value >> 8) << 6) |
         (0x3 << 7) |
         (((value >> 6) & BITFIELD_MASK(2)) << 10);
   } else {
      assert(src.type == AGX_INDEX_REGISTER);
      assert(!(src.cache && src.discard));
      assert(size == AGX_SIZE_16 || size == AGX_SIZE_32);
      assert(size == dest_size);

      unsigned hint = src.discard ? 0x3 : src.cache ? 0x2 : 0x1;

      return
         (value & BITFIELD_MASK(6)) |
         (hint << 6) |
         (((value >> 6) & BITFIELD_MASK(2)) << 10);
   }
}
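
/* cmpsel sources reuse the ALU source layout but omit the explicit size
 * field: the asserts above force the operand size to match the
 * destination, so it is implied rather than encoded (hence the different
 * flag values, e.g. 0x4 instead of 0 for an 8-bit immediate). This is an
 * inference from the code, not from documentation. */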

static unsigned
agx_pack_sample_mask_src(agx_index src)
{
   unsigned value = src.value;
   unsigned packed_value =
         (value & BITFIELD_MASK(6)) |
         (((value >> 6) & BITFIELD_MASK(2)) << 10);

   if (src.type == AGX_INDEX_IMMEDIATE) {
      assert(value < 0x100);
      return packed_value | (1 << 7);
   } else {
      assert(src.type == AGX_INDEX_REGISTER);
      assert(!(src.cache && src.discard));

      return packed_value;
   }
}

static unsigned
agx_pack_float_mod(agx_index src)
{
   return (src.abs ? (1 << 0) : 0)
        | (src.neg ? (1 << 1) : 0);
}
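
/* So |x| contributes 0x1, -x contributes 0x2, and -|x| contributes 0x3
 * (both modifier bits set). */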

static bool
agx_all_16(agx_instr *I)
{
   agx_foreach_dest(I, d) {
      if (!agx_is_null(I->dest[d]) && I->dest[d].size != AGX_SIZE_16)
         return false;
   }

   agx_foreach_src(I, s) {
      if (!agx_is_null(I->src[s]) && I->src[s].size != AGX_SIZE_16)
         return false;
   }

   return true;
}
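
/* For example, an fadd whose destination and both sources are all fp16
 * reports true here and may take the shorter 16-bit encoding below,
 * provided the opcode defines one. */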

/* Generic pack for ALU instructions, which are quite regular */

static void
agx_pack_alu(struct util_dynarray *emission, agx_instr *I)
{
   struct agx_opcode_info info = agx_opcodes_info[I->op];
   bool is_16 = agx_all_16(I) && info.encoding_16.exact;
   struct agx_encoding encoding = is_16 ?
                                     info.encoding_16 : info.encoding;

   assert(encoding.exact && "invalid encoding");

   uint64_t raw = encoding.exact;
   uint16_t extend = 0;

   // TODO: assert saturable
   if (I->saturate)
      raw |= (1 << 6);

   if (info.nr_dests) {
      assert(info.nr_dests == 1);
      unsigned D = agx_pack_alu_dst(I->dest[0]);
      unsigned extend_offset = (sizeof(extend)*8) - 4;

      raw |= (D & BITFIELD_MASK(8)) << 7;
      extend |= ((D >> 8) << extend_offset);
   } else if (info.immediates & AGX_IMMEDIATE_NEST) {
      raw |= (I->invert_cond << 8);
      raw |= (I->nest << 11);
      raw |= (I->icond << 13);
   }

   for (unsigned s = 0; s < info.nr_srcs; ++s) {
      bool is_cmpsel = (s >= 2) &&
         (I->op == AGX_OPCODE_ICMPSEL || I->op == AGX_OPCODE_FCMPSEL);

      unsigned src = is_cmpsel ?
         agx_pack_cmpsel_src(I->src[s], I->dest[0].size) :
         agx_pack_alu_src(I->src[s]);

      unsigned src_short = (src & BITFIELD_MASK(10));
      unsigned src_extend = (src >> 10);

      /* Size bit always zero and so omitted for 16-bit */
      if (is_16 && !is_cmpsel)
         assert((src_short & (1 << 9)) == 0);

      if (info.is_float) {
         unsigned fmod = agx_pack_float_mod(I->src[s]);
         unsigned fmod_offset = is_16 ? 9 : 10;
         src_short |= (fmod << fmod_offset);
      } else if (I->op == AGX_OPCODE_IMAD || I->op == AGX_OPCODE_IADD) {
         bool zext = I->src[s].abs;
         bool extends = I->src[s].size < AGX_SIZE_64;

         unsigned sxt = (extends && !zext) ? (1 << 10) : 0;

         assert(!I->src[s].neg || s == 1);
         src_short |= sxt;
      }

      /* Sources come at predictable offsets */
      unsigned offset = 16 + (12 * s);
      raw |= (((uint64_t) src_short) << offset);

      /* Destination and each source get extended in reverse order */
      unsigned extend_offset = (sizeof(extend)*8) - ((s + 3) * 2);
      extend |= (src_extend << extend_offset);
   }

   if ((I->op == AGX_OPCODE_IMAD || I->op == AGX_OPCODE_IADD) && I->src[1].neg)
      raw |= (1 << 27);

   if (info.immediates & AGX_IMMEDIATE_TRUTH_TABLE) {
      raw |= (I->truth_table & 0x3) << 26;
      raw |= (uint64_t) (I->truth_table >> 2)  << 38;
   } else if (info.immediates & AGX_IMMEDIATE_SHIFT) {
      raw |= (uint64_t) (I->shift & 1) << 39;
      raw |= (uint64_t) (I->shift >> 1) << 52;
   } else if (info.immediates & AGX_IMMEDIATE_BFI_MASK) {
      raw |= (uint64_t) (I->mask & 0x3) << 38;
      raw |= (uint64_t) ((I->mask >> 2) & 0x3) << 50;
      raw |= (uint64_t) ((I->mask >> 4) & 0x1) << 63;
   } else if (info.immediates & AGX_IMMEDIATE_SR) {
      raw |= (uint64_t) (I->sr & 0x3F) << 16;
      raw |= (uint64_t) (I->sr >> 6) << 26;
   } else if (info.immediates & AGX_IMMEDIATE_WRITEOUT)
      raw |= (uint64_t) (I->imm) << 8;
   else if (info.immediates & AGX_IMMEDIATE_IMM)
      raw |= (uint64_t) (I->imm) << 16;
   else if (info.immediates & AGX_IMMEDIATE_ROUND)
      raw |= (uint64_t) (I->imm) << 26;
   else if (info.immediates & (AGX_IMMEDIATE_FCOND | AGX_IMMEDIATE_ICOND))
      raw |= (uint64_t) (I->fcond) << 61;

   /* Determine length bit */
   unsigned length = encoding.length_short;
   uint64_t short_mask = BITFIELD64_MASK(8 * length);
   bool length_bit = (extend || (raw & ~short_mask));

   if (encoding.extensible && length_bit) {
      raw |= (1 << 15);
      length += (length > 8) ? 4 : 2;
   }

   /* Pack! */
   if (length <= sizeof(uint64_t)) {
      unsigned extend_offset = ((length - sizeof(extend)) * 8);

      /* XXX: This is a weird special case */
      if (I->op == AGX_OPCODE_IADD)
         extend_offset -= 16;

      raw |= (uint64_t) extend << extend_offset;
      memcpy(util_dynarray_grow_bytes(emission, 1, length), &raw, length);
   } else {
      /* So far, >8 byte ALU is only to store the extend bits */
      unsigned extend_offset = (((length - sizeof(extend)) * 8) - 64);
      unsigned hi = ((uint64_t) extend) << extend_offset;

      memcpy(util_dynarray_grow_bytes(emission, 1, 8), &raw, 8);
      memcpy(util_dynarray_grow_bytes(emission, 1, length - 8), &hi, length - 8);
   }
}
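
/* To illustrate the length logic above (a sketch): with a 6-byte short
 * form, any set bit of raw above bit 47, or any nonzero extension bits,
 * forces the long form, so bit 15 is set and the instruction grows by 2
 * bytes, with the 16-bit extension word placed in the added tail. */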

static void
agx_pack_instr(struct util_dynarray *emission, struct util_dynarray *fixups, agx_instr *I)
{
   switch (I->op) {
   case AGX_OPCODE_LD_TILE:
   case AGX_OPCODE_ST_TILE:
   {
      bool load = (I->op == AGX_OPCODE_LD_TILE);
      unsigned D = agx_pack_alu_dst(load ? I->dest[0] : I->src[0]);
      unsigned rt = 0; /* TODO */
      unsigned mask = I->mask ?: 0xF;
      assert(mask < 0x10);

      uint64_t raw =
         0x09 |
         (load ? (1 << 6) : 0) |
         ((uint64_t) (D & BITFIELD_MASK(8)) << 7) |
         ((uint64_t) (I->format) << 24) |
         ((uint64_t) (rt) << 32) |
         (load ? (1ull << 35) : 0) |
         ((uint64_t) (mask) << 36) |
         ((uint64_t) 0x0380FC << 40) |
         (((uint64_t) (D >> 8)) << 60);

      unsigned size = 8;
      memcpy(util_dynarray_grow_bytes(emission, 1, size), &raw, size);
      break;
   }

   case AGX_OPCODE_SAMPLE_MASK:
   {
      unsigned S = agx_pack_sample_mask_src(I->src[0]);
      uint64_t raw =
         0x7fc1 |
         ((S & 0xff) << 16) |
         (0x3 << 24) |
         ((S >> 8) << 26) |
         (0x158ull << 32);

      unsigned size = 8;
      memcpy(util_dynarray_grow_bytes(emission, 1, size), &raw, size);
      break;
   }

   case AGX_OPCODE_LD_VARY:
   case AGX_OPCODE_LD_VARY_FLAT:
   {
      bool flat = (I->op == AGX_OPCODE_LD_VARY_FLAT);
      unsigned D = agx_pack_alu_dst(I->dest[0]);
      unsigned channels = (I->channels & 0x3);
      assert(I->mask < 0xF); /* 0 indicates full mask */
      agx_index index_src = I->src[0];
      assert(index_src.type == AGX_INDEX_IMMEDIATE);
      assert(!(flat && I->perspective));
      unsigned index = index_src.value;
      bool kill = false; // TODO: optimize

      uint64_t raw =
            0x21 | (flat ? (1 << 7) : 0) |
            (I->perspective ? (1 << 6) : 0) |
            ((D & 0xFF) << 7) |
            (1ull << 15) | /* XXX */
            (((uint64_t) index) << 16) |
            (((uint64_t) channels) << 30) |
            (!flat ? (1ull << 46) : 0) | /* XXX */
            (kill ? (1ull << 52) : 0) | /* XXX */
            (((uint64_t) (D >> 8)) << 56);

      unsigned size = 8;
      memcpy(util_dynarray_grow_bytes(emission, 1, size), &raw, size);
      break;
   }

   case AGX_OPCODE_ST_VARY:
   {
      agx_index index_src = I->src[0];
      agx_index value = I->src[1];

      assert(index_src.type == AGX_INDEX_IMMEDIATE);
      assert(value.type == AGX_INDEX_REGISTER);
      assert(value.size == AGX_SIZE_32);

      uint64_t raw =
            0x11 |
            (I->last ? (1 << 7) : 0) |
            ((value.value & 0x3F) << 9) |
            (((uint64_t) index_src.value) << 16) |
            (0x80 << 16) | /* XXX */
            ((value.value >> 6) << 24) |
            (0x8 << 28); /* XXX */

      unsigned size = 4;
      memcpy(util_dynarray_grow_bytes(emission, 1, size), &raw, size);
      break;
   }

   case AGX_OPCODE_DEVICE_LOAD:
   {
      assert(I->mask != 0);
      assert(I->format <= 0x10);

      bool Rt, At, Ot;
      unsigned R = agx_pack_memory_reg(I->dest[0], &Rt);
      unsigned A = agx_pack_memory_base(I->src[0], &At);
      unsigned O = agx_pack_memory_index(I->src[1], &Ot);
      unsigned u1 = 1; // XXX
      unsigned u3 = 0;
      unsigned u4 = 4; // XXX
      unsigned u5 = 0;
      bool L = true; /* TODO: when would you want short? */

      uint64_t raw =
            0x05 |
            ((I->format & BITFIELD_MASK(3)) << 7) |
            ((R & BITFIELD_MASK(6)) << 10) |
            ((A & BITFIELD_MASK(4)) << 16) |
            ((O & BITFIELD_MASK(4)) << 20) |
            (Ot ? (1 << 24) : 0) |
            (I->src[1].abs ? (1 << 25) : 0) |
            (u1 << 26) |
            (At << 27) |
            (u3 << 28) |
            (I->scoreboard << 30) |
            (((uint64_t) ((O >> 4) & BITFIELD_MASK(4))) << 32) |
            (((uint64_t) ((A >> 4) & BITFIELD_MASK(4))) << 36) |
            (((uint64_t) ((R >> 6) & BITFIELD_MASK(2))) << 40) |
            (((uint64_t) I->shift) << 42) |
            (((uint64_t) u4) << 44) |
            (L ? (1ull << 47) : 0) |
            (((uint64_t) (I->format >> 3)) << 48) |
            (((uint64_t) Rt) << 49) |
            (((uint64_t) u5) << 50) |
            (((uint64_t) I->mask) << 52) |
            (((uint64_t) (O >> 8)) << 56);

      unsigned size = L ? 8 : 6;
      memcpy(util_dynarray_grow_bytes(emission, 1, size), &raw, size);
      break;
   }

   case AGX_OPCODE_TEXTURE_SAMPLE:
   {
      assert(I->mask != 0);
      assert(I->format <= 0x10);

      bool Rt, Ot, Ct, St;
      unsigned Tt;

      unsigned R = agx_pack_memory_reg(I->dest[0], &Rt);
      unsigned C = agx_pack_sample_coords(I->src[0], &Ct);
      unsigned T = agx_pack_texture(I->src[2], &Tt);
      unsigned S = agx_pack_sampler(I->src[3], &St);
      unsigned O = agx_pack_sample_offset(I->src[4], &Ot);
      unsigned D = agx_pack_lod(I->src[1]);

      unsigned U = 0; // TODO: what is sampler ureg?
      unsigned q1 = 0; // XXX
      unsigned q2 = 0; // XXX
      unsigned q3 = 12; // XXX
      unsigned kill = 0; // helper invocation kill bit
      unsigned q5 = 0; // XXX
      unsigned q6 = 0; // XXX

      uint32_t extend =
            ((U & BITFIELD_MASK(5)) << 0) |
            (kill << 5) |
            ((R >> 6) << 8) |
            ((C >> 6) << 10) |
            ((D >> 6) << 12) |
            ((T >> 6) << 14) |
            ((O & BITFIELD_MASK(6)) << 16) |
            (q6 << 22) |
            (Ot << 27) |
            ((S >> 6) << 28) |
            ((O >> 6) << 30);

      bool L = (extend != 0);
      assert(I->scoreboard == 0 && "todo");

      uint64_t raw =
            0x31 |
            (Rt ? (1 << 8) : 0) |
            ((R & BITFIELD_MASK(6)) << 9) |
            (L ? (1 << 15) : 0) |
            ((C & BITFIELD_MASK(6)) << 16) |
            (Ct ? (1 << 22) : 0) |
            (q1 << 23) |
            ((D & BITFIELD_MASK(6)) << 24) |
            (q2 << 30) |
            (((uint64_t) (T & BITFIELD_MASK(6))) << 32) |
            (((uint64_t) Tt) << 38) |
            (((uint64_t) I->dim) << 40) |
            (((uint64_t) q3) << 43) |
            (((uint64_t) I->mask) << 48) |
            (((uint64_t) I->lod_mode) << 52) |
            (((uint64_t) (S & BITFIELD_MASK(6))) << 56) |
            (((uint64_t) St) << 62) |
            (((uint64_t) q5) << 63);

      memcpy(util_dynarray_grow_bytes(emission, 1, 8), &raw, 8);
      if (L)
         memcpy(util_dynarray_grow_bytes(emission, 1, 4), &extend, 4);

      break;
   }

   case AGX_OPCODE_JMP_EXEC_ANY:
   case AGX_OPCODE_JMP_EXEC_NONE:
   {
      /* We don't implement indirect branches */
      assert(I->target != NULL);

      /* We'll fix the offset later. */
      struct agx_branch_fixup fixup = {
         .block = I->target,
         .offset = emission->size
      };

      util_dynarray_append(fixups, struct agx_branch_fixup, fixup);

      /* The rest of the instruction is fixed */
      struct agx_opcode_info info = agx_opcodes_info[I->op];
      uint64_t raw = info.encoding.exact;
      memcpy(util_dynarray_grow_bytes(emission, 1, 6), &raw, 6);
      break;
   }

   default:
      agx_pack_alu(emission, I);
      return;
   }
}

/* Relative branches may be emitted before their targets, so we patch the
 * binary to fix up the branch offsets after the main emit */

static void
agx_fixup_branch(struct util_dynarray *emission, struct agx_branch_fixup fix)
{
   /* Branch offset is 2 bytes into the jump instruction */
   uint8_t *location = ((uint8_t *) emission->data) + fix.offset + 2;

   /* Offsets are relative to the jump instruction */
   int32_t patch = (int32_t) fix.block->offset - (int32_t) fix.offset;

   /* Patch the binary */
   memcpy(location, &patch, sizeof(patch));
}
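
/* For example, a jump recorded at byte 32 whose target block ends up at
 * byte 8 is patched with -24, while a forward branch gets a positive
 * offset the same way. This is why fixups are deferred until every block
 * offset is known. */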

void
agx_pack_binary(agx_context *ctx, struct util_dynarray *emission)
{
   struct util_dynarray fixups;
   util_dynarray_init(&fixups, ctx);

   agx_foreach_block(ctx, block) {
      /* Relative to the start of the binary, the block begins at the current
       * number of bytes emitted */
      block->offset = emission->size;

      agx_foreach_instr_in_block(block, ins) {
         agx_pack_instr(emission, &fixups, ins);
      }
   }

   util_dynarray_foreach(&fixups, struct agx_branch_fixup, fixup)
      agx_fixup_branch(emission, *fixup);

   util_dynarray_fini(&fixups);
}
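
/* Typical driver-side usage is a sketch along these lines, where upload()
 * stands in for however the caller maps the code for the GPU (hypothetical,
 * not part of this file's API):
 *
 *    struct util_dynarray binary;
 *    util_dynarray_init(&binary, memctx);
 *    agx_pack_binary(ctx, &binary);
 *    upload(binary.data, binary.size);
 *    util_dynarray_fini(&binary);
 */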