1 /*
2 * Copyright 2021 Alyssa Rosenzweig
3 * SPDX-License-Identifier: MIT
4 */
5
6 #include "agx_compiler.h"
7 #include "agx_opcodes.h"
8
9 /* Binary patches needed for branch offsets */
10 struct agx_branch_fixup {
11 /* Offset into the binary to patch */
12 off_t offset;
13
14 /* Value to patch in; this will be block->offset */
15 agx_block *block;
16
17 /* If true, skips to the last instruction of the target block */
18 bool skip_to_end;
19 };
20
21 static void
22 pack_assert_internal(const agx_instr *I, bool condition, const char *msg)
23 {
24 if (!condition) {
25 fprintf(stderr, "Packing assertion failed for instruction:\n\n");
26 agx_print_instr(I, stderr);
27 fprintf(stderr, "\n%s\n", msg);
28 abort();
29 }
30 }
31
32 #define pack_assert_msg(I, cond, msg) \
33 pack_assert_internal(I, cond, msg ": " #cond)
34
35 #define pack_assert(I, cond) pack_assert_internal(I, cond, #cond)
36
37 static void
38 assert_register_is_aligned(const agx_instr *I, agx_index reg)
39 {
40 pack_assert_msg(I, reg.type == AGX_INDEX_REGISTER, "expecting a register");
41
42 switch (reg.size) {
43 case AGX_SIZE_16:
44 return;
45 case AGX_SIZE_32:
46 pack_assert_msg(I, (reg.value & 1) == 0, "unaligned reg");
47 return;
48 case AGX_SIZE_64:
49 pack_assert_msg(I, (reg.value & 3) == 0, "unaligned reg");
50 return;
51 }
52
53 unreachable("Invalid register size");
54 }
55
56 /* Texturing has its own operands */
57 static unsigned
58 agx_pack_sample_coords(const agx_instr *I, agx_index index, bool *flag,
59 bool *is_16)
60 {
61 /* TODO: Do we have a use case for 16-bit coords? */
62 pack_assert_msg(I, index.size == AGX_SIZE_32, "32-bit coordinates");
63 pack_assert_msg(I, index.value < 0x100, "coordinate register bound");
64
65 *is_16 = false;
66 *flag = index.discard;
67 return index.value;
68 }
69
70 static unsigned
71 agx_pack_texture(const agx_instr *I, agx_index base, agx_index index,
72 unsigned *packed_base, unsigned *flag)
73 {
74 if (base.type == AGX_INDEX_IMMEDIATE) {
75 pack_assert(I, base.value == 0);
76
77 /* Texture state registers */
78 *packed_base = 0;
79
80 if (index.type == AGX_INDEX_REGISTER) {
81 pack_assert(I, index.size == AGX_SIZE_16);
82 *flag = 1;
83 } else {
84 pack_assert(I, index.type == AGX_INDEX_IMMEDIATE);
85 *flag = 0;
86 }
87 } else {
88 pack_assert(I, base.type == AGX_INDEX_UNIFORM);
89 pack_assert(I, base.size == AGX_SIZE_64);
90 pack_assert(I, (base.value & 3) == 0);
91 pack_assert(I, index.size == AGX_SIZE_32);
92
93 /* Bindless */
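      /* The 64-bit uniform holding the bindless base is 4-aligned (asserted
       * above), so the field stores its index divided by 4.
       */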
94 *packed_base = base.value >> 2;
95 *flag = 3;
96 }
97
98 return index.value;
99 }
100
101 static unsigned
102 agx_pack_sampler(const agx_instr *I, agx_index index, bool *flag)
103 {
104 if (index.type == AGX_INDEX_REGISTER) {
105 pack_assert(I, index.size == AGX_SIZE_16);
106 *flag = 1;
107 } else {
108 pack_assert(I, index.type == AGX_INDEX_IMMEDIATE);
109 *flag = 0;
110 }
111
112 return index.value;
113 }
114
115 static unsigned
116 agx_pack_sample_compare_offset(const agx_instr *I, agx_index index)
117 {
118 if (index.type == AGX_INDEX_NULL)
119 return 0;
120
121 pack_assert(I, index.size == AGX_SIZE_32);
122 pack_assert(I, index.value < 0x100);
123 assert_register_is_aligned(I, index);
124 return index.value;
125 }
126
127 static unsigned
128 agx_pack_lod(const agx_instr *I, agx_index index, unsigned *lod_mode)
129 {
130 /* For automatic LOD, the LOD field is unused. Assert as much. */
131 if ((*lod_mode) == AGX_LOD_MODE_AUTO_LOD) {
132 pack_assert(I, index.type == AGX_INDEX_IMMEDIATE);
133 pack_assert(I, index.value == 0);
134 return 0;
135 }
136
137 if (index.type == AGX_INDEX_UNIFORM) {
138 /* Translate LOD mode from register mode to uniform mode */
139 pack_assert(I,
140 ((*lod_mode) & BITFIELD_BIT(2)) && "must start as reg mode");
141 *lod_mode = (*lod_mode) & ~BITFIELD_BIT(2);
142 pack_assert(I, index.value < 0x200);
143 } else {
144 /* Otherwise must be registers */
145 pack_assert(I, index.type == AGX_INDEX_REGISTER);
146 pack_assert(I, index.value < 0x100);
147 }
148
149 return index.value;
150 }
151
152 static unsigned
153 agx_pack_pbe_source(const agx_instr *I, agx_index index, bool *flag)
154 {
155 pack_assert(I, index.size == AGX_SIZE_16 || index.size == AGX_SIZE_32);
156 assert_register_is_aligned(I, index);
157
158 *flag = (index.size == AGX_SIZE_32);
159 return index.value;
160 }
161
162 static unsigned
163 agx_pack_pbe_lod(const agx_instr *I, agx_index index, bool *flag)
164 {
165 pack_assert(I, index.size == AGX_SIZE_16);
166
167 if (index.type == AGX_INDEX_IMMEDIATE)
168 *flag = true;
169 else if (index.type == AGX_INDEX_REGISTER)
170 *flag = false;
171 else
172 unreachable("Invalid PBE LOD type");
173
174 return index.value;
175 }
176
177 /* Load/stores have their own operands */
178
179 static unsigned
180 agx_pack_memory_reg(const agx_instr *I, agx_index index, bool *flag)
181 {
182 assert_register_is_aligned(I, index);
183
184 *flag = (index.size >= AGX_SIZE_32);
185 return index.value;
186 }
187
188 static unsigned
189 agx_pack_memory_base(const agx_instr *I, agx_index index, bool *flag)
190 {
191 pack_assert(I, index.size == AGX_SIZE_64);
192 pack_assert(I, (index.value & 1) == 0);
193
194 /* Can't seem to access high uniforms from memory instructions */
195 pack_assert(I, index.value < 0x100);
196
197 if (index.type == AGX_INDEX_UNIFORM) {
198 *flag = 1;
199 } else {
200 pack_assert(I, index.type == AGX_INDEX_REGISTER);
201 *flag = 0;
202 }
203
204 return index.value;
205 }
206
207 static unsigned
208 agx_pack_memory_index(const agx_instr *I, agx_index index, bool *flag)
209 {
210 if (index.type == AGX_INDEX_IMMEDIATE) {
211 pack_assert(I, index.value < 0x10000);
212 *flag = 1;
213
214 return index.value;
215 } else {
216 pack_assert(I, index.type == AGX_INDEX_REGISTER);
217 pack_assert(I, index.size == AGX_SIZE_32);
218 pack_assert(I, (index.value & 1) == 0);
219 pack_assert(I, index.value < 0x100);
220
221 *flag = 0;
222 return index.value;
223 }
224 }
225
226 static uint16_t
227 agx_pack_local_base(const agx_instr *I, agx_index index, unsigned *flags)
228 {
229 pack_assert(I, index.size == AGX_SIZE_16);
230
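   /* The returned flags select the base type: 0 = register, 2 = no base
    * (immediate zero), and 1 = uniform, with the uniform's high bits folded
    * in above bit 0.
    */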
231 if (index.type == AGX_INDEX_IMMEDIATE) {
232 pack_assert(I, index.value == 0);
233 *flags = 2;
234 return 0;
235 } else if (index.type == AGX_INDEX_UNIFORM) {
236 *flags = 1 | ((index.value >> 8) << 1);
237 return index.value & BITFIELD_MASK(8);
238 } else {
239 assert_register_is_aligned(I, index);
240 *flags = 0;
241 return index.value;
242 }
243 }
244
245 static uint16_t
246 agx_pack_local_index(const agx_instr *I, agx_index index, bool *flag)
247 {
248 pack_assert(I, index.size == AGX_SIZE_16);
249
250 if (index.type == AGX_INDEX_IMMEDIATE) {
251 pack_assert(I, index.value < 0x10000);
252 *flag = 1;
253 return index.value;
254 } else {
255 assert_register_is_aligned(I, index);
256 *flag = 0;
257 return index.value;
258 }
259 }
260
261 static unsigned
262 agx_pack_atomic_source(const agx_instr *I, agx_index index)
263 {
264 pack_assert_msg(I, index.size == AGX_SIZE_32, "no 64-bit atomics yet");
265 assert_register_is_aligned(I, index);
266 return index.value;
267 }
268
269 static unsigned
270 agx_pack_atomic_dest(const agx_instr *I, agx_index index, bool *flag)
271 {
272 /* Atomic destinations are optional (e.g. for update with no return) */
273 if (index.type == AGX_INDEX_NULL) {
274 *flag = 0;
275 return 0;
276 }
277
278 /* But are otherwise registers */
279 pack_assert_msg(I, index.size == AGX_SIZE_32, "no 64-bit atomics yet");
280 assert_register_is_aligned(I, index);
281 *flag = 1;
282 return index.value;
283 }
284
285 /* ALU goes through a common path */
286
287 static unsigned
288 agx_pack_alu_dst(const agx_instr *I, agx_index dest)
289 {
290 assert_register_is_aligned(I, dest);
291 unsigned reg = dest.value;
292 enum agx_size size = dest.size;
293 pack_assert(I, reg < 0x100);
294
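   /* The destination descriptor packs the cache hint in bit 0, a "32-bit or
    * wider" flag in bit 1, a 64-bit flag in bit 2, and the register number
    * (in 16-bit register units) from bit 2 upward.
    */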
295 return (dest.cache ? (1 << 0) : 0) | ((size >= AGX_SIZE_32) ? (1 << 1) : 0) |
296 ((size == AGX_SIZE_64) ? (1 << 2) : 0) | ((reg << 2));
297 }
298
299 static unsigned
300 agx_pack_alu_src(const agx_instr *I, agx_index src)
301 {
302 unsigned value = src.value;
303 enum agx_size size = src.size;
304
305 if (src.type == AGX_INDEX_IMMEDIATE) {
306 /* Flags 0 for an 8-bit immediate */
307 pack_assert(I, value < 0x100);
308
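      /* The low 6 bits of the immediate go in the source field proper; the
       * top 2 bits spill into the extension bits at position 10.
       */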
309 return (value & BITFIELD_MASK(6)) | ((value >> 6) << 10);
310 } else if (src.type == AGX_INDEX_UNIFORM) {
311 pack_assert(I, size == AGX_SIZE_16 || size == AGX_SIZE_32);
312 pack_assert(I, value < AGX_NUM_UNIFORMS);
313
314 return (value & BITFIELD_MASK(6)) |
315 ((value & BITFIELD_BIT(8)) ? (1 << 6) : 0) |
316 ((size == AGX_SIZE_32) ? (1 << 7) : 0) | (0x1 << 8) |
317 (((value >> 6) & BITFIELD_MASK(2)) << 10);
318 } else {
319 assert_register_is_aligned(I, src);
320 pack_assert(I, !(src.cache && src.discard));
321
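      /* Register sources carry a 2-bit usage hint: 1 = plain read, 2 = cache,
       * 3 = discard (last use). Cache and discard are mutually exclusive, as
       * asserted above.
       */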
322 unsigned hint = src.discard ? 0x3 : src.cache ? 0x2 : 0x1;
323 unsigned size_flag = (size == AGX_SIZE_64) ? 0x3
324 : (size == AGX_SIZE_32) ? 0x2
325 : (size == AGX_SIZE_16) ? 0x0
326 : 0x0;
327
328 return (value & BITFIELD_MASK(6)) | (hint << 6) | (size_flag << 8) |
329 (((value >> 6) & BITFIELD_MASK(2)) << 10);
330 }
331 }
332
333 static unsigned
334 agx_pack_cmpsel_src(const agx_instr *I, agx_index src, enum agx_size dest_size)
335 {
336 unsigned value = src.value;
337 ASSERTED enum agx_size size = src.size;
338
339 if (src.type == AGX_INDEX_IMMEDIATE) {
340 /* Flags 0x4 for an 8-bit immediate */
341 pack_assert(I, value < 0x100);
342
343 return (value & BITFIELD_MASK(6)) | (0x4 << 6) | ((value >> 6) << 10);
344 } else if (src.type == AGX_INDEX_UNIFORM) {
345 pack_assert(I, size == AGX_SIZE_16 || size == AGX_SIZE_32);
346 pack_assert(I, size == dest_size);
347 pack_assert(I, value < 0x200);
348
349 return (value & BITFIELD_MASK(6)) | ((value >> 8) << 6) | (0x3 << 7) |
350 (((value >> 6) & BITFIELD_MASK(2)) << 10);
351 } else {
352 pack_assert(I, src.type == AGX_INDEX_REGISTER);
353 pack_assert(I, !(src.cache && src.discard));
354 pack_assert(I, size == AGX_SIZE_16 || size == AGX_SIZE_32);
355 pack_assert(I, size == dest_size);
356 assert_register_is_aligned(I, src);
357
358 unsigned hint = src.discard ? 0x3 : src.cache ? 0x2 : 0x1;
359
360 return (value & BITFIELD_MASK(6)) | (hint << 6) |
361 (((value >> 6) & BITFIELD_MASK(2)) << 10);
362 }
363 }
364
365 static unsigned
366 agx_pack_sample_mask_src(const agx_instr *I, agx_index src)
367 {
368 unsigned value = src.value;
369 unsigned packed_value =
370 (value & BITFIELD_MASK(6)) | (((value >> 6) & BITFIELD_MASK(2)) << 10);
371
372 if (src.type == AGX_INDEX_IMMEDIATE) {
373 pack_assert(I, value < 0x100);
374 return packed_value | (1 << 7);
375 } else {
376 pack_assert(I, src.type == AGX_INDEX_REGISTER);
377 assert_register_is_aligned(I, src);
378 pack_assert(I, !(src.cache && src.discard));
379
380 return packed_value;
381 }
382 }
383
384 static unsigned
385 agx_pack_float_mod(agx_index src)
386 {
387 return (src.abs ? (1 << 0) : 0) | (src.neg ? (1 << 1) : 0);
388 }
389
390 static bool
391 agx_all_16(agx_instr *I)
392 {
393 agx_foreach_dest(I, d) {
394 if (!agx_is_null(I->dest[d]) && I->dest[d].size != AGX_SIZE_16)
395 return false;
396 }
397
398 agx_foreach_src(I, s) {
399 if (!agx_is_null(I->src[s]) && I->src[s].size != AGX_SIZE_16)
400 return false;
401 }
402
403 return true;
404 }
405
406 /* Generic pack for ALU instructions, which are quite regular */
407
408 static void
409 agx_pack_alu(struct util_dynarray *emission, agx_instr *I)
410 {
411 struct agx_opcode_info info = agx_opcodes_info[I->op];
412 struct agx_encoding encoding = info.encoding;
413
414 bool is_f16 = (I->op == AGX_OPCODE_HMUL || I->op == AGX_OPCODE_HFMA ||
415 I->op == AGX_OPCODE_HADD);
416
417 pack_assert_msg(I, encoding.exact, "invalid encoding");
418
419 uint64_t raw = encoding.exact;
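   /* High bits of the destination and source fields that do not fit in the
    * short encoding accumulate here and are appended as the extension word
    * when the long form is selected below.
    */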
420 uint16_t extend = 0;
421
422 // TODO: assert saturable
423 if (I->saturate)
424 raw |= (1 << 6);
425
426 if (info.nr_dests) {
427 pack_assert(I, info.nr_dests == 1);
428 unsigned D = agx_pack_alu_dst(I, I->dest[0]);
429 unsigned extend_offset = (sizeof(extend) * 8) - 4;
430
431 raw |= (D & BITFIELD_MASK(8)) << 7;
432 extend |= ((D >> 8) << extend_offset);
433
434 if (info.immediates & AGX_IMMEDIATE_INVERT_COND) {
435 raw |= (uint64_t)(I->invert_cond) << 47;
436 }
437 } else if (info.immediates & AGX_IMMEDIATE_NEST) {
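      /* Destination-less instructions taking a nest immediate are the
       * structured control-flow ops; they encode the condition, its
       * inversion, and the nesting level in place of a destination.
       */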
438 raw |= (I->invert_cond << 8);
439 raw |= (I->nest << 11);
440 raw |= (I->icond << 13);
441 }
442
443 for (unsigned s = 0; s < info.nr_srcs; ++s) {
444 bool is_cmpsel = (s >= 2) && (I->op == AGX_OPCODE_ICMPSEL ||
445 I->op == AGX_OPCODE_FCMPSEL);
446
447 unsigned src = is_cmpsel
448 ? agx_pack_cmpsel_src(I, I->src[s], I->dest[0].size)
449 : agx_pack_alu_src(I, I->src[s]);
450
451 unsigned src_short = (src & BITFIELD_MASK(10));
452 unsigned src_extend = (src >> 10);
453
454 /* Size bit always zero and so omitted for 16-bit */
455 if (is_f16 && !is_cmpsel)
456 pack_assert(I, (src_short & (1 << 9)) == 0);
457
458 if (info.is_float || (I->op == AGX_OPCODE_FCMPSEL && !is_cmpsel)) {
459 unsigned fmod = agx_pack_float_mod(I->src[s]);
460 unsigned fmod_offset = is_f16 ? 9 : 10;
461 src_short |= (fmod << fmod_offset);
462 } else if (I->op == AGX_OPCODE_IMAD || I->op == AGX_OPCODE_IADD) {
463 /* Force unsigned for immediates so uadd_sat works properly */
464 bool zext = I->src[s].abs || I->src[s].type == AGX_INDEX_IMMEDIATE;
465 bool extends = I->src[s].size < AGX_SIZE_64;
466
467 unsigned sxt = (extends && !zext) ? (1 << 10) : 0;
468
469 unsigned negate_src = (I->op == AGX_OPCODE_IMAD) ? 2 : 1;
470 pack_assert(I, !I->src[s].neg || s == negate_src);
471 src_short |= sxt;
472 }
473
474 /* Sources come at predictable offsets */
475 unsigned offset = 16 + (12 * s);
476 raw |= (((uint64_t)src_short) << offset);
477
478 /* Destination and each source get extended in reverse order */
479 unsigned extend_offset = (sizeof(extend) * 8) - ((s + 3) * 2);
480 extend |= (src_extend << extend_offset);
481 }
482
483 if ((I->op == AGX_OPCODE_IMAD && I->src[2].neg) ||
484 (I->op == AGX_OPCODE_IADD && I->src[1].neg))
485 raw |= (1 << 27);
486
487 if (info.immediates & AGX_IMMEDIATE_TRUTH_TABLE) {
488 raw |= (I->truth_table & 0x3) << 26;
489 raw |= (uint64_t)(I->truth_table >> 2) << 38;
490 } else if (info.immediates & AGX_IMMEDIATE_SHIFT) {
491 pack_assert(I, I->shift <= 4);
492 raw |= (uint64_t)(I->shift & 1) << 39;
493 raw |= (uint64_t)(I->shift >> 1) << 52;
494 } else if (info.immediates & AGX_IMMEDIATE_BFI_MASK) {
495 raw |= (uint64_t)(I->bfi_mask & 0x3) << 38;
496 raw |= (uint64_t)((I->bfi_mask >> 2) & 0x3) << 50;
497 raw |= (uint64_t)((I->bfi_mask >> 4) & 0x1) << 63;
498 } else if (info.immediates & AGX_IMMEDIATE_SIMD_OP) {
499 raw |= (uint64_t)(I->simd_op & 0x1) << 28;
500 raw |= (uint64_t)((I->simd_op >> 1) & 0x7) << 38;
501 raw |= (uint64_t)((I->simd_op >> 4) & 0x1) << 47;
502 } else if (info.immediates & AGX_IMMEDIATE_SR) {
503 raw |= (uint64_t)(I->sr & 0x3F) << 16;
504 raw |= (uint64_t)(I->sr >> 6) << 26;
505 } else if (info.immediates & AGX_IMMEDIATE_WRITEOUT)
506 raw |= (uint64_t)(I->imm) << 8;
507 else if (info.immediates & AGX_IMMEDIATE_IMM)
508 raw |= (uint64_t)(I->imm) << 16;
509 else if (info.immediates & AGX_IMMEDIATE_ROUND)
510 raw |= (uint64_t)(I->imm) << 26;
511 else if (info.immediates & (AGX_IMMEDIATE_FCOND | AGX_IMMEDIATE_ICOND))
512 raw |= (uint64_t)(I->fcond) << 61;
513
514 /* Determine length bit */
515 unsigned length = encoding.length_short;
516 if (I->op == AGX_OPCODE_MOV_IMM && I->dest[0].size == AGX_SIZE_16)
517 length -= 2;
518
519 uint64_t short_mask = BITFIELD64_MASK(8 * length);
520 bool length_bit = (extend || (raw & ~short_mask));
521
522 if (encoding.extensible && length_bit) {
523 raw |= (1 << 15);
524 length += (length > 8) ? 4 : 2;
525 }
526
527 /* Pack! */
528 if (length <= sizeof(uint64_t)) {
529 unsigned extend_offset = ((length - sizeof(extend)) * 8);
530
531 /* XXX: Encode these special cases better */
532 switch (I->op) {
533 case AGX_OPCODE_IADD:
534 case AGX_OPCODE_ICMP_BALLOT:
535 case AGX_OPCODE_ICMP_QUAD_BALLOT:
536 case AGX_OPCODE_FCMP_BALLOT:
537 case AGX_OPCODE_FCMP_QUAD_BALLOT:
538 extend_offset -= 16;
539 break;
540 default:
541 break;
542 }
543
544 raw |= (uint64_t)extend << extend_offset;
545 memcpy(util_dynarray_grow_bytes(emission, 1, length), &raw, length);
546 } else {
547 /* So far, >8 byte ALU is only to store the extend bits */
548 unsigned extend_offset = (((length - sizeof(extend)) * 8) - 64);
549 unsigned hi = ((uint64_t)extend) << extend_offset;
550
551 memcpy(util_dynarray_grow_bytes(emission, 1, 8), &raw, 8);
552 memcpy(util_dynarray_grow_bytes(emission, 1, length - 8), &hi,
553 length - 8);
554 }
555 }
556
557 static void
558 agx_pack_instr(struct util_dynarray *emission, struct util_dynarray *fixups,
559 agx_instr *I, enum u_tristate needs_g13x_coherency)
560 {
561 switch (I->op) {
562 case AGX_OPCODE_LD_TILE:
563 case AGX_OPCODE_ST_TILE: {
564 bool load = (I->op == AGX_OPCODE_LD_TILE);
565 unsigned D = agx_pack_alu_dst(I, load ? I->dest[0] : I->src[0]);
566 pack_assert(I, I->mask < 0x10);
567 pack_assert(I, I->pixel_offset < 0x200);
568
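      /* Tile loads take {sample index, coords} as sources; tile stores take
       * {value, sample index, coords}.
       */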
569 agx_index sample_index = load ? I->src[0] : I->src[1];
570 agx_index coords = load ? I->src[1] : I->src[2];
571 pack_assert(I, sample_index.type == AGX_INDEX_REGISTER ||
572 sample_index.type == AGX_INDEX_IMMEDIATE);
573 pack_assert(I, sample_index.size == AGX_SIZE_16);
574 unsigned St = (sample_index.type == AGX_INDEX_REGISTER) ? 1 : 0;
575 unsigned S = sample_index.value;
576 pack_assert(I, S < 0x100);
577
578 pack_assert(I, I->explicit_coords == (coords.type == AGX_INDEX_REGISTER));
579 unsigned C = I->explicit_coords ? coords.value : 0;
580
581 uint64_t raw = agx_opcodes_info[I->op].encoding.exact |
582 ((uint64_t)(D & BITFIELD_MASK(8)) << 7) | (St << 22) |
583 ((uint64_t)(I->format) << 24) |
584 ((uint64_t)(C & BITFIELD_MASK(6)) << 16) |
585 ((uint64_t)(I->pixel_offset & BITFIELD_MASK(7)) << 28) |
586 (load || I->explicit_coords ? (1ull << 35) : 0) |
587 ((uint64_t)(I->mask) << 36) |
588 ((uint64_t)(I->pixel_offset >> 7) << 40) |
589 ((uint64_t)(S & BITFIELD_MASK(6)) << 42) |
590 (I->explicit_coords ? (1ull << 55) : 0) |
591 ((uint64_t)(S >> 6) << 56) | ((uint64_t)(C >> 6) << 58) |
592 (((uint64_t)(D >> 8)) << 60);
593
594 unsigned size = 8;
595 memcpy(util_dynarray_grow_bytes(emission, 1, size), &raw, size);
596 break;
597 }
598
599 case AGX_OPCODE_SAMPLE_MASK: {
600 unsigned S = agx_pack_sample_mask_src(I, I->src[1]);
601 unsigned T = I->src[0].value;
602 bool Tt = I->src[0].type == AGX_INDEX_IMMEDIATE;
603 pack_assert(I, Tt || I->src[0].type == AGX_INDEX_REGISTER);
604 uint32_t raw = 0xc1 | (Tt ? BITFIELD_BIT(8) : 0) |
605 ((T & BITFIELD_MASK(6)) << 9) | ((S & 0xff) << 16) |
606 ((T >> 6) << 24) | ((S >> 8) << 26);
607
608 unsigned size = 4;
609 memcpy(util_dynarray_grow_bytes(emission, 1, size), &raw, size);
610 break;
611 }
612
613 case AGX_OPCODE_WAIT: {
614 uint64_t raw =
615 agx_opcodes_info[I->op].encoding.exact | (I->scoreboard << 8);
616
617 unsigned size = 2;
618 memcpy(util_dynarray_grow_bytes(emission, 1, size), &raw, size);
619 break;
620 }
621
622 case AGX_OPCODE_ITER:
623 case AGX_OPCODE_ITERPROJ:
624 case AGX_OPCODE_LDCF: {
625 bool flat = (I->op == AGX_OPCODE_LDCF);
626 bool perspective = (I->op == AGX_OPCODE_ITERPROJ);
627 unsigned D = agx_pack_alu_dst(I, I->dest[0]);
628 unsigned channels = (I->channels & 0x3);
629
630 agx_index src_I = I->src[0];
631 pack_assert(I, src_I.type == AGX_INDEX_IMMEDIATE ||
632 src_I.type == AGX_INDEX_REGISTER);
633
634 unsigned cf_I = src_I.value;
635 unsigned cf_J = 0;
636
637 if (perspective) {
638 agx_index src_J = I->src[1];
639 pack_assert(I, src_J.type == AGX_INDEX_IMMEDIATE);
640 cf_J = src_J.value;
641 }
642
643 pack_assert(I, cf_I < 0x100);
644 pack_assert(I, cf_J < 0x100);
645
646 enum agx_interpolation interp = I->interpolation;
647 agx_index sample_index = flat ? agx_null() : I->src[perspective ? 2 : 1];
648
649 /* Fix up the interpolation enum to distinguish the sample index source */
650 if (interp == AGX_INTERPOLATION_SAMPLE) {
651 if (sample_index.type == AGX_INDEX_REGISTER)
652 interp = AGX_INTERPOLATION_SAMPLE_REGISTER;
653 else
654 pack_assert(I, sample_index.type == AGX_INDEX_IMMEDIATE);
655 } else {
656 sample_index = agx_zero();
657 }
658
659 bool kill = false; // TODO: optimize
660 bool forward = false; // TODO: optimize
661
662 uint64_t raw =
663 0x21 | (flat ? (1 << 7) : 0) | (perspective ? (1 << 6) : 0) |
664 ((D & 0xFF) << 7) | (1ull << 15) | /* XXX */
665 ((cf_I & BITFIELD_MASK(6)) << 16) |
666 ((src_I.type == AGX_INDEX_REGISTER) ? (1 << 23) : 0) |
667 ((cf_J & BITFIELD_MASK(6)) << 24) | (((uint64_t)channels) << 30) |
668 (((uint64_t)sample_index.value) << 32) | (forward ? (1ull << 46) : 0) |
669 (((uint64_t)interp) << 48) | (kill ? (1ull << 52) : 0) |
670 (((uint64_t)(D >> 8)) << 56) | ((uint64_t)(cf_I >> 6) << 58) |
671 ((uint64_t)(cf_J >> 6) << 60);
672
673 unsigned size = 8;
674 memcpy(util_dynarray_grow_bytes(emission, 1, size), &raw, size);
675 break;
676 }
677
678 case AGX_OPCODE_ST_VARY: {
679 agx_index index_src = I->src[0];
680 agx_index value = I->src[1];
681
682 pack_assert(I, index_src.type == AGX_INDEX_IMMEDIATE ||
683 index_src.type == AGX_INDEX_REGISTER);
684 pack_assert(I, index_src.value < BITFIELD_MASK(8));
685 pack_assert(I, value.type == AGX_INDEX_REGISTER);
686 pack_assert(I, value.size == AGX_SIZE_32);
687
688 uint64_t raw = 0x11 | (I->last ? (1 << 7) : 0) |
689 ((value.value & 0x3F) << 9) |
690 (((uint64_t)(index_src.value & 0x3F)) << 16) |
691 (index_src.type == AGX_INDEX_IMMEDIATE ? (1 << 23) : 0) |
692 ((value.value >> 6) << 24) |
693 ((index_src.value >> 6) << 26) | (0x8u << 28); /* XXX */
694
695 unsigned size = 4;
696 memcpy(util_dynarray_grow_bytes(emission, 1, size), &raw, size);
697 break;
698 }
699
700 case AGX_OPCODE_DEVICE_LOAD:
701 case AGX_OPCODE_DEVICE_STORE:
702 case AGX_OPCODE_UNIFORM_STORE: {
703 bool is_device_store = I->op == AGX_OPCODE_DEVICE_STORE;
704 bool is_uniform_store = I->op == AGX_OPCODE_UNIFORM_STORE;
705 bool is_store = is_device_store || is_uniform_store;
706 bool has_base = !is_uniform_store;
707
708 /* Uniform stores are required to be 16-bit. The encoding that should be
709 * 32-bit annoyingly doesn't work. Fix up the format and size so we can
710 * use scalar 32-bit values in the IR and avoid special casing earlier in
711 * the compiler.
712 */
713 enum agx_format format = is_uniform_store ? AGX_FORMAT_I16 : I->format;
714 agx_index reg = is_store ? I->src[0] : I->dest[0];
715 unsigned mask = I->mask;
716
717 if (is_uniform_store && reg.size != AGX_SIZE_16) {
718 if (reg.size == AGX_SIZE_64) {
719 assert(mask == 1);
720 mask = BITFIELD_MASK(4);
721 } else {
722 assert(reg.size == AGX_SIZE_32);
723 assert(mask == 1 || mask == 3);
724 mask = BITFIELD_MASK(mask == 3 ? 4 : 2);
725 }
726
727 reg.size = AGX_SIZE_16;
728 }
729
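      /* Source layout: device loads are {base, offset}, device stores are
       * {value, base, offset}, and uniform stores (which have no base) are
       * {value, offset}.
       */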
730 unsigned offset_src = (has_base ? 1 : 0) + (is_store ? 1 : 0);
731
732 bool Rt, At = false, Ot;
733 unsigned R = agx_pack_memory_reg(I, reg, &Rt);
734 unsigned A =
735 has_base ? agx_pack_memory_base(I, I->src[is_store ? 1 : 0], &At) : 0;
736 unsigned O = agx_pack_memory_index(I, I->src[offset_src], &Ot);
737 unsigned u1 = is_uniform_store ? 0 : 1; // XXX
738 unsigned u3 = 0;
739 unsigned u4 = is_uniform_store ? 0 : I->coherent ? 7 : 4;
740 unsigned u5 = 0;
741 bool L = true; /* TODO: when would you want short? */
742
743 pack_assert(I, mask != 0);
744 pack_assert(I, format <= 0x10);
745
746 uint64_t raw =
747 agx_opcodes_info[I->op].encoding.exact |
748 ((format & BITFIELD_MASK(3)) << 7) | ((R & BITFIELD_MASK(6)) << 10) |
749 ((A & BITFIELD_MASK(4)) << 16) | ((O & BITFIELD_MASK(4)) << 20) |
750 (Ot ? (1 << 24) : 0) | (I->src[offset_src].abs ? (1 << 25) : 0) |
751 (is_uniform_store ? (2 << 25) : 0) | (u1 << 26) | (At << 27) |
752 (u3 << 28) | (I->scoreboard << 30) |
753 (((uint64_t)((O >> 4) & BITFIELD_MASK(4))) << 32) |
754 (((uint64_t)((A >> 4) & BITFIELD_MASK(4))) << 36) |
755 (((uint64_t)((R >> 6) & BITFIELD_MASK(2))) << 40) |
756 (((uint64_t)I->shift) << 42) | (((uint64_t)u4) << 44) |
757 (L ? (1ull << 47) : 0) | (((uint64_t)(format >> 3)) << 48) |
758 (((uint64_t)Rt) << 49) | (((uint64_t)u5) << 50) |
759 (((uint64_t)mask) << 52) | (((uint64_t)(O >> 8)) << 56);
760
761 unsigned size = L ? 8 : 6;
762 memcpy(util_dynarray_grow_bytes(emission, 1, size), &raw, size);
763 break;
764 }
765
766 case AGX_OPCODE_LOCAL_LOAD:
767 case AGX_OPCODE_LOCAL_STORE: {
768 bool is_load = I->op == AGX_OPCODE_LOCAL_LOAD;
769 bool L = true; /* TODO: when would you want short? */
770 unsigned At;
771 bool Rt, Ot;
772
773 unsigned R =
774 agx_pack_memory_reg(I, is_load ? I->dest[0] : I->src[0], &Rt);
775 unsigned A = agx_pack_local_base(I, is_load ? I->src[0] : I->src[1], &At);
776 unsigned O =
777 agx_pack_local_index(I, is_load ? I->src[1] : I->src[2], &Ot);
778
779 uint64_t raw =
780 agx_opcodes_info[I->op].encoding.exact | (Rt ? BITFIELD64_BIT(8) : 0) |
781 ((R & BITFIELD_MASK(6)) << 9) | (L ? BITFIELD64_BIT(15) : 0) |
782 ((A & BITFIELD_MASK(6)) << 16) | (At << 22) | (I->format << 24) |
783 ((O & BITFIELD64_MASK(6)) << 28) | (Ot ? BITFIELD64_BIT(34) : 0) |
784 (((uint64_t)I->mask) << 36) | (((uint64_t)(O >> 6)) << 48) |
785 (((uint64_t)(A >> 6)) << 58) | (((uint64_t)(R >> 6)) << 60);
786
787 unsigned size = L ? 8 : 6;
788 memcpy(util_dynarray_grow_bytes(emission, 1, size), &raw, size);
789 break;
790 }
791
792 case AGX_OPCODE_ATOMIC: {
793 bool At, Ot, Rt;
794 unsigned A = agx_pack_memory_base(I, I->src[1], &At);
795 unsigned O = agx_pack_memory_index(I, I->src[2], &Ot);
796 unsigned R = agx_pack_atomic_dest(I, I->dest[0], &Rt);
797 unsigned S = agx_pack_atomic_source(I, I->src[0]);
798
799 /* Due to a hardware quirk, there is a bit in the atomic instruction that
800 * differs based on the target GPU. So, if we're packing an atomic, the
801 * shader must be keyed to a particular GPU (either needs_g13x_coherency
802 * or not needs_g13x_coherency). Assert that here.
803 *
804 * needs_g13x_coherency == U_TRISTATE_UNSET is only allowed for shaders
805 * that do not use atomics and are therefore portable across devices.
806 */
807 assert(needs_g13x_coherency != U_TRISTATE_UNSET);
808
809 uint64_t raw =
810 agx_opcodes_info[I->op].encoding.exact |
811 (((uint64_t)I->atomic_opc) << 6) | ((R & BITFIELD_MASK(6)) << 10) |
812 ((A & BITFIELD_MASK(4)) << 16) | ((O & BITFIELD_MASK(4)) << 20) |
813 (Ot ? (1 << 24) : 0) | (I->src[2].abs ? (1 << 25) : 0) | (At << 27) |
814 (I->scoreboard << 30) |
815 (((uint64_t)((O >> 4) & BITFIELD_MASK(4))) << 32) |
816 (((uint64_t)((A >> 4) & BITFIELD_MASK(4))) << 36) |
817 (((uint64_t)(R >> 6)) << 40) |
818 (needs_g13x_coherency == U_TRISTATE_YES ? BITFIELD64_BIT(45) : 0) |
819 (Rt ? BITFIELD64_BIT(47) : 0) | (((uint64_t)S) << 48) |
820 (((uint64_t)(O >> 8)) << 56);
821
822 memcpy(util_dynarray_grow_bytes(emission, 1, 8), &raw, 8);
823 break;
824 }
825
826 case AGX_OPCODE_LOCAL_ATOMIC: {
827 bool L = true; /* TODO: Don't force */
828
829 unsigned At;
830 bool Rt = false, Ot;
831
832 bool Ra = I->dest[0].type != AGX_INDEX_NULL;
833 unsigned R = Ra ? agx_pack_memory_reg(I, I->dest[0], &Rt) : 0;
834 unsigned S = agx_pack_atomic_source(I, I->src[0]);
835 unsigned A = agx_pack_local_base(I, I->src[1], &At);
836 unsigned O = agx_pack_local_index(I, I->src[2], &Ot);
837
838 uint64_t raw =
839 agx_opcodes_info[I->op].encoding.exact | (Rt ? BITFIELD64_BIT(8) : 0) |
840 ((R & BITFIELD_MASK(6)) << 9) | (L ? BITFIELD64_BIT(15) : 0) |
841 ((A & BITFIELD_MASK(6)) << 16) | (At << 22) |
842 (((uint64_t)I->atomic_opc) << 24) | ((O & BITFIELD64_MASK(6)) << 28) |
843 (Ot ? BITFIELD64_BIT(34) : 0) | (Ra ? BITFIELD64_BIT(38) : 0) |
844 (((uint64_t)(O >> 6)) << 48) | (((uint64_t)(A >> 6)) << 58) |
845 (((uint64_t)(R >> 6)) << 60);
846
847 uint64_t raw2 = S;
848
849 memcpy(util_dynarray_grow_bytes(emission, 1, 8), &raw, 8);
850 memcpy(util_dynarray_grow_bytes(emission, 1, 2), &raw2, 2);
851 break;
852 }
853
854 case AGX_OPCODE_TEXTURE_LOAD:
855 case AGX_OPCODE_IMAGE_LOAD:
856 case AGX_OPCODE_TEXTURE_SAMPLE: {
857 pack_assert(I, I->mask != 0);
858 pack_assert(I, I->format <= 0x10);
859
860 bool Rt, Ct, St, Cs;
861 unsigned Tt;
862 unsigned U;
863 enum agx_lod_mode lod_mode = I->lod_mode;
864
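      /* Operand order: src[0] coordinates, src[1] LOD, src[2]/src[3] texture
       * base and index, src[4] sampler, src[5] packed compare/offset.
       */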
865 unsigned R = agx_pack_memory_reg(I, I->dest[0], &Rt);
866 unsigned C = agx_pack_sample_coords(I, I->src[0], &Ct, &Cs);
867 unsigned T = agx_pack_texture(I, I->src[2], I->src[3], &U, &Tt);
868 unsigned S = agx_pack_sampler(I, I->src[4], &St);
869 unsigned O = agx_pack_sample_compare_offset(I, I->src[5]);
870 unsigned D = agx_pack_lod(I, I->src[1], &lod_mode);
871
872 unsigned q1 = I->shadow;
873 unsigned q2 = I->query_lod ? 2 : 0;
874 unsigned q3 = 0xc; // XXX
875 unsigned kill = 0; // helper invocation kill bit
876
877 /* Set bit 43 for image loads. This seems to make sure that image loads
878 * get the value written by the latest image store, not some other image
879 * store that was already in flight, fixing
880 *
881 * KHR-GLES31.core.shader_image_load_store.basic-glsl-misc-fs
882 *
883 * Apple seems to set this bit unconditionally for read/write image loads
884 * and never for readonly image loads. Some sort of cache control.
885 */
886 if (I->op == AGX_OPCODE_IMAGE_LOAD) {
887 q3 |= 1;
888
889 /* Cache bypass for multidie coherency */
890 if (I->coherent) {
891 q3 |= 2;
892 }
893 }
894
895 uint32_t extend = ((U & BITFIELD_MASK(5)) << 0) | (kill << 5) |
896 ((I->dim >> 3) << 7) | ((R >> 6) << 8) |
897 ((C >> 6) << 10) | ((D >> 6) << 12) | ((T >> 6) << 14) |
898 ((O & BITFIELD_MASK(6)) << 16) | (I->gather << 23) |
899 (I->offset << 27) | ((S >> 6) << 28) | ((O >> 6) << 30);
900
901 bool L = (extend != 0);
902
903 uint64_t raw =
904 0x31 | ((I->op != AGX_OPCODE_TEXTURE_SAMPLE) ? (1 << 6) : 0) |
905 (Rt ? (1 << 8) : 0) | ((R & BITFIELD_MASK(6)) << 9) |
906 (L ? (1 << 15) : 0) | ((C & BITFIELD_MASK(6)) << 16) |
907 (Ct ? (1 << 22) : 0) | (q1 << 23) | ((D & BITFIELD_MASK(6)) << 24) |
908 (q2 << 30) | (((uint64_t)(T & BITFIELD_MASK(6))) << 32) |
909 (((uint64_t)Tt) << 38) |
910 (((uint64_t)(I->dim & BITFIELD_MASK(3))) << 40) |
911 (((uint64_t)q3) << 43) | (((uint64_t)I->mask) << 48) |
912 (((uint64_t)lod_mode) << 52) |
913 (((uint64_t)(S & BITFIELD_MASK(6))) << 56) | (((uint64_t)St) << 62) |
914 (((uint64_t)I->scoreboard) << 63);
915
916 memcpy(util_dynarray_grow_bytes(emission, 1, 8), &raw, 8);
917 if (L)
918 memcpy(util_dynarray_grow_bytes(emission, 1, 4), &extend, 4);
919
920 break;
921 }
922
923 case AGX_OPCODE_IMAGE_WRITE: {
924 bool Ct, Dt, Rt, Cs;
925 unsigned Tt;
926 unsigned U;
927
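      /* Operand order: src[0] value, src[1] coordinates, src[2] LOD,
       * src[3]/src[4] texture base and index.
       */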
928 unsigned R = agx_pack_pbe_source(I, I->src[0], &Rt);
929 unsigned C = agx_pack_sample_coords(I, I->src[1], &Ct, &Cs);
930 unsigned D = agx_pack_pbe_lod(I, I->src[2], &Dt);
931 unsigned T = agx_pack_texture(I, I->src[3], I->src[4], &U, &Tt);
932 bool rtz = false;
933
934 pack_assert(I, U < (1 << 5));
935 pack_assert(I, D < (1 << 8));
936 pack_assert(I, R < (1 << 8));
937 pack_assert(I, C < (1 << 8));
938 pack_assert(I, T < (1 << 8));
939 pack_assert(I, Tt < (1 << 2));
940
941 unsigned coherency = I->coherent ? 0xf : 0x9;
942
943 uint64_t raw = agx_opcodes_info[I->op].encoding.exact |
944 (Rt ? (1 << 8) : 0) | ((R & BITFIELD_MASK(6)) << 9) |
945 ((C & BITFIELD_MASK(6)) << 16) | (Ct ? (1 << 22) : 0) |
946 ((D & BITFIELD_MASK(6)) << 24) | (Dt ? (1u << 31) : 0) |
947 (((uint64_t)(T & BITFIELD_MASK(6))) << 32) |
948 (((uint64_t)Tt) << 38) |
949 (((uint64_t)I->dim & BITFIELD_MASK(3)) << 40) |
950 (((uint64_t)coherency) << 43) | (Cs ? (1ull << 47) : 0) |
951 (((uint64_t)U) << 48) | (rtz ? (1ull << 53) : 0) |
952 ((I->dim & BITFIELD_BIT(4)) ? (1ull << 55) : 0) |
953 (((uint64_t)R >> 6) << 56) | (((uint64_t)C >> 6) << 58) |
954 (((uint64_t)D >> 6) << 60) | (((uint64_t)T >> 6) << 62);
955
956 if (raw >> 48) {
957 raw |= BITFIELD_BIT(15);
958 memcpy(util_dynarray_grow_bytes(emission, 1, 8), &raw, 8);
959 } else {
960 memcpy(util_dynarray_grow_bytes(emission, 1, 6), &raw, 6);
961 }
962
963 break;
964 }
965
966 case AGX_OPCODE_BLOCK_IMAGE_STORE: {
967 enum agx_format F = I->format;
968 pack_assert(I, F < 0x10);
969
970 unsigned Tt = 0;
971 pack_assert(I, Tt < 0x4);
972
973 unsigned U;
974 unsigned T = agx_pack_texture(I, I->src[0], I->src[1], &U, &Tt);
975 pack_assert(I, T < 0x100);
976 pack_assert(I, U < (1 << 5));
977
978 bool Cs = false;
979 bool Ct = I->src[3].discard;
980 unsigned C = I->src[3].value;
981
982 agx_index offset = I->src[2];
983 pack_assert(I, offset.size == AGX_SIZE_32);
984 assert_register_is_aligned(I, offset);
985 unsigned R = offset.value;
986
987 bool unk1 = true;
988
989 /* This bit interacts oddly with the texture state index and the
990 * tilebuffer offset. Probably best not to use it for now.
991 */
992 unsigned unk3 = 1;
993
994 uint32_t word0 = agx_opcodes_info[I->op].encoding.exact |
995 (1 << 15) /* we always set length bit for now */ |
996 ((F & 1) << 8) | ((R & BITFIELD_MASK(6)) << 9) |
997 ((C & BITFIELD_MASK(6)) << 16) | (Ct ? (1 << 22) : 0) |
998 (I->explicit_coords ? (1 << 23) : 0) |
999 (unk1 ? (1u << 31) : 0);
1000
1001 uint32_t word1 = (T & BITFIELD_MASK(6)) | (Tt << 6) |
1002 ((I->dim & BITFIELD_MASK(3)) << 8) | (9 << 11) |
1003 (Cs ? (1 << 15) : 0) | (((uint64_t)U) << 16) |
1004 ((I->dim & BITFIELD_BIT(3)) ? (1u << 23) : 0) |
1005 ((R >> 6) << 24) | ((C >> 6) << 26);
1006
1007 uint32_t word2 = (F >> 1) | (unk3 ? (1 << 3) : 0) | ((T >> 6) << 14);
1008
1009 memcpy(util_dynarray_grow_bytes(emission, 1, 4), &word0, 4);
1010 memcpy(util_dynarray_grow_bytes(emission, 1, 4), &word1, 4);
1011 memcpy(util_dynarray_grow_bytes(emission, 1, 2), &word2, 2);
1012 break;
1013 }
1014
1015 case AGX_OPCODE_ZS_EMIT: {
1016 agx_index S = I->src[0];
1017 if (S.type == AGX_INDEX_IMMEDIATE)
1018 pack_assert(I, S.value < BITFIELD_BIT(8));
1019 else
1020 assert_register_is_aligned(I, S);
1021
1022 agx_index T = I->src[1];
1023 assert_register_is_aligned(I, T);
1024
1025 pack_assert(I, I->zs >= 1 && I->zs <= 3);
1026
1027 uint32_t word0 = agx_opcodes_info[I->op].encoding.exact |
1028 ((S.type == AGX_INDEX_IMMEDIATE) ? (1 << 8) : 0) |
1029 ((S.value & BITFIELD_MASK(6)) << 9) |
1030 ((T.value & BITFIELD_MASK(6)) << 16) |
1031 ((T.value >> 6) << 26) | ((S.value >> 6) << 24) |
1032 (I->zs << 29);
1033
1034 memcpy(util_dynarray_grow_bytes(emission, 1, 4), &word0, 4);
1035 break;
1036 }
1037
1038 case AGX_OPCODE_JMP_EXEC_ANY:
1039 case AGX_OPCODE_JMP_EXEC_NONE:
1040 case AGX_OPCODE_JMP_EXEC_NONE_AFTER: {
1041 /* We don't implement indirect branches */
1042 pack_assert(I, I->target != NULL);
1043
1044 /* We'll fix the offset later. */
1045 struct agx_branch_fixup fixup = {
1046 .block = I->target,
1047 .offset = emission->size,
1048 .skip_to_end = I->op == AGX_OPCODE_JMP_EXEC_NONE_AFTER,
1049 };
1050
1051 util_dynarray_append(fixups, struct agx_branch_fixup, fixup);
1052
1053 /* The rest of the instruction is fixed */
1054 struct agx_opcode_info info = agx_opcodes_info[I->op];
1055 uint64_t raw = info.encoding.exact;
1056 memcpy(util_dynarray_grow_bytes(emission, 1, 6), &raw, 6);
1057 break;
1058 }
1059
1060 case AGX_OPCODE_DOORBELL: {
1061 pack_assert(I, I->imm < BITFIELD_MASK(8));
1062 struct agx_opcode_info info = agx_opcodes_info[I->op];
1063 uint64_t raw = info.encoding.exact | (I->imm << 40);
1064 memcpy(util_dynarray_grow_bytes(emission, 1, 6), &raw, 6);
1065 break;
1066 }
1067
1068 case AGX_OPCODE_STACK_UNMAP:
1069 case AGX_OPCODE_STACK_MAP: {
1070 agx_index value = I->op == AGX_OPCODE_STACK_MAP ? I->src[0] : I->dest[0];
1071
1072 pack_assert(I, value.type == AGX_INDEX_REGISTER);
1073 pack_assert(I, value.size == AGX_SIZE_32);
1074 pack_assert(I, I->imm < BITFIELD_MASK(16));
1075
1076 unsigned q1 = 0; // XXX
1077 unsigned q2 = 0; // XXX
1078 unsigned q3 = 0; // XXX
1079 unsigned q4 = 16; // XXX
1080 unsigned q5 = 16; // XXX
1081
1082 struct agx_opcode_info info = agx_opcodes_info[I->op];
1083 uint64_t raw =
1084 info.encoding.exact | (q1 << 8) | ((value.value & 0x3F) << 10) |
1085 ((I->imm & 0xF) << 20) | (1ull << 24) | // XXX
1086 (1ull << 26) | // XXX
1087 (q2 << 30) | ((uint64_t)((I->imm >> 4) & 0xF) << 32) |
1088 ((uint64_t)q3 << 37) | ((uint64_t)(value.value >> 6) << 40) |
1089 ((uint64_t)q4 << 42) | (1ull << 47) | // XXX
1090 ((uint64_t)q5 << 48) | ((uint64_t)(I->imm >> 8) << 56);
1091
1092 memcpy(util_dynarray_grow_bytes(emission, 1, 8), &raw, 8);
1093 break;
1094 }
1095
1096 case AGX_OPCODE_STACK_LOAD:
1097 case AGX_OPCODE_STACK_STORE: {
1098 enum agx_format format = I->format;
1099 unsigned mask = I->mask;
1100
1101 bool is_load = I->op == AGX_OPCODE_STACK_LOAD;
1102 bool L = true; /* TODO: when would you want short? */
1103
1104 pack_assert(I, mask != 0);
1105 pack_assert(I, format <= 0x10);
1106
1107 bool Rt, Ot;
1108 unsigned R =
1109 agx_pack_memory_reg(I, is_load ? I->dest[0] : I->src[0], &Rt);
1110 unsigned O =
1111 agx_pack_memory_index(I, is_load ? I->src[0] : I->src[1], &Ot);
1112
1113 unsigned i1 = 1; // XXX
1114 unsigned i2 = 0; // XXX
1115 unsigned i5 = 4; // XXX
1116
1117 uint64_t raw =
1118 agx_opcodes_info[I->op].encoding.exact |
1119 ((format & BITFIELD_MASK(2)) << 8) | ((R & BITFIELD_MASK(6)) << 10) |
1120 ((O & BITFIELD_MASK(4)) << 20) | (Ot ? (1 << 24) : 0) |
1121 ((uint64_t)i1 << 26) | ((uint64_t)I->scoreboard << 30) |
1122 (((uint64_t)((O >> 4) & BITFIELD_MASK(4))) << 32) |
1123 ((uint64_t)i2 << 36) |
1124 (((uint64_t)((R >> 6) & BITFIELD_MASK(2))) << 40) |
1125 ((uint64_t)i5 << 44) | (L ? (1ull << 47) : 0) |
1126 (((uint64_t)(format >> 2)) << 50) | (((uint64_t)Rt) << 49) |
1127 (((uint64_t)mask) << 52) | (((uint64_t)(O >> 8)) << 56);
1128
1129 unsigned size = L ? 8 : 6;
1130 memcpy(util_dynarray_grow_bytes(emission, 1, size), &raw, size);
1131 break;
1132 }
1133 case AGX_OPCODE_STACK_ADJUST: {
1134 struct agx_opcode_info info = agx_opcodes_info[I->op];
1135
1136 unsigned i0 = 0; // XXX
1137 unsigned i1 = 1; // XXX
1138 unsigned i2 = 2; // XXX
1139 unsigned i3 = 0; // XXX
1140 unsigned i4 = 0; // XXX
1141
1142 uint64_t raw =
1143 info.encoding.exact | ((uint64_t)i0 << 8) | ((uint64_t)i1 << 26) |
1144 ((uint64_t)i2 << 36) | ((uint64_t)i3 << 44) | ((uint64_t)i4 << 50) |
1145 ((I->stack_size & 0xF) << 20) |
1146 ((uint64_t)((I->stack_size >> 4) & 0xF) << 32) | (1ull << 47) | // XXX
1147 ((uint64_t)(I->stack_size >> 8) << 56);
1148
1149 memcpy(util_dynarray_grow_bytes(emission, 1, 8), &raw, 8);
1150 break;
1151 }
1152
1153 default:
1154 agx_pack_alu(emission, I);
1155 return;
1156 }
1157 }
1158
1159 /* Relative branches may be emitted before their targets, so we patch the
1160 * binary to fix up the branch offsets after the main emit */
1161
1162 static void
1163 agx_fixup_branch(struct util_dynarray *emission, struct agx_branch_fixup fix)
1164 {
1165 /* Branch offset is 2 bytes into the jump instruction */
1166 uint8_t *location = ((uint8_t *)emission->data) + fix.offset + 2;
1167
1168 off_t target = fix.skip_to_end ? fix.block->last_offset : fix.block->offset;
1169
1170 /* Offsets are relative to the jump instruction */
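   /* For example, a forward jump emitted at byte 0x10 whose target block
    * starts at byte 0x40 is patched with 0x30; backward jumps produce a
    * negative displacement.
    */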
1171 int32_t patch = (int32_t)target - (int32_t)fix.offset;
1172
1173 /* Patch the binary */
1174 memcpy(location, &patch, sizeof(patch));
1175 }
1176
1177 void
1178 agx_pack_binary(agx_context *ctx, struct util_dynarray *emission)
1179 {
1180 struct util_dynarray fixups;
1181 util_dynarray_init(&fixups, ctx);
1182
1183 agx_foreach_block(ctx, block) {
1184 /* Relative to the start of the binary, the block begins at the current
1185 * number of bytes emitted */
1186 block->offset = emission->size;
1187
1188 agx_foreach_instr_in_block(block, ins) {
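         /* Updated every iteration so that, once the block is done, it points
          * at the final instruction, which is what skip_to_end branch fixups
          * target.
          */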
1189 block->last_offset = emission->size;
1190 agx_pack_instr(emission, &fixups, ins,
1191 ctx->key->dev.needs_g13x_coherency);
1192 }
1193 }
1194
1195 util_dynarray_foreach(&fixups, struct agx_branch_fixup, fixup)
1196 agx_fixup_branch(emission, *fixup);
1197
1198 util_dynarray_fini(&fixups);
1199
1200 /* Dougall calls the instruction in this footer "trap". Match the blob. */
1201 if (!ctx->key->no_stop || ctx->is_preamble) {
1202 for (unsigned i = 0; i < 8; ++i) {
1203 uint16_t trap = agx_opcodes_info[AGX_OPCODE_TRAP].encoding.exact;
1204 util_dynarray_append(emission, uint16_t, trap);
1205 }
1206 }
1207 }
1208