1 /*
2 * Copyright © 2022 Collabora Ltd.
3 * SPDX-License-Identifier: MIT
4 */
5 #include "mme_builder.h"
6
7 #include <stdio.h>
8 #include <stdlib.h>
9
10 #define MME_TU104_MAX_REGS 23
11
12 void
mme_tu104_builder_init(struct mme_builder * b)13 mme_tu104_builder_init(struct mme_builder *b)
14 {
15 mme_reg_alloc_init(&b->reg_alloc, BITFIELD_MASK(MME_TU104_MAX_REGS));
16 }
17
18 static void
mme_tu104_new_inst(struct mme_tu104_builder * tb)19 mme_tu104_new_inst(struct mme_tu104_builder *tb)
20 {
21 struct mme_tu104_inst noop = { MME_TU104_INST_DEFAULTS };
22 assert(tb->inst_count < ARRAY_SIZE(tb->insts));
23 tb->insts[tb->inst_count] = noop;
24 tb->inst_count++;
25 tb->inst_parts = 0;
26 }
27
28 static struct mme_tu104_inst *
mme_tu104_cur_inst(struct mme_tu104_builder * tb)29 mme_tu104_cur_inst(struct mme_tu104_builder *tb)
30 {
31 assert(tb->inst_count > 0 && tb->inst_count < ARRAY_SIZE(tb->insts));
32 return &tb->insts[tb->inst_count - 1];
33 }
34
35 static inline void
mme_tu104_set_inst_parts(struct mme_tu104_builder * tb,enum mme_tu104_instr_parts parts)36 mme_tu104_set_inst_parts(struct mme_tu104_builder *tb,
37 enum mme_tu104_instr_parts parts)
38 {
39 assert(!(tb->inst_parts & parts));
40 tb->inst_parts |= parts;
41 }
42
43 void
mme_tu104_add_inst(struct mme_builder * b,const struct mme_tu104_inst * inst)44 mme_tu104_add_inst(struct mme_builder *b,
45 const struct mme_tu104_inst *inst)
46 {
47 struct mme_tu104_builder *tb = &b->tu104;
48
49 if (tb->inst_parts || tb->inst_count == 0)
50 mme_tu104_new_inst(&b->tu104);
51 *mme_tu104_cur_inst(tb) = *inst;
52 mme_tu104_new_inst(tb);
53 }
54
55 static unsigned
mme_tu104_reg_num_imms(enum mme_tu104_reg reg)56 mme_tu104_reg_num_imms(enum mme_tu104_reg reg)
57 {
58 switch (reg) {
59 case MME_TU104_REG_IMM:
60 case MME_TU104_REG_IMMPAIR:
61 return 1;
62 case MME_TU104_REG_IMM32:
63 return 2;
64 default:
65 return 0;
66 }
67 }
68
/* Returns true if the given ALU op can still be packed into the current
 * instruction without violating the TU104 packing rules; false means the
 * caller must start a new instruction first.
 */
static bool
mme_tu104_next_inst_can_add_alu(struct mme_tu104_builder *tb,
                                const struct mme_tu104_alu *alu,
                                bool must_be_alu0)
{
   /* No instruction exists yet to pack into */
   if (tb->inst_count == 0)
      return false;

   /* Most ALU can be re-ordered with respect to outputs but a couple can't.
    * In the case where it may depend on an output, flush if we have one.
    */
   if (mme_tu104_alu_op_may_depend_on_mthd(alu->op) &&
       tb->inst_parts & (MME_TU104_INSTR_PART_MTHD0 |
                         MME_TU104_INSTR_PART_EMIT0 |
                         MME_TU104_INSTR_PART_MTHD1 |
                         MME_TU104_INSTR_PART_EMIT1))
      return false;

   if (must_be_alu0 && (tb->inst_parts & MME_TU104_INSTR_PART_ALU0))
      return false;

   /* Both ALU slots already taken */
   if (tb->inst_parts & MME_TU104_INSTR_PART_ALU1) {
      assert(tb->inst_parts & MME_TU104_INSTR_PART_ALU0);
      return false;
   }

   /* Callers always request LOAD0 in src[0]; mme_tu104_push_alu() bumps it
    * to LOAD1 if LOAD0 is taken.  If both loads are used, we're out of
    * load slots.
    */
   assert(alu->src[0] != MME_TU104_REG_LOAD1 &&
          alu->src[1] != MME_TU104_REG_LOAD0 &&
          alu->src[1] != MME_TU104_REG_LOAD1);
   if (alu->src[0] == MME_TU104_REG_LOAD0 &&
       (tb->inst_parts & MME_TU104_INSTR_PART_LOAD1))
      return false;

   /* Only two 16-bit immediate slots exist per instruction */
   const unsigned used_imms =
      util_bitcount(tb->inst_parts & (MME_TU104_INSTR_PART_IMM0 |
                                      MME_TU104_INSTR_PART_IMM1));

   const unsigned num_imms = mme_tu104_alu_op_has_implicit_imm(alu->op) +
                             mme_tu104_reg_num_imms(alu->src[0]) +
                             mme_tu104_reg_num_imms(alu->src[1]);
   assert(num_imms <= 2);
   if (num_imms + used_imms > 2)
      return false;

   /* An implicit immediate must land in imm[i] for alu[i]; if this op would
    * go in alu1 but imm1 is taken, it can't fit.
    */
   if (mme_tu104_alu_op_has_implicit_imm(alu->op) &&
       (tb->inst_parts & MME_TU104_INSTR_PART_ALU0) &&
       (tb->inst_parts & MME_TU104_INSTR_PART_IMM1))
      return false;

   struct mme_tu104_inst *cur = mme_tu104_cur_inst(tb);

   /* alu1 executes alongside alu0, so it can't consume alu0's result */
   if ((tb->inst_parts & MME_TU104_INSTR_PART_ALU0) &&
       mme_tu104_alus_have_dependency(&cur->alu[0], alu))
      return false;

   /* No idea why the HW has this rule but it does */
   if (alu->op == MME_TU104_ALU_OP_STATE &&
       (tb->inst_parts & MME_TU104_INSTR_PART_ALU0) &&
       cur->alu[0].op == MME_TU104_ALU_OP_STATE)
      return false;

   return true;
}
132
/* Packs an ALU op (plus any immediates it consumes) into the current
 * instruction, starting a new instruction first if it doesn't fit.
 *
 * imm0/imm1 carry the values for IMM/IMM32/IMMPAIR sources and
 * implicit_imm is the extra immediate required by ops such as MERGE,
 * STATE, and the branches.  Returns the ALU slot index (0 or 1) the op
 * was placed in.
 */
static unsigned
mme_tu104_push_alu(struct mme_tu104_builder *tb,
                   const struct mme_tu104_alu *alu,
                   uint16_t imm0, uint16_t imm1,
                   uint16_t implicit_imm,
                   bool must_be_alu0)
{
   if (!mme_tu104_next_inst_can_add_alu(tb, alu, must_be_alu0))
      mme_tu104_new_inst(tb);

   /* An implicit immediate must live in imm[i] for alu[i].  If imm0 is
    * already taken, claim alu0 (leaving it as the default NOP) so this op
    * lands in the alu1/imm1 pair instead.
    */
   if (mme_tu104_alu_op_has_implicit_imm(alu->op) &&
       (tb->inst_parts & MME_TU104_INSTR_PART_IMM0))
      tb->inst_parts |= MME_TU104_INSTR_PART_ALU0;

   assert(mme_tu104_next_inst_can_add_alu(tb, alu, must_be_alu0));

   struct mme_tu104_inst *inst = mme_tu104_cur_inst(tb);
   unsigned alu_idx = (tb->inst_parts & MME_TU104_INSTR_PART_ALU0) != 0;
   assert(alu_idx == 0 || !must_be_alu0);

   /* Carry/borrow/high-part ops consume state produced by their partner
    * op, which must sit in alu0 of the same instruction.
    */
   switch (alu->op) {
   case MME_TU104_ALU_OP_ADDC:
      assert(inst->alu[0].op == MME_TU104_ALU_OP_ADD);
      assert(alu_idx == 1);
      break;
   case MME_TU104_ALU_OP_SUBB:
      assert(inst->alu[0].op == MME_TU104_ALU_OP_SUB);
      assert(alu_idx == 1);
      break;
   case MME_TU104_ALU_OP_MULH:
      assert(inst->alu[0].op == MME_TU104_ALU_OP_MUL ||
             inst->alu[0].op == MME_TU104_ALU_OP_MULU);
      assert(alu_idx == 1);
      break;
   default:
      break;
   }

   mme_tu104_set_inst_parts(tb, MME_TU104_INSTR_PART_ALU0 << alu_idx);
   inst->alu[alu_idx] = *alu;

   /* Callers always ask for LOAD0; rewrite to the next free load slot */
   if (alu->src[0] == MME_TU104_REG_LOAD0) {
      unsigned next_load = (tb->inst_parts & MME_TU104_INSTR_PART_LOAD0) != 0;
      mme_tu104_set_inst_parts(tb, MME_TU104_INSTR_PART_LOAD0 << next_load);
      inst->alu[alu_idx].src[0] = MME_TU104_REG_LOAD0 + next_load;
   }

   unsigned next_imm = (tb->inst_parts & MME_TU104_INSTR_PART_IMM0) != 0;
   const unsigned num_imms = mme_tu104_reg_num_imms(alu->src[0]) +
                             mme_tu104_reg_num_imms(alu->src[1]);

   if (mme_tu104_alu_op_has_implicit_imm(alu->op)) {
      /* The implicit immediate is tied to this ALU's slot index; any
       * remaining source immediate goes in the other slot.
       */
      mme_tu104_set_inst_parts(tb, MME_TU104_INSTR_PART_IMM0 << alu_idx);
      inst->imm[alu_idx] = implicit_imm;
      assert(num_imms <= 1);
      next_imm = 1 - alu_idx;
   }

   if (num_imms == 1) {
      mme_tu104_set_inst_parts(tb, MME_TU104_INSTR_PART_IMM0 << next_imm);
      inst->imm[next_imm] = imm0;
      assert(alu->src[0] != MME_TU104_REG_IMM32 &&
             alu->src[0] != MME_TU104_REG_IMMPAIR &&
             alu->src[1] != MME_TU104_REG_IMM32 &&
             alu->src[1] != MME_TU104_REG_IMMPAIR);
      /* IMM reads imm[alu_idx]; if the value landed in the other slot, the
       * source must be rewritten to IMMPAIR which reads the paired slot.
       */
      if (alu->src[0] == MME_TU104_REG_IMM && alu_idx != next_imm)
         inst->alu[alu_idx].src[0] = MME_TU104_REG_IMMPAIR;
      if (alu->src[1] == MME_TU104_REG_IMM && alu_idx != next_imm)
         inst->alu[alu_idx].src[1] = MME_TU104_REG_IMMPAIR;
   } else if (num_imms == 2) {
      /* A 32-bit immediate occupies both slots */
      mme_tu104_set_inst_parts(tb, MME_TU104_INSTR_PART_IMM0 |
                                   MME_TU104_INSTR_PART_IMM1);
      inst->imm[0] = imm0;
      inst->imm[1] = imm1;
   }

   return alu_idx;
}
211
212 static inline enum mme_tu104_reg
mme_value_alu_reg(struct mme_value val)213 mme_value_alu_reg(struct mme_value val)
214 {
215 switch (val.type) {
216 case MME_VALUE_TYPE_ZERO:
217 return MME_TU104_REG_ZERO;
218 case MME_VALUE_TYPE_IMM:
219 if (val.imm == 0)
220 return MME_TU104_REG_ZERO;
221 else if (val.imm == (uint32_t)(int16_t)val.imm)
222 return MME_TU104_REG_IMM;
223 else
224 return MME_TU104_REG_IMM32;
225 case MME_VALUE_TYPE_REG:
226 assert(val.reg <= 23);
227 return MME_TU104_REG_R0 + val.reg;
228 }
229 unreachable("Invalid value type");
230 }
231
/* Emits `dst = x op y` as a TU104 ALU, lowering immediate operands.
 *
 * An instruction only has two 16-bit immediate slots.  Immediates that
 * can't fit alongside the op's implicit immediate (if any) are first
 * copied into registers with mme_mov().  implicit_imm is forwarded to
 * mme_tu104_push_alu() for ops like MERGE/STATE/branches; must_be_alu0
 * forces the op into ALU slot 0.
 */
static void
build_alu_to(struct mme_builder *b,
             struct mme_value dst,
             enum mme_tu104_alu_op op,
             struct mme_value x,
             struct mme_value y,
             uint16_t implicit_imm,
             bool must_be_alu0)
{
   assert(dst.type == MME_VALUE_TYPE_ZERO ||
          dst.type == MME_VALUE_TYPE_REG);

   enum mme_tu104_reg x_reg = mme_value_alu_reg(x);
   enum mme_tu104_reg y_reg = mme_value_alu_reg(y);

   unsigned num_imms = mme_tu104_alu_op_has_implicit_imm(op) +
                       mme_tu104_reg_num_imms(x_reg) +
                       mme_tu104_reg_num_imms(y_reg);
   /* Lower immediates to registers until everything fits in two slots.
    * 32-bit immediates are lowered first since each one frees two slots.
    */
   while (num_imms > 2) {
      if (y_reg == MME_TU104_REG_IMM32) {
         y = mme_mov(b, y);
         y_reg = mme_value_alu_reg(y);
         num_imms -= 2;
      } else if (x_reg == MME_TU104_REG_IMM32) {
         x = mme_mov(b, x);
         x_reg = mme_value_alu_reg(x);
         num_imms -= 2;
      } else if (mme_tu104_reg_num_imms(y_reg) > 0) {
         assert(mme_tu104_reg_num_imms(y_reg) == 1);
         y = mme_mov(b, y);
         y_reg = mme_value_alu_reg(y);
         num_imms--;
      } else if (mme_tu104_reg_num_imms(x_reg) > 0) {
         assert(mme_tu104_reg_num_imms(x_reg) == 1);
         x = mme_mov(b, x);
         x_reg = mme_value_alu_reg(x);
         num_imms--;
      }
   }

   /* Split the surviving immediates across the two slots.  An IMM32 takes
    * both, high half first; two 16-bit immediates become an IMM/IMMPAIR
    * pair.
    */
   uint16_t imm0 = 0, imm1 = 0;
   if (x_reg == MME_TU104_REG_IMM32) {
      assert(mme_tu104_reg_num_imms(y_reg) == 0);
      imm0 = x.imm >> 16;
      imm1 = x.imm;
   } else if (y_reg == MME_TU104_REG_IMM32) {
      assert(mme_tu104_reg_num_imms(x_reg) == 0);
      imm0 = y.imm >> 16;
      imm1 = y.imm;
   } else if (x_reg == MME_TU104_REG_IMM) {
      assert(mme_tu104_reg_num_imms(y_reg) <= 1);
      imm0 = x.imm;
      if (y_reg == MME_TU104_REG_IMM) {
         imm1 = y.imm;
         y_reg = MME_TU104_REG_IMMPAIR;
      }
   } else if (y_reg == MME_TU104_REG_IMM) {
      imm0 = y.imm;
   } else {
      assert(mme_tu104_reg_num_imms(x_reg) == 0);
      assert(mme_tu104_reg_num_imms(y_reg) == 0);
   }

   struct mme_tu104_alu alu = {
      .dst = mme_value_alu_reg(dst),
      .op = op,
      .src = { x_reg, y_reg },
   };
   mme_tu104_push_alu(&b->tu104, &alu, imm0, imm1, implicit_imm, must_be_alu0);
}
302
/* Maps a generic MME ALU op to its TU104 equivalent.
 *
 * Only ops with a direct 1:1 TU104 counterpart are handled here; ops that
 * need lowering (NOT, AND_NOT) are handled in mme_tu104_alu_to().
 */
static enum mme_tu104_alu_op
mme_to_tu104_alu_op(enum mme_alu_op op)
{
   switch (op) {
#define ALU_CASE(op) case MME_ALU_OP_##op: return MME_TU104_ALU_OP_##op;
   ALU_CASE(ADD)
   ALU_CASE(ADDC)
   ALU_CASE(SUB)
   ALU_CASE(SUBB)
   ALU_CASE(MUL)
   ALU_CASE(MULH)
   ALU_CASE(MULU)
   ALU_CASE(CLZ)
   ALU_CASE(SLL)
   ALU_CASE(SRL)
   ALU_CASE(SRA)
   ALU_CASE(AND)
   ALU_CASE(NAND)
   ALU_CASE(OR)
   ALU_CASE(XOR)
   ALU_CASE(SLT)
   ALU_CASE(SLTU)
   ALU_CASE(SLE)
   ALU_CASE(SLEU)
   ALU_CASE(SEQ)
   ALU_CASE(DREAD)
   ALU_CASE(DWRITE)
#undef ALU_CASE
   default:
      unreachable("Unsupported MME ALU op");
   }
}
335
/* Emits `dst = x op y` for a generic MME ALU op.
 *
 * NOT and AND_NOT have no TU104 encoding, so they are lowered to
 * `x ^ ~0` and `x & ~y` respectively; everything else maps directly
 * through mme_to_tu104_alu_op().
 */
void
mme_tu104_alu_to(struct mme_builder *b,
                 struct mme_value dst,
                 enum mme_alu_op op,
                 struct mme_value x,
                 struct mme_value y)
{
   switch (op) {
   case MME_ALU_OP_NOT:
      /* NOT == XOR with all ones */
      mme_xor_to(b, dst, x, mme_imm(~(uint32_t)0));
      break;

   case MME_ALU_OP_AND_NOT: {
      /* Invert y at build time when it's a constant; otherwise emit a
       * runtime NOT into a temporary register.
       */
      struct mme_value not_y;
      switch (y.type) {
      case MME_VALUE_TYPE_ZERO:
         not_y = mme_imm(~(uint32_t)0);
         break;

      case MME_VALUE_TYPE_IMM:
         if (y.imm == ~(uint32_t)0)
            not_y = mme_zero();
         else
            not_y = mme_imm(~y.imm);
         break;

      case MME_VALUE_TYPE_REG:
         not_y = mme_not(b, y);
         break;

      default:
         unreachable("Unknown MME value type");
      }

      mme_and_to(b, dst, x, not_y);

      /* Only the runtime-NOT case allocated a temporary */
      if (not_y.type == MME_VALUE_TYPE_REG)
         mme_free_reg(b, not_y);
      break;
   }

   default:
      build_alu_to(b, dst, mme_to_tu104_alu_op(op), x, y, 0, false);
   }
}
381
/* Emits a 64-bit ALU op as a low-part op followed by a high-part op
 * (e.g. ADD/ADDC, SUB/SUBB, MUL/MULH).  The low op is forced into ALU
 * slot 0 so the high op can read its carry/borrow/high-part state from
 * the same instruction.
 */
void
mme_tu104_alu64_to(struct mme_builder *b,
                   struct mme_value64 dst,
                   enum mme_alu_op op_lo,
                   enum mme_alu_op op_hi,
                   struct mme_value64 x,
                   struct mme_value64 y)
{
   assert(dst.lo.type == MME_VALUE_TYPE_REG);
   assert(dst.hi.type == MME_VALUE_TYPE_REG);

   /* We can't have any non-zero immediates in the high part or else we might
    * get half-way through emitting and realize we've run out.
    */
   if (x.hi.type == MME_VALUE_TYPE_IMM && x.hi.imm != 0)
      x.hi = mme_mov(b, x.hi);
   if (y.hi.type == MME_VALUE_TYPE_IMM && y.hi.imm != 0)
      y.hi = mme_mov(b, y.hi);

   build_alu_to(b, dst.lo, mme_to_tu104_alu_op(op_lo), x.lo, y.lo, 0, true);
   build_alu_to(b, dst.hi, mme_to_tu104_alu_op(op_hi), x.hi, y.hi, 0, false);
}
404
405 void
mme_tu104_merge_to(struct mme_builder * b,struct mme_value dst,struct mme_value x,struct mme_value y,uint16_t dst_pos,uint16_t bits,uint16_t src_pos)406 mme_tu104_merge_to(struct mme_builder *b, struct mme_value dst,
407 struct mme_value x, struct mme_value y,
408 uint16_t dst_pos, uint16_t bits, uint16_t src_pos)
409 {
410 assert(dst_pos < 32);
411 assert(bits < 32);
412 assert(src_pos < 32);
413 uint32_t ctrl = (dst_pos << 10) | (bits << 5) | src_pos;
414 build_alu_to(b, dst, MME_TU104_ALU_OP_MERGE, x, y, ctrl, false);
415 }
416
417 void
mme_tu104_state_arr_to(struct mme_builder * b,struct mme_value dst,uint16_t state,struct mme_value index)418 mme_tu104_state_arr_to(struct mme_builder *b, struct mme_value dst,
419 uint16_t state, struct mme_value index)
420 {
421 assert(state % 4 == 0);
422 build_alu_to(b, dst, MME_TU104_ALU_OP_STATE,
423 mme_imm(state >> 2), index, 0, false);
424 }
425
426 void
mme_tu104_load_barrier(struct mme_builder * b)427 mme_tu104_load_barrier(struct mme_builder *b)
428 {
429 build_alu_to(b, mme_zero(), MME_TU104_ALU_OP_EXTENDED,
430 mme_imm(0x1000), mme_imm(1), 0, false);
431 }
432
433 void
mme_tu104_load_to(struct mme_builder * b,struct mme_value dst)434 mme_tu104_load_to(struct mme_builder *b, struct mme_value dst)
435 {
436 assert(dst.type == MME_VALUE_TYPE_REG ||
437 dst.type == MME_VALUE_TYPE_ZERO);
438
439 struct mme_tu104_alu alu = {
440 .dst = mme_value_alu_reg(dst),
441 .op = MME_TU104_ALU_OP_ADD,
442 .src = {
443 MME_TU104_REG_LOAD0,
444 MME_TU104_REG_ZERO,
445 },
446 };
447 mme_tu104_push_alu(&b->tu104, &alu, 0, 0, 0, 0);
448 }
449
/* Returns true if a method output using the given out op can still be
 * packed into the current instruction; false means the caller must start
 * a new instruction first.
 */
static bool
mme_tu104_next_inst_can_add_mthd(struct mme_tu104_builder *tb,
                                 enum mme_tu104_out_op out)
{
   /* No instruction exists yet to pack into */
   if (tb->inst_count == 0)
      return false;

   /* Both mthd slots already taken */
   if (tb->inst_parts & MME_TU104_INSTR_PART_MTHD1) {
      assert(tb->inst_parts & MME_TU104_INSTR_PART_MTHD0);
      return false;
   }

   /* We can't have a mthd in an op with STATE */
   struct mme_tu104_inst *cur = mme_tu104_cur_inst(tb);
   if (((tb->inst_parts & MME_TU104_INSTR_PART_ALU0) &&
        cur->alu[0].op == MME_TU104_ALU_OP_STATE) ||
       ((tb->inst_parts & MME_TU104_INSTR_PART_ALU1) &&
        cur->alu[1].op == MME_TU104_ALU_OP_STATE))
      return false;

   /* An IMM0 out needs a free 16-bit immediate slot */
   if (out == MME_TU104_OUT_OP_IMM0 &&
       (tb->inst_parts & MME_TU104_INSTR_PART_IMM0) &&
       (tb->inst_parts & MME_TU104_INSTR_PART_IMM1))
      return false;

   return true;
}
477
/* Packs a method output into the current instruction.
 *
 * The caller must have already verified the fit with
 * mme_tu104_next_inst_can_add_mthd().  For IMM0 outs, imm is placed in
 * the next free immediate slot and the out op is retargeted accordingly.
 */
static void
mme_tu104_push_mthd(struct mme_tu104_builder *tb,
                    enum mme_tu104_out_op out,
                    uint16_t imm)
{
   struct mme_tu104_inst *inst = mme_tu104_cur_inst(tb);
   if (out == MME_TU104_OUT_OP_IMM0) {
      unsigned imm_idx = (tb->inst_parts & MME_TU104_INSTR_PART_IMM0) != 0;
      mme_tu104_set_inst_parts(tb, MME_TU104_INSTR_PART_IMM0 << imm_idx);
      out = MME_TU104_OUT_OP_IMM0 + imm_idx;
      inst->imm[imm_idx] = imm;
   }
   unsigned mthd_idx = (tb->inst_parts & MME_TU104_INSTR_PART_MTHD0) != 0;
   /* If we're pushing mthd1, the next emit MUST be emit1 */
   if (mthd_idx > 0 && !(tb->inst_parts & MME_TU104_INSTR_PART_EMIT0))
      mme_tu104_set_inst_parts(tb, MME_TU104_INSTR_PART_EMIT0);
   mme_tu104_set_inst_parts(tb, MME_TU104_INSTR_PART_MTHD0 << mthd_idx);
   inst->out[mthd_idx].mthd = out;
}
497
/* Sets the method that subsequent emits will write, optionally offset by
 * an index (a register, an immediate, or zero).
 */
void
mme_tu104_mthd(struct mme_builder *b, uint16_t mthd, struct mme_value index)
{
   struct mme_tu104_builder *tb = &b->tu104;

   /* Methods are addressed in dwords; bit 12 presumably carries the
    * method auto-increment of the NVIDIA method format -- TODO confirm
    * against the class headers.
    */
   assert(mthd % 4 == 0);
   uint32_t mthd_imm = (1 << 12) | (mthd >> 2);

   if (index.type == MME_VALUE_TYPE_REG) {
      if (!mme_tu104_next_inst_can_add_mthd(tb, MME_TU104_OUT_OP_ALU0))
         mme_tu104_new_inst(tb);

      /* Compute mthd_imm + index in an ALU slot and route the ALU result
       * to the method output.
       */
      const struct mme_tu104_alu alu = {
         .dst = MME_TU104_REG_ZERO,
         .op = MME_TU104_ALU_OP_ADD,
         .src = {
            MME_TU104_REG_IMM,
            mme_value_alu_reg(index),
         },
      };
      unsigned alu_idx = mme_tu104_push_alu(tb, &alu, mthd_imm, 0, 0, false);
      mme_tu104_push_mthd(tb, MME_TU104_OUT_OP_ALU0 + alu_idx, 0);
   } else {
      if (!mme_tu104_next_inst_can_add_mthd(tb, MME_TU104_OUT_OP_IMM0))
         mme_tu104_new_inst(tb);

      /* Constant indices fold directly into the immediate */
      if (index.type == MME_VALUE_TYPE_IMM)
         mthd_imm += index.imm;

      mme_tu104_push_mthd(tb, MME_TU104_OUT_OP_IMM0, mthd_imm);
   }
}
530
/* Returns true if an emit using the given out op can still be packed into
 * the current instruction; false means the caller must start a new
 * instruction first.
 */
static bool
mme_tu104_next_inst_can_add_emit(struct mme_tu104_builder *tb,
                                 enum mme_tu104_out_op out,
                                 uint32_t imm)
{
   assert(tb->inst_count > 0);

   /* Both emit slots already taken */
   if (tb->inst_parts & MME_TU104_INSTR_PART_EMIT1) {
      assert(tb->inst_parts & MME_TU104_INSTR_PART_EMIT0);
      return false;
   }

   /* We can't have an emit in an op with STATE */
   struct mme_tu104_inst *cur = mme_tu104_cur_inst(tb);
   if (((tb->inst_parts & MME_TU104_INSTR_PART_ALU0) &&
        cur->alu[0].op == MME_TU104_ALU_OP_STATE) ||
       ((tb->inst_parts & MME_TU104_INSTR_PART_ALU1) &&
        cur->alu[1].op == MME_TU104_ALU_OP_STATE))
      return false;

   /* IMM0 needs one free immediate slot; IMM32 needs both */
   const unsigned used_imms =
      util_bitcount(tb->inst_parts & (MME_TU104_INSTR_PART_IMM0 |
                                      MME_TU104_INSTR_PART_IMM1));
   if (out == MME_TU104_OUT_OP_IMM0 && used_imms > 1)
      return false;
   if (out == MME_TU104_OUT_OP_IMM32 && used_imms > 0)
      return false;

   return true;
}
561
/* Packs an emit into the current instruction.
 *
 * The caller must have already verified the fit with
 * mme_tu104_next_inst_can_add_emit().  Immediate data goes into the free
 * immediate slot(s); a 32-bit immediate takes both slots, high half in
 * imm[0].
 */
static void
mme_tu104_push_emit(struct mme_tu104_builder *tb,
                    enum mme_tu104_out_op out,
                    uint32_t imm)
{
   struct mme_tu104_inst *inst = mme_tu104_cur_inst(tb);
   if (out == MME_TU104_OUT_OP_IMM0) {
      unsigned imm_idx = (tb->inst_parts & MME_TU104_INSTR_PART_IMM0) != 0;
      mme_tu104_set_inst_parts(tb, MME_TU104_INSTR_PART_IMM0 << imm_idx);
      out = MME_TU104_OUT_OP_IMM0 + imm_idx;
      inst->imm[imm_idx] = imm;
   } else if (out == MME_TU104_OUT_OP_IMM32) {
      mme_tu104_set_inst_parts(tb, MME_TU104_INSTR_PART_IMM0 |
                                   MME_TU104_INSTR_PART_IMM1);
      inst->imm[0] = imm >> 16;
      inst->imm[1] = imm;
   }
   unsigned emit_idx = (tb->inst_parts & MME_TU104_INSTR_PART_EMIT0) != 0;
   mme_tu104_set_inst_parts(tb, MME_TU104_INSTR_PART_EMIT0 << emit_idx);
   /* If we're pushing emitN, the next mthd MUST be mthdN+1 */
   if (!(tb->inst_parts & (MME_TU104_INSTR_PART_MTHD0 << emit_idx)))
      mme_tu104_set_inst_parts(tb, MME_TU104_INSTR_PART_MTHD0 << emit_idx);
   inst->out[emit_idx].emit = out;
}
586
587 static int
find_alu_idx_for_dst(const struct mme_tu104_inst * inst,struct mme_value dst)588 find_alu_idx_for_dst(const struct mme_tu104_inst *inst,
589 struct mme_value dst)
590 {
591 assert(dst.type == MME_VALUE_TYPE_REG);
592 for (int i = 0; i < 2; i++) {
593 if (inst->alu[i].dst == mme_value_alu_reg(dst))
594 return i;
595 }
596 return -1;
597 }
598
/* Emits one data dword to the current method.
 *
 * Register data is routed through an ALU output: if an ALU in the current
 * instruction already writes that register, its result is reused;
 * otherwise a dummy `data + ZERO` ADD is packed in.  Immediates are
 * emitted through the immediate slots directly.
 */
void
mme_tu104_emit(struct mme_builder *b, struct mme_value data)
{
   struct mme_tu104_builder *tb = &b->tu104;

   if (data.type == MME_VALUE_TYPE_REG) {
      if (!mme_tu104_next_inst_can_add_emit(tb, MME_TU104_OUT_OP_ALU0, 0))
         mme_tu104_new_inst(tb);

      /* Reuse an existing ALU result for this register if one exists */
      struct mme_tu104_inst *inst = mme_tu104_cur_inst(tb);
      int alu_idx = find_alu_idx_for_dst(inst, data);
      if (alu_idx < 0) {
         const struct mme_tu104_alu alu = {
            .dst = MME_TU104_REG_ZERO,
            .op = MME_TU104_ALU_OP_ADD,
            .src = {
               mme_value_alu_reg(data),
               MME_TU104_REG_ZERO,
            },
         };
         alu_idx = mme_tu104_push_alu(tb, &alu, 0, 0, 0, false);
      }
      mme_tu104_push_emit(tb, MME_TU104_OUT_OP_ALU0 + alu_idx, 0);
   } else {
      /* Values that fit in 16 bits use one immediate slot; larger ones
       * take both via IMM32.
       */
      enum mme_tu104_out_op out;
      uint32_t imm;
      if (data.type == MME_VALUE_TYPE_ZERO) {
         out = MME_TU104_OUT_OP_IMM0;
         imm = 0;
      } else {
         assert(data.type == MME_VALUE_TYPE_IMM);
         imm = data.imm;
         out = data.imm == (uint16_t)data.imm ? MME_TU104_OUT_OP_IMM0 :
                                                MME_TU104_OUT_OP_IMM32;
      }
      if (!mme_tu104_next_inst_can_add_emit(tb, out, 0))
         mme_tu104_new_inst(tb);

      mme_tu104_push_emit(tb, out, imm);
   }
}
640
/* Maps a generic MME comparison op to the corresponding TU104 branch op
 * (BLT, BLTU, BLE, BLEU, BEQ).
 */
static enum mme_tu104_alu_op
mme_cmp_to_tu104_branch_op(enum mme_cmp_op op)
{
   switch (op) {
#define CMP_CASE(op) case MME_CMP_OP_##op: return MME_TU104_ALU_OP_B##op;
   CMP_CASE(LT)
   CMP_CASE(LTU)
   CMP_CASE(LE)
   CMP_CASE(LEU)
   CMP_CASE(EQ)
#undef CMP_CASE
   default:
      unreachable("Unsupported MME CMP op");
   }
}
656
/* Opens a control-flow region: emits the branch/loop op and pushes an
 * entry on the CF stack.
 *
 * The forward branch delta is left as 0 in the op's imm[0] and patched by
 * mme_tu104_end_cf().  `control` is the op's implicit immediate (condition
 * invert bit, etc.).
 *
 * NOTE(review): cf_depth is not bounds-checked against the cf_stack size
 * here -- deep nesting relies on callers staying within limits.
 */
static void
mme_tu104_start_cf(struct mme_builder *b,
                   enum mme_cf_type type,
                   enum mme_tu104_alu_op op,
                   struct mme_value x,
                   struct mme_value y,
                   uint16_t control)
{
   struct mme_tu104_builder *tb = &b->tu104;

   /* The HW seems to want at least LOOP to always be in alu0 */
   build_alu_to(b, mme_zero(), op, x, y, control, true);

   uint16_t ip = tb->inst_count - 1;
   assert(tb->insts[ip].alu[0].op == op);

   tb->cf_stack[tb->cf_depth++] = (struct mme_cf) {
      .type = type,
      .start_ip = ip,
   };

   /* The inside of control-flow needs to start with a new instruction */
   mme_tu104_new_inst(tb);
}
681
/* Closes the innermost control-flow region of the given type and patches
 * the forward branch delta of the op emitted by mme_tu104_start_cf().
 * Returns the popped CF stack entry.
 */
static struct mme_cf
mme_tu104_end_cf(struct mme_builder *b, enum mme_cf_type type)
{
   struct mme_tu104_builder *tb = &b->tu104;

   /* Flush a partially-built instruction so the delta is final */
   if (tb->inst_parts)
      mme_tu104_new_inst(tb);

   assert(tb->cf_depth > 0);
   struct mme_cf cf = tb->cf_stack[--tb->cf_depth];
   assert(cf.type == type);

   /* The forward delta occupies a 13-bit field in imm[0] */
   int delta = tb->inst_count - cf.start_ip - 1;
   assert(delta > 0 && delta < (1 << 13));
   tb->insts[cf.start_ip].imm[0] |= delta;

   return cf;
}
700
701 void
mme_tu104_start_loop(struct mme_builder * b,struct mme_value count)702 mme_tu104_start_loop(struct mme_builder *b, struct mme_value count)
703 {
704 mme_tu104_start_cf(b, MME_CF_TYPE_LOOP, MME_TU104_ALU_OP_LOOP,
705 count, mme_zero(), 0);
706 }
707
708 void
mme_tu104_end_loop(struct mme_builder * b)709 mme_tu104_end_loop(struct mme_builder *b)
710 {
711 mme_tu104_end_cf(b, MME_CF_TYPE_LOOP);
712 }
713
714 void
mme_tu104_start_if(struct mme_builder * b,enum mme_cmp_op op,bool if_true,struct mme_value x,struct mme_value y)715 mme_tu104_start_if(struct mme_builder *b,
716 enum mme_cmp_op op, bool if_true,
717 struct mme_value x, struct mme_value y)
718 {
719 uint16_t control = if_true ? 0 : BITFIELD_BIT(15);
720 mme_tu104_start_cf(b, MME_CF_TYPE_IF, mme_cmp_to_tu104_branch_op(op),
721 x, y, control);
722 }
723
724 void
mme_tu104_end_if(struct mme_builder * b)725 mme_tu104_end_if(struct mme_builder *b)
726 {
727 mme_tu104_end_cf(b, MME_CF_TYPE_IF);
728 }
729
730 void
mme_tu104_start_while(struct mme_builder * b)731 mme_tu104_start_while(struct mme_builder *b)
732 {
733 mme_tu104_start_cf(b, MME_CF_TYPE_WHILE, MME_TU104_ALU_OP_JAL,
734 mme_zero(), mme_zero(), BITFIELD_BIT(15));
735 }
736
/* Ends a while-loop by emitting the bottom-of-loop conditional branch
 * back to the first instruction of the body.
 */
void
mme_tu104_end_while(struct mme_builder *b,
                    enum mme_cmp_op cmp,
                    bool if_true,
                    struct mme_value x,
                    struct mme_value y)
{
   struct mme_tu104_builder *tb = &b->tu104;

   struct mme_cf cf = mme_tu104_end_cf(b, MME_CF_TYPE_WHILE);

   /* Negative 13-bit delta back to the body; bit 15 selects branch-if-true */
   int delta = tb->inst_count - cf.start_ip - 2;
   uint16_t control = (-delta & BITFIELD_MASK(13)) |
                      (if_true ? BITFIELD_BIT(15) : 0);
   build_alu_to(b, mme_zero(), mme_cmp_to_tu104_branch_op(cmp),
                x, y, control, true);

   /* Start a new instruction so next thing to come along doesn't end up being
    * the 2nd half of of our back-edge while.
    */
   mme_tu104_new_inst(tb);
}
759
mme_tu104_exit_if(struct mme_builder * b,enum mme_cmp_op op,bool if_true,struct mme_value x,struct mme_value y)760 void mme_tu104_exit_if(struct mme_builder *b,
761 enum mme_cmp_op op,
762 bool if_true,
763 struct mme_value x,
764 struct mme_value y)
765 {
766 struct mme_tu104_builder *tb = &b->tu104;
767
768 /* we reverse it as we want to take the branch if the condition is true */
769 uint16_t control = if_true ? BITFIELD_BIT(15) : 0;
770 /* magic offset to exit the macro */
771 control |= 0x1000;
772 build_alu_to(b, mme_zero(), mme_cmp_to_tu104_branch_op(op), x, y, control,
773 true);
774
775 mme_tu104_new_inst(tb);
776 }
777
778 uint32_t *
mme_tu104_builder_finish(struct mme_tu104_builder * tb,size_t * size_out)779 mme_tu104_builder_finish(struct mme_tu104_builder *tb, size_t *size_out)
780 {
781 assert(tb->cf_depth == 0);
782
783 /* TODO: If there are at least two instructions and we can guarantee the
784 * last two instructions get exeucted (not in control-flow), we don't need
785 * to add a pair of NOPs.
786 */
787 mme_tu104_new_inst(tb);
788 mme_tu104_new_inst(tb);
789 tb->insts[tb->inst_count - 2].end_next = true;
790
791 if (0)
792 mme_tu104_print(stderr, tb->insts, tb->inst_count);
793
794 size_t enc_size = tb->inst_count * 3 * sizeof(uint32_t);
795 uint32_t *enc = malloc(enc_size);
796 if (enc != NULL) {
797 mme_tu104_encode(enc, tb->inst_count, tb->insts);
798 *size_out = enc_size;
799 }
800 return enc;
801 }
802
803 void
mme_tu104_builder_dump(struct mme_builder * b,FILE * fp)804 mme_tu104_builder_dump(struct mme_builder *b, FILE *fp)
805 {
806 struct mme_tu104_builder *tb = &b->tu104;
807
808 mme_tu104_print(stderr, tb->insts, tb->inst_count);
809 }
810