1 /*
2 * Copyright © 2022 Mary Guillemard
3 * SPDX-License-Identifier: MIT
4 */
5 #include "mme_builder.h"
6
7 #include <stdio.h>
8 #include <stdlib.h>
9
10 #include "util/u_math.h"
11
12 void
mme_fermi_builder_init(struct mme_builder * b)13 mme_fermi_builder_init(struct mme_builder *b)
14 {
15 /* R0 is reserved for the zero register */
16 mme_reg_alloc_init(&b->reg_alloc, 0xfe);
17
18 /* Pre-allocate R1 for the first parameter value */
19 ASSERTED struct mme_value r1 = mme_reg_alloc_alloc(&b->reg_alloc);
20 assert(r1.reg == 1);
21 }
22
23 static inline bool
mme_fermi_is_zero_or_reg(struct mme_value x)24 mme_fermi_is_zero_or_reg(struct mme_value x)
25 {
26 switch (x.type) {
27 case MME_VALUE_TYPE_ZERO: return true;
28 case MME_VALUE_TYPE_IMM: return x.imm == 0;
29 case MME_VALUE_TYPE_REG: return true;
30 default: unreachable("Invalid MME value type");
31 }
32 }
33
34 static inline bool
mme_fermi_is_zero_or_imm(struct mme_value x)35 mme_fermi_is_zero_or_imm(struct mme_value x)
36 {
37 switch (x.type) {
38 case MME_VALUE_TYPE_ZERO: return true;
39 case MME_VALUE_TYPE_IMM: return true;
40 case MME_VALUE_TYPE_REG: return false;
41 default: unreachable("Invalid MME value type");
42 }
43 }
44
45 static inline enum mme_fermi_reg
mme_value_alu_reg(struct mme_value val)46 mme_value_alu_reg(struct mme_value val)
47 {
48 assert(mme_fermi_is_zero_or_reg(val));
49
50 switch (val.type) {
51 case MME_VALUE_TYPE_ZERO:
52 return MME_FERMI_REG_ZERO;
53 case MME_VALUE_TYPE_REG:
54 assert(val.reg > 0 && val.reg <= 7);
55 return MME_FERMI_REG_ZERO + val.reg;
56 case MME_VALUE_TYPE_IMM:
57 return MME_FERMI_REG_ZERO;
58 }
59 unreachable("Invalid value type");
60 }
61
62 static inline uint32_t
mme_value_alu_imm(struct mme_value val)63 mme_value_alu_imm(struct mme_value val)
64 {
65 assert(mme_fermi_is_zero_or_imm(val));
66
67 switch (val.type) {
68 case MME_VALUE_TYPE_ZERO:
69 return 0;
70 case MME_VALUE_TYPE_IMM:
71 return val.imm;
72 case MME_VALUE_TYPE_REG:
73 return 0;
74 }
75 unreachable("Invalid value type");
76 }
77
78 static inline void
mme_free_reg_if_tmp(struct mme_builder * b,struct mme_value data,struct mme_value maybe_tmp)79 mme_free_reg_if_tmp(struct mme_builder *b,
80 struct mme_value data,
81 struct mme_value maybe_tmp)
82 {
83 if (!mme_is_zero(data) &&
84 !mme_is_zero(maybe_tmp) &&
85 data.type != maybe_tmp.type)
86 mme_free_reg(b, maybe_tmp);
87 }
88
89 static void
mme_fermi_new_inst(struct mme_fermi_builder * b)90 mme_fermi_new_inst(struct mme_fermi_builder *b)
91 {
92 struct mme_fermi_inst noop = { MME_FERMI_INST_DEFAULTS };
93 assert(b->inst_count < ARRAY_SIZE(b->insts));
94 b->insts[b->inst_count] = noop;
95 b->inst_count++;
96 b->inst_parts = 0;
97 }
98
99 static struct mme_fermi_inst *
mme_fermi_cur_inst(struct mme_fermi_builder * b)100 mme_fermi_cur_inst(struct mme_fermi_builder *b)
101 {
102 assert(b->inst_count > 0 && b->inst_count < ARRAY_SIZE(b->insts));
103 return &b->insts[b->inst_count - 1];
104 }
105
106 void
mme_fermi_add_inst(struct mme_builder * b,const struct mme_fermi_inst * inst)107 mme_fermi_add_inst(struct mme_builder *b,
108 const struct mme_fermi_inst *inst)
109 {
110 struct mme_fermi_builder *fb = &b->fermi;
111
112 if (fb->inst_parts || fb->inst_count == 0)
113 mme_fermi_new_inst(fb);
114
115 *mme_fermi_cur_inst(fb) = *inst;
116 mme_fermi_new_inst(fb);
117 }
118
119 static inline void
mme_fermi_set_inst_parts(struct mme_fermi_builder * b,enum mme_fermi_instr_parts parts)120 mme_fermi_set_inst_parts(struct mme_fermi_builder *b,
121 enum mme_fermi_instr_parts parts)
122 {
123 assert(!(b->inst_parts & parts));
124 b->inst_parts |= parts;
125 }
126
127 static inline bool
mme_fermi_next_inst_can_fit_a_full_inst(struct mme_fermi_builder * b)128 mme_fermi_next_inst_can_fit_a_full_inst(struct mme_fermi_builder *b)
129 {
130 return !mme_fermi_is_empty(b) && b->inst_parts == 0;
131 }
132
133 void
mme_fermi_mthd_arr(struct mme_builder * b,uint16_t mthd,struct mme_value index)134 mme_fermi_mthd_arr(struct mme_builder *b,
135 uint16_t mthd, struct mme_value index)
136 {
137 struct mme_fermi_builder *fb = &b->fermi;
138 struct mme_value src_reg = mme_zero();
139
140 if (!mme_fermi_next_inst_can_fit_a_full_inst(fb))
141 mme_fermi_new_inst(fb);
142
143 struct mme_fermi_inst *inst = mme_fermi_cur_inst(fb);
144
145 uint32_t mthd_imm = (1 << 12) | (mthd >> 2);
146
147 if (index.type == MME_VALUE_TYPE_REG) {
148 src_reg = index;
149 } else if (index.type == MME_VALUE_TYPE_IMM) {
150 mthd_imm += index.imm;
151 }
152
153 inst->op = MME_FERMI_OP_ADD_IMM;
154 inst->src[0] = mme_value_alu_reg(src_reg);
155 inst->imm = mthd_imm;
156 inst->assign_op = MME_FERMI_ASSIGN_OP_MOVE_SET_MADDR;
157 inst->dst = MME_FERMI_REG_ZERO;
158
159 mme_fermi_set_inst_parts(fb, MME_FERMI_INSTR_PART_OP |
160 MME_FERMI_INSTR_PART_ASSIGN);
161 }
162
163 static inline bool
mme_fermi_prev_inst_can_emit(struct mme_fermi_builder * b,struct mme_value data)164 mme_fermi_prev_inst_can_emit(struct mme_fermi_builder *b, struct mme_value data) {
165 if (mme_fermi_is_empty(b)) {
166 return false;
167 }
168
169 if ((b->inst_parts & MME_FERMI_INSTR_PART_ASSIGN) == MME_FERMI_INSTR_PART_ASSIGN) {
170 struct mme_fermi_inst *inst = mme_fermi_cur_inst(b);
171
172 if (inst->assign_op == MME_FERMI_ASSIGN_OP_MOVE && data.type == MME_VALUE_TYPE_REG &&
173 mme_value_alu_reg(data) == inst->dst) {
174 return true;
175 }
176 }
177
178 return false;
179 }
180
181 static inline bool
mme_fermi_next_inst_can_emit(struct mme_fermi_builder * fb,struct mme_value data)182 mme_fermi_next_inst_can_emit(struct mme_fermi_builder *fb,
183 struct mme_value data)
184 {
185 if (mme_fermi_is_empty(fb))
186 return false;
187
188 if (fb->inst_parts == 0)
189 return true;
190
191 return mme_fermi_prev_inst_can_emit(fb, data);
192 }
193
194 static inline struct mme_value
mme_fermi_reg(uint32_t reg)195 mme_fermi_reg(uint32_t reg)
196 {
197 struct mme_value val = {
198 .type = MME_VALUE_TYPE_REG,
199 .reg = reg,
200 };
201 return val;
202 }
203
/* True when i is unchanged by util_mask_sign_extend(i, 18), i.e. when it
 * can be carried in an 18-bit signed immediate field.
 */
static bool
is_int18(uint32_t i)
{
   const uint32_t extended = (uint32_t)util_mask_sign_extend(i, 18);
   return extended == i;
}
209
210 static inline void
mme_fermi_add_imm18(struct mme_fermi_builder * fb,struct mme_value dst,struct mme_value src,uint32_t imm)211 mme_fermi_add_imm18(struct mme_fermi_builder *fb,
212 struct mme_value dst,
213 struct mme_value src,
214 uint32_t imm)
215 {
216 assert(dst.type == MME_VALUE_TYPE_REG &&
217 mme_fermi_is_zero_or_reg(src) && is_int18(imm));
218
219 if (!mme_fermi_next_inst_can_fit_a_full_inst(fb)) {
220 mme_fermi_new_inst(fb);
221 }
222
223 struct mme_fermi_inst *inst = mme_fermi_cur_inst(fb);
224
225 inst->op = MME_FERMI_OP_ADD_IMM;
226 inst->src[0] = mme_value_alu_reg(src);
227 inst->imm = imm;
228 inst->assign_op = MME_FERMI_ASSIGN_OP_MOVE;
229 inst->dst = mme_value_alu_reg(dst);
230
231 mme_fermi_set_inst_parts(fb, MME_FERMI_INSTR_PART_OP |
232 MME_FERMI_INSTR_PART_ASSIGN);
233 }
234
235 static bool
mme_fermi_bfe_lsl_can_use_imm(struct mme_fermi_builder * b,struct mme_value src_bits,struct mme_value dst_bits)236 mme_fermi_bfe_lsl_can_use_imm(struct mme_fermi_builder *b,
237 struct mme_value src_bits,
238 struct mme_value dst_bits)
239 {
240 return (mme_fermi_is_zero_or_reg(src_bits) &&
241 mme_fermi_is_zero_or_imm(dst_bits) &&
242 mme_value_alu_imm(dst_bits) <= 31);
243 }
244
245 static bool
mme_fermi_bfe_lsl_can_use_reg(struct mme_fermi_builder * b,struct mme_value src_bits,struct mme_value dst_bits)246 mme_fermi_bfe_lsl_can_use_reg(struct mme_fermi_builder *b,
247 struct mme_value src_bits,
248 struct mme_value dst_bits)
249 {
250 return (mme_fermi_is_zero_or_imm(src_bits) &&
251 mme_fermi_is_zero_or_reg(dst_bits) &&
252 mme_value_alu_imm(src_bits) <= 31);
253 }
254
255 static void
mme_fermi_bfe(struct mme_fermi_builder * fb,struct mme_value dst_reg,struct mme_value src_bits,struct mme_value src_reg,struct mme_value dst_bits,uint32_t size)256 mme_fermi_bfe(struct mme_fermi_builder *fb,
257 struct mme_value dst_reg,
258 struct mme_value src_bits,
259 struct mme_value src_reg,
260 struct mme_value dst_bits,
261 uint32_t size)
262 {
263 assert(dst_reg.type == MME_VALUE_TYPE_REG &&
264 mme_fermi_is_zero_or_reg(src_reg) &&
265 (mme_fermi_bfe_lsl_can_use_imm(fb, src_bits, dst_bits) ||
266 mme_fermi_bfe_lsl_can_use_reg(fb, src_bits, dst_bits)));
267
268 if (!mme_fermi_next_inst_can_fit_a_full_inst(fb))
269 mme_fermi_new_inst(fb);
270
271 struct mme_fermi_inst *inst = mme_fermi_cur_inst(fb);
272
273 if (mme_fermi_bfe_lsl_can_use_imm(fb, src_bits, dst_bits)) {
274 inst->op = MME_FERMI_OP_BFE_LSL_IMM;
275 inst->src[0] = mme_value_alu_reg(src_bits);
276 inst->src[1] = mme_value_alu_reg(src_reg);
277 inst->bitfield.dst_bit = mme_value_alu_imm(dst_bits);
278 inst->bitfield.size = size;
279 } else if (mme_fermi_bfe_lsl_can_use_reg(fb, src_bits, dst_bits)) {
280 inst->op = MME_FERMI_OP_BFE_LSL_REG;
281 inst->src[0] = mme_value_alu_reg(dst_bits);
282 inst->src[1] = mme_value_alu_reg(src_reg);
283 inst->bitfield.src_bit = mme_value_alu_imm(src_bits);
284 inst->bitfield.size = size;
285 }
286
287 inst->assign_op = MME_FERMI_ASSIGN_OP_MOVE;
288 inst->dst = mme_value_alu_reg(dst_reg);
289
290 mme_fermi_set_inst_parts(fb, MME_FERMI_INSTR_PART_OP |
291 MME_FERMI_INSTR_PART_ASSIGN);
292 }
293
294 static void
mme_fermi_sll_to(struct mme_fermi_builder * b,struct mme_value dst,struct mme_value x,struct mme_value y)295 mme_fermi_sll_to(struct mme_fermi_builder *b,
296 struct mme_value dst,
297 struct mme_value x,
298 struct mme_value y)
299 {
300 assert(mme_fermi_is_zero_or_reg(dst));
301
302 mme_fermi_bfe(b, dst, mme_zero(), x, y, 31);
303 }
304
305 static void
mme_fermi_srl_to(struct mme_fermi_builder * b,struct mme_value dst,struct mme_value x,struct mme_value y)306 mme_fermi_srl_to(struct mme_fermi_builder *b,
307 struct mme_value dst,
308 struct mme_value x,
309 struct mme_value y)
310 {
311 assert(mme_fermi_is_zero_or_reg(dst));
312
313 mme_fermi_bfe(b, dst, y, x, mme_zero(), 31);
314 }
315
316 void
mme_fermi_bfe_to(struct mme_builder * b,struct mme_value dst,struct mme_value x,struct mme_value pos,uint8_t bits)317 mme_fermi_bfe_to(struct mme_builder *b, struct mme_value dst,
318 struct mme_value x, struct mme_value pos, uint8_t bits)
319 {
320 struct mme_fermi_builder *fb = &b->fermi;
321 assert(mme_fermi_is_zero_or_reg(dst));
322
323 mme_fermi_bfe(fb, dst, pos, x, mme_zero(), bits);
324 }
325
326 static struct mme_value
mme_fermi_load_imm_to_reg(struct mme_builder * b,struct mme_value data)327 mme_fermi_load_imm_to_reg(struct mme_builder *b, struct mme_value data)
328 {
329 struct mme_fermi_builder *fb = &b->fermi;
330
331 assert(data.type == MME_VALUE_TYPE_IMM ||
332 data.type == MME_VALUE_TYPE_ZERO);
333
334 /* If the immediate is zero, we can simplify this */
335 if (mme_is_zero(data)) {
336 return mme_zero();
337 } else {
338 uint32_t imm = data.imm;
339
340 struct mme_value dst = mme_alloc_reg(b);
341
342 if (is_int18(imm)) {
343 mme_fermi_add_imm18(fb, dst, mme_zero(), imm);
344 } else {
345 /* TODO: a possible optimisation involve searching for the first bit
346 * offset and see if it can fit in 16 bits.
347 */
348 uint32_t high_bits = imm >> 16;
349 uint32_t low_bits = imm & UINT16_MAX;
350
351 mme_fermi_add_imm18(fb, dst, mme_zero(), high_bits);
352 mme_fermi_sll_to(fb, dst, dst, mme_imm(16));
353 mme_fermi_add_imm18(fb, dst, dst, low_bits);
354 }
355
356 return dst;
357 }
358 }
359
360 static inline struct mme_value
mme_fermi_value_as_reg(struct mme_builder * b,struct mme_value data)361 mme_fermi_value_as_reg(struct mme_builder *b,
362 struct mme_value data)
363 {
364 if (data.type == MME_VALUE_TYPE_REG || mme_is_zero(data)) {
365 return data;
366 }
367
368 return mme_fermi_load_imm_to_reg(b, data);
369 }
370
mme_fermi_emit(struct mme_builder * b,struct mme_value data)371 void mme_fermi_emit(struct mme_builder *b,
372 struct mme_value data)
373 {
374 struct mme_fermi_builder *fb = &b->fermi;
375 struct mme_fermi_inst *inst;
376
377 /* Check if previous assign was to the same dst register and modify assign
378 * mode if needed
379 */
380 if (mme_fermi_prev_inst_can_emit(fb, data)) {
381 inst = mme_fermi_cur_inst(fb);
382 inst->assign_op = MME_FERMI_ASSIGN_OP_MOVE_EMIT;
383 } else {
384 struct mme_value data_reg = mme_fermi_value_as_reg(b, data);
385
386 /* Because of mme_fermi_value_as_reg, it is possible that a new load
387 * that can be simplify
388 */
389 if (mme_fermi_prev_inst_can_emit(fb, data_reg)) {
390 inst = mme_fermi_cur_inst(fb);
391 inst->assign_op = MME_FERMI_ASSIGN_OP_MOVE_EMIT;
392 } else {
393 if (!mme_fermi_next_inst_can_emit(fb, data))
394 mme_fermi_new_inst(fb);
395
396 inst = mme_fermi_cur_inst(fb);
397 inst->op = MME_FERMI_OP_ALU_REG;
398 inst->alu_op = MME_FERMI_ALU_OP_ADD;
399 inst->src[0] = mme_value_alu_reg(data_reg);
400 inst->src[1] = MME_FERMI_REG_ZERO;
401 inst->assign_op = MME_FERMI_ASSIGN_OP_MOVE_EMIT;
402 inst->dst = MME_FERMI_REG_ZERO;
403
404 mme_fermi_set_inst_parts(fb, MME_FERMI_INSTR_PART_OP |
405 MME_FERMI_INSTR_PART_ASSIGN);
406 }
407
408 mme_free_reg_if_tmp(b, data, data_reg);
409 }
410 }
411
412 static void
mme_fermi_branch(struct mme_fermi_builder * fb,enum mme_fermi_reg src,int32_t offset,bool if_zero)413 mme_fermi_branch(struct mme_fermi_builder *fb,
414 enum mme_fermi_reg src, int32_t offset, bool if_zero)
415 {
416 if (fb->inst_parts || mme_fermi_is_empty(fb))
417 mme_fermi_new_inst(fb);
418
419 struct mme_fermi_inst *inst = mme_fermi_cur_inst(fb);
420
421 inst->op = MME_FERMI_OP_BRANCH;
422 inst->src[0] = src;
423 inst->imm = offset;
424 inst->branch.no_delay = true;
425 inst->branch.not_zero = if_zero;
426
427 mme_fermi_set_inst_parts(fb, MME_FERMI_INSTR_PART_OP |
428 MME_FERMI_INSTR_PART_ASSIGN);
429 }
430
431 static void
mme_fermi_start_cf(struct mme_builder * b,enum mme_cf_type type,struct mme_value cond,bool is_zero)432 mme_fermi_start_cf(struct mme_builder *b,
433 enum mme_cf_type type,
434 struct mme_value cond,
435 bool is_zero)
436 {
437 struct mme_fermi_builder *fb = &b->fermi;
438
439 /* The condition here is inverted because we want to branch and skip the
440 * block when the condition fails.
441 */
442 assert(mme_fermi_is_zero_or_reg(cond));
443 mme_fermi_branch(fb, mme_value_alu_reg(cond), 0, is_zero);
444
445 uint16_t ip = fb->inst_count - 1;
446 assert(fb->insts[ip].op == MME_FERMI_OP_BRANCH);
447
448 assert(fb->cf_depth < ARRAY_SIZE(fb->cf_stack));
449 fb->cf_stack[fb->cf_depth++] = (struct mme_cf) {
450 .type = type,
451 .start_ip = ip,
452 };
453
454 /* The inside of control-flow needs to start with a new instruction */
455 mme_fermi_new_inst(fb);
456 }
457
458 static struct mme_cf
mme_fermi_end_cf(struct mme_builder * b,enum mme_cf_type type)459 mme_fermi_end_cf(struct mme_builder *b, enum mme_cf_type type)
460 {
461 struct mme_fermi_builder *fb = &b->fermi;
462
463 if (fb->inst_parts)
464 mme_fermi_new_inst(fb);
465
466 assert(fb->cf_depth > 0);
467 struct mme_cf cf = fb->cf_stack[--fb->cf_depth];
468 assert(cf.type == type);
469
470 assert(fb->insts[cf.start_ip].op == MME_FERMI_OP_BRANCH);
471 fb->insts[cf.start_ip].imm = fb->inst_count - cf.start_ip - 1;
472
473 return cf;
474 }
475
476 static struct mme_value
mme_fermi_neq(struct mme_builder * b,struct mme_value x,struct mme_value y)477 mme_fermi_neq(struct mme_builder *b, struct mme_value x, struct mme_value y)
478 {
479 struct mme_fermi_builder *fb = &b->fermi;
480
481 /* Generate some value that's non-zero if x != y */
482 struct mme_value res = mme_alloc_reg(b);
483 if (x.type == MME_VALUE_TYPE_IMM && is_int18(-x.imm)) {
484 mme_fermi_add_imm18(fb, res, y, -x.imm);
485 } else if (y.type == MME_VALUE_TYPE_IMM && is_int18(-y.imm)) {
486 mme_fermi_add_imm18(fb, res, x, -y.imm);
487 } else {
488 mme_xor_to(b, res, x, y);
489 }
490 return res;
491 }
492
493 void
mme_fermi_start_if(struct mme_builder * b,enum mme_cmp_op op,bool if_true,struct mme_value x,struct mme_value y)494 mme_fermi_start_if(struct mme_builder *b,
495 enum mme_cmp_op op,
496 bool if_true,
497 struct mme_value x,
498 struct mme_value y)
499 {
500 assert(op == MME_CMP_OP_EQ);
501
502 if (mme_is_zero(x)) {
503 mme_fermi_start_cf(b, MME_CF_TYPE_IF, y, if_true);
504 } else if (mme_is_zero(y)) {
505 mme_fermi_start_cf(b, MME_CF_TYPE_IF, x, if_true);
506 } else {
507 struct mme_value tmp = mme_fermi_neq(b, x, y);
508 mme_fermi_start_cf(b, MME_CF_TYPE_IF, tmp, if_true);
509 mme_free_reg(b, tmp);
510 }
511 }
512
513 void
mme_fermi_end_if(struct mme_builder * b)514 mme_fermi_end_if(struct mme_builder *b)
515 {
516 mme_fermi_end_cf(b, MME_CF_TYPE_IF);
517 }
518
519 void
mme_fermi_start_while(struct mme_builder * b)520 mme_fermi_start_while(struct mme_builder *b)
521 {
522 mme_fermi_start_cf(b, MME_CF_TYPE_WHILE, mme_zero(), false);
523 }
524
525 static void
mme_fermi_end_while_zero(struct mme_builder * b,struct mme_cf cf,struct mme_value cond,bool is_zero)526 mme_fermi_end_while_zero(struct mme_builder *b,
527 struct mme_cf cf,
528 struct mme_value cond,
529 bool is_zero)
530 {
531 struct mme_fermi_builder *fb = &b->fermi;
532
533 if (fb->inst_parts)
534 mme_fermi_new_inst(fb);
535
536 int delta = fb->inst_count - cf.start_ip - 2;
537 mme_fermi_branch(fb, mme_value_alu_reg(cond), -delta, !is_zero);
538 }
539
540 void
mme_fermi_end_while(struct mme_builder * b,enum mme_cmp_op op,bool if_true,struct mme_value x,struct mme_value y)541 mme_fermi_end_while(struct mme_builder *b,
542 enum mme_cmp_op op,
543 bool if_true,
544 struct mme_value x,
545 struct mme_value y)
546 {
547 assert(op == MME_CMP_OP_EQ);
548
549 struct mme_cf cf = mme_fermi_end_cf(b, MME_CF_TYPE_WHILE);
550
551 if (mme_is_zero(x)) {
552 mme_fermi_end_while_zero(b, cf, y, if_true);
553 } else if (mme_is_zero(y)) {
554 mme_fermi_end_while_zero(b, cf, x, if_true);
555 } else {
556 struct mme_value tmp = mme_fermi_neq(b, x, y);
557 mme_fermi_end_while_zero(b, cf, tmp, if_true);
558 mme_free_reg(b, tmp);
559 }
560 }
561
562 void
mme_fermi_start_loop(struct mme_builder * b,struct mme_value count)563 mme_fermi_start_loop(struct mme_builder *b,
564 struct mme_value count)
565 {
566 struct mme_fermi_builder *fb = &b->fermi;
567
568 assert(mme_is_zero(fb->loop_counter));
569 fb->loop_counter = mme_mov(b, count);
570
571 mme_start_while(b);
572 }
573
574 void
mme_fermi_end_loop(struct mme_builder * b)575 mme_fermi_end_loop(struct mme_builder *b)
576 {
577 struct mme_fermi_builder *fb = &b->fermi;
578
579 mme_sub_to(b, fb->loop_counter, fb->loop_counter, mme_imm(1));
580 mme_fermi_end_while(b, MME_CMP_OP_EQ, false, fb->loop_counter, mme_zero());
581
582 mme_free_reg(b, fb->loop_counter);
583 fb->loop_counter = mme_zero();
584 }
585
586 static inline bool
mme_fermi_next_inst_can_load_to(struct mme_fermi_builder * b)587 mme_fermi_next_inst_can_load_to(struct mme_fermi_builder *b)
588 {
589 return !mme_fermi_is_empty(b) && !(b->inst_parts & MME_FERMI_INSTR_PART_ASSIGN);
590 }
591
mme_fermi_load_to(struct mme_builder * b,struct mme_value dst)592 void mme_fermi_load_to(struct mme_builder *b,
593 struct mme_value dst)
594 {
595 struct mme_fermi_builder *fb = &b->fermi;
596
597 assert(dst.type == MME_VALUE_TYPE_REG ||
598 dst.type == MME_VALUE_TYPE_ZERO);
599
600 if (!fb->first_loaded) {
601 struct mme_value r1 = {
602 .type = MME_VALUE_TYPE_REG,
603 .reg = 1,
604 };
605 mme_mov_to(b, dst, r1);
606 mme_free_reg(b, r1);
607 fb->first_loaded = true;
608 return;
609 }
610
611 if (!mme_fermi_next_inst_can_load_to(fb))
612 mme_fermi_new_inst(fb);
613
614 struct mme_fermi_inst *inst = mme_fermi_cur_inst(fb);
615
616 inst->assign_op = MME_FERMI_ASSIGN_OP_LOAD;
617 inst->dst = mme_value_alu_reg(dst);
618
619 mme_fermi_set_inst_parts(fb, MME_FERMI_INSTR_PART_ASSIGN);
620 }
621
622
623 struct mme_value
mme_fermi_load(struct mme_builder * b)624 mme_fermi_load(struct mme_builder *b)
625 {
626 struct mme_fermi_builder *fb = &b->fermi;
627
628 if (!fb->first_loaded) {
629 struct mme_value r1 = {
630 .type = MME_VALUE_TYPE_REG,
631 .reg = 1,
632 };
633 fb->first_loaded = true;
634 return r1;
635 }
636
637 struct mme_value dst = mme_alloc_reg(b);
638 mme_fermi_load_to(b, dst);
639
640 return dst;
641 }
642
643 static enum mme_fermi_alu_op
mme_to_fermi_alu_op(enum mme_alu_op op)644 mme_to_fermi_alu_op(enum mme_alu_op op)
645 {
646 switch (op) {
647 #define ALU_CASE(op) case MME_ALU_OP_##op: return MME_FERMI_ALU_OP_##op;
648 ALU_CASE(ADD)
649 ALU_CASE(ADDC)
650 ALU_CASE(SUB)
651 ALU_CASE(SUBB)
652 ALU_CASE(AND)
653 ALU_CASE(NAND)
654 ALU_CASE(OR)
655 ALU_CASE(XOR)
656 #undef ALU_CASE
657 default:
658 unreachable("Unsupported MME ALU op");
659 }
660 }
661
662 void
mme_fermi_alu_to(struct mme_builder * b,struct mme_value dst,enum mme_alu_op op,struct mme_value x,struct mme_value y)663 mme_fermi_alu_to(struct mme_builder *b,
664 struct mme_value dst,
665 enum mme_alu_op op,
666 struct mme_value x,
667 struct mme_value y)
668 {
669 struct mme_fermi_builder *fb = &b->fermi;
670
671 switch (op) {
672 case MME_ALU_OP_ADD:
673 if (x.type == MME_VALUE_TYPE_IMM && x.imm != 0 && is_int18(x.imm)) {
674 mme_fermi_add_imm18(fb, dst, y, x.imm);
675 return;
676 }
677 if (y.type == MME_VALUE_TYPE_IMM && y.imm != 0 && is_int18(y.imm)) {
678 mme_fermi_add_imm18(fb, dst, x, y.imm);
679 return;
680 }
681 break;
682 case MME_ALU_OP_SUB:
683 if (y.type == MME_VALUE_TYPE_IMM && is_int18(-y.imm)) {
684 mme_fermi_add_imm18(fb, dst, x, -y.imm);
685 return;
686 }
687 break;
688 case MME_ALU_OP_SLL:
689 mme_fermi_sll_to(fb, dst, x, y);
690 return;
691 case MME_ALU_OP_SRL:
692 mme_fermi_srl_to(fb, dst, x, y);
693 return;
694 default:
695 break;
696 }
697
698 assert(mme_fermi_is_zero_or_reg(dst));
699
700 struct mme_value x_reg = mme_fermi_value_as_reg(b, x);
701 struct mme_value y_reg = mme_fermi_value_as_reg(b, y);
702
703 if (!mme_fermi_next_inst_can_fit_a_full_inst(fb))
704 mme_fermi_new_inst(fb);
705
706 struct mme_fermi_inst *inst = mme_fermi_cur_inst(fb);
707 inst->op = MME_FERMI_OP_ALU_REG;
708 inst->alu_op = mme_to_fermi_alu_op(op);
709 inst->src[0] = mme_value_alu_reg(x_reg);
710 inst->src[1] = mme_value_alu_reg(y_reg);
711 inst->assign_op = MME_FERMI_ASSIGN_OP_MOVE;
712 inst->dst = mme_value_alu_reg(dst);
713
714 mme_fermi_set_inst_parts(fb, MME_FERMI_INSTR_PART_OP |
715 MME_FERMI_INSTR_PART_ASSIGN);
716
717 mme_free_reg_if_tmp(b, x, x_reg);
718 mme_free_reg_if_tmp(b, y, y_reg);
719 }
720
721
mme_fermi_state_arr_to(struct mme_builder * b,struct mme_value dst,uint16_t state,struct mme_value index)722 void mme_fermi_state_arr_to(struct mme_builder *b,
723 struct mme_value dst,
724 uint16_t state,
725 struct mme_value index)
726 {
727 struct mme_fermi_builder *fb = &b->fermi;
728
729 assert(mme_fermi_is_zero_or_reg(dst));
730 assert(state % 4 == 0);
731
732 struct mme_value index_reg = mme_fermi_value_as_reg(b, index);
733
734 if (!mme_fermi_next_inst_can_fit_a_full_inst(fb))
735 mme_fermi_new_inst(fb);
736
737 struct mme_fermi_inst *inst = mme_fermi_cur_inst(fb);
738 inst->op = MME_FERMI_OP_STATE;
739 inst->src[0] = mme_value_alu_reg(index_reg);
740 inst->src[1] = MME_FERMI_REG_ZERO;
741 inst->imm = state >> 2;
742 inst->assign_op = MME_FERMI_ASSIGN_OP_MOVE;
743 inst->dst = mme_value_alu_reg(dst);
744
745 mme_fermi_set_inst_parts(fb, MME_FERMI_INSTR_PART_OP |
746 MME_FERMI_INSTR_PART_ASSIGN);
747
748 mme_free_reg_if_tmp(b, index, index_reg);
749 }
750
751 void
mme_fermi_merge_to(struct mme_builder * b,struct mme_value dst,struct mme_value x,struct mme_value y,uint16_t dst_pos,uint16_t bits,uint16_t src_pos)752 mme_fermi_merge_to(struct mme_builder *b, struct mme_value dst,
753 struct mme_value x, struct mme_value y,
754 uint16_t dst_pos, uint16_t bits, uint16_t src_pos)
755 {
756 struct mme_fermi_builder *fb = &b->fermi;
757
758 assert(mme_fermi_is_zero_or_reg(dst));
759 assert(dst_pos < 32);
760 assert(bits < 32);
761 assert(src_pos < 32);
762
763 struct mme_value x_reg = mme_fermi_value_as_reg(b, x);
764 struct mme_value y_reg = mme_fermi_value_as_reg(b, y);
765
766 if (!mme_fermi_next_inst_can_fit_a_full_inst(fb))
767 mme_fermi_new_inst(fb);
768
769 struct mme_fermi_inst *inst = mme_fermi_cur_inst(fb);
770
771 inst->op = MME_FERMI_OP_MERGE;
772 inst->src[0] = mme_value_alu_reg(x_reg);
773 inst->src[1] = mme_value_alu_reg(y_reg);
774 inst->bitfield.dst_bit = dst_pos;
775 inst->bitfield.src_bit = src_pos;
776 inst->bitfield.size = bits;
777
778 inst->assign_op = MME_FERMI_ASSIGN_OP_MOVE;
779 inst->dst = mme_value_alu_reg(dst);
780
781 mme_fermi_set_inst_parts(fb, MME_FERMI_INSTR_PART_OP |
782 MME_FERMI_INSTR_PART_ASSIGN);
783
784 mme_free_reg_if_tmp(b, x, x_reg);
785 mme_free_reg_if_tmp(b, y, y_reg);
786 }
787
788 uint32_t *
mme_fermi_builder_finish(struct mme_fermi_builder * b,size_t * size_out)789 mme_fermi_builder_finish(struct mme_fermi_builder *b, size_t *size_out)
790 {
791 assert(b->cf_depth == 0);
792
793 /* TODO: If there are at least two instructions and we can guarantee the
794 * last two instructions get exeucted (not in control-flow), we don't need
795 * to add a pair of NOPs.
796 */
797 mme_fermi_new_inst(b);
798 mme_fermi_new_inst(b);
799
800 b->insts[b->inst_count - 2].end_next = true;
801
802 size_t enc_size = b->inst_count * sizeof(uint32_t);
803 uint32_t *enc = malloc(enc_size);
804 if (enc != NULL) {
805 mme_fermi_encode(enc, b->inst_count, b->insts);
806 *size_out = enc_size;
807 }
808 return enc;
809 }
810
811 void
mme_fermi_builder_dump(struct mme_builder * b,FILE * fp)812 mme_fermi_builder_dump(struct mme_builder *b, FILE *fp)
813 {
814 struct mme_fermi_builder *fb = &b->fermi;
815
816 mme_fermi_print(fp, fb->insts, fb->inst_count);
817 }
818