• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2022 Mary Guillemard
3  * SPDX-License-Identifier: MIT
4  */
5 #include "mme_fermi_sim.h"
6 
7 #include <inttypes.h>
8 
9 #include "mme_fermi.h"
10 #include "util/u_math.h"
11 
12 #include "nvk_cl9097.h"
13 #include "nvk_cl902d.h"
14 
15 struct mme_fermi_sim {
16    uint32_t param_count;
17    const uint32_t *params;
18 
19    /* Bound memory ranges */
20    uint32_t mem_count;
21    struct mme_fermi_sim_mem *mems;
22 
23    /* SET_MME_MEM_ADDRESS_A/B */
24    uint64_t mem_addr;
25 
26    /* RAM, accessed by STATE */
27    struct {
28       uint32_t data[MME_FERMI_DRAM_COUNT];
29 
30       /* SET_MME_MEM_RAM_ADDRESS */
31       uint32_t addr;
32    } ram;
33 
34    struct {
35       // TODO: check if read_fifo is supported.
36       struct {
37          uint32_t data[1024];
38          uint64_t count;
39       } read_fifo;
40    } dma;
41 
42    struct {
43       unsigned mthd:16;
44       unsigned inc:4;
45       bool has_mthd:1;
46    } mthd;
47 
48    /* SET_MME_SHADOW_SCRATCH(i) */
49    uint32_t scratch[MME_FERMI_SCRATCH_COUNT];
50 
51    uint32_t regs[7];
52    uint32_t alu_carry;
53    uint16_t ip;
54    uint16_t next_ip;
55 };
56 
57 static uint32_t *
find_mem(struct mme_fermi_sim * sim,uint64_t addr,const char * op_desc)58 find_mem(struct mme_fermi_sim *sim, uint64_t addr, const char *op_desc)
59 {
60    for (uint32_t i = 0; i < sim->mem_count; i++) {
61       if (addr < sim->mems[i].addr)
62          continue;
63 
64       uint64_t offset = addr - sim->mems[i].addr;
65       if (offset >= sim->mems[i].size)
66          continue;
67 
68       assert(sim->mems[i].data != NULL);
69       return (uint32_t *)((char *)sim->mems[i].data + offset);
70    }
71 
72    fprintf(stderr, "FAULT in %s at address 0x%"PRIx64"\n", op_desc, addr);
73    abort();
74 }
75 
76 static uint32_t
read_dmem(struct mme_fermi_sim * sim,uint64_t addr,const char * op_desc)77 read_dmem(struct mme_fermi_sim *sim, uint64_t addr, const char *op_desc)
78 {
79    uint32_t ram_index = addr / 4;
80 
81    if (ram_index < ARRAY_SIZE(sim->ram.data)) {
82       return sim->ram.data[ram_index];
83    }
84 
85    if (addr >= NV9097_SET_MME_SHADOW_SCRATCH(0) && addr < NV9097_CALL_MME_MACRO(0)) {
86       return sim->scratch[ram_index - NV9097_SET_MME_SHADOW_SCRATCH(0) / 4];
87    }
88 
89    fprintf(stderr, "FAULT in %s at DMEM address 0x%"PRIx64"\n (READ)", op_desc, addr);
90    abort();
91 }
92 
93 static void
write_dmem(struct mme_fermi_sim * sim,uint64_t addr,uint32_t val,const char * op_desc)94 write_dmem(struct mme_fermi_sim *sim, uint64_t addr, uint32_t val, const char *op_desc)
95 {
96    uint32_t ram_index = addr / 4;
97 
98    if (ram_index < ARRAY_SIZE(sim->ram.data)) {
99       sim->ram.data[ram_index] = val;
100    }
101    else if (addr >= NV9097_SET_MME_SHADOW_SCRATCH(0) && addr < NV9097_CALL_MME_MACRO(0)) {
102       sim->scratch[ram_index - NV9097_SET_MME_SHADOW_SCRATCH(0) / 4] = val;
103    } else {
104       fprintf(stderr, "FAULT in %s at DMEM address 0x%"PRIx64" (WRITE)\n", op_desc, addr);
105       abort();
106    }
107 }
108 
/*
 * Read a 64-bit value stored as two consecutive data-memory words: the word
 * at `addr` supplies the upper 32 bits, the word at `addr + 4` the lower 32.
 * Faults are handled by read_dmem().
 */
static uint64_t
read_dmem64(struct mme_fermi_sim *sim, uint64_t addr, const char *op_desc)
{
   const uint64_t upper = read_dmem(sim, addr, op_desc);
   const uint64_t lower = read_dmem(sim, addr + 4, op_desc);

   return (upper << 32) | lower;
}
114 
load_param(struct mme_fermi_sim * sim)115 static uint32_t load_param(struct mme_fermi_sim *sim)
116 {
117    if (sim->param_count == 0) {
118       // TODO: know what happens on hardware here
119       return 0;
120    }
121 
122    uint32_t param = *sim->params;
123 
124    sim->params++;
125    sim->param_count--;
126 
127    return param;
128 }
129 
load_reg(struct mme_fermi_sim * sim,enum mme_fermi_reg reg)130 static uint32_t load_reg(struct mme_fermi_sim *sim, enum mme_fermi_reg reg)
131 {
132    if (reg == MME_FERMI_REG_ZERO) {
133       return 0;
134    }
135 
136    return sim->regs[reg - 1];
137 }
138 
store_reg(struct mme_fermi_sim * sim,enum mme_fermi_reg reg,uint32_t val)139 static void store_reg(struct mme_fermi_sim *sim, enum mme_fermi_reg reg, uint32_t val)
140 {
141    if (reg == MME_FERMI_REG_ZERO) {
142       return;
143    }
144 
145    sim->regs[reg - 1] = val;
146 }
147 
load_imm(const struct mme_fermi_inst * inst)148 static int32_t load_imm(const struct mme_fermi_inst *inst)
149 {
150    return util_mask_sign_extend(inst->imm, 18);
151 }
152 
/*
 * Bit-field extract followed by a left shift.
 *
 * Takes `size` bits of `value` starting at bit `src_bit` and places them at
 * bit `dst_bit` of the result.  If either bit position is above 31 the
 * result is 0, which also keeps both shifts within the width of the type.
 */
static uint32_t eval_bfe_lsl(uint32_t value, uint32_t src_bit, uint32_t dst_bit, uint8_t size)
{
   const bool positions_valid = src_bit <= 31 && dst_bit <= 31;

   if (!positions_valid)
      return 0;

   const uint32_t field = (value >> src_bit) & BITFIELD_MASK(size);

   return field << dst_bit;
}
161 
eval_op(struct mme_fermi_sim * sim,const struct mme_fermi_inst * inst)162 static uint32_t eval_op(struct mme_fermi_sim *sim, const struct mme_fermi_inst *inst) {
163    assert(inst->op != MME_FERMI_OP_BRANCH);
164 
165    uint32_t x = load_reg(sim, inst->src[0]);
166    uint32_t y = load_reg(sim, inst->src[1]);
167 
168    switch (inst->op) {
169       case MME_FERMI_OP_ALU_REG: {
170          uint32_t res = 0;
171 
172          switch (inst->alu_op) {
173             case MME_FERMI_ALU_OP_ADD:
174                res = x + y;
175                sim->alu_carry = res < x;
176                break;
177             case MME_FERMI_ALU_OP_ADDC:
178                res = x + y + sim->alu_carry;
179                sim->alu_carry = res < x;
180                break;
181             case MME_FERMI_ALU_OP_SUB:
182                res = x - y;
183                sim->alu_carry = res > x;
184                break;
185             case MME_FERMI_ALU_OP_SUBB:
186                res = x - y - sim->alu_carry;
187                sim->alu_carry = res > x;
188                break;
189             case MME_FERMI_ALU_OP_XOR:
190                res = x ^ y;
191                break;
192             case MME_FERMI_ALU_OP_OR:
193                res = x | y;
194                break;
195             case MME_FERMI_ALU_OP_AND:
196                res = x & y;
197                break;
198             case MME_FERMI_ALU_OP_AND_NOT:
199                res = x & ~y;
200                break;
201             case MME_FERMI_ALU_OP_NAND:
202                res = ~(x & y);
203                break;
204             default:
205                unreachable("Unhandled ALU op");
206          }
207 
208          return res;
209       }
210       case MME_FERMI_OP_ADD_IMM:
211          return x + load_imm(inst);
212       case MME_FERMI_OP_MERGE:
213          return (x & ~(BITFIELD_MASK(inst->bitfield.size) << inst->bitfield.dst_bit)) | (((y >> inst->bitfield.src_bit) & BITFIELD_MASK(inst->bitfield.size)) << inst->bitfield.dst_bit);
214       case MME_FERMI_OP_BFE_LSL_IMM:
215          return eval_bfe_lsl(y, x, inst->bitfield.dst_bit, inst->bitfield.size);
216       case MME_FERMI_OP_BFE_LSL_REG:
217          return eval_bfe_lsl(y, inst->bitfield.src_bit, x, inst->bitfield.size);
218       case MME_FERMI_OP_STATE:
219          return read_dmem(sim, (x + load_imm(inst)) * 4, "STATE");
220       // TODO: reverse MME_FERMI_OP_UNK6
221       default:
222          unreachable("Unhandled op");
223    }
224 }
225 
226 static void
set_mthd(struct mme_fermi_sim * sim,uint32_t val)227 set_mthd(struct mme_fermi_sim *sim, uint32_t val)
228 {
229    sim->mthd.mthd = (val & 0xfff) << 2;
230    sim->mthd.inc = (val >> 12) & 0xf;
231    sim->mthd.has_mthd = true;
232 }
233 
234 static void
emit_mthd(struct mme_fermi_sim * sim,uint32_t val)235 emit_mthd(struct mme_fermi_sim *sim, uint32_t val)
236 {
237    // TODO: understand what happens on hardware when no mthd has been set.
238    if (!sim->mthd.has_mthd)
239       return;
240 
241    uint16_t mthd = sim->mthd.mthd;
242 
243    write_dmem(sim, mthd, val, "EMIT");
244 
245    switch (mthd) {
246    case NV9097_SET_REPORT_SEMAPHORE_D: {
247       assert(val == 0x10000000);
248 
249       uint64_t addr = read_dmem64(sim, NV9097_SET_REPORT_SEMAPHORE_A, "SET_REPORT_SEMAPHORE");
250       uint32_t data = read_dmem(sim, NV9097_SET_REPORT_SEMAPHORE_C, "SET_REPORT_SEMAPHORE");
251 
252       uint32_t *mem = find_mem(sim, addr, "SET_REPORT_SEMAPHORE");
253       *mem = data;
254       break;
255    }
256    case NV902D_SET_MME_DATA_RAM_ADDRESS:
257       sim->ram.addr = val;
258       break;
259    case NV902D_SET_MME_MEM_ADDRESS_B:
260       sim->mem_addr = read_dmem64(sim, NV902D_SET_MME_MEM_ADDRESS_A, "SET_MME_MEM_ADDRESS");
261       break;
262    case NV902D_MME_DMA_READ_FIFOED:
263       sim->dma.read_fifo.count = val;
264       break;
265    default:
266       break;
267    }
268 
269    sim->mthd.mthd += sim->mthd.inc * 4;
270 }
271 
272 static void
eval_inst(struct mme_fermi_sim * sim,const struct mme_fermi_inst * inst)273 eval_inst(struct mme_fermi_sim *sim, const struct mme_fermi_inst *inst)
274 {
275    if (inst->op == MME_FERMI_OP_BRANCH) {
276       uint32_t val = load_reg(sim, inst->src[0]);
277       bool cond = inst->branch.not_zero ? val != 0 : val == 0;
278 
279       if (cond) {
280          int32_t offset = load_imm(inst);
281          assert((int)sim->ip + offset >= 0);
282          assert((int)sim->ip + offset < 0x1000);
283          sim->next_ip = sim->ip + offset;
284       }
285    } else {
286       uint32_t scratch = eval_op(sim, inst);
287       switch (inst->assign_op) {
288          case MME_FERMI_ASSIGN_OP_LOAD:
289             store_reg(sim, inst->dst, load_param(sim));
290             break;
291          case MME_FERMI_ASSIGN_OP_MOVE:
292             store_reg(sim, inst->dst, scratch);
293             break;
294          case MME_FERMI_ASSIGN_OP_MOVE_SET_MADDR:
295             store_reg(sim, inst->dst, scratch);
296             set_mthd(sim, scratch);
297             break;
298          case MME_FERMI_ASSIGN_OP_LOAD_EMIT:
299             store_reg(sim, inst->dst, load_param(sim));
300             emit_mthd(sim, scratch);
301             break;
302          case MME_FERMI_ASSIGN_OP_MOVE_EMIT:
303             store_reg(sim, inst->dst, scratch);
304             emit_mthd(sim, scratch);
305             break;
306          case MME_FERMI_ASSIGN_OP_LOAD_SET_MADDR:
307             store_reg(sim, inst->dst, scratch);
308             set_mthd(sim, scratch);
309             break;
310          case MME_FERMI_ASSIGN_OP_MOVE_SET_MADDR_LOAD_EMIT:
311             store_reg(sim, inst->dst, scratch);
312             set_mthd(sim, scratch);
313             emit_mthd(sim, load_param(sim));
314             break;
315          case MME_FERMI_ASSIGN_OP_MOVE_SET_MADDR_LOAD_EMIT_HIGH:
316             store_reg(sim, inst->dst, scratch);
317             set_mthd(sim, scratch);
318             emit_mthd(sim, (scratch >> 12) & 0x3f);
319             break;
320          default:
321             unreachable("Unhandled ASSIGN op");
322       }
323    }
324 }
325 
mme_fermi_sim(uint32_t inst_count,const struct mme_fermi_inst * insts,uint32_t param_count,const uint32_t * params,uint32_t mem_count,struct mme_fermi_sim_mem * mems)326 void mme_fermi_sim(uint32_t inst_count, const struct mme_fermi_inst *insts,
327                    uint32_t param_count, const uint32_t *params,
328                    uint32_t mem_count, struct mme_fermi_sim_mem *mems)
329 {
330    struct mme_fermi_sim sim = {
331       .param_count = param_count,
332       .params = params,
333       .mem_count = mem_count,
334       .mems = mems,
335    };
336 
337    sim.ip = 0;
338    /* First preload first argument in R1*/
339    store_reg(&sim, MME_FERMI_REG_R1, load_param(&sim));
340 
341    bool end_next = false;
342    bool ignore_next_exit = false;
343    bool should_delay_branch = false;
344 
345    while (!end_next) {
346       assert(sim.ip < inst_count);
347       const struct mme_fermi_inst *inst = &insts[sim.ip];
348 
349       if (!should_delay_branch) {
350          sim.next_ip = sim.ip + 1;
351       }
352 
353       eval_inst(&sim, inst);
354 
355       should_delay_branch = inst->op == MME_FERMI_OP_BRANCH && !inst->branch.no_delay;
356 
357       if (should_delay_branch) {
358          sim.ip = sim.ip + 1;
359       } else {
360          sim.ip = sim.next_ip;
361       }
362 
363       if (inst->end_next && should_delay_branch) {
364          ignore_next_exit = true;
365          continue;
366       }
367 
368       end_next = inst->end_next && !ignore_next_exit;
369       ignore_next_exit = false;
370    }
371 
372    // Handle delay slot at exit
373    assert(sim.ip < inst_count);
374    eval_inst(&sim, &insts[sim.ip]);
375 }
376