1 /*
2 * Copyright © 2022 Mary Guillemard
3 * SPDX-License-Identifier: MIT
4 */
5 #include "mme_fermi_sim.h"
6
7 #include <inttypes.h>
8
9 #include "mme_fermi.h"
10 #include "util/u_math.h"
11
12 #include "nvk_cl9097.h"
13 #include "nvk_cl902d.h"
14
/* Complete architectural state of the simulated Fermi MME. */
struct mme_fermi_sim {
   /* Remaining macro parameters, consumed front-to-back by load_param(). */
   uint32_t param_count;
   const uint32_t *params;

   /* Bound memory ranges */
   uint32_t mem_count;
   struct mme_fermi_sim_mem *mems;

   /* SET_MME_MEM_ADDRESS_A/B */
   uint64_t mem_addr;

   /* RAM, accessed by STATE */
   struct {
      uint32_t data[MME_FERMI_DRAM_COUNT];

      /* SET_MME_MEM_RAM_ADDRESS */
      uint32_t addr;
   } ram;

   struct {
      // TODO: check if read_fifo is supported.
      struct {
         uint32_t data[1024];
         uint64_t count;
      } read_fifo;
   } dma;

   /* Current method state, updated by the *_SET_MADDR assign ops
    * (see set_mthd()) and advanced by emit_mthd().
    */
   struct {
      unsigned mthd:16; /* method byte address ((val & 0xfff) << 2) */
      unsigned inc:4;   /* dword increment applied after each emit */
      bool has_mthd:1;  /* false until the first MADDR assignment */
   } mthd;

   /* SET_MME_SHADOW_SCRATCH(i) */
   uint32_t scratch[MME_FERMI_SCRATCH_COUNT];

   /* R1..R7; R0 reads as zero and ignores writes (see load_reg/store_reg) */
   uint32_t regs[7];
   uint32_t alu_carry; /* carry/borrow flag from ADD/ADDC/SUB/SUBB */
   uint16_t ip;        /* instruction pointer of the current instruction */
   uint16_t next_ip;   /* IP to execute next (branch target or ip + 1) */
};
56
57 static uint32_t *
find_mem(struct mme_fermi_sim * sim,uint64_t addr,const char * op_desc)58 find_mem(struct mme_fermi_sim *sim, uint64_t addr, const char *op_desc)
59 {
60 for (uint32_t i = 0; i < sim->mem_count; i++) {
61 if (addr < sim->mems[i].addr)
62 continue;
63
64 uint64_t offset = addr - sim->mems[i].addr;
65 if (offset >= sim->mems[i].size)
66 continue;
67
68 assert(sim->mems[i].data != NULL);
69 return (uint32_t *)((char *)sim->mems[i].data + offset);
70 }
71
72 fprintf(stderr, "FAULT in %s at address 0x%"PRIx64"\n", op_desc, addr);
73 abort();
74 }
75
76 static uint32_t
read_dmem(struct mme_fermi_sim * sim,uint64_t addr,const char * op_desc)77 read_dmem(struct mme_fermi_sim *sim, uint64_t addr, const char *op_desc)
78 {
79 uint32_t ram_index = addr / 4;
80
81 if (ram_index < ARRAY_SIZE(sim->ram.data)) {
82 return sim->ram.data[ram_index];
83 }
84
85 if (addr >= NV9097_SET_MME_SHADOW_SCRATCH(0) && addr < NV9097_CALL_MME_MACRO(0)) {
86 return sim->scratch[ram_index - NV9097_SET_MME_SHADOW_SCRATCH(0) / 4];
87 }
88
89 fprintf(stderr, "FAULT in %s at DMEM address 0x%"PRIx64"\n (READ)", op_desc, addr);
90 abort();
91 }
92
93 static void
write_dmem(struct mme_fermi_sim * sim,uint64_t addr,uint32_t val,const char * op_desc)94 write_dmem(struct mme_fermi_sim *sim, uint64_t addr, uint32_t val, const char *op_desc)
95 {
96 uint32_t ram_index = addr / 4;
97
98 if (ram_index < ARRAY_SIZE(sim->ram.data)) {
99 sim->ram.data[ram_index] = val;
100 }
101 else if (addr >= NV9097_SET_MME_SHADOW_SCRATCH(0) && addr < NV9097_CALL_MME_MACRO(0)) {
102 sim->scratch[ram_index - NV9097_SET_MME_SHADOW_SCRATCH(0) / 4] = val;
103 } else {
104 fprintf(stderr, "FAULT in %s at DMEM address 0x%"PRIx64" (WRITE)\n", op_desc, addr);
105 abort();
106 }
107 }
108
/* Read a 64-bit value from two consecutive DMEM dwords: the dword at
 * `addr` holds the high 32 bits, the one at `addr + 4` the low 32 bits.
 */
static uint64_t
read_dmem64(struct mme_fermi_sim *sim, uint64_t addr, const char *op_desc)
{
   uint64_t hi = read_dmem(sim, addr, op_desc);
   uint64_t lo = read_dmem(sim, addr + 4, op_desc);

   return (hi << 32) | lo;
}
114
load_param(struct mme_fermi_sim * sim)115 static uint32_t load_param(struct mme_fermi_sim *sim)
116 {
117 if (sim->param_count == 0) {
118 // TODO: know what happens on hardware here
119 return 0;
120 }
121
122 uint32_t param = *sim->params;
123
124 sim->params++;
125 sim->param_count--;
126
127 return param;
128 }
129
load_reg(struct mme_fermi_sim * sim,enum mme_fermi_reg reg)130 static uint32_t load_reg(struct mme_fermi_sim *sim, enum mme_fermi_reg reg)
131 {
132 if (reg == MME_FERMI_REG_ZERO) {
133 return 0;
134 }
135
136 return sim->regs[reg - 1];
137 }
138
store_reg(struct mme_fermi_sim * sim,enum mme_fermi_reg reg,uint32_t val)139 static void store_reg(struct mme_fermi_sim *sim, enum mme_fermi_reg reg, uint32_t val)
140 {
141 if (reg == MME_FERMI_REG_ZERO) {
142 return;
143 }
144
145 sim->regs[reg - 1] = val;
146 }
147
/* Sign-extend the instruction's 18-bit immediate field to 32 bits. */
static int32_t load_imm(const struct mme_fermi_inst *inst)
{
   return util_mask_sign_extend(inst->imm, 18);
}
152
/* Extract a `size`-bit field of `value` starting at `src_bit` and shift it
 * left to `dst_bit`.  Any bit position outside 0..31 yields 0.
 */
static uint32_t eval_bfe_lsl(uint32_t value, uint32_t src_bit, uint32_t dst_bit, uint8_t size)
{
   if (src_bit >= 32 || dst_bit >= 32)
      return 0;

   uint32_t field = (value >> src_bit) & BITFIELD_MASK(size);
   return field << dst_bit;
}
161
eval_op(struct mme_fermi_sim * sim,const struct mme_fermi_inst * inst)162 static uint32_t eval_op(struct mme_fermi_sim *sim, const struct mme_fermi_inst *inst) {
163 assert(inst->op != MME_FERMI_OP_BRANCH);
164
165 uint32_t x = load_reg(sim, inst->src[0]);
166 uint32_t y = load_reg(sim, inst->src[1]);
167
168 switch (inst->op) {
169 case MME_FERMI_OP_ALU_REG: {
170 uint32_t res = 0;
171
172 switch (inst->alu_op) {
173 case MME_FERMI_ALU_OP_ADD:
174 res = x + y;
175 sim->alu_carry = res < x;
176 break;
177 case MME_FERMI_ALU_OP_ADDC:
178 res = x + y + sim->alu_carry;
179 sim->alu_carry = res < x;
180 break;
181 case MME_FERMI_ALU_OP_SUB:
182 res = x - y;
183 sim->alu_carry = res > x;
184 break;
185 case MME_FERMI_ALU_OP_SUBB:
186 res = x - y - sim->alu_carry;
187 sim->alu_carry = res > x;
188 break;
189 case MME_FERMI_ALU_OP_XOR:
190 res = x ^ y;
191 break;
192 case MME_FERMI_ALU_OP_OR:
193 res = x | y;
194 break;
195 case MME_FERMI_ALU_OP_AND:
196 res = x & y;
197 break;
198 case MME_FERMI_ALU_OP_AND_NOT:
199 res = x & ~y;
200 break;
201 case MME_FERMI_ALU_OP_NAND:
202 res = ~(x & y);
203 break;
204 default:
205 unreachable("Unhandled ALU op");
206 }
207
208 return res;
209 }
210 case MME_FERMI_OP_ADD_IMM:
211 return x + load_imm(inst);
212 case MME_FERMI_OP_MERGE:
213 return (x & ~(BITFIELD_MASK(inst->bitfield.size) << inst->bitfield.dst_bit)) | (((y >> inst->bitfield.src_bit) & BITFIELD_MASK(inst->bitfield.size)) << inst->bitfield.dst_bit);
214 case MME_FERMI_OP_BFE_LSL_IMM:
215 return eval_bfe_lsl(y, x, inst->bitfield.dst_bit, inst->bitfield.size);
216 case MME_FERMI_OP_BFE_LSL_REG:
217 return eval_bfe_lsl(y, inst->bitfield.src_bit, x, inst->bitfield.size);
218 case MME_FERMI_OP_STATE:
219 return read_dmem(sim, (x + load_imm(inst)) * 4, "STATE");
220 // TODO: reverse MME_FERMI_OP_UNK6
221 default:
222 unreachable("Unhandled op");
223 }
224 }
225
226 static void
set_mthd(struct mme_fermi_sim * sim,uint32_t val)227 set_mthd(struct mme_fermi_sim *sim, uint32_t val)
228 {
229 sim->mthd.mthd = (val & 0xfff) << 2;
230 sim->mthd.inc = (val >> 12) & 0xf;
231 sim->mthd.has_mthd = true;
232 }
233
/* Emit `val` to the current method: record it in shadow DMEM, model the
 * side effects of the few methods the simulator understands, then advance
 * the method address by the configured increment.
 */
static void
emit_mthd(struct mme_fermi_sim *sim, uint32_t val)
{
   // TODO: understand what happens on hardware when no mthd has been set.
   if (!sim->mthd.has_mthd)
      return;

   uint16_t mthd = sim->mthd.mthd;

   /* Every emitted method value is mirrored into shadow state. */
   write_dmem(sim, mthd, val, "EMIT");

   switch (mthd) {
   case NV9097_SET_REPORT_SEMAPHORE_D: {
      /* Only the simple 32-bit payload release is modeled. */
      assert(val == 0x10000000);

      /* A/B hold the 64-bit target address, C the payload. */
      uint64_t addr = read_dmem64(sim, NV9097_SET_REPORT_SEMAPHORE_A, "SET_REPORT_SEMAPHORE");
      uint32_t data = read_dmem(sim, NV9097_SET_REPORT_SEMAPHORE_C, "SET_REPORT_SEMAPHORE");

      uint32_t *mem = find_mem(sim, addr, "SET_REPORT_SEMAPHORE");
      *mem = data;
      break;
   }
   case NV902D_SET_MME_DATA_RAM_ADDRESS:
      sim->ram.addr = val;
      break;
   case NV902D_SET_MME_MEM_ADDRESS_B:
      /* Latch the full 64-bit address once the low half (B) arrives. */
      sim->mem_addr = read_dmem64(sim, NV902D_SET_MME_MEM_ADDRESS_A, "SET_MME_MEM_ADDRESS");
      break;
   case NV902D_MME_DMA_READ_FIFOED:
      sim->dma.read_fifo.count = val;
      break;
   default:
      break;
   }

   /* inc is in dwords; mthd is a byte address. */
   sim->mthd.mthd += sim->mthd.inc * 4;
}
271
/* Execute a single instruction: either resolve a branch (setting next_ip
 * when taken) or evaluate the ALU result and apply the assignment op,
 * which may store to a register, set the method address, and/or emit.
 * The exact store/set/emit ordering per op is load-bearing.
 */
static void
eval_inst(struct mme_fermi_sim *sim, const struct mme_fermi_inst *inst)
{
   if (inst->op == MME_FERMI_OP_BRANCH) {
      uint32_t val = load_reg(sim, inst->src[0]);
      bool cond = inst->branch.not_zero ? val != 0 : val == 0;

      if (cond) {
         /* Branches are IP-relative with a signed 18-bit offset. */
         int32_t offset = load_imm(inst);
         assert((int)sim->ip + offset >= 0);
         assert((int)sim->ip + offset < 0x1000);
         sim->next_ip = sim->ip + offset;
      }
   } else {
      uint32_t scratch = eval_op(sim, inst);
      switch (inst->assign_op) {
      case MME_FERMI_ASSIGN_OP_LOAD:
         /* ALU result is discarded; dst gets the next parameter. */
         store_reg(sim, inst->dst, load_param(sim));
         break;
      case MME_FERMI_ASSIGN_OP_MOVE:
         store_reg(sim, inst->dst, scratch);
         break;
      case MME_FERMI_ASSIGN_OP_MOVE_SET_MADDR:
         store_reg(sim, inst->dst, scratch);
         set_mthd(sim, scratch);
         break;
      case MME_FERMI_ASSIGN_OP_LOAD_EMIT:
         /* dst gets a parameter; the ALU result is what gets emitted. */
         store_reg(sim, inst->dst, load_param(sim));
         emit_mthd(sim, scratch);
         break;
      case MME_FERMI_ASSIGN_OP_MOVE_EMIT:
         store_reg(sim, inst->dst, scratch);
         emit_mthd(sim, scratch);
         break;
      case MME_FERMI_ASSIGN_OP_LOAD_SET_MADDR:
         store_reg(sim, inst->dst, scratch);
         set_mthd(sim, scratch);
         break;
      case MME_FERMI_ASSIGN_OP_MOVE_SET_MADDR_LOAD_EMIT:
         /* Set the method from the ALU result, then emit a parameter. */
         store_reg(sim, inst->dst, scratch);
         set_mthd(sim, scratch);
         emit_mthd(sim, load_param(sim));
         break;
      case MME_FERMI_ASSIGN_OP_MOVE_SET_MADDR_LOAD_EMIT_HIGH:
         /* Emit bits 12..17 of the result (the high MADDR field). */
         store_reg(sim, inst->dst, scratch);
         set_mthd(sim, scratch);
         emit_mthd(sim, (scratch >> 12) & 0x3f);
         break;
      default:
         unreachable("Unhandled ASSIGN op");
      }
   }
}
325
mme_fermi_sim(uint32_t inst_count,const struct mme_fermi_inst * insts,uint32_t param_count,const uint32_t * params,uint32_t mem_count,struct mme_fermi_sim_mem * mems)326 void mme_fermi_sim(uint32_t inst_count, const struct mme_fermi_inst *insts,
327 uint32_t param_count, const uint32_t *params,
328 uint32_t mem_count, struct mme_fermi_sim_mem *mems)
329 {
330 struct mme_fermi_sim sim = {
331 .param_count = param_count,
332 .params = params,
333 .mem_count = mem_count,
334 .mems = mems,
335 };
336
337 sim.ip = 0;
338 /* First preload first argument in R1*/
339 store_reg(&sim, MME_FERMI_REG_R1, load_param(&sim));
340
341 bool end_next = false;
342 bool ignore_next_exit = false;
343 bool should_delay_branch = false;
344
345 while (!end_next) {
346 assert(sim.ip < inst_count);
347 const struct mme_fermi_inst *inst = &insts[sim.ip];
348
349 if (!should_delay_branch) {
350 sim.next_ip = sim.ip + 1;
351 }
352
353 eval_inst(&sim, inst);
354
355 should_delay_branch = inst->op == MME_FERMI_OP_BRANCH && !inst->branch.no_delay;
356
357 if (should_delay_branch) {
358 sim.ip = sim.ip + 1;
359 } else {
360 sim.ip = sim.next_ip;
361 }
362
363 if (inst->end_next && should_delay_branch) {
364 ignore_next_exit = true;
365 continue;
366 }
367
368 end_next = inst->end_next && !ignore_next_exit;
369 ignore_next_exit = false;
370 }
371
372 // Handle delay slot at exit
373 assert(sim.ip < inst_count);
374 eval_inst(&sim, &insts[sim.ip]);
375 }
376