/*
 * Copyright © 2022 Collabora Ltd.
 * SPDX-License-Identifier: MIT
 */
#include "mme_tu104_sim.h"

#include <inttypes.h>

#include "mme_tu104.h"
#include "util/u_math.h"

#include "nv_push_clc597.h"

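/* Execution state for the Turing (TU104+) Macro Method Expander simulator.
 * Everything outside the macro engine itself (parameter loads, shadow state
 * reads, method emission, barriers, and the DREAD/DWRITE data RAM) is
 * reached through the state_ops callbacks, so the core can be driven by
 * different back-ends.
 */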
struct mme_tu104_sim {
   const struct mme_sim_state_ops *state_ops;
   void *state_handler;

   uint32_t load[2];

   struct {
      unsigned mthd:16;
      unsigned inc:4;
      unsigned _pad:12;
   } mthd;

   uint32_t set_regs;
   uint32_t regs[24]; /* R0-R23 */
   uint32_t alu_res[2];
   uint32_t alu_carry;

   uint16_t ip;
   uint16_t next_ip;
   bool stop;

   uint32_t loop_count;
   uint16_t loop_start;
   uint16_t loop_end;
};

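/* Returns true if the instruction reads the given register as its predicate
 * or as any ALU source.
 */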
static bool
inst_loads_reg(const struct mme_tu104_inst *inst,
               enum mme_tu104_reg reg)
{
   return inst->pred == reg ||
          inst->alu[0].src[0] == reg ||
          inst->alu[0].src[1] == reg ||
          inst->alu[1].src[0] == reg ||
          inst->alu[1].src[1] == reg;
}

static bool
inst_loads_out(const struct mme_tu104_inst *inst,
               enum mme_tu104_out_op out)
{
   return inst->out[0].mthd == out ||
          inst->out[0].emit == out ||
          inst->out[1].mthd == out ||
          inst->out[1].emit == out;
}

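/* Pulls the parameters this instruction consumes from the input FIFO.  An
 * instruction can read up to two parameters through LOAD0/LOAD1; the
 * convention asserted below is that LOAD1 is only used together with LOAD0.
 */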
static void
load_params(struct mme_tu104_sim *sim,
            const struct mme_tu104_inst *inst)
{
   const bool has_load0 = inst_loads_reg(inst, MME_TU104_REG_LOAD0) ||
                          inst_loads_out(inst, MME_TU104_OUT_OP_LOAD0);
   const bool has_load1 = inst_loads_reg(inst, MME_TU104_REG_LOAD1) ||
                          inst_loads_out(inst, MME_TU104_OUT_OP_LOAD1);
   assert(has_load0 || !has_load1);

   if (has_load0)
      sim->load[0] = sim->state_ops->load(sim->state_handler);

   if (has_load1)
      sim->load[1] = sim->state_ops->load(sim->state_handler);
}

static uint32_t
load_state(struct mme_tu104_sim *sim, uint16_t state)
{
   return sim->state_ops->state(sim->state_handler, state);
}

static void
eval_extended(struct mme_tu104_sim *sim,
              uint32_t x, uint32_t y)
{
   /* The only extended method we know about appears to be some sort of
    * barrier required when using READ_FIFOED.
    */
   assert(x == 0x1000);
   assert(y == 1);
   if (sim->state_ops->barrier)
      sim->state_ops->barrier(sim->state_handler);
}

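/* Reads an ALU source or predicate operand: a GPR (R0-R23, which must have
 * been written first), ZERO, one of the sign-extended 16-bit immediates
 * (IMM/IMMPAIR), the packed 32-bit immediate, or a loaded parameter.
 */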
static uint32_t
load_reg(struct mme_tu104_sim *sim,
         const struct mme_tu104_inst *inst,
         uint32_t imm_idx, enum mme_tu104_reg reg)
{
   if (reg <= MME_TU104_REG_R23) {
      assert(sim->set_regs & BITFIELD_BIT(reg));
      return sim->regs[reg];
   }

   switch (reg) {
   case MME_TU104_REG_ZERO:
      return 0;
   case MME_TU104_REG_IMM:
      assert(imm_idx < 2);
      /* Immediates are treated as signed for ALU ops */
      return (int16_t)inst->imm[imm_idx];
   case MME_TU104_REG_IMMPAIR:
      assert(imm_idx < 2);
      /* Immediates are treated as signed for ALU ops */
      return (int16_t)inst->imm[1 - imm_idx];
   case MME_TU104_REG_IMM32:
      return ((uint32_t)inst->imm[0] << 16) | inst->imm[1];
   case MME_TU104_REG_LOAD0:
      return sim->load[0];
   case MME_TU104_REG_LOAD1:
      return sim->load[1];
   default:
      unreachable("Unhandled register type");
   }
}

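/* Computes the 4-bit execution mask for this instruction from its predicate
 * mode and predicate register.  Bits 0-3 gate ALU0, ALU1, OUT0 and OUT1
 * respectively (see mme_tu104_sim_core()); the UUUU mode is unconditional,
 * so all four slots run.
 */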
static uint8_t
load_pred(struct mme_tu104_sim *sim,
          const struct mme_tu104_inst *inst)
{
   if (inst->pred_mode == MME_TU104_PRED_UUUU)
      return 0xf;

   uint32_t val = load_reg(sim, inst, -1, inst->pred);
   const char *pred = mme_tu104_pred_to_str(inst->pred_mode);

   uint8_t mask = 0;
   for (unsigned i = 0; i < 4; i++) {
      if (pred[i] != (val ? 'T' : 'F'))
         mask |= BITFIELD_BIT(i);
   }

   return mask;
}

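/* Writes an ALU destination.  Only R0-R23 are actually stored (and tracked
 * in set_regs); writes to ZERO are silently dropped.
 */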
static void
store_reg(struct mme_tu104_sim *sim,
          enum mme_tu104_reg reg,
          uint32_t val)
{
   if (reg <= MME_TU104_REG_R23) {
      sim->set_regs |= BITFIELD_BIT(reg);
      sim->regs[reg] = val;
   } else if (reg <= MME_TU104_REG_ZERO) {
      /* Do nothing */
   } else {
      unreachable("Unhandled register type");
   }
}

static bool
eval_cond(enum mme_tu104_alu_op op, uint32_t x, uint32_t y)
{
   switch (op) {
   case MME_TU104_ALU_OP_BLT:
   case MME_TU104_ALU_OP_SLT:
      return (int32_t)x < (int32_t)y;
   case MME_TU104_ALU_OP_BLTU:
   case MME_TU104_ALU_OP_SLTU:
      return (uint32_t)x < (uint32_t)y;
   case MME_TU104_ALU_OP_BLE:
   case MME_TU104_ALU_OP_SLE:
      return (int32_t)x <= (int32_t)y;
   case MME_TU104_ALU_OP_BLEU:
   case MME_TU104_ALU_OP_SLEU:
      return (uint32_t)x <= (uint32_t)y;
   case MME_TU104_ALU_OP_BEQ:
   case MME_TU104_ALU_OP_SEQ:
      return x == y;
   default:
      unreachable("Not a comparison op");
   }
}

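/* Evaluates ALU slot alu_idx of the instruction.  This covers arithmetic
 * (with the carry/high bits from slot 0 feeding ADDC/SUBB/MULH in slot 1),
 * bit operations, shadow-state reads (STATE), control flow (LOOP, JAL and
 * the conditional branches, which update next_ip or stop the macro), and
 * data-RAM access (DREAD/DWRITE).  The result is written to the destination
 * register and latched in alu_res[] for the output stage.
 */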
static void
eval_alu(struct mme_tu104_sim *sim,
         const struct mme_tu104_inst *inst,
         uint32_t alu_idx)
{
   const struct mme_tu104_alu *alu = &inst->alu[alu_idx];
   const uint32_t x = load_reg(sim, inst, alu_idx, alu->src[0]);
   const uint32_t y = load_reg(sim, inst, alu_idx, alu->src[1]);

   uint32_t res = 0;
   switch (inst->alu[alu_idx].op) {
   case MME_TU104_ALU_OP_ADD:
      res = x + y;
      sim->alu_carry = res < x;
      break;
   case MME_TU104_ALU_OP_ADDC:
      assert(alu_idx == 1);
      assert(inst->alu[0].op == MME_TU104_ALU_OP_ADD);
      res = x + y + sim->alu_carry;
      break;
   case MME_TU104_ALU_OP_SUB:
      res = x - y;
      sim->alu_carry = res > x;
      break;
   case MME_TU104_ALU_OP_SUBB:
      assert(alu_idx == 1);
      assert(inst->alu[0].op == MME_TU104_ALU_OP_SUB);
      res = x - y - sim->alu_carry;
      break;
   case MME_TU104_ALU_OP_MUL: {
      /* Sign extend but use uint64_t for the multiply so that we avoid
       * undefined behavior from possible signed multiply roll-over.
       */
      const uint64_t x_u64 = (int64_t)(int32_t)x;
      const uint64_t y_u64 = (int64_t)(int32_t)y;
      const uint64_t xy_u64 = x_u64 * y_u64;
      res = xy_u64;
      sim->alu_carry = xy_u64 >> 32;
      break;
   }
   case MME_TU104_ALU_OP_MULH:
      assert(inst->alu[alu_idx].src[0] == MME_TU104_REG_ZERO);
      assert(inst->alu[alu_idx].src[1] == MME_TU104_REG_ZERO);
      res = sim->alu_carry;
      break;
   case MME_TU104_ALU_OP_MULU: {
      const uint64_t x_u64 = x;
      const uint64_t y_u64 = y;
      const uint64_t xy_u64 = x_u64 * y_u64;
      res = xy_u64;
      sim->alu_carry = xy_u64 >> 32;
      break;
   }
   case MME_TU104_ALU_OP_EXTENDED:
      eval_extended(sim, x, y);
      break;
   case MME_TU104_ALU_OP_CLZ:
      res = __builtin_clz(x);
      break;
   case MME_TU104_ALU_OP_SLL:
      res = x << (y & 31);
      break;
   case MME_TU104_ALU_OP_SRL:
      res = x >> (y & 31);
      break;
   case MME_TU104_ALU_OP_SRA:
      res = (int32_t)x >> (y & 31);
      break;
   case MME_TU104_ALU_OP_AND:
      res = x & y;
      break;
   case MME_TU104_ALU_OP_NAND:
      res = ~(x & y);
      break;
   case MME_TU104_ALU_OP_OR:
      res = x | y;
      break;
   case MME_TU104_ALU_OP_XOR:
      res = x ^ y;
      break;
   case MME_TU104_ALU_OP_MERGE: {
      uint16_t immed = inst->imm[alu_idx];
      uint32_t dst_pos  = (immed >> 10) & 0x3f;
      uint32_t bits     = (immed >> 5)  & 0x1f;
      uint32_t src_pos  = (immed >> 0)  & 0x1f;
      res = (x & ~(BITFIELD_MASK(bits) << dst_pos)) |
            (((y >> src_pos) & BITFIELD_MASK(bits)) << dst_pos);
      break;
   }
   case MME_TU104_ALU_OP_SLT:
   case MME_TU104_ALU_OP_SLTU:
   case MME_TU104_ALU_OP_SLE:
   case MME_TU104_ALU_OP_SLEU:
   case MME_TU104_ALU_OP_SEQ:
      res = eval_cond(inst->alu[alu_idx].op, x, y) ? ~0u : 0u;
      break;
   case MME_TU104_ALU_OP_STATE:
      res = load_state(sim, (uint16_t)(x + y) * 4);
      break;
   case MME_TU104_ALU_OP_LOOP:
      assert(sim->loop_count == 0);
      assert(inst->alu[alu_idx].dst == MME_TU104_REG_ZERO);
      assert(inst->alu[alu_idx].src[1] == MME_TU104_REG_ZERO);
      sim->loop_count = MAX2(1, x) - 1;
      sim->loop_start = sim->ip;
      sim->loop_end = sim->ip + inst->imm[alu_idx] - 1;
      assert(sim->loop_end > sim->ip);
      break;
   case MME_TU104_ALU_OP_JAL: {
      assert(inst->alu[alu_idx].dst == MME_TU104_REG_ZERO);
      assert(inst->alu[alu_idx].src[0] == MME_TU104_REG_ZERO);
      assert(inst->alu[alu_idx].src[1] == MME_TU104_REG_ZERO);
      /* No idea what bit 15 does.  The NVIDIA blob always sets it. */
      assert(inst->imm[alu_idx] & BITFIELD_BIT(15));
      uint16_t offset = (inst->imm[alu_idx] & BITFIELD_MASK(15));
      sim->next_ip = sim->ip + offset;
      res = 0;
      break;
   }
   case MME_TU104_ALU_OP_BLT:
   case MME_TU104_ALU_OP_BLTU:
   case MME_TU104_ALU_OP_BLE:
   case MME_TU104_ALU_OP_BLEU:
   case MME_TU104_ALU_OP_BEQ: {
      assert(inst->alu[alu_idx].dst == MME_TU104_REG_ZERO);
      bool expect = (inst->imm[alu_idx] & BITFIELD_BIT(15)) != 0;
      if (eval_cond(inst->alu[alu_idx].op, x, y) == expect) {
         int16_t offset = util_mask_sign_extend(inst->imm[alu_idx], 13);
         if ((uint16_t)offset == 0xf000) {
            sim->stop = true;
            break;
         }

         assert((int)sim->ip + offset >= 0);
         assert((int)sim->ip + offset < 0x1000);
         sim->next_ip = sim->ip + offset;
      }
      break;
   }
   case MME_TU104_ALU_OP_DREAD: {
      assert(inst->alu[alu_idx].src[1] == MME_TU104_REG_ZERO);
      uint32_t *dram = sim->state_ops->map_dram(sim->state_handler, x);
      res = *dram;
      break;
   }
   case MME_TU104_ALU_OP_DWRITE: {
      assert(inst->alu[alu_idx].dst == MME_TU104_REG_ZERO);
      uint32_t *dram = sim->state_ops->map_dram(sim->state_handler, x);
      *dram = y;
      break;
   }
   default:
      unreachable("Unhandled ALU op");
   }

   sim->alu_res[alu_idx] = res;
   store_reg(sim, inst->alu[alu_idx].dst, res);
}

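/* Selects the 32-bit value consumed by an output op: an ALU result, a loaded
 * parameter, or one of the instruction's immediates.
 */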
static uint32_t
load_out(struct mme_tu104_sim *sim,
         const struct mme_tu104_inst *inst,
         enum mme_tu104_out_op op)
{
   switch (op) {
   case MME_TU104_OUT_OP_ALU0:
      return sim->alu_res[0];
   case MME_TU104_OUT_OP_ALU1:
      return sim->alu_res[1];
   case MME_TU104_OUT_OP_LOAD0:
      return sim->load[0];
   case MME_TU104_OUT_OP_LOAD1:
      return sim->load[1];
   case MME_TU104_OUT_OP_IMM0:
      return inst->imm[0];
   case MME_TU104_OUT_OP_IMM1:
      return inst->imm[1];
   case MME_TU104_OUT_OP_IMMHIGH0:
      return inst->imm[0] >> 12;
   case MME_TU104_OUT_OP_IMMHIGH1:
      return inst->imm[1] >> 12;
   case MME_TU104_OUT_OP_IMM32:
      return ((uint32_t)inst->imm[0] << 16) | inst->imm[1];
   default:
      unreachable("Unhandled output op");
   }
}

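/* Runs output slot out_idx.  The mthd op sets the current method: bits 11:0
 * of the value are the method index (so << 2 gives the byte address) and
 * bits 15:12 the per-emit increment.  The emit op sends one data dword to
 * the current method and then advances it by inc methods.
 */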
static void
eval_out(struct mme_tu104_sim *sim,
         const struct mme_tu104_inst *inst,
         uint32_t out_idx)
{
   if (inst->out[out_idx].mthd != MME_TU104_OUT_OP_NONE) {
      uint32_t data = load_out(sim, inst, inst->out[out_idx].mthd);

      sim->mthd.mthd = (data & 0xfff) << 2;
      sim->mthd.inc = (data >> 12) & 0xf;
   }

   if (inst->out[out_idx].emit != MME_TU104_OUT_OP_NONE) {
      uint32_t data = load_out(sim, inst, inst->out[out_idx].emit);

      sim->state_ops->mthd(sim->state_handler,
                           sim->mthd.mthd, data);
      sim->mthd.mthd += sim->mthd.inc * 4;
   }
}

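/* Core interpreter loop.  Each iteration loads any parameters the
 * instruction consumes, computes the predicate mask, then runs the two ALUs
 * and the two output slots that the mask enables.  end_next gives
 * delay-slot-like semantics: the instruction after the one flagged end_next
 * still executes before the macro stops.  LOOP repetition is handled by
 * rewinding next_ip to loop_start + 1 until loop_count runs out.
 */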
void
mme_tu104_sim_core(uint32_t inst_count, const struct mme_tu104_inst *insts,
                   const struct mme_sim_state_ops *state_ops,
                   void *state_handler)
{
   struct mme_tu104_sim sim = {
      .state_ops = state_ops,
      .state_handler = state_handler,
   };

   bool end_next = false;
   while (true) {
      assert(sim.ip < inst_count);
      const struct mme_tu104_inst *inst = &insts[sim.ip];
      sim.next_ip = sim.ip + 1;

      load_params(&sim, inst);

      uint8_t pred = load_pred(&sim, inst);

      /* No idea why the HW has this rule but it does */
      assert(inst->alu[0].op != MME_TU104_ALU_OP_STATE ||
             inst->alu[1].op != MME_TU104_ALU_OP_STATE);

      if (pred & BITFIELD_BIT(0))
         eval_alu(&sim, inst, 0);
      if (pred & BITFIELD_BIT(1))
         eval_alu(&sim, inst, 1);
      if (pred & BITFIELD_BIT(2))
         eval_out(&sim, inst, 0);
      if (pred & BITFIELD_BIT(3))
         eval_out(&sim, inst, 1);

      if (end_next || sim.stop)
         break;

      end_next = inst->end_next;

      if (sim.loop_count > 0 && sim.ip == sim.loop_end) {
         sim.loop_count--;
         sim.next_ip = sim.loop_start + 1;
      }

      sim.ip = sim.next_ip;
   }
}

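/* Default mme_sim_state_ops back-end used by mme_tu104_sim(): macro
 * parameters come from a caller-provided array, DREAD/DWRITE hit a local
 * data RAM, READ_FIFOED pulls from the bound memory ranges, and methods the
 * simulator does not model are dumped to stdout.
 */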
struct mme_tu104_state_sim {
   uint32_t param_count;
   const uint32_t *params;

   /* Bound memory ranges */
   uint32_t mem_count;
   struct mme_tu104_sim_mem *mems;

   /* SET_MME_MEM_ADDRESS_A/B */
   uint64_t mem_addr_lo;
   uint64_t mem_addr_hi;

   /* RAM, accessed by DREAD/DWRITE */
   struct {
      uint32_t data[MME_TU104_DRAM_COUNT];

      /* SET_MME_DATA_RAM_ADDRESS */
      uint32_t addr;
   } ram;

   struct {
      struct {
         uint32_t data[1024];
         uint32_t count;
      } read_fifo;
   } dma;

   /* NVC597_SET_MME_SHADOW_SCRATCH(i) */
   uint32_t scratch[MME_TU104_SCRATCH_COUNT];

   struct {
      uint32_t addr_hi;
      uint32_t addr_lo;
      uint32_t data;
   } report_sem;
};

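/* Resolves a GPU virtual address against the bound memory ranges and returns
 * a CPU pointer into the backing data; simulates a fault (and aborts) if the
 * address is not covered by any range.
 */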
static uint32_t *
find_mem(struct mme_tu104_state_sim *sim, uint64_t addr, const char *op_desc)
{
   for (uint32_t i = 0; i < sim->mem_count; i++) {
      if (addr < sim->mems[i].addr)
         continue;

      uint64_t offset = addr - sim->mems[i].addr;
      if (offset >= sim->mems[i].size)
         continue;

      assert(sim->mems[i].data != NULL);
      return (uint32_t *)((char *)sim->mems[i].data + offset);
   }

   fprintf(stderr, "FAULT in %s at address 0x%"PRIx64"\n", op_desc, addr);
   abort();
}

static uint32_t
mme_tu104_state_sim_load(void *_sim)
{
   struct mme_tu104_state_sim *sim = _sim;

   assert(sim->param_count > 0);
   uint32_t data = *sim->params;
   sim->params++;
   sim->param_count--;

   return data;
}

static uint32_t
mme_tu104_state_sim_state(void *_sim, uint16_t addr)
{
   struct mme_tu104_state_sim *sim = _sim;
   assert(addr % 4 == 0);

   if (NVC597_SET_MME_SHADOW_SCRATCH(0) <= addr &&
       addr < NVC597_CALL_MME_MACRO(0)) {
      uint32_t i = (addr - NVC597_SET_MME_SHADOW_SCRATCH(0)) / 4;
      assert(i < ARRAY_SIZE(sim->scratch));
      return sim->scratch[i];
   }

   return 0;
}

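/* Handles a method emitted by the macro.  Semaphore reports are written
 * straight to the bound memory, the DMA/RAM setup methods update simulator
 * state, shadow scratch writes land in scratch[], and everything else is
 * decoded and dumped to stdout.
 */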
static void
mme_tu104_state_sim_mthd(void *_sim, uint16_t addr, uint32_t data)
{
   struct mme_tu104_state_sim *sim = _sim;
   assert(addr % 4 == 0);

   switch (addr) {
   case NVC597_SET_REPORT_SEMAPHORE_A:
      sim->report_sem.addr_hi = data;
      break;
   case NVC597_SET_REPORT_SEMAPHORE_B:
      sim->report_sem.addr_lo = data;
      break;
   case NVC597_SET_REPORT_SEMAPHORE_C:
      sim->report_sem.data = data;
      break;
   case NVC597_SET_REPORT_SEMAPHORE_D: {
      assert(data == 0x10000000);
      uint64_t sem_report_addr =
         ((uint64_t)sim->report_sem.addr_hi << 32) | sim->report_sem.addr_lo;
      uint32_t *mem = find_mem(sim, sem_report_addr, "SET_REPORT_SEMAPHORE");
      *mem = sim->report_sem.data;
      break;
   }
   case NVC597_SET_MME_DATA_RAM_ADDRESS:
      sim->ram.addr = data;
      break;
   case NVC597_SET_MME_MEM_ADDRESS_A:
      sim->mem_addr_hi = data;
      break;
   case NVC597_SET_MME_MEM_ADDRESS_B:
      sim->mem_addr_lo = data;
      break;
   case NVC597_MME_DMA_READ_FIFOED:
      sim->dma.read_fifo.count = data;
      break;
   default:
      if (NVC597_SET_MME_SHADOW_SCRATCH(0) <= addr &&
          addr < NVC597_CALL_MME_MACRO(0)) {
         uint32_t i = (addr - NVC597_SET_MME_SHADOW_SCRATCH(0)) / 4;
         assert(i < ARRAY_SIZE(sim->scratch));
         sim->scratch[i] = data;
      } else {
         fprintf(stdout, "%s:\n", P_PARSE_NVC597_MTHD(addr));
         P_DUMP_NVC597_MTHD_DATA(stdout, addr, data, "    ");
      }
      break;
   }
}

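/* Implements the extended-method barrier used with MME_DMA_READ_FIFOED:
 * copies the requested number of dwords from the bound memory at
 * SET_MME_MEM_ADDRESS_A/B into the read FIFO and exposes them as the next
 * macro parameters.
 */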
static void
mme_tu104_state_sim_barrier(void *_sim)
{
   struct mme_tu104_state_sim *sim = _sim;

   if (sim->dma.read_fifo.count == 0)
      return;

   const uint64_t mem_addr =
      ((uint64_t)sim->mem_addr_hi << 32) | sim->mem_addr_lo;

   for (uint32_t i = 0; i < sim->dma.read_fifo.count; i++) {
      uint32_t *src = find_mem(sim, mem_addr + i * 4,
                               "MME_DMA_READ_FIFOED");
      assert(src != NULL);
      sim->dma.read_fifo.data[i] = *src;
   }

   sim->param_count = sim->dma.read_fifo.count;
   sim->params = sim->dma.read_fifo.data;
}

static uint32_t *
mme_tu104_state_sim_map_dram(void *_sim, uint32_t idx)
{
   struct mme_tu104_state_sim *sim = _sim;

   assert(idx < ARRAY_SIZE(sim->ram.data));
   return &sim->ram.data[idx];
}

static const struct mme_sim_state_ops mme_tu104_state_sim_ops = {
   .load = mme_tu104_state_sim_load,
   .state = mme_tu104_state_sim_state,
   .mthd = mme_tu104_state_sim_mthd,
   .barrier = mme_tu104_state_sim_barrier,
   .map_dram = mme_tu104_state_sim_map_dram,
};

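/* Convenience entry point that runs a macro against the built-in state
 * simulator above.  A minimal usage sketch (the macro-decoding helper named
 * here is hypothetical, not part of this file):
 *
 *    struct mme_tu104_inst insts[64];
 *    uint32_t inst_count = decode_my_macro(macro_dwords, ndw, insts);
 *    const uint32_t params[] = { 3, 0x42 };
 *    struct mme_tu104_sim_mem mem = {
 *       .addr = 0x100000, .size = 4096, .data = malloc(4096),
 *    };
 *    mme_tu104_sim(inst_count, insts, ARRAY_SIZE(params), params, 1, &mem);
 */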
void
mme_tu104_sim(uint32_t inst_count, const struct mme_tu104_inst *insts,
              uint32_t param_count, const uint32_t *params,
              uint32_t mem_count, struct mme_tu104_sim_mem *mems)
{
   struct mme_tu104_state_sim state_sim = {
      .param_count = param_count,
      .params = params,
      .mem_count = mem_count,
      .mems = mems,
   };

   mme_tu104_sim_core(inst_count, insts, &mme_tu104_state_sim_ops, &state_sim);
}