1 /*
2 * Copyright © 2021 Google, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <ctype.h>
26 #include <errno.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <sys/mman.h>
31 #include <unistd.h>
32
33 #include "util/u_math.h"
34
35 #include "freedreno_pm4.h"
36
37 #include "emu.h"
38 #include "util.h"
39
/*
 * Rotate x left by r bits, treating x as a 32-bit (rotl32) or 64-bit
 * (rotl64) quantity.
 *
 * The rotate count is reduced modulo the width, so a count of zero (or a
 * count at or beyond the width) never turns into a shift by the full
 * width, which is undefined behavior in C.  The result has the type of
 * the (promoted) x, so x is expected to be an unsigned value of the
 * matching width.  Both arguments may be evaluated more than once.
 */
#define rotl32(x, r) (((x) << ((r) & 31)) | ((x) >> ((32 - ((r) & 31)) & 31)))
#define rotl64(x, r) (((x) << ((r) & 63)) | ((x) >> ((64 - ((r) & 63)) & 63)))
42
/**
 * AFUC emulator.  Currently only supports a6xx.
 *
 * TODO: to add a5xx support, it might be easier to compile this file
 * multiple times, using conditional compilation to deal with the
 * differences between generations.
 */
49
50 static uint32_t
emu_alu(struct emu * emu,afuc_opc opc,uint32_t src1,uint32_t src2)51 emu_alu(struct emu *emu, afuc_opc opc, uint32_t src1, uint32_t src2)
52 {
53 uint64_t tmp;
54 switch (opc) {
55 case OPC_ADD:
56 tmp = (uint64_t)src1 + (uint64_t)src2;
57 emu->carry = tmp >> 32;
58 return (uint32_t)tmp;
59 case OPC_ADDHI:
60 return src1 + src2 + emu->carry;
61 case OPC_SUB:
62 tmp = (uint64_t)src1 - (uint64_t)src2;
63 emu->carry = tmp >> 32;
64 return (uint32_t)tmp;
65 case OPC_SUBHI:
66 return src1 - src2 + emu->carry;
67 case OPC_AND:
68 return src1 & src2;
69 case OPC_OR:
70 return src1 | src2;
71 case OPC_XOR:
72 return src1 ^ src2;
73 case OPC_NOT:
74 return ~src1;
75 case OPC_SHL:
76 return src1 << src2;
77 case OPC_USHR:
78 return src1 >> src2;
79 case OPC_ISHR:
80 return (int32_t)src1 >> src2;
81 case OPC_ROT:
82 if (src2 & 0x80000000)
83 return rotl64(src1, -*(int32_t *)&src2);
84 else
85 return rotl32(src1, src2);
86 case OPC_MUL8:
87 return (src1 & 0xff) * (src2 & 0xff);
88 case OPC_MIN:
89 return MIN2(src1, src2);
90 case OPC_MAX:
91 return MAX2(src1, src2);
92 case OPC_CMP:
93 if (src1 > src2)
94 return 0x00;
95 else if (src1 == src2)
96 return 0x2b;
97 return 0x1e;
98 case OPC_MSB:
99 if (!src2)
100 return 0;
101 return util_last_bit(src2) - 1;
102 default:
103 printf("unhandled alu opc: 0x%02x\n", opc);
104 exit(1);
105 }
106 }
107
108 /**
109 * Helper to calculate load/store address based on LOAD_STORE_HI
110 */
111 static uintptr_t
load_store_addr(struct emu * emu,unsigned gpr)112 load_store_addr(struct emu *emu, unsigned gpr)
113 {
114 EMU_CONTROL_REG(LOAD_STORE_HI);
115
116 uintptr_t addr = emu_get_reg32(emu, &LOAD_STORE_HI);
117 addr <<= 32;
118
119 return addr + emu_get_gpr_reg(emu, gpr);
120 }
121
122 static void
emu_instr(struct emu * emu,afuc_instr * instr)123 emu_instr(struct emu *emu, afuc_instr *instr)
124 {
125 uint32_t rem = emu_get_gpr_reg(emu, REG_REM);
126 afuc_opc opc;
127 bool rep;
128
129 afuc_get_opc(instr, &opc, &rep);
130
131 switch (opc) {
132 case OPC_NOP:
133 break;
134 case OPC_ADD ... OPC_CMP: {
135 uint32_t val = emu_alu(emu, opc,
136 emu_get_gpr_reg(emu, instr->alui.src),
137 instr->alui.uimm);
138 emu_set_gpr_reg(emu, instr->alui.dst, val);
139 break;
140 }
141 case OPC_MOVI: {
142 uint32_t val = instr->movi.uimm << instr->movi.shift;
143 emu_set_gpr_reg(emu, instr->movi.dst, val);
144 break;
145 }
146 case OPC_ALU: {
147 uint32_t val = emu_alu(emu, instr->alu.alu,
148 emu_get_gpr_reg(emu, instr->alu.src1),
149 emu_get_gpr_reg(emu, instr->alu.src2));
150 emu_set_gpr_reg(emu, instr->alu.dst, val);
151
152 if (instr->alu.xmov) {
153 unsigned m = MIN2(instr->alu.xmov, rem);
154
155 assert(m <= 3);
156
157 if (m == 1) {
158 emu_set_gpr_reg(emu, REG_REM, --rem);
159 emu_dump_state_change(emu);
160 emu_set_gpr_reg(emu, REG_DATA,
161 emu_get_gpr_reg(emu, instr->alu.src2));
162 } else if (m == 2) {
163 emu_set_gpr_reg(emu, REG_REM, --rem);
164 emu_dump_state_change(emu);
165 emu_set_gpr_reg(emu, REG_DATA,
166 emu_get_gpr_reg(emu, instr->alu.src2));
167 emu_set_gpr_reg(emu, REG_REM, --rem);
168 emu_dump_state_change(emu);
169 emu_set_gpr_reg(emu, REG_DATA,
170 emu_get_gpr_reg(emu, instr->alu.src2));
171 } else if (m == 3) {
172 emu_set_gpr_reg(emu, REG_REM, --rem);
173 emu_dump_state_change(emu);
174 emu_set_gpr_reg(emu, REG_DATA,
175 emu_get_gpr_reg(emu, instr->alu.src2));
176 emu_set_gpr_reg(emu, REG_REM, --rem);
177 emu_dump_state_change(emu);
178 emu_set_gpr_reg(emu, instr->alu.dst,
179 emu_get_gpr_reg(emu, instr->alu.src2));
180 emu_set_gpr_reg(emu, REG_REM, --rem);
181 emu_dump_state_change(emu);
182 emu_set_gpr_reg(emu, REG_DATA,
183 emu_get_gpr_reg(emu, instr->alu.src2));
184 }
185 }
186 break;
187 }
188 case OPC_CWRITE6: {
189 uint32_t src1 = emu_get_gpr_reg(emu, instr->control.src1);
190 uint32_t src2 = emu_get_gpr_reg(emu, instr->control.src2);
191
192 if (instr->control.flags == 0x4) {
193 emu_set_gpr_reg(emu, instr->control.src2, src2 + instr->control.uimm);
194 } else if (instr->control.flags && !emu->quiet) {
195 printf("unhandled flags: %x\n", instr->control.flags);
196 }
197
198 emu_set_control_reg(emu, src2 + instr->control.uimm, src1);
199 break;
200 }
201 case OPC_CREAD6: {
202 uint32_t src2 = emu_get_gpr_reg(emu, instr->control.src2);
203
204 if (instr->control.flags == 0x4) {
205 emu_set_gpr_reg(emu, instr->control.src2, src2 + instr->control.uimm);
206 } else if (instr->control.flags && !emu->quiet) {
207 printf("unhandled flags: %x\n", instr->control.flags);
208 }
209
210 emu_set_gpr_reg(emu, instr->control.src1,
211 emu_get_control_reg(emu, src2 + instr->control.uimm));
212 break;
213 }
214 case OPC_LOAD6: {
215 uintptr_t addr = load_store_addr(emu, instr->control.src2) +
216 instr->control.uimm;
217
218 if (instr->control.flags == 0x4) {
219 uint32_t src2 = emu_get_gpr_reg(emu, instr->control.src2);
220 emu_set_gpr_reg(emu, instr->control.src2, src2 + instr->control.uimm);
221 } else if (instr->control.flags && !emu->quiet) {
222 printf("unhandled flags: %x\n", instr->control.flags);
223 }
224
225 uint32_t val = emu_mem_read_dword(emu, addr);
226
227 emu_set_gpr_reg(emu, instr->control.src1, val);
228
229 break;
230 }
231 case OPC_STORE6: {
232 uintptr_t addr = load_store_addr(emu, instr->control.src2) +
233 instr->control.uimm;
234
235 if (instr->control.flags == 0x4) {
236 uint32_t src2 = emu_get_gpr_reg(emu, instr->control.src2);
237 emu_set_gpr_reg(emu, instr->control.src2, src2 + instr->control.uimm);
238 } else if (instr->control.flags && !emu->quiet) {
239 printf("unhandled flags: %x\n", instr->control.flags);
240 }
241
242 uint32_t val = emu_get_gpr_reg(emu, instr->control.src1);
243
244 emu_mem_write_dword(emu, addr, val);
245
246 break;
247 }
248 case OPC_BRNEI ... OPC_BREQB: {
249 uint32_t off = emu->gpr_regs.pc + instr->br.ioff;
250 uint32_t src = emu_get_gpr_reg(emu, instr->br.src);
251
252 if (opc == OPC_BRNEI) {
253 if (src != instr->br.bit_or_imm)
254 emu->branch_target = off;
255 } else if (opc == OPC_BREQI) {
256 if (src == instr->br.bit_or_imm)
257 emu->branch_target = off;
258 } else if (opc == OPC_BRNEB) {
259 if (!(src & (1 << instr->br.bit_or_imm)))
260 emu->branch_target = off;
261 } else if (opc == OPC_BREQB) {
262 if (src & (1 << instr->br.bit_or_imm))
263 emu->branch_target = off;
264 } else {
265 assert(0);
266 }
267 break;
268 }
269 case OPC_RET: {
270 assert(emu->call_stack_idx > 0);
271
272 /* counter-part to 'call' instruction, also has a delay slot: */
273 emu->branch_target = emu->call_stack[--emu->call_stack_idx];
274
275 break;
276 }
277 case OPC_CALL: {
278 assert(emu->call_stack_idx < ARRAY_SIZE(emu->call_stack));
279
280 /* call looks to have same delay-slot behavior as branch/etc, so
281 * presumably the return PC is two instructions later:
282 */
283 emu->call_stack[emu->call_stack_idx++] = emu->gpr_regs.pc + 2;
284 emu->branch_target = instr->call.uoff;
285
286 break;
287 }
288 case OPC_WIN: {
289 assert(!emu->branch_target);
290 emu->run_mode = false;
291 emu->waitin = true;
292 break;
293 }
294 /* OPC_PREEMPTLEAVE6 */
295 case OPC_SETSECURE: {
296 // TODO this acts like a conditional branch, but in which case
297 // does it branch?
298 break;
299 }
300 default:
301 printf("unhandled opc: 0x%02x\n", opc);
302 exit(1);
303 }
304
305 if (rep) {
306 assert(rem > 0);
307 emu_set_gpr_reg(emu, REG_REM, --rem);
308 }
309 }
310
311 void
emu_step(struct emu * emu)312 emu_step(struct emu *emu)
313 {
314 afuc_instr *instr = (void *)&emu->instrs[emu->gpr_regs.pc];
315 afuc_opc opc;
316 bool rep;
317
318 emu_main_prompt(emu);
319
320 uint32_t branch_target = emu->branch_target;
321 emu->branch_target = 0;
322
323 bool waitin = emu->waitin;
324 emu->waitin = false;
325
326 afuc_get_opc(instr, &opc, &rep);
327
328 if (rep) {
329 do {
330 if (!emu_get_gpr_reg(emu, REG_REM))
331 break;
332
333 emu_clear_state_change(emu);
334 emu_instr(emu, instr);
335
336 /* defer last state-change dump until after any
337 * post-delay-slot handling below:
338 */
339 if (emu_get_gpr_reg(emu, REG_REM))
340 emu_dump_state_change(emu);
341 } while (true);
342 } else {
343 emu_clear_state_change(emu);
344 emu_instr(emu, instr);
345 }
346
347 emu->gpr_regs.pc++;
348
349 if (branch_target) {
350 emu->gpr_regs.pc = branch_target;
351 }
352
353 if (waitin) {
354 uint32_t hdr = emu_get_gpr_reg(emu, 1);
355 uint32_t id, count;
356
357 if (pkt_is_type4(hdr)) {
358 id = afuc_pm4_id("PKT4");
359 count = type4_pkt_size(hdr);
360
361 /* Possibly a hack, not sure what the hw actually
362 * does here, but we want to mask out the pkt
363 * type field from the hdr, so that PKT4 handler
364 * doesn't see it and interpret it as part as the
365 * register offset:
366 */
367 emu->gpr_regs.val[1] &= 0x0fffffff;
368 } else if (pkt_is_type7(hdr)) {
369 id = cp_type7_opcode(hdr);
370 count = type7_pkt_size(hdr);
371 } else {
372 printf("Invalid opcode: 0x%08x\n", hdr);
373 exit(1); /* GPU goes *boom* */
374 }
375
376 assert(id < ARRAY_SIZE(emu->jmptbl));
377
378 emu_set_gpr_reg(emu, REG_REM, count);
379 emu->gpr_regs.pc = emu->jmptbl[id];
380 }
381
382 emu_dump_state_change(emu);
383 }
384
385 void
emu_run_bootstrap(struct emu * emu)386 emu_run_bootstrap(struct emu *emu)
387 {
388 EMU_CONTROL_REG(PACKET_TABLE_WRITE_ADDR);
389
390 emu->quiet = true;
391 emu->run_mode = true;
392
393 while (emu_get_reg32(emu, &PACKET_TABLE_WRITE_ADDR) < 0x80) {
394 emu_step(emu);
395 }
396 }
397
398
399 static void
check_access(struct emu * emu,uintptr_t gpuaddr,unsigned sz)400 check_access(struct emu *emu, uintptr_t gpuaddr, unsigned sz)
401 {
402 if ((gpuaddr % sz) != 0) {
403 printf("unaligned access fault: %p\n", (void *)gpuaddr);
404 exit(1);
405 }
406
407 if ((gpuaddr + sz) >= EMU_MEMORY_SIZE) {
408 printf("iova fault: %p\n", (void *)gpuaddr);
409 exit(1);
410 }
411 }
412
413 uint32_t
emu_mem_read_dword(struct emu * emu,uintptr_t gpuaddr)414 emu_mem_read_dword(struct emu *emu, uintptr_t gpuaddr)
415 {
416 check_access(emu, gpuaddr, 4);
417 return *(uint32_t *)(emu->gpumem + gpuaddr);
418 }
419
420 static void
mem_write_dword(struct emu * emu,uintptr_t gpuaddr,uint32_t val)421 mem_write_dword(struct emu *emu, uintptr_t gpuaddr, uint32_t val)
422 {
423 check_access(emu, gpuaddr, 4);
424 *(uint32_t *)(emu->gpumem + gpuaddr) = val;
425 }
426
427 void
emu_mem_write_dword(struct emu * emu,uintptr_t gpuaddr,uint32_t val)428 emu_mem_write_dword(struct emu *emu, uintptr_t gpuaddr, uint32_t val)
429 {
430 mem_write_dword(emu, gpuaddr, val);
431 assert(emu->gpumem_written == ~0);
432 emu->gpumem_written = gpuaddr;
433 }
434
435 void
emu_init(struct emu * emu)436 emu_init(struct emu *emu)
437 {
438 emu->gpumem = mmap(NULL, EMU_MEMORY_SIZE,
439 PROT_READ | PROT_WRITE,
440 MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE,
441 0, 0);
442 if (emu->gpumem == MAP_FAILED) {
443 printf("Could not allocate GPU memory: %s\n", strerror(errno));
444 exit(1);
445 }
446
447 /* Copy the instructions into GPU memory: */
448 for (unsigned i = 0; i < emu->sizedwords; i++) {
449 mem_write_dword(emu, EMU_INSTR_BASE + (4 * i), emu->instrs[i]);
450 }
451
452 EMU_GPU_REG(CP_SQE_INSTR_BASE);
453 EMU_GPU_REG(CP_LPAC_SQE_INSTR_BASE);
454
455 /* Setup the address of the SQE fw, just use the normal CPU ptr address: */
456 if (emu->lpac) {
457 emu_set_reg64(emu, &CP_LPAC_SQE_INSTR_BASE, EMU_INSTR_BASE);
458 } else {
459 emu_set_reg64(emu, &CP_SQE_INSTR_BASE, EMU_INSTR_BASE);
460 }
461
462 if (emu->gpu_id == 660) {
463 emu_set_control_reg(emu, 0, 3 << 28);
464 } else if (emu->gpu_id == 650) {
465 emu_set_control_reg(emu, 0, 1 << 28);
466 }
467 }
468
469 void
emu_fini(struct emu * emu)470 emu_fini(struct emu *emu)
471 {
472 uint32_t *instrs = emu->instrs;
473 unsigned sizedwords = emu->sizedwords;
474 unsigned gpu_id = emu->gpu_id;
475
476 munmap(emu->gpumem, EMU_MEMORY_SIZE);
477 memset(emu, 0, sizeof(*emu));
478
479 emu->instrs = instrs;
480 emu->sizedwords = sizedwords;
481 emu->gpu_id = gpu_id;
482 }
483