• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2021 Google, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 
24 #include <assert.h>
25 #include <ctype.h>
26 #include <errno.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <sys/mman.h>
31 #include <unistd.h>
32 
33 #include "util/u_math.h"
34 
35 #include "freedreno_pm4.h"
36 
37 #include "isaspec.h"
38 
39 #include "emu.h"
40 #include "util.h"
41 
/*
 * Rotate-left helpers built from two plain C shifts.
 *
 * Both arguments are expanded twice, so they must be free of side effects.
 * The shifts are performed in the (promoted) type of `x`; the result is only
 * well defined when `r` is greater than zero and smaller than the bit width
 * of that type, since C leaves shifts by a count >= the width undefined.
 */
#define rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
#define rotl64(x,r) (((x) << (r)) | ((x) >> (64 - (r))))

/*
 * File-scope register handles referenced by the instruction emulation below
 * (SP and STACK0 via the SQE register accessors, DRAW_STATE_SET_HDR via
 * emu_reg_offset()).  The EMU_*_REG() macros are defined outside this file --
 * presumably in emu.h; confirm there for what exactly they declare.
 */
EMU_SQE_REG(SP);
EMU_SQE_REG(STACK0);
EMU_CONTROL_REG(DRAW_STATE_SET_HDR);
48 
49 /**
50  * AFUC emulator.  Currently only supports a6xx
51  *
52  * TODO to add a5xx it might be easier to compile this multiple times
53  * with conditional compile to deal with differences between generations.
54  */
55 
56 static uint32_t
emu_alu(struct emu * emu,afuc_opc opc,uint32_t src1,uint32_t src2)57 emu_alu(struct emu *emu, afuc_opc opc, uint32_t src1, uint32_t src2)
58 {
59    uint64_t tmp;
60    switch (opc) {
61    case OPC_ADD:
62       tmp = (uint64_t)src1 + (uint64_t)src2;
63       emu->carry = tmp >> 32;
64       return (uint32_t)tmp;
65    case OPC_ADDHI:
66       return src1 + src2 + emu->carry;
67    case OPC_SUB:
68       tmp = (uint64_t)src1 - (uint64_t)src2;
69       emu->carry = tmp >> 32;
70       return (uint32_t)tmp;
71    case OPC_SUBHI:
72       return src1 - src2 + emu->carry;
73    case OPC_AND:
74       return src1 & src2;
75    case OPC_OR:
76       return src1 | src2;
77    case OPC_XOR:
78       return src1 ^ src2;
79    case OPC_NOT:
80       return ~src1;
81    case OPC_SHL:
82       return src1 << src2;
83    case OPC_USHR:
84       return src1 >> src2;
85    case OPC_ISHR:
86       return (int32_t)src1 >> src2;
87    case OPC_ROT:
88       if (src2 & 0x80000000)
89          return rotl64(src1, -*(int32_t *)&src2);
90       else
91          return rotl32(src1, src2);
92    case OPC_MUL8:
93       return (src1 & 0xff) * (src2 & 0xff);
94    case OPC_MIN:
95       return MIN2(src1, src2);
96    case OPC_MAX:
97       return MAX2(src1, src2);
98    case OPC_CMP:
99       if (src1 > src2)
100          return 0x00;
101       else if (src1 == src2)
102          return 0x2b;
103       return 0x1e;
104    case OPC_BIC:
105       return src1 & ~src2;
106    case OPC_MSB:
107       if (!src2)
108          return 0;
109       return util_last_bit(src2) - 1;
110    case OPC_SETBIT: {
111       unsigned bit = src2 >> 1;
112       unsigned val = src2 & 1;
113       return (src1 & ~(1u << bit)) | (val << bit);
114    }
115    default:
116       printf("unhandled alu opc: 0x%02x\n", opc);
117       exit(1);
118    }
119 }
120 
121 /**
122  * Helper to calculate load/store address based on LOAD_STORE_HI
123  */
124 static uintptr_t
load_store_addr(struct emu * emu,unsigned gpr)125 load_store_addr(struct emu *emu, unsigned gpr)
126 {
127    EMU_CONTROL_REG(LOAD_STORE_HI);
128 
129    uintptr_t addr = emu_get_reg32(emu, &LOAD_STORE_HI);
130    addr <<= 32;
131 
132    return addr + emu_get_gpr_reg(emu, gpr);
133 }
134 
135 static void
emu_instr(struct emu * emu,struct afuc_instr * instr)136 emu_instr(struct emu *emu, struct afuc_instr *instr)
137 {
138    uint32_t rem = emu_get_gpr_reg(emu, REG_REM);
139 
140    switch (instr->opc) {
141    case OPC_NOP:
142       break;
143    case OPC_MSB:
144    case OPC_ADD ... OPC_BIC: {
145       uint32_t val = emu_alu(emu, instr->opc,
146                              emu_get_gpr_reg(emu, instr->src1),
147                              instr->has_immed ? instr->immed :
148                              emu_get_gpr_reg(emu, instr->src2));
149       emu_set_gpr_reg(emu, instr->dst, val);
150 
151       if (instr->xmov) {
152          unsigned m = MIN2(instr->xmov, rem);
153 
154          assert(m <= 3);
155 
156          if (m == 1) {
157             emu_set_gpr_reg(emu, REG_REM, --rem);
158             emu_dump_state_change(emu);
159             emu_set_gpr_reg(emu, REG_DATA,
160                             emu_get_gpr_reg(emu, instr->src2));
161          } else if (m == 2) {
162             emu_set_gpr_reg(emu, REG_REM, --rem);
163             emu_dump_state_change(emu);
164             emu_set_gpr_reg(emu, REG_DATA,
165                             emu_get_gpr_reg(emu, instr->src2));
166             emu_set_gpr_reg(emu, REG_REM, --rem);
167             emu_dump_state_change(emu);
168             emu_set_gpr_reg(emu, REG_DATA,
169                             emu_get_gpr_reg(emu, instr->src2));
170          } else if (m == 3) {
171             emu_set_gpr_reg(emu, REG_REM, --rem);
172             emu_dump_state_change(emu);
173             emu_set_gpr_reg(emu, REG_DATA,
174                             emu_get_gpr_reg(emu, instr->src2));
175             emu_set_gpr_reg(emu, REG_REM, --rem);
176             emu_dump_state_change(emu);
177             emu_set_gpr_reg(emu, instr->dst,
178                             emu_get_gpr_reg(emu, instr->src2));
179             emu_set_gpr_reg(emu, REG_REM, --rem);
180             emu_dump_state_change(emu);
181             emu_set_gpr_reg(emu, REG_DATA,
182                             emu_get_gpr_reg(emu, instr->src2));
183          }
184       }
185       break;
186    }
187    case OPC_MOVI: {
188       uint32_t val = instr->immed << instr->shift;
189       emu_set_gpr_reg(emu, instr->dst, val);
190       break;
191    }
192    case OPC_SETBITI: {
193       uint32_t src = emu_get_gpr_reg(emu, instr->src1);
194       emu_set_gpr_reg(emu, instr->dst, src | (1u << instr->bit));
195       break;
196    }
197    case OPC_CLRBIT: {
198       uint32_t src = emu_get_gpr_reg(emu, instr->src1);
199       emu_set_gpr_reg(emu, instr->dst, src & ~(1u << instr->bit));
200       break;
201    }
202    case OPC_UBFX: {
203       uint32_t src = emu_get_gpr_reg(emu, instr->src1);
204       unsigned lo = instr->bit, hi = instr->immed;
205       uint32_t dst = (src >> lo) & BITFIELD_MASK(hi - lo + 1);
206       emu_set_gpr_reg(emu, instr->dst, dst);
207       break;
208    }
209    case OPC_BFI: {
210       uint32_t src = emu_get_gpr_reg(emu, instr->src1);
211       unsigned lo = instr->bit, hi = instr->immed;
212       src = (src & BITFIELD_MASK(hi - lo + 1)) << lo;
213       emu_set_gpr_reg(emu, instr->dst, emu_get_gpr_reg(emu, instr->dst) | src);
214       break;
215    }
216    case OPC_CWRITE: {
217       uint32_t src1 = emu_get_gpr_reg(emu, instr->src1);
218       uint32_t src2 = emu_get_gpr_reg(emu, instr->src2);
219       uint32_t reg = src2 + instr->immed;
220 
221       if (instr->preincrement) {
222          emu_set_gpr_reg(emu, instr->src2, reg);
223       }
224 
225       emu_set_control_reg(emu, reg, src1);
226 
227       for (unsigned i = 0; i < instr->sds; i++) {
228          uint32_t src1 = emu_get_gpr_reg(emu, instr->src1);
229 
230          /* TODO: There is likely a DRAW_STATE_SET_BASE register on a6xx, as
231           * there is on a7xx, and we should be writing that instead of setting
232           * the base directly.
233           */
234          if (reg == emu_reg_offset(&DRAW_STATE_SET_HDR))
235             emu_set_draw_state_base(emu, i, src1);
236       }
237       break;
238    }
239    case OPC_CREAD: {
240       uint32_t src1 = emu_get_gpr_reg(emu, instr->src1);
241 
242       if (instr->preincrement) {
243          emu_set_gpr_reg(emu, instr->src1, src1 + instr->immed);
244       }
245 
246       emu_set_gpr_reg(emu, instr->dst,
247                       emu_get_control_reg(emu, src1 + instr->immed));
248       break;
249    }
250    case OPC_SWRITE: {
251       uint32_t src1 = emu_get_gpr_reg(emu, instr->src1);
252       uint32_t src2 = emu_get_gpr_reg(emu, instr->src2);
253 
254       if (instr->preincrement) {
255          emu_set_gpr_reg(emu, instr->src2, src2 + instr->immed);
256       }
257 
258       emu_set_sqe_reg(emu, src2 + instr->immed, src1);
259       break;
260    }
261    case OPC_SREAD: {
262       uint32_t src1 = emu_get_gpr_reg(emu, instr->src1);
263 
264       if (instr->preincrement) {
265          emu_set_gpr_reg(emu, instr->src1, src1 + instr->immed);
266       }
267 
268       emu_set_gpr_reg(emu, instr->dst,
269                       emu_get_sqe_reg(emu, src1 + instr->immed));
270       break;
271    }
272    case OPC_LOAD: {
273       uintptr_t addr = load_store_addr(emu, instr->src1) +
274             instr->immed;
275 
276       if (instr->preincrement) {
277          uint32_t src1 = emu_get_gpr_reg(emu, instr->src1);
278          emu_set_gpr_reg(emu, instr->src1, src1 + instr->immed);
279       }
280 
281       uint32_t val = emu_mem_read_dword(emu, addr);
282 
283       emu_set_gpr_reg(emu, instr->dst, val);
284 
285       break;
286    }
287    case OPC_STORE: {
288       uintptr_t addr = load_store_addr(emu, instr->src2) +
289             instr->immed;
290 
291       if (instr->preincrement) {
292          uint32_t src2 = emu_get_gpr_reg(emu, instr->src2);
293          emu_set_gpr_reg(emu, instr->src2, src2 + instr->immed);
294       }
295 
296       uint32_t val = emu_get_gpr_reg(emu, instr->src1);
297 
298       emu_mem_write_dword(emu, addr, val);
299 
300       break;
301    }
302    case OPC_BRNEI ... OPC_BREQB: {
303       uint32_t off = emu->gpr_regs.pc + instr->offset;
304       uint32_t src = emu_get_gpr_reg(emu, instr->src1);
305 
306       if (instr->opc == OPC_BRNEI) {
307          if (src != instr->immed)
308             emu->branch_target = off;
309       } else if (instr->opc == OPC_BREQI) {
310          if (src == instr->immed)
311             emu->branch_target = off;
312       } else if (instr->opc == OPC_BRNEB) {
313          if (!(src & (1 << instr->bit)))
314             emu->branch_target = off;
315       } else if (instr->opc == OPC_BREQB) {
316          if (src & (1 << instr->bit))
317             emu->branch_target = off;
318       } else {
319          assert(0);
320       }
321       break;
322    }
323    case OPC_RET: {
324       unsigned sp = emu_get_reg32(emu, &SP);
325       assert(sp > 0);
326 
327       /* counter-part to 'call' instruction, also has a delay slot: */
328       emu->branch_target = emu_get_sqe_reg(emu, emu_reg_offset(&STACK0) + sp - 1);
329       emu_set_reg32(emu, &SP, sp - 1);
330 
331       break;
332    }
333    case OPC_CALL: {
334       unsigned sp = emu_get_reg32(emu, &SP);
335       assert(sp + emu_reg_offset(&STACK0) < ARRAY_SIZE(emu->sqe_regs.val));
336 
337       /* call looks to have same delay-slot behavior as branch/etc, so
338        * presumably the return PC is two instructions later:
339        */
340       emu_set_sqe_reg(emu, emu_reg_offset(&STACK0) + sp, emu->gpr_regs.pc + 2);
341       emu_set_reg32(emu, &SP, sp + 1);
342       emu->branch_target = instr->literal;
343 
344       break;
345    }
346    case OPC_WAITIN: {
347       assert(!emu->branch_target);
348       emu->run_mode = false;
349       emu->waitin = true;
350       break;
351    }
352    /* OPC_PREEMPTLEAVE6 */
353    case OPC_SETSECURE: {
354       // TODO this acts like a conditional branch, but in which case
355       // does it branch?
356       break;
357    }
358    default:
359       printf("unhandled opc: 0x%02x\n", instr->opc);
360       exit(1);
361    }
362 
363    if (instr->rep) {
364       assert(rem > 0);
365       emu_set_gpr_reg(emu, REG_REM, --rem);
366    }
367 }
368 
369 void
emu_step(struct emu * emu)370 emu_step(struct emu *emu)
371 {
372    struct afuc_instr *instr;
373    bool decoded = isa_decode((void *)&instr,
374                              (void *)&emu->instrs[emu->gpr_regs.pc],
375                              &(struct isa_decode_options) {
376                               .gpu_id = gpuver,
377                              });
378 
379    if (!decoded) {
380       uint32_t instr_val = emu->instrs[emu->gpr_regs.pc];
381       if ((instr_val >> 27) == 0) {
382          /* This is printed as an undecoded literal to show the immediate
383           * payload, but when executing it's just a NOP.
384           */
385          instr = calloc(1, sizeof(struct afuc_instr));
386          instr->opc = OPC_NOP;
387       } else {
388          printf("unmatched instruction: 0x%08x\n", instr_val);
389          exit(1);
390       }
391    }
392 
393    emu_main_prompt(emu);
394 
395    uint32_t branch_target = emu->branch_target;
396    emu->branch_target = 0;
397 
398    bool waitin = emu->waitin;
399    emu->waitin = false;
400 
401    if (instr->rep) {
402       do {
403          if (!emu_get_gpr_reg(emu, REG_REM))
404             break;
405 
406          emu_clear_state_change(emu);
407          emu_instr(emu, instr);
408 
409          /* defer last state-change dump until after any
410           * post-delay-slot handling below:
411           */
412          if (emu_get_gpr_reg(emu, REG_REM))
413             emu_dump_state_change(emu);
414       } while (true);
415    } else {
416       emu_clear_state_change(emu);
417       emu_instr(emu, instr);
418    }
419 
420    emu->gpr_regs.pc++;
421 
422    if (branch_target) {
423       emu->gpr_regs.pc = branch_target;
424    }
425 
426    if (waitin) {
427       uint32_t hdr = emu_get_gpr_reg(emu, 1);
428       uint32_t id, count;
429 
430       if (pkt_is_type4(hdr)) {
431          id = afuc_pm4_id("PKT4");
432          count = type4_pkt_size(hdr);
433 
434          /* Possibly a hack, not sure what the hw actually
435           * does here, but we want to mask out the pkt
436           * type field from the hdr, so that PKT4 handler
437           * doesn't see it and interpret it as part as the
438           * register offset:
439           */
440          emu->gpr_regs.val[1] &= 0x0fffffff;
441       } else if (pkt_is_type7(hdr)) {
442          id = cp_type7_opcode(hdr);
443          count = type7_pkt_size(hdr);
444       } else {
445          printf("Invalid opcode: 0x%08x\n", hdr);
446          exit(1);  /* GPU goes *boom* */
447       }
448 
449       assert(id < ARRAY_SIZE(emu->jmptbl));
450 
451       emu_set_gpr_reg(emu, REG_REM, count);
452       emu->gpr_regs.pc = emu->jmptbl[id];
453    }
454 
455    emu_dump_state_change(emu);
456 
457    free(instr);
458 }
459 
460 void
emu_run_bootstrap(struct emu * emu)461 emu_run_bootstrap(struct emu *emu)
462 {
463    EMU_CONTROL_REG(PACKET_TABLE_WRITE_ADDR);
464 
465    emu->quiet = true;
466    emu->run_mode = true;
467 
468    while (emu_get_reg32(emu, &PACKET_TABLE_WRITE_ADDR) < 0x80) {
469       emu_step(emu);
470    }
471 }
472 
473 
474 static void
check_access(struct emu * emu,uintptr_t gpuaddr,unsigned sz)475 check_access(struct emu *emu, uintptr_t gpuaddr, unsigned sz)
476 {
477    if ((gpuaddr % sz) != 0) {
478       printf("unaligned access fault: %p\n", (void *)gpuaddr);
479       exit(1);
480    }
481 
482    if ((gpuaddr + sz) >= EMU_MEMORY_SIZE) {
483       printf("iova fault: %p\n", (void *)gpuaddr);
484       exit(1);
485    }
486 }
487 
488 uint32_t
emu_mem_read_dword(struct emu * emu,uintptr_t gpuaddr)489 emu_mem_read_dword(struct emu *emu, uintptr_t gpuaddr)
490 {
491    check_access(emu, gpuaddr, 4);
492    return *(uint32_t *)(emu->gpumem + gpuaddr);
493 }
494 
495 static void
mem_write_dword(struct emu * emu,uintptr_t gpuaddr,uint32_t val)496 mem_write_dword(struct emu *emu, uintptr_t gpuaddr, uint32_t val)
497 {
498    check_access(emu, gpuaddr, 4);
499    *(uint32_t *)(emu->gpumem + gpuaddr) = val;
500 }
501 
502 void
emu_mem_write_dword(struct emu * emu,uintptr_t gpuaddr,uint32_t val)503 emu_mem_write_dword(struct emu *emu, uintptr_t gpuaddr, uint32_t val)
504 {
505    mem_write_dword(emu, gpuaddr, val);
506    assert(emu->gpumem_written == ~0);
507    emu->gpumem_written = gpuaddr;
508 }
509 
510 void
emu_init(struct emu * emu)511 emu_init(struct emu *emu)
512 {
513    emu->gpumem = mmap(NULL, EMU_MEMORY_SIZE,
514                       PROT_READ | PROT_WRITE,
515                       MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE,
516                       0, 0);
517    if (emu->gpumem == MAP_FAILED) {
518       printf("Could not allocate GPU memory: %s\n", strerror(errno));
519       exit(1);
520    }
521 
522    /* Copy the instructions into GPU memory: */
523    for (unsigned i = 0; i < emu->sizedwords; i++) {
524       mem_write_dword(emu, EMU_INSTR_BASE + (4 * i), emu->instrs[i]);
525    }
526 
527    EMU_GPU_REG(CP_SQE_INSTR_BASE);
528    EMU_GPU_REG(CP_LPAC_SQE_INSTR_BASE);
529    EMU_CONTROL_REG(BV_INSTR_BASE);
530    EMU_CONTROL_REG(LPAC_INSTR_BASE);
531 
532    /* Setup the address of the SQE fw, just use the normal CPU ptr address: */
533    switch (emu->processor) {
534    case EMU_PROC_SQE:
535       emu_set_reg64(emu, &CP_SQE_INSTR_BASE, EMU_INSTR_BASE);
536       break;
537    case EMU_PROC_BV:
538       emu_set_reg64(emu, &BV_INSTR_BASE, EMU_INSTR_BASE);
539       break;
540    case EMU_PROC_LPAC:
541       if (gpuver >= 7)
542          emu_set_reg64(emu, &LPAC_INSTR_BASE, EMU_INSTR_BASE);
543       else
544          emu_set_reg64(emu, &CP_LPAC_SQE_INSTR_BASE, EMU_INSTR_BASE);
545       break;
546    }
547 
548    if (emu->gpu_id == 730) {
549       emu_set_control_reg(emu, 0xef, 1 << 21);
550       emu_set_control_reg(emu, 0, 7 << 28);
551    } else if (emu->gpu_id == 660) {
552       emu_set_control_reg(emu, 0, 3 << 28);
553    } else if (emu->gpu_id == 650) {
554       emu_set_control_reg(emu, 0, 1 << 28);
555    }
556 }
557 
558 void
emu_fini(struct emu * emu)559 emu_fini(struct emu *emu)
560 {
561    uint32_t *instrs = emu->instrs;
562    unsigned sizedwords = emu->sizedwords;
563    unsigned gpu_id = emu->gpu_id;
564 
565    munmap(emu->gpumem, EMU_MEMORY_SIZE);
566    memset(emu, 0, sizeof(*emu));
567 
568    emu->instrs = instrs;
569    emu->sizedwords = sizedwords;
570    emu->gpu_id = gpu_id;
571 }
572