1 /*
2 * Copyright (c) 2017 Rob Clark <robdclark@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <err.h>
26 #include <fcntl.h>
27 #include <getopt.h>
28 #include <stdarg.h>
29 #include <stdbool.h>
30 #include <stdint.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <unistd.h>
35
36 #include "util/os_file.h"
37
38 #include "freedreno_pm4.h"
39
40 #include "afuc.h"
41 #include "util.h"
42 #include "emu.h"
43
/* Major GPU generation (gpu_id / 100, e.g. 6 for a6xx), filled in by main(). */
static int gpuver;

/* Output verbosity.  When clear, the listing is meant to be something that
 * can be fed back into the assembler.  When set, extra debugging detail is
 * added (such as unexpected bits that are set in an instruction).
 */
static bool verbose;

/* Emulator mode, selected on the command line. */
static bool emulator;
54
/* Append a "\t; NAME" comment naming the GPU register at regbase.
 *
 * Values below 0x100 are never looked up, and nothing is printed when
 * afuc_gpu_reg_name() has no name for the value.
 */
static void
print_gpu_reg(uint32_t regbase)
{
   char *regname = regbase >= 0x100 ? afuc_gpu_reg_name(regbase) : NULL;

   if (!regname)
      return;

   printf("\t; %s", regname);
   free(regname); /* the lookup result is owned by us */
}
67
/* printf-style helpers that route through afuc_printc(), tagging the text as
 * an error (AFUC_ERR) or as a label (AFUC_LBL).  The format string is the
 * first variadic argument.
 */
#define printerr(...) afuc_printc(AFUC_ERR, __VA_ARGS__)
#define printlbl(...) afuc_printc(AFUC_LBL, __VA_ARGS__)
70
71 void
print_src(unsigned reg)72 print_src(unsigned reg)
73 {
74 if (reg == REG_REM)
75 printf("$rem"); /* remainding dwords in packet */
76 else if (reg == REG_MEMDATA)
77 printf("$memdata");
78 else if (reg == REG_REGDATA)
79 printf("$regdata");
80 else if (reg == REG_DATA)
81 printf("$data");
82 else
83 printf("$%02x", reg);
84 }
85
86 void
print_dst(unsigned reg)87 print_dst(unsigned reg)
88 {
89 if (reg == REG_REM)
90 printf("$rem"); /* remainding dwords in packet */
91 else if (reg == REG_ADDR)
92 printf("$addr");
93 else if (reg == REG_USRADDR)
94 printf("$usraddr");
95 else if (reg == REG_DATA)
96 printf("$data");
97 else
98 printf("$%02x", reg);
99 }
100
101 static void
print_alu_name(afuc_opc opc,uint32_t instr)102 print_alu_name(afuc_opc opc, uint32_t instr)
103 {
104 if (opc == OPC_ADD) {
105 printf("add ");
106 } else if (opc == OPC_ADDHI) {
107 printf("addhi ");
108 } else if (opc == OPC_SUB) {
109 printf("sub ");
110 } else if (opc == OPC_SUBHI) {
111 printf("subhi ");
112 } else if (opc == OPC_AND) {
113 printf("and ");
114 } else if (opc == OPC_OR) {
115 printf("or ");
116 } else if (opc == OPC_XOR) {
117 printf("xor ");
118 } else if (opc == OPC_NOT) {
119 printf("not ");
120 } else if (opc == OPC_SHL) {
121 printf("shl ");
122 } else if (opc == OPC_USHR) {
123 printf("ushr ");
124 } else if (opc == OPC_ISHR) {
125 printf("ishr ");
126 } else if (opc == OPC_ROT) {
127 printf("rot ");
128 } else if (opc == OPC_MUL8) {
129 printf("mul8 ");
130 } else if (opc == OPC_MIN) {
131 printf("min ");
132 } else if (opc == OPC_MAX) {
133 printf("max ");
134 } else if (opc == OPC_CMP) {
135 printf("cmp ");
136 } else if (opc == OPC_MSB) {
137 printf("msb ");
138 } else {
139 printerr("[%08x]", instr);
140 printf(" ; alu%02x ", opc);
141 }
142 }
143
/* Look up the name of a PM4 packet id via afuc_pm_id_name().  The result is
 * passed through unchanged; callers in this file treat NULL as "no name".
 */
static const char *
getpm4(uint32_t id)
{
   const char *pkt_name = afuc_pm_id_name(id);

   return pkt_name;
}
149
/* Jump-table bookkeeping: one slot per distinct target offset, each slot
 * listing the jump-table indices (packet ids) that dispatch to that offset.
 */
static struct {
   uint32_t offset;
   uint32_t num_jump_labels;
   uint32_t jump_labels[256];
} jump_labels[1024];
int num_jump_labels;

/* Record that jump-table index n dispatches to the instruction at offset.
 *
 * Indices above 128 are ignored.  All indices that share an offset are
 * collected in the same slot.  If either the slot table or the per-slot id
 * list is already full, the entry is dropped with a warning instead of
 * writing past the end of the array.
 */
static void
add_jump_table_entry(uint32_t n, uint32_t offset)
{
   const int max_slots = sizeof(jump_labels) / sizeof(jump_labels[0]);
   const uint32_t max_ids = sizeof(jump_labels[0].jump_labels) /
                            sizeof(jump_labels[0].jump_labels[0]);
   int i;

   if (n > 128) /* can't possibly be a PM4 PKT3.. */
      return;

   /* find the slot for this offset, if there already is one: */
   for (i = 0; i < num_jump_labels; i++)
      if (jump_labels[i].offset == offset)
         break;

   if (i == num_jump_labels) {
      /* not seen before, so claim a new slot: */
      if (i >= max_slots) {
         fprintf(stderr, "jump table full, dropping entry %u -> %04x\n",
                 (unsigned)n, (unsigned)offset);
         return;
      }
      jump_labels[i].offset = offset;
      jump_labels[i].num_jump_labels = 0;
      num_jump_labels = i + 1;
   }

   if (jump_labels[i].num_jump_labels >= max_ids) {
      fprintf(stderr, "too many jump labels for %04x, dropping entry %u\n",
              (unsigned)offset, (unsigned)n);
      return;
   }

   jump_labels[i].jump_labels[jump_labels[i].num_jump_labels++] = n;
}
177
178 static int
get_jump_table_entry(uint32_t offset)179 get_jump_table_entry(uint32_t offset)
180 {
181 int i;
182
183 for (i = 0; i < num_jump_labels; i++)
184 if (jump_labels[i].offset == offset)
185 return i;
186
187 return -1;
188 }
189
/* Offsets that have been given a label.  The position of an offset in this
 * table is the label's index.
 */
static uint32_t label_offsets[0x512];
static int num_label_offsets;

/* Find the label index for offset.
 *
 * If the offset has no label yet and create is true, a new one is appended
 * and its index returned.  Returns -1 when the offset is unknown and create
 * is false, or when the table is full and no new label can be added (a
 * warning is printed in that case rather than writing past the array).
 */
static int
label_idx(uint32_t offset, bool create)
{
   const int capacity = sizeof(label_offsets) / sizeof(label_offsets[0]);
   int i;

   for (i = 0; i < num_label_offsets; i++)
      if (offset == label_offsets[i])
         return i;
   if (!create)
      return -1;
   if (i >= capacity) {
      fprintf(stderr, "too many labels, ignoring %04x\n", (unsigned)offset);
      return -1;
   }
   label_offsets[i] = offset;
   num_label_offsets = i + 1;
   return i;
}
206
207 static const char *
label_name(uint32_t offset,bool allow_jt)208 label_name(uint32_t offset, bool allow_jt)
209 {
210 static char name[12];
211 int lidx;
212
213 if (allow_jt) {
214 lidx = get_jump_table_entry(offset);
215 if (lidx >= 0) {
216 int j;
217 for (j = 0; j < jump_labels[lidx].num_jump_labels; j++) {
218 uint32_t jump_label = jump_labels[lidx].jump_labels[j];
219 const char *str = getpm4(jump_label);
220 if (str)
221 return str;
222 }
223 // if we don't find anything w/ known name, maybe we should
224 // return UNKN%d to at least make it clear that this is some
225 // sort of jump-table entry?
226 }
227 }
228
229 lidx = label_idx(offset, false);
230 if (lidx < 0)
231 return NULL;
232 sprintf(name, "l%03d", lidx);
233 return name;
234 }
235
/* Offsets that have been given a function name.  The position of an offset
 * in this table is the function's index.
 */
static uint32_t fxn_offsets[0x512];
static int num_fxn_offsets;

/* Find the function index for offset.
 *
 * If the offset is not known yet and create is true, it is appended and the
 * new index returned.  Returns -1 when the offset is unknown and create is
 * false, or when the table is full and nothing can be added (a warning is
 * printed in that case rather than writing past the array).
 */
static int
fxn_idx(uint32_t offset, bool create)
{
   const int capacity = sizeof(fxn_offsets) / sizeof(fxn_offsets[0]);
   int i;

   for (i = 0; i < num_fxn_offsets; i++)
      if (offset == fxn_offsets[i])
         return i;
   if (!create)
      return -1;
   if (i >= capacity) {
      fprintf(stderr, "too many functions, ignoring %04x\n", (unsigned)offset);
      return -1;
   }
   fxn_offsets[i] = offset;
   num_fxn_offsets = i + 1;
   return i;
}
252
253 static const char *
fxn_name(uint32_t offset)254 fxn_name(uint32_t offset)
255 {
256 static char name[14];
257 int fidx = fxn_idx(offset, false);
258 if (fidx < 0)
259 return NULL;
260 sprintf(name, "fxn%02d", fidx);
261 return name;
262 }
263
/* Print a control register operand: "@NAME" when afuc_control_reg_name()
 * knows the id, otherwise the id as a three-digit hex number.
 */
void
print_control_reg(uint32_t id)
{
   char *regname = afuc_control_reg_name(id);

   if (!regname) {
      printf("0x%03x", id);
      return;
   }

   printf("@%s", regname);
   free(regname);
}
275
/* Print a pipe register operand: "|NAME" when afuc_pipe_reg_name() knows the
 * id, otherwise the id as a three-digit hex number.
 */
void
print_pipe_reg(uint32_t id)
{
   char *regname = afuc_pipe_reg_name(id);

   if (!regname) {
      printf("0x%03x", id);
      return;
   }

   printf("|%s", regname);
   free(regname);
}
287
288 static void
disasm_instr(uint32_t * instrs,unsigned pc)289 disasm_instr(uint32_t *instrs, unsigned pc)
290 {
291 int jump_label_idx;
292 afuc_instr *instr = (void *)&instrs[pc];
293 const char *fname, *lname;
294 afuc_opc opc;
295 bool rep;
296
297 afuc_get_opc(instr, &opc, &rep);
298
299 lname = label_name(pc, false);
300 fname = fxn_name(pc);
301 jump_label_idx = get_jump_table_entry(pc);
302
303 if (jump_label_idx >= 0) {
304 int j;
305 printf("\n");
306 for (j = 0; j < jump_labels[jump_label_idx].num_jump_labels; j++) {
307 uint32_t jump_label = jump_labels[jump_label_idx].jump_labels[j];
308 const char *name = getpm4(jump_label);
309 if (name) {
310 printlbl("%s", name);
311 } else {
312 printlbl("UNKN%d", jump_label);
313 }
314 printf(":\n");
315 }
316 }
317
318 if (fname) {
319 printlbl("%s", fname);
320 printf(":\n");
321 }
322
323 if (lname) {
324 printlbl(" %s", lname);
325 printf(":");
326 } else {
327 printf(" ");
328 }
329
330 if (verbose) {
331 printf("\t%04x: %08x ", pc, instrs[pc]);
332 } else {
333 printf(" ");
334 }
335
336 switch (opc) {
337 case OPC_NOP: {
338 /* a6xx changed the default immediate, and apparently 0
339 * is illegal now.
340 */
341 const uint32_t nop = gpuver >= 6 ? 0x1000000 : 0x0;
342 if (instrs[pc] != nop) {
343 printerr("[%08x]", instrs[pc]);
344 printf(" ; ");
345 }
346 if (rep)
347 printf("(rep)");
348 printf("nop");
349 print_gpu_reg(instrs[pc]);
350
351 break;
352 }
353 case OPC_ADD:
354 case OPC_ADDHI:
355 case OPC_SUB:
356 case OPC_SUBHI:
357 case OPC_AND:
358 case OPC_OR:
359 case OPC_XOR:
360 case OPC_NOT:
361 case OPC_SHL:
362 case OPC_USHR:
363 case OPC_ISHR:
364 case OPC_ROT:
365 case OPC_MUL8:
366 case OPC_MIN:
367 case OPC_MAX:
368 case OPC_CMP: {
369 bool src1 = true;
370
371 if (opc == OPC_NOT)
372 src1 = false;
373
374 if (rep)
375 printf("(rep)");
376
377 print_alu_name(opc, instrs[pc]);
378 print_dst(instr->alui.dst);
379 printf(", ");
380 if (src1) {
381 print_src(instr->alui.src);
382 printf(", ");
383 }
384 printf("0x%04x", instr->alui.uimm);
385 print_gpu_reg(instr->alui.uimm);
386
387 /* print out unexpected bits: */
388 if (verbose) {
389 if (instr->alui.src && !src1)
390 printerr(" (src=%02x)", instr->alui.src);
391 }
392
393 break;
394 }
395 case OPC_MOVI: {
396 if (rep)
397 printf("(rep)");
398 printf("mov ");
399 print_dst(instr->movi.dst);
400 printf(", 0x%04x", instr->movi.uimm);
401 if (instr->movi.shift)
402 printf(" << %u", instr->movi.shift);
403
404 if ((instr->movi.dst == REG_ADDR) && (instr->movi.shift >= 16)) {
405 uint32_t val = (uint32_t)instr->movi.uimm << (uint32_t)instr->movi.shift;
406 val &= ~0x40000; /* b18 seems to be a flag */
407
408 if ((val & 0x00ffffff) == 0) {
409 printf("\t; ");
410 print_pipe_reg(val >> 24);
411 break;
412 }
413 }
414 /* using mov w/ << 16 is popular way to construct a pkt7
415 * header to send (for ex, from PFP to ME), so check that
416 * case first
417 */
418 if ((instr->movi.shift == 16) &&
419 ((instr->movi.uimm & 0xff00) == 0x7000)) {
420 unsigned opc, p;
421
422 opc = instr->movi.uimm & 0x7f;
423 p = pm4_odd_parity_bit(opc);
424
425 /* So, you'd think that checking the parity bit would be
426 * a good way to rule out false positives, but seems like
427 * ME doesn't really care.. at least it would filter out
428 * things that look like actual legit packets between
429 * PFP and ME..
430 */
431 if (1 || p == ((instr->movi.uimm >> 7) & 0x1)) {
432 const char *name = getpm4(opc);
433 printf("\t; ");
434 if (name)
435 printlbl("%s", name);
436 else
437 printlbl("UNKN%u", opc);
438 break;
439 }
440 }
441
442 print_gpu_reg((uint32_t)instr->movi.uimm << (uint32_t)instr->movi.shift);
443
444 break;
445 }
446 case OPC_ALU: {
447 bool src1 = true;
448
449 if (instr->alu.alu == OPC_NOT || instr->alu.alu == OPC_MSB)
450 src1 = false;
451
452 if (instr->alu.pad)
453 printf("[%08x] ; ", instrs[pc]);
454
455 if (rep)
456 printf("(rep)");
457 if (instr->alu.xmov)
458 printf("(xmov%d)", instr->alu.xmov);
459
460 /* special case mnemonics:
461 * reading $00 seems to always yield zero, and so:
462 * or $dst, $00, $src -> mov $dst, $src
463 * Maybe add one for negate too, ie.
464 * sub $dst, $00, $src ???
465 */
466 if ((instr->alu.alu == OPC_OR) && !instr->alu.src1) {
467 printf("mov ");
468 src1 = false;
469 } else {
470 print_alu_name(instr->alu.alu, instrs[pc]);
471 }
472
473 print_dst(instr->alu.dst);
474 if (src1) {
475 printf(", ");
476 print_src(instr->alu.src1);
477 }
478 printf(", ");
479 print_src(instr->alu.src2);
480
481 /* print out unexpected bits: */
482 if (verbose) {
483 if (instr->alu.pad)
484 printerr(" (pad=%01x)", instr->alu.pad);
485 if (instr->alu.src1 && !src1)
486 printerr(" (src1=%02x)", instr->alu.src1);
487 }
488
489 /* xmov is a modifier that makes the processor execute up to 3
490 * extra mov's after the current instruction. Given an ALU
491 * instruction:
492 *
493 * (xmovN) alu $dst, $src1, $src2
494 *
495 * In all of the uses in the firmware blob, $dst and $src2 are one
496 * of the "special" registers $data, $addr, $addr2. I've observed
497 * that if $dst isn't "special" then it's replaced with $00
498 * instead of $data, but I haven't checked what happens if $src2
499 * isn't "special". Anyway, in the usual case, the HW produces a
500 * count M = min(N, $rem) and then does the following:
501 *
502 * M = 1:
503 * mov $data, $src2
504 *
505 * M = 2:
506 * mov $data, $src2
507 * mov $data, $src2
508 *
509 * M = 3:
510 * mov $data, $src2
511 * mov $dst, $src2 (special case for CP_CONTEXT_REG_BUNCH)
512 * mov $data, $src2
513 *
514 * It seems to be frequently used in combination with (rep) to
515 * provide a kind of hardware-based loop unrolling, and there's
516 * even a special case in the ISA to be able to do this with
517 * CP_CONTEXT_REG_BUNCH. However (rep) isn't required.
518 *
519 * This dumps the expected extra instructions, assuming that $rem
520 * isn't too small.
521 */
522 if (verbose && instr->alu.xmov) {
523 for (int i = 0; i < instr->alu.xmov; i++) {
524 printf("\n ; mov ");
525 if (instr->alu.dst < 0x1d)
526 printf("$00");
527 else if (instr->alu.xmov == 3 && i == 1)
528 print_dst(instr->alu.dst);
529 else
530 printf("$data");
531 printf(", ");
532 print_src(instr->alu.src2);
533 }
534 }
535
536 break;
537 }
538 case OPC_CWRITE6:
539 case OPC_CREAD6:
540 case OPC_STORE6:
541 case OPC_LOAD6: {
542 if (rep)
543 printf("(rep)");
544
545 bool is_control_reg = true;
546 bool is_store = true;
547 if (gpuver >= 6) {
548 switch (opc) {
549 case OPC_CWRITE6:
550 printf("cwrite ");
551 break;
552 case OPC_CREAD6:
553 is_store = false;
554 printf("cread ");
555 break;
556 case OPC_STORE6:
557 is_control_reg = false;
558 printf("store ");
559 break;
560 case OPC_LOAD6:
561 is_control_reg = false;
562 is_store = false;
563 printf("load ");
564 break;
565 default:
566 assert(!"unreachable");
567 }
568 } else {
569 switch (opc) {
570 case OPC_CWRITE5:
571 printf("cwrite ");
572 break;
573 case OPC_CREAD5:
574 is_store = false;
575 printf("cread ");
576 break;
577 default:
578 fprintf(stderr, "A6xx control opcode on A5xx?\n");
579 exit(1);
580 }
581 }
582
583 if (is_store)
584 print_src(instr->control.src1);
585 else
586 print_dst(instr->control.src1);
587 printf(", [");
588 print_src(instr->control.src2);
589 printf(" + ");
590 if (is_control_reg && instr->control.flags != 0x4)
591 print_control_reg(instr->control.uimm);
592 else
593 printf("0x%03x", instr->control.uimm);
594 printf("], 0x%x", instr->control.flags);
595 break;
596 }
597 case OPC_BRNEI:
598 case OPC_BREQI:
599 case OPC_BRNEB:
600 case OPC_BREQB: {
601 unsigned off = pc + instr->br.ioff;
602
603 assert(!rep);
604
605 /* Since $00 reads back zero, it can be used as src for
606 * unconditional branches. (This only really makes sense
607 * for the BREQB.. or possible BRNEI if imm==0.)
608 *
609 * If bit=0 then branch is taken if *all* bits are zero.
610 * Otherwise it is taken if bit (bit-1) is clear.
611 *
612 * Note the instruction after a jump/branch is executed
613 * regardless of whether branch is taken, so use nop or
614 * take that into account in code.
615 */
616 if (instr->br.src || (opc != OPC_BRNEB)) {
617 bool immed = false;
618
619 if (opc == OPC_BRNEI) {
620 printf("brne ");
621 immed = true;
622 } else if (opc == OPC_BREQI) {
623 printf("breq ");
624 immed = true;
625 } else if (opc == OPC_BRNEB) {
626 printf("brne ");
627 } else if (opc == OPC_BREQB) {
628 printf("breq ");
629 }
630 print_src(instr->br.src);
631 if (immed) {
632 printf(", 0x%x,", instr->br.bit_or_imm);
633 } else {
634 printf(", b%u,", instr->br.bit_or_imm);
635 }
636 } else {
637 printf("jump");
638 if (verbose && instr->br.bit_or_imm) {
639 printerr(" (src=%03x, bit=%03x) ", instr->br.src,
640 instr->br.bit_or_imm);
641 }
642 }
643
644 printf(" #");
645 printlbl("%s", label_name(off, true));
646 if (verbose)
647 printf(" (#%d, %04x)", instr->br.ioff, off);
648 break;
649 }
650 case OPC_CALL:
651 assert(!rep);
652 printf("call #");
653 printlbl("%s", fxn_name(instr->call.uoff));
654 if (verbose) {
655 printf(" (%04x)", instr->call.uoff);
656 if (instr->br.bit_or_imm || instr->br.src) {
657 printerr(" (src=%03x, bit=%03x) ", instr->br.src,
658 instr->br.bit_or_imm);
659 }
660 }
661 break;
662 case OPC_RET:
663 assert(!rep);
664 if (instr->ret.pad)
665 printf("[%08x] ; ", instrs[pc]);
666 if (instr->ret.interrupt)
667 printf("iret");
668 else
669 printf("ret");
670 break;
671 case OPC_WIN:
672 assert(!rep);
673 if (instr->waitin.pad)
674 printf("[%08x] ; ", instrs[pc]);
675 printf("waitin");
676 if (verbose && instr->waitin.pad)
677 printerr(" (pad=%x)", instr->waitin.pad);
678 break;
679 case OPC_PREEMPTLEAVE6:
680 if (gpuver < 6) {
681 printf("[%08x] ; op38", instrs[pc]);
682 } else {
683 printf("preemptleave #");
684 printlbl("%s", label_name(instr->call.uoff, true));
685 }
686 break;
687 case OPC_SETSECURE:
688 /* Note: This seems to implicitly read the secure/not-secure state
689 * to set from the low bit of $02, and implicitly jumps to pc + 3
690 * (i.e. skipping the next two instructions) if it succeeds. We
691 * print these implicit parameters to make reading the disassembly
692 * easier.
693 */
694 if (instr->pad)
695 printf("[%08x] ; ", instrs[pc]);
696 printf("setsecure $02, #");
697 printlbl("%s", label_name(pc + 3, true));
698 break;
699 default:
700 printerr("[%08x]", instrs[pc]);
701 printf(" ; op%02x ", opc);
702 print_dst(instr->alui.dst);
703 printf(", ");
704 print_src(instr->alui.src);
705 print_gpu_reg(instrs[pc] & 0xffff);
706 break;
707 }
708 printf("\n");
709 }
710
711 static void
setup_packet_table(uint32_t * jmptbl,uint32_t sizedwords)712 setup_packet_table(uint32_t *jmptbl, uint32_t sizedwords)
713 {
714 num_jump_labels = 0;
715
716 for (unsigned i = 0; i < sizedwords; i++) {
717 unsigned offset = jmptbl[i];
718 unsigned n = i; // + CP_NOP;
719 add_jump_table_entry(n, offset);
720 }
721 }
722
723 static void
setup_labels(uint32_t * instrs,uint32_t sizedwords)724 setup_labels(uint32_t *instrs, uint32_t sizedwords)
725 {
726 afuc_opc opc;
727 bool rep;
728
729 num_label_offsets = 0;
730
731 for (unsigned i = 0; i < sizedwords; i++) {
732 afuc_instr *instr = (void *)&instrs[i];
733
734 afuc_get_opc(instr, &opc, &rep);
735
736 switch (opc) {
737 case OPC_BRNEI:
738 case OPC_BREQI:
739 case OPC_BRNEB:
740 case OPC_BREQB:
741 label_idx(i + instr->br.ioff, true);
742 break;
743 case OPC_PREEMPTLEAVE6:
744 if (gpuver >= 6)
745 label_idx(instr->call.uoff, true);
746 break;
747 case OPC_CALL:
748 fxn_idx(instr->call.uoff, true);
749 break;
750 case OPC_SETSECURE:
751 /* this implicitly jumps to pc + 3 if successful */
752 label_idx(i + 3, true);
753 break;
754 default:
755 break;
756 }
757 }
758 }
759
760 static void
disasm(struct emu * emu)761 disasm(struct emu *emu)
762 {
763 uint32_t sizedwords = emu->sizedwords;
764 uint32_t lpac_offset = 0;
765
766 EMU_GPU_REG(CP_SQE_INSTR_BASE);
767 EMU_GPU_REG(CP_LPAC_SQE_INSTR_BASE);
768
769 emu_init(emu);
770
771 #ifdef BOOTSTRAP_DEBUG
772 while (true) {
773 disasm_instr(emu->instrs, emu->gpr_regs.pc);
774 emu_step(emu);
775 }
776 #endif
777
778 emu_run_bootstrap(emu);
779
780 /* Figure out if we have LPAC SQE appended: */
781 if (emu_get_reg64(emu, &CP_LPAC_SQE_INSTR_BASE)) {
782 lpac_offset = emu_get_reg64(emu, &CP_LPAC_SQE_INSTR_BASE) -
783 emu_get_reg64(emu, &CP_SQE_INSTR_BASE);
784 lpac_offset /= 4;
785 sizedwords = lpac_offset;
786 }
787
788 setup_packet_table(emu->jmptbl, ARRAY_SIZE(emu->jmptbl));
789 setup_labels(emu->instrs, emu->sizedwords);
790
791 /* TODO add option to emulate LPAC SQE instead: */
792 if (emulator) {
793 /* Start from clean slate: */
794 emu_fini(emu);
795 emu_init(emu);
796
797 while (true) {
798 disasm_instr(emu->instrs, emu->gpr_regs.pc);
799 emu_step(emu);
800 }
801 }
802
803 /* print instructions: */
804 for (int i = 0; i < sizedwords; i++) {
805 disasm_instr(emu->instrs, i);
806 }
807
808 if (!lpac_offset)
809 return;
810
811 printf(";\n");
812 printf("; LPAC microcode:\n");
813 printf(";\n");
814
815 emu_fini(emu);
816
817 emu->lpac = true;
818 emu->instrs += lpac_offset;
819 emu->sizedwords -= lpac_offset;
820
821 emu_init(emu);
822 emu_run_bootstrap(emu);
823
824 setup_packet_table(emu->jmptbl, ARRAY_SIZE(emu->jmptbl));
825 setup_labels(emu->instrs, emu->sizedwords);
826
827 /* print instructions: */
828 for (int i = 0; i < emu->sizedwords; i++) {
829 disasm_instr(emu->instrs, i);
830 }
831 }
832
833
834 static void
disasm_legacy(uint32_t * buf,int sizedwords)835 disasm_legacy(uint32_t *buf, int sizedwords)
836 {
837 uint32_t *instrs = buf;
838 const int jmptbl_start = instrs[1] & 0xffff;
839 uint32_t *jmptbl = &buf[jmptbl_start];
840 int i;
841
842 /* parse jumptable: */
843 setup_packet_table(jmptbl, 0x80);
844
845 /* do a pre-pass to find instructions that are potential branch targets,
846 * and add labels for them:
847 */
848 setup_labels(instrs, jmptbl_start);
849
850 /* print instructions: */
851 for (i = 0; i < jmptbl_start; i++) {
852 disasm_instr(instrs, i);
853 }
854
855 /* print jumptable: */
856 if (verbose) {
857 printf(";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;\n");
858 printf("; JUMP TABLE\n");
859 for (i = 0; i < 0x7f; i++) {
860 int n = i; // + CP_NOP;
861 uint32_t offset = jmptbl[i];
862 const char *name = getpm4(n);
863 printf("%3d %02x: ", n, n);
864 printf("%04x", offset);
865 if (name) {
866 printf(" ; %s", name);
867 } else {
868 printf(" ; UNKN%d", n);
869 }
870 printf("\n");
871 }
872 }
873 }
874
/* Print the command-line help to stderr and exit with status 2. */
static void
usage(void)
{
   static const char help[] =
      "Usage:\n"
      "\tdisasm [-g GPUVER] [-v] [-c] filename.asm\n"
      "\t\t-g - specify GPU version (5, etc)\n"
      "\t\t-c - use colors\n"
      "\t\t-v - verbose output\n"
      "\t\t-e - emulator mode\n";

   fputs(help, stderr);
   exit(2);
}
886
887 int
main(int argc,char ** argv)888 main(int argc, char **argv)
889 {
890 uint32_t *buf;
891 char *file;
892 bool colors = false;
893 uint32_t gpu_id = 0;
894 size_t sz;
895 int c, ret;
896 bool unit_test = false;
897
898 /* Argument parsing: */
899 while ((c = getopt(argc, argv, "g:vceu")) != -1) {
900 switch (c) {
901 case 'g':
902 gpu_id = atoi(optarg);
903 break;
904 case 'v':
905 verbose = true;
906 break;
907 case 'c':
908 colors = true;
909 break;
910 case 'e':
911 emulator = true;
912 verbose = true;
913 break;
914 case 'u':
915 unit_test = true;
916 break;
917 default:
918 usage();
919 }
920 }
921
922 if (optind >= argc) {
923 fprintf(stderr, "no file specified!\n");
924 usage();
925 }
926
927 file = argv[optind];
928
929 /* if gpu version not specified, infer from filename: */
930 if (!gpu_id) {
931 char *str = strstr(file, "a5");
932 if (!str)
933 str = strstr(file, "a6");
934 if (str)
935 gpu_id = atoi(str + 1);
936 }
937
938 if (gpu_id < 500) {
939 printf("invalid gpu_id: %d\n", gpu_id);
940 return -1;
941 }
942
943 gpuver = gpu_id / 100;
944
945 /* a6xx is *mostly* a superset of a5xx, but some opcodes shuffle
946 * around, and behavior of special regs is a bit different. Right
947 * now we only bother to support the a6xx variant.
948 */
949 if (emulator && (gpuver != 6)) {
950 fprintf(stderr, "Emulator only supported on a6xx!\n");
951 return 1;
952 }
953
954 ret = afuc_util_init(gpuver, colors);
955 if (ret < 0) {
956 usage();
957 }
958
959 printf("; a%dxx microcode\n", gpuver);
960
961 buf = (uint32_t *)os_read_file(file, &sz);
962
963 if (!unit_test)
964 printf("; Disassembling microcode: %s\n", file);
965 printf("; Version: %08x\n\n", buf[1]);
966
967 if (gpuver < 6) {
968 disasm_legacy(&buf[1], sz / 4 - 1);
969 } else {
970 struct emu emu = {
971 .instrs = &buf[1],
972 .sizedwords = sz / 4 - 1,
973 .gpu_id = gpu_id,
974 };
975
976 disasm(&emu);
977 }
978
979 return 0;
980 }
981