1 /*
2 * Copyright © 2012 Rob Clark <robdclark@gmail.com>
3 * SPDX-License-Identifier: MIT
4 */
5
6 #include <assert.h>
7 #include <ctype.h>
8 #include <err.h>
9 #include <errno.h>
10 #include <fcntl.h>
11 #include <inttypes.h>
12 #include <signal.h>
13 #include <stdarg.h>
14 #include <stdbool.h>
15 #include <stdint.h>
16 #include <stdio.h>
17 #include <stdlib.h>
18 #include <string.h>
19 #include <unistd.h>
20 #include <sys/stat.h>
21 #include <sys/types.h>
22 #include <sys/wait.h>
23
24 #include "freedreno_pm4.h"
25
26 #include "buffers.h"
27 #include "cffdec.h"
28 #include "disasm.h"
29 #include "redump.h"
30 #include "rnnutil.h"
31 #include "script.h"
32
33 /* ************************************************************************* */
34 /* originally based on kernel recovery dump code: */
35
/* Decoder options handed in by the caller through cffdec_init(). */
static const struct cffdec_options *options;

/* When set, dump_registers() reports writes to non-banked registers as
 * needing a WFI.
 */
static bool needs_wfi = false;
/* Copy of options->summary taken in cffdec_init(); also consulted by
 * quiet() to suppress level >= 3 output.
 */
static bool summary = false;
/* Gates the texture state dumps in reg_dump_tex_samp_hi() and
 * reg_dump_tex_const_hi(): they do nothing unless this is set.
 */
static bool in_summary = false;
static int vertices;
42
43 static inline unsigned
regcnt(void)44 regcnt(void)
45 {
46 if (options->info->chip >= 5)
47 return 0x3ffff;
48 else
49 return 0x7fff;
50 }
51
52 static int
is_64b(void)53 is_64b(void)
54 {
55 return options->info->chip >= 5;
56 }
57
static int draws[4];
/* Per-IB-level decode state, indexed by IB level; zeroed by reset_regs(). */
static struct {
   uint64_t base;
   uint32_t size; /* in dwords */
   /* Generally cmdstream consists of multiple IB calls to different
    * buffers, which are themselves often re-used for each tile.  The
    * triggered flag serves two purposes to help make it more clear
    * what part of the cmdstream is before vs after the GPU hang:
    *
    * 1) if in IB2 we are past the point within the IB2 buffer where
    *    the GPU hung, but IB1 is not past the point within its
    *    buffer where the GPU had hung, then we know the GPU hang
    *    happens on a future use of that IB2 buffer.
    *
    * 2) if in an IB1 or IB2 buffer that is not the one where the GPU
    *    hung, but we've already passed the trigger point at the same
    *    IB level, we know that we are past the point where the GPU
    *    had hung.
    *
    * So this is a one way switch, false->true.  And a higher #'d
    * IB level isn't considered triggered unless the lower #'d IB
    * level is.
    */
   bool triggered : 1;
   /* Set by highlight_gpuaddr() when the crash range at this level is
    * reached but the next level already has a crash location recorded.
    */
   bool base_seen : 1;
} ibs[4];
/* Index into ibs[] and options->ibs[] used by highlight_gpuaddr(). */
static int ib;

/* Reset to 0 in cffdec_init(); printed as the draw number by __do_query(). */
static int draw_count;
/* Compared against options->draw_filter in quiet(). */
static int current_draw_count;
88
/* query mode.. to handle symbolic register name queries, we need to
 * defer parsing the query strings until after gpu_id is known and the
 * rnn db is loaded.  Allocated and filled in init_rnn(), one entry per
 * options->querystrs[] element.
 */
static int *queryvals;
94
95 static bool
quiet(int lvl)96 quiet(int lvl)
97 {
98 if ((options->draw_filter != -1) &&
99 (options->draw_filter != current_draw_count))
100 return true;
101 if ((lvl >= 3) && (summary || options->querystrs || options->script))
102 return true;
103 if ((lvl >= 2) && (options->querystrs || options->script))
104 return true;
105 return false;
106 }
107
/* printf() wrapper that emits nothing when quiet(lvl) says output at this
 * verbosity level is suppressed.
 */
void
printl(int lvl, const char *fmt, ...)
{
   if (!quiet(lvl)) {
      va_list ap;

      va_start(ap, fmt);
      vprintf(fmt, ap);
      va_end(ap);
   }
}
118
/* Indentation prefixes indexed by nesting level: entry N holds N+1 tab
 * characters for the first nine entries.  The remaining entries are the
 * literal "x" (presumably so unexpectedly deep nesting is visible in the
 * output -- TODO confirm).
 */
static const char *levels[] = {
   "\t",
   "\t\t",
   "\t\t\t",
   "\t\t\t\t",
   "\t\t\t\t\t",
   "\t\t\t\t\t\t",
   "\t\t\t\t\t\t\t",
   "\t\t\t\t\t\t\t\t",
   "\t\t\t\t\t\t\t\t\t",
   "x",
   "x",
   "x",
   "x",
   "x",
   "x",
};
136
/* Source selector passed to dump_tex_samp(); the register handlers in this
 * part of the file always pass STATE_SRC_DIRECT.
 */
enum state_src_t {
   STATE_SRC_DIRECT,
   STATE_SRC_INDIRECT,
   STATE_SRC_BINDLESS,
};
142
/* SDS (CP_SET_DRAW_STATE) helpers (forward declarations): */
static void load_all_groups(int level);
static void disable_all_groups(void);

/* Texture sampler / constant state dumpers (forward declarations), used by
 * the reg_dump_tex_*_hi() register handlers.
 */
static void dump_tex_samp(uint32_t *texsamp, enum state_src_t src, int num_unit,
                          int level);
static void dump_tex_const(uint32_t *texsamp, int num_unit, int level);
150
/* Decide whether output covering `gpuaddr` should be highlighted as the
 * estimated crash location for the current IB level (`ib`).
 *
 * Side effects: may set ibs[ib].triggered or ibs[ib].base_seen, and prints
 * "ESTIMATED CRASH LOCATION!" when the address falls in the crash range.
 *
 * Returns options->color once this level has triggered, and false whenever
 * no crash base is recorded for the level, a lower level has not been
 * reached yet, or the level is marked base_seen.
 */
static bool
highlight_gpuaddr(uint64_t gpuaddr)
{
   /* no crash location recorded for this IB level: */
   if (!options->ibs[ib].base)
      return false;

   /* the parent level has a crash location, but we have not reached it: */
   if ((ib > 0) && options->ibs[ib - 1].base &&
       !(ibs[ib - 1].triggered || ibs[ib - 1].base_seen))
      return false;

   if (ibs[ib].base_seen)
      return false;

   /* one-way switch: once triggered, everything after is highlighted */
   if (ibs[ib].triggered)
      return options->color;

   /* not the buffer in which the crash was recorded: */
   if (options->ibs[ib].base != ibs[ib].base)
      return false;

   /* byte range covering the last `rem` dwords of the buffer: */
   uint64_t start = ibs[ib].base + 4 * (ibs[ib].size - options->ibs[ib].rem);
   uint64_t end = ibs[ib].base + 4 * ibs[ib].size;

   /* note: both ends of the range are inclusive */
   bool triggered = (start <= gpuaddr) && (gpuaddr <= end);

   /* the next IB level has its own crash location recorded, so only note
    * that this level's range was reached instead of triggering here:
    */
   if (triggered && (ib < 2) && options->ibs[ib + 1].crash_found) {
      ibs[ib].base_seen = true;
      return false;
   }

   ibs[ib].triggered |= triggered;

   if (triggered)
      printf("ESTIMATED CRASH LOCATION!\n");

   return triggered & options->color;
}
187
188 static void
dump_hex(uint32_t * dwords,uint32_t sizedwords,int level)189 dump_hex(uint32_t *dwords, uint32_t sizedwords, int level)
190 {
191 int i, j;
192 int lastzero = 1;
193
194 if (quiet(2))
195 return;
196
197 bool highlight = highlight_gpuaddr(gpuaddr(dwords) + 4 * sizedwords - 1);
198
199 for (i = 0; i < sizedwords; i += 8) {
200 int zero = 1;
201
202 /* always show first row: */
203 if (i == 0)
204 zero = 0;
205
206 for (j = 0; (j < 8) && (i + j < sizedwords) && zero; j++)
207 if (dwords[i + j])
208 zero = 0;
209
210 if (zero && !lastzero)
211 printf("*\n");
212
213 lastzero = zero;
214
215 if (zero)
216 continue;
217
218 uint64_t addr = gpuaddr(&dwords[i]);
219
220 if (highlight)
221 printf("\x1b[0;1;31m");
222
223 if (is_64b()) {
224 printf("%016" PRIx64 ":%s", addr, levels[level]);
225 } else {
226 printf("%08x:%s", (uint32_t)addr, levels[level]);
227 }
228
229 if (highlight)
230 printf("\x1b[0m");
231
232 printf("%04x:", i * 4);
233
234 for (j = 0; (j < 8) && (i + j < sizedwords); j++) {
235 printf(" %08x", dwords[i + j]);
236 }
237
238 printf("\n");
239 }
240 }
241
242 static void
dump_float(float * dwords,uint32_t sizedwords,int level)243 dump_float(float *dwords, uint32_t sizedwords, int level)
244 {
245 int i;
246 for (i = 0; i < sizedwords; i++) {
247 if ((i % 8) == 0) {
248 if (is_64b()) {
249 printf("%016" PRIx64 ":%s", gpuaddr(dwords), levels[level]);
250 } else {
251 printf("%08x:%s", (uint32_t)gpuaddr(dwords), levels[level]);
252 }
253 } else {
254 printf(" ");
255 }
256 printf("%8f", *(dwords++));
257 if ((i % 8) == 7)
258 printf("\n");
259 }
260 if (i % 8)
261 printf("\n");
262 }
263
/* Split a packed dword into an address part and a flags part.
 *
 * I believe the surface format lives in the low bits:
 *    #define RB_COLOR_INFO__COLOR_FORMAT_MASK 0x0000000fL
 * Comments in sys2gmem_tex_const indicate that the address is [31:12], but
 * it looks like at least some of the bits above the format have a
 * different meaning..
 *
 * The bits selected by `mask` are returned in *flags, the remaining bits in
 * *gpuaddr.
 */
static void
parse_dword_addr(uint32_t dword, uint32_t *gpuaddr, uint32_t *flags,
                 uint32_t mask)
{
   assert(!is_64b()); /* this is only used on a2xx */

   const uint32_t addr_bits = dword & ~mask;
   const uint32_t flag_bits = dword & mask;

   *gpuaddr = addr_bits;
   *flags = flag_bits;
}
277
/* Shadow copy of the most recently written value of every register,
 * indexed by register offset (see reg_set() / reg_val()).
 */
static uint32_t type0_reg_vals[0x3ffff + 1];
/* Bitmaps addressed as byte regbase / 8, bit regbase % 8.  Note that
 * sizeof(type0_reg_vals) is a byte count, so these arrays are larger than
 * one bit per register strictly requires.
 */
static uint8_t type0_reg_rewritten[sizeof(type0_reg_vals) /
                                   8]; /* written since last draw */
static uint8_t type0_reg_written[sizeof(type0_reg_vals) / 8];
/* Register values returned by reg_lastval() and compared against the
 * current values in query mode; zeroed by clear_lastvals().
 */
static uint32_t lastvals[ARRAY_SIZE(type0_reg_vals)];
283
284 static bool
reg_rewritten(uint32_t regbase)285 reg_rewritten(uint32_t regbase)
286 {
287 return !!(type0_reg_rewritten[regbase / 8] & (1 << (regbase % 8)));
288 }
289
290 bool
reg_written(uint32_t regbase)291 reg_written(uint32_t regbase)
292 {
293 return !!(type0_reg_written[regbase / 8] & (1 << (regbase % 8)));
294 }
295
296 static void
clear_rewritten(void)297 clear_rewritten(void)
298 {
299 memset(type0_reg_rewritten, 0, sizeof(type0_reg_rewritten));
300 }
301
302 static void
clear_written(void)303 clear_written(void)
304 {
305 memset(type0_reg_written, 0, sizeof(type0_reg_written));
306 clear_rewritten();
307 }
308
309 uint32_t
reg_lastval(uint32_t regbase)310 reg_lastval(uint32_t regbase)
311 {
312 return lastvals[regbase];
313 }
314
315 static void
clear_lastvals(void)316 clear_lastvals(void)
317 {
318 memset(lastvals, 0, sizeof(lastvals));
319 }
320
321 uint32_t
reg_val(uint32_t regbase)322 reg_val(uint32_t regbase)
323 {
324 return type0_reg_vals[regbase];
325 }
326
327 void
reg_set(uint32_t regbase,uint32_t val)328 reg_set(uint32_t regbase, uint32_t val)
329 {
330 assert(regbase < regcnt());
331 type0_reg_vals[regbase] = val;
332 type0_reg_written[regbase / 8] |= (1 << (regbase % 8));
333 type0_reg_rewritten[regbase / 8] |= (1 << (regbase % 8));
334 }
335
336 static void
reg_dump_scratch(const char * name,uint32_t dword,int level)337 reg_dump_scratch(const char *name, uint32_t dword, int level)
338 {
339 unsigned r;
340
341 if (quiet(3))
342 return;
343
344 r = regbase("CP_SCRATCH[0].REG");
345
346 // if not, try old a2xx/a3xx version:
347 if (!r)
348 r = regbase("CP_SCRATCH_REG0");
349
350 if (!r)
351 return;
352
353 printf("%s:%u,%u,%u,%u\n", levels[level], reg_val(r + 4), reg_val(r + 5),
354 reg_val(r + 6), reg_val(r + 7));
355 }
356
/* Hex-dump `sizedwords` dwords of the buffer mapped at `gpuaddr`, one
 * indent level deeper than `level`.  Skipped when quiet(quietlvl) is true
 * or when hostptr() has no mapping for the address.
 */
static void
dump_gpuaddr_size(uint64_t gpuaddr, int level, int sizedwords, int quietlvl)
{
   if (quiet(quietlvl))
      return;

   void *host = hostptr(gpuaddr);
   if (!host)
      return;

   dump_hex(host, sizedwords, level + 1);
}
370
/* Hex-dump the first 64 dwords at `gpuaddr`, at quiet level 3. */
static void
dump_gpuaddr(uint64_t gpuaddr, int level)
{
   const int sizedwords = 64;
   const int quietlvl = 3;

   dump_gpuaddr_size(gpuaddr, level, sizedwords, quietlvl);
}
376
/* Register handler: treat the 32-bit register value as a GPU address and
 * dump the memory it points at.  `name` is unused.
 */
static void
reg_dump_gpuaddr(const char *name, uint32_t dword, int level)
{
   const uint64_t addr = dword;

   dump_gpuaddr(addr, level);
}
382
/* Low 32 bits of a split 64-bit address, latched by reg_gpuaddr_lo() and
 * combined with the high half by the *_hi register handlers.
 */
uint32_t gpuaddr_lo;

/* Register handler for the _LO half of an address pair: only remembers the
 * value.  `name` and `level` are unused.
 */
static void
reg_gpuaddr_lo(const char *name, uint32_t dword, int level)
{
   gpuaddr_lo = dword;
}
389
390 static void
reg_dump_gpuaddr_hi(const char * name,uint32_t dword,int level)391 reg_dump_gpuaddr_hi(const char *name, uint32_t dword, int level)
392 {
393 dump_gpuaddr(gpuaddr_lo | (((uint64_t)dword) << 32), level);
394 }
395
/* 64-bit register handler: dump the memory at the address held in `qword`.
 * `name` is unused.
 */
static void
reg_dump_gpuaddr64(const char *name, uint64_t qword, int level)
{
   const uint64_t addr = qword;

   dump_gpuaddr(addr, level);
}
401
402 static void
dump_shader(const char * ext,void * buf,int bufsz)403 dump_shader(const char *ext, void *buf, int bufsz)
404 {
405 if (options->dump_shaders) {
406 static int n = 0;
407 char filename[16];
408 int fd;
409 sprintf(filename, "%04d.%s", n++, ext);
410 fd = open(filename, O_WRONLY | O_TRUNC | O_CREAT, 0644);
411 if (fd != -1) {
412 write(fd, buf, bufsz);
413 close(fd);
414 }
415 }
416 }
417
418 static void
disasm_gpuaddr(const char * name,uint64_t gpuaddr,int level)419 disasm_gpuaddr(const char *name, uint64_t gpuaddr, int level)
420 {
421 void *buf;
422
423 gpuaddr &= 0xfffffffffffffff0;
424
425 if (quiet(3))
426 return;
427
428 buf = hostptr(gpuaddr);
429 if (buf) {
430 uint32_t sizedwords = hostlen(gpuaddr) / 4;
431 const char *ext;
432
433 dump_hex(buf, MIN2(64, sizedwords), level + 1);
434 try_disasm_a3xx(buf, sizedwords, level + 2, stdout, options->info->chip * 100);
435
436 /* this is a bit ugly way, but oh well.. */
437 if (strstr(name, "SP_VS_OBJ")) {
438 ext = "vo3";
439 } else if (strstr(name, "SP_FS_OBJ")) {
440 ext = "fo3";
441 } else if (strstr(name, "SP_GS_OBJ")) {
442 ext = "go3";
443 } else if (strstr(name, "SP_CS_OBJ")) {
444 ext = "co3";
445 } else {
446 ext = NULL;
447 }
448
449 if (ext)
450 dump_shader(ext, buf, sizedwords * 4);
451 }
452 }
453
/* Register handler: treat the 32-bit register value as a shader address and
 * disassemble it.
 */
static void
reg_disasm_gpuaddr(const char *name, uint32_t dword, int level)
{
   const uint64_t addr = dword;

   disasm_gpuaddr(name, addr, level);
}
459
460 static void
reg_disasm_gpuaddr_hi(const char * name,uint32_t dword,int level)461 reg_disasm_gpuaddr_hi(const char *name, uint32_t dword, int level)
462 {
463 disasm_gpuaddr(name, gpuaddr_lo | (((uint64_t)dword) << 32), level);
464 }
465
/* 64-bit register handler: disassemble the shader at the address held in
 * `qword`.
 */
static void
reg_disasm_gpuaddr64(const char *name, uint64_t qword, int level)
{
   const uint64_t addr = qword;

   disasm_gpuaddr(name, addr, level);
}
471
/* Find the value of the TEX_COUNT register that corresponds to the named
 * TEX_SAMP/TEX_CONST reg.
 *
 * The count register name is built by keeping everything in `name` up to
 * the first "CONST" (or, failing that, "SAMP") and appending "COUNT".
 * Returns 0 if `name` contains neither.
 *
 * Note, this kinda assumes an equal # of samplers and textures, but not
 * really sure if there is a much better option.  I suppose on a6xx we
 * could instead decode the bitfields in SP_xS_CONFIG
 */
static int
get_tex_count(const char *name)
{
   static const char count_suffix[] = "COUNT";
   char count_reg[strlen(name) + 5];

   const char *kind = strstr(name, "CONST");
   if (!kind)
      kind = strstr(name, "SAMP");
   if (!kind)
      return 0;

   const size_t prefix_len = kind - name;

   memcpy(count_reg, name, prefix_len);
   /* sizeof includes the terminating NUL */
   memcpy(count_reg + prefix_len, count_suffix, sizeof(count_suffix));

   return reg_val(regbase(count_reg));
}
497
498 static void
reg_dump_tex_samp_hi(const char * name,uint32_t dword,int level)499 reg_dump_tex_samp_hi(const char *name, uint32_t dword, int level)
500 {
501 if (!in_summary)
502 return;
503
504 int num_unit = get_tex_count(name);
505 uint64_t gpuaddr = gpuaddr_lo | (((uint64_t)dword) << 32);
506 void *buf = hostptr(gpuaddr);
507
508 if (!buf)
509 return;
510
511 dump_tex_samp(buf, STATE_SRC_DIRECT, num_unit, level + 1);
512 }
513
514 static void
reg_dump_tex_const_hi(const char * name,uint32_t dword,int level)515 reg_dump_tex_const_hi(const char *name, uint32_t dword, int level)
516 {
517 if (!in_summary)
518 return;
519
520 int num_unit = get_tex_count(name);
521 uint64_t gpuaddr = gpuaddr_lo | (((uint64_t)dword) << 32);
522 void *buf = hostptr(gpuaddr);
523
524 if (!buf)
525 return;
526
527 dump_tex_const(buf, num_unit, level + 1);
528 }
529
/*
 * Registers with special handling (rnndec_decode() handles rest):
 *
 * One table per GPU generation, each terminated by a {NULL} entry.
 * cffdec_init() points type0_reg at the table for the chip being decoded,
 * init_rnn() resolves every regname to its regbase, and dump_register()
 * calls the matching handler after the register value has been printed.
 */
/* REG: 32-bit register handled by `fxn`.  The register name is stringified,
 * so it must be spelled exactly as the rnn database knows it.
 */
#define REG(x, fxn)    { #x, fxn }
/* REG64: 64-bit register handled by `fxn` through the fxn64 slot. */
#define REG64(x, fxn)  { #x, .fxn64 = fxn, .is_reg64 = true }
static struct {
   /* register name, looked up via regbase() in init_rnn() */
   const char *regname;
   /* handler for 32-bit entries (is_reg64 == false) */
   void (*fxn)(const char *name, uint32_t dword, int level);
   /* handler for 64-bit entries (is_reg64 == true) */
   void (*fxn64)(const char *name, uint64_t qword, int level);
   /* register offset, filled in by init_rnn() */
   uint32_t regbase;
   /* selects fxn64 instead of fxn in dump_register() */
   bool is_reg64;
} reg_a2xx[] = {
      REG(CP_SCRATCH_REG0, reg_dump_scratch),
      REG(CP_SCRATCH_REG1, reg_dump_scratch),
      REG(CP_SCRATCH_REG2, reg_dump_scratch),
      REG(CP_SCRATCH_REG3, reg_dump_scratch),
      REG(CP_SCRATCH_REG4, reg_dump_scratch),
      REG(CP_SCRATCH_REG5, reg_dump_scratch),
      REG(CP_SCRATCH_REG6, reg_dump_scratch),
      REG(CP_SCRATCH_REG7, reg_dump_scratch),
      {NULL},
}, reg_a3xx[] = {
      REG(CP_SCRATCH_REG0, reg_dump_scratch),
      REG(CP_SCRATCH_REG1, reg_dump_scratch),
      REG(CP_SCRATCH_REG2, reg_dump_scratch),
      REG(CP_SCRATCH_REG3, reg_dump_scratch),
      REG(CP_SCRATCH_REG4, reg_dump_scratch),
      REG(CP_SCRATCH_REG5, reg_dump_scratch),
      REG(CP_SCRATCH_REG6, reg_dump_scratch),
      REG(CP_SCRATCH_REG7, reg_dump_scratch),
      REG(VSC_SIZE_ADDRESS, reg_dump_gpuaddr),
      REG(SP_VS_PVT_MEM_ADDR_REG, reg_dump_gpuaddr),
      REG(SP_FS_PVT_MEM_ADDR_REG, reg_dump_gpuaddr),
      REG(SP_VS_OBJ_START_REG, reg_disasm_gpuaddr),
      REG(SP_FS_OBJ_START_REG, reg_disasm_gpuaddr),
      REG(TPL1_TP_FS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
      {NULL},
}, reg_a4xx[] = {
      REG(CP_SCRATCH[0].REG, reg_dump_scratch),
      REG(CP_SCRATCH[0x1].REG, reg_dump_scratch),
      REG(CP_SCRATCH[0x2].REG, reg_dump_scratch),
      REG(CP_SCRATCH[0x3].REG, reg_dump_scratch),
      REG(CP_SCRATCH[0x4].REG, reg_dump_scratch),
      REG(CP_SCRATCH[0x5].REG, reg_dump_scratch),
      REG(CP_SCRATCH[0x6].REG, reg_dump_scratch),
      REG(CP_SCRATCH[0x7].REG, reg_dump_scratch),
      REG(SP_VS_PVT_MEM_ADDR, reg_dump_gpuaddr),
      REG(SP_FS_PVT_MEM_ADDR, reg_dump_gpuaddr),
      REG(SP_GS_PVT_MEM_ADDR, reg_dump_gpuaddr),
      REG(SP_HS_PVT_MEM_ADDR, reg_dump_gpuaddr),
      REG(SP_DS_PVT_MEM_ADDR, reg_dump_gpuaddr),
      REG(SP_CS_PVT_MEM_ADDR, reg_dump_gpuaddr),
      REG(SP_VS_OBJ_START, reg_disasm_gpuaddr),
      REG(SP_FS_OBJ_START, reg_disasm_gpuaddr),
      REG(SP_GS_OBJ_START, reg_disasm_gpuaddr),
      REG(SP_HS_OBJ_START, reg_disasm_gpuaddr),
      REG(SP_DS_OBJ_START, reg_disasm_gpuaddr),
      REG(SP_CS_OBJ_START, reg_disasm_gpuaddr),
      REG(TPL1_TP_VS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
      REG(TPL1_TP_HS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
      REG(TPL1_TP_DS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
      REG(TPL1_TP_GS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
      REG(TPL1_TP_FS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
      {NULL},
}, reg_a5xx[] = {
      /* a5xx addresses are split into _LO/_HI register pairs: the _LO
       * handler latches the low half and the _HI handler acts on the
       * combined 64-bit value.
       */
      REG(CP_SCRATCH[0x4].REG, reg_dump_scratch),
      REG(CP_SCRATCH[0x5].REG, reg_dump_scratch),
      REG(CP_SCRATCH[0x6].REG, reg_dump_scratch),
      REG(CP_SCRATCH[0x7].REG, reg_dump_scratch),
      REG(SP_VS_OBJ_START_LO, reg_gpuaddr_lo),
      REG(SP_VS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
      REG(SP_HS_OBJ_START_LO, reg_gpuaddr_lo),
      REG(SP_HS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
      REG(SP_DS_OBJ_START_LO, reg_gpuaddr_lo),
      REG(SP_DS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
      REG(SP_GS_OBJ_START_LO, reg_gpuaddr_lo),
      REG(SP_GS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
      REG(SP_FS_OBJ_START_LO, reg_gpuaddr_lo),
      REG(SP_FS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
      REG(SP_CS_OBJ_START_LO, reg_gpuaddr_lo),
      REG(SP_CS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
      REG(TPL1_VS_TEX_CONST_LO, reg_gpuaddr_lo),
      REG(TPL1_VS_TEX_CONST_HI, reg_dump_tex_const_hi),
      REG(TPL1_VS_TEX_SAMP_LO, reg_gpuaddr_lo),
      REG(TPL1_VS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
      REG(TPL1_HS_TEX_CONST_LO, reg_gpuaddr_lo),
      REG(TPL1_HS_TEX_CONST_HI, reg_dump_tex_const_hi),
      REG(TPL1_HS_TEX_SAMP_LO, reg_gpuaddr_lo),
      REG(TPL1_HS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
      REG(TPL1_DS_TEX_CONST_LO, reg_gpuaddr_lo),
      REG(TPL1_DS_TEX_CONST_HI, reg_dump_tex_const_hi),
      REG(TPL1_DS_TEX_SAMP_LO, reg_gpuaddr_lo),
      REG(TPL1_DS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
      REG(TPL1_GS_TEX_CONST_LO, reg_gpuaddr_lo),
      REG(TPL1_GS_TEX_CONST_HI, reg_dump_tex_const_hi),
      REG(TPL1_GS_TEX_SAMP_LO, reg_gpuaddr_lo),
      REG(TPL1_GS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
      REG(TPL1_FS_TEX_CONST_LO, reg_gpuaddr_lo),
      REG(TPL1_FS_TEX_CONST_HI, reg_dump_tex_const_hi),
      REG(TPL1_FS_TEX_SAMP_LO, reg_gpuaddr_lo),
      REG(TPL1_FS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
      REG(TPL1_CS_TEX_CONST_LO, reg_gpuaddr_lo),
      REG(TPL1_CS_TEX_CONST_HI, reg_dump_tex_const_hi),
      REG(TPL1_CS_TEX_SAMP_LO, reg_gpuaddr_lo),
      REG(TPL1_CS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
      REG(TPL1_TP_BORDER_COLOR_BASE_ADDR_LO, reg_gpuaddr_lo),
      REG(TPL1_TP_BORDER_COLOR_BASE_ADDR_HI, reg_dump_gpuaddr_hi),
      /* Disabled entries, kept for reference: */
//      REG(RB_MRT_FLAG_BUFFER[0].ADDR_LO, reg_gpuaddr_lo),
//      REG(RB_MRT_FLAG_BUFFER[0].ADDR_HI, reg_dump_gpuaddr_hi),
//      REG(RB_MRT_FLAG_BUFFER[1].ADDR_LO, reg_gpuaddr_lo),
//      REG(RB_MRT_FLAG_BUFFER[1].ADDR_HI, reg_dump_gpuaddr_hi),
//      REG(RB_MRT_FLAG_BUFFER[2].ADDR_LO, reg_gpuaddr_lo),
//      REG(RB_MRT_FLAG_BUFFER[2].ADDR_HI, reg_dump_gpuaddr_hi),
//      REG(RB_MRT_FLAG_BUFFER[3].ADDR_LO, reg_gpuaddr_lo),
//      REG(RB_MRT_FLAG_BUFFER[3].ADDR_HI, reg_dump_gpuaddr_hi),
//      REG(RB_MRT_FLAG_BUFFER[4].ADDR_LO, reg_gpuaddr_lo),
//      REG(RB_MRT_FLAG_BUFFER[4].ADDR_HI, reg_dump_gpuaddr_hi),
//      REG(RB_MRT_FLAG_BUFFER[5].ADDR_LO, reg_gpuaddr_lo),
//      REG(RB_MRT_FLAG_BUFFER[5].ADDR_HI, reg_dump_gpuaddr_hi),
//      REG(RB_MRT_FLAG_BUFFER[6].ADDR_LO, reg_gpuaddr_lo),
//      REG(RB_MRT_FLAG_BUFFER[6].ADDR_HI, reg_dump_gpuaddr_hi),
//      REG(RB_MRT_FLAG_BUFFER[7].ADDR_LO, reg_gpuaddr_lo),
//      REG(RB_MRT_FLAG_BUFFER[7].ADDR_HI, reg_dump_gpuaddr_hi),
//      REG(RB_BLIT_FLAG_DST_LO, reg_gpuaddr_lo),
//      REG(RB_BLIT_FLAG_DST_HI, reg_dump_gpuaddr_hi),
//      REG(RB_MRT[0].BASE_LO, reg_gpuaddr_lo),
//      REG(RB_MRT[0].BASE_HI, reg_dump_gpuaddr_hi),
//      REG(RB_DEPTH_BUFFER_BASE_LO, reg_gpuaddr_lo),
//      REG(RB_DEPTH_BUFFER_BASE_HI, reg_dump_gpuaddr_hi),
//      REG(RB_DEPTH_FLAG_BUFFER_BASE_LO, reg_gpuaddr_lo),
//      REG(RB_DEPTH_FLAG_BUFFER_BASE_HI, reg_dump_gpuaddr_hi),
//      REG(RB_BLIT_DST_LO, reg_gpuaddr_lo),
//      REG(RB_BLIT_DST_HI, reg_dump_gpuaddr_hi),

//      REG(RB_2D_SRC_LO, reg_gpuaddr_lo),
//      REG(RB_2D_SRC_HI, reg_dump_gpuaddr_hi),
//      REG(RB_2D_SRC_FLAGS_LO, reg_gpuaddr_lo),
//      REG(RB_2D_SRC_FLAGS_HI, reg_dump_gpuaddr_hi),
//      REG(RB_2D_DST_LO, reg_gpuaddr_lo),
//      REG(RB_2D_DST_HI, reg_dump_gpuaddr_hi),
//      REG(RB_2D_DST_FLAGS_LO, reg_gpuaddr_lo),
//      REG(RB_2D_DST_FLAGS_HI, reg_dump_gpuaddr_hi),

      {NULL},
}, reg_a6xx[] = {
      REG(CP_SCRATCH[0x4].REG, reg_dump_scratch),
      REG(CP_SCRATCH[0x5].REG, reg_dump_scratch),
      REG(CP_SCRATCH[0x6].REG, reg_dump_scratch),
      REG(CP_SCRATCH[0x7].REG, reg_dump_scratch),

      REG64(SP_VS_OBJ_START, reg_disasm_gpuaddr64),
      REG64(SP_HS_OBJ_START, reg_disasm_gpuaddr64),
      REG64(SP_DS_OBJ_START, reg_disasm_gpuaddr64),
      REG64(SP_GS_OBJ_START, reg_disasm_gpuaddr64),
      REG64(SP_FS_OBJ_START, reg_disasm_gpuaddr64),
      REG64(SP_CS_OBJ_START, reg_disasm_gpuaddr64),

      REG64(SP_VS_TEX_CONST, reg_dump_gpuaddr64),
      REG64(SP_VS_TEX_SAMP, reg_dump_gpuaddr64),
      REG64(SP_HS_TEX_CONST, reg_dump_gpuaddr64),
      REG64(SP_HS_TEX_SAMP, reg_dump_gpuaddr64),
      REG64(SP_DS_TEX_CONST, reg_dump_gpuaddr64),
      REG64(SP_DS_TEX_SAMP, reg_dump_gpuaddr64),
      REG64(SP_GS_TEX_CONST, reg_dump_gpuaddr64),
      REG64(SP_GS_TEX_SAMP, reg_dump_gpuaddr64),
      REG64(SP_FS_TEX_CONST, reg_dump_gpuaddr64),
      REG64(SP_FS_TEX_SAMP, reg_dump_gpuaddr64),
      REG64(SP_CS_TEX_CONST, reg_dump_gpuaddr64),
      REG64(SP_CS_TEX_SAMP, reg_dump_gpuaddr64),

      {NULL},
}, reg_a7xx[] = {
      REG64(SP_VS_OBJ_START, reg_disasm_gpuaddr64),
      REG64(SP_HS_OBJ_START, reg_disasm_gpuaddr64),
      REG64(SP_DS_OBJ_START, reg_disasm_gpuaddr64),
      REG64(SP_GS_OBJ_START, reg_disasm_gpuaddr64),
      REG64(SP_FS_OBJ_START, reg_disasm_gpuaddr64),
      REG64(SP_CS_OBJ_START, reg_disasm_gpuaddr64),

      {NULL},
}, *type0_reg;
711
712 static struct rnn *rnn;
713
714 static void
init_rnn(const char * gpuname)715 init_rnn(const char *gpuname)
716 {
717 rnn = rnn_new(!options->color);
718
719 rnn_load(rnn, gpuname);
720
721 if (options->querystrs) {
722 int i;
723 queryvals = calloc(options->nquery, sizeof(queryvals[0]));
724
725 for (i = 0; i < options->nquery; i++) {
726 int val = strtol(options->querystrs[i], NULL, 0);
727
728 if (val == 0)
729 val = regbase(options->querystrs[i]);
730
731 queryvals[i] = val;
732 printf("querystr: %s -> 0x%x\n", options->querystrs[i], queryvals[i]);
733 }
734 }
735
736 for (unsigned idx = 0; type0_reg[idx].regname; idx++) {
737 type0_reg[idx].regbase = regbase(type0_reg[idx].regname);
738 if (!type0_reg[idx].regbase) {
739 printf("invalid register name: %s\n", type0_reg[idx].regname);
740 exit(1);
741 }
742 }
743 }
744
745 void
reset_regs(void)746 reset_regs(void)
747 {
748 clear_written();
749 clear_lastvals();
750 memset(&ibs, 0, sizeof(ibs));
751 }
752
753 void
cffdec_init(const struct cffdec_options * _options)754 cffdec_init(const struct cffdec_options *_options)
755 {
756 options = _options;
757 summary = options->summary;
758
759 /* in case we're decoding multiple files: */
760 free(queryvals);
761 reset_regs();
762 draw_count = 0;
763
764 if (!options->info)
765 return;
766
767 switch (options->info->chip) {
768 case 2:
769 type0_reg = reg_a2xx;
770 init_rnn("a2xx");
771 break;
772 case 3:
773 type0_reg = reg_a3xx;
774 init_rnn("a3xx");
775 break;
776 case 4:
777 type0_reg = reg_a4xx;
778 init_rnn("a4xx");
779 break;
780 case 5:
781 type0_reg = reg_a5xx;
782 init_rnn("a5xx");
783 break;
784 case 6:
785 type0_reg = reg_a6xx;
786 init_rnn("a6xx");
787 break;
788 case 7:
789 type0_reg = reg_a7xx;
790 init_rnn("a7xx");
791 break;
792 default:
793 errx(-1, "unsupported generation: %u", options->info->chip);
794 }
795 }
796
797 const char *
pktname(unsigned opc)798 pktname(unsigned opc)
799 {
800 return rnn_enumname(rnn, "adreno_pm4_type3_packets", opc);
801 }
802
803 const char *
regname(uint32_t regbase,int color)804 regname(uint32_t regbase, int color)
805 {
806 return rnn_regname(rnn, regbase, color);
807 }
808
809 uint32_t
regbase(const char * name)810 regbase(const char *name)
811 {
812 return rnn_regbase(rnn, name);
813 }
814
/* Returns non-zero if the name of the register at `regbase` ends with
 * `suffix`, zero otherwise.
 *
 * The tail of the name is compared directly, so a suffix that also occurs
 * earlier in the name (e.g. "A_HI_B_HI") is still recognised, and no
 * pointer outside the name string is ever formed.
 */
static int
endswith(uint32_t regbase, const char *suffix)
{
   const char *name = regname(regbase, 0);

   /* NOTE(review): assumes regname() may return NULL for an unknown
    * register -- confirm against rnn_regname().
    */
   if (!name)
      return 0;

   const size_t name_len = strlen(name);
   const size_t suffix_len = strlen(suffix);

   if (suffix_len > name_len)
      return 0;

   return strcmp(name + (name_len - suffix_len), suffix) == 0;
}
824
825 struct regacc
regacc(struct rnn * r)826 regacc(struct rnn *r)
827 {
828 if (!r)
829 r = rnn;
830
831 return (struct regacc){ .rnn = r };
832 }
833
/* Feed one register dword into the accumulator.
 *
 * For a 32-bit register the value is complete immediately.  For a register
 * the database reports as 64 bits wide, the first push stores the low
 * dword and the following push (which must be for regbase + 1) supplies
 * the high dword.
 *
 * Returns true if the complete reg value has been accumulated (r->regbase
 * and r->value are then ready to be consumed), false if the high dword is
 * still outstanding.
 */
bool
regacc_push(struct regacc *r, uint32_t regbase, uint32_t dword)
{
   if (r->has_dword_lo) {
      /* Work around kernel devcore dumps which accidentally miss half of a 64b reg
       * see: https://patchwork.freedesktop.org/series/112302/
       */
      if (regbase != r->regbase + 1) {
         printf("WARNING: 64b discontinuity (%x, expected %x)\n", regbase, r->regbase + 1);
         /* report the low half on its own; the dword passed to this call
          * is not stored in the accumulator
          */
         r->has_dword_lo = false;
         return true;
      }

      r->value |= ((uint64_t)dword) << 32;
      r->has_dword_lo = false;

      return true;
   }

   /* start of a new register: */
   r->regbase = regbase;
   r->value = dword;

   struct rnndecaddrinfo *info = rnn_reginfo(r->rnn, regbase);
   r->has_dword_lo = (info->width == 64);

   /* Workaround for kernel devcore dump bugs: */
   if ((info->width == 64) && endswith(regbase, "_HI")) {
      printf("WARNING: 64b discontinuity (no _LO dword for %x)\n", regbase);
      r->has_dword_lo = false;
   }

   rnn_reginfo_free(info);

   return !r->has_dword_lo;
}
870
/* Print one decoded register line for the value held in `r`, indented for
 * `level`.
 *
 * With type information the line is "<name>: <decoded value>", optionally
 * followed by base/offset/size details when the value looks like a GPU
 * address that hostptr() can resolve.  Without type information the raw
 * value is printed in hex, keyed by the register name or, if the register
 * is unknown, by "<offset>".
 */
void
dump_register_val(struct regacc *r, int level)
{
   struct rnndecaddrinfo *info = rnn_reginfo(rnn, r->regbase);

   if (info && info->typeinfo) {
      uint64_t gpuaddr = 0;
      char *decoded = rnndec_decodeval(rnn->vc, info->typeinfo, r->value);
      printf("%s%s: %s", levels[level], info->name, decoded);

      /* Try and figure out if we are looking at a gpuaddr.. this
       * might be useful for other gen's too, but at least a5xx has
       * the _HI/_LO suffix we can look for.  Maybe a better approach
       * would be some special annotation in the xml..
       * for a6xx use "address" and "waddress" types
       */
      if (options->info->chip >= 6) {
         if (!strcmp(info->typeinfo->name, "address") ||
             !strcmp(info->typeinfo->name, "waddress")) {
            gpuaddr = r->value;
         }
      } else if (options->info->chip >= 5) {
         /* TODO we shouldn't rely on reg_val() since reg_set() might
          * not have been called yet for the other half of the 64b reg.
          * We can remove this hack once a5xx.xml is converted to reg64
          * and address/waddress.
          */
         if (endswith(r->regbase, "_HI") && endswith(r->regbase - 1, "_LO")) {
            gpuaddr = (r->value << 32) | reg_val(r->regbase - 1);
         } else if (endswith(r->regbase, "_LO") && endswith(r->regbase + 1, "_HI")) {
            gpuaddr = (((uint64_t)reg_val(r->regbase + 1)) << 32) | r->value;
         }
      }

      /* only annotate addresses that fall inside a known buffer: */
      if (gpuaddr && hostptr(gpuaddr)) {
         printf("\t\tbase=%" PRIx64 ", offset=%" PRIu64 ", size=%u",
                gpubaseaddr(gpuaddr), gpuaddr - gpubaseaddr(gpuaddr),
                hostlen(gpubaseaddr(gpuaddr)));
      }

      printf("\n");

      free(decoded);
   } else if (info) {
      printf("%s%s: %08"PRIx64"\n", levels[level], info->name, r->value);
   } else {
      printf("%s<%04x>: %08"PRIx64"\n", levels[level], r->regbase, r->value);
   }

   rnn_reginfo_free(info);
}
922
923 static void
dump_register(struct regacc * r,int level)924 dump_register(struct regacc *r, int level)
925 {
926 if (!quiet(3)) {
927 dump_register_val(r, level);
928 }
929
930 for (unsigned idx = 0; type0_reg[idx].regname; idx++) {
931 if (type0_reg[idx].regbase == r->regbase) {
932 if (type0_reg[idx].is_reg64) {
933 type0_reg[idx].fxn64(type0_reg[idx].regname, r->value, level);
934 } else {
935 type0_reg[idx].fxn(type0_reg[idx].regname, (uint32_t)r->value, level);
936 }
937 break;
938 }
939 }
940 }
941
/* True for register offsets in the half-open range [0x2000, 0x2400), which
 * dump_registers() treats as banked (no WFI warning).
 */
static bool
is_banked_reg(uint32_t regbase)
{
   if (regbase < 0x2000)
      return false;

   return regbase < 0x2400;
}
947
948 static void
dump_registers(uint32_t regbase,uint32_t * dwords,uint32_t sizedwords,int level)949 dump_registers(uint32_t regbase, uint32_t *dwords, uint32_t sizedwords,
950 int level)
951 {
952 struct regacc r = regacc(NULL);
953
954 while (sizedwords--) {
955 int last_summary = summary;
956
957 /* access to non-banked registers needs a WFI:
958 * TODO banked register range for a2xx??
959 */
960 if (needs_wfi && !is_banked_reg(regbase))
961 printl(2, "NEEDS WFI: %s (%x)\n", regname(regbase, 1), regbase);
962
963 reg_set(regbase, *dwords);
964 if (regacc_push(&r, regbase, *dwords))
965 dump_register(&r, level);
966 regbase++;
967 dwords++;
968 summary = last_summary;
969 }
970 }
971
972 static void
dump_domain(uint32_t * dwords,uint32_t sizedwords,int level,const char * name)973 dump_domain(uint32_t *dwords, uint32_t sizedwords, int level, const char *name)
974 {
975 struct rnndomain *dom;
976 int i;
977
978 dom = rnn_finddomain(rnn->db, name);
979
980 if (!dom)
981 return;
982
983 if (script_packet)
984 script_packet(dwords, sizedwords, rnn, dom);
985
986 if (quiet(2))
987 return;
988
989 for (i = 0; i < sizedwords; i++) {
990 struct rnndecaddrinfo *info = rnndec_decodeaddr(rnn->vc, dom, i, 0);
991 char *decoded;
992 if (!(info && info->typeinfo))
993 break;
994 uint64_t value = dwords[i];
995 if (info->typeinfo->high >= 32 && i < sizedwords - 1) {
996 value |= (uint64_t)dwords[i + 1] << 32;
997 i++; /* skip the next dword since we're printing it now */
998 }
999 decoded = rnndec_decodeval(rnn->vc, info->typeinfo, value);
1000 /* Unlike the register printing path, we don't print the name
1001 * of the register, so if it doesn't contain other named
1002 * things (i.e. it isn't a bitset) then print the register
1003 * name as if it's a bitset with a single entry. This avoids
1004 * having to create a dummy register with a single entry to
1005 * get a name in the decoding.
1006 */
1007 if (info->typeinfo->type == RNN_TTYPE_BITSET ||
1008 info->typeinfo->type == RNN_TTYPE_INLINE_BITSET) {
1009 printf("%s%s\n", levels[level], decoded);
1010 } else {
1011 printf("%s{ %s%s%s = %s }\n", levels[level], rnn->vc->colors->rname,
1012 info->name, rnn->vc->colors->reset, decoded);
1013 }
1014 free(decoded);
1015 free(info->name);
1016 free(info);
1017 }
1018 }
1019
/* Bin rectangle printed by __do_query(); on generations 5 and 6 it is
 * refreshed there from the GRAS_SC_WINDOW_SCISSOR_TL/BR registers.
 */
static uint32_t bin_x1, bin_x2, bin_y1, bin_y2;
static unsigned mode;
/* Strings printed by print_mode() and __do_query(); `thread` is optional
 * and only printed when non-NULL.
 */
static const char *render_mode;
static const char *thread;
/* Bitmask of render modes; starts out with every mode enabled. */
static enum {
   MODE_BINNING = 0x1,
   MODE_GMEM = 0x2,
   MODE_BYPASS = 0x4,
   MODE_ALL = MODE_BINNING | MODE_GMEM | MODE_BYPASS,
} enable_mask = MODE_ALL;
/* Reported by print_mode() as "skip_ib2: g=.., l=..". */
static bool skip_ib2_enable_global;
static bool skip_ib2_enable_local;
1032
1033 static void
print_mode(int level)1034 print_mode(int level)
1035 {
1036 if ((options->info->chip >= 5) && !quiet(2)) {
1037 printf("%smode: %s", levels[level], render_mode);
1038 if (thread)
1039 printf(":%s", thread);
1040 printf("\n");
1041 printf("%sskip_ib2: g=%d, l=%d\n", levels[level], skip_ib2_enable_global,
1042 skip_ib2_enable_local);
1043 }
1044 }
1045
1046 static bool
skip_query(void)1047 skip_query(void)
1048 {
1049 switch (options->query_mode) {
1050 case QUERY_ALL:
1051 /* never skip: */
1052 return false;
1053 case QUERY_WRITTEN:
1054 for (int i = 0; i < options->nquery; i++) {
1055 uint32_t regbase = queryvals[i];
1056 if (!reg_written(regbase)) {
1057 continue;
1058 }
1059 if (reg_rewritten(regbase)) {
1060 return false;
1061 }
1062 }
1063 return true;
1064 case QUERY_DELTA:
1065 for (int i = 0; i < options->nquery; i++) {
1066 uint32_t regbase = queryvals[i];
1067 if (!reg_written(regbase)) {
1068 continue;
1069 }
1070 uint32_t lastval = reg_val(regbase);
1071 if (lastval != lastvals[regbase]) {
1072 return false;
1073 }
1074 }
1075 return true;
1076 }
1077 return true;
1078 }
1079
1080 static void
__do_query(const char * primtype,uint32_t num_indices)1081 __do_query(const char *primtype, uint32_t num_indices)
1082 {
1083 int n = 0;
1084
1085 if ((5 <= options->info->chip) && (options->info->chip < 7)) {
1086 uint32_t scissor_tl = reg_val(regbase("GRAS_SC_WINDOW_SCISSOR_TL"));
1087 uint32_t scissor_br = reg_val(regbase("GRAS_SC_WINDOW_SCISSOR_BR"));
1088
1089 bin_x1 = scissor_tl & 0xffff;
1090 bin_y1 = scissor_tl >> 16;
1091 bin_x2 = scissor_br & 0xffff;
1092 bin_y2 = scissor_br >> 16;
1093 }
1094
1095 for (int i = 0; i < options->nquery; i++) {
1096 uint32_t regbase = queryvals[i];
1097 if (!reg_written(regbase))
1098 continue;
1099
1100 struct regacc r = regacc(NULL);
1101
1102 /* 64b regs require two successive 32b dwords: */
1103 for (int d = 0; d < 2; d++)
1104 if (regacc_push(&r, regbase + d, reg_val(regbase + d)))
1105 break;
1106
1107 printf("%4d: %s(%u,%u-%u,%u):%u:", draw_count, primtype, bin_x1,
1108 bin_y1, bin_x2, bin_y2, num_indices);
1109 if (options->info->chip >= 5)
1110 printf("%s:", render_mode);
1111 if (thread)
1112 printf("%s:", thread);
1113 printf("\t%08"PRIx64, r.value);
1114 if (r.value != lastvals[regbase]) {
1115 printf("!");
1116 } else {
1117 printf(" ");
1118 }
1119 if (reg_rewritten(regbase)) {
1120 printf("+");
1121 } else {
1122 printf(" ");
1123 }
1124 dump_register_val(&r, 0);
1125 n++;
1126 }
1127
1128 if (n > 1)
1129 printf("\n");
1130 }
1131
1132 static void
do_query_compare(const char * primtype,uint32_t num_indices)1133 do_query_compare(const char *primtype, uint32_t num_indices)
1134 {
1135 unsigned saved_enable_mask = enable_mask;
1136 const char *saved_render_mode = render_mode;
1137
1138 /* in 'query-compare' mode, we want to see if the register is writtten
1139 * or changed in any mode:
1140 *
1141 * (NOTE: this could cause false-positive for 'query-delta' if the reg
1142 * is written with different values in binning vs sysmem/gmem mode, as
1143 * we don't track previous values per-mode, but I think we can live with
1144 * that)
1145 */
1146 enable_mask = MODE_ALL;
1147
1148 clear_rewritten();
1149 load_all_groups(0);
1150
1151 if (!skip_query()) {
1152 /* dump binning pass values: */
1153 enable_mask = MODE_BINNING;
1154 render_mode = "BINNING";
1155 clear_rewritten();
1156 load_all_groups(0);
1157 __do_query(primtype, num_indices);
1158
1159 /* dump draw pass values: */
1160 enable_mask = MODE_GMEM | MODE_BYPASS;
1161 render_mode = "DRAW";
1162 clear_rewritten();
1163 load_all_groups(0);
1164 __do_query(primtype, num_indices);
1165
1166 printf("\n");
1167 }
1168
1169 enable_mask = saved_enable_mask;
1170 render_mode = saved_render_mode;
1171
1172 disable_all_groups();
1173 }
1174
1175 /* well, actually query and script..
1176 * NOTE: call this before dump_register_summary()
1177 */
1178 static void
do_query(const char * primtype,uint32_t num_indices)1179 do_query(const char *primtype, uint32_t num_indices)
1180 {
1181 if (script_draw)
1182 script_draw(primtype, num_indices);
1183
1184 if (options->query_compare) {
1185 do_query_compare(primtype, num_indices);
1186 return;
1187 }
1188
1189 if (skip_query())
1190 return;
1191
1192 __do_query(primtype, num_indices);
1193 }
1194
1195 static void
cp_im_loadi(uint32_t * dwords,uint32_t sizedwords,int level)1196 cp_im_loadi(uint32_t *dwords, uint32_t sizedwords, int level)
1197 {
1198 uint32_t start = dwords[1] >> 16;
1199 uint32_t size = dwords[1] & 0xffff;
1200 const char *type = NULL, *ext = NULL;
1201 gl_shader_stage disasm_type;
1202
1203 switch (dwords[0]) {
1204 case 0:
1205 type = "vertex";
1206 ext = "vo";
1207 disasm_type = MESA_SHADER_VERTEX;
1208 break;
1209 case 1:
1210 type = "fragment";
1211 ext = "fo";
1212 disasm_type = MESA_SHADER_FRAGMENT;
1213 break;
1214 default:
1215 type = "<unknown>";
1216 disasm_type = 0;
1217 break;
1218 }
1219
1220 printf("%s%s shader, start=%04x, size=%04x\n", levels[level], type, start,
1221 size);
1222 disasm_a2xx(dwords + 2, sizedwords - 2, level + 2, disasm_type);
1223
1224 /* dump raw shader: */
1225 if (ext)
1226 dump_shader(ext, dwords + 2, (sizedwords - 2) * 4);
1227 }
1228
1229 static void
cp_wide_reg_write(uint32_t * dwords,uint32_t sizedwords,int level)1230 cp_wide_reg_write(uint32_t *dwords, uint32_t sizedwords, int level)
1231 {
1232 uint32_t reg = dwords[0] & 0xffff;
1233 struct regacc r = regacc(NULL);
1234 for (int i = 1; i < sizedwords; i++) {
1235 if (regacc_push(&r, reg, dwords[i]))
1236 dump_register(&r, level + 1);
1237 reg_set(reg, dwords[i]);
1238 reg++;
1239 }
1240 }
1241
/* Kinds of state a load-state packet can carry.  Produced by the
 * *_get_state_type() lookups and dispatched on in cp_load_state().
 * Numbering starts at 1, so a zero-initialized lookup entry does not
 * match any of these.
 */
enum state_t {
   TEX_SAMP = 1,
   TEX_CONST,
   TEX_MIPADDR, /* a3xx only */
   SHADER_PROG,
   SHADER_CONST,

   // image/ssbo state:
   SSBO_0,
   SSBO_1,
   SSBO_2,

   UBO,

   // unknown things, which just get hexdumped:
   UNKNOWN_DWORDS,
   UNKNOWN_2DWORDS,
   UNKNOWN_4DWORDS,
};
1261
/* State block ids; used as the first index of the lookup table in
 * a3xx_get_state_type().
 */
enum adreno_state_block {
   SB_VERT_TEX = 0,
   SB_VERT_MIPADDR = 1,
   SB_FRAG_TEX = 2,
   SB_FRAG_MIPADDR = 3,
   SB_VERT_SHADER = 4,
   SB_GEOM_SHADER = 5,
   SB_FRAG_SHADER = 6,
   SB_COMPUTE_SHADER = 7,
};
1272
1273 /* TODO there is probably a clever way to let rnndec parse things so
1274 * we don't have to care about packet format differences across gens
1275 */
1276
1277 static void
a3xx_get_state_type(uint32_t * dwords,gl_shader_stage * stage,enum state_t * state,enum state_src_t * src)1278 a3xx_get_state_type(uint32_t *dwords, gl_shader_stage *stage,
1279 enum state_t *state, enum state_src_t *src)
1280 {
1281 unsigned state_block_id = (dwords[0] >> 19) & 0x7;
1282 unsigned state_type = dwords[1] & 0x3;
1283 static const struct {
1284 gl_shader_stage stage;
1285 enum state_t state;
1286 } lookup[0xf][0x3] = {
1287 [SB_VERT_TEX][0] = {MESA_SHADER_VERTEX, TEX_SAMP},
1288 [SB_VERT_TEX][1] = {MESA_SHADER_VERTEX, TEX_CONST},
1289 [SB_FRAG_TEX][0] = {MESA_SHADER_FRAGMENT, TEX_SAMP},
1290 [SB_FRAG_TEX][1] = {MESA_SHADER_FRAGMENT, TEX_CONST},
1291 [SB_VERT_SHADER][0] = {MESA_SHADER_VERTEX, SHADER_PROG},
1292 [SB_VERT_SHADER][1] = {MESA_SHADER_VERTEX, SHADER_CONST},
1293 [SB_FRAG_SHADER][0] = {MESA_SHADER_FRAGMENT, SHADER_PROG},
1294 [SB_FRAG_SHADER][1] = {MESA_SHADER_FRAGMENT, SHADER_CONST},
1295 };
1296
1297 *stage = lookup[state_block_id][state_type].stage;
1298 *state = lookup[state_block_id][state_type].state;
1299 unsigned state_src = (dwords[0] >> 16) & 0x7;
1300 if (state_src == 0 /* SS_DIRECT */)
1301 *src = STATE_SRC_DIRECT;
1302 else
1303 *src = STATE_SRC_INDIRECT;
1304 }
1305
1306 static enum state_src_t
_get_state_src(unsigned dword0)1307 _get_state_src(unsigned dword0)
1308 {
1309 switch ((dword0 >> 16) & 0x3) {
1310 case 0: /* SS4_DIRECT / SS6_DIRECT */
1311 return STATE_SRC_DIRECT;
1312 case 2: /* SS4_INDIRECT / SS6_INDIRECT */
1313 return STATE_SRC_INDIRECT;
1314 case 1: /* SS6_BINDLESS */
1315 return STATE_SRC_BINDLESS;
1316 default:
1317 return STATE_SRC_DIRECT;
1318 }
1319 }
1320
/* Shared lookup behind a4xx_get_state_type() and a6xx_get_state_type():
 * maps a (state_block_id, state_type) pair to the shader stage and the
 * kind of state being loaded, returned through stage and state.
 *
 * The table is 0x10 x 0x4, matching the 4-bit and 2-bit masks both
 * callers apply before calling.  Combinations that are not listed are
 * zero-initialized, i.e. a zero stage and a zero state (not a valid
 * enum state_t value).
 */
static void
_get_state_type(unsigned state_block_id, unsigned state_type,
                gl_shader_stage *stage, enum state_t *state)
{
   static const struct {
      gl_shader_stage stage;
      enum state_t state;
   } lookup[0x10][0x4] = {
      // SB4_VS_TEX:
      [0x0][0] = {MESA_SHADER_VERTEX, TEX_SAMP},
      [0x0][1] = {MESA_SHADER_VERTEX, TEX_CONST},
      [0x0][2] = {MESA_SHADER_VERTEX, UBO},
      // SB4_HS_TEX:
      [0x1][0] = {MESA_SHADER_TESS_CTRL, TEX_SAMP},
      [0x1][1] = {MESA_SHADER_TESS_CTRL, TEX_CONST},
      [0x1][2] = {MESA_SHADER_TESS_CTRL, UBO},
      // SB4_DS_TEX:
      [0x2][0] = {MESA_SHADER_TESS_EVAL, TEX_SAMP},
      [0x2][1] = {MESA_SHADER_TESS_EVAL, TEX_CONST},
      [0x2][2] = {MESA_SHADER_TESS_EVAL, UBO},
      // SB4_GS_TEX:
      [0x3][0] = {MESA_SHADER_GEOMETRY, TEX_SAMP},
      [0x3][1] = {MESA_SHADER_GEOMETRY, TEX_CONST},
      [0x3][2] = {MESA_SHADER_GEOMETRY, UBO},
      // SB4_FS_TEX:
      [0x4][0] = {MESA_SHADER_FRAGMENT, TEX_SAMP},
      [0x4][1] = {MESA_SHADER_FRAGMENT, TEX_CONST},
      [0x4][2] = {MESA_SHADER_FRAGMENT, UBO},
      // SB4_CS_TEX:
      [0x5][0] = {MESA_SHADER_COMPUTE, TEX_SAMP},
      [0x5][1] = {MESA_SHADER_COMPUTE, TEX_CONST},
      [0x5][2] = {MESA_SHADER_COMPUTE, UBO},
      // SB4_VS_SHADER:
      [0x8][0] = {MESA_SHADER_VERTEX, SHADER_PROG},
      [0x8][1] = {MESA_SHADER_VERTEX, SHADER_CONST},
      [0x8][2] = {MESA_SHADER_VERTEX, UBO},
      // SB4_HS_SHADER
      [0x9][0] = {MESA_SHADER_TESS_CTRL, SHADER_PROG},
      [0x9][1] = {MESA_SHADER_TESS_CTRL, SHADER_CONST},
      [0x9][2] = {MESA_SHADER_TESS_CTRL, UBO},
      // SB4_DS_SHADER
      [0xa][0] = {MESA_SHADER_TESS_EVAL, SHADER_PROG},
      [0xa][1] = {MESA_SHADER_TESS_EVAL, SHADER_CONST},
      [0xa][2] = {MESA_SHADER_TESS_EVAL, UBO},
      // SB4_GS_SHADER
      [0xb][0] = {MESA_SHADER_GEOMETRY, SHADER_PROG},
      [0xb][1] = {MESA_SHADER_GEOMETRY, SHADER_CONST},
      [0xb][2] = {MESA_SHADER_GEOMETRY, UBO},
      // SB4_FS_SHADER:
      [0xc][0] = {MESA_SHADER_FRAGMENT, SHADER_PROG},
      [0xc][1] = {MESA_SHADER_FRAGMENT, SHADER_CONST},
      [0xc][2] = {MESA_SHADER_FRAGMENT, UBO},
      // SB4_CS_SHADER:
      [0xd][0] = {MESA_SHADER_COMPUTE, SHADER_PROG},
      [0xd][1] = {MESA_SHADER_COMPUTE, SHADER_CONST},
      [0xd][2] = {MESA_SHADER_COMPUTE, UBO},
      [0xd][3] = {MESA_SHADER_COMPUTE, SSBO_0}, /* a6xx location */
      // SB4_SSBO (shared across all stages)
      [0xe][0] = {0, SSBO_0}, /* a5xx (and a4xx?) location */
      [0xe][1] = {0, SSBO_1},
      [0xe][2] = {0, SSBO_2},
      // SB4_CS_SSBO
      [0xf][0] = {MESA_SHADER_COMPUTE, SSBO_0},
      [0xf][1] = {MESA_SHADER_COMPUTE, SSBO_1},
      [0xf][2] = {MESA_SHADER_COMPUTE, SSBO_2},
      // unknown things
      /* This looks like combined UBO state for 3d stages (a5xx and
       * before??)  I think a6xx has UBO state per shader stage:
       */
      [0x6][2] = {0, UBO},
      [0x7][1] = {0, UNKNOWN_2DWORDS},
   };

   /* both indices are expected to be pre-masked by the caller */
   *stage = lookup[state_block_id][state_type].stage;
   *state = lookup[state_block_id][state_type].state;
}
1397
1398 static void
a4xx_get_state_type(uint32_t * dwords,gl_shader_stage * stage,enum state_t * state,enum state_src_t * src)1399 a4xx_get_state_type(uint32_t *dwords, gl_shader_stage *stage,
1400 enum state_t *state, enum state_src_t *src)
1401 {
1402 unsigned state_block_id = (dwords[0] >> 18) & 0xf;
1403 unsigned state_type = dwords[1] & 0x3;
1404 _get_state_type(state_block_id, state_type, stage, state);
1405 *src = _get_state_src(dwords[0]);
1406 }
1407
1408 static void
a6xx_get_state_type(uint32_t * dwords,gl_shader_stage * stage,enum state_t * state,enum state_src_t * src)1409 a6xx_get_state_type(uint32_t *dwords, gl_shader_stage *stage,
1410 enum state_t *state, enum state_src_t *src)
1411 {
1412 unsigned state_block_id = (dwords[0] >> 18) & 0xf;
1413 unsigned state_type = (dwords[0] >> 14) & 0x3;
1414 _get_state_type(state_block_id, state_type, stage, state);
1415 *src = _get_state_src(dwords[0]);
1416 }
1417
1418 static void
dump_tex_samp(uint32_t * texsamp,enum state_src_t src,int num_unit,int level)1419 dump_tex_samp(uint32_t *texsamp, enum state_src_t src, int num_unit, int level)
1420 {
1421 for (int i = 0; i < num_unit; i++) {
1422 /* work-around to reduce noise for opencl blob which always
1423 * writes the max # regardless of # of textures used
1424 */
1425 if ((num_unit == 16) && (texsamp[0] == 0) && (texsamp[1] == 0))
1426 break;
1427
1428 if (options->info->chip == 3) {
1429 dump_domain(texsamp, 2, level + 2, "A3XX_TEX_SAMP");
1430 dump_hex(texsamp, 2, level + 1);
1431 texsamp += 2;
1432 } else if (options->info->chip == 4) {
1433 dump_domain(texsamp, 2, level + 2, "A4XX_TEX_SAMP");
1434 dump_hex(texsamp, 2, level + 1);
1435 texsamp += 2;
1436 } else if (options->info->chip == 5) {
1437 dump_domain(texsamp, 4, level + 2, "A5XX_TEX_SAMP");
1438 dump_hex(texsamp, 4, level + 1);
1439 texsamp += 4;
1440 } else if ((6 <= options->info->chip) && (options->info->chip < 8)) {
1441 dump_domain(texsamp, 4, level + 2, "A6XX_TEX_SAMP");
1442 dump_hex(texsamp, 4, level + 1);
1443 texsamp += src == STATE_SRC_BINDLESS ? 16 : 4;
1444 }
1445 }
1446 }
1447
1448 static void
dump_tex_const(uint32_t * texconst,int num_unit,int level)1449 dump_tex_const(uint32_t *texconst, int num_unit, int level)
1450 {
1451 for (int i = 0; i < num_unit; i++) {
1452 /* work-around to reduce noise for opencl blob which always
1453 * writes the max # regardless of # of textures used
1454 */
1455 if ((num_unit == 16) && (texconst[0] == 0) && (texconst[1] == 0) &&
1456 (texconst[2] == 0) && (texconst[3] == 0))
1457 break;
1458
1459 if (options->info->chip == 3) {
1460 dump_domain(texconst, 4, level + 2, "A3XX_TEX_CONST");
1461 dump_hex(texconst, 4, level + 1);
1462 texconst += 4;
1463 } else if (options->info->chip == 4) {
1464 dump_domain(texconst, 8, level + 2, "A4XX_TEX_CONST");
1465 if (options->dump_textures) {
1466 uint32_t addr = texconst[4] & ~0x1f;
1467 dump_gpuaddr(addr, level - 2);
1468 }
1469 dump_hex(texconst, 8, level + 1);
1470 texconst += 8;
1471 } else if (options->info->chip == 5) {
1472 dump_domain(texconst, 12, level + 2, "A5XX_TEX_CONST");
1473 if (options->dump_textures) {
1474 uint64_t addr =
1475 (((uint64_t)texconst[5] & 0x1ffff) << 32) | texconst[4];
1476 dump_gpuaddr_size(addr, level - 2, hostlen(addr) / 4, 3);
1477 }
1478 dump_hex(texconst, 12, level + 1);
1479 texconst += 12;
1480 } else if ((6 <= options->info->chip) && (options->info->chip < 8)) {
1481 dump_domain(texconst, 16, level + 2, "A6XX_TEX_CONST");
1482 if (options->dump_textures) {
1483 uint64_t addr =
1484 (((uint64_t)texconst[5] & 0x1ffff) << 32) | texconst[4];
1485 dump_gpuaddr_size(addr, level - 2, hostlen(addr) / 4, 3);
1486 }
1487 dump_hex(texconst, 16, level + 1);
1488 texconst += 16;
1489 }
1490 }
1491 }
1492
1493 static void
dump_bindless_descriptors(bool is_compute,int level)1494 dump_bindless_descriptors(bool is_compute, int level)
1495 {
1496 if (!options->dump_bindless)
1497 return;
1498
1499 printl(2, "%sdraw[%i] bindless descriptors\n", levels[level], draw_count);
1500
1501 for (unsigned i = 0; i < 128; i++) {
1502 static char reg_name[64];
1503 if (is_compute) {
1504 sprintf(reg_name, "SP_CS_BINDLESS_BASE[%u].DESCRIPTOR", i);
1505 } else {
1506 sprintf(reg_name, "SP_BINDLESS_BASE[%u].DESCRIPTOR", i);
1507 }
1508 const unsigned base_reg = regbase(reg_name);
1509 if (!base_reg)
1510 break;
1511
1512 printl(2, "%sset[%u]:\n", levels[level + 1], i);
1513
1514 uint64_t ext_src_addr;
1515 if (is_64b()) {
1516 const unsigned reg = base_reg + i * 2;
1517 if (!reg_written(reg))
1518 continue;
1519
1520 ext_src_addr = reg_val(reg) & 0xfffffffc;
1521 ext_src_addr |= ((uint64_t)reg_val(reg + 1)) << 32;
1522 } else {
1523 const unsigned reg = base_reg + i;
1524 if (!reg_written(reg))
1525 continue;
1526
1527 ext_src_addr = reg_val(reg) & 0xfffffffc;
1528 }
1529
1530 uint32_t *contents = NULL;
1531 if (ext_src_addr)
1532 contents = hostptr(ext_src_addr);
1533
1534 if (!contents)
1535 continue;
1536
1537 unsigned length = hostlen(ext_src_addr);
1538 unsigned desc_count = length / (16 * sizeof(uint32_t));
1539 for (unsigned desc_idx = 0; desc_idx < desc_count; desc_idx++) {
1540 printl(2, "%sUBO[%u]:\n", levels[level + 1], desc_idx);
1541 dump_domain(contents, 2, level + 2, "A6XX_UBO");
1542
1543 printl(2, "%sSTORAGE/TEXEL/IMAGE[%u]:\n", levels[level + 1], desc_idx);
1544 dump_tex_const(contents, 1, level);
1545
1546 printl(2, "%sSAMPLER[%u]:\n", levels[level + 1], desc_idx);
1547 dump_tex_samp(contents, STATE_SRC_BINDLESS, 1, level);
1548
1549 contents += 16;
1550 }
1551 }
1552 }
1553
/* Decode a load-state packet.
 *
 * The header is decoded (per GPU generation) into a shader stage, a state
 * kind and a state source.  The source determines where the payload
 * lives: inline in the packet (direct), at a GPU address given in the
 * packet (indirect), or at an offset from a bindless base register.  The
 * payload is then printed according to the state kind.
 *
 *   dwords/sizedwords: packet payload
 *   level: indent level for output
 *
 * Returns without output when the payload cannot be mapped to a host
 * pointer.
 */
static void
cp_load_state(uint32_t *dwords, uint32_t sizedwords, int level)
{
   gl_shader_stage stage;
   enum state_t state;
   enum state_src_t src;
   /* unit count: bits 30:22 of the first dword */
   uint32_t num_unit = (dwords[0] >> 22) & 0x1ff;
   uint64_t ext_src_addr;
   void *contents;
   int i;

   /* At this verbosity nothing would be printed; keep going only when a
    * script is in use (presumably so the decode helpers below can still
    * feed it -- TODO confirm).
    */
   if (quiet(2) && !options->script)
      return;

   if (options->info->chip >= 6)
      a6xx_get_state_type(dwords, &stage, &state, &src);
   else if (options->info->chip >= 4)
      a4xx_get_state_type(dwords, &stage, &state, &src);
   else
      a3xx_get_state_type(dwords, &stage, &state, &src);

   /* Work out the external address of the payload; 0 means the payload
    * is inline in the packet.
    */
   switch (src) {
   case STATE_SRC_DIRECT:
      ext_src_addr = 0;
      break;
   case STATE_SRC_INDIRECT:
      /* low two bits of dwords[1] are not part of the address */
      if (is_64b()) {
         ext_src_addr = dwords[1] & 0xfffffffc;
         ext_src_addr |= ((uint64_t)dwords[2]) << 32;
      } else {
         ext_src_addr = dwords[1] & 0xfffffffc;
      }

      break;
   case STATE_SRC_BINDLESS: {
      const unsigned base_reg = stage == MESA_SHADER_COMPUTE
                                   ? regbase("HLSQ_CS_BINDLESS_BASE[0].DESCRIPTOR")
                                   : regbase("HLSQ_BINDLESS_BASE[0].DESCRIPTOR");

      /* top 4 bits of dwords[1] select the bindless base register */
      if (is_64b()) {
         const unsigned reg = base_reg + (dwords[1] >> 28) * 2;
         ext_src_addr = reg_val(reg) & 0xfffffffc;
         ext_src_addr |= ((uint64_t)reg_val(reg + 1)) << 32;
      } else {
         const unsigned reg = base_reg + (dwords[1] >> 28);
         ext_src_addr = reg_val(reg) & 0xfffffffc;
      }

      /* low 24 bits of dwords[1] are a dword offset from that base */
      ext_src_addr += 4 * (dwords[1] & 0xffffff);
      break;
   }
   }

   if (ext_src_addr)
      contents = hostptr(ext_src_addr);
   else
      /* inline payload follows the 3 (64b) or 2 header dwords */
      contents = is_64b() ? dwords + 3 : dwords + 2;

   if (!contents)
      return;

   switch (state) {
   case SHADER_PROG: {
      const char *ext = NULL;

      if (quiet(2))
         return;

      if (options->info->chip >= 4)
         num_unit *= 16;
      else if (options->info->chip >= 3)
         num_unit *= 4;

      /* shaders:
       *
       * note: num_unit seems to be # of instruction groups, where
       * an instruction group has 4 64bit instructions.
       */
      if (stage == MESA_SHADER_VERTEX) {
         ext = "vo3";
      } else if (stage == MESA_SHADER_GEOMETRY) {
         ext = "go3";
      } else if (stage == MESA_SHADER_COMPUTE) {
         ext = "co3";
      } else if (stage == MESA_SHADER_FRAGMENT) {
         ext = "fo3";
      }

      /* contents was already checked for NULL above */
      if (contents)
         try_disasm_a3xx(contents, num_unit * 2, level + 2, stdout,
                         options->info->chip * 100);

      /* dump raw shader: */
      if (ext)
         dump_shader(ext, contents, num_unit * 2 * 4);

      break;
   }
   case SHADER_CONST: {
      if (quiet(2))
         return;

      /* uniforms/consts:
       *
       * note: num_unit seems to be # of pairs of dwords??
       */

      if (options->info->chip >= 4)
         num_unit *= 2;

      dump_float(contents, num_unit * 2, level + 1);
      dump_hex(contents, num_unit * 2, level + 1);

      break;
   }
   case TEX_MIPADDR: {
      uint32_t *addrs = contents;

      if (quiet(2))
         return;

      /* mipmap consts block just appears to be array of num_unit gpu addr's: */
      for (i = 0; i < num_unit; i++) {
         void *ptr = hostptr(addrs[i]);
         printf("%s%2d: %08x\n", levels[level + 1], i, addrs[i]);
         if (options->dump_textures) {
            printf("base=%08x\n", (uint32_t)gpubaseaddr(addrs[i]));
            dump_hex(ptr, hostlen(addrs[i]) / 4, level + 1);
         }
      }
      break;
   }
   case TEX_SAMP: {
      dump_tex_samp(contents, src, num_unit, level);
      break;
   }
   case TEX_CONST: {
      dump_tex_const(contents, num_unit, level);
      break;
   }
   case SSBO_0: {
      uint32_t *ssboconst = (uint32_t *)contents;

      for (i = 0; i < num_unit; i++) {
         /* descriptor size in dwords; 4 unless gen6/7 below */
         int sz = 4;
         if (options->info->chip == 4) {
            dump_domain(ssboconst, 4, level + 2, "A4XX_SSBO_0");
         } else if (options->info->chip == 5) {
            dump_domain(ssboconst, 4, level + 2, "A5XX_SSBO_0");
         } else if ((6 <= options->info->chip) && (options->info->chip < 8)) {
            /* decoded with the texture descriptor domain */
            sz = 16;
            dump_domain(ssboconst, 16, level + 2, "A6XX_TEX_CONST");
         }
         dump_hex(ssboconst, sz, level + 1);
         ssboconst += sz;
      }
      break;
   }
   case SSBO_1: {
      uint32_t *ssboconst = (uint32_t *)contents;

      for (i = 0; i < num_unit; i++) {
         if (options->info->chip == 4)
            dump_domain(ssboconst, 2, level + 2, "A4XX_SSBO_1");
         else if (options->info->chip == 5)
            dump_domain(ssboconst, 2, level + 2, "A5XX_SSBO_1");
         dump_hex(ssboconst, 2, level + 1);
         ssboconst += 2;
      }
      break;
   }
   case SSBO_2: {
      uint32_t *ssboconst = (uint32_t *)contents;

      for (i = 0; i < num_unit; i++) {
         /* TODO a4xx and a5xx might be same: */
         if (options->info->chip == 5) {
            dump_domain(ssboconst, 2, level + 2, "A5XX_SSBO_2");
            dump_hex(ssboconst, 2, level + 1);
         }
         if (options->dump_textures) {
            uint64_t addr =
               (((uint64_t)ssboconst[1] & 0x1ffff) << 32) | ssboconst[0];
            dump_gpuaddr_size(addr, level - 2, hostlen(addr) / 4, 3);
         }
         ssboconst += 2;
      }
      break;
   }
   case UBO: {
      uint32_t *uboconst = (uint32_t *)contents;

      for (i = 0; i < num_unit; i++) {
         // TODO probably similar on a4xx..
         if (options->info->chip == 5)
            dump_domain(uboconst, 2, level + 2, "A5XX_UBO");
         else if (options->info->chip >= 6)
            dump_domain(uboconst, 2, level + 2, "A6XX_UBO");
         dump_hex(uboconst, 2, level + 1);
         if (options->dump_textures) {
            uint64_t addr =
               (((uint64_t)uboconst[1] & 0x1ffff) << 32) | uboconst[0];
            /* Size encoded in descriptor is in units of vec4
             * (note: this local shadows the sizedwords parameter):
             */
            unsigned sizedwords = 4 * (uboconst[1] >> 17);
            dump_gpuaddr_size(addr, level -2, sizedwords, 3);
         }
         /* bindless descriptors are spaced 16 dwords apart */
         uboconst += src == STATE_SRC_BINDLESS ? 16 : 2;
      }
      break;
   }
   case UNKNOWN_DWORDS: {
      if (quiet(2))
         return;
      dump_hex(contents, num_unit, level + 1);
      break;
   }
   case UNKNOWN_2DWORDS: {
      if (quiet(2))
         return;
      dump_hex(contents, num_unit * 2, level + 1);
      break;
   }
   case UNKNOWN_4DWORDS: {
      if (quiet(2))
         return;
      dump_hex(contents, num_unit * 4, level + 1);
      break;
   }
   default:
      /* includes the zero state produced by unlisted lookup entries */
      if (quiet(2))
         return;
      /* hmm.. */
      dump_hex(contents, num_unit, level + 1);
      break;
   }
}
1790
1791 static void
cp_set_bin(uint32_t * dwords,uint32_t sizedwords,int level)1792 cp_set_bin(uint32_t *dwords, uint32_t sizedwords, int level)
1793 {
1794 bin_x1 = dwords[1] & 0xffff;
1795 bin_y1 = dwords[1] >> 16;
1796 bin_x2 = dwords[2] & 0xffff;
1797 bin_y2 = dwords[2] >> 16;
1798 }
1799
1800 static void
dump_a2xx_tex_const(uint32_t * dwords,uint32_t sizedwords,uint32_t val,int level)1801 dump_a2xx_tex_const(uint32_t *dwords, uint32_t sizedwords, uint32_t val,
1802 int level)
1803 {
1804 uint32_t w, h, p;
1805 uint32_t gpuaddr, flags, mip_gpuaddr, mip_flags;
1806 uint32_t min, mag, swiz, clamp_x, clamp_y, clamp_z;
1807 static const char *filter[] = {
1808 "point",
1809 "bilinear",
1810 "bicubic",
1811 };
1812 static const char *clamp[] = {
1813 "wrap",
1814 "mirror",
1815 "clamp-last-texel",
1816 };
1817 static const char swiznames[] = "xyzw01??";
1818
1819 /* see sys2gmem_tex_const[] in adreno_a2xxx.c */
1820
1821 /* Texture, FormatXYZW=Unsigned, ClampXYZ=Wrap/Repeat,
1822 * RFMode=ZeroClamp-1, Dim=1:2d, pitch
1823 */
1824 p = (dwords[0] >> 22) << 5;
1825 clamp_x = (dwords[0] >> 10) & 0x3;
1826 clamp_y = (dwords[0] >> 13) & 0x3;
1827 clamp_z = (dwords[0] >> 16) & 0x3;
1828
1829 /* Format=6:8888_WZYX, EndianSwap=0:None, ReqSize=0:256bit, DimHi=0,
1830 * NearestClamp=1:OGL Mode
1831 */
1832 parse_dword_addr(dwords[1], &gpuaddr, &flags, 0xfff);
1833
1834 /* Width, Height, EndianSwap=0:None */
1835 w = (dwords[2] & 0x1fff) + 1;
1836 h = ((dwords[2] >> 13) & 0x1fff) + 1;
1837
1838 /* NumFormat=0:RF, DstSelXYZW=XYZW, ExpAdj=0, MagFilt=MinFilt=0:Point,
1839 * Mip=2:BaseMap
1840 */
1841 mag = (dwords[3] >> 19) & 0x3;
1842 min = (dwords[3] >> 21) & 0x3;
1843 swiz = (dwords[3] >> 1) & 0xfff;
1844
1845 /* VolMag=VolMin=0:Point, MinMipLvl=0, MaxMipLvl=1, LodBiasH=V=0,
1846 * Dim3d=0
1847 */
1848 // XXX
1849
1850 /* BorderColor=0:ABGRBlack, ForceBC=0:diable, TriJuice=0, Aniso=0,
1851 * Dim=1:2d, MipPacking=0
1852 */
1853 parse_dword_addr(dwords[5], &mip_gpuaddr, &mip_flags, 0xfff);
1854
1855 printf("%sset texture const %04x\n", levels[level], val);
1856 printf("%sclamp x/y/z: %s/%s/%s\n", levels[level + 1], clamp[clamp_x],
1857 clamp[clamp_y], clamp[clamp_z]);
1858 printf("%sfilter min/mag: %s/%s\n", levels[level + 1], filter[min],
1859 filter[mag]);
1860 printf("%sswizzle: %c%c%c%c\n", levels[level + 1],
1861 swiznames[(swiz >> 0) & 0x7], swiznames[(swiz >> 3) & 0x7],
1862 swiznames[(swiz >> 6) & 0x7], swiznames[(swiz >> 9) & 0x7]);
1863 printf("%saddr=%08x (flags=%03x), size=%dx%d, pitch=%d, format=%s\n",
1864 levels[level + 1], gpuaddr, flags, w, h, p,
1865 rnn_enumname(rnn, "a2xx_sq_surfaceformat", flags & 0xf));
1866 printf("%smipaddr=%08x (flags=%03x)\n", levels[level + 1], mip_gpuaddr,
1867 mip_flags);
1868 }
1869
1870 static void
dump_a2xx_shader_const(uint32_t * dwords,uint32_t sizedwords,uint32_t val,int level)1871 dump_a2xx_shader_const(uint32_t *dwords, uint32_t sizedwords, uint32_t val,
1872 int level)
1873 {
1874 int i;
1875 printf("%sset shader const %04x\n", levels[level], val);
1876 for (i = 0; i < sizedwords;) {
1877 uint32_t gpuaddr, flags;
1878 parse_dword_addr(dwords[i++], &gpuaddr, &flags, 0xf);
1879 void *addr = hostptr(gpuaddr);
1880 if (addr) {
1881 const char *fmt =
1882 rnn_enumname(rnn, "a2xx_sq_surfaceformat", flags & 0xf);
1883 uint32_t size = dwords[i++];
1884 printf("%saddr=%08x, size=%d, format=%s\n", levels[level + 1], gpuaddr,
1885 size, fmt);
1886 // TODO maybe dump these as bytes instead of dwords?
1887 size = (size + 3) / 4; // for now convert to dwords
1888 dump_hex(addr, MIN2(size, 64), level + 1);
1889 if (size > MIN2(size, 64))
1890 printf("%s\t\t...\n", levels[level + 1]);
1891 dump_float(addr, MIN2(size, 64), level + 1);
1892 if (size > MIN2(size, 64))
1893 printf("%s\t\t...\n", levels[level + 1]);
1894 }
1895 }
1896 }
1897
1898 static void
cp_set_const(uint32_t * dwords,uint32_t sizedwords,int level)1899 cp_set_const(uint32_t *dwords, uint32_t sizedwords, int level)
1900 {
1901 uint32_t val = dwords[0] & 0xffff;
1902 switch ((dwords[0] >> 16) & 0xf) {
1903 case 0x0:
1904 dump_float((float *)(dwords + 1), sizedwords - 1, level + 1);
1905 break;
1906 case 0x1:
1907 /* need to figure out how const space is partitioned between
1908 * attributes, textures, etc..
1909 */
1910 if (val < 0x78) {
1911 dump_a2xx_tex_const(dwords + 1, sizedwords - 1, val, level);
1912 } else {
1913 dump_a2xx_shader_const(dwords + 1, sizedwords - 1, val, level);
1914 }
1915 break;
1916 case 0x2:
1917 printf("%sset bool const %04x\n", levels[level], val);
1918 break;
1919 case 0x3:
1920 printf("%sset loop const %04x\n", levels[level], val);
1921 break;
1922 case 0x4:
1923 val += 0x2000;
1924 if (dwords[0] & 0x80000000) {
1925 uint32_t srcreg = dwords[1];
1926 uint32_t dstval = dwords[2];
1927
1928 /* TODO: not sure what happens w/ payload != 2.. */
1929 assert(sizedwords == 3);
1930 assert(srcreg < ARRAY_SIZE(type0_reg_vals));
1931
1932 /* note: rnn_regname uses a static buf so we can't do
1933 * two regname() calls for one printf..
1934 */
1935 printf("%s%s = %08x + ", levels[level], regname(val, 1), dstval);
1936 printf("%s (%08x)\n", regname(srcreg, 1), type0_reg_vals[srcreg]);
1937
1938 dstval += type0_reg_vals[srcreg];
1939
1940 dump_registers(val, &dstval, 1, level + 1);
1941 } else {
1942 dump_registers(val, dwords + 1, sizedwords - 1, level + 1);
1943 }
1944 break;
1945 }
1946 }
1947
1948 static void dump_register_summary(int level);
1949
1950 static void
cp_event_write(uint32_t * dwords,uint32_t sizedwords,int level)1951 cp_event_write(uint32_t *dwords, uint32_t sizedwords, int level)
1952 {
1953 const char *name = rnn_enumname(rnn, "vgt_event_type", dwords[0] & 0xff);
1954 printl(2, "%sevent %s\n", levels[level], name);
1955
1956 if (name && (options->info->chip > 5)) {
1957 char eventname[64];
1958 snprintf(eventname, sizeof(eventname), "EVENT:%s", name);
1959 if (!strcmp(name, "BLIT") || !strcmp(name, "LRZ_CLEAR")) {
1960 do_query(eventname, 0);
1961 print_mode(level);
1962 dump_register_summary(level);
1963 }
1964 }
1965 }
1966
1967 static void
dump_register_summary(int level)1968 dump_register_summary(int level)
1969 {
1970 uint32_t i;
1971 bool saved_summary = summary;
1972 summary = false;
1973
1974 in_summary = true;
1975
1976 struct regacc r = regacc(NULL);
1977
1978 /* dump current state of registers: */
1979 printl(2, "%sdraw[%i] register values\n", levels[level], draw_count);
1980
1981 bool changed = false;
1982 bool written = false;
1983
1984 for (i = 0; i < regcnt(); i++) {
1985 uint32_t regbase = i;
1986 uint32_t lastval = reg_val(regbase);
1987 /* skip registers that haven't been updated since last draw/blit: */
1988 if (!(options->allregs || reg_rewritten(regbase)))
1989 continue;
1990 if (!reg_written(regbase))
1991 continue;
1992 if (lastval != lastvals[regbase]) {
1993 changed |= true;
1994 lastvals[regbase] = lastval;
1995 }
1996 if (reg_rewritten(regbase)) {
1997 written |= true;
1998 }
1999 if (!quiet(2)) {
2000 if (regacc_push(&r, regbase, lastval)) {
2001 if (changed) {
2002 printl(2, "!");
2003 } else {
2004 printl(2, " ");
2005 }
2006 if (written) {
2007 printl(2, "+");
2008 } else {
2009 printl(2, " ");
2010 }
2011 printl(2, "\t%08"PRIx64, r.value);
2012 dump_register(&r, level);
2013
2014 changed = written = false;
2015 }
2016 }
2017 }
2018
2019 clear_rewritten();
2020
2021 in_summary = false;
2022
2023 draw_count++;
2024 summary = saved_summary;
2025 }
2026
2027 static uint32_t
draw_indx_common(uint32_t * dwords,int level)2028 draw_indx_common(uint32_t *dwords, int level)
2029 {
2030 uint32_t prim_type = dwords[1] & 0x1f;
2031 uint32_t source_select = (dwords[1] >> 6) & 0x3;
2032 uint32_t num_indices = dwords[2];
2033 const char *primtype;
2034
2035 primtype = rnn_enumname(rnn, "pc_di_primtype", prim_type);
2036
2037 do_query(primtype, num_indices);
2038
2039 printl(2, "%sdraw: %d\n", levels[level], draws[ib]);
2040 printl(2, "%sprim_type: %s (%d)\n", levels[level], primtype, prim_type);
2041 printl(2, "%ssource_select: %s (%d)\n", levels[level],
2042 rnn_enumname(rnn, "pc_di_src_sel", source_select), source_select);
2043 printl(2, "%snum_indices: %d\n", levels[level], num_indices);
2044
2045 vertices += num_indices;
2046
2047 draws[ib]++;
2048
2049 return num_indices;
2050 }
2051
/* Index size encoding as used by cp_draw_indx() and cp_draw_indx_2().
 * Note that IGN, 16_BIT and INVALID all share the value 0, so only the
 * 16/32/8-bit names are distinguishable when comparing.
 */
enum pc_di_index_size {
   INDEX_SIZE_IGN = 0,
   INDEX_SIZE_16_BIT = 0,
   INDEX_SIZE_32_BIT = 1,
   INDEX_SIZE_8_BIT = 2,
   INDEX_SIZE_INVALID = 0,
};
2059
2060 static void
cp_draw_indx(uint32_t * dwords,uint32_t sizedwords,int level)2061 cp_draw_indx(uint32_t *dwords, uint32_t sizedwords, int level)
2062 {
2063 uint32_t num_indices = draw_indx_common(dwords, level);
2064
2065 assert(!is_64b());
2066
2067 /* if we have an index buffer, dump that: */
2068 if (sizedwords == 5) {
2069 void *ptr = hostptr(dwords[3]);
2070 printl(2, "%sgpuaddr: %08x\n", levels[level], dwords[3]);
2071 printl(2, "%sidx_size: %d\n", levels[level], dwords[4]);
2072 if (ptr) {
2073 enum pc_di_index_size size =
2074 ((dwords[1] >> 11) & 1) | ((dwords[1] >> 12) & 2);
2075 if (!quiet(2)) {
2076 int i;
2077 printf("%sidxs: ", levels[level]);
2078 if (size == INDEX_SIZE_8_BIT) {
2079 uint8_t *idx = ptr;
2080 for (i = 0; i < dwords[4]; i++)
2081 printf(" %u", idx[i]);
2082 } else if (size == INDEX_SIZE_16_BIT) {
2083 uint16_t *idx = ptr;
2084 for (i = 0; i < dwords[4] / 2; i++)
2085 printf(" %u", idx[i]);
2086 } else if (size == INDEX_SIZE_32_BIT) {
2087 uint32_t *idx = ptr;
2088 for (i = 0; i < dwords[4] / 4; i++)
2089 printf(" %u", idx[i]);
2090 }
2091 printf("\n");
2092 dump_hex(ptr, dwords[4] / 4, level + 1);
2093 }
2094 }
2095 }
2096
2097 /* don't bother dumping registers for the dummy draw_indx's.. */
2098 if (num_indices > 0) {
2099 dump_bindless_descriptors(false, level);
2100 dump_register_summary(level);
2101 }
2102
2103 needs_wfi = true;
2104 }
2105
2106 static void
cp_draw_indx_2(uint32_t * dwords,uint32_t sizedwords,int level)2107 cp_draw_indx_2(uint32_t *dwords, uint32_t sizedwords, int level)
2108 {
2109 uint32_t num_indices = draw_indx_common(dwords, level);
2110 enum pc_di_index_size size =
2111 ((dwords[1] >> 11) & 1) | ((dwords[1] >> 12) & 2);
2112 void *ptr = &dwords[3];
2113 int sz = 0;
2114
2115 assert(!is_64b());
2116
2117 /* CP_DRAW_INDX_2 has embedded/inline idx buffer: */
2118 if (!quiet(2)) {
2119 int i;
2120 printf("%sidxs: ", levels[level]);
2121 if (size == INDEX_SIZE_8_BIT) {
2122 uint8_t *idx = ptr;
2123 for (i = 0; i < num_indices; i++)
2124 printf(" %u", idx[i]);
2125 sz = num_indices;
2126 } else if (size == INDEX_SIZE_16_BIT) {
2127 uint16_t *idx = ptr;
2128 for (i = 0; i < num_indices; i++)
2129 printf(" %u", idx[i]);
2130 sz = num_indices * 2;
2131 } else if (size == INDEX_SIZE_32_BIT) {
2132 uint32_t *idx = ptr;
2133 for (i = 0; i < num_indices; i++)
2134 printf(" %u", idx[i]);
2135 sz = num_indices * 4;
2136 }
2137 printf("\n");
2138 dump_hex(ptr, sz / 4, level + 1);
2139 }
2140
2141 /* don't bother dumping registers for the dummy draw_indx's.. */
2142 if (num_indices > 0) {
2143 dump_bindless_descriptors(false, level);
2144 dump_register_summary(level);
2145 }
2146 }
2147
2148 static void
cp_draw_indx_offset(uint32_t * dwords,uint32_t sizedwords,int level)2149 cp_draw_indx_offset(uint32_t *dwords, uint32_t sizedwords, int level)
2150 {
2151 uint32_t num_indices = dwords[2];
2152 uint32_t prim_type = dwords[0] & 0x1f;
2153
2154 do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), num_indices);
2155 print_mode(level);
2156
2157 /* don't bother dumping registers for the dummy draw_indx's.. */
2158 if (num_indices > 0) {
2159 dump_bindless_descriptors(false, level);
2160 dump_register_summary(level);
2161 }
2162 }
2163
2164 static void
cp_draw_indx_indirect(uint32_t * dwords,uint32_t sizedwords,int level)2165 cp_draw_indx_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
2166 {
2167 uint32_t prim_type = dwords[0] & 0x1f;
2168 uint64_t addr;
2169
2170 do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);
2171 print_mode(level);
2172
2173 if (is_64b())
2174 addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1];
2175 else
2176 addr = dwords[1];
2177 dump_gpuaddr_size(addr, level, 0x10, 2);
2178
2179 if (is_64b())
2180 addr = (((uint64_t)dwords[5] & 0x1ffff) << 32) | dwords[4];
2181 else
2182 addr = dwords[3];
2183 dump_gpuaddr_size(addr, level, 0x10, 2);
2184
2185 dump_bindless_descriptors(false, level);
2186 dump_register_summary(level);
2187 }
2188
2189 static void
cp_draw_indirect(uint32_t * dwords,uint32_t sizedwords,int level)2190 cp_draw_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
2191 {
2192 uint32_t prim_type = dwords[0] & 0x1f;
2193 uint64_t addr;
2194
2195 do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);
2196 print_mode(level);
2197
2198 addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1];
2199 dump_gpuaddr_size(addr, level, 0x10, 2);
2200
2201 dump_bindless_descriptors(false, level);
2202 dump_register_summary(level);
2203 }
2204
2205 static void
cp_draw_indirect_multi(uint32_t * dwords,uint32_t sizedwords,int level)2206 cp_draw_indirect_multi(uint32_t *dwords, uint32_t sizedwords, int level)
2207 {
2208 uint32_t prim_type = dwords[0] & 0x1f;
2209 uint32_t count = dwords[2];
2210
2211 do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);
2212 print_mode(level);
2213
2214 struct rnndomain *domain = rnn_finddomain(rnn->db, "CP_DRAW_INDIRECT_MULTI");
2215 uint32_t count_dword = rnndec_decodereg(rnn->vc, domain, "INDIRECT_COUNT");
2216 uint32_t addr_dword = rnndec_decodereg(rnn->vc, domain, "INDIRECT");
2217 uint64_t stride_dword = rnndec_decodereg(rnn->vc, domain, "STRIDE");
2218
2219 if (count_dword) {
2220 uint64_t count_addr =
2221 ((uint64_t)dwords[count_dword + 1] << 32) | dwords[count_dword];
2222 uint32_t *buf = hostptr(count_addr);
2223
2224 /* Don't print more draws than this if we don't know the indirect
2225 * count. It's possible the user will give ~0 or some other large
2226 * value, expecting the GPU to fill in the draw count, and we don't
2227 * want to print a gazillion draws in that case:
2228 */
2229 const uint32_t max_draw_count = 0x100;
2230
2231 /* Assume the indirect count is garbage if it's larger than this
2232 * (quite large) value or 0. Hopefully this catches most cases.
2233 */
2234 const uint32_t max_indirect_draw_count = 0x10000;
2235
2236 if (buf) {
2237 printf("%sindirect count: %u\n", levels[level], *buf);
2238 if (*buf == 0 || *buf > max_indirect_draw_count) {
2239 /* garbage value */
2240 count = MIN2(count, max_draw_count);
2241 } else {
2242 /* not garbage */
2243 count = MIN2(count, *buf);
2244 }
2245 } else {
2246 count = MIN2(count, max_draw_count);
2247 }
2248 }
2249
2250 if (addr_dword && stride_dword) {
2251 uint64_t addr =
2252 ((uint64_t)dwords[addr_dword + 1] << 32) | dwords[addr_dword];
2253 uint32_t stride = dwords[stride_dword];
2254
2255 for (unsigned i = 0; i < count; i++, addr += stride) {
2256 printf("%sdraw %d:\n", levels[level], i);
2257 dump_gpuaddr_size(addr, level, 0x10, 2);
2258 }
2259 }
2260
2261 dump_bindless_descriptors(false, level);
2262 dump_register_summary(level);
2263 }
2264
2265 static void
cp_draw_auto(uint32_t * dwords,uint32_t sizedwords,int level)2266 cp_draw_auto(uint32_t *dwords, uint32_t sizedwords, int level)
2267 {
2268 uint32_t prim_type = dwords[0] & 0x1f;
2269
2270 do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);
2271 print_mode(level);
2272
2273 dump_bindless_descriptors(false, level);
2274 dump_register_summary(level);
2275 }
2276
/* Decode a CP_RUN_OPENCL packet: run the query hook labelled "COMPUTE"
 * with a count of 1 and summarize the current register state.  The
 * packet payload itself is not inspected.
 */
static void
cp_run_cl(uint32_t *dwords, uint32_t sizedwords, int level)
{
   do_query("COMPUTE", 1);
   dump_register_summary(level);
}
2283
/* Print the payload of a NOP packet as text.
 *
 * The payload is treated as a byte string of at most 4 * sizedwords
 * bytes.  Output stops at the first NUL byte; bytes outside the ASCII
 * range are skipped silently.  No trailing newline is written.
 */
static void
print_nop_tail_string(uint32_t *dwords, uint32_t sizedwords)
{
   const char *text = (void *)dwords;
   const uint32_t nbytes = 4 * sizedwords;

   for (uint32_t pos = 0; pos < nbytes && text[pos] != '\0'; pos++) {
      if (!isascii(text[pos]))
         continue;
      putchar(text[pos]);
   }
}
2295
2296 static void
cp_nop(uint32_t * dwords,uint32_t sizedwords,int level)2297 cp_nop(uint32_t *dwords, uint32_t sizedwords, int level)
2298 {
2299 if (quiet(3))
2300 return;
2301
2302 /* NOP is used to encode special debug strings by Turnip.
2303 * See tu_cs_emit_debug_magic_strv(...)
2304 */
2305 static int scope_level = 0;
2306 uint32_t identifier = dwords[0];
2307 bool is_special = false;
2308 if (identifier == CP_NOP_MESG) {
2309 printf("### ");
2310 is_special = true;
2311 } else if (identifier == CP_NOP_BEGN) {
2312 printf(">>> #%d: ", ++scope_level);
2313 is_special = true;
2314 } else if (identifier == CP_NOP_END) {
2315 printf("<<< #%d: ", scope_level--);
2316 is_special = true;
2317 }
2318
2319 if (is_special) {
2320 if (sizedwords > 1) {
2321 print_nop_tail_string(dwords + 1, sizedwords - 1);
2322 printf("\n");
2323 }
2324 return;
2325 }
2326
2327 // blob doesn't use CP_NOP for string_marker but it does
2328 // use it for things that end up looking like, but aren't
2329 // ascii chars:
2330 if (!options->decode_markers)
2331 return;
2332
2333 print_nop_tail_string(dwords, sizedwords);
2334 printf("\n");
2335 }
2336
/* Extract the target address and size from an indirect-buffer packet.
 *
 * On 64b generations the payload is three dwords: address low, address
 * high, size.  Otherwise it is two dwords: address, size.  The payload
 * length is asserted to match.
 *
 * The address is stored in *ibaddr and the size in *ibsize.  Returns a
 * pointer to the dword immediately following the payload.
 */
uint32_t *
parse_cp_indirect(uint32_t *dwords, uint32_t sizedwords,
                  uint64_t *ibaddr, uint32_t *ibsize)
{
   if (!is_64b()) {
      assert(sizedwords == 2);

      *ibaddr = dwords[0];
      *ibsize = dwords[1];

      return &dwords[2];
   }

   assert(sizedwords == 3);

   *ibaddr = (uint64_t)dwords[0] | ((uint64_t)dwords[1] << 32);
   *ibsize = dwords[2];

   return &dwords[3];
}
2359
2360 static void
cp_indirect(uint32_t * dwords,uint32_t sizedwords,int level)2361 cp_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
2362 {
2363 /* traverse indirect buffers */
2364 uint64_t ibaddr;
2365 uint32_t ibsize;
2366 uint32_t *ptr = NULL;
2367
2368 dwords = parse_cp_indirect(dwords, sizedwords, &ibaddr, &ibsize);
2369
2370 if (!quiet(3)) {
2371 if (is_64b()) {
2372 printf("%sibaddr:%016" PRIx64 "\n", levels[level], ibaddr);
2373 } else {
2374 printf("%sibaddr:%08x\n", levels[level], (uint32_t)ibaddr);
2375 }
2376 printf("%sibsize:%08x\n", levels[level], ibsize);
2377 }
2378
2379 if (options->once && has_dumped(ibaddr, enable_mask))
2380 return;
2381
2382 /* 'query-compare' mode implies 'once' mode, although we need only to
2383 * process the cmdstream for *any* enable_mask mode, since we are
2384 * comparing binning vs draw reg values at the same time, ie. it is
2385 * not useful to process the same draw in both binning and draw pass.
2386 */
2387 if (options->query_compare && has_dumped(ibaddr, MODE_ALL))
2388 return;
2389
2390 /* map gpuaddr back to hostptr: */
2391 ptr = hostptr(ibaddr);
2392
2393 if (ptr) {
2394 /* If the GPU hung within the target IB, the trigger point will be
2395 * just after the current CP_INDIRECT_BUFFER. Because the IB is
2396 * executed but never returns. Account for this by checking if
2397 * the IB returned:
2398 */
2399 highlight_gpuaddr(gpuaddr(dwords));
2400
2401 ib++;
2402 ibs[ib].base = ibaddr;
2403 ibs[ib].size = ibsize;
2404
2405 dump_commands(ptr, ibsize, level);
2406 ib--;
2407 } else {
2408 fprintf(stderr, "could not find: %016" PRIx64 " (%d)\n", ibaddr, ibsize);
2409 }
2410 }
2411
2412 static void
cp_start_bin(uint32_t * dwords,uint32_t sizedwords,int level)2413 cp_start_bin(uint32_t *dwords, uint32_t sizedwords, int level)
2414 {
2415 uint64_t ibaddr;
2416 uint32_t ibsize;
2417 uint32_t loopcount;
2418 uint32_t *ptr = NULL;
2419
2420 loopcount = dwords[0];
2421 ibaddr = dwords[1];
2422 ibaddr |= ((uint64_t)dwords[2]) << 32;
2423 ibsize = dwords[3];
2424
2425 /* map gpuaddr back to hostptr: */
2426 ptr = hostptr(ibaddr);
2427
2428 if (ptr) {
2429 /* If the GPU hung within the target IB, the trigger point will be
2430 * just after the current CP_START_BIN. Because the IB is
2431 * executed but never returns. Account for this by checking if
2432 * the IB returned:
2433 */
2434 highlight_gpuaddr(gpuaddr(&dwords[5]));
2435
2436 /* TODO: we should duplicate the body of the loop after each bin, so
2437 * that draws get the correct state. We should also figure out if there
2438 * are any registers that can tell us what bin we're in when we hang so
2439 * that crashdec points to the right place.
2440 */
2441 ib++;
2442 for (uint32_t i = 0; i < loopcount; i++) {
2443 ibs[ib].base = ibaddr;
2444 ibs[ib].size = ibsize;
2445 printl(3, "%sbin %u\n", levels[level], i);
2446 dump_commands(ptr, ibsize, level);
2447 ibaddr += ibsize;
2448 ptr += ibsize;
2449 }
2450 ib--;
2451 } else {
2452 fprintf(stderr, "could not find: %016" PRIx64 " (%d)\n", ibaddr, ibsize);
2453 }
2454 }
2455
2456 static void
cp_fixed_stride_draw_table(uint32_t * dwords,uint32_t sizedwords,int level)2457 cp_fixed_stride_draw_table(uint32_t *dwords, uint32_t sizedwords, int level)
2458 {
2459 uint64_t ibaddr;
2460 uint32_t ibsize;
2461 uint32_t loopcount;
2462 uint32_t *ptr = NULL;
2463
2464 loopcount = dwords[3];
2465 ibaddr = dwords[0];
2466 ibaddr |= ((uint64_t)dwords[1]) << 32;
2467 ibsize = dwords[2] >> 20;
2468
2469 /* map gpuaddr back to hostptr: */
2470 ptr = hostptr(ibaddr);
2471
2472 if (ptr) {
2473 /* If the GPU hung within the target IB, the trigger point will be
2474 * just after the current CP_START_BIN. Because the IB is
2475 * executed but never returns. Account for this by checking if
2476 * the IB returned:
2477 */
2478 highlight_gpuaddr(gpuaddr(&dwords[5]));
2479
2480 ib++;
2481 for (uint32_t i = 0; i < loopcount; i++) {
2482 ibs[ib].base = ibaddr;
2483 ibs[ib].size = ibsize;
2484 printl(3, "%sdraw %u\n", levels[level], i);
2485 dump_commands(ptr, ibsize, level);
2486 ibaddr += ibsize;
2487 ptr += ibsize;
2488 }
2489 ib--;
2490 } else {
2491 fprintf(stderr, "could not find: %016" PRIx64 " (%d)\n", ibaddr, ibsize);
2492 }
2493 }
2494
/* Decode a CP_WAIT_FOR_IDLE packet: clears the needs_wfi flag that
 * cp_draw_indx() sets and cp_rmw() checks.  The payload is ignored.
 */
static void
cp_wfi(uint32_t *dwords, uint32_t sizedwords, int level)
{
   needs_wfi = false;
}
2500
2501 static void
cp_mem_write(uint32_t * dwords,uint32_t sizedwords,int level)2502 cp_mem_write(uint32_t *dwords, uint32_t sizedwords, int level)
2503 {
2504 if (quiet(2))
2505 return;
2506
2507 if (is_64b()) {
2508 uint64_t gpuaddr = dwords[0] | (((uint64_t)dwords[1]) << 32);
2509 printf("%sgpuaddr:%016" PRIx64 "\n", levels[level], gpuaddr);
2510 dump_hex(&dwords[2], sizedwords - 2, level + 1);
2511
2512 if (pkt_is_type4(dwords[2]) || pkt_is_type7(dwords[2]))
2513 dump_commands(&dwords[2], sizedwords - 2, level + 1);
2514 } else {
2515 uint32_t gpuaddr = dwords[0];
2516 printf("%sgpuaddr:%08x\n", levels[level], gpuaddr);
2517 dump_float((float *)&dwords[1], sizedwords - 1, level + 1);
2518 }
2519 }
2520
2521 static void
cp_rmw(uint32_t * dwords,uint32_t sizedwords,int level)2522 cp_rmw(uint32_t *dwords, uint32_t sizedwords, int level)
2523 {
2524 uint32_t val = dwords[0] & 0xffff;
2525 uint32_t and = dwords[1];
2526 uint32_t or = dwords[2];
2527 printl(3, "%srmw (%s & 0x%08x) | 0x%08x)\n", levels[level], regname(val, 1),
2528 and, or);
2529 if (needs_wfi)
2530 printl(2, "NEEDS WFI: rmw (%s & 0x%08x) | 0x%08x)\n", regname(val, 1),
2531 and, or);
2532 reg_set(val, (reg_val(val) & and) | or);
2533 }
2534
2535 static void
cp_reg_mem(uint32_t * dwords,uint32_t sizedwords,int level)2536 cp_reg_mem(uint32_t *dwords, uint32_t sizedwords, int level)
2537 {
2538 uint32_t val = dwords[0] & 0xffff;
2539 printl(3, "%sbase register: %s\n", levels[level], regname(val, 1));
2540
2541 if (quiet(2))
2542 return;
2543
2544 uint64_t gpuaddr = dwords[1] | (((uint64_t)dwords[2]) << 32);
2545 printf("%sgpuaddr:%016" PRIx64 "\n", levels[level], gpuaddr);
2546 void *ptr = hostptr(gpuaddr);
2547 if (ptr) {
2548 uint32_t cnt = (dwords[0] >> 19) & 0x3ff;
2549 dump_hex(ptr, cnt, level + 1);
2550 }
2551 }
2552
/* One draw-state group as recorded by cp_set_draw_state(). */
struct draw_state {
   uint16_t enable_mask; /* compared against the global enable_mask on chip >= 6 */
   uint16_t flags;       /* FLAG_* bits copied from the packet */
   uint32_t count;       /* cmdstream length in dwords; 0 means nothing to load */
   uint64_t addr;        /* GPU address of the group's cmdstream */
};

/* Draw-state groups, indexed by the 5-bit group id from the packet. */
struct draw_state state[32];

/* Flag bits from the CP_SET_DRAW_STATE header, as tested by
 * cp_set_draw_state():
 */
#define FLAG_DIRTY              0x1
#define FLAG_DISABLE            0x2 /* clear just this group */
#define FLAG_DISABLE_ALL_GROUPS 0x4 /* clear every group */
#define FLAG_LOAD_IMMED         0x8 /* decode the group right away, then clear it */

/* Raw first payload dword of the most recent CP_SET_MODE packet. */
static int draw_mode;
2568
2569 static void
disable_group(unsigned group_id)2570 disable_group(unsigned group_id)
2571 {
2572 struct draw_state *ds = &state[group_id];
2573 memset(ds, 0, sizeof(*ds));
2574 }
2575
2576 static void
disable_all_groups(void)2577 disable_all_groups(void)
2578 {
2579 for (unsigned i = 0; i < ARRAY_SIZE(state); i++)
2580 disable_group(i);
2581 }
2582
2583 static void
load_group(unsigned group_id,int level)2584 load_group(unsigned group_id, int level)
2585 {
2586 struct draw_state *ds = &state[group_id];
2587
2588 if (!ds->count)
2589 return;
2590
2591 printl(2, "%sgroup_id: %u\n", levels[level], group_id);
2592 printl(2, "%scount: %d\n", levels[level], ds->count);
2593 printl(2, "%saddr: %016llx\n", levels[level], ds->addr);
2594 printl(2, "%sflags: %x\n", levels[level], ds->flags);
2595
2596 if (options->info->chip >= 6) {
2597 printl(2, "%senable_mask: 0x%x\n", levels[level], ds->enable_mask);
2598
2599 if (!(ds->enable_mask & enable_mask)) {
2600 printl(2, "%s\tskipped!\n\n", levels[level]);
2601 return;
2602 }
2603 }
2604
2605 void *ptr = hostptr(ds->addr);
2606 if (ptr) {
2607 if (!quiet(2))
2608 dump_hex(ptr, ds->count, level + 1);
2609
2610 ib++;
2611 dump_commands(ptr, ds->count, level + 1);
2612 ib--;
2613 }
2614 }
2615
2616 static void
load_all_groups(int level)2617 load_all_groups(int level)
2618 {
2619 /* sanity check, we should never recursively hit recursion here, and if
2620 * we do bad things happen:
2621 */
2622 static bool loading_groups = false;
2623 if (loading_groups) {
2624 printf("ERROR: nothing in draw state should trigger recursively loading "
2625 "groups!\n");
2626 return;
2627 }
2628 loading_groups = true;
2629 for (unsigned i = 0; i < ARRAY_SIZE(state); i++)
2630 load_group(i, level);
2631 loading_groups = false;
2632
2633 /* in 'query-compare' mode, defer disabling all groups until we have a
2634 * chance to process the query:
2635 */
2636 if (!options->query_compare)
2637 disable_all_groups();
2638 }
2639
2640 static void
cp_set_draw_state(uint32_t * dwords,uint32_t sizedwords,int level)2641 cp_set_draw_state(uint32_t *dwords, uint32_t sizedwords, int level)
2642 {
2643 uint32_t i;
2644
2645 for (i = 0; i < sizedwords;) {
2646 struct draw_state *ds;
2647 uint32_t count = dwords[i] & 0xffff;
2648 uint32_t group_id = (dwords[i] >> 24) & 0x1f;
2649 uint32_t enable_mask = (dwords[i] >> 20) & 0xf;
2650 uint32_t flags = (dwords[i] >> 16) & 0xf;
2651 uint64_t addr;
2652
2653 if (is_64b()) {
2654 addr = dwords[i + 1];
2655 addr |= ((uint64_t)dwords[i + 2]) << 32;
2656 i += 3;
2657 } else {
2658 addr = dwords[i + 1];
2659 i += 2;
2660 }
2661
2662 if (flags & FLAG_DISABLE_ALL_GROUPS) {
2663 disable_all_groups();
2664 continue;
2665 }
2666
2667 if (flags & FLAG_DISABLE) {
2668 disable_group(group_id);
2669 continue;
2670 }
2671
2672 assert(group_id < ARRAY_SIZE(state));
2673 disable_group(group_id);
2674
2675 ds = &state[group_id];
2676
2677 ds->enable_mask = enable_mask;
2678 ds->flags = flags;
2679 ds->count = count;
2680 ds->addr = addr;
2681
2682 if (flags & FLAG_LOAD_IMMED) {
2683 load_group(group_id, level);
2684 disable_group(group_id);
2685 }
2686 }
2687 }
2688
/* Decode a CP_SET_MODE packet: remember the first payload dword in
 * draw_mode.
 */
static void
cp_set_mode(uint32_t *dwords, uint32_t sizedwords, int level)
{
   draw_mode = dwords[0];
}
2694
/* execute compute shader
 *
 * Decode a CP_EXEC_CS packet: run the query hook labelled "compute",
 * dump bindless descriptors (first argument true, unlike the draw
 * handlers which pass false) and summarize register state.  The payload
 * is not inspected.
 */
static void
cp_exec_cs(uint32_t *dwords, uint32_t sizedwords, int level)
{
   do_query("compute", 0);
   dump_bindless_descriptors(true, level);
   dump_register_summary(level);
}
2703
2704 static void
cp_exec_cs_indirect(uint32_t * dwords,uint32_t sizedwords,int level)2705 cp_exec_cs_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
2706 {
2707 uint64_t addr;
2708
2709 if (is_64b()) {
2710 addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1];
2711 } else {
2712 addr = dwords[1];
2713 }
2714
2715 printl(3, "%saddr: %016llx\n", levels[level], addr);
2716 dump_gpuaddr_size(addr, level, 0x10, 2);
2717
2718 do_query("compute", 0);
2719 dump_bindless_descriptors(true, level);
2720 dump_register_summary(level);
2721 }
2722
2723 static void
cp_set_marker(uint32_t * dwords,uint32_t sizedwords,int level)2724 cp_set_marker(uint32_t *dwords, uint32_t sizedwords, int level)
2725 {
2726 uint32_t val = dwords[0] & 0xf;
2727 const char *mode = rnn_enumname(rnn, "a6xx_marker", val);
2728
2729 if (!mode) {
2730 static char buf[8];
2731 sprintf(buf, "0x%x", val);
2732 render_mode = buf;
2733 return;
2734 }
2735
2736 render_mode = mode;
2737
2738 if (!strcmp(render_mode, "RM6_BIN_VISIBILITY")) {
2739 enable_mask = MODE_BINNING;
2740 } else if (!strcmp(render_mode, "RM6_BIN_RENDER_START")) {
2741 enable_mask = MODE_GMEM;
2742 } else if (!strcmp(render_mode, "RM6_DIRECT_RENDER")) {
2743 enable_mask = MODE_BYPASS;
2744 }
2745 }
2746
2747 static void
cp_set_thread_control(uint32_t * dwords,uint32_t sizedwords,int level)2748 cp_set_thread_control(uint32_t *dwords, uint32_t sizedwords, int level)
2749 {
2750 uint32_t val = dwords[0] & 0x3;
2751 thread = rnn_enumname(rnn, "cp_thread", val);
2752 }
2753
2754 static void
cp_set_render_mode(uint32_t * dwords,uint32_t sizedwords,int level)2755 cp_set_render_mode(uint32_t *dwords, uint32_t sizedwords, int level)
2756 {
2757 uint64_t addr;
2758 uint32_t *ptr, len;
2759
2760 assert(is_64b());
2761
2762 /* TODO seems to have two ptrs, 9 dwords total (incl pkt7 hdr)..
2763 * not sure if this can come in different sizes.
2764 *
2765 * First ptr doesn't seem to be cmdstream, second one does.
2766 *
2767 * Comment from downstream kernel:
2768 *
2769 * SRM -- set render mode (ex binning, direct render etc)
2770 * SRM is set by UMD usually at start of IB to tell CP the type of
2771 * preemption.
2772 * KMD needs to set SRM to NULL to indicate CP that rendering is
2773 * done by IB.
2774 * ------------------------------------------------------------------
2775 *
2776 * Seems to always be one of these two:
2777 * 70ec0008 00000001 001c0000 00000000 00000010 00000003 0000000d 001c2000
2778 * 00000000 70ec0008 00000001 001c0000 00000000 00000000 00000003 0000000d
2779 * 001c2000 00000000
2780 *
2781 */
2782
2783 assert(options->info->chip >= 5);
2784
2785 render_mode = rnn_enumname(rnn, "render_mode_cmd", dwords[0]);
2786
2787 if (sizedwords == 1)
2788 return;
2789
2790 addr = dwords[1];
2791 addr |= ((uint64_t)dwords[2]) << 32;
2792
2793 mode = dwords[3];
2794
2795 dump_gpuaddr(addr, level + 1);
2796
2797 if (sizedwords == 5)
2798 return;
2799
2800 assert(sizedwords == 8);
2801
2802 len = dwords[5];
2803 addr = dwords[6];
2804 addr |= ((uint64_t)dwords[7]) << 32;
2805
2806 printl(3, "%saddr: 0x%016lx\n", levels[level], addr);
2807 printl(3, "%slen: 0x%x\n", levels[level], len);
2808
2809 ptr = hostptr(addr);
2810
2811 if (ptr) {
2812 if (!quiet(2)) {
2813 ib++;
2814 dump_commands(ptr, len, level + 1);
2815 ib--;
2816 dump_hex(ptr, len, level + 1);
2817 }
2818 }
2819 }
2820
2821 static void
cp_compute_checkpoint(uint32_t * dwords,uint32_t sizedwords,int level)2822 cp_compute_checkpoint(uint32_t *dwords, uint32_t sizedwords, int level)
2823 {
2824 uint64_t addr;
2825 uint32_t *ptr, len;
2826
2827 assert(is_64b());
2828 assert(options->info->chip >= 5);
2829
2830 if (sizedwords == 8) {
2831 addr = dwords[5];
2832 addr |= ((uint64_t)dwords[6]) << 32;
2833 len = dwords[7];
2834 } else {
2835 addr = dwords[5];
2836 addr |= ((uint64_t)dwords[6]) << 32;
2837 len = dwords[4];
2838 }
2839
2840 printl(3, "%saddr: 0x%016" PRIx64 "\n", levels[level], addr);
2841 printl(3, "%slen: 0x%x\n", levels[level], len);
2842
2843 ptr = hostptr(addr);
2844
2845 if (ptr) {
2846 if (!quiet(2)) {
2847 ib++;
2848 dump_commands(ptr, len, level + 1);
2849 ib--;
2850 dump_hex(ptr, len, level + 1);
2851 }
2852 }
2853 }
2854
2855 static void
cp_blit(uint32_t * dwords,uint32_t sizedwords,int level)2856 cp_blit(uint32_t *dwords, uint32_t sizedwords, int level)
2857 {
2858 do_query(rnn_enumname(rnn, "cp_blit_cmd", dwords[0]), 0);
2859 print_mode(level);
2860 dump_register_summary(level);
2861 }
2862
2863 static void
cp_context_reg_bunch(uint32_t * dwords,uint32_t sizedwords,int level)2864 cp_context_reg_bunch(uint32_t *dwords, uint32_t sizedwords, int level)
2865 {
2866 int i;
2867
2868 /* NOTE: seems to write same reg multiple times.. not sure if different parts
2869 * of these are triggered by the FLUSH_SO_n events?? (if that is what they
2870 * actually are?)
2871 */
2872 bool saved_summary = summary;
2873 summary = false;
2874
2875 struct regacc r = regacc(NULL);
2876
2877 for (i = 0; i < sizedwords; i += 2) {
2878 if (regacc_push(&r, dwords[i + 0], dwords[i + 1]))
2879 dump_register(&r, level + 1);
2880 reg_set(dwords[i + 0], dwords[i + 1]);
2881 }
2882
2883 summary = saved_summary;
2884 }
2885
2886 /* Looks similar to CP_CONTEXT_REG_BUNCH, but not quite the same...
2887 * discarding first two dwords??
2888 *
2889 * CP_CONTEXT_REG_BUNCH:
2890 * 0221: 9c1ff606 (rep)(xmov3)mov $usraddr, $data
2891 * ; mov $data, $data
2892 * ; mov $usraddr, $data
2893 * ; mov $data, $data
2894 * 0222: d8000000 waitin
2895 * 0223: 981f0806 mov $01, $data
2896 *
2897 * CP_UNK5D:
2898 * 0224: 981f0006 mov $00, $data
2899 * 0225: 981f0006 mov $00, $data
2900 * 0226: 9c1ff206 (rep)(xmov1)mov $usraddr, $data
2901 * ; mov $data, $data
2902 * 0227: d8000000 waitin
2903 * 0228: 981f0806 mov $01, $data
2904 *
2905 */
static void
cp_context_reg_bunch2(uint32_t *dwords, uint32_t sizedwords, int level)
{
   /* Same as CP_CONTEXT_REG_BUNCH except that the first two payload
    * dwords are skipped before the (register, value) pairs start.
    *
    * sizedwords is unsigned: a truncated packet with fewer than two
    * payload dwords would wrap the count around to a huge value and
    * walk far past the end of the buffer, so reject it up front.
    */
   if (sizedwords < 2)
      return;

   dwords += 2;
   sizedwords -= 2;
   cp_context_reg_bunch(dwords, sizedwords, level);
}
2913
2914 static void
cp_reg_write(uint32_t * dwords,uint32_t sizedwords,int level)2915 cp_reg_write(uint32_t *dwords, uint32_t sizedwords, int level)
2916 {
2917 uint32_t reg = dwords[1] & 0xffff;
2918
2919 struct regacc r = regacc(NULL);
2920 if (regacc_push(&r, reg, dwords[2]))
2921 dump_register(&r, level + 1);
2922 reg_set(reg, dwords[2]);
2923 }
2924
2925 static void
cp_set_ctxswitch_ib(uint32_t * dwords,uint32_t sizedwords,int level)2926 cp_set_ctxswitch_ib(uint32_t *dwords, uint32_t sizedwords, int level)
2927 {
2928 uint64_t addr;
2929 uint32_t size = dwords[2] & 0xffff;
2930 void *ptr;
2931
2932 addr = dwords[0] | ((uint64_t)dwords[1] << 32);
2933
2934 if (!quiet(3)) {
2935 printf("%saddr=%" PRIx64 "\n", levels[level], addr);
2936 }
2937
2938 ptr = hostptr(addr);
2939 if (ptr) {
2940 dump_commands(ptr, size, level + 1);
2941 }
2942 }
2943
/* Decode a CP_SKIP_IB2_ENABLE_GLOBAL packet: record the first payload
 * dword in skip_ib2_enable_global.
 */
static void
cp_skip_ib2_enable_global(uint32_t *dwords, uint32_t sizedwords, int level)
{
   skip_ib2_enable_global = dwords[0];
}
2949
/* Decode a CP_SKIP_IB2_ENABLE_LOCAL packet: record the first payload
 * dword in skip_ib2_enable_local.
 */
static void
cp_skip_ib2_enable_local(uint32_t *dwords, uint32_t sizedwords, int level)
{
   skip_ib2_enable_local = dwords[0];
}
2955
/* Build one table entry: the packet name is "CP_" followed by x, fxn is
 * its handler, and any further arguments initialize the options member.
 */
#define CP(x, fxn, ...) { "CP_" #x, fxn, ##__VA_ARGS__ }

/* Handlers for opcode packets, looked up by packet name in
 * get_type3_op().  Packets not listed here fall back to a no-op handler.
 */
static const struct type3_op {
   const char *name;
   /* called with the payload only (packet header already stripped) */
   void (*fxn)(uint32_t *dwords, uint32_t sizedwords, int level);
   struct {
      /* when set, dump_commands() calls load_all_groups() before
       * invoking the handler
       */
      bool load_all_groups;
   } options;
} type3_op[] = {
   CP(NOP, cp_nop),
   CP(INDIRECT_BUFFER, cp_indirect),
   CP(INDIRECT_BUFFER_PFD, cp_indirect),
   CP(WAIT_FOR_IDLE, cp_wfi),
   CP(REG_RMW, cp_rmw),
   CP(REG_TO_MEM, cp_reg_mem),
   CP(MEM_TO_REG, cp_reg_mem), /* same layout as CP_REG_TO_MEM */
   CP(MEM_WRITE, cp_mem_write),
   CP(EVENT_WRITE, cp_event_write),
   CP(RUN_OPENCL, cp_run_cl),
   CP(DRAW_INDX, cp_draw_indx, {.load_all_groups = true}),
   CP(DRAW_INDX_2, cp_draw_indx_2, {.load_all_groups = true}),
   CP(SET_CONSTANT, cp_set_const),
   CP(IM_LOAD_IMMEDIATE, cp_im_loadi),
   CP(WIDE_REG_WRITE, cp_wide_reg_write),

   /* for a3xx */
   CP(LOAD_STATE, cp_load_state),
   CP(SET_BIN, cp_set_bin),

   /* for a4xx */
   CP(LOAD_STATE4, cp_load_state),
   CP(SET_DRAW_STATE, cp_set_draw_state),
   CP(DRAW_INDX_OFFSET, cp_draw_indx_offset, {.load_all_groups = true}),
   CP(EXEC_CS, cp_exec_cs, {.load_all_groups = true}),
   CP(EXEC_CS_INDIRECT, cp_exec_cs_indirect, {.load_all_groups = true}),

   /* for a5xx */
   CP(SET_RENDER_MODE, cp_set_render_mode),
   CP(COMPUTE_CHECKPOINT, cp_compute_checkpoint),
   CP(BLIT, cp_blit),
   CP(CONTEXT_REG_BUNCH, cp_context_reg_bunch),
   CP(DRAW_INDIRECT, cp_draw_indirect, {.load_all_groups = true}),
   CP(DRAW_INDX_INDIRECT, cp_draw_indx_indirect, {.load_all_groups = true}),
   CP(DRAW_INDIRECT_MULTI, cp_draw_indirect_multi, {.load_all_groups = true}),
   CP(SKIP_IB2_ENABLE_GLOBAL, cp_skip_ib2_enable_global),
   CP(SKIP_IB2_ENABLE_LOCAL, cp_skip_ib2_enable_local),

   /* for a6xx */
   CP(LOAD_STATE6_GEOM, cp_load_state),
   CP(LOAD_STATE6_FRAG, cp_load_state),
   CP(LOAD_STATE6, cp_load_state),
   CP(SET_MODE, cp_set_mode),
   CP(SET_MARKER, cp_set_marker),
   CP(REG_WRITE, cp_reg_write),
   CP(DRAW_AUTO, cp_draw_auto, {.load_all_groups = true}),

   CP(SET_CTXSWITCH_IB, cp_set_ctxswitch_ib),

   CP(START_BIN, cp_start_bin),

   CP(FIXED_STRIDE_DRAW_TABLE, cp_fixed_stride_draw_table),

   /* for a7xx */
   CP(THREAD_CONTROL, cp_set_thread_control),
   CP(CONTEXT_REG_BUNCH2, cp_context_reg_bunch2),
   CP(EVENT_WRITE7, cp_event_write),
};
3022
/* Handler that does nothing; get_type3_op() uses it for packets that
 * have no entry in the type3_op table.
 */
static void
noop_fxn(uint32_t *dwords, uint32_t sizedwords, int level)
{
}
3027
3028 static const struct type3_op *
get_type3_op(unsigned opc)3029 get_type3_op(unsigned opc)
3030 {
3031 static const struct type3_op dummy_op = {
3032 .fxn = noop_fxn,
3033 };
3034 const char *name = pktname(opc);
3035
3036 if (!name)
3037 return &dummy_op;
3038
3039 for (unsigned i = 0; i < ARRAY_SIZE(type3_op); i++)
3040 if (!strcmp(name, type3_op[i].name))
3041 return &type3_op[i];
3042
3043 return &dummy_op;
3044 }
3045
/* Decode a command buffer.
 *
 * Walks sizedwords dwords starting at dwords, one packet per loop
 * iteration, and prints each packet indented according to level.
 * Handlers for some packets (indirect buffers, draw-state groups, ...)
 * call back into this function, so it is re-entered for nested buffers;
 * the global 'ib' gives the current nesting depth and must index into
 * draws[].
 *
 * A NULL buffer is reported and ignored.  If the packets overrun the
 * buffer, a diagnostic with the (negative) remainder is printed.
 */
void
dump_commands(uint32_t *dwords, uint32_t sizedwords, int level)
{
   /* signed so that an overrun by the last packet shows up as negative */
   int dwords_left = sizedwords;
   uint32_t count = 0; /* dword count including packet header */
   uint32_t val;

   // assert(dwords);
   if (!dwords) {
      printf("NULL cmd buffer!\n");
      return;
   }

   /* restart the per-IB-level draw counter for this buffer */
   assert(ib < ARRAY_SIZE(draws));
   draws[ib] = 0;

   while (dwords_left > 0) {

      current_draw_count = draw_count;

      /* hack, this looks like a -1 underflow, in some versions
       * when it tries to write zero registers via pkt0
       */
      // if ((dwords[0] >> 16) == 0xffff)
      //     goto skip;

      if (pkt_is_regwrite(dwords[0], &val, &count)) {
         /* register write: val is the first register offset, count the
          * packet length including its header
          */
         assert(val < regcnt());
         printl(3, "%swrite %s (%04x)\n", levels[level + 1], regname(val, 1),
                val);
         dump_registers(val, dwords + 1, count - 1, level + 2);
         if (!quiet(3))
            dump_hex(dwords, count, level + 1);
#if 0
      } else if (pkt_is_type1(dwords[0])) {
         count = 3;
         val = dwords[0] & 0xfff;
         printl(3, "%swrite %s\n", levels[level+1], regname(val, 1));
         dump_registers(val, dwords+1, 1, level+2);
         val = (dwords[0] >> 12) & 0xfff;
         printl(3, "%swrite %s\n", levels[level+1], regname(val, 1));
         dump_registers(val, dwords+2, 1, level+2);
         if (!quiet(3))
            dump_hex(dwords, count, level+1);
#endif
      } else if (pkt_is_opcode(dwords[0], &val, &count)) {
         /* opcode packet: val is the opcode.  Draw-state groups are
          * loaded first for handlers that request it.
          */
         const struct type3_op *op = get_type3_op(val);
         if (op->options.load_all_groups)
            load_all_groups(level + 1);
         const char *name = pktname(val);
         if (!quiet(2)) {
            printf("\t%sopcode: %s%s%s (%02x) (%d dwords)\n", levels[level],
                   rnn->vc->colors->bctarg, name, rnn->vc->colors->reset, val,
                   count);
         }
         if (name) {
            /* special hack for two packets that decode the same way
             * on a6xx:
             */
            if (!strcmp(name, "CP_LOAD_STATE6_FRAG") ||
                !strcmp(name, "CP_LOAD_STATE6_GEOM"))
               name = "CP_LOAD_STATE6";
            dump_domain(dwords + 1, count - 1, level + 2, name);
         }
         /* the handler only sees the payload, not the header dword */
         op->fxn(dwords + 1, count - 1, level + 1);
         if (!quiet(2))
            dump_hex(dwords, count, level + 1);
      } else if (pkt_is_type2(dwords[0])) {
         /* type2 packets are single-dword nops */
         printl(3, "%snop\n", levels[level + 1]);
         count = 1;
      } else {
         printf("bad type! %08x\n", dwords[0]);
         /* for 5xx+ we can do a passable job of looking for start of next valid
          * packet: */
         if (options->info->chip >= 5) {
            count = find_next_packet(dwords, dwords_left);
         } else {
            /* no way to resynchronize on older chips: give up */
            return;
         }
      }

      dwords += count;
      dwords_left -= count;
   }

   if (dwords_left < 0)
      printf("**** this ain't right!! dwords_left=%d\n", dwords_left);
}
3134