1 /*
2 * Copyright (c) 2012 Rob Clark <robdclark@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <ctype.h>
26 #include <err.h>
27 #include <errno.h>
28 #include <fcntl.h>
29 #include <inttypes.h>
30 #include <signal.h>
31 #include <stdarg.h>
32 #include <stdbool.h>
33 #include <stdint.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <unistd.h>
38 #include <sys/stat.h>
39 #include <sys/types.h>
40 #include <sys/wait.h>
41
42 #include "freedreno_pm4.h"
43
44 #include "buffers.h"
45 #include "cffdec.h"
46 #include "disasm.h"
47 #include "redump.h"
48 #include "rnnutil.h"
49 #include "script.h"
50
51 /* ************************************************************************* */
52 /* originally based on kernel recovery dump code: */
53
/* Active decoder options; assigned by cffdec_init(). */
static const struct cffdec_options *options;

/* File-scope decode flags and counters.  Objects with static storage start
 * out zeroed, so the flags begin as false without explicit initializers.
 */
static bool needs_wfi;
static bool summary;
static bool in_summary;
static int vertices;
60
61 static inline unsigned
regcnt(void)62 regcnt(void)
63 {
64 if (options->info->chip >= 5)
65 return 0xffff;
66 else
67 return 0x7fff;
68 }
69
70 static int
is_64b(void)71 is_64b(void)
72 {
73 return options->info->chip >= 5;
74 }
75
/* NOTE(review): usage of draws[] is not visible in this part of the file. */
static int draws[4];

/* Index into ibs[] / options->ibs[] of the IB level currently being decoded. */
static int ib;

static struct {
   uint64_t base;
   uint32_t size; /* in dwords */
   /* The cmdstream generally consists of multiple IB calls to different
    * buffers, which are themselves often re-used for each tile.  The
    * triggered flag serves two purposes, to help make it clearer which
    * part of the cmdstream is before vs after the GPU hang:
    *
    * 1) if in IB2 we are past the point within the IB2 buffer where
    *    the GPU hung, but IB1 is not past the point within its
    *    buffer where the GPU had hung, then we know the GPU hang
    *    happens on a future use of that IB2 buffer.
    *
    * 2) if in an IB1 or IB2 buffer that is not the one where the GPU
    *    hung, but we've already passed the trigger point at the same
    *    IB level, we know that we are past the point where the GPU
    *    had hung.
    *
    * So this is a one way switch, false->true.  And a higher #'d
    * IB level isn't considered triggered unless the lower #'d IB
    * level is.
    */
   bool triggered : 1;
   bool base_seen : 1;
} ibs[4];

static int draw_count;
static int current_draw_count;

/* Query mode: to handle symbolic register name queries, parsing of the
 * query strings is deferred until after the gpu_id is known and the rnn
 * db is loaded.
 */
static int *queryvals;
112
113 static bool
quiet(int lvl)114 quiet(int lvl)
115 {
116 if ((options->draw_filter != -1) &&
117 (options->draw_filter != current_draw_count))
118 return true;
119 if ((lvl >= 3) && (summary || options->querystrs || options->script))
120 return true;
121 if ((lvl >= 2) && (options->querystrs || options->script))
122 return true;
123 return false;
124 }
125
/* printf() to stdout, unless quiet(lvl) says output at this level is
 * suppressed.
 */
void
printl(int lvl, const char *fmt, ...)
{
   if (!quiet(lvl)) {
      va_list ap;

      va_start(ap, fmt);
      vprintf(fmt, ap);
      va_end(ap);
   }
}
136
/* Line prefixes indexed by nesting level.  Entries 0..8 hold one to nine
 * tabs; entries 9..14 hold the marker string "x".
 */
static const char *levels[] = {
   [0] = "\t",
   [1] = "\t\t",
   [2] = "\t\t\t",
   [3] = "\t\t\t\t",
   [4] = "\t\t\t\t\t",
   [5] = "\t\t\t\t\t\t",
   [6] = "\t\t\t\t\t\t\t",
   [7] = "\t\t\t\t\t\t\t\t",
   [8] = "\t\t\t\t\t\t\t\t\t",
   [9] = "x",
   [10] = "x",
   [11] = "x",
   [12] = "x",
   [13] = "x",
   [14] = "x",
};
154
/* State source selector; passed as the src argument of dump_tex_samp(). */
enum state_src_t {
   STATE_SRC_DIRECT = 0,
   STATE_SRC_INDIRECT = 1,
   STATE_SRC_BINDLESS = 2,
};
160
/* Forward declarations for helpers defined later in the file. */

/* SDS (CP_SET_DRAW_STATE) helpers: */
static void load_all_groups(int level);
static void disable_all_groups(void);

/* Texture sampler / texture constant state dumpers: */
static void dump_tex_samp(uint32_t *texsamp, enum state_src_t src,
                          int num_unit, int level);
static void dump_tex_const(uint32_t *texconst, int num_unit, int level);
168
169 static bool
highlight_gpuaddr(uint64_t gpuaddr)170 highlight_gpuaddr(uint64_t gpuaddr)
171 {
172 if (!options->ibs[ib].base)
173 return false;
174
175 if ((ib > 0) && options->ibs[ib - 1].base &&
176 !(ibs[ib - 1].triggered || ibs[ib - 1].base_seen))
177 return false;
178
179 if (ibs[ib].base_seen)
180 return false;
181
182 if (ibs[ib].triggered)
183 return options->color;
184
185 if (options->ibs[ib].base != ibs[ib].base)
186 return false;
187
188 uint64_t start = ibs[ib].base + 4 * (ibs[ib].size - options->ibs[ib].rem);
189 uint64_t end = ibs[ib].base + 4 * ibs[ib].size;
190
191 bool triggered = (start <= gpuaddr) && (gpuaddr <= end);
192
193 if (triggered && (ib < 2) && options->ibs[ib + 1].crash_found) {
194 ibs[ib].base_seen = true;
195 return false;
196 }
197
198 ibs[ib].triggered |= triggered;
199
200 if (triggered)
201 printf("ESTIMATED CRASH LOCATION!\n");
202
203 return triggered & options->color;
204 }
205
206 static void
dump_hex(uint32_t * dwords,uint32_t sizedwords,int level)207 dump_hex(uint32_t *dwords, uint32_t sizedwords, int level)
208 {
209 int i, j;
210 int lastzero = 1;
211
212 if (quiet(2))
213 return;
214
215 bool highlight = highlight_gpuaddr(gpuaddr(dwords) + 4 * sizedwords - 1);
216
217 for (i = 0; i < sizedwords; i += 8) {
218 int zero = 1;
219
220 /* always show first row: */
221 if (i == 0)
222 zero = 0;
223
224 for (j = 0; (j < 8) && (i + j < sizedwords) && zero; j++)
225 if (dwords[i + j])
226 zero = 0;
227
228 if (zero && !lastzero)
229 printf("*\n");
230
231 lastzero = zero;
232
233 if (zero)
234 continue;
235
236 uint64_t addr = gpuaddr(&dwords[i]);
237
238 if (highlight)
239 printf("\x1b[0;1;31m");
240
241 if (is_64b()) {
242 printf("%016" PRIx64 ":%s", addr, levels[level]);
243 } else {
244 printf("%08x:%s", (uint32_t)addr, levels[level]);
245 }
246
247 if (highlight)
248 printf("\x1b[0m");
249
250 printf("%04x:", i * 4);
251
252 for (j = 0; (j < 8) && (i + j < sizedwords); j++) {
253 printf(" %08x", dwords[i + j]);
254 }
255
256 printf("\n");
257 }
258 }
259
260 static void
dump_float(float * dwords,uint32_t sizedwords,int level)261 dump_float(float *dwords, uint32_t sizedwords, int level)
262 {
263 int i;
264 for (i = 0; i < sizedwords; i++) {
265 if ((i % 8) == 0) {
266 if (is_64b()) {
267 printf("%016" PRIx64 ":%s", gpuaddr(dwords), levels[level]);
268 } else {
269 printf("%08x:%s", (uint32_t)gpuaddr(dwords), levels[level]);
270 }
271 } else {
272 printf(" ");
273 }
274 printf("%8f", *(dwords++));
275 if ((i % 8) == 7)
276 printf("\n");
277 }
278 if (i % 8)
279 printf("\n");
280 }
281
/* Split dword into an address part and a flags part using mask: the bits
 * selected by mask are returned in *flags, the remaining bits in *gpuaddr.
 *
 * Original author's note: the surface format is believed to be in the low
 * bits:
 *    #define RB_COLOR_INFO__COLOR_FORMAT_MASK 0x0000000fL
 * Comments in sys2gmem_tex_const indicate that the address is [31:12], but
 * it looks like at least some of the bits above the format have a different
 * meaning.
 */
static void
parse_dword_addr(uint32_t dword, uint32_t *gpuaddr, uint32_t *flags,
                 uint32_t mask)
{
   assert(!is_64b()); /* this is only used on a2xx */

   const uint32_t flag_bits = dword & mask;

   *gpuaddr = dword ^ flag_bits; /* same as dword & ~mask */
   *flags = flag_bits;
}
295
296 static uint32_t type0_reg_vals[0xffff + 1];
297 static uint8_t type0_reg_rewritten[sizeof(type0_reg_vals) /
298 8]; /* written since last draw */
299 static uint8_t type0_reg_written[sizeof(type0_reg_vals) / 8];
300 static uint32_t lastvals[ARRAY_SIZE(type0_reg_vals)];
301
302 static bool
reg_rewritten(uint32_t regbase)303 reg_rewritten(uint32_t regbase)
304 {
305 return !!(type0_reg_rewritten[regbase / 8] & (1 << (regbase % 8)));
306 }
307
308 bool
reg_written(uint32_t regbase)309 reg_written(uint32_t regbase)
310 {
311 return !!(type0_reg_written[regbase / 8] & (1 << (regbase % 8)));
312 }
313
314 static void
clear_rewritten(void)315 clear_rewritten(void)
316 {
317 memset(type0_reg_rewritten, 0, sizeof(type0_reg_rewritten));
318 }
319
320 static void
clear_written(void)321 clear_written(void)
322 {
323 memset(type0_reg_written, 0, sizeof(type0_reg_written));
324 clear_rewritten();
325 }
326
327 uint32_t
reg_lastval(uint32_t regbase)328 reg_lastval(uint32_t regbase)
329 {
330 return lastvals[regbase];
331 }
332
333 static void
clear_lastvals(void)334 clear_lastvals(void)
335 {
336 memset(lastvals, 0, sizeof(lastvals));
337 }
338
339 uint32_t
reg_val(uint32_t regbase)340 reg_val(uint32_t regbase)
341 {
342 return type0_reg_vals[regbase];
343 }
344
345 void
reg_set(uint32_t regbase,uint32_t val)346 reg_set(uint32_t regbase, uint32_t val)
347 {
348 assert(regbase < regcnt());
349 type0_reg_vals[regbase] = val;
350 type0_reg_written[regbase / 8] |= (1 << (regbase % 8));
351 type0_reg_rewritten[regbase / 8] |= (1 << (regbase % 8));
352 }
353
354 static void
reg_dump_scratch(const char * name,uint32_t dword,int level)355 reg_dump_scratch(const char *name, uint32_t dword, int level)
356 {
357 unsigned r;
358
359 if (quiet(3))
360 return;
361
362 r = regbase("CP_SCRATCH[0].REG");
363
364 // if not, try old a2xx/a3xx version:
365 if (!r)
366 r = regbase("CP_SCRATCH_REG0");
367
368 if (!r)
369 return;
370
371 printf("%s:%u,%u,%u,%u\n", levels[level], reg_val(r + 4), reg_val(r + 5),
372 reg_val(r + 6), reg_val(r + 7));
373 }
374
/* Hex-dump sizedwords dwords of the buffer at gpuaddr, one level deeper than
 * level.  Skipped when quiet(quietlvl) is true or when hostptr() has no
 * mapping for the address.
 */
static void
dump_gpuaddr_size(uint64_t gpuaddr, int level, int sizedwords, int quietlvl)
{
   if (quiet(quietlvl))
      return;

   void *host = hostptr(gpuaddr);
   if (!host)
      return;

   dump_hex(host, sizedwords, level + 1);
}
388
/* Hex-dump the first 64 dwords at gpuaddr, at quiet level 3. */
static void
dump_gpuaddr(uint64_t gpuaddr, int level)
{
   enum {
      DUMP_DWORDS = 64,
      DUMP_QUIET_LVL = 3,
   };

   dump_gpuaddr_size(gpuaddr, level, DUMP_DWORDS, DUMP_QUIET_LVL);
}
394
/* Register handler: treat the 32-bit register value as a GPU address and
 * dump the buffer it points at.  name is unused.
 */
static void
reg_dump_gpuaddr(const char *name, uint32_t dword, int level)
{
   (void)name;

   const uint64_t addr = dword;

   dump_gpuaddr(addr, level);
}
400
/* Low dword of a 64-bit address, latched by reg_gpuaddr_lo() and combined
 * with the high dword by the *_hi handlers.
 */
uint32_t gpuaddr_lo;

/* Register handler for the low half of an address pair: just remember the
 * value.  name and level are unused.
 */
static void
reg_gpuaddr_lo(const char *name, uint32_t dword, int level)
{
   (void)name;
   (void)level;

   gpuaddr_lo = dword;
}
407
408 static void
reg_dump_gpuaddr_hi(const char * name,uint32_t dword,int level)409 reg_dump_gpuaddr_hi(const char *name, uint32_t dword, int level)
410 {
411 dump_gpuaddr(gpuaddr_lo | (((uint64_t)dword) << 32), level);
412 }
413
/* 64-bit register handler: dump the buffer at the address held in qword.
 * name is unused.
 */
static void
reg_dump_gpuaddr64(const char *name, uint64_t qword, int level)
{
   (void)name;

   const uint64_t addr = qword;

   dump_gpuaddr(addr, level);
}
419
420 static void
dump_shader(const char * ext,void * buf,int bufsz)421 dump_shader(const char *ext, void *buf, int bufsz)
422 {
423 if (options->dump_shaders) {
424 static int n = 0;
425 char filename[16];
426 int fd;
427 sprintf(filename, "%04d.%s", n++, ext);
428 fd = open(filename, O_WRONLY | O_TRUNC | O_CREAT, 0644);
429 if (fd != -1) {
430 write(fd, buf, bufsz);
431 close(fd);
432 }
433 }
434 }
435
436 static void
disasm_gpuaddr(const char * name,uint64_t gpuaddr,int level)437 disasm_gpuaddr(const char *name, uint64_t gpuaddr, int level)
438 {
439 void *buf;
440
441 gpuaddr &= 0xfffffffffffffff0;
442
443 if (quiet(3))
444 return;
445
446 buf = hostptr(gpuaddr);
447 if (buf) {
448 uint32_t sizedwords = hostlen(gpuaddr) / 4;
449 const char *ext;
450
451 dump_hex(buf, MIN2(64, sizedwords), level + 1);
452 try_disasm_a3xx(buf, sizedwords, level + 2, stdout, options->info->chip * 100);
453
454 /* this is a bit ugly way, but oh well.. */
455 if (strstr(name, "SP_VS_OBJ")) {
456 ext = "vo3";
457 } else if (strstr(name, "SP_FS_OBJ")) {
458 ext = "fo3";
459 } else if (strstr(name, "SP_GS_OBJ")) {
460 ext = "go3";
461 } else if (strstr(name, "SP_CS_OBJ")) {
462 ext = "co3";
463 } else {
464 ext = NULL;
465 }
466
467 if (ext)
468 dump_shader(ext, buf, sizedwords * 4);
469 }
470 }
471
/* Register handler: treat the 32-bit register value as a shader address and
 * disassemble it.
 */
static void
reg_disasm_gpuaddr(const char *name, uint32_t dword, int level)
{
   const uint64_t addr = dword;

   disasm_gpuaddr(name, addr, level);
}
477
478 static void
reg_disasm_gpuaddr_hi(const char * name,uint32_t dword,int level)479 reg_disasm_gpuaddr_hi(const char *name, uint32_t dword, int level)
480 {
481 disasm_gpuaddr(name, gpuaddr_lo | (((uint64_t)dword) << 32), level);
482 }
483
/* 64-bit register handler: disassemble the shader at the address in qword. */
static void
reg_disasm_gpuaddr64(const char *name, uint64_t qword, int level)
{
   const uint64_t addr = qword;

   disasm_gpuaddr(name, addr, level);
}
489
/* Find the value of the TEX_COUNT register that corresponds to the named
 * TEX_SAMP/TEX_CONST reg.
 *
 * The count register's name is formed by taking everything in name before
 * the first "CONST" (or, failing that, the first "SAMP") and appending
 * "COUNT".  Returns 0 when name contains neither substring.
 *
 * Note, this kinda assumes an equal # of samplers and textures, but not
 * really sure if there is a much better option.  I suppose on a6xx we
 * could instead decode the bitfields in SP_xS_CONFIG
 */
static int
get_tex_count(const char *name)
{
   static const char suffix[] = "COUNT";

   const char *tail = strstr(name, "CONST");
   if (!tail)
      tail = strstr(name, "SAMP");
   if (!tail)
      return 0;

   const size_t prefix_len = tail - name;
   char count_reg[prefix_len + sizeof(suffix)];

   memcpy(count_reg, name, prefix_len);
   /* sizeof(suffix) includes the terminating NUL. */
   memcpy(count_reg + prefix_len, suffix, sizeof(suffix));

   return reg_val(regbase(count_reg));
}
515
516 static void
reg_dump_tex_samp_hi(const char * name,uint32_t dword,int level)517 reg_dump_tex_samp_hi(const char *name, uint32_t dword, int level)
518 {
519 if (!in_summary)
520 return;
521
522 int num_unit = get_tex_count(name);
523 uint64_t gpuaddr = gpuaddr_lo | (((uint64_t)dword) << 32);
524 void *buf = hostptr(gpuaddr);
525
526 if (!buf)
527 return;
528
529 dump_tex_samp(buf, STATE_SRC_DIRECT, num_unit, level + 1);
530 }
531
532 static void
reg_dump_tex_const_hi(const char * name,uint32_t dword,int level)533 reg_dump_tex_const_hi(const char *name, uint32_t dword, int level)
534 {
535 if (!in_summary)
536 return;
537
538 int num_unit = get_tex_count(name);
539 uint64_t gpuaddr = gpuaddr_lo | (((uint64_t)dword) << 32);
540 void *buf = hostptr(gpuaddr);
541
542 if (!buf)
543 return;
544
545 dump_tex_const(buf, num_unit, level + 1);
546 }
547
548 /*
549 * Registers with special handling (rnndec_decode() handles rest):
550 */
551 #define REG(x, fxn) { #x, fxn }
552 #define REG64(x, fxn) { #x, .fxn64 = fxn, .is_reg64 = true }
553 static struct {
554 const char *regname;
555 void (*fxn)(const char *name, uint32_t dword, int level);
556 void (*fxn64)(const char *name, uint64_t qword, int level);
557 uint32_t regbase;
558 bool is_reg64;
559 } reg_a2xx[] = {
560 REG(CP_SCRATCH_REG0, reg_dump_scratch),
561 REG(CP_SCRATCH_REG1, reg_dump_scratch),
562 REG(CP_SCRATCH_REG2, reg_dump_scratch),
563 REG(CP_SCRATCH_REG3, reg_dump_scratch),
564 REG(CP_SCRATCH_REG4, reg_dump_scratch),
565 REG(CP_SCRATCH_REG5, reg_dump_scratch),
566 REG(CP_SCRATCH_REG6, reg_dump_scratch),
567 REG(CP_SCRATCH_REG7, reg_dump_scratch),
568 {NULL},
569 }, reg_a3xx[] = {
570 REG(CP_SCRATCH_REG0, reg_dump_scratch),
571 REG(CP_SCRATCH_REG1, reg_dump_scratch),
572 REG(CP_SCRATCH_REG2, reg_dump_scratch),
573 REG(CP_SCRATCH_REG3, reg_dump_scratch),
574 REG(CP_SCRATCH_REG4, reg_dump_scratch),
575 REG(CP_SCRATCH_REG5, reg_dump_scratch),
576 REG(CP_SCRATCH_REG6, reg_dump_scratch),
577 REG(CP_SCRATCH_REG7, reg_dump_scratch),
578 REG(VSC_SIZE_ADDRESS, reg_dump_gpuaddr),
579 REG(SP_VS_PVT_MEM_ADDR_REG, reg_dump_gpuaddr),
580 REG(SP_FS_PVT_MEM_ADDR_REG, reg_dump_gpuaddr),
581 REG(SP_VS_OBJ_START_REG, reg_disasm_gpuaddr),
582 REG(SP_FS_OBJ_START_REG, reg_disasm_gpuaddr),
583 REG(TPL1_TP_FS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
584 {NULL},
585 }, reg_a4xx[] = {
586 REG(CP_SCRATCH[0].REG, reg_dump_scratch),
587 REG(CP_SCRATCH[0x1].REG, reg_dump_scratch),
588 REG(CP_SCRATCH[0x2].REG, reg_dump_scratch),
589 REG(CP_SCRATCH[0x3].REG, reg_dump_scratch),
590 REG(CP_SCRATCH[0x4].REG, reg_dump_scratch),
591 REG(CP_SCRATCH[0x5].REG, reg_dump_scratch),
592 REG(CP_SCRATCH[0x6].REG, reg_dump_scratch),
593 REG(CP_SCRATCH[0x7].REG, reg_dump_scratch),
594 REG(SP_VS_PVT_MEM_ADDR, reg_dump_gpuaddr),
595 REG(SP_FS_PVT_MEM_ADDR, reg_dump_gpuaddr),
596 REG(SP_GS_PVT_MEM_ADDR, reg_dump_gpuaddr),
597 REG(SP_HS_PVT_MEM_ADDR, reg_dump_gpuaddr),
598 REG(SP_DS_PVT_MEM_ADDR, reg_dump_gpuaddr),
599 REG(SP_CS_PVT_MEM_ADDR, reg_dump_gpuaddr),
600 REG(SP_VS_OBJ_START, reg_disasm_gpuaddr),
601 REG(SP_FS_OBJ_START, reg_disasm_gpuaddr),
602 REG(SP_GS_OBJ_START, reg_disasm_gpuaddr),
603 REG(SP_HS_OBJ_START, reg_disasm_gpuaddr),
604 REG(SP_DS_OBJ_START, reg_disasm_gpuaddr),
605 REG(SP_CS_OBJ_START, reg_disasm_gpuaddr),
606 REG(TPL1_TP_VS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
607 REG(TPL1_TP_HS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
608 REG(TPL1_TP_DS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
609 REG(TPL1_TP_GS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
610 REG(TPL1_TP_FS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
611 {NULL},
612 }, reg_a5xx[] = {
613 REG(CP_SCRATCH[0x4].REG, reg_dump_scratch),
614 REG(CP_SCRATCH[0x5].REG, reg_dump_scratch),
615 REG(CP_SCRATCH[0x6].REG, reg_dump_scratch),
616 REG(CP_SCRATCH[0x7].REG, reg_dump_scratch),
617 REG(SP_VS_OBJ_START_LO, reg_gpuaddr_lo),
618 REG(SP_VS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
619 REG(SP_HS_OBJ_START_LO, reg_gpuaddr_lo),
620 REG(SP_HS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
621 REG(SP_DS_OBJ_START_LO, reg_gpuaddr_lo),
622 REG(SP_DS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
623 REG(SP_GS_OBJ_START_LO, reg_gpuaddr_lo),
624 REG(SP_GS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
625 REG(SP_FS_OBJ_START_LO, reg_gpuaddr_lo),
626 REG(SP_FS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
627 REG(SP_CS_OBJ_START_LO, reg_gpuaddr_lo),
628 REG(SP_CS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
629 REG(TPL1_VS_TEX_CONST_LO, reg_gpuaddr_lo),
630 REG(TPL1_VS_TEX_CONST_HI, reg_dump_tex_const_hi),
631 REG(TPL1_VS_TEX_SAMP_LO, reg_gpuaddr_lo),
632 REG(TPL1_VS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
633 REG(TPL1_HS_TEX_CONST_LO, reg_gpuaddr_lo),
634 REG(TPL1_HS_TEX_CONST_HI, reg_dump_tex_const_hi),
635 REG(TPL1_HS_TEX_SAMP_LO, reg_gpuaddr_lo),
636 REG(TPL1_HS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
637 REG(TPL1_DS_TEX_CONST_LO, reg_gpuaddr_lo),
638 REG(TPL1_DS_TEX_CONST_HI, reg_dump_tex_const_hi),
639 REG(TPL1_DS_TEX_SAMP_LO, reg_gpuaddr_lo),
640 REG(TPL1_DS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
641 REG(TPL1_GS_TEX_CONST_LO, reg_gpuaddr_lo),
642 REG(TPL1_GS_TEX_CONST_HI, reg_dump_tex_const_hi),
643 REG(TPL1_GS_TEX_SAMP_LO, reg_gpuaddr_lo),
644 REG(TPL1_GS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
645 REG(TPL1_FS_TEX_CONST_LO, reg_gpuaddr_lo),
646 REG(TPL1_FS_TEX_CONST_HI, reg_dump_tex_const_hi),
647 REG(TPL1_FS_TEX_SAMP_LO, reg_gpuaddr_lo),
648 REG(TPL1_FS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
649 REG(TPL1_CS_TEX_CONST_LO, reg_gpuaddr_lo),
650 REG(TPL1_CS_TEX_CONST_HI, reg_dump_tex_const_hi),
651 REG(TPL1_CS_TEX_SAMP_LO, reg_gpuaddr_lo),
652 REG(TPL1_CS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
653 REG(TPL1_TP_BORDER_COLOR_BASE_ADDR_LO, reg_gpuaddr_lo),
654 REG(TPL1_TP_BORDER_COLOR_BASE_ADDR_HI, reg_dump_gpuaddr_hi),
655 // REG(RB_MRT_FLAG_BUFFER[0].ADDR_LO, reg_gpuaddr_lo),
656 // REG(RB_MRT_FLAG_BUFFER[0].ADDR_HI, reg_dump_gpuaddr_hi),
657 // REG(RB_MRT_FLAG_BUFFER[1].ADDR_LO, reg_gpuaddr_lo),
658 // REG(RB_MRT_FLAG_BUFFER[1].ADDR_HI, reg_dump_gpuaddr_hi),
659 // REG(RB_MRT_FLAG_BUFFER[2].ADDR_LO, reg_gpuaddr_lo),
660 // REG(RB_MRT_FLAG_BUFFER[2].ADDR_HI, reg_dump_gpuaddr_hi),
661 // REG(RB_MRT_FLAG_BUFFER[3].ADDR_LO, reg_gpuaddr_lo),
662 // REG(RB_MRT_FLAG_BUFFER[3].ADDR_HI, reg_dump_gpuaddr_hi),
663 // REG(RB_MRT_FLAG_BUFFER[4].ADDR_LO, reg_gpuaddr_lo),
664 // REG(RB_MRT_FLAG_BUFFER[4].ADDR_HI, reg_dump_gpuaddr_hi),
665 // REG(RB_MRT_FLAG_BUFFER[5].ADDR_LO, reg_gpuaddr_lo),
666 // REG(RB_MRT_FLAG_BUFFER[5].ADDR_HI, reg_dump_gpuaddr_hi),
667 // REG(RB_MRT_FLAG_BUFFER[6].ADDR_LO, reg_gpuaddr_lo),
668 // REG(RB_MRT_FLAG_BUFFER[6].ADDR_HI, reg_dump_gpuaddr_hi),
669 // REG(RB_MRT_FLAG_BUFFER[7].ADDR_LO, reg_gpuaddr_lo),
670 // REG(RB_MRT_FLAG_BUFFER[7].ADDR_HI, reg_dump_gpuaddr_hi),
671 // REG(RB_BLIT_FLAG_DST_LO, reg_gpuaddr_lo),
672 // REG(RB_BLIT_FLAG_DST_HI, reg_dump_gpuaddr_hi),
673 // REG(RB_MRT[0].BASE_LO, reg_gpuaddr_lo),
674 // REG(RB_MRT[0].BASE_HI, reg_dump_gpuaddr_hi),
675 // REG(RB_DEPTH_BUFFER_BASE_LO, reg_gpuaddr_lo),
676 // REG(RB_DEPTH_BUFFER_BASE_HI, reg_dump_gpuaddr_hi),
677 // REG(RB_DEPTH_FLAG_BUFFER_BASE_LO, reg_gpuaddr_lo),
678 // REG(RB_DEPTH_FLAG_BUFFER_BASE_HI, reg_dump_gpuaddr_hi),
679 // REG(RB_BLIT_DST_LO, reg_gpuaddr_lo),
680 // REG(RB_BLIT_DST_HI, reg_dump_gpuaddr_hi),
681
682 // REG(RB_2D_SRC_LO, reg_gpuaddr_lo),
683 // REG(RB_2D_SRC_HI, reg_dump_gpuaddr_hi),
684 // REG(RB_2D_SRC_FLAGS_LO, reg_gpuaddr_lo),
685 // REG(RB_2D_SRC_FLAGS_HI, reg_dump_gpuaddr_hi),
686 // REG(RB_2D_DST_LO, reg_gpuaddr_lo),
687 // REG(RB_2D_DST_HI, reg_dump_gpuaddr_hi),
688 // REG(RB_2D_DST_FLAGS_LO, reg_gpuaddr_lo),
689 // REG(RB_2D_DST_FLAGS_HI, reg_dump_gpuaddr_hi),
690
691 {NULL},
692 }, reg_a6xx[] = {
693 REG(CP_SCRATCH[0x4].REG, reg_dump_scratch),
694 REG(CP_SCRATCH[0x5].REG, reg_dump_scratch),
695 REG(CP_SCRATCH[0x6].REG, reg_dump_scratch),
696 REG(CP_SCRATCH[0x7].REG, reg_dump_scratch),
697
698 REG64(SP_VS_OBJ_START, reg_disasm_gpuaddr64),
699 REG64(SP_HS_OBJ_START, reg_disasm_gpuaddr64),
700 REG64(SP_DS_OBJ_START, reg_disasm_gpuaddr64),
701 REG64(SP_GS_OBJ_START, reg_disasm_gpuaddr64),
702 REG64(SP_FS_OBJ_START, reg_disasm_gpuaddr64),
703 REG64(SP_CS_OBJ_START, reg_disasm_gpuaddr64),
704
705 REG64(SP_VS_TEX_CONST, reg_dump_gpuaddr64),
706 REG64(SP_VS_TEX_SAMP, reg_dump_gpuaddr64),
707 REG64(SP_HS_TEX_CONST, reg_dump_gpuaddr64),
708 REG64(SP_HS_TEX_SAMP, reg_dump_gpuaddr64),
709 REG64(SP_DS_TEX_CONST, reg_dump_gpuaddr64),
710 REG64(SP_DS_TEX_SAMP, reg_dump_gpuaddr64),
711 REG64(SP_GS_TEX_CONST, reg_dump_gpuaddr64),
712 REG64(SP_GS_TEX_SAMP, reg_dump_gpuaddr64),
713 REG64(SP_FS_TEX_CONST, reg_dump_gpuaddr64),
714 REG64(SP_FS_TEX_SAMP, reg_dump_gpuaddr64),
715 REG64(SP_CS_TEX_CONST, reg_dump_gpuaddr64),
716 REG64(SP_CS_TEX_SAMP, reg_dump_gpuaddr64),
717
718 {NULL},
719 }, reg_a7xx[] = {
720 REG64(SP_VS_OBJ_START, reg_disasm_gpuaddr64),
721 REG64(SP_HS_OBJ_START, reg_disasm_gpuaddr64),
722 REG64(SP_DS_OBJ_START, reg_disasm_gpuaddr64),
723 REG64(SP_GS_OBJ_START, reg_disasm_gpuaddr64),
724 REG64(SP_FS_OBJ_START, reg_disasm_gpuaddr64),
725 REG64(SP_CS_OBJ_START, reg_disasm_gpuaddr64),
726
727 {NULL},
728 }, *type0_reg;
729
730 static struct rnn *rnn;
731
732 static void
init_rnn(const char * gpuname)733 init_rnn(const char *gpuname)
734 {
735 rnn = rnn_new(!options->color);
736
737 rnn_load(rnn, gpuname);
738
739 if (options->querystrs) {
740 int i;
741 queryvals = calloc(options->nquery, sizeof(queryvals[0]));
742
743 for (i = 0; i < options->nquery; i++) {
744 int val = strtol(options->querystrs[i], NULL, 0);
745
746 if (val == 0)
747 val = regbase(options->querystrs[i]);
748
749 queryvals[i] = val;
750 printf("querystr: %s -> 0x%x\n", options->querystrs[i], queryvals[i]);
751 }
752 }
753
754 for (unsigned idx = 0; type0_reg[idx].regname; idx++) {
755 type0_reg[idx].regbase = regbase(type0_reg[idx].regname);
756 if (!type0_reg[idx].regbase) {
757 printf("invalid register name: %s\n", type0_reg[idx].regname);
758 exit(1);
759 }
760 }
761 }
762
763 void
reset_regs(void)764 reset_regs(void)
765 {
766 clear_written();
767 clear_lastvals();
768 memset(&ibs, 0, sizeof(ibs));
769 }
770
771 void
cffdec_init(const struct cffdec_options * _options)772 cffdec_init(const struct cffdec_options *_options)
773 {
774 options = _options;
775 summary = options->summary;
776
777 /* in case we're decoding multiple files: */
778 free(queryvals);
779 reset_regs();
780 draw_count = 0;
781
782 if (!options->info)
783 return;
784
785 switch (options->info->chip) {
786 case 2:
787 type0_reg = reg_a2xx;
788 init_rnn("a2xx");
789 break;
790 case 3:
791 type0_reg = reg_a3xx;
792 init_rnn("a3xx");
793 break;
794 case 4:
795 type0_reg = reg_a4xx;
796 init_rnn("a4xx");
797 break;
798 case 5:
799 type0_reg = reg_a5xx;
800 init_rnn("a5xx");
801 break;
802 case 6:
803 type0_reg = reg_a6xx;
804 init_rnn("a6xx");
805 break;
806 case 7:
807 type0_reg = reg_a7xx;
808 init_rnn("a7xx");
809 break;
810 default:
811 errx(-1, "unsupported generation: %u", options->info->chip);
812 }
813 }
814
815 const char *
pktname(unsigned opc)816 pktname(unsigned opc)
817 {
818 return rnn_enumname(rnn, "adreno_pm4_type3_packets", opc);
819 }
820
821 const char *
regname(uint32_t regbase,int color)822 regname(uint32_t regbase, int color)
823 {
824 return rnn_regname(rnn, regbase, color);
825 }
826
827 uint32_t
regbase(const char * name)828 regbase(const char *name)
829 {
830 return rnn_regbase(rnn, name);
831 }
832
/* Returns non-zero if the name of register regbase ends with suffix.
 *
 * The tail of the name is compared directly.  Searching with strstr() would
 * only look at the FIRST occurrence of suffix, so a name that contains the
 * suffix earlier on as well as at the end (e.g. "A_HI_B_HI" with "_HI")
 * would be misreported, and the pointer arithmetic needed to test the match
 * position can step outside the string.
 *
 * An empty suffix matches any name.
 */
static int
endswith(uint32_t regbase, const char *suffix)
{
   const char *name = regname(regbase, 0);

   /* regname()'s behaviour for unknown registers isn't visible from here;
    * guard defensively rather than pass NULL to strlen().
    */
   if (!name)
      return 0;

   const size_t name_len = strlen(name);
   const size_t suffix_len = strlen(suffix);

   if (suffix_len > name_len)
      return 0;

   return strcmp(name + (name_len - suffix_len), suffix) == 0;
}
842
843 struct regacc
regacc(struct rnn * r)844 regacc(struct rnn *r)
845 {
846 if (!r)
847 r = rnn;
848
849 return (struct regacc){ .rnn = r };
850 }
851
852 /* returns true if the complete reg value has been accumulated: */
853 bool
regacc_push(struct regacc * r,uint32_t regbase,uint32_t dword)854 regacc_push(struct regacc *r, uint32_t regbase, uint32_t dword)
855 {
856 if (r->has_dword_lo) {
857 /* Work around kernel devcore dumps which accidentially miss half of a 64b reg
858 * see: https://patchwork.freedesktop.org/series/112302/
859 */
860 if (regbase != r->regbase + 1) {
861 printf("WARNING: 64b discontinuity (%x, expected %x)\n", regbase, r->regbase + 1);
862 r->has_dword_lo = false;
863 return true;
864 }
865
866 r->value |= ((uint64_t)dword) << 32;
867 r->has_dword_lo = false;
868
869 return true;
870 }
871
872 r->regbase = regbase;
873 r->value = dword;
874
875 struct rnndecaddrinfo *info = rnn_reginfo(r->rnn, regbase);
876 r->has_dword_lo = (info->width == 64);
877
878 /* Workaround for kernel devcore dump bugs: */
879 if ((info->width == 64) && endswith(regbase, "_HI")) {
880 printf("WARNING: 64b discontinuity (no _LO dword for %x)\n", regbase);
881 r->has_dword_lo = false;
882 }
883
884 rnn_reginfo_free(info);
885
886 return !r->has_dword_lo;
887 }
888
889 void
dump_register_val(struct regacc * r,int level)890 dump_register_val(struct regacc *r, int level)
891 {
892 struct rnndecaddrinfo *info = rnn_reginfo(rnn, r->regbase);
893
894 if (info && info->typeinfo) {
895 uint64_t gpuaddr = 0;
896 char *decoded = rnndec_decodeval(rnn->vc, info->typeinfo, r->value);
897 printf("%s%s: %s", levels[level], info->name, decoded);
898
899 /* Try and figure out if we are looking at a gpuaddr.. this
900 * might be useful for other gen's too, but at least a5xx has
901 * the _HI/_LO suffix we can look for. Maybe a better approach
902 * would be some special annotation in the xml..
903 * for a6xx use "address" and "waddress" types
904 */
905 if (options->info->chip >= 6) {
906 if (!strcmp(info->typeinfo->name, "address") ||
907 !strcmp(info->typeinfo->name, "waddress")) {
908 gpuaddr = r->value;
909 }
910 } else if (options->info->chip >= 5) {
911 /* TODO we shouldn't rely on reg_val() since reg_set() might
912 * not have been called yet for the other half of the 64b reg.
913 * We can remove this hack once a5xx.xml is converted to reg64
914 * and address/waddess.
915 */
916 if (endswith(r->regbase, "_HI") && endswith(r->regbase - 1, "_LO")) {
917 gpuaddr = (r->value << 32) | reg_val(r->regbase - 1);
918 } else if (endswith(r->regbase, "_LO") && endswith(r->regbase + 1, "_HI")) {
919 gpuaddr = (((uint64_t)reg_val(r->regbase + 1)) << 32) | r->value;
920 }
921 }
922
923 if (gpuaddr && hostptr(gpuaddr)) {
924 printf("\t\tbase=%" PRIx64 ", offset=%" PRIu64 ", size=%u",
925 gpubaseaddr(gpuaddr), gpuaddr - gpubaseaddr(gpuaddr),
926 hostlen(gpubaseaddr(gpuaddr)));
927 }
928
929 printf("\n");
930
931 free(decoded);
932 } else if (info) {
933 printf("%s%s: %08"PRIx64"\n", levels[level], info->name, r->value);
934 } else {
935 printf("%s<%04x>: %08"PRIx64"\n", levels[level], r->regbase, r->value);
936 }
937
938 rnn_reginfo_free(info);
939 }
940
941 static void
dump_register(struct regacc * r,int level)942 dump_register(struct regacc *r, int level)
943 {
944 if (!quiet(3)) {
945 dump_register_val(r, level);
946 }
947
948 for (unsigned idx = 0; type0_reg[idx].regname; idx++) {
949 if (type0_reg[idx].regbase == r->regbase) {
950 if (type0_reg[idx].is_reg64) {
951 type0_reg[idx].fxn64(type0_reg[idx].regname, r->value, level);
952 } else {
953 type0_reg[idx].fxn(type0_reg[idx].regname, (uint32_t)r->value, level);
954 }
955 break;
956 }
957 }
958 }
959
/* Returns true for register offsets in the range [0x2000, 0x2400). */
static bool
is_banked_reg(uint32_t regbase)
{
   if (regbase < 0x2000)
      return false;

   return regbase < 0x2400;
}
965
966 static void
dump_registers(uint32_t regbase,uint32_t * dwords,uint32_t sizedwords,int level)967 dump_registers(uint32_t regbase, uint32_t *dwords, uint32_t sizedwords,
968 int level)
969 {
970 struct regacc r = regacc(NULL);
971
972 while (sizedwords--) {
973 int last_summary = summary;
974
975 /* access to non-banked registers needs a WFI:
976 * TODO banked register range for a2xx??
977 */
978 if (needs_wfi && !is_banked_reg(regbase))
979 printl(2, "NEEDS WFI: %s (%x)\n", regname(regbase, 1), regbase);
980
981 reg_set(regbase, *dwords);
982 if (regacc_push(&r, regbase, *dwords))
983 dump_register(&r, level);
984 regbase++;
985 dwords++;
986 summary = last_summary;
987 }
988 }
989
990 static void
dump_domain(uint32_t * dwords,uint32_t sizedwords,int level,const char * name)991 dump_domain(uint32_t *dwords, uint32_t sizedwords, int level, const char *name)
992 {
993 struct rnndomain *dom;
994 int i;
995
996 dom = rnn_finddomain(rnn->db, name);
997
998 if (!dom)
999 return;
1000
1001 if (script_packet)
1002 script_packet(dwords, sizedwords, rnn, dom);
1003
1004 if (quiet(2))
1005 return;
1006
1007 for (i = 0; i < sizedwords; i++) {
1008 struct rnndecaddrinfo *info = rnndec_decodeaddr(rnn->vc, dom, i, 0);
1009 char *decoded;
1010 if (!(info && info->typeinfo))
1011 break;
1012 uint64_t value = dwords[i];
1013 if (info->typeinfo->high >= 32 && i < sizedwords - 1) {
1014 value |= (uint64_t)dwords[i + 1] << 32;
1015 i++; /* skip the next dword since we're printing it now */
1016 }
1017 decoded = rnndec_decodeval(rnn->vc, info->typeinfo, value);
1018 /* Unlike the register printing path, we don't print the name
1019 * of the register, so if it doesn't contain other named
1020 * things (i.e. it isn't a bitset) then print the register
1021 * name as if it's a bitset with a single entry. This avoids
1022 * having to create a dummy register with a single entry to
1023 * get a name in the decoding.
1024 */
1025 if (info->typeinfo->type == RNN_TTYPE_BITSET ||
1026 info->typeinfo->type == RNN_TTYPE_INLINE_BITSET) {
1027 printf("%s%s\n", levels[level], decoded);
1028 } else {
1029 printf("%s{ %s%s%s = %s }\n", levels[level], rnn->vc->colors->rname,
1030 info->name, rnn->vc->colors->reset, decoded);
1031 }
1032 free(decoded);
1033 free(info->name);
1034 free(info);
1035 }
1036 }
1037
/* Bin rectangle corners. */
static uint32_t bin_x1;
static uint32_t bin_x2;
static uint32_t bin_y1;
static uint32_t bin_y2;

static unsigned mode;

/* Strings printed by print_mode(); thread is optional and may be NULL. */
static const char *render_mode;
static const char *thread;

/* Bitmask of render modes; all modes are enabled by default. */
static enum {
   MODE_BINNING = 0x1,
   MODE_GMEM = 0x2,
   MODE_BYPASS = 0x4,
   MODE_ALL = MODE_BINNING | MODE_GMEM | MODE_BYPASS,
} enable_mask = MODE_ALL;

/* skip_ib2 enables, reported by print_mode() as "g" and "l". */
static bool skip_ib2_enable_global;
static bool skip_ib2_enable_local;
1050
1051 static void
print_mode(int level)1052 print_mode(int level)
1053 {
1054 if ((options->info->chip >= 5) && !quiet(2)) {
1055 printf("%smode: %s", levels[level], render_mode);
1056 if (thread)
1057 printf(":%s", thread);
1058 printf("\n");
1059 printf("%sskip_ib2: g=%d, l=%d\n", levels[level], skip_ib2_enable_global,
1060 skip_ib2_enable_local);
1061 }
1062 }
1063
1064 static bool
skip_query(void)1065 skip_query(void)
1066 {
1067 switch (options->query_mode) {
1068 case QUERY_ALL:
1069 /* never skip: */
1070 return false;
1071 case QUERY_WRITTEN:
1072 for (int i = 0; i < options->nquery; i++) {
1073 uint32_t regbase = queryvals[i];
1074 if (!reg_written(regbase)) {
1075 continue;
1076 }
1077 if (reg_rewritten(regbase)) {
1078 return false;
1079 }
1080 }
1081 return true;
1082 case QUERY_DELTA:
1083 for (int i = 0; i < options->nquery; i++) {
1084 uint32_t regbase = queryvals[i];
1085 if (!reg_written(regbase)) {
1086 continue;
1087 }
1088 uint32_t lastval = reg_val(regbase);
1089 if (lastval != lastvals[regbase]) {
1090 return false;
1091 }
1092 }
1093 return true;
1094 }
1095 return true;
1096 }
1097
1098 static void
__do_query(const char * primtype,uint32_t num_indices)1099 __do_query(const char *primtype, uint32_t num_indices)
1100 {
1101 int n = 0;
1102
1103 if ((5 <= options->info->chip) && (options->info->chip < 7)) {
1104 uint32_t scissor_tl = reg_val(regbase("GRAS_SC_WINDOW_SCISSOR_TL"));
1105 uint32_t scissor_br = reg_val(regbase("GRAS_SC_WINDOW_SCISSOR_BR"));
1106
1107 bin_x1 = scissor_tl & 0xffff;
1108 bin_y1 = scissor_tl >> 16;
1109 bin_x2 = scissor_br & 0xffff;
1110 bin_y2 = scissor_br >> 16;
1111 }
1112
1113 for (int i = 0; i < options->nquery; i++) {
1114 uint32_t regbase = queryvals[i];
1115 if (!reg_written(regbase))
1116 continue;
1117
1118 struct regacc r = regacc(NULL);
1119
1120 /* 64b regs require two successive 32b dwords: */
1121 for (int d = 0; d < 2; d++)
1122 if (regacc_push(&r, regbase + d, reg_val(regbase + d)))
1123 break;
1124
1125 printf("%4d: %s(%u,%u-%u,%u):%u:", draw_count, primtype, bin_x1,
1126 bin_y1, bin_x2, bin_y2, num_indices);
1127 if (options->info->chip >= 5)
1128 printf("%s:", render_mode);
1129 if (thread)
1130 printf("%s:", thread);
1131 printf("\t%08"PRIx64, r.value);
1132 if (r.value != lastvals[regbase]) {
1133 printf("!");
1134 } else {
1135 printf(" ");
1136 }
1137 if (reg_rewritten(regbase)) {
1138 printf("+");
1139 } else {
1140 printf(" ");
1141 }
1142 dump_register_val(&r, 0);
1143 n++;
1144 }
1145
1146 if (n > 1)
1147 printf("\n");
1148 }
1149
1150 static void
do_query_compare(const char * primtype,uint32_t num_indices)1151 do_query_compare(const char *primtype, uint32_t num_indices)
1152 {
1153 unsigned saved_enable_mask = enable_mask;
1154 const char *saved_render_mode = render_mode;
1155
1156 /* in 'query-compare' mode, we want to see if the register is writtten
1157 * or changed in any mode:
1158 *
1159 * (NOTE: this could cause false-positive for 'query-delta' if the reg
1160 * is written with different values in binning vs sysmem/gmem mode, as
1161 * we don't track previous values per-mode, but I think we can live with
1162 * that)
1163 */
1164 enable_mask = MODE_ALL;
1165
1166 clear_rewritten();
1167 load_all_groups(0);
1168
1169 if (!skip_query()) {
1170 /* dump binning pass values: */
1171 enable_mask = MODE_BINNING;
1172 render_mode = "BINNING";
1173 clear_rewritten();
1174 load_all_groups(0);
1175 __do_query(primtype, num_indices);
1176
1177 /* dump draw pass values: */
1178 enable_mask = MODE_GMEM | MODE_BYPASS;
1179 render_mode = "DRAW";
1180 clear_rewritten();
1181 load_all_groups(0);
1182 __do_query(primtype, num_indices);
1183
1184 printf("\n");
1185 }
1186
1187 enable_mask = saved_enable_mask;
1188 render_mode = saved_render_mode;
1189
1190 disable_all_groups();
1191 }
1192
1193 /* well, actually query and script..
1194 * NOTE: call this before dump_register_summary()
1195 */
1196 static void
do_query(const char * primtype,uint32_t num_indices)1197 do_query(const char *primtype, uint32_t num_indices)
1198 {
1199 if (script_draw)
1200 script_draw(primtype, num_indices);
1201
1202 if (options->query_compare) {
1203 do_query_compare(primtype, num_indices);
1204 return;
1205 }
1206
1207 if (skip_query())
1208 return;
1209
1210 __do_query(primtype, num_indices);
1211 }
1212
1213 static void
cp_im_loadi(uint32_t * dwords,uint32_t sizedwords,int level)1214 cp_im_loadi(uint32_t *dwords, uint32_t sizedwords, int level)
1215 {
1216 uint32_t start = dwords[1] >> 16;
1217 uint32_t size = dwords[1] & 0xffff;
1218 const char *type = NULL, *ext = NULL;
1219 gl_shader_stage disasm_type;
1220
1221 switch (dwords[0]) {
1222 case 0:
1223 type = "vertex";
1224 ext = "vo";
1225 disasm_type = MESA_SHADER_VERTEX;
1226 break;
1227 case 1:
1228 type = "fragment";
1229 ext = "fo";
1230 disasm_type = MESA_SHADER_FRAGMENT;
1231 break;
1232 default:
1233 type = "<unknown>";
1234 disasm_type = 0;
1235 break;
1236 }
1237
1238 printf("%s%s shader, start=%04x, size=%04x\n", levels[level], type, start,
1239 size);
1240 disasm_a2xx(dwords + 2, sizedwords - 2, level + 2, disasm_type);
1241
1242 /* dump raw shader: */
1243 if (ext)
1244 dump_shader(ext, dwords + 2, (sizedwords - 2) * 4);
1245 }
1246
1247 static void
cp_wide_reg_write(uint32_t * dwords,uint32_t sizedwords,int level)1248 cp_wide_reg_write(uint32_t *dwords, uint32_t sizedwords, int level)
1249 {
1250 uint32_t reg = dwords[0] & 0xffff;
1251 struct regacc r = regacc(NULL);
1252 for (int i = 1; i < sizedwords; i++) {
1253 if (regacc_push(&r, reg, dwords[i]))
1254 dump_register(&r, level + 1);
1255 reg_set(reg, dwords[i]);
1256 reg++;
1257 }
1258 }
1259
/*
 * Kind of state carried by a CP_LOAD_STATE packet, as used by the decode
 * tables below.  Values start at 1 so that a zero-initialized lookup-table
 * entry does not match any of them.
 */
enum state_t {
   TEX_SAMP        = 1,
   TEX_CONST       = 2,
   TEX_MIPADDR     = 3, /* a3xx only */
   SHADER_PROG     = 4,
   SHADER_CONST    = 5,

   /* image/ssbo state: */
   SSBO_0          = 6,
   SSBO_1          = 7,
   SSBO_2          = 8,

   UBO             = 9,

   /* unknown things, just hexdumped: */
   UNKNOWN_DWORDS  = 10,
   UNKNOWN_2DWORDS = 11,
   UNKNOWN_4DWORDS = 12,
};
1279
/*
 * State block ids used as the first index of the lookup table in
 * a3xx_get_state_type(), where the id is taken from a 3-bit field of the
 * packet header (so all eight values are possible).
 */
enum adreno_state_block {
   SB_VERT_TEX = 0,
   SB_VERT_MIPADDR = 1,
   SB_FRAG_TEX = 2,
   SB_FRAG_MIPADDR = 3,
   SB_VERT_SHADER = 4,
   SB_GEOM_SHADER = 5,
   SB_FRAG_SHADER = 6,
   SB_COMPUTE_SHADER = 7,
};
1290
1291 /* TODO there is probably a clever way to let rnndec parse things so
1292 * we don't have to care about packet format differences across gens
1293 */
1294
1295 static void
a3xx_get_state_type(uint32_t * dwords,gl_shader_stage * stage,enum state_t * state,enum state_src_t * src)1296 a3xx_get_state_type(uint32_t *dwords, gl_shader_stage *stage,
1297 enum state_t *state, enum state_src_t *src)
1298 {
1299 unsigned state_block_id = (dwords[0] >> 19) & 0x7;
1300 unsigned state_type = dwords[1] & 0x3;
1301 static const struct {
1302 gl_shader_stage stage;
1303 enum state_t state;
1304 } lookup[0xf][0x3] = {
1305 [SB_VERT_TEX][0] = {MESA_SHADER_VERTEX, TEX_SAMP},
1306 [SB_VERT_TEX][1] = {MESA_SHADER_VERTEX, TEX_CONST},
1307 [SB_FRAG_TEX][0] = {MESA_SHADER_FRAGMENT, TEX_SAMP},
1308 [SB_FRAG_TEX][1] = {MESA_SHADER_FRAGMENT, TEX_CONST},
1309 [SB_VERT_SHADER][0] = {MESA_SHADER_VERTEX, SHADER_PROG},
1310 [SB_VERT_SHADER][1] = {MESA_SHADER_VERTEX, SHADER_CONST},
1311 [SB_FRAG_SHADER][0] = {MESA_SHADER_FRAGMENT, SHADER_PROG},
1312 [SB_FRAG_SHADER][1] = {MESA_SHADER_FRAGMENT, SHADER_CONST},
1313 };
1314
1315 *stage = lookup[state_block_id][state_type].stage;
1316 *state = lookup[state_block_id][state_type].state;
1317 unsigned state_src = (dwords[0] >> 16) & 0x7;
1318 if (state_src == 0 /* SS_DIRECT */)
1319 *src = STATE_SRC_DIRECT;
1320 else
1321 *src = STATE_SRC_INDIRECT;
1322 }
1323
1324 static enum state_src_t
_get_state_src(unsigned dword0)1325 _get_state_src(unsigned dword0)
1326 {
1327 switch ((dword0 >> 16) & 0x3) {
1328 case 0: /* SS4_DIRECT / SS6_DIRECT */
1329 return STATE_SRC_DIRECT;
1330 case 2: /* SS4_INDIRECT / SS6_INDIRECT */
1331 return STATE_SRC_INDIRECT;
1332 case 1: /* SS6_BINDLESS */
1333 return STATE_SRC_BINDLESS;
1334 default:
1335 return STATE_SRC_DIRECT;
1336 }
1337 }
1338
/*
 * Shared state-type lookup for a4xx_get_state_type() and
 * a6xx_get_state_type().
 *
 *   state_block_id: first table index; both callers pass a 4-bit field
 *   state_type:     second table index; both callers pass a 2-bit field
 *   stage, state:   outputs
 *
 * The table is sized to exactly those field widths and the indices are
 * not range-checked here, so callers must keep masking them.
 * Combinations without an entry read back as zero (state 0 is not a
 * valid enum state_t value).
 */
static void
_get_state_type(unsigned state_block_id, unsigned state_type,
                gl_shader_stage *stage, enum state_t *state)
{
   static const struct {
      gl_shader_stage stage;
      enum state_t state;
   } lookup[0x10][0x4] = {
      // SB4_VS_TEX:
      [0x0][0] = {MESA_SHADER_VERTEX, TEX_SAMP},
      [0x0][1] = {MESA_SHADER_VERTEX, TEX_CONST},
      [0x0][2] = {MESA_SHADER_VERTEX, UBO},
      // SB4_HS_TEX:
      [0x1][0] = {MESA_SHADER_TESS_CTRL, TEX_SAMP},
      [0x1][1] = {MESA_SHADER_TESS_CTRL, TEX_CONST},
      [0x1][2] = {MESA_SHADER_TESS_CTRL, UBO},
      // SB4_DS_TEX:
      [0x2][0] = {MESA_SHADER_TESS_EVAL, TEX_SAMP},
      [0x2][1] = {MESA_SHADER_TESS_EVAL, TEX_CONST},
      [0x2][2] = {MESA_SHADER_TESS_EVAL, UBO},
      // SB4_GS_TEX:
      [0x3][0] = {MESA_SHADER_GEOMETRY, TEX_SAMP},
      [0x3][1] = {MESA_SHADER_GEOMETRY, TEX_CONST},
      [0x3][2] = {MESA_SHADER_GEOMETRY, UBO},
      // SB4_FS_TEX:
      [0x4][0] = {MESA_SHADER_FRAGMENT, TEX_SAMP},
      [0x4][1] = {MESA_SHADER_FRAGMENT, TEX_CONST},
      [0x4][2] = {MESA_SHADER_FRAGMENT, UBO},
      // SB4_CS_TEX:
      [0x5][0] = {MESA_SHADER_COMPUTE, TEX_SAMP},
      [0x5][1] = {MESA_SHADER_COMPUTE, TEX_CONST},
      [0x5][2] = {MESA_SHADER_COMPUTE, UBO},
      // SB4_VS_SHADER:
      [0x8][0] = {MESA_SHADER_VERTEX, SHADER_PROG},
      [0x8][1] = {MESA_SHADER_VERTEX, SHADER_CONST},
      [0x8][2] = {MESA_SHADER_VERTEX, UBO},
      // SB4_HS_SHADER
      [0x9][0] = {MESA_SHADER_TESS_CTRL, SHADER_PROG},
      [0x9][1] = {MESA_SHADER_TESS_CTRL, SHADER_CONST},
      [0x9][2] = {MESA_SHADER_TESS_CTRL, UBO},
      // SB4_DS_SHADER
      [0xa][0] = {MESA_SHADER_TESS_EVAL, SHADER_PROG},
      [0xa][1] = {MESA_SHADER_TESS_EVAL, SHADER_CONST},
      [0xa][2] = {MESA_SHADER_TESS_EVAL, UBO},
      // SB4_GS_SHADER
      [0xb][0] = {MESA_SHADER_GEOMETRY, SHADER_PROG},
      [0xb][1] = {MESA_SHADER_GEOMETRY, SHADER_CONST},
      [0xb][2] = {MESA_SHADER_GEOMETRY, UBO},
      // SB4_FS_SHADER:
      [0xc][0] = {MESA_SHADER_FRAGMENT, SHADER_PROG},
      [0xc][1] = {MESA_SHADER_FRAGMENT, SHADER_CONST},
      [0xc][2] = {MESA_SHADER_FRAGMENT, UBO},
      // SB4_CS_SHADER:
      [0xd][0] = {MESA_SHADER_COMPUTE, SHADER_PROG},
      [0xd][1] = {MESA_SHADER_COMPUTE, SHADER_CONST},
      [0xd][2] = {MESA_SHADER_COMPUTE, UBO},
      [0xd][3] = {MESA_SHADER_COMPUTE, SSBO_0}, /* a6xx location */
      // SB4_SSBO (shared across all stages)
      [0xe][0] = {0, SSBO_0}, /* a5xx (and a4xx?) location */
      [0xe][1] = {0, SSBO_1},
      [0xe][2] = {0, SSBO_2},
      // SB4_CS_SSBO
      [0xf][0] = {MESA_SHADER_COMPUTE, SSBO_0},
      [0xf][1] = {MESA_SHADER_COMPUTE, SSBO_1},
      [0xf][2] = {MESA_SHADER_COMPUTE, SSBO_2},
      // unknown things
      /* This looks like combined UBO state for 3d stages (a5xx and
       * before?? I think a6xx has UBO state per shader stage:
       */
      [0x6][2] = {0, UBO},
      [0x7][1] = {0, UNKNOWN_2DWORDS},
   };

   *stage = lookup[state_block_id][state_type].stage;
   *state = lookup[state_block_id][state_type].state;
}
1415
1416 static void
a4xx_get_state_type(uint32_t * dwords,gl_shader_stage * stage,enum state_t * state,enum state_src_t * src)1417 a4xx_get_state_type(uint32_t *dwords, gl_shader_stage *stage,
1418 enum state_t *state, enum state_src_t *src)
1419 {
1420 unsigned state_block_id = (dwords[0] >> 18) & 0xf;
1421 unsigned state_type = dwords[1] & 0x3;
1422 _get_state_type(state_block_id, state_type, stage, state);
1423 *src = _get_state_src(dwords[0]);
1424 }
1425
1426 static void
a6xx_get_state_type(uint32_t * dwords,gl_shader_stage * stage,enum state_t * state,enum state_src_t * src)1427 a6xx_get_state_type(uint32_t *dwords, gl_shader_stage *stage,
1428 enum state_t *state, enum state_src_t *src)
1429 {
1430 unsigned state_block_id = (dwords[0] >> 18) & 0xf;
1431 unsigned state_type = (dwords[0] >> 14) & 0x3;
1432 _get_state_type(state_block_id, state_type, stage, state);
1433 *src = _get_state_src(dwords[0]);
1434 }
1435
1436 static void
dump_tex_samp(uint32_t * texsamp,enum state_src_t src,int num_unit,int level)1437 dump_tex_samp(uint32_t *texsamp, enum state_src_t src, int num_unit, int level)
1438 {
1439 for (int i = 0; i < num_unit; i++) {
1440 /* work-around to reduce noise for opencl blob which always
1441 * writes the max # regardless of # of textures used
1442 */
1443 if ((num_unit == 16) && (texsamp[0] == 0) && (texsamp[1] == 0))
1444 break;
1445
1446 if (options->info->chip == 3) {
1447 dump_domain(texsamp, 2, level + 2, "A3XX_TEX_SAMP");
1448 dump_hex(texsamp, 2, level + 1);
1449 texsamp += 2;
1450 } else if (options->info->chip == 4) {
1451 dump_domain(texsamp, 2, level + 2, "A4XX_TEX_SAMP");
1452 dump_hex(texsamp, 2, level + 1);
1453 texsamp += 2;
1454 } else if (options->info->chip == 5) {
1455 dump_domain(texsamp, 4, level + 2, "A5XX_TEX_SAMP");
1456 dump_hex(texsamp, 4, level + 1);
1457 texsamp += 4;
1458 } else if ((6 <= options->info->chip) && (options->info->chip < 8)) {
1459 dump_domain(texsamp, 4, level + 2, "A6XX_TEX_SAMP");
1460 dump_hex(texsamp, 4, level + 1);
1461 texsamp += src == STATE_SRC_BINDLESS ? 16 : 4;
1462 }
1463 }
1464 }
1465
1466 static void
dump_tex_const(uint32_t * texconst,int num_unit,int level)1467 dump_tex_const(uint32_t *texconst, int num_unit, int level)
1468 {
1469 for (int i = 0; i < num_unit; i++) {
1470 /* work-around to reduce noise for opencl blob which always
1471 * writes the max # regardless of # of textures used
1472 */
1473 if ((num_unit == 16) && (texconst[0] == 0) && (texconst[1] == 0) &&
1474 (texconst[2] == 0) && (texconst[3] == 0))
1475 break;
1476
1477 if (options->info->chip == 3) {
1478 dump_domain(texconst, 4, level + 2, "A3XX_TEX_CONST");
1479 dump_hex(texconst, 4, level + 1);
1480 texconst += 4;
1481 } else if (options->info->chip == 4) {
1482 dump_domain(texconst, 8, level + 2, "A4XX_TEX_CONST");
1483 if (options->dump_textures) {
1484 uint32_t addr = texconst[4] & ~0x1f;
1485 dump_gpuaddr(addr, level - 2);
1486 }
1487 dump_hex(texconst, 8, level + 1);
1488 texconst += 8;
1489 } else if (options->info->chip == 5) {
1490 dump_domain(texconst, 12, level + 2, "A5XX_TEX_CONST");
1491 if (options->dump_textures) {
1492 uint64_t addr =
1493 (((uint64_t)texconst[5] & 0x1ffff) << 32) | texconst[4];
1494 dump_gpuaddr_size(addr, level - 2, hostlen(addr) / 4, 3);
1495 }
1496 dump_hex(texconst, 12, level + 1);
1497 texconst += 12;
1498 } else if ((6 <= options->info->chip) && (options->info->chip < 8)) {
1499 dump_domain(texconst, 16, level + 2, "A6XX_TEX_CONST");
1500 if (options->dump_textures) {
1501 uint64_t addr =
1502 (((uint64_t)texconst[5] & 0x1ffff) << 32) | texconst[4];
1503 dump_gpuaddr_size(addr, level - 2, hostlen(addr) / 4, 3);
1504 }
1505 dump_hex(texconst, 16, level + 1);
1506 texconst += 16;
1507 }
1508 }
1509 }
1510
1511 static void
cp_load_state(uint32_t * dwords,uint32_t sizedwords,int level)1512 cp_load_state(uint32_t *dwords, uint32_t sizedwords, int level)
1513 {
1514 gl_shader_stage stage;
1515 enum state_t state;
1516 enum state_src_t src;
1517 uint32_t num_unit = (dwords[0] >> 22) & 0x1ff;
1518 uint64_t ext_src_addr;
1519 void *contents;
1520 int i;
1521
1522 if (quiet(2) && !options->script)
1523 return;
1524
1525 if (options->info->chip >= 6)
1526 a6xx_get_state_type(dwords, &stage, &state, &src);
1527 else if (options->info->chip >= 4)
1528 a4xx_get_state_type(dwords, &stage, &state, &src);
1529 else
1530 a3xx_get_state_type(dwords, &stage, &state, &src);
1531
1532 switch (src) {
1533 case STATE_SRC_DIRECT:
1534 ext_src_addr = 0;
1535 break;
1536 case STATE_SRC_INDIRECT:
1537 if (is_64b()) {
1538 ext_src_addr = dwords[1] & 0xfffffffc;
1539 ext_src_addr |= ((uint64_t)dwords[2]) << 32;
1540 } else {
1541 ext_src_addr = dwords[1] & 0xfffffffc;
1542 }
1543
1544 break;
1545 case STATE_SRC_BINDLESS: {
1546 const unsigned base_reg = stage == MESA_SHADER_COMPUTE
1547 ? regbase("HLSQ_CS_BINDLESS_BASE[0].DESCRIPTOR")
1548 : regbase("HLSQ_BINDLESS_BASE[0].DESCRIPTOR");
1549
1550 if (is_64b()) {
1551 const unsigned reg = base_reg + (dwords[1] >> 28) * 2;
1552 ext_src_addr = reg_val(reg) & 0xfffffffc;
1553 ext_src_addr |= ((uint64_t)reg_val(reg + 1)) << 32;
1554 } else {
1555 const unsigned reg = base_reg + (dwords[1] >> 28);
1556 ext_src_addr = reg_val(reg) & 0xfffffffc;
1557 }
1558
1559 ext_src_addr += 4 * (dwords[1] & 0xffffff);
1560 break;
1561 }
1562 }
1563
1564 if (ext_src_addr)
1565 contents = hostptr(ext_src_addr);
1566 else
1567 contents = is_64b() ? dwords + 3 : dwords + 2;
1568
1569 if (!contents)
1570 return;
1571
1572 switch (state) {
1573 case SHADER_PROG: {
1574 const char *ext = NULL;
1575
1576 if (quiet(2))
1577 return;
1578
1579 if (options->info->chip >= 4)
1580 num_unit *= 16;
1581 else if (options->info->chip >= 3)
1582 num_unit *= 4;
1583
1584 /* shaders:
1585 *
1586 * note: num_unit seems to be # of instruction groups, where
1587 * an instruction group has 4 64bit instructions.
1588 */
1589 if (stage == MESA_SHADER_VERTEX) {
1590 ext = "vo3";
1591 } else if (stage == MESA_SHADER_GEOMETRY) {
1592 ext = "go3";
1593 } else if (stage == MESA_SHADER_COMPUTE) {
1594 ext = "co3";
1595 } else if (stage == MESA_SHADER_FRAGMENT) {
1596 ext = "fo3";
1597 }
1598
1599 if (contents)
1600 try_disasm_a3xx(contents, num_unit * 2, level + 2, stdout,
1601 options->info->chip * 100);
1602
1603 /* dump raw shader: */
1604 if (ext)
1605 dump_shader(ext, contents, num_unit * 2 * 4);
1606
1607 break;
1608 }
1609 case SHADER_CONST: {
1610 if (quiet(2))
1611 return;
1612
1613 /* uniforms/consts:
1614 *
1615 * note: num_unit seems to be # of pairs of dwords??
1616 */
1617
1618 if (options->info->chip >= 4)
1619 num_unit *= 2;
1620
1621 dump_float(contents, num_unit * 2, level + 1);
1622 dump_hex(contents, num_unit * 2, level + 1);
1623
1624 break;
1625 }
1626 case TEX_MIPADDR: {
1627 uint32_t *addrs = contents;
1628
1629 if (quiet(2))
1630 return;
1631
1632 /* mipmap consts block just appears to be array of num_unit gpu addr's: */
1633 for (i = 0; i < num_unit; i++) {
1634 void *ptr = hostptr(addrs[i]);
1635 printf("%s%2d: %08x\n", levels[level + 1], i, addrs[i]);
1636 if (options->dump_textures) {
1637 printf("base=%08x\n", (uint32_t)gpubaseaddr(addrs[i]));
1638 dump_hex(ptr, hostlen(addrs[i]) / 4, level + 1);
1639 }
1640 }
1641 break;
1642 }
1643 case TEX_SAMP: {
1644 dump_tex_samp(contents, src, num_unit, level);
1645 break;
1646 }
1647 case TEX_CONST: {
1648 dump_tex_const(contents, num_unit, level);
1649 break;
1650 }
1651 case SSBO_0: {
1652 uint32_t *ssboconst = (uint32_t *)contents;
1653
1654 for (i = 0; i < num_unit; i++) {
1655 int sz = 4;
1656 if (options->info->chip == 4) {
1657 dump_domain(ssboconst, 4, level + 2, "A4XX_SSBO_0");
1658 } else if (options->info->chip == 5) {
1659 dump_domain(ssboconst, 4, level + 2, "A5XX_SSBO_0");
1660 } else if ((6 <= options->info->chip) && (options->info->chip < 8)) {
1661 sz = 16;
1662 dump_domain(ssboconst, 16, level + 2, "A6XX_TEX_CONST");
1663 }
1664 dump_hex(ssboconst, sz, level + 1);
1665 ssboconst += sz;
1666 }
1667 break;
1668 }
1669 case SSBO_1: {
1670 uint32_t *ssboconst = (uint32_t *)contents;
1671
1672 for (i = 0; i < num_unit; i++) {
1673 if (options->info->chip == 4)
1674 dump_domain(ssboconst, 2, level + 2, "A4XX_SSBO_1");
1675 else if (options->info->chip == 5)
1676 dump_domain(ssboconst, 2, level + 2, "A5XX_SSBO_1");
1677 dump_hex(ssboconst, 2, level + 1);
1678 ssboconst += 2;
1679 }
1680 break;
1681 }
1682 case SSBO_2: {
1683 uint32_t *ssboconst = (uint32_t *)contents;
1684
1685 for (i = 0; i < num_unit; i++) {
1686 /* TODO a4xx and a5xx might be same: */
1687 if (options->info->chip == 5) {
1688 dump_domain(ssboconst, 2, level + 2, "A5XX_SSBO_2");
1689 dump_hex(ssboconst, 2, level + 1);
1690 }
1691 if (options->dump_textures) {
1692 uint64_t addr =
1693 (((uint64_t)ssboconst[1] & 0x1ffff) << 32) | ssboconst[0];
1694 dump_gpuaddr_size(addr, level - 2, hostlen(addr) / 4, 3);
1695 }
1696 ssboconst += 2;
1697 }
1698 break;
1699 }
1700 case UBO: {
1701 uint32_t *uboconst = (uint32_t *)contents;
1702
1703 for (i = 0; i < num_unit; i++) {
1704 // TODO probably similar on a4xx..
1705 if (options->info->chip == 5)
1706 dump_domain(uboconst, 2, level + 2, "A5XX_UBO");
1707 else if (options->info->chip == 6)
1708 dump_domain(uboconst, 2, level + 2, "A6XX_UBO");
1709 dump_hex(uboconst, 2, level + 1);
1710 uboconst += src == STATE_SRC_BINDLESS ? 16 : 2;
1711 }
1712 break;
1713 }
1714 case UNKNOWN_DWORDS: {
1715 if (quiet(2))
1716 return;
1717 dump_hex(contents, num_unit, level + 1);
1718 break;
1719 }
1720 case UNKNOWN_2DWORDS: {
1721 if (quiet(2))
1722 return;
1723 dump_hex(contents, num_unit * 2, level + 1);
1724 break;
1725 }
1726 case UNKNOWN_4DWORDS: {
1727 if (quiet(2))
1728 return;
1729 dump_hex(contents, num_unit * 4, level + 1);
1730 break;
1731 }
1732 default:
1733 if (quiet(2))
1734 return;
1735 /* hmm.. */
1736 dump_hex(contents, num_unit, level + 1);
1737 break;
1738 }
1739 }
1740
1741 static void
cp_set_bin(uint32_t * dwords,uint32_t sizedwords,int level)1742 cp_set_bin(uint32_t *dwords, uint32_t sizedwords, int level)
1743 {
1744 bin_x1 = dwords[1] & 0xffff;
1745 bin_y1 = dwords[1] >> 16;
1746 bin_x2 = dwords[2] & 0xffff;
1747 bin_y2 = dwords[2] >> 16;
1748 }
1749
1750 static void
dump_a2xx_tex_const(uint32_t * dwords,uint32_t sizedwords,uint32_t val,int level)1751 dump_a2xx_tex_const(uint32_t *dwords, uint32_t sizedwords, uint32_t val,
1752 int level)
1753 {
1754 uint32_t w, h, p;
1755 uint32_t gpuaddr, flags, mip_gpuaddr, mip_flags;
1756 uint32_t min, mag, swiz, clamp_x, clamp_y, clamp_z;
1757 static const char *filter[] = {
1758 "point",
1759 "bilinear",
1760 "bicubic",
1761 };
1762 static const char *clamp[] = {
1763 "wrap",
1764 "mirror",
1765 "clamp-last-texel",
1766 };
1767 static const char swiznames[] = "xyzw01??";
1768
1769 /* see sys2gmem_tex_const[] in adreno_a2xxx.c */
1770
1771 /* Texture, FormatXYZW=Unsigned, ClampXYZ=Wrap/Repeat,
1772 * RFMode=ZeroClamp-1, Dim=1:2d, pitch
1773 */
1774 p = (dwords[0] >> 22) << 5;
1775 clamp_x = (dwords[0] >> 10) & 0x3;
1776 clamp_y = (dwords[0] >> 13) & 0x3;
1777 clamp_z = (dwords[0] >> 16) & 0x3;
1778
1779 /* Format=6:8888_WZYX, EndianSwap=0:None, ReqSize=0:256bit, DimHi=0,
1780 * NearestClamp=1:OGL Mode
1781 */
1782 parse_dword_addr(dwords[1], &gpuaddr, &flags, 0xfff);
1783
1784 /* Width, Height, EndianSwap=0:None */
1785 w = (dwords[2] & 0x1fff) + 1;
1786 h = ((dwords[2] >> 13) & 0x1fff) + 1;
1787
1788 /* NumFormat=0:RF, DstSelXYZW=XYZW, ExpAdj=0, MagFilt=MinFilt=0:Point,
1789 * Mip=2:BaseMap
1790 */
1791 mag = (dwords[3] >> 19) & 0x3;
1792 min = (dwords[3] >> 21) & 0x3;
1793 swiz = (dwords[3] >> 1) & 0xfff;
1794
1795 /* VolMag=VolMin=0:Point, MinMipLvl=0, MaxMipLvl=1, LodBiasH=V=0,
1796 * Dim3d=0
1797 */
1798 // XXX
1799
1800 /* BorderColor=0:ABGRBlack, ForceBC=0:diable, TriJuice=0, Aniso=0,
1801 * Dim=1:2d, MipPacking=0
1802 */
1803 parse_dword_addr(dwords[5], &mip_gpuaddr, &mip_flags, 0xfff);
1804
1805 printf("%sset texture const %04x\n", levels[level], val);
1806 printf("%sclamp x/y/z: %s/%s/%s\n", levels[level + 1], clamp[clamp_x],
1807 clamp[clamp_y], clamp[clamp_z]);
1808 printf("%sfilter min/mag: %s/%s\n", levels[level + 1], filter[min],
1809 filter[mag]);
1810 printf("%sswizzle: %c%c%c%c\n", levels[level + 1],
1811 swiznames[(swiz >> 0) & 0x7], swiznames[(swiz >> 3) & 0x7],
1812 swiznames[(swiz >> 6) & 0x7], swiznames[(swiz >> 9) & 0x7]);
1813 printf("%saddr=%08x (flags=%03x), size=%dx%d, pitch=%d, format=%s\n",
1814 levels[level + 1], gpuaddr, flags, w, h, p,
1815 rnn_enumname(rnn, "a2xx_sq_surfaceformat", flags & 0xf));
1816 printf("%smipaddr=%08x (flags=%03x)\n", levels[level + 1], mip_gpuaddr,
1817 mip_flags);
1818 }
1819
1820 static void
dump_a2xx_shader_const(uint32_t * dwords,uint32_t sizedwords,uint32_t val,int level)1821 dump_a2xx_shader_const(uint32_t *dwords, uint32_t sizedwords, uint32_t val,
1822 int level)
1823 {
1824 int i;
1825 printf("%sset shader const %04x\n", levels[level], val);
1826 for (i = 0; i < sizedwords;) {
1827 uint32_t gpuaddr, flags;
1828 parse_dword_addr(dwords[i++], &gpuaddr, &flags, 0xf);
1829 void *addr = hostptr(gpuaddr);
1830 if (addr) {
1831 const char *fmt =
1832 rnn_enumname(rnn, "a2xx_sq_surfaceformat", flags & 0xf);
1833 uint32_t size = dwords[i++];
1834 printf("%saddr=%08x, size=%d, format=%s\n", levels[level + 1], gpuaddr,
1835 size, fmt);
1836 // TODO maybe dump these as bytes instead of dwords?
1837 size = (size + 3) / 4; // for now convert to dwords
1838 dump_hex(addr, MIN2(size, 64), level + 1);
1839 if (size > MIN2(size, 64))
1840 printf("%s\t\t...\n", levels[level + 1]);
1841 dump_float(addr, MIN2(size, 64), level + 1);
1842 if (size > MIN2(size, 64))
1843 printf("%s\t\t...\n", levels[level + 1]);
1844 }
1845 }
1846 }
1847
1848 static void
cp_set_const(uint32_t * dwords,uint32_t sizedwords,int level)1849 cp_set_const(uint32_t *dwords, uint32_t sizedwords, int level)
1850 {
1851 uint32_t val = dwords[0] & 0xffff;
1852 switch ((dwords[0] >> 16) & 0xf) {
1853 case 0x0:
1854 dump_float((float *)(dwords + 1), sizedwords - 1, level + 1);
1855 break;
1856 case 0x1:
1857 /* need to figure out how const space is partitioned between
1858 * attributes, textures, etc..
1859 */
1860 if (val < 0x78) {
1861 dump_a2xx_tex_const(dwords + 1, sizedwords - 1, val, level);
1862 } else {
1863 dump_a2xx_shader_const(dwords + 1, sizedwords - 1, val, level);
1864 }
1865 break;
1866 case 0x2:
1867 printf("%sset bool const %04x\n", levels[level], val);
1868 break;
1869 case 0x3:
1870 printf("%sset loop const %04x\n", levels[level], val);
1871 break;
1872 case 0x4:
1873 val += 0x2000;
1874 if (dwords[0] & 0x80000000) {
1875 uint32_t srcreg = dwords[1];
1876 uint32_t dstval = dwords[2];
1877
1878 /* TODO: not sure what happens w/ payload != 2.. */
1879 assert(sizedwords == 3);
1880 assert(srcreg < ARRAY_SIZE(type0_reg_vals));
1881
1882 /* note: rnn_regname uses a static buf so we can't do
1883 * two regname() calls for one printf..
1884 */
1885 printf("%s%s = %08x + ", levels[level], regname(val, 1), dstval);
1886 printf("%s (%08x)\n", regname(srcreg, 1), type0_reg_vals[srcreg]);
1887
1888 dstval += type0_reg_vals[srcreg];
1889
1890 dump_registers(val, &dstval, 1, level + 1);
1891 } else {
1892 dump_registers(val, dwords + 1, sizedwords - 1, level + 1);
1893 }
1894 break;
1895 }
1896 }
1897
1898 static void dump_register_summary(int level);
1899
1900 static void
cp_event_write(uint32_t * dwords,uint32_t sizedwords,int level)1901 cp_event_write(uint32_t *dwords, uint32_t sizedwords, int level)
1902 {
1903 const char *name = rnn_enumname(rnn, "vgt_event_type", dwords[0]);
1904 printl(2, "%sevent %s\n", levels[level], name);
1905
1906 if (name && (options->info->chip > 5)) {
1907 char eventname[64];
1908 snprintf(eventname, sizeof(eventname), "EVENT:%s", name);
1909 if (!strcmp(name, "BLIT")) {
1910 do_query(eventname, 0);
1911 print_mode(level);
1912 dump_register_summary(level);
1913 }
1914 }
1915 }
1916
1917 static void
dump_register_summary(int level)1918 dump_register_summary(int level)
1919 {
1920 uint32_t i;
1921 bool saved_summary = summary;
1922 summary = false;
1923
1924 in_summary = true;
1925
1926 struct regacc r = regacc(NULL);
1927
1928 /* dump current state of registers: */
1929 printl(2, "%sdraw[%i] register values\n", levels[level], draw_count);
1930
1931 bool changed = false;
1932 bool written = false;
1933
1934 for (i = 0; i < regcnt(); i++) {
1935 uint32_t regbase = i;
1936 uint32_t lastval = reg_val(regbase);
1937 /* skip registers that haven't been updated since last draw/blit: */
1938 if (!(options->allregs || reg_rewritten(regbase)))
1939 continue;
1940 if (!reg_written(regbase))
1941 continue;
1942 if (lastval != lastvals[regbase]) {
1943 changed |= true;
1944 lastvals[regbase] = lastval;
1945 }
1946 if (reg_rewritten(regbase)) {
1947 written |= true;
1948 }
1949 if (!quiet(2)) {
1950 if (regacc_push(&r, regbase, lastval)) {
1951 if (changed) {
1952 printl(2, "!");
1953 } else {
1954 printl(2, " ");
1955 }
1956 if (written) {
1957 printl(2, "+");
1958 } else {
1959 printl(2, " ");
1960 }
1961 printl(2, "\t%08"PRIx64, r.value);
1962 dump_register(&r, level);
1963
1964 changed = written = false;
1965 }
1966 }
1967 }
1968
1969 clear_rewritten();
1970
1971 in_summary = false;
1972
1973 draw_count++;
1974 summary = saved_summary;
1975 }
1976
1977 static uint32_t
draw_indx_common(uint32_t * dwords,int level)1978 draw_indx_common(uint32_t *dwords, int level)
1979 {
1980 uint32_t prim_type = dwords[1] & 0x1f;
1981 uint32_t source_select = (dwords[1] >> 6) & 0x3;
1982 uint32_t num_indices = dwords[2];
1983 const char *primtype;
1984
1985 primtype = rnn_enumname(rnn, "pc_di_primtype", prim_type);
1986
1987 do_query(primtype, num_indices);
1988
1989 printl(2, "%sdraw: %d\n", levels[level], draws[ib]);
1990 printl(2, "%sprim_type: %s (%d)\n", levels[level], primtype, prim_type);
1991 printl(2, "%ssource_select: %s (%d)\n", levels[level],
1992 rnn_enumname(rnn, "pc_di_src_sel", source_select), source_select);
1993 printl(2, "%snum_indices: %d\n", levels[level], num_indices);
1994
1995 vertices += num_indices;
1996
1997 draws[ib]++;
1998
1999 return num_indices;
2000 }
2001
/*
 * Index size as assembled from the draw initiator dword by the draw_indx
 * handlers.  Note that INDEX_SIZE_IGN, INDEX_SIZE_16_BIT and
 * INDEX_SIZE_INVALID all share the value 0, so only the 8/16/32-bit names
 * can be told apart by value.
 */
enum pc_di_index_size {
   INDEX_SIZE_IGN = 0,
   INDEX_SIZE_16_BIT = 0,
   INDEX_SIZE_32_BIT = 1,
   INDEX_SIZE_8_BIT = 2,
   INDEX_SIZE_INVALID = 0,
};
2009
2010 static void
cp_draw_indx(uint32_t * dwords,uint32_t sizedwords,int level)2011 cp_draw_indx(uint32_t *dwords, uint32_t sizedwords, int level)
2012 {
2013 uint32_t num_indices = draw_indx_common(dwords, level);
2014
2015 assert(!is_64b());
2016
2017 /* if we have an index buffer, dump that: */
2018 if (sizedwords == 5) {
2019 void *ptr = hostptr(dwords[3]);
2020 printl(2, "%sgpuaddr: %08x\n", levels[level], dwords[3]);
2021 printl(2, "%sidx_size: %d\n", levels[level], dwords[4]);
2022 if (ptr) {
2023 enum pc_di_index_size size =
2024 ((dwords[1] >> 11) & 1) | ((dwords[1] >> 12) & 2);
2025 if (!quiet(2)) {
2026 int i;
2027 printf("%sidxs: ", levels[level]);
2028 if (size == INDEX_SIZE_8_BIT) {
2029 uint8_t *idx = ptr;
2030 for (i = 0; i < dwords[4]; i++)
2031 printf(" %u", idx[i]);
2032 } else if (size == INDEX_SIZE_16_BIT) {
2033 uint16_t *idx = ptr;
2034 for (i = 0; i < dwords[4] / 2; i++)
2035 printf(" %u", idx[i]);
2036 } else if (size == INDEX_SIZE_32_BIT) {
2037 uint32_t *idx = ptr;
2038 for (i = 0; i < dwords[4] / 4; i++)
2039 printf(" %u", idx[i]);
2040 }
2041 printf("\n");
2042 dump_hex(ptr, dwords[4] / 4, level + 1);
2043 }
2044 }
2045 }
2046
2047 /* don't bother dumping registers for the dummy draw_indx's.. */
2048 if (num_indices > 0)
2049 dump_register_summary(level);
2050
2051 needs_wfi = true;
2052 }
2053
2054 static void
cp_draw_indx_2(uint32_t * dwords,uint32_t sizedwords,int level)2055 cp_draw_indx_2(uint32_t *dwords, uint32_t sizedwords, int level)
2056 {
2057 uint32_t num_indices = draw_indx_common(dwords, level);
2058 enum pc_di_index_size size =
2059 ((dwords[1] >> 11) & 1) | ((dwords[1] >> 12) & 2);
2060 void *ptr = &dwords[3];
2061 int sz = 0;
2062
2063 assert(!is_64b());
2064
2065 /* CP_DRAW_INDX_2 has embedded/inline idx buffer: */
2066 if (!quiet(2)) {
2067 int i;
2068 printf("%sidxs: ", levels[level]);
2069 if (size == INDEX_SIZE_8_BIT) {
2070 uint8_t *idx = ptr;
2071 for (i = 0; i < num_indices; i++)
2072 printf(" %u", idx[i]);
2073 sz = num_indices;
2074 } else if (size == INDEX_SIZE_16_BIT) {
2075 uint16_t *idx = ptr;
2076 for (i = 0; i < num_indices; i++)
2077 printf(" %u", idx[i]);
2078 sz = num_indices * 2;
2079 } else if (size == INDEX_SIZE_32_BIT) {
2080 uint32_t *idx = ptr;
2081 for (i = 0; i < num_indices; i++)
2082 printf(" %u", idx[i]);
2083 sz = num_indices * 4;
2084 }
2085 printf("\n");
2086 dump_hex(ptr, sz / 4, level + 1);
2087 }
2088
2089 /* don't bother dumping registers for the dummy draw_indx's.. */
2090 if (num_indices > 0)
2091 dump_register_summary(level);
2092 }
2093
2094 static void
cp_draw_indx_offset(uint32_t * dwords,uint32_t sizedwords,int level)2095 cp_draw_indx_offset(uint32_t *dwords, uint32_t sizedwords, int level)
2096 {
2097 uint32_t num_indices = dwords[2];
2098 uint32_t prim_type = dwords[0] & 0x1f;
2099
2100 do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), num_indices);
2101 print_mode(level);
2102
2103 /* don't bother dumping registers for the dummy draw_indx's.. */
2104 if (num_indices > 0)
2105 dump_register_summary(level);
2106 }
2107
2108 static void
cp_draw_indx_indirect(uint32_t * dwords,uint32_t sizedwords,int level)2109 cp_draw_indx_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
2110 {
2111 uint32_t prim_type = dwords[0] & 0x1f;
2112 uint64_t addr;
2113
2114 do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);
2115 print_mode(level);
2116
2117 if (is_64b())
2118 addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1];
2119 else
2120 addr = dwords[1];
2121 dump_gpuaddr_size(addr, level, 0x10, 2);
2122
2123 if (is_64b())
2124 addr = (((uint64_t)dwords[5] & 0x1ffff) << 32) | dwords[4];
2125 else
2126 addr = dwords[3];
2127 dump_gpuaddr_size(addr, level, 0x10, 2);
2128
2129 dump_register_summary(level);
2130 }
2131
2132 static void
cp_draw_indirect(uint32_t * dwords,uint32_t sizedwords,int level)2133 cp_draw_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
2134 {
2135 uint32_t prim_type = dwords[0] & 0x1f;
2136 uint64_t addr;
2137
2138 do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);
2139 print_mode(level);
2140
2141 addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1];
2142 dump_gpuaddr_size(addr, level, 0x10, 2);
2143
2144 dump_register_summary(level);
2145 }
2146
2147 static void
cp_draw_indirect_multi(uint32_t * dwords,uint32_t sizedwords,int level)2148 cp_draw_indirect_multi(uint32_t *dwords, uint32_t sizedwords, int level)
2149 {
2150 uint32_t prim_type = dwords[0] & 0x1f;
2151 uint32_t count = dwords[2];
2152
2153 do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);
2154 print_mode(level);
2155
2156 struct rnndomain *domain = rnn_finddomain(rnn->db, "CP_DRAW_INDIRECT_MULTI");
2157 uint32_t count_dword = rnndec_decodereg(rnn->vc, domain, "INDIRECT_COUNT");
2158 uint32_t addr_dword = rnndec_decodereg(rnn->vc, domain, "INDIRECT");
2159 uint64_t stride_dword = rnndec_decodereg(rnn->vc, domain, "STRIDE");
2160
2161 if (count_dword) {
2162 uint64_t count_addr =
2163 ((uint64_t)dwords[count_dword + 1] << 32) | dwords[count_dword];
2164 uint32_t *buf = hostptr(count_addr);
2165
2166 /* Don't print more draws than this if we don't know the indirect
2167 * count. It's possible the user will give ~0 or some other large
2168 * value, expecting the GPU to fill in the draw count, and we don't
2169 * want to print a gazillion draws in that case:
2170 */
2171 const uint32_t max_draw_count = 0x100;
2172
2173 /* Assume the indirect count is garbage if it's larger than this
2174 * (quite large) value or 0. Hopefully this catches most cases.
2175 */
2176 const uint32_t max_indirect_draw_count = 0x10000;
2177
2178 if (buf) {
2179 printf("%sindirect count: %u\n", levels[level], *buf);
2180 if (*buf == 0 || *buf > max_indirect_draw_count) {
2181 /* garbage value */
2182 count = MIN2(count, max_draw_count);
2183 } else {
2184 /* not garbage */
2185 count = MIN2(count, *buf);
2186 }
2187 } else {
2188 count = MIN2(count, max_draw_count);
2189 }
2190 }
2191
2192 if (addr_dword && stride_dword) {
2193 uint64_t addr =
2194 ((uint64_t)dwords[addr_dword + 1] << 32) | dwords[addr_dword];
2195 uint32_t stride = dwords[stride_dword];
2196
2197 for (unsigned i = 0; i < count; i++, addr += stride) {
2198 printf("%sdraw %d:\n", levels[level], i);
2199 dump_gpuaddr_size(addr, level, 0x10, 2);
2200 }
2201 }
2202
2203 dump_register_summary(level);
2204 }
2205
2206 static void
cp_draw_auto(uint32_t * dwords,uint32_t sizedwords,int level)2207 cp_draw_auto(uint32_t *dwords, uint32_t sizedwords, int level)
2208 {
2209 uint32_t prim_type = dwords[0] & 0x1f;
2210
2211 do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);
2212 print_mode(level);
2213
2214 dump_register_summary(level);
2215 }
2216
/*
 * Handler for CP_RUN_OPENCL: records a "COMPUTE" query and dumps the
 * register summary.  The packet payload itself is not inspected.
 */
static void
cp_run_cl(uint32_t *dwords, uint32_t sizedwords, int level)
{
   (void)dwords;
   (void)sizedwords;

   do_query("COMPUTE", 1);
   dump_register_summary(level);
}
2223
/*
 * Print the payload of a NOP packet as text.
 *
 * Walks at most 4 * sizedwords bytes, stops at the first NUL byte, and
 * prints only the bytes for which isascii() is true.  No trailing newline
 * is emitted.
 */
static void
print_nop_tail_string(uint32_t *dwords, uint32_t sizedwords)
{
   const char *text = (void *)dwords;
   const uint32_t nbytes = 4 * sizedwords;

   for (uint32_t pos = 0; pos < nbytes && text[pos] != '\0'; pos++) {
      if (!isascii(text[pos]))
         continue;
      putchar(text[pos]);
   }
}
2235
2236 static void
cp_nop(uint32_t * dwords,uint32_t sizedwords,int level)2237 cp_nop(uint32_t *dwords, uint32_t sizedwords, int level)
2238 {
2239 if (quiet(3))
2240 return;
2241
2242 /* NOP is used to encode special debug strings by Turnip.
2243 * See tu_cs_emit_debug_magic_strv(...)
2244 */
2245 static int scope_level = 0;
2246 uint32_t identifier = dwords[0];
2247 bool is_special = false;
2248 if (identifier == CP_NOP_MESG) {
2249 printf("### ");
2250 is_special = true;
2251 } else if (identifier == CP_NOP_BEGN) {
2252 printf(">>> #%d: ", ++scope_level);
2253 is_special = true;
2254 } else if (identifier == CP_NOP_END) {
2255 printf("<<< #%d: ", scope_level--);
2256 is_special = true;
2257 }
2258
2259 if (is_special) {
2260 if (sizedwords > 1) {
2261 print_nop_tail_string(dwords + 1, sizedwords - 1);
2262 printf("\n");
2263 }
2264 return;
2265 }
2266
2267 // blob doesn't use CP_NOP for string_marker but it does
2268 // use it for things that end up looking like, but aren't
2269 // ascii chars:
2270 if (!options->decode_markers)
2271 return;
2272
2273 print_nop_tail_string(dwords, sizedwords);
2274 printf("\n");
2275 }
2276
/*
 * Decode the payload of an indirect-buffer packet.
 *
 * dwords:     packet payload
 * sizedwords: payload size; asserted to be 3 in 64b mode and 2 otherwise
 * ibaddr:     out, GPU address of the indirect buffer
 * ibsize:     out, size value taken from the packet
 *
 * Returns a pointer to the dword just past the payload.
 */
uint32_t *
parse_cp_indirect(uint32_t *dwords, uint32_t sizedwords, uint64_t *ibaddr,
                  uint32_t *ibsize)
{
   if (!is_64b()) {
      /* 32b layout: address, then size */
      assert(sizedwords == 2);

      *ibaddr = dwords[0];
      *ibsize = dwords[1];

      return &dwords[2];
   }

   /* a5xx+ layout: low 32b of the address, high 32b, then size */
   assert(sizedwords == 3);

   *ibaddr = dwords[0] | (((uint64_t)dwords[1]) << 32);
   *ibsize = dwords[2];

   return &dwords[3];
}
2299
2300 static void
cp_indirect(uint32_t * dwords,uint32_t sizedwords,int level)2301 cp_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
2302 {
2303 /* traverse indirect buffers */
2304 uint64_t ibaddr;
2305 uint32_t ibsize;
2306 uint32_t *ptr = NULL;
2307
2308 dwords = parse_cp_indirect(dwords, sizedwords, &ibaddr, &ibsize);
2309
2310 if (!quiet(3)) {
2311 if (is_64b()) {
2312 printf("%sibaddr:%016" PRIx64 "\n", levels[level], ibaddr);
2313 } else {
2314 printf("%sibaddr:%08x\n", levels[level], (uint32_t)ibaddr);
2315 }
2316 printf("%sibsize:%08x\n", levels[level], ibsize);
2317 }
2318
2319 if (options->once && has_dumped(ibaddr, enable_mask))
2320 return;
2321
2322 /* 'query-compare' mode implies 'once' mode, although we need only to
2323 * process the cmdstream for *any* enable_mask mode, since we are
2324 * comparing binning vs draw reg values at the same time, ie. it is
2325 * not useful to process the same draw in both binning and draw pass.
2326 */
2327 if (options->query_compare && has_dumped(ibaddr, MODE_ALL))
2328 return;
2329
2330 /* map gpuaddr back to hostptr: */
2331 ptr = hostptr(ibaddr);
2332
2333 if (ptr) {
2334 /* If the GPU hung within the target IB, the trigger point will be
2335 * just after the current CP_INDIRECT_BUFFER. Because the IB is
2336 * executed but never returns. Account for this by checking if
2337 * the IB returned:
2338 */
2339 highlight_gpuaddr(gpuaddr(dwords));
2340
2341 ib++;
2342 ibs[ib].base = ibaddr;
2343 ibs[ib].size = ibsize;
2344
2345 dump_commands(ptr, ibsize, level);
2346 ib--;
2347 } else {
2348 fprintf(stderr, "could not find: %016" PRIx64 " (%d)\n", ibaddr, ibsize);
2349 }
2350 }
2351
2352 static void
cp_start_bin(uint32_t * dwords,uint32_t sizedwords,int level)2353 cp_start_bin(uint32_t *dwords, uint32_t sizedwords, int level)
2354 {
2355 uint64_t ibaddr;
2356 uint32_t ibsize;
2357 uint32_t loopcount;
2358 uint32_t *ptr = NULL;
2359
2360 loopcount = dwords[0];
2361 ibaddr = dwords[1];
2362 ibaddr |= ((uint64_t)dwords[2]) << 32;
2363 ibsize = dwords[3];
2364
2365 /* map gpuaddr back to hostptr: */
2366 ptr = hostptr(ibaddr);
2367
2368 if (ptr) {
2369 /* If the GPU hung within the target IB, the trigger point will be
2370 * just after the current CP_START_BIN. Because the IB is
2371 * executed but never returns. Account for this by checking if
2372 * the IB returned:
2373 */
2374 highlight_gpuaddr(gpuaddr(&dwords[5]));
2375
2376 /* TODO: we should duplicate the body of the loop after each bin, so
2377 * that draws get the correct state. We should also figure out if there
2378 * are any registers that can tell us what bin we're in when we hang so
2379 * that crashdec points to the right place.
2380 */
2381 ib++;
2382 for (uint32_t i = 0; i < loopcount; i++) {
2383 ibs[ib].base = ibaddr;
2384 ibs[ib].size = ibsize;
2385 printl(3, "%sbin %u\n", levels[level], i);
2386 dump_commands(ptr, ibsize, level);
2387 ibaddr += ibsize;
2388 ptr += ibsize;
2389 }
2390 ib--;
2391 } else {
2392 fprintf(stderr, "could not find: %016" PRIx64 " (%d)\n", ibaddr, ibsize);
2393 }
2394 }
2395
2396 static void
cp_fixed_stride_draw_table(uint32_t * dwords,uint32_t sizedwords,int level)2397 cp_fixed_stride_draw_table(uint32_t *dwords, uint32_t sizedwords, int level)
2398 {
2399 uint64_t ibaddr;
2400 uint32_t ibsize;
2401 uint32_t loopcount;
2402 uint32_t *ptr = NULL;
2403
2404 loopcount = dwords[3];
2405 ibaddr = dwords[0];
2406 ibaddr |= ((uint64_t)dwords[1]) << 32;
2407 ibsize = dwords[2] >> 20;
2408
2409 /* map gpuaddr back to hostptr: */
2410 ptr = hostptr(ibaddr);
2411
2412 if (ptr) {
2413 /* If the GPU hung within the target IB, the trigger point will be
2414 * just after the current CP_START_BIN. Because the IB is
2415 * executed but never returns. Account for this by checking if
2416 * the IB returned:
2417 */
2418 highlight_gpuaddr(gpuaddr(&dwords[5]));
2419
2420 ib++;
2421 for (uint32_t i = 0; i < loopcount; i++) {
2422 ibs[ib].base = ibaddr;
2423 ibs[ib].size = ibsize;
2424 printl(3, "%sdraw %u\n", levels[level], i);
2425 dump_commands(ptr, ibsize, level);
2426 ibaddr += ibsize;
2427 ptr += ibsize;
2428 }
2429 ib--;
2430 } else {
2431 fprintf(stderr, "could not find: %016" PRIx64 " (%d)\n", ibaddr, ibsize);
2432 }
2433 }
2434
2435 static void
cp_wfi(uint32_t * dwords,uint32_t sizedwords,int level)2436 cp_wfi(uint32_t *dwords, uint32_t sizedwords, int level)
2437 {
2438 needs_wfi = false;
2439 }
2440
2441 static void
cp_mem_write(uint32_t * dwords,uint32_t sizedwords,int level)2442 cp_mem_write(uint32_t *dwords, uint32_t sizedwords, int level)
2443 {
2444 if (quiet(2))
2445 return;
2446
2447 if (is_64b()) {
2448 uint64_t gpuaddr = dwords[0] | (((uint64_t)dwords[1]) << 32);
2449 printf("%sgpuaddr:%016" PRIx64 "\n", levels[level], gpuaddr);
2450 dump_hex(&dwords[2], sizedwords - 2, level + 1);
2451
2452 if (pkt_is_type4(dwords[2]) || pkt_is_type7(dwords[2]))
2453 dump_commands(&dwords[2], sizedwords - 2, level + 1);
2454 } else {
2455 uint32_t gpuaddr = dwords[0];
2456 printf("%sgpuaddr:%08x\n", levels[level], gpuaddr);
2457 dump_float((float *)&dwords[1], sizedwords - 1, level + 1);
2458 }
2459 }
2460
2461 static void
cp_rmw(uint32_t * dwords,uint32_t sizedwords,int level)2462 cp_rmw(uint32_t *dwords, uint32_t sizedwords, int level)
2463 {
2464 uint32_t val = dwords[0] & 0xffff;
2465 uint32_t and = dwords[1];
2466 uint32_t or = dwords[2];
2467 printl(3, "%srmw (%s & 0x%08x) | 0x%08x)\n", levels[level], regname(val, 1),
2468 and, or);
2469 if (needs_wfi)
2470 printl(2, "NEEDS WFI: rmw (%s & 0x%08x) | 0x%08x)\n", regname(val, 1),
2471 and, or);
2472 reg_set(val, (reg_val(val) & and) | or);
2473 }
2474
2475 static void
cp_reg_mem(uint32_t * dwords,uint32_t sizedwords,int level)2476 cp_reg_mem(uint32_t *dwords, uint32_t sizedwords, int level)
2477 {
2478 uint32_t val = dwords[0] & 0xffff;
2479 printl(3, "%sbase register: %s\n", levels[level], regname(val, 1));
2480
2481 if (quiet(2))
2482 return;
2483
2484 uint64_t gpuaddr = dwords[1] | (((uint64_t)dwords[2]) << 32);
2485 printf("%sgpuaddr:%016" PRIx64 "\n", levels[level], gpuaddr);
2486 void *ptr = hostptr(gpuaddr);
2487 if (ptr) {
2488 uint32_t cnt = (dwords[0] >> 19) & 0x3ff;
2489 dump_hex(ptr, cnt, level + 1);
2490 }
2491 }
2492
/*
 * One draw-state group slot, filled in by cp_set_draw_state() and replayed
 * by load_group().
 */
struct draw_state {
   uint16_t enable_mask; /* bits 20..23 of the group's header dword */
   uint16_t flags;       /* FLAG_* bits, from bits 16..19 of the header dword */
   uint32_t count;       /* size in dwords; zero means the slot is empty */
   uint64_t addr;        /* GPU address of the group's command stream */
};
2499
/* Draw-state group table, indexed by group id. */
struct draw_state state[32];

/* Bits of draw_state::flags, as decoded by cp_set_draw_state(): */
#define FLAG_DIRTY 0x1
#define FLAG_DISABLE 0x2            /* clear the addressed group */
#define FLAG_DISABLE_ALL_GROUPS 0x4 /* clear every group */
#define FLAG_LOAD_IMMED 0x8         /* dump the group right away */

/* First payload dword of the most recent CP_SET_MODE packet. */
static int draw_mode;
2508
2509 static void
disable_group(unsigned group_id)2510 disable_group(unsigned group_id)
2511 {
2512 struct draw_state *ds = &state[group_id];
2513 memset(ds, 0, sizeof(*ds));
2514 }
2515
2516 static void
disable_all_groups(void)2517 disable_all_groups(void)
2518 {
2519 for (unsigned i = 0; i < ARRAY_SIZE(state); i++)
2520 disable_group(i);
2521 }
2522
2523 static void
load_group(unsigned group_id,int level)2524 load_group(unsigned group_id, int level)
2525 {
2526 struct draw_state *ds = &state[group_id];
2527
2528 if (!ds->count)
2529 return;
2530
2531 printl(2, "%sgroup_id: %u\n", levels[level], group_id);
2532 printl(2, "%scount: %d\n", levels[level], ds->count);
2533 printl(2, "%saddr: %016llx\n", levels[level], ds->addr);
2534 printl(2, "%sflags: %x\n", levels[level], ds->flags);
2535
2536 if (options->info->chip >= 6) {
2537 printl(2, "%senable_mask: 0x%x\n", levels[level], ds->enable_mask);
2538
2539 if (!(ds->enable_mask & enable_mask)) {
2540 printl(2, "%s\tskipped!\n\n", levels[level]);
2541 return;
2542 }
2543 }
2544
2545 void *ptr = hostptr(ds->addr);
2546 if (ptr) {
2547 if (!quiet(2))
2548 dump_hex(ptr, ds->count, level + 1);
2549
2550 ib++;
2551 dump_commands(ptr, ds->count, level + 1);
2552 ib--;
2553 }
2554 }
2555
2556 static void
load_all_groups(int level)2557 load_all_groups(int level)
2558 {
2559 /* sanity check, we should never recursively hit recursion here, and if
2560 * we do bad things happen:
2561 */
2562 static bool loading_groups = false;
2563 if (loading_groups) {
2564 printf("ERROR: nothing in draw state should trigger recursively loading "
2565 "groups!\n");
2566 return;
2567 }
2568 loading_groups = true;
2569 for (unsigned i = 0; i < ARRAY_SIZE(state); i++)
2570 load_group(i, level);
2571 loading_groups = false;
2572
2573 /* in 'query-compare' mode, defer disabling all groups until we have a
2574 * chance to process the query:
2575 */
2576 if (!options->query_compare)
2577 disable_all_groups();
2578 }
2579
2580 static void
cp_set_draw_state(uint32_t * dwords,uint32_t sizedwords,int level)2581 cp_set_draw_state(uint32_t *dwords, uint32_t sizedwords, int level)
2582 {
2583 uint32_t i;
2584
2585 for (i = 0; i < sizedwords;) {
2586 struct draw_state *ds;
2587 uint32_t count = dwords[i] & 0xffff;
2588 uint32_t group_id = (dwords[i] >> 24) & 0x1f;
2589 uint32_t enable_mask = (dwords[i] >> 20) & 0xf;
2590 uint32_t flags = (dwords[i] >> 16) & 0xf;
2591 uint64_t addr;
2592
2593 if (is_64b()) {
2594 addr = dwords[i + 1];
2595 addr |= ((uint64_t)dwords[i + 2]) << 32;
2596 i += 3;
2597 } else {
2598 addr = dwords[i + 1];
2599 i += 2;
2600 }
2601
2602 if (flags & FLAG_DISABLE_ALL_GROUPS) {
2603 disable_all_groups();
2604 continue;
2605 }
2606
2607 if (flags & FLAG_DISABLE) {
2608 disable_group(group_id);
2609 continue;
2610 }
2611
2612 assert(group_id < ARRAY_SIZE(state));
2613 disable_group(group_id);
2614
2615 ds = &state[group_id];
2616
2617 ds->enable_mask = enable_mask;
2618 ds->flags = flags;
2619 ds->count = count;
2620 ds->addr = addr;
2621
2622 if (flags & FLAG_LOAD_IMMED) {
2623 load_group(group_id, level);
2624 disable_group(group_id);
2625 }
2626 }
2627 }
2628
2629 static void
cp_set_mode(uint32_t * dwords,uint32_t sizedwords,int level)2630 cp_set_mode(uint32_t *dwords, uint32_t sizedwords, int level)
2631 {
2632 draw_mode = dwords[0];
2633 }
2634
/*
 * Handler for CP_EXEC_CS (execute compute shader): records a "compute"
 * query and dumps the register summary.  The payload is not inspected.
 */
static void
cp_exec_cs(uint32_t *dwords, uint32_t sizedwords, int level)
{
   (void)dwords;
   (void)sizedwords;

   do_query("compute", 0);
   dump_register_summary(level);
}
2642
2643 static void
cp_exec_cs_indirect(uint32_t * dwords,uint32_t sizedwords,int level)2644 cp_exec_cs_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
2645 {
2646 uint64_t addr;
2647
2648 if (is_64b()) {
2649 addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1];
2650 } else {
2651 addr = dwords[1];
2652 }
2653
2654 printl(3, "%saddr: %016llx\n", levels[level], addr);
2655 dump_gpuaddr_size(addr, level, 0x10, 2);
2656
2657 do_query("compute", 0);
2658 dump_register_summary(level);
2659 }
2660
2661 static void
cp_set_marker(uint32_t * dwords,uint32_t sizedwords,int level)2662 cp_set_marker(uint32_t *dwords, uint32_t sizedwords, int level)
2663 {
2664 uint32_t val = dwords[0] & 0xf;
2665 const char *mode = rnn_enumname(rnn, "a6xx_marker", val);
2666
2667 if (!mode) {
2668 static char buf[8];
2669 sprintf(buf, "0x%x", val);
2670 render_mode = buf;
2671 return;
2672 }
2673
2674 render_mode = mode;
2675
2676 if (!strcmp(render_mode, "RM6_BINNING")) {
2677 enable_mask = MODE_BINNING;
2678 } else if (!strcmp(render_mode, "RM6_GMEM")) {
2679 enable_mask = MODE_GMEM;
2680 } else if (!strcmp(render_mode, "RM6_BYPASS")) {
2681 enable_mask = MODE_BYPASS;
2682 }
2683 }
2684
2685 static void
cp_set_thread_control(uint32_t * dwords,uint32_t sizedwords,int level)2686 cp_set_thread_control(uint32_t *dwords, uint32_t sizedwords, int level)
2687 {
2688 uint32_t val = dwords[0] & 0x3;
2689 thread = rnn_enumname(rnn, "cp_thread", val);
2690 }
2691
2692 static void
cp_set_render_mode(uint32_t * dwords,uint32_t sizedwords,int level)2693 cp_set_render_mode(uint32_t *dwords, uint32_t sizedwords, int level)
2694 {
2695 uint64_t addr;
2696 uint32_t *ptr, len;
2697
2698 assert(is_64b());
2699
2700 /* TODO seems to have two ptrs, 9 dwords total (incl pkt7 hdr)..
2701 * not sure if this can come in different sizes.
2702 *
2703 * First ptr doesn't seem to be cmdstream, second one does.
2704 *
2705 * Comment from downstream kernel:
2706 *
2707 * SRM -- set render mode (ex binning, direct render etc)
2708 * SRM is set by UMD usually at start of IB to tell CP the type of
2709 * preemption.
2710 * KMD needs to set SRM to NULL to indicate CP that rendering is
2711 * done by IB.
2712 * ------------------------------------------------------------------
2713 *
2714 * Seems to always be one of these two:
2715 * 70ec0008 00000001 001c0000 00000000 00000010 00000003 0000000d 001c2000
2716 * 00000000 70ec0008 00000001 001c0000 00000000 00000000 00000003 0000000d
2717 * 001c2000 00000000
2718 *
2719 */
2720
2721 assert(options->info->chip >= 5);
2722
2723 render_mode = rnn_enumname(rnn, "render_mode_cmd", dwords[0]);
2724
2725 if (sizedwords == 1)
2726 return;
2727
2728 addr = dwords[1];
2729 addr |= ((uint64_t)dwords[2]) << 32;
2730
2731 mode = dwords[3];
2732
2733 dump_gpuaddr(addr, level + 1);
2734
2735 if (sizedwords == 5)
2736 return;
2737
2738 assert(sizedwords == 8);
2739
2740 len = dwords[5];
2741 addr = dwords[6];
2742 addr |= ((uint64_t)dwords[7]) << 32;
2743
2744 printl(3, "%saddr: 0x%016lx\n", levels[level], addr);
2745 printl(3, "%slen: 0x%x\n", levels[level], len);
2746
2747 ptr = hostptr(addr);
2748
2749 if (ptr) {
2750 if (!quiet(2)) {
2751 ib++;
2752 dump_commands(ptr, len, level + 1);
2753 ib--;
2754 dump_hex(ptr, len, level + 1);
2755 }
2756 }
2757 }
2758
2759 static void
cp_compute_checkpoint(uint32_t * dwords,uint32_t sizedwords,int level)2760 cp_compute_checkpoint(uint32_t *dwords, uint32_t sizedwords, int level)
2761 {
2762 uint64_t addr;
2763 uint32_t *ptr, len;
2764
2765 assert(is_64b());
2766 assert(options->info->chip >= 5);
2767
2768 if (sizedwords == 8) {
2769 addr = dwords[5];
2770 addr |= ((uint64_t)dwords[6]) << 32;
2771 len = dwords[7];
2772 } else {
2773 addr = dwords[5];
2774 addr |= ((uint64_t)dwords[6]) << 32;
2775 len = dwords[4];
2776 }
2777
2778 printl(3, "%saddr: 0x%016" PRIx64 "\n", levels[level], addr);
2779 printl(3, "%slen: 0x%x\n", levels[level], len);
2780
2781 ptr = hostptr(addr);
2782
2783 if (ptr) {
2784 if (!quiet(2)) {
2785 ib++;
2786 dump_commands(ptr, len, level + 1);
2787 ib--;
2788 dump_hex(ptr, len, level + 1);
2789 }
2790 }
2791 }
2792
2793 static void
cp_blit(uint32_t * dwords,uint32_t sizedwords,int level)2794 cp_blit(uint32_t *dwords, uint32_t sizedwords, int level)
2795 {
2796 do_query(rnn_enumname(rnn, "cp_blit_cmd", dwords[0]), 0);
2797 print_mode(level);
2798 dump_register_summary(level);
2799 }
2800
2801 static void
cp_context_reg_bunch(uint32_t * dwords,uint32_t sizedwords,int level)2802 cp_context_reg_bunch(uint32_t *dwords, uint32_t sizedwords, int level)
2803 {
2804 int i;
2805
2806 /* NOTE: seems to write same reg multiple times.. not sure if different parts
2807 * of these are triggered by the FLUSH_SO_n events?? (if that is what they
2808 * actually are?)
2809 */
2810 bool saved_summary = summary;
2811 summary = false;
2812
2813 struct regacc r = regacc(NULL);
2814
2815 for (i = 0; i < sizedwords; i += 2) {
2816 if (regacc_push(&r, dwords[i + 0], dwords[i + 1]))
2817 dump_register(&r, level + 1);
2818 reg_set(dwords[i + 0], dwords[i + 1]);
2819 }
2820
2821 summary = saved_summary;
2822 }
2823
/* Looks similar to CP_CONTEXT_REG_BUNCH, but not quite the same...
 * discarding first two dwords??
 *
 * CP_CONTEXT_REG_BUNCH:
 *    0221: 9c1ff606  (rep)(xmov3)mov $usraddr, $data
 *                    ; mov $data, $data
 *                    ; mov $usraddr, $data
 *                    ; mov $data, $data
 *    0222: d8000000  waitin
 *    0223: 981f0806  mov $01, $data
 *
 * CP_UNK5D:
 *    0224: 981f0006  mov $00, $data
 *    0225: 981f0006  mov $00, $data
 *    0226: 9c1ff206  (rep)(xmov1)mov $usraddr, $data
 *                    ; mov $data, $data
 *    0227: d8000000  waitin
 *    0228: 981f0806  mov $01, $data
 *
 * Handler for CP_CONTEXT_REG_BUNCH2: skips the first two payload dwords
 * and decodes the rest like CP_CONTEXT_REG_BUNCH.
 */
static void
cp_context_reg_bunch2(uint32_t *dwords, uint32_t sizedwords, int level)
{
   /* A payload shorter than the two skipped dwords has no register pairs.
    * Without this check the unsigned subtraction below would wrap around
    * and make the callee walk far past the end of the packet.
    */
   if (sizedwords < 2)
      return;

   cp_context_reg_bunch(dwords + 2, sizedwords - 2, level);
}
2851
2852 static void
cp_reg_write(uint32_t * dwords,uint32_t sizedwords,int level)2853 cp_reg_write(uint32_t *dwords, uint32_t sizedwords, int level)
2854 {
2855 uint32_t reg = dwords[1] & 0xffff;
2856
2857 struct regacc r = regacc(NULL);
2858 if (regacc_push(&r, reg, dwords[2]))
2859 dump_register(&r, level + 1);
2860 reg_set(reg, dwords[2]);
2861 }
2862
2863 static void
cp_set_ctxswitch_ib(uint32_t * dwords,uint32_t sizedwords,int level)2864 cp_set_ctxswitch_ib(uint32_t *dwords, uint32_t sizedwords, int level)
2865 {
2866 uint64_t addr;
2867 uint32_t size = dwords[2] & 0xffff;
2868 void *ptr;
2869
2870 addr = dwords[0] | ((uint64_t)dwords[1] << 32);
2871
2872 if (!quiet(3)) {
2873 printf("%saddr=%" PRIx64 "\n", levels[level], addr);
2874 }
2875
2876 ptr = hostptr(addr);
2877 if (ptr) {
2878 dump_commands(ptr, size, level + 1);
2879 }
2880 }
2881
2882 static void
cp_skip_ib2_enable_global(uint32_t * dwords,uint32_t sizedwords,int level)2883 cp_skip_ib2_enable_global(uint32_t *dwords, uint32_t sizedwords, int level)
2884 {
2885 skip_ib2_enable_global = dwords[0];
2886 }
2887
2888 static void
cp_skip_ib2_enable_local(uint32_t * dwords,uint32_t sizedwords,int level)2889 cp_skip_ib2_enable_local(uint32_t *dwords, uint32_t sizedwords, int level)
2890 {
2891 skip_ib2_enable_local = dwords[0];
2892 }
2893
/*
 * Builds one table entry: the packet name is "CP_" followed by x, fxn is
 * the handler, and any further arguments initialize the options member.
 */
#define CP(x, fxn, ...) { "CP_" #x, fxn, ##__VA_ARGS__ }

/*
 * Table of packet handlers, looked up by packet name in get_type3_op().
 * Packets without an entry here get a no-op handler.
 */
static const struct type3_op {
   /* packet name, compared against the result of pktname() */
   const char *name;
   /* handler; receives the payload (header stripped) and its size */
   void (*fxn)(uint32_t *dwords, uint32_t sizedwords, int level);
   struct {
      /* when set, dump_commands() calls load_all_groups() before
       * invoking the handler
       */
      bool load_all_groups;
   } options;
} type3_op[] = {
   CP(NOP, cp_nop),
   CP(INDIRECT_BUFFER, cp_indirect),
   CP(INDIRECT_BUFFER_PFD, cp_indirect),
   CP(WAIT_FOR_IDLE, cp_wfi),
   CP(REG_RMW, cp_rmw),
   CP(REG_TO_MEM, cp_reg_mem),
   CP(MEM_TO_REG, cp_reg_mem), /* same layout as CP_REG_TO_MEM */
   CP(MEM_WRITE, cp_mem_write),
   CP(EVENT_WRITE, cp_event_write),
   CP(RUN_OPENCL, cp_run_cl),
   CP(DRAW_INDX, cp_draw_indx, {.load_all_groups = true}),
   CP(DRAW_INDX_2, cp_draw_indx_2, {.load_all_groups = true}),
   CP(SET_CONSTANT, cp_set_const),
   CP(IM_LOAD_IMMEDIATE, cp_im_loadi),
   CP(WIDE_REG_WRITE, cp_wide_reg_write),

   /* for a3xx */
   CP(LOAD_STATE, cp_load_state),
   CP(SET_BIN, cp_set_bin),

   /* for a4xx */
   CP(LOAD_STATE4, cp_load_state),
   CP(SET_DRAW_STATE, cp_set_draw_state),
   CP(DRAW_INDX_OFFSET, cp_draw_indx_offset, {.load_all_groups = true}),
   CP(EXEC_CS, cp_exec_cs, {.load_all_groups = true}),
   CP(EXEC_CS_INDIRECT, cp_exec_cs_indirect, {.load_all_groups = true}),

   /* for a5xx */
   CP(SET_RENDER_MODE, cp_set_render_mode),
   CP(COMPUTE_CHECKPOINT, cp_compute_checkpoint),
   CP(BLIT, cp_blit),
   CP(CONTEXT_REG_BUNCH, cp_context_reg_bunch),
   CP(DRAW_INDIRECT, cp_draw_indirect, {.load_all_groups = true}),
   CP(DRAW_INDX_INDIRECT, cp_draw_indx_indirect, {.load_all_groups = true}),
   CP(DRAW_INDIRECT_MULTI, cp_draw_indirect_multi, {.load_all_groups = true}),
   CP(SKIP_IB2_ENABLE_GLOBAL, cp_skip_ib2_enable_global),
   CP(SKIP_IB2_ENABLE_LOCAL, cp_skip_ib2_enable_local),

   /* for a6xx */
   CP(LOAD_STATE6_GEOM, cp_load_state),
   CP(LOAD_STATE6_FRAG, cp_load_state),
   CP(LOAD_STATE6, cp_load_state),
   CP(SET_MODE, cp_set_mode),
   CP(SET_MARKER, cp_set_marker),
   CP(REG_WRITE, cp_reg_write),
   CP(DRAW_AUTO, cp_draw_auto, {.load_all_groups = true}),

   CP(SET_CTXSWITCH_IB, cp_set_ctxswitch_ib),

   CP(START_BIN, cp_start_bin),

   CP(FIXED_STRIDE_DRAW_TABLE, cp_fixed_stride_draw_table),

   /* for a7xx */
   CP(THREAD_CONTROL, cp_set_thread_control),
   CP(CONTEXT_REG_BUNCH2, cp_context_reg_bunch2),
};
2959
/*
 * Handler used for packets that have no entry in the handler table:
 * intentionally does nothing.
 */
static void
noop_fxn(uint32_t *dwords, uint32_t sizedwords, int level)
{
   (void)dwords;
   (void)sizedwords;
   (void)level;
}
2964
2965 static const struct type3_op *
get_type3_op(unsigned opc)2966 get_type3_op(unsigned opc)
2967 {
2968 static const struct type3_op dummy_op = {
2969 .fxn = noop_fxn,
2970 };
2971 const char *name = pktname(opc);
2972
2973 if (!name)
2974 return &dummy_op;
2975
2976 for (unsigned i = 0; i < ARRAY_SIZE(type3_op); i++)
2977 if (!strcmp(name, type3_op[i].name))
2978 return &type3_op[i];
2979
2980 return &dummy_op;
2981 }
2982
2983 void
dump_commands(uint32_t * dwords,uint32_t sizedwords,int level)2984 dump_commands(uint32_t *dwords, uint32_t sizedwords, int level)
2985 {
2986 int dwords_left = sizedwords;
2987 uint32_t count = 0; /* dword count including packet header */
2988 uint32_t val;
2989
2990 // assert(dwords);
2991 if (!dwords) {
2992 printf("NULL cmd buffer!\n");
2993 return;
2994 }
2995
2996 assert(ib < ARRAY_SIZE(draws));
2997 draws[ib] = 0;
2998
2999 while (dwords_left > 0) {
3000
3001 current_draw_count = draw_count;
3002
3003 /* hack, this looks like a -1 underflow, in some versions
3004 * when it tries to write zero registers via pkt0
3005 */
3006 // if ((dwords[0] >> 16) == 0xffff)
3007 // goto skip;
3008
3009 if (pkt_is_regwrite(dwords[0], &val, &count)) {
3010 assert(val < regcnt());
3011 printl(3, "%swrite %s (%04x)\n", levels[level + 1], regname(val, 1),
3012 val);
3013 dump_registers(val, dwords + 1, count - 1, level + 2);
3014 if (!quiet(3))
3015 dump_hex(dwords, count, level + 1);
3016 #if 0
3017 } else if (pkt_is_type1(dwords[0])) {
3018 count = 3;
3019 val = dwords[0] & 0xfff;
3020 printl(3, "%swrite %s\n", levels[level+1], regname(val, 1));
3021 dump_registers(val, dwords+1, 1, level+2);
3022 val = (dwords[0] >> 12) & 0xfff;
3023 printl(3, "%swrite %s\n", levels[level+1], regname(val, 1));
3024 dump_registers(val, dwords+2, 1, level+2);
3025 if (!quiet(3))
3026 dump_hex(dwords, count, level+1);
3027 #endif
3028 } else if (pkt_is_opcode(dwords[0], &val, &count)) {
3029 const struct type3_op *op = get_type3_op(val);
3030 if (op->options.load_all_groups)
3031 load_all_groups(level + 1);
3032 const char *name = pktname(val);
3033 if (!quiet(2)) {
3034 printf("\t%sopcode: %s%s%s (%02x) (%d dwords)\n", levels[level],
3035 rnn->vc->colors->bctarg, name, rnn->vc->colors->reset, val,
3036 count);
3037 }
3038 if (name) {
3039 /* special hack for two packets that decode the same way
3040 * on a6xx:
3041 */
3042 if (!strcmp(name, "CP_LOAD_STATE6_FRAG") ||
3043 !strcmp(name, "CP_LOAD_STATE6_GEOM"))
3044 name = "CP_LOAD_STATE6";
3045 dump_domain(dwords + 1, count - 1, level + 2, name);
3046 }
3047 op->fxn(dwords + 1, count - 1, level + 1);
3048 if (!quiet(2))
3049 dump_hex(dwords, count, level + 1);
3050 } else if (pkt_is_type2(dwords[0])) {
3051 printl(3, "%snop\n", levels[level + 1]);
3052 count = 1;
3053 } else {
3054 printf("bad type! %08x\n", dwords[0]);
3055 /* for 5xx+ we can do a passable job of looking for start of next valid
3056 * packet: */
3057 if (options->info->chip >= 5) {
3058 count = find_next_packet(dwords, dwords_left);
3059 } else {
3060 return;
3061 }
3062 }
3063
3064 dwords += count;
3065 dwords_left -= count;
3066 }
3067
3068 if (dwords_left < 0)
3069 printf("**** this ain't right!! dwords_left=%d\n", dwords_left);
3070 }
3071