/*
 * Copyright © 2012 Rob Clark <robdclark@gmail.com>
 * SPDX-License-Identifier: MIT
 */
5 
6 #include <assert.h>
7 #include <ctype.h>
8 #include <err.h>
9 #include <errno.h>
10 #include <fcntl.h>
11 #include <inttypes.h>
12 #include <signal.h>
13 #include <stdarg.h>
14 #include <stdbool.h>
15 #include <stdint.h>
16 #include <stdio.h>
17 #include <stdlib.h>
18 #include <string.h>
19 #include <unistd.h>
20 #include <sys/stat.h>
21 #include <sys/types.h>
22 #include <sys/wait.h>
23 
24 #include "freedreno_pm4.h"
25 
26 #include "buffers.h"
27 #include "cffdec.h"
28 #include "disasm.h"
29 #include "redump.h"
30 #include "rnnutil.h"
31 #include "script.h"
32 
/* ************************************************************************* */
/* originally based on kernel recovery dump code: */

/* Decoder options; assigned by cffdec_init(). */
static const struct cffdec_options *options;

/* When set, dump_registers() flags writes to non-banked registers. */
static bool needs_wfi = false;
/* Copy of options->summary; part of the quiet() verbosity decision. */
static bool summary = false;
/* The TEX_SAMP/TEX_CONST register handlers only dump while this is set. */
static bool in_summary = false;
static int vertices;
42 
43 static inline unsigned
regcnt(void)44 regcnt(void)
45 {
46    if (options->info->chip >= 5)
47       return 0x3ffff;
48    else
49       return 0x7fff;
50 }
51 
52 static int
is_64b(void)53 is_64b(void)
54 {
55    return options->info->chip >= 5;
56 }
57 
static int draws[4];

/* Per-IB-level decode state, indexed by the current IB level 'ib'. */
static struct {
   uint64_t base;
   uint32_t size; /* in dwords */
   /* Generally cmdstream consists of multiple IB calls to different
    * buffers, which are themselves often re-used for each tile.  The
    * triggered flag serves two purposes to help make it more clear
    * what part of the cmdstream is before vs after the GPU hang:
    *
    * 1) if in IB2 we are past the point within the IB2 buffer where
    *    the GPU hung, but IB1 is not past the point within its
    *    buffer where the GPU had hung, then we know the GPU hang
    *    happens on a future use of that IB2 buffer.
    *
    * 2) if in an IB1 or IB2 buffer that is not the one where the GPU
    *    hung, but we've already passed the trigger point at the same
    *    IB level, we know that we are past the point where the GPU
    *    had hung.
    *
    * So this is a one way switch, false->true.  And a higher #'d
    * IB level isn't considered triggered unless the lower #'d IB
    * level is.
    */
   bool triggered : 1;
   bool base_seen : 1;
} ibs[4];
static int ib;

static int draw_count;
/* Compared against options->draw_filter in quiet(). */
static int current_draw_count;
88 
/* Query mode: to handle symbolic register name queries, parsing of the
 * query strings has to be deferred until the gpu_id is known and the
 * rnn db is loaded (see init_rnn()).  One entry per query string.
 */
static int *queryvals;
94 
95 static bool
quiet(int lvl)96 quiet(int lvl)
97 {
98    if ((options->draw_filter != -1) &&
99        (options->draw_filter != current_draw_count))
100       return true;
101    if ((lvl >= 3) && (summary || options->querystrs || options->script))
102       return true;
103    if ((lvl >= 2) && (options->querystrs || options->script))
104       return true;
105    return false;
106 }
107 
/*
 * printf()-style output to stdout that is dropped when quiet(lvl) says
 * output at this verbosity level is suppressed.
 */
void
printl(int lvl, const char *fmt, ...)
{
   va_list args;

   if (quiet(lvl))
      return;

   va_start(args, fmt);
   vprintf(fmt, args);
   va_end(args);
}
118 
/*
 * Indentation prefixes indexed by nesting level: entry N holds N+1 tabs
 * for levels 0..8.  The remaining entries are "x" placeholders.
 */
static const char *levels[] = {
   "\t",
   "\t\t",
   "\t\t\t",
   "\t\t\t\t",
   "\t\t\t\t\t",
   "\t\t\t\t\t\t",
   "\t\t\t\t\t\t\t",
   "\t\t\t\t\t\t\t\t",
   "\t\t\t\t\t\t\t\t\t",
   "x",
   "x",
   "x",
   "x",
   "x",
   "x",
};
136 
/* Source of a state block, as passed to dump_tex_samp(). */
enum state_src_t {
   STATE_SRC_DIRECT,
   STATE_SRC_INDIRECT,
   STATE_SRC_BINDLESS,
};
142 
/* SDS (CP_SET_DRAW_STATE) helpers: */
static void load_all_groups(int level);
static void disable_all_groups(void);

/* Texture sampler / constant state dumpers, defined later in the file: */
static void dump_tex_samp(uint32_t *texsamp, enum state_src_t src, int num_unit,
                          int level);
static void dump_tex_const(uint32_t *texsamp, int num_unit, int level);
150 
151 static bool
highlight_gpuaddr(uint64_t gpuaddr)152 highlight_gpuaddr(uint64_t gpuaddr)
153 {
154    if (!options->ibs[ib].base)
155       return false;
156 
157    if ((ib > 0) && options->ibs[ib - 1].base &&
158        !(ibs[ib - 1].triggered || ibs[ib - 1].base_seen))
159       return false;
160 
161    if (ibs[ib].base_seen)
162       return false;
163 
164    if (ibs[ib].triggered)
165       return options->color;
166 
167    if (options->ibs[ib].base != ibs[ib].base)
168       return false;
169 
170    uint64_t start = ibs[ib].base + 4 * (ibs[ib].size - options->ibs[ib].rem);
171    uint64_t end = ibs[ib].base + 4 * ibs[ib].size;
172 
173    bool triggered = (start <= gpuaddr) && (gpuaddr <= end);
174 
175    if (triggered && (ib < 2) && options->ibs[ib + 1].crash_found) {
176       ibs[ib].base_seen = true;
177       return false;
178    }
179 
180    ibs[ib].triggered |= triggered;
181 
182    if (triggered)
183       printf("ESTIMATED CRASH LOCATION!\n");
184 
185    return triggered & options->color;
186 }
187 
188 static void
dump_hex(uint32_t * dwords,uint32_t sizedwords,int level)189 dump_hex(uint32_t *dwords, uint32_t sizedwords, int level)
190 {
191    int i, j;
192    int lastzero = 1;
193 
194    if (quiet(2))
195       return;
196 
197    bool highlight = highlight_gpuaddr(gpuaddr(dwords) + 4 * sizedwords - 1);
198 
199    for (i = 0; i < sizedwords; i += 8) {
200       int zero = 1;
201 
202       /* always show first row: */
203       if (i == 0)
204          zero = 0;
205 
206       for (j = 0; (j < 8) && (i + j < sizedwords) && zero; j++)
207          if (dwords[i + j])
208             zero = 0;
209 
210       if (zero && !lastzero)
211          printf("*\n");
212 
213       lastzero = zero;
214 
215       if (zero)
216          continue;
217 
218       uint64_t addr = gpuaddr(&dwords[i]);
219 
220       if (highlight)
221          printf("\x1b[0;1;31m");
222 
223       if (is_64b()) {
224          printf("%016" PRIx64 ":%s", addr, levels[level]);
225       } else {
226          printf("%08x:%s", (uint32_t)addr, levels[level]);
227       }
228 
229       if (highlight)
230          printf("\x1b[0m");
231 
232       printf("%04x:", i * 4);
233 
234       for (j = 0; (j < 8) && (i + j < sizedwords); j++) {
235          printf(" %08x", dwords[i + j]);
236       }
237 
238       printf("\n");
239    }
240 }
241 
242 static void
dump_float(float * dwords,uint32_t sizedwords,int level)243 dump_float(float *dwords, uint32_t sizedwords, int level)
244 {
245    int i;
246    for (i = 0; i < sizedwords; i++) {
247       if ((i % 8) == 0) {
248          if (is_64b()) {
249             printf("%016" PRIx64 ":%s", gpuaddr(dwords), levels[level]);
250          } else {
251             printf("%08x:%s", (uint32_t)gpuaddr(dwords), levels[level]);
252          }
253       } else {
254          printf(" ");
255       }
256       printf("%8f", *(dwords++));
257       if ((i % 8) == 7)
258          printf("\n");
259    }
260    if (i % 8)
261       printf("\n");
262 }
263 
/* I believe the surface format is low bits:
#define RB_COLOR_INFO__COLOR_FORMAT_MASK                   0x0000000fL
comments in sys2gmem_tex_const indicate that address is [31:12], but
looks like at least some of the bits above the format have different meaning..
*/
/*
 * Split 'dword' into an address part (bits outside 'mask', stored to
 * *gpuaddr) and a flags part (bits inside 'mask', stored to *flags).
 */
static void
parse_dword_addr(uint32_t dword, uint32_t *gpuaddr, uint32_t *flags,
                 uint32_t mask)
{
   assert(!is_64b()); /* this is only used on a2xx */
   *gpuaddr = dword & ~mask;
   *flags = dword & mask;
}
277 
/* Last value written to each register, indexed by register offset: */
static uint32_t type0_reg_vals[0x3ffff + 1];
/* Bitmaps (one bit per register offset): */
static uint8_t type0_reg_rewritten[sizeof(type0_reg_vals) /
                                   8]; /* written since last draw */
static uint8_t type0_reg_written[sizeof(type0_reg_vals) / 8];
/* Same number of entries as type0_reg_vals: */
static uint32_t lastvals[sizeof(type0_reg_vals) / sizeof(type0_reg_vals[0])];

/* True if the "rewritten" bit for 'regbase' is set. */
static bool
reg_rewritten(uint32_t regbase)
{
   return !!(type0_reg_rewritten[regbase / 8] & (1 << (regbase % 8)));
}

/* True if the "written" bit for 'regbase' is set. */
bool
reg_written(uint32_t regbase)
{
   return !!(type0_reg_written[regbase / 8] & (1 << (regbase % 8)));
}

/* Clear the "rewritten" bit of every register. */
static void
clear_rewritten(void)
{
   memset(type0_reg_rewritten, 0, sizeof(type0_reg_rewritten));
}

/* Clear both the "written" and the "rewritten" bit of every register. */
static void
clear_written(void)
{
   memset(type0_reg_written, 0, sizeof(type0_reg_written));
   clear_rewritten();
}

/* Value stored in lastvals[] for 'regbase'. */
uint32_t
reg_lastval(uint32_t regbase)
{
   return lastvals[regbase];
}

/* Zero all of lastvals[]. */
static void
clear_lastvals(void)
{
   memset(lastvals, 0, sizeof(lastvals));
}

/* Most recent value recorded for 'regbase' by reg_set(). */
uint32_t
reg_val(uint32_t regbase)
{
   return type0_reg_vals[regbase];
}
326 
327 void
reg_set(uint32_t regbase,uint32_t val)328 reg_set(uint32_t regbase, uint32_t val)
329 {
330    assert(regbase < regcnt());
331    type0_reg_vals[regbase] = val;
332    type0_reg_written[regbase / 8] |= (1 << (regbase % 8));
333    type0_reg_rewritten[regbase / 8] |= (1 << (regbase % 8));
334 }
335 
336 static void
reg_dump_scratch(const char * name,uint32_t dword,int level)337 reg_dump_scratch(const char *name, uint32_t dword, int level)
338 {
339    unsigned r;
340 
341    if (quiet(3))
342       return;
343 
344    r = regbase("CP_SCRATCH[0].REG");
345 
346    // if not, try old a2xx/a3xx version:
347    if (!r)
348       r = regbase("CP_SCRATCH_REG0");
349 
350    if (!r)
351       return;
352 
353    printf("%s:%u,%u,%u,%u\n", levels[level], reg_val(r + 4), reg_val(r + 5),
354           reg_val(r + 6), reg_val(r + 7));
355 }
356 
/*
 * Hex-dump 'sizedwords' dwords at 'gpuaddr' one indentation level deeper
 * than 'level'.  Does nothing when quiet(quietlvl) or when hostptr() has
 * no mapping for the address.
 */
static void
dump_gpuaddr_size(uint64_t gpuaddr, int level, int sizedwords, int quietlvl)
{
   if (quiet(quietlvl))
      return;

   void *buf = hostptr(gpuaddr);
   if (!buf)
      return;

   dump_hex(buf, sizedwords, level + 1);
}
370 
/* Hex-dump 64 dwords at 'gpuaddr', using quiet level 3. */
static void
dump_gpuaddr(uint64_t gpuaddr, int level)
{
   dump_gpuaddr_size(gpuaddr, level, 64, 3);
}
376 
/* Register handler: treat the 32-bit register value as a GPU address and
 * dump the memory it points at.
 */
static void
reg_dump_gpuaddr(const char *name, uint32_t dword, int level)
{
   dump_gpuaddr(dword, level);
}

/* Low dword latched by reg_gpuaddr_lo() for the following _HI handler: */
uint32_t gpuaddr_lo;

/* Register handler for the _LO half of a 64-bit address: only remembers
 * the value in gpuaddr_lo.
 */
static void
reg_gpuaddr_lo(const char *name, uint32_t dword, int level)
{
   gpuaddr_lo = dword;
}

/* Register handler for the _HI half: combines 'dword' (bits 63:32) with
 * the latched gpuaddr_lo and dumps the memory at that address.
 */
static void
reg_dump_gpuaddr_hi(const char *name, uint32_t dword, int level)
{
   dump_gpuaddr(gpuaddr_lo | (((uint64_t)dword) << 32), level);
}

/* 64-bit register handler: dump the memory at address 'qword'. */
static void
reg_dump_gpuaddr64(const char *name, uint64_t qword, int level)
{
   dump_gpuaddr(qword, level);
}
401 
402 static void
dump_shader(const char * ext,void * buf,int bufsz)403 dump_shader(const char *ext, void *buf, int bufsz)
404 {
405    if (options->dump_shaders) {
406       static int n = 0;
407       char filename[16];
408       int fd;
409       sprintf(filename, "%04d.%s", n++, ext);
410       fd = open(filename, O_WRONLY | O_TRUNC | O_CREAT, 0644);
411       if (fd != -1) {
412          write(fd, buf, bufsz);
413          close(fd);
414       }
415    }
416 }
417 
418 static void
disasm_gpuaddr(const char * name,uint64_t gpuaddr,int level)419 disasm_gpuaddr(const char *name, uint64_t gpuaddr, int level)
420 {
421    void *buf;
422 
423    gpuaddr &= 0xfffffffffffffff0;
424 
425    if (quiet(3))
426       return;
427 
428    buf = hostptr(gpuaddr);
429    if (buf) {
430       uint32_t sizedwords = hostlen(gpuaddr) / 4;
431       const char *ext;
432 
433       dump_hex(buf, MIN2(64, sizedwords), level + 1);
434       try_disasm_a3xx(buf, sizedwords, level + 2, stdout, options->info->chip * 100);
435 
436       /* this is a bit ugly way, but oh well.. */
437       if (strstr(name, "SP_VS_OBJ")) {
438          ext = "vo3";
439       } else if (strstr(name, "SP_FS_OBJ")) {
440          ext = "fo3";
441       } else if (strstr(name, "SP_GS_OBJ")) {
442          ext = "go3";
443       } else if (strstr(name, "SP_CS_OBJ")) {
444          ext = "co3";
445       } else {
446          ext = NULL;
447       }
448 
449       if (ext)
450          dump_shader(ext, buf, sizedwords * 4);
451    }
452 }
453 
454 static void
reg_disasm_gpuaddr(const char * name,uint32_t dword,int level)455 reg_disasm_gpuaddr(const char *name, uint32_t dword, int level)
456 {
457    disasm_gpuaddr(name, dword, level);
458 }
459 
460 static void
reg_disasm_gpuaddr_hi(const char * name,uint32_t dword,int level)461 reg_disasm_gpuaddr_hi(const char *name, uint32_t dword, int level)
462 {
463    disasm_gpuaddr(name, gpuaddr_lo | (((uint64_t)dword) << 32), level);
464 }
465 
466 static void
reg_disasm_gpuaddr64(const char * name,uint64_t qword,int level)467 reg_disasm_gpuaddr64(const char *name, uint64_t qword, int level)
468 {
469    disasm_gpuaddr(name, qword, level);
470 }
471 
/* Find the value of the TEX_COUNT register that corresponds to the named
 * TEX_SAMP/TEX_CONST reg.
 *
 * Note, this kinda assumes an equal # of samplers and textures, but not
 * really sure if there is a much better option.  I suppose on a6xx we
 * could instead decode the bitfields in SP_xS_CONFIG
 *
 * The count register name is built by replacing everything from the first
 * "CONST" (or, failing that, "SAMP") in 'name' with "COUNT".  Returns 0
 * when 'name' contains neither.
 */
static int
get_tex_count(const char *name)
{
   /* the prefix plus "COUNT" and the terminator always fits: the matched
    * part being replaced is at least four characters long
    */
   char count_reg[strlen(name) + 5];
   const char *p;

   p = strstr(name, "CONST");
   if (!p)
      p = strstr(name, "SAMP");
   if (!p)
      return 0;

   size_t n = (size_t)(p - name);
   memcpy(count_reg, name, n);
   strcpy(count_reg + n, "COUNT");

   return reg_val(regbase(count_reg));
}
497 
498 static void
reg_dump_tex_samp_hi(const char * name,uint32_t dword,int level)499 reg_dump_tex_samp_hi(const char *name, uint32_t dword, int level)
500 {
501    if (!in_summary)
502       return;
503 
504    int num_unit = get_tex_count(name);
505    uint64_t gpuaddr = gpuaddr_lo | (((uint64_t)dword) << 32);
506    void *buf = hostptr(gpuaddr);
507 
508    if (!buf)
509       return;
510 
511    dump_tex_samp(buf, STATE_SRC_DIRECT, num_unit, level + 1);
512 }
513 
514 static void
reg_dump_tex_const_hi(const char * name,uint32_t dword,int level)515 reg_dump_tex_const_hi(const char *name, uint32_t dword, int level)
516 {
517    if (!in_summary)
518       return;
519 
520    int num_unit = get_tex_count(name);
521    uint64_t gpuaddr = gpuaddr_lo | (((uint64_t)dword) << 32);
522    void *buf = hostptr(gpuaddr);
523 
524    if (!buf)
525       return;
526 
527    dump_tex_const(buf, num_unit, level + 1);
528 }
529 
530 /*
531  * Registers with special handling (rnndec_decode() handles rest):
532  */
533 #define REG(x, fxn)    { #x, fxn }
534 #define REG64(x, fxn)  { #x, .fxn64 = fxn, .is_reg64 = true }
535 static struct {
536    const char *regname;
537    void (*fxn)(const char *name, uint32_t dword, int level);
538    void (*fxn64)(const char *name, uint64_t qword, int level);
539    uint32_t regbase;
540    bool is_reg64;
541 } reg_a2xx[] = {
542       REG(CP_SCRATCH_REG0, reg_dump_scratch),
543       REG(CP_SCRATCH_REG1, reg_dump_scratch),
544       REG(CP_SCRATCH_REG2, reg_dump_scratch),
545       REG(CP_SCRATCH_REG3, reg_dump_scratch),
546       REG(CP_SCRATCH_REG4, reg_dump_scratch),
547       REG(CP_SCRATCH_REG5, reg_dump_scratch),
548       REG(CP_SCRATCH_REG6, reg_dump_scratch),
549       REG(CP_SCRATCH_REG7, reg_dump_scratch),
550       {NULL},
551 }, reg_a3xx[] = {
552       REG(CP_SCRATCH_REG0, reg_dump_scratch),
553       REG(CP_SCRATCH_REG1, reg_dump_scratch),
554       REG(CP_SCRATCH_REG2, reg_dump_scratch),
555       REG(CP_SCRATCH_REG3, reg_dump_scratch),
556       REG(CP_SCRATCH_REG4, reg_dump_scratch),
557       REG(CP_SCRATCH_REG5, reg_dump_scratch),
558       REG(CP_SCRATCH_REG6, reg_dump_scratch),
559       REG(CP_SCRATCH_REG7, reg_dump_scratch),
560       REG(VSC_SIZE_ADDRESS, reg_dump_gpuaddr),
561       REG(SP_VS_PVT_MEM_ADDR_REG, reg_dump_gpuaddr),
562       REG(SP_FS_PVT_MEM_ADDR_REG, reg_dump_gpuaddr),
563       REG(SP_VS_OBJ_START_REG, reg_disasm_gpuaddr),
564       REG(SP_FS_OBJ_START_REG, reg_disasm_gpuaddr),
565       REG(TPL1_TP_FS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
566       {NULL},
567 }, reg_a4xx[] = {
568       REG(CP_SCRATCH[0].REG, reg_dump_scratch),
569       REG(CP_SCRATCH[0x1].REG, reg_dump_scratch),
570       REG(CP_SCRATCH[0x2].REG, reg_dump_scratch),
571       REG(CP_SCRATCH[0x3].REG, reg_dump_scratch),
572       REG(CP_SCRATCH[0x4].REG, reg_dump_scratch),
573       REG(CP_SCRATCH[0x5].REG, reg_dump_scratch),
574       REG(CP_SCRATCH[0x6].REG, reg_dump_scratch),
575       REG(CP_SCRATCH[0x7].REG, reg_dump_scratch),
576       REG(SP_VS_PVT_MEM_ADDR, reg_dump_gpuaddr),
577       REG(SP_FS_PVT_MEM_ADDR, reg_dump_gpuaddr),
578       REG(SP_GS_PVT_MEM_ADDR, reg_dump_gpuaddr),
579       REG(SP_HS_PVT_MEM_ADDR, reg_dump_gpuaddr),
580       REG(SP_DS_PVT_MEM_ADDR, reg_dump_gpuaddr),
581       REG(SP_CS_PVT_MEM_ADDR, reg_dump_gpuaddr),
582       REG(SP_VS_OBJ_START, reg_disasm_gpuaddr),
583       REG(SP_FS_OBJ_START, reg_disasm_gpuaddr),
584       REG(SP_GS_OBJ_START, reg_disasm_gpuaddr),
585       REG(SP_HS_OBJ_START, reg_disasm_gpuaddr),
586       REG(SP_DS_OBJ_START, reg_disasm_gpuaddr),
587       REG(SP_CS_OBJ_START, reg_disasm_gpuaddr),
588       REG(TPL1_TP_VS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
589       REG(TPL1_TP_HS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
590       REG(TPL1_TP_DS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
591       REG(TPL1_TP_GS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
592       REG(TPL1_TP_FS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
593       {NULL},
594 }, reg_a5xx[] = {
595       REG(CP_SCRATCH[0x4].REG, reg_dump_scratch),
596       REG(CP_SCRATCH[0x5].REG, reg_dump_scratch),
597       REG(CP_SCRATCH[0x6].REG, reg_dump_scratch),
598       REG(CP_SCRATCH[0x7].REG, reg_dump_scratch),
599       REG(SP_VS_OBJ_START_LO, reg_gpuaddr_lo),
600       REG(SP_VS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
601       REG(SP_HS_OBJ_START_LO, reg_gpuaddr_lo),
602       REG(SP_HS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
603       REG(SP_DS_OBJ_START_LO, reg_gpuaddr_lo),
604       REG(SP_DS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
605       REG(SP_GS_OBJ_START_LO, reg_gpuaddr_lo),
606       REG(SP_GS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
607       REG(SP_FS_OBJ_START_LO, reg_gpuaddr_lo),
608       REG(SP_FS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
609       REG(SP_CS_OBJ_START_LO, reg_gpuaddr_lo),
610       REG(SP_CS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
611       REG(TPL1_VS_TEX_CONST_LO, reg_gpuaddr_lo),
612       REG(TPL1_VS_TEX_CONST_HI, reg_dump_tex_const_hi),
613       REG(TPL1_VS_TEX_SAMP_LO, reg_gpuaddr_lo),
614       REG(TPL1_VS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
615       REG(TPL1_HS_TEX_CONST_LO, reg_gpuaddr_lo),
616       REG(TPL1_HS_TEX_CONST_HI, reg_dump_tex_const_hi),
617       REG(TPL1_HS_TEX_SAMP_LO, reg_gpuaddr_lo),
618       REG(TPL1_HS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
619       REG(TPL1_DS_TEX_CONST_LO, reg_gpuaddr_lo),
620       REG(TPL1_DS_TEX_CONST_HI, reg_dump_tex_const_hi),
621       REG(TPL1_DS_TEX_SAMP_LO, reg_gpuaddr_lo),
622       REG(TPL1_DS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
623       REG(TPL1_GS_TEX_CONST_LO, reg_gpuaddr_lo),
624       REG(TPL1_GS_TEX_CONST_HI, reg_dump_tex_const_hi),
625       REG(TPL1_GS_TEX_SAMP_LO, reg_gpuaddr_lo),
626       REG(TPL1_GS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
627       REG(TPL1_FS_TEX_CONST_LO, reg_gpuaddr_lo),
628       REG(TPL1_FS_TEX_CONST_HI, reg_dump_tex_const_hi),
629       REG(TPL1_FS_TEX_SAMP_LO, reg_gpuaddr_lo),
630       REG(TPL1_FS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
631       REG(TPL1_CS_TEX_CONST_LO, reg_gpuaddr_lo),
632       REG(TPL1_CS_TEX_CONST_HI, reg_dump_tex_const_hi),
633       REG(TPL1_CS_TEX_SAMP_LO, reg_gpuaddr_lo),
634       REG(TPL1_CS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
635       REG(TPL1_TP_BORDER_COLOR_BASE_ADDR_LO, reg_gpuaddr_lo),
636       REG(TPL1_TP_BORDER_COLOR_BASE_ADDR_HI, reg_dump_gpuaddr_hi),
637 //      REG(RB_MRT_FLAG_BUFFER[0].ADDR_LO, reg_gpuaddr_lo),
638 //      REG(RB_MRT_FLAG_BUFFER[0].ADDR_HI, reg_dump_gpuaddr_hi),
639 //      REG(RB_MRT_FLAG_BUFFER[1].ADDR_LO, reg_gpuaddr_lo),
640 //      REG(RB_MRT_FLAG_BUFFER[1].ADDR_HI, reg_dump_gpuaddr_hi),
641 //      REG(RB_MRT_FLAG_BUFFER[2].ADDR_LO, reg_gpuaddr_lo),
642 //      REG(RB_MRT_FLAG_BUFFER[2].ADDR_HI, reg_dump_gpuaddr_hi),
643 //      REG(RB_MRT_FLAG_BUFFER[3].ADDR_LO, reg_gpuaddr_lo),
644 //      REG(RB_MRT_FLAG_BUFFER[3].ADDR_HI, reg_dump_gpuaddr_hi),
645 //      REG(RB_MRT_FLAG_BUFFER[4].ADDR_LO, reg_gpuaddr_lo),
646 //      REG(RB_MRT_FLAG_BUFFER[4].ADDR_HI, reg_dump_gpuaddr_hi),
647 //      REG(RB_MRT_FLAG_BUFFER[5].ADDR_LO, reg_gpuaddr_lo),
648 //      REG(RB_MRT_FLAG_BUFFER[5].ADDR_HI, reg_dump_gpuaddr_hi),
649 //      REG(RB_MRT_FLAG_BUFFER[6].ADDR_LO, reg_gpuaddr_lo),
650 //      REG(RB_MRT_FLAG_BUFFER[6].ADDR_HI, reg_dump_gpuaddr_hi),
651 //      REG(RB_MRT_FLAG_BUFFER[7].ADDR_LO, reg_gpuaddr_lo),
652 //      REG(RB_MRT_FLAG_BUFFER[7].ADDR_HI, reg_dump_gpuaddr_hi),
653 //      REG(RB_BLIT_FLAG_DST_LO, reg_gpuaddr_lo),
654 //      REG(RB_BLIT_FLAG_DST_HI, reg_dump_gpuaddr_hi),
655 //      REG(RB_MRT[0].BASE_LO, reg_gpuaddr_lo),
656 //      REG(RB_MRT[0].BASE_HI, reg_dump_gpuaddr_hi),
657 //      REG(RB_DEPTH_BUFFER_BASE_LO, reg_gpuaddr_lo),
658 //      REG(RB_DEPTH_BUFFER_BASE_HI, reg_dump_gpuaddr_hi),
659 //      REG(RB_DEPTH_FLAG_BUFFER_BASE_LO, reg_gpuaddr_lo),
660 //      REG(RB_DEPTH_FLAG_BUFFER_BASE_HI, reg_dump_gpuaddr_hi),
661 //      REG(RB_BLIT_DST_LO, reg_gpuaddr_lo),
662 //      REG(RB_BLIT_DST_HI, reg_dump_gpuaddr_hi),
663 
664 //      REG(RB_2D_SRC_LO, reg_gpuaddr_lo),
665 //      REG(RB_2D_SRC_HI, reg_dump_gpuaddr_hi),
666 //      REG(RB_2D_SRC_FLAGS_LO, reg_gpuaddr_lo),
667 //      REG(RB_2D_SRC_FLAGS_HI, reg_dump_gpuaddr_hi),
668 //      REG(RB_2D_DST_LO, reg_gpuaddr_lo),
669 //      REG(RB_2D_DST_HI, reg_dump_gpuaddr_hi),
670 //      REG(RB_2D_DST_FLAGS_LO, reg_gpuaddr_lo),
671 //      REG(RB_2D_DST_FLAGS_HI, reg_dump_gpuaddr_hi),
672 
673       {NULL},
674 }, reg_a6xx[] = {
675       REG(CP_SCRATCH[0x4].REG, reg_dump_scratch),
676       REG(CP_SCRATCH[0x5].REG, reg_dump_scratch),
677       REG(CP_SCRATCH[0x6].REG, reg_dump_scratch),
678       REG(CP_SCRATCH[0x7].REG, reg_dump_scratch),
679 
680       REG64(SP_VS_OBJ_START, reg_disasm_gpuaddr64),
681       REG64(SP_HS_OBJ_START, reg_disasm_gpuaddr64),
682       REG64(SP_DS_OBJ_START, reg_disasm_gpuaddr64),
683       REG64(SP_GS_OBJ_START, reg_disasm_gpuaddr64),
684       REG64(SP_FS_OBJ_START, reg_disasm_gpuaddr64),
685       REG64(SP_CS_OBJ_START, reg_disasm_gpuaddr64),
686 
687       REG64(SP_VS_TEX_CONST, reg_dump_gpuaddr64),
688       REG64(SP_VS_TEX_SAMP, reg_dump_gpuaddr64),
689       REG64(SP_HS_TEX_CONST, reg_dump_gpuaddr64),
690       REG64(SP_HS_TEX_SAMP, reg_dump_gpuaddr64),
691       REG64(SP_DS_TEX_CONST, reg_dump_gpuaddr64),
692       REG64(SP_DS_TEX_SAMP, reg_dump_gpuaddr64),
693       REG64(SP_GS_TEX_CONST, reg_dump_gpuaddr64),
694       REG64(SP_GS_TEX_SAMP, reg_dump_gpuaddr64),
695       REG64(SP_FS_TEX_CONST, reg_dump_gpuaddr64),
696       REG64(SP_FS_TEX_SAMP, reg_dump_gpuaddr64),
697       REG64(SP_CS_TEX_CONST, reg_dump_gpuaddr64),
698       REG64(SP_CS_TEX_SAMP, reg_dump_gpuaddr64),
699 
700       {NULL},
701 }, reg_a7xx[] = {
702       REG64(SP_VS_OBJ_START, reg_disasm_gpuaddr64),
703       REG64(SP_HS_OBJ_START, reg_disasm_gpuaddr64),
704       REG64(SP_DS_OBJ_START, reg_disasm_gpuaddr64),
705       REG64(SP_GS_OBJ_START, reg_disasm_gpuaddr64),
706       REG64(SP_FS_OBJ_START, reg_disasm_gpuaddr64),
707       REG64(SP_CS_OBJ_START, reg_disasm_gpuaddr64),
708 
709       {NULL},
710 }, *type0_reg;
711 
712 static struct rnn *rnn;
713 
714 static void
init_rnn(const char * gpuname)715 init_rnn(const char *gpuname)
716 {
717    rnn = rnn_new(!options->color);
718 
719    rnn_load(rnn, gpuname);
720 
721    if (options->querystrs) {
722       int i;
723       queryvals = calloc(options->nquery, sizeof(queryvals[0]));
724 
725       for (i = 0; i < options->nquery; i++) {
726          int val = strtol(options->querystrs[i], NULL, 0);
727 
728          if (val == 0)
729             val = regbase(options->querystrs[i]);
730 
731          queryvals[i] = val;
732          printf("querystr: %s -> 0x%x\n", options->querystrs[i], queryvals[i]);
733       }
734    }
735 
736    for (unsigned idx = 0; type0_reg[idx].regname; idx++) {
737       type0_reg[idx].regbase = regbase(type0_reg[idx].regname);
738       if (!type0_reg[idx].regbase) {
739          printf("invalid register name: %s\n", type0_reg[idx].regname);
740          exit(1);
741       }
742    }
743 }
744 
745 void
reset_regs(void)746 reset_regs(void)
747 {
748    clear_written();
749    clear_lastvals();
750    memset(&ibs, 0, sizeof(ibs));
751 }
752 
753 void
cffdec_init(const struct cffdec_options * _options)754 cffdec_init(const struct cffdec_options *_options)
755 {
756    options = _options;
757    summary = options->summary;
758 
759    /* in case we're decoding multiple files: */
760    free(queryvals);
761    reset_regs();
762    draw_count = 0;
763 
764    if (!options->info)
765       return;
766 
767    switch (options->info->chip) {
768    case 2:
769       type0_reg = reg_a2xx;
770       init_rnn("a2xx");
771       break;
772    case 3:
773       type0_reg = reg_a3xx;
774       init_rnn("a3xx");
775       break;
776    case 4:
777       type0_reg = reg_a4xx;
778       init_rnn("a4xx");
779       break;
780    case 5:
781       type0_reg = reg_a5xx;
782       init_rnn("a5xx");
783       break;
784    case 6:
785       type0_reg = reg_a6xx;
786       init_rnn("a6xx");
787       break;
788    case 7:
789       type0_reg = reg_a7xx;
790       init_rnn("a7xx");
791       break;
792    default:
793       errx(-1, "unsupported generation: %u", options->info->chip);
794    }
795 }
796 
797 const char *
pktname(unsigned opc)798 pktname(unsigned opc)
799 {
800    return rnn_enumname(rnn, "adreno_pm4_type3_packets", opc);
801 }
802 
803 const char *
regname(uint32_t regbase,int color)804 regname(uint32_t regbase, int color)
805 {
806    return rnn_regname(rnn, regbase, color);
807 }
808 
809 uint32_t
regbase(const char * name)810 regbase(const char *name)
811 {
812    return rnn_regbase(rnn, name);
813 }
814 
/*
 * Returns non-zero if the name of register 'regbase' ends with 'suffix'.
 *
 * The tail of the name is compared directly.  Searching with strstr() finds
 * only the first occurrence, which misses names that contain the suffix
 * earlier and also end with it.
 */
static int
endswith(uint32_t regbase, const char *suffix)
{
   const char *name = regname(regbase, 0);

   /* NOTE(review): guards against regname() returning NULL for an unknown
    * offset; confirm against rnn_regname().
    */
   if (!name)
      return 0;

   size_t name_len = strlen(name);
   size_t suffix_len = strlen(suffix);

   if (suffix_len > name_len)
      return 0;

   return strcmp(name + (name_len - suffix_len), suffix) == 0;
}
824 
825 struct regacc
regacc(struct rnn * r)826 regacc(struct rnn *r)
827 {
828    if (!r)
829       r = rnn;
830 
831    return (struct regacc){ .rnn = r };
832 }
833 
834 /* returns true if the complete reg value has been accumulated: */
835 bool
regacc_push(struct regacc * r,uint32_t regbase,uint32_t dword)836 regacc_push(struct regacc *r, uint32_t regbase, uint32_t dword)
837 {
838    if (r->has_dword_lo) {
839       /* Work around kernel devcore dumps which accidentially miss half of a 64b reg
840        * see: https://patchwork.freedesktop.org/series/112302/
841        */
842       if (regbase != r->regbase + 1) {
843          printf("WARNING: 64b discontinuity (%x, expected %x)\n", regbase, r->regbase + 1);
844          r->has_dword_lo = false;
845          return true;
846       }
847 
848       r->value |= ((uint64_t)dword) << 32;
849       r->has_dword_lo = false;
850 
851       return true;
852    }
853 
854    r->regbase = regbase;
855    r->value = dword;
856 
857    struct rnndecaddrinfo *info = rnn_reginfo(r->rnn, regbase);
858    r->has_dword_lo = (info->width == 64);
859 
860    /* Workaround for kernel devcore dump bugs: */
861    if ((info->width == 64) && endswith(regbase, "_HI")) {
862       printf("WARNING: 64b discontinuity (no _LO dword for %x)\n", regbase);
863       r->has_dword_lo = false;
864    }
865 
866    rnn_reginfo_free(info);
867 
868    return !r->has_dword_lo;
869 }
870 
871 void
dump_register_val(struct regacc * r,int level)872 dump_register_val(struct regacc *r, int level)
873 {
874    struct rnndecaddrinfo *info = rnn_reginfo(rnn, r->regbase);
875 
876    if (info && info->typeinfo) {
877       uint64_t gpuaddr = 0;
878       char *decoded = rnndec_decodeval(rnn->vc, info->typeinfo, r->value);
879       printf("%s%s: %s", levels[level], info->name, decoded);
880 
881       /* Try and figure out if we are looking at a gpuaddr.. this
882        * might be useful for other gen's too, but at least a5xx has
883        * the _HI/_LO suffix we can look for.  Maybe a better approach
884        * would be some special annotation in the xml..
885        * for a6xx use "address" and "waddress" types
886        */
887       if (options->info->chip >= 6) {
888          if (!strcmp(info->typeinfo->name, "address") ||
889              !strcmp(info->typeinfo->name, "waddress")) {
890             gpuaddr = r->value;
891          }
892       } else if (options->info->chip >= 5) {
893          /* TODO we shouldn't rely on reg_val() since reg_set() might
894           * not have been called yet for the other half of the 64b reg.
895           * We can remove this hack once a5xx.xml is converted to reg64
896           * and address/waddess.
897           */
898          if (endswith(r->regbase, "_HI") && endswith(r->regbase - 1, "_LO")) {
899             gpuaddr = (r->value << 32) | reg_val(r->regbase - 1);
900          } else if (endswith(r->regbase, "_LO") && endswith(r->regbase + 1, "_HI")) {
901             gpuaddr = (((uint64_t)reg_val(r->regbase + 1)) << 32) | r->value;
902          }
903       }
904 
905       if (gpuaddr && hostptr(gpuaddr)) {
906          printf("\t\tbase=%" PRIx64 ", offset=%" PRIu64 ", size=%u",
907                 gpubaseaddr(gpuaddr), gpuaddr - gpubaseaddr(gpuaddr),
908                 hostlen(gpubaseaddr(gpuaddr)));
909       }
910 
911       printf("\n");
912 
913       free(decoded);
914    } else if (info) {
915       printf("%s%s: %08"PRIx64"\n", levels[level], info->name, r->value);
916    } else {
917       printf("%s<%04x>: %08"PRIx64"\n", levels[level], r->regbase, r->value);
918    }
919 
920    rnn_reginfo_free(info);
921 }
922 
923 static void
dump_register(struct regacc * r,int level)924 dump_register(struct regacc *r, int level)
925 {
926    if (!quiet(3)) {
927       dump_register_val(r, level);
928    }
929 
930    for (unsigned idx = 0; type0_reg[idx].regname; idx++) {
931       if (type0_reg[idx].regbase == r->regbase) {
932          if (type0_reg[idx].is_reg64) {
933             type0_reg[idx].fxn64(type0_reg[idx].regname, r->value, level);
934          } else {
935             type0_reg[idx].fxn(type0_reg[idx].regname, (uint32_t)r->value, level);
936          }
937          break;
938       }
939    }
940 }
941 
/* True if 'regbase' lies in the banked register range [0x2000, 0x2400). */
static bool
is_banked_reg(uint32_t regbase)
{
   return (regbase >= 0x2000) && (regbase < 0x2400);
}
947 
948 static void
dump_registers(uint32_t regbase,uint32_t * dwords,uint32_t sizedwords,int level)949 dump_registers(uint32_t regbase, uint32_t *dwords, uint32_t sizedwords,
950                int level)
951 {
952    struct regacc r = regacc(NULL);
953 
954    while (sizedwords--) {
955       int last_summary = summary;
956 
957       /* access to non-banked registers needs a WFI:
958        * TODO banked register range for a2xx??
959        */
960       if (needs_wfi && !is_banked_reg(regbase))
961          printl(2, "NEEDS WFI: %s (%x)\n", regname(regbase, 1), regbase);
962 
963       reg_set(regbase, *dwords);
964       if (regacc_push(&r, regbase, *dwords))
965          dump_register(&r, level);
966       regbase++;
967       dwords++;
968       summary = last_summary;
969    }
970 }
971 
972 static void
dump_domain(uint32_t * dwords,uint32_t sizedwords,int level,const char * name)973 dump_domain(uint32_t *dwords, uint32_t sizedwords, int level, const char *name)
974 {
975    struct rnndomain *dom;
976    int i;
977 
978    dom = rnn_finddomain(rnn->db, name);
979 
980    if (!dom)
981       return;
982 
983    if (script_packet)
984       script_packet(dwords, sizedwords, rnn, dom);
985 
986    if (quiet(2))
987       return;
988 
989    for (i = 0; i < sizedwords; i++) {
990       struct rnndecaddrinfo *info = rnndec_decodeaddr(rnn->vc, dom, i, 0);
991       char *decoded;
992       if (!(info && info->typeinfo))
993          break;
994       uint64_t value = dwords[i];
995       if (info->typeinfo->high >= 32 && i < sizedwords - 1) {
996          value |= (uint64_t)dwords[i + 1] << 32;
997          i++; /* skip the next dword since we're printing it now */
998       }
999       decoded = rnndec_decodeval(rnn->vc, info->typeinfo, value);
1000       /* Unlike the register printing path, we don't print the name
1001        * of the register, so if it doesn't contain other named
1002        * things (i.e. it isn't a bitset) then print the register
1003        * name as if it's a bitset with a single entry. This avoids
1004        * having to create a dummy register with a single entry to
1005        * get a name in the decoding.
1006        */
1007       if (info->typeinfo->type == RNN_TTYPE_BITSET ||
1008           info->typeinfo->type == RNN_TTYPE_INLINE_BITSET) {
1009          printf("%s%s\n", levels[level], decoded);
1010       } else {
1011          printf("%s{ %s%s%s = %s }\n", levels[level], rnn->vc->colors->rname,
1012                 info->name, rnn->vc->colors->reset, decoded);
1013       }
1014       free(decoded);
1015       free(info->name);
1016       free(info);
1017    }
1018 }
1019 
/* Current bin bounds.  Written by cp_set_bin(), and by __do_query() from
 * the window scissor registers on chip generations 5 and 6:
 */
static uint32_t bin_x1;
static uint32_t bin_x2;
static uint32_t bin_y1;
static uint32_t bin_y2;

static unsigned mode;

/* Strings printed by print_mode() and in the query output ('thread' is
 * optional and only printed when non-NULL):
 */
static const char *render_mode;
static const char *thread;

/* Bitmask of render modes; do_query_compare() temporarily overrides it
 * and restores the previous value when done:
 */
static enum {
   MODE_BINNING = 1 << 0,
   MODE_GMEM = 1 << 1,
   MODE_BYPASS = 1 << 2,
   MODE_ALL = MODE_BINNING | MODE_GMEM | MODE_BYPASS,
} enable_mask = MODE_ALL;

/* Printed by print_mode() as "skip_ib2: g=.., l=..": */
static bool skip_ib2_enable_global;
static bool skip_ib2_enable_local;
1032 
1033 static void
print_mode(int level)1034 print_mode(int level)
1035 {
1036    if ((options->info->chip >= 5) && !quiet(2)) {
1037       printf("%smode: %s", levels[level], render_mode);
1038       if (thread)
1039          printf(":%s", thread);
1040       printf("\n");
1041       printf("%sskip_ib2: g=%d, l=%d\n", levels[level], skip_ib2_enable_global,
1042              skip_ib2_enable_local);
1043    }
1044 }
1045 
1046 static bool
skip_query(void)1047 skip_query(void)
1048 {
1049    switch (options->query_mode) {
1050    case QUERY_ALL:
1051       /* never skip: */
1052       return false;
1053    case QUERY_WRITTEN:
1054       for (int i = 0; i < options->nquery; i++) {
1055          uint32_t regbase = queryvals[i];
1056          if (!reg_written(regbase)) {
1057             continue;
1058          }
1059          if (reg_rewritten(regbase)) {
1060             return false;
1061          }
1062       }
1063       return true;
1064    case QUERY_DELTA:
1065       for (int i = 0; i < options->nquery; i++) {
1066          uint32_t regbase = queryvals[i];
1067          if (!reg_written(regbase)) {
1068             continue;
1069          }
1070          uint32_t lastval = reg_val(regbase);
1071          if (lastval != lastvals[regbase]) {
1072             return false;
1073          }
1074       }
1075       return true;
1076    }
1077    return true;
1078 }
1079 
1080 static void
__do_query(const char * primtype,uint32_t num_indices)1081 __do_query(const char *primtype, uint32_t num_indices)
1082 {
1083    int n = 0;
1084 
1085    if ((5 <= options->info->chip) && (options->info->chip < 7)) {
1086       uint32_t scissor_tl = reg_val(regbase("GRAS_SC_WINDOW_SCISSOR_TL"));
1087       uint32_t scissor_br = reg_val(regbase("GRAS_SC_WINDOW_SCISSOR_BR"));
1088 
1089       bin_x1 = scissor_tl & 0xffff;
1090       bin_y1 = scissor_tl >> 16;
1091       bin_x2 = scissor_br & 0xffff;
1092       bin_y2 = scissor_br >> 16;
1093    }
1094 
1095    for (int i = 0; i < options->nquery; i++) {
1096       uint32_t regbase = queryvals[i];
1097       if (!reg_written(regbase))
1098          continue;
1099 
1100       struct regacc r = regacc(NULL);
1101 
1102       /* 64b regs require two successive 32b dwords: */
1103       for (int d = 0; d < 2; d++)
1104          if (regacc_push(&r, regbase + d, reg_val(regbase + d)))
1105             break;
1106 
1107       printf("%4d: %s(%u,%u-%u,%u):%u:", draw_count, primtype, bin_x1,
1108              bin_y1, bin_x2, bin_y2, num_indices);
1109       if (options->info->chip >= 5)
1110          printf("%s:", render_mode);
1111       if (thread)
1112          printf("%s:", thread);
1113       printf("\t%08"PRIx64, r.value);
1114       if (r.value != lastvals[regbase]) {
1115          printf("!");
1116       } else {
1117          printf(" ");
1118       }
1119       if (reg_rewritten(regbase)) {
1120          printf("+");
1121       } else {
1122          printf(" ");
1123       }
1124       dump_register_val(&r, 0);
1125       n++;
1126    }
1127 
1128    if (n > 1)
1129       printf("\n");
1130 }
1131 
1132 static void
do_query_compare(const char * primtype,uint32_t num_indices)1133 do_query_compare(const char *primtype, uint32_t num_indices)
1134 {
1135    unsigned saved_enable_mask = enable_mask;
1136    const char *saved_render_mode = render_mode;
1137 
1138    /* in 'query-compare' mode, we want to see if the register is writtten
1139     * or changed in any mode:
1140     *
1141     * (NOTE: this could cause false-positive for 'query-delta' if the reg
1142     * is written with different values in binning vs sysmem/gmem mode, as
1143     * we don't track previous values per-mode, but I think we can live with
1144     * that)
1145     */
1146    enable_mask = MODE_ALL;
1147 
1148    clear_rewritten();
1149    load_all_groups(0);
1150 
1151    if (!skip_query()) {
1152       /* dump binning pass values: */
1153       enable_mask = MODE_BINNING;
1154       render_mode = "BINNING";
1155       clear_rewritten();
1156       load_all_groups(0);
1157       __do_query(primtype, num_indices);
1158 
1159       /* dump draw pass values: */
1160       enable_mask = MODE_GMEM | MODE_BYPASS;
1161       render_mode = "DRAW";
1162       clear_rewritten();
1163       load_all_groups(0);
1164       __do_query(primtype, num_indices);
1165 
1166       printf("\n");
1167    }
1168 
1169    enable_mask = saved_enable_mask;
1170    render_mode = saved_render_mode;
1171 
1172    disable_all_groups();
1173 }
1174 
1175 /* well, actually query and script..
1176  * NOTE: call this before dump_register_summary()
1177  */
1178 static void
do_query(const char * primtype,uint32_t num_indices)1179 do_query(const char *primtype, uint32_t num_indices)
1180 {
1181    if (script_draw)
1182       script_draw(primtype, num_indices);
1183 
1184    if (options->query_compare) {
1185       do_query_compare(primtype, num_indices);
1186       return;
1187    }
1188 
1189    if (skip_query())
1190       return;
1191 
1192    __do_query(primtype, num_indices);
1193 }
1194 
1195 static void
cp_im_loadi(uint32_t * dwords,uint32_t sizedwords,int level)1196 cp_im_loadi(uint32_t *dwords, uint32_t sizedwords, int level)
1197 {
1198    uint32_t start = dwords[1] >> 16;
1199    uint32_t size = dwords[1] & 0xffff;
1200    const char *type = NULL, *ext = NULL;
1201    gl_shader_stage disasm_type;
1202 
1203    switch (dwords[0]) {
1204    case 0:
1205       type = "vertex";
1206       ext = "vo";
1207       disasm_type = MESA_SHADER_VERTEX;
1208       break;
1209    case 1:
1210       type = "fragment";
1211       ext = "fo";
1212       disasm_type = MESA_SHADER_FRAGMENT;
1213       break;
1214    default:
1215       type = "<unknown>";
1216       disasm_type = 0;
1217       break;
1218    }
1219 
1220    printf("%s%s shader, start=%04x, size=%04x\n", levels[level], type, start,
1221           size);
1222    disasm_a2xx(dwords + 2, sizedwords - 2, level + 2, disasm_type);
1223 
1224    /* dump raw shader: */
1225    if (ext)
1226       dump_shader(ext, dwords + 2, (sizedwords - 2) * 4);
1227 }
1228 
1229 static void
cp_wide_reg_write(uint32_t * dwords,uint32_t sizedwords,int level)1230 cp_wide_reg_write(uint32_t *dwords, uint32_t sizedwords, int level)
1231 {
1232    uint32_t reg = dwords[0] & 0xffff;
1233    struct regacc r = regacc(NULL);
1234    for (int i = 1; i < sizedwords; i++) {
1235       if (regacc_push(&r, reg, dwords[i]))
1236          dump_register(&r, level + 1);
1237       reg_set(reg, dwords[i]);
1238       reg++;
1239    }
1240 }
1241 
/* Kinds of state payload that cp_load_state() switches on when dumping.
 * Zero is deliberately not used, so a zero-initialized lookup table entry
 * does not match any of these.
 */
enum state_t {
   TEX_SAMP = 1,
   TEX_CONST = 2,
   TEX_MIPADDR = 3, /* a3xx only */
   SHADER_PROG = 4,
   SHADER_CONST = 5,

   /* image/ssbo state: */
   SSBO_0 = 6,
   SSBO_1 = 7,
   SSBO_2 = 8,

   UBO = 9,

   /* unknown things, just hexdumped: */
   UNKNOWN_DWORDS = 10,
   UNKNOWN_2DWORDS = 11,
   UNKNOWN_4DWORDS = 12,
};
1261 
/* State block ids, used as the row index of the lookup table in
 * a3xx_get_state_type():
 */
enum adreno_state_block {
   SB_VERT_TEX       = 0x0,
   SB_VERT_MIPADDR   = 0x1,
   SB_FRAG_TEX       = 0x2,
   SB_FRAG_MIPADDR   = 0x3,
   SB_VERT_SHADER    = 0x4,
   SB_GEOM_SHADER    = 0x5,
   SB_FRAG_SHADER    = 0x6,
   SB_COMPUTE_SHADER = 0x7,
};
1272 
1273 /* TODO there is probably a clever way to let rnndec parse things so
1274  * we don't have to care about packet format differences across gens
1275  */
1276 
1277 static void
a3xx_get_state_type(uint32_t * dwords,gl_shader_stage * stage,enum state_t * state,enum state_src_t * src)1278 a3xx_get_state_type(uint32_t *dwords, gl_shader_stage *stage,
1279                     enum state_t *state, enum state_src_t *src)
1280 {
1281    unsigned state_block_id = (dwords[0] >> 19) & 0x7;
1282    unsigned state_type = dwords[1] & 0x3;
1283    static const struct {
1284       gl_shader_stage stage;
1285       enum state_t state;
1286    } lookup[0xf][0x3] = {
1287       [SB_VERT_TEX][0] = {MESA_SHADER_VERTEX, TEX_SAMP},
1288       [SB_VERT_TEX][1] = {MESA_SHADER_VERTEX, TEX_CONST},
1289       [SB_FRAG_TEX][0] = {MESA_SHADER_FRAGMENT, TEX_SAMP},
1290       [SB_FRAG_TEX][1] = {MESA_SHADER_FRAGMENT, TEX_CONST},
1291       [SB_VERT_SHADER][0] = {MESA_SHADER_VERTEX, SHADER_PROG},
1292       [SB_VERT_SHADER][1] = {MESA_SHADER_VERTEX, SHADER_CONST},
1293       [SB_FRAG_SHADER][0] = {MESA_SHADER_FRAGMENT, SHADER_PROG},
1294       [SB_FRAG_SHADER][1] = {MESA_SHADER_FRAGMENT, SHADER_CONST},
1295    };
1296 
1297    *stage = lookup[state_block_id][state_type].stage;
1298    *state = lookup[state_block_id][state_type].state;
1299    unsigned state_src = (dwords[0] >> 16) & 0x7;
1300    if (state_src == 0 /* SS_DIRECT */)
1301       *src = STATE_SRC_DIRECT;
1302    else
1303       *src = STATE_SRC_INDIRECT;
1304 }
1305 
1306 static enum state_src_t
_get_state_src(unsigned dword0)1307 _get_state_src(unsigned dword0)
1308 {
1309    switch ((dword0 >> 16) & 0x3) {
1310    case 0: /* SS4_DIRECT / SS6_DIRECT */
1311       return STATE_SRC_DIRECT;
1312    case 2: /* SS4_INDIRECT / SS6_INDIRECT */
1313       return STATE_SRC_INDIRECT;
1314    case 1: /* SS6_BINDLESS */
1315       return STATE_SRC_BINDLESS;
1316    default:
1317       return STATE_SRC_DIRECT;
1318    }
1319 }
1320 
1321 static void
_get_state_type(unsigned state_block_id,unsigned state_type,gl_shader_stage * stage,enum state_t * state)1322 _get_state_type(unsigned state_block_id, unsigned state_type,
1323                 gl_shader_stage *stage, enum state_t *state)
1324 {
1325    static const struct {
1326       gl_shader_stage stage;
1327       enum state_t state;
1328    } lookup[0x10][0x4] = {
1329       // SB4_VS_TEX:
1330       [0x0][0] = {MESA_SHADER_VERTEX, TEX_SAMP},
1331       [0x0][1] = {MESA_SHADER_VERTEX, TEX_CONST},
1332       [0x0][2] = {MESA_SHADER_VERTEX, UBO},
1333       // SB4_HS_TEX:
1334       [0x1][0] = {MESA_SHADER_TESS_CTRL, TEX_SAMP},
1335       [0x1][1] = {MESA_SHADER_TESS_CTRL, TEX_CONST},
1336       [0x1][2] = {MESA_SHADER_TESS_CTRL, UBO},
1337       // SB4_DS_TEX:
1338       [0x2][0] = {MESA_SHADER_TESS_EVAL, TEX_SAMP},
1339       [0x2][1] = {MESA_SHADER_TESS_EVAL, TEX_CONST},
1340       [0x2][2] = {MESA_SHADER_TESS_EVAL, UBO},
1341       // SB4_GS_TEX:
1342       [0x3][0] = {MESA_SHADER_GEOMETRY, TEX_SAMP},
1343       [0x3][1] = {MESA_SHADER_GEOMETRY, TEX_CONST},
1344       [0x3][2] = {MESA_SHADER_GEOMETRY, UBO},
1345       // SB4_FS_TEX:
1346       [0x4][0] = {MESA_SHADER_FRAGMENT, TEX_SAMP},
1347       [0x4][1] = {MESA_SHADER_FRAGMENT, TEX_CONST},
1348       [0x4][2] = {MESA_SHADER_FRAGMENT, UBO},
1349       // SB4_CS_TEX:
1350       [0x5][0] = {MESA_SHADER_COMPUTE, TEX_SAMP},
1351       [0x5][1] = {MESA_SHADER_COMPUTE, TEX_CONST},
1352       [0x5][2] = {MESA_SHADER_COMPUTE, UBO},
1353       // SB4_VS_SHADER:
1354       [0x8][0] = {MESA_SHADER_VERTEX, SHADER_PROG},
1355       [0x8][1] = {MESA_SHADER_VERTEX, SHADER_CONST},
1356       [0x8][2] = {MESA_SHADER_VERTEX, UBO},
1357       // SB4_HS_SHADER
1358       [0x9][0] = {MESA_SHADER_TESS_CTRL, SHADER_PROG},
1359       [0x9][1] = {MESA_SHADER_TESS_CTRL, SHADER_CONST},
1360       [0x9][2] = {MESA_SHADER_TESS_CTRL, UBO},
1361       // SB4_DS_SHADER
1362       [0xa][0] = {MESA_SHADER_TESS_EVAL, SHADER_PROG},
1363       [0xa][1] = {MESA_SHADER_TESS_EVAL, SHADER_CONST},
1364       [0xa][2] = {MESA_SHADER_TESS_EVAL, UBO},
1365       // SB4_GS_SHADER
1366       [0xb][0] = {MESA_SHADER_GEOMETRY, SHADER_PROG},
1367       [0xb][1] = {MESA_SHADER_GEOMETRY, SHADER_CONST},
1368       [0xb][2] = {MESA_SHADER_GEOMETRY, UBO},
1369       // SB4_FS_SHADER:
1370       [0xc][0] = {MESA_SHADER_FRAGMENT, SHADER_PROG},
1371       [0xc][1] = {MESA_SHADER_FRAGMENT, SHADER_CONST},
1372       [0xc][2] = {MESA_SHADER_FRAGMENT, UBO},
1373       // SB4_CS_SHADER:
1374       [0xd][0] = {MESA_SHADER_COMPUTE, SHADER_PROG},
1375       [0xd][1] = {MESA_SHADER_COMPUTE, SHADER_CONST},
1376       [0xd][2] = {MESA_SHADER_COMPUTE, UBO},
1377       [0xd][3] = {MESA_SHADER_COMPUTE, SSBO_0}, /* a6xx location */
1378       // SB4_SSBO (shared across all stages)
1379       [0xe][0] = {0, SSBO_0}, /* a5xx (and a4xx?) location */
1380       [0xe][1] = {0, SSBO_1},
1381       [0xe][2] = {0, SSBO_2},
1382       // SB4_CS_SSBO
1383       [0xf][0] = {MESA_SHADER_COMPUTE, SSBO_0},
1384       [0xf][1] = {MESA_SHADER_COMPUTE, SSBO_1},
1385       [0xf][2] = {MESA_SHADER_COMPUTE, SSBO_2},
1386       // unknown things
1387       /* This looks like combined UBO state for 3d stages (a5xx and
1388        * before??  I think a6xx has UBO state per shader stage:
1389        */
1390       [0x6][2] = {0, UBO},
1391       [0x7][1] = {0, UNKNOWN_2DWORDS},
1392    };
1393 
1394    *stage = lookup[state_block_id][state_type].stage;
1395    *state = lookup[state_block_id][state_type].state;
1396 }
1397 
1398 static void
a4xx_get_state_type(uint32_t * dwords,gl_shader_stage * stage,enum state_t * state,enum state_src_t * src)1399 a4xx_get_state_type(uint32_t *dwords, gl_shader_stage *stage,
1400                     enum state_t *state, enum state_src_t *src)
1401 {
1402    unsigned state_block_id = (dwords[0] >> 18) & 0xf;
1403    unsigned state_type = dwords[1] & 0x3;
1404    _get_state_type(state_block_id, state_type, stage, state);
1405    *src = _get_state_src(dwords[0]);
1406 }
1407 
1408 static void
a6xx_get_state_type(uint32_t * dwords,gl_shader_stage * stage,enum state_t * state,enum state_src_t * src)1409 a6xx_get_state_type(uint32_t *dwords, gl_shader_stage *stage,
1410                     enum state_t *state, enum state_src_t *src)
1411 {
1412    unsigned state_block_id = (dwords[0] >> 18) & 0xf;
1413    unsigned state_type = (dwords[0] >> 14) & 0x3;
1414    _get_state_type(state_block_id, state_type, stage, state);
1415    *src = _get_state_src(dwords[0]);
1416 }
1417 
1418 static void
dump_tex_samp(uint32_t * texsamp,enum state_src_t src,int num_unit,int level)1419 dump_tex_samp(uint32_t *texsamp, enum state_src_t src, int num_unit, int level)
1420 {
1421    for (int i = 0; i < num_unit; i++) {
1422       /* work-around to reduce noise for opencl blob which always
1423        * writes the max # regardless of # of textures used
1424        */
1425       if ((num_unit == 16) && (texsamp[0] == 0) && (texsamp[1] == 0))
1426          break;
1427 
1428       if (options->info->chip == 3) {
1429          dump_domain(texsamp, 2, level + 2, "A3XX_TEX_SAMP");
1430          dump_hex(texsamp, 2, level + 1);
1431          texsamp += 2;
1432       } else if (options->info->chip == 4) {
1433          dump_domain(texsamp, 2, level + 2, "A4XX_TEX_SAMP");
1434          dump_hex(texsamp, 2, level + 1);
1435          texsamp += 2;
1436       } else if (options->info->chip == 5) {
1437          dump_domain(texsamp, 4, level + 2, "A5XX_TEX_SAMP");
1438          dump_hex(texsamp, 4, level + 1);
1439          texsamp += 4;
1440       } else if ((6 <= options->info->chip) && (options->info->chip < 8)) {
1441          dump_domain(texsamp, 4, level + 2, "A6XX_TEX_SAMP");
1442          dump_hex(texsamp, 4, level + 1);
1443          texsamp += src == STATE_SRC_BINDLESS ? 16 : 4;
1444       }
1445    }
1446 }
1447 
1448 static void
dump_tex_const(uint32_t * texconst,int num_unit,int level)1449 dump_tex_const(uint32_t *texconst, int num_unit, int level)
1450 {
1451    for (int i = 0; i < num_unit; i++) {
1452       /* work-around to reduce noise for opencl blob which always
1453        * writes the max # regardless of # of textures used
1454        */
1455       if ((num_unit == 16) && (texconst[0] == 0) && (texconst[1] == 0) &&
1456           (texconst[2] == 0) && (texconst[3] == 0))
1457          break;
1458 
1459       if (options->info->chip == 3) {
1460          dump_domain(texconst, 4, level + 2, "A3XX_TEX_CONST");
1461          dump_hex(texconst, 4, level + 1);
1462          texconst += 4;
1463       } else if (options->info->chip == 4) {
1464          dump_domain(texconst, 8, level + 2, "A4XX_TEX_CONST");
1465          if (options->dump_textures) {
1466             uint32_t addr = texconst[4] & ~0x1f;
1467             dump_gpuaddr(addr, level - 2);
1468          }
1469          dump_hex(texconst, 8, level + 1);
1470          texconst += 8;
1471       } else if (options->info->chip == 5) {
1472          dump_domain(texconst, 12, level + 2, "A5XX_TEX_CONST");
1473          if (options->dump_textures) {
1474             uint64_t addr =
1475                (((uint64_t)texconst[5] & 0x1ffff) << 32) | texconst[4];
1476             dump_gpuaddr_size(addr, level - 2, hostlen(addr) / 4, 3);
1477          }
1478          dump_hex(texconst, 12, level + 1);
1479          texconst += 12;
1480       } else if ((6 <= options->info->chip) && (options->info->chip < 8)) {
1481          dump_domain(texconst, 16, level + 2, "A6XX_TEX_CONST");
1482          if (options->dump_textures) {
1483             uint64_t addr =
1484                (((uint64_t)texconst[5] & 0x1ffff) << 32) | texconst[4];
1485             dump_gpuaddr_size(addr, level - 2, hostlen(addr) / 4, 3);
1486          }
1487          dump_hex(texconst, 16, level + 1);
1488          texconst += 16;
1489       }
1490    }
1491 }
1492 
1493 static void
dump_bindless_descriptors(bool is_compute,int level)1494 dump_bindless_descriptors(bool is_compute, int level)
1495 {
1496    if (!options->dump_bindless)
1497       return;
1498 
1499    printl(2, "%sdraw[%i] bindless descriptors\n", levels[level], draw_count);
1500 
1501    for (unsigned i = 0; i < 128; i++) {
1502       static char reg_name[64];
1503       if (is_compute) {
1504          sprintf(reg_name, "SP_CS_BINDLESS_BASE[%u].DESCRIPTOR", i);
1505       } else {
1506          sprintf(reg_name, "SP_BINDLESS_BASE[%u].DESCRIPTOR", i);
1507       }
1508       const unsigned base_reg = regbase(reg_name);
1509       if (!base_reg)
1510          break;
1511 
1512       printl(2, "%sset[%u]:\n", levels[level + 1], i);
1513 
1514       uint64_t ext_src_addr;
1515       if (is_64b()) {
1516          const unsigned reg = base_reg + i * 2;
1517          if (!reg_written(reg))
1518             continue;
1519 
1520          ext_src_addr = reg_val(reg) & 0xfffffffc;
1521          ext_src_addr |= ((uint64_t)reg_val(reg + 1)) << 32;
1522       } else {
1523          const unsigned reg = base_reg + i;
1524          if (!reg_written(reg))
1525             continue;
1526 
1527          ext_src_addr = reg_val(reg) & 0xfffffffc;
1528       }
1529 
1530       uint32_t *contents = NULL;
1531       if (ext_src_addr)
1532          contents = hostptr(ext_src_addr);
1533 
1534       if (!contents)
1535          continue;
1536 
1537       unsigned length = hostlen(ext_src_addr);
1538       unsigned desc_count = length / (16 * sizeof(uint32_t));
1539       for (unsigned desc_idx = 0; desc_idx < desc_count; desc_idx++) {
1540          printl(2, "%sUBO[%u]:\n", levels[level + 1], desc_idx);
1541          dump_domain(contents, 2, level + 2, "A6XX_UBO");
1542 
1543          printl(2, "%sSTORAGE/TEXEL/IMAGE[%u]:\n", levels[level + 1], desc_idx);
1544          dump_tex_const(contents, 1, level);
1545 
1546          printl(2, "%sSAMPLER[%u]:\n", levels[level + 1], desc_idx);
1547          dump_tex_samp(contents, STATE_SRC_BINDLESS, 1, level);
1548 
1549          contents += 16;
1550       }
1551    }
1552 }
1553 
1554 static void
cp_load_state(uint32_t * dwords,uint32_t sizedwords,int level)1555 cp_load_state(uint32_t *dwords, uint32_t sizedwords, int level)
1556 {
1557    gl_shader_stage stage;
1558    enum state_t state;
1559    enum state_src_t src;
1560    uint32_t num_unit = (dwords[0] >> 22) & 0x1ff;
1561    uint64_t ext_src_addr;
1562    void *contents;
1563    int i;
1564 
1565    if (quiet(2) && !options->script)
1566       return;
1567 
1568    if (options->info->chip >= 6)
1569       a6xx_get_state_type(dwords, &stage, &state, &src);
1570    else if (options->info->chip >= 4)
1571       a4xx_get_state_type(dwords, &stage, &state, &src);
1572    else
1573       a3xx_get_state_type(dwords, &stage, &state, &src);
1574 
1575    switch (src) {
1576    case STATE_SRC_DIRECT:
1577       ext_src_addr = 0;
1578       break;
1579    case STATE_SRC_INDIRECT:
1580       if (is_64b()) {
1581          ext_src_addr = dwords[1] & 0xfffffffc;
1582          ext_src_addr |= ((uint64_t)dwords[2]) << 32;
1583       } else {
1584          ext_src_addr = dwords[1] & 0xfffffffc;
1585       }
1586 
1587       break;
1588    case STATE_SRC_BINDLESS: {
1589       const unsigned base_reg = stage == MESA_SHADER_COMPUTE
1590                                    ? regbase("HLSQ_CS_BINDLESS_BASE[0].DESCRIPTOR")
1591                                    : regbase("HLSQ_BINDLESS_BASE[0].DESCRIPTOR");
1592 
1593       if (is_64b()) {
1594          const unsigned reg = base_reg + (dwords[1] >> 28) * 2;
1595          ext_src_addr = reg_val(reg) & 0xfffffffc;
1596          ext_src_addr |= ((uint64_t)reg_val(reg + 1)) << 32;
1597       } else {
1598          const unsigned reg = base_reg + (dwords[1] >> 28);
1599          ext_src_addr = reg_val(reg) & 0xfffffffc;
1600       }
1601 
1602       ext_src_addr += 4 * (dwords[1] & 0xffffff);
1603       break;
1604    }
1605    }
1606 
1607    if (ext_src_addr)
1608       contents = hostptr(ext_src_addr);
1609    else
1610       contents = is_64b() ? dwords + 3 : dwords + 2;
1611 
1612    if (!contents)
1613       return;
1614 
1615    switch (state) {
1616    case SHADER_PROG: {
1617       const char *ext = NULL;
1618 
1619       if (quiet(2))
1620          return;
1621 
1622       if (options->info->chip >= 4)
1623          num_unit *= 16;
1624       else if (options->info->chip >= 3)
1625          num_unit *= 4;
1626 
1627       /* shaders:
1628        *
1629        * note: num_unit seems to be # of instruction groups, where
1630        * an instruction group has 4 64bit instructions.
1631        */
1632       if (stage == MESA_SHADER_VERTEX) {
1633          ext = "vo3";
1634       } else if (stage == MESA_SHADER_GEOMETRY) {
1635          ext = "go3";
1636       } else if (stage == MESA_SHADER_COMPUTE) {
1637          ext = "co3";
1638       } else if (stage == MESA_SHADER_FRAGMENT) {
1639          ext = "fo3";
1640       }
1641 
1642       if (contents)
1643          try_disasm_a3xx(contents, num_unit * 2, level + 2, stdout,
1644                          options->info->chip * 100);
1645 
1646       /* dump raw shader: */
1647       if (ext)
1648          dump_shader(ext, contents, num_unit * 2 * 4);
1649 
1650       break;
1651    }
1652    case SHADER_CONST: {
1653       if (quiet(2))
1654          return;
1655 
1656       /* uniforms/consts:
1657        *
1658        * note: num_unit seems to be # of pairs of dwords??
1659        */
1660 
1661       if (options->info->chip >= 4)
1662          num_unit *= 2;
1663 
1664       dump_float(contents, num_unit * 2, level + 1);
1665       dump_hex(contents, num_unit * 2, level + 1);
1666 
1667       break;
1668    }
1669    case TEX_MIPADDR: {
1670       uint32_t *addrs = contents;
1671 
1672       if (quiet(2))
1673          return;
1674 
1675       /* mipmap consts block just appears to be array of num_unit gpu addr's: */
1676       for (i = 0; i < num_unit; i++) {
1677          void *ptr = hostptr(addrs[i]);
1678          printf("%s%2d: %08x\n", levels[level + 1], i, addrs[i]);
1679          if (options->dump_textures) {
1680             printf("base=%08x\n", (uint32_t)gpubaseaddr(addrs[i]));
1681             dump_hex(ptr, hostlen(addrs[i]) / 4, level + 1);
1682          }
1683       }
1684       break;
1685    }
1686    case TEX_SAMP: {
1687       dump_tex_samp(contents, src, num_unit, level);
1688       break;
1689    }
1690    case TEX_CONST: {
1691       dump_tex_const(contents, num_unit, level);
1692       break;
1693    }
1694    case SSBO_0: {
1695       uint32_t *ssboconst = (uint32_t *)contents;
1696 
1697       for (i = 0; i < num_unit; i++) {
1698          int sz = 4;
1699          if (options->info->chip == 4) {
1700             dump_domain(ssboconst, 4, level + 2, "A4XX_SSBO_0");
1701          } else if (options->info->chip == 5) {
1702             dump_domain(ssboconst, 4, level + 2, "A5XX_SSBO_0");
1703          } else if ((6 <= options->info->chip) && (options->info->chip < 8)) {
1704             sz = 16;
1705             dump_domain(ssboconst, 16, level + 2, "A6XX_TEX_CONST");
1706          }
1707          dump_hex(ssboconst, sz, level + 1);
1708          ssboconst += sz;
1709       }
1710       break;
1711    }
1712    case SSBO_1: {
1713       uint32_t *ssboconst = (uint32_t *)contents;
1714 
1715       for (i = 0; i < num_unit; i++) {
1716          if (options->info->chip == 4)
1717             dump_domain(ssboconst, 2, level + 2, "A4XX_SSBO_1");
1718          else if (options->info->chip == 5)
1719             dump_domain(ssboconst, 2, level + 2, "A5XX_SSBO_1");
1720          dump_hex(ssboconst, 2, level + 1);
1721          ssboconst += 2;
1722       }
1723       break;
1724    }
1725    case SSBO_2: {
1726       uint32_t *ssboconst = (uint32_t *)contents;
1727 
1728       for (i = 0; i < num_unit; i++) {
1729          /* TODO a4xx and a5xx might be same: */
1730          if (options->info->chip == 5) {
1731             dump_domain(ssboconst, 2, level + 2, "A5XX_SSBO_2");
1732             dump_hex(ssboconst, 2, level + 1);
1733          }
1734          if (options->dump_textures) {
1735             uint64_t addr =
1736                (((uint64_t)ssboconst[1] & 0x1ffff) << 32) | ssboconst[0];
1737             dump_gpuaddr_size(addr, level - 2, hostlen(addr) / 4, 3);
1738          }
1739          ssboconst += 2;
1740       }
1741       break;
1742    }
1743    case UBO: {
1744       uint32_t *uboconst = (uint32_t *)contents;
1745 
1746       for (i = 0; i < num_unit; i++) {
1747          // TODO probably similar on a4xx..
1748          if (options->info->chip == 5)
1749             dump_domain(uboconst, 2, level + 2, "A5XX_UBO");
1750          else if (options->info->chip >= 6)
1751             dump_domain(uboconst, 2, level + 2, "A6XX_UBO");
1752          dump_hex(uboconst, 2, level + 1);
1753          if (options->dump_textures) {
1754             uint64_t addr =
1755                (((uint64_t)uboconst[1] & 0x1ffff) << 32) | uboconst[0];
1756             /* Size encoded in descriptor is in units of vec4: */
1757             unsigned sizedwords = 4 * (uboconst[1] >> 17);
1758             dump_gpuaddr_size(addr, level -2, sizedwords, 3);
1759          }
1760          uboconst += src == STATE_SRC_BINDLESS ? 16 : 2;
1761       }
1762       break;
1763    }
1764    case UNKNOWN_DWORDS: {
1765       if (quiet(2))
1766          return;
1767       dump_hex(contents, num_unit, level + 1);
1768       break;
1769    }
1770    case UNKNOWN_2DWORDS: {
1771       if (quiet(2))
1772          return;
1773       dump_hex(contents, num_unit * 2, level + 1);
1774       break;
1775    }
1776    case UNKNOWN_4DWORDS: {
1777       if (quiet(2))
1778          return;
1779       dump_hex(contents, num_unit * 4, level + 1);
1780       break;
1781    }
1782    default:
1783       if (quiet(2))
1784          return;
1785       /* hmm.. */
1786       dump_hex(contents, num_unit, level + 1);
1787       break;
1788    }
1789 }
1790 
1791 static void
cp_set_bin(uint32_t * dwords,uint32_t sizedwords,int level)1792 cp_set_bin(uint32_t *dwords, uint32_t sizedwords, int level)
1793 {
1794    bin_x1 = dwords[1] & 0xffff;
1795    bin_y1 = dwords[1] >> 16;
1796    bin_x2 = dwords[2] & 0xffff;
1797    bin_y2 = dwords[2] >> 16;
1798 }
1799 
1800 static void
dump_a2xx_tex_const(uint32_t * dwords,uint32_t sizedwords,uint32_t val,int level)1801 dump_a2xx_tex_const(uint32_t *dwords, uint32_t sizedwords, uint32_t val,
1802                     int level)
1803 {
1804    uint32_t w, h, p;
1805    uint32_t gpuaddr, flags, mip_gpuaddr, mip_flags;
1806    uint32_t min, mag, swiz, clamp_x, clamp_y, clamp_z;
1807    static const char *filter[] = {
1808       "point",
1809       "bilinear",
1810       "bicubic",
1811    };
1812    static const char *clamp[] = {
1813       "wrap",
1814       "mirror",
1815       "clamp-last-texel",
1816    };
1817    static const char swiznames[] = "xyzw01??";
1818 
1819    /* see sys2gmem_tex_const[] in adreno_a2xxx.c */
1820 
1821    /* Texture, FormatXYZW=Unsigned, ClampXYZ=Wrap/Repeat,
1822     * RFMode=ZeroClamp-1, Dim=1:2d, pitch
1823     */
1824    p = (dwords[0] >> 22) << 5;
1825    clamp_x = (dwords[0] >> 10) & 0x3;
1826    clamp_y = (dwords[0] >> 13) & 0x3;
1827    clamp_z = (dwords[0] >> 16) & 0x3;
1828 
1829    /* Format=6:8888_WZYX, EndianSwap=0:None, ReqSize=0:256bit, DimHi=0,
1830     * NearestClamp=1:OGL Mode
1831     */
1832    parse_dword_addr(dwords[1], &gpuaddr, &flags, 0xfff);
1833 
1834    /* Width, Height, EndianSwap=0:None */
1835    w = (dwords[2] & 0x1fff) + 1;
1836    h = ((dwords[2] >> 13) & 0x1fff) + 1;
1837 
1838    /* NumFormat=0:RF, DstSelXYZW=XYZW, ExpAdj=0, MagFilt=MinFilt=0:Point,
1839     * Mip=2:BaseMap
1840     */
1841    mag = (dwords[3] >> 19) & 0x3;
1842    min = (dwords[3] >> 21) & 0x3;
1843    swiz = (dwords[3] >> 1) & 0xfff;
1844 
1845    /* VolMag=VolMin=0:Point, MinMipLvl=0, MaxMipLvl=1, LodBiasH=V=0,
1846     * Dim3d=0
1847     */
1848    // XXX
1849 
1850    /* BorderColor=0:ABGRBlack, ForceBC=0:diable, TriJuice=0, Aniso=0,
1851     * Dim=1:2d, MipPacking=0
1852     */
1853    parse_dword_addr(dwords[5], &mip_gpuaddr, &mip_flags, 0xfff);
1854 
1855    printf("%sset texture const %04x\n", levels[level], val);
1856    printf("%sclamp x/y/z: %s/%s/%s\n", levels[level + 1], clamp[clamp_x],
1857           clamp[clamp_y], clamp[clamp_z]);
1858    printf("%sfilter min/mag: %s/%s\n", levels[level + 1], filter[min],
1859           filter[mag]);
1860    printf("%sswizzle: %c%c%c%c\n", levels[level + 1],
1861           swiznames[(swiz >> 0) & 0x7], swiznames[(swiz >> 3) & 0x7],
1862           swiznames[(swiz >> 6) & 0x7], swiznames[(swiz >> 9) & 0x7]);
1863    printf("%saddr=%08x (flags=%03x), size=%dx%d, pitch=%d, format=%s\n",
1864           levels[level + 1], gpuaddr, flags, w, h, p,
1865           rnn_enumname(rnn, "a2xx_sq_surfaceformat", flags & 0xf));
1866    printf("%smipaddr=%08x (flags=%03x)\n", levels[level + 1], mip_gpuaddr,
1867           mip_flags);
1868 }
1869 
1870 static void
dump_a2xx_shader_const(uint32_t * dwords,uint32_t sizedwords,uint32_t val,int level)1871 dump_a2xx_shader_const(uint32_t *dwords, uint32_t sizedwords, uint32_t val,
1872                        int level)
1873 {
1874    int i;
1875    printf("%sset shader const %04x\n", levels[level], val);
1876    for (i = 0; i < sizedwords;) {
1877       uint32_t gpuaddr, flags;
1878       parse_dword_addr(dwords[i++], &gpuaddr, &flags, 0xf);
1879       void *addr = hostptr(gpuaddr);
1880       if (addr) {
1881          const char *fmt =
1882             rnn_enumname(rnn, "a2xx_sq_surfaceformat", flags & 0xf);
1883          uint32_t size = dwords[i++];
1884          printf("%saddr=%08x, size=%d, format=%s\n", levels[level + 1], gpuaddr,
1885                 size, fmt);
1886          // TODO maybe dump these as bytes instead of dwords?
1887          size = (size + 3) / 4; // for now convert to dwords
1888          dump_hex(addr, MIN2(size, 64), level + 1);
1889          if (size > MIN2(size, 64))
1890             printf("%s\t\t...\n", levels[level + 1]);
1891          dump_float(addr, MIN2(size, 64), level + 1);
1892          if (size > MIN2(size, 64))
1893             printf("%s\t\t...\n", levels[level + 1]);
1894       }
1895    }
1896 }
1897 
1898 static void
cp_set_const(uint32_t * dwords,uint32_t sizedwords,int level)1899 cp_set_const(uint32_t *dwords, uint32_t sizedwords, int level)
1900 {
1901    uint32_t val = dwords[0] & 0xffff;
1902    switch ((dwords[0] >> 16) & 0xf) {
1903    case 0x0:
1904       dump_float((float *)(dwords + 1), sizedwords - 1, level + 1);
1905       break;
1906    case 0x1:
1907       /* need to figure out how const space is partitioned between
1908        * attributes, textures, etc..
1909        */
1910       if (val < 0x78) {
1911          dump_a2xx_tex_const(dwords + 1, sizedwords - 1, val, level);
1912       } else {
1913          dump_a2xx_shader_const(dwords + 1, sizedwords - 1, val, level);
1914       }
1915       break;
1916    case 0x2:
1917       printf("%sset bool const %04x\n", levels[level], val);
1918       break;
1919    case 0x3:
1920       printf("%sset loop const %04x\n", levels[level], val);
1921       break;
1922    case 0x4:
1923       val += 0x2000;
1924       if (dwords[0] & 0x80000000) {
1925          uint32_t srcreg = dwords[1];
1926          uint32_t dstval = dwords[2];
1927 
1928          /* TODO: not sure what happens w/ payload != 2.. */
1929          assert(sizedwords == 3);
1930          assert(srcreg < ARRAY_SIZE(type0_reg_vals));
1931 
1932          /* note: rnn_regname uses a static buf so we can't do
1933           * two regname() calls for one printf..
1934           */
1935          printf("%s%s = %08x + ", levels[level], regname(val, 1), dstval);
1936          printf("%s (%08x)\n", regname(srcreg, 1), type0_reg_vals[srcreg]);
1937 
1938          dstval += type0_reg_vals[srcreg];
1939 
1940          dump_registers(val, &dstval, 1, level + 1);
1941       } else {
1942          dump_registers(val, dwords + 1, sizedwords - 1, level + 1);
1943       }
1944       break;
1945    }
1946 }
1947 
1948 static void dump_register_summary(int level);
1949 
1950 static void
cp_event_write(uint32_t * dwords,uint32_t sizedwords,int level)1951 cp_event_write(uint32_t *dwords, uint32_t sizedwords, int level)
1952 {
1953    const char *name = rnn_enumname(rnn, "vgt_event_type", dwords[0] & 0xff);
1954    printl(2, "%sevent %s\n", levels[level], name);
1955 
1956    if (name && (options->info->chip > 5)) {
1957       char eventname[64];
1958       snprintf(eventname, sizeof(eventname), "EVENT:%s", name);
1959       if (!strcmp(name, "BLIT") || !strcmp(name, "LRZ_CLEAR")) {
1960          do_query(eventname, 0);
1961          print_mode(level);
1962          dump_register_summary(level);
1963       }
1964    }
1965 }
1966 
1967 static void
dump_register_summary(int level)1968 dump_register_summary(int level)
1969 {
1970    uint32_t i;
1971    bool saved_summary = summary;
1972    summary = false;
1973 
1974    in_summary = true;
1975 
1976    struct regacc r = regacc(NULL);
1977 
1978    /* dump current state of registers: */
1979    printl(2, "%sdraw[%i] register values\n", levels[level], draw_count);
1980 
1981    bool changed = false;
1982    bool written = false;
1983 
1984    for (i = 0; i < regcnt(); i++) {
1985       uint32_t regbase = i;
1986       uint32_t lastval = reg_val(regbase);
1987       /* skip registers that haven't been updated since last draw/blit: */
1988       if (!(options->allregs || reg_rewritten(regbase)))
1989          continue;
1990       if (!reg_written(regbase))
1991          continue;
1992       if (lastval != lastvals[regbase]) {
1993          changed |= true;
1994          lastvals[regbase] = lastval;
1995       }
1996       if (reg_rewritten(regbase)) {
1997          written |= true;
1998       }
1999       if (!quiet(2)) {
2000          if (regacc_push(&r, regbase, lastval)) {
2001             if (changed) {
2002                printl(2, "!");
2003             } else {
2004                printl(2, " ");
2005             }
2006             if (written) {
2007                printl(2, "+");
2008             } else {
2009                printl(2, " ");
2010             }
2011             printl(2, "\t%08"PRIx64, r.value);
2012             dump_register(&r, level);
2013 
2014             changed = written = false;
2015          }
2016       }
2017    }
2018 
2019    clear_rewritten();
2020 
2021    in_summary = false;
2022 
2023    draw_count++;
2024    summary = saved_summary;
2025 }
2026 
2027 static uint32_t
draw_indx_common(uint32_t * dwords,int level)2028 draw_indx_common(uint32_t *dwords, int level)
2029 {
2030    uint32_t prim_type = dwords[1] & 0x1f;
2031    uint32_t source_select = (dwords[1] >> 6) & 0x3;
2032    uint32_t num_indices = dwords[2];
2033    const char *primtype;
2034 
2035    primtype = rnn_enumname(rnn, "pc_di_primtype", prim_type);
2036 
2037    do_query(primtype, num_indices);
2038 
2039    printl(2, "%sdraw:          %d\n", levels[level], draws[ib]);
2040    printl(2, "%sprim_type:     %s (%d)\n", levels[level], primtype, prim_type);
2041    printl(2, "%ssource_select: %s (%d)\n", levels[level],
2042           rnn_enumname(rnn, "pc_di_src_sel", source_select), source_select);
2043    printl(2, "%snum_indices:   %d\n", levels[level], num_indices);
2044 
2045    vertices += num_indices;
2046 
2047    draws[ib]++;
2048 
2049    return num_indices;
2050 }
2051 
/* Index element width selector used by the CP_DRAW_INDX* decoders.
 * Note that IGN, 16_BIT and INVALID deliberately share the value 0.
 */
enum pc_di_index_size {
   INDEX_SIZE_IGN     = 0,
   INDEX_SIZE_16_BIT  = 0,
   INDEX_SIZE_32_BIT  = 1,
   INDEX_SIZE_8_BIT   = 2,
   INDEX_SIZE_INVALID = 0,
};
2059 
2060 static void
cp_draw_indx(uint32_t * dwords,uint32_t sizedwords,int level)2061 cp_draw_indx(uint32_t *dwords, uint32_t sizedwords, int level)
2062 {
2063    uint32_t num_indices = draw_indx_common(dwords, level);
2064 
2065    assert(!is_64b());
2066 
2067    /* if we have an index buffer, dump that: */
2068    if (sizedwords == 5) {
2069       void *ptr = hostptr(dwords[3]);
2070       printl(2, "%sgpuaddr:       %08x\n", levels[level], dwords[3]);
2071       printl(2, "%sidx_size:      %d\n", levels[level], dwords[4]);
2072       if (ptr) {
2073          enum pc_di_index_size size =
2074             ((dwords[1] >> 11) & 1) | ((dwords[1] >> 12) & 2);
2075          if (!quiet(2)) {
2076             int i;
2077             printf("%sidxs:         ", levels[level]);
2078             if (size == INDEX_SIZE_8_BIT) {
2079                uint8_t *idx = ptr;
2080                for (i = 0; i < dwords[4]; i++)
2081                   printf(" %u", idx[i]);
2082             } else if (size == INDEX_SIZE_16_BIT) {
2083                uint16_t *idx = ptr;
2084                for (i = 0; i < dwords[4] / 2; i++)
2085                   printf(" %u", idx[i]);
2086             } else if (size == INDEX_SIZE_32_BIT) {
2087                uint32_t *idx = ptr;
2088                for (i = 0; i < dwords[4] / 4; i++)
2089                   printf(" %u", idx[i]);
2090             }
2091             printf("\n");
2092             dump_hex(ptr, dwords[4] / 4, level + 1);
2093          }
2094       }
2095    }
2096 
2097    /* don't bother dumping registers for the dummy draw_indx's.. */
2098    if (num_indices > 0) {
2099       dump_bindless_descriptors(false, level);
2100       dump_register_summary(level);
2101    }
2102 
2103    needs_wfi = true;
2104 }
2105 
2106 static void
cp_draw_indx_2(uint32_t * dwords,uint32_t sizedwords,int level)2107 cp_draw_indx_2(uint32_t *dwords, uint32_t sizedwords, int level)
2108 {
2109    uint32_t num_indices = draw_indx_common(dwords, level);
2110    enum pc_di_index_size size =
2111       ((dwords[1] >> 11) & 1) | ((dwords[1] >> 12) & 2);
2112    void *ptr = &dwords[3];
2113    int sz = 0;
2114 
2115    assert(!is_64b());
2116 
2117    /* CP_DRAW_INDX_2 has embedded/inline idx buffer: */
2118    if (!quiet(2)) {
2119       int i;
2120       printf("%sidxs:         ", levels[level]);
2121       if (size == INDEX_SIZE_8_BIT) {
2122          uint8_t *idx = ptr;
2123          for (i = 0; i < num_indices; i++)
2124             printf(" %u", idx[i]);
2125          sz = num_indices;
2126       } else if (size == INDEX_SIZE_16_BIT) {
2127          uint16_t *idx = ptr;
2128          for (i = 0; i < num_indices; i++)
2129             printf(" %u", idx[i]);
2130          sz = num_indices * 2;
2131       } else if (size == INDEX_SIZE_32_BIT) {
2132          uint32_t *idx = ptr;
2133          for (i = 0; i < num_indices; i++)
2134             printf(" %u", idx[i]);
2135          sz = num_indices * 4;
2136       }
2137       printf("\n");
2138       dump_hex(ptr, sz / 4, level + 1);
2139    }
2140 
2141    /* don't bother dumping registers for the dummy draw_indx's.. */
2142    if (num_indices > 0) {
2143       dump_bindless_descriptors(false, level);
2144       dump_register_summary(level);
2145    }
2146 }
2147 
2148 static void
cp_draw_indx_offset(uint32_t * dwords,uint32_t sizedwords,int level)2149 cp_draw_indx_offset(uint32_t *dwords, uint32_t sizedwords, int level)
2150 {
2151    uint32_t num_indices = dwords[2];
2152    uint32_t prim_type = dwords[0] & 0x1f;
2153 
2154    do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), num_indices);
2155    print_mode(level);
2156 
2157    /* don't bother dumping registers for the dummy draw_indx's.. */
2158    if (num_indices > 0) {
2159       dump_bindless_descriptors(false, level);
2160       dump_register_summary(level);
2161    }
2162 }
2163 
2164 static void
cp_draw_indx_indirect(uint32_t * dwords,uint32_t sizedwords,int level)2165 cp_draw_indx_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
2166 {
2167    uint32_t prim_type = dwords[0] & 0x1f;
2168    uint64_t addr;
2169 
2170    do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);
2171    print_mode(level);
2172 
2173    if (is_64b())
2174       addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1];
2175    else
2176       addr = dwords[1];
2177    dump_gpuaddr_size(addr, level, 0x10, 2);
2178 
2179    if (is_64b())
2180       addr = (((uint64_t)dwords[5] & 0x1ffff) << 32) | dwords[4];
2181    else
2182       addr = dwords[3];
2183    dump_gpuaddr_size(addr, level, 0x10, 2);
2184 
2185    dump_bindless_descriptors(false, level);
2186    dump_register_summary(level);
2187 }
2188 
2189 static void
cp_draw_indirect(uint32_t * dwords,uint32_t sizedwords,int level)2190 cp_draw_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
2191 {
2192    uint32_t prim_type = dwords[0] & 0x1f;
2193    uint64_t addr;
2194 
2195    do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);
2196    print_mode(level);
2197 
2198    addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1];
2199    dump_gpuaddr_size(addr, level, 0x10, 2);
2200 
2201    dump_bindless_descriptors(false, level);
2202    dump_register_summary(level);
2203 }
2204 
2205 static void
cp_draw_indirect_multi(uint32_t * dwords,uint32_t sizedwords,int level)2206 cp_draw_indirect_multi(uint32_t *dwords, uint32_t sizedwords, int level)
2207 {
2208    uint32_t prim_type = dwords[0] & 0x1f;
2209    uint32_t count = dwords[2];
2210 
2211    do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);
2212    print_mode(level);
2213 
2214    struct rnndomain *domain = rnn_finddomain(rnn->db, "CP_DRAW_INDIRECT_MULTI");
2215    uint32_t count_dword = rnndec_decodereg(rnn->vc, domain, "INDIRECT_COUNT");
2216    uint32_t addr_dword = rnndec_decodereg(rnn->vc, domain, "INDIRECT");
2217    uint64_t stride_dword = rnndec_decodereg(rnn->vc, domain, "STRIDE");
2218 
2219    if (count_dword) {
2220       uint64_t count_addr =
2221          ((uint64_t)dwords[count_dword + 1] << 32) | dwords[count_dword];
2222       uint32_t *buf = hostptr(count_addr);
2223 
2224       /* Don't print more draws than this if we don't know the indirect
2225        * count. It's possible the user will give ~0 or some other large
2226        * value, expecting the GPU to fill in the draw count, and we don't
2227        * want to print a gazillion draws in that case:
2228        */
2229       const uint32_t max_draw_count = 0x100;
2230 
2231       /* Assume the indirect count is garbage if it's larger than this
2232        * (quite large) value or 0. Hopefully this catches most cases.
2233        */
2234       const uint32_t max_indirect_draw_count = 0x10000;
2235 
2236       if (buf) {
2237          printf("%sindirect count: %u\n", levels[level], *buf);
2238          if (*buf == 0 || *buf > max_indirect_draw_count) {
2239             /* garbage value */
2240             count = MIN2(count, max_draw_count);
2241          } else {
2242             /* not garbage */
2243             count = MIN2(count, *buf);
2244          }
2245       } else {
2246          count = MIN2(count, max_draw_count);
2247       }
2248    }
2249 
2250    if (addr_dword && stride_dword) {
2251       uint64_t addr =
2252          ((uint64_t)dwords[addr_dword + 1] << 32) | dwords[addr_dword];
2253       uint32_t stride = dwords[stride_dword];
2254 
2255       for (unsigned i = 0; i < count; i++, addr += stride) {
2256          printf("%sdraw %d:\n", levels[level], i);
2257          dump_gpuaddr_size(addr, level, 0x10, 2);
2258       }
2259    }
2260 
2261    dump_bindless_descriptors(false, level);
2262    dump_register_summary(level);
2263 }
2264 
2265 static void
cp_draw_auto(uint32_t * dwords,uint32_t sizedwords,int level)2266 cp_draw_auto(uint32_t *dwords, uint32_t sizedwords, int level)
2267 {
2268    uint32_t prim_type = dwords[0] & 0x1f;
2269 
2270    do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);
2271    print_mode(level);
2272 
2273    dump_bindless_descriptors(false, level);
2274    dump_register_summary(level);
2275 }
2276 
/* Decode CP_RUN_OPENCL: runs the "COMPUTE" query with a count of 1 and
 * dumps the register summary.  The packet payload is not inspected.
 */
static void
cp_run_cl(uint32_t *dwords, uint32_t sizedwords, int level)
{
   (void)dwords;
   (void)sizedwords;

   do_query("COMPUTE", 1);
   dump_register_summary(level);
}
2283 
/* Print the payload bytes as text, stopping at the first NUL byte or after
 * 4 * sizedwords bytes.  Bytes that isascii() rejects are skipped.
 */
static void
print_nop_tail_string(uint32_t *dwords, uint32_t sizedwords)
{
   const char *cur = (void *)dwords;
   const char *const end = cur + 4 * sizedwords;

   while (cur < end && *cur != '\0') {
      if (isascii(*cur))
         printf("%c", *cur);
      cur++;
   }
}
2295 
2296 static void
cp_nop(uint32_t * dwords,uint32_t sizedwords,int level)2297 cp_nop(uint32_t *dwords, uint32_t sizedwords, int level)
2298 {
2299    if (quiet(3))
2300       return;
2301 
2302    /* NOP is used to encode special debug strings by Turnip.
2303     * See tu_cs_emit_debug_magic_strv(...)
2304     */
2305    static int scope_level = 0;
2306    uint32_t identifier = dwords[0];
2307    bool is_special = false;
2308    if (identifier == CP_NOP_MESG) {
2309       printf("### ");
2310       is_special = true;
2311    } else if (identifier == CP_NOP_BEGN) {
2312       printf(">>> #%d: ", ++scope_level);
2313       is_special = true;
2314    } else if (identifier == CP_NOP_END) {
2315       printf("<<< #%d: ", scope_level--);
2316       is_special = true;
2317    }
2318 
2319    if (is_special) {
2320       if (sizedwords > 1) {
2321          print_nop_tail_string(dwords + 1, sizedwords - 1);
2322          printf("\n");
2323       }
2324       return;
2325    }
2326 
2327    // blob doesn't use CP_NOP for string_marker but it does
2328    // use it for things that end up looking like, but aren't
2329    // ascii chars:
2330    if (!options->decode_markers)
2331       return;
2332 
2333    print_nop_tail_string(dwords, sizedwords);
2334    printf("\n");
2335 }
2336 
/* Extract the target address and size from a CP_INDIRECT_BUFFER payload.
 *
 * On 64-bit chips the payload is three dwords (address low, address high,
 * size); otherwise it is two (address, size).  The payload length is
 * asserted to match.
 *
 * ibaddr: receives the target GPU address
 * ibsize: receives the size dword
 *
 * Returns a pointer to the dword just past the payload.
 */
uint32_t *
parse_cp_indirect(uint32_t *dwords, uint32_t sizedwords,
                  uint64_t *ibaddr, uint32_t *ibsize)
{
   if (!is_64b()) {
      assert(sizedwords == 2);

      *ibaddr = dwords[0];
      *ibsize = dwords[1];

      return &dwords[2];
   }

   assert(sizedwords == 3);

   /* a5xx+.. low then high 32b of gpu addr, then size: */
   *ibaddr = (((uint64_t)dwords[1]) << 32) | dwords[0];
   *ibsize = dwords[2];

   return &dwords[3];
}
2359 
2360 static void
cp_indirect(uint32_t * dwords,uint32_t sizedwords,int level)2361 cp_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
2362 {
2363    /* traverse indirect buffers */
2364    uint64_t ibaddr;
2365    uint32_t ibsize;
2366    uint32_t *ptr = NULL;
2367 
2368    dwords = parse_cp_indirect(dwords, sizedwords, &ibaddr, &ibsize);
2369 
2370    if (!quiet(3)) {
2371       if (is_64b()) {
2372          printf("%sibaddr:%016" PRIx64 "\n", levels[level], ibaddr);
2373       } else {
2374          printf("%sibaddr:%08x\n", levels[level], (uint32_t)ibaddr);
2375       }
2376       printf("%sibsize:%08x\n", levels[level], ibsize);
2377    }
2378 
2379    if (options->once && has_dumped(ibaddr, enable_mask))
2380       return;
2381 
2382    /* 'query-compare' mode implies 'once' mode, although we need only to
2383     * process the cmdstream for *any* enable_mask mode, since we are
2384     * comparing binning vs draw reg values at the same time, ie. it is
2385     * not useful to process the same draw in both binning and draw pass.
2386     */
2387    if (options->query_compare && has_dumped(ibaddr, MODE_ALL))
2388       return;
2389 
2390    /* map gpuaddr back to hostptr: */
2391    ptr = hostptr(ibaddr);
2392 
2393    if (ptr) {
2394       /* If the GPU hung within the target IB, the trigger point will be
2395        * just after the current CP_INDIRECT_BUFFER.  Because the IB is
2396        * executed but never returns.  Account for this by checking if
2397        * the IB returned:
2398        */
2399       highlight_gpuaddr(gpuaddr(dwords));
2400 
2401       ib++;
2402       ibs[ib].base = ibaddr;
2403       ibs[ib].size = ibsize;
2404 
2405       dump_commands(ptr, ibsize, level);
2406       ib--;
2407    } else {
2408       fprintf(stderr, "could not find: %016" PRIx64 " (%d)\n", ibaddr, ibsize);
2409    }
2410 }
2411 
2412 static void
cp_start_bin(uint32_t * dwords,uint32_t sizedwords,int level)2413 cp_start_bin(uint32_t *dwords, uint32_t sizedwords, int level)
2414 {
2415    uint64_t ibaddr;
2416    uint32_t ibsize;
2417    uint32_t loopcount;
2418    uint32_t *ptr = NULL;
2419 
2420    loopcount = dwords[0];
2421    ibaddr = dwords[1];
2422    ibaddr |= ((uint64_t)dwords[2]) << 32;
2423    ibsize = dwords[3];
2424 
2425    /* map gpuaddr back to hostptr: */
2426    ptr = hostptr(ibaddr);
2427 
2428    if (ptr) {
2429       /* If the GPU hung within the target IB, the trigger point will be
2430        * just after the current CP_START_BIN.  Because the IB is
2431        * executed but never returns.  Account for this by checking if
2432        * the IB returned:
2433        */
2434       highlight_gpuaddr(gpuaddr(&dwords[5]));
2435 
2436       /* TODO: we should duplicate the body of the loop after each bin, so
2437        * that draws get the correct state. We should also figure out if there
2438        * are any registers that can tell us what bin we're in when we hang so
2439        * that crashdec points to the right place.
2440        */
2441       ib++;
2442       for (uint32_t i = 0; i < loopcount; i++) {
2443          ibs[ib].base = ibaddr;
2444          ibs[ib].size = ibsize;
2445          printl(3, "%sbin %u\n", levels[level], i);
2446          dump_commands(ptr, ibsize, level);
2447          ibaddr += ibsize;
2448          ptr += ibsize;
2449       }
2450       ib--;
2451    } else {
2452       fprintf(stderr, "could not find: %016" PRIx64 " (%d)\n", ibaddr, ibsize);
2453    }
2454 }
2455 
2456 static void
cp_fixed_stride_draw_table(uint32_t * dwords,uint32_t sizedwords,int level)2457 cp_fixed_stride_draw_table(uint32_t *dwords, uint32_t sizedwords, int level)
2458 {
2459    uint64_t ibaddr;
2460    uint32_t ibsize;
2461    uint32_t loopcount;
2462    uint32_t *ptr = NULL;
2463 
2464    loopcount = dwords[3];
2465    ibaddr = dwords[0];
2466    ibaddr |= ((uint64_t)dwords[1]) << 32;
2467    ibsize = dwords[2] >> 20;
2468 
2469    /* map gpuaddr back to hostptr: */
2470    ptr = hostptr(ibaddr);
2471 
2472    if (ptr) {
2473       /* If the GPU hung within the target IB, the trigger point will be
2474        * just after the current CP_START_BIN.  Because the IB is
2475        * executed but never returns.  Account for this by checking if
2476        * the IB returned:
2477        */
2478       highlight_gpuaddr(gpuaddr(&dwords[5]));
2479 
2480       ib++;
2481       for (uint32_t i = 0; i < loopcount; i++) {
2482          ibs[ib].base = ibaddr;
2483          ibs[ib].size = ibsize;
2484          printl(3, "%sdraw %u\n", levels[level], i);
2485          dump_commands(ptr, ibsize, level);
2486          ibaddr += ibsize;
2487          ptr += ibsize;
2488       }
2489       ib--;
2490    } else {
2491       fprintf(stderr, "could not find: %016" PRIx64 " (%d)\n", ibaddr, ibsize);
2492    }
2493 }
2494 
2495 static void
cp_wfi(uint32_t * dwords,uint32_t sizedwords,int level)2496 cp_wfi(uint32_t *dwords, uint32_t sizedwords, int level)
2497 {
2498    needs_wfi = false;
2499 }
2500 
2501 static void
cp_mem_write(uint32_t * dwords,uint32_t sizedwords,int level)2502 cp_mem_write(uint32_t *dwords, uint32_t sizedwords, int level)
2503 {
2504    if (quiet(2))
2505       return;
2506 
2507    if (is_64b()) {
2508       uint64_t gpuaddr = dwords[0] | (((uint64_t)dwords[1]) << 32);
2509       printf("%sgpuaddr:%016" PRIx64 "\n", levels[level], gpuaddr);
2510       dump_hex(&dwords[2], sizedwords - 2, level + 1);
2511 
2512       if (pkt_is_type4(dwords[2]) || pkt_is_type7(dwords[2]))
2513          dump_commands(&dwords[2], sizedwords - 2, level + 1);
2514    } else {
2515       uint32_t gpuaddr = dwords[0];
2516       printf("%sgpuaddr:%08x\n", levels[level], gpuaddr);
2517       dump_float((float *)&dwords[1], sizedwords - 1, level + 1);
2518    }
2519 }
2520 
2521 static void
cp_rmw(uint32_t * dwords,uint32_t sizedwords,int level)2522 cp_rmw(uint32_t *dwords, uint32_t sizedwords, int level)
2523 {
2524    uint32_t val = dwords[0] & 0xffff;
2525    uint32_t and = dwords[1];
2526    uint32_t or = dwords[2];
2527    printl(3, "%srmw (%s & 0x%08x) | 0x%08x)\n", levels[level], regname(val, 1),
2528           and, or);
2529    if (needs_wfi)
2530       printl(2, "NEEDS WFI: rmw (%s & 0x%08x) | 0x%08x)\n", regname(val, 1),
2531              and, or);
2532    reg_set(val, (reg_val(val) & and) | or);
2533 }
2534 
2535 static void
cp_reg_mem(uint32_t * dwords,uint32_t sizedwords,int level)2536 cp_reg_mem(uint32_t *dwords, uint32_t sizedwords, int level)
2537 {
2538    uint32_t val = dwords[0] & 0xffff;
2539    printl(3, "%sbase register: %s\n", levels[level], regname(val, 1));
2540 
2541    if (quiet(2))
2542       return;
2543 
2544    uint64_t gpuaddr = dwords[1] | (((uint64_t)dwords[2]) << 32);
2545    printf("%sgpuaddr:%016" PRIx64 "\n", levels[level], gpuaddr);
2546    void *ptr = hostptr(gpuaddr);
2547    if (ptr) {
2548       uint32_t cnt = (dwords[0] >> 19) & 0x3ff;
2549       dump_hex(ptr, cnt, level + 1);
2550    }
2551 }
2552 
/* One draw-state group as recorded by CP_SET_DRAW_STATE. */
struct draw_state {
   uint16_t enable_mask; /* compared against the current enable_mask on load */
   uint16_t flags;       /* FLAG_* bits from the packet */
   uint32_t count;       /* size of the group's cmdstream; 0 means unused */
   uint64_t addr;        /* GPU address of the group's cmdstream */
};

/* Table of draw-state groups, indexed by group id. */
struct draw_state state[32];

/* Flag bits carried by each CP_SET_DRAW_STATE entry. */
#define FLAG_DIRTY              0x1
#define FLAG_DISABLE            0x2
#define FLAG_DISABLE_ALL_GROUPS 0x4
#define FLAG_LOAD_IMMED         0x8

/* Last value set by CP_SET_MODE. */
static int draw_mode;
2568 
2569 static void
disable_group(unsigned group_id)2570 disable_group(unsigned group_id)
2571 {
2572    struct draw_state *ds = &state[group_id];
2573    memset(ds, 0, sizeof(*ds));
2574 }
2575 
2576 static void
disable_all_groups(void)2577 disable_all_groups(void)
2578 {
2579    for (unsigned i = 0; i < ARRAY_SIZE(state); i++)
2580       disable_group(i);
2581 }
2582 
2583 static void
load_group(unsigned group_id,int level)2584 load_group(unsigned group_id, int level)
2585 {
2586    struct draw_state *ds = &state[group_id];
2587 
2588    if (!ds->count)
2589       return;
2590 
2591    printl(2, "%sgroup_id: %u\n", levels[level], group_id);
2592    printl(2, "%scount: %d\n", levels[level], ds->count);
2593    printl(2, "%saddr: %016llx\n", levels[level], ds->addr);
2594    printl(2, "%sflags: %x\n", levels[level], ds->flags);
2595 
2596    if (options->info->chip >= 6) {
2597       printl(2, "%senable_mask: 0x%x\n", levels[level], ds->enable_mask);
2598 
2599       if (!(ds->enable_mask & enable_mask)) {
2600          printl(2, "%s\tskipped!\n\n", levels[level]);
2601          return;
2602       }
2603    }
2604 
2605    void *ptr = hostptr(ds->addr);
2606    if (ptr) {
2607       if (!quiet(2))
2608          dump_hex(ptr, ds->count, level + 1);
2609 
2610       ib++;
2611       dump_commands(ptr, ds->count, level + 1);
2612       ib--;
2613    }
2614 }
2615 
2616 static void
load_all_groups(int level)2617 load_all_groups(int level)
2618 {
2619    /* sanity check, we should never recursively hit recursion here, and if
2620     * we do bad things happen:
2621     */
2622    static bool loading_groups = false;
2623    if (loading_groups) {
2624       printf("ERROR: nothing in draw state should trigger recursively loading "
2625              "groups!\n");
2626       return;
2627    }
2628    loading_groups = true;
2629    for (unsigned i = 0; i < ARRAY_SIZE(state); i++)
2630       load_group(i, level);
2631    loading_groups = false;
2632 
2633    /* in 'query-compare' mode, defer disabling all groups until we have a
2634     * chance to process the query:
2635     */
2636    if (!options->query_compare)
2637       disable_all_groups();
2638 }
2639 
2640 static void
cp_set_draw_state(uint32_t * dwords,uint32_t sizedwords,int level)2641 cp_set_draw_state(uint32_t *dwords, uint32_t sizedwords, int level)
2642 {
2643    uint32_t i;
2644 
2645    for (i = 0; i < sizedwords;) {
2646       struct draw_state *ds;
2647       uint32_t count = dwords[i] & 0xffff;
2648       uint32_t group_id = (dwords[i] >> 24) & 0x1f;
2649       uint32_t enable_mask = (dwords[i] >> 20) & 0xf;
2650       uint32_t flags = (dwords[i] >> 16) & 0xf;
2651       uint64_t addr;
2652 
2653       if (is_64b()) {
2654          addr = dwords[i + 1];
2655          addr |= ((uint64_t)dwords[i + 2]) << 32;
2656          i += 3;
2657       } else {
2658          addr = dwords[i + 1];
2659          i += 2;
2660       }
2661 
2662       if (flags & FLAG_DISABLE_ALL_GROUPS) {
2663          disable_all_groups();
2664          continue;
2665       }
2666 
2667       if (flags & FLAG_DISABLE) {
2668          disable_group(group_id);
2669          continue;
2670       }
2671 
2672       assert(group_id < ARRAY_SIZE(state));
2673       disable_group(group_id);
2674 
2675       ds = &state[group_id];
2676 
2677       ds->enable_mask = enable_mask;
2678       ds->flags = flags;
2679       ds->count = count;
2680       ds->addr = addr;
2681 
2682       if (flags & FLAG_LOAD_IMMED) {
2683          load_group(group_id, level);
2684          disable_group(group_id);
2685       }
2686    }
2687 }
2688 
2689 static void
cp_set_mode(uint32_t * dwords,uint32_t sizedwords,int level)2690 cp_set_mode(uint32_t *dwords, uint32_t sizedwords, int level)
2691 {
2692    draw_mode = dwords[0];
2693 }
2694 
/* Decode CP_EXEC_CS (execute compute shader): runs the "compute" query with
 * a count of 0, then dumps bindless descriptors (first argument true) and
 * the register summary.  The packet payload is not inspected.
 */
static void
cp_exec_cs(uint32_t *dwords, uint32_t sizedwords, int level)
{
   (void)dwords;
   (void)sizedwords;

   do_query("compute", 0);
   dump_bindless_descriptors(true, level);
   dump_register_summary(level);
}
2703 
2704 static void
cp_exec_cs_indirect(uint32_t * dwords,uint32_t sizedwords,int level)2705 cp_exec_cs_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
2706 {
2707    uint64_t addr;
2708 
2709    if (is_64b()) {
2710       addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1];
2711    } else {
2712       addr = dwords[1];
2713    }
2714 
2715    printl(3, "%saddr: %016llx\n", levels[level], addr);
2716    dump_gpuaddr_size(addr, level, 0x10, 2);
2717 
2718    do_query("compute", 0);
2719    dump_bindless_descriptors(true, level);
2720    dump_register_summary(level);
2721 }
2722 
2723 static void
cp_set_marker(uint32_t * dwords,uint32_t sizedwords,int level)2724 cp_set_marker(uint32_t *dwords, uint32_t sizedwords, int level)
2725 {
2726    uint32_t val = dwords[0] & 0xf;
2727    const char *mode = rnn_enumname(rnn, "a6xx_marker", val);
2728 
2729    if (!mode) {
2730       static char buf[8];
2731       sprintf(buf, "0x%x", val);
2732       render_mode = buf;
2733       return;
2734    }
2735 
2736    render_mode = mode;
2737 
2738    if (!strcmp(render_mode, "RM6_BIN_VISIBILITY")) {
2739       enable_mask = MODE_BINNING;
2740    } else if (!strcmp(render_mode, "RM6_BIN_RENDER_START")) {
2741       enable_mask = MODE_GMEM;
2742    } else if (!strcmp(render_mode, "RM6_DIRECT_RENDER")) {
2743       enable_mask = MODE_BYPASS;
2744    }
2745 }
2746 
2747 static void
cp_set_thread_control(uint32_t * dwords,uint32_t sizedwords,int level)2748 cp_set_thread_control(uint32_t *dwords, uint32_t sizedwords, int level)
2749 {
2750    uint32_t val = dwords[0] & 0x3;
2751    thread = rnn_enumname(rnn, "cp_thread", val);
2752 }
2753 
2754 static void
cp_set_render_mode(uint32_t * dwords,uint32_t sizedwords,int level)2755 cp_set_render_mode(uint32_t *dwords, uint32_t sizedwords, int level)
2756 {
2757    uint64_t addr;
2758    uint32_t *ptr, len;
2759 
2760    assert(is_64b());
2761 
2762    /* TODO seems to have two ptrs, 9 dwords total (incl pkt7 hdr)..
2763     * not sure if this can come in different sizes.
2764     *
2765     * First ptr doesn't seem to be cmdstream, second one does.
2766     *
2767     * Comment from downstream kernel:
2768     *
2769     * SRM -- set render mode (ex binning, direct render etc)
2770     * SRM is set by UMD usually at start of IB to tell CP the type of
2771     * preemption.
2772     * KMD needs to set SRM to NULL to indicate CP that rendering is
2773     * done by IB.
2774     * ------------------------------------------------------------------
2775     *
2776     * Seems to always be one of these two:
2777     * 70ec0008 00000001 001c0000 00000000 00000010 00000003 0000000d 001c2000
2778     * 00000000 70ec0008 00000001 001c0000 00000000 00000000 00000003 0000000d
2779     * 001c2000 00000000
2780     *
2781     */
2782 
2783    assert(options->info->chip >= 5);
2784 
2785    render_mode = rnn_enumname(rnn, "render_mode_cmd", dwords[0]);
2786 
2787    if (sizedwords == 1)
2788       return;
2789 
2790    addr = dwords[1];
2791    addr |= ((uint64_t)dwords[2]) << 32;
2792 
2793    mode = dwords[3];
2794 
2795    dump_gpuaddr(addr, level + 1);
2796 
2797    if (sizedwords == 5)
2798       return;
2799 
2800    assert(sizedwords == 8);
2801 
2802    len = dwords[5];
2803    addr = dwords[6];
2804    addr |= ((uint64_t)dwords[7]) << 32;
2805 
2806    printl(3, "%saddr: 0x%016lx\n", levels[level], addr);
2807    printl(3, "%slen:  0x%x\n", levels[level], len);
2808 
2809    ptr = hostptr(addr);
2810 
2811    if (ptr) {
2812       if (!quiet(2)) {
2813          ib++;
2814          dump_commands(ptr, len, level + 1);
2815          ib--;
2816          dump_hex(ptr, len, level + 1);
2817       }
2818    }
2819 }
2820 
2821 static void
cp_compute_checkpoint(uint32_t * dwords,uint32_t sizedwords,int level)2822 cp_compute_checkpoint(uint32_t *dwords, uint32_t sizedwords, int level)
2823 {
2824    uint64_t addr;
2825    uint32_t *ptr, len;
2826 
2827    assert(is_64b());
2828    assert(options->info->chip >= 5);
2829 
2830    if (sizedwords == 8) {
2831       addr = dwords[5];
2832       addr |= ((uint64_t)dwords[6]) << 32;
2833       len = dwords[7];
2834    } else {
2835       addr = dwords[5];
2836       addr |= ((uint64_t)dwords[6]) << 32;
2837       len = dwords[4];
2838    }
2839 
2840    printl(3, "%saddr: 0x%016" PRIx64 "\n", levels[level], addr);
2841    printl(3, "%slen:  0x%x\n", levels[level], len);
2842 
2843    ptr = hostptr(addr);
2844 
2845    if (ptr) {
2846       if (!quiet(2)) {
2847          ib++;
2848          dump_commands(ptr, len, level + 1);
2849          ib--;
2850          dump_hex(ptr, len, level + 1);
2851       }
2852    }
2853 }
2854 
2855 static void
cp_blit(uint32_t * dwords,uint32_t sizedwords,int level)2856 cp_blit(uint32_t *dwords, uint32_t sizedwords, int level)
2857 {
2858    do_query(rnn_enumname(rnn, "cp_blit_cmd", dwords[0]), 0);
2859    print_mode(level);
2860    dump_register_summary(level);
2861 }
2862 
2863 static void
cp_context_reg_bunch(uint32_t * dwords,uint32_t sizedwords,int level)2864 cp_context_reg_bunch(uint32_t *dwords, uint32_t sizedwords, int level)
2865 {
2866    int i;
2867 
2868    /* NOTE: seems to write same reg multiple times.. not sure if different parts
2869     * of these are triggered by the FLUSH_SO_n events?? (if that is what they
2870     * actually are?)
2871     */
2872    bool saved_summary = summary;
2873    summary = false;
2874 
2875    struct regacc r = regacc(NULL);
2876 
2877    for (i = 0; i < sizedwords; i += 2) {
2878       if (regacc_push(&r, dwords[i + 0], dwords[i + 1]))
2879          dump_register(&r, level + 1);
2880       reg_set(dwords[i + 0], dwords[i + 1]);
2881    }
2882 
2883    summary = saved_summary;
2884 }
2885 
2886 /* Looks similar to CP_CONTEXT_REG_BUNCH, but not quite the same...
2887  * discarding first two dwords??
2888  *
2889  *   CP_CONTEXT_REG_BUNCH:
2890  *        0221: 9c1ff606  (rep)(xmov3)mov $usraddr, $data
2891  *        ; mov $data, $data
2892  *        ; mov $usraddr, $data
2893  *        ; mov $data, $data
2894  *        0222: d8000000  waitin
2895  *        0223: 981f0806  mov $01, $data
2896  *
2897  *   CP_UNK5D:
2898  *        0224: 981f0006  mov $00, $data
2899  *        0225: 981f0006  mov $00, $data
2900  *        0226: 9c1ff206  (rep)(xmov1)mov $usraddr, $data
2901  *        ; mov $data, $data
2902  *        0227: d8000000  waitin
2903  *        0228: 981f0806  mov $01, $data
2904  *
2905  */
static void
cp_context_reg_bunch2(uint32_t *dwords, uint32_t sizedwords, int level)
{
   /* Skip the first two payload dwords and decode the remainder exactly
    * like CP_CONTEXT_REG_BUNCH.
    */
   cp_context_reg_bunch(&dwords[2], sizedwords - 2, level);
}
2913 
2914 static void
cp_reg_write(uint32_t * dwords,uint32_t sizedwords,int level)2915 cp_reg_write(uint32_t *dwords, uint32_t sizedwords, int level)
2916 {
2917    uint32_t reg = dwords[1] & 0xffff;
2918 
2919    struct regacc r = regacc(NULL);
2920    if (regacc_push(&r, reg, dwords[2]))
2921       dump_register(&r, level + 1);
2922    reg_set(reg, dwords[2]);
2923 }
2924 
2925 static void
cp_set_ctxswitch_ib(uint32_t * dwords,uint32_t sizedwords,int level)2926 cp_set_ctxswitch_ib(uint32_t *dwords, uint32_t sizedwords, int level)
2927 {
2928    uint64_t addr;
2929    uint32_t size = dwords[2] & 0xffff;
2930    void *ptr;
2931 
2932    addr = dwords[0] | ((uint64_t)dwords[1] << 32);
2933 
2934    if (!quiet(3)) {
2935       printf("%saddr=%" PRIx64 "\n", levels[level], addr);
2936    }
2937 
2938    ptr = hostptr(addr);
2939    if (ptr) {
2940       dump_commands(ptr, size, level + 1);
2941    }
2942 }
2943 
2944 static void
cp_skip_ib2_enable_global(uint32_t * dwords,uint32_t sizedwords,int level)2945 cp_skip_ib2_enable_global(uint32_t *dwords, uint32_t sizedwords, int level)
2946 {
2947    skip_ib2_enable_global = dwords[0];
2948 }
2949 
2950 static void
cp_skip_ib2_enable_local(uint32_t * dwords,uint32_t sizedwords,int level)2951 cp_skip_ib2_enable_local(uint32_t *dwords, uint32_t sizedwords, int level)
2952 {
2953    skip_ib2_enable_local = dwords[0];
2954 }
2955 
2956 #define CP(x, fxn, ...) { "CP_" #x, fxn, ##__VA_ARGS__ }
2957 static const struct type3_op {
2958    const char *name;
2959    void (*fxn)(uint32_t *dwords, uint32_t sizedwords, int level);
2960    struct {
2961       bool load_all_groups;
2962    } options;
2963 } type3_op[] = {
2964    CP(NOP, cp_nop),
2965    CP(INDIRECT_BUFFER, cp_indirect),
2966    CP(INDIRECT_BUFFER_PFD, cp_indirect),
2967    CP(WAIT_FOR_IDLE, cp_wfi),
2968    CP(REG_RMW, cp_rmw),
2969    CP(REG_TO_MEM, cp_reg_mem),
2970    CP(MEM_TO_REG, cp_reg_mem), /* same layout as CP_REG_TO_MEM */
2971    CP(MEM_WRITE, cp_mem_write),
2972    CP(EVENT_WRITE, cp_event_write),
2973    CP(RUN_OPENCL, cp_run_cl),
2974    CP(DRAW_INDX, cp_draw_indx, {.load_all_groups = true}),
2975    CP(DRAW_INDX_2, cp_draw_indx_2, {.load_all_groups = true}),
2976    CP(SET_CONSTANT, cp_set_const),
2977    CP(IM_LOAD_IMMEDIATE, cp_im_loadi),
2978    CP(WIDE_REG_WRITE, cp_wide_reg_write),
2979 
2980    /* for a3xx */
2981    CP(LOAD_STATE, cp_load_state),
2982    CP(SET_BIN, cp_set_bin),
2983 
2984    /* for a4xx */
2985    CP(LOAD_STATE4, cp_load_state),
2986    CP(SET_DRAW_STATE, cp_set_draw_state),
2987    CP(DRAW_INDX_OFFSET, cp_draw_indx_offset, {.load_all_groups = true}),
2988    CP(EXEC_CS, cp_exec_cs, {.load_all_groups = true}),
2989    CP(EXEC_CS_INDIRECT, cp_exec_cs_indirect, {.load_all_groups = true}),
2990 
2991    /* for a5xx */
2992    CP(SET_RENDER_MODE, cp_set_render_mode),
2993    CP(COMPUTE_CHECKPOINT, cp_compute_checkpoint),
2994    CP(BLIT, cp_blit),
2995    CP(CONTEXT_REG_BUNCH, cp_context_reg_bunch),
2996    CP(DRAW_INDIRECT, cp_draw_indirect, {.load_all_groups = true}),
2997    CP(DRAW_INDX_INDIRECT, cp_draw_indx_indirect, {.load_all_groups = true}),
2998    CP(DRAW_INDIRECT_MULTI, cp_draw_indirect_multi, {.load_all_groups = true}),
2999    CP(SKIP_IB2_ENABLE_GLOBAL, cp_skip_ib2_enable_global),
3000    CP(SKIP_IB2_ENABLE_LOCAL, cp_skip_ib2_enable_local),
3001 
3002    /* for a6xx */
3003    CP(LOAD_STATE6_GEOM, cp_load_state),
3004    CP(LOAD_STATE6_FRAG, cp_load_state),
3005    CP(LOAD_STATE6, cp_load_state),
3006    CP(SET_MODE, cp_set_mode),
3007    CP(SET_MARKER, cp_set_marker),
3008    CP(REG_WRITE, cp_reg_write),
3009    CP(DRAW_AUTO, cp_draw_auto, {.load_all_groups = true}),
3010 
3011    CP(SET_CTXSWITCH_IB, cp_set_ctxswitch_ib),
3012 
3013    CP(START_BIN, cp_start_bin),
3014 
3015    CP(FIXED_STRIDE_DRAW_TABLE, cp_fixed_stride_draw_table),
3016 
3017    /* for a7xx */
3018    CP(THREAD_CONTROL, cp_set_thread_control),
3019    CP(CONTEXT_REG_BUNCH2, cp_context_reg_bunch2),
3020    CP(EVENT_WRITE7, cp_event_write),
3021 };
3022 
/*
 * Decode callback that does nothing; get_type3_op() installs it in the
 * fallback entry returned for packets without a dedicated handler.
 */
static void
noop_fxn(uint32_t *dwords, uint32_t sizedwords, int level)
{
   (void)dwords;
   (void)sizedwords;
   (void)level;
}
3027 
3028 static const struct type3_op *
get_type3_op(unsigned opc)3029 get_type3_op(unsigned opc)
3030 {
3031    static const struct type3_op dummy_op = {
3032       .fxn = noop_fxn,
3033    };
3034    const char *name = pktname(opc);
3035 
3036    if (!name)
3037       return &dummy_op;
3038 
3039    for (unsigned i = 0; i < ARRAY_SIZE(type3_op); i++)
3040       if (!strcmp(name, type3_op[i].name))
3041          return &type3_op[i];
3042 
3043    return &dummy_op;
3044 }
3045 
3046 void
dump_commands(uint32_t * dwords,uint32_t sizedwords,int level)3047 dump_commands(uint32_t *dwords, uint32_t sizedwords, int level)
3048 {
3049    int dwords_left = sizedwords;
3050    uint32_t count = 0; /* dword count including packet header */
3051    uint32_t val;
3052 
3053    //	assert(dwords);
3054    if (!dwords) {
3055       printf("NULL cmd buffer!\n");
3056       return;
3057    }
3058 
3059    assert(ib < ARRAY_SIZE(draws));
3060    draws[ib] = 0;
3061 
3062    while (dwords_left > 0) {
3063 
3064       current_draw_count = draw_count;
3065 
3066       /* hack, this looks like a -1 underflow, in some versions
3067        * when it tries to write zero registers via pkt0
3068        */
3069       //		if ((dwords[0] >> 16) == 0xffff)
3070       //			goto skip;
3071 
3072       if (pkt_is_regwrite(dwords[0], &val, &count)) {
3073          assert(val < regcnt());
3074          printl(3, "%swrite %s (%04x)\n", levels[level + 1], regname(val, 1),
3075                 val);
3076          dump_registers(val, dwords + 1, count - 1, level + 2);
3077          if (!quiet(3))
3078             dump_hex(dwords, count, level + 1);
3079 #if 0
3080       } else if (pkt_is_type1(dwords[0])) {
3081          count = 3;
3082          val = dwords[0] & 0xfff;
3083          printl(3, "%swrite %s\n", levels[level+1], regname(val, 1));
3084          dump_registers(val, dwords+1, 1, level+2);
3085          val = (dwords[0] >> 12) & 0xfff;
3086          printl(3, "%swrite %s\n", levels[level+1], regname(val, 1));
3087          dump_registers(val, dwords+2, 1, level+2);
3088          if (!quiet(3))
3089             dump_hex(dwords, count, level+1);
3090 #endif
3091       } else if (pkt_is_opcode(dwords[0], &val, &count)) {
3092          const struct type3_op *op = get_type3_op(val);
3093          if (op->options.load_all_groups)
3094             load_all_groups(level + 1);
3095          const char *name = pktname(val);
3096          if (!quiet(2)) {
3097             printf("\t%sopcode: %s%s%s (%02x) (%d dwords)\n", levels[level],
3098                    rnn->vc->colors->bctarg, name, rnn->vc->colors->reset, val,
3099                    count);
3100          }
3101          if (name) {
3102             /* special hack for two packets that decode the same way
3103              * on a6xx:
3104              */
3105             if (!strcmp(name, "CP_LOAD_STATE6_FRAG") ||
3106                 !strcmp(name, "CP_LOAD_STATE6_GEOM"))
3107                name = "CP_LOAD_STATE6";
3108             dump_domain(dwords + 1, count - 1, level + 2, name);
3109          }
3110          op->fxn(dwords + 1, count - 1, level + 1);
3111          if (!quiet(2))
3112             dump_hex(dwords, count, level + 1);
3113       } else if (pkt_is_type2(dwords[0])) {
3114          printl(3, "%snop\n", levels[level + 1]);
3115          count = 1;
3116       } else {
3117          printf("bad type! %08x\n", dwords[0]);
3118          /* for 5xx+ we can do a passable job of looking for start of next valid
3119           * packet: */
3120          if (options->info->chip >= 5) {
3121             count = find_next_packet(dwords, dwords_left);
3122          } else {
3123             return;
3124          }
3125       }
3126 
3127       dwords += count;
3128       dwords_left -= count;
3129    }
3130 
3131    if (dwords_left < 0)
3132       printf("**** this ain't right!! dwords_left=%d\n", dwords_left);
3133 }
3134