• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2012 Rob Clark <robdclark@gmail.com>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 
24 #include <assert.h>
25 #include <ctype.h>
26 #include <err.h>
27 #include <errno.h>
28 #include <fcntl.h>
29 #include <inttypes.h>
30 #include <signal.h>
31 #include <stdarg.h>
32 #include <stdbool.h>
33 #include <stdint.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <unistd.h>
38 #include <sys/stat.h>
39 #include <sys/types.h>
40 #include <sys/wait.h>
41 
42 #include "freedreno_pm4.h"
43 
44 #include "buffers.h"
45 #include "cffdec.h"
46 #include "disasm.h"
47 #include "redump.h"
48 #include "rnnutil.h"
49 #include "script.h"
50 
51 /* ************************************************************************* */
52 /* originally based on kernel recovery dump code: */
53 
/* Decoder options for the current run; assigned by cffdec_init(). */
static const struct cffdec_options *options;

/* When set, dump_registers() reports writes to non-banked registers as
 * needing a WFI.
 */
static bool needs_wfi = false;
/* Copy of options->summary taken in cffdec_init(); dump_registers() saves
 * and restores it around each register it processes.
 */
static bool summary = false;
/* The reg_dump_tex_*_hi() handlers do nothing unless this is set. */
static bool in_summary = false;
static int vertices;
60 
61 static inline unsigned
regcnt(void)62 regcnt(void)
63 {
64    if (options->info->chip >= 5)
65       return 0xffff;
66    else
67       return 0x7fff;
68 }
69 
70 static int
is_64b(void)71 is_64b(void)
72 {
73    return options->info->chip >= 5;
74 }
75 
static int draws[4];
static struct {
   uint64_t base;
   uint32_t size; /* in dwords */
   /* Generally cmdstream consists of multiple IB calls to different
    * buffers, which are themselves often re-used for each tile.  The
    * triggered flag serves two purposes to help make it clearer which
    * part of the cmdstream is before vs after the GPU hang:
    *
    * 1) if in IB2 we are past the point within the IB2 buffer where
    *    the GPU hung, but IB1 is not past the point within its
    *    buffer where the GPU had hung, then we know the GPU hang
    *    happens on a future use of that IB2 buffer.
    *
    * 2) if in an IB1 or IB2 buffer that is not the one where the GPU
    *    hung, but we've already passed the trigger point at the same
    *    IB level, we know that we are past the point where the GPU
    *    had hung.
    *
    * So this is a one way switch, false->true.  And a higher #'d
    * IB level isn't considered triggered unless the lower #'d IB
    * level is.
    */
   bool triggered : 1;
   /* Set by highlight_gpuaddr() when this level reaches its trigger range
    * but the next IB level has crash_found set; once set, this level is no
    * longer highlighted.
    */
   bool base_seen : 1;
} ibs[4];
/* Index into ibs[] (and options->ibs[]) of the IB level being decoded. */
static int ib;
103 
static int draw_count;
/* Compared against options->draw_filter in quiet(). */
static int current_draw_count;

/* query mode.. to handle symbolic register name queries, we need to
 * defer parsing the query strings until after gpu_id is known and the
 * rnn db is loaded (see init_rnn()):
 */
static int *queryvals;
112 
113 static bool
quiet(int lvl)114 quiet(int lvl)
115 {
116    if ((options->draw_filter != -1) &&
117        (options->draw_filter != current_draw_count))
118       return true;
119    if ((lvl >= 3) && (summary || options->querystrs || options->script))
120       return true;
121    if ((lvl >= 2) && (options->querystrs || options->script))
122       return true;
123    return false;
124 }
125 
/**
 * printf() to stdout unless output at verbosity level @lvl is suppressed
 * (see quiet()).
 *
 * @param lvl  verbosity level of the message
 * @param fmt  printf-style format string, followed by its arguments
 */
void
printl(int lvl, const char *fmt, ...)
{
   va_list args;
   if (quiet(lvl))
      return;
   va_start(args, fmt);
   vprintf(fmt, args);
   va_end(args);
}
136 
/* Indentation prefix per nesting level: entry N holds N+1 tabs for levels
 * 0..8.  The remaining entries are "x" placeholders, which makes unexpectedly
 * deep nesting visible in the output.  The table has 15 entries; callers
 * index it directly with their level argument.
 */
static const char *levels[] = {
   "\t",
   "\t\t",
   "\t\t\t",
   "\t\t\t\t",
   "\t\t\t\t\t",
   "\t\t\t\t\t\t",
   "\t\t\t\t\t\t\t",
   "\t\t\t\t\t\t\t\t",
   "\t\t\t\t\t\t\t\t\t",
   "x",
   "x",
   "x",
   "x",
   "x",
   "x",
};
154 
/* Source kind passed to dump_tex_samp() to say how the state is referenced. */
enum state_src_t {
   STATE_SRC_DIRECT,
   STATE_SRC_INDIRECT,
   STATE_SRC_BINDLESS,
};
160 
/* SDS (CP_SET_DRAW_STATE) helpers, defined later in this file: */
static void load_all_groups(int level);
static void disable_all_groups(void);

/* Texture state dumpers, defined later in this file; declared here because
 * the register handlers below call them.
 */
static void dump_tex_samp(uint32_t *texsamp, enum state_src_t src, int num_unit,
                          int level);
static void dump_tex_const(uint32_t *texsamp, int num_unit, int level);
168 
169 static bool
highlight_gpuaddr(uint64_t gpuaddr)170 highlight_gpuaddr(uint64_t gpuaddr)
171 {
172    if (!options->ibs[ib].base)
173       return false;
174 
175    if ((ib > 0) && options->ibs[ib - 1].base &&
176        !(ibs[ib - 1].triggered || ibs[ib - 1].base_seen))
177       return false;
178 
179    if (ibs[ib].base_seen)
180       return false;
181 
182    if (ibs[ib].triggered)
183       return options->color;
184 
185    if (options->ibs[ib].base != ibs[ib].base)
186       return false;
187 
188    uint64_t start = ibs[ib].base + 4 * (ibs[ib].size - options->ibs[ib].rem);
189    uint64_t end = ibs[ib].base + 4 * ibs[ib].size;
190 
191    bool triggered = (start <= gpuaddr) && (gpuaddr <= end);
192 
193    if (triggered && (ib < 2) && options->ibs[ib + 1].crash_found) {
194       ibs[ib].base_seen = true;
195       return false;
196    }
197 
198    ibs[ib].triggered |= triggered;
199 
200    if (triggered)
201       printf("ESTIMATED CRASH LOCATION!\n");
202 
203    return triggered & options->color;
204 }
205 
206 static void
dump_hex(uint32_t * dwords,uint32_t sizedwords,int level)207 dump_hex(uint32_t *dwords, uint32_t sizedwords, int level)
208 {
209    int i, j;
210    int lastzero = 1;
211 
212    if (quiet(2))
213       return;
214 
215    bool highlight = highlight_gpuaddr(gpuaddr(dwords) + 4 * sizedwords - 1);
216 
217    for (i = 0; i < sizedwords; i += 8) {
218       int zero = 1;
219 
220       /* always show first row: */
221       if (i == 0)
222          zero = 0;
223 
224       for (j = 0; (j < 8) && (i + j < sizedwords) && zero; j++)
225          if (dwords[i + j])
226             zero = 0;
227 
228       if (zero && !lastzero)
229          printf("*\n");
230 
231       lastzero = zero;
232 
233       if (zero)
234          continue;
235 
236       uint64_t addr = gpuaddr(&dwords[i]);
237 
238       if (highlight)
239          printf("\x1b[0;1;31m");
240 
241       if (is_64b()) {
242          printf("%016" PRIx64 ":%s", addr, levels[level]);
243       } else {
244          printf("%08x:%s", (uint32_t)addr, levels[level]);
245       }
246 
247       if (highlight)
248          printf("\x1b[0m");
249 
250       printf("%04x:", i * 4);
251 
252       for (j = 0; (j < 8) && (i + j < sizedwords); j++) {
253          printf(" %08x", dwords[i + j]);
254       }
255 
256       printf("\n");
257    }
258 }
259 
260 static void
dump_float(float * dwords,uint32_t sizedwords,int level)261 dump_float(float *dwords, uint32_t sizedwords, int level)
262 {
263    int i;
264    for (i = 0; i < sizedwords; i++) {
265       if ((i % 8) == 0) {
266          if (is_64b()) {
267             printf("%016" PRIx64 ":%s", gpuaddr(dwords), levels[level]);
268          } else {
269             printf("%08x:%s", (uint32_t)gpuaddr(dwords), levels[level]);
270          }
271       } else {
272          printf(" ");
273       }
274       printf("%8f", *(dwords++));
275       if ((i % 8) == 7)
276          printf("\n");
277    }
278    if (i % 8)
279       printf("\n");
280 }
281 
/**
 * Split @dword into an address part and a flags part using @mask:
 * *gpuaddr receives the bits outside @mask, *flags the bits inside it.
 *
 * Original author's note: I believe the surface format is in the low bits:
 *   #define RB_COLOR_INFO__COLOR_FORMAT_MASK                   0x0000000fL
 * comments in sys2gmem_tex_const indicate that the address is [31:12], but
 * it looks like at least some of the bits above the format have a different
 * meaning..
 */
static void
parse_dword_addr(uint32_t dword, uint32_t *gpuaddr, uint32_t *flags,
                 uint32_t mask)
{
   assert(!is_64b()); /* this is only used on a2xx */
   *gpuaddr = dword & ~mask;
   *flags = dword & mask;
}
295 
/* Last value written to each register, indexed by register offset. */
static uint32_t type0_reg_vals[0xffff + 1];
/* One bit per register: written since last draw.  Sized from the number of
 * registers (not the byte size of type0_reg_vals), eight registers per byte.
 */
static uint8_t type0_reg_rewritten[sizeof(type0_reg_vals) /
                                   sizeof(type0_reg_vals[0]) / 8];
/* One bit per register: set by reg_set(), cleared by clear_written(). */
static uint8_t type0_reg_written[sizeof(type0_reg_vals) /
                                 sizeof(type0_reg_vals[0]) / 8];
/* Per-register values returned by reg_lastval(), one entry per register. */
static uint32_t lastvals[sizeof(type0_reg_vals) / sizeof(type0_reg_vals[0])];
301 
302 static bool
reg_rewritten(uint32_t regbase)303 reg_rewritten(uint32_t regbase)
304 {
305    return !!(type0_reg_rewritten[regbase / 8] & (1 << (regbase % 8)));
306 }
307 
308 bool
reg_written(uint32_t regbase)309 reg_written(uint32_t regbase)
310 {
311    return !!(type0_reg_written[regbase / 8] & (1 << (regbase % 8)));
312 }
313 
314 static void
clear_rewritten(void)315 clear_rewritten(void)
316 {
317    memset(type0_reg_rewritten, 0, sizeof(type0_reg_rewritten));
318 }
319 
320 static void
clear_written(void)321 clear_written(void)
322 {
323    memset(type0_reg_written, 0, sizeof(type0_reg_written));
324    clear_rewritten();
325 }
326 
327 uint32_t
reg_lastval(uint32_t regbase)328 reg_lastval(uint32_t regbase)
329 {
330    return lastvals[regbase];
331 }
332 
333 static void
clear_lastvals(void)334 clear_lastvals(void)
335 {
336    memset(lastvals, 0, sizeof(lastvals));
337 }
338 
339 uint32_t
reg_val(uint32_t regbase)340 reg_val(uint32_t regbase)
341 {
342    return type0_reg_vals[regbase];
343 }
344 
345 void
reg_set(uint32_t regbase,uint32_t val)346 reg_set(uint32_t regbase, uint32_t val)
347 {
348    assert(regbase < regcnt());
349    type0_reg_vals[regbase] = val;
350    type0_reg_written[regbase / 8] |= (1 << (regbase % 8));
351    type0_reg_rewritten[regbase / 8] |= (1 << (regbase % 8));
352 }
353 
354 static void
reg_dump_scratch(const char * name,uint32_t dword,int level)355 reg_dump_scratch(const char *name, uint32_t dword, int level)
356 {
357    unsigned r;
358 
359    if (quiet(3))
360       return;
361 
362    r = regbase("CP_SCRATCH[0].REG");
363 
364    // if not, try old a2xx/a3xx version:
365    if (!r)
366       r = regbase("CP_SCRATCH_REG0");
367 
368    if (!r)
369       return;
370 
371    printf("%s:%u,%u,%u,%u\n", levels[level], reg_val(r + 4), reg_val(r + 5),
372           reg_val(r + 6), reg_val(r + 7));
373 }
374 
/**
 * Hex-dump @sizedwords dwords at @gpuaddr, one indentation level deeper than
 * @level.  Does nothing when quiet(@quietlvl) or when hostptr() returns no
 * mapping for @gpuaddr.
 */
static void
dump_gpuaddr_size(uint64_t gpuaddr, int level, int sizedwords, int quietlvl)
{
   void *buf;

   if (quiet(quietlvl))
      return;

   buf = hostptr(gpuaddr);
   if (buf) {
      dump_hex(buf, sizedwords, level + 1);
   }
}
388 
/** Hex-dump 64 dwords at @gpuaddr, at quiet level 3. */
static void
dump_gpuaddr(uint64_t gpuaddr, int level)
{
   dump_gpuaddr_size(gpuaddr, level, 64, 3);
}
394 
/** Register handler: treat @dword as a 32-bit GPU address and dump it. */
static void
reg_dump_gpuaddr(const char *name, uint32_t dword, int level)
{
   dump_gpuaddr(dword, level);
}
400 
/* Low dword of a 64-bit address split across a _LO/_HI register pair; stored
 * by reg_gpuaddr_lo() and combined with the high dword by the *_hi handlers.
 */
uint32_t gpuaddr_lo;

/** Register handler: remember @dword as the low half of a 64-bit address. */
static void
reg_gpuaddr_lo(const char *name, uint32_t dword, int level)
{
   gpuaddr_lo = dword;
}
407 
408 static void
reg_dump_gpuaddr_hi(const char * name,uint32_t dword,int level)409 reg_dump_gpuaddr_hi(const char *name, uint32_t dword, int level)
410 {
411    dump_gpuaddr(gpuaddr_lo | (((uint64_t)dword) << 32), level);
412 }
413 
/** 64-bit register handler: treat @qword as a GPU address and dump it. */
static void
reg_dump_gpuaddr64(const char *name, uint64_t qword, int level)
{
   dump_gpuaddr(qword, level);
}
419 
420 static void
dump_shader(const char * ext,void * buf,int bufsz)421 dump_shader(const char *ext, void *buf, int bufsz)
422 {
423    if (options->dump_shaders) {
424       static int n = 0;
425       char filename[16];
426       int fd;
427       sprintf(filename, "%04d.%s", n++, ext);
428       fd = open(filename, O_WRONLY | O_TRUNC | O_CREAT, 0644);
429       if (fd != -1) {
430          write(fd, buf, bufsz);
431          close(fd);
432       }
433    }
434 }
435 
436 static void
disasm_gpuaddr(const char * name,uint64_t gpuaddr,int level)437 disasm_gpuaddr(const char *name, uint64_t gpuaddr, int level)
438 {
439    void *buf;
440 
441    gpuaddr &= 0xfffffffffffffff0;
442 
443    if (quiet(3))
444       return;
445 
446    buf = hostptr(gpuaddr);
447    if (buf) {
448       uint32_t sizedwords = hostlen(gpuaddr) / 4;
449       const char *ext;
450 
451       dump_hex(buf, MIN2(64, sizedwords), level + 1);
452       try_disasm_a3xx(buf, sizedwords, level + 2, stdout, options->info->chip * 100);
453 
454       /* this is a bit ugly way, but oh well.. */
455       if (strstr(name, "SP_VS_OBJ")) {
456          ext = "vo3";
457       } else if (strstr(name, "SP_FS_OBJ")) {
458          ext = "fo3";
459       } else if (strstr(name, "SP_GS_OBJ")) {
460          ext = "go3";
461       } else if (strstr(name, "SP_CS_OBJ")) {
462          ext = "co3";
463       } else {
464          ext = NULL;
465       }
466 
467       if (ext)
468          dump_shader(ext, buf, sizedwords * 4);
469    }
470 }
471 
/** Register handler: disassemble the shader at the 32-bit address @dword. */
static void
reg_disasm_gpuaddr(const char *name, uint32_t dword, int level)
{
   disasm_gpuaddr(name, dword, level);
}
477 
478 static void
reg_disasm_gpuaddr_hi(const char * name,uint32_t dword,int level)479 reg_disasm_gpuaddr_hi(const char *name, uint32_t dword, int level)
480 {
481    disasm_gpuaddr(name, gpuaddr_lo | (((uint64_t)dword) << 32), level);
482 }
483 
/** 64-bit register handler: disassemble the shader at address @qword. */
static void
reg_disasm_gpuaddr64(const char *name, uint64_t qword, int level)
{
   disasm_gpuaddr(name, qword, level);
}
489 
/* Find the value of the TEX_COUNT register that corresponds to the named
 * TEX_SAMP/TEX_CONST reg.
 *
 * The register name is built by replacing everything from the first "CONST"
 * (or, failing that, "SAMP") in @name with "COUNT".  Returns 0 if @name
 * contains neither.
 *
 * Note, this kinda assumes an equal # of samplers and textures, but not
 * really sure if there is a much better option.  I suppose on a6xx we
 * could instead decode the bitfields in SP_xS_CONFIG
 */
static int
get_tex_count(const char *name)
{
   /* Prefix is shorter than name by at least strlen("SAMP") == 4, so
    * prefix + "COUNT" + NUL always fits in strlen(name) + 5 bytes.
    */
   char count_reg[strlen(name) + 5];
   const char *p;

   p = strstr(name, "CONST");
   if (!p)
      p = strstr(name, "SAMP");
   if (!p)
      return 0;

   int n = p - name;
   snprintf(count_reg, sizeof(count_reg), "%.*sCOUNT", n, name);

   return reg_val(regbase(count_reg));
}
515 
516 static void
reg_dump_tex_samp_hi(const char * name,uint32_t dword,int level)517 reg_dump_tex_samp_hi(const char *name, uint32_t dword, int level)
518 {
519    if (!in_summary)
520       return;
521 
522    int num_unit = get_tex_count(name);
523    uint64_t gpuaddr = gpuaddr_lo | (((uint64_t)dword) << 32);
524    void *buf = hostptr(gpuaddr);
525 
526    if (!buf)
527       return;
528 
529    dump_tex_samp(buf, STATE_SRC_DIRECT, num_unit, level + 1);
530 }
531 
532 static void
reg_dump_tex_const_hi(const char * name,uint32_t dword,int level)533 reg_dump_tex_const_hi(const char *name, uint32_t dword, int level)
534 {
535    if (!in_summary)
536       return;
537 
538    int num_unit = get_tex_count(name);
539    uint64_t gpuaddr = gpuaddr_lo | (((uint64_t)dword) << 32);
540    void *buf = hostptr(gpuaddr);
541 
542    if (!buf)
543       return;
544 
545    dump_tex_const(buf, num_unit, level + 1);
546 }
547 
548 /*
549  * Registers with special handling (rnndec_decode() handles rest):
550  */
551 #define REG(x, fxn)    { #x, fxn }
552 #define REG64(x, fxn)  { #x, .fxn64 = fxn, .is_reg64 = true }
553 static struct {
554    const char *regname;
555    void (*fxn)(const char *name, uint32_t dword, int level);
556    void (*fxn64)(const char *name, uint64_t qword, int level);
557    uint32_t regbase;
558    bool is_reg64;
559 } reg_a2xx[] = {
560       REG(CP_SCRATCH_REG0, reg_dump_scratch),
561       REG(CP_SCRATCH_REG1, reg_dump_scratch),
562       REG(CP_SCRATCH_REG2, reg_dump_scratch),
563       REG(CP_SCRATCH_REG3, reg_dump_scratch),
564       REG(CP_SCRATCH_REG4, reg_dump_scratch),
565       REG(CP_SCRATCH_REG5, reg_dump_scratch),
566       REG(CP_SCRATCH_REG6, reg_dump_scratch),
567       REG(CP_SCRATCH_REG7, reg_dump_scratch),
568       {NULL},
569 }, reg_a3xx[] = {
570       REG(CP_SCRATCH_REG0, reg_dump_scratch),
571       REG(CP_SCRATCH_REG1, reg_dump_scratch),
572       REG(CP_SCRATCH_REG2, reg_dump_scratch),
573       REG(CP_SCRATCH_REG3, reg_dump_scratch),
574       REG(CP_SCRATCH_REG4, reg_dump_scratch),
575       REG(CP_SCRATCH_REG5, reg_dump_scratch),
576       REG(CP_SCRATCH_REG6, reg_dump_scratch),
577       REG(CP_SCRATCH_REG7, reg_dump_scratch),
578       REG(VSC_SIZE_ADDRESS, reg_dump_gpuaddr),
579       REG(SP_VS_PVT_MEM_ADDR_REG, reg_dump_gpuaddr),
580       REG(SP_FS_PVT_MEM_ADDR_REG, reg_dump_gpuaddr),
581       REG(SP_VS_OBJ_START_REG, reg_disasm_gpuaddr),
582       REG(SP_FS_OBJ_START_REG, reg_disasm_gpuaddr),
583       REG(TPL1_TP_FS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
584       {NULL},
585 }, reg_a4xx[] = {
586       REG(CP_SCRATCH[0].REG, reg_dump_scratch),
587       REG(CP_SCRATCH[0x1].REG, reg_dump_scratch),
588       REG(CP_SCRATCH[0x2].REG, reg_dump_scratch),
589       REG(CP_SCRATCH[0x3].REG, reg_dump_scratch),
590       REG(CP_SCRATCH[0x4].REG, reg_dump_scratch),
591       REG(CP_SCRATCH[0x5].REG, reg_dump_scratch),
592       REG(CP_SCRATCH[0x6].REG, reg_dump_scratch),
593       REG(CP_SCRATCH[0x7].REG, reg_dump_scratch),
594       REG(SP_VS_PVT_MEM_ADDR, reg_dump_gpuaddr),
595       REG(SP_FS_PVT_MEM_ADDR, reg_dump_gpuaddr),
596       REG(SP_GS_PVT_MEM_ADDR, reg_dump_gpuaddr),
597       REG(SP_HS_PVT_MEM_ADDR, reg_dump_gpuaddr),
598       REG(SP_DS_PVT_MEM_ADDR, reg_dump_gpuaddr),
599       REG(SP_CS_PVT_MEM_ADDR, reg_dump_gpuaddr),
600       REG(SP_VS_OBJ_START, reg_disasm_gpuaddr),
601       REG(SP_FS_OBJ_START, reg_disasm_gpuaddr),
602       REG(SP_GS_OBJ_START, reg_disasm_gpuaddr),
603       REG(SP_HS_OBJ_START, reg_disasm_gpuaddr),
604       REG(SP_DS_OBJ_START, reg_disasm_gpuaddr),
605       REG(SP_CS_OBJ_START, reg_disasm_gpuaddr),
606       REG(TPL1_TP_VS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
607       REG(TPL1_TP_HS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
608       REG(TPL1_TP_DS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
609       REG(TPL1_TP_GS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
610       REG(TPL1_TP_FS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
611       {NULL},
612 }, reg_a5xx[] = {
613       REG(CP_SCRATCH[0x4].REG, reg_dump_scratch),
614       REG(CP_SCRATCH[0x5].REG, reg_dump_scratch),
615       REG(CP_SCRATCH[0x6].REG, reg_dump_scratch),
616       REG(CP_SCRATCH[0x7].REG, reg_dump_scratch),
617       REG(SP_VS_OBJ_START_LO, reg_gpuaddr_lo),
618       REG(SP_VS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
619       REG(SP_HS_OBJ_START_LO, reg_gpuaddr_lo),
620       REG(SP_HS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
621       REG(SP_DS_OBJ_START_LO, reg_gpuaddr_lo),
622       REG(SP_DS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
623       REG(SP_GS_OBJ_START_LO, reg_gpuaddr_lo),
624       REG(SP_GS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
625       REG(SP_FS_OBJ_START_LO, reg_gpuaddr_lo),
626       REG(SP_FS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
627       REG(SP_CS_OBJ_START_LO, reg_gpuaddr_lo),
628       REG(SP_CS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
629       REG(TPL1_VS_TEX_CONST_LO, reg_gpuaddr_lo),
630       REG(TPL1_VS_TEX_CONST_HI, reg_dump_tex_const_hi),
631       REG(TPL1_VS_TEX_SAMP_LO, reg_gpuaddr_lo),
632       REG(TPL1_VS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
633       REG(TPL1_HS_TEX_CONST_LO, reg_gpuaddr_lo),
634       REG(TPL1_HS_TEX_CONST_HI, reg_dump_tex_const_hi),
635       REG(TPL1_HS_TEX_SAMP_LO, reg_gpuaddr_lo),
636       REG(TPL1_HS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
637       REG(TPL1_DS_TEX_CONST_LO, reg_gpuaddr_lo),
638       REG(TPL1_DS_TEX_CONST_HI, reg_dump_tex_const_hi),
639       REG(TPL1_DS_TEX_SAMP_LO, reg_gpuaddr_lo),
640       REG(TPL1_DS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
641       REG(TPL1_GS_TEX_CONST_LO, reg_gpuaddr_lo),
642       REG(TPL1_GS_TEX_CONST_HI, reg_dump_tex_const_hi),
643       REG(TPL1_GS_TEX_SAMP_LO, reg_gpuaddr_lo),
644       REG(TPL1_GS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
645       REG(TPL1_FS_TEX_CONST_LO, reg_gpuaddr_lo),
646       REG(TPL1_FS_TEX_CONST_HI, reg_dump_tex_const_hi),
647       REG(TPL1_FS_TEX_SAMP_LO, reg_gpuaddr_lo),
648       REG(TPL1_FS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
649       REG(TPL1_CS_TEX_CONST_LO, reg_gpuaddr_lo),
650       REG(TPL1_CS_TEX_CONST_HI, reg_dump_tex_const_hi),
651       REG(TPL1_CS_TEX_SAMP_LO, reg_gpuaddr_lo),
652       REG(TPL1_CS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
653       REG(TPL1_TP_BORDER_COLOR_BASE_ADDR_LO, reg_gpuaddr_lo),
654       REG(TPL1_TP_BORDER_COLOR_BASE_ADDR_HI, reg_dump_gpuaddr_hi),
655 //      REG(RB_MRT_FLAG_BUFFER[0].ADDR_LO, reg_gpuaddr_lo),
656 //      REG(RB_MRT_FLAG_BUFFER[0].ADDR_HI, reg_dump_gpuaddr_hi),
657 //      REG(RB_MRT_FLAG_BUFFER[1].ADDR_LO, reg_gpuaddr_lo),
658 //      REG(RB_MRT_FLAG_BUFFER[1].ADDR_HI, reg_dump_gpuaddr_hi),
659 //      REG(RB_MRT_FLAG_BUFFER[2].ADDR_LO, reg_gpuaddr_lo),
660 //      REG(RB_MRT_FLAG_BUFFER[2].ADDR_HI, reg_dump_gpuaddr_hi),
661 //      REG(RB_MRT_FLAG_BUFFER[3].ADDR_LO, reg_gpuaddr_lo),
662 //      REG(RB_MRT_FLAG_BUFFER[3].ADDR_HI, reg_dump_gpuaddr_hi),
663 //      REG(RB_MRT_FLAG_BUFFER[4].ADDR_LO, reg_gpuaddr_lo),
664 //      REG(RB_MRT_FLAG_BUFFER[4].ADDR_HI, reg_dump_gpuaddr_hi),
665 //      REG(RB_MRT_FLAG_BUFFER[5].ADDR_LO, reg_gpuaddr_lo),
666 //      REG(RB_MRT_FLAG_BUFFER[5].ADDR_HI, reg_dump_gpuaddr_hi),
667 //      REG(RB_MRT_FLAG_BUFFER[6].ADDR_LO, reg_gpuaddr_lo),
668 //      REG(RB_MRT_FLAG_BUFFER[6].ADDR_HI, reg_dump_gpuaddr_hi),
669 //      REG(RB_MRT_FLAG_BUFFER[7].ADDR_LO, reg_gpuaddr_lo),
670 //      REG(RB_MRT_FLAG_BUFFER[7].ADDR_HI, reg_dump_gpuaddr_hi),
671 //      REG(RB_BLIT_FLAG_DST_LO, reg_gpuaddr_lo),
672 //      REG(RB_BLIT_FLAG_DST_HI, reg_dump_gpuaddr_hi),
673 //      REG(RB_MRT[0].BASE_LO, reg_gpuaddr_lo),
674 //      REG(RB_MRT[0].BASE_HI, reg_dump_gpuaddr_hi),
675 //      REG(RB_DEPTH_BUFFER_BASE_LO, reg_gpuaddr_lo),
676 //      REG(RB_DEPTH_BUFFER_BASE_HI, reg_dump_gpuaddr_hi),
677 //      REG(RB_DEPTH_FLAG_BUFFER_BASE_LO, reg_gpuaddr_lo),
678 //      REG(RB_DEPTH_FLAG_BUFFER_BASE_HI, reg_dump_gpuaddr_hi),
679 //      REG(RB_BLIT_DST_LO, reg_gpuaddr_lo),
680 //      REG(RB_BLIT_DST_HI, reg_dump_gpuaddr_hi),
681 
682 //      REG(RB_2D_SRC_LO, reg_gpuaddr_lo),
683 //      REG(RB_2D_SRC_HI, reg_dump_gpuaddr_hi),
684 //      REG(RB_2D_SRC_FLAGS_LO, reg_gpuaddr_lo),
685 //      REG(RB_2D_SRC_FLAGS_HI, reg_dump_gpuaddr_hi),
686 //      REG(RB_2D_DST_LO, reg_gpuaddr_lo),
687 //      REG(RB_2D_DST_HI, reg_dump_gpuaddr_hi),
688 //      REG(RB_2D_DST_FLAGS_LO, reg_gpuaddr_lo),
689 //      REG(RB_2D_DST_FLAGS_HI, reg_dump_gpuaddr_hi),
690 
691       {NULL},
692 }, reg_a6xx[] = {
693       REG(CP_SCRATCH[0x4].REG, reg_dump_scratch),
694       REG(CP_SCRATCH[0x5].REG, reg_dump_scratch),
695       REG(CP_SCRATCH[0x6].REG, reg_dump_scratch),
696       REG(CP_SCRATCH[0x7].REG, reg_dump_scratch),
697 
698       REG64(SP_VS_OBJ_START, reg_disasm_gpuaddr64),
699       REG64(SP_HS_OBJ_START, reg_disasm_gpuaddr64),
700       REG64(SP_DS_OBJ_START, reg_disasm_gpuaddr64),
701       REG64(SP_GS_OBJ_START, reg_disasm_gpuaddr64),
702       REG64(SP_FS_OBJ_START, reg_disasm_gpuaddr64),
703       REG64(SP_CS_OBJ_START, reg_disasm_gpuaddr64),
704 
705       REG64(SP_VS_TEX_CONST, reg_dump_gpuaddr64),
706       REG64(SP_VS_TEX_SAMP, reg_dump_gpuaddr64),
707       REG64(SP_HS_TEX_CONST, reg_dump_gpuaddr64),
708       REG64(SP_HS_TEX_SAMP, reg_dump_gpuaddr64),
709       REG64(SP_DS_TEX_CONST, reg_dump_gpuaddr64),
710       REG64(SP_DS_TEX_SAMP, reg_dump_gpuaddr64),
711       REG64(SP_GS_TEX_CONST, reg_dump_gpuaddr64),
712       REG64(SP_GS_TEX_SAMP, reg_dump_gpuaddr64),
713       REG64(SP_FS_TEX_CONST, reg_dump_gpuaddr64),
714       REG64(SP_FS_TEX_SAMP, reg_dump_gpuaddr64),
715       REG64(SP_CS_TEX_CONST, reg_dump_gpuaddr64),
716       REG64(SP_CS_TEX_SAMP, reg_dump_gpuaddr64),
717 
718       {NULL},
719 }, reg_a7xx[] = {
720       REG64(SP_VS_OBJ_START, reg_disasm_gpuaddr64),
721       REG64(SP_HS_OBJ_START, reg_disasm_gpuaddr64),
722       REG64(SP_DS_OBJ_START, reg_disasm_gpuaddr64),
723       REG64(SP_GS_OBJ_START, reg_disasm_gpuaddr64),
724       REG64(SP_FS_OBJ_START, reg_disasm_gpuaddr64),
725       REG64(SP_CS_OBJ_START, reg_disasm_gpuaddr64),
726 
727       {NULL},
728 }, *type0_reg;
729 
730 static struct rnn *rnn;
731 
732 static void
init_rnn(const char * gpuname)733 init_rnn(const char *gpuname)
734 {
735    rnn = rnn_new(!options->color);
736 
737    rnn_load(rnn, gpuname);
738 
739    if (options->querystrs) {
740       int i;
741       queryvals = calloc(options->nquery, sizeof(queryvals[0]));
742 
743       for (i = 0; i < options->nquery; i++) {
744          int val = strtol(options->querystrs[i], NULL, 0);
745 
746          if (val == 0)
747             val = regbase(options->querystrs[i]);
748 
749          queryvals[i] = val;
750          printf("querystr: %s -> 0x%x\n", options->querystrs[i], queryvals[i]);
751       }
752    }
753 
754    for (unsigned idx = 0; type0_reg[idx].regname; idx++) {
755       type0_reg[idx].regbase = regbase(type0_reg[idx].regname);
756       if (!type0_reg[idx].regbase) {
757          printf("invalid register name: %s\n", type0_reg[idx].regname);
758          exit(1);
759       }
760    }
761 }
762 
763 void
reset_regs(void)764 reset_regs(void)
765 {
766    clear_written();
767    clear_lastvals();
768    memset(&ibs, 0, sizeof(ibs));
769 }
770 
771 void
cffdec_init(const struct cffdec_options * _options)772 cffdec_init(const struct cffdec_options *_options)
773 {
774    options = _options;
775    summary = options->summary;
776 
777    /* in case we're decoding multiple files: */
778    free(queryvals);
779    reset_regs();
780    draw_count = 0;
781 
782    if (!options->info)
783       return;
784 
785    switch (options->info->chip) {
786    case 2:
787       type0_reg = reg_a2xx;
788       init_rnn("a2xx");
789       break;
790    case 3:
791       type0_reg = reg_a3xx;
792       init_rnn("a3xx");
793       break;
794    case 4:
795       type0_reg = reg_a4xx;
796       init_rnn("a4xx");
797       break;
798    case 5:
799       type0_reg = reg_a5xx;
800       init_rnn("a5xx");
801       break;
802    case 6:
803       type0_reg = reg_a6xx;
804       init_rnn("a6xx");
805       break;
806    case 7:
807       type0_reg = reg_a7xx;
808       init_rnn("a7xx");
809       break;
810    default:
811       errx(-1, "unsupported generation: %u", options->info->chip);
812    }
813 }
814 
815 const char *
pktname(unsigned opc)816 pktname(unsigned opc)
817 {
818    return rnn_enumname(rnn, "adreno_pm4_type3_packets", opc);
819 }
820 
821 const char *
regname(uint32_t regbase,int color)822 regname(uint32_t regbase, int color)
823 {
824    return rnn_regname(rnn, regbase, color);
825 }
826 
827 uint32_t
regbase(const char * name)828 regbase(const char *name)
829 {
830    return rnn_regbase(rnn, name);
831 }
832 
/**
 * @return non-zero if the name of the register at @regbase ends with
 *         @suffix, 0 otherwise (including when the register has no name).
 *
 * The tail of the name is compared directly.  Searching for the first
 * occurrence of @suffix would wrongly reject a name that contains the suffix
 * earlier and also ends with it.
 */
static int
endswith(uint32_t regbase, const char *suffix)
{
   const char *name = regname(regbase, 0);

   /* NOTE(review): regname() may return NULL for unknown offsets -- confirm
    * against rnn_regname(); the guard is harmless either way.
    */
   if (!name)
      return 0;

   size_t name_len = strlen(name);
   size_t suffix_len = strlen(suffix);

   if (suffix_len > name_len)
      return 0;

   return strcmp(name + (name_len - suffix_len), suffix) == 0;
}
842 
843 struct regacc
regacc(struct rnn * r)844 regacc(struct rnn *r)
845 {
846    if (!r)
847       r = rnn;
848 
849    return (struct regacc){ .rnn = r };
850 }
851 
852 /* returns true if the complete reg value has been accumulated: */
853 bool
regacc_push(struct regacc * r,uint32_t regbase,uint32_t dword)854 regacc_push(struct regacc *r, uint32_t regbase, uint32_t dword)
855 {
856    if (r->has_dword_lo) {
857       /* Work around kernel devcore dumps which accidentially miss half of a 64b reg
858        * see: https://patchwork.freedesktop.org/series/112302/
859        */
860       if (regbase != r->regbase + 1) {
861          printf("WARNING: 64b discontinuity (%x, expected %x)\n", regbase, r->regbase + 1);
862          r->has_dword_lo = false;
863          return true;
864       }
865 
866       r->value |= ((uint64_t)dword) << 32;
867       r->has_dword_lo = false;
868 
869       return true;
870    }
871 
872    r->regbase = regbase;
873    r->value = dword;
874 
875    struct rnndecaddrinfo *info = rnn_reginfo(r->rnn, regbase);
876    r->has_dword_lo = (info->width == 64);
877 
878    /* Workaround for kernel devcore dump bugs: */
879    if ((info->width == 64) && endswith(regbase, "_HI")) {
880       printf("WARNING: 64b discontinuity (no _LO dword for %x)\n", regbase);
881       r->has_dword_lo = false;
882    }
883 
884    rnn_reginfo_free(info);
885 
886    return !r->has_dword_lo;
887 }
888 
889 void
dump_register_val(struct regacc * r,int level)890 dump_register_val(struct regacc *r, int level)
891 {
892    struct rnndecaddrinfo *info = rnn_reginfo(rnn, r->regbase);
893 
894    if (info && info->typeinfo) {
895       uint64_t gpuaddr = 0;
896       char *decoded = rnndec_decodeval(rnn->vc, info->typeinfo, r->value);
897       printf("%s%s: %s", levels[level], info->name, decoded);
898 
899       /* Try and figure out if we are looking at a gpuaddr.. this
900        * might be useful for other gen's too, but at least a5xx has
901        * the _HI/_LO suffix we can look for.  Maybe a better approach
902        * would be some special annotation in the xml..
903        * for a6xx use "address" and "waddress" types
904        */
905       if (options->info->chip >= 6) {
906          if (!strcmp(info->typeinfo->name, "address") ||
907              !strcmp(info->typeinfo->name, "waddress")) {
908             gpuaddr = r->value;
909          }
910       } else if (options->info->chip >= 5) {
911          /* TODO we shouldn't rely on reg_val() since reg_set() might
912           * not have been called yet for the other half of the 64b reg.
913           * We can remove this hack once a5xx.xml is converted to reg64
914           * and address/waddess.
915           */
916          if (endswith(r->regbase, "_HI") && endswith(r->regbase - 1, "_LO")) {
917             gpuaddr = (r->value << 32) | reg_val(r->regbase - 1);
918          } else if (endswith(r->regbase, "_LO") && endswith(r->regbase + 1, "_HI")) {
919             gpuaddr = (((uint64_t)reg_val(r->regbase + 1)) << 32) | r->value;
920          }
921       }
922 
923       if (gpuaddr && hostptr(gpuaddr)) {
924          printf("\t\tbase=%" PRIx64 ", offset=%" PRIu64 ", size=%u",
925                 gpubaseaddr(gpuaddr), gpuaddr - gpubaseaddr(gpuaddr),
926                 hostlen(gpubaseaddr(gpuaddr)));
927       }
928 
929       printf("\n");
930 
931       free(decoded);
932    } else if (info) {
933       printf("%s%s: %08"PRIx64"\n", levels[level], info->name, r->value);
934    } else {
935       printf("%s<%04x>: %08"PRIx64"\n", levels[level], r->regbase, r->value);
936    }
937 
938    rnn_reginfo_free(info);
939 }
940 
941 static void
dump_register(struct regacc * r,int level)942 dump_register(struct regacc *r, int level)
943 {
944    if (!quiet(3)) {
945       dump_register_val(r, level);
946    }
947 
948    for (unsigned idx = 0; type0_reg[idx].regname; idx++) {
949       if (type0_reg[idx].regbase == r->regbase) {
950          if (type0_reg[idx].is_reg64) {
951             type0_reg[idx].fxn64(type0_reg[idx].regname, r->value, level);
952          } else {
953             type0_reg[idx].fxn(type0_reg[idx].regname, (uint32_t)r->value, level);
954          }
955          break;
956       }
957    }
958 }
959 
/* Returns true if regbase lies in the range [0x2000, 0x2400), which this
 * file treats as the banked register range.
 */
static bool
is_banked_reg(uint32_t regbase)
{
   const uint32_t banked_first = 0x2000;
   const uint32_t banked_end = 0x2400; /* exclusive */

   if (regbase < banked_first)
      return false;

   return regbase < banked_end;
}
965 
966 static void
dump_registers(uint32_t regbase,uint32_t * dwords,uint32_t sizedwords,int level)967 dump_registers(uint32_t regbase, uint32_t *dwords, uint32_t sizedwords,
968                int level)
969 {
970    struct regacc r = regacc(NULL);
971 
972    while (sizedwords--) {
973       int last_summary = summary;
974 
975       /* access to non-banked registers needs a WFI:
976        * TODO banked register range for a2xx??
977        */
978       if (needs_wfi && !is_banked_reg(regbase))
979          printl(2, "NEEDS WFI: %s (%x)\n", regname(regbase, 1), regbase);
980 
981       reg_set(regbase, *dwords);
982       if (regacc_push(&r, regbase, *dwords))
983          dump_register(&r, level);
984       regbase++;
985       dwords++;
986       summary = last_summary;
987    }
988 }
989 
990 static void
dump_domain(uint32_t * dwords,uint32_t sizedwords,int level,const char * name)991 dump_domain(uint32_t *dwords, uint32_t sizedwords, int level, const char *name)
992 {
993    struct rnndomain *dom;
994    int i;
995 
996    dom = rnn_finddomain(rnn->db, name);
997 
998    if (!dom)
999       return;
1000 
1001    if (script_packet)
1002       script_packet(dwords, sizedwords, rnn, dom);
1003 
1004    if (quiet(2))
1005       return;
1006 
1007    for (i = 0; i < sizedwords; i++) {
1008       struct rnndecaddrinfo *info = rnndec_decodeaddr(rnn->vc, dom, i, 0);
1009       char *decoded;
1010       if (!(info && info->typeinfo))
1011          break;
1012       uint64_t value = dwords[i];
1013       if (info->typeinfo->high >= 32 && i < sizedwords - 1) {
1014          value |= (uint64_t)dwords[i + 1] << 32;
1015          i++; /* skip the next dword since we're printing it now */
1016       }
1017       decoded = rnndec_decodeval(rnn->vc, info->typeinfo, value);
1018       /* Unlike the register printing path, we don't print the name
1019        * of the register, so if it doesn't contain other named
1020        * things (i.e. it isn't a bitset) then print the register
1021        * name as if it's a bitset with a single entry. This avoids
1022        * having to create a dummy register with a single entry to
1023        * get a name in the decoding.
1024        */
1025       if (info->typeinfo->type == RNN_TTYPE_BITSET ||
1026           info->typeinfo->type == RNN_TTYPE_INLINE_BITSET) {
1027          printf("%s%s\n", levels[level], decoded);
1028       } else {
1029          printf("%s{ %s%s%s = %s }\n", levels[level], rnn->vc->colors->rname,
1030                 info->name, rnn->vc->colors->reset, decoded);
1031       }
1032       free(decoded);
1033       free(info->name);
1034       free(info);
1035    }
1036 }
1037 
/* Current bin bounds, included in the per-draw query output: */
static uint32_t bin_x1;
static uint32_t bin_x2;
static uint32_t bin_y1;
static uint32_t bin_y2;

static unsigned mode;

/* Strings printed by print_mode() and in the per-draw query output: */
static const char *render_mode;
static const char *thread;

/* Bitmask of render modes: */
static enum {
   MODE_BINNING = 1 << 0,
   MODE_GMEM = 1 << 1,
   MODE_BYPASS = 1 << 2,
   MODE_ALL = MODE_BINNING | MODE_GMEM | MODE_BYPASS,
} enable_mask = MODE_ALL;

/* Flags printed as "skip_ib2: g=.., l=.." by print_mode(): */
static bool skip_ib2_enable_global;
static bool skip_ib2_enable_local;
1050 
1051 static void
print_mode(int level)1052 print_mode(int level)
1053 {
1054    if ((options->info->chip >= 5) && !quiet(2)) {
1055       printf("%smode: %s", levels[level], render_mode);
1056       if (thread)
1057          printf(":%s", thread);
1058       printf("\n");
1059       printf("%sskip_ib2: g=%d, l=%d\n", levels[level], skip_ib2_enable_global,
1060              skip_ib2_enable_local);
1061    }
1062 }
1063 
1064 static bool
skip_query(void)1065 skip_query(void)
1066 {
1067    switch (options->query_mode) {
1068    case QUERY_ALL:
1069       /* never skip: */
1070       return false;
1071    case QUERY_WRITTEN:
1072       for (int i = 0; i < options->nquery; i++) {
1073          uint32_t regbase = queryvals[i];
1074          if (!reg_written(regbase)) {
1075             continue;
1076          }
1077          if (reg_rewritten(regbase)) {
1078             return false;
1079          }
1080       }
1081       return true;
1082    case QUERY_DELTA:
1083       for (int i = 0; i < options->nquery; i++) {
1084          uint32_t regbase = queryvals[i];
1085          if (!reg_written(regbase)) {
1086             continue;
1087          }
1088          uint32_t lastval = reg_val(regbase);
1089          if (lastval != lastvals[regbase]) {
1090             return false;
1091          }
1092       }
1093       return true;
1094    }
1095    return true;
1096 }
1097 
1098 static void
__do_query(const char * primtype,uint32_t num_indices)1099 __do_query(const char *primtype, uint32_t num_indices)
1100 {
1101    int n = 0;
1102 
1103    if ((5 <= options->info->chip) && (options->info->chip < 7)) {
1104       uint32_t scissor_tl = reg_val(regbase("GRAS_SC_WINDOW_SCISSOR_TL"));
1105       uint32_t scissor_br = reg_val(regbase("GRAS_SC_WINDOW_SCISSOR_BR"));
1106 
1107       bin_x1 = scissor_tl & 0xffff;
1108       bin_y1 = scissor_tl >> 16;
1109       bin_x2 = scissor_br & 0xffff;
1110       bin_y2 = scissor_br >> 16;
1111    }
1112 
1113    for (int i = 0; i < options->nquery; i++) {
1114       uint32_t regbase = queryvals[i];
1115       if (!reg_written(regbase))
1116          continue;
1117 
1118       struct regacc r = regacc(NULL);
1119 
1120       /* 64b regs require two successive 32b dwords: */
1121       for (int d = 0; d < 2; d++)
1122          if (regacc_push(&r, regbase + d, reg_val(regbase + d)))
1123             break;
1124 
1125       printf("%4d: %s(%u,%u-%u,%u):%u:", draw_count, primtype, bin_x1,
1126              bin_y1, bin_x2, bin_y2, num_indices);
1127       if (options->info->chip >= 5)
1128          printf("%s:", render_mode);
1129       if (thread)
1130          printf("%s:", thread);
1131       printf("\t%08"PRIx64, r.value);
1132       if (r.value != lastvals[regbase]) {
1133          printf("!");
1134       } else {
1135          printf(" ");
1136       }
1137       if (reg_rewritten(regbase)) {
1138          printf("+");
1139       } else {
1140          printf(" ");
1141       }
1142       dump_register_val(&r, 0);
1143       n++;
1144    }
1145 
1146    if (n > 1)
1147       printf("\n");
1148 }
1149 
1150 static void
do_query_compare(const char * primtype,uint32_t num_indices)1151 do_query_compare(const char *primtype, uint32_t num_indices)
1152 {
1153    unsigned saved_enable_mask = enable_mask;
1154    const char *saved_render_mode = render_mode;
1155 
1156    /* in 'query-compare' mode, we want to see if the register is writtten
1157     * or changed in any mode:
1158     *
1159     * (NOTE: this could cause false-positive for 'query-delta' if the reg
1160     * is written with different values in binning vs sysmem/gmem mode, as
1161     * we don't track previous values per-mode, but I think we can live with
1162     * that)
1163     */
1164    enable_mask = MODE_ALL;
1165 
1166    clear_rewritten();
1167    load_all_groups(0);
1168 
1169    if (!skip_query()) {
1170       /* dump binning pass values: */
1171       enable_mask = MODE_BINNING;
1172       render_mode = "BINNING";
1173       clear_rewritten();
1174       load_all_groups(0);
1175       __do_query(primtype, num_indices);
1176 
1177       /* dump draw pass values: */
1178       enable_mask = MODE_GMEM | MODE_BYPASS;
1179       render_mode = "DRAW";
1180       clear_rewritten();
1181       load_all_groups(0);
1182       __do_query(primtype, num_indices);
1183 
1184       printf("\n");
1185    }
1186 
1187    enable_mask = saved_enable_mask;
1188    render_mode = saved_render_mode;
1189 
1190    disable_all_groups();
1191 }
1192 
1193 /* well, actually query and script..
1194  * NOTE: call this before dump_register_summary()
1195  */
1196 static void
do_query(const char * primtype,uint32_t num_indices)1197 do_query(const char *primtype, uint32_t num_indices)
1198 {
1199    if (script_draw)
1200       script_draw(primtype, num_indices);
1201 
1202    if (options->query_compare) {
1203       do_query_compare(primtype, num_indices);
1204       return;
1205    }
1206 
1207    if (skip_query())
1208       return;
1209 
1210    __do_query(primtype, num_indices);
1211 }
1212 
1213 static void
cp_im_loadi(uint32_t * dwords,uint32_t sizedwords,int level)1214 cp_im_loadi(uint32_t *dwords, uint32_t sizedwords, int level)
1215 {
1216    uint32_t start = dwords[1] >> 16;
1217    uint32_t size = dwords[1] & 0xffff;
1218    const char *type = NULL, *ext = NULL;
1219    gl_shader_stage disasm_type;
1220 
1221    switch (dwords[0]) {
1222    case 0:
1223       type = "vertex";
1224       ext = "vo";
1225       disasm_type = MESA_SHADER_VERTEX;
1226       break;
1227    case 1:
1228       type = "fragment";
1229       ext = "fo";
1230       disasm_type = MESA_SHADER_FRAGMENT;
1231       break;
1232    default:
1233       type = "<unknown>";
1234       disasm_type = 0;
1235       break;
1236    }
1237 
1238    printf("%s%s shader, start=%04x, size=%04x\n", levels[level], type, start,
1239           size);
1240    disasm_a2xx(dwords + 2, sizedwords - 2, level + 2, disasm_type);
1241 
1242    /* dump raw shader: */
1243    if (ext)
1244       dump_shader(ext, dwords + 2, (sizedwords - 2) * 4);
1245 }
1246 
1247 static void
cp_wide_reg_write(uint32_t * dwords,uint32_t sizedwords,int level)1248 cp_wide_reg_write(uint32_t *dwords, uint32_t sizedwords, int level)
1249 {
1250    uint32_t reg = dwords[0] & 0xffff;
1251    struct regacc r = regacc(NULL);
1252    for (int i = 1; i < sizedwords; i++) {
1253       if (regacc_push(&r, reg, dwords[i]))
1254          dump_register(&r, level + 1);
1255       reg_set(reg, dwords[i]);
1256       reg++;
1257    }
1258 }
1259 
/* Kind of state carried by a load-state packet, as resolved by the
 * *_get_state_type() helpers.  Zero is intentionally not a valid value, so
 * that unpopulated lookup table slots decode to "no known state".
 */
enum state_t {
   TEX_SAMP = 1,
   TEX_CONST = 2,
   TEX_MIPADDR = 3, /* a3xx only */
   SHADER_PROG = 4,
   SHADER_CONST = 5,

   /* image/ssbo state: */
   SSBO_0 = 6,
   SSBO_1 = 7,
   SSBO_2 = 8,

   UBO = 9,

   /* unknown things, just hexdumped: */
   UNKNOWN_DWORDS = 10,
   UNKNOWN_2DWORDS = 11,
   UNKNOWN_4DWORDS = 12,
};
1279 
/* State block ids used to index the a3xx load-state lookup table
 * (values 0..7, in order):
 */
enum adreno_state_block {
   SB_VERT_TEX = 0,
   SB_VERT_MIPADDR,
   SB_FRAG_TEX,
   SB_FRAG_MIPADDR,
   SB_VERT_SHADER,
   SB_GEOM_SHADER,
   SB_FRAG_SHADER,
   SB_COMPUTE_SHADER,
};
1290 
1291 /* TODO there is probably a clever way to let rnndec parse things so
1292  * we don't have to care about packet format differences across gens
1293  */
1294 
1295 static void
a3xx_get_state_type(uint32_t * dwords,gl_shader_stage * stage,enum state_t * state,enum state_src_t * src)1296 a3xx_get_state_type(uint32_t *dwords, gl_shader_stage *stage,
1297                     enum state_t *state, enum state_src_t *src)
1298 {
1299    unsigned state_block_id = (dwords[0] >> 19) & 0x7;
1300    unsigned state_type = dwords[1] & 0x3;
1301    static const struct {
1302       gl_shader_stage stage;
1303       enum state_t state;
1304    } lookup[0xf][0x3] = {
1305       [SB_VERT_TEX][0] = {MESA_SHADER_VERTEX, TEX_SAMP},
1306       [SB_VERT_TEX][1] = {MESA_SHADER_VERTEX, TEX_CONST},
1307       [SB_FRAG_TEX][0] = {MESA_SHADER_FRAGMENT, TEX_SAMP},
1308       [SB_FRAG_TEX][1] = {MESA_SHADER_FRAGMENT, TEX_CONST},
1309       [SB_VERT_SHADER][0] = {MESA_SHADER_VERTEX, SHADER_PROG},
1310       [SB_VERT_SHADER][1] = {MESA_SHADER_VERTEX, SHADER_CONST},
1311       [SB_FRAG_SHADER][0] = {MESA_SHADER_FRAGMENT, SHADER_PROG},
1312       [SB_FRAG_SHADER][1] = {MESA_SHADER_FRAGMENT, SHADER_CONST},
1313    };
1314 
1315    *stage = lookup[state_block_id][state_type].stage;
1316    *state = lookup[state_block_id][state_type].state;
1317    unsigned state_src = (dwords[0] >> 16) & 0x7;
1318    if (state_src == 0 /* SS_DIRECT */)
1319       *src = STATE_SRC_DIRECT;
1320    else
1321       *src = STATE_SRC_INDIRECT;
1322 }
1323 
1324 static enum state_src_t
_get_state_src(unsigned dword0)1325 _get_state_src(unsigned dword0)
1326 {
1327    switch ((dword0 >> 16) & 0x3) {
1328    case 0: /* SS4_DIRECT / SS6_DIRECT */
1329       return STATE_SRC_DIRECT;
1330    case 2: /* SS4_INDIRECT / SS6_INDIRECT */
1331       return STATE_SRC_INDIRECT;
1332    case 1: /* SS6_BINDLESS */
1333       return STATE_SRC_BINDLESS;
1334    default:
1335       return STATE_SRC_DIRECT;
1336    }
1337 }
1338 
1339 static void
_get_state_type(unsigned state_block_id,unsigned state_type,gl_shader_stage * stage,enum state_t * state)1340 _get_state_type(unsigned state_block_id, unsigned state_type,
1341                 gl_shader_stage *stage, enum state_t *state)
1342 {
1343    static const struct {
1344       gl_shader_stage stage;
1345       enum state_t state;
1346    } lookup[0x10][0x4] = {
1347       // SB4_VS_TEX:
1348       [0x0][0] = {MESA_SHADER_VERTEX, TEX_SAMP},
1349       [0x0][1] = {MESA_SHADER_VERTEX, TEX_CONST},
1350       [0x0][2] = {MESA_SHADER_VERTEX, UBO},
1351       // SB4_HS_TEX:
1352       [0x1][0] = {MESA_SHADER_TESS_CTRL, TEX_SAMP},
1353       [0x1][1] = {MESA_SHADER_TESS_CTRL, TEX_CONST},
1354       [0x1][2] = {MESA_SHADER_TESS_CTRL, UBO},
1355       // SB4_DS_TEX:
1356       [0x2][0] = {MESA_SHADER_TESS_EVAL, TEX_SAMP},
1357       [0x2][1] = {MESA_SHADER_TESS_EVAL, TEX_CONST},
1358       [0x2][2] = {MESA_SHADER_TESS_EVAL, UBO},
1359       // SB4_GS_TEX:
1360       [0x3][0] = {MESA_SHADER_GEOMETRY, TEX_SAMP},
1361       [0x3][1] = {MESA_SHADER_GEOMETRY, TEX_CONST},
1362       [0x3][2] = {MESA_SHADER_GEOMETRY, UBO},
1363       // SB4_FS_TEX:
1364       [0x4][0] = {MESA_SHADER_FRAGMENT, TEX_SAMP},
1365       [0x4][1] = {MESA_SHADER_FRAGMENT, TEX_CONST},
1366       [0x4][2] = {MESA_SHADER_FRAGMENT, UBO},
1367       // SB4_CS_TEX:
1368       [0x5][0] = {MESA_SHADER_COMPUTE, TEX_SAMP},
1369       [0x5][1] = {MESA_SHADER_COMPUTE, TEX_CONST},
1370       [0x5][2] = {MESA_SHADER_COMPUTE, UBO},
1371       // SB4_VS_SHADER:
1372       [0x8][0] = {MESA_SHADER_VERTEX, SHADER_PROG},
1373       [0x8][1] = {MESA_SHADER_VERTEX, SHADER_CONST},
1374       [0x8][2] = {MESA_SHADER_VERTEX, UBO},
1375       // SB4_HS_SHADER
1376       [0x9][0] = {MESA_SHADER_TESS_CTRL, SHADER_PROG},
1377       [0x9][1] = {MESA_SHADER_TESS_CTRL, SHADER_CONST},
1378       [0x9][2] = {MESA_SHADER_TESS_CTRL, UBO},
1379       // SB4_DS_SHADER
1380       [0xa][0] = {MESA_SHADER_TESS_EVAL, SHADER_PROG},
1381       [0xa][1] = {MESA_SHADER_TESS_EVAL, SHADER_CONST},
1382       [0xa][2] = {MESA_SHADER_TESS_EVAL, UBO},
1383       // SB4_GS_SHADER
1384       [0xb][0] = {MESA_SHADER_GEOMETRY, SHADER_PROG},
1385       [0xb][1] = {MESA_SHADER_GEOMETRY, SHADER_CONST},
1386       [0xb][2] = {MESA_SHADER_GEOMETRY, UBO},
1387       // SB4_FS_SHADER:
1388       [0xc][0] = {MESA_SHADER_FRAGMENT, SHADER_PROG},
1389       [0xc][1] = {MESA_SHADER_FRAGMENT, SHADER_CONST},
1390       [0xc][2] = {MESA_SHADER_FRAGMENT, UBO},
1391       // SB4_CS_SHADER:
1392       [0xd][0] = {MESA_SHADER_COMPUTE, SHADER_PROG},
1393       [0xd][1] = {MESA_SHADER_COMPUTE, SHADER_CONST},
1394       [0xd][2] = {MESA_SHADER_COMPUTE, UBO},
1395       [0xd][3] = {MESA_SHADER_COMPUTE, SSBO_0}, /* a6xx location */
1396       // SB4_SSBO (shared across all stages)
1397       [0xe][0] = {0, SSBO_0}, /* a5xx (and a4xx?) location */
1398       [0xe][1] = {0, SSBO_1},
1399       [0xe][2] = {0, SSBO_2},
1400       // SB4_CS_SSBO
1401       [0xf][0] = {MESA_SHADER_COMPUTE, SSBO_0},
1402       [0xf][1] = {MESA_SHADER_COMPUTE, SSBO_1},
1403       [0xf][2] = {MESA_SHADER_COMPUTE, SSBO_2},
1404       // unknown things
1405       /* This looks like combined UBO state for 3d stages (a5xx and
1406        * before??  I think a6xx has UBO state per shader stage:
1407        */
1408       [0x6][2] = {0, UBO},
1409       [0x7][1] = {0, UNKNOWN_2DWORDS},
1410    };
1411 
1412    *stage = lookup[state_block_id][state_type].stage;
1413    *state = lookup[state_block_id][state_type].state;
1414 }
1415 
1416 static void
a4xx_get_state_type(uint32_t * dwords,gl_shader_stage * stage,enum state_t * state,enum state_src_t * src)1417 a4xx_get_state_type(uint32_t *dwords, gl_shader_stage *stage,
1418                     enum state_t *state, enum state_src_t *src)
1419 {
1420    unsigned state_block_id = (dwords[0] >> 18) & 0xf;
1421    unsigned state_type = dwords[1] & 0x3;
1422    _get_state_type(state_block_id, state_type, stage, state);
1423    *src = _get_state_src(dwords[0]);
1424 }
1425 
1426 static void
a6xx_get_state_type(uint32_t * dwords,gl_shader_stage * stage,enum state_t * state,enum state_src_t * src)1427 a6xx_get_state_type(uint32_t *dwords, gl_shader_stage *stage,
1428                     enum state_t *state, enum state_src_t *src)
1429 {
1430    unsigned state_block_id = (dwords[0] >> 18) & 0xf;
1431    unsigned state_type = (dwords[0] >> 14) & 0x3;
1432    _get_state_type(state_block_id, state_type, stage, state);
1433    *src = _get_state_src(dwords[0]);
1434 }
1435 
1436 static void
dump_tex_samp(uint32_t * texsamp,enum state_src_t src,int num_unit,int level)1437 dump_tex_samp(uint32_t *texsamp, enum state_src_t src, int num_unit, int level)
1438 {
1439    for (int i = 0; i < num_unit; i++) {
1440       /* work-around to reduce noise for opencl blob which always
1441        * writes the max # regardless of # of textures used
1442        */
1443       if ((num_unit == 16) && (texsamp[0] == 0) && (texsamp[1] == 0))
1444          break;
1445 
1446       if (options->info->chip == 3) {
1447          dump_domain(texsamp, 2, level + 2, "A3XX_TEX_SAMP");
1448          dump_hex(texsamp, 2, level + 1);
1449          texsamp += 2;
1450       } else if (options->info->chip == 4) {
1451          dump_domain(texsamp, 2, level + 2, "A4XX_TEX_SAMP");
1452          dump_hex(texsamp, 2, level + 1);
1453          texsamp += 2;
1454       } else if (options->info->chip == 5) {
1455          dump_domain(texsamp, 4, level + 2, "A5XX_TEX_SAMP");
1456          dump_hex(texsamp, 4, level + 1);
1457          texsamp += 4;
1458       } else if ((6 <= options->info->chip) && (options->info->chip < 8)) {
1459          dump_domain(texsamp, 4, level + 2, "A6XX_TEX_SAMP");
1460          dump_hex(texsamp, 4, level + 1);
1461          texsamp += src == STATE_SRC_BINDLESS ? 16 : 4;
1462       }
1463    }
1464 }
1465 
1466 static void
dump_tex_const(uint32_t * texconst,int num_unit,int level)1467 dump_tex_const(uint32_t *texconst, int num_unit, int level)
1468 {
1469    for (int i = 0; i < num_unit; i++) {
1470       /* work-around to reduce noise for opencl blob which always
1471        * writes the max # regardless of # of textures used
1472        */
1473       if ((num_unit == 16) && (texconst[0] == 0) && (texconst[1] == 0) &&
1474           (texconst[2] == 0) && (texconst[3] == 0))
1475          break;
1476 
1477       if (options->info->chip == 3) {
1478          dump_domain(texconst, 4, level + 2, "A3XX_TEX_CONST");
1479          dump_hex(texconst, 4, level + 1);
1480          texconst += 4;
1481       } else if (options->info->chip == 4) {
1482          dump_domain(texconst, 8, level + 2, "A4XX_TEX_CONST");
1483          if (options->dump_textures) {
1484             uint32_t addr = texconst[4] & ~0x1f;
1485             dump_gpuaddr(addr, level - 2);
1486          }
1487          dump_hex(texconst, 8, level + 1);
1488          texconst += 8;
1489       } else if (options->info->chip == 5) {
1490          dump_domain(texconst, 12, level + 2, "A5XX_TEX_CONST");
1491          if (options->dump_textures) {
1492             uint64_t addr =
1493                (((uint64_t)texconst[5] & 0x1ffff) << 32) | texconst[4];
1494             dump_gpuaddr_size(addr, level - 2, hostlen(addr) / 4, 3);
1495          }
1496          dump_hex(texconst, 12, level + 1);
1497          texconst += 12;
1498       } else if ((6 <= options->info->chip) && (options->info->chip < 8)) {
1499          dump_domain(texconst, 16, level + 2, "A6XX_TEX_CONST");
1500          if (options->dump_textures) {
1501             uint64_t addr =
1502                (((uint64_t)texconst[5] & 0x1ffff) << 32) | texconst[4];
1503             dump_gpuaddr_size(addr, level - 2, hostlen(addr) / 4, 3);
1504          }
1505          dump_hex(texconst, 16, level + 1);
1506          texconst += 16;
1507       }
1508    }
1509 }
1510 
1511 static void
cp_load_state(uint32_t * dwords,uint32_t sizedwords,int level)1512 cp_load_state(uint32_t *dwords, uint32_t sizedwords, int level)
1513 {
1514    gl_shader_stage stage;
1515    enum state_t state;
1516    enum state_src_t src;
1517    uint32_t num_unit = (dwords[0] >> 22) & 0x1ff;
1518    uint64_t ext_src_addr;
1519    void *contents;
1520    int i;
1521 
1522    if (quiet(2) && !options->script)
1523       return;
1524 
1525    if (options->info->chip >= 6)
1526       a6xx_get_state_type(dwords, &stage, &state, &src);
1527    else if (options->info->chip >= 4)
1528       a4xx_get_state_type(dwords, &stage, &state, &src);
1529    else
1530       a3xx_get_state_type(dwords, &stage, &state, &src);
1531 
1532    switch (src) {
1533    case STATE_SRC_DIRECT:
1534       ext_src_addr = 0;
1535       break;
1536    case STATE_SRC_INDIRECT:
1537       if (is_64b()) {
1538          ext_src_addr = dwords[1] & 0xfffffffc;
1539          ext_src_addr |= ((uint64_t)dwords[2]) << 32;
1540       } else {
1541          ext_src_addr = dwords[1] & 0xfffffffc;
1542       }
1543 
1544       break;
1545    case STATE_SRC_BINDLESS: {
1546       const unsigned base_reg = stage == MESA_SHADER_COMPUTE
1547                                    ? regbase("HLSQ_CS_BINDLESS_BASE[0].DESCRIPTOR")
1548                                    : regbase("HLSQ_BINDLESS_BASE[0].DESCRIPTOR");
1549 
1550       if (is_64b()) {
1551          const unsigned reg = base_reg + (dwords[1] >> 28) * 2;
1552          ext_src_addr = reg_val(reg) & 0xfffffffc;
1553          ext_src_addr |= ((uint64_t)reg_val(reg + 1)) << 32;
1554       } else {
1555          const unsigned reg = base_reg + (dwords[1] >> 28);
1556          ext_src_addr = reg_val(reg) & 0xfffffffc;
1557       }
1558 
1559       ext_src_addr += 4 * (dwords[1] & 0xffffff);
1560       break;
1561    }
1562    }
1563 
1564    if (ext_src_addr)
1565       contents = hostptr(ext_src_addr);
1566    else
1567       contents = is_64b() ? dwords + 3 : dwords + 2;
1568 
1569    if (!contents)
1570       return;
1571 
1572    switch (state) {
1573    case SHADER_PROG: {
1574       const char *ext = NULL;
1575 
1576       if (quiet(2))
1577          return;
1578 
1579       if (options->info->chip >= 4)
1580          num_unit *= 16;
1581       else if (options->info->chip >= 3)
1582          num_unit *= 4;
1583 
1584       /* shaders:
1585        *
1586        * note: num_unit seems to be # of instruction groups, where
1587        * an instruction group has 4 64bit instructions.
1588        */
1589       if (stage == MESA_SHADER_VERTEX) {
1590          ext = "vo3";
1591       } else if (stage == MESA_SHADER_GEOMETRY) {
1592          ext = "go3";
1593       } else if (stage == MESA_SHADER_COMPUTE) {
1594          ext = "co3";
1595       } else if (stage == MESA_SHADER_FRAGMENT) {
1596          ext = "fo3";
1597       }
1598 
1599       if (contents)
1600          try_disasm_a3xx(contents, num_unit * 2, level + 2, stdout,
1601                          options->info->chip * 100);
1602 
1603       /* dump raw shader: */
1604       if (ext)
1605          dump_shader(ext, contents, num_unit * 2 * 4);
1606 
1607       break;
1608    }
1609    case SHADER_CONST: {
1610       if (quiet(2))
1611          return;
1612 
1613       /* uniforms/consts:
1614        *
1615        * note: num_unit seems to be # of pairs of dwords??
1616        */
1617 
1618       if (options->info->chip >= 4)
1619          num_unit *= 2;
1620 
1621       dump_float(contents, num_unit * 2, level + 1);
1622       dump_hex(contents, num_unit * 2, level + 1);
1623 
1624       break;
1625    }
1626    case TEX_MIPADDR: {
1627       uint32_t *addrs = contents;
1628 
1629       if (quiet(2))
1630          return;
1631 
1632       /* mipmap consts block just appears to be array of num_unit gpu addr's: */
1633       for (i = 0; i < num_unit; i++) {
1634          void *ptr = hostptr(addrs[i]);
1635          printf("%s%2d: %08x\n", levels[level + 1], i, addrs[i]);
1636          if (options->dump_textures) {
1637             printf("base=%08x\n", (uint32_t)gpubaseaddr(addrs[i]));
1638             dump_hex(ptr, hostlen(addrs[i]) / 4, level + 1);
1639          }
1640       }
1641       break;
1642    }
1643    case TEX_SAMP: {
1644       dump_tex_samp(contents, src, num_unit, level);
1645       break;
1646    }
1647    case TEX_CONST: {
1648       dump_tex_const(contents, num_unit, level);
1649       break;
1650    }
1651    case SSBO_0: {
1652       uint32_t *ssboconst = (uint32_t *)contents;
1653 
1654       for (i = 0; i < num_unit; i++) {
1655          int sz = 4;
1656          if (options->info->chip == 4) {
1657             dump_domain(ssboconst, 4, level + 2, "A4XX_SSBO_0");
1658          } else if (options->info->chip == 5) {
1659             dump_domain(ssboconst, 4, level + 2, "A5XX_SSBO_0");
1660          } else if ((6 <= options->info->chip) && (options->info->chip < 8)) {
1661             sz = 16;
1662             dump_domain(ssboconst, 16, level + 2, "A6XX_TEX_CONST");
1663          }
1664          dump_hex(ssboconst, sz, level + 1);
1665          ssboconst += sz;
1666       }
1667       break;
1668    }
1669    case SSBO_1: {
1670       uint32_t *ssboconst = (uint32_t *)contents;
1671 
1672       for (i = 0; i < num_unit; i++) {
1673          if (options->info->chip == 4)
1674             dump_domain(ssboconst, 2, level + 2, "A4XX_SSBO_1");
1675          else if (options->info->chip == 5)
1676             dump_domain(ssboconst, 2, level + 2, "A5XX_SSBO_1");
1677          dump_hex(ssboconst, 2, level + 1);
1678          ssboconst += 2;
1679       }
1680       break;
1681    }
1682    case SSBO_2: {
1683       uint32_t *ssboconst = (uint32_t *)contents;
1684 
1685       for (i = 0; i < num_unit; i++) {
1686          /* TODO a4xx and a5xx might be same: */
1687          if (options->info->chip == 5) {
1688             dump_domain(ssboconst, 2, level + 2, "A5XX_SSBO_2");
1689             dump_hex(ssboconst, 2, level + 1);
1690          }
1691          if (options->dump_textures) {
1692             uint64_t addr =
1693                (((uint64_t)ssboconst[1] & 0x1ffff) << 32) | ssboconst[0];
1694             dump_gpuaddr_size(addr, level - 2, hostlen(addr) / 4, 3);
1695          }
1696          ssboconst += 2;
1697       }
1698       break;
1699    }
1700    case UBO: {
1701       uint32_t *uboconst = (uint32_t *)contents;
1702 
1703       for (i = 0; i < num_unit; i++) {
1704          // TODO probably similar on a4xx..
1705          if (options->info->chip == 5)
1706             dump_domain(uboconst, 2, level + 2, "A5XX_UBO");
1707          else if (options->info->chip == 6)
1708             dump_domain(uboconst, 2, level + 2, "A6XX_UBO");
1709          dump_hex(uboconst, 2, level + 1);
1710          uboconst += src == STATE_SRC_BINDLESS ? 16 : 2;
1711       }
1712       break;
1713    }
1714    case UNKNOWN_DWORDS: {
1715       if (quiet(2))
1716          return;
1717       dump_hex(contents, num_unit, level + 1);
1718       break;
1719    }
1720    case UNKNOWN_2DWORDS: {
1721       if (quiet(2))
1722          return;
1723       dump_hex(contents, num_unit * 2, level + 1);
1724       break;
1725    }
1726    case UNKNOWN_4DWORDS: {
1727       if (quiet(2))
1728          return;
1729       dump_hex(contents, num_unit * 4, level + 1);
1730       break;
1731    }
1732    default:
1733       if (quiet(2))
1734          return;
1735       /* hmm.. */
1736       dump_hex(contents, num_unit, level + 1);
1737       break;
1738    }
1739 }
1740 
1741 static void
cp_set_bin(uint32_t * dwords,uint32_t sizedwords,int level)1742 cp_set_bin(uint32_t *dwords, uint32_t sizedwords, int level)
1743 {
1744    bin_x1 = dwords[1] & 0xffff;
1745    bin_y1 = dwords[1] >> 16;
1746    bin_x2 = dwords[2] & 0xffff;
1747    bin_y2 = dwords[2] >> 16;
1748 }
1749 
1750 static void
dump_a2xx_tex_const(uint32_t * dwords,uint32_t sizedwords,uint32_t val,int level)1751 dump_a2xx_tex_const(uint32_t *dwords, uint32_t sizedwords, uint32_t val,
1752                     int level)
1753 {
1754    uint32_t w, h, p;
1755    uint32_t gpuaddr, flags, mip_gpuaddr, mip_flags;
1756    uint32_t min, mag, swiz, clamp_x, clamp_y, clamp_z;
1757    static const char *filter[] = {
1758       "point",
1759       "bilinear",
1760       "bicubic",
1761    };
1762    static const char *clamp[] = {
1763       "wrap",
1764       "mirror",
1765       "clamp-last-texel",
1766    };
1767    static const char swiznames[] = "xyzw01??";
1768 
1769    /* see sys2gmem_tex_const[] in adreno_a2xxx.c */
1770 
1771    /* Texture, FormatXYZW=Unsigned, ClampXYZ=Wrap/Repeat,
1772     * RFMode=ZeroClamp-1, Dim=1:2d, pitch
1773     */
1774    p = (dwords[0] >> 22) << 5;
1775    clamp_x = (dwords[0] >> 10) & 0x3;
1776    clamp_y = (dwords[0] >> 13) & 0x3;
1777    clamp_z = (dwords[0] >> 16) & 0x3;
1778 
1779    /* Format=6:8888_WZYX, EndianSwap=0:None, ReqSize=0:256bit, DimHi=0,
1780     * NearestClamp=1:OGL Mode
1781     */
1782    parse_dword_addr(dwords[1], &gpuaddr, &flags, 0xfff);
1783 
1784    /* Width, Height, EndianSwap=0:None */
1785    w = (dwords[2] & 0x1fff) + 1;
1786    h = ((dwords[2] >> 13) & 0x1fff) + 1;
1787 
1788    /* NumFormat=0:RF, DstSelXYZW=XYZW, ExpAdj=0, MagFilt=MinFilt=0:Point,
1789     * Mip=2:BaseMap
1790     */
1791    mag = (dwords[3] >> 19) & 0x3;
1792    min = (dwords[3] >> 21) & 0x3;
1793    swiz = (dwords[3] >> 1) & 0xfff;
1794 
1795    /* VolMag=VolMin=0:Point, MinMipLvl=0, MaxMipLvl=1, LodBiasH=V=0,
1796     * Dim3d=0
1797     */
1798    // XXX
1799 
1800    /* BorderColor=0:ABGRBlack, ForceBC=0:diable, TriJuice=0, Aniso=0,
1801     * Dim=1:2d, MipPacking=0
1802     */
1803    parse_dword_addr(dwords[5], &mip_gpuaddr, &mip_flags, 0xfff);
1804 
1805    printf("%sset texture const %04x\n", levels[level], val);
1806    printf("%sclamp x/y/z: %s/%s/%s\n", levels[level + 1], clamp[clamp_x],
1807           clamp[clamp_y], clamp[clamp_z]);
1808    printf("%sfilter min/mag: %s/%s\n", levels[level + 1], filter[min],
1809           filter[mag]);
1810    printf("%sswizzle: %c%c%c%c\n", levels[level + 1],
1811           swiznames[(swiz >> 0) & 0x7], swiznames[(swiz >> 3) & 0x7],
1812           swiznames[(swiz >> 6) & 0x7], swiznames[(swiz >> 9) & 0x7]);
1813    printf("%saddr=%08x (flags=%03x), size=%dx%d, pitch=%d, format=%s\n",
1814           levels[level + 1], gpuaddr, flags, w, h, p,
1815           rnn_enumname(rnn, "a2xx_sq_surfaceformat", flags & 0xf));
1816    printf("%smipaddr=%08x (flags=%03x)\n", levels[level + 1], mip_gpuaddr,
1817           mip_flags);
1818 }
1819 
1820 static void
dump_a2xx_shader_const(uint32_t * dwords,uint32_t sizedwords,uint32_t val,int level)1821 dump_a2xx_shader_const(uint32_t *dwords, uint32_t sizedwords, uint32_t val,
1822                        int level)
1823 {
1824    int i;
1825    printf("%sset shader const %04x\n", levels[level], val);
1826    for (i = 0; i < sizedwords;) {
1827       uint32_t gpuaddr, flags;
1828       parse_dword_addr(dwords[i++], &gpuaddr, &flags, 0xf);
1829       void *addr = hostptr(gpuaddr);
1830       if (addr) {
1831          const char *fmt =
1832             rnn_enumname(rnn, "a2xx_sq_surfaceformat", flags & 0xf);
1833          uint32_t size = dwords[i++];
1834          printf("%saddr=%08x, size=%d, format=%s\n", levels[level + 1], gpuaddr,
1835                 size, fmt);
1836          // TODO maybe dump these as bytes instead of dwords?
1837          size = (size + 3) / 4; // for now convert to dwords
1838          dump_hex(addr, MIN2(size, 64), level + 1);
1839          if (size > MIN2(size, 64))
1840             printf("%s\t\t...\n", levels[level + 1]);
1841          dump_float(addr, MIN2(size, 64), level + 1);
1842          if (size > MIN2(size, 64))
1843             printf("%s\t\t...\n", levels[level + 1]);
1844       }
1845    }
1846 }
1847 
1848 static void
cp_set_const(uint32_t * dwords,uint32_t sizedwords,int level)1849 cp_set_const(uint32_t *dwords, uint32_t sizedwords, int level)
1850 {
1851    uint32_t val = dwords[0] & 0xffff;
1852    switch ((dwords[0] >> 16) & 0xf) {
1853    case 0x0:
1854       dump_float((float *)(dwords + 1), sizedwords - 1, level + 1);
1855       break;
1856    case 0x1:
1857       /* need to figure out how const space is partitioned between
1858        * attributes, textures, etc..
1859        */
1860       if (val < 0x78) {
1861          dump_a2xx_tex_const(dwords + 1, sizedwords - 1, val, level);
1862       } else {
1863          dump_a2xx_shader_const(dwords + 1, sizedwords - 1, val, level);
1864       }
1865       break;
1866    case 0x2:
1867       printf("%sset bool const %04x\n", levels[level], val);
1868       break;
1869    case 0x3:
1870       printf("%sset loop const %04x\n", levels[level], val);
1871       break;
1872    case 0x4:
1873       val += 0x2000;
1874       if (dwords[0] & 0x80000000) {
1875          uint32_t srcreg = dwords[1];
1876          uint32_t dstval = dwords[2];
1877 
1878          /* TODO: not sure what happens w/ payload != 2.. */
1879          assert(sizedwords == 3);
1880          assert(srcreg < ARRAY_SIZE(type0_reg_vals));
1881 
1882          /* note: rnn_regname uses a static buf so we can't do
1883           * two regname() calls for one printf..
1884           */
1885          printf("%s%s = %08x + ", levels[level], regname(val, 1), dstval);
1886          printf("%s (%08x)\n", regname(srcreg, 1), type0_reg_vals[srcreg]);
1887 
1888          dstval += type0_reg_vals[srcreg];
1889 
1890          dump_registers(val, &dstval, 1, level + 1);
1891       } else {
1892          dump_registers(val, dwords + 1, sizedwords - 1, level + 1);
1893       }
1894       break;
1895    }
1896 }
1897 
1898 static void dump_register_summary(int level);
1899 
1900 static void
cp_event_write(uint32_t * dwords,uint32_t sizedwords,int level)1901 cp_event_write(uint32_t *dwords, uint32_t sizedwords, int level)
1902 {
1903    const char *name = rnn_enumname(rnn, "vgt_event_type", dwords[0]);
1904    printl(2, "%sevent %s\n", levels[level], name);
1905 
1906    if (name && (options->info->chip > 5)) {
1907       char eventname[64];
1908       snprintf(eventname, sizeof(eventname), "EVENT:%s", name);
1909       if (!strcmp(name, "BLIT")) {
1910          do_query(eventname, 0);
1911          print_mode(level);
1912          dump_register_summary(level);
1913       }
1914    }
1915 }
1916 
1917 static void
dump_register_summary(int level)1918 dump_register_summary(int level)
1919 {
1920    uint32_t i;
1921    bool saved_summary = summary;
1922    summary = false;
1923 
1924    in_summary = true;
1925 
1926    struct regacc r = regacc(NULL);
1927 
1928    /* dump current state of registers: */
1929    printl(2, "%sdraw[%i] register values\n", levels[level], draw_count);
1930 
1931    bool changed = false;
1932    bool written = false;
1933 
1934    for (i = 0; i < regcnt(); i++) {
1935       uint32_t regbase = i;
1936       uint32_t lastval = reg_val(regbase);
1937       /* skip registers that haven't been updated since last draw/blit: */
1938       if (!(options->allregs || reg_rewritten(regbase)))
1939          continue;
1940       if (!reg_written(regbase))
1941          continue;
1942       if (lastval != lastvals[regbase]) {
1943          changed |= true;
1944          lastvals[regbase] = lastval;
1945       }
1946       if (reg_rewritten(regbase)) {
1947          written |= true;
1948       }
1949       if (!quiet(2)) {
1950          if (regacc_push(&r, regbase, lastval)) {
1951             if (changed) {
1952                printl(2, "!");
1953             } else {
1954                printl(2, " ");
1955             }
1956             if (written) {
1957                printl(2, "+");
1958             } else {
1959                printl(2, " ");
1960             }
1961             printl(2, "\t%08"PRIx64, r.value);
1962             dump_register(&r, level);
1963 
1964             changed = written = false;
1965          }
1966       }
1967    }
1968 
1969    clear_rewritten();
1970 
1971    in_summary = false;
1972 
1973    draw_count++;
1974    summary = saved_summary;
1975 }
1976 
1977 static uint32_t
draw_indx_common(uint32_t * dwords,int level)1978 draw_indx_common(uint32_t *dwords, int level)
1979 {
1980    uint32_t prim_type = dwords[1] & 0x1f;
1981    uint32_t source_select = (dwords[1] >> 6) & 0x3;
1982    uint32_t num_indices = dwords[2];
1983    const char *primtype;
1984 
1985    primtype = rnn_enumname(rnn, "pc_di_primtype", prim_type);
1986 
1987    do_query(primtype, num_indices);
1988 
1989    printl(2, "%sdraw:          %d\n", levels[level], draws[ib]);
1990    printl(2, "%sprim_type:     %s (%d)\n", levels[level], primtype, prim_type);
1991    printl(2, "%ssource_select: %s (%d)\n", levels[level],
1992           rnn_enumname(rnn, "pc_di_src_sel", source_select), source_select);
1993    printl(2, "%snum_indices:   %d\n", levels[level], num_indices);
1994 
1995    vertices += num_indices;
1996 
1997    draws[ib]++;
1998 
1999    return num_indices;
2000 }
2001 
/*
 * Index element width as decoded by the draw-index packet handlers.
 * INDEX_SIZE_IGN and INDEX_SIZE_INVALID share the value of
 * INDEX_SIZE_16_BIT.
 */
enum pc_di_index_size {
   INDEX_SIZE_IGN = 0,
   INDEX_SIZE_16_BIT = INDEX_SIZE_IGN,
   INDEX_SIZE_32_BIT = 1,
   INDEX_SIZE_8_BIT = 2,
   INDEX_SIZE_INVALID = INDEX_SIZE_IGN,
};
2009 
2010 static void
cp_draw_indx(uint32_t * dwords,uint32_t sizedwords,int level)2011 cp_draw_indx(uint32_t *dwords, uint32_t sizedwords, int level)
2012 {
2013    uint32_t num_indices = draw_indx_common(dwords, level);
2014 
2015    assert(!is_64b());
2016 
2017    /* if we have an index buffer, dump that: */
2018    if (sizedwords == 5) {
2019       void *ptr = hostptr(dwords[3]);
2020       printl(2, "%sgpuaddr:       %08x\n", levels[level], dwords[3]);
2021       printl(2, "%sidx_size:      %d\n", levels[level], dwords[4]);
2022       if (ptr) {
2023          enum pc_di_index_size size =
2024             ((dwords[1] >> 11) & 1) | ((dwords[1] >> 12) & 2);
2025          if (!quiet(2)) {
2026             int i;
2027             printf("%sidxs:         ", levels[level]);
2028             if (size == INDEX_SIZE_8_BIT) {
2029                uint8_t *idx = ptr;
2030                for (i = 0; i < dwords[4]; i++)
2031                   printf(" %u", idx[i]);
2032             } else if (size == INDEX_SIZE_16_BIT) {
2033                uint16_t *idx = ptr;
2034                for (i = 0; i < dwords[4] / 2; i++)
2035                   printf(" %u", idx[i]);
2036             } else if (size == INDEX_SIZE_32_BIT) {
2037                uint32_t *idx = ptr;
2038                for (i = 0; i < dwords[4] / 4; i++)
2039                   printf(" %u", idx[i]);
2040             }
2041             printf("\n");
2042             dump_hex(ptr, dwords[4] / 4, level + 1);
2043          }
2044       }
2045    }
2046 
2047    /* don't bother dumping registers for the dummy draw_indx's.. */
2048    if (num_indices > 0)
2049       dump_register_summary(level);
2050 
2051    needs_wfi = true;
2052 }
2053 
2054 static void
cp_draw_indx_2(uint32_t * dwords,uint32_t sizedwords,int level)2055 cp_draw_indx_2(uint32_t *dwords, uint32_t sizedwords, int level)
2056 {
2057    uint32_t num_indices = draw_indx_common(dwords, level);
2058    enum pc_di_index_size size =
2059       ((dwords[1] >> 11) & 1) | ((dwords[1] >> 12) & 2);
2060    void *ptr = &dwords[3];
2061    int sz = 0;
2062 
2063    assert(!is_64b());
2064 
2065    /* CP_DRAW_INDX_2 has embedded/inline idx buffer: */
2066    if (!quiet(2)) {
2067       int i;
2068       printf("%sidxs:         ", levels[level]);
2069       if (size == INDEX_SIZE_8_BIT) {
2070          uint8_t *idx = ptr;
2071          for (i = 0; i < num_indices; i++)
2072             printf(" %u", idx[i]);
2073          sz = num_indices;
2074       } else if (size == INDEX_SIZE_16_BIT) {
2075          uint16_t *idx = ptr;
2076          for (i = 0; i < num_indices; i++)
2077             printf(" %u", idx[i]);
2078          sz = num_indices * 2;
2079       } else if (size == INDEX_SIZE_32_BIT) {
2080          uint32_t *idx = ptr;
2081          for (i = 0; i < num_indices; i++)
2082             printf(" %u", idx[i]);
2083          sz = num_indices * 4;
2084       }
2085       printf("\n");
2086       dump_hex(ptr, sz / 4, level + 1);
2087    }
2088 
2089    /* don't bother dumping registers for the dummy draw_indx's.. */
2090    if (num_indices > 0)
2091       dump_register_summary(level);
2092 }
2093 
2094 static void
cp_draw_indx_offset(uint32_t * dwords,uint32_t sizedwords,int level)2095 cp_draw_indx_offset(uint32_t *dwords, uint32_t sizedwords, int level)
2096 {
2097    uint32_t num_indices = dwords[2];
2098    uint32_t prim_type = dwords[0] & 0x1f;
2099 
2100    do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), num_indices);
2101    print_mode(level);
2102 
2103    /* don't bother dumping registers for the dummy draw_indx's.. */
2104    if (num_indices > 0)
2105       dump_register_summary(level);
2106 }
2107 
2108 static void
cp_draw_indx_indirect(uint32_t * dwords,uint32_t sizedwords,int level)2109 cp_draw_indx_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
2110 {
2111    uint32_t prim_type = dwords[0] & 0x1f;
2112    uint64_t addr;
2113 
2114    do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);
2115    print_mode(level);
2116 
2117    if (is_64b())
2118       addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1];
2119    else
2120       addr = dwords[1];
2121    dump_gpuaddr_size(addr, level, 0x10, 2);
2122 
2123    if (is_64b())
2124       addr = (((uint64_t)dwords[5] & 0x1ffff) << 32) | dwords[4];
2125    else
2126       addr = dwords[3];
2127    dump_gpuaddr_size(addr, level, 0x10, 2);
2128 
2129    dump_register_summary(level);
2130 }
2131 
2132 static void
cp_draw_indirect(uint32_t * dwords,uint32_t sizedwords,int level)2133 cp_draw_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
2134 {
2135    uint32_t prim_type = dwords[0] & 0x1f;
2136    uint64_t addr;
2137 
2138    do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);
2139    print_mode(level);
2140 
2141    addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1];
2142    dump_gpuaddr_size(addr, level, 0x10, 2);
2143 
2144    dump_register_summary(level);
2145 }
2146 
2147 static void
cp_draw_indirect_multi(uint32_t * dwords,uint32_t sizedwords,int level)2148 cp_draw_indirect_multi(uint32_t *dwords, uint32_t sizedwords, int level)
2149 {
2150    uint32_t prim_type = dwords[0] & 0x1f;
2151    uint32_t count = dwords[2];
2152 
2153    do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);
2154    print_mode(level);
2155 
2156    struct rnndomain *domain = rnn_finddomain(rnn->db, "CP_DRAW_INDIRECT_MULTI");
2157    uint32_t count_dword = rnndec_decodereg(rnn->vc, domain, "INDIRECT_COUNT");
2158    uint32_t addr_dword = rnndec_decodereg(rnn->vc, domain, "INDIRECT");
2159    uint64_t stride_dword = rnndec_decodereg(rnn->vc, domain, "STRIDE");
2160 
2161    if (count_dword) {
2162       uint64_t count_addr =
2163          ((uint64_t)dwords[count_dword + 1] << 32) | dwords[count_dword];
2164       uint32_t *buf = hostptr(count_addr);
2165 
2166       /* Don't print more draws than this if we don't know the indirect
2167        * count. It's possible the user will give ~0 or some other large
2168        * value, expecting the GPU to fill in the draw count, and we don't
2169        * want to print a gazillion draws in that case:
2170        */
2171       const uint32_t max_draw_count = 0x100;
2172 
2173       /* Assume the indirect count is garbage if it's larger than this
2174        * (quite large) value or 0. Hopefully this catches most cases.
2175        */
2176       const uint32_t max_indirect_draw_count = 0x10000;
2177 
2178       if (buf) {
2179          printf("%sindirect count: %u\n", levels[level], *buf);
2180          if (*buf == 0 || *buf > max_indirect_draw_count) {
2181             /* garbage value */
2182             count = MIN2(count, max_draw_count);
2183          } else {
2184             /* not garbage */
2185             count = MIN2(count, *buf);
2186          }
2187       } else {
2188          count = MIN2(count, max_draw_count);
2189       }
2190    }
2191 
2192    if (addr_dword && stride_dword) {
2193       uint64_t addr =
2194          ((uint64_t)dwords[addr_dword + 1] << 32) | dwords[addr_dword];
2195       uint32_t stride = dwords[stride_dword];
2196 
2197       for (unsigned i = 0; i < count; i++, addr += stride) {
2198          printf("%sdraw %d:\n", levels[level], i);
2199          dump_gpuaddr_size(addr, level, 0x10, 2);
2200       }
2201    }
2202 
2203    dump_register_summary(level);
2204 }
2205 
2206 static void
cp_draw_auto(uint32_t * dwords,uint32_t sizedwords,int level)2207 cp_draw_auto(uint32_t *dwords, uint32_t sizedwords, int level)
2208 {
2209    uint32_t prim_type = dwords[0] & 0x1f;
2210 
2211    do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);
2212    print_mode(level);
2213 
2214    dump_register_summary(level);
2215 }
2216 
/*
 * Compute dispatch: run the "COMPUTE" query and dump the register summary.
 * The packet payload is not inspected.
 */
static void
cp_run_cl(uint32_t *dwords, uint32_t sizedwords, int level)
{
   (void)dwords;
   (void)sizedwords;

   do_query("COMPUTE", 1);
   dump_register_summary(level);
}
2223 
/*
 * Print the payload of a NOP packet as text.  The payload is read byte by
 * byte, up to 4 * sizedwords bytes; printing stops at the first NUL byte
 * and bytes for which isascii() is false are skipped.
 */
static void
print_nop_tail_string(uint32_t *dwords, uint32_t sizedwords)
{
   const char *text = (void *)dwords;
   const uint32_t nbytes = 4 * sizedwords;

   for (uint32_t pos = 0; pos < nbytes; pos++) {
      const char c = text[pos];

      if (c == '\0')
         return;
      if (!isascii(c))
         continue;
      printf("%c", c);
   }
}
2235 
2236 static void
cp_nop(uint32_t * dwords,uint32_t sizedwords,int level)2237 cp_nop(uint32_t *dwords, uint32_t sizedwords, int level)
2238 {
2239    if (quiet(3))
2240       return;
2241 
2242    /* NOP is used to encode special debug strings by Turnip.
2243     * See tu_cs_emit_debug_magic_strv(...)
2244     */
2245    static int scope_level = 0;
2246    uint32_t identifier = dwords[0];
2247    bool is_special = false;
2248    if (identifier == CP_NOP_MESG) {
2249       printf("### ");
2250       is_special = true;
2251    } else if (identifier == CP_NOP_BEGN) {
2252       printf(">>> #%d: ", ++scope_level);
2253       is_special = true;
2254    } else if (identifier == CP_NOP_END) {
2255       printf("<<< #%d: ", scope_level--);
2256       is_special = true;
2257    }
2258 
2259    if (is_special) {
2260       if (sizedwords > 1) {
2261          print_nop_tail_string(dwords + 1, sizedwords - 1);
2262          printf("\n");
2263       }
2264       return;
2265    }
2266 
2267    // blob doesn't use CP_NOP for string_marker but it does
2268    // use it for things that end up looking like, but aren't
2269    // ascii chars:
2270    if (!options->decode_markers)
2271       return;
2272 
2273    print_nop_tail_string(dwords, sizedwords);
2274    printf("\n");
2275 }
2276 
/*
 * Extract the target address and size from an indirect-buffer packet.
 *
 * With 64-bit addressing the payload is three dwords (address low, address
 * high, size); otherwise two (address, size).  The results are stored
 * through ibaddr and ibsize.
 *
 * Returns a pointer to the dword just past the packet payload.
 */
uint32_t *
parse_cp_indirect(uint32_t *dwords, uint32_t sizedwords,
                  uint64_t *ibaddr, uint32_t *ibsize)
{
   if (!is_64b()) {
      assert(sizedwords == 2);

      *ibaddr = dwords[0];
      *ibsize = dwords[1];

      return &dwords[2];
   }

   assert(sizedwords == 3);

   /* a5xx+.. high 32b of gpu addr, then size: */
   *ibaddr = dwords[0];
   *ibaddr |= ((uint64_t)dwords[1]) << 32;
   *ibsize = dwords[2];

   return &dwords[3];
}
2299 
2300 static void
cp_indirect(uint32_t * dwords,uint32_t sizedwords,int level)2301 cp_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
2302 {
2303    /* traverse indirect buffers */
2304    uint64_t ibaddr;
2305    uint32_t ibsize;
2306    uint32_t *ptr = NULL;
2307 
2308    dwords = parse_cp_indirect(dwords, sizedwords, &ibaddr, &ibsize);
2309 
2310    if (!quiet(3)) {
2311       if (is_64b()) {
2312          printf("%sibaddr:%016" PRIx64 "\n", levels[level], ibaddr);
2313       } else {
2314          printf("%sibaddr:%08x\n", levels[level], (uint32_t)ibaddr);
2315       }
2316       printf("%sibsize:%08x\n", levels[level], ibsize);
2317    }
2318 
2319    if (options->once && has_dumped(ibaddr, enable_mask))
2320       return;
2321 
2322    /* 'query-compare' mode implies 'once' mode, although we need only to
2323     * process the cmdstream for *any* enable_mask mode, since we are
2324     * comparing binning vs draw reg values at the same time, ie. it is
2325     * not useful to process the same draw in both binning and draw pass.
2326     */
2327    if (options->query_compare && has_dumped(ibaddr, MODE_ALL))
2328       return;
2329 
2330    /* map gpuaddr back to hostptr: */
2331    ptr = hostptr(ibaddr);
2332 
2333    if (ptr) {
2334       /* If the GPU hung within the target IB, the trigger point will be
2335        * just after the current CP_INDIRECT_BUFFER.  Because the IB is
2336        * executed but never returns.  Account for this by checking if
2337        * the IB returned:
2338        */
2339       highlight_gpuaddr(gpuaddr(dwords));
2340 
2341       ib++;
2342       ibs[ib].base = ibaddr;
2343       ibs[ib].size = ibsize;
2344 
2345       dump_commands(ptr, ibsize, level);
2346       ib--;
2347    } else {
2348       fprintf(stderr, "could not find: %016" PRIx64 " (%d)\n", ibaddr, ibsize);
2349    }
2350 }
2351 
2352 static void
cp_start_bin(uint32_t * dwords,uint32_t sizedwords,int level)2353 cp_start_bin(uint32_t *dwords, uint32_t sizedwords, int level)
2354 {
2355    uint64_t ibaddr;
2356    uint32_t ibsize;
2357    uint32_t loopcount;
2358    uint32_t *ptr = NULL;
2359 
2360    loopcount = dwords[0];
2361    ibaddr = dwords[1];
2362    ibaddr |= ((uint64_t)dwords[2]) << 32;
2363    ibsize = dwords[3];
2364 
2365    /* map gpuaddr back to hostptr: */
2366    ptr = hostptr(ibaddr);
2367 
2368    if (ptr) {
2369       /* If the GPU hung within the target IB, the trigger point will be
2370        * just after the current CP_START_BIN.  Because the IB is
2371        * executed but never returns.  Account for this by checking if
2372        * the IB returned:
2373        */
2374       highlight_gpuaddr(gpuaddr(&dwords[5]));
2375 
2376       /* TODO: we should duplicate the body of the loop after each bin, so
2377        * that draws get the correct state. We should also figure out if there
2378        * are any registers that can tell us what bin we're in when we hang so
2379        * that crashdec points to the right place.
2380        */
2381       ib++;
2382       for (uint32_t i = 0; i < loopcount; i++) {
2383          ibs[ib].base = ibaddr;
2384          ibs[ib].size = ibsize;
2385          printl(3, "%sbin %u\n", levels[level], i);
2386          dump_commands(ptr, ibsize, level);
2387          ibaddr += ibsize;
2388          ptr += ibsize;
2389       }
2390       ib--;
2391    } else {
2392       fprintf(stderr, "could not find: %016" PRIx64 " (%d)\n", ibaddr, ibsize);
2393    }
2394 }
2395 
2396 static void
cp_fixed_stride_draw_table(uint32_t * dwords,uint32_t sizedwords,int level)2397 cp_fixed_stride_draw_table(uint32_t *dwords, uint32_t sizedwords, int level)
2398 {
2399    uint64_t ibaddr;
2400    uint32_t ibsize;
2401    uint32_t loopcount;
2402    uint32_t *ptr = NULL;
2403 
2404    loopcount = dwords[3];
2405    ibaddr = dwords[0];
2406    ibaddr |= ((uint64_t)dwords[1]) << 32;
2407    ibsize = dwords[2] >> 20;
2408 
2409    /* map gpuaddr back to hostptr: */
2410    ptr = hostptr(ibaddr);
2411 
2412    if (ptr) {
2413       /* If the GPU hung within the target IB, the trigger point will be
2414        * just after the current CP_START_BIN.  Because the IB is
2415        * executed but never returns.  Account for this by checking if
2416        * the IB returned:
2417        */
2418       highlight_gpuaddr(gpuaddr(&dwords[5]));
2419 
2420       ib++;
2421       for (uint32_t i = 0; i < loopcount; i++) {
2422          ibs[ib].base = ibaddr;
2423          ibs[ib].size = ibsize;
2424          printl(3, "%sdraw %u\n", levels[level], i);
2425          dump_commands(ptr, ibsize, level);
2426          ibaddr += ibsize;
2427          ptr += ibsize;
2428       }
2429       ib--;
2430    } else {
2431       fprintf(stderr, "could not find: %016" PRIx64 " (%d)\n", ibaddr, ibsize);
2432    }
2433 }
2434 
2435 static void
cp_wfi(uint32_t * dwords,uint32_t sizedwords,int level)2436 cp_wfi(uint32_t *dwords, uint32_t sizedwords, int level)
2437 {
2438    needs_wfi = false;
2439 }
2440 
2441 static void
cp_mem_write(uint32_t * dwords,uint32_t sizedwords,int level)2442 cp_mem_write(uint32_t *dwords, uint32_t sizedwords, int level)
2443 {
2444    if (quiet(2))
2445       return;
2446 
2447    if (is_64b()) {
2448       uint64_t gpuaddr = dwords[0] | (((uint64_t)dwords[1]) << 32);
2449       printf("%sgpuaddr:%016" PRIx64 "\n", levels[level], gpuaddr);
2450       dump_hex(&dwords[2], sizedwords - 2, level + 1);
2451 
2452       if (pkt_is_type4(dwords[2]) || pkt_is_type7(dwords[2]))
2453          dump_commands(&dwords[2], sizedwords - 2, level + 1);
2454    } else {
2455       uint32_t gpuaddr = dwords[0];
2456       printf("%sgpuaddr:%08x\n", levels[level], gpuaddr);
2457       dump_float((float *)&dwords[1], sizedwords - 1, level + 1);
2458    }
2459 }
2460 
2461 static void
cp_rmw(uint32_t * dwords,uint32_t sizedwords,int level)2462 cp_rmw(uint32_t *dwords, uint32_t sizedwords, int level)
2463 {
2464    uint32_t val = dwords[0] & 0xffff;
2465    uint32_t and = dwords[1];
2466    uint32_t or = dwords[2];
2467    printl(3, "%srmw (%s & 0x%08x) | 0x%08x)\n", levels[level], regname(val, 1),
2468           and, or);
2469    if (needs_wfi)
2470       printl(2, "NEEDS WFI: rmw (%s & 0x%08x) | 0x%08x)\n", regname(val, 1),
2471              and, or);
2472    reg_set(val, (reg_val(val) & and) | or);
2473 }
2474 
2475 static void
cp_reg_mem(uint32_t * dwords,uint32_t sizedwords,int level)2476 cp_reg_mem(uint32_t *dwords, uint32_t sizedwords, int level)
2477 {
2478    uint32_t val = dwords[0] & 0xffff;
2479    printl(3, "%sbase register: %s\n", levels[level], regname(val, 1));
2480 
2481    if (quiet(2))
2482       return;
2483 
2484    uint64_t gpuaddr = dwords[1] | (((uint64_t)dwords[2]) << 32);
2485    printf("%sgpuaddr:%016" PRIx64 "\n", levels[level], gpuaddr);
2486    void *ptr = hostptr(gpuaddr);
2487    if (ptr) {
2488       uint32_t cnt = (dwords[0] >> 19) & 0x3ff;
2489       dump_hex(ptr, cnt, level + 1);
2490    }
2491 }
2492 
/*
 * One draw-state group as recorded by cp_set_draw_state(): a command
 * buffer at 'addr' of 'count' dwords, plus the enable mask and flag bits
 * taken from the packet.  A group with count == 0 is treated as disabled.
 */
struct draw_state {
   uint16_t enable_mask;
   uint16_t flags;
   uint32_t count;
   uint64_t addr;
};

/* draw-state groups, indexed by group id */
struct draw_state state[32];

/* flag bits of a draw-state group entry */
#define FLAG_DIRTY              (1 << 0)
#define FLAG_DISABLE            (1 << 1)
#define FLAG_DISABLE_ALL_GROUPS (1 << 2)
#define FLAG_LOAD_IMMED         (1 << 3)

/* last value seen in a set-mode packet */
static int draw_mode;
2508 
2509 static void
disable_group(unsigned group_id)2510 disable_group(unsigned group_id)
2511 {
2512    struct draw_state *ds = &state[group_id];
2513    memset(ds, 0, sizeof(*ds));
2514 }
2515 
2516 static void
disable_all_groups(void)2517 disable_all_groups(void)
2518 {
2519    for (unsigned i = 0; i < ARRAY_SIZE(state); i++)
2520       disable_group(i);
2521 }
2522 
2523 static void
load_group(unsigned group_id,int level)2524 load_group(unsigned group_id, int level)
2525 {
2526    struct draw_state *ds = &state[group_id];
2527 
2528    if (!ds->count)
2529       return;
2530 
2531    printl(2, "%sgroup_id: %u\n", levels[level], group_id);
2532    printl(2, "%scount: %d\n", levels[level], ds->count);
2533    printl(2, "%saddr: %016llx\n", levels[level], ds->addr);
2534    printl(2, "%sflags: %x\n", levels[level], ds->flags);
2535 
2536    if (options->info->chip >= 6) {
2537       printl(2, "%senable_mask: 0x%x\n", levels[level], ds->enable_mask);
2538 
2539       if (!(ds->enable_mask & enable_mask)) {
2540          printl(2, "%s\tskipped!\n\n", levels[level]);
2541          return;
2542       }
2543    }
2544 
2545    void *ptr = hostptr(ds->addr);
2546    if (ptr) {
2547       if (!quiet(2))
2548          dump_hex(ptr, ds->count, level + 1);
2549 
2550       ib++;
2551       dump_commands(ptr, ds->count, level + 1);
2552       ib--;
2553    }
2554 }
2555 
2556 static void
load_all_groups(int level)2557 load_all_groups(int level)
2558 {
2559    /* sanity check, we should never recursively hit recursion here, and if
2560     * we do bad things happen:
2561     */
2562    static bool loading_groups = false;
2563    if (loading_groups) {
2564       printf("ERROR: nothing in draw state should trigger recursively loading "
2565              "groups!\n");
2566       return;
2567    }
2568    loading_groups = true;
2569    for (unsigned i = 0; i < ARRAY_SIZE(state); i++)
2570       load_group(i, level);
2571    loading_groups = false;
2572 
2573    /* in 'query-compare' mode, defer disabling all groups until we have a
2574     * chance to process the query:
2575     */
2576    if (!options->query_compare)
2577       disable_all_groups();
2578 }
2579 
2580 static void
cp_set_draw_state(uint32_t * dwords,uint32_t sizedwords,int level)2581 cp_set_draw_state(uint32_t *dwords, uint32_t sizedwords, int level)
2582 {
2583    uint32_t i;
2584 
2585    for (i = 0; i < sizedwords;) {
2586       struct draw_state *ds;
2587       uint32_t count = dwords[i] & 0xffff;
2588       uint32_t group_id = (dwords[i] >> 24) & 0x1f;
2589       uint32_t enable_mask = (dwords[i] >> 20) & 0xf;
2590       uint32_t flags = (dwords[i] >> 16) & 0xf;
2591       uint64_t addr;
2592 
2593       if (is_64b()) {
2594          addr = dwords[i + 1];
2595          addr |= ((uint64_t)dwords[i + 2]) << 32;
2596          i += 3;
2597       } else {
2598          addr = dwords[i + 1];
2599          i += 2;
2600       }
2601 
2602       if (flags & FLAG_DISABLE_ALL_GROUPS) {
2603          disable_all_groups();
2604          continue;
2605       }
2606 
2607       if (flags & FLAG_DISABLE) {
2608          disable_group(group_id);
2609          continue;
2610       }
2611 
2612       assert(group_id < ARRAY_SIZE(state));
2613       disable_group(group_id);
2614 
2615       ds = &state[group_id];
2616 
2617       ds->enable_mask = enable_mask;
2618       ds->flags = flags;
2619       ds->count = count;
2620       ds->addr = addr;
2621 
2622       if (flags & FLAG_LOAD_IMMED) {
2623          load_group(group_id, level);
2624          disable_group(group_id);
2625       }
2626    }
2627 }
2628 
2629 static void
cp_set_mode(uint32_t * dwords,uint32_t sizedwords,int level)2630 cp_set_mode(uint32_t *dwords, uint32_t sizedwords, int level)
2631 {
2632    draw_mode = dwords[0];
2633 }
2634 
/*
 * execute compute shader: run the "compute" query and dump the register
 * summary.  The packet payload is not inspected.
 */
static void
cp_exec_cs(uint32_t *dwords, uint32_t sizedwords, int level)
{
   (void)dwords;
   (void)sizedwords;

   do_query("compute", 0);
   dump_register_summary(level);
}
2642 
2643 static void
cp_exec_cs_indirect(uint32_t * dwords,uint32_t sizedwords,int level)2644 cp_exec_cs_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
2645 {
2646    uint64_t addr;
2647 
2648    if (is_64b()) {
2649       addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1];
2650    } else {
2651       addr = dwords[1];
2652    }
2653 
2654    printl(3, "%saddr: %016llx\n", levels[level], addr);
2655    dump_gpuaddr_size(addr, level, 0x10, 2);
2656 
2657    do_query("compute", 0);
2658    dump_register_summary(level);
2659 }
2660 
2661 static void
cp_set_marker(uint32_t * dwords,uint32_t sizedwords,int level)2662 cp_set_marker(uint32_t *dwords, uint32_t sizedwords, int level)
2663 {
2664    uint32_t val = dwords[0] & 0xf;
2665    const char *mode = rnn_enumname(rnn, "a6xx_marker", val);
2666 
2667    if (!mode) {
2668       static char buf[8];
2669       sprintf(buf, "0x%x", val);
2670       render_mode = buf;
2671       return;
2672    }
2673 
2674    render_mode = mode;
2675 
2676    if (!strcmp(render_mode, "RM6_BINNING")) {
2677       enable_mask = MODE_BINNING;
2678    } else if (!strcmp(render_mode, "RM6_GMEM")) {
2679       enable_mask = MODE_GMEM;
2680    } else if (!strcmp(render_mode, "RM6_BYPASS")) {
2681       enable_mask = MODE_BYPASS;
2682    }
2683 }
2684 
2685 static void
cp_set_thread_control(uint32_t * dwords,uint32_t sizedwords,int level)2686 cp_set_thread_control(uint32_t *dwords, uint32_t sizedwords, int level)
2687 {
2688    uint32_t val = dwords[0] & 0x3;
2689    thread = rnn_enumname(rnn, "cp_thread", val);
2690 }
2691 
2692 static void
cp_set_render_mode(uint32_t * dwords,uint32_t sizedwords,int level)2693 cp_set_render_mode(uint32_t *dwords, uint32_t sizedwords, int level)
2694 {
2695    uint64_t addr;
2696    uint32_t *ptr, len;
2697 
2698    assert(is_64b());
2699 
2700    /* TODO seems to have two ptrs, 9 dwords total (incl pkt7 hdr)..
2701     * not sure if this can come in different sizes.
2702     *
2703     * First ptr doesn't seem to be cmdstream, second one does.
2704     *
2705     * Comment from downstream kernel:
2706     *
2707     * SRM -- set render mode (ex binning, direct render etc)
2708     * SRM is set by UMD usually at start of IB to tell CP the type of
2709     * preemption.
2710     * KMD needs to set SRM to NULL to indicate CP that rendering is
2711     * done by IB.
2712     * ------------------------------------------------------------------
2713     *
2714     * Seems to always be one of these two:
2715     * 70ec0008 00000001 001c0000 00000000 00000010 00000003 0000000d 001c2000
2716     * 00000000 70ec0008 00000001 001c0000 00000000 00000000 00000003 0000000d
2717     * 001c2000 00000000
2718     *
2719     */
2720 
2721    assert(options->info->chip >= 5);
2722 
2723    render_mode = rnn_enumname(rnn, "render_mode_cmd", dwords[0]);
2724 
2725    if (sizedwords == 1)
2726       return;
2727 
2728    addr = dwords[1];
2729    addr |= ((uint64_t)dwords[2]) << 32;
2730 
2731    mode = dwords[3];
2732 
2733    dump_gpuaddr(addr, level + 1);
2734 
2735    if (sizedwords == 5)
2736       return;
2737 
2738    assert(sizedwords == 8);
2739 
2740    len = dwords[5];
2741    addr = dwords[6];
2742    addr |= ((uint64_t)dwords[7]) << 32;
2743 
2744    printl(3, "%saddr: 0x%016lx\n", levels[level], addr);
2745    printl(3, "%slen:  0x%x\n", levels[level], len);
2746 
2747    ptr = hostptr(addr);
2748 
2749    if (ptr) {
2750       if (!quiet(2)) {
2751          ib++;
2752          dump_commands(ptr, len, level + 1);
2753          ib--;
2754          dump_hex(ptr, len, level + 1);
2755       }
2756    }
2757 }
2758 
2759 static void
cp_compute_checkpoint(uint32_t * dwords,uint32_t sizedwords,int level)2760 cp_compute_checkpoint(uint32_t *dwords, uint32_t sizedwords, int level)
2761 {
2762    uint64_t addr;
2763    uint32_t *ptr, len;
2764 
2765    assert(is_64b());
2766    assert(options->info->chip >= 5);
2767 
2768    if (sizedwords == 8) {
2769       addr = dwords[5];
2770       addr |= ((uint64_t)dwords[6]) << 32;
2771       len = dwords[7];
2772    } else {
2773       addr = dwords[5];
2774       addr |= ((uint64_t)dwords[6]) << 32;
2775       len = dwords[4];
2776    }
2777 
2778    printl(3, "%saddr: 0x%016" PRIx64 "\n", levels[level], addr);
2779    printl(3, "%slen:  0x%x\n", levels[level], len);
2780 
2781    ptr = hostptr(addr);
2782 
2783    if (ptr) {
2784       if (!quiet(2)) {
2785          ib++;
2786          dump_commands(ptr, len, level + 1);
2787          ib--;
2788          dump_hex(ptr, len, level + 1);
2789       }
2790    }
2791 }
2792 
2793 static void
cp_blit(uint32_t * dwords,uint32_t sizedwords,int level)2794 cp_blit(uint32_t *dwords, uint32_t sizedwords, int level)
2795 {
2796    do_query(rnn_enumname(rnn, "cp_blit_cmd", dwords[0]), 0);
2797    print_mode(level);
2798    dump_register_summary(level);
2799 }
2800 
2801 static void
cp_context_reg_bunch(uint32_t * dwords,uint32_t sizedwords,int level)2802 cp_context_reg_bunch(uint32_t *dwords, uint32_t sizedwords, int level)
2803 {
2804    int i;
2805 
2806    /* NOTE: seems to write same reg multiple times.. not sure if different parts
2807     * of these are triggered by the FLUSH_SO_n events?? (if that is what they
2808     * actually are?)
2809     */
2810    bool saved_summary = summary;
2811    summary = false;
2812 
2813    struct regacc r = regacc(NULL);
2814 
2815    for (i = 0; i < sizedwords; i += 2) {
2816       if (regacc_push(&r, dwords[i + 0], dwords[i + 1]))
2817          dump_register(&r, level + 1);
2818       reg_set(dwords[i + 0], dwords[i + 1]);
2819    }
2820 
2821    summary = saved_summary;
2822 }
2823 
/* Looks similar to CP_CONTEXT_REG_BUNCH, but not quite the same...
 * discarding first two dwords??
 *
 *   CP_CONTEXT_REG_BUNCH:
 *        0221: 9c1ff606  (rep)(xmov3)mov $usraddr, $data
 *        ; mov $data, $data
 *        ; mov $usraddr, $data
 *        ; mov $data, $data
 *        0222: d8000000  waitin
 *        0223: 981f0806  mov $01, $data
 *
 *   CP_UNK5D:
 *        0224: 981f0006  mov $00, $data
 *        0225: 981f0006  mov $00, $data
 *        0226: 9c1ff206  (rep)(xmov1)mov $usraddr, $data
 *        ; mov $data, $data
 *        0227: d8000000  waitin
 *        0228: 981f0806  mov $01, $data
 *
 * Decodes the packet by skipping the first two payload dwords and handing
 * the remainder to cp_context_reg_bunch().
 */
static void
cp_context_reg_bunch2(uint32_t *dwords, uint32_t sizedwords, int level)
{
   /* sizedwords is unsigned, so subtracting the two skipped dwords from a
    * shorter (malformed) packet would wrap around to a huge count and send
    * the decoder far past the end of the buffer.  Such a packet has no
    * register pairs to decode.
    */
   if (sizedwords < 2)
      return;

   cp_context_reg_bunch(dwords + 2, sizedwords - 2, level);
}
2851 
2852 static void
cp_reg_write(uint32_t * dwords,uint32_t sizedwords,int level)2853 cp_reg_write(uint32_t *dwords, uint32_t sizedwords, int level)
2854 {
2855    uint32_t reg = dwords[1] & 0xffff;
2856 
2857    struct regacc r = regacc(NULL);
2858    if (regacc_push(&r, reg, dwords[2]))
2859       dump_register(&r, level + 1);
2860    reg_set(reg, dwords[2]);
2861 }
2862 
2863 static void
cp_set_ctxswitch_ib(uint32_t * dwords,uint32_t sizedwords,int level)2864 cp_set_ctxswitch_ib(uint32_t *dwords, uint32_t sizedwords, int level)
2865 {
2866    uint64_t addr;
2867    uint32_t size = dwords[2] & 0xffff;
2868    void *ptr;
2869 
2870    addr = dwords[0] | ((uint64_t)dwords[1] << 32);
2871 
2872    if (!quiet(3)) {
2873       printf("%saddr=%" PRIx64 "\n", levels[level], addr);
2874    }
2875 
2876    ptr = hostptr(addr);
2877    if (ptr) {
2878       dump_commands(ptr, size, level + 1);
2879    }
2880 }
2881 
2882 static void
cp_skip_ib2_enable_global(uint32_t * dwords,uint32_t sizedwords,int level)2883 cp_skip_ib2_enable_global(uint32_t *dwords, uint32_t sizedwords, int level)
2884 {
2885    skip_ib2_enable_global = dwords[0];
2886 }
2887 
2888 static void
cp_skip_ib2_enable_local(uint32_t * dwords,uint32_t sizedwords,int level)2889 cp_skip_ib2_enable_local(uint32_t *dwords, uint32_t sizedwords, int level)
2890 {
2891    skip_ib2_enable_local = dwords[0];
2892 }
2893 
2894 #define CP(x, fxn, ...) { "CP_" #x, fxn, ##__VA_ARGS__ }
2895 static const struct type3_op {
2896    const char *name;
2897    void (*fxn)(uint32_t *dwords, uint32_t sizedwords, int level);
2898    struct {
2899       bool load_all_groups;
2900    } options;
2901 } type3_op[] = {
2902    CP(NOP, cp_nop),
2903    CP(INDIRECT_BUFFER, cp_indirect),
2904    CP(INDIRECT_BUFFER_PFD, cp_indirect),
2905    CP(WAIT_FOR_IDLE, cp_wfi),
2906    CP(REG_RMW, cp_rmw),
2907    CP(REG_TO_MEM, cp_reg_mem),
2908    CP(MEM_TO_REG, cp_reg_mem), /* same layout as CP_REG_TO_MEM */
2909    CP(MEM_WRITE, cp_mem_write),
2910    CP(EVENT_WRITE, cp_event_write),
2911    CP(RUN_OPENCL, cp_run_cl),
2912    CP(DRAW_INDX, cp_draw_indx, {.load_all_groups = true}),
2913    CP(DRAW_INDX_2, cp_draw_indx_2, {.load_all_groups = true}),
2914    CP(SET_CONSTANT, cp_set_const),
2915    CP(IM_LOAD_IMMEDIATE, cp_im_loadi),
2916    CP(WIDE_REG_WRITE, cp_wide_reg_write),
2917 
2918    /* for a3xx */
2919    CP(LOAD_STATE, cp_load_state),
2920    CP(SET_BIN, cp_set_bin),
2921 
2922    /* for a4xx */
2923    CP(LOAD_STATE4, cp_load_state),
2924    CP(SET_DRAW_STATE, cp_set_draw_state),
2925    CP(DRAW_INDX_OFFSET, cp_draw_indx_offset, {.load_all_groups = true}),
2926    CP(EXEC_CS, cp_exec_cs, {.load_all_groups = true}),
2927    CP(EXEC_CS_INDIRECT, cp_exec_cs_indirect, {.load_all_groups = true}),
2928 
2929    /* for a5xx */
2930    CP(SET_RENDER_MODE, cp_set_render_mode),
2931    CP(COMPUTE_CHECKPOINT, cp_compute_checkpoint),
2932    CP(BLIT, cp_blit),
2933    CP(CONTEXT_REG_BUNCH, cp_context_reg_bunch),
2934    CP(DRAW_INDIRECT, cp_draw_indirect, {.load_all_groups = true}),
2935    CP(DRAW_INDX_INDIRECT, cp_draw_indx_indirect, {.load_all_groups = true}),
2936    CP(DRAW_INDIRECT_MULTI, cp_draw_indirect_multi, {.load_all_groups = true}),
2937    CP(SKIP_IB2_ENABLE_GLOBAL, cp_skip_ib2_enable_global),
2938    CP(SKIP_IB2_ENABLE_LOCAL, cp_skip_ib2_enable_local),
2939 
2940    /* for a6xx */
2941    CP(LOAD_STATE6_GEOM, cp_load_state),
2942    CP(LOAD_STATE6_FRAG, cp_load_state),
2943    CP(LOAD_STATE6, cp_load_state),
2944    CP(SET_MODE, cp_set_mode),
2945    CP(SET_MARKER, cp_set_marker),
2946    CP(REG_WRITE, cp_reg_write),
2947    CP(DRAW_AUTO, cp_draw_auto, {.load_all_groups = true}),
2948 
2949    CP(SET_CTXSWITCH_IB, cp_set_ctxswitch_ib),
2950 
2951    CP(START_BIN, cp_start_bin),
2952 
2953    CP(FIXED_STRIDE_DRAW_TABLE, cp_fixed_stride_draw_table),
2954 
2955    /* for a7xx */
2956    CP(THREAD_CONTROL, cp_set_thread_control),
2957    CP(CONTEXT_REG_BUNCH2, cp_context_reg_bunch2),
2958 };
2959 
/*
 * Handler that intentionally does nothing; used by get_type3_op() for packets
 * that have no entry in the handler table.
 */
static void
noop_fxn(uint32_t *dwords, uint32_t sizedwords, int level)
{
   (void)dwords;
   (void)sizedwords;
   (void)level;
}
2964 
2965 static const struct type3_op *
get_type3_op(unsigned opc)2966 get_type3_op(unsigned opc)
2967 {
2968    static const struct type3_op dummy_op = {
2969       .fxn = noop_fxn,
2970    };
2971    const char *name = pktname(opc);
2972 
2973    if (!name)
2974       return &dummy_op;
2975 
2976    for (unsigned i = 0; i < ARRAY_SIZE(type3_op); i++)
2977       if (!strcmp(name, type3_op[i].name))
2978          return &type3_op[i];
2979 
2980    return &dummy_op;
2981 }
2982 
2983 void
dump_commands(uint32_t * dwords,uint32_t sizedwords,int level)2984 dump_commands(uint32_t *dwords, uint32_t sizedwords, int level)
2985 {
2986    int dwords_left = sizedwords;
2987    uint32_t count = 0; /* dword count including packet header */
2988    uint32_t val;
2989 
2990    //	assert(dwords);
2991    if (!dwords) {
2992       printf("NULL cmd buffer!\n");
2993       return;
2994    }
2995 
2996    assert(ib < ARRAY_SIZE(draws));
2997    draws[ib] = 0;
2998 
2999    while (dwords_left > 0) {
3000 
3001       current_draw_count = draw_count;
3002 
3003       /* hack, this looks like a -1 underflow, in some versions
3004        * when it tries to write zero registers via pkt0
3005        */
3006       //		if ((dwords[0] >> 16) == 0xffff)
3007       //			goto skip;
3008 
3009       if (pkt_is_regwrite(dwords[0], &val, &count)) {
3010          assert(val < regcnt());
3011          printl(3, "%swrite %s (%04x)\n", levels[level + 1], regname(val, 1),
3012                 val);
3013          dump_registers(val, dwords + 1, count - 1, level + 2);
3014          if (!quiet(3))
3015             dump_hex(dwords, count, level + 1);
3016 #if 0
3017       } else if (pkt_is_type1(dwords[0])) {
3018          count = 3;
3019          val = dwords[0] & 0xfff;
3020          printl(3, "%swrite %s\n", levels[level+1], regname(val, 1));
3021          dump_registers(val, dwords+1, 1, level+2);
3022          val = (dwords[0] >> 12) & 0xfff;
3023          printl(3, "%swrite %s\n", levels[level+1], regname(val, 1));
3024          dump_registers(val, dwords+2, 1, level+2);
3025          if (!quiet(3))
3026             dump_hex(dwords, count, level+1);
3027 #endif
3028       } else if (pkt_is_opcode(dwords[0], &val, &count)) {
3029          const struct type3_op *op = get_type3_op(val);
3030          if (op->options.load_all_groups)
3031             load_all_groups(level + 1);
3032          const char *name = pktname(val);
3033          if (!quiet(2)) {
3034             printf("\t%sopcode: %s%s%s (%02x) (%d dwords)\n", levels[level],
3035                    rnn->vc->colors->bctarg, name, rnn->vc->colors->reset, val,
3036                    count);
3037          }
3038          if (name) {
3039             /* special hack for two packets that decode the same way
3040              * on a6xx:
3041              */
3042             if (!strcmp(name, "CP_LOAD_STATE6_FRAG") ||
3043                 !strcmp(name, "CP_LOAD_STATE6_GEOM"))
3044                name = "CP_LOAD_STATE6";
3045             dump_domain(dwords + 1, count - 1, level + 2, name);
3046          }
3047          op->fxn(dwords + 1, count - 1, level + 1);
3048          if (!quiet(2))
3049             dump_hex(dwords, count, level + 1);
3050       } else if (pkt_is_type2(dwords[0])) {
3051          printl(3, "%snop\n", levels[level + 1]);
3052          count = 1;
3053       } else {
3054          printf("bad type! %08x\n", dwords[0]);
3055          /* for 5xx+ we can do a passable job of looking for start of next valid
3056           * packet: */
3057          if (options->info->chip >= 5) {
3058             count = find_next_packet(dwords, dwords_left);
3059          } else {
3060             return;
3061          }
3062       }
3063 
3064       dwords += count;
3065       dwords_left -= count;
3066    }
3067 
3068    if (dwords_left < 0)
3069       printf("**** this ain't right!! dwords_left=%d\n", dwords_left);
3070 }
3071