/*
 * Copyright (c) 2012 Rob Clark <robdclark@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <assert.h>
#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <signal.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/wait.h>

#include "freedreno_pm4.h"

#include "buffers.h"
#include "cffdec.h"
#include "disasm.h"
#include "redump.h"
#include "rnnutil.h"
#include "script.h"

/* ************************************************************************* */
/* originally based on kernel recovery dump code: */

static const struct cffdec_options *options;

static bool needs_wfi = false;
static bool summary = false;
static bool in_summary = false;
static int vertices;

static inline unsigned
regcnt(void)
{
   if (options->gpu_id >= 500)
      return 0xffff;
   else
      return 0x7fff;
}

static int
is_64b(void)
{
   return options->gpu_id >= 500;
}
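/* Note: gpu_id >= 500 (a5xx and later) implies 64-bit GPU addresses and the
 * larger register space, which is why both helpers above key off the same
 * cutoff.
 */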

static int draws[4];
static struct {
   uint64_t base;
   uint32_t size; /* in dwords */
   /* Generally cmdstream consists of multiple IB calls to different
    * buffers, which are themselves often re-used for each tile. The
    * triggered flag serves two purposes to help make it more clear
    * what part of the cmdstream is before vs after the GPU hang:
    *
    * 1) if in IB2 we are past the point within the IB2 buffer where
    *    the GPU hung, but IB1 is not past the point within its
    *    buffer where the GPU had hung, then we know the GPU hang
    *    happens on a future use of that IB2 buffer.
    *
    * 2) if in an IB1 or IB2 buffer that is not the one where the GPU
    *    hung, but we've already passed the trigger point at the same
    *    IB level, we know that we are past the point where the GPU
    *    had hung.
    *
    * So this is a one way switch, false->true. And a higher #'d
    * IB level isn't considered triggered unless the lower #'d IB
    * level is.
    */
   bool triggered;
} ibs[4];
static int ib;

static int draw_count;
static int current_draw_count;

/* query mode.. to handle symbolic register name queries, we need to
 * defer parsing the query string until after gpu_id is known and the
 * rnn db is loaded:
 */
static int *queryvals;

static bool
quiet(int lvl)
{
   if ((options->draw_filter != -1) &&
       (options->draw_filter != current_draw_count))
      return true;
   if ((lvl >= 3) && (summary || options->querystrs || options->script))
      return true;
   if ((lvl >= 2) && (options->querystrs || options->script))
      return true;
   return false;
}

void
printl(int lvl, const char *fmt, ...)
{
   va_list args;
   if (quiet(lvl))
      return;
   va_start(args, fmt);
   vprintf(fmt, args);
   va_end(args);
}

static const char *levels[] = {
   "\t",
   "\t\t",
   "\t\t\t",
   "\t\t\t\t",
   "\t\t\t\t\t",
   "\t\t\t\t\t\t",
   "\t\t\t\t\t\t\t",
   "\t\t\t\t\t\t\t\t",
   "\t\t\t\t\t\t\t\t\t",
   "x",
   "x",
   "x",
   "x",
   "x",
   "x",
};

enum state_src_t {
   STATE_SRC_DIRECT,
   STATE_SRC_INDIRECT,
   STATE_SRC_BINDLESS,
};

/* SDS (CP_SET_DRAW_STATE) helpers: */
static void load_all_groups(int level);
static void disable_all_groups(void);

static void dump_tex_samp(uint32_t *texsamp, enum state_src_t src, int num_unit,
                          int level);
static void dump_tex_const(uint32_t *texsamp, int num_unit, int level);

static bool
highlight_gpuaddr(uint64_t gpuaddr)
{
   if (!options->ibs[ib].base)
      return false;

   if ((ib > 0) && options->ibs[ib - 1].base && !ibs[ib - 1].triggered)
      return false;

   if (ibs[ib].triggered)
      return options->color;

   if (options->ibs[ib].base != ibs[ib].base)
      return false;

   uint64_t start = ibs[ib].base + 4 * (ibs[ib].size - options->ibs[ib].rem);
   uint64_t end = ibs[ib].base + 4 * ibs[ib].size;

   bool triggered = (start <= gpuaddr) && (gpuaddr <= end);

   ibs[ib].triggered |= triggered;

   if (triggered)
      printf("ESTIMATED CRASH LOCATION!\n");

   return triggered & options->color;
}

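/* Hexdump 'sizedwords' dwords in rows of eight, each row prefixed with its
 * GPU address and indentation level; runs of all-zero rows after the first
 * row are collapsed into a single "*" line, similar to hexdump(1).
 */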
static void
dump_hex(uint32_t *dwords, uint32_t sizedwords, int level)
{
   int i, j;
   int lastzero = 1;

   if (quiet(2))
      return;

   for (i = 0; i < sizedwords; i += 8) {
      int zero = 1;

      /* always show first row: */
      if (i == 0)
         zero = 0;

      for (j = 0; (j < 8) && (i + j < sizedwords) && zero; j++)
         if (dwords[i + j])
            zero = 0;

      if (zero && !lastzero)
         printf("*\n");

      lastzero = zero;

      if (zero)
         continue;

      uint64_t addr = gpuaddr(&dwords[i]);
      bool highlight = highlight_gpuaddr(addr);

      if (highlight)
         printf("\x1b[0;1;31m");

      if (is_64b()) {
         printf("%016" PRIx64 ":%s", addr, levels[level]);
      } else {
         printf("%08x:%s", (uint32_t)addr, levels[level]);
      }

      if (highlight)
         printf("\x1b[0m");

      printf("%04x:", i * 4);

      for (j = 0; (j < 8) && (i + j < sizedwords); j++) {
         printf(" %08x", dwords[i + j]);
      }

      printf("\n");
   }
}

static void
dump_float(float *dwords, uint32_t sizedwords, int level)
{
   int i;
   for (i = 0; i < sizedwords; i++) {
      if ((i % 8) == 0) {
         if (is_64b()) {
            printf("%016" PRIx64 ":%s", gpuaddr(dwords), levels[level]);
         } else {
            printf("%08x:%s", (uint32_t)gpuaddr(dwords), levels[level]);
         }
      } else {
         printf(" ");
      }
      printf("%8f", *(dwords++));
      if ((i % 8) == 7)
         printf("\n");
   }
   if (i % 8)
      printf("\n");
}

/* I believe the surface format is in the low bits:
 #define RB_COLOR_INFO__COLOR_FORMAT_MASK 0x0000000fL
 comments in sys2gmem_tex_const indicate that the address is [31:12], but
 it looks like at least some of the bits above the format have a different
 meaning..
 */
static void
parse_dword_addr(uint32_t dword, uint32_t *gpuaddr, uint32_t *flags,
                 uint32_t mask)
{
   assert(!is_64b()); /* this is only used on a2xx */
   *gpuaddr = dword & ~mask;
   *flags = dword & mask;
}

static uint32_t type0_reg_vals[0xffff + 1];
static uint8_t type0_reg_rewritten[sizeof(type0_reg_vals) /
                                   8]; /* written since last draw */
static uint8_t type0_reg_written[sizeof(type0_reg_vals) / 8];
static uint32_t lastvals[ARRAY_SIZE(type0_reg_vals)];

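/* Register tracking: type0_reg_vals holds the current value of every register,
 * type0_reg_written marks registers written at least once since reset_regs(),
 * type0_reg_rewritten marks registers written since the last draw/blit (it is
 * cleared again by dump_register_summary()), and lastvals remembers the value
 * each register had at the previous summary so changed values can be flagged.
 */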
static bool
reg_rewritten(uint32_t regbase)
{
   return !!(type0_reg_rewritten[regbase / 8] & (1 << (regbase % 8)));
}

bool
reg_written(uint32_t regbase)
{
   return !!(type0_reg_written[regbase / 8] & (1 << (regbase % 8)));
}

static void
clear_rewritten(void)
{
   memset(type0_reg_rewritten, 0, sizeof(type0_reg_rewritten));
}

static void
clear_written(void)
{
   memset(type0_reg_written, 0, sizeof(type0_reg_written));
   clear_rewritten();
}

uint32_t
reg_lastval(uint32_t regbase)
{
   return lastvals[regbase];
}

static void
clear_lastvals(void)
{
   memset(lastvals, 0, sizeof(lastvals));
}

uint32_t
reg_val(uint32_t regbase)
{
   return type0_reg_vals[regbase];
}

void
reg_set(uint32_t regbase, uint32_t val)
{
   assert(regbase < regcnt());
   type0_reg_vals[regbase] = val;
   type0_reg_written[regbase / 8] |= (1 << (regbase % 8));
   type0_reg_rewritten[regbase / 8] |= (1 << (regbase % 8));
}

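/* Print the values of CP_SCRATCH registers 4..7, falling back to the older
 * a2xx/a3xx register names if the array-style name isn't in the database.
 * The driver may write marker values into these scratch registers, which can
 * help correlate the decoded cmdstream with driver-side state.
 */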
static void
reg_dump_scratch(const char *name, uint32_t dword, int level)
{
   unsigned r;

   if (quiet(3))
      return;

   r = regbase("CP_SCRATCH[0].REG");

   // if not, try old a2xx/a3xx version:
   if (!r)
      r = regbase("CP_SCRATCH_REG0");

   if (!r)
      return;

   printf("%s:%u,%u,%u,%u\n", levels[level], reg_val(r + 4), reg_val(r + 5),
          reg_val(r + 6), reg_val(r + 7));
}

static void
dump_gpuaddr_size(uint64_t gpuaddr, int level, int sizedwords, int quietlvl)
{
   void *buf;

   if (quiet(quietlvl))
      return;

   buf = hostptr(gpuaddr);
   if (buf) {
      dump_hex(buf, sizedwords, level + 1);
   }
}

static void
dump_gpuaddr(uint64_t gpuaddr, int level)
{
   dump_gpuaddr_size(gpuaddr, level, 64, 3);
}

static void
reg_dump_gpuaddr(const char *name, uint32_t dword, int level)
{
   dump_gpuaddr(dword, level);
}

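/* a5xx splits 64-bit GPU addresses across _LO/_HI register pairs: the _LO
 * handlers below stash the low dword here, and the matching _HI handler
 * combines it with the high dword before dumping or disassembling.
 */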
uint32_t gpuaddr_lo;
static void
reg_gpuaddr_lo(const char *name, uint32_t dword, int level)
{
   gpuaddr_lo = dword;
}

static void
reg_dump_gpuaddr_hi(const char *name, uint32_t dword, int level)
{
   dump_gpuaddr(gpuaddr_lo | (((uint64_t)dword) << 32), level);
}

static void
reg_dump_gpuaddr64(const char *name, uint64_t qword, int level)
{
   dump_gpuaddr(qword, level);
}

static void
dump_shader(const char *ext, void *buf, int bufsz)
{
   if (options->dump_shaders) {
      static int n = 0;
      char filename[16];
      int fd;
      sprintf(filename, "%04d.%s", n++, ext);
      fd = open(filename, O_WRONLY | O_TRUNC | O_CREAT, 0644);
      if (fd != -1) {
         write(fd, buf, bufsz);
         close(fd);
      }
   }
}

static void
disasm_gpuaddr(const char *name, uint64_t gpuaddr, int level)
{
   void *buf;

   gpuaddr &= 0xfffffffffffffff0;

   if (quiet(3))
      return;

   buf = hostptr(gpuaddr);
   if (buf) {
      uint32_t sizedwords = hostlen(gpuaddr) / 4;
      const char *ext;

      dump_hex(buf, min(64, sizedwords), level + 1);
      try_disasm_a3xx(buf, sizedwords, level + 2, stdout, options->gpu_id);

      /* this is a bit of an ugly way, but oh well.. */
      if (strstr(name, "SP_VS_OBJ")) {
         ext = "vo3";
      } else if (strstr(name, "SP_FS_OBJ")) {
         ext = "fo3";
      } else if (strstr(name, "SP_GS_OBJ")) {
         ext = "go3";
      } else if (strstr(name, "SP_CS_OBJ")) {
         ext = "co3";
      } else {
         ext = NULL;
      }

      if (ext)
         dump_shader(ext, buf, sizedwords * 4);
   }
}

static void
reg_disasm_gpuaddr(const char *name, uint32_t dword, int level)
{
   disasm_gpuaddr(name, dword, level);
}

static void
reg_disasm_gpuaddr_hi(const char *name, uint32_t dword, int level)
{
   disasm_gpuaddr(name, gpuaddr_lo | (((uint64_t)dword) << 32), level);
}

static void
reg_disasm_gpuaddr64(const char *name, uint64_t qword, int level)
{
   disasm_gpuaddr(name, qword, level);
}

/* Find the value of the TEX_COUNT register that corresponds to the named
 * TEX_SAMP/TEX_CONST reg.
 *
 * Note, this kinda assumes an equal # of samplers and textures, but it isn't
 * clear there is a much better option. I suppose on a6xx we could instead
 * decode the bitfields in SP_xS_CONFIG
 */
static int
get_tex_count(const char *name)
{
   char count_reg[strlen(name) + 5];
   char *p;

   p = strstr(name, "CONST");
   if (!p)
      p = strstr(name, "SAMP");
   if (!p)
      return 0;

   int n = p - name;
   strncpy(count_reg, name, n);
   strcpy(count_reg + n, "COUNT");

   return reg_val(regbase(count_reg));
}

static void
reg_dump_tex_samp_hi(const char *name, uint32_t dword, int level)
{
   if (!in_summary)
      return;

   int num_unit = get_tex_count(name);
   uint64_t gpuaddr = gpuaddr_lo | (((uint64_t)dword) << 32);
   void *buf = hostptr(gpuaddr);

   if (!buf)
      return;

   dump_tex_samp(buf, STATE_SRC_DIRECT, num_unit, level + 1);
}

static void
reg_dump_tex_const_hi(const char *name, uint32_t dword, int level)
{
   if (!in_summary)
      return;

   int num_unit = get_tex_count(name);
   uint64_t gpuaddr = gpuaddr_lo | (((uint64_t)dword) << 32);
   void *buf = hostptr(gpuaddr);

   if (!buf)
      return;

   dump_tex_const(buf, num_unit, level + 1);
}

/*
 * Registers with special handling (rnndec_decode() handles rest):
 */
#define REG(x, fxn) { #x, fxn }
#define REG64(x, fxn) { #x, .fxn64 = fxn, .is_reg64 = true }
static struct {
   const char *regname;
   void (*fxn)(const char *name, uint32_t dword, int level);
   void (*fxn64)(const char *name, uint64_t qword, int level);
   uint32_t regbase;
   bool is_reg64;
} reg_a2xx[] = {
   REG(CP_SCRATCH_REG0, reg_dump_scratch),
   REG(CP_SCRATCH_REG1, reg_dump_scratch),
   REG(CP_SCRATCH_REG2, reg_dump_scratch),
   REG(CP_SCRATCH_REG3, reg_dump_scratch),
   REG(CP_SCRATCH_REG4, reg_dump_scratch),
   REG(CP_SCRATCH_REG5, reg_dump_scratch),
   REG(CP_SCRATCH_REG6, reg_dump_scratch),
   REG(CP_SCRATCH_REG7, reg_dump_scratch),
   {NULL},
}, reg_a3xx[] = {
   REG(CP_SCRATCH_REG0, reg_dump_scratch),
   REG(CP_SCRATCH_REG1, reg_dump_scratch),
   REG(CP_SCRATCH_REG2, reg_dump_scratch),
   REG(CP_SCRATCH_REG3, reg_dump_scratch),
   REG(CP_SCRATCH_REG4, reg_dump_scratch),
   REG(CP_SCRATCH_REG5, reg_dump_scratch),
   REG(CP_SCRATCH_REG6, reg_dump_scratch),
   REG(CP_SCRATCH_REG7, reg_dump_scratch),
   REG(VSC_SIZE_ADDRESS, reg_dump_gpuaddr),
   REG(SP_VS_PVT_MEM_ADDR_REG, reg_dump_gpuaddr),
   REG(SP_FS_PVT_MEM_ADDR_REG, reg_dump_gpuaddr),
   REG(SP_VS_OBJ_START_REG, reg_disasm_gpuaddr),
   REG(SP_FS_OBJ_START_REG, reg_disasm_gpuaddr),
   REG(TPL1_TP_FS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
   {NULL},
}, reg_a4xx[] = {
   REG(CP_SCRATCH[0].REG, reg_dump_scratch),
   REG(CP_SCRATCH[0x1].REG, reg_dump_scratch),
   REG(CP_SCRATCH[0x2].REG, reg_dump_scratch),
   REG(CP_SCRATCH[0x3].REG, reg_dump_scratch),
   REG(CP_SCRATCH[0x4].REG, reg_dump_scratch),
   REG(CP_SCRATCH[0x5].REG, reg_dump_scratch),
   REG(CP_SCRATCH[0x6].REG, reg_dump_scratch),
   REG(CP_SCRATCH[0x7].REG, reg_dump_scratch),
   REG(SP_VS_PVT_MEM_ADDR, reg_dump_gpuaddr),
   REG(SP_FS_PVT_MEM_ADDR, reg_dump_gpuaddr),
   REG(SP_GS_PVT_MEM_ADDR, reg_dump_gpuaddr),
   REG(SP_HS_PVT_MEM_ADDR, reg_dump_gpuaddr),
   REG(SP_DS_PVT_MEM_ADDR, reg_dump_gpuaddr),
   REG(SP_CS_PVT_MEM_ADDR, reg_dump_gpuaddr),
   REG(SP_VS_OBJ_START, reg_disasm_gpuaddr),
   REG(SP_FS_OBJ_START, reg_disasm_gpuaddr),
   REG(SP_GS_OBJ_START, reg_disasm_gpuaddr),
   REG(SP_HS_OBJ_START, reg_disasm_gpuaddr),
   REG(SP_DS_OBJ_START, reg_disasm_gpuaddr),
   REG(SP_CS_OBJ_START, reg_disasm_gpuaddr),
   REG(TPL1_TP_VS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
   REG(TPL1_TP_HS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
   REG(TPL1_TP_DS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
   REG(TPL1_TP_GS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
   REG(TPL1_TP_FS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
   {NULL},
}, reg_a5xx[] = {
   REG(CP_SCRATCH[0x4].REG, reg_dump_scratch),
   REG(CP_SCRATCH[0x5].REG, reg_dump_scratch),
   REG(CP_SCRATCH[0x6].REG, reg_dump_scratch),
   REG(CP_SCRATCH[0x7].REG, reg_dump_scratch),
   REG(SP_VS_OBJ_START_LO, reg_gpuaddr_lo),
   REG(SP_VS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
   REG(SP_HS_OBJ_START_LO, reg_gpuaddr_lo),
   REG(SP_HS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
   REG(SP_DS_OBJ_START_LO, reg_gpuaddr_lo),
   REG(SP_DS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
   REG(SP_GS_OBJ_START_LO, reg_gpuaddr_lo),
   REG(SP_GS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
   REG(SP_FS_OBJ_START_LO, reg_gpuaddr_lo),
   REG(SP_FS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
   REG(SP_CS_OBJ_START_LO, reg_gpuaddr_lo),
   REG(SP_CS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
   REG(TPL1_VS_TEX_CONST_LO, reg_gpuaddr_lo),
   REG(TPL1_VS_TEX_CONST_HI, reg_dump_tex_const_hi),
   REG(TPL1_VS_TEX_SAMP_LO, reg_gpuaddr_lo),
   REG(TPL1_VS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
   REG(TPL1_HS_TEX_CONST_LO, reg_gpuaddr_lo),
   REG(TPL1_HS_TEX_CONST_HI, reg_dump_tex_const_hi),
   REG(TPL1_HS_TEX_SAMP_LO, reg_gpuaddr_lo),
   REG(TPL1_HS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
   REG(TPL1_DS_TEX_CONST_LO, reg_gpuaddr_lo),
   REG(TPL1_DS_TEX_CONST_HI, reg_dump_tex_const_hi),
   REG(TPL1_DS_TEX_SAMP_LO, reg_gpuaddr_lo),
   REG(TPL1_DS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
   REG(TPL1_GS_TEX_CONST_LO, reg_gpuaddr_lo),
   REG(TPL1_GS_TEX_CONST_HI, reg_dump_tex_const_hi),
   REG(TPL1_GS_TEX_SAMP_LO, reg_gpuaddr_lo),
   REG(TPL1_GS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
   REG(TPL1_FS_TEX_CONST_LO, reg_gpuaddr_lo),
   REG(TPL1_FS_TEX_CONST_HI, reg_dump_tex_const_hi),
   REG(TPL1_FS_TEX_SAMP_LO, reg_gpuaddr_lo),
   REG(TPL1_FS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
   REG(TPL1_CS_TEX_CONST_LO, reg_gpuaddr_lo),
   REG(TPL1_CS_TEX_CONST_HI, reg_dump_tex_const_hi),
   REG(TPL1_CS_TEX_SAMP_LO, reg_gpuaddr_lo),
   REG(TPL1_CS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
   REG(TPL1_TP_BORDER_COLOR_BASE_ADDR_LO, reg_gpuaddr_lo),
   REG(TPL1_TP_BORDER_COLOR_BASE_ADDR_HI, reg_dump_gpuaddr_hi),
   // REG(RB_MRT_FLAG_BUFFER[0].ADDR_LO, reg_gpuaddr_lo),
   // REG(RB_MRT_FLAG_BUFFER[0].ADDR_HI, reg_dump_gpuaddr_hi),
   // REG(RB_MRT_FLAG_BUFFER[1].ADDR_LO, reg_gpuaddr_lo),
   // REG(RB_MRT_FLAG_BUFFER[1].ADDR_HI, reg_dump_gpuaddr_hi),
   // REG(RB_MRT_FLAG_BUFFER[2].ADDR_LO, reg_gpuaddr_lo),
   // REG(RB_MRT_FLAG_BUFFER[2].ADDR_HI, reg_dump_gpuaddr_hi),
   // REG(RB_MRT_FLAG_BUFFER[3].ADDR_LO, reg_gpuaddr_lo),
   // REG(RB_MRT_FLAG_BUFFER[3].ADDR_HI, reg_dump_gpuaddr_hi),
   // REG(RB_MRT_FLAG_BUFFER[4].ADDR_LO, reg_gpuaddr_lo),
   // REG(RB_MRT_FLAG_BUFFER[4].ADDR_HI, reg_dump_gpuaddr_hi),
   // REG(RB_MRT_FLAG_BUFFER[5].ADDR_LO, reg_gpuaddr_lo),
   // REG(RB_MRT_FLAG_BUFFER[5].ADDR_HI, reg_dump_gpuaddr_hi),
   // REG(RB_MRT_FLAG_BUFFER[6].ADDR_LO, reg_gpuaddr_lo),
   // REG(RB_MRT_FLAG_BUFFER[6].ADDR_HI, reg_dump_gpuaddr_hi),
   // REG(RB_MRT_FLAG_BUFFER[7].ADDR_LO, reg_gpuaddr_lo),
   // REG(RB_MRT_FLAG_BUFFER[7].ADDR_HI, reg_dump_gpuaddr_hi),
   // REG(RB_BLIT_FLAG_DST_LO, reg_gpuaddr_lo),
   // REG(RB_BLIT_FLAG_DST_HI, reg_dump_gpuaddr_hi),
   // REG(RB_MRT[0].BASE_LO, reg_gpuaddr_lo),
   // REG(RB_MRT[0].BASE_HI, reg_dump_gpuaddr_hi),
   // REG(RB_DEPTH_BUFFER_BASE_LO, reg_gpuaddr_lo),
   // REG(RB_DEPTH_BUFFER_BASE_HI, reg_dump_gpuaddr_hi),
   // REG(RB_DEPTH_FLAG_BUFFER_BASE_LO, reg_gpuaddr_lo),
   // REG(RB_DEPTH_FLAG_BUFFER_BASE_HI, reg_dump_gpuaddr_hi),
   // REG(RB_BLIT_DST_LO, reg_gpuaddr_lo),
   // REG(RB_BLIT_DST_HI, reg_dump_gpuaddr_hi),

   // REG(RB_2D_SRC_LO, reg_gpuaddr_lo),
   // REG(RB_2D_SRC_HI, reg_dump_gpuaddr_hi),
   // REG(RB_2D_SRC_FLAGS_LO, reg_gpuaddr_lo),
   // REG(RB_2D_SRC_FLAGS_HI, reg_dump_gpuaddr_hi),
   // REG(RB_2D_DST_LO, reg_gpuaddr_lo),
   // REG(RB_2D_DST_HI, reg_dump_gpuaddr_hi),
   // REG(RB_2D_DST_FLAGS_LO, reg_gpuaddr_lo),
   // REG(RB_2D_DST_FLAGS_HI, reg_dump_gpuaddr_hi),

   {NULL},
}, reg_a6xx[] = {
   REG(CP_SCRATCH[0x4].REG, reg_dump_scratch),
   REG(CP_SCRATCH[0x5].REG, reg_dump_scratch),
   REG(CP_SCRATCH[0x6].REG, reg_dump_scratch),
   REG(CP_SCRATCH[0x7].REG, reg_dump_scratch),

   REG64(SP_VS_OBJ_START, reg_disasm_gpuaddr64),
   REG64(SP_HS_OBJ_START, reg_disasm_gpuaddr64),
   REG64(SP_DS_OBJ_START, reg_disasm_gpuaddr64),
   REG64(SP_GS_OBJ_START, reg_disasm_gpuaddr64),
   REG64(SP_FS_OBJ_START, reg_disasm_gpuaddr64),
   REG64(SP_CS_OBJ_START, reg_disasm_gpuaddr64),

   REG64(SP_VS_TEX_CONST, reg_dump_gpuaddr64),
   REG64(SP_VS_TEX_SAMP, reg_dump_gpuaddr64),
   REG64(SP_HS_TEX_CONST, reg_dump_gpuaddr64),
   REG64(SP_HS_TEX_SAMP, reg_dump_gpuaddr64),
   REG64(SP_DS_TEX_CONST, reg_dump_gpuaddr64),
   REG64(SP_DS_TEX_SAMP, reg_dump_gpuaddr64),
   REG64(SP_GS_TEX_CONST, reg_dump_gpuaddr64),
   REG64(SP_GS_TEX_SAMP, reg_dump_gpuaddr64),
   REG64(SP_FS_TEX_CONST, reg_dump_gpuaddr64),
   REG64(SP_FS_TEX_SAMP, reg_dump_gpuaddr64),
   REG64(SP_CS_TEX_CONST, reg_dump_gpuaddr64),
   REG64(SP_CS_TEX_SAMP, reg_dump_gpuaddr64),

   {NULL},
}, *type0_reg;

static struct rnn *rnn;

static void
init_rnn(const char *gpuname)
{
   rnn = rnn_new(!options->color);

   rnn_load(rnn, gpuname);

   if (options->querystrs) {
      int i;
      queryvals = calloc(options->nquery, sizeof(queryvals[0]));

      for (i = 0; i < options->nquery; i++) {
         int val = strtol(options->querystrs[i], NULL, 0);

         if (val == 0)
            val = regbase(options->querystrs[i]);

         queryvals[i] = val;
         printf("querystr: %s -> 0x%x\n", options->querystrs[i], queryvals[i]);
      }
   }

   for (unsigned idx = 0; type0_reg[idx].regname; idx++) {
      type0_reg[idx].regbase = regbase(type0_reg[idx].regname);
      if (!type0_reg[idx].regbase) {
         printf("invalid register name: %s\n", type0_reg[idx].regname);
         exit(1);
      }
   }
}

void
reset_regs(void)
{
   clear_written();
   clear_lastvals();
   memset(&ibs, 0, sizeof(ibs));
}

void
cffdec_init(const struct cffdec_options *_options)
{
   options = _options;
   summary = options->summary;

   /* in case we're decoding multiple files: */
   free(queryvals);
   reset_regs();
   draw_count = 0;

   /* TODO we need an API to free/cleanup any previous rnn */

   switch (options->gpu_id) {
   case 200 ... 299:
      type0_reg = reg_a2xx;
      init_rnn("a2xx");
      break;
   case 300 ... 399:
      type0_reg = reg_a3xx;
      init_rnn("a3xx");
      break;
   case 400 ... 499:
      type0_reg = reg_a4xx;
      init_rnn("a4xx");
      break;
   case 500 ... 599:
      type0_reg = reg_a5xx;
      init_rnn("a5xx");
      break;
   case 600 ... 699:
      type0_reg = reg_a6xx;
      init_rnn("a6xx");
      break;
   default:
      errx(-1, "unsupported gpu");
   }
}

const char *
pktname(unsigned opc)
{
   return rnn_enumname(rnn, "adreno_pm4_type3_packets", opc);
}

const char *
regname(uint32_t regbase, int color)
{
   return rnn_regname(rnn, regbase, color);
}

uint32_t
regbase(const char *name)
{
   return rnn_regbase(rnn, name);
}

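/* Returns true if the register's symbolic name ends with the given suffix
 * (e.g. "_LO" or "_HI"); the pointer arithmetic below checks that the
 * strstr() match sits exactly at the end of the name.
 */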
static int
endswith(uint32_t regbase, const char *suffix)
{
   const char *name = regname(regbase, 0);
   const char *s = strstr(name, suffix);
   if (!s)
      return 0;
   return (s - strlen(name) + strlen(suffix)) == name;
}

void
dump_register_val(uint32_t regbase, uint32_t dword, int level)
{
   struct rnndecaddrinfo *info = rnn_reginfo(rnn, regbase);

   if (info && info->typeinfo) {
      uint64_t gpuaddr = 0;
      char *decoded = rnndec_decodeval(rnn->vc, info->typeinfo, dword);
      printf("%s%s: %s", levels[level], info->name, decoded);

      /* Try and figure out if we are looking at a gpuaddr.. this
       * might be useful for other gens too, but at least a5xx has
       * the _HI/_LO suffix we can look for. Maybe a better approach
       * would be some special annotation in the xml..
       * for a6xx use "address" and "waddress" types
       */
      if (options->gpu_id >= 600) {
         if (!strcmp(info->typeinfo->name, "address") ||
             !strcmp(info->typeinfo->name, "waddress")) {
            gpuaddr = (((uint64_t)reg_val(regbase + 1)) << 32) | dword;
         }
      } else if (options->gpu_id >= 500) {
         if (endswith(regbase, "_HI") && endswith(regbase - 1, "_LO")) {
            gpuaddr = (((uint64_t)dword) << 32) | reg_val(regbase - 1);
         } else if (endswith(regbase, "_LO") && endswith(regbase + 1, "_HI")) {
            gpuaddr = (((uint64_t)reg_val(regbase + 1)) << 32) | dword;
         }
      }

      if (gpuaddr && hostptr(gpuaddr)) {
         printf("\t\tbase=%" PRIx64 ", offset=%" PRIu64 ", size=%u",
                gpubaseaddr(gpuaddr), gpuaddr - gpubaseaddr(gpuaddr),
                hostlen(gpubaseaddr(gpuaddr)));
      }

      printf("\n");

      free(decoded);
   } else if (info) {
      printf("%s%s: %08x\n", levels[level], info->name, dword);
   } else {
      printf("%s<%04x>: %08x\n", levels[level], regbase, dword);
   }

   if (info) {
      free(info->name);
      free(info);
   }
}

static void
dump_register(uint32_t regbase, uint32_t dword, int level)
{
   if (!quiet(3)) {
      dump_register_val(regbase, dword, level);
   }

   for (unsigned idx = 0; type0_reg[idx].regname; idx++) {
      if (type0_reg[idx].regbase == regbase) {
         if (type0_reg[idx].is_reg64) {
            uint64_t qword = (((uint64_t)reg_val(regbase + 1)) << 32) | dword;
            type0_reg[idx].fxn64(type0_reg[idx].regname, qword, level);
         } else {
            type0_reg[idx].fxn(type0_reg[idx].regname, dword, level);
         }
         break;
      }
   }
}

static bool
is_banked_reg(uint32_t regbase)
{
   return (0x2000 <= regbase) && (regbase < 0x2400);
}

static void
dump_registers(uint32_t regbase, uint32_t *dwords, uint32_t sizedwords,
               int level)
{
   while (sizedwords--) {
      int last_summary = summary;

      /* access to non-banked registers needs a WFI:
       * TODO banked register range for a2xx??
       */
      if (needs_wfi && !is_banked_reg(regbase))
         printl(2, "NEEDS WFI: %s (%x)\n", regname(regbase, 1), regbase);

      reg_set(regbase, *dwords);
      dump_register(regbase, *dwords, level);
      regbase++;
      dwords++;
      summary = last_summary;
   }
}

static void
dump_domain(uint32_t *dwords, uint32_t sizedwords, int level, const char *name)
{
   struct rnndomain *dom;
   int i;

   dom = rnn_finddomain(rnn->db, name);

   if (!dom)
      return;

   if (script_packet)
      script_packet(dwords, sizedwords, rnn, dom);

   if (quiet(2))
      return;

   for (i = 0; i < sizedwords; i++) {
      struct rnndecaddrinfo *info = rnndec_decodeaddr(rnn->vc, dom, i, 0);
      char *decoded;
      if (!(info && info->typeinfo))
         break;
      uint64_t value = dwords[i];
      if (info->typeinfo->high >= 32 && i < sizedwords - 1) {
         value |= (uint64_t)dwords[i + 1] << 32;
         i++; /* skip the next dword since we're printing it now */
      }
      decoded = rnndec_decodeval(rnn->vc, info->typeinfo, value);
      /* Unlike the register printing path, we don't print the name
       * of the register, so if it doesn't contain other named
       * things (i.e. it isn't a bitset) then print the register
       * name as if it's a bitset with a single entry. This avoids
       * having to create a dummy register with a single entry to
       * get a name in the decoding.
       */
      if (info->typeinfo->type == RNN_TTYPE_BITSET ||
          info->typeinfo->type == RNN_TTYPE_INLINE_BITSET) {
         printf("%s%s\n", levels[level], decoded);
      } else {
         printf("%s{ %s%s%s = %s }\n", levels[level], rnn->vc->colors->rname,
                info->name, rnn->vc->colors->reset, decoded);
      }
      free(decoded);
      free(info->name);
      free(info);
   }
}

static uint32_t bin_x1, bin_x2, bin_y1, bin_y2;
static unsigned mode;
static const char *render_mode;
static enum {
   MODE_BINNING = 0x1,
   MODE_GMEM = 0x2,
   MODE_BYPASS = 0x4,
   MODE_ALL = MODE_BINNING | MODE_GMEM | MODE_BYPASS,
} enable_mask = MODE_ALL;
static bool skip_ib2_enable_global;
static bool skip_ib2_enable_local;

static void
print_mode(int level)
{
   if ((options->gpu_id >= 500) && !quiet(2)) {
      printf("%smode: %s\n", levels[level], render_mode);
      printf("%sskip_ib2: g=%d, l=%d\n", levels[level], skip_ib2_enable_global,
             skip_ib2_enable_local);
   }
}

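/* Decide whether to skip query output for the current draw: QUERY_ALL never
 * skips, QUERY_WRITTEN skips unless one of the queried registers has been
 * (re)written since the previous draw, and QUERY_DELTA skips unless one of
 * them has changed value since the previous draw.
 */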
static bool
skip_query(void)
{
   switch (options->query_mode) {
   case QUERY_ALL:
      /* never skip: */
      return false;
   case QUERY_WRITTEN:
      for (int i = 0; i < options->nquery; i++) {
         uint32_t regbase = queryvals[i];
         if (!reg_written(regbase)) {
            continue;
         }
         if (reg_rewritten(regbase)) {
            return false;
         }
      }
      return true;
   case QUERY_DELTA:
      for (int i = 0; i < options->nquery; i++) {
         uint32_t regbase = queryvals[i];
         if (!reg_written(regbase)) {
            continue;
         }
         uint32_t lastval = reg_val(regbase);
         if (lastval != lastvals[regbase]) {
            return false;
         }
      }
      return true;
   }
   return true;
}

static void
__do_query(const char *primtype, uint32_t num_indices)
{
   int n = 0;

   if ((500 <= options->gpu_id) && (options->gpu_id < 700)) {
      uint32_t scissor_tl = reg_val(regbase("GRAS_SC_WINDOW_SCISSOR_TL"));
      uint32_t scissor_br = reg_val(regbase("GRAS_SC_WINDOW_SCISSOR_BR"));

      bin_x1 = scissor_tl & 0xffff;
      bin_y1 = scissor_tl >> 16;
      bin_x2 = scissor_br & 0xffff;
      bin_y2 = scissor_br >> 16;
   }

   for (int i = 0; i < options->nquery; i++) {
      uint32_t regbase = queryvals[i];
      if (reg_written(regbase)) {
         uint32_t lastval = reg_val(regbase);
         printf("%4d: %s(%u,%u-%u,%u):%u:", draw_count, primtype, bin_x1,
                bin_y1, bin_x2, bin_y2, num_indices);
         if (options->gpu_id >= 500)
            printf("%s:", render_mode);
         printf("\t%08x", lastval);
         if (lastval != lastvals[regbase]) {
            printf("!");
         } else {
            printf(" ");
         }
         if (reg_rewritten(regbase)) {
            printf("+");
         } else {
            printf(" ");
         }
         dump_register_val(regbase, lastval, 0);
         n++;
      }
   }

   if (n > 1)
      printf("\n");
}

static void
do_query_compare(const char *primtype, uint32_t num_indices)
{
   unsigned saved_enable_mask = enable_mask;
   const char *saved_render_mode = render_mode;

   /* in 'query-compare' mode, we want to see if the register is written
    * or changed in any mode:
    *
    * (NOTE: this could cause false-positive for 'query-delta' if the reg
    * is written with different values in binning vs sysmem/gmem mode, as
    * we don't track previous values per-mode, but I think we can live with
    * that)
    */
   enable_mask = MODE_ALL;

   clear_rewritten();
   load_all_groups(0);

   if (!skip_query()) {
      /* dump binning pass values: */
      enable_mask = MODE_BINNING;
      render_mode = "BINNING";
      clear_rewritten();
      load_all_groups(0);
      __do_query(primtype, num_indices);

      /* dump draw pass values: */
      enable_mask = MODE_GMEM | MODE_BYPASS;
      render_mode = "DRAW";
      clear_rewritten();
      load_all_groups(0);
      __do_query(primtype, num_indices);

      printf("\n");
   }

   enable_mask = saved_enable_mask;
   render_mode = saved_render_mode;

   disable_all_groups();
}

/* well, actually query and script..
 * NOTE: call this before dump_register_summary()
 */
static void
do_query(const char *primtype, uint32_t num_indices)
{
   if (script_draw)
      script_draw(primtype, num_indices);

   if (options->query_compare) {
      do_query_compare(primtype, num_indices);
      return;
   }

   if (skip_query())
      return;

   __do_query(primtype, num_indices);
}

static void
cp_im_loadi(uint32_t *dwords, uint32_t sizedwords, int level)
{
   uint32_t start = dwords[1] >> 16;
   uint32_t size = dwords[1] & 0xffff;
   const char *type = NULL, *ext = NULL;
   gl_shader_stage disasm_type;

   switch (dwords[0]) {
   case 0:
      type = "vertex";
      ext = "vo";
      disasm_type = MESA_SHADER_VERTEX;
      break;
   case 1:
      type = "fragment";
      ext = "fo";
      disasm_type = MESA_SHADER_FRAGMENT;
      break;
   default:
      type = "<unknown>";
      disasm_type = 0;
      break;
   }

   printf("%s%s shader, start=%04x, size=%04x\n", levels[level], type, start,
          size);
   disasm_a2xx(dwords + 2, sizedwords - 2, level + 2, disasm_type);

   /* dump raw shader: */
   if (ext)
      dump_shader(ext, dwords + 2, (sizedwords - 2) * 4);
}

static void
cp_wide_reg_write(uint32_t *dwords, uint32_t sizedwords, int level)
{
   uint32_t reg = dwords[0] & 0xffff;
   int i;
   for (i = 1; i < sizedwords; i++) {
      dump_register(reg, dwords[i], level + 1);
      reg_set(reg, dwords[i]);
      reg++;
   }
}

enum state_t {
   TEX_SAMP = 1,
   TEX_CONST,
   TEX_MIPADDR, /* a3xx only */
   SHADER_PROG,
   SHADER_CONST,

   // image/ssbo state:
   SSBO_0,
   SSBO_1,
   SSBO_2,

   UBO,

   // unknown things, just hexdump:
   UNKNOWN_DWORDS,
   UNKNOWN_2DWORDS,
   UNKNOWN_4DWORDS,
};

enum adreno_state_block {
   SB_VERT_TEX = 0,
   SB_VERT_MIPADDR = 1,
   SB_FRAG_TEX = 2,
   SB_FRAG_MIPADDR = 3,
   SB_VERT_SHADER = 4,
   SB_GEOM_SHADER = 5,
   SB_FRAG_SHADER = 6,
   SB_COMPUTE_SHADER = 7,
};

/* TODO there is probably a clever way to let rnndec parse things so
 * we don't have to care about packet format differences across gens
 */

static void
a3xx_get_state_type(uint32_t *dwords, gl_shader_stage *stage,
                    enum state_t *state, enum state_src_t *src)
{
   unsigned state_block_id = (dwords[0] >> 19) & 0x7;
   unsigned state_type = dwords[1] & 0x3;
   static const struct {
      gl_shader_stage stage;
      enum state_t state;
   } lookup[0xf][0x3] = {
      [SB_VERT_TEX][0] = {MESA_SHADER_VERTEX, TEX_SAMP},
      [SB_VERT_TEX][1] = {MESA_SHADER_VERTEX, TEX_CONST},
      [SB_FRAG_TEX][0] = {MESA_SHADER_FRAGMENT, TEX_SAMP},
      [SB_FRAG_TEX][1] = {MESA_SHADER_FRAGMENT, TEX_CONST},
      [SB_VERT_SHADER][0] = {MESA_SHADER_VERTEX, SHADER_PROG},
      [SB_VERT_SHADER][1] = {MESA_SHADER_VERTEX, SHADER_CONST},
      [SB_FRAG_SHADER][0] = {MESA_SHADER_FRAGMENT, SHADER_PROG},
      [SB_FRAG_SHADER][1] = {MESA_SHADER_FRAGMENT, SHADER_CONST},
   };

   *stage = lookup[state_block_id][state_type].stage;
   *state = lookup[state_block_id][state_type].state;
   unsigned state_src = (dwords[0] >> 16) & 0x7;
   if (state_src == 0 /* SS_DIRECT */)
      *src = STATE_SRC_DIRECT;
   else
      *src = STATE_SRC_INDIRECT;
}

static enum state_src_t
_get_state_src(unsigned dword0)
{
   switch ((dword0 >> 16) & 0x3) {
   case 0: /* SS4_DIRECT / SS6_DIRECT */
      return STATE_SRC_DIRECT;
   case 2: /* SS4_INDIRECT / SS6_INDIRECT */
      return STATE_SRC_INDIRECT;
   case 1: /* SS6_BINDLESS */
      return STATE_SRC_BINDLESS;
   default:
      return STATE_SRC_DIRECT;
   }
}

static void
_get_state_type(unsigned state_block_id, unsigned state_type,
                gl_shader_stage *stage, enum state_t *state)
{
   static const struct {
      gl_shader_stage stage;
      enum state_t state;
   } lookup[0x10][0x4] = {
      // SB4_VS_TEX:
      [0x0][0] = {MESA_SHADER_VERTEX, TEX_SAMP},
      [0x0][1] = {MESA_SHADER_VERTEX, TEX_CONST},
      [0x0][2] = {MESA_SHADER_VERTEX, UBO},
      // SB4_HS_TEX:
      [0x1][0] = {MESA_SHADER_TESS_CTRL, TEX_SAMP},
      [0x1][1] = {MESA_SHADER_TESS_CTRL, TEX_CONST},
      [0x1][2] = {MESA_SHADER_TESS_CTRL, UBO},
      // SB4_DS_TEX:
      [0x2][0] = {MESA_SHADER_TESS_EVAL, TEX_SAMP},
      [0x2][1] = {MESA_SHADER_TESS_EVAL, TEX_CONST},
      [0x2][2] = {MESA_SHADER_TESS_EVAL, UBO},
      // SB4_GS_TEX:
      [0x3][0] = {MESA_SHADER_GEOMETRY, TEX_SAMP},
      [0x3][1] = {MESA_SHADER_GEOMETRY, TEX_CONST},
      [0x3][2] = {MESA_SHADER_GEOMETRY, UBO},
      // SB4_FS_TEX:
      [0x4][0] = {MESA_SHADER_FRAGMENT, TEX_SAMP},
      [0x4][1] = {MESA_SHADER_FRAGMENT, TEX_CONST},
      [0x4][2] = {MESA_SHADER_FRAGMENT, UBO},
      // SB4_CS_TEX:
      [0x5][0] = {MESA_SHADER_COMPUTE, TEX_SAMP},
      [0x5][1] = {MESA_SHADER_COMPUTE, TEX_CONST},
      [0x5][2] = {MESA_SHADER_COMPUTE, UBO},
      // SB4_VS_SHADER:
      [0x8][0] = {MESA_SHADER_VERTEX, SHADER_PROG},
      [0x8][1] = {MESA_SHADER_VERTEX, SHADER_CONST},
      [0x8][2] = {MESA_SHADER_VERTEX, UBO},
      // SB4_HS_SHADER
      [0x9][0] = {MESA_SHADER_TESS_CTRL, SHADER_PROG},
      [0x9][1] = {MESA_SHADER_TESS_CTRL, SHADER_CONST},
      [0x9][2] = {MESA_SHADER_TESS_CTRL, UBO},
      // SB4_DS_SHADER
      [0xa][0] = {MESA_SHADER_TESS_EVAL, SHADER_PROG},
      [0xa][1] = {MESA_SHADER_TESS_EVAL, SHADER_CONST},
      [0xa][2] = {MESA_SHADER_TESS_EVAL, UBO},
      // SB4_GS_SHADER
      [0xb][0] = {MESA_SHADER_GEOMETRY, SHADER_PROG},
      [0xb][1] = {MESA_SHADER_GEOMETRY, SHADER_CONST},
      [0xb][2] = {MESA_SHADER_GEOMETRY, UBO},
      // SB4_FS_SHADER:
      [0xc][0] = {MESA_SHADER_FRAGMENT, SHADER_PROG},
      [0xc][1] = {MESA_SHADER_FRAGMENT, SHADER_CONST},
      [0xc][2] = {MESA_SHADER_FRAGMENT, UBO},
      // SB4_CS_SHADER:
      [0xd][0] = {MESA_SHADER_COMPUTE, SHADER_PROG},
      [0xd][1] = {MESA_SHADER_COMPUTE, SHADER_CONST},
      [0xd][2] = {MESA_SHADER_COMPUTE, UBO},
      [0xd][3] = {MESA_SHADER_COMPUTE, SSBO_0}, /* a6xx location */
      // SB4_SSBO (shared across all stages)
      [0xe][0] = {0, SSBO_0}, /* a5xx (and a4xx?) location */
      [0xe][1] = {0, SSBO_1},
      [0xe][2] = {0, SSBO_2},
      // SB4_CS_SSBO
      [0xf][0] = {MESA_SHADER_COMPUTE, SSBO_0},
      [0xf][1] = {MESA_SHADER_COMPUTE, SSBO_1},
      [0xf][2] = {MESA_SHADER_COMPUTE, SSBO_2},
      // unknown things
      /* This looks like combined UBO state for 3d stages (a5xx and
       * before??). I think a6xx has UBO state per shader stage:
       */
      [0x6][2] = {0, UBO},
      [0x7][1] = {0, UNKNOWN_2DWORDS},
   };

   *stage = lookup[state_block_id][state_type].stage;
   *state = lookup[state_block_id][state_type].state;
}

static void
a4xx_get_state_type(uint32_t *dwords, gl_shader_stage *stage,
                    enum state_t *state, enum state_src_t *src)
{
   unsigned state_block_id = (dwords[0] >> 18) & 0xf;
   unsigned state_type = dwords[1] & 0x3;
   _get_state_type(state_block_id, state_type, stage, state);
   *src = _get_state_src(dwords[0]);
}

static void
a6xx_get_state_type(uint32_t *dwords, gl_shader_stage *stage,
                    enum state_t *state, enum state_src_t *src)
{
   unsigned state_block_id = (dwords[0] >> 18) & 0xf;
   unsigned state_type = (dwords[0] >> 14) & 0x3;
   _get_state_type(state_block_id, state_type, stage, state);
   *src = _get_state_src(dwords[0]);
}

static void
dump_tex_samp(uint32_t *texsamp, enum state_src_t src, int num_unit, int level)
{
   for (int i = 0; i < num_unit; i++) {
      /* work-around to reduce noise for opencl blob which always
       * writes the max # regardless of # of textures used
       */
      if ((num_unit == 16) && (texsamp[0] == 0) && (texsamp[1] == 0))
         break;

      if ((300 <= options->gpu_id) && (options->gpu_id < 400)) {
         dump_domain(texsamp, 2, level + 2, "A3XX_TEX_SAMP");
         dump_hex(texsamp, 2, level + 1);
         texsamp += 2;
      } else if ((400 <= options->gpu_id) && (options->gpu_id < 500)) {
         dump_domain(texsamp, 2, level + 2, "A4XX_TEX_SAMP");
         dump_hex(texsamp, 2, level + 1);
         texsamp += 2;
      } else if ((500 <= options->gpu_id) && (options->gpu_id < 600)) {
         dump_domain(texsamp, 4, level + 2, "A5XX_TEX_SAMP");
         dump_hex(texsamp, 4, level + 1);
         texsamp += 4;
      } else if ((600 <= options->gpu_id) && (options->gpu_id < 700)) {
         dump_domain(texsamp, 4, level + 2, "A6XX_TEX_SAMP");
         dump_hex(texsamp, 4, level + 1);
         texsamp += src == STATE_SRC_BINDLESS ? 16 : 4;
      }
   }
}

static void
dump_tex_const(uint32_t *texconst, int num_unit, int level)
{
   for (int i = 0; i < num_unit; i++) {
      /* work-around to reduce noise for opencl blob which always
       * writes the max # regardless of # of textures used
       */
      if ((num_unit == 16) && (texconst[0] == 0) && (texconst[1] == 0) &&
          (texconst[2] == 0) && (texconst[3] == 0))
         break;

      if ((300 <= options->gpu_id) && (options->gpu_id < 400)) {
         dump_domain(texconst, 4, level + 2, "A3XX_TEX_CONST");
         dump_hex(texconst, 4, level + 1);
         texconst += 4;
      } else if ((400 <= options->gpu_id) && (options->gpu_id < 500)) {
         dump_domain(texconst, 8, level + 2, "A4XX_TEX_CONST");
         if (options->dump_textures) {
            uint32_t addr = texconst[4] & ~0x1f;
            dump_gpuaddr(addr, level - 2);
         }
         dump_hex(texconst, 8, level + 1);
         texconst += 8;
      } else if ((500 <= options->gpu_id) && (options->gpu_id < 600)) {
         dump_domain(texconst, 12, level + 2, "A5XX_TEX_CONST");
         if (options->dump_textures) {
            uint64_t addr =
               (((uint64_t)texconst[5] & 0x1ffff) << 32) | texconst[4];
            dump_gpuaddr_size(addr, level - 2, hostlen(addr) / 4, 3);
         }
         dump_hex(texconst, 12, level + 1);
         texconst += 12;
      } else if ((600 <= options->gpu_id) && (options->gpu_id < 700)) {
         dump_domain(texconst, 16, level + 2, "A6XX_TEX_CONST");
         if (options->dump_textures) {
            uint64_t addr =
               (((uint64_t)texconst[5] & 0x1ffff) << 32) | texconst[4];
            dump_gpuaddr_size(addr, level - 2, hostlen(addr) / 4, 3);
         }
         dump_hex(texconst, 16, level + 1);
         texconst += 16;
      }
   }
}

static void
cp_load_state(uint32_t *dwords, uint32_t sizedwords, int level)
{
   gl_shader_stage stage;
   enum state_t state;
   enum state_src_t src;
   uint32_t num_unit = (dwords[0] >> 22) & 0x1ff;
   uint64_t ext_src_addr;
   void *contents;
   int i;

   if (quiet(2) && !options->script)
      return;

   if (options->gpu_id >= 600)
      a6xx_get_state_type(dwords, &stage, &state, &src);
   else if (options->gpu_id >= 400)
      a4xx_get_state_type(dwords, &stage, &state, &src);
   else
      a3xx_get_state_type(dwords, &stage, &state, &src);

   switch (src) {
   case STATE_SRC_DIRECT:
      ext_src_addr = 0;
      break;
   case STATE_SRC_INDIRECT:
      if (is_64b()) {
         ext_src_addr = dwords[1] & 0xfffffffc;
         ext_src_addr |= ((uint64_t)dwords[2]) << 32;
      } else {
         ext_src_addr = dwords[1] & 0xfffffffc;
      }

      break;
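   /* STATE_SRC_BINDLESS: the state is fetched via one of the HLSQ bindless
    * base registers; dword1[31:28] selects which base register (pair), and
    * dword1[23:0] is a dword offset added onto that base.
    */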
1456 case STATE_SRC_BINDLESS: {
1457 const unsigned base_reg = stage == MESA_SHADER_COMPUTE
1458 ? regbase("HLSQ_CS_BINDLESS_BASE[0].ADDR")
1459 : regbase("HLSQ_BINDLESS_BASE[0].ADDR");
1460
1461 if (is_64b()) {
1462 const unsigned reg = base_reg + (dwords[1] >> 28) * 2;
1463 ext_src_addr = reg_val(reg) & 0xfffffffc;
1464 ext_src_addr |= ((uint64_t)reg_val(reg + 1)) << 32;
1465 } else {
1466 const unsigned reg = base_reg + (dwords[1] >> 28);
1467 ext_src_addr = reg_val(reg) & 0xfffffffc;
1468 }
1469
1470 ext_src_addr += 4 * (dwords[1] & 0xffffff);
1471 break;
1472 }
1473 }
1474
1475 if (ext_src_addr)
1476 contents = hostptr(ext_src_addr);
1477 else
1478 contents = is_64b() ? dwords + 3 : dwords + 2;
1479
1480 if (!contents)
1481 return;
1482
1483 switch (state) {
1484 case SHADER_PROG: {
1485 const char *ext = NULL;
1486
1487 if (quiet(2))
1488 return;
1489
1490 if (options->gpu_id >= 400)
1491 num_unit *= 16;
1492 else if (options->gpu_id >= 300)
1493 num_unit *= 4;
1494
1495 /* shaders:
1496 *
1497 * note: num_unit seems to be # of instruction groups, where
1498 * an instruction group has 4 64bit instructions.
1499 */
1500 if (stage == MESA_SHADER_VERTEX) {
1501 ext = "vo3";
1502 } else if (stage == MESA_SHADER_GEOMETRY) {
1503 ext = "go3";
1504 } else if (stage == MESA_SHADER_COMPUTE) {
1505 ext = "co3";
1506 } else if (stage == MESA_SHADER_FRAGMENT) {
1507 ext = "fo3";
1508 }
1509
1510 if (contents)
1511 try_disasm_a3xx(contents, num_unit * 2, level + 2, stdout,
1512 options->gpu_id);
1513
1514 /* dump raw shader: */
1515 if (ext)
1516 dump_shader(ext, contents, num_unit * 2 * 4);
1517
1518 break;
1519 }
1520 case SHADER_CONST: {
1521 if (quiet(2))
1522 return;
1523
1524 /* uniforms/consts:
1525 *
1526 * note: num_unit seems to be # of pairs of dwords??
1527 */
1528
1529 if (options->gpu_id >= 400)
1530 num_unit *= 2;
1531
1532 dump_float(contents, num_unit * 2, level + 1);
1533 dump_hex(contents, num_unit * 2, level + 1);
1534
1535 break;
1536 }
1537 case TEX_MIPADDR: {
1538 uint32_t *addrs = contents;
1539
1540 if (quiet(2))
1541 return;
1542
1543 /* mipmap consts block just appears to be array of num_unit gpu addr's: */
1544 for (i = 0; i < num_unit; i++) {
1545 void *ptr = hostptr(addrs[i]);
1546 printf("%s%2d: %08x\n", levels[level + 1], i, addrs[i]);
1547 if (options->dump_textures) {
1548 printf("base=%08x\n", (uint32_t)gpubaseaddr(addrs[i]));
1549 dump_hex(ptr, hostlen(addrs[i]) / 4, level + 1);
1550 }
1551 }
1552 break;
1553 }
1554 case TEX_SAMP: {
1555 dump_tex_samp(contents, src, num_unit, level);
1556 break;
1557 }
1558 case TEX_CONST: {
1559 dump_tex_const(contents, num_unit, level);
1560 break;
1561 }
1562 case SSBO_0: {
1563 uint32_t *ssboconst = (uint32_t *)contents;
1564
1565 for (i = 0; i < num_unit; i++) {
1566 int sz = 4;
1567 if (400 <= options->gpu_id && options->gpu_id < 500) {
1568 dump_domain(ssboconst, 4, level + 2, "A4XX_SSBO_0");
1569 } else if (500 <= options->gpu_id && options->gpu_id < 600) {
1570 dump_domain(ssboconst, 4, level + 2, "A5XX_SSBO_0");
1571 } else if (600 <= options->gpu_id && options->gpu_id < 700) {
1572 sz = 16;
1573 dump_domain(ssboconst, 16, level + 2, "A6XX_TEX_CONST");
1574 }
1575 dump_hex(ssboconst, sz, level + 1);
1576 ssboconst += sz;
1577 }
1578 break;
1579 }
1580 case SSBO_1: {
1581 uint32_t *ssboconst = (uint32_t *)contents;
1582
1583 for (i = 0; i < num_unit; i++) {
1584 if (400 <= options->gpu_id && options->gpu_id < 500)
1585 dump_domain(ssboconst, 2, level + 2, "A4XX_SSBO_1");
1586 else if (500 <= options->gpu_id && options->gpu_id < 600)
1587 dump_domain(ssboconst, 2, level + 2, "A5XX_SSBO_1");
1588 dump_hex(ssboconst, 2, level + 1);
1589 ssboconst += 2;
1590 }
1591 break;
1592 }
1593 case SSBO_2: {
1594 uint32_t *ssboconst = (uint32_t *)contents;
1595
1596 for (i = 0; i < num_unit; i++) {
1597 /* TODO a4xx and a5xx might be same: */
1598 if ((500 <= options->gpu_id) && (options->gpu_id < 600)) {
1599 dump_domain(ssboconst, 2, level + 2, "A5XX_SSBO_2");
1600 dump_hex(ssboconst, 2, level + 1);
1601 }
1602 if (options->dump_textures) {
1603 uint64_t addr =
1604 (((uint64_t)ssboconst[1] & 0x1ffff) << 32) | ssboconst[0];
1605 dump_gpuaddr_size(addr, level - 2, hostlen(addr) / 4, 3);
1606 }
1607 ssboconst += 2;
1608 }
1609 break;
1610 }
1611 case UBO: {
1612 uint32_t *uboconst = (uint32_t *)contents;
1613
1614 for (i = 0; i < num_unit; i++) {
1615 // TODO probably similar on a4xx..
1616 if (500 <= options->gpu_id && options->gpu_id < 600)
1617 dump_domain(uboconst, 2, level + 2, "A5XX_UBO");
1618 else if (600 <= options->gpu_id && options->gpu_id < 700)
1619 dump_domain(uboconst, 2, level + 2, "A6XX_UBO");
1620 dump_hex(uboconst, 2, level + 1);
1621 uboconst += src == STATE_SRC_BINDLESS ? 16 : 2;
1622 }
1623 break;
1624 }
1625 case UNKNOWN_DWORDS: {
1626 if (quiet(2))
1627 return;
1628 dump_hex(contents, num_unit, level + 1);
1629 break;
1630 }
1631 case UNKNOWN_2DWORDS: {
1632 if (quiet(2))
1633 return;
1634 dump_hex(contents, num_unit * 2, level + 1);
1635 break;
1636 }
1637 case UNKNOWN_4DWORDS: {
1638 if (quiet(2))
1639 return;
1640 dump_hex(contents, num_unit * 4, level + 1);
1641 break;
1642 }
1643 default:
1644 if (quiet(2))
1645 return;
1646 /* hmm.. */
1647 dump_hex(contents, num_unit, level + 1);
1648 break;
1649 }
1650 }
1651
1652 static void
cp_set_bin(uint32_t * dwords,uint32_t sizedwords,int level)1653 cp_set_bin(uint32_t *dwords, uint32_t sizedwords, int level)
1654 {
1655 bin_x1 = dwords[1] & 0xffff;
1656 bin_y1 = dwords[1] >> 16;
1657 bin_x2 = dwords[2] & 0xffff;
1658 bin_y2 = dwords[2] >> 16;
1659 }
1660
1661 static void
dump_a2xx_tex_const(uint32_t * dwords,uint32_t sizedwords,uint32_t val,int level)1662 dump_a2xx_tex_const(uint32_t *dwords, uint32_t sizedwords, uint32_t val,
1663 int level)
1664 {
1665 uint32_t w, h, p;
1666 uint32_t gpuaddr, flags, mip_gpuaddr, mip_flags;
1667 uint32_t min, mag, swiz, clamp_x, clamp_y, clamp_z;
1668 static const char *filter[] = {
1669 "point",
1670 "bilinear",
1671 "bicubic",
1672 };
1673 static const char *clamp[] = {
1674 "wrap",
1675 "mirror",
1676 "clamp-last-texel",
1677 };
1678 static const char swiznames[] = "xyzw01??";
1679
1680 /* see sys2gmem_tex_const[] in adreno_a2xxx.c */
1681
1682 /* Texture, FormatXYZW=Unsigned, ClampXYZ=Wrap/Repeat,
1683 * RFMode=ZeroClamp-1, Dim=1:2d, pitch
1684 */
1685 p = (dwords[0] >> 22) << 5;
1686 clamp_x = (dwords[0] >> 10) & 0x3;
1687 clamp_y = (dwords[0] >> 13) & 0x3;
1688 clamp_z = (dwords[0] >> 16) & 0x3;
1689
1690 /* Format=6:8888_WZYX, EndianSwap=0:None, ReqSize=0:256bit, DimHi=0,
1691 * NearestClamp=1:OGL Mode
1692 */
1693 parse_dword_addr(dwords[1], &gpuaddr, &flags, 0xfff);
1694
1695 /* Width, Height, EndianSwap=0:None */
1696 w = (dwords[2] & 0x1fff) + 1;
1697 h = ((dwords[2] >> 13) & 0x1fff) + 1;
1698
1699 /* NumFormat=0:RF, DstSelXYZW=XYZW, ExpAdj=0, MagFilt=MinFilt=0:Point,
1700 * Mip=2:BaseMap
1701 */
1702 mag = (dwords[3] >> 19) & 0x3;
1703 min = (dwords[3] >> 21) & 0x3;
1704 swiz = (dwords[3] >> 1) & 0xfff;
1705
1706 /* VolMag=VolMin=0:Point, MinMipLvl=0, MaxMipLvl=1, LodBiasH=V=0,
1707 * Dim3d=0
1708 */
1709 // XXX
1710
1711 /* BorderColor=0:ABGRBlack, ForceBC=0:diable, TriJuice=0, Aniso=0,
1712 * Dim=1:2d, MipPacking=0
1713 */
1714 parse_dword_addr(dwords[5], &mip_gpuaddr, &mip_flags, 0xfff);
1715
1716 printf("%sset texture const %04x\n", levels[level], val);
1717 printf("%sclamp x/y/z: %s/%s/%s\n", levels[level + 1], clamp[clamp_x],
1718 clamp[clamp_y], clamp[clamp_z]);
1719 printf("%sfilter min/mag: %s/%s\n", levels[level + 1], filter[min],
1720 filter[mag]);
1721 printf("%sswizzle: %c%c%c%c\n", levels[level + 1],
1722 swiznames[(swiz >> 0) & 0x7], swiznames[(swiz >> 3) & 0x7],
1723 swiznames[(swiz >> 6) & 0x7], swiznames[(swiz >> 9) & 0x7]);
1724 printf("%saddr=%08x (flags=%03x), size=%dx%d, pitch=%d, format=%s\n",
1725 levels[level + 1], gpuaddr, flags, w, h, p,
1726 rnn_enumname(rnn, "a2xx_sq_surfaceformat", flags & 0xf));
1727 printf("%smipaddr=%08x (flags=%03x)\n", levels[level + 1], mip_gpuaddr,
1728 mip_flags);
1729 }
1730
1731 static void
dump_a2xx_shader_const(uint32_t * dwords,uint32_t sizedwords,uint32_t val,int level)1732 dump_a2xx_shader_const(uint32_t *dwords, uint32_t sizedwords, uint32_t val,
1733 int level)
1734 {
1735 int i;
1736 printf("%sset shader const %04x\n", levels[level], val);
1737 for (i = 0; i < sizedwords;) {
1738 uint32_t gpuaddr, flags;
1739 parse_dword_addr(dwords[i++], &gpuaddr, &flags, 0xf);
1740 void *addr = hostptr(gpuaddr);
1741 if (addr) {
1742 const char *fmt =
1743 rnn_enumname(rnn, "a2xx_sq_surfaceformat", flags & 0xf);
1744 uint32_t size = dwords[i++];
1745 printf("%saddr=%08x, size=%d, format=%s\n", levels[level + 1], gpuaddr,
1746 size, fmt);
1747 // TODO maybe dump these as bytes instead of dwords?
1748 size = (size + 3) / 4; // for now convert to dwords
1749 dump_hex(addr, min(size, 64), level + 1);
1750 if (size > min(size, 64))
1751 printf("%s\t\t...\n", levels[level + 1]);
1752 dump_float(addr, min(size, 64), level + 1);
1753 if (size > min(size, 64))
1754 printf("%s\t\t...\n", levels[level + 1]);
1755 }
1756 }
1757 }
1758
1759 static void
cp_set_const(uint32_t * dwords,uint32_t sizedwords,int level)1760 cp_set_const(uint32_t *dwords, uint32_t sizedwords, int level)
1761 {
1762 uint32_t val = dwords[0] & 0xffff;
1763 switch ((dwords[0] >> 16) & 0xf) {
1764 case 0x0:
1765 dump_float((float *)(dwords + 1), sizedwords - 1, level + 1);
1766 break;
1767 case 0x1:
1768 /* need to figure out how const space is partitioned between
1769 * attributes, textures, etc..
1770 */
1771 if (val < 0x78) {
1772 dump_a2xx_tex_const(dwords + 1, sizedwords - 1, val, level);
1773 } else {
1774 dump_a2xx_shader_const(dwords + 1, sizedwords - 1, val, level);
1775 }
1776 break;
1777 case 0x2:
1778 printf("%sset bool const %04x\n", levels[level], val);
1779 break;
1780 case 0x3:
1781 printf("%sset loop const %04x\n", levels[level], val);
1782 break;
1783 case 0x4:
1784 val += 0x2000;
1785 if (dwords[0] & 0x80000000) {
1786 uint32_t srcreg = dwords[1];
1787 uint32_t dstval = dwords[2];
1788
1789 /* TODO: not sure what happens w/ payload != 2.. */
1790 assert(sizedwords == 3);
1791 assert(srcreg < ARRAY_SIZE(type0_reg_vals));
1792
1793 /* note: rnn_regname uses a static buf so we can't do
1794 * two regname() calls for one printf..
1795 */
1796 printf("%s%s = %08x + ", levels[level], regname(val, 1), dstval);
1797 printf("%s (%08x)\n", regname(srcreg, 1), type0_reg_vals[srcreg]);
1798
1799 dstval += type0_reg_vals[srcreg];
1800
1801 dump_registers(val, &dstval, 1, level + 1);
1802 } else {
1803 dump_registers(val, dwords + 1, sizedwords - 1, level + 1);
1804 }
1805 break;
1806 }
1807 }
1808
1809 static void dump_register_summary(int level);
1810
1811 static void
cp_event_write(uint32_t * dwords,uint32_t sizedwords,int level)1812 cp_event_write(uint32_t *dwords, uint32_t sizedwords, int level)
1813 {
1814 const char *name = rnn_enumname(rnn, "vgt_event_type", dwords[0]);
1815 printl(2, "%sevent %s\n", levels[level], name);
1816
1817 if (name && (options->gpu_id > 500)) {
1818 char eventname[64];
1819 snprintf(eventname, sizeof(eventname), "EVENT:%s", name);
1820 if (!strcmp(name, "BLIT")) {
1821 do_query(eventname, 0);
1822 print_mode(level);
1823 dump_register_summary(level);
1824 }
1825 }
1826 }
1827
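/* Dump the tracked register state at each draw/blit/dispatch.  In the
 * output, '!' flags registers whose value changed since the previous
 * summary, and '+' flags registers that were (re)written since the last
 * draw/blit, even if the value is unchanged.
 */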
1828 static void
1829 dump_register_summary(int level)
1830 {
1831 uint32_t i;
1832 bool saved_summary = summary;
1833 summary = false;
1834
1835 in_summary = true;
1836
1837 /* dump current state of registers: */
1838 printl(2, "%sdraw[%i] register values\n", levels[level], draw_count);
1839 for (i = 0; i < regcnt(); i++) {
1840 uint32_t regbase = i;
1841 uint32_t lastval = reg_val(regbase);
1842 /* skip registers that haven't been updated since last draw/blit: */
1843 if (!(options->allregs || reg_rewritten(regbase)))
1844 continue;
1845 if (!reg_written(regbase))
1846 continue;
1847 if (lastval != lastvals[regbase]) {
1848 printl(2, "!");
1849 lastvals[regbase] = lastval;
1850 } else {
1851 printl(2, " ");
1852 }
1853 if (reg_rewritten(regbase)) {
1854 printl(2, "+");
1855 } else {
1856 printl(2, " ");
1857 }
1858 printl(2, "\t%08x", lastval);
1859 if (!quiet(2)) {
1860 dump_register(regbase, lastval, level);
1861 }
1862 }
1863
1864 clear_rewritten();
1865
1866 in_summary = false;
1867
1868 draw_count++;
1869 summary = saved_summary;
1870 }
1871
1872 static uint32_t
1873 draw_indx_common(uint32_t *dwords, int level)
1874 {
1875 uint32_t prim_type = dwords[1] & 0x1f;
1876 uint32_t source_select = (dwords[1] >> 6) & 0x3;
1877 uint32_t num_indices = dwords[2];
1878 const char *primtype;
1879
1880 primtype = rnn_enumname(rnn, "pc_di_primtype", prim_type);
1881
1882 do_query(primtype, num_indices);
1883
1884 printl(2, "%sdraw: %d\n", levels[level], draws[ib]);
1885 printl(2, "%sprim_type: %s (%d)\n", levels[level], primtype, prim_type);
1886 printl(2, "%ssource_select: %s (%d)\n", levels[level],
1887 rnn_enumname(rnn, "pc_di_src_sel", source_select), source_select);
1888 printl(2, "%snum_indices: %d\n", levels[level], num_indices);
1889
1890 vertices += num_indices;
1891
1892 draws[ib]++;
1893
1894 return num_indices;
1895 }
1896
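/* Index size as encoded in the draw-initiator dword: the two-bit field is
 * split, with its low bit at bit 11 and its high bit at bit 13, hence the
 * ((dw >> 11) & 1) | ((dw >> 12) & 2) extraction used below.
 */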
1897 enum pc_di_index_size {
1898 INDEX_SIZE_IGN = 0,
1899 INDEX_SIZE_16_BIT = 0,
1900 INDEX_SIZE_32_BIT = 1,
1901 INDEX_SIZE_8_BIT = 2,
1902 INDEX_SIZE_INVALID = 0,
1903 };
1904
1905 static void
1906 cp_draw_indx(uint32_t *dwords, uint32_t sizedwords, int level)
1907 {
1908 uint32_t num_indices = draw_indx_common(dwords, level);
1909
1910 assert(!is_64b());
1911
1912 /* if we have an index buffer, dump that: */
1913 if (sizedwords == 5) {
1914 void *ptr = hostptr(dwords[3]);
1915 printl(2, "%sgpuaddr: %08x\n", levels[level], dwords[3]);
1916 printl(2, "%sidx_size: %d\n", levels[level], dwords[4]);
1917 if (ptr) {
1918 enum pc_di_index_size size =
1919 ((dwords[1] >> 11) & 1) | ((dwords[1] >> 12) & 2);
1920 if (!quiet(2)) {
1921 int i;
1922 printf("%sidxs: ", levels[level]);
1923 if (size == INDEX_SIZE_8_BIT) {
1924 uint8_t *idx = ptr;
1925 for (i = 0; i < dwords[4]; i++)
1926 printf(" %u", idx[i]);
1927 } else if (size == INDEX_SIZE_16_BIT) {
1928 uint16_t *idx = ptr;
1929 for (i = 0; i < dwords[4] / 2; i++)
1930 printf(" %u", idx[i]);
1931 } else if (size == INDEX_SIZE_32_BIT) {
1932 uint32_t *idx = ptr;
1933 for (i = 0; i < dwords[4] / 4; i++)
1934 printf(" %u", idx[i]);
1935 }
1936 printf("\n");
1937 dump_hex(ptr, dwords[4] / 4, level + 1);
1938 }
1939 }
1940 }
1941
1942 /* don't bother dumping registers for the dummy draw_indx's.. */
1943 if (num_indices > 0)
1944 dump_register_summary(level);
1945
1946 needs_wfi = true;
1947 }
1948
1949 static void
1950 cp_draw_indx_2(uint32_t *dwords, uint32_t sizedwords, int level)
1951 {
1952 uint32_t num_indices = draw_indx_common(dwords, level);
1953 enum pc_di_index_size size =
1954 ((dwords[1] >> 11) & 1) | ((dwords[1] >> 12) & 2);
1955 void *ptr = &dwords[3];
1956 int sz = 0;
1957
1958 assert(!is_64b());
1959
1960 /* CP_DRAW_INDX_2 has embedded/inline idx buffer: */
1961 if (!quiet(2)) {
1962 int i;
1963 printf("%sidxs: ", levels[level]);
1964 if (size == INDEX_SIZE_8_BIT) {
1965 uint8_t *idx = ptr;
1966 for (i = 0; i < num_indices; i++)
1967 printf(" %u", idx[i]);
1968 sz = num_indices;
1969 } else if (size == INDEX_SIZE_16_BIT) {
1970 uint16_t *idx = ptr;
1971 for (i = 0; i < num_indices; i++)
1972 printf(" %u", idx[i]);
1973 sz = num_indices * 2;
1974 } else if (size == INDEX_SIZE_32_BIT) {
1975 uint32_t *idx = ptr;
1976 for (i = 0; i < num_indices; i++)
1977 printf(" %u", idx[i]);
1978 sz = num_indices * 4;
1979 }
1980 printf("\n");
1981 dump_hex(ptr, sz / 4, level + 1);
1982 }
1983
1984 /* don't bother dumping registers for the dummy draw_indx's.. */
1985 if (num_indices > 0)
1986 dump_register_summary(level);
1987 }
1988
1989 static void
1990 cp_draw_indx_offset(uint32_t *dwords, uint32_t sizedwords, int level)
1991 {
1992 uint32_t num_indices = dwords[2];
1993 uint32_t prim_type = dwords[0] & 0x1f;
1994
1995 do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), num_indices);
1996 print_mode(level);
1997
1998 /* don't bother dumping registers for the dummy draw_indx's.. */
1999 if (num_indices > 0)
2000 dump_register_summary(level);
2001 }
2002
2003 static void
2004 cp_draw_indx_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
2005 {
2006 uint32_t prim_type = dwords[0] & 0x1f;
2007 uint64_t addr;
2008
2009 do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);
2010 print_mode(level);
2011
2012 if (is_64b())
2013 addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1];
2014 else
2015 addr = dwords[1];
2016 dump_gpuaddr_size(addr, level, 0x10, 2);
2017
2018 if (is_64b())
2019 addr = (((uint64_t)dwords[5] & 0x1ffff) << 32) | dwords[4];
2020 else
2021 addr = dwords[3];
2022 dump_gpuaddr_size(addr, level, 0x10, 2);
2023
2024 dump_register_summary(level);
2025 }
2026
2027 static void
2028 cp_draw_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
2029 {
2030 uint32_t prim_type = dwords[0] & 0x1f;
2031 uint64_t addr;
2032
2033 do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);
2034 print_mode(level);
2035
2036 addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1];
2037 dump_gpuaddr_size(addr, level, 0x10, 2);
2038
2039 dump_register_summary(level);
2040 }
2041
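/* CP_DRAW_INDIRECT_MULTI: the dword offsets of the INDIRECT_COUNT, INDIRECT
 * and STRIDE fields vary between generations, so they are looked up from the
 * rnn database rather than hardcoded.  When the packet references an indirect
 * draw count in memory, the number of draws dumped is clamped below to avoid
 * printing a huge number of draws for garbage counts.
 */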
2042 static void
2043 cp_draw_indirect_multi(uint32_t *dwords, uint32_t sizedwords, int level)
2044 {
2045 uint32_t prim_type = dwords[0] & 0x1f;
2046 uint32_t count = dwords[2];
2047
2048 do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);
2049 print_mode(level);
2050
2051 struct rnndomain *domain = rnn_finddomain(rnn->db, "CP_DRAW_INDIRECT_MULTI");
2052 uint32_t count_dword = rnndec_decodereg(rnn->vc, domain, "INDIRECT_COUNT");
2053 uint32_t addr_dword = rnndec_decodereg(rnn->vc, domain, "INDIRECT");
2054 uint32_t stride_dword = rnndec_decodereg(rnn->vc, domain, "STRIDE");
2055
2056 if (count_dword) {
2057 uint64_t count_addr =
2058 ((uint64_t)dwords[count_dword + 1] << 32) | dwords[count_dword];
2059 uint32_t *buf = hostptr(count_addr);
2060
2061 /* Don't print more draws than this if we don't know the indirect
2062 * count. It's possible the user will give ~0 or some other large
2063 * value, expecting the GPU to fill in the draw count, and we don't
2064 * want to print a gazillion draws in that case:
2065 */
2066 const uint32_t max_draw_count = 0x100;
2067
2068 /* Assume the indirect count is garbage if it's larger than this
2069 * (quite large) value or 0. Hopefully this catches most cases.
2070 */
2071 const uint32_t max_indirect_draw_count = 0x10000;
2072
2073 if (buf) {
2074 printf("%sindirect count: %u\n", levels[level], *buf);
2075 if (*buf == 0 || *buf > max_indirect_draw_count) {
2076 /* garbage value */
2077 count = min(count, max_draw_count);
2078 } else {
2079 /* not garbage */
2080 count = min(count, *buf);
2081 }
2082 } else {
2083 count = min(count, max_draw_count);
2084 }
2085 }
2086
2087 if (addr_dword && stride_dword) {
2088 uint64_t addr =
2089 ((uint64_t)dwords[addr_dword + 1] << 32) | dwords[addr_dword];
2090 uint32_t stride = dwords[stride_dword];
2091
2092 for (unsigned i = 0; i < count; i++, addr += stride) {
2093 printf("%sdraw %d:\n", levels[level], i);
2094 dump_gpuaddr_size(addr, level, 0x10, 2);
2095 }
2096 }
2097
2098 dump_register_summary(level);
2099 }
2100
2101 static void
2102 cp_run_cl(uint32_t *dwords, uint32_t sizedwords, int level)
2103 {
2104 do_query("COMPUTE", 1);
2105 dump_register_summary(level);
2106 }
2107
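/* CP_NOP: when options->decode_markers is set, the payload is printed as an
 * ascii string marker (subject to the caveat below about the blob's usage).
 */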
2108 static void
2109 cp_nop(uint32_t *dwords, uint32_t sizedwords, int level)
2110 {
2111 const char *buf = (void *)dwords;
2112 int i;
2113
2114 if (quiet(3))
2115 return;
2116
2117 // the blob doesn't use CP_NOP for string_marker, but it does
2118 // use it for payloads that end up looking like, but aren't,
2119 // ascii chars:
2120 if (!options->decode_markers)
2121 return;
2122
2123 for (i = 0; i < 4 * sizedwords; i++) {
2124 if (buf[i] == '\0')
2125 break;
2126 if (isascii(buf[i]))
2127 printf("%c", buf[i]);
2128 }
2129 printf("\n");
2130 }
2131
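/* Recurse into an IB1/IB2 indirect buffer.  The ibs[]/ib bookkeeping tracks
 * the current nesting level so that hang-trigger detection (see the comment
 * on the ibs[] struct) and per-IB draw counting work correctly.
 */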
2132 static void
2133 cp_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
2134 {
2135 /* traverse indirect buffers */
2136 uint64_t ibaddr;
2137 uint32_t ibsize;
2138 uint32_t *ptr = NULL;
2139
2140 if (is_64b()) {
2141 /* a5xx+.. high 32b of gpu addr, then size: */
2142 ibaddr = dwords[0];
2143 ibaddr |= ((uint64_t)dwords[1]) << 32;
2144 ibsize = dwords[2];
2145 } else {
2146 ibaddr = dwords[0];
2147 ibsize = dwords[1];
2148 }
2149
2150 if (!quiet(3)) {
2151 if (is_64b()) {
2152 printf("%sibaddr:%016" PRIx64 "\n", levels[level], ibaddr);
2153 } else {
2154 printf("%sibaddr:%08x\n", levels[level], (uint32_t)ibaddr);
2155 }
2156 printf("%sibsize:%08x\n", levels[level], ibsize);
2157 }
2158
2159 if (options->once && has_dumped(ibaddr, enable_mask))
2160 return;
2161
2162 /* 'query-compare' mode implies 'once' mode, although here we only need to
2163 * process the cmdstream for *any* one enable_mask mode, since we are
2164 * comparing binning vs draw reg values at the same time, i.e. it is
2165 * not useful to process the same draw in both the binning and draw passes.
2166 */
2167 if (options->query_compare && has_dumped(ibaddr, MODE_ALL))
2168 return;
2169
2170 /* map gpuaddr back to hostptr: */
2171 ptr = hostptr(ibaddr);
2172
2173 if (ptr) {
2174 /* If the GPU hung within the target IB, the trigger point will be
2175 * just after the current CP_INDIRECT_BUFFER, because the IB is
2176 * executed but never returns. Account for this by checking whether
2177 * the IB returned:
2178 */
2179 highlight_gpuaddr(gpuaddr(&dwords[is_64b() ? 3 : 2]));
2180
2181 ib++;
2182 ibs[ib].base = ibaddr;
2183 ibs[ib].size = ibsize;
2184
2185 dump_commands(ptr, ibsize, level);
2186 ib--;
2187 } else {
2188 fprintf(stderr, "could not find: %016" PRIx64 " (%d)\n", ibaddr, ibsize);
2189 }
2190 }
2191
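/* CP_START_BIN: the following IB is executed loopcount times, once per bin,
 * with the buffer advancing by ibsize each iteration.  Each iteration is
 * decoded separately below so per-bin commands show up in the dump.
 */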
2192 static void
2193 cp_start_bin(uint32_t *dwords, uint32_t sizedwords, int level)
2194 {
2195 uint64_t ibaddr;
2196 uint32_t ibsize;
2197 uint32_t loopcount;
2198 uint32_t *ptr = NULL;
2199
2200 loopcount = dwords[0];
2201 ibaddr = dwords[1];
2202 ibaddr |= ((uint64_t)dwords[2]) << 32;
2203 ibsize = dwords[3];
2204
2205 /* map gpuaddr back to hostptr: */
2206 ptr = hostptr(ibaddr);
2207
2208 if (ptr) {
2209 /* If the GPU hung within the target IB, the trigger point will be
2210 * just after the current CP_START_BIN, because the IB is
2211 * executed but never returns. Account for this by checking whether
2212 * the IB returned:
2213 */
2214 highlight_gpuaddr(gpuaddr(&dwords[5]));
2215
2216 /* TODO: we should duplicate the body of the loop after each bin, so
2217 * that draws get the correct state. We should also figure out if there
2218 * are any registers that can tell us what bin we're in when we hang so
2219 * that crashdec points to the right place.
2220 */
2221 ib++;
2222 for (uint32_t i = 0; i < loopcount; i++) {
2223 ibs[ib].base = ibaddr;
2224 ibs[ib].size = ibsize;
2225 printf("%sbin %u\n", levels[level], i);
2226 dump_commands(ptr, ibsize, level);
2227 ibaddr += ibsize;
2228 ptr += ibsize;
2229 }
2230 ib--;
2231 } else {
2232 fprintf(stderr, "could not find: %016" PRIx64 " (%d)\n", ibaddr, ibsize);
2233 }
2234 }
2235
2236 static void
2237 cp_wfi(uint32_t *dwords, uint32_t sizedwords, int level)
2238 {
2239 needs_wfi = false;
2240 }
2241
2242 static void
2243 cp_mem_write(uint32_t *dwords, uint32_t sizedwords, int level)
2244 {
2245 if (quiet(2))
2246 return;
2247
2248 if (is_64b()) {
2249 uint64_t gpuaddr = dwords[0] | (((uint64_t)dwords[1]) << 32);
2250 printf("%sgpuaddr:%016" PRIx64 "\n", levels[level], gpuaddr);
2251 dump_hex(&dwords[2], sizedwords - 2, level + 1);
2252
2253 if (pkt_is_type4(dwords[2]) || pkt_is_type7(dwords[2]))
2254 dump_commands(&dwords[2], sizedwords - 2, level + 1);
2255 } else {
2256 uint32_t gpuaddr = dwords[0];
2257 printf("%sgpuaddr:%08x\n", levels[level], gpuaddr);
2258 dump_float((float *)&dwords[1], sizedwords - 1, level + 1);
2259 }
2260 }
2261
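/* CP_REG_RMW: emulate the read-modify-write on the tracked register state so
 * that later register summaries reflect the new value.
 */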
2262 static void
2263 cp_rmw(uint32_t *dwords, uint32_t sizedwords, int level)
2264 {
2265 uint32_t val = dwords[0] & 0xffff;
2266 uint32_t and = dwords[1];
2267 uint32_t or = dwords[2];
2268 printl(3, "%srmw (%s & 0x%08x) | 0x%08x)\n", levels[level], regname(val, 1),
2269 and, or);
2270 if (needs_wfi)
2271 printl(2, "NEEDS WFI: rmw (%s & 0x%08x) | 0x%08x)\n", regname(val, 1),
2272 and, or);
2273 reg_set(val, (reg_val(val) & and) | or);
2274 }
2275
2276 static void
2277 cp_reg_mem(uint32_t *dwords, uint32_t sizedwords, int level)
2278 {
2279 uint32_t val = dwords[0] & 0xffff;
2280 printl(3, "%sbase register: %s\n", levels[level], regname(val, 1));
2281
2282 if (quiet(2))
2283 return;
2284
2285 uint64_t gpuaddr = dwords[1] | (((uint64_t)dwords[2]) << 32);
2286 printf("%sgpuaddr:%016" PRIx64 "\n", levels[level], gpuaddr);
2287 void *ptr = hostptr(gpuaddr);
2288 if (ptr) {
2289 uint32_t cnt = (dwords[0] >> 19) & 0x3ff;
2290 dump_hex(ptr, cnt, level + 1);
2291 }
2292 }
2293
2294 struct draw_state {
2295 uint16_t enable_mask;
2296 uint16_t flags;
2297 uint32_t count;
2298 uint64_t addr;
2299 };
2300
2301 struct draw_state state[32];
2302
2303 #define FLAG_DIRTY 0x1
2304 #define FLAG_DISABLE 0x2
2305 #define FLAG_DISABLE_ALL_GROUPS 0x4
2306 #define FLAG_LOAD_IMMED 0x8
2307
2308 static int draw_mode;
2309
2310 static void
2311 disable_group(unsigned group_id)
2312 {
2313 struct draw_state *ds = &state[group_id];
2314 memset(ds, 0, sizeof(*ds));
2315 }
2316
2317 static void
2318 disable_all_groups(void)
2319 {
2320 for (unsigned i = 0; i < ARRAY_SIZE(state); i++)
2321 disable_group(i);
2322 }
2323
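/* Decode the commands for a single draw-state group, skipping groups whose
 * enable_mask does not match the current render mode (a6xx+).
 */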
2324 static void
2325 load_group(unsigned group_id, int level)
2326 {
2327 struct draw_state *ds = &state[group_id];
2328
2329 if (!ds->count)
2330 return;
2331
2332 printl(2, "%sgroup_id: %u\n", levels[level], group_id);
2333 printl(2, "%scount: %d\n", levels[level], ds->count);
2334 printl(2, "%saddr: %016llx\n", levels[level], ds->addr);
2335 printl(2, "%sflags: %x\n", levels[level], ds->flags);
2336
2337 if (options->gpu_id >= 600) {
2338 printl(2, "%senable_mask: 0x%x\n", levels[level], ds->enable_mask);
2339
2340 if (!(ds->enable_mask & enable_mask)) {
2341 printl(2, "%s\tskipped!\n\n", levels[level]);
2342 return;
2343 }
2344 }
2345
2346 void *ptr = hostptr(ds->addr);
2347 if (ptr) {
2348 if (!quiet(2))
2349 dump_hex(ptr, ds->count, level + 1);
2350
2351 ib++;
2352 dump_commands(ptr, ds->count, level + 1);
2353 ib--;
2354 }
2355 }
2356
2357 static void
2358 load_all_groups(int level)
2359 {
2360 /* sanity check: we should never hit recursion here, and if
2361 * we do, bad things happen:
2362 */
2363 static bool loading_groups = false;
2364 if (loading_groups) {
2365 printf("ERROR: nothing in draw state should trigger recursively loading "
2366 "groups!\n");
2367 return;
2368 }
2369 loading_groups = true;
2370 for (unsigned i = 0; i < ARRAY_SIZE(state); i++)
2371 load_group(i, level);
2372 loading_groups = false;
2373
2374 /* in 'query-compare' mode, defer disabling all groups until we have a
2375 * chance to process the query:
2376 */
2377 if (!options->query_compare)
2378 disable_all_groups();
2379 }
2380
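/* CP_SET_DRAW_STATE: as decoded below, each group header dword packs the
 * state-group size in bits 0..15, flags in bits 16..19, the enable mask in
 * bits 20..23 and the group id in bits 24..28, followed by a 32b (or 64b on
 * a5xx+) gpu address of the state commands.
 */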
2381 static void
2382 cp_set_draw_state(uint32_t *dwords, uint32_t sizedwords, int level)
2383 {
2384 uint32_t i;
2385
2386 for (i = 0; i < sizedwords;) {
2387 struct draw_state *ds;
2388 uint32_t count = dwords[i] & 0xffff;
2389 uint32_t group_id = (dwords[i] >> 24) & 0x1f;
2390 uint32_t enable_mask = (dwords[i] >> 20) & 0xf;
2391 uint32_t flags = (dwords[i] >> 16) & 0xf;
2392 uint64_t addr;
2393
2394 if (is_64b()) {
2395 addr = dwords[i + 1];
2396 addr |= ((uint64_t)dwords[i + 2]) << 32;
2397 i += 3;
2398 } else {
2399 addr = dwords[i + 1];
2400 i += 2;
2401 }
2402
2403 if (flags & FLAG_DISABLE_ALL_GROUPS) {
2404 disable_all_groups();
2405 continue;
2406 }
2407
2408 if (flags & FLAG_DISABLE) {
2409 disable_group(group_id);
2410 continue;
2411 }
2412
2413 assert(group_id < ARRAY_SIZE(state));
2414 disable_group(group_id);
2415
2416 ds = &state[group_id];
2417
2418 ds->enable_mask = enable_mask;
2419 ds->flags = flags;
2420 ds->count = count;
2421 ds->addr = addr;
2422
2423 if (flags & FLAG_LOAD_IMMED) {
2424 load_group(group_id, level);
2425 disable_group(group_id);
2426 }
2427 }
2428 }
2429
2430 static void
2431 cp_set_mode(uint32_t *dwords, uint32_t sizedwords, int level)
2432 {
2433 draw_mode = dwords[0];
2434 }
2435
2436 /* execute compute shader */
2437 static void
2438 cp_exec_cs(uint32_t *dwords, uint32_t sizedwords, int level)
2439 {
2440 do_query("compute", 0);
2441 dump_register_summary(level);
2442 }
2443
2444 static void
2445 cp_exec_cs_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
2446 {
2447 uint64_t addr;
2448
2449 if (is_64b()) {
2450 addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1];
2451 } else {
2452 addr = dwords[1];
2453 }
2454
2455 printl(3, "%saddr: %016llx\n", levels[level], addr);
2456 dump_gpuaddr_size(addr, level, 0x10, 2);
2457
2458 do_query("compute", 0);
2459 dump_register_summary(level);
2460 }
2461
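/* CP_SET_MARKER (a6xx): the low bits select the current render mode
 * (binning / gmem / bypass), which in turn selects which draw-state groups
 * are applied via enable_mask.
 */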
2462 static void
2463 cp_set_marker(uint32_t *dwords, uint32_t sizedwords, int level)
2464 {
2465 render_mode = rnn_enumname(rnn, "a6xx_marker", dwords[0] & 0xf);
2466
2467 if (!strcmp(render_mode, "RM6_BINNING")) {
2468 enable_mask = MODE_BINNING;
2469 } else if (!strcmp(render_mode, "RM6_GMEM")) {
2470 enable_mask = MODE_GMEM;
2471 } else if (!strcmp(render_mode, "RM6_BYPASS")) {
2472 enable_mask = MODE_BYPASS;
2473 }
2474 }
2475
2476 static void
2477 cp_set_render_mode(uint32_t *dwords, uint32_t sizedwords, int level)
2478 {
2479 uint64_t addr;
2480 uint32_t *ptr, len;
2481
2482 assert(is_64b());
2483
2484 /* TODO seems to have two ptrs, 9 dwords total (incl pkt7 hdr)..
2485 * not sure if this can come in different sizes.
2486 *
2487 * First ptr doesn't seem to be cmdstream, second one does.
2488 *
2489 * Comment from downstream kernel:
2490 *
2491 * SRM -- set render mode (ex binning, direct render etc)
2492 * SRM is set by UMD usually at start of IB to tell CP the type of
2493 * preemption.
2494 * KMD needs to set SRM to NULL to indicate CP that rendering is
2495 * done by IB.
2496 * ------------------------------------------------------------------
2497 *
2498 * Seems to always be one of these two:
2499 * 70ec0008 00000001 001c0000 00000000 00000010 00000003 0000000d 001c2000
2500 * 00000000 70ec0008 00000001 001c0000 00000000 00000000 00000003 0000000d
2501 * 001c2000 00000000
2502 *
2503 */
2504
2505 assert(options->gpu_id >= 500);
2506
2507 render_mode = rnn_enumname(rnn, "render_mode_cmd", dwords[0]);
2508
2509 if (sizedwords == 1)
2510 return;
2511
2512 addr = dwords[1];
2513 addr |= ((uint64_t)dwords[2]) << 32;
2514
2515 mode = dwords[3];
2516
2517 dump_gpuaddr(addr, level + 1);
2518
2519 if (sizedwords == 5)
2520 return;
2521
2522 assert(sizedwords == 8);
2523
2524 len = dwords[5];
2525 addr = dwords[6];
2526 addr |= ((uint64_t)dwords[7]) << 32;
2527
2528 printl(3, "%saddr: 0x%016lx\n", levels[level], addr);
2529 printl(3, "%slen: 0x%x\n", levels[level], len);
2530
2531 ptr = hostptr(addr);
2532
2533 if (ptr) {
2534 if (!quiet(2)) {
2535 ib++;
2536 dump_commands(ptr, len, level + 1);
2537 ib--;
2538 dump_hex(ptr, len, level + 1);
2539 }
2540 }
2541 }
2542
2543 static void
2544 cp_compute_checkpoint(uint32_t *dwords, uint32_t sizedwords, int level)
2545 {
2546 uint64_t addr;
2547 uint32_t *ptr, len;
2548
2549 assert(is_64b());
2550 assert(options->gpu_id >= 500);
2551
2552 assert(sizedwords == 8);
2553
2554 addr = dwords[5];
2555 addr |= ((uint64_t)dwords[6]) << 32;
2556 len = dwords[7];
2557
2558 printl(3, "%saddr: 0x%016" PRIx64 "\n", levels[level], addr);
2559 printl(3, "%slen: 0x%x\n", levels[level], len);
2560
2561 ptr = hostptr(addr);
2562
2563 if (ptr) {
2564 if (!quiet(2)) {
2565 ib++;
2566 dump_commands(ptr, len, level + 1);
2567 ib--;
2568 dump_hex(ptr, len, level + 1);
2569 }
2570 }
2571 }
2572
2573 static void
2574 cp_blit(uint32_t *dwords, uint32_t sizedwords, int level)
2575 {
2576 do_query(rnn_enumname(rnn, "cp_blit_cmd", dwords[0]), 0);
2577 print_mode(level);
2578 dump_register_summary(level);
2579 }
2580
2581 static void
2582 cp_context_reg_bunch(uint32_t *dwords, uint32_t sizedwords, int level)
2583 {
2584 int i;
2585
2586 /* NOTE: seems to write the same reg multiple times.. not sure if different
2587 * parts of these are triggered by the FLUSH_SO_n events?? (if that is what
2588 * they actually are?)
2589 */
2590 bool saved_summary = summary;
2591 summary = false;
2592
2593 for (i = 0; i < sizedwords; i += 2) {
2594 dump_register(dwords[i + 0], dwords[i + 1], level + 1);
2595 reg_set(dwords[i + 0], dwords[i + 1]);
2596 }
2597
2598 summary = saved_summary;
2599 }
2600
2601 static void
2602 cp_reg_write(uint32_t *dwords, uint32_t sizedwords, int level)
2603 {
2604 uint32_t reg = dwords[1] & 0xffff;
2605
2606 dump_register(reg, dwords[2], level + 1);
2607 reg_set(reg, dwords[2]);
2608 }
2609
2610 static void
2611 cp_set_ctxswitch_ib(uint32_t *dwords, uint32_t sizedwords, int level)
2612 {
2613 uint64_t addr;
2614 uint32_t size = dwords[2] & 0xffff;
2615 void *ptr;
2616
2617 addr = dwords[0] | ((uint64_t)dwords[1] << 32);
2618
2619 if (!quiet(3)) {
2620 printf("%saddr=%" PRIx64 "\n", levels[level], addr);
2621 }
2622
2623 ptr = hostptr(addr);
2624 if (ptr) {
2625 dump_commands(ptr, size, level + 1);
2626 }
2627 }
2628
2629 static void
2630 cp_skip_ib2_enable_global(uint32_t *dwords, uint32_t sizedwords, int level)
2631 {
2632 skip_ib2_enable_global = dwords[0];
2633 }
2634
2635 static void
2636 cp_skip_ib2_enable_local(uint32_t *dwords, uint32_t sizedwords, int level)
2637 {
2638 skip_ib2_enable_local = dwords[0];
2639 }
2640
2641 #define CP(x, fxn, ...) { "CP_" #x, fxn, ##__VA_ARGS__ }
2642 static const struct type3_op {
2643 const char *name;
2644 void (*fxn)(uint32_t *dwords, uint32_t sizedwords, int level);
2645 struct {
2646 bool load_all_groups;
2647 } options;
2648 } type3_op[] = {
2649 CP(NOP, cp_nop),
2650 CP(INDIRECT_BUFFER, cp_indirect),
2651 CP(INDIRECT_BUFFER_PFD, cp_indirect),
2652 CP(WAIT_FOR_IDLE, cp_wfi),
2653 CP(REG_RMW, cp_rmw),
2654 CP(REG_TO_MEM, cp_reg_mem),
2655 CP(MEM_TO_REG, cp_reg_mem), /* same layout as CP_REG_TO_MEM */
2656 CP(MEM_WRITE, cp_mem_write),
2657 CP(EVENT_WRITE, cp_event_write),
2658 CP(RUN_OPENCL, cp_run_cl),
2659 CP(DRAW_INDX, cp_draw_indx, {.load_all_groups = true}),
2660 CP(DRAW_INDX_2, cp_draw_indx_2, {.load_all_groups = true}),
2661 CP(SET_CONSTANT, cp_set_const),
2662 CP(IM_LOAD_IMMEDIATE, cp_im_loadi),
2663 CP(WIDE_REG_WRITE, cp_wide_reg_write),
2664
2665 /* for a3xx */
2666 CP(LOAD_STATE, cp_load_state),
2667 CP(SET_BIN, cp_set_bin),
2668
2669 /* for a4xx */
2670 CP(LOAD_STATE4, cp_load_state),
2671 CP(SET_DRAW_STATE, cp_set_draw_state),
2672 CP(DRAW_INDX_OFFSET, cp_draw_indx_offset, {.load_all_groups = true}),
2673 CP(EXEC_CS, cp_exec_cs, {.load_all_groups = true}),
2674 CP(EXEC_CS_INDIRECT, cp_exec_cs_indirect, {.load_all_groups = true}),
2675
2676 /* for a5xx */
2677 CP(SET_RENDER_MODE, cp_set_render_mode),
2678 CP(COMPUTE_CHECKPOINT, cp_compute_checkpoint),
2679 CP(BLIT, cp_blit),
2680 CP(CONTEXT_REG_BUNCH, cp_context_reg_bunch),
2681 CP(DRAW_INDIRECT, cp_draw_indirect, {.load_all_groups = true}),
2682 CP(DRAW_INDX_INDIRECT, cp_draw_indx_indirect, {.load_all_groups = true}),
2683 CP(DRAW_INDIRECT_MULTI, cp_draw_indirect_multi, {.load_all_groups = true}),
2684 CP(SKIP_IB2_ENABLE_GLOBAL, cp_skip_ib2_enable_global),
2685 CP(SKIP_IB2_ENABLE_LOCAL, cp_skip_ib2_enable_local),
2686
2687 /* for a6xx */
2688 CP(LOAD_STATE6_GEOM, cp_load_state),
2689 CP(LOAD_STATE6_FRAG, cp_load_state),
2690 CP(LOAD_STATE6, cp_load_state),
2691 CP(SET_MODE, cp_set_mode),
2692 CP(SET_MARKER, cp_set_marker),
2693 CP(REG_WRITE, cp_reg_write),
2694
2695 CP(SET_CTXSWITCH_IB, cp_set_ctxswitch_ib),
2696
2697 CP(START_BIN, cp_start_bin),
2698 };
2699
2700 static void
2701 noop_fxn(uint32_t *dwords, uint32_t sizedwords, int level)
2702 {
2703 }
2704
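/* Look up the handler for a type3/type7 opcode by its packet name.  Unknown
 * or unnamed opcodes fall back to a no-op handler so decoding can continue.
 */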
2705 static const struct type3_op *
2706 get_type3_op(unsigned opc)
2707 {
2708 static const struct type3_op dummy_op = {
2709 .fxn = noop_fxn,
2710 };
2711 const char *name = pktname(opc);
2712
2713 if (!name)
2714 return &dummy_op;
2715
2716 for (unsigned i = 0; i < ARRAY_SIZE(type3_op); i++)
2717 if (!strcmp(name, type3_op[i].name))
2718 return &type3_op[i];
2719
2720 return &dummy_op;
2721 }
2722
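/* Top-level packet decode loop.  Type0/type4 packets are register writes,
 * type3/type7 packets are dispatched to the opcode handlers above, type2
 * packets are nops, and anything unrecognized is either skipped (on a5xx+,
 * where we can resync on the next type4/type7 header) or aborts the dump.
 */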
2723 void
2724 dump_commands(uint32_t *dwords, uint32_t sizedwords, int level)
2725 {
2726 int dwords_left = sizedwords;
2727 uint32_t count = 0; /* dword count including packet header */
2728 uint32_t val;
2729
2730 // assert(dwords);
2731 if (!dwords) {
2732 printf("NULL cmd buffer!\n");
2733 return;
2734 }
2735
2736 assert(ib < ARRAY_SIZE(draws));
2737 draws[ib] = 0;
2738
2739 while (dwords_left > 0) {
2740
2741 current_draw_count = draw_count;
2742
2743 /* hack, this looks like a -1 underflow, in some versions
2744 * when it tries to write zero registers via pkt0
2745 */
2746 // if ((dwords[0] >> 16) == 0xffff)
2747 // goto skip;
2748
2749 if (pkt_is_type0(dwords[0])) {
2750 printl(3, "t0");
2751 count = type0_pkt_size(dwords[0]) + 1;
2752 val = type0_pkt_offset(dwords[0]);
2753 assert(val < regcnt());
2754 printl(3, "%swrite %s%s (%04x)\n", levels[level + 1], regname(val, 1),
2755 (dwords[0] & 0x8000) ? " (same register)" : "", val);
2756 dump_registers(val, dwords + 1, count - 1, level + 2);
2757 if (!quiet(3))
2758 dump_hex(dwords, count, level + 1);
2759 } else if (pkt_is_type4(dwords[0])) {
2760 /* basically the same(ish) as type0 prior to a5xx */
2761 printl(3, "t4");
2762 count = type4_pkt_size(dwords[0]) + 1;
2763 val = type4_pkt_offset(dwords[0]);
2764 assert(val < regcnt());
2765 printl(3, "%swrite %s (%04x)\n", levels[level + 1], regname(val, 1),
2766 val);
2767 dump_registers(val, dwords + 1, count - 1, level + 2);
2768 if (!quiet(3))
2769 dump_hex(dwords, count, level + 1);
2770 #if 0
2771 } else if (pkt_is_type1(dwords[0])) {
2772 printl(3, "t1");
2773 count = 3;
2774 val = dwords[0] & 0xfff;
2775 printl(3, "%swrite %s\n", levels[level+1], regname(val, 1));
2776 dump_registers(val, dwords+1, 1, level+2);
2777 val = (dwords[0] >> 12) & 0xfff;
2778 printl(3, "%swrite %s\n", levels[level+1], regname(val, 1));
2779 dump_registers(val, dwords+2, 1, level+2);
2780 if (!quiet(3))
2781 dump_hex(dwords, count, level+1);
2782 } else if (pkt_is_type2(dwords[0])) {
2783 printl(3, "t2");
2784 printf("%sNOP\n", levels[level+1]);
2785 count = 1;
2786 if (!quiet(3))
2787 dump_hex(dwords, count, level+1);
2788 #endif
2789 } else if (pkt_is_type3(dwords[0])) {
2790 count = type3_pkt_size(dwords[0]) + 1;
2791 val = cp_type3_opcode(dwords[0]);
2792 const struct type3_op *op = get_type3_op(val);
2793 if (op->options.load_all_groups)
2794 load_all_groups(level + 1);
2795 printl(3, "t3");
2796 const char *name = pktname(val);
2797 if (!quiet(2)) {
2798 printf("\t%sopcode: %s%s%s (%02x) (%d dwords)%s\n", levels[level],
2799 rnn->vc->colors->bctarg, name, rnn->vc->colors->reset, val,
2800 count, (dwords[0] & 0x1) ? " (predicated)" : "");
2801 }
2802 if (name)
2803 dump_domain(dwords + 1, count - 1, level + 2, name);
2804 op->fxn(dwords + 1, count - 1, level + 1);
2805 if (!quiet(2))
2806 dump_hex(dwords, count, level + 1);
2807 } else if (pkt_is_type7(dwords[0])) {
2808 count = type7_pkt_size(dwords[0]) + 1;
2809 val = cp_type7_opcode(dwords[0]);
2810 const struct type3_op *op = get_type3_op(val);
2811 if (op->options.load_all_groups)
2812 load_all_groups(level + 1);
2813 printl(3, "t7");
2814 const char *name = pktname(val);
2815 if (!quiet(2)) {
2816 printf("\t%sopcode: %s%s%s (%02x) (%d dwords)\n", levels[level],
2817 rnn->vc->colors->bctarg, name, rnn->vc->colors->reset, val,
2818 count);
2819 }
2820 if (name) {
2821 /* special hack for two packets that decode the same way
2822 * on a6xx:
2823 */
2824 if (!strcmp(name, "CP_LOAD_STATE6_FRAG") ||
2825 !strcmp(name, "CP_LOAD_STATE6_GEOM"))
2826 name = "CP_LOAD_STATE6";
2827 dump_domain(dwords + 1, count - 1, level + 2, name);
2828 }
2829 op->fxn(dwords + 1, count - 1, level + 1);
2830 if (!quiet(2))
2831 dump_hex(dwords, count, level + 1);
2832 } else if (pkt_is_type2(dwords[0])) {
2833 printl(3, "t2");
2834 printl(3, "%snop\n", levels[level + 1]);
2835 } else {
2836 /* for 5xx+ we can do a passable job of looking for start of next valid
2837 * packet: */
2838 if (options->gpu_id >= 500) {
2839 while (dwords_left > 0) {
2840 if (pkt_is_type7(dwords[0]) || pkt_is_type4(dwords[0]))
2841 break;
2842 printf("bad type! %08x\n", dwords[0]);
2843 dwords++;
2844 dwords_left--;
2845 }
2846 } else {
2847 printf("bad type! %08x\n", dwords[0]);
2848 return;
2849 }
2850 }
2851
2852 dwords += count;
2853 dwords_left -= count;
2854 }
2855
2856 if (dwords_left < 0)
2857 printf("**** this ain't right!! dwords_left=%d\n", dwords_left);
2858 }
2859