1 /*
2 * Copyright (c) 2012 Rob Clark <robdclark@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <ctype.h>
26 #include <err.h>
27 #include <inttypes.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <stdint.h>
31 #include <stdarg.h>
32 #include <stdbool.h>
33 #include <unistd.h>
34 #include <sys/types.h>
35 #include <sys/stat.h>
36 #include <sys/wait.h>
37 #include <fcntl.h>
38 #include <string.h>
39 #include <assert.h>
40 #include <signal.h>
41 #include <errno.h>
42
43 #include "redump.h"
44 #include "disasm.h"
45 #include "script.h"
46 #include "rnnutil.h"
47 #include "buffers.h"
48 #include "cffdec.h"
49
50 /* ************************************************************************* */
51 /* originally based on kernel recovery dump code: */
52
/* Decode options handed to cffdec_init(); read throughout this file: */
static const struct cffdec_options *options;

/* When set, dump_registers() reports writes to non-banked registers with
 * a "NEEDS WFI" message.  Not modified in this part of the file.
 */
static bool needs_wfi = false;
/* Copied from options->summary by cffdec_init(); consulted by quiet() and
 * saved/restored around each register in dump_registers():
 */
static bool summary = false;
/* Gates the texture state dumps in reg_dump_tex_samp_hi() and
 * reg_dump_tex_const_hi(), which return early while this is false:
 */
static bool in_summary = false;
static int vertices;
59
regcnt(void)60 static inline unsigned regcnt(void)
61 {
62 if (options->gpu_id >= 500)
63 return 0xffff;
64 else
65 return 0x7fff;
66 }
67
is_64b(void)68 static int is_64b(void)
69 {
70 return options->gpu_id >= 500;
71 }
72
73
static int draws[3];
/* Per-IB-level decode state, indexed by the current IB level 'ib': */
static struct {
	uint64_t base;
	uint32_t size;   /* in dwords */
	/* Generally cmdstream consists of multiple IB calls to different
	 * buffers, which are themselves often re-used for each tile.  The
	 * triggered flag serves two purposes to help make it more clear
	 * what part of the cmdstream is before vs after the GPU hang:
	 *
	 * 1) if in IB2 we are past the point within the IB2 buffer where
	 *    the GPU hung, but IB1 is not past the point within its
	 *    buffer where the GPU had hung, then we know the GPU hang
	 *    happens on a future use of that IB2 buffer.
	 *
	 * 2) if in an IB1 or IB2 buffer that is not the one where the GPU
	 *    hung, but we've already passed the trigger point at the same
	 *    IB level, we know that we are past the point where the GPU
	 *    had hung.
	 *
	 * So this is a one way switch, false->true.  And a higher #'d
	 * IB level isn't considered triggered unless the lower #'d IB
	 * level is.
	 */
	bool triggered;
} ibs[4];
/* Index into ibs[] (and options->ibs[]) for the level being decoded: */
static int ib;

/* draw_count is reset by cffdec_init() and printed by __do_query();
 * current_draw_count is what quiet() compares options->draw_filter to:
 */
static int draw_count;
static int current_draw_count;
103
/* query mode.. to handle symbolic register name queries, we need to
 * defer parsing the query strings until after gpu_id is known and the
 * rnn db is loaded.  The resolved register offsets are stored here by
 * init_rnn(), one per entry of options->querystrs:
 */
static int *queryvals;
109
110 static bool
quiet(int lvl)111 quiet(int lvl)
112 {
113 if ((options->draw_filter != -1) && (options->draw_filter != current_draw_count))
114 return true;
115 if ((lvl >= 3) && (summary || options->querystrs || options->script))
116 return true;
117 if ((lvl >= 2) && (options->querystrs || options->script))
118 return true;
119 return false;
120 }
121
/* printf() wrapper that prints nothing when quiet(lvl) says output at
 * this verbosity level is suppressed.
 */
void
printl(int lvl, const char *fmt, ...)
{
	if (!quiet(lvl)) {
		va_list ap;

		va_start(ap, fmt);
		vprintf(fmt, ap);
		va_end(ap);
	}
}
132
/* Indentation prefixes, indexed by nesting level (entry N holds N+1
 * tabs).  The trailing "x" entries sit past the deepest tab prefix --
 * presumably so that an unexpectedly deep level is visible in the output
 * rather than reading out of bounds (TODO confirm intent):
 */
static const char *levels[] = {
		"\t",
		"\t\t",
		"\t\t\t",
		"\t\t\t\t",
		"\t\t\t\t\t",
		"\t\t\t\t\t\t",
		"\t\t\t\t\t\t\t",
		"\t\t\t\t\t\t\t\t",
		"\t\t\t\t\t\t\t\t\t",
		"x",
		"x",
		"x",
		"x",
		"x",
		"x",
};
150
/* Where a block of state comes from; passed to dump_tex_samp().  The
 * register-triggered dumps in this part of the file always pass
 * STATE_SRC_DIRECT.
 */
enum state_src_t {
	STATE_SRC_DIRECT,
	STATE_SRC_INDIRECT,
	STATE_SRC_BINDLESS,
};
156
/* SDS (CP_SET_DRAW_STATE) helpers, defined later in the file and used by
 * do_query_compare():
 */
static void load_all_groups(int level);
static void disable_all_groups(void);

/* Texture sampler/constant state dumpers, defined later in the file and
 * used by the TEX_SAMP/TEX_CONST register handlers:
 */
static void dump_tex_samp(uint32_t *texsamp, enum state_src_t src, int num_unit, int level);
static void dump_tex_const(uint32_t *texsamp, int num_unit, int level);
163
164 static bool
highlight_gpuaddr(uint64_t gpuaddr)165 highlight_gpuaddr(uint64_t gpuaddr)
166 {
167 if (!options->color)
168 return false;
169
170 if (!options->ibs[ib].base)
171 return false;
172
173 if ((ib > 0) && options->ibs[ib-1].base && !ibs[ib-1].triggered)
174 return false;
175
176 if (ibs[ib].triggered)
177 return true;
178
179 if (options->ibs[ib].base != ibs[ib].base)
180 return false;
181
182 uint64_t start = ibs[ib].base + 4 * (ibs[ib].size - options->ibs[ib].rem);
183 uint64_t end = ibs[ib].base + 4 * ibs[ib].size;
184
185 bool triggered = (start <= gpuaddr) && (gpuaddr <= end);
186
187 ibs[ib].triggered |= triggered;
188
189 if (triggered)
190 printf("ESTIMATED CRASH LOCATION!\n");
191
192 return triggered;
193 }
194
195 static void
dump_hex(uint32_t * dwords,uint32_t sizedwords,int level)196 dump_hex(uint32_t *dwords, uint32_t sizedwords, int level)
197 {
198 int i, j;
199 int lastzero = 1;
200
201 if (quiet(2))
202 return;
203
204 for (i = 0; i < sizedwords; i += 8) {
205 int zero = 1;
206
207 /* always show first row: */
208 if (i == 0)
209 zero = 0;
210
211 for (j = 0; (j < 8) && (i+j < sizedwords) && zero; j++)
212 if (dwords[i+j])
213 zero = 0;
214
215 if (zero && !lastzero)
216 printf("*\n");
217
218 lastzero = zero;
219
220 if (zero)
221 continue;
222
223 uint64_t addr = gpuaddr(&dwords[i]);
224 bool highlight = highlight_gpuaddr(addr);
225
226 if (highlight)
227 printf("\x1b[0;1;31m");
228
229 if (is_64b()) {
230 printf("%016"PRIx64":%s", addr, levels[level]);
231 } else {
232 printf("%08x:%s", (uint32_t)addr, levels[level]);
233 }
234
235 if (highlight)
236 printf("\x1b[0m");
237
238 printf("%04x:", i * 4);
239
240 for (j = 0; (j < 8) && (i+j < sizedwords); j++) {
241 printf(" %08x", dwords[i+j]);
242 }
243
244 printf("\n");
245 }
246 }
247
248 static void
dump_float(float * dwords,uint32_t sizedwords,int level)249 dump_float(float *dwords, uint32_t sizedwords, int level)
250 {
251 int i;
252 for (i = 0; i < sizedwords; i++) {
253 if ((i % 8) == 0) {
254 if (is_64b()) {
255 printf("%016"PRIx64":%s", gpuaddr(dwords), levels[level]);
256 } else {
257 printf("%08x:%s", (uint32_t)gpuaddr(dwords), levels[level]);
258 }
259 } else {
260 printf(" ");
261 }
262 printf("%8f", *(dwords++));
263 if ((i % 8) == 7)
264 printf("\n");
265 }
266 if (i % 8)
267 printf("\n");
268 }
269
/* Split a dword into an address part and a flags part using 'mask': the
 * bits selected by 'mask' are returned in *flags, the remaining bits in
 * *gpuaddr.
 *
 * Background from the original author: the surface format is believed to
 * live in the low bits (RB_COLOR_INFO__COLOR_FORMAT_MASK 0x0000000f);
 * comments in sys2gmem_tex_const indicate that the address is [31:12],
 * but at least some of the bits above the format appear to have a
 * different meaning.
 */
static void parse_dword_addr(uint32_t dword, uint32_t *gpuaddr,
		uint32_t *flags, uint32_t mask)
{
	assert(!is_64b());  /* this is only used on a2xx */

	const uint32_t addr_bits = dword & ~mask;
	const uint32_t flag_bits = dword & mask;

	*gpuaddr = addr_bits;
	*flags = flag_bits;
}
282
/* Shadow copy of the most recently written value of each register,
 * indexed by register offset (see reg_set()/reg_val()):
 */
static uint32_t type0_reg_vals[0xffff + 1];
/* Bitmaps with one bit per register (bit regbase%8 of byte regbase/8).
 * NOTE(review): the sizes are derived from sizeof() in bytes rather than
 * from the element count, so the bitmaps are larger than one bit per
 * register strictly requires.
 */
static uint8_t type0_reg_rewritten[sizeof(type0_reg_vals)/8]; /* written since last draw */
static uint8_t type0_reg_written[sizeof(type0_reg_vals)/8];
/* Values returned by reg_lastval() and compared against the current
 * values in the query code; cleared by clear_lastvals():
 */
static uint32_t lastvals[ARRAY_SIZE(type0_reg_vals)];
287
reg_rewritten(uint32_t regbase)288 static bool reg_rewritten(uint32_t regbase)
289 {
290 return !!(type0_reg_rewritten[regbase/8] & (1 << (regbase % 8)));
291 }
292
reg_written(uint32_t regbase)293 bool reg_written(uint32_t regbase)
294 {
295 return !!(type0_reg_written[regbase/8] & (1 << (regbase % 8)));
296 }
297
clear_rewritten(void)298 static void clear_rewritten(void)
299 {
300 memset(type0_reg_rewritten, 0, sizeof(type0_reg_rewritten));
301 }
302
clear_written(void)303 static void clear_written(void)
304 {
305 memset(type0_reg_written, 0, sizeof(type0_reg_written));
306 clear_rewritten();
307 }
308
reg_lastval(uint32_t regbase)309 uint32_t reg_lastval(uint32_t regbase)
310 {
311 return lastvals[regbase];
312 }
313
314 static void
clear_lastvals(void)315 clear_lastvals(void)
316 {
317 memset(lastvals, 0, sizeof(lastvals));
318 }
319
320 uint32_t
reg_val(uint32_t regbase)321 reg_val(uint32_t regbase)
322 {
323 return type0_reg_vals[regbase];
324 }
325
326 void
reg_set(uint32_t regbase,uint32_t val)327 reg_set(uint32_t regbase, uint32_t val)
328 {
329 assert(regbase < regcnt());
330 type0_reg_vals[regbase] = val;
331 type0_reg_written[regbase/8] |= (1 << (regbase % 8));
332 type0_reg_rewritten[regbase/8] |= (1 << (regbase % 8));
333 }
334
335 static void
reg_dump_scratch(const char * name,uint32_t dword,int level)336 reg_dump_scratch(const char *name, uint32_t dword, int level)
337 {
338 unsigned r;
339
340 if (quiet(3))
341 return;
342
343 r = regbase("CP_SCRATCH[0].REG");
344
345 // if not, try old a2xx/a3xx version:
346 if (!r)
347 r = regbase("CP_SCRATCH_REG0");
348
349 if (!r)
350 return;
351
352 printf("%s:%u,%u,%u,%u\n", levels[level],
353 reg_val(r + 4), reg_val(r + 5),
354 reg_val(r + 6), reg_val(r + 7));
355 }
356
/* Hex-dump 'sizedwords' dwords of the buffer at 'gpuaddr', one level
 * deeper than 'level'.  Does nothing when output at 'quietlvl' is
 * suppressed or when hostptr() has no mapping for the address.
 */
static void
dump_gpuaddr_size(uint64_t gpuaddr, int level, int sizedwords, int quietlvl)
{
	if (quiet(quietlvl))
		return;

	void *host = hostptr(gpuaddr);

	if (!host)
		return;

	dump_hex(host, sizedwords, level+1);
}
370
/* Hex-dump the first 64 dwords at 'gpuaddr', treated as level 3 output. */
static void
dump_gpuaddr(uint64_t gpuaddr, int level)
{
	const int sizedwords = 64;
	const int quietlvl = 3;

	dump_gpuaddr_size(gpuaddr, level, sizedwords, quietlvl);
}
376
/* Register handler: treat the 32-bit register value as a GPU address and
 * hex-dump the buffer it points to.  'name' is unused.
 */
static void
reg_dump_gpuaddr(const char *name, uint32_t dword, int level)
{
	const uint64_t addr = dword;

	dump_gpuaddr(addr, level);
}
382
/* Low 32 bits of a 64-bit GPU address, latched by reg_gpuaddr_lo() and
 * combined with the high half by the corresponding *_hi handlers:
 */
uint32_t gpuaddr_lo;

/* Register handler for the _LO half of an address pair: remember the
 * value for the _HI handler that follows.  Only 'dword' is used.
 */
static void
reg_gpuaddr_lo(const char *name, uint32_t dword, int level)
{
	(void)name;
	(void)level;

	gpuaddr_lo = dword;
}
389
390 static void
reg_dump_gpuaddr_hi(const char * name,uint32_t dword,int level)391 reg_dump_gpuaddr_hi(const char *name, uint32_t dword, int level)
392 {
393 dump_gpuaddr(gpuaddr_lo | (((uint64_t)dword) << 32), level);
394 }
395
396
397 static void
dump_shader(const char * ext,void * buf,int bufsz)398 dump_shader(const char *ext, void *buf, int bufsz)
399 {
400 if (options->dump_shaders) {
401 static int n = 0;
402 char filename[16];
403 int fd;
404 sprintf(filename, "%04d.%s", n++, ext);
405 fd = open(filename, O_WRONLY| O_TRUNC | O_CREAT, 0644);
406 if (fd != -1) {
407 write(fd, buf, bufsz);
408 close(fd);
409 }
410 }
411 }
412
413 static void
disasm_gpuaddr(const char * name,uint64_t gpuaddr,int level)414 disasm_gpuaddr(const char *name, uint64_t gpuaddr, int level)
415 {
416 void *buf;
417
418 gpuaddr &= 0xfffffffffffffff0;
419
420 if (quiet(3))
421 return;
422
423 buf = hostptr(gpuaddr);
424 if (buf) {
425 uint32_t sizedwords = hostlen(gpuaddr) / 4;
426 const char *ext;
427
428 dump_hex(buf, min(64, sizedwords), level+1);
429 try_disasm_a3xx(buf, sizedwords, level+2, stdout, options->gpu_id);
430
431 /* this is a bit ugly way, but oh well.. */
432 if (strstr(name, "SP_VS_OBJ")) {
433 ext = "vo3";
434 } else if (strstr(name, "SP_FS_OBJ")) {
435 ext = "fo3";
436 } else if (strstr(name, "SP_GS_OBJ")) {
437 ext = "go3";
438 } else if (strstr(name, "SP_CS_OBJ")) {
439 ext = "co3";
440 } else {
441 ext = NULL;
442 }
443
444 if (ext)
445 dump_shader(ext, buf, sizedwords * 4);
446 }
447 }
448
/* Register handler: treat the 32-bit register value as the address of a
 * shader and dump/disassemble it.
 */
static void
reg_disasm_gpuaddr(const char *name, uint32_t dword, int level)
{
	const uint64_t addr = dword;

	disasm_gpuaddr(name, addr, level);
}
454
455 static void
reg_disasm_gpuaddr_hi(const char * name,uint32_t dword,int level)456 reg_disasm_gpuaddr_hi(const char *name, uint32_t dword, int level)
457 {
458 disasm_gpuaddr(name, gpuaddr_lo | (((uint64_t)dword) << 32), level);
459 }
460
/* Look up the value of the TEX_COUNT register that belongs to the named
 * TEX_SAMP/TEX_CONST register.  The count register's name is formed by
 * replacing everything from "CONST" (or, failing that, "SAMP") onwards
 * with "COUNT".  Returns 0 if the name contains neither.
 *
 * Note, this kinda assumes an equal # of samplers and textures, but not
 * really sure if there is a much better option.  I suppose on a6xx we
 * could instead decode the bitfields in SP_xS_CONFIG
 */
static int
get_tex_count(const char *name)
{
	char count_reg[strlen(name) + 5];
	const char *cut = strstr(name, "CONST");

	if (!cut)
		cut = strstr(name, "SAMP");
	if (!cut)
		return 0;

	/* keep the prefix, then append "COUNT" including its terminator: */
	const size_t prefix_len = cut - name;

	memcpy(count_reg, name, prefix_len);
	memcpy(count_reg + prefix_len, "COUNT", sizeof("COUNT"));

	return reg_val(regbase(count_reg));
}
486
487 static void
reg_dump_tex_samp_hi(const char * name,uint32_t dword,int level)488 reg_dump_tex_samp_hi(const char *name, uint32_t dword, int level)
489 {
490 if (!in_summary)
491 return;
492
493 int num_unit = get_tex_count(name);
494 uint64_t gpuaddr = gpuaddr_lo | (((uint64_t)dword) << 32);
495 void *buf = hostptr(gpuaddr);
496
497 if (!buf)
498 return;
499
500 dump_tex_samp(buf, STATE_SRC_DIRECT, num_unit, level+1);
501 }
502
503 static void
reg_dump_tex_const_hi(const char * name,uint32_t dword,int level)504 reg_dump_tex_const_hi(const char *name, uint32_t dword, int level)
505 {
506 if (!in_summary)
507 return;
508
509 int num_unit = get_tex_count(name);
510 uint64_t gpuaddr = gpuaddr_lo | (((uint64_t)dword) << 32);
511 void *buf = hostptr(gpuaddr);
512
513 if (!buf)
514 return;
515
516 dump_tex_const(buf, num_unit, level+1);
517 }
518
/*
 * Registers with special handling (rnndec_decode() handles rest):
 *
 * One table per GPU generation, each terminated by an entry with a NULL
 * regname.  cffdec_init() points type0_reg at the table for the GPU being
 * decoded, init_rnn() resolves each regname into regbase, and
 * dump_register() calls fxn when a register with a matching offset is
 * written.
 */
/* Build a table entry from a register name (stringified) and handler: */
#define REG(x, fxn)    { #x, fxn }
static struct {
	const char *regname;   /* name looked up in the rnn database */
	void (*fxn)(const char *name, uint32_t dword, int level);
	uint32_t regbase;      /* filled in by init_rnn() */
} reg_a2xx[] = {
		REG(CP_SCRATCH_REG0, reg_dump_scratch),
		REG(CP_SCRATCH_REG1, reg_dump_scratch),
		REG(CP_SCRATCH_REG2, reg_dump_scratch),
		REG(CP_SCRATCH_REG3, reg_dump_scratch),
		REG(CP_SCRATCH_REG4, reg_dump_scratch),
		REG(CP_SCRATCH_REG5, reg_dump_scratch),
		REG(CP_SCRATCH_REG6, reg_dump_scratch),
		REG(CP_SCRATCH_REG7, reg_dump_scratch),
		{NULL},
}, reg_a3xx[] = {
		REG(CP_SCRATCH_REG0, reg_dump_scratch),
		REG(CP_SCRATCH_REG1, reg_dump_scratch),
		REG(CP_SCRATCH_REG2, reg_dump_scratch),
		REG(CP_SCRATCH_REG3, reg_dump_scratch),
		REG(CP_SCRATCH_REG4, reg_dump_scratch),
		REG(CP_SCRATCH_REG5, reg_dump_scratch),
		REG(CP_SCRATCH_REG6, reg_dump_scratch),
		REG(CP_SCRATCH_REG7, reg_dump_scratch),
		REG(VSC_SIZE_ADDRESS, reg_dump_gpuaddr),
		REG(SP_VS_PVT_MEM_ADDR_REG, reg_dump_gpuaddr),
		REG(SP_FS_PVT_MEM_ADDR_REG, reg_dump_gpuaddr),
		REG(SP_VS_OBJ_START_REG, reg_disasm_gpuaddr),
		REG(SP_FS_OBJ_START_REG, reg_disasm_gpuaddr),
		REG(TPL1_TP_FS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
		{NULL},
}, reg_a4xx[] = {
		REG(CP_SCRATCH[0].REG, reg_dump_scratch),
		REG(CP_SCRATCH[0x1].REG, reg_dump_scratch),
		REG(CP_SCRATCH[0x2].REG, reg_dump_scratch),
		REG(CP_SCRATCH[0x3].REG, reg_dump_scratch),
		REG(CP_SCRATCH[0x4].REG, reg_dump_scratch),
		REG(CP_SCRATCH[0x5].REG, reg_dump_scratch),
		REG(CP_SCRATCH[0x6].REG, reg_dump_scratch),
		REG(CP_SCRATCH[0x7].REG, reg_dump_scratch),
		REG(SP_VS_PVT_MEM_ADDR, reg_dump_gpuaddr),
		REG(SP_FS_PVT_MEM_ADDR, reg_dump_gpuaddr),
		REG(SP_GS_PVT_MEM_ADDR, reg_dump_gpuaddr),
		REG(SP_HS_PVT_MEM_ADDR, reg_dump_gpuaddr),
		REG(SP_DS_PVT_MEM_ADDR, reg_dump_gpuaddr),
		REG(SP_CS_PVT_MEM_ADDR, reg_dump_gpuaddr),
		REG(SP_VS_OBJ_START, reg_disasm_gpuaddr),
		REG(SP_FS_OBJ_START, reg_disasm_gpuaddr),
		REG(SP_GS_OBJ_START, reg_disasm_gpuaddr),
		REG(SP_HS_OBJ_START, reg_disasm_gpuaddr),
		REG(SP_DS_OBJ_START, reg_disasm_gpuaddr),
		REG(SP_CS_OBJ_START, reg_disasm_gpuaddr),
		REG(TPL1_TP_VS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
		REG(TPL1_TP_HS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
		REG(TPL1_TP_DS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
		REG(TPL1_TP_GS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
		REG(TPL1_TP_FS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
		{NULL},
}, reg_a5xx[] = {
		/* From a5xx on, addresses are split into _LO/_HI register pairs:
		 * the _LO handler latches the low half and the _HI handler acts
		 * on the combined 64-bit address.
		 */
		REG(CP_SCRATCH[0x4].REG, reg_dump_scratch),
		REG(CP_SCRATCH[0x5].REG, reg_dump_scratch),
		REG(CP_SCRATCH[0x6].REG, reg_dump_scratch),
		REG(CP_SCRATCH[0x7].REG, reg_dump_scratch),
		REG(SP_VS_OBJ_START_LO, reg_gpuaddr_lo),
		REG(SP_VS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
		REG(SP_HS_OBJ_START_LO, reg_gpuaddr_lo),
		REG(SP_HS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
		REG(SP_DS_OBJ_START_LO, reg_gpuaddr_lo),
		REG(SP_DS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
		REG(SP_GS_OBJ_START_LO, reg_gpuaddr_lo),
		REG(SP_GS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
		REG(SP_FS_OBJ_START_LO, reg_gpuaddr_lo),
		REG(SP_FS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
		REG(SP_CS_OBJ_START_LO, reg_gpuaddr_lo),
		REG(SP_CS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
		REG(TPL1_VS_TEX_CONST_LO, reg_gpuaddr_lo),
		REG(TPL1_VS_TEX_CONST_HI, reg_dump_tex_const_hi),
		REG(TPL1_VS_TEX_SAMP_LO, reg_gpuaddr_lo),
		REG(TPL1_VS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
		REG(TPL1_HS_TEX_CONST_LO, reg_gpuaddr_lo),
		REG(TPL1_HS_TEX_CONST_HI, reg_dump_tex_const_hi),
		REG(TPL1_HS_TEX_SAMP_LO, reg_gpuaddr_lo),
		REG(TPL1_HS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
		REG(TPL1_DS_TEX_CONST_LO, reg_gpuaddr_lo),
		REG(TPL1_DS_TEX_CONST_HI, reg_dump_tex_const_hi),
		REG(TPL1_DS_TEX_SAMP_LO, reg_gpuaddr_lo),
		REG(TPL1_DS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
		REG(TPL1_GS_TEX_CONST_LO, reg_gpuaddr_lo),
		REG(TPL1_GS_TEX_CONST_HI, reg_dump_tex_const_hi),
		REG(TPL1_GS_TEX_SAMP_LO, reg_gpuaddr_lo),
		REG(TPL1_GS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
		REG(TPL1_FS_TEX_CONST_LO, reg_gpuaddr_lo),
		REG(TPL1_FS_TEX_CONST_HI, reg_dump_tex_const_hi),
		REG(TPL1_FS_TEX_SAMP_LO, reg_gpuaddr_lo),
		REG(TPL1_FS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
		REG(TPL1_CS_TEX_CONST_LO, reg_gpuaddr_lo),
		REG(TPL1_CS_TEX_CONST_HI, reg_dump_tex_const_hi),
		REG(TPL1_CS_TEX_SAMP_LO, reg_gpuaddr_lo),
		REG(TPL1_CS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
		REG(TPL1_TP_BORDER_COLOR_BASE_ADDR_LO, reg_gpuaddr_lo),
		REG(TPL1_TP_BORDER_COLOR_BASE_ADDR_HI, reg_dump_gpuaddr_hi),
		/* The entries below are disabled in the original source: */
//		REG(RB_MRT_FLAG_BUFFER[0].ADDR_LO, reg_gpuaddr_lo),
//		REG(RB_MRT_FLAG_BUFFER[0].ADDR_HI, reg_dump_gpuaddr_hi),
//		REG(RB_MRT_FLAG_BUFFER[1].ADDR_LO, reg_gpuaddr_lo),
//		REG(RB_MRT_FLAG_BUFFER[1].ADDR_HI, reg_dump_gpuaddr_hi),
//		REG(RB_MRT_FLAG_BUFFER[2].ADDR_LO, reg_gpuaddr_lo),
//		REG(RB_MRT_FLAG_BUFFER[2].ADDR_HI, reg_dump_gpuaddr_hi),
//		REG(RB_MRT_FLAG_BUFFER[3].ADDR_LO, reg_gpuaddr_lo),
//		REG(RB_MRT_FLAG_BUFFER[3].ADDR_HI, reg_dump_gpuaddr_hi),
//		REG(RB_MRT_FLAG_BUFFER[4].ADDR_LO, reg_gpuaddr_lo),
//		REG(RB_MRT_FLAG_BUFFER[4].ADDR_HI, reg_dump_gpuaddr_hi),
//		REG(RB_MRT_FLAG_BUFFER[5].ADDR_LO, reg_gpuaddr_lo),
//		REG(RB_MRT_FLAG_BUFFER[5].ADDR_HI, reg_dump_gpuaddr_hi),
//		REG(RB_MRT_FLAG_BUFFER[6].ADDR_LO, reg_gpuaddr_lo),
//		REG(RB_MRT_FLAG_BUFFER[6].ADDR_HI, reg_dump_gpuaddr_hi),
//		REG(RB_MRT_FLAG_BUFFER[7].ADDR_LO, reg_gpuaddr_lo),
//		REG(RB_MRT_FLAG_BUFFER[7].ADDR_HI, reg_dump_gpuaddr_hi),
//		REG(RB_BLIT_FLAG_DST_LO, reg_gpuaddr_lo),
//		REG(RB_BLIT_FLAG_DST_HI, reg_dump_gpuaddr_hi),
//		REG(RB_MRT[0].BASE_LO, reg_gpuaddr_lo),
//		REG(RB_MRT[0].BASE_HI, reg_dump_gpuaddr_hi),
//		REG(RB_DEPTH_BUFFER_BASE_LO, reg_gpuaddr_lo),
//		REG(RB_DEPTH_BUFFER_BASE_HI, reg_dump_gpuaddr_hi),
//		REG(RB_DEPTH_FLAG_BUFFER_BASE_LO, reg_gpuaddr_lo),
//		REG(RB_DEPTH_FLAG_BUFFER_BASE_HI, reg_dump_gpuaddr_hi),
//		REG(RB_BLIT_DST_LO, reg_gpuaddr_lo),
//		REG(RB_BLIT_DST_HI, reg_dump_gpuaddr_hi),

//		REG(RB_2D_SRC_LO, reg_gpuaddr_lo),
//		REG(RB_2D_SRC_HI, reg_dump_gpuaddr_hi),
//		REG(RB_2D_SRC_FLAGS_LO, reg_gpuaddr_lo),
//		REG(RB_2D_SRC_FLAGS_HI, reg_dump_gpuaddr_hi),
//		REG(RB_2D_DST_LO, reg_gpuaddr_lo),
//		REG(RB_2D_DST_HI, reg_dump_gpuaddr_hi),
//		REG(RB_2D_DST_FLAGS_LO, reg_gpuaddr_lo),
//		REG(RB_2D_DST_FLAGS_HI, reg_dump_gpuaddr_hi),

		{NULL},
}, reg_a6xx[] = {
		REG(CP_SCRATCH[0x4].REG, reg_dump_scratch),
		REG(CP_SCRATCH[0x5].REG, reg_dump_scratch),
		REG(CP_SCRATCH[0x6].REG, reg_dump_scratch),
		REG(CP_SCRATCH[0x7].REG, reg_dump_scratch),

		REG(SP_VS_OBJ_START_LO, reg_gpuaddr_lo),
		REG(SP_VS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
		REG(SP_HS_OBJ_START_LO, reg_gpuaddr_lo),
		REG(SP_HS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
		REG(SP_DS_OBJ_START_LO, reg_gpuaddr_lo),
		REG(SP_DS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
		REG(SP_GS_OBJ_START_LO, reg_gpuaddr_lo),
		REG(SP_GS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
		REG(SP_FS_OBJ_START_LO, reg_gpuaddr_lo),
		REG(SP_FS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
		REG(SP_CS_OBJ_START_LO, reg_gpuaddr_lo),
		REG(SP_CS_OBJ_START_HI, reg_disasm_gpuaddr_hi),

		REG(SP_VS_TEX_CONST_LO, reg_gpuaddr_lo),
		REG(SP_VS_TEX_CONST_HI, reg_dump_tex_const_hi),
		REG(SP_VS_TEX_SAMP_LO, reg_gpuaddr_lo),
		REG(SP_VS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
		REG(SP_HS_TEX_CONST_LO, reg_gpuaddr_lo),
		REG(SP_HS_TEX_CONST_HI, reg_dump_tex_const_hi),
		REG(SP_HS_TEX_SAMP_LO, reg_gpuaddr_lo),
		REG(SP_HS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
		REG(SP_DS_TEX_CONST_LO, reg_gpuaddr_lo),
		REG(SP_DS_TEX_CONST_HI, reg_dump_tex_const_hi),
		REG(SP_DS_TEX_SAMP_LO, reg_gpuaddr_lo),
		REG(SP_DS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
		REG(SP_GS_TEX_CONST_LO, reg_gpuaddr_lo),
		REG(SP_GS_TEX_CONST_HI, reg_dump_tex_const_hi),
		REG(SP_GS_TEX_SAMP_LO, reg_gpuaddr_lo),
		REG(SP_GS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
		REG(SP_FS_TEX_CONST_LO, reg_gpuaddr_lo),
		REG(SP_FS_TEX_CONST_HI, reg_dump_tex_const_hi),
		REG(SP_FS_TEX_SAMP_LO, reg_gpuaddr_lo),
		REG(SP_FS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
		REG(SP_CS_TEX_CONST_LO, reg_gpuaddr_lo),
		REG(SP_CS_TEX_CONST_HI, reg_dump_tex_const_hi),
		REG(SP_CS_TEX_SAMP_LO, reg_gpuaddr_lo),
		REG(SP_CS_TEX_SAMP_HI, reg_dump_tex_samp_hi),

		{NULL},
}, *type0_reg;   /* table for the GPU being decoded; set by cffdec_init() */
706
707 static struct rnn *rnn;
708
709 static void
init_rnn(const char * gpuname)710 init_rnn(const char *gpuname)
711 {
712 rnn = rnn_new(!options->color);
713
714 rnn_load(rnn, gpuname);
715
716 if (options->querystrs) {
717 int i;
718 queryvals = calloc(options->nquery, sizeof(queryvals[0]));
719
720 for (i = 0; i < options->nquery; i++) {
721 int val = strtol(options->querystrs[i], NULL, 0);
722
723 if (val == 0)
724 val = regbase(options->querystrs[i]);
725
726 queryvals[i] = val;
727 printf("querystr: %s -> 0x%x\n", options->querystrs[i], queryvals[i]);
728 }
729 }
730
731 for (unsigned idx = 0; type0_reg[idx].regname; idx++) {
732 type0_reg[idx].regbase = regbase(type0_reg[idx].regname);
733 if (!type0_reg[idx].regbase) {
734 printf("invalid register name: %s\n", type0_reg[idx].regname);
735 exit(1);
736 }
737 }
738 }
739
740 void
reset_regs(void)741 reset_regs(void)
742 {
743 clear_written();
744 clear_lastvals();
745 memset(&ibs, 0, sizeof(ibs));
746 }
747
748 void
cffdec_init(const struct cffdec_options * _options)749 cffdec_init(const struct cffdec_options *_options)
750 {
751 options = _options;
752 summary = options->summary;
753
754 /* in case we're decoding multiple files: */
755 free(queryvals);
756 reset_regs();
757 draw_count = 0;
758
759 /* TODO we need an API to free/cleanup any previous rnn */
760
761 switch (options->gpu_id) {
762 case 200 ... 299:
763 type0_reg = reg_a2xx;
764 init_rnn("a2xx");
765 break;
766 case 300 ... 399:
767 type0_reg = reg_a3xx;
768 init_rnn("a3xx");
769 break;
770 case 400 ... 499:
771 type0_reg = reg_a4xx;
772 init_rnn("a4xx");
773 break;
774 case 500 ... 599:
775 type0_reg = reg_a5xx;
776 init_rnn("a5xx");
777 break;
778 case 600 ... 699:
779 type0_reg = reg_a6xx;
780 init_rnn("a6xx");
781 break;
782 default:
783 errx(-1, "unsupported gpu");
784 }
785 }
786
787 const char *
pktname(unsigned opc)788 pktname(unsigned opc)
789 {
790 return rnn_enumname(rnn, "adreno_pm4_type3_packets", opc);
791 }
792
793 const char *
regname(uint32_t regbase,int color)794 regname(uint32_t regbase, int color)
795 {
796 return rnn_regname(rnn, regbase, color);
797 }
798
799 uint32_t
regbase(const char * name)800 regbase(const char *name)
801 {
802 return rnn_regbase(rnn, name);
803 }
804
/* Return non-zero if the name of the register at offset 'regbase' ends
 * with 'suffix'.
 *
 * The tail of the name is compared directly, so a name that contains the
 * suffix more than once (e.g. "FOO_LO_BAR_LO") is handled correctly, and
 * no pointer before the start of the name is ever formed.  An empty
 * suffix matches any name.
 */
static int
endswith(uint32_t regbase, const char *suffix)
{
	const char *name = regname(regbase, 0);
	size_t name_len, suffix_len;

	/* defensive: tolerate a lookup that yields no name at all */
	if (!name)
		return 0;

	name_len = strlen(name);
	suffix_len = strlen(suffix);

	if (suffix_len > name_len)
		return 0;

	return strcmp(name + (name_len - suffix_len), suffix) == 0;
}
814
815 void
dump_register_val(uint32_t regbase,uint32_t dword,int level)816 dump_register_val(uint32_t regbase, uint32_t dword, int level)
817 {
818 struct rnndecaddrinfo *info = rnn_reginfo(rnn, regbase);
819
820 if (info && info->typeinfo) {
821 uint64_t gpuaddr = 0;
822 char *decoded = rnndec_decodeval(rnn->vc, info->typeinfo, dword);
823 printf("%s%s: %s", levels[level], info->name, decoded);
824
825 /* Try and figure out if we are looking at a gpuaddr.. this
826 * might be useful for other gen's too, but at least a5xx has
827 * the _HI/_LO suffix we can look for. Maybe a better approach
828 * would be some special annotation in the xml..
829 */
830 if (options->gpu_id >= 500) {
831 if (endswith(regbase, "_HI") && endswith(regbase-1, "_LO")) {
832 gpuaddr = (((uint64_t)dword) << 32) | reg_val(regbase-1);
833 } else if (endswith(regbase, "_LO") && endswith(regbase+1, "_HI")) {
834 gpuaddr = (((uint64_t)reg_val(regbase+1)) << 32) | dword;
835 }
836 }
837
838 if (gpuaddr && hostptr(gpuaddr)) {
839 printf("\t\tbase=%"PRIx64", offset=%"PRIu64", size=%u",
840 gpubaseaddr(gpuaddr),
841 gpuaddr - gpubaseaddr(gpuaddr),
842 hostlen(gpubaseaddr(gpuaddr)));
843 }
844
845 printf("\n");
846
847 free(decoded);
848 } else if (info) {
849 printf("%s%s: %08x\n", levels[level], info->name, dword);
850 } else {
851 printf("%s<%04x>: %08x\n", levels[level], regbase, dword);
852 }
853
854 if (info) {
855 free(info->name);
856 free(info);
857 }
858 }
859
860 static void
dump_register(uint32_t regbase,uint32_t dword,int level)861 dump_register(uint32_t regbase, uint32_t dword, int level)
862 {
863 if (!quiet(3)) {
864 dump_register_val(regbase, dword, level);
865 }
866
867 for (unsigned idx = 0; type0_reg[idx].regname; idx++) {
868 if (type0_reg[idx].regbase == regbase) {
869 type0_reg[idx].fxn(type0_reg[idx].regname, dword, level);
870 break;
871 }
872 }
873 }
874
/* True when the register offset lies in the range [0x2000, 0x2400) that
 * this decoder treats as banked registers.
 */
static bool
is_banked_reg(uint32_t regbase)
{
	if (regbase < 0x2000)
		return false;

	return regbase < 0x2400;
}
880
881 static void
dump_registers(uint32_t regbase,uint32_t * dwords,uint32_t sizedwords,int level)882 dump_registers(uint32_t regbase, uint32_t *dwords, uint32_t sizedwords, int level)
883 {
884 while (sizedwords--) {
885 int last_summary = summary;
886
887 /* access to non-banked registers needs a WFI:
888 * TODO banked register range for a2xx??
889 */
890 if (needs_wfi && !is_banked_reg(regbase))
891 printl(2, "NEEDS WFI: %s (%x)\n", regname(regbase, 1), regbase);
892
893 reg_set(regbase, *dwords);
894 dump_register(regbase, *dwords, level);
895 regbase++;
896 dwords++;
897 summary = last_summary;
898 }
899 }
900
901 static void
dump_domain(uint32_t * dwords,uint32_t sizedwords,int level,const char * name)902 dump_domain(uint32_t *dwords, uint32_t sizedwords, int level,
903 const char *name)
904 {
905 struct rnndomain *dom;
906 int i;
907
908 dom = rnn_finddomain(rnn->db, name);
909
910 if (!dom)
911 return;
912
913 if (script_packet)
914 script_packet(dwords, sizedwords, rnn, dom);
915
916 if (quiet(2))
917 return;
918
919 for (i = 0; i < sizedwords; i++) {
920 struct rnndecaddrinfo *info = rnndec_decodeaddr(rnn->vc, dom, i, 0);
921 char *decoded;
922 if (!(info && info->typeinfo))
923 break;
924 uint64_t value = dwords[i];
925 if (info->typeinfo->high >= 32 && i < sizedwords - 1) {
926 value |= (uint64_t) dwords[i + 1] << 32;
927 i++; /* skip the next dword since we're printing it now */
928 }
929 decoded = rnndec_decodeval(rnn->vc, info->typeinfo, value);
930 /* Unlike the register printing path, we don't print the name
931 * of the register, so if it doesn't contain other named
932 * things (i.e. it isn't a bitset) then print the register
933 * name as if it's a bitset with a single entry. This avoids
934 * having to create a dummy register with a single entry to
935 * get a name in the decoding.
936 */
937 if (info->typeinfo->type == RNN_TTYPE_BITSET ||
938 info->typeinfo->type == RNN_TTYPE_INLINE_BITSET) {
939 printf("%s%s\n", levels[level], decoded);
940 } else {
941 printf("%s{ %s%s%s = %s }\n", levels[level],
942 rnn->vc->colors->rname, info->name,
943 rnn->vc->colors->reset, decoded);
944 }
945 free(decoded);
946 free(info->name);
947 free(info);
948 }
949 }
950
951
/* Bin bounds printed with each query result.  On a5xx/a6xx __do_query()
 * refreshes them from GRAS_SC_WINDOW_SCISSOR_TL/BR (x in the low 16 bits,
 * y in the high 16 bits):
 */
static uint32_t bin_x1, bin_x2, bin_y1, bin_y2;
static unsigned mode;
/* Label for the current render mode, printed by print_mode() and
 * __do_query():
 */
static const char *render_mode;
/* Bitmask of render modes.  do_query_compare() temporarily overrides
 * enable_mask before calling load_all_groups() -- presumably that is
 * where the mask is consulted (defined outside this part of the file):
 */
static enum {
	MODE_BINNING = 0x1,
	MODE_GMEM    = 0x2,
	MODE_BYPASS  = 0x4,
	MODE_ALL     = MODE_BINNING | MODE_GMEM | MODE_BYPASS,
} enable_mask = MODE_ALL;
/* Printed by print_mode() as "skip_ib2: g=.., l=..": */
static bool skip_ib2_enable_global;
static bool skip_ib2_enable_local;
963
964 static void
print_mode(int level)965 print_mode(int level)
966 {
967 if ((options->gpu_id >= 500) && !quiet(2)) {
968 printf("%smode: %s\n", levels[level], render_mode);
969 printf("%sskip_ib2: g=%d, l=%d\n", levels[level], skip_ib2_enable_global, skip_ib2_enable_local);
970 }
971 }
972
973 static bool
skip_query(void)974 skip_query(void)
975 {
976 switch (options->query_mode) {
977 case QUERY_ALL:
978 /* never skip: */
979 return false;
980 case QUERY_WRITTEN:
981 for (int i = 0; i < options->nquery; i++) {
982 uint32_t regbase = queryvals[i];
983 if (!reg_written(regbase)) {
984 continue;
985 }
986 if (reg_rewritten(regbase)) {
987 return false;
988 }
989 }
990 return true;
991 case QUERY_DELTA:
992 for (int i = 0; i < options->nquery; i++) {
993 uint32_t regbase = queryvals[i];
994 if (!reg_written(regbase)) {
995 continue;
996 }
997 uint32_t lastval = reg_val(regbase);
998 if (lastval != lastvals[regbase]) {
999 return false;
1000 }
1001 }
1002 return true;
1003 }
1004 return true;
1005 }
1006
1007 static void
__do_query(const char * primtype,uint32_t num_indices)1008 __do_query(const char *primtype, uint32_t num_indices)
1009 {
1010 int n = 0;
1011
1012 if ((500 <= options->gpu_id) && (options->gpu_id < 700)) {
1013 uint32_t scissor_tl = reg_val(regbase("GRAS_SC_WINDOW_SCISSOR_TL"));
1014 uint32_t scissor_br = reg_val(regbase("GRAS_SC_WINDOW_SCISSOR_BR"));
1015
1016 bin_x1 = scissor_tl & 0xffff;
1017 bin_y1 = scissor_tl >> 16;
1018 bin_x2 = scissor_br & 0xffff;
1019 bin_y2 = scissor_br >> 16;
1020 }
1021
1022 for (int i = 0; i < options->nquery; i++) {
1023 uint32_t regbase = queryvals[i];
1024 if (reg_written(regbase)) {
1025 uint32_t lastval = reg_val(regbase);
1026 printf("%4d: %s(%u,%u-%u,%u):%u:", draw_count, primtype,
1027 bin_x1, bin_y1, bin_x2, bin_y2, num_indices);
1028 if (options->gpu_id >= 500)
1029 printf("%s:", render_mode);
1030 printf("\t%08x", lastval);
1031 if (lastval != lastvals[regbase]) {
1032 printf("!");
1033 } else {
1034 printf(" ");
1035 }
1036 if (reg_rewritten(regbase)) {
1037 printf("+");
1038 } else {
1039 printf(" ");
1040 }
1041 dump_register_val(regbase, lastval, 0);
1042 n++;
1043 }
1044 }
1045
1046 if (n > 1)
1047 printf("\n");
1048 }
1049
1050 static void
do_query_compare(const char * primtype,uint32_t num_indices)1051 do_query_compare(const char *primtype, uint32_t num_indices)
1052 {
1053 unsigned saved_enable_mask = enable_mask;
1054 const char *saved_render_mode = render_mode;
1055
1056 /* in 'query-compare' mode, we want to see if the register is writtten
1057 * or changed in any mode:
1058 *
1059 * (NOTE: this could cause false-positive for 'query-delta' if the reg
1060 * is written with different values in binning vs sysmem/gmem mode, as
1061 * we don't track previous values per-mode, but I think we can live with
1062 * that)
1063 */
1064 enable_mask = MODE_ALL;
1065
1066 clear_rewritten();
1067 load_all_groups(0);
1068
1069 if (!skip_query()) {
1070 /* dump binning pass values: */
1071 enable_mask = MODE_BINNING;
1072 render_mode = "BINNING";
1073 clear_rewritten();
1074 load_all_groups(0);
1075 __do_query(primtype, num_indices);
1076
1077 /* dump draw pass values: */
1078 enable_mask = MODE_GMEM | MODE_BYPASS;
1079 render_mode = "DRAW";
1080 clear_rewritten();
1081 load_all_groups(0);
1082 __do_query(primtype, num_indices);
1083
1084 printf("\n");
1085 }
1086
1087 enable_mask = saved_enable_mask;
1088 render_mode = saved_render_mode;
1089
1090 disable_all_groups();
1091 }
1092
1093 /* well, actually query and script..
1094 * NOTE: call this before dump_register_summary()
1095 */
1096 static void
do_query(const char * primtype,uint32_t num_indices)1097 do_query(const char *primtype, uint32_t num_indices)
1098 {
1099 if (script_draw)
1100 script_draw(primtype, num_indices);
1101
1102 if (options->query_compare) {
1103 do_query_compare(primtype, num_indices);
1104 return;
1105 }
1106
1107 if (skip_query())
1108 return;
1109
1110 __do_query(primtype, num_indices);
1111 }
1112
1113 static void
cp_im_loadi(uint32_t * dwords,uint32_t sizedwords,int level)1114 cp_im_loadi(uint32_t *dwords, uint32_t sizedwords, int level)
1115 {
1116 uint32_t start = dwords[1] >> 16;
1117 uint32_t size = dwords[1] & 0xffff;
1118 const char *type = NULL, *ext = NULL;
1119 gl_shader_stage disasm_type;
1120
1121 switch (dwords[0]) {
1122 case 0:
1123 type = "vertex";
1124 ext = "vo";
1125 disasm_type = MESA_SHADER_VERTEX;
1126 break;
1127 case 1:
1128 type = "fragment";
1129 ext = "fo";
1130 disasm_type = MESA_SHADER_FRAGMENT;
1131 break;
1132 default:
1133 type = "<unknown>";
1134 disasm_type = 0;
1135 break;
1136 }
1137
1138 printf("%s%s shader, start=%04x, size=%04x\n", levels[level], type, start, size);
1139 disasm_a2xx(dwords + 2, sizedwords - 2, level+2, disasm_type);
1140
1141 /* dump raw shader: */
1142 if (ext)
1143 dump_shader(ext, dwords + 2, (sizedwords - 2) * 4);
1144 }
1145
/*
 * Decode a run of consecutive register writes: the low 16 bits of dwords[0]
 * give the first register offset, and every following dword is the value
 * for the next register in sequence.  Each write is dumped and recorded
 * with reg_set().
 */
static void
cp_wide_reg_write(uint32_t *dwords, uint32_t sizedwords, int level)
{
	uint32_t reg = dwords[0] & 0xffff;

	for (uint32_t idx = 1; idx < sizedwords; idx++, reg++) {
		dump_register(reg, dwords[idx], level+1);
		reg_set(reg, dwords[idx]);
	}
}
1157
/*
 * Kind of state carried by a load-state packet, as produced by the
 * *_get_state_type() helpers.  Value 0 is deliberately unused so that a
 * zero-initialized lookup entry does not match any known kind.
 */
enum state_t {
	TEX_SAMP        = 1,
	TEX_CONST       = 2,
	TEX_MIPADDR     = 3,  /* a3xx only */
	SHADER_PROG     = 4,
	SHADER_CONST    = 5,

	/* image/ssbo state: */
	SSBO_0          = 6,
	SSBO_1          = 7,
	SSBO_2          = 8,

	UBO             = 9,

	/* unknown things, which are just hexdumped: */
	UNKNOWN_DWORDS  = 10,
	UNKNOWN_2DWORDS = 11,
	UNKNOWN_4DWORDS = 12,
};
1177
/*
 * State block ids used to index the a3xx load-state lookup table.  The
 * enumerators are numbered consecutively from 0 to 7.
 */
enum adreno_state_block {
	SB_VERT_TEX = 0,
	SB_VERT_MIPADDR,
	SB_FRAG_TEX,
	SB_FRAG_MIPADDR,
	SB_VERT_SHADER,
	SB_GEOM_SHADER,
	SB_FRAG_SHADER,
	SB_COMPUTE_SHADER,
};
1188
1189 /* TODO there is probably a clever way to let rnndec parse things so
1190 * we don't have to care about packet format differences across gens
1191 */
1192
1193 static void
a3xx_get_state_type(uint32_t * dwords,gl_shader_stage * stage,enum state_t * state,enum state_src_t * src)1194 a3xx_get_state_type(uint32_t *dwords, gl_shader_stage *stage, enum state_t *state,
1195 enum state_src_t *src)
1196 {
1197 unsigned state_block_id = (dwords[0] >> 19) & 0x7;
1198 unsigned state_type = dwords[1] & 0x3;
1199 static const struct {
1200 gl_shader_stage stage;
1201 enum state_t state;
1202 } lookup[0xf][0x3] = {
1203 [SB_VERT_TEX][0] = { MESA_SHADER_VERTEX, TEX_SAMP },
1204 [SB_VERT_TEX][1] = { MESA_SHADER_VERTEX, TEX_CONST },
1205 [SB_FRAG_TEX][0] = { MESA_SHADER_FRAGMENT, TEX_SAMP },
1206 [SB_FRAG_TEX][1] = { MESA_SHADER_FRAGMENT, TEX_CONST },
1207 [SB_VERT_SHADER][0] = { MESA_SHADER_VERTEX, SHADER_PROG },
1208 [SB_VERT_SHADER][1] = { MESA_SHADER_VERTEX, SHADER_CONST },
1209 [SB_FRAG_SHADER][0] = { MESA_SHADER_FRAGMENT, SHADER_PROG },
1210 [SB_FRAG_SHADER][1] = { MESA_SHADER_FRAGMENT, SHADER_CONST },
1211 };
1212
1213 *stage = lookup[state_block_id][state_type].stage;
1214 *state = lookup[state_block_id][state_type].state;
1215 unsigned state_src = (dwords[0] >> 16) & 0x7;
1216 if (state_src == 0 /* SS_DIRECT */)
1217 *src = STATE_SRC_DIRECT;
1218 else
1219 *src = STATE_SRC_INDIRECT;
1220 }
1221
1222 static enum state_src_t
_get_state_src(unsigned dword0)1223 _get_state_src(unsigned dword0)
1224 {
1225 switch ((dword0 >> 16) & 0x3) {
1226 case 0: /* SS4_DIRECT / SS6_DIRECT */
1227 return STATE_SRC_DIRECT;
1228 case 2: /* SS4_INDIRECT / SS6_INDIRECT */
1229 return STATE_SRC_INDIRECT;
1230 case 1: /* SS6_BINDLESS */
1231 return STATE_SRC_BINDLESS;
1232 default:
1233 return STATE_SRC_DIRECT;
1234 }
1235 }
1236
1237 static void
_get_state_type(unsigned state_block_id,unsigned state_type,gl_shader_stage * stage,enum state_t * state)1238 _get_state_type(unsigned state_block_id, unsigned state_type,
1239 gl_shader_stage *stage, enum state_t *state)
1240 {
1241 static const struct {
1242 gl_shader_stage stage;
1243 enum state_t state;
1244 } lookup[0x10][0x4] = {
1245 // SB4_VS_TEX:
1246 [0x0][0] = { MESA_SHADER_VERTEX, TEX_SAMP },
1247 [0x0][1] = { MESA_SHADER_VERTEX, TEX_CONST },
1248 [0x0][2] = { MESA_SHADER_VERTEX, UBO },
1249 // SB4_HS_TEX:
1250 [0x1][0] = { MESA_SHADER_TESS_CTRL, TEX_SAMP },
1251 [0x1][1] = { MESA_SHADER_TESS_CTRL, TEX_CONST },
1252 [0x1][2] = { MESA_SHADER_TESS_CTRL, UBO },
1253 // SB4_DS_TEX:
1254 [0x2][0] = { MESA_SHADER_TESS_EVAL, TEX_SAMP },
1255 [0x2][1] = { MESA_SHADER_TESS_EVAL, TEX_CONST },
1256 [0x2][2] = { MESA_SHADER_TESS_EVAL, UBO },
1257 // SB4_GS_TEX:
1258 [0x3][0] = { MESA_SHADER_GEOMETRY, TEX_SAMP },
1259 [0x3][1] = { MESA_SHADER_GEOMETRY, TEX_CONST },
1260 [0x3][2] = { MESA_SHADER_GEOMETRY, UBO },
1261 // SB4_FS_TEX:
1262 [0x4][0] = { MESA_SHADER_FRAGMENT, TEX_SAMP },
1263 [0x4][1] = { MESA_SHADER_FRAGMENT, TEX_CONST },
1264 [0x4][2] = { MESA_SHADER_FRAGMENT, UBO },
1265 // SB4_CS_TEX:
1266 [0x5][0] = { MESA_SHADER_COMPUTE, TEX_SAMP },
1267 [0x5][1] = { MESA_SHADER_COMPUTE, TEX_CONST },
1268 [0x5][2] = { MESA_SHADER_COMPUTE, UBO },
1269 // SB4_VS_SHADER:
1270 [0x8][0] = { MESA_SHADER_VERTEX, SHADER_PROG },
1271 [0x8][1] = { MESA_SHADER_VERTEX, SHADER_CONST },
1272 [0x8][2] = { MESA_SHADER_VERTEX, UBO },
1273 // SB4_HS_SHADER
1274 [0x9][0] = { MESA_SHADER_TESS_CTRL, SHADER_PROG },
1275 [0x9][1] = { MESA_SHADER_TESS_CTRL, SHADER_CONST },
1276 [0x9][2] = { MESA_SHADER_TESS_CTRL, UBO },
1277 // SB4_DS_SHADER
1278 [0xa][0] = { MESA_SHADER_TESS_EVAL, SHADER_PROG },
1279 [0xa][1] = { MESA_SHADER_TESS_EVAL, SHADER_CONST },
1280 [0xa][2] = { MESA_SHADER_TESS_EVAL, UBO },
1281 // SB4_GS_SHADER
1282 [0xb][0] = { MESA_SHADER_GEOMETRY, SHADER_PROG },
1283 [0xb][1] = { MESA_SHADER_GEOMETRY, SHADER_CONST },
1284 [0xb][2] = { MESA_SHADER_GEOMETRY, UBO },
1285 // SB4_FS_SHADER:
1286 [0xc][0] = { MESA_SHADER_FRAGMENT, SHADER_PROG },
1287 [0xc][1] = { MESA_SHADER_FRAGMENT, SHADER_CONST },
1288 [0xc][2] = { MESA_SHADER_FRAGMENT, UBO },
1289 // SB4_CS_SHADER:
1290 [0xd][0] = { MESA_SHADER_COMPUTE, SHADER_PROG },
1291 [0xd][1] = { MESA_SHADER_COMPUTE, SHADER_CONST },
1292 [0xd][2] = { MESA_SHADER_COMPUTE, UBO },
1293 [0xd][3] = { MESA_SHADER_COMPUTE, SSBO_0 }, /* a6xx location */
1294 // SB4_SSBO (shared across all stages)
1295 [0xe][0] = { 0, SSBO_0 }, /* a5xx (and a4xx?) location */
1296 [0xe][1] = { 0, SSBO_1 },
1297 [0xe][2] = { 0, SSBO_2 },
1298 // SB4_CS_SSBO
1299 [0xf][0] = { MESA_SHADER_COMPUTE, SSBO_0 },
1300 [0xf][1] = { MESA_SHADER_COMPUTE, SSBO_1 },
1301 [0xf][2] = { MESA_SHADER_COMPUTE, SSBO_2 },
1302 // unknown things
1303 /* This looks like combined UBO state for 3d stages (a5xx and
1304 * before?? I think a6xx has UBO state per shader stage:
1305 */
1306 [0x6][2] = { 0, UBO },
1307 [0x7][1] = { 0, UNKNOWN_2DWORDS },
1308 };
1309
1310 *stage = lookup[state_block_id][state_type].stage;
1311 *state = lookup[state_block_id][state_type].state;
1312 }
1313
1314 static void
a4xx_get_state_type(uint32_t * dwords,gl_shader_stage * stage,enum state_t * state,enum state_src_t * src)1315 a4xx_get_state_type(uint32_t *dwords, gl_shader_stage *stage, enum state_t *state,
1316 enum state_src_t *src)
1317 {
1318 unsigned state_block_id = (dwords[0] >> 18) & 0xf;
1319 unsigned state_type = dwords[1] & 0x3;
1320 _get_state_type(state_block_id, state_type, stage, state);
1321 *src = _get_state_src(dwords[0]);
1322 }
1323
1324 static void
a6xx_get_state_type(uint32_t * dwords,gl_shader_stage * stage,enum state_t * state,enum state_src_t * src)1325 a6xx_get_state_type(uint32_t *dwords, gl_shader_stage *stage, enum state_t *state,
1326 enum state_src_t *src)
1327 {
1328 unsigned state_block_id = (dwords[0] >> 18) & 0xf;
1329 unsigned state_type = (dwords[0] >> 14) & 0x3;
1330 _get_state_type(state_block_id, state_type, stage, state);
1331 *src = _get_state_src(dwords[0]);
1332 }
1333
1334 static void
dump_tex_samp(uint32_t * texsamp,enum state_src_t src,int num_unit,int level)1335 dump_tex_samp(uint32_t *texsamp, enum state_src_t src, int num_unit, int level)
1336 {
1337 for (int i = 0; i < num_unit; i++) {
1338 /* work-around to reduce noise for opencl blob which always
1339 * writes the max # regardless of # of textures used
1340 */
1341 if ((num_unit == 16) && (texsamp[0] == 0) && (texsamp[1] == 0))
1342 break;
1343
1344 if ((300 <= options->gpu_id) && (options->gpu_id < 400)) {
1345 dump_domain(texsamp, 2, level+2, "A3XX_TEX_SAMP");
1346 dump_hex(texsamp, 2, level+1);
1347 texsamp += 2;
1348 } else if ((400 <= options->gpu_id) && (options->gpu_id < 500)) {
1349 dump_domain(texsamp, 2, level+2, "A4XX_TEX_SAMP");
1350 dump_hex(texsamp, 2, level+1);
1351 texsamp += 2;
1352 } else if ((500 <= options->gpu_id) && (options->gpu_id < 600)) {
1353 dump_domain(texsamp, 4, level+2, "A5XX_TEX_SAMP");
1354 dump_hex(texsamp, 4, level+1);
1355 texsamp += 4;
1356 } else if ((600 <= options->gpu_id) && (options->gpu_id < 700)) {
1357 dump_domain(texsamp, 4, level+2, "A6XX_TEX_SAMP");
1358 dump_hex(texsamp, 4, level+1);
1359 texsamp += src == STATE_SRC_BINDLESS ? 16 : 4;
1360 }
1361 }
1362 }
1363
1364 static void
dump_tex_const(uint32_t * texconst,int num_unit,int level)1365 dump_tex_const(uint32_t *texconst, int num_unit, int level)
1366 {
1367 for (int i = 0; i < num_unit; i++) {
1368 /* work-around to reduce noise for opencl blob which always
1369 * writes the max # regardless of # of textures used
1370 */
1371 if ((num_unit == 16) &&
1372 (texconst[0] == 0) && (texconst[1] == 0) &&
1373 (texconst[2] == 0) && (texconst[3] == 0))
1374 break;
1375
1376 if ((300 <= options->gpu_id) && (options->gpu_id < 400)) {
1377 dump_domain(texconst, 4, level+2, "A3XX_TEX_CONST");
1378 dump_hex(texconst, 4, level+1);
1379 texconst += 4;
1380 } else if ((400 <= options->gpu_id) && (options->gpu_id < 500)) {
1381 dump_domain(texconst, 8, level+2, "A4XX_TEX_CONST");
1382 if (options->dump_textures) {
1383 uint32_t addr = texconst[4] & ~0x1f;
1384 dump_gpuaddr(addr, level-2);
1385 }
1386 dump_hex(texconst, 8, level+1);
1387 texconst += 8;
1388 } else if ((500 <= options->gpu_id) && (options->gpu_id < 600)) {
1389 dump_domain(texconst, 12, level+2, "A5XX_TEX_CONST");
1390 if (options->dump_textures) {
1391 uint64_t addr = (((uint64_t)texconst[5] & 0x1ffff) << 32) | texconst[4];
1392 dump_gpuaddr_size(addr, level-2, hostlen(addr) / 4, 3);
1393 }
1394 dump_hex(texconst, 12, level+1);
1395 texconst += 12;
1396 } else if ((600 <= options->gpu_id) && (options->gpu_id < 700)) {
1397 dump_domain(texconst, 16, level+2, "A6XX_TEX_CONST");
1398 if (options->dump_textures) {
1399 uint64_t addr = (((uint64_t)texconst[5] & 0x1ffff) << 32) | texconst[4];
1400 dump_gpuaddr_size(addr, level-2, hostlen(addr) / 4, 3);
1401 }
1402 dump_hex(texconst, 16, level+1);
1403 texconst += 16;
1404 }
1405 }
1406 }
1407
1408 static void
cp_load_state(uint32_t * dwords,uint32_t sizedwords,int level)1409 cp_load_state(uint32_t *dwords, uint32_t sizedwords, int level)
1410 {
1411 gl_shader_stage stage;
1412 enum state_t state;
1413 enum state_src_t src;
1414 uint32_t num_unit = (dwords[0] >> 22) & 0x1ff;
1415 uint64_t ext_src_addr;
1416 void *contents;
1417 int i;
1418
1419 if (quiet(2) && !options->script)
1420 return;
1421
1422 if (options->gpu_id >= 600)
1423 a6xx_get_state_type(dwords, &stage, &state, &src);
1424 else if (options->gpu_id >= 400)
1425 a4xx_get_state_type(dwords, &stage, &state, &src);
1426 else
1427 a3xx_get_state_type(dwords, &stage, &state, &src);
1428
1429 switch (src) {
1430 case STATE_SRC_DIRECT: ext_src_addr = 0; break;
1431 case STATE_SRC_INDIRECT:
1432 if (is_64b()) {
1433 ext_src_addr = dwords[1] & 0xfffffffc;
1434 ext_src_addr |= ((uint64_t)dwords[2]) << 32;
1435 } else {
1436 ext_src_addr = dwords[1] & 0xfffffffc;
1437 }
1438
1439 break;
1440 case STATE_SRC_BINDLESS: {
1441 const unsigned base_reg =
1442 stage == MESA_SHADER_COMPUTE ?
1443 regbase("HLSQ_CS_BINDLESS_BASE[0].ADDR") :
1444 regbase("HLSQ_BINDLESS_BASE[0].ADDR");
1445
1446 if (is_64b()) {
1447 const unsigned reg = base_reg + (dwords[1] >> 28) * 2;
1448 ext_src_addr = reg_val(reg) & 0xfffffffc;
1449 ext_src_addr |= ((uint64_t)reg_val(reg + 1)) << 32;
1450 } else {
1451 const unsigned reg = base_reg + (dwords[1] >> 28);
1452 ext_src_addr = reg_val(reg) & 0xfffffffc;
1453 }
1454
1455 ext_src_addr += 4 * (dwords[1] & 0xffffff);
1456 break;
1457 }
1458 }
1459
1460 if (ext_src_addr)
1461 contents = hostptr(ext_src_addr);
1462 else
1463 contents = is_64b() ? dwords + 3 : dwords + 2;
1464
1465 if (!contents)
1466 return;
1467
1468 switch (state) {
1469 case SHADER_PROG: {
1470 const char *ext = NULL;
1471
1472 if (quiet(2))
1473 return;
1474
1475 if (options->gpu_id >= 400)
1476 num_unit *= 16;
1477 else if (options->gpu_id >= 300)
1478 num_unit *= 4;
1479
1480 /* shaders:
1481 *
1482 * note: num_unit seems to be # of instruction groups, where
1483 * an instruction group has 4 64bit instructions.
1484 */
1485 if (stage == MESA_SHADER_VERTEX) {
1486 ext = "vo3";
1487 } else if (stage == MESA_SHADER_GEOMETRY) {
1488 ext = "go3";
1489 } else if (stage == MESA_SHADER_COMPUTE) {
1490 ext = "co3";
1491 } else if (stage == MESA_SHADER_FRAGMENT){
1492 ext = "fo3";
1493 }
1494
1495 if (contents)
1496 try_disasm_a3xx(contents, num_unit * 2, level+2, stdout, options->gpu_id);
1497
1498 /* dump raw shader: */
1499 if (ext)
1500 dump_shader(ext, contents, num_unit * 2 * 4);
1501
1502 break;
1503 }
1504 case SHADER_CONST: {
1505 if (quiet(2))
1506 return;
1507
1508 /* uniforms/consts:
1509 *
1510 * note: num_unit seems to be # of pairs of dwords??
1511 */
1512
1513 if (options->gpu_id >= 400)
1514 num_unit *= 2;
1515
1516 dump_float(contents, num_unit*2, level+1);
1517 dump_hex(contents, num_unit*2, level+1);
1518
1519 break;
1520 }
1521 case TEX_MIPADDR: {
1522 uint32_t *addrs = contents;
1523
1524 if (quiet(2))
1525 return;
1526
1527 /* mipmap consts block just appears to be array of num_unit gpu addr's: */
1528 for (i = 0; i < num_unit; i++) {
1529 void *ptr = hostptr(addrs[i]);
1530 printf("%s%2d: %08x\n", levels[level+1], i, addrs[i]);
1531 if (options->dump_textures) {
1532 printf("base=%08x\n", (uint32_t)gpubaseaddr(addrs[i]));
1533 dump_hex(ptr, hostlen(addrs[i])/4, level+1);
1534 }
1535 }
1536 break;
1537 }
1538 case TEX_SAMP: {
1539 dump_tex_samp(contents, src, num_unit, level);
1540 break;
1541 }
1542 case TEX_CONST: {
1543 dump_tex_const(contents, num_unit, level);
1544 break;
1545 }
1546 case SSBO_0: {
1547 uint32_t *ssboconst = (uint32_t *)contents;
1548
1549 for (i = 0; i < num_unit; i++) {
1550 int sz = 4;
1551 if (400 <= options->gpu_id && options->gpu_id < 500) {
1552 dump_domain(ssboconst, 4, level+2, "A4XX_SSBO_0");
1553 } else if (500 <= options->gpu_id && options->gpu_id < 600) {
1554 dump_domain(ssboconst, 4, level+2, "A5XX_SSBO_0");
1555 } else if (600 <= options->gpu_id && options->gpu_id < 700) {
1556 sz = 16;
1557 dump_domain(ssboconst, 16, level+2, "A6XX_IBO");
1558 }
1559 dump_hex(ssboconst, sz, level+1);
1560 ssboconst += sz;
1561 }
1562 break;
1563 }
1564 case SSBO_1: {
1565 uint32_t *ssboconst = (uint32_t *)contents;
1566
1567 for (i = 0; i < num_unit; i++) {
1568 if (400 <= options->gpu_id && options->gpu_id < 500)
1569 dump_domain(ssboconst, 2, level+2, "A4XX_SSBO_1");
1570 else if (500 <= options->gpu_id && options->gpu_id < 600)
1571 dump_domain(ssboconst, 2, level+2, "A5XX_SSBO_1");
1572 dump_hex(ssboconst, 2, level+1);
1573 ssboconst += 2;
1574 }
1575 break;
1576 }
1577 case SSBO_2: {
1578 uint32_t *ssboconst = (uint32_t *)contents;
1579
1580 for (i = 0; i < num_unit; i++) {
1581 /* TODO a4xx and a5xx might be same: */
1582 if ((500 <= options->gpu_id) && (options->gpu_id < 600)) {
1583 dump_domain(ssboconst, 2, level+2, "A5XX_SSBO_2");
1584 dump_hex(ssboconst, 2, level+1);
1585 }
1586 if (options->dump_textures) {
1587 uint64_t addr = (((uint64_t)ssboconst[1] & 0x1ffff) << 32) | ssboconst[0];
1588 dump_gpuaddr_size(addr, level-2, hostlen(addr) / 4, 3);
1589 }
1590 ssboconst += 2;
1591 }
1592 break;
1593 }
1594 case UBO: {
1595 uint32_t *uboconst = (uint32_t *)contents;
1596
1597 for (i = 0; i < num_unit; i++) {
1598 // TODO probably similar on a4xx..
1599 if (500 <= options->gpu_id && options->gpu_id < 600)
1600 dump_domain(uboconst, 2, level+2, "A5XX_UBO");
1601 else if (600 <= options->gpu_id && options->gpu_id < 700)
1602 dump_domain(uboconst, 2, level+2, "A6XX_UBO");
1603 dump_hex(uboconst, 2, level+1);
1604 uboconst += src == STATE_SRC_BINDLESS ? 16 : 2;
1605 }
1606 break;
1607 }
1608 case UNKNOWN_DWORDS: {
1609 if (quiet(2))
1610 return;
1611 dump_hex(contents, num_unit, level+1);
1612 break;
1613 }
1614 case UNKNOWN_2DWORDS: {
1615 if (quiet(2))
1616 return;
1617 dump_hex(contents, num_unit * 2, level+1);
1618 break;
1619 }
1620 case UNKNOWN_4DWORDS: {
1621 if (quiet(2))
1622 return;
1623 dump_hex(contents, num_unit * 4, level+1);
1624 break;
1625 }
1626 default:
1627 if (quiet(2))
1628 return;
1629 /* hmm.. */
1630 dump_hex(contents, num_unit, level+1);
1631 break;
1632 }
1633 }
1634
1635 static void
cp_set_bin(uint32_t * dwords,uint32_t sizedwords,int level)1636 cp_set_bin(uint32_t *dwords, uint32_t sizedwords, int level)
1637 {
1638 bin_x1 = dwords[1] & 0xffff;
1639 bin_y1 = dwords[1] >> 16;
1640 bin_x2 = dwords[2] & 0xffff;
1641 bin_y2 = dwords[2] >> 16;
1642 }
1643
1644 static void
dump_a2xx_tex_const(uint32_t * dwords,uint32_t sizedwords,uint32_t val,int level)1645 dump_a2xx_tex_const(uint32_t *dwords, uint32_t sizedwords, uint32_t val, int level)
1646 {
1647 uint32_t w, h, p;
1648 uint32_t gpuaddr, flags, mip_gpuaddr, mip_flags;
1649 uint32_t min, mag, swiz, clamp_x, clamp_y, clamp_z;
1650 static const char *filter[] = {
1651 "point", "bilinear", "bicubic",
1652 };
1653 static const char *clamp[] = {
1654 "wrap", "mirror", "clamp-last-texel",
1655 };
1656 static const char swiznames[] = "xyzw01??";
1657
1658 /* see sys2gmem_tex_const[] in adreno_a2xxx.c */
1659
1660 /* Texture, FormatXYZW=Unsigned, ClampXYZ=Wrap/Repeat,
1661 * RFMode=ZeroClamp-1, Dim=1:2d, pitch
1662 */
1663 p = (dwords[0] >> 22) << 5;
1664 clamp_x = (dwords[0] >> 10) & 0x3;
1665 clamp_y = (dwords[0] >> 13) & 0x3;
1666 clamp_z = (dwords[0] >> 16) & 0x3;
1667
1668 /* Format=6:8888_WZYX, EndianSwap=0:None, ReqSize=0:256bit, DimHi=0,
1669 * NearestClamp=1:OGL Mode
1670 */
1671 parse_dword_addr(dwords[1], &gpuaddr, &flags, 0xfff);
1672
1673 /* Width, Height, EndianSwap=0:None */
1674 w = (dwords[2] & 0x1fff) + 1;
1675 h = ((dwords[2] >> 13) & 0x1fff) + 1;
1676
1677 /* NumFormat=0:RF, DstSelXYZW=XYZW, ExpAdj=0, MagFilt=MinFilt=0:Point,
1678 * Mip=2:BaseMap
1679 */
1680 mag = (dwords[3] >> 19) & 0x3;
1681 min = (dwords[3] >> 21) & 0x3;
1682 swiz = (dwords[3] >> 1) & 0xfff;
1683
1684 /* VolMag=VolMin=0:Point, MinMipLvl=0, MaxMipLvl=1, LodBiasH=V=0,
1685 * Dim3d=0
1686 */
1687 // XXX
1688
1689 /* BorderColor=0:ABGRBlack, ForceBC=0:diable, TriJuice=0, Aniso=0,
1690 * Dim=1:2d, MipPacking=0
1691 */
1692 parse_dword_addr(dwords[5], &mip_gpuaddr, &mip_flags, 0xfff);
1693
1694 printf("%sset texture const %04x\n", levels[level], val);
1695 printf("%sclamp x/y/z: %s/%s/%s\n", levels[level+1],
1696 clamp[clamp_x], clamp[clamp_y], clamp[clamp_z]);
1697 printf("%sfilter min/mag: %s/%s\n", levels[level+1], filter[min], filter[mag]);
1698 printf("%sswizzle: %c%c%c%c\n", levels[level+1],
1699 swiznames[(swiz >> 0) & 0x7], swiznames[(swiz >> 3) & 0x7],
1700 swiznames[(swiz >> 6) & 0x7], swiznames[(swiz >> 9) & 0x7]);
1701 printf("%saddr=%08x (flags=%03x), size=%dx%d, pitch=%d, format=%s\n",
1702 levels[level+1], gpuaddr, flags, w, h, p,
1703 rnn_enumname(rnn, "a2xx_sq_surfaceformat", flags & 0xf));
1704 printf("%smipaddr=%08x (flags=%03x)\n", levels[level+1],
1705 mip_gpuaddr, mip_flags);
1706 }
1707
1708 static void
dump_a2xx_shader_const(uint32_t * dwords,uint32_t sizedwords,uint32_t val,int level)1709 dump_a2xx_shader_const(uint32_t *dwords, uint32_t sizedwords, uint32_t val, int level)
1710 {
1711 int i;
1712 printf("%sset shader const %04x\n", levels[level], val);
1713 for (i = 0; i < sizedwords; ) {
1714 uint32_t gpuaddr, flags;
1715 parse_dword_addr(dwords[i++], &gpuaddr, &flags, 0xf);
1716 void *addr = hostptr(gpuaddr);
1717 if (addr) {
1718 const char * fmt =
1719 rnn_enumname(rnn, "a2xx_sq_surfaceformat", flags & 0xf);
1720 uint32_t size = dwords[i++];
1721 printf("%saddr=%08x, size=%d, format=%s\n", levels[level+1],
1722 gpuaddr, size, fmt);
1723 // TODO maybe dump these as bytes instead of dwords?
1724 size = (size + 3) / 4; // for now convert to dwords
1725 dump_hex(addr, min(size, 64), level + 1);
1726 if (size > min(size, 64))
1727 printf("%s\t\t...\n", levels[level+1]);
1728 dump_float(addr, min(size, 64), level + 1);
1729 if (size > min(size, 64))
1730 printf("%s\t\t...\n", levels[level+1]);
1731 }
1732 }
1733 }
1734
1735 static void
cp_set_const(uint32_t * dwords,uint32_t sizedwords,int level)1736 cp_set_const(uint32_t *dwords, uint32_t sizedwords, int level)
1737 {
1738 uint32_t val = dwords[0] & 0xffff;
1739 switch((dwords[0] >> 16) & 0xf) {
1740 case 0x0:
1741 dump_float((float *)(dwords+1), sizedwords-1, level+1);
1742 break;
1743 case 0x1:
1744 /* need to figure out how const space is partitioned between
1745 * attributes, textures, etc..
1746 */
1747 if (val < 0x78) {
1748 dump_a2xx_tex_const(dwords+1, sizedwords-1, val, level);
1749 } else {
1750 dump_a2xx_shader_const(dwords+1, sizedwords-1, val, level);
1751 }
1752 break;
1753 case 0x2:
1754 printf("%sset bool const %04x\n", levels[level], val);
1755 break;
1756 case 0x3:
1757 printf("%sset loop const %04x\n", levels[level], val);
1758 break;
1759 case 0x4:
1760 val += 0x2000;
1761 if (dwords[0] & 0x80000000) {
1762 uint32_t srcreg = dwords[1];
1763 uint32_t dstval = dwords[2];
1764
1765 /* TODO: not sure what happens w/ payload != 2.. */
1766 assert(sizedwords == 3);
1767 assert(srcreg < ARRAY_SIZE(type0_reg_vals));
1768
1769 /* note: rnn_regname uses a static buf so we can't do
1770 * two regname() calls for one printf..
1771 */
1772 printf("%s%s = %08x + ", levels[level], regname(val, 1), dstval);
1773 printf("%s (%08x)\n", regname(srcreg, 1), type0_reg_vals[srcreg]);
1774
1775 dstval += type0_reg_vals[srcreg];
1776
1777 dump_registers(val, &dstval, 1, level+1);
1778 } else {
1779 dump_registers(val, dwords+1, sizedwords-1, level+1);
1780 }
1781 break;
1782 }
1783 }
1784
1785 static void dump_register_summary(int level);
1786
1787 static void
cp_event_write(uint32_t * dwords,uint32_t sizedwords,int level)1788 cp_event_write(uint32_t *dwords, uint32_t sizedwords, int level)
1789 {
1790 const char *name = rnn_enumname(rnn, "vgt_event_type", dwords[0]);
1791 printl(2, "%sevent %s\n", levels[level], name);
1792
1793 if (name && (options->gpu_id > 500)) {
1794 char eventname[64];
1795 snprintf(eventname, sizeof(eventname), "EVENT:%s", name);
1796 if (!strcmp(name, "BLIT")) {
1797 do_query(eventname, 0);
1798 print_mode(level);
1799 dump_register_summary(level);
1800 }
1801 }
1802 }
1803
1804 static void
dump_register_summary(int level)1805 dump_register_summary(int level)
1806 {
1807 uint32_t i;
1808 bool saved_summary = summary;
1809 summary = false;
1810
1811 in_summary = true;
1812
1813 /* dump current state of registers: */
1814 printl(2, "%sdraw[%i] register values\n", levels[level], draw_count);
1815 for (i = 0; i < regcnt(); i++) {
1816 uint32_t regbase = i;
1817 uint32_t lastval = reg_val(regbase);
1818 /* skip registers that haven't been updated since last draw/blit: */
1819 if (!(options->allregs || reg_rewritten(regbase)))
1820 continue;
1821 if (!reg_written(regbase))
1822 continue;
1823 if (lastval != lastvals[regbase]) {
1824 printl(2, "!");
1825 lastvals[regbase] = lastval;
1826 } else {
1827 printl(2, " ");
1828 }
1829 if (reg_rewritten(regbase)) {
1830 printl(2, "+");
1831 } else {
1832 printl(2, " ");
1833 }
1834 printl(2, "\t%08x", lastval);
1835 if (!quiet(2)) {
1836 dump_register(regbase, lastval, level);
1837 }
1838 }
1839
1840 clear_rewritten();
1841
1842 in_summary = false;
1843
1844 draw_count++;
1845 summary = saved_summary;
1846 }
1847
1848 static uint32_t
draw_indx_common(uint32_t * dwords,int level)1849 draw_indx_common(uint32_t *dwords, int level)
1850 {
1851 uint32_t prim_type = dwords[1] & 0x1f;
1852 uint32_t source_select = (dwords[1] >> 6) & 0x3;
1853 uint32_t num_indices = dwords[2];
1854 const char *primtype;
1855
1856 primtype = rnn_enumname(rnn, "pc_di_primtype", prim_type);
1857
1858 do_query(primtype, num_indices);
1859
1860 printl(2, "%sdraw: %d\n", levels[level], draws[ib]);
1861 printl(2, "%sprim_type: %s (%d)\n", levels[level], primtype,
1862 prim_type);
1863 printl(2, "%ssource_select: %s (%d)\n", levels[level],
1864 rnn_enumname(rnn, "pc_di_src_sel", source_select),
1865 source_select);
1866 printl(2, "%snum_indices: %d\n", levels[level], num_indices);
1867
1868 vertices += num_indices;
1869
1870 draws[ib]++;
1871
1872 return num_indices;
1873 }
1874
/*
 * Index size encodings used by the draw-indexed packets.  IGN, 16_BIT and
 * INVALID all share encoding 0.
 */
enum pc_di_index_size {
	INDEX_SIZE_IGN     = 0x0,
	INDEX_SIZE_16_BIT  = 0x0,
	INDEX_SIZE_32_BIT  = 0x1,
	INDEX_SIZE_8_BIT   = 0x2,
	INDEX_SIZE_INVALID = 0x0,
};
1882
1883 static void
cp_draw_indx(uint32_t * dwords,uint32_t sizedwords,int level)1884 cp_draw_indx(uint32_t *dwords, uint32_t sizedwords, int level)
1885 {
1886 uint32_t num_indices = draw_indx_common(dwords, level);
1887
1888 assert(!is_64b());
1889
1890 /* if we have an index buffer, dump that: */
1891 if (sizedwords == 5) {
1892 void *ptr = hostptr(dwords[3]);
1893 printl(2, "%sgpuaddr: %08x\n", levels[level], dwords[3]);
1894 printl(2, "%sidx_size: %d\n", levels[level], dwords[4]);
1895 if (ptr) {
1896 enum pc_di_index_size size =
1897 ((dwords[1] >> 11) & 1) | ((dwords[1] >> 12) & 2);
1898 if (!quiet(2)) {
1899 int i;
1900 printf("%sidxs: ", levels[level]);
1901 if (size == INDEX_SIZE_8_BIT) {
1902 uint8_t *idx = ptr;
1903 for (i = 0; i < dwords[4]; i++)
1904 printf(" %u", idx[i]);
1905 } else if (size == INDEX_SIZE_16_BIT) {
1906 uint16_t *idx = ptr;
1907 for (i = 0; i < dwords[4]/2; i++)
1908 printf(" %u", idx[i]);
1909 } else if (size == INDEX_SIZE_32_BIT) {
1910 uint32_t *idx = ptr;
1911 for (i = 0; i < dwords[4]/4; i++)
1912 printf(" %u", idx[i]);
1913 }
1914 printf("\n");
1915 dump_hex(ptr, dwords[4]/4, level+1);
1916 }
1917 }
1918 }
1919
1920 /* don't bother dumping registers for the dummy draw_indx's.. */
1921 if (num_indices > 0)
1922 dump_register_summary(level);
1923
1924 needs_wfi = true;
1925 }
1926
1927 static void
cp_draw_indx_2(uint32_t * dwords,uint32_t sizedwords,int level)1928 cp_draw_indx_2(uint32_t *dwords, uint32_t sizedwords, int level)
1929 {
1930 uint32_t num_indices = draw_indx_common(dwords, level);
1931 enum pc_di_index_size size =
1932 ((dwords[1] >> 11) & 1) | ((dwords[1] >> 12) & 2);
1933 void *ptr = &dwords[3];
1934 int sz = 0;
1935
1936 assert(!is_64b());
1937
1938 /* CP_DRAW_INDX_2 has embedded/inline idx buffer: */
1939 if (!quiet(2)) {
1940 int i;
1941 printf("%sidxs: ", levels[level]);
1942 if (size == INDEX_SIZE_8_BIT) {
1943 uint8_t *idx = ptr;
1944 for (i = 0; i < num_indices; i++)
1945 printf(" %u", idx[i]);
1946 sz = num_indices;
1947 } else if (size == INDEX_SIZE_16_BIT) {
1948 uint16_t *idx = ptr;
1949 for (i = 0; i < num_indices; i++)
1950 printf(" %u", idx[i]);
1951 sz = num_indices * 2;
1952 } else if (size == INDEX_SIZE_32_BIT) {
1953 uint32_t *idx = ptr;
1954 for (i = 0; i < num_indices; i++)
1955 printf(" %u", idx[i]);
1956 sz = num_indices * 4;
1957 }
1958 printf("\n");
1959 dump_hex(ptr, sz / 4, level+1);
1960 }
1961
1962 /* don't bother dumping registers for the dummy draw_indx's.. */
1963 if (num_indices > 0)
1964 dump_register_summary(level);
1965 }
1966
1967 static void
cp_draw_indx_offset(uint32_t * dwords,uint32_t sizedwords,int level)1968 cp_draw_indx_offset(uint32_t *dwords, uint32_t sizedwords, int level)
1969 {
1970 uint32_t num_indices = dwords[2];
1971 uint32_t prim_type = dwords[0] & 0x1f;
1972
1973 do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), num_indices);
1974 print_mode(level);
1975
1976 /* don't bother dumping registers for the dummy draw_indx's.. */
1977 if (num_indices > 0)
1978 dump_register_summary(level);
1979 }
1980
1981 static void
cp_draw_indx_indirect(uint32_t * dwords,uint32_t sizedwords,int level)1982 cp_draw_indx_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
1983 {
1984 uint32_t prim_type = dwords[0] & 0x1f;
1985 uint64_t addr;
1986
1987 do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);
1988 print_mode(level);
1989
1990 if (is_64b())
1991 addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1];
1992 else
1993 addr = dwords[1];
1994 dump_gpuaddr_size(addr, level, 0x10, 2);
1995
1996 if (is_64b())
1997 addr = (((uint64_t)dwords[5] & 0x1ffff) << 32) | dwords[4];
1998 else
1999 addr = dwords[3];
2000 dump_gpuaddr_size(addr, level, 0x10, 2);
2001
2002 dump_register_summary(level);
2003 }
2004
2005 static void
cp_draw_indirect(uint32_t * dwords,uint32_t sizedwords,int level)2006 cp_draw_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
2007 {
2008 uint32_t prim_type = dwords[0] & 0x1f;
2009 uint64_t addr;
2010
2011 do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);
2012 print_mode(level);
2013
2014 addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1];
2015 dump_gpuaddr_size(addr, level, 0x10, 2);
2016
2017 dump_register_summary(level);
2018 }
2019
2020 static void
cp_draw_indirect_multi(uint32_t * dwords,uint32_t sizedwords,int level)2021 cp_draw_indirect_multi(uint32_t *dwords, uint32_t sizedwords, int level)
2022 {
2023 uint32_t prim_type = dwords[0] & 0x1f;
2024 uint32_t count = dwords[2];
2025
2026 do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);
2027 print_mode(level);
2028
2029 struct rnndomain *domain = rnn_finddomain(rnn->db, "CP_DRAW_INDIRECT_MULTI");
2030 uint32_t count_dword = rnndec_decodereg(rnn->vc, domain, "INDIRECT_COUNT");
2031 uint32_t addr_dword = rnndec_decodereg(rnn->vc, domain, "INDIRECT");
2032 uint64_t stride_dword = rnndec_decodereg(rnn->vc, domain, "STRIDE");
2033
2034 if (count_dword) {
2035 uint64_t count_addr = ((uint64_t)dwords[count_dword + 1] << 32) | dwords[count_dword];
2036 uint32_t *buf = hostptr(count_addr);
2037
2038 /* Don't print more draws than this if we don't know the indirect
2039 * count. It's possible the user will give ~0 or some other large
2040 * value, expecting the GPU to fill in the draw count, and we don't
2041 * want to print a gazillion draws in that case:
2042 */
2043 const uint32_t max_draw_count = 0x100;
2044
2045 /* Assume the indirect count is garbage if it's larger than this
2046 * (quite large) value or 0. Hopefully this catches most cases.
2047 */
2048 const uint32_t max_indirect_draw_count = 0x10000;
2049
2050 if (buf) {
2051 printf("%sindirect count: %u\n", levels[level], *buf);
2052 if (*buf == 0 || *buf > max_indirect_draw_count) {
2053 /* garbage value */
2054 count = min(count, max_draw_count);
2055 } else {
2056 /* not garbage */
2057 count = min(count, *buf);
2058 }
2059 } else {
2060 count = min(count, max_draw_count);
2061 }
2062 }
2063
2064 if (addr_dword && stride_dword) {
2065 uint64_t addr = ((uint64_t)dwords[addr_dword + 1] << 32) | dwords[addr_dword];
2066 uint32_t stride = dwords[stride_dword];
2067
2068 for (unsigned i = 0; i < count; i++, addr += stride) {
2069 printf("%sdraw %d:\n", levels[level], i);
2070 dump_gpuaddr_size(addr, level, 0x10, 2);
2071 }
2072 }
2073
2074 dump_register_summary(level);
2075 }
2076
/*
 * Handler for CP_RUN_OPENCL: issues the "COMPUTE" query and prints the
 * register summary.  The packet payload is not inspected.
 */
static void
cp_run_cl(uint32_t *dwords, uint32_t sizedwords, int level)
{
	/* payload unused by this handler */
	(void)dwords;
	(void)sizedwords;

	do_query("COMPUTE", 1);
	dump_register_summary(level);
}
2083
2084 static void
cp_nop(uint32_t * dwords,uint32_t sizedwords,int level)2085 cp_nop(uint32_t *dwords, uint32_t sizedwords, int level)
2086 {
2087 const char *buf = (void *)dwords;
2088 int i;
2089
2090 if (quiet(3))
2091 return;
2092
2093 // blob doesn't use CP_NOP for string_marker but it does
2094 // use it for things that end up looking like, but aren't
2095 // ascii chars:
2096 if (!options->decode_markers)
2097 return;
2098
2099 for (i = 0; i < 4 * sizedwords; i++) {
2100 if (buf[i] == '\0')
2101 break;
2102 if (isascii(buf[i]))
2103 printf("%c", buf[i]);
2104 }
2105 printf("\n");
2106 }
2107
2108 static void
cp_indirect(uint32_t * dwords,uint32_t sizedwords,int level)2109 cp_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
2110 {
2111 /* traverse indirect buffers */
2112 uint64_t ibaddr;
2113 uint32_t ibsize;
2114 uint32_t *ptr = NULL;
2115
2116 if (is_64b()) {
2117 /* a5xx+.. high 32b of gpu addr, then size: */
2118 ibaddr = dwords[0];
2119 ibaddr |= ((uint64_t)dwords[1]) << 32;
2120 ibsize = dwords[2];
2121 } else {
2122 ibaddr = dwords[0];
2123 ibsize = dwords[1];
2124 }
2125
2126 if (!quiet(3)) {
2127 if (is_64b()) {
2128 printf("%sibaddr:%016"PRIx64"\n", levels[level], ibaddr);
2129 } else {
2130 printf("%sibaddr:%08x\n", levels[level], (uint32_t)ibaddr);
2131 }
2132 printf("%sibsize:%08x\n", levels[level], ibsize);
2133 }
2134
2135 if (options->once && has_dumped(ibaddr, enable_mask))
2136 return;
2137
2138 /* 'query-compare' mode implies 'once' mode, although we need only to
2139 * process the cmdstream for *any* enable_mask mode, since we are
2140 * comparing binning vs draw reg values at the same time, ie. it is
2141 * not useful to process the same draw in both binning and draw pass.
2142 */
2143 if (options->query_compare && has_dumped(ibaddr, MODE_ALL))
2144 return;
2145
2146 /* map gpuaddr back to hostptr: */
2147 ptr = hostptr(ibaddr);
2148
2149 if (ptr) {
2150 /* If the GPU hung within the target IB, the trigger point will be
2151 * just after the current CP_INDIRECT_BUFFER. Because the IB is
2152 * executed but never returns. Account for this by checking if
2153 * the IB returned:
2154 */
2155 highlight_gpuaddr(gpuaddr(&dwords[is_64b() ? 3 : 2]));
2156
2157 ib++;
2158 ibs[ib].base = ibaddr;
2159 ibs[ib].size = ibsize;
2160
2161 dump_commands(ptr, ibsize, level);
2162 ib--;
2163 } else {
2164 fprintf(stderr, "could not find: %016"PRIx64" (%d)\n", ibaddr, ibsize);
2165 }
2166 }
2167
2168 static void
cp_wfi(uint32_t * dwords,uint32_t sizedwords,int level)2169 cp_wfi(uint32_t *dwords, uint32_t sizedwords, int level)
2170 {
2171 needs_wfi = false;
2172 }
2173
2174 static void
cp_mem_write(uint32_t * dwords,uint32_t sizedwords,int level)2175 cp_mem_write(uint32_t *dwords, uint32_t sizedwords, int level)
2176 {
2177 if (quiet(2))
2178 return;
2179
2180 if (is_64b()) {
2181 uint64_t gpuaddr = dwords[0] | (((uint64_t)dwords[1]) << 32);
2182 printf("%sgpuaddr:%016"PRIx64"\n", levels[level], gpuaddr);
2183 dump_hex(&dwords[2], sizedwords-2, level+1);
2184
2185 if (pkt_is_type4(dwords[2]) || pkt_is_type7(dwords[2]))
2186 dump_commands(&dwords[2], sizedwords-2, level+1);
2187 } else {
2188 uint32_t gpuaddr = dwords[0];
2189 printf("%sgpuaddr:%08x\n", levels[level], gpuaddr);
2190 dump_float((float *)&dwords[1], sizedwords-1, level+1);
2191 }
2192 }
2193
2194 static void
cp_rmw(uint32_t * dwords,uint32_t sizedwords,int level)2195 cp_rmw(uint32_t *dwords, uint32_t sizedwords, int level)
2196 {
2197 uint32_t val = dwords[0] & 0xffff;
2198 uint32_t and = dwords[1];
2199 uint32_t or = dwords[2];
2200 printl(3, "%srmw (%s & 0x%08x) | 0x%08x)\n", levels[level], regname(val, 1), and, or);
2201 if (needs_wfi)
2202 printl(2, "NEEDS WFI: rmw (%s & 0x%08x) | 0x%08x)\n", regname(val, 1), and, or);
2203 reg_set(val, (reg_val(val) & and) | or);
2204 }
2205
2206 static void
cp_reg_mem(uint32_t * dwords,uint32_t sizedwords,int level)2207 cp_reg_mem(uint32_t *dwords, uint32_t sizedwords, int level)
2208 {
2209 uint32_t val = dwords[0] & 0xffff;
2210 printl(3, "%sbase register: %s\n", levels[level], regname(val, 1));
2211
2212 if (quiet(2))
2213 return;
2214
2215 uint64_t gpuaddr = dwords[1] | (((uint64_t)dwords[2]) << 32);
2216 printf("%sgpuaddr:%016"PRIx64"\n", levels[level], gpuaddr);
2217 void *ptr = hostptr(gpuaddr);
2218 if (ptr) {
2219 uint32_t cnt = (dwords[0] >> 19) & 0x3ff;
2220 dump_hex(ptr, cnt, level + 1);
2221 }
2222 }
2223
/*
 * One draw-state group as recorded by cp_set_draw_state().  A slot with
 * count == 0 is treated as empty by load_group().
 */
struct draw_state {
	uint16_t enable_mask;   /* header bits [23:20] */
	uint16_t flags;         /* header bits [19:16], FLAG_* below */
	uint32_t count;         /* header bits [15:0], size of the group in dwords */
	uint64_t addr;          /* GPU address of the group's commands */
};

/* indexed by group id (header bits [28:24], i.e. 0..31) */
struct draw_state state[32];

/* flag bits carried in draw_state::flags */
#define FLAG_DIRTY              (1 << 0)
#define FLAG_DISABLE            (1 << 1)
#define FLAG_DISABLE_ALL_GROUPS (1 << 2)
#define FLAG_LOAD_IMMED         (1 << 3)

/* last value written by CP_SET_MODE */
static int draw_mode;
2239
2240 static void
disable_group(unsigned group_id)2241 disable_group(unsigned group_id)
2242 {
2243 struct draw_state *ds = &state[group_id];
2244 memset(ds, 0, sizeof(*ds));
2245 }
2246
2247 static void
disable_all_groups(void)2248 disable_all_groups(void)
2249 {
2250 for (unsigned i = 0; i < ARRAY_SIZE(state); i++)
2251 disable_group(i);
2252 }
2253
2254 static void
load_group(unsigned group_id,int level)2255 load_group(unsigned group_id, int level)
2256 {
2257 struct draw_state *ds = &state[group_id];
2258
2259 if (!ds->count)
2260 return;
2261
2262 printl(2, "%sgroup_id: %u\n", levels[level], group_id);
2263 printl(2, "%scount: %d\n", levels[level], ds->count);
2264 printl(2, "%saddr: %016llx\n", levels[level], ds->addr);
2265 printl(2, "%sflags: %x\n", levels[level], ds->flags);
2266
2267 if (options->gpu_id >= 600) {
2268 printl(2, "%senable_mask: 0x%x\n", levels[level], ds->enable_mask);
2269
2270 if (!(ds->enable_mask & enable_mask)) {
2271 printl(2, "%s\tskipped!\n\n", levels[level]);
2272 return;
2273 }
2274 }
2275
2276 void *ptr = hostptr(ds->addr);
2277 if (ptr) {
2278 if (!quiet(2))
2279 dump_hex(ptr, ds->count, level+1);
2280
2281 ib++;
2282 dump_commands(ptr, ds->count, level+1);
2283 ib--;
2284 }
2285 }
2286
2287 static void
load_all_groups(int level)2288 load_all_groups(int level)
2289 {
2290 /* sanity check, we should never recursively hit recursion here, and if
2291 * we do bad things happen:
2292 */
2293 static bool loading_groups = false;
2294 if (loading_groups) {
2295 printf("ERROR: nothing in draw state should trigger recursively loading groups!\n");
2296 return;
2297 }
2298 loading_groups = true;
2299 for (unsigned i = 0; i < ARRAY_SIZE(state); i++)
2300 load_group(i, level);
2301 loading_groups = false;
2302
2303 /* in 'query-compare' mode, defer disabling all groups until we have a
2304 * chance to process the query:
2305 */
2306 if (!options->query_compare)
2307 disable_all_groups();
2308 }
2309
2310 static void
cp_set_draw_state(uint32_t * dwords,uint32_t sizedwords,int level)2311 cp_set_draw_state(uint32_t *dwords, uint32_t sizedwords, int level)
2312 {
2313 uint32_t i;
2314
2315 for (i = 0; i < sizedwords; ) {
2316 struct draw_state *ds;
2317 uint32_t count = dwords[i] & 0xffff;
2318 uint32_t group_id = (dwords[i] >> 24) & 0x1f;
2319 uint32_t enable_mask = (dwords[i] >> 20) & 0xf;
2320 uint32_t flags = (dwords[i] >> 16) & 0xf;
2321 uint64_t addr;
2322
2323 if (is_64b()) {
2324 addr = dwords[i + 1];
2325 addr |= ((uint64_t)dwords[i + 2]) << 32;
2326 i += 3;
2327 } else {
2328 addr = dwords[i + 1];
2329 i += 2;
2330 }
2331
2332 if (flags & FLAG_DISABLE_ALL_GROUPS) {
2333 disable_all_groups();
2334 continue;
2335 }
2336
2337 if (flags & FLAG_DISABLE) {
2338 disable_group(group_id);
2339 continue;
2340 }
2341
2342 assert(group_id < ARRAY_SIZE(state));
2343 disable_group(group_id);
2344
2345 ds = &state[group_id];
2346
2347 ds->enable_mask = enable_mask;
2348 ds->flags = flags;
2349 ds->count = count;
2350 ds->addr = addr;
2351
2352 if (flags & FLAG_LOAD_IMMED) {
2353 load_group(group_id, level);
2354 disable_group(group_id);
2355 }
2356 }
2357 }
2358
2359 static void
cp_set_mode(uint32_t * dwords,uint32_t sizedwords,int level)2360 cp_set_mode(uint32_t *dwords, uint32_t sizedwords, int level)
2361 {
2362 draw_mode = dwords[0];
2363 }
2364
/*
 * Handler for CP_EXEC_CS (execute compute shader): issues the "compute"
 * query and prints the register summary.  The payload is not inspected.
 */
static void
cp_exec_cs(uint32_t *dwords, uint32_t sizedwords, int level)
{
	(void)dwords;
	(void)sizedwords;

	do_query("compute", 0);
	dump_register_summary(level);
}
2372
2373 static void
cp_exec_cs_indirect(uint32_t * dwords,uint32_t sizedwords,int level)2374 cp_exec_cs_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
2375 {
2376 uint64_t addr;
2377
2378 if (is_64b()) {
2379 addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1];
2380 } else {
2381 addr = dwords[1];
2382 }
2383
2384 printl(3, "%saddr: %016llx\n", levels[level], addr);
2385 dump_gpuaddr_size(addr, level, 0x10, 2);
2386
2387 do_query("compute", 0);
2388 dump_register_summary(level);
2389 }
2390
2391 static void
cp_set_marker(uint32_t * dwords,uint32_t sizedwords,int level)2392 cp_set_marker(uint32_t *dwords, uint32_t sizedwords, int level)
2393 {
2394 render_mode = rnn_enumname(rnn, "a6xx_render_mode", dwords[0] & 0xf);
2395
2396 if (!strcmp(render_mode, "RM6_BINNING")) {
2397 enable_mask = MODE_BINNING;
2398 } else if (!strcmp(render_mode, "RM6_GMEM")) {
2399 enable_mask = MODE_GMEM;
2400 } else if (!strcmp(render_mode, "RM6_BYPASS")) {
2401 enable_mask = MODE_BYPASS;
2402 }
2403 }
2404
2405 static void
cp_set_render_mode(uint32_t * dwords,uint32_t sizedwords,int level)2406 cp_set_render_mode(uint32_t *dwords, uint32_t sizedwords, int level)
2407 {
2408 uint64_t addr;
2409 uint32_t *ptr, len;
2410
2411 assert(is_64b());
2412
2413 /* TODO seems to have two ptrs, 9 dwords total (incl pkt7 hdr)..
2414 * not sure if this can come in different sizes.
2415 *
2416 * First ptr doesn't seem to be cmdstream, second one does.
2417 *
2418 * Comment from downstream kernel:
2419 *
2420 * SRM -- set render mode (ex binning, direct render etc)
2421 * SRM is set by UMD usually at start of IB to tell CP the type of
2422 * preemption.
2423 * KMD needs to set SRM to NULL to indicate CP that rendering is
2424 * done by IB.
2425 * ------------------------------------------------------------------
2426 *
2427 * Seems to always be one of these two:
2428 * 70ec0008 00000001 001c0000 00000000 00000010 00000003 0000000d 001c2000 00000000
2429 * 70ec0008 00000001 001c0000 00000000 00000000 00000003 0000000d 001c2000 00000000
2430 *
2431 */
2432
2433 assert(options->gpu_id >= 500);
2434
2435 render_mode = rnn_enumname(rnn, "render_mode_cmd", dwords[0]);
2436
2437 if (sizedwords == 1)
2438 return;
2439
2440 addr = dwords[1];
2441 addr |= ((uint64_t)dwords[2]) << 32;
2442
2443 mode = dwords[3];
2444
2445 dump_gpuaddr(addr, level+1);
2446
2447 if (sizedwords == 5)
2448 return;
2449
2450 assert(sizedwords == 8);
2451
2452 len = dwords[5];
2453 addr = dwords[6];
2454 addr |= ((uint64_t)dwords[7]) << 32;
2455
2456 printl(3, "%saddr: 0x%016lx\n", levels[level], addr);
2457 printl(3, "%slen: 0x%x\n", levels[level], len);
2458
2459 ptr = hostptr(addr);
2460
2461 if (ptr) {
2462 if (!quiet(2)) {
2463 ib++;
2464 dump_commands(ptr, len, level+1);
2465 ib--;
2466 dump_hex(ptr, len, level+1);
2467 }
2468 }
2469 }
2470
2471 static void
cp_compute_checkpoint(uint32_t * dwords,uint32_t sizedwords,int level)2472 cp_compute_checkpoint(uint32_t *dwords, uint32_t sizedwords, int level)
2473 {
2474 uint64_t addr;
2475 uint32_t *ptr, len;
2476
2477 assert(is_64b());
2478 assert(options->gpu_id >= 500);
2479
2480 assert(sizedwords == 8);
2481
2482 addr = dwords[5];
2483 addr |= ((uint64_t)dwords[6]) << 32;
2484 len = dwords[7];
2485
2486 printl(3, "%saddr: 0x%016"PRIx64"\n", levels[level], addr);
2487 printl(3, "%slen: 0x%x\n", levels[level], len);
2488
2489 ptr = hostptr(addr);
2490
2491 if (ptr) {
2492 if (!quiet(2)) {
2493 ib++;
2494 dump_commands(ptr, len, level+1);
2495 ib--;
2496 dump_hex(ptr, len, level+1);
2497 }
2498 }
2499 }
2500
2501 static void
cp_blit(uint32_t * dwords,uint32_t sizedwords,int level)2502 cp_blit(uint32_t *dwords, uint32_t sizedwords, int level)
2503 {
2504 do_query(rnn_enumname(rnn, "cp_blit_cmd", dwords[0]), 0);
2505 print_mode(level);
2506 dump_register_summary(level);
2507 }
2508
2509 static void
cp_context_reg_bunch(uint32_t * dwords,uint32_t sizedwords,int level)2510 cp_context_reg_bunch(uint32_t *dwords, uint32_t sizedwords, int level)
2511 {
2512 int i;
2513
2514 /* NOTE: seems to write same reg multiple times.. not sure if different parts of
2515 * these are triggered by the FLUSH_SO_n events?? (if that is what they actually
2516 * are?)
2517 */
2518 bool saved_summary = summary;
2519 summary = false;
2520
2521 for (i = 0; i < sizedwords; i += 2) {
2522 dump_register(dwords[i+0], dwords[i+1], level+1);
2523 reg_set(dwords[i+0], dwords[i+1]);
2524 }
2525
2526 summary = saved_summary;
2527 }
2528
/*
 * Handler for CP_REG_WRITE: dwords[1][15:0] is the register and dwords[2]
 * the value; the write is printed and applied to the shadowed register
 * state.
 */
static void
cp_reg_write(uint32_t *dwords, uint32_t sizedwords, int level)
{
	const uint32_t target = dwords[1] & 0xffff;
	const uint32_t value = dwords[2];

	dump_register(target, value, level+1);
	reg_set(target, value);
}
2537
/*
 * Handler for CP_SET_CTXSWITCH_IB: dwords[0..1] form a 64-bit GPU address
 * and dwords[2][15:0] a size in dwords.  The address is printed and, if it
 * maps to host memory, the buffer is decoded as a command stream.
 */
static void
cp_set_ctxswitch_ib(uint32_t *dwords, uint32_t sizedwords, int level)
{
	const uint32_t ndwords = dwords[2] & 0xffff;
	const uint64_t addr = ((uint64_t)dwords[1] << 32) | dwords[0];

	printf("addr=%"PRIx64"\n", addr);

	void *buf = hostptr(addr);
	if (!buf)
		return;

	dump_commands(buf, ndwords, level+1);
}
2553
2554 static void
cp_skip_ib2_enable_global(uint32_t * dwords,uint32_t sizedwords,int level)2555 cp_skip_ib2_enable_global(uint32_t *dwords, uint32_t sizedwords, int level)
2556 {
2557 skip_ib2_enable_global = dwords[0];
2558 }
2559
2560 static void
cp_skip_ib2_enable_local(uint32_t * dwords,uint32_t sizedwords,int level)2561 cp_skip_ib2_enable_local(uint32_t *dwords, uint32_t sizedwords, int level)
2562 {
2563 skip_ib2_enable_local = dwords[0];
2564 }
2565
2566 #define CP(x, fxn, ...) { "CP_" #x, fxn, ##__VA_ARGS__ }
2567 static const struct type3_op {
2568 const char *name;
2569 void (*fxn)(uint32_t *dwords, uint32_t sizedwords, int level);
2570 struct {
2571 bool load_all_groups;
2572 } options;
2573 } type3_op[] = {
2574 CP(NOP, cp_nop),
2575 CP(INDIRECT_BUFFER, cp_indirect),
2576 CP(INDIRECT_BUFFER_PFD, cp_indirect),
2577 CP(WAIT_FOR_IDLE, cp_wfi),
2578 CP(REG_RMW, cp_rmw),
2579 CP(REG_TO_MEM, cp_reg_mem),
2580 CP(MEM_TO_REG, cp_reg_mem), /* same layout as CP_REG_TO_MEM */
2581 CP(MEM_WRITE, cp_mem_write),
2582 CP(EVENT_WRITE, cp_event_write),
2583 CP(RUN_OPENCL, cp_run_cl),
2584 CP(DRAW_INDX, cp_draw_indx, {.load_all_groups=true}),
2585 CP(DRAW_INDX_2, cp_draw_indx_2, {.load_all_groups=true}),
2586 CP(SET_CONSTANT, cp_set_const),
2587 CP(IM_LOAD_IMMEDIATE, cp_im_loadi),
2588 CP(WIDE_REG_WRITE, cp_wide_reg_write),
2589
2590 /* for a3xx */
2591 CP(LOAD_STATE, cp_load_state),
2592 CP(SET_BIN, cp_set_bin),
2593
2594 /* for a4xx */
2595 CP(LOAD_STATE4, cp_load_state),
2596 CP(SET_DRAW_STATE, cp_set_draw_state),
2597 CP(DRAW_INDX_OFFSET, cp_draw_indx_offset, {.load_all_groups=true}),
2598 CP(EXEC_CS, cp_exec_cs, {.load_all_groups=true}),
2599 CP(EXEC_CS_INDIRECT, cp_exec_cs_indirect, {.load_all_groups=true}),
2600
2601 /* for a5xx */
2602 CP(SET_RENDER_MODE, cp_set_render_mode),
2603 CP(COMPUTE_CHECKPOINT, cp_compute_checkpoint),
2604 CP(BLIT, cp_blit),
2605 CP(CONTEXT_REG_BUNCH, cp_context_reg_bunch),
2606 CP(DRAW_INDIRECT, cp_draw_indirect, {.load_all_groups=true}),
2607 CP(DRAW_INDX_INDIRECT, cp_draw_indx_indirect, {.load_all_groups=true}),
2608 CP(DRAW_INDIRECT_MULTI, cp_draw_indirect_multi, {.load_all_groups=true}),
2609 CP(SKIP_IB2_ENABLE_GLOBAL, cp_skip_ib2_enable_global),
2610 CP(SKIP_IB2_ENABLE_LOCAL, cp_skip_ib2_enable_local),
2611
2612 /* for a6xx */
2613 CP(LOAD_STATE6_GEOM, cp_load_state),
2614 CP(LOAD_STATE6_FRAG, cp_load_state),
2615 CP(LOAD_STATE6, cp_load_state),
2616 CP(SET_MODE, cp_set_mode),
2617 CP(SET_MARKER, cp_set_marker),
2618 CP(REG_WRITE, cp_reg_write),
2619
2620 CP(SET_CTXSWITCH_IB, cp_set_ctxswitch_ib),
2621 };
2622
/*
 * Do-nothing packet handler, used by get_type3_op() for packets that have
 * no dedicated handler.
 */
static void
noop_fxn(uint32_t *dwords, uint32_t sizedwords, int level)
{
	(void)dwords;
	(void)sizedwords;
	(void)level;
}
2627
2628 static const struct type3_op *
get_type3_op(unsigned opc)2629 get_type3_op(unsigned opc)
2630 {
2631 static const struct type3_op dummy_op = {
2632 .fxn = noop_fxn,
2633 };
2634 const char *name = pktname(opc);
2635
2636 if (!name)
2637 return &dummy_op;
2638
2639 for (unsigned i = 0; i < ARRAY_SIZE(type3_op); i++)
2640 if (!strcmp(name, type3_op[i].name))
2641 return &type3_op[i];
2642
2643 return &dummy_op;
2644 }
2645
2646 void
dump_commands(uint32_t * dwords,uint32_t sizedwords,int level)2647 dump_commands(uint32_t *dwords, uint32_t sizedwords, int level)
2648 {
2649 int dwords_left = sizedwords;
2650 uint32_t count = 0; /* dword count including packet header */
2651 uint32_t val;
2652
2653 // assert(dwords);
2654 if (!dwords) {
2655 printf("NULL cmd buffer!\n");
2656 return;
2657 }
2658
2659 draws[ib] = 0;
2660
2661 while (dwords_left > 0) {
2662
2663 current_draw_count = draw_count;
2664
2665 /* hack, this looks like a -1 underflow, in some versions
2666 * when it tries to write zero registers via pkt0
2667 */
2668 // if ((dwords[0] >> 16) == 0xffff)
2669 // goto skip;
2670
2671 if (pkt_is_type0(dwords[0])) {
2672 printl(3, "t0");
2673 count = type0_pkt_size(dwords[0]) + 1;
2674 val = type0_pkt_offset(dwords[0]);
2675 assert(val < regcnt());
2676 printl(3, "%swrite %s%s (%04x)\n", levels[level+1], regname(val, 1),
2677 (dwords[0] & 0x8000) ? " (same register)" : "", val);
2678 dump_registers(val, dwords+1, count-1, level+2);
2679 if (!quiet(3))
2680 dump_hex(dwords, count, level+1);
2681 } else if (pkt_is_type4(dwords[0])) {
2682 /* basically the same(ish) as type0 prior to a5xx */
2683 printl(3, "t4");
2684 count = type4_pkt_size(dwords[0]) + 1;
2685 val = type4_pkt_offset(dwords[0]);
2686 assert(val < regcnt());
2687 printl(3, "%swrite %s (%04x)\n", levels[level+1], regname(val, 1), val);
2688 dump_registers(val, dwords+1, count-1, level+2);
2689 if (!quiet(3))
2690 dump_hex(dwords, count, level+1);
2691 #if 0
2692 } else if (pkt_is_type1(dwords[0])) {
2693 printl(3, "t1");
2694 count = 3;
2695 val = dwords[0] & 0xfff;
2696 printl(3, "%swrite %s\n", levels[level+1], regname(val, 1));
2697 dump_registers(val, dwords+1, 1, level+2);
2698 val = (dwords[0] >> 12) & 0xfff;
2699 printl(3, "%swrite %s\n", levels[level+1], regname(val, 1));
2700 dump_registers(val, dwords+2, 1, level+2);
2701 if (!quiet(3))
2702 dump_hex(dwords, count, level+1);
2703 } else if (pkt_is_type2(dwords[0])) {
2704 printl(3, "t2");
2705 printf("%sNOP\n", levels[level+1]);
2706 count = 1;
2707 if (!quiet(3))
2708 dump_hex(dwords, count, level+1);
2709 #endif
2710 } else if (pkt_is_type3(dwords[0])) {
2711 count = type3_pkt_size(dwords[0]) + 1;
2712 val = cp_type3_opcode(dwords[0]);
2713 const struct type3_op *op = get_type3_op(val);
2714 if (op->options.load_all_groups)
2715 load_all_groups(level+1);
2716 printl(3, "t3");
2717 const char *name = pktname(val);
2718 if (!quiet(2)) {
2719 printf("\t%sopcode: %s%s%s (%02x) (%d dwords)%s\n", levels[level],
2720 rnn->vc->colors->bctarg, name, rnn->vc->colors->reset,
2721 val, count, (dwords[0] & 0x1) ? " (predicated)" : "");
2722 }
2723 if (name)
2724 dump_domain(dwords+1, count-1, level+2, name);
2725 op->fxn(dwords+1, count-1, level+1);
2726 if (!quiet(2))
2727 dump_hex(dwords, count, level+1);
2728 } else if (pkt_is_type7(dwords[0])) {
2729 count = type7_pkt_size(dwords[0]) + 1;
2730 val = cp_type7_opcode(dwords[0]);
2731 const struct type3_op *op = get_type3_op(val);
2732 if (op->options.load_all_groups)
2733 load_all_groups(level+1);
2734 printl(3, "t7");
2735 const char *name = pktname(val);
2736 if (!quiet(2)) {
2737 printf("\t%sopcode: %s%s%s (%02x) (%d dwords)\n", levels[level],
2738 rnn->vc->colors->bctarg, name, rnn->vc->colors->reset,
2739 val, count);
2740 }
2741 if (name) {
2742 /* special hack for two packets that decode the same way
2743 * on a6xx:
2744 */
2745 if (!strcmp(name, "CP_LOAD_STATE6_FRAG") ||
2746 !strcmp(name, "CP_LOAD_STATE6_GEOM"))
2747 name = "CP_LOAD_STATE6";
2748 dump_domain(dwords+1, count-1, level+2, name);
2749 }
2750 op->fxn(dwords+1, count-1, level+1);
2751 if (!quiet(2))
2752 dump_hex(dwords, count, level+1);
2753 } else if (pkt_is_type2(dwords[0])) {
2754 printl(3, "t2");
2755 printl(3, "%snop\n", levels[level+1]);
2756 } else {
2757 /* for 5xx+ we can do a passable job of looking for start of next valid packet: */
2758 if (options->gpu_id >= 500) {
2759 while (dwords_left > 0) {
2760 if (pkt_is_type7(dwords[0]) || pkt_is_type4(dwords[0]))
2761 break;
2762 printf("bad type! %08x\n", dwords[0]);
2763 dwords++;
2764 dwords_left--;
2765 }
2766 } else {
2767 printf("bad type! %08x\n", dwords[0]);
2768 return;
2769 }
2770 }
2771
2772 dwords += count;
2773 dwords_left -= count;
2774
2775 }
2776
2777 if (dwords_left < 0)
2778 printf("**** this ain't right!! dwords_left=%d\n", dwords_left);
2779 }
2780