1 /*
2 * Copyright 2015 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23
24 #include "si_pipe.h"
25 #include "si_compute.h"
26 #include "sid.h"
27 #include "gfx9d.h"
28 #include "sid_tables.h"
29 #include "ddebug/dd_util.h"
30 #include "util/u_dump.h"
31 #include "util/u_log.h"
32 #include "util/u_memory.h"
33 #include "ac_debug.h"
34
35 static void si_dump_bo_list(struct si_context *sctx,
36 const struct radeon_saved_cs *saved, FILE *f);
37
38 DEBUG_GET_ONCE_OPTION(replace_shaders, "RADEON_REPLACE_SHADERS", NULL)
39
si_dump_shader(struct si_screen * sscreen,enum pipe_shader_type processor,const struct si_shader * shader,FILE * f)40 static void si_dump_shader(struct si_screen *sscreen,
41 enum pipe_shader_type processor,
42 const struct si_shader *shader, FILE *f)
43 {
44 if (shader->shader_log)
45 fwrite(shader->shader_log, shader->shader_log_size, 1, f);
46 else
47 si_shader_dump(sscreen, shader, NULL, processor, f, false);
48 }
49
/* Log chunk that keeps a shader alive until the log is printed/destroyed. */
struct si_log_chunk_shader {
	/* The shader destroy code assumes a current context for unlinking of
	 * PM4 packets etc.
	 *
	 * While we should be able to destroy shaders without a context, doing
	 * so would happen only very rarely and be therefore likely to fail
	 * just when you're trying to debug something. Let's just remember the
	 * current context in the chunk.
	 */
	struct si_context *ctx;
	struct si_shader *shader;          /* the shader to dump */
	enum pipe_shader_type processor;   /* shader stage for disasm labels */

	/* For keep-alive reference counts */
	struct si_shader_selector *sel;    /* gfx shaders */
	struct si_compute *program;        /* compute shaders */
};
67
68 static void
si_log_chunk_shader_destroy(void * data)69 si_log_chunk_shader_destroy(void *data)
70 {
71 struct si_log_chunk_shader *chunk = data;
72 si_shader_selector_reference(chunk->ctx, &chunk->sel, NULL);
73 si_compute_reference(&chunk->program, NULL);
74 FREE(chunk);
75 }
76
77 static void
si_log_chunk_shader_print(void * data,FILE * f)78 si_log_chunk_shader_print(void *data, FILE *f)
79 {
80 struct si_log_chunk_shader *chunk = data;
81 struct si_screen *sscreen = chunk->ctx->screen;
82 si_dump_shader(sscreen, chunk->processor,
83 chunk->shader, f);
84 }
85
86 static struct u_log_chunk_type si_log_chunk_type_shader = {
87 .destroy = si_log_chunk_shader_destroy,
88 .print = si_log_chunk_shader_print,
89 };
90
si_dump_gfx_shader(struct si_context * ctx,const struct si_shader_ctx_state * state,struct u_log_context * log)91 static void si_dump_gfx_shader(struct si_context *ctx,
92 const struct si_shader_ctx_state *state,
93 struct u_log_context *log)
94 {
95 struct si_shader *current = state->current;
96
97 if (!state->cso || !current)
98 return;
99
100 struct si_log_chunk_shader *chunk = CALLOC_STRUCT(si_log_chunk_shader);
101 chunk->ctx = ctx;
102 chunk->processor = state->cso->info.processor;
103 chunk->shader = current;
104 si_shader_selector_reference(ctx, &chunk->sel, current->selector);
105 u_log_chunk(log, &si_log_chunk_type_shader, chunk);
106 }
107
si_dump_compute_shader(struct si_context * ctx,struct u_log_context * log)108 static void si_dump_compute_shader(struct si_context *ctx,
109 struct u_log_context *log)
110 {
111 const struct si_cs_shader_state *state = &ctx->cs_shader_state;
112
113 if (!state->program)
114 return;
115
116 struct si_log_chunk_shader *chunk = CALLOC_STRUCT(si_log_chunk_shader);
117 chunk->ctx = ctx;
118 chunk->processor = PIPE_SHADER_COMPUTE;
119 chunk->shader = &state->program->shader;
120 si_compute_reference(&chunk->program, state->program);
121 u_log_chunk(log, &si_log_chunk_type_shader, chunk);
122 }
123
/**
 * Shader compiles can be overridden with arbitrary ELF objects by setting
 * the environment variable RADEON_REPLACE_SHADERS=num1:filename1[;num2:filename2]
 *
 * \param num     index of the compile to replace
 * \param binary  filled from the ELF file on success
 * \return true if the shader was replaced
 */
bool si_replace_shader(unsigned num, struct ac_shader_binary *binary)
{
	const char *p = debug_get_option_replace_shaders();
	const char *semicolon;
	char *copy = NULL;
	FILE *f;
	long filesize, nread;
	char *buf = NULL;
	bool replaced = false;

	if (!p)
		return false;

	/* Scan the ';'-separated list for an entry whose index equals num.
	 * On a match, break with p pointing at the file name. */
	while (*p) {
		unsigned long i;
		char *endp;
		i = strtoul(p, &endp, 0);

		p = endp;
		if (*p != ':') {
			fprintf(stderr, "RADEON_REPLACE_SHADERS formatted badly.\n");
			exit(1);
		}
		++p;

		if (i == num)
			break;

		p = strchr(p, ';');
		if (!p)
			return false;
		++p;
	}
	if (!*p)
		return false;

	/* If more entries follow, isolate this entry's file name. */
	semicolon = strchr(p, ';');
	if (semicolon) {
		p = copy = strndup(p, semicolon - p);
		if (!copy) {
			fprintf(stderr, "out of memory\n");
			return false;
		}
	}

	fprintf(stderr, "radeonsi: replace shader %u by %s\n", num, p);

	f = fopen(p, "r");
	if (!f) {
		perror("radeonsi: failed to open file");
		goto out_free;
	}

	/* Determine the file size via seek-to-end, then rewind and slurp it. */
	if (fseek(f, 0, SEEK_END) != 0)
		goto file_error;

	filesize = ftell(f);
	if (filesize < 0)
		goto file_error;

	if (fseek(f, 0, SEEK_SET) != 0)
		goto file_error;

	buf = MALLOC(filesize);
	if (!buf) {
		fprintf(stderr, "out of memory\n");
		goto out_close;
	}

	nread = fread(buf, 1, filesize, f);
	if (nread != filesize)
		goto file_error;

	/* Parse the ELF object and fill in the output binary. */
	ac_elf_read(buf, filesize, binary);
	replaced = true;

	/* Common goto-based cleanup; FREE/free accept NULL. */
out_close:
	fclose(f);
out_free:
	FREE(buf);
	free(copy);
	return replaced;

file_error:
	perror("radeonsi: reading shader");
	goto out_close;
}
215
216 /* Parsed IBs are difficult to read without colors. Use "less -R file" to
217 * read them, or use "aha -b -f file" to convert them to html.
218 */
219 #define COLOR_RESET "\033[0m"
220 #define COLOR_RED "\033[31m"
221 #define COLOR_GREEN "\033[1;32m"
222 #define COLOR_YELLOW "\033[1;33m"
223 #define COLOR_CYAN "\033[1;36m"
224
si_dump_mmapped_reg(struct si_context * sctx,FILE * f,unsigned offset)225 static void si_dump_mmapped_reg(struct si_context *sctx, FILE *f,
226 unsigned offset)
227 {
228 struct radeon_winsys *ws = sctx->b.ws;
229 uint32_t value;
230
231 if (ws->read_registers(ws, offset, 1, &value))
232 ac_dump_reg(f, sctx->b.chip_class, offset, value, ~0);
233 }
234
/* Dump GPU status registers, limited by what the running DRM version can
 * read. */
static void si_dump_debug_registers(struct si_context *sctx, FILE *f)
{
	/* radeon (DRM major 2) cannot read registers before 2.42. */
	if (sctx->screen->info.drm_major == 2 &&
	    sctx->screen->info.drm_minor < 42)
		return; /* no radeon support */

	fprintf(f, "Memory-mapped registers:\n");
	si_dump_mmapped_reg(sctx, f, R_008010_GRBM_STATUS);

	/* No other registers can be read on DRM < 3.1.0. */
	if (sctx->screen->info.drm_major < 3 ||
	    sctx->screen->info.drm_minor < 1) {
		fprintf(f, "\n");
		return;
	}

	si_dump_mmapped_reg(sctx, f, R_008008_GRBM_STATUS2);
	si_dump_mmapped_reg(sctx, f, R_008014_GRBM_STATUS_SE0);
	si_dump_mmapped_reg(sctx, f, R_008018_GRBM_STATUS_SE1);
	si_dump_mmapped_reg(sctx, f, R_008038_GRBM_STATUS_SE2);
	si_dump_mmapped_reg(sctx, f, R_00803C_GRBM_STATUS_SE3);
	si_dump_mmapped_reg(sctx, f, R_00D034_SDMA0_STATUS_REG);
	si_dump_mmapped_reg(sctx, f, R_00D834_SDMA1_STATUS_REG);
	/* SRBM registers only exist up to VI (gfx8). */
	if (sctx->b.chip_class <= VI) {
		si_dump_mmapped_reg(sctx, f, R_000E50_SRBM_STATUS);
		si_dump_mmapped_reg(sctx, f, R_000E4C_SRBM_STATUS2);
		si_dump_mmapped_reg(sctx, f, R_000E54_SRBM_STATUS3);
	}
	si_dump_mmapped_reg(sctx, f, R_008680_CP_STAT);
	si_dump_mmapped_reg(sctx, f, R_008674_CP_STALLED_STAT1);
	si_dump_mmapped_reg(sctx, f, R_008678_CP_STALLED_STAT2);
	si_dump_mmapped_reg(sctx, f, R_008670_CP_STALLED_STAT3);
	si_dump_mmapped_reg(sctx, f, R_008210_CP_CPC_STATUS);
	si_dump_mmapped_reg(sctx, f, R_008214_CP_CPC_BUSY_STAT);
	si_dump_mmapped_reg(sctx, f, R_008218_CP_CPC_STALLED_STAT1);
	si_dump_mmapped_reg(sctx, f, R_00821C_CP_CPF_STATUS);
	si_dump_mmapped_reg(sctx, f, R_008220_CP_CPF_BUSY_STAT);
	si_dump_mmapped_reg(sctx, f, R_008224_CP_CPF_STALLED_STAT1);
	fprintf(f, "\n");
}
275
/* Log chunk describing a dword range of a (possibly still open) gfx CS. */
struct si_log_chunk_cs {
	struct si_context *ctx;
	struct si_saved_cs *cs;      /* referenced; released in destroy */
	bool dump_bo_list;           /* also print the buffer list on flush */
	unsigned gfx_begin, gfx_end; /* dword range of the gfx IB to parse */
};
282
si_log_chunk_type_cs_destroy(void * data)283 static void si_log_chunk_type_cs_destroy(void *data)
284 {
285 struct si_log_chunk_cs *chunk = data;
286 si_saved_cs_reference(&chunk->cs, NULL);
287 free(chunk);
288 }
289
/* Parse and print a dword range of an IB that is still being built, i.e.
 * spread across the already-filled "prev" chunks of the winsys CS plus the
 * chunk currently being filled.
 *
 * begin/end are dword offsets relative to the start of the whole IB.
 */
static void si_parse_current_ib(FILE *f, struct radeon_winsys_cs *cs,
				unsigned begin, unsigned end,
				int *last_trace_id, unsigned trace_id_count,
				const char *name, enum chip_class chip_class)
{
	unsigned orig_end = end;

	assert(begin <= end);

	fprintf(f, "------------------ %s begin (dw = %u) ------------------\n",
		name, begin);

	/* Walk the filled chunks, parsing the overlap of [begin, end) with
	 * each chunk; begin/end are rebased after every chunk so they stay
	 * relative to the next chunk's start. */
	for (unsigned prev_idx = 0; prev_idx < cs->num_prev; ++prev_idx) {
		struct radeon_winsys_cs_chunk *chunk = &cs->prev[prev_idx];

		if (begin < chunk->cdw) {
			ac_parse_ib_chunk(f, chunk->buf + begin,
					  MIN2(end, chunk->cdw) - begin,
					  last_trace_id, trace_id_count,
					  chip_class, NULL, NULL);
		}

		/* The whole range ended inside this chunk. */
		if (end <= chunk->cdw)
			return;

		if (begin < chunk->cdw)
			fprintf(f, "\n---------- Next %s Chunk ----------\n\n",
				name);

		begin -= MIN2(begin, chunk->cdw);
		end -= chunk->cdw;
	}

	/* Whatever remains lives in the chunk currently being filled. */
	assert(end <= cs->current.cdw);

	ac_parse_ib_chunk(f, cs->current.buf + begin, end - begin, last_trace_id,
			  trace_id_count, chip_class, NULL, NULL);

	fprintf(f, "------------------- %s end (dw = %u) -------------------\n\n",
		name, orig_end);
}
331
/* Print a saved gfx CS range: init-config IB2s (for the first chunk), the
 * recorded IB dwords, and optionally the buffer list. */
static void si_log_chunk_type_cs_print(void *data, FILE *f)
{
	struct si_log_chunk_cs *chunk = data;
	struct si_context *ctx = chunk->ctx;
	struct si_saved_cs *scs = chunk->cs;
	int last_trace_id = -1;

	/* We are expecting that the ddebug pipe has already
	 * waited for the context, so this buffer should be idle.
	 * If the GPU is hung, there is no point in waiting for it.
	 */
	uint32_t *map = ctx->b.ws->buffer_map(scs->trace_buf->buf,
					      NULL,
					      PIPE_TRANSFER_UNSYNCHRONIZED |
					      PIPE_TRANSFER_READ);
	if (map)
		last_trace_id = map[0];

	if (chunk->gfx_end != chunk->gfx_begin) {
		/* The first chunk of a CS also covers the state-setup IB2s. */
		if (chunk->gfx_begin == 0) {
			if (ctx->init_config)
				ac_parse_ib(f, ctx->init_config->pm4, ctx->init_config->ndw,
					    NULL, 0, "IB2: Init config", ctx->b.chip_class,
					    NULL, NULL);

			if (ctx->init_config_gs_rings)
				ac_parse_ib(f, ctx->init_config_gs_rings->pm4,
					    ctx->init_config_gs_rings->ndw,
					    NULL, 0, "IB2: Init GS rings", ctx->b.chip_class,
					    NULL, NULL);
		}

		/* Flushed CS: parse from the saved copy; otherwise walk the
		 * live winsys CS. */
		if (scs->flushed) {
			ac_parse_ib(f, scs->gfx.ib + chunk->gfx_begin,
				    chunk->gfx_end - chunk->gfx_begin,
				    &last_trace_id, map ? 1 : 0, "IB", ctx->b.chip_class,
				    NULL, NULL);
		} else {
			si_parse_current_ib(f, ctx->b.gfx.cs, chunk->gfx_begin,
					    chunk->gfx_end, &last_trace_id, map ? 1 : 0,
					    "IB", ctx->b.chip_class);
		}
	}

	if (chunk->dump_bo_list) {
		fprintf(f, "Flushing. Time: ");
		util_dump_ns(f, scs->time_flush);
		fprintf(f, "\n\n");
		si_dump_bo_list(ctx, &scs->gfx, f);
	}
}
383
/* Chunk vtable for CS dumps. */
static const struct u_log_chunk_type si_log_chunk_type_cs = {
	.destroy = si_log_chunk_type_cs_destroy,
	.print = si_log_chunk_type_cs_print,
};
388
/* Record the gfx CS dwords emitted since the last call as a log chunk.
 * Skipped when nothing new was emitted, unless the BO list is requested. */
static void si_log_cs(struct si_context *ctx, struct u_log_context *log,
		      bool dump_bo_list)
{
	assert(ctx->current_saved_cs);

	struct si_saved_cs *scs = ctx->current_saved_cs;
	unsigned gfx_cur = ctx->b.gfx.cs->prev_dw + ctx->b.gfx.cs->current.cdw;

	if (!dump_bo_list &&
	    gfx_cur == scs->gfx_last_dw)
		return;

	struct si_log_chunk_cs *chunk = calloc(1, sizeof(*chunk));
	if (!chunk)
		return; /* OOM: skip logging rather than crash on NULL deref */

	chunk->ctx = ctx;
	si_saved_cs_reference(&chunk->cs, scs);
	chunk->dump_bo_list = dump_bo_list;

	/* Remember the range of new dwords and advance the watermark. */
	chunk->gfx_begin = scs->gfx_last_dw;
	chunk->gfx_end = gfx_cur;
	scs->gfx_last_dw = gfx_cur;

	u_log_chunk(log, &si_log_chunk_type_cs, chunk);
}
413
si_auto_log_cs(void * data,struct u_log_context * log)414 void si_auto_log_cs(void *data, struct u_log_context *log)
415 {
416 struct si_context *ctx = (struct si_context *)data;
417 si_log_cs(ctx, log, false);
418 }
419
si_log_hw_flush(struct si_context * sctx)420 void si_log_hw_flush(struct si_context *sctx)
421 {
422 if (!sctx->b.log)
423 return;
424
425 si_log_cs(sctx, sctx->b.log, true);
426 }
427
priority_to_string(enum radeon_bo_priority priority)428 static const char *priority_to_string(enum radeon_bo_priority priority)
429 {
430 #define ITEM(x) [RADEON_PRIO_##x] = #x
431 static const char *table[64] = {
432 ITEM(FENCE),
433 ITEM(TRACE),
434 ITEM(SO_FILLED_SIZE),
435 ITEM(QUERY),
436 ITEM(IB1),
437 ITEM(IB2),
438 ITEM(DRAW_INDIRECT),
439 ITEM(INDEX_BUFFER),
440 ITEM(VCE),
441 ITEM(UVD),
442 ITEM(SDMA_BUFFER),
443 ITEM(SDMA_TEXTURE),
444 ITEM(CP_DMA),
445 ITEM(CONST_BUFFER),
446 ITEM(DESCRIPTORS),
447 ITEM(BORDER_COLORS),
448 ITEM(SAMPLER_BUFFER),
449 ITEM(VERTEX_BUFFER),
450 ITEM(SHADER_RW_BUFFER),
451 ITEM(COMPUTE_GLOBAL),
452 ITEM(SAMPLER_TEXTURE),
453 ITEM(SHADER_RW_IMAGE),
454 ITEM(SAMPLER_TEXTURE_MSAA),
455 ITEM(COLOR_BUFFER),
456 ITEM(DEPTH_BUFFER),
457 ITEM(COLOR_BUFFER_MSAA),
458 ITEM(DEPTH_BUFFER_MSAA),
459 ITEM(CMASK),
460 ITEM(DCC),
461 ITEM(HTILE),
462 ITEM(SHADER_BINARY),
463 ITEM(SHADER_RINGS),
464 ITEM(SCRATCH_BUFFER),
465 };
466 #undef ITEM
467
468 assert(priority < ARRAY_SIZE(table));
469 return table[priority];
470 }
471
bo_list_compare_va(const struct radeon_bo_list_item * a,const struct radeon_bo_list_item * b)472 static int bo_list_compare_va(const struct radeon_bo_list_item *a,
473 const struct radeon_bo_list_item *b)
474 {
475 return a->vm_address < b->vm_address ? -1 :
476 a->vm_address > b->vm_address ? 1 : 0;
477 }
478
/* Print the buffer list of a saved CS, sorted by virtual address, with VM
 * holes between consecutive buffers and each buffer's priority usage. */
static void si_dump_bo_list(struct si_context *sctx,
			    const struct radeon_saved_cs *saved, FILE *f)
{
	unsigned i,j;

	if (!saved->bo_list)
		return;

	/* Sort the list according to VM addresses first. */
	qsort(saved->bo_list, saved->bo_count,
	      sizeof(saved->bo_list[0]), (void*)bo_list_compare_va);

	fprintf(f, "Buffer list (in units of pages = 4kB):\n"
		COLOR_YELLOW "        Size    VM start page         "
		"VM end page           Usage" COLOR_RESET "\n");

	for (i = 0; i < saved->bo_count; i++) {
		/* Note: Buffer sizes are expected to be aligned to 4k by the winsys. */
		const unsigned page_size = sctx->b.screen->info.gart_page_size;
		uint64_t va = saved->bo_list[i].vm_address;
		uint64_t size = saved->bo_list[i].bo_size;
		bool hit = false; /* whether a usage flag was printed yet */

		/* If there's unused virtual memory between 2 buffers, print it. */
		if (i) {
			uint64_t previous_va_end = saved->bo_list[i-1].vm_address +
						   saved->bo_list[i-1].bo_size;

			if (va > previous_va_end) {
				fprintf(f, "  %10"PRIu64"    -- hole --\n",
					(va - previous_va_end) / page_size);
			}
		}

		/* Print the buffer. */
		fprintf(f, "  %10"PRIu64"    0x%013"PRIX64"       0x%013"PRIX64"       ",
			size / page_size, va / page_size, (va + size) / page_size);

		/* Print the usage: one name per set bit in priority_usage. */
		for (j = 0; j < 64; j++) {
			if (!(saved->bo_list[i].priority_usage & (1ull << j)))
				continue;

			fprintf(f, "%s%s", !hit ? "" : ", ", priority_to_string(j));
			hit = true;
		}
		fprintf(f, "\n");
	}
	fprintf(f, "\nNote: The holes represent memory not used by the IB.\n"
		   "      Other buffers can still be allocated there.\n\n");
}
530
si_dump_framebuffer(struct si_context * sctx,struct u_log_context * log)531 static void si_dump_framebuffer(struct si_context *sctx, struct u_log_context *log)
532 {
533 struct pipe_framebuffer_state *state = &sctx->framebuffer.state;
534 struct r600_texture *rtex;
535 int i;
536
537 for (i = 0; i < state->nr_cbufs; i++) {
538 if (!state->cbufs[i])
539 continue;
540
541 rtex = (struct r600_texture*)state->cbufs[i]->texture;
542 u_log_printf(log, COLOR_YELLOW "Color buffer %i:" COLOR_RESET "\n", i);
543 si_print_texture_info(sctx->b.screen, rtex, log);
544 u_log_printf(log, "\n");
545 }
546
547 if (state->zsbuf) {
548 rtex = (struct r600_texture*)state->zsbuf->texture;
549 u_log_printf(log, COLOR_YELLOW "Depth-stencil buffer:" COLOR_RESET "\n");
550 si_print_texture_info(sctx->b.screen, rtex, log);
551 u_log_printf(log, "\n");
552 }
553 }
554
555 typedef unsigned (*slot_remap_func)(unsigned);
556
/* Log chunk holding a CPU snapshot of a descriptor list plus a reference to
 * its GPU upload, so both can be compared at print time. */
struct si_log_chunk_desc_list {
	/** Pointer to memory map of buffer where the list is uploaded */
	uint32_t *gpu_list;
	/** Reference of buffer where the list is uploaded, so that gpu_list
	 * is kept live. */
	struct r600_resource *buf;

	const char *shader_name;
	const char *elem_name;
	slot_remap_func slot_remap;  /* maps array index to descriptor slot */
	enum chip_class chip_class;
	unsigned element_dw_size;    /* dwords per descriptor element */
	unsigned num_elements;

	/* CPU copy of the descriptors, appended to the chunk allocation. */
	uint32_t list[0];
};
573
574 static void
si_log_chunk_desc_list_destroy(void * data)575 si_log_chunk_desc_list_destroy(void *data)
576 {
577 struct si_log_chunk_desc_list *chunk = data;
578 r600_resource_reference(&chunk->buf, NULL);
579 FREE(chunk);
580 }
581
582 static void
si_log_chunk_desc_list_print(void * data,FILE * f)583 si_log_chunk_desc_list_print(void *data, FILE *f)
584 {
585 struct si_log_chunk_desc_list *chunk = data;
586
587 for (unsigned i = 0; i < chunk->num_elements; i++) {
588 unsigned cpu_dw_offset = i * chunk->element_dw_size;
589 unsigned gpu_dw_offset = chunk->slot_remap(i) * chunk->element_dw_size;
590 const char *list_note = chunk->gpu_list ? "GPU list" : "CPU list";
591 uint32_t *cpu_list = chunk->list + cpu_dw_offset;
592 uint32_t *gpu_list = chunk->gpu_list ? chunk->gpu_list + gpu_dw_offset : cpu_list;
593
594 fprintf(f, COLOR_GREEN "%s%s slot %u (%s):" COLOR_RESET "\n",
595 chunk->shader_name, chunk->elem_name, i, list_note);
596
597 switch (chunk->element_dw_size) {
598 case 4:
599 for (unsigned j = 0; j < 4; j++)
600 ac_dump_reg(f, chunk->chip_class,
601 R_008F00_SQ_BUF_RSRC_WORD0 + j*4,
602 gpu_list[j], 0xffffffff);
603 break;
604 case 8:
605 for (unsigned j = 0; j < 8; j++)
606 ac_dump_reg(f, chunk->chip_class,
607 R_008F10_SQ_IMG_RSRC_WORD0 + j*4,
608 gpu_list[j], 0xffffffff);
609
610 fprintf(f, COLOR_CYAN " Buffer:" COLOR_RESET "\n");
611 for (unsigned j = 0; j < 4; j++)
612 ac_dump_reg(f, chunk->chip_class,
613 R_008F00_SQ_BUF_RSRC_WORD0 + j*4,
614 gpu_list[4+j], 0xffffffff);
615 break;
616 case 16:
617 for (unsigned j = 0; j < 8; j++)
618 ac_dump_reg(f, chunk->chip_class,
619 R_008F10_SQ_IMG_RSRC_WORD0 + j*4,
620 gpu_list[j], 0xffffffff);
621
622 fprintf(f, COLOR_CYAN " Buffer:" COLOR_RESET "\n");
623 for (unsigned j = 0; j < 4; j++)
624 ac_dump_reg(f, chunk->chip_class,
625 R_008F00_SQ_BUF_RSRC_WORD0 + j*4,
626 gpu_list[4+j], 0xffffffff);
627
628 fprintf(f, COLOR_CYAN " FMASK:" COLOR_RESET "\n");
629 for (unsigned j = 0; j < 8; j++)
630 ac_dump_reg(f, chunk->chip_class,
631 R_008F10_SQ_IMG_RSRC_WORD0 + j*4,
632 gpu_list[8+j], 0xffffffff);
633
634 fprintf(f, COLOR_CYAN " Sampler state:" COLOR_RESET "\n");
635 for (unsigned j = 0; j < 4; j++)
636 ac_dump_reg(f, chunk->chip_class,
637 R_008F30_SQ_IMG_SAMP_WORD0 + j*4,
638 gpu_list[12+j], 0xffffffff);
639 break;
640 }
641
642 if (memcmp(gpu_list, cpu_list, chunk->element_dw_size * 4) != 0) {
643 fprintf(f, COLOR_RED "!!!!! This slot was corrupted in GPU memory !!!!!"
644 COLOR_RESET "\n");
645 }
646
647 fprintf(f, "\n");
648 }
649
650 }
651
/* Chunk vtable for descriptor-list dumps. */
static const struct u_log_chunk_type si_log_chunk_type_descriptor_list = {
	.destroy = si_log_chunk_desc_list_destroy,
	.print = si_log_chunk_desc_list_print,
};
656
/* Snapshot a descriptor list into a log chunk.
 *
 * A CPU copy of the (slot-remapped) descriptors is stored inline in the
 * chunk, and a reference to the upload buffer is held so the GPU copy can
 * be compared against the snapshot at print time.
 */
static void si_dump_descriptor_list(struct si_screen *screen,
				    struct si_descriptors *desc,
				    const char *shader_name,
				    const char *elem_name,
				    unsigned element_dw_size,
				    unsigned num_elements,
				    slot_remap_func slot_remap,
				    struct u_log_context *log)
{
	if (!desc->list)
		return;

	/* In some cases, the caller doesn't know how many elements are really
	 * uploaded. Reduce num_elements to fit in the range of active slots. */
	unsigned active_range_dw_begin =
		desc->first_active_slot * desc->element_dw_size;
	unsigned active_range_dw_end =
		active_range_dw_begin + desc->num_active_slots * desc->element_dw_size;

	while (num_elements > 0) {
		int i = slot_remap(num_elements - 1);
		unsigned dw_begin = i * element_dw_size;
		unsigned dw_end = dw_begin + element_dw_size;

		if (dw_begin >= active_range_dw_begin && dw_end <= active_range_dw_end)
			break;

		num_elements--;
	}

	struct si_log_chunk_desc_list *chunk =
		CALLOC_VARIANT_LENGTH_STRUCT(si_log_chunk_desc_list,
					     4 * element_dw_size * num_elements);
	if (!chunk)
		return; /* OOM: skip logging rather than crash on NULL deref */

	chunk->shader_name = shader_name;
	chunk->elem_name = elem_name;
	chunk->element_dw_size = element_dw_size;
	chunk->num_elements = num_elements;
	chunk->slot_remap = slot_remap;
	chunk->chip_class = screen->info.chip_class;

	r600_resource_reference(&chunk->buf, desc->buffer);
	chunk->gpu_list = desc->gpu_list;

	/* Copy the descriptors into the chunk, de-remapping the slots so the
	 * stored list is densely indexed 0..num_elements-1. */
	for (unsigned i = 0; i < num_elements; ++i) {
		memcpy(&chunk->list[i * element_dw_size],
		       &desc->list[slot_remap(i) * element_dw_size],
		       4 * element_dw_size);
	}

	u_log_chunk(log, &si_log_chunk_type_descriptor_list, chunk);
}
708
/* Identity slot remap: every array index maps to the same descriptor slot. */
static unsigned si_identity(unsigned slot)
{
	return slot;
}
713
/* Dump all descriptor lists of one shader stage: vertex buffers (VS only),
 * constant buffers, shader buffers, samplers, and images.
 *
 * \param info  TGSI info of the bound shader, or NULL (compute only); when
 *              NULL, enabled-slot masks come from the context state instead.
 */
static void si_dump_descriptors(struct si_context *sctx,
				enum pipe_shader_type processor,
				const struct tgsi_shader_info *info,
				struct u_log_context *log)
{
	struct si_descriptors *descs =
		&sctx->descriptors[SI_DESCS_FIRST_SHADER +
				   processor * SI_NUM_SHADER_DESCS];
	static const char *shader_name[] = {"VS", "PS", "GS", "TCS", "TES", "CS"};
	const char *name = shader_name[processor];
	unsigned enabled_constbuf, enabled_shaderbuf, enabled_samplers;
	unsigned enabled_images;

	if (info) {
		/* Use the declarations from the shader itself. */
		enabled_constbuf = info->const_buffers_declared;
		enabled_shaderbuf = info->shader_buffers_declared;
		enabled_samplers = info->samplers_declared;
		enabled_images = info->images_declared;
	} else {
		/* Derive the masks from the bound context state. The combined
		 * const/shader-buffer mask stores const buffers in the high
		 * bits and shader buffers in reversed order in the low bits,
		 * hence the shift and bit-reversal below. */
		enabled_constbuf = sctx->const_and_shader_buffers[processor].enabled_mask >>
				   SI_NUM_SHADER_BUFFERS;
		enabled_shaderbuf = sctx->const_and_shader_buffers[processor].enabled_mask &
				    u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS);
		enabled_shaderbuf = util_bitreverse(enabled_shaderbuf) >>
				    (32 - SI_NUM_SHADER_BUFFERS);
		enabled_samplers = sctx->samplers[processor].enabled_mask;
		enabled_images = sctx->images[processor].enabled_mask;
	}

	if (processor == PIPE_SHADER_VERTEX) {
		assert(info); /* only CS may not have an info struct */

		si_dump_descriptor_list(sctx->screen, &sctx->vertex_buffers, name,
					" - Vertex buffer", 4, info->num_inputs,
					si_identity, log);
	}

	si_dump_descriptor_list(sctx->screen,
				&descs[SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS],
				name, " - Constant buffer", 4,
				util_last_bit(enabled_constbuf),
				si_get_constbuf_slot, log);
	si_dump_descriptor_list(sctx->screen,
				&descs[SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS],
				name, " - Shader buffer", 4,
				util_last_bit(enabled_shaderbuf),
				si_get_shaderbuf_slot, log);
	si_dump_descriptor_list(sctx->screen,
				&descs[SI_SHADER_DESCS_SAMPLERS_AND_IMAGES],
				name, " - Sampler", 16,
				util_last_bit(enabled_samplers),
				si_get_sampler_slot, log);
	si_dump_descriptor_list(sctx->screen,
				&descs[SI_SHADER_DESCS_SAMPLERS_AND_IMAGES],
				name, " - Image", 8,
				util_last_bit(enabled_images),
				si_get_image_slot, log);
}
772
si_dump_gfx_descriptors(struct si_context * sctx,const struct si_shader_ctx_state * state,struct u_log_context * log)773 static void si_dump_gfx_descriptors(struct si_context *sctx,
774 const struct si_shader_ctx_state *state,
775 struct u_log_context *log)
776 {
777 if (!state->cso || !state->current)
778 return;
779
780 si_dump_descriptors(sctx, state->cso->type, &state->cso->info, log);
781 }
782
si_dump_compute_descriptors(struct si_context * sctx,struct u_log_context * log)783 static void si_dump_compute_descriptors(struct si_context *sctx,
784 struct u_log_context *log)
785 {
786 if (!sctx->cs_shader_state.program)
787 return;
788
789 si_dump_descriptors(sctx, PIPE_SHADER_COMPUTE, NULL, log);
790 }
791
/* One disassembled instruction, used by the annotated shader dump. */
struct si_shader_inst {
	char text[160]; /* one disasm line */
	unsigned offset; /* instruction offset */
	unsigned size; /* instruction size = 4 or 8 */
};
797
/* Split a disassembly string into lines and add them to the array pointed
 * to by "instructions".
 *
 * \param disasm       '\n'-separated disassembly text
 * \param start_addr   GPU address of the first instruction, for PC labels
 * \param num          in/out: number of entries already in "instructions";
 *                     offsets continue from the last existing entry
 */
static void si_add_split_disasm(const char *disasm,
				uint64_t start_addr,
				unsigned *num,
				struct si_shader_inst *instructions)
{
	struct si_shader_inst *last_inst = *num ? &instructions[*num - 1] : NULL;
	char *next;

	while ((next = strchr(disasm, '\n'))) {
		struct si_shader_inst *inst = &instructions[*num];
		unsigned len = next - disasm;

		assert(len < ARRAY_SIZE(inst->text));
		memcpy(inst->text, disasm, len);
		inst->text[len] = 0;
		/* Byte offset = end of the previous instruction. */
		inst->offset = last_inst ? last_inst->offset + last_inst->size : 0;

		const char *semicolon = strchr(disasm, ';');
		assert(semicolon);
		/* More than 16 chars after ";" means the instruction is 8 bytes long. */
		inst->size = next - semicolon > 16 ? 8 : 4;

		/* Append PC/offset/size info to the disasm text. */
		snprintf(inst->text + len, ARRAY_SIZE(inst->text) - len,
			 " [PC=0x%"PRIx64", off=%u, size=%u]",
			 start_addr + inst->offset, inst->offset, inst->size);

		last_inst = inst;
		(*num)++;
		disasm = next + 1;
	}
}
831
/* If the shader is being executed, print its asm instructions, and annotate
 * those that are being executed right now with information about waves that
 * execute them. This is most useful during a GPU hang.
 *
 * \param waves      wave list sorted by PC (from ac_get_wave_info)
 * \param num_waves  number of entries in waves
 */
static void si_print_annotated_shader(struct si_shader *shader,
				      struct ac_wave_info *waves,
				      unsigned num_waves,
				      FILE *f)
{
	if (!shader || !shader->binary.disasm_string)
		return;

	uint64_t start_addr = shader->bo->gpu_address;
	uint64_t end_addr = start_addr + shader->bo->b.b.width0;
	unsigned i;

	/* See if any wave executes the shader. */
	for (i = 0; i < num_waves; i++) {
		if (start_addr <= waves[i].pc && waves[i].pc <= end_addr)
			break;
	}
	if (i == num_waves)
		return; /* the shader is not being executed */

	/* Remember the first found wave. The waves are sorted according to PC. */
	waves = &waves[i];
	num_waves -= i;

	/* Get the list of instructions.
	 * Buffer size / 4 is the upper bound of the instruction count.
	 */
	unsigned num_inst = 0;
	struct si_shader_inst *instructions =
		calloc(shader->bo->b.b.width0 / 4, sizeof(struct si_shader_inst));

	/* The shader binary is prolog(s) + previous stage + main + epilog,
	 * laid out contiguously; split each part's disasm in order so the
	 * offsets line up with the actual code layout. */
	if (shader->prolog) {
		si_add_split_disasm(shader->prolog->binary.disasm_string,
				    start_addr, &num_inst, instructions);
	}
	if (shader->previous_stage) {
		si_add_split_disasm(shader->previous_stage->binary.disasm_string,
				    start_addr, &num_inst, instructions);
	}
	if (shader->prolog2) {
		si_add_split_disasm(shader->prolog2->binary.disasm_string,
				    start_addr, &num_inst, instructions);
	}
	si_add_split_disasm(shader->binary.disasm_string,
			    start_addr, &num_inst, instructions);
	if (shader->epilog) {
		si_add_split_disasm(shader->epilog->binary.disasm_string,
				    start_addr, &num_inst, instructions);
	}

	fprintf(f, COLOR_YELLOW "%s - annotated disassembly:" COLOR_RESET "\n",
		si_get_shader_name(shader, shader->selector->type));

	/* Print instructions with annotations. */
	for (i = 0; i < num_inst; i++) {
		struct si_shader_inst *inst = &instructions[i];

		fprintf(f, "%s\n", inst->text);

		/* Print which waves execute the instruction right now. */
		while (num_waves && start_addr + inst->offset == waves->pc) {
			fprintf(f,
				"          " COLOR_GREEN "^ SE%u SH%u CU%u "
				"SIMD%u WAVE%u  EXEC=%016"PRIx64 "  ",
				waves->se, waves->sh, waves->cu, waves->simd,
				waves->wave, waves->exec);

			if (inst->size == 4) {
				fprintf(f, "INST32=%08X" COLOR_RESET "\n",
					waves->inst_dw0);
			} else {
				fprintf(f, "INST64=%08X %08X" COLOR_RESET "\n",
					waves->inst_dw0, waves->inst_dw1);
			}

			waves->matched = true;
			waves = &waves[1];
			num_waves--;
		}
	}

	fprintf(f, "\n\n");
	free(instructions);
}
920
/* Read the active-wave info from the GPU and print an annotated disassembly
 * of every currently bound shader, plus a list of waves that are executing
 * shaders that are not currently bound. */
static void si_dump_annotated_shaders(struct si_context *sctx, FILE *f)
{
	struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP];
	unsigned num_waves = ac_get_wave_info(waves);

	fprintf(f, COLOR_CYAN "The number of active waves = %u" COLOR_RESET
		"\n\n", num_waves);

	si_print_annotated_shader(sctx->vs_shader.current, waves, num_waves, f);
	si_print_annotated_shader(sctx->tcs_shader.current, waves, num_waves, f);
	si_print_annotated_shader(sctx->tes_shader.current, waves, num_waves, f);
	si_print_annotated_shader(sctx->gs_shader.current, waves, num_waves, f);
	si_print_annotated_shader(sctx->ps_shader.current, waves, num_waves, f);

	/* Print waves executing shaders that are not currently bound.
	 * (si_print_annotated_shader sets waves[i].matched for the others.) */
	unsigned i;
	bool found = false;
	for (i = 0; i < num_waves; i++) {
		if (waves[i].matched)
			continue;

		if (!found) {
			fprintf(f, COLOR_CYAN
				"Waves not executing currently-bound shaders:"
				COLOR_RESET "\n");
			found = true;
		}
		fprintf(f, "    SE%u SH%u CU%u SIMD%u WAVE%u  EXEC=%016"PRIx64
			"  INST=%08X %08X  PC=%"PRIx64"\n",
			waves[i].se, waves[i].sh, waves[i].cu, waves[i].simd,
			waves[i].wave, waves[i].exec, waves[i].inst_dw0,
			waves[i].inst_dw1, waves[i].pc);
	}
	if (found)
		fprintf(f, "\n\n");
}
957
si_dump_command(const char * title,const char * command,FILE * f)958 static void si_dump_command(const char *title, const char *command, FILE *f)
959 {
960 char line[2000];
961
962 FILE *p = popen(command, "r");
963 if (!p)
964 return;
965
966 fprintf(f, COLOR_YELLOW "%s: " COLOR_RESET "\n", title);
967 while (fgets(line, sizeof(line), p))
968 fputs(line, f);
969 fprintf(f, "\n\n");
970 pclose(p);
971 }
972
si_dump_debug_state(struct pipe_context * ctx,FILE * f,unsigned flags)973 static void si_dump_debug_state(struct pipe_context *ctx, FILE *f,
974 unsigned flags)
975 {
976 struct si_context *sctx = (struct si_context*)ctx;
977
978 if (sctx->b.log)
979 u_log_flush(sctx->b.log);
980
981 if (flags & PIPE_DUMP_DEVICE_STATUS_REGISTERS) {
982 si_dump_debug_registers(sctx, f);
983
984 si_dump_annotated_shaders(sctx, f);
985 si_dump_command("Active waves (raw data)", "umr -wa | column -t", f);
986 si_dump_command("Wave information", "umr -O bits -wa", f);
987 }
988 }
989
si_log_draw_state(struct si_context * sctx,struct u_log_context * log)990 void si_log_draw_state(struct si_context *sctx, struct u_log_context *log)
991 {
992 if (!log)
993 return;
994
995 si_dump_framebuffer(sctx, log);
996
997 si_dump_gfx_shader(sctx, &sctx->vs_shader, log);
998 si_dump_gfx_shader(sctx, &sctx->tcs_shader, log);
999 si_dump_gfx_shader(sctx, &sctx->tes_shader, log);
1000 si_dump_gfx_shader(sctx, &sctx->gs_shader, log);
1001 si_dump_gfx_shader(sctx, &sctx->ps_shader, log);
1002
1003 si_dump_descriptor_list(sctx->screen,
1004 &sctx->descriptors[SI_DESCS_RW_BUFFERS],
1005 "", "RW buffers", 4, SI_NUM_RW_BUFFERS,
1006 si_identity, log);
1007 si_dump_gfx_descriptors(sctx, &sctx->vs_shader, log);
1008 si_dump_gfx_descriptors(sctx, &sctx->tcs_shader, log);
1009 si_dump_gfx_descriptors(sctx, &sctx->tes_shader, log);
1010 si_dump_gfx_descriptors(sctx, &sctx->gs_shader, log);
1011 si_dump_gfx_descriptors(sctx, &sctx->ps_shader, log);
1012 }
1013
si_log_compute_state(struct si_context * sctx,struct u_log_context * log)1014 void si_log_compute_state(struct si_context *sctx, struct u_log_context *log)
1015 {
1016 if (!log)
1017 return;
1018
1019 si_dump_compute_shader(sctx, log);
1020 si_dump_compute_descriptors(sctx, log);
1021 }
1022
si_dump_dma(struct si_context * sctx,struct radeon_saved_cs * saved,FILE * f)1023 static void si_dump_dma(struct si_context *sctx,
1024 struct radeon_saved_cs *saved, FILE *f)
1025 {
1026 static const char ib_name[] = "sDMA IB";
1027 unsigned i;
1028
1029 si_dump_bo_list(sctx, saved, f);
1030
1031 fprintf(f, "------------------ %s begin ------------------\n", ib_name);
1032
1033 for (i = 0; i < saved->num_dw; ++i) {
1034 fprintf(f, " %08x\n", saved->ib[i]);
1035 }
1036
1037 fprintf(f, "------------------- %s end -------------------\n", ib_name);
1038 fprintf(f, "\n");
1039
1040 fprintf(f, "SDMA Dump Done.\n");
1041 }
1042
si_check_vm_faults(struct r600_common_context * ctx,struct radeon_saved_cs * saved,enum ring_type ring)1043 void si_check_vm_faults(struct r600_common_context *ctx,
1044 struct radeon_saved_cs *saved, enum ring_type ring)
1045 {
1046 struct si_context *sctx = (struct si_context *)ctx;
1047 struct pipe_screen *screen = sctx->b.b.screen;
1048 FILE *f;
1049 uint64_t addr;
1050 char cmd_line[4096];
1051
1052 if (!ac_vm_fault_occured(sctx->b.chip_class,
1053 &sctx->dmesg_timestamp, &addr))
1054 return;
1055
1056 f = dd_get_debug_file(false);
1057 if (!f)
1058 return;
1059
1060 fprintf(f, "VM fault report.\n\n");
1061 if (os_get_command_line(cmd_line, sizeof(cmd_line)))
1062 fprintf(f, "Command: %s\n", cmd_line);
1063 fprintf(f, "Driver vendor: %s\n", screen->get_vendor(screen));
1064 fprintf(f, "Device vendor: %s\n", screen->get_device_vendor(screen));
1065 fprintf(f, "Device name: %s\n\n", screen->get_name(screen));
1066 fprintf(f, "Failing VM page: 0x%08"PRIx64"\n\n", addr);
1067
1068 if (sctx->apitrace_call_number)
1069 fprintf(f, "Last apitrace call: %u\n\n",
1070 sctx->apitrace_call_number);
1071
1072 switch (ring) {
1073 case RING_GFX: {
1074 struct u_log_context log;
1075 u_log_context_init(&log);
1076
1077 si_log_draw_state(sctx, &log);
1078 si_log_compute_state(sctx, &log);
1079 si_log_cs(sctx, &log, true);
1080
1081 u_log_new_page_print(&log, f);
1082 u_log_context_destroy(&log);
1083 break;
1084 }
1085 case RING_DMA:
1086 si_dump_dma(sctx, saved, f);
1087 break;
1088
1089 default:
1090 break;
1091 }
1092
1093 fclose(f);
1094
1095 fprintf(stderr, "Detected a VM fault, exiting...\n");
1096 exit(0);
1097 }
1098
si_init_debug_functions(struct si_context * sctx)1099 void si_init_debug_functions(struct si_context *sctx)
1100 {
1101 sctx->b.b.dump_debug_state = si_dump_debug_state;
1102 sctx->b.check_vm_faults = si_check_vm_faults;
1103
1104 /* Set the initial dmesg timestamp for this context, so that
1105 * only new messages will be checked for VM faults.
1106 */
1107 if (sctx->screen->debug_flags & DBG(CHECK_VM))
1108 ac_vm_fault_occured(sctx->b.chip_class,
1109 &sctx->dmesg_timestamp, NULL);
1110 }
1111