1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * IN THE SOFTWARE.
26 */
27
28 #include <stdlib.h>
29 #include <stdio.h>
30 #include <sys/utsname.h>
31 #include <sys/stat.h>
32
33 #include "util/mesa-sha1.h"
34 #include "sid.h"
35 #include "ac_debug.h"
36 #include "radv_debug.h"
37 #include "radv_shader.h"
38
/* Size, in bytes, of the buffer object holding the trace data. */
#define TRACE_BO_SIZE 4096
/* Size, in bytes, of the buffer object used by the trap handler. */
#define TMA_BO_SIZE 4096

/* ANSI escape sequences used to colorize the textual dumps. */
#define COLOR_RESET "\033[0m"
#define COLOR_RED "\033[31m"
#define COLOR_GREEN "\033[1;32m"
#define COLOR_YELLOW "\033[1;33m"
#define COLOR_CYAN "\033[1;36m"

/* Base name of the directory hang reports are written to; the process ID
 * is appended by radv_check_gpu_hangs().
 */
#define RADV_DUMP_DIR "radv_dumps"
49
/* Trace BO layout (indices are in units of 4 bytes, i.e. dwords):
 *
 * [0]: primary trace ID
 * [1]: secondary trace ID
 * [2-3]: 64-bit GFX ring pipeline pointer
 * [4-5]: 64-bit COMPUTE ring pipeline pointer
 * [6-7]: 64-bit descriptor set #0 pointer
 * ...
 * [68-69]: 64-bit descriptor set #31 pointer
 */
60
61 bool
radv_init_trace(struct radv_device * device)62 radv_init_trace(struct radv_device *device)
63 {
64 struct radeon_winsys *ws = device->ws;
65
66 device->trace_bo = ws->buffer_create(ws, TRACE_BO_SIZE, 8,
67 RADEON_DOMAIN_VRAM,
68 RADEON_FLAG_CPU_ACCESS|
69 RADEON_FLAG_NO_INTERPROCESS_SHARING |
70 RADEON_FLAG_ZERO_VRAM,
71 RADV_BO_PRIORITY_UPLOAD_BUFFER);
72 if (!device->trace_bo)
73 return false;
74
75 device->trace_id_ptr = ws->buffer_map(device->trace_bo);
76 if (!device->trace_id_ptr)
77 return false;
78
79 ac_vm_fault_occured(device->physical_device->rad_info.chip_class,
80 &device->dmesg_timestamp, NULL);
81
82 return true;
83 }
84
85 static void
radv_dump_trace(struct radv_device * device,struct radeon_cmdbuf * cs,FILE * f)86 radv_dump_trace(struct radv_device *device, struct radeon_cmdbuf *cs, FILE *f)
87 {
88 fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr);
89 device->ws->cs_dump(cs, f, (const int*)device->trace_id_ptr, 2);
90 }
91
92 static void
radv_dump_mmapped_reg(struct radv_device * device,FILE * f,unsigned offset)93 radv_dump_mmapped_reg(struct radv_device *device, FILE *f, unsigned offset)
94 {
95 struct radeon_winsys *ws = device->ws;
96 uint32_t value;
97
98 if (ws->read_registers(ws, offset, 1, &value))
99 ac_dump_reg(f, device->physical_device->rad_info.chip_class,
100 offset, value, ~0);
101 }
102
103 static void
radv_dump_debug_registers(struct radv_device * device,FILE * f)104 radv_dump_debug_registers(struct radv_device *device, FILE *f)
105 {
106 struct radeon_info *info = &device->physical_device->rad_info;
107
108 fprintf(f, "Memory-mapped registers:\n");
109 radv_dump_mmapped_reg(device, f, R_008010_GRBM_STATUS);
110
111 /* No other registers can be read on DRM < 3.1.0. */
112 if (info->drm_minor < 1) {
113 fprintf(f, "\n");
114 return;
115 }
116
117 radv_dump_mmapped_reg(device, f, R_008008_GRBM_STATUS2);
118 radv_dump_mmapped_reg(device, f, R_008014_GRBM_STATUS_SE0);
119 radv_dump_mmapped_reg(device, f, R_008018_GRBM_STATUS_SE1);
120 radv_dump_mmapped_reg(device, f, R_008038_GRBM_STATUS_SE2);
121 radv_dump_mmapped_reg(device, f, R_00803C_GRBM_STATUS_SE3);
122 radv_dump_mmapped_reg(device, f, R_00D034_SDMA0_STATUS_REG);
123 radv_dump_mmapped_reg(device, f, R_00D834_SDMA1_STATUS_REG);
124 if (info->chip_class <= GFX8) {
125 radv_dump_mmapped_reg(device, f, R_000E50_SRBM_STATUS);
126 radv_dump_mmapped_reg(device, f, R_000E4C_SRBM_STATUS2);
127 radv_dump_mmapped_reg(device, f, R_000E54_SRBM_STATUS3);
128 }
129 radv_dump_mmapped_reg(device, f, R_008680_CP_STAT);
130 radv_dump_mmapped_reg(device, f, R_008674_CP_STALLED_STAT1);
131 radv_dump_mmapped_reg(device, f, R_008678_CP_STALLED_STAT2);
132 radv_dump_mmapped_reg(device, f, R_008670_CP_STALLED_STAT3);
133 radv_dump_mmapped_reg(device, f, R_008210_CP_CPC_STATUS);
134 radv_dump_mmapped_reg(device, f, R_008214_CP_CPC_BUSY_STAT);
135 radv_dump_mmapped_reg(device, f, R_008218_CP_CPC_STALLED_STAT1);
136 radv_dump_mmapped_reg(device, f, R_00821C_CP_CPF_STATUS);
137 radv_dump_mmapped_reg(device, f, R_008220_CP_CPF_BUSY_STAT);
138 radv_dump_mmapped_reg(device, f, R_008224_CP_CPF_STALLED_STAT1);
139 fprintf(f, "\n");
140 }
141
142 static void
radv_dump_buffer_descriptor(enum chip_class chip_class,const uint32_t * desc,FILE * f)143 radv_dump_buffer_descriptor(enum chip_class chip_class, const uint32_t *desc,
144 FILE *f)
145 {
146 fprintf(f, COLOR_CYAN " Buffer:" COLOR_RESET "\n");
147 for (unsigned j = 0; j < 4; j++)
148 ac_dump_reg(f, chip_class, R_008F00_SQ_BUF_RSRC_WORD0 + j * 4,
149 desc[j], 0xffffffff);
150 }
151
152 static void
radv_dump_image_descriptor(enum chip_class chip_class,const uint32_t * desc,FILE * f)153 radv_dump_image_descriptor(enum chip_class chip_class, const uint32_t *desc,
154 FILE *f)
155 {
156 unsigned sq_img_rsrc_word0 = chip_class >= GFX10 ? R_00A000_SQ_IMG_RSRC_WORD0
157 : R_008F10_SQ_IMG_RSRC_WORD0;
158
159 fprintf(f, COLOR_CYAN " Image:" COLOR_RESET "\n");
160 for (unsigned j = 0; j < 8; j++)
161 ac_dump_reg(f, chip_class, sq_img_rsrc_word0 + j * 4,
162 desc[j], 0xffffffff);
163
164 fprintf(f, COLOR_CYAN " FMASK:" COLOR_RESET "\n");
165 for (unsigned j = 0; j < 8; j++)
166 ac_dump_reg(f, chip_class, sq_img_rsrc_word0 + j * 4,
167 desc[8 + j], 0xffffffff);
168 }
169
170 static void
radv_dump_sampler_descriptor(enum chip_class chip_class,const uint32_t * desc,FILE * f)171 radv_dump_sampler_descriptor(enum chip_class chip_class, const uint32_t *desc,
172 FILE *f)
173 {
174 fprintf(f, COLOR_CYAN " Sampler state:" COLOR_RESET "\n");
175 for (unsigned j = 0; j < 4; j++) {
176 ac_dump_reg(f, chip_class, R_008F30_SQ_IMG_SAMP_WORD0 + j * 4,
177 desc[j], 0xffffffff);
178 }
179 }
180
181 static void
radv_dump_combined_image_sampler_descriptor(enum chip_class chip_class,const uint32_t * desc,FILE * f)182 radv_dump_combined_image_sampler_descriptor(enum chip_class chip_class,
183 const uint32_t *desc, FILE *f)
184 {
185 radv_dump_image_descriptor(chip_class, desc, f);
186 radv_dump_sampler_descriptor(chip_class, desc + 16, f);
187 }
188
189 static void
radv_dump_descriptor_set(struct radv_device * device,struct radv_descriptor_set * set,unsigned id,FILE * f)190 radv_dump_descriptor_set(struct radv_device *device,
191 struct radv_descriptor_set *set, unsigned id, FILE *f)
192 {
193 enum chip_class chip_class = device->physical_device->rad_info.chip_class;
194 const struct radv_descriptor_set_layout *layout;
195 int i;
196
197 if (!set)
198 return;
199 layout = set->layout;
200
201 for (i = 0; i < set->layout->binding_count; i++) {
202 uint32_t *desc =
203 set->mapped_ptr + layout->binding[i].offset / 4;
204
205 switch (layout->binding[i].type) {
206 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
207 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
208 case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
209 case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
210 radv_dump_buffer_descriptor(chip_class, desc, f);
211 break;
212 case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
213 case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
214 case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
215 radv_dump_image_descriptor(chip_class, desc, f);
216 break;
217 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
218 radv_dump_combined_image_sampler_descriptor(chip_class, desc, f);
219 break;
220 case VK_DESCRIPTOR_TYPE_SAMPLER:
221 radv_dump_sampler_descriptor(chip_class, desc, f);
222 break;
223 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
224 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
225 /* todo */
226 break;
227 default:
228 assert(!"unknown descriptor type");
229 break;
230 }
231 fprintf(f, "\n");
232 }
233 fprintf(f, "\n\n");
234 }
235
236 static void
radv_dump_descriptors(struct radv_device * device,FILE * f)237 radv_dump_descriptors(struct radv_device *device, FILE *f)
238 {
239 uint64_t *ptr = (uint64_t *)device->trace_id_ptr;
240 int i;
241
242 fprintf(f, "Descriptors:\n");
243 for (i = 0; i < MAX_SETS; i++) {
244 struct radv_descriptor_set *set =
245 *(struct radv_descriptor_set **)(ptr + i + 3);
246
247 radv_dump_descriptor_set(device, set, i, f);
248 }
249 }
250
/* One instruction extracted from a shader disassembly string. */
struct radv_shader_inst {
	char text[160];  /* one disasm line (plus the appended PC annotation) */
	unsigned offset; /* instruction offset */
	unsigned size;   /* instruction size = 4 or 8 */
};

/* Split a disassembly string into lines and add them to the array pointed
 * to by "instructions".
 *
 * \param disasm        NUL-terminated disassembly; only lines terminated by
 *                      '\n' and containing a ';' are treated as instructions.
 * \param start_addr    address of the first instruction, used for the
 *                      "[PC=...]" annotation appended to each line.
 * \param num           in/out: number of valid entries in "instructions";
 *                      new entries are appended after the existing ones.
 * \param instructions  output array. The caller must size it for every
 *                      instruction in "disasm"; no capacity is checked here.
 *
 * Lines that do not fit in radv_shader_inst::text are truncated instead of
 * overflowing the buffer (the previous assert-only check vanished in
 * NDEBUG builds).
 */
static void si_add_split_disasm(const char *disasm,
				uint64_t start_addr,
				unsigned *num,
				struct radv_shader_inst *instructions)
{
	struct radv_shader_inst *last_inst = *num ? &instructions[*num - 1] : NULL;
	const char *next;

	while ((next = strchr(disasm, '\n'))) {
		struct radv_shader_inst *inst = &instructions[*num];
		size_t line_len = next - disasm;
		const char *semicolon = memchr(disasm, ';', line_len);

		if (!semicolon) {
			/* Ignore everything that is not an instruction. */
			disasm = next + 1;
			continue;
		}

		/* Clamp the copy so that the terminator always fits. */
		size_t len = line_len;
		if (len >= sizeof(inst->text))
			len = sizeof(inst->text) - 1;

		memcpy(inst->text, disasm, len);
		inst->text[len] = 0;
		inst->offset = last_inst ? last_inst->offset + last_inst->size : 0;

		/* More than 16 chars after ";" means the instruction is 8 bytes long. */
		inst->size = next - semicolon > 16 ? 8 : 4;

		/* snprintf() truncates the annotation if the line is too long. */
		snprintf(inst->text + len, sizeof(inst->text) - len,
			 " [PC=0x%"PRIx64", off=%u, size=%u]",
			 start_addr + inst->offset, inst->offset, inst->size);

		last_inst = inst;
		(*num)++;
		disasm = next + 1;
	}
}
296
297 static void
radv_dump_annotated_shader(struct radv_shader_variant * shader,gl_shader_stage stage,struct ac_wave_info * waves,unsigned num_waves,FILE * f)298 radv_dump_annotated_shader(struct radv_shader_variant *shader,
299 gl_shader_stage stage, struct ac_wave_info *waves,
300 unsigned num_waves, FILE *f)
301 {
302 uint64_t start_addr, end_addr;
303 unsigned i;
304
305 if (!shader)
306 return;
307
308 start_addr = radv_buffer_get_va(shader->bo) + shader->bo_offset;
309 end_addr = start_addr + shader->code_size;
310
311 /* See if any wave executes the shader. */
312 for (i = 0; i < num_waves; i++) {
313 if (start_addr <= waves[i].pc && waves[i].pc <= end_addr)
314 break;
315 }
316
317 if (i == num_waves)
318 return; /* the shader is not being executed */
319
320 /* Remember the first found wave. The waves are sorted according to PC. */
321 waves = &waves[i];
322 num_waves -= i;
323
324 /* Get the list of instructions.
325 * Buffer size / 4 is the upper bound of the instruction count.
326 */
327 unsigned num_inst = 0;
328 struct radv_shader_inst *instructions =
329 calloc(shader->code_size / 4, sizeof(struct radv_shader_inst));
330
331 si_add_split_disasm(shader->disasm_string,
332 start_addr, &num_inst, instructions);
333
334 fprintf(f, COLOR_YELLOW "%s - annotated disassembly:" COLOR_RESET "\n",
335 radv_get_shader_name(&shader->info, stage));
336
337 /* Print instructions with annotations. */
338 for (i = 0; i < num_inst; i++) {
339 struct radv_shader_inst *inst = &instructions[i];
340
341 fprintf(f, "%s\n", inst->text);
342
343 /* Print which waves execute the instruction right now. */
344 while (num_waves && start_addr + inst->offset == waves->pc) {
345 fprintf(f,
346 " " COLOR_GREEN "^ SE%u SH%u CU%u "
347 "SIMD%u WAVE%u EXEC=%016"PRIx64 " ",
348 waves->se, waves->sh, waves->cu, waves->simd,
349 waves->wave, waves->exec);
350
351 if (inst->size == 4) {
352 fprintf(f, "INST32=%08X" COLOR_RESET "\n",
353 waves->inst_dw0);
354 } else {
355 fprintf(f, "INST64=%08X %08X" COLOR_RESET "\n",
356 waves->inst_dw0, waves->inst_dw1);
357 }
358
359 waves->matched = true;
360 waves = &waves[1];
361 num_waves--;
362 }
363 }
364
365 fprintf(f, "\n\n");
366 free(instructions);
367 }
368
369 static void
radv_dump_annotated_shaders(struct radv_pipeline * pipeline,VkShaderStageFlagBits active_stages,FILE * f)370 radv_dump_annotated_shaders(struct radv_pipeline *pipeline,
371 VkShaderStageFlagBits active_stages, FILE *f)
372 {
373 struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP];
374 enum chip_class chip_class = pipeline->device->physical_device->rad_info.chip_class;
375 unsigned num_waves = ac_get_wave_info(chip_class, waves);
376
377 fprintf(f, COLOR_CYAN "The number of active waves = %u" COLOR_RESET
378 "\n\n", num_waves);
379
380 /* Dump annotated active graphics shaders. */
381 while (active_stages) {
382 int stage = u_bit_scan(&active_stages);
383
384 radv_dump_annotated_shader(pipeline->shaders[stage],
385 stage, waves, num_waves, f);
386 }
387
388 /* Print waves executing shaders that are not currently bound. */
389 unsigned i;
390 bool found = false;
391 for (i = 0; i < num_waves; i++) {
392 if (waves[i].matched)
393 continue;
394
395 if (!found) {
396 fprintf(f, COLOR_CYAN
397 "Waves not executing currently-bound shaders:"
398 COLOR_RESET "\n");
399 found = true;
400 }
401 fprintf(f, " SE%u SH%u CU%u SIMD%u WAVE%u EXEC=%016"PRIx64
402 " INST=%08X %08X PC=%"PRIx64"\n",
403 waves[i].se, waves[i].sh, waves[i].cu, waves[i].simd,
404 waves[i].wave, waves[i].exec, waves[i].inst_dw0,
405 waves[i].inst_dw1, waves[i].pc);
406 }
407 if (found)
408 fprintf(f, "\n\n");
409 }
410
411 static void
radv_dump_shader(struct radv_pipeline * pipeline,struct radv_shader_variant * shader,gl_shader_stage stage,FILE * f)412 radv_dump_shader(struct radv_pipeline *pipeline,
413 struct radv_shader_variant *shader, gl_shader_stage stage,
414 FILE *f)
415 {
416 if (!shader)
417 return;
418
419 fprintf(f, "%s:\n\n", radv_get_shader_name(&shader->info, stage));
420
421 if (shader->spirv) {
422 unsigned char sha1[21];
423 char sha1buf[41];
424
425 _mesa_sha1_compute(shader->spirv, shader->spirv_size, sha1);
426 _mesa_sha1_format(sha1buf, sha1);
427
428 fprintf(f, "SPIRV (sha1: %s):\n", sha1buf);
429 radv_print_spirv(shader->spirv, shader->spirv_size, f);
430 }
431
432 if (shader->nir_string) {
433 fprintf(f, "NIR:\n%s\n", shader->nir_string);
434 }
435
436 fprintf(f, "%s IR:\n%s\n",
437 pipeline->device->physical_device->use_llvm ? "LLVM" : "ACO",
438 shader->ir_string);
439 fprintf(f, "DISASM:\n%s\n", shader->disasm_string);
440
441 radv_dump_shader_stats(pipeline->device, pipeline, stage, f);
442 }
443
444 static void
radv_dump_shaders(struct radv_pipeline * pipeline,VkShaderStageFlagBits active_stages,FILE * f)445 radv_dump_shaders(struct radv_pipeline *pipeline,
446 VkShaderStageFlagBits active_stages, FILE *f)
447 {
448 /* Dump active graphics shaders. */
449 while (active_stages) {
450 int stage = u_bit_scan(&active_stages);
451
452 radv_dump_shader(pipeline, pipeline->shaders[stage], stage, f);
453 }
454 }
455
456 static struct radv_pipeline *
radv_get_saved_pipeline(struct radv_device * device,enum ring_type ring)457 radv_get_saved_pipeline(struct radv_device *device, enum ring_type ring)
458 {
459 uint64_t *ptr = (uint64_t *)device->trace_id_ptr;
460 int offset = ring == RING_GFX ? 1 : 2;
461
462 return *(struct radv_pipeline **)(ptr + offset);
463 }
464
465 static void
radv_dump_queue_state(struct radv_queue * queue,FILE * f)466 radv_dump_queue_state(struct radv_queue *queue, FILE *f)
467 {
468 enum ring_type ring = radv_queue_family_to_ring(queue->queue_family_index);
469 struct radv_pipeline *pipeline;
470
471 fprintf(f, "RING_%s:\n", ring == RING_GFX ? "GFX" : "COMPUTE");
472
473 pipeline = radv_get_saved_pipeline(queue->device, ring);
474 if (pipeline) {
475 radv_dump_shaders(pipeline, pipeline->active_stages, f);
476 radv_dump_annotated_shaders(pipeline, pipeline->active_stages, f);
477 radv_dump_descriptors(queue->device, f);
478 }
479 }
480
/* Run the shell command "cmd" through popen() and copy its standard output
 * to "f", followed by one newline. Nothing is written if the command
 * cannot be started.
 */
static void
radv_dump_cmd(const char *cmd, FILE *f)
{
	char chunk[2048];
	FILE *pipe_out = popen(cmd, "r");

	if (pipe_out == NULL)
		return;

	while (fgets(chunk, sizeof(chunk), pipe_out) != NULL)
		fputs(chunk, f);

	fprintf(f, "\n");
	pclose(pipe_out);
}
495
/* Append the last 60 lines of the kernel log (via "dmesg | tail") to "f". */
static void
radv_dump_dmesg(FILE *f)
{
	static const char dmesg_cmd[] = "dmesg | tail -n60";

	fprintf(f, "\nLast 60 lines of dmesg:\n\n");
	radv_dump_cmd(dmesg_cmd, f);
}
502
503 void
radv_dump_enabled_options(struct radv_device * device,FILE * f)504 radv_dump_enabled_options(struct radv_device *device, FILE *f)
505 {
506 uint64_t mask;
507
508 if (device->instance->debug_flags) {
509 fprintf(f, "Enabled debug options: ");
510
511 mask = device->instance->debug_flags;
512 while (mask) {
513 int i = u_bit_scan64(&mask);
514 fprintf(f, "%s, ", radv_get_debug_option_name(i));
515 }
516 fprintf(f, "\n");
517 }
518
519 if (device->instance->perftest_flags) {
520 fprintf(f, "Enabled perftest options: ");
521
522 mask = device->instance->perftest_flags;
523 while (mask) {
524 int i = u_bit_scan64(&mask);
525 fprintf(f, "%s, ", radv_get_perftest_option_name(i));
526 }
527 fprintf(f, "\n");
528 }
529 }
530
531 static void
radv_dump_device_name(struct radv_device * device,FILE * f)532 radv_dump_device_name(struct radv_device *device, FILE *f)
533 {
534 struct radeon_info *info = &device->physical_device->rad_info;
535 char kernel_version[128] = {0};
536 struct utsname uname_data;
537 const char *chip_name;
538
539 chip_name = device->ws->get_chip_name(device->ws);
540
541 if (uname(&uname_data) == 0)
542 snprintf(kernel_version, sizeof(kernel_version),
543 " / %s", uname_data.release);
544
545 fprintf(f, "Device name: %s (%s / DRM %i.%i.%i%s)\n\n",
546 chip_name, device->physical_device->name,
547 info->drm_major, info->drm_minor, info->drm_patchlevel,
548 kernel_version);
549 }
550
551 static void
radv_dump_umr_ring(struct radv_queue * queue,FILE * f)552 radv_dump_umr_ring(struct radv_queue *queue, FILE *f)
553 {
554 enum ring_type ring = radv_queue_family_to_ring(queue->queue_family_index);
555 struct radv_device *device = queue->device;
556 char cmd[128];
557
558 /* TODO: Dump compute ring. */
559 if (ring != RING_GFX)
560 return;
561
562 sprintf(cmd, "umr -R %s 2>&1",
563 device->physical_device->rad_info.chip_class >= GFX10 ? "gfx_0.0.0" : "gfx");
564
565 fprintf(f, "\nUMR GFX ring:\n\n");
566 radv_dump_cmd(cmd, f);
567 }
568
569 static void
radv_dump_umr_waves(struct radv_queue * queue,FILE * f)570 radv_dump_umr_waves(struct radv_queue *queue, FILE *f)
571 {
572 enum ring_type ring = radv_queue_family_to_ring(queue->queue_family_index);
573 struct radv_device *device = queue->device;
574 char cmd[128];
575
576 /* TODO: Dump compute ring. */
577 if (ring != RING_GFX)
578 return;
579
580 sprintf(cmd, "umr -O bits,halt_waves -wa %s 2>&1",
581 device->physical_device->rad_info.chip_class >= GFX10 ? "gfx_0.0.0" : "gfx");
582
583 fprintf(f, "\nUMR GFX waves:\n\n");
584 radv_dump_cmd(cmd, f);
585 }
586
587 static bool
radv_gpu_hang_occured(struct radv_queue * queue,enum ring_type ring)588 radv_gpu_hang_occured(struct radv_queue *queue, enum ring_type ring)
589 {
590 struct radeon_winsys *ws = queue->device->ws;
591
592 if (!ws->ctx_wait_idle(queue->hw_ctx, ring, queue->queue_idx))
593 return true;
594
595 return false;
596 }
597
598 void
radv_check_gpu_hangs(struct radv_queue * queue,struct radeon_cmdbuf * cs)599 radv_check_gpu_hangs(struct radv_queue *queue, struct radeon_cmdbuf *cs)
600 {
601 struct radv_device *device = queue->device;
602 char dump_dir[256], dump_path[512];
603 enum ring_type ring;
604 uint64_t addr;
605 FILE *f;
606
607 ring = radv_queue_family_to_ring(queue->queue_family_index);
608
609 bool hang_occurred = radv_gpu_hang_occured(queue, ring);
610 bool vm_fault_occurred = false;
611 if (queue->device->instance->debug_flags & RADV_DEBUG_VM_FAULTS)
612 vm_fault_occurred = ac_vm_fault_occured(device->physical_device->rad_info.chip_class,
613 &device->dmesg_timestamp, &addr);
614 if (!hang_occurred && !vm_fault_occurred)
615 return;
616
617 fprintf(stderr, "radv: GPU hang detected...\n");
618
619 /* Create a directory into $HOME/radv_dumps_<pid> to save various
620 * debugging info about that GPU hang.
621 */
622 snprintf(dump_dir, sizeof(dump_dir), "%s/"RADV_DUMP_DIR"_%d",
623 debug_get_option("HOME", "."), getpid());
624 if (mkdir(dump_dir, 0774) && errno != EEXIST) {
625 fprintf(stderr, "radv: can't create directory '%s' (%i).\n",
626 dump_dir, errno);
627 abort();
628 }
629
630 /* Dump trace file. */
631 snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "trace.log");
632 f = fopen(dump_path, "w+");
633 if (f) {
634 radv_dump_trace(queue->device, cs, f);
635 fclose(f);
636 }
637
638 /* Dump pipeline state. */
639 snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "pipeline.log");
640 f = fopen(dump_path, "w+");
641 if (f) {
642 radv_dump_queue_state(queue, f);
643 fclose(f);
644 }
645
646 /* Dump UMR ring. */
647 snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "umr_ring.log");
648 f = fopen(dump_path, "w+");
649 if (f) {
650 radv_dump_umr_ring(queue, f);
651 fclose(f);
652 }
653
654 /* Dump UMR waves. */
655 snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "umr_waves.log");
656 f = fopen(dump_path, "w+");
657 if (f) {
658 radv_dump_umr_waves(queue, f);
659 fclose(f);
660 }
661
662 /* Dump debug registers. */
663 snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "registers.log");
664 f = fopen(dump_path, "w+");
665 if (f) {
666 radv_dump_debug_registers(device, f);
667 fclose(f);
668 }
669
670 /* Dump VM fault info. */
671 if (vm_fault_occurred) {
672 snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "vm_fault.log");
673 f = fopen(dump_path, "w+");
674 if (f) {
675 fprintf(f, "VM fault report.\n\n");
676 fprintf(f, "Failing VM page: 0x%08"PRIx64"\n\n", addr);
677 fclose(f);
678 }
679 }
680
681 /* Dump enabled debug/perftest options. */
682 snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "options.log");
683 f = fopen(dump_path, "w+");
684 if (f) {
685 radv_dump_enabled_options(device, f);
686 fclose(f);
687 }
688
689 /* Dump GPU info. */
690 snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "gpu_info.log");
691 f = fopen(dump_path, "w+");
692 if (f) {
693 radv_dump_device_name(device, f);
694 ac_print_gpu_info(&device->physical_device->rad_info, f);
695 fclose(f);
696 }
697
698 /* Dump dmesg. */
699 snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "dmesg.log");
700 f = fopen(dump_path, "w+");
701 if (f) {
702 radv_dump_dmesg(f);
703 fclose(f);
704 }
705
706 fprintf(stderr, "radv: GPU hang report saved to '%s'!\n", dump_dir);
707 abort();
708 }
709
/**
 * Disassemble a SPIR-V binary with the external "spirv-dis" tool and write
 * the result to "fp".
 *
 * The binary is first written to a temporary file created with mkstemp(),
 * which is removed again before returning. Nothing is printed if the
 * temporary file cannot be created or fully written.
 *
 * \param data  SPIR-V binary.
 * \param size  size of "data" in bytes.
 * \param fp    stream receiving the disassembly.
 */
void
radv_print_spirv(const char *data, uint32_t size, FILE *fp)
{
	char path[] = "/tmp/fileXXXXXX";
	char command[128];
	uint32_t written = 0;
	int fd;

	/* Dump the binary into a temporary file. */
	fd = mkstemp(path);
	if (fd < 0)
		return;

	/* write() may transfer fewer bytes than requested; keep going until
	 * the whole binary is on disk so spirv-dis never sees a truncated
	 * module.
	 */
	while (written < size) {
		ssize_t ret = write(fd, data + written, size - written);

		if (ret < 0 && errno == EINTR)
			continue;
		if (ret <= 0)
			goto fail;

		written += ret;
	}

	/* Disassemble using spirv-dis if installed. */
	snprintf(command, sizeof(command), "spirv-dis %s", path);
	radv_dump_cmd(command, fp);

fail:
	close(fd);
	unlink(path);
}
733
734 bool
radv_trap_handler_init(struct radv_device * device)735 radv_trap_handler_init(struct radv_device *device)
736 {
737 struct radeon_winsys *ws = device->ws;
738
739 /* Create the trap handler shader and upload it like other shaders. */
740 device->trap_handler_shader = radv_create_trap_handler_shader(device);
741 if (!device->trap_handler_shader) {
742 fprintf(stderr, "radv: failed to create the trap handler shader.\n");
743 return false;
744 }
745
746 device->tma_bo = ws->buffer_create(ws, TMA_BO_SIZE, 256,
747 RADEON_DOMAIN_VRAM,
748 RADEON_FLAG_CPU_ACCESS |
749 RADEON_FLAG_NO_INTERPROCESS_SHARING |
750 RADEON_FLAG_ZERO_VRAM |
751 RADEON_FLAG_32BIT,
752 RADV_BO_PRIORITY_SCRATCH);
753 if (!device->tma_bo)
754 return false;
755
756 device->tma_ptr = ws->buffer_map(device->tma_bo);
757 if (!device->tma_ptr)
758 return false;
759
760 /* Upload a buffer descriptor to store various info from the trap. */
761 uint64_t tma_va = radv_buffer_get_va(device->tma_bo) + 16;
762 uint32_t desc[4];
763
764 desc[0] = tma_va;
765 desc[1] = S_008F04_BASE_ADDRESS_HI(tma_va >> 32);
766 desc[2] = TMA_BO_SIZE;
767 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
768 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
769 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
770 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
771 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
772
773 memcpy(device->tma_ptr, desc, sizeof(desc));
774
775 return true;
776 }
777
778 void
radv_trap_handler_finish(struct radv_device * device)779 radv_trap_handler_finish(struct radv_device *device)
780 {
781 struct radeon_winsys *ws = device->ws;
782
783 if (unlikely(device->trap_handler_shader))
784 radv_shader_variant_destroy(device, device->trap_handler_shader);
785
786 if (unlikely(device->tma_bo))
787 ws->buffer_destroy(device->tma_bo);
788 }
789
790 static struct radv_shader_variant *
radv_get_faulty_shader(struct radv_device * device,uint64_t faulty_pc)791 radv_get_faulty_shader(struct radv_device *device, uint64_t faulty_pc)
792 {
793 struct radv_shader_variant *shader = NULL;
794
795 mtx_lock(&device->shader_slab_mutex);
796 list_for_each_entry(struct radv_shader_slab, slab, &device->shader_slabs, slabs) {
797 list_for_each_entry(struct radv_shader_variant, s, &slab->shaders, slab_list) {
798 uint64_t offset = align_u64(s->bo_offset + s->code_size, 256);
799 uint64_t va = radv_buffer_get_va(s->bo);
800
801 if (faulty_pc >= va + s->bo_offset && faulty_pc < va + offset) {
802 mtx_unlock(&device->shader_slab_mutex);
803 return s;
804 }
805 }
806 }
807 mtx_unlock(&device->shader_slab_mutex);
808
809 return shader;
810 }
811
812 static void
radv_dump_faulty_shader(struct radv_device * device,uint64_t faulty_pc)813 radv_dump_faulty_shader(struct radv_device *device, uint64_t faulty_pc)
814 {
815 struct radv_shader_variant *shader;
816 uint64_t start_addr, end_addr;
817 uint32_t instr_offset;
818
819 shader = radv_get_faulty_shader(device, faulty_pc);
820 if (!shader)
821 return;
822
823 start_addr = radv_buffer_get_va(shader->bo) + shader->bo_offset;
824 end_addr = start_addr + shader->code_size;
825 instr_offset = faulty_pc - start_addr;
826
827 fprintf(stderr, "Faulty shader found "
828 "VA=[0x%"PRIx64"-0x%"PRIx64"], instr_offset=%d\n",
829 start_addr, end_addr, instr_offset);
830
831 /* Get the list of instructions.
832 * Buffer size / 4 is the upper bound of the instruction count.
833 */
834 unsigned num_inst = 0;
835 struct radv_shader_inst *instructions =
836 calloc(shader->code_size / 4, sizeof(struct radv_shader_inst));
837
838 /* Split the disassembly string into instructions. */
839 si_add_split_disasm(shader->disasm_string, start_addr, &num_inst, instructions);
840
841 /* Print instructions with annotations. */
842 for (unsigned i = 0; i < num_inst; i++) {
843 struct radv_shader_inst *inst = &instructions[i];
844
845 if (start_addr + inst->offset == faulty_pc) {
846 fprintf(stderr, "\n!!! Faulty instruction below !!!\n");
847 fprintf(stderr, "%s\n", inst->text);
848 fprintf(stderr, "\n");
849 } else {
850 fprintf(stderr, "%s\n", inst->text);
851 }
852 }
853
854 free(instructions);
855 }
856
/* SQ wave register values as laid out in the TMA buffer;
 * radv_dump_sq_hw_regs() overlays this struct on device->tma_ptr starting
 * at index 6. The field order must not change.
 */
struct radv_sq_hw_reg {
	uint32_t status;   /* dumped as SQ_WAVE_STATUS */
	uint32_t trap_sts; /* dumped as SQ_WAVE_TRAPSTS */
	uint32_t hw_id;    /* dumped as SQ_WAVE_HW_ID (HW_ID1 on GFX10+) */
	uint32_t ib_sts;   /* dumped as SQ_WAVE_IB_STS */
};
863
864 static void
radv_dump_sq_hw_regs(struct radv_device * device)865 radv_dump_sq_hw_regs(struct radv_device *device)
866 {
867 struct radv_sq_hw_reg *regs = (struct radv_sq_hw_reg *)&device->tma_ptr[6];
868
869 fprintf(stderr, "\nHardware registers:\n");
870 if (device->physical_device->rad_info.chip_class >= GFX10) {
871 ac_dump_reg(stderr, device->physical_device->rad_info.chip_class,
872 R_000408_SQ_WAVE_STATUS, regs->status, ~0);
873 ac_dump_reg(stderr, device->physical_device->rad_info.chip_class,
874 R_00040C_SQ_WAVE_TRAPSTS, regs->trap_sts, ~0);
875 ac_dump_reg(stderr, device->physical_device->rad_info.chip_class,
876 R_00045C_SQ_WAVE_HW_ID1, regs->hw_id, ~0);
877 ac_dump_reg(stderr, device->physical_device->rad_info.chip_class,
878 R_00041C_SQ_WAVE_IB_STS, regs->ib_sts, ~0);
879 } else {
880 ac_dump_reg(stderr, device->physical_device->rad_info.chip_class,
881 R_000048_SQ_WAVE_STATUS, regs->status, ~0);
882 ac_dump_reg(stderr, device->physical_device->rad_info.chip_class,
883 R_00004C_SQ_WAVE_TRAPSTS, regs->trap_sts, ~0);
884 ac_dump_reg(stderr, device->physical_device->rad_info.chip_class,
885 R_000050_SQ_WAVE_HW_ID, regs->hw_id, ~0);
886 ac_dump_reg(stderr, device->physical_device->rad_info.chip_class,
887 R_00005C_SQ_WAVE_IB_STS, regs->ib_sts, ~0);
888 }
889 fprintf(stderr, "\n\n");
890 }
891
892 void
radv_check_trap_handler(struct radv_queue * queue)893 radv_check_trap_handler(struct radv_queue *queue)
894 {
895 enum ring_type ring = radv_queue_family_to_ring(queue->queue_family_index);
896 struct radv_device *device = queue->device;
897 struct radeon_winsys *ws = device->ws;
898
899 /* Wait for the context to be idle in a finite time. */
900 ws->ctx_wait_idle(queue->hw_ctx, ring, queue->queue_idx);
901
902 /* Try to detect if the trap handler has been reached by the hw by
903 * looking at ttmp0 which should be non-zero if a shader exception
904 * happened.
905 */
906 if (!device->tma_ptr[4])
907 return;
908
909 #if 0
910 fprintf(stderr, "tma_ptr:\n");
911 for (unsigned i = 0; i < 10; i++)
912 fprintf(stderr, "tma_ptr[%d]=0x%x\n", i, device->tma_ptr[i]);
913 #endif
914
915 radv_dump_sq_hw_regs(device);
916
917 uint32_t ttmp0 = device->tma_ptr[4];
918 uint32_t ttmp1 = device->tma_ptr[5];
919
920 /* According to the ISA docs, 3.10 Trap and Exception Registers:
921 *
922 * "{ttmp1, ttmp0} = {3'h0, pc_rewind[3:0], HT[0], trapID[7:0], PC[47:0]}"
923 *
924 * "When the trap handler is entered, the PC of the faulting
925 * instruction is: (PC - PC_rewind * 4)."
926 * */
927 uint8_t trap_id = (ttmp1 >> 16) & 0xff;
928 uint8_t ht = (ttmp1 >> 24) & 0x1;
929 uint8_t pc_rewind = (ttmp1 >> 25) & 0xf;
930 uint64_t pc = (ttmp0 | ((ttmp1 & 0x0000ffffull) << 32)) - (pc_rewind * 4);
931
932 fprintf(stderr, "PC=0x%"PRIx64", trapID=%d, HT=%d, PC_rewind=%d\n",
933 pc, trap_id, ht, pc_rewind);
934
935 radv_dump_faulty_shader(device, pc);
936
937 abort();
938 }
939