/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <stdio.h>
#include <stdlib.h>
#ifndef _WIN32
#include <sys/utsname.h>
#endif
#include <sys/stat.h>

#include "util/mesa-sha1.h"
#include "util/os_time.h"
#include "ac_debug.h"
#include "radv_debug.h"
#include "radv_shader.h"
#include "sid.h"

#define TRACE_BO_SIZE 4096
#define TMA_BO_SIZE   4096

#define COLOR_RESET  "\033[0m"
#define COLOR_RED    "\033[31m"
#define COLOR_GREEN  "\033[1;32m"
#define COLOR_YELLOW "\033[1;33m"
#define COLOR_CYAN   "\033[1;36m"

#define RADV_DUMP_DIR "radv_dumps"

/* Trace BO layout (offsets are 4 bytes):
 *
 * [0]: primary trace ID
 * [1]: secondary trace ID
 * [2-3]: 64-bit GFX ring pipeline pointer
 * [4-5]: 64-bit COMPUTE ring pipeline pointer
 * [6-7]: Vertex descriptors pointer
 * [8-9]: 64-bit Vertex prolog pointer
 * [10-11]: 64-bit descriptor set #0 pointer
 * ...
 * [72-73]: 64-bit descriptor set #31 pointer
 */

bool
radv_init_trace(struct radv_device *device)
{
   struct radeon_winsys *ws = device->ws;
   VkResult result;

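   /* Keep the trace BO CPU-visible so the driver can still read back the last
    * written trace IDs and saved pointers after a GPU hang (see radv_dump_trace()
    * and the radv_get_saved_*() helpers below).
    */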
   result = ws->buffer_create(
      ws, TRACE_BO_SIZE, 8, RADEON_DOMAIN_VRAM,
      RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM | RADEON_FLAG_VA_UNCACHED,
      RADV_BO_PRIORITY_UPLOAD_BUFFER, 0, &device->trace_bo);
   if (result != VK_SUCCESS)
      return false;

   result = ws->buffer_make_resident(ws, device->trace_bo, true);
   if (result != VK_SUCCESS)
      return false;

   device->trace_id_ptr = ws->buffer_map(device->trace_bo);
   if (!device->trace_id_ptr)
      return false;

   return true;
}

void
radv_finish_trace(struct radv_device *device)
{
   struct radeon_winsys *ws = device->ws;

   if (unlikely(device->trace_bo)) {
      ws->buffer_make_resident(ws, device->trace_bo, false);
      ws->buffer_destroy(ws, device->trace_bo);
   }
}

static void
radv_dump_trace(const struct radv_device *device, struct radeon_cmdbuf *cs, FILE *f)
{
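   /* The first two dwords of the trace BO hold the primary/secondary trace IDs
    * (see the layout comment above); both are handed to cs_dump() for annotating
    * the IB dump.
    */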
   fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr);
   device->ws->cs_dump(cs, f, (const int *)device->trace_id_ptr, 2, RADV_CS_DUMP_TYPE_IBS);
}

static void
radv_dump_mmapped_reg(const struct radv_device *device, FILE *f, unsigned offset)
{
   struct radeon_winsys *ws = device->ws;
   uint32_t value;

   if (ws->read_registers(ws, offset, 1, &value))
      ac_dump_reg(f, device->physical_device->rad_info.gfx_level, device->physical_device->rad_info.family, offset,
                  value, ~0);
}

static void
radv_dump_debug_registers(const struct radv_device *device, FILE *f)
{
   const struct radeon_info *info = &device->physical_device->rad_info;

   fprintf(f, "Memory-mapped registers:\n");
   radv_dump_mmapped_reg(device, f, R_008010_GRBM_STATUS);

   radv_dump_mmapped_reg(device, f, R_008008_GRBM_STATUS2);
   radv_dump_mmapped_reg(device, f, R_008014_GRBM_STATUS_SE0);
   radv_dump_mmapped_reg(device, f, R_008018_GRBM_STATUS_SE1);
   radv_dump_mmapped_reg(device, f, R_008038_GRBM_STATUS_SE2);
   radv_dump_mmapped_reg(device, f, R_00803C_GRBM_STATUS_SE3);
   radv_dump_mmapped_reg(device, f, R_00D034_SDMA0_STATUS_REG);
   radv_dump_mmapped_reg(device, f, R_00D834_SDMA1_STATUS_REG);
   if (info->gfx_level <= GFX8) {
      radv_dump_mmapped_reg(device, f, R_000E50_SRBM_STATUS);
      radv_dump_mmapped_reg(device, f, R_000E4C_SRBM_STATUS2);
      radv_dump_mmapped_reg(device, f, R_000E54_SRBM_STATUS3);
   }
   radv_dump_mmapped_reg(device, f, R_008680_CP_STAT);
   radv_dump_mmapped_reg(device, f, R_008674_CP_STALLED_STAT1);
   radv_dump_mmapped_reg(device, f, R_008678_CP_STALLED_STAT2);
   radv_dump_mmapped_reg(device, f, R_008670_CP_STALLED_STAT3);
   radv_dump_mmapped_reg(device, f, R_008210_CP_CPC_STATUS);
   radv_dump_mmapped_reg(device, f, R_008214_CP_CPC_BUSY_STAT);
   radv_dump_mmapped_reg(device, f, R_008218_CP_CPC_STALLED_STAT1);
   radv_dump_mmapped_reg(device, f, R_00821C_CP_CPF_STATUS);
   radv_dump_mmapped_reg(device, f, R_008220_CP_CPF_BUSY_STAT);
   radv_dump_mmapped_reg(device, f, R_008224_CP_CPF_STALLED_STAT1);
   fprintf(f, "\n");
}

static void
radv_dump_buffer_descriptor(enum amd_gfx_level gfx_level, enum radeon_family family, const uint32_t *desc, FILE *f)
{
   fprintf(f, COLOR_CYAN " Buffer:" COLOR_RESET "\n");
   for (unsigned j = 0; j < 4; j++)
      ac_dump_reg(f, gfx_level, family, R_008F00_SQ_BUF_RSRC_WORD0 + j * 4, desc[j], 0xffffffff);
}

static void
radv_dump_image_descriptor(enum amd_gfx_level gfx_level, enum radeon_family family, const uint32_t *desc, FILE *f)
{
   unsigned sq_img_rsrc_word0 = gfx_level >= GFX10 ? R_00A000_SQ_IMG_RSRC_WORD0 : R_008F10_SQ_IMG_RSRC_WORD0;

   fprintf(f, COLOR_CYAN " Image:" COLOR_RESET "\n");
   for (unsigned j = 0; j < 8; j++)
      ac_dump_reg(f, gfx_level, family, sq_img_rsrc_word0 + j * 4, desc[j], 0xffffffff);

   fprintf(f, COLOR_CYAN " FMASK:" COLOR_RESET "\n");
   for (unsigned j = 0; j < 8; j++)
      ac_dump_reg(f, gfx_level, family, sq_img_rsrc_word0 + j * 4, desc[8 + j], 0xffffffff);
}

static void
radv_dump_sampler_descriptor(enum amd_gfx_level gfx_level, enum radeon_family family, const uint32_t *desc, FILE *f)
{
   fprintf(f, COLOR_CYAN " Sampler state:" COLOR_RESET "\n");
   for (unsigned j = 0; j < 4; j++) {
      ac_dump_reg(f, gfx_level, family, R_008F30_SQ_IMG_SAMP_WORD0 + j * 4, desc[j], 0xffffffff);
   }
}

static void
radv_dump_combined_image_sampler_descriptor(enum amd_gfx_level gfx_level, enum radeon_family family,
                                            const uint32_t *desc, FILE *f)
{
   radv_dump_image_descriptor(gfx_level, family, desc, f);
   radv_dump_sampler_descriptor(gfx_level, family, desc + 16, f);
}

static void
radv_dump_descriptor_set(const struct radv_device *device, const struct radv_descriptor_set *set, unsigned id, FILE *f)
{
   enum amd_gfx_level gfx_level = device->physical_device->rad_info.gfx_level;
   enum radeon_family family = device->physical_device->rad_info.family;
   const struct radv_descriptor_set_layout *layout;
   int i;

   if (!set)
      return;
   layout = set->header.layout;

   for (i = 0; i < set->header.layout->binding_count; i++) {
      uint32_t *desc = set->header.mapped_ptr + layout->binding[i].offset / 4;

      switch (layout->binding[i].type) {
      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
      case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
      case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
      case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
         radv_dump_buffer_descriptor(gfx_level, family, desc, f);
         break;
      case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
      case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
      case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
         radv_dump_image_descriptor(gfx_level, family, desc, f);
         break;
      case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
         radv_dump_combined_image_sampler_descriptor(gfx_level, family, desc, f);
         break;
      case VK_DESCRIPTOR_TYPE_SAMPLER:
         radv_dump_sampler_descriptor(gfx_level, family, desc, f);
         break;
      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
      case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
      case VK_DESCRIPTOR_TYPE_MUTABLE_EXT:
      case VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR:
         /* todo */
         break;
      default:
         assert(!"unknown descriptor type");
         break;
      }
      fprintf(f, "\n");
   }
   fprintf(f, "\n\n");
}

static void
radv_dump_descriptors(struct radv_device *device, FILE *f)
{
   uint64_t *ptr = (uint64_t *)device->trace_id_ptr;
   int i;

   fprintf(f, "Descriptors:\n");
   for (i = 0; i < MAX_SETS; i++) {
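      /* Descriptor set pointers live at 64-bit words [5..5 + MAX_SETS - 1] of the
       * trace BO, i.e. dwords [10-11] for set #0 per the layout comment at the top
       * of this file.
       */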
      struct radv_descriptor_set *set = *(struct radv_descriptor_set **)(ptr + i + 5);

      radv_dump_descriptor_set(device, set, i, f);
   }
}

struct radv_shader_inst {
   char text[160];  /* one disasm line */
   unsigned offset; /* instruction offset */
   unsigned size;   /* instruction size = 4 or 8 */
};

/* Split a disassembly string into lines and add them to the array pointed
 * to by "instructions". */
static void
radv_add_split_disasm(const char *disasm, uint64_t start_addr, unsigned *num, struct radv_shader_inst *instructions)
{
   struct radv_shader_inst *last_inst = *num ? &instructions[*num - 1] : NULL;
   char *next;

   while ((next = strchr(disasm, '\n'))) {
      struct radv_shader_inst *inst = &instructions[*num];
      unsigned len = next - disasm;

      if (!memchr(disasm, ';', len)) {
         /* Ignore everything that is not an instruction. */
         disasm = next + 1;
         continue;
      }

      assert(len < ARRAY_SIZE(inst->text));
      memcpy(inst->text, disasm, len);
      inst->text[len] = 0;
      inst->offset = last_inst ? last_inst->offset + last_inst->size : 0;

      const char *semicolon = strchr(disasm, ';');
      assert(semicolon);
      /* More than 16 chars after ";" means the instruction is 8 bytes long. */
      inst->size = next - semicolon > 16 ? 8 : 4;

      snprintf(inst->text + len, ARRAY_SIZE(inst->text) - len, " [PC=0x%" PRIx64 ", off=%u, size=%u]",
               start_addr + inst->offset, inst->offset, inst->size);

      last_inst = inst;
      (*num)++;
      disasm = next + 1;
   }
}

static void
radv_dump_annotated_shader(const struct radv_shader *shader, gl_shader_stage stage, struct ac_wave_info *waves,
                           unsigned num_waves, FILE *f)
{
   uint64_t start_addr, end_addr;
   unsigned i;

   if (!shader)
      return;

   start_addr = radv_shader_get_va(shader);
   end_addr = start_addr + shader->code_size;

   /* See if any wave executes the shader. */
   for (i = 0; i < num_waves; i++) {
      if (start_addr <= waves[i].pc && waves[i].pc <= end_addr)
         break;
   }

   if (i == num_waves)
      return; /* the shader is not being executed */

   /* Remember the first found wave. The waves are sorted according to PC. */
   waves = &waves[i];
   num_waves -= i;

   /* Get the list of instructions.
    * Buffer size / 4 is the upper bound of the instruction count.
    */
   unsigned num_inst = 0;
   struct radv_shader_inst *instructions = calloc(shader->code_size / 4, sizeof(struct radv_shader_inst));

   radv_add_split_disasm(shader->disasm_string, start_addr, &num_inst, instructions);

   fprintf(f, COLOR_YELLOW "%s - annotated disassembly:" COLOR_RESET "\n", radv_get_shader_name(&shader->info, stage));

   /* Print instructions with annotations. */
   for (i = 0; i < num_inst; i++) {
      struct radv_shader_inst *inst = &instructions[i];

      fprintf(f, "%s\n", inst->text);

      /* Print which waves execute the instruction right now. */
      while (num_waves && start_addr + inst->offset == waves->pc) {
         fprintf(f,
                 " " COLOR_GREEN "^ SE%u SH%u CU%u "
                 "SIMD%u WAVE%u EXEC=%016" PRIx64 " ",
                 waves->se, waves->sh, waves->cu, waves->simd, waves->wave, waves->exec);

         if (inst->size == 4) {
            fprintf(f, "INST32=%08X" COLOR_RESET "\n", waves->inst_dw0);
         } else {
            fprintf(f, "INST64=%08X %08X" COLOR_RESET "\n", waves->inst_dw0, waves->inst_dw1);
         }

         waves->matched = true;
         waves = &waves[1];
         num_waves--;
      }
   }

   fprintf(f, "\n\n");
   free(instructions);
}

static void
radv_dump_spirv(const struct radv_shader *shader, const char *sha1, const char *dump_dir)
{
   char dump_path[512];
   FILE *f;

   snprintf(dump_path, sizeof(dump_path), "%s/%s.spv", dump_dir, sha1);

   f = fopen(dump_path, "w+");
   if (f) {
      fwrite(shader->spirv, shader->spirv_size, 1, f);
      fclose(f);
   }
}

static void
radv_dump_shader(struct radv_device *device, struct radv_pipeline *pipeline, struct radv_shader *shader,
                 gl_shader_stage stage, const char *dump_dir, FILE *f)
{
   if (!shader)
      return;

   fprintf(f, "%s:\n\n", radv_get_shader_name(&shader->info, stage));

   if (shader->spirv) {
      unsigned char sha1[21];
      char sha1buf[41];

      _mesa_sha1_compute(shader->spirv, shader->spirv_size, sha1);
      _mesa_sha1_format(sha1buf, sha1);

      if (device->vk.enabled_features.deviceFaultVendorBinary) {
         radv_print_spirv(shader->spirv, shader->spirv_size, f);
      } else {
         fprintf(f, "SPIRV (see %s.spv)\n\n", sha1buf);
         radv_dump_spirv(shader, sha1buf, dump_dir);
      }
   }

   if (shader->nir_string) {
      fprintf(f, "NIR:\n%s\n", shader->nir_string);
   }

   fprintf(f, "%s IR:\n%s\n", device->physical_device->use_llvm ? "LLVM" : "ACO", shader->ir_string);
   fprintf(f, "DISASM:\n%s\n", shader->disasm_string);

   radv_dump_shader_stats(device, pipeline, shader, stage, f);
}

static void
radv_dump_vertex_descriptors(const struct radv_device *device, const struct radv_graphics_pipeline *pipeline, FILE *f)
{
   struct radv_shader *vs = radv_get_shader(pipeline->base.shaders, MESA_SHADER_VERTEX);
   uint64_t *ptr = (uint64_t *)device->trace_id_ptr;
   uint32_t count = util_bitcount(vs->info.vs.vb_desc_usage_mask);
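   /* The saved vertex descriptors pointer sits at 64-bit word 3 of the trace BO
    * (dwords [6-7] in the layout comment above).
    */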
   uint32_t *vb_ptr = *(uint32_t **)(ptr + 3);

   if (!count)
      return;

   fprintf(f, "Num vertex %s: %d\n", vs->info.vs.use_per_attribute_vb_descs ? "attributes" : "bindings", count);
   for (uint32_t i = 0; i < count; i++) {
      uint32_t *desc = &((uint32_t *)vb_ptr)[i * 4];
      uint64_t va = 0;

      va |= desc[0];
      va |= (uint64_t)G_008F04_BASE_ADDRESS_HI(desc[1]) << 32;

      fprintf(f, "VBO#%d:\n", i);
      fprintf(f, "\tVA: 0x%" PRIx64 "\n", va);
      fprintf(f, "\tStride: %d\n", G_008F04_STRIDE(desc[1]));
      fprintf(f, "\tNum records: %d (0x%x)\n", desc[2], desc[2]);
   }
}

static struct radv_shader_part *
radv_get_saved_vs_prolog(const struct radv_device *device)
{
   uint64_t *ptr = (uint64_t *)device->trace_id_ptr;
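   /* Vertex prolog pointer: 64-bit word 4 of the trace BO (dwords [8-9] above). */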
   return *(struct radv_shader_part **)(ptr + 4);
}

static void
radv_dump_vs_prolog(const struct radv_device *device, const struct radv_graphics_pipeline *pipeline, FILE *f)
{
   struct radv_shader_part *vs_prolog = radv_get_saved_vs_prolog(device);
   struct radv_shader *vs_shader = radv_get_shader(pipeline->base.shaders, MESA_SHADER_VERTEX);

   if (!vs_prolog || !vs_shader || !vs_shader->info.vs.has_prolog)
      return;

   fprintf(f, "Vertex prolog:\n\n");
   fprintf(f, "DISASM:\n%s\n", vs_prolog->disasm_string);
}

static struct radv_pipeline *
radv_get_saved_pipeline(struct radv_device *device, enum amd_ip_type ring)
{
   uint64_t *ptr = (uint64_t *)device->trace_id_ptr;
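   /* GFX pipeline pointer is saved at 64-bit word 1 (dwords [2-3]), the COMPUTE
    * one at word 2 (dwords [4-5]).
    */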
   int offset = ring == AMD_IP_GFX ? 1 : 2;

   return *(struct radv_pipeline **)(ptr + offset);
}

static void
radv_dump_queue_state(struct radv_queue *queue, const char *dump_dir, FILE *f)
{
   struct radv_device *device = queue->device;
   enum amd_ip_type ring = radv_queue_ring(queue);
   struct radv_pipeline *pipeline;

   fprintf(f, "AMD_IP_%s:\n", ring == AMD_IP_GFX ? "GFX" : "COMPUTE");

   pipeline = radv_get_saved_pipeline(queue->device, ring);
   if (pipeline) {
      fprintf(f, "Pipeline hash: %" PRIx64 "\n", pipeline->pipeline_hash);

      if (pipeline->type == RADV_PIPELINE_GRAPHICS) {
         struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);

         radv_dump_vs_prolog(device, graphics_pipeline, f);

         /* Dump active graphics shaders. */
         unsigned stages = graphics_pipeline->active_stages;
         while (stages) {
            int stage = u_bit_scan(&stages);

            radv_dump_shader(device, &graphics_pipeline->base, graphics_pipeline->base.shaders[stage], stage, dump_dir,
                             f);
         }
      } else if (pipeline->type == RADV_PIPELINE_RAY_TRACING) {
         struct radv_ray_tracing_pipeline *rt_pipeline = radv_pipeline_to_ray_tracing(pipeline);
         for (unsigned i = 0; i < rt_pipeline->stage_count; i++) {
            struct radv_shader *shader = rt_pipeline->stages[i].shader;
            if (shader)
               radv_dump_shader(device, pipeline, shader, shader->info.stage, dump_dir, f);
         }
         radv_dump_shader(device, pipeline, pipeline->shaders[MESA_SHADER_INTERSECTION], MESA_SHADER_INTERSECTION,
                          dump_dir, f);
      } else {
         struct radv_compute_pipeline *compute_pipeline = radv_pipeline_to_compute(pipeline);

         radv_dump_shader(device, &compute_pipeline->base, compute_pipeline->base.shaders[MESA_SHADER_COMPUTE],
                          MESA_SHADER_COMPUTE, dump_dir, f);
      }

      if (!(queue->device->instance->debug_flags & RADV_DEBUG_NO_UMR)) {
         struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP];
         enum amd_gfx_level gfx_level = device->physical_device->rad_info.gfx_level;
         unsigned num_waves = ac_get_wave_info(gfx_level, &device->physical_device->rad_info, waves);

         fprintf(f, COLOR_CYAN "The number of active waves = %u" COLOR_RESET "\n\n", num_waves);

         if (pipeline->type == RADV_PIPELINE_GRAPHICS) {
            struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);

            /* Dump annotated active graphics shaders. */
            unsigned stages = graphics_pipeline->active_stages;
            while (stages) {
               int stage = u_bit_scan(&stages);

               radv_dump_annotated_shader(graphics_pipeline->base.shaders[stage], stage, waves, num_waves, f);
            }
         } else if (pipeline->type == RADV_PIPELINE_RAY_TRACING) {
            struct radv_ray_tracing_pipeline *rt_pipeline = radv_pipeline_to_ray_tracing(pipeline);
            for (unsigned i = 0; i < rt_pipeline->stage_count; i++) {
               struct radv_shader *shader = rt_pipeline->stages[i].shader;
               if (shader)
                  radv_dump_annotated_shader(shader, shader->info.stage, waves, num_waves, f);
            }
            radv_dump_annotated_shader(pipeline->shaders[MESA_SHADER_INTERSECTION], MESA_SHADER_INTERSECTION, waves,
                                       num_waves, f);
         } else {
            struct radv_compute_pipeline *compute_pipeline = radv_pipeline_to_compute(pipeline);

            radv_dump_annotated_shader(compute_pipeline->base.shaders[MESA_SHADER_COMPUTE], MESA_SHADER_COMPUTE, waves,
                                       num_waves, f);
         }

         /* Print waves executing shaders that are not currently bound. */
         unsigned i;
         bool found = false;
         for (i = 0; i < num_waves; i++) {
            if (waves[i].matched)
               continue;

            if (!found) {
               fprintf(f, COLOR_CYAN "Waves not executing currently-bound shaders:" COLOR_RESET "\n");
               found = true;
            }
            fprintf(f, " SE%u SH%u CU%u SIMD%u WAVE%u EXEC=%016" PRIx64 " INST=%08X %08X PC=%" PRIx64 "\n",
                    waves[i].se, waves[i].sh, waves[i].cu, waves[i].simd, waves[i].wave, waves[i].exec,
                    waves[i].inst_dw0, waves[i].inst_dw1, waves[i].pc);
         }
         if (found)
            fprintf(f, "\n\n");
      }

      if (pipeline->type == RADV_PIPELINE_GRAPHICS) {
         struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);
         radv_dump_vertex_descriptors(device, graphics_pipeline, f);
      }
      radv_dump_descriptors(queue->device, f);
   }
}

static void
radv_dump_cmd(const char *cmd, FILE *f)
{
#ifndef _WIN32
   char line[2048];
   FILE *p;

   p = popen(cmd, "r");
   if (p) {
      while (fgets(line, sizeof(line), p))
         fputs(line, f);
      fprintf(f, "\n");
      pclose(p);
   }
#endif
}

static void
radv_dump_dmesg(FILE *f)
{
   fprintf(f, "\nLast 60 lines of dmesg:\n\n");
   radv_dump_cmd("dmesg | tail -n60", f);
}

void
radv_dump_enabled_options(const struct radv_device *device, FILE *f)
{
   uint64_t mask;

   if (device->instance->debug_flags) {
      fprintf(f, "Enabled debug options: ");

      mask = device->instance->debug_flags;
      while (mask) {
         int i = u_bit_scan64(&mask);
         fprintf(f, "%s, ", radv_get_debug_option_name(i));
      }
      fprintf(f, "\n");
   }

   if (device->instance->perftest_flags) {
      fprintf(f, "Enabled perftest options: ");

      mask = device->instance->perftest_flags;
      while (mask) {
         int i = u_bit_scan64(&mask);
         fprintf(f, "%s, ", radv_get_perftest_option_name(i));
      }
      fprintf(f, "\n");
   }
}

static void
radv_dump_app_info(const struct radv_device *device, FILE *f)
{
   const struct radv_instance *instance = device->instance;

   fprintf(f, "Application name: %s\n", instance->vk.app_info.app_name);
   fprintf(f, "Application version: %d\n", instance->vk.app_info.app_version);
   fprintf(f, "Engine name: %s\n", instance->vk.app_info.engine_name);
   fprintf(f, "Engine version: %d\n", instance->vk.app_info.engine_version);
   fprintf(f, "API version: %d.%d.%d\n", VK_VERSION_MAJOR(instance->vk.app_info.api_version),
           VK_VERSION_MINOR(instance->vk.app_info.api_version), VK_VERSION_PATCH(instance->vk.app_info.api_version));

   radv_dump_enabled_options(device, f);
}

static void
radv_dump_device_name(const struct radv_device *device, FILE *f)
{
   const struct radeon_info *info = &device->physical_device->rad_info;
#ifndef _WIN32
   char kernel_version[128] = {0};
   struct utsname uname_data;
#endif

#ifdef _WIN32
   fprintf(f, "Device name: %s (DRM %i.%i.%i)\n\n", device->physical_device->marketing_name, info->drm_major,
           info->drm_minor, info->drm_patchlevel);
#else
   if (uname(&uname_data) == 0)
      snprintf(kernel_version, sizeof(kernel_version), " / %s", uname_data.release);

   fprintf(f, "Device name: %s (DRM %i.%i.%i%s)\n\n", device->physical_device->marketing_name, info->drm_major,
           info->drm_minor, info->drm_patchlevel, kernel_version);
#endif
}

static void
radv_dump_umr_ring(const struct radv_queue *queue, FILE *f)
{
#ifndef _WIN32
   const enum amd_ip_type ring = radv_queue_ring(queue);
   const struct radv_device *device = queue->device;
   char cmd[256];

   /* TODO: Dump compute ring. */
   if (ring != AMD_IP_GFX)
      return;

   sprintf(cmd, "umr --by-pci %04x:%02x:%02x.%01x -RS %s 2>&1", device->physical_device->bus_info.domain,
           device->physical_device->bus_info.bus, device->physical_device->bus_info.dev,
           device->physical_device->bus_info.func,
           device->physical_device->rad_info.gfx_level >= GFX10 ? "gfx_0.0.0" : "gfx");
   fprintf(f, "\nUMR GFX ring:\n\n");
   radv_dump_cmd(cmd, f);
#endif
}

static void
radv_dump_umr_waves(struct radv_queue *queue, FILE *f)
{
#ifndef _WIN32
   enum amd_ip_type ring = radv_queue_ring(queue);
   struct radv_device *device = queue->device;
   char cmd[256];

   /* TODO: Dump compute ring. */
   if (ring != AMD_IP_GFX)
      return;

   sprintf(cmd, "umr --by-pci %04x:%02x:%02x.%01x -O bits,halt_waves -go 0 -wa %s -go 1 2>&1",
           device->physical_device->bus_info.domain, device->physical_device->bus_info.bus,
           device->physical_device->bus_info.dev, device->physical_device->bus_info.func,
           device->physical_device->rad_info.gfx_level >= GFX10 ? "gfx_0.0.0" : "gfx");
   fprintf(f, "\nUMR GFX waves:\n\n");
   radv_dump_cmd(cmd, f);
#endif
}

static bool
radv_gpu_hang_occurred(struct radv_queue *queue, enum amd_ip_type ring)
{
   struct radeon_winsys *ws = queue->device->ws;

   if (!ws->ctx_wait_idle(queue->hw_ctx, ring, queue->vk.index_in_family))
      return true;

   return false;
}

bool
radv_vm_fault_occurred(struct radv_device *device, struct radv_winsys_gpuvm_fault_info *fault_info)
{
   if (!device->physical_device->rad_info.has_gpuvm_fault_query)
      return false;

   return device->ws->query_gpuvm_fault(device->ws, fault_info);
}

enum radv_device_fault_chunk {
   RADV_DEVICE_FAULT_CHUNK_TRACE,
   RADV_DEVICE_FAULT_CHUNK_QUEUE_STATE,
   RADV_DEVICE_FAULT_CHUNK_UMR_WAVES,
   RADV_DEVICE_FAULT_CHUNK_UMR_RING,
   RADV_DEVICE_FAULT_CHUNK_REGISTERS,
   RADV_DEVICE_FAULT_CHUNK_BO_RANGES,
   RADV_DEVICE_FAULT_CHUNK_BO_HISTORY,
   RADV_DEVICE_FAULT_CHUNK_VM_FAULT,
   RADV_DEVICE_FAULT_CHUNK_APP_INFO,
   RADV_DEVICE_FAULT_CHUNK_GPU_INFO,
   RADV_DEVICE_FAULT_CHUNK_DMESG,
   RADV_DEVICE_FAULT_CHUNK_COUNT,
};

void
radv_check_gpu_hangs(struct radv_queue *queue, const struct radv_winsys_submit_info *submit_info)
{
   enum amd_ip_type ring;

   ring = radv_queue_ring(queue);

   bool hang_occurred = radv_gpu_hang_occurred(queue, ring);
   if (!hang_occurred)
      return;

   fprintf(stderr, "radv: GPU hang detected...\n");

#ifndef _WIN32
   const bool save_hang_report = !queue->device->vk.enabled_features.deviceFaultVendorBinary;
   struct radv_winsys_gpuvm_fault_info fault_info = {0};
   struct radv_device *device = queue->device;

   /* Query if a VM fault happened for this GPU hang. */
   bool vm_fault_occurred = radv_vm_fault_occurred(queue->device, &fault_info);

   /* Create a directory into $HOME/radv_dumps_<pid>_<time> to save
    * various debugging info about that GPU hang.
    */
   struct tm *timep, result;
   time_t raw_time;
   FILE *f;
   char dump_dir[256], dump_path[512], buf_time[128];

   if (save_hang_report) {
      time(&raw_time);
      timep = os_localtime(&raw_time, &result);
      strftime(buf_time, sizeof(buf_time), "%Y.%m.%d_%H.%M.%S", timep);

      snprintf(dump_dir, sizeof(dump_dir), "%s/" RADV_DUMP_DIR "_%d_%s", debug_get_option("HOME", "."), getpid(),
               buf_time);
      if (mkdir(dump_dir, 0774) && errno != EEXIST) {
         fprintf(stderr, "radv: can't create directory '%s' (%i).\n", dump_dir, errno);
         abort();
      }

      fprintf(stderr, "radv: GPU hang report will be saved to '%s'!\n", dump_dir);
   }

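   /* Human-readable names for each chunk; this initializer must stay in the same
    * order as enum radv_device_fault_chunk because the array is indexed by it in
    * the switch below.
    */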
   struct {
      const char *name;
      char *ptr;
      size_t size;
   } chunks[RADV_DEVICE_FAULT_CHUNK_COUNT] = {
      {"trace"}, {"pipeline"}, {"umr_waves"}, {"umr_ring"}, {"registers"}, {"bo_ranges"},
      {"bo_history"}, {"vm_fault"}, {"app_info"}, {"gpu_info"}, {"dmesg"},
   };

   for (uint32_t i = 0; i < RADV_DEVICE_FAULT_CHUNK_COUNT; i++) {

      if (save_hang_report) {
         snprintf(dump_path, sizeof(dump_path), "%s/%s.log", dump_dir, chunks[i].name);

         f = fopen(dump_path, "w+");
      } else {
         f = open_memstream(&chunks[i].ptr, &chunks[i].size);
      }

      if (!f)
         continue;

      switch (i) {
      case RADV_DEVICE_FAULT_CHUNK_TRACE:
         radv_dump_trace(queue->device, submit_info->cs_array[0], f);
         break;
      case RADV_DEVICE_FAULT_CHUNK_QUEUE_STATE:
         radv_dump_queue_state(queue, dump_dir, f);
         break;
      case RADV_DEVICE_FAULT_CHUNK_UMR_WAVES:
         if (!(device->instance->debug_flags & RADV_DEBUG_NO_UMR))
            radv_dump_umr_waves(queue, f);
         break;
      case RADV_DEVICE_FAULT_CHUNK_UMR_RING:
         if (!(device->instance->debug_flags & RADV_DEBUG_NO_UMR))
            radv_dump_umr_ring(queue, f);
         break;
      case RADV_DEVICE_FAULT_CHUNK_REGISTERS:
         radv_dump_debug_registers(device, f);
         break;
      case RADV_DEVICE_FAULT_CHUNK_BO_RANGES:
         device->ws->dump_bo_ranges(device->ws, f);
         break;
      case RADV_DEVICE_FAULT_CHUNK_BO_HISTORY:
         device->ws->dump_bo_log(device->ws, f);
         break;
      case RADV_DEVICE_FAULT_CHUNK_VM_FAULT:
         if (vm_fault_occurred) {
            fprintf(f, "VM fault report.\n\n");
            fprintf(f, "Failing VM page: 0x%08" PRIx64 "\n", fault_info.addr);
            ac_print_gpuvm_fault_status(f, device->physical_device->rad_info.gfx_level, fault_info.status);
         }
         break;
      case RADV_DEVICE_FAULT_CHUNK_APP_INFO:
         radv_dump_app_info(device, f);
         break;
      case RADV_DEVICE_FAULT_CHUNK_GPU_INFO:
         radv_dump_device_name(device, f);
         ac_print_gpu_info(&device->physical_device->rad_info, f);
         break;
      case RADV_DEVICE_FAULT_CHUNK_DMESG:
         radv_dump_dmesg(f);
         break;
      default:
         break;
      }

      fclose(f);
   }

   if (save_hang_report) {
      fprintf(stderr, "radv: GPU hang report saved successfully!\n");
      abort();
   } else {
      char *report;

      report = ralloc_strdup(NULL, "========== RADV GPU hang report ==========\n");
      for (uint32_t i = 0; i < RADV_DEVICE_FAULT_CHUNK_COUNT; i++) {
         if (!chunks[i].size)
            continue;

         ralloc_asprintf_append(&report, "\n========== %s ==========\n", chunks[i].name);
         ralloc_asprintf_append(&report, "%s", chunks[i].ptr);

         free(chunks[i].ptr);
      }

      device->gpu_hang_report = report;
   }

#endif
}

void
radv_print_spirv(const char *data, uint32_t size, FILE *fp)
{
#ifndef _WIN32
   char path[] = "/tmp/fileXXXXXX";
   char command[128];
   int fd;

   /* Dump the binary into a temporary file. */
   fd = mkstemp(path);
   if (fd < 0)
      return;

   if (write(fd, data, size) == -1)
      goto fail;

   /* Disassemble using spirv-dis if installed. */
   sprintf(command, "spirv-dis %s", path);
   radv_dump_cmd(command, fp);

fail:
   close(fd);
   unlink(path);
#endif
}

bool
radv_trap_handler_init(struct radv_device *device)
{
   struct radeon_winsys *ws = device->ws;
   VkResult result;

   /* Create the trap handler shader and upload it like other shaders. */
   device->trap_handler_shader = radv_create_trap_handler_shader(device);
   if (!device->trap_handler_shader) {
      fprintf(stderr, "radv: failed to create the trap handler shader.\n");
      return false;
   }

   result = ws->buffer_make_resident(ws, device->trap_handler_shader->bo, true);
   if (result != VK_SUCCESS)
      return false;

   result = ws->buffer_create(
      ws, TMA_BO_SIZE, 256, RADEON_DOMAIN_VRAM,
      RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM | RADEON_FLAG_32BIT,
      RADV_BO_PRIORITY_SCRATCH, 0, &device->tma_bo);
   if (result != VK_SUCCESS)
      return false;

   result = ws->buffer_make_resident(ws, device->tma_bo, true);
   if (result != VK_SUCCESS)
      return false;

   device->tma_ptr = ws->buffer_map(device->tma_bo);
   if (!device->tma_ptr)
      return false;

   /* Upload a buffer descriptor to store various info from the trap. */
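   /* The descriptor itself occupies the first 16 bytes of the TMA BO (tma_ptr[0..3]),
    * so point its base address just past it: the trap data (ttmp0/ttmp1 and the
    * SQ_WAVE_* register copies) then lands at tma_ptr[4] onwards, where
    * radv_check_trap_handler() and radv_dump_sq_hw_regs() read it back.
    */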
   uint64_t tma_va = radv_buffer_get_va(device->tma_bo) + 16;
   uint32_t desc[4];

   desc[0] = tma_va;
   desc[1] = S_008F04_BASE_ADDRESS_HI(tma_va >> 32);
   desc[2] = TMA_BO_SIZE;
   desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
             S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
             S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);

   memcpy(device->tma_ptr, desc, sizeof(desc));

   return true;
}

void
radv_trap_handler_finish(struct radv_device *device)
{
   struct radeon_winsys *ws = device->ws;

   if (unlikely(device->trap_handler_shader)) {
      ws->buffer_make_resident(ws, device->trap_handler_shader->bo, false);
      radv_shader_unref(device, device->trap_handler_shader);
   }

   if (unlikely(device->tma_bo)) {
      ws->buffer_make_resident(ws, device->tma_bo, false);
      ws->buffer_destroy(ws, device->tma_bo);
   }
}

static void
radv_dump_faulty_shader(struct radv_device *device, uint64_t faulty_pc)
{
   struct radv_shader *shader;
   uint64_t start_addr, end_addr;
   uint32_t instr_offset;

   shader = radv_find_shader(device, faulty_pc);
   if (!shader)
      return;

   start_addr = radv_shader_get_va(shader);
   end_addr = start_addr + shader->code_size;
   instr_offset = faulty_pc - start_addr;

   fprintf(stderr,
           "Faulty shader found "
           "VA=[0x%" PRIx64 "-0x%" PRIx64 "], instr_offset=%d\n",
           start_addr, end_addr, instr_offset);

   /* Get the list of instructions.
    * Buffer size / 4 is the upper bound of the instruction count.
    */
   unsigned num_inst = 0;
   struct radv_shader_inst *instructions = calloc(shader->code_size / 4, sizeof(struct radv_shader_inst));

   /* Split the disassembly string into instructions. */
   radv_add_split_disasm(shader->disasm_string, start_addr, &num_inst, instructions);

   /* Print instructions with annotations. */
   for (unsigned i = 0; i < num_inst; i++) {
      struct radv_shader_inst *inst = &instructions[i];

      if (start_addr + inst->offset == faulty_pc) {
         fprintf(stderr, "\n!!! Faulty instruction below !!!\n");
         fprintf(stderr, "%s\n", inst->text);
         fprintf(stderr, "\n");
      } else {
         fprintf(stderr, "%s\n", inst->text);
      }
   }

   free(instructions);
}

struct radv_sq_hw_reg {
   uint32_t status;
   uint32_t trap_sts;
   uint32_t hw_id;
   uint32_t ib_sts;
};

static void
radv_dump_sq_hw_regs(struct radv_device *device)
{
   enum amd_gfx_level gfx_level = device->physical_device->rad_info.gfx_level;
   enum radeon_family family = device->physical_device->rad_info.family;
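   /* The trap handler stores copies of the SQ_WAVE_* registers right after
    * ttmp0/ttmp1 (tma_ptr[4]/[5]), i.e. starting at dword 6 of the TMA buffer.
    */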
   struct radv_sq_hw_reg *regs = (struct radv_sq_hw_reg *)&device->tma_ptr[6];

   fprintf(stderr, "\nHardware registers:\n");
   if (device->physical_device->rad_info.gfx_level >= GFX10) {
      ac_dump_reg(stderr, gfx_level, family, R_000408_SQ_WAVE_STATUS, regs->status, ~0);
      ac_dump_reg(stderr, gfx_level, family, R_00040C_SQ_WAVE_TRAPSTS, regs->trap_sts, ~0);
      ac_dump_reg(stderr, gfx_level, family, R_00045C_SQ_WAVE_HW_ID1, regs->hw_id, ~0);
      ac_dump_reg(stderr, gfx_level, family, R_00041C_SQ_WAVE_IB_STS, regs->ib_sts, ~0);
   } else {
      ac_dump_reg(stderr, gfx_level, family, R_000048_SQ_WAVE_STATUS, regs->status, ~0);
      ac_dump_reg(stderr, gfx_level, family, R_00004C_SQ_WAVE_TRAPSTS, regs->trap_sts, ~0);
      ac_dump_reg(stderr, gfx_level, family, R_000050_SQ_WAVE_HW_ID, regs->hw_id, ~0);
      ac_dump_reg(stderr, gfx_level, family, R_00005C_SQ_WAVE_IB_STS, regs->ib_sts, ~0);
   }
   fprintf(stderr, "\n\n");
}

void
radv_check_trap_handler(struct radv_queue *queue)
{
   enum amd_ip_type ring = radv_queue_ring(queue);
   struct radv_device *device = queue->device;
   struct radeon_winsys *ws = device->ws;

   /* Wait for the context to be idle in a finite time. */
   ws->ctx_wait_idle(queue->hw_ctx, ring, queue->vk.index_in_family);

   /* Try to detect if the trap handler has been reached by the hw by
    * looking at ttmp0 which should be non-zero if a shader exception
    * happened.
    */
   if (!device->tma_ptr[4])
      return;

#if 0
   fprintf(stderr, "tma_ptr:\n");
   for (unsigned i = 0; i < 10; i++)
      fprintf(stderr, "tma_ptr[%d]=0x%x\n", i, device->tma_ptr[i]);
#endif

   radv_dump_sq_hw_regs(device);

   uint32_t ttmp0 = device->tma_ptr[4];
   uint32_t ttmp1 = device->tma_ptr[5];

   /* According to the ISA docs, 3.10 Trap and Exception Registers:
    *
    * "{ttmp1, ttmp0} = {3'h0, pc_rewind[3:0], HT[0], trapID[7:0], PC[47:0]}"
    *
    * "When the trap handler is entered, the PC of the faulting
    * instruction is: (PC - PC_rewind * 4)."
    */
   uint8_t trap_id = (ttmp1 >> 16) & 0xff;
   uint8_t ht = (ttmp1 >> 24) & 0x1;
   uint8_t pc_rewind = (ttmp1 >> 25) & 0xf;
   uint64_t pc = (ttmp0 | ((ttmp1 & 0x0000ffffull) << 32)) - (pc_rewind * 4);

   fprintf(stderr, "PC=0x%" PRIx64 ", trapID=%d, HT=%d, PC_rewind=%d\n", pc, trap_id, ht, pc_rewind);

   radv_dump_faulty_shader(device, pc);

   abort();
}

/* VK_EXT_device_fault */
VKAPI_ATTR VkResult VKAPI_CALL
radv_GetDeviceFaultInfoEXT(VkDevice _device, VkDeviceFaultCountsEXT *pFaultCounts, VkDeviceFaultInfoEXT *pFaultInfo)
{
   VK_OUTARRAY_MAKE_TYPED(VkDeviceFaultAddressInfoEXT, out, pFaultInfo ? pFaultInfo->pAddressInfos : NULL,
                          &pFaultCounts->addressInfoCount);
   struct radv_winsys_gpuvm_fault_info fault_info = {0};
   RADV_FROM_HANDLE(radv_device, device, _device);
   bool vm_fault_occurred = false;

   /* Query if a GPUVM fault happened. */
   vm_fault_occurred = radv_vm_fault_occurred(device, &fault_info);

   /* No vendor-specific crash dumps yet. */
   pFaultCounts->vendorInfoCount = 0;
   pFaultCounts->vendorBinarySize = 0;

   if (device->gpu_hang_report) {
      const struct radv_physical_device *pdevice = device->physical_device;

      VkDeviceFaultVendorBinaryHeaderVersionOneEXT hdr;

      hdr.headerSize = sizeof(VkDeviceFaultVendorBinaryHeaderVersionOneEXT);
      hdr.headerVersion = VK_DEVICE_FAULT_VENDOR_BINARY_HEADER_VERSION_ONE_EXT;
      hdr.vendorID = pdevice->vk.properties.vendorID;
      hdr.deviceID = pdevice->vk.properties.deviceID;
      hdr.driverVersion = pdevice->vk.properties.driverVersion;
      memcpy(hdr.pipelineCacheUUID, pdevice->cache_uuid, VK_UUID_SIZE);
      hdr.applicationNameOffset = 0;
      hdr.applicationVersion = pdevice->instance->vk.app_info.app_version;
      hdr.engineNameOffset = 0;
      hdr.engineVersion = pdevice->instance->vk.app_info.engine_version;
      hdr.apiVersion = pdevice->instance->vk.app_info.api_version;

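      /* The vendor binary is the fixed-size header immediately followed by the
       * plain-text hang report built in radv_check_gpu_hangs().
       */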
      pFaultCounts->vendorBinarySize = sizeof(hdr) + strlen(device->gpu_hang_report);
      if (pFaultInfo) {
         memcpy(pFaultInfo->pVendorBinaryData, &hdr, sizeof(hdr));
         memcpy((char *)pFaultInfo->pVendorBinaryData + sizeof(hdr), device->gpu_hang_report,
                strlen(device->gpu_hang_report));
      }
   }

   if (vm_fault_occurred) {
      VkDeviceFaultAddressInfoEXT addr_fault_info = {
         .reportedAddress = fault_info.addr,
         .addressPrecision = 4096, /* 4K page granularity */
      };

      if (pFaultInfo)
         strncpy(pFaultInfo->description, "A GPUVM fault has been detected", sizeof(pFaultInfo->description));

      if (device->physical_device->rad_info.gfx_level >= GFX10) {
         addr_fault_info.addressType = G_00A130_RW(fault_info.status) ? VK_DEVICE_FAULT_ADDRESS_TYPE_WRITE_INVALID_EXT
                                                                      : VK_DEVICE_FAULT_ADDRESS_TYPE_READ_INVALID_EXT;
      } else {
         /* Not sure how to get the access status on GFX6-9. */
         addr_fault_info.addressType = VK_DEVICE_FAULT_ADDRESS_TYPE_NONE_EXT;
      }
      vk_outarray_append_typed(VkDeviceFaultAddressInfoEXT, &out, elem) *elem = addr_fault_info;
   }

   return vk_outarray_status(&out);
}
