1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * SPDX-License-Identifier: MIT
9 */
10
11 #include <stdio.h>
12 #include <stdlib.h>
13 #ifndef _WIN32
14 #include <sys/utsname.h>
15 #endif
16 #include <sys/stat.h>
17
18 #include "spirv/nir_spirv.h"
19 #include "util/mesa-sha1.h"
20 #include "util/os_time.h"
21 #include "ac_debug.h"
22 #include "ac_descriptors.h"
23 #include "git_sha1.h"
24 #include "radv_buffer.h"
25 #include "radv_debug.h"
26 #include "radv_descriptor_set.h"
27 #include "radv_entrypoints.h"
28 #include "radv_pipeline_graphics.h"
29 #include "radv_pipeline_rt.h"
30 #include "radv_shader.h"
31 #include "sid.h"
32
33 #include "vk_common_entrypoints.h"
34 #include "vk_enum_to_str.h"
35
/* ANSI terminal escape sequences used to colorize the textual dumps. */
#define COLOR_RESET  "\033[0m"
#define COLOR_RED    "\033[31m"
#define COLOR_GREEN  "\033[1;32m"
#define COLOR_YELLOW "\033[1;33m"
#define COLOR_CYAN   "\033[1;36m"

/* Directory name for dumps (presumably consumed by radv_create_dump_dir()). */
#define RADV_DUMP_DIR "radv_dumps"
43
44 static void
radv_dump_address_binding_report(const struct radv_address_binding_report * report,FILE * f)45 radv_dump_address_binding_report(const struct radv_address_binding_report *report, FILE *f)
46 {
47 fprintf(f, "timestamp=%llu, VA=%.16llx-%.16llx, binding_type=%s, object_type=%s, object_handle=0x%llx\n",
48 (long long)report->timestamp, (long long)report->va, (long long)(report->va + report->size),
49 (report->binding_type == VK_DEVICE_ADDRESS_BINDING_TYPE_BIND_EXT) ? "bind" : "unbind",
50 vk_ObjectType_to_str(report->object_type), (long long)report->object_handle);
51 }
52
53 static void
radv_dump_address_binding_reports(struct radv_device * device,FILE * f)54 radv_dump_address_binding_reports(struct radv_device *device, FILE *f)
55 {
56 struct radv_address_binding_tracker *tracker = device->addr_binding_tracker;
57
58 simple_mtx_lock(&tracker->mtx);
59 util_dynarray_foreach (&tracker->reports, struct radv_address_binding_report, report)
60 radv_dump_address_binding_report(report, f);
61 simple_mtx_unlock(&tracker->mtx);
62 }
63
64 static void
radv_dump_address_binding_report_check(struct radv_device * device,uint64_t va,FILE * f)65 radv_dump_address_binding_report_check(struct radv_device *device, uint64_t va, FILE *f)
66 {
67 struct radv_address_binding_tracker *tracker = device->addr_binding_tracker;
68 bool va_found = false;
69 bool va_valid = false;
70
71 if (!tracker)
72 return;
73
74 fprintf(f, "\nPerforming some verifications with address binding report...\n");
75
76 simple_mtx_lock(&tracker->mtx);
77
78 util_dynarray_foreach (&tracker->reports, struct radv_address_binding_report, report) {
79 if (va < report->va || va >= report->va + report->size)
80 continue;
81
82 if (report->object_type == VK_OBJECT_TYPE_DEVICE_MEMORY) {
83 if (report->binding_type == VK_DEVICE_ADDRESS_BINDING_TYPE_BIND_EXT) {
84 va_valid = true; /* BO alloc */
85 } else {
86 va_valid = false; /* BO destroy */
87 }
88 }
89
90 radv_dump_address_binding_report(report, f);
91 va_found = true;
92 }
93
94 simple_mtx_unlock(&tracker->mtx);
95
96 if (va_found) {
97 if (!va_valid)
98 fprintf(f, "\nPotential use-after-free detected! See addr_binding_report.log for more info.\n");
99 } else {
100 fprintf(f, "VA not found!\n");
101 }
102 }
103
104 static VkBool32 VKAPI_PTR
radv_address_binding_callback(VkDebugUtilsMessageSeverityFlagBitsEXT message_severity,VkDebugUtilsMessageTypeFlagsEXT message_types,const VkDebugUtilsMessengerCallbackDataEXT * callback_data,void * userdata)105 radv_address_binding_callback(VkDebugUtilsMessageSeverityFlagBitsEXT message_severity,
106 VkDebugUtilsMessageTypeFlagsEXT message_types,
107 const VkDebugUtilsMessengerCallbackDataEXT *callback_data, void *userdata)
108 {
109 struct radv_address_binding_tracker *tracker = userdata;
110 const VkDeviceAddressBindingCallbackDataEXT *data;
111
112 if (!callback_data)
113 return VK_FALSE;
114
115 data = vk_find_struct_const(callback_data->pNext, DEVICE_ADDRESS_BINDING_CALLBACK_DATA_EXT);
116 if (!data)
117 return VK_FALSE;
118
119 simple_mtx_lock(&tracker->mtx);
120
121 for (uint32_t i = 0; i < callback_data->objectCount; i++) {
122 struct radv_address_binding_report report = {
123 .timestamp = os_time_get_nano(),
124 .va = data->baseAddress & ((1ull << 48) - 1),
125 .size = data->size,
126 .flags = data->flags,
127 .binding_type = data->bindingType,
128 .object_handle = callback_data->pObjects[i].objectHandle,
129 .object_type = callback_data->pObjects[i].objectType,
130 };
131
132 util_dynarray_append(&tracker->reports, struct radv_address_binding_report, report);
133 }
134
135 simple_mtx_unlock(&tracker->mtx);
136
137 return VK_FALSE;
138 }
139
140 static bool
radv_init_adress_binding_report(struct radv_device * device)141 radv_init_adress_binding_report(struct radv_device *device)
142 {
143 struct radv_physical_device *pdev = radv_device_physical(device);
144 struct radv_instance *instance = radv_physical_device_instance(pdev);
145 VkResult result;
146
147 device->addr_binding_tracker = calloc(1, sizeof(*device->addr_binding_tracker));
148 if (!device->addr_binding_tracker)
149 return false;
150
151 simple_mtx_init(&device->addr_binding_tracker->mtx, mtx_plain);
152 util_dynarray_init(&device->addr_binding_tracker->reports, NULL);
153
154 VkDebugUtilsMessengerCreateInfoEXT create_info = {
155 .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT,
156 .pUserData = device->addr_binding_tracker,
157 .pfnUserCallback = radv_address_binding_callback,
158 .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_DEVICE_ADDRESS_BINDING_BIT_EXT,
159 };
160
161 result = vk_common_CreateDebugUtilsMessengerEXT(radv_instance_to_handle(instance), &create_info, NULL,
162 &device->addr_binding_tracker->messenger);
163 if (result != VK_SUCCESS)
164 return false;
165
166 return true;
167 }
168
169 static void
radv_finish_address_binding_report(struct radv_device * device)170 radv_finish_address_binding_report(struct radv_device *device)
171 {
172 struct radv_physical_device *pdev = radv_device_physical(device);
173 struct radv_instance *instance = radv_physical_device_instance(pdev);
174 struct radv_address_binding_tracker *tracker = device->addr_binding_tracker;
175
176 util_dynarray_fini(&tracker->reports);
177 simple_mtx_destroy(&tracker->mtx);
178
179 vk_common_DestroyDebugUtilsMessengerEXT(radv_instance_to_handle(instance), tracker->messenger, NULL);
180 free(device->addr_binding_tracker);
181 }
182
183 bool
radv_init_trace(struct radv_device * device)184 radv_init_trace(struct radv_device *device)
185 {
186 struct radeon_winsys *ws = device->ws;
187 VkResult result;
188
189 result = radv_bo_create(
190 device, NULL, sizeof(struct radv_trace_data), 8, RADEON_DOMAIN_VRAM,
191 RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM | RADEON_FLAG_VA_UNCACHED,
192 RADV_BO_PRIORITY_UPLOAD_BUFFER, 0, true, &device->trace_bo);
193 if (result != VK_SUCCESS)
194 return false;
195
196 result = ws->buffer_make_resident(ws, device->trace_bo, true);
197 if (result != VK_SUCCESS)
198 return false;
199
200 device->trace_data = radv_buffer_map(ws, device->trace_bo);
201 if (!device->trace_data)
202 return false;
203
204 if (!radv_init_adress_binding_report(device))
205 return false;
206
207 return true;
208 }
209
210 void
radv_finish_trace(struct radv_device * device)211 radv_finish_trace(struct radv_device *device)
212 {
213 struct radeon_winsys *ws = device->ws;
214
215 if (device->addr_binding_tracker)
216 radv_finish_address_binding_report(device);
217
218 if (unlikely(device->trace_bo)) {
219 ws->buffer_make_resident(ws, device->trace_bo, false);
220 radv_bo_destroy(device, NULL, device->trace_bo);
221 }
222 }
223
224 static void
radv_dump_trace(const struct radv_device * device,struct radeon_cmdbuf * cs,FILE * f)225 radv_dump_trace(const struct radv_device *device, struct radeon_cmdbuf *cs, FILE *f)
226 {
227 fprintf(f, "Trace ID: %x\n", device->trace_data->primary_id);
228 device->ws->cs_dump(cs, f, (const int *)&device->trace_data->primary_id, 2, RADV_CS_DUMP_TYPE_IBS);
229 }
230
231 static void
radv_dump_mmapped_reg(const struct radv_device * device,FILE * f,unsigned offset)232 radv_dump_mmapped_reg(const struct radv_device *device, FILE *f, unsigned offset)
233 {
234 const struct radv_physical_device *pdev = radv_device_physical(device);
235 struct radeon_winsys *ws = device->ws;
236 uint32_t value;
237
238 if (ws->read_registers(ws, offset, 1, &value))
239 ac_dump_reg(f, pdev->info.gfx_level, pdev->info.family, offset, value, ~0);
240 }
241
242 static void
radv_dump_debug_registers(const struct radv_device * device,FILE * f)243 radv_dump_debug_registers(const struct radv_device *device, FILE *f)
244 {
245 const struct radv_physical_device *pdev = radv_device_physical(device);
246 const struct radeon_info *gpu_info = &pdev->info;
247
248 fprintf(f, "Memory-mapped registers:\n");
249 radv_dump_mmapped_reg(device, f, R_008010_GRBM_STATUS);
250
251 radv_dump_mmapped_reg(device, f, R_008008_GRBM_STATUS2);
252 radv_dump_mmapped_reg(device, f, R_008014_GRBM_STATUS_SE0);
253 radv_dump_mmapped_reg(device, f, R_008018_GRBM_STATUS_SE1);
254 radv_dump_mmapped_reg(device, f, R_008038_GRBM_STATUS_SE2);
255 radv_dump_mmapped_reg(device, f, R_00803C_GRBM_STATUS_SE3);
256 radv_dump_mmapped_reg(device, f, R_00D034_SDMA0_STATUS_REG);
257 radv_dump_mmapped_reg(device, f, R_00D834_SDMA1_STATUS_REG);
258 if (gpu_info->gfx_level <= GFX8) {
259 radv_dump_mmapped_reg(device, f, R_000E50_SRBM_STATUS);
260 radv_dump_mmapped_reg(device, f, R_000E4C_SRBM_STATUS2);
261 radv_dump_mmapped_reg(device, f, R_000E54_SRBM_STATUS3);
262 }
263 radv_dump_mmapped_reg(device, f, R_008680_CP_STAT);
264 radv_dump_mmapped_reg(device, f, R_008674_CP_STALLED_STAT1);
265 radv_dump_mmapped_reg(device, f, R_008678_CP_STALLED_STAT2);
266 radv_dump_mmapped_reg(device, f, R_008670_CP_STALLED_STAT3);
267 radv_dump_mmapped_reg(device, f, R_008210_CP_CPC_STATUS);
268 radv_dump_mmapped_reg(device, f, R_008214_CP_CPC_BUSY_STAT);
269 radv_dump_mmapped_reg(device, f, R_008218_CP_CPC_STALLED_STAT1);
270 radv_dump_mmapped_reg(device, f, R_00821C_CP_CPF_STATUS);
271 radv_dump_mmapped_reg(device, f, R_008220_CP_CPF_BUSY_STAT);
272 radv_dump_mmapped_reg(device, f, R_008224_CP_CPF_STALLED_STAT1);
273 fprintf(f, "\n");
274 }
275
276 static void
radv_dump_buffer_descriptor(enum amd_gfx_level gfx_level,enum radeon_family family,const uint32_t * desc,FILE * f)277 radv_dump_buffer_descriptor(enum amd_gfx_level gfx_level, enum radeon_family family, const uint32_t *desc, FILE *f)
278 {
279 fprintf(f, COLOR_CYAN "Buffer:" COLOR_RESET "\n");
280 for (unsigned j = 0; j < 4; j++)
281 ac_dump_reg(f, gfx_level, family, R_008F00_SQ_BUF_RSRC_WORD0 + j * 4, desc[j], 0xffffffff);
282 }
283
284 static void
radv_dump_image_descriptor(enum amd_gfx_level gfx_level,enum radeon_family family,const uint32_t * desc,FILE * f)285 radv_dump_image_descriptor(enum amd_gfx_level gfx_level, enum radeon_family family, const uint32_t *desc, FILE *f)
286 {
287 unsigned sq_img_rsrc_word0 = gfx_level >= GFX10 ? R_00A000_SQ_IMG_RSRC_WORD0 : R_008F10_SQ_IMG_RSRC_WORD0;
288
289 fprintf(f, COLOR_CYAN "Image:" COLOR_RESET "\n");
290 for (unsigned j = 0; j < 8; j++)
291 ac_dump_reg(f, gfx_level, family, sq_img_rsrc_word0 + j * 4, desc[j], 0xffffffff);
292
293 fprintf(f, COLOR_CYAN " FMASK:" COLOR_RESET "\n");
294 for (unsigned j = 0; j < 8; j++)
295 ac_dump_reg(f, gfx_level, family, sq_img_rsrc_word0 + j * 4, desc[8 + j], 0xffffffff);
296 }
297
298 static void
radv_dump_sampler_descriptor(enum amd_gfx_level gfx_level,enum radeon_family family,const uint32_t * desc,FILE * f)299 radv_dump_sampler_descriptor(enum amd_gfx_level gfx_level, enum radeon_family family, const uint32_t *desc, FILE *f)
300 {
301 fprintf(f, COLOR_CYAN "Sampler state:" COLOR_RESET "\n");
302 for (unsigned j = 0; j < 4; j++) {
303 ac_dump_reg(f, gfx_level, family, R_008F30_SQ_IMG_SAMP_WORD0 + j * 4, desc[j], 0xffffffff);
304 }
305 }
306
307 static void
radv_dump_combined_image_sampler_descriptor(enum amd_gfx_level gfx_level,enum radeon_family family,const uint32_t * desc,FILE * f)308 radv_dump_combined_image_sampler_descriptor(enum amd_gfx_level gfx_level, enum radeon_family family,
309 const uint32_t *desc, FILE *f)
310 {
311 radv_dump_image_descriptor(gfx_level, family, desc, f);
312 radv_dump_sampler_descriptor(gfx_level, family, desc + 16, f);
313 }
314
315 static void
radv_dump_descriptor_set(const struct radv_device * device,const struct radv_descriptor_set * set,unsigned id,FILE * f)316 radv_dump_descriptor_set(const struct radv_device *device, const struct radv_descriptor_set *set, unsigned id, FILE *f)
317 {
318 const struct radv_physical_device *pdev = radv_device_physical(device);
319 enum amd_gfx_level gfx_level = pdev->info.gfx_level;
320 enum radeon_family family = pdev->info.family;
321 const struct radv_descriptor_set_layout *layout;
322 int i;
323
324 if (!set)
325 return;
326 layout = set->header.layout;
327
328 for (i = 0; i < set->header.layout->binding_count; i++) {
329 uint32_t *desc = set->header.mapped_ptr + layout->binding[i].offset / 4;
330
331 fprintf(f, "(set=%u binding=%u offset=0x%x) ", id, i, layout->binding[i].offset);
332
333 switch (layout->binding[i].type) {
334 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
335 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
336 case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
337 case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
338 radv_dump_buffer_descriptor(gfx_level, family, desc, f);
339 break;
340 case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
341 case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
342 case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
343 radv_dump_image_descriptor(gfx_level, family, desc, f);
344 break;
345 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
346 radv_dump_combined_image_sampler_descriptor(gfx_level, family, desc, f);
347 break;
348 case VK_DESCRIPTOR_TYPE_SAMPLER:
349 radv_dump_sampler_descriptor(gfx_level, family, desc, f);
350 break;
351 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
352 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
353 case VK_DESCRIPTOR_TYPE_MUTABLE_EXT:
354 case VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR:
355 /* todo */
356 break;
357 default:
358 assert(!"unknown descriptor type");
359 break;
360 }
361 fprintf(f, "\n");
362 }
363 fprintf(f, "\n\n");
364 }
365
366 static void
radv_dump_descriptors(struct radv_device * device,FILE * f)367 radv_dump_descriptors(struct radv_device *device, FILE *f)
368 {
369 int i;
370
371 fprintf(f, "Descriptors:\n");
372 for (i = 0; i < MAX_SETS; i++) {
373 struct radv_descriptor_set *set = (struct radv_descriptor_set *)(uintptr_t)device->trace_data->descriptor_sets[i];
374
375 radv_dump_descriptor_set(device, set, i, f);
376 }
377 }
378
/* One line of shader disassembly together with its position in the binary. */
struct radv_shader_inst {
   char text[160];  /* one disasm line */
   unsigned offset; /* instruction offset */
   unsigned size;   /* instruction size >= 4 */
};

/* Split a disassembly string into lines and add them to the array pointed
 * to by "instructions".
 *
 * disasm:       NUL-terminated disassembly; only lines terminated by '\n' are
 *               considered.
 * start_addr:   address of the first instruction, used for the PC annotation.
 * num:          in/out count of valid entries in "instructions"; new entries
 *               are appended after the existing ones.
 * instructions: destination array. Its capacity is not known here, so the
 *               caller must size it for the whole shader.
 *
 * Lines without a ';' are ignored. For the others the instruction size is
 * derived from the number of 8-digit hex words after the ';'. A line
 * containing "then repeated N times" duplicates the previous instruction N
 * times at consecutive offsets. Lines longer than the text field are
 * truncated.
 */
static void
radv_add_split_disasm(const char *disasm, uint64_t start_addr, unsigned *num, struct radv_shader_inst *instructions)
{
   struct radv_shader_inst *last_inst = *num ? &instructions[*num - 1] : NULL;
   const char *repeat = strstr(disasm, "then repeated");
   const char *next;

   while ((next = strchr(disasm, '\n'))) {
      const size_t len = (size_t)(next - disasm);

      /* "repeat" is always searched from the current line or later, so it only
       * has to be checked against the end of the line.
       */
      if (repeat && repeat < next) {
         unsigned repeat_count = 0;

         /* Only duplicate when there is a previous instruction and the count
          * parsed successfully; otherwise the marker is skipped.
          */
         if (last_inst && sscanf(repeat, "then repeated %u times", &repeat_count) == 1 && repeat_count > 0) {
            const struct radv_shader_inst repeated = *last_inst;

            for (unsigned i = 0; i < repeat_count; i++) {
               struct radv_shader_inst *copy = &instructions[*num];

               *copy = repeated;
               copy->offset = repeated.offset + repeated.size * (i + 1);
               last_inst = copy;
               (*num)++;
            }
         }

         disasm = next + 1;
         repeat = strstr(disasm, "then repeated");
         continue;
      }

      const char *semicolon = memchr(disasm, ';', len);
      if (!semicolon) {
         /* Ignore everything that is not an instruction. */
         disasm = next + 1;
         continue;
      }

      struct radv_shader_inst *inst = &instructions[*num];

      /* Never write past the text field, even in builds without asserts. */
      const size_t text_len = len < sizeof(inst->text) ? len : sizeof(inst->text) - 1;

      memcpy(inst->text, disasm, text_len);
      inst->text[text_len] = '\0';
      inst->offset = last_inst ? last_inst->offset + last_inst->size : 0;

      /* 9 = 8 hex digits + a leading space */
      inst->size = (unsigned)((next - semicolon) / 9 * 4);

      snprintf(inst->text + text_len, sizeof(inst->text) - text_len, " [PC=0x%" PRIx64 ", off=%u, size=%u]",
               start_addr + inst->offset, inst->offset, inst->size);

      last_inst = inst;
      (*num)++;
      disasm = next + 1;
   }
}
440
441 static void
radv_dump_annotated_shader(const struct radv_shader * shader,gl_shader_stage stage,struct ac_wave_info * waves,unsigned num_waves,FILE * f)442 radv_dump_annotated_shader(const struct radv_shader *shader, gl_shader_stage stage, struct ac_wave_info *waves,
443 unsigned num_waves, FILE *f)
444 {
445 uint64_t start_addr, end_addr;
446 unsigned i;
447
448 if (!shader)
449 return;
450
451 start_addr = radv_shader_get_va(shader) & ((1ull << 48) - 1);
452 end_addr = start_addr + shader->code_size;
453
454 /* See if any wave executes the shader. */
455 for (i = 0; i < num_waves; i++) {
456 if (start_addr <= waves[i].pc && waves[i].pc <= end_addr)
457 break;
458 }
459
460 if (i == num_waves)
461 return; /* the shader is not being executed */
462
463 /* Remember the first found wave. The waves are sorted according to PC. */
464 waves = &waves[i];
465 num_waves -= i;
466
467 /* Get the list of instructions.
468 * Buffer size / 4 is the upper bound of the instruction count.
469 */
470 unsigned num_inst = 0;
471 struct radv_shader_inst *instructions = calloc(shader->code_size / 4, sizeof(struct radv_shader_inst));
472
473 radv_add_split_disasm(shader->disasm_string, start_addr, &num_inst, instructions);
474
475 fprintf(f, COLOR_YELLOW "%s - annotated disassembly:" COLOR_RESET "\n", radv_get_shader_name(&shader->info, stage));
476
477 /* Print instructions with annotations. */
478 for (i = 0; i < num_inst; i++) {
479 struct radv_shader_inst *inst = &instructions[i];
480
481 fprintf(f, "%s\n", inst->text);
482
483 /* Print which waves execute the instruction right now. */
484 while (num_waves && start_addr + inst->offset == waves->pc) {
485 fprintf(f,
486 " " COLOR_GREEN "^ SE%u SH%u CU%u "
487 "SIMD%u WAVE%u EXEC=%016" PRIx64 " ",
488 waves->se, waves->sh, waves->cu, waves->simd, waves->wave, waves->exec);
489
490 if (inst->size == 4) {
491 fprintf(f, "INST32=%08X" COLOR_RESET "\n", waves->inst_dw0);
492 } else {
493 fprintf(f, "INST64=%08X %08X" COLOR_RESET "\n", waves->inst_dw0, waves->inst_dw1);
494 }
495
496 waves->matched = true;
497 waves = &waves[1];
498 num_waves--;
499 }
500 }
501
502 fprintf(f, "\n\n");
503 free(instructions);
504 }
505
506 static void
radv_dump_spirv(const struct radv_shader * shader,const char * sha1,const char * dump_dir)507 radv_dump_spirv(const struct radv_shader *shader, const char *sha1, const char *dump_dir)
508 {
509 char dump_path[512];
510 FILE *f;
511
512 snprintf(dump_path, sizeof(dump_path), "%s/%s.spv", dump_dir, sha1);
513
514 f = fopen(dump_path, "w+");
515 if (f) {
516 fwrite(shader->spirv, shader->spirv_size, 1, f);
517 fclose(f);
518 }
519 }
520
521 static void
radv_dump_shader(struct radv_device * device,struct radv_pipeline * pipeline,struct radv_shader * shader,gl_shader_stage stage,const char * dump_dir,FILE * f)522 radv_dump_shader(struct radv_device *device, struct radv_pipeline *pipeline, struct radv_shader *shader,
523 gl_shader_stage stage, const char *dump_dir, FILE *f)
524 {
525 const struct radv_physical_device *pdev = radv_device_physical(device);
526
527 if (!shader)
528 return;
529
530 fprintf(f, "%s:\n\n", radv_get_shader_name(&shader->info, stage));
531
532 if (shader->spirv) {
533 unsigned char sha1[21];
534 char sha1buf[41];
535
536 _mesa_sha1_compute(shader->spirv, shader->spirv_size, sha1);
537 _mesa_sha1_format(sha1buf, sha1);
538
539 if (device->vk.enabled_features.deviceFaultVendorBinary) {
540 spirv_print_asm(f, (const uint32_t *)shader->spirv, shader->spirv_size / 4);
541 } else {
542 fprintf(f, "SPIRV (see %s.spv)\n\n", sha1buf);
543 radv_dump_spirv(shader, sha1buf, dump_dir);
544 }
545 }
546
547 if (shader->nir_string) {
548 fprintf(f, "NIR:\n%s\n", shader->nir_string);
549 }
550
551 fprintf(f, "%s IR:\n%s\n", pdev->use_llvm ? "LLVM" : "ACO", shader->ir_string);
552 fprintf(f, "DISASM:\n%s\n", shader->disasm_string);
553
554 if (pipeline)
555 radv_dump_shader_stats(device, pipeline, shader, stage, f);
556 }
557
558 static void
radv_dump_vertex_descriptors(const struct radv_device * device,const struct radv_graphics_pipeline * pipeline,FILE * f)559 radv_dump_vertex_descriptors(const struct radv_device *device, const struct radv_graphics_pipeline *pipeline, FILE *f)
560 {
561 struct radv_shader *vs = radv_get_shader(pipeline->base.shaders, MESA_SHADER_VERTEX);
562 uint32_t count = util_bitcount(vs->info.vs.vb_desc_usage_mask);
563 uint32_t *vb_ptr = (uint32_t *)(uintptr_t)device->trace_data->vertex_descriptors;
564
565 if (!count)
566 return;
567
568 fprintf(f, "Num vertex %s: %d\n", vs->info.vs.use_per_attribute_vb_descs ? "attributes" : "bindings", count);
569 for (uint32_t i = 0; i < count; i++) {
570 uint32_t *desc = &((uint32_t *)vb_ptr)[i * 4];
571 uint64_t va = 0;
572
573 va |= desc[0];
574 va |= (uint64_t)G_008F04_BASE_ADDRESS_HI(desc[1]) << 32;
575
576 fprintf(f, "VBO#%d:\n", i);
577 fprintf(f, "\tVA: 0x%" PRIx64 "\n", va);
578 fprintf(f, "\tStride: %d\n", G_008F04_STRIDE(desc[1]));
579 fprintf(f, "\tNum records: %d (0x%x)\n", desc[2], desc[2]);
580 }
581 }
582
583 static void
radv_dump_vs_prolog(const struct radv_device * device,const struct radv_graphics_pipeline * pipeline,FILE * f)584 radv_dump_vs_prolog(const struct radv_device *device, const struct radv_graphics_pipeline *pipeline, FILE *f)
585 {
586 struct radv_shader_part *vs_prolog = (struct radv_shader_part *)(uintptr_t)device->trace_data->vertex_prolog;
587 struct radv_shader *vs_shader = radv_get_shader(pipeline->base.shaders, MESA_SHADER_VERTEX);
588
589 if (!vs_prolog || !vs_shader || !vs_shader->info.vs.has_prolog)
590 return;
591
592 fprintf(f, "Vertex prolog:\n\n");
593 fprintf(f, "DISASM:\n%s\n", vs_prolog->disasm_string);
594 }
595
596 static struct radv_pipeline *
radv_get_saved_pipeline(struct radv_device * device,enum amd_ip_type ring)597 radv_get_saved_pipeline(struct radv_device *device, enum amd_ip_type ring)
598 {
599 if (ring == AMD_IP_GFX)
600 return (struct radv_pipeline *)(uintptr_t)device->trace_data->gfx_ring_pipeline;
601 else
602 return (struct radv_pipeline *)(uintptr_t)device->trace_data->comp_ring_pipeline;
603 }
604
605 static void
radv_dump_queue_state(struct radv_queue * queue,const char * dump_dir,const char * wave_dump,FILE * f)606 radv_dump_queue_state(struct radv_queue *queue, const char *dump_dir, const char *wave_dump, FILE *f)
607 {
608 struct radv_device *device = radv_queue_device(queue);
609 const struct radv_physical_device *pdev = radv_device_physical(device);
610 enum amd_ip_type ring = radv_queue_ring(queue);
611 struct radv_pipeline *pipeline;
612
613 fprintf(f, "AMD_IP_%s:\n", ac_get_ip_type_string(&pdev->info, ring));
614
615 pipeline = radv_get_saved_pipeline(device, ring);
616 if (pipeline) {
617 fprintf(f, "Pipeline hash: %" PRIx64 "\n", pipeline->pipeline_hash);
618
619 if (pipeline->type == RADV_PIPELINE_GRAPHICS) {
620 struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);
621
622 radv_dump_vs_prolog(device, graphics_pipeline, f);
623
624 /* Dump active graphics shaders. */
625 unsigned stages = graphics_pipeline->active_stages;
626 while (stages) {
627 int stage = u_bit_scan(&stages);
628
629 radv_dump_shader(device, &graphics_pipeline->base, graphics_pipeline->base.shaders[stage], stage, dump_dir,
630 f);
631 }
632 } else if (pipeline->type == RADV_PIPELINE_RAY_TRACING) {
633 struct radv_ray_tracing_pipeline *rt_pipeline = radv_pipeline_to_ray_tracing(pipeline);
634 for (unsigned i = 0; i < rt_pipeline->stage_count; i++) {
635 struct radv_shader *shader = rt_pipeline->stages[i].shader;
636 if (shader)
637 radv_dump_shader(device, pipeline, shader, shader->info.stage, dump_dir, f);
638 }
639 radv_dump_shader(device, pipeline, pipeline->shaders[MESA_SHADER_INTERSECTION], MESA_SHADER_INTERSECTION,
640 dump_dir, f);
641 } else {
642 struct radv_compute_pipeline *compute_pipeline = radv_pipeline_to_compute(pipeline);
643
644 radv_dump_shader(device, &compute_pipeline->base, compute_pipeline->base.shaders[MESA_SHADER_COMPUTE],
645 MESA_SHADER_COMPUTE, dump_dir, f);
646 }
647
648 if (wave_dump) {
649 struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP];
650 enum amd_gfx_level gfx_level = pdev->info.gfx_level;
651 unsigned num_waves = ac_get_wave_info(gfx_level, &pdev->info, wave_dump, waves);
652
653 fprintf(f, COLOR_CYAN "The number of active waves = %u" COLOR_RESET "\n\n", num_waves);
654
655 if (pipeline->type == RADV_PIPELINE_GRAPHICS) {
656 struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);
657
658 /* Dump annotated active graphics shaders. */
659 unsigned stages = graphics_pipeline->active_stages;
660 while (stages) {
661 int stage = u_bit_scan(&stages);
662
663 radv_dump_annotated_shader(graphics_pipeline->base.shaders[stage], stage, waves, num_waves, f);
664 }
665 } else if (pipeline->type == RADV_PIPELINE_RAY_TRACING) {
666 struct radv_ray_tracing_pipeline *rt_pipeline = radv_pipeline_to_ray_tracing(pipeline);
667 for (unsigned i = 0; i < rt_pipeline->stage_count; i++) {
668 struct radv_shader *shader = rt_pipeline->stages[i].shader;
669 if (shader)
670 radv_dump_annotated_shader(shader, shader->info.stage, waves, num_waves, f);
671 }
672 radv_dump_annotated_shader(pipeline->shaders[MESA_SHADER_INTERSECTION], MESA_SHADER_INTERSECTION, waves,
673 num_waves, f);
674 } else {
675 struct radv_compute_pipeline *compute_pipeline = radv_pipeline_to_compute(pipeline);
676
677 radv_dump_annotated_shader(compute_pipeline->base.shaders[MESA_SHADER_COMPUTE], MESA_SHADER_COMPUTE, waves,
678 num_waves, f);
679 }
680
681 /* Print waves executing shaders that are not currently bound. */
682 unsigned i;
683 bool found = false;
684 for (i = 0; i < num_waves; i++) {
685 if (waves[i].matched)
686 continue;
687
688 if (!found) {
689 fprintf(f, COLOR_CYAN "Waves not executing currently-bound shaders:" COLOR_RESET "\n");
690 found = true;
691 }
692
693 struct radv_shader *shader = radv_find_shader(device, waves[0].pc);
694 if (shader) {
695 radv_dump_annotated_shader(shader, shader->info.stage, waves, num_waves, f);
696 if (waves[i].matched)
697 continue;
698 }
699
700 fprintf(f, " SE%u SH%u CU%u SIMD%u WAVE%u EXEC=%016" PRIx64 " INST=%08X %08X PC=%" PRIx64 "\n",
701 waves[i].se, waves[i].sh, waves[i].cu, waves[i].simd, waves[i].wave, waves[i].exec,
702 waves[i].inst_dw0, waves[i].inst_dw1, waves[i].pc);
703 }
704 if (found)
705 fprintf(f, "\n\n");
706 }
707
708 VkDispatchIndirectCommand dispatch_indirect = device->trace_data->indirect_dispatch;
709 if (dispatch_indirect.x || dispatch_indirect.y || dispatch_indirect.z)
710 fprintf(f, "VkDispatchIndirectCommand: x=%u y=%u z=%u\n\n\n", dispatch_indirect.x, dispatch_indirect.y,
711 dispatch_indirect.z);
712
713 if (pipeline->type == RADV_PIPELINE_GRAPHICS) {
714 struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);
715 radv_dump_vertex_descriptors(device, graphics_pipeline, f);
716 }
717 radv_dump_descriptors(device, f);
718 }
719 }
720
/* Run "cmd" through popen() and copy its standard output to "f", followed by
 * one extra newline. Nothing is written when the command cannot be started.
 * This is a no-op on Windows.
 */
static void
radv_dump_cmd(const char *cmd, FILE *f)
{
#ifndef _WIN32
   FILE *proc = popen(cmd, "r");

   if (proc == NULL)
      return;

   char chunk[2048];
   while (fgets(chunk, sizeof(chunk), proc) != NULL)
      fputs(chunk, f);

   fprintf(f, "\n");
   pclose(proc);
#endif
}
737
/* Append the output of "dmesg | tail -n60" to "f" under a short header. */
static void
radv_dump_dmesg(FILE *f)
{
   static const char dmesg_cmd[] = "dmesg | tail -n60";

   fputs("\nLast 60 lines of dmesg:\n\n", f);
   radv_dump_cmd(dmesg_cmd, f);
}
744
745 void
radv_dump_enabled_options(const struct radv_device * device,FILE * f)746 radv_dump_enabled_options(const struct radv_device *device, FILE *f)
747 {
748 const struct radv_physical_device *pdev = radv_device_physical(device);
749 const struct radv_instance *instance = radv_physical_device_instance(pdev);
750 uint64_t mask;
751
752 if (instance->debug_flags) {
753 fprintf(f, "Enabled debug options: ");
754
755 mask = instance->debug_flags;
756 while (mask) {
757 int i = u_bit_scan64(&mask);
758 fprintf(f, "%s, ", radv_get_debug_option_name(i));
759 }
760 fprintf(f, "\n");
761 }
762
763 if (instance->perftest_flags) {
764 fprintf(f, "Enabled perftest options: ");
765
766 mask = instance->perftest_flags;
767 while (mask) {
768 int i = u_bit_scan64(&mask);
769 fprintf(f, "%s, ", radv_get_perftest_option_name(i));
770 }
771 fprintf(f, "\n");
772 }
773 }
774
775 static void
radv_dump_app_info(const struct radv_device * device,FILE * f)776 radv_dump_app_info(const struct radv_device *device, FILE *f)
777 {
778 const struct radv_physical_device *pdev = radv_device_physical(device);
779 const struct radv_instance *instance = radv_physical_device_instance(pdev);
780
781 fprintf(f, "Application name: %s\n", instance->vk.app_info.app_name);
782 fprintf(f, "Application version: %d\n", instance->vk.app_info.app_version);
783 fprintf(f, "Engine name: %s\n", instance->vk.app_info.engine_name);
784 fprintf(f, "Engine version: %d\n", instance->vk.app_info.engine_version);
785 fprintf(f, "API version: %d.%d.%d\n", VK_VERSION_MAJOR(instance->vk.app_info.api_version),
786 VK_VERSION_MINOR(instance->vk.app_info.api_version), VK_VERSION_PATCH(instance->vk.app_info.api_version));
787
788 radv_dump_enabled_options(device, f);
789 }
790
791 static void
radv_dump_device_name(const struct radv_device * device,FILE * f)792 radv_dump_device_name(const struct radv_device *device, FILE *f)
793 {
794 #ifndef _WIN32
795 const struct radv_physical_device *pdev = radv_device_physical(device);
796 const struct radeon_info *gpu_info = &pdev->info;
797 char kernel_version[128] = {0};
798 struct utsname uname_data;
799
800 if (uname(&uname_data) == 0)
801 snprintf(kernel_version, sizeof(kernel_version), " / %s", uname_data.release);
802
803 fprintf(f, "Mesa version: " PACKAGE_VERSION MESA_GIT_SHA1 "\n");
804 fprintf(f, "Device name: %s (DRM %i.%i.%i%s)\n\n", pdev->marketing_name, gpu_info->drm_major, gpu_info->drm_minor,
805 gpu_info->drm_patchlevel, kernel_version);
806 #endif
807 }
808
809 static void
radv_dump_umr_ring(const struct radv_queue * queue,FILE * f)810 radv_dump_umr_ring(const struct radv_queue *queue, FILE *f)
811 {
812 #ifndef _WIN32
813 const struct radv_device *device = radv_queue_device(queue);
814 const struct radv_physical_device *pdev = radv_device_physical(device);
815 const enum amd_ip_type ring = radv_queue_ring(queue);
816 char cmd[256];
817
818 /* TODO: Dump compute ring. */
819 if (ring != AMD_IP_GFX)
820 return;
821
822 sprintf(cmd, "umr --by-pci %04x:%02x:%02x.%01x -RS %s 2>&1", pdev->bus_info.domain, pdev->bus_info.bus,
823 pdev->bus_info.dev, pdev->bus_info.func, pdev->info.gfx_level >= GFX10 ? "gfx_0.0.0" : "gfx");
824 fprintf(f, "\nUMR GFX ring:\n\n");
825 radv_dump_cmd(cmd, f);
826 #endif
827 }
828
/* Print the "UMR GFX waves" header followed by the wave dump text. A NULL
 * "wave_dump" prints the header only. "queue" is not used.
 */
static void
radv_dump_umr_waves(struct radv_queue *queue, const char *wave_dump, FILE *f)
{
   const char *text = wave_dump;

   if (text == NULL)
      text = "";

   fprintf(f, "\nUMR GFX waves:\n\n%s", text);
}
834
835 static void
radv_dump_vm_fault(struct radv_device * device,const struct radv_winsys_gpuvm_fault_info * fault_info,FILE * f)836 radv_dump_vm_fault(struct radv_device *device, const struct radv_winsys_gpuvm_fault_info *fault_info, FILE *f)
837 {
838 struct radv_physical_device *pdev = radv_device_physical(device);
839
840 fprintf(f, "VM fault report.\n\n");
841 fprintf(f, "Failing VM page: 0x%08" PRIx64 "\n", fault_info->addr);
842 ac_print_gpuvm_fault_status(f, pdev->info.gfx_level, fault_info->status);
843
844 radv_dump_address_binding_report_check(device, fault_info->addr, f);
845 }
846
847 static bool
radv_gpu_hang_occurred(struct radv_queue * queue,enum amd_ip_type ring)848 radv_gpu_hang_occurred(struct radv_queue *queue, enum amd_ip_type ring)
849 {
850 const struct radv_device *device = radv_queue_device(queue);
851 struct radeon_winsys *ws = device->ws;
852
853 if (!ws->ctx_wait_idle(queue->hw_ctx, ring, queue->vk.index_in_family))
854 return true;
855
856 return false;
857 }
858
859 bool
radv_vm_fault_occurred(struct radv_device * device,struct radv_winsys_gpuvm_fault_info * fault_info)860 radv_vm_fault_occurred(struct radv_device *device, struct radv_winsys_gpuvm_fault_info *fault_info)
861 {
862 const struct radv_physical_device *pdev = radv_device_physical(device);
863
864 if (!pdev->info.has_gpuvm_fault_query)
865 return false;
866
867 return device->ws->query_gpuvm_fault(device->ws, fault_info);
868 }
869
/*
 * Sections ("chunks") of a GPU hang report, in the order they are generated
 * by radv_check_gpu_hangs(). The values index the per-chunk table there, so
 * they must stay dense and start at zero.
 */
enum radv_device_fault_chunk {
   RADV_DEVICE_FAULT_CHUNK_TRACE = 0,           /* radv_dump_trace() */
   RADV_DEVICE_FAULT_CHUNK_QUEUE_STATE,         /* radv_dump_queue_state(), named "pipeline" in the report */
   RADV_DEVICE_FAULT_CHUNK_UMR_WAVES,           /* radv_dump_umr_waves() */
   RADV_DEVICE_FAULT_CHUNK_UMR_RING,            /* radv_dump_umr_ring() */
   RADV_DEVICE_FAULT_CHUNK_REGISTERS,           /* radv_dump_debug_registers() */
   RADV_DEVICE_FAULT_CHUNK_BO_RANGES,           /* winsys dump_bo_ranges() */
   RADV_DEVICE_FAULT_CHUNK_BO_HISTORY,          /* winsys dump_bo_log() */
   RADV_DEVICE_FAULT_CHUNK_ADDR_BINDING_REPORT, /* radv_dump_address_binding_reports() */
   RADV_DEVICE_FAULT_CHUNK_VM_FAULT,            /* radv_dump_vm_fault(), only if a VM fault was found */
   RADV_DEVICE_FAULT_CHUNK_APP_INFO,            /* radv_dump_app_info() */
   RADV_DEVICE_FAULT_CHUNK_GPU_INFO,            /* radv_dump_device_name() + ac_print_gpu_info() */
   RADV_DEVICE_FAULT_CHUNK_DMESG,               /* radv_dump_dmesg() */
   RADV_DEVICE_FAULT_CHUNK_COUNT,               /* number of chunks, not a chunk itself */
};
885
886 static char *
radv_create_dump_dir()887 radv_create_dump_dir()
888 {
889 #ifndef _WIN32
890 char dump_dir[256], buf_time[128];
891 struct tm *timep, result;
892 time_t raw_time;
893
894 time(&raw_time);
895 timep = os_localtime(&raw_time, &result);
896 strftime(buf_time, sizeof(buf_time), "%Y.%m.%d_%H.%M.%S", timep);
897
898 snprintf(dump_dir, sizeof(dump_dir), "%s/" RADV_DUMP_DIR "_%d_%s", debug_get_option("HOME", "."), getpid(),
899 buf_time);
900 if (mkdir(dump_dir, 0774) && errno != EEXIST) {
901 fprintf(stderr, "radv: can't create directory '%s' (%i).\n", dump_dir, errno);
902 abort();
903 }
904
905 return strdup(dump_dir);
906 #else
907 return NULL;
908 #endif
909 }
910
911 VkResult
radv_check_gpu_hangs(struct radv_queue * queue,const struct radv_winsys_submit_info * submit_info)912 radv_check_gpu_hangs(struct radv_queue *queue, const struct radv_winsys_submit_info *submit_info)
913 {
914 enum amd_ip_type ring;
915
916 ring = radv_queue_ring(queue);
917
918 bool hang_occurred = radv_gpu_hang_occurred(queue, ring);
919 if (!hang_occurred)
920 return VK_SUCCESS;
921
922 fprintf(stderr, "radv: GPU hang detected...\n");
923
924 #ifndef _WIN32
925 struct radv_device *device = radv_queue_device(queue);
926 const struct radv_physical_device *pdev = radv_device_physical(device);
927 const struct radv_instance *instance = radv_physical_device_instance(pdev);
928 const bool save_hang_report = !device->vk.enabled_features.deviceFaultVendorBinary;
929 struct radv_winsys_gpuvm_fault_info fault_info = {0};
930
931 /* Query if a VM fault happened for this GPU hang. */
932 bool vm_fault_occurred = radv_vm_fault_occurred(device, &fault_info);
933
934 /* Create a directory into $HOME/radv_dumps_<pid>_<time> to save
935 * various debugging info about that GPU hang.
936 */
937 FILE *f;
938 char *dump_dir = NULL;
939 char dump_path[512];
940
941 if (save_hang_report) {
942 dump_dir = radv_create_dump_dir();
943
944 fprintf(stderr, "radv: GPU hang report will be saved to '%s'!\n", dump_dir);
945 }
946
947 struct {
948 const char *name;
949 char *ptr;
950 size_t size;
951 } chunks[RADV_DEVICE_FAULT_CHUNK_COUNT] = {
952 {"trace"}, {"pipeline"}, {"umr_waves"}, {"umr_ring"},
953 {"registers"}, {"bo_ranges"}, {"bo_history"}, {"addr_binding_report"},
954 {"vm_fault"}, {"app_info"}, {"gpu_info"}, {"dmesg"},
955 };
956
957 char *wave_dump = NULL;
958 if (!(instance->debug_flags & RADV_DEBUG_NO_UMR))
959 wave_dump = ac_get_umr_waves(&pdev->info, radv_queue_ring(queue));
960
961 for (uint32_t i = 0; i < RADV_DEVICE_FAULT_CHUNK_COUNT; i++) {
962
963 if (save_hang_report) {
964 snprintf(dump_path, sizeof(dump_path), "%s/%s.log", dump_dir, chunks[i].name);
965
966 f = fopen(dump_path, "w+");
967 } else {
968 f = open_memstream(&chunks[i].ptr, &chunks[i].size);
969 }
970
971 if (!f)
972 continue;
973
974 switch (i) {
975 case RADV_DEVICE_FAULT_CHUNK_TRACE:
976 radv_dump_trace(device, submit_info->cs_array[0], f);
977 break;
978 case RADV_DEVICE_FAULT_CHUNK_QUEUE_STATE:
979 radv_dump_queue_state(queue, dump_dir, wave_dump, f);
980 break;
981 case RADV_DEVICE_FAULT_CHUNK_UMR_WAVES:
982 if (!(instance->debug_flags & RADV_DEBUG_NO_UMR))
983 radv_dump_umr_waves(queue, wave_dump, f);
984 break;
985 case RADV_DEVICE_FAULT_CHUNK_UMR_RING:
986 if (!(instance->debug_flags & RADV_DEBUG_NO_UMR))
987 radv_dump_umr_ring(queue, f);
988 break;
989 case RADV_DEVICE_FAULT_CHUNK_REGISTERS:
990 radv_dump_debug_registers(device, f);
991 break;
992 case RADV_DEVICE_FAULT_CHUNK_BO_RANGES:
993 device->ws->dump_bo_ranges(device->ws, f);
994 break;
995 case RADV_DEVICE_FAULT_CHUNK_BO_HISTORY:
996 device->ws->dump_bo_log(device->ws, f);
997 break;
998 case RADV_DEVICE_FAULT_CHUNK_ADDR_BINDING_REPORT:
999 radv_dump_address_binding_reports(device, f);
1000 break;
1001 case RADV_DEVICE_FAULT_CHUNK_VM_FAULT:
1002 if (vm_fault_occurred)
1003 radv_dump_vm_fault(device, &fault_info, f);
1004 break;
1005 case RADV_DEVICE_FAULT_CHUNK_APP_INFO:
1006 radv_dump_app_info(device, f);
1007 break;
1008 case RADV_DEVICE_FAULT_CHUNK_GPU_INFO:
1009 radv_dump_device_name(device, f);
1010 ac_print_gpu_info(&pdev->info, f);
1011 break;
1012 case RADV_DEVICE_FAULT_CHUNK_DMESG:
1013 radv_dump_dmesg(f);
1014 break;
1015 default:
1016 break;
1017 }
1018
1019 fclose(f);
1020 }
1021
1022 free(dump_dir);
1023 free(wave_dump);
1024
1025 if (save_hang_report) {
1026 fprintf(stderr, "radv: GPU hang report saved successfully!\n");
1027 abort();
1028 } else {
1029 char *report;
1030
1031 report = ralloc_strdup(NULL, "========== RADV GPU hang report ==========\n");
1032 for (uint32_t i = 0; i < RADV_DEVICE_FAULT_CHUNK_COUNT; i++) {
1033 if (!chunks[i].size)
1034 continue;
1035
1036 ralloc_asprintf_append(&report, "\n========== %s ==========\n", chunks[i].name);
1037 ralloc_asprintf_append(&report, "%s", chunks[i].ptr);
1038
1039 free(chunks[i].ptr);
1040 }
1041
1042 device->gpu_hang_report = report;
1043 }
1044
1045 #endif
1046 return VK_ERROR_DEVICE_LOST;
1047 }
1048
1049 bool
radv_trap_handler_init(struct radv_device * device)1050 radv_trap_handler_init(struct radv_device *device)
1051 {
1052 const struct radv_physical_device *pdev = radv_device_physical(device);
1053 struct radeon_winsys *ws = device->ws;
1054 uint32_t desc[4];
1055 VkResult result;
1056 uint32_t size;
1057
1058 /* Create the trap handler shader and upload it like other shaders. */
1059 device->trap_handler_shader = radv_create_trap_handler_shader(device);
1060 if (!device->trap_handler_shader) {
1061 fprintf(stderr, "radv: failed to create the trap handler shader.\n");
1062 return false;
1063 }
1064
1065 result = ws->buffer_make_resident(ws, device->trap_handler_shader->bo, true);
1066 if (result != VK_SUCCESS)
1067 return false;
1068
1069 /* Compute the TMA BO size. */
1070 size = sizeof(desc) + sizeof(struct aco_trap_handler_layout);
1071
1072 result = radv_bo_create(
1073 device, NULL, size, 256, RADEON_DOMAIN_VRAM,
1074 RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM | RADEON_FLAG_32BIT,
1075 RADV_BO_PRIORITY_SCRATCH, 0, true, &device->tma_bo);
1076 if (result != VK_SUCCESS)
1077 return false;
1078
1079 result = ws->buffer_make_resident(ws, device->tma_bo, true);
1080 if (result != VK_SUCCESS)
1081 return false;
1082
1083 device->tma_ptr = radv_buffer_map(ws, device->tma_bo);
1084 if (!device->tma_ptr)
1085 return false;
1086
1087 /* Upload a buffer descriptor to store various info from the trap. */
1088 uint64_t tma_va = radv_buffer_get_va(device->tma_bo) + sizeof(desc);
1089
1090 const struct ac_buffer_state ac_state = {
1091 .va = tma_va,
1092 .size = size - sizeof(desc),
1093 .format = PIPE_FORMAT_R32_FLOAT,
1094 .swizzle =
1095 {
1096 PIPE_SWIZZLE_X,
1097 PIPE_SWIZZLE_Y,
1098 PIPE_SWIZZLE_Z,
1099 PIPE_SWIZZLE_W,
1100 },
1101 .gfx10_oob_select = V_008F0C_OOB_SELECT_RAW,
1102 .stride = 4, /* Used for VGPRs dump. */
1103 };
1104
1105 ac_build_buffer_descriptor(pdev->info.gfx_level, &ac_state, desc);
1106
1107 memcpy(device->tma_ptr, desc, sizeof(desc));
1108
1109 return true;
1110 }
1111
1112 void
radv_trap_handler_finish(struct radv_device * device)1113 radv_trap_handler_finish(struct radv_device *device)
1114 {
1115 struct radeon_winsys *ws = device->ws;
1116
1117 if (unlikely(device->trap_handler_shader)) {
1118 ws->buffer_make_resident(ws, device->trap_handler_shader->bo, false);
1119 radv_shader_unref(device, device->trap_handler_shader);
1120 }
1121
1122 if (unlikely(device->tma_bo)) {
1123 ws->buffer_make_resident(ws, device->tma_bo, false);
1124 radv_bo_destroy(device, NULL, device->tma_bo);
1125 }
1126 }
1127
1128 static void
radv_dump_faulty_shader(const struct radv_device * device,const struct radv_shader * shader,uint64_t faulty_pc,FILE * f)1129 radv_dump_faulty_shader(const struct radv_device *device, const struct radv_shader *shader, uint64_t faulty_pc, FILE *f)
1130 {
1131 uint64_t start_addr, end_addr;
1132 uint32_t instr_offset;
1133
1134 start_addr = radv_shader_get_va(shader);
1135 start_addr &= ((1ull << 48) - 1);
1136 end_addr = start_addr + shader->code_size;
1137 instr_offset = faulty_pc - start_addr;
1138
1139 fprintf(f,
1140 "Faulty shader found "
1141 "VA=[0x%" PRIx64 "-0x%" PRIx64 "], instr_offset=%d\n",
1142 start_addr, end_addr, instr_offset);
1143
1144 /* Get the list of instructions.
1145 * Buffer size / 4 is the upper bound of the instruction count.
1146 */
1147 unsigned num_inst = 0;
1148 struct radv_shader_inst *instructions = calloc(shader->code_size / 4, sizeof(struct radv_shader_inst));
1149
1150 /* Split the disassembly string into instructions. */
1151 radv_add_split_disasm(shader->disasm_string, start_addr, &num_inst, instructions);
1152
1153 /* Print instructions with annotations. */
1154 for (unsigned i = 0; i < num_inst; i++) {
1155 struct radv_shader_inst *inst = &instructions[i];
1156
1157 if (start_addr + inst->offset == faulty_pc) {
1158 fprintf(f, "\n!!! Faulty instruction below !!!\n");
1159 fprintf(f, "%s\n", inst->text);
1160 fprintf(f, "\n");
1161 } else {
1162 fprintf(f, "%s\n", inst->text);
1163 }
1164 }
1165
1166 free(instructions);
1167 }
1168
1169 static void
radv_dump_sq_hw_regs(struct radv_device * device,const struct aco_trap_handler_layout * layout,FILE * f)1170 radv_dump_sq_hw_regs(struct radv_device *device, const struct aco_trap_handler_layout *layout, FILE *f)
1171 {
1172 const struct radv_physical_device *pdev = radv_device_physical(device);
1173 enum amd_gfx_level gfx_level = pdev->info.gfx_level;
1174 enum radeon_family family = pdev->info.family;
1175
1176 fprintf(f, "\nHardware registers:\n");
1177 if (pdev->info.gfx_level >= GFX10) {
1178 ac_dump_reg(f, gfx_level, family, R_000404_SQ_WAVE_MODE, layout->sq_wave_regs.mode, ~0);
1179 ac_dump_reg(f, gfx_level, family, R_000408_SQ_WAVE_STATUS, layout->sq_wave_regs.status, ~0);
1180 ac_dump_reg(f, gfx_level, family, R_00040C_SQ_WAVE_TRAPSTS, layout->sq_wave_regs.trap_sts, ~0);
1181 ac_dump_reg(f, gfx_level, family, R_00045C_SQ_WAVE_HW_ID1, layout->sq_wave_regs.hw_id1, ~0);
1182 ac_dump_reg(f, gfx_level, family, R_000414_SQ_WAVE_GPR_ALLOC, layout->sq_wave_regs.gpr_alloc, ~0);
1183 ac_dump_reg(f, gfx_level, family, R_000418_SQ_WAVE_LDS_ALLOC, layout->sq_wave_regs.lds_alloc, ~0);
1184 ac_dump_reg(f, gfx_level, family, R_00041C_SQ_WAVE_IB_STS, layout->sq_wave_regs.ib_sts, ~0);
1185 } else {
1186 ac_dump_reg(f, gfx_level, family, R_000044_SQ_WAVE_MODE, layout->sq_wave_regs.mode, ~0);
1187 ac_dump_reg(f, gfx_level, family, R_000048_SQ_WAVE_STATUS, layout->sq_wave_regs.status, ~0);
1188 ac_dump_reg(f, gfx_level, family, R_00004C_SQ_WAVE_TRAPSTS, layout->sq_wave_regs.trap_sts, ~0);
1189 ac_dump_reg(f, gfx_level, family, R_000050_SQ_WAVE_HW_ID, layout->sq_wave_regs.hw_id1, ~0);
1190 ac_dump_reg(f, gfx_level, family, R_000054_SQ_WAVE_GPR_ALLOC, layout->sq_wave_regs.gpr_alloc, ~0);
1191 ac_dump_reg(f, gfx_level, family, R_000058_SQ_WAVE_LDS_ALLOC, layout->sq_wave_regs.lds_alloc, ~0);
1192 ac_dump_reg(f, gfx_level, family, R_00005C_SQ_WAVE_IB_STS, layout->sq_wave_regs.ib_sts, ~0);
1193 }
1194 fprintf(f, "\n\n");
1195 }
1196
1197 static uint32_t
radv_get_vgpr_size(const struct radv_device * device,const struct aco_trap_handler_layout * layout)1198 radv_get_vgpr_size(const struct radv_device *device, const struct aco_trap_handler_layout *layout)
1199 {
1200 const struct radv_physical_device *pdev = radv_device_physical(device);
1201 uint32_t vgpr_size;
1202
1203 if (pdev->info.gfx_level >= GFX11) {
1204 vgpr_size = G_000414_VGPR_SIZE_GFX11(layout->sq_wave_regs.gpr_alloc);
1205 } else if (pdev->info.gfx_level >= GFX10) {
1206 vgpr_size = G_000414_VGPR_SIZE_GFX10(layout->sq_wave_regs.gpr_alloc);
1207 } else {
1208 vgpr_size = G_000054_VGPR_SIZE_GFX6(layout->sq_wave_regs.gpr_alloc);
1209 }
1210
1211 return vgpr_size;
1212 }
1213
1214 static void
radv_dump_shader_regs(const struct radv_device * device,const struct aco_trap_handler_layout * layout,FILE * f)1215 radv_dump_shader_regs(const struct radv_device *device, const struct aco_trap_handler_layout *layout, FILE *f)
1216 {
1217 fprintf(f, "\nShader registers:\n");
1218
1219 fprintf(f, "m0: 0x%08x\n", layout->m0);
1220 fprintf(f, "exec_lo: 0x%08x\n", layout->exec_lo);
1221 fprintf(f, "exec_hi: 0x%08x\n", layout->exec_hi);
1222
1223 fprintf(f, "\nSGPRS:\n");
1224 for (uint32_t i = 0; i < MAX_SGPRS; i += 4) {
1225 fprintf(f, "s[%d-%d] = { %08x, %08x, %08x, %08x }\n", i, i + 3, layout->sgprs[i], layout->sgprs[i + 1],
1226 layout->sgprs[i + 2], layout->sgprs[i + 3]);
1227 }
1228 fprintf(f, "\n\n");
1229
1230 const uint32_t vgpr_size = radv_get_vgpr_size(device, layout);
1231 const uint32_t num_vgprs = (vgpr_size + 1) * 4 /* 4-VGPR granularity */;
1232 const uint64_t exec = layout->exec_lo | (uint64_t)layout->exec_hi << 32;
1233
1234 assert(num_vgprs < MAX_VGPRS);
1235
1236 fprintf(f, "VGPRS:\n");
1237 fprintf(f, " ");
1238 for (uint32_t i = 0; i < 64; i++) {
1239 const bool live = exec & BITFIELD64_BIT(i);
1240
1241 fprintf(f, live ? " t%02u " : " (t%02u) ", i);
1242 }
1243 fprintf(f, "\n");
1244 for (uint32_t i = 0; i < num_vgprs; i++) {
1245 fprintf(f, " [%3u] = {", i);
1246
1247 for (uint32_t j = 0; j < 64; j++) {
1248 fprintf(f, " %08x", layout->vgprs[i * 64 + j]);
1249 }
1250 fprintf(f, " }\n");
1251 }
1252
1253 fprintf(f, "\n\n");
1254 }
1255
1256 static void
radv_dump_lds(const struct radv_device * device,const struct aco_trap_handler_layout * layout,FILE * f)1257 radv_dump_lds(const struct radv_device *device, const struct aco_trap_handler_layout *layout, FILE *f)
1258 {
1259 uint32_t lds_size = G_000058_LDS_SIZE(layout->sq_wave_regs.lds_alloc);
1260
1261 if (!lds_size)
1262 return;
1263
1264 /* Compute the LDS size in dwords. */
1265 lds_size *= 64;
1266
1267 fprintf(f, "LDS:\n");
1268
1269 for (uint32_t i = 0; i < lds_size; i += 8) {
1270 fprintf(f, "lds[%d-%d] = { %08x, %08x, %08x, %08x, %08x, %08x, %08x, %08x }\n", i, i + 7, layout->lds[i],
1271 layout->lds[i + 1], layout->lds[i + 2], layout->lds[i + 3], layout->lds[i + 4], layout->lds[i + 5],
1272 layout->lds[i + 6], layout->lds[i + 7]);
1273 }
1274
1275 fprintf(f, "\n\n");
1276 }
1277
1278 void
radv_check_trap_handler(struct radv_queue * queue)1279 radv_check_trap_handler(struct radv_queue *queue)
1280 {
1281 enum amd_ip_type ring = radv_queue_ring(queue);
1282 struct radv_device *device = radv_queue_device(queue);
1283 struct radeon_winsys *ws = device->ws;
1284 const struct aco_trap_handler_layout *layout = (struct aco_trap_handler_layout *)&device->tma_ptr[4];
1285
1286 /* Wait for the context to be idle in a finite time. */
1287 ws->ctx_wait_idle(queue->hw_ctx, ring, queue->vk.index_in_family);
1288
1289 /* Try to detect if the trap handler has been reached by the hw by
1290 * looking at ttmp0 which should be non-zero if a shader exception
1291 * happened.
1292 */
1293 if (!layout->ttmp0)
1294 return;
1295
1296 fprintf(stderr, "radv: Trap handler reached...\n");
1297
1298 #ifndef _WIN32
1299 char *dump_dir = NULL;
1300 char dump_path[512];
1301 FILE *f;
1302
1303 dump_dir = radv_create_dump_dir();
1304
1305 fprintf(stderr, "radv: Trap handler report will be saved to '%s'!\n", dump_dir);
1306
1307 snprintf(dump_path, sizeof(dump_path), "%s/trap_handler.log", dump_dir);
1308 f = fopen(dump_path, "w+");
1309 if (!f) {
1310 free(dump_dir);
1311 return;
1312 }
1313
1314 #if 0
1315 fprintf(stderr, "tma_ptr:\n");
1316 for (unsigned i = 0; i < 10; i++)
1317 fprintf(stderr, "tma_ptr[%d]=0x%x\n", i, device->tma_ptr[i]);
1318 #endif
1319
1320 radv_dump_sq_hw_regs(device, layout, f);
1321 radv_dump_shader_regs(device, layout, f);
1322 radv_dump_lds(device, layout, f);
1323
1324 uint32_t ttmp0 = layout->ttmp0;
1325 uint32_t ttmp1 = layout->ttmp1;
1326
1327 /* According to the ISA docs, 3.10 Trap and Exception Registers:
1328 *
1329 * "{ttmp1, ttmp0} = {3'h0, pc_rewind[3:0], HT[0], trapID[7:0], PC[47:0]}"
1330 *
1331 * "When the trap handler is entered, the PC of the faulting
1332 * instruction is: (PC - PC_rewind * 4)."
1333 * */
1334 uint8_t trap_id = (ttmp1 >> 16) & 0xff;
1335 uint8_t ht = (ttmp1 >> 24) & 0x1;
1336 uint8_t pc_rewind = (ttmp1 >> 25) & 0xf;
1337 uint64_t pc = (ttmp0 | ((ttmp1 & 0x0000ffffull) << 32)) - (pc_rewind * 4);
1338
1339 fprintf(f, "PC=0x%" PRIx64 ", trapID=%d, HT=%d, PC_rewind=%d\n", pc, trap_id, ht, pc_rewind);
1340
1341 struct radv_shader *shader = radv_find_shader(device, pc);
1342 if (shader) {
1343 radv_dump_faulty_shader(device, shader, pc, f);
1344 } else {
1345 fprintf(stderr, "radv: Failed to find the faulty shader.\n");
1346 }
1347
1348 fclose(f);
1349
1350 if (shader) {
1351 snprintf(dump_path, sizeof(dump_path), "%s/shader_dump.log", dump_dir);
1352 f = fopen(dump_path, "w+");
1353 if (!f) {
1354 free(dump_dir);
1355 return;
1356 }
1357
1358 radv_dump_shader(device, NULL, shader, shader->info.stage, dump_dir, f);
1359 fclose(f);
1360 }
1361
1362 free(dump_dir);
1363
1364 fprintf(stderr, "radv: Trap handler report saved successfully!\n");
1365 abort();
1366 #endif
1367 }
1368
1369 /* VK_EXT_device_fault */
1370 VKAPI_ATTR VkResult VKAPI_CALL
radv_GetDeviceFaultInfoEXT(VkDevice _device,VkDeviceFaultCountsEXT * pFaultCounts,VkDeviceFaultInfoEXT * pFaultInfo)1371 radv_GetDeviceFaultInfoEXT(VkDevice _device, VkDeviceFaultCountsEXT *pFaultCounts, VkDeviceFaultInfoEXT *pFaultInfo)
1372 {
1373 VK_OUTARRAY_MAKE_TYPED(VkDeviceFaultAddressInfoEXT, out, pFaultInfo ? pFaultInfo->pAddressInfos : NULL,
1374 &pFaultCounts->addressInfoCount);
1375 struct radv_winsys_gpuvm_fault_info fault_info = {0};
1376 VK_FROM_HANDLE(radv_device, device, _device);
1377 const struct radv_physical_device *pdev = radv_device_physical(device);
1378 const struct radv_instance *instance = radv_physical_device_instance(pdev);
1379 bool vm_fault_occurred = false;
1380
1381 /* Query if a GPUVM fault happened. */
1382 vm_fault_occurred = radv_vm_fault_occurred(device, &fault_info);
1383
1384 /* No vendor-specific crash dumps yet. */
1385 pFaultCounts->vendorInfoCount = 0;
1386 pFaultCounts->vendorBinarySize = 0;
1387
1388 if (device->gpu_hang_report) {
1389 VkDeviceFaultVendorBinaryHeaderVersionOneEXT hdr;
1390
1391 hdr.headerSize = sizeof(VkDeviceFaultVendorBinaryHeaderVersionOneEXT);
1392 hdr.headerVersion = VK_DEVICE_FAULT_VENDOR_BINARY_HEADER_VERSION_ONE_EXT;
1393 hdr.vendorID = pdev->vk.properties.vendorID;
1394 hdr.deviceID = pdev->vk.properties.deviceID;
1395 hdr.driverVersion = pdev->vk.properties.driverVersion;
1396 memcpy(hdr.pipelineCacheUUID, pdev->cache_uuid, VK_UUID_SIZE);
1397 hdr.applicationNameOffset = 0;
1398 hdr.applicationVersion = instance->vk.app_info.app_version;
1399 hdr.engineNameOffset = 0;
1400 hdr.engineVersion = instance->vk.app_info.engine_version;
1401 hdr.apiVersion = instance->vk.app_info.api_version;
1402
1403 pFaultCounts->vendorBinarySize = sizeof(hdr) + strlen(device->gpu_hang_report);
1404 if (pFaultInfo) {
1405 memcpy(pFaultInfo->pVendorBinaryData, &hdr, sizeof(hdr));
1406 memcpy((char *)pFaultInfo->pVendorBinaryData + sizeof(hdr), device->gpu_hang_report,
1407 strlen(device->gpu_hang_report));
1408 }
1409 }
1410
1411 if (vm_fault_occurred) {
1412 VkDeviceFaultAddressInfoEXT addr_fault_info = {
1413 .reportedAddress = ((int64_t)fault_info.addr << 16) >> 16,
1414 .addressPrecision = 4096, /* 4K page granularity */
1415 };
1416
1417 if (pFaultInfo)
1418 strncpy(pFaultInfo->description, "A GPUVM fault has been detected", sizeof(pFaultInfo->description));
1419
1420 if (pdev->info.gfx_level >= GFX10) {
1421 addr_fault_info.addressType = G_00A130_RW(fault_info.status) ? VK_DEVICE_FAULT_ADDRESS_TYPE_WRITE_INVALID_EXT
1422 : VK_DEVICE_FAULT_ADDRESS_TYPE_READ_INVALID_EXT;
1423 } else {
1424 /* Not sure how to get the access status on GFX6-9. */
1425 addr_fault_info.addressType = VK_DEVICE_FAULT_ADDRESS_TYPE_NONE_EXT;
1426 }
1427 vk_outarray_append_typed(VkDeviceFaultAddressInfoEXT, &out, elem) *elem = addr_fault_info;
1428 }
1429
1430 return vk_outarray_status(&out);
1431 }
1432