• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2016 Red Hat.
3  * Copyright © 2016 Bas Nieuwenhuizen
4  *
5  * based in part on anv driver which is:
6  * Copyright © 2015 Intel Corporation
7  *
8  * SPDX-License-Identifier: MIT
9  */
10 
11 #include <stdio.h>
12 #include <stdlib.h>
13 #ifndef _WIN32
14 #include <sys/utsname.h>
15 #endif
16 #include <sys/stat.h>
17 
18 #include "spirv/nir_spirv.h"
19 #include "util/mesa-sha1.h"
20 #include "util/os_time.h"
21 #include "ac_debug.h"
22 #include "ac_descriptors.h"
23 #include "git_sha1.h"
24 #include "radv_buffer.h"
25 #include "radv_debug.h"
26 #include "radv_descriptor_set.h"
27 #include "radv_entrypoints.h"
28 #include "radv_pipeline_graphics.h"
29 #include "radv_pipeline_rt.h"
30 #include "radv_shader.h"
31 #include "sid.h"
32 
33 #include "vk_common_entrypoints.h"
34 #include "vk_enum_to_str.h"
35 
36 #define COLOR_RESET  "\033[0m"
37 #define COLOR_RED    "\033[31m"
38 #define COLOR_GREEN  "\033[1;32m"
39 #define COLOR_YELLOW "\033[1;33m"
40 #define COLOR_CYAN   "\033[1;36m"
41 
42 #define RADV_DUMP_DIR "radv_dumps"
43 
44 static void
radv_dump_address_binding_report(const struct radv_address_binding_report * report,FILE * f)45 radv_dump_address_binding_report(const struct radv_address_binding_report *report, FILE *f)
46 {
47    fprintf(f, "timestamp=%llu, VA=%.16llx-%.16llx, binding_type=%s, object_type=%s, object_handle=0x%llx\n",
48            (long long)report->timestamp, (long long)report->va, (long long)(report->va + report->size),
49            (report->binding_type == VK_DEVICE_ADDRESS_BINDING_TYPE_BIND_EXT) ? "bind" : "unbind",
50            vk_ObjectType_to_str(report->object_type), (long long)report->object_handle);
51 }
52 
53 static void
radv_dump_address_binding_reports(struct radv_device * device,FILE * f)54 radv_dump_address_binding_reports(struct radv_device *device, FILE *f)
55 {
56    struct radv_address_binding_tracker *tracker = device->addr_binding_tracker;
57 
58    simple_mtx_lock(&tracker->mtx);
59    util_dynarray_foreach (&tracker->reports, struct radv_address_binding_report, report)
60       radv_dump_address_binding_report(report, f);
61    simple_mtx_unlock(&tracker->mtx);
62 }
63 
64 static void
radv_dump_address_binding_report_check(struct radv_device * device,uint64_t va,FILE * f)65 radv_dump_address_binding_report_check(struct radv_device *device, uint64_t va, FILE *f)
66 {
67    struct radv_address_binding_tracker *tracker = device->addr_binding_tracker;
68    bool va_found = false;
69    bool va_valid = false;
70 
71    if (!tracker)
72       return;
73 
74    fprintf(f, "\nPerforming some verifications with address binding report...\n");
75 
76    simple_mtx_lock(&tracker->mtx);
77 
78    util_dynarray_foreach (&tracker->reports, struct radv_address_binding_report, report) {
79       if (va < report->va || va >= report->va + report->size)
80          continue;
81 
82       if (report->object_type == VK_OBJECT_TYPE_DEVICE_MEMORY) {
83          if (report->binding_type == VK_DEVICE_ADDRESS_BINDING_TYPE_BIND_EXT) {
84             va_valid = true; /* BO alloc */
85          } else {
86             va_valid = false; /* BO destroy */
87          }
88       }
89 
90       radv_dump_address_binding_report(report, f);
91       va_found = true;
92    }
93 
94    simple_mtx_unlock(&tracker->mtx);
95 
96    if (va_found) {
97       if (!va_valid)
98          fprintf(f, "\nPotential use-after-free detected! See addr_binding_report.log for more info.\n");
99    } else {
100       fprintf(f, "VA not found!\n");
101    }
102 }
103 
104 static VkBool32 VKAPI_PTR
radv_address_binding_callback(VkDebugUtilsMessageSeverityFlagBitsEXT message_severity,VkDebugUtilsMessageTypeFlagsEXT message_types,const VkDebugUtilsMessengerCallbackDataEXT * callback_data,void * userdata)105 radv_address_binding_callback(VkDebugUtilsMessageSeverityFlagBitsEXT message_severity,
106                               VkDebugUtilsMessageTypeFlagsEXT message_types,
107                               const VkDebugUtilsMessengerCallbackDataEXT *callback_data, void *userdata)
108 {
109    struct radv_address_binding_tracker *tracker = userdata;
110    const VkDeviceAddressBindingCallbackDataEXT *data;
111 
112    if (!callback_data)
113       return VK_FALSE;
114 
115    data = vk_find_struct_const(callback_data->pNext, DEVICE_ADDRESS_BINDING_CALLBACK_DATA_EXT);
116    if (!data)
117       return VK_FALSE;
118 
119    simple_mtx_lock(&tracker->mtx);
120 
121    for (uint32_t i = 0; i < callback_data->objectCount; i++) {
122       struct radv_address_binding_report report = {
123          .timestamp = os_time_get_nano(),
124          .va = data->baseAddress & ((1ull << 48) - 1),
125          .size = data->size,
126          .flags = data->flags,
127          .binding_type = data->bindingType,
128          .object_handle = callback_data->pObjects[i].objectHandle,
129          .object_type = callback_data->pObjects[i].objectType,
130       };
131 
132       util_dynarray_append(&tracker->reports, struct radv_address_binding_report, report);
133    }
134 
135    simple_mtx_unlock(&tracker->mtx);
136 
137    return VK_FALSE;
138 }
139 
140 static bool
radv_init_adress_binding_report(struct radv_device * device)141 radv_init_adress_binding_report(struct radv_device *device)
142 {
143    struct radv_physical_device *pdev = radv_device_physical(device);
144    struct radv_instance *instance = radv_physical_device_instance(pdev);
145    VkResult result;
146 
147    device->addr_binding_tracker = calloc(1, sizeof(*device->addr_binding_tracker));
148    if (!device->addr_binding_tracker)
149       return false;
150 
151    simple_mtx_init(&device->addr_binding_tracker->mtx, mtx_plain);
152    util_dynarray_init(&device->addr_binding_tracker->reports, NULL);
153 
154    VkDebugUtilsMessengerCreateInfoEXT create_info = {
155       .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT,
156       .pUserData = device->addr_binding_tracker,
157       .pfnUserCallback = radv_address_binding_callback,
158       .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_DEVICE_ADDRESS_BINDING_BIT_EXT,
159    };
160 
161    result = vk_common_CreateDebugUtilsMessengerEXT(radv_instance_to_handle(instance), &create_info, NULL,
162                                                    &device->addr_binding_tracker->messenger);
163    if (result != VK_SUCCESS)
164       return false;
165 
166    return true;
167 }
168 
169 static void
radv_finish_address_binding_report(struct radv_device * device)170 radv_finish_address_binding_report(struct radv_device *device)
171 {
172    struct radv_physical_device *pdev = radv_device_physical(device);
173    struct radv_instance *instance = radv_physical_device_instance(pdev);
174    struct radv_address_binding_tracker *tracker = device->addr_binding_tracker;
175 
176    util_dynarray_fini(&tracker->reports);
177    simple_mtx_destroy(&tracker->mtx);
178 
179    vk_common_DestroyDebugUtilsMessengerEXT(radv_instance_to_handle(instance), tracker->messenger, NULL);
180    free(device->addr_binding_tracker);
181 }
182 
183 bool
radv_init_trace(struct radv_device * device)184 radv_init_trace(struct radv_device *device)
185 {
186    struct radeon_winsys *ws = device->ws;
187    VkResult result;
188 
189    result = radv_bo_create(
190       device, NULL, sizeof(struct radv_trace_data), 8, RADEON_DOMAIN_VRAM,
191       RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM | RADEON_FLAG_VA_UNCACHED,
192       RADV_BO_PRIORITY_UPLOAD_BUFFER, 0, true, &device->trace_bo);
193    if (result != VK_SUCCESS)
194       return false;
195 
196    result = ws->buffer_make_resident(ws, device->trace_bo, true);
197    if (result != VK_SUCCESS)
198       return false;
199 
200    device->trace_data = radv_buffer_map(ws, device->trace_bo);
201    if (!device->trace_data)
202       return false;
203 
204    if (!radv_init_adress_binding_report(device))
205       return false;
206 
207    return true;
208 }
209 
210 void
radv_finish_trace(struct radv_device * device)211 radv_finish_trace(struct radv_device *device)
212 {
213    struct radeon_winsys *ws = device->ws;
214 
215    if (device->addr_binding_tracker)
216       radv_finish_address_binding_report(device);
217 
218    if (unlikely(device->trace_bo)) {
219       ws->buffer_make_resident(ws, device->trace_bo, false);
220       radv_bo_destroy(device, NULL, device->trace_bo);
221    }
222 }
223 
224 static void
radv_dump_trace(const struct radv_device * device,struct radeon_cmdbuf * cs,FILE * f)225 radv_dump_trace(const struct radv_device *device, struct radeon_cmdbuf *cs, FILE *f)
226 {
227    fprintf(f, "Trace ID: %x\n", device->trace_data->primary_id);
228    device->ws->cs_dump(cs, f, (const int *)&device->trace_data->primary_id, 2, RADV_CS_DUMP_TYPE_IBS);
229 }
230 
231 static void
radv_dump_mmapped_reg(const struct radv_device * device,FILE * f,unsigned offset)232 radv_dump_mmapped_reg(const struct radv_device *device, FILE *f, unsigned offset)
233 {
234    const struct radv_physical_device *pdev = radv_device_physical(device);
235    struct radeon_winsys *ws = device->ws;
236    uint32_t value;
237 
238    if (ws->read_registers(ws, offset, 1, &value))
239       ac_dump_reg(f, pdev->info.gfx_level, pdev->info.family, offset, value, ~0);
240 }
241 
242 static void
radv_dump_debug_registers(const struct radv_device * device,FILE * f)243 radv_dump_debug_registers(const struct radv_device *device, FILE *f)
244 {
245    const struct radv_physical_device *pdev = radv_device_physical(device);
246    const struct radeon_info *gpu_info = &pdev->info;
247 
248    fprintf(f, "Memory-mapped registers:\n");
249    radv_dump_mmapped_reg(device, f, R_008010_GRBM_STATUS);
250 
251    radv_dump_mmapped_reg(device, f, R_008008_GRBM_STATUS2);
252    radv_dump_mmapped_reg(device, f, R_008014_GRBM_STATUS_SE0);
253    radv_dump_mmapped_reg(device, f, R_008018_GRBM_STATUS_SE1);
254    radv_dump_mmapped_reg(device, f, R_008038_GRBM_STATUS_SE2);
255    radv_dump_mmapped_reg(device, f, R_00803C_GRBM_STATUS_SE3);
256    radv_dump_mmapped_reg(device, f, R_00D034_SDMA0_STATUS_REG);
257    radv_dump_mmapped_reg(device, f, R_00D834_SDMA1_STATUS_REG);
258    if (gpu_info->gfx_level <= GFX8) {
259       radv_dump_mmapped_reg(device, f, R_000E50_SRBM_STATUS);
260       radv_dump_mmapped_reg(device, f, R_000E4C_SRBM_STATUS2);
261       radv_dump_mmapped_reg(device, f, R_000E54_SRBM_STATUS3);
262    }
263    radv_dump_mmapped_reg(device, f, R_008680_CP_STAT);
264    radv_dump_mmapped_reg(device, f, R_008674_CP_STALLED_STAT1);
265    radv_dump_mmapped_reg(device, f, R_008678_CP_STALLED_STAT2);
266    radv_dump_mmapped_reg(device, f, R_008670_CP_STALLED_STAT3);
267    radv_dump_mmapped_reg(device, f, R_008210_CP_CPC_STATUS);
268    radv_dump_mmapped_reg(device, f, R_008214_CP_CPC_BUSY_STAT);
269    radv_dump_mmapped_reg(device, f, R_008218_CP_CPC_STALLED_STAT1);
270    radv_dump_mmapped_reg(device, f, R_00821C_CP_CPF_STATUS);
271    radv_dump_mmapped_reg(device, f, R_008220_CP_CPF_BUSY_STAT);
272    radv_dump_mmapped_reg(device, f, R_008224_CP_CPF_STALLED_STAT1);
273    fprintf(f, "\n");
274 }
275 
276 static void
radv_dump_buffer_descriptor(enum amd_gfx_level gfx_level,enum radeon_family family,const uint32_t * desc,FILE * f)277 radv_dump_buffer_descriptor(enum amd_gfx_level gfx_level, enum radeon_family family, const uint32_t *desc, FILE *f)
278 {
279    fprintf(f, COLOR_CYAN "Buffer:" COLOR_RESET "\n");
280    for (unsigned j = 0; j < 4; j++)
281       ac_dump_reg(f, gfx_level, family, R_008F00_SQ_BUF_RSRC_WORD0 + j * 4, desc[j], 0xffffffff);
282 }
283 
284 static void
radv_dump_image_descriptor(enum amd_gfx_level gfx_level,enum radeon_family family,const uint32_t * desc,FILE * f)285 radv_dump_image_descriptor(enum amd_gfx_level gfx_level, enum radeon_family family, const uint32_t *desc, FILE *f)
286 {
287    unsigned sq_img_rsrc_word0 = gfx_level >= GFX10 ? R_00A000_SQ_IMG_RSRC_WORD0 : R_008F10_SQ_IMG_RSRC_WORD0;
288 
289    fprintf(f, COLOR_CYAN "Image:" COLOR_RESET "\n");
290    for (unsigned j = 0; j < 8; j++)
291       ac_dump_reg(f, gfx_level, family, sq_img_rsrc_word0 + j * 4, desc[j], 0xffffffff);
292 
293    fprintf(f, COLOR_CYAN "    FMASK:" COLOR_RESET "\n");
294    for (unsigned j = 0; j < 8; j++)
295       ac_dump_reg(f, gfx_level, family, sq_img_rsrc_word0 + j * 4, desc[8 + j], 0xffffffff);
296 }
297 
298 static void
radv_dump_sampler_descriptor(enum amd_gfx_level gfx_level,enum radeon_family family,const uint32_t * desc,FILE * f)299 radv_dump_sampler_descriptor(enum amd_gfx_level gfx_level, enum radeon_family family, const uint32_t *desc, FILE *f)
300 {
301    fprintf(f, COLOR_CYAN "Sampler state:" COLOR_RESET "\n");
302    for (unsigned j = 0; j < 4; j++) {
303       ac_dump_reg(f, gfx_level, family, R_008F30_SQ_IMG_SAMP_WORD0 + j * 4, desc[j], 0xffffffff);
304    }
305 }
306 
307 static void
radv_dump_combined_image_sampler_descriptor(enum amd_gfx_level gfx_level,enum radeon_family family,const uint32_t * desc,FILE * f)308 radv_dump_combined_image_sampler_descriptor(enum amd_gfx_level gfx_level, enum radeon_family family,
309                                             const uint32_t *desc, FILE *f)
310 {
311    radv_dump_image_descriptor(gfx_level, family, desc, f);
312    radv_dump_sampler_descriptor(gfx_level, family, desc + 16, f);
313 }
314 
315 static void
radv_dump_descriptor_set(const struct radv_device * device,const struct radv_descriptor_set * set,unsigned id,FILE * f)316 radv_dump_descriptor_set(const struct radv_device *device, const struct radv_descriptor_set *set, unsigned id, FILE *f)
317 {
318    const struct radv_physical_device *pdev = radv_device_physical(device);
319    enum amd_gfx_level gfx_level = pdev->info.gfx_level;
320    enum radeon_family family = pdev->info.family;
321    const struct radv_descriptor_set_layout *layout;
322    int i;
323 
324    if (!set)
325       return;
326    layout = set->header.layout;
327 
328    for (i = 0; i < set->header.layout->binding_count; i++) {
329       uint32_t *desc = set->header.mapped_ptr + layout->binding[i].offset / 4;
330 
331       fprintf(f, "(set=%u binding=%u offset=0x%x) ", id, i, layout->binding[i].offset);
332 
333       switch (layout->binding[i].type) {
334       case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
335       case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
336       case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
337       case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
338          radv_dump_buffer_descriptor(gfx_level, family, desc, f);
339          break;
340       case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
341       case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
342       case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
343          radv_dump_image_descriptor(gfx_level, family, desc, f);
344          break;
345       case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
346          radv_dump_combined_image_sampler_descriptor(gfx_level, family, desc, f);
347          break;
348       case VK_DESCRIPTOR_TYPE_SAMPLER:
349          radv_dump_sampler_descriptor(gfx_level, family, desc, f);
350          break;
351       case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
352       case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
353       case VK_DESCRIPTOR_TYPE_MUTABLE_EXT:
354       case VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR:
355          /* todo */
356          break;
357       default:
358          assert(!"unknown descriptor type");
359          break;
360       }
361       fprintf(f, "\n");
362    }
363    fprintf(f, "\n\n");
364 }
365 
366 static void
radv_dump_descriptors(struct radv_device * device,FILE * f)367 radv_dump_descriptors(struct radv_device *device, FILE *f)
368 {
369    int i;
370 
371    fprintf(f, "Descriptors:\n");
372    for (i = 0; i < MAX_SETS; i++) {
373       struct radv_descriptor_set *set = (struct radv_descriptor_set *)(uintptr_t)device->trace_data->descriptor_sets[i];
374 
375       radv_dump_descriptor_set(device, set, i, f);
376    }
377 }
378 
struct radv_shader_inst {
   char text[160];  /* one disasm line */
   unsigned offset; /* instruction offset */
   unsigned size;   /* instruction size >= 4 */
};

/* Split a disassembly string into lines and add them to the array pointed
 * to by "instructions".
 *
 * - disasm:       NUL-terminated disassembly; only '\n'-terminated lines are
 *                 processed.
 * - start_addr:   address of the first instruction, used for the PC annotation.
 * - num:          in/out count of valid entries in "instructions"; new entries
 *                 are appended after the existing ones.
 * - instructions: destination array. The caller must provide enough room,
 *                 this function cannot check the capacity.
 *
 * Lines without a ';' are ignored. A line containing "then repeated N times"
 * duplicates the previous instruction N times at consecutive offsets; it is
 * skipped when N cannot be parsed or when there is no previous instruction.
 * Lines longer than the text buffer are truncated instead of overflowing it.
 */
static void
radv_add_split_disasm(const char *disasm, uint64_t start_addr, unsigned *num, struct radv_shader_inst *instructions)
{
   struct radv_shader_inst *last_inst = *num ? &instructions[*num - 1] : NULL;
   const char *next;
   const char *repeat = strstr(disasm, "then repeated");

   while ((next = strchr(disasm, '\n'))) {
      const size_t len = (size_t)(next - disasm);

      if (repeat && repeat >= disasm && repeat < next) {
         unsigned repeat_count = 0;

         /* Treat an unparsable count as "no repetition" instead of reading
          * an uninitialized value.
          */
         if (sscanf(repeat, "then repeated %u times", &repeat_count) != 1)
            repeat_count = 0;

         /* Without a previous instruction there is nothing to duplicate. */
         if (last_inst) {
            const struct radv_shader_inst base = *last_inst;

            for (unsigned i = 0; i < repeat_count; i++) {
               struct radv_shader_inst *copy = &instructions[*num];

               *copy = base;
               copy->offset = base.offset + base.size * (i + 1);
               (*num)++;

               /* Only ever point last_inst at an entry that was written. */
               last_inst = copy;
            }
         }

         disasm = next + 1;
         repeat = strstr(disasm, "then repeated");
         continue;
      }

      const char *semicolon = memchr(disasm, ';', len);
      if (!semicolon) {
         /* Ignore everything that is not an instruction. */
         disasm = next + 1;
         continue;
      }

      struct radv_shader_inst *inst = &instructions[*num];

      /* Clamp the copied text so an over-long line cannot overflow the
       * buffer in builds where assertions are disabled.
       */
      const size_t text_len = len < sizeof(inst->text) ? len : sizeof(inst->text) - 1;

      memcpy(inst->text, disasm, text_len);
      inst->text[text_len] = 0;
      inst->offset = last_inst ? last_inst->offset + last_inst->size : 0;

      /* 9 = 8 hex digits + a leading space */
      inst->size = (unsigned)((next - semicolon) / 9 * 4);

      snprintf(inst->text + text_len, sizeof(inst->text) - text_len, " [PC=0x%" PRIx64 ", off=%u, size=%u]",
               start_addr + inst->offset, inst->offset, inst->size);

      last_inst = inst;
      (*num)++;
      disasm = next + 1;
   }
}
440 
441 static void
radv_dump_annotated_shader(const struct radv_shader * shader,gl_shader_stage stage,struct ac_wave_info * waves,unsigned num_waves,FILE * f)442 radv_dump_annotated_shader(const struct radv_shader *shader, gl_shader_stage stage, struct ac_wave_info *waves,
443                            unsigned num_waves, FILE *f)
444 {
445    uint64_t start_addr, end_addr;
446    unsigned i;
447 
448    if (!shader)
449       return;
450 
451    start_addr = radv_shader_get_va(shader) & ((1ull << 48) - 1);
452    end_addr = start_addr + shader->code_size;
453 
454    /* See if any wave executes the shader. */
455    for (i = 0; i < num_waves; i++) {
456       if (start_addr <= waves[i].pc && waves[i].pc <= end_addr)
457          break;
458    }
459 
460    if (i == num_waves)
461       return; /* the shader is not being executed */
462 
463    /* Remember the first found wave. The waves are sorted according to PC. */
464    waves = &waves[i];
465    num_waves -= i;
466 
467    /* Get the list of instructions.
468     * Buffer size / 4 is the upper bound of the instruction count.
469     */
470    unsigned num_inst = 0;
471    struct radv_shader_inst *instructions = calloc(shader->code_size / 4, sizeof(struct radv_shader_inst));
472 
473    radv_add_split_disasm(shader->disasm_string, start_addr, &num_inst, instructions);
474 
475    fprintf(f, COLOR_YELLOW "%s - annotated disassembly:" COLOR_RESET "\n", radv_get_shader_name(&shader->info, stage));
476 
477    /* Print instructions with annotations. */
478    for (i = 0; i < num_inst; i++) {
479       struct radv_shader_inst *inst = &instructions[i];
480 
481       fprintf(f, "%s\n", inst->text);
482 
483       /* Print which waves execute the instruction right now. */
484       while (num_waves && start_addr + inst->offset == waves->pc) {
485          fprintf(f,
486                  "          " COLOR_GREEN "^ SE%u SH%u CU%u "
487                  "SIMD%u WAVE%u  EXEC=%016" PRIx64 "  ",
488                  waves->se, waves->sh, waves->cu, waves->simd, waves->wave, waves->exec);
489 
490          if (inst->size == 4) {
491             fprintf(f, "INST32=%08X" COLOR_RESET "\n", waves->inst_dw0);
492          } else {
493             fprintf(f, "INST64=%08X %08X" COLOR_RESET "\n", waves->inst_dw0, waves->inst_dw1);
494          }
495 
496          waves->matched = true;
497          waves = &waves[1];
498          num_waves--;
499       }
500    }
501 
502    fprintf(f, "\n\n");
503    free(instructions);
504 }
505 
506 static void
radv_dump_spirv(const struct radv_shader * shader,const char * sha1,const char * dump_dir)507 radv_dump_spirv(const struct radv_shader *shader, const char *sha1, const char *dump_dir)
508 {
509    char dump_path[512];
510    FILE *f;
511 
512    snprintf(dump_path, sizeof(dump_path), "%s/%s.spv", dump_dir, sha1);
513 
514    f = fopen(dump_path, "w+");
515    if (f) {
516       fwrite(shader->spirv, shader->spirv_size, 1, f);
517       fclose(f);
518    }
519 }
520 
521 static void
radv_dump_shader(struct radv_device * device,struct radv_pipeline * pipeline,struct radv_shader * shader,gl_shader_stage stage,const char * dump_dir,FILE * f)522 radv_dump_shader(struct radv_device *device, struct radv_pipeline *pipeline, struct radv_shader *shader,
523                  gl_shader_stage stage, const char *dump_dir, FILE *f)
524 {
525    const struct radv_physical_device *pdev = radv_device_physical(device);
526 
527    if (!shader)
528       return;
529 
530    fprintf(f, "%s:\n\n", radv_get_shader_name(&shader->info, stage));
531 
532    if (shader->spirv) {
533       unsigned char sha1[21];
534       char sha1buf[41];
535 
536       _mesa_sha1_compute(shader->spirv, shader->spirv_size, sha1);
537       _mesa_sha1_format(sha1buf, sha1);
538 
539       if (device->vk.enabled_features.deviceFaultVendorBinary) {
540          spirv_print_asm(f, (const uint32_t *)shader->spirv, shader->spirv_size / 4);
541       } else {
542          fprintf(f, "SPIRV (see %s.spv)\n\n", sha1buf);
543          radv_dump_spirv(shader, sha1buf, dump_dir);
544       }
545    }
546 
547    if (shader->nir_string) {
548       fprintf(f, "NIR:\n%s\n", shader->nir_string);
549    }
550 
551    fprintf(f, "%s IR:\n%s\n", pdev->use_llvm ? "LLVM" : "ACO", shader->ir_string);
552    fprintf(f, "DISASM:\n%s\n", shader->disasm_string);
553 
554    if (pipeline)
555       radv_dump_shader_stats(device, pipeline, shader, stage, f);
556 }
557 
558 static void
radv_dump_vertex_descriptors(const struct radv_device * device,const struct radv_graphics_pipeline * pipeline,FILE * f)559 radv_dump_vertex_descriptors(const struct radv_device *device, const struct radv_graphics_pipeline *pipeline, FILE *f)
560 {
561    struct radv_shader *vs = radv_get_shader(pipeline->base.shaders, MESA_SHADER_VERTEX);
562    uint32_t count = util_bitcount(vs->info.vs.vb_desc_usage_mask);
563    uint32_t *vb_ptr = (uint32_t *)(uintptr_t)device->trace_data->vertex_descriptors;
564 
565    if (!count)
566       return;
567 
568    fprintf(f, "Num vertex %s: %d\n", vs->info.vs.use_per_attribute_vb_descs ? "attributes" : "bindings", count);
569    for (uint32_t i = 0; i < count; i++) {
570       uint32_t *desc = &((uint32_t *)vb_ptr)[i * 4];
571       uint64_t va = 0;
572 
573       va |= desc[0];
574       va |= (uint64_t)G_008F04_BASE_ADDRESS_HI(desc[1]) << 32;
575 
576       fprintf(f, "VBO#%d:\n", i);
577       fprintf(f, "\tVA: 0x%" PRIx64 "\n", va);
578       fprintf(f, "\tStride: %d\n", G_008F04_STRIDE(desc[1]));
579       fprintf(f, "\tNum records: %d (0x%x)\n", desc[2], desc[2]);
580    }
581 }
582 
583 static void
radv_dump_vs_prolog(const struct radv_device * device,const struct radv_graphics_pipeline * pipeline,FILE * f)584 radv_dump_vs_prolog(const struct radv_device *device, const struct radv_graphics_pipeline *pipeline, FILE *f)
585 {
586    struct radv_shader_part *vs_prolog = (struct radv_shader_part *)(uintptr_t)device->trace_data->vertex_prolog;
587    struct radv_shader *vs_shader = radv_get_shader(pipeline->base.shaders, MESA_SHADER_VERTEX);
588 
589    if (!vs_prolog || !vs_shader || !vs_shader->info.vs.has_prolog)
590       return;
591 
592    fprintf(f, "Vertex prolog:\n\n");
593    fprintf(f, "DISASM:\n%s\n", vs_prolog->disasm_string);
594 }
595 
596 static struct radv_pipeline *
radv_get_saved_pipeline(struct radv_device * device,enum amd_ip_type ring)597 radv_get_saved_pipeline(struct radv_device *device, enum amd_ip_type ring)
598 {
599    if (ring == AMD_IP_GFX)
600       return (struct radv_pipeline *)(uintptr_t)device->trace_data->gfx_ring_pipeline;
601    else
602       return (struct radv_pipeline *)(uintptr_t)device->trace_data->comp_ring_pipeline;
603 }
604 
605 static void
radv_dump_queue_state(struct radv_queue * queue,const char * dump_dir,const char * wave_dump,FILE * f)606 radv_dump_queue_state(struct radv_queue *queue, const char *dump_dir, const char *wave_dump, FILE *f)
607 {
608    struct radv_device *device = radv_queue_device(queue);
609    const struct radv_physical_device *pdev = radv_device_physical(device);
610    enum amd_ip_type ring = radv_queue_ring(queue);
611    struct radv_pipeline *pipeline;
612 
613    fprintf(f, "AMD_IP_%s:\n", ac_get_ip_type_string(&pdev->info, ring));
614 
615    pipeline = radv_get_saved_pipeline(device, ring);
616    if (pipeline) {
617       fprintf(f, "Pipeline hash: %" PRIx64 "\n", pipeline->pipeline_hash);
618 
619       if (pipeline->type == RADV_PIPELINE_GRAPHICS) {
620          struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);
621 
622          radv_dump_vs_prolog(device, graphics_pipeline, f);
623 
624          /* Dump active graphics shaders. */
625          unsigned stages = graphics_pipeline->active_stages;
626          while (stages) {
627             int stage = u_bit_scan(&stages);
628 
629             radv_dump_shader(device, &graphics_pipeline->base, graphics_pipeline->base.shaders[stage], stage, dump_dir,
630                              f);
631          }
632       } else if (pipeline->type == RADV_PIPELINE_RAY_TRACING) {
633          struct radv_ray_tracing_pipeline *rt_pipeline = radv_pipeline_to_ray_tracing(pipeline);
634          for (unsigned i = 0; i < rt_pipeline->stage_count; i++) {
635             struct radv_shader *shader = rt_pipeline->stages[i].shader;
636             if (shader)
637                radv_dump_shader(device, pipeline, shader, shader->info.stage, dump_dir, f);
638          }
639          radv_dump_shader(device, pipeline, pipeline->shaders[MESA_SHADER_INTERSECTION], MESA_SHADER_INTERSECTION,
640                           dump_dir, f);
641       } else {
642          struct radv_compute_pipeline *compute_pipeline = radv_pipeline_to_compute(pipeline);
643 
644          radv_dump_shader(device, &compute_pipeline->base, compute_pipeline->base.shaders[MESA_SHADER_COMPUTE],
645                           MESA_SHADER_COMPUTE, dump_dir, f);
646       }
647 
648       if (wave_dump) {
649          struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP];
650          enum amd_gfx_level gfx_level = pdev->info.gfx_level;
651          unsigned num_waves = ac_get_wave_info(gfx_level, &pdev->info, wave_dump, waves);
652 
653          fprintf(f, COLOR_CYAN "The number of active waves = %u" COLOR_RESET "\n\n", num_waves);
654 
655          if (pipeline->type == RADV_PIPELINE_GRAPHICS) {
656             struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);
657 
658             /* Dump annotated active graphics shaders. */
659             unsigned stages = graphics_pipeline->active_stages;
660             while (stages) {
661                int stage = u_bit_scan(&stages);
662 
663                radv_dump_annotated_shader(graphics_pipeline->base.shaders[stage], stage, waves, num_waves, f);
664             }
665          } else if (pipeline->type == RADV_PIPELINE_RAY_TRACING) {
666             struct radv_ray_tracing_pipeline *rt_pipeline = radv_pipeline_to_ray_tracing(pipeline);
667             for (unsigned i = 0; i < rt_pipeline->stage_count; i++) {
668                struct radv_shader *shader = rt_pipeline->stages[i].shader;
669                if (shader)
670                   radv_dump_annotated_shader(shader, shader->info.stage, waves, num_waves, f);
671             }
672             radv_dump_annotated_shader(pipeline->shaders[MESA_SHADER_INTERSECTION], MESA_SHADER_INTERSECTION, waves,
673                                        num_waves, f);
674          } else {
675             struct radv_compute_pipeline *compute_pipeline = radv_pipeline_to_compute(pipeline);
676 
677             radv_dump_annotated_shader(compute_pipeline->base.shaders[MESA_SHADER_COMPUTE], MESA_SHADER_COMPUTE, waves,
678                                        num_waves, f);
679          }
680 
681          /* Print waves executing shaders that are not currently bound. */
682          unsigned i;
683          bool found = false;
684          for (i = 0; i < num_waves; i++) {
685             if (waves[i].matched)
686                continue;
687 
688             if (!found) {
689                fprintf(f, COLOR_CYAN "Waves not executing currently-bound shaders:" COLOR_RESET "\n");
690                found = true;
691             }
692 
693             struct radv_shader *shader = radv_find_shader(device, waves[0].pc);
694             if (shader) {
695                radv_dump_annotated_shader(shader, shader->info.stage, waves, num_waves, f);
696                if (waves[i].matched)
697                   continue;
698             }
699 
700             fprintf(f, "    SE%u SH%u CU%u SIMD%u WAVE%u  EXEC=%016" PRIx64 "  INST=%08X %08X  PC=%" PRIx64 "\n",
701                     waves[i].se, waves[i].sh, waves[i].cu, waves[i].simd, waves[i].wave, waves[i].exec,
702                     waves[i].inst_dw0, waves[i].inst_dw1, waves[i].pc);
703          }
704          if (found)
705             fprintf(f, "\n\n");
706       }
707 
708       VkDispatchIndirectCommand dispatch_indirect = device->trace_data->indirect_dispatch;
709       if (dispatch_indirect.x || dispatch_indirect.y || dispatch_indirect.z)
710          fprintf(f, "VkDispatchIndirectCommand: x=%u y=%u z=%u\n\n\n", dispatch_indirect.x, dispatch_indirect.y,
711                  dispatch_indirect.z);
712 
713       if (pipeline->type == RADV_PIPELINE_GRAPHICS) {
714          struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);
715          radv_dump_vertex_descriptors(device, graphics_pipeline, f);
716       }
717       radv_dump_descriptors(device, f);
718    }
719 }
720 
/* Run the shell command "cmd" and copy its standard output to "f", followed
 * by one newline. Nothing is written when the command cannot be started.
 * This is a no-op on Windows.
 */
static void
radv_dump_cmd(const char *cmd, FILE *f)
{
#ifndef _WIN32
   char chunk[2048];
   FILE *pipe = popen(cmd, "r");

   if (pipe == NULL)
      return;

   while (fgets(chunk, sizeof(chunk), pipe) != NULL)
      fputs(chunk, f);

   fprintf(f, "\n");
   pclose(pipe);
#endif
}
737 
/* Append the last 60 lines of the kernel log (as printed by dmesg) to "f",
 * under a short heading.
 */
static void
radv_dump_dmesg(FILE *f)
{
   static const char dmesg_tail_cmd[] = "dmesg | tail -n60";

   fprintf(f, "\nLast 60 lines of dmesg:\n\n");
   radv_dump_cmd(dmesg_tail_cmd, f);
}
744 
745 void
radv_dump_enabled_options(const struct radv_device * device,FILE * f)746 radv_dump_enabled_options(const struct radv_device *device, FILE *f)
747 {
748    const struct radv_physical_device *pdev = radv_device_physical(device);
749    const struct radv_instance *instance = radv_physical_device_instance(pdev);
750    uint64_t mask;
751 
752    if (instance->debug_flags) {
753       fprintf(f, "Enabled debug options: ");
754 
755       mask = instance->debug_flags;
756       while (mask) {
757          int i = u_bit_scan64(&mask);
758          fprintf(f, "%s, ", radv_get_debug_option_name(i));
759       }
760       fprintf(f, "\n");
761    }
762 
763    if (instance->perftest_flags) {
764       fprintf(f, "Enabled perftest options: ");
765 
766       mask = instance->perftest_flags;
767       while (mask) {
768          int i = u_bit_scan64(&mask);
769          fprintf(f, "%s, ", radv_get_perftest_option_name(i));
770       }
771       fprintf(f, "\n");
772    }
773 }
774 
775 static void
radv_dump_app_info(const struct radv_device * device,FILE * f)776 radv_dump_app_info(const struct radv_device *device, FILE *f)
777 {
778    const struct radv_physical_device *pdev = radv_device_physical(device);
779    const struct radv_instance *instance = radv_physical_device_instance(pdev);
780 
781    fprintf(f, "Application name: %s\n", instance->vk.app_info.app_name);
782    fprintf(f, "Application version: %d\n", instance->vk.app_info.app_version);
783    fprintf(f, "Engine name: %s\n", instance->vk.app_info.engine_name);
784    fprintf(f, "Engine version: %d\n", instance->vk.app_info.engine_version);
785    fprintf(f, "API version: %d.%d.%d\n", VK_VERSION_MAJOR(instance->vk.app_info.api_version),
786            VK_VERSION_MINOR(instance->vk.app_info.api_version), VK_VERSION_PATCH(instance->vk.app_info.api_version));
787 
788    radv_dump_enabled_options(device, f);
789 }
790 
791 static void
radv_dump_device_name(const struct radv_device * device,FILE * f)792 radv_dump_device_name(const struct radv_device *device, FILE *f)
793 {
794 #ifndef _WIN32
795    const struct radv_physical_device *pdev = radv_device_physical(device);
796    const struct radeon_info *gpu_info = &pdev->info;
797    char kernel_version[128] = {0};
798    struct utsname uname_data;
799 
800    if (uname(&uname_data) == 0)
801       snprintf(kernel_version, sizeof(kernel_version), " / %s", uname_data.release);
802 
803    fprintf(f, "Mesa version: " PACKAGE_VERSION MESA_GIT_SHA1 "\n");
804    fprintf(f, "Device name: %s (DRM %i.%i.%i%s)\n\n", pdev->marketing_name, gpu_info->drm_major, gpu_info->drm_minor,
805            gpu_info->drm_patchlevel, kernel_version);
806 #endif
807 }
808 
809 static void
radv_dump_umr_ring(const struct radv_queue * queue,FILE * f)810 radv_dump_umr_ring(const struct radv_queue *queue, FILE *f)
811 {
812 #ifndef _WIN32
813    const struct radv_device *device = radv_queue_device(queue);
814    const struct radv_physical_device *pdev = radv_device_physical(device);
815    const enum amd_ip_type ring = radv_queue_ring(queue);
816    char cmd[256];
817 
818    /* TODO: Dump compute ring. */
819    if (ring != AMD_IP_GFX)
820       return;
821 
822    sprintf(cmd, "umr --by-pci %04x:%02x:%02x.%01x -RS %s 2>&1", pdev->bus_info.domain, pdev->bus_info.bus,
823            pdev->bus_info.dev, pdev->bus_info.func, pdev->info.gfx_level >= GFX10 ? "gfx_0.0.0" : "gfx");
824    fprintf(f, "\nUMR GFX ring:\n\n");
825    radv_dump_cmd(cmd, f);
826 #endif
827 }
828 
/* Write the "UMR GFX waves" header followed by the pre-collected wave dump.
 * A NULL `wave_dump` prints the header only. `queue` is not used.
 */
static void
radv_dump_umr_waves(struct radv_queue *queue, const char *wave_dump, FILE *f)
{
   const char *text = wave_dump;

   if (text == NULL)
      text = "";

   fprintf(f, "\nUMR GFX waves:\n\n%s", text);
}
834 
835 static void
radv_dump_vm_fault(struct radv_device * device,const struct radv_winsys_gpuvm_fault_info * fault_info,FILE * f)836 radv_dump_vm_fault(struct radv_device *device, const struct radv_winsys_gpuvm_fault_info *fault_info, FILE *f)
837 {
838    struct radv_physical_device *pdev = radv_device_physical(device);
839 
840    fprintf(f, "VM fault report.\n\n");
841    fprintf(f, "Failing VM page: 0x%08" PRIx64 "\n", fault_info->addr);
842    ac_print_gpuvm_fault_status(f, pdev->info.gfx_level, fault_info->status);
843 
844    radv_dump_address_binding_report_check(device, fault_info->addr, f);
845 }
846 
847 static bool
radv_gpu_hang_occurred(struct radv_queue * queue,enum amd_ip_type ring)848 radv_gpu_hang_occurred(struct radv_queue *queue, enum amd_ip_type ring)
849 {
850    const struct radv_device *device = radv_queue_device(queue);
851    struct radeon_winsys *ws = device->ws;
852 
853    if (!ws->ctx_wait_idle(queue->hw_ctx, ring, queue->vk.index_in_family))
854       return true;
855 
856    return false;
857 }
858 
859 bool
radv_vm_fault_occurred(struct radv_device * device,struct radv_winsys_gpuvm_fault_info * fault_info)860 radv_vm_fault_occurred(struct radv_device *device, struct radv_winsys_gpuvm_fault_info *fault_info)
861 {
862    const struct radv_physical_device *pdev = radv_device_physical(device);
863 
864    if (!pdev->info.has_gpuvm_fault_query)
865       return false;
866 
867    return device->ws->query_gpuvm_fault(device->ws, fault_info);
868 }
869 
/* Sections of a GPU hang report, in the order they are generated.
 * The values are used as array indices, so they must stay dense and
 * RADV_DEVICE_FAULT_CHUNK_COUNT must remain last.
 */
enum radv_device_fault_chunk {
   RADV_DEVICE_FAULT_CHUNK_TRACE = 0,           /* command stream trace */
   RADV_DEVICE_FAULT_CHUNK_QUEUE_STATE,         /* queue state */
   RADV_DEVICE_FAULT_CHUNK_UMR_WAVES,           /* UMR wave dump */
   RADV_DEVICE_FAULT_CHUNK_UMR_RING,            /* UMR ring dump */
   RADV_DEVICE_FAULT_CHUNK_REGISTERS,           /* debug registers */
   RADV_DEVICE_FAULT_CHUNK_BO_RANGES,           /* winsys BO ranges */
   RADV_DEVICE_FAULT_CHUNK_BO_HISTORY,          /* winsys BO log */
   RADV_DEVICE_FAULT_CHUNK_ADDR_BINDING_REPORT, /* address binding reports */
   RADV_DEVICE_FAULT_CHUNK_VM_FAULT,            /* GPUVM fault details */
   RADV_DEVICE_FAULT_CHUNK_APP_INFO,            /* application info */
   RADV_DEVICE_FAULT_CHUNK_GPU_INFO,            /* device name and GPU info */
   RADV_DEVICE_FAULT_CHUNK_DMESG,               /* kernel log tail */
   RADV_DEVICE_FAULT_CHUNK_COUNT,
};
885 
886 static char *
radv_create_dump_dir()887 radv_create_dump_dir()
888 {
889 #ifndef _WIN32
890    char dump_dir[256], buf_time[128];
891    struct tm *timep, result;
892    time_t raw_time;
893 
894    time(&raw_time);
895    timep = os_localtime(&raw_time, &result);
896    strftime(buf_time, sizeof(buf_time), "%Y.%m.%d_%H.%M.%S", timep);
897 
898    snprintf(dump_dir, sizeof(dump_dir), "%s/" RADV_DUMP_DIR "_%d_%s", debug_get_option("HOME", "."), getpid(),
899             buf_time);
900    if (mkdir(dump_dir, 0774) && errno != EEXIST) {
901       fprintf(stderr, "radv: can't create directory '%s' (%i).\n", dump_dir, errno);
902       abort();
903    }
904 
905    return strdup(dump_dir);
906 #else
907    return NULL;
908 #endif
909 }
910 
911 VkResult
radv_check_gpu_hangs(struct radv_queue * queue,const struct radv_winsys_submit_info * submit_info)912 radv_check_gpu_hangs(struct radv_queue *queue, const struct radv_winsys_submit_info *submit_info)
913 {
914    enum amd_ip_type ring;
915 
916    ring = radv_queue_ring(queue);
917 
918    bool hang_occurred = radv_gpu_hang_occurred(queue, ring);
919    if (!hang_occurred)
920       return VK_SUCCESS;
921 
922    fprintf(stderr, "radv: GPU hang detected...\n");
923 
924 #ifndef _WIN32
925    struct radv_device *device = radv_queue_device(queue);
926    const struct radv_physical_device *pdev = radv_device_physical(device);
927    const struct radv_instance *instance = radv_physical_device_instance(pdev);
928    const bool save_hang_report = !device->vk.enabled_features.deviceFaultVendorBinary;
929    struct radv_winsys_gpuvm_fault_info fault_info = {0};
930 
931    /* Query if a VM fault happened for this GPU hang. */
932    bool vm_fault_occurred = radv_vm_fault_occurred(device, &fault_info);
933 
934    /* Create a directory into $HOME/radv_dumps_<pid>_<time> to save
935     * various debugging info about that GPU hang.
936     */
937    FILE *f;
938    char *dump_dir = NULL;
939    char dump_path[512];
940 
941    if (save_hang_report) {
942       dump_dir = radv_create_dump_dir();
943 
944       fprintf(stderr, "radv: GPU hang report will be saved to '%s'!\n", dump_dir);
945    }
946 
947    struct {
948       const char *name;
949       char *ptr;
950       size_t size;
951    } chunks[RADV_DEVICE_FAULT_CHUNK_COUNT] = {
952       {"trace"},     {"pipeline"},  {"umr_waves"},  {"umr_ring"},
953       {"registers"}, {"bo_ranges"}, {"bo_history"}, {"addr_binding_report"},
954       {"vm_fault"},  {"app_info"},  {"gpu_info"},   {"dmesg"},
955    };
956 
957    char *wave_dump = NULL;
958    if (!(instance->debug_flags & RADV_DEBUG_NO_UMR))
959       wave_dump = ac_get_umr_waves(&pdev->info, radv_queue_ring(queue));
960 
961    for (uint32_t i = 0; i < RADV_DEVICE_FAULT_CHUNK_COUNT; i++) {
962 
963       if (save_hang_report) {
964          snprintf(dump_path, sizeof(dump_path), "%s/%s.log", dump_dir, chunks[i].name);
965 
966          f = fopen(dump_path, "w+");
967       } else {
968          f = open_memstream(&chunks[i].ptr, &chunks[i].size);
969       }
970 
971       if (!f)
972          continue;
973 
974       switch (i) {
975       case RADV_DEVICE_FAULT_CHUNK_TRACE:
976          radv_dump_trace(device, submit_info->cs_array[0], f);
977          break;
978       case RADV_DEVICE_FAULT_CHUNK_QUEUE_STATE:
979          radv_dump_queue_state(queue, dump_dir, wave_dump, f);
980          break;
981       case RADV_DEVICE_FAULT_CHUNK_UMR_WAVES:
982          if (!(instance->debug_flags & RADV_DEBUG_NO_UMR))
983             radv_dump_umr_waves(queue, wave_dump, f);
984          break;
985       case RADV_DEVICE_FAULT_CHUNK_UMR_RING:
986          if (!(instance->debug_flags & RADV_DEBUG_NO_UMR))
987             radv_dump_umr_ring(queue, f);
988          break;
989       case RADV_DEVICE_FAULT_CHUNK_REGISTERS:
990          radv_dump_debug_registers(device, f);
991          break;
992       case RADV_DEVICE_FAULT_CHUNK_BO_RANGES:
993          device->ws->dump_bo_ranges(device->ws, f);
994          break;
995       case RADV_DEVICE_FAULT_CHUNK_BO_HISTORY:
996          device->ws->dump_bo_log(device->ws, f);
997          break;
998       case RADV_DEVICE_FAULT_CHUNK_ADDR_BINDING_REPORT:
999          radv_dump_address_binding_reports(device, f);
1000          break;
1001       case RADV_DEVICE_FAULT_CHUNK_VM_FAULT:
1002          if (vm_fault_occurred)
1003             radv_dump_vm_fault(device, &fault_info, f);
1004          break;
1005       case RADV_DEVICE_FAULT_CHUNK_APP_INFO:
1006          radv_dump_app_info(device, f);
1007          break;
1008       case RADV_DEVICE_FAULT_CHUNK_GPU_INFO:
1009          radv_dump_device_name(device, f);
1010          ac_print_gpu_info(&pdev->info, f);
1011          break;
1012       case RADV_DEVICE_FAULT_CHUNK_DMESG:
1013          radv_dump_dmesg(f);
1014          break;
1015       default:
1016          break;
1017       }
1018 
1019       fclose(f);
1020    }
1021 
1022    free(dump_dir);
1023    free(wave_dump);
1024 
1025    if (save_hang_report) {
1026       fprintf(stderr, "radv: GPU hang report saved successfully!\n");
1027       abort();
1028    } else {
1029       char *report;
1030 
1031       report = ralloc_strdup(NULL, "========== RADV GPU hang report ==========\n");
1032       for (uint32_t i = 0; i < RADV_DEVICE_FAULT_CHUNK_COUNT; i++) {
1033          if (!chunks[i].size)
1034             continue;
1035 
1036          ralloc_asprintf_append(&report, "\n========== %s ==========\n", chunks[i].name);
1037          ralloc_asprintf_append(&report, "%s", chunks[i].ptr);
1038 
1039          free(chunks[i].ptr);
1040       }
1041 
1042       device->gpu_hang_report = report;
1043    }
1044 
1045 #endif
1046    return VK_ERROR_DEVICE_LOST;
1047 }
1048 
1049 bool
radv_trap_handler_init(struct radv_device * device)1050 radv_trap_handler_init(struct radv_device *device)
1051 {
1052    const struct radv_physical_device *pdev = radv_device_physical(device);
1053    struct radeon_winsys *ws = device->ws;
1054    uint32_t desc[4];
1055    VkResult result;
1056    uint32_t size;
1057 
1058    /* Create the trap handler shader and upload it like other shaders. */
1059    device->trap_handler_shader = radv_create_trap_handler_shader(device);
1060    if (!device->trap_handler_shader) {
1061       fprintf(stderr, "radv: failed to create the trap handler shader.\n");
1062       return false;
1063    }
1064 
1065    result = ws->buffer_make_resident(ws, device->trap_handler_shader->bo, true);
1066    if (result != VK_SUCCESS)
1067       return false;
1068 
1069    /* Compute the TMA BO size. */
1070    size = sizeof(desc) + sizeof(struct aco_trap_handler_layout);
1071 
1072    result = radv_bo_create(
1073       device, NULL, size, 256, RADEON_DOMAIN_VRAM,
1074       RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM | RADEON_FLAG_32BIT,
1075       RADV_BO_PRIORITY_SCRATCH, 0, true, &device->tma_bo);
1076    if (result != VK_SUCCESS)
1077       return false;
1078 
1079    result = ws->buffer_make_resident(ws, device->tma_bo, true);
1080    if (result != VK_SUCCESS)
1081       return false;
1082 
1083    device->tma_ptr = radv_buffer_map(ws, device->tma_bo);
1084    if (!device->tma_ptr)
1085       return false;
1086 
1087    /* Upload a buffer descriptor to store various info from the trap. */
1088    uint64_t tma_va = radv_buffer_get_va(device->tma_bo) + sizeof(desc);
1089 
1090    const struct ac_buffer_state ac_state = {
1091       .va = tma_va,
1092       .size = size - sizeof(desc),
1093       .format = PIPE_FORMAT_R32_FLOAT,
1094       .swizzle =
1095          {
1096             PIPE_SWIZZLE_X,
1097             PIPE_SWIZZLE_Y,
1098             PIPE_SWIZZLE_Z,
1099             PIPE_SWIZZLE_W,
1100          },
1101       .gfx10_oob_select = V_008F0C_OOB_SELECT_RAW,
1102       .stride = 4, /* Used for VGPRs dump. */
1103    };
1104 
1105    ac_build_buffer_descriptor(pdev->info.gfx_level, &ac_state, desc);
1106 
1107    memcpy(device->tma_ptr, desc, sizeof(desc));
1108 
1109    return true;
1110 }
1111 
1112 void
radv_trap_handler_finish(struct radv_device * device)1113 radv_trap_handler_finish(struct radv_device *device)
1114 {
1115    struct radeon_winsys *ws = device->ws;
1116 
1117    if (unlikely(device->trap_handler_shader)) {
1118       ws->buffer_make_resident(ws, device->trap_handler_shader->bo, false);
1119       radv_shader_unref(device, device->trap_handler_shader);
1120    }
1121 
1122    if (unlikely(device->tma_bo)) {
1123       ws->buffer_make_resident(ws, device->tma_bo, false);
1124       radv_bo_destroy(device, NULL, device->tma_bo);
1125    }
1126 }
1127 
1128 static void
radv_dump_faulty_shader(const struct radv_device * device,const struct radv_shader * shader,uint64_t faulty_pc,FILE * f)1129 radv_dump_faulty_shader(const struct radv_device *device, const struct radv_shader *shader, uint64_t faulty_pc, FILE *f)
1130 {
1131    uint64_t start_addr, end_addr;
1132    uint32_t instr_offset;
1133 
1134    start_addr = radv_shader_get_va(shader);
1135    start_addr &= ((1ull << 48) - 1);
1136    end_addr = start_addr + shader->code_size;
1137    instr_offset = faulty_pc - start_addr;
1138 
1139    fprintf(f,
1140            "Faulty shader found "
1141            "VA=[0x%" PRIx64 "-0x%" PRIx64 "], instr_offset=%d\n",
1142            start_addr, end_addr, instr_offset);
1143 
1144    /* Get the list of instructions.
1145     * Buffer size / 4 is the upper bound of the instruction count.
1146     */
1147    unsigned num_inst = 0;
1148    struct radv_shader_inst *instructions = calloc(shader->code_size / 4, sizeof(struct radv_shader_inst));
1149 
1150    /* Split the disassembly string into instructions. */
1151    radv_add_split_disasm(shader->disasm_string, start_addr, &num_inst, instructions);
1152 
1153    /* Print instructions with annotations. */
1154    for (unsigned i = 0; i < num_inst; i++) {
1155       struct radv_shader_inst *inst = &instructions[i];
1156 
1157       if (start_addr + inst->offset == faulty_pc) {
1158          fprintf(f, "\n!!! Faulty instruction below !!!\n");
1159          fprintf(f, "%s\n", inst->text);
1160          fprintf(f, "\n");
1161       } else {
1162          fprintf(f, "%s\n", inst->text);
1163       }
1164    }
1165 
1166    free(instructions);
1167 }
1168 
1169 static void
radv_dump_sq_hw_regs(struct radv_device * device,const struct aco_trap_handler_layout * layout,FILE * f)1170 radv_dump_sq_hw_regs(struct radv_device *device, const struct aco_trap_handler_layout *layout, FILE *f)
1171 {
1172    const struct radv_physical_device *pdev = radv_device_physical(device);
1173    enum amd_gfx_level gfx_level = pdev->info.gfx_level;
1174    enum radeon_family family = pdev->info.family;
1175 
1176    fprintf(f, "\nHardware registers:\n");
1177    if (pdev->info.gfx_level >= GFX10) {
1178       ac_dump_reg(f, gfx_level, family, R_000404_SQ_WAVE_MODE, layout->sq_wave_regs.mode, ~0);
1179       ac_dump_reg(f, gfx_level, family, R_000408_SQ_WAVE_STATUS, layout->sq_wave_regs.status, ~0);
1180       ac_dump_reg(f, gfx_level, family, R_00040C_SQ_WAVE_TRAPSTS, layout->sq_wave_regs.trap_sts, ~0);
1181       ac_dump_reg(f, gfx_level, family, R_00045C_SQ_WAVE_HW_ID1, layout->sq_wave_regs.hw_id1, ~0);
1182       ac_dump_reg(f, gfx_level, family, R_000414_SQ_WAVE_GPR_ALLOC, layout->sq_wave_regs.gpr_alloc, ~0);
1183       ac_dump_reg(f, gfx_level, family, R_000418_SQ_WAVE_LDS_ALLOC, layout->sq_wave_regs.lds_alloc, ~0);
1184       ac_dump_reg(f, gfx_level, family, R_00041C_SQ_WAVE_IB_STS, layout->sq_wave_regs.ib_sts, ~0);
1185    } else {
1186       ac_dump_reg(f, gfx_level, family, R_000044_SQ_WAVE_MODE, layout->sq_wave_regs.mode, ~0);
1187       ac_dump_reg(f, gfx_level, family, R_000048_SQ_WAVE_STATUS, layout->sq_wave_regs.status, ~0);
1188       ac_dump_reg(f, gfx_level, family, R_00004C_SQ_WAVE_TRAPSTS, layout->sq_wave_regs.trap_sts, ~0);
1189       ac_dump_reg(f, gfx_level, family, R_000050_SQ_WAVE_HW_ID, layout->sq_wave_regs.hw_id1, ~0);
1190       ac_dump_reg(f, gfx_level, family, R_000054_SQ_WAVE_GPR_ALLOC, layout->sq_wave_regs.gpr_alloc, ~0);
1191       ac_dump_reg(f, gfx_level, family, R_000058_SQ_WAVE_LDS_ALLOC, layout->sq_wave_regs.lds_alloc, ~0);
1192       ac_dump_reg(f, gfx_level, family, R_00005C_SQ_WAVE_IB_STS, layout->sq_wave_regs.ib_sts, ~0);
1193    }
1194    fprintf(f, "\n\n");
1195 }
1196 
1197 static uint32_t
radv_get_vgpr_size(const struct radv_device * device,const struct aco_trap_handler_layout * layout)1198 radv_get_vgpr_size(const struct radv_device *device, const struct aco_trap_handler_layout *layout)
1199 {
1200    const struct radv_physical_device *pdev = radv_device_physical(device);
1201    uint32_t vgpr_size;
1202 
1203    if (pdev->info.gfx_level >= GFX11) {
1204       vgpr_size = G_000414_VGPR_SIZE_GFX11(layout->sq_wave_regs.gpr_alloc);
1205    } else if (pdev->info.gfx_level >= GFX10) {
1206       vgpr_size = G_000414_VGPR_SIZE_GFX10(layout->sq_wave_regs.gpr_alloc);
1207    } else {
1208       vgpr_size = G_000054_VGPR_SIZE_GFX6(layout->sq_wave_regs.gpr_alloc);
1209    }
1210 
1211    return vgpr_size;
1212 }
1213 
1214 static void
radv_dump_shader_regs(const struct radv_device * device,const struct aco_trap_handler_layout * layout,FILE * f)1215 radv_dump_shader_regs(const struct radv_device *device, const struct aco_trap_handler_layout *layout, FILE *f)
1216 {
1217    fprintf(f, "\nShader registers:\n");
1218 
1219    fprintf(f, "m0: 0x%08x\n", layout->m0);
1220    fprintf(f, "exec_lo: 0x%08x\n", layout->exec_lo);
1221    fprintf(f, "exec_hi: 0x%08x\n", layout->exec_hi);
1222 
1223    fprintf(f, "\nSGPRS:\n");
1224    for (uint32_t i = 0; i < MAX_SGPRS; i += 4) {
1225       fprintf(f, "s[%d-%d] = { %08x, %08x, %08x, %08x }\n", i, i + 3, layout->sgprs[i], layout->sgprs[i + 1],
1226               layout->sgprs[i + 2], layout->sgprs[i + 3]);
1227    }
1228    fprintf(f, "\n\n");
1229 
1230    const uint32_t vgpr_size = radv_get_vgpr_size(device, layout);
1231    const uint32_t num_vgprs = (vgpr_size + 1) * 4 /* 4-VGPR granularity */;
1232    const uint64_t exec = layout->exec_lo | (uint64_t)layout->exec_hi << 32;
1233 
1234    assert(num_vgprs < MAX_VGPRS);
1235 
1236    fprintf(f, "VGPRS:\n");
1237    fprintf(f, "             ");
1238    for (uint32_t i = 0; i < 64; i++) {
1239       const bool live = exec & BITFIELD64_BIT(i);
1240 
1241       fprintf(f, live ? " t%02u     " : " (t%02u)   ", i);
1242    }
1243    fprintf(f, "\n");
1244    for (uint32_t i = 0; i < num_vgprs; i++) {
1245       fprintf(f, "    [%3u] = {", i);
1246 
1247       for (uint32_t j = 0; j < 64; j++) {
1248          fprintf(f, " %08x", layout->vgprs[i * 64 + j]);
1249       }
1250       fprintf(f, " }\n");
1251    }
1252 
1253    fprintf(f, "\n\n");
1254 }
1255 
1256 static void
radv_dump_lds(const struct radv_device * device,const struct aco_trap_handler_layout * layout,FILE * f)1257 radv_dump_lds(const struct radv_device *device, const struct aco_trap_handler_layout *layout, FILE *f)
1258 {
1259    uint32_t lds_size = G_000058_LDS_SIZE(layout->sq_wave_regs.lds_alloc);
1260 
1261    if (!lds_size)
1262       return;
1263 
1264    /* Compute the LDS size in dwords. */
1265    lds_size *= 64;
1266 
1267    fprintf(f, "LDS:\n");
1268 
1269    for (uint32_t i = 0; i < lds_size; i += 8) {
1270       fprintf(f, "lds[%d-%d] = { %08x, %08x, %08x, %08x, %08x, %08x, %08x, %08x }\n", i, i + 7, layout->lds[i],
1271               layout->lds[i + 1], layout->lds[i + 2], layout->lds[i + 3], layout->lds[i + 4], layout->lds[i + 5],
1272               layout->lds[i + 6], layout->lds[i + 7]);
1273    }
1274 
1275    fprintf(f, "\n\n");
1276 }
1277 
1278 void
radv_check_trap_handler(struct radv_queue * queue)1279 radv_check_trap_handler(struct radv_queue *queue)
1280 {
1281    enum amd_ip_type ring = radv_queue_ring(queue);
1282    struct radv_device *device = radv_queue_device(queue);
1283    struct radeon_winsys *ws = device->ws;
1284    const struct aco_trap_handler_layout *layout = (struct aco_trap_handler_layout *)&device->tma_ptr[4];
1285 
1286    /* Wait for the context to be idle in a finite time. */
1287    ws->ctx_wait_idle(queue->hw_ctx, ring, queue->vk.index_in_family);
1288 
1289    /* Try to detect if the trap handler has been reached by the hw by
1290     * looking at ttmp0 which should be non-zero if a shader exception
1291     * happened.
1292     */
1293    if (!layout->ttmp0)
1294       return;
1295 
1296    fprintf(stderr, "radv: Trap handler reached...\n");
1297 
1298 #ifndef _WIN32
1299    char *dump_dir = NULL;
1300    char dump_path[512];
1301    FILE *f;
1302 
1303    dump_dir = radv_create_dump_dir();
1304 
1305    fprintf(stderr, "radv: Trap handler report will be saved to '%s'!\n", dump_dir);
1306 
1307    snprintf(dump_path, sizeof(dump_path), "%s/trap_handler.log", dump_dir);
1308    f = fopen(dump_path, "w+");
1309    if (!f) {
1310       free(dump_dir);
1311       return;
1312    }
1313 
1314 #if 0
1315    fprintf(stderr, "tma_ptr:\n");
1316    for (unsigned i = 0; i < 10; i++)
1317       fprintf(stderr, "tma_ptr[%d]=0x%x\n", i, device->tma_ptr[i]);
1318 #endif
1319 
1320    radv_dump_sq_hw_regs(device, layout, f);
1321    radv_dump_shader_regs(device, layout, f);
1322    radv_dump_lds(device, layout, f);
1323 
1324    uint32_t ttmp0 = layout->ttmp0;
1325    uint32_t ttmp1 = layout->ttmp1;
1326 
1327    /* According to the ISA docs, 3.10 Trap and Exception Registers:
1328     *
1329     * "{ttmp1, ttmp0} = {3'h0, pc_rewind[3:0], HT[0], trapID[7:0], PC[47:0]}"
1330     *
1331     * "When the trap handler is entered, the PC of the faulting
1332     *  instruction is: (PC - PC_rewind * 4)."
1333     * */
1334    uint8_t trap_id = (ttmp1 >> 16) & 0xff;
1335    uint8_t ht = (ttmp1 >> 24) & 0x1;
1336    uint8_t pc_rewind = (ttmp1 >> 25) & 0xf;
1337    uint64_t pc = (ttmp0 | ((ttmp1 & 0x0000ffffull) << 32)) - (pc_rewind * 4);
1338 
1339    fprintf(f, "PC=0x%" PRIx64 ", trapID=%d, HT=%d, PC_rewind=%d\n", pc, trap_id, ht, pc_rewind);
1340 
1341    struct radv_shader *shader = radv_find_shader(device, pc);
1342    if (shader) {
1343       radv_dump_faulty_shader(device, shader, pc, f);
1344    } else {
1345       fprintf(stderr, "radv: Failed to find the faulty shader.\n");
1346    }
1347 
1348    fclose(f);
1349 
1350    if (shader) {
1351       snprintf(dump_path, sizeof(dump_path), "%s/shader_dump.log", dump_dir);
1352       f = fopen(dump_path, "w+");
1353       if (!f) {
1354          free(dump_dir);
1355          return;
1356       }
1357 
1358       radv_dump_shader(device, NULL, shader, shader->info.stage, dump_dir, f);
1359       fclose(f);
1360    }
1361 
1362    free(dump_dir);
1363 
1364    fprintf(stderr, "radv: Trap handler report saved successfully!\n");
1365    abort();
1366 #endif
1367 }
1368 
1369 /* VK_EXT_device_fault */
1370 VKAPI_ATTR VkResult VKAPI_CALL
radv_GetDeviceFaultInfoEXT(VkDevice _device,VkDeviceFaultCountsEXT * pFaultCounts,VkDeviceFaultInfoEXT * pFaultInfo)1371 radv_GetDeviceFaultInfoEXT(VkDevice _device, VkDeviceFaultCountsEXT *pFaultCounts, VkDeviceFaultInfoEXT *pFaultInfo)
1372 {
1373    VK_OUTARRAY_MAKE_TYPED(VkDeviceFaultAddressInfoEXT, out, pFaultInfo ? pFaultInfo->pAddressInfos : NULL,
1374                           &pFaultCounts->addressInfoCount);
1375    struct radv_winsys_gpuvm_fault_info fault_info = {0};
1376    VK_FROM_HANDLE(radv_device, device, _device);
1377    const struct radv_physical_device *pdev = radv_device_physical(device);
1378    const struct radv_instance *instance = radv_physical_device_instance(pdev);
1379    bool vm_fault_occurred = false;
1380 
1381    /* Query if a GPUVM fault happened. */
1382    vm_fault_occurred = radv_vm_fault_occurred(device, &fault_info);
1383 
1384    /* No vendor-specific crash dumps yet. */
1385    pFaultCounts->vendorInfoCount = 0;
1386    pFaultCounts->vendorBinarySize = 0;
1387 
1388    if (device->gpu_hang_report) {
1389       VkDeviceFaultVendorBinaryHeaderVersionOneEXT hdr;
1390 
1391       hdr.headerSize = sizeof(VkDeviceFaultVendorBinaryHeaderVersionOneEXT);
1392       hdr.headerVersion = VK_DEVICE_FAULT_VENDOR_BINARY_HEADER_VERSION_ONE_EXT;
1393       hdr.vendorID = pdev->vk.properties.vendorID;
1394       hdr.deviceID = pdev->vk.properties.deviceID;
1395       hdr.driverVersion = pdev->vk.properties.driverVersion;
1396       memcpy(hdr.pipelineCacheUUID, pdev->cache_uuid, VK_UUID_SIZE);
1397       hdr.applicationNameOffset = 0;
1398       hdr.applicationVersion = instance->vk.app_info.app_version;
1399       hdr.engineNameOffset = 0;
1400       hdr.engineVersion = instance->vk.app_info.engine_version;
1401       hdr.apiVersion = instance->vk.app_info.api_version;
1402 
1403       pFaultCounts->vendorBinarySize = sizeof(hdr) + strlen(device->gpu_hang_report);
1404       if (pFaultInfo) {
1405          memcpy(pFaultInfo->pVendorBinaryData, &hdr, sizeof(hdr));
1406          memcpy((char *)pFaultInfo->pVendorBinaryData + sizeof(hdr), device->gpu_hang_report,
1407                 strlen(device->gpu_hang_report));
1408       }
1409    }
1410 
1411    if (vm_fault_occurred) {
1412       VkDeviceFaultAddressInfoEXT addr_fault_info = {
1413          .reportedAddress = ((int64_t)fault_info.addr << 16) >> 16,
1414          .addressPrecision = 4096, /* 4K page granularity */
1415       };
1416 
1417       if (pFaultInfo)
1418          strncpy(pFaultInfo->description, "A GPUVM fault has been detected", sizeof(pFaultInfo->description));
1419 
1420       if (pdev->info.gfx_level >= GFX10) {
1421          addr_fault_info.addressType = G_00A130_RW(fault_info.status) ? VK_DEVICE_FAULT_ADDRESS_TYPE_WRITE_INVALID_EXT
1422                                                                       : VK_DEVICE_FAULT_ADDRESS_TYPE_READ_INVALID_EXT;
1423       } else {
1424          /* Not sure how to get the access status on GFX6-9. */
1425          addr_fault_info.addressType = VK_DEVICE_FAULT_ADDRESS_TYPE_NONE_EXT;
1426       }
1427       vk_outarray_append_typed(VkDeviceFaultAddressInfoEXT, &out, elem) *elem = addr_fault_info;
1428    }
1429 
1430    return vk_outarray_status(&out);
1431 }
1432