• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2016 Red Hat.
3  * Copyright © 2016 Bas Nieuwenhuizen
4  *
5  * based in part on anv driver which is:
6  * Copyright © 2015 Intel Corporation
7  *
8  * Permission is hereby granted, free of charge, to any person obtaining a
9  * copy of this software and associated documentation files (the "Software"),
10  * to deal in the Software without restriction, including without limitation
11  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12  * and/or sell copies of the Software, and to permit persons to whom the
13  * Software is furnished to do so, subject to the following conditions:
14  *
15  * The above copyright notice and this permission notice (including the next
16  * paragraph) shall be included in all copies or substantial portions of the
17  * Software.
18  *
19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
22  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25  * IN THE SOFTWARE.
26  */
27 
28 #include <stdio.h>
29 #include <stdlib.h>
30 #ifndef _WIN32
31 #include <sys/utsname.h>
32 #endif
33 #include <sys/stat.h>
34 
35 #include "util/mesa-sha1.h"
36 #include "util/os_time.h"
37 #include "ac_debug.h"
38 #include "radv_debug.h"
39 #include "radv_shader.h"
40 #include "sid.h"
41 
/* Sizes, in bytes, of the trace and TMA buffer objects. */
#define TRACE_BO_SIZE 4096
#define TMA_BO_SIZE   4096

/* ANSI escape sequences used to colorize the dumps. */
#define COLOR_RESET  "\033[0m"
#define COLOR_RED    "\033[31m"
#define COLOR_GREEN  "\033[1;32m"
#define COLOR_YELLOW "\033[1;33m"
#define COLOR_CYAN   "\033[1;36m"

/* Prefix of the directory name that receives GPU hang reports. */
#define RADV_DUMP_DIR "radv_dumps"
52 
/* Trace BO layout (indices are in units of 4-byte dwords; 64-bit slot N
 * covers dwords [2N] and [2N+1]):
 *
 * [0]: primary trace ID
 * [1]: secondary trace ID
 * [2-3]: 64-bit GFX ring pipeline pointer
 * [4-5]: 64-bit COMPUTE ring pipeline pointer
 * [6-7]: 64-bit vertex descriptors pointer
 * [8-9]: 64-bit Vertex prolog pointer
 * [10-11]: 64-bit descriptor set #0 pointer
 * ...
 * [72-73]: 64-bit descriptor set #31 pointer
 */
65 
66 bool
radv_init_trace(struct radv_device * device)67 radv_init_trace(struct radv_device *device)
68 {
69    struct radeon_winsys *ws = device->ws;
70    VkResult result;
71 
72    result = ws->buffer_create(
73       ws, TRACE_BO_SIZE, 8, RADEON_DOMAIN_VRAM,
74       RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM | RADEON_FLAG_VA_UNCACHED,
75       RADV_BO_PRIORITY_UPLOAD_BUFFER, 0, &device->trace_bo);
76    if (result != VK_SUCCESS)
77       return false;
78 
79    result = ws->buffer_make_resident(ws, device->trace_bo, true);
80    if (result != VK_SUCCESS)
81       return false;
82 
83    device->trace_id_ptr = ws->buffer_map(device->trace_bo);
84    if (!device->trace_id_ptr)
85       return false;
86 
87    return true;
88 }
89 
90 void
radv_finish_trace(struct radv_device * device)91 radv_finish_trace(struct radv_device *device)
92 {
93    struct radeon_winsys *ws = device->ws;
94 
95    if (unlikely(device->trace_bo)) {
96       ws->buffer_make_resident(ws, device->trace_bo, false);
97       ws->buffer_destroy(ws, device->trace_bo);
98    }
99 }
100 
101 static void
radv_dump_trace(const struct radv_device * device,struct radeon_cmdbuf * cs,FILE * f)102 radv_dump_trace(const struct radv_device *device, struct radeon_cmdbuf *cs, FILE *f)
103 {
104    fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr);
105    device->ws->cs_dump(cs, f, (const int *)device->trace_id_ptr, 2, RADV_CS_DUMP_TYPE_IBS);
106 }
107 
108 static void
radv_dump_mmapped_reg(const struct radv_device * device,FILE * f,unsigned offset)109 radv_dump_mmapped_reg(const struct radv_device *device, FILE *f, unsigned offset)
110 {
111    struct radeon_winsys *ws = device->ws;
112    uint32_t value;
113 
114    if (ws->read_registers(ws, offset, 1, &value))
115       ac_dump_reg(f, device->physical_device->rad_info.gfx_level, device->physical_device->rad_info.family, offset,
116                   value, ~0);
117 }
118 
119 static void
radv_dump_debug_registers(const struct radv_device * device,FILE * f)120 radv_dump_debug_registers(const struct radv_device *device, FILE *f)
121 {
122    const struct radeon_info *info = &device->physical_device->rad_info;
123 
124    fprintf(f, "Memory-mapped registers:\n");
125    radv_dump_mmapped_reg(device, f, R_008010_GRBM_STATUS);
126 
127    radv_dump_mmapped_reg(device, f, R_008008_GRBM_STATUS2);
128    radv_dump_mmapped_reg(device, f, R_008014_GRBM_STATUS_SE0);
129    radv_dump_mmapped_reg(device, f, R_008018_GRBM_STATUS_SE1);
130    radv_dump_mmapped_reg(device, f, R_008038_GRBM_STATUS_SE2);
131    radv_dump_mmapped_reg(device, f, R_00803C_GRBM_STATUS_SE3);
132    radv_dump_mmapped_reg(device, f, R_00D034_SDMA0_STATUS_REG);
133    radv_dump_mmapped_reg(device, f, R_00D834_SDMA1_STATUS_REG);
134    if (info->gfx_level <= GFX8) {
135       radv_dump_mmapped_reg(device, f, R_000E50_SRBM_STATUS);
136       radv_dump_mmapped_reg(device, f, R_000E4C_SRBM_STATUS2);
137       radv_dump_mmapped_reg(device, f, R_000E54_SRBM_STATUS3);
138    }
139    radv_dump_mmapped_reg(device, f, R_008680_CP_STAT);
140    radv_dump_mmapped_reg(device, f, R_008674_CP_STALLED_STAT1);
141    radv_dump_mmapped_reg(device, f, R_008678_CP_STALLED_STAT2);
142    radv_dump_mmapped_reg(device, f, R_008670_CP_STALLED_STAT3);
143    radv_dump_mmapped_reg(device, f, R_008210_CP_CPC_STATUS);
144    radv_dump_mmapped_reg(device, f, R_008214_CP_CPC_BUSY_STAT);
145    radv_dump_mmapped_reg(device, f, R_008218_CP_CPC_STALLED_STAT1);
146    radv_dump_mmapped_reg(device, f, R_00821C_CP_CPF_STATUS);
147    radv_dump_mmapped_reg(device, f, R_008220_CP_CPF_BUSY_STAT);
148    radv_dump_mmapped_reg(device, f, R_008224_CP_CPF_STALLED_STAT1);
149    fprintf(f, "\n");
150 }
151 
152 static void
radv_dump_buffer_descriptor(enum amd_gfx_level gfx_level,enum radeon_family family,const uint32_t * desc,FILE * f)153 radv_dump_buffer_descriptor(enum amd_gfx_level gfx_level, enum radeon_family family, const uint32_t *desc, FILE *f)
154 {
155    fprintf(f, COLOR_CYAN "    Buffer:" COLOR_RESET "\n");
156    for (unsigned j = 0; j < 4; j++)
157       ac_dump_reg(f, gfx_level, family, R_008F00_SQ_BUF_RSRC_WORD0 + j * 4, desc[j], 0xffffffff);
158 }
159 
160 static void
radv_dump_image_descriptor(enum amd_gfx_level gfx_level,enum radeon_family family,const uint32_t * desc,FILE * f)161 radv_dump_image_descriptor(enum amd_gfx_level gfx_level, enum radeon_family family, const uint32_t *desc, FILE *f)
162 {
163    unsigned sq_img_rsrc_word0 = gfx_level >= GFX10 ? R_00A000_SQ_IMG_RSRC_WORD0 : R_008F10_SQ_IMG_RSRC_WORD0;
164 
165    fprintf(f, COLOR_CYAN "    Image:" COLOR_RESET "\n");
166    for (unsigned j = 0; j < 8; j++)
167       ac_dump_reg(f, gfx_level, family, sq_img_rsrc_word0 + j * 4, desc[j], 0xffffffff);
168 
169    fprintf(f, COLOR_CYAN "    FMASK:" COLOR_RESET "\n");
170    for (unsigned j = 0; j < 8; j++)
171       ac_dump_reg(f, gfx_level, family, sq_img_rsrc_word0 + j * 4, desc[8 + j], 0xffffffff);
172 }
173 
174 static void
radv_dump_sampler_descriptor(enum amd_gfx_level gfx_level,enum radeon_family family,const uint32_t * desc,FILE * f)175 radv_dump_sampler_descriptor(enum amd_gfx_level gfx_level, enum radeon_family family, const uint32_t *desc, FILE *f)
176 {
177    fprintf(f, COLOR_CYAN "    Sampler state:" COLOR_RESET "\n");
178    for (unsigned j = 0; j < 4; j++) {
179       ac_dump_reg(f, gfx_level, family, R_008F30_SQ_IMG_SAMP_WORD0 + j * 4, desc[j], 0xffffffff);
180    }
181 }
182 
183 static void
radv_dump_combined_image_sampler_descriptor(enum amd_gfx_level gfx_level,enum radeon_family family,const uint32_t * desc,FILE * f)184 radv_dump_combined_image_sampler_descriptor(enum amd_gfx_level gfx_level, enum radeon_family family,
185                                             const uint32_t *desc, FILE *f)
186 {
187    radv_dump_image_descriptor(gfx_level, family, desc, f);
188    radv_dump_sampler_descriptor(gfx_level, family, desc + 16, f);
189 }
190 
191 static void
radv_dump_descriptor_set(const struct radv_device * device,const struct radv_descriptor_set * set,unsigned id,FILE * f)192 radv_dump_descriptor_set(const struct radv_device *device, const struct radv_descriptor_set *set, unsigned id, FILE *f)
193 {
194    enum amd_gfx_level gfx_level = device->physical_device->rad_info.gfx_level;
195    enum radeon_family family = device->physical_device->rad_info.family;
196    const struct radv_descriptor_set_layout *layout;
197    int i;
198 
199    if (!set)
200       return;
201    layout = set->header.layout;
202 
203    for (i = 0; i < set->header.layout->binding_count; i++) {
204       uint32_t *desc = set->header.mapped_ptr + layout->binding[i].offset / 4;
205 
206       switch (layout->binding[i].type) {
207       case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
208       case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
209       case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
210       case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
211          radv_dump_buffer_descriptor(gfx_level, family, desc, f);
212          break;
213       case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
214       case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
215       case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
216          radv_dump_image_descriptor(gfx_level, family, desc, f);
217          break;
218       case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
219          radv_dump_combined_image_sampler_descriptor(gfx_level, family, desc, f);
220          break;
221       case VK_DESCRIPTOR_TYPE_SAMPLER:
222          radv_dump_sampler_descriptor(gfx_level, family, desc, f);
223          break;
224       case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
225       case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
226       case VK_DESCRIPTOR_TYPE_MUTABLE_EXT:
227       case VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR:
228          /* todo */
229          break;
230       default:
231          assert(!"unknown descriptor type");
232          break;
233       }
234       fprintf(f, "\n");
235    }
236    fprintf(f, "\n\n");
237 }
238 
239 static void
radv_dump_descriptors(struct radv_device * device,FILE * f)240 radv_dump_descriptors(struct radv_device *device, FILE *f)
241 {
242    uint64_t *ptr = (uint64_t *)device->trace_id_ptr;
243    int i;
244 
245    fprintf(f, "Descriptors:\n");
246    for (i = 0; i < MAX_SETS; i++) {
247       struct radv_descriptor_set *set = *(struct radv_descriptor_set **)(ptr + i + 5);
248 
249       radv_dump_descriptor_set(device, set, i, f);
250    }
251 }
252 
struct radv_shader_inst {
   char text[160];  /* one disasm line */
   unsigned offset; /* instruction offset */
   unsigned size;   /* instruction size = 4 or 8 */
};

/* Split a disassembly string into lines and add them to the array pointed
 * to by "instructions".
 *
 * Only newline-terminated lines that contain a ';' are treated as
 * instructions; everything else is skipped. For every instruction the line
 * text is stored, followed by a " [PC=..., off=..., size=...]" annotation.
 * Lines that do not fit in radv_shader_inst::text are truncated instead of
 * overflowing the buffer.
 *
 * disasm:       NUL-terminated disassembly text.
 * start_addr:   address of the first instruction, used for the PC annotation.
 * num:          in/out count of valid entries in "instructions"; new entries
 *               are appended and their offsets continue after the last one.
 * instructions: destination array; the caller must make it large enough,
 *               no bound is checked here.
 */
static void
radv_add_split_disasm(const char *disasm, uint64_t start_addr, unsigned *num, struct radv_shader_inst *instructions)
{
   struct radv_shader_inst *last_inst = *num ? &instructions[*num - 1] : NULL;
   const char *next;

   while ((next = strchr(disasm, '\n'))) {
      const size_t line_len = (size_t)(next - disasm);
      const char *semicolon = memchr(disasm, ';', line_len);

      if (!semicolon) {
         /* Ignore everything that is not an instruction. */
         disasm = next + 1;
         continue;
      }

      struct radv_shader_inst *inst = &instructions[*num];

      /* Clamp the copy so an over-long line cannot overflow inst->text
       * (previously this was only guarded by an assert).
       */
      const size_t max_len = sizeof(inst->text) - 1;
      const size_t len = line_len < max_len ? line_len : max_len;

      memcpy(inst->text, disasm, len);
      inst->text[len] = 0;
      inst->offset = last_inst ? last_inst->offset + last_inst->size : 0;

      /* More than 16 chars after ";" means the instruction is 8 bytes long. */
      inst->size = next - semicolon > 16 ? 8 : 4;

      /* Append the annotation; snprintf truncates if the line is nearly full. */
      snprintf(inst->text + len, sizeof(inst->text) - len, " [PC=0x%" PRIx64 ", off=%u, size=%u]",
               start_addr + inst->offset, inst->offset, inst->size);

      last_inst = inst;
      (*num)++;
      disasm = next + 1;
   }
}
295 
296 static void
radv_dump_annotated_shader(const struct radv_shader * shader,gl_shader_stage stage,struct ac_wave_info * waves,unsigned num_waves,FILE * f)297 radv_dump_annotated_shader(const struct radv_shader *shader, gl_shader_stage stage, struct ac_wave_info *waves,
298                            unsigned num_waves, FILE *f)
299 {
300    uint64_t start_addr, end_addr;
301    unsigned i;
302 
303    if (!shader)
304       return;
305 
306    start_addr = radv_shader_get_va(shader);
307    end_addr = start_addr + shader->code_size;
308 
309    /* See if any wave executes the shader. */
310    for (i = 0; i < num_waves; i++) {
311       if (start_addr <= waves[i].pc && waves[i].pc <= end_addr)
312          break;
313    }
314 
315    if (i == num_waves)
316       return; /* the shader is not being executed */
317 
318    /* Remember the first found wave. The waves are sorted according to PC. */
319    waves = &waves[i];
320    num_waves -= i;
321 
322    /* Get the list of instructions.
323     * Buffer size / 4 is the upper bound of the instruction count.
324     */
325    unsigned num_inst = 0;
326    struct radv_shader_inst *instructions = calloc(shader->code_size / 4, sizeof(struct radv_shader_inst));
327 
328    radv_add_split_disasm(shader->disasm_string, start_addr, &num_inst, instructions);
329 
330    fprintf(f, COLOR_YELLOW "%s - annotated disassembly:" COLOR_RESET "\n", radv_get_shader_name(&shader->info, stage));
331 
332    /* Print instructions with annotations. */
333    for (i = 0; i < num_inst; i++) {
334       struct radv_shader_inst *inst = &instructions[i];
335 
336       fprintf(f, "%s\n", inst->text);
337 
338       /* Print which waves execute the instruction right now. */
339       while (num_waves && start_addr + inst->offset == waves->pc) {
340          fprintf(f,
341                  "          " COLOR_GREEN "^ SE%u SH%u CU%u "
342                  "SIMD%u WAVE%u  EXEC=%016" PRIx64 "  ",
343                  waves->se, waves->sh, waves->cu, waves->simd, waves->wave, waves->exec);
344 
345          if (inst->size == 4) {
346             fprintf(f, "INST32=%08X" COLOR_RESET "\n", waves->inst_dw0);
347          } else {
348             fprintf(f, "INST64=%08X %08X" COLOR_RESET "\n", waves->inst_dw0, waves->inst_dw1);
349          }
350 
351          waves->matched = true;
352          waves = &waves[1];
353          num_waves--;
354       }
355    }
356 
357    fprintf(f, "\n\n");
358    free(instructions);
359 }
360 
361 static void
radv_dump_spirv(const struct radv_shader * shader,const char * sha1,const char * dump_dir)362 radv_dump_spirv(const struct radv_shader *shader, const char *sha1, const char *dump_dir)
363 {
364    char dump_path[512];
365    FILE *f;
366 
367    snprintf(dump_path, sizeof(dump_path), "%s/%s.spv", dump_dir, sha1);
368 
369    f = fopen(dump_path, "w+");
370    if (f) {
371       fwrite(shader->spirv, shader->spirv_size, 1, f);
372       fclose(f);
373    }
374 }
375 
376 static void
radv_dump_shader(struct radv_device * device,struct radv_pipeline * pipeline,struct radv_shader * shader,gl_shader_stage stage,const char * dump_dir,FILE * f)377 radv_dump_shader(struct radv_device *device, struct radv_pipeline *pipeline, struct radv_shader *shader,
378                  gl_shader_stage stage, const char *dump_dir, FILE *f)
379 {
380    if (!shader)
381       return;
382 
383    fprintf(f, "%s:\n\n", radv_get_shader_name(&shader->info, stage));
384 
385    if (shader->spirv) {
386       unsigned char sha1[21];
387       char sha1buf[41];
388 
389       _mesa_sha1_compute(shader->spirv, shader->spirv_size, sha1);
390       _mesa_sha1_format(sha1buf, sha1);
391 
392       if (device->vk.enabled_features.deviceFaultVendorBinary) {
393          radv_print_spirv(shader->spirv, shader->spirv_size, f);
394       } else {
395          fprintf(f, "SPIRV (see %s.spv)\n\n", sha1buf);
396          radv_dump_spirv(shader, sha1buf, dump_dir);
397       }
398    }
399 
400    if (shader->nir_string) {
401       fprintf(f, "NIR:\n%s\n", shader->nir_string);
402    }
403 
404    fprintf(f, "%s IR:\n%s\n", device->physical_device->use_llvm ? "LLVM" : "ACO", shader->ir_string);
405    fprintf(f, "DISASM:\n%s\n", shader->disasm_string);
406 
407    radv_dump_shader_stats(device, pipeline, shader, stage, f);
408 }
409 
410 static void
radv_dump_vertex_descriptors(const struct radv_device * device,const struct radv_graphics_pipeline * pipeline,FILE * f)411 radv_dump_vertex_descriptors(const struct radv_device *device, const struct radv_graphics_pipeline *pipeline, FILE *f)
412 {
413    struct radv_shader *vs = radv_get_shader(pipeline->base.shaders, MESA_SHADER_VERTEX);
414    uint64_t *ptr = (uint64_t *)device->trace_id_ptr;
415    uint32_t count = util_bitcount(vs->info.vs.vb_desc_usage_mask);
416    uint32_t *vb_ptr = *(uint32_t **)(ptr + 3);
417 
418    if (!count)
419       return;
420 
421    fprintf(f, "Num vertex %s: %d\n", vs->info.vs.use_per_attribute_vb_descs ? "attributes" : "bindings", count);
422    for (uint32_t i = 0; i < count; i++) {
423       uint32_t *desc = &((uint32_t *)vb_ptr)[i * 4];
424       uint64_t va = 0;
425 
426       va |= desc[0];
427       va |= (uint64_t)G_008F04_BASE_ADDRESS_HI(desc[1]) << 32;
428 
429       fprintf(f, "VBO#%d:\n", i);
430       fprintf(f, "\tVA: 0x%" PRIx64 "\n", va);
431       fprintf(f, "\tStride: %d\n", G_008F04_STRIDE(desc[1]));
432       fprintf(f, "\tNum records: %d (0x%x)\n", desc[2], desc[2]);
433    }
434 }
435 
436 static struct radv_shader_part *
radv_get_saved_vs_prolog(const struct radv_device * device)437 radv_get_saved_vs_prolog(const struct radv_device *device)
438 {
439    uint64_t *ptr = (uint64_t *)device->trace_id_ptr;
440    return *(struct radv_shader_part **)(ptr + 4);
441 }
442 
443 static void
radv_dump_vs_prolog(const struct radv_device * device,const struct radv_graphics_pipeline * pipeline,FILE * f)444 radv_dump_vs_prolog(const struct radv_device *device, const struct radv_graphics_pipeline *pipeline, FILE *f)
445 {
446    struct radv_shader_part *vs_prolog = radv_get_saved_vs_prolog(device);
447    struct radv_shader *vs_shader = radv_get_shader(pipeline->base.shaders, MESA_SHADER_VERTEX);
448 
449    if (!vs_prolog || !vs_shader || !vs_shader->info.vs.has_prolog)
450       return;
451 
452    fprintf(f, "Vertex prolog:\n\n");
453    fprintf(f, "DISASM:\n%s\n", vs_prolog->disasm_string);
454 }
455 
456 static struct radv_pipeline *
radv_get_saved_pipeline(struct radv_device * device,enum amd_ip_type ring)457 radv_get_saved_pipeline(struct radv_device *device, enum amd_ip_type ring)
458 {
459    uint64_t *ptr = (uint64_t *)device->trace_id_ptr;
460    int offset = ring == AMD_IP_GFX ? 1 : 2;
461 
462    return *(struct radv_pipeline **)(ptr + offset);
463 }
464 
465 static void
radv_dump_queue_state(struct radv_queue * queue,const char * dump_dir,FILE * f)466 radv_dump_queue_state(struct radv_queue *queue, const char *dump_dir, FILE *f)
467 {
468    struct radv_device *device = queue->device;
469    enum amd_ip_type ring = radv_queue_ring(queue);
470    struct radv_pipeline *pipeline;
471 
472    fprintf(f, "AMD_IP_%s:\n", ring == AMD_IP_GFX ? "GFX" : "COMPUTE");
473 
474    pipeline = radv_get_saved_pipeline(queue->device, ring);
475    if (pipeline) {
476       fprintf(f, "Pipeline hash: %" PRIx64 "\n", pipeline->pipeline_hash);
477 
478       if (pipeline->type == RADV_PIPELINE_GRAPHICS) {
479          struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);
480 
481          radv_dump_vs_prolog(device, graphics_pipeline, f);
482 
483          /* Dump active graphics shaders. */
484          unsigned stages = graphics_pipeline->active_stages;
485          while (stages) {
486             int stage = u_bit_scan(&stages);
487 
488             radv_dump_shader(device, &graphics_pipeline->base, graphics_pipeline->base.shaders[stage], stage, dump_dir,
489                              f);
490          }
491       } else if (pipeline->type == RADV_PIPELINE_RAY_TRACING) {
492          struct radv_ray_tracing_pipeline *rt_pipeline = radv_pipeline_to_ray_tracing(pipeline);
493          for (unsigned i = 0; i < rt_pipeline->stage_count; i++) {
494             struct radv_shader *shader = rt_pipeline->stages[i].shader;
495             if (shader)
496                radv_dump_shader(device, pipeline, shader, shader->info.stage, dump_dir, f);
497          }
498          radv_dump_shader(device, pipeline, pipeline->shaders[MESA_SHADER_INTERSECTION], MESA_SHADER_INTERSECTION,
499                           dump_dir, f);
500       } else {
501          struct radv_compute_pipeline *compute_pipeline = radv_pipeline_to_compute(pipeline);
502 
503          radv_dump_shader(device, &compute_pipeline->base, compute_pipeline->base.shaders[MESA_SHADER_COMPUTE],
504                           MESA_SHADER_COMPUTE, dump_dir, f);
505       }
506 
507       if (!(queue->device->instance->debug_flags & RADV_DEBUG_NO_UMR)) {
508          struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP];
509          enum amd_gfx_level gfx_level = device->physical_device->rad_info.gfx_level;
510          unsigned num_waves = ac_get_wave_info(gfx_level, &device->physical_device->rad_info, waves);
511 
512          fprintf(f, COLOR_CYAN "The number of active waves = %u" COLOR_RESET "\n\n", num_waves);
513 
514          if (pipeline->type == RADV_PIPELINE_GRAPHICS) {
515             struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);
516 
517             /* Dump annotated active graphics shaders. */
518             unsigned stages = graphics_pipeline->active_stages;
519             while (stages) {
520                int stage = u_bit_scan(&stages);
521 
522                radv_dump_annotated_shader(graphics_pipeline->base.shaders[stage], stage, waves, num_waves, f);
523             }
524          } else if (pipeline->type == RADV_PIPELINE_RAY_TRACING) {
525             struct radv_ray_tracing_pipeline *rt_pipeline = radv_pipeline_to_ray_tracing(pipeline);
526             for (unsigned i = 0; i < rt_pipeline->stage_count; i++) {
527                struct radv_shader *shader = rt_pipeline->stages[i].shader;
528                if (shader)
529                   radv_dump_annotated_shader(shader, shader->info.stage, waves, num_waves, f);
530             }
531             radv_dump_annotated_shader(pipeline->shaders[MESA_SHADER_INTERSECTION], MESA_SHADER_INTERSECTION, waves,
532                                        num_waves, f);
533          } else {
534             struct radv_compute_pipeline *compute_pipeline = radv_pipeline_to_compute(pipeline);
535 
536             radv_dump_annotated_shader(compute_pipeline->base.shaders[MESA_SHADER_COMPUTE], MESA_SHADER_COMPUTE, waves,
537                                        num_waves, f);
538          }
539 
540          /* Print waves executing shaders that are not currently bound. */
541          unsigned i;
542          bool found = false;
543          for (i = 0; i < num_waves; i++) {
544             if (waves[i].matched)
545                continue;
546 
547             if (!found) {
548                fprintf(f, COLOR_CYAN "Waves not executing currently-bound shaders:" COLOR_RESET "\n");
549                found = true;
550             }
551             fprintf(f, "    SE%u SH%u CU%u SIMD%u WAVE%u  EXEC=%016" PRIx64 "  INST=%08X %08X  PC=%" PRIx64 "\n",
552                     waves[i].se, waves[i].sh, waves[i].cu, waves[i].simd, waves[i].wave, waves[i].exec,
553                     waves[i].inst_dw0, waves[i].inst_dw1, waves[i].pc);
554          }
555          if (found)
556             fprintf(f, "\n\n");
557       }
558 
559       if (pipeline->type == RADV_PIPELINE_GRAPHICS) {
560          struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);
561          radv_dump_vertex_descriptors(device, graphics_pipeline, f);
562       }
563       radv_dump_descriptors(queue->device, f);
564    }
565 }
566 
/* Run "cmd" through popen() and copy its output to "f", followed by an
 * empty line. Does nothing on Windows or when the command cannot be started.
 */
static void
radv_dump_cmd(const char *cmd, FILE *f)
{
#ifndef _WIN32
   char chunk[2048];
   FILE *pipe_stream = popen(cmd, "r");

   if (!pipe_stream)
      return;

   while (fgets(chunk, sizeof(chunk), pipe_stream))
      fputs(chunk, f);

   fprintf(f, "\n");
   pclose(pipe_stream);
#endif
}
583 
/* Append the tail of the kernel log (last 60 lines of dmesg) to "f". */
static void
radv_dump_dmesg(FILE *f)
{
   static const char dmesg_cmd[] = "dmesg | tail -n60";

   fprintf(f, "\nLast 60 lines of dmesg:\n\n");
   radv_dump_cmd(dmesg_cmd, f);
}
590 
591 void
radv_dump_enabled_options(const struct radv_device * device,FILE * f)592 radv_dump_enabled_options(const struct radv_device *device, FILE *f)
593 {
594    uint64_t mask;
595 
596    if (device->instance->debug_flags) {
597       fprintf(f, "Enabled debug options: ");
598 
599       mask = device->instance->debug_flags;
600       while (mask) {
601          int i = u_bit_scan64(&mask);
602          fprintf(f, "%s, ", radv_get_debug_option_name(i));
603       }
604       fprintf(f, "\n");
605    }
606 
607    if (device->instance->perftest_flags) {
608       fprintf(f, "Enabled perftest options: ");
609 
610       mask = device->instance->perftest_flags;
611       while (mask) {
612          int i = u_bit_scan64(&mask);
613          fprintf(f, "%s, ", radv_get_perftest_option_name(i));
614       }
615       fprintf(f, "\n");
616    }
617 }
618 
619 static void
radv_dump_app_info(const struct radv_device * device,FILE * f)620 radv_dump_app_info(const struct radv_device *device, FILE *f)
621 {
622    const struct radv_instance *instance = device->instance;
623 
624    fprintf(f, "Application name: %s\n", instance->vk.app_info.app_name);
625    fprintf(f, "Application version: %d\n", instance->vk.app_info.app_version);
626    fprintf(f, "Engine name: %s\n", instance->vk.app_info.engine_name);
627    fprintf(f, "Engine version: %d\n", instance->vk.app_info.engine_version);
628    fprintf(f, "API version: %d.%d.%d\n", VK_VERSION_MAJOR(instance->vk.app_info.api_version),
629            VK_VERSION_MINOR(instance->vk.app_info.api_version), VK_VERSION_PATCH(instance->vk.app_info.api_version));
630 
631    radv_dump_enabled_options(device, f);
632 }
633 
634 static void
radv_dump_device_name(const struct radv_device * device,FILE * f)635 radv_dump_device_name(const struct radv_device *device, FILE *f)
636 {
637    const struct radeon_info *info = &device->physical_device->rad_info;
638 #ifndef _WIN32
639    char kernel_version[128] = {0};
640    struct utsname uname_data;
641 #endif
642 
643 #ifdef _WIN32
644    fprintf(f, "Device name: %s (DRM %i.%i.%i)\n\n", device->physical_device->marketing_name, info->drm_major,
645            info->drm_minor, info->drm_patchlevel);
646 #else
647    if (uname(&uname_data) == 0)
648       snprintf(kernel_version, sizeof(kernel_version), " / %s", uname_data.release);
649 
650    fprintf(f, "Device name: %s (DRM %i.%i.%i%s)\n\n", device->physical_device->marketing_name, info->drm_major,
651            info->drm_minor, info->drm_patchlevel, kernel_version);
652 #endif
653 }
654 
655 static void
radv_dump_umr_ring(const struct radv_queue * queue,FILE * f)656 radv_dump_umr_ring(const struct radv_queue *queue, FILE *f)
657 {
658 #ifndef _WIN32
659    const enum amd_ip_type ring = radv_queue_ring(queue);
660    const struct radv_device *device = queue->device;
661    char cmd[256];
662 
663    /* TODO: Dump compute ring. */
664    if (ring != AMD_IP_GFX)
665       return;
666 
667    sprintf(cmd, "umr --by-pci %04x:%02x:%02x.%01x -RS %s 2>&1", device->physical_device->bus_info.domain,
668            device->physical_device->bus_info.bus, device->physical_device->bus_info.dev,
669            device->physical_device->bus_info.func,
670            device->physical_device->rad_info.gfx_level >= GFX10 ? "gfx_0.0.0" : "gfx");
671    fprintf(f, "\nUMR GFX ring:\n\n");
672    radv_dump_cmd(cmd, f);
673 #endif
674 }
675 
676 static void
radv_dump_umr_waves(struct radv_queue * queue,FILE * f)677 radv_dump_umr_waves(struct radv_queue *queue, FILE *f)
678 {
679 #ifndef _WIN32
680    enum amd_ip_type ring = radv_queue_ring(queue);
681    struct radv_device *device = queue->device;
682    char cmd[256];
683 
684    /* TODO: Dump compute ring. */
685    if (ring != AMD_IP_GFX)
686       return;
687 
688    sprintf(cmd, "umr --by-pci %04x:%02x:%02x.%01x -O bits,halt_waves -go 0 -wa %s -go 1 2>&1",
689            device->physical_device->bus_info.domain, device->physical_device->bus_info.bus,
690            device->physical_device->bus_info.dev, device->physical_device->bus_info.func,
691            device->physical_device->rad_info.gfx_level >= GFX10 ? "gfx_0.0.0" : "gfx");
692    fprintf(f, "\nUMR GFX waves:\n\n");
693    radv_dump_cmd(cmd, f);
694 #endif
695 }
696 
697 static bool
radv_gpu_hang_occurred(struct radv_queue * queue,enum amd_ip_type ring)698 radv_gpu_hang_occurred(struct radv_queue *queue, enum amd_ip_type ring)
699 {
700    struct radeon_winsys *ws = queue->device->ws;
701 
702    if (!ws->ctx_wait_idle(queue->hw_ctx, ring, queue->vk.index_in_family))
703       return true;
704 
705    return false;
706 }
707 
708 bool
radv_vm_fault_occurred(struct radv_device * device,struct radv_winsys_gpuvm_fault_info * fault_info)709 radv_vm_fault_occurred(struct radv_device *device, struct radv_winsys_gpuvm_fault_info *fault_info)
710 {
711    if (!device->physical_device->rad_info.has_gpuvm_fault_query)
712       return false;
713 
714    return device->ws->query_gpuvm_fault(device->ws, fault_info);
715 }
716 
/* Sections ("chunks") of a GPU hang report, in the order they are produced. */
enum radv_device_fault_chunk {
   RADV_DEVICE_FAULT_CHUNK_TRACE = 0,
   RADV_DEVICE_FAULT_CHUNK_QUEUE_STATE,
   RADV_DEVICE_FAULT_CHUNK_UMR_WAVES,
   RADV_DEVICE_FAULT_CHUNK_UMR_RING,
   RADV_DEVICE_FAULT_CHUNK_REGISTERS,
   RADV_DEVICE_FAULT_CHUNK_BO_RANGES,
   RADV_DEVICE_FAULT_CHUNK_BO_HISTORY,
   RADV_DEVICE_FAULT_CHUNK_VM_FAULT,
   RADV_DEVICE_FAULT_CHUNK_APP_INFO,
   RADV_DEVICE_FAULT_CHUNK_GPU_INFO,
   RADV_DEVICE_FAULT_CHUNK_DMESG,
   /* Number of chunks; not a chunk itself. */
   RADV_DEVICE_FAULT_CHUNK_COUNT,
};
731 
732 void
radv_check_gpu_hangs(struct radv_queue * queue,const struct radv_winsys_submit_info * submit_info)733 radv_check_gpu_hangs(struct radv_queue *queue, const struct radv_winsys_submit_info *submit_info)
734 {
735    enum amd_ip_type ring;
736 
737    ring = radv_queue_ring(queue);
738 
739    bool hang_occurred = radv_gpu_hang_occurred(queue, ring);
740    if (!hang_occurred)
741       return;
742 
743    fprintf(stderr, "radv: GPU hang detected...\n");
744 
745 #ifndef _WIN32
746    const bool save_hang_report = !queue->device->vk.enabled_features.deviceFaultVendorBinary;
747    struct radv_winsys_gpuvm_fault_info fault_info = {0};
748    struct radv_device *device = queue->device;
749 
750    /* Query if a VM fault happened for this GPU hang. */
751    bool vm_fault_occurred = radv_vm_fault_occurred(queue->device, &fault_info);
752 
753    /* Create a directory into $HOME/radv_dumps_<pid>_<time> to save
754     * various debugging info about that GPU hang.
755     */
756    struct tm *timep, result;
757    time_t raw_time;
758    FILE *f;
759    char dump_dir[256], dump_path[512], buf_time[128];
760 
761    if (save_hang_report) {
762       time(&raw_time);
763       timep = os_localtime(&raw_time, &result);
764       strftime(buf_time, sizeof(buf_time), "%Y.%m.%d_%H.%M.%S", timep);
765 
766       snprintf(dump_dir, sizeof(dump_dir), "%s/" RADV_DUMP_DIR "_%d_%s", debug_get_option("HOME", "."), getpid(),
767                buf_time);
768       if (mkdir(dump_dir, 0774) && errno != EEXIST) {
769          fprintf(stderr, "radv: can't create directory '%s' (%i).\n", dump_dir, errno);
770          abort();
771       }
772 
773       fprintf(stderr, "radv: GPU hang report will be saved to '%s'!\n", dump_dir);
774    }
775 
776    struct {
777       const char *name;
778       char *ptr;
779       size_t size;
780    } chunks[RADV_DEVICE_FAULT_CHUNK_COUNT] = {
781       {"trace"},      {"pipeline"}, {"umr_waves"}, {"umr_ring"}, {"registers"}, {"bo_ranges"},
782       {"bo_history"}, {"vm_fault"}, {"app_info"},  {"gpu_info"}, {"dmesg"},
783    };
784 
785    for (uint32_t i = 0; i < RADV_DEVICE_FAULT_CHUNK_COUNT; i++) {
786 
787       if (save_hang_report) {
788          snprintf(dump_path, sizeof(dump_path), "%s/%s.log", dump_dir, chunks[i].name);
789 
790          f = fopen(dump_path, "w+");
791       } else {
792          f = open_memstream(&chunks[i].ptr, &chunks[i].size);
793       }
794 
795       if (!f)
796          continue;
797 
798       switch (i) {
799       case RADV_DEVICE_FAULT_CHUNK_TRACE:
800          radv_dump_trace(queue->device, submit_info->cs_array[0], f);
801          break;
802       case RADV_DEVICE_FAULT_CHUNK_QUEUE_STATE:
803          radv_dump_queue_state(queue, dump_dir, f);
804          break;
805       case RADV_DEVICE_FAULT_CHUNK_UMR_WAVES:
806          if (!(device->instance->debug_flags & RADV_DEBUG_NO_UMR))
807             radv_dump_umr_waves(queue, f);
808          break;
809       case RADV_DEVICE_FAULT_CHUNK_UMR_RING:
810          if (!(device->instance->debug_flags & RADV_DEBUG_NO_UMR))
811             radv_dump_umr_ring(queue, f);
812          break;
813       case RADV_DEVICE_FAULT_CHUNK_REGISTERS:
814          radv_dump_debug_registers(device, f);
815          break;
816       case RADV_DEVICE_FAULT_CHUNK_BO_RANGES:
817          device->ws->dump_bo_ranges(device->ws, f);
818          break;
819       case RADV_DEVICE_FAULT_CHUNK_BO_HISTORY:
820          device->ws->dump_bo_log(device->ws, f);
821          break;
822       case RADV_DEVICE_FAULT_CHUNK_VM_FAULT:
823          if (vm_fault_occurred) {
824             fprintf(f, "VM fault report.\n\n");
825             fprintf(f, "Failing VM page: 0x%08" PRIx64 "\n", fault_info.addr);
826             ac_print_gpuvm_fault_status(f, device->physical_device->rad_info.gfx_level, fault_info.status);
827          }
828          break;
829       case RADV_DEVICE_FAULT_CHUNK_APP_INFO:
830          radv_dump_app_info(device, f);
831          break;
832       case RADV_DEVICE_FAULT_CHUNK_GPU_INFO:
833          radv_dump_device_name(device, f);
834          ac_print_gpu_info(&device->physical_device->rad_info, f);
835          break;
836       case RADV_DEVICE_FAULT_CHUNK_DMESG:
837          radv_dump_dmesg(f);
838          break;
839       default:
840          break;
841       }
842 
843       fclose(f);
844    }
845 
846    if (save_hang_report) {
847       fprintf(stderr, "radv: GPU hang report saved successfully!\n");
848       abort();
849    } else {
850       char *report;
851 
852       report = ralloc_strdup(NULL, "========== RADV GPU hang report ==========\n");
853       for (uint32_t i = 0; i < RADV_DEVICE_FAULT_CHUNK_COUNT; i++) {
854          if (!chunks[i].size)
855             continue;
856 
857          ralloc_asprintf_append(&report, "\n========== %s ==========\n", chunks[i].name);
858          ralloc_asprintf_append(&report, "%s", chunks[i].ptr);
859 
860          free(chunks[i].ptr);
861       }
862 
863       device->gpu_hang_report = report;
864    }
865 
866 #endif
867 }
868 
/**
 * Disassemble a SPIR-V binary with the external "spirv-dis" tool.
 *
 * The binary is written to a temporary file under /tmp, "spirv-dis <file>" is
 * run through radv_dump_cmd() with \p fp as its output argument, and the
 * temporary file is removed again. Nothing is printed if the temporary file
 * cannot be created or fully written. This is a no-op on Windows.
 *
 * \param data  SPIR-V binary.
 * \param size  Size of \p data in bytes.
 * \param fp    Stream passed on to radv_dump_cmd().
 */
void
radv_print_spirv(const char *data, uint32_t size, FILE *fp)
{
#ifndef _WIN32
   char path[] = "/tmp/fileXXXXXX";
   char command[128];
   uint32_t written = 0;
   int fd;

   /* Dump the binary into a temporary file. */
   fd = mkstemp(path);
   if (fd < 0)
      return;

   /* write() may store fewer bytes than requested; keep going until the
    * whole binary is on disk so spirv-dis never sees a truncated module.
    */
   while (written < size) {
      ssize_t ret = write(fd, data + written, size - written);

      if (ret < 0 && errno == EINTR)
         continue;
      if (ret <= 0)
         goto fail;

      written += (uint32_t)ret;
   }

   /* Disassemble using spirv-dis if installed. */
   snprintf(command, sizeof(command), "spirv-dis %s", path);
   radv_dump_cmd(command, fp);

fail:
   close(fd);
   unlink(path);
#endif
}
894 
895 bool
radv_trap_handler_init(struct radv_device * device)896 radv_trap_handler_init(struct radv_device *device)
897 {
898    struct radeon_winsys *ws = device->ws;
899    VkResult result;
900 
901    /* Create the trap handler shader and upload it like other shaders. */
902    device->trap_handler_shader = radv_create_trap_handler_shader(device);
903    if (!device->trap_handler_shader) {
904       fprintf(stderr, "radv: failed to create the trap handler shader.\n");
905       return false;
906    }
907 
908    result = ws->buffer_make_resident(ws, device->trap_handler_shader->bo, true);
909    if (result != VK_SUCCESS)
910       return false;
911 
912    result = ws->buffer_create(
913       ws, TMA_BO_SIZE, 256, RADEON_DOMAIN_VRAM,
914       RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM | RADEON_FLAG_32BIT,
915       RADV_BO_PRIORITY_SCRATCH, 0, &device->tma_bo);
916    if (result != VK_SUCCESS)
917       return false;
918 
919    result = ws->buffer_make_resident(ws, device->tma_bo, true);
920    if (result != VK_SUCCESS)
921       return false;
922 
923    device->tma_ptr = ws->buffer_map(device->tma_bo);
924    if (!device->tma_ptr)
925       return false;
926 
927    /* Upload a buffer descriptor to store various info from the trap. */
928    uint64_t tma_va = radv_buffer_get_va(device->tma_bo) + 16;
929    uint32_t desc[4];
930 
931    desc[0] = tma_va;
932    desc[1] = S_008F04_BASE_ADDRESS_HI(tma_va >> 32);
933    desc[2] = TMA_BO_SIZE;
934    desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
935              S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
936              S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
937 
938    memcpy(device->tma_ptr, desc, sizeof(desc));
939 
940    return true;
941 }
942 
943 void
radv_trap_handler_finish(struct radv_device * device)944 radv_trap_handler_finish(struct radv_device *device)
945 {
946    struct radeon_winsys *ws = device->ws;
947 
948    if (unlikely(device->trap_handler_shader)) {
949       ws->buffer_make_resident(ws, device->trap_handler_shader->bo, false);
950       radv_shader_unref(device, device->trap_handler_shader);
951    }
952 
953    if (unlikely(device->tma_bo)) {
954       ws->buffer_make_resident(ws, device->tma_bo, false);
955       ws->buffer_destroy(ws, device->tma_bo);
956    }
957 }
958 
959 static void
radv_dump_faulty_shader(struct radv_device * device,uint64_t faulty_pc)960 radv_dump_faulty_shader(struct radv_device *device, uint64_t faulty_pc)
961 {
962    struct radv_shader *shader;
963    uint64_t start_addr, end_addr;
964    uint32_t instr_offset;
965 
966    shader = radv_find_shader(device, faulty_pc);
967    if (!shader)
968       return;
969 
970    start_addr = radv_shader_get_va(shader);
971    end_addr = start_addr + shader->code_size;
972    instr_offset = faulty_pc - start_addr;
973 
974    fprintf(stderr,
975            "Faulty shader found "
976            "VA=[0x%" PRIx64 "-0x%" PRIx64 "], instr_offset=%d\n",
977            start_addr, end_addr, instr_offset);
978 
979    /* Get the list of instructions.
980     * Buffer size / 4 is the upper bound of the instruction count.
981     */
982    unsigned num_inst = 0;
983    struct radv_shader_inst *instructions = calloc(shader->code_size / 4, sizeof(struct radv_shader_inst));
984 
985    /* Split the disassembly string into instructions. */
986    radv_add_split_disasm(shader->disasm_string, start_addr, &num_inst, instructions);
987 
988    /* Print instructions with annotations. */
989    for (unsigned i = 0; i < num_inst; i++) {
990       struct radv_shader_inst *inst = &instructions[i];
991 
992       if (start_addr + inst->offset == faulty_pc) {
993          fprintf(stderr, "\n!!! Faulty instruction below !!!\n");
994          fprintf(stderr, "%s\n", inst->text);
995          fprintf(stderr, "\n");
996       } else {
997          fprintf(stderr, "%s\n", inst->text);
998       }
999    }
1000 
1001    free(instructions);
1002 }
1003 
/* SQ wave hardware register values as stored in the TMA buffer.
 *
 * radv_dump_sq_hw_regs() overlays this struct on device->tma_ptr starting at
 * element 6, so the field order and sizes define the expected memory layout.
 * Presumably the trap handler shader writes these values - confirm against
 * the shader.
 */
struct radv_sq_hw_reg {
   uint32_t status;   /* Dumped as SQ_WAVE_STATUS. */
   uint32_t trap_sts; /* Dumped as SQ_WAVE_TRAPSTS. */
   uint32_t hw_id;    /* Dumped as SQ_WAVE_HW_ID1 (GFX10+) or SQ_WAVE_HW_ID. */
   uint32_t ib_sts;   /* Dumped as SQ_WAVE_IB_STS. */
};
1010 
1011 static void
radv_dump_sq_hw_regs(struct radv_device * device)1012 radv_dump_sq_hw_regs(struct radv_device *device)
1013 {
1014    enum amd_gfx_level gfx_level = device->physical_device->rad_info.gfx_level;
1015    enum radeon_family family = device->physical_device->rad_info.family;
1016    struct radv_sq_hw_reg *regs = (struct radv_sq_hw_reg *)&device->tma_ptr[6];
1017 
1018    fprintf(stderr, "\nHardware registers:\n");
1019    if (device->physical_device->rad_info.gfx_level >= GFX10) {
1020       ac_dump_reg(stderr, gfx_level, family, R_000408_SQ_WAVE_STATUS, regs->status, ~0);
1021       ac_dump_reg(stderr, gfx_level, family, R_00040C_SQ_WAVE_TRAPSTS, regs->trap_sts, ~0);
1022       ac_dump_reg(stderr, gfx_level, family, R_00045C_SQ_WAVE_HW_ID1, regs->hw_id, ~0);
1023       ac_dump_reg(stderr, gfx_level, family, R_00041C_SQ_WAVE_IB_STS, regs->ib_sts, ~0);
1024    } else {
1025       ac_dump_reg(stderr, gfx_level, family, R_000048_SQ_WAVE_STATUS, regs->status, ~0);
1026       ac_dump_reg(stderr, gfx_level, family, R_00004C_SQ_WAVE_TRAPSTS, regs->trap_sts, ~0);
1027       ac_dump_reg(stderr, gfx_level, family, R_000050_SQ_WAVE_HW_ID, regs->hw_id, ~0);
1028       ac_dump_reg(stderr, gfx_level, family, R_00005C_SQ_WAVE_IB_STS, regs->ib_sts, ~0);
1029    }
1030    fprintf(stderr, "\n\n");
1031 }
1032 
1033 void
radv_check_trap_handler(struct radv_queue * queue)1034 radv_check_trap_handler(struct radv_queue *queue)
1035 {
1036    enum amd_ip_type ring = radv_queue_ring(queue);
1037    struct radv_device *device = queue->device;
1038    struct radeon_winsys *ws = device->ws;
1039 
1040    /* Wait for the context to be idle in a finite time. */
1041    ws->ctx_wait_idle(queue->hw_ctx, ring, queue->vk.index_in_family);
1042 
1043    /* Try to detect if the trap handler has been reached by the hw by
1044     * looking at ttmp0 which should be non-zero if a shader exception
1045     * happened.
1046     */
1047    if (!device->tma_ptr[4])
1048       return;
1049 
1050 #if 0
1051 	fprintf(stderr, "tma_ptr:\n");
1052 	for (unsigned i = 0; i < 10; i++)
1053 		fprintf(stderr, "tma_ptr[%d]=0x%x\n", i, device->tma_ptr[i]);
1054 #endif
1055 
1056    radv_dump_sq_hw_regs(device);
1057 
1058    uint32_t ttmp0 = device->tma_ptr[4];
1059    uint32_t ttmp1 = device->tma_ptr[5];
1060 
1061    /* According to the ISA docs, 3.10 Trap and Exception Registers:
1062     *
1063     * "{ttmp1, ttmp0} = {3'h0, pc_rewind[3:0], HT[0], trapID[7:0], PC[47:0]}"
1064     *
1065     * "When the trap handler is entered, the PC of the faulting
1066     *  instruction is: (PC - PC_rewind * 4)."
1067     * */
1068    uint8_t trap_id = (ttmp1 >> 16) & 0xff;
1069    uint8_t ht = (ttmp1 >> 24) & 0x1;
1070    uint8_t pc_rewind = (ttmp1 >> 25) & 0xf;
1071    uint64_t pc = (ttmp0 | ((ttmp1 & 0x0000ffffull) << 32)) - (pc_rewind * 4);
1072 
1073    fprintf(stderr, "PC=0x%" PRIx64 ", trapID=%d, HT=%d, PC_rewind=%d\n", pc, trap_id, ht, pc_rewind);
1074 
1075    radv_dump_faulty_shader(device, pc);
1076 
1077    abort();
1078 }
1079 
1080 /* VK_EXT_device_fault */
1081 VKAPI_ATTR VkResult VKAPI_CALL
radv_GetDeviceFaultInfoEXT(VkDevice _device,VkDeviceFaultCountsEXT * pFaultCounts,VkDeviceFaultInfoEXT * pFaultInfo)1082 radv_GetDeviceFaultInfoEXT(VkDevice _device, VkDeviceFaultCountsEXT *pFaultCounts, VkDeviceFaultInfoEXT *pFaultInfo)
1083 {
1084    VK_OUTARRAY_MAKE_TYPED(VkDeviceFaultAddressInfoEXT, out, pFaultInfo ? pFaultInfo->pAddressInfos : NULL,
1085                           &pFaultCounts->addressInfoCount);
1086    struct radv_winsys_gpuvm_fault_info fault_info = {0};
1087    RADV_FROM_HANDLE(radv_device, device, _device);
1088    bool vm_fault_occurred = false;
1089 
1090    /* Query if a GPUVM fault happened. */
1091    vm_fault_occurred = radv_vm_fault_occurred(device, &fault_info);
1092 
1093    /* No vendor-specific crash dumps yet. */
1094    pFaultCounts->vendorInfoCount = 0;
1095    pFaultCounts->vendorBinarySize = 0;
1096 
1097    if (device->gpu_hang_report) {
1098       const struct radv_physical_device *pdevice = device->physical_device;
1099 
1100       VkDeviceFaultVendorBinaryHeaderVersionOneEXT hdr;
1101 
1102       hdr.headerSize = sizeof(VkDeviceFaultVendorBinaryHeaderVersionOneEXT);
1103       hdr.headerVersion = VK_DEVICE_FAULT_VENDOR_BINARY_HEADER_VERSION_ONE_EXT;
1104       hdr.vendorID = pdevice->vk.properties.vendorID;
1105       hdr.deviceID = pdevice->vk.properties.deviceID;
1106       hdr.driverVersion = pdevice->vk.properties.driverVersion;
1107       memcpy(hdr.pipelineCacheUUID, pdevice->cache_uuid, VK_UUID_SIZE);
1108       hdr.applicationNameOffset = 0;
1109       hdr.applicationVersion = pdevice->instance->vk.app_info.app_version;
1110       hdr.engineNameOffset = 0;
1111       hdr.engineVersion = pdevice->instance->vk.app_info.engine_version;
1112       hdr.apiVersion = pdevice->instance->vk.app_info.api_version;
1113 
1114       pFaultCounts->vendorBinarySize = sizeof(hdr) + strlen(device->gpu_hang_report);
1115       if (pFaultInfo) {
1116          memcpy(pFaultInfo->pVendorBinaryData, &hdr, sizeof(hdr));
1117          memcpy((char *)pFaultInfo->pVendorBinaryData + sizeof(hdr), device->gpu_hang_report,
1118                 strlen(device->gpu_hang_report));
1119       }
1120    }
1121 
1122    if (vm_fault_occurred) {
1123       VkDeviceFaultAddressInfoEXT addr_fault_info = {
1124          .reportedAddress = fault_info.addr,
1125          .addressPrecision = 4096, /* 4K page granularity */
1126       };
1127 
1128       if (pFaultInfo)
1129          strncpy(pFaultInfo->description, "A GPUVM fault has been detected", sizeof(pFaultInfo->description));
1130 
1131       if (device->physical_device->rad_info.gfx_level >= GFX10) {
1132          addr_fault_info.addressType = G_00A130_RW(fault_info.status) ? VK_DEVICE_FAULT_ADDRESS_TYPE_WRITE_INVALID_EXT
1133                                                                       : VK_DEVICE_FAULT_ADDRESS_TYPE_READ_INVALID_EXT;
1134       } else {
1135          /* Not sure how to get the access status on GFX6-9. */
1136          addr_fault_info.addressType = VK_DEVICE_FAULT_ADDRESS_TYPE_NONE_EXT;
1137       }
1138       vk_outarray_append_typed(VkDeviceFaultAddressInfoEXT, &out, elem) *elem = addr_fault_info;
1139    }
1140 
1141    return vk_outarray_status(&out);
1142 }
1143