• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2016 Red Hat.
3  * Copyright © 2016 Bas Nieuwenhuizen
4  *
5  * based in part on anv driver which is:
6  * Copyright © 2015 Intel Corporation
7  *
8  * Permission is hereby granted, free of charge, to any person obtaining a
9  * copy of this software and associated documentation files (the "Software"),
10  * to deal in the Software without restriction, including without limitation
11  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12  * and/or sell copies of the Software, and to permit persons to whom the
13  * Software is furnished to do so, subject to the following conditions:
14  *
15  * The above copyright notice and this permission notice (including the next
16  * paragraph) shall be included in all copies or substantial portions of the
17  * Software.
18  *
19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
22  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25  * IN THE SOFTWARE.
26  */
27 
28 #include <stdio.h>
29 #include <stdlib.h>
30 #ifndef _WIN32
31 #include <sys/utsname.h>
32 #endif
33 #include <sys/stat.h>
34 
35 #include "util/mesa-sha1.h"
36 #include "util/os_time.h"
37 #include "ac_debug.h"
38 #include "radv_debug.h"
39 #include "radv_shader.h"
40 #include "sid.h"
41 
42 #define TRACE_BO_SIZE 4096
43 #define TMA_BO_SIZE   4096
44 
45 #define COLOR_RESET  "\033[0m"
46 #define COLOR_RED    "\033[31m"
47 #define COLOR_GREEN  "\033[1;32m"
48 #define COLOR_YELLOW "\033[1;33m"
49 #define COLOR_CYAN   "\033[1;36m"
50 
51 #define RADV_DUMP_DIR "radv_dumps"
52 
53 /* Trace BO layout (offsets are 4 bytes):
54  *
55  * [0]: primary trace ID
56  * [1]: secondary trace ID
57  * [2-3]: 64-bit GFX ring pipeline pointer
58  * [4-5]: 64-bit COMPUTE ring pipeline pointer
59  * [6-7]: Vertex descriptors pointer
60  * [8-9]: 64-bit Vertex prolog pointer
61  * [10-11]: 64-bit descriptor set #0 pointer
62  * ...
63  * [72-73]: 64-bit descriptor set #31 pointer
64  */
65 
66 bool
radv_init_trace(struct radv_device * device)67 radv_init_trace(struct radv_device *device)
68 {
69    struct radeon_winsys *ws = device->ws;
70    VkResult result;
71 
72    result = ws->buffer_create(
73       ws, TRACE_BO_SIZE, 8, RADEON_DOMAIN_VRAM,
74       RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM |
75       RADEON_FLAG_VA_UNCACHED, RADV_BO_PRIORITY_UPLOAD_BUFFER, 0, &device->trace_bo);
76    if (result != VK_SUCCESS)
77       return false;
78 
79    result = ws->buffer_make_resident(ws, device->trace_bo, true);
80    if (result != VK_SUCCESS)
81       return false;
82 
83    device->trace_id_ptr = ws->buffer_map(device->trace_bo);
84    if (!device->trace_id_ptr)
85       return false;
86 
87    ac_vm_fault_occured(device->physical_device->rad_info.gfx_level, &device->dmesg_timestamp, NULL);
88 
89    return true;
90 }
91 
92 void
radv_finish_trace(struct radv_device * device)93 radv_finish_trace(struct radv_device *device)
94 {
95    struct radeon_winsys *ws = device->ws;
96 
97    if (unlikely(device->trace_bo)) {
98       ws->buffer_make_resident(ws, device->trace_bo, false);
99       ws->buffer_destroy(ws, device->trace_bo);
100    }
101 }
102 
103 static void
radv_dump_trace(struct radv_device * device,struct radeon_cmdbuf * cs,FILE * f)104 radv_dump_trace(struct radv_device *device, struct radeon_cmdbuf *cs, FILE *f)
105 {
106    fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr);
107    device->ws->cs_dump(cs, f, (const int *)device->trace_id_ptr, 2);
108 }
109 
110 static void
radv_dump_mmapped_reg(struct radv_device * device,FILE * f,unsigned offset)111 radv_dump_mmapped_reg(struct radv_device *device, FILE *f, unsigned offset)
112 {
113    struct radeon_winsys *ws = device->ws;
114    uint32_t value;
115 
116    if (ws->read_registers(ws, offset, 1, &value))
117       ac_dump_reg(f, device->physical_device->rad_info.gfx_level, offset, value, ~0);
118 }
119 
120 static void
radv_dump_debug_registers(struct radv_device * device,FILE * f)121 radv_dump_debug_registers(struct radv_device *device, FILE *f)
122 {
123    struct radeon_info *info = &device->physical_device->rad_info;
124 
125    fprintf(f, "Memory-mapped registers:\n");
126    radv_dump_mmapped_reg(device, f, R_008010_GRBM_STATUS);
127 
128    radv_dump_mmapped_reg(device, f, R_008008_GRBM_STATUS2);
129    radv_dump_mmapped_reg(device, f, R_008014_GRBM_STATUS_SE0);
130    radv_dump_mmapped_reg(device, f, R_008018_GRBM_STATUS_SE1);
131    radv_dump_mmapped_reg(device, f, R_008038_GRBM_STATUS_SE2);
132    radv_dump_mmapped_reg(device, f, R_00803C_GRBM_STATUS_SE3);
133    radv_dump_mmapped_reg(device, f, R_00D034_SDMA0_STATUS_REG);
134    radv_dump_mmapped_reg(device, f, R_00D834_SDMA1_STATUS_REG);
135    if (info->gfx_level <= GFX8) {
136       radv_dump_mmapped_reg(device, f, R_000E50_SRBM_STATUS);
137       radv_dump_mmapped_reg(device, f, R_000E4C_SRBM_STATUS2);
138       radv_dump_mmapped_reg(device, f, R_000E54_SRBM_STATUS3);
139    }
140    radv_dump_mmapped_reg(device, f, R_008680_CP_STAT);
141    radv_dump_mmapped_reg(device, f, R_008674_CP_STALLED_STAT1);
142    radv_dump_mmapped_reg(device, f, R_008678_CP_STALLED_STAT2);
143    radv_dump_mmapped_reg(device, f, R_008670_CP_STALLED_STAT3);
144    radv_dump_mmapped_reg(device, f, R_008210_CP_CPC_STATUS);
145    radv_dump_mmapped_reg(device, f, R_008214_CP_CPC_BUSY_STAT);
146    radv_dump_mmapped_reg(device, f, R_008218_CP_CPC_STALLED_STAT1);
147    radv_dump_mmapped_reg(device, f, R_00821C_CP_CPF_STATUS);
148    radv_dump_mmapped_reg(device, f, R_008220_CP_CPF_BUSY_STAT);
149    radv_dump_mmapped_reg(device, f, R_008224_CP_CPF_STALLED_STAT1);
150    fprintf(f, "\n");
151 }
152 
153 static void
radv_dump_buffer_descriptor(enum amd_gfx_level gfx_level,const uint32_t * desc,FILE * f)154 radv_dump_buffer_descriptor(enum amd_gfx_level gfx_level, const uint32_t *desc, FILE *f)
155 {
156    fprintf(f, COLOR_CYAN "    Buffer:" COLOR_RESET "\n");
157    for (unsigned j = 0; j < 4; j++)
158       ac_dump_reg(f, gfx_level, R_008F00_SQ_BUF_RSRC_WORD0 + j * 4, desc[j], 0xffffffff);
159 }
160 
161 static void
radv_dump_image_descriptor(enum amd_gfx_level gfx_level,const uint32_t * desc,FILE * f)162 radv_dump_image_descriptor(enum amd_gfx_level gfx_level, const uint32_t *desc, FILE *f)
163 {
164    unsigned sq_img_rsrc_word0 =
165       gfx_level >= GFX10 ? R_00A000_SQ_IMG_RSRC_WORD0 : R_008F10_SQ_IMG_RSRC_WORD0;
166 
167    fprintf(f, COLOR_CYAN "    Image:" COLOR_RESET "\n");
168    for (unsigned j = 0; j < 8; j++)
169       ac_dump_reg(f, gfx_level, sq_img_rsrc_word0 + j * 4, desc[j], 0xffffffff);
170 
171    fprintf(f, COLOR_CYAN "    FMASK:" COLOR_RESET "\n");
172    for (unsigned j = 0; j < 8; j++)
173       ac_dump_reg(f, gfx_level, sq_img_rsrc_word0 + j * 4, desc[8 + j], 0xffffffff);
174 }
175 
176 static void
radv_dump_sampler_descriptor(enum amd_gfx_level gfx_level,const uint32_t * desc,FILE * f)177 radv_dump_sampler_descriptor(enum amd_gfx_level gfx_level, const uint32_t *desc, FILE *f)
178 {
179    fprintf(f, COLOR_CYAN "    Sampler state:" COLOR_RESET "\n");
180    for (unsigned j = 0; j < 4; j++) {
181       ac_dump_reg(f, gfx_level, R_008F30_SQ_IMG_SAMP_WORD0 + j * 4, desc[j], 0xffffffff);
182    }
183 }
184 
185 static void
radv_dump_combined_image_sampler_descriptor(enum amd_gfx_level gfx_level,const uint32_t * desc,FILE * f)186 radv_dump_combined_image_sampler_descriptor(enum amd_gfx_level gfx_level, const uint32_t *desc,
187                                             FILE *f)
188 {
189    radv_dump_image_descriptor(gfx_level, desc, f);
190    radv_dump_sampler_descriptor(gfx_level, desc + 16, f);
191 }
192 
193 static void
radv_dump_descriptor_set(struct radv_device * device,struct radv_descriptor_set * set,unsigned id,FILE * f)194 radv_dump_descriptor_set(struct radv_device *device, struct radv_descriptor_set *set, unsigned id,
195                          FILE *f)
196 {
197    enum amd_gfx_level gfx_level = device->physical_device->rad_info.gfx_level;
198    const struct radv_descriptor_set_layout *layout;
199    int i;
200 
201    if (!set)
202       return;
203    layout = set->header.layout;
204 
205    for (i = 0; i < set->header.layout->binding_count; i++) {
206       uint32_t *desc = set->header.mapped_ptr + layout->binding[i].offset / 4;
207 
208       switch (layout->binding[i].type) {
209       case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
210       case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
211       case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
212       case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
213          radv_dump_buffer_descriptor(gfx_level, desc, f);
214          break;
215       case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
216       case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
217       case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
218          radv_dump_image_descriptor(gfx_level, desc, f);
219          break;
220       case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
221          radv_dump_combined_image_sampler_descriptor(gfx_level, desc, f);
222          break;
223       case VK_DESCRIPTOR_TYPE_SAMPLER:
224          radv_dump_sampler_descriptor(gfx_level, desc, f);
225          break;
226       case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
227       case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
228       case VK_DESCRIPTOR_TYPE_MUTABLE_VALVE:
229       case VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR:
230          /* todo */
231          break;
232       default:
233          assert(!"unknown descriptor type");
234          break;
235       }
236       fprintf(f, "\n");
237    }
238    fprintf(f, "\n\n");
239 }
240 
241 static void
radv_dump_descriptors(struct radv_device * device,FILE * f)242 radv_dump_descriptors(struct radv_device *device, FILE *f)
243 {
244    uint64_t *ptr = (uint64_t *)device->trace_id_ptr;
245    int i;
246 
247    fprintf(f, "Descriptors:\n");
248    for (i = 0; i < MAX_SETS; i++) {
249       struct radv_descriptor_set *set = *(struct radv_descriptor_set **)(ptr + i + 5);
250 
251       radv_dump_descriptor_set(device, set, i, f);
252    }
253 }
254 
struct radv_shader_inst {
   char text[160];  /* one disasm line */
   unsigned offset; /* instruction offset */
   unsigned size;   /* instruction size = 4 or 8 */
};

/**
 * Split a disassembly string into lines and append them to "instructions".
 *
 * Only newline-terminated lines that contain a ';' are treated as
 * instructions; everything else is skipped. Each instruction's offset is the
 * previous instruction's offset plus its size (0 for the first one), and its
 * text gets a " [PC=..., off=..., size=...]" suffix.
 *
 * \param disasm        NUL-terminated disassembly text.
 * \param start_addr    address of offset 0, used for the printed PC.
 * \param num           in/out: number of valid entries in "instructions".
 * \param instructions  output array; the caller must size it for every
 *                      instruction line, as no capacity is passed in.
 *
 * Lines that do not fit into radv_shader_inst::text are truncated instead of
 * overflowing the buffer (previously this was only guarded by an assert).
 */
static void
si_add_split_disasm(const char *disasm, uint64_t start_addr, unsigned *num,
                    struct radv_shader_inst *instructions)
{
   struct radv_shader_inst *last_inst = *num ? &instructions[*num - 1] : NULL;
   const char *next;

   while ((next = strchr(disasm, '\n'))) {
      struct radv_shader_inst *inst = &instructions[*num];
      const size_t len = (size_t)(next - disasm);
      const char *semicolon = memchr(disasm, ';', len);

      if (!semicolon) {
         /* Ignore everything that is not an instruction. */
         disasm = next + 1;
         continue;
      }

      /* Always leave room for the terminating NUL. */
      const size_t copy_len = len < sizeof(inst->text) ? len : sizeof(inst->text) - 1;

      memcpy(inst->text, disasm, copy_len);
      inst->text[copy_len] = 0;
      inst->offset = last_inst ? last_inst->offset + last_inst->size : 0;

      /* More than 16 chars after ";" means the instruction is 8 bytes long. */
      inst->size = next - semicolon > 16 ? 8 : 4;

      /* snprintf truncates the annotation when the line is (nearly) full. */
      snprintf(inst->text + copy_len, sizeof(inst->text) - copy_len,
               " [PC=0x%" PRIx64 ", off=%u, size=%u]", start_addr + inst->offset, inst->offset,
               inst->size);

      last_inst = inst;
      (*num)++;
      disasm = next + 1;
   }
}
299 
300 static void
radv_dump_annotated_shader(struct radv_shader * shader,gl_shader_stage stage,struct ac_wave_info * waves,unsigned num_waves,FILE * f)301 radv_dump_annotated_shader(struct radv_shader *shader, gl_shader_stage stage,
302                            struct ac_wave_info *waves, unsigned num_waves, FILE *f)
303 {
304    uint64_t start_addr, end_addr;
305    unsigned i;
306 
307    if (!shader)
308       return;
309 
310    start_addr = radv_shader_get_va(shader);
311    end_addr = start_addr + shader->code_size;
312 
313    /* See if any wave executes the shader. */
314    for (i = 0; i < num_waves; i++) {
315       if (start_addr <= waves[i].pc && waves[i].pc <= end_addr)
316          break;
317    }
318 
319    if (i == num_waves)
320       return; /* the shader is not being executed */
321 
322    /* Remember the first found wave. The waves are sorted according to PC. */
323    waves = &waves[i];
324    num_waves -= i;
325 
326    /* Get the list of instructions.
327     * Buffer size / 4 is the upper bound of the instruction count.
328     */
329    unsigned num_inst = 0;
330    struct radv_shader_inst *instructions =
331       calloc(shader->code_size / 4, sizeof(struct radv_shader_inst));
332 
333    si_add_split_disasm(shader->disasm_string, start_addr, &num_inst, instructions);
334 
335    fprintf(f, COLOR_YELLOW "%s - annotated disassembly:" COLOR_RESET "\n",
336            radv_get_shader_name(&shader->info, stage));
337 
338    /* Print instructions with annotations. */
339    for (i = 0; i < num_inst; i++) {
340       struct radv_shader_inst *inst = &instructions[i];
341 
342       fprintf(f, "%s\n", inst->text);
343 
344       /* Print which waves execute the instruction right now. */
345       while (num_waves && start_addr + inst->offset == waves->pc) {
346          fprintf(f,
347                  "          " COLOR_GREEN "^ SE%u SH%u CU%u "
348                  "SIMD%u WAVE%u  EXEC=%016" PRIx64 "  ",
349                  waves->se, waves->sh, waves->cu, waves->simd, waves->wave, waves->exec);
350 
351          if (inst->size == 4) {
352             fprintf(f, "INST32=%08X" COLOR_RESET "\n", waves->inst_dw0);
353          } else {
354             fprintf(f, "INST64=%08X %08X" COLOR_RESET "\n", waves->inst_dw0, waves->inst_dw1);
355          }
356 
357          waves->matched = true;
358          waves = &waves[1];
359          num_waves--;
360       }
361    }
362 
363    fprintf(f, "\n\n");
364    free(instructions);
365 }
366 
367 static void
radv_dump_annotated_shaders(struct radv_pipeline * pipeline,VkShaderStageFlagBits active_stages,FILE * f)368 radv_dump_annotated_shaders(struct radv_pipeline *pipeline, VkShaderStageFlagBits active_stages,
369                             FILE *f)
370 {
371    struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP];
372    enum amd_gfx_level gfx_level = pipeline->device->physical_device->rad_info.gfx_level;
373    unsigned num_waves = ac_get_wave_info(gfx_level, waves);
374 
375    fprintf(f, COLOR_CYAN "The number of active waves = %u" COLOR_RESET "\n\n", num_waves);
376 
377    /* Dump annotated active graphics shaders. */
378    unsigned stages = active_stages;
379    while (stages) {
380       int stage = u_bit_scan(&stages);
381 
382       radv_dump_annotated_shader(pipeline->shaders[stage], stage, waves, num_waves, f);
383    }
384 
385    /* Print waves executing shaders that are not currently bound. */
386    unsigned i;
387    bool found = false;
388    for (i = 0; i < num_waves; i++) {
389       if (waves[i].matched)
390          continue;
391 
392       if (!found) {
393          fprintf(f, COLOR_CYAN "Waves not executing currently-bound shaders:" COLOR_RESET "\n");
394          found = true;
395       }
396       fprintf(f,
397               "    SE%u SH%u CU%u SIMD%u WAVE%u  EXEC=%016" PRIx64 "  INST=%08X %08X  PC=%" PRIx64
398               "\n",
399               waves[i].se, waves[i].sh, waves[i].cu, waves[i].simd, waves[i].wave, waves[i].exec,
400               waves[i].inst_dw0, waves[i].inst_dw1, waves[i].pc);
401    }
402    if (found)
403       fprintf(f, "\n\n");
404 }
405 
406 static void
radv_dump_spirv(struct radv_shader * shader,const char * sha1,const char * dump_dir)407 radv_dump_spirv(struct radv_shader *shader, const char *sha1, const char *dump_dir)
408 {
409    char dump_path[512];
410    FILE *f;
411 
412    snprintf(dump_path, sizeof(dump_path), "%s/%s.spv", dump_dir, sha1);
413 
414    f = fopen(dump_path, "w+");
415    if (f) {
416       fwrite(shader->spirv, shader->spirv_size, 1, f);
417       fclose(f);
418    }
419 }
420 
421 static void
radv_dump_shader(struct radv_pipeline * pipeline,struct radv_shader * shader,gl_shader_stage stage,const char * dump_dir,FILE * f)422 radv_dump_shader(struct radv_pipeline *pipeline, struct radv_shader *shader,
423                  gl_shader_stage stage, const char *dump_dir, FILE *f)
424 {
425    if (!shader)
426       return;
427 
428    fprintf(f, "%s:\n\n", radv_get_shader_name(&shader->info, stage));
429 
430    if (shader->spirv) {
431       unsigned char sha1[21];
432       char sha1buf[41];
433 
434       _mesa_sha1_compute(shader->spirv, shader->spirv_size, sha1);
435       _mesa_sha1_format(sha1buf, sha1);
436 
437       fprintf(f, "SPIRV (see %s.spv)\n\n", sha1buf);
438       radv_dump_spirv(shader, sha1buf, dump_dir);
439    }
440 
441    if (shader->nir_string) {
442       fprintf(f, "NIR:\n%s\n", shader->nir_string);
443    }
444 
445    fprintf(f, "%s IR:\n%s\n", pipeline->device->physical_device->use_llvm ? "LLVM" : "ACO",
446            shader->ir_string);
447    fprintf(f, "DISASM:\n%s\n", shader->disasm_string);
448 
449    radv_dump_shader_stats(pipeline->device, pipeline, stage, f);
450 }
451 
452 static void
radv_dump_shaders(struct radv_pipeline * pipeline,VkShaderStageFlagBits active_stages,const char * dump_dir,FILE * f)453 radv_dump_shaders(struct radv_pipeline *pipeline, VkShaderStageFlagBits active_stages,
454                   const char *dump_dir, FILE *f)
455 {
456    /* Dump active graphics shaders. */
457    unsigned stages = active_stages;
458    while (stages) {
459       int stage = u_bit_scan(&stages);
460 
461       radv_dump_shader(pipeline, pipeline->shaders[stage], stage, dump_dir, f);
462    }
463 }
464 
465 static void
radv_dump_vertex_descriptors(struct radv_graphics_pipeline * pipeline,FILE * f)466 radv_dump_vertex_descriptors(struct radv_graphics_pipeline *pipeline, FILE *f)
467 {
468    void *ptr = (uint64_t *)pipeline->base.device->trace_id_ptr;
469    uint32_t count = util_bitcount(pipeline->vb_desc_usage_mask);
470    uint32_t *vb_ptr = &((uint32_t *)ptr)[3];
471 
472    if (!count)
473       return;
474 
475    fprintf(f, "Num vertex %s: %d\n",
476            pipeline->use_per_attribute_vb_descs ? "attributes" : "bindings", count);
477    for (uint32_t i = 0; i < count; i++) {
478       uint32_t *desc = &((uint32_t *)vb_ptr)[i * 4];
479       uint64_t va = 0;
480 
481       va |= desc[0];
482       va |= (uint64_t)G_008F04_BASE_ADDRESS_HI(desc[1]) << 32;
483 
484       fprintf(f, "VBO#%d:\n", i);
485       fprintf(f, "\tVA: 0x%" PRIx64 "\n", va);
486       fprintf(f, "\tStride: %d\n", G_008F04_STRIDE(desc[1]));
487       fprintf(f, "\tNum records: %d (0x%x)\n", desc[2], desc[2]);
488    }
489 }
490 
491 static struct radv_shader_part *
radv_get_saved_vs_prolog(struct radv_device * device)492 radv_get_saved_vs_prolog(struct radv_device *device)
493 {
494    uint64_t *ptr = (uint64_t *)device->trace_id_ptr;
495    return *(struct radv_shader_part **)(ptr + 4);
496 }
497 
498 static void
radv_dump_vs_prolog(struct radv_pipeline * pipeline,FILE * f)499 radv_dump_vs_prolog(struct radv_pipeline *pipeline, FILE *f)
500 {
501    struct radv_shader_part *vs_prolog = radv_get_saved_vs_prolog(pipeline->device);
502    struct radv_shader *vs_shader = radv_get_shader(pipeline, MESA_SHADER_VERTEX);
503 
504    if (!vs_prolog || !vs_shader || !vs_shader->info.vs.has_prolog)
505       return;
506 
507    fprintf(f, "Vertex prolog:\n\n");
508    fprintf(f, "DISASM:\n%s\n", vs_prolog->disasm_string);
509 }
510 
511 static struct radv_pipeline *
radv_get_saved_pipeline(struct radv_device * device,enum amd_ip_type ring)512 radv_get_saved_pipeline(struct radv_device *device, enum amd_ip_type ring)
513 {
514    uint64_t *ptr = (uint64_t *)device->trace_id_ptr;
515    int offset = ring == AMD_IP_GFX ? 1 : 2;
516 
517    return *(struct radv_pipeline **)(ptr + offset);
518 }
519 
520 static void
radv_dump_queue_state(struct radv_queue * queue,const char * dump_dir,FILE * f)521 radv_dump_queue_state(struct radv_queue *queue, const char *dump_dir, FILE *f)
522 {
523    enum amd_ip_type ring = radv_queue_ring(queue);
524    struct radv_pipeline *pipeline;
525 
526    fprintf(f, "AMD_IP_%s:\n", ring == AMD_IP_GFX ? "GFX" : "COMPUTE");
527 
528    pipeline = radv_get_saved_pipeline(queue->device, ring);
529    if (pipeline) {
530       struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);
531       VkShaderStageFlags active_stages;
532 
533       if (pipeline->type == RADV_PIPELINE_GRAPHICS) {
534          active_stages = graphics_pipeline->active_stages;
535       } else {
536          active_stages = VK_SHADER_STAGE_COMPUTE_BIT;
537       }
538 
539       radv_dump_vs_prolog(pipeline, f);
540       radv_dump_shaders(pipeline, active_stages, dump_dir, f);
541       if (!(queue->device->instance->debug_flags & RADV_DEBUG_NO_UMR))
542          radv_dump_annotated_shaders(pipeline, active_stages, f);
543       radv_dump_vertex_descriptors(graphics_pipeline, f);
544       radv_dump_descriptors(queue->device, f);
545    }
546 }
547 
/**
 * Run a shell command with popen() and copy its standard output to f,
 * followed by one extra newline. Nothing is written when the command cannot
 * be started. The function body is compiled out on Windows.
 */
static void
radv_dump_cmd(const char *cmd, FILE *f)
{
#ifndef _WIN32
   char chunk[2048];
   FILE *pipe = popen(cmd, "r");

   if (pipe == NULL)
      return;

   while (fgets(chunk, sizeof(chunk), pipe) != NULL)
      fputs(chunk, f);

   fprintf(f, "\n");
   pclose(pipe);
#endif
}
564 
/**
 * Write a header followed by the output of "dmesg | tail -n60" to f.
 */
static void
radv_dump_dmesg(FILE *f)
{
   static const char dmesg_cmd[] = "dmesg | tail -n60";

   fprintf(f, "\nLast 60 lines of dmesg:\n\n");
   radv_dump_cmd(dmesg_cmd, f);
}
571 
572 void
radv_dump_enabled_options(struct radv_device * device,FILE * f)573 radv_dump_enabled_options(struct radv_device *device, FILE *f)
574 {
575    uint64_t mask;
576 
577    if (device->instance->debug_flags) {
578       fprintf(f, "Enabled debug options: ");
579 
580       mask = device->instance->debug_flags;
581       while (mask) {
582          int i = u_bit_scan64(&mask);
583          fprintf(f, "%s, ", radv_get_debug_option_name(i));
584       }
585       fprintf(f, "\n");
586    }
587 
588    if (device->instance->perftest_flags) {
589       fprintf(f, "Enabled perftest options: ");
590 
591       mask = device->instance->perftest_flags;
592       while (mask) {
593          int i = u_bit_scan64(&mask);
594          fprintf(f, "%s, ", radv_get_perftest_option_name(i));
595       }
596       fprintf(f, "\n");
597    }
598 }
599 
600 static void
radv_dump_app_info(struct radv_device * device,FILE * f)601 radv_dump_app_info(struct radv_device *device, FILE *f)
602 {
603    struct radv_instance *instance = device->instance;
604 
605    fprintf(f, "Application name: %s\n", instance->vk.app_info.app_name);
606    fprintf(f, "Application version: %d\n", instance->vk.app_info.app_version);
607    fprintf(f, "Engine name: %s\n", instance->vk.app_info.engine_name);
608    fprintf(f, "Engine version: %d\n", instance->vk.app_info.engine_version);
609    fprintf(f, "API version: %d.%d.%d\n", VK_VERSION_MAJOR(instance->vk.app_info.api_version),
610            VK_VERSION_MINOR(instance->vk.app_info.api_version),
611            VK_VERSION_PATCH(instance->vk.app_info.api_version));
612 
613    radv_dump_enabled_options(device, f);
614 }
615 
616 static void
radv_dump_device_name(struct radv_device * device,FILE * f)617 radv_dump_device_name(struct radv_device *device, FILE *f)
618 {
619    struct radeon_info *info = &device->physical_device->rad_info;
620 #ifndef _WIN32
621    char kernel_version[128] = {0};
622    struct utsname uname_data;
623 #endif
624 
625 #ifdef _WIN32
626    fprintf(f, "Device name: %s (DRM %i.%i.%i)\n\n", device->physical_device->marketing_name,
627            info->drm_major, info->drm_minor, info->drm_patchlevel);
628 #else
629    if (uname(&uname_data) == 0)
630       snprintf(kernel_version, sizeof(kernel_version), " / %s", uname_data.release);
631 
632    fprintf(f, "Device name: %s (DRM %i.%i.%i%s)\n\n", device->physical_device->marketing_name,
633            info->drm_major, info->drm_minor, info->drm_patchlevel, kernel_version);
634 #endif
635 }
636 
637 static void
radv_dump_umr_ring(struct radv_queue * queue,FILE * f)638 radv_dump_umr_ring(struct radv_queue *queue, FILE *f)
639 {
640    enum amd_ip_type ring = radv_queue_ring(queue);
641    struct radv_device *device = queue->device;
642    char cmd[128];
643 
644    /* TODO: Dump compute ring. */
645    if (ring != AMD_IP_GFX)
646       return;
647 
648    sprintf(cmd, "umr -R %s 2>&1",
649            device->physical_device->rad_info.gfx_level >= GFX10 ? "gfx_0.0.0" : "gfx");
650 
651    fprintf(f, "\nUMR GFX ring:\n\n");
652    radv_dump_cmd(cmd, f);
653 }
654 
655 static void
radv_dump_umr_waves(struct radv_queue * queue,FILE * f)656 radv_dump_umr_waves(struct radv_queue *queue, FILE *f)
657 {
658    enum amd_ip_type ring = radv_queue_ring(queue);
659    struct radv_device *device = queue->device;
660    char cmd[128];
661 
662    /* TODO: Dump compute ring. */
663    if (ring != AMD_IP_GFX)
664       return;
665 
666    sprintf(cmd, "umr -O bits,halt_waves -go 0 -wa %s -go 1 2>&1",
667            device->physical_device->rad_info.gfx_level >= GFX10 ? "gfx_0.0.0" : "gfx");
668 
669    fprintf(f, "\nUMR GFX waves:\n\n");
670    radv_dump_cmd(cmd, f);
671 }
672 
673 static bool
radv_gpu_hang_occured(struct radv_queue * queue,enum amd_ip_type ring)674 radv_gpu_hang_occured(struct radv_queue *queue, enum amd_ip_type ring)
675 {
676    struct radeon_winsys *ws = queue->device->ws;
677 
678    if (!ws->ctx_wait_idle(queue->hw_ctx, ring, queue->vk.index_in_family))
679       return true;
680 
681    return false;
682 }
683 
684 void
radv_check_gpu_hangs(struct radv_queue * queue,struct radeon_cmdbuf * cs)685 radv_check_gpu_hangs(struct radv_queue *queue, struct radeon_cmdbuf *cs)
686 {
687    struct radv_device *device = queue->device;
688    enum amd_ip_type ring;
689    uint64_t addr;
690 
691    ring = radv_queue_ring(queue);
692 
693    bool hang_occurred = radv_gpu_hang_occured(queue, ring);
694    bool vm_fault_occurred = false;
695    if (queue->device->instance->debug_flags & RADV_DEBUG_VM_FAULTS)
696       vm_fault_occurred = ac_vm_fault_occured(device->physical_device->rad_info.gfx_level,
697                                               &device->dmesg_timestamp, &addr);
698    if (!hang_occurred && !vm_fault_occurred)
699       return;
700 
701    fprintf(stderr, "radv: GPU hang detected...\n");
702 
703 #ifndef _WIN32
704    /* Create a directory into $HOME/radv_dumps_<pid>_<time> to save
705     * various debugging info about that GPU hang.
706     */
707    struct tm *timep, result;
708    time_t raw_time;
709    FILE *f;
710    char dump_dir[256], dump_path[512], buf_time[128];
711 
712    time(&raw_time);
713    timep = os_localtime(&raw_time, &result);
714    strftime(buf_time, sizeof(buf_time), "%Y.%m.%d_%H.%M.%S", timep);
715 
716    snprintf(dump_dir, sizeof(dump_dir), "%s/" RADV_DUMP_DIR "_%d_%s", debug_get_option("HOME", "."),
717             getpid(), buf_time);
718    if (mkdir(dump_dir, 0774) && errno != EEXIST) {
719       fprintf(stderr, "radv: can't create directory '%s' (%i).\n", dump_dir, errno);
720       abort();
721    }
722 
723    fprintf(stderr, "radv: GPU hang report will be saved to '%s'!\n", dump_dir);
724 
725    /* Dump trace file. */
726    snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "trace.log");
727    f = fopen(dump_path, "w+");
728    if (f) {
729       radv_dump_trace(queue->device, cs, f);
730       fclose(f);
731    }
732 
733    /* Dump pipeline state. */
734    snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "pipeline.log");
735    f = fopen(dump_path, "w+");
736    if (f) {
737       radv_dump_queue_state(queue, dump_dir, f);
738       fclose(f);
739    }
740 
741    if (!(device->instance->debug_flags & RADV_DEBUG_NO_UMR)) {
742       /* Dump UMR waves. */
743       snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "umr_waves.log");
744       f = fopen(dump_path, "w+");
745       if (f) {
746          radv_dump_umr_waves(queue, f);
747          fclose(f);
748       }
749 
750       /* Dump UMR ring. */
751       snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "umr_ring.log");
752       f = fopen(dump_path, "w+");
753       if (f) {
754          radv_dump_umr_ring(queue, f);
755          fclose(f);
756       }
757    }
758 
759    /* Dump debug registers. */
760    snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "registers.log");
761    f = fopen(dump_path, "w+");
762    if (f) {
763       radv_dump_debug_registers(device, f);
764       fclose(f);
765    }
766 
767    /* Dump BO ranges. */
768    snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "bo_ranges.log");
769    f = fopen(dump_path, "w+");
770    if (f) {
771       device->ws->dump_bo_ranges(device->ws, f);
772       fclose(f);
773    }
774 
775    /* Dump BO log. */
776    snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "bo_history.log");
777    f = fopen(dump_path, "w+");
778    if (f) {
779       device->ws->dump_bo_log(device->ws, f);
780       fclose(f);
781    }
782 
783    /* Dump VM fault info. */
784    if (vm_fault_occurred) {
785       snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "vm_fault.log");
786       f = fopen(dump_path, "w+");
787       if (f) {
788          fprintf(f, "VM fault report.\n\n");
789          fprintf(f, "Failing VM page: 0x%08" PRIx64 "\n\n", addr);
790          fclose(f);
791       }
792    }
793 
794    /* Dump app info. */
795    snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "app_info.log");
796    f = fopen(dump_path, "w+");
797    if (f) {
798       radv_dump_app_info(device, f);
799       fclose(f);
800    }
801 
802    /* Dump GPU info. */
803    snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "gpu_info.log");
804    f = fopen(dump_path, "w+");
805    if (f) {
806       radv_dump_device_name(device, f);
807       ac_print_gpu_info(&device->physical_device->rad_info, f);
808       fclose(f);
809    }
810 
811    /* Dump dmesg. */
812    snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "dmesg.log");
813    f = fopen(dump_path, "w+");
814    if (f) {
815       radv_dump_dmesg(f);
816       fclose(f);
817    }
818 #endif
819 
820    fprintf(stderr, "radv: GPU hang report saved successfully!\n");
821    abort();
822 }
823 
824 void
radv_print_spirv(const char * data,uint32_t size,FILE * fp)825 radv_print_spirv(const char *data, uint32_t size, FILE *fp)
826 {
827 #ifndef _WIN32
828    char path[] = "/tmp/fileXXXXXX";
829    char command[128];
830    int fd;
831 
832    /* Dump the binary into a temporary file. */
833    fd = mkstemp(path);
834    if (fd < 0)
835       return;
836 
837    if (write(fd, data, size) == -1)
838       goto fail;
839 
840    /* Disassemble using spirv-dis if installed. */
841    sprintf(command, "spirv-dis %s", path);
842    radv_dump_cmd(command, fp);
843 
844 fail:
845    close(fd);
846    unlink(path);
847 #endif
848 }
849 
850 bool
radv_trap_handler_init(struct radv_device * device)851 radv_trap_handler_init(struct radv_device *device)
852 {
853    struct radeon_winsys *ws = device->ws;
854    VkResult result;
855 
856    /* Create the trap handler shader and upload it like other shaders. */
857    device->trap_handler_shader = radv_create_trap_handler_shader(device);
858    if (!device->trap_handler_shader) {
859       fprintf(stderr, "radv: failed to create the trap handler shader.\n");
860       return false;
861    }
862 
863    result = ws->buffer_make_resident(ws, device->trap_handler_shader->alloc->arena->bo, true);
864    if (result != VK_SUCCESS)
865       return false;
866 
867    result = ws->buffer_create(ws, TMA_BO_SIZE, 256, RADEON_DOMAIN_VRAM,
868                               RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING |
869                                  RADEON_FLAG_ZERO_VRAM | RADEON_FLAG_32BIT,
870                               RADV_BO_PRIORITY_SCRATCH, 0, &device->tma_bo);
871    if (result != VK_SUCCESS)
872       return false;
873 
874    result = ws->buffer_make_resident(ws, device->tma_bo, true);
875    if (result != VK_SUCCESS)
876       return false;
877 
878    device->tma_ptr = ws->buffer_map(device->tma_bo);
879    if (!device->tma_ptr)
880       return false;
881 
882    /* Upload a buffer descriptor to store various info from the trap. */
883    uint64_t tma_va = radv_buffer_get_va(device->tma_bo) + 16;
884    uint32_t desc[4];
885 
886    desc[0] = tma_va;
887    desc[1] = S_008F04_BASE_ADDRESS_HI(tma_va >> 32);
888    desc[2] = TMA_BO_SIZE;
889    desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
890              S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
891              S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
892 
893    memcpy(device->tma_ptr, desc, sizeof(desc));
894 
895    return true;
896 }
897 
898 void
radv_trap_handler_finish(struct radv_device * device)899 radv_trap_handler_finish(struct radv_device *device)
900 {
901    struct radeon_winsys *ws = device->ws;
902 
903    if (unlikely(device->trap_handler_shader)) {
904       ws->buffer_make_resident(ws, device->trap_handler_shader->alloc->arena->bo, false);
905       radv_trap_handler_shader_destroy(device, device->trap_handler_shader);
906    }
907 
908    if (unlikely(device->tma_bo)) {
909       ws->buffer_make_resident(ws, device->tma_bo, false);
910       ws->buffer_destroy(ws, device->tma_bo);
911    }
912 }
913 
914 static void
radv_dump_faulty_shader(struct radv_device * device,uint64_t faulty_pc)915 radv_dump_faulty_shader(struct radv_device *device, uint64_t faulty_pc)
916 {
917    struct radv_shader *shader;
918    uint64_t start_addr, end_addr;
919    uint32_t instr_offset;
920 
921    shader = radv_find_shader(device, faulty_pc);
922    if (!shader)
923       return;
924 
925    start_addr = radv_shader_get_va(shader);
926    end_addr = start_addr + shader->code_size;
927    instr_offset = faulty_pc - start_addr;
928 
929    fprintf(stderr,
930            "Faulty shader found "
931            "VA=[0x%" PRIx64 "-0x%" PRIx64 "], instr_offset=%d\n",
932            start_addr, end_addr, instr_offset);
933 
934    /* Get the list of instructions.
935     * Buffer size / 4 is the upper bound of the instruction count.
936     */
937    unsigned num_inst = 0;
938    struct radv_shader_inst *instructions =
939       calloc(shader->code_size / 4, sizeof(struct radv_shader_inst));
940 
941    /* Split the disassembly string into instructions. */
942    si_add_split_disasm(shader->disasm_string, start_addr, &num_inst, instructions);
943 
944    /* Print instructions with annotations. */
945    for (unsigned i = 0; i < num_inst; i++) {
946       struct radv_shader_inst *inst = &instructions[i];
947 
948       if (start_addr + inst->offset == faulty_pc) {
949          fprintf(stderr, "\n!!! Faulty instruction below !!!\n");
950          fprintf(stderr, "%s\n", inst->text);
951          fprintf(stderr, "\n");
952       } else {
953          fprintf(stderr, "%s\n", inst->text);
954       }
955    }
956 
957    free(instructions);
958 }
959 
/* SQ wave hardware register values as laid out in the TMA buffer.
 *
 * radv_dump_sq_hw_regs() overlays this struct on device->tma_ptr starting at
 * dword 6, so the order and size of the fields define which dword is decoded
 * as which register. Presumably this mirrors what the trap handler shader
 * stores there - confirm against the shader before changing the layout.
 */
struct radv_sq_hw_reg {
   uint32_t status;   /* Decoded as SQ_WAVE_STATUS. */
   uint32_t trap_sts; /* Decoded as SQ_WAVE_TRAPSTS. */
   uint32_t hw_id;    /* Decoded as SQ_WAVE_HW_ID1 on GFX10+, SQ_WAVE_HW_ID otherwise. */
   uint32_t ib_sts;   /* Decoded as SQ_WAVE_IB_STS. */
};
966 
967 static void
radv_dump_sq_hw_regs(struct radv_device * device)968 radv_dump_sq_hw_regs(struct radv_device *device)
969 {
970    struct radv_sq_hw_reg *regs = (struct radv_sq_hw_reg *)&device->tma_ptr[6];
971 
972    fprintf(stderr, "\nHardware registers:\n");
973    if (device->physical_device->rad_info.gfx_level >= GFX10) {
974       ac_dump_reg(stderr, device->physical_device->rad_info.gfx_level, R_000408_SQ_WAVE_STATUS,
975                   regs->status, ~0);
976       ac_dump_reg(stderr, device->physical_device->rad_info.gfx_level, R_00040C_SQ_WAVE_TRAPSTS,
977                   regs->trap_sts, ~0);
978       ac_dump_reg(stderr, device->physical_device->rad_info.gfx_level, R_00045C_SQ_WAVE_HW_ID1,
979                   regs->hw_id, ~0);
980       ac_dump_reg(stderr, device->physical_device->rad_info.gfx_level, R_00041C_SQ_WAVE_IB_STS,
981                   regs->ib_sts, ~0);
982    } else {
983       ac_dump_reg(stderr, device->physical_device->rad_info.gfx_level, R_000048_SQ_WAVE_STATUS,
984                   regs->status, ~0);
985       ac_dump_reg(stderr, device->physical_device->rad_info.gfx_level, R_00004C_SQ_WAVE_TRAPSTS,
986                   regs->trap_sts, ~0);
987       ac_dump_reg(stderr, device->physical_device->rad_info.gfx_level, R_000050_SQ_WAVE_HW_ID,
988                   regs->hw_id, ~0);
989       ac_dump_reg(stderr, device->physical_device->rad_info.gfx_level, R_00005C_SQ_WAVE_IB_STS,
990                   regs->ib_sts, ~0);
991    }
992    fprintf(stderr, "\n\n");
993 }
994 
995 void
radv_check_trap_handler(struct radv_queue * queue)996 radv_check_trap_handler(struct radv_queue *queue)
997 {
998    enum amd_ip_type ring = radv_queue_ring(queue);
999    struct radv_device *device = queue->device;
1000    struct radeon_winsys *ws = device->ws;
1001 
1002    /* Wait for the context to be idle in a finite time. */
1003    ws->ctx_wait_idle(queue->hw_ctx, ring, queue->vk.index_in_family);
1004 
1005    /* Try to detect if the trap handler has been reached by the hw by
1006     * looking at ttmp0 which should be non-zero if a shader exception
1007     * happened.
1008     */
1009    if (!device->tma_ptr[4])
1010       return;
1011 
1012 #if 0
1013 	fprintf(stderr, "tma_ptr:\n");
1014 	for (unsigned i = 0; i < 10; i++)
1015 		fprintf(stderr, "tma_ptr[%d]=0x%x\n", i, device->tma_ptr[i]);
1016 #endif
1017 
1018    radv_dump_sq_hw_regs(device);
1019 
1020    uint32_t ttmp0 = device->tma_ptr[4];
1021    uint32_t ttmp1 = device->tma_ptr[5];
1022 
1023    /* According to the ISA docs, 3.10 Trap and Exception Registers:
1024     *
1025     * "{ttmp1, ttmp0} = {3'h0, pc_rewind[3:0], HT[0], trapID[7:0], PC[47:0]}"
1026     *
1027     * "When the trap handler is entered, the PC of the faulting
1028     *  instruction is: (PC - PC_rewind * 4)."
1029     * */
1030    uint8_t trap_id = (ttmp1 >> 16) & 0xff;
1031    uint8_t ht = (ttmp1 >> 24) & 0x1;
1032    uint8_t pc_rewind = (ttmp1 >> 25) & 0xf;
1033    uint64_t pc = (ttmp0 | ((ttmp1 & 0x0000ffffull) << 32)) - (pc_rewind * 4);
1034 
1035    fprintf(stderr, "PC=0x%" PRIx64 ", trapID=%d, HT=%d, PC_rewind=%d\n", pc, trap_id, ht,
1036            pc_rewind);
1037 
1038    radv_dump_faulty_shader(device, pc);
1039 
1040    abort();
1041 }
1042