/*
 * Copyright (C) 2019 Alyssa Rosenzweig
 * Copyright (C) 2017-2018 Lyude Paul
 * Copyright (C) 2019 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>

#include "util/macros.h"
#include "util/u_debug.h"
#include "util/u_hexdump.h"
#include "decode.h"

#include "compiler/bifrost/disassemble.h"
#include "compiler/valhall/disassemble.h"
#include "midgard/disassemble.h"

/* Used to distinguish dumped files; otherwise we would have to print the ctx
 * pointer, which is annoying for the user since it changes with every run. */
static int num_ctxs = 0;

#define to_mapped_memory(x)                                                    \
   rb_node_data(struct pandecode_mapped_memory, x, node)

/*
 * Compare a GPU VA to a node, considering a GPU VA to be equal to a node if it
 * is contained in the interval the node represents. This lets us store
 * intervals in our tree.
 */
static int
pandecode_cmp_key(const struct rb_node *lhs, const void *key)
{
   struct pandecode_mapped_memory *mem = to_mapped_memory(lhs);
   uint64_t *gpu_va = (uint64_t *)key;

   if (mem->gpu_va <= *gpu_va && *gpu_va < (mem->gpu_va + mem->length))
      return 0;
   else
      return mem->gpu_va - *gpu_va;
}
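
/*
 * Illustrative sketch (not part of the library): with the comparator above,
 * rb_tree_search() treats any address inside [gpu_va, gpu_va + length) as a
 * hit, so an interior pointer finds its containing mapping. Hypothetical
 * values:
 *
 *    uint64_t va = 0x1000040;   // 0x40 bytes into a buffer mapped at 0x1000000
 *    struct rb_node *n = rb_tree_search(&ctx->mmap_tree, &va, pandecode_cmp_key);
 *    struct pandecode_mapped_memory *mem = to_mapped_memory(n);
 *    // mem->gpu_va == 0x1000000, provided a mapping covering va was injected
 */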

static int
pandecode_cmp(const struct rb_node *lhs, const struct rb_node *rhs)
{
   return to_mapped_memory(lhs)->gpu_va - to_mapped_memory(rhs)->gpu_va;
}

static struct pandecode_mapped_memory *
pandecode_find_mapped_gpu_mem_containing_rw(struct pandecode_context *ctx,
                                            uint64_t addr)
{
   simple_mtx_assert_locked(&ctx->lock);

   struct rb_node *node =
      rb_tree_search(&ctx->mmap_tree, &addr, pandecode_cmp_key);

   return to_mapped_memory(node);
}

struct pandecode_mapped_memory *
pandecode_find_mapped_gpu_mem_containing(struct pandecode_context *ctx,
                                         uint64_t addr)
{
   simple_mtx_assert_locked(&ctx->lock);

   struct pandecode_mapped_memory *mem =
      pandecode_find_mapped_gpu_mem_containing_rw(ctx, addr);

   if (mem && mem->addr && !mem->ro) {
      mprotect(mem->addr, mem->length, PROT_READ);
      mem->ro = true;
      util_dynarray_append(&ctx->ro_mappings, struct pandecode_mapped_memory *,
                           mem);
   }

   return mem;
}

/*
 * Validate that a pointer into GPU memory is mapped and that its mapping
 * holds at least sz bytes past it. This is a tool for catching GPU-side
 * memory bugs.
 */
void
pandecode_validate_buffer(struct pandecode_context *ctx, mali_ptr addr,
                          size_t sz)
{
   if (!addr) {
      pandecode_log(ctx, "// XXX: null pointer deref\n");
      return;
   }

   /* Find a BO */

   struct pandecode_mapped_memory *bo =
      pandecode_find_mapped_gpu_mem_containing(ctx, addr);

   if (!bo) {
      pandecode_log(ctx, "// XXX: invalid memory dereference\n");
      return;
   }

   /* Bounds check */

   unsigned offset = addr - bo->gpu_va;
   unsigned total = offset + sz;

   if (total > bo->length) {
      pandecode_log(ctx,
                    "// XXX: buffer overrun. "
                    "Chunk of size %zu at offset %u in buffer of size %zu. "
                    "Overrun by %zu bytes.\n",
                    sz, offset, bo->length, total - bo->length);
      return;
   }
}
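
/*
 * Illustrative sketch (hypothetical values): a decoder about to read a
 * 256-byte descriptor at desc_va can validate the pointer first, so an
 * overrun is logged instead of crashing the decode:
 *
 *    pandecode_validate_buffer(ctx, desc_va, 256);
 *    // the dump stream gains a "// XXX: ..." line if desc_va is null,
 *    // unmapped, or closer than 256 bytes to the end of its buffer
 */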

void
pandecode_map_read_write(struct pandecode_context *ctx)
{
   simple_mtx_assert_locked(&ctx->lock);

   util_dynarray_foreach(&ctx->ro_mappings, struct pandecode_mapped_memory *,
                         mem) {
      (*mem)->ro = false;
      mprotect((*mem)->addr, (*mem)->length, PROT_READ | PROT_WRITE);
   }
   util_dynarray_clear(&ctx->ro_mappings);
}
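
/*
 * Illustrative sketch of the read-only round trip (assumption: the caller
 * holds ctx->lock, as the asserts above require). Lookups through
 * pandecode_find_mapped_gpu_mem_containing() mprotect() the mapping to
 * PROT_READ so a buggy decoder writing into GPU memory faults loudly; once
 * decoding finishes, the accumulated mappings are restored:
 *
 *    simple_mtx_lock(&ctx->lock);
 *    struct pandecode_mapped_memory *mem =
 *       pandecode_find_mapped_gpu_mem_containing(ctx, va);  // now read-only
 *    ... decode using mem->addr ...
 *    pandecode_map_read_write(ctx);                         // writable again
 *    simple_mtx_unlock(&ctx->lock);
 */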

static void
pandecode_add_name(struct pandecode_context *ctx,
                   struct pandecode_mapped_memory *mem, uint64_t gpu_va,
                   const char *name)
{
   simple_mtx_assert_locked(&ctx->lock);

   if (!name) {
      /* If we don't have a name, assign one */

      snprintf(mem->name, sizeof(mem->name) - 1, "memory_%" PRIx64, gpu_va);
   } else {
      assert((strlen(name) + 1) < sizeof(mem->name));
      memcpy(mem->name, name, strlen(name) + 1);
   }
}

void
pandecode_inject_mmap(struct pandecode_context *ctx, uint64_t gpu_va, void *cpu,
                      unsigned sz, const char *name)
{
   simple_mtx_lock(&ctx->lock);

   /* First, search if we already mapped this and are just updating an address */

   struct pandecode_mapped_memory *existing =
      pandecode_find_mapped_gpu_mem_containing_rw(ctx, gpu_va);

   if (existing && existing->gpu_va == gpu_va) {
      existing->length = sz;
      existing->addr = cpu;
      pandecode_add_name(ctx, existing, gpu_va, name);
   } else {
      /* Otherwise, add a fresh mapping */
      struct pandecode_mapped_memory *mapped_mem = NULL;

      mapped_mem = calloc(1, sizeof(*mapped_mem));
      mapped_mem->gpu_va = gpu_va;
      mapped_mem->length = sz;
      mapped_mem->addr = cpu;
      pandecode_add_name(ctx, mapped_mem, gpu_va, name);

      /* Add it to the tree */
      rb_tree_insert(&ctx->mmap_tree, &mapped_mem->node, pandecode_cmp);
   }

   simple_mtx_unlock(&ctx->lock);
}
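
/*
 * Illustrative sketch (hypothetical driver-side fields): a driver registers
 * each BO with the decoder as it allocates, and unregisters on free, so the
 * tree mirrors the GPU address space:
 *
 *    pandecode_inject_mmap(ctx, bo->gpu_va, bo->cpu_map, bo->size, "scratch");
 *    ... submit and decode work referencing the BO ...
 *    pandecode_inject_free(ctx, bo->gpu_va, bo->size);
 */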

void
pandecode_inject_free(struct pandecode_context *ctx, uint64_t gpu_va,
                      unsigned sz)
{
   simple_mtx_lock(&ctx->lock);

   struct pandecode_mapped_memory *mem =
      pandecode_find_mapped_gpu_mem_containing_rw(ctx, gpu_va);

   if (mem) {
      assert(mem->gpu_va == gpu_va);
      assert(mem->length == sz);

      rb_tree_remove(&ctx->mmap_tree, &mem->node);
      free(mem);
   }

   simple_mtx_unlock(&ctx->lock);
}

char *
pointer_as_memory_reference(struct pandecode_context *ctx, uint64_t ptr)
{
   simple_mtx_assert_locked(&ctx->lock);

   struct pandecode_mapped_memory *mapped;
   char *out = malloc(128);

   /* Try to find the corresponding mapped zone */

   mapped = pandecode_find_mapped_gpu_mem_containing_rw(ctx, ptr);

   if (mapped) {
      snprintf(out, 128, "%s + %d", mapped->name, (int)(ptr - mapped->gpu_va));
      return out;
   }

   /* Just use the raw address if other options are exhausted */

   snprintf(out, 128, "0x%" PRIx64, ptr);
   return out;
}
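
/*
 * Illustrative sketch (hypothetical values): the returned string is
 * heap-allocated, so the caller owns and frees it:
 *
 *    char *ref = pointer_as_memory_reference(ctx, va);
 *    pandecode_log(ctx, "points to %s\n", ref);   // e.g. "scratch + 64"
 *    free(ref);
 */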

void
pandecode_dump_file_open(struct pandecode_context *ctx)
{
   simple_mtx_assert_locked(&ctx->lock);

   /* This does a getenv every frame, so it is possible to use
    * setenv to change the base at runtime.
    */
   const char *dump_file_base =
      debug_get_option("PANDECODE_DUMP_FILE", "pandecode.dump");
   if (!strcmp(dump_file_base, "stderr"))
      ctx->dump_stream = stderr;
   else if (!ctx->dump_stream) {
      char buffer[1024];
      snprintf(buffer, sizeof(buffer), "%s.ctx-%d.%04d", dump_file_base,
               ctx->id, ctx->dump_frame_count);
      printf("pandecode: dump command stream to file %s\n", buffer);
      ctx->dump_stream = fopen(buffer, "w");
      if (!ctx->dump_stream)
         fprintf(stderr,
                 "pandecode: failed to open command stream log file %s\n",
                 buffer);
   }
}
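
/*
 * Illustrative sketch: with the naming scheme above, a second context in its
 * third frame dumps to "pandecode.dump.ctx-1.0002" by default. Since the
 * option is re-read on each open, an application can redirect everything to
 * stderr at runtime:
 *
 *    setenv("PANDECODE_DUMP_FILE", "stderr", 1);
 */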

static void
pandecode_dump_file_close(struct pandecode_context *ctx)
{
   simple_mtx_assert_locked(&ctx->lock);

   if (ctx->dump_stream && ctx->dump_stream != stderr) {
      if (fclose(ctx->dump_stream))
         perror("pandecode: dump file");

      ctx->dump_stream = NULL;
   }
}

struct pandecode_context *
pandecode_create_context(bool to_stderr)
{
   struct pandecode_context *ctx = calloc(1, sizeof(*ctx));

   /* Not thread safe, but we shouldn't ever hit this, and even if we do, the
    * worst that could happen is having the files dumped with their filenames
    * in a different order. */
   ctx->id = num_ctxs++;

   /* This will be initialized later and can be changed at run time through
    * the PANDECODE_DUMP_FILE environment variable.
    */
   ctx->dump_stream = to_stderr ? stderr : NULL;

   rb_tree_init(&ctx->mmap_tree);
   util_dynarray_init(&ctx->ro_mappings, NULL);

   simple_mtx_t mtx_init = SIMPLE_MTX_INITIALIZER;
   memcpy(&ctx->lock, &mtx_init, sizeof(simple_mtx_t));

   return ctx;
}
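
/*
 * Illustrative lifecycle sketch (hypothetical call sites): a driver keeps one
 * context per device and drives it across frames:
 *
 *    struct pandecode_context *ctx = pandecode_create_context(false);
 *    ... inject mappings, decode job chains ...
 *    pandecode_next_frame(ctx);   // rotate to a fresh dump file
 *    ... more frames ...
 *    pandecode_destroy_context(ctx);
 */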

void
pandecode_next_frame(struct pandecode_context *ctx)
{
   simple_mtx_lock(&ctx->lock);

   pandecode_dump_file_close(ctx);
   ctx->dump_frame_count++;

   simple_mtx_unlock(&ctx->lock);
}

void
pandecode_destroy_context(struct pandecode_context *ctx)
{
   simple_mtx_lock(&ctx->lock);

   rb_tree_foreach_safe(struct pandecode_mapped_memory, it, &ctx->mmap_tree,
                        node) {
      rb_tree_remove(&ctx->mmap_tree, &it->node);
      free(it);
   }

   util_dynarray_fini(&ctx->ro_mappings);
   pandecode_dump_file_close(ctx);

   simple_mtx_unlock(&ctx->lock);

   free(ctx);
}

void
pandecode_dump_mappings(struct pandecode_context *ctx)
{
   simple_mtx_lock(&ctx->lock);

   pandecode_dump_file_open(ctx);

   rb_tree_foreach(struct pandecode_mapped_memory, it, &ctx->mmap_tree, node) {
      if (!it->addr || !it->length)
         continue;

      fprintf(ctx->dump_stream, "Buffer: %s gpu %" PRIx64 "\n\n", it->name,
              it->gpu_va);

      u_hexdump(ctx->dump_stream, it->addr, it->length, false);
      fprintf(ctx->dump_stream, "\n");
   }

   fflush(ctx->dump_stream);
   simple_mtx_unlock(&ctx->lock);
}
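
/*
 * Illustrative sketch (hypothetical buffer): each mapping is emitted as a
 * header line followed by a hex dump, e.g. for an unnamed buffer injected at
 * GPU VA 0x1000000:
 *
 *    Buffer: memory_1000000 gpu 1000000
 *
 *    <u_hexdump of the buffer contents>
 */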

void
pandecode_abort_on_fault(struct pandecode_context *ctx, mali_ptr jc_gpu_va,
                         unsigned gpu_id)
{
   simple_mtx_lock(&ctx->lock);

   switch (pan_arch(gpu_id)) {
   case 4:
      pandecode_abort_on_fault_v4(ctx, jc_gpu_va);
      break;
   case 5:
      pandecode_abort_on_fault_v5(ctx, jc_gpu_va);
      break;
   case 6:
      pandecode_abort_on_fault_v6(ctx, jc_gpu_va);
      break;
   case 7:
      pandecode_abort_on_fault_v7(ctx, jc_gpu_va);
      break;
   case 9:
      pandecode_abort_on_fault_v9(ctx, jc_gpu_va);
      break;
   default:
      unreachable("Unsupported architecture");
   }

   simple_mtx_unlock(&ctx->lock);
}

void
pandecode_jc(struct pandecode_context *ctx, mali_ptr jc_gpu_va, unsigned gpu_id)
{
   simple_mtx_lock(&ctx->lock);

   switch (pan_arch(gpu_id)) {
   case 4:
      pandecode_jc_v4(ctx, jc_gpu_va, gpu_id);
      break;
   case 5:
      pandecode_jc_v5(ctx, jc_gpu_va, gpu_id);
      break;
   case 6:
      pandecode_jc_v6(ctx, jc_gpu_va, gpu_id);
      break;
   case 7:
      pandecode_jc_v7(ctx, jc_gpu_va, gpu_id);
      break;
   case 9:
      pandecode_jc_v9(ctx, jc_gpu_va, gpu_id);
      break;
   default:
      unreachable("Unsupported architecture");
   }

   simple_mtx_unlock(&ctx->lock);
}

void
pandecode_cs(struct pandecode_context *ctx, mali_ptr queue_gpu_va,
             uint32_t size, unsigned gpu_id, uint32_t *regs)
{
   simple_mtx_lock(&ctx->lock);

   switch (pan_arch(gpu_id)) {
   case 10:
      pandecode_cs_v10(ctx, queue_gpu_va, size, gpu_id, regs);
      break;
   default:
      unreachable("Unsupported architecture");
   }

   simple_mtx_unlock(&ctx->lock);
}

void
pandecode_shader_disassemble(struct pandecode_context *ctx, mali_ptr shader_ptr,
                             unsigned gpu_id)
{
   uint8_t *PANDECODE_PTR_VAR(ctx, code, shader_ptr);

   /* Compute maximum possible size */
   struct pandecode_mapped_memory *mem =
      pandecode_find_mapped_gpu_mem_containing(ctx, shader_ptr);
   size_t sz = mem->length - (shader_ptr - mem->gpu_va);

   /* Print some boilerplate to clearly denote the assembly (which doesn't
    * obey indentation rules), and actually do the disassembly! */

   pandecode_log_cont(ctx, "\nShader %p (GPU VA %" PRIx64 ") sz %zu\n",
                      code, shader_ptr, sz);

   if (pan_arch(gpu_id) >= 9)
      disassemble_valhall(ctx->dump_stream, (const uint64_t *)code, sz, true);
   else if (pan_arch(gpu_id) >= 6)
      disassemble_bifrost(ctx->dump_stream, code, sz, false);
   else
      disassemble_midgard(ctx->dump_stream, code, sz, gpu_id, true);

   pandecode_log_cont(ctx, "\n\n");
}
461