/*
 * Copyright (C) 2019 Alyssa Rosenzweig
 * Copyright (C) 2017-2018 Lyude Paul
 * Copyright (C) 2019 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <assert.h>
#include <inttypes.h> /* for the PRIx64 format macros used below */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>

#include "util/macros.h"
#include "util/u_debug.h"
#include "util/u_hexdump.h"
#include "decode.h"

#include "compiler/bifrost/disassemble.h"
#include "compiler/valhall/disassemble.h"
#include "midgard/disassemble.h"
/* Used to distinguish dumped files; otherwise we would have to print the ctx
 * pointer, which is annoying for the user since it changes with every run. */
static int num_ctxs = 0;

#define to_mapped_memory(x)                                                    \
   rb_node_data(struct pandecode_mapped_memory, x, node)

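/* (rb_node_data() is the container_of idiom: given a pointer to the embedded
 * rb_node, it recovers the enclosing struct pandecode_mapped_memory.) */
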
/*
 * Compare a GPU VA to a node, considering a GPU VA to be equal to a node if it
 * is contained in the interval the node represents. This lets us store
 * intervals in our tree.
 */
static int
pandecode_cmp_key(const struct rb_node *lhs, const void *key)
{
   struct pandecode_mapped_memory *mem = to_mapped_memory(lhs);
   uint64_t *gpu_va = (uint64_t *)key;

   if (mem->gpu_va <= *gpu_va && *gpu_va < (mem->gpu_va + mem->length))
      return 0;

   /* Compare explicitly rather than returning the subtraction: a 64-bit
    * difference truncated to int can have the wrong sign for distant
    * addresses. */
   return mem->gpu_va < *gpu_va ? -1 : 1;
}
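
/*
 * Worked example with illustrative values only: for a node mapping the
 * interval [0x1000, 0x1100), i.e. gpu_va = 0x1000 and length = 0x100:
 *
 *    uint64_t key = 0x10f0;  // inside the interval -> 0, "equal", found
 *    key = 0x0fff;           // below the interval  -> positive, search left
 *    key = 0x1100;           // past the interval   -> negative, search right
 */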

static int
pandecode_cmp(const struct rb_node *lhs, const struct rb_node *rhs)
{
   uint64_t lva = to_mapped_memory(lhs)->gpu_va;
   uint64_t rva = to_mapped_memory(rhs)->gpu_va;

   /* Same caveat as above: avoid truncating a 64-bit difference to int. */
   return lva < rva ? -1 : (lva > rva ? 1 : 0);
}

static struct pandecode_mapped_memory *
pandecode_find_mapped_gpu_mem_containing_rw(struct pandecode_context *ctx,
                                            uint64_t addr)
{
   simple_mtx_assert_locked(&ctx->lock);

   struct rb_node *node =
      rb_tree_search(&ctx->mmap_tree, &addr, pandecode_cmp_key);

   return to_mapped_memory(node);
}

struct pandecode_mapped_memory *
pandecode_find_mapped_gpu_mem_containing(struct pandecode_context *ctx,
                                         uint64_t addr)
{
   simple_mtx_assert_locked(&ctx->lock);

   struct pandecode_mapped_memory *mem =
      pandecode_find_mapped_gpu_mem_containing_rw(ctx, addr);

   if (mem && mem->addr && !mem->ro) {
      mprotect(mem->addr, mem->length, PROT_READ);
      mem->ro = true;
      util_dynarray_append(&ctx->ro_mappings, struct pandecode_mapped_memory *,
                           mem);
   }

   return mem;
}

/*
 * Validate that a given pointer into GPU memory is mapped and has at least sz
 * bytes available past it. Useful for catching GPU-side memory bugs (null or
 * dangling pointers, undersized buffers) in decoded command streams.
 */
void
pandecode_validate_buffer(struct pandecode_context *ctx, mali_ptr addr,
                          size_t sz)
{
   if (!addr) {
      pandecode_log(ctx, "// XXX: null pointer deref\n");
      return;
   }

   /* Find a BO */

   struct pandecode_mapped_memory *bo =
      pandecode_find_mapped_gpu_mem_containing(ctx, addr);

   if (!bo) {
      pandecode_log(ctx, "// XXX: invalid memory dereference\n");
      return;
   }

   /* Bounds check */

   unsigned offset = addr - bo->gpu_va;
   unsigned total = offset + sz;

   if (total > bo->length) {
      pandecode_log(ctx,
                    "// XXX: buffer overrun. "
                    "Chunk of size %zu at offset %u in buffer of size %zu. "
                    "Overrun by %zu bytes.\n",
                    sz, offset, bo->length, total - bo->length);
      return;
   }
}
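
/*
 * Worked example with hypothetical numbers: for a BO at gpu_va 0x1000 with
 * length 0x100, validating addr = 0x10f0 with sz = 0x20 gives offset = 0xf0
 * and total = 0x110. Since 0x110 > 0x100, the check logs an overrun of
 * 0x10 bytes.
 */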

void
pandecode_map_read_write(struct pandecode_context *ctx)
{
   simple_mtx_assert_locked(&ctx->lock);

   util_dynarray_foreach(&ctx->ro_mappings, struct pandecode_mapped_memory *,
                         mem) {
      (*mem)->ro = false;
      mprotect((*mem)->addr, (*mem)->length, PROT_READ | PROT_WRITE);
   }
   util_dynarray_clear(&ctx->ro_mappings);
}
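
/*
 * A minimal sketch of the intended round trip, assuming the caller holds
 * ctx->lock: every mapping returned by
 * pandecode_find_mapped_gpu_mem_containing() is write-protected as a side
 * effect, so a decode pass typically ends with a blanket restore:
 *
 *    simple_mtx_lock(&ctx->lock);
 *    // ... decode, dereferencing GPU pointers ...
 *    pandecode_map_read_write(ctx);  // undo all the PROT_READ protections
 *    simple_mtx_unlock(&ctx->lock);
 *
 * A stray CPU write to a protected mapping in between faults immediately
 * instead of silently corrupting the dump.
 */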

static void
pandecode_add_name(struct pandecode_context *ctx,
                   struct pandecode_mapped_memory *mem, uint64_t gpu_va,
                   const char *name)
{
   simple_mtx_assert_locked(&ctx->lock);

   if (!name) {
      /* If we don't have a name, assign one */

      snprintf(mem->name, sizeof(mem->name) - 1, "memory_%" PRIx64, gpu_va);
   } else {
      assert((strlen(name) + 1) < sizeof(mem->name));
      memcpy(mem->name, name, strlen(name) + 1);
   }
}

void
pandecode_inject_mmap(struct pandecode_context *ctx, uint64_t gpu_va, void *cpu,
                      unsigned sz, const char *name)
{
   simple_mtx_lock(&ctx->lock);

   /* First, check whether we already mapped this address; if so, just update
    * the existing entry. */

   struct pandecode_mapped_memory *existing =
      pandecode_find_mapped_gpu_mem_containing_rw(ctx, gpu_va);

   if (existing && existing->gpu_va == gpu_va) {
      existing->length = sz;
      existing->addr = cpu;
      pandecode_add_name(ctx, existing, gpu_va, name);
   } else {
      /* Otherwise, add a fresh mapping */
      struct pandecode_mapped_memory *mapped_mem =
         calloc(1, sizeof(*mapped_mem));

      mapped_mem->gpu_va = gpu_va;
      mapped_mem->length = sz;
      mapped_mem->addr = cpu;
      pandecode_add_name(ctx, mapped_mem, gpu_va, name);

      /* Add it to the tree */
      rb_tree_insert(&ctx->mmap_tree, &mapped_mem->node, pandecode_cmp);
   }

   simple_mtx_unlock(&ctx->lock);
}

void
pandecode_inject_free(struct pandecode_context *ctx, uint64_t gpu_va,
                      unsigned sz)
{
   simple_mtx_lock(&ctx->lock);

   struct pandecode_mapped_memory *mem =
      pandecode_find_mapped_gpu_mem_containing_rw(ctx, gpu_va);

   if (mem) {
      assert(mem->gpu_va == gpu_va);
      assert(mem->length == sz);

      rb_tree_remove(&ctx->mmap_tree, &mem->node);
      free(mem);
   }

   simple_mtx_unlock(&ctx->lock);
}
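
/*
 * Usage sketch with hypothetical names: a driver mirrors its buffer-object
 * lifetime into the decoder, e.g.
 *
 *    pandecode_inject_mmap(ctx, bo->gpu_va, bo->cpu, bo->size, "varyings");
 *    // ... submit and decode work referencing the BO ...
 *    pandecode_inject_free(ctx, bo->gpu_va, bo->size);
 *
 * where "bo" stands for whatever buffer-object type the driver uses; its
 * fields here are placeholders, not part of this API.
 */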

char *
pointer_as_memory_reference(struct pandecode_context *ctx, uint64_t ptr)
{
   simple_mtx_assert_locked(&ctx->lock);

   struct pandecode_mapped_memory *mapped;
   char *out = malloc(128);

   /* Try to find the corresponding mapped zone */

   mapped = pandecode_find_mapped_gpu_mem_containing_rw(ctx, ptr);

   if (mapped) {
      snprintf(out, 128, "%s + %d", mapped->name, (int)(ptr - mapped->gpu_va));
      return out;
   }

   /* Just use the raw address if other options are exhausted */

   snprintf(out, 128, "0x%" PRIx64, ptr);
   return out;
}
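
/*
 * For example (illustrative values), a pointer 0x40 bytes into a mapping
 * named "shader" formats as "shader + 64", while an address inside no known
 * mapping falls back to the raw form "0x8000010040". The caller owns the
 * returned string and must free() it.
 */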

void
pandecode_dump_file_open(struct pandecode_context *ctx)
{
   simple_mtx_assert_locked(&ctx->lock);

   /* This does a getenv every frame, so it is possible to use
    * setenv to change the base at runtime.
    */
   const char *dump_file_base =
      debug_get_option("PANDECODE_DUMP_FILE", "pandecode.dump");
   if (!strcmp(dump_file_base, "stderr"))
      ctx->dump_stream = stderr;
   else if (!ctx->dump_stream) {
      char buffer[1024];
      snprintf(buffer, sizeof(buffer), "%s.ctx-%d.%04d", dump_file_base,
               ctx->id, ctx->dump_frame_count);
      printf("pandecode: dump command stream to file %s\n", buffer);
      ctx->dump_stream = fopen(buffer, "w");
      if (!ctx->dump_stream)
         fprintf(stderr,
                 "pandecode: failed to open command stream log file %s\n",
                 buffer);
   }
}
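
/*
 * For example, assuming a shell launching the traced application:
 *
 *    PANDECODE_DUMP_FILE=stderr my_app      # log to stderr
 *    PANDECODE_DUMP_FILE=/tmp/trace my_app  # /tmp/trace.ctx-0.0000, ...
 *
 * Because the option is re-read on every open, calling
 * setenv("PANDECODE_DUMP_FILE", ..., 1) between frames redirects subsequent
 * dumps.
 */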

static void
pandecode_dump_file_close(struct pandecode_context *ctx)
{
   simple_mtx_assert_locked(&ctx->lock);

   if (ctx->dump_stream && ctx->dump_stream != stderr) {
      if (fclose(ctx->dump_stream))
         perror("pandecode: dump file");

      ctx->dump_stream = NULL;
   }
}

struct pandecode_context *
pandecode_create_context(bool to_stderr)
{
   struct pandecode_context *ctx = calloc(1, sizeof(*ctx));

   /* Not thread safe, but we shouldn't ever hit this, and even if we do, the
    * worst that could happen is having the files dumped with their filenames
    * in a different order. */
   ctx->id = num_ctxs++;

   /* This will be initialized later and can be changed at run time through
    * the PANDECODE_DUMP_FILE environment variable.
    */
   ctx->dump_stream = to_stderr ? stderr : NULL;

   rb_tree_init(&ctx->mmap_tree);
   util_dynarray_init(&ctx->ro_mappings, NULL);

   simple_mtx_t mtx_init = SIMPLE_MTX_INITIALIZER;
   memcpy(&ctx->lock, &mtx_init, sizeof(simple_mtx_t));

   return ctx;
}
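
/*
 * Typical lifecycle, as a sketch built only from the functions in this file:
 *
 *    struct pandecode_context *ctx = pandecode_create_context(false);
 *    pandecode_inject_mmap(ctx, gpu_va, cpu, size, NULL);
 *    pandecode_jc(ctx, jc_gpu_va, gpu_id);  // decode one job chain
 *    pandecode_next_frame(ctx);             // rotate the dump file
 *    pandecode_destroy_context(ctx);
 */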

void
pandecode_next_frame(struct pandecode_context *ctx)
{
   simple_mtx_lock(&ctx->lock);

   pandecode_dump_file_close(ctx);
   ctx->dump_frame_count++;

   simple_mtx_unlock(&ctx->lock);
}

void
pandecode_destroy_context(struct pandecode_context *ctx)
{
   simple_mtx_lock(&ctx->lock);

   rb_tree_foreach_safe(struct pandecode_mapped_memory, it, &ctx->mmap_tree,
                        node) {
      rb_tree_remove(&ctx->mmap_tree, &it->node);
      free(it);
   }

   util_dynarray_fini(&ctx->ro_mappings);
   pandecode_dump_file_close(ctx);

   simple_mtx_unlock(&ctx->lock);

   free(ctx);
}

void
pandecode_dump_mappings(struct pandecode_context *ctx)
{
   simple_mtx_lock(&ctx->lock);

   pandecode_dump_file_open(ctx);

   /* Opening the dump file can fail; bail rather than fprintf to NULL. */
   if (!ctx->dump_stream) {
      simple_mtx_unlock(&ctx->lock);
      return;
   }

   rb_tree_foreach(struct pandecode_mapped_memory, it, &ctx->mmap_tree, node) {
      if (!it->addr || !it->length)
         continue;

      fprintf(ctx->dump_stream, "Buffer: %s gpu %" PRIx64 "\n\n", it->name,
              it->gpu_va);

      u_hexdump(ctx->dump_stream, it->addr, it->length, false);
      fprintf(ctx->dump_stream, "\n");
   }

   fflush(ctx->dump_stream);
   simple_mtx_unlock(&ctx->lock);
}
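
/*
 * Each live mapping is emitted as a header in the format above, e.g.
 * "Buffer: memory_8000010000 gpu 8000010000" (hypothetical address),
 * followed by a hex dump of its current contents.
 */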

void
pandecode_abort_on_fault(struct pandecode_context *ctx, mali_ptr jc_gpu_va,
                         unsigned gpu_id)
{
   simple_mtx_lock(&ctx->lock);

   switch (pan_arch(gpu_id)) {
   case 4:
      pandecode_abort_on_fault_v4(ctx, jc_gpu_va);
      break;
   case 5:
      pandecode_abort_on_fault_v5(ctx, jc_gpu_va);
      break;
   case 6:
      pandecode_abort_on_fault_v6(ctx, jc_gpu_va);
      break;
   case 7:
      pandecode_abort_on_fault_v7(ctx, jc_gpu_va);
      break;
   case 9:
      pandecode_abort_on_fault_v9(ctx, jc_gpu_va);
      break;
   default:
      unreachable("Unsupported architecture");
   }

   simple_mtx_unlock(&ctx->lock);
}

void
pandecode_jc(struct pandecode_context *ctx, mali_ptr jc_gpu_va, unsigned gpu_id)
{
   simple_mtx_lock(&ctx->lock);

   switch (pan_arch(gpu_id)) {
   case 4:
      pandecode_jc_v4(ctx, jc_gpu_va, gpu_id);
      break;
   case 5:
      pandecode_jc_v5(ctx, jc_gpu_va, gpu_id);
      break;
   case 6:
      pandecode_jc_v6(ctx, jc_gpu_va, gpu_id);
      break;
   case 7:
      pandecode_jc_v7(ctx, jc_gpu_va, gpu_id);
      break;
   case 9:
      pandecode_jc_v9(ctx, jc_gpu_va, gpu_id);
      break;
   default:
      unreachable("Unsupported architecture");
   }

   simple_mtx_unlock(&ctx->lock);
}

void
pandecode_cs(struct pandecode_context *ctx, mali_ptr queue_gpu_va,
             uint32_t size, unsigned gpu_id, uint32_t *regs)
{
   simple_mtx_lock(&ctx->lock);

   switch (pan_arch(gpu_id)) {
   case 10:
      pandecode_cs_v10(ctx, queue_gpu_va, size, gpu_id, regs);
      break;
   default:
      unreachable("Unsupported architecture");
   }

   simple_mtx_unlock(&ctx->lock);
}

void
pandecode_shader_disassemble(struct pandecode_context *ctx, mali_ptr shader_ptr,
                             unsigned gpu_id)
{
   uint8_t *PANDECODE_PTR_VAR(ctx, code, shader_ptr);

   /* Compute maximum possible size */
   struct pandecode_mapped_memory *mem =
      pandecode_find_mapped_gpu_mem_containing(ctx, shader_ptr);
   size_t sz = mem->length - (shader_ptr - mem->gpu_va);

   /* Print some boilerplate to clearly denote the assembly (which doesn't
    * obey indentation rules), and actually do the disassembly! */

   pandecode_log_cont(ctx, "\nShader %p (GPU VA %" PRIx64 ") sz %zu\n", code,
                      shader_ptr, sz);

   if (pan_arch(gpu_id) >= 9)
      disassemble_valhall(ctx->dump_stream, (const uint64_t *)code, sz, true);
   else if (pan_arch(gpu_id) >= 6)
      disassemble_bifrost(ctx->dump_stream, code, sz, false);
   else
      disassemble_midgard(ctx->dump_stream, code, sz, gpu_id, true);

   pandecode_log_cont(ctx, "\n\n");
}