• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 * Copyright 2017-2019 Alyssa Rosenzweig
 * Copyright 2017-2019 Connor Abbott
 * Copyright 2019 Collabora, Ltd.
 * SPDX-License-Identifier: MIT
 */
7 
8 #include <ctype.h>
9 #include <memory.h>
10 #include <stdarg.h>
11 #include <stdbool.h>
12 #include <stdio.h>
13 #include <stdlib.h>
14 #include "util/u_dynarray.h"
15 #include "util/u_math.h"
16 #include <sys/mman.h>
17 #include <agx_pack.h>
18 
19 #include "util/u_hexdump.h"
20 #include "decode.h"
21 #include "unstable_asahi_drm.h"
22 #ifdef __APPLE__
23 #include "agx_iokit.h"
24 #endif
25 
26 struct libagxdecode_config lib_config;
27 
/*
 * Placeholder disassembler. Accepts a code buffer, its maximum length and an
 * output stream, and does nothing with them.
 */
static void
agx_disassemble(void *_code, size_t maxlen, FILE *fp)
{
   /* stub: every argument is intentionally ignored */
   (void)_code;
   (void)maxlen;
   (void)fp;
}
33 
34 FILE *agxdecode_dump_stream;
35 
36 struct agxdecode_ctx {
37    struct util_dynarray mmap_array;
38    uint64_t shader_base;
39 };
40 
41 static uint64_t
decode_usc(struct agxdecode_ctx * ctx,uint64_t addr)42 decode_usc(struct agxdecode_ctx *ctx, uint64_t addr)
43 {
44    return ctx->shader_base + addr;
45 }
46 
47 struct agxdecode_ctx *
agxdecode_new_context(uint64_t shader_base)48 agxdecode_new_context(uint64_t shader_base)
49 {
50    struct agxdecode_ctx *ctx = calloc(1, sizeof(struct agxdecode_ctx));
51    ctx->shader_base = shader_base;
52    return ctx;
53 }
54 
55 void
agxdecode_destroy_context(struct agxdecode_ctx * ctx)56 agxdecode_destroy_context(struct agxdecode_ctx *ctx)
57 {
58    free(ctx);
59 }
60 
61 static struct agx_bo *
agxdecode_find_mapped_gpu_mem_containing(struct agxdecode_ctx * ctx,uint64_t addr)62 agxdecode_find_mapped_gpu_mem_containing(struct agxdecode_ctx *ctx,
63                                          uint64_t addr)
64 {
65    util_dynarray_foreach(&ctx->mmap_array, struct agx_bo, it) {
66       if (it->va && addr >= it->va->addr && (addr - it->va->addr) < it->size)
67          return it;
68    }
69 
70    return NULL;
71 }
72 
73 static struct agx_bo *
agxdecode_find_handle(struct agxdecode_ctx * ctx,unsigned handle,unsigned type)74 agxdecode_find_handle(struct agxdecode_ctx *ctx, unsigned handle, unsigned type)
75 {
76    util_dynarray_foreach(&ctx->mmap_array, struct agx_bo, it) {
77       if (it->handle == handle)
78          return it;
79    }
80 
81    return NULL;
82 }
83 
84 static size_t
__agxdecode_fetch_gpu_mem(struct agxdecode_ctx * ctx,const struct agx_bo * mem,uint64_t gpu_va,size_t size,void * buf,int line,const char * filename)85 __agxdecode_fetch_gpu_mem(struct agxdecode_ctx *ctx, const struct agx_bo *mem,
86                           uint64_t gpu_va, size_t size, void *buf, int line,
87                           const char *filename)
88 {
89    if (lib_config.read_gpu_mem)
90       return lib_config.read_gpu_mem(gpu_va, size, buf);
91 
92    if (!mem)
93       mem = agxdecode_find_mapped_gpu_mem_containing(ctx, gpu_va);
94 
95    if (!mem) {
96       fprintf(stderr, "Access to unknown memory %" PRIx64 " in %s:%d\n", gpu_va,
97               filename, line);
98       fflush(agxdecode_dump_stream);
99       assert(0);
100    }
101 
102    assert(mem);
103 
104    if (size + (gpu_va - mem->va->addr) > mem->size) {
105       fprintf(stderr,
106               "Overflowing to unknown memory %" PRIx64
107               " of size %zu (max size %zu) in %s:%d\n",
108               gpu_va, size, (size_t)(mem->size - (gpu_va - mem->va->addr)),
109               filename, line);
110       fflush(agxdecode_dump_stream);
111       assert(0);
112    }
113 
114    memcpy(buf, mem->_map + gpu_va - mem->va->addr, size);
115 
116    return size;
117 }
118 
/* Fetch `size` bytes at `gpu_va` into `buf`, looking the BO up by address and
 * recording the call site for diagnostics. */
#define agxdecode_fetch_gpu_mem(ctx, gpu_va, size, buf)                        \
   __agxdecode_fetch_gpu_mem(ctx, NULL, gpu_va, size, buf, __LINE__, __FILE__)

/* Fetch sizeof(buf) bytes. `buf` must be an array (fixed-size or VLA), not a
 * pointer, or sizeof yields the pointer size. */
#define agxdecode_fetch_gpu_array(ctx, gpu_va, buf)                            \
   agxdecode_fetch_gpu_mem(ctx, gpu_va, sizeof(buf), buf)
124 
/* Helpers for parsing the cmdstream */

/* Write the heading `str`, then pretty-print the already-unpacked `var` of
 * packet type T to the dump stream. */
#define DUMP_UNPACKED(T, var, str)                                             \
   {                                                                           \
      agxdecode_log(str);                                                      \
      agx_print(agxdecode_dump_stream, T, var, 2);                             \
   }

/* Unpack packet type T from `cl` into a scoped local named `temp`, then dump
 * it under the heading `str`. */
#define DUMP_CL(T, cl, str)                                                    \
   {                                                                           \
      agx_unpack(agxdecode_dump_stream, cl, T, temp);                          \
      DUMP_UNPACKED(T, temp, str "\n");                                        \
   }

/* Print "<field> = <value>" for obj->field using printf format `fmt`. */
#define DUMP_FIELD(obj, fmt, field)                                            \
   {                                                                           \
      fprintf(agxdecode_dump_stream, #field " = " fmt "\n", (obj)->field);     \
   }

/* Write a string verbatim to the dump stream. */
#define agxdecode_log(str) fputs(str, agxdecode_dump_stream)

/* Write a string to the dump stream prefixed with "// ". */
#define agxdecode_msg(str) fprintf(agxdecode_dump_stream, "// %s", str)
146 
typedef struct drm_asahi_params_global decoder_params;

/*
 * Abstraction for command stream parsing.
 *
 * A decoder is handed a pointer to the next command in `map`. It returns the
 * number of bytes it consumed, 0 if it could not decode anything, or one of
 * the STATE_* codes below. For STATE_LINK and STATE_CALL it stores the target
 * address in `*link`.
 */
typedef unsigned (*decode_cmd)(struct agxdecode_ctx *ctx, const uint8_t *map,
                               uint64_t *link, bool verbose,
                               decoder_params *params, void *data);

#define STATE_DONE (0xFFFFFFFFu) /* stop decoding this stream */
#define STATE_LINK (0xFFFFFFFEu) /* continue decoding at *link */
#define STATE_CALL (0xFFFFFFFDu) /* continue at *link, remembering a return */
#define STATE_RET  (0xFFFFFFFCu) /* resume at the most recent return address */
158 
159 static void
agxdecode_stateful(struct agxdecode_ctx * ctx,uint64_t va,const char * label,decode_cmd decoder,bool verbose,decoder_params * params,void * data)160 agxdecode_stateful(struct agxdecode_ctx *ctx, uint64_t va, const char *label,
161                    decode_cmd decoder, bool verbose, decoder_params *params,
162                    void *data)
163 {
164    uint64_t stack[16];
165    unsigned sp = 0;
166 
167    uint8_t buf[1024];
168    size_t size = sizeof(buf);
169    if (!lib_config.read_gpu_mem) {
170       struct agx_bo *alloc = agxdecode_find_mapped_gpu_mem_containing(ctx, va);
171       assert(alloc != NULL && "nonexistent object");
172       fprintf(agxdecode_dump_stream, "%s (%" PRIx64 ", handle %u)\n", label, va,
173               alloc->handle);
174       size = MIN2(size, alloc->size - (va - alloc->va->addr));
175    } else {
176       fprintf(agxdecode_dump_stream, "%s (%" PRIx64 ")\n", label, va);
177    }
178    fflush(agxdecode_dump_stream);
179 
180    int len = agxdecode_fetch_gpu_mem(ctx, va, size, buf);
181 
182    int left = len;
183    uint8_t *map = buf;
184    uint64_t link = 0;
185 
186    fflush(agxdecode_dump_stream);
187 
188    while (left) {
189       if (len <= 0) {
190          fprintf(agxdecode_dump_stream, "!! Failed to read GPU memory\n");
191          fflush(agxdecode_dump_stream);
192          return;
193       }
194 
195       unsigned count = decoder(ctx, map, &link, verbose, params, data);
196 
197       /* If we fail to decode, default to a hexdump (don't hang) */
198       if (count == 0) {
199          u_hexdump(agxdecode_dump_stream, map, 8, false);
200          count = 8;
201       }
202 
203       fflush(agxdecode_dump_stream);
204       if (count == STATE_DONE) {
205          break;
206       } else if (count == STATE_LINK) {
207          fprintf(agxdecode_dump_stream, "Linking to 0x%" PRIx64 "\n\n", link);
208          va = link;
209          left = len = agxdecode_fetch_gpu_array(ctx, va, buf);
210          map = buf;
211       } else if (count == STATE_CALL) {
212          fprintf(agxdecode_dump_stream,
213                  "Calling 0x%" PRIx64 " (return = 0x%" PRIx64 ")\n\n", link,
214                  va + 8);
215          assert(sp < ARRAY_SIZE(stack));
216          stack[sp++] = va + 8;
217          va = link;
218          left = len = agxdecode_fetch_gpu_array(ctx, va, buf);
219          map = buf;
220       } else if (count == STATE_RET) {
221          assert(sp > 0);
222          va = stack[--sp];
223          fprintf(agxdecode_dump_stream, "Returning to 0x%" PRIx64 "\n\n", va);
224          left = len = agxdecode_fetch_gpu_array(ctx, va, buf);
225          map = buf;
226       } else {
227          va += count;
228          map += count;
229          left -= count;
230 
231          if (left < 512 && len == sizeof(buf)) {
232             left = len = agxdecode_fetch_gpu_array(ctx, va, buf);
233             map = buf;
234          }
235       }
236    }
237 }
238 
239 static void
agxdecode_texture_pbe(struct agxdecode_ctx * ctx,const void * map)240 agxdecode_texture_pbe(struct agxdecode_ctx *ctx, const void *map)
241 {
242    struct AGX_TEXTURE tex;
243    struct AGX_PBE pbe;
244 
245    bool valid_texture = AGX_TEXTURE_unpack(NULL, map, &tex);
246    bool valid_pbe = AGX_PBE_unpack(NULL, map, &pbe);
247 
248    /* Try to guess if it's texture or PBE */
249    valid_texture &=
250       tex.swizzle_r <= AGX_CHANNEL_0 && tex.swizzle_g <= AGX_CHANNEL_0 &&
251       tex.swizzle_b <= AGX_CHANNEL_0 && tex.swizzle_a <= AGX_CHANNEL_0;
252 
253    if (valid_texture && !valid_pbe) {
254       DUMP_CL(TEXTURE, map, "Texture");
255    } else if (valid_pbe && !valid_texture) {
256       DUMP_CL(PBE, map, "PBE");
257    } else {
258       if (!valid_texture) {
259          assert(!valid_pbe);
260          fprintf(agxdecode_dump_stream, "XXX: invalid texture/PBE\n");
261       }
262 
263       DUMP_CL(TEXTURE, map, "Texture");
264       DUMP_CL(PBE, map, "PBE");
265    }
266 }
267 
268 static unsigned
agxdecode_usc(struct agxdecode_ctx * ctx,const uint8_t * map,UNUSED uint64_t * link,UNUSED bool verbose,decoder_params * params,UNUSED void * data)269 agxdecode_usc(struct agxdecode_ctx *ctx, const uint8_t *map,
270               UNUSED uint64_t *link, UNUSED bool verbose,
271               decoder_params *params, UNUSED void *data)
272 {
273    enum agx_sampler_states *sampler_states = data;
274    enum agx_usc_control type = map[0];
275    uint8_t buf[3072];
276 
277    bool extended_samplers =
278       (sampler_states != NULL) &&
279       (((*sampler_states) == AGX_SAMPLER_STATES_8_EXTENDED) ||
280        ((*sampler_states) == AGX_SAMPLER_STATES_16_EXTENDED));
281 
282 #define USC_CASE(name, human)                                                  \
283    case AGX_USC_CONTROL_##name: {                                              \
284       DUMP_CL(USC_##name, map, human);                                         \
285       return AGX_USC_##name##_LENGTH;                                          \
286    }
287 
288    switch (type) {
289    case AGX_USC_CONTROL_NO_PRESHADER: {
290       DUMP_CL(USC_NO_PRESHADER, map, "No preshader");
291       return STATE_DONE;
292    }
293 
294    case AGX_USC_CONTROL_PRESHADER: {
295       agx_unpack(agxdecode_dump_stream, map, USC_PRESHADER, ctrl);
296       DUMP_UNPACKED(USC_PRESHADER, ctrl, "Preshader\n");
297 
298       agx_disassemble(
299          buf, agxdecode_fetch_gpu_array(ctx, decode_usc(ctx, ctrl.code), buf),
300          agxdecode_dump_stream);
301 
302       return STATE_DONE;
303    }
304 
305    case AGX_USC_CONTROL_SHADER: {
306       agx_unpack(agxdecode_dump_stream, map, USC_SHADER, ctrl);
307       DUMP_UNPACKED(USC_SHADER, ctrl, "Shader\n");
308 
309       agxdecode_log("\n");
310       agx_disassemble(
311          buf, agxdecode_fetch_gpu_array(ctx, decode_usc(ctx, ctrl.code), buf),
312          agxdecode_dump_stream);
313       agxdecode_log("\n");
314 
315       return AGX_USC_SHADER_LENGTH;
316    }
317 
318    case AGX_USC_CONTROL_SAMPLER: {
319       agx_unpack(agxdecode_dump_stream, map, USC_SAMPLER, temp);
320       DUMP_UNPACKED(USC_SAMPLER, temp, "Sampler state\n");
321 
322       size_t stride =
323          AGX_SAMPLER_LENGTH + (extended_samplers ? AGX_BORDER_LENGTH : 0);
324       uint8_t *samp = alloca(stride * temp.count);
325 
326       agxdecode_fetch_gpu_mem(ctx, temp.buffer, stride * temp.count, samp);
327 
328       for (unsigned i = 0; i < temp.count; ++i) {
329          DUMP_CL(SAMPLER, samp, "Sampler");
330          samp += AGX_SAMPLER_LENGTH;
331 
332          if (extended_samplers) {
333             DUMP_CL(BORDER, samp, "Border");
334             samp += AGX_BORDER_LENGTH;
335          }
336       }
337 
338       return AGX_USC_SAMPLER_LENGTH;
339    }
340 
341    case AGX_USC_CONTROL_TEXTURE: {
342       agx_unpack(agxdecode_dump_stream, map, USC_TEXTURE, temp);
343       DUMP_UNPACKED(USC_TEXTURE, temp, "Texture state\n");
344 
345       uint8_t buf[AGX_TEXTURE_LENGTH * temp.count];
346       uint8_t *tex = buf;
347 
348       agxdecode_fetch_gpu_array(ctx, temp.buffer, buf);
349 
350       /* Note: samplers only need 8 byte alignment? */
351       for (unsigned i = 0; i < temp.count; ++i) {
352          fprintf(agxdecode_dump_stream, "ts%u: \n", temp.start + i);
353          agxdecode_texture_pbe(ctx, tex);
354 
355          tex += AGX_TEXTURE_LENGTH;
356       }
357 
358       return AGX_USC_TEXTURE_LENGTH;
359    }
360 
361    case AGX_USC_CONTROL_UNIFORM: {
362       agx_unpack(agxdecode_dump_stream, map, USC_UNIFORM, temp);
363       DUMP_UNPACKED(USC_UNIFORM, temp, "Uniform\n");
364 
365       uint8_t buf[2 * temp.size_halfs];
366       agxdecode_fetch_gpu_array(ctx, temp.buffer, buf);
367       u_hexdump(agxdecode_dump_stream, buf, 2 * temp.size_halfs, false);
368 
369       return AGX_USC_UNIFORM_LENGTH;
370    }
371 
372    case AGX_USC_CONTROL_UNIFORM_HIGH: {
373       agx_unpack(agxdecode_dump_stream, map, USC_UNIFORM_HIGH, temp);
374       DUMP_UNPACKED(USC_UNIFORM_HIGH, temp, "Uniform (high)\n");
375 
376       uint8_t buf[2 * temp.size_halfs];
377       agxdecode_fetch_gpu_array(ctx, temp.buffer, buf);
378       u_hexdump(agxdecode_dump_stream, buf, 2 * temp.size_halfs, false);
379 
380       return AGX_USC_UNIFORM_HIGH_LENGTH;
381    }
382 
383       USC_CASE(FRAGMENT_PROPERTIES, "Fragment properties");
384       USC_CASE(SHARED, "Shared");
385       USC_CASE(REGISTERS, "Registers");
386 
387    default:
388       fprintf(agxdecode_dump_stream, "Unknown USC control type: %u\n", type);
389       u_hexdump(agxdecode_dump_stream, map, 8, false);
390       return 8;
391    }
392 
393 #undef USC_CASE
394 }
395 
/*
 * If hdr.header_name is set, dump one packed AGX_<struct_name> record at `map`
 * and advance `map` past it.
 *
 * Relies on `hdr`, `base` and `size` being in scope at the expansion site, and
 * returns from the enclosing (void) function if the record would extend past
 * base + size.
 */
#define PPP_PRINT(map, header_name, struct_name, human)                        \
   if (hdr.header_name) {                                                      \
      if (((map + AGX_##struct_name##_LENGTH) > (base + size))) {              \
         fprintf(agxdecode_dump_stream, "Buffer overrun in PPP update\n");     \
         return;                                                               \
      }                                                                        \
      DUMP_CL(struct_name, map, human);                                        \
      map += AGX_##struct_name##_LENGTH;                                       \
      fflush(agxdecode_dump_stream);                                           \
   }
406 
407 static void
agxdecode_record(struct agxdecode_ctx * ctx,uint64_t va,size_t size,bool verbose,decoder_params * params)408 agxdecode_record(struct agxdecode_ctx *ctx, uint64_t va, size_t size,
409                  bool verbose, decoder_params *params)
410 {
411    uint8_t buf[size];
412    uint8_t *base = buf;
413    uint8_t *map = base;
414 
415    agxdecode_fetch_gpu_array(ctx, va, buf);
416 
417    agx_unpack(agxdecode_dump_stream, map, PPP_HEADER, hdr);
418    map += AGX_PPP_HEADER_LENGTH;
419 
420    PPP_PRINT(map, fragment_control, FRAGMENT_CONTROL, "Fragment control");
421    PPP_PRINT(map, fragment_control_2, FRAGMENT_CONTROL, "Fragment control 2");
422    PPP_PRINT(map, fragment_front_face, FRAGMENT_FACE, "Front face");
423    PPP_PRINT(map, fragment_front_face_2, FRAGMENT_FACE_2, "Front face 2");
424    PPP_PRINT(map, fragment_front_stencil, FRAGMENT_STENCIL, "Front stencil");
425    PPP_PRINT(map, fragment_back_face, FRAGMENT_FACE, "Back face");
426    PPP_PRINT(map, fragment_back_face_2, FRAGMENT_FACE_2, "Back face 2");
427    PPP_PRINT(map, fragment_back_stencil, FRAGMENT_STENCIL, "Back stencil");
428    PPP_PRINT(map, depth_bias_scissor, DEPTH_BIAS_SCISSOR, "Depth bias/scissor");
429 
430    if (hdr.region_clip) {
431       if (((map + (AGX_REGION_CLIP_LENGTH * hdr.viewport_count)) >
432            (base + size))) {
433          fprintf(agxdecode_dump_stream, "Buffer overrun in PPP update\n");
434          return;
435       }
436 
437       for (unsigned i = 0; i < hdr.viewport_count; ++i) {
438          DUMP_CL(REGION_CLIP, map, "Region clip");
439          map += AGX_REGION_CLIP_LENGTH;
440          fflush(agxdecode_dump_stream);
441       }
442    }
443 
444    if (hdr.viewport) {
445       if (((map + AGX_VIEWPORT_CONTROL_LENGTH +
446             (AGX_VIEWPORT_LENGTH * hdr.viewport_count)) > (base + size))) {
447          fprintf(agxdecode_dump_stream, "Buffer overrun in PPP update\n");
448          return;
449       }
450 
451       DUMP_CL(VIEWPORT_CONTROL, map, "Viewport control");
452       map += AGX_VIEWPORT_CONTROL_LENGTH;
453 
454       for (unsigned i = 0; i < hdr.viewport_count; ++i) {
455          DUMP_CL(VIEWPORT, map, "Viewport");
456          map += AGX_VIEWPORT_LENGTH;
457          fflush(agxdecode_dump_stream);
458       }
459    }
460 
461    PPP_PRINT(map, w_clamp, W_CLAMP, "W clamp");
462    PPP_PRINT(map, output_select, OUTPUT_SELECT, "Output select");
463    PPP_PRINT(map, varying_counts_32, VARYING_COUNTS, "Varying counts 32");
464    PPP_PRINT(map, varying_counts_16, VARYING_COUNTS, "Varying counts 16");
465    PPP_PRINT(map, cull, CULL, "Cull");
466    PPP_PRINT(map, cull_2, CULL_2, "Cull 2");
467 
468    if (hdr.fragment_shader) {
469       agx_unpack(agxdecode_dump_stream, map, FRAGMENT_SHADER_WORD_0, frag_0);
470       agx_unpack(agxdecode_dump_stream, map + 4, FRAGMENT_SHADER_WORD_1,
471                  frag_1);
472       agx_unpack(agxdecode_dump_stream, map + 8, FRAGMENT_SHADER_WORD_2,
473                  frag_2);
474       agxdecode_stateful(ctx, decode_usc(ctx, frag_1.pipeline),
475                          "Fragment pipeline", agxdecode_usc, verbose, params,
476                          &frag_0.sampler_state_register_count);
477 
478       if (frag_2.cf_bindings) {
479          uint8_t buf[128];
480          uint8_t *cf = buf;
481 
482          agxdecode_fetch_gpu_array(ctx, decode_usc(ctx, frag_2.cf_bindings),
483                                    buf);
484          u_hexdump(agxdecode_dump_stream, cf, 128, false);
485 
486          DUMP_CL(CF_BINDING_HEADER, cf, "Coefficient binding header:");
487          cf += AGX_CF_BINDING_HEADER_LENGTH;
488 
489          for (unsigned i = 0; i < frag_0.cf_binding_count; ++i) {
490             DUMP_CL(CF_BINDING, cf, "Coefficient binding:");
491             cf += AGX_CF_BINDING_LENGTH;
492          }
493       }
494 
495       DUMP_CL(FRAGMENT_SHADER_WORD_0, map, "Fragment shader word 0");
496       DUMP_CL(FRAGMENT_SHADER_WORD_1, map + 4, "Fragment shader word 1");
497       DUMP_CL(FRAGMENT_SHADER_WORD_2, map + 8, "Fragment shader word 2");
498       DUMP_CL(FRAGMENT_SHADER_WORD_3, map + 12, "Fragment shader word 3");
499       map += 16;
500    }
501 
502    PPP_PRINT(map, occlusion_query, FRAGMENT_OCCLUSION_QUERY, "Occlusion query");
503    PPP_PRINT(map, occlusion_query_2, FRAGMENT_OCCLUSION_QUERY_2,
504              "Occlusion query 2");
505    PPP_PRINT(map, output_unknown, OUTPUT_UNKNOWN, "Output unknown");
506    PPP_PRINT(map, output_size, OUTPUT_SIZE, "Output size");
507    PPP_PRINT(map, varying_word_2, VARYING_2, "Varying word 2");
508 
509    /* PPP print checks we don't read too much, now check we read enough */
510    assert(map == (base + size) && "invalid size of PPP update");
511 }
512 
513 static unsigned
agxdecode_cdm(struct agxdecode_ctx * ctx,const uint8_t * map,uint64_t * link,bool verbose,decoder_params * params,UNUSED void * data)514 agxdecode_cdm(struct agxdecode_ctx *ctx, const uint8_t *map, uint64_t *link,
515               bool verbose, decoder_params *params, UNUSED void *data)
516 {
517    /* Bits 29-31 contain the block type */
518    enum agx_cdm_block_type block_type = (map[3] >> 5);
519 
520    switch (block_type) {
521    case AGX_CDM_BLOCK_TYPE_LAUNCH: {
522       size_t length =
523          AGX_CDM_LAUNCH_WORD_0_LENGTH + AGX_CDM_LAUNCH_WORD_1_LENGTH;
524 
525 #define CDM_PRINT(STRUCT_NAME, human)                                          \
526    do {                                                                        \
527       DUMP_CL(CDM_##STRUCT_NAME, map, human);                                  \
528       map += AGX_CDM_##STRUCT_NAME##_LENGTH;                                   \
529       length += AGX_CDM_##STRUCT_NAME##_LENGTH;                                \
530    } while (0);
531 
532       agx_unpack(agxdecode_dump_stream, map + 0, CDM_LAUNCH_WORD_0, hdr0);
533       agx_unpack(agxdecode_dump_stream, map + 4, CDM_LAUNCH_WORD_1, hdr1);
534 
535       agxdecode_stateful(ctx, decode_usc(ctx, hdr1.pipeline), "Pipeline",
536                          agxdecode_usc, verbose, params,
537                          &hdr0.sampler_state_register_count);
538       DUMP_UNPACKED(CDM_LAUNCH_WORD_0, hdr0, "Compute\n");
539       DUMP_UNPACKED(CDM_LAUNCH_WORD_1, hdr1, "Compute\n");
540       map += 8;
541 
542       /* Added in G14X */
543       if (params->gpu_generation >= 14 && params->num_clusters_total > 1)
544          CDM_PRINT(UNK_G14X, "Unknown G14X");
545 
546       switch (hdr0.mode) {
547       case AGX_CDM_MODE_DIRECT:
548          CDM_PRINT(GLOBAL_SIZE, "Global size");
549          CDM_PRINT(LOCAL_SIZE, "Local size");
550          break;
551       case AGX_CDM_MODE_INDIRECT_GLOBAL:
552          CDM_PRINT(INDIRECT, "Indirect buffer");
553          CDM_PRINT(LOCAL_SIZE, "Local size");
554          break;
555       case AGX_CDM_MODE_INDIRECT_LOCAL:
556          CDM_PRINT(INDIRECT, "Indirect buffer");
557          break;
558       default:
559          fprintf(agxdecode_dump_stream, "Unknown CDM mode: %u\n", hdr0.mode);
560          break;
561       }
562 
563       return length;
564    }
565 
566    case AGX_CDM_BLOCK_TYPE_STREAM_LINK: {
567       agx_unpack(agxdecode_dump_stream, map, CDM_STREAM_LINK, hdr);
568       DUMP_UNPACKED(CDM_STREAM_LINK, hdr, "Stream Link\n");
569       *link = hdr.target_lo | (((uint64_t)hdr.target_hi) << 32);
570       return hdr.with_return ? STATE_CALL : STATE_LINK;
571    }
572 
573    case AGX_CDM_BLOCK_TYPE_STREAM_TERMINATE: {
574       DUMP_CL(CDM_STREAM_TERMINATE, map, "Stream Terminate");
575       return STATE_DONE;
576    }
577 
578    case AGX_CDM_BLOCK_TYPE_STREAM_RETURN: {
579       DUMP_CL(CDM_STREAM_RETURN, map, "Stream Return");
580       return STATE_RET;
581    }
582 
583    case AGX_CDM_BLOCK_TYPE_BARRIER: {
584       DUMP_CL(CDM_BARRIER, map, "Barrier");
585       return AGX_CDM_BARRIER_LENGTH;
586    }
587 
588    default:
589       fprintf(agxdecode_dump_stream, "Unknown CDM block type: %u\n",
590               block_type);
591       u_hexdump(agxdecode_dump_stream, map, 8, false);
592       return 8;
593    }
594 }
595 
596 static unsigned
agxdecode_vdm(struct agxdecode_ctx * ctx,const uint8_t * map,uint64_t * link,bool verbose,decoder_params * params,UNUSED void * data)597 agxdecode_vdm(struct agxdecode_ctx *ctx, const uint8_t *map, uint64_t *link,
598               bool verbose, decoder_params *params, UNUSED void *data)
599 {
600    /* Bits 29-31 contain the block type */
601    enum agx_vdm_block_type block_type = (map[3] >> 5);
602 
603    switch (block_type) {
604    case AGX_VDM_BLOCK_TYPE_BARRIER: {
605       agx_unpack(agxdecode_dump_stream, map, VDM_BARRIER, hdr);
606       DUMP_UNPACKED(VDM_BARRIER, hdr, "Barrier\n");
607       return hdr.returns ? STATE_RET : AGX_VDM_BARRIER_LENGTH;
608    }
609 
610    case AGX_VDM_BLOCK_TYPE_PPP_STATE_UPDATE: {
611       agx_unpack(agxdecode_dump_stream, map, PPP_STATE, cmd);
612 
613       uint64_t address = (((uint64_t)cmd.pointer_hi) << 32) | cmd.pointer_lo;
614 
615       if (!lib_config.read_gpu_mem) {
616          struct agx_bo *mem =
617             agxdecode_find_mapped_gpu_mem_containing(ctx, address);
618 
619          if (!mem) {
620             DUMP_UNPACKED(PPP_STATE, cmd, "Non-existent record (XXX)\n");
621             return AGX_PPP_STATE_LENGTH;
622          }
623       }
624 
625       agxdecode_record(ctx, address, cmd.size_words * 4, verbose, params);
626       return AGX_PPP_STATE_LENGTH;
627    }
628 
629    case AGX_VDM_BLOCK_TYPE_VDM_STATE_UPDATE: {
630       size_t length = AGX_VDM_STATE_LENGTH;
631       agx_unpack(agxdecode_dump_stream, map, VDM_STATE, hdr);
632       map += AGX_VDM_STATE_LENGTH;
633 
634 #define VDM_PRINT(header_name, STRUCT_NAME, human)                             \
635    if (hdr.header_name##_present) {                                            \
636       DUMP_CL(VDM_STATE_##STRUCT_NAME, map, human);                            \
637       map += AGX_VDM_STATE_##STRUCT_NAME##_LENGTH;                             \
638       length += AGX_VDM_STATE_##STRUCT_NAME##_LENGTH;                          \
639    }
640 
641       VDM_PRINT(restart_index, RESTART_INDEX, "Restart index");
642 
643       /* If word 1 is present but word 0 is not, fallback to compact samplers */
644       enum agx_sampler_states sampler_states = 0;
645 
646       if (hdr.vertex_shader_word_0_present) {
647          agx_unpack(agxdecode_dump_stream, map, VDM_STATE_VERTEX_SHADER_WORD_0,
648                     word_0);
649          sampler_states = word_0.sampler_state_register_count;
650       }
651 
652       VDM_PRINT(vertex_shader_word_0, VERTEX_SHADER_WORD_0,
653                 "Vertex shader word 0");
654 
655       if (hdr.vertex_shader_word_1_present) {
656          agx_unpack(agxdecode_dump_stream, map, VDM_STATE_VERTEX_SHADER_WORD_1,
657                     word_1);
658          fprintf(agxdecode_dump_stream, "Pipeline %X\n",
659                  (uint32_t)word_1.pipeline);
660          agxdecode_stateful(ctx, decode_usc(ctx, word_1.pipeline), "Pipeline",
661                             agxdecode_usc, verbose, params, &sampler_states);
662       }
663 
664       VDM_PRINT(vertex_shader_word_1, VERTEX_SHADER_WORD_1,
665                 "Vertex shader word 1");
666       VDM_PRINT(vertex_outputs, VERTEX_OUTPUTS, "Vertex outputs");
667       VDM_PRINT(tessellation, TESSELLATION, "Tessellation");
668       VDM_PRINT(vertex_unknown, VERTEX_UNKNOWN, "Vertex unknown");
669       VDM_PRINT(tessellation_scale, TESSELLATION_SCALE, "Tessellation scale");
670 
671 #undef VDM_PRINT
672       return hdr.tessellation_scale_present ? length : ALIGN_POT(length, 8);
673    }
674 
675    case AGX_VDM_BLOCK_TYPE_INDEX_LIST: {
676       size_t length = AGX_INDEX_LIST_LENGTH;
677       agx_unpack(agxdecode_dump_stream, map, INDEX_LIST, hdr);
678       DUMP_UNPACKED(INDEX_LIST, hdr, "Index List\n");
679       map += AGX_INDEX_LIST_LENGTH;
680 
681 #define IDX_PRINT(header_name, STRUCT_NAME, human)                             \
682    if (hdr.header_name##_present) {                                            \
683       DUMP_CL(INDEX_LIST_##STRUCT_NAME, map, human);                           \
684       map += AGX_INDEX_LIST_##STRUCT_NAME##_LENGTH;                            \
685       length += AGX_INDEX_LIST_##STRUCT_NAME##_LENGTH;                         \
686    }
687 
688       IDX_PRINT(index_buffer, BUFFER_LO, "Index buffer");
689       IDX_PRINT(index_count, COUNT, "Index count");
690       IDX_PRINT(instance_count, INSTANCES, "Instance count");
691       IDX_PRINT(start, START, "Start");
692       IDX_PRINT(indirect_buffer, INDIRECT_BUFFER, "Indirect buffer");
693       IDX_PRINT(index_buffer_size, BUFFER_SIZE, "Index buffer size");
694 
695 #undef IDX_PRINT
696       return length;
697    }
698 
699    case AGX_VDM_BLOCK_TYPE_STREAM_LINK: {
700       agx_unpack(agxdecode_dump_stream, map, VDM_STREAM_LINK, hdr);
701       DUMP_UNPACKED(VDM_STREAM_LINK, hdr, "Stream Link\n");
702       *link = hdr.target_lo | (((uint64_t)hdr.target_hi) << 32);
703       return hdr.with_return ? STATE_CALL : STATE_LINK;
704    }
705 
706    case AGX_VDM_BLOCK_TYPE_STREAM_TERMINATE: {
707       DUMP_CL(VDM_STREAM_TERMINATE, map, "Stream Terminate");
708       return STATE_DONE;
709    }
710 
711    case AGX_VDM_BLOCK_TYPE_TESSELLATE: {
712       size_t length = AGX_VDM_TESSELLATE_LENGTH;
713       agx_unpack(agxdecode_dump_stream, map, VDM_TESSELLATE, hdr);
714       DUMP_UNPACKED(VDM_TESSELLATE, hdr, "Tessellate List\n");
715       map += AGX_VDM_TESSELLATE_LENGTH;
716 
717 #define TESS_PRINT(header_name, STRUCT_NAME, human)                            \
718    if (hdr.header_name##_present) {                                            \
719       DUMP_CL(VDM_TESSELLATE_##STRUCT_NAME, map, human);                       \
720       map += AGX_VDM_TESSELLATE_##STRUCT_NAME##_LENGTH;                        \
721       length += AGX_VDM_TESSELLATE_##STRUCT_NAME##_LENGTH;                     \
722    }
723 
724       TESS_PRINT(factor_buffer, FACTOR_BUFFER, "Factor buffer");
725       TESS_PRINT(patch_count, PATCH_COUNT, "Patch");
726       TESS_PRINT(instance_count, INSTANCE_COUNT, "Instance count");
727       TESS_PRINT(base_patch, BASE_PATCH, "Base patch");
728       TESS_PRINT(base_instance, BASE_INSTANCE, "Base instance");
729       TESS_PRINT(instance_stride, INSTANCE_STRIDE, "Instance stride");
730       TESS_PRINT(indirect, INDIRECT, "Indirect");
731       TESS_PRINT(factor_buffer_size, FACTOR_BUFFER_SIZE, "Factor buffer size");
732 
733 #undef TESS_PRINT
734       return length;
735    }
736 
737    default:
738       fprintf(agxdecode_dump_stream, "Unknown VDM block type: %u\n",
739               block_type);
740       u_hexdump(agxdecode_dump_stream, map, 8, false);
741       return 8;
742    }
743 }
744 
#ifdef __APPLE__
/*
 * Decode a compute submission: dump the IOGPU compute descriptor read from
 * `cmdbuf + 16`, walk the CDM stream at `encoder`, then fetch and disassemble
 * the context switch program.
 */
static void
agxdecode_cs(struct agxdecode_ctx *ctx, uint32_t *cmdbuf, uint64_t encoder,
             bool verbose, decoder_params *params)
{
   agx_unpack(agxdecode_dump_stream, cmdbuf + 16, IOGPU_COMPUTE, cs);
   DUMP_UNPACKED(IOGPU_COMPUTE, cs, "Compute\n");

   agxdecode_stateful(ctx, encoder, "Encoder", agxdecode_cdm, verbose, params,
                      NULL);

   fprintf(agxdecode_dump_stream, "Context switch program:\n");
   uint8_t buf[1024];
   agx_disassemble(buf,
                   agxdecode_fetch_gpu_array(
                      ctx, decode_usc(ctx, cs.context_switch_program), buf),
                   agxdecode_dump_stream);
}

/*
 * Decode a graphics submission: dump the IOGPU graphics descriptor read from
 * `cmdbuf + 16`, walk the VDM stream at `encoder`, then decode whichever of
 * the clear, store, partial-reload and partial-store pipelines are set.
 */
static void
agxdecode_gfx(struct agxdecode_ctx *ctx, uint32_t *cmdbuf, uint64_t encoder,
              bool verbose, decoder_params *params)
{
   agx_unpack(agxdecode_dump_stream, cmdbuf + 16, IOGPU_GRAPHICS, gfx);
   DUMP_UNPACKED(IOGPU_GRAPHICS, gfx, "Graphics\n");

   agxdecode_stateful(ctx, encoder, "Encoder", agxdecode_vdm, verbose, params,
                      NULL);

   if (gfx.clear_pipeline_unk) {
      fprintf(agxdecode_dump_stream, "Unk: %X\n", gfx.clear_pipeline_unk);
      agxdecode_stateful(ctx, decode_usc(ctx, gfx.clear_pipeline),
                         "Clear pipeline", agxdecode_usc, verbose, params,
                         NULL);
   }

   if (gfx.store_pipeline_unk) {
      assert(gfx.store_pipeline_unk == 0x4);
      agxdecode_stateful(ctx, decode_usc(ctx, gfx.store_pipeline),
                         "Store pipeline", agxdecode_usc, verbose, params,
                         NULL);
   }

   assert((gfx.partial_reload_pipeline_unk & 0xF) == 0x4);
   if (gfx.partial_reload_pipeline) {
      agxdecode_stateful(ctx, decode_usc(ctx, gfx.partial_reload_pipeline),
                         "Partial reload pipeline", agxdecode_usc, verbose,
                         params, NULL);
   }

   if (gfx.partial_store_pipeline) {
      agxdecode_stateful(ctx, decode_usc(ctx, gfx.partial_store_pipeline),
                         "Partial store pipeline", agxdecode_usc, verbose,
                         params, NULL);
   }
}
#endif
802 
803 static void
agxdecode_sampler_heap(struct agxdecode_ctx * ctx,uint64_t heap,unsigned count)804 agxdecode_sampler_heap(struct agxdecode_ctx *ctx, uint64_t heap, unsigned count)
805 {
806    if (!heap)
807       return;
808 
809    struct agx_sampler_packed samp[1024];
810    agxdecode_fetch_gpu_array(ctx, heap, samp);
811 
812    for (unsigned i = 0; i < count; ++i) {
813       bool nonzero = false;
814       for (unsigned j = 0; j < ARRAY_SIZE(samp[i].opaque); ++j) {
815          nonzero |= samp[i].opaque[j] != 0;
816       }
817 
818       if (nonzero) {
819          fprintf(agxdecode_dump_stream, "Heap sampler %u\n", i);
820 
821          agx_unpack(agxdecode_dump_stream, samp + i, SAMPLER, temp);
822          agx_print(agxdecode_dump_stream, SAMPLER, temp, 2);
823       }
824    }
825 }
826 
827 void
agxdecode_image_heap(struct agxdecode_ctx * ctx,uint64_t heap,unsigned nr_entries)828 agxdecode_image_heap(struct agxdecode_ctx *ctx, uint64_t heap,
829                      unsigned nr_entries)
830 {
831    agxdecode_dump_file_open();
832 
833    fprintf(agxdecode_dump_stream, "Image heap:\n");
834    struct agx_texture_packed *map = calloc(nr_entries, AGX_TEXTURE_LENGTH);
835    agxdecode_fetch_gpu_mem(ctx, heap, AGX_TEXTURE_LENGTH * nr_entries, map);
836 
837    for (unsigned i = 0; i < nr_entries; ++i) {
838       bool nonzero = false;
839       for (unsigned j = 0; j < ARRAY_SIZE(map[i].opaque); ++j) {
840          nonzero |= map[i].opaque[j] != 0;
841       }
842 
843       if (nonzero) {
844          fprintf(agxdecode_dump_stream, "%u: \n", i);
845          agxdecode_texture_pbe(ctx, map + i);
846          fprintf(agxdecode_dump_stream, "\n");
847       }
848    }
849 
850    free(map);
851 }
852 
853 static void
agxdecode_helper(struct agxdecode_ctx * ctx,const char * prefix,uint64_t helper)854 agxdecode_helper(struct agxdecode_ctx *ctx, const char *prefix, uint64_t helper)
855 {
856    if (helper & 1) {
857       fprintf(agxdecode_dump_stream, "%s helper program:\n", prefix);
858       uint8_t buf[1024];
859       agx_disassemble(
860          buf, agxdecode_fetch_gpu_array(ctx, decode_usc(ctx, helper & ~1), buf),
861          agxdecode_dump_stream);
862    }
863 }
864 
/*
 * Dump a DRM render command (struct drm_asahi_cmd_render) to the decode
 * stream.
 *
 * ctx:     decode context used to fetch and decode GPU memory
 * params:  device parameters forwarded to the stateful decoders
 * c:       the render command to dump
 * verbose: forwarded to the stateful decoders
 *
 * Fields are emitted in the order written below, interleaved with decodes of
 * the structures they point at (encoder, the four background/end-of-tile
 * pipelines, the sampler heap and the helper programs).
 *
 * NOTE(review): DUMP_FIELD and DUMP_CL are defined outside this section;
 * presumably they print the named field / unpacked descriptor to
 * agxdecode_dump_stream -- confirm against their definitions.
 */
void
agxdecode_drm_cmd_render(struct agxdecode_ctx *ctx,
                         struct drm_asahi_params_global *params,
                         struct drm_asahi_cmd_render *c, bool verbose)
{
   /* Make sure the dump stream exists before anything is printed */
   agxdecode_dump_file_open();

   DUMP_FIELD(c, "%llx", flags);
   DUMP_FIELD(c, "0x%llx", encoder_ptr);
   /* Walk the vertex (VDM) command stream the encoder pointer refers to */
   agxdecode_stateful(ctx, c->encoder_ptr, "Encoder", agxdecode_vdm, verbose,
                      params, NULL);
   DUMP_FIELD(c, "0x%x", encoder_id);
   DUMP_FIELD(c, "0x%x", cmd_ta_id);
   DUMP_FIELD(c, "0x%x", cmd_3d_id);
   DUMP_FIELD(c, "0x%x", ppp_ctrl);
   DUMP_FIELD(c, "0x%llx", ppp_multisamplectl);
   DUMP_CL(ZLS_CONTROL, &c->zls_ctrl, "ZLS Control");
   DUMP_FIELD(c, "0x%llx", depth_buffer_load);
   DUMP_FIELD(c, "0x%llx", depth_buffer_store);
   DUMP_FIELD(c, "0x%llx", depth_buffer_partial);
   DUMP_FIELD(c, "0x%llx", stencil_buffer_load);
   DUMP_FIELD(c, "0x%llx", stencil_buffer_store);
   DUMP_FIELD(c, "0x%llx", stencil_buffer_partial);
   DUMP_FIELD(c, "0x%llx", scissor_array);
   DUMP_FIELD(c, "0x%llx", depth_bias_array);
   DUMP_FIELD(c, "%d", fb_width);
   DUMP_FIELD(c, "%d", fb_height);
   DUMP_FIELD(c, "%d", layers);
   DUMP_FIELD(c, "%d", samples);
   DUMP_FIELD(c, "%d", sample_size);
   DUMP_FIELD(c, "%d", tib_blocks);
   DUMP_FIELD(c, "%d", utile_width);
   DUMP_FIELD(c, "%d", utile_height);

   /* For each pipeline below, the low three bits are masked off before the
    * value is rebased onto the shader base and decoded as USC words.
    */
   DUMP_FIELD(c, "0x%x", load_pipeline);
   DUMP_FIELD(c, "0x%x", load_pipeline_bind);
   agxdecode_stateful(ctx, decode_usc(ctx, c->load_pipeline & ~0x7),
                      "Load pipeline", agxdecode_usc, verbose, params, NULL);
   DUMP_FIELD(c, "0x%x", store_pipeline);
   DUMP_FIELD(c, "0x%x", store_pipeline_bind);
   agxdecode_stateful(ctx, decode_usc(ctx, c->store_pipeline & ~0x7),
                      "Store pipeline", agxdecode_usc, verbose, params, NULL);
   DUMP_FIELD(c, "0x%x", partial_reload_pipeline);
   DUMP_FIELD(c, "0x%x", partial_reload_pipeline_bind);
   agxdecode_stateful(ctx, decode_usc(ctx, c->partial_reload_pipeline & ~0x7),
                      "Partial reload pipeline", agxdecode_usc, verbose, params,
                      NULL);
   DUMP_FIELD(c, "0x%x", partial_store_pipeline);
   DUMP_FIELD(c, "0x%x", partial_store_pipeline_bind);
   agxdecode_stateful(ctx, decode_usc(ctx, c->partial_store_pipeline & ~0x7),
                      "Partial store pipeline", agxdecode_usc, verbose, params,
                      NULL);

   DUMP_FIELD(c, "0x%x", depth_dimensions);
   DUMP_FIELD(c, "0x%x", isp_bgobjdepth);
   DUMP_FIELD(c, "0x%x", isp_bgobjvals);

   /* Only the vertex sampler heap is dumped; the asserts below check that
    * the fragment heap is the same one.
    */
   agxdecode_sampler_heap(ctx, c->vertex_sampler_array,
                          c->vertex_sampler_count);

   /* Linux driver doesn't use this, at least for now */
   assert(c->fragment_sampler_array == c->vertex_sampler_array);
   assert(c->fragment_sampler_count == c->vertex_sampler_count);

   /* The attachment fields hold integer values that are cast to pointers and
    * dereferenced directly in this process.
    */
   DUMP_FIELD(c, "%d", vertex_attachment_count);
   struct drm_asahi_attachment *vertex_attachments =
      (void *)(uintptr_t)c->vertex_attachments;
   for (unsigned i = 0; i < c->vertex_attachment_count; i++) {
      DUMP_FIELD((&vertex_attachments[i]), "0x%x", order);
      DUMP_FIELD((&vertex_attachments[i]), "0x%llx", size);
      DUMP_FIELD((&vertex_attachments[i]), "0x%llx", pointer);
   }
   DUMP_FIELD(c, "%d", fragment_attachment_count);
   struct drm_asahi_attachment *fragment_attachments =
      (void *)(uintptr_t)c->fragment_attachments;
   for (unsigned i = 0; i < c->fragment_attachment_count; i++) {
      DUMP_FIELD((&fragment_attachments[i]), "0x%x", order);
      DUMP_FIELD((&fragment_attachments[i]), "0x%llx", size);
      DUMP_FIELD((&fragment_attachments[i]), "0x%llx", pointer);
   }

   /* Helper programs are only disassembled when bit 0 of the value is set */
   agxdecode_helper(ctx, "Vertex", c->vertex_helper_program);
   agxdecode_helper(ctx, "Fragment", c->fragment_helper_program);
}
948 
/*
 * Dump a DRM compute command (struct drm_asahi_cmd_compute) to the decode
 * stream.
 *
 * ctx:     decode context used to fetch and decode GPU memory
 * params:  device parameters forwarded to the stateful decoder
 * c:       the compute command to dump
 * verbose: forwarded to the stateful decoder
 *
 * NOTE(review): DUMP_FIELD is defined outside this section; presumably it
 * prints the named field to agxdecode_dump_stream -- confirm.
 */
void
agxdecode_drm_cmd_compute(struct agxdecode_ctx *ctx,
                          struct drm_asahi_params_global *params,
                          struct drm_asahi_cmd_compute *c, bool verbose)
{
   /* Make sure the dump stream exists before anything is printed */
   agxdecode_dump_file_open();

   DUMP_FIELD(c, "%llx", flags);
   DUMP_FIELD(c, "0x%llx", encoder_ptr);
   /* Walk the compute (CDM) command stream the encoder pointer refers to */
   agxdecode_stateful(ctx, c->encoder_ptr, "Encoder", agxdecode_cdm, verbose,
                      params, NULL);
   DUMP_FIELD(c, "0x%x", encoder_id);
   DUMP_FIELD(c, "0x%x", cmd_id);

   agxdecode_sampler_heap(ctx, c->sampler_array, c->sampler_count);
   /* The helper program is only disassembled when bit 0 of the value is set */
   agxdecode_helper(ctx, "Compute", c->helper_program);
}
966 
967 static void
chip_id_to_params(decoder_params * params,uint32_t chip_id)968 chip_id_to_params(decoder_params *params, uint32_t chip_id)
969 {
970    switch (chip_id) {
971    case 0x6000 ... 0x6002:
972       *params = (decoder_params){
973          .gpu_generation = 13,
974          .gpu_variant = "SCD"[chip_id & 15],
975          .chip_id = chip_id,
976          .num_clusters_total = 2 << (chip_id & 15),
977       };
978       break;
979    case 0x6020 ... 0x6022:
980       *params = (decoder_params){
981          .gpu_generation = 14,
982          .gpu_variant = "SCD"[chip_id & 15],
983          .chip_id = chip_id,
984          .num_clusters_total = 2 << (chip_id & 15),
985       };
986       break;
987    case 0x8112:
988       *params = (decoder_params){
989          .gpu_generation = 14,
990          .gpu_variant = 'G',
991          .chip_id = chip_id,
992          .num_clusters_total = 1,
993       };
994       break;
995    case 0x8103:
996    default:
997       *params = (decoder_params){
998          .gpu_generation = 13,
999          .gpu_variant = 'G',
1000          .chip_id = chip_id,
1001          .num_clusters_total = 1,
1002       };
1003       break;
1004    }
1005 }
1006 
#ifdef __APPLE__

/*
 * Decode a macOS IOGPU command buffer identified by BO handles.
 *
 * ctx:           decode context, forwarded to the compute/graphics decoders
 * cmdbuf_handle: handle of the command buffer BO
 * map_handle:    handle of the memory map BO (only checked for existence)
 * verbose:       forwarded to the stateful decoders
 *
 * Dumps the IOGPU header and its attachment list, then dispatches to the
 * compute decoder when the header's unk_5 field is 3 and to the graphics
 * decoder otherwise. Decoder parameters are hard-coded to chip ID 0x8103.
 *
 * NOTE(review): agxdecode_find_handle is defined outside this section; if it
 * also takes the context as its first parameter these two calls need `ctx`
 * as well -- confirm against its definition.
 */
void
agxdecode_cmdstream(struct agxdecode_ctx *ctx, unsigned cmdbuf_handle,
                    unsigned map_handle, bool verbose)
{
   agxdecode_dump_file_open();

   struct agx_bo *cmdbuf =
      agxdecode_find_handle(cmdbuf_handle, AGX_ALLOC_CMDBUF);
   struct agx_bo *map = agxdecode_find_handle(map_handle, AGX_ALLOC_MEMMAP);
   assert(cmdbuf != NULL && "nonexistent command buffer");
   assert(map != NULL && "nonexistent mapping");

   /* Print the IOGPU stuff */
   agx_unpack(agxdecode_dump_stream, cmdbuf->map, IOGPU_HEADER, cmd);
   DUMP_UNPACKED(IOGPU_HEADER, cmd, "IOGPU Header\n");

   DUMP_CL(IOGPU_ATTACHMENT_COUNT,
           ((uint8_t *)cmdbuf->map + cmd.attachment_offset),
           "Attachment count");

   /* The attachment count is read from word 3 of the attachment block; the
    * attachment records themselves start at word 4.
    */
   uint32_t *attachments =
      (uint32_t *)((uint8_t *)cmdbuf->map + cmd.attachment_offset);
   unsigned attachment_count = attachments[3];
   for (unsigned i = 0; i < attachment_count; ++i) {
      uint32_t *ptr = attachments + 4 + (i * AGX_IOGPU_ATTACHMENT_LENGTH / 4);
      DUMP_CL(IOGPU_ATTACHMENT, ptr, "Attachment");
   }

   /* Declared with the same type the helpers below take */
   decoder_params params;

   chip_id_to_params(&params, 0x8103);

   /* Both decoders take the context as their first argument */
   if (cmd.unk_5 == 3)
      agxdecode_cs(ctx, (uint32_t *)cmdbuf->map, cmd.encoder, verbose,
                   &params);
   else
      agxdecode_gfx(ctx, (uint32_t *)cmdbuf->map, cmd.encoder, verbose,
                    &params);
}

#endif
1048 
1049 void
agxdecode_track_alloc(struct agxdecode_ctx * ctx,struct agx_bo * alloc)1050 agxdecode_track_alloc(struct agxdecode_ctx *ctx, struct agx_bo *alloc)
1051 {
1052    util_dynarray_foreach(&ctx->mmap_array, struct agx_bo, it) {
1053       bool match = (it->handle == alloc->handle);
1054       assert(!match && "tried to alloc already allocated BO");
1055    }
1056 
1057    util_dynarray_append(&ctx->mmap_array, struct agx_bo, *alloc);
1058 }
1059 
1060 void
agxdecode_track_free(struct agxdecode_ctx * ctx,struct agx_bo * bo)1061 agxdecode_track_free(struct agxdecode_ctx *ctx, struct agx_bo *bo)
1062 {
1063    bool found = false;
1064 
1065    util_dynarray_foreach(&ctx->mmap_array, struct agx_bo, it) {
1066       if (it->handle == bo->handle) {
1067          assert(!found && "mapped multiple times!");
1068          found = true;
1069 
1070          memset(it, 0, sizeof(*it));
1071       }
1072    }
1073 
1074    assert(found && "freed unmapped memory");
1075 }
1076 
1077 static int agxdecode_dump_frame_count = 0;
1078 
1079 void
agxdecode_dump_file_open(void)1080 agxdecode_dump_file_open(void)
1081 {
1082    if (agxdecode_dump_stream)
1083       return;
1084 
1085    /* This does a getenv every frame, so it is possible to use
1086     * setenv to change the base at runtime.
1087     */
1088    const char *dump_file_base =
1089       getenv("AGXDECODE_DUMP_FILE") ?: "agxdecode.dump";
1090    if (!strcmp(dump_file_base, "stderr"))
1091       agxdecode_dump_stream = stderr;
1092    else {
1093       char buffer[1024];
1094       snprintf(buffer, sizeof(buffer), "%s.%04d", dump_file_base,
1095                agxdecode_dump_frame_count);
1096       printf("agxdecode: dump command stream to file %s\n", buffer);
1097       agxdecode_dump_stream = fopen(buffer, "w");
1098       if (!agxdecode_dump_stream) {
1099          fprintf(stderr,
1100                  "agxdecode: failed to open command stream log file %s\n",
1101                  buffer);
1102       }
1103    }
1104 }
1105 
1106 static void
agxdecode_dump_file_close(void)1107 agxdecode_dump_file_close(void)
1108 {
1109    if (agxdecode_dump_stream && agxdecode_dump_stream != stderr) {
1110       fclose(agxdecode_dump_stream);
1111       agxdecode_dump_stream = NULL;
1112    }
1113 }
1114 
1115 void
agxdecode_next_frame(void)1116 agxdecode_next_frame(void)
1117 {
1118    agxdecode_dump_file_close();
1119    agxdecode_dump_frame_count++;
1120 }
1121 
/* Close the dump stream; the frame counter is left untouched. */
void
agxdecode_close(void)
{
   agxdecode_dump_file_close();
}
1127 
1128 static ssize_t
libagxdecode_writer(void * cookie,const char * buffer,size_t size)1129 libagxdecode_writer(void *cookie, const char *buffer, size_t size)
1130 {
1131    return lib_config.stream_write(buffer, size);
1132 }
1133 
1134 #ifdef _GNU_SOURCE
1135 static cookie_io_functions_t funcs = {.write = libagxdecode_writer};
1136 #endif
1137 
1138 static decoder_params lib_params;
1139 
1140 void
libagxdecode_init(struct libagxdecode_config * config)1141 libagxdecode_init(struct libagxdecode_config *config)
1142 {
1143 #ifdef _GNU_SOURCE
1144    lib_config = *config;
1145    agxdecode_dump_stream = fopencookie(NULL, "w", funcs);
1146 
1147    chip_id_to_params(&lib_params, config->chip_id);
1148 #else
1149    /* fopencookie is a glibc extension */
1150    unreachable("libagxdecode only available with glibc");
1151 #endif
1152 }
1153 
1154 void
libagxdecode_vdm(struct agxdecode_ctx * ctx,uint64_t addr,const char * label,bool verbose)1155 libagxdecode_vdm(struct agxdecode_ctx *ctx, uint64_t addr, const char *label,
1156                  bool verbose)
1157 {
1158    agxdecode_stateful(ctx, addr, label, agxdecode_vdm, verbose, &lib_params,
1159                       NULL);
1160 }
1161 
1162 void
libagxdecode_cdm(struct agxdecode_ctx * ctx,uint64_t addr,const char * label,bool verbose)1163 libagxdecode_cdm(struct agxdecode_ctx *ctx, uint64_t addr, const char *label,
1164                  bool verbose)
1165 {
1166    agxdecode_stateful(ctx, addr, label, agxdecode_cdm, verbose, &lib_params,
1167                       NULL);
1168 }
1169 void
libagxdecode_usc(struct agxdecode_ctx * ctx,uint64_t addr,const char * label,bool verbose)1170 libagxdecode_usc(struct agxdecode_ctx *ctx, uint64_t addr, const char *label,
1171                  bool verbose)
1172 {
1173    agxdecode_stateful(ctx, addr, label, agxdecode_usc, verbose, &lib_params,
1174                       NULL);
1175 }
/* Library entry point: close the dump stream. */
void
libagxdecode_shutdown(void)
{
   agxdecode_dump_file_close();
}
1181