• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2017-2019 Alyssa Rosenzweig
3  * Copyright 2017-2019 Connor Abbott
4  * Copyright 2019 Collabora, Ltd.
5  * SPDX-License-Identifier: MIT
6  */
7 
8 #include <ctype.h>
9 #include <memory.h>
10 #include <stdarg.h>
11 #include <stdbool.h>
12 #include <stdio.h>
13 #include <stdlib.h>
14 #include <sys/mman.h>
15 #include <agx_pack.h>
16 
17 #include "util/u_hexdump.h"
18 #include "decode.h"
19 #ifdef __APPLE__
20 #include "agx_iokit.h"
21 #endif
22 
23 /* Pending UAPI */
/*
 * Device parameters handed to the decoders (aliased below as decoder_params).
 * Defined locally because the real UAPI definition is still pending.
 */
struct drm_asahi_params_global {
   /* Compared against 14 by the CDM decoder to detect G14X-only words */
   int gpu_generation;
   /* NOTE(review): not read in the visible part of this file */
   int gpu_variant;
   /* NOTE(review): not read in the visible part of this file */
   int chip_id;
   /* The CDM decoder expects an extra G14X word when this is above 1 */
   int num_clusters_total;
};
30 
/*
 * Library configuration. When its read_gpu_mem callback is set, GPU memory is
 * fetched through that callback instead of the locally tracked BO mappings.
 */
struct libagxdecode_config lib_config;

/* One short name per allocation type, AGX_NUM_ALLOC entries; may be unused */
UNUSED static const char *agx_alloc_types[AGX_NUM_ALLOC] = {"mem", "map",
                                                            "cmd"};
35 
/*
 * Placeholder disassembler: accepts a code buffer, its maximum length and an
 * output stream, and deliberately does nothing with any of them.
 */
static void
agx_disassemble(void *_code, size_t maxlen, FILE *fp)
{
   /* stub: arguments are intentionally ignored */
   (void)_code;
   (void)maxlen;
   (void)fp;
}
41 
/* Stream that all decoded output is written to */
FILE *agxdecode_dump_stream;

/* Capacity of both tracking tables below */
#define MAX_MAPPINGS 4096

/* Table of tracked buffer objects; the first mmap_count entries are valid */
struct agx_bo mmap_array[MAX_MAPPINGS];
unsigned mmap_count = 0;

/*
 * BOs whose CPU mapping has been write-protected during decoding, recorded so
 * that agxdecode_map_read_write() can restore write access afterwards.
 */
struct agx_bo *ro_mappings[MAX_MAPPINGS];
unsigned ro_mapping_count = 0;
51 
52 static struct agx_bo *
agxdecode_find_mapped_gpu_mem_containing_rw(uint64_t addr)53 agxdecode_find_mapped_gpu_mem_containing_rw(uint64_t addr)
54 {
55    for (unsigned i = 0; i < mmap_count; ++i) {
56       if (mmap_array[i].type == AGX_ALLOC_REGULAR &&
57           addr >= mmap_array[i].ptr.gpu &&
58           (addr - mmap_array[i].ptr.gpu) < mmap_array[i].size)
59          return mmap_array + i;
60    }
61 
62    return NULL;
63 }
64 
65 static struct agx_bo *
agxdecode_find_mapped_gpu_mem_containing(uint64_t addr)66 agxdecode_find_mapped_gpu_mem_containing(uint64_t addr)
67 {
68    struct agx_bo *mem = agxdecode_find_mapped_gpu_mem_containing_rw(addr);
69 
70    if (mem && mem->ptr.cpu && !mem->ro) {
71       mprotect(mem->ptr.cpu, mem->size, PROT_READ);
72       mem->ro = true;
73       ro_mappings[ro_mapping_count++] = mem;
74       assert(ro_mapping_count < MAX_MAPPINGS);
75    }
76 
77    if (mem && !mem->mapped) {
78       fprintf(stderr,
79               "[ERROR] access to memory not mapped (GPU %" PRIx64
80               ", handle %u)\n",
81               mem->ptr.gpu, mem->handle);
82    }
83 
84    return mem;
85 }
86 
87 static struct agx_bo *
agxdecode_find_handle(unsigned handle,unsigned type)88 agxdecode_find_handle(unsigned handle, unsigned type)
89 {
90    for (unsigned i = 0; i < mmap_count; ++i) {
91       if (mmap_array[i].type != type)
92          continue;
93 
94       if (mmap_array[i].handle != handle)
95          continue;
96 
97       return &mmap_array[i];
98    }
99 
100    return NULL;
101 }
102 
103 static void
agxdecode_mark_mapped(unsigned handle)104 agxdecode_mark_mapped(unsigned handle)
105 {
106    struct agx_bo *bo = agxdecode_find_handle(handle, AGX_ALLOC_REGULAR);
107 
108    if (!bo) {
109       fprintf(stderr, "ERROR - unknown BO mapped with handle %u\n", handle);
110       return;
111    }
112 
113    /* Mark mapped for future consumption */
114    bo->mapped = true;
115 }
116 
117 #ifdef __APPLE__
118 
119 static void
agxdecode_decode_segment_list(void * segment_list)120 agxdecode_decode_segment_list(void *segment_list)
121 {
122    unsigned nr_handles = 0;
123 
124    /* First, mark everything unmapped */
125    for (unsigned i = 0; i < mmap_count; ++i)
126       mmap_array[i].mapped = false;
127 
128    /* Check the header */
129    struct agx_map_header *hdr = segment_list;
130    if (hdr->resource_group_count == 0) {
131       fprintf(agxdecode_dump_stream, "ERROR - empty map\n");
132       return;
133    }
134 
135    if (hdr->segment_count != 1) {
136       fprintf(agxdecode_dump_stream, "ERROR - can't handle segment count %u\n",
137               hdr->segment_count);
138    }
139 
140    fprintf(agxdecode_dump_stream, "Segment list:\n");
141    fprintf(agxdecode_dump_stream, "  Command buffer shmem ID: %" PRIx64 "\n",
142            hdr->cmdbuf_id);
143    fprintf(agxdecode_dump_stream, "  Encoder ID: %" PRIx64 "\n",
144            hdr->encoder_id);
145    fprintf(agxdecode_dump_stream, "  Kernel commands start offset: %u\n",
146            hdr->kernel_commands_start_offset);
147    fprintf(agxdecode_dump_stream, "  Kernel commands end offset: %u\n",
148            hdr->kernel_commands_end_offset);
149    fprintf(agxdecode_dump_stream, "  Unknown: 0x%X\n", hdr->unk);
150 
151    /* Expected structure: header followed by resource groups */
152    size_t length = sizeof(struct agx_map_header);
153    length += sizeof(struct agx_map_entry) * hdr->resource_group_count;
154 
155    if (length != hdr->length) {
156       fprintf(agxdecode_dump_stream, "ERROR: expected length %zu, got %u\n",
157               length, hdr->length);
158    }
159 
160    if (hdr->padding[0] || hdr->padding[1])
161       fprintf(agxdecode_dump_stream, "ERROR - padding tripped\n");
162 
163    /* Check the entries */
164    struct agx_map_entry *groups = ((void *)hdr) + sizeof(*hdr);
165    for (unsigned i = 0; i < hdr->resource_group_count; ++i) {
166       struct agx_map_entry group = groups[i];
167       unsigned count = group.resource_count;
168 
169       STATIC_ASSERT(ARRAY_SIZE(group.resource_id) == 6);
170       STATIC_ASSERT(ARRAY_SIZE(group.resource_unk) == 6);
171       STATIC_ASSERT(ARRAY_SIZE(group.resource_flags) == 6);
172 
173       if ((count < 1) || (count > 6)) {
174          fprintf(agxdecode_dump_stream, "ERROR - invalid count %u\n", count);
175          continue;
176       }
177 
178       for (unsigned j = 0; j < count; ++j) {
179          unsigned handle = group.resource_id[j];
180          unsigned unk = group.resource_unk[j];
181          unsigned flags = group.resource_flags[j];
182 
183          if (!handle) {
184             fprintf(agxdecode_dump_stream, "ERROR - invalid handle %u\n",
185                     handle);
186             continue;
187          }
188 
189          agxdecode_mark_mapped(handle);
190          nr_handles++;
191 
192          fprintf(agxdecode_dump_stream, "%u (0x%X, 0x%X)\n", handle, unk,
193                  flags);
194       }
195 
196       if (group.unka)
197          fprintf(agxdecode_dump_stream, "ERROR - unknown 0x%X\n", group.unka);
198 
199       /* Visual separator for resource groups */
200       fprintf(agxdecode_dump_stream, "\n");
201    }
202 
203    /* Check the handle count */
204    if (nr_handles != hdr->total_resources) {
205       fprintf(agxdecode_dump_stream,
206               "ERROR - wrong handle count, got %u, expected %u (%u entries)\n",
207               nr_handles, hdr->total_resources, hdr->resource_group_count);
208    }
209 }
210 
211 #endif
212 
213 static size_t
__agxdecode_fetch_gpu_mem(const struct agx_bo * mem,uint64_t gpu_va,size_t size,void * buf,int line,const char * filename)214 __agxdecode_fetch_gpu_mem(const struct agx_bo *mem, uint64_t gpu_va,
215                           size_t size, void *buf, int line,
216                           const char *filename)
217 {
218    if (lib_config.read_gpu_mem)
219       return lib_config.read_gpu_mem(gpu_va, size, buf);
220 
221    if (!mem)
222       mem = agxdecode_find_mapped_gpu_mem_containing(gpu_va);
223 
224    if (!mem) {
225       fprintf(stderr, "Access to unknown memory %" PRIx64 " in %s:%d\n", gpu_va,
226               filename, line);
227       fflush(agxdecode_dump_stream);
228       assert(0);
229    }
230 
231    assert(mem);
232 
233    if (size + (gpu_va - mem->ptr.gpu) > mem->size) {
234       fprintf(stderr,
235               "Overflowing to unknown memory %" PRIx64
236               " of size %zu (max size %zu) in %s:%d\n",
237               gpu_va, size, (size_t)(mem->size - (gpu_va - mem->ptr.gpu)),
238               filename, line);
239       fflush(agxdecode_dump_stream);
240       assert(0);
241    }
242 
243    memcpy(buf, mem->ptr.cpu + gpu_va - mem->ptr.gpu, size);
244 
245    return size;
246 }
247 
/*
 * Fetch size bytes at gpu_va into buf. No BO is supplied (NULL), so the
 * callee resolves it from the address; __LINE__/__FILE__ of the call site are
 * passed along for its error messages. Evaluates to the callee's return value.
 */
#define agxdecode_fetch_gpu_mem(gpu_va, size, buf)                             \
   __agxdecode_fetch_gpu_mem(NULL, gpu_va, size, buf, __LINE__, __FILE__)

/*
 * As above, with the size taken from sizeof(buf). buf must therefore be an
 * actual array (fixed-size or VLA), not a pointer.
 */
#define agxdecode_fetch_gpu_array(gpu_va, buf)                                 \
   agxdecode_fetch_gpu_mem(gpu_va, sizeof(buf), buf)
253 
254 static void
agxdecode_map_read_write(void)255 agxdecode_map_read_write(void)
256 {
257    for (unsigned i = 0; i < ro_mapping_count; ++i) {
258       ro_mappings[i]->ro = false;
259       mprotect(ro_mappings[i]->ptr.cpu, ro_mappings[i]->size,
260                PROT_READ | PROT_WRITE);
261    }
262 
263    ro_mapping_count = 0;
264 }
265 
266 /* Helpers for parsing the cmdstream */
267 
/*
 * Log the heading str, then pretty-print the already-unpacked struct var of
 * packet type T to the dump stream, indented one level deeper than the
 * current agxdecode_indent.
 */
#define DUMP_UNPACKED(T, var, str)                                             \
   {                                                                           \
      agxdecode_log(str);                                                      \
      agx_print(agxdecode_dump_stream, T, var, (agxdecode_indent + 1) * 2);    \
   }

/*
 * Unpack packet type T from the bytes at cl into a block-local variable named
 * temp and dump it under the heading str (a newline is appended to str, which
 * must therefore be a string literal).
 */
#define DUMP_CL(T, cl, str)                                                    \
   {                                                                           \
      agx_unpack(agxdecode_dump_stream, cl, T, temp);                          \
      DUMP_UNPACKED(T, temp, str "\n");                                        \
   }

/* Write str verbatim to the dump stream */
#define agxdecode_log(str) fputs(str, agxdecode_dump_stream)
/* Write str to the dump stream prefixed with "// " */
#define agxdecode_msg(str) fprintf(agxdecode_dump_stream, "// %s", str)

/* Current nesting level used by DUMP_UNPACKED to compute the print indent */
unsigned agxdecode_indent = 0;

typedef struct drm_asahi_params_global decoder_params;

/*
 * Abstraction for command stream parsing.
 *
 * A decoder is handed a pointer to the next command (map) and returns either
 * the number of bytes it consumed or one of the STATE_* values below. For
 * STATE_LINK and STATE_CALL it stores the target address in *link. A return
 * of 0 makes agxdecode_stateful() hexdump 8 bytes and skip over them.
 */
typedef unsigned (*decode_cmd)(const uint8_t *map, uint64_t *link, bool verbose,
                               decoder_params *params, void *data);

/* Special decoder return values, as interpreted by agxdecode_stateful():
 *   STATE_DONE - stop decoding this stream
 *   STATE_LINK - continue decoding at *link
 *   STATE_CALL - push the address 8 bytes past the current command, then
 *                continue decoding at *link
 *   STATE_RET  - pop the most recently pushed address and continue there
 */
#define STATE_DONE (0xFFFFFFFFu)
#define STATE_LINK (0xFFFFFFFEu)
#define STATE_CALL (0xFFFFFFFDu)
#define STATE_RET  (0xFFFFFFFCu)
295 
296 static void
agxdecode_stateful(uint64_t va,const char * label,decode_cmd decoder,bool verbose,decoder_params * params,void * data)297 agxdecode_stateful(uint64_t va, const char *label, decode_cmd decoder,
298                    bool verbose, decoder_params *params, void *data)
299 {
300    uint64_t stack[16];
301    unsigned sp = 0;
302 
303    uint8_t buf[1024];
304    if (!lib_config.read_gpu_mem) {
305       struct agx_bo *alloc = agxdecode_find_mapped_gpu_mem_containing(va);
306       assert(alloc != NULL && "nonexistent object");
307       fprintf(agxdecode_dump_stream, "%s (%" PRIx64 ", handle %u)\n", label, va,
308               alloc->handle);
309    } else {
310       fprintf(agxdecode_dump_stream, "%s (%" PRIx64 ")\n", label, va);
311    }
312    fflush(agxdecode_dump_stream);
313 
314    int len = agxdecode_fetch_gpu_array(va, buf);
315 
316    int left = len;
317    uint8_t *map = buf;
318    uint64_t link = 0;
319 
320    fflush(agxdecode_dump_stream);
321 
322    while (left) {
323       if (len <= 0) {
324          fprintf(agxdecode_dump_stream, "!! Failed to read GPU memory\n");
325          fflush(agxdecode_dump_stream);
326          return;
327       }
328 
329       unsigned count = decoder(map, &link, verbose, params, data);
330 
331       /* If we fail to decode, default to a hexdump (don't hang) */
332       if (count == 0) {
333          u_hexdump(agxdecode_dump_stream, map, 8, false);
334          count = 8;
335       }
336 
337       fflush(agxdecode_dump_stream);
338       if (count == STATE_DONE) {
339          break;
340       } else if (count == STATE_LINK) {
341          fprintf(agxdecode_dump_stream, "Linking to 0x%" PRIx64 "\n\n", link);
342          va = link;
343          left = len = agxdecode_fetch_gpu_array(va, buf);
344          map = buf;
345       } else if (count == STATE_CALL) {
346          fprintf(agxdecode_dump_stream,
347                  "Calling 0x%" PRIx64 " (return = 0x%" PRIx64 ")\n\n", link,
348                  va + 8);
349          assert(sp < ARRAY_SIZE(stack));
350          stack[sp++] = va + 8;
351          va = link;
352          left = len = agxdecode_fetch_gpu_array(va, buf);
353          map = buf;
354       } else if (count == STATE_RET) {
355          assert(sp > 0);
356          va = stack[--sp];
357          fprintf(agxdecode_dump_stream, "Returning to 0x%" PRIx64 "\n\n", va);
358          left = len = agxdecode_fetch_gpu_array(va, buf);
359          map = buf;
360       } else {
361          va += count;
362          map += count;
363          left -= count;
364 
365          if (left < 512 && len == sizeof(buf)) {
366             left = len = agxdecode_fetch_gpu_array(va, buf);
367             map = buf;
368          }
369       }
370    }
371 }
372 
373 static unsigned
agxdecode_usc(const uint8_t * map,UNUSED uint64_t * link,UNUSED bool verbose,decoder_params * params,UNUSED void * data)374 agxdecode_usc(const uint8_t *map, UNUSED uint64_t *link, UNUSED bool verbose,
375               decoder_params *params, UNUSED void *data)
376 {
377    enum agx_sampler_states *sampler_states = data;
378    enum agx_usc_control type = map[0];
379    uint8_t buf[8192];
380 
381    bool extended_samplers =
382       (sampler_states != NULL) &&
383       (((*sampler_states) == AGX_SAMPLER_STATES_8_EXTENDED) ||
384        ((*sampler_states) == AGX_SAMPLER_STATES_16_EXTENDED));
385 
386 #define USC_CASE(name, human)                                                  \
387    case AGX_USC_CONTROL_##name: {                                              \
388       DUMP_CL(USC_##name, map, human);                                         \
389       return AGX_USC_##name##_LENGTH;                                          \
390    }
391 
392    switch (type) {
393    case AGX_USC_CONTROL_NO_PRESHADER: {
394       DUMP_CL(USC_NO_PRESHADER, map, "No preshader");
395       return STATE_DONE;
396    }
397 
398    case AGX_USC_CONTROL_PRESHADER: {
399       agx_unpack(agxdecode_dump_stream, map, USC_PRESHADER, ctrl);
400       DUMP_UNPACKED(USC_PRESHADER, ctrl, "Preshader\n");
401 
402       agx_disassemble(buf, agxdecode_fetch_gpu_array(ctrl.code, buf),
403                       agxdecode_dump_stream);
404 
405       return STATE_DONE;
406    }
407 
408    case AGX_USC_CONTROL_SHADER: {
409       agx_unpack(agxdecode_dump_stream, map, USC_SHADER, ctrl);
410       DUMP_UNPACKED(USC_SHADER, ctrl, "Shader\n");
411 
412       agxdecode_log("\n");
413       agx_disassemble(buf, agxdecode_fetch_gpu_array(ctrl.code, buf),
414                       agxdecode_dump_stream);
415       agxdecode_log("\n");
416 
417       return AGX_USC_SHADER_LENGTH;
418    }
419 
420    case AGX_USC_CONTROL_SAMPLER: {
421       agx_unpack(agxdecode_dump_stream, map, USC_SAMPLER, temp);
422       DUMP_UNPACKED(USC_SAMPLER, temp, "Sampler state\n");
423 
424       uint8_t buf[(AGX_SAMPLER_LENGTH + AGX_BORDER_LENGTH) * temp.count];
425       uint8_t *samp = buf;
426 
427       agxdecode_fetch_gpu_array(temp.buffer, buf);
428 
429       for (unsigned i = 0; i < temp.count; ++i) {
430          DUMP_CL(SAMPLER, samp, "Sampler");
431          samp += AGX_SAMPLER_LENGTH;
432 
433          if (extended_samplers) {
434             DUMP_CL(BORDER, samp, "Border");
435             samp += AGX_BORDER_LENGTH;
436          }
437       }
438 
439       return AGX_USC_SAMPLER_LENGTH;
440    }
441 
442    case AGX_USC_CONTROL_TEXTURE: {
443       agx_unpack(agxdecode_dump_stream, map, USC_TEXTURE, temp);
444       DUMP_UNPACKED(USC_TEXTURE, temp, "Texture state\n");
445 
446       uint8_t buf[AGX_TEXTURE_LENGTH * temp.count];
447       uint8_t *tex = buf;
448 
449       agxdecode_fetch_gpu_array(temp.buffer, buf);
450 
451       /* Note: samplers only need 8 byte alignment? */
452       for (unsigned i = 0; i < temp.count; ++i) {
453          agx_unpack(agxdecode_dump_stream, tex, TEXTURE, t);
454          DUMP_CL(TEXTURE, tex, "Texture");
455          DUMP_CL(PBE, tex, "PBE");
456 
457          tex += AGX_TEXTURE_LENGTH;
458       }
459 
460       return AGX_USC_TEXTURE_LENGTH;
461    }
462 
463    case AGX_USC_CONTROL_UNIFORM: {
464       agx_unpack(agxdecode_dump_stream, map, USC_UNIFORM, temp);
465       DUMP_UNPACKED(USC_UNIFORM, temp, "Uniform\n");
466 
467       uint8_t buf[2 * temp.size_halfs];
468       agxdecode_fetch_gpu_array(temp.buffer, buf);
469       u_hexdump(agxdecode_dump_stream, buf, 2 * temp.size_halfs, false);
470 
471       return AGX_USC_UNIFORM_LENGTH;
472    }
473 
474    case AGX_USC_CONTROL_UNIFORM_HIGH: {
475       agx_unpack(agxdecode_dump_stream, map, USC_UNIFORM_HIGH, temp);
476       DUMP_UNPACKED(USC_UNIFORM_HIGH, temp, "Uniform (high)\n");
477 
478       uint8_t buf[2 * temp.size_halfs];
479       agxdecode_fetch_gpu_array(temp.buffer, buf);
480       u_hexdump(agxdecode_dump_stream, buf, 2 * temp.size_halfs, false);
481 
482       return AGX_USC_UNIFORM_HIGH_LENGTH;
483    }
484 
485       USC_CASE(FRAGMENT_PROPERTIES, "Fragment properties");
486       USC_CASE(SHARED, "Shared");
487       USC_CASE(REGISTERS, "Registers");
488 
489    default:
490       fprintf(agxdecode_dump_stream, "Unknown USC control type: %u\n", type);
491       u_hexdump(agxdecode_dump_stream, map, 8, false);
492       return 8;
493    }
494 
495 #undef USC_CASE
496 }
497 
/*
 * Dump one optional PPP record word and advance past it.
 *
 * Expands inside a void function that has these locals in scope: hdr (the
 * unpacked PPP header), base and size (start and byte length of the record
 * buffer). If hdr.header_name is set, the struct_name packet at map is dumped
 * under the heading human and map is advanced by its length. If the packet
 * would extend past base + size, an error is printed and the enclosing
 * function returns.
 */
#define PPP_PRINT(map, header_name, struct_name, human)                        \
   if (hdr.header_name) {                                                      \
      if (((map + AGX_##struct_name##_LENGTH) > (base + size))) {              \
         fprintf(agxdecode_dump_stream, "Buffer overrun in PPP update\n");     \
         return;                                                               \
      }                                                                        \
      DUMP_CL(struct_name, map, human);                                        \
      map += AGX_##struct_name##_LENGTH;                                       \
      fflush(agxdecode_dump_stream);                                           \
   }
508 
509 static void
agxdecode_record(uint64_t va,size_t size,bool verbose,decoder_params * params)510 agxdecode_record(uint64_t va, size_t size, bool verbose, decoder_params *params)
511 {
512    uint8_t buf[size];
513    uint8_t *base = buf;
514    uint8_t *map = base;
515 
516    agxdecode_fetch_gpu_array(va, buf);
517 
518    agx_unpack(agxdecode_dump_stream, map, PPP_HEADER, hdr);
519    map += AGX_PPP_HEADER_LENGTH;
520 
521    PPP_PRINT(map, fragment_control, FRAGMENT_CONTROL, "Fragment control");
522    PPP_PRINT(map, fragment_control_2, FRAGMENT_CONTROL, "Fragment control 2");
523    PPP_PRINT(map, fragment_front_face, FRAGMENT_FACE, "Front face");
524    PPP_PRINT(map, fragment_front_face_2, FRAGMENT_FACE_2, "Front face 2");
525    PPP_PRINT(map, fragment_front_stencil, FRAGMENT_STENCIL, "Front stencil");
526    PPP_PRINT(map, fragment_back_face, FRAGMENT_FACE, "Back face");
527    PPP_PRINT(map, fragment_back_face_2, FRAGMENT_FACE_2, "Back face 2");
528    PPP_PRINT(map, fragment_back_stencil, FRAGMENT_STENCIL, "Back stencil");
529    PPP_PRINT(map, depth_bias_scissor, DEPTH_BIAS_SCISSOR, "Depth bias/scissor");
530 
531    if (hdr.region_clip) {
532       if (((map + (AGX_REGION_CLIP_LENGTH * hdr.viewport_count)) >
533            (base + size))) {
534          fprintf(agxdecode_dump_stream, "Buffer overrun in PPP update\n");
535          return;
536       }
537 
538       for (unsigned i = 0; i < hdr.viewport_count; ++i) {
539          DUMP_CL(REGION_CLIP, map, "Region clip");
540          map += AGX_REGION_CLIP_LENGTH;
541          fflush(agxdecode_dump_stream);
542       }
543    }
544 
545    if (hdr.viewport) {
546       if (((map + AGX_VIEWPORT_CONTROL_LENGTH +
547             (AGX_VIEWPORT_LENGTH * hdr.viewport_count)) > (base + size))) {
548          fprintf(agxdecode_dump_stream, "Buffer overrun in PPP update\n");
549          return;
550       }
551 
552       DUMP_CL(VIEWPORT_CONTROL, map, "Viewport control");
553       map += AGX_VIEWPORT_CONTROL_LENGTH;
554 
555       for (unsigned i = 0; i < hdr.viewport_count; ++i) {
556          DUMP_CL(VIEWPORT, map, "Viewport");
557          map += AGX_VIEWPORT_LENGTH;
558          fflush(agxdecode_dump_stream);
559       }
560    }
561 
562    PPP_PRINT(map, w_clamp, W_CLAMP, "W clamp");
563    PPP_PRINT(map, output_select, OUTPUT_SELECT, "Output select");
564    PPP_PRINT(map, varying_counts_32, VARYING_COUNTS, "Varying counts 32");
565    PPP_PRINT(map, varying_counts_16, VARYING_COUNTS, "Varying counts 16");
566    PPP_PRINT(map, cull, CULL, "Cull");
567    PPP_PRINT(map, cull_2, CULL_2, "Cull 2");
568 
569    if (hdr.fragment_shader) {
570       agx_unpack(agxdecode_dump_stream, map, FRAGMENT_SHADER, frag);
571       agxdecode_stateful(frag.pipeline, "Fragment pipeline", agxdecode_usc,
572                          verbose, params, &frag.sampler_state_register_count);
573 
574       if (frag.cf_bindings) {
575          uint8_t buf[128];
576          uint8_t *cf = buf;
577 
578          agxdecode_fetch_gpu_array(frag.cf_bindings, buf);
579          u_hexdump(agxdecode_dump_stream, cf, 128, false);
580 
581          DUMP_CL(CF_BINDING_HEADER, cf, "Coefficient binding header:");
582          cf += AGX_CF_BINDING_HEADER_LENGTH;
583 
584          for (unsigned i = 0; i < frag.cf_binding_count; ++i) {
585             DUMP_CL(CF_BINDING, cf, "Coefficient binding:");
586             cf += AGX_CF_BINDING_LENGTH;
587          }
588       }
589 
590       DUMP_UNPACKED(FRAGMENT_SHADER, frag, "Fragment shader\n");
591       map += AGX_FRAGMENT_SHADER_LENGTH;
592    }
593 
594    PPP_PRINT(map, occlusion_query, FRAGMENT_OCCLUSION_QUERY, "Occlusion query");
595    PPP_PRINT(map, occlusion_query_2, FRAGMENT_OCCLUSION_QUERY_2,
596              "Occlusion query 2");
597    PPP_PRINT(map, output_unknown, OUTPUT_UNKNOWN, "Output unknown");
598    PPP_PRINT(map, output_size, OUTPUT_SIZE, "Output size");
599    PPP_PRINT(map, varying_word_2, VARYING_2, "Varying word 2");
600 
601    /* PPP print checks we don't read too much, now check we read enough */
602    assert(map == (base + size) && "invalid size of PPP update");
603 }
604 
605 static unsigned
agxdecode_cdm(const uint8_t * map,uint64_t * link,bool verbose,decoder_params * params,UNUSED void * data)606 agxdecode_cdm(const uint8_t *map, uint64_t *link, bool verbose,
607               decoder_params *params, UNUSED void *data)
608 {
609    /* Bits 29-31 contain the block type */
610    enum agx_cdm_block_type block_type = (map[3] >> 5);
611 
612    switch (block_type) {
613    case AGX_CDM_BLOCK_TYPE_LAUNCH: {
614       size_t length = AGX_CDM_LAUNCH_LENGTH;
615 
616 #define CDM_PRINT(STRUCT_NAME, human)                                          \
617    do {                                                                        \
618       DUMP_CL(CDM_##STRUCT_NAME, map, human);                                  \
619       map += AGX_CDM_##STRUCT_NAME##_LENGTH;                                   \
620       length += AGX_CDM_##STRUCT_NAME##_LENGTH;                                \
621    } while (0);
622 
623       agx_unpack(agxdecode_dump_stream, map, CDM_LAUNCH, hdr);
624       agxdecode_stateful(hdr.pipeline, "Pipeline", agxdecode_usc, verbose,
625                          params, &hdr.sampler_state_register_count);
626       DUMP_UNPACKED(CDM_LAUNCH, hdr, "Compute\n");
627       map += AGX_CDM_LAUNCH_LENGTH;
628 
629       /* Added in G14X */
630       if (params->gpu_generation >= 14 && params->num_clusters_total > 1)
631          CDM_PRINT(UNK_G14X, "Unknown G14X");
632 
633       switch (hdr.mode) {
634       case AGX_CDM_MODE_DIRECT:
635          CDM_PRINT(GLOBAL_SIZE, "Global size");
636          CDM_PRINT(LOCAL_SIZE, "Local size");
637          break;
638       case AGX_CDM_MODE_INDIRECT_GLOBAL:
639          CDM_PRINT(INDIRECT, "Indirect buffer");
640          CDM_PRINT(LOCAL_SIZE, "Local size");
641          break;
642       case AGX_CDM_MODE_INDIRECT_LOCAL:
643          CDM_PRINT(INDIRECT, "Indirect buffer");
644          break;
645       default:
646          fprintf(agxdecode_dump_stream, "Unknown CDM mode: %u\n", hdr.mode);
647          break;
648       }
649 
650       return length;
651    }
652 
653    case AGX_CDM_BLOCK_TYPE_STREAM_LINK: {
654       agx_unpack(agxdecode_dump_stream, map, CDM_STREAM_LINK, hdr);
655       DUMP_UNPACKED(CDM_STREAM_LINK, hdr, "Stream Link\n");
656       *link = hdr.target_lo | (((uint64_t)hdr.target_hi) << 32);
657       return STATE_LINK;
658    }
659 
660    case AGX_CDM_BLOCK_TYPE_STREAM_TERMINATE: {
661       DUMP_CL(CDM_STREAM_TERMINATE, map, "Stream Terminate");
662       return STATE_DONE;
663    }
664 
665    case AGX_CDM_BLOCK_TYPE_BARRIER: {
666       DUMP_CL(CDM_BARRIER, map, "Barrier");
667       return AGX_CDM_BARRIER_LENGTH;
668    }
669 
670    default:
671       fprintf(agxdecode_dump_stream, "Unknown CDM block type: %u\n",
672               block_type);
673       u_hexdump(agxdecode_dump_stream, map, 8, false);
674       return 8;
675    }
676 }
677 
/*
 * Decode one VDM (vertex/graphics) command at map.
 *
 * Returns the number of bytes consumed, or one of the STATE_* values:
 * STATE_RET for a barrier that returns, STATE_CALL or STATE_LINK for a stream
 * link (with *link set to the target), STATE_DONE for a stream terminate.
 * Unknown block types are hexdumped and 8 is returned.
 *
 * PPP state updates and vertex shader words reference further state, which is
 * fetched and dumped too; verbose and params are forwarded for that. data is
 * unused.
 */
static unsigned
agxdecode_vdm(const uint8_t *map, uint64_t *link, bool verbose,
              decoder_params *params, UNUSED void *data)
{
   /* Bits 29-31 contain the block type */
   enum agx_vdm_block_type block_type = (map[3] >> 5);

   switch (block_type) {
   case AGX_VDM_BLOCK_TYPE_BARRIER: {
      agx_unpack(agxdecode_dump_stream, map, VDM_BARRIER, hdr);
      DUMP_UNPACKED(VDM_BARRIER, hdr, "Barrier\n");
      /* A barrier with the returns flag set pops the call stack */
      return hdr.returns ? STATE_RET : AGX_VDM_BARRIER_LENGTH;
   }

   case AGX_VDM_BLOCK_TYPE_PPP_STATE_UPDATE: {
      agx_unpack(agxdecode_dump_stream, map, PPP_STATE, cmd);

      uint64_t address = (((uint64_t)cmd.pointer_hi) << 32) | cmd.pointer_lo;

      /* With locally tracked mappings, skip records that point at memory we
       * do not know about instead of letting the fetch assert.
       */
      if (!lib_config.read_gpu_mem) {
         struct agx_bo *mem = agxdecode_find_mapped_gpu_mem_containing(address);

         if (!mem) {
            DUMP_UNPACKED(PPP_STATE, cmd, "Non-existent record (XXX)\n");
            return AGX_PPP_STATE_LENGTH;
         }
      }

      /* size_words counts 32-bit words; the record decoder wants bytes */
      agxdecode_record(address, cmd.size_words * 4, verbose, params);
      return AGX_PPP_STATE_LENGTH;
   }

   case AGX_VDM_BLOCK_TYPE_VDM_STATE_UPDATE: {
      /* Running total of bytes consumed, grown by VDM_PRINT */
      size_t length = AGX_VDM_STATE_LENGTH;
      agx_unpack(agxdecode_dump_stream, map, VDM_STATE, hdr);
      map += AGX_VDM_STATE_LENGTH;

/* Dump an optional word if its *_present flag is set in hdr, advancing map
 * and length past it.
 */
#define VDM_PRINT(header_name, STRUCT_NAME, human)                             \
   if (hdr.header_name##_present) {                                            \
      DUMP_CL(VDM_STATE_##STRUCT_NAME, map, human);                            \
      map += AGX_VDM_STATE_##STRUCT_NAME##_LENGTH;                             \
      length += AGX_VDM_STATE_##STRUCT_NAME##_LENGTH;                          \
   }

      VDM_PRINT(restart_index, RESTART_INDEX, "Restart index");

      /* If word 1 is present but word 0 is not, fallback to compact samplers */
      enum agx_sampler_states sampler_states = 0;

      /* Peek at word 0 before VDM_PRINT advances past it */
      if (hdr.vertex_shader_word_0_present) {
         agx_unpack(agxdecode_dump_stream, map, VDM_STATE_VERTEX_SHADER_WORD_0,
                    word_0);
         sampler_states = word_0.sampler_state_register_count;
      }

      VDM_PRINT(vertex_shader_word_0, VERTEX_SHADER_WORD_0,
                "Vertex shader word 0");

      /* Word 1 carries the pipeline address: decode the pipeline first, then
       * let VDM_PRINT dump the word itself.
       */
      if (hdr.vertex_shader_word_1_present) {
         agx_unpack(agxdecode_dump_stream, map, VDM_STATE_VERTEX_SHADER_WORD_1,
                    word_1);
         fprintf(agxdecode_dump_stream, "Pipeline %X\n",
                 (uint32_t)word_1.pipeline);
         agxdecode_stateful(word_1.pipeline, "Pipeline", agxdecode_usc, verbose,
                            params, &sampler_states);
      }

      VDM_PRINT(vertex_shader_word_1, VERTEX_SHADER_WORD_1,
                "Vertex shader word 1");
      VDM_PRINT(vertex_outputs, VERTEX_OUTPUTS, "Vertex outputs");
      VDM_PRINT(tessellation, TESSELLATION, "Tessellation");
      VDM_PRINT(vertex_unknown, VERTEX_UNKNOWN, "Vertex unknown");
      VDM_PRINT(tessellation_scale, TESSELLATION_SCALE, "Tessellation scale");

#undef VDM_PRINT
      /* Without a tessellation scale word the length is rounded up to a
       * multiple of 8 bytes; with one it is used as is.
       */
      return hdr.tessellation_scale_present ? length : ALIGN_POT(length, 8);
   }

   case AGX_VDM_BLOCK_TYPE_INDEX_LIST: {
      /* Running total of bytes consumed, grown by IDX_PRINT */
      size_t length = AGX_INDEX_LIST_LENGTH;
      agx_unpack(agxdecode_dump_stream, map, INDEX_LIST, hdr);
      DUMP_UNPACKED(INDEX_LIST, hdr, "Index List\n");
      map += AGX_INDEX_LIST_LENGTH;

/* Dump an optional word if its *_present flag is set in hdr, advancing map
 * and length past it.
 */
#define IDX_PRINT(header_name, STRUCT_NAME, human)                             \
   if (hdr.header_name##_present) {                                            \
      DUMP_CL(INDEX_LIST_##STRUCT_NAME, map, human);                           \
      map += AGX_INDEX_LIST_##STRUCT_NAME##_LENGTH;                            \
      length += AGX_INDEX_LIST_##STRUCT_NAME##_LENGTH;                         \
   }

      IDX_PRINT(index_buffer, BUFFER_LO, "Index buffer");
      IDX_PRINT(index_count, COUNT, "Index count");
      IDX_PRINT(instance_count, INSTANCES, "Instance count");
      IDX_PRINT(start, START, "Start");
      IDX_PRINT(indirect_buffer, INDIRECT_BUFFER, "Indirect buffer");
      IDX_PRINT(index_buffer_size, BUFFER_SIZE, "Index buffer size");

#undef IDX_PRINT
      return length;
   }

   case AGX_VDM_BLOCK_TYPE_STREAM_LINK: {
      agx_unpack(agxdecode_dump_stream, map, VDM_STREAM_LINK, hdr);
      DUMP_UNPACKED(VDM_STREAM_LINK, hdr, "Stream Link\n");
      *link = hdr.target_lo | (((uint64_t)hdr.target_hi) << 32);
      /* A link with return behaves like a call; otherwise it is a plain jump */
      return hdr.with_return ? STATE_CALL : STATE_LINK;
   }

   case AGX_VDM_BLOCK_TYPE_STREAM_TERMINATE: {
      DUMP_CL(VDM_STREAM_TERMINATE, map, "Stream Terminate");
      return STATE_DONE;
   }

   case AGX_VDM_BLOCK_TYPE_TESSELLATE: {
      /* Running total of bytes consumed, grown by TESS_PRINT */
      size_t length = AGX_VDM_TESSELLATE_LENGTH;
      agx_unpack(agxdecode_dump_stream, map, VDM_TESSELLATE, hdr);
      DUMP_UNPACKED(VDM_TESSELLATE, hdr, "Tessellate List\n");
      map += AGX_VDM_TESSELLATE_LENGTH;

/* Dump an optional word if its *_present flag is set in hdr, advancing map
 * and length past it.
 */
#define TESS_PRINT(header_name, STRUCT_NAME, human)                            \
   if (hdr.header_name##_present) {                                            \
      DUMP_CL(VDM_TESSELLATE_##STRUCT_NAME, map, human);                       \
      map += AGX_VDM_TESSELLATE_##STRUCT_NAME##_LENGTH;                        \
      length += AGX_VDM_TESSELLATE_##STRUCT_NAME##_LENGTH;                     \
   }

      TESS_PRINT(factor_buffer, FACTOR_BUFFER, "Factor buffer");
      TESS_PRINT(patch_count, PATCH_COUNT, "Patch");
      TESS_PRINT(instance_count, INSTANCE_COUNT, "Instance count");
      TESS_PRINT(base_patch, BASE_PATCH, "Base patch");
      TESS_PRINT(base_instance, BASE_INSTANCE, "Base instance");
      TESS_PRINT(instance_stride, INSTANCE_STRIDE, "Instance stride");
      TESS_PRINT(indirect, INDIRECT, "Indirect");
      TESS_PRINT(unknown, UNKNOWN, "Unknown");

#undef TESS_PRINT
      return length;
   }

   default:
      fprintf(agxdecode_dump_stream, "Unknown VDM block type: %u\n",
              block_type);
      u_hexdump(agxdecode_dump_stream, map, 8, false);
      return 8;
   }
}
825 
826 static void
agxdecode_cs(uint32_t * cmdbuf,uint64_t encoder,bool verbose,decoder_params * params)827 agxdecode_cs(uint32_t *cmdbuf, uint64_t encoder, bool verbose,
828              decoder_params *params)
829 {
830    agx_unpack(agxdecode_dump_stream, cmdbuf + 16, IOGPU_COMPUTE, cs);
831    DUMP_UNPACKED(IOGPU_COMPUTE, cs, "Compute\n");
832 
833    agxdecode_stateful(encoder, "Encoder", agxdecode_cdm, verbose, params, NULL);
834 
835    fprintf(agxdecode_dump_stream, "Context switch program:\n");
836    uint8_t buf[1024];
837    agx_disassemble(buf,
838                    agxdecode_fetch_gpu_array(cs.context_switch_program, buf),
839                    agxdecode_dump_stream);
840 }
841 
842 static void
agxdecode_gfx(uint32_t * cmdbuf,uint64_t encoder,bool verbose,decoder_params * params)843 agxdecode_gfx(uint32_t *cmdbuf, uint64_t encoder, bool verbose,
844               decoder_params *params)
845 {
846    agx_unpack(agxdecode_dump_stream, cmdbuf + 16, IOGPU_GRAPHICS, gfx);
847    DUMP_UNPACKED(IOGPU_GRAPHICS, gfx, "Graphics\n");
848 
849    agxdecode_stateful(encoder, "Encoder", agxdecode_vdm, verbose, params, NULL);
850 
851    if (gfx.clear_pipeline_unk) {
852       fprintf(agxdecode_dump_stream, "Unk: %X\n", gfx.clear_pipeline_unk);
853       agxdecode_stateful(gfx.clear_pipeline, "Clear pipeline", agxdecode_usc,
854                          verbose, params, NULL);
855    }
856 
857    if (gfx.store_pipeline_unk) {
858       assert(gfx.store_pipeline_unk == 0x4);
859       agxdecode_stateful(gfx.store_pipeline, "Store pipeline", agxdecode_usc,
860                          verbose, params, NULL);
861    }
862 
863    assert((gfx.partial_reload_pipeline_unk & 0xF) == 0x4);
864    if (gfx.partial_reload_pipeline) {
865       agxdecode_stateful(gfx.partial_reload_pipeline, "Partial reload pipeline",
866                          agxdecode_usc, verbose, params, NULL);
867    }
868 
869    if (gfx.partial_store_pipeline) {
870       agxdecode_stateful(gfx.partial_store_pipeline, "Partial store pipeline",
871                          agxdecode_usc, verbose, params, NULL);
872    }
873 }
874 
875 static void
chip_id_to_params(decoder_params * params,uint32_t chip_id)876 chip_id_to_params(decoder_params *params, uint32_t chip_id)
877 {
878    switch (chip_id) {
879    case 0x6000 ... 0x6002:
880       *params = (decoder_params){
881          .gpu_generation = 13,
882          .gpu_variant = "SCD"[chip_id & 15],
883          .chip_id = chip_id,
884          .num_clusters_total = 2 << (chip_id & 15),
885       };
886       break;
887    case 0x6020 ... 0x6022:
888       *params = (decoder_params){
889          .gpu_generation = 14,
890          .gpu_variant = "SCD"[chip_id & 15],
891          .chip_id = chip_id,
892          .num_clusters_total = 2 << (chip_id & 15),
893       };
894       break;
895    case 0x8112:
896       *params = (decoder_params){
897          .gpu_generation = 14,
898          .gpu_variant = 'G',
899          .chip_id = chip_id,
900          .num_clusters_total = 1,
901       };
902       break;
903    case 0x8103:
904    default:
905       *params = (decoder_params){
906          .gpu_generation = 13,
907          .gpu_variant = 'G',
908          .chip_id = chip_id,
909          .num_clusters_total = 1,
910       };
911       break;
912    }
913 }
914 
915 #ifdef __APPLE__
916 
917 void
agxdecode_cmdstream(unsigned cmdbuf_handle,unsigned map_handle,bool verbose)918 agxdecode_cmdstream(unsigned cmdbuf_handle, unsigned map_handle, bool verbose)
919 {
920    agxdecode_dump_file_open();
921 
922    struct agx_bo *cmdbuf =
923       agxdecode_find_handle(cmdbuf_handle, AGX_ALLOC_CMDBUF);
924    struct agx_bo *map = agxdecode_find_handle(map_handle, AGX_ALLOC_MEMMAP);
925    assert(cmdbuf != NULL && "nonexistent command buffer");
926    assert(map != NULL && "nonexistent mapping");
927 
928    /* Before decoding anything, validate the map. Set bo->mapped fields */
929    agxdecode_decode_segment_list(map->ptr.cpu);
930 
931    /* Print the IOGPU stuff */
932    agx_unpack(agxdecode_dump_stream, cmdbuf->ptr.cpu, IOGPU_HEADER, cmd);
933    DUMP_UNPACKED(IOGPU_HEADER, cmd, "IOGPU Header\n");
934 
935    DUMP_CL(IOGPU_ATTACHMENT_COUNT,
936            ((uint8_t *)cmdbuf->ptr.cpu + cmd.attachment_offset),
937            "Attachment count");
938 
939    uint32_t *attachments =
940       (uint32_t *)((uint8_t *)cmdbuf->ptr.cpu + cmd.attachment_offset);
941    unsigned attachment_count = attachments[3];
942    for (unsigned i = 0; i < attachment_count; ++i) {
943       uint32_t *ptr = attachments + 4 + (i * AGX_IOGPU_ATTACHMENT_LENGTH / 4);
944       DUMP_CL(IOGPU_ATTACHMENT, ptr, "Attachment");
945    }
946 
947    struct drm_asahi_params_global params;
948 
949    chip_id_to_params(&params, 0x8103);
950 
951    if (cmd.unk_5 == 3)
952       agxdecode_cs((uint32_t *)cmdbuf->ptr.cpu, cmd.encoder, verbose, &params);
953    else
954       agxdecode_gfx((uint32_t *)cmdbuf->ptr.cpu, cmd.encoder, verbose, &params);
955 
956    agxdecode_map_read_write();
957 }
958 
959 void
agxdecode_dump_mappings(unsigned map_handle)960 agxdecode_dump_mappings(unsigned map_handle)
961 {
962    agxdecode_dump_file_open();
963 
964    struct agx_bo *map = agxdecode_find_handle(map_handle, AGX_ALLOC_MEMMAP);
965    assert(map != NULL && "nonexistent mapping");
966    agxdecode_decode_segment_list(map->ptr.cpu);
967 
968    for (unsigned i = 0; i < mmap_count; ++i) {
969       if (!mmap_array[i].ptr.cpu || !mmap_array[i].size ||
970           !mmap_array[i].mapped)
971          continue;
972 
973       assert(mmap_array[i].type < AGX_NUM_ALLOC);
974 
975       fprintf(agxdecode_dump_stream,
976               "Buffer: type %s, gpu %" PRIx64 ", handle %u.bin:\n\n",
977               agx_alloc_types[mmap_array[i].type], mmap_array[i].ptr.gpu,
978               mmap_array[i].handle);
979 
980       u_hexdump(agxdecode_dump_stream, mmap_array[i].ptr.cpu,
981                 mmap_array[i].size, false);
982       fprintf(agxdecode_dump_stream, "\n");
983    }
984 }
985 
986 #endif
987 
988 void
agxdecode_track_alloc(struct agx_bo * alloc)989 agxdecode_track_alloc(struct agx_bo *alloc)
990 {
991    assert((mmap_count + 1) < MAX_MAPPINGS);
992 
993    for (unsigned i = 0; i < mmap_count; ++i) {
994       struct agx_bo *bo = &mmap_array[i];
995       bool match = (bo->handle == alloc->handle && bo->type == alloc->type);
996       assert(!match && "tried to alloc already allocated BO");
997    }
998 
999    mmap_array[mmap_count++] = *alloc;
1000 }
1001 
1002 void
agxdecode_track_free(struct agx_bo * bo)1003 agxdecode_track_free(struct agx_bo *bo)
1004 {
1005    bool found = false;
1006 
1007    for (unsigned i = 0; i < mmap_count; ++i) {
1008       if (mmap_array[i].handle == bo->handle &&
1009           (mmap_array[i].type == AGX_ALLOC_REGULAR) ==
1010              (bo->type == AGX_ALLOC_REGULAR)) {
1011          assert(!found && "mapped multiple times!");
1012          found = true;
1013 
1014          memset(&mmap_array[i], 0, sizeof(mmap_array[i]));
1015       }
1016    }
1017 
1018    assert(found && "freed unmapped memory");
1019 }
1020 
/* Index of the current frame. It is formatted as the numeric suffix of the
 * dump file name by agxdecode_dump_file_open() and incremented by
 * agxdecode_next_frame().
 */
static int agxdecode_dump_frame_count = 0;
1022 
1023 void
agxdecode_dump_file_open(void)1024 agxdecode_dump_file_open(void)
1025 {
1026    if (agxdecode_dump_stream)
1027       return;
1028 
1029    /* This does a getenv every frame, so it is possible to use
1030     * setenv to change the base at runtime.
1031     */
1032    const char *dump_file_base =
1033       getenv("AGXDECODE_DUMP_FILE") ?: "agxdecode.dump";
1034    if (!strcmp(dump_file_base, "stderr"))
1035       agxdecode_dump_stream = stderr;
1036    else {
1037       char buffer[1024];
1038       snprintf(buffer, sizeof(buffer), "%s.%04d", dump_file_base,
1039                agxdecode_dump_frame_count);
1040       printf("agxdecode: dump command stream to file %s\n", buffer);
1041       agxdecode_dump_stream = fopen(buffer, "w");
1042       if (!agxdecode_dump_stream) {
1043          fprintf(stderr,
1044                  "agxdecode: failed to open command stream log file %s\n",
1045                  buffer);
1046       }
1047    }
1048 }
1049 
1050 static void
agxdecode_dump_file_close(void)1051 agxdecode_dump_file_close(void)
1052 {
1053    if (agxdecode_dump_stream && agxdecode_dump_stream != stderr) {
1054       fclose(agxdecode_dump_stream);
1055       agxdecode_dump_stream = NULL;
1056    }
1057 }
1058 
1059 void
agxdecode_next_frame(void)1060 agxdecode_next_frame(void)
1061 {
1062    agxdecode_dump_file_close();
1063    agxdecode_dump_frame_count++;
1064 }
1065 
/* Public teardown entry point: closes the dump stream via
 * agxdecode_dump_file_close(). The frame counter is left unchanged.
 */
void
agxdecode_close(void)
{
   agxdecode_dump_file_close();
}
1071 
1072 static ssize_t
libagxdecode_writer(void * cookie,const char * buffer,size_t size)1073 libagxdecode_writer(void *cookie, const char *buffer, size_t size)
1074 {
1075    return lib_config.stream_write(buffer, size);
1076 }
1077 
#ifdef _GNU_SOURCE
/* I/O callbacks handed to fopencookie() in libagxdecode_init(). Only the
 * write callback is provided; the remaining members are left zeroed.
 */
static cookie_io_functions_t funcs = {.write = libagxdecode_writer};
#endif
1081 
/* Decoder parameters for the library interface. Filled in by
 * libagxdecode_init() from the configured chip ID and passed to
 * agxdecode_stateful() by the libagxdecode_* decode entry points.
 */
static decoder_params lib_params;
1083 
/*
 * Initialize the library interface.
 *
 * Derives the decoder parameters from config->chip_id, stores a copy of
 * *config in lib_config, and points agxdecode_dump_stream at a custom
 * stream whose writes are routed through libagxdecode_writer. Only
 * available when built with _GNU_SOURCE.
 */
void
libagxdecode_init(struct libagxdecode_config *config)
{
#ifndef _GNU_SOURCE
   /* fopencookie is a glibc extension */
   unreachable("libagxdecode only available with glibc");
#else
   chip_id_to_params(&lib_params, config->chip_id);

   lib_config = *config;
   agxdecode_dump_stream = fopencookie(NULL, "w", funcs);
#endif
}
1097 
1098 void
libagxdecode_vdm(uint64_t addr,const char * label,bool verbose)1099 libagxdecode_vdm(uint64_t addr, const char *label, bool verbose)
1100 {
1101    agxdecode_stateful(addr, label, agxdecode_vdm, verbose, &lib_params, NULL);
1102 }
1103 
1104 void
libagxdecode_cdm(uint64_t addr,const char * label,bool verbose)1105 libagxdecode_cdm(uint64_t addr, const char *label, bool verbose)
1106 {
1107    agxdecode_stateful(addr, label, agxdecode_cdm, verbose, &lib_params, NULL);
1108 }
1109 void
libagxdecode_usc(uint64_t addr,const char * label,bool verbose)1110 libagxdecode_usc(uint64_t addr, const char *label, bool verbose)
1111 {
1112    agxdecode_stateful(addr, label, agxdecode_usc, verbose, &lib_params, NULL);
1113 }
/* Library teardown: closes the dump stream via agxdecode_dump_file_close(),
 * which also resets agxdecode_dump_stream to NULL unless it is stderr.
 */
void
libagxdecode_shutdown(void)
{
   agxdecode_dump_file_close();
}
1119