1 /*
2 * Copyright 2017-2019 Alyssa Rosenzweig
3 * Copyright 2017-2019 Connor Abbott
4 * Copyright 2019 Collabora, Ltd.
5 * SPDX-License-Identifier: MIT
6 */
7
8 #include <ctype.h>
9 #include <memory.h>
10 #include <stdarg.h>
11 #include <stdbool.h>
12 #include <stdio.h>
13 #include <stdlib.h>
14 #include <sys/mman.h>
15 #include <agx_pack.h>
16
17 #include "util/u_hexdump.h"
18 #include "decode.h"
19 #ifdef __APPLE__
20 #include "agx_iokit.h"
21 #endif
22
/* Pending UAPI: local mirror of the drm_asahi_params_global struct the Asahi
 * DRM uAPI will provide; only the fields the decoder consumes. */
struct drm_asahi_params_global {
   int gpu_generation;     /* e.g. 13 or 14, see chip_id_to_params() */
   int gpu_variant;        /* variant letter, e.g. 'G', 'S', 'C', 'D' */
   int chip_id;            /* raw chip id, e.g. 0x8103 */
   int num_clusters_total; /* total GPU clusters; >1 enables G14X extras */
};

/* Client-supplied callbacks (GPU reads, stream writes) for library mode */
struct libagxdecode_config lib_config;

/* Human-readable labels for the allocation types, used in dump output */
UNUSED static const char *agx_alloc_types[AGX_NUM_ALLOC] = {"mem", "map",
                                                            "cmd"};
35
/* Disassemble a shader binary to fp. Stubbed out in this build; parameters
 * are kept so call sites stay unchanged. */
static void
agx_disassemble(void *_code, size_t maxlen, FILE *fp)
{
   /* stub */
}
41
/* Destination for all decoded output; opened lazily by
 * agxdecode_dump_file_open() */
FILE *agxdecode_dump_stream;

#define MAX_MAPPINGS 4096

/* Table of every BO the decoder knows about (see agxdecode_track_alloc) */
struct agx_bo mmap_array[MAX_MAPPINGS];
unsigned mmap_count = 0;

/* BOs temporarily mprotect'ed read-only during decoding; restored to
 * read-write by agxdecode_map_read_write() */
struct agx_bo *ro_mappings[MAX_MAPPINGS];
unsigned ro_mapping_count = 0;
51
52 static struct agx_bo *
agxdecode_find_mapped_gpu_mem_containing_rw(uint64_t addr)53 agxdecode_find_mapped_gpu_mem_containing_rw(uint64_t addr)
54 {
55 for (unsigned i = 0; i < mmap_count; ++i) {
56 if (mmap_array[i].type == AGX_ALLOC_REGULAR &&
57 addr >= mmap_array[i].ptr.gpu &&
58 (addr - mmap_array[i].ptr.gpu) < mmap_array[i].size)
59 return mmap_array + i;
60 }
61
62 return NULL;
63 }
64
65 static struct agx_bo *
agxdecode_find_mapped_gpu_mem_containing(uint64_t addr)66 agxdecode_find_mapped_gpu_mem_containing(uint64_t addr)
67 {
68 struct agx_bo *mem = agxdecode_find_mapped_gpu_mem_containing_rw(addr);
69
70 if (mem && mem->ptr.cpu && !mem->ro) {
71 mprotect(mem->ptr.cpu, mem->size, PROT_READ);
72 mem->ro = true;
73 ro_mappings[ro_mapping_count++] = mem;
74 assert(ro_mapping_count < MAX_MAPPINGS);
75 }
76
77 if (mem && !mem->mapped) {
78 fprintf(stderr,
79 "[ERROR] access to memory not mapped (GPU %" PRIx64
80 ", handle %u)\n",
81 mem->ptr.gpu, mem->handle);
82 }
83
84 return mem;
85 }
86
87 static struct agx_bo *
agxdecode_find_handle(unsigned handle,unsigned type)88 agxdecode_find_handle(unsigned handle, unsigned type)
89 {
90 for (unsigned i = 0; i < mmap_count; ++i) {
91 if (mmap_array[i].type != type)
92 continue;
93
94 if (mmap_array[i].handle != handle)
95 continue;
96
97 return &mmap_array[i];
98 }
99
100 return NULL;
101 }
102
103 static void
agxdecode_mark_mapped(unsigned handle)104 agxdecode_mark_mapped(unsigned handle)
105 {
106 struct agx_bo *bo = agxdecode_find_handle(handle, AGX_ALLOC_REGULAR);
107
108 if (!bo) {
109 fprintf(stderr, "ERROR - unknown BO mapped with handle %u\n", handle);
110 return;
111 }
112
113 /* Mark mapped for future consumption */
114 bo->mapped = true;
115 }
116
#ifdef __APPLE__

/*
 * Validate and dump the IOKit segment list attached to a command buffer,
 * and set bo->mapped on every BO named by a resource handle. All BOs are
 * first marked unmapped so stale state from a previous submission cannot
 * leak through. Malformed input is reported to the dump stream; decoding
 * continues on a best-effort basis rather than aborting.
 */
static void
agxdecode_decode_segment_list(void *segment_list)
{
   unsigned nr_handles = 0;

   /* First, mark everything unmapped */
   for (unsigned i = 0; i < mmap_count; ++i)
      mmap_array[i].mapped = false;

   /* Check the header */
   struct agx_map_header *hdr = segment_list;
   if (hdr->resource_group_count == 0) {
      fprintf(agxdecode_dump_stream, "ERROR - empty map\n");
      return;
   }

   /* Only a single segment is understood; report but keep decoding */
   if (hdr->segment_count != 1) {
      fprintf(agxdecode_dump_stream, "ERROR - can't handle segment count %u\n",
              hdr->segment_count);
   }

   fprintf(agxdecode_dump_stream, "Segment list:\n");
   fprintf(agxdecode_dump_stream, " Command buffer shmem ID: %" PRIx64 "\n",
           hdr->cmdbuf_id);
   fprintf(agxdecode_dump_stream, " Encoder ID: %" PRIx64 "\n",
           hdr->encoder_id);
   fprintf(agxdecode_dump_stream, " Kernel commands start offset: %u\n",
           hdr->kernel_commands_start_offset);
   fprintf(agxdecode_dump_stream, " Kernel commands end offset: %u\n",
           hdr->kernel_commands_end_offset);
   fprintf(agxdecode_dump_stream, " Unknown: 0x%X\n", hdr->unk);

   /* Expected structure: header followed by resource groups */
   size_t length = sizeof(struct agx_map_header);
   length += sizeof(struct agx_map_entry) * hdr->resource_group_count;

   if (length != hdr->length) {
      fprintf(agxdecode_dump_stream, "ERROR: expected length %zu, got %u\n",
              length, hdr->length);
   }

   if (hdr->padding[0] || hdr->padding[1])
      fprintf(agxdecode_dump_stream, "ERROR - padding tripped\n");

   /* Check the entries */
   struct agx_map_entry *groups = ((void *)hdr) + sizeof(*hdr);
   for (unsigned i = 0; i < hdr->resource_group_count; ++i) {
      struct agx_map_entry group = groups[i];
      unsigned count = group.resource_count;

      /* Each group holds up to 6 parallel id/unk/flags entries */
      STATIC_ASSERT(ARRAY_SIZE(group.resource_id) == 6);
      STATIC_ASSERT(ARRAY_SIZE(group.resource_unk) == 6);
      STATIC_ASSERT(ARRAY_SIZE(group.resource_flags) == 6);

      if ((count < 1) || (count > 6)) {
         fprintf(agxdecode_dump_stream, "ERROR - invalid count %u\n", count);
         continue;
      }

      for (unsigned j = 0; j < count; ++j) {
         unsigned handle = group.resource_id[j];
         unsigned unk = group.resource_unk[j];
         unsigned flags = group.resource_flags[j];

         /* Handle 0 is not a valid resource id */
         if (!handle) {
            fprintf(agxdecode_dump_stream, "ERROR - invalid handle %u\n",
                    handle);
            continue;
         }

         agxdecode_mark_mapped(handle);
         nr_handles++;

         fprintf(agxdecode_dump_stream, "%u (0x%X, 0x%X)\n", handle, unk,
                 flags);
      }

      if (group.unka)
         fprintf(agxdecode_dump_stream, "ERROR - unknown 0x%X\n", group.unka);

      /* Visual separator for resource groups */
      fprintf(agxdecode_dump_stream, "\n");
   }

   /* Check the handle count */
   if (nr_handles != hdr->total_resources) {
      fprintf(agxdecode_dump_stream,
              "ERROR - wrong handle count, got %u, expected %u (%u entries)\n",
              nr_handles, hdr->total_resources, hdr->resource_group_count);
   }
}

#endif
212
/*
 * Copy `size` bytes of GPU memory at gpu_va into buf and return the byte
 * count. In library mode the client's read_gpu_mem callback is used
 * directly; otherwise the tracked mapping table is searched (which also
 * makes the containing BO read-only as a side effect). An unknown address
 * or a read past the end of the BO prints the offending call site
 * (filename:line, captured by the macros below) and asserts.
 */
static size_t
__agxdecode_fetch_gpu_mem(const struct agx_bo *mem, uint64_t gpu_va,
                          size_t size, void *buf, int line,
                          const char *filename)
{
   if (lib_config.read_gpu_mem)
      return lib_config.read_gpu_mem(gpu_va, size, buf);

   if (!mem)
      mem = agxdecode_find_mapped_gpu_mem_containing(gpu_va);

   if (!mem) {
      fprintf(stderr, "Access to unknown memory %" PRIx64 " in %s:%d\n", gpu_va,
              filename, line);
      fflush(agxdecode_dump_stream);
      assert(0);
   }

   assert(mem);

   /* Reject reads that run off the end of the containing BO */
   if (size + (gpu_va - mem->ptr.gpu) > mem->size) {
      fprintf(stderr,
              "Overflowing to unknown memory %" PRIx64
              " of size %zu (max size %zu) in %s:%d\n",
              gpu_va, size, (size_t)(mem->size - (gpu_va - mem->ptr.gpu)),
              filename, line);
      fflush(agxdecode_dump_stream);
      assert(0);
   }

   memcpy(buf, mem->ptr.cpu + gpu_va - mem->ptr.gpu, size);

   return size;
}

/* Fetch that records the call site for the error messages above */
#define agxdecode_fetch_gpu_mem(gpu_va, size, buf)                             \
   __agxdecode_fetch_gpu_mem(NULL, gpu_va, size, buf, __LINE__, __FILE__)

/* Fetch sizeof(buf) bytes — buf must be a true array, not a pointer */
#define agxdecode_fetch_gpu_array(gpu_va, buf)                                 \
   agxdecode_fetch_gpu_mem(gpu_va, sizeof(buf), buf)
253
254 static void
agxdecode_map_read_write(void)255 agxdecode_map_read_write(void)
256 {
257 for (unsigned i = 0; i < ro_mapping_count; ++i) {
258 ro_mappings[i]->ro = false;
259 mprotect(ro_mappings[i]->ptr.cpu, ro_mappings[i]->size,
260 PROT_READ | PROT_WRITE);
261 }
262
263 ro_mapping_count = 0;
264 }
265
/* Helpers for parsing the cmdstream */

/* Print an already-unpacked struct at the current indent level */
#define DUMP_UNPACKED(T, var, str)                                             \
   {                                                                           \
      agxdecode_log(str);                                                      \
      agx_print(agxdecode_dump_stream, T, var, (agxdecode_indent + 1) * 2);    \
   }

/* Unpack a descriptor from cl and print it in one step */
#define DUMP_CL(T, cl, str)                                                    \
   {                                                                           \
      agx_unpack(agxdecode_dump_stream, cl, T, temp);                          \
      DUMP_UNPACKED(T, temp, str "\n");                                        \
   }

#define agxdecode_log(str) fputs(str, agxdecode_dump_stream)
#define agxdecode_msg(str) fprintf(agxdecode_dump_stream, "// %s", str)

/* Indentation level for nested dump output */
unsigned agxdecode_indent = 0;

/* Shorthand pending the real UAPI header */
typedef struct drm_asahi_params_global decoder_params;

/* Abstraction for command stream parsing: a decoder consumes one packet at
 * map, returning either its size in bytes or one of the STATE_* sentinels
 * below; on LINK/CALL it stores the target address through *link. */
typedef unsigned (*decode_cmd)(const uint8_t *map, uint64_t *link, bool verbose,
                               decoder_params *params, void *data);

/* Sentinel return values, chosen above any plausible packet length */
#define STATE_DONE (0xFFFFFFFFu) /* stream terminated */
#define STATE_LINK (0xFFFFFFFEu) /* jump to *link */
#define STATE_CALL (0xFFFFFFFDu) /* call *link, pushing a return address */
#define STATE_RET (0xFFFFFFFCu)  /* return to the pushed address */
295
/*
 * Walk a stateful command stream starting at va, dispatching each packet to
 * `decoder` until it signals STATE_DONE. Handles stream links (jumps) and
 * call/return via a small address stack, refilling the 1 KiB read window as
 * the cursor nears its end. Packets the decoder cannot parse are hexdumped
 * (8 bytes) and skipped so the walk cannot hang.
 */
static void
agxdecode_stateful(uint64_t va, const char *label, decode_cmd decoder,
                   bool verbose, decoder_params *params, void *data)
{
   /* Return-address stack for STATE_CALL / STATE_RET */
   uint64_t stack[16];
   unsigned sp = 0;

   /* Sliding window over the stream contents */
   uint8_t buf[1024];
   if (!lib_config.read_gpu_mem) {
      struct agx_bo *alloc = agxdecode_find_mapped_gpu_mem_containing(va);
      assert(alloc != NULL && "nonexistent object");
      fprintf(agxdecode_dump_stream, "%s (%" PRIx64 ", handle %u)\n", label, va,
              alloc->handle);
   } else {
      fprintf(agxdecode_dump_stream, "%s (%" PRIx64 ")\n", label, va);
   }
   fflush(agxdecode_dump_stream);

   int len = agxdecode_fetch_gpu_array(va, buf);

   /* Bytes remaining in the current window */
   int left = len;
   uint8_t *map = buf;
   uint64_t link = 0;

   fflush(agxdecode_dump_stream);

   while (left) {
      if (len <= 0) {
         fprintf(agxdecode_dump_stream, "!! Failed to read GPU memory\n");
         fflush(agxdecode_dump_stream);
         return;
      }

      unsigned count = decoder(map, &link, verbose, params, data);

      /* If we fail to decode, default to a hexdump (don't hang) */
      if (count == 0) {
         u_hexdump(agxdecode_dump_stream, map, 8, false);
         count = 8;
      }

      fflush(agxdecode_dump_stream);
      if (count == STATE_DONE) {
         break;
      } else if (count == STATE_LINK) {
         fprintf(agxdecode_dump_stream, "Linking to 0x%" PRIx64 "\n\n", link);
         va = link;
         left = len = agxdecode_fetch_gpu_array(va, buf);
         map = buf;
      } else if (count == STATE_CALL) {
         /* Return address is the packet after the 8-byte link packet */
         fprintf(agxdecode_dump_stream,
                 "Calling 0x%" PRIx64 " (return = 0x%" PRIx64 ")\n\n", link,
                 va + 8);
         assert(sp < ARRAY_SIZE(stack));
         stack[sp++] = va + 8;
         va = link;
         left = len = agxdecode_fetch_gpu_array(va, buf);
         map = buf;
      } else if (count == STATE_RET) {
         assert(sp > 0);
         va = stack[--sp];
         fprintf(agxdecode_dump_stream, "Returning to 0x%" PRIx64 "\n\n", va);
         left = len = agxdecode_fetch_gpu_array(va, buf);
         map = buf;
      } else {
         va += count;
         map += count;
         left -= count;

         /* Refill before a packet could straddle the window's end; only
          * refetch when the last fetch filled the whole buffer (i.e. the
          * stream may extend beyond it) */
         if (left < 512 && len == sizeof(buf)) {
            left = len = agxdecode_fetch_gpu_array(va, buf);
            map = buf;
         }
      }
   }
}
372
/*
 * Decode one USC (shader data-control) word at map. `data`, when non-NULL,
 * points at the sampler-states field of the enclosing packet, which tells
 * us whether sampler entries are followed by extended border descriptors.
 * Returns the word's length in bytes, or STATE_DONE at the end of the
 * program description.
 */
static unsigned
agxdecode_usc(const uint8_t *map, UNUSED uint64_t *link, UNUSED bool verbose,
              decoder_params *params, UNUSED void *data)
{
   enum agx_sampler_states *sampler_states = data;
   enum agx_usc_control type = map[0];
   uint8_t buf[8192];

   /* Extended sampler layouts interleave a border descriptor per sampler */
   bool extended_samplers =
      (sampler_states != NULL) &&
      (((*sampler_states) == AGX_SAMPLER_STATES_8_EXTENDED) ||
       ((*sampler_states) == AGX_SAMPLER_STATES_16_EXTENDED));

/* Trivial case: dump the word and advance by its fixed length */
#define USC_CASE(name, human)                                                  \
   case AGX_USC_CONTROL_##name: {                                              \
      DUMP_CL(USC_##name, map, human);                                         \
      return AGX_USC_##name##_LENGTH;                                          \
   }

   switch (type) {
   case AGX_USC_CONTROL_NO_PRESHADER: {
      DUMP_CL(USC_NO_PRESHADER, map, "No preshader");
      return STATE_DONE;
   }

   case AGX_USC_CONTROL_PRESHADER: {
      agx_unpack(agxdecode_dump_stream, map, USC_PRESHADER, ctrl);
      DUMP_UNPACKED(USC_PRESHADER, ctrl, "Preshader\n");

      agx_disassemble(buf, agxdecode_fetch_gpu_array(ctrl.code, buf),
                      agxdecode_dump_stream);

      return STATE_DONE;
   }

   case AGX_USC_CONTROL_SHADER: {
      agx_unpack(agxdecode_dump_stream, map, USC_SHADER, ctrl);
      DUMP_UNPACKED(USC_SHADER, ctrl, "Shader\n");

      agxdecode_log("\n");
      agx_disassemble(buf, agxdecode_fetch_gpu_array(ctrl.code, buf),
                      agxdecode_dump_stream);
      agxdecode_log("\n");

      return AGX_USC_SHADER_LENGTH;
   }

   case AGX_USC_CONTROL_SAMPLER: {
      agx_unpack(agxdecode_dump_stream, map, USC_SAMPLER, temp);
      DUMP_UNPACKED(USC_SAMPLER, temp, "Sampler state\n");

      /* VLA shadows the outer buf; sized for the worst (extended) case */
      uint8_t buf[(AGX_SAMPLER_LENGTH + AGX_BORDER_LENGTH) * temp.count];
      uint8_t *samp = buf;

      agxdecode_fetch_gpu_array(temp.buffer, buf);

      for (unsigned i = 0; i < temp.count; ++i) {
         DUMP_CL(SAMPLER, samp, "Sampler");
         samp += AGX_SAMPLER_LENGTH;

         if (extended_samplers) {
            DUMP_CL(BORDER, samp, "Border");
            samp += AGX_BORDER_LENGTH;
         }
      }

      return AGX_USC_SAMPLER_LENGTH;
   }

   case AGX_USC_CONTROL_TEXTURE: {
      agx_unpack(agxdecode_dump_stream, map, USC_TEXTURE, temp);
      DUMP_UNPACKED(USC_TEXTURE, temp, "Texture state\n");

      uint8_t buf[AGX_TEXTURE_LENGTH * temp.count];
      uint8_t *tex = buf;

      agxdecode_fetch_gpu_array(temp.buffer, buf);

      /* Note: samplers only need 8 byte alignment? */
      for (unsigned i = 0; i < temp.count; ++i) {
         /* Dump the descriptor under both interpretations (texture and
          * pixel-backend); which applies depends on shader usage */
         agx_unpack(agxdecode_dump_stream, tex, TEXTURE, t);
         DUMP_CL(TEXTURE, tex, "Texture");
         DUMP_CL(PBE, tex, "PBE");

         tex += AGX_TEXTURE_LENGTH;
      }

      return AGX_USC_TEXTURE_LENGTH;
   }

   case AGX_USC_CONTROL_UNIFORM: {
      agx_unpack(agxdecode_dump_stream, map, USC_UNIFORM, temp);
      DUMP_UNPACKED(USC_UNIFORM, temp, "Uniform\n");

      /* size_halfs counts 16-bit units, hence the factor of 2 */
      uint8_t buf[2 * temp.size_halfs];
      agxdecode_fetch_gpu_array(temp.buffer, buf);
      u_hexdump(agxdecode_dump_stream, buf, 2 * temp.size_halfs, false);

      return AGX_USC_UNIFORM_LENGTH;
   }

   case AGX_USC_CONTROL_UNIFORM_HIGH: {
      agx_unpack(agxdecode_dump_stream, map, USC_UNIFORM_HIGH, temp);
      DUMP_UNPACKED(USC_UNIFORM_HIGH, temp, "Uniform (high)\n");

      uint8_t buf[2 * temp.size_halfs];
      agxdecode_fetch_gpu_array(temp.buffer, buf);
      u_hexdump(agxdecode_dump_stream, buf, 2 * temp.size_halfs, false);

      return AGX_USC_UNIFORM_HIGH_LENGTH;
   }

      USC_CASE(FRAGMENT_PROPERTIES, "Fragment properties");
      USC_CASE(SHARED, "Shared");
      USC_CASE(REGISTERS, "Registers");

   default:
      fprintf(agxdecode_dump_stream, "Unknown USC control type: %u\n", type);
      u_hexdump(agxdecode_dump_stream, map, 8, false);
      return 8;
   }

#undef USC_CASE
}
497
/* Dump one optional PPP word if its presence bit is set in the header,
 * bounds-checking against the end of the record and advancing `map` past
 * the word. Only usable inside agxdecode_record, where hdr/base/size are in
 * scope and an early `return` is valid. */
#define PPP_PRINT(map, header_name, struct_name, human)                        \
   if (hdr.header_name) {                                                      \
      if (((map + AGX_##struct_name##_LENGTH) > (base + size))) {              \
         fprintf(agxdecode_dump_stream, "Buffer overrun in PPP update\n");     \
         return;                                                               \
      }                                                                        \
      DUMP_CL(struct_name, map, human);                                        \
      map += AGX_##struct_name##_LENGTH;                                       \
      fflush(agxdecode_dump_stream);                                           \
   }
508
/*
 * Decode a PPP (per-primitive pipeline) state record of `size` bytes at va.
 * The header's presence bits select which optional words follow, in a fixed
 * order; each consumed word is bounds-checked against the declared size.
 */
static void
agxdecode_record(uint64_t va, size_t size, bool verbose, decoder_params *params)
{
   uint8_t buf[size];
   uint8_t *base = buf;
   uint8_t *map = base;

   agxdecode_fetch_gpu_array(va, buf);

   agx_unpack(agxdecode_dump_stream, map, PPP_HEADER, hdr);
   map += AGX_PPP_HEADER_LENGTH;

   PPP_PRINT(map, fragment_control, FRAGMENT_CONTROL, "Fragment control");
   /* fragment_control_2 shares the FRAGMENT_CONTROL layout */
   PPP_PRINT(map, fragment_control_2, FRAGMENT_CONTROL, "Fragment control 2");
   PPP_PRINT(map, fragment_front_face, FRAGMENT_FACE, "Front face");
   PPP_PRINT(map, fragment_front_face_2, FRAGMENT_FACE_2, "Front face 2");
   PPP_PRINT(map, fragment_front_stencil, FRAGMENT_STENCIL, "Front stencil");
   PPP_PRINT(map, fragment_back_face, FRAGMENT_FACE, "Back face");
   PPP_PRINT(map, fragment_back_face_2, FRAGMENT_FACE_2, "Back face 2");
   PPP_PRINT(map, fragment_back_stencil, FRAGMENT_STENCIL, "Back stencil");
   PPP_PRINT(map, depth_bias_scissor, DEPTH_BIAS_SCISSOR, "Depth bias/scissor");

   /* Region clip repeats per viewport, so bounds-check it by hand */
   if (hdr.region_clip) {
      if (((map + (AGX_REGION_CLIP_LENGTH * hdr.viewport_count)) >
           (base + size))) {
         fprintf(agxdecode_dump_stream, "Buffer overrun in PPP update\n");
         return;
      }

      for (unsigned i = 0; i < hdr.viewport_count; ++i) {
         DUMP_CL(REGION_CLIP, map, "Region clip");
         map += AGX_REGION_CLIP_LENGTH;
         fflush(agxdecode_dump_stream);
      }
   }

   /* One control word followed by viewport_count viewport words */
   if (hdr.viewport) {
      if (((map + AGX_VIEWPORT_CONTROL_LENGTH +
            (AGX_VIEWPORT_LENGTH * hdr.viewport_count)) > (base + size))) {
         fprintf(agxdecode_dump_stream, "Buffer overrun in PPP update\n");
         return;
      }

      DUMP_CL(VIEWPORT_CONTROL, map, "Viewport control");
      map += AGX_VIEWPORT_CONTROL_LENGTH;

      for (unsigned i = 0; i < hdr.viewport_count; ++i) {
         DUMP_CL(VIEWPORT, map, "Viewport");
         map += AGX_VIEWPORT_LENGTH;
         fflush(agxdecode_dump_stream);
      }
   }

   PPP_PRINT(map, w_clamp, W_CLAMP, "W clamp");
   PPP_PRINT(map, output_select, OUTPUT_SELECT, "Output select");
   PPP_PRINT(map, varying_counts_32, VARYING_COUNTS, "Varying counts 32");
   PPP_PRINT(map, varying_counts_16, VARYING_COUNTS, "Varying counts 16");
   PPP_PRINT(map, cull, CULL, "Cull");
   PPP_PRINT(map, cull_2, CULL_2, "Cull 2");

   /* The fragment shader word embeds a USC pipeline plus the coefficient
    * (varying interpolation) bindings */
   if (hdr.fragment_shader) {
      agx_unpack(agxdecode_dump_stream, map, FRAGMENT_SHADER, frag);
      agxdecode_stateful(frag.pipeline, "Fragment pipeline", agxdecode_usc,
                         verbose, params, &frag.sampler_state_register_count);

      if (frag.cf_bindings) {
         /* NOTE(review): fixed 128-byte fetch/dump regardless of
          * cf_binding_count — assumed to cover the header plus all
          * bindings; confirm against the packed sizes */
         uint8_t buf[128];
         uint8_t *cf = buf;

         agxdecode_fetch_gpu_array(frag.cf_bindings, buf);
         u_hexdump(agxdecode_dump_stream, cf, 128, false);

         DUMP_CL(CF_BINDING_HEADER, cf, "Coefficient binding header:");
         cf += AGX_CF_BINDING_HEADER_LENGTH;

         for (unsigned i = 0; i < frag.cf_binding_count; ++i) {
            DUMP_CL(CF_BINDING, cf, "Coefficient binding:");
            cf += AGX_CF_BINDING_LENGTH;
         }
      }

      DUMP_UNPACKED(FRAGMENT_SHADER, frag, "Fragment shader\n");
      map += AGX_FRAGMENT_SHADER_LENGTH;
   }

   PPP_PRINT(map, occlusion_query, FRAGMENT_OCCLUSION_QUERY, "Occlusion query");
   PPP_PRINT(map, occlusion_query_2, FRAGMENT_OCCLUSION_QUERY_2,
             "Occlusion query 2");
   PPP_PRINT(map, output_unknown, OUTPUT_UNKNOWN, "Output unknown");
   PPP_PRINT(map, output_size, OUTPUT_SIZE, "Output size");
   PPP_PRINT(map, varying_word_2, VARYING_2, "Varying word 2");

   /* PPP print checks we don't read too much, now check we read enough */
   assert(map == (base + size) && "invalid size of PPP update");
}
604
605 static unsigned
agxdecode_cdm(const uint8_t * map,uint64_t * link,bool verbose,decoder_params * params,UNUSED void * data)606 agxdecode_cdm(const uint8_t *map, uint64_t *link, bool verbose,
607 decoder_params *params, UNUSED void *data)
608 {
609 /* Bits 29-31 contain the block type */
610 enum agx_cdm_block_type block_type = (map[3] >> 5);
611
612 switch (block_type) {
613 case AGX_CDM_BLOCK_TYPE_LAUNCH: {
614 size_t length = AGX_CDM_LAUNCH_LENGTH;
615
616 #define CDM_PRINT(STRUCT_NAME, human) \
617 do { \
618 DUMP_CL(CDM_##STRUCT_NAME, map, human); \
619 map += AGX_CDM_##STRUCT_NAME##_LENGTH; \
620 length += AGX_CDM_##STRUCT_NAME##_LENGTH; \
621 } while (0);
622
623 agx_unpack(agxdecode_dump_stream, map, CDM_LAUNCH, hdr);
624 agxdecode_stateful(hdr.pipeline, "Pipeline", agxdecode_usc, verbose,
625 params, &hdr.sampler_state_register_count);
626 DUMP_UNPACKED(CDM_LAUNCH, hdr, "Compute\n");
627 map += AGX_CDM_LAUNCH_LENGTH;
628
629 /* Added in G14X */
630 if (params->gpu_generation >= 14 && params->num_clusters_total > 1)
631 CDM_PRINT(UNK_G14X, "Unknown G14X");
632
633 switch (hdr.mode) {
634 case AGX_CDM_MODE_DIRECT:
635 CDM_PRINT(GLOBAL_SIZE, "Global size");
636 CDM_PRINT(LOCAL_SIZE, "Local size");
637 break;
638 case AGX_CDM_MODE_INDIRECT_GLOBAL:
639 CDM_PRINT(INDIRECT, "Indirect buffer");
640 CDM_PRINT(LOCAL_SIZE, "Local size");
641 break;
642 case AGX_CDM_MODE_INDIRECT_LOCAL:
643 CDM_PRINT(INDIRECT, "Indirect buffer");
644 break;
645 default:
646 fprintf(agxdecode_dump_stream, "Unknown CDM mode: %u\n", hdr.mode);
647 break;
648 }
649
650 return length;
651 }
652
653 case AGX_CDM_BLOCK_TYPE_STREAM_LINK: {
654 agx_unpack(agxdecode_dump_stream, map, CDM_STREAM_LINK, hdr);
655 DUMP_UNPACKED(CDM_STREAM_LINK, hdr, "Stream Link\n");
656 *link = hdr.target_lo | (((uint64_t)hdr.target_hi) << 32);
657 return STATE_LINK;
658 }
659
660 case AGX_CDM_BLOCK_TYPE_STREAM_TERMINATE: {
661 DUMP_CL(CDM_STREAM_TERMINATE, map, "Stream Terminate");
662 return STATE_DONE;
663 }
664
665 case AGX_CDM_BLOCK_TYPE_BARRIER: {
666 DUMP_CL(CDM_BARRIER, map, "Barrier");
667 return AGX_CDM_BARRIER_LENGTH;
668 }
669
670 default:
671 fprintf(agxdecode_dump_stream, "Unknown CDM block type: %u\n",
672 block_type);
673 u_hexdump(agxdecode_dump_stream, map, 8, false);
674 return 8;
675 }
676 }
677
/*
 * Decode one VDM (vertex/draw) control-stream packet at map. Returns the
 * packet length in bytes or a STATE_* sentinel (link/call/return/done).
 */
static unsigned
agxdecode_vdm(const uint8_t *map, uint64_t *link, bool verbose,
              decoder_params *params, UNUSED void *data)
{
   /* Bits 29-31 contain the block type */
   enum agx_vdm_block_type block_type = (map[3] >> 5);

   switch (block_type) {
   case AGX_VDM_BLOCK_TYPE_BARRIER: {
      agx_unpack(agxdecode_dump_stream, map, VDM_BARRIER, hdr);
      DUMP_UNPACKED(VDM_BARRIER, hdr, "Barrier\n");
      /* A barrier with `returns` set doubles as the return instruction */
      return hdr.returns ? STATE_RET : AGX_VDM_BARRIER_LENGTH;
   }

   case AGX_VDM_BLOCK_TYPE_PPP_STATE_UPDATE: {
      agx_unpack(agxdecode_dump_stream, map, PPP_STATE, cmd);

      uint64_t address = (((uint64_t)cmd.pointer_hi) << 32) | cmd.pointer_lo;

      /* Only pre-validate the pointer when we have a local mapping table;
       * library mode reads through a callback instead */
      if (!lib_config.read_gpu_mem) {
         struct agx_bo *mem = agxdecode_find_mapped_gpu_mem_containing(address);

         if (!mem) {
            DUMP_UNPACKED(PPP_STATE, cmd, "Non-existent record (XXX)\n");
            return AGX_PPP_STATE_LENGTH;
         }
      }

      agxdecode_record(address, cmd.size_words * 4, verbose, params);
      return AGX_PPP_STATE_LENGTH;
   }

   case AGX_VDM_BLOCK_TYPE_VDM_STATE_UPDATE: {
      size_t length = AGX_VDM_STATE_LENGTH;
      agx_unpack(agxdecode_dump_stream, map, VDM_STATE, hdr);
      map += AGX_VDM_STATE_LENGTH;

/* Dump one optional state word if present and account for its length */
#define VDM_PRINT(header_name, STRUCT_NAME, human)                             \
   if (hdr.header_name##_present) {                                            \
      DUMP_CL(VDM_STATE_##STRUCT_NAME, map, human);                            \
      map += AGX_VDM_STATE_##STRUCT_NAME##_LENGTH;                             \
      length += AGX_VDM_STATE_##STRUCT_NAME##_LENGTH;                          \
   }

      VDM_PRINT(restart_index, RESTART_INDEX, "Restart index");

      /* If word 1 is present but word 0 is not, fallback to compact samplers */
      enum agx_sampler_states sampler_states = 0;

      /* Peek word 0 (without consuming it yet) for the sampler layout that
       * the word-1 pipeline walk below needs */
      if (hdr.vertex_shader_word_0_present) {
         agx_unpack(agxdecode_dump_stream, map, VDM_STATE_VERTEX_SHADER_WORD_0,
                    word_0);
         sampler_states = word_0.sampler_state_register_count;
      }

      VDM_PRINT(vertex_shader_word_0, VERTEX_SHADER_WORD_0,
                "Vertex shader word 0");

      /* Word 1 carries the USC pipeline pointer; walk it before dumping */
      if (hdr.vertex_shader_word_1_present) {
         agx_unpack(agxdecode_dump_stream, map, VDM_STATE_VERTEX_SHADER_WORD_1,
                    word_1);
         fprintf(agxdecode_dump_stream, "Pipeline %X\n",
                 (uint32_t)word_1.pipeline);
         agxdecode_stateful(word_1.pipeline, "Pipeline", agxdecode_usc, verbose,
                            params, &sampler_states);
      }

      VDM_PRINT(vertex_shader_word_1, VERTEX_SHADER_WORD_1,
                "Vertex shader word 1");
      VDM_PRINT(vertex_outputs, VERTEX_OUTPUTS, "Vertex outputs");
      VDM_PRINT(tessellation, TESSELLATION, "Tessellation");
      VDM_PRINT(vertex_unknown, VERTEX_UNKNOWN, "Vertex unknown");
      VDM_PRINT(tessellation_scale, TESSELLATION_SCALE, "Tessellation scale");

#undef VDM_PRINT
      /* State updates are 8-byte aligned unless the final word is present */
      return hdr.tessellation_scale_present ? length : ALIGN_POT(length, 8);
   }

   case AGX_VDM_BLOCK_TYPE_INDEX_LIST: {
      size_t length = AGX_INDEX_LIST_LENGTH;
      agx_unpack(agxdecode_dump_stream, map, INDEX_LIST, hdr);
      DUMP_UNPACKED(INDEX_LIST, hdr, "Index List\n");
      map += AGX_INDEX_LIST_LENGTH;

/* Dump one optional index-list word if present */
#define IDX_PRINT(header_name, STRUCT_NAME, human)                             \
   if (hdr.header_name##_present) {                                            \
      DUMP_CL(INDEX_LIST_##STRUCT_NAME, map, human);                           \
      map += AGX_INDEX_LIST_##STRUCT_NAME##_LENGTH;                            \
      length += AGX_INDEX_LIST_##STRUCT_NAME##_LENGTH;                         \
   }

      IDX_PRINT(index_buffer, BUFFER_LO, "Index buffer");
      IDX_PRINT(index_count, COUNT, "Index count");
      IDX_PRINT(instance_count, INSTANCES, "Instance count");
      IDX_PRINT(start, START, "Start");
      IDX_PRINT(indirect_buffer, INDIRECT_BUFFER, "Indirect buffer");
      IDX_PRINT(index_buffer_size, BUFFER_SIZE, "Index buffer size");

#undef IDX_PRINT
      return length;
   }

   case AGX_VDM_BLOCK_TYPE_STREAM_LINK: {
      agx_unpack(agxdecode_dump_stream, map, VDM_STREAM_LINK, hdr);
      DUMP_UNPACKED(VDM_STREAM_LINK, hdr, "Stream Link\n");
      *link = hdr.target_lo | (((uint64_t)hdr.target_hi) << 32);
      return hdr.with_return ? STATE_CALL : STATE_LINK;
   }

   case AGX_VDM_BLOCK_TYPE_STREAM_TERMINATE: {
      DUMP_CL(VDM_STREAM_TERMINATE, map, "Stream Terminate");
      return STATE_DONE;
   }

   case AGX_VDM_BLOCK_TYPE_TESSELLATE: {
      size_t length = AGX_VDM_TESSELLATE_LENGTH;
      agx_unpack(agxdecode_dump_stream, map, VDM_TESSELLATE, hdr);
      DUMP_UNPACKED(VDM_TESSELLATE, hdr, "Tessellate List\n");
      map += AGX_VDM_TESSELLATE_LENGTH;

/* Dump one optional tessellation word if present */
#define TESS_PRINT(header_name, STRUCT_NAME, human)                            \
   if (hdr.header_name##_present) {                                            \
      DUMP_CL(VDM_TESSELLATE_##STRUCT_NAME, map, human);                       \
      map += AGX_VDM_TESSELLATE_##STRUCT_NAME##_LENGTH;                        \
      length += AGX_VDM_TESSELLATE_##STRUCT_NAME##_LENGTH;                     \
   }

      TESS_PRINT(factor_buffer, FACTOR_BUFFER, "Factor buffer");
      TESS_PRINT(patch_count, PATCH_COUNT, "Patch");
      TESS_PRINT(instance_count, INSTANCE_COUNT, "Instance count");
      TESS_PRINT(base_patch, BASE_PATCH, "Base patch");
      TESS_PRINT(base_instance, BASE_INSTANCE, "Base instance");
      TESS_PRINT(instance_stride, INSTANCE_STRIDE, "Instance stride");
      TESS_PRINT(indirect, INDIRECT, "Indirect");
      TESS_PRINT(unknown, UNKNOWN, "Unknown");

#undef TESS_PRINT
      return length;
   }

   default:
      fprintf(agxdecode_dump_stream, "Unknown VDM block type: %u\n",
              block_type);
      u_hexdump(agxdecode_dump_stream, map, 8, false);
      return 8;
   }
}
825
/*
 * Decode an IOGPU compute submission: the COMPUTE header lives at 32-bit
 * word 16 of the command buffer, followed by a CDM encoder walk and a
 * disassembly of the context switch program.
 */
static void
agxdecode_cs(uint32_t *cmdbuf, uint64_t encoder, bool verbose,
             decoder_params *params)
{
   agx_unpack(agxdecode_dump_stream, cmdbuf + 16, IOGPU_COMPUTE, cs);
   DUMP_UNPACKED(IOGPU_COMPUTE, cs, "Compute\n");

   agxdecode_stateful(encoder, "Encoder", agxdecode_cdm, verbose, params, NULL);

   fprintf(agxdecode_dump_stream, "Context switch program:\n");
   uint8_t buf[1024];
   agx_disassemble(buf,
                   agxdecode_fetch_gpu_array(cs.context_switch_program, buf),
                   agxdecode_dump_stream);
}
841
/*
 * Decode an IOGPU graphics submission: the GRAPHICS header is at 32-bit
 * word 16, followed by the VDM encoder walk and the meta pipelines
 * (clear/store/partial reload/partial store) when present.
 */
static void
agxdecode_gfx(uint32_t *cmdbuf, uint64_t encoder, bool verbose,
              decoder_params *params)
{
   agx_unpack(agxdecode_dump_stream, cmdbuf + 16, IOGPU_GRAPHICS, gfx);
   DUMP_UNPACKED(IOGPU_GRAPHICS, gfx, "Graphics\n");

   agxdecode_stateful(encoder, "Encoder", agxdecode_vdm, verbose, params, NULL);

   if (gfx.clear_pipeline_unk) {
      fprintf(agxdecode_dump_stream, "Unk: %X\n", gfx.clear_pipeline_unk);
      agxdecode_stateful(gfx.clear_pipeline, "Clear pipeline", agxdecode_usc,
                         verbose, params, NULL);
   }

   if (gfx.store_pipeline_unk) {
      /* Only the value 0x4 has been observed here */
      assert(gfx.store_pipeline_unk == 0x4);
      agxdecode_stateful(gfx.store_pipeline, "Store pipeline", agxdecode_usc,
                         verbose, params, NULL);
   }

   assert((gfx.partial_reload_pipeline_unk & 0xF) == 0x4);
   if (gfx.partial_reload_pipeline) {
      agxdecode_stateful(gfx.partial_reload_pipeline, "Partial reload pipeline",
                         agxdecode_usc, verbose, params, NULL);
   }

   if (gfx.partial_store_pipeline) {
      agxdecode_stateful(gfx.partial_store_pipeline, "Partial store pipeline",
                         agxdecode_usc, verbose, params, NULL);
   }
}
874
875 static void
chip_id_to_params(decoder_params * params,uint32_t chip_id)876 chip_id_to_params(decoder_params *params, uint32_t chip_id)
877 {
878 switch (chip_id) {
879 case 0x6000 ... 0x6002:
880 *params = (decoder_params){
881 .gpu_generation = 13,
882 .gpu_variant = "SCD"[chip_id & 15],
883 .chip_id = chip_id,
884 .num_clusters_total = 2 << (chip_id & 15),
885 };
886 break;
887 case 0x6020 ... 0x6022:
888 *params = (decoder_params){
889 .gpu_generation = 14,
890 .gpu_variant = "SCD"[chip_id & 15],
891 .chip_id = chip_id,
892 .num_clusters_total = 2 << (chip_id & 15),
893 };
894 break;
895 case 0x8112:
896 *params = (decoder_params){
897 .gpu_generation = 14,
898 .gpu_variant = 'G',
899 .chip_id = chip_id,
900 .num_clusters_total = 1,
901 };
902 break;
903 case 0x8103:
904 default:
905 *params = (decoder_params){
906 .gpu_generation = 13,
907 .gpu_variant = 'G',
908 .chip_id = chip_id,
909 .num_clusters_total = 1,
910 };
911 break;
912 }
913 }
914
#ifdef __APPLE__

/*
 * Decode a full macOS (IOKit) command buffer submission: validate the
 * segment list, dump the IOGPU header and attachments, then walk the
 * encoder as compute (unk_5 == 3) or graphics. Restores any read-only page
 * protections before returning.
 */
void
agxdecode_cmdstream(unsigned cmdbuf_handle, unsigned map_handle, bool verbose)
{
   agxdecode_dump_file_open();

   struct agx_bo *cmdbuf =
      agxdecode_find_handle(cmdbuf_handle, AGX_ALLOC_CMDBUF);
   struct agx_bo *map = agxdecode_find_handle(map_handle, AGX_ALLOC_MEMMAP);
   assert(cmdbuf != NULL && "nonexistent command buffer");
   assert(map != NULL && "nonexistent mapping");

   /* Before decoding anything, validate the map. Set bo->mapped fields */
   agxdecode_decode_segment_list(map->ptr.cpu);

   /* Print the IOGPU stuff */
   agx_unpack(agxdecode_dump_stream, cmdbuf->ptr.cpu, IOGPU_HEADER, cmd);
   DUMP_UNPACKED(IOGPU_HEADER, cmd, "IOGPU Header\n");

   DUMP_CL(IOGPU_ATTACHMENT_COUNT,
           ((uint8_t *)cmdbuf->ptr.cpu + cmd.attachment_offset),
           "Attachment count");

   /* The count sits in the 4th 32-bit word of the attachment area; the
    * attachment descriptors follow it */
   uint32_t *attachments =
      (uint32_t *)((uint8_t *)cmdbuf->ptr.cpu + cmd.attachment_offset);
   unsigned attachment_count = attachments[3];
   for (unsigned i = 0; i < attachment_count; ++i) {
      uint32_t *ptr = attachments + 4 + (i * AGX_IOGPU_ATTACHMENT_LENGTH / 4);
      DUMP_CL(IOGPU_ATTACHMENT, ptr, "Attachment");
   }

   struct drm_asahi_params_global params;

   /* The macOS path has no parameter query; assume a base M1 (0x8103) */
   chip_id_to_params(&params, 0x8103);

   if (cmd.unk_5 == 3)
      agxdecode_cs((uint32_t *)cmdbuf->ptr.cpu, cmd.encoder, verbose, &params);
   else
      agxdecode_gfx((uint32_t *)cmdbuf->ptr.cpu, cmd.encoder, verbose, &params);

   /* Undo the mprotect(PROT_READ) applied while decoding */
   agxdecode_map_read_write();
}
958
/*
 * Hexdump every BO that the given segment list marks as mapped — useful for
 * capturing full GPU memory state alongside a decoded submission.
 */
void
agxdecode_dump_mappings(unsigned map_handle)
{
   agxdecode_dump_file_open();

   struct agx_bo *map = agxdecode_find_handle(map_handle, AGX_ALLOC_MEMMAP);
   assert(map != NULL && "nonexistent mapping");
   agxdecode_decode_segment_list(map->ptr.cpu);

   for (unsigned i = 0; i < mmap_count; ++i) {
      /* Skip slots with nothing to dump or not referenced this submit */
      if (!mmap_array[i].ptr.cpu || !mmap_array[i].size ||
          !mmap_array[i].mapped)
         continue;

      assert(mmap_array[i].type < AGX_NUM_ALLOC);

      fprintf(agxdecode_dump_stream,
              "Buffer: type %s, gpu %" PRIx64 ", handle %u.bin:\n\n",
              agx_alloc_types[mmap_array[i].type], mmap_array[i].ptr.gpu,
              mmap_array[i].handle);

      u_hexdump(agxdecode_dump_stream, mmap_array[i].ptr.cpu,
                mmap_array[i].size, false);
      fprintf(agxdecode_dump_stream, "\n");
   }
}

#endif
987
988 void
agxdecode_track_alloc(struct agx_bo * alloc)989 agxdecode_track_alloc(struct agx_bo *alloc)
990 {
991 assert((mmap_count + 1) < MAX_MAPPINGS);
992
993 for (unsigned i = 0; i < mmap_count; ++i) {
994 struct agx_bo *bo = &mmap_array[i];
995 bool match = (bo->handle == alloc->handle && bo->type == alloc->type);
996 assert(!match && "tried to alloc already allocated BO");
997 }
998
999 mmap_array[mmap_count++] = *alloc;
1000 }
1001
1002 void
agxdecode_track_free(struct agx_bo * bo)1003 agxdecode_track_free(struct agx_bo *bo)
1004 {
1005 bool found = false;
1006
1007 for (unsigned i = 0; i < mmap_count; ++i) {
1008 if (mmap_array[i].handle == bo->handle &&
1009 (mmap_array[i].type == AGX_ALLOC_REGULAR) ==
1010 (bo->type == AGX_ALLOC_REGULAR)) {
1011 assert(!found && "mapped multiple times!");
1012 found = true;
1013
1014 memset(&mmap_array[i], 0, sizeof(mmap_array[i]));
1015 }
1016 }
1017
1018 assert(found && "freed unmapped memory");
1019 }
1020
1021 static int agxdecode_dump_frame_count = 0;
1022
1023 void
agxdecode_dump_file_open(void)1024 agxdecode_dump_file_open(void)
1025 {
1026 if (agxdecode_dump_stream)
1027 return;
1028
1029 /* This does a getenv every frame, so it is possible to use
1030 * setenv to change the base at runtime.
1031 */
1032 const char *dump_file_base =
1033 getenv("AGXDECODE_DUMP_FILE") ?: "agxdecode.dump";
1034 if (!strcmp(dump_file_base, "stderr"))
1035 agxdecode_dump_stream = stderr;
1036 else {
1037 char buffer[1024];
1038 snprintf(buffer, sizeof(buffer), "%s.%04d", dump_file_base,
1039 agxdecode_dump_frame_count);
1040 printf("agxdecode: dump command stream to file %s\n", buffer);
1041 agxdecode_dump_stream = fopen(buffer, "w");
1042 if (!agxdecode_dump_stream) {
1043 fprintf(stderr,
1044 "agxdecode: failed to open command stream log file %s\n",
1045 buffer);
1046 }
1047 }
1048 }
1049
1050 static void
agxdecode_dump_file_close(void)1051 agxdecode_dump_file_close(void)
1052 {
1053 if (agxdecode_dump_stream && agxdecode_dump_stream != stderr) {
1054 fclose(agxdecode_dump_stream);
1055 agxdecode_dump_stream = NULL;
1056 }
1057 }
1058
/* Roll the dump over: close the current file and bump the frame counter so
 * the next open creates a fresh numbered file. */
void
agxdecode_next_frame(void)
{
   agxdecode_dump_file_close();
   agxdecode_dump_frame_count++;
}
1065
/* Tear down the decoder's output stream */
void
agxdecode_close(void)
{
   agxdecode_dump_file_close();
}
1071
/* stdio write hook bridging an fopencookie stream to the client-supplied
 * stream_write callback (library mode). The cookie is unused.
 * NOTE(review): defined unconditionally but only referenced under
 * _GNU_SOURCE below — presumably harmless, but may warn as unused on
 * non-glibc builds; confirm. */
static ssize_t
libagxdecode_writer(void *cookie, const char *buffer, size_t size)
{
   return lib_config.stream_write(buffer, size);
}

#ifdef _GNU_SOURCE
/* fopencookie is a glibc extension, hence the guard */
static cookie_io_functions_t funcs = {.write = libagxdecode_writer};
#endif

/* Decoder parameters derived from the chip ID given to libagxdecode_init */
static decoder_params lib_params;
1083
/*
 * Initialize library mode: copy the client configuration (GPU-read and
 * stream-write callbacks), route the dump stream through the client's
 * stream_write via fopencookie, and derive decoder parameters from the
 * configured chip ID. Requires glibc (fopencookie).
 */
void
libagxdecode_init(struct libagxdecode_config *config)
{
#ifdef _GNU_SOURCE
   lib_config = *config;
   agxdecode_dump_stream = fopencookie(NULL, "w", funcs);

   chip_id_to_params(&lib_params, config->chip_id);
#else
   /* fopencookie is a glibc extension */
   unreachable("libagxdecode only available with glibc");
#endif
}
1097
/* Public library entry points: decode a VDM, CDM, or USC stream at addr
 * using the parameters configured by libagxdecode_init(). */
void
libagxdecode_vdm(uint64_t addr, const char *label, bool verbose)
{
   agxdecode_stateful(addr, label, agxdecode_vdm, verbose, &lib_params, NULL);
}

void
libagxdecode_cdm(uint64_t addr, const char *label, bool verbose)
{
   agxdecode_stateful(addr, label, agxdecode_cdm, verbose, &lib_params, NULL);
}
void
libagxdecode_usc(uint64_t addr, const char *label, bool verbose)
{
   agxdecode_stateful(addr, label, agxdecode_usc, verbose, &lib_params, NULL);
}
/* Flush and close the dump stream */
void
libagxdecode_shutdown(void)
{
   agxdecode_dump_file_close();
}
1119