1 /*
2 * Copyright © 2022 Igalia S.L.
3 * SPDX-License-Identifier: MIT
4 */
5
6 #include <assert.h>
7 #include <err.h>
8 #include <getopt.h>
9 #include <stdint.h>
10 #include <stdio.h>
11 #include <stdlib.h>
12 #include <string.h>
13 #include <sys/types.h>
14
15 #include "redump.h"
16
17 #include "util/u_math.h"
18
19 #include "adreno_common.xml.h"
20 #include "adreno_pm4.xml.h"
21 #include "freedreno_pm4.h"
22
23 #include "a6xx.xml.h"
24
25 #include "ir3/ir3_assembler.h"
26 #include "ir3/ir3_compiler.h"
27 #include "ir3/ir3_shader.h"
28
29 #include "util/list.h"
30 #include "util/vma.h"
31
/* A CPU-side buffer of 32-bit words together with the GPU address assigned
 * to it. Both `total_size` and `cur` are counted in dwords, not bytes.
 */
struct cmdstream {
   /* Entry in replay_context::cs_list. */
   struct list_head link;

   /* Backing storage, zero-filled when allocated by cs_alloc(). */
   uint32_t *mem;
   /* Capacity of `mem` in dwords. */
   uint32_t total_size;
   /* Index of the next dword to be written. */
   uint32_t cur;

   /* GPU address corresponding to mem[0]. */
   uint64_t iova;
};
41
42 static uint64_t
cs_get_cur_iova(struct cmdstream * cs)43 cs_get_cur_iova(struct cmdstream *cs)
44 {
45 return cs->iova + cs->cur * sizeof(uint32_t);
46 }
47
/* Description of a GPU memory range that is recorded in the output file as
 * an RD_WRBUFFER section (see rd_write_wrbuffer()).
 */
struct wrbuf {
   /* Entry in replay_context::wrbuf_list. */
   struct list_head link;

   /* GPU address of the range. */
   uint64_t iova;
   /* Size of the range as passed by the caller of gpu_read_into_file(). */
   uint64_t size;
   /* NUL-terminated name written out together with the range. */
   const char *name;
};
55
/* All state needed to build one replay file. */
struct replay_context {
   /* ralloc parent for allocations owned by this context. */
   void *mem_ctx;

   /* Allocator handing out GPU addresses for every cmdstream. */
   struct util_vma_heap vma;

   /* Stream whose address is emitted as the RD_CMDSTREAM_ADDR section. */
   struct cmdstream *submit_cs;
   /* Stream that begin_draw_state()/end_draw_state() record into. */
   struct cmdstream *state_cs;
   /* Stream that receives assembled shader binaries (upload_shader()). */
   struct cmdstream *shader_cs;

   /* Buffer advertised through the RD_SHADER_LOG_BUFFER section. */
   struct cmdstream *shader_log;
   /* Buffer advertised through the RD_CP_LOG_BUFFER section; gpu_print()
    * emits commands that read and update its first two qwords.
    */
   struct cmdstream *cp_log;

   /* Every cmdstream created by cs_alloc(), in creation order. */
   struct list_head cs_list;

   /* Every wrbuf registered by gpu_read_into_file(). */
   struct list_head wrbuf_list;

   struct ir3_compiler *compiler;

   /* Maps a shader id to a pointer to the uint64_t iova of its binary. */
   struct hash_table_u64 *compiled_shaders;

   /* Path of the file written by replay_context_finish(). */
   const char *output_name;
};
78
79 static void
pkt(struct cmdstream * cs,uint32_t payload)80 pkt(struct cmdstream *cs, uint32_t payload)
81 {
82 assert(cs->cur <= cs->total_size);
83 cs->mem[cs->cur++] = payload;
84 }
85
86 static void
pkt_qw(struct cmdstream * cs,uint64_t payload)87 pkt_qw(struct cmdstream *cs, uint64_t payload)
88 {
89 pkt(cs, payload);
90 pkt(cs, payload >> 32);
91 }
92
93 static uint64_t
pkt_blob(struct cmdstream * cs,void * payload,uint32_t size,uint32_t alignment)94 pkt_blob(struct cmdstream *cs, void *payload, uint32_t size, uint32_t alignment)
95 {
96 cs->cur = align(cs->cur, alignment / sizeof(uint32_t));
97 uint64_t start_iova = cs_get_cur_iova(cs);
98
99 memcpy(cs->mem + cs->cur, payload, size);
100 cs->cur += size;
101
102 return start_iova;
103 }
104
105 static void
pkt4(struct cmdstream * cs,uint16_t regindx,uint16_t cnt,uint32_t payload)106 pkt4(struct cmdstream *cs, uint16_t regindx, uint16_t cnt, uint32_t payload)
107 {
108 pkt(cs, pm4_pkt4_hdr(regindx, cnt));
109 pkt(cs, payload);
110 }
111
112 static void
pkt7(struct cmdstream * cs,uint8_t opcode,uint16_t cnt)113 pkt7(struct cmdstream *cs, uint8_t opcode, uint16_t cnt)
114 {
115 pkt(cs, pm4_pkt7_hdr(opcode, cnt));
116 }
117
/* Header written to the output file in front of every section's payload. */
struct rd_section {
   /* Section kind (the writers in this file store RD_* values here). */
   uint32_t type;
   /* Size in bytes of the payload that follows the header. */
   uint32_t size;
};
122
123 static struct cmdstream *
cs_alloc(struct replay_context * ctx,uint32_t size)124 cs_alloc(struct replay_context *ctx, uint32_t size)
125 {
126 struct cmdstream *cs = (struct cmdstream *) calloc(1, sizeof(struct cmdstream));
127 cs->mem = (uint32_t *)calloc(1, size);
128 cs->total_size = size / sizeof(uint32_t);
129 cs->cur = 0;
130 cs->iova = util_vma_heap_alloc(&ctx->vma, size, 4096);
131
132 assert(cs->iova != 0);
133
134 list_addtail(&cs->link, &ctx->cs_list);
135
136 return cs;
137 }
138
139 static void
rd_write_gpu_addr_section(FILE * out,struct cmdstream * cs,enum rd_sect_type section)140 rd_write_gpu_addr_section(FILE *out, struct cmdstream *cs, enum rd_sect_type section)
141 {
142 const uint32_t packet[] = {(uint32_t)cs->iova,
143 (uint32_t)(cs->cur * sizeof(uint32_t)),
144 (uint32_t)(cs->iova >> 32)};
145 struct rd_section section_address = {.type = section,
146 .size = sizeof(packet)};
147 fwrite(§ion_address, sizeof(section_address), 1, out);
148 fwrite(packet, sizeof(packet), 1, out);
149 }
150
151 static void
rd_write_cs_buffer(FILE * out,struct cmdstream * cs)152 rd_write_cs_buffer(FILE *out, struct cmdstream *cs)
153 {
154 if (cs->cur == 0)
155 return;
156
157 rd_write_gpu_addr_section(out, cs, RD_GPUADDR);
158
159 struct rd_section section_contents = {.type = RD_BUFFER_CONTENTS,
160 .size = uint32_t(cs->cur * sizeof(uint32_t))};
161
162 fwrite(§ion_contents, sizeof(section_contents), 1, out);
163 fwrite(cs->mem, sizeof(uint32_t), cs->cur, out);
164 }
165
166 static void
rd_write_cs_submit(FILE * out,struct cmdstream * cs)167 rd_write_cs_submit(FILE *out, struct cmdstream *cs)
168 {
169 const uint32_t packet[] = {(uint32_t)cs->iova, cs->cur,
170 (uint32_t)(cs->iova >> 32)};
171 struct rd_section section_cmdstream = {.type = RD_CMDSTREAM_ADDR,
172 .size = sizeof(packet)};
173
174 fwrite(§ion_cmdstream, sizeof(section_cmdstream), 1, out);
175 fwrite(packet, sizeof(packet), 1, out);
176 }
177
178 static void
rd_write_wrbuffer(FILE * out,struct wrbuf * wrbuf)179 rd_write_wrbuffer(FILE *out, struct wrbuf *wrbuf)
180 {
181 uint32_t name_len = strlen(wrbuf->name) + 1;
182 struct rd_section section = {.type = RD_WRBUFFER,
183 .size = (uint32_t)(sizeof(uint32_t) * 2) + name_len};
184 fwrite(§ion, sizeof(section), 1, out);
185 fwrite(&wrbuf->iova, sizeof(uint64_t), 1, out);
186 fwrite(&wrbuf->size, sizeof(uint64_t), 1, out);
187 fwrite(wrbuf->name, sizeof(char), name_len, out);
188 }
189
/* Print the command-line help to stderr and terminate with exit status 2.
 *
 * `name` is the program name shown in the usage line. This function does
 * not return.
 */
static void
print_usage(const char *name)
{
   /* clang-format off */
   fprintf(stderr, "Usage:\n\n"
           "\t%s [OPTIONS]... FILE...\n\n"
           "Options:\n"
           "\t     --vastart=offset\n"
           "\t     --vasize=size\n"
           "\t-h,  --help             - show this message\n"
           , name);
   /* clang-format on */
   exit(2);
}
204
/* Values getopt_long() returns for the long options that have no
 * single-character alias.
 */
#define OPT_VA_START 1000
#define OPT_VA_SIZE  1001

/* clang-format off */
static const struct option opts[] = {
      { "vastart",  required_argument, 0, OPT_VA_START },
      { "vasize",   required_argument, 0, OPT_VA_SIZE },
      { "help",     no_argument,       0, 'h' },
      /* getopt_long() finds the end of the table by its all-zero entry;
       * without it an unknown long option is matched against memory past
       * the array.
       */
      { 0, 0, 0, 0 },
};
/* clang-format on */
215
216 static void
replay_context_init(struct replay_context * ctx,struct fd_dev_id * dev_id,int argc,char ** argv)217 replay_context_init(struct replay_context *ctx, struct fd_dev_id *dev_id,
218 int argc, char **argv)
219 {
220 uint64_t va_start = 0;
221 uint64_t va_size = 0;
222
223 int c;
224 while ((c = getopt_long(argc, argv, "h", opts, NULL)) != -1) {
225 switch (c) {
226 case OPT_VA_START:
227 va_start = strtoull(optarg, NULL, 0);
228 break;
229 case OPT_VA_SIZE:
230 va_size = strtoull(optarg, NULL, 0);
231 break;
232 case 'h':
233 default:
234 print_usage(argv[0]);
235 }
236 }
237
238 if (optind < argc) {
239 ctx->output_name = argv[optind];
240 } else {
241 }
242
243 if (!va_start || !va_size || !ctx->output_name) {
244 print_usage(argv[0]);
245 exit(1);
246 }
247
248 ctx->mem_ctx = ralloc_context(NULL);
249 list_inithead(&ctx->cs_list);
250 list_inithead(&ctx->wrbuf_list);
251
252 util_vma_heap_init(&ctx->vma, va_start, ROUND_DOWN_TO(va_size, 4096));
253
254 ctx->submit_cs = cs_alloc(ctx, 1024 * 1024);
255 ctx->state_cs = cs_alloc(ctx, 2 * 1024 * 1024);
256 ctx->shader_cs = cs_alloc(ctx, 8 * 1024 * 1024);
257
258 ctx->shader_log = cs_alloc(ctx, 1024 * 1024);
259 ctx->shader_log->mem[0] = (ctx->shader_log->iova & 0xffffffff) + sizeof(uint64_t);
260 ctx->shader_log->mem[1] = ctx->shader_log->iova >> 32;
261 ctx->shader_log->cur = ctx->shader_log->total_size;
262
263 ctx->cp_log = cs_alloc(ctx, 8 * 1024 * 1024);
264 ((uint64_t *)ctx->cp_log->mem)[0] = ctx->cp_log->iova + 2 * sizeof(uint64_t);
265 ((uint64_t *)ctx->cp_log->mem)[1] = sizeof(uint64_t);
266 ctx->cp_log->cur = ctx->cp_log->total_size;
267
268 struct ir3_compiler_options options{};
269 ctx->compiler =
270 ir3_compiler_create(NULL, dev_id, fd_dev_info_raw(dev_id), &options);
271 ctx->compiled_shaders = _mesa_hash_table_u64_create(ctx->mem_ctx);
272 }
273
274 static void
replay_context_finish(struct replay_context * ctx)275 replay_context_finish(struct replay_context *ctx)
276 {
277 FILE *out = fopen(ctx->output_name, "w");
278 if (!out) {
279 errx(1, "Cannot open '%s' for writing\n", ctx->output_name);
280 }
281
282 static const uint32_t gpu_id = 660;
283 struct rd_section section_gpu_id = {.type = RD_GPU_ID,
284 .size = 1 * sizeof(uint32_t)};
285 fwrite(§ion_gpu_id, sizeof(section_gpu_id), 1, out);
286 fwrite(&gpu_id, sizeof(uint32_t), 1, out);
287
288 rd_write_gpu_addr_section(out, ctx->shader_log, RD_SHADER_LOG_BUFFER);
289 rd_write_gpu_addr_section(out, ctx->cp_log, RD_CP_LOG_BUFFER);
290
291 list_for_each_entry (struct cmdstream, cs, &ctx->cs_list, link) {
292 rd_write_cs_buffer(out, cs);
293 }
294 rd_write_cs_submit(out, ctx->submit_cs);
295
296 list_for_each_entry (struct wrbuf, wrbuf, &ctx->wrbuf_list, link) {
297 rd_write_wrbuffer(out, wrbuf);
298 }
299
300 fclose(out);
301 }
302
303 static void
upload_shader(struct replay_context * ctx,uint64_t id,const char * source)304 upload_shader(struct replay_context *ctx, uint64_t id, const char *source)
305 {
306 FILE *in = fmemopen((void *)source, strlen(source), "r");
307
308 struct ir3_kernel_info info = {
309 .shader_print_buffer_iova = ctx->shader_log->iova,
310 };
311 struct ir3_shader *shader = ir3_parse_asm(ctx->compiler, &info, in);
312 assert(shader);
313
314 fclose(in);
315
316 uint64_t *shader_iova = ralloc(ctx->mem_ctx, uint64_t);
317 *shader_iova = pkt_blob(ctx->shader_cs, shader->variants->bin,
318 shader->variants->info.size, 128);
319 ralloc_free(shader);
320
321 _mesa_hash_table_u64_insert(ctx->compiled_shaders, id, shader_iova);
322 }
323
324 static void
emit_shader_iova(struct replay_context * ctx,struct cmdstream * cs,uint64_t id)325 emit_shader_iova(struct replay_context *ctx, struct cmdstream *cs, uint64_t id)
326 {
327 uint64_t *shader_iova = (uint64_t *)
328 _mesa_hash_table_u64_search(ctx->compiled_shaders, id);
329 pkt_qw(cs, *shader_iova);
330 }
331
/* Start recording a draw-state group into ctx.state_cs.
 *
 * Expects a `ctx` object (accessed with `.`) and a `cs` pointer to be
 * visible where it is expanded. It declares `subcs_iova_start`, `prev_cs`
 * (the previously visible `cs`) and a new `cs` that points at ctx.state_cs.
 * Because it declares a new `cs`, it has to be expanded inside a nested
 * block, where the outer `cs` is still visible while `prev_cs` is
 * initialized.
 */
#define begin_draw_state()                                                     \
   uint64_t subcs_iova_start = cs_get_cur_iova(ctx.state_cs);                  \
   struct cmdstream *prev_cs = cs;                                             \
   struct cmdstream *cs = ctx.state_cs;

/* Finish the group opened by begin_draw_state() in the same block.
 *
 * Computes the number of dwords added to ctx.state_cs since the matching
 * begin_draw_state() and emits a three-dword CP_SET_DRAW_STATE packet into
 * `prev_cs`: `params` OR-ed with that dword count, followed by the 64-bit
 * start address of the group.
 */
#define end_draw_state(params)                                                 \
   uint64_t subcs_iova_end = cs_get_cur_iova(ctx.state_cs);                    \
   uint32_t subcs_size =                                                       \
      (subcs_iova_end - subcs_iova_start) / sizeof(uint32_t);                  \
   pkt7(prev_cs, CP_SET_DRAW_STATE, 3);                                        \
   pkt(prev_cs, (params) | subcs_size);                                        \
   pkt_qw(prev_cs, subcs_iova_start);
344
/* Start recording into a freshly allocated 1 MiB indirect buffer.
 *
 * Expects a `ctx` object (its address is passed to cs_alloc()) and a `cs`
 * pointer to be visible where it is expanded. It declares `prev_cs` (the
 * previously visible `cs`) and a new `cs` for the new buffer, so it has to
 * be expanded inside a nested block.
 */
#define begin_ib()                                                             \
   struct cmdstream *prev_cs = cs;                                             \
   struct cmdstream *cs = cs_alloc(&ctx, 1024 * 1024);

/* Emit into `prev_cs` a three-dword CP_INDIRECT_BUFFER packet referencing
 * `cs`: its 64-bit address followed by the number of dwords written to it.
 * Only needs `cs` and `prev_cs` to be in scope, so it can also be used
 * without begin_ib() when those two names are declared by hand.
 */
#define end_ib()                                                               \
   uint64_t ibcs_size = cs->cur;                                               \
   pkt7(prev_cs, CP_INDIRECT_BUFFER, 3);                                       \
   pkt_qw(prev_cs, cs->iova);                                                  \
   pkt(prev_cs, ibcs_size);
354
355 static void
gpu_print(struct replay_context * ctx,struct cmdstream * _cs,uint64_t iova,uint32_t dwords)356 gpu_print(struct replay_context *ctx, struct cmdstream *_cs, uint64_t iova,
357 uint32_t dwords)
358 {
359 uint64_t header_iova, body_iova;
360 struct cmdstream *prev_cs = _cs;
361 struct cmdstream *cs = cs_alloc(ctx, 4096);
362 /* Commands that are being modified should be in a separate cmdstream,
363 * otherwise they would be prefetched and writes would not be visible.
364 */
365 {
366 /* Write size into entry's header */
367 pkt7(cs, CP_MEM_WRITE, 4);
368 header_iova = cs_get_cur_iova(cs);
369 pkt_qw(cs, 0xdeadbeef);
370 uint64_t size_iova = cs_get_cur_iova(cs);
371 pkt(cs, dwords * 4);
372 pkt(cs, 0);
373
374 /* Copy the data into entry's body */
375 pkt7(cs, CP_MEMCPY, 5);
376 pkt(cs, dwords);
377 pkt_qw(cs, iova);
378 body_iova = cs_get_cur_iova(cs);
379 pkt_qw(cs, 0xdeadbeef);
380
381 /* iova = iova + body_size + header_size */
382 pkt7(cs, CP_MEM_TO_MEM, 9);
383 pkt(cs, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_WAIT_FOR_MEM_WRITES);
384 pkt_qw(cs, ctx->cp_log->iova);
385 pkt_qw(cs, ctx->cp_log->iova);
386 pkt_qw(cs, size_iova);
387 pkt_qw(cs, ctx->cp_log->iova + sizeof(uint64_t));
388 }
389
390 {
391 struct cmdstream *cs = prev_cs;
392 pkt7(cs, CP_MEM_TO_MEM, 5);
393 pkt(cs, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_WAIT_FOR_MEM_WRITES);
394 pkt_qw(cs, header_iova);
395 pkt_qw(cs, ctx->cp_log->iova);
396
397 pkt7(cs, CP_MEM_TO_MEM, 7);
398 pkt(cs, CP_MEM_TO_MEM_0_DOUBLE);
399 pkt_qw(cs, body_iova);
400 pkt_qw(cs, ctx->cp_log->iova);
401 pkt_qw(cs, ctx->cp_log->iova + sizeof(uint64_t));
402
403 pkt7(cs, CP_WAIT_MEM_WRITES, 0);
404 pkt7(cs, CP_WAIT_FOR_ME, 0);
405 }
406
407 end_ib();
408 }
409
410 static void
gpu_read_into_file(struct replay_context * ctx,struct cmdstream * _cs,uint64_t iova,uint64_t size,const char * name)411 gpu_read_into_file(struct replay_context *ctx, struct cmdstream *_cs,
412 uint64_t iova, uint64_t size, const char *name)
413 {
414 struct wrbuf *wrbuf = (struct wrbuf *) calloc(1, sizeof(struct wrbuf));
415 wrbuf->iova = iova;
416 wrbuf->size = size;
417 wrbuf->name = strdup(name);
418
419 assert(wrbuf->iova != 0);
420
421 list_addtail(&wrbuf->link, &ctx->wrbuf_list);
422 }