• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2022 Igalia S.L.
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #include <assert.h>
7 #include <err.h>
8 #include <getopt.h>
9 #include <stdint.h>
10 #include <stdio.h>
11 #include <stdlib.h>
12 #include <string.h>
13 #include <sys/types.h>
14 
15 #include "redump.h"
16 
17 #include "util/u_math.h"
18 
19 #include "adreno_common.xml.h"
20 #include "adreno_pm4.xml.h"
21 #include "freedreno_pm4.h"
22 
23 #include "a6xx.xml.h"
24 
25 #include "ir3/ir3_assembler.h"
26 #include "ir3/ir3_compiler.h"
27 #include "ir3/ir3_shader.h"
28 
29 #include "util/list.h"
30 #include "util/vma.h"
31 
32 struct cmdstream {
33    struct list_head link;
34 
35    uint32_t *mem;
36    uint32_t total_size;
37    uint32_t cur;
38 
39    uint64_t iova;
40 };
41 
42 static uint64_t
cs_get_cur_iova(struct cmdstream * cs)43 cs_get_cur_iova(struct cmdstream *cs)
44 {
45    return cs->iova + cs->cur * sizeof(uint32_t);
46 }
47 
48 struct wrbuf {
49    struct list_head link;
50 
51    uint64_t iova;
52    uint64_t size;
53    const char *name;
54 };
55 
56 struct replay_context {
57    void *mem_ctx;
58 
59    struct util_vma_heap vma;
60 
61    struct cmdstream *submit_cs;
62    struct cmdstream *state_cs;
63    struct cmdstream *shader_cs;
64 
65    struct cmdstream *shader_log;
66    struct cmdstream *cp_log;
67 
68    struct list_head cs_list;
69 
70    struct list_head wrbuf_list;
71 
72    struct ir3_compiler *compiler;
73 
74    struct hash_table_u64 *compiled_shaders;
75 
76    const char *output_name;
77 };
78 
79 static void
pkt(struct cmdstream * cs,uint32_t payload)80 pkt(struct cmdstream *cs, uint32_t payload)
81 {
82    assert(cs->cur <= cs->total_size);
83    cs->mem[cs->cur++] = payload;
84 }
85 
/* Append a 64-bit value to cs as two dwords, low half first. */
static void
pkt_qw(struct cmdstream *cs, uint64_t payload)
{
   const uint32_t lo = (uint32_t)payload;
   const uint32_t hi = (uint32_t)(payload >> 32);

   pkt(cs, lo);
   pkt(cs, hi);
}
92 
93 static uint64_t
pkt_blob(struct cmdstream * cs,void * payload,uint32_t size,uint32_t alignment)94 pkt_blob(struct cmdstream *cs, void *payload, uint32_t size, uint32_t alignment)
95 {
96    cs->cur = align(cs->cur, alignment / sizeof(uint32_t));
97    uint64_t start_iova = cs_get_cur_iova(cs);
98 
99    memcpy(cs->mem + cs->cur, payload, size);
100    cs->cur += size;
101 
102    return start_iova;
103 }
104 
/* Emit a type-4 packet header built from regindx and cnt, followed by exactly
 * one payload dword.
 */
static void
pkt4(struct cmdstream *cs, uint16_t regindx, uint16_t cnt, uint32_t payload)
{
   const uint32_t header = pm4_pkt4_hdr(regindx, cnt);

   pkt(cs, header);
   pkt(cs, payload);
}
111 
/* Emit only the type-7 packet header for opcode with a payload count of cnt;
 * the caller appends the payload dwords itself.
 */
static void
pkt7(struct cmdstream *cs, uint8_t opcode, uint16_t cnt)
{
   const uint32_t header = pm4_pkt7_hdr(opcode, cnt);

   pkt(cs, header);
}
117 
/* Header that precedes every section written to the output file. */
struct rd_section {
   /* Section kind (the writers in this file store RD_* constants here). */
   uint32_t type;
   /* Number of payload bytes that follow this header. */
   uint32_t size;
};
122 
123 static struct cmdstream *
cs_alloc(struct replay_context * ctx,uint32_t size)124 cs_alloc(struct replay_context *ctx, uint32_t size)
125 {
126    struct cmdstream *cs = (struct cmdstream *) calloc(1, sizeof(struct cmdstream));
127    cs->mem = (uint32_t *)calloc(1, size);
128    cs->total_size = size / sizeof(uint32_t);
129    cs->cur = 0;
130    cs->iova = util_vma_heap_alloc(&ctx->vma, size, 4096);
131 
132    assert(cs->iova != 0);
133 
134    list_addtail(&cs->link, &ctx->cs_list);
135 
136    return cs;
137 }
138 
139 static void
rd_write_gpu_addr_section(FILE * out,struct cmdstream * cs,enum rd_sect_type section)140 rd_write_gpu_addr_section(FILE *out, struct cmdstream *cs, enum rd_sect_type section)
141 {
142    const uint32_t packet[] = {(uint32_t)cs->iova,
143                               (uint32_t)(cs->cur * sizeof(uint32_t)),
144                               (uint32_t)(cs->iova >> 32)};
145    struct rd_section section_address = {.type = section,
146                                         .size = sizeof(packet)};
147    fwrite(&section_address, sizeof(section_address), 1, out);
148    fwrite(packet, sizeof(packet), 1, out);
149 }
150 
151 static void
rd_write_cs_buffer(FILE * out,struct cmdstream * cs)152 rd_write_cs_buffer(FILE *out, struct cmdstream *cs)
153 {
154    if (cs->cur == 0)
155       return;
156 
157    rd_write_gpu_addr_section(out, cs, RD_GPUADDR);
158 
159    struct rd_section section_contents = {.type = RD_BUFFER_CONTENTS,
160                                          .size = uint32_t(cs->cur * sizeof(uint32_t))};
161 
162    fwrite(&section_contents, sizeof(section_contents), 1, out);
163    fwrite(cs->mem, sizeof(uint32_t), cs->cur, out);
164 }
165 
166 static void
rd_write_cs_submit(FILE * out,struct cmdstream * cs)167 rd_write_cs_submit(FILE *out, struct cmdstream *cs)
168 {
169    const uint32_t packet[] = {(uint32_t)cs->iova, cs->cur,
170                               (uint32_t)(cs->iova >> 32)};
171    struct rd_section section_cmdstream = {.type = RD_CMDSTREAM_ADDR,
172                                           .size = sizeof(packet)};
173 
174    fwrite(&section_cmdstream, sizeof(section_cmdstream), 1, out);
175    fwrite(packet, sizeof(packet), 1, out);
176 }
177 
178 static void
rd_write_wrbuffer(FILE * out,struct wrbuf * wrbuf)179 rd_write_wrbuffer(FILE *out, struct wrbuf *wrbuf)
180 {
181    uint32_t name_len = strlen(wrbuf->name) + 1;
182    struct rd_section section = {.type = RD_WRBUFFER,
183                                 .size = (uint32_t)(sizeof(uint32_t) * 2) + name_len};
184    fwrite(&section, sizeof(section), 1, out);
185    fwrite(&wrbuf->iova, sizeof(uint64_t), 1, out);
186    fwrite(&wrbuf->size, sizeof(uint64_t), 1, out);
187    fwrite(wrbuf->name, sizeof(char), name_len, out);
188 }
189 
/* Print the command-line synopsis to stderr and terminate the process with
 * exit status 2. Does not return.
 *
 * name - program name to show in the synopsis (callers pass argv[0])
 */
static void
print_usage(const char *name)
{
   /* clang-format off */
   fprintf(stderr, "Usage:\n\n"
           "\t%s [OPTIONS]... FILE...\n\n"
           "Options:\n"
           "\t    --vastart=offset\n"
           "\t    --vasize=size\n"
           "\t-h, --help             - show this message\n"
           , name);
   /* clang-format on */
   exit(2);
}
204 
/* Values returned by getopt_long() for the long-only options; chosen outside
 * the range of single-character option codes.
 */
#define OPT_VA_START 1000
#define OPT_VA_SIZE  1001

/* Long options understood by replay_context_init(). getopt_long() walks this
 * array until it finds an all-zero element, so the terminator is mandatory.
 */
/* clang-format off */
static const struct option opts[] = {
      { "vastart",  required_argument, 0, OPT_VA_START },
      { "vasize",   required_argument, 0, OPT_VA_SIZE },
      { "help",     no_argument,       0, 'h' },
      { NULL,       0,                 NULL, 0 },
};
/* clang-format on */
215 
216 static void
replay_context_init(struct replay_context * ctx,struct fd_dev_id * dev_id,int argc,char ** argv)217 replay_context_init(struct replay_context *ctx, struct fd_dev_id *dev_id,
218                     int argc, char **argv)
219 {
220    uint64_t va_start = 0;
221    uint64_t va_size = 0;
222 
223    int c;
224    while ((c = getopt_long(argc, argv, "h", opts, NULL)) != -1) {
225       switch (c) {
226       case OPT_VA_START:
227          va_start = strtoull(optarg, NULL, 0);
228          break;
229       case OPT_VA_SIZE:
230          va_size = strtoull(optarg, NULL, 0);
231          break;
232       case 'h':
233       default:
234          print_usage(argv[0]);
235       }
236    }
237 
238    if (optind < argc) {
239       ctx->output_name = argv[optind];
240    } else {
241    }
242 
243    if (!va_start || !va_size || !ctx->output_name) {
244       print_usage(argv[0]);
245       exit(1);
246    }
247 
248    ctx->mem_ctx = ralloc_context(NULL);
249    list_inithead(&ctx->cs_list);
250    list_inithead(&ctx->wrbuf_list);
251 
252    util_vma_heap_init(&ctx->vma, va_start, ROUND_DOWN_TO(va_size, 4096));
253 
254    ctx->submit_cs = cs_alloc(ctx, 1024 * 1024);
255    ctx->state_cs = cs_alloc(ctx, 2 * 1024 * 1024);
256    ctx->shader_cs = cs_alloc(ctx, 8 * 1024 * 1024);
257 
258    ctx->shader_log = cs_alloc(ctx, 1024 * 1024);
259    ctx->shader_log->mem[0] = (ctx->shader_log->iova & 0xffffffff) + sizeof(uint64_t);
260    ctx->shader_log->mem[1] = ctx->shader_log->iova >> 32;
261    ctx->shader_log->cur = ctx->shader_log->total_size;
262 
263    ctx->cp_log = cs_alloc(ctx, 8 * 1024 * 1024);
264    ((uint64_t *)ctx->cp_log->mem)[0] = ctx->cp_log->iova + 2 * sizeof(uint64_t);
265    ((uint64_t *)ctx->cp_log->mem)[1] = sizeof(uint64_t);
266    ctx->cp_log->cur = ctx->cp_log->total_size;
267 
268    struct ir3_compiler_options options{};
269    ctx->compiler =
270       ir3_compiler_create(NULL, dev_id, fd_dev_info_raw(dev_id), &options);
271    ctx->compiled_shaders = _mesa_hash_table_u64_create(ctx->mem_ctx);
272 }
273 
274 static void
replay_context_finish(struct replay_context * ctx)275 replay_context_finish(struct replay_context *ctx)
276 {
277    FILE *out = fopen(ctx->output_name, "w");
278    if (!out) {
279       errx(1, "Cannot open '%s' for writing\n", ctx->output_name);
280    }
281 
282    static const uint32_t gpu_id = 660;
283    struct rd_section section_gpu_id = {.type = RD_GPU_ID,
284                                        .size = 1 * sizeof(uint32_t)};
285    fwrite(&section_gpu_id, sizeof(section_gpu_id), 1, out);
286    fwrite(&gpu_id, sizeof(uint32_t), 1, out);
287 
288    rd_write_gpu_addr_section(out, ctx->shader_log, RD_SHADER_LOG_BUFFER);
289    rd_write_gpu_addr_section(out, ctx->cp_log, RD_CP_LOG_BUFFER);
290 
291    list_for_each_entry (struct cmdstream, cs, &ctx->cs_list, link) {
292       rd_write_cs_buffer(out, cs);
293    }
294    rd_write_cs_submit(out, ctx->submit_cs);
295 
296    list_for_each_entry (struct wrbuf, wrbuf, &ctx->wrbuf_list, link) {
297       rd_write_wrbuffer(out, wrbuf);
298    }
299 
300    fclose(out);
301 }
302 
303 static void
upload_shader(struct replay_context * ctx,uint64_t id,const char * source)304 upload_shader(struct replay_context *ctx, uint64_t id, const char *source)
305 {
306    FILE *in = fmemopen((void *)source, strlen(source), "r");
307 
308    struct ir3_kernel_info info = {
309       .shader_print_buffer_iova = ctx->shader_log->iova,
310    };
311    struct ir3_shader *shader = ir3_parse_asm(ctx->compiler, &info, in);
312    assert(shader);
313 
314    fclose(in);
315 
316    uint64_t *shader_iova = ralloc(ctx->mem_ctx, uint64_t);
317    *shader_iova = pkt_blob(ctx->shader_cs, shader->variants->bin,
318                            shader->variants->info.size, 128);
319    ralloc_free(shader);
320 
321    _mesa_hash_table_u64_insert(ctx->compiled_shaders, id, shader_iova);
322 }
323 
324 static void
emit_shader_iova(struct replay_context * ctx,struct cmdstream * cs,uint64_t id)325 emit_shader_iova(struct replay_context *ctx, struct cmdstream *cs, uint64_t id)
326 {
327    uint64_t *shader_iova = (uint64_t *)
328       _mesa_hash_table_u64_search(ctx->compiled_shaders, id);
329    pkt_qw(cs, *shader_iova);
330 }
331 
/*
 * Scoping helpers for code that emits into a nested stream.
 *
 * All four macros expect a `struct replay_context ctx` object (not a pointer)
 * and a `struct cmdstream *cs` to be visible where they are expanded. The
 * begin_*() macros save the visible cs as prev_cs and then declare a new cs,
 * so each begin/end pair has to be expanded inside its own block scope.
 */

/* Redirect cs to ctx.state_cs and remember where the sub-stream starts. */
#define begin_draw_state()                                    \
   uint64_t subcs_iova_start = cs_get_cur_iova(ctx.state_cs); \
   struct cmdstream *prev_cs = cs;                            \
   struct cmdstream *cs = ctx.state_cs;

/* Emit CP_SET_DRAW_STATE into the outer stream: one dword combining params
 * with the sub-stream length in dwords, then the sub-stream start address.
 */
#define end_draw_state(params)                                \
   uint64_t subcs_iova_end = cs_get_cur_iova(ctx.state_cs);   \
   uint32_t subcs_size =                                      \
      (subcs_iova_end - subcs_iova_start) / sizeof(uint32_t); \
   pkt7(prev_cs, CP_SET_DRAW_STATE, 3);                       \
   pkt(prev_cs, (params) | subcs_size);                       \
   pkt_qw(prev_cs, subcs_iova_start);

/* Redirect cs to a freshly allocated 1 MiB stream. */
#define begin_ib()                                            \
   struct cmdstream *prev_cs = cs;                            \
   struct cmdstream *cs = cs_alloc(&ctx, 1024 * 1024);

/* Emit CP_INDIRECT_BUFFER into the outer stream, pointing at the stream that
 * is currently visible as cs, followed by its length in dwords.
 */
#define end_ib()                                              \
   uint64_t ibcs_size = cs->cur;                              \
   pkt7(prev_cs, CP_INDIRECT_BUFFER, 3);                      \
   pkt_qw(prev_cs, cs->iova);                                 \
   pkt(prev_cs, ibcs_size);
354 
355 static void
gpu_print(struct replay_context * ctx,struct cmdstream * _cs,uint64_t iova,uint32_t dwords)356 gpu_print(struct replay_context *ctx, struct cmdstream *_cs, uint64_t iova,
357           uint32_t dwords)
358 {
359    uint64_t header_iova, body_iova;
360    struct cmdstream *prev_cs = _cs;
361    struct cmdstream *cs = cs_alloc(ctx, 4096);
362    /* Commands that are being modified should be in a separate cmdstream,
363     * otherwise they would be prefetched and writes would not be visible.
364     */
365    {
366       /* Write size into entry's header */
367       pkt7(cs, CP_MEM_WRITE, 4);
368       header_iova = cs_get_cur_iova(cs);
369       pkt_qw(cs, 0xdeadbeef);
370       uint64_t size_iova = cs_get_cur_iova(cs);
371       pkt(cs, dwords * 4);
372       pkt(cs, 0);
373 
374       /* Copy the data into entry's body */
375       pkt7(cs, CP_MEMCPY, 5);
376       pkt(cs, dwords);
377       pkt_qw(cs, iova);
378       body_iova = cs_get_cur_iova(cs);
379       pkt_qw(cs, 0xdeadbeef);
380 
381       /* iova = iova + body_size + header_size */
382       pkt7(cs, CP_MEM_TO_MEM, 9);
383       pkt(cs, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_WAIT_FOR_MEM_WRITES);
384       pkt_qw(cs, ctx->cp_log->iova);
385       pkt_qw(cs, ctx->cp_log->iova);
386       pkt_qw(cs, size_iova);
387       pkt_qw(cs, ctx->cp_log->iova + sizeof(uint64_t));
388    }
389 
390    {
391       struct cmdstream *cs = prev_cs;
392       pkt7(cs, CP_MEM_TO_MEM, 5);
393       pkt(cs, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_WAIT_FOR_MEM_WRITES);
394       pkt_qw(cs, header_iova);
395       pkt_qw(cs, ctx->cp_log->iova);
396 
397       pkt7(cs, CP_MEM_TO_MEM, 7);
398       pkt(cs, CP_MEM_TO_MEM_0_DOUBLE);
399       pkt_qw(cs, body_iova);
400       pkt_qw(cs, ctx->cp_log->iova);
401       pkt_qw(cs, ctx->cp_log->iova + sizeof(uint64_t));
402 
403       pkt7(cs, CP_WAIT_MEM_WRITES, 0);
404       pkt7(cs, CP_WAIT_FOR_ME, 0);
405    }
406 
407    end_ib();
408 }
409 
410 static void
gpu_read_into_file(struct replay_context * ctx,struct cmdstream * _cs,uint64_t iova,uint64_t size,const char * name)411 gpu_read_into_file(struct replay_context *ctx, struct cmdstream *_cs,
412                     uint64_t iova, uint64_t size, const char *name)
413 {
414    struct wrbuf *wrbuf = (struct wrbuf *) calloc(1, sizeof(struct wrbuf));
415    wrbuf->iova = iova;
416    wrbuf->size = size;
417    wrbuf->name = strdup(name);
418 
419    assert(wrbuf->iova != 0);
420 
421    list_addtail(&wrbuf->link, &ctx->wrbuf_list);
422 }