1 /*
2  * Copyright © 2022 Igalia S.L.
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #include <assert.h>
7 #include <ctype.h>
8 #include <err.h>
9 #include <errno.h>
10 #include <fcntl.h>
11 #include <getopt.h>
12 #include <inttypes.h>
13 #include <signal.h>
14 #include <stdarg.h>
15 #include <stdbool.h>
16 #include <stdint.h>
17 #include <stdio.h>
18 #include <stdlib.h>
19 #include <string.h>
20 #include <unistd.h>
21 #include <libgen.h>
22 #if FD_REPLAY_KGSL
23 #define __user
24 #include "../vulkan/msm_kgsl.h"
25 #elif FD_REPLAY_MSM
26 #include <xf86drm.h>
27 #include "drm-uapi/msm_drm.h"
28 #elif FD_REPLAY_WSL
29 #define __KERNEL__
30 #include "drm-uapi/d3dkmthk.h"
31 #endif
32 
33 #include <sys/ioctl.h>
34 #include <sys/mman.h>
35 #include <sys/stat.h>
36 #include <sys/types.h>
37 #include <sys/wait.h>
38 
39 #include "util/os_time.h"
40 #include "util/rb_tree.h"
41 #include "util/u_vector.h"
42 #include "util/vma.h"
43 #include "buffers.h"
44 #include "cffdec.h"
45 #include "io.h"
46 #include "redump.h"
47 #include "rdutil.h"
48 
49 /**
50  * Replay command stream obtained from:
51  * - /sys/kernel/debug/dri/0/rd
52  * - /sys/kernel/debug/dri/0/hangrd
53  * !!! Command stream capture should be done with ALL buffers:
54  * - echo 1 > /sys/module/msm/parameters/rd_full
55  *
56  * Requires kernel with MSM_INFO_SET_IOVA support.
57  * In case userspace IOVAs are not supported, like on KGSL, we have to
58  * pre-allocate a single buffer and hope it is always allocated starting
59  * from the same address.
60  *
61  * TODO: Misrendering, would require marking framebuffer images
62  *       at each renderpass in order to fetch and decode them.
63  *
64  * Code from Freedreno/Turnip is not re-used here since the relevant
65  * pieces may introduce additional allocations, which are not allowed
66  * during the replay.
67  *
68  * For how-to see freedreno.rst
69  */
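/*
 * Example flow (a sketch; the capture node path and binary name may differ
 * between kernels and builds):
 *
 *   echo 1 > /sys/module/msm/parameters/rd_full
 *   cat /sys/kernel/debug/dri/0/rd > capture.rd    # while the app is rendering
 *   ./replay capture.rd                            # replay every submit
 *   ./replay --first=10 --last=10 capture.rd       # replay a single submit
 *   ./replay --override=10 --generator=./generate_rd capture.rd
 */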
70 
71 static const char *exename = NULL;
72 
73 static const uint64_t FAKE_ADDRESS_SPACE_SIZE = 1024 * 1024 * 1024;
74 
75 static int handle_file(const char *filename, uint32_t first_submit,
76                        uint32_t last_submit, uint32_t submit_to_override,
77                        uint64_t base_addr, const char *cmdstreamgen);
78 
79 static void
80 print_usage(const char *name, const char *default_csgen)
81 {
82    /* clang-format off */
83    fprintf(stderr, "Usage:\n\n"
84            "\t%s [OPTIONS]... FILE...\n\n"
85            "Options:\n"
86            "\t-e, --exe=NAME         - only use cmdstream from named process\n"
87            "\t-o  --override=submit  - № of the submit to override\n"
88            "\t-g  --generator=path   - executable which generate cmdstream for override (default: %s)\n"
89            "\t-f  --first=submit     - first submit № to replay\n"
90            "\t-l  --last=submit      - last submit № to replay\n"
91            "\t-a  --address=address  - base iova address on WSL\n"
92            "\t-h, --help             - show this message\n"
93            , name, default_csgen);
94    /* clang-format on */
95    exit(2);
96 }
97 
98 /* clang-format off */
99 static const struct option opts[] = {
100       { "exe",       required_argument, 0, 'e' },
101       { "override",  required_argument, 0, 'o' },
102       { "generator", required_argument, 0, 'g' },
103       { "first",     required_argument, 0, 'f' },
104       { "last",      required_argument, 0, 'l' },
105       { "address",   required_argument, 0, 'a' },
106       { "help",      no_argument,       0, 'h' },
107 };
108 /* clang-format on */
109 
110 int
111 main(int argc, char **argv)
112 {
113    int ret = -1;
114    int c;
115 
116    uint32_t submit_to_override = -1;
117    uint32_t first_submit = 0;
118    uint32_t last_submit = -1;
119    uint64_t base_addr = 0;
120 
121    char *default_csgen = malloc(PATH_MAX);
122    snprintf(default_csgen, PATH_MAX, "%s/generate_rd", dirname(argv[0]));
123 
124    const char *csgen = default_csgen;
125 
126    while ((c = getopt_long(argc, argv, "e:o:g:f:l:a:h", opts, NULL)) != -1) {
127       switch (c) {
128       case 0:
129          /* option that sets a flag, nothing to do */
130          break;
131       case 'e':
132          exename = optarg;
133          break;
134       case 'o':
135          submit_to_override = strtoul(optarg, NULL, 0);
136          break;
137       case 'g':
138          csgen = optarg;
139          break;
140       case 'f':
141          first_submit = strtoul(optarg, NULL, 0);
142          break;
143       case 'l':
144          last_submit = strtoul(optarg, NULL, 0);
145          break;
146       case 'a':
147          base_addr = strtoull(optarg, NULL, 0);
148          break;
149       case 'h':
150       default:
151          print_usage(argv[0], default_csgen);
152       }
153    }
154 
155    while (optind < argc) {
156       ret = handle_file(argv[optind], first_submit, last_submit,
157                         submit_to_override, base_addr, csgen);
158       if (ret) {
159          fprintf(stderr, "error reading: %s\n", argv[optind]);
160          fprintf(stderr, "continuing..\n");
161       }
162       optind++;
163    }
164 
165    if (ret)
166       print_usage(argv[0], default_csgen);
167 
168    return ret;
169 }
170 
171 struct buffer {
172    struct rb_node node;
173 
174    uint32_t gem_handle;
175    uint64_t size;
176    uint64_t iova;
177    void *map;
178 
179    bool used;
180    uint32_t flags;
181 };
182 
183 struct cmdstream {
184    uint64_t iova;
185    uint64_t size;
186 };
187 
188 struct wrbuf {
189    uint64_t iova;
190    uint64_t size;
191    char* name;
192 };
193 
194 struct device {
195    int fd;
196 
197    struct rb_tree buffers;
198    struct util_vma_heap vma;
199 
200    struct u_vector cmdstreams;
201 
202    uint64_t shader_log_iova;
203    uint64_t cp_log_iova;
204 
205    bool has_set_iova;
206 
207    uint32_t va_id;
208    void *va_map;
209    uint64_t va_iova;
210 
211    struct u_vector wrbufs;
212 
213 #ifdef FD_REPLAY_MSM
214    uint32_t queue_id;
215 #endif
216 
217 #ifdef FD_REPLAY_KGSL
218    uint32_t context_id;
219 #endif
220 
221 #ifdef FD_REPLAY_WSL
222    struct d3dkmthandle device;
223    struct d3dkmthandle context;
224 
225    /* At the moment we don't know a good way to wait for submission
226     * completion on WSL, so we use our own fences instead.
227     */
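   /* The fence is a plain uint32 in GPU-visible memory: a small trailing IB
    * (fence_ib) writes a sentinel value to fence_iova, and the CPU polls
    * *fence after submission (see device_submit_cmdstreams below). */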
228    uint64_t fence_iova;
229    uint64_t fence_ib_iova;
230    volatile uint32_t *fence;
231    uint32_t *fence_ib;
232 #endif
233 };
234 
235 void buffer_mem_free(struct device *dev, struct buffer *buf);
236 
237 static int
238 rb_buffer_insert_cmp(const struct rb_node *n1, const struct rb_node *n2)
239 {
240    const struct buffer *buf1 = (const struct buffer *)n1;
241    const struct buffer *buf2 = (const struct buffer *)n2;
242    /* Note that gpuaddr comparisons can overflow an int: */
243    if (buf1->iova > buf2->iova)
244       return 1;
245    else if (buf1->iova < buf2->iova)
246       return -1;
247    return 0;
248 }
249 
250 static int
251 rb_buffer_search_cmp(const struct rb_node *node, const void *addrptr)
252 {
253    const struct buffer *buf = (const struct buffer *)node;
254    uint64_t iova = *(uint64_t *)addrptr;
255    if (buf->iova + buf->size <= iova)
256       return -1;
257    else if (buf->iova > iova)
258       return 1;
259    return 0;
260 }
261 
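/* Look up the buffer whose [iova, iova + size) range contains the given
 * address, or NULL if the address is zero or unknown. */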
262 static struct buffer *
263 device_get_buffer(struct device *dev, uint64_t iova)
264 {
265    if (iova == 0)
266       return NULL;
267    return (struct buffer *)rb_tree_search(&dev->buffers, &iova,
268                                           rb_buffer_search_cmp);
269 }
270 
271 static void
272 device_mark_buffers(struct device *dev)
273 {
274    rb_tree_foreach_safe (struct buffer, buf, &dev->buffers, node) {
275       buf->used = false;
276    }
277 }
278 
279 static void
280 device_free_buffers(struct device *dev)
281 {
282    rb_tree_foreach_safe (struct buffer, buf, &dev->buffers, node) {
283       buffer_mem_free(dev, buf);
284       rb_tree_remove(&dev->buffers, &buf->node);
285       free(buf);
286    }
287 }
288 
289 static void
290 device_print_shader_log(struct device *dev)
291 {
292    struct shader_log {
293       uint64_t cur_iova;
294       union {
295          uint32_t entries_u32[0];
296          float entries_float[0];
297       };
298    };
299 
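   /* The overriding cmdstream is expected to append 32-bit entries and
    * advance cur_iova, so the entry count is the distance from the start of
    * the entries array to cur_iova, in dwords. */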
300    if (dev->shader_log_iova != 0)
301    {
302       struct buffer *buf = device_get_buffer(dev, dev->shader_log_iova);
303       if (buf) {
304          struct shader_log *log = buf->map + (dev->shader_log_iova - buf->iova);
305          uint32_t count = (log->cur_iova - dev->shader_log_iova -
306                            offsetof(struct shader_log, entries_u32)) / 4;
307 
308          printf("Shader Log Entries: %u\n", count);
309 
310          for (uint32_t i = 0; i < count; i++) {
311             printf("[%u] %08x %.4f\n", i, log->entries_u32[i],
312                    log->entries_float[i]);
313          }
314 
315          printf("========================================\n");
316       }
317    }
318 }
319 
320 static void
321 device_print_cp_log(struct device *dev)
322 {
323    struct cp_log {
324       uint64_t cur_iova;
325       uint64_t tmp;
326       uint64_t first_entry_size;
327    };
328 
329    struct cp_log_entry {
330       uint64_t size;
331       uint32_t data[0];
332    };
333 
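   /* Entries are expected back to back starting at first_entry_size: each one
    * stores its payload size in bytes followed by the data, and a zero size
    * terminates the list. */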
334    if (dev->cp_log_iova == 0)
335       return;
336 
337    struct buffer *buf = device_get_buffer(dev, dev->cp_log_iova);
338    if (!buf)
339       return;
340 
341    struct cp_log *log = buf->map + (dev->cp_log_iova - buf->iova);
342    if (log->first_entry_size == 0)
343       return;
344 
345    struct cp_log_entry *log_entry =
346       buf->map + offsetof(struct cp_log, first_entry_size);
347    uint32_t idx = 0;
348    while (log_entry->size != 0) {
349       printf("\nCP Log [%u]:\n", idx++);
350       uint32_t dwords = log_entry->size / 4;
351 
352       for (uint32_t i = 0; i < dwords; i++) {
353          if (i % 8 == 0)
354             printf("\t");
355          printf("%08x ", log_entry->data[i]);
356          if (i % 8 == 7)
357             printf("\n");
358       }
359       printf("\n");
360 
361       log_entry = (void *)log_entry + log_entry->size +
362                   offsetof(struct cp_log_entry, data);
363    }
364 }
365 
366 static void
367 device_dump_wrbuf(struct device *dev)
368 {
369    if (!u_vector_length(&dev->wrbufs))
370       return;
371 
372    char buffer_dir[PATH_MAX];
373    getcwd(buffer_dir, sizeof(buffer_dir));
374    strcat(buffer_dir, "/buffers");
375    rmdir(buffer_dir);
376    mkdir(buffer_dir, 0777);
377 
378    struct wrbuf *wrbuf;
379    u_vector_foreach(wrbuf, &dev->wrbufs) {
380       char buffer_path[PATH_MAX];
381       snprintf(buffer_path, sizeof(buffer_path), "%s/%s", buffer_dir, wrbuf->name);
382       FILE *f = fopen(buffer_path, "wb");
383       if (!f) {
384          fprintf(stderr, "Error opening %s\n", buffer_path);
385          continue; /* nothing to close yet */
386       }
387 
388       struct buffer *buf = device_get_buffer(dev, wrbuf->iova);
389       if (!buf) {
390          fprintf(stderr, "Error getting buffer for %s\n", buffer_path);
391          goto end_it;
392       }
393 
394       uint64_t offset = wrbuf->iova - buf->iova;
395       uint64_t size = MIN2(wrbuf->size, buf->size - offset);
396       if (size != wrbuf->size) {
397          fprintf(stderr, "Warning: Clamping buffer %s as it's smaller than expected (0x%" PRIx64 " < 0x%" PRIx64 ")\n", wrbuf->name, size, wrbuf->size);
398       }
399 
400       printf("Dumping %s (0x%" PRIx64 " - 0x%" PRIx64 ")\n", wrbuf->name, wrbuf->iova, wrbuf->iova + size);
401 
402       fwrite(buf->map + offset, size, 1, f);
403 
404       end_it:
405       fclose(f);
406    }
407 }
408 
409 #if FD_REPLAY_MSM
410 static inline void
411 get_abs_timeout(struct drm_msm_timespec *tv, uint64_t ns)
412 {
413    struct timespec t;
414    clock_gettime(CLOCK_MONOTONIC, &t);
415    tv->tv_sec = t.tv_sec + ns / 1000000000;
416    tv->tv_nsec = t.tv_nsec + ns % 1000000000;
417 }
418 
419 static struct device *
420 device_create(uint64_t base_addr)
421 {
422    struct device *dev = calloc(sizeof(struct device), 1);
423 
424    dev->fd = drmOpenWithType("msm", NULL, DRM_NODE_RENDER);
425    if (dev->fd < 0) {
426       errx(1, "Cannot open MSM fd!");
427    }
428 
429    uint64_t va_start, va_size;
430 
431    struct drm_msm_param req = {
432       .pipe = MSM_PIPE_3D0,
433       .param = MSM_PARAM_VA_START,
434    };
435 
436    int ret = drmCommandWriteRead(dev->fd, DRM_MSM_GET_PARAM, &req, sizeof(req));
437    va_start = req.value;
438 
439    if (!ret) {
440       req.param = MSM_PARAM_VA_SIZE;
441       ret = drmCommandWriteRead(dev->fd, DRM_MSM_GET_PARAM, &req, sizeof(req));
442       va_size = req.value;
443 
444       dev->has_set_iova = true;
445    }
446 
447    if (ret) {
448       printf("MSM_INFO_SET_IOVA is not supported!\n");
449 
450       struct drm_msm_gem_new req_new = {.size = FAKE_ADDRESS_SPACE_SIZE, .flags = MSM_BO_CACHED_COHERENT};
451       drmCommandWriteRead(dev->fd, DRM_MSM_GEM_NEW, &req_new, sizeof(req_new));
452       dev->va_id = req_new.handle;
453 
454       struct drm_msm_gem_info req_info = {
455          .handle = req_new.handle,
456          .info = MSM_INFO_GET_IOVA,
457       };
458 
459       drmCommandWriteRead(dev->fd,
460                                  DRM_MSM_GEM_INFO, &req_info, sizeof(req_info));
461       dev->va_iova = req_info.value;
462 
463       struct drm_msm_gem_info req_offset = {
464          .handle = req_new.handle,
465          .info = MSM_INFO_GET_OFFSET,
466       };
467 
468       drmCommandWriteRead(dev->fd, DRM_MSM_GEM_INFO, &req_offset, sizeof(req_offset));
469 
470       dev->va_map = mmap(0, FAKE_ADDRESS_SPACE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
471                        dev->fd, req_offset.value);
472       if (dev->va_map == MAP_FAILED) {
473          err(1, "mmap failure");
474       }
475 
476       va_start = dev->va_iova;
477       va_size = FAKE_ADDRESS_SPACE_SIZE;
478 
479       printf("Allocated iova %" PRIx64 "\n", dev->va_iova);
480    }
481 
482    struct drm_msm_submitqueue req_queue = {
483       .flags = 0,
484       .prio = 0,
485    };
486 
487    ret = drmCommandWriteRead(dev->fd, DRM_MSM_SUBMITQUEUE_NEW, &req_queue,
488                              sizeof(req_queue));
489    if (ret) {
490       err(1, "DRM_MSM_SUBMITQUEUE_NEW failure");
491    }
492 
493    dev->queue_id = req_queue.id;
494 
495    rb_tree_init(&dev->buffers);
496    util_vma_heap_init(&dev->vma, va_start, ROUND_DOWN_TO(va_size, 4096));
497    u_vector_init(&dev->cmdstreams, 8, sizeof(struct cmdstream));
498    u_vector_init(&dev->wrbufs, 8, sizeof(struct wrbuf));
499 
500    return dev;
501 }
502 
503 static void
504 device_submit_cmdstreams(struct device *dev)
505 {
506    if (!u_vector_length(&dev->cmdstreams)) {
507       device_free_buffers(dev);
508       return;
509    }
510 
511    struct drm_msm_gem_submit_cmd cmds[u_vector_length(&dev->cmdstreams)];
512 
513    uint32_t idx = 0;
514    struct cmdstream *cmd;
515    u_vector_foreach(cmd, &dev->cmdstreams) {
516       struct buffer *cmdstream_buf = device_get_buffer(dev, cmd->iova);
517 
518       uint32_t bo_idx = 0;
519       rb_tree_foreach (struct buffer, buf, &dev->buffers, node) {
520          if (buf == cmdstream_buf)
521             break;
522 
523          bo_idx++;
524       }
525 
526       if (cmdstream_buf)
527          cmdstream_buf->flags = MSM_SUBMIT_BO_DUMP;
528 
529       struct drm_msm_gem_submit_cmd *submit_cmd = &cmds[idx];
530       submit_cmd->type = MSM_SUBMIT_CMD_BUF;
531       submit_cmd->submit_idx = dev->has_set_iova ? bo_idx : 0;
532       if (dev->has_set_iova) {
533          submit_cmd->submit_offset = cmd->iova - cmdstream_buf->iova;
534       } else {
535          submit_cmd->submit_offset = cmd->iova - dev->va_iova;
536       }
537       submit_cmd->size = cmd->size;
538       submit_cmd->pad = 0;
539       submit_cmd->nr_relocs = 0;
540       submit_cmd->relocs = 0;
541 
542       idx++;
543    }
544 
545    uint32_t bo_count = 0;
546    rb_tree_foreach (struct buffer, buf, &dev->buffers, node) {
547       if (buf)
548          bo_count++;
549    }
550 
551    if (!dev->has_set_iova) {
552       bo_count = 1;
553    }
554 
555    struct drm_msm_gem_submit_bo *bo_list =
556       calloc(sizeof(struct drm_msm_gem_submit_bo), bo_count);
557 
558    if (dev->has_set_iova) {
559       uint32_t bo_idx = 0;
560       rb_tree_foreach (struct buffer, buf, &dev->buffers, node) {
561          struct drm_msm_gem_submit_bo *submit_bo = &bo_list[bo_idx++];
562          submit_bo->handle = buf->gem_handle;
563          submit_bo->flags =
564             buf->flags | MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE;
565          submit_bo->presumed = buf->iova;
566 
567          buf->flags = 0;
568       }
569    } else {
570       bo_list[0].handle = dev->va_id;
571       bo_list[0].flags =
572          MSM_SUBMIT_BO_DUMP | MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE;
573       bo_list[0].presumed = dev->va_iova;
574    }
575 
576    struct drm_msm_gem_submit submit_req = {
577       .flags = MSM_PIPE_3D0,
578       .queueid = dev->queue_id,
579       .bos = (uint64_t)(uintptr_t)bo_list,
580       .nr_bos = bo_count,
581       .cmds = (uint64_t)(uintptr_t)cmds,
582       .nr_cmds = u_vector_length(&dev->cmdstreams),
583       .in_syncobjs = 0,
584       .out_syncobjs = 0,
585       .nr_in_syncobjs = 0,
586       .nr_out_syncobjs = 0,
587       .syncobj_stride = sizeof(struct drm_msm_gem_submit_syncobj),
588    };
589 
590    int ret = drmCommandWriteRead(dev->fd, DRM_MSM_GEM_SUBMIT, &submit_req,
591                                  sizeof(submit_req));
592 
593    if (ret) {
594       err(1, "DRM_MSM_GEM_SUBMIT failure %d", ret);
595    }
596 
597    /* Wait for the submission to complete so that freeing the buffers
598     * also frees their VMAs in the kernel. This makes sure that new
599     * allocations won't clash with the old ones.
600     */
601    struct drm_msm_wait_fence wait_req = {
602       .fence = submit_req.fence,
603       .queueid = dev->queue_id,
604    };
605    get_abs_timeout(&wait_req.timeout, 1000000000);
606 
607    ret =
608       drmCommandWrite(dev->fd, DRM_MSM_WAIT_FENCE, &wait_req, sizeof(wait_req));
609    if (ret && (ret != -ETIMEDOUT)) {
610       err(1, "DRM_MSM_WAIT_FENCE failure %d", ret);
611    }
612 
613    u_vector_finish(&dev->cmdstreams);
614    u_vector_init(&dev->cmdstreams, 8, sizeof(struct cmdstream));
615 
616    device_print_shader_log(dev);
617    device_print_cp_log(dev);
618 
619    device_dump_wrbuf(dev);
620    u_vector_finish(&dev->wrbufs);
621    u_vector_init(&dev->wrbufs, 8, sizeof(struct wrbuf));
622 
623    device_free_buffers(dev);
624 }
625 
626 static void
627 buffer_mem_alloc(struct device *dev, struct buffer *buf)
628 {
629    bool success = util_vma_heap_alloc_addr(&dev->vma, buf->iova, buf->size);
630    if (!success)
631       errx(1, "Failed to allocate buffer");
632 
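   /* Without MSM_INFO_SET_IOVA the buffer simply points into the single
    * pre-allocated fake address space; otherwise a real GEM BO is created,
    * pinned at the captured iova, and mmap'ed below. */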
633    if (!dev->has_set_iova) {
634       uint64_t offset = buf->iova - dev->va_iova;
635       assert(offset < FAKE_ADDRESS_SPACE_SIZE && (offset + buf->size) <= FAKE_ADDRESS_SPACE_SIZE);
636       buf->map = ((uint8_t*)dev->va_map) + offset;
637       return;
638    }
639 
640    {
641       struct drm_msm_gem_new req = {.size = buf->size, .flags = MSM_BO_WC};
642 
643       int ret =
644          drmCommandWriteRead(dev->fd, DRM_MSM_GEM_NEW, &req, sizeof(req));
645       if (ret) {
646          err(1, "DRM_MSM_GEM_NEW failure %d", ret);
647       }
648 
649       buf->gem_handle = req.handle;
650    }
651 
652    {
653       struct drm_msm_gem_info req = {
654          .handle = buf->gem_handle,
655          .info = MSM_INFO_SET_IOVA,
656          .value = buf->iova,
657       };
658 
659       int ret =
660          drmCommandWriteRead(dev->fd, DRM_MSM_GEM_INFO, &req, sizeof(req));
661 
662       if (ret) {
663          err(1, "MSM_INFO_SET_IOVA failure %d", ret);
664       }
665    }
666 
667    {
668       struct drm_msm_gem_info req = {
669          .handle = buf->gem_handle,
670          .info = MSM_INFO_GET_OFFSET,
671       };
672 
673       int ret =
674          drmCommandWriteRead(dev->fd, DRM_MSM_GEM_INFO, &req, sizeof(req));
675       if (ret) {
676          err(1, "MSM_INFO_GET_OFFSET failure %d", ret);
677       }
678 
679       void *map = mmap(0, buf->size, PROT_READ | PROT_WRITE, MAP_SHARED,
680                        dev->fd, req.value);
681       if (map == MAP_FAILED) {
682          err(1, "mmap failure");
683       }
684 
685       buf->map = map;
686    }
687 }
688 
689 void
690 buffer_mem_free(struct device *dev, struct buffer *buf)
691 {
692    if (dev->has_set_iova) {
693       munmap(buf->map, buf->size);
694 
695       struct drm_msm_gem_info req_iova = {
696          .handle = buf->gem_handle,
697          .info = MSM_INFO_SET_IOVA,
698          .value = 0,
699       };
700 
701       int ret = drmCommandWriteRead(dev->fd, DRM_MSM_GEM_INFO, &req_iova,
702                                     sizeof(req_iova));
703       if (ret < 0) {
704          err(1, "MSM_INFO_SET_IOVA(0) failed! %d", ret);
705          return;
706       }
707 
708       struct drm_gem_close req = {
709          .handle = buf->gem_handle,
710       };
711       drmIoctl(dev->fd, DRM_IOCTL_GEM_CLOSE, &req);
712    }
713 
714    util_vma_heap_free(&dev->vma, buf->iova, buf->size);
715 }
716 
717 #elif FD_REPLAY_KGSL
718 static int
719 safe_ioctl(int fd, unsigned long request, void *arg)
720 {
721    int ret;
722 
723    do {
724       ret = ioctl(fd, request, arg);
725    } while (ret == -1 && (errno == EINTR || errno == EAGAIN));
726 
727    return ret;
728 }
729 
730 static struct device *
731 device_create(uint64_t base_addr)
732 {
733    struct device *dev = calloc(sizeof(struct device), 1);
734 
735    static const char path[] = "/dev/kgsl-3d0";
736 
737    dev->fd = open(path, O_RDWR | O_CLOEXEC);
738    if (dev->fd < 0) {
739       errx(1, "Cannot open KGSL fd!");
740    }
741 
742    struct kgsl_gpumem_alloc_id req = {
743       .size = FAKE_ADDRESS_SPACE_SIZE,
744       .flags = KGSL_MEMFLAGS_IOCOHERENT,
745    };
746 
747    int ret = safe_ioctl(dev->fd, IOCTL_KGSL_GPUMEM_ALLOC_ID, &req);
748    if (ret) {
749       err(1, "IOCTL_KGSL_GPUMEM_ALLOC_ID failure");
750    }
751 
752    dev->va_id = req.id;
753    dev->va_iova = req.gpuaddr;
754    dev->va_map = mmap(0, FAKE_ADDRESS_SPACE_SIZE, PROT_READ | PROT_WRITE,
755                       MAP_SHARED, dev->fd, req.id << 12);
756 
757    rb_tree_init(&dev->buffers);
758    util_vma_heap_init(&dev->vma, req.gpuaddr, ROUND_DOWN_TO(FAKE_ADDRESS_SPACE_SIZE, 4096));
759    u_vector_init(&dev->cmdstreams, 8, sizeof(struct cmdstream));
760    u_vector_init(&dev->wrbufs, 8, sizeof(struct wrbuf));
761 
762    struct kgsl_drawctxt_create drawctxt_req = {
763       .flags = KGSL_CONTEXT_SAVE_GMEM |
764               KGSL_CONTEXT_NO_GMEM_ALLOC |
765               KGSL_CONTEXT_PREAMBLE,
766    };
767 
768    ret = safe_ioctl(dev->fd, IOCTL_KGSL_DRAWCTXT_CREATE, &drawctxt_req);
769    if (ret) {
770       err(1, "IOCTL_KGSL_DRAWCTXT_CREATE failure");
771    }
772 
773    printf("Allocated iova %" PRIx64 "\n", dev->va_iova);
774 
775    dev->context_id = drawctxt_req.drawctxt_id;
776 
777    return dev;
778 }
779 
780 static void
781 device_submit_cmdstreams(struct device *dev)
782 {
783    if (!u_vector_length(&dev->cmdstreams)) {
784       device_free_buffers(dev);
785       return;
786    }
787 
788    struct kgsl_command_object cmds[u_vector_length(&dev->cmdstreams)];
789 
790    uint32_t idx = 0;
791    struct cmdstream *cmd;
792    u_vector_foreach(cmd, &dev->cmdstreams) {
793       struct kgsl_command_object *submit_cmd = &cmds[idx++];
794       submit_cmd->gpuaddr = cmd->iova;
795       submit_cmd->size = cmd->size;
796       submit_cmd->flags = KGSL_CMDLIST_IB;
797       submit_cmd->id = dev->va_id;
798    }
799 
800    struct kgsl_gpu_command submit_req = {
801       .flags = KGSL_CMDBATCH_SUBMIT_IB_LIST,
802       .cmdlist = (uintptr_t) &cmds,
803       .cmdsize = sizeof(struct kgsl_command_object),
804       .numcmds = u_vector_length(&dev->cmdstreams),
805       .numsyncs = 0,
806       .context_id = dev->context_id,
807    };
808 
809    int ret = safe_ioctl(dev->fd, IOCTL_KGSL_GPU_COMMAND, &submit_req);
810 
811    if (ret) {
812       err(1, "IOCTL_KGSL_GPU_COMMAND failure %d", ret);
813    }
814 
815    struct kgsl_device_waittimestamp_ctxtid wait = {
816       .context_id = dev->context_id,
817       .timestamp = submit_req.timestamp,
818       .timeout = 3000,
819    };
820 
821    ret = safe_ioctl(dev->fd, IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID, &wait);
822 
823    if (ret) {
824       err(1, "IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID failure %d", ret);
825    }
826 
827    u_vector_finish(&dev->cmdstreams);
828    u_vector_init(&dev->cmdstreams, 8, sizeof(struct cmdstream));
829 
830    device_print_shader_log(dev);
831    device_print_cp_log(dev);
832 
833    device_dump_wrbuf(dev);
834    u_vector_finish(&dev->wrbufs);
835    u_vector_init(&dev->wrbufs, 8, sizeof(struct wrbuf));
836 
837    device_free_buffers(dev);
838 }
839 
840 static void
841 buffer_mem_alloc(struct device *dev, struct buffer *buf)
842 {
843    bool success = util_vma_heap_alloc_addr(&dev->vma, buf->iova, buf->size);
844    if (!success)
845       errx(1, "Failed to allocate buffer");
846 
847    buf->map = ((uint8_t*)dev->va_map) + (buf->iova - dev->va_iova);
848 }
849 
850 void
851 buffer_mem_free(struct device *dev, struct buffer *buf)
852 {
853    util_vma_heap_free(&dev->vma, buf->iova, buf->size);
854 }
855 #else
856 
857 static int
858 safe_ioctl(int fd, unsigned long request, void *arg)
859 {
860    int ret;
861 
862    do {
863       ret = ioctl(fd, request, arg);
864    } while (ret == -1 && (errno == EINTR || errno == EAGAIN));
865 
866    return ret;
867 }
868 
869 struct alloc_priv_info {
870    __u32 struct_size;
871    char _pad0[4];
872    __u32 unk0; // 1
873    char _pad1[4];
874    __u64 size;
875    __u32 alignment;
876    char _pad2[20];
877    __u64 allocated_size;
878    __u32 unk1;   // 1
879    char _pad4[8]; /* offset: 60*/
880    __u32 unk2;   // 61
881    char _pad5[76];
882    __u32 unk3; /* offset: 148 */ // 1
883    char _pad6[8];
884    __u32 unk4; /* offset: 160 */ // 1
885    char _pad7[44];
886    __u32 unk5; /* offset: 208 */ // 3
887    char _pad8[16];
888    __u32 size_2; /* offset: 228 */
889    __u32 unk6;   // 1
890    __u32 size_3;
891    __u32 size_4;
892    __u32 unk7; /* offset: 244 */ // 1
893    char _pad9[56];
894 };
895 static_assert(sizeof(struct alloc_priv_info) == 304);
896 static_assert(offsetof(struct alloc_priv_info, unk1) == 56);
897 static_assert(offsetof(struct alloc_priv_info, unk3) == 148);
898 static_assert(offsetof(struct alloc_priv_info, unk5) == 208);
899 
900 struct submit_priv_ib_info {
901    char _pad5[4];
902    __u32 size_dwords;
903    __u64 iova;
904    char _pad6[8];
905 } __attribute__((packed));
906 
907 struct submit_priv_data {
908    __u32 magic0;
909    char _pad0[4];
910    __u32 struct_size;
911    char _pad1[4];
912    /* It seems that the priv data can contain several sub-blocks;
913     * cmdbuf is one of them, and after it there is another 8-byte struct
914     * without anything useful in it. That second block doesn't seem
915     * important for replaying.
916     */
917    __u32 datas_count;
918    char _pad2[32];
919    struct {
920       __u32 magic1;
921       __u32 data_size;
922 
923       struct {
924          __u32 unk1;
925          __u32 cmdbuf_size;
926          char _pad3[32];
927          __u32 ib_count;
928          char _pad4[36];
929 
930          struct submit_priv_ib_info ibs[];
931       } cmdbuf;
932    } data0;
933 
934    //    unsigned char magic2[8];
935 } __attribute__((packed));
936 static_assert(offsetof(struct submit_priv_data, data0) == 0x34);
937 static_assert(offsetof(struct submit_priv_data, data0.cmdbuf.ibs) == 0x8c);
938 
939 static struct device *
940 device_create(uint64_t base_addr)
941 {
942    struct device *dev = calloc(sizeof(struct device), 1);
943 
944    static const char path[] = "/dev/dxg";
945 
946    dev->fd = open(path, O_RDWR | O_CLOEXEC);
947    if (dev->fd < 0) {
948       errx(1, "Cannot open /dev/dxg fd");
949    }
950 
951    struct d3dkmt_adapterinfo adapters[1];
952    struct d3dkmt_enumadapters3 enum_adapters = {
953       .adapter_count = 1,
954       .adapters = adapters,
955    };
956    int ret = safe_ioctl(dev->fd, LX_DXENUMADAPTERS3, &enum_adapters);
957    if (ret) {
958       errx(1, "LX_DXENUMADAPTERS3 failure");
959    }
960 
961    if (enum_adapters.adapter_count == 0) {
962       errx(1, "No adapters found");
963    }
964 
965    struct winluid adapter_luid = enum_adapters.adapters[0].adapter_luid;
966 
967    struct d3dkmt_openadapterfromluid open_adapter = {
968       .adapter_luid = adapter_luid,
969    };
970    ret = safe_ioctl(dev->fd, LX_DXOPENADAPTERFROMLUID, &open_adapter);
971    if (ret) {
972       errx(1, "LX_DXOPENADAPTERFROMLUID failure");
973    }
974 
975    struct d3dkmthandle adapter = open_adapter.adapter_handle;
976 
977    struct d3dkmt_createdevice create_device = {
978       .adapter = adapter,
979    };
980    ret = safe_ioctl(dev->fd, LX_DXCREATEDEVICE, &create_device);
981    if (ret) {
982       errx(1, "LX_DXCREATEDEVICE failure");
983    }
984 
985    struct d3dkmthandle device = create_device.device;
986    dev->device = device;
987 
988    unsigned char create_context_priv_data[] = {
989       0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00,
990       0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
991       0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1c, 0x0c, 0x00, 0x00, 0x00,
992       0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
993       0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
994       0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
995    };
996 
997    struct d3dkmt_createcontextvirtual create_context = {
998       .device = device,
999       .node_ordinal = 0,
1000       .engine_affinity = 1,
1001       .priv_drv_data = create_context_priv_data,
1002       .priv_drv_data_size = sizeof(create_context_priv_data),
1003       .client_hint = 16,
1004    };
1005    ret = safe_ioctl(dev->fd, LX_DXCREATECONTEXTVIRTUAL, &create_context);
1006    if (ret) {
1007       errx(1, "LX_DXCREATECONTEXTVIRTUAL failure");
1008    }
1009 
1010    dev->context = create_context.context;
1011 
1012    struct d3dkmt_createpagingqueue create_paging_queue = {
1013       .device = device,
1014       .priority = _D3DDDI_PAGINGQUEUE_PRIORITY_NORMAL,
1015       .physical_adapter_index = 0,
1016    };
1017    ret = safe_ioctl(dev->fd, LX_DXCREATEPAGINGQUEUE, &create_paging_queue);
1018    if (ret) {
1019       errx(1, "LX_DXCREATEPAGINGQUEUE failure");
1020    }
1021    struct d3dkmthandle paging_queue = create_paging_queue.paging_queue;
1022 
1023 
1024    uint32_t alloc_size = FAKE_ADDRESS_SPACE_SIZE;
1025    struct alloc_priv_info priv_alloc_info = {
1026       .struct_size = sizeof(struct alloc_priv_info),
1027       .unk0 = 1,
1028       .size = alloc_size,
1029       .alignment = 4096,
1030       .unk1 = 1,
1031       .unk2 = 61,
1032       .unk3 = 1,
1033       .unk4 = 1,
1034       .unk5 = 3,
1035       .size_2 = alloc_size,
1036       .unk6 = 1,
1037       .size_3 = alloc_size,
1038       .size_4 = alloc_size,
1039       .unk7 = 1,
1040    };
1041 
1042    struct d3dddi_allocationinfo2 alloc_info = {
1043       .priv_drv_data = &priv_alloc_info,
1044       .priv_drv_data_size = sizeof(struct alloc_priv_info),
1045    };
1046 
1047    struct d3dkmt_createallocation create_allocation = {
1048       .device = device,
1049       .alloc_count = 1,
1050       .allocation_info = &alloc_info,
1051    };
1052    ret = safe_ioctl(dev->fd, LX_DXCREATEALLOCATION, &create_allocation);
1053    if (ret) {
1054       errx(1, "LX_DXCREATEALLOCATION failure");
1055    }
1056 
1057    assert(priv_alloc_info.allocated_size == alloc_size);
1058 
1059    struct d3dddi_mapgpuvirtualaddress map_virtual_address = {
1060       .paging_queue = paging_queue,
1061       .base_address = base_addr,
1062       .maximum_address = 18446744073709551615ull,
1063       .allocation = create_allocation.allocation_info[0].allocation,
1064       .size_in_pages = MAX2(alloc_size / 4096, 1),
1065       .protection = {
1066          .write = 1,
1067          .execute = 1,
1068       },
1069    };
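   /* Both this call and LX_DXMAKERESIDENT below appear to return 259 (0x103,
    * presumably STATUS_PENDING) when the request was queued successfully,
    * hence the unusual error checks. */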
1070    ret = safe_ioctl(dev->fd, LX_DXMAPGPUVIRTUALADDRESS, &map_virtual_address);
1071    if (ret != 259) {
1072       errx(1, "LX_DXMAPGPUVIRTUALADDRESS failure");
1073    }
1074 
1075    __u32 priority = 0;
1076    struct d3dddi_makeresident make_resident = {
1077       .paging_queue = paging_queue,
1078       .alloc_count = 1,
1079       .allocation_list = &create_allocation.allocation_info[0].allocation,
1080       .priority_list = &priority,
1081    };
1082    ret = safe_ioctl(dev->fd, LX_DXMAKERESIDENT, &make_resident);
1083    if (ret != 259) {
1084       errx(1, "LX_DXMAKERESIDENT failure");
1085    }
1086 
1087    struct d3dkmt_lock2 lock = {
1088       .device = device,
1089       .allocation = create_allocation.allocation_info[0].allocation,
1090    };
1091    ret = safe_ioctl(dev->fd, LX_DXLOCK2, &lock);
1092    if (ret) {
1093       errx(1, "LX_DXLOCK2 failure");
1094    }
1095 
1096    dev->va_iova = map_virtual_address.virtual_address;
1097    dev->va_map = lock.data;
1098 
1099    rb_tree_init(&dev->buffers);
1100    util_vma_heap_init(&dev->vma, dev->va_iova, ROUND_DOWN_TO(alloc_size, 4096));
1101    u_vector_init(&dev->cmdstreams, 8, sizeof(struct cmdstream));
1102    u_vector_init(&dev->wrbufs, 8, sizeof(struct wrbuf));
1103 
1104    printf("Allocated iova at 0x%" PRIx64 "\n", dev->va_iova);
1105 
1106    uint64_t hole_size = 4096;
1107    dev->vma.alloc_high = true;
1108    dev->fence_iova = util_vma_heap_alloc(&dev->vma, hole_size, 4096);
1109    dev->fence_ib_iova = dev->fence_iova + 8;
1110    dev->fence = (uint32_t *) ((uint8_t*)dev->va_map + (dev->fence_iova - dev->va_iova));
1111    dev->fence_ib = (uint32_t *) ((uint8_t*)dev->va_map + (dev->fence_ib_iova - dev->va_iova));
1112    dev->vma.alloc_high = false;
1113 
1114    return dev;
1115 }
1116 
1117 static void
1118 device_submit_cmdstreams(struct device *dev)
1119 {
1120    if (!u_vector_length(&dev->cmdstreams)) {
1121       device_free_buffers(dev);
1122       return;
1123    }
1124 
1125    uint32_t cmdstream_count = u_vector_length(&dev->cmdstreams) + 1;
1126 
1127    uint32_t priv_data_size =
1128       sizeof(struct submit_priv_data) +
1129       cmdstream_count * sizeof(struct submit_priv_ib_info);
1130 
1131    struct submit_priv_data *priv_data = calloc(1, priv_data_size);
1132    priv_data->magic0 = 0xccaabbee;
1133    priv_data->struct_size = priv_data_size;
1134    priv_data->datas_count = 1;
1135 
1136    priv_data->data0.magic1 = 0xfadcab02;
1137    priv_data->data0.data_size =
1138       sizeof(priv_data->data0) +
1139       cmdstream_count * sizeof(struct submit_priv_ib_info);
1140    priv_data->data0.cmdbuf.unk1 = 0xcccc0001;
1141    priv_data->data0.cmdbuf.cmdbuf_size = sizeof(priv_data->data0.cmdbuf) +
1142       cmdstream_count * sizeof(struct submit_priv_ib_info);
1143    priv_data->data0.cmdbuf.ib_count = cmdstream_count;
1144 
1145    struct cmdstream *cmd;
1146    uint32_t idx = 0;
1147    u_vector_foreach(cmd, &dev->cmdstreams) {
1148       priv_data->data0.cmdbuf.ibs[idx].size_dwords = cmd->size / 4;
1149       priv_data->data0.cmdbuf.ibs[idx].iova = cmd->iova;
1150       idx++;
1151    }
1152 
1153    priv_data->data0.cmdbuf.ibs[idx].size_dwords = 4;
1154    priv_data->data0.cmdbuf.ibs[idx].iova = dev->fence_ib_iova;
1155 
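   /* Build the trailing fence IB: a single CP_MEM_WRITE packet whose payload
    * is the 64-bit fence address (low and high dwords) followed by the
    * sentinel value that the CPU polls for below. */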
1156    *dev->fence = 0x00000000;
1157    dev->fence_ib[0] = pm4_pkt7_hdr(0x3d, 3); // CP_MEM_WRITE
1158    dev->fence_ib[1] = dev->fence_iova;
1159    dev->fence_ib[2] = dev->fence_iova >> 32;
1160    dev->fence_ib[3] = 0xababfcfc;
1161 
1162    // Fill second (empty) data block
1163    // uint32_t *magic_end = (uint32_t *)(((char *) priv_data) + priv_data_size - 8);
1164    // magic_end[0] = 0xfadcab00;
1165    // magic_end[1] = 0x00000008;
1166 
1167    struct d3dkmt_submitcommand submission = {
1168       .command_buffer = priv_data->data0.cmdbuf.ibs[0].iova,
1169       .command_length = priv_data->data0.cmdbuf.ibs[0].size_dwords * sizeof(uint32_t),
1170       .broadcast_context_count = 1,
1171       .broadcast_context[0] = dev->context,
1172       .priv_drv_data_size = priv_data_size,
1173       .priv_drv_data = priv_data,
1174    };
1175 
1176    int ret = safe_ioctl(dev->fd, LX_DXSUBMITCOMMAND, &submission);
1177    if (ret) {
1178       errx(1, "LX_DXSUBMITCOMMAND failure");
1179    }
1180 
1181    free(priv_data);
1182 
1183    u_vector_finish(&dev->cmdstreams);
1184    u_vector_init(&dev->cmdstreams, 8, sizeof(struct cmdstream));
1185 
1186    // TODO: better way to wait
1187    for (unsigned i = 0; i < 1000; i++) {
1188       usleep(1000);
1189       if (*dev->fence != 0)
1190          break;
1191    }
1192    if (*dev->fence == 0) {
1193       errx(1, "Waiting for submission failed! GPU faulted or kernel did not execute this submission.");
1194    }
1195 
1196    device_print_shader_log(dev);
1197    device_print_cp_log(dev);
1198 
1199    device_dump_wrbuf(dev);
1200    u_vector_finish(&dev->wrbufs);
1201    u_vector_init(&dev->wrbufs, 8, sizeof(struct wrbuf));
1202 
1203    device_free_buffers(dev);
1204 }
1205 
1206 static void
1207 buffer_mem_alloc(struct device *dev, struct buffer *buf)
1208 {
1209    bool success = util_vma_heap_alloc_addr(&dev->vma, buf->iova, buf->size);
1210    if (!success)
1211       errx(1, "Failed to allocate buffer");
1212 
1213    buf->map = ((uint8_t*)dev->va_map) + (buf->iova - dev->va_iova);
1214 }
1215 
1216 void
1217 buffer_mem_free(struct device *dev, struct buffer *buf)
1218 {
1219    util_vma_heap_free(&dev->vma, buf->iova, buf->size);
1220 }
1221 
1222 #endif
1223 
1224 static void
1225 upload_buffer(struct device *dev, uint64_t iova, unsigned int size,
1226               void *hostptr)
1227 {
1228    struct buffer *buf = device_get_buffer(dev, iova);
1229 
1230    if (!buf) {
1231       buf = calloc(sizeof(struct buffer), 1);
1232       buf->iova = iova;
1233       buf->size = size;
1234 
1235       rb_tree_insert(&dev->buffers, &buf->node, rb_buffer_insert_cmp);
1236 
1237       buffer_mem_alloc(dev, buf);
1238    } else if (buf->size != size) {
1239       buffer_mem_free(dev, buf);
1240       buf->size = size;
1241       buffer_mem_alloc(dev, buf);
1242    }
1243 
1244    memcpy(buf->map, hostptr, size);
1245 
1246    buf->used = true;
1247 }
1248 
1249 static int
1250 override_cmdstream(struct device *dev, struct cmdstream *cs,
1251                    const char *cmdstreamgen)
1252 {
1253 #if FD_REPLAY_KGSL
1254    static const char *tmpfilename = "/sdcard/Download/cmdstream_override.rd";
1255 #elif FD_REPLAY_MSM || FD_REPLAY_WSL
1256    static const char *tmpfilename = "/tmp/cmdstream_override.rd";
1257 #endif
1258 
1259 
1260    /* Find free space for the new cmdstreams and resources we will use
1261     * when overriding the existing cmdstream.
1262     */
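   /* Note: the largest free contiguous range is allocated only to learn its
    * address and size and is freed again right away; the generator is then
    * pointed at that hole and upload_buffer() re-allocates the addresses it
    * actually uses. */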
1263    uint64_t hole_size = util_vma_heap_get_max_free_continuous_size(&dev->vma);
1264    uint64_t hole_iova = util_vma_heap_alloc(&dev->vma, hole_size, 1);
1265    util_vma_heap_free(&dev->vma, hole_iova, hole_size);
1266 
1267    char cmd[2048];
1268    snprintf(cmd, sizeof(cmd),
1269             "%s --vastart=%" PRIu64 " --vasize=%" PRIu64 " %s", cmdstreamgen,
1270             hole_iova, hole_size, tmpfilename);
1271 
1272    printf("generating cmdstream '%s'\n", cmd);
1273 
1274    int ret = system(cmd);
1275    if (ret) {
1276       fprintf(stderr, "Error executing %s\n", cmd);
1277       return -1;
1278    }
1279 
1280    struct io *io;
1281    struct rd_parsed_section ps = {0};
1282 
1283    io = io_open(tmpfilename);
1284    if (!io) {
1285       fprintf(stderr, "could not open: %s\n", tmpfilename);
1286       return -1;
1287    }
1288 
1289    struct {
1290       unsigned int len;
1291       uint64_t gpuaddr;
1292    } gpuaddr = {0};
1293 
1294    while (parse_rd_section(io, &ps)) {
1295       switch (ps.type) {
1296       case RD_GPUADDR:
1297          parse_addr(ps.buf, ps.sz, &gpuaddr.len, &gpuaddr.gpuaddr);
1298          /* no-op */
1299          break;
1300       case RD_BUFFER_CONTENTS:
1301          upload_buffer(dev, gpuaddr.gpuaddr, gpuaddr.len, ps.buf);
1302          ps.buf = NULL;
1303          break;
1304       case RD_CMDSTREAM_ADDR: {
1305          unsigned int sizedwords;
1306          uint64_t gpuaddr;
1307          parse_addr(ps.buf, ps.sz, &sizedwords, &gpuaddr);
1308          printf("override cmdstream: %d dwords\n", sizedwords);
1309 
1310          cs->iova = gpuaddr;
1311          cs->size = sizedwords * sizeof(uint32_t);
1312          break;
1313       }
1314       case RD_SHADER_LOG_BUFFER: {
1315          unsigned int sizedwords;
1316          parse_addr(ps.buf, ps.sz, &sizedwords, &dev->shader_log_iova);
1317          break;
1318       }
1319       case RD_CP_LOG_BUFFER: {
1320          unsigned int sizedwords;
1321          parse_addr(ps.buf, ps.sz, &sizedwords, &dev->cp_log_iova);
1322          break;
1323       }
1324       case RD_WRBUFFER: {
1325          struct wrbuf *wrbuf = u_vector_add(&dev->wrbufs);
1326          uint64_t *p = (uint64_t *)ps.buf;
1327          wrbuf->iova = p[0];
1328          wrbuf->size = p[1];
1329          bool clear = p[2];
1330          int name_len = ps.sz - (3 * sizeof(uint64_t));
1331          wrbuf->name = calloc(sizeof(char), name_len);
1332          memcpy(wrbuf->name, (char*)(p + 3), name_len); // includes null terminator
1333 
1334          if (clear) {
1335             struct buffer *buf = device_get_buffer(dev, wrbuf->iova);
1336             assert(buf);
1337 
1338             uint64_t offset = wrbuf->iova - buf->iova;
1339             uint64_t end = MIN2(offset + wrbuf->size, buf->size);
1340             while (offset < end) {
1341                static const uint64_t clear_value = 0xdeadbeefdeadbeef;
1342                memcpy(buf->map + offset, &clear_value,
1343                       MIN2(sizeof(clear_value), end - offset));
1344                offset += sizeof(clear_value);
1345             }
1346          }
1347 
1348          break;
1349       }
1350       default:
1351          break;
1352       }
1353    }
1354 
1355    io_close(io);
1356    if (ps.ret < 0) {
1357       fprintf(stderr, "corrupt file %s\n", tmpfilename);
1358    }
1359 
1360    return ps.ret;
1361 }
1362 
1363 static int
1364 handle_file(const char *filename, uint32_t first_submit, uint32_t last_submit,
1365             uint32_t submit_to_override, uint64_t base_addr, const char *cmdstreamgen)
1366 {
1367    struct io *io;
1368    int submit = 0;
1369    bool skip = false;
1370    bool need_submit = false;
1371    struct rd_parsed_section ps = {0};
1372 
1373    printf("Reading %s...\n", filename);
1374 
1375    if (!strcmp(filename, "-"))
1376       io = io_openfd(0);
1377    else
1378       io = io_open(filename);
1379 
1380    if (!io) {
1381       fprintf(stderr, "could not open: %s\n", filename);
1382       return -1;
1383    }
1384 
1385    struct device *dev = device_create(base_addr);
1386 
1387    struct {
1388       unsigned int len;
1389       uint64_t gpuaddr;
1390    } gpuaddr = {0};
1391 
1392    while (parse_rd_section(io, &ps)) {
1393       switch (ps.type) {
1394       case RD_TEST:
1395       case RD_VERT_SHADER:
1396       case RD_FRAG_SHADER:
1397          /* no-op */
1398          break;
1399       case RD_CMD:
1400          skip = false;
1401          if (exename) {
1402             skip |= (strstr(ps.buf, exename) != ps.buf);
1403          } else {
1404             skip |= (strstr(ps.buf, "fdperf") == ps.buf);
1405             skip |= (strstr(ps.buf, "chrome") == ps.buf);
1406             skip |= (strstr(ps.buf, "surfaceflinger") == ps.buf);
1407             skip |= ((char *)ps.buf)[0] == 'X';
1408          }
1409          break;
1410 
1411       case RD_GPUADDR:
1412          if (need_submit) {
1413             need_submit = false;
1414             device_submit_cmdstreams(dev);
1415          }
1416 
1417          parse_addr(ps.buf, ps.sz, &gpuaddr.len, &gpuaddr.gpuaddr);
1418          /* no-op */
1419          break;
1420       case RD_BUFFER_CONTENTS:
1421          /* TODO: skip buffer uploading and even reading if this buffer
1422           * is used for submit outside of [first_submit, last_submit]
1423           * range. A set of buffers is shared between several cmdstreams,
1424           * so we'd have to find starting from which RD_CMD to upload
1425           * the buffers.
1426           */
1427          upload_buffer(dev, gpuaddr.gpuaddr, gpuaddr.len, ps.buf);
1428          break;
1429       case RD_CMDSTREAM_ADDR: {
1430          unsigned int sizedwords;
1431          uint64_t gpuaddr;
1432          parse_addr(ps.buf, ps.sz, &sizedwords, &gpuaddr);
1433 
1434          bool add_submit = !skip && (submit >= first_submit) && (submit <= last_submit);
1435          printf("%scmdstream %d: %d dwords\n", add_submit ? "" : "skipped ",
1436                 submit, sizedwords);
1437 
1438          if (add_submit) {
1439             struct cmdstream *cs = u_vector_add(&dev->cmdstreams);
1440 
1441             if (submit == submit_to_override) {
1442                if (override_cmdstream(dev, cs, cmdstreamgen) < 0)
1443                   break;
1444             } else {
1445                cs->iova = gpuaddr;
1446                cs->size = sizedwords * sizeof(uint32_t);
1447             }
1448          }
1449 
1450          need_submit = true;
1451 
1452          submit++;
1453          break;
1454       }
1455       case RD_GPU_ID: {
1456          uint32_t gpu_id = parse_gpu_id(ps.buf);
1457          if (gpu_id)
1458             printf("gpuid: %d\n", gpu_id);
1459          break;
1460       }
1461       case RD_CHIP_ID: {
1462          uint64_t chip_id = parse_chip_id(ps.buf);
1463          printf("chip_id: 0x%" PRIx64 "\n", chip_id);
1464          break;
1465       }
1466       default:
1467          break;
1468       }
1469    }
1470 
1471    if (need_submit)
1472       device_submit_cmdstreams(dev);
1473 
1474    close(dev->fd);
1475 
1476    io_close(io);
1477    fflush(stdout);
1478 
1479    if (ps.ret < 0) {
1480       printf("corrupt file\n");
1481    }
1482    return 0;
1483 }
1484