• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2022 Igalia S.L.
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #include <assert.h>
7 #include <ctype.h>
8 #include <err.h>
9 #include <errno.h>
10 #include <fcntl.h>
11 #include <getopt.h>
12 #include <inttypes.h>
13 #include <signal.h>
14 #include <stdarg.h>
15 #include <stdbool.h>
16 #include <stdint.h>
17 #include <stdio.h>
18 #include <stdlib.h>
19 #include <string.h>
20 #include <unistd.h>
21 #if FD_REPLAY_KGSL
22 #include "../vulkan/msm_kgsl.h"
23 #elif FD_REPLAY_MSM
24 #include <xf86drm.h>
25 #include "drm-uapi/msm_drm.h"
26 #elif FD_REPLAY_WSL
27 #define __KERNEL__
28 #include "drm-uapi/d3dkmthk.h"
29 #endif
30 
31 #include <sys/ioctl.h>
32 #include <sys/mman.h>
33 #include <sys/stat.h>
34 #include <sys/types.h>
35 #include <sys/wait.h>
36 
37 #include "util/os_time.h"
38 #include "util/rb_tree.h"
39 #include "util/u_vector.h"
40 #include "util/vma.h"
41 #include "buffers.h"
42 #include "cffdec.h"
43 #include "io.h"
44 #include "redump.h"
45 #include "rdutil.h"
46 
47 /**
48  * Replay command stream obtained from:
49  * - /sys/kernel/debug/dri/0/rd
50  * - /sys/kernel/debug/dri/0/hangrd
51  * !!! Command stream capture should be done with ALL buffers:
52  * - echo 1 > /sys/module/msm/parameters/rd_full
53  *
54  * Requires kernel with MSM_INFO_SET_IOVA support.
55  * In case userspace IOVAs are not supported, like on KGSL, we have to
56  * pre-allocate a single buffer and hope it always allocated starting
57  * from the same address.
58  *
59  * TODO: Misrendering, would require marking framebuffer images
60  *       at each renderpass in order to fetch and decode them.
61  *
62  * Code from Freedreno/Turnip is not re-used here since the relevant
63  * pieces may introduce additional allocations which cannot be allowed
64  * during the replay.
65  *
66  * For how-to see freedreno.rst
67  */
68 
/* Process-name filter (-e): when set, only cmdstreams captured from the
 * named process are replayed (see handle_file / usage text). */
static const char *exename = NULL;

/* Size of the single pre-allocated buffer (1 GiB) used when userspace
 * IOVAs are unavailable (KGSL, or MSM without MSM_INFO_SET_IOVA). */
static const uint64_t FAKE_ADDRESS_SPACE_SIZE = 1024 * 1024 * 1024;

/* Replays one .rd capture; returns 0 on success, non-zero on read error. */
static int handle_file(const char *filename, uint32_t first_submit,
                       uint32_t last_submit, uint32_t submit_to_override,
                       uint64_t base_addr, const char *cmdstreamgen);
76 
/**
 * Print command-line usage to stderr and exit with status 2.
 *
 * Fix: the usage line read "[OPTSIONS]" — corrected to "[OPTIONS]".
 */
static void
print_usage(const char *name)
{
   /* clang-format off */
   fprintf(stderr, "Usage:\n\n"
           "\t%s [OPTIONS]... FILE...\n\n"
           "Options:\n"
           "\t-e, --exe=NAME         - only use cmdstream from named process\n"
           "\t-o  --override=submit  - № of the submit to override\n"
           "\t-g  --generator=path   - executable which generate cmdstream for override\n"
           "\t-f  --first=submit     - first submit № to replay\n"
           "\t-l  --last=submit      - last submit № to replay\n"
           "\t-a  --address=address  - base iova address on WSL\n"
           "\t-h, --help             - show this message\n"
           , name);
   /* clang-format on */
   exit(2);
}
95 
/* clang-format off */
/* Long-option table for getopt_long(); short names mirror "e:o:g:f:l:a:h".
 * Fix: getopt_long() requires the array to be terminated by an element
 * containing all zeros — without it the long-option scan runs off the end
 * of the array (undefined behavior). */
static const struct option opts[] = {
      { "exe",       required_argument, 0, 'e' },
      { "override",  required_argument, 0, 'o' },
      { "generator", required_argument, 0, 'g' },
      { "first",     required_argument, 0, 'f' },
      { "last",      required_argument, 0, 'l' },
      { "address",   required_argument, 0, 'a' },
      { "help",      no_argument,       0, 'h' },
      { NULL,        0,                 NULL, 0 },
};
/* clang-format on */
107 
108 int
main(int argc,char ** argv)109 main(int argc, char **argv)
110 {
111    int ret = -1;
112    int c;
113 
114    uint32_t submit_to_override = -1;
115    uint32_t first_submit = 0;
116    uint32_t last_submit = -1;
117    uint64_t base_addr = 0;
118    const char *cmdstreamgen = NULL;
119 
120    while ((c = getopt_long(argc, argv, "e:o:g:f:l:a:h", opts, NULL)) != -1) {
121       switch (c) {
122       case 0:
123          /* option that set a flag, nothing to do */
124          break;
125       case 'e':
126          exename = optarg;
127          break;
128       case 'o':
129          submit_to_override = strtoul(optarg, NULL, 0);
130          break;
131       case 'g':
132          cmdstreamgen = optarg;
133          break;
134       case 'f':
135          first_submit = strtoul(optarg, NULL, 0);
136          break;
137       case 'l':
138          last_submit = strtoul(optarg, NULL, 0);
139          break;
140       case 'a':
141          base_addr = strtoull(optarg, NULL, 0);
142          break;
143       case 'h':
144       default:
145          print_usage(argv[0]);
146       }
147    }
148 
149    while (optind < argc) {
150       ret = handle_file(argv[optind], first_submit, last_submit,
151                         submit_to_override, base_addr, cmdstreamgen);
152       if (ret) {
153          fprintf(stderr, "error reading: %s\n", argv[optind]);
154          fprintf(stderr, "continuing..\n");
155       }
156       optind++;
157    }
158 
159    if (ret)
160       print_usage(argv[0]);
161 
162    return ret;
163 }
164 
/* A GPU buffer reconstructed from the rd capture, tracked by iova. */
struct buffer {
   struct rb_node node; /* linked into device::buffers, ordered by iova */

   uint32_t gem_handle;
   uint64_t size;
   uint64_t iova;
   void *map; /* CPU-visible mapping of the buffer */

   bool used;
   uint32_t flags; /* extra MSM_SUBMIT_BO_* flags for the next submit */
};
176 
/* One IB (indirect buffer) range to execute as part of a submit. */
struct cmdstream {
   uint64_t iova;
   uint64_t size;
};
181 
/* A buffer range to dump to disk after the submit completes
 * (written by device_dump_wrbuf under "<exename>/buffers/<name>"). */
struct wrbuf {
   uint64_t iova;
   uint64_t size;
   char* name;
};
187 
/* Per-backend replay device state (MSM / KGSL / WSL variants). */
struct device {
   int fd; /* render node, /dev/kgsl-3d0 or /dev/dxg descriptor */

   struct rb_tree buffers;    /* struct buffer nodes, ordered by iova */
   struct util_vma_heap vma;  /* GPU VA allocator backing the buffers */

   struct u_vector cmdstreams; /* struct cmdstream accumulated for next submit */

   uint64_t shader_log_iova; /* 0 when no shader log buffer is present */
   uint64_t cp_log_iova;     /* 0 when no CP log buffer is present */

   bool has_set_iova; /* kernel supports MSM_INFO_SET_IOVA (MSM only) */

   /* Fallback single "fake address space" allocation used when userspace
    * IOVAs are unavailable: */
   uint32_t va_id;
   void *va_map;
   uint64_t va_iova;

   struct u_vector wrbufs; /* struct wrbuf entries to dump after replay */

#ifdef FD_REPLAY_KGSL
   uint32_t context_id;
#endif

#ifdef FD_REPLAY_WSL
   struct d3dkmthandle device;
   struct d3dkmthandle context;

   /* We don't know at the moment a good way to wait for submission to complete
    * on WSL, so we could use our own fences.
    */
   uint64_t fence_iova;
   uint64_t fence_ib_iova;
   volatile uint32_t *fence;
   uint32_t *fence_ib;
#endif
};
224 
225 void buffer_mem_free(struct device *dev, struct buffer *buf);
226 
227 static int
rb_buffer_insert_cmp(const struct rb_node * n1,const struct rb_node * n2)228 rb_buffer_insert_cmp(const struct rb_node *n1, const struct rb_node *n2)
229 {
230    const struct buffer *buf1 = (const struct buffer *)n1;
231    const struct buffer *buf2 = (const struct buffer *)n2;
232    /* Note that gpuaddr comparisions can overflow an int: */
233    if (buf1->iova > buf2->iova)
234       return 1;
235    else if (buf1->iova < buf2->iova)
236       return -1;
237    return 0;
238 }
239 
240 static int
rb_buffer_search_cmp(const struct rb_node * node,const void * addrptr)241 rb_buffer_search_cmp(const struct rb_node *node, const void *addrptr)
242 {
243    const struct buffer *buf = (const struct buffer *)node;
244    uint64_t iova = *(uint64_t *)addrptr;
245    if (buf->iova + buf->size <= iova)
246       return -1;
247    else if (buf->iova > iova)
248       return 1;
249    return 0;
250 }
251 
252 static struct buffer *
device_get_buffer(struct device * dev,uint64_t iova)253 device_get_buffer(struct device *dev, uint64_t iova)
254 {
255    if (iova == 0)
256       return NULL;
257    return (struct buffer *)rb_tree_search(&dev->buffers, &iova,
258                                           rb_buffer_search_cmp);
259 }
260 
261 static void
device_mark_buffers(struct device * dev)262 device_mark_buffers(struct device *dev)
263 {
264    rb_tree_foreach_safe (struct buffer, buf, &dev->buffers, node) {
265       buf->used = false;
266    }
267 }
268 
/* Free every tracked buffer: release its GPU memory/VA, unlink it from
 * the tree, and free the host bookkeeping struct. The _safe iteration
 * variant is required because nodes are removed mid-walk. */
static void
device_free_buffers(struct device *dev)
{
   rb_tree_foreach_safe (struct buffer, buf, &dev->buffers, node) {
      buffer_mem_free(dev, buf);
      rb_tree_remove(&dev->buffers, &buf->node);
      free(buf);
   }
}
278 
279 static void
device_print_shader_log(struct device * dev)280 device_print_shader_log(struct device *dev)
281 {
282    struct shader_log {
283       uint64_t cur_iova;
284       union {
285          uint32_t entries_u32[0];
286          float entries_float[0];
287       };
288    };
289 
290    if (dev->shader_log_iova != 0)
291    {
292       struct buffer *buf = device_get_buffer(dev, dev->shader_log_iova);
293       if (buf) {
294          struct shader_log *log = buf->map + (dev->shader_log_iova - buf->iova);
295          uint32_t count = (log->cur_iova - dev->shader_log_iova -
296                            offsetof(struct shader_log, entries_u32)) / 4;
297 
298          printf("Shader Log Entries: %u\n", count);
299 
300          for (uint32_t i = 0; i < count; i++) {
301             printf("[%u] %08x %.4f\n", i, log->entries_u32[i],
302                    log->entries_float[i]);
303          }
304 
305          printf("========================================\n");
306       }
307    }
308 }
309 
/* Print the CP (command processor) log buffer, if one was captured.
 * The buffer holds a header followed by a chain of size-prefixed
 * entries; a zero size terminates the chain. */
static void
device_print_cp_log(struct device *dev)
{
   struct cp_log {
      uint64_t cur_iova;
      uint64_t tmp;
      uint64_t first_entry_size;
   };

   struct cp_log_entry {
      uint64_t size;
      uint32_t data[0];
   };

   if (dev->cp_log_iova == 0)
      return;

   struct buffer *buf = device_get_buffer(dev, dev->cp_log_iova);
   if (!buf)
      return;

   struct cp_log *log = buf->map + (dev->cp_log_iova - buf->iova);
   if (log->first_entry_size == 0)
      return;

   /* The first entry starts at the header's first_entry_size field, so
    * that field doubles as the first cp_log_entry::size.
    * NOTE(review): this offset is taken from buf->map, not from `log`,
    * which assumes the cp_log sits at the very start of its buffer
    * (buf->iova == cp_log_iova) — confirm against the capture writer. */
   struct cp_log_entry *log_entry =
      buf->map + offsetof(struct cp_log, first_entry_size);
   uint32_t idx = 0;
   while (log_entry->size != 0) {
      printf("\nCP Log [%u]:\n", idx++);
      uint32_t dwords = log_entry->size / 4;

      /* Hex dump, 8 dwords per row. */
      for (uint32_t i = 0; i < dwords; i++) {
         if (i % 8 == 0)
            printf("\t");
         printf("%08x ", log_entry->data[i]);
         if (i % 8 == 7)
            printf("\n");
      }
      printf("\n");

      /* Advance past this entry's payload plus its size header. */
      log_entry = (void *)log_entry + log_entry->size +
                  offsetof(struct cp_log_entry, data);
   }
}
355 
356 static void
device_dump_wrbuf(struct device * dev)357 device_dump_wrbuf(struct device *dev)
358 {
359    if (!u_vector_length(&dev->wrbufs))
360       return;
361 
362    char buffer_dir[256];
363    snprintf(buffer_dir, sizeof(buffer_dir), "%s/buffers", exename);
364    rmdir(buffer_dir);
365    mkdir(buffer_dir, 0777);
366 
367    struct wrbuf *wrbuf;
368    u_vector_foreach(wrbuf, &dev->wrbufs) {
369       char buffer_path[256];
370       snprintf(buffer_path, sizeof(buffer_path), "%s/%s", buffer_dir, wrbuf->name);
371       FILE *f = fopen(buffer_path, "wb");
372       if (!f) {
373          fprintf(stderr, "Error opening %s\n", buffer_path);
374          goto end_it;
375       }
376 
377       struct buffer *buf = device_get_buffer(dev, wrbuf->iova);
378       if (!buf) {
379          fprintf(stderr, "Error getting buffer for %s\n", buffer_path);
380          goto end_it;
381       }
382       const void *buffer = buf->map + (wrbuf->iova - buf->iova);
383       fwrite(buffer, wrbuf->size, 1, f);
384 
385       end_it:
386       fclose(f);
387    }
388 }
389 
390 #if FD_REPLAY_MSM
391 static inline void
get_abs_timeout(struct drm_msm_timespec * tv,uint64_t ns)392 get_abs_timeout(struct drm_msm_timespec *tv, uint64_t ns)
393 {
394    struct timespec t;
395    clock_gettime(CLOCK_MONOTONIC, &t);
396    tv->tv_sec = t.tv_sec + ns / 1000000000;
397    tv->tv_nsec = t.tv_nsec + ns % 1000000000;
398 }
399 
/* MSM backend: open the DRM render node and set up the replay device.
 * Preferred path uses userspace-controlled IOVAs (MSM_PARAM_VA_START/
 * VA_SIZE + MSM_INFO_SET_IOVA); otherwise fall back to one large
 * pre-allocated "fake address space" BO whose iova becomes the VMA base.
 * base_addr is unused on MSM (only meaningful on WSL). */
static struct device *
device_create(uint64_t base_addr)
{
   struct device *dev = calloc(sizeof(struct device), 1);

   dev->fd = drmOpenWithType("msm", NULL, DRM_NODE_RENDER);
   if (dev->fd < 0) {
      errx(1, "Cannot open MSM fd!");
   }

   uint64_t va_start, va_size;

   struct drm_msm_param req = {
      .pipe = MSM_PIPE_3D0,
      .param = MSM_PARAM_VA_START,
   };

   /* If VA_START/VA_SIZE queries succeed, the kernel supports userspace
    * IOVA management. */
   int ret = drmCommandWriteRead(dev->fd, DRM_MSM_GET_PARAM, &req, sizeof(req));
   va_start = req.value;

   if (!ret) {
      req.param = MSM_PARAM_VA_SIZE;
      ret = drmCommandWriteRead(dev->fd, DRM_MSM_GET_PARAM, &req, sizeof(req));
      va_size = req.value;

      dev->has_set_iova = true;
   }

   if (ret) {
      printf("MSM_INFO_SET_IOVA is not supported!\n");

      /* Fallback: allocate one giant BO and carve all buffers out of it.
       * NOTE(review): the GEM_NEW/GEM_INFO return codes below are not
       * checked — a failure would surface later as a bad mmap/iova. */
      struct drm_msm_gem_new req_new = {.size = FAKE_ADDRESS_SPACE_SIZE, .flags = MSM_BO_CACHED_COHERENT};
      drmCommandWriteRead(dev->fd, DRM_MSM_GEM_NEW, &req_new, sizeof(req_new));
      dev->va_id = req_new.handle;

      struct drm_msm_gem_info req_info = {
         .handle = req_new.handle,
         .info = MSM_INFO_GET_IOVA,
      };

      drmCommandWriteRead(dev->fd,
                                 DRM_MSM_GEM_INFO, &req_info, sizeof(req_info));
      dev->va_iova = req_info.value;

      struct drm_msm_gem_info req_offset = {
         .handle = req_new.handle,
         .info = MSM_INFO_GET_OFFSET,
      };

      drmCommandWriteRead(dev->fd, DRM_MSM_GEM_INFO, &req_offset, sizeof(req_offset));

      dev->va_map = mmap(0, FAKE_ADDRESS_SPACE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
                       dev->fd, req_offset.value);
      if (dev->va_map == MAP_FAILED) {
         err(1, "mmap failure");
      }

      va_start = dev->va_iova;
      va_size = FAKE_ADDRESS_SPACE_SIZE;

      printf("Allocated iova %" PRIx64 "\n", dev->va_iova);
   }

   rb_tree_init(&dev->buffers);
   util_vma_heap_init(&dev->vma, va_start, ROUND_DOWN_TO(va_size, 4096));
   u_vector_init(&dev->cmdstreams, 8, sizeof(struct cmdstream));
   u_vector_init(&dev->wrbufs, 8, sizeof(struct wrbuf));

   return dev;
}
470 
/* MSM backend: submit all accumulated cmdstreams as one GEM submit,
 * wait for completion, dump logs/buffers, then free all buffers so the
 * next decoded submit starts from a clean VA space. */
static void
device_submit_cmdstreams(struct device *dev)
{
   if (!u_vector_length(&dev->cmdstreams)) {
      device_free_buffers(dev);
      return;
   }

   struct drm_msm_gem_submit_cmd cmds[u_vector_length(&dev->cmdstreams)];

   uint32_t idx = 0;
   struct cmdstream *cmd;
   u_vector_foreach(cmd, &dev->cmdstreams) {
      struct buffer *cmdstream_buf = device_get_buffer(dev, cmd->iova);

      /* The submit BO list below is built by in-order tree traversal,
       * so the index of this cmdstream's BO is its position in-order. */
      uint32_t bo_idx = 0;
      rb_tree_foreach (struct buffer, buf, &dev->buffers, node) {
         if (buf == cmdstream_buf)
            break;

         bo_idx++;
      }

      if (cmdstream_buf)
         cmdstream_buf->flags = MSM_SUBMIT_BO_DUMP;

      struct drm_msm_gem_submit_cmd *submit_cmd = &cmds[idx];
      submit_cmd->type = MSM_SUBMIT_CMD_BUF;
      submit_cmd->submit_idx = bo_idx;
      /* NOTE(review): with has_set_iova, a cmdstream iova that matches
       * no tracked buffer leaves cmdstream_buf NULL and this dereference
       * would crash — presumably captures always contain the cmdstream
       * buffer; confirm. */
      if (dev->has_set_iova) {
         submit_cmd->submit_offset = cmd->iova - cmdstream_buf->iova;
      } else {
         submit_cmd->submit_offset = cmd->iova - dev->va_iova;
      }
      submit_cmd->size = cmd->size;
      submit_cmd->pad = 0;
      submit_cmd->nr_relocs = 0;
      submit_cmd->relocs = 0;

      idx++;
   }

   uint32_t bo_count = 0;
   rb_tree_foreach (struct buffer, buf, &dev->buffers, node) {
      if (buf)
         bo_count++;
   }

   /* Without SET_IOVA everything lives in the single fallback BO. */
   if (!dev->has_set_iova) {
      bo_count = 1;
   }

   /* NOTE(review): bo_list is never freed — leaks once per submit. */
   struct drm_msm_gem_submit_bo *bo_list =
      calloc(sizeof(struct drm_msm_gem_submit_bo), bo_count);

   if (dev->has_set_iova) {
      uint32_t bo_idx = 0;
      rb_tree_foreach (struct buffer, buf, &dev->buffers, node) {
         struct drm_msm_gem_submit_bo *submit_bo = &bo_list[bo_idx++];
         submit_bo->handle = buf->gem_handle;
         submit_bo->flags =
            buf->flags | MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE;
         submit_bo->presumed = buf->iova;

         buf->flags = 0;
      }
   } else {
      bo_list[0].handle = dev->va_id;
      bo_list[0].flags =
         MSM_SUBMIT_BO_DUMP | MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE;
      bo_list[0].presumed = dev->va_iova;
   }

   struct drm_msm_gem_submit submit_req = {
      .flags = MSM_PIPE_3D0,
      .queueid = 0,
      .bos = (uint64_t)(uintptr_t)bo_list,
      .nr_bos = bo_count,
      .cmds = (uint64_t)(uintptr_t)cmds,
      .nr_cmds = u_vector_length(&dev->cmdstreams),
      .in_syncobjs = 0,
      .out_syncobjs = 0,
      .nr_in_syncobjs = 0,
      .nr_out_syncobjs = 0,
      .syncobj_stride = sizeof(struct drm_msm_gem_submit_syncobj),
   };

   int ret = drmCommandWriteRead(dev->fd, DRM_MSM_GEM_SUBMIT, &submit_req,
                                 sizeof(submit_req));

   if (ret) {
      err(1, "DRM_MSM_GEM_SUBMIT failure %d", ret);
   }

   /* Wait for submission to complete in order to be sure that
    * freeing buffers would free their VMAs in the kernel.
    * Makes sure that new allocations won't clash with old ones.
    */
   struct drm_msm_wait_fence wait_req = {
      .fence = submit_req.fence,
      .queueid = 0,
   };
   get_abs_timeout(&wait_req.timeout, 1000000000);

   ret =
      drmCommandWrite(dev->fd, DRM_MSM_WAIT_FENCE, &wait_req, sizeof(wait_req));
   if (ret && (ret != -ETIMEDOUT)) {
      err(1, "DRM_MSM_WAIT_FENCE failure %d", ret);
   }

   /* Reset the cmdstream vector for the next submit. */
   u_vector_finish(&dev->cmdstreams);
   u_vector_init(&dev->cmdstreams, 8, sizeof(struct cmdstream));

   device_print_shader_log(dev);
   device_print_cp_log(dev);

   device_dump_wrbuf(dev);

   device_free_buffers(dev);
}
591 
/* MSM backend: back `buf` with GPU memory at its captured iova.
 * With SET_IOVA support this allocates a fresh GEM BO, pins it at
 * buf->iova, and mmaps it; otherwise the buffer is just a slice of the
 * pre-mapped fallback allocation. */
static void
buffer_mem_alloc(struct device *dev, struct buffer *buf)
{
   /* Reserve the captured address range in our VMA tracker.
    * NOTE(review): the return value is ignored — an already-occupied
    * range would go unnoticed here. */
   util_vma_heap_alloc_addr(&dev->vma, buf->iova, buf->size);

   if (!dev->has_set_iova) {
      uint64_t offset = buf->iova - dev->va_iova;
      assert(offset < FAKE_ADDRESS_SPACE_SIZE && (offset + buf->size) <= FAKE_ADDRESS_SPACE_SIZE);
      buf->map = ((uint8_t*)dev->va_map) + offset;
      return;
   }

   {
      struct drm_msm_gem_new req = {.size = buf->size, .flags = MSM_BO_WC};

      int ret =
         drmCommandWriteRead(dev->fd, DRM_MSM_GEM_NEW, &req, sizeof(req));
      if (ret) {
         err(1, "DRM_MSM_GEM_NEW failure %d", ret);
      }

      buf->gem_handle = req.handle;
   }

   {
      /* Pin the BO at the iova recorded in the capture. */
      struct drm_msm_gem_info req = {
         .handle = buf->gem_handle,
         .info = MSM_INFO_SET_IOVA,
         .value = buf->iova,
      };

      int ret =
         drmCommandWriteRead(dev->fd, DRM_MSM_GEM_INFO, &req, sizeof(req));

      if (ret) {
         err(1, "MSM_INFO_SET_IOVA failure %d", ret);
      }
   }

   {
      /* Map the BO so the capture contents can be copied in. */
      struct drm_msm_gem_info req = {
         .handle = buf->gem_handle,
         .info = MSM_INFO_GET_OFFSET,
      };

      int ret =
         drmCommandWriteRead(dev->fd, DRM_MSM_GEM_INFO, &req, sizeof(req));
      if (ret) {
         err(1, "MSM_INFO_GET_OFFSET failure %d", ret);
      }

      void *map = mmap(0, buf->size, PROT_READ | PROT_WRITE, MAP_SHARED,
                       dev->fd, req.value);
      if (map == MAP_FAILED) {
         err(1, "mmap failure");
      }

      buf->map = map;
   }
}
652 
/* MSM backend: release a buffer's GPU memory — unmap, unpin the iova
 * (SET_IOVA with value 0), close the GEM handle — then return its VA
 * range to the allocator. In the fallback mode only the VA range is
 * released, since the memory belongs to the shared big BO. */
void
buffer_mem_free(struct device *dev, struct buffer *buf)
{
   if (dev->has_set_iova) {
      munmap(buf->map, buf->size);

      struct drm_msm_gem_info req_iova = {
         .handle = buf->gem_handle,
         .info = MSM_INFO_SET_IOVA,
         .value = 0,
      };

      int ret = drmCommandWriteRead(dev->fd, DRM_MSM_GEM_INFO, &req_iova,
                                    sizeof(req_iova));
      if (ret < 0) {
         /* err() exits the process, so this return is effectively dead. */
         err(1, "MSM_INFO_SET_IOVA(0) failed! %d", ret);
         return;
      }

      struct drm_gem_close req = {
         .handle = buf->gem_handle,
      };
      drmIoctl(dev->fd, DRM_IOCTL_GEM_CLOSE, &req);
   }

   util_vma_heap_free(&dev->vma, buf->iova, buf->size);
}
680 
681 #elif FD_REPLAY_KGSL
/* ioctl() wrapper that transparently retries when the call is
 * interrupted (EINTR) or told to try again (EAGAIN). */
static int
safe_ioctl(int fd, unsigned long request, void *arg)
{
   int ret = ioctl(fd, request, arg);
   while (ret == -1 && (errno == EINTR || errno == EAGAIN))
      ret = ioctl(fd, request, arg);
   return ret;
}
693 
694 static struct device *
device_create(uint64_t base_addr)695 device_create(uint64_t base_addr)
696 {
697    struct device *dev = calloc(sizeof(struct device), 1);
698 
699    static const char path[] = "/dev/kgsl-3d0";
700 
701    dev->fd = open(path, O_RDWR | O_CLOEXEC);
702    if (dev->fd < 0) {
703       errx(1, "Cannot open KGSL fd!");
704    }
705 
706    struct kgsl_gpumem_alloc_id req = {
707       .size = FAKE_ADDRESS_SPACE_SIZE,
708       .flags = KGSL_MEMFLAGS_IOCOHERENT,
709    };
710 
711    int ret = safe_ioctl(dev->fd, IOCTL_KGSL_GPUMEM_ALLOC_ID, &req);
712    if (ret) {
713       err(1, "IOCTL_KGSL_GPUMEM_ALLOC_ID failure");
714    }
715 
716    dev->va_id = req.id;
717    dev->va_iova = req.gpuaddr;
718    dev->va_map = mmap(0, FAKE_ADDRESS_SPACE_SIZE, PROT_READ | PROT_WRITE,
719                       MAP_SHARED, dev->fd, req.id << 12);
720 
721    rb_tree_init(&dev->buffers);
722    util_vma_heap_init(&dev->vma, req.gpuaddr, ROUND_DOWN_TO(FAKE_ADDRESS_SPACE_SIZE, 4096));
723    u_vector_init(&dev->cmdstreams, 8, sizeof(struct cmdstream));
724 
725    struct kgsl_drawctxt_create drawctxt_req = {
726       .flags = KGSL_CONTEXT_SAVE_GMEM |
727               KGSL_CONTEXT_NO_GMEM_ALLOC |
728               KGSL_CONTEXT_PREAMBLE,
729    };
730 
731    ret = safe_ioctl(dev->fd, IOCTL_KGSL_DRAWCTXT_CREATE, &drawctxt_req);
732    if (ret) {
733       err(1, "IOCTL_KGSL_DRAWCTXT_CREATE failure");
734    }
735 
736    printf("Allocated iova %" PRIx64 "\n", dev->va_iova);
737 
738    dev->context_id = drawctxt_req.drawctxt_id;
739 
740    return dev;
741 }
742 
/* KGSL backend: submit the accumulated cmdstreams as an IB list on the
 * draw context, wait for the resulting timestamp, dump logs/buffers,
 * then free all buffers for the next decoded submit. */
static void
device_submit_cmdstreams(struct device *dev)
{
   if (!u_vector_length(&dev->cmdstreams)) {
      device_free_buffers(dev);
      return;
   }

   struct kgsl_command_object cmds[u_vector_length(&dev->cmdstreams)];

   uint32_t idx = 0;
   struct cmdstream *cmd;
   u_vector_foreach(cmd, &dev->cmdstreams) {
      struct kgsl_command_object *submit_cmd = &cmds[idx++];
      submit_cmd->gpuaddr = cmd->iova;
      submit_cmd->size = cmd->size;
      submit_cmd->flags = KGSL_CMDLIST_IB;
      /* All IBs live inside the single preallocated VA buffer. */
      submit_cmd->id = dev->va_id;
   }

   struct kgsl_gpu_command submit_req = {
      .flags = KGSL_CMDBATCH_SUBMIT_IB_LIST,
      .cmdlist = (uintptr_t) &cmds,
      .cmdsize = sizeof(struct kgsl_command_object),
      .numcmds = u_vector_length(&dev->cmdstreams),
      .numsyncs = 0,
      .context_id = dev->context_id,
   };

   int ret = safe_ioctl(dev->fd, IOCTL_KGSL_GPU_COMMAND, &submit_req);

   if (ret) {
      err(1, "IOCTL_KGSL_GPU_COMMAND failure %d", ret);
   }

   /* Block until the submit's timestamp retires (3s timeout). */
   struct kgsl_device_waittimestamp_ctxtid wait = {
      .context_id = dev->context_id,
      .timestamp = submit_req.timestamp,
      .timeout = 3000,
   };

   ret = safe_ioctl(dev->fd, IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID, &wait);

   if (ret) {
      err(1, "IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID failure %d", ret);
   }

   /* Reset the cmdstream vector for the next submit. */
   u_vector_finish(&dev->cmdstreams);
   u_vector_init(&dev->cmdstreams, 8, sizeof(struct cmdstream));

   device_print_shader_log(dev);
   device_print_cp_log(dev);

   device_dump_wrbuf(dev);

   device_free_buffers(dev);
}
800 
801 static void
buffer_mem_alloc(struct device * dev,struct buffer * buf)802 buffer_mem_alloc(struct device *dev, struct buffer *buf)
803 {
804    util_vma_heap_alloc_addr(&dev->vma, buf->iova, buf->size);
805 
806    buf->map = ((uint8_t*)dev->va_map) + (buf->iova - dev->va_iova);
807 }
808 
/* KGSL backend: memory belongs to the single preallocated buffer, so
 * freeing just returns the VA range — nothing to unmap or close. */
void
buffer_mem_free(struct device *dev, struct buffer *buf)
{
   util_vma_heap_free(&dev->vma, buf->iova, buf->size);
}
814 #else
815 
/* Retrying ioctl() wrapper: loops while the call fails with EINTR or
 * EAGAIN, returning the first definitive result otherwise. */
static int
safe_ioctl(int fd, unsigned long request, void *arg)
{
   for (;;) {
      int ret = ioctl(fd, request, arg);
      if (ret != -1 || (errno != EINTR && errno != EAGAIN))
         return ret;
   }
}
827 
/* Reverse-engineered private driver data passed with LX_DXCREATEALLOCATION
 * on WSL. Field meanings are mostly unknown ("unk*"); the constants in the
 * trailing comments are the values observed in captures. Layout is pinned
 * by the static_asserts below. */
struct alloc_priv_info {
   __u32 struct_size;
   char _pad0[4];
   __u32 unk0; // 1
   char _pad1[4];
   __u64 size;
   __u32 alignment;
   char _pad2[20];
   __u64 allocated_size;
   __u32 unk1;   // 1
   char _pad4[8]; /* offset: 60*/
   __u32 unk2;   // 61
   char _pad5[76];
   __u32 unk3; /* offset: 148 */ // 1
   char _pad6[8];
   __u32 unk4; /* offset: 160 */ // 1
   char _pad7[44];
   __u32 unk5; /* offset: 208 */ // 3
   char _pad8[16];
   __u32 size_2; /* offset: 228 */
   __u32 unk6;   // 1
   __u32 size_3;
   __u32 size_4;
   __u32 unk7; /* offset: 244 */ // 1
   char _pad9[56];
};
static_assert(sizeof(struct alloc_priv_info) == 304);
static_assert(offsetof(struct alloc_priv_info, unk1) == 56);
static_assert(offsetof(struct alloc_priv_info, unk3) == 148);
static_assert(offsetof(struct alloc_priv_info, unk5) == 208);
858 
/* Reverse-engineered WSL submit private data: one IB reference inside
 * struct submit_priv_data's cmdbuf. */
struct submit_priv_ib_info {
   char _pad5[4];
   __u32 size_dwords;
   __u64 iova;
   char _pad6[8];
} __attribute__((packed));
865 
/* Reverse-engineered private driver data passed with a WSL submit.
 * Offsets are pinned by the static_asserts below. */
struct submit_priv_data {
   __u32 magic0;
   char _pad0[4];
   __u32 struct_size;
   char _pad1[4];
   /* It seems that priv data can have several sub-datas
    * cmdbuf is one of them, after it there is another 8 byte struct
    * without anything useful in it. That second data doesn't seem
    * important for replaying.
    */
   __u32 datas_count;
   char _pad2[32];
   struct {
      __u32 magic1;
      __u32 data_size;

      struct {
         __u32 unk1;
         __u32 cmdbuf_size;
         char _pad3[32];
         __u32 ib_count;
         char _pad4[36];

         struct submit_priv_ib_info ibs[];
      } cmdbuf;
   } data0;

   //    unsigned char magic2[8];
} __attribute__((packed));
static_assert(offsetof(struct submit_priv_data, data0) == 0x34);
static_assert(offsetof(struct submit_priv_data, data0.cmdbuf.ibs) == 0x8c);
897 
/* WSL backend: set up a replay device through /dev/dxg (WDDM paravirt).
 * Sequence: enumerate/open the adapter, create a device and a virtual
 * context (with reverse-engineered private data), create a paging
 * queue, allocate and map the whole fake address space at base_addr,
 * make it resident, lock it for CPU access, then carve out a small
 * fence + fence-IB region from the top of the VA space (used to detect
 * submission completion, see struct device).
 * NOTE(review): unlike the MSM path, dev->wrbufs is not initialized
 * here — confirm whether -w/wrbuf dumping is expected to work on WSL. */
static struct device *
device_create(uint64_t base_addr)
{
   struct device *dev = calloc(sizeof(struct device), 1);

   static const char path[] = "/dev/dxg";

   dev->fd = open(path, O_RDWR | O_CLOEXEC);
   if (dev->fd < 0) {
      errx(1, "Cannot open /dev/dxg fd");
   }

   struct d3dkmt_adapterinfo adapters[1];
   struct d3dkmt_enumadapters3 enum_adapters = {
      .adapter_count = 1,
      .adapters = adapters,
   };
   int ret = safe_ioctl(dev->fd, LX_DXENUMADAPTERS3, &enum_adapters);
   if (ret) {
      errx(1, "LX_DXENUMADAPTERS3 failure");
   }

   if (enum_adapters.adapter_count == 0) {
      errx(1, "No adapters found");
   }

   /* Use the first (and only requested) adapter. */
   struct winluid adapter_luid = enum_adapters.adapters[0].adapter_luid;

   struct d3dkmt_openadapterfromluid open_adapter = {
      .adapter_luid = adapter_luid,
   };
   ret = safe_ioctl(dev->fd, LX_DXOPENADAPTERFROMLUID, &open_adapter);
   if (ret) {
      errx(1, "LX_DXOPENADAPTERFROMLUID failure");
   }

   struct d3dkmthandle adapter = open_adapter.adapter_handle;

   struct d3dkmt_createdevice create_device = {
      .adapter = adapter,
   };
   ret = safe_ioctl(dev->fd, LX_DXCREATEDEVICE, &create_device);
   if (ret) {
      errx(1, "LX_DXCREATEDEVICE failure");
   }

   struct d3dkmthandle device = create_device.device;
   dev->device = device;

   /* Opaque context private data captured from a real driver session. */
   unsigned char create_context_priv_data[] = {
      0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00,
      0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1c, 0x0c, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
   };

   struct d3dkmt_createcontextvirtual create_context = {
      .device = device,
      .node_ordinal = 0,
      .engine_affinity = 1,
      .priv_drv_data = create_context_priv_data,
      .priv_drv_data_size = sizeof(create_context_priv_data),
      .client_hint = 16,
   };
   ret = safe_ioctl(dev->fd, LX_DXCREATECONTEXTVIRTUAL, &create_context);
   if (ret) {
      errx(1, "LX_DXCREATECONTEXTVIRTUAL failure");
   }

   dev->context = create_context.context;

   struct d3dkmt_createpagingqueue create_paging_queue = {
      .device = device,
      .priority = _D3DDDI_PAGINGQUEUE_PRIORITY_NORMAL,
      .physical_adapter_index = 0,
   };
   ret = safe_ioctl(dev->fd, LX_DXCREATEPAGINGQUEUE, &create_paging_queue);
   if (ret) {
      errx(1, "LX_DXCREATEPAGINGQUEUE failure");
   }
   struct d3dkmthandle paging_queue = create_paging_queue.paging_queue;


   /* NOTE(review): truncates FAKE_ADDRESS_SPACE_SIZE to 32 bits — fine
    * for the current 1 GiB value, would silently break if it grew. */
   uint32_t alloc_size = FAKE_ADDRESS_SPACE_SIZE;
   struct alloc_priv_info priv_alloc_info = {
      .struct_size = sizeof(struct alloc_priv_info),
      .unk0 = 1,
      .size = alloc_size,
      .alignment = 4096,
      .unk1 = 1,
      .unk2 = 61,
      .unk3 = 1,
      .unk4 = 1,
      .unk5 = 3,
      .size_2 = alloc_size,
      .unk6 = 1,
      .size_3 = alloc_size,
      .size_4 = alloc_size,
      .unk7 = 1,
   };

   struct d3dddi_allocationinfo2 alloc_info = {
      .priv_drv_data = &priv_alloc_info,
      .priv_drv_data_size = sizeof(struct alloc_priv_info),
   };

   struct d3dkmt_createallocation create_allocation = {
      .device = device,
      .alloc_count = 1,
      .allocation_info = &alloc_info,
   };
   ret = safe_ioctl(dev->fd, LX_DXCREATEALLOCATION, &create_allocation);
   if (ret) {
      errx(1, "LX_DXCREATEALLOCATION failure");
   }

   assert(priv_alloc_info.allocated_size == alloc_size);

   struct d3dddi_mapgpuvirtualaddress map_virtual_address = {
      .paging_queue = paging_queue,
      .base_address = base_addr,
      .maximum_address = 18446744073709551615ull,
      .allocation = create_allocation.allocation_info[0].allocation,
      .size_in_pages = MAX2(alloc_size / 4096, 1),
      .protection = {
         .write = 1,
         .execute = 1,
      },
   };
   ret = safe_ioctl(dev->fd, LX_DXMAPGPUVIRTUALADDRESS, &map_virtual_address);
   /* 259 appears to be NT STATUS_PENDING (0x103) — presumably the map
    * completes asynchronously; confirm against the dxgkrnl interface. */
   if (ret != 259) {
      errx(1, "LX_DXMAPGPUVIRTUALADDRESS failure");
   }

   __u32 priority = 0;
   struct d3dddi_makeresident make_resident = {
      .paging_queue = paging_queue,
      .alloc_count = 1,
      .allocation_list = &create_allocation.allocation_info[0].allocation,
      .priority_list = &priority,
   };
   ret = safe_ioctl(dev->fd, LX_DXMAKERESIDENT, &make_resident);
   if (ret != 259) {
      errx(1, "LX_DXMAKERESIDENT failure");
   }

   /* Lock the allocation to obtain a CPU pointer to its contents. */
   struct d3dkmt_lock2 lock = {
      .device = device,
      .allocation = create_allocation.allocation_info[0].allocation,
   };
   ret = safe_ioctl(dev->fd, LX_DXLOCK2, &lock);
   if (ret) {
      errx(1, "LX_DXLOCK2 failure");
   }

   dev->va_iova = map_virtual_address.virtual_address;
   dev->va_map = lock.data;

   rb_tree_init(&dev->buffers);
   util_vma_heap_init(&dev->vma, dev->va_iova, ROUND_DOWN_TO(alloc_size, 4096));
   u_vector_init(&dev->cmdstreams, 8, sizeof(struct cmdstream));

   printf("Allocated iova at 0x%" PRIx64 "\n", dev->va_iova);

   /* Carve the fence page from the top of the VA space so it never
    * collides with replayed buffers. */
   uint64_t hole_size = 4096;
   dev->vma.alloc_high = true;
   dev->fence_iova = util_vma_heap_alloc(&dev->vma, hole_size, 4096);
   dev->fence_ib_iova = dev->fence_iova + 8;
   dev->fence = (uint32_t *) ((uint8_t*)dev->va_map + (dev->fence_iova - dev->va_iova));
   dev->fence_ib = (uint32_t *) ((uint8_t*)dev->va_map + (dev->fence_ib_iova - dev->va_iova));
   dev->vma.alloc_high = false;

   return dev;
}
1074 
static void
device_submit_cmdstreams(struct device *dev)
{
   /* Submit all cmdstreams queued in dev->cmdstreams in a single
    * LX_DXSUBMITCOMMAND, then busy-wait on a fence write to know when the
    * GPU is done. Nothing queued: just release this batch's buffers.
    */
   if (!u_vector_length(&dev->cmdstreams)) {
      device_free_buffers(dev);
      return;
   }

   /* +1 for the fence IB appended after the captured cmdstreams — a small
    * CP_MEM_WRITE packet that lets us poll for completion below.
    */
   uint32_t cmdstream_count = u_vector_length(&dev->cmdstreams) + 1;

   uint32_t priv_data_size =
      sizeof(struct submit_priv_data) +
      cmdstream_count * sizeof(struct submit_priv_ib_info);

   /* Driver-private payload for LX_DXSUBMITCOMMAND. The magic numbers and
    * the layout mimic what the proprietary stack sends; their meaning is
    * not publicly documented (NOTE: reverse-engineered — do not change).
    */
   struct submit_priv_data *priv_data = calloc(1, priv_data_size);
   priv_data->magic0 = 0xccaabbee;
   priv_data->struct_size = priv_data_size;
   priv_data->datas_count = 1;

   priv_data->data0.magic1 = 0xfadcab02;
   priv_data->data0.data_size =
      sizeof(priv_data->data0) +
      cmdstream_count * sizeof(struct submit_priv_ib_info);
   priv_data->data0.cmdbuf.unk1 = 0xcccc0001;
   priv_data->data0.cmdbuf.cmdbuf_size = sizeof(priv_data->data0.cmdbuf) +
      cmdstream_count * sizeof(struct submit_priv_ib_info);
   priv_data->data0.cmdbuf.ib_count = cmdstream_count;

   /* List every queued cmdstream as an IB entry. */
   struct cmdstream *cmd;
   uint32_t idx = 0;
   u_vector_foreach(cmd, &dev->cmdstreams) {
      priv_data->data0.cmdbuf.ibs[idx].size_dwords = cmd->size / 4;
      priv_data->data0.cmdbuf.ibs[idx].iova = cmd->iova;
      idx++;
   }

   /* Final IB: the fence packet, 4 dwords at fence_ib_iova. */
   priv_data->data0.cmdbuf.ibs[idx].size_dwords = 4;
   priv_data->data0.cmdbuf.ibs[idx].iova = dev->fence_ib_iova;

   /* Clear the fence location, then build the packet that overwrites it:
    * CP_MEM_WRITE <fence_iova lo/hi> <marker>.
    */
   *dev->fence = 0x00000000;
   dev->fence_ib[0] = pm4_pkt7_hdr(0x3d, 3); // CP_MEM_WRITE
   dev->fence_ib[1] = dev->fence_iova;       /* write address, low 32 bits */
   dev->fence_ib[2] = dev->fence_iova >> 32; /* write address, high 32 bits */
   dev->fence_ib[3] = 0xababfcfc;            /* marker value polled below */

   // Fill second (empty) data block
   // uint32_t *magic_end = (uint32_t *)(((char *) priv_data) + priv_data_size - 8);
   // magic_end[0] = 0xfadcab00;
   // magic_end[1] = 0x00000008;

   /* command_buffer/command_length describe only the first IB; the full
    * IB list travels in the private data above.
    */
   struct d3dkmt_submitcommand submission = {
      .command_buffer = priv_data->data0.cmdbuf.ibs[0].iova,
      .command_length = priv_data->data0.cmdbuf.ibs[0].size_dwords * sizeof(uint32_t),
      .broadcast_context_count = 1,
      .broadcast_context[0] = dev->context,
      .priv_drv_data_size = priv_data_size,
      .priv_drv_data = priv_data,
   };

   int ret = safe_ioctl(dev->fd, LX_DXSUBMITCOMMAND, &submission);
   if (ret) {
      errx(1, "LX_DXSUBMITCOMMAND failure");
   }

   free(priv_data);

   /* Reset the queue so the caller can start gathering the next batch. */
   u_vector_finish(&dev->cmdstreams);
   u_vector_init(&dev->cmdstreams, 8, sizeof(struct cmdstream));

   // TODO: better way to wait
   /* Poll the fence for up to ~1s; the fence IB overwrites the zero we
    * stored above once the GPU reaches it.
    */
   for (unsigned i = 0; i < 1000; i++) {
      usleep(1000);
      if (*dev->fence != 0)
         break;
   }
   if (*dev->fence == 0) {
      errx(1, "Waiting for submission failed! GPU faulted or kernel did not execute this submission.");
   }

   device_print_shader_log(dev);
   device_print_cp_log(dev);

   device_dump_wrbuf(dev);

   device_free_buffers(dev);
}
1161 
1162 static void
buffer_mem_alloc(struct device * dev,struct buffer * buf)1163 buffer_mem_alloc(struct device *dev, struct buffer *buf)
1164 {
1165    util_vma_heap_alloc_addr(&dev->vma, buf->iova, buf->size);
1166 
1167    buf->map = ((uint8_t*)dev->va_map) + (buf->iova - dev->va_iova);
1168 }
1169 
1170 void
buffer_mem_free(struct device * dev,struct buffer * buf)1171 buffer_mem_free(struct device *dev, struct buffer *buf)
1172 {
1173    util_vma_heap_free(&dev->vma, buf->iova, buf->size);
1174 }
1175 
1176 #endif
1177 
1178 static void
upload_buffer(struct device * dev,uint64_t iova,unsigned int size,void * hostptr)1179 upload_buffer(struct device *dev, uint64_t iova, unsigned int size,
1180               void *hostptr)
1181 {
1182    struct buffer *buf = device_get_buffer(dev, iova);
1183 
1184    if (!buf) {
1185       buf = calloc(sizeof(struct buffer), 1);
1186       buf->iova = iova;
1187       buf->size = size;
1188 
1189       rb_tree_insert(&dev->buffers, &buf->node, rb_buffer_insert_cmp);
1190 
1191       buffer_mem_alloc(dev, buf);
1192    } else if (buf->size != size) {
1193       buffer_mem_free(dev, buf);
1194       buf->size = size;
1195       buffer_mem_alloc(dev, buf);
1196    }
1197 
1198    memcpy(buf->map, hostptr, size);
1199 
1200    buf->used = true;
1201 }
1202 
/* Generate a replacement cmdstream with an external generator binary and
 * splice it into the submission in place of the captured one.
 *
 * Runs @cmdstreamgen with a VA range currently unused by the capture,
 * parses the .rd file it produces, uploads the buffers it references and
 * points @cs at the generated cmdstream.
 *
 * Returns the final parser status (>= 0) on success, -1 on failure.
 */
static int
override_cmdstream(struct device *dev, struct cmdstream *cs,
                   const char *cmdstreamgen)
{
#if FD_REPLAY_KGSL
   static const char *tmpfilename = "/sdcard/Download/cmdstream_override.rd";
#elif FD_REPLAY_MSM || FD_REPLAY_WSL
   static const char *tmpfilename = "/tmp/cmdstream_override.rd";
#endif


   /* Find a free space for the new cmdstreams and resources we will use
    * when overriding existing cmdstream.
    */
   /* TODO: should the size be configurable? */
   uint64_t hole_size = 32 * 1024 * 1024;
   /* Probe allocation: grab a hole from the top of the heap just to learn
    * a free address range, then release it immediately — the generator's
    * output (uploaded below) will claim addresses inside this range.
    */
   dev->vma.alloc_high = true;
   uint64_t hole_iova = util_vma_heap_alloc(&dev->vma, hole_size, 4096);
   dev->vma.alloc_high = false;
   util_vma_heap_free(&dev->vma, hole_iova, hole_size);

   char cmd[2048];
   snprintf(cmd, sizeof(cmd),
            "%s --vastart=%" PRIu64 " --vasize=%" PRIu64 " %s", cmdstreamgen,
            hole_iova, hole_size, tmpfilename);

   printf("generating cmdstream '%s'\n", cmd);

   int ret = system(cmd);
   if (ret) {
      fprintf(stderr, "Error executing %s\n", cmd);
      return -1;
   }

   struct io *io;
   struct rd_parsed_section ps = {0};

   io = io_open(tmpfilename);
   if (!io) {
      fprintf(stderr, "could not open: %s\n", tmpfilename);
      return -1;
   }

   /* Address/length of the most recent RD_GPUADDR record; the following
    * RD_BUFFER_CONTENTS record carries the data for that range.
    */
   struct {
      unsigned int len;
      uint64_t gpuaddr;
   } gpuaddr = {0};

   while (parse_rd_section(io, &ps)) {
      switch (ps.type) {
      case RD_GPUADDR:
         parse_addr(ps.buf, ps.sz, &gpuaddr.len, &gpuaddr.gpuaddr);
         /* no-op */
         break;
      case RD_BUFFER_CONTENTS:
         upload_buffer(dev, gpuaddr.gpuaddr, gpuaddr.len, ps.buf);
         /* NOTE(review): ps.buf is cleared here but not in the analogous
          * loop in handle_file — presumably to stop parse_rd_section from
          * recycling the buffer; confirm ownership rules in rdutil.
          */
         ps.buf = NULL;
         break;
      case RD_CMDSTREAM_ADDR: {
         unsigned int sizedwords;
         uint64_t gpuaddr;
         parse_addr(ps.buf, ps.sz, &sizedwords, &gpuaddr);
         printf("override cmdstream: %d dwords\n", sizedwords);

         /* Point the caller's submission entry at the generated stream. */
         cs->iova = gpuaddr;
         cs->size = sizedwords * sizeof(uint32_t);
         break;
      }
      case RD_SHADER_LOG_BUFFER: {
         unsigned int sizedwords;
         parse_addr(ps.buf, ps.sz, &sizedwords, &dev->shader_log_iova);
         break;
      }
      case RD_CP_LOG_BUFFER: {
         unsigned int sizedwords;
         parse_addr(ps.buf, ps.sz, &sizedwords, &dev->cp_log_iova);
         break;
      }
      case RD_WRBUFFER: {
         /* Payload layout: u64 iova, u64 size, u64 name length, then the
          * name bytes (length presumably includes the NUL terminator —
          * TODO confirm against the generator).
          */
         struct wrbuf *wrbuf = u_vector_add(&dev->wrbufs);
         uint64_t *p = (uint64_t *)ps.buf;
         wrbuf->iova = p[0];
         wrbuf->size = p[1];
         wrbuf->name = calloc(1, p[2]);
         memcpy(wrbuf->name, (char *)ps.buf + 3 * sizeof(uint64_t), p[2]);
         break;
      }
      default:
         break;
      }
   }

   io_close(io);
   if (ps.ret < 0) {
      fprintf(stderr, "corrupt file %s\n", tmpfilename);
   }

   return ps.ret;
}
1302 
/* Replay a single .rd capture file ("-" reads from stdin).
 *
 * Walks the capture's sections in order, uploading buffer contents and
 * queueing cmdstreams on the replay device; queued cmdstreams are flushed
 * (submitted) whenever a new set of buffer updates begins. Only submits
 * in [first_submit, last_submit] are queued, and the cmdstream of
 * submit_to_override (if it matches) is replaced via override_cmdstream().
 *
 * Returns 0 (parse errors are only reported, not propagated), -1 if the
 * file cannot be opened.
 */
static int
handle_file(const char *filename, uint32_t first_submit, uint32_t last_submit,
            uint32_t submit_to_override, uint64_t base_addr, const char *cmdstreamgen)
{
   struct io *io;
   int submit = 0;              /* index of the next RD_CMDSTREAM_ADDR seen */
   bool skip = false;           /* current process's cmdstreams filtered out */
   bool need_submit = false;    /* cmdstreams queued but not yet submitted */
   struct rd_parsed_section ps = {0};

   printf("Reading %s...\n", filename);

   if (!strcmp(filename, "-"))
      io = io_openfd(0);
   else
      io = io_open(filename);

   if (!io) {
      fprintf(stderr, "could not open: %s\n", filename);
      return -1;
   }

   struct device *dev = device_create(base_addr);

   /* Address/length of the most recent RD_GPUADDR record; the following
    * RD_BUFFER_CONTENTS record carries the data for that range.
    */
   struct {
      unsigned int len;
      uint64_t gpuaddr;
   } gpuaddr = {0};

   while (parse_rd_section(io, &ps)) {
      switch (ps.type) {
      case RD_TEST:
      case RD_VERT_SHADER:
      case RD_FRAG_SHADER:
         /* no-op */
         break;
      case RD_CMD:
         /* New process context (ps.buf is the process name): decide
          * whether to skip its cmdstreams. Without an explicit exename
          * filter we skip well-known system processes and X11 clients
          * (names starting with 'X').
          */
         skip = false;
         if (exename) {
            skip |= (strstr(ps.buf, exename) != ps.buf);
         } else {
            skip |= (strstr(ps.buf, "fdperf") == ps.buf);
            skip |= (strstr(ps.buf, "chrome") == ps.buf);
            skip |= (strstr(ps.buf, "surfaceflinger") == ps.buf);
            skip |= ((char *)ps.buf)[0] == 'X';
         }
         break;

      case RD_GPUADDR:
         /* A new buffer set begins: flush queued cmdstreams first, since
          * the uploads that follow may overwrite memory they reference.
          */
         if (need_submit) {
            need_submit = false;
            device_submit_cmdstreams(dev);
         }

         parse_addr(ps.buf, ps.sz, &gpuaddr.len, &gpuaddr.gpuaddr);
         /* no-op */
         break;
      case RD_BUFFER_CONTENTS:
         /* TODO: skip buffer uploading and even reading if this buffer
          * is used for submit outside of [first_submit, last_submit]
          * range. A set of buffers is shared between several cmdstreams,
          * so we'd have to find starting from which RD_CMD to upload
          * the buffers.
          */
         upload_buffer(dev, gpuaddr.gpuaddr, gpuaddr.len, ps.buf);
         break;
      case RD_CMDSTREAM_ADDR: {
         unsigned int sizedwords;
         uint64_t gpuaddr;
         parse_addr(ps.buf, ps.sz, &sizedwords, &gpuaddr);

         bool add_submit = !skip && (submit >= first_submit) && (submit <= last_submit);
         printf("%scmdstream %d: %d dwords\n", add_submit ? "" : "skipped ",
                submit, sizedwords);

         if (add_submit) {
            struct cmdstream *cs = u_vector_add(&dev->cmdstreams);

            if (submit == submit_to_override) {
               /* NOTE(review): on override failure this breaks with *cs
                * already added to the vector but uninitialized — verify
                * whether a failed generator run can reach submission.
                */
               if (override_cmdstream(dev, cs, cmdstreamgen) < 0)
                  break;
            } else {
               cs->iova = gpuaddr;
               cs->size = sizedwords * sizeof(uint32_t);
            }
         }

         need_submit = true;

         submit++;
         break;
      }
      case RD_GPU_ID: {
         uint32_t gpu_id = parse_gpu_id(ps.buf);
         if (gpu_id)
            printf("gpuid: %d\n", gpu_id);
         break;
      }
      case RD_CHIP_ID: {
         uint64_t chip_id = parse_chip_id(ps.buf);
         printf("chip_id: 0x%" PRIx64 "\n", chip_id);
         break;
      }
      default:
         break;
      }
   }

   /* Flush whatever is still queued at end-of-file. */
   if (need_submit)
      device_submit_cmdstreams(dev);

   close(dev->fd);

   io_close(io);
   fflush(stdout);

   if (ps.ret < 0) {
      printf("corrupt file\n");
   }
   return 0;
}
1424