/*
 * Copyright © 2022 Igalia S.L.
 * SPDX-License-Identifier: MIT
 */

#include <assert.h>
#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <getopt.h>
#include <inttypes.h>
#include <signal.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#if FD_REPLAY_KGSL
#include "../vulkan/msm_kgsl.h"
#elif FD_REPLAY_MSM
#include <xf86drm.h>
#include "drm-uapi/msm_drm.h"
#elif FD_REPLAY_WSL
#define __KERNEL__
#include "drm-uapi/d3dkmthk.h"
#endif

#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/wait.h>

#include "util/os_time.h"
#include "util/rb_tree.h"
#include "util/u_vector.h"
#include "util/vma.h"
#include "buffers.h"
#include "cffdec.h"
#include "io.h"
#include "redump.h"
#include "rdutil.h"

/**
 * Replay command stream obtained from:
 * - /sys/kernel/debug/dri/0/rd
 * - /sys/kernel/debug/dri/0/hangrd
 * !!! Command stream capture should be done with ALL buffers:
 * - echo 1 > /sys/module/msm/parameters/rd_full
 *
 * Requires a kernel with MSM_INFO_SET_IOVA support.
 * In case userspace IOVAs are not supported, like on KGSL, we have to
 * pre-allocate a single buffer and hope it is always allocated starting
 * from the same address.
 *
 * TODO: Misrendering. This would require marking framebuffer images
 * at each renderpass in order to fetch and decode them.
 *
 * Code from Freedreno/Turnip is not re-used here since the relevant
 * pieces may introduce additional allocations, which cannot be allowed
 * during the replay.
 *
 * For a how-to, see freedreno.rst
 */
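
/* A minimal example invocation (a sketch: the binary name, capture file
 * and submit numbers are illustrative, see freedreno.rst for details):
 *
 *   replay --first=0 --last=3 --exe=app_name capture.rd
 */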

static const char *exename = NULL;

static const uint64_t FAKE_ADDRESS_SPACE_SIZE = 1024 * 1024 * 1024;

static int handle_file(const char *filename, uint32_t first_submit,
                       uint32_t last_submit, uint32_t submit_to_override,
                       uint64_t base_addr, const char *cmdstreamgen);

static void
print_usage(const char *name)
{
   /* clang-format off */
   fprintf(stderr, "Usage:\n\n"
           "\t%s [OPTIONS]... FILE...\n\n"
           "Options:\n"
           "\t-e, --exe=NAME - only use cmdstream from named process\n"
           "\t-o, --override=submit - № of the submit to override\n"
           "\t-g, --generator=path - executable which generates the cmdstream for the override\n"
           "\t-f, --first=submit - first submit № to replay\n"
           "\t-l, --last=submit - last submit № to replay\n"
           "\t-a, --address=address - base iova address on WSL\n"
           "\t-h, --help - show this message\n"
           , name);
   /* clang-format on */
   exit(2);
}

/* clang-format off */
static const struct option opts[] = {
   { "exe",       required_argument, 0, 'e' },
   { "override",  required_argument, 0, 'o' },
   { "generator", required_argument, 0, 'g' },
   { "first",     required_argument, 0, 'f' },
   { "last",      required_argument, 0, 'l' },
   { "address",   required_argument, 0, 'a' },
   { "help",      no_argument,       0, 'h' },
};
/* clang-format on */

int
main(int argc, char **argv)
{
   int ret = -1;
   int c;

   uint32_t submit_to_override = -1;
   uint32_t first_submit = 0;
   uint32_t last_submit = -1;
   uint64_t base_addr = 0;
   const char *cmdstreamgen = NULL;

   while ((c = getopt_long(argc, argv, "e:o:g:f:l:a:h", opts, NULL)) != -1) {
      switch (c) {
      case 0:
         /* option that sets a flag, nothing to do */
         break;
      case 'e':
         exename = optarg;
         break;
      case 'o':
         submit_to_override = strtoul(optarg, NULL, 0);
         break;
      case 'g':
         cmdstreamgen = optarg;
         break;
      case 'f':
         first_submit = strtoul(optarg, NULL, 0);
         break;
      case 'l':
         last_submit = strtoul(optarg, NULL, 0);
         break;
      case 'a':
         base_addr = strtoull(optarg, NULL, 0);
         break;
      case 'h':
      default:
         print_usage(argv[0]);
      }
   }

   while (optind < argc) {
      ret = handle_file(argv[optind], first_submit, last_submit,
                        submit_to_override, base_addr, cmdstreamgen);
      if (ret) {
         fprintf(stderr, "error reading: %s\n", argv[optind]);
         fprintf(stderr, "continuing..\n");
      }
      optind++;
   }

   if (ret)
      print_usage(argv[0]);

   return ret;
}

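/* A single GPU buffer tracked by the replayer, kept in the device's
 * buffer tree and keyed by its captured iova.
 */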
struct buffer {
   struct rb_node node;

   uint32_t gem_handle;
   uint64_t size;
   uint64_t iova;
   void *map;

   bool used;
   uint32_t flags;
};

struct cmdstream {
   uint64_t iova;
   uint64_t size;
};

struct wrbuf {
   uint64_t iova;
   uint64_t size;
   char *name;
};

struct device {
   int fd;

   struct rb_tree buffers;
   struct util_vma_heap vma;

   struct u_vector cmdstreams;

   uint64_t shader_log_iova;
   uint64_t cp_log_iova;

   bool has_set_iova;

   uint32_t va_id;
   void *va_map;
   uint64_t va_iova;

   struct u_vector wrbufs;

#ifdef FD_REPLAY_KGSL
   uint32_t context_id;
#endif

#ifdef FD_REPLAY_WSL
   struct d3dkmthandle device;
   struct d3dkmthandle context;

   /* At the moment we don't know a good way to wait for a submission to
    * complete on WSL, so we use our own fences.
    */
   uint64_t fence_iova;
   uint64_t fence_ib_iova;
   volatile uint32_t *fence;
   uint32_t *fence_ib;
#endif
};

void buffer_mem_free(struct device *dev, struct buffer *buf);

static int
rb_buffer_insert_cmp(const struct rb_node *n1, const struct rb_node *n2)
{
   const struct buffer *buf1 = (const struct buffer *)n1;
   const struct buffer *buf2 = (const struct buffer *)n2;
   /* Note that gpuaddr comparisons can overflow an int: */
   if (buf1->iova > buf2->iova)
      return 1;
   else if (buf1->iova < buf2->iova)
      return -1;
   return 0;
}

static int
rb_buffer_search_cmp(const struct rb_node *node, const void *addrptr)
{
   const struct buffer *buf = (const struct buffer *)node;
   uint64_t iova = *(uint64_t *)addrptr;
   if (buf->iova + buf->size <= iova)
      return -1;
   else if (buf->iova > iova)
      return 1;
   return 0;
}

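/* Look up the buffer whose [iova, iova + size) range contains the given
 * address; returns NULL for a zero iova or when no buffer matches.
 */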
static struct buffer *
device_get_buffer(struct device *dev, uint64_t iova)
{
   if (iova == 0)
      return NULL;
   return (struct buffer *)rb_tree_search(&dev->buffers, &iova,
                                          rb_buffer_search_cmp);
}

static void
device_mark_buffers(struct device *dev)
{
   rb_tree_foreach_safe (struct buffer, buf, &dev->buffers, node) {
      buf->used = false;
   }
}

static void
device_free_buffers(struct device *dev)
{
   rb_tree_foreach_safe (struct buffer, buf, &dev->buffers, node) {
      buffer_mem_free(dev, buf);
      rb_tree_remove(&dev->buffers, &buf->node);
      free(buf);
   }
}

static void
device_print_shader_log(struct device *dev)
{
   struct shader_log {
      uint64_t cur_iova;
      union {
         uint32_t entries_u32[0];
         float entries_float[0];
      };
   };

   if (dev->shader_log_iova != 0) {
      struct buffer *buf = device_get_buffer(dev, dev->shader_log_iova);
      if (buf) {
         struct shader_log *log = buf->map + (dev->shader_log_iova - buf->iova);
         uint32_t count = (log->cur_iova - dev->shader_log_iova -
                           offsetof(struct shader_log, entries_u32)) / 4;

         printf("Shader Log Entries: %u\n", count);

         for (uint32_t i = 0; i < count; i++) {
            printf("[%u] %08x %.4f\n", i, log->entries_u32[i],
                   log->entries_float[i]);
         }

         printf("========================================\n");
      }
   }
}

static void
device_print_cp_log(struct device *dev)
{
   struct cp_log {
      uint64_t cur_iova;
      uint64_t tmp;
      uint64_t first_entry_size;
   };

   struct cp_log_entry {
      uint64_t size;
      uint32_t data[0];
   };

   if (dev->cp_log_iova == 0)
      return;

   struct buffer *buf = device_get_buffer(dev, dev->cp_log_iova);
   if (!buf)
      return;

   struct cp_log *log = buf->map + (dev->cp_log_iova - buf->iova);
   if (log->first_entry_size == 0)
      return;

   struct cp_log_entry *log_entry =
      buf->map + offsetof(struct cp_log, first_entry_size);
   uint32_t idx = 0;
   while (log_entry->size != 0) {
      printf("\nCP Log [%u]:\n", idx++);
      uint32_t dwords = log_entry->size / 4;

      for (uint32_t i = 0; i < dwords; i++) {
         if (i % 8 == 0)
            printf("\t");
         printf("%08x ", log_entry->data[i]);
         if (i % 8 == 7)
            printf("\n");
      }
      printf("\n");

      log_entry = (void *)log_entry + log_entry->size +
                  offsetof(struct cp_log_entry, data);
   }
}

static void
device_dump_wrbuf(struct device *dev)
{
   if (!u_vector_length(&dev->wrbufs))
      return;

   char buffer_dir[256];
   snprintf(buffer_dir, sizeof(buffer_dir), "%s/buffers", exename);
   rmdir(buffer_dir);
   mkdir(buffer_dir, 0777);

   struct wrbuf *wrbuf;
   u_vector_foreach(wrbuf, &dev->wrbufs) {
      char buffer_path[256];
      snprintf(buffer_path, sizeof(buffer_path), "%s/%s", buffer_dir, wrbuf->name);
      FILE *f = fopen(buffer_path, "wb");
      if (!f) {
         fprintf(stderr, "Error opening %s\n", buffer_path);
         continue;
      }

      struct buffer *buf = device_get_buffer(dev, wrbuf->iova);
      if (!buf) {
         fprintf(stderr, "Error getting buffer for %s\n", buffer_path);
         goto end_it;
      }
      const void *buffer = buf->map + (wrbuf->iova - buf->iova);
      fwrite(buffer, wrbuf->size, 1, f);

   end_it:
      fclose(f);
   }
}

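/* Each backend below (MSM, KGSL, WSL/dxg) provides the same four entry
 * points: device_create(), device_submit_cmdstreams(), buffer_mem_alloc()
 * and buffer_mem_free().
 */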
#if FD_REPLAY_MSM
static inline void
get_abs_timeout(struct drm_msm_timespec *tv, uint64_t ns)
{
   struct timespec t;
   clock_gettime(CLOCK_MONOTONIC, &t);
   tv->tv_sec = t.tv_sec + ns / 1000000000;
   tv->tv_nsec = t.tv_nsec + ns % 1000000000;
}

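/* Open the MSM render node. If the kernel reports MSM_PARAM_VA_START/SIZE
 * (and thus supports MSM_INFO_SET_IOVA), buffers can be placed at their
 * captured addresses; otherwise fall back to one large BO that backs a
 * fake address space and hope the captured iovas fit into it.
 */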
static struct device *
device_create(uint64_t base_addr)
{
   struct device *dev = calloc(1, sizeof(struct device));

   dev->fd = drmOpenWithType("msm", NULL, DRM_NODE_RENDER);
   if (dev->fd < 0) {
      errx(1, "Cannot open MSM fd!");
   }

   uint64_t va_start, va_size;

   struct drm_msm_param req = {
      .pipe = MSM_PIPE_3D0,
      .param = MSM_PARAM_VA_START,
   };

   int ret = drmCommandWriteRead(dev->fd, DRM_MSM_GET_PARAM, &req, sizeof(req));
   va_start = req.value;

   if (!ret) {
      req.param = MSM_PARAM_VA_SIZE;
      ret = drmCommandWriteRead(dev->fd, DRM_MSM_GET_PARAM, &req, sizeof(req));
      va_size = req.value;

      dev->has_set_iova = true;
   }

   if (ret) {
      printf("MSM_INFO_SET_IOVA is not supported!\n");

      struct drm_msm_gem_new req_new = {.size = FAKE_ADDRESS_SPACE_SIZE,
                                        .flags = MSM_BO_CACHED_COHERENT};
      drmCommandWriteRead(dev->fd, DRM_MSM_GEM_NEW, &req_new, sizeof(req_new));
      dev->va_id = req_new.handle;

      struct drm_msm_gem_info req_info = {
         .handle = req_new.handle,
         .info = MSM_INFO_GET_IOVA,
      };

      drmCommandWriteRead(dev->fd,
                          DRM_MSM_GEM_INFO, &req_info, sizeof(req_info));
      dev->va_iova = req_info.value;

      struct drm_msm_gem_info req_offset = {
         .handle = req_new.handle,
         .info = MSM_INFO_GET_OFFSET,
      };

      drmCommandWriteRead(dev->fd, DRM_MSM_GEM_INFO, &req_offset, sizeof(req_offset));

      dev->va_map = mmap(0, FAKE_ADDRESS_SPACE_SIZE, PROT_READ | PROT_WRITE,
                         MAP_SHARED, dev->fd, req_offset.value);
      if (dev->va_map == MAP_FAILED) {
         err(1, "mmap failure");
      }

      va_start = dev->va_iova;
      va_size = FAKE_ADDRESS_SPACE_SIZE;

      printf("Allocated iova %" PRIx64 "\n", dev->va_iova);
   }

   rb_tree_init(&dev->buffers);
   util_vma_heap_init(&dev->vma, va_start, ROUND_DOWN_TO(va_size, 4096));
   u_vector_init(&dev->cmdstreams, 8, sizeof(struct cmdstream));
   u_vector_init(&dev->wrbufs, 8, sizeof(struct wrbuf));

   return dev;
}

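/* Build one drm_msm_gem_submit_cmd per recorded cmdstream, submit them in
 * a single DRM_MSM_GEM_SUBMIT, then wait on the returned fence before
 * freeing the buffers.
 */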
static void
device_submit_cmdstreams(struct device *dev)
{
   if (!u_vector_length(&dev->cmdstreams)) {
      device_free_buffers(dev);
      return;
   }

   struct drm_msm_gem_submit_cmd cmds[u_vector_length(&dev->cmdstreams)];

   uint32_t idx = 0;
   struct cmdstream *cmd;
   u_vector_foreach(cmd, &dev->cmdstreams) {
      struct buffer *cmdstream_buf = device_get_buffer(dev, cmd->iova);

      uint32_t bo_idx = 0;
      rb_tree_foreach (struct buffer, buf, &dev->buffers, node) {
         if (buf == cmdstream_buf)
            break;

         bo_idx++;
      }

      if (cmdstream_buf)
         cmdstream_buf->flags = MSM_SUBMIT_BO_DUMP;

      struct drm_msm_gem_submit_cmd *submit_cmd = &cmds[idx];
      submit_cmd->type = MSM_SUBMIT_CMD_BUF;
      submit_cmd->submit_idx = bo_idx;
      if (dev->has_set_iova) {
         submit_cmd->submit_offset = cmd->iova - cmdstream_buf->iova;
      } else {
         submit_cmd->submit_offset = cmd->iova - dev->va_iova;
      }
      submit_cmd->size = cmd->size;
      submit_cmd->pad = 0;
      submit_cmd->nr_relocs = 0;
      submit_cmd->relocs = 0;

      idx++;
   }

   uint32_t bo_count = 0;
   rb_tree_foreach (struct buffer, buf, &dev->buffers, node) {
      if (buf)
         bo_count++;
   }

   if (!dev->has_set_iova) {
      bo_count = 1;
   }

   struct drm_msm_gem_submit_bo *bo_list =
      calloc(bo_count, sizeof(struct drm_msm_gem_submit_bo));

   if (dev->has_set_iova) {
      uint32_t bo_idx = 0;
      rb_tree_foreach (struct buffer, buf, &dev->buffers, node) {
         struct drm_msm_gem_submit_bo *submit_bo = &bo_list[bo_idx++];
         submit_bo->handle = buf->gem_handle;
         submit_bo->flags =
            buf->flags | MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE;
         submit_bo->presumed = buf->iova;

         buf->flags = 0;
      }
   } else {
      bo_list[0].handle = dev->va_id;
      bo_list[0].flags =
         MSM_SUBMIT_BO_DUMP | MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE;
      bo_list[0].presumed = dev->va_iova;
   }

   struct drm_msm_gem_submit submit_req = {
      .flags = MSM_PIPE_3D0,
      .queueid = 0,
      .bos = (uint64_t)(uintptr_t)bo_list,
      .nr_bos = bo_count,
      .cmds = (uint64_t)(uintptr_t)cmds,
      .nr_cmds = u_vector_length(&dev->cmdstreams),
      .in_syncobjs = 0,
      .out_syncobjs = 0,
      .nr_in_syncobjs = 0,
      .nr_out_syncobjs = 0,
      .syncobj_stride = sizeof(struct drm_msm_gem_submit_syncobj),
   };

   int ret = drmCommandWriteRead(dev->fd, DRM_MSM_GEM_SUBMIT, &submit_req,
                                 sizeof(submit_req));

   if (ret) {
      err(1, "DRM_MSM_GEM_SUBMIT failure %d", ret);
   }

   /* Wait for the submission to complete so that freeing the buffers
    * also frees their VMAs in the kernel. This makes sure that new
    * allocations won't clash with the old ones.
    */
   struct drm_msm_wait_fence wait_req = {
      .fence = submit_req.fence,
      .queueid = 0,
   };
   get_abs_timeout(&wait_req.timeout, 1000000000);

   ret =
      drmCommandWrite(dev->fd, DRM_MSM_WAIT_FENCE, &wait_req, sizeof(wait_req));
   if (ret && (ret != -ETIMEDOUT)) {
      err(1, "DRM_MSM_WAIT_FENCE failure %d", ret);
   }

   u_vector_finish(&dev->cmdstreams);
   u_vector_init(&dev->cmdstreams, 8, sizeof(struct cmdstream));

   device_print_shader_log(dev);
   device_print_cp_log(dev);

   device_dump_wrbuf(dev);

   device_free_buffers(dev);
}

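/* Allocate backing memory for a captured buffer at its original address:
 * reserve the iova range in the VMA heap, create a GEM BO, pin it with
 * MSM_INFO_SET_IOVA and map it (or just point into the fake address space
 * when SET_IOVA is unavailable).
 */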
static void
buffer_mem_alloc(struct device *dev, struct buffer *buf)
{
   util_vma_heap_alloc_addr(&dev->vma, buf->iova, buf->size);

   if (!dev->has_set_iova) {
      uint64_t offset = buf->iova - dev->va_iova;
      assert(offset < FAKE_ADDRESS_SPACE_SIZE &&
             (offset + buf->size) <= FAKE_ADDRESS_SPACE_SIZE);
      buf->map = ((uint8_t *)dev->va_map) + offset;
      return;
   }

   {
      struct drm_msm_gem_new req = {.size = buf->size, .flags = MSM_BO_WC};

      int ret =
         drmCommandWriteRead(dev->fd, DRM_MSM_GEM_NEW, &req, sizeof(req));
      if (ret) {
         err(1, "DRM_MSM_GEM_NEW failure %d", ret);
      }

      buf->gem_handle = req.handle;
   }

   {
      struct drm_msm_gem_info req = {
         .handle = buf->gem_handle,
         .info = MSM_INFO_SET_IOVA,
         .value = buf->iova,
      };

      int ret =
         drmCommandWriteRead(dev->fd, DRM_MSM_GEM_INFO, &req, sizeof(req));

      if (ret) {
         err(1, "MSM_INFO_SET_IOVA failure %d", ret);
      }
   }

   {
      struct drm_msm_gem_info req = {
         .handle = buf->gem_handle,
         .info = MSM_INFO_GET_OFFSET,
      };

      int ret =
         drmCommandWriteRead(dev->fd, DRM_MSM_GEM_INFO, &req, sizeof(req));
      if (ret) {
         err(1, "MSM_INFO_GET_OFFSET failure %d", ret);
      }

      void *map = mmap(0, buf->size, PROT_READ | PROT_WRITE, MAP_SHARED,
                       dev->fd, req.value);
      if (map == MAP_FAILED) {
         err(1, "mmap failure");
      }

      buf->map = map;
   }
}

void
buffer_mem_free(struct device *dev, struct buffer *buf)
{
   if (dev->has_set_iova) {
      munmap(buf->map, buf->size);

      struct drm_msm_gem_info req_iova = {
         .handle = buf->gem_handle,
         .info = MSM_INFO_SET_IOVA,
         .value = 0,
      };

      int ret = drmCommandWriteRead(dev->fd, DRM_MSM_GEM_INFO, &req_iova,
                                    sizeof(req_iova));
      if (ret < 0) {
         err(1, "MSM_INFO_SET_IOVA(0) failed! %d", ret);
         return;
      }

      struct drm_gem_close req = {
         .handle = buf->gem_handle,
      };
      drmIoctl(dev->fd, DRM_IOCTL_GEM_CLOSE, &req);
   }

   util_vma_heap_free(&dev->vma, buf->iova, buf->size);
}

#elif FD_REPLAY_KGSL
static int
safe_ioctl(int fd, unsigned long request, void *arg)
{
   int ret;

   do {
      ret = ioctl(fd, request, arg);
   } while (ret == -1 && (errno == EINTR || errno == EAGAIN));

   return ret;
}

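/* KGSL cannot place a buffer at a caller-chosen GPU address, so allocate
 * one large IO-coherent buffer up front and carve all replayed buffers
 * out of it, hoping its base address matches the capture.
 */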
static struct device *
device_create(uint64_t base_addr)
{
   struct device *dev = calloc(1, sizeof(struct device));

   static const char path[] = "/dev/kgsl-3d0";

   dev->fd = open(path, O_RDWR | O_CLOEXEC);
   if (dev->fd < 0) {
      errx(1, "Cannot open KGSL fd!");
   }

   struct kgsl_gpumem_alloc_id req = {
      .size = FAKE_ADDRESS_SPACE_SIZE,
      .flags = KGSL_MEMFLAGS_IOCOHERENT,
   };

   int ret = safe_ioctl(dev->fd, IOCTL_KGSL_GPUMEM_ALLOC_ID, &req);
   if (ret) {
      err(1, "IOCTL_KGSL_GPUMEM_ALLOC_ID failure");
   }

   dev->va_id = req.id;
   dev->va_iova = req.gpuaddr;
   dev->va_map = mmap(0, FAKE_ADDRESS_SPACE_SIZE, PROT_READ | PROT_WRITE,
                      MAP_SHARED, dev->fd, req.id << 12);

   rb_tree_init(&dev->buffers);
   util_vma_heap_init(&dev->vma, req.gpuaddr,
                      ROUND_DOWN_TO(FAKE_ADDRESS_SPACE_SIZE, 4096));
   u_vector_init(&dev->cmdstreams, 8, sizeof(struct cmdstream));

   struct kgsl_drawctxt_create drawctxt_req = {
      .flags = KGSL_CONTEXT_SAVE_GMEM |
               KGSL_CONTEXT_NO_GMEM_ALLOC |
               KGSL_CONTEXT_PREAMBLE,
   };

   ret = safe_ioctl(dev->fd, IOCTL_KGSL_DRAWCTXT_CREATE, &drawctxt_req);
   if (ret) {
      err(1, "IOCTL_KGSL_DRAWCTXT_CREATE failure");
   }

   printf("Allocated iova %" PRIx64 "\n", dev->va_iova);

   dev->context_id = drawctxt_req.drawctxt_id;

   return dev;
}

static void
device_submit_cmdstreams(struct device *dev)
{
   if (!u_vector_length(&dev->cmdstreams)) {
      device_free_buffers(dev);
      return;
   }

   struct kgsl_command_object cmds[u_vector_length(&dev->cmdstreams)];

   uint32_t idx = 0;
   struct cmdstream *cmd;
   u_vector_foreach(cmd, &dev->cmdstreams) {
      struct kgsl_command_object *submit_cmd = &cmds[idx++];
      submit_cmd->gpuaddr = cmd->iova;
      submit_cmd->size = cmd->size;
      submit_cmd->flags = KGSL_CMDLIST_IB;
      submit_cmd->id = dev->va_id;
   }

   struct kgsl_gpu_command submit_req = {
      .flags = KGSL_CMDBATCH_SUBMIT_IB_LIST,
      .cmdlist = (uintptr_t)&cmds,
      .cmdsize = sizeof(struct kgsl_command_object),
      .numcmds = u_vector_length(&dev->cmdstreams),
      .numsyncs = 0,
      .context_id = dev->context_id,
   };

   int ret = safe_ioctl(dev->fd, IOCTL_KGSL_GPU_COMMAND, &submit_req);

   if (ret) {
      err(1, "IOCTL_KGSL_GPU_COMMAND failure %d", ret);
   }

   struct kgsl_device_waittimestamp_ctxtid wait = {
      .context_id = dev->context_id,
      .timestamp = submit_req.timestamp,
      .timeout = 3000,
   };

   ret = safe_ioctl(dev->fd, IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID, &wait);

   if (ret) {
      err(1, "IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID failure %d", ret);
   }

   u_vector_finish(&dev->cmdstreams);
   u_vector_init(&dev->cmdstreams, 8, sizeof(struct cmdstream));

   device_print_shader_log(dev);
   device_print_cp_log(dev);

   device_dump_wrbuf(dev);

   device_free_buffers(dev);
}

static void
buffer_mem_alloc(struct device *dev, struct buffer *buf)
{
   util_vma_heap_alloc_addr(&dev->vma, buf->iova, buf->size);

   buf->map = ((uint8_t *)dev->va_map) + (buf->iova - dev->va_iova);
}

void
buffer_mem_free(struct device *dev, struct buffer *buf)
{
   util_vma_heap_free(&dev->vma, buf->iova, buf->size);
}
#else

static int
safe_ioctl(int fd, unsigned long request, void *arg)
{
   int ret;

   do {
      ret = ioctl(fd, request, arg);
   } while (ret == -1 && (errno == EINTR || errno == EAGAIN));

   return ret;
}

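/* The layouts below are private driver data blobs passed through the dxg
 * ioctls; they appear to be reverse-engineered, with the unk* fields
 * holding the constant values observed in captures (noted in the trailing
 * comments).
 */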
struct alloc_priv_info {
   __u32 struct_size;
   char _pad0[4];
   __u32 unk0; // 1
   char _pad1[4];
   __u64 size;
   __u32 alignment;
   char _pad2[20];
   __u64 allocated_size;
   __u32 unk1; // 1
   char _pad4[8]; /* offset: 60 */
   __u32 unk2; // 61
   char _pad5[76];
   __u32 unk3; /* offset: 148 */ // 1
   char _pad6[8];
   __u32 unk4; /* offset: 160 */ // 1
   char _pad7[44];
   __u32 unk5; /* offset: 208 */ // 3
   char _pad8[16];
   __u32 size_2; /* offset: 228 */
   __u32 unk6; // 1
   __u32 size_3;
   __u32 size_4;
   __u32 unk7; /* offset: 244 */ // 1
   char _pad9[56];
};
static_assert(sizeof(struct alloc_priv_info) == 304);
static_assert(offsetof(struct alloc_priv_info, unk1) == 56);
static_assert(offsetof(struct alloc_priv_info, unk3) == 148);
static_assert(offsetof(struct alloc_priv_info, unk5) == 208);

struct submit_priv_ib_info {
   char _pad5[4];
   __u32 size_dwords;
   __u64 iova;
   char _pad6[8];
} __attribute__((packed));

struct submit_priv_data {
   __u32 magic0;
   char _pad0[4];
   __u32 struct_size;
   char _pad1[4];
   /* It seems that the private data can contain several sub-blocks.
    * The cmdbuf is one of them; it is followed by another 8-byte struct
    * without anything useful in it. That second block doesn't seem
    * to matter for replaying.
    */
   __u32 datas_count;
   char _pad2[32];
   struct {
      __u32 magic1;
      __u32 data_size;

      struct {
         __u32 unk1;
         __u32 cmdbuf_size;
         char _pad3[32];
         __u32 ib_count;
         char _pad4[36];

         struct submit_priv_ib_info ibs[];
      } cmdbuf;
   } data0;

   // unsigned char magic2[8];
} __attribute__((packed));
static_assert(offsetof(struct submit_priv_data, data0) == 0x34);
static_assert(offsetof(struct submit_priv_data, data0.cmdbuf.ibs) == 0x8c);

static struct device *
device_create(uint64_t base_addr)
{
   struct device *dev = calloc(1, sizeof(struct device));

   static const char path[] = "/dev/dxg";

   dev->fd = open(path, O_RDWR | O_CLOEXEC);
   if (dev->fd < 0) {
      errx(1, "Cannot open /dev/dxg fd");
   }

   struct d3dkmt_adapterinfo adapters[1];
   struct d3dkmt_enumadapters3 enum_adapters = {
      .adapter_count = 1,
      .adapters = adapters,
   };
   int ret = safe_ioctl(dev->fd, LX_DXENUMADAPTERS3, &enum_adapters);
   if (ret) {
      errx(1, "LX_DXENUMADAPTERS3 failure");
   }

   if (enum_adapters.adapter_count == 0) {
      errx(1, "No adapters found");
   }

   struct winluid adapter_luid = enum_adapters.adapters[0].adapter_luid;

   struct d3dkmt_openadapterfromluid open_adapter = {
      .adapter_luid = adapter_luid,
   };
   ret = safe_ioctl(dev->fd, LX_DXOPENADAPTERFROMLUID, &open_adapter);
   if (ret) {
      errx(1, "LX_DXOPENADAPTERFROMLUID failure");
   }

   struct d3dkmthandle adapter = open_adapter.adapter_handle;

   struct d3dkmt_createdevice create_device = {
      .adapter = adapter,
   };
   ret = safe_ioctl(dev->fd, LX_DXCREATEDEVICE, &create_device);
   if (ret) {
      errx(1, "LX_DXCREATEDEVICE failure");
   }

   struct d3dkmthandle device = create_device.device;
   dev->device = device;

   unsigned char create_context_priv_data[] = {
      0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00,
      0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1c, 0x0c, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
   };

   struct d3dkmt_createcontextvirtual create_context = {
      .device = device,
      .node_ordinal = 0,
      .engine_affinity = 1,
      .priv_drv_data = create_context_priv_data,
      .priv_drv_data_size = sizeof(create_context_priv_data),
      .client_hint = 16,
   };
   ret = safe_ioctl(dev->fd, LX_DXCREATECONTEXTVIRTUAL, &create_context);
   if (ret) {
      errx(1, "LX_DXCREATECONTEXTVIRTUAL failure");
   }

   dev->context = create_context.context;

   struct d3dkmt_createpagingqueue create_paging_queue = {
      .device = device,
      .priority = _D3DDDI_PAGINGQUEUE_PRIORITY_NORMAL,
      .physical_adapter_index = 0,
   };
   ret = safe_ioctl(dev->fd, LX_DXCREATEPAGINGQUEUE, &create_paging_queue);
   if (ret) {
      errx(1, "LX_DXCREATEPAGINGQUEUE failure");
   }
   struct d3dkmthandle paging_queue = create_paging_queue.paging_queue;

   uint32_t alloc_size = FAKE_ADDRESS_SPACE_SIZE;
   struct alloc_priv_info priv_alloc_info = {
      .struct_size = sizeof(struct alloc_priv_info),
      .unk0 = 1,
      .size = alloc_size,
      .alignment = 4096,
      .unk1 = 1,
      .unk2 = 61,
      .unk3 = 1,
      .unk4 = 1,
      .unk5 = 3,
      .size_2 = alloc_size,
      .unk6 = 1,
      .size_3 = alloc_size,
      .size_4 = alloc_size,
      .unk7 = 1,
   };

   struct d3dddi_allocationinfo2 alloc_info = {
      .priv_drv_data = &priv_alloc_info,
      .priv_drv_data_size = sizeof(struct alloc_priv_info),
   };

   struct d3dkmt_createallocation create_allocation = {
      .device = device,
      .alloc_count = 1,
      .allocation_info = &alloc_info,
   };
   ret = safe_ioctl(dev->fd, LX_DXCREATEALLOCATION, &create_allocation);
   if (ret) {
      errx(1, "LX_DXCREATEALLOCATION failure");
   }

   assert(priv_alloc_info.allocated_size == alloc_size);

   struct d3dddi_mapgpuvirtualaddress map_virtual_address = {
      .paging_queue = paging_queue,
      .base_address = base_addr,
      .maximum_address = 18446744073709551615ull,
      .allocation = create_allocation.allocation_info[0].allocation,
      .size_in_pages = MAX2(alloc_size / 4096, 1),
      .protection = {
         .write = 1,
         .execute = 1,
      },
   };
   ret = safe_ioctl(dev->fd, LX_DXMAPGPUVIRTUALADDRESS, &map_virtual_address);
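   /* 259 appears to be NTSTATUS STATUS_PENDING (0x103): the request was
    * queued rather than failed, which is the expected result here.
    */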
   if (ret != 259) {
      errx(1, "LX_DXMAPGPUVIRTUALADDRESS failure");
   }

   __u32 priority = 0;
   struct d3dddi_makeresident make_resident = {
      .paging_queue = paging_queue,
      .alloc_count = 1,
      .allocation_list = &create_allocation.allocation_info[0].allocation,
      .priority_list = &priority,
   };
   ret = safe_ioctl(dev->fd, LX_DXMAKERESIDENT, &make_resident);
   if (ret != 259) {
      errx(1, "LX_DXMAKERESIDENT failure");
   }

   struct d3dkmt_lock2 lock = {
      .device = device,
      .allocation = create_allocation.allocation_info[0].allocation,
   };
   ret = safe_ioctl(dev->fd, LX_DXLOCK2, &lock);
   if (ret) {
      errx(1, "LX_DXLOCK2 failure");
   }

   dev->va_iova = map_virtual_address.virtual_address;
   dev->va_map = lock.data;

   rb_tree_init(&dev->buffers);
   util_vma_heap_init(&dev->vma, dev->va_iova, ROUND_DOWN_TO(alloc_size, 4096));
   u_vector_init(&dev->cmdstreams, 8, sizeof(struct cmdstream));

   printf("Allocated iova at 0x%" PRIx64 "\n", dev->va_iova);

   uint64_t hole_size = 4096;
   dev->vma.alloc_high = true;
   dev->fence_iova = util_vma_heap_alloc(&dev->vma, hole_size, 4096);
   dev->fence_ib_iova = dev->fence_iova + 8;
   dev->fence = (uint32_t *)((uint8_t *)dev->va_map + (dev->fence_iova - dev->va_iova));
   dev->fence_ib = (uint32_t *)((uint8_t *)dev->va_map + (dev->fence_ib_iova - dev->va_iova));
   dev->vma.alloc_high = false;

   return dev;
}

static void
device_submit_cmdstreams(struct device *dev)
{
   if (!u_vector_length(&dev->cmdstreams)) {
      device_free_buffers(dev);
      return;
   }

   uint32_t cmdstream_count = u_vector_length(&dev->cmdstreams) + 1;

   uint32_t priv_data_size =
      sizeof(struct submit_priv_data) +
      cmdstream_count * sizeof(struct submit_priv_ib_info);

   struct submit_priv_data *priv_data = calloc(1, priv_data_size);
   priv_data->magic0 = 0xccaabbee;
   priv_data->struct_size = priv_data_size;
   priv_data->datas_count = 1;

   priv_data->data0.magic1 = 0xfadcab02;
   priv_data->data0.data_size =
      sizeof(priv_data->data0) +
      cmdstream_count * sizeof(struct submit_priv_ib_info);
   priv_data->data0.cmdbuf.unk1 = 0xcccc0001;
   priv_data->data0.cmdbuf.cmdbuf_size = sizeof(priv_data->data0.cmdbuf) +
      cmdstream_count * sizeof(struct submit_priv_ib_info);
   priv_data->data0.cmdbuf.ib_count = cmdstream_count;

   struct cmdstream *cmd;
   uint32_t idx = 0;
   u_vector_foreach(cmd, &dev->cmdstreams) {
      priv_data->data0.cmdbuf.ibs[idx].size_dwords = cmd->size / 4;
      priv_data->data0.cmdbuf.ibs[idx].iova = cmd->iova;
      idx++;
   }

   priv_data->data0.cmdbuf.ibs[idx].size_dwords = 4;
   priv_data->data0.cmdbuf.ibs[idx].iova = dev->fence_ib_iova;

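   /* Append a tiny IB that uses CP_MEM_WRITE to store a marker value at
    * fence_iova; the CPU then polls that location below to detect when
    * the whole submission has executed.
    */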
   *dev->fence = 0x00000000;
   dev->fence_ib[0] = pm4_pkt7_hdr(0x3d, 3); // CP_MEM_WRITE
   dev->fence_ib[1] = dev->fence_iova;
   dev->fence_ib[2] = dev->fence_iova >> 32;
   dev->fence_ib[3] = 0xababfcfc;

   // Fill second (empty) data block
   // uint32_t *magic_end = (uint32_t *)(((char *)priv_data) + priv_data_size - 8);
   // magic_end[0] = 0xfadcab00;
   // magic_end[1] = 0x00000008;

   struct d3dkmt_submitcommand submission = {
      .command_buffer = priv_data->data0.cmdbuf.ibs[0].iova,
      .command_length = priv_data->data0.cmdbuf.ibs[0].size_dwords * sizeof(uint32_t),
      .broadcast_context_count = 1,
      .broadcast_context[0] = dev->context,
      .priv_drv_data_size = priv_data_size,
      .priv_drv_data = priv_data,
   };

   int ret = safe_ioctl(dev->fd, LX_DXSUBMITCOMMAND, &submission);
   if (ret) {
      errx(1, "LX_DXSUBMITCOMMAND failure");
   }

   free(priv_data);

   u_vector_finish(&dev->cmdstreams);
   u_vector_init(&dev->cmdstreams, 8, sizeof(struct cmdstream));

   // TODO: find a better way to wait
   for (unsigned i = 0; i < 1000; i++) {
      usleep(1000);
      if (*dev->fence != 0)
         break;
   }
   if (*dev->fence == 0) {
      errx(1, "Waiting for the submission failed! The GPU faulted or the "
              "kernel did not execute this submission.");
   }

   device_print_shader_log(dev);
   device_print_cp_log(dev);

   device_dump_wrbuf(dev);

   device_free_buffers(dev);
}

static void
buffer_mem_alloc(struct device *dev, struct buffer *buf)
{
   util_vma_heap_alloc_addr(&dev->vma, buf->iova, buf->size);

   buf->map = ((uint8_t *)dev->va_map) + (buf->iova - dev->va_iova);
}

void
buffer_mem_free(struct device *dev, struct buffer *buf)
{
   util_vma_heap_free(&dev->vma, buf->iova, buf->size);
}

#endif

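/* Copy a captured buffer's contents into GPU-visible memory at its
 * original iova, (re)allocating the backing storage if the buffer is new
 * or its size changed.
 */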
static void
upload_buffer(struct device *dev, uint64_t iova, unsigned int size,
              void *hostptr)
{
   struct buffer *buf = device_get_buffer(dev, iova);

   if (!buf) {
      buf = calloc(1, sizeof(struct buffer));
      buf->iova = iova;
      buf->size = size;

      rb_tree_insert(&dev->buffers, &buf->node, rb_buffer_insert_cmp);

      buffer_mem_alloc(dev, buf);
   } else if (buf->size != size) {
      buffer_mem_free(dev, buf);
      buf->size = size;
      buffer_mem_alloc(dev, buf);
   }

   memcpy(buf->map, hostptr, size);

   buf->used = true;
}

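/* Replace a captured submit with a generated one. The generator is invoked
 * as "<generator> --vastart=<iova> --vasize=<size> <output.rd>" and must
 * emit an .rd file whose buffers and cmdstream fit inside the iova hole we
 * reserve here; the resulting sections are then uploaded like a capture.
 */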
static int
override_cmdstream(struct device *dev, struct cmdstream *cs,
                   const char *cmdstreamgen)
{
#if FD_REPLAY_KGSL
   static const char *tmpfilename = "/sdcard/Download/cmdstream_override.rd";
#elif FD_REPLAY_MSM || FD_REPLAY_WSL
   static const char *tmpfilename = "/tmp/cmdstream_override.rd";
#endif

   /* Find free space for the new cmdstreams and resources we will use
    * when overriding the existing cmdstream.
    */
   /* TODO: should the size be configurable? */
   uint64_t hole_size = 32 * 1024 * 1024;
   dev->vma.alloc_high = true;
   uint64_t hole_iova = util_vma_heap_alloc(&dev->vma, hole_size, 4096);
   dev->vma.alloc_high = false;
   util_vma_heap_free(&dev->vma, hole_iova, hole_size);

   char cmd[2048];
   snprintf(cmd, sizeof(cmd),
            "%s --vastart=%" PRIu64 " --vasize=%" PRIu64 " %s", cmdstreamgen,
            hole_iova, hole_size, tmpfilename);

   printf("generating cmdstream '%s'\n", cmd);

   int ret = system(cmd);
   if (ret) {
      fprintf(stderr, "Error executing %s\n", cmd);
      return -1;
   }

   struct io *io;
   struct rd_parsed_section ps = {0};

   io = io_open(tmpfilename);
   if (!io) {
      fprintf(stderr, "could not open: %s\n", tmpfilename);
      return -1;
   }

   struct {
      unsigned int len;
      uint64_t gpuaddr;
   } gpuaddr = {0};

   while (parse_rd_section(io, &ps)) {
      switch (ps.type) {
      case RD_GPUADDR:
         parse_addr(ps.buf, ps.sz, &gpuaddr.len, &gpuaddr.gpuaddr);
         break;
      case RD_BUFFER_CONTENTS:
         upload_buffer(dev, gpuaddr.gpuaddr, gpuaddr.len, ps.buf);
         ps.buf = NULL;
         break;
      case RD_CMDSTREAM_ADDR: {
         unsigned int sizedwords;
         uint64_t gpuaddr;
         parse_addr(ps.buf, ps.sz, &sizedwords, &gpuaddr);
         printf("override cmdstream: %d dwords\n", sizedwords);

         cs->iova = gpuaddr;
         cs->size = sizedwords * sizeof(uint32_t);
         break;
      }
      case RD_SHADER_LOG_BUFFER: {
         unsigned int sizedwords;
         parse_addr(ps.buf, ps.sz, &sizedwords, &dev->shader_log_iova);
         break;
      }
      case RD_CP_LOG_BUFFER: {
         unsigned int sizedwords;
         parse_addr(ps.buf, ps.sz, &sizedwords, &dev->cp_log_iova);
         break;
      }
      case RD_WRBUFFER: {
         struct wrbuf *wrbuf = u_vector_add(&dev->wrbufs);
         uint64_t *p = (uint64_t *)ps.buf;
         wrbuf->iova = p[0];
         wrbuf->size = p[1];
         wrbuf->name = calloc(1, p[2]);
         memcpy(wrbuf->name, (char *)ps.buf + 3 * sizeof(uint64_t), p[2]);
         break;
      }
      default:
         break;
      }
   }

   io_close(io);
   if (ps.ret < 0) {
      fprintf(stderr, "corrupt file %s\n", tmpfilename);
   }

   return ps.ret;
}

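/* Top-level driver: stream the .rd file section by section, upload
 * buffers, batch cmdstreams between RD_GPUADDR sections into submits,
 * and replay them in capture order.
 */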
static int
handle_file(const char *filename, uint32_t first_submit, uint32_t last_submit,
            uint32_t submit_to_override, uint64_t base_addr,
            const char *cmdstreamgen)
{
   struct io *io;
   int submit = 0;
   bool skip = false;
   bool need_submit = false;
   struct rd_parsed_section ps = {0};

   printf("Reading %s...\n", filename);

   if (!strcmp(filename, "-"))
      io = io_openfd(0);
   else
      io = io_open(filename);

   if (!io) {
      fprintf(stderr, "could not open: %s\n", filename);
      return -1;
   }

   struct device *dev = device_create(base_addr);

   struct {
      unsigned int len;
      uint64_t gpuaddr;
   } gpuaddr = {0};

   while (parse_rd_section(io, &ps)) {
      switch (ps.type) {
      case RD_TEST:
      case RD_VERT_SHADER:
      case RD_FRAG_SHADER:
         /* no-op */
         break;
      case RD_CMD:
         skip = false;
         if (exename) {
            skip |= (strstr(ps.buf, exename) != ps.buf);
         } else {
            skip |= (strstr(ps.buf, "fdperf") == ps.buf);
            skip |= (strstr(ps.buf, "chrome") == ps.buf);
            skip |= (strstr(ps.buf, "surfaceflinger") == ps.buf);
            skip |= ((char *)ps.buf)[0] == 'X';
         }
         break;

      case RD_GPUADDR:
         if (need_submit) {
            need_submit = false;
            device_submit_cmdstreams(dev);
         }

         parse_addr(ps.buf, ps.sz, &gpuaddr.len, &gpuaddr.gpuaddr);
         break;
      case RD_BUFFER_CONTENTS:
         /* TODO: skip buffer uploading, and even reading, if this buffer
          * is used for a submit outside of the [first_submit, last_submit]
          * range. A set of buffers is shared between several cmdstreams,
          * so we'd have to figure out starting from which RD_CMD the
          * buffers need to be uploaded.
          */
         upload_buffer(dev, gpuaddr.gpuaddr, gpuaddr.len, ps.buf);
         break;
      case RD_CMDSTREAM_ADDR: {
         unsigned int sizedwords;
         uint64_t gpuaddr;
         parse_addr(ps.buf, ps.sz, &sizedwords, &gpuaddr);

         bool add_submit = !skip && (submit >= first_submit) &&
                           (submit <= last_submit);
         printf("%scmdstream %d: %d dwords\n", add_submit ? "" : "skipped ",
                submit, sizedwords);

         if (add_submit) {
            struct cmdstream *cs = u_vector_add(&dev->cmdstreams);

            if (submit == submit_to_override) {
               if (override_cmdstream(dev, cs, cmdstreamgen) < 0)
                  break;
            } else {
               cs->iova = gpuaddr;
               cs->size = sizedwords * sizeof(uint32_t);
            }
         }

         need_submit = true;

         submit++;
         break;
      }
      case RD_GPU_ID: {
         uint32_t gpu_id = parse_gpu_id(ps.buf);
         if (gpu_id)
            printf("gpuid: %d\n", gpu_id);
         break;
      }
      case RD_CHIP_ID: {
         uint64_t chip_id = parse_chip_id(ps.buf);
         printf("chip_id: 0x%" PRIx64 "\n", chip_id);
         break;
      }
      default:
         break;
      }
   }

   if (need_submit)
      device_submit_cmdstreams(dev);

   close(dev->fd);

   io_close(io);
   fflush(stdout);

   if (ps.ret < 0) {
      printf("corrupt file\n");
   }
   return 0;
}