/*
 * Copyright © 2022 Igalia S.L.
 * SPDX-License-Identifier: MIT
 */

#include <assert.h>
#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <getopt.h>
#include <inttypes.h>
#include <signal.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <libgen.h>
#if FD_REPLAY_KGSL
#define __user
#include "../vulkan/msm_kgsl.h"
#elif FD_REPLAY_MSM
#include <xf86drm.h>
#include "drm-uapi/msm_drm.h"
#elif FD_REPLAY_WSL
#define __KERNEL__
#include "drm-uapi/d3dkmthk.h"
#endif

#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/wait.h>

#include "util/os_time.h"
#include "util/rb_tree.h"
#include "util/u_vector.h"
#include "util/vma.h"
#include "buffers.h"
#include "cffdec.h"
#include "io.h"
#include "redump.h"
#include "rdutil.h"

/**
 * Replay command stream obtained from:
 * - /sys/kernel/debug/dri/0/rd
 * - /sys/kernel/debug/dri/0/hangrd
 * !!! Command stream capture should be done with ALL buffers:
 * - echo 1 > /sys/module/msm/parameters/rd_full
 *
 * Requires a kernel with MSM_INFO_SET_IOVA support.
 * In case userspace IOVAs are not supported, like on KGSL, we have to
 * pre-allocate a single buffer and hope it is always allocated starting
 * from the same address.
 *
 * TODO: Handling misrendering would require marking framebuffer images
 * at each renderpass in order to fetch and decode them.
 *
 * Code from Freedreno/Turnip is not re-used here since the relevant
 * pieces may introduce additional allocations which cannot be allowed
 * during the replay.
 *
 * For a how-to, see freedreno.rst
 */
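/* Illustrative capture/replay session (binary and file names are only
 * examples; see freedreno.rst for the authoritative how-to):
 *
 *   echo 1 > /sys/module/msm/parameters/rd_full
 *   cat /sys/kernel/debug/dri/0/rd > capture.rd   # while the app is running
 *   replay --first=0 --last=3 capture.rd
 */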

static const char *exename = NULL;

static const uint64_t FAKE_ADDRESS_SPACE_SIZE = 1024 * 1024 * 1024;

static int handle_file(const char *filename, uint32_t first_submit,
                       uint32_t last_submit, uint32_t submit_to_override,
                       uint64_t base_addr, const char *cmdstreamgen);

static void
print_usage(const char *name, const char *default_csgen)
{
   /* clang-format off */
   fprintf(stderr, "Usage:\n\n"
           "\t%s [OPTIONS]... FILE...\n\n"
           "Options:\n"
           "\t-e, --exe=NAME       - only use cmdstream from named process\n"
           "\t-o --override=submit - № of the submit to override\n"
           "\t-g --generator=path  - executable which generates cmdstream for the override (default: %s)\n"
           "\t-f --first=submit    - first submit № to replay\n"
           "\t-l --last=submit     - last submit № to replay\n"
           "\t-a --address=address - base iova address on WSL\n"
           "\t-h, --help           - show this message\n"
           , name, default_csgen);
   /* clang-format on */
   exit(2);
}

/* clang-format off */
static const struct option opts[] = {
      { "exe",       required_argument, 0, 'e' },
      { "override",  required_argument, 0, 'o' },
      { "generator", required_argument, 0, 'g' },
      { "first",     required_argument, 0, 'f' },
      { "last",      required_argument, 0, 'l' },
      { "address",   required_argument, 0, 'a' },
      { "help",      no_argument,       0, 'h' },
};
/* clang-format on */

int
main(int argc, char **argv)
{
   int ret = -1;
   int c;

   uint32_t submit_to_override = -1;
   uint32_t first_submit = 0;
   uint32_t last_submit = -1;
   uint64_t base_addr = 0;

   char *default_csgen = malloc(PATH_MAX);
   snprintf(default_csgen, PATH_MAX, "%s/generate_rd", dirname(argv[0]));

   const char *csgen = default_csgen;

   while ((c = getopt_long(argc, argv, "e:o:g:f:l:a:h", opts, NULL)) != -1) {
      switch (c) {
      case 0:
         /* option that sets a flag, nothing to do */
         break;
      case 'e':
         exename = optarg;
         break;
      case 'o':
         submit_to_override = strtoul(optarg, NULL, 0);
         break;
      case 'g':
         csgen = optarg;
         break;
      case 'f':
         first_submit = strtoul(optarg, NULL, 0);
         break;
      case 'l':
         last_submit = strtoul(optarg, NULL, 0);
         break;
      case 'a':
         base_addr = strtoull(optarg, NULL, 0);
         break;
      case 'h':
      default:
         print_usage(argv[0], default_csgen);
      }
   }

   while (optind < argc) {
      ret = handle_file(argv[optind], first_submit, last_submit,
                        submit_to_override, base_addr, csgen);
      if (ret) {
         fprintf(stderr, "error reading: %s\n", argv[optind]);
         fprintf(stderr, "continuing...\n");
      }
      optind++;
   }

   if (ret)
      print_usage(argv[0], default_csgen);

   return ret;
}

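/* A single GPU buffer reconstructed from the capture, kept in the device's
 * rb-tree keyed by its iova.
 */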
struct buffer {
   struct rb_node node;

   uint32_t gem_handle;
   uint64_t size;
   uint64_t iova;
   void *map;

   bool used;
   uint32_t flags;
};

struct cmdstream {
   uint64_t iova;
   uint64_t size;
};

struct wrbuf {
   uint64_t iova;
   uint64_t size;
   char *name;
};

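/* Per-replay state: the kernel fd, all reconstructed buffers, a VMA heap
 * mirroring the captured address space, the cmdstreams queued for the next
 * submit, and backend-specific handles.
 */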
struct device {
   int fd;

   struct rb_tree buffers;
   struct util_vma_heap vma;

   struct u_vector cmdstreams;

   uint64_t shader_log_iova;
   uint64_t cp_log_iova;

   bool has_set_iova;

   uint32_t va_id;
   void *va_map;
   uint64_t va_iova;

   struct u_vector wrbufs;

#ifdef FD_REPLAY_MSM
   uint32_t queue_id;
#endif

#ifdef FD_REPLAY_KGSL
   uint32_t context_id;
#endif

#ifdef FD_REPLAY_WSL
   struct d3dkmthandle device;
   struct d3dkmthandle context;

   /* We don't know of a good way to wait for a submission to complete
    * on WSL at the moment, so we use our own fences.
    */
   uint64_t fence_iova;
   uint64_t fence_ib_iova;
   volatile uint32_t *fence;
   uint32_t *fence_ib;
#endif
};

void buffer_mem_free(struct device *dev, struct buffer *buf);

static int
rb_buffer_insert_cmp(const struct rb_node *n1, const struct rb_node *n2)
{
   const struct buffer *buf1 = (const struct buffer *)n1;
   const struct buffer *buf2 = (const struct buffer *)n2;
   /* Note that gpuaddr comparisons can overflow an int: */
   if (buf1->iova > buf2->iova)
      return 1;
   else if (buf1->iova < buf2->iova)
      return -1;
   return 0;
}

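/* Search comparator: a node matches when the address falls inside
 * [iova, iova + size), so device_get_buffer() returns the buffer that
 * contains an arbitrary GPU address.
 */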
static int
rb_buffer_search_cmp(const struct rb_node *node, const void *addrptr)
{
   const struct buffer *buf = (const struct buffer *)node;
   uint64_t iova = *(uint64_t *)addrptr;
   if (buf->iova + buf->size <= iova)
      return -1;
   else if (buf->iova > iova)
      return 1;
   return 0;
}

static struct buffer *
device_get_buffer(struct device *dev, uint64_t iova)
{
   if (iova == 0)
      return NULL;
   return (struct buffer *)rb_tree_search(&dev->buffers, &iova,
                                          rb_buffer_search_cmp);
}

static void
device_mark_buffers(struct device *dev)
{
   rb_tree_foreach_safe (struct buffer, buf, &dev->buffers, node) {
      buf->used = false;
   }
}

static void
device_free_buffers(struct device *dev)
{
   rb_tree_foreach_safe (struct buffer, buf, &dev->buffers, node) {
      buffer_mem_free(dev, buf);
      rb_tree_remove(&dev->buffers, &buf->node);
      free(buf);
   }
}

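/* The shader log buffer begins with a write cursor (cur_iova) followed by
 * packed 32-bit entries; print each entry both as raw hex and as a float.
 */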
static void
device_print_shader_log(struct device *dev)
{
   struct shader_log {
      uint64_t cur_iova;
      union {
         uint32_t entries_u32[0];
         float entries_float[0];
      };
   };

   if (dev->shader_log_iova != 0) {
      struct buffer *buf = device_get_buffer(dev, dev->shader_log_iova);
      if (buf) {
         struct shader_log *log = buf->map + (dev->shader_log_iova - buf->iova);
         uint32_t count = (log->cur_iova - dev->shader_log_iova -
                           offsetof(struct shader_log, entries_u32)) / 4;

         printf("Shader Log Entries: %u\n", count);

         for (uint32_t i = 0; i < count; i++) {
            printf("[%u] %08x %.4f\n", i, log->entries_u32[i],
                   log->entries_float[i]);
         }

         printf("========================================\n");
      }
   }
}

static void
device_print_cp_log(struct device *dev)
{
   struct cp_log {
      uint64_t cur_iova;
      uint64_t tmp;
      uint64_t first_entry_size;
   };

   struct cp_log_entry {
      uint64_t size;
      uint32_t data[0];
   };

   if (dev->cp_log_iova == 0)
      return;

   struct buffer *buf = device_get_buffer(dev, dev->cp_log_iova);
   if (!buf)
      return;

   struct cp_log *log = buf->map + (dev->cp_log_iova - buf->iova);
   if (log->first_entry_size == 0)
      return;

   struct cp_log_entry *log_entry =
      buf->map + offsetof(struct cp_log, first_entry_size);
   uint32_t idx = 0;
   while (log_entry->size != 0) {
      printf("\nCP Log [%u]:\n", idx++);
      uint32_t dwords = log_entry->size / 4;

      for (uint32_t i = 0; i < dwords; i++) {
         if (i % 8 == 0)
            printf("\t");
         printf("%08x ", log_entry->data[i]);
         if (i % 8 == 7)
            printf("\n");
      }
      printf("\n");

      log_entry = (void *)log_entry + log_entry->size +
                  offsetof(struct cp_log_entry, data);
   }
}

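/* Dump the contents of every requested write-back buffer into ./buffers/<name>
 * once the submission has completed, so the results can be inspected offline.
 */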
static void
device_dump_wrbuf(struct device *dev)
{
   if (!u_vector_length(&dev->wrbufs))
      return;

   char buffer_dir[PATH_MAX];
   getcwd(buffer_dir, sizeof(buffer_dir));
   strcat(buffer_dir, "/buffers");
   rmdir(buffer_dir);
   mkdir(buffer_dir, 0777);

   struct wrbuf *wrbuf;
   u_vector_foreach(wrbuf, &dev->wrbufs) {
      char buffer_path[PATH_MAX];
      snprintf(buffer_path, sizeof(buffer_path), "%s/%s", buffer_dir, wrbuf->name);
      FILE *f = fopen(buffer_path, "wb");
      if (!f) {
         fprintf(stderr, "Error opening %s\n", buffer_path);
         continue;
      }

      struct buffer *buf = device_get_buffer(dev, wrbuf->iova);
      if (!buf) {
         fprintf(stderr, "Error getting buffer for %s\n", buffer_path);
         goto end_it;
      }

      uint64_t offset = wrbuf->iova - buf->iova;
      uint64_t size = MIN2(wrbuf->size, buf->size - offset);
      if (size != wrbuf->size) {
         fprintf(stderr, "Warning: Clamping buffer %s as it's smaller than expected (0x%" PRIx64 " < 0x%" PRIx64 ")\n",
                 wrbuf->name, size, wrbuf->size);
      }

      printf("Dumping %s (0x%" PRIx64 " - 0x%" PRIx64 ")\n", wrbuf->name,
             wrbuf->iova, wrbuf->iova + size);

      fwrite(buf->map + offset, size, 1, f);

   end_it:
      fclose(f);
   }
}

#if FD_REPLAY_MSM
static inline void
get_abs_timeout(struct drm_msm_timespec *tv, uint64_t ns)
{
   struct timespec t;
   clock_gettime(CLOCK_MONOTONIC, &t);
   tv->tv_sec = t.tv_sec + ns / 1000000000;
   tv->tv_nsec = t.tv_nsec + ns % 1000000000;
}

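/* Open the msm DRM device. If the kernel exposes the VA range
 * (MSM_PARAM_VA_START/VA_SIZE), buffers are later pinned at their captured
 * addresses via MSM_INFO_SET_IOVA; otherwise fall back to one large BO and
 * rely on it being placed at the same iova as during the capture.
 */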
static struct device *
device_create(uint64_t base_addr)
{
   struct device *dev = calloc(sizeof(struct device), 1);

   dev->fd = drmOpenWithType("msm", NULL, DRM_NODE_RENDER);
   if (dev->fd < 0) {
      errx(1, "Cannot open MSM fd!");
   }

   uint64_t va_start, va_size;

   struct drm_msm_param req = {
      .pipe = MSM_PIPE_3D0,
      .param = MSM_PARAM_VA_START,
   };

   int ret = drmCommandWriteRead(dev->fd, DRM_MSM_GET_PARAM, &req, sizeof(req));
   va_start = req.value;

   if (!ret) {
      req.param = MSM_PARAM_VA_SIZE;
      ret = drmCommandWriteRead(dev->fd, DRM_MSM_GET_PARAM, &req, sizeof(req));
      va_size = req.value;

      dev->has_set_iova = true;
   }

   if (ret) {
      printf("MSM_INFO_SET_IOVA is not supported!\n");

      struct drm_msm_gem_new req_new = {.size = FAKE_ADDRESS_SPACE_SIZE,
                                        .flags = MSM_BO_CACHED_COHERENT};
      drmCommandWriteRead(dev->fd, DRM_MSM_GEM_NEW, &req_new, sizeof(req_new));
      dev->va_id = req_new.handle;

      struct drm_msm_gem_info req_info = {
         .handle = req_new.handle,
         .info = MSM_INFO_GET_IOVA,
      };

      drmCommandWriteRead(dev->fd,
                          DRM_MSM_GEM_INFO, &req_info, sizeof(req_info));
      dev->va_iova = req_info.value;

      struct drm_msm_gem_info req_offset = {
         .handle = req_new.handle,
         .info = MSM_INFO_GET_OFFSET,
      };

      drmCommandWriteRead(dev->fd, DRM_MSM_GEM_INFO, &req_offset, sizeof(req_offset));

      dev->va_map = mmap(0, FAKE_ADDRESS_SPACE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
                         dev->fd, req_offset.value);
      if (dev->va_map == MAP_FAILED) {
         err(1, "mmap failure");
      }

      va_start = dev->va_iova;
      va_size = FAKE_ADDRESS_SPACE_SIZE;

      printf("Allocated iova %" PRIx64 "\n", dev->va_iova);
   }

   struct drm_msm_submitqueue req_queue = {
      .flags = 0,
      .prio = 0,
   };

   ret = drmCommandWriteRead(dev->fd, DRM_MSM_SUBMITQUEUE_NEW, &req_queue,
                             sizeof(req_queue));
   if (ret) {
      err(1, "DRM_MSM_SUBMITQUEUE_NEW failure");
   }

   dev->queue_id = req_queue.id;

   rb_tree_init(&dev->buffers);
   util_vma_heap_init(&dev->vma, va_start, ROUND_DOWN_TO(va_size, 4096));
   u_vector_init(&dev->cmdstreams, 8, sizeof(struct cmdstream));
   u_vector_init(&dev->wrbufs, 8, sizeof(struct wrbuf));

   return dev;
}

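/* Flush the queued cmdstreams as a single DRM_MSM_GEM_SUBMIT, wait for its
 * fence, then dump the shader/CP logs and wrbufs and free all buffers so the
 * next batch starts from a clean address space.
 */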
static void
device_submit_cmdstreams(struct device *dev)
{
   if (!u_vector_length(&dev->cmdstreams)) {
      device_free_buffers(dev);
      return;
   }

   struct drm_msm_gem_submit_cmd cmds[u_vector_length(&dev->cmdstreams)];

   uint32_t idx = 0;
   struct cmdstream *cmd;
   u_vector_foreach(cmd, &dev->cmdstreams) {
      struct buffer *cmdstream_buf = device_get_buffer(dev, cmd->iova);

      uint32_t bo_idx = 0;
      rb_tree_foreach (struct buffer, buf, &dev->buffers, node) {
         if (buf == cmdstream_buf)
            break;

         bo_idx++;
      }

      if (cmdstream_buf)
         cmdstream_buf->flags = MSM_SUBMIT_BO_DUMP;

      struct drm_msm_gem_submit_cmd *submit_cmd = &cmds[idx];
      submit_cmd->type = MSM_SUBMIT_CMD_BUF;
      submit_cmd->submit_idx = dev->has_set_iova ? bo_idx : 0;
      if (dev->has_set_iova) {
         submit_cmd->submit_offset = cmd->iova - cmdstream_buf->iova;
      } else {
         submit_cmd->submit_offset = cmd->iova - dev->va_iova;
      }
      submit_cmd->size = cmd->size;
      submit_cmd->pad = 0;
      submit_cmd->nr_relocs = 0;
      submit_cmd->relocs = 0;

      idx++;
   }

   uint32_t bo_count = 0;
   rb_tree_foreach (struct buffer, buf, &dev->buffers, node) {
      if (buf)
         bo_count++;
   }

   if (!dev->has_set_iova) {
      bo_count = 1;
   }

   struct drm_msm_gem_submit_bo *bo_list =
      calloc(sizeof(struct drm_msm_gem_submit_bo), bo_count);

   if (dev->has_set_iova) {
      uint32_t bo_idx = 0;
      rb_tree_foreach (struct buffer, buf, &dev->buffers, node) {
         struct drm_msm_gem_submit_bo *submit_bo = &bo_list[bo_idx++];
         submit_bo->handle = buf->gem_handle;
         submit_bo->flags =
            buf->flags | MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE;
         submit_bo->presumed = buf->iova;

         buf->flags = 0;
      }
   } else {
      bo_list[0].handle = dev->va_id;
      bo_list[0].flags =
         MSM_SUBMIT_BO_DUMP | MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE;
      bo_list[0].presumed = dev->va_iova;
   }

   struct drm_msm_gem_submit submit_req = {
      .flags = MSM_PIPE_3D0,
      .queueid = dev->queue_id,
      .bos = (uint64_t)(uintptr_t)bo_list,
      .nr_bos = bo_count,
      .cmds = (uint64_t)(uintptr_t)cmds,
      .nr_cmds = u_vector_length(&dev->cmdstreams),
      .in_syncobjs = 0,
      .out_syncobjs = 0,
      .nr_in_syncobjs = 0,
      .nr_out_syncobjs = 0,
      .syncobj_stride = sizeof(struct drm_msm_gem_submit_syncobj),
   };

   int ret = drmCommandWriteRead(dev->fd, DRM_MSM_GEM_SUBMIT, &submit_req,
                                 sizeof(submit_req));

   if (ret) {
      err(1, "DRM_MSM_GEM_SUBMIT failure %d", ret);
   }

   /* Wait for the submission to complete in order to be sure that freeing
    * buffers will free their VMAs in the kernel. This makes sure that new
    * allocations won't clash with old ones.
    */
   struct drm_msm_wait_fence wait_req = {
      .fence = submit_req.fence,
      .queueid = dev->queue_id,
   };
   get_abs_timeout(&wait_req.timeout, 1000000000);

   ret =
      drmCommandWrite(dev->fd, DRM_MSM_WAIT_FENCE, &wait_req, sizeof(wait_req));
   if (ret && (ret != -ETIMEDOUT)) {
      err(1, "DRM_MSM_WAIT_FENCE failure %d", ret);
   }

   u_vector_finish(&dev->cmdstreams);
   u_vector_init(&dev->cmdstreams, 8, sizeof(struct cmdstream));

   device_print_shader_log(dev);
   device_print_cp_log(dev);

   device_dump_wrbuf(dev);
   u_vector_finish(&dev->wrbufs);
   u_vector_init(&dev->wrbufs, 8, sizeof(struct wrbuf));

   device_free_buffers(dev);
}

static void
buffer_mem_alloc(struct device *dev, struct buffer *buf)
{
   bool success = util_vma_heap_alloc_addr(&dev->vma, buf->iova, buf->size);
   if (!success)
      errx(1, "Failed to allocate buffer");

   if (!dev->has_set_iova) {
      uint64_t offset = buf->iova - dev->va_iova;
      assert(offset < FAKE_ADDRESS_SPACE_SIZE &&
             (offset + buf->size) <= FAKE_ADDRESS_SPACE_SIZE);
      buf->map = ((uint8_t *)dev->va_map) + offset;
      return;
   }

   {
      struct drm_msm_gem_new req = {.size = buf->size, .flags = MSM_BO_WC};

      int ret =
         drmCommandWriteRead(dev->fd, DRM_MSM_GEM_NEW, &req, sizeof(req));
      if (ret) {
         err(1, "DRM_MSM_GEM_NEW failure %d", ret);
      }

      buf->gem_handle = req.handle;
   }

   {
      struct drm_msm_gem_info req = {
         .handle = buf->gem_handle,
         .info = MSM_INFO_SET_IOVA,
         .value = buf->iova,
      };

      int ret =
         drmCommandWriteRead(dev->fd, DRM_MSM_GEM_INFO, &req, sizeof(req));

      if (ret) {
         err(1, "MSM_INFO_SET_IOVA failure %d", ret);
      }
   }

   {
      struct drm_msm_gem_info req = {
         .handle = buf->gem_handle,
         .info = MSM_INFO_GET_OFFSET,
      };

      int ret =
         drmCommandWriteRead(dev->fd, DRM_MSM_GEM_INFO, &req, sizeof(req));
      if (ret) {
         err(1, "MSM_INFO_GET_OFFSET failure %d", ret);
      }

      void *map = mmap(0, buf->size, PROT_READ | PROT_WRITE, MAP_SHARED,
                       dev->fd, req.value);
      if (map == MAP_FAILED) {
         err(1, "mmap failure");
      }

      buf->map = map;
   }
}

void
buffer_mem_free(struct device *dev, struct buffer *buf)
{
   if (dev->has_set_iova) {
      munmap(buf->map, buf->size);

      struct drm_msm_gem_info req_iova = {
         .handle = buf->gem_handle,
         .info = MSM_INFO_SET_IOVA,
         .value = 0,
      };

      int ret = drmCommandWriteRead(dev->fd, DRM_MSM_GEM_INFO, &req_iova,
                                    sizeof(req_iova));
      if (ret < 0) {
         err(1, "MSM_INFO_SET_IOVA(0) failed! %d", ret);
         return;
      }

      struct drm_gem_close req = {
         .handle = buf->gem_handle,
      };
      drmIoctl(dev->fd, DRM_IOCTL_GEM_CLOSE, &req);
   }

   util_vma_heap_free(&dev->vma, buf->iova, buf->size);
}

#elif FD_REPLAY_KGSL
static int
safe_ioctl(int fd, unsigned long request, void *arg)
{
   int ret;

   do {
      ret = ioctl(fd, request, arg);
   } while (ret == -1 && (errno == EINTR || errno == EAGAIN));

   return ret;
}

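/* KGSL cannot place a BO at an arbitrary GPU address, so allocate one large
 * IO-coherent BO up front and sub-allocate the captured buffers from it.
 */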
static struct device *
device_create(uint64_t base_addr)
{
   struct device *dev = calloc(sizeof(struct device), 1);

   static const char path[] = "/dev/kgsl-3d0";

   dev->fd = open(path, O_RDWR | O_CLOEXEC);
   if (dev->fd < 0) {
      errx(1, "Cannot open KGSL fd!");
   }

   struct kgsl_gpumem_alloc_id req = {
      .size = FAKE_ADDRESS_SPACE_SIZE,
      .flags = KGSL_MEMFLAGS_IOCOHERENT,
   };

   int ret = safe_ioctl(dev->fd, IOCTL_KGSL_GPUMEM_ALLOC_ID, &req);
   if (ret) {
      err(1, "IOCTL_KGSL_GPUMEM_ALLOC_ID failure");
   }

   dev->va_id = req.id;
   dev->va_iova = req.gpuaddr;
   dev->va_map = mmap(0, FAKE_ADDRESS_SPACE_SIZE, PROT_READ | PROT_WRITE,
                      MAP_SHARED, dev->fd, req.id << 12);

   rb_tree_init(&dev->buffers);
   util_vma_heap_init(&dev->vma, req.gpuaddr,
                      ROUND_DOWN_TO(FAKE_ADDRESS_SPACE_SIZE, 4096));
   u_vector_init(&dev->cmdstreams, 8, sizeof(struct cmdstream));
   u_vector_init(&dev->wrbufs, 8, sizeof(struct wrbuf));

   struct kgsl_drawctxt_create drawctxt_req = {
      .flags = KGSL_CONTEXT_SAVE_GMEM |
               KGSL_CONTEXT_NO_GMEM_ALLOC |
               KGSL_CONTEXT_PREAMBLE,
   };

   ret = safe_ioctl(dev->fd, IOCTL_KGSL_DRAWCTXT_CREATE, &drawctxt_req);
   if (ret) {
      err(1, "IOCTL_KGSL_DRAWCTXT_CREATE failure");
   }

   printf("Allocated iova %" PRIx64 "\n", dev->va_iova);

   dev->context_id = drawctxt_req.drawctxt_id;

   return dev;
}

static void
device_submit_cmdstreams(struct device *dev)
{
   if (!u_vector_length(&dev->cmdstreams)) {
      device_free_buffers(dev);
      return;
   }

   struct kgsl_command_object cmds[u_vector_length(&dev->cmdstreams)];

   uint32_t idx = 0;
   struct cmdstream *cmd;
   u_vector_foreach(cmd, &dev->cmdstreams) {
      struct kgsl_command_object *submit_cmd = &cmds[idx++];
      submit_cmd->gpuaddr = cmd->iova;
      submit_cmd->size = cmd->size;
      submit_cmd->flags = KGSL_CMDLIST_IB;
      submit_cmd->id = dev->va_id;
   }

   struct kgsl_gpu_command submit_req = {
      .flags = KGSL_CMDBATCH_SUBMIT_IB_LIST,
      .cmdlist = (uintptr_t)&cmds,
      .cmdsize = sizeof(struct kgsl_command_object),
      .numcmds = u_vector_length(&dev->cmdstreams),
      .numsyncs = 0,
      .context_id = dev->context_id,
   };

   int ret = safe_ioctl(dev->fd, IOCTL_KGSL_GPU_COMMAND, &submit_req);

   if (ret) {
      err(1, "IOCTL_KGSL_GPU_COMMAND failure %d", ret);
   }

   struct kgsl_device_waittimestamp_ctxtid wait = {
      .context_id = dev->context_id,
      .timestamp = submit_req.timestamp,
      .timeout = 3000,
   };

   ret = safe_ioctl(dev->fd, IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID, &wait);

   if (ret) {
      err(1, "IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID failure %d", ret);
   }

   u_vector_finish(&dev->cmdstreams);
   u_vector_init(&dev->cmdstreams, 8, sizeof(struct cmdstream));

   device_print_shader_log(dev);
   device_print_cp_log(dev);

   device_dump_wrbuf(dev);
   u_vector_finish(&dev->wrbufs);
   u_vector_init(&dev->wrbufs, 8, sizeof(struct wrbuf));

   device_free_buffers(dev);
}

static void
buffer_mem_alloc(struct device *dev, struct buffer *buf)
{
   bool success = util_vma_heap_alloc_addr(&dev->vma, buf->iova, buf->size);
   if (!success)
      errx(1, "Failed to allocate buffer");

   buf->map = ((uint8_t *)dev->va_map) + (buf->iova - dev->va_iova);
}

void
buffer_mem_free(struct device *dev, struct buffer *buf)
{
   util_vma_heap_free(&dev->vma, buf->iova, buf->size);
}
#else

static int
safe_ioctl(int fd, unsigned long request, void *arg)
{
   int ret;

   do {
      ret = ioctl(fd, request, arg);
   } while (ret == -1 && (errno == EINTR || errno == EAGAIN));

   return ret;
}

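/* Private driver data blobs passed to dxgkrnl on WSL. The layouts below were
 * reverse-engineered from captured submissions; the unk* fields appear to be
 * required constants whose meaning is unknown.
 */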
struct alloc_priv_info {
   __u32 struct_size;
   char _pad0[4];
   __u32 unk0; // 1
   char _pad1[4];
   __u64 size;
   __u32 alignment;
   char _pad2[20];
   __u64 allocated_size;
   __u32 unk1; // 1
   char _pad4[8]; /* offset: 60 */
   __u32 unk2; // 61
   char _pad5[76];
   __u32 unk3; /* offset: 148 */ // 1
   char _pad6[8];
   __u32 unk4; /* offset: 160 */ // 1
   char _pad7[44];
   __u32 unk5; /* offset: 208 */ // 3
   char _pad8[16];
   __u32 size_2; /* offset: 228 */
   __u32 unk6; // 1
   __u32 size_3;
   __u32 size_4;
   __u32 unk7; /* offset: 244 */ // 1
   char _pad9[56];
};
static_assert(sizeof(struct alloc_priv_info) == 304);
static_assert(offsetof(struct alloc_priv_info, unk1) == 56);
static_assert(offsetof(struct alloc_priv_info, unk3) == 148);
static_assert(offsetof(struct alloc_priv_info, unk5) == 208);

struct submit_priv_ib_info {
   char _pad5[4];
   __u32 size_dwords;
   __u64 iova;
   char _pad6[8];
} __attribute__((packed));

struct submit_priv_data {
   __u32 magic0;
   char _pad0[4];
   __u32 struct_size;
   char _pad1[4];
   /* It seems that the priv data can contain several sub-datas; the cmdbuf
    * is one of them, and after it there is another 8-byte struct without
    * anything useful in it. That second data doesn't seem important for
    * replaying.
    */
   __u32 datas_count;
   char _pad2[32];
   struct {
      __u32 magic1;
      __u32 data_size;

      struct {
         __u32 unk1;
         __u32 cmdbuf_size;
         char _pad3[32];
         __u32 ib_count;
         char _pad4[36];

         struct submit_priv_ib_info ibs[];
      } cmdbuf;
   } data0;

   // unsigned char magic2[8];
} __attribute__((packed));
static_assert(offsetof(struct submit_priv_data, data0) == 0x34);
static_assert(offsetof(struct submit_priv_data, data0.cmdbuf.ibs) == 0x8c);

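/* Bring up a GPU context through /dev/dxg: enumerate and open the adapter,
 * create a device, a virtual context and a paging queue, then map one large
 * allocation at base_addr to stand in for the captured address space.
 */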
static struct device *
device_create(uint64_t base_addr)
{
   struct device *dev = calloc(sizeof(struct device), 1);

   static const char path[] = "/dev/dxg";

   dev->fd = open(path, O_RDWR | O_CLOEXEC);
   if (dev->fd < 0) {
      errx(1, "Cannot open /dev/dxg fd");
   }

   struct d3dkmt_adapterinfo adapters[1];
   struct d3dkmt_enumadapters3 enum_adapters = {
      .adapter_count = 1,
      .adapters = adapters,
   };
   int ret = safe_ioctl(dev->fd, LX_DXENUMADAPTERS3, &enum_adapters);
   if (ret) {
      errx(1, "LX_DXENUMADAPTERS3 failure");
   }

   if (enum_adapters.adapter_count == 0) {
      errx(1, "No adapters found");
   }

   struct winluid adapter_luid = enum_adapters.adapters[0].adapter_luid;

   struct d3dkmt_openadapterfromluid open_adapter = {
      .adapter_luid = adapter_luid,
   };
   ret = safe_ioctl(dev->fd, LX_DXOPENADAPTERFROMLUID, &open_adapter);
   if (ret) {
      errx(1, "LX_DXOPENADAPTERFROMLUID failure");
   }

   struct d3dkmthandle adapter = open_adapter.adapter_handle;

   struct d3dkmt_createdevice create_device = {
      .adapter = adapter,
   };
   ret = safe_ioctl(dev->fd, LX_DXCREATEDEVICE, &create_device);
   if (ret) {
      errx(1, "LX_DXCREATEDEVICE failure");
   }

   struct d3dkmthandle device = create_device.device;
   dev->device = device;

   unsigned char create_context_priv_data[] = {
      0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00,
      0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1c, 0x0c, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
   };

   struct d3dkmt_createcontextvirtual create_context = {
      .device = device,
      .node_ordinal = 0,
      .engine_affinity = 1,
      .priv_drv_data = create_context_priv_data,
      .priv_drv_data_size = sizeof(create_context_priv_data),
      .client_hint = 16,
   };
   ret = safe_ioctl(dev->fd, LX_DXCREATECONTEXTVIRTUAL, &create_context);
   if (ret) {
      errx(1, "LX_DXCREATECONTEXTVIRTUAL failure");
   }

   dev->context = create_context.context;

   struct d3dkmt_createpagingqueue create_paging_queue = {
      .device = device,
      .priority = _D3DDDI_PAGINGQUEUE_PRIORITY_NORMAL,
      .physical_adapter_index = 0,
   };
   ret = safe_ioctl(dev->fd, LX_DXCREATEPAGINGQUEUE, &create_paging_queue);
   if (ret) {
      errx(1, "LX_DXCREATEPAGINGQUEUE failure");
   }
   struct d3dkmthandle paging_queue = create_paging_queue.paging_queue;

   uint32_t alloc_size = FAKE_ADDRESS_SPACE_SIZE;
   struct alloc_priv_info priv_alloc_info = {
      .struct_size = sizeof(struct alloc_priv_info),
      .unk0 = 1,
      .size = alloc_size,
      .alignment = 4096,
      .unk1 = 1,
      .unk2 = 61,
      .unk3 = 1,
      .unk4 = 1,
      .unk5 = 3,
      .size_2 = alloc_size,
      .unk6 = 1,
      .size_3 = alloc_size,
      .size_4 = alloc_size,
      .unk7 = 1,
   };

   struct d3dddi_allocationinfo2 alloc_info = {
      .priv_drv_data = &priv_alloc_info,
      .priv_drv_data_size = sizeof(struct alloc_priv_info),
   };

   struct d3dkmt_createallocation create_allocation = {
      .device = device,
      .alloc_count = 1,
      .allocation_info = &alloc_info,
   };
   ret = safe_ioctl(dev->fd, LX_DXCREATEALLOCATION, &create_allocation);
   if (ret) {
      errx(1, "LX_DXCREATEALLOCATION failure");
   }

   assert(priv_alloc_info.allocated_size == alloc_size);

   struct d3dddi_mapgpuvirtualaddress map_virtual_address = {
      .paging_queue = paging_queue,
      .base_address = base_addr,
      .maximum_address = 18446744073709551615ull,
      .allocation = create_allocation.allocation_info[0].allocation,
      .size_in_pages = MAX2(alloc_size / 4096, 1),
      .protection = {
         .write = 1,
         .execute = 1,
      },
   };
   ret = safe_ioctl(dev->fd, LX_DXMAPGPUVIRTUALADDRESS, &map_virtual_address);
   if (ret != 259) {
      errx(1, "LX_DXMAPGPUVIRTUALADDRESS failure");
   }

   __u32 priority = 0;
   struct d3dddi_makeresident make_resident = {
      .paging_queue = paging_queue,
      .alloc_count = 1,
      .allocation_list = &create_allocation.allocation_info[0].allocation,
      .priority_list = &priority,
   };
   ret = safe_ioctl(dev->fd, LX_DXMAKERESIDENT, &make_resident);
   if (ret != 259) {
      errx(1, "LX_DXMAKERESIDENT failure");
   }

   struct d3dkmt_lock2 lock = {
      .device = device,
      .allocation = create_allocation.allocation_info[0].allocation,
   };
   ret = safe_ioctl(dev->fd, LX_DXLOCK2, &lock);
   if (ret) {
      errx(1, "LX_DXLOCK2 failure");
   }

   dev->va_iova = map_virtual_address.virtual_address;
   dev->va_map = lock.data;

   rb_tree_init(&dev->buffers);
   util_vma_heap_init(&dev->vma, dev->va_iova, ROUND_DOWN_TO(alloc_size, 4096));
   u_vector_init(&dev->cmdstreams, 8, sizeof(struct cmdstream));
   u_vector_init(&dev->wrbufs, 8, sizeof(struct wrbuf));

   printf("Allocated iova at 0x%" PRIx64 "\n", dev->va_iova);

   uint64_t hole_size = 4096;
   dev->vma.alloc_high = true;
   dev->fence_iova = util_vma_heap_alloc(&dev->vma, hole_size, 4096);
   dev->fence_ib_iova = dev->fence_iova + 8;
   dev->fence = (uint32_t *)((uint8_t *)dev->va_map + (dev->fence_iova - dev->va_iova));
   dev->fence_ib = (uint32_t *)((uint8_t *)dev->va_map + (dev->fence_ib_iova - dev->va_iova));
   dev->vma.alloc_high = false;

   return dev;
}

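/* Submit the queued cmdstreams plus one extra IB that CP_MEM_WRITEs a marker
 * into the fence BO; completion is then detected by polling that marker.
 */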
static void
device_submit_cmdstreams(struct device *dev)
{
   if (!u_vector_length(&dev->cmdstreams)) {
      device_free_buffers(dev);
      return;
   }

   uint32_t cmdstream_count = u_vector_length(&dev->cmdstreams) + 1;

   uint32_t priv_data_size =
      sizeof(struct submit_priv_data) +
      cmdstream_count * sizeof(struct submit_priv_ib_info);

   struct submit_priv_data *priv_data = calloc(1, priv_data_size);
   priv_data->magic0 = 0xccaabbee;
   priv_data->struct_size = priv_data_size;
   priv_data->datas_count = 1;

   priv_data->data0.magic1 = 0xfadcab02;
   priv_data->data0.data_size =
      sizeof(priv_data->data0) +
      cmdstream_count * sizeof(struct submit_priv_ib_info);
   priv_data->data0.cmdbuf.unk1 = 0xcccc0001;
   priv_data->data0.cmdbuf.cmdbuf_size =
      sizeof(priv_data->data0.cmdbuf) +
      cmdstream_count * sizeof(struct submit_priv_ib_info);
   priv_data->data0.cmdbuf.ib_count = cmdstream_count;

   struct cmdstream *cmd;
   uint32_t idx = 0;
   u_vector_foreach(cmd, &dev->cmdstreams) {
      priv_data->data0.cmdbuf.ibs[idx].size_dwords = cmd->size / 4;
      priv_data->data0.cmdbuf.ibs[idx].iova = cmd->iova;
      idx++;
   }

   priv_data->data0.cmdbuf.ibs[idx].size_dwords = 4;
   priv_data->data0.cmdbuf.ibs[idx].iova = dev->fence_ib_iova;

   *dev->fence = 0x00000000;
   dev->fence_ib[0] = pm4_pkt7_hdr(0x3d, 3); // CP_MEM_WRITE
   dev->fence_ib[1] = dev->fence_iova;
   dev->fence_ib[2] = dev->fence_iova >> 32;
   dev->fence_ib[3] = 0xababfcfc;

   // Fill second (empty) data block
   // uint32_t *magic_end = (uint32_t *)(((char *) priv_data) + priv_data_size - 8);
   // magic_end[0] = 0xfadcab00;
   // magic_end[1] = 0x00000008;

   struct d3dkmt_submitcommand submission = {
      .command_buffer = priv_data->data0.cmdbuf.ibs[0].iova,
      .command_length = priv_data->data0.cmdbuf.ibs[0].size_dwords * sizeof(uint32_t),
      .broadcast_context_count = 1,
      .broadcast_context[0] = dev->context,
      .priv_drv_data_size = priv_data_size,
      .priv_drv_data = priv_data,
   };

   int ret = safe_ioctl(dev->fd, LX_DXSUBMITCOMMAND, &submission);
   if (ret) {
      errx(1, "LX_DXSUBMITCOMMAND failure");
   }

   free(priv_data);

   u_vector_finish(&dev->cmdstreams);
   u_vector_init(&dev->cmdstreams, 8, sizeof(struct cmdstream));

   // TODO: better way to wait
   for (unsigned i = 0; i < 1000; i++) {
      usleep(1000);
      if (*dev->fence != 0)
         break;
   }
   if (*dev->fence == 0) {
      errx(1, "Waiting for submission failed! GPU faulted or kernel did not execute this submission.");
   }

   device_print_shader_log(dev);
   device_print_cp_log(dev);

   device_dump_wrbuf(dev);
   u_vector_finish(&dev->wrbufs);
   u_vector_init(&dev->wrbufs, 8, sizeof(struct wrbuf));

   device_free_buffers(dev);
}

static void
buffer_mem_alloc(struct device *dev, struct buffer *buf)
{
   bool success = util_vma_heap_alloc_addr(&dev->vma, buf->iova, buf->size);
   if (!success)
      errx(1, "Failed to allocate buffer");

   buf->map = ((uint8_t *)dev->va_map) + (buf->iova - dev->va_iova);
}

void
buffer_mem_free(struct device *dev, struct buffer *buf)
{
   util_vma_heap_free(&dev->vma, buf->iova, buf->size);
}

#endif

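/* (Re)create the buffer backing the given iova and copy the captured
 * contents into it; reallocate if a later section reports a different size.
 */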
static void
upload_buffer(struct device *dev, uint64_t iova, unsigned int size,
              void *hostptr)
{
   struct buffer *buf = device_get_buffer(dev, iova);

   if (!buf) {
      buf = calloc(sizeof(struct buffer), 1);
      buf->iova = iova;
      buf->size = size;

      rb_tree_insert(&dev->buffers, &buf->node, rb_buffer_insert_cmp);

      buffer_mem_alloc(dev, buf);
   } else if (buf->size != size) {
      buffer_mem_free(dev, buf);
      buf->size = size;
      buffer_mem_alloc(dev, buf);
   }

   memcpy(buf->map, hostptr, size);

   buf->used = true;
}

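/* Run the external generator to produce a replacement cmdstream inside the
 * largest free hole of the address space, then parse the resulting .rd file
 * and upload its buffers in place of the overridden submit.
 */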
static int
override_cmdstream(struct device *dev, struct cmdstream *cs,
                   const char *cmdstreamgen)
{
#if FD_REPLAY_KGSL
   static const char *tmpfilename = "/sdcard/Download/cmdstream_override.rd";
#elif FD_REPLAY_MSM || FD_REPLAY_WSL
   static const char *tmpfilename = "/tmp/cmdstream_override.rd";
#endif

   /* Find free space for the new cmdstreams and resources we will use
    * when overriding the existing cmdstream.
    */
   uint64_t hole_size = util_vma_heap_get_max_free_continuous_size(&dev->vma);
   uint64_t hole_iova = util_vma_heap_alloc(&dev->vma, hole_size, 1);
   util_vma_heap_free(&dev->vma, hole_iova, hole_size);

   char cmd[2048];
   snprintf(cmd, sizeof(cmd),
            "%s --vastart=%" PRIu64 " --vasize=%" PRIu64 " %s", cmdstreamgen,
            hole_iova, hole_size, tmpfilename);

   printf("generating cmdstream '%s'\n", cmd);

   int ret = system(cmd);
   if (ret) {
      fprintf(stderr, "Error executing %s\n", cmd);
      return -1;
   }

   struct io *io;
   struct rd_parsed_section ps = {0};

   io = io_open(tmpfilename);
   if (!io) {
      fprintf(stderr, "could not open: %s\n", tmpfilename);
      return -1;
   }

   struct {
      unsigned int len;
      uint64_t gpuaddr;
   } gpuaddr = {0};

   while (parse_rd_section(io, &ps)) {
      switch (ps.type) {
      case RD_GPUADDR:
         parse_addr(ps.buf, ps.sz, &gpuaddr.len, &gpuaddr.gpuaddr);
         /* no-op */
         break;
      case RD_BUFFER_CONTENTS:
         upload_buffer(dev, gpuaddr.gpuaddr, gpuaddr.len, ps.buf);
         ps.buf = NULL;
         break;
      case RD_CMDSTREAM_ADDR: {
         unsigned int sizedwords;
         uint64_t gpuaddr;
         parse_addr(ps.buf, ps.sz, &sizedwords, &gpuaddr);
         printf("override cmdstream: %d dwords\n", sizedwords);

         cs->iova = gpuaddr;
         cs->size = sizedwords * sizeof(uint32_t);
         break;
      }
      case RD_SHADER_LOG_BUFFER: {
         unsigned int sizedwords;
         parse_addr(ps.buf, ps.sz, &sizedwords, &dev->shader_log_iova);
         break;
      }
      case RD_CP_LOG_BUFFER: {
         unsigned int sizedwords;
         parse_addr(ps.buf, ps.sz, &sizedwords, &dev->cp_log_iova);
         break;
      }
      case RD_WRBUFFER: {
         struct wrbuf *wrbuf = u_vector_add(&dev->wrbufs);
         uint64_t *p = (uint64_t *)ps.buf;
         wrbuf->iova = p[0];
         wrbuf->size = p[1];
         bool clear = p[2];
         int name_len = ps.sz - (3 * sizeof(uint64_t));
         wrbuf->name = calloc(sizeof(char), name_len);
         memcpy(wrbuf->name, (char *)(p + 3), name_len); // includes null terminator

         if (clear) {
            struct buffer *buf = device_get_buffer(dev, wrbuf->iova);
            assert(buf);

            uint64_t offset = wrbuf->iova - buf->iova;
            uint64_t end = MIN2(offset + wrbuf->size, buf->size);
            while (offset < end) {
               static const uint64_t clear_value = 0xdeadbeefdeadbeef;
               memcpy(buf->map + offset, &clear_value,
                      MIN2(sizeof(clear_value), end - offset));
               offset += sizeof(clear_value);
            }
         }

         break;
      }
      default:
         break;
      }
   }

   io_close(io);
   if (ps.ret < 0) {
      fprintf(stderr, "corrupt file %s\n", tmpfilename);
   }

   return ps.ret;
}

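/* Parse one .rd capture: restore buffers as they appear, batch the cmdstreams
 * of each submit and replay them, optionally overriding one submit with a
 * generated cmdstream and skipping submits outside [first_submit, last_submit].
 */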
static int
handle_file(const char *filename, uint32_t first_submit, uint32_t last_submit,
            uint32_t submit_to_override, uint64_t base_addr,
            const char *cmdstreamgen)
{
   struct io *io;
   int submit = 0;
   bool skip = false;
   bool need_submit = false;
   struct rd_parsed_section ps = {0};

   printf("Reading %s...\n", filename);

   if (!strcmp(filename, "-"))
      io = io_openfd(0);
   else
      io = io_open(filename);

   if (!io) {
      fprintf(stderr, "could not open: %s\n", filename);
      return -1;
   }

   struct device *dev = device_create(base_addr);

   struct {
      unsigned int len;
      uint64_t gpuaddr;
   } gpuaddr = {0};

   while (parse_rd_section(io, &ps)) {
      switch (ps.type) {
      case RD_TEST:
      case RD_VERT_SHADER:
      case RD_FRAG_SHADER:
         /* no-op */
         break;
      case RD_CMD:
         skip = false;
         if (exename) {
            skip |= (strstr(ps.buf, exename) != ps.buf);
         } else {
            skip |= (strstr(ps.buf, "fdperf") == ps.buf);
            skip |= (strstr(ps.buf, "chrome") == ps.buf);
            skip |= (strstr(ps.buf, "surfaceflinger") == ps.buf);
            skip |= ((char *)ps.buf)[0] == 'X';
         }
         break;

      case RD_GPUADDR:
         if (need_submit) {
            need_submit = false;
            device_submit_cmdstreams(dev);
         }

         parse_addr(ps.buf, ps.sz, &gpuaddr.len, &gpuaddr.gpuaddr);
         /* no-op */
         break;
      case RD_BUFFER_CONTENTS:
         /* TODO: skip buffer uploading and even reading if this buffer
          * is used for a submit outside of the [first_submit, last_submit]
          * range. A set of buffers is shared between several cmdstreams,
          * so we'd have to find starting from which RD_CMD to upload
          * the buffers.
          */
         upload_buffer(dev, gpuaddr.gpuaddr, gpuaddr.len, ps.buf);
         break;
      case RD_CMDSTREAM_ADDR: {
         unsigned int sizedwords;
         uint64_t gpuaddr;
         parse_addr(ps.buf, ps.sz, &sizedwords, &gpuaddr);

         bool add_submit =
            !skip && (submit >= first_submit) && (submit <= last_submit);
         printf("%scmdstream %d: %d dwords\n", add_submit ? "" : "skipped ",
                submit, sizedwords);

         if (add_submit) {
            struct cmdstream *cs = u_vector_add(&dev->cmdstreams);

            if (submit == submit_to_override) {
               if (override_cmdstream(dev, cs, cmdstreamgen) < 0)
                  break;
            } else {
               cs->iova = gpuaddr;
               cs->size = sizedwords * sizeof(uint32_t);
            }
         }

         need_submit = true;

         submit++;
         break;
      }
      case RD_GPU_ID: {
         uint32_t gpu_id = parse_gpu_id(ps.buf);
         if (gpu_id)
            printf("gpuid: %d\n", gpu_id);
         break;
      }
      case RD_CHIP_ID: {
         uint64_t chip_id = parse_chip_id(ps.buf);
         printf("chip_id: 0x%" PRIx64 "\n", chip_id);
         break;
      }
      default:
         break;
      }
   }

   if (need_submit)
      device_submit_cmdstreams(dev);

   close(dev->fd);

   io_close(io);
   fflush(stdout);

   if (ps.ret < 0) {
      printf("corrupt file\n");
   }
   return 0;
}