1 /*
2 * Copyright © 2020 Google, Inc.
3 * SPDX-License-Identifier: MIT
4 */
5
6 #include "tu_knl.h"
7
8 #include <errno.h>
9 #include <fcntl.h>
10 #include <poll.h>
11 #include <stdint.h>
12 #include <sys/ioctl.h>
13 #include <sys/mman.h>
14 #include <linux/dma-heap.h>
15
16 #include "msm_kgsl.h"
17 #include "ion/ion.h"
18 #include "ion/ion_4.19.h"
19
20 #include "vk_util.h"
21
22 #include "util/os_file.h"
23 #include "util/u_debug.h"
24 #include "util/u_vector.h"
25 #include "util/libsync.h"
26 #include "util/timespec.h"
27
28 #include "tu_cmd_buffer.h"
29 #include "tu_cs.h"
30 #include "tu_device.h"
31 #include "tu_dynamic_rendering.h"
32 #include "tu_queue.h"
33 #include "tu_rmv.h"
34
35 /* ION_HEAP(ION_SYSTEM_HEAP_ID) */
36 #define KGSL_ION_SYSTEM_HEAP_MASK (1u << 25)
37
38
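/* Wrapper around ioctl() that retries the call when it is interrupted
 * (EINTR) or temporarily fails (EAGAIN), so callers don't need to handle
 * restarts themselves.
 */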
39 static int
40 safe_ioctl(int fd, unsigned long request, void *arg)
41 {
42 int ret;
43
44 do {
45 ret = ioctl(fd, request, arg);
46 } while (ret == -1 && (errno == EINTR || errno == EAGAIN));
47
48 return ret;
49 }
50
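/* kgsl has no separate submitqueue object; a draw context is created
 * instead and its id is reused as the queue id. The priority argument is
 * currently unused here.
 */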
51 static int
52 kgsl_submitqueue_new(struct tu_device *dev,
53 int priority,
54 uint32_t *queue_id)
55 {
56 struct kgsl_drawctxt_create req = {
57 .flags = KGSL_CONTEXT_SAVE_GMEM |
58 KGSL_CONTEXT_NO_GMEM_ALLOC |
59 KGSL_CONTEXT_PREAMBLE,
60 };
61
62 int ret = safe_ioctl(dev->physical_device->local_fd, IOCTL_KGSL_DRAWCTXT_CREATE, &req);
63 if (ret)
64 return ret;
65
66 *queue_id = req.drawctxt_id;
67
68 return 0;
69 }
70
71 static void
72 kgsl_submitqueue_close(struct tu_device *dev, uint32_t queue_id)
73 {
74 struct kgsl_drawctxt_destroy req = {
75 .drawctxt_id = queue_id,
76 };
77
78 safe_ioctl(dev->physical_device->local_fd, IOCTL_KGSL_DRAWCTXT_DESTROY, &req);
79 }
80
81 static void kgsl_bo_finish(struct tu_device *dev, struct tu_bo *bo);
82
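/* Shareable BOs are allocated from a DMA-BUF heap (or ION, below) and then
 * imported back through tu_bo_init_dmabuf(); the -1 passed as the size
 * presumably lets the import path take the size from the dma-buf itself.
 */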
83 static VkResult
84 bo_init_new_dmaheap(struct tu_device *dev, struct tu_bo **out_bo, uint64_t size,
85 enum tu_bo_alloc_flags flags)
86 {
87 struct dma_heap_allocation_data alloc = {
88 .len = size,
89 .fd_flags = O_RDWR | O_CLOEXEC,
90 };
91
92 int ret;
93 ret = safe_ioctl(dev->physical_device->kgsl_dma_fd, DMA_HEAP_IOCTL_ALLOC,
94 &alloc);
95
96 if (ret) {
97 return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
98 "DMA_HEAP_IOCTL_ALLOC failed (%s)", strerror(errno));
99 }
100
101 return tu_bo_init_dmabuf(dev, out_bo, -1, alloc.fd);
102 }
103
104 static VkResult
105 bo_init_new_ion(struct tu_device *dev, struct tu_bo **out_bo, uint64_t size,
106 enum tu_bo_alloc_flags flags)
107 {
108 struct ion_new_allocation_data alloc = {
109 .len = size,
110 .heap_id_mask = KGSL_ION_SYSTEM_HEAP_MASK,
111 .flags = 0,
112 .fd = -1,
113 };
114
115 int ret;
116 ret = safe_ioctl(dev->physical_device->kgsl_dma_fd, ION_IOC_NEW_ALLOC, &alloc);
117 if (ret) {
118 return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
119 "ION_IOC_NEW_ALLOC failed (%s)", strerror(errno));
120 }
121
122 return tu_bo_init_dmabuf(dev, out_bo, -1, alloc.fd);
123 }
124
125 static VkResult
126 bo_init_new_ion_legacy(struct tu_device *dev, struct tu_bo **out_bo, uint64_t size,
127 enum tu_bo_alloc_flags flags)
128 {
129 struct ion_allocation_data alloc = {
130 .len = size,
131 .align = 4096,
132 .heap_id_mask = KGSL_ION_SYSTEM_HEAP_MASK,
133 .flags = 0,
134 .handle = -1,
135 };
136
137 int ret;
138 ret = safe_ioctl(dev->physical_device->kgsl_dma_fd, ION_IOC_ALLOC, &alloc);
139 if (ret) {
140 return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
141 "ION_IOC_ALLOC failed (%s)", strerror(errno));
142 }
143
144 struct ion_fd_data share = {
145 .handle = alloc.handle,
146 .fd = -1,
147 };
148
149 ret = safe_ioctl(dev->physical_device->kgsl_dma_fd, ION_IOC_SHARE, &share);
150 if (ret) {
151 return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
152 "ION_IOC_SHARE failed (%s)", strerror(errno));
153 }
154
155 struct ion_handle_data free = {
156 .handle = alloc.handle,
157 };
158 ret = safe_ioctl(dev->physical_device->kgsl_dma_fd, ION_IOC_FREE, &free);
159 if (ret) {
160 return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
161 "ION_IOC_FREE failed (%s)", strerror(errno));
162 }
163
164 return tu_bo_init_dmabuf(dev, out_bo, -1, share.fd);
165 }
166
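/* BO allocation entry point. Shareable BOs go through dma-heap/ION so they
 * can be exported as dma-bufs; everything else is allocated with
 * IOCTL_KGSL_GPUMEM_ALLOC_ID. Replayable BOs use KGSL_MEMFLAGS_USE_CPU_MAP
 * so the CPU mapping address doubles as the GPU iova (see below).
 */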
167 static VkResult
168 kgsl_bo_init(struct tu_device *dev,
169 struct vk_object_base *base,
170 struct tu_bo **out_bo,
171 uint64_t size,
172 uint64_t client_iova,
173 VkMemoryPropertyFlags mem_property,
174 enum tu_bo_alloc_flags flags,
175 const char *name)
176 {
177 if (flags & TU_BO_ALLOC_SHAREABLE) {
178 /* The Vulkan spec doesn't forbid allocating exportable memory with a
179 * fixed address, only imported memory, but on kgsl we can't sensibly
180 * implement it so just always reject it.
181 */
182 if (client_iova) {
183 return vk_errorf(dev, VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS,
184 "cannot allocate an exportable BO with a fixed address");
185 }
186
187 switch(dev->physical_device->kgsl_dma_type) {
188 case TU_KGSL_DMA_TYPE_DMAHEAP:
189 return bo_init_new_dmaheap(dev, out_bo, size, flags);
190 case TU_KGSL_DMA_TYPE_ION:
191 return bo_init_new_ion(dev, out_bo, size, flags);
192 case TU_KGSL_DMA_TYPE_ION_LEGACY:
193 return bo_init_new_ion_legacy(dev, out_bo, size, flags);
194 }
195 }
196
197 struct kgsl_gpumem_alloc_id req = {
198 .size = size,
199 };
200
201 if (mem_property & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) {
202 if (mem_property & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) {
203 req.flags |= KGSL_MEMFLAGS_IOCOHERENT;
204 }
205
206 req.flags |= KGSL_CACHEMODE_WRITEBACK << KGSL_CACHEMODE_SHIFT;
207 } else {
208 req.flags |= KGSL_CACHEMODE_WRITECOMBINE << KGSL_CACHEMODE_SHIFT;
209 }
210
211 if (flags & TU_BO_ALLOC_GPU_READ_ONLY)
212 req.flags |= KGSL_MEMFLAGS_GPUREADONLY;
213
214 if (flags & TU_BO_ALLOC_REPLAYABLE)
215 req.flags |= KGSL_MEMFLAGS_USE_CPU_MAP;
216
217 int ret;
218
219 ret = safe_ioctl(dev->physical_device->local_fd,
220 IOCTL_KGSL_GPUMEM_ALLOC_ID, &req);
221 if (ret) {
222 return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
223 "GPUMEM_ALLOC_ID failed (%s)", strerror(errno));
224 }
225
226 struct tu_bo* bo = tu_device_lookup_bo(dev, req.id);
227 assert(bo && bo->gem_handle == 0);
228
229 *bo = (struct tu_bo) {
230 .gem_handle = req.id,
231 .size = req.mmapsize,
232 .iova = req.gpuaddr,
233 .name = tu_debug_bos_add(dev, req.mmapsize, name),
234 .refcnt = 1,
235 .shared_fd = -1,
236 .base = base,
237 };
238
239 if (flags & TU_BO_ALLOC_REPLAYABLE) {
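/* The kgsl device fd exposes each buffer for mmap at offset (id << 12),
 * i.e. the buffer id in page-size units; with KGSL_MEMFLAGS_USE_CPU_MAP
 * the address returned by mmap also becomes the buffer's GPU iova.
 */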
240 uint64_t offset = req.id << 12;
241 void *map = mmap((void *)client_iova, bo->size, PROT_READ | PROT_WRITE,
242 MAP_SHARED, dev->physical_device->local_fd, offset);
243 if (map == MAP_FAILED) {
244 kgsl_bo_finish(dev, bo);
245
246 return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
247 "mmap failed (%s)", strerror(errno));
248 }
249
250 if (client_iova && (uint64_t)map != client_iova) {
251 kgsl_bo_finish(dev, bo);
252
253 return vk_errorf(dev, VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS,
254 "mmap could not map the given address");
255 }
256
257 bo->map = map;
258 bo->iova = (uint64_t)map;
259
260 /* Because we're using SVM, the CPU mapping and GPU mapping are the same
261 * and the CPU mapping must stay fixed for the lifetime of the BO.
262 */
263 bo->never_unmap = true;
264 }
265
266 tu_dump_bo_init(dev, bo);
267
268 *out_bo = bo;
269
270 TU_RMV(bo_allocate, dev, bo);
271 if (flags & TU_BO_ALLOC_INTERNAL_RESOURCE) {
272 TU_RMV(internal_resource_create, dev, bo);
273 TU_RMV(resource_name, dev, bo, name);
274 }
275
276 return VK_SUCCESS;
277 }
278
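/* Import a dma-buf with IOCTL_KGSL_GPUOBJ_IMPORT and query its size and
 * iova with IOCTL_KGSL_GPUOBJ_INFO. The fd is duplicated and kept in
 * shared_fd so kgsl_bo_export_dmabuf() can hand it back out later.
 */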
279 static VkResult
280 kgsl_bo_init_dmabuf(struct tu_device *dev,
281 struct tu_bo **out_bo,
282 uint64_t size,
283 int fd)
284 {
285 struct kgsl_gpuobj_import_dma_buf import_dmabuf = {
286 .fd = fd,
287 };
288 struct kgsl_gpuobj_import req = {
289 .priv = (uintptr_t)&import_dmabuf,
290 .priv_len = sizeof(import_dmabuf),
291 .flags = 0,
292 .type = KGSL_USER_MEM_TYPE_DMABUF,
293 };
294 int ret;
295
296 ret = safe_ioctl(dev->physical_device->local_fd,
297 IOCTL_KGSL_GPUOBJ_IMPORT, &req);
298 if (ret)
299 return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
300 "Failed to import dma-buf (%s)\n", strerror(errno));
301
302 struct kgsl_gpuobj_info info_req = {
303 .id = req.id,
304 };
305
306 ret = safe_ioctl(dev->physical_device->local_fd,
307 IOCTL_KGSL_GPUOBJ_INFO, &info_req);
308 if (ret)
309 return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
310 "Failed to get dma-buf info (%s)\n", strerror(errno));
311
312 struct tu_bo* bo = tu_device_lookup_bo(dev, req.id);
313 assert(bo && bo->gem_handle == 0);
314
315 *bo = (struct tu_bo) {
316 .gem_handle = req.id,
317 .size = info_req.size,
318 .iova = info_req.gpuaddr,
319 .name = tu_debug_bos_add(dev, info_req.size, "dmabuf"),
320 .refcnt = 1,
321 .shared_fd = os_dupfd_cloexec(fd),
322 };
323
324 tu_dump_bo_init(dev, bo);
325
326 *out_bo = bo;
327
328 return VK_SUCCESS;
329 }
330
331 static int
332 kgsl_bo_export_dmabuf(struct tu_device *dev, struct tu_bo *bo)
333 {
334 assert(bo->shared_fd != -1);
335 return os_dupfd_cloexec(bo->shared_fd);
336 }
337
338 static VkResult
339 kgsl_bo_map(struct tu_device *dev, struct tu_bo *bo, void *placed_addr)
340 {
341 void *map = MAP_FAILED;
342 if (bo->shared_fd == -1) {
343 uint64_t offset = bo->gem_handle << 12;
344 map = mmap(placed_addr, bo->size, PROT_READ | PROT_WRITE,
345 MAP_SHARED | (placed_addr != NULL ? MAP_FIXED : 0),
346 dev->physical_device->local_fd, offset);
347 } else {
348 map = mmap(placed_addr, bo->size, PROT_READ | PROT_WRITE,
349 MAP_SHARED | (placed_addr != NULL ? MAP_FIXED : 0),
350 bo->shared_fd, 0);
351 }
352
353 if (map == MAP_FAILED)
354 return vk_error(dev, VK_ERROR_MEMORY_MAP_FAILED);
355
356 bo->map = map;
357 TU_RMV(bo_map, dev, bo);
358
359 return VK_SUCCESS;
360 }
361
362 static void
363 kgsl_bo_allow_dump(struct tu_device *dev, struct tu_bo *bo)
364 {
365 }
366
367 static void
368 kgsl_bo_finish(struct tu_device *dev, struct tu_bo *bo)
369 {
370 assert(bo->gem_handle);
371
372 if (!p_atomic_dec_zero(&bo->refcnt))
373 return;
374
375 if (bo->map) {
376 TU_RMV(bo_unmap, dev, bo);
377 munmap(bo->map, bo->size);
378 }
379
380 if (bo->shared_fd != -1)
381 close(bo->shared_fd);
382
383 TU_RMV(bo_destroy, dev, bo);
384 tu_debug_bos_del(dev, bo);
385 tu_dump_bo_del(dev, bo);
386
387 struct kgsl_gpumem_free_id req = {
388 .id = bo->gem_handle
389 };
390
391 /* Tell sparse array that entry is free */
392 memset(bo, 0, sizeof(*bo));
393
394 safe_ioctl(dev->physical_device->local_fd, IOCTL_KGSL_GPUMEM_FREE_ID, &req);
395 }
396
397 static VkResult
398 get_kgsl_prop(int fd, unsigned int type, void *value, size_t size)
399 {
400 struct kgsl_device_getproperty getprop = {
401 .type = type,
402 .value = value,
403 .sizebytes = size,
404 };
405
406 return safe_ioctl(fd, IOCTL_KGSL_DEVICE_GETPROPERTY, &getprop)
407 ? VK_ERROR_UNKNOWN
408 : VK_SUCCESS;
409 }
410
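/* Probe whether a combination of KGSL memory flags is supported by
 * attempting (and immediately freeing) a one-page test allocation.
 */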
411 static bool
412 kgsl_is_memory_type_supported(int fd, uint32_t flags)
413 {
414 struct kgsl_gpumem_alloc_id req_alloc = {
415 .flags = flags,
416 .size = 0x1000,
417 };
418
419 int ret = safe_ioctl(fd, IOCTL_KGSL_GPUMEM_ALLOC_ID, &req_alloc);
420 if (ret) {
421 return false;
422 }
423
424 struct kgsl_gpumem_free_id req_free = { .id = req_alloc.id };
425
426 safe_ioctl(fd, IOCTL_KGSL_GPUMEM_FREE_ID, &req_free);
427
428 return true;
429 }
430
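/* kgsl does not provide DRM syncobjs, so sync objects are emulated in
 * userspace: a syncobj is either unsignaled/signaled, backed by a
 * (queue, timestamp) pair, or backed by a sync-file fd.
 */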
431 enum kgsl_syncobj_state {
432 KGSL_SYNCOBJ_STATE_UNSIGNALED,
433 KGSL_SYNCOBJ_STATE_SIGNALED,
434 KGSL_SYNCOBJ_STATE_TS,
435 KGSL_SYNCOBJ_STATE_FD,
436 };
437
438 struct kgsl_syncobj
439 {
440 struct vk_object_base base;
441 enum kgsl_syncobj_state state;
442
443 struct tu_queue *queue;
444 uint32_t timestamp;
445
446 int fd;
447 };
448
449 static void
450 kgsl_syncobj_init(struct kgsl_syncobj *s, bool signaled)
451 {
452 s->state =
453 signaled ? KGSL_SYNCOBJ_STATE_SIGNALED : KGSL_SYNCOBJ_STATE_UNSIGNALED;
454
455 s->timestamp = UINT32_MAX;
456 s->fd = -1;
457 }
458
459 static void
460 kgsl_syncobj_reset(struct kgsl_syncobj *s)
461 {
462 if (s->state == KGSL_SYNCOBJ_STATE_FD && s->fd >= 0) {
463 ASSERTED int ret = close(s->fd);
464 assert(ret == 0);
465 s->fd = -1;
466 } else if (s->state == KGSL_SYNCOBJ_STATE_TS) {
467 s->timestamp = UINT32_MAX;
468 }
469
470 s->state = KGSL_SYNCOBJ_STATE_UNSIGNALED;
471 }
472
473 static void
474 kgsl_syncobj_destroy(struct kgsl_syncobj *s)
475 {
476 kgsl_syncobj_reset(s);
477 }
478
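/* Create a sync-file fd that signals once the given context timestamp
 * retires, using IOCTL_KGSL_TIMESTAMP_EVENT with KGSL_TIMESTAMP_EVENT_FENCE.
 */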
479 static int
480 timestamp_to_fd(struct tu_queue *queue, uint32_t timestamp)
481 {
482 int fd;
483 struct kgsl_timestamp_event event = {
484 .type = KGSL_TIMESTAMP_EVENT_FENCE,
485 .timestamp = timestamp,
486 .context_id = queue->msm_queue_id,
487 .priv = &fd,
488 .len = sizeof(fd),
489 };
490
491 int ret = safe_ioctl(queue->device->fd, IOCTL_KGSL_TIMESTAMP_EVENT, &event);
492 if (ret)
493 return -1;
494
495 return fd;
496 }
497
498 static int
499 kgsl_syncobj_ts_to_fd(const struct kgsl_syncobj *syncobj)
500 {
501 assert(syncobj->state == KGSL_SYNCOBJ_STATE_TS);
502 return timestamp_to_fd(syncobj->queue, syncobj->timestamp);
503 }
504
505 /* return true if timestamp a is greater (more recent) than b
506 * this relies on timestamps never having a difference > (1<<31)
507 */
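/* e.g. a = 1, b = 0xfffffffe: (int32_t)(a - b) == 3 >= 0, so a is
 * considered newer despite the 32-bit wraparound.
 */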
508 static inline bool
509 timestamp_cmp(uint32_t a, uint32_t b)
510 {
511 return (int32_t) (a - b) >= 0;
512 }
513
514 static uint32_t
515 max_ts(uint32_t a, uint32_t b)
516 {
517 return timestamp_cmp(a, b) ? a : b;
518 }
519
520 static uint32_t
521 min_ts(uint32_t a, uint32_t b)
522 {
523 return timestamp_cmp(a, b) ? b : a;
524 }
525
526 static int
527 get_relative_ms(uint64_t abs_timeout_ns)
528 {
529 if (abs_timeout_ns >= INT64_MAX)
530 /* A wait this long is effectively a forever wait, so return -1 here:
531 * it is the infinite timeout for poll() and also becomes the highest
532 * unsigned value when assigned to the KGSL wait ioctl's timeout field.
533 */
534 return -1;
535
536 uint64_t cur_time_ms = os_time_get_nano() / 1000000;
537 uint64_t abs_timeout_ms = abs_timeout_ns / 1000000;
538 if (abs_timeout_ms <= cur_time_ms)
539 return 0;
540
541 return abs_timeout_ms - cur_time_ms;
542 }
543
544 /* safe_ioctl is not enough as restarted waits would not adjust the timeout
545 * which could lead to waiting substantially longer than requested
546 */
547 static VkResult
548 wait_timestamp_safe(int fd,
549 unsigned int context_id,
550 unsigned int timestamp,
551 uint64_t abs_timeout_ns)
552 {
553 struct kgsl_device_waittimestamp_ctxtid wait = {
554 .context_id = context_id,
555 .timestamp = timestamp,
556 .timeout = get_relative_ms(abs_timeout_ns),
557 };
558
559 while (true) {
560 int ret = ioctl(fd, IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID, &wait);
561
562 if (ret == -1 && (errno == EINTR || errno == EAGAIN)) {
563 int timeout_ms = get_relative_ms(abs_timeout_ns);
564
565 /* update timeout to consider time that has passed since the start */
566 if (timeout_ms == 0)
567 return VK_TIMEOUT;
568
569 wait.timeout = timeout_ms;
570 } else if (ret == -1) {
571 assert(errno == ETIMEDOUT);
572 return VK_TIMEOUT;
573 } else {
574 return VK_SUCCESS;
575 }
576 }
577 }
578
579 VkResult
580 kgsl_queue_wait_fence(struct tu_queue *queue, uint32_t fence,
581 uint64_t timeout_ns)
582 {
583 uint64_t abs_timeout_ns = os_time_get_nano() + timeout_ns;
584
585 return wait_timestamp_safe(queue->device->fd, queue->msm_queue_id,
586 fence, abs_timeout_ns);
587 }
588
589 static VkResult
590 kgsl_syncobj_wait(struct tu_device *device,
591 struct kgsl_syncobj *s,
592 uint64_t abs_timeout_ns)
593 {
594 if (s->state == KGSL_SYNCOBJ_STATE_UNSIGNALED) {
595 /* If this syncobj is unsignaled we need to wait for it to resolve to a
596 * valid syncobj prior to letting the rest of the wait continue, this
597 * avoids needing kernel support for wait-before-signal semantics.
598 */
599
600 if (abs_timeout_ns == 0)
601 return VK_TIMEOUT; // If this is a simple poll then we can return early
602
603 pthread_mutex_lock(&device->submit_mutex);
604 struct timespec abstime;
605 timespec_from_nsec(&abstime, abs_timeout_ns);
606
607 while (s->state == KGSL_SYNCOBJ_STATE_UNSIGNALED) {
608 int ret;
609 if (abs_timeout_ns == UINT64_MAX) {
610 ret = pthread_cond_wait(&device->timeline_cond,
611 &device->submit_mutex);
612 } else {
613 ret = pthread_cond_timedwait(&device->timeline_cond,
614 &device->submit_mutex, &abstime);
615 }
616 if (ret != 0) {
617 assert(ret == ETIMEDOUT);
618 pthread_mutex_unlock(&device->submit_mutex);
619 return VK_TIMEOUT;
620 }
621 }
622
623 pthread_mutex_unlock(&device->submit_mutex);
624 }
625
626 switch (s->state) {
627 case KGSL_SYNCOBJ_STATE_SIGNALED:
628 return VK_SUCCESS;
629
630 case KGSL_SYNCOBJ_STATE_UNSIGNALED:
631 return VK_TIMEOUT;
632
633 case KGSL_SYNCOBJ_STATE_TS: {
634 return wait_timestamp_safe(device->fd, s->queue->msm_queue_id,
635 s->timestamp, abs_timeout_ns);
636 }
637
638 case KGSL_SYNCOBJ_STATE_FD: {
639 int ret = sync_wait(s->fd, get_relative_ms(abs_timeout_ns));
640 if (ret) {
641 assert(errno == ETIME);
642 return VK_TIMEOUT;
643 } else {
644 return VK_SUCCESS;
645 }
646 }
647
648 default:
649 unreachable("invalid syncobj state");
650 }
651 }
652
653 #define kgsl_syncobj_foreach_state(syncobjs, filter) \
654 for (uint32_t i = 0; i < count && (sync = syncobjs[i], true); i++) \
655 if (sync->state == filter)
656
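/* Wait until any of the syncobjs signals. If one is already signaled we
 * return right away. Timestamps on a single queue are comparable, so they
 * are reduced to the lowest (earliest-retiring) one; timestamps spread
 * across queues are converted to sync-file fds instead, and all fds
 * (converted or imported) are poll()ed together.
 */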
657 static VkResult
658 kgsl_syncobj_wait_any(struct tu_device *device, struct kgsl_syncobj **syncobjs, uint32_t count, uint64_t abs_timeout_ns)
659 {
660 if (count == 0)
661 return VK_TIMEOUT;
662 else if (count == 1)
663 return kgsl_syncobj_wait(device, syncobjs[0], abs_timeout_ns);
664
665 uint32_t num_fds = 0;
666 struct tu_queue *queue = NULL;
667 struct kgsl_syncobj *sync = NULL;
668
669 /* Simple case, we already have a signaled one */
670 kgsl_syncobj_foreach_state(syncobjs, KGSL_SYNCOBJ_STATE_SIGNALED)
671 return VK_SUCCESS;
672
673 kgsl_syncobj_foreach_state(syncobjs, KGSL_SYNCOBJ_STATE_FD)
674 num_fds++;
675
676 /* If we have TS from different queues we cannot compare them and would
677 * have to convert them into FDs
678 */
679 bool convert_ts_to_fd = false;
680 kgsl_syncobj_foreach_state(syncobjs, KGSL_SYNCOBJ_STATE_TS) {
681 if (queue != NULL && sync->queue != queue) {
682 convert_ts_to_fd = true;
683 break;
684 }
685 queue = sync->queue;
686 }
687
688 /* If we have neither FD nor TS syncobjs then we can return immediately */
689 if (num_fds == 0 && queue == NULL)
690 return VK_TIMEOUT;
691
692 VkResult result = VK_TIMEOUT;
693
694 struct u_vector poll_fds = { 0 };
695 uint32_t lowest_timestamp = 0;
696
697 if (convert_ts_to_fd || num_fds > 0)
698 u_vector_init(&poll_fds, 4, sizeof(struct pollfd));
699
700 if (convert_ts_to_fd) {
701 kgsl_syncobj_foreach_state(syncobjs, KGSL_SYNCOBJ_STATE_TS) {
702 struct pollfd *poll_fd = (struct pollfd *) u_vector_add(&poll_fds);
703 poll_fd->fd = timestamp_to_fd(sync->queue, sync->timestamp);
704 poll_fd->events = POLLIN;
705 }
706 } else {
707 /* TSs could be merged by finding the one with the lowest timestamp */
708 bool first_ts = true;
709 kgsl_syncobj_foreach_state(syncobjs, KGSL_SYNCOBJ_STATE_TS) {
710 if (first_ts || timestamp_cmp(sync->timestamp, lowest_timestamp)) {
711 first_ts = false;
712 lowest_timestamp = sync->timestamp;
713 }
714 }
715
716 if (num_fds) {
717 struct pollfd *poll_fd = (struct pollfd *) u_vector_add(&poll_fds);
718 poll_fd->fd = timestamp_to_fd(queue, lowest_timestamp);
719 poll_fd->events = POLLIN;
720 }
721 }
722
723 if (num_fds) {
724 kgsl_syncobj_foreach_state(syncobjs, KGSL_SYNCOBJ_STATE_FD) {
725 struct pollfd *poll_fd = (struct pollfd *) u_vector_add(&poll_fds);
726 poll_fd->fd = sync->fd;
727 poll_fd->events = POLLIN;
728 }
729 }
730
731 if (u_vector_length(&poll_fds) == 0) {
732 result = wait_timestamp_safe(device->fd, queue->msm_queue_id,
733 lowest_timestamp, MIN2(abs_timeout_ns, INT64_MAX));
734 } else {
735 int ret, i;
736
737 struct pollfd *fds = (struct pollfd *) poll_fds.data;
738 uint32_t fds_count = u_vector_length(&poll_fds);
739 do {
740 ret = poll(fds, fds_count, get_relative_ms(abs_timeout_ns));
741 if (ret > 0) {
742 for (i = 0; i < fds_count; i++) {
743 if (fds[i].revents & (POLLERR | POLLNVAL)) {
744 errno = EINVAL;
745 ret = -1;
746 break;
747 }
748 }
749 break;
750 } else if (ret == 0) {
751 errno = ETIME;
752 break;
753 }
754 } while (ret == -1 && (errno == EINTR || errno == EAGAIN));
755
756 for (uint32_t i = 0; i < fds_count - num_fds; i++)
757 close(fds[i].fd);
758
759 if (ret != 0) {
760 assert(errno == ETIME);
761 result = VK_TIMEOUT;
762 } else {
763 result = VK_SUCCESS;
764 }
765 }
766
767 u_vector_finish(&poll_fds);
768 return result;
769 }
770
771 static VkResult
772 kgsl_syncobj_export(struct kgsl_syncobj *s, int *pFd)
773 {
774 if (!pFd)
775 return VK_SUCCESS;
776
777 switch (s->state) {
778 case KGSL_SYNCOBJ_STATE_SIGNALED:
779 case KGSL_SYNCOBJ_STATE_UNSIGNALED:
780 /* Getting a sync FD from an unsignaled syncobj is UB in Vulkan */
781 *pFd = -1;
782 return VK_SUCCESS;
783
784 case KGSL_SYNCOBJ_STATE_FD:
785 if (s->fd < 0)
786 *pFd = -1;
787 else
788 *pFd = dup(s->fd);
789 return VK_SUCCESS;
790
791 case KGSL_SYNCOBJ_STATE_TS:
792 *pFd = kgsl_syncobj_ts_to_fd(s);
793 return VK_SUCCESS;
794
795 default:
796 unreachable("Invalid syncobj state");
797 }
798 }
799
800 static VkResult
801 kgsl_syncobj_import(struct kgsl_syncobj *s, int fd)
802 {
803 kgsl_syncobj_reset(s);
804 if (fd >= 0) {
805 s->state = KGSL_SYNCOBJ_STATE_FD;
806 s->fd = fd;
807 } else {
808 s->state = KGSL_SYNCOBJ_STATE_SIGNALED;
809 }
810
811 return VK_SUCCESS;
812 }
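/* Merge two sync-file fds. On success the merged fence is returned and fd1
 * is closed; fd2 is closed too when close_fd2 is set, which callers use
 * when fd2 is a temporary fd created from a timestamp.
 */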
813
814 static int
815 sync_merge_close(const char *name, int fd1, int fd2, bool close_fd2)
816 {
817 int fd = sync_merge(name, fd1, fd2);
818 if (fd < 0)
819 return -1;
820
821 close(fd1);
822 if (close_fd2)
823 close(fd2);
824
825 return fd;
826 }
827
828 /* Merges multiple kgsl_syncobjs into a single one which is only signalled
829 * after all submitted syncobjs are signalled
830 */
831 static struct kgsl_syncobj
832 kgsl_syncobj_merge(const struct kgsl_syncobj **syncobjs, uint32_t count)
833 {
834 struct kgsl_syncobj ret;
835 kgsl_syncobj_init(&ret, true);
836
837 if (count == 0)
838 return ret;
839
840 for (uint32_t i = 0; i < count; ++i) {
841 const struct kgsl_syncobj *sync = syncobjs[i];
842
843 switch (sync->state) {
844 case KGSL_SYNCOBJ_STATE_SIGNALED:
845 break;
846
847 case KGSL_SYNCOBJ_STATE_UNSIGNALED:
848 kgsl_syncobj_reset(&ret);
849 return ret;
850
851 case KGSL_SYNCOBJ_STATE_TS:
852 if (ret.state == KGSL_SYNCOBJ_STATE_TS) {
853 if (ret.queue == sync->queue) {
854 ret.timestamp = max_ts(ret.timestamp, sync->timestamp);
855 } else {
856 ret.state = KGSL_SYNCOBJ_STATE_FD;
857 int sync_fd = kgsl_syncobj_ts_to_fd(sync);
858 ret.fd = sync_merge_close("tu_sync", ret.fd, sync_fd, true);
859 assert(ret.fd >= 0);
860 }
861 } else if (ret.state == KGSL_SYNCOBJ_STATE_FD) {
862 int sync_fd = kgsl_syncobj_ts_to_fd(sync);
863 ret.fd = sync_merge_close("tu_sync", ret.fd, sync_fd, true);
864 assert(ret.fd >= 0);
865 } else {
866 ret = *sync;
867 }
868 break;
869
870 case KGSL_SYNCOBJ_STATE_FD:
871 if (ret.state == KGSL_SYNCOBJ_STATE_FD) {
872 ret.fd = sync_merge_close("tu_sync", ret.fd, sync->fd, false);
873 assert(ret.fd >= 0);
874 } else if (ret.state == KGSL_SYNCOBJ_STATE_TS) {
875 ret.state = KGSL_SYNCOBJ_STATE_FD;
876 int sync_fd = kgsl_syncobj_ts_to_fd(sync);
877 ret.fd = sync_merge_close("tu_sync", ret.fd, sync_fd, true);
878 assert(ret.fd >= 0);
879 } else {
880 ret = *sync;
881 ret.fd = dup(ret.fd);
882 assert(ret.fd >= 0);
883 }
884 break;
885
886 default:
887 unreachable("invalid syncobj state");
888 }
889 }
890
891 return ret;
892 }
893
894 struct vk_kgsl_syncobj
895 {
896 struct vk_sync vk;
897 struct kgsl_syncobj syncobj;
898 };
899
900 static VkResult
901 vk_kgsl_sync_init(struct vk_device *device,
902 struct vk_sync *sync,
903 uint64_t initial_value)
904 {
905 struct vk_kgsl_syncobj *s = container_of(sync, struct vk_kgsl_syncobj, vk);
906 kgsl_syncobj_init(&s->syncobj, initial_value != 0);
907 return VK_SUCCESS;
908 }
909
910 static void
911 vk_kgsl_sync_finish(struct vk_device *device, struct vk_sync *sync)
912 {
913 struct vk_kgsl_syncobj *s = container_of(sync, struct vk_kgsl_syncobj, vk);
914 kgsl_syncobj_destroy(&s->syncobj);
915 }
916
917 static VkResult
918 vk_kgsl_sync_reset(struct vk_device *device, struct vk_sync *sync)
919 {
920 struct vk_kgsl_syncobj *s = container_of(sync, struct vk_kgsl_syncobj, vk);
921 kgsl_syncobj_reset(&s->syncobj);
922 return VK_SUCCESS;
923 }
924
925 static VkResult
926 vk_kgsl_sync_move(struct vk_device *device,
927 struct vk_sync *dst,
928 struct vk_sync *src)
929 {
930 struct vk_kgsl_syncobj *d = container_of(dst, struct vk_kgsl_syncobj, vk);
931 struct vk_kgsl_syncobj *s = container_of(src, struct vk_kgsl_syncobj, vk);
932 kgsl_syncobj_reset(&d->syncobj);
933 d->syncobj = s->syncobj;
934 kgsl_syncobj_init(&s->syncobj, false);
935 return VK_SUCCESS;
936 }
937
938 static VkResult
939 vk_kgsl_sync_wait(struct vk_device *_device,
940 struct vk_sync *sync,
941 uint64_t wait_value,
942 enum vk_sync_wait_flags wait_flags,
943 uint64_t abs_timeout_ns)
944 {
945 struct tu_device *device = container_of(_device, struct tu_device, vk);
946 struct vk_kgsl_syncobj *s = container_of(sync, struct vk_kgsl_syncobj, vk);
947
948 if (wait_flags & VK_SYNC_WAIT_PENDING)
949 return VK_SUCCESS;
950
951 return kgsl_syncobj_wait(device, &s->syncobj, abs_timeout_ns);
952 }
953
954 static VkResult
955 vk_kgsl_sync_wait_many(struct vk_device *_device,
956 uint32_t wait_count,
957 const struct vk_sync_wait *waits,
958 enum vk_sync_wait_flags wait_flags,
959 uint64_t abs_timeout_ns)
960 {
961 struct tu_device *device = container_of(_device, struct tu_device, vk);
962
963 if (wait_flags & VK_SYNC_WAIT_PENDING)
964 return VK_SUCCESS;
965
966 if (wait_flags & VK_SYNC_WAIT_ANY) {
967 struct kgsl_syncobj *syncobjs[wait_count];
968 for (uint32_t i = 0; i < wait_count; i++) {
969 syncobjs[i] =
970 &container_of(waits[i].sync, struct vk_kgsl_syncobj, vk)->syncobj;
971 }
972
973 return kgsl_syncobj_wait_any(device, syncobjs, wait_count,
974 abs_timeout_ns);
975 } else {
976 for (uint32_t i = 0; i < wait_count; i++) {
977 struct vk_kgsl_syncobj *s =
978 container_of(waits[i].sync, struct vk_kgsl_syncobj, vk);
979
980 VkResult result =
981 kgsl_syncobj_wait(device, &s->syncobj, abs_timeout_ns);
982 if (result != VK_SUCCESS)
983 return result;
984 }
985 return VK_SUCCESS;
986 }
987 }
988
989 static VkResult
990 vk_kgsl_sync_import_sync_file(struct vk_device *device,
991 struct vk_sync *sync,
992 int fd)
993 {
994 struct vk_kgsl_syncobj *s = container_of(sync, struct vk_kgsl_syncobj, vk);
995 if (fd >= 0) {
996 fd = dup(fd);
997 if (fd < 0) {
998 mesa_loge("vk_kgsl_sync_import_sync_file: dup failed: %s",
999 strerror(errno));
1000 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1001 }
1002 }
1003 return kgsl_syncobj_import(&s->syncobj, fd);
1004 }
1005
1006 static VkResult
1007 vk_kgsl_sync_export_sync_file(struct vk_device *device,
1008 struct vk_sync *sync,
1009 int *pFd)
1010 {
1011 struct vk_kgsl_syncobj *s = container_of(sync, struct vk_kgsl_syncobj, vk);
1012 return kgsl_syncobj_export(&s->syncobj, pFd);
1013 }
1014
1015 const struct vk_sync_type vk_kgsl_sync_type = {
1016 .size = sizeof(struct vk_kgsl_syncobj),
1017 .features = (enum vk_sync_features)
1018 (VK_SYNC_FEATURE_BINARY |
1019 VK_SYNC_FEATURE_GPU_WAIT |
1020 VK_SYNC_FEATURE_GPU_MULTI_WAIT |
1021 VK_SYNC_FEATURE_CPU_WAIT |
1022 VK_SYNC_FEATURE_CPU_RESET |
1023 VK_SYNC_FEATURE_WAIT_ANY |
1024 VK_SYNC_FEATURE_WAIT_PENDING),
1025 .init = vk_kgsl_sync_init,
1026 .finish = vk_kgsl_sync_finish,
1027 .reset = vk_kgsl_sync_reset,
1028 .move = vk_kgsl_sync_move,
1029 .wait = vk_kgsl_sync_wait,
1030 .wait_many = vk_kgsl_sync_wait_many,
1031 .import_sync_file = vk_kgsl_sync_import_sync_file,
1032 .export_sync_file = vk_kgsl_sync_export_sync_file,
1033 };
1034
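/* A queue submission is just a growable array of kgsl_command_object IBs:
 * kgsl_submit_add_entries() appends entries and kgsl_queue_submit() hands
 * the whole list to IOCTL_KGSL_GPU_COMMAND.
 */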
1035 struct tu_kgsl_queue_submit {
1036 struct util_dynarray commands;
1037 };
1038
1039 static void *
1040 kgsl_submit_create(struct tu_device *device)
1041 {
1042 return vk_zalloc(&device->vk.alloc, sizeof(struct tu_kgsl_queue_submit), 8,
1043 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1044 }
1045
1046 static void
1047 kgsl_submit_finish(struct tu_device *device,
1048 void *_submit)
1049 {
1050 struct tu_kgsl_queue_submit *submit =
1051 (struct tu_kgsl_queue_submit *)_submit;
1052
1053 util_dynarray_fini(&submit->commands);
1054 vk_free(&device->vk.alloc, submit);
1055 }
1056
1057 static void
1058 kgsl_submit_add_entries(struct tu_device *device, void *_submit,
1059 struct tu_cs_entry *entries, unsigned num_entries)
1060 {
1061 struct tu_kgsl_queue_submit *submit =
1062 (struct tu_kgsl_queue_submit *)_submit;
1063
1064 struct kgsl_command_object *cmds = (struct kgsl_command_object *)
1065 util_dynarray_grow(&submit->commands, struct kgsl_command_object,
1066 num_entries);
1067
1068 for (unsigned i = 0; i < num_entries; i++) {
1069 cmds[i] = (struct kgsl_command_object) {
1070 .gpuaddr = entries[i].bo->iova + entries[i].offset,
1071 .size = entries[i].size,
1072 .flags = KGSL_CMDLIST_IB,
1073 .id = entries[i].bo->gem_handle,
1074 };
1075 }
1076 }
1077
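/* Submit the gathered IB list. An empty submit only merges the wait
 * semaphores (plus the previous submit's timestamp) into the signal
 * semaphores. A real submit optionally attaches a profiling buffer
 * (KGSL_CMDBATCH_PROFILING) used for perfetto/u_trace timestamps,
 * translates the merged wait into a KGSL syncpoint, and uses the returned
 * timestamp as the queue fence and as the payload of the signal
 * semaphores.
 */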
1078 static VkResult
1079 kgsl_queue_submit(struct tu_queue *queue, void *_submit,
1080 struct vk_sync_wait *waits, uint32_t wait_count,
1081 struct vk_sync_signal *signals, uint32_t signal_count,
1082 struct tu_u_trace_submission_data *u_trace_submission_data)
1083 {
1084 struct tu_kgsl_queue_submit *submit =
1085 (struct tu_kgsl_queue_submit *)_submit;
1086
1087 #if HAVE_PERFETTO
1088 uint64_t start_ts = tu_perfetto_begin_submit();
1089 #endif
1090
1091 if (submit->commands.size == 0) {
1092 const struct kgsl_syncobj *wait_semaphores[wait_count + 1];
1093 for (uint32_t i = 0; i < wait_count; i++) {
1094 wait_semaphores[i] = &container_of(waits[i].sync,
1095 struct vk_kgsl_syncobj, vk)
1096 ->syncobj;
1097 }
1098
1099 struct kgsl_syncobj last_submit_sync;
1100 if (queue->fence >= 0)
1101 last_submit_sync = (struct kgsl_syncobj) {
1102 .state = KGSL_SYNCOBJ_STATE_TS,
1103 .queue = queue,
1104 .timestamp = queue->fence,
1105 };
1106 else
1107 last_submit_sync = (struct kgsl_syncobj) {
1108 .state = KGSL_SYNCOBJ_STATE_SIGNALED,
1109 };
1110
1111 wait_semaphores[wait_count] = &last_submit_sync;
1112
1113 struct kgsl_syncobj wait_sync =
1114 kgsl_syncobj_merge(wait_semaphores, wait_count + 1);
1115 assert(wait_sync.state !=
1116 KGSL_SYNCOBJ_STATE_UNSIGNALED); // Would wait forever
1117
1118 for (uint32_t i = 0; i < signal_count; i++) {
1119 struct kgsl_syncobj *signal_sync =
1120 &container_of(signals[i].sync, struct vk_kgsl_syncobj, vk)
1121 ->syncobj;
1122
1123 kgsl_syncobj_reset(signal_sync);
1124 *signal_sync = wait_sync;
1125 }
1126
1127 return VK_SUCCESS;
1128 }
1129
1130 VkResult result = VK_SUCCESS;
1131
1132 if (u_trace_submission_data) {
1133 mtx_lock(&queue->device->kgsl_profiling_mutex);
1134 tu_suballoc_bo_alloc(&u_trace_submission_data->kgsl_timestamp_bo,
1135 &queue->device->kgsl_profiling_suballoc,
1136 sizeof(struct kgsl_cmdbatch_profiling_buffer), 4);
1137 mtx_unlock(&queue->device->kgsl_profiling_mutex);
1138 }
1139
1140 uint32_t obj_count = 0;
1141 if (u_trace_submission_data)
1142 obj_count++;
1143
1144 struct kgsl_command_object *objs = (struct kgsl_command_object *)
1145 vk_alloc(&queue->device->vk.alloc, sizeof(*objs) * obj_count,
1146 alignof(*objs), VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
1147
1148 struct kgsl_cmdbatch_profiling_buffer *profiling_buffer = NULL;
1149 uint32_t obj_idx = 0;
1150 if (u_trace_submission_data) {
1151 struct tu_suballoc_bo *bo = &u_trace_submission_data->kgsl_timestamp_bo;
1152
1153 objs[obj_idx++] = (struct kgsl_command_object) {
1154 .offset = bo->iova - bo->bo->iova,
1155 .gpuaddr = bo->bo->iova,
1156 .size = sizeof(struct kgsl_cmdbatch_profiling_buffer),
1157 .flags = KGSL_OBJLIST_MEMOBJ | KGSL_OBJLIST_PROFILE,
1158 .id = bo->bo->gem_handle,
1159 };
1160 profiling_buffer =
1161 (struct kgsl_cmdbatch_profiling_buffer *) tu_suballoc_bo_map(bo);
1162 memset(profiling_buffer, 0, sizeof(*profiling_buffer));
1163 }
1164
1165 const struct kgsl_syncobj *wait_semaphores[wait_count];
1166 for (uint32_t i = 0; i < wait_count; i++) {
1167 wait_semaphores[i] =
1168 &container_of(waits[i].sync, struct vk_kgsl_syncobj, vk)
1169 ->syncobj;
1170 }
1171
1172 struct kgsl_syncobj wait_sync =
1173 kgsl_syncobj_merge(wait_semaphores, wait_count);
1174 assert(wait_sync.state !=
1175 KGSL_SYNCOBJ_STATE_UNSIGNALED); // Would wait forever
1176
1177 struct kgsl_cmd_syncpoint_timestamp ts;
1178 struct kgsl_cmd_syncpoint_fence fn;
1179 struct kgsl_command_syncpoint sync = { 0 };
1180 bool has_sync = false;
1181 switch (wait_sync.state) {
1182 case KGSL_SYNCOBJ_STATE_SIGNALED:
1183 break;
1184
1185 case KGSL_SYNCOBJ_STATE_TS:
1186 ts.context_id = wait_sync.queue->msm_queue_id;
1187 ts.timestamp = wait_sync.timestamp;
1188
1189 has_sync = true;
1190 sync.type = KGSL_CMD_SYNCPOINT_TYPE_TIMESTAMP;
1191 sync.priv = (uintptr_t) &ts;
1192 sync.size = sizeof(ts);
1193 break;
1194
1195 case KGSL_SYNCOBJ_STATE_FD:
1196 fn.fd = wait_sync.fd;
1197
1198 has_sync = true;
1199 sync.type = KGSL_CMD_SYNCPOINT_TYPE_FENCE;
1200 sync.priv = (uintptr_t) &fn;
1201 sync.size = sizeof(fn);
1202 break;
1203
1204 default:
1205 unreachable("invalid syncobj state");
1206 }
1207
1208 struct kgsl_gpu_command req = {
1209 .flags = KGSL_CMDBATCH_SUBMIT_IB_LIST,
1210 .cmdlist = (uintptr_t) submit->commands.data,
1211 .cmdsize = sizeof(struct kgsl_command_object),
1212 .numcmds = util_dynarray_num_elements(&submit->commands,
1213 struct kgsl_command_object),
1214 .synclist = (uintptr_t) &sync,
1215 .syncsize = sizeof(sync),
1216 .numsyncs = has_sync != 0 ? 1 : 0,
1217 .context_id = queue->msm_queue_id,
1218 };
1219
1220 if (obj_idx) {
1221 req.flags |= KGSL_CMDBATCH_PROFILING;
1222 req.objlist = (uintptr_t) objs;
1223 req.objsize = sizeof(struct kgsl_command_object);
1224 req.numobjs = obj_idx;
1225 }
1226
1227 int ret = safe_ioctl(queue->device->physical_device->local_fd,
1228 IOCTL_KGSL_GPU_COMMAND, &req);
1229
1230 uint64_t gpu_offset = 0;
1231 #if HAVE_PERFETTO
1232 if (profiling_buffer) {
1233 /* We need to wait for KGSL to queue the GPU command before we can read
1234 * the timestamp. Since this is just for profiling and doesn't take too
1235 * long, we can just busy-wait for it.
1236 */
1237 while (p_atomic_read(&profiling_buffer->gpu_ticks_queued) == 0);
1238
1239 struct kgsl_perfcounter_read_group perf = {
1240 .groupid = KGSL_PERFCOUNTER_GROUP_ALWAYSON,
1241 .countable = 0,
1242 .value = 0
1243 };
1244
1245 struct kgsl_perfcounter_read req = {
1246 .reads = &perf,
1247 .count = 1,
1248 };
1249
1250 ret = safe_ioctl(queue->device->fd, IOCTL_KGSL_PERFCOUNTER_READ, &req);
1251 /* Older KGSL has some kind of garbage in upper 32 bits */
1252 uint64_t offseted_gpu_ts = perf.value & 0xffffffff;
1253
1254 gpu_offset = tu_device_ticks_to_ns(
1255 queue->device, offseted_gpu_ts - profiling_buffer->gpu_ticks_queued);
1256
1257 struct tu_perfetto_clocks clocks = {
1258 .cpu = profiling_buffer->wall_clock_ns,
1259 .gpu_ts = tu_device_ticks_to_ns(queue->device,
1260 profiling_buffer->gpu_ticks_queued),
1261 .gpu_ts_offset = gpu_offset,
1262 };
1263
1264 clocks = tu_perfetto_end_submit(queue, queue->device->submit_count,
1265 start_ts, &clocks);
1266 gpu_offset = clocks.gpu_ts_offset;
1267 }
1268 #endif
1269
1270 kgsl_syncobj_destroy(&wait_sync);
1271
1272 if (ret) {
1273 result = vk_device_set_lost(&queue->device->vk, "submit failed: %s\n",
1274 strerror(errno));
1275 goto fail_submit;
1276 }
1277
1278 p_atomic_set(&queue->fence, req.timestamp);
1279
1280 for (uint32_t i = 0; i < signal_count; i++) {
1281 struct kgsl_syncobj *signal_sync =
1282 &container_of(signals[i].sync, struct vk_kgsl_syncobj, vk)
1283 ->syncobj;
1284
1285 kgsl_syncobj_reset(signal_sync);
1286 signal_sync->state = KGSL_SYNCOBJ_STATE_TS;
1287 signal_sync->queue = queue;
1288 signal_sync->timestamp = req.timestamp;
1289 }
1290
1291 if (u_trace_submission_data) {
1292 struct tu_u_trace_submission_data *submission_data =
1293 u_trace_submission_data;
1294 submission_data->gpu_ts_offset = gpu_offset;
1295 }
1296
1297 fail_submit:
1298 if (result != VK_SUCCESS && u_trace_submission_data) {
1299 mtx_lock(&queue->device->kgsl_profiling_mutex);
1300 tu_suballoc_bo_free(&queue->device->kgsl_profiling_suballoc,
1301 &u_trace_submission_data->kgsl_timestamp_bo);
1302 mtx_unlock(&queue->device->kgsl_profiling_mutex);
1303 }
1304
1305 return result;
1306 }
1307
1308 static VkResult
1309 kgsl_device_init(struct tu_device *dev)
1310 {
1311 dev->fd = dev->physical_device->local_fd;
1312 return VK_SUCCESS;
1313 }
1314
1315 static void
1316 kgsl_device_finish(struct tu_device *dev)
1317 {
1318 /* No-op */
1319 }
1320
1321 static int
1322 kgsl_device_get_gpu_timestamp(struct tu_device *dev, uint64_t *ts)
1323 {
1324 unreachable("");
1325 return 0;
1326 }
1327
1328 static int
1329 kgsl_device_get_suspend_count(struct tu_device *dev, uint64_t *suspend_count)
1330 {
1331 /* kgsl doesn't have a way to get it */
1332 *suspend_count = 0;
1333 return 0;
1334 }
1335
1336 static VkResult
1337 kgsl_device_check_status(struct tu_device *device)
1338 {
1339 for (unsigned i = 0; i < TU_MAX_QUEUE_FAMILIES; i++) {
1340 for (unsigned q = 0; q < device->queue_count[i]; q++) {
1341 /* KGSL's KGSL_PROP_GPU_RESET_STAT takes the u32 msm_queue_id and returns a
1342 * KGSL_CTX_STAT_* for the worst reset that happened since the last time it
1343 * was queried on that queue.
1344 */
1345 uint32_t value = device->queues[i][q].msm_queue_id;
1346 VkResult status = get_kgsl_prop(device->fd, KGSL_PROP_GPU_RESET_STAT,
1347 &value, sizeof(value));
1348 if (status != VK_SUCCESS)
1349 return vk_device_set_lost(&device->vk, "Failed to get GPU reset status");
1350
1351 if (value != KGSL_CTX_STAT_NO_ERROR &&
1352 value != KGSL_CTX_STAT_INNOCENT_CONTEXT_RESET_EXT) {
1353 return vk_device_set_lost(&device->vk, "GPU faulted or hung");
1354 }
1355 }
1356 }
1357
1358 return VK_SUCCESS;
1359 }
1360
1361 static const struct tu_knl kgsl_knl_funcs = {
1362 .name = "kgsl",
1363
1364 .device_init = kgsl_device_init,
1365 .device_finish = kgsl_device_finish,
1366 .device_get_gpu_timestamp = kgsl_device_get_gpu_timestamp,
1367 .device_get_suspend_count = kgsl_device_get_suspend_count,
1368 .device_check_status = kgsl_device_check_status,
1369 .submitqueue_new = kgsl_submitqueue_new,
1370 .submitqueue_close = kgsl_submitqueue_close,
1371 .bo_init = kgsl_bo_init,
1372 .bo_init_dmabuf = kgsl_bo_init_dmabuf,
1373 .bo_export_dmabuf = kgsl_bo_export_dmabuf,
1374 .bo_map = kgsl_bo_map,
1375 .bo_allow_dump = kgsl_bo_allow_dump,
1376 .bo_finish = kgsl_bo_finish,
1377 .submit_create = kgsl_submit_create,
1378 .submit_finish = kgsl_submit_finish,
1379 .submit_add_entries = kgsl_submit_add_entries,
1380 .queue_submit = kgsl_queue_submit,
1381 .queue_wait_fence = kgsl_queue_wait_fence,
1382 };
1383
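/* Probe and initialize a kgsl physical device: pick a dma-buf allocator
 * (dma-heap, new ION or legacy ION) for exportable memory, query the chip
 * id, GMEM size/base, highest bank bit and UBWC version through KGSL_PROP_*
 * properties, and register the kgsl-specific sync and knl function tables.
 */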
1384 VkResult
1385 tu_knl_kgsl_load(struct tu_instance *instance, int fd)
1386 {
1387 if (instance->vk.enabled_extensions.KHR_display) {
1388 return vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
1389 "VK_KHR_display is not supported on kgsl");
1390 }
1391
1392 struct tu_physical_device *device = (struct tu_physical_device *)
1393 vk_zalloc(&instance->vk.alloc, sizeof(*device), 8,
1394 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
1395 if (!device) {
1396 close(fd);
1397 return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1398 }
1399
1400 static const char dma_heap_path[] = "/dev/dma_heap/system";
1401 static const char ion_path[] = "/dev/ion";
1402 int dma_fd;
1403
1404 dma_fd = open(dma_heap_path, O_RDONLY);
1405 if (dma_fd >= 0) {
1406 device->kgsl_dma_type = TU_KGSL_DMA_TYPE_DMAHEAP;
1407 } else {
1408 dma_fd = open(ion_path, O_RDONLY);
1409 if (dma_fd >= 0) {
1410 /* ION_IOC_FREE available only for legacy ION */
1411 struct ion_handle_data free = { .handle = 0 };
1412 if (safe_ioctl(dma_fd, ION_IOC_FREE, &free) >= 0 || errno != ENOTTY)
1413 device->kgsl_dma_type = TU_KGSL_DMA_TYPE_ION_LEGACY;
1414 else
1415 device->kgsl_dma_type = TU_KGSL_DMA_TYPE_ION;
1416 } else {
1417 mesa_logw(
1418 "Unable to open either %s or %s, VK_KHR_external_memory_fd will be "
1419 "unavailable: %s",
1420 dma_heap_path, ion_path, strerror(errno));
1421 }
1422 }
1423
1424 VkResult result = VK_ERROR_INITIALIZATION_FAILED;
1425
1426 struct kgsl_devinfo info;
1427 if (get_kgsl_prop(fd, KGSL_PROP_DEVICE_INFO, &info, sizeof(info)))
1428 goto fail;
1429
1430 uint64_t gmem_iova;
1431 if (get_kgsl_prop(fd, KGSL_PROP_UCHE_GMEM_VADDR, &gmem_iova, sizeof(gmem_iova)))
1432 goto fail;
1433
1434 uint32_t highest_bank_bit;
1435 if (get_kgsl_prop(fd, KGSL_PROP_HIGHEST_BANK_BIT, &highest_bank_bit,
1436 sizeof(highest_bank_bit)))
1437 goto fail;
1438
1439 uint32_t ubwc_version;
1440 if (get_kgsl_prop(fd, KGSL_PROP_UBWC_MODE, &ubwc_version,
1441 sizeof(ubwc_version)))
1442 goto fail;
1443
1444
1445 /* kgsl version check? */
1446
1447 device->instance = instance;
1448 device->master_fd = -1;
1449 device->local_fd = fd;
1450 device->kgsl_dma_fd = dma_fd;
1451
1452 device->dev_id.gpu_id =
1453 ((info.chip_id >> 24) & 0xff) * 100 +
1454 ((info.chip_id >> 16) & 0xff) * 10 +
1455 ((info.chip_id >> 8) & 0xff);
1456 device->dev_id.chip_id = info.chip_id;
1457 device->gmem_size = debug_get_num_option("TU_GMEM", info.gmem_sizebytes);
1458 device->gmem_base = gmem_iova;
1459
1460 device->submitqueue_priority_count = 1;
1461
1462 device->timeline_type = vk_sync_timeline_get_type(&vk_kgsl_sync_type);
1463
1464 device->sync_types[0] = &vk_kgsl_sync_type;
1465 device->sync_types[1] = &device->timeline_type.sync;
1466 device->sync_types[2] = NULL;
1467
1468 device->heap.size = tu_get_system_heap_size(device);
1469 device->heap.used = 0u;
1470 device->heap.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT;
1471
1472 device->has_set_iova = kgsl_is_memory_type_supported(
1473 fd, KGSL_MEMFLAGS_USE_CPU_MAP);
1474
1475 /* Even if kernel is new enough, the GPU itself may not support it. */
1476 device->has_cached_coherent_memory = kgsl_is_memory_type_supported(
1477 fd, KGSL_MEMFLAGS_IOCOHERENT |
1478 (KGSL_CACHEMODE_WRITEBACK << KGSL_CACHEMODE_SHIFT));
1479
1480 /* preemption is always supported on kgsl */
1481 device->has_preemption = true;
1482
1483 device->ubwc_config.highest_bank_bit = highest_bank_bit;
1484
1485 /* The other config values can be partially inferred from the UBWC version,
1486 * but kgsl also hardcodes overrides for specific a6xx versions that we
1487 * have to follow here. Yuck.
1488 */
1489 switch (ubwc_version) {
1490 case KGSL_UBWC_1_0:
1491 device->ubwc_config.bank_swizzle_levels = 0x7;
1492 device->ubwc_config.macrotile_mode = FDL_MACROTILE_4_CHANNEL;
1493 break;
1494 case KGSL_UBWC_2_0:
1495 device->ubwc_config.bank_swizzle_levels = 0x6;
1496 device->ubwc_config.macrotile_mode = FDL_MACROTILE_4_CHANNEL;
1497 break;
1498 case KGSL_UBWC_3_0:
1499 device->ubwc_config.bank_swizzle_levels = 0x6;
1500 device->ubwc_config.macrotile_mode = FDL_MACROTILE_4_CHANNEL;
1501 break;
1502 case KGSL_UBWC_4_0:
1503 device->ubwc_config.bank_swizzle_levels = 0x6;
1504 device->ubwc_config.macrotile_mode = FDL_MACROTILE_8_CHANNEL;
1505 break;
1506 default:
1507 return vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
1508 "unknown UBWC version 0x%x", ubwc_version);
1509 }
1510
1511 /* kgsl unfortunately hardcodes some settings for certain GPUs and doesn't
1512 * expose them in the uAPI so hardcode them here to match.
1513 */
1514 if (device->dev_id.gpu_id == 663 || device->dev_id.gpu_id == 680) {
1515 device->ubwc_config.macrotile_mode = FDL_MACROTILE_8_CHANNEL;
1516 }
1517 if (device->dev_id.gpu_id == 663) {
1518 /* level2_swizzling_dis = 1 */
1519 device->ubwc_config.bank_swizzle_levels = 0x4;
1520 }
1521
1522 instance->knl = &kgsl_knl_funcs;
1523
1524 result = tu_physical_device_init(device, instance);
1525 if (result != VK_SUCCESS)
1526 goto fail;
1527
1528 list_addtail(&device->vk.link, &instance->vk.physical_devices.list);
1529
1530 return VK_SUCCESS;
1531
1532 fail:
1533 vk_free(&instance->vk.alloc, device);
1534 close(fd);
1535 if (dma_fd >= 0)
1536 close(dma_fd);
1537 return result;
1538 }
1539