1 /*
2 * Copyright © 2020 Google, Inc.
3 * SPDX-License-Identifier: MIT
4 */
5
6 #include "tu_knl.h"
7
8 #include <errno.h>
9 #include <fcntl.h>
10 #include <poll.h>
11 #include <stdint.h>
12 #include <sys/ioctl.h>
13 #include <sys/mman.h>
14 #include <linux/dma-heap.h>
15
16 #define __user
17 #include "msm_kgsl.h"
18 #include "ion/ion.h"
19 #include "ion/ion_4.19.h"
20
21 #include "vk_util.h"
22
23 #include "util/os_file.h"
24 #include "util/u_debug.h"
25 #include "util/u_vector.h"
26 #include "util/libsync.h"
27 #include "util/timespec.h"
28
29 #include "tu_cmd_buffer.h"
30 #include "tu_cs.h"
31 #include "tu_device.h"
32 #include "tu_dynamic_rendering.h"
33 #include "tu_queue.h"
34 #include "tu_rmv.h"
35
36 /* ION_HEAP(ION_SYSTEM_HEAP_ID) */
37 #define KGSL_ION_SYSTEM_HEAP_MASK (1u << 25)
38
39
40 static int
41 safe_ioctl(int fd, unsigned long request, void *arg)
42 {
43 int ret;
44
45 do {
46 ret = ioctl(fd, request, arg);
47 } while (ret == -1 && (errno == EINTR || errno == EAGAIN));
48
49 return ret;
50 }
51
52 static int
53 kgsl_submitqueue_new(struct tu_device *dev,
54 int priority,
55 uint32_t *queue_id)
56 {
57 struct kgsl_drawctxt_create req = {
58 .flags = KGSL_CONTEXT_SAVE_GMEM |
59 KGSL_CONTEXT_NO_GMEM_ALLOC |
60 KGSL_CONTEXT_PREAMBLE,
61 };
62
63 int ret = safe_ioctl(dev->physical_device->local_fd, IOCTL_KGSL_DRAWCTXT_CREATE, &req);
64 if (ret)
65 return ret;
66
67 *queue_id = req.drawctxt_id;
68
69 return 0;
70 }
71
72 static void
73 kgsl_submitqueue_close(struct tu_device *dev, uint32_t queue_id)
74 {
75 struct kgsl_drawctxt_destroy req = {
76 .drawctxt_id = queue_id,
77 };
78
79 safe_ioctl(dev->physical_device->local_fd, IOCTL_KGSL_DRAWCTXT_DESTROY, &req);
80 }
81
82 static void kgsl_bo_finish(struct tu_device *dev, struct tu_bo *bo);
83
84 static VkResult
85 bo_init_new_dmaheap(struct tu_device *dev, struct tu_bo **out_bo, uint64_t size,
86 enum tu_bo_alloc_flags flags)
87 {
88 struct dma_heap_allocation_data alloc = {
89 .len = size,
90 .fd_flags = O_RDWR | O_CLOEXEC,
91 };
92
93 int ret;
94 ret = safe_ioctl(dev->physical_device->kgsl_dma_fd, DMA_HEAP_IOCTL_ALLOC,
95 &alloc);
96
97 if (ret) {
98 return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
99 "DMA_HEAP_IOCTL_ALLOC failed (%s)", strerror(errno));
100 }
101
102 return tu_bo_init_dmabuf(dev, out_bo, -1, alloc.fd);
103 }
104
105 static VkResult
106 bo_init_new_ion(struct tu_device *dev, struct tu_bo **out_bo, uint64_t size,
107 enum tu_bo_alloc_flags flags)
108 {
109 struct ion_new_allocation_data alloc = {
110 .len = size,
111 .heap_id_mask = KGSL_ION_SYSTEM_HEAP_MASK,
112 .flags = 0,
113 .fd = -1,
114 };
115
116 int ret;
117 ret = safe_ioctl(dev->physical_device->kgsl_dma_fd, ION_IOC_NEW_ALLOC, &alloc);
118 if (ret) {
119 return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
120 "ION_IOC_NEW_ALLOC failed (%s)", strerror(errno));
121 }
122
123 return tu_bo_init_dmabuf(dev, out_bo, -1, alloc.fd);
124 }
125
126 static VkResult
127 bo_init_new_ion_legacy(struct tu_device *dev, struct tu_bo **out_bo, uint64_t size,
128 enum tu_bo_alloc_flags flags)
129 {
130 struct ion_allocation_data alloc = {
131 .len = size,
132 .align = 4096,
133 .heap_id_mask = KGSL_ION_SYSTEM_HEAP_MASK,
134 .flags = 0,
135 .handle = -1,
136 };
137
138 int ret;
139 ret = safe_ioctl(dev->physical_device->kgsl_dma_fd, ION_IOC_ALLOC, &alloc);
140 if (ret) {
141 return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
142 "ION_IOC_ALLOC failed (%s)", strerror(errno));
143 }
144
145 struct ion_fd_data share = {
146 .handle = alloc.handle,
147 .fd = -1,
148 };
149
150 ret = safe_ioctl(dev->physical_device->kgsl_dma_fd, ION_IOC_SHARE, &share);
151 if (ret) {
152 return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
153 "ION_IOC_SHARE failed (%s)", strerror(errno));
154 }
155
156 struct ion_handle_data free = {
157 .handle = alloc.handle,
158 };
159 ret = safe_ioctl(dev->physical_device->kgsl_dma_fd, ION_IOC_FREE, &free);
160 if (ret) {
161 return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
162 "ION_IOC_FREE failed (%s)", strerror(errno));
163 }
164
165 return tu_bo_init_dmabuf(dev, out_bo, -1, share.fd);
166 }
167
168 static VkResult
169 kgsl_bo_init(struct tu_device *dev,
170 struct vk_object_base *base,
171 struct tu_bo **out_bo,
172 uint64_t size,
173 uint64_t client_iova,
174 VkMemoryPropertyFlags mem_property,
175 enum tu_bo_alloc_flags flags,
176 const char *name)
177 {
178 if (flags & TU_BO_ALLOC_SHAREABLE) {
179 /* The Vulkan spec doesn't forbid allocating exportable memory with a
180 * fixed address, only imported memory, but on kgsl we can't sensibly
181 * implement it so just always reject it.
182 */
183 if (client_iova) {
184 return vk_errorf(dev, VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS,
185 "cannot allocate an exportable BO with a fixed address");
186 }
187
188 switch(dev->physical_device->kgsl_dma_type) {
189 case TU_KGSL_DMA_TYPE_DMAHEAP:
190 return bo_init_new_dmaheap(dev, out_bo, size, flags);
191 case TU_KGSL_DMA_TYPE_ION:
192 return bo_init_new_ion(dev, out_bo, size, flags);
193 case TU_KGSL_DMA_TYPE_ION_LEGACY:
194 return bo_init_new_ion_legacy(dev, out_bo, size, flags);
195 }
196 }
197
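/* BOs that don't need to be exported are allocated directly from KGSL's own
 * GPU memory allocator below.
 */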
198 struct kgsl_gpumem_alloc_id req = {
199 .size = size,
200 };
201
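/* Map the requested Vulkan memory properties onto KGSL cache modes:
 * host-cached memory is allocated write-back (adding I/O coherency when
 * host-coherent is also requested), everything else is write-combined.
 */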
202 if (mem_property & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) {
203 if (mem_property & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) {
204 req.flags |= KGSL_MEMFLAGS_IOCOHERENT;
205 }
206
207 req.flags |= KGSL_CACHEMODE_WRITEBACK << KGSL_CACHEMODE_SHIFT;
208 } else {
209 req.flags |= KGSL_CACHEMODE_WRITECOMBINE << KGSL_CACHEMODE_SHIFT;
210 }
211
212 if (flags & TU_BO_ALLOC_GPU_READ_ONLY)
213 req.flags |= KGSL_MEMFLAGS_GPUREADONLY;
214
215 if (flags & TU_BO_ALLOC_REPLAYABLE)
216 req.flags |= KGSL_MEMFLAGS_USE_CPU_MAP;
217
218 int ret;
219
220 ret = safe_ioctl(dev->physical_device->local_fd,
221 IOCTL_KGSL_GPUMEM_ALLOC_ID, &req);
222 if (ret) {
223 return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
224 "GPUMEM_ALLOC_ID failed (%s)", strerror(errno));
225 }
226
227 struct tu_bo* bo = tu_device_lookup_bo(dev, req.id);
228 assert(bo && bo->gem_handle == 0);
229
230 *bo = (struct tu_bo) {
231 .gem_handle = req.id,
232 .size = req.mmapsize,
233 .iova = req.gpuaddr,
234 .name = tu_debug_bos_add(dev, req.mmapsize, name),
235 .refcnt = 1,
236 .shared_fd = -1,
237 .base = base,
238 };
239
240 if (flags & TU_BO_ALLOC_REPLAYABLE) {
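/* With KGSL_MEMFLAGS_USE_CPU_MAP the BO is mapped through the device fd at
 * an offset derived from its id (one page per id), and the GPU address
 * follows the CPU mapping, which is what lets us honor client_iova.
 */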
241 uint64_t offset = req.id << 12;
242 void *map = mmap((void *)client_iova, bo->size, PROT_READ | PROT_WRITE,
243 MAP_SHARED, dev->physical_device->local_fd, offset);
244 if (map == MAP_FAILED) {
245 kgsl_bo_finish(dev, bo);
246
247 return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
248 "mmap failed (%s)", strerror(errno));
249 }
250
251 if (client_iova && (uint64_t)map != client_iova) {
252 kgsl_bo_finish(dev, bo);
253
254 return vk_errorf(dev, VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS,
255 "mmap could not map the given address");
256 }
257
258 bo->map = map;
259 bo->iova = (uint64_t)map;
260
261 /* Because we're using SVM, the CPU mapping and GPU mapping are the same
262 * and the CPU mapping must stay fixed for the lifetime of the BO.
263 */
264 bo->never_unmap = true;
265 }
266
267 tu_dump_bo_init(dev, bo);
268
269 *out_bo = bo;
270
271 TU_RMV(bo_allocate, dev, bo);
272 if (flags & TU_BO_ALLOC_INTERNAL_RESOURCE) {
273 TU_RMV(internal_resource_create, dev, bo);
274 TU_RMV(resource_name, dev, bo, name);
275 }
276
277 return VK_SUCCESS;
278 }
279
280 static VkResult
281 kgsl_bo_init_dmabuf(struct tu_device *dev,
282 struct tu_bo **out_bo,
283 uint64_t size,
284 int fd)
285 {
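/* Import the dma-buf into KGSL first, then query the resulting GPU object
 * to learn the iova and size the kernel assigned to it.
 */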
286 struct kgsl_gpuobj_import_dma_buf import_dmabuf = {
287 .fd = fd,
288 };
289 struct kgsl_gpuobj_import req = {
290 .priv = (uintptr_t)&import_dmabuf,
291 .priv_len = sizeof(import_dmabuf),
292 .flags = 0,
293 .type = KGSL_USER_MEM_TYPE_DMABUF,
294 };
295 int ret;
296
297 ret = safe_ioctl(dev->physical_device->local_fd,
298 IOCTL_KGSL_GPUOBJ_IMPORT, &req);
299 if (ret)
300 return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
301 "Failed to import dma-buf (%s)\n", strerror(errno));
302
303 struct kgsl_gpuobj_info info_req = {
304 .id = req.id,
305 };
306
307 ret = safe_ioctl(dev->physical_device->local_fd,
308 IOCTL_KGSL_GPUOBJ_INFO, &info_req);
309 if (ret)
310 return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
311 "Failed to get dma-buf info (%s)\n", strerror(errno));
312
313 struct tu_bo* bo = tu_device_lookup_bo(dev, req.id);
314 assert(bo && bo->gem_handle == 0);
315
316 *bo = (struct tu_bo) {
317 .gem_handle = req.id,
318 .size = info_req.size,
319 .iova = info_req.gpuaddr,
320 .name = tu_debug_bos_add(dev, info_req.size, "dmabuf"),
321 .refcnt = 1,
322 .shared_fd = os_dupfd_cloexec(fd),
323 };
324
325 tu_dump_bo_init(dev, bo);
326
327 *out_bo = bo;
328
329 return VK_SUCCESS;
330 }
331
332 static int
333 kgsl_bo_export_dmabuf(struct tu_device *dev, struct tu_bo *bo)
334 {
335 assert(bo->shared_fd != -1);
336 return os_dupfd_cloexec(bo->shared_fd);
337 }
338
339 static VkResult
340 kgsl_bo_map(struct tu_device *dev, struct tu_bo *bo, void *placed_addr)
341 {
342 void *map = MAP_FAILED;
343 if (bo->shared_fd == -1) {
344 uint64_t offset = bo->gem_handle << 12;
345 map = mmap(placed_addr, bo->size, PROT_READ | PROT_WRITE,
346 MAP_SHARED | (placed_addr != NULL ? MAP_FIXED : 0),
347 dev->physical_device->local_fd, offset);
348 } else {
349 map = mmap(placed_addr, bo->size, PROT_READ | PROT_WRITE,
350 MAP_SHARED | (placed_addr != NULL ? MAP_FIXED : 0),
351 bo->shared_fd, 0);
352 }
353
354 if (map == MAP_FAILED)
355 return vk_error(dev, VK_ERROR_MEMORY_MAP_FAILED);
356
357 bo->map = map;
358 TU_RMV(bo_map, dev, bo);
359
360 return VK_SUCCESS;
361 }
362
363 static void
364 kgsl_bo_allow_dump(struct tu_device *dev, struct tu_bo *bo)
365 {
366 }
367
368 static void
369 kgsl_bo_finish(struct tu_device *dev, struct tu_bo *bo)
370 {
371 assert(bo->gem_handle);
372
373 if (!p_atomic_dec_zero(&bo->refcnt))
374 return;
375
376 if (bo->map) {
377 TU_RMV(bo_unmap, dev, bo);
378 munmap(bo->map, bo->size);
379 }
380
381 if (bo->shared_fd != -1)
382 close(bo->shared_fd);
383
384 TU_RMV(bo_destroy, dev, bo);
385 tu_debug_bos_del(dev, bo);
386 tu_dump_bo_del(dev, bo);
387
388 struct kgsl_gpumem_free_id req = {
389 .id = bo->gem_handle
390 };
391
392 /* Tell sparse array that entry is free */
393 memset(bo, 0, sizeof(*bo));
394
395 safe_ioctl(dev->physical_device->local_fd, IOCTL_KGSL_GPUMEM_FREE_ID, &req);
396 }
397
398 static VkResult
399 get_kgsl_prop(int fd, unsigned int type, void *value, size_t size)
400 {
401 struct kgsl_device_getproperty getprop = {
402 .type = type,
403 .value = value,
404 .sizebytes = size,
405 };
406
407 return safe_ioctl(fd, IOCTL_KGSL_DEVICE_GETPROPERTY, &getprop)
408 ? VK_ERROR_UNKNOWN
409 : VK_SUCCESS;
410 }
411
412 static bool
413 kgsl_is_memory_type_supported(int fd, uint32_t flags)
414 {
415 struct kgsl_gpumem_alloc_id req_alloc = {
416 .flags = flags,
417 .size = 0x1000,
418 };
419
420 int ret = safe_ioctl(fd, IOCTL_KGSL_GPUMEM_ALLOC_ID, &req_alloc);
421 if (ret) {
422 return false;
423 }
424
425 struct kgsl_gpumem_free_id req_free = { .id = req_alloc.id };
426
427 safe_ioctl(fd, IOCTL_KGSL_GPUMEM_FREE_ID, &req_free);
428
429 return true;
430 }
431
432 enum kgsl_syncobj_state {
433 KGSL_SYNCOBJ_STATE_UNSIGNALED,
434 KGSL_SYNCOBJ_STATE_SIGNALED,
435 KGSL_SYNCOBJ_STATE_TS,
436 KGSL_SYNCOBJ_STATE_FD,
437 };
438
439 struct kgsl_syncobj
440 {
441 struct vk_object_base base;
442 enum kgsl_syncobj_state state;
443
444 struct tu_queue *queue;
445 uint32_t timestamp;
446
447 int fd;
448 };
449
450 static void
451 kgsl_syncobj_init(struct kgsl_syncobj *s, bool signaled)
452 {
453 s->state =
454 signaled ? KGSL_SYNCOBJ_STATE_SIGNALED : KGSL_SYNCOBJ_STATE_UNSIGNALED;
455
456 s->timestamp = UINT32_MAX;
457 s->fd = -1;
458 }
459
460 static void
461 kgsl_syncobj_reset(struct kgsl_syncobj *s)
462 {
463 if (s->state == KGSL_SYNCOBJ_STATE_FD && s->fd >= 0) {
464 ASSERTED int ret = close(s->fd);
465 assert(ret == 0);
466 s->fd = -1;
467 } else if (s->state == KGSL_SYNCOBJ_STATE_TS) {
468 s->timestamp = UINT32_MAX;
469 }
470
471 s->state = KGSL_SYNCOBJ_STATE_UNSIGNALED;
472 }
473
474 static void
475 kgsl_syncobj_destroy(struct kgsl_syncobj *s)
476 {
477 kgsl_syncobj_reset(s);
478 }
479
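/* Ask KGSL to create a sync file for a (context, timestamp) pair; the
 * returned fd signals once the context retires that timestamp.
 */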
480 static int
481 timestamp_to_fd(struct tu_queue *queue, uint32_t timestamp)
482 {
483 int fd;
484 struct kgsl_timestamp_event event = {
485 .type = KGSL_TIMESTAMP_EVENT_FENCE,
486 .timestamp = timestamp,
487 .context_id = queue->msm_queue_id,
488 .priv = &fd,
489 .len = sizeof(fd),
490 };
491
492 int ret = safe_ioctl(queue->device->fd, IOCTL_KGSL_TIMESTAMP_EVENT, &event);
493 if (ret)
494 return -1;
495
496 return fd;
497 }
498
499 static int
500 kgsl_syncobj_ts_to_fd(const struct kgsl_syncobj *syncobj)
501 {
502 assert(syncobj->state == KGSL_SYNCOBJ_STATE_TS);
503 return timestamp_to_fd(syncobj->queue, syncobj->timestamp);
504 }
505
506 /* Return true if timestamp a is greater (more recent) than b.
507 * This relies on timestamps never having a difference > (1<<31).
508 */
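/* For example, with a = 3 and b = 0xfffffffd (b was issued just before the
 * counter wrapped), (int32_t)(a - b) == 6 >= 0, so a is the more recent
 * timestamp even though a < b numerically.
 */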
509 static inline bool
510 timestamp_cmp(uint32_t a, uint32_t b)
511 {
512 return (int32_t) (a - b) >= 0;
513 }
514
515 static uint32_t
516 max_ts(uint32_t a, uint32_t b)
517 {
518 return timestamp_cmp(a, b) ? a : b;
519 }
520
521 static uint32_t
522 min_ts(uint32_t a, uint32_t b)
523 {
524 return timestamp_cmp(a, b) ? b : a;
525 }
526
527 static int
528 get_relative_ms(uint64_t abs_timeout_ns)
529 {
530 if (abs_timeout_ns >= INT64_MAX)
531 /* We can assume that a wait this long is a forever wait, so return -1:
532 * that is the infinite timeout for poll() and, interpreted as unsigned,
533 * the maximum timeout value for the KGSL wait ioctl.
534 */
535 return -1;
536
537 uint64_t cur_time_ms = os_time_get_nano() / 1000000;
538 uint64_t abs_timeout_ms = abs_timeout_ns / 1000000;
539 if (abs_timeout_ms <= cur_time_ms)
540 return 0;
541
542 return abs_timeout_ms - cur_time_ms;
543 }
544
545 /* safe_ioctl is not enough here: restarted waits would not adjust the
546 * timeout, which could lead to waiting substantially longer than requested.
547 */
548 static VkResult
549 wait_timestamp_safe(int fd,
550 unsigned int context_id,
551 unsigned int timestamp,
552 uint64_t abs_timeout_ns)
553 {
554 struct kgsl_device_waittimestamp_ctxtid wait = {
555 .context_id = context_id,
556 .timestamp = timestamp,
557 .timeout = get_relative_ms(abs_timeout_ns),
558 };
559
560 while (true) {
561 int ret = ioctl(fd, IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID, &wait);
562
563 if (ret == -1 && (errno == EINTR || errno == EAGAIN)) {
564 int timeout_ms = get_relative_ms(abs_timeout_ns);
565
566 /* update timeout to consider time that has passed since the start */
567 if (timeout_ms == 0)
568 return VK_TIMEOUT;
569
570 wait.timeout = timeout_ms;
571 } else if (ret == -1) {
572 assert(errno == ETIMEDOUT);
573 return VK_TIMEOUT;
574 } else {
575 return VK_SUCCESS;
576 }
577 }
578 }
579
580 VkResult
581 kgsl_queue_wait_fence(struct tu_queue *queue, uint32_t fence,
582 uint64_t timeout_ns)
583 {
584 uint64_t abs_timeout_ns = os_time_get_nano() + timeout_ns;
585
586 return wait_timestamp_safe(queue->device->fd, queue->msm_queue_id,
587 fence, abs_timeout_ns);
588 }
589
590 static VkResult
591 kgsl_syncobj_wait(struct tu_device *device,
592 struct kgsl_syncobj *s,
593 uint64_t abs_timeout_ns)
594 {
595 if (s->state == KGSL_SYNCOBJ_STATE_UNSIGNALED) {
596 /* If this syncobj is unsignaled, we need to wait for it to resolve to a
597 * valid syncobj before letting the rest of the wait continue; this
598 * avoids needing kernel support for wait-before-signal semantics.
599 */
600
601 if (abs_timeout_ns == 0)
602 return VK_TIMEOUT; // If this is a simple poll then we can return early
603
604 pthread_mutex_lock(&device->submit_mutex);
605 struct timespec abstime;
606 timespec_from_nsec(&abstime, abs_timeout_ns);
607
608 while (s->state == KGSL_SYNCOBJ_STATE_UNSIGNALED) {
609 int ret;
610 if (abs_timeout_ns == UINT64_MAX) {
611 ret = pthread_cond_wait(&device->timeline_cond,
612 &device->submit_mutex);
613 } else {
614 ret = pthread_cond_timedwait(&device->timeline_cond,
615 &device->submit_mutex, &abstime);
616 }
617 if (ret != 0) {
618 assert(ret == ETIMEDOUT);
619 pthread_mutex_unlock(&device->submit_mutex);
620 return VK_TIMEOUT;
621 }
622 }
623
624 pthread_mutex_unlock(&device->submit_mutex);
625 }
626
627 switch (s->state) {
628 case KGSL_SYNCOBJ_STATE_SIGNALED:
629 return VK_SUCCESS;
630
631 case KGSL_SYNCOBJ_STATE_UNSIGNALED:
632 return VK_TIMEOUT;
633
634 case KGSL_SYNCOBJ_STATE_TS: {
635 return wait_timestamp_safe(device->fd, s->queue->msm_queue_id,
636 s->timestamp, abs_timeout_ns);
637 }
638
639 case KGSL_SYNCOBJ_STATE_FD: {
640 int ret = sync_wait(s->fd, get_relative_ms(abs_timeout_ns));
641 if (ret) {
642 assert(errno == ETIME);
643 return VK_TIMEOUT;
644 } else {
645 return VK_SUCCESS;
646 }
647 }
648
649 default:
650 unreachable("invalid syncobj state");
651 }
652 }
653
654 #define kgsl_syncobj_foreach_state(syncobjs, filter) \
655 for (uint32_t i = 0; i < count && (sync = syncobjs[i], true); i++) \
656 if (sync->state == filter)
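/* Note: the macro expects `sync` and `count` to be declared in the caller's
 * scope, as in kgsl_syncobj_wait_any() below.
 */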
657
658 static VkResult
659 kgsl_syncobj_wait_any(struct tu_device *device, struct kgsl_syncobj **syncobjs, uint32_t count, uint64_t abs_timeout_ns)
660 {
661 if (count == 0)
662 return VK_TIMEOUT;
663 else if (count == 1)
664 return kgsl_syncobj_wait(device, syncobjs[0], abs_timeout_ns);
665
666 uint32_t num_fds = 0;
667 struct tu_queue *queue = NULL;
668 struct kgsl_syncobj *sync = NULL;
669
670 /* Simple case: we already have a signaled one */
671 kgsl_syncobj_foreach_state(syncobjs, KGSL_SYNCOBJ_STATE_SIGNALED)
672 return VK_SUCCESS;
673
674 kgsl_syncobj_foreach_state(syncobjs, KGSL_SYNCOBJ_STATE_FD)
675 num_fds++;
676
677 /* If we have TS from different queues we cannot compare them and would
678 * have to convert them into FDs
679 */
680 bool convert_ts_to_fd = false;
681 kgsl_syncobj_foreach_state(syncobjs, KGSL_SYNCOBJ_STATE_TS) {
682 if (queue != NULL && sync->queue != queue) {
683 convert_ts_to_fd = true;
684 break;
685 }
686 queue = sync->queue;
687 }
688
689 /* If we have neither FD nor TS syncobjs, we can return immediately */
690 if (num_fds == 0 && queue == NULL)
691 return VK_TIMEOUT;
692
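/* Strategy: if every remaining syncobj is a timestamp on the same queue,
 * wait on the smallest (earliest) timestamp via the KGSL wait ioctl;
 * otherwise convert timestamps to sync FDs and poll() on all of the FDs.
 */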
693 VkResult result = VK_TIMEOUT;
694
695 struct u_vector poll_fds = { 0 };
696 uint32_t lowest_timestamp = 0;
697
698 if (convert_ts_to_fd || num_fds > 0)
699 u_vector_init(&poll_fds, 4, sizeof(struct pollfd));
700
701 if (convert_ts_to_fd) {
702 kgsl_syncobj_foreach_state(syncobjs, KGSL_SYNCOBJ_STATE_TS) {
703 struct pollfd *poll_fd = (struct pollfd *) u_vector_add(&poll_fds);
704 poll_fd->fd = timestamp_to_fd(sync->queue, sync->timestamp);
705 poll_fd->events = POLLIN;
706 }
707 } else {
708 /* TSs could be merged by finding the one with the lowest timestamp */
709 bool first_ts = true;
710 kgsl_syncobj_foreach_state(syncobjs, KGSL_SYNCOBJ_STATE_TS) {
711 if (first_ts || timestamp_cmp(sync->timestamp, lowest_timestamp)) {
712 first_ts = false;
713 lowest_timestamp = sync->timestamp;
714 }
715 }
716
717 if (num_fds) {
718 struct pollfd *poll_fd = (struct pollfd *) u_vector_add(&poll_fds);
719 poll_fd->fd = timestamp_to_fd(queue, lowest_timestamp);
720 poll_fd->events = POLLIN;
721 }
722 }
723
724 if (num_fds) {
725 kgsl_syncobj_foreach_state(syncobjs, KGSL_SYNCOBJ_STATE_FD) {
726 struct pollfd *poll_fd = (struct pollfd *) u_vector_add(&poll_fds);
727 poll_fd->fd = sync->fd;
728 poll_fd->events = POLLIN;
729 }
730 }
731
732 if (u_vector_length(&poll_fds) == 0) {
733 result = wait_timestamp_safe(device->fd, queue->msm_queue_id,
734 lowest_timestamp, MIN2(abs_timeout_ns, INT64_MAX));
735 } else {
736 int ret, i;
737
738 struct pollfd *fds = (struct pollfd *) poll_fds.data;
739 uint32_t fds_count = u_vector_length(&poll_fds);
740 do {
741 ret = poll(fds, fds_count, get_relative_ms(abs_timeout_ns));
742 if (ret > 0) {
743 for (i = 0; i < fds_count; i++) {
744 if (fds[i].revents & (POLLERR | POLLNVAL)) {
745 errno = EINVAL;
746 ret = -1;
747 break;
748 }
749 }
750 break;
751 } else if (ret == 0) {
752 errno = ETIME;
753 break;
754 }
755 } while (ret == -1 && (errno == EINTR || errno == EAGAIN));
756
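/* Only close the FDs we created from timestamps above; the FDs owned by
 * the syncobjs themselves were appended last and must stay open.
 */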
757 for (uint32_t i = 0; i < fds_count - num_fds; i++)
758 close(fds[i].fd);
759
760 if (ret != 0) {
761 assert(errno == ETIME);
762 result = VK_TIMEOUT;
763 } else {
764 result = VK_SUCCESS;
765 }
766 }
767
768 u_vector_finish(&poll_fds);
769 return result;
770 }
771
772 static VkResult
773 kgsl_syncobj_export(struct kgsl_syncobj *s, int *pFd)
774 {
775 if (!pFd)
776 return VK_SUCCESS;
777
778 switch (s->state) {
779 case KGSL_SYNCOBJ_STATE_SIGNALED:
780 case KGSL_SYNCOBJ_STATE_UNSIGNALED:
781 /* Getting a sync FD from an unsignaled syncobj is UB in Vulkan */
782 *pFd = -1;
783 return VK_SUCCESS;
784
785 case KGSL_SYNCOBJ_STATE_FD:
786 if (s->fd < 0)
787 *pFd = -1;
788 else
789 *pFd = dup(s->fd);
790 return VK_SUCCESS;
791
792 case KGSL_SYNCOBJ_STATE_TS:
793 *pFd = kgsl_syncobj_ts_to_fd(s);
794 return VK_SUCCESS;
795
796 default:
797 unreachable("Invalid syncobj state");
798 }
799 }
800
801 static VkResult
802 kgsl_syncobj_import(struct kgsl_syncobj *s, int fd)
803 {
804 kgsl_syncobj_reset(s);
805 if (fd >= 0) {
806 s->state = KGSL_SYNCOBJ_STATE_FD;
807 s->fd = fd;
808 } else {
809 s->state = KGSL_SYNCOBJ_STATE_SIGNALED;
810 }
811
812 return VK_SUCCESS;
813 }
814
815 static int
816 sync_merge_close(const char *name, int fd1, int fd2, bool close_fd2)
817 {
818 int fd = sync_merge(name, fd1, fd2);
819 if (fd < 0)
820 return -1;
821
822 close(fd1);
823 if (close_fd2)
824 close(fd2);
825
826 return fd;
827 }
828
829 /* Merges multiple kgsl_syncobjs into a single one which is only signalled
830 * after all submitted syncobjs are signalled
831 */
832 static struct kgsl_syncobj
833 kgsl_syncobj_merge(const struct kgsl_syncobj **syncobjs, uint32_t count)
834 {
835 struct kgsl_syncobj ret;
836 kgsl_syncobj_init(&ret, true);
837
838 if (count == 0)
839 return ret;
840
841 for (uint32_t i = 0; i < count; ++i) {
842 const struct kgsl_syncobj *sync = syncobjs[i];
843
844 switch (sync->state) {
845 case KGSL_SYNCOBJ_STATE_SIGNALED:
846 break;
847
848 case KGSL_SYNCOBJ_STATE_UNSIGNALED:
849 kgsl_syncobj_reset(&ret);
850 return ret;
851
852 case KGSL_SYNCOBJ_STATE_TS:
853 if (ret.state == KGSL_SYNCOBJ_STATE_TS) {
854 if (ret.queue == sync->queue) {
855 ret.timestamp = max_ts(ret.timestamp, sync->timestamp);
856 } else {
857 ret.state = KGSL_SYNCOBJ_STATE_FD;
858 int sync_fd = kgsl_syncobj_ts_to_fd(sync);
859 ret.fd = sync_merge_close("tu_sync", ret.fd, sync_fd, true);
860 assert(ret.fd >= 0);
861 }
862 } else if (ret.state == KGSL_SYNCOBJ_STATE_FD) {
863 int sync_fd = kgsl_syncobj_ts_to_fd(sync);
864 ret.fd = sync_merge_close("tu_sync", ret.fd, sync_fd, true);
865 assert(ret.fd >= 0);
866 } else {
867 ret = *sync;
868 }
869 break;
870
871 case KGSL_SYNCOBJ_STATE_FD:
872 if (ret.state == KGSL_SYNCOBJ_STATE_FD) {
873 ret.fd = sync_merge_close("tu_sync", ret.fd, sync->fd, false);
874 assert(ret.fd >= 0);
875 } else if (ret.state == KGSL_SYNCOBJ_STATE_TS) {
876 ret.state = KGSL_SYNCOBJ_STATE_FD;
877 int sync_fd = kgsl_syncobj_ts_to_fd(sync);
878 ret.fd = sync_merge_close("tu_sync", ret.fd, sync_fd, true);
879 assert(ret.fd >= 0);
880 } else {
881 ret = *sync;
882 ret.fd = dup(ret.fd);
883 assert(ret.fd >= 0);
884 }
885 break;
886
887 default:
888 unreachable("invalid syncobj state");
889 }
890 }
891
892 return ret;
893 }
894
895 struct vk_kgsl_syncobj
896 {
897 struct vk_sync vk;
898 struct kgsl_syncobj syncobj;
899 };
900
901 static VkResult
902 vk_kgsl_sync_init(struct vk_device *device,
903 struct vk_sync *sync,
904 uint64_t initial_value)
905 {
906 struct vk_kgsl_syncobj *s = container_of(sync, struct vk_kgsl_syncobj, vk);
907 kgsl_syncobj_init(&s->syncobj, initial_value != 0);
908 return VK_SUCCESS;
909 }
910
911 static void
912 vk_kgsl_sync_finish(struct vk_device *device, struct vk_sync *sync)
913 {
914 struct vk_kgsl_syncobj *s = container_of(sync, struct vk_kgsl_syncobj, vk);
915 kgsl_syncobj_destroy(&s->syncobj);
916 }
917
918 static VkResult
919 vk_kgsl_sync_reset(struct vk_device *device, struct vk_sync *sync)
920 {
921 struct vk_kgsl_syncobj *s = container_of(sync, struct vk_kgsl_syncobj, vk);
922 kgsl_syncobj_reset(&s->syncobj);
923 return VK_SUCCESS;
924 }
925
926 static VkResult
927 vk_kgsl_sync_move(struct vk_device *device,
928 struct vk_sync *dst,
929 struct vk_sync *src)
930 {
931 struct vk_kgsl_syncobj *d = container_of(dst, struct vk_kgsl_syncobj, vk);
932 struct vk_kgsl_syncobj *s = container_of(src, struct vk_kgsl_syncobj, vk);
933 kgsl_syncobj_reset(&d->syncobj);
934 d->syncobj = s->syncobj;
935 kgsl_syncobj_init(&s->syncobj, false);
936 return VK_SUCCESS;
937 }
938
939 static VkResult
940 vk_kgsl_sync_wait(struct vk_device *_device,
941 struct vk_sync *sync,
942 uint64_t wait_value,
943 enum vk_sync_wait_flags wait_flags,
944 uint64_t abs_timeout_ns)
945 {
946 struct tu_device *device = container_of(_device, struct tu_device, vk);
947 struct vk_kgsl_syncobj *s = container_of(sync, struct vk_kgsl_syncobj, vk);
948
949 if (wait_flags & VK_SYNC_WAIT_PENDING)
950 return VK_SUCCESS;
951
952 return kgsl_syncobj_wait(device, &s->syncobj, abs_timeout_ns);
953 }
954
955 static VkResult
956 vk_kgsl_sync_wait_many(struct vk_device *_device,
957 uint32_t wait_count,
958 const struct vk_sync_wait *waits,
959 enum vk_sync_wait_flags wait_flags,
960 uint64_t abs_timeout_ns)
961 {
962 struct tu_device *device = container_of(_device, struct tu_device, vk);
963
964 if (wait_flags & VK_SYNC_WAIT_PENDING)
965 return VK_SUCCESS;
966
967 if (wait_flags & VK_SYNC_WAIT_ANY) {
968 struct kgsl_syncobj *syncobjs[wait_count];
969 for (uint32_t i = 0; i < wait_count; i++) {
970 syncobjs[i] =
971 &container_of(waits[i].sync, struct vk_kgsl_syncobj, vk)->syncobj;
972 }
973
974 return kgsl_syncobj_wait_any(device, syncobjs, wait_count,
975 abs_timeout_ns);
976 } else {
977 for (uint32_t i = 0; i < wait_count; i++) {
978 struct vk_kgsl_syncobj *s =
979 container_of(waits[i].sync, struct vk_kgsl_syncobj, vk);
980
981 VkResult result =
982 kgsl_syncobj_wait(device, &s->syncobj, abs_timeout_ns);
983 if (result != VK_SUCCESS)
984 return result;
985 }
986 return VK_SUCCESS;
987 }
988 }
989
990 static VkResult
991 vk_kgsl_sync_import_sync_file(struct vk_device *device,
992 struct vk_sync *sync,
993 int fd)
994 {
995 struct vk_kgsl_syncobj *s = container_of(sync, struct vk_kgsl_syncobj, vk);
996 if (fd >= 0) {
997 fd = dup(fd);
998 if (fd < 0) {
999 mesa_loge("vk_kgsl_sync_import_sync_file: dup failed: %s",
1000 strerror(errno));
1001 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1002 }
1003 }
1004 return kgsl_syncobj_import(&s->syncobj, fd);
1005 }
1006
1007 static VkResult
1008 vk_kgsl_sync_export_sync_file(struct vk_device *device,
1009 struct vk_sync *sync,
1010 int *pFd)
1011 {
1012 struct vk_kgsl_syncobj *s = container_of(sync, struct vk_kgsl_syncobj, vk);
1013 return kgsl_syncobj_export(&s->syncobj, pFd);
1014 }
1015
1016 const struct vk_sync_type vk_kgsl_sync_type = {
1017 .size = sizeof(struct vk_kgsl_syncobj),
1018 .features = (enum vk_sync_features)
1019 (VK_SYNC_FEATURE_BINARY |
1020 VK_SYNC_FEATURE_GPU_WAIT |
1021 VK_SYNC_FEATURE_GPU_MULTI_WAIT |
1022 VK_SYNC_FEATURE_CPU_WAIT |
1023 VK_SYNC_FEATURE_CPU_RESET |
1024 VK_SYNC_FEATURE_WAIT_ANY |
1025 VK_SYNC_FEATURE_WAIT_PENDING),
1026 .init = vk_kgsl_sync_init,
1027 .finish = vk_kgsl_sync_finish,
1028 .reset = vk_kgsl_sync_reset,
1029 .move = vk_kgsl_sync_move,
1030 .wait = vk_kgsl_sync_wait,
1031 .wait_many = vk_kgsl_sync_wait_many,
1032 .import_sync_file = vk_kgsl_sync_import_sync_file,
1033 .export_sync_file = vk_kgsl_sync_export_sync_file,
1034 };
1035
1036 struct tu_kgsl_queue_submit {
1037 struct util_dynarray commands;
1038 };
1039
1040 static void *
1041 kgsl_submit_create(struct tu_device *device)
1042 {
1043 return vk_zalloc(&device->vk.alloc, sizeof(struct tu_kgsl_queue_submit), 8,
1044 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1045 }
1046
1047 static void
1048 kgsl_submit_finish(struct tu_device *device,
1049 void *_submit)
1050 {
1051 struct tu_kgsl_queue_submit *submit =
1052 (struct tu_kgsl_queue_submit *)_submit;
1053
1054 util_dynarray_fini(&submit->commands);
1055 vk_free(&device->vk.alloc, submit);
1056 }
1057
1058 static void
1059 kgsl_submit_add_entries(struct tu_device *device, void *_submit,
1060 struct tu_cs_entry *entries, unsigned num_entries)
1061 {
1062 struct tu_kgsl_queue_submit *submit =
1063 (struct tu_kgsl_queue_submit *)_submit;
1064
1065 struct kgsl_command_object *cmds = (struct kgsl_command_object *)
1066 util_dynarray_grow(&submit->commands, struct kgsl_command_object,
1067 num_entries);
1068
1069 for (unsigned i = 0; i < num_entries; i++) {
1070 cmds[i] = (struct kgsl_command_object) {
1071 .gpuaddr = entries[i].bo->iova + entries[i].offset,
1072 .size = entries[i].size,
1073 .flags = KGSL_CMDLIST_IB,
1074 .id = entries[i].bo->gem_handle,
1075 };
1076 }
1077 }
1078
1079 static VkResult
1080 kgsl_queue_submit(struct tu_queue *queue, void *_submit,
1081 struct vk_sync_wait *waits, uint32_t wait_count,
1082 struct vk_sync_signal *signals, uint32_t signal_count,
1083 struct tu_u_trace_submission_data *u_trace_submission_data)
1084 {
1085 struct tu_kgsl_queue_submit *submit =
1086 (struct tu_kgsl_queue_submit *)_submit;
1087
1088 #if HAVE_PERFETTO
1089 uint64_t start_ts = tu_perfetto_begin_submit();
1090 #endif
1091
1092 if (submit->commands.size == 0) {
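/* An empty submit only propagates synchronization state: merge all waits
 * (plus the timestamp of the last real submit on this queue) and hand the
 * merged syncobj to every signal semaphore without going to the kernel.
 */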
1093 const struct kgsl_syncobj *wait_semaphores[wait_count + 1];
1094 for (uint32_t i = 0; i < wait_count; i++) {
1095 wait_semaphores[i] = &container_of(waits[i].sync,
1096 struct vk_kgsl_syncobj, vk)
1097 ->syncobj;
1098 }
1099
1100 struct kgsl_syncobj last_submit_sync;
1101 if (queue->fence >= 0)
1102 last_submit_sync = (struct kgsl_syncobj) {
1103 .state = KGSL_SYNCOBJ_STATE_TS,
1104 .queue = queue,
1105 .timestamp = queue->fence,
1106 };
1107 else
1108 last_submit_sync = (struct kgsl_syncobj) {
1109 .state = KGSL_SYNCOBJ_STATE_SIGNALED,
1110 };
1111
1112 wait_semaphores[wait_count] = &last_submit_sync;
1113
1114 struct kgsl_syncobj wait_sync =
1115 kgsl_syncobj_merge(wait_semaphores, wait_count + 1);
1116 assert(wait_sync.state !=
1117 KGSL_SYNCOBJ_STATE_UNSIGNALED); // Would wait forever
1118
1119 for (uint32_t i = 0; i < signal_count; i++) {
1120 struct kgsl_syncobj *signal_sync =
1121 &container_of(signals[i].sync, struct vk_kgsl_syncobj, vk)
1122 ->syncobj;
1123
1124 kgsl_syncobj_reset(signal_sync);
1125 *signal_sync = wait_sync;
1126 }
1127
1128 return VK_SUCCESS;
1129 }
1130
1131 VkResult result = VK_SUCCESS;
1132
1133 if (u_trace_submission_data) {
1134 mtx_lock(&queue->device->kgsl_profiling_mutex);
1135 tu_suballoc_bo_alloc(&u_trace_submission_data->kgsl_timestamp_bo,
1136 &queue->device->kgsl_profiling_suballoc,
1137 sizeof(struct kgsl_cmdbatch_profiling_buffer), 4);
1138 mtx_unlock(&queue->device->kgsl_profiling_mutex);
1139 }
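/* The suballocated profiling buffer is passed to the kernel below with
 * KGSL_OBJLIST_PROFILE so the submit's wall-clock and GPU-tick timestamps
 * land in it for u_trace/perfetto.
 */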
1140
1141 uint32_t obj_count = 0;
1142 if (u_trace_submission_data)
1143 obj_count++;
1144
1145 struct kgsl_command_object *objs = (struct kgsl_command_object *)
1146 vk_alloc(&queue->device->vk.alloc, sizeof(*objs) * obj_count,
1147 alignof(*objs), VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
1148
1149 struct kgsl_cmdbatch_profiling_buffer *profiling_buffer = NULL;
1150 uint32_t obj_idx = 0;
1151 if (u_trace_submission_data) {
1152 struct tu_suballoc_bo *bo = &u_trace_submission_data->kgsl_timestamp_bo;
1153
1154 objs[obj_idx++] = (struct kgsl_command_object) {
1155 .offset = bo->iova - bo->bo->iova,
1156 .gpuaddr = bo->bo->iova,
1157 .size = sizeof(struct kgsl_cmdbatch_profiling_buffer),
1158 .flags = KGSL_OBJLIST_MEMOBJ | KGSL_OBJLIST_PROFILE,
1159 .id = bo->bo->gem_handle,
1160 };
1161 profiling_buffer =
1162 (struct kgsl_cmdbatch_profiling_buffer *) tu_suballoc_bo_map(bo);
1163 memset(profiling_buffer, 0, sizeof(*profiling_buffer));
1164 }
1165
1166 const struct kgsl_syncobj *wait_semaphores[wait_count];
1167 for (uint32_t i = 0; i < wait_count; i++) {
1168 wait_semaphores[i] =
1169 &container_of(waits[i].sync, struct vk_kgsl_syncobj, vk)
1170 ->syncobj;
1171 }
1172
1173 struct kgsl_syncobj wait_sync =
1174 kgsl_syncobj_merge(wait_semaphores, wait_count);
1175 assert(wait_sync.state !=
1176 KGSL_SYNCOBJ_STATE_UNSIGNALED); // Would wait forever
1177
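/* Express the merged wait as a single KGSL syncpoint: a timestamp
 * syncpoint when it resolved to a timestamp on some queue, or a fence
 * syncpoint when it had to be collapsed into a sync FD.
 */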
1178 struct kgsl_cmd_syncpoint_timestamp ts;
1179 struct kgsl_cmd_syncpoint_fence fn;
1180 struct kgsl_command_syncpoint sync = { 0 };
1181 bool has_sync = false;
1182 switch (wait_sync.state) {
1183 case KGSL_SYNCOBJ_STATE_SIGNALED:
1184 break;
1185
1186 case KGSL_SYNCOBJ_STATE_TS:
1187 ts.context_id = wait_sync.queue->msm_queue_id;
1188 ts.timestamp = wait_sync.timestamp;
1189
1190 has_sync = true;
1191 sync.type = KGSL_CMD_SYNCPOINT_TYPE_TIMESTAMP;
1192 sync.priv = (uintptr_t) &ts;
1193 sync.size = sizeof(ts);
1194 break;
1195
1196 case KGSL_SYNCOBJ_STATE_FD:
1197 fn.fd = wait_sync.fd;
1198
1199 has_sync = true;
1200 sync.type = KGSL_CMD_SYNCPOINT_TYPE_FENCE;
1201 sync.priv = (uintptr_t) &fn;
1202 sync.size = sizeof(fn);
1203 break;
1204
1205 default:
1206 unreachable("invalid syncobj state");
1207 }
1208
1209 struct kgsl_gpu_command req = {
1210 .flags = KGSL_CMDBATCH_SUBMIT_IB_LIST,
1211 .cmdlist = (uintptr_t) submit->commands.data,
1212 .cmdsize = sizeof(struct kgsl_command_object),
1213 .numcmds = util_dynarray_num_elements(&submit->commands,
1214 struct kgsl_command_object),
1215 .synclist = (uintptr_t) &sync,
1216 .syncsize = sizeof(sync),
1217 .numsyncs = has_sync != 0 ? 1 : 0,
1218 .context_id = queue->msm_queue_id,
1219 };
1220
1221 if (obj_idx) {
1222 req.flags |= KGSL_CMDBATCH_PROFILING;
1223 req.objlist = (uintptr_t) objs;
1224 req.objsize = sizeof(struct kgsl_command_object);
1225 req.numobjs = obj_idx;
1226 }
1227
1228 int ret = safe_ioctl(queue->device->physical_device->local_fd,
1229 IOCTL_KGSL_GPU_COMMAND, &req);
1230
1231 uint64_t gpu_offset = 0;
1232 #if HAVE_PERFETTO
1233 if (profiling_buffer) {
1234 /* We need to wait for KGSL to queue the GPU command before we can read
1235 * the timestamp. Since this is just for profiling and doesn't take too
1236 * long, we can just busy-wait for it.
1237 */
1238 while (p_atomic_read(&profiling_buffer->gpu_ticks_queued) == 0);
1239
1240 struct kgsl_perfcounter_read_group perf = {
1241 .groupid = KGSL_PERFCOUNTER_GROUP_ALWAYSON,
1242 .countable = 0,
1243 .value = 0
1244 };
1245
1246 struct kgsl_perfcounter_read req = {
1247 .reads = &perf,
1248 .count = 1,
1249 };
1250
1251 ret = safe_ioctl(queue->device->fd, IOCTL_KGSL_PERFCOUNTER_READ, &req);
1252 /* Older KGSL kernels leave garbage in the upper 32 bits, so mask them off */
1253 uint64_t offseted_gpu_ts = perf.value & 0xffffffff;
1254
1255 gpu_offset = tu_device_ticks_to_ns(
1256 queue->device, offseted_gpu_ts - profiling_buffer->gpu_ticks_queued);
1257
1258 struct tu_perfetto_clocks clocks = {
1259 .cpu = profiling_buffer->wall_clock_ns,
1260 .gpu_ts = tu_device_ticks_to_ns(queue->device,
1261 profiling_buffer->gpu_ticks_queued),
1262 .gpu_ts_offset = gpu_offset,
1263 };
1264
1265 clocks = tu_perfetto_end_submit(queue, queue->device->submit_count,
1266 start_ts, &clocks);
1267 gpu_offset = clocks.gpu_ts_offset;
1268 }
1269 #endif
1270
1271 kgsl_syncobj_destroy(&wait_sync);
1272
1273 if (ret) {
1274 result = vk_device_set_lost(&queue->device->vk, "submit failed: %s\n",
1275 strerror(errno));
1276 goto fail_submit;
1277 }
1278
1279 p_atomic_set(&queue->fence, req.timestamp);
1280
1281 for (uint32_t i = 0; i < signal_count; i++) {
1282 struct kgsl_syncobj *signal_sync =
1283 &container_of(signals[i].sync, struct vk_kgsl_syncobj, vk)
1284 ->syncobj;
1285
1286 kgsl_syncobj_reset(signal_sync);
1287 signal_sync->state = KGSL_SYNCOBJ_STATE_TS;
1288 signal_sync->queue = queue;
1289 signal_sync->timestamp = req.timestamp;
1290 }
1291
1292 if (u_trace_submission_data) {
1293 struct tu_u_trace_submission_data *submission_data =
1294 u_trace_submission_data;
1295 submission_data->gpu_ts_offset = gpu_offset;
1296 }
1297
1298 fail_submit:
1299 if (result != VK_SUCCESS) {
1300 mtx_lock(&queue->device->kgsl_profiling_mutex);
1301 tu_suballoc_bo_free(&queue->device->kgsl_profiling_suballoc,
1302 &u_trace_submission_data->kgsl_timestamp_bo);
1303 mtx_unlock(&queue->device->kgsl_profiling_mutex);
1304 }
1305
1306 return result;
1307 }
1308
1309 static VkResult
1310 kgsl_device_init(struct tu_device *dev)
1311 {
1312 dev->fd = dev->physical_device->local_fd;
1313 return VK_SUCCESS;
1314 }
1315
1316 static void
1317 kgsl_device_finish(struct tu_device *dev)
1318 {
1319 /* No-op */
1320 }
1321
1322 static int
1323 kgsl_device_get_gpu_timestamp(struct tu_device *dev, uint64_t *ts)
1324 {
1325 unreachable("");
1326 return 0;
1327 }
1328
1329 static int
1330 kgsl_device_get_suspend_count(struct tu_device *dev, uint64_t *suspend_count)
1331 {
1332 /* kgsl doesn't have a way to get it */
1333 *suspend_count = 0;
1334 return 0;
1335 }
1336
1337 static VkResult
1338 kgsl_device_check_status(struct tu_device *device)
1339 {
1340 for (unsigned i = 0; i < TU_MAX_QUEUE_FAMILIES; i++) {
1341 for (unsigned q = 0; q < device->queue_count[i]; q++) {
1342 /* KGSL's KGSL_PROP_GPU_RESET_STAT takes the u32 msm_queue_id and returns a
1343 * KGSL_CTX_STAT_* for the worst reset that happened since the last time it
1344 * was queried on that queue.
1345 */
1346 uint32_t value = device->queues[i][q].msm_queue_id;
1347 VkResult status = get_kgsl_prop(device->fd, KGSL_PROP_GPU_RESET_STAT,
1348 &value, sizeof(value));
1349 if (status != VK_SUCCESS)
1350 return vk_device_set_lost(&device->vk, "Failed to get GPU reset status");
1351
1352 if (value != KGSL_CTX_STAT_NO_ERROR &&
1353 value != KGSL_CTX_STAT_INNOCENT_CONTEXT_RESET_EXT) {
1354 return vk_device_set_lost(&device->vk, "GPU faulted or hung");
1355 }
1356 }
1357 }
1358
1359 return VK_SUCCESS;
1360 }
1361
1362 static const struct tu_knl kgsl_knl_funcs = {
1363 .name = "kgsl",
1364
1365 .device_init = kgsl_device_init,
1366 .device_finish = kgsl_device_finish,
1367 .device_get_gpu_timestamp = kgsl_device_get_gpu_timestamp,
1368 .device_get_suspend_count = kgsl_device_get_suspend_count,
1369 .device_check_status = kgsl_device_check_status,
1370 .submitqueue_new = kgsl_submitqueue_new,
1371 .submitqueue_close = kgsl_submitqueue_close,
1372 .bo_init = kgsl_bo_init,
1373 .bo_init_dmabuf = kgsl_bo_init_dmabuf,
1374 .bo_export_dmabuf = kgsl_bo_export_dmabuf,
1375 .bo_map = kgsl_bo_map,
1376 .bo_allow_dump = kgsl_bo_allow_dump,
1377 .bo_finish = kgsl_bo_finish,
1378 .submit_create = kgsl_submit_create,
1379 .submit_finish = kgsl_submit_finish,
1380 .submit_add_entries = kgsl_submit_add_entries,
1381 .queue_submit = kgsl_queue_submit,
1382 .queue_wait_fence = kgsl_queue_wait_fence,
1383 };
1384
1385 static bool
1386 tu_kgsl_get_raytracing(int fd)
1387 {
1388 uint32_t value;
1389 int ret = get_kgsl_prop(fd, KGSL_PROP_IS_RAYTRACING_ENABLED, &value, sizeof(value));
1390 if (ret)
1391 return false;
1392
1393 return value;
1394 }
1395
1396 VkResult
1397 tu_knl_kgsl_load(struct tu_instance *instance, int fd)
1398 {
1399 if (instance->vk.enabled_extensions.KHR_display) {
1400 return vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
1401 "I can't KHR_display");
1402 }
1403
1404 struct tu_physical_device *device = (struct tu_physical_device *)
1405 vk_zalloc(&instance->vk.alloc, sizeof(*device), 8,
1406 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
1407 if (!device) {
1408 close(fd);
1409 return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1410 }
1411
1412 static const char dma_heap_path[] = "/dev/dma_heap/system";
1413 static const char ion_path[] = "/dev/ion";
1414 int dma_fd;
1415
1416 dma_fd = open(dma_heap_path, O_RDONLY);
1417 if (dma_fd >= 0) {
1418 device->kgsl_dma_type = TU_KGSL_DMA_TYPE_DMAHEAP;
1419 } else {
1420 dma_fd = open(ion_path, O_RDONLY);
1421 if (dma_fd >= 0) {
1422 /* ION_IOC_FREE available only for legacy ION */
1423 struct ion_handle_data free = { .handle = 0 };
1424 if (safe_ioctl(dma_fd, ION_IOC_FREE, &free) >= 0 || errno != ENOTTY)
1425 device->kgsl_dma_type = TU_KGSL_DMA_TYPE_ION_LEGACY;
1426 else
1427 device->kgsl_dma_type = TU_KGSL_DMA_TYPE_ION;
1428 } else {
1429 mesa_logw(
1430 "Unable to open neither %s nor %s, VK_KHR_external_memory_fd would be "
1431 "unavailable: %s",
1432 dma_heap_path, ion_path, strerror(errno));
1433 }
1434 }
1435
1436 VkResult result = VK_ERROR_INITIALIZATION_FAILED;
1437
1438 struct kgsl_devinfo info;
1439 if (get_kgsl_prop(fd, KGSL_PROP_DEVICE_INFO, &info, sizeof(info)))
1440 goto fail;
1441
1442 uint64_t gmem_iova;
1443 if (get_kgsl_prop(fd, KGSL_PROP_UCHE_GMEM_VADDR, &gmem_iova, sizeof(gmem_iova)))
1444 goto fail;
1445
1446 uint32_t highest_bank_bit;
1447 if (get_kgsl_prop(fd, KGSL_PROP_HIGHEST_BANK_BIT, &highest_bank_bit,
1448 sizeof(highest_bank_bit)))
1449 goto fail;
1450
1451 uint32_t ubwc_version;
1452 if (get_kgsl_prop(fd, KGSL_PROP_UBWC_MODE, &ubwc_version,
1453 sizeof(ubwc_version)))
1454 goto fail;
1455
1456
1457 /* kgsl version check? */
1458
1459 device->instance = instance;
1460 device->master_fd = -1;
1461 device->local_fd = fd;
1462 device->kgsl_dma_fd = dma_fd;
1463
1464 device->dev_id.gpu_id =
1465 ((info.chip_id >> 24) & 0xff) * 100 +
1466 ((info.chip_id >> 16) & 0xff) * 10 +
1467 ((info.chip_id >> 8) & 0xff);
1468 device->dev_id.chip_id = info.chip_id;
1469 device->gmem_size = debug_get_num_option("TU_GMEM", info.gmem_sizebytes);
1470 device->gmem_base = gmem_iova;
1471
1472 device->has_raytracing = tu_kgsl_get_raytracing(fd);
1473
1474 device->submitqueue_priority_count = 1;
1475
1476 device->timeline_type = vk_sync_timeline_get_type(&vk_kgsl_sync_type);
1477
1478 device->sync_types[0] = &vk_kgsl_sync_type;
1479 device->sync_types[1] = &device->timeline_type.sync;
1480 device->sync_types[2] = NULL;
1481
1482 device->heap.size = tu_get_system_heap_size(device);
1483 device->heap.used = 0u;
1484 device->heap.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT;
1485
1486 device->has_set_iova = kgsl_is_memory_type_supported(
1487 fd, KGSL_MEMFLAGS_USE_CPU_MAP);
1488
1489 /* Even if kernel is new enough, the GPU itself may not support it. */
1490 device->has_cached_coherent_memory = kgsl_is_memory_type_supported(
1491 fd, KGSL_MEMFLAGS_IOCOHERENT |
1492 (KGSL_CACHEMODE_WRITEBACK << KGSL_CACHEMODE_SHIFT));
1493
1494 /* preemption is always supported on kgsl */
1495 device->has_preemption = true;
1496
1497 device->ubwc_config.highest_bank_bit = highest_bank_bit;
1498
1499 /* The other config values can be partially inferred from the UBWC version,
1500 * but kgsl also hardcodes overrides for specific a6xx versions that we
1501 * have to follow here. Yuck.
1502 */
1503 switch (ubwc_version) {
1504 case KGSL_UBWC_1_0:
1505 device->ubwc_config.bank_swizzle_levels = 0x7;
1506 device->ubwc_config.macrotile_mode = FDL_MACROTILE_4_CHANNEL;
1507 break;
1508 case KGSL_UBWC_2_0:
1509 device->ubwc_config.bank_swizzle_levels = 0x6;
1510 device->ubwc_config.macrotile_mode = FDL_MACROTILE_4_CHANNEL;
1511 break;
1512 case KGSL_UBWC_3_0:
1513 device->ubwc_config.bank_swizzle_levels = 0x6;
1514 device->ubwc_config.macrotile_mode = FDL_MACROTILE_4_CHANNEL;
1515 break;
1516 case KGSL_UBWC_4_0:
1517 device->ubwc_config.bank_swizzle_levels = 0x6;
1518 device->ubwc_config.macrotile_mode = FDL_MACROTILE_8_CHANNEL;
1519 break;
1520 default:
1521 return vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
1522 "unknown UBWC version 0x%x", ubwc_version);
1523 }
1524
1525 /* kgsl unfortunately hardcodes some settings for certain GPUs and doesn't
1526 * expose them in the uAPI so hardcode them here to match.
1527 */
1528 if (device->dev_id.gpu_id == 663 || device->dev_id.gpu_id == 680) {
1529 device->ubwc_config.macrotile_mode = FDL_MACROTILE_8_CHANNEL;
1530 }
1531 if (device->dev_id.gpu_id == 663) {
1532 /* level2_swizzling_dis = 1 */
1533 device->ubwc_config.bank_swizzle_levels = 0x4;
1534 }
1535
1536 instance->knl = &kgsl_knl_funcs;
1537
1538 result = tu_physical_device_init(device, instance);
1539 if (result != VK_SUCCESS)
1540 goto fail;
1541
1542 list_addtail(&device->vk.link, &instance->vk.physical_devices.list);
1543
1544 return VK_SUCCESS;
1545
1546 fail:
1547 vk_free(&instance->vk.alloc, device);
1548 close(fd);
1549 if (dma_fd >= 0)
1550 close(dma_fd);
1551 return result;
1552 }
1553