1 /*
2  * Copyright © 2020 Google, Inc.
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #include "tu_knl.h"
7 
8 #include <errno.h>
9 #include <fcntl.h>
10 #include <poll.h>
11 #include <stdint.h>
12 #include <sys/ioctl.h>
13 #include <sys/mman.h>
14 #include <linux/dma-heap.h>
15 
16 #include "msm_kgsl.h"
17 #include "ion/ion.h"
18 #include "ion/ion_4.19.h"
19 
20 #include "vk_util.h"
21 
22 #include "util/os_file.h"
23 #include "util/u_debug.h"
24 #include "util/u_vector.h"
25 #include "util/libsync.h"
26 #include "util/timespec.h"
27 
28 #include "tu_cmd_buffer.h"
29 #include "tu_cs.h"
30 #include "tu_device.h"
31 #include "tu_dynamic_rendering.h"
32 #include "tu_queue.h"
33 #include "tu_rmv.h"
34 
35 /* ION_HEAP(ION_SYSTEM_HEAP_ID) */
36 #define KGSL_ION_SYSTEM_HEAP_MASK (1u << 25)
37 
38 
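/* Thin ioctl() wrapper that retries the request when the call is interrupted
 * by a signal (EINTR) or the kernel asks for a retry (EAGAIN).
 */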
39 static int
40 safe_ioctl(int fd, unsigned long request, void *arg)
41 {
42    int ret;
43 
44    do {
45       ret = ioctl(fd, request, arg);
46    } while (ret == -1 && (errno == EINTR || errno == EAGAIN));
47 
48    return ret;
49 }
50 
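/* Create a KGSL draw context ("submitqueue") and return its id; the id is
 * passed back to the kernel as context_id in command submissions and
 * timestamp waits.
 */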
51 static int
52 kgsl_submitqueue_new(struct tu_device *dev,
53                      int priority,
54                      uint32_t *queue_id)
55 {
56    struct kgsl_drawctxt_create req = {
57       .flags = KGSL_CONTEXT_SAVE_GMEM |
58               KGSL_CONTEXT_NO_GMEM_ALLOC |
59               KGSL_CONTEXT_PREAMBLE,
60    };
61 
62    int ret = safe_ioctl(dev->physical_device->local_fd, IOCTL_KGSL_DRAWCTXT_CREATE, &req);
63    if (ret)
64       return ret;
65 
66    *queue_id = req.drawctxt_id;
67 
68    return 0;
69 }
70 
71 static void
72 kgsl_submitqueue_close(struct tu_device *dev, uint32_t queue_id)
73 {
74    struct kgsl_drawctxt_destroy req = {
75       .drawctxt_id = queue_id,
76    };
77 
78    safe_ioctl(dev->physical_device->local_fd, IOCTL_KGSL_DRAWCTXT_DESTROY, &req);
79 }
80 
81 static void kgsl_bo_finish(struct tu_device *dev, struct tu_bo *bo);
82 
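/* Allocate exportable memory from the system DMA heap and import the
 * resulting dma-buf fd as a tu_bo.
 */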
83 static VkResult
84 bo_init_new_dmaheap(struct tu_device *dev, struct tu_bo **out_bo, uint64_t size,
85                 enum tu_bo_alloc_flags flags)
86 {
87    struct dma_heap_allocation_data alloc = {
88       .len = size,
89       .fd_flags = O_RDWR | O_CLOEXEC,
90    };
91 
92    int ret;
93    ret = safe_ioctl(dev->physical_device->kgsl_dma_fd, DMA_HEAP_IOCTL_ALLOC,
94                     &alloc);
95 
96    if (ret) {
97       return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
98                        "DMA_HEAP_IOCTL_ALLOC failed (%s)", strerror(errno));
99    }
100 
101    return tu_bo_init_dmabuf(dev, out_bo, -1, alloc.fd);
102 }
103 
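/* Allocate exportable memory from the ION system heap (modern ION uAPI) and
 * import the resulting dma-buf fd as a tu_bo.
 */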
104 static VkResult
105 bo_init_new_ion(struct tu_device *dev, struct tu_bo **out_bo, uint64_t size,
106                 enum tu_bo_alloc_flags flags)
107 {
108    struct ion_new_allocation_data alloc = {
109       .len = size,
110       .heap_id_mask = KGSL_ION_SYSTEM_HEAP_MASK,
111       .flags = 0,
112       .fd = -1,
113    };
114 
115    int ret;
116    ret = safe_ioctl(dev->physical_device->kgsl_dma_fd, ION_IOC_NEW_ALLOC, &alloc);
117    if (ret) {
118       return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
119                        "ION_IOC_NEW_ALLOC failed (%s)", strerror(errno));
120    }
121 
122    return tu_bo_init_dmabuf(dev, out_bo, -1, alloc.fd);
123 }
124 
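/* Legacy ION uAPI: allocate a handle, export it as a dma-buf fd via
 * ION_IOC_SHARE, then free the handle so only the fd keeps the buffer alive.
 */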
125 static VkResult
126 bo_init_new_ion_legacy(struct tu_device *dev, struct tu_bo **out_bo, uint64_t size,
127                        enum tu_bo_alloc_flags flags)
128 {
129    struct ion_allocation_data alloc = {
130       .len = size,
131       .align = 4096,
132       .heap_id_mask = KGSL_ION_SYSTEM_HEAP_MASK,
133       .flags = 0,
134       .handle = -1,
135    };
136 
137    int ret;
138    ret = safe_ioctl(dev->physical_device->kgsl_dma_fd, ION_IOC_ALLOC, &alloc);
139    if (ret) {
140       return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
141                        "ION_IOC_ALLOC failed (%s)", strerror(errno));
142    }
143 
144    struct ion_fd_data share = {
145       .handle = alloc.handle,
146       .fd = -1,
147    };
148 
149    ret = safe_ioctl(dev->physical_device->kgsl_dma_fd, ION_IOC_SHARE, &share);
150    if (ret) {
151       return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
152                        "ION_IOC_SHARE failed (%s)", strerror(errno));
153    }
154 
155    struct ion_handle_data free = {
156       .handle = alloc.handle,
157    };
158    ret = safe_ioctl(dev->physical_device->kgsl_dma_fd, ION_IOC_FREE, &free);
159    if (ret) {
160       return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
161                        "ION_IOC_FREE failed (%s)", strerror(errno));
162    }
163 
164    return tu_bo_init_dmabuf(dev, out_bo, -1, share.fd);
165 }
166 
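/* Allocate a BO. Shareable allocations go through the DMA heap or ION so
 * they can be exported as dma-bufs; everything else uses
 * IOCTL_KGSL_GPUMEM_ALLOC_ID. The cache mode and IO-coherency flags are
 * derived from the requested memory properties, and TU_BO_ALLOC_REPLAYABLE
 * allocations use KGSL_MEMFLAGS_USE_CPU_MAP so the GPU address matches the
 * CPU mapping.
 */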
167 static VkResult
168 kgsl_bo_init(struct tu_device *dev,
169              struct vk_object_base *base,
170              struct tu_bo **out_bo,
171              uint64_t size,
172              uint64_t client_iova,
173              VkMemoryPropertyFlags mem_property,
174              enum tu_bo_alloc_flags flags,
175              const char *name)
176 {
177    if (flags & TU_BO_ALLOC_SHAREABLE) {
178       /* The Vulkan spec doesn't forbid allocating exportable memory with a
179        * fixed address, only imported memory, but on kgsl we can't sensibly
180        * implement it so just always reject it.
181        */
182       if (client_iova) {
183          return vk_errorf(dev, VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS,
184                           "cannot allocate an exportable BO with a fixed address");
185       }
186 
187       switch(dev->physical_device->kgsl_dma_type) {
188       case TU_KGSL_DMA_TYPE_DMAHEAP:
189          return bo_init_new_dmaheap(dev, out_bo, size, flags);
190       case TU_KGSL_DMA_TYPE_ION:
191          return bo_init_new_ion(dev, out_bo, size, flags);
192       case TU_KGSL_DMA_TYPE_ION_LEGACY:
193          return bo_init_new_ion_legacy(dev, out_bo, size, flags);
194       }
195    }
196 
197    struct kgsl_gpumem_alloc_id req = {
198       .size = size,
199    };
200 
201    if (mem_property & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) {
202       if (mem_property & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) {
203          req.flags |= KGSL_MEMFLAGS_IOCOHERENT;
204       }
205 
206       req.flags |= KGSL_CACHEMODE_WRITEBACK << KGSL_CACHEMODE_SHIFT;
207    } else {
208       req.flags |= KGSL_CACHEMODE_WRITECOMBINE << KGSL_CACHEMODE_SHIFT;
209    }
210 
211    if (flags & TU_BO_ALLOC_GPU_READ_ONLY)
212       req.flags |= KGSL_MEMFLAGS_GPUREADONLY;
213 
214    if (flags & TU_BO_ALLOC_REPLAYABLE)
215       req.flags |= KGSL_MEMFLAGS_USE_CPU_MAP;
216 
217    int ret;
218 
219    ret = safe_ioctl(dev->physical_device->local_fd,
220                     IOCTL_KGSL_GPUMEM_ALLOC_ID, &req);
221    if (ret) {
222       return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
223                        "GPUMEM_ALLOC_ID failed (%s)", strerror(errno));
224    }
225 
226    struct tu_bo* bo = tu_device_lookup_bo(dev, req.id);
227    assert(bo && bo->gem_handle == 0);
228 
229    *bo = (struct tu_bo) {
230       .gem_handle = req.id,
231       .size = req.mmapsize,
232       .iova = req.gpuaddr,
233       .name = tu_debug_bos_add(dev, req.mmapsize, name),
234       .refcnt = 1,
235       .shared_fd = -1,
236       .base = base,
237    };
238 
239    if (flags & TU_BO_ALLOC_REPLAYABLE) {
240       uint64_t offset = req.id << 12;
241       void *map = mmap((void *)client_iova, bo->size, PROT_READ | PROT_WRITE,
242                        MAP_SHARED, dev->physical_device->local_fd, offset);
243       if (map == MAP_FAILED) {
244          kgsl_bo_finish(dev, bo);
245 
246          return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
247                           "mmap failed (%s)", strerror(errno));
248       }
249 
250       if (client_iova && (uint64_t)map != client_iova) {
251          kgsl_bo_finish(dev, bo);
252 
253          return vk_errorf(dev, VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS,
254                           "mmap could not map the given address");
255       }
256 
257       bo->map = map;
258       bo->iova = (uint64_t)map;
259 
260       /* Because we're using SVM, the CPU mapping and GPU mapping are the same
261        * and the CPU mapping must stay fixed for the lifetime of the BO.
262        */
263       bo->never_unmap = true;
264    }
265 
266    tu_dump_bo_init(dev, bo);
267 
268    *out_bo = bo;
269 
270    TU_RMV(bo_allocate, dev, bo);
271    if (flags & TU_BO_ALLOC_INTERNAL_RESOURCE) {
272       TU_RMV(internal_resource_create, dev, bo);
273       TU_RMV(resource_name, dev, bo, name);
274    }
275 
276    return VK_SUCCESS;
277 }
278 
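/* Import an external dma-buf: IOCTL_KGSL_GPUOBJ_IMPORT maps it into the GPU
 * address space and IOCTL_KGSL_GPUOBJ_INFO reports the resulting GPU address
 * and size.
 */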
279 static VkResult
280 kgsl_bo_init_dmabuf(struct tu_device *dev,
281                     struct tu_bo **out_bo,
282                     uint64_t size,
283                     int fd)
284 {
285    struct kgsl_gpuobj_import_dma_buf import_dmabuf = {
286       .fd = fd,
287    };
288    struct kgsl_gpuobj_import req = {
289       .priv = (uintptr_t)&import_dmabuf,
290       .priv_len = sizeof(import_dmabuf),
291       .flags = 0,
292       .type = KGSL_USER_MEM_TYPE_DMABUF,
293    };
294    int ret;
295 
296    ret = safe_ioctl(dev->physical_device->local_fd,
297                     IOCTL_KGSL_GPUOBJ_IMPORT, &req);
298    if (ret)
299       return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
300                        "Failed to import dma-buf (%s)\n", strerror(errno));
301 
302    struct kgsl_gpuobj_info info_req = {
303       .id = req.id,
304    };
305 
306    ret = safe_ioctl(dev->physical_device->local_fd,
307                     IOCTL_KGSL_GPUOBJ_INFO, &info_req);
308    if (ret)
309       return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
310                        "Failed to get dma-buf info (%s)\n", strerror(errno));
311 
312    struct tu_bo* bo = tu_device_lookup_bo(dev, req.id);
313    assert(bo && bo->gem_handle == 0);
314 
315    *bo = (struct tu_bo) {
316       .gem_handle = req.id,
317       .size = info_req.size,
318       .iova = info_req.gpuaddr,
319       .name = tu_debug_bos_add(dev, info_req.size, "dmabuf"),
320       .refcnt = 1,
321       .shared_fd = os_dupfd_cloexec(fd),
322    };
323 
324    tu_dump_bo_init(dev, bo);
325 
326    *out_bo = bo;
327 
328    return VK_SUCCESS;
329 }
330 
331 static int
332 kgsl_bo_export_dmabuf(struct tu_device *dev, struct tu_bo *bo)
333 {
334    assert(bo->shared_fd != -1);
335    return os_dupfd_cloexec(bo->shared_fd);
336 }
337 
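/* Map a BO for CPU access. KGSL-allocated BOs are mapped through the device
 * fd with (gem_handle << 12) as the mmap offset; imported dma-bufs are
 * mapped through their own fd.
 */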
338 static VkResult
339 kgsl_bo_map(struct tu_device *dev, struct tu_bo *bo, void *placed_addr)
340 {
341    void *map = MAP_FAILED;
342    if (bo->shared_fd == -1) {
343       uint64_t offset = bo->gem_handle << 12;
344       map = mmap(placed_addr, bo->size, PROT_READ | PROT_WRITE,
345                  MAP_SHARED | (placed_addr != NULL ? MAP_FIXED : 0),
346                  dev->physical_device->local_fd, offset);
347    } else {
348       map = mmap(placed_addr, bo->size, PROT_READ | PROT_WRITE,
349                  MAP_SHARED | (placed_addr != NULL ? MAP_FIXED : 0),
350                  bo->shared_fd, 0);
351    }
352 
353    if (map == MAP_FAILED)
354       return vk_error(dev, VK_ERROR_MEMORY_MAP_FAILED);
355 
356    bo->map = map;
357    TU_RMV(bo_map, dev, bo);
358 
359    return VK_SUCCESS;
360 }
361 
362 static void
363 kgsl_bo_allow_dump(struct tu_device *dev, struct tu_bo *bo)
364 {
365 }
366 
367 static void
368 kgsl_bo_finish(struct tu_device *dev, struct tu_bo *bo)
369 {
370    assert(bo->gem_handle);
371 
372    if (!p_atomic_dec_zero(&bo->refcnt))
373       return;
374 
375    if (bo->map) {
376       TU_RMV(bo_unmap, dev, bo);
377       munmap(bo->map, bo->size);
378    }
379 
380    if (bo->shared_fd != -1)
381       close(bo->shared_fd);
382 
383    TU_RMV(bo_destroy, dev, bo);
384    tu_debug_bos_del(dev, bo);
385    tu_dump_bo_del(dev, bo);
386 
387    struct kgsl_gpumem_free_id req = {
388       .id = bo->gem_handle
389    };
390 
391    /* Tell sparse array that entry is free */
392    memset(bo, 0, sizeof(*bo));
393 
394    safe_ioctl(dev->physical_device->local_fd, IOCTL_KGSL_GPUMEM_FREE_ID, &req);
395 }
396 
397 static VkResult
398 get_kgsl_prop(int fd, unsigned int type, void *value, size_t size)
399 {
400    struct kgsl_device_getproperty getprop = {
401       .type = type,
402       .value = value,
403       .sizebytes = size,
404    };
405 
406    return safe_ioctl(fd, IOCTL_KGSL_DEVICE_GETPROPERTY, &getprop)
407              ? VK_ERROR_UNKNOWN
408              : VK_SUCCESS;
409 }
410 
411 static bool
412 kgsl_is_memory_type_supported(int fd, uint32_t flags)
413 {
414    struct kgsl_gpumem_alloc_id req_alloc = {
415       .flags = flags,
416       .size = 0x1000,
417    };
418 
419    int ret = safe_ioctl(fd, IOCTL_KGSL_GPUMEM_ALLOC_ID, &req_alloc);
420    if (ret) {
421       return false;
422    }
423 
424    struct kgsl_gpumem_free_id req_free = { .id = req_alloc.id };
425 
426    safe_ioctl(fd, IOCTL_KGSL_GPUMEM_FREE_ID, &req_free);
427 
428    return true;
429 }
430 
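/* KGSL has no generic syncobj primitive, so one is emulated in userspace. A
 * syncobj is either unsignaled, already signaled, a (queue, timestamp) pair
 * that signals when the queue reaches that timestamp, or a sync-file fd.
 */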
431 enum kgsl_syncobj_state {
432    KGSL_SYNCOBJ_STATE_UNSIGNALED,
433    KGSL_SYNCOBJ_STATE_SIGNALED,
434    KGSL_SYNCOBJ_STATE_TS,
435    KGSL_SYNCOBJ_STATE_FD,
436 };
437 
438 struct kgsl_syncobj
439 {
440    struct vk_object_base base;
441    enum kgsl_syncobj_state state;
442 
443    struct tu_queue *queue;
444    uint32_t timestamp;
445 
446    int fd;
447 };
448 
449 static void
450 kgsl_syncobj_init(struct kgsl_syncobj *s, bool signaled)
451 {
452    s->state =
453       signaled ? KGSL_SYNCOBJ_STATE_SIGNALED : KGSL_SYNCOBJ_STATE_UNSIGNALED;
454 
455    s->timestamp = UINT32_MAX;
456    s->fd = -1;
457 }
458 
459 static void
460 kgsl_syncobj_reset(struct kgsl_syncobj *s)
461 {
462    if (s->state == KGSL_SYNCOBJ_STATE_FD && s->fd >= 0) {
463       ASSERTED int ret = close(s->fd);
464       assert(ret == 0);
465       s->fd = -1;
466    } else if (s->state == KGSL_SYNCOBJ_STATE_TS) {
467       s->timestamp = UINT32_MAX;
468    }
469 
470    s->state = KGSL_SYNCOBJ_STATE_UNSIGNALED;
471 }
472 
473 static void
474 kgsl_syncobj_destroy(struct kgsl_syncobj *s)
475 {
476    kgsl_syncobj_reset(s);
477 }
478 
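/* Convert a (queue, timestamp) pair into a sync-file fd using
 * IOCTL_KGSL_TIMESTAMP_EVENT; the fd signals once the queue retires the
 * timestamp. Returns -1 on failure.
 */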
479 static int
480 timestamp_to_fd(struct tu_queue *queue, uint32_t timestamp)
481 {
482    int fd;
483    struct kgsl_timestamp_event event = {
484       .type = KGSL_TIMESTAMP_EVENT_FENCE,
485       .timestamp = timestamp,
486       .context_id = queue->msm_queue_id,
487       .priv = &fd,
488       .len = sizeof(fd),
489    };
490 
491    int ret = safe_ioctl(queue->device->fd, IOCTL_KGSL_TIMESTAMP_EVENT, &event);
492    if (ret)
493       return -1;
494 
495    return fd;
496 }
497 
498 static int
499 kgsl_syncobj_ts_to_fd(const struct kgsl_syncobj *syncobj)
500 {
501    assert(syncobj->state == KGSL_SYNCOBJ_STATE_TS);
502    return timestamp_to_fd(syncobj->queue, syncobj->timestamp);
503 }
504 
505 /* return true if timestamp a is greater (more recent) than b
506  * this relies on timestamps never having a difference > (1<<31)
507  */
508 static inline bool
509 timestamp_cmp(uint32_t a, uint32_t b)
510 {
511    return (int32_t) (a - b) >= 0;
512 }
513 
514 static uint32_t
515 max_ts(uint32_t a, uint32_t b)
516 {
517    return timestamp_cmp(a, b) ? a : b;
518 }
519 
520 static uint32_t
521 min_ts(uint32_t a, uint32_t b)
522 {
523    return timestamp_cmp(a, b) ? b : a;
524 }
525 
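/* Convert an absolute timeout in nanoseconds into the relative millisecond
 * timeout expected by poll() and the KGSL wait ioctl: -1 means wait forever,
 * 0 means the deadline has already passed.
 */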
526 static int
527 get_relative_ms(uint64_t abs_timeout_ns)
528 {
529    if (abs_timeout_ns >= INT64_MAX)
530       /* We can assume that a wait with a value this high is a forever wait
531        * and return -1 here as it's the infinite timeout for ppoll() while
532        * being the highest unsigned integer value for the wait KGSL IOCTL
533        */
534       return -1;
535 
536    uint64_t cur_time_ms = os_time_get_nano() / 1000000;
537    uint64_t abs_timeout_ms = abs_timeout_ns / 1000000;
538    if (abs_timeout_ms <= cur_time_ms)
539       return 0;
540 
541    return abs_timeout_ms - cur_time_ms;
542 }
543 
544 /* safe_ioctl is not enough as restarted waits would not adjust the timeout
545  * which could lead to waiting substantially longer than requested
546  */
547 static VkResult
548 wait_timestamp_safe(int fd,
549                     unsigned int context_id,
550                     unsigned int timestamp,
551                     uint64_t abs_timeout_ns)
552 {
553    struct kgsl_device_waittimestamp_ctxtid wait = {
554       .context_id = context_id,
555       .timestamp = timestamp,
556       .timeout = get_relative_ms(abs_timeout_ns),
557    };
558 
559    while (true) {
560       int ret = ioctl(fd, IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID, &wait);
561 
562       if (ret == -1 && (errno == EINTR || errno == EAGAIN)) {
563          int timeout_ms = get_relative_ms(abs_timeout_ns);
564 
565          /* update timeout to consider time that has passed since the start */
566          if (timeout_ms == 0)
567             return VK_TIMEOUT;
568 
569          wait.timeout = timeout_ms;
570       } else if (ret == -1) {
571          assert(errno == ETIMEDOUT);
572          return VK_TIMEOUT;
573       } else {
574          return VK_SUCCESS;
575       }
576    }
577 }
578 
579 VkResult
580 kgsl_queue_wait_fence(struct tu_queue *queue, uint32_t fence,
581                       uint64_t timeout_ns)
582 {
583    uint64_t abs_timeout_ns = os_time_get_nano() + timeout_ns;
584 
585    return wait_timestamp_safe(queue->device->fd, queue->msm_queue_id,
586                               fence, abs_timeout_ns);
587 }
588 
589 static VkResult
590 kgsl_syncobj_wait(struct tu_device *device,
591                   struct kgsl_syncobj *s,
592                   uint64_t abs_timeout_ns)
593 {
594    if (s->state == KGSL_SYNCOBJ_STATE_UNSIGNALED) {
595       /* If this syncobj is unsignaled we need to wait for it to resolve to a
596        * valid syncobj prior to letting the rest of the wait continue; this
597        * avoids needing kernel support for wait-before-signal semantics.
598        */
599 
600       if (abs_timeout_ns == 0)
601          return VK_TIMEOUT; // If this is a simple poll then we can return early
602 
603       pthread_mutex_lock(&device->submit_mutex);
604       struct timespec abstime;
605       timespec_from_nsec(&abstime, abs_timeout_ns);
606 
607       while (s->state == KGSL_SYNCOBJ_STATE_UNSIGNALED) {
608          int ret;
609          if (abs_timeout_ns == UINT64_MAX) {
610             ret = pthread_cond_wait(&device->timeline_cond,
611                                     &device->submit_mutex);
612          } else {
613             ret = pthread_cond_timedwait(&device->timeline_cond,
614                                          &device->submit_mutex, &abstime);
615          }
616          if (ret != 0) {
617             assert(ret == ETIMEDOUT);
618             pthread_mutex_unlock(&device->submit_mutex);
619             return VK_TIMEOUT;
620          }
621       }
622 
623       pthread_mutex_unlock(&device->submit_mutex);
624    }
625 
626    switch (s->state) {
627    case KGSL_SYNCOBJ_STATE_SIGNALED:
628       return VK_SUCCESS;
629 
630    case KGSL_SYNCOBJ_STATE_UNSIGNALED:
631       return VK_TIMEOUT;
632 
633    case KGSL_SYNCOBJ_STATE_TS: {
634       return wait_timestamp_safe(device->fd, s->queue->msm_queue_id,
635                                  s->timestamp, abs_timeout_ns);
636    }
637 
638    case KGSL_SYNCOBJ_STATE_FD: {
639       int ret = sync_wait(s->fd, get_relative_ms(abs_timeout_ns));
640       if (ret) {
641          assert(errno == ETIME);
642          return VK_TIMEOUT;
643       } else {
644          return VK_SUCCESS;
645       }
646    }
647 
648    default:
649       unreachable("invalid syncobj state");
650    }
651 }
652 
653 #define kgsl_syncobj_foreach_state(syncobjs, filter) \
654    for (uint32_t i = 0; i < count && (sync = syncobjs[i], true); i++) \
655       if (sync->state == filter)
656 
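/* Wait until any of the given syncobjs signals. Already-signaled syncobjs
 * return immediately, timestamps on a single queue are collapsed into the
 * earliest one, timestamps spread across queues are converted to sync-file
 * fds, and any fds are then poll()ed until one signals or the timeout
 * expires.
 */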
657 static VkResult
658 kgsl_syncobj_wait_any(struct tu_device *device, struct kgsl_syncobj **syncobjs, uint32_t count, uint64_t abs_timeout_ns)
659 {
660    if (count == 0)
661       return VK_TIMEOUT;
662    else if (count == 1)
663       return kgsl_syncobj_wait(device, syncobjs[0], abs_timeout_ns);
664 
665    uint32_t num_fds = 0;
666    struct tu_queue *queue = NULL;
667    struct kgsl_syncobj *sync = NULL;
668 
669    /* Simple case, we already have a signaled one */
670    kgsl_syncobj_foreach_state(syncobjs, KGSL_SYNCOBJ_STATE_SIGNALED)
671       return VK_SUCCESS;
672 
673    kgsl_syncobj_foreach_state(syncobjs, KGSL_SYNCOBJ_STATE_FD)
674       num_fds++;
675 
676    /* If we have TS from different queues we cannot compare them and would
677     * have to convert them into FDs
678     */
679    bool convert_ts_to_fd = false;
680    kgsl_syncobj_foreach_state(syncobjs, KGSL_SYNCOBJ_STATE_TS) {
681       if (queue != NULL && sync->queue != queue) {
682          convert_ts_to_fd = true;
683          break;
684       }
685       queue = sync->queue;
686    }
687 
688    /* If we have neither FD nor TS syncobjs, we can return immediately */
689    if (num_fds == 0 && queue == NULL)
690       return VK_TIMEOUT;
691 
692    VkResult result = VK_TIMEOUT;
693 
694    struct u_vector poll_fds = { 0 };
695    uint32_t lowest_timestamp = 0;
696 
697    if (convert_ts_to_fd || num_fds > 0)
698       u_vector_init(&poll_fds, 4, sizeof(struct pollfd));
699 
700    if (convert_ts_to_fd) {
701       kgsl_syncobj_foreach_state(syncobjs, KGSL_SYNCOBJ_STATE_TS) {
702          struct pollfd *poll_fd = (struct pollfd *) u_vector_add(&poll_fds);
703          poll_fd->fd = timestamp_to_fd(sync->queue, sync->timestamp);
704          poll_fd->events = POLLIN;
705       }
706    } else {
707       /* TSs could be merged by finding the one with the lowest timestamp */
708       bool first_ts = true;
709       kgsl_syncobj_foreach_state(syncobjs, KGSL_SYNCOBJ_STATE_TS) {
710          if (first_ts || timestamp_cmp(lowest_timestamp, sync->timestamp)) {
711             first_ts = false;
712             lowest_timestamp = sync->timestamp;
713          }
714       }
715 
716       if (num_fds) {
717          struct pollfd *poll_fd = (struct pollfd *) u_vector_add(&poll_fds);
718          poll_fd->fd = timestamp_to_fd(queue, lowest_timestamp);
719          poll_fd->events = POLLIN;
720       }
721    }
722 
723    if (num_fds) {
724       kgsl_syncobj_foreach_state(syncobjs, KGSL_SYNCOBJ_STATE_FD) {
725          struct pollfd *poll_fd = (struct pollfd *) u_vector_add(&poll_fds);
726          poll_fd->fd = sync->fd;
727          poll_fd->events = POLLIN;
728       }
729    }
730 
731    if (u_vector_length(&poll_fds) == 0) {
732       result = wait_timestamp_safe(device->fd, queue->msm_queue_id,
733                                    lowest_timestamp, MIN2(abs_timeout_ns, INT64_MAX));
734    } else {
735       int ret, i;
736 
737       struct pollfd *fds = (struct pollfd *) poll_fds.data;
738       uint32_t fds_count = u_vector_length(&poll_fds);
739       do {
740          ret = poll(fds, fds_count, get_relative_ms(abs_timeout_ns));
741          if (ret > 0) {
742             for (i = 0; i < fds_count; i++) {
743                if (fds[i].revents & (POLLERR | POLLNVAL)) {
744                   errno = EINVAL;
745                   ret = -1;
746                   break;
747                }
748             }
749             break;
750          } else if (ret == 0) {
751             errno = ETIME;
752             break;
753          }
754       } while (ret == -1 && (errno == EINTR || errno == EAGAIN));
755 
756       for (uint32_t i = 0; i < fds_count - num_fds; i++)
757          close(fds[i].fd);
758 
759       if (ret != 0) {
760          assert(errno == ETIME);
761          result = VK_TIMEOUT;
762       } else {
763          result = VK_SUCCESS;
764       }
765    }
766 
767    u_vector_finish(&poll_fds);
768    return result;
769 }
770 
771 static VkResult
772 kgsl_syncobj_export(struct kgsl_syncobj *s, int *pFd)
773 {
774    if (!pFd)
775       return VK_SUCCESS;
776 
777    switch (s->state) {
778    case KGSL_SYNCOBJ_STATE_SIGNALED:
779    case KGSL_SYNCOBJ_STATE_UNSIGNALED:
780       /* Getting a sync FD from an unsignaled syncobj is UB in Vulkan */
781       *pFd = -1;
782       return VK_SUCCESS;
783 
784    case KGSL_SYNCOBJ_STATE_FD:
785       if (s->fd < 0)
786          *pFd = -1;
787       else
788          *pFd = dup(s->fd);
789       return VK_SUCCESS;
790 
791    case KGSL_SYNCOBJ_STATE_TS:
792       *pFd = kgsl_syncobj_ts_to_fd(s);
793       return VK_SUCCESS;
794 
795    default:
796       unreachable("Invalid syncobj state");
797    }
798 }
799 
800 static VkResult
801 kgsl_syncobj_import(struct kgsl_syncobj *s, int fd)
802 {
803    kgsl_syncobj_reset(s);
804    if (fd >= 0) {
805       s->state = KGSL_SYNCOBJ_STATE_FD;
806       s->fd = fd;
807    } else {
808       s->state = KGSL_SYNCOBJ_STATE_SIGNALED;
809    }
810 
811    return VK_SUCCESS;
812 }
813 
814 static int
815 sync_merge_close(const char *name, int fd1, int fd2, bool close_fd2)
816 {
817    int fd = sync_merge(name, fd1, fd2);
818    if (fd < 0)
819       return -1;
820 
821    close(fd1);
822    if (close_fd2)
823       close(fd2);
824 
825    return fd;
826 }
827 
828 /* Merges multiple kgsl_syncobjs into a single one which is only signalled
829  * after all submitted syncobjs are signalled
830  */
831 static struct kgsl_syncobj
832 kgsl_syncobj_merge(const struct kgsl_syncobj **syncobjs, uint32_t count)
833 {
834    struct kgsl_syncobj ret;
835    kgsl_syncobj_init(&ret, true);
836 
837    if (count == 0)
838       return ret;
839 
840    for (uint32_t i = 0; i < count; ++i) {
841       const struct kgsl_syncobj *sync = syncobjs[i];
842 
843       switch (sync->state) {
844       case KGSL_SYNCOBJ_STATE_SIGNALED:
845          break;
846 
847       case KGSL_SYNCOBJ_STATE_UNSIGNALED:
848          kgsl_syncobj_reset(&ret);
849          return ret;
850 
851       case KGSL_SYNCOBJ_STATE_TS:
852          if (ret.state == KGSL_SYNCOBJ_STATE_TS) {
853             if (ret.queue == sync->queue) {
854                ret.timestamp = max_ts(ret.timestamp, sync->timestamp);
855             } else {
856                ret.state = KGSL_SYNCOBJ_STATE_FD;
857                int sync_fd = kgsl_syncobj_ts_to_fd(sync);
858                ret.fd = sync_merge_close("tu_sync", ret.fd, sync_fd, true);
859                assert(ret.fd >= 0);
860             }
861          } else if (ret.state == KGSL_SYNCOBJ_STATE_FD) {
862             int sync_fd = kgsl_syncobj_ts_to_fd(sync);
863             ret.fd = sync_merge_close("tu_sync", ret.fd, sync_fd, true);
864             assert(ret.fd >= 0);
865          } else {
866             ret = *sync;
867          }
868          break;
869 
870       case KGSL_SYNCOBJ_STATE_FD:
871          if (ret.state == KGSL_SYNCOBJ_STATE_FD) {
872             ret.fd = sync_merge_close("tu_sync", ret.fd, sync->fd, false);
873             assert(ret.fd >= 0);
874          } else if (ret.state == KGSL_SYNCOBJ_STATE_TS) {
875             ret.state = KGSL_SYNCOBJ_STATE_FD;
876             int sync_fd = kgsl_syncobj_ts_to_fd(sync);
877             ret.fd = sync_merge_close("tu_sync", ret.fd, sync_fd, true);
878             assert(ret.fd >= 0);
879          } else {
880             ret = *sync;
881             ret.fd = dup(ret.fd);
882             assert(ret.fd >= 0);
883          }
884          break;
885 
886       default:
887          unreachable("invalid syncobj state");
888       }
889    }
890 
891    return ret;
892 }
893 
894 struct vk_kgsl_syncobj
895 {
896    struct vk_sync vk;
897    struct kgsl_syncobj syncobj;
898 };
899 
900 static VkResult
901 vk_kgsl_sync_init(struct vk_device *device,
902                   struct vk_sync *sync,
903                   uint64_t initial_value)
904 {
905    struct vk_kgsl_syncobj *s = container_of(sync, struct vk_kgsl_syncobj, vk);
906    kgsl_syncobj_init(&s->syncobj, initial_value != 0);
907    return VK_SUCCESS;
908 }
909 
910 static void
911 vk_kgsl_sync_finish(struct vk_device *device, struct vk_sync *sync)
912 {
913    struct vk_kgsl_syncobj *s = container_of(sync, struct vk_kgsl_syncobj, vk);
914    kgsl_syncobj_destroy(&s->syncobj);
915 }
916 
917 static VkResult
918 vk_kgsl_sync_reset(struct vk_device *device, struct vk_sync *sync)
919 {
920    struct vk_kgsl_syncobj *s = container_of(sync, struct vk_kgsl_syncobj, vk);
921    kgsl_syncobj_reset(&s->syncobj);
922    return VK_SUCCESS;
923 }
924 
925 static VkResult
926 vk_kgsl_sync_move(struct vk_device *device,
927                   struct vk_sync *dst,
928                   struct vk_sync *src)
929 {
930    struct vk_kgsl_syncobj *d = container_of(dst, struct vk_kgsl_syncobj, vk);
931    struct vk_kgsl_syncobj *s = container_of(src, struct vk_kgsl_syncobj, vk);
932    kgsl_syncobj_reset(&d->syncobj);
933    d->syncobj = s->syncobj;
934    kgsl_syncobj_init(&s->syncobj, false);
935    return VK_SUCCESS;
936 }
937 
938 static VkResult
939 vk_kgsl_sync_wait(struct vk_device *_device,
940                   struct vk_sync *sync,
941                   uint64_t wait_value,
942                   enum vk_sync_wait_flags wait_flags,
943                   uint64_t abs_timeout_ns)
944 {
945    struct tu_device *device = container_of(_device, struct tu_device, vk);
946    struct vk_kgsl_syncobj *s = container_of(sync, struct vk_kgsl_syncobj, vk);
947 
948    if (wait_flags & VK_SYNC_WAIT_PENDING)
949       return VK_SUCCESS;
950 
951    return kgsl_syncobj_wait(device, &s->syncobj, abs_timeout_ns);
952 }
953 
954 static VkResult
955 vk_kgsl_sync_wait_many(struct vk_device *_device,
956                        uint32_t wait_count,
957                        const struct vk_sync_wait *waits,
958                        enum vk_sync_wait_flags wait_flags,
959                        uint64_t abs_timeout_ns)
960 {
961    struct tu_device *device = container_of(_device, struct tu_device, vk);
962 
963    if (wait_flags & VK_SYNC_WAIT_PENDING)
964       return VK_SUCCESS;
965 
966    if (wait_flags & VK_SYNC_WAIT_ANY) {
967       struct kgsl_syncobj *syncobjs[wait_count];
968       for (uint32_t i = 0; i < wait_count; i++) {
969          syncobjs[i] =
970             &container_of(waits[i].sync, struct vk_kgsl_syncobj, vk)->syncobj;
971       }
972 
973       return kgsl_syncobj_wait_any(device, syncobjs, wait_count,
974                                    abs_timeout_ns);
975    } else {
976       for (uint32_t i = 0; i < wait_count; i++) {
977          struct vk_kgsl_syncobj *s =
978             container_of(waits[i].sync, struct vk_kgsl_syncobj, vk);
979 
980          VkResult result =
981             kgsl_syncobj_wait(device, &s->syncobj, abs_timeout_ns);
982          if (result != VK_SUCCESS)
983             return result;
984       }
985       return VK_SUCCESS;
986    }
987 }
988 
989 static VkResult
990 vk_kgsl_sync_import_sync_file(struct vk_device *device,
991                               struct vk_sync *sync,
992                               int fd)
993 {
994    struct vk_kgsl_syncobj *s = container_of(sync, struct vk_kgsl_syncobj, vk);
995    if (fd >= 0) {
996       fd = dup(fd);
997       if (fd < 0) {
998          mesa_loge("vk_kgsl_sync_import_sync_file: dup failed: %s",
999                    strerror(errno));
1000          return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1001       }
1002    }
1003    return kgsl_syncobj_import(&s->syncobj, fd);
1004 }
1005 
1006 static VkResult
1007 vk_kgsl_sync_export_sync_file(struct vk_device *device,
1008                               struct vk_sync *sync,
1009                               int *pFd)
1010 {
1011    struct vk_kgsl_syncobj *s = container_of(sync, struct vk_kgsl_syncobj, vk);
1012    return kgsl_syncobj_export(&s->syncobj, pFd);
1013 }
1014 
1015 const struct vk_sync_type vk_kgsl_sync_type = {
1016    .size = sizeof(struct vk_kgsl_syncobj),
1017    .features = (enum vk_sync_features)
1018                (VK_SYNC_FEATURE_BINARY |
1019                 VK_SYNC_FEATURE_GPU_WAIT |
1020                 VK_SYNC_FEATURE_GPU_MULTI_WAIT |
1021                 VK_SYNC_FEATURE_CPU_WAIT |
1022                 VK_SYNC_FEATURE_CPU_RESET |
1023                 VK_SYNC_FEATURE_WAIT_ANY |
1024                 VK_SYNC_FEATURE_WAIT_PENDING),
1025    .init = vk_kgsl_sync_init,
1026    .finish = vk_kgsl_sync_finish,
1027    .reset = vk_kgsl_sync_reset,
1028    .move = vk_kgsl_sync_move,
1029    .wait = vk_kgsl_sync_wait,
1030    .wait_many = vk_kgsl_sync_wait_many,
1031    .import_sync_file = vk_kgsl_sync_import_sync_file,
1032    .export_sync_file = vk_kgsl_sync_export_sync_file,
1033 };
1034 
1035 struct tu_kgsl_queue_submit {
1036    struct util_dynarray commands;
1037 };
1038 
1039 static void *
1040 kgsl_submit_create(struct tu_device *device)
1041 {
1042    return vk_zalloc(&device->vk.alloc, sizeof(struct tu_kgsl_queue_submit), 8,
1043                     VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1044 }
1045 
1046 static void
1047 kgsl_submit_finish(struct tu_device *device,
1048                    void *_submit)
1049 {
1050    struct tu_kgsl_queue_submit *submit =
1051       (struct tu_kgsl_queue_submit *)_submit;
1052 
1053    util_dynarray_fini(&submit->commands);
1054    vk_free(&device->vk.alloc, submit);
1055 }
1056 
1057 static void
1058 kgsl_submit_add_entries(struct tu_device *device, void *_submit,
1059                         struct tu_cs_entry *entries, unsigned num_entries)
1060 {
1061    struct tu_kgsl_queue_submit *submit =
1062       (struct tu_kgsl_queue_submit *)_submit;
1063 
1064    struct kgsl_command_object *cmds = (struct kgsl_command_object *)
1065       util_dynarray_grow(&submit->commands, struct kgsl_command_object,
1066                       num_entries);
1067 
1068    for (unsigned i = 0; i < num_entries; i++) {
1069       cmds[i] = (struct kgsl_command_object) {
1070          .gpuaddr = entries[i].bo->iova + entries[i].offset,
1071          .size = entries[i].size,
1072          .flags = KGSL_CMDLIST_IB,
1073          .id = entries[i].bo->gem_handle,
1074       };
1075    }
1076 }
1077 
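/* Submit the gathered command streams to KGSL. Wait semaphores are merged
 * into a single syncpoint (a timestamp or a sync-file fd), an optional
 * profiling buffer object is attached for u_trace/Perfetto, and the command
 * list is submitted with IOCTL_KGSL_GPU_COMMAND. The timestamp returned by
 * the kernel becomes the new queue fence and the payload of every signal
 * semaphore.
 */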
1078 static VkResult
1079 kgsl_queue_submit(struct tu_queue *queue, void *_submit,
1080                   struct vk_sync_wait *waits, uint32_t wait_count,
1081                   struct vk_sync_signal *signals, uint32_t signal_count,
1082                   struct tu_u_trace_submission_data *u_trace_submission_data)
1083 {
1084    struct tu_kgsl_queue_submit *submit =
1085       (struct tu_kgsl_queue_submit *)_submit;
1086 
1087 #if HAVE_PERFETTO
1088    uint64_t start_ts = tu_perfetto_begin_submit();
1089 #endif
1090 
1091    if (submit->commands.size == 0) {
1092       const struct kgsl_syncobj *wait_semaphores[wait_count + 1];
1093       for (uint32_t i = 0; i < wait_count; i++) {
1094          wait_semaphores[i] = &container_of(waits[i].sync,
1095                                             struct vk_kgsl_syncobj, vk)
1096                                   ->syncobj;
1097       }
1098 
1099       struct kgsl_syncobj last_submit_sync;
1100       if (queue->fence >= 0)
1101          last_submit_sync = (struct kgsl_syncobj) {
1102             .state = KGSL_SYNCOBJ_STATE_TS,
1103             .queue = queue,
1104             .timestamp = queue->fence,
1105          };
1106       else
1107          last_submit_sync = (struct kgsl_syncobj) {
1108             .state = KGSL_SYNCOBJ_STATE_SIGNALED,
1109          };
1110 
1111       wait_semaphores[wait_count] = &last_submit_sync;
1112 
1113       struct kgsl_syncobj wait_sync =
1114          kgsl_syncobj_merge(wait_semaphores, wait_count + 1);
1115       assert(wait_sync.state !=
1116              KGSL_SYNCOBJ_STATE_UNSIGNALED); // Would wait forever
1117 
1118       for (uint32_t i = 0; i < signal_count; i++) {
1119          struct kgsl_syncobj *signal_sync =
1120             &container_of(signals[i].sync, struct vk_kgsl_syncobj, vk)
1121                 ->syncobj;
1122 
1123          kgsl_syncobj_reset(signal_sync);
1124          *signal_sync = wait_sync;
1125       }
1126 
1127       return VK_SUCCESS;
1128    }
1129 
1130    VkResult result = VK_SUCCESS;
1131 
1132    if (u_trace_submission_data) {
1133       mtx_lock(&queue->device->kgsl_profiling_mutex);
1134       tu_suballoc_bo_alloc(&u_trace_submission_data->kgsl_timestamp_bo,
1135                            &queue->device->kgsl_profiling_suballoc,
1136                            sizeof(struct kgsl_cmdbatch_profiling_buffer), 4);
1137       mtx_unlock(&queue->device->kgsl_profiling_mutex);
1138    }
1139 
1140    uint32_t obj_count = 0;
1141    if (u_trace_submission_data)
1142       obj_count++;
1143 
1144    struct kgsl_command_object *objs = (struct kgsl_command_object *)
1145       vk_alloc(&queue->device->vk.alloc, sizeof(*objs) * obj_count,
1146                alignof(*objs), VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
1147 
1148    struct kgsl_cmdbatch_profiling_buffer *profiling_buffer = NULL;
1149    uint32_t obj_idx = 0;
1150    if (u_trace_submission_data) {
1151       struct tu_suballoc_bo *bo = &u_trace_submission_data->kgsl_timestamp_bo;
1152 
1153       objs[obj_idx++] = (struct kgsl_command_object) {
1154          .offset = bo->iova - bo->bo->iova,
1155          .gpuaddr = bo->bo->iova,
1156          .size = sizeof(struct kgsl_cmdbatch_profiling_buffer),
1157          .flags = KGSL_OBJLIST_MEMOBJ | KGSL_OBJLIST_PROFILE,
1158          .id = bo->bo->gem_handle,
1159       };
1160       profiling_buffer =
1161          (struct kgsl_cmdbatch_profiling_buffer *) tu_suballoc_bo_map(bo);
1162       memset(profiling_buffer, 0, sizeof(*profiling_buffer));
1163    }
1164 
1165    const struct kgsl_syncobj *wait_semaphores[wait_count];
1166    for (uint32_t i = 0; i < wait_count; i++) {
1167       wait_semaphores[i] =
1168          &container_of(waits[i].sync, struct vk_kgsl_syncobj, vk)
1169              ->syncobj;
1170    }
1171 
1172    struct kgsl_syncobj wait_sync =
1173       kgsl_syncobj_merge(wait_semaphores, wait_count);
1174    assert(wait_sync.state !=
1175           KGSL_SYNCOBJ_STATE_UNSIGNALED); // Would wait forever
1176 
1177    struct kgsl_cmd_syncpoint_timestamp ts;
1178    struct kgsl_cmd_syncpoint_fence fn;
1179    struct kgsl_command_syncpoint sync = { 0 };
1180    bool has_sync = false;
1181    switch (wait_sync.state) {
1182    case KGSL_SYNCOBJ_STATE_SIGNALED:
1183       break;
1184 
1185    case KGSL_SYNCOBJ_STATE_TS:
1186       ts.context_id = wait_sync.queue->msm_queue_id;
1187       ts.timestamp = wait_sync.timestamp;
1188 
1189       has_sync = true;
1190       sync.type = KGSL_CMD_SYNCPOINT_TYPE_TIMESTAMP;
1191       sync.priv = (uintptr_t) &ts;
1192       sync.size = sizeof(ts);
1193       break;
1194 
1195    case KGSL_SYNCOBJ_STATE_FD:
1196       fn.fd = wait_sync.fd;
1197 
1198       has_sync = true;
1199       sync.type = KGSL_CMD_SYNCPOINT_TYPE_FENCE;
1200       sync.priv = (uintptr_t) &fn;
1201       sync.size = sizeof(fn);
1202       break;
1203 
1204    default:
1205       unreachable("invalid syncobj state");
1206    }
1207 
1208    struct kgsl_gpu_command req = {
1209       .flags = KGSL_CMDBATCH_SUBMIT_IB_LIST,
1210       .cmdlist = (uintptr_t) submit->commands.data,
1211       .cmdsize = sizeof(struct kgsl_command_object),
1212       .numcmds = util_dynarray_num_elements(&submit->commands,
1213                                             struct kgsl_command_object),
1214       .synclist = (uintptr_t) &sync,
1215       .syncsize = sizeof(sync),
1216       .numsyncs = has_sync != 0 ? 1 : 0,
1217       .context_id = queue->msm_queue_id,
1218    };
1219 
1220    if (obj_idx) {
1221       req.flags |= KGSL_CMDBATCH_PROFILING;
1222       req.objlist = (uintptr_t) objs;
1223       req.objsize = sizeof(struct kgsl_command_object);
1224       req.numobjs = obj_idx;
1225    }
1226 
1227    int ret = safe_ioctl(queue->device->physical_device->local_fd,
1228                         IOCTL_KGSL_GPU_COMMAND, &req);
1229 
1230    uint64_t gpu_offset = 0;
1231 #if HAVE_PERFETTO
1232    if (profiling_buffer) {
1233       /* We need to wait for KGSL to queue the GPU command before we can read
1234        * the timestamp. Since this is just for profiling and doesn't take too
1235        * long, we can just busy-wait for it.
1236        */
1237       while (p_atomic_read(&profiling_buffer->gpu_ticks_queued) == 0);
1238 
1239       struct kgsl_perfcounter_read_group perf = {
1240          .groupid = KGSL_PERFCOUNTER_GROUP_ALWAYSON,
1241          .countable = 0,
1242          .value = 0
1243       };
1244 
1245       struct kgsl_perfcounter_read req = {
1246          .reads = &perf,
1247          .count = 1,
1248       };
1249 
1250       ret = safe_ioctl(queue->device->fd, IOCTL_KGSL_PERFCOUNTER_READ, &req);
1251       /* Older KGSL has some kind of garbage in upper 32 bits */
1252       uint64_t offseted_gpu_ts = perf.value & 0xffffffff;
1253 
1254       gpu_offset = tu_device_ticks_to_ns(
1255          queue->device, offseted_gpu_ts - profiling_buffer->gpu_ticks_queued);
1256 
1257       struct tu_perfetto_clocks clocks = {
1258          .cpu = profiling_buffer->wall_clock_ns,
1259          .gpu_ts = tu_device_ticks_to_ns(queue->device,
1260                                          profiling_buffer->gpu_ticks_queued),
1261          .gpu_ts_offset = gpu_offset,
1262       };
1263 
1264       clocks = tu_perfetto_end_submit(queue, queue->device->submit_count,
1265                                       start_ts, &clocks);
1266       gpu_offset = clocks.gpu_ts_offset;
1267    }
1268 #endif
1269 
1270    kgsl_syncobj_destroy(&wait_sync);
1271 
1272    if (ret) {
1273       result = vk_device_set_lost(&queue->device->vk, "submit failed: %s\n",
1274                                   strerror(errno));
1275       goto fail_submit;
1276    }
1277 
1278    p_atomic_set(&queue->fence, req.timestamp);
1279 
1280    for (uint32_t i = 0; i < signal_count; i++) {
1281       struct kgsl_syncobj *signal_sync =
1282          &container_of(signals[i].sync, struct vk_kgsl_syncobj, vk)
1283              ->syncobj;
1284 
1285       kgsl_syncobj_reset(signal_sync);
1286       signal_sync->state = KGSL_SYNCOBJ_STATE_TS;
1287       signal_sync->queue = queue;
1288       signal_sync->timestamp = req.timestamp;
1289    }
1290 
1291    if (u_trace_submission_data) {
1292       struct tu_u_trace_submission_data *submission_data =
1293          u_trace_submission_data;
1294       submission_data->gpu_ts_offset = gpu_offset;
1295    }
1296 
1297 fail_submit:
1298    if (result != VK_SUCCESS) {
1299       mtx_lock(&queue->device->kgsl_profiling_mutex);
1300       tu_suballoc_bo_free(&queue->device->kgsl_profiling_suballoc,
1301                           &u_trace_submission_data->kgsl_timestamp_bo);
1302       mtx_unlock(&queue->device->kgsl_profiling_mutex);
1303    }
1304 
1305    return result;
1306 }
1307 
1308 static VkResult
1309 kgsl_device_init(struct tu_device *dev)
1310 {
1311    dev->fd = dev->physical_device->local_fd;
1312    return VK_SUCCESS;
1313 }
1314 
1315 static void
1316 kgsl_device_finish(struct tu_device *dev)
1317 {
1318    /* No-op */
1319 }
1320 
1321 static int
1322 kgsl_device_get_gpu_timestamp(struct tu_device *dev, uint64_t *ts)
1323 {
1324    unreachable("");
1325    return 0;
1326 }
1327 
1328 static int
1329 kgsl_device_get_suspend_count(struct tu_device *dev, uint64_t *suspend_count)
1330 {
1331    /* kgsl doesn't have a way to get it */
1332    *suspend_count = 0;
1333    return 0;
1334 }
1335 
1336 static VkResult
1337 kgsl_device_check_status(struct tu_device *device)
1338 {
1339    for (unsigned i = 0; i < TU_MAX_QUEUE_FAMILIES; i++) {
1340       for (unsigned q = 0; q < device->queue_count[i]; q++) {
1341          /* KGSL's KGSL_PROP_GPU_RESET_STAT takes the u32 msm_queue_id and returns a
1342          * KGSL_CTX_STAT_* for the worst reset that happened since the last time it
1343          * was queried on that queue.
1344          */
1345          uint32_t value = device->queues[i][q].msm_queue_id;
1346          VkResult status = get_kgsl_prop(device->fd, KGSL_PROP_GPU_RESET_STAT,
1347                                        &value, sizeof(value));
1348          if (status != VK_SUCCESS)
1349             return vk_device_set_lost(&device->vk, "Failed to get GPU reset status");
1350 
1351          if (value != KGSL_CTX_STAT_NO_ERROR &&
1352             value != KGSL_CTX_STAT_INNOCENT_CONTEXT_RESET_EXT) {
1353             return vk_device_set_lost(&device->vk, "GPU faulted or hung");
1354          }
1355       }
1356    }
1357 
1358    return VK_SUCCESS;
1359 }
1360 
1361 static const struct tu_knl kgsl_knl_funcs = {
1362       .name = "kgsl",
1363 
1364       .device_init = kgsl_device_init,
1365       .device_finish = kgsl_device_finish,
1366       .device_get_gpu_timestamp = kgsl_device_get_gpu_timestamp,
1367       .device_get_suspend_count = kgsl_device_get_suspend_count,
1368       .device_check_status = kgsl_device_check_status,
1369       .submitqueue_new = kgsl_submitqueue_new,
1370       .submitqueue_close = kgsl_submitqueue_close,
1371       .bo_init = kgsl_bo_init,
1372       .bo_init_dmabuf = kgsl_bo_init_dmabuf,
1373       .bo_export_dmabuf = kgsl_bo_export_dmabuf,
1374       .bo_map = kgsl_bo_map,
1375       .bo_allow_dump = kgsl_bo_allow_dump,
1376       .bo_finish = kgsl_bo_finish,
1377       .submit_create = kgsl_submit_create,
1378       .submit_finish = kgsl_submit_finish,
1379       .submit_add_entries = kgsl_submit_add_entries,
1380       .queue_submit = kgsl_queue_submit,
1381       .queue_wait_fence = kgsl_queue_wait_fence,
1382 };
1383 
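/* Probe and initialize the KGSL backend: pick a dma-buf allocator
 * (/dev/dma_heap/system, new ION, or legacy ION), query device properties
 * (chip id, GMEM size and base, highest bank bit, UBWC version) and fill in
 * the physical device accordingly.
 */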
1384 VkResult
1385 tu_knl_kgsl_load(struct tu_instance *instance, int fd)
1386 {
1387    if (instance->vk.enabled_extensions.KHR_display) {
1388       return vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
1389                        "VK_KHR_display is not supported on kgsl");
1390    }
1391 
1392    struct tu_physical_device *device = (struct tu_physical_device *)
1393       vk_zalloc(&instance->vk.alloc, sizeof(*device), 8,
1394                 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
1395    if (!device) {
1396       close(fd);
1397       return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1398    }
1399 
1400    static const char dma_heap_path[] = "/dev/dma_heap/system";
1401    static const char ion_path[] = "/dev/ion";
1402    int dma_fd;
1403 
1404    dma_fd = open(dma_heap_path, O_RDONLY);
1405    if (dma_fd >= 0) {
1406       device->kgsl_dma_type = TU_KGSL_DMA_TYPE_DMAHEAP;
1407    } else {
1408       dma_fd = open(ion_path, O_RDONLY);
1409       if (dma_fd >= 0) {
1410          /* ION_IOC_FREE available only for legacy ION */
1411          struct ion_handle_data free = { .handle = 0 };
1412          if (safe_ioctl(dma_fd, ION_IOC_FREE, &free) >= 0 || errno != ENOTTY)
1413             device->kgsl_dma_type = TU_KGSL_DMA_TYPE_ION_LEGACY;
1414          else
1415             device->kgsl_dma_type = TU_KGSL_DMA_TYPE_ION;
1416       } else {
1417          mesa_logw(
1418             "Unable to open either %s or %s; VK_KHR_external_memory_fd will be "
1419             "unavailable: %s",
1420             dma_heap_path, ion_path, strerror(errno));
1421       }
1422    }
1423 
1424    VkResult result = VK_ERROR_INITIALIZATION_FAILED;
1425 
1426    struct kgsl_devinfo info;
1427    if (get_kgsl_prop(fd, KGSL_PROP_DEVICE_INFO, &info, sizeof(info)))
1428       goto fail;
1429 
1430    uint64_t gmem_iova;
1431    if (get_kgsl_prop(fd, KGSL_PROP_UCHE_GMEM_VADDR, &gmem_iova, sizeof(gmem_iova)))
1432       goto fail;
1433 
1434    uint32_t highest_bank_bit;
1435    if (get_kgsl_prop(fd, KGSL_PROP_HIGHEST_BANK_BIT, &highest_bank_bit,
1436                      sizeof(highest_bank_bit)))
1437       goto fail;
1438 
1439    uint32_t ubwc_version;
1440    if (get_kgsl_prop(fd, KGSL_PROP_UBWC_MODE, &ubwc_version,
1441                      sizeof(ubwc_version)))
1442       goto fail;
1443 
1444 
1445    /* kgsl version check? */
1446 
1447    device->instance = instance;
1448    device->master_fd = -1;
1449    device->local_fd = fd;
1450    device->kgsl_dma_fd = dma_fd;
1451 
1452    device->dev_id.gpu_id =
1453       ((info.chip_id >> 24) & 0xff) * 100 +
1454       ((info.chip_id >> 16) & 0xff) * 10 +
1455       ((info.chip_id >>  8) & 0xff);
1456    device->dev_id.chip_id = info.chip_id;
1457    device->gmem_size = debug_get_num_option("TU_GMEM", info.gmem_sizebytes);
1458    device->gmem_base = gmem_iova;
1459 
1460    device->submitqueue_priority_count = 1;
1461 
1462    device->timeline_type = vk_sync_timeline_get_type(&vk_kgsl_sync_type);
1463 
1464    device->sync_types[0] = &vk_kgsl_sync_type;
1465    device->sync_types[1] = &device->timeline_type.sync;
1466    device->sync_types[2] = NULL;
1467 
1468    device->heap.size = tu_get_system_heap_size(device);
1469    device->heap.used = 0u;
1470    device->heap.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT;
1471 
1472    device->has_set_iova = kgsl_is_memory_type_supported(
1473       fd, KGSL_MEMFLAGS_USE_CPU_MAP);
1474 
1475    /* Even if the kernel is new enough, the GPU itself may not support it. */
1476    device->has_cached_coherent_memory = kgsl_is_memory_type_supported(
1477       fd, KGSL_MEMFLAGS_IOCOHERENT |
1478              (KGSL_CACHEMODE_WRITEBACK << KGSL_CACHEMODE_SHIFT));
1479 
1480    /* preemption is always supported on kgsl */
1481    device->has_preemption = true;
1482 
1483    device->ubwc_config.highest_bank_bit = highest_bank_bit;
1484 
1485    /* The other config values can be partially inferred from the UBWC version,
1486     * but kgsl also hardcodes overrides for specific a6xx versions that we
1487     * have to follow here. Yuck.
1488     */
1489    switch (ubwc_version) {
1490    case KGSL_UBWC_1_0:
1491       device->ubwc_config.bank_swizzle_levels = 0x7;
1492       device->ubwc_config.macrotile_mode = FDL_MACROTILE_4_CHANNEL;
1493       break;
1494    case KGSL_UBWC_2_0:
1495       device->ubwc_config.bank_swizzle_levels = 0x6;
1496       device->ubwc_config.macrotile_mode = FDL_MACROTILE_4_CHANNEL;
1497       break;
1498    case KGSL_UBWC_3_0:
1499       device->ubwc_config.bank_swizzle_levels = 0x6;
1500       device->ubwc_config.macrotile_mode = FDL_MACROTILE_4_CHANNEL;
1501       break;
1502    case KGSL_UBWC_4_0:
1503       device->ubwc_config.bank_swizzle_levels = 0x6;
1504       device->ubwc_config.macrotile_mode = FDL_MACROTILE_8_CHANNEL;
1505       break;
1506    default:
1507       return vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
1508                        "unknown UBWC version 0x%x", ubwc_version);
1509    }
1510 
1511    /* kgsl unfortunately hardcodes some settings for certain GPUs and doesn't
1512     * expose them in the uAPI so hardcode them here to match.
1513     */
1514    if (device->dev_id.gpu_id == 663 || device->dev_id.gpu_id == 680) {
1515       device->ubwc_config.macrotile_mode = FDL_MACROTILE_8_CHANNEL;
1516    }
1517    if (device->dev_id.gpu_id == 663) {
1518       /* level2_swizzling_dis = 1 */
1519       device->ubwc_config.bank_swizzle_levels = 0x4;
1520    }
1521 
1522    instance->knl = &kgsl_knl_funcs;
1523 
1524    result = tu_physical_device_init(device, instance);
1525    if (result != VK_SUCCESS)
1526       goto fail;
1527 
1528    list_addtail(&device->vk.link, &instance->vk.physical_devices.list);
1529 
1530    return VK_SUCCESS;
1531 
1532 fail:
1533    vk_free(&instance->vk.alloc, device);
1534    close(fd);
1535    if (dma_fd >= 0)
1536       close(dma_fd);
1537    return result;
1538 }
1539