1 /*
2  * Copyright © 2020 Google, Inc.
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #include "tu_knl.h"
7 
8 #include <errno.h>
9 #include <fcntl.h>
10 #include <poll.h>
11 #include <stdint.h>
12 #include <sys/ioctl.h>
13 #include <sys/mman.h>
14 #include <linux/dma-heap.h>
15 
16 #define __user
17 #include "msm_kgsl.h"
18 #include "ion/ion.h"
19 #include "ion/ion_4.19.h"
20 
21 #include "vk_util.h"
22 
23 #include "util/os_file.h"
24 #include "util/u_debug.h"
25 #include "util/u_vector.h"
26 #include "util/libsync.h"
27 #include "util/timespec.h"
28 
29 #include "tu_cmd_buffer.h"
30 #include "tu_cs.h"
31 #include "tu_device.h"
32 #include "tu_dynamic_rendering.h"
33 #include "tu_queue.h"
34 #include "tu_rmv.h"
35 
36 /* ION_HEAP(ION_SYSTEM_HEAP_ID) */
37 #define KGSL_ION_SYSTEM_HEAP_MASK (1u << 25)
38 
39 
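/* Small wrapper around ioctl() that retries the call when it is interrupted
 * by a signal (EINTR) or asked to retry (EAGAIN), so callers only ever see
 * real failures.
 */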
40 static int
41 safe_ioctl(int fd, unsigned long request, void *arg)
42 {
43    int ret;
44 
45    do {
46       ret = ioctl(fd, request, arg);
47    } while (ret == -1 && (errno == EINTR || errno == EAGAIN));
48 
49    return ret;
50 }
51 
52 static int
53 kgsl_submitqueue_new(struct tu_device *dev,
54                      int priority,
55                      uint32_t *queue_id)
56 {
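   /* A KGSL draw context plays the role of the submit queue here. The flag
    * combination asks for a preamble-style context that does not rely on the
    * kernel saving and restoring GMEM contents across context switches.
    */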
57    struct kgsl_drawctxt_create req = {
58       .flags = KGSL_CONTEXT_SAVE_GMEM |
59               KGSL_CONTEXT_NO_GMEM_ALLOC |
60               KGSL_CONTEXT_PREAMBLE,
61    };
62 
63    int ret = safe_ioctl(dev->physical_device->local_fd, IOCTL_KGSL_DRAWCTXT_CREATE, &req);
64    if (ret)
65       return ret;
66 
67    *queue_id = req.drawctxt_id;
68 
69    return 0;
70 }
71 
72 static void
73 kgsl_submitqueue_close(struct tu_device *dev, uint32_t queue_id)
74 {
75    struct kgsl_drawctxt_destroy req = {
76       .drawctxt_id = queue_id,
77    };
78 
79    safe_ioctl(dev->physical_device->local_fd, IOCTL_KGSL_DRAWCTXT_DESTROY, &req);
80 }
81 
82 static void kgsl_bo_finish(struct tu_device *dev, struct tu_bo *bo);
83 
84 static VkResult
85 bo_init_new_dmaheap(struct tu_device *dev, struct tu_bo **out_bo, uint64_t size,
86                 enum tu_bo_alloc_flags flags)
87 {
88    struct dma_heap_allocation_data alloc = {
89       .len = size,
90       .fd_flags = O_RDWR | O_CLOEXEC,
91    };
92 
93    int ret;
94    ret = safe_ioctl(dev->physical_device->kgsl_dma_fd, DMA_HEAP_IOCTL_ALLOC,
95                     &alloc);
96 
97    if (ret) {
98       return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
99                        "DMA_HEAP_IOCTL_ALLOC failed (%s)", strerror(errno));
100    }
101 
102    return tu_bo_init_dmabuf(dev, out_bo, -1, alloc.fd);
103 }
104 
105 static VkResult
106 bo_init_new_ion(struct tu_device *dev, struct tu_bo **out_bo, uint64_t size,
107                 enum tu_bo_alloc_flags flags)
108 {
109    struct ion_new_allocation_data alloc = {
110       .len = size,
111       .heap_id_mask = KGSL_ION_SYSTEM_HEAP_MASK,
112       .flags = 0,
113       .fd = -1,
114    };
115 
116    int ret;
117    ret = safe_ioctl(dev->physical_device->kgsl_dma_fd, ION_IOC_NEW_ALLOC, &alloc);
118    if (ret) {
119       return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
120                        "ION_IOC_NEW_ALLOC failed (%s)", strerror(errno));
121    }
122 
123    return tu_bo_init_dmabuf(dev, out_bo, -1, alloc.fd);
124 }
125 
126 static VkResult
127 bo_init_new_ion_legacy(struct tu_device *dev, struct tu_bo **out_bo, uint64_t size,
128                        enum tu_bo_alloc_flags flags)
129 {
130    struct ion_allocation_data alloc = {
131       .len = size,
132       .align = 4096,
133       .heap_id_mask = KGSL_ION_SYSTEM_HEAP_MASK,
134       .flags = 0,
135       .handle = -1,
136    };
137 
138    int ret;
139    ret = safe_ioctl(dev->physical_device->kgsl_dma_fd, ION_IOC_ALLOC, &alloc);
140    if (ret) {
141       return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
142                        "ION_IOC_ALLOC failed (%s)", strerror(errno));
143    }
144 
145    struct ion_fd_data share = {
146       .handle = alloc.handle,
147       .fd = -1,
148    };
149 
150    ret = safe_ioctl(dev->physical_device->kgsl_dma_fd, ION_IOC_SHARE, &share);
151    if (ret) {
152       return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
153                        "ION_IOC_SHARE failed (%s)", strerror(errno));
154    }
155 
156    struct ion_handle_data free = {
157       .handle = alloc.handle,
158    };
159    ret = safe_ioctl(dev->physical_device->kgsl_dma_fd, ION_IOC_FREE, &free);
160    if (ret) {
161       return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
162                        "ION_IOC_FREE failed (%s)", strerror(errno));
163    }
164 
165    return tu_bo_init_dmabuf(dev, out_bo, -1, share.fd);
166 }
167 
168 static VkResult
169 kgsl_bo_init(struct tu_device *dev,
170              struct vk_object_base *base,
171              struct tu_bo **out_bo,
172              uint64_t size,
173              uint64_t client_iova,
174              VkMemoryPropertyFlags mem_property,
175              enum tu_bo_alloc_flags flags,
176              const char *name)
177 {
178    if (flags & TU_BO_ALLOC_SHAREABLE) {
179       /* The Vulkan spec doesn't forbid allocating exportable memory with a
180        * fixed address, only imported memory, but on kgsl we can't sensibly
181        * implement it so just always reject it.
182        */
183       if (client_iova) {
184          return vk_errorf(dev, VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS,
185                           "cannot allocate an exportable BO with a fixed address");
186       }
187 
188       switch(dev->physical_device->kgsl_dma_type) {
189       case TU_KGSL_DMA_TYPE_DMAHEAP:
190          return bo_init_new_dmaheap(dev, out_bo, size, flags);
191       case TU_KGSL_DMA_TYPE_ION:
192          return bo_init_new_ion(dev, out_bo, size, flags);
193       case TU_KGSL_DMA_TYPE_ION_LEGACY:
194          return bo_init_new_ion_legacy(dev, out_bo, size, flags);
195       }
196    }
197 
198    struct kgsl_gpumem_alloc_id req = {
199       .size = size,
200    };
201 
202    if (mem_property & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) {
203       if (mem_property & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) {
204          req.flags |= KGSL_MEMFLAGS_IOCOHERENT;
205       }
206 
207       req.flags |= KGSL_CACHEMODE_WRITEBACK << KGSL_CACHEMODE_SHIFT;
208    } else {
209       req.flags |= KGSL_CACHEMODE_WRITECOMBINE << KGSL_CACHEMODE_SHIFT;
210    }
211 
212    if (flags & TU_BO_ALLOC_GPU_READ_ONLY)
213       req.flags |= KGSL_MEMFLAGS_GPUREADONLY;
214 
215    if (flags & TU_BO_ALLOC_REPLAYABLE)
216       req.flags |= KGSL_MEMFLAGS_USE_CPU_MAP;
217 
218    int ret;
219 
220    ret = safe_ioctl(dev->physical_device->local_fd,
221                     IOCTL_KGSL_GPUMEM_ALLOC_ID, &req);
222    if (ret) {
223       return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
224                        "GPUMEM_ALLOC_ID failed (%s)", strerror(errno));
225    }
226 
227    struct tu_bo *bo = tu_device_lookup_bo(dev, req.id);
228    assert(bo && bo->gem_handle == 0);
229 
230    *bo = (struct tu_bo) {
231       .gem_handle = req.id,
232       .size = req.mmapsize,
233       .iova = req.gpuaddr,
234       .name = tu_debug_bos_add(dev, req.mmapsize, name),
235       .refcnt = 1,
236       .shared_fd = -1,
237       .base = base,
238    };
239 
240    if (flags & TU_BO_ALLOC_REPLAYABLE) {
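      /* KGSL exposes each allocation for CPU mapping through the device fd
       * at an offset derived from its id (id << 12, i.e. one page per id).
       */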
241       uint64_t offset = req.id << 12;
242       void *map = mmap((void *)client_iova, bo->size, PROT_READ | PROT_WRITE,
243                        MAP_SHARED, dev->physical_device->local_fd, offset);
244       if (map == MAP_FAILED) {
245          kgsl_bo_finish(dev, bo);
246 
247          return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
248                           "mmap failed (%s)", strerror(errno));
249       }
250 
251       if (client_iova && (uint64_t)map != client_iova) {
252          kgsl_bo_finish(dev, bo);
253 
254          return vk_errorf(dev, VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS,
255                           "mmap could not map the given address");
256       }
257 
258       bo->map = map;
259       bo->iova = (uint64_t)map;
260 
261       /* Because we're using SVM, the CPU mapping and GPU mapping are the same
262        * and the CPU mapping must stay fixed for the lifetime of the BO.
263        */
264       bo->never_unmap = true;
265    }
266 
267    tu_dump_bo_init(dev, bo);
268 
269    *out_bo = bo;
270 
271    TU_RMV(bo_allocate, dev, bo);
272    if (flags & TU_BO_ALLOC_INTERNAL_RESOURCE) {
273       TU_RMV(internal_resource_create, dev, bo);
274       TU_RMV(resource_name, dev, bo, name);
275    }
276 
277    return VK_SUCCESS;
278 }
279 
280 static VkResult
281 kgsl_bo_init_dmabuf(struct tu_device *dev,
282                     struct tu_bo **out_bo,
283                     uint64_t size,
284                     int fd)
285 {
286    struct kgsl_gpuobj_import_dma_buf import_dmabuf = {
287       .fd = fd,
288    };
289    struct kgsl_gpuobj_import req = {
290       .priv = (uintptr_t)&import_dmabuf,
291       .priv_len = sizeof(import_dmabuf),
292       .flags = 0,
293       .type = KGSL_USER_MEM_TYPE_DMABUF,
294    };
295    int ret;
296 
297    ret = safe_ioctl(dev->physical_device->local_fd,
298                     IOCTL_KGSL_GPUOBJ_IMPORT, &req);
299    if (ret)
300       return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
301                        "Failed to import dma-buf (%s)\n", strerror(errno));
302 
303    struct kgsl_gpuobj_info info_req = {
304       .id = req.id,
305    };
306 
307    ret = safe_ioctl(dev->physical_device->local_fd,
308                     IOCTL_KGSL_GPUOBJ_INFO, &info_req);
309    if (ret)
310       return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
311                        "Failed to get dma-buf info (%s)\n", strerror(errno));
312 
313    struct tu_bo *bo = tu_device_lookup_bo(dev, req.id);
314    assert(bo && bo->gem_handle == 0);
315 
316    *bo = (struct tu_bo) {
317       .gem_handle = req.id,
318       .size = info_req.size,
319       .iova = info_req.gpuaddr,
320       .name = tu_debug_bos_add(dev, info_req.size, "dmabuf"),
321       .refcnt = 1,
322       .shared_fd = os_dupfd_cloexec(fd),
323    };
324 
325    tu_dump_bo_init(dev, bo);
326 
327    *out_bo = bo;
328 
329    return VK_SUCCESS;
330 }
331 
332 static int
333 kgsl_bo_export_dmabuf(struct tu_device *dev, struct tu_bo *bo)
334 {
335    assert(bo->shared_fd != -1);
336    return os_dupfd_cloexec(bo->shared_fd);
337 }
338 
339 static VkResult
340 kgsl_bo_map(struct tu_device *dev, struct tu_bo *bo, void *placed_addr)
341 {
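   /* BOs allocated through KGSL are mapped via the device fd at their
    * id-derived offset, while imported dma-bufs are mapped through their own
    * fd. A non-NULL placed_addr requests a mapping at exactly that address
    * via MAP_FIXED.
    */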
342    void *map = MAP_FAILED;
343    if (bo->shared_fd == -1) {
344       uint64_t offset = bo->gem_handle << 12;
345       map = mmap(placed_addr, bo->size, PROT_READ | PROT_WRITE,
346                  MAP_SHARED | (placed_addr != NULL ? MAP_FIXED : 0),
347                  dev->physical_device->local_fd, offset);
348    } else {
349       map = mmap(placed_addr, bo->size, PROT_READ | PROT_WRITE,
350                  MAP_SHARED | (placed_addr != NULL ? MAP_FIXED : 0),
351                  bo->shared_fd, 0);
352    }
353 
354    if (map == MAP_FAILED)
355       return vk_error(dev, VK_ERROR_MEMORY_MAP_FAILED);
356 
357    bo->map = map;
358    TU_RMV(bo_map, dev, bo);
359 
360    return VK_SUCCESS;
361 }
362 
363 static void
364 kgsl_bo_allow_dump(struct tu_device *dev, struct tu_bo *bo)
365 {
366 }
367 
368 static void
369 kgsl_bo_finish(struct tu_device *dev, struct tu_bo *bo)
370 {
371    assert(bo->gem_handle);
372 
373    if (!p_atomic_dec_zero(&bo->refcnt))
374       return;
375 
376    if (bo->map) {
377       TU_RMV(bo_unmap, dev, bo);
378       munmap(bo->map, bo->size);
379    }
380 
381    if (bo->shared_fd != -1)
382       close(bo->shared_fd);
383 
384    TU_RMV(bo_destroy, dev, bo);
385    tu_debug_bos_del(dev, bo);
386    tu_dump_bo_del(dev, bo);
387 
388    struct kgsl_gpumem_free_id req = {
389       .id = bo->gem_handle
390    };
391 
392    /* Tell the sparse array that this entry is free */
393    memset(bo, 0, sizeof(*bo));
394 
395    safe_ioctl(dev->physical_device->local_fd, IOCTL_KGSL_GPUMEM_FREE_ID, &req);
396 }
397 
398 static VkResult
399 get_kgsl_prop(int fd, unsigned int type, void *value, size_t size)
400 {
401    struct kgsl_device_getproperty getprop = {
402       .type = type,
403       .value = value,
404       .sizebytes = size,
405    };
406 
407    return safe_ioctl(fd, IOCTL_KGSL_DEVICE_GETPROPERTY, &getprop)
408              ? VK_ERROR_UNKNOWN
409              : VK_SUCCESS;
410 }
411 
412 static bool
413 kgsl_is_memory_type_supported(int fd, uint32_t flags)
414 {
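   /* There is no direct query for supported memory flags, so probe by making
    * a small test allocation with the requested flags and freeing it again.
    */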
415    struct kgsl_gpumem_alloc_id req_alloc = {
416       .flags = flags,
417       .size = 0x1000,
418    };
419 
420    int ret = safe_ioctl(fd, IOCTL_KGSL_GPUMEM_ALLOC_ID, &req_alloc);
421    if (ret) {
422       return false;
423    }
424 
425    struct kgsl_gpumem_free_id req_free = { .id = req_alloc.id };
426 
427    safe_ioctl(fd, IOCTL_KGSL_GPUMEM_FREE_ID, &req_free);
428 
429    return true;
430 }
431 
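/* A kgsl_syncobj is in one of four states: permanently unsignaled or
 * signaled, tied to a (queue, timestamp) pair of a KGSL context, or backed
 * by an external sync-file fd.
 */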
432 enum kgsl_syncobj_state {
433    KGSL_SYNCOBJ_STATE_UNSIGNALED,
434    KGSL_SYNCOBJ_STATE_SIGNALED,
435    KGSL_SYNCOBJ_STATE_TS,
436    KGSL_SYNCOBJ_STATE_FD,
437 };
438 
439 struct kgsl_syncobj
440 {
441    struct vk_object_base base;
442    enum kgsl_syncobj_state state;
443 
444    struct tu_queue *queue;
445    uint32_t timestamp;
446 
447    int fd;
448 };
449 
450 static void
451 kgsl_syncobj_init(struct kgsl_syncobj *s, bool signaled)
452 {
453    s->state =
454       signaled ? KGSL_SYNCOBJ_STATE_SIGNALED : KGSL_SYNCOBJ_STATE_UNSIGNALED;
455 
456    s->timestamp = UINT32_MAX;
457    s->fd = -1;
458 }
459 
460 static void
461 kgsl_syncobj_reset(struct kgsl_syncobj *s)
462 {
463    if (s->state == KGSL_SYNCOBJ_STATE_FD && s->fd >= 0) {
464       ASSERTED int ret = close(s->fd);
465       assert(ret == 0);
466       s->fd = -1;
467    } else if (s->state == KGSL_SYNCOBJ_STATE_TS) {
468       s->timestamp = UINT32_MAX;
469    }
470 
471    s->state = KGSL_SYNCOBJ_STATE_UNSIGNALED;
472 }
473 
474 static void
475 kgsl_syncobj_destroy(struct kgsl_syncobj *s)
476 {
477    kgsl_syncobj_reset(s);
478 }
479 
480 static int
481 timestamp_to_fd(struct tu_queue *queue, uint32_t timestamp)
482 {
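   /* Ask the kernel to create a sync-file fence for this (context, timestamp)
    * pair; KGSL writes the new fd back through the priv/len pointer.
    */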
483    int fd;
484    struct kgsl_timestamp_event event = {
485       .type = KGSL_TIMESTAMP_EVENT_FENCE,
486       .timestamp = timestamp,
487       .context_id = queue->msm_queue_id,
488       .priv = &fd,
489       .len = sizeof(fd),
490    };
491 
492    int ret = safe_ioctl(queue->device->fd, IOCTL_KGSL_TIMESTAMP_EVENT, &event);
493    if (ret)
494       return -1;
495 
496    return fd;
497 }
498 
499 static int
500 kgsl_syncobj_ts_to_fd(const struct kgsl_syncobj *syncobj)
501 {
502    assert(syncobj->state == KGSL_SYNCOBJ_STATE_TS);
503    return timestamp_to_fd(syncobj->queue, syncobj->timestamp);
504 }
505 
506 /* Return true if timestamp a is greater (more recent) than b.
507  * This relies on timestamps never having a difference > (1 << 31).
508  */
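/* For example, with a = 3 and b = 0xfffffffe the subtraction wraps around to
 * 5, so a correctly compares as the more recent timestamp even though it is
 * numerically smaller.
 */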
509 static inline bool
510 timestamp_cmp(uint32_t a, uint32_t b)
511 {
512    return (int32_t) (a - b) >= 0;
513 }
514 
515 static uint32_t
516 max_ts(uint32_t a, uint32_t b)
517 {
518    return timestamp_cmp(a, b) ? a : b;
519 }
520 
521 static uint32_t
522 min_ts(uint32_t a, uint32_t b)
523 {
524    return timestamp_cmp(a, b) ? b : a;
525 }
526 
527 static int
528 get_relative_ms(uint64_t abs_timeout_ns)
529 {
530    if (abs_timeout_ns >= INT64_MAX)
531       /* We can assume that a wait this long is meant to be infinite.
532        * Returning -1 gives poll()/sync_wait() an infinite timeout, and as
533        * an unsigned value it is also the longest timeout for the wait IOCTL.
534        */
535       return -1;
536 
537    uint64_t cur_time_ms = os_time_get_nano() / 1000000;
538    uint64_t abs_timeout_ms = abs_timeout_ns / 1000000;
539    if (abs_timeout_ms <= cur_time_ms)
540       return 0;
541 
542    return abs_timeout_ms - cur_time_ms;
543 }
544 
545 /* safe_ioctl is not enough as restarted waits would not adjust the timeout
546  * which could lead to waiting substantially longer than requested
547  */
548 static VkResult
549 wait_timestamp_safe(int fd,
550                     unsigned int context_id,
551                     unsigned int timestamp,
552                     uint64_t abs_timeout_ns)
553 {
554    struct kgsl_device_waittimestamp_ctxtid wait = {
555       .context_id = context_id,
556       .timestamp = timestamp,
557       .timeout = get_relative_ms(abs_timeout_ns),
558    };
559 
560    while (true) {
561       int ret = ioctl(fd, IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID, &wait);
562 
563       if (ret == -1 && (errno == EINTR || errno == EAGAIN)) {
564          int timeout_ms = get_relative_ms(abs_timeout_ns);
565 
566          /* update timeout to consider time that has passed since the start */
567          if (timeout_ms == 0)
568             return VK_TIMEOUT;
569 
570          wait.timeout = timeout_ms;
571       } else if (ret == -1) {
572          assert(errno == ETIMEDOUT);
573          return VK_TIMEOUT;
574       } else {
575          return VK_SUCCESS;
576       }
577    }
578 }
579 
580 VkResult
581 kgsl_queue_wait_fence(struct tu_queue *queue, uint32_t fence,
582                       uint64_t timeout_ns)
583 {
584    uint64_t abs_timeout_ns = os_time_get_nano() + timeout_ns;
585 
586    return wait_timestamp_safe(queue->device->fd, queue->msm_queue_id,
587                               fence, abs_timeout_ns);
588 }
589 
590 static VkResult
591 kgsl_syncobj_wait(struct tu_device *device,
592                   struct kgsl_syncobj *s,
593                   uint64_t abs_timeout_ns)
594 {
595    if (s->state == KGSL_SYNCOBJ_STATE_UNSIGNALED) {
596       /* If this syncobj is unsignaled we need to wait for it to resolve to a
597        * valid syncobj prior to letting the rest of the wait continue; this
598        * avoids needing kernel support for wait-before-signal semantics.
599        */
600 
601       if (abs_timeout_ns == 0)
602          return VK_TIMEOUT; // If this is a simple poll then we can return early
603 
604       pthread_mutex_lock(&device->submit_mutex);
605       struct timespec abstime;
606       timespec_from_nsec(&abstime, abs_timeout_ns);
607 
608       while (s->state == KGSL_SYNCOBJ_STATE_UNSIGNALED) {
609          int ret;
610          if (abs_timeout_ns == UINT64_MAX) {
611             ret = pthread_cond_wait(&device->timeline_cond,
612                                     &device->submit_mutex);
613          } else {
614             ret = pthread_cond_timedwait(&device->timeline_cond,
615                                          &device->submit_mutex, &abstime);
616          }
617          if (ret != 0) {
618             assert(ret == ETIMEDOUT);
619             pthread_mutex_unlock(&device->submit_mutex);
620             return VK_TIMEOUT;
621          }
622       }
623 
624       pthread_mutex_unlock(&device->submit_mutex);
625    }
626 
627    switch (s->state) {
628    case KGSL_SYNCOBJ_STATE_SIGNALED:
629       return VK_SUCCESS;
630 
631    case KGSL_SYNCOBJ_STATE_UNSIGNALED:
632       return VK_TIMEOUT;
633 
634    case KGSL_SYNCOBJ_STATE_TS: {
635       return wait_timestamp_safe(device->fd, s->queue->msm_queue_id,
636                                  s->timestamp, abs_timeout_ns);
637    }
638 
639    case KGSL_SYNCOBJ_STATE_FD: {
640       int ret = sync_wait(s->fd, get_relative_ms(abs_timeout_ns));
641       if (ret) {
642          assert(errno == ETIME);
643          return VK_TIMEOUT;
644       } else {
645          return VK_SUCCESS;
646       }
647    }
648 
649    default:
650       unreachable("invalid syncobj state");
651    }
652 }
653 
654 #define kgsl_syncobj_foreach_state(syncobjs, filter) \
655    for (uint32_t i = 0; i < count && (sync = syncobjs[i], true); i++) \
656       if (sync->state == filter)
657 
658 static VkResult
659 kgsl_syncobj_wait_any(struct tu_device *device, struct kgsl_syncobj **syncobjs, uint32_t count, uint64_t abs_timeout_ns)
660 {
661    if (count == 0)
662       return VK_TIMEOUT;
663    else if (count == 1)
664       return kgsl_syncobj_wait(device, syncobjs[0], abs_timeout_ns);
665 
666    uint32_t num_fds = 0;
667    struct tu_queue *queue = NULL;
668    struct kgsl_syncobj *sync = NULL;
669 
670    /* Simple case: we already have a signaled one */
671    kgsl_syncobj_foreach_state(syncobjs, KGSL_SYNCOBJ_STATE_SIGNALED)
672       return VK_SUCCESS;
673 
674    kgsl_syncobj_foreach_state(syncobjs, KGSL_SYNCOBJ_STATE_FD)
675       num_fds++;
676 
677    /* If we have TS from different queues we cannot compare them and would
678     * have to convert them into FDs
679     */
680    bool convert_ts_to_fd = false;
681    kgsl_syncobj_foreach_state(syncobjs, KGSL_SYNCOBJ_STATE_TS) {
682       if (queue != NULL && sync->queue != queue) {
683          convert_ts_to_fd = true;
684          break;
685       }
686       queue = sync->queue;
687    }
688 
689    /* If we have no FD or TS syncobjs then we can return immediately */
690    if (num_fds == 0 && queue == NULL)
691       return VK_TIMEOUT;
692 
693    VkResult result = VK_TIMEOUT;
694 
695    struct u_vector poll_fds = { 0 };
696    uint32_t lowest_timestamp = 0;
697 
698    if (convert_ts_to_fd || num_fds > 0)
699       u_vector_init(&poll_fds, 4, sizeof(struct pollfd));
700 
701    if (convert_ts_to_fd) {
702       kgsl_syncobj_foreach_state(syncobjs, KGSL_SYNCOBJ_STATE_TS) {
703          struct pollfd *poll_fd = (struct pollfd *) u_vector_add(&poll_fds);
704          poll_fd->fd = timestamp_to_fd(sync->queue, sync->timestamp);
705          poll_fd->events = POLLIN;
706       }
707    } else {
708       /* TSs could be merged by finding the one with the lowest timestamp */
709       bool first_ts = true;
710       kgsl_syncobj_foreach_state(syncobjs, KGSL_SYNCOBJ_STATE_TS) {
711          if (first_ts || timestamp_cmp(lowest_timestamp, sync->timestamp)) {
712             first_ts = false;
713             lowest_timestamp = sync->timestamp;
714          }
715       }
716 
717       if (num_fds) {
718          struct pollfd *poll_fd = (struct pollfd *) u_vector_add(&poll_fds);
719          poll_fd->fd = timestamp_to_fd(queue, lowest_timestamp);
720          poll_fd->events = POLLIN;
721       }
722    }
723 
724    if (num_fds) {
725       kgsl_syncobj_foreach_state(syncobjs, KGSL_SYNCOBJ_STATE_FD) {
726          struct pollfd *poll_fd = (struct pollfd *) u_vector_add(&poll_fds);
727          poll_fd->fd = sync->fd;
728          poll_fd->events = POLLIN;
729       }
730    }
731 
732    if (u_vector_length(&poll_fds) == 0) {
733       result = wait_timestamp_safe(device->fd, queue->msm_queue_id,
734                                    lowest_timestamp, MIN2(abs_timeout_ns, INT64_MAX));
735    } else {
736       int ret, i;
737 
738       struct pollfd *fds = (struct pollfd *) poll_fds.data;
739       uint32_t fds_count = u_vector_length(&poll_fds);
740       do {
741          ret = poll(fds, fds_count, get_relative_ms(abs_timeout_ns));
742          if (ret > 0) {
743             for (i = 0; i < fds_count; i++) {
744                if (fds[i].revents & (POLLERR | POLLNVAL)) {
745                   errno = EINVAL;
746                   ret = -1;
747                   break;
748                }
749             }
750             break;
751          } else if (ret == 0) {
752             errno = ETIME;
753             break;
754          }
755       } while (ret == -1 && (errno == EINTR || errno == EAGAIN));
756 
757       for (uint32_t i = 0; i < fds_count - num_fds; i++)
758          close(fds[i].fd);
759 
760       if (ret <= 0) {
761          assert(errno == ETIME);
762          result = VK_TIMEOUT;
763       } else {
764          result = VK_SUCCESS;
765       }
766    }
767 
768    u_vector_finish(&poll_fds);
769    return result;
770 }
771 
772 static VkResult
773 kgsl_syncobj_export(struct kgsl_syncobj *s, int *pFd)
774 {
775    if (!pFd)
776       return VK_SUCCESS;
777 
778    switch (s->state) {
779    case KGSL_SYNCOBJ_STATE_SIGNALED:
780    case KGSL_SYNCOBJ_STATE_UNSIGNALED:
781       /* Getting a sync FD from an unsignaled syncobj is UB in Vulkan */
782       *pFd = -1;
783       return VK_SUCCESS;
784 
785    case KGSL_SYNCOBJ_STATE_FD:
786       if (s->fd < 0)
787          *pFd = -1;
788       else
789          *pFd = dup(s->fd);
790       return VK_SUCCESS;
791 
792    case KGSL_SYNCOBJ_STATE_TS:
793       *pFd = kgsl_syncobj_ts_to_fd(s);
794       return VK_SUCCESS;
795 
796    default:
797       unreachable("Invalid syncobj state");
798    }
799 }
800 
801 static VkResult
802 kgsl_syncobj_import(struct kgsl_syncobj *s, int fd)
803 {
804    kgsl_syncobj_reset(s);
805    if (fd >= 0) {
806       s->state = KGSL_SYNCOBJ_STATE_FD;
807       s->fd = fd;
808    } else {
809       s->state = KGSL_SYNCOBJ_STATE_SIGNALED;
810    }
811 
812    return VK_SUCCESS;
813 }
814 
815 static int
816 sync_merge_close(const char *name, int fd1, int fd2, bool close_fd2)
817 {
818    int fd = sync_merge(name, fd1, fd2);
819    if (fd < 0)
820       return -1;
821 
822    close(fd1);
823    if (close_fd2)
824       close(fd2);
825 
826    return fd;
827 }
828 
829 /* Merges multiple kgsl_syncobjs into a single one which is only signaled
830  * after all of the given syncobjs are signaled.
831  */
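/* Timestamps from the same queue are merged by keeping the larger timestamp;
 * anything that crosses queues or involves sync-file fds is merged by
 * converting the timestamps to sync-file fds and merging those instead.
 */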
832 static struct kgsl_syncobj
833 kgsl_syncobj_merge(const struct kgsl_syncobj **syncobjs, uint32_t count)
834 {
835    struct kgsl_syncobj ret;
836    kgsl_syncobj_init(&ret, true);
837 
838    if (count == 0)
839       return ret;
840 
841    for (uint32_t i = 0; i < count; ++i) {
842       const struct kgsl_syncobj *sync = syncobjs[i];
843 
844       switch (sync->state) {
845       case KGSL_SYNCOBJ_STATE_SIGNALED:
846          break;
847 
848       case KGSL_SYNCOBJ_STATE_UNSIGNALED:
849          kgsl_syncobj_reset(&ret);
850          return ret;
851 
852       case KGSL_SYNCOBJ_STATE_TS:
853          if (ret.state == KGSL_SYNCOBJ_STATE_TS) {
854             if (ret.queue == sync->queue) {
855                ret.timestamp = max_ts(ret.timestamp, sync->timestamp);
856             } else {
857                ret.state = KGSL_SYNCOBJ_STATE_FD;
858                int sync_fd = kgsl_syncobj_ts_to_fd(sync);
859                ret.fd = sync_merge_close("tu_sync", ret.fd, sync_fd, true);
860                assert(ret.fd >= 0);
861             }
862          } else if (ret.state == KGSL_SYNCOBJ_STATE_FD) {
863             int sync_fd = kgsl_syncobj_ts_to_fd(sync);
864             ret.fd = sync_merge_close("tu_sync", ret.fd, sync_fd, true);
865             assert(ret.fd >= 0);
866          } else {
867             ret = *sync;
868          }
869          break;
870 
871       case KGSL_SYNCOBJ_STATE_FD:
872          if (ret.state == KGSL_SYNCOBJ_STATE_FD) {
873             ret.fd = sync_merge_close("tu_sync", ret.fd, sync->fd, false);
874             assert(ret.fd >= 0);
875          } else if (ret.state == KGSL_SYNCOBJ_STATE_TS) {
876             ret.state = KGSL_SYNCOBJ_STATE_FD;
877             int sync_fd = kgsl_syncobj_ts_to_fd(sync);
878             ret.fd = sync_merge_close("tu_sync", ret.fd, sync_fd, true);
879             assert(ret.fd >= 0);
880          } else {
881             ret = *sync;
882             ret.fd = dup(ret.fd);
883             assert(ret.fd >= 0);
884          }
885          break;
886 
887       default:
888          unreachable("invalid syncobj state");
889       }
890    }
891 
892    return ret;
893 }
894 
895 struct vk_kgsl_syncobj
896 {
897    struct vk_sync vk;
898    struct kgsl_syncobj syncobj;
899 };
900 
901 static VkResult
902 vk_kgsl_sync_init(struct vk_device *device,
903                   struct vk_sync *sync,
904                   uint64_t initial_value)
905 {
906    struct vk_kgsl_syncobj *s = container_of(sync, struct vk_kgsl_syncobj, vk);
907    kgsl_syncobj_init(&s->syncobj, initial_value != 0);
908    return VK_SUCCESS;
909 }
910 
911 static void
912 vk_kgsl_sync_finish(struct vk_device *device, struct vk_sync *sync)
913 {
914    struct vk_kgsl_syncobj *s = container_of(sync, struct vk_kgsl_syncobj, vk);
915    kgsl_syncobj_destroy(&s->syncobj);
916 }
917 
918 static VkResult
919 vk_kgsl_sync_reset(struct vk_device *device, struct vk_sync *sync)
920 {
921    struct vk_kgsl_syncobj *s = container_of(sync, struct vk_kgsl_syncobj, vk);
922    kgsl_syncobj_reset(&s->syncobj);
923    return VK_SUCCESS;
924 }
925 
926 static VkResult
927 vk_kgsl_sync_move(struct vk_device *device,
928                   struct vk_sync *dst,
929                   struct vk_sync *src)
930 {
931    struct vk_kgsl_syncobj *d = container_of(dst, struct vk_kgsl_syncobj, vk);
932    struct vk_kgsl_syncobj *s = container_of(src, struct vk_kgsl_syncobj, vk);
933    kgsl_syncobj_reset(&d->syncobj);
934    d->syncobj = s->syncobj;
935    kgsl_syncobj_init(&s->syncobj, false);
936    return VK_SUCCESS;
937 }
938 
939 static VkResult
940 vk_kgsl_sync_wait(struct vk_device *_device,
941                   struct vk_sync *sync,
942                   uint64_t wait_value,
943                   enum vk_sync_wait_flags wait_flags,
944                   uint64_t abs_timeout_ns)
945 {
946    struct tu_device *device = container_of(_device, struct tu_device, vk);
947    struct vk_kgsl_syncobj *s = container_of(sync, struct vk_kgsl_syncobj, vk);
948 
949    if (wait_flags & VK_SYNC_WAIT_PENDING)
950       return VK_SUCCESS;
951 
952    return kgsl_syncobj_wait(device, &s->syncobj, abs_timeout_ns);
953 }
954 
955 static VkResult
956 vk_kgsl_sync_wait_many(struct vk_device *_device,
957                        uint32_t wait_count,
958                        const struct vk_sync_wait *waits,
959                        enum vk_sync_wait_flags wait_flags,
960                        uint64_t abs_timeout_ns)
961 {
962    struct tu_device *device = container_of(_device, struct tu_device, vk);
963 
964    if (wait_flags & VK_SYNC_WAIT_PENDING)
965       return VK_SUCCESS;
966 
967    if (wait_flags & VK_SYNC_WAIT_ANY) {
968       struct kgsl_syncobj *syncobjs[wait_count];
969       for (uint32_t i = 0; i < wait_count; i++) {
970          syncobjs[i] =
971             &container_of(waits[i].sync, struct vk_kgsl_syncobj, vk)->syncobj;
972       }
973 
974       return kgsl_syncobj_wait_any(device, syncobjs, wait_count,
975                                    abs_timeout_ns);
976    } else {
977       for (uint32_t i = 0; i < wait_count; i++) {
978          struct vk_kgsl_syncobj *s =
979             container_of(waits[i].sync, struct vk_kgsl_syncobj, vk);
980 
981          VkResult result =
982             kgsl_syncobj_wait(device, &s->syncobj, abs_timeout_ns);
983          if (result != VK_SUCCESS)
984             return result;
985       }
986       return VK_SUCCESS;
987    }
988 }
989 
990 static VkResult
991 vk_kgsl_sync_import_sync_file(struct vk_device *device,
992                               struct vk_sync *sync,
993                               int fd)
994 {
995    struct vk_kgsl_syncobj *s = container_of(sync, struct vk_kgsl_syncobj, vk);
996    if (fd >= 0) {
997       fd = dup(fd);
998       if (fd < 0) {
999          mesa_loge("vk_kgsl_sync_import_sync_file: dup failed: %s",
1000                    strerror(errno));
1001          return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1002       }
1003    }
1004    return kgsl_syncobj_import(&s->syncobj, fd);
1005 }
1006 
1007 static VkResult
1008 vk_kgsl_sync_export_sync_file(struct vk_device *device,
1009                               struct vk_sync *sync,
1010                               int *pFd)
1011 {
1012    struct vk_kgsl_syncobj *s = container_of(sync, struct vk_kgsl_syncobj, vk);
1013    return kgsl_syncobj_export(&s->syncobj, pFd);
1014 }
1015 
1016 const struct vk_sync_type vk_kgsl_sync_type = {
1017    .size = sizeof(struct vk_kgsl_syncobj),
1018    .features = (enum vk_sync_features)
1019                (VK_SYNC_FEATURE_BINARY |
1020                 VK_SYNC_FEATURE_GPU_WAIT |
1021                 VK_SYNC_FEATURE_GPU_MULTI_WAIT |
1022                 VK_SYNC_FEATURE_CPU_WAIT |
1023                 VK_SYNC_FEATURE_CPU_RESET |
1024                 VK_SYNC_FEATURE_WAIT_ANY |
1025                 VK_SYNC_FEATURE_WAIT_PENDING),
1026    .init = vk_kgsl_sync_init,
1027    .finish = vk_kgsl_sync_finish,
1028    .reset = vk_kgsl_sync_reset,
1029    .move = vk_kgsl_sync_move,
1030    .wait = vk_kgsl_sync_wait,
1031    .wait_many = vk_kgsl_sync_wait_many,
1032    .import_sync_file = vk_kgsl_sync_import_sync_file,
1033    .export_sync_file = vk_kgsl_sync_export_sync_file,
1034 };
1035 
1036 struct tu_kgsl_queue_submit {
1037    struct util_dynarray commands;
1038 };
1039 
1040 static void *
1041 kgsl_submit_create(struct tu_device *device)
1042 {
1043    return vk_zalloc(&device->vk.alloc, sizeof(struct tu_kgsl_queue_submit), 8,
1044                     VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1045 }
1046 
1047 static void
1048 kgsl_submit_finish(struct tu_device *device,
1049                    void *_submit)
1050 {
1051    struct tu_kgsl_queue_submit *submit =
1052       (struct tu_kgsl_queue_submit *)_submit;
1053 
1054    util_dynarray_fini(&submit->commands);
1055    vk_free(&device->vk.alloc, submit);
1056 }
1057 
1058 static void
1059 kgsl_submit_add_entries(struct tu_device *device, void *_submit,
1060                         struct tu_cs_entry *entries, unsigned num_entries)
1061 {
1062    struct tu_kgsl_queue_submit *submit =
1063       (struct tu_kgsl_queue_submit *)_submit;
1064 
1065    struct kgsl_command_object *cmds = (struct kgsl_command_object *)
1066       util_dynarray_grow(&submit->commands, struct kgsl_command_object,
1067                       num_entries);
1068 
1069    for (unsigned i = 0; i < num_entries; i++) {
1070       cmds[i] = (struct kgsl_command_object) {
1071          .gpuaddr = entries[i].bo->iova + entries[i].offset,
1072          .size = entries[i].size,
1073          .flags = KGSL_CMDLIST_IB,
1074          .id = entries[i].bo->gem_handle,
1075       };
1076    }
1077 }
1078 
1079 static VkResult
1080 kgsl_queue_submit(struct tu_queue *queue, void *_submit,
1081                   struct vk_sync_wait *waits, uint32_t wait_count,
1082                   struct vk_sync_signal *signals, uint32_t signal_count,
1083                   struct tu_u_trace_submission_data *u_trace_submission_data)
1084 {
1085    struct tu_kgsl_queue_submit *submit =
1086       (struct tu_kgsl_queue_submit *)_submit;
1087 
1088 #if HAVE_PERFETTO
1089    uint64_t start_ts = tu_perfetto_begin_submit();
1090 #endif
1091 
1092    if (submit->commands.size == 0) {
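      /* Nothing to execute means no new timestamp is produced, so the signal
       * syncobjs simply inherit the merged wait state (including the
       * timestamp of the last real submission on this queue).
       */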
1093       const struct kgsl_syncobj *wait_semaphores[wait_count + 1];
1094       for (uint32_t i = 0; i < wait_count; i++) {
1095          wait_semaphores[i] = &container_of(waits[i].sync,
1096                                             struct vk_kgsl_syncobj, vk)
1097                                   ->syncobj;
1098       }
1099 
1100       struct kgsl_syncobj last_submit_sync;
1101       if (queue->fence >= 0)
1102          last_submit_sync = (struct kgsl_syncobj) {
1103             .state = KGSL_SYNCOBJ_STATE_TS,
1104             .queue = queue,
1105             .timestamp = queue->fence,
1106          };
1107       else
1108          last_submit_sync = (struct kgsl_syncobj) {
1109             .state = KGSL_SYNCOBJ_STATE_SIGNALED,
1110          };
1111 
1112       wait_semaphores[wait_count] = &last_submit_sync;
1113 
1114       struct kgsl_syncobj wait_sync =
1115          kgsl_syncobj_merge(wait_semaphores, wait_count + 1);
1116       assert(wait_sync.state !=
1117              KGSL_SYNCOBJ_STATE_UNSIGNALED); // Would wait forever
1118 
1119       for (uint32_t i = 0; i < signal_count; i++) {
1120          struct kgsl_syncobj *signal_sync =
1121             &container_of(signals[i].sync, struct vk_kgsl_syncobj, vk)
1122                 ->syncobj;
1123 
1124          kgsl_syncobj_reset(signal_sync);
1125          *signal_sync = wait_sync;
1126       }
1127 
1128       return VK_SUCCESS;
1129    }
1130 
1131    VkResult result = VK_SUCCESS;
1132 
1133    if (u_trace_submission_data) {
1134       mtx_lock(&queue->device->kgsl_profiling_mutex);
1135       tu_suballoc_bo_alloc(&u_trace_submission_data->kgsl_timestamp_bo,
1136                            &queue->device->kgsl_profiling_suballoc,
1137                            sizeof(struct kgsl_cmdbatch_profiling_buffer), 4);
1138       mtx_unlock(&queue->device->kgsl_profiling_mutex);
1139    }
1140 
1141    uint32_t obj_count = 0;
1142    if (u_trace_submission_data)
1143       obj_count++;
1144 
1145    struct kgsl_command_object *objs = (struct kgsl_command_object *)
1146       vk_alloc(&queue->device->vk.alloc, sizeof(*objs) * obj_count,
1147                alignof(*objs), VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
1148 
1149    struct kgsl_cmdbatch_profiling_buffer *profiling_buffer = NULL;
1150    uint32_t obj_idx = 0;
1151    if (u_trace_submission_data) {
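      /* Attach a profiling buffer object (KGSL_OBJLIST_PROFILE) so the kernel
       * records wall-clock and GPU-tick timestamps for this command batch;
       * they are read back below for the perfetto submit events.
       */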
1152       struct tu_suballoc_bo *bo = &u_trace_submission_data->kgsl_timestamp_bo;
1153 
1154       objs[obj_idx++] = (struct kgsl_command_object) {
1155          .offset = bo->iova - bo->bo->iova,
1156          .gpuaddr = bo->bo->iova,
1157          .size = sizeof(struct kgsl_cmdbatch_profiling_buffer),
1158          .flags = KGSL_OBJLIST_MEMOBJ | KGSL_OBJLIST_PROFILE,
1159          .id = bo->bo->gem_handle,
1160       };
1161       profiling_buffer =
1162          (struct kgsl_cmdbatch_profiling_buffer *) tu_suballoc_bo_map(bo);
1163       memset(profiling_buffer, 0, sizeof(*profiling_buffer));
1164    }
1165 
1166    const struct kgsl_syncobj *wait_semaphores[wait_count];
1167    for (uint32_t i = 0; i < wait_count; i++) {
1168       wait_semaphores[i] =
1169          &container_of(waits[i].sync, struct vk_kgsl_syncobj, vk)
1170              ->syncobj;
1171    }
1172 
1173    struct kgsl_syncobj wait_sync =
1174       kgsl_syncobj_merge(wait_semaphores, wait_count);
1175    assert(wait_sync.state !=
1176           KGSL_SYNCOBJ_STATE_UNSIGNALED); // Would wait forever
1177 
1178    struct kgsl_cmd_syncpoint_timestamp ts;
1179    struct kgsl_cmd_syncpoint_fence fn;
1180    struct kgsl_command_syncpoint sync = { 0 };
1181    bool has_sync = false;
1182    switch (wait_sync.state) {
1183    case KGSL_SYNCOBJ_STATE_SIGNALED:
1184       break;
1185 
1186    case KGSL_SYNCOBJ_STATE_TS:
1187       ts.context_id = wait_sync.queue->msm_queue_id;
1188       ts.timestamp = wait_sync.timestamp;
1189 
1190       has_sync = true;
1191       sync.type = KGSL_CMD_SYNCPOINT_TYPE_TIMESTAMP;
1192       sync.priv = (uintptr_t) &ts;
1193       sync.size = sizeof(ts);
1194       break;
1195 
1196    case KGSL_SYNCOBJ_STATE_FD:
1197       fn.fd = wait_sync.fd;
1198 
1199       has_sync = true;
1200       sync.type = KGSL_CMD_SYNCPOINT_TYPE_FENCE;
1201       sync.priv = (uintptr_t) &fn;
1202       sync.size = sizeof(fn);
1203       break;
1204 
1205    default:
1206       unreachable("invalid syncobj state");
1207    }
1208 
1209    struct kgsl_gpu_command req = {
1210       .flags = KGSL_CMDBATCH_SUBMIT_IB_LIST,
1211       .cmdlist = (uintptr_t) submit->commands.data,
1212       .cmdsize = sizeof(struct kgsl_command_object),
1213       .numcmds = util_dynarray_num_elements(&submit->commands,
1214                                             struct kgsl_command_object),
1215       .synclist = (uintptr_t) &sync,
1216       .syncsize = sizeof(sync),
1217       .numsyncs = has_sync != 0 ? 1 : 0,
1218       .context_id = queue->msm_queue_id,
1219    };
1220 
1221    if (obj_idx) {
1222       req.flags |= KGSL_CMDBATCH_PROFILING;
1223       req.objlist = (uintptr_t) objs;
1224       req.objsize = sizeof(struct kgsl_command_object);
1225       req.numobjs = obj_idx;
1226    }
1227 
1228    int ret = safe_ioctl(queue->device->physical_device->local_fd,
1229                         IOCTL_KGSL_GPU_COMMAND, &req);
1230 
1231    uint64_t gpu_offset = 0;
1232 #if HAVE_PERFETTO
1233    if (profiling_buffer) {
1234       /* We need to wait for KGSL to queue the GPU command before we can read
1235        * the timestamp. Since this is just for profiling and doesn't take too
1236        * long, we can just busy-wait for it.
1237        */
1238       while (p_atomic_read(&profiling_buffer->gpu_ticks_queued) == 0);
1239 
1240       struct kgsl_perfcounter_read_group perf = {
1241          .groupid = KGSL_PERFCOUNTER_GROUP_ALWAYSON,
1242          .countable = 0,
1243          .value = 0
1244       };
1245 
1246       struct kgsl_perfcounter_read req = {
1247          .reads = &perf,
1248          .count = 1,
1249       };
1250 
1251       ret = safe_ioctl(queue->device->fd, IOCTL_KGSL_PERFCOUNTER_READ, &req);
1252       /* Older KGSL has some kind of garbage in upper 32 bits */
1253       uint64_t offseted_gpu_ts = perf.value & 0xffffffff;
1254 
1255       gpu_offset = tu_device_ticks_to_ns(
1256          queue->device, offseted_gpu_ts - profiling_buffer->gpu_ticks_queued);
1257 
1258       struct tu_perfetto_clocks clocks = {
1259          .cpu = profiling_buffer->wall_clock_ns,
1260          .gpu_ts = tu_device_ticks_to_ns(queue->device,
1261                                          profiling_buffer->gpu_ticks_queued),
1262          .gpu_ts_offset = gpu_offset,
1263       };
1264 
1265       clocks = tu_perfetto_end_submit(queue, queue->device->submit_count,
1266                                       start_ts, &clocks);
1267       gpu_offset = clocks.gpu_ts_offset;
1268    }
1269 #endif
1270 
1271    kgsl_syncobj_destroy(&wait_sync);
1272 
1273    if (ret) {
1274       result = vk_device_set_lost(&queue->device->vk, "submit failed: %s\n",
1275                                   strerror(errno));
1276       goto fail_submit;
1277    }
1278 
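   /* The kernel returned a per-context timestamp for this submission; it is
    * both the queue fence and the value the signal syncobjs will wait on.
    */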
1279    p_atomic_set(&queue->fence, req.timestamp);
1280 
1281    for (uint32_t i = 0; i < signal_count; i++) {
1282       struct kgsl_syncobj *signal_sync =
1283          &container_of(signals[i].sync, struct vk_kgsl_syncobj, vk)
1284              ->syncobj;
1285 
1286       kgsl_syncobj_reset(signal_sync);
1287       signal_sync->state = KGSL_SYNCOBJ_STATE_TS;
1288       signal_sync->queue = queue;
1289       signal_sync->timestamp = req.timestamp;
1290    }
1291 
1292    if (u_trace_submission_data) {
1293       struct tu_u_trace_submission_data *submission_data =
1294          u_trace_submission_data;
1295       submission_data->gpu_ts_offset = gpu_offset;
1296    }
1297 
1298 fail_submit:
1299    if (result != VK_SUCCESS && u_trace_submission_data) {
1300       mtx_lock(&queue->device->kgsl_profiling_mutex);
1301       tu_suballoc_bo_free(&queue->device->kgsl_profiling_suballoc,
1302                           &u_trace_submission_data->kgsl_timestamp_bo);
1303       mtx_unlock(&queue->device->kgsl_profiling_mutex);
1304    }
1305 
1306    return result;
1307 }
1308 
1309 static VkResult
1310 kgsl_device_init(struct tu_device *dev)
1311 {
1312    dev->fd = dev->physical_device->local_fd;
1313    return VK_SUCCESS;
1314 }
1315 
1316 static void
1317 kgsl_device_finish(struct tu_device *dev)
1318 {
1319    /* No-op */
1320 }
1321 
1322 static int
1323 kgsl_device_get_gpu_timestamp(struct tu_device *dev, uint64_t *ts)
1324 {
1325    unreachable("");
1326    return 0;
1327 }
1328 
1329 static int
1330 kgsl_device_get_suspend_count(struct tu_device *dev, uint64_t *suspend_count)
1331 {
1332    /* kgsl doesn't have a way to get it */
1333    *suspend_count = 0;
1334    return 0;
1335 }
1336 
1337 static VkResult
1338 kgsl_device_check_status(struct tu_device *device)
1339 {
1340    for (unsigned i = 0; i < TU_MAX_QUEUE_FAMILIES; i++) {
1341       for (unsigned q = 0; q < device->queue_count[i]; q++) {
1342          /* KGSL's KGSL_PROP_GPU_RESET_STAT takes the u32 msm_queue_id and returns a
1343          * KGSL_CTX_STAT_* for the worst reset that happened since the last time it
1344          * was queried on that queue.
1345          */
1346          uint32_t value = device->queues[i][q].msm_queue_id;
1347          VkResult status = get_kgsl_prop(device->fd, KGSL_PROP_GPU_RESET_STAT,
1348                                        &value, sizeof(value));
1349          if (status != VK_SUCCESS)
1350             return vk_device_set_lost(&device->vk, "Failed to get GPU reset status");
1351 
1352          if (value != KGSL_CTX_STAT_NO_ERROR &&
1353             value != KGSL_CTX_STAT_INNOCENT_CONTEXT_RESET_EXT) {
1354             return vk_device_set_lost(&device->vk, "GPU faulted or hung");
1355          }
1356       }
1357    }
1358 
1359    return VK_SUCCESS;
1360 }
1361 
1362 static const struct tu_knl kgsl_knl_funcs = {
1363       .name = "kgsl",
1364 
1365       .device_init = kgsl_device_init,
1366       .device_finish = kgsl_device_finish,
1367       .device_get_gpu_timestamp = kgsl_device_get_gpu_timestamp,
1368       .device_get_suspend_count = kgsl_device_get_suspend_count,
1369       .device_check_status = kgsl_device_check_status,
1370       .submitqueue_new = kgsl_submitqueue_new,
1371       .submitqueue_close = kgsl_submitqueue_close,
1372       .bo_init = kgsl_bo_init,
1373       .bo_init_dmabuf = kgsl_bo_init_dmabuf,
1374       .bo_export_dmabuf = kgsl_bo_export_dmabuf,
1375       .bo_map = kgsl_bo_map,
1376       .bo_allow_dump = kgsl_bo_allow_dump,
1377       .bo_finish = kgsl_bo_finish,
1378       .submit_create = kgsl_submit_create,
1379       .submit_finish = kgsl_submit_finish,
1380       .submit_add_entries = kgsl_submit_add_entries,
1381       .queue_submit = kgsl_queue_submit,
1382       .queue_wait_fence = kgsl_queue_wait_fence,
1383 };
1384 
1385 static bool
1386 tu_kgsl_get_raytracing(int fd)
1387 {
1388    uint32_t value;
1389    int ret = get_kgsl_prop(fd, KGSL_PROP_IS_RAYTRACING_ENABLED, &value, sizeof(value));
1390    if (ret)
1391       return false;
1392 
1393    return value;
1394 }
1395 
1396 VkResult
1397 tu_knl_kgsl_load(struct tu_instance *instance, int fd)
1398 {
1399    if (instance->vk.enabled_extensions.KHR_display) {
1400       return vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
1401                        "VK_KHR_display is not supported on kgsl");
1402    }
1403 
1404    struct tu_physical_device *device = (struct tu_physical_device *)
1405       vk_zalloc(&instance->vk.alloc, sizeof(*device), 8,
1406                 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
1407    if (!device) {
1408       close(fd);
1409       return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1410    }
1411 
1412    static const char dma_heap_path[] = "/dev/dma_heap/system";
1413    static const char ion_path[] = "/dev/ion";
1414    int dma_fd;
1415 
1416    dma_fd = open(dma_heap_path, O_RDONLY);
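   /* Probe for a dma-buf exporter to back exportable allocations: prefer the
    * dma-heap system heap and fall back to ION, using ION_IOC_FREE to tell
    * legacy ION apart from the newer ION API.
    */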
1417    if (dma_fd >= 0) {
1418       device->kgsl_dma_type = TU_KGSL_DMA_TYPE_DMAHEAP;
1419    } else {
1420       dma_fd = open(ion_path, O_RDONLY);
1421       if (dma_fd >= 0) {
1422          /* ION_IOC_FREE available only for legacy ION */
1423          struct ion_handle_data free = { .handle = 0 };
1424          if (safe_ioctl(dma_fd, ION_IOC_FREE, &free) >= 0 || errno != ENOTTY)
1425             device->kgsl_dma_type = TU_KGSL_DMA_TYPE_ION_LEGACY;
1426          else
1427             device->kgsl_dma_type = TU_KGSL_DMA_TYPE_ION;
1428       } else {
1429          mesa_logw(
1430             "Unable to open either %s or %s; VK_KHR_external_memory_fd "
1431             "will be unavailable: %s",
1432             dma_heap_path, ion_path, strerror(errno));
1433       }
1434    }
1435 
1436    VkResult result = VK_ERROR_INITIALIZATION_FAILED;
1437 
1438    struct kgsl_devinfo info;
1439    if (get_kgsl_prop(fd, KGSL_PROP_DEVICE_INFO, &info, sizeof(info)))
1440       goto fail;
1441 
1442    uint64_t gmem_iova;
1443    if (get_kgsl_prop(fd, KGSL_PROP_UCHE_GMEM_VADDR, &gmem_iova, sizeof(gmem_iova)))
1444       goto fail;
1445 
1446    uint32_t highest_bank_bit;
1447    if (get_kgsl_prop(fd, KGSL_PROP_HIGHEST_BANK_BIT, &highest_bank_bit,
1448                      sizeof(highest_bank_bit)))
1449       goto fail;
1450 
1451    uint32_t ubwc_version;
1452    if (get_kgsl_prop(fd, KGSL_PROP_UBWC_MODE, &ubwc_version,
1453                      sizeof(ubwc_version)))
1454       goto fail;
1455 
1456 
1457    /* kgsl version check? */
1458 
1459    device->instance = instance;
1460    device->master_fd = -1;
1461    device->local_fd = fd;
1462    device->kgsl_dma_fd = dma_fd;
1463 
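   /* Fold the upper bytes of chip_id into the familiar three-digit GPU id,
    * e.g. a chip_id of 0x06030001 yields gpu_id 630.
    */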
1464    device->dev_id.gpu_id =
1465       ((info.chip_id >> 24) & 0xff) * 100 +
1466       ((info.chip_id >> 16) & 0xff) * 10 +
1467       ((info.chip_id >>  8) & 0xff);
1468    device->dev_id.chip_id = info.chip_id;
1469    device->gmem_size = debug_get_num_option("TU_GMEM", info.gmem_sizebytes);
1470    device->gmem_base = gmem_iova;
1471 
1472    device->has_raytracing = tu_kgsl_get_raytracing(fd);
1473 
1474    device->submitqueue_priority_count = 1;
1475 
1476    device->timeline_type = vk_sync_timeline_get_type(&vk_kgsl_sync_type);
1477 
1478    device->sync_types[0] = &vk_kgsl_sync_type;
1479    device->sync_types[1] = &device->timeline_type.sync;
1480    device->sync_types[2] = NULL;
1481 
1482    device->heap.size = tu_get_system_heap_size(device);
1483    device->heap.used = 0u;
1484    device->heap.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT;
1485 
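   /* KGSL_MEMFLAGS_USE_CPU_MAP makes the GPU address track the CPU mapping,
    * which is what lets us honor a requested BO address (see the
    * TU_BO_ALLOC_REPLAYABLE path in kgsl_bo_init).
    */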
1486    device->has_set_iova = kgsl_is_memory_type_supported(
1487       fd, KGSL_MEMFLAGS_USE_CPU_MAP);
1488 
1489    /* Even if the kernel is new enough, the GPU itself may not support it. */
1490    device->has_cached_coherent_memory = kgsl_is_memory_type_supported(
1491       fd, KGSL_MEMFLAGS_IOCOHERENT |
1492              (KGSL_CACHEMODE_WRITEBACK << KGSL_CACHEMODE_SHIFT));
1493 
1494    /* preemption is always supported on kgsl */
1495    device->has_preemption = true;
1496 
1497    device->ubwc_config.highest_bank_bit = highest_bank_bit;
1498 
1499    /* The other config values can be partially inferred from the UBWC version,
1500     * but kgsl also hardcodes overrides for specific a6xx versions that we
1501     * have to follow here. Yuck.
1502     */
1503    switch (ubwc_version) {
1504    case KGSL_UBWC_1_0:
1505       device->ubwc_config.bank_swizzle_levels = 0x7;
1506       device->ubwc_config.macrotile_mode = FDL_MACROTILE_4_CHANNEL;
1507       break;
1508    case KGSL_UBWC_2_0:
1509       device->ubwc_config.bank_swizzle_levels = 0x6;
1510       device->ubwc_config.macrotile_mode = FDL_MACROTILE_4_CHANNEL;
1511       break;
1512    case KGSL_UBWC_3_0:
1513       device->ubwc_config.bank_swizzle_levels = 0x6;
1514       device->ubwc_config.macrotile_mode = FDL_MACROTILE_4_CHANNEL;
1515       break;
1516    case KGSL_UBWC_4_0:
1517       device->ubwc_config.bank_swizzle_levels = 0x6;
1518       device->ubwc_config.macrotile_mode = FDL_MACROTILE_8_CHANNEL;
1519       break;
1520    default:
1521       return vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
1522                        "unknown UBWC version 0x%x", ubwc_version);
1523    }
1524 
1525    /* kgsl unfortunately hardcodes some settings for certain GPUs and doesn't
1526     * expose them in the uAPI so hardcode them here to match.
1527     */
1528    if (device->dev_id.gpu_id == 663 || device->dev_id.gpu_id == 680) {
1529       device->ubwc_config.macrotile_mode = FDL_MACROTILE_8_CHANNEL;
1530    }
1531    if (device->dev_id.gpu_id == 663) {
1532       /* level2_swizzling_dis = 1 */
1533       device->ubwc_config.bank_swizzle_levels = 0x4;
1534    }
1535 
1536    instance->knl = &kgsl_knl_funcs;
1537 
1538    result = tu_physical_device_init(device, instance);
1539    if (result != VK_SUCCESS)
1540       goto fail;
1541 
1542    list_addtail(&device->vk.link, &instance->vk.physical_devices.list);
1543 
1544    return VK_SUCCESS;
1545 
1546 fail:
1547    vk_free(&instance->vk.alloc, device);
1548    close(fd);
1549    if (dma_fd >= 0)
1550       close(dma_fd);
1551    return result;
1552 }
1553