1 /*
2  * Copyright © 2020 Google, Inc.
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #include "tu_knl.h"
7 
8 #include <errno.h>
9 #include <fcntl.h>
10 #include <poll.h>
11 #include <stdint.h>
12 #include <sys/ioctl.h>
13 #include <sys/mman.h>
14 
15 #include "msm_kgsl.h"
16 #include "vk_util.h"
17 
18 #include "util/u_debug.h"
19 #include "util/u_vector.h"
20 #include "util/libsync.h"
21 #include "util/timespec.h"
22 
23 #include "tu_cmd_buffer.h"
24 #include "tu_cs.h"
25 #include "tu_device.h"
26 #include "tu_dynamic_rendering.h"
27 
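/* Retry the ioctl if it is interrupted by a signal (EINTR) or transiently
 * fails with EAGAIN, so callers never have to handle spurious failures.
 */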
28 static int
29 safe_ioctl(int fd, unsigned long request, void *arg)
30 {
31    int ret;
32 
33    do {
34       ret = ioctl(fd, request, arg);
35    } while (ret == -1 && (errno == EINTR || errno == EAGAIN));
36 
37    return ret;
38 }
39 
40 static int
41 kgsl_submitqueue_new(struct tu_device *dev,
42                      int priority,
43                      uint32_t *queue_id)
44 {
45    struct kgsl_drawctxt_create req = {
46       .flags = KGSL_CONTEXT_SAVE_GMEM |
47               KGSL_CONTEXT_NO_GMEM_ALLOC |
48               KGSL_CONTEXT_PREAMBLE,
49    };
50 
51    int ret = safe_ioctl(dev->physical_device->local_fd, IOCTL_KGSL_DRAWCTXT_CREATE, &req);
52    if (ret)
53       return ret;
54 
55    *queue_id = req.drawctxt_id;
56 
57    return 0;
58 }
59 
60 static void
61 kgsl_submitqueue_close(struct tu_device *dev, uint32_t queue_id)
62 {
63    struct kgsl_drawctxt_destroy req = {
64       .drawctxt_id = queue_id,
65    };
66 
67    safe_ioctl(dev->physical_device->local_fd, IOCTL_KGSL_DRAWCTXT_DESTROY, &req);
68 }
69 
70 static VkResult
71 kgsl_bo_init(struct tu_device *dev,
72              struct tu_bo **out_bo,
73              uint64_t size,
74              uint64_t client_iova,
75              VkMemoryPropertyFlags mem_property,
76              enum tu_bo_alloc_flags flags,
77              const char *name)
78 {
79    assert(client_iova == 0);
80 
81    struct kgsl_gpumem_alloc_id req = {
82       .size = size,
83    };
84 
85    if (mem_property & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) {
86       if (mem_property & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) {
87          req.flags |= KGSL_MEMFLAGS_IOCOHERENT;
88       }
89 
90       req.flags |= KGSL_CACHEMODE_WRITEBACK << KGSL_CACHEMODE_SHIFT;
91    } else {
92       req.flags |= KGSL_CACHEMODE_WRITECOMBINE << KGSL_CACHEMODE_SHIFT;
93    }
94 
95    if (flags & TU_BO_ALLOC_GPU_READ_ONLY)
96       req.flags |= KGSL_MEMFLAGS_GPUREADONLY;
97 
98    int ret;
99 
100    ret = safe_ioctl(dev->physical_device->local_fd,
101                     IOCTL_KGSL_GPUMEM_ALLOC_ID, &req);
102    if (ret) {
103       return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
104                        "GPUMEM_ALLOC_ID failed (%s)", strerror(errno));
105    }
106 
107    struct tu_bo* bo = tu_device_lookup_bo(dev, req.id);
108    assert(bo && bo->gem_handle == 0);
109 
110    *bo = (struct tu_bo) {
111       .gem_handle = req.id,
112       .size = req.mmapsize,
113       .iova = req.gpuaddr,
114       .name = tu_debug_bos_add(dev, req.mmapsize, name),
115       .refcnt = 1,
116    };
117 
118    *out_bo = bo;
119 
120    return VK_SUCCESS;
121 }
122 
123 static VkResult
124 kgsl_bo_init_dmabuf(struct tu_device *dev,
125                     struct tu_bo **out_bo,
126                     uint64_t size,
127                     int fd)
128 {
129    struct kgsl_gpuobj_import_dma_buf import_dmabuf = {
130       .fd = fd,
131    };
132    struct kgsl_gpuobj_import req = {
133       .priv = (uintptr_t)&import_dmabuf,
134       .priv_len = sizeof(import_dmabuf),
135       .flags = 0,
136       .type = KGSL_USER_MEM_TYPE_DMABUF,
137    };
138    int ret;
139 
140    ret = safe_ioctl(dev->physical_device->local_fd,
141                     IOCTL_KGSL_GPUOBJ_IMPORT, &req);
142    if (ret)
143       return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
144                        "Failed to import dma-buf (%s)\n", strerror(errno));
145 
146    struct kgsl_gpuobj_info info_req = {
147       .id = req.id,
148    };
149 
150    ret = safe_ioctl(dev->physical_device->local_fd,
151                     IOCTL_KGSL_GPUOBJ_INFO, &info_req);
152    if (ret)
153       return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
154                        "Failed to get dma-buf info (%s)\n", strerror(errno));
155 
156    struct tu_bo* bo = tu_device_lookup_bo(dev, req.id);
157    assert(bo && bo->gem_handle == 0);
158 
159    *bo = (struct tu_bo) {
160       .gem_handle = req.id,
161       .size = info_req.size,
162       .iova = info_req.gpuaddr,
163       .name = tu_debug_bos_add(dev, info_req.size, "dmabuf"),
164       .refcnt = 1,
165    };
166 
167    *out_bo = bo;
168 
169    return VK_SUCCESS;
170 }
171 
172 static int
173 kgsl_bo_export_dmabuf(struct tu_device *dev, struct tu_bo *bo)
174 {
175    tu_stub();
176 
177    return -1;
178 }
179 
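/* KGSL exposes GPU buffers through mmap() on the device fd; the allocation
 * id returned by GPUMEM_ALLOC_ID, shifted by the 4K page size, is used as
 * the mmap offset.
 */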
180 static VkResult
181 kgsl_bo_map(struct tu_device *dev, struct tu_bo *bo)
182 {
183    if (bo->map)
184       return VK_SUCCESS;
185 
186    uint64_t offset = bo->gem_handle << 12;
187    void *map = mmap(0, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED,
188                     dev->physical_device->local_fd, offset);
189    if (map == MAP_FAILED)
190       return vk_error(dev, VK_ERROR_MEMORY_MAP_FAILED);
191 
192    bo->map = map;
193 
194    return VK_SUCCESS;
195 }
196 
197 static void
198 kgsl_bo_allow_dump(struct tu_device *dev, struct tu_bo *bo)
199 {
200 }
201 
202 static void
203 kgsl_bo_finish(struct tu_device *dev, struct tu_bo *bo)
204 {
205    assert(bo->gem_handle);
206 
207    if (!p_atomic_dec_zero(&bo->refcnt))
208       return;
209 
210    if (bo->map)
211       munmap(bo->map, bo->size);
212 
213    struct kgsl_gpumem_free_id req = {
214       .id = bo->gem_handle
215    };
216 
217    /* Tell sparse array that entry is free */
218    memset(bo, 0, sizeof(*bo));
219 
220    safe_ioctl(dev->physical_device->local_fd, IOCTL_KGSL_GPUMEM_FREE_ID, &req);
221 }
222 
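/* Shared implementation of vkFlushMappedMemoryRanges and
 * vkInvalidateMappedMemoryRanges: translate each VkMappedMemoryRange into a
 * kgsl_gpuobj_sync_obj and issue a single IOCTL_KGSL_GPUOBJ_SYNC.
 */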
223 static VkResult
224 kgsl_sync_cache(VkDevice _device,
225                 uint32_t op,
226                 uint32_t count,
227                 const VkMappedMemoryRange *ranges)
228 {
229    TU_FROM_HANDLE(tu_device, device, _device);
230 
231    struct kgsl_gpuobj_sync_obj *sync_list =
232       (struct kgsl_gpuobj_sync_obj *) vk_zalloc(
233          &device->vk.alloc, sizeof(*sync_list)*count, 8,
234          VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
235 
236    struct kgsl_gpuobj_sync gpuobj_sync = {
237       .objs = (uintptr_t) sync_list,
238       .obj_len = sizeof(*sync_list),
239       .count = count,
240    };
241 
242    for (uint32_t i = 0; i < count; i++) {
243       TU_FROM_HANDLE(tu_device_memory, mem, ranges[i].memory);
244 
245       sync_list[i].op = op;
246       sync_list[i].id = mem->bo->gem_handle;
247       sync_list[i].offset = ranges[i].offset;
248       sync_list[i].length = ranges[i].size == VK_WHOLE_SIZE
249                                ? (mem->bo->size - ranges[i].offset)
250                                : ranges[i].size;
251    }
252 
253    /* There are two other KGSL ioctls for flushing/invalidation:
254     * - IOCTL_KGSL_GPUMEM_SYNC_CACHE - processes one memory range at a time;
255     * - IOCTL_KGSL_GPUMEM_SYNC_CACHE_BULK - processes several buffers but
256     *   has no way to specify ranges.
257     *
258     * IOCTL_KGSL_GPUOBJ_SYNC, on the other hand, maps exactly to the Vulkan API.
259     */
260    safe_ioctl(device->fd, IOCTL_KGSL_GPUOBJ_SYNC, &gpuobj_sync);
261 
262    vk_free(&device->vk.alloc, sync_list);
263 
264    return VK_SUCCESS;
265 }
266 
267 VkResult
268 tu_FlushMappedMemoryRanges(VkDevice device,
269                            uint32_t count,
270                            const VkMappedMemoryRange *ranges)
271 {
272    return kgsl_sync_cache(device, KGSL_GPUMEM_CACHE_TO_GPU, count, ranges);
273 }
274 
275 VkResult
276 tu_InvalidateMappedMemoryRanges(VkDevice device,
277                                 uint32_t count,
278                                 const VkMappedMemoryRange *ranges)
279 {
280    return kgsl_sync_cache(device, KGSL_GPUMEM_CACHE_FROM_GPU, count, ranges);
281 }
282 
283 static VkResult
284 get_kgsl_prop(int fd, unsigned int type, void *value, size_t size)
285 {
286    struct kgsl_device_getproperty getprop = {
287       .type = type,
288       .value = value,
289       .sizebytes = size,
290    };
291 
292    return safe_ioctl(fd, IOCTL_KGSL_DEVICE_GETPROPERTY, &getprop)
293              ? VK_ERROR_UNKNOWN
294              : VK_SUCCESS;
295 }
296 
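/* Probe whether a combination of memory flags is supported by making a small
 * test allocation and immediately freeing it.
 */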
297 static bool
298 kgsl_is_memory_type_supported(int fd, uint32_t flags)
299 {
300    struct kgsl_gpumem_alloc_id req_alloc = {
301       .flags = flags,
302       .size = 0x1000,
303    };
304 
305    int ret = safe_ioctl(fd, IOCTL_KGSL_GPUMEM_ALLOC_ID, &req_alloc);
306    if (ret) {
307       return false;
308    }
309 
310    struct kgsl_gpumem_free_id req_free = { .id = req_alloc.id };
311 
312    safe_ioctl(fd, IOCTL_KGSL_GPUMEM_FREE_ID, &req_free);
313 
314    return true;
315 }
316 
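/* Userspace emulation of a binary syncobj on top of KGSL: the payload is
 * either nothing (unsignaled), an already-signaled marker, a (queue,
 * timestamp) pair, or a sync_file fd.
 */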
317 enum kgsl_syncobj_state {
318    KGSL_SYNCOBJ_STATE_UNSIGNALED,
319    KGSL_SYNCOBJ_STATE_SIGNALED,
320    KGSL_SYNCOBJ_STATE_TS,
321    KGSL_SYNCOBJ_STATE_FD,
322 };
323 
324 struct kgsl_syncobj
325 {
326    struct vk_object_base base;
327    enum kgsl_syncobj_state state;
328 
329    struct tu_queue *queue;
330    uint32_t timestamp;
331 
332    int fd;
333 };
334 
335 struct tu_u_trace_syncobj
336 {
337    uint32_t msm_queue_id;
338    uint32_t timestamp;
339 };
340 
341 static void
342 kgsl_syncobj_init(struct kgsl_syncobj *s, bool signaled)
343 {
344    s->state =
345       signaled ? KGSL_SYNCOBJ_STATE_SIGNALED : KGSL_SYNCOBJ_STATE_UNSIGNALED;
346 
347    s->timestamp = UINT32_MAX;
348    s->fd = -1;
349 }
350 
351 static void
352 kgsl_syncobj_reset(struct kgsl_syncobj *s)
353 {
354    if (s->state == KGSL_SYNCOBJ_STATE_FD && s->fd >= 0) {
355       ASSERTED int ret = close(s->fd);
356       assert(ret == 0);
357       s->fd = -1;
358    } else if (s->state == KGSL_SYNCOBJ_STATE_TS) {
359       s->timestamp = UINT32_MAX;
360    }
361 
362    s->state = KGSL_SYNCOBJ_STATE_UNSIGNALED;
363 }
364 
365 static void
366 kgsl_syncobj_destroy(struct kgsl_syncobj *s)
367 {
368    kgsl_syncobj_reset(s);
369 }
370 
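/* Create a sync_file fd for a (context, timestamp) pair via
 * KGSL_TIMESTAMP_EVENT_FENCE; the kernel writes the new fd into 'priv'.
 */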
371 static int
372 timestamp_to_fd(struct tu_queue *queue, uint32_t timestamp)
373 {
374    int fd;
375    struct kgsl_timestamp_event event = {
376       .type = KGSL_TIMESTAMP_EVENT_FENCE,
377       .timestamp = timestamp,
378       .context_id = queue->msm_queue_id,
379       .priv = &fd,
380       .len = sizeof(fd),
381    };
382 
383    int ret = safe_ioctl(queue->device->fd, IOCTL_KGSL_TIMESTAMP_EVENT, &event);
384    if (ret)
385       return -1;
386 
387    return fd;
388 }
389 
390 static int
391 kgsl_syncobj_ts_to_fd(const struct kgsl_syncobj *syncobj)
392 {
393    assert(syncobj->state == KGSL_SYNCOBJ_STATE_TS);
394    return timestamp_to_fd(syncobj->queue, syncobj->timestamp);
395 }
396 
397 /* return true if timestamp a is greater (more recent) than b
398  * this relies on timestamps never having a difference > (1<<31)
399  */
400 static inline bool
401 timestamp_cmp(uint32_t a, uint32_t b)
402 {
403    return (int32_t) (a - b) >= 0;
404 }
405 
406 static uint32_t
407 max_ts(uint32_t a, uint32_t b)
408 {
409    return timestamp_cmp(a, b) ? a : b;
410 }
411 
412 static uint32_t
413 min_ts(uint32_t a, uint32_t b)
414 {
415    return timestamp_cmp(a, b) ? b : a;
416 }
417 
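/* Convert an absolute timeout in nanoseconds into the relative millisecond
 * timeout that poll() and the KGSL wait ioctl expect.
 */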
418 static int
419 get_relative_ms(uint64_t abs_timeout_ns)
420 {
421    if (abs_timeout_ns >= INT64_MAX)
422       /* We can assume that a wait with a value this high is a forever wait,
423        * so return -1: poll() treats it as an infinite timeout, and as an
424        * unsigned value it is the longest possible timeout for the KGSL wait IOCTL.
425        */
426       return -1;
427 
428    uint64_t cur_time_ms = os_time_get_nano() / 1000000;
429    uint64_t abs_timeout_ms = abs_timeout_ns / 1000000;
430    if (abs_timeout_ms <= cur_time_ms)
431       return 0;
432 
433    return abs_timeout_ms - cur_time_ms;
434 }
435 
436 /* safe_ioctl is not enough as restarted waits would not adjust the timeout
437  * which could lead to waiting substantially longer than requested
438  */
439 static int
440 wait_timestamp_safe(int fd,
441                     unsigned int context_id,
442                     unsigned int timestamp,
443                     uint64_t abs_timeout_ns)
444 {
445    struct kgsl_device_waittimestamp_ctxtid wait = {
446       .context_id = context_id,
447       .timestamp = timestamp,
448       .timeout = get_relative_ms(abs_timeout_ns),
449    };
450 
451    while (true) {
452       int ret = ioctl(fd, IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID, &wait);
453 
454       if (ret == -1 && (errno == EINTR || errno == EAGAIN)) {
455          int timeout_ms = get_relative_ms(abs_timeout_ns);
456 
457          /* update timeout to consider time that has passed since the start */
458          if (timeout_ms == 0) {
459             errno = ETIME;
460             return -1;
461          }
462 
463          wait.timeout = timeout_ms;
464       } else if (ret == -1 && errno == ETIMEDOUT) {
465          /* The kernel returns ETIMEDOUT if the timeout is reached, but
466           * we want to return ETIME instead.
467           */
468          errno = ETIME;
469          return -1;
470       } else {
471          return ret;
472       }
473    }
474 }
475 
476 static VkResult
477 kgsl_syncobj_wait(struct tu_device *device,
478                   struct kgsl_syncobj *s,
479                   uint64_t abs_timeout_ns)
480 {
481    if (s->state == KGSL_SYNCOBJ_STATE_UNSIGNALED) {
482       /* If this syncobj is unsignaled we need to wait for it to resolve to a
483        * valid syncobj before letting the rest of the wait continue; this
484        * avoids needing kernel support for wait-before-signal semantics.
485        */
486 
487       if (abs_timeout_ns == 0)
488          return VK_TIMEOUT; // If this is a simple poll then we can return early
489 
490       pthread_mutex_lock(&device->submit_mutex);
491       struct timespec abstime;
492       timespec_from_nsec(&abstime, abs_timeout_ns);
493 
494       while (s->state == KGSL_SYNCOBJ_STATE_UNSIGNALED) {
495          int ret;
496          if (abs_timeout_ns == UINT64_MAX) {
497             ret = pthread_cond_wait(&device->timeline_cond,
498                                     &device->submit_mutex);
499          } else {
500             ret = pthread_cond_timedwait(&device->timeline_cond,
501                                          &device->submit_mutex, &abstime);
502          }
503          if (ret != 0) {
504             assert(ret == ETIMEDOUT);
505             pthread_mutex_unlock(&device->submit_mutex);
506             return VK_TIMEOUT;
507          }
508       }
509 
510       pthread_mutex_unlock(&device->submit_mutex);
511    }
512 
513    switch (s->state) {
514    case KGSL_SYNCOBJ_STATE_SIGNALED:
515       return VK_SUCCESS;
516 
517    case KGSL_SYNCOBJ_STATE_UNSIGNALED:
518       return VK_TIMEOUT;
519 
520    case KGSL_SYNCOBJ_STATE_TS: {
521       int ret = wait_timestamp_safe(device->fd, s->queue->msm_queue_id,
522                                     s->timestamp, abs_timeout_ns);
523       if (ret) {
524          assert(errno == ETIME);
525          return VK_TIMEOUT;
526       } else {
527          return VK_SUCCESS;
528       }
529    }
530 
531    case KGSL_SYNCOBJ_STATE_FD: {
532       int ret = sync_wait(s->fd, get_relative_ms(abs_timeout_ns));
533       if (ret) {
534          assert(errno == ETIME);
535          return VK_TIMEOUT;
536       } else {
537          return VK_SUCCESS;
538       }
539    }
540 
541    default:
542       unreachable("invalid syncobj state");
543    }
544 }
545 
546 #define kgsl_syncobj_foreach_state(syncobjs, filter) \
547    for (uint32_t i = 0; i < count && (sync = syncobjs[i], true); i++) \
548       if (sync->state == filter)
549 
550 static VkResult
551 kgsl_syncobj_wait_any(struct tu_device *device, struct kgsl_syncobj **syncobjs, uint32_t count, uint64_t abs_timeout_ns)
552 {
553    if (count == 0)
554       return VK_TIMEOUT;
555    else if (count == 1)
556       return kgsl_syncobj_wait(device, syncobjs[0], abs_timeout_ns);
557 
558    uint32_t num_fds = 0;
559    struct tu_queue *queue = NULL;
560    struct kgsl_syncobj *sync = NULL;
561 
562    /* Simple case, we already have a signaled one */
563    kgsl_syncobj_foreach_state(syncobjs, KGSL_SYNCOBJ_STATE_SIGNALED)
564       return VK_SUCCESS;
565 
566    kgsl_syncobj_foreach_state(syncobjs, KGSL_SYNCOBJ_STATE_FD)
567       num_fds++;
568 
569    /* If we have TS from different queues we cannot compare them and would
570     * have to convert them into FDs
571     */
572    bool convert_ts_to_fd = false;
573    kgsl_syncobj_foreach_state(syncobjs, KGSL_SYNCOBJ_STATE_TS) {
574       if (queue != NULL && sync->queue != queue) {
575          convert_ts_to_fd = true;
576          break;
577       }
578       queue = sync->queue;
579    }
580 
581    /* If we have no FD nor TS syncobjs then we can return immediately */
582    if (num_fds == 0 && queue == NULL)
583       return VK_TIMEOUT;
584 
585    VkResult result = VK_TIMEOUT;
586 
587    struct u_vector poll_fds = { 0 };
588    uint32_t lowest_timestamp = 0;
589 
590    if (convert_ts_to_fd || num_fds > 0)
591       u_vector_init(&poll_fds, 4, sizeof(struct pollfd));
592 
593    if (convert_ts_to_fd) {
594       kgsl_syncobj_foreach_state(syncobjs, KGSL_SYNCOBJ_STATE_TS) {
595          struct pollfd *poll_fd = (struct pollfd *) u_vector_add(&poll_fds);
596          poll_fd->fd = timestamp_to_fd(sync->queue, sync->timestamp);
597          poll_fd->events = POLLIN;
598       }
599    } else {
600       /* TSs could be merged by finding the one with the lowest timestamp */
601       bool first_ts = true;
602       kgsl_syncobj_foreach_state(syncobjs, KGSL_SYNCOBJ_STATE_TS) {
603          if (first_ts || timestamp_cmp(lowest_timestamp, sync->timestamp)) {
604             first_ts = false;
605             lowest_timestamp = sync->timestamp;
606          }
607       }
608 
609       if (num_fds) {
610          struct pollfd *poll_fd = (struct pollfd *) u_vector_add(&poll_fds);
611          poll_fd->fd = timestamp_to_fd(queue, lowest_timestamp);
612          poll_fd->events = POLLIN;
613       }
614    }
615 
616    if (num_fds) {
617       kgsl_syncobj_foreach_state(syncobjs, KGSL_SYNCOBJ_STATE_FD) {
618          struct pollfd *poll_fd = (struct pollfd *) u_vector_add(&poll_fds);
619          poll_fd->fd = sync->fd;
620          poll_fd->events = POLLIN;
621       }
622    }
623 
624    if (u_vector_length(&poll_fds) == 0) {
625       int ret = wait_timestamp_safe(device->fd, queue->msm_queue_id,
626                                     lowest_timestamp, MIN2(abs_timeout_ns, INT64_MAX));
627       if (ret) {
628          assert(errno == ETIME);
629          result = VK_TIMEOUT;
630       } else {
631          result = VK_SUCCESS;
632       }
633    } else {
634       int ret, i;
635 
636       struct pollfd *fds = (struct pollfd *) poll_fds.data;
637       uint32_t fds_count = u_vector_length(&poll_fds);
638       do {
639          ret = poll(fds, fds_count, get_relative_ms(abs_timeout_ns));
640          if (ret > 0) {
641             for (i = 0; i < fds_count; i++) {
642                if (fds[i].revents & (POLLERR | POLLNVAL)) {
643                   errno = EINVAL;
644                   ret = -1;
645                   break;
646                }
647             }
648             break;
649          } else if (ret == 0) {
650             errno = ETIME;
651             break;
652          }
653       } while (ret == -1 && (errno == EINTR || errno == EAGAIN));
654 
655       for (uint32_t i = 0; i < fds_count - num_fds; i++)
656          close(fds[i].fd);
657 
658       if (ret != 0) {
659          assert(errno == ETIME);
660          result = VK_TIMEOUT;
661       } else {
662          result = VK_SUCCESS;
663       }
664    }
665 
666    u_vector_finish(&poll_fds);
667    return result;
668 }
669 
670 static VkResult
671 kgsl_syncobj_export(struct kgsl_syncobj *s, int *pFd)
672 {
673    if (!pFd)
674       return VK_SUCCESS;
675 
676    switch (s->state) {
677    case KGSL_SYNCOBJ_STATE_SIGNALED:
678    case KGSL_SYNCOBJ_STATE_UNSIGNALED:
679       /* Getting a sync FD from an unsignaled syncobj is UB in Vulkan */
680       *pFd = -1;
681       return VK_SUCCESS;
682 
683    case KGSL_SYNCOBJ_STATE_FD:
684       if (s->fd < 0)
685          *pFd = -1;
686       else
687          *pFd = dup(s->fd);
688       return VK_SUCCESS;
689 
690    case KGSL_SYNCOBJ_STATE_TS:
691       *pFd = kgsl_syncobj_ts_to_fd(s);
692       return VK_SUCCESS;
693 
694    default:
695       unreachable("Invalid syncobj state");
696    }
697 }
698 
699 static VkResult
700 kgsl_syncobj_import(struct kgsl_syncobj *s, int fd)
701 {
702    kgsl_syncobj_reset(s);
703    if (fd >= 0) {
704       s->state = KGSL_SYNCOBJ_STATE_FD;
705       s->fd = fd;
706    } else {
707       s->state = KGSL_SYNCOBJ_STATE_SIGNALED;
708    }
709 
710    return VK_SUCCESS;
711 }
712 
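/* Merge two sync fds and close the original(s): sync_merge() returns a new
 * fd and the caller only keeps the merged one.
 */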
713 static int
714 sync_merge_close(const char *name, int fd1, int fd2, bool close_fd2)
715 {
716    int fd = sync_merge(name, fd1, fd2);
717    if (fd < 0)
718       return -1;
719 
720    close(fd1);
721    if (close_fd2)
722       close(fd2);
723 
724    return fd;
725 }
726 
727 /* Merges multiple kgsl_syncobjs into a single one which is only signalled
728  * after all submitted syncobjs are signalled
729  */
730 static struct kgsl_syncobj
731 kgsl_syncobj_merge(const struct kgsl_syncobj **syncobjs, uint32_t count)
732 {
733    struct kgsl_syncobj ret;
734    kgsl_syncobj_init(&ret, true);
735 
736    if (count == 0)
737       return ret;
738 
739    for (uint32_t i = 0; i < count; ++i) {
740       const struct kgsl_syncobj *sync = syncobjs[i];
741 
742       switch (sync->state) {
743       case KGSL_SYNCOBJ_STATE_SIGNALED:
744          break;
745 
746       case KGSL_SYNCOBJ_STATE_UNSIGNALED:
747          kgsl_syncobj_reset(&ret);
748          return ret;
749 
750       case KGSL_SYNCOBJ_STATE_TS:
751          if (ret.state == KGSL_SYNCOBJ_STATE_TS) {
752             if (ret.queue == sync->queue) {
753                ret.timestamp = max_ts(ret.timestamp, sync->timestamp);
754             } else {
755                ret.state = KGSL_SYNCOBJ_STATE_FD;
756                int sync_fd = kgsl_syncobj_ts_to_fd(sync);
757                ret.fd = sync_merge_close("tu_sync", ret.fd, sync_fd, true);
758                assert(ret.fd >= 0);
759             }
760          } else if (ret.state == KGSL_SYNCOBJ_STATE_FD) {
761             int sync_fd = kgsl_syncobj_ts_to_fd(sync);
762             ret.fd = sync_merge_close("tu_sync", ret.fd, sync_fd, true);
763             assert(ret.fd >= 0);
764          } else {
765             ret = *sync;
766          }
767          break;
768 
769       case KGSL_SYNCOBJ_STATE_FD:
770          if (ret.state == KGSL_SYNCOBJ_STATE_FD) {
771             ret.fd = sync_merge_close("tu_sync", ret.fd, sync->fd, false);
772             assert(ret.fd >= 0);
773          } else if (ret.state == KGSL_SYNCOBJ_STATE_TS) {
774             ret.state = KGSL_SYNCOBJ_STATE_FD;
775             int sync_fd = kgsl_syncobj_ts_to_fd(sync);
776             ret.fd = sync_merge_close("tu_sync", ret.fd, sync_fd, true);
777             assert(ret.fd >= 0);
778          } else {
779             ret = *sync;
780             ret.fd = dup(ret.fd);
781             assert(ret.fd >= 0);
782          }
783          break;
784 
785       default:
786          unreachable("invalid syncobj state");
787       }
788    }
789 
790    return ret;
791 }
792 
793 struct vk_kgsl_syncobj
794 {
795    struct vk_sync vk;
796    struct kgsl_syncobj syncobj;
797 };
798 
799 static VkResult
800 vk_kgsl_sync_init(struct vk_device *device,
801                   struct vk_sync *sync,
802                   uint64_t initial_value)
803 {
804    struct vk_kgsl_syncobj *s = container_of(sync, struct vk_kgsl_syncobj, vk);
805    kgsl_syncobj_init(&s->syncobj, initial_value != 0);
806    return VK_SUCCESS;
807 }
808 
809 static void
810 vk_kgsl_sync_finish(struct vk_device *device, struct vk_sync *sync)
811 {
812    struct vk_kgsl_syncobj *s = container_of(sync, struct vk_kgsl_syncobj, vk);
813    kgsl_syncobj_destroy(&s->syncobj);
814 }
815 
816 static VkResult
817 vk_kgsl_sync_reset(struct vk_device *device, struct vk_sync *sync)
818 {
819    struct vk_kgsl_syncobj *s = container_of(sync, struct vk_kgsl_syncobj, vk);
820    kgsl_syncobj_reset(&s->syncobj);
821    return VK_SUCCESS;
822 }
823 
824 static VkResult
825 vk_kgsl_sync_move(struct vk_device *device,
826                   struct vk_sync *dst,
827                   struct vk_sync *src)
828 {
829    struct vk_kgsl_syncobj *d = container_of(dst, struct vk_kgsl_syncobj, vk);
830    struct vk_kgsl_syncobj *s = container_of(src, struct vk_kgsl_syncobj, vk);
831    kgsl_syncobj_reset(&d->syncobj);
832    d->syncobj = s->syncobj;
833    kgsl_syncobj_init(&s->syncobj, false);
834    return VK_SUCCESS;
835 }
836 
837 static VkResult
838 vk_kgsl_sync_wait(struct vk_device *_device,
839                   struct vk_sync *sync,
840                   uint64_t wait_value,
841                   enum vk_sync_wait_flags wait_flags,
842                   uint64_t abs_timeout_ns)
843 {
844    struct tu_device *device = container_of(_device, struct tu_device, vk);
845    struct vk_kgsl_syncobj *s = container_of(sync, struct vk_kgsl_syncobj, vk);
846 
847    if (wait_flags & VK_SYNC_WAIT_PENDING)
848       return VK_SUCCESS;
849 
850    return kgsl_syncobj_wait(device, &s->syncobj, abs_timeout_ns);
851 }
852 
853 static VkResult
854 vk_kgsl_sync_wait_many(struct vk_device *_device,
855                        uint32_t wait_count,
856                        const struct vk_sync_wait *waits,
857                        enum vk_sync_wait_flags wait_flags,
858                        uint64_t abs_timeout_ns)
859 {
860    struct tu_device *device = container_of(_device, struct tu_device, vk);
861 
862    if (wait_flags & VK_SYNC_WAIT_PENDING)
863       return VK_SUCCESS;
864 
865    if (wait_flags & VK_SYNC_WAIT_ANY) {
866       struct kgsl_syncobj *syncobjs[wait_count];
867       for (uint32_t i = 0; i < wait_count; i++) {
868          syncobjs[i] =
869             &container_of(waits[i].sync, struct vk_kgsl_syncobj, vk)->syncobj;
870       }
871 
872       return kgsl_syncobj_wait_any(device, syncobjs, wait_count,
873                                    abs_timeout_ns);
874    } else {
875       for (uint32_t i = 0; i < wait_count; i++) {
876          struct vk_kgsl_syncobj *s =
877             container_of(waits[i].sync, struct vk_kgsl_syncobj, vk);
878 
879          VkResult result =
880             kgsl_syncobj_wait(device, &s->syncobj, abs_timeout_ns);
881          if (result != VK_SUCCESS)
882             return result;
883       }
884       return VK_SUCCESS;
885    }
886 }
887 
888 static VkResult
889 vk_kgsl_sync_import_sync_file(struct vk_device *device,
890                               struct vk_sync *sync,
891                               int fd)
892 {
893    struct vk_kgsl_syncobj *s = container_of(sync, struct vk_kgsl_syncobj, vk);
894    if (fd >= 0) {
895       fd = dup(fd);
896       if (fd < 0) {
897          mesa_loge("vk_kgsl_sync_import_sync_file: dup failed: %s",
898                    strerror(errno));
899          return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
900       }
901    }
902    return kgsl_syncobj_import(&s->syncobj, fd);
903 }
904 
905 static VkResult
906 vk_kgsl_sync_export_sync_file(struct vk_device *device,
907                               struct vk_sync *sync,
908                               int *pFd)
909 {
910    struct vk_kgsl_syncobj *s = container_of(sync, struct vk_kgsl_syncobj, vk);
911    return kgsl_syncobj_export(&s->syncobj, pFd);
912 }
913 
914 const struct vk_sync_type vk_kgsl_sync_type = {
915    .size = sizeof(struct vk_kgsl_syncobj),
916    .features = (enum vk_sync_features)
917                (VK_SYNC_FEATURE_BINARY |
918                 VK_SYNC_FEATURE_GPU_WAIT |
919                 VK_SYNC_FEATURE_GPU_MULTI_WAIT |
920                 VK_SYNC_FEATURE_CPU_WAIT |
921                 VK_SYNC_FEATURE_CPU_RESET |
922                 VK_SYNC_FEATURE_WAIT_ANY |
923                 VK_SYNC_FEATURE_WAIT_PENDING),
924    .init = vk_kgsl_sync_init,
925    .finish = vk_kgsl_sync_finish,
926    .reset = vk_kgsl_sync_reset,
927    .move = vk_kgsl_sync_move,
928    .wait = vk_kgsl_sync_wait,
929    .wait_many = vk_kgsl_sync_wait_many,
930    .import_sync_file = vk_kgsl_sync_import_sync_file,
931    .export_sync_file = vk_kgsl_sync_export_sync_file,
932 };
933 
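/* Build a kgsl_gpu_command from every command buffer's IBs (plus optional
 * perf-counter, u_trace and autotune IBs), attach at most one syncpoint
 * derived from the merged wait semaphores, and submit it with
 * IOCTL_KGSL_GPU_COMMAND. The returned timestamp becomes the queue fence and
 * the payload of the signal semaphores.
 */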
934 static VkResult
935 kgsl_queue_submit(struct tu_queue *queue, struct vk_queue_submit *vk_submit)
936 {
937    MESA_TRACE_FUNC();
938 
939    bool u_trace_enabled = u_trace_should_process(&queue->device->trace_context);
940    bool has_trace_points = false;
941 
942    if (vk_submit->command_buffer_count == 0) {
943       pthread_mutex_lock(&queue->device->submit_mutex);
944 
945       const struct kgsl_syncobj *wait_semaphores[vk_submit->wait_count + 1];
946       for (uint32_t i = 0; i < vk_submit->wait_count; i++) {
947          wait_semaphores[i] = &container_of(vk_submit->waits[i].sync,
948                                             struct vk_kgsl_syncobj, vk)
949                                   ->syncobj;
950       }
951 
952       struct kgsl_syncobj last_submit_sync;
953       if (queue->fence >= 0)
954          last_submit_sync = (struct kgsl_syncobj) {
955             .state = KGSL_SYNCOBJ_STATE_TS,
956             .queue = queue,
957             .timestamp = queue->fence,
958          };
959       else
960          last_submit_sync = (struct kgsl_syncobj) {
961             .state = KGSL_SYNCOBJ_STATE_SIGNALED,
962          };
963 
964       wait_semaphores[vk_submit->wait_count] = &last_submit_sync;
965 
966       struct kgsl_syncobj wait_sync =
967          kgsl_syncobj_merge(wait_semaphores, vk_submit->wait_count + 1);
968       assert(wait_sync.state !=
969              KGSL_SYNCOBJ_STATE_UNSIGNALED); // Would wait forever
970 
971       for (uint32_t i = 0; i < vk_submit->signal_count; i++) {
972          struct kgsl_syncobj *signal_sync =
973             &container_of(vk_submit->signals[i].sync, struct vk_kgsl_syncobj,
974                           vk)
975                 ->syncobj;
976 
977          kgsl_syncobj_reset(signal_sync);
978          *signal_sync = wait_sync;
979       }
980 
981       pthread_mutex_unlock(&queue->device->submit_mutex);
982       pthread_cond_broadcast(&queue->device->timeline_cond);
983 
984       return VK_SUCCESS;
985    }
986 
987    uint32_t perf_pass_index =
988       queue->device->perfcntrs_pass_cs ? vk_submit->perf_pass_index : ~0;
989 
990    if (TU_DEBUG(LOG_SKIP_GMEM_OPS))
991       tu_dbg_log_gmem_load_store_skips(queue->device);
992 
993    VkResult result = VK_SUCCESS;
994 
995    pthread_mutex_lock(&queue->device->submit_mutex);
996 
997    struct tu_cmd_buffer **cmd_buffers =
998       (struct tu_cmd_buffer **) vk_submit->command_buffers;
999    static_assert(offsetof(struct tu_cmd_buffer, vk) == 0,
1000                  "vk must be first member of tu_cmd_buffer");
1001    uint32_t cmdbuf_count = vk_submit->command_buffer_count;
1002 
1003    result =
1004       tu_insert_dynamic_cmdbufs(queue->device, &cmd_buffers, &cmdbuf_count);
1005    if (result != VK_SUCCESS) {
1006       pthread_mutex_unlock(&queue->device->submit_mutex);
1007       return result;
1008    }
1009 
1010    uint32_t entry_count = 0;
1011    for (uint32_t i = 0; i < cmdbuf_count; ++i) {
1012       struct tu_cmd_buffer *cmd_buffer = cmd_buffers[i];
1013 
1014       if (perf_pass_index != ~0)
1015          entry_count++;
1016 
1017       entry_count += cmd_buffer->cs.entry_count;
1018 
1019       if (u_trace_enabled && u_trace_has_points(&cmd_buffers[i]->trace)) {
1020          if (!(cmd_buffers[i]->usage_flags &
1021                VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT))
1022             entry_count++;
1023 
1024          has_trace_points = true;
1025       }
1026    }
1027 
1028    if (tu_autotune_submit_requires_fence(cmd_buffers, cmdbuf_count))
1029       entry_count++;
1030 
1031    struct kgsl_command_object *cmds = (struct kgsl_command_object *)
1032       vk_alloc(&queue->device->vk.alloc, sizeof(*cmds) * entry_count,
1033                alignof(*cmds), VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
1034    if (cmds == NULL) {
1035       pthread_mutex_unlock(&queue->device->submit_mutex);
1036       return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
1037    }
1038 
1039    uint32_t obj_count = 0;
1040    if (has_trace_points)
1041       obj_count++;
1042 
1043    struct kgsl_command_object *objs = (struct kgsl_command_object *)
1044       vk_alloc(&queue->device->vk.alloc, sizeof(*objs) * obj_count,
1045                alignof(*objs), VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
1046 
1047    struct tu_u_trace_submission_data *u_trace_submission_data = NULL;
1048    if (has_trace_points) {
1049       tu_u_trace_submission_data_create(
1050          queue->device, cmd_buffers, cmdbuf_count, &u_trace_submission_data);
1051 
1052       mtx_lock(&queue->device->kgsl_profiling_mutex);
1053       tu_suballoc_bo_alloc(&u_trace_submission_data->kgsl_timestamp_bo,
1054                            &queue->device->kgsl_profiling_suballoc,
1055                            sizeof(struct kgsl_cmdbatch_profiling_buffer), 4);
1056       mtx_unlock(&queue->device->kgsl_profiling_mutex);
1057    }
1058 
1059    uint32_t entry_idx = 0;
1060    for (uint32_t i = 0; i < cmdbuf_count; i++) {
1061       struct tu_cmd_buffer *cmd_buffer = cmd_buffers[i];
1062       struct tu_cs *cs = &cmd_buffer->cs;
1063 
1064       if (perf_pass_index != ~0) {
1065          struct tu_cs_entry *perf_cs_entry =
1066             &cmd_buffer->device->perfcntrs_pass_cs_entries[perf_pass_index];
1067 
1068          cmds[entry_idx++] = (struct kgsl_command_object) {
1069             .gpuaddr = perf_cs_entry->bo->iova + perf_cs_entry->offset,
1070             .size = perf_cs_entry->size,
1071             .flags = KGSL_CMDLIST_IB,
1072             .id = perf_cs_entry->bo->gem_handle,
1073          };
1074       }
1075 
1076       for (uint32_t j = 0; j < cs->entry_count; j++) {
1077          cmds[entry_idx++] = (struct kgsl_command_object) {
1078             .gpuaddr = cs->entries[j].bo->iova + cs->entries[j].offset,
1079             .size = cs->entries[j].size,
1080             .flags = KGSL_CMDLIST_IB,
1081             .id = cs->entries[j].bo->gem_handle,
1082          };
1083       }
1084 
1085       if (u_trace_submission_data &&
1086           u_trace_submission_data->cmd_trace_data[i].timestamp_copy_cs) {
1087          struct tu_cs_entry *trace_cs_entry =
1088             &u_trace_submission_data->cmd_trace_data[i]
1089                 .timestamp_copy_cs->entries[0];
1090          cmds[entry_idx++] = (struct kgsl_command_object) {
1091             .offset = trace_cs_entry->offset,
1092             .gpuaddr = trace_cs_entry->bo->iova,
1093             .size = trace_cs_entry->size,
1094             .flags = KGSL_CMDLIST_IB,
1095             .id = trace_cs_entry->bo->gem_handle,
1096          };
1097       }
1098    }
1099 
1100    struct kgsl_cmdbatch_profiling_buffer *profiling_buffer = NULL;
1101    uint32_t obj_idx = 0;
1102    if (u_trace_submission_data) {
1103       struct tu_suballoc_bo *bo = &u_trace_submission_data->kgsl_timestamp_bo;
1104 
1105       objs[obj_idx++] = (struct kgsl_command_object) {
1106          .offset = bo->iova - bo->bo->iova,
1107          .gpuaddr = bo->iova,
1108          .size = sizeof(struct kgsl_cmdbatch_profiling_buffer),
1109          .flags = KGSL_OBJLIST_MEMOBJ | KGSL_OBJLIST_PROFILE,
1110          .id = bo->bo->gem_handle,
1111       };
1112       profiling_buffer =
1113          (struct kgsl_cmdbatch_profiling_buffer *) tu_suballoc_bo_map(bo);
1114    }
1115 
1116    if (tu_autotune_submit_requires_fence(cmd_buffers, cmdbuf_count)) {
1117       struct tu_cs *autotune_cs = tu_autotune_on_submit(
1118          queue->device, &queue->device->autotune, cmd_buffers, cmdbuf_count);
1119       cmds[entry_idx++] = (struct kgsl_command_object) {
1120          .gpuaddr =
1121             autotune_cs->entries[0].bo->iova + autotune_cs->entries[0].offset,
1122          .size = autotune_cs->entries[0].size,
1123          .flags = KGSL_CMDLIST_IB,
1124          .id = autotune_cs->entries[0].bo->gem_handle,
1125       };
1126    }
1127 
1128    const struct kgsl_syncobj *wait_semaphores[vk_submit->wait_count];
1129    for (uint32_t i = 0; i < vk_submit->wait_count; i++) {
1130       wait_semaphores[i] =
1131          &container_of(vk_submit->waits[i].sync, struct vk_kgsl_syncobj, vk)
1132              ->syncobj;
1133    }
1134 
1135    struct kgsl_syncobj wait_sync =
1136       kgsl_syncobj_merge(wait_semaphores, vk_submit->wait_count);
1137    assert(wait_sync.state !=
1138           KGSL_SYNCOBJ_STATE_UNSIGNALED); // Would wait forever
1139 
1140    struct kgsl_cmd_syncpoint_timestamp ts;
1141    struct kgsl_cmd_syncpoint_fence fn;
1142    struct kgsl_command_syncpoint sync = { 0 };
1143    bool has_sync = false;
1144    switch (wait_sync.state) {
1145    case KGSL_SYNCOBJ_STATE_SIGNALED:
1146       break;
1147 
1148    case KGSL_SYNCOBJ_STATE_TS:
1149       ts.context_id = wait_sync.queue->msm_queue_id;
1150       ts.timestamp = wait_sync.timestamp;
1151 
1152       has_sync = true;
1153       sync.type = KGSL_CMD_SYNCPOINT_TYPE_TIMESTAMP;
1154       sync.priv = (uintptr_t) &ts;
1155       sync.size = sizeof(ts);
1156       break;
1157 
1158    case KGSL_SYNCOBJ_STATE_FD:
1159       fn.fd = wait_sync.fd;
1160 
1161       has_sync = true;
1162       sync.type = KGSL_CMD_SYNCPOINT_TYPE_FENCE;
1163       sync.priv = (uintptr_t) &fn;
1164       sync.size = sizeof(fn);
1165       break;
1166 
1167    default:
1168       unreachable("invalid syncobj state");
1169    }
1170 
1171    struct kgsl_gpu_command req = {
1172       .flags = KGSL_CMDBATCH_SUBMIT_IB_LIST,
1173       .cmdlist = (uintptr_t) cmds,
1174       .cmdsize = sizeof(struct kgsl_command_object),
1175       .numcmds = entry_idx,
1176       .synclist = (uintptr_t) &sync,
1177       .syncsize = sizeof(sync),
1178       .numsyncs = has_sync != 0 ? 1 : 0,
1179       .context_id = queue->msm_queue_id,
1180    };
1181 
1182    if (obj_idx) {
1183       req.flags |= KGSL_CMDBATCH_PROFILING;
1184       req.objlist = (uintptr_t) objs;
1185       req.objsize = sizeof(struct kgsl_command_object);
1186       req.numobjs = obj_idx;
1187    }
1188 
1189    int ret = safe_ioctl(queue->device->physical_device->local_fd,
1190                         IOCTL_KGSL_GPU_COMMAND, &req);
1191 
1192    uint64_t gpu_offset = 0;
1193 #if HAVE_PERFETTO
1194    if (profiling_buffer && profiling_buffer->gpu_ticks_queued) {
1195       struct kgsl_perfcounter_read_group perf = {
1196          .groupid = KGSL_PERFCOUNTER_GROUP_ALWAYSON,
1197          .countable = 0,
1198          .value = 0
1199       };
1200 
1201       struct kgsl_perfcounter_read req = {
1202          .reads = &perf,
1203          .count = 1,
1204       };
1205 
1206       ret = safe_ioctl(queue->device->fd, IOCTL_KGSL_PERFCOUNTER_READ, &req);
1207       /* Older KGSL has some kind of garbage in upper 32 bits */
1208       uint64_t offseted_gpu_ts = perf.value & 0xffffffff;
1209 
1210       gpu_offset = tu_device_ticks_to_ns(
1211          queue->device, offseted_gpu_ts - profiling_buffer->gpu_ticks_queued);
1212 
1213       struct tu_perfetto_clocks clocks = {
1214          .cpu = profiling_buffer->wall_clock_ns,
1215          .gpu_ts = tu_device_ticks_to_ns(queue->device,
1216                                          profiling_buffer->gpu_ticks_queued),
1217          .gpu_ts_offset = gpu_offset,
1218       };
1219 
1220       clocks = tu_perfetto_submit(queue->device, queue->device->submit_count, &clocks);
1221       gpu_offset = clocks.gpu_ts_offset;
1222    }
1223 #endif
1224 
1225    kgsl_syncobj_destroy(&wait_sync);
1226 
1227    if (ret) {
1228       result = vk_device_set_lost(&queue->device->vk, "submit failed: %s\n",
1229                                   strerror(errno));
1230       goto fail_submit;
1231    }
1232 
1233    p_atomic_set(&queue->fence, req.timestamp);
1234 
1235    for (uint32_t i = 0; i < vk_submit->signal_count; i++) {
1236       struct kgsl_syncobj *signal_sync =
1237          &container_of(vk_submit->signals[i].sync, struct vk_kgsl_syncobj, vk)
1238              ->syncobj;
1239 
1240       kgsl_syncobj_reset(signal_sync);
1241       signal_sync->state = KGSL_SYNCOBJ_STATE_TS;
1242       signal_sync->queue = queue;
1243       signal_sync->timestamp = req.timestamp;
1244    }
1245 
1246    if (u_trace_submission_data) {
1247       struct tu_u_trace_submission_data *submission_data =
1248          u_trace_submission_data;
1249       submission_data->submission_id = queue->device->submit_count;
1250       submission_data->gpu_ts_offset = gpu_offset;
1251       /* We have to allocate it here since it is different between drm/kgsl */
1252       submission_data->syncobj = (struct tu_u_trace_syncobj *)
1253          vk_alloc(&queue->device->vk.alloc, sizeof(struct tu_u_trace_syncobj),
1254                8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1255       submission_data->syncobj->timestamp = req.timestamp;
1256       submission_data->syncobj->msm_queue_id = queue->msm_queue_id;
1257 
1258       u_trace_submission_data = NULL;
1259 
1260       for (uint32_t i = 0; i < submission_data->cmd_buffer_count; i++) {
1261          bool free_data = i == submission_data->last_buffer_with_tracepoints;
1262          if (submission_data->cmd_trace_data[i].trace)
1263             u_trace_flush(submission_data->cmd_trace_data[i].trace,
1264                           submission_data, free_data);
1265 
1266          if (!submission_data->cmd_trace_data[i].timestamp_copy_cs) {
1267             /* u_trace is owned by cmd_buffer */
1268             submission_data->cmd_trace_data[i].trace = NULL;
1269          }
1270       }
1271    }
1272 
1273    queue->device->submit_count++;
1274 
1275    pthread_mutex_unlock(&queue->device->submit_mutex);
1276    pthread_cond_broadcast(&queue->device->timeline_cond);
1277 
1278    u_trace_context_process(&queue->device->trace_context, true);
1279 
1280    if (cmd_buffers != (struct tu_cmd_buffer **) vk_submit->command_buffers)
1281       vk_free(&queue->device->vk.alloc, cmd_buffers);
1282 
1283    vk_free(&queue->device->vk.alloc, cmds);
1284 
1285    return VK_SUCCESS;
1286 
1287 fail_submit:
1288    pthread_mutex_unlock(&queue->device->submit_mutex);
1289 
1290    if (result != VK_SUCCESS) {
1291       mtx_lock(&queue->device->kgsl_profiling_mutex);
1292       tu_suballoc_bo_free(&queue->device->kgsl_profiling_suballoc,
1293                           &u_trace_submission_data->kgsl_timestamp_bo);
1294       mtx_unlock(&queue->device->kgsl_profiling_mutex);
1295    }
1296 
1297    if (cmd_buffers != (struct tu_cmd_buffer **) vk_submit->command_buffers)
1298       vk_free(&queue->device->vk.alloc, cmd_buffers);
1299 
1300    vk_free(&queue->device->vk.alloc, cmds);
1301 
1302    return result;
1303 }
1304 
1305 static VkResult
1306 kgsl_device_wait_u_trace(struct tu_device *dev, struct tu_u_trace_syncobj *syncobj)
1307 {
1308    struct kgsl_device_waittimestamp_ctxtid req = {
1309       .context_id = syncobj->msm_queue_id,
1310       .timestamp = syncobj->timestamp,
1311       .timeout = 5000, // 5s
1312    };
1313 
1314    int ret = safe_ioctl(dev->fd, IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID, &req);
1315 
1316    if (ret) {
1317       assert(errno == ETIME);
1318       return VK_TIMEOUT;
1319    }
1320 
1321    return VK_SUCCESS;
1322 }
1323 
1324 static VkResult
1325 kgsl_device_init(struct tu_device *dev)
1326 {
1327    dev->fd = dev->physical_device->local_fd;
1328    return VK_SUCCESS;
1329 }
1330 
1331 static void
1332 kgsl_device_finish(struct tu_device *dev)
1333 {
1334    /* No-op */
1335 }
1336 
1337 static int
1338 kgsl_device_get_gpu_timestamp(struct tu_device *dev, uint64_t *ts)
1339 {
1340    unreachable("");
1341    return 0;
1342 }
1343 
1344 static int
1345 kgsl_device_get_suspend_count(struct tu_device *dev, uint64_t *suspend_count)
1346 {
1347    /* kgsl doesn't have a way to get it */
1348    *suspend_count = 0;
1349    return 0;
1350 }
1351 
1352 static VkResult
1353 kgsl_device_check_status(struct tu_device *device)
1354 {
1355    for (unsigned i = 0; i < TU_MAX_QUEUE_FAMILIES; i++) {
1356       for (unsigned q = 0; q < device->queue_count[i]; q++) {
1357          /* KGSL's KGSL_PROP_GPU_RESET_STAT takes the u32 msm_queue_id and returns a
1358          * KGSL_CTX_STAT_* for the worst reset that happened since the last time it
1359          * was queried on that queue.
1360          */
1361          uint32_t value = device->queues[i][q].msm_queue_id;
1362          VkResult status = get_kgsl_prop(device->fd, KGSL_PROP_GPU_RESET_STAT,
1363                                        &value, sizeof(value));
1364          if (status != VK_SUCCESS)
1365             return vk_device_set_lost(&device->vk, "Failed to get GPU reset status");
1366 
1367          if (value != KGSL_CTX_STAT_NO_ERROR &&
1368             value != KGSL_CTX_STAT_INNOCENT_CONTEXT_RESET_EXT) {
1369             return vk_device_set_lost(&device->vk, "GPU faulted or hung");
1370          }
1371       }
1372    }
1373 
1374    return VK_SUCCESS;
1375 }
1376 
1377 static const struct tu_knl kgsl_knl_funcs = {
1378       .name = "kgsl",
1379 
1380       .device_init = kgsl_device_init,
1381       .device_finish = kgsl_device_finish,
1382       .device_get_gpu_timestamp = kgsl_device_get_gpu_timestamp,
1383       .device_get_suspend_count = kgsl_device_get_suspend_count,
1384       .device_check_status = kgsl_device_check_status,
1385       .submitqueue_new = kgsl_submitqueue_new,
1386       .submitqueue_close = kgsl_submitqueue_close,
1387       .bo_init = kgsl_bo_init,
1388       .bo_init_dmabuf = kgsl_bo_init_dmabuf,
1389       .bo_export_dmabuf = kgsl_bo_export_dmabuf,
1390       .bo_map = kgsl_bo_map,
1391       .bo_allow_dump = kgsl_bo_allow_dump,
1392       .bo_finish = kgsl_bo_finish,
1393       .device_wait_u_trace = kgsl_device_wait_u_trace,
1394       .queue_submit = kgsl_queue_submit,
1395 };
1396 
1397 VkResult
1398 tu_knl_kgsl_load(struct tu_instance *instance, int fd)
1399 {
1400    if (instance->vk.enabled_extensions.KHR_display) {
1401       return vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
1402                        "I can't KHR_display");
1403    }
1404 
1405    struct tu_physical_device *device = (struct tu_physical_device *)
1406       vk_zalloc(&instance->vk.alloc, sizeof(*device), 8,
1407                 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
1408    if (!device) {
1409       close(fd);
1410       return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1411    }
1412 
1413    VkResult result = VK_ERROR_INITIALIZATION_FAILED;
1414 
1415    struct kgsl_devinfo info;
1416    if (get_kgsl_prop(fd, KGSL_PROP_DEVICE_INFO, &info, sizeof(info)))
1417       goto fail;
1418 
1419    uint64_t gmem_iova;
1420    if (get_kgsl_prop(fd, KGSL_PROP_UCHE_GMEM_VADDR, &gmem_iova, sizeof(gmem_iova)))
1421       goto fail;
1422 
1423    /* kgsl version check? */
1424 
1425    device->instance = instance;
1426    device->master_fd = -1;
1427    device->local_fd = fd;
1428 
1429    device->dev_id.gpu_id =
1430       ((info.chip_id >> 24) & 0xff) * 100 +
1431       ((info.chip_id >> 16) & 0xff) * 10 +
1432       ((info.chip_id >>  8) & 0xff);
1433    device->dev_id.chip_id = info.chip_id;
1434    device->gmem_size = debug_get_num_option("TU_GMEM", info.gmem_sizebytes);
1435    device->gmem_base = gmem_iova;
1436 
1437    device->submitqueue_priority_count = 1;
1438 
1439    device->timeline_type = vk_sync_timeline_get_type(&vk_kgsl_sync_type);
1440 
1441    device->sync_types[0] = &vk_kgsl_sync_type;
1442    device->sync_types[1] = &device->timeline_type.sync;
1443    device->sync_types[2] = NULL;
1444 
1445    device->heap.size = tu_get_system_heap_size(device);
1446    device->heap.used = 0u;
1447    device->heap.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT;
1448 
1449    /* Even if kernel is new enough, the GPU itself may not support it. */
1450    device->has_cached_coherent_memory = kgsl_is_memory_type_supported(
1451       fd, KGSL_MEMFLAGS_IOCOHERENT |
1452              (KGSL_CACHEMODE_WRITEBACK << KGSL_CACHEMODE_SHIFT));
1453    device->has_cached_non_coherent_memory = true;
1454 
1455    instance->knl = &kgsl_knl_funcs;
1456 
1457    result = tu_physical_device_init(device, instance);
1458    if (result != VK_SUCCESS)
1459       goto fail;
1460 
1461    list_addtail(&device->vk.link, &instance->vk.physical_devices.list);
1462 
1463    return VK_SUCCESS;
1464 
1465 fail:
1466    vk_free(&instance->vk.alloc, device);
1467    close(fd);
1468    return result;
1469 }
1470