1 /*
2  * Copyright © 2021 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "vk_queue.h"
25 
26 #include "util/perf/cpu_trace.h"
27 #include "util/u_debug.h"
28 #include <inttypes.h>
29 
30 #include "vk_alloc.h"
31 #include "vk_command_buffer.h"
32 #include "vk_command_pool.h"
33 #include "vk_common_entrypoints.h"
34 #include "vk_device.h"
35 #include "vk_fence.h"
36 #include "vk_log.h"
37 #include "vk_physical_device.h"
38 #include "vk_semaphore.h"
39 #include "vk_sync.h"
40 #include "vk_sync_binary.h"
41 #include "vk_sync_dummy.h"
42 #include "vk_sync_timeline.h"
43 #include "vk_util.h"
44 
45 #include "vulkan/wsi/wsi_common.h"
46 
47 static VkResult
48 vk_queue_start_submit_thread(struct vk_queue *queue);
49 
50 VkResult
51 vk_queue_init(struct vk_queue *queue, struct vk_device *device,
52               const VkDeviceQueueCreateInfo *pCreateInfo,
53               uint32_t index_in_family)
54 {
55    VkResult result = VK_SUCCESS;
56    int ret;
57 
58    memset(queue, 0, sizeof(*queue));
59    vk_object_base_init(device, &queue->base, VK_OBJECT_TYPE_QUEUE);
60 
61    list_addtail(&queue->link, &device->queues);
62 
63    queue->flags = pCreateInfo->flags;
64    queue->queue_family_index = pCreateInfo->queueFamilyIndex;
65 
66    assert(index_in_family < pCreateInfo->queueCount);
67    queue->index_in_family = index_in_family;
68 
69    queue->submit.mode = device->submit_mode;
70    if (queue->submit.mode == VK_QUEUE_SUBMIT_MODE_THREADED_ON_DEMAND)
71       queue->submit.mode = VK_QUEUE_SUBMIT_MODE_IMMEDIATE;
72 
73    list_inithead(&queue->submit.submits);
74 
75    ret = mtx_init(&queue->submit.mutex, mtx_plain);
76    if (ret == thrd_error) {
77       result = vk_errorf(queue, VK_ERROR_UNKNOWN, "mtx_init failed");
78       goto fail_mutex;
79    }
80 
81    ret = cnd_init(&queue->submit.push);
82    if (ret == thrd_error) {
83       result = vk_errorf(queue, VK_ERROR_UNKNOWN, "cnd_init failed");
84       goto fail_push;
85    }
86 
87    ret = cnd_init(&queue->submit.pop);
88    if (ret == thrd_error) {
89       result = vk_errorf(queue, VK_ERROR_UNKNOWN, "cnd_init failed");
90       goto fail_pop;
91    }
92 
93    if (queue->submit.mode == VK_QUEUE_SUBMIT_MODE_THREADED) {
94       result = vk_queue_start_submit_thread(queue);
95       if (result != VK_SUCCESS)
96          goto fail_thread;
97    }
98 
99    util_dynarray_init(&queue->labels, NULL);
100    queue->region_begin = true;
101 
102    return VK_SUCCESS;
103 
104 fail_thread:
105    cnd_destroy(&queue->submit.pop);
106 fail_pop:
107    cnd_destroy(&queue->submit.push);
108 fail_push:
109    mtx_destroy(&queue->submit.mutex);
110 fail_mutex:
111    return result;
112 }
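/* A minimal sketch (not from this file) of how a driver's queue-creation
 * path might use vk_queue_init() and hook up the driver_submit callback
 * that vk_queue_submit_final() invokes below.  The drv_* names are
 * illustrative placeholders:
 *
 *    static VkResult
 *    drv_queue_init(struct drv_device *device, struct drv_queue *queue,
 *                   const VkDeviceQueueCreateInfo *pCreateInfo,
 *                   uint32_t index_in_family)
 *    {
 *       VkResult result = vk_queue_init(&queue->vk, &device->vk,
 *                                       pCreateInfo, index_in_family);
 *       if (result != VK_SUCCESS)
 *          return result;
 *
 *       queue->vk.driver_submit = drv_queue_submit;
 *       return VK_SUCCESS;
 *    }
 */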
113 
114 VkResult
115 _vk_queue_set_lost(struct vk_queue *queue,
116                    const char *file, int line,
117                    const char *msg, ...)
118 {
119    if (queue->_lost.lost)
120       return VK_ERROR_DEVICE_LOST;
121 
122    queue->_lost.lost = true;
123    queue->_lost.error_file = file;
124    queue->_lost.error_line = line;
125 
126    va_list ap;
127    va_start(ap, msg);
128    vsnprintf(queue->_lost.error_msg, sizeof(queue->_lost.error_msg), msg, ap);
129    va_end(ap);
130 
131    p_atomic_inc(&queue->base.device->_lost.lost);
132 
133    if (debug_get_bool_option("MESA_VK_ABORT_ON_DEVICE_LOSS", false)) {
134       _vk_device_report_lost(queue->base.device);
135       abort();
136    }
137 
138    return VK_ERROR_DEVICE_LOST;
139 }
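/* Drivers normally reach _vk_queue_set_lost() through the
 * vk_queue_set_lost() convenience macro (as used throughout this file),
 * which supplies the file and line arguments automatically.
 */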
140 
141 static struct vk_queue_submit *
142 vk_queue_submit_alloc(struct vk_queue *queue,
143                       uint32_t wait_count,
144                       uint32_t command_buffer_count,
145                       uint32_t buffer_bind_count,
146                       uint32_t image_opaque_bind_count,
147                       uint32_t image_bind_count,
148                       uint32_t bind_entry_count,
149                       uint32_t image_bind_entry_count,
150                       uint32_t signal_count)
151 {
152    VK_MULTIALLOC(ma);
153    VK_MULTIALLOC_DECL(&ma, struct vk_queue_submit, submit, 1);
154    VK_MULTIALLOC_DECL(&ma, struct vk_sync_wait, waits, wait_count);
155    VK_MULTIALLOC_DECL(&ma, struct vk_command_buffer *, command_buffers,
156                       command_buffer_count);
157    VK_MULTIALLOC_DECL(&ma, VkSparseBufferMemoryBindInfo, buffer_binds,
158                       buffer_bind_count);
159    VK_MULTIALLOC_DECL(&ma, VkSparseImageOpaqueMemoryBindInfo,
160                       image_opaque_binds, image_opaque_bind_count);
161    VK_MULTIALLOC_DECL(&ma, VkSparseImageMemoryBindInfo, image_binds,
162                       image_bind_count);
163    VK_MULTIALLOC_DECL(&ma, VkSparseMemoryBind,
164                       bind_entries, bind_entry_count);
165    VK_MULTIALLOC_DECL(&ma, VkSparseImageMemoryBind, image_bind_entries,
166                       image_bind_entry_count);
167    VK_MULTIALLOC_DECL(&ma, struct vk_sync_signal, signals, signal_count);
168    VK_MULTIALLOC_DECL(&ma, struct vk_sync *, wait_temps, wait_count);
169 
170    struct vk_sync_timeline_point **wait_points = NULL, **signal_points = NULL;
171    if (queue->base.device->timeline_mode == VK_DEVICE_TIMELINE_MODE_EMULATED) {
172       vk_multialloc_add(&ma, &wait_points,
173                         struct vk_sync_timeline_point *, wait_count);
174       vk_multialloc_add(&ma, &signal_points,
175                         struct vk_sync_timeline_point *, signal_count);
176    }
177 
178    if (!vk_multialloc_zalloc(&ma, &queue->base.device->alloc,
179                              VK_SYSTEM_ALLOCATION_SCOPE_DEVICE))
180       return NULL;
181 
182    submit->waits           = waits;
183    submit->command_buffers = command_buffers;
184    submit->signals         = signals;
185    submit->buffer_binds    = buffer_binds;
186    submit->image_opaque_binds = image_opaque_binds;
187    submit->image_binds     = image_binds;
188 
189    submit->_bind_entries = bind_entries;
190    submit->_image_bind_entries = image_bind_entries;
191    submit->_wait_temps     = wait_temps;
192    submit->_wait_points    = wait_points;
193    submit->_signal_points  = signal_points;
194 
195    return submit;
196 }
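/* vk_queue_submit_alloc() packs the vk_queue_submit and all of its arrays
 * into a single allocation, which is why vk_queue_submit_free() below only
 * needs one vk_free().  A minimal sketch of the VK_MULTIALLOC pattern used
 * above (foo/values are illustrative names, not from this file):
 *
 *    VK_MULTIALLOC(ma);
 *    VK_MULTIALLOC_DECL(&ma, struct foo, foo, 1);
 *    VK_MULTIALLOC_DECL(&ma, uint32_t, values, value_count);
 *    if (!vk_multialloc_zalloc(&ma, &device->alloc,
 *                              VK_SYSTEM_ALLOCATION_SCOPE_DEVICE))
 *       return NULL;
 *    foo->values = values;   // points into the same allocation as foo
 */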
197 
198 static void
199 vk_queue_submit_cleanup(struct vk_queue *queue,
200                         struct vk_queue_submit *submit)
201 {
202    for (uint32_t i = 0; i < submit->wait_count; i++) {
203       if (submit->_wait_temps[i] != NULL)
204          vk_sync_destroy(queue->base.device, submit->_wait_temps[i]);
205    }
206 
207    if (submit->_mem_signal_temp != NULL)
208       vk_sync_destroy(queue->base.device, submit->_mem_signal_temp);
209 
210    if (submit->_wait_points != NULL) {
211       for (uint32_t i = 0; i < submit->wait_count; i++) {
212          if (unlikely(submit->_wait_points[i] != NULL)) {
213             vk_sync_timeline_point_release(queue->base.device,
214                                            submit->_wait_points[i]);
215          }
216       }
217    }
218 
219    if (submit->_signal_points != NULL) {
220       for (uint32_t i = 0; i < submit->signal_count; i++) {
221          if (unlikely(submit->_signal_points[i] != NULL)) {
222             vk_sync_timeline_point_free(queue->base.device,
223                                         submit->_signal_points[i]);
224          }
225       }
226    }
227 }
228 
229 static void
230 vk_queue_submit_free(struct vk_queue *queue,
231                      struct vk_queue_submit *submit)
232 {
233    vk_free(&queue->base.device->alloc, submit);
234 }
235 
236 static void
237 vk_queue_submit_destroy(struct vk_queue *queue,
238                         struct vk_queue_submit *submit)
239 {
240    vk_queue_submit_cleanup(queue, submit);
241    vk_queue_submit_free(queue, submit);
242 }
243 
244 static void
245 vk_queue_submit_add_semaphore_wait(struct vk_queue *queue,
246                                    struct vk_queue_submit *submit,
247                                    const VkSemaphoreSubmitInfo *wait_info)
248 {
249    VK_FROM_HANDLE(vk_semaphore, semaphore, wait_info->semaphore);
250 
251    /* From the Vulkan 1.2.194 spec:
252     *
253     *    "Applications can import a semaphore payload into an existing
254     *    semaphore using an external semaphore handle. The effects of the
255     *    import operation will be either temporary or permanent, as
256     *    specified by the application. If the import is temporary, the
257     *    implementation must restore the semaphore to its prior permanent
258     *    state after submitting the next semaphore wait operation."
259     *
260     * and
261     *
262     *    VUID-VkImportSemaphoreFdInfoKHR-flags-03323
263     *
264     *    "If flags contains VK_SEMAPHORE_IMPORT_TEMPORARY_BIT, the
265     *    VkSemaphoreTypeCreateInfo::semaphoreType field of the semaphore
266     *    from which handle or name was exported must not be
267     *    VK_SEMAPHORE_TYPE_TIMELINE"
268     */
269    struct vk_sync *sync;
270    if (semaphore->temporary) {
271       assert(semaphore->type == VK_SEMAPHORE_TYPE_BINARY);
272       sync = submit->_wait_temps[submit->wait_count] = semaphore->temporary;
273       semaphore->temporary = NULL;
274    } else {
275       if (semaphore->type == VK_SEMAPHORE_TYPE_BINARY) {
276          if (vk_device_supports_threaded_submit(queue->base.device))
277             assert(semaphore->permanent.type->move);
278          submit->_has_binary_permanent_semaphore_wait = true;
279       }
280 
281       sync = &semaphore->permanent;
282    }
283 
284    uint64_t wait_value = semaphore->type == VK_SEMAPHORE_TYPE_TIMELINE ?
285                          wait_info->value : 0;
286 
287    submit->waits[submit->wait_count] = (struct vk_sync_wait) {
288       .sync = sync,
289       .stage_mask = wait_info->stageMask,
290       .wait_value = wait_value,
291    };
292 
293    submit->wait_count++;
294 }
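/* Note that when a temporary payload is consumed above, its ownership
 * moves to submit->_wait_temps[] so that vk_queue_submit_cleanup() can
 * destroy it once the submit retires.
 */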
295 
296 static VkResult MUST_CHECK
297 vk_queue_submit_add_semaphore_signal(struct vk_queue *queue,
298                                      struct vk_queue_submit *submit,
299                                      const VkSemaphoreSubmitInfo *signal_info)
300 {
301    VK_FROM_HANDLE(vk_semaphore, semaphore, signal_info->semaphore);
302    VkResult result;
303 
304    struct vk_sync *sync = vk_semaphore_get_active_sync(semaphore);
305    uint64_t signal_value = signal_info->value;
306    if (semaphore->type == VK_SEMAPHORE_TYPE_TIMELINE) {
307       if (signal_value == 0) {
308          return vk_queue_set_lost(queue,
309             "Tried to signal a timeline with value 0");
310       }
311    } else {
312       signal_value = 0;
313    }
314 
315    /* For emulated timelines, we need to associate a binary vk_sync with
316     * each time point and pass the binary vk_sync to the driver.  We could
317     * do this in vk_queue_submit_final but it might require doing memory
318     * allocation and we don't want to add extra failure paths there.
319     * Instead, allocate and replace the driver-visible vk_sync now and
320     * we'll insert it into the timeline in vk_queue_submit_final.  The
321     * insert step is guaranteed to not fail.
322     */
323    struct vk_sync_timeline *timeline = vk_sync_as_timeline(sync);
324    if (timeline) {
325       assert(queue->base.device->timeline_mode ==
326              VK_DEVICE_TIMELINE_MODE_EMULATED);
327       struct vk_sync_timeline_point **signal_point =
328          &submit->_signal_points[submit->signal_count];
329       result = vk_sync_timeline_alloc_point(queue->base.device, timeline,
330                                             signal_value, signal_point);
331       if (unlikely(result != VK_SUCCESS))
332          return result;
333 
334       sync = &(*signal_point)->sync;
335       signal_value = 0;
336    }
337 
338    submit->signals[submit->signal_count] = (struct vk_sync_signal) {
339       .sync = sync,
340       .stage_mask = signal_info->stageMask,
341       .signal_value = signal_value,
342    };
343 
344    submit->signal_count++;
345 
346    return VK_SUCCESS;
347 }
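/* The timeline point allocated above is not yet visible in the
 * vk_sync_timeline; it is only installed by vk_queue_submit_final() after
 * queue->driver_submit() succeeds, which is why that final insert step
 * cannot fail.
 */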
348 
349 static void
350 vk_queue_submit_add_sync_signal(struct vk_queue *queue,
351                                 struct vk_queue_submit *submit,
352                                 struct vk_sync *sync,
353                                 uint64_t signal_value)
354 {
355    submit->signals[submit->signal_count++] = (struct vk_sync_signal) {
356       .sync = sync,
357       .stage_mask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
358       .signal_value = signal_value,
359    };
360 }
361 
362 static VkResult MUST_CHECK
363 vk_queue_submit_add_mem_signal(struct vk_queue *queue,
364                                struct vk_queue_submit *submit,
365                                VkDeviceMemory memory)
366 {
367    assert(submit->_mem_signal_temp == NULL);
368    VkResult result;
369 
370    struct vk_sync *mem_sync;
371    result = queue->base.device->create_sync_for_memory(queue->base.device,
372                                                        memory, true,
373                                                        &mem_sync);
374    if (unlikely(result != VK_SUCCESS))
375       return result;
376 
377    submit->_mem_signal_temp = mem_sync;
378 
379    vk_queue_submit_add_sync_signal(queue, submit, mem_sync, 0);
380 
381    return VK_SUCCESS;
382 }
383 
384 static void
385 vk_queue_submit_add_fence_signal(struct vk_queue *queue,
386                                  struct vk_queue_submit *submit,
387                                  struct vk_fence *fence)
388 {
389    vk_queue_submit_add_sync_signal(queue, submit,
390                                    vk_fence_get_active_sync(fence), 0);
391 }
392 
393 static void
394 vk_queue_submit_add_command_buffer(struct vk_queue *queue,
395                                    struct vk_queue_submit *submit,
396                                    const VkCommandBufferSubmitInfo *info)
397 {
398    VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, info->commandBuffer);
399 
400    assert(info->deviceMask == 0 || info->deviceMask == 1);
401    assert(cmd_buffer->pool->queue_family_index == queue->queue_family_index);
402 
403    /* Some drivers don't call vk_command_buffer_begin/end() yet and, for
404     * those, we'll still see the INITIAL state.  However, this is enough to catch
405     * command buffers which get submitted without calling EndCommandBuffer.
406     */
407    assert(cmd_buffer->state == MESA_VK_COMMAND_BUFFER_STATE_INITIAL ||
408           cmd_buffer->state == MESA_VK_COMMAND_BUFFER_STATE_EXECUTABLE ||
409           cmd_buffer->state == MESA_VK_COMMAND_BUFFER_STATE_PENDING);
410    cmd_buffer->state = MESA_VK_COMMAND_BUFFER_STATE_PENDING;
411 
412    submit->command_buffers[submit->command_buffer_count++] = cmd_buffer;
413 }
414 
415 static void
416 vk_queue_submit_add_buffer_bind(
417    struct vk_queue *queue,
418    struct vk_queue_submit *submit,
419    const VkSparseBufferMemoryBindInfo *info)
420 {
421    VkSparseMemoryBind *entries = submit->_bind_entries +
422                                  submit->_bind_entry_count;
423    submit->_bind_entry_count += info->bindCount;
424 
425    typed_memcpy(entries, info->pBinds, info->bindCount);
426 
427    VkSparseBufferMemoryBindInfo info_tmp = *info;
428    info_tmp.pBinds = entries;
429    submit->buffer_binds[submit->buffer_bind_count++] = info_tmp;
430 }
431 
432 static void
433 vk_queue_submit_add_image_opaque_bind(
434    struct vk_queue *queue,
435    struct vk_queue_submit *submit,
436    const VkSparseImageOpaqueMemoryBindInfo *info)
437 {
438    VkSparseMemoryBind *entries = submit->_bind_entries +
439                                  submit->_bind_entry_count;
440    submit->_bind_entry_count += info->bindCount;
441 
442    typed_memcpy(entries, info->pBinds, info->bindCount);
443 
444    VkSparseImageOpaqueMemoryBindInfo info_tmp = *info;
445    info_tmp.pBinds = entries;
446    submit->image_opaque_binds[submit->image_opaque_bind_count++] = info_tmp;
447 }
448 
449 static void
450 vk_queue_submit_add_image_bind(
451    struct vk_queue *queue,
452    struct vk_queue_submit *submit,
453    const VkSparseImageMemoryBindInfo *info)
454 {
455    VkSparseImageMemoryBind *entries = submit->_image_bind_entries +
456                                       submit->_image_bind_entry_count;
457    submit->_image_bind_entry_count += info->bindCount;
458 
459    typed_memcpy(entries, info->pBinds, info->bindCount);
460 
461    VkSparseImageMemoryBindInfo info_tmp = *info;
462    info_tmp.pBinds = entries;
463    submit->image_binds[submit->image_bind_count++] = info_tmp;
464 }
465 
466 /* Attempts to merge two submits into one.  If the merge succeeds, the merged
467  * submit is returned and the two submits passed in are destroyed.
468  */
469 static struct vk_queue_submit *
470 vk_queue_submits_merge(struct vk_queue *queue,
471                        struct vk_queue_submit *first,
472                        struct vk_queue_submit *second)
473 {
474    /* Don't merge if there are signals in between: see 'Signal operation order' */
475    if (first->signal_count > 0 &&
476        (second->command_buffer_count ||
477         second->buffer_bind_count ||
478         second->image_opaque_bind_count ||
479         second->image_bind_count ||
480         second->wait_count))
481       return NULL;
482 
483    if (vk_queue_submit_has_bind(first) != vk_queue_submit_has_bind(second))
484       return NULL;
485 
486    if (first->_mem_signal_temp)
487       return NULL;
488 
489    if (first->perf_pass_index != second->perf_pass_index)
490       return NULL;
491 
492    /* noop submits can always do a no-op merge */
493    if (!second->command_buffer_count &&
494        !second->buffer_bind_count &&
495        !second->image_opaque_bind_count &&
496        !second->image_bind_count &&
497        !second->wait_count &&
498        !second->signal_count) {
499       vk_queue_submit_destroy(queue, second);
500       return first;
501    }
502    if (!first->command_buffer_count &&
503        !first->buffer_bind_count &&
504        !first->image_opaque_bind_count &&
505        !first->image_bind_count &&
506        !first->wait_count &&
507        !first->signal_count) {
508       vk_queue_submit_destroy(queue, first);
509       return second;
510    }
511 
512    struct vk_queue_submit *merged = vk_queue_submit_alloc(queue,
513       first->wait_count + second->wait_count,
514       first->command_buffer_count + second->command_buffer_count,
515       first->buffer_bind_count + second->buffer_bind_count,
516       first->image_opaque_bind_count + second->image_opaque_bind_count,
517       first->image_bind_count + second->image_bind_count,
518       first->_bind_entry_count + second->_bind_entry_count,
519       first->_image_bind_entry_count + second->_image_bind_entry_count,
520       first->signal_count + second->signal_count);
521    if (merged == NULL)
522       return NULL;
523 
524    merged->wait_count = first->wait_count + second->wait_count;
525    typed_memcpy(merged->waits, first->waits, first->wait_count);
526    typed_memcpy(&merged->waits[first->wait_count], second->waits, second->wait_count);
527 
528    merged->command_buffer_count = first->command_buffer_count +
529                                   second->command_buffer_count;
530    typed_memcpy(merged->command_buffers,
531                 first->command_buffers, first->command_buffer_count);
532    typed_memcpy(&merged->command_buffers[first->command_buffer_count],
533                 second->command_buffers, second->command_buffer_count);
534 
535    merged->signal_count = first->signal_count + second->signal_count;
536    typed_memcpy(merged->signals, first->signals, first->signal_count);
537    typed_memcpy(&merged->signals[first->signal_count], second->signals, second->signal_count);
538 
539    for (uint32_t i = 0; i < first->buffer_bind_count; i++)
540       vk_queue_submit_add_buffer_bind(queue, merged, &first->buffer_binds[i]);
541    for (uint32_t i = 0; i < second->buffer_bind_count; i++)
542       vk_queue_submit_add_buffer_bind(queue, merged, &second->buffer_binds[i]);
543 
544    for (uint32_t i = 0; i < first->image_opaque_bind_count; i++) {
545       vk_queue_submit_add_image_opaque_bind(queue, merged,
546                                             &first->image_opaque_binds[i]);
547    }
548    for (uint32_t i = 0; i < second->image_opaque_bind_count; i++) {
549       vk_queue_submit_add_image_opaque_bind(queue, merged,
550                                             &second->image_opaque_binds[i]);
551    }
552 
553    for (uint32_t i = 0; i < first->image_bind_count; i++)
554       vk_queue_submit_add_image_bind(queue, merged, &first->image_binds[i]);
555    for (uint32_t i = 0; i < second->image_bind_count; i++)
556       vk_queue_submit_add_image_bind(queue, merged, &second->image_binds[i]);
557 
558    merged->perf_pass_index = first->perf_pass_index;
559    assert(second->perf_pass_index == merged->perf_pass_index);
560 
561    assert(merged->_bind_entry_count ==
562           first->_bind_entry_count + second->_bind_entry_count);
563    assert(merged->_image_bind_entry_count ==
564           first->_image_bind_entry_count + second->_image_bind_entry_count);
565 
566    merged->_has_binary_permanent_semaphore_wait =
567       first->_has_binary_permanent_semaphore_wait;
568 
569    typed_memcpy(merged->_wait_temps, first->_wait_temps, first->wait_count);
570    typed_memcpy(&merged->_wait_temps[first->wait_count], second->_wait_temps, second->wait_count);
571 
572    assert(first->_mem_signal_temp == NULL);
573    merged->_mem_signal_temp = second->_mem_signal_temp;
574 
575    if (queue->base.device->timeline_mode == VK_DEVICE_TIMELINE_MODE_EMULATED) {
576       typed_memcpy(merged->_wait_points,
577                    first->_wait_points, first->wait_count);
578       typed_memcpy(&merged->_wait_points[first->wait_count],
579                    second->_wait_points, second->wait_count);
580 
581       typed_memcpy(merged->_signal_points,
582                    first->_signal_points, first->signal_count);
583       typed_memcpy(&merged->_signal_points[first->signal_count],
584                    second->_signal_points, second->signal_count);
585    } else {
586       assert(first->_wait_points == NULL && second->_wait_points == NULL);
587       assert(first->_signal_points == NULL && second->_signal_points == NULL);
588    }
589    vk_queue_submit_free(queue, first);
590    vk_queue_submit_free(queue, second);
591 
592    return merged;
593 }
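/* vk_queue_merge_submit() below uses this to coalesce adjacent batches
 * from a single vkQueueSubmit2() call into one driver submission whenever
 * the signal/wait ordering rules allow it.
 */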
594 
595 static void
596 vk_queue_push_submit(struct vk_queue *queue,
597                      struct vk_queue_submit *submit)
598 {
599    mtx_lock(&queue->submit.mutex);
600    list_addtail(&submit->link, &queue->submit.submits);
601    cnd_signal(&queue->submit.push);
602    mtx_unlock(&queue->submit.mutex);
603 }
604 
605 static VkResult
606 vk_queue_drain(struct vk_queue *queue)
607 {
608    VkResult result = VK_SUCCESS;
609 
610    mtx_lock(&queue->submit.mutex);
611    while (!list_is_empty(&queue->submit.submits)) {
612       if (vk_device_is_lost(queue->base.device)) {
613          result = VK_ERROR_DEVICE_LOST;
614          break;
615       }
616 
617       int ret = cnd_wait(&queue->submit.pop, &queue->submit.mutex);
618       if (ret == thrd_error) {
619          result = vk_queue_set_lost(queue, "cnd_wait failed");
620          break;
621       }
622    }
623    mtx_unlock(&queue->submit.mutex);
624 
625    return result;
626 }
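/* vk_queue_drain() blocks until the submit thread has handed every pending
 * submit to the driver.  It is used when tearing the thread down and when a
 * submit signals a memory object (see vk_queue_submit() below).
 */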
627 
628 static VkResult
629 vk_queue_submit_final(struct vk_queue *queue,
630                       struct vk_queue_submit *submit)
631 {
632    VkResult result;
633 
634    /* Now that we know all our time points exist, fetch the time point syncs
635     * from any vk_sync_timelines.  While we're here, also compact down the
636     * list of waits to get rid of any trivial timeline waits.
637     */
638    uint32_t wait_count = 0;
639    for (uint32_t i = 0; i < submit->wait_count; i++) {
640       /* A timeline wait on 0 is always a no-op */
641       if ((submit->waits[i].sync->flags & VK_SYNC_IS_TIMELINE) &&
642           submit->waits[i].wait_value == 0)
643          continue;
644 
645       /* Waits on dummy vk_syncs are no-ops */
646       if (vk_sync_type_is_dummy(submit->waits[i].sync->type)) {
647          /* We are about to lose track of this wait.  If it has a temporary,
648           * we need to destroy it now, as vk_queue_submit_cleanup() will not
649           * know about it. */
650          if (submit->_wait_temps[i] != NULL) {
651             vk_sync_destroy(queue->base.device, submit->_wait_temps[i]);
652             submit->waits[i].sync = NULL;
653          }
654          continue;
655       }
656 
657       /* For emulated timelines, we have a binary vk_sync associated with
658        * each time point and pass the binary vk_sync to the driver.
659        */
660       struct vk_sync_timeline *timeline =
661          vk_sync_as_timeline(submit->waits[i].sync);
662       if (timeline) {
663          assert(queue->base.device->timeline_mode ==
664                 VK_DEVICE_TIMELINE_MODE_EMULATED);
665          result = vk_sync_timeline_get_point(queue->base.device, timeline,
666                                              submit->waits[i].wait_value,
667                                              &submit->_wait_points[i]);
668          if (unlikely(result != VK_SUCCESS)) {
669             result = vk_queue_set_lost(queue,
670                                        "Time point >= %"PRIu64" not found",
671                                        submit->waits[i].wait_value);
672          }
673 
674          /* This can happen if the point is long past */
675          if (submit->_wait_points[i] == NULL)
676             continue;
677 
678          submit->waits[i].sync = &submit->_wait_points[i]->sync;
679          submit->waits[i].wait_value = 0;
680       }
681 
682       struct vk_sync_binary *binary =
683          vk_sync_as_binary(submit->waits[i].sync);
684       if (binary) {
685          submit->waits[i].sync = &binary->timeline;
686          submit->waits[i].wait_value = binary->next_point;
687       }
688 
689       assert((submit->waits[i].sync->flags & VK_SYNC_IS_TIMELINE) ||
690              submit->waits[i].wait_value == 0);
691 
692       assert(wait_count <= i);
693       if (wait_count < i) {
694          submit->waits[wait_count] = submit->waits[i];
695          submit->_wait_temps[wait_count] = submit->_wait_temps[i];
696          if (submit->_wait_points)
697             submit->_wait_points[wait_count] = submit->_wait_points[i];
698       }
699       wait_count++;
700    }
701 
702    assert(wait_count <= submit->wait_count);
703    submit->wait_count = wait_count;
704 
705    for (uint32_t i = 0; i < submit->signal_count; i++) {
706       assert((submit->signals[i].sync->flags & VK_SYNC_IS_TIMELINE) ||
707              submit->signals[i].signal_value == 0);
708 
709       struct vk_sync_binary *binary =
710          vk_sync_as_binary(submit->signals[i].sync);
711       if (binary) {
712          submit->signals[i].sync = &binary->timeline;
713          submit->signals[i].signal_value = ++binary->next_point;
714       }
715    }
716 
717    result = queue->driver_submit(queue, submit);
718    if (unlikely(result != VK_SUCCESS))
719       return result;
720 
721    if (submit->_signal_points) {
722       for (uint32_t i = 0; i < submit->signal_count; i++) {
723          if (submit->_signal_points[i] == NULL)
724             continue;
725 
726          vk_sync_timeline_point_install(queue->base.device,
727                                         submit->_signal_points[i]);
728          submit->_signal_points[i] = NULL;
729       }
730    }
731 
732    return VK_SUCCESS;
733 }
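/* vk_queue_submit_final() is the single point at which submissions reach
 * queue->driver_submit(), regardless of mode: directly from
 * vk_queue_submit() in immediate mode, from vk_queue_flush() in deferred
 * mode, and from the submit thread in threaded mode.
 */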
734 
735 VkResult
736 vk_queue_flush(struct vk_queue *queue, uint32_t *submit_count_out)
737 {
738    VkResult result = VK_SUCCESS;
739 
740    assert(queue->submit.mode == VK_QUEUE_SUBMIT_MODE_DEFERRED);
741 
742    mtx_lock(&queue->submit.mutex);
743 
744    uint32_t submit_count = 0;
745    while (!list_is_empty(&queue->submit.submits)) {
746       struct vk_queue_submit *submit =
747          list_first_entry(&queue->submit.submits,
748                           struct vk_queue_submit, link);
749 
750       for (uint32_t i = 0; i < submit->wait_count; i++) {
751          /* In emulated timeline mode, only emulated timelines are allowed */
752          if (!vk_sync_type_is_vk_sync_timeline(submit->waits[i].sync->type)) {
753             assert(!(submit->waits[i].sync->flags & VK_SYNC_IS_TIMELINE));
754             continue;
755          }
756 
757          result = vk_sync_wait(queue->base.device,
758                                submit->waits[i].sync,
759                                submit->waits[i].wait_value,
760                                VK_SYNC_WAIT_PENDING, 0);
761          if (result == VK_TIMEOUT) {
762             /* This one's not ready yet */
763             result = VK_SUCCESS;
764             goto done;
765          } else if (result != VK_SUCCESS) {
766             result = vk_queue_set_lost(queue, "Wait for time points failed");
767             goto done;
768          }
769       }
770 
771       result = vk_queue_submit_final(queue, submit);
772       if (unlikely(result != VK_SUCCESS)) {
773          result = vk_queue_set_lost(queue, "queue::driver_submit failed");
774          goto done;
775       }
776 
777       submit_count++;
778 
779       list_del(&submit->link);
780 
781       vk_queue_submit_destroy(queue, submit);
782    }
783 
784 done:
785    if (submit_count)
786       cnd_broadcast(&queue->submit.pop);
787 
788    mtx_unlock(&queue->submit.mutex);
789 
790    if (submit_count_out)
791       *submit_count_out = submit_count;
792 
793    return result;
794 }
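/* In VK_QUEUE_SUBMIT_MODE_DEFERRED nothing reaches the driver until the
 * queue is flushed; both vk_queue_submit() and vk_queue_signal_sync() below
 * call vk_device_flush() right after pushing a deferred submit, which is
 * expected to end up here for each of the device's queues.
 */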
795 
796 static int
797 vk_queue_submit_thread_func(void *_data)
798 {
799    struct vk_queue *queue = _data;
800    VkResult result;
801 
802    mtx_lock(&queue->submit.mutex);
803 
804    while (queue->submit.thread_run) {
805       if (list_is_empty(&queue->submit.submits)) {
806          int ret = cnd_wait(&queue->submit.push, &queue->submit.mutex);
807          if (ret == thrd_error) {
808             mtx_unlock(&queue->submit.mutex);
809             vk_queue_set_lost(queue, "cnd_wait failed");
810             return 1;
811          }
812          continue;
813       }
814 
815       struct vk_queue_submit *submit =
816          list_first_entry(&queue->submit.submits,
817                           struct vk_queue_submit, link);
818 
819       /* Drop the lock while we wait */
820       mtx_unlock(&queue->submit.mutex);
821 
822       result = vk_sync_wait_many(queue->base.device,
823                                  submit->wait_count, submit->waits,
824                                  VK_SYNC_WAIT_PENDING, UINT64_MAX);
825       if (unlikely(result != VK_SUCCESS)) {
826          vk_queue_set_lost(queue, "Wait for time points failed");
827          return 1;
828       }
829 
830       result = vk_queue_submit_final(queue, submit);
831       if (unlikely(result != VK_SUCCESS)) {
832          vk_queue_set_lost(queue, "queue::driver_submit failed");
833          return 1;
834       }
835 
836       /* Do all our cleanup of individual fences etc. outside the lock.
837        * We can't actually remove it from the list yet.  We have to do
838        * that under the lock.
839        */
840       vk_queue_submit_cleanup(queue, submit);
841 
842       mtx_lock(&queue->submit.mutex);
843 
844    /* Only remove the submit from the list and free it after
845     * vk_queue_submit_final() has completed.  This ensures that, when
846        * vk_queue_drain() completes, there are no more pending jobs.
847        */
848       list_del(&submit->link);
849       vk_queue_submit_free(queue, submit);
850 
851       cnd_broadcast(&queue->submit.pop);
852    }
853 
854    mtx_unlock(&queue->submit.mutex);
855    return 0;
856 }
857 
858 static VkResult
859 vk_queue_start_submit_thread(struct vk_queue *queue)
860 {
861    int ret;
862 
863    mtx_lock(&queue->submit.mutex);
864    queue->submit.thread_run = true;
865    mtx_unlock(&queue->submit.mutex);
866 
867    ret = thrd_create(&queue->submit.thread,
868                      vk_queue_submit_thread_func,
869                      queue);
870    if (ret == thrd_error)
871       return vk_errorf(queue, VK_ERROR_UNKNOWN, "thrd_create failed");
872 
873    return VK_SUCCESS;
874 }
875 
876 static void
877 vk_queue_stop_submit_thread(struct vk_queue *queue)
878 {
879    vk_queue_drain(queue);
880 
881    /* Kick the thread to disable it */
882    mtx_lock(&queue->submit.mutex);
883    queue->submit.thread_run = false;
884    cnd_signal(&queue->submit.push);
885    mtx_unlock(&queue->submit.mutex);
886 
887    thrd_join(queue->submit.thread, NULL);
888 
889    assert(list_is_empty(&queue->submit.submits));
890    queue->submit.mode = VK_QUEUE_SUBMIT_MODE_IMMEDIATE;
891 }
892 
893 VkResult
894 vk_queue_enable_submit_thread(struct vk_queue *queue)
895 {
896    assert(vk_device_supports_threaded_submit(queue->base.device));
897 
898    if (queue->submit.mode == VK_QUEUE_SUBMIT_MODE_THREADED)
899       return VK_SUCCESS;
900 
901    VkResult result = vk_queue_start_submit_thread(queue);
902    if (result != VK_SUCCESS)
903       return result;
904 
905    queue->submit.mode = VK_QUEUE_SUBMIT_MODE_THREADED;
906 
907    return VK_SUCCESS;
908 }
909 
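/* Driver-agnostic view of a single submission batch.  vk_common_QueueSubmit2()
 * and vk_common_QueueBindSparse() translate their respective Vulkan input
 * structures into this form before handing it to vk_queue_submit_create().
 */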
910 struct vulkan_submit_info {
911    const void *pNext;
912 
913    uint32_t command_buffer_count;
914    const VkCommandBufferSubmitInfo *command_buffers;
915 
916    uint32_t wait_count;
917    const VkSemaphoreSubmitInfo *waits;
918 
919    uint32_t signal_count;
920    const VkSemaphoreSubmitInfo *signals;
921 
922    uint32_t buffer_bind_count;
923    const VkSparseBufferMemoryBindInfo *buffer_binds;
924 
925    uint32_t image_opaque_bind_count;
926    const VkSparseImageOpaqueMemoryBindInfo *image_opaque_binds;
927 
928    uint32_t image_bind_count;
929    const VkSparseImageMemoryBindInfo *image_binds;
930 
931    struct vk_fence *fence;
932 };
933 
934 static VkResult
935 vk_queue_submit_create(struct vk_queue *queue,
936                        const struct vulkan_submit_info *info,
937                        struct vk_queue_submit **submit_out)
938 {
939    VkResult result;
940    uint32_t sparse_memory_bind_entry_count = 0;
941    uint32_t sparse_memory_image_bind_entry_count = 0;
942 
943    for (uint32_t i = 0; i < info->buffer_bind_count; ++i)
944       sparse_memory_bind_entry_count += info->buffer_binds[i].bindCount;
945 
946    for (uint32_t i = 0; i < info->image_opaque_bind_count; ++i)
947       sparse_memory_bind_entry_count += info->image_opaque_binds[i].bindCount;
948 
949    for (uint32_t i = 0; i < info->image_bind_count; ++i)
950       sparse_memory_image_bind_entry_count += info->image_binds[i].bindCount;
951 
952    const struct wsi_memory_signal_submit_info *mem_signal =
953       vk_find_struct_const(info->pNext, WSI_MEMORY_SIGNAL_SUBMIT_INFO_MESA);
954    bool signal_mem_sync = mem_signal != NULL &&
955                           mem_signal->memory != VK_NULL_HANDLE &&
956                           queue->base.device->create_sync_for_memory != NULL;
957 
958    uint32_t signal_count = info->signal_count +
959                            signal_mem_sync +
960                            (info->fence != NULL);
961 
962    struct vk_queue_submit *submit =
963       vk_queue_submit_alloc(queue, info->wait_count,
964                             info->command_buffer_count,
965                             info->buffer_bind_count,
966                             info->image_opaque_bind_count,
967                             info->image_bind_count,
968                             sparse_memory_bind_entry_count,
969                             sparse_memory_image_bind_entry_count,
970                             signal_count);
971    if (unlikely(submit == NULL))
972       return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
973 
974    /* From the Vulkan 1.2.194 spec:
975     *
976     *    "If the VkSubmitInfo::pNext chain does not include this structure,
977     *    the batch defaults to use counter pass index 0."
978     */
979    const VkPerformanceQuerySubmitInfoKHR *perf_info =
980       vk_find_struct_const(info->pNext, PERFORMANCE_QUERY_SUBMIT_INFO_KHR);
981    submit->perf_pass_index = perf_info ? perf_info->counterPassIndex : 0;
982 
983    for (uint32_t i = 0; i < info->wait_count; i++)
984       vk_queue_submit_add_semaphore_wait(queue, submit, &info->waits[i]);
985 
986    for (uint32_t i = 0; i < info->command_buffer_count; i++) {
987       vk_queue_submit_add_command_buffer(queue, submit,
988                                          &info->command_buffers[i]);
989    }
990 
991    for (uint32_t i = 0; i < info->buffer_bind_count; ++i)
992       vk_queue_submit_add_buffer_bind(queue, submit, &info->buffer_binds[i]);
993 
994    for (uint32_t i = 0; i < info->image_opaque_bind_count; ++i) {
995       vk_queue_submit_add_image_opaque_bind(queue, submit,
996                                             &info->image_opaque_binds[i]);
997    }
998 
999    for (uint32_t i = 0; i < info->image_bind_count; ++i)
1000       vk_queue_submit_add_image_bind(queue, submit, &info->image_binds[i]);
1001 
1002    for (uint32_t i = 0; i < info->signal_count; i++) {
1003       result = vk_queue_submit_add_semaphore_signal(queue, submit,
1004                                                     &info->signals[i]);
1005       if (unlikely(result != VK_SUCCESS))
1006          goto fail;
1007    }
1008 
1009    if (signal_mem_sync) {
1010       result = vk_queue_submit_add_mem_signal(queue, submit,
1011                                               mem_signal->memory);
1012       if (unlikely(result != VK_SUCCESS))
1013          goto fail;
1014    }
1015 
1016    if (info->fence != NULL)
1017       vk_queue_submit_add_fence_signal(queue, submit, info->fence);
1018 
1019    assert(signal_count == submit->signal_count);
1020 
1021    *submit_out = submit;
1022 
1023    return VK_SUCCESS;
1024 
1025 fail:
1026    vk_queue_submit_destroy(queue, submit);
1027    return result;
1028 }
1029 
1030 static VkResult
1031 vk_queue_submit(struct vk_queue *queue,
1032                 struct vk_queue_submit *submit)
1033 {
1034    struct vk_device *device = queue->base.device;
1035    VkResult result;
1036 
1037    /* If this device supports threaded submit, we can't rely on the client
1038     * ordering requirements to ensure submits happen in the right order.  Even
1039     * if this queue doesn't have a submit thread, another queue (possibly in a
1040     * different process) may, and that means our dependencies may not have
1041     * been submitted to the kernel yet.  Do a quick zero-timeout WAIT_PENDING
1042     * on all the wait semaphores to see if we need to start up our own thread.
1043     */
1044    if (device->submit_mode == VK_QUEUE_SUBMIT_MODE_THREADED_ON_DEMAND &&
1045        queue->submit.mode != VK_QUEUE_SUBMIT_MODE_THREADED) {
1046       assert(queue->submit.mode == VK_QUEUE_SUBMIT_MODE_IMMEDIATE);
1047 
1048       result = vk_sync_wait_many(queue->base.device,
1049                                  submit->wait_count, submit->waits,
1050                                  VK_SYNC_WAIT_PENDING, 0);
1051       if (result == VK_TIMEOUT)
1052          result = vk_queue_enable_submit_thread(queue);
1053       if (unlikely(result != VK_SUCCESS))
1054          goto fail;
1055    }
1056 
1057    switch (queue->submit.mode) {
1058    case VK_QUEUE_SUBMIT_MODE_IMMEDIATE:
1059       result = vk_queue_submit_final(queue, submit);
1060       if (unlikely(result != VK_SUCCESS))
1061          goto fail;
1062 
1063       /* If threaded submit is possible on this device, we need to ensure that
1064        * binary semaphore payloads get reset so that any other threads can
1065        * properly wait on them for dependency checking.  Because we don't
1066        * currently have a submit thread, we can directly reset those binary
1067        * semaphore payloads.
1068        *
1069        * If the vk_sync is in our signal set, we can consider it to have
1070        * been both reset and signaled by queue_submit_final().  A reset in
1071        * this case would be wrong because it would throw away our signal
1072        * operation.  If we don't signal the vk_sync, then we need to reset it.
1073        */
1074       if (vk_device_supports_threaded_submit(device) &&
1075           submit->_has_binary_permanent_semaphore_wait) {
1076          for (uint32_t i = 0; i < submit->wait_count; i++) {
1077             if ((submit->waits[i].sync->flags & VK_SYNC_IS_TIMELINE) ||
1078                 submit->_wait_temps[i] != NULL)
1079                continue;
1080 
1081             bool was_signaled = false;
1082             for (uint32_t j = 0; j < submit->signal_count; j++) {
1083                if (submit->signals[j].sync == submit->waits[i].sync) {
1084                   was_signaled = true;
1085                   break;
1086                }
1087             }
1088 
1089             if (!was_signaled) {
1090                result = vk_sync_reset(queue->base.device,
1091                                       submit->waits[i].sync);
1092                if (unlikely(result != VK_SUCCESS))
1093                   goto fail;
1094             }
1095          }
1096       }
1097 
1098       vk_queue_submit_destroy(queue, submit);
1099       return result;
1100 
1101    case VK_QUEUE_SUBMIT_MODE_DEFERRED:
1102       vk_queue_push_submit(queue, submit);
1103       return vk_device_flush(queue->base.device);
1104 
1105    case VK_QUEUE_SUBMIT_MODE_THREADED:
1106       if (submit->_has_binary_permanent_semaphore_wait) {
1107          for (uint32_t i = 0; i < submit->wait_count; i++) {
1108             if (submit->waits[i].sync->flags & VK_SYNC_IS_TIMELINE)
1109                continue;
1110 
1111             /* From the Vulkan 1.2.194 spec:
1112              *
1113              *    "When a batch is submitted to a queue via a queue
1114              *    submission, and it includes semaphores to be waited on,
1115              *    it defines a memory dependency between prior semaphore
1116              *    signal operations and the batch, and defines semaphore
1117              *    wait operations.
1118              *
1119              *    Such semaphore wait operations set the semaphores
1120              *    created with a VkSemaphoreType of
1121              *    VK_SEMAPHORE_TYPE_BINARY to the unsignaled state."
1122              *
1123              * For threaded submit, we depend on tracking the unsignaled
1124              * state of binary semaphores to determine when we can safely
1125              * submit.  The VK_SYNC_WAIT_PENDING check above as well as the
1126              * one in the submit thread depend on all binary semaphores
1127              * being reset when they're not in active use from the point
1128              * of view of the client's CPU timeline.  This means we need to
1129              * reset them inside vkQueueSubmit and cannot wait until the
1130              * actual submit which happens later in the thread.
1131              *
1132              * We've already stolen temporary semaphore payloads above as
1133              * part of basic semaphore processing.  We steal permanent
1134              * semaphore payloads here by way of vk_sync_move.  For shared
1135              * semaphores, this can be a bit expensive (sync file import
1136              * and export) but, for non-shared semaphores, it can be made
1137              * fairly cheap.  Also, we only do this semaphore swapping in
1138              * the case where you have real timelines AND the client is
1139              * using timeline semaphores with wait-before-signal (that's
1140              * the only way to get a submit thread) AND mixing those with
1141              * waits on binary semaphores AND said binary semaphore is
1142              * using its permanent payload.  In other words, this code
1143              * should basically only ever get executed in CTS tests.
1144              */
1145             if (submit->_wait_temps[i] != NULL)
1146                continue;
1147 
1148             /* From the Vulkan 1.2.194 spec:
1149              *
1150              *    VUID-vkQueueSubmit-pWaitSemaphores-03238
1151              *
1152              *    "All elements of the pWaitSemaphores member of all
1153              *    elements of pSubmits created with a VkSemaphoreType of
1154              *    VK_SEMAPHORE_TYPE_BINARY must reference a semaphore
1155              *    signal operation that has been submitted for execution
1156              *    and any semaphore signal operations on which it depends
1157              *    (if any) must have also been submitted for execution."
1158              *
1159              * Therefore, we can safely do a blocking wait here and it
1160              * won't actually block for long.  This ensures that the
1161              * vk_sync_move below will succeed.
1162              */
1163             result = vk_sync_wait(queue->base.device,
1164                                   submit->waits[i].sync, 0,
1165                                   VK_SYNC_WAIT_PENDING, UINT64_MAX);
1166             if (unlikely(result != VK_SUCCESS))
1167                goto fail;
1168 
1169             result = vk_sync_create(queue->base.device,
1170                                     submit->waits[i].sync->type,
1171                                     0 /* flags */,
1172                                     0 /* initial value */,
1173                                     &submit->_wait_temps[i]);
1174             if (unlikely(result != VK_SUCCESS))
1175                goto fail;
1176 
1177             result = vk_sync_move(queue->base.device,
1178                                   submit->_wait_temps[i],
1179                                   submit->waits[i].sync);
1180             if (unlikely(result != VK_SUCCESS))
1181                goto fail;
1182 
1183             submit->waits[i].sync = submit->_wait_temps[i];
1184          }
1185       }
1186 
1187       /* If we're signaling a memory object, we have to ensure that
1188        * vkQueueSubmit does not return until the kernel submission has
1189        * happened.  Otherwise, we may get a race between this process
1190        * and whatever is going to wait on the object where the other
1191        * process may wait before we've submitted our work.  Drain the
1192        * queue now to avoid this.  It's the responsibility of the caller
1193        * to ensure that any vkQueueSubmit which signals a memory object
1194        * has fully resolved dependencies.
1195        */
1196       const bool needs_drain = submit->_mem_signal_temp;
1197 
1198       vk_queue_push_submit(queue, submit);
1199 
1200       if (needs_drain) {
1201          result = vk_queue_drain(queue);
1202          if (unlikely(result != VK_SUCCESS))
1203             return result;
1204       }
1205 
1206       return VK_SUCCESS;
1207 
1208    case VK_QUEUE_SUBMIT_MODE_THREADED_ON_DEMAND:
1209       unreachable("Invalid vk_queue::submit.mode");
1210    }
1211    unreachable("Invalid submit mode");
1212 
1213 fail:
1214    vk_queue_submit_destroy(queue, submit);
1215    return result;
1216 }
1217 
1218 static VkResult
1219 vk_queue_merge_submit(struct vk_queue *queue,
1220                       struct vk_queue_submit **last_submit,
1221                       struct vk_queue_submit *submit)
1222 {
1223    if (*last_submit == NULL) {
1224       *last_submit = submit;
1225       return VK_SUCCESS;
1226    }
1227 
1228    struct vk_queue_submit *merged =
1229       vk_queue_submits_merge(queue, *last_submit, submit);
1230    if (merged != NULL) {
1231       *last_submit = merged;
1232       return VK_SUCCESS;
1233    }
1234 
1235    VkResult result = vk_queue_submit(queue, *last_submit);
1236    *last_submit = NULL;
1237 
1238    if (likely(result == VK_SUCCESS)) {
1239       *last_submit = submit;
1240    } else {
1241       vk_queue_submit_destroy(queue, submit);
1242    }
1243 
1244    return result;
1245 }
1246 
1247 VkResult
1248 vk_queue_wait_before_present(struct vk_queue *queue,
1249                              const VkPresentInfoKHR *pPresentInfo)
1250 {
1251    if (vk_device_is_lost(queue->base.device))
1252       return VK_ERROR_DEVICE_LOST;
1253 
1254    /* From the Vulkan 1.2.194 spec:
1255     *
1256     *    VUID-vkQueuePresentKHR-pWaitSemaphores-03268
1257     *
1258     *    "All elements of the pWaitSemaphores member of pPresentInfo must
1259     *    reference a semaphore signal operation that has been submitted for
1260     *    execution and any semaphore signal operations on which it depends (if
1261     *    any) must have also been submitted for execution."
1262     *
1263     * As with vkQueueSubmit above, we need to ensure that any binary
1264     * semaphores we use in this present actually exist.  If we don't have
1265     * timeline semaphores, this is a non-issue.  If they're emulated, then
1266     * this is ensured for us by the vk_device_flush() at the end of every
1267     * vkQueueSubmit() and every vkSignalSemaphore().  For real timeline
1268     * semaphores, however, we need to do a wait.  Thanks to the above bit of
1269     * spec text, that wait should never block for long.
1270     */
1271    if (!vk_device_supports_threaded_submit(queue->base.device))
1272       return VK_SUCCESS;
1273 
1274    const uint32_t wait_count = pPresentInfo->waitSemaphoreCount;
1275 
1276    if (wait_count == 0)
1277       return VK_SUCCESS;
1278 
1279    STACK_ARRAY(struct vk_sync_wait, waits, wait_count);
1280 
1281    for (uint32_t i = 0; i < wait_count; i++) {
1282       VK_FROM_HANDLE(vk_semaphore, semaphore,
1283                      pPresentInfo->pWaitSemaphores[i]);
1284 
1285       /* From the Vulkan 1.2.194 spec:
1286        *
1287        *    VUID-vkQueuePresentKHR-pWaitSemaphores-03267
1288        *
1289        *    "All elements of the pWaitSemaphores member of pPresentInfo must
1290        *    be created with a VkSemaphoreType of VK_SEMAPHORE_TYPE_BINARY."
1291        */
1292       assert(semaphore->type == VK_SEMAPHORE_TYPE_BINARY);
1293 
1294       waits[i] = (struct vk_sync_wait) {
1295          .sync = vk_semaphore_get_active_sync(semaphore),
1296          .stage_mask = ~(VkPipelineStageFlags2)0,
1297       };
1298    }
1299 
1300    VkResult result = vk_sync_wait_many(queue->base.device, wait_count, waits,
1301                                        VK_SYNC_WAIT_PENDING, UINT64_MAX);
1302 
1303    STACK_ARRAY_FINISH(waits);
1304 
1305    /* Check again, just in case */
1306    if (vk_device_is_lost(queue->base.device))
1307       return VK_ERROR_DEVICE_LOST;
1308 
1309    return result;
1310 }
1311 
1312 static VkResult
1313 vk_queue_signal_sync(struct vk_queue *queue,
1314                      struct vk_sync *sync,
1315                      uint32_t signal_value)
1316 {
1317    struct vk_queue_submit *submit = vk_queue_submit_alloc(queue, 0, 0, 0, 0, 0,
1318                                                           0, 0, 1);
1319    if (unlikely(submit == NULL))
1320       return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
1321 
1322    vk_queue_submit_add_sync_signal(queue, submit, sync, signal_value);
1323 
1324    VkResult result;
1325    switch (queue->submit.mode) {
1326    case VK_QUEUE_SUBMIT_MODE_IMMEDIATE:
1327       result = vk_queue_submit_final(queue, submit);
1328       vk_queue_submit_destroy(queue, submit);
1329       return result;
1330 
1331    case VK_QUEUE_SUBMIT_MODE_DEFERRED:
1332       vk_queue_push_submit(queue, submit);
1333       return vk_device_flush(queue->base.device);
1334 
1335    case VK_QUEUE_SUBMIT_MODE_THREADED:
1336       vk_queue_push_submit(queue, submit);
1337       return VK_SUCCESS;
1338 
1339    case VK_QUEUE_SUBMIT_MODE_THREADED_ON_DEMAND:
1340       unreachable("Invalid vk_queue::submit.mode");
1341    }
1342    unreachable("Invalid timeline mode");
1343 }
1344 
1345 void
1346 vk_queue_finish(struct vk_queue *queue)
1347 {
1348    if (queue->submit.mode == VK_QUEUE_SUBMIT_MODE_THREADED)
1349       vk_queue_stop_submit_thread(queue);
1350 
1351    while (!list_is_empty(&queue->submit.submits)) {
1352       assert(vk_device_is_lost_no_report(queue->base.device));
1353 
1354       struct vk_queue_submit *submit =
1355          list_first_entry(&queue->submit.submits,
1356                           struct vk_queue_submit, link);
1357 
1358       list_del(&submit->link);
1359       vk_queue_submit_destroy(queue, submit);
1360    }
1361 
1362 #if DETECT_OS_ANDROID
1363    if (queue->anb_semaphore != VK_NULL_HANDLE) {
1364       struct vk_device *device = queue->base.device;
1365       device->dispatch_table.DestroySemaphore(vk_device_to_handle(device),
1366                                               queue->anb_semaphore, NULL);
1367    }
1368 #endif
1369 
1370    cnd_destroy(&queue->submit.pop);
1371    cnd_destroy(&queue->submit.push);
1372    mtx_destroy(&queue->submit.mutex);
1373 
1374    util_dynarray_fini(&queue->labels);
1375    list_del(&queue->link);
1376    vk_object_base_finish(&queue->base);
1377 }
1378 
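/* Common vkQueueSubmit2() implementation: each VkSubmitInfo2 is turned into a
 * vk_queue_submit, accumulated via vk_queue_merge_submit(), and the result is
 * handed to vk_queue_submit().  The fence, if any, is attached only to the
 * last submit info.
 */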
1379 VKAPI_ATTR VkResult VKAPI_CALL
1380 vk_common_QueueSubmit2(VkQueue _queue,
1381                        uint32_t submitCount,
1382                        const VkSubmitInfo2 *pSubmits,
1383                        VkFence _fence)
1384 {
1385    VK_FROM_HANDLE(vk_queue, queue, _queue);
1386    VK_FROM_HANDLE(vk_fence, fence, _fence);
1387    VkResult result;
1388 
1389    if (vk_device_is_lost(queue->base.device))
1390       return VK_ERROR_DEVICE_LOST;
1391 
1392    if (submitCount == 0) {
1393       if (fence == NULL) {
1394          return VK_SUCCESS;
1395       } else {
1396          return vk_queue_signal_sync(queue, vk_fence_get_active_sync(fence), 0);
1397       }
1398    }
1399 
1400    struct vk_queue_submit *last_submit = NULL;
1401    for (uint32_t i = 0; i < submitCount; i++) {
1402       struct vulkan_submit_info info = {
1403          .pNext = pSubmits[i].pNext,
1404          .command_buffer_count = pSubmits[i].commandBufferInfoCount,
1405          .command_buffers = pSubmits[i].pCommandBufferInfos,
1406          .wait_count = pSubmits[i].waitSemaphoreInfoCount,
1407          .waits = pSubmits[i].pWaitSemaphoreInfos,
1408          .signal_count = pSubmits[i].signalSemaphoreInfoCount,
1409          .signals = pSubmits[i].pSignalSemaphoreInfos,
1410          .fence = i == submitCount - 1 ? fence : NULL
1411       };
1412       struct vk_queue_submit *submit;
1413       result = vk_queue_submit_create(queue, &info, &submit);
1414       if (unlikely(result != VK_SUCCESS))
1415          return result;
1416 
1417       result = vk_queue_merge_submit(queue, &last_submit, submit);
1418       if (unlikely(result != VK_SUCCESS))
1419          return result;
1420    }
1421 
1422    if (last_submit != NULL) {
1423       result = vk_queue_submit(queue, last_submit);
1424       if (unlikely(result != VK_SUCCESS))
1425          return result;
1426    }
1427 
1428    return VK_SUCCESS;
1429 }
1430 
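/* Common vkQueueBindSparse() implementation: each VkBindSparseInfo becomes a
 * vk_queue_submit carrying the buffer and image binds.  As with
 * vkQueueSubmit2(), the fence is attached only to the last bind info.
 */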
1431 VKAPI_ATTR VkResult VKAPI_CALL
1432 vk_common_QueueBindSparse(VkQueue _queue,
1433                           uint32_t bindInfoCount,
1434                           const VkBindSparseInfo *pBindInfo,
1435                           VkFence _fence)
1436 {
1437    VK_FROM_HANDLE(vk_queue, queue, _queue);
1438    VK_FROM_HANDLE(vk_fence, fence, _fence);
1439    VkResult result;
1440 
1441    if (vk_device_is_lost(queue->base.device))
1442       return VK_ERROR_DEVICE_LOST;
1443 
1444    if (bindInfoCount == 0) {
1445       if (fence == NULL) {
1446          return VK_SUCCESS;
1447       } else {
1448          return vk_queue_signal_sync(queue, vk_fence_get_active_sync(fence), 0);
1449       }
1450    }
1451 
1452    struct vk_queue_submit *last_submit = NULL;
1453    for (uint32_t i = 0; i < bindInfoCount; i++) {
1454       const VkTimelineSemaphoreSubmitInfo *timeline_info =
1455          vk_find_struct_const(pBindInfo[i].pNext, TIMELINE_SEMAPHORE_SUBMIT_INFO);
1456       const uint64_t *wait_values = NULL;
1457       const uint64_t *signal_values = NULL;
1458 
1459       if (timeline_info && timeline_info->waitSemaphoreValueCount) {
1460          /* From the Vulkan 1.3.204 spec:
1461           *
1462           *    VUID-VkBindSparseInfo-pNext-03247
1463           *
1464           *    "If the pNext chain of this structure includes a VkTimelineSemaphoreSubmitInfo structure
1465           *    and any element of pWaitSemaphores was created with a VkSemaphoreType of
1466           *    VK_SEMAPHORE_TYPE_TIMELINE, then its waitSemaphoreValueCount member must equal
1467           *    waitSemaphoreCount"
1468           */
1469          assert(timeline_info->waitSemaphoreValueCount == pBindInfo[i].waitSemaphoreCount);
1470          wait_values = timeline_info->pWaitSemaphoreValues;
1471       }
1472 
1473       if (timeline_info && timeline_info->signalSemaphoreValueCount) {
1474          /* From the Vulkan 1.3.204 spec:
1475           *
1476           *    VUID-VkBindSparseInfo-pNext-03248
1477           *
1478           *    "If the pNext chain of this structure includes a VkTimelineSemaphoreSubmitInfo structure
1479           *    and any element of pSignalSemaphores was created with a VkSemaphoreType of
1480           *    VK_SEMAPHORE_TYPE_TIMELINE, then its signalSemaphoreValueCount member must equal
1481           *    signalSemaphoreCount"
1482           */
1483          assert(timeline_info->signalSemaphoreValueCount == pBindInfo[i].signalSemaphoreCount);
1484          signal_values = timeline_info->pSignalSemaphoreValues;
1485       }
1486 
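      /* Repack the legacy semaphore handles, together with any timeline
       * values gathered above, into VkSemaphoreSubmitInfo so that
       * vk_queue_submit_create() can consume them.
       */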
1487       STACK_ARRAY(VkSemaphoreSubmitInfo, wait_semaphore_infos,
1488                   pBindInfo[i].waitSemaphoreCount);
1489       STACK_ARRAY(VkSemaphoreSubmitInfo, signal_semaphore_infos,
1490                   pBindInfo[i].signalSemaphoreCount);
1491 
1492       if (!wait_semaphore_infos || !signal_semaphore_infos) {
1493          STACK_ARRAY_FINISH(wait_semaphore_infos);
1494          STACK_ARRAY_FINISH(signal_semaphore_infos);
1495          return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
1496       }
1497 
1498       for (uint32_t j = 0; j < pBindInfo[i].waitSemaphoreCount; j++) {
1499          wait_semaphore_infos[j] = (VkSemaphoreSubmitInfo) {
1500             .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
1501             .semaphore = pBindInfo[i].pWaitSemaphores[j],
1502             .value = wait_values ? wait_values[j] : 0,
1503          };
1504       }
1505 
1506       for (uint32_t j = 0; j < pBindInfo[i].signalSemaphoreCount; j++) {
1507          signal_semaphore_infos[j] = (VkSemaphoreSubmitInfo) {
1508             .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
1509             .semaphore = pBindInfo[i].pSignalSemaphores[j],
1510             .value = signal_values ? signal_values[j] : 0,
1511          };
1512       }
1513       struct vulkan_submit_info info = {
1514          .pNext = pBindInfo[i].pNext,
1515          .wait_count = pBindInfo[i].waitSemaphoreCount,
1516          .waits = wait_semaphore_infos,
1517          .signal_count = pBindInfo[i].signalSemaphoreCount,
1518          .signals = signal_semaphore_infos,
1519          .buffer_bind_count = pBindInfo[i].bufferBindCount,
1520          .buffer_binds = pBindInfo[i].pBufferBinds,
1521          .image_opaque_bind_count = pBindInfo[i].imageOpaqueBindCount,
1522          .image_opaque_binds = pBindInfo[i].pImageOpaqueBinds,
1523          .image_bind_count = pBindInfo[i].imageBindCount,
1524          .image_binds = pBindInfo[i].pImageBinds,
1525          .fence = i == bindInfoCount - 1 ? fence : NULL
1526       };
1527       struct vk_queue_submit *submit;
1528       result = vk_queue_submit_create(queue, &info, &submit);
1529       if (likely(result == VK_SUCCESS))
1530          result = vk_queue_merge_submit(queue, &last_submit, submit);
1531 
1532       STACK_ARRAY_FINISH(wait_semaphore_infos);
1533       STACK_ARRAY_FINISH(signal_semaphore_infos);
1534 
1535       if (unlikely(result != VK_SUCCESS))
1536          return result;
1537    }
1538 
1539    if (last_submit != NULL) {
1540       result = vk_queue_submit(queue, last_submit);
1541       if (unlikely(result != VK_SUCCESS))
1542          return result;
1543    }
1544 
1545    return VK_SUCCESS;
1546 }
1547 
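/* Return a sync type that is both binary and CPU-waitable;
 * vkQueueWaitIdle() below depends on one existing.
 */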
1548 static const struct vk_sync_type *
1549 get_cpu_wait_type(struct vk_physical_device *pdevice)
1550 {
1551    for (const struct vk_sync_type *const *t =
1552         pdevice->supported_sync_types; *t; t++) {
1553       if (((*t)->features & VK_SYNC_FEATURE_BINARY) &&
1554           ((*t)->features & VK_SYNC_FEATURE_CPU_WAIT))
1555          return *t;
1556    }
1557 
1558    unreachable("You must have a non-timeline CPU wait sync type");
1559 }
1560 
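/* Common vkQueueWaitIdle() implementation: instead of tracking outstanding
 * work, signal a freshly created CPU-waitable sync from the queue and then
 * block on it from the host, reporting any device loss detected afterwards.
 */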
1561 VKAPI_ATTR VkResult VKAPI_CALL
1562 vk_common_QueueWaitIdle(VkQueue _queue)
1563 {
1564    MESA_TRACE_FUNC();
1565 
1566    VK_FROM_HANDLE(vk_queue, queue, _queue);
1567    VkResult result;
1568 
1569    if (vk_device_is_lost(queue->base.device))
1570       return VK_ERROR_DEVICE_LOST;
1571 
1572    const struct vk_sync_type *sync_type =
1573       get_cpu_wait_type(queue->base.device->physical);
1574 
1575    struct vk_sync *sync;
1576    result = vk_sync_create(queue->base.device, sync_type, 0, 0, &sync);
1577    if (unlikely(result != VK_SUCCESS))
1578       return result;
1579 
1580    result = vk_queue_signal_sync(queue, sync, 0);
1581    if (unlikely(result != VK_SUCCESS))
1582       return result;
1583 
1584    result = vk_sync_wait(queue->base.device, sync, 0,
1585                          VK_SYNC_WAIT_COMPLETE, UINT64_MAX);
1586 
1587    vk_sync_destroy(queue->base.device, sync);
1588 
1589    VkResult device_status = vk_device_check_status(queue->base.device);
1590    if (device_status != VK_SUCCESS)
1591       return device_status;
1592 
1593    return result;
1594 }
1595