1 /*
2 * Copyright © 2021 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "vk_queue.h"
25
26 #include "util/perf/cpu_trace.h"
27 #include "util/u_debug.h"
28 #include <inttypes.h>
29
30 #include "vk_alloc.h"
31 #include "vk_command_buffer.h"
32 #include "vk_command_pool.h"
33 #include "vk_common_entrypoints.h"
34 #include "vk_device.h"
35 #include "vk_fence.h"
36 #include "vk_log.h"
37 #include "vk_physical_device.h"
38 #include "vk_semaphore.h"
39 #include "vk_sync.h"
40 #include "vk_sync_binary.h"
41 #include "vk_sync_dummy.h"
42 #include "vk_sync_timeline.h"
43 #include "vk_util.h"
44
45 #include "vulkan/wsi/wsi_common.h"
46
47 static VkResult
48 vk_queue_start_submit_thread(struct vk_queue *queue);
49
/* Initialize a vk_queue embedded in the driver's queue struct.
 *
 * Sets up the submit queue (list, mutex, condition variables) and, when the
 * device requests threaded submit up front, starts the submit thread.  On
 * failure, everything initialized so far is torn down via the goto-cleanup
 * chain and the error is returned.
 */
VkResult
vk_queue_init(struct vk_queue *queue, struct vk_device *device,
              const VkDeviceQueueCreateInfo *pCreateInfo,
              uint32_t index_in_family)
{
   VkResult result = VK_SUCCESS;
   int ret;

   /* Start from a clean slate before initializing the base object. */
   memset(queue, 0, sizeof(*queue));
   vk_object_base_init(device, &queue->base, VK_OBJECT_TYPE_QUEUE);

   /* Link the queue into the device's queue list so device-level code can
    * iterate all queues.
    */
   list_addtail(&queue->link, &device->queues);

   queue->flags = pCreateInfo->flags;
   queue->queue_family_index = pCreateInfo->queueFamilyIndex;

   assert(index_in_family < pCreateInfo->queueCount);
   queue->index_in_family = index_in_family;

   /* THREADED_ON_DEMAND queues start out in immediate mode; the submit
    * thread is started later via vk_queue_enable_submit_thread().
    */
   queue->submit.mode = device->submit_mode;
   if (queue->submit.mode == VK_QUEUE_SUBMIT_MODE_THREADED_ON_DEMAND)
      queue->submit.mode = VK_QUEUE_SUBMIT_MODE_IMMEDIATE;

   list_inithead(&queue->submit.submits);

   ret = mtx_init(&queue->submit.mutex, mtx_plain);
   if (ret == thrd_error) {
      result = vk_errorf(queue, VK_ERROR_UNKNOWN, "mtx_init failed");
      goto fail_mutex;
   }

   /* "push" is signaled when a submit is added to the list, "pop" when one
    * is removed (consumed by the submit thread or vk_queue_flush()).
    */
   ret = cnd_init(&queue->submit.push);
   if (ret == thrd_error) {
      result = vk_errorf(queue, VK_ERROR_UNKNOWN, "cnd_init failed");
      goto fail_push;
   }

   ret = cnd_init(&queue->submit.pop);
   if (ret == thrd_error) {
      result = vk_errorf(queue, VK_ERROR_UNKNOWN, "cnd_init failed");
      goto fail_pop;
   }

   if (queue->submit.mode == VK_QUEUE_SUBMIT_MODE_THREADED) {
      result = vk_queue_start_submit_thread(queue);
      if (result != VK_SUCCESS)
         goto fail_thread;
   }

   /* Debug-label stack for vkQueueBeginDebugUtilsLabelEXT & friends */
   util_dynarray_init(&queue->labels, NULL);
   queue->region_begin = true;

   return VK_SUCCESS;

fail_thread:
   cnd_destroy(&queue->submit.pop);
fail_pop:
   cnd_destroy(&queue->submit.push);
fail_push:
   mtx_destroy(&queue->submit.mutex);
fail_mutex:
   return result;
}
113
/* Mark the queue (and, transitively, its device) lost.
 *
 * Invoked through the vk_queue_set_lost() macro which supplies file/line.
 * Only the first loss is recorded; subsequent calls are no-ops that simply
 * return VK_ERROR_DEVICE_LOST, so callers can use the return value directly.
 */
VkResult
_vk_queue_set_lost(struct vk_queue *queue,
                   const char *file, int line,
                   const char *msg, ...)
{
   /* Keep the first recorded error; later losses don't overwrite it. */
   if (queue->_lost.lost)
      return VK_ERROR_DEVICE_LOST;

   queue->_lost.lost = true;
   queue->_lost.error_file = file;
   queue->_lost.error_line = line;

   va_list ap;
   va_start(ap, msg);
   vsnprintf(queue->_lost.error_msg, sizeof(queue->_lost.error_msg), msg, ap);
   va_end(ap);

   /* Bump the device-level lost counter so vk_device_is_lost() sees it */
   p_atomic_inc(&queue->base.device->_lost.lost);

   /* Debug aid: dump the loss report and abort immediately if requested */
   if (debug_get_bool_option("MESA_VK_ABORT_ON_DEVICE_LOSS", false)) {
      _vk_device_report_lost(queue->base.device);
      abort();
   }

   return VK_ERROR_DEVICE_LOST;
}
140
/* Allocate a vk_queue_submit and all of its arrays in one zeroed allocation.
 *
 * The counts fix each array's capacity; the submit's running counts
 * (wait_count, signal_count, ...) start at zero and are bumped as entries
 * are added by the vk_queue_submit_add_*() helpers.  Returns NULL on
 * allocation failure.  Freed with a single vk_free() in
 * vk_queue_submit_free().
 */
static struct vk_queue_submit *
vk_queue_submit_alloc(struct vk_queue *queue,
                      uint32_t wait_count,
                      uint32_t command_buffer_count,
                      uint32_t buffer_bind_count,
                      uint32_t image_opaque_bind_count,
                      uint32_t image_bind_count,
                      uint32_t bind_entry_count,
                      uint32_t image_bind_entry_count,
                      uint32_t signal_count)
{
   VK_MULTIALLOC(ma);
   VK_MULTIALLOC_DECL(&ma, struct vk_queue_submit, submit, 1);
   VK_MULTIALLOC_DECL(&ma, struct vk_sync_wait, waits, wait_count);
   VK_MULTIALLOC_DECL(&ma, struct vk_command_buffer *, command_buffers,
                      command_buffer_count);
   VK_MULTIALLOC_DECL(&ma, VkSparseBufferMemoryBindInfo, buffer_binds,
                      buffer_bind_count);
   VK_MULTIALLOC_DECL(&ma, VkSparseImageOpaqueMemoryBindInfo,
                      image_opaque_binds, image_opaque_bind_count);
   VK_MULTIALLOC_DECL(&ma, VkSparseImageMemoryBindInfo, image_binds,
                      image_bind_count);
   VK_MULTIALLOC_DECL(&ma, VkSparseMemoryBind,
                      bind_entries, bind_entry_count);
   VK_MULTIALLOC_DECL(&ma, VkSparseImageMemoryBind, image_bind_entries,
                      image_bind_entry_count);
   VK_MULTIALLOC_DECL(&ma, struct vk_sync_signal, signals, signal_count);
   VK_MULTIALLOC_DECL(&ma, struct vk_sync *, wait_temps, wait_count);

   /* Per-time-point sync arrays are only needed when the device emulates
    * timeline semaphores with vk_sync_timeline.
    */
   struct vk_sync_timeline_point **wait_points = NULL, **signal_points = NULL;
   if (queue->base.device->timeline_mode == VK_DEVICE_TIMELINE_MODE_EMULATED) {
      vk_multialloc_add(&ma, &wait_points,
                        struct vk_sync_timeline_point *, wait_count);
      vk_multialloc_add(&ma, &signal_points,
                        struct vk_sync_timeline_point *, signal_count);
   }

   /* zalloc: all counts and pointers in the submit start at zero/NULL */
   if (!vk_multialloc_zalloc(&ma, &queue->base.device->alloc,
                             VK_SYSTEM_ALLOCATION_SCOPE_DEVICE))
      return NULL;

   submit->waits = waits;
   submit->command_buffers = command_buffers;
   submit->signals = signals;
   submit->buffer_binds = buffer_binds;
   submit->image_opaque_binds = image_opaque_binds;
   submit->image_binds = image_binds;

   submit->_bind_entries = bind_entries;
   submit->_image_bind_entries = image_bind_entries;
   submit->_wait_temps = wait_temps;
   submit->_wait_points = wait_points;
   submit->_signal_points = signal_points;

   return submit;
}
197
198 static void
vk_queue_submit_cleanup(struct vk_queue * queue,struct vk_queue_submit * submit)199 vk_queue_submit_cleanup(struct vk_queue *queue,
200 struct vk_queue_submit *submit)
201 {
202 for (uint32_t i = 0; i < submit->wait_count; i++) {
203 if (submit->_wait_temps[i] != NULL)
204 vk_sync_destroy(queue->base.device, submit->_wait_temps[i]);
205 }
206
207 if (submit->_mem_signal_temp != NULL)
208 vk_sync_destroy(queue->base.device, submit->_mem_signal_temp);
209
210 if (submit->_wait_points != NULL) {
211 for (uint32_t i = 0; i < submit->wait_count; i++) {
212 if (unlikely(submit->_wait_points[i] != NULL)) {
213 vk_sync_timeline_point_release(queue->base.device,
214 submit->_wait_points[i]);
215 }
216 }
217 }
218
219 if (submit->_signal_points != NULL) {
220 for (uint32_t i = 0; i < submit->signal_count; i++) {
221 if (unlikely(submit->_signal_points[i] != NULL)) {
222 vk_sync_timeline_point_free(queue->base.device,
223 submit->_signal_points[i]);
224 }
225 }
226 }
227 }
228
229 static void
vk_queue_submit_free(struct vk_queue * queue,struct vk_queue_submit * submit)230 vk_queue_submit_free(struct vk_queue *queue,
231 struct vk_queue_submit *submit)
232 {
233 vk_free(&queue->base.device->alloc, submit);
234 }
235
/* Full teardown: release everything the submit references, then free the
 * submit itself.
 */
static void
vk_queue_submit_destroy(struct vk_queue *queue,
                        struct vk_queue_submit *submit)
{
   vk_queue_submit_cleanup(queue, submit);
   vk_queue_submit_free(queue, submit);
}
243
/* Append one semaphore wait from a VkSemaphoreSubmitInfo to the submit.
 *
 * If the semaphore has a temporary payload, the submit takes ownership of
 * it (stored in _wait_temps so vk_queue_submit_cleanup() destroys it) and
 * the semaphore is restored to its permanent payload, per the spec rule
 * quoted below.
 */
static void
vk_queue_submit_add_semaphore_wait(struct vk_queue *queue,
                                   struct vk_queue_submit *submit,
                                   const VkSemaphoreSubmitInfo *wait_info)
{
   VK_FROM_HANDLE(vk_semaphore, semaphore, wait_info->semaphore);

   /* From the Vulkan 1.2.194 spec:
    *
    *    "Applications can import a semaphore payload into an existing
    *    semaphore using an external semaphore handle. The effects of the
    *    import operation will be either temporary or permanent, as
    *    specified by the application. If the import is temporary, the
    *    implementation must restore the semaphore to its prior permanent
    *    state after submitting the next semaphore wait operation."
    *
    * and
    *
    *    VUID-VkImportSemaphoreFdInfoKHR-flags-03323
    *
    *    "If flags contains VK_SEMAPHORE_IMPORT_TEMPORARY_BIT, the
    *    VkSemaphoreTypeCreateInfo::semaphoreType field of the semaphore
    *    from which handle or name was exported must not be
    *    VK_SEMAPHORE_TYPE_TIMELINE"
    */
   struct vk_sync *sync;
   if (semaphore->temporary) {
      /* Only binary semaphores can carry a temporary payload (see VUID
       * above).  Ownership moves to the submit; the semaphore reverts to
       * its permanent payload immediately.
       */
      assert(semaphore->type == VK_SEMAPHORE_TYPE_BINARY);
      sync = submit->_wait_temps[submit->wait_count] = semaphore->temporary;
      semaphore->temporary = NULL;
   } else {
      if (semaphore->type == VK_SEMAPHORE_TYPE_BINARY) {
         /* Threaded submit resets binary semaphores by moving the payload
          * out of the permanent vk_sync, which requires the sync type to
          * implement move.
          */
         if (vk_device_supports_threaded_submit(queue->base.device))
            assert(semaphore->permanent.type->move);
         submit->_has_binary_permanent_semaphore_wait = true;
      }

      sync = &semaphore->permanent;
   }

   /* Binary semaphore waits always use wait value 0 */
   uint64_t wait_value = semaphore->type == VK_SEMAPHORE_TYPE_TIMELINE ?
                         wait_info->value : 0;

   submit->waits[submit->wait_count] = (struct vk_sync_wait) {
      .sync = sync,
      .stage_mask = wait_info->stageMask,
      .wait_value = wait_value,
   };

   submit->wait_count++;
}
295
/* Append one semaphore signal from a VkSemaphoreSubmitInfo to the submit.
 *
 * Signaling a timeline with value 0 marks the queue lost (invalid per the
 * Vulkan spec).  For emulated timelines, a binary time-point sync is
 * allocated here so that vk_queue_submit_final() cannot fail on it later.
 */
static VkResult MUST_CHECK
vk_queue_submit_add_semaphore_signal(struct vk_queue *queue,
                                     struct vk_queue_submit *submit,
                                     const VkSemaphoreSubmitInfo *signal_info)
{
   VK_FROM_HANDLE(vk_semaphore, semaphore, signal_info->semaphore);
   VkResult result;

   struct vk_sync *sync = vk_semaphore_get_active_sync(semaphore);
   uint64_t signal_value = signal_info->value;
   if (semaphore->type == VK_SEMAPHORE_TYPE_TIMELINE) {
      if (signal_value == 0) {
         return vk_queue_set_lost(queue,
            "Tried to signal a timeline with value 0");
      }
   } else {
      /* Binary semaphore signals always use value 0 */
      signal_value = 0;
   }

   /* For emulated timelines, we need to associate a binary vk_sync with
    * each time point and pass the binary vk_sync to the driver.  We could
    * do this in vk_queue_submit_final but it might require doing memory
    * allocation and we don't want to to add extra failure paths there.
    * Instead, allocate and replace the driver-visible vk_sync now and
    * we'll insert it into the timeline in vk_queue_submit_final.  The
    * insert step is guaranteed to not fail.
    */
   struct vk_sync_timeline *timeline = vk_sync_as_timeline(sync);
   if (timeline) {
      assert(queue->base.device->timeline_mode ==
             VK_DEVICE_TIMELINE_MODE_EMULATED);
      /* Stored in _signal_points; vk_queue_submit_cleanup() frees it if it
       * never gets installed in the timeline.
       */
      struct vk_sync_timeline_point **signal_point =
         &submit->_signal_points[submit->signal_count];
      result = vk_sync_timeline_alloc_point(queue->base.device, timeline,
                                            signal_value, signal_point);
      if (unlikely(result != VK_SUCCESS))
         return result;

      /* The driver sees the binary point sync, not the timeline */
      sync = &(*signal_point)->sync;
      signal_value = 0;
   }

   submit->signals[submit->signal_count] = (struct vk_sync_signal) {
      .sync = sync,
      .stage_mask = signal_info->stageMask,
      .signal_value = signal_value,
   };

   submit->signal_count++;

   return VK_SUCCESS;
}
348
349 static void
vk_queue_submit_add_sync_signal(struct vk_queue * queue,struct vk_queue_submit * submit,struct vk_sync * sync,uint64_t signal_value)350 vk_queue_submit_add_sync_signal(struct vk_queue *queue,
351 struct vk_queue_submit *submit,
352 struct vk_sync *sync,
353 uint64_t signal_value)
354 {
355 submit->signals[submit->signal_count++] = (struct vk_sync_signal) {
356 .sync = sync,
357 .stage_mask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
358 .signal_value = signal_value,
359 };
360 }
361
362 static VkResult MUST_CHECK
vk_queue_submit_add_mem_signal(struct vk_queue * queue,struct vk_queue_submit * submit,VkDeviceMemory memory)363 vk_queue_submit_add_mem_signal(struct vk_queue *queue,
364 struct vk_queue_submit *submit,
365 VkDeviceMemory memory)
366 {
367 assert(submit->_mem_signal_temp == NULL);
368 VkResult result;
369
370 struct vk_sync *mem_sync;
371 result = queue->base.device->create_sync_for_memory(queue->base.device,
372 memory, true,
373 &mem_sync);
374 if (unlikely(result != VK_SUCCESS))
375 return result;
376
377 submit->_mem_signal_temp = mem_sync;
378
379 vk_queue_submit_add_sync_signal(queue, submit, mem_sync, 0);
380
381 return VK_SUCCESS;
382 }
383
/* Append a fence signal: a value-0 signal on the fence's active vk_sync. */
static void
vk_queue_submit_add_fence_signal(struct vk_queue *queue,
                                 struct vk_queue_submit *submit,
                                 struct vk_fence *fence)
{
   struct vk_sync *sync = vk_fence_get_active_sync(fence);
   vk_queue_submit_add_sync_signal(queue, submit, sync, 0);
}
392
/* Append one command buffer from a VkCommandBufferSubmitInfo to the submit
 * and move it to the PENDING state.
 */
static void
vk_queue_submit_add_command_buffer(struct vk_queue *queue,
                                   struct vk_queue_submit *submit,
                                   const VkCommandBufferSubmitInfo *info)
{
   VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, info->commandBuffer);

   /* Single-device assumption: only device mask 0 or 1 is supported here */
   assert(info->deviceMask == 0 || info->deviceMask == 1);
   assert(cmd_buffer->pool->queue_family_index == queue->queue_family_index);

   /* Some drivers don't call vk_command_buffer_begin/end() yet and, for
    * those, we'll see initial layout.  However, this is enough to catch
    * command buffers which get submitted without calling EndCommandBuffer.
    */
   assert(cmd_buffer->state == MESA_VK_COMMAND_BUFFER_STATE_INITIAL ||
          cmd_buffer->state == MESA_VK_COMMAND_BUFFER_STATE_EXECUTABLE ||
          cmd_buffer->state == MESA_VK_COMMAND_BUFFER_STATE_PENDING);
   cmd_buffer->state = MESA_VK_COMMAND_BUFFER_STATE_PENDING;

   submit->command_buffers[submit->command_buffer_count++] = cmd_buffer;
}
414
415 static void
vk_queue_submit_add_buffer_bind(struct vk_queue * queue,struct vk_queue_submit * submit,const VkSparseBufferMemoryBindInfo * info)416 vk_queue_submit_add_buffer_bind(
417 struct vk_queue *queue,
418 struct vk_queue_submit *submit,
419 const VkSparseBufferMemoryBindInfo *info)
420 {
421 VkSparseMemoryBind *entries = submit->_bind_entries +
422 submit->_bind_entry_count;
423 submit->_bind_entry_count += info->bindCount;
424
425 typed_memcpy(entries, info->pBinds, info->bindCount);
426
427 VkSparseBufferMemoryBindInfo info_tmp = *info;
428 info_tmp.pBinds = entries;
429 submit->buffer_binds[submit->buffer_bind_count++] = info_tmp;
430 }
431
432 static void
vk_queue_submit_add_image_opaque_bind(struct vk_queue * queue,struct vk_queue_submit * submit,const VkSparseImageOpaqueMemoryBindInfo * info)433 vk_queue_submit_add_image_opaque_bind(
434 struct vk_queue *queue,
435 struct vk_queue_submit *submit,
436 const VkSparseImageOpaqueMemoryBindInfo *info)
437 {
438 VkSparseMemoryBind *entries = submit->_bind_entries +
439 submit->_bind_entry_count;
440 submit->_bind_entry_count += info->bindCount;
441
442 typed_memcpy(entries, info->pBinds, info->bindCount);
443
444 VkSparseImageOpaqueMemoryBindInfo info_tmp = *info;
445 info_tmp.pBinds = entries;
446 submit->image_opaque_binds[submit->image_opaque_bind_count++] = info_tmp;
447 }
448
449 static void
vk_queue_submit_add_image_bind(struct vk_queue * queue,struct vk_queue_submit * submit,const VkSparseImageMemoryBindInfo * info)450 vk_queue_submit_add_image_bind(
451 struct vk_queue *queue,
452 struct vk_queue_submit *submit,
453 const VkSparseImageMemoryBindInfo *info)
454 {
455 VkSparseImageMemoryBind *entries = submit->_image_bind_entries +
456 submit->_image_bind_entry_count;
457 submit->_image_bind_entry_count += info->bindCount;
458
459 typed_memcpy(entries, info->pBinds, info->bindCount);
460
461 VkSparseImageMemoryBindInfo info_tmp = *info;
462 info_tmp.pBinds = entries;
463 submit->image_binds[submit->image_bind_count++] = info_tmp;
464 }
465
/* Attempts to merge two submits into one.  If the merge succeeds, the merged
 * submit is returned and the two submits passed in are destroyed; if it
 * fails, NULL is returned and both inputs are left untouched.
 */
static struct vk_queue_submit *
vk_queue_submits_merge(struct vk_queue *queue,
                       struct vk_queue_submit *first,
                       struct vk_queue_submit *second)
{
   /* Don't merge if there are signals in between: see 'Signal operation order' */
   if (first->signal_count > 0 &&
       (second->command_buffer_count ||
        second->buffer_bind_count ||
        second->image_opaque_bind_count ||
        second->image_bind_count ||
        second->wait_count))
      return NULL;

   /* Don't mix sparse-bind submits with command-buffer submits */
   if (vk_queue_submit_has_bind(first) != vk_queue_submit_has_bind(second))
      return NULL;

   /* A WSI memory signal on first would be reordered after second's work */
   if (first->_mem_signal_temp)
      return NULL;

   if (first->perf_pass_index != second->perf_pass_index)
      return NULL;

   /* noop submits can always do a no-op merge */
   if (!second->command_buffer_count &&
       !second->buffer_bind_count &&
       !second->image_opaque_bind_count &&
       !second->image_bind_count &&
       !second->wait_count &&
       !second->signal_count) {
      vk_queue_submit_destroy(queue, second);
      return first;
   }
   if (!first->command_buffer_count &&
       !first->buffer_bind_count &&
       !first->image_opaque_bind_count &&
       !first->image_bind_count &&
       !first->wait_count &&
       !first->signal_count) {
      vk_queue_submit_destroy(queue, first);
      return second;
   }

   /* Allocate a fresh submit big enough for the union of both */
   struct vk_queue_submit *merged = vk_queue_submit_alloc(queue,
      first->wait_count + second->wait_count,
      first->command_buffer_count + second->command_buffer_count,
      first->buffer_bind_count + second->buffer_bind_count,
      first->image_opaque_bind_count + second->image_opaque_bind_count,
      first->image_bind_count + second->image_bind_count,
      first->_bind_entry_count + second->_bind_entry_count,
      first->_image_bind_entry_count + second->_image_bind_entry_count,
      first->signal_count + second->signal_count);
   if (merged == NULL)
      return NULL;

   /* Order matters throughout: first's entries always precede second's */
   merged->wait_count = first->wait_count + second->wait_count;
   typed_memcpy(merged->waits, first->waits, first->wait_count);
   typed_memcpy(&merged->waits[first->wait_count], second->waits, second->wait_count);

   merged->command_buffer_count = first->command_buffer_count +
                                  second->command_buffer_count;
   typed_memcpy(merged->command_buffers,
                first->command_buffers, first->command_buffer_count);
   typed_memcpy(&merged->command_buffers[first->command_buffer_count],
                second->command_buffers, second->command_buffer_count);

   merged->signal_count = first->signal_count + second->signal_count;
   typed_memcpy(merged->signals, first->signals, first->signal_count);
   typed_memcpy(&merged->signals[first->signal_count], second->signals, second->signal_count);

   /* Sparse binds are re-added so their entry arrays are re-packed into
    * merged's own storage.
    */
   for (uint32_t i = 0; i < first->buffer_bind_count; i++)
      vk_queue_submit_add_buffer_bind(queue, merged, &first->buffer_binds[i]);
   for (uint32_t i = 0; i < second->buffer_bind_count; i++)
      vk_queue_submit_add_buffer_bind(queue, merged, &second->buffer_binds[i]);

   for (uint32_t i = 0; i < first->image_opaque_bind_count; i++) {
      vk_queue_submit_add_image_opaque_bind(queue, merged,
                                            &first->image_opaque_binds[i]);
   }
   for (uint32_t i = 0; i < second->image_opaque_bind_count; i++) {
      vk_queue_submit_add_image_opaque_bind(queue, merged,
                                            &second->image_opaque_binds[i]);
   }

   for (uint32_t i = 0; i < first->image_bind_count; i++)
      vk_queue_submit_add_image_bind(queue, merged, &first->image_binds[i]);
   for (uint32_t i = 0; i < second->image_bind_count; i++)
      vk_queue_submit_add_image_bind(queue, merged, &second->image_binds[i]);

   merged->perf_pass_index = first->perf_pass_index;
   assert(second->perf_pass_index == merged->perf_pass_index);

   assert(merged->_bind_entry_count ==
          first->_bind_entry_count + second->_bind_entry_count);
   assert(merged->_image_bind_entry_count ==
          first->_image_bind_entry_count + second->_image_bind_entry_count);

   /* second's binary waits were checked above (wait_count == 0 when first
    * has signals), so first's flag is sufficient.
    */
   merged->_has_binary_permanent_semaphore_wait =
      first->_has_binary_permanent_semaphore_wait;

   /* Ownership of temporaries and time points moves to merged, ... */
   typed_memcpy(merged->_wait_temps, first->_wait_temps, first->wait_count);
   typed_memcpy(&merged->_wait_temps[first->wait_count], second->_wait_temps, second->wait_count);

   assert(first->_mem_signal_temp == NULL);
   merged->_mem_signal_temp = second->_mem_signal_temp;

   if (queue->base.device->timeline_mode == VK_DEVICE_TIMELINE_MODE_EMULATED) {
      typed_memcpy(merged->_wait_points,
                   first->_wait_points, first->wait_count);
      typed_memcpy(&merged->_wait_points[first->wait_count],
                   second->_wait_points, second->wait_count);

      typed_memcpy(merged->_signal_points,
                   first->_signal_points, first->signal_count);
      typed_memcpy(&merged->_signal_points[first->signal_count],
                   second->_signal_points, second->signal_count);
   } else {
      assert(first->_wait_points == NULL && second->_wait_points == NULL);
      assert(first->_signal_points == NULL && second->_signal_points == NULL);
   }
   /* ... so only free (not destroy) the originals. */
   vk_queue_submit_free(queue, first);
   vk_queue_submit_free(queue, second);

   return merged;
}
594
/* Queue a submit at the tail of the submit list and wake the submit thread
 * (or any vk_queue_flush() waiter) via the push condition variable.
 */
static void
vk_queue_push_submit(struct vk_queue *queue,
                     struct vk_queue_submit *submit)
{
   mtx_lock(&queue->submit.mutex);
   list_addtail(&submit->link, &queue->submit.submits);
   cnd_signal(&queue->submit.push);
   mtx_unlock(&queue->submit.mutex);
}
604
/* Block until the submit list is empty (all queued submits handed to the
 * driver).  Returns VK_ERROR_DEVICE_LOST if the device is lost while
 * waiting.  The "pop" condition variable is broadcast each time submits are
 * consumed.
 */
static VkResult
vk_queue_drain(struct vk_queue *queue)
{
   VkResult result = VK_SUCCESS;

   mtx_lock(&queue->submit.mutex);
   while (!list_is_empty(&queue->submit.submits)) {
      /* Bail out instead of waiting forever on a dead submit thread */
      if (vk_device_is_lost(queue->base.device)) {
         result = VK_ERROR_DEVICE_LOST;
         break;
      }

      int ret = cnd_wait(&queue->submit.pop, &queue->submit.mutex);
      if (ret == thrd_error) {
         result = vk_queue_set_lost(queue, "cnd_wait failed");
         break;
      }
   }
   mtx_unlock(&queue->submit.mutex);

   return result;
}
627
/* Hand a fully-prepared submit to the driver.
 *
 * Resolves emulated-timeline and binary-as-timeline waits/signals into the
 * driver-visible vk_sync objects, compacts away no-op waits, calls
 * queue->driver_submit(), and on success installs any emulated-timeline
 * signal points.  Must not be called again for the same submit.
 */
static VkResult
vk_queue_submit_final(struct vk_queue *queue,
                      struct vk_queue_submit *submit)
{
   VkResult result;

   /* Now that we know all our time points exist, fetch the time point syncs
    * from any vk_sync_timelines.  While we're here, also compact down the
    * list of waits to get rid of any trivial timeline waits.
    */
   uint32_t wait_count = 0;
   for (uint32_t i = 0; i < submit->wait_count; i++) {
      /* A timeline wait on 0 is always a no-op */
      if ((submit->waits[i].sync->flags & VK_SYNC_IS_TIMELINE) &&
          submit->waits[i].wait_value == 0)
         continue;

      /* Waits on dummy vk_syncs are no-ops */
      if (vk_sync_type_is_dummy(submit->waits[i].sync->type)) {
         /* We are about to lose track of this wait, if it has a temporary
          * we need to destroy it now, as vk_queue_submit_cleanup will not
          * know about it */
         if (submit->_wait_temps[i] != NULL) {
            vk_sync_destroy(queue->base.device, submit->_wait_temps[i]);
            submit->waits[i].sync = NULL;
         }
         continue;
      }

      /* For emulated timelines, we have a binary vk_sync associated with
       * each time point and pass the binary vk_sync to the driver.
       */
      struct vk_sync_timeline *timeline =
         vk_sync_as_timeline(submit->waits[i].sync);
      if (timeline) {
         assert(queue->base.device->timeline_mode ==
                VK_DEVICE_TIMELINE_MODE_EMULATED);
         result = vk_sync_timeline_get_point(queue->base.device, timeline,
                                             submit->waits[i].wait_value,
                                             &submit->_wait_points[i]);
         /* On failure the queue is marked lost; the wait is then dropped
          * below because the point is NULL.
          */
         if (unlikely(result != VK_SUCCESS)) {
            result = vk_queue_set_lost(queue,
                                       "Time point >= %"PRIu64" not found",
                                       submit->waits[i].wait_value);
         }

         /* This can happen if the point is long past */
         if (submit->_wait_points[i] == NULL)
            continue;

         submit->waits[i].sync = &submit->_wait_points[i]->sync;
         submit->waits[i].wait_value = 0;
      }

      /* Binary-as-timeline emulation: wait on the next point of the
       * underlying timeline instead of the binary wrapper.
       */
      struct vk_sync_binary *binary =
         vk_sync_as_binary(submit->waits[i].sync);
      if (binary) {
         submit->waits[i].sync = &binary->timeline;
         submit->waits[i].wait_value = binary->next_point;
      }

      assert((submit->waits[i].sync->flags & VK_SYNC_IS_TIMELINE) ||
             submit->waits[i].wait_value == 0);

      /* Compact: shift this wait (and its parallel-array entries) down
       * over any dropped waits.
       */
      assert(wait_count <= i);
      if (wait_count < i) {
         submit->waits[wait_count] = submit->waits[i];
         submit->_wait_temps[wait_count] = submit->_wait_temps[i];
         if (submit->_wait_points)
            submit->_wait_points[wait_count] = submit->_wait_points[i];
      }
      wait_count++;
   }

   assert(wait_count <= submit->wait_count);
   submit->wait_count = wait_count;

   for (uint32_t i = 0; i < submit->signal_count; i++) {
      assert((submit->signals[i].sync->flags & VK_SYNC_IS_TIMELINE) ||
             submit->signals[i].signal_value == 0);

      /* Binary-as-timeline emulation: signal a fresh point on the
       * underlying timeline.
       */
      struct vk_sync_binary *binary =
         vk_sync_as_binary(submit->signals[i].sync);
      if (binary) {
         submit->signals[i].sync = &binary->timeline;
         submit->signals[i].signal_value = ++binary->next_point;
      }
   }

   result = queue->driver_submit(queue, submit);
   if (unlikely(result != VK_SUCCESS))
      return result;

   /* Install the emulated-timeline signal points now that the driver has
    * accepted the submit; this step cannot fail.  Clearing the pointer
    * hands ownership to the timeline so cleanup won't free it.
    */
   if (submit->_signal_points) {
      for (uint32_t i = 0; i < submit->signal_count; i++) {
         if (submit->_signal_points[i] == NULL)
            continue;

         vk_sync_timeline_point_install(queue->base.device,
                                        submit->_signal_points[i]);
         submit->_signal_points[i] = NULL;
      }
   }

   return VK_SUCCESS;
}
734
/* Flush a deferred-mode queue: submit, in order, every queued submit whose
 * waits are at least pending, stopping at the first one that isn't ready.
 *
 * Only valid in VK_QUEUE_SUBMIT_MODE_DEFERRED.  If submit_count_out is
 * non-NULL it receives the number of submits handed to the driver.
 */
VkResult
vk_queue_flush(struct vk_queue *queue, uint32_t *submit_count_out)
{
   VkResult result = VK_SUCCESS;

   assert(queue->submit.mode == VK_QUEUE_SUBMIT_MODE_DEFERRED);

   mtx_lock(&queue->submit.mutex);

   uint32_t submit_count = 0;
   while (!list_is_empty(&queue->submit.submits)) {
      struct vk_queue_submit *submit =
         list_first_entry(&queue->submit.submits,
                          struct vk_queue_submit, link);

      for (uint32_t i = 0; i < submit->wait_count; i++) {
         /* In emulated timeline mode, only emulated timelines are allowed */
         if (!vk_sync_type_is_vk_sync_timeline(submit->waits[i].sync->type)) {
            assert(!(submit->waits[i].sync->flags & VK_SYNC_IS_TIMELINE));
            continue;
         }

         /* WAIT_PENDING with timeout 0: just check whether the time point
          * has materialized, without blocking.
          */
         result = vk_sync_wait(queue->base.device,
                               submit->waits[i].sync,
                               submit->waits[i].wait_value,
                               VK_SYNC_WAIT_PENDING, 0);
         if (result == VK_TIMEOUT) {
            /* This one's not ready yet */
            result = VK_SUCCESS;
            goto done;
         } else if (result != VK_SUCCESS) {
            result = vk_queue_set_lost(queue, "Wait for time points failed");
            goto done;
         }
      }

      result = vk_queue_submit_final(queue, submit);
      if (unlikely(result != VK_SUCCESS)) {
         result = vk_queue_set_lost(queue, "queue::driver_submit failed");
         goto done;
      }

      submit_count++;

      list_del(&submit->link);

      vk_queue_submit_destroy(queue, submit);
   }

done:
   /* Wake anyone blocked in vk_queue_drain() */
   if (submit_count)
      cnd_broadcast(&queue->submit.pop);

   mtx_unlock(&queue->submit.mutex);

   if (submit_count_out)
      *submit_count_out = submit_count;

   return result;
}
795
/* Submit thread main loop.
 *
 * Pops submits off the queue in order, waits (unlocked) for their waits to
 * become pending, hands them to the driver, and frees them.  Exits when
 * thread_run is cleared; returns 1 (after marking the queue lost) on any
 * failure.
 */
static int
vk_queue_submit_thread_func(void *_data)
{
   struct vk_queue *queue = _data;
   VkResult result;

   mtx_lock(&queue->submit.mutex);

   while (queue->submit.thread_run) {
      if (list_is_empty(&queue->submit.submits)) {
         int ret = cnd_wait(&queue->submit.push, &queue->submit.mutex);
         if (ret == thrd_error) {
            mtx_unlock(&queue->submit.mutex);
            vk_queue_set_lost(queue, "cnd_wait failed");
            return 1;
         }
         continue;
      }

      /* Peek (don't remove) the head submit; see the comment below for why
       * removal is deferred.
       */
      struct vk_queue_submit *submit =
         list_first_entry(&queue->submit.submits,
                          struct vk_queue_submit, link);

      /* Drop the lock while we wait */
      mtx_unlock(&queue->submit.mutex);

      /* WAIT_PENDING: wait for the syncs to have work queued against them,
       * not for them to be signaled.
       */
      result = vk_sync_wait_many(queue->base.device,
                                 submit->wait_count, submit->waits,
                                 VK_SYNC_WAIT_PENDING, UINT64_MAX);
      if (unlikely(result != VK_SUCCESS)) {
         vk_queue_set_lost(queue, "Wait for time points failed");
         return 1;
      }

      result = vk_queue_submit_final(queue, submit);
      if (unlikely(result != VK_SUCCESS)) {
         vk_queue_set_lost(queue, "queue::driver_submit failed");
         return 1;
      }

      /* Do all our cleanup of individual fences etc. outside the lock.
       * We can't actually remove it from the list yet.  We have to do
       * that under the lock.
       */
      vk_queue_submit_cleanup(queue, submit);

      mtx_lock(&queue->submit.mutex);

      /* Only remove the submit from from the list and free it after
       * queue->submit() has completed.  This ensures that, when
       * vk_queue_drain() completes, there are no more pending jobs.
       */
      list_del(&submit->link);
      vk_queue_submit_free(queue, submit);

      cnd_broadcast(&queue->submit.pop);
   }

   mtx_unlock(&queue->submit.mutex);
   return 0;
}
857
858 static VkResult
vk_queue_start_submit_thread(struct vk_queue * queue)859 vk_queue_start_submit_thread(struct vk_queue *queue)
860 {
861 int ret;
862
863 mtx_lock(&queue->submit.mutex);
864 queue->submit.thread_run = true;
865 mtx_unlock(&queue->submit.mutex);
866
867 ret = thrd_create(&queue->submit.thread,
868 vk_queue_submit_thread_func,
869 queue);
870 if (ret == thrd_error)
871 return vk_errorf(queue, VK_ERROR_UNKNOWN, "thrd_create failed");
872
873 return VK_SUCCESS;
874 }
875
/* Stop the submit thread and fall back to immediate mode.
 *
 * Drains the queue first so no submits are left behind, then clears the run
 * flag, wakes the thread, and joins it.
 */
static void
vk_queue_stop_submit_thread(struct vk_queue *queue)
{
   vk_queue_drain(queue);

   /* Kick the thread to disable it */
   mtx_lock(&queue->submit.mutex);
   queue->submit.thread_run = false;
   cnd_signal(&queue->submit.push);
   mtx_unlock(&queue->submit.mutex);

   thrd_join(queue->submit.thread, NULL);

   assert(list_is_empty(&queue->submit.submits));
   queue->submit.mode = VK_QUEUE_SUBMIT_MODE_IMMEDIATE;
}
892
893 VkResult
vk_queue_enable_submit_thread(struct vk_queue * queue)894 vk_queue_enable_submit_thread(struct vk_queue *queue)
895 {
896 assert(vk_device_supports_threaded_submit(queue->base.device));
897
898 if (queue->submit.mode == VK_QUEUE_SUBMIT_MODE_THREADED)
899 return VK_SUCCESS;
900
901 VkResult result = vk_queue_start_submit_thread(queue);
902 if (result != VK_SUCCESS)
903 return result;
904
905 queue->submit.mode = VK_QUEUE_SUBMIT_MODE_THREADED;
906
907 return VK_SUCCESS;
908 }
909
/* Driver-agnostic view of one submission, normalized from either
 * VkSubmitInfo2 or VkBindSparseInfo so the common submit path can handle
 * both.
 */
struct vulkan_submit_info {
   /* pNext chain of the original submit info (e.g. WSI memory signal) */
   const void *pNext;

   uint32_t command_buffer_count;
   const VkCommandBufferSubmitInfo *command_buffers;

   uint32_t wait_count;
   const VkSemaphoreSubmitInfo *waits;

   uint32_t signal_count;
   const VkSemaphoreSubmitInfo *signals;

   /* Sparse binding, from VkBindSparseInfo (zero for regular submits) */
   uint32_t buffer_bind_count;
   const VkSparseBufferMemoryBindInfo *buffer_binds;

   uint32_t image_opaque_bind_count;
   const VkSparseImageOpaqueMemoryBindInfo *image_opaque_binds;

   uint32_t image_bind_count;
   const VkSparseImageMemoryBindInfo *image_binds;

   /* Optional fence to signal when the submit completes; may be NULL */
   struct vk_fence *fence;
};
933
/* Build a vk_queue_submit from a flattened vulkan_submit_info.
 *
 * Allocates one submit sized for all waits, command buffers, sparse binds,
 * and signals (including the optional WSI memory-signal and fence signals),
 * then populates it.  On success, ownership of the submit passes to the
 * caller via *submit_out; on failure the submit is destroyed here.
 */
static VkResult
vk_queue_submit_create(struct vk_queue *queue,
                       const struct vulkan_submit_info *info,
                       struct vk_queue_submit **submit_out)
{
   VkResult result;
   uint32_t sparse_memory_bind_entry_count = 0;
   uint32_t sparse_memory_image_bind_entry_count = 0;

   /* Total per-entry bind counts so the submit can be allocated in one go */
   for (uint32_t i = 0; i < info->buffer_bind_count; ++i)
      sparse_memory_bind_entry_count += info->buffer_binds[i].bindCount;

   for (uint32_t i = 0; i < info->image_opaque_bind_count; ++i)
      sparse_memory_bind_entry_count += info->image_opaque_binds[i].bindCount;

   for (uint32_t i = 0; i < info->image_bind_count; ++i)
      sparse_memory_image_bind_entry_count += info->image_binds[i].bindCount;

   /* WSI may ask us to signal a memory object; only honored if the device
    * provides create_sync_for_memory.
    */
   const struct wsi_memory_signal_submit_info *mem_signal =
      vk_find_struct_const(info->pNext, WSI_MEMORY_SIGNAL_SUBMIT_INFO_MESA);
   bool signal_mem_sync = mem_signal != NULL &&
                          mem_signal->memory != VK_NULL_HANDLE &&
                          queue->base.device->create_sync_for_memory != NULL;

   /* Semaphore signals, plus optionally one memory signal and one fence */
   uint32_t signal_count = info->signal_count +
                           signal_mem_sync +
                           (info->fence != NULL);

   struct vk_queue_submit *submit =
      vk_queue_submit_alloc(queue, info->wait_count,
                            info->command_buffer_count,
                            info->buffer_bind_count,
                            info->image_opaque_bind_count,
                            info->image_bind_count,
                            sparse_memory_bind_entry_count,
                            sparse_memory_image_bind_entry_count,
                            signal_count);
   if (unlikely(submit == NULL))
      return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);

   /* From the Vulkan 1.2.194 spec:
    *
    *    "If the VkSubmitInfo::pNext chain does not include this structure,
    *    the batch defaults to use counter pass index 0."
    */
   const VkPerformanceQuerySubmitInfoKHR *perf_info =
      vk_find_struct_const(info->pNext, PERFORMANCE_QUERY_SUBMIT_INFO_KHR);
   submit->perf_pass_index = perf_info ? perf_info->counterPassIndex : 0;

   for (uint32_t i = 0; i < info->wait_count; i++)
      vk_queue_submit_add_semaphore_wait(queue, submit, &info->waits[i]);

   for (uint32_t i = 0; i < info->command_buffer_count; i++) {
      vk_queue_submit_add_command_buffer(queue, submit,
                                         &info->command_buffers[i]);
   }

   for (uint32_t i = 0; i < info->buffer_bind_count; ++i)
      vk_queue_submit_add_buffer_bind(queue, submit, &info->buffer_binds[i]);

   for (uint32_t i = 0; i < info->image_opaque_bind_count; ++i) {
      vk_queue_submit_add_image_opaque_bind(queue, submit,
                                            &info->image_opaque_binds[i]);
   }

   for (uint32_t i = 0; i < info->image_bind_count; ++i)
      vk_queue_submit_add_image_bind(queue, submit, &info->image_binds[i]);

   for (uint32_t i = 0; i < info->signal_count; i++) {
      result = vk_queue_submit_add_semaphore_signal(queue, submit,
                                                    &info->signals[i]);
      if (unlikely(result != VK_SUCCESS))
         goto fail;
   }

   if (signal_mem_sync) {
      result = vk_queue_submit_add_mem_signal(queue, submit,
                                              mem_signal->memory);
      if (unlikely(result != VK_SUCCESS))
         goto fail;
   }

   if (info->fence != NULL)
      vk_queue_submit_add_fence_signal(queue, submit, info->fence);

   /* Everything we sized the allocation for must have been added */
   assert(signal_count == submit->signal_count);

   *submit_out = submit;

   return VK_SUCCESS;

fail:
   vk_queue_submit_destroy(queue, submit);
   return result;
}
1029
/* Route a submit to the driver according to the queue's submit mode.
 *
 * In IMMEDIATE mode the submit goes straight to vk_queue_submit_final();
 * DEFERRED pushes it and flushes the whole device; THREADED hands it to the
 * submit thread.  Ownership of the submit always passes out of the caller:
 * it is either destroyed here or queued for the thread.
 */
static VkResult
vk_queue_submit(struct vk_queue *queue,
                struct vk_queue_submit *submit)
{
   struct vk_device *device = queue->base.device;
   VkResult result;

   /* If this device supports threaded submit, we can't rely on the client
    * ordering requirements to ensure submits happen in the right order. Even
    * if this queue doesn't have a submit thread, another queue (possibly in a
    * different process) may and that means our dependencies may not have
    * been submitted to the kernel yet. Do a quick zero-timeout WAIT_PENDING
    * on all the wait semaphores to see if we need to start up our own thread.
    */
   if (device->submit_mode == VK_QUEUE_SUBMIT_MODE_THREADED_ON_DEMAND &&
       queue->submit.mode != VK_QUEUE_SUBMIT_MODE_THREADED) {
      assert(queue->submit.mode == VK_QUEUE_SUBMIT_MODE_IMMEDIATE);

      result = vk_sync_wait_many(queue->base.device,
                                 submit->wait_count, submit->waits,
                                 VK_SYNC_WAIT_PENDING, 0);
      if (result == VK_TIMEOUT)
         result = vk_queue_enable_submit_thread(queue);
      if (unlikely(result != VK_SUCCESS))
         goto fail;
   }

   switch (queue->submit.mode) {
   case VK_QUEUE_SUBMIT_MODE_IMMEDIATE:
      result = vk_queue_submit_final(queue, submit);
      if (unlikely(result != VK_SUCCESS))
         goto fail;

      /* If threaded submit is possible on this device, we need to ensure that
       * binary semaphore payloads get reset so that any other threads can
       * properly wait on them for dependency checking. Because we don't
       * currently have a submit thread, we can directly reset those binary
       * semaphore payloads.
       *
       * If the vk_sync is in our signal set, we can consider it to have
       * been both reset and signaled by queue_submit_final(). A reset in
       * this case would be wrong because it would throw away our signal
       * operation. If we don't signal the vk_sync, then we need to reset it.
       */
      if (vk_device_supports_threaded_submit(device) &&
          submit->_has_binary_permanent_semaphore_wait) {
         for (uint32_t i = 0; i < submit->wait_count; i++) {
            /* Timeline semaphores and stolen temporary payloads never need
             * a reset
             */
            if ((submit->waits[i].sync->flags & VK_SYNC_IS_TIMELINE) ||
                submit->_wait_temps[i] != NULL)
               continue;

            bool was_signaled = false;
            for (uint32_t j = 0; j < submit->signal_count; j++) {
               if (submit->signals[j].sync == submit->waits[i].sync) {
                  was_signaled = true;
                  break;
               }
            }

            if (!was_signaled) {
               result = vk_sync_reset(queue->base.device,
                                      submit->waits[i].sync);
               if (unlikely(result != VK_SUCCESS))
                  goto fail;
            }
         }
      }

      vk_queue_submit_destroy(queue, submit);
      return result;

   case VK_QUEUE_SUBMIT_MODE_DEFERRED:
      vk_queue_push_submit(queue, submit);
      return vk_device_flush(queue->base.device);

   case VK_QUEUE_SUBMIT_MODE_THREADED:
      if (submit->_has_binary_permanent_semaphore_wait) {
         for (uint32_t i = 0; i < submit->wait_count; i++) {
            if (submit->waits[i].sync->flags & VK_SYNC_IS_TIMELINE)
               continue;

            /* From the Vulkan 1.2.194 spec:
             *
             *    "When a batch is submitted to a queue via a queue
             *    submission, and it includes semaphores to be waited on,
             *    it defines a memory dependency between prior semaphore
             *    signal operations and the batch, and defines semaphore
             *    wait operations.
             *
             *    Such semaphore wait operations set the semaphores
             *    created with a VkSemaphoreType of
             *    VK_SEMAPHORE_TYPE_BINARY to the unsignaled state."
             *
             * For threaded submit, we depend on tracking the unsignaled
             * state of binary semaphores to determine when we can safely
             * submit. The VK_SYNC_WAIT_PENDING check above as well as the
             * one in the submit thread depend on all binary semaphores
             * being reset when they're not in active use from the point
             * of view of the client's CPU timeline. This means we need to
             * reset them inside vkQueueSubmit and cannot wait until the
             * actual submit which happens later in the thread.
             *
             * We've already stolen temporary semaphore payloads above as
             * part of basic semaphore processing. We steal permanent
             * semaphore payloads here by way of vk_sync_move. For shared
             * semaphores, this can be a bit expensive (sync file import
             * and export) but, for non-shared semaphores, it can be made
             * fairly cheap. Also, we only do this semaphore swapping in
             * the case where you have real timelines AND the client is
             * using timeline semaphores with wait-before-signal (that's
             * the only way to get a submit thread) AND mixing those with
             * waits on binary semaphores AND said binary semaphore is
             * using its permanent payload. In other words, this code
             * should basically only ever get executed in CTS tests.
             */
            if (submit->_wait_temps[i] != NULL)
               continue;

            /* From the Vulkan 1.2.194 spec:
             *
             *    VUID-vkQueueSubmit-pWaitSemaphores-03238
             *
             *    "All elements of the pWaitSemaphores member of all
             *    elements of pSubmits created with a VkSemaphoreType of
             *    VK_SEMAPHORE_TYPE_BINARY must reference a semaphore
             *    signal operation that has been submitted for execution
             *    and any semaphore signal operations on which it depends
             *    (if any) must have also been submitted for execution."
             *
             * Therefore, we can safely do a blocking wait here and it
             * won't actually block for long. This ensures that the
             * vk_sync_move below will succeed.
             */
            result = vk_sync_wait(queue->base.device,
                                  submit->waits[i].sync, 0,
                                  VK_SYNC_WAIT_PENDING, UINT64_MAX);
            if (unlikely(result != VK_SUCCESS))
               goto fail;

            result = vk_sync_create(queue->base.device,
                                    submit->waits[i].sync->type,
                                    0 /* flags */,
                                    0 /* initial value */,
                                    &submit->_wait_temps[i]);
            if (unlikely(result != VK_SUCCESS))
               goto fail;

            result = vk_sync_move(queue->base.device,
                                  submit->_wait_temps[i],
                                  submit->waits[i].sync);
            if (unlikely(result != VK_SUCCESS))
               goto fail;

            submit->waits[i].sync = submit->_wait_temps[i];
         }
      }

      /* If we're signaling a memory object, we have to ensure that
       * vkQueueSubmit does not return until the kernel submission has
       * happened. Otherwise, we may get a race between this process
       * and whatever is going to wait on the object where the other
       * process may wait before we've submitted our work. Drain the
       * queue now to avoid this. It's the responsibility of the caller
       * to ensure that any vkQueueSubmit which signals a memory object
       * has fully resolved dependencies.
       */
      const bool needs_drain = submit->_mem_signal_temp;

      vk_queue_push_submit(queue, submit);

      if (needs_drain) {
         result = vk_queue_drain(queue);
         if (unlikely(result != VK_SUCCESS))
            return result;
      }

      return VK_SUCCESS;

   case VK_QUEUE_SUBMIT_MODE_THREADED_ON_DEMAND:
      unreachable("Invalid vk_queue::submit.mode");
   }
   unreachable("Invalid submit mode");

fail:
   vk_queue_submit_destroy(queue, submit);
   return result;
}
1217
1218 static VkResult
vk_queue_merge_submit(struct vk_queue * queue,struct vk_queue_submit ** last_submit,struct vk_queue_submit * submit)1219 vk_queue_merge_submit(struct vk_queue *queue,
1220 struct vk_queue_submit **last_submit,
1221 struct vk_queue_submit *submit)
1222 {
1223 if (*last_submit == NULL) {
1224 *last_submit = submit;
1225 return VK_SUCCESS;
1226 }
1227
1228 struct vk_queue_submit *merged =
1229 vk_queue_submits_merge(queue, *last_submit, submit);
1230 if (merged != NULL) {
1231 *last_submit = merged;
1232 return VK_SUCCESS;
1233 }
1234
1235 VkResult result = vk_queue_submit(queue, *last_submit);
1236 *last_submit = NULL;
1237
1238 if (likely(result == VK_SUCCESS)) {
1239 *last_submit = submit;
1240 } else {
1241 vk_queue_submit_destroy(queue, submit);
1242 }
1243
1244 return result;
1245 }
1246
/* Block until all wait semaphores of a present have at least been submitted.
 *
 * Returns VK_ERROR_DEVICE_LOST if the device is lost, otherwise the result
 * of the pending-wait.  Only does real work on devices that support threaded
 * submit; everywhere else the flush at the end of vkQueueSubmit already
 * guarantees the semaphores exist.
 */
VkResult
vk_queue_wait_before_present(struct vk_queue *queue,
                             const VkPresentInfoKHR *pPresentInfo)
{
   if (vk_device_is_lost(queue->base.device))
      return VK_ERROR_DEVICE_LOST;

   /* From the Vulkan 1.2.194 spec:
    *
    *    VUID-vkQueuePresentKHR-pWaitSemaphores-03268
    *
    *    "All elements of the pWaitSemaphores member of pPresentInfo must
    *    reference a semaphore signal operation that has been submitted for
    *    execution and any semaphore signal operations on which it depends (if
    *    any) must have also been submitted for execution."
    *
    * As with vkQueueSubmit above, we need to ensure that any binary
    * semaphores we use in this present actually exist. If we don't have
    * timeline semaphores, this is a non-issue. If they're emulated, then
    * this is ensured for us by the vk_device_flush() at the end of every
    * vkQueueSubmit() and every vkSignalSemaphore(). For real timeline
    * semaphores, however, we need to do a wait. Thanks to the above bit of
    * spec text, that wait should never block for long.
    */
   if (!vk_device_supports_threaded_submit(queue->base.device))
      return VK_SUCCESS;

   const uint32_t wait_count = pPresentInfo->waitSemaphoreCount;

   if (wait_count == 0)
      return VK_SUCCESS;

   /* NOTE(review): unlike vk_common_QueueBindSparse(), the STACK_ARRAY
    * result is not checked for allocation failure here — confirm whether
    * wait_count is always small enough that this is acceptable.
    */
   STACK_ARRAY(struct vk_sync_wait, waits, wait_count);

   for (uint32_t i = 0; i < wait_count; i++) {
      VK_FROM_HANDLE(vk_semaphore, semaphore,
                     pPresentInfo->pWaitSemaphores[i]);

      /* From the Vulkan 1.2.194 spec:
       *
       *    VUID-vkQueuePresentKHR-pWaitSemaphores-03267
       *
       *    "All elements of the pWaitSemaphores member of pPresentInfo must
       *    be created with a VkSemaphoreType of VK_SEMAPHORE_TYPE_BINARY."
       */
      assert(semaphore->type == VK_SEMAPHORE_TYPE_BINARY);

      waits[i] = (struct vk_sync_wait) {
         .sync = vk_semaphore_get_active_sync(semaphore),
         .stage_mask = ~(VkPipelineStageFlags2)0,
      };
   }

   /* Wait only for the signals to be *pending* (submitted), not complete */
   VkResult result = vk_sync_wait_many(queue->base.device, wait_count, waits,
                                       VK_SYNC_WAIT_PENDING, UINT64_MAX);

   STACK_ARRAY_FINISH(waits);

   /* Check again, just in case */
   if (vk_device_is_lost(queue->base.device))
      return VK_ERROR_DEVICE_LOST;

   return result;
}
1311
1312 static VkResult
vk_queue_signal_sync(struct vk_queue * queue,struct vk_sync * sync,uint32_t signal_value)1313 vk_queue_signal_sync(struct vk_queue *queue,
1314 struct vk_sync *sync,
1315 uint32_t signal_value)
1316 {
1317 struct vk_queue_submit *submit = vk_queue_submit_alloc(queue, 0, 0, 0, 0, 0,
1318 0, 0, 1);
1319 if (unlikely(submit == NULL))
1320 return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
1321
1322 vk_queue_submit_add_sync_signal(queue, submit, sync, signal_value);
1323
1324 VkResult result;
1325 switch (queue->submit.mode) {
1326 case VK_QUEUE_SUBMIT_MODE_IMMEDIATE:
1327 result = vk_queue_submit_final(queue, submit);
1328 vk_queue_submit_destroy(queue, submit);
1329 return result;
1330
1331 case VK_QUEUE_SUBMIT_MODE_DEFERRED:
1332 vk_queue_push_submit(queue, submit);
1333 return vk_device_flush(queue->base.device);
1334
1335 case VK_QUEUE_SUBMIT_MODE_THREADED:
1336 vk_queue_push_submit(queue, submit);
1337 return VK_SUCCESS;
1338
1339 case VK_QUEUE_SUBMIT_MODE_THREADED_ON_DEMAND:
1340 unreachable("Invalid vk_queue::submit.mode");
1341 }
1342 unreachable("Invalid timeline mode");
1343 }
1344
/* Destroy everything owned by a vk_queue.
 *
 * Stops the submit thread first (which drains it), then frees any submits
 * still on the list — which should only exist if the device is lost.
 */
void
vk_queue_finish(struct vk_queue *queue)
{
   if (queue->submit.mode == VK_QUEUE_SUBMIT_MODE_THREADED)
      vk_queue_stop_submit_thread(queue);

   while (!list_is_empty(&queue->submit.submits)) {
      /* A non-empty list here is only legal on a lost device */
      assert(vk_device_is_lost_no_report(queue->base.device));

      struct vk_queue_submit *submit =
         list_first_entry(&queue->submit.submits,
                          struct vk_queue_submit, link);

      list_del(&submit->link);
      vk_queue_submit_destroy(queue, submit);
   }

#if DETECT_OS_ANDROID
   /* ANativeBuffer semaphore created lazily for Android presents */
   if (queue->anb_semaphore != VK_NULL_HANDLE) {
      struct vk_device *device = queue->base.device;
      device->dispatch_table.DestroySemaphore(vk_device_to_handle(device),
                                              queue->anb_semaphore, NULL);
   }
#endif

   cnd_destroy(&queue->submit.pop);
   cnd_destroy(&queue->submit.push);
   mtx_destroy(&queue->submit.mutex);

   util_dynarray_fini(&queue->labels);
   list_del(&queue->link);
   vk_object_base_finish(&queue->base);
}
1378
1379 VKAPI_ATTR VkResult VKAPI_CALL
vk_common_QueueSubmit2(VkQueue _queue,uint32_t submitCount,const VkSubmitInfo2 * pSubmits,VkFence _fence)1380 vk_common_QueueSubmit2(VkQueue _queue,
1381 uint32_t submitCount,
1382 const VkSubmitInfo2 *pSubmits,
1383 VkFence _fence)
1384 {
1385 VK_FROM_HANDLE(vk_queue, queue, _queue);
1386 VK_FROM_HANDLE(vk_fence, fence, _fence);
1387 VkResult result;
1388
1389 if (vk_device_is_lost(queue->base.device))
1390 return VK_ERROR_DEVICE_LOST;
1391
1392 if (submitCount == 0) {
1393 if (fence == NULL) {
1394 return VK_SUCCESS;
1395 } else {
1396 return vk_queue_signal_sync(queue, vk_fence_get_active_sync(fence), 0);
1397 }
1398 }
1399
1400 struct vk_queue_submit *last_submit = NULL;
1401 for (uint32_t i = 0; i < submitCount; i++) {
1402 struct vulkan_submit_info info = {
1403 .pNext = pSubmits[i].pNext,
1404 .command_buffer_count = pSubmits[i].commandBufferInfoCount,
1405 .command_buffers = pSubmits[i].pCommandBufferInfos,
1406 .wait_count = pSubmits[i].waitSemaphoreInfoCount,
1407 .waits = pSubmits[i].pWaitSemaphoreInfos,
1408 .signal_count = pSubmits[i].signalSemaphoreInfoCount,
1409 .signals = pSubmits[i].pSignalSemaphoreInfos,
1410 .fence = i == submitCount - 1 ? fence : NULL
1411 };
1412 struct vk_queue_submit *submit;
1413 result = vk_queue_submit_create(queue, &info, &submit);
1414 if (unlikely(result != VK_SUCCESS))
1415 return result;
1416
1417 result = vk_queue_merge_submit(queue, &last_submit, submit);
1418 if (unlikely(result != VK_SUCCESS))
1419 return result;
1420 }
1421
1422 if (last_submit != NULL) {
1423 result = vk_queue_submit(queue, last_submit);
1424 if (unlikely(result != VK_SUCCESS))
1425 return result;
1426 }
1427
1428 return VK_SUCCESS;
1429 }
1430
1431 VKAPI_ATTR VkResult VKAPI_CALL
vk_common_QueueBindSparse(VkQueue _queue,uint32_t bindInfoCount,const VkBindSparseInfo * pBindInfo,VkFence _fence)1432 vk_common_QueueBindSparse(VkQueue _queue,
1433 uint32_t bindInfoCount,
1434 const VkBindSparseInfo *pBindInfo,
1435 VkFence _fence)
1436 {
1437 VK_FROM_HANDLE(vk_queue, queue, _queue);
1438 VK_FROM_HANDLE(vk_fence, fence, _fence);
1439 VkResult result;
1440
1441 if (vk_device_is_lost(queue->base.device))
1442 return VK_ERROR_DEVICE_LOST;
1443
1444 if (bindInfoCount == 0) {
1445 if (fence == NULL) {
1446 return VK_SUCCESS;
1447 } else {
1448 return vk_queue_signal_sync(queue, vk_fence_get_active_sync(fence), 0);
1449 }
1450 }
1451
1452 struct vk_queue_submit *last_submit = NULL;
1453 for (uint32_t i = 0; i < bindInfoCount; i++) {
1454 const VkTimelineSemaphoreSubmitInfo *timeline_info =
1455 vk_find_struct_const(pBindInfo[i].pNext, TIMELINE_SEMAPHORE_SUBMIT_INFO);
1456 const uint64_t *wait_values = NULL;
1457 const uint64_t *signal_values = NULL;
1458
1459 if (timeline_info && timeline_info->waitSemaphoreValueCount) {
1460 /* From the Vulkan 1.3.204 spec:
1461 *
1462 * VUID-VkBindSparseInfo-pNext-03248
1463 *
1464 * "If the pNext chain of this structure includes a VkTimelineSemaphoreSubmitInfo structure
1465 * and any element of pSignalSemaphores was created with a VkSemaphoreType of
1466 * VK_SEMAPHORE_TYPE_TIMELINE, then its signalSemaphoreValueCount member must equal
1467 * signalSemaphoreCount"
1468 */
1469 assert(timeline_info->waitSemaphoreValueCount == pBindInfo[i].waitSemaphoreCount);
1470 wait_values = timeline_info->pWaitSemaphoreValues;
1471 }
1472
1473 if (timeline_info && timeline_info->signalSemaphoreValueCount) {
1474 /* From the Vulkan 1.3.204 spec:
1475 *
1476 * VUID-VkBindSparseInfo-pNext-03247
1477 *
1478 * "If the pNext chain of this structure includes a VkTimelineSemaphoreSubmitInfo structure
1479 * and any element of pWaitSemaphores was created with a VkSemaphoreType of
1480 * VK_SEMAPHORE_TYPE_TIMELINE, then its waitSemaphoreValueCount member must equal
1481 * waitSemaphoreCount"
1482 */
1483 assert(timeline_info->signalSemaphoreValueCount == pBindInfo[i].signalSemaphoreCount);
1484 signal_values = timeline_info->pSignalSemaphoreValues;
1485 }
1486
1487 STACK_ARRAY(VkSemaphoreSubmitInfo, wait_semaphore_infos,
1488 pBindInfo[i].waitSemaphoreCount);
1489 STACK_ARRAY(VkSemaphoreSubmitInfo, signal_semaphore_infos,
1490 pBindInfo[i].signalSemaphoreCount);
1491
1492 if (!wait_semaphore_infos || !signal_semaphore_infos) {
1493 STACK_ARRAY_FINISH(wait_semaphore_infos);
1494 STACK_ARRAY_FINISH(signal_semaphore_infos);
1495 return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
1496 }
1497
1498 for (uint32_t j = 0; j < pBindInfo[i].waitSemaphoreCount; j++) {
1499 wait_semaphore_infos[j] = (VkSemaphoreSubmitInfo) {
1500 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
1501 .semaphore = pBindInfo[i].pWaitSemaphores[j],
1502 .value = wait_values ? wait_values[j] : 0,
1503 };
1504 }
1505
1506 for (uint32_t j = 0; j < pBindInfo[i].signalSemaphoreCount; j++) {
1507 signal_semaphore_infos[j] = (VkSemaphoreSubmitInfo) {
1508 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
1509 .semaphore = pBindInfo[i].pSignalSemaphores[j],
1510 .value = signal_values ? signal_values[j] : 0,
1511 };
1512 }
1513 struct vulkan_submit_info info = {
1514 .pNext = pBindInfo[i].pNext,
1515 .wait_count = pBindInfo[i].waitSemaphoreCount,
1516 .waits = wait_semaphore_infos,
1517 .signal_count = pBindInfo[i].signalSemaphoreCount,
1518 .signals = signal_semaphore_infos,
1519 .buffer_bind_count = pBindInfo[i].bufferBindCount,
1520 .buffer_binds = pBindInfo[i].pBufferBinds,
1521 .image_opaque_bind_count = pBindInfo[i].imageOpaqueBindCount,
1522 .image_opaque_binds = pBindInfo[i].pImageOpaqueBinds,
1523 .image_bind_count = pBindInfo[i].imageBindCount,
1524 .image_binds = pBindInfo[i].pImageBinds,
1525 .fence = i == bindInfoCount - 1 ? fence : NULL
1526 };
1527 struct vk_queue_submit *submit;
1528 result = vk_queue_submit_create(queue, &info, &submit);
1529 if (likely(result == VK_SUCCESS))
1530 result = vk_queue_merge_submit(queue, &last_submit, submit);
1531
1532 STACK_ARRAY_FINISH(wait_semaphore_infos);
1533 STACK_ARRAY_FINISH(signal_semaphore_infos);
1534
1535 if (unlikely(result != VK_SUCCESS))
1536 return result;
1537 }
1538
1539 if (last_submit != NULL) {
1540 result = vk_queue_submit(queue, last_submit);
1541 if (unlikely(result != VK_SUCCESS))
1542 return result;
1543 }
1544
1545 return VK_SUCCESS;
1546 }
1547
1548 static const struct vk_sync_type *
get_cpu_wait_type(struct vk_physical_device * pdevice)1549 get_cpu_wait_type(struct vk_physical_device *pdevice)
1550 {
1551 for (const struct vk_sync_type *const *t =
1552 pdevice->supported_sync_types; *t; t++) {
1553 if (((*t)->features & VK_SYNC_FEATURE_BINARY) &&
1554 ((*t)->features & VK_SYNC_FEATURE_CPU_WAIT))
1555 return *t;
1556 }
1557
1558 unreachable("You must have a non-timeline CPU wait sync type");
1559 }
1560
/* Common implementation of vkQueueWaitIdle().
 *
 * Submits a signal of a fresh CPU-waitable binary sync on the queue and
 * blocks until it completes; since the queue executes submits in order, the
 * signal completing implies all previously submitted work is done.
 */
VKAPI_ATTR VkResult VKAPI_CALL
vk_common_QueueWaitIdle(VkQueue _queue)
{
   MESA_TRACE_FUNC();

   VK_FROM_HANDLE(vk_queue, queue, _queue);
   VkResult result;

   if (vk_device_is_lost(queue->base.device))
      return VK_ERROR_DEVICE_LOST;

   const struct vk_sync_type *sync_type =
      get_cpu_wait_type(queue->base.device->physical);

   struct vk_sync *sync;
   result = vk_sync_create(queue->base.device, sync_type, 0, 0, &sync);
   if (unlikely(result != VK_SUCCESS))
      return result;

   /* NOTE(review): if this fails, sync is leaked.  Destroying it here is not
    * obviously safe because in deferred mode a still-queued submit may
    * reference it — confirm ownership before adding cleanup.
    */
   result = vk_queue_signal_sync(queue, sync, 0);
   if (unlikely(result != VK_SUCCESS))
      return result;

   result = vk_sync_wait(queue->base.device, sync, 0,
                         VK_SYNC_WAIT_COMPLETE, UINT64_MAX);

   vk_sync_destroy(queue->base.device, sync);

   /* Prefer reporting device loss over any wait error */
   VkResult device_status = vk_device_check_status(queue->base.device);
   if (device_status != VK_SUCCESS)
      return device_status;

   return result;
}
1595