/*
 * Copyright 2022 Google LLC
 * SPDX-License-Identifier: MIT
 */

#include "vn_feedback.h"

#include "vn_command_buffer.h"
#include "vn_device.h"
#include "vn_physical_device.h"
#include "vn_query_pool.h"
#include "vn_queue.h"

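/* Return the index of the first memory type in mem_type_bits that has all
 * of required_mem_flags set, or UINT32_MAX if none qualifies.
 */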
static uint32_t
vn_get_memory_type_index(const VkPhysicalDeviceMemoryProperties *mem_props,
                         uint32_t mem_type_bits,
                         VkMemoryPropertyFlags required_mem_flags)
{
   u_foreach_bit(mem_type_index, mem_type_bits)
   {
      assert(mem_type_index < mem_props->memoryTypeCount);
      if ((mem_props->memoryTypes[mem_type_index].propertyFlags &
           required_mem_flags) == required_mem_flags)
         return mem_type_index;
   }

   return UINT32_MAX;
}

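/* Create a feedback buffer: a host-coherent VkBuffer with backing memory
 * allocated, bound and persistently mapped so feedback values written by
 * the device can be read directly on the CPU.
 */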
VkResult
vn_feedback_buffer_create(struct vn_device *dev,
                          uint32_t size,
                          const VkAllocationCallbacks *alloc,
                          struct vn_feedback_buffer **out_fb_buf)
{
   const bool exclusive = dev->queue_family_count == 1;
   const VkPhysicalDeviceMemoryProperties *mem_props =
      &dev->physical_device->memory_properties;
   VkDevice dev_handle = vn_device_to_handle(dev);
   VkResult result;

   struct vn_feedback_buffer *fb_buf =
      vk_zalloc(alloc, sizeof(*fb_buf), VN_DEFAULT_ALIGN,
                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!fb_buf)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   /* use concurrent sharing to avoid explicit queue family ownership
    * transfers for a device created with queues from multiple queue families
    */
   const VkBufferCreateInfo buf_create_info = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
      .size = size,
      /* Feedback for fences and timeline semaphores will write to this buffer
       * as a DST when signalling. Timeline semaphore feedback will also read
       * from this buffer as a SRC to retrieve the counter value to signal.
       */
      .usage =
         VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
      .sharingMode =
         exclusive ? VK_SHARING_MODE_EXCLUSIVE : VK_SHARING_MODE_CONCURRENT,
      /* below favors the current venus protocol */
      .queueFamilyIndexCount = exclusive ? 0 : dev->queue_family_count,
      .pQueueFamilyIndices = exclusive ? NULL : dev->queue_families,
   };
   result = vn_CreateBuffer(dev_handle, &buf_create_info, alloc,
                            &fb_buf->buf_handle);
   if (result != VK_SUCCESS)
      goto out_free_feedback_buffer;

   struct vn_buffer *buf = vn_buffer_from_handle(fb_buf->buf_handle);
   const VkMemoryRequirements *mem_req =
      &buf->requirements.memory.memoryRequirements;
   const uint32_t mem_type_index =
      vn_get_memory_type_index(mem_props, mem_req->memoryTypeBits,
                               VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
   if (mem_type_index >= mem_props->memoryTypeCount) {
      result = VK_ERROR_INITIALIZATION_FAILED;
      goto out_destroy_buffer;
   }

   const VkMemoryAllocateInfo mem_alloc_info = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
      .allocationSize = mem_req->size,
      .memoryTypeIndex = mem_type_index,
   };
   result = vn_AllocateMemory(dev_handle, &mem_alloc_info, alloc,
                              &fb_buf->mem_handle);
   if (result != VK_SUCCESS)
      goto out_destroy_buffer;

   const VkBindBufferMemoryInfo bind_info = {
      .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
      .buffer = fb_buf->buf_handle,
      .memory = fb_buf->mem_handle,
      .memoryOffset = 0,
   };
   result = vn_BindBufferMemory2(dev_handle, 1, &bind_info);
   if (result != VK_SUCCESS)
      goto out_free_memory;

   result = vn_MapMemory(dev_handle, fb_buf->mem_handle, 0, VK_WHOLE_SIZE, 0,
                         &fb_buf->data);
   if (result != VK_SUCCESS)
      goto out_free_memory;

   *out_fb_buf = fb_buf;

   return VK_SUCCESS;

out_free_memory:
   vn_FreeMemory(dev_handle, fb_buf->mem_handle, alloc);

out_destroy_buffer:
   vn_DestroyBuffer(dev_handle, fb_buf->buf_handle, alloc);

out_free_feedback_buffer:
   vk_free(alloc, fb_buf);

   return result;
}

void
vn_feedback_buffer_destroy(struct vn_device *dev,
                           struct vn_feedback_buffer *fb_buf,
                           const VkAllocationCallbacks *alloc)
{
   VkDevice dev_handle = vn_device_to_handle(dev);

   vn_UnmapMemory(dev_handle, fb_buf->mem_handle);
   vn_FreeMemory(dev_handle, fb_buf->mem_handle, alloc);
   vn_DestroyBuffer(dev_handle, fb_buf->buf_handle, alloc);
   vk_free(alloc, fb_buf);
}

static inline uint32_t
vn_get_feedback_buffer_alignment(struct vn_feedback_buffer *fb_buf)
{
   struct vn_buffer *buf = vn_buffer_from_handle(fb_buf->buf_handle);
   return buf->requirements.memory.memoryRequirements.alignment;
}

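/* Grow the pool by appending a freshly created feedback buffer and reset
 * the bump allocator state. Must be called with pool->mutex held.
 */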
static VkResult
vn_feedback_pool_grow_locked(struct vn_feedback_pool *pool)
{
   VN_TRACE_FUNC();
   struct vn_feedback_buffer *fb_buf = NULL;
   VkResult result;

   result =
      vn_feedback_buffer_create(pool->dev, pool->size, pool->alloc, &fb_buf);
   if (result != VK_SUCCESS)
      return result;

   pool->used = 0;
   pool->alignment = vn_get_feedback_buffer_alignment(fb_buf);

   list_add(&fb_buf->head, &pool->fb_bufs);

   return VK_SUCCESS;
}

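/* Initialize the pool without any backing feedback buffer. pool->used is
 * set to pool->size so that the first allocation triggers the initial grow.
 */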
VkResult
vn_feedback_pool_init(struct vn_device *dev,
                      struct vn_feedback_pool *pool,
                      uint32_t size,
                      const VkAllocationCallbacks *alloc)
{
   simple_mtx_init(&pool->mutex, mtx_plain);

   pool->dev = dev;
   pool->alloc = alloc;
   pool->size = size;
   pool->used = size;
   pool->alignment = 1;
   list_inithead(&pool->fb_bufs);
   list_inithead(&pool->free_slots);

   return VK_SUCCESS;
}

void
vn_feedback_pool_fini(struct vn_feedback_pool *pool)
{
   list_for_each_entry_safe(struct vn_feedback_slot, slot, &pool->free_slots,
                            head)
      vk_free(pool->alloc, slot);

   list_for_each_entry_safe(struct vn_feedback_buffer, fb_buf, &pool->fb_bufs,
                            head)
      vn_feedback_buffer_destroy(pool->dev, fb_buf, pool->alloc);

   simple_mtx_destroy(&pool->mutex);
}

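/* Bump-allocate size bytes from the most recently added feedback buffer,
 * growing the pool when needed. Must be called with pool->mutex held.
 */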
static struct vn_feedback_buffer *
vn_feedback_pool_alloc_locked(struct vn_feedback_pool *pool,
                              uint32_t size,
                              uint32_t *out_offset)
{
   /* Default values of pool->used and pool->alignment are used to trigger
    * the initial pool grow, and will be properly initialized after that.
    */
   if (unlikely(align(size, pool->alignment) > pool->size - pool->used)) {
      VkResult result = vn_feedback_pool_grow_locked(pool);
      if (result != VK_SUCCESS)
         return NULL;

      assert(align(size, pool->alignment) <= pool->size - pool->used);
   }

   *out_offset = pool->used;
   pool->used += align(size, pool->alignment);

   return list_first_entry(&pool->fb_bufs, struct vn_feedback_buffer, head);
}

struct vn_feedback_slot *
vn_feedback_pool_alloc(struct vn_feedback_pool *pool,
                       enum vn_feedback_type type)
{
   static const uint32_t slot_size = 8;
   struct vn_feedback_buffer *fb_buf;
   uint32_t offset;
   struct vn_feedback_slot *slot;

   simple_mtx_lock(&pool->mutex);
   if (!list_is_empty(&pool->free_slots)) {
      slot =
         list_first_entry(&pool->free_slots, struct vn_feedback_slot, head);
      list_del(&slot->head);
      simple_mtx_unlock(&pool->mutex);

      slot->type = type;
      return slot;
   }

   slot = vk_alloc(pool->alloc, sizeof(*slot), VN_DEFAULT_ALIGN,
                   VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!slot) {
      simple_mtx_unlock(&pool->mutex);
      return NULL;
   }

   fb_buf = vn_feedback_pool_alloc_locked(pool, slot_size, &offset);
   simple_mtx_unlock(&pool->mutex);

   if (!fb_buf) {
      vk_free(pool->alloc, slot);
      return NULL;
   }

   slot->type = type;
   slot->offset = offset;
   slot->buf_handle = fb_buf->buf_handle;
   slot->data = fb_buf->data + offset;

   return slot;
}

void
vn_feedback_pool_free(struct vn_feedback_pool *pool,
                      struct vn_feedback_slot *slot)
{
   simple_mtx_lock(&pool->mutex);
   list_add(&slot->head, &pool->free_slots);
   simple_mtx_unlock(&pool->mutex);
}

static inline bool
mask_is_32bit(uint64_t x)
{
   return (x & 0xffffffff00000000) == 0;
}

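/* Flatten a VkDependencyInfo carrying exactly one buffer memory barrier
 * into a legacy VkBufferMemoryBarrier plus src/dst stage masks so it can be
 * recorded with vn_CmdPipelineBarrier on the non-sync2 path.
 */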
static void
vn_build_buffer_memory_barrier(const VkDependencyInfo *dep_info,
                               VkBufferMemoryBarrier *barrier1,
                               VkPipelineStageFlags *src_stage_mask,
                               VkPipelineStageFlags *dst_stage_mask)
{
   assert(dep_info->pNext == NULL);
   assert(dep_info->memoryBarrierCount == 0);
   assert(dep_info->bufferMemoryBarrierCount == 1);
   assert(dep_info->imageMemoryBarrierCount == 0);

   const VkBufferMemoryBarrier2 *barrier2 =
      &dep_info->pBufferMemoryBarriers[0];
   assert(barrier2->pNext == NULL);
   assert(mask_is_32bit(barrier2->srcStageMask));
   assert(mask_is_32bit(barrier2->srcAccessMask));
   assert(mask_is_32bit(barrier2->dstStageMask));
   assert(mask_is_32bit(barrier2->dstAccessMask));

   *barrier1 = (VkBufferMemoryBarrier){
      .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
      .pNext = NULL,
      .srcAccessMask = barrier2->srcAccessMask,
      .dstAccessMask = barrier2->dstAccessMask,
      .srcQueueFamilyIndex = barrier2->srcQueueFamilyIndex,
      .dstQueueFamilyIndex = barrier2->dstQueueFamilyIndex,
      .buffer = barrier2->buffer,
      .offset = barrier2->offset,
      .size = barrier2->size,
   };

   *src_stage_mask = barrier2->srcStageMask;
   *dst_stage_mask = barrier2->dstStageMask;
}

static void
vn_cmd_buffer_memory_barrier(VkCommandBuffer cmd_handle,
                             const VkDependencyInfo *dep_info,
                             bool sync2)
{
   if (sync2) {
      vn_CmdPipelineBarrier2(cmd_handle, dep_info);
   } else {
      VkBufferMemoryBarrier barrier1;
      VkPipelineStageFlags src_stage_mask;
      VkPipelineStageFlags dst_stage_mask;

      vn_build_buffer_memory_barrier(dep_info, &barrier1, &src_stage_mask,
                                     &dst_stage_mask);
      vn_CmdPipelineBarrier(cmd_handle, src_stage_mask, dst_stage_mask,
                            dep_info->dependencyFlags, 0, NULL, 1, &barrier1,
                            0, NULL);
   }
}

void
vn_event_feedback_cmd_record(VkCommandBuffer cmd_handle,
                             VkEvent ev_handle,
                             VkPipelineStageFlags2 src_stage_mask,
                             VkResult status,
                             bool sync2)
{
   /* For vkCmdSetEvent and vkCmdResetEvent feedback interception.
    *
    * The injection point is after the event call to avoid introducing
    * unexpected src stage waiting for VK_PIPELINE_STAGE_HOST_BIT and
    * VK_PIPELINE_STAGE_TRANSFER_BIT if they are not already being waited on
    * by vkCmdSetEvent or vkCmdResetEvent. On the other hand, the delay in
    * the feedback signal is acceptable for the nature of VkEvent, and the
    * event feedback cmds lifecycle is guarded by the intercepted command
    * buffer.
    */
   struct vn_event *ev = vn_event_from_handle(ev_handle);
   struct vn_feedback_slot *slot = ev->feedback_slot;

   if (!slot)
      return;

   STATIC_ASSERT(sizeof(*slot->status) == 4);

   const VkDependencyInfo dep_before = {
      .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
      .dependencyFlags = 0,
      .bufferMemoryBarrierCount = 1,
      .pBufferMemoryBarriers =
         (VkBufferMemoryBarrier2[]){
            {
               .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
               .srcStageMask = src_stage_mask | VK_PIPELINE_STAGE_HOST_BIT |
                               VK_PIPELINE_STAGE_TRANSFER_BIT,
               .srcAccessMask =
                  VK_ACCESS_HOST_WRITE_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
               .dstStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT,
               .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
               .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
               .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
               .buffer = slot->buf_handle,
               .offset = slot->offset,
               .size = 4,
            },
         },
   };
   vn_cmd_buffer_memory_barrier(cmd_handle, &dep_before, sync2);

   vn_CmdFillBuffer(cmd_handle, slot->buf_handle, slot->offset, 4, status);

   const VkDependencyInfo dep_after = {
      .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
      .dependencyFlags = 0,
      .bufferMemoryBarrierCount = 1,
      .pBufferMemoryBarriers =
         (VkBufferMemoryBarrier2[]){
            {
               .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
               .srcStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT,
               .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
               .dstStageMask = VK_PIPELINE_STAGE_HOST_BIT,
               .dstAccessMask =
                  VK_ACCESS_HOST_READ_BIT | VK_ACCESS_HOST_WRITE_BIT,
               .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
               .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
               .buffer = slot->buf_handle,
               .offset = slot->offset,
               .size = 4,
            },
         },
   };
   vn_cmd_buffer_memory_barrier(cmd_handle, &dep_after, sync2);
}

static inline void
vn_feedback_cmd_record_flush_barrier(VkCommandBuffer cmd_handle,
                                     VkBuffer buffer,
                                     VkDeviceSize offset,
                                     VkDeviceSize size)
{
   const VkBufferMemoryBarrier buf_flush_barrier = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
      .pNext = NULL,
      .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
      .dstAccessMask = VK_ACCESS_HOST_READ_BIT | VK_ACCESS_HOST_WRITE_BIT,
      .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .buffer = buffer,
      .offset = offset,
      .size = size,
   };
   vn_CmdPipelineBarrier(cmd_handle, VK_PIPELINE_STAGE_TRANSFER_BIT,
                         VK_PIPELINE_STAGE_HOST_BIT, 0, 0, NULL, 1,
                         &buf_flush_barrier, 0, NULL);
}

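/* Record the one-shot feedback cmd that updates dst_slot on the device:
 * fence feedback (src_slot == NULL) fills the 4-byte slot with VK_SUCCESS,
 * while timeline semaphore feedback copies the 8-byte counter from src_slot
 * into dst_slot, with barriers ordering against prior work and flushing the
 * result for host reads.
 */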
static VkResult
vn_feedback_cmd_record(VkCommandBuffer cmd_handle,
                       struct vn_feedback_slot *dst_slot,
                       struct vn_feedback_slot *src_slot)
{
   STATIC_ASSERT(sizeof(*dst_slot->status) == 4);
   STATIC_ASSERT(sizeof(*dst_slot->counter) == 8);
   STATIC_ASSERT(sizeof(*src_slot->counter) == 8);

   /* The slot size is 8 bytes for a timeline semaphore and 4 bytes for a
    * fence. src_slot is non-NULL for a timeline semaphore.
    */
   const VkDeviceSize buf_size = src_slot ? 8 : 4;

   static const VkCommandBufferBeginInfo begin_info = {
      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
      .pNext = NULL,
      .flags = 0,
      .pInheritanceInfo = NULL,
   };
   VkResult result = vn_BeginCommandBuffer(cmd_handle, &begin_info);
   if (result != VK_SUCCESS)
      return result;

   static const VkMemoryBarrier mem_barrier_before = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
      .pNext = NULL,
      /* make pending writes available to stay close to signal op */
      .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
      /* no need to make all memory visible for feedback update */
      .dstAccessMask = 0,
   };

   const VkBufferMemoryBarrier buf_barrier_before = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
      .pNext = NULL,
      /* slot memory has been made available via mem_barrier_before */
      .srcAccessMask = 0,
      .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
      .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .buffer = dst_slot->buf_handle,
      .offset = dst_slot->offset,
      .size = buf_size,
   };

   /* host writes for src_slots should implicitly be made visible upon the
    * QueueSubmit call
    */
   vn_CmdPipelineBarrier(cmd_handle, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                         VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 1,
                         &mem_barrier_before, 1, &buf_barrier_before, 0,
                         NULL);

   /* If passed a src_slot, timeline semaphore feedback records a
    * cmd to copy the counter value from the src slot to the dst slot.
    * If src_slot is NULL, then fence feedback records a cmd to fill
    * the dst slot with VK_SUCCESS.
    */
   if (src_slot) {
      assert(src_slot->type == VN_FEEDBACK_TYPE_SEMAPHORE);
      assert(dst_slot->type == VN_FEEDBACK_TYPE_SEMAPHORE);

      const VkBufferCopy buffer_copy = {
         .srcOffset = src_slot->offset,
         .dstOffset = dst_slot->offset,
         .size = buf_size,
      };
      vn_CmdCopyBuffer(cmd_handle, src_slot->buf_handle, dst_slot->buf_handle,
                       1, &buffer_copy);
   } else {
      assert(dst_slot->type == VN_FEEDBACK_TYPE_FENCE);

      vn_CmdFillBuffer(cmd_handle, dst_slot->buf_handle, dst_slot->offset,
                       buf_size, VK_SUCCESS);
   }

   vn_feedback_cmd_record_flush_barrier(cmd_handle, dst_slot->buf_handle,
                                        dst_slot->offset, buf_size);

   return vn_EndCommandBuffer(cmd_handle);
}

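/* Allocate semaphore feedback state: a src slot that holds the counter
 * value to signal plus one feedback cmd per queue family, each recorded to
 * copy the counter from the src slot into dst_slot.
 */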
struct vn_semaphore_feedback_cmd *
vn_semaphore_feedback_cmd_alloc(struct vn_device *dev,
                                struct vn_feedback_slot *dst_slot)
{
   const VkAllocationCallbacks *alloc = &dev->base.base.alloc;
   struct vn_semaphore_feedback_cmd *sfb_cmd;
   VkCommandBuffer *cmd_handles;

   VK_MULTIALLOC(ma);
   vk_multialloc_add(&ma, &sfb_cmd, __typeof__(*sfb_cmd), 1);
   vk_multialloc_add(&ma, &cmd_handles, __typeof__(*cmd_handles),
                     dev->queue_family_count);
   if (!vk_multialloc_zalloc(&ma, alloc, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT))
      return NULL;

   struct vn_feedback_slot *src_slot =
      vn_feedback_pool_alloc(&dev->feedback_pool, VN_FEEDBACK_TYPE_SEMAPHORE);
   if (!src_slot) {
      vk_free(alloc, sfb_cmd);
      return NULL;
   }

   for (uint32_t i = 0; i < dev->queue_family_count; i++) {
      VkDevice dev_handle = vn_device_to_handle(dev);
      VkResult result =
         vn_feedback_cmd_alloc(dev_handle, &dev->fb_cmd_pools[i], dst_slot,
                               src_slot, &cmd_handles[i]);
      if (result != VK_SUCCESS) {
         for (uint32_t j = 0; j < i; j++) {
            vn_feedback_cmd_free(dev_handle, &dev->fb_cmd_pools[j],
                                 cmd_handles[j]);
         }

         vn_feedback_pool_free(&dev->feedback_pool, src_slot);
         vk_free(alloc, sfb_cmd);
         return NULL;
      }
   }

   sfb_cmd->cmd_handles = cmd_handles;
   sfb_cmd->src_slot = src_slot;
   return sfb_cmd;
}

void
vn_semaphore_feedback_cmd_free(struct vn_device *dev,
                               struct vn_semaphore_feedback_cmd *sfb_cmd)
{
   const VkAllocationCallbacks *alloc = &dev->base.base.alloc;

   for (uint32_t i = 0; i < dev->queue_family_count; i++) {
      vn_feedback_cmd_free(vn_device_to_handle(dev), &dev->fb_cmd_pools[i],
                           sfb_cmd->cmd_handles[i]);
   }

   vn_feedback_pool_free(&dev->feedback_pool, sfb_cmd->src_slot);
   vk_free(alloc, sfb_cmd);
}

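/* Record query feedback into the query pool's feedback buffer: either copy
 * results and availability for [query, query + count) out of the query pool
 * (copy == true), or zero the corresponding slots for a reset
 * (copy == false).
 */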
static void
vn_feedback_query_cmd_record(VkCommandBuffer cmd_handle,
                             VkQueryPool pool_handle,
                             uint32_t query,
                             uint32_t count,
                             bool copy)
{
   struct vn_query_pool *pool = vn_query_pool_from_handle(pool_handle);
   if (!pool->fb_buf)
      return;

   /* Results are always 64 bit and include availability bit (also 64 bit) */
   const VkDeviceSize slot_size = (pool->result_array_size * 8) + 8;
   const VkDeviceSize offset = slot_size * query;
   const VkDeviceSize buf_size = slot_size * count;

   /* The first synchronization scope of vkCmdCopyQueryPoolResults does not
    * include the query feedback buffer. Insert a barrier to ensure ordering
    * against the feedback buffer fill cmd injected in vkCmdResetQueryPool.
    *
    * The second synchronization scope of vkCmdResetQueryPool does not
    * include the query feedback buffer. Insert a barrier to ensure ordering
    * against prior cmds referencing the queries.
    *
    * For srcAccessMask, VK_ACCESS_TRANSFER_WRITE_BIT is sufficient since the
    * gpu cache invalidation for the feedback buffer fill in
    * vkCmdResetQueryPool is done implicitly via queue submission.
    */
   const VkPipelineStageFlags src_stage_mask =
      copy ? VK_PIPELINE_STAGE_TRANSFER_BIT
           : VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;

   const VkBufferMemoryBarrier buf_barrier_before = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
      .pNext = NULL,
      .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
      .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
      .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .buffer = pool->fb_buf->buf_handle,
      .offset = offset,
      .size = buf_size,
   };
   vn_CmdPipelineBarrier(cmd_handle, src_stage_mask,
                         VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1,
                         &buf_barrier_before, 0, NULL);

   if (copy) {
      /* Per spec: "The first synchronization scope includes all commands
       * which reference the queries in queryPool indicated by query that
       * occur earlier in submission order. If flags does not include
       * VK_QUERY_RESULT_WAIT_BIT, vkCmdEndQueryIndexedEXT,
       * vkCmdWriteTimestamp2, vkCmdEndQuery, and vkCmdWriteTimestamp are
       * excluded from this scope."
       *
       * Set VK_QUERY_RESULT_WAIT_BIT to ensure ordering after
       * vkCmdEndQuery or vkCmdWriteTimestamp makes the query available.
       *
       * Set VK_QUERY_RESULT_64_BIT as we can convert it to 32 bit if the app
       * requested that.
       *
       * Per spec: "vkCmdCopyQueryPoolResults is considered to be a transfer
       * operation, and its writes to buffer memory must be synchronized
       * using VK_PIPELINE_STAGE_TRANSFER_BIT and
       * VK_ACCESS_TRANSFER_WRITE_BIT before using the results."
       *
       * So we can reuse the flush barrier after this copy cmd.
       */
      vn_CmdCopyQueryPoolResults(cmd_handle, pool_handle, query, count,
                                 pool->fb_buf->buf_handle, offset, slot_size,
                                 VK_QUERY_RESULT_WITH_AVAILABILITY_BIT |
                                    VK_QUERY_RESULT_64_BIT |
                                    VK_QUERY_RESULT_WAIT_BIT);
   } else {
      vn_CmdFillBuffer(cmd_handle, pool->fb_buf->buf_handle, offset, buf_size,
                       0);
   }

   vn_feedback_cmd_record_flush_barrier(cmd_handle, pool->fb_buf->buf_handle,
                                        offset, buf_size);
}

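/* Get a query feedback cmd from the feedback cmd pool, reusing one from the
 * free list when available and allocating a new primary cmd buffer
 * otherwise.
 */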
VkResult
vn_feedback_query_cmd_alloc(VkDevice dev_handle,
                            struct vn_feedback_cmd_pool *fb_cmd_pool,
                            struct vn_query_feedback_cmd **out_qfb_cmd)
{
   VkCommandPool cmd_pool_handle = fb_cmd_pool->pool_handle;
   const VkCommandBufferAllocateInfo info = {
      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
      .pNext = NULL,
      .commandPool = cmd_pool_handle,
      .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
      .commandBufferCount = 1,
   };
   struct vn_command_pool *cmd_pool =
      vn_command_pool_from_handle(cmd_pool_handle);
   struct vn_query_feedback_cmd *qfb_cmd = NULL;

   simple_mtx_lock(&fb_cmd_pool->mutex);
   if (!list_is_empty(&fb_cmd_pool->free_qfb_cmds)) {
      qfb_cmd = list_first_entry(&fb_cmd_pool->free_qfb_cmds,
                                 struct vn_query_feedback_cmd, head);
      list_del(&qfb_cmd->head);
   }
   simple_mtx_unlock(&fb_cmd_pool->mutex);

   if (!qfb_cmd) {
      VkCommandBuffer qfb_cmd_handle;
      VkResult result;

      qfb_cmd = vk_alloc(&cmd_pool->allocator, sizeof(*qfb_cmd),
                         VN_DEFAULT_ALIGN, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
      if (!qfb_cmd)
         return VK_ERROR_OUT_OF_HOST_MEMORY;

      simple_mtx_lock(&fb_cmd_pool->mutex);
      result = vn_AllocateCommandBuffers(dev_handle, &info, &qfb_cmd_handle);
      simple_mtx_unlock(&fb_cmd_pool->mutex);

      if (result != VK_SUCCESS) {
         vk_free(&cmd_pool->allocator, qfb_cmd);
         return result;
      }

      qfb_cmd->fb_cmd_pool = fb_cmd_pool;
      qfb_cmd->cmd = vn_command_buffer_from_handle(qfb_cmd_handle);
   }

   *out_qfb_cmd = qfb_cmd;

   return VK_SUCCESS;
}

void
vn_feedback_query_cmd_free(struct vn_query_feedback_cmd *qfb_cmd)
{
   simple_mtx_lock(&qfb_cmd->fb_cmd_pool->mutex);
   vn_ResetCommandBuffer(vn_command_buffer_to_handle(qfb_cmd->cmd), 0);
   list_add(&qfb_cmd->head, &qfb_cmd->fb_cmd_pool->free_qfb_cmds);
   simple_mtx_unlock(&qfb_cmd->fb_cmd_pool->mutex);
}

VkResult
vn_feedback_query_batch_record(VkDevice dev_handle,
                               struct vn_query_feedback_cmd *qfb_cmd,
                               struct list_head *combined_query_batches)
{
   static const VkCommandBufferBeginInfo begin_info = {
      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
   };
   VkCommandBuffer qfb_cmd_handle = vn_command_buffer_to_handle(qfb_cmd->cmd);
   VkResult result;

   simple_mtx_lock(&qfb_cmd->fb_cmd_pool->mutex);

   result = vn_BeginCommandBuffer(qfb_cmd_handle, &begin_info);
   if (result != VK_SUCCESS) {
      vn_FreeCommandBuffers(dev_handle, qfb_cmd->fb_cmd_pool->pool_handle, 1,
                            &qfb_cmd_handle);
      goto out_unlock;
   }

   list_for_each_entry_safe(struct vn_feedback_query_batch, batch,
                            combined_query_batches, head) {
      vn_feedback_query_cmd_record(
         qfb_cmd_handle, vn_query_pool_to_handle(batch->query_pool),
         batch->query, batch->query_count, batch->copy);
   }

   result = vn_EndCommandBuffer(qfb_cmd_handle);
   if (result != VK_SUCCESS) {
      vn_FreeCommandBuffers(dev_handle, qfb_cmd->fb_cmd_pool->pool_handle, 1,
                            &qfb_cmd_handle);
      goto out_unlock;
   }

out_unlock:
   simple_mtx_unlock(&qfb_cmd->fb_cmd_pool->mutex);

   return result;
}

VkResult
vn_feedback_cmd_alloc(VkDevice dev_handle,
                      struct vn_feedback_cmd_pool *fb_cmd_pool,
                      struct vn_feedback_slot *dst_slot,
                      struct vn_feedback_slot *src_slot,
                      VkCommandBuffer *out_cmd_handle)
{
   VkCommandPool cmd_pool_handle = fb_cmd_pool->pool_handle;
   const VkCommandBufferAllocateInfo info = {
      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
      .pNext = NULL,
      .commandPool = cmd_pool_handle,
      .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
      .commandBufferCount = 1,
   };
   VkCommandBuffer cmd_handle;
   VkResult result;

   simple_mtx_lock(&fb_cmd_pool->mutex);
   result = vn_AllocateCommandBuffers(dev_handle, &info, &cmd_handle);
   if (result != VK_SUCCESS)
      goto out_unlock;

   result = vn_feedback_cmd_record(cmd_handle, dst_slot, src_slot);
   if (result != VK_SUCCESS) {
      vn_FreeCommandBuffers(dev_handle, cmd_pool_handle, 1, &cmd_handle);
      goto out_unlock;
   }

   *out_cmd_handle = cmd_handle;

out_unlock:
   simple_mtx_unlock(&fb_cmd_pool->mutex);

   return result;
}

void
vn_feedback_cmd_free(VkDevice dev_handle,
                     struct vn_feedback_cmd_pool *fb_cmd_pool,
                     VkCommandBuffer cmd_handle)
{
   simple_mtx_lock(&fb_cmd_pool->mutex);
   vn_FreeCommandBuffers(dev_handle, fb_cmd_pool->pool_handle, 1,
                         &cmd_handle);
   simple_mtx_unlock(&fb_cmd_pool->mutex);
}

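/* Create one feedback cmd pool per queue family. Skipped entirely when
 * fence, semaphore and query feedback are all disabled via VN_PERF flags.
 */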
VkResult
vn_feedback_cmd_pools_init(struct vn_device *dev)
{
   const VkAllocationCallbacks *alloc = &dev->base.base.alloc;
   VkDevice dev_handle = vn_device_to_handle(dev);
   struct vn_feedback_cmd_pool *fb_cmd_pools;
   VkCommandPoolCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
      .pNext = NULL,
      .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
   };

   if (VN_PERF(NO_FENCE_FEEDBACK) && VN_PERF(NO_SEMAPHORE_FEEDBACK) &&
       VN_PERF(NO_QUERY_FEEDBACK))
      return VK_SUCCESS;

   assert(dev->queue_family_count);

   fb_cmd_pools =
      vk_zalloc(alloc, sizeof(*fb_cmd_pools) * dev->queue_family_count,
                VN_DEFAULT_ALIGN, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!fb_cmd_pools)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   for (uint32_t i = 0; i < dev->queue_family_count; i++) {
      VkResult result;

      info.queueFamilyIndex = dev->queue_families[i];
      result = vn_CreateCommandPool(dev_handle, &info, alloc,
                                    &fb_cmd_pools[i].pool_handle);
      if (result != VK_SUCCESS) {
         for (uint32_t j = 0; j < i; j++) {
            vn_DestroyCommandPool(dev_handle, fb_cmd_pools[j].pool_handle,
                                  alloc);
            simple_mtx_destroy(&fb_cmd_pools[j].mutex);
         }

         vk_free(alloc, fb_cmd_pools);
         return result;
      }

      simple_mtx_init(&fb_cmd_pools[i].mutex, mtx_plain);
      list_inithead(&fb_cmd_pools[i].free_qfb_cmds);
   }

   dev->fb_cmd_pools = fb_cmd_pools;

   return VK_SUCCESS;
}

void
vn_feedback_cmd_pools_fini(struct vn_device *dev)
{
   const VkAllocationCallbacks *alloc = &dev->base.base.alloc;
   VkDevice dev_handle = vn_device_to_handle(dev);

   if (!dev->fb_cmd_pools)
      return;

   for (uint32_t i = 0; i < dev->queue_family_count; i++) {
      list_for_each_entry_safe(struct vn_query_feedback_cmd, feedback_cmd,
                               &dev->fb_cmd_pools[i].free_qfb_cmds, head)
         vk_free(alloc, feedback_cmd);

      vn_DestroyCommandPool(dev_handle, dev->fb_cmd_pools[i].pool_handle,
                            alloc);
      simple_mtx_destroy(&dev->fb_cmd_pools[i].mutex);
   }

   vk_free(alloc, dev->fb_cmd_pools);
}