/*
 * Copyright 2022 Google LLC
 * SPDX-License-Identifier: MIT
 */

#include "vn_feedback.h"

#include "vn_command_buffer.h"
#include "vn_device.h"
#include "vn_physical_device.h"
#include "vn_query_pool.h"
#include "vn_queue.h"

static uint32_t
vn_get_memory_type_index(const VkPhysicalDeviceMemoryProperties *mem_props,
                         uint32_t mem_type_bits,
                         VkMemoryPropertyFlags required_mem_flags)
{
   u_foreach_bit(mem_type_index, mem_type_bits)
   {
      assert(mem_type_index < mem_props->memoryTypeCount);
      if ((mem_props->memoryTypes[mem_type_index].propertyFlags &
           required_mem_flags) == required_mem_flags)
         return mem_type_index;
   }

   return UINT32_MAX;
}

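/* Feedback buffers back the feedback slots handed out by vn_feedback_pool.
 * Each buffer is host-coherent and persistently mapped, so the driver can
 * read the status or counter values written into it by transfer cmds
 * directly from the mapped memory.
 */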
VkResult
vn_feedback_buffer_create(struct vn_device *dev,
                          uint32_t size,
                          const VkAllocationCallbacks *alloc,
                          struct vn_feedback_buffer **out_fb_buf)
{
   const bool exclusive = dev->queue_family_count == 1;
   const VkPhysicalDeviceMemoryProperties *mem_props =
      &dev->physical_device->memory_properties;
   VkDevice dev_handle = vn_device_to_handle(dev);
   VkResult result;

   struct vn_feedback_buffer *fb_buf =
      vk_zalloc(alloc, sizeof(*fb_buf), VN_DEFAULT_ALIGN,
                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!fb_buf)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   /* Use concurrent sharing to avoid explicit queue family ownership
    * transfers for a device created with queues from multiple queue
    * families.
    */
   const VkBufferCreateInfo buf_create_info = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
      .size = size,
      /* Feedback for fences and timeline semaphores will write to this buffer
       * as a DST when signalling. Timeline semaphore feedback will also read
       * from this buffer as a SRC to retrieve the counter value to signal.
       */
      .usage =
         VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
      .sharingMode =
         exclusive ? VK_SHARING_MODE_EXCLUSIVE : VK_SHARING_MODE_CONCURRENT,
      /* below favors the current venus protocol */
      .queueFamilyIndexCount = exclusive ? 0 : dev->queue_family_count,
      .pQueueFamilyIndices = exclusive ? NULL : dev->queue_families,
   };
   result = vn_CreateBuffer(dev_handle, &buf_create_info, alloc,
                            &fb_buf->buf_handle);
   if (result != VK_SUCCESS)
      goto out_free_feedback_buffer;

   struct vn_buffer *buf = vn_buffer_from_handle(fb_buf->buf_handle);
   const VkMemoryRequirements *mem_req =
      &buf->requirements.memory.memoryRequirements;
   const uint32_t mem_type_index =
      vn_get_memory_type_index(mem_props, mem_req->memoryTypeBits,
                               VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
   if (mem_type_index >= mem_props->memoryTypeCount) {
      result = VK_ERROR_INITIALIZATION_FAILED;
      goto out_destroy_buffer;
   }

   const VkMemoryAllocateInfo mem_alloc_info = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
      .allocationSize = mem_req->size,
      .memoryTypeIndex = mem_type_index,
   };
   result = vn_AllocateMemory(dev_handle, &mem_alloc_info, alloc,
                              &fb_buf->mem_handle);
   if (result != VK_SUCCESS)
      goto out_destroy_buffer;

   const VkBindBufferMemoryInfo bind_info = {
      .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
      .buffer = fb_buf->buf_handle,
      .memory = fb_buf->mem_handle,
      .memoryOffset = 0,
   };
   result = vn_BindBufferMemory2(dev_handle, 1, &bind_info);
   if (result != VK_SUCCESS)
      goto out_free_memory;

   result = vn_MapMemory(dev_handle, fb_buf->mem_handle, 0, VK_WHOLE_SIZE, 0,
                         &fb_buf->data);
   if (result != VK_SUCCESS)
      goto out_free_memory;

   *out_fb_buf = fb_buf;

   return VK_SUCCESS;

out_free_memory:
   vn_FreeMemory(dev_handle, fb_buf->mem_handle, alloc);

out_destroy_buffer:
   vn_DestroyBuffer(dev_handle, fb_buf->buf_handle, alloc);

out_free_feedback_buffer:
   vk_free(alloc, fb_buf);

   return result;
}

void
vn_feedback_buffer_destroy(struct vn_device *dev,
                           struct vn_feedback_buffer *fb_buf,
                           const VkAllocationCallbacks *alloc)
{
   VkDevice dev_handle = vn_device_to_handle(dev);

   vn_UnmapMemory(dev_handle, fb_buf->mem_handle);
   vn_FreeMemory(dev_handle, fb_buf->mem_handle, alloc);
   vn_DestroyBuffer(dev_handle, fb_buf->buf_handle, alloc);
   vk_free(alloc, fb_buf);
}

static inline uint32_t
vn_get_feedback_buffer_alignment(struct vn_feedback_buffer *fb_buf)
{
   struct vn_buffer *buf = vn_buffer_from_handle(fb_buf->buf_handle);
   return buf->requirements.memory.memoryRequirements.alignment;
}

static VkResult
vn_feedback_pool_grow_locked(struct vn_feedback_pool *pool)
{
   VN_TRACE_FUNC();
   struct vn_feedback_buffer *fb_buf = NULL;
   VkResult result;

   result =
      vn_feedback_buffer_create(pool->dev, pool->size, pool->alloc, &fb_buf);
   if (result != VK_SUCCESS)
      return result;

   pool->used = 0;
   pool->alignment = vn_get_feedback_buffer_alignment(fb_buf);

   list_add(&fb_buf->head, &pool->fb_bufs);

   return VK_SUCCESS;
}

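/* A rough usage sketch of the pool API below (error handling omitted; the
 * 4096 size and VN_FEEDBACK_TYPE_FENCE type are only illustrative):
 *
 *    struct vn_feedback_pool pool;
 *    vn_feedback_pool_init(dev, &pool, 4096, alloc);
 *
 *    struct vn_feedback_slot *slot =
 *       vn_feedback_pool_alloc(&pool, VN_FEEDBACK_TYPE_FENCE);
 *    ...
 *    vn_feedback_pool_free(&pool, slot);
 *
 *    vn_feedback_pool_fini(&pool);
 */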
VkResult
vn_feedback_pool_init(struct vn_device *dev,
                      struct vn_feedback_pool *pool,
                      uint32_t size,
                      const VkAllocationCallbacks *alloc)
{
   simple_mtx_init(&pool->mutex, mtx_plain);

   pool->dev = dev;
   pool->alloc = alloc;
   pool->size = size;
   pool->used = size;
   pool->alignment = 1;
   list_inithead(&pool->fb_bufs);
   list_inithead(&pool->free_slots);

   return VK_SUCCESS;
}

void
vn_feedback_pool_fini(struct vn_feedback_pool *pool)
{
   list_for_each_entry_safe(struct vn_feedback_slot, slot, &pool->free_slots,
                            head)
      vk_free(pool->alloc, slot);

   list_for_each_entry_safe(struct vn_feedback_buffer, fb_buf, &pool->fb_bufs,
                            head)
      vn_feedback_buffer_destroy(pool->dev, fb_buf, pool->alloc);

   simple_mtx_destroy(&pool->mutex);
}

static struct vn_feedback_buffer *
vn_feedback_pool_alloc_locked(struct vn_feedback_pool *pool,
                              uint32_t size,
                              uint32_t *out_offset)
{
   /* Default values of pool->used and pool->alignment are used to trigger the
    * initial pool grow, and will be properly initialized after that.
    */
   if (unlikely(align(size, pool->alignment) > pool->size - pool->used)) {
      VkResult result = vn_feedback_pool_grow_locked(pool);
      if (result != VK_SUCCESS)
         return NULL;

      assert(align(size, pool->alignment) <= pool->size - pool->used);
   }

   *out_offset = pool->used;
   pool->used += align(size, pool->alignment);

   return list_first_entry(&pool->fb_bufs, struct vn_feedback_buffer, head);
}

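/* Allocates an 8-byte feedback slot, preferring a previously freed slot from
 * free_slots over carving a new one out of the current feedback buffer.
 */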
struct vn_feedback_slot *
vn_feedback_pool_alloc(struct vn_feedback_pool *pool,
                       enum vn_feedback_type type)
{
   static const uint32_t slot_size = 8;
   struct vn_feedback_buffer *fb_buf;
   uint32_t offset;
   struct vn_feedback_slot *slot;

   simple_mtx_lock(&pool->mutex);
   if (!list_is_empty(&pool->free_slots)) {
      slot =
         list_first_entry(&pool->free_slots, struct vn_feedback_slot, head);
      list_del(&slot->head);
      simple_mtx_unlock(&pool->mutex);

      slot->type = type;
      return slot;
   }

   slot = vk_alloc(pool->alloc, sizeof(*slot), VN_DEFAULT_ALIGN,
                   VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!slot) {
      simple_mtx_unlock(&pool->mutex);
      return NULL;
   }

   fb_buf = vn_feedback_pool_alloc_locked(pool, slot_size, &offset);
   simple_mtx_unlock(&pool->mutex);

   if (!fb_buf) {
      vk_free(pool->alloc, slot);
      return NULL;
   }

   slot->type = type;
   slot->offset = offset;
   slot->buf_handle = fb_buf->buf_handle;
   slot->data = fb_buf->data + offset;

   return slot;
}

void
vn_feedback_pool_free(struct vn_feedback_pool *pool,
                      struct vn_feedback_slot *slot)
{
   simple_mtx_lock(&pool->mutex);
   list_add(&slot->head, &pool->free_slots);
   simple_mtx_unlock(&pool->mutex);
}

static inline bool
mask_is_32bit(uint64_t x)
{
   return (x & 0xffffffff00000000) == 0;
}

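/* Lowers a synchronization2 dependency carrying exactly one
 * VkBufferMemoryBarrier2 into the legacy VkBufferMemoryBarrier plus pipeline
 * stage masks, for recording on command buffers without sync2 support.
 */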
static void
vn_build_buffer_memory_barrier(const VkDependencyInfo *dep_info,
                               VkBufferMemoryBarrier *barrier1,
                               VkPipelineStageFlags *src_stage_mask,
                               VkPipelineStageFlags *dst_stage_mask)
{
   assert(dep_info->pNext == NULL);
   assert(dep_info->memoryBarrierCount == 0);
   assert(dep_info->bufferMemoryBarrierCount == 1);
   assert(dep_info->imageMemoryBarrierCount == 0);

   const VkBufferMemoryBarrier2 *barrier2 =
      &dep_info->pBufferMemoryBarriers[0];
   assert(barrier2->pNext == NULL);
   assert(mask_is_32bit(barrier2->srcStageMask));
   assert(mask_is_32bit(barrier2->srcAccessMask));
   assert(mask_is_32bit(barrier2->dstStageMask));
   assert(mask_is_32bit(barrier2->dstAccessMask));

   *barrier1 = (VkBufferMemoryBarrier){
      .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
      .pNext = NULL,
      .srcAccessMask = barrier2->srcAccessMask,
      .dstAccessMask = barrier2->dstAccessMask,
      .srcQueueFamilyIndex = barrier2->srcQueueFamilyIndex,
      .dstQueueFamilyIndex = barrier2->dstQueueFamilyIndex,
      .buffer = barrier2->buffer,
      .offset = barrier2->offset,
      .size = barrier2->size,
   };

   *src_stage_mask = barrier2->srcStageMask;
   *dst_stage_mask = barrier2->dstStageMask;
}

static void
vn_cmd_buffer_memory_barrier(VkCommandBuffer cmd_handle,
                             const VkDependencyInfo *dep_info,
                             bool sync2)
{
   if (sync2)
      vn_CmdPipelineBarrier2(cmd_handle, dep_info);
   else {
      VkBufferMemoryBarrier barrier1;
      VkPipelineStageFlags src_stage_mask;
      VkPipelineStageFlags dst_stage_mask;

      vn_build_buffer_memory_barrier(dep_info, &barrier1, &src_stage_mask,
                                     &dst_stage_mask);
      vn_CmdPipelineBarrier(cmd_handle, src_stage_mask, dst_stage_mask,
                            dep_info->dependencyFlags, 0, NULL, 1, &barrier1,
                            0, NULL);
   }
}

void
vn_event_feedback_cmd_record(VkCommandBuffer cmd_handle,
                             VkEvent ev_handle,
                             VkPipelineStageFlags2 src_stage_mask,
                             VkResult status,
                             bool sync2)
{
   /* For vkCmdSetEvent and vkCmdResetEvent feedback interception.
    *
    * The injection point is after the event call to avoid introducing an
    * unexpected src stage wait on VK_PIPELINE_STAGE_HOST_BIT and
    * VK_PIPELINE_STAGE_TRANSFER_BIT if they are not already waited on by
    * vkCmdSetEvent or vkCmdResetEvent. On the other hand, the delay in the
    * feedback signal is acceptable for the nature of VkEvent, and the event
    * feedback cmds lifecycle is guarded by the intercepted command buffer.
    */
   struct vn_event *ev = vn_event_from_handle(ev_handle);
   struct vn_feedback_slot *slot = ev->feedback_slot;

   if (!slot)
      return;

   STATIC_ASSERT(sizeof(*slot->status) == 4);

   const VkDependencyInfo dep_before = {
      .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
      .dependencyFlags = 0,
      .bufferMemoryBarrierCount = 1,
      .pBufferMemoryBarriers =
         (VkBufferMemoryBarrier2[]){
            {
               .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
               .srcStageMask = src_stage_mask | VK_PIPELINE_STAGE_HOST_BIT |
                               VK_PIPELINE_STAGE_TRANSFER_BIT,
               .srcAccessMask =
                  VK_ACCESS_HOST_WRITE_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
               .dstStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT,
               .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
               .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
               .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
               .buffer = slot->buf_handle,
               .offset = slot->offset,
               .size = 4,
            },
         },
   };
   vn_cmd_buffer_memory_barrier(cmd_handle, &dep_before, sync2);

   vn_CmdFillBuffer(cmd_handle, slot->buf_handle, slot->offset, 4, status);

   const VkDependencyInfo dep_after = {
      .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
      .dependencyFlags = 0,
      .bufferMemoryBarrierCount = 1,
      .pBufferMemoryBarriers =
         (VkBufferMemoryBarrier2[]){
            {
               .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
               .srcStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT,
               .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
               .dstStageMask = VK_PIPELINE_STAGE_HOST_BIT,
               .dstAccessMask =
                  VK_ACCESS_HOST_READ_BIT | VK_ACCESS_HOST_WRITE_BIT,
               .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
               .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
               .buffer = slot->buf_handle,
               .offset = slot->offset,
               .size = 4,
            },
         },
   };
   vn_cmd_buffer_memory_barrier(cmd_handle, &dep_after, sync2);
}

static inline void
vn_feedback_cmd_record_flush_barrier(VkCommandBuffer cmd_handle,
                                     VkBuffer buffer,
                                     VkDeviceSize offset,
                                     VkDeviceSize size)
{
   const VkBufferMemoryBarrier buf_flush_barrier = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
      .pNext = NULL,
      .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
      .dstAccessMask = VK_ACCESS_HOST_READ_BIT | VK_ACCESS_HOST_WRITE_BIT,
      .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .buffer = buffer,
      .offset = offset,
      .size = size,
   };
   vn_CmdPipelineBarrier(cmd_handle, VK_PIPELINE_STAGE_TRANSFER_BIT,
                         VK_PIPELINE_STAGE_HOST_BIT, 0, 0, NULL, 1,
                         &buf_flush_barrier, 0, NULL);
}

static VkResult
vn_feedback_cmd_record(VkCommandBuffer cmd_handle,
                       struct vn_feedback_slot *dst_slot,
                       struct vn_feedback_slot *src_slot)
{
   STATIC_ASSERT(sizeof(*dst_slot->status) == 4);
   STATIC_ASSERT(sizeof(*dst_slot->counter) == 8);
   STATIC_ASSERT(sizeof(*src_slot->counter) == 8);

   /* Slot size is 8 bytes for timeline semaphore and 4 bytes for fence.
    * src slot is non-null for timeline semaphore.
    */
   const VkDeviceSize buf_size = src_slot ? 8 : 4;

   static const VkCommandBufferBeginInfo begin_info = {
      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
      .pNext = NULL,
      .flags = 0,
      .pInheritanceInfo = NULL,
   };
   VkResult result = vn_BeginCommandBuffer(cmd_handle, &begin_info);
   if (result != VK_SUCCESS)
      return result;

   static const VkMemoryBarrier mem_barrier_before = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
      .pNext = NULL,
      /* make pending writes available to stay close to signal op */
      .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
      /* no need to make all memory visible for feedback update */
      .dstAccessMask = 0,
   };

   const VkBufferMemoryBarrier buf_barrier_before = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
      .pNext = NULL,
      /* slot memory has been made available via mem_barrier_before */
      .srcAccessMask = 0,
      .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
      .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .buffer = dst_slot->buf_handle,
      .offset = dst_slot->offset,
      .size = buf_size,
   };

   /* host writes for src_slots should implicitly be made visible upon
    * QueueSubmit call */
   vn_CmdPipelineBarrier(cmd_handle, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                         VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 1,
                         &mem_barrier_before, 1, &buf_barrier_before, 0,
                         NULL);

   /* If passed a src_slot, timeline semaphore feedback records a
    * cmd to copy the counter value from the src slot to the dst slot.
    * If src_slot is NULL, then fence feedback records a cmd to fill
    * the dst slot with VK_SUCCESS.
    */
   if (src_slot) {
      assert(src_slot->type == VN_FEEDBACK_TYPE_SEMAPHORE);
      assert(dst_slot->type == VN_FEEDBACK_TYPE_SEMAPHORE);

      const VkBufferCopy buffer_copy = {
         .srcOffset = src_slot->offset,
         .dstOffset = dst_slot->offset,
         .size = buf_size,
      };
      vn_CmdCopyBuffer(cmd_handle, src_slot->buf_handle, dst_slot->buf_handle,
                       1, &buffer_copy);
   } else {
      assert(dst_slot->type == VN_FEEDBACK_TYPE_FENCE);

      vn_CmdFillBuffer(cmd_handle, dst_slot->buf_handle, dst_slot->offset,
                       buf_size, VK_SUCCESS);
   }

   vn_feedback_cmd_record_flush_barrier(cmd_handle, dst_slot->buf_handle,
                                        dst_slot->offset, buf_size);

   return vn_EndCommandBuffer(cmd_handle);
}

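/* Allocates the timeline semaphore feedback state: a src slot holding the
 * counter value to signal, plus one feedback cmd recorded per queue family
 * from the matching feedback cmd pool.
 */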
struct vn_semaphore_feedback_cmd *
vn_semaphore_feedback_cmd_alloc(struct vn_device *dev,
                                struct vn_feedback_slot *dst_slot)
{
   const VkAllocationCallbacks *alloc = &dev->base.base.alloc;
   struct vn_semaphore_feedback_cmd *sfb_cmd;
   VkCommandBuffer *cmd_handles;

   VK_MULTIALLOC(ma);
   vk_multialloc_add(&ma, &sfb_cmd, __typeof__(*sfb_cmd), 1);
   vk_multialloc_add(&ma, &cmd_handles, __typeof__(*cmd_handles),
                     dev->queue_family_count);
   if (!vk_multialloc_zalloc(&ma, alloc, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT))
      return NULL;

   struct vn_feedback_slot *src_slot =
      vn_feedback_pool_alloc(&dev->feedback_pool, VN_FEEDBACK_TYPE_SEMAPHORE);
   if (!src_slot) {
      vk_free(alloc, sfb_cmd);
      return NULL;
   }

   for (uint32_t i = 0; i < dev->queue_family_count; i++) {
      VkDevice dev_handle = vn_device_to_handle(dev);
      VkResult result =
         vn_feedback_cmd_alloc(dev_handle, &dev->fb_cmd_pools[i], dst_slot,
                               src_slot, &cmd_handles[i]);
      if (result != VK_SUCCESS) {
         for (uint32_t j = 0; j < i; j++) {
            vn_feedback_cmd_free(dev_handle, &dev->fb_cmd_pools[j],
                                 cmd_handles[j]);
         }

         vn_feedback_pool_free(&dev->feedback_pool, src_slot);
         vk_free(alloc, sfb_cmd);
         return NULL;
      }
   }

   sfb_cmd->cmd_handles = cmd_handles;
   sfb_cmd->src_slot = src_slot;
   return sfb_cmd;
}

void
vn_semaphore_feedback_cmd_free(struct vn_device *dev,
                               struct vn_semaphore_feedback_cmd *sfb_cmd)
{
   const VkAllocationCallbacks *alloc = &dev->base.base.alloc;

   for (uint32_t i = 0; i < dev->queue_family_count; i++) {
      vn_feedback_cmd_free(vn_device_to_handle(dev), &dev->fb_cmd_pools[i],
                           sfb_cmd->cmd_handles[i]);
   }

   vn_feedback_pool_free(&dev->feedback_pool, sfb_cmd->src_slot);
   vk_free(alloc, sfb_cmd);
}

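/* Query feedback buffer layout: each query occupies one slot of
 * result_array_size 64-bit results followed by a 64-bit availability word.
 * Copy mode snapshots the results via vkCmdCopyQueryPoolResults; otherwise
 * the slots are zero-filled to mark the queries unavailable after a reset.
 */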
static void
vn_feedback_query_cmd_record(VkCommandBuffer cmd_handle,
                             VkQueryPool pool_handle,
                             uint32_t query,
                             uint32_t count,
                             bool copy)
{
   struct vn_query_pool *pool = vn_query_pool_from_handle(pool_handle);
   if (!pool->fb_buf)
      return;

   /* Results are always 64 bit and include availability bit (also 64 bit) */
   const VkDeviceSize slot_size = (pool->result_array_size * 8) + 8;
   const VkDeviceSize offset = slot_size * query;
   const VkDeviceSize buf_size = slot_size * count;

   /* The first synchronization scope of vkCmdCopyQueryPoolResults does not
    * include the query feedback buffer. Insert a barrier to ensure ordering
    * against the feedback buffer fill cmd injected in vkCmdResetQueryPool.
    *
    * The second synchronization scope of vkCmdResetQueryPool does not include
    * the query feedback buffer. Insert a barrier to ensure ordering against
    * prior cmds referencing the queries.
    *
    * For srcAccessMask, VK_ACCESS_TRANSFER_WRITE_BIT is sufficient since the
    * gpu cache invalidation for the feedback buffer fill in
    * vkCmdResetQueryPool is done implicitly via queue submission.
    */
   const VkPipelineStageFlags src_stage_mask =
      copy ? VK_PIPELINE_STAGE_TRANSFER_BIT
           : VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;

   const VkBufferMemoryBarrier buf_barrier_before = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
      .pNext = NULL,
      .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
      .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
      .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .buffer = pool->fb_buf->buf_handle,
      .offset = offset,
      .size = buf_size,
   };
   vn_CmdPipelineBarrier(cmd_handle, src_stage_mask,
                         VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1,
                         &buf_barrier_before, 0, NULL);

   if (copy) {
      /* Per spec: "The first synchronization scope includes all commands
       * which reference the queries in queryPool indicated by query that
       * occur earlier in submission order. If flags does not include
       * VK_QUERY_RESULT_WAIT_BIT, vkCmdEndQueryIndexedEXT,
       * vkCmdWriteTimestamp2, vkCmdEndQuery, and vkCmdWriteTimestamp are
       * excluded from this scope."
       *
       * Set VK_QUERY_RESULT_WAIT_BIT to ensure ordering after
       * vkCmdEndQuery or vkCmdWriteTimestamp makes the query available.
       *
       * Set VK_QUERY_RESULT_64_BIT as we can convert it to 32 bit if the app
       * requested that.
       *
       * Per spec: "vkCmdCopyQueryPoolResults is considered to be a transfer
       * operation, and its writes to buffer memory must be synchronized using
       * VK_PIPELINE_STAGE_TRANSFER_BIT and VK_ACCESS_TRANSFER_WRITE_BIT
       * before using the results."
       *
       * So we can reuse the flush barrier after this copy cmd.
       */
      vn_CmdCopyQueryPoolResults(cmd_handle, pool_handle, query, count,
                                 pool->fb_buf->buf_handle, offset, slot_size,
                                 VK_QUERY_RESULT_WITH_AVAILABILITY_BIT |
                                    VK_QUERY_RESULT_64_BIT |
                                    VK_QUERY_RESULT_WAIT_BIT);
   } else {
      vn_CmdFillBuffer(cmd_handle, pool->fb_buf->buf_handle, offset, buf_size,
                       0);
   }

   vn_feedback_cmd_record_flush_barrier(cmd_handle, pool->fb_buf->buf_handle,
                                        offset, buf_size);
}

VkResult
vn_feedback_query_cmd_alloc(VkDevice dev_handle,
                            struct vn_feedback_cmd_pool *fb_cmd_pool,
                            struct vn_query_feedback_cmd **out_qfb_cmd)
{
   VkCommandPool cmd_pool_handle = fb_cmd_pool->pool_handle;
   const VkCommandBufferAllocateInfo info = {
      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
      .pNext = NULL,
      .commandPool = cmd_pool_handle,
      .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
      .commandBufferCount = 1,
   };
   struct vn_command_pool *cmd_pool =
      vn_command_pool_from_handle(cmd_pool_handle);
   struct vn_query_feedback_cmd *qfb_cmd = NULL;

   simple_mtx_lock(&fb_cmd_pool->mutex);
   if (!list_is_empty(&fb_cmd_pool->free_qfb_cmds)) {
      qfb_cmd = list_first_entry(&fb_cmd_pool->free_qfb_cmds,
                                 struct vn_query_feedback_cmd, head);
      list_del(&qfb_cmd->head);
   }
   simple_mtx_unlock(&fb_cmd_pool->mutex);

   if (!qfb_cmd) {
      VkCommandBuffer qfb_cmd_handle;
      VkResult result;

      qfb_cmd = vk_alloc(&cmd_pool->allocator, sizeof(*qfb_cmd),
                         VN_DEFAULT_ALIGN, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
      if (!qfb_cmd)
         return VK_ERROR_OUT_OF_HOST_MEMORY;

      simple_mtx_lock(&fb_cmd_pool->mutex);
      result = vn_AllocateCommandBuffers(dev_handle, &info, &qfb_cmd_handle);
      simple_mtx_unlock(&fb_cmd_pool->mutex);

      if (result != VK_SUCCESS) {
         vk_free(&cmd_pool->allocator, qfb_cmd);
         return result;
      }

      qfb_cmd->fb_cmd_pool = fb_cmd_pool;
      qfb_cmd->cmd = vn_command_buffer_from_handle(qfb_cmd_handle);
   }

   *out_qfb_cmd = qfb_cmd;

   return VK_SUCCESS;
}

void
vn_feedback_query_cmd_free(struct vn_query_feedback_cmd *qfb_cmd)
{
   simple_mtx_lock(&qfb_cmd->fb_cmd_pool->mutex);
   vn_ResetCommandBuffer(vn_command_buffer_to_handle(qfb_cmd->cmd), 0);
   list_add(&qfb_cmd->head, &qfb_cmd->fb_cmd_pool->free_qfb_cmds);
   simple_mtx_unlock(&qfb_cmd->fb_cmd_pool->mutex);
}

VkResult
vn_feedback_query_batch_record(VkDevice dev_handle,
                               struct vn_query_feedback_cmd *qfb_cmd,
                               struct list_head *combined_query_batches)
{
   static const VkCommandBufferBeginInfo begin_info = {
      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
   };
   VkCommandBuffer qfb_cmd_handle = vn_command_buffer_to_handle(qfb_cmd->cmd);
   VkResult result;

   simple_mtx_lock(&qfb_cmd->fb_cmd_pool->mutex);

   result = vn_BeginCommandBuffer(qfb_cmd_handle, &begin_info);
   if (result != VK_SUCCESS) {
      vn_FreeCommandBuffers(dev_handle, qfb_cmd->fb_cmd_pool->pool_handle, 1,
                            &qfb_cmd_handle);
      goto out_unlock;
   }

   list_for_each_entry_safe(struct vn_feedback_query_batch, batch,
                            combined_query_batches, head) {
      vn_feedback_query_cmd_record(
         qfb_cmd_handle, vn_query_pool_to_handle(batch->query_pool),
         batch->query, batch->query_count, batch->copy);
   }

   result = vn_EndCommandBuffer(qfb_cmd_handle);
   if (result != VK_SUCCESS) {
      vn_FreeCommandBuffers(dev_handle, qfb_cmd->fb_cmd_pool->pool_handle, 1,
                            &qfb_cmd_handle);
      goto out_unlock;
   }

out_unlock:
   simple_mtx_unlock(&qfb_cmd->fb_cmd_pool->mutex);

   return result;
}

VkResult
vn_feedback_cmd_alloc(VkDevice dev_handle,
                      struct vn_feedback_cmd_pool *fb_cmd_pool,
                      struct vn_feedback_slot *dst_slot,
                      struct vn_feedback_slot *src_slot,
                      VkCommandBuffer *out_cmd_handle)
{
   VkCommandPool cmd_pool_handle = fb_cmd_pool->pool_handle;
   const VkCommandBufferAllocateInfo info = {
      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
      .pNext = NULL,
      .commandPool = cmd_pool_handle,
      .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
      .commandBufferCount = 1,
   };
   VkCommandBuffer cmd_handle;
   VkResult result;

   simple_mtx_lock(&fb_cmd_pool->mutex);
   result = vn_AllocateCommandBuffers(dev_handle, &info, &cmd_handle);
   if (result != VK_SUCCESS)
      goto out_unlock;

   result = vn_feedback_cmd_record(cmd_handle, dst_slot, src_slot);
   if (result != VK_SUCCESS) {
      vn_FreeCommandBuffers(dev_handle, cmd_pool_handle, 1, &cmd_handle);
      goto out_unlock;
   }

   *out_cmd_handle = cmd_handle;

out_unlock:
   simple_mtx_unlock(&fb_cmd_pool->mutex);

   return result;
}

void
vn_feedback_cmd_free(VkDevice dev_handle,
                     struct vn_feedback_cmd_pool *fb_cmd_pool,
                     VkCommandBuffer cmd_handle)
{
   simple_mtx_lock(&fb_cmd_pool->mutex);
   vn_FreeCommandBuffers(dev_handle, fb_cmd_pool->pool_handle, 1,
                         &cmd_handle);
   simple_mtx_unlock(&fb_cmd_pool->mutex);
}

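/* Creates one feedback command pool per queue family so feedback cmds can be
 * recorded for whichever queue family a submission targets. Skipped entirely
 * when fence, semaphore and query feedback are all disabled via VN_PERF
 * flags.
 */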
VkResult
vn_feedback_cmd_pools_init(struct vn_device *dev)
{
   const VkAllocationCallbacks *alloc = &dev->base.base.alloc;
   VkDevice dev_handle = vn_device_to_handle(dev);
   struct vn_feedback_cmd_pool *fb_cmd_pools;
   VkCommandPoolCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
      .pNext = NULL,
      .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
   };

   if (VN_PERF(NO_FENCE_FEEDBACK) && VN_PERF(NO_SEMAPHORE_FEEDBACK) &&
       VN_PERF(NO_QUERY_FEEDBACK))
      return VK_SUCCESS;

   assert(dev->queue_family_count);

   fb_cmd_pools =
      vk_zalloc(alloc, sizeof(*fb_cmd_pools) * dev->queue_family_count,
                VN_DEFAULT_ALIGN, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!fb_cmd_pools)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   for (uint32_t i = 0; i < dev->queue_family_count; i++) {
      VkResult result;

      info.queueFamilyIndex = dev->queue_families[i];
      result = vn_CreateCommandPool(dev_handle, &info, alloc,
                                    &fb_cmd_pools[i].pool_handle);
      if (result != VK_SUCCESS) {
         for (uint32_t j = 0; j < i; j++) {
            vn_DestroyCommandPool(dev_handle, fb_cmd_pools[j].pool_handle,
                                  alloc);
            simple_mtx_destroy(&fb_cmd_pools[j].mutex);
         }

         vk_free(alloc, fb_cmd_pools);
         return result;
      }

      simple_mtx_init(&fb_cmd_pools[i].mutex, mtx_plain);
      list_inithead(&fb_cmd_pools[i].free_qfb_cmds);
   }

   dev->fb_cmd_pools = fb_cmd_pools;

   return VK_SUCCESS;
}

void
vn_feedback_cmd_pools_fini(struct vn_device *dev)
{
   const VkAllocationCallbacks *alloc = &dev->base.base.alloc;
   VkDevice dev_handle = vn_device_to_handle(dev);

   if (!dev->fb_cmd_pools)
      return;

   for (uint32_t i = 0; i < dev->queue_family_count; i++) {
      list_for_each_entry_safe(struct vn_query_feedback_cmd, feedback_cmd,
                               &dev->fb_cmd_pools[i].free_qfb_cmds, head)
         vk_free(alloc, feedback_cmd);

      vn_DestroyCommandPool(dev_handle, dev->fb_cmd_pools[i].pool_handle,
                            alloc);
      simple_mtx_destroy(&dev->fb_cmd_pools[i].mutex);
   }

   vk_free(alloc, dev->fb_cmd_pools);
}