/*
 * Copyright 2022 Google LLC
 * SPDX-License-Identifier: MIT
 */

#include "vn_feedback.h"

#include "vn_device.h"
#include "vn_physical_device.h"
#include "vn_queue.h"

/* coherent buffer with bound and mapped memory */
struct vn_feedback_buffer {
   VkBuffer buffer;
   VkDeviceMemory memory;
   void *data;

   struct list_head head;
};

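/* Return the index of the first memory type in mem_type_bits that has all of
 * required_mem_flags, or UINT32_MAX if no memory type qualifies.
 */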
static uint32_t
vn_get_memory_type_index(const VkPhysicalDeviceMemoryProperties *mem_props,
                         uint32_t mem_type_bits,
                         VkMemoryPropertyFlags required_mem_flags)
{
   u_foreach_bit(mem_type_index, mem_type_bits)
   {
      assert(mem_type_index < mem_props->memoryTypeCount);
      if ((mem_props->memoryTypes[mem_type_index].propertyFlags &
           required_mem_flags) == required_mem_flags)
         return mem_type_index;
   }

   return UINT32_MAX;
}

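/* Create a host-coherent feedback buffer of the requested size: create the
 * VkBuffer, allocate and bind host-coherent memory, and persistently map it.
 */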
static VkResult
vn_feedback_buffer_create(struct vn_device *dev,
                          uint32_t size,
                          const VkAllocationCallbacks *alloc,
                          struct vn_feedback_buffer **out_feedback_buf)
{
   const bool exclusive = dev->queue_family_count == 1;
   const VkPhysicalDeviceMemoryProperties *mem_props =
      &dev->physical_device->memory_properties.memoryProperties;
   VkDevice dev_handle = vn_device_to_handle(dev);
   struct vn_feedback_buffer *feedback_buf;
   VkResult result;

   feedback_buf = vk_zalloc(alloc, sizeof(*feedback_buf), VN_DEFAULT_ALIGN,
                            VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!feedback_buf)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   /* use concurrent sharing to avoid explicit queue family ownership
    * transfers for a device created with queues from multiple queue families
    */
   const VkBufferCreateInfo buf_create_info = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
      .size = size,
      .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT,
      .sharingMode =
         exclusive ? VK_SHARING_MODE_EXCLUSIVE : VK_SHARING_MODE_CONCURRENT,
      /* below favors the current venus protocol */
      .queueFamilyIndexCount = exclusive ? 0 : dev->queue_family_count,
      .pQueueFamilyIndices = exclusive ? NULL : dev->queue_families,
   };
   result = vn_CreateBuffer(dev_handle, &buf_create_info, alloc,
                            &feedback_buf->buffer);
   if (result != VK_SUCCESS)
      goto out_free_feedback_buf;

   struct vn_buffer *buf = vn_buffer_from_handle(feedback_buf->buffer);
   const VkMemoryRequirements *mem_req =
      &buf->requirements.memory.memoryRequirements;
   const uint32_t mem_type_index =
      vn_get_memory_type_index(mem_props, mem_req->memoryTypeBits,
                               VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
   if (mem_type_index >= mem_props->memoryTypeCount) {
      result = VK_ERROR_INITIALIZATION_FAILED;
      goto out_destroy_buffer;
   }

   const VkMemoryAllocateInfo mem_alloc_info = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
      .allocationSize = mem_req->size,
      .memoryTypeIndex = mem_type_index,
   };
   result = vn_AllocateMemory(dev_handle, &mem_alloc_info, alloc,
                              &feedback_buf->memory);
   if (result != VK_SUCCESS)
      goto out_destroy_buffer;

   const VkBindBufferMemoryInfo bind_info = {
      .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
      .buffer = feedback_buf->buffer,
      .memory = feedback_buf->memory,
      .memoryOffset = 0,
   };
   result = vn_BindBufferMemory2(dev_handle, 1, &bind_info);
   if (result != VK_SUCCESS)
      goto out_free_memory;

   result = vn_MapMemory(dev_handle, feedback_buf->memory, 0, VK_WHOLE_SIZE,
                         0, &feedback_buf->data);
   if (result != VK_SUCCESS)
      goto out_free_memory;

   *out_feedback_buf = feedback_buf;

   return VK_SUCCESS;

out_free_memory:
   vn_FreeMemory(dev_handle, feedback_buf->memory, alloc);

out_destroy_buffer:
   vn_DestroyBuffer(dev_handle, feedback_buf->buffer, alloc);

out_free_feedback_buf:
   vk_free(alloc, feedback_buf);

   return result;
}

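/* Unmap and free the feedback buffer's memory, destroy the buffer, and free
 * the struct itself.
 */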
static void
vn_feedback_buffer_destroy(struct vn_device *dev,
                           struct vn_feedback_buffer *feedback_buf,
                           const VkAllocationCallbacks *alloc)
{
   VkDevice dev_handle = vn_device_to_handle(dev);

   vn_UnmapMemory(dev_handle, feedback_buf->memory);
   vn_FreeMemory(dev_handle, feedback_buf->memory, alloc);
   vn_DestroyBuffer(dev_handle, feedback_buf->buffer, alloc);
   vk_free(alloc, feedback_buf);
}

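/* Grow the pool by creating a new feedback buffer of pool->size and pushing
 * it to the front of the buffer list; suballocation restarts at offset 0.
 */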
static VkResult
vn_feedback_pool_grow_locked(struct vn_feedback_pool *pool)
{
   VN_TRACE_FUNC();
   struct vn_feedback_buffer *feedback_buf = NULL;
   VkResult result;

   result = vn_feedback_buffer_create(pool->device, pool->size, pool->alloc,
                                      &feedback_buf);
   if (result != VK_SUCCESS)
      return result;

   pool->used = 0;

   list_add(&feedback_buf->head, &pool->feedback_buffers);

   return VK_SUCCESS;
}

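/* Initialize the feedback pool and allocate its first feedback buffer. Slots
 * are suballocated from the pool and recycled via the free_slots list.
 */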
VkResult
vn_feedback_pool_init(struct vn_device *dev,
                      struct vn_feedback_pool *pool,
                      uint32_t size,
                      const VkAllocationCallbacks *alloc)
{
   simple_mtx_init(&pool->mutex, mtx_plain);

   pool->device = dev;
   pool->alloc = alloc;
   pool->size = size;
   pool->used = size;
   list_inithead(&pool->feedback_buffers);
   list_inithead(&pool->free_slots);

   /* no lock needed upon init */
   return vn_feedback_pool_grow_locked(pool);
}

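/* Free all recycled slots and destroy every feedback buffer owned by the
 * pool.
 */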
void
vn_feedback_pool_fini(struct vn_feedback_pool *pool)
{
   list_for_each_entry_safe(struct vn_feedback_slot, slot, &pool->free_slots,
                            head)
      vk_free(pool->alloc, slot);

   list_for_each_entry_safe(struct vn_feedback_buffer, feedback_buf,
                            &pool->feedback_buffers, head)
      vn_feedback_buffer_destroy(pool->device, feedback_buf, pool->alloc);

   simple_mtx_destroy(&pool->mutex);
}

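/* Suballocate size bytes (4-byte aligned) from the most recently added
 * feedback buffer, growing the pool first if the remaining space is too
 * small. Returns the backing buffer and the allocated offset.
 */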
static struct vn_feedback_buffer *
vn_feedback_pool_alloc_locked(struct vn_feedback_pool *pool,
                              uint32_t size,
                              uint32_t *out_offset)
{
   VN_TRACE_FUNC();
   const uint32_t aligned_size = align(size, 4);

   if (unlikely(aligned_size > pool->size - pool->used)) {
      VkResult result = vn_feedback_pool_grow_locked(pool);
      if (result != VK_SUCCESS)
         return NULL;

      assert(aligned_size <= pool->size - pool->used);
   }

   *out_offset = pool->used;
   pool->used += aligned_size;

   return list_first_entry(&pool->feedback_buffers, struct vn_feedback_buffer,
                           head);
}

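/* Allocate a feedback slot of the given type, reusing one from the free list
 * when available and otherwise suballocating fresh space from the pool.
 */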
struct vn_feedback_slot *
vn_feedback_pool_alloc(struct vn_feedback_pool *pool,
                       enum vn_feedback_type type)
{
   /* TODO Make slot size variable for VkQueryPool feedback. Currently it's
    * MAX2(sizeof(VkResult), sizeof(uint64_t)).
    */
   static const uint32_t slot_size = 8;
   struct vn_feedback_buffer *feedback_buf;
   uint32_t offset;
   struct vn_feedback_slot *slot;

   simple_mtx_lock(&pool->mutex);
   if (!list_is_empty(&pool->free_slots)) {
      slot =
         list_first_entry(&pool->free_slots, struct vn_feedback_slot, head);
      list_del(&slot->head);
      simple_mtx_unlock(&pool->mutex);

      slot->type = type;
      return slot;
   }

   slot = vk_alloc(pool->alloc, sizeof(*slot), VN_DEFAULT_ALIGN,
                   VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!slot) {
      simple_mtx_unlock(&pool->mutex);
      return NULL;
   }

   feedback_buf = vn_feedback_pool_alloc_locked(pool, slot_size, &offset);
   simple_mtx_unlock(&pool->mutex);

   if (!feedback_buf) {
      vk_free(pool->alloc, slot);
      return NULL;
   }

   slot->type = type;
   slot->offset = offset;
   slot->buffer = feedback_buf->buffer;
   slot->data = feedback_buf->data + offset;

   return slot;
}

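/* Return a slot to the pool's free list for reuse; the underlying buffer
 * space is not reclaimed.
 */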
void
vn_feedback_pool_free(struct vn_feedback_pool *pool,
                      struct vn_feedback_slot *slot)
{
   simple_mtx_lock(&pool->mutex);
   list_add(&slot->head, &pool->free_slots);
   simple_mtx_unlock(&pool->mutex);
}

void
vn_feedback_event_cmd_record(VkCommandBuffer cmd_handle,
                             VkEvent ev_handle,
                             VkPipelineStageFlags stage_mask,
                             VkResult status)
{
   /* For vkCmdSetEvent and vkCmdResetEvent feedback interception.
    *
    * The injection point is after the event call to avoid introducing an
    * unexpected src stage wait on VK_PIPELINE_STAGE_HOST_BIT and
    * VK_PIPELINE_STAGE_TRANSFER_BIT if they are not already waited on by
    * vkCmdSetEvent or vkCmdResetEvent. On the other hand, the delay in the
    * feedback signal is acceptable given the nature of VkEvent, and the
    * lifecycle of the event feedback cmds is guarded by the intercepted
    * command buffer.
    */
   struct vn_event *ev = vn_event_from_handle(ev_handle);
   struct vn_feedback_slot *slot = ev->feedback_slot;

   if (!slot)
      return;

   STATIC_ASSERT(sizeof(*slot->status) == 4);

   const VkBufferMemoryBarrier buf_barrier_before = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
      .pNext = NULL,
      .srcAccessMask =
         VK_ACCESS_HOST_WRITE_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
      .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
      .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .buffer = slot->buffer,
      .offset = slot->offset,
      .size = 4,
   };
   vn_CmdPipelineBarrier(cmd_handle,
                         stage_mask | VK_PIPELINE_STAGE_HOST_BIT |
                            VK_PIPELINE_STAGE_TRANSFER_BIT,
                         VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1,
                         &buf_barrier_before, 0, NULL);
   vn_CmdFillBuffer(cmd_handle, slot->buffer, slot->offset, 4, status);

   const VkBufferMemoryBarrier buf_barrier_after = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
      .pNext = NULL,
      .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
      .dstAccessMask = VK_ACCESS_HOST_READ_BIT | VK_ACCESS_HOST_WRITE_BIT,
      .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .buffer = slot->buffer,
      .offset = slot->offset,
      .size = 4,
   };
   vn_CmdPipelineBarrier(cmd_handle, VK_PIPELINE_STAGE_TRANSFER_BIT,
                         VK_PIPELINE_STAGE_HOST_BIT, 0, 0, NULL, 1,
                         &buf_barrier_after, 0, NULL);
}

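/* Record a command buffer that fills the fence feedback slot with
 * VK_SUCCESS, bracketed by barriers so the write orders after all prior
 * device work and is visible to the host.
 */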
static VkResult
vn_feedback_fence_cmd_record(VkCommandBuffer cmd_handle,
                             struct vn_feedback_slot *slot)
{
   STATIC_ASSERT(sizeof(*slot->status) == 4);

   static const VkCommandBufferBeginInfo begin_info = {
      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
      .pNext = NULL,
      .flags = 0,
      .pInheritanceInfo = NULL,
   };
   VkResult result = vn_BeginCommandBuffer(cmd_handle, &begin_info);
   if (result != VK_SUCCESS)
      return result;

   static const VkMemoryBarrier mem_barrier_before = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
      .pNext = NULL,
      /* make pending writes available to stay close to fence signal op */
      .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
      /* no need to make all memory visible for feedback update */
      .dstAccessMask = 0,
   };
   const VkBufferMemoryBarrier buf_barrier_before = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
      .pNext = NULL,
      /* slot memory has been made available via mem_barrier_before */
      .srcAccessMask = 0,
      .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
      .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .buffer = slot->buffer,
      .offset = slot->offset,
      .size = 4,
   };
   vn_CmdPipelineBarrier(cmd_handle, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                         VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 1,
                         &mem_barrier_before, 1, &buf_barrier_before, 0,
                         NULL);
   vn_CmdFillBuffer(cmd_handle, slot->buffer, slot->offset, 4, VK_SUCCESS);

   const VkBufferMemoryBarrier buf_barrier_after = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
      .pNext = NULL,
      .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
      .dstAccessMask = VK_ACCESS_HOST_READ_BIT | VK_ACCESS_HOST_WRITE_BIT,
      .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .buffer = slot->buffer,
      .offset = slot->offset,
      .size = 4,
   };
   vn_CmdPipelineBarrier(cmd_handle, VK_PIPELINE_STAGE_TRANSFER_BIT,
                         VK_PIPELINE_STAGE_HOST_BIT, 0, 0, NULL, 1,
                         &buf_barrier_after, 0, NULL);

   return vn_EndCommandBuffer(cmd_handle);
}

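/* Allocate a command buffer from the per-queue-family feedback pool and
 * record the fence feedback commands into it.
 */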
VkResult
vn_feedback_fence_cmd_alloc(VkDevice dev_handle,
                            struct vn_feedback_cmd_pool *pool,
                            struct vn_feedback_slot *slot,
                            VkCommandBuffer *out_cmd_handle)
{
   const VkCommandBufferAllocateInfo info = {
      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
      .pNext = NULL,
      .commandPool = pool->pool,
      .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
      .commandBufferCount = 1,
   };
   VkCommandBuffer cmd_handle;
   VkResult result;

   simple_mtx_lock(&pool->mutex);
   result = vn_AllocateCommandBuffers(dev_handle, &info, &cmd_handle);
   if (result != VK_SUCCESS)
      goto out_unlock;

   result = vn_feedback_fence_cmd_record(cmd_handle, slot);
   if (result != VK_SUCCESS) {
      vn_FreeCommandBuffers(dev_handle, pool->pool, 1, &cmd_handle);
      goto out_unlock;
   }

   *out_cmd_handle = cmd_handle;

out_unlock:
   simple_mtx_unlock(&pool->mutex);

   return result;
}

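/* Free a fence feedback command buffer back to its pool under the pool
 * mutex.
 */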
void
vn_feedback_fence_cmd_free(VkDevice dev_handle,
                           struct vn_feedback_cmd_pool *pool,
                           VkCommandBuffer cmd_handle)
{
   simple_mtx_lock(&pool->mutex);
   vn_FreeCommandBuffers(dev_handle, pool->pool, 1, &cmd_handle);
   simple_mtx_unlock(&pool->mutex);
}

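/* Create one feedback command pool (with its mutex) per queue family, unless
 * fence feedback is disabled via VN_PERF(NO_FENCE_FEEDBACK).
 */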
VkResult
vn_feedback_cmd_pools_init(struct vn_device *dev)
{
   const VkAllocationCallbacks *alloc = &dev->base.base.alloc;
   VkDevice dev_handle = vn_device_to_handle(dev);
   struct vn_feedback_cmd_pool *pools;
   VkCommandPoolCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
      .pNext = NULL,
      .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
   };

   /* TODO will also condition on timeline semaphore feedback */
   if (VN_PERF(NO_FENCE_FEEDBACK))
      return VK_SUCCESS;

   assert(dev->queue_family_count);

   pools = vk_zalloc(alloc, sizeof(*pools) * dev->queue_family_count,
                     VN_DEFAULT_ALIGN, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!pools)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   for (uint32_t i = 0; i < dev->queue_family_count; i++) {
      VkResult result;

      info.queueFamilyIndex = dev->queue_families[i];
      result = vn_CreateCommandPool(dev_handle, &info, alloc, &pools[i].pool);
      if (result != VK_SUCCESS) {
         for (uint32_t j = 0; j < i; j++) {
            vn_DestroyCommandPool(dev_handle, pools[j].pool, alloc);
            simple_mtx_destroy(&pools[j].mutex);
         }

         vk_free(alloc, pools);
         return result;
      }

      simple_mtx_init(&pools[i].mutex, mtx_plain);
   }

   dev->cmd_pools = pools;

   return VK_SUCCESS;
}

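/* Destroy the per-queue-family feedback command pools created above, if any.
 */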
void
vn_feedback_cmd_pools_fini(struct vn_device *dev)
{
   const VkAllocationCallbacks *alloc = &dev->base.base.alloc;
   VkDevice dev_handle = vn_device_to_handle(dev);

   if (!dev->cmd_pools)
      return;

   for (uint32_t i = 0; i < dev->queue_family_count; i++) {
      vn_DestroyCommandPool(dev_handle, dev->cmd_pools[i].pool, alloc);
      simple_mtx_destroy(&dev->cmd_pools[i].mutex);
   }

   vk_free(alloc, dev->cmd_pools);
}
494