/*
 * Copyright 2022 Google LLC
 * SPDX-License-Identifier: MIT
 */

#include "vn_feedback.h"

#include "vn_device.h"
#include "vn_physical_device.h"
#include "vn_queue.h"

/* coherent buffer with bound and mapped memory */
struct vn_feedback_buffer {
   VkBuffer buffer;
   VkDeviceMemory memory;
   void *data;

   struct list_head head;
};
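
/* A feedback pool suballocates small slots out of these coherent buffers so
 * that fence/event status written by the GPU can be read directly from
 * mapped memory. Rough usage sketch (the enum value name follows
 * vn_feedback_type in vn_feedback.h and the size/handles are placeholders,
 * for illustration only):
 *
 *    struct vn_feedback_pool pool;
 *    vn_feedback_pool_init(dev, &pool, 4096, alloc);
 *    struct vn_feedback_slot *slot =
 *       vn_feedback_pool_alloc(&pool, VN_FEEDBACK_TYPE_FENCE);
 *    ...
 *    vn_feedback_pool_free(&pool, slot);
 *    vn_feedback_pool_fini(&pool);
 */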

static uint32_t
vn_get_memory_type_index(const VkPhysicalDeviceMemoryProperties *mem_props,
                         uint32_t mem_type_bits,
                         VkMemoryPropertyFlags required_mem_flags)
{
   u_foreach_bit(mem_type_index, mem_type_bits)
   {
      assert(mem_type_index < mem_props->memoryTypeCount);
      if ((mem_props->memoryTypes[mem_type_index].propertyFlags &
           required_mem_flags) == required_mem_flags)
         return mem_type_index;
   }

   return UINT32_MAX;
}

static VkResult
vn_feedback_buffer_create(struct vn_device *dev,
                          uint32_t size,
                          const VkAllocationCallbacks *alloc,
                          struct vn_feedback_buffer **out_feedback_buf)
{
   const bool exclusive = dev->queue_family_count == 1;
   const VkPhysicalDeviceMemoryProperties *mem_props =
      &dev->physical_device->memory_properties.memoryProperties;
   VkDevice dev_handle = vn_device_to_handle(dev);
   struct vn_feedback_buffer *feedback_buf;
   VkResult result;

   feedback_buf = vk_zalloc(alloc, sizeof(*feedback_buf), VN_DEFAULT_ALIGN,
                            VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!feedback_buf)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   /* Use concurrent sharing to avoid explicit queue family ownership
    * transfers for a device created with queues from multiple queue
    * families.
    */
   const VkBufferCreateInfo buf_create_info = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
      .size = size,
      .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT,
      .sharingMode =
         exclusive ? VK_SHARING_MODE_EXCLUSIVE : VK_SHARING_MODE_CONCURRENT,
      /* below favors the current venus protocol */
      .queueFamilyIndexCount = exclusive ? 0 : dev->queue_family_count,
      .pQueueFamilyIndices = exclusive ? NULL : dev->queue_families,
   };
   result = vn_CreateBuffer(dev_handle, &buf_create_info, alloc,
                            &feedback_buf->buffer);
   if (result != VK_SUCCESS)
      goto out_free_feedback_buf;

   struct vn_buffer *buf = vn_buffer_from_handle(feedback_buf->buffer);
   const VkMemoryRequirements *mem_req =
      &buf->requirements.memory.memoryRequirements;
   const uint32_t mem_type_index =
      vn_get_memory_type_index(mem_props, mem_req->memoryTypeBits,
                               VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
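   /* vn_get_memory_type_index returns UINT32_MAX when no matching type
    * exists, which is always >= memoryTypeCount (there are at most
    * VK_MAX_MEMORY_TYPES (32) types), so the check below covers failure.
    */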
   if (mem_type_index >= mem_props->memoryTypeCount) {
      result = VK_ERROR_INITIALIZATION_FAILED;
      goto out_destroy_buffer;
   }

   const VkMemoryAllocateInfo mem_alloc_info = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
      .allocationSize = mem_req->size,
      .memoryTypeIndex = mem_type_index,
   };
   result = vn_AllocateMemory(dev_handle, &mem_alloc_info, alloc,
                              &feedback_buf->memory);
   if (result != VK_SUCCESS)
      goto out_destroy_buffer;

   const VkBindBufferMemoryInfo bind_info = {
      .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
      .buffer = feedback_buf->buffer,
      .memory = feedback_buf->memory,
      .memoryOffset = 0,
   };
   result = vn_BindBufferMemory2(dev_handle, 1, &bind_info);
   if (result != VK_SUCCESS)
      goto out_free_memory;

   result = vn_MapMemory(dev_handle, feedback_buf->memory, 0, VK_WHOLE_SIZE,
                         0, &feedback_buf->data);
   if (result != VK_SUCCESS)
      goto out_free_memory;

   *out_feedback_buf = feedback_buf;

   return VK_SUCCESS;

out_free_memory:
   vn_FreeMemory(dev_handle, feedback_buf->memory, alloc);

out_destroy_buffer:
   vn_DestroyBuffer(dev_handle, feedback_buf->buffer, alloc);

out_free_feedback_buf:
   vk_free(alloc, feedback_buf);

   return result;
}

static void
vn_feedback_buffer_destroy(struct vn_device *dev,
                           struct vn_feedback_buffer *feedback_buf,
                           const VkAllocationCallbacks *alloc)
{
   VkDevice dev_handle = vn_device_to_handle(dev);

   vn_UnmapMemory(dev_handle, feedback_buf->memory);
   vn_FreeMemory(dev_handle, feedback_buf->memory, alloc);
   vn_DestroyBuffer(dev_handle, feedback_buf->buffer, alloc);
   vk_free(alloc, feedback_buf);
}

static VkResult
vn_feedback_pool_grow_locked(struct vn_feedback_pool *pool)
{
   VN_TRACE_FUNC();
   struct vn_feedback_buffer *feedback_buf = NULL;
   VkResult result;

   result = vn_feedback_buffer_create(pool->device, pool->size, pool->alloc,
                                      &feedback_buf);
   if (result != VK_SUCCESS)
      return result;

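   /* The new buffer becomes the only suballocation target; earlier buffers
    * are kept (so previously handed-out slots stay valid) until
    * vn_feedback_pool_fini.
    */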
   pool->used = 0;

   list_add(&feedback_buf->head, &pool->feedback_buffers);

   return VK_SUCCESS;
}

VkResult
vn_feedback_pool_init(struct vn_device *dev,
                      struct vn_feedback_pool *pool,
                      uint32_t size,
                      const VkAllocationCallbacks *alloc)
{
   simple_mtx_init(&pool->mutex, mtx_plain);

   pool->device = dev;
   pool->alloc = alloc;
   pool->size = size;
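   /* mark the pool full so the grow below allocates the first buffer */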
   pool->used = size;
   list_inithead(&pool->feedback_buffers);
   list_inithead(&pool->free_slots);

   /* no lock needed upon init */
   return vn_feedback_pool_grow_locked(pool);
}

void
vn_feedback_pool_fini(struct vn_feedback_pool *pool)
{
   list_for_each_entry_safe(struct vn_feedback_slot, slot, &pool->free_slots,
                            head)
      vk_free(pool->alloc, slot);

   list_for_each_entry_safe(struct vn_feedback_buffer, feedback_buf,
                            &pool->feedback_buffers, head)
      vn_feedback_buffer_destroy(pool->device, feedback_buf, pool->alloc);

   simple_mtx_destroy(&pool->mutex);
}

static struct vn_feedback_buffer *
vn_feedback_pool_alloc_locked(struct vn_feedback_pool *pool,
                              uint32_t size,
                              uint32_t *out_offset)
{
   VN_TRACE_FUNC();
   const uint32_t aligned_size = align(size, 4);

   if (unlikely(aligned_size > pool->size - pool->used)) {
      VkResult result = vn_feedback_pool_grow_locked(pool);
      if (result != VK_SUCCESS)
         return NULL;

      assert(aligned_size <= pool->size - pool->used);
   }

   *out_offset = pool->used;
   pool->used += aligned_size;

   return list_first_entry(&pool->feedback_buffers, struct vn_feedback_buffer,
                           head);
}

struct vn_feedback_slot *
vn_feedback_pool_alloc(struct vn_feedback_pool *pool,
                       enum vn_feedback_type type)
{
   /* TODO Make slot size variable for VkQueryPool feedback. Currently it's
    * MAX2(sizeof(VkResult), sizeof(uint64_t)).
    */
   static const uint32_t slot_size = 8;
   struct vn_feedback_buffer *feedback_buf;
   uint32_t offset;
   struct vn_feedback_slot *slot;

   simple_mtx_lock(&pool->mutex);
   if (!list_is_empty(&pool->free_slots)) {
      slot =
         list_first_entry(&pool->free_slots, struct vn_feedback_slot, head);
      list_del(&slot->head);
      simple_mtx_unlock(&pool->mutex);

      slot->type = type;
      return slot;
   }

   slot = vk_alloc(pool->alloc, sizeof(*slot), VN_DEFAULT_ALIGN,
                   VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!slot) {
      simple_mtx_unlock(&pool->mutex);
      return NULL;
   }

   feedback_buf = vn_feedback_pool_alloc_locked(pool, slot_size, &offset);
   simple_mtx_unlock(&pool->mutex);

   if (!feedback_buf) {
      vk_free(pool->alloc, slot);
      return NULL;
   }

   slot->type = type;
   slot->offset = offset;
   slot->buffer = feedback_buf->buffer;
   slot->data = feedback_buf->data + offset;

   return slot;
}

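/* Freed slots go onto free_slots for reuse by later vn_feedback_pool_alloc
 * calls; the underlying buffer space itself is never reclaimed before
 * vn_feedback_pool_fini.
 */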
void
vn_feedback_pool_free(struct vn_feedback_pool *pool,
                      struct vn_feedback_slot *slot)
{
   simple_mtx_lock(&pool->mutex);
   list_add(&slot->head, &pool->free_slots);
   simple_mtx_unlock(&pool->mutex);
}

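/* Records a GPU-side write of the event status into its feedback slot so
 * that event status queries can later be answered from the mapped slot
 * memory (only when the event actually has a feedback slot attached).
 */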
void
vn_feedback_event_cmd_record(VkCommandBuffer cmd_handle,
                             VkEvent ev_handle,
                             VkPipelineStageFlags stage_mask,
                             VkResult status)
{
   /* For vkCmdSetEvent and vkCmdResetEvent feedback interception.
    *
    * The injection point is after the event call to avoid introducing an
    * unexpected src stage wait on VK_PIPELINE_STAGE_HOST_BIT and
    * VK_PIPELINE_STAGE_TRANSFER_BIT if they are not already waited on by
    * vkCmdSetEvent or vkCmdResetEvent. On the other hand, the delay in the
    * feedback signal is acceptable given the nature of VkEvent, and the
    * event feedback cmds' lifecycle is guarded by the intercepted command
    * buffer.
    */
   struct vn_event *ev = vn_event_from_handle(ev_handle);
   struct vn_feedback_slot *slot = ev->feedback_slot;

   if (!slot)
      return;

   STATIC_ASSERT(sizeof(*slot->status) == 4);

   const VkBufferMemoryBarrier buf_barrier_before = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
      .pNext = NULL,
      .srcAccessMask =
         VK_ACCESS_HOST_WRITE_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
      .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
      .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .buffer = slot->buffer,
      .offset = slot->offset,
      .size = 4,
   };
   vn_CmdPipelineBarrier(cmd_handle,
                         stage_mask | VK_PIPELINE_STAGE_HOST_BIT |
                            VK_PIPELINE_STAGE_TRANSFER_BIT,
                         VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1,
                         &buf_barrier_before, 0, NULL);
   vn_CmdFillBuffer(cmd_handle, slot->buffer, slot->offset, 4, status);

   const VkBufferMemoryBarrier buf_barrier_after = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
      .pNext = NULL,
      .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
      .dstAccessMask = VK_ACCESS_HOST_READ_BIT | VK_ACCESS_HOST_WRITE_BIT,
      .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .buffer = slot->buffer,
      .offset = slot->offset,
      .size = 4,
   };
   vn_CmdPipelineBarrier(cmd_handle, VK_PIPELINE_STAGE_TRANSFER_BIT,
                         VK_PIPELINE_STAGE_HOST_BIT, 0, 0, NULL, 1,
                         &buf_barrier_after, 0, NULL);
}

static VkResult
vn_feedback_fence_cmd_record(VkCommandBuffer cmd_handle,
                             struct vn_feedback_slot *slot)
{
   STATIC_ASSERT(sizeof(*slot->status) == 4);

   static const VkCommandBufferBeginInfo begin_info = {
      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
      .pNext = NULL,
      .flags = 0,
      .pInheritanceInfo = NULL,
   };
   VkResult result = vn_BeginCommandBuffer(cmd_handle, &begin_info);
   if (result != VK_SUCCESS)
      return result;

   static const VkMemoryBarrier mem_barrier_before = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
      .pNext = NULL,
      /* make pending writes available to stay close to fence signal op */
      .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
      /* no need to make all memory visible for feedback update */
      .dstAccessMask = 0,
   };
   const VkBufferMemoryBarrier buf_barrier_before = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
      .pNext = NULL,
      /* slot memory has been made available via mem_barrier_before */
      .srcAccessMask = 0,
      .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
      .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .buffer = slot->buffer,
      .offset = slot->offset,
      .size = 4,
   };
   vn_CmdPipelineBarrier(cmd_handle, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                         VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 1,
                         &mem_barrier_before, 1, &buf_barrier_before, 0,
                         NULL);
   vn_CmdFillBuffer(cmd_handle, slot->buffer, slot->offset, 4, VK_SUCCESS);

   const VkBufferMemoryBarrier buf_barrier_after = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
      .pNext = NULL,
      .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
      .dstAccessMask = VK_ACCESS_HOST_READ_BIT | VK_ACCESS_HOST_WRITE_BIT,
      .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .buffer = slot->buffer,
      .offset = slot->offset,
      .size = 4,
   };
   vn_CmdPipelineBarrier(cmd_handle, VK_PIPELINE_STAGE_TRANSFER_BIT,
                         VK_PIPELINE_STAGE_HOST_BIT, 0, 0, NULL, 1,
                         &buf_barrier_after, 0, NULL);

   return vn_EndCommandBuffer(cmd_handle);
}

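/* Allocates and records a one-shot command buffer that writes VK_SUCCESS
 * into the fence's feedback slot. The queue submission path is expected to
 * append this command buffer to the fence-signaling submission so the fence
 * status becomes pollable from the mapped slot; the exact call sites live
 * outside this file.
 */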
VkResult
vn_feedback_fence_cmd_alloc(VkDevice dev_handle,
                            struct vn_feedback_cmd_pool *pool,
                            struct vn_feedback_slot *slot,
                            VkCommandBuffer *out_cmd_handle)
{
   const VkCommandBufferAllocateInfo info = {
      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
      .pNext = NULL,
      .commandPool = pool->pool,
      .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
      .commandBufferCount = 1,
   };
   VkCommandBuffer cmd_handle;
   VkResult result;

   simple_mtx_lock(&pool->mutex);
   result = vn_AllocateCommandBuffers(dev_handle, &info, &cmd_handle);
   if (result != VK_SUCCESS)
      goto out_unlock;

   result = vn_feedback_fence_cmd_record(cmd_handle, slot);
   if (result != VK_SUCCESS) {
      vn_FreeCommandBuffers(dev_handle, pool->pool, 1, &cmd_handle);
      goto out_unlock;
   }

   *out_cmd_handle = cmd_handle;

out_unlock:
   simple_mtx_unlock(&pool->mutex);

   return result;
}

void
vn_feedback_fence_cmd_free(VkDevice dev_handle,
                           struct vn_feedback_cmd_pool *pool,
                           VkCommandBuffer cmd_handle)
{
   simple_mtx_lock(&pool->mutex);
   vn_FreeCommandBuffers(dev_handle, pool->pool, 1, &cmd_handle);
   simple_mtx_unlock(&pool->mutex);
}

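/* One feedback command pool, each with its own mutex, is created per queue
 * family so feedback command buffers can be allocated for whichever family
 * a submitting queue belongs to.
 */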
VkResult
vn_feedback_cmd_pools_init(struct vn_device *dev)
{
   const VkAllocationCallbacks *alloc = &dev->base.base.alloc;
   VkDevice dev_handle = vn_device_to_handle(dev);
   struct vn_feedback_cmd_pool *pools;
   VkCommandPoolCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
      .pNext = NULL,
      .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
   };

   /* TODO will also condition on timeline semaphore feedback */
   if (VN_PERF(NO_FENCE_FEEDBACK))
      return VK_SUCCESS;

   assert(dev->queue_family_count);

   pools = vk_zalloc(alloc, sizeof(*pools) * dev->queue_family_count,
                     VN_DEFAULT_ALIGN, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!pools)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   for (uint32_t i = 0; i < dev->queue_family_count; i++) {
      VkResult result;

      info.queueFamilyIndex = dev->queue_families[i];
      result = vn_CreateCommandPool(dev_handle, &info, alloc, &pools[i].pool);
      if (result != VK_SUCCESS) {
         for (uint32_t j = 0; j < i; j++) {
            vn_DestroyCommandPool(dev_handle, pools[j].pool, alloc);
            simple_mtx_destroy(&pools[j].mutex);
         }

         vk_free(alloc, pools);
         return result;
      }

      simple_mtx_init(&pools[i].mutex, mtx_plain);
   }

   dev->cmd_pools = pools;

   return VK_SUCCESS;
}

void
vn_feedback_cmd_pools_fini(struct vn_device *dev)
{
   const VkAllocationCallbacks *alloc = &dev->base.base.alloc;
   VkDevice dev_handle = vn_device_to_handle(dev);

   if (!dev->cmd_pools)
      return;

   for (uint32_t i = 0; i < dev->queue_family_count; i++) {
      vn_DestroyCommandPool(dev_handle, dev->cmd_pools[i].pool, alloc);
      simple_mtx_destroy(&dev->cmd_pools[i].mutex);
   }

   vk_free(alloc, dev->cmd_pools);
}