• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * This file is part of FFmpeg.
3  *
4  * FFmpeg is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * FFmpeg is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with FFmpeg; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 
19 #include "avassert.h"
20 
21 #include "vulkan.h"
22 #include "vulkan_loader.h"
23 
24 #if CONFIG_LIBGLSLANG
25 #include "vulkan_glslang.c"
26 #elif CONFIG_LIBSHADERC
27 #include "vulkan_shaderc.c"
28 #endif
29 
/* Generic macro for creating contexts which need to keep their addresses
 * if another context is created.
 *
 * Expands to a static function create_<shortname>() that:
 *   - heap-allocates one zeroed `type` (its address therefore stays stable
 *     even when more contexts are added later),
 *   - grows the dctx->array pointer list by one slot,
 *   - stores the new context there and increments dctx->num.
 * Returns the new context, or NULL on allocation failure, in which case
 * dctx->array and dctx->num are left untouched. */
#define FN_CREATING(ctx, type, shortname, array, num)                          \
static av_always_inline type *create_ ##shortname(ctx *dctx)                   \
{                                                                              \
    type **array, *sctx = av_mallocz(sizeof(*sctx));                           \
    if (!sctx)                                                                 \
        return NULL;                                                           \
                                                                               \
    array = av_realloc_array(dctx->array, sizeof(*dctx->array), dctx->num + 1);\
    if (!array) {                                                              \
        av_free(sctx);                                                         \
        return NULL;                                                           \
    }                                                                          \
                                                                               \
    dctx->array = array;                                                       \
    dctx->array[dctx->num++] = sctx;                                           \
                                                                               \
    return sctx;                                                               \
}
50 
/* Identity component mapping: each channel is sampled from itself */
const VkComponentMapping ff_comp_identity_map = {
    .r = VK_COMPONENT_SWIZZLE_IDENTITY,
    .g = VK_COMPONENT_SWIZZLE_IDENTITY,
    .b = VK_COMPONENT_SWIZZLE_IDENTITY,
    .a = VK_COMPONENT_SWIZZLE_IDENTITY,
};
57 
58 /* Converts return values to strings */
ff_vk_ret2str(VkResult res)59 const char *ff_vk_ret2str(VkResult res)
60 {
61 #define CASE(VAL) case VAL: return #VAL
62     switch (res) {
63     CASE(VK_SUCCESS);
64     CASE(VK_NOT_READY);
65     CASE(VK_TIMEOUT);
66     CASE(VK_EVENT_SET);
67     CASE(VK_EVENT_RESET);
68     CASE(VK_INCOMPLETE);
69     CASE(VK_ERROR_OUT_OF_HOST_MEMORY);
70     CASE(VK_ERROR_OUT_OF_DEVICE_MEMORY);
71     CASE(VK_ERROR_INITIALIZATION_FAILED);
72     CASE(VK_ERROR_DEVICE_LOST);
73     CASE(VK_ERROR_MEMORY_MAP_FAILED);
74     CASE(VK_ERROR_LAYER_NOT_PRESENT);
75     CASE(VK_ERROR_EXTENSION_NOT_PRESENT);
76     CASE(VK_ERROR_FEATURE_NOT_PRESENT);
77     CASE(VK_ERROR_INCOMPATIBLE_DRIVER);
78     CASE(VK_ERROR_TOO_MANY_OBJECTS);
79     CASE(VK_ERROR_FORMAT_NOT_SUPPORTED);
80     CASE(VK_ERROR_FRAGMENTED_POOL);
81     CASE(VK_ERROR_SURFACE_LOST_KHR);
82     CASE(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR);
83     CASE(VK_SUBOPTIMAL_KHR);
84     CASE(VK_ERROR_OUT_OF_DATE_KHR);
85     CASE(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR);
86     CASE(VK_ERROR_VALIDATION_FAILED_EXT);
87     CASE(VK_ERROR_INVALID_SHADER_NV);
88     CASE(VK_ERROR_OUT_OF_POOL_MEMORY);
89     CASE(VK_ERROR_INVALID_EXTERNAL_HANDLE);
90     CASE(VK_ERROR_NOT_PERMITTED_EXT);
91     default: return "Unknown error";
92     }
93 #undef CASE
94 }
95 
/**
 * Initializes a queue family context: picks the device's queue family index
 * and the number of queues actually available for the requested capability.
 *
 * @param dev_family which queue capability to use (graphics/compute/transfer/
 *                   encode/decode); anything else aborts via av_assert0
 * @param nb_queues  number of queues to cycle through; 0 means "use as many
 *                   as the family actually provides"
 */
void ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
                   VkQueueFlagBits dev_family, int nb_queues)
{
    switch (dev_family) {
    case VK_QUEUE_GRAPHICS_BIT:
        qf->queue_family  = s->hwctx->queue_family_index;
        qf->actual_queues = s->hwctx->nb_graphics_queues;
        break;
    case VK_QUEUE_COMPUTE_BIT:
        qf->queue_family  = s->hwctx->queue_family_comp_index;
        qf->actual_queues = s->hwctx->nb_comp_queues;
        break;
    case VK_QUEUE_TRANSFER_BIT:
        qf->queue_family  = s->hwctx->queue_family_tx_index;
        qf->actual_queues = s->hwctx->nb_tx_queues;
        break;
    case VK_QUEUE_VIDEO_ENCODE_BIT_KHR:
        qf->queue_family  = s->hwctx->queue_family_encode_index;
        qf->actual_queues = s->hwctx->nb_encode_queues;
        break;
    case VK_QUEUE_VIDEO_DECODE_BIT_KHR:
        qf->queue_family  = s->hwctx->queue_family_decode_index;
        qf->actual_queues = s->hwctx->nb_decode_queues;
        break;
    default:
        av_assert0(0); /* Should never happen */
    }

    /* 0 requests every queue the family has (redundant trailing `return;`
     * removed) */
    qf->nb_queues = nb_queues ? nb_queues : qf->actual_queues;
}
131 
/* Advances to the next queue in the family, wrapping back to 0 at the end. */
void ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf)
{
    qf->cur_queue++;
    if (qf->cur_queue >= qf->nb_queues)
        qf->cur_queue = 0;
}
136 
/* Allocates device memory that satisfies both the resource's requirements
 * (req) and the caller's requested property flags (req_flags).
 *
 * alloc_extension is chained into VkMemoryAllocateInfo.pNext (used e.g. for
 * dedicated allocations). On success the chosen memory type's full property
 * flags are OR-ed into *mem_flags and the handle is written to *mem.
 * May grow req->size in place for host-visible memory (map alignment).
 * Returns 0 on success or a negative AVERROR code. */
static int vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
                        VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
                        VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem)
{
    VkResult ret;
    int index = -1;
    FFVulkanFunctions *vk = &s->vkfn;

    VkMemoryAllocateInfo alloc_info = {
        .sType           = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
        .pNext           = alloc_extension,
    };

    /* Align if we need to */
    if (req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
        req->size = FFALIGN(req->size, s->props.limits.minMemoryMapAlignment);

    alloc_info.allocationSize = req->size;

    /* The vulkan spec requires memory types to be sorted in the "optimal"
     * order, so the first matching type we find will be the best/fastest one */
    for (int i = 0; i < s->mprops.memoryTypeCount; i++) {
        /* The memory type must be supported by the requirements (bitfield) */
        if (!(req->memoryTypeBits & (1 << i)))
            continue;

        /* The memory type flags must include our properties */
        if ((s->mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags)
            continue;

        /* Found a suitable memory type */
        index = i;
        break;
    }

    if (index < 0) {
        av_log(s, AV_LOG_ERROR, "No memory type found for flags 0x%x\n",
               req_flags);
        return AVERROR(EINVAL);
    }

    alloc_info.memoryTypeIndex = index;

    ret = vk->AllocateMemory(s->hwctx->act_dev, &alloc_info,
                             s->hwctx->alloc, mem);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Failed to allocate memory: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR(ENOMEM);
    }

    *mem_flags |= s->mprops.memoryTypes[index].propertyFlags;

    return 0;
}
192 
/**
 * Creates a VkBuffer with the given size/usage and binds freshly allocated
 * memory with the requested property flags to it, honouring the
 * implementation's dedicated-allocation preference.
 *
 * Fixes vs. original: the VkBuffer (and its memory) was leaked when
 * vk_alloc_mem() or BindBufferMemory() failed, and the buffer was created
 * with a NULL allocator while ff_vk_free_buf() destroys it with
 * s->hwctx->alloc — the spec requires compatible allocators.
 *
 * Returns 0 on success or a negative AVERROR code; on failure no Vulkan
 * objects are leaked and buf's handles are reset to VK_NULL_HANDLE.
 */
int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
                     VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags)
{
    int err;
    VkResult ret;
    int use_ded_mem;
    FFVulkanFunctions *vk = &s->vkfn;

    VkBufferCreateInfo buf_spawn = {
        .sType       = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
        .pNext       = NULL,
        .usage       = usage,
        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
        .size        = size, /* Gets FFALIGNED during alloc if host visible
                                but should be ok */
    };

    VkBufferMemoryRequirementsInfo2 req_desc = {
        .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,
    };
    VkMemoryDedicatedAllocateInfo ded_alloc = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
        .pNext = NULL,
    };
    VkMemoryDedicatedRequirements ded_req = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
    };
    VkMemoryRequirements2 req = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
        .pNext = &ded_req,
    };

    /* Use the same allocator as destruction (ff_vk_free_buf) does */
    ret = vk->CreateBuffer(s->hwctx->act_dev, &buf_spawn, s->hwctx->alloc,
                           &buf->buf);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Failed to create buffer: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    req_desc.buffer = buf->buf;

    vk->GetBufferMemoryRequirements2(s->hwctx->act_dev, &req_desc, &req);

    /* In case the implementation prefers/requires dedicated allocation */
    use_ded_mem = ded_req.prefersDedicatedAllocation |
                  ded_req.requiresDedicatedAllocation;
    if (use_ded_mem)
        ded_alloc.buffer = buf->buf;

    err = vk_alloc_mem(s, &req.memoryRequirements, flags,
                       use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext,
                       &buf->flags, &buf->mem);
    if (err)
        goto fail;

    ret = vk->BindBufferMemory(s->hwctx->act_dev, buf->buf, buf->mem, 0);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Failed to bind memory to buffer: %s\n",
               ff_vk_ret2str(ret));
        vk->FreeMemory(s->hwctx->act_dev, buf->mem, s->hwctx->alloc);
        buf->mem = VK_NULL_HANDLE;
        err = AVERROR_EXTERNAL;
        goto fail;
    }

    return 0;

fail:
    /* Don't leak the buffer on error */
    vk->DestroyBuffer(s->hwctx->act_dev, buf->buf, s->hwctx->alloc);
    buf->buf = VK_NULL_HANDLE;
    return err;
}
257 
/**
 * Maps nb_buffers buffers' memory into host address space, writing the host
 * pointers into mem[]. If invalidate is set, non-coherent memory ranges are
 * invalidated so host reads see device writes.
 *
 * Fixes vs. original: buffers mapped before a MapMemory failure were never
 * unmapped (leaked mappings), and the av_fast_realloc() result was not
 * stored back into s->scratch, leaving a stale (possibly freed) pointer for
 * the next call.
 *
 * Returns 0 on success or a negative AVERROR code.
 */
int ff_vk_map_buffers(FFVulkanContext *s, FFVkBuffer *buf, uint8_t *mem[],
                      int nb_buffers, int invalidate)
{
    VkResult ret;
    FFVulkanFunctions *vk = &s->vkfn;
    VkMappedMemoryRange *inval_list = NULL;
    int inval_count = 0;

    for (int i = 0; i < nb_buffers; i++) {
        ret = vk->MapMemory(s->hwctx->act_dev, buf[i].mem, 0,
                            VK_WHOLE_SIZE, 0, (void **)&mem[i]);
        if (ret != VK_SUCCESS) {
            av_log(s, AV_LOG_ERROR, "Failed to map buffer memory: %s\n",
                   ff_vk_ret2str(ret));
            /* Roll back the mappings made so far */
            for (int j = 0; j < i; j++)
                vk->UnmapMemory(s->hwctx->act_dev, buf[j].mem);
            return AVERROR_EXTERNAL;
        }
    }

    if (!invalidate)
        return 0;

    for (int i = 0; i < nb_buffers; i++) {
        const VkMappedMemoryRange ival_buf = {
            .sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
            .memory = buf[i].mem,
            .size   = VK_WHOLE_SIZE,
        };
        /* Coherent memory needs no explicit invalidation */
        if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
            continue;
        inval_list = av_fast_realloc(s->scratch, &s->scratch_size,
                                     (++inval_count)*sizeof(*inval_list));
        if (!inval_list)
            return AVERROR(ENOMEM);
        /* av_fast_realloc may have moved the buffer; keep the context's
         * scratch pointer in sync to avoid a dangling pointer next call */
        s->scratch = (void *)inval_list;
        inval_list[inval_count - 1] = ival_buf;
    }

    if (inval_count) {
        ret = vk->InvalidateMappedMemoryRanges(s->hwctx->act_dev, inval_count,
                                               inval_list);
        if (ret != VK_SUCCESS) {
            av_log(s, AV_LOG_ERROR, "Failed to invalidate memory: %s\n",
                   ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
    }

    return 0;
}
306 
/**
 * Unmaps nb_buffers buffers' memory. If flush is set, non-coherent ranges
 * are flushed first so device reads see host writes.
 *
 * Fixes vs. original: the av_fast_realloc() result was not stored back into
 * s->scratch (stale pointer on the next call), and an ENOMEM while building
 * the flush list returned early without unmapping — contradicting the
 * "we still want to try to unmap them" intent on the flush-failure path.
 *
 * Returns 0, or the first error encountered (buffers are unmapped anyway).
 */
int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer *buf, int nb_buffers,
                        int flush)
{
    int err = 0;
    VkResult ret;
    FFVulkanFunctions *vk = &s->vkfn;
    VkMappedMemoryRange *flush_list = NULL;
    int flush_count = 0;

    if (flush) {
        for (int i = 0; i < nb_buffers; i++) {
            const VkMappedMemoryRange flush_buf = {
                .sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
                .memory = buf[i].mem,
                .size   = VK_WHOLE_SIZE,
            };
            /* Coherent memory needs no explicit flush */
            if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
                continue;
            flush_list = av_fast_realloc(s->scratch, &s->scratch_size,
                                         (++flush_count)*sizeof(*flush_list));
            if (!flush_list) {
                err = AVERROR(ENOMEM);
                flush_count = 0; /* Skip flushing, but still unmap below */
                break;
            }
            /* Keep the context's scratch pointer in sync after realloc */
            s->scratch = (void *)flush_list;
            flush_list[flush_count - 1] = flush_buf;
        }
    }

    if (flush_count) {
        ret = vk->FlushMappedMemoryRanges(s->hwctx->act_dev, flush_count,
                                          flush_list);
        if (ret != VK_SUCCESS) {
            av_log(s, AV_LOG_ERROR, "Failed to flush memory: %s\n",
                   ff_vk_ret2str(ret));
            err = AVERROR_EXTERNAL; /* We still want to try to unmap them */
        }
    }

    for (int i = 0; i < nb_buffers; i++)
        vk->UnmapMemory(s->hwctx->act_dev, buf[i].mem);

    return err;
}
348 
/* Destroys a buffer created by ff_vk_create_buf() and frees its memory.
 * Waits for the whole device to go idle first, as the buffer may still be
 * referenced by an in-flight submission. Safe on partially-initialized
 * buffers: VK_NULL_HANDLE members are skipped, and a NULL buf or missing
 * hwctx is a no-op. */
void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf)
{
    FFVulkanFunctions *vk = &s->vkfn;

    if (!buf || !s->hwctx)
        return;

    vk->DeviceWaitIdle(s->hwctx->act_dev);

    if (buf->buf != VK_NULL_HANDLE)
        vk->DestroyBuffer(s->hwctx->act_dev, buf->buf, s->hwctx->alloc);
    if (buf->mem != VK_NULL_HANDLE)
        vk->FreeMemory(s->hwctx->act_dev, buf->mem, s->hwctx->alloc);
}
363 
/**
 * Appends a push-constant range (zero-initialized, then filled with the
 * given offset/size/stage) to the pipeline's list.
 *
 * Fix vs. original: `pl->push_consts = av_realloc_array(pl->push_consts, …)`
 * overwrote the only pointer to the array, leaking it when the realloc
 * failed. Use a temporary and only commit on success.
 *
 * Returns 0 on success, AVERROR(ENOMEM) on allocation failure (the existing
 * list is left intact).
 */
int ff_vk_add_push_constant(FFVulkanPipeline *pl, int offset, int size,
                            VkShaderStageFlagBits stage)
{
    VkPushConstantRange *pc, *tmp;

    tmp = av_realloc_array(pl->push_consts, sizeof(*pl->push_consts),
                           pl->push_consts_num + 1);
    if (!tmp)
        return AVERROR(ENOMEM);
    pl->push_consts = tmp;

    pc = &pl->push_consts[pl->push_consts_num++];
    memset(pc, 0, sizeof(*pc));

    pc->stageFlags = stage;
    pc->offset = offset;
    pc->size = size;

    return 0;
}
383 
FN_CREATING(FFVulkanContext, FFVkExecContext, exec_ctx, exec_ctx, exec_ctx_num)
/* Creates an execution context for the given queue family: one command pool,
 * plus one command buffer and one queue handle per qf->nb_queues. The
 * context is registered in s->exec_ctx via create_exec_ctx(), so it is
 * presumably freed with the FFVulkanContext — TODO confirm; on the error
 * paths below e (and its queues/bufs arrays) is left in that list rather
 * than freed here. Returns 0 or a negative AVERROR code. */
int ff_vk_create_exec_ctx(FFVulkanContext *s, FFVkExecContext **ctx,
                          FFVkQueueFamilyCtx *qf)
{
    VkResult ret;
    FFVkExecContext *e;
    FFVulkanFunctions *vk = &s->vkfn;

    VkCommandPoolCreateInfo cqueue_create = {
        .sType              = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
        .flags              = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
        .queueFamilyIndex   = qf->queue_family,
    };
    VkCommandBufferAllocateInfo cbuf_create = {
        .sType              = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
        .level              = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
        .commandBufferCount = qf->nb_queues,
    };

    e = create_exec_ctx(s);
    if (!e)
        return AVERROR(ENOMEM);

    e->qf = qf;

    e->queues = av_mallocz(qf->nb_queues * sizeof(*e->queues));
    if (!e->queues)
        return AVERROR(ENOMEM);

    e->bufs = av_mallocz(qf->nb_queues * sizeof(*e->bufs));
    if (!e->bufs)
        return AVERROR(ENOMEM);

    /* Create command pool */
    ret = vk->CreateCommandPool(s->hwctx->act_dev, &cqueue_create,
                              s->hwctx->alloc, &e->pool);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Command pool creation failure: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    cbuf_create.commandPool = e->pool;

    /* Allocate command buffer */
    ret = vk->AllocateCommandBuffers(s->hwctx->act_dev, &cbuf_create, e->bufs);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Command buffer alloc failure: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    /* Fetch a queue handle per slot; slots share physical queues round-robin
     * when nb_queues exceeds what the family actually provides */
    for (int i = 0; i < qf->nb_queues; i++) {
        FFVkQueueCtx *q = &e->queues[i];
        vk->GetDeviceQueue(s->hwctx->act_dev, qf->queue_family,
                           i % qf->actual_queues, &q->queue);
    }

    *ctx = e;

    return 0;
}
446 
/* Releases every dependency held by the currently-selected queue (buffer
 * references and cloned frames) and resets the semaphore counters so the
 * wait/signal arrays can be refilled for the next recording. */
void ff_vk_discard_exec_deps(FFVkExecContext *e)
{
    FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];

    for (int i = 0; i < q->nb_buf_deps; i++)
        av_buffer_unref(&q->buf_deps[i]);
    q->nb_buf_deps = 0;

    for (int i = 0; i < q->nb_frame_deps; i++)
        av_frame_free(&q->frame_deps[i]);
    q->nb_frame_deps = 0;

    e->sem_wait_cnt = 0;
    e->sem_sig_cnt  = 0;
}
462 
/**
 * Begins command recording on the current queue's command buffer.
 * Creates the queue's fence on first use (unsignaled, nothing to wait on);
 * on reuse, waits for the previous submission to finish and resets the
 * fence before recording over the buffer. All previously held dependencies
 * are discarded.
 *
 * Fix vs. original: error-message typos ("recoding" -> "recording",
 * "queue frame fence" -> "create frame fence").
 *
 * Returns 0 on success or a negative AVERROR code.
 */
int ff_vk_start_exec_recording(FFVulkanContext *s, FFVkExecContext *e)
{
    VkResult ret;
    FFVulkanFunctions *vk = &s->vkfn;
    FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];

    VkCommandBufferBeginInfo cmd_start = {
        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
        .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
    };

    /* Create the fence and don't wait for it initially */
    if (!q->fence) {
        VkFenceCreateInfo fence_spawn = {
            .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
        };
        ret = vk->CreateFence(s->hwctx->act_dev, &fence_spawn, s->hwctx->alloc,
                              &q->fence);
        if (ret != VK_SUCCESS) {
            av_log(s, AV_LOG_ERROR, "Failed to create frame fence: %s\n",
                   ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
    } else {
        /* Queue reuse: the previous submission must complete before its
         * command buffer can be re-recorded */
        vk->WaitForFences(s->hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
        vk->ResetFences(s->hwctx->act_dev, 1, &q->fence);
    }

    /* Discard queue dependencies */
    ff_vk_discard_exec_deps(e);

    ret = vk->BeginCommandBuffer(e->bufs[e->qf->cur_queue], &cmd_start);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Failed to start command recording: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    return 0;
}
503 
ff_vk_get_exec_buf(FFVkExecContext * e)504 VkCommandBuffer ff_vk_get_exec_buf(FFVkExecContext *e)
505 {
506     return e->bufs[e->qf->cur_queue];
507 }
508 
/* Registers a frame as a dependency of the current recording: for each
 * plane, the frame's timeline semaphore is added both to the wait list
 * (at its current value, with in_wait_dst_flag as the wait stage) and to
 * the signal list (at value + 1; sem_sig_val_dst remembers where to write
 * the new value back after submission). A clone of the frame is kept alive
 * in the queue's dependency list until the next recording starts.
 * On any allocation failure all pending dependencies are discarded and
 * AVERROR(ENOMEM) is returned. */
int ff_vk_add_exec_dep(FFVulkanContext *s, FFVkExecContext *e, AVFrame *frame,
                       VkPipelineStageFlagBits in_wait_dst_flag)
{
    AVFrame **dst;
    AVVkFrame *f = (AVVkFrame *)frame->data[0];
    FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
    AVHWFramesContext *fc = (AVHWFramesContext *)frame->hw_frames_ctx->data;
    int planes = av_pix_fmt_count_planes(fc->sw_format);

    for (int i = 0; i < planes; i++) {
        /* Grow all six parallel arrays before touching any of them */
        e->sem_wait = av_fast_realloc(e->sem_wait, &e->sem_wait_alloc,
                                      (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait));
        if (!e->sem_wait) {
            ff_vk_discard_exec_deps(e);
            return AVERROR(ENOMEM);
        }

        e->sem_wait_dst = av_fast_realloc(e->sem_wait_dst, &e->sem_wait_dst_alloc,
                                          (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait_dst));
        if (!e->sem_wait_dst) {
            ff_vk_discard_exec_deps(e);
            return AVERROR(ENOMEM);
        }

        e->sem_wait_val = av_fast_realloc(e->sem_wait_val, &e->sem_wait_val_alloc,
                                          (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait_val));
        if (!e->sem_wait_val) {
            ff_vk_discard_exec_deps(e);
            return AVERROR(ENOMEM);
        }

        e->sem_sig = av_fast_realloc(e->sem_sig, &e->sem_sig_alloc,
                                     (e->sem_sig_cnt + 1)*sizeof(*e->sem_sig));
        if (!e->sem_sig) {
            ff_vk_discard_exec_deps(e);
            return AVERROR(ENOMEM);
        }

        e->sem_sig_val = av_fast_realloc(e->sem_sig_val, &e->sem_sig_val_alloc,
                                         (e->sem_sig_cnt + 1)*sizeof(*e->sem_sig_val));
        if (!e->sem_sig_val) {
            ff_vk_discard_exec_deps(e);
            return AVERROR(ENOMEM);
        }

        e->sem_sig_val_dst = av_fast_realloc(e->sem_sig_val_dst, &e->sem_sig_val_dst_alloc,
                                             (e->sem_sig_cnt + 1)*sizeof(*e->sem_sig_val_dst));
        if (!e->sem_sig_val_dst) {
            ff_vk_discard_exec_deps(e);
            return AVERROR(ENOMEM);
        }

        /* Wait on the plane's semaphore at its current value... */
        e->sem_wait[e->sem_wait_cnt] = f->sem[i];
        e->sem_wait_dst[e->sem_wait_cnt] = in_wait_dst_flag;
        e->sem_wait_val[e->sem_wait_cnt] = f->sem_value[i];
        e->sem_wait_cnt++;

        /* ...and signal it at value + 1 when the submission completes */
        e->sem_sig[e->sem_sig_cnt] = f->sem[i];
        e->sem_sig_val[e->sem_sig_cnt] = f->sem_value[i] + 1;
        e->sem_sig_val_dst[e->sem_sig_cnt] = &f->sem_value[i];
        e->sem_sig_cnt++;
    }

    /* Keep a clone of the frame alive until dependencies are discarded */
    dst = av_fast_realloc(q->frame_deps, &q->frame_deps_alloc_size,
                          (q->nb_frame_deps + 1) * sizeof(*dst));
    if (!dst) {
        ff_vk_discard_exec_deps(e);
        return AVERROR(ENOMEM);
    }

    q->frame_deps = dst;
    q->frame_deps[q->nb_frame_deps] = av_frame_clone(frame);
    if (!q->frame_deps[q->nb_frame_deps]) {
        ff_vk_discard_exec_deps(e);
        return AVERROR(ENOMEM);
    }
    q->nb_frame_deps++;

    return 0;
}
589 
/* Ends recording on the current command buffer and submits it to the
 * current queue, waiting on / signaling the timeline semaphores collected
 * by ff_vk_add_exec_dep(). The queue's fence is signaled on completion
 * (waited on by the next ff_vk_start_exec_recording on this queue).
 * On success, each frame's stored semaphore value is bumped via the
 * sem_sig_val_dst back-pointers. Returns 0 or a negative AVERROR code. */
int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e)
{
    VkResult ret;
    FFVulkanFunctions *vk = &s->vkfn;
    FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];

    VkTimelineSemaphoreSubmitInfo s_timeline_sem_info = {
        .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
        .pWaitSemaphoreValues = e->sem_wait_val,
        .pSignalSemaphoreValues = e->sem_sig_val,
        .waitSemaphoreValueCount = e->sem_wait_cnt,
        .signalSemaphoreValueCount = e->sem_sig_cnt,
    };

    VkSubmitInfo s_info = {
        .sType                = VK_STRUCTURE_TYPE_SUBMIT_INFO,
        .pNext                = &s_timeline_sem_info,

        .commandBufferCount   = 1,
        .pCommandBuffers      = &e->bufs[e->qf->cur_queue],

        .pWaitSemaphores      = e->sem_wait,
        .pWaitDstStageMask    = e->sem_wait_dst,
        .waitSemaphoreCount   = e->sem_wait_cnt,

        .pSignalSemaphores    = e->sem_sig,
        .signalSemaphoreCount = e->sem_sig_cnt,
    };

    ret = vk->EndCommandBuffer(e->bufs[e->qf->cur_queue]);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Unable to finish command buffer: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    ret = vk->QueueSubmit(q->queue, 1, &s_info, q->fence);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Unable to submit command buffer: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    /* Record the post-submit semaphore values back into the frames */
    for (int i = 0; i < e->sem_sig_cnt; i++)
        *e->sem_sig_val_dst[i] += 1;

    return 0;
}
638 
/* Takes ownership of nb_deps buffer references and attaches them to the
 * current queue, keeping them alive until its dependencies are discarded.
 * A NULL deps array or zero count is a no-op. On allocation failure (or a
 * NULL entry in deps), all of the queue's pending dependencies — including
 * any entries already taken from deps — are discarded and AVERROR(ENOMEM)
 * is returned. NOTE(review): remaining deps[] entries are not unreffed on
 * that path; presumably the caller still owns them — confirm at call sites. */
int ff_vk_add_dep_exec_ctx(FFVulkanContext *s, FFVkExecContext *e,
                           AVBufferRef **deps, int nb_deps)
{
    AVBufferRef **dst;
    FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];

    if (!deps || !nb_deps)
        return 0;

    dst = av_fast_realloc(q->buf_deps, &q->buf_deps_alloc_size,
                          (q->nb_buf_deps + nb_deps) * sizeof(*dst));
    if (!dst)
        goto err;

    q->buf_deps = dst;

    for (int i = 0; i < nb_deps; i++) {
        q->buf_deps[q->nb_buf_deps] = deps[i];
        if (!q->buf_deps[q->nb_buf_deps])
            goto err;
        q->nb_buf_deps++;
    }

    return 0;

err:
    ff_vk_discard_exec_deps(e);
    return AVERROR(ENOMEM);
}
668 
FN_CREATING(FFVulkanContext,FFVkSampler,sampler,samplers,samplers_num)669 FN_CREATING(FFVulkanContext, FFVkSampler, sampler, samplers, samplers_num)
670 FFVkSampler *ff_vk_init_sampler(FFVulkanContext *s,
671                                 int unnorm_coords, VkFilter filt)
672 {
673     VkResult ret;
674     FFVulkanFunctions *vk = &s->vkfn;
675 
676     VkSamplerCreateInfo sampler_info = {
677         .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
678         .magFilter = filt,
679         .minFilter = sampler_info.magFilter,
680         .mipmapMode = unnorm_coords ? VK_SAMPLER_MIPMAP_MODE_NEAREST :
681                                       VK_SAMPLER_MIPMAP_MODE_LINEAR,
682         .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
683         .addressModeV = sampler_info.addressModeU,
684         .addressModeW = sampler_info.addressModeU,
685         .anisotropyEnable = VK_FALSE,
686         .compareOp = VK_COMPARE_OP_NEVER,
687         .borderColor = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK,
688         .unnormalizedCoordinates = unnorm_coords,
689     };
690 
691     FFVkSampler *sctx = create_sampler(s);
692     if (!sctx)
693         return NULL;
694 
695     ret = vk->CreateSampler(s->hwctx->act_dev, &sampler_info,
696                             s->hwctx->alloc, &sctx->sampler[0]);
697     if (ret != VK_SUCCESS) {
698         av_log(s, AV_LOG_ERROR, "Unable to init sampler: %s\n",
699                ff_vk_ret2str(ret));
700         return NULL;
701     }
702 
703     for (int i = 1; i < 4; i++)
704         sctx->sampler[i] = sctx->sampler[0];
705 
706     return sctx;
707 }
708 
ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt)709 int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt)
710 {
711     if (pix_fmt == AV_PIX_FMT_ABGR   || pix_fmt == AV_PIX_FMT_BGRA   ||
712         pix_fmt == AV_PIX_FMT_RGBA   || pix_fmt == AV_PIX_FMT_RGB24  ||
713         pix_fmt == AV_PIX_FMT_BGR24  || pix_fmt == AV_PIX_FMT_RGB48  ||
714         pix_fmt == AV_PIX_FMT_RGBA64 || pix_fmt == AV_PIX_FMT_RGB565 ||
715         pix_fmt == AV_PIX_FMT_BGR565 || pix_fmt == AV_PIX_FMT_BGR0   ||
716         pix_fmt == AV_PIX_FMT_0BGR   || pix_fmt == AV_PIX_FMT_RGB0)
717         return 1;
718     return 0;
719 }
720 
ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt)721 const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt)
722 {
723     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pixfmt);
724     const int high = desc->comp[0].depth > 8;
725     return high ? "rgba16f" : "rgba8";
726 }
727 
/* Payload for the AVBufferRef that owns a VkImageView; destroyed by
 * destroy_imageview() when the last reference is released. */
typedef struct ImageViewCtx {
    VkImageView view;
} ImageViewCtx;
731 
/* AVBuffer free callback: destroys the wrapped VkImageView, then frees the
 * ImageViewCtx allocation itself. `opaque` is the FFVulkanContext. */
static void destroy_imageview(void *opaque, uint8_t *data)
{
    ImageViewCtx *iv_ctx  = (ImageViewCtx *)data;
    FFVulkanContext *s    = opaque;
    FFVulkanFunctions *vk = &s->vkfn;

    vk->DestroyImageView(s->hwctx->act_dev, iv_ctx->view, s->hwctx->alloc);
    av_free(iv_ctx);
}
741 
/**
 * Creates a 2D color image view for img with the given format and component
 * mapping, writes it to *v, and registers its destruction as a dependency
 * of the current recording (the view lives until the queue's dependencies
 * are discarded).
 *
 * Fixes vs. original: the av_mallocz() result was dereferenced without a
 * NULL check, and the ImageViewCtx was leaked when CreateImageView failed.
 *
 * Returns 0 on success or a negative AVERROR code.
 */
int ff_vk_create_imageview(FFVulkanContext *s, FFVkExecContext *e,
                           VkImageView *v, VkImage img, VkFormat fmt,
                           const VkComponentMapping map)
{
    int err;
    VkResult ret;
    AVBufferRef *buf;
    FFVulkanFunctions *vk = &s->vkfn;
    ImageViewCtx *iv;

    VkImageViewCreateInfo imgview_spawn = {
        .sType      = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
        .pNext      = NULL,
        .image      = img,
        .viewType   = VK_IMAGE_VIEW_TYPE_2D,
        .format     = fmt,
        .components = map,
        .subresourceRange = {
            .aspectMask     = VK_IMAGE_ASPECT_COLOR_BIT,
            .baseMipLevel   = 0,
            .levelCount     = 1,
            .baseArrayLayer = 0,
            .layerCount     = 1,
        },
    };

    iv = av_mallocz(sizeof(*iv));
    if (!iv)
        return AVERROR(ENOMEM);

    ret = vk->CreateImageView(s->hwctx->act_dev, &imgview_spawn,
                              s->hwctx->alloc, &iv->view);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Failed to create imageview: %s\n",
               ff_vk_ret2str(ret));
        av_free(iv); /* No view was created; just free the context */
        return AVERROR_EXTERNAL;
    }

    buf = av_buffer_create((uint8_t *)iv, sizeof(*iv), destroy_imageview, s, 0);
    if (!buf) {
        destroy_imageview(s, (uint8_t *)iv);
        return AVERROR(ENOMEM);
    }

    /* Add to queue dependencies */
    err = ff_vk_add_dep_exec_ctx(s, e, &buf, 1);
    if (err) {
        av_buffer_unref(&buf);
        return err;
    }

    *v = iv->view;

    return 0;
}
793 
FN_CREATING(FFVulkanPipeline, FFVkSPIRVShader, shader, shaders, shaders_num)
/* Creates a shader attached to the pipeline and seeds its GLSL source with
 * the #version pragma and an IS_WITHIN helper macro. `name` is stored as-is
 * (not copied), so it must outlive the shader. Returns NULL on allocation
 * failure. */
FFVkSPIRVShader *ff_vk_init_shader(FFVulkanPipeline *pl, const char *name,
                                   VkShaderStageFlags stage)
{
    FFVkSPIRVShader *shd = create_shader(pl);
    if (!shd)
        return NULL;

    av_bprint_init(&shd->src, 0, AV_BPRINT_SIZE_UNLIMITED);

    shd->shader.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
    shd->shader.stage = stage;

    shd->name = name;

    GLSLF(0, #version %i                                                  ,460);
    GLSLC(0, #define IS_WITHIN(v1, v2) ((v1.x < v2.x) && (v1.y < v2.y))       );
    GLSLC(0,                                                                  );

    return shd;
}
815 
/* Stores the compute shader's workgroup dimensions and emits the matching
 * GLSL layout(local_size_*) declaration into its source. */
void ff_vk_set_compute_shader_sizes(FFVkSPIRVShader *shd, int local_size[3])
{
    for (int i = 0; i < 3; i++)
        shd->local_size[i] = local_size[i];

    av_bprintf(&shd->src, "layout (local_size_x = %i, "
               "local_size_y = %i, local_size_z = %i) in;\n\n",
               shd->local_size[0], shd->local_size[1], shd->local_size[2]);
}
826 
/* Logs the shader's GLSL source with each line prefixed by its number, so
 * compiler diagnostics (which reference line numbers) can be matched up.
 * Fixes: loop index was a signed int compared against size_t, and any
 * trailing text after the final newline was silently dropped. */
void ff_vk_print_shader(void *ctx, FFVkSPIRVShader *shd, int prio)
{
    int line = 0;
    const char *p = shd->src.str;
    const char *start = p;
    const size_t len = strlen(p);

    AVBPrint buf;
    av_bprint_init(&buf, 0, AV_BPRINT_SIZE_UNLIMITED);

    /* Copy each newline-terminated line, prefixed with "N\t". */
    for (size_t i = 0; i < len; i++) {
        if (p[i] == '\n') {
            av_bprintf(&buf, "%i\t", ++line);
            av_bprint_append_data(&buf, start, &p[i] - start + 1);
            start = &p[i + 1];
        }
    }

    /* Flush a final line that is not newline-terminated, if any. */
    if (start < p + len) {
        av_bprintf(&buf, "%i\t", ++line);
        av_bprint_append_data(&buf, start, p + len - start);
    }

    av_log(ctx, prio, "Shader %s: \n%s", shd->name, buf.str);
    av_bprint_finalize(&buf, NULL);
}
848 
/* Compiles the shader's accumulated GLSL source to SPIR-V (lazily creating
 * the process-wide compiler backend on first use) and wraps the result in a
 * VkShaderModule stored in shd->shader.module.
 * Returns 0 on success, a negative AVERROR on failure. Returns
 * AVERROR(ENOSYS) if neither glslang nor shaderc was compiled in. */
int ff_vk_compile_shader(FFVulkanContext *s, FFVkSPIRVShader *shd,
                         const char *entrypoint)
{
    int err;
    VkResult ret;
    FFVulkanFunctions *vk = &s->vkfn;
    VkShaderModuleCreateInfo shader_create;
    uint8_t *spirv;
    size_t spirv_size;
    void *priv;

    /* Entry point name must outlive the shader; callers pass literals. */
    shd->shader.pName = entrypoint;

    /* Lazily initialize whichever SPIR-V compiler backend is available. */
    if (!s->spirv_compiler) {
#if CONFIG_LIBGLSLANG
        s->spirv_compiler = ff_vk_glslang_init();
#elif CONFIG_LIBSHADERC
        s->spirv_compiler = ff_vk_shaderc_init();
#else
        return AVERROR(ENOSYS);
#endif
        if (!s->spirv_compiler)
            return AVERROR(ENOMEM);
    }

    /* spirv/spirv_size are backed by priv and valid until free_shader. */
    err = s->spirv_compiler->compile_shader(s->spirv_compiler, s, shd, &spirv,
                                            &spirv_size, entrypoint, &priv);
    if (err < 0)
        return err;

    av_log(s, AV_LOG_VERBOSE, "Shader %s compiled! Size: %zu bytes\n",
           shd->name, spirv_size);

    shader_create.sType    = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
    shader_create.pNext    = NULL;
    shader_create.codeSize = spirv_size;
    shader_create.flags    = 0;
    shader_create.pCode    = (void *)spirv;

    ret = vk->CreateShaderModule(s->hwctx->act_dev, &shader_create, NULL,
                                 &shd->shader.module);

    /* The SPIR-V blob is copied by CreateShaderModule, so the compiler's
     * buffer can be released regardless of ret. */
    s->spirv_compiler->free_shader(s->spirv_compiler, &priv);

    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Unable to create shader module: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    return 0;
}
901 
/* Per-VkDescriptorType properties used when declaring descriptors in GLSL
 * and when building update templates. Indexed directly by descriptor type.
 * Column order matches the struct fields:
 *   struct_size, type (GLSL keyword, NULL if the binding name is used
 *   directly), is_uniform, mem_quali, dim_needed, buf_content. */
static const struct descriptor_props {
    size_t struct_size; /* Size of the opaque which updates the descriptor */
    const char *type;
    int is_uniform;
    int mem_quali;      /* Can use a memory qualifier */
    int dim_needed;     /* Must indicate dimension */
    int buf_content;    /* Must indicate buffer contents */
} descriptor_props[] = {
    [VK_DESCRIPTOR_TYPE_SAMPLER]                = { sizeof(VkDescriptorImageInfo),  "sampler",       1, 0, 0, 0, },
    [VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE]          = { sizeof(VkDescriptorImageInfo),  "texture",       1, 0, 1, 0, },
    [VK_DESCRIPTOR_TYPE_STORAGE_IMAGE]          = { sizeof(VkDescriptorImageInfo),  "image",         1, 1, 1, 0, },
    [VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT]       = { sizeof(VkDescriptorImageInfo),  "subpassInput",  1, 0, 0, 0, },
    [VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER] = { sizeof(VkDescriptorImageInfo),  "sampler",       1, 0, 1, 0, },
    [VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER]         = { sizeof(VkDescriptorBufferInfo),  NULL,           1, 0, 0, 1, },
    [VK_DESCRIPTOR_TYPE_STORAGE_BUFFER]         = { sizeof(VkDescriptorBufferInfo), "buffer",        0, 1, 0, 1, },
    [VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC] = { sizeof(VkDescriptorBufferInfo),  NULL,           1, 0, 0, 1, },
    [VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC] = { sizeof(VkDescriptorBufferInfo), "buffer",        0, 1, 0, 1, },
    [VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER]   = { sizeof(VkBufferView),           "samplerBuffer", 1, 0, 0, 0, },
    [VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER]   = { sizeof(VkBufferView),           "imageBuffer",   1, 0, 0, 0, },
};
922 
/* Registers a descriptor set (num bindings described by desc) with the
 * pipeline and appends the matching GLSL declarations to shd's source.
 * One layout and one update template are created per queue so each queue
 * owns an independent copy of the set. With only_print_to_shader set, only
 * the GLSL declarations are emitted (used when several shaders share a set).
 * Returns 0 on success, a negative AVERROR on failure.
 * Fix: des_entries was leaked if the desc_template_info realloc failed. */
int ff_vk_add_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl,
                             FFVkSPIRVShader *shd, FFVulkanDescriptorSetBinding *desc,
                             int num, int only_print_to_shader)
{
    VkResult ret;
    VkDescriptorSetLayout *layout;
    FFVulkanFunctions *vk = &s->vkfn;

    if (only_print_to_shader)
        goto print;

    /* Grow the layout array by one slot per queue. */
    pl->desc_layout = av_realloc_array(pl->desc_layout, sizeof(*pl->desc_layout),
                                       pl->desc_layout_num + pl->qf->nb_queues);
    if (!pl->desc_layout)
        return AVERROR(ENOMEM);

    pl->desc_set_initialized = av_realloc_array(pl->desc_set_initialized,
                                                sizeof(*pl->desc_set_initialized),
                                                pl->descriptor_sets_num + 1);
    if (!pl->desc_set_initialized)
        return AVERROR(ENOMEM);

    pl->desc_set_initialized[pl->descriptor_sets_num] = 0;
    layout = &pl->desc_layout[pl->desc_layout_num];

    { /* Create descriptor set layout descriptions */
        VkDescriptorSetLayoutCreateInfo desc_create_layout = { 0 };
        VkDescriptorSetLayoutBinding *desc_binding;

        desc_binding = av_mallocz(sizeof(*desc_binding)*num);
        if (!desc_binding)
            return AVERROR(ENOMEM);

        for (int i = 0; i < num; i++) {
            desc_binding[i].binding            = i;
            desc_binding[i].descriptorType     = desc[i].type;
            desc_binding[i].descriptorCount    = FFMAX(desc[i].elems, 1);
            desc_binding[i].stageFlags         = desc[i].stages;
            desc_binding[i].pImmutableSamplers = desc[i].sampler ?
                                                 desc[i].sampler->sampler :
                                                 NULL;
        }

        desc_create_layout.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
        desc_create_layout.pBindings = desc_binding;
        desc_create_layout.bindingCount = num;

        /* One identical layout per queue. */
        for (int i = 0; i < pl->qf->nb_queues; i++) {
            ret = vk->CreateDescriptorSetLayout(s->hwctx->act_dev, &desc_create_layout,
                                                s->hwctx->alloc, &layout[i]);
            if (ret != VK_SUCCESS) {
                av_log(s, AV_LOG_ERROR, "Unable to init descriptor set "
                       "layout: %s\n", ff_vk_ret2str(ret));
                av_free(desc_binding);
                return AVERROR_EXTERNAL;
            }
        }

        av_free(desc_binding);
    }

    { /* Pool each descriptor by type and update pool counts */
        for (int i = 0; i < num; i++) {
            int j;
            for (j = 0; j < pl->pool_size_desc_num; j++)
                if (pl->pool_size_desc[j].type == desc[i].type)
                    break;
            if (j >= pl->pool_size_desc_num) {
                /* Not seen before: append a zeroed slot for this type. */
                pl->pool_size_desc = av_realloc_array(pl->pool_size_desc,
                                                      sizeof(*pl->pool_size_desc),
                                                      ++pl->pool_size_desc_num);
                if (!pl->pool_size_desc)
                    return AVERROR(ENOMEM);
                memset(&pl->pool_size_desc[j], 0, sizeof(VkDescriptorPoolSize));
            }
            pl->pool_size_desc[j].type             = desc[i].type;
            pl->pool_size_desc[j].descriptorCount += FFMAX(desc[i].elems, 1)*pl->qf->nb_queues;
        }
    }

    { /* Create template creation struct */
        VkDescriptorUpdateTemplateCreateInfo *dt;
        VkDescriptorUpdateTemplateEntry *des_entries;

        /* Freed after descriptor set initialization */
        des_entries = av_mallocz(num*sizeof(VkDescriptorUpdateTemplateEntry));
        if (!des_entries)
            return AVERROR(ENOMEM);

        for (int i = 0; i < num; i++) {
            des_entries[i].dstBinding      = i;
            des_entries[i].descriptorType  = desc[i].type;
            des_entries[i].descriptorCount = FFMAX(desc[i].elems, 1);
            des_entries[i].dstArrayElement = 0;
            /* Updates are sourced from offsets relative to the context. */
            des_entries[i].offset          = ((uint8_t *)desc[i].updater) - (uint8_t *)s;
            des_entries[i].stride          = descriptor_props[desc[i].type].struct_size;
        }

        pl->desc_template_info = av_realloc_array(pl->desc_template_info,
                                                  sizeof(*pl->desc_template_info),
                                                  pl->total_descriptor_sets + pl->qf->nb_queues);
        if (!pl->desc_template_info) {
            av_free(des_entries); /* was leaked on this error path */
            return AVERROR(ENOMEM);
        }

        dt = &pl->desc_template_info[pl->total_descriptor_sets];
        memset(dt, 0, sizeof(*dt)*pl->qf->nb_queues);

        /* All per-queue templates share the same des_entries array; it is
         * freed once per stride in ff_vk_init_pipeline_layout. */
        for (int i = 0; i < pl->qf->nb_queues; i++) {
            dt[i].sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO;
            dt[i].templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET;
            dt[i].descriptorSetLayout = layout[i];
            dt[i].pDescriptorUpdateEntries = des_entries;
            dt[i].descriptorUpdateEntryCount = num;
        }
    }

    pl->descriptor_sets_num++;

    pl->desc_layout_num += pl->qf->nb_queues;
    pl->total_descriptor_sets += pl->qf->nb_queues;

print:
    /* Write shader info */
    for (int i = 0; i < num; i++) {
        const struct descriptor_props *prop = &descriptor_props[desc[i].type];
        GLSLA("layout (set = %i, binding = %i", pl->descriptor_sets_num - 1, i);

        if (desc[i].mem_layout)
            GLSLA(", %s", desc[i].mem_layout);
        GLSLA(")");

        if (prop->is_uniform)
            GLSLA(" uniform");

        if (prop->mem_quali && desc[i].mem_quali)
            GLSLA(" %s", desc[i].mem_quali);

        if (prop->type)
            GLSLA(" %s", prop->type);

        if (prop->dim_needed)
            GLSLA("%iD", desc[i].dimensions);

        GLSLA(" %s", desc[i].name);

        if (prop->buf_content)
            GLSLA(" {\n    %s\n}", desc[i].buf_content);
        else if (desc[i].elems > 0)
            GLSLA("[%i]", desc[i].elems);

        GLSLA(";\n");
    }
    GLSLA("\n");

    return 0;
}
1079 
/* Pushes the context's current descriptor data (via the set's update
 * template) into the given descriptor set. On the first update of a set,
 * every queue's copy is written and the set is marked initialized;
 * afterwards only the current queue's copy is refreshed. */
void ff_vk_update_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl,
                                 int set_id)
{
    FFVulkanFunctions *vk = &s->vkfn;
    const int nb_queues = pl->qf->nb_queues;

    if (!pl->desc_set_initialized[set_id]) {
        /* First touch: seed all per-queue copies of this set. */
        const int base = set_id * nb_queues;
        for (int i = 0; i < nb_queues; i++)
            vk->UpdateDescriptorSetWithTemplate(s->hwctx->act_dev,
                                                pl->desc_set[base + i],
                                                pl->desc_template[base + i],
                                                s);
        pl->desc_set_initialized[set_id] = 1;
    } else {
        const int idx = set_id * nb_queues + pl->qf->cur_queue;
        vk->UpdateDescriptorSetWithTemplate(s->hwctx->act_dev,
                                            pl->desc_set[idx],
                                            pl->desc_template[idx],
                                            s);
    }
}
1105 
/* Records a push-constant update into the current queue's command buffer,
 * targeting the pipeline currently bound on the exec context (e->bound_pl).
 * offset/size must match a range registered via the pipeline's push-constant
 * ranges. */
void ff_vk_update_push_exec(FFVulkanContext *s, FFVkExecContext *e,
                            VkShaderStageFlagBits stage, int offset,
                            size_t size, void *src)
{
    FFVulkanFunctions *vk = &s->vkfn;

    vk->CmdPushConstants(e->bufs[e->qf->cur_queue], e->bound_pl->pipeline_layout,
                         stage, offset, size, src);
}
1115 
/* Finalizes the pipeline layout after all descriptor sets have been added:
 * creates the descriptor pool, allocates all per-queue descriptor sets,
 * creates the VkPipelineLayout, and builds the descriptor update templates.
 * Returns 0 on success, a negative AVERROR on failure. */
int ff_vk_init_pipeline_layout(FFVulkanContext *s, FFVulkanPipeline *pl)
{
    VkResult ret;
    FFVulkanFunctions *vk = &s->vkfn;

    /* Scratch array reused here for layout dedup and later by
     * ff_vk_bind_pipeline_exec for per-queue set binding. */
    pl->desc_staging = av_malloc(pl->descriptor_sets_num*sizeof(*pl->desc_staging));
    if (!pl->desc_staging)
        return AVERROR(ENOMEM);

    { /* Init descriptor set pool */
        VkDescriptorPoolCreateInfo pool_create_info = {
            .sType         = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
            .poolSizeCount = pl->pool_size_desc_num,
            .pPoolSizes    = pl->pool_size_desc,
            .maxSets       = pl->total_descriptor_sets,
        };

        ret = vk->CreateDescriptorPool(s->hwctx->act_dev, &pool_create_info,
                                       s->hwctx->alloc, &pl->desc_pool);
        /* Pool sizes are only needed for creation; free unconditionally. */
        av_freep(&pl->pool_size_desc);
        if (ret != VK_SUCCESS) {
            av_log(s, AV_LOG_ERROR, "Unable to init descriptor set "
                   "pool: %s\n", ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
    }

    { /* Allocate descriptor sets */
        VkDescriptorSetAllocateInfo alloc_info = {
            .sType              = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
            .descriptorPool     = pl->desc_pool,
            .descriptorSetCount = pl->total_descriptor_sets,
            .pSetLayouts        = pl->desc_layout,
        };

        pl->desc_set = av_malloc(pl->total_descriptor_sets*sizeof(*pl->desc_set));
        if (!pl->desc_set)
            return AVERROR(ENOMEM);

        ret = vk->AllocateDescriptorSets(s->hwctx->act_dev, &alloc_info,
                                         pl->desc_set);
        if (ret != VK_SUCCESS) {
            av_log(s, AV_LOG_ERROR, "Unable to allocate descriptor set: %s\n",
                   ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
    }

    { /* Finally create the pipeline layout */
        VkPipelineLayoutCreateInfo spawn_pipeline_layout = {
            .sType                  = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
            .pSetLayouts            = (VkDescriptorSetLayout *)pl->desc_staging,
            .pushConstantRangeCount = pl->push_consts_num,
            .pPushConstantRanges    = pl->push_consts,
        };

        /* The layouts are duplicated per queue; the pipeline layout only
         * needs one representative per logical set, hence the stride. */
        for (int i = 0; i < pl->total_descriptor_sets; i += pl->qf->nb_queues)
            pl->desc_staging[spawn_pipeline_layout.setLayoutCount++] = pl->desc_layout[i];

        ret = vk->CreatePipelineLayout(s->hwctx->act_dev, &spawn_pipeline_layout,
                                       s->hwctx->alloc, &pl->pipeline_layout);
        /* Push-constant ranges are consumed by layout creation. */
        av_freep(&pl->push_consts);
        pl->push_consts_num = 0;
        if (ret != VK_SUCCESS) {
            av_log(s, AV_LOG_ERROR, "Unable to init pipeline layout: %s\n",
                   ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
    }

    { /* Descriptor template (for tightly packed descriptors) */
        VkDescriptorUpdateTemplateCreateInfo *dt;

        pl->desc_template = av_malloc(pl->total_descriptor_sets*sizeof(*pl->desc_template));
        if (!pl->desc_template)
            return AVERROR(ENOMEM);

        /* Create update templates for the descriptor sets */
        for (int i = 0; i < pl->total_descriptor_sets; i++) {
            dt = &pl->desc_template_info[i];
            dt->pipelineLayout = pl->pipeline_layout;
            ret = vk->CreateDescriptorUpdateTemplate(s->hwctx->act_dev,
                                                     dt, s->hwctx->alloc,
                                                     &pl->desc_template[i]);
            if (ret != VK_SUCCESS) {
                av_log(s, AV_LOG_ERROR, "Unable to init descriptor "
                       "template: %s\n", ff_vk_ret2str(ret));
                return AVERROR_EXTERNAL;
            }
        }

        /* Free the duplicated memory used for the template entries */
        /* All templates of one logical set share one entries array, so only
         * free once per nb_queues stride. */
        for (int i = 0; i < pl->total_descriptor_sets; i += pl->qf->nb_queues) {
            dt = &pl->desc_template_info[i];
            av_free((void *)dt->pDescriptorUpdateEntries);
        }

        av_freep(&pl->desc_template_info);
    }

    return 0;
}
1218 
FN_CREATING(FFVulkanContext,FFVulkanPipeline,pipeline,pipelines,pipelines_num)1219 FN_CREATING(FFVulkanContext, FFVulkanPipeline, pipeline, pipelines, pipelines_num)
1220 FFVulkanPipeline *ff_vk_create_pipeline(FFVulkanContext *s, FFVkQueueFamilyCtx *qf)
1221 {
1222     FFVulkanPipeline *pl = create_pipeline(s);
1223     if (pl)
1224         pl->qf = qf;
1225 
1226     return pl;
1227 }
1228 
/* Creates the VkPipeline for a compute pipeline from the first attached
 * compute-stage shader. ff_vk_init_pipeline_layout must have been called
 * first. Returns 0 on success, AVERROR(EINVAL) if no compute shader is
 * attached, AVERROR_EXTERNAL on Vulkan failure. */
int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl)
{
    VkResult ret;
    FFVulkanFunctions *vk = &s->vkfn;
    FFVkSPIRVShader *comp_shd = NULL;

    VkComputePipelineCreateInfo pipe = {
        .sType  = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
        .layout = pl->pipeline_layout,
    };

    /* Find the first shader with the compute stage bit set. */
    for (int i = 0; i < pl->shaders_num; i++) {
        if (pl->shaders[i]->shader.stage & VK_SHADER_STAGE_COMPUTE_BIT) {
            comp_shd = pl->shaders[i];
            break;
        }
    }

    if (!comp_shd) {
        av_log(s, AV_LOG_ERROR, "Can't init compute pipeline, no shader\n");
        return AVERROR(EINVAL);
    }

    pipe.stage = comp_shd->shader;

    ret = vk->CreateComputePipelines(s->hwctx->act_dev, VK_NULL_HANDLE, 1, &pipe,
                                     s->hwctx->alloc, &pl->pipeline);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Unable to init compute pipeline: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    pl->bind_point = VK_PIPELINE_BIND_POINT_COMPUTE;

    return 0;
}
1263 
/* Binds the pipeline and the current queue's descriptor sets into the exec
 * context's active command buffer, and records the pipeline as bound (used
 * later by ff_vk_update_push_exec). */
void ff_vk_bind_pipeline_exec(FFVulkanContext *s, FFVkExecContext *e,
                              FFVulkanPipeline *pl)
{
    FFVulkanFunctions *vk = &s->vkfn;
    VkCommandBuffer cmd_buf = e->bufs[e->qf->cur_queue];

    vk->CmdBindPipeline(cmd_buf, pl->bind_point, pl->pipeline);

    /* Gather this queue's copy of each logical set into the staging array
     * so everything can be bound with a single call. */
    for (int i = 0; i < pl->descriptor_sets_num; i++)
        pl->desc_staging[i] = pl->desc_set[i*pl->qf->nb_queues + pl->qf->cur_queue];

    vk->CmdBindDescriptorSets(cmd_buf, pl->bind_point,
                              pl->pipeline_layout, 0,
                              pl->descriptor_sets_num,
                              (VkDescriptorSet *)pl->desc_staging,
                              0, NULL);

    e->bound_pl = pl;
}
1282 
/* Tears down an execution context: waits out any in-flight work per queue,
 * destroys fences, drops buffer/frame dependencies, then frees the command
 * buffers, pool and the context itself. */
static void free_exec_ctx(FFVulkanContext *s, FFVkExecContext *e)
{
    FFVulkanFunctions *vk = &s->vkfn;

    /* Make sure all queues have finished executing */
    for (int i = 0; i < e->qf->nb_queues; i++) {
        FFVkQueueCtx *q = &e->queues[i];

        /* Wait for pending submissions, then dispose of the fence. */
        if (q->fence) {
            vk->WaitForFences(s->hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
            vk->ResetFences(s->hwctx->act_dev, 1, &q->fence);
            vk->DestroyFence(s->hwctx->act_dev, q->fence, s->hwctx->alloc);
        }

        /* Release buffer dependencies */
        for (int j = 0; j < q->nb_buf_deps; j++)
            av_buffer_unref(&q->buf_deps[j]);
        av_free(q->buf_deps);

        /* Release frame dependencies */
        for (int j = 0; j < q->nb_frame_deps; j++)
            av_frame_free(&q->frame_deps[j]);
        av_free(q->frame_deps);
    }

    /* Command buffers must go before their pool. */
    if (e->bufs)
        vk->FreeCommandBuffers(s->hwctx->act_dev, e->pool, e->qf->nb_queues, e->bufs);
    if (e->pool)
        vk->DestroyCommandPool(s->hwctx->act_dev, e->pool, s->hwctx->alloc);

    av_freep(&e->bufs);
    av_freep(&e->queues);
    av_freep(&e->sem_sig);
    av_freep(&e->sem_sig_val);
    av_freep(&e->sem_sig_val_dst);
    av_freep(&e->sem_wait);
    av_freep(&e->sem_wait_dst);
    av_freep(&e->sem_wait_val);
    av_free(e);
}
1326 
/* Destroys a pipeline and everything it owns: shaders (and their modules),
 * the Vulkan pipeline and layout, descriptor templates/layouts/pool, and
 * all host-side bookkeeping arrays. Safe to call on a partially-initialized
 * pipeline (handles NULL arrays and leftover failure-path state). */
static void free_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl)
{
    FFVulkanFunctions *vk = &s->vkfn;

    for (int i = 0; i < pl->shaders_num; i++) {
        FFVkSPIRVShader *shd = pl->shaders[i];
        av_bprint_finalize(&shd->src, NULL);
        vk->DestroyShaderModule(s->hwctx->act_dev, shd->shader.module,
                                s->hwctx->alloc);
        av_free(shd);
    }

    vk->DestroyPipeline(s->hwctx->act_dev, pl->pipeline, s->hwctx->alloc);
    vk->DestroyPipelineLayout(s->hwctx->act_dev, pl->pipeline_layout,
                              s->hwctx->alloc);

    /* Templates and layouts exist once per queue per set. */
    for (int i = 0; i < pl->desc_layout_num; i++) {
        if (pl->desc_template && pl->desc_template[i])
            vk->DestroyDescriptorUpdateTemplate(s->hwctx->act_dev, pl->desc_template[i],
                                                s->hwctx->alloc);
        if (pl->desc_layout && pl->desc_layout[i])
            vk->DestroyDescriptorSetLayout(s->hwctx->act_dev, pl->desc_layout[i],
                                           s->hwctx->alloc);
    }

    /* Also frees the descriptor sets */
    if (pl->desc_pool)
        vk->DestroyDescriptorPool(s->hwctx->act_dev, pl->desc_pool,
                                  s->hwctx->alloc);

    av_freep(&pl->desc_staging);
    av_freep(&pl->desc_set);
    av_freep(&pl->shaders);
    av_freep(&pl->desc_layout);
    av_freep(&pl->desc_template);
    av_freep(&pl->desc_set_initialized);
    av_freep(&pl->push_consts);
    pl->push_consts_num = 0;

    /* Only freed in case of failure */
    /* (On success these are released in ff_vk_init_pipeline_layout; the
     * entries array is shared per nb_queues stride, so free once per set.) */
    av_freep(&pl->pool_size_desc);
    if (pl->desc_template_info) {
        for (int i = 0; i < pl->total_descriptor_sets; i += pl->qf->nb_queues) {
            VkDescriptorUpdateTemplateCreateInfo *dt = &pl->desc_template_info[i];
            av_free((void *)dt->pDescriptorUpdateEntries);
        }
        av_freep(&pl->desc_template_info);
    }

    av_free(pl);
}
1378 
/* Frees everything owned by the Vulkan context: the SPIR-V compiler backend,
 * all execution contexts, samplers, pipelines, the scratch buffer, and the
 * device/frames context references. Must be called while the device is
 * still valid (Vulkan objects are destroyed against s->hwctx->act_dev). */
void ff_vk_uninit(FFVulkanContext *s)
{
    FFVulkanFunctions *vk = &s->vkfn;

    if (s->spirv_compiler)
        s->spirv_compiler->uninit(&s->spirv_compiler);

    for (int i = 0; i < s->exec_ctx_num; i++)
        free_exec_ctx(s, s->exec_ctx[i]);
    av_freep(&s->exec_ctx);

    for (int i = 0; i < s->samplers_num; i++) {
        vk->DestroySampler(s->hwctx->act_dev, s->samplers[i]->sampler[0],
                           s->hwctx->alloc);
        av_free(s->samplers[i]);
    }
    av_freep(&s->samplers);

    for (int i = 0; i < s->pipelines_num; i++)
        free_pipeline(s, s->pipelines[i]);
    av_freep(&s->pipelines);

    av_freep(&s->scratch);
    s->scratch_size = 0;

    /* Released last: the device backs all objects destroyed above. */
    av_buffer_unref(&s->device_ref);
    av_buffer_unref(&s->frames_ref);
}
1407