• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * This file is part of FFmpeg.
3  *
4  * FFmpeg is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * FFmpeg is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with FFmpeg; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 
19 #include "formats.h"
20 #include "vulkan.h"
21 #include "glslang.h"
22 
/* Generates a static create_<shortname>() helper for a parent context type.
 * The helper zero-allocates one object of `type`, appends its address to the
 * pointer array `array` (length tracked in `num`) of the parent context and
 * returns it. Only the pointer array is reallocated when it grows, so objects
 * created earlier keep their addresses. On allocation failure NULL is
 * returned and the parent context is not modified. */
#define FN_CREATING(ctx, type, shortname, array, num)                          \
static av_always_inline type *create_ ##shortname(ctx *dctx)                   \
{                                                                              \
    type *sctx;                                                                \
    type **array;                                                              \
                                                                               \
    sctx = av_mallocz(sizeof(*sctx));                                          \
    if (!sctx)                                                                 \
        return NULL;                                                           \
                                                                               \
    array = av_realloc_array(dctx->array, sizeof(*dctx->array), dctx->num + 1);\
    if (array) {                                                               \
        /* Publish the grown array, then take the new last slot */             \
        dctx->array = array;                                                   \
        dctx->array[dctx->num++] = sctx;                                       \
        return sctx;                                                           \
    }                                                                          \
                                                                               \
    av_free(sctx);                                                             \
    return NULL;                                                               \
}
43 
44 const VkComponentMapping ff_comp_identity_map = {
45     .r = VK_COMPONENT_SWIZZLE_IDENTITY,
46     .g = VK_COMPONENT_SWIZZLE_IDENTITY,
47     .b = VK_COMPONENT_SWIZZLE_IDENTITY,
48     .a = VK_COMPONENT_SWIZZLE_IDENTITY,
49 };
50 
51 /* Converts return values to strings */
ff_vk_ret2str(VkResult res)52 const char *ff_vk_ret2str(VkResult res)
53 {
54 #define CASE(VAL) case VAL: return #VAL
55     switch (res) {
56     CASE(VK_SUCCESS);
57     CASE(VK_NOT_READY);
58     CASE(VK_TIMEOUT);
59     CASE(VK_EVENT_SET);
60     CASE(VK_EVENT_RESET);
61     CASE(VK_INCOMPLETE);
62     CASE(VK_ERROR_OUT_OF_HOST_MEMORY);
63     CASE(VK_ERROR_OUT_OF_DEVICE_MEMORY);
64     CASE(VK_ERROR_INITIALIZATION_FAILED);
65     CASE(VK_ERROR_DEVICE_LOST);
66     CASE(VK_ERROR_MEMORY_MAP_FAILED);
67     CASE(VK_ERROR_LAYER_NOT_PRESENT);
68     CASE(VK_ERROR_EXTENSION_NOT_PRESENT);
69     CASE(VK_ERROR_FEATURE_NOT_PRESENT);
70     CASE(VK_ERROR_INCOMPATIBLE_DRIVER);
71     CASE(VK_ERROR_TOO_MANY_OBJECTS);
72     CASE(VK_ERROR_FORMAT_NOT_SUPPORTED);
73     CASE(VK_ERROR_FRAGMENTED_POOL);
74     CASE(VK_ERROR_SURFACE_LOST_KHR);
75     CASE(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR);
76     CASE(VK_SUBOPTIMAL_KHR);
77     CASE(VK_ERROR_OUT_OF_DATE_KHR);
78     CASE(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR);
79     CASE(VK_ERROR_VALIDATION_FAILED_EXT);
80     CASE(VK_ERROR_INVALID_SHADER_NV);
81     CASE(VK_ERROR_OUT_OF_POOL_MEMORY);
82     CASE(VK_ERROR_INVALID_EXTERNAL_HANDLE);
83     CASE(VK_ERROR_NOT_PERMITTED_EXT);
84     default: return "Unknown error";
85     }
86 #undef CASE
87 }
88 
/**
 * Allocates device memory matching a set of requirements and property flags.
 *
 * @param avctx            filter context; its priv data supplies the device
 * @param req              memory requirements; req->size is rounded up to
 *                         minMemoryMapAlignment (in place) when host-visible
 *                         memory is requested
 * @param req_flags        property flags the chosen memory type must include
 * @param alloc_extension  chained into VkMemoryAllocateInfo.pNext, may be NULL
 * @param mem_flags        the property flags of the chosen memory type are
 *                         OR-ed into this value on success
 * @param mem              receives the allocated memory handle
 * @return 0 on success, AVERROR(EINVAL) if no memory type matches,
 *         AVERROR(ENOMEM) if vkAllocateMemory() fails
 */
static int vk_alloc_mem(AVFilterContext *avctx, VkMemoryRequirements *req,
                        VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
                        VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem)
{
    VkResult ret;
    int index = -1;
    VkPhysicalDeviceProperties props;
    VkPhysicalDeviceMemoryProperties mprops;
    VulkanFilterContext *s = avctx->priv;

    VkMemoryAllocateInfo alloc_info = {
        .sType           = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
        .pNext           = alloc_extension,
    };

    vkGetPhysicalDeviceProperties(s->hwctx->phys_dev, &props);
    vkGetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &mprops);

    /* Align if we need to */
    if (req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
        req->size = FFALIGN(req->size, props.limits.minMemoryMapAlignment);

    alloc_info.allocationSize = req->size;

    /* The vulkan spec requires memory types to be sorted in the "optimal"
     * order, so the first matching type we find will be the best/fastest one */
    for (uint32_t i = 0; i < mprops.memoryTypeCount; i++) {
        /* The memory type must be supported by the requirements (bitfield).
         * The mask is built unsigned: bit 31 is a valid memory type index and
         * shifting a signed 1 into the sign bit is undefined behaviour. */
        if (!(req->memoryTypeBits & (UINT32_C(1) << i)))
            continue;

        /* The memory type flags must include our properties */
        if ((mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags)
            continue;

        /* Found a suitable memory type */
        index = (int)i;
        break;
    }

    if (index < 0) {
        av_log(avctx, AV_LOG_ERROR, "No memory type found for flags 0x%x\n",
               req_flags);
        return AVERROR(EINVAL);
    }

    alloc_info.memoryTypeIndex = index;

    ret = vkAllocateMemory(s->hwctx->act_dev, &alloc_info,
                           s->hwctx->alloc, mem);
    if (ret != VK_SUCCESS) {
        av_log(avctx, AV_LOG_ERROR, "Failed to allocate memory: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR(ENOMEM);
    }

    *mem_flags |= mprops.memoryTypes[index].propertyFlags;

    return 0;
}
149 
/**
 * Creates a Vulkan buffer, allocates memory for it and binds the two.
 *
 * @param avctx  filter context; its priv data supplies the device
 * @param buf    receives the buffer handle, the memory handle and the
 *               property flags of the memory type that was chosen
 * @param size   requested buffer size in bytes
 * @param usage  buffer usage flags
 * @param flags  memory property flags the backing memory must have
 * @return 0 on success, a negative AVERROR code on failure. Nothing is
 *         destroyed here on failure; handles created so far stay in buf.
 */
int ff_vk_create_buf(AVFilterContext *avctx, FFVkBuffer *buf, size_t size,
                     VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags)
{
    int err;
    VkResult ret;
    int use_ded_mem;
    VulkanFilterContext *s = avctx->priv;

    VkBufferCreateInfo buf_spawn = {
        .sType       = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
        .pNext       = NULL,
        .usage       = usage,
        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
        .size        = size, /* Gets FFALIGNED during alloc if host visible
                                but should be ok */
    };

    VkBufferMemoryRequirementsInfo2 req_desc = {
        .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,
    };
    VkMemoryDedicatedAllocateInfo ded_alloc = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
        .pNext = NULL,
    };
    VkMemoryDedicatedRequirements ded_req = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
    };
    VkMemoryRequirements2 req = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
        .pNext = &ded_req,
    };

    /* Use the same allocation callbacks that ff_vk_free_buf() passes to
     * vkDestroyBuffer(); creation and destruction must agree on them. */
    ret = vkCreateBuffer(s->hwctx->act_dev, &buf_spawn, s->hwctx->alloc,
                         &buf->buf);
    if (ret != VK_SUCCESS) {
        av_log(avctx, AV_LOG_ERROR, "Failed to create buffer: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    req_desc.buffer = buf->buf;

    vkGetBufferMemoryRequirements2(s->hwctx->act_dev, &req_desc, &req);

    /* In case the implementation prefers/requires dedicated allocation */
    use_ded_mem = ded_req.prefersDedicatedAllocation |
                  ded_req.requiresDedicatedAllocation;
    if (use_ded_mem)
        ded_alloc.buffer = buf->buf;

    /* Only chain the dedicated allocation info when it is actually wanted */
    err = vk_alloc_mem(avctx, &req.memoryRequirements, flags,
                       use_ded_mem ? &ded_alloc : NULL,
                       &buf->flags, &buf->mem);
    if (err)
        return err;

    ret = vkBindBufferMemory(s->hwctx->act_dev, buf->buf, buf->mem, 0);
    if (ret != VK_SUCCESS) {
        av_log(avctx, AV_LOG_ERROR, "Failed to bind memory to buffer: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    return 0;
}
214 
/**
 * Maps the memory of a list of buffers into host address space.
 *
 * @param avctx       filter context; its priv data supplies the device and
 *                    the scratch buffer used for the invalidation list
 * @param buf         array of nb_buffers buffers to map
 * @param mem         receives one mapped pointer per buffer
 * @param nb_buffers  number of entries in buf and mem
 * @param invalidate  if non-zero, the mapped ranges of all buffers whose
 *                    memory lacks VK_MEMORY_PROPERTY_HOST_COHERENT_BIT are
 *                    invalidated after mapping
 * @return 0 on success, a negative AVERROR code on failure. Buffers mapped
 *         before a failure are not unmapped here.
 */
int ff_vk_map_buffers(AVFilterContext *avctx, FFVkBuffer *buf, uint8_t *mem[],
                      int nb_buffers, int invalidate)
{
    VkResult ret;
    VulkanFilterContext *s = avctx->priv;
    VkMappedMemoryRange *inval_list = NULL;
    int inval_count = 0;

    for (int i = 0; i < nb_buffers; i++) {
        ret = vkMapMemory(s->hwctx->act_dev, buf[i].mem, 0,
                          VK_WHOLE_SIZE, 0, (void **)&mem[i]);
        if (ret != VK_SUCCESS) {
            av_log(avctx, AV_LOG_ERROR, "Failed to map buffer memory: %s\n",
                   ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
    }

    if (!invalidate)
        return 0;

    for (int i = 0; i < nb_buffers; i++) {
        const VkMappedMemoryRange ival_buf = {
            .sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
            .memory = buf[i].mem,
            .size   = VK_WHOLE_SIZE,
        };
        if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
            continue;
        inval_list = av_fast_realloc(s->scratch, &s->scratch_size,
                                     (inval_count + 1)*sizeof(*inval_list));
        if (!inval_list)
            return AVERROR(ENOMEM);
        /* av_fast_realloc() may have moved the block and has already updated
         * scratch_size, so the context pointer must follow it. Otherwise the
         * next growth starts from a stale pointer and loses earlier entries. */
        s->scratch = (void *)inval_list;
        inval_list[inval_count++] = ival_buf;
    }

    if (inval_count) {
        ret = vkInvalidateMappedMemoryRanges(s->hwctx->act_dev, inval_count,
                                             inval_list);
        if (ret != VK_SUCCESS) {
            av_log(avctx, AV_LOG_ERROR, "Failed to invalidate memory: %s\n",
                   ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
    }

    return 0;
}
263 
/**
 * Unmaps the memory of a list of buffers, optionally flushing it first.
 *
 * @param avctx       filter context; its priv data supplies the device and
 *                    the scratch buffer used for the flush list
 * @param buf         array of nb_buffers buffers to unmap
 * @param nb_buffers  number of entries in buf
 * @param flush       if non-zero, the mapped ranges of all buffers whose
 *                    memory lacks VK_MEMORY_PROPERTY_HOST_COHERENT_BIT are
 *                    flushed before unmapping
 * @return 0 on success; AVERROR(ENOMEM) if the flush list cannot be grown
 *         (nothing is unmapped in that case); AVERROR_EXTERNAL if the flush
 *         fails (the buffers are still unmapped)
 */
int ff_vk_unmap_buffers(AVFilterContext *avctx, FFVkBuffer *buf, int nb_buffers,
                        int flush)
{
    int err = 0;
    VkResult ret;
    VulkanFilterContext *s = avctx->priv;
    VkMappedMemoryRange *flush_list = NULL;
    int flush_count = 0;

    if (flush) {
        for (int i = 0; i < nb_buffers; i++) {
            const VkMappedMemoryRange flush_buf = {
                .sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
                .memory = buf[i].mem,
                .size   = VK_WHOLE_SIZE,
            };
            if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
                continue;
            flush_list = av_fast_realloc(s->scratch, &s->scratch_size,
                                         (flush_count + 1)*sizeof(*flush_list));
            if (!flush_list)
                return AVERROR(ENOMEM);
            /* Keep the context pointer in sync with scratch_size, which
             * av_fast_realloc() has already updated. Otherwise a moved block
             * is leaked and earlier entries are lost on the next growth. */
            s->scratch = (void *)flush_list;
            flush_list[flush_count++] = flush_buf;
        }
    }

    if (flush_count) {
        ret = vkFlushMappedMemoryRanges(s->hwctx->act_dev, flush_count,
                                        flush_list);
        if (ret != VK_SUCCESS) {
            av_log(avctx, AV_LOG_ERROR, "Failed to flush memory: %s\n",
                   ff_vk_ret2str(ret));
            err = AVERROR_EXTERNAL; /* We still want to try to unmap them */
        }
    }

    for (int i = 0; i < nb_buffers; i++)
        vkUnmapMemory(s->hwctx->act_dev, buf[i].mem);

    return err;
}
305 
/**
 * Destroys the buffer handle and frees the memory handle held by buf.
 * Does nothing when buf is NULL; handles equal to VK_NULL_HANDLE are skipped.
 * The FFVkBuffer structure itself is neither freed nor modified.
 */
void ff_vk_free_buf(AVFilterContext *avctx, FFVkBuffer *buf)
{
    VulkanFilterContext *s = avctx->priv;

    if (buf) {
        if (buf->buf != VK_NULL_HANDLE) {
            vkDestroyBuffer(s->hwctx->act_dev, buf->buf, s->hwctx->alloc);
        }
        if (buf->mem != VK_NULL_HANDLE) {
            vkFreeMemory(s->hwctx->act_dev, buf->mem, s->hwctx->alloc);
        }
    }
}
317 
/**
 * Appends a push constant range to a pipeline's list.
 *
 * @param avctx   filter context (not used by this function)
 * @param pl      pipeline whose push_consts array is extended
 * @param offset  value stored in the new range's offset field
 * @param size    value stored in the new range's size field
 * @param stage   shader stage flags stored in the new range
 * @return 0 on success, AVERROR(ENOMEM) if the array cannot be grown; the
 *         existing ranges are left intact in that case
 */
int ff_vk_add_push_constant(AVFilterContext *avctx, VulkanPipeline *pl,
                            int offset, int size, VkShaderStageFlagBits stage)
{
    VkPushConstantRange *pc;

    /* Grow through a temporary so the existing array is not lost (and
     * leaked) when the reallocation fails. */
    pc = av_realloc_array(pl->push_consts, pl->push_consts_num + 1,
                          sizeof(*pl->push_consts));
    if (!pc)
        return AVERROR(ENOMEM);
    pl->push_consts = pc;

    pc = &pl->push_consts[pl->push_consts_num++];
    memset(pc, 0, sizeof(*pc));

    pc->stageFlags = stage;
    pc->offset = offset;
    pc->size = size;

    return 0;
}
337 
FN_CREATING(VulkanFilterContext,FFVkExecContext,exec_ctx,exec_ctx,exec_ctx_num)338 FN_CREATING(VulkanFilterContext, FFVkExecContext, exec_ctx, exec_ctx, exec_ctx_num)
339 int ff_vk_create_exec_ctx(AVFilterContext *avctx, FFVkExecContext **ctx)
340 {
341     VkResult ret;
342     FFVkExecContext *e;
343     VulkanFilterContext *s = avctx->priv;
344 
345     int queue_family = s->queue_family_idx;
346     int nb_queues = s->queue_count;
347 
348     VkCommandPoolCreateInfo cqueue_create = {
349         .sType              = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
350         .flags              = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
351         .queueFamilyIndex   = queue_family,
352     };
353     VkCommandBufferAllocateInfo cbuf_create = {
354         .sType              = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
355         .level              = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
356         .commandBufferCount = nb_queues,
357     };
358 
359     e = create_exec_ctx(s);
360     if (!e)
361         return AVERROR(ENOMEM);
362 
363     e->queues = av_mallocz(nb_queues * sizeof(*e->queues));
364     if (!e->queues)
365         return AVERROR(ENOMEM);
366 
367     e->bufs = av_mallocz(nb_queues * sizeof(*e->bufs));
368     if (!e->bufs)
369         return AVERROR(ENOMEM);
370 
371     /* Create command pool */
372     ret = vkCreateCommandPool(s->hwctx->act_dev, &cqueue_create,
373                               s->hwctx->alloc, &e->pool);
374     if (ret != VK_SUCCESS) {
375         av_log(avctx, AV_LOG_ERROR, "Command pool creation failure: %s\n",
376                ff_vk_ret2str(ret));
377         return AVERROR_EXTERNAL;
378     }
379 
380     cbuf_create.commandPool = e->pool;
381 
382     /* Allocate command buffer */
383     ret = vkAllocateCommandBuffers(s->hwctx->act_dev, &cbuf_create, e->bufs);
384     if (ret != VK_SUCCESS) {
385         av_log(avctx, AV_LOG_ERROR, "Command buffer alloc failure: %s\n",
386                ff_vk_ret2str(ret));
387         return AVERROR_EXTERNAL;
388     }
389 
390     for (int i = 0; i < nb_queues; i++) {
391         FFVkQueueCtx *q = &e->queues[i];
392         vkGetDeviceQueue(s->hwctx->act_dev, queue_family, i, &q->queue);
393     }
394 
395     *ctx = e;
396 
397     return 0;
398 }
399 
/**
 * Drops everything the current queue of an execution context depends on:
 * unreferences its buffer dependencies, frees its frame dependencies and
 * resets the context's wait/signal semaphore counters to zero. The arrays
 * themselves are kept for reuse.
 */
void ff_vk_discard_exec_deps(AVFilterContext *avctx, FFVkExecContext *e)
{
    VulkanFilterContext *s = avctx->priv;
    FFVkQueueCtx *queue = &e->queues[s->cur_queue_idx];
    int i;

    /* Buffer references */
    for (i = 0; i < queue->nb_buf_deps; i++) {
        av_buffer_unref(&queue->buf_deps[i]);
    }
    queue->nb_buf_deps = 0;

    /* Frame references */
    for (i = 0; i < queue->nb_frame_deps; i++) {
        av_frame_free(&queue->frame_deps[i]);
    }
    queue->nb_frame_deps = 0;

    /* Semaphore lists */
    e->sem_wait_cnt = 0;
    e->sem_sig_cnt  = 0;
}
416 
/**
 * Prepares the current queue of an execution context and begins recording
 * into its command buffer.
 *
 * The queue's fence is created on first use; on later calls the function
 * waits for it (without timeout) and resets it. Dependencies left over from
 * the previous submission are discarded before recording starts.
 *
 * @return 0 on success, AVERROR_EXTERNAL if any Vulkan call fails
 */
int ff_vk_start_exec_recording(AVFilterContext *avctx, FFVkExecContext *e)
{
    VkResult ret;
    VulkanFilterContext *s = avctx->priv;
    FFVkQueueCtx *q = &e->queues[s->cur_queue_idx];

    VkCommandBufferBeginInfo cmd_start = {
        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
        .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
    };

    /* Create the fence and don't wait for it initially */
    if (!q->fence) {
        VkFenceCreateInfo fence_spawn = {
            .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
        };
        ret = vkCreateFence(s->hwctx->act_dev, &fence_spawn, s->hwctx->alloc,
                            &q->fence);
        if (ret != VK_SUCCESS) {
            av_log(avctx, AV_LOG_ERROR, "Failed to queue frame fence: %s\n",
                   ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
    } else {
        /* Both calls can fail (e.g. device loss); dropping the dependencies
         * and re-recording the command buffer after a failed wait would
         * touch resources that may still be in use. */
        ret = vkWaitForFences(s->hwctx->act_dev, 1, &q->fence, VK_TRUE,
                              UINT64_MAX);
        if (ret != VK_SUCCESS) {
            av_log(avctx, AV_LOG_ERROR, "Failed to wait for fence: %s\n",
                   ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }

        ret = vkResetFences(s->hwctx->act_dev, 1, &q->fence);
        if (ret != VK_SUCCESS) {
            av_log(avctx, AV_LOG_ERROR, "Failed to reset fence: %s\n",
                   ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
    }

    /* Discard queue dependencies */
    ff_vk_discard_exec_deps(avctx, e);

    ret = vkBeginCommandBuffer(e->bufs[s->cur_queue_idx], &cmd_start);
    if (ret != VK_SUCCESS) {
        av_log(avctx, AV_LOG_ERROR, "Failed to start command recording: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    return 0;
}
457 
ff_vk_get_exec_buf(AVFilterContext * avctx,FFVkExecContext * e)458 VkCommandBuffer ff_vk_get_exec_buf(AVFilterContext *avctx, FFVkExecContext *e)
459 {
460     VulkanFilterContext *s = avctx->priv;
461     return e->bufs[s->cur_queue_idx];
462 }
463 
/**
 * Registers a frame as a dependency of the current queue submission.
 *
 * For every plane of the frame's software pixel format the plane's semaphore
 * is appended to both the wait list (with in_wait_dst_flag as its destination
 * stage) and the signal list of the execution context. A clone of the frame
 * is then stored in the current queue's frame dependency list.
 *
 * @return 0 on success, AVERROR(ENOMEM) on allocation failure; in that case
 *         all dependencies of the current queue are discarded
 */
int ff_vk_add_exec_dep(AVFilterContext *avctx, FFVkExecContext *e,
                       AVFrame *frame, VkPipelineStageFlagBits in_wait_dst_flag)
{
    void *tmp;
    AVFrame **dst;
    VulkanFilterContext *s = avctx->priv;
    AVVkFrame *f = (AVVkFrame *)frame->data[0];
    FFVkQueueCtx *q = &e->queues[s->cur_queue_idx];
    AVHWFramesContext *fc = (AVHWFramesContext *)frame->hw_frames_ctx->data;
    int planes = av_pix_fmt_count_planes(fc->sw_format);

    /* Every array is grown through a temporary: av_fast_realloc() keeps the
     * old block alive when it fails, so overwriting the only pointer to it
     * with NULL would leak the block. */
    for (int i = 0; i < planes; i++) {
        tmp = av_fast_realloc(e->sem_wait, &e->sem_wait_alloc,
                              (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait));
        if (!tmp)
            goto fail;
        e->sem_wait = tmp;

        tmp = av_fast_realloc(e->sem_wait_dst, &e->sem_wait_dst_alloc,
                              (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait_dst));
        if (!tmp)
            goto fail;
        e->sem_wait_dst = tmp;

        tmp = av_fast_realloc(e->sem_sig, &e->sem_sig_alloc,
                              (e->sem_sig_cnt + 1)*sizeof(*e->sem_sig));
        if (!tmp)
            goto fail;
        e->sem_sig = tmp;

        e->sem_wait[e->sem_wait_cnt] = f->sem[i];
        e->sem_wait_dst[e->sem_wait_cnt] = in_wait_dst_flag;
        e->sem_wait_cnt++;

        e->sem_sig[e->sem_sig_cnt] = f->sem[i];
        e->sem_sig_cnt++;
    }

    dst = av_fast_realloc(q->frame_deps, &q->frame_deps_alloc_size,
                          (q->nb_frame_deps + 1) * sizeof(*dst));
    if (!dst)
        goto fail;

    q->frame_deps = dst;
    q->frame_deps[q->nb_frame_deps] = av_frame_clone(frame);
    if (!q->frame_deps[q->nb_frame_deps])
        goto fail;
    q->nb_frame_deps++;

    return 0;

fail:
    ff_vk_discard_exec_deps(avctx, e);
    return AVERROR(ENOMEM);
}
521 
/**
 * Ends recording of the current queue's command buffer and submits it.
 *
 * The submission waits on and signals the semaphores collected in the
 * execution context and signals the current queue's fence. After a
 * successful submission the filter's current queue index advances to the
 * next queue, wrapping around at queue_count.
 *
 * @return 0 on success, AVERROR_EXTERNAL if ending or submitting fails
 */
int ff_vk_submit_exec_queue(AVFilterContext *avctx, FFVkExecContext *e)
{
    VulkanFilterContext *s = avctx->priv;
    FFVkQueueCtx *queue = &e->queues[s->cur_queue_idx];
    VkResult res;

    VkSubmitInfo submit = {
        .sType                = VK_STRUCTURE_TYPE_SUBMIT_INFO,

        .waitSemaphoreCount   = e->sem_wait_cnt,
        .pWaitSemaphores      = e->sem_wait,
        .pWaitDstStageMask    = e->sem_wait_dst,

        .commandBufferCount   = 1,
        .pCommandBuffers      = &e->bufs[s->cur_queue_idx],

        .signalSemaphoreCount = e->sem_sig_cnt,
        .pSignalSemaphores    = e->sem_sig,
    };

    res = vkEndCommandBuffer(e->bufs[s->cur_queue_idx]);
    if (res != VK_SUCCESS) {
        av_log(avctx, AV_LOG_ERROR, "Unable to finish command buffer: %s\n",
               ff_vk_ret2str(res));
        return AVERROR_EXTERNAL;
    }

    res = vkQueueSubmit(queue->queue, 1, &submit, queue->fence);
    if (res != VK_SUCCESS) {
        av_log(avctx, AV_LOG_ERROR, "Unable to submit command buffer: %s\n",
               ff_vk_ret2str(res));
        return AVERROR_EXTERNAL;
    }

    /* Move on to the next queue for the following submission */
    s->cur_queue_idx = (s->cur_queue_idx + 1) % s->queue_count;

    return 0;
}
560 
/**
 * Appends buffer references to the current queue's dependency list.
 *
 * The pointers in deps are stored as they are; no new reference is taken.
 * They are unreferenced the next time the queue's dependencies are
 * discarded.
 *
 * @param deps     array of nb_deps buffer references, may be NULL
 * @param nb_deps  number of entries in deps
 * @return 0 on success or when there is nothing to add; AVERROR(ENOMEM) if
 *         the list cannot be grown or a NULL entry is encountered, in which
 *         case all dependencies of the current queue are discarded
 */
int ff_vk_add_dep_exec_ctx(AVFilterContext *avctx, FFVkExecContext *e,
                           AVBufferRef **deps, int nb_deps)
{
    VulkanFilterContext *s = avctx->priv;
    FFVkQueueCtx *queue = &e->queues[s->cur_queue_idx];
    AVBufferRef **list;
    int idx;

    if (!deps || !nb_deps)
        return 0;

    list = av_fast_realloc(queue->buf_deps, &queue->buf_deps_alloc_size,
                           (queue->nb_buf_deps + nb_deps) * sizeof(*list));
    if (!list) {
        ff_vk_discard_exec_deps(avctx, e);
        return AVERROR(ENOMEM);
    }
    queue->buf_deps = list;

    for (idx = 0; idx < nb_deps; idx++) {
        AVBufferRef *ref = deps[idx];

        queue->buf_deps[queue->nb_buf_deps] = ref;
        if (!ref) {
            ff_vk_discard_exec_deps(avctx, e);
            return AVERROR(ENOMEM);
        }
        queue->nb_buf_deps++;
    }

    return 0;
}
591 
/**
 * Sets AV_PIX_FMT_VULKAN as the only common pixel format of the filter.
 *
 * @return the result of ff_set_common_formats(), or AVERROR(ENOMEM) if the
 *         format list cannot be created
 */
int ff_vk_filter_query_formats(AVFilterContext *avctx)
{
    static const enum AVPixelFormat supported[] = {
        AV_PIX_FMT_VULKAN,
        AV_PIX_FMT_NONE,
    };
    AVFilterFormats *list = ff_make_format_list(supported);

    return list ? ff_set_common_formats(avctx, list) : AVERROR(ENOMEM);
}
603 
/**
 * Makes the filter use the given hardware device.
 *
 * Releases the previously held device reference, takes a new reference to
 * device and caches pointers to the device context and its hwctx.
 *
 * @return 0 on success, AVERROR(ENOMEM) if the reference cannot be created
 */
static int vulkan_filter_set_device(AVFilterContext *avctx,
                                    AVBufferRef *device)
{
    VulkanFilterContext *s = avctx->priv;
    AVHWDeviceContext *dev;

    av_buffer_unref(&s->device_ref);

    s->device_ref = av_buffer_ref(device);
    if (s->device_ref == NULL)
        return AVERROR(ENOMEM);

    dev       = (AVHWDeviceContext*)s->device_ref->data;
    s->device = dev;
    s->hwctx  = dev->hwctx;

    return 0;
}
620 
/**
 * Replaces the filter's hardware frames context reference with a new
 * reference to frames; the previously held reference is released first.
 *
 * @return 0 on success, AVERROR(ENOMEM) if the reference cannot be created
 */
static int vulkan_filter_set_frames(AVFilterContext *avctx,
                                    AVBufferRef *frames)
{
    VulkanFilterContext *s = avctx->priv;

    av_buffer_unref(&s->frames_ref);
    s->frames_ref = av_buffer_ref(frames);

    return s->frames_ref ? 0 : AVERROR(ENOMEM);
}
634 
/**
 * Input link configuration shared by the Vulkan filters.
 *
 * Every input must carry a hardware frames context. Only the first input is
 * inspected further: its device and frames context are adopted by the
 * filter, and its software format and dimensions become the output defaults
 * wherever the output format, width or height has not been set yet.
 *
 * @return 0 on success, a negative AVERROR code on failure
 */
int ff_vk_filter_config_input(AVFilterLink *inlink)
{
    AVFilterContext *avctx = inlink->dst;
    VulkanFilterContext *s = avctx->priv;
    AVHWFramesContext *in_frames;
    int ret;

    if (!inlink->hw_frames_ctx) {
        av_log(avctx, AV_LOG_ERROR, "Vulkan filtering requires a "
               "hardware frames context on the input.\n");
        return AVERROR(EINVAL);
    }

    /* Only the first input provides the device and the defaults */
    if (inlink != avctx->inputs[0])
        return 0;

    in_frames = (AVHWFramesContext*)inlink->hw_frames_ctx->data;
    if (in_frames->format != AV_PIX_FMT_VULKAN)
        return AVERROR(EINVAL);

    ret = vulkan_filter_set_device(avctx, in_frames->device_ref);
    if (ret < 0)
        return ret;

    ret = vulkan_filter_set_frames(avctx, inlink->hw_frames_ctx);
    if (ret < 0)
        return ret;

    /* Anything not configured yet is inherited from the input */
    s->input_format = in_frames->sw_format;

    if (s->output_format == AV_PIX_FMT_NONE) {
        s->output_format = in_frames->sw_format;
    }
    if (s->output_width == 0) {
        s->output_width = inlink->w;
    }
    if (s->output_height == 0) {
        s->output_height = inlink->h;
    }

    return 0;
}
674 
/**
 * Output link configuration for filters whose output reuses the frames
 * context stored in the filter context.
 *
 * Drops any frames context already on the link, makes sure a device is set
 * (falling back to avctx->hw_device_ctx), then references the stored frames
 * context on the output link and copies the output dimensions.
 *
 * @return 0 on success, AVERROR(EINVAL) if no device or no frames context is
 *         available, AVERROR(ENOMEM) if the reference cannot be created
 */
int ff_vk_filter_config_output_inplace(AVFilterLink *outlink)
{
    int err;
    AVFilterContext *avctx = outlink->src;
    VulkanFilterContext *s = avctx->priv;

    av_buffer_unref(&outlink->hw_frames_ctx);

    if (!s->device_ref) {
        if (!avctx->hw_device_ctx) {
            av_log(avctx, AV_LOG_ERROR, "Vulkan filtering requires a "
                   "Vulkan device.\n");
            return AVERROR(EINVAL);
        }

        err = vulkan_filter_set_device(avctx, avctx->hw_device_ctx);
        if (err < 0)
            return err;
    }

    /* The device fallback above does not provide a frames context, and
     * av_buffer_ref() must not be handed a NULL reference. */
    if (!s->frames_ref) {
        av_log(avctx, AV_LOG_ERROR, "Vulkan filtering requires a "
               "hardware frames context on the input.\n");
        return AVERROR(EINVAL);
    }

    outlink->hw_frames_ctx = av_buffer_ref(s->frames_ref);
    if (!outlink->hw_frames_ctx)
        return AVERROR(ENOMEM);

    outlink->w = s->output_width;
    outlink->h = s->output_height;

    return 0;
}
704 
/**
 * Output link configuration for filters that produce frames in a frames
 * context of their own.
 *
 * Drops any frames context already on the link, makes sure a device is set
 * (falling back to avctx->hw_device_ctx), then allocates and initialises a
 * new Vulkan frames context using the configured output format and
 * dimensions and attaches it to the output link.
 *
 * @return 0 on success, a negative AVERROR code on failure
 */
int ff_vk_filter_config_output(AVFilterLink *outlink)
{
    AVFilterContext *avctx = outlink->src;
    VulkanFilterContext *s = avctx->priv;
    AVHWFramesContext *frames;
    AVBufferRef *frames_ref;
    int ret;

    av_buffer_unref(&outlink->hw_frames_ctx);

    if (!s->device_ref) {
        if (!avctx->hw_device_ctx) {
            av_log(avctx, AV_LOG_ERROR, "Vulkan filtering requires a "
                   "Vulkan device.\n");
            return AVERROR(EINVAL);
        }

        ret = vulkan_filter_set_device(avctx, avctx->hw_device_ctx);
        if (ret < 0)
            return ret;
    }

    frames_ref = av_hwframe_ctx_alloc(s->device_ref);
    if (!frames_ref)
        return AVERROR(ENOMEM);

    frames            = (AVHWFramesContext*)frames_ref->data;
    frames->format    = AV_PIX_FMT_VULKAN;
    frames->sw_format = s->output_format;
    frames->width     = s->output_width;
    frames->height    = s->output_height;

    ret = av_hwframe_ctx_init(frames_ref);
    if (ret < 0) {
        av_log(avctx, AV_LOG_ERROR, "Failed to initialise output "
               "frames: %d.\n", ret);
        av_buffer_unref(&frames_ref);
        return ret;
    }

    /* The link takes over the reference */
    outlink->hw_frames_ctx = frames_ref;
    outlink->w = s->output_width;
    outlink->h = s->output_height;

    return 0;
}
755 
/**
 * Common filter initialisation: marks the output format as not yet chosen
 * and initialises glslang.
 *
 * @return 0 on success, AVERROR_EXTERNAL if glslang_init() reports failure
 */
int ff_vk_filter_init(AVFilterContext *avctx)
{
    VulkanFilterContext *s = avctx->priv;

    s->output_format = AV_PIX_FMT_NONE;

    return glslang_init() ? AVERROR_EXTERNAL : 0;
}
767 
FN_CREATING(VulkanFilterContext,VkSampler,sampler,samplers,samplers_num)768 FN_CREATING(VulkanFilterContext, VkSampler, sampler, samplers, samplers_num)
769 VkSampler *ff_vk_init_sampler(AVFilterContext *avctx, int unnorm_coords,
770                               VkFilter filt)
771 {
772     VkResult ret;
773     VulkanFilterContext *s = avctx->priv;
774 
775     VkSamplerCreateInfo sampler_info = {
776         .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
777         .magFilter = filt,
778         .minFilter = sampler_info.magFilter,
779         .mipmapMode = unnorm_coords ? VK_SAMPLER_MIPMAP_MODE_NEAREST :
780                                       VK_SAMPLER_MIPMAP_MODE_LINEAR,
781         .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
782         .addressModeV = sampler_info.addressModeU,
783         .addressModeW = sampler_info.addressModeU,
784         .anisotropyEnable = VK_FALSE,
785         .compareOp = VK_COMPARE_OP_NEVER,
786         .borderColor = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK,
787         .unnormalizedCoordinates = unnorm_coords,
788     };
789 
790     VkSampler *sampler = create_sampler(s);
791     if (!sampler)
792         return NULL;
793 
794     ret = vkCreateSampler(s->hwctx->act_dev, &sampler_info,
795                           s->hwctx->alloc, sampler);
796     if (ret != VK_SUCCESS) {
797         av_log(avctx, AV_LOG_ERROR, "Unable to init sampler: %s\n",
798                ff_vk_ret2str(ret));
799         return NULL;
800     }
801 
802     return sampler;
803 }
804 
ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt)805 int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt)
806 {
807     if (pix_fmt == AV_PIX_FMT_ABGR   || pix_fmt == AV_PIX_FMT_BGRA   ||
808         pix_fmt == AV_PIX_FMT_RGBA   || pix_fmt == AV_PIX_FMT_RGB24  ||
809         pix_fmt == AV_PIX_FMT_BGR24  || pix_fmt == AV_PIX_FMT_RGB48  ||
810         pix_fmt == AV_PIX_FMT_RGBA64 || pix_fmt == AV_PIX_FMT_RGB565 ||
811         pix_fmt == AV_PIX_FMT_BGR565 || pix_fmt == AV_PIX_FMT_BGR0   ||
812         pix_fmt == AV_PIX_FMT_0BGR   || pix_fmt == AV_PIX_FMT_RGB0)
813         return 1;
814     return 0;
815 }
816 
ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt)817 const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt)
818 {
819     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pixfmt);
820     const int high = desc->comp[0].depth > 8;
821     return high ? "rgba16f" : "rgba8";
822 }
823 
824 typedef struct ImageViewCtx {
825     VkImageView view;
826 } ImageViewCtx;
827 
destroy_imageview(void * opaque,uint8_t * data)828 static void destroy_imageview(void *opaque, uint8_t *data)
829 {
830     VulkanFilterContext *s = opaque;
831     ImageViewCtx *iv = (ImageViewCtx *)data;
832     vkDestroyImageView(s->hwctx->act_dev, iv->view, s->hwctx->alloc);
833     av_free(iv);
834 }
835 
/**
 * Create a 2D color image view and tie its lifetime to an execution context.
 *
 * The view is wrapped in an AVBufferRef whose free callback destroys it, and
 * that buffer is registered as a dependency of the execution context.
 *
 * @param avctx filter context (its priv data is the VulkanFilterContext)
 * @param e     execution context that receives the buffer dependency
 * @param v     on success, receives the created image view handle
 * @param img   image to create the view for
 * @param fmt   format of the view
 * @param map   component swizzle of the view
 * @return 0 on success, AVERROR(ENOMEM) on allocation failure,
 *         AVERROR_EXTERNAL if Vulkan refuses to create the view, or the
 *         error returned by ff_vk_add_dep_exec_ctx()
 */
int ff_vk_create_imageview(AVFilterContext *avctx, FFVkExecContext *e,
                           VkImageView *v, VkImage img, VkFormat fmt,
                           const VkComponentMapping map)
{
    int err;
    VkResult ret;
    AVBufferRef *buf;
    ImageViewCtx *iv;
    VulkanFilterContext *s = avctx->priv;
    VkImageViewCreateInfo imgview_spawn = {
        .sType      = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
        .pNext      = NULL,
        .image      = img,
        .viewType   = VK_IMAGE_VIEW_TYPE_2D,
        .format     = fmt,
        .components = map,
        .subresourceRange = {
            .aspectMask     = VK_IMAGE_ASPECT_COLOR_BIT,
            .baseMipLevel   = 0,
            .levelCount     = 1,
            .baseArrayLayer = 0,
            .layerCount     = 1,
        },
    };

    iv = av_mallocz(sizeof(*iv));
    if (!iv)
        return AVERROR(ENOMEM);

    ret = vkCreateImageView(s->hwctx->act_dev, &imgview_spawn,
                            s->hwctx->alloc, &iv->view);
    if (ret != VK_SUCCESS) {
        av_log(avctx, AV_LOG_ERROR, "Failed to create imageview: %s\n",
               ff_vk_ret2str(ret));
        /* No view was created, so only the wrapper needs releasing */
        av_free(iv);
        return AVERROR_EXTERNAL;
    }

    buf = av_buffer_create((uint8_t *)iv, sizeof(*iv), destroy_imageview, s, 0);
    if (!buf) {
        /* The buffer never took ownership: destroy view and wrapper by hand */
        destroy_imageview(s, (uint8_t *)iv);
        return AVERROR(ENOMEM);
    }

    /* Add to queue dependencies */
    err = ff_vk_add_dep_exec_ctx(avctx, e, &buf, 1);
    if (err) {
        av_buffer_unref(&buf);
        return err;
    }

    *v = iv->view;

    return 0;
}
886 
/* Instantiates the static helper create_shader(VulkanPipeline *) from the
 * FN_CREATING macro at the top of the file: it allocates a zeroed SPIRVShader
 * and grows the pipeline's shaders array to hold it. */
FN_CREATING(VulkanPipeline, SPIRVShader, shader, shaders, shaders_num)
/**
 * Allocate a new shader in a pipeline and start its GLSL source.
 *
 * @param avctx filter context (not used by the statements visible here)
 * @param pl    pipeline the shader is attached to
 * @param name  shader name; the pointer is stored as-is, not copied
 * @param stage pipeline stage stored in the shader's stage create info
 * @return the new shader, or NULL if create_shader() failed
 */
SPIRVShader *ff_vk_init_shader(AVFilterContext *avctx, VulkanPipeline *pl,
                               const char *name, VkShaderStageFlags stage)
{
    SPIRVShader *shd = create_shader(pl);
    if (!shd)
        return NULL;

    /* Source text accumulates in an unbounded, growable print buffer */
    av_bprint_init(&shd->src, 0, AV_BPRINT_SIZE_UNLIMITED);

    shd->shader.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
    shd->shader.stage = stage;

    shd->name = name;

    /* NOTE(review): GLSLF/GLSLC are defined outside this chunk; presumably
     * they append the given text to shd->src and rely on the local being
     * named "shd" - confirm before renaming it. */
    GLSLF(0, #version %i                                                  ,460);
    GLSLC(0, #define IS_WITHIN(v1, v2) ((v1.x < v2.x) && (v1.y < v2.y))       );
    GLSLC(0,                                                                  );

    return shd;
}
908 
/**
 * Record the local workgroup size of a compute shader and emit the matching
 * GLSL layout declaration into the shader source.
 *
 * @param avctx      filter context (unused)
 * @param shd        shader whose local_size and source are updated
 * @param local_size workgroup size in x, y and z
 */
void ff_vk_set_compute_shader_sizes(AVFilterContext *avctx, SPIRVShader *shd,
                                        int local_size[3])
{
    for (int dim = 0; dim < 3; dim++)
        shd->local_size[dim] = local_size[dim];

    /* Print from the stored copy so the source reflects what the shader holds */
    av_bprintf(&shd->src,
               "layout (local_size_x = %i, local_size_y = %i, local_size_z = %i) in;\n\n",
               shd->local_size[0],
               shd->local_size[1],
               shd->local_size[2]);
}
920 
/* Log the accumulated source of a shader with a 1-based line number and a tab
 * in front of every line.
 *   avctx - logging context
 *   shd   - shader whose source buffer is printed
 *   prio  - av_log() level to print at
 * Only newline-terminated lines are emitted; trailing text without a final
 * '\n' is not printed. */
static void print_shader(AVFilterContext *avctx, SPIRVShader *shd, int prio)
{
    int line = 0;
    const char *start = shd->src.str;
    const char *nl;

    AVBPrint buf;
    av_bprint_init(&buf, 0, AV_BPRINT_SIZE_UNLIMITED);

    /* Walk from newline to newline in a single pass instead of calling
     * strlen() on the whole source for every character. */
    while ((nl = strchr(start, '\n'))) {
        av_bprintf(&buf, "%i\t", ++line);
        av_bprint_append_data(&buf, start, nl - start + 1);
        start = nl + 1;
    }

    av_log(avctx, prio, "Shader %s: \n%s", shd->name, buf.str);
    av_bprint_finalize(&buf, NULL);
}
941 
/**
 * Compile the GLSL source of a shader and create its Vulkan shader module.
 *
 * @param avctx      filter context (its priv data is the VulkanFilterContext)
 * @param shd        shader to compile; its module handle is filled in
 * @param entrypoint entry point name; the pointer is stored, not copied
 * @return 0 on success, AVERROR(ENOMEM) if the compiler returned no result,
 *         the compiler's error code if compilation failed, or
 *         AVERROR_EXTERNAL if the shader module could not be created
 */
int ff_vk_compile_shader(AVFilterContext *avctx, SPIRVShader *shd,
                         const char *entrypoint)
{
    VkResult ret;
    VulkanFilterContext *s = avctx->priv;
    VkShaderModuleCreateInfo shader_create;
    GLSlangResult *res;

    /* Indexed by the Vulkan stage bit; only these three stages are mapped */
    static const enum GLSlangStage emap[] = {
        [VK_SHADER_STAGE_VERTEX_BIT]   = GLSLANG_VERTEX,
        [VK_SHADER_STAGE_FRAGMENT_BIT] = GLSLANG_FRAGMENT,
        [VK_SHADER_STAGE_COMPUTE_BIT]  = GLSLANG_COMPUTE,
    };

    shd->shader.pName = entrypoint;

    res = glslang_compile(shd->src.str, emap[shd->shader.stage]);
    if (!res)
        return AVERROR(ENOMEM);

    if (res->rval) {
        /* Keep the code around: the result struct is released below */
        const int err = res->rval;

        av_log(avctx, AV_LOG_ERROR, "Error compiling shader %s: %s!\n",
               shd->name, av_err2str(err));
        print_shader(avctx, shd, AV_LOG_ERROR);
        if (res->error_msg)
            av_log(avctx, AV_LOG_ERROR, "%s", res->error_msg);
        av_free(res->error_msg);
        /* The result struct itself must not leak on the error path */
        av_free(res);
        return err;
    }

    print_shader(avctx, shd, AV_LOG_VERBOSE);

    shader_create.sType    = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
    shader_create.pNext    = NULL;
    shader_create.codeSize = res->size;
    shader_create.flags    = 0;
    shader_create.pCode    = res->data;

    /* Use the same allocator that free_pipeline() passes when destroying the
     * module; creation and destruction callbacks must be compatible. */
    ret = vkCreateShaderModule(s->hwctx->act_dev, &shader_create,
                               s->hwctx->alloc, &shd->shader.module);

    /* Free the GLSlangResult struct */
    av_free(res->data);
    av_free(res);

    if (ret != VK_SUCCESS) {
        av_log(avctx, AV_LOG_ERROR, "Unable to create shader module: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    av_log(avctx, AV_LOG_VERBOSE, "Shader %s linked! Size: %zu bytes\n",
           shd->name, shader_create.codeSize);

    return 0;
}
998 
/* Per-descriptor-type properties, indexed by VkDescriptorType.
 * ff_vk_add_descriptor_set() uses struct_size as the update template stride
 * and the remaining fields to decide what to print in the GLSL declaration. */
static const struct descriptor_props {
    size_t struct_size; /* Size of the opaque which updates the descriptor */
    const char *type;   /* GLSL type keyword to print, NULL to print none */
    int is_uniform;     /* Print the "uniform" keyword */
    int mem_quali;      /* Can use a memory qualifier */
    int dim_needed;     /* Must indicate dimension */
    int buf_content;    /* Must indicate buffer contents */
} descriptor_props[] = {
    [VK_DESCRIPTOR_TYPE_SAMPLER]                = { sizeof(VkDescriptorImageInfo),  "sampler",       1, 0, 0, 0, },
    [VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE]          = { sizeof(VkDescriptorImageInfo),  "texture",       1, 0, 1, 0, },
    [VK_DESCRIPTOR_TYPE_STORAGE_IMAGE]          = { sizeof(VkDescriptorImageInfo),  "image",         1, 1, 1, 0, },
    [VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT]       = { sizeof(VkDescriptorImageInfo),  "subpassInput",  1, 0, 0, 0, },
    [VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER] = { sizeof(VkDescriptorImageInfo),  "sampler",       1, 0, 1, 0, },
    [VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER]         = { sizeof(VkDescriptorBufferInfo),  NULL,           1, 0, 0, 1, },
    [VK_DESCRIPTOR_TYPE_STORAGE_BUFFER]         = { sizeof(VkDescriptorBufferInfo), "buffer",        0, 1, 0, 1, },
    [VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC] = { sizeof(VkDescriptorBufferInfo),  NULL,           1, 0, 0, 1, },
    [VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC] = { sizeof(VkDescriptorBufferInfo), "buffer",        0, 1, 0, 1, },
    [VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER]   = { sizeof(VkBufferView),           "samplerBuffer", 1, 0, 0, 0, },
    [VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER]   = { sizeof(VkBufferView),           "imageBuffer",   1, 0, 0, 0, },
};
1019 
/**
 * Add a descriptor set to a pipeline and declare its bindings in a shader.
 *
 * Unless only_print_to_shader is set, this creates the descriptor set layout,
 * accumulates the descriptor pool sizes and prepares the update template
 * creation info consumed later by ff_vk_init_pipeline_layout(). In every case
 * the GLSL "layout (set = ..., binding = ...)" declarations are appended to
 * the shader source.
 *
 * @param avctx                filter context (priv is the VulkanFilterContext)
 * @param pl                   pipeline that receives the descriptor set
 * @param shd                  shader whose source receives the declarations
 * @param desc                 array of binding descriptions
 * @param num                  number of entries in desc
 * @param only_print_to_shader if nonzero, skip all Vulkan/pipeline state and
 *                             only print the declarations, using the index of
 *                             the most recently added set
 * @return 0 on success, AVERROR(ENOMEM) on allocation failure, or
 *         AVERROR_EXTERNAL if the layout could not be created
 */
int ff_vk_add_descriptor_set(AVFilterContext *avctx, VulkanPipeline *pl,
                             SPIRVShader *shd, VulkanDescriptorSetBinding *desc,
                             int num, int only_print_to_shader)
{
    VkResult ret;
    VkDescriptorSetLayout *layout;
    VulkanFilterContext *s = avctx->priv;

    if (only_print_to_shader)
        goto print;

    { /* Make room for one more layout */
        VkDescriptorSetLayout *layouts;

        /* Assign only on success: on failure the old array is still valid
         * and must stay reachable so it can be destroyed and freed later. */
        layouts = av_realloc_array(pl->desc_layout, sizeof(*pl->desc_layout),
                                   pl->desc_layout_num + 1);
        if (!layouts)
            return AVERROR(ENOMEM);
        pl->desc_layout = layouts;
    }

    layout = &pl->desc_layout[pl->desc_layout_num];
    memset(layout, 0, sizeof(*layout));

    { /* Create descriptor set layout descriptions */
        VkDescriptorSetLayoutCreateInfo desc_create_layout = { 0 };
        VkDescriptorSetLayoutBinding *desc_binding;

        desc_binding = av_mallocz(sizeof(*desc_binding)*num);
        if (!desc_binding)
            return AVERROR(ENOMEM);

        for (int i = 0; i < num; i++) {
            desc_binding[i].binding            = i;
            desc_binding[i].descriptorType     = desc[i].type;
            desc_binding[i].descriptorCount    = FFMAX(desc[i].elems, 1);
            desc_binding[i].stageFlags         = desc[i].stages;
            desc_binding[i].pImmutableSamplers = desc[i].samplers;
        }

        desc_create_layout.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
        desc_create_layout.pBindings = desc_binding;
        desc_create_layout.bindingCount = num;

        ret = vkCreateDescriptorSetLayout(s->hwctx->act_dev, &desc_create_layout,
                                          s->hwctx->alloc, layout);
        av_free(desc_binding);
        if (ret != VK_SUCCESS) {
            av_log(avctx, AV_LOG_ERROR, "Unable to init descriptor set "
                   "layout: %s\n", ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
    }

    { /* Pool each descriptor by type and update pool counts */
        for (int i = 0; i < num; i++) {
            int j;
            for (j = 0; j < pl->pool_size_desc_num; j++)
                if (pl->pool_size_desc[j].type == desc[i].type)
                    break;
            if (j >= pl->pool_size_desc_num) {
                VkDescriptorPoolSize *sizes;

                /* Grow by one; bump the count only once the array really
                 * has the extra slot, and keep the old array on failure. */
                sizes = av_realloc_array(pl->pool_size_desc,
                                         sizeof(*pl->pool_size_desc),
                                         pl->pool_size_desc_num + 1);
                if (!sizes)
                    return AVERROR(ENOMEM);
                pl->pool_size_desc = sizes;
                pl->pool_size_desc_num++;
                memset(&pl->pool_size_desc[j], 0, sizeof(VkDescriptorPoolSize));
            }
            pl->pool_size_desc[j].type             = desc[i].type;
            pl->pool_size_desc[j].descriptorCount += FFMAX(desc[i].elems, 1);
        }
    }

    { /* Create template creation struct */
        VkDescriptorUpdateTemplateCreateInfo *dt, *infos;
        VkDescriptorUpdateTemplateEntry *des_entries;

        /* Freed after descriptor set initialization */
        des_entries = av_mallocz(num*sizeof(VkDescriptorUpdateTemplateEntry));
        if (!des_entries)
            return AVERROR(ENOMEM);

        for (int i = 0; i < num; i++) {
            des_entries[i].dstBinding      = i;
            des_entries[i].descriptorType  = desc[i].type;
            des_entries[i].descriptorCount = FFMAX(desc[i].elems, 1);
            des_entries[i].dstArrayElement = 0;
            /* The template is later applied with the filter context as the
             * data pointer, so offsets are relative to it */
            des_entries[i].offset          = ((uint8_t *)desc[i].updater) - (uint8_t *)s;
            des_entries[i].stride          = descriptor_props[desc[i].type].struct_size;
        }

        infos = av_realloc_array(pl->desc_template_info,
                                 sizeof(*pl->desc_template_info),
                                 pl->desc_layout_num + 1);
        if (!infos) {
            /* Nothing references the entries yet, so release them here */
            av_free(des_entries);
            return AVERROR(ENOMEM);
        }
        pl->desc_template_info = infos;

        dt = &pl->desc_template_info[pl->desc_layout_num];
        memset(dt, 0, sizeof(*dt));

        dt->sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO;
        dt->templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET;
        dt->descriptorSetLayout = *layout;
        dt->pDescriptorUpdateEntries = des_entries;
        dt->descriptorUpdateEntryCount = num;
    }

    pl->desc_layout_num++;

print:
    /* Write shader info */
    for (int i = 0; i < num; i++) {
        const struct descriptor_props *prop = &descriptor_props[desc[i].type];
        GLSLA("layout (set = %i, binding = %i", pl->desc_layout_num - 1, i);

        if (desc[i].mem_layout)
            GLSLA(", %s", desc[i].mem_layout);
        GLSLA(")");

        if (prop->is_uniform)
            GLSLA(" uniform");

        if (prop->mem_quali && desc[i].mem_quali)
            GLSLA(" %s", desc[i].mem_quali);

        if (prop->type)
            GLSLA(" %s", prop->type);

        if (prop->dim_needed)
            GLSLA("%iD", desc[i].dimensions);

        GLSLA(" %s", desc[i].name);

        if (prop->buf_content)
            GLSLA(" {\n    %s\n}", desc[i].buf_content);
        else if (desc[i].elems > 0)
            GLSLA("[%i]", desc[i].elems);

        GLSLA(";\n");
    }
    GLSLA("\n");

    return 0;
}
1159 
/**
 * Update one descriptor set of the currently selected queue through its
 * update template, using the filter context as the template's data source.
 *
 * @param avctx  filter context (its priv data is the VulkanFilterContext)
 * @param pl     pipeline owning the descriptor sets and templates
 * @param set_id index of the set within the pipeline's layouts
 */
void ff_vk_update_descriptor_set(AVFilterContext *avctx, VulkanPipeline *pl,
                                 int set_id)
{
    VulkanFilterContext *vkctx = avctx->priv;

    /* Sets are stored queue-major: every queue owns desc_layout_num
     * consecutive entries of desc_set. */
    vkUpdateDescriptorSetWithTemplate(
        vkctx->hwctx->act_dev,
        pl->desc_set[vkctx->cur_queue_idx * pl->desc_layout_num + set_id],
        pl->desc_template[set_id],
        vkctx);
}
1170 
/**
 * Record a push constant update into the command buffer of the currently
 * selected queue, using the layout of the pipeline bound to the context.
 *
 * @param avctx  filter context (its priv data is the VulkanFilterContext)
 * @param e      execution context; a pipeline must already be bound to it
 * @param stage  shader stage(s) that will read the constants
 * @param offset byte offset of the update within the push constant range
 * @param size   number of bytes to update
 * @param src    data to copy from
 */
void ff_vk_update_push_exec(AVFilterContext *avctx, FFVkExecContext *e,
                            VkShaderStageFlagBits stage, int offset,
                            size_t size, void *src)
{
    VulkanFilterContext *vkctx = avctx->priv;

    vkCmdPushConstants(e->bufs[vkctx->cur_queue_idx],
                       e->bound_pl->pipeline_layout,
                       stage,
                       offset,
                       size,
                       src);
}
1179 
/**
 * Create the descriptor pool, descriptor sets, pipeline layout and descriptor
 * update templates of a pipeline.
 *
 * One descriptor set and one update template are created per layout and per
 * queue. They are stored queue-major, so entry i uses layout
 * i % desc_layout_num; this matches the indexing in
 * ff_vk_update_descriptor_set().
 *
 * @param avctx filter context (its priv data is the VulkanFilterContext)
 * @param pl    pipeline whose descriptor sets were added beforehand
 * @return 0 on success, AVERROR(ENOMEM) on allocation failure, or
 *         AVERROR_EXTERNAL if a Vulkan object could not be created
 */
int ff_vk_init_pipeline_layout(AVFilterContext *avctx, VulkanPipeline *pl)
{
    VkResult ret;
    VulkanFilterContext *s = avctx->priv;

    pl->descriptor_sets_num = pl->desc_layout_num * s->queue_count;

    { /* Init descriptor set pool */
        VkDescriptorPoolCreateInfo pool_create_info = {
            .sType         = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
            .poolSizeCount = pl->pool_size_desc_num,
            .pPoolSizes    = pl->pool_size_desc,
            .maxSets       = pl->descriptor_sets_num,
        };

        /* The counts were accumulated for a single copy of each set, but
         * every queue gets its own copy out of this pool. The array is
         * released right below, so scaling it in place is safe. */
        for (int i = 0; i < pl->pool_size_desc_num; i++)
            pl->pool_size_desc[i].descriptorCount *= s->queue_count;

        ret = vkCreateDescriptorPool(s->hwctx->act_dev, &pool_create_info,
                                     s->hwctx->alloc, &pl->desc_pool);
        av_freep(&pl->pool_size_desc);
        if (ret != VK_SUCCESS) {
            av_log(avctx, AV_LOG_ERROR, "Unable to init descriptor set "
                   "pool: %s\n", ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
    }

    { /* Allocate descriptor sets */
        VkDescriptorSetLayout *set_layouts;
        VkDescriptorSetAllocateInfo alloc_info = {
            .sType              = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
            .descriptorPool     = pl->desc_pool,
            .descriptorSetCount = pl->descriptor_sets_num,
        };

        /* pSetLayouts must hold descriptorSetCount entries, whereas
         * pl->desc_layout only has desc_layout_num of them: build a
         * temporary array that repeats the layouts once per queue. */
        set_layouts = av_malloc(pl->descriptor_sets_num*sizeof(*set_layouts));
        if (!set_layouts)
            return AVERROR(ENOMEM);
        for (int i = 0; i < pl->descriptor_sets_num; i++)
            set_layouts[i] = pl->desc_layout[i % pl->desc_layout_num];
        alloc_info.pSetLayouts = set_layouts;

        pl->desc_set = av_malloc(pl->descriptor_sets_num*sizeof(*pl->desc_set));
        if (!pl->desc_set) {
            av_free(set_layouts);
            return AVERROR(ENOMEM);
        }

        ret = vkAllocateDescriptorSets(s->hwctx->act_dev, &alloc_info,
                                       pl->desc_set);
        av_free(set_layouts);
        if (ret != VK_SUCCESS) {
            av_log(avctx, AV_LOG_ERROR, "Unable to allocate descriptor set: %s\n",
                   ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
    }

    { /* Finally create the pipeline layout */
        VkPipelineLayoutCreateInfo spawn_pipeline_layout = {
            .sType                  = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
            .setLayoutCount         = pl->desc_layout_num,
            .pSetLayouts            = pl->desc_layout,
            .pushConstantRangeCount = pl->push_consts_num,
            .pPushConstantRanges    = pl->push_consts,
        };

        ret = vkCreatePipelineLayout(s->hwctx->act_dev, &spawn_pipeline_layout,
                                     s->hwctx->alloc, &pl->pipeline_layout);
        av_freep(&pl->push_consts);
        pl->push_consts_num = 0;
        if (ret != VK_SUCCESS) {
            av_log(avctx, AV_LOG_ERROR, "Unable to init pipeline layout: %s\n",
                   ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
    }

    { /* Descriptor template (for tightly packed descriptors) */
        int err = 0;
        VkDescriptorUpdateTemplateCreateInfo *desc_template_info;

        /* Zeroed so that cleanup never sees an uninitialized handle if
         * template creation stops halfway */
        pl->desc_template = av_mallocz(pl->descriptor_sets_num*sizeof(*pl->desc_template));
        if (!pl->desc_template)
            return AVERROR(ENOMEM);

        /* Create update templates for the descriptor sets */
        for (int i = 0; i < pl->descriptor_sets_num; i++) {
            desc_template_info = &pl->desc_template_info[i % pl->desc_layout_num];
            desc_template_info->pipelineLayout = pl->pipeline_layout;
            ret = vkCreateDescriptorUpdateTemplate(s->hwctx->act_dev,
                                                   desc_template_info,
                                                   s->hwctx->alloc,
                                                   &pl->desc_template[i]);
            if (ret != VK_SUCCESS) {
                av_log(avctx, AV_LOG_ERROR, "Unable to init descriptor "
                       "template: %s\n", ff_vk_ret2str(ret));
                err = AVERROR_EXTERNAL;
                break;
            }
        }

        /* Each creation info is reused once per queue, so its entries may
         * only be released after the loop, and exactly once each. Freeing
         * the array itself keeps later cleanup from touching them again. */
        for (int i = 0; i < pl->desc_layout_num; i++)
            av_free((void *)pl->desc_template_info[i].pDescriptorUpdateEntries);
        av_freep(&pl->desc_template_info);

        if (err)
            return err;
    }

    return 0;
}
1274 
FN_CREATING(VulkanFilterContext,VulkanPipeline,pipeline,pipelines,pipelines_num)1275 FN_CREATING(VulkanFilterContext, VulkanPipeline, pipeline, pipelines, pipelines_num)
1276 VulkanPipeline *ff_vk_create_pipeline(AVFilterContext *avctx)
1277 {
1278     return create_pipeline(avctx->priv);
1279 }
1280 
/**
 * Create a compute pipeline from the first compute-stage shader attached to
 * the pipeline, and mark the pipeline as bound at the compute bind point.
 *
 * @param avctx filter context (its priv data is the VulkanFilterContext)
 * @param pl    pipeline with an initialized pipeline layout
 * @return 0 on success, AVERROR(EINVAL) if no compute shader is attached, or
 *         AVERROR_EXTERNAL if Vulkan fails to create the pipeline
 */
int ff_vk_init_compute_pipeline(AVFilterContext *avctx, VulkanPipeline *pl)
{
    int idx = 0;
    VkResult ret;
    VulkanFilterContext *s = avctx->priv;

    VkComputePipelineCreateInfo pipe = {
        .sType  = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
        .layout = pl->pipeline_layout,
    };

    /* Pick the first shader that has the compute stage bit set */
    while (idx < pl->shaders_num) {
        SPIRVShader *candidate = pl->shaders[idx];

        if (candidate->shader.stage & VK_SHADER_STAGE_COMPUTE_BIT) {
            pipe.stage = candidate->shader;
            break;
        }
        idx++;
    }

    /* Reaching the end of the list means nothing matched */
    if (idx == pl->shaders_num) {
        av_log(avctx, AV_LOG_ERROR, "Can't init compute pipeline, no shader\n");
        return AVERROR(EINVAL);
    }

    ret = vkCreateComputePipelines(s->hwctx->act_dev, VK_NULL_HANDLE, 1, &pipe,
                                   s->hwctx->alloc, &pl->pipeline);
    if (ret == VK_SUCCESS) {
        pl->bind_point = VK_PIPELINE_BIND_POINT_COMPUTE;
        return 0;
    }

    av_log(avctx, AV_LOG_ERROR, "Unable to init compute pipeline: %s\n",
           ff_vk_ret2str(ret));
    return AVERROR_EXTERNAL;
}
1315 
/**
 * Bind a pipeline and the current queue's descriptor sets to the command
 * buffer of the currently selected queue, and remember the pipeline in the
 * execution context.
 *
 * @param avctx filter context (its priv data is the VulkanFilterContext)
 * @param e     execution context whose command buffer is recorded into
 * @param pl    pipeline to bind
 */
void ff_vk_bind_pipeline_exec(AVFilterContext *avctx, FFVkExecContext *e,
                              VulkanPipeline *pl)
{
    VulkanFilterContext *s = avctx->priv;

    vkCmdBindPipeline(e->bufs[s->cur_queue_idx], pl->bind_point, pl->pipeline);

    /* The pipeline layout only has desc_layout_num sets, while pl->desc_set
     * holds one copy of them per queue (queue-major, the same indexing as
     * ff_vk_update_descriptor_set()). Bind just the current queue's slice
     * rather than all descriptor_sets_num entries from index 0. */
    if (pl->desc_layout_num > 0)
        vkCmdBindDescriptorSets(e->bufs[s->cur_queue_idx], pl->bind_point,
                                pl->pipeline_layout, 0, pl->desc_layout_num,
                                &pl->desc_set[s->cur_queue_idx * pl->desc_layout_num],
                                0, NULL);

    e->bound_pl = pl;
}
1329 
/* Release everything owned by an execution context, then the context itself.
 *   s - Vulkan filter context providing the device and the queue count
 *   e - execution context to destroy; must not be used afterwards */
static void free_exec_ctx(VulkanFilterContext *s, FFVkExecContext *e)
{
    for (int qi = 0; qi < s->queue_count; qi++) {
        FFVkQueueCtx *queue = &e->queues[qi];
        int dep;

        /* Let pending work on this queue finish before its fence goes away */
        if (queue->fence) {
            vkWaitForFences(s->hwctx->act_dev, 1, &queue->fence, VK_TRUE, UINT64_MAX);
            vkResetFences(s->hwctx->act_dev, 1, &queue->fence);
            vkDestroyFence(s->hwctx->act_dev, queue->fence, s->hwctx->alloc);
        }

        /* Drop the buffers this queue was keeping alive */
        for (dep = 0; dep < queue->nb_buf_deps; dep++)
            av_buffer_unref(&queue->buf_deps[dep]);
        av_free(queue->buf_deps);

        /* Drop the frames this queue was keeping alive */
        for (dep = 0; dep < queue->nb_frame_deps; dep++)
            av_frame_free(&queue->frame_deps[dep]);
        av_free(queue->frame_deps);
    }

    /* Command buffers go back to the pool before the pool is destroyed */
    if (e->bufs)
        vkFreeCommandBuffers(s->hwctx->act_dev, e->pool, s->queue_count, e->bufs);
    if (e->pool)
        vkDestroyCommandPool(s->hwctx->act_dev, e->pool, s->hwctx->alloc);

    av_freep(&e->bufs);
    av_freep(&e->queues);
    av_freep(&e->sem_sig);
    av_freep(&e->sem_wait);
    av_freep(&e->sem_wait_dst);

    av_free(e);
}
1368 
/* Destroy every Vulkan object and allocation owned by a pipeline, then the
 * pipeline itself.
 *   s  - Vulkan filter context providing the device and the allocator
 *   pl - pipeline to destroy; must not be used afterwards */
static void free_pipeline(VulkanFilterContext *s, VulkanPipeline *pl)
{
    for (int i = 0; i < pl->shaders_num; i++) {
        SPIRVShader *shd = pl->shaders[i];
        av_bprint_finalize(&shd->src, NULL);
        vkDestroyShaderModule(s->hwctx->act_dev, shd->shader.module,
                              s->hwctx->alloc);
        av_free(shd);
    }

    vkDestroyPipeline(s->hwctx->act_dev, pl->pipeline, s->hwctx->alloc);
    vkDestroyPipelineLayout(s->hwctx->act_dev, pl->pipeline_layout,
                            s->hwctx->alloc);

    /* ff_vk_init_pipeline_layout() creates one update template per
     * descriptor set (layouts times queues), so all descriptor_sets_num of
     * them have to be destroyed, not just the first desc_layout_num. */
    if (pl->desc_template) {
        for (int i = 0; i < pl->descriptor_sets_num; i++) {
            if (pl->desc_template[i])
                vkDestroyDescriptorUpdateTemplate(s->hwctx->act_dev,
                                                  pl->desc_template[i],
                                                  s->hwctx->alloc);
        }
    }

    if (pl->desc_layout) {
        for (int i = 0; i < pl->desc_layout_num; i++) {
            if (pl->desc_layout[i])
                vkDestroyDescriptorSetLayout(s->hwctx->act_dev,
                                             pl->desc_layout[i],
                                             s->hwctx->alloc);
        }
    }

    /* Also frees the descriptor sets */
    if (pl->desc_pool)
        vkDestroyDescriptorPool(s->hwctx->act_dev, pl->desc_pool,
                                s->hwctx->alloc);

    av_freep(&pl->desc_set);
    av_freep(&pl->shaders);
    av_freep(&pl->desc_layout);
    av_freep(&pl->desc_template);
    av_freep(&pl->push_consts);
    pl->push_consts_num = 0;

    /* Only freed in case of failure */
    av_freep(&pl->pool_size_desc);
    if (pl->desc_template_info) {
        /* There is one creation info per layout, so the bound is
         * desc_layout_num. descriptor_sets_num overruns the array with
         * several queues and is still 0 if the layout was never set up. */
        for (int i = 0; i < pl->desc_layout_num; i++)
            av_free((void *)pl->desc_template_info[i].pDescriptorUpdateEntries);
        av_freep(&pl->desc_template_info);
    }

    av_free(pl);
}
1414 
/**
 * Tear down the Vulkan state of a filter: the GLSL compiler, all execution
 * contexts, samplers and pipelines, the scratch buffer, and finally the
 * device and frames references.
 *
 * @param avctx filter context (its priv data is the VulkanFilterContext)
 */
void ff_vk_filter_uninit(AVFilterContext *avctx)
{
    VulkanFilterContext *vkctx = avctx->priv;
    int idx;

    glslang_uninit();

    /* Execution contexts */
    for (idx = 0; idx < vkctx->exec_ctx_num; idx++)
        free_exec_ctx(vkctx, vkctx->exec_ctx[idx]);
    av_freep(&vkctx->exec_ctx);

    /* Samplers: each entry is a separately allocated handle */
    for (idx = 0; idx < vkctx->samplers_num; idx++) {
        VkSampler *sampler = vkctx->samplers[idx];

        vkDestroySampler(vkctx->hwctx->act_dev, *sampler, vkctx->hwctx->alloc);
        av_free(sampler);
    }
    av_freep(&vkctx->samplers);

    /* Pipelines */
    for (idx = 0; idx < vkctx->pipelines_num; idx++)
        free_pipeline(vkctx, vkctx->pipelines[idx]);
    av_freep(&vkctx->pipelines);

    av_freep(&vkctx->scratch);
    vkctx->scratch_size = 0;

    /* The device is still used by the destroy calls above, so the
     * references are dropped last */
    av_buffer_unref(&vkctx->device_ref);
    av_buffer_unref(&vkctx->frames_ref);
}
1441