1 /*
2 * This file is part of FFmpeg.
3 *
4 * FFmpeg is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * FFmpeg is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with FFmpeg; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18
19 #include "formats.h"
20 #include "vulkan.h"
21 #include "glslang.h"
22
/* Generic macro for creating contexts which need to keep their addresses
 * if another context is created.
 *
 * Expands to a static, always-inline function
 *     type *create_<shortname>(ctx *dctx)
 * which zero-allocates one object of `type`, grows the pointer array
 * dctx->array by one slot, stores the new object in that slot and
 * increments dctx->num. The array holds pointers to the objects, not the
 * objects themselves, so growing it does not move earlier objects.
 *
 * The generated function returns the new object, or NULL if either
 * allocation fails; in that case dctx->array and dctx->num are not
 * modified and the freshly allocated object is freed. */
#define FN_CREATING(ctx, type, shortname, array, num)                          \
static av_always_inline type *create_ ##shortname(ctx *dctx)                   \
{                                                                              \
    type **array, *sctx = av_mallocz(sizeof(*sctx));                           \
    if (!sctx)                                                                 \
        return NULL;                                                           \
                                                                               \
    array = av_realloc_array(dctx->array, sizeof(*dctx->array), dctx->num + 1);\
    if (!array) {                                                              \
        av_free(sctx);                                                         \
        return NULL;                                                           \
    }                                                                          \
                                                                               \
    dctx->array = array;                                                       \
    dctx->array[dctx->num++] = sctx;                                           \
                                                                               \
    return sctx;                                                               \
}
43
/* Component mapping that sets all four channels (r, g, b, a) to
 * VK_COMPONENT_SWIZZLE_IDENTITY. */
const VkComponentMapping ff_comp_identity_map = {
    .r = VK_COMPONENT_SWIZZLE_IDENTITY,
    .g = VK_COMPONENT_SWIZZLE_IDENTITY,
    .b = VK_COMPONENT_SWIZZLE_IDENTITY,
    .a = VK_COMPONENT_SWIZZLE_IDENTITY,
};
50
51 /* Converts return values to strings */
ff_vk_ret2str(VkResult res)52 const char *ff_vk_ret2str(VkResult res)
53 {
54 #define CASE(VAL) case VAL: return #VAL
55 switch (res) {
56 CASE(VK_SUCCESS);
57 CASE(VK_NOT_READY);
58 CASE(VK_TIMEOUT);
59 CASE(VK_EVENT_SET);
60 CASE(VK_EVENT_RESET);
61 CASE(VK_INCOMPLETE);
62 CASE(VK_ERROR_OUT_OF_HOST_MEMORY);
63 CASE(VK_ERROR_OUT_OF_DEVICE_MEMORY);
64 CASE(VK_ERROR_INITIALIZATION_FAILED);
65 CASE(VK_ERROR_DEVICE_LOST);
66 CASE(VK_ERROR_MEMORY_MAP_FAILED);
67 CASE(VK_ERROR_LAYER_NOT_PRESENT);
68 CASE(VK_ERROR_EXTENSION_NOT_PRESENT);
69 CASE(VK_ERROR_FEATURE_NOT_PRESENT);
70 CASE(VK_ERROR_INCOMPATIBLE_DRIVER);
71 CASE(VK_ERROR_TOO_MANY_OBJECTS);
72 CASE(VK_ERROR_FORMAT_NOT_SUPPORTED);
73 CASE(VK_ERROR_FRAGMENTED_POOL);
74 CASE(VK_ERROR_SURFACE_LOST_KHR);
75 CASE(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR);
76 CASE(VK_SUBOPTIMAL_KHR);
77 CASE(VK_ERROR_OUT_OF_DATE_KHR);
78 CASE(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR);
79 CASE(VK_ERROR_VALIDATION_FAILED_EXT);
80 CASE(VK_ERROR_INVALID_SHADER_NV);
81 CASE(VK_ERROR_OUT_OF_POOL_MEMORY);
82 CASE(VK_ERROR_INVALID_EXTERNAL_HANDLE);
83 CASE(VK_ERROR_NOT_PERMITTED_EXT);
84 default: return "Unknown error";
85 }
86 #undef CASE
87 }
88
/**
 * Allocate device memory that satisfies the given requirements.
 *
 * @param avctx           filter context; avctx->priv is the VulkanFilterContext
 * @param req             memory requirements; req->size is rounded up in place
 *                        to minMemoryMapAlignment when host-visible memory is
 *                        requested
 * @param req_flags       property flags the selected memory type must all have
 * @param alloc_extension chained into VkMemoryAllocateInfo.pNext (may be NULL)
 * @param mem_flags       the selected type's property flags are ORed into
 *                        *mem_flags on success
 * @param mem             receives the allocated memory handle
 * @return 0 on success, AVERROR(EINVAL) if no memory type matches,
 *         AVERROR(ENOMEM) if vkAllocateMemory() fails
 */
static int vk_alloc_mem(AVFilterContext *avctx, VkMemoryRequirements *req,
                        VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
                        VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem)
{
    VkResult ret;
    int index = -1;
    VkPhysicalDeviceProperties props;
    VkPhysicalDeviceMemoryProperties mprops;
    VulkanFilterContext *s = avctx->priv;

    VkMemoryAllocateInfo alloc_info = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
        .pNext = alloc_extension,
    };

    vkGetPhysicalDeviceProperties(s->hwctx->phys_dev, &props);
    vkGetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &mprops);

    /* Align if we need to */
    if (req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
        req->size = FFALIGN(req->size, props.limits.minMemoryMapAlignment);

    alloc_info.allocationSize = req->size;

    /* The vulkan spec requires memory types to be sorted in the "optimal"
     * order, so the first matching type we find will be the best/fastest one */
    for (uint32_t i = 0; i < mprops.memoryTypeCount; i++) {
        /* The memory type must be supported by the requirements (bitfield).
         * The shift is done on an unsigned value: up to 32 memory types may
         * exist and shifting a signed 1 into bit 31 is undefined behaviour. */
        if (!(req->memoryTypeBits & (1U << i)))
            continue;

        /* The memory type flags must include our properties */
        if ((mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags)
            continue;

        /* Found a suitable memory type */
        index = i;
        break;
    }

    if (index < 0) {
        av_log(avctx, AV_LOG_ERROR, "No memory type found for flags 0x%x\n",
               req_flags);
        return AVERROR(EINVAL);
    }

    alloc_info.memoryTypeIndex = index;

    ret = vkAllocateMemory(s->hwctx->act_dev, &alloc_info,
                           s->hwctx->alloc, mem);
    if (ret != VK_SUCCESS) {
        av_log(avctx, AV_LOG_ERROR, "Failed to allocate memory: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR(ENOMEM);
    }

    *mem_flags |= mprops.memoryTypes[index].propertyFlags;

    return 0;
}
149
/**
 * Create a Vulkan buffer and allocate and bind memory for it.
 *
 * @param avctx filter context; avctx->priv is the VulkanFilterContext
 * @param buf   receives the buffer handle (buf->buf) and memory handle
 *              (buf->mem); the property flags of the selected memory type
 *              are ORed into buf->flags
 * @param size  requested buffer size in bytes
 * @param usage buffer usage flags
 * @param flags memory property flags the backing memory must have
 * @return 0 on success, a negative AVERROR code on failure. Handles already
 *         written to buf are not released here on failure.
 */
int ff_vk_create_buf(AVFilterContext *avctx, FFVkBuffer *buf, size_t size,
                     VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags)
{
    int err;
    VkResult ret;
    int use_ded_mem;
    VulkanFilterContext *s = avctx->priv;

    VkBufferCreateInfo buf_spawn = {
        .sType       = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
        .pNext       = NULL,
        .usage       = usage,
        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
        .size        = size, /* Gets FFALIGNED during alloc if host visible
                                but should be ok */
    };

    VkBufferMemoryRequirementsInfo2 req_desc = {
        .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,
    };
    VkMemoryDedicatedAllocateInfo ded_alloc = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
        .pNext = NULL,
    };
    VkMemoryDedicatedRequirements ded_req = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
    };
    VkMemoryRequirements2 req = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
        .pNext = &ded_req,
    };

    /* Use the device's allocation callbacks: ff_vk_free_buf() destroys the
     * buffer with s->hwctx->alloc, and Vulkan requires the allocator given
     * at destruction to be compatible with the one given at creation. */
    ret = vkCreateBuffer(s->hwctx->act_dev, &buf_spawn, s->hwctx->alloc,
                         &buf->buf);
    if (ret != VK_SUCCESS) {
        av_log(avctx, AV_LOG_ERROR, "Failed to create buffer: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    req_desc.buffer = buf->buf;

    vkGetBufferMemoryRequirements2(s->hwctx->act_dev, &req_desc, &req);

    /* In case the implementation prefers/requires dedicated allocation */
    use_ded_mem = ded_req.prefersDedicatedAllocation |
                  ded_req.requiresDedicatedAllocation;
    if (use_ded_mem)
        ded_alloc.buffer = buf->buf;

    /* ded_alloc.pNext is NULL, so no extension is chained when dedicated
     * memory is not used. */
    err = vk_alloc_mem(avctx, &req.memoryRequirements, flags,
                       use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext,
                       &buf->flags, &buf->mem);
    if (err)
        return err;

    ret = vkBindBufferMemory(s->hwctx->act_dev, buf->buf, buf->mem, 0);
    if (ret != VK_SUCCESS) {
        av_log(avctx, AV_LOG_ERROR, "Failed to bind memory to buffer: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    return 0;
}
214
/**
 * Map the memory of several buffers into host address space.
 *
 * @param avctx      filter context; avctx->priv is the VulkanFilterContext
 * @param buf        array of nb_buffers buffers to map
 * @param mem        array of nb_buffers pointers receiving the mappings
 * @param nb_buffers number of entries in buf and mem
 * @param invalidate if non-zero, invalidate the mapped ranges of every buffer
 *                   whose memory is not host-coherent
 * @return 0 on success, AVERROR_EXTERNAL if a Vulkan call fails,
 *         AVERROR(ENOMEM) if the scratch list cannot be grown
 */
int ff_vk_map_buffers(AVFilterContext *avctx, FFVkBuffer *buf, uint8_t *mem[],
                      int nb_buffers, int invalidate)
{
    VkResult ret;
    VulkanFilterContext *s = avctx->priv;
    VkMappedMemoryRange *inval_list = NULL;
    int inval_count = 0;

    for (int i = 0; i < nb_buffers; i++) {
        ret = vkMapMemory(s->hwctx->act_dev, buf[i].mem, 0,
                          VK_WHOLE_SIZE, 0, (void **)&mem[i]);
        if (ret != VK_SUCCESS) {
            av_log(avctx, AV_LOG_ERROR, "Failed to map buffer memory: %s\n",
                   ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
    }

    if (!invalidate)
        return 0;

    for (int i = 0; i < nb_buffers; i++) {
        const VkMappedMemoryRange ival_buf = {
            .sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
            .memory = buf[i].mem,
            .size   = VK_WHOLE_SIZE,
        };
        /* Coherent memory never needs an explicit invalidate */
        if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
            continue;
        inval_list = av_fast_realloc(s->scratch, &s->scratch_size,
                                     (++inval_count)*sizeof(*inval_list));
        if (!inval_list)
            return AVERROR(ENOMEM);
        /* The reallocation may have moved the block; store the new address
         * so the context never keeps a pointer to freed memory. */
        s->scratch = (void *)inval_list;
        inval_list[inval_count - 1] = ival_buf;
    }

    if (inval_count) {
        ret = vkInvalidateMappedMemoryRanges(s->hwctx->act_dev, inval_count,
                                             inval_list);
        if (ret != VK_SUCCESS) {
            av_log(avctx, AV_LOG_ERROR, "Failed to invalidate memory: %s\n",
                   ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
    }

    return 0;
}
263
/**
 * Unmap the memory of several buffers, optionally flushing host writes first.
 *
 * @param avctx      filter context; avctx->priv is the VulkanFilterContext
 * @param buf        array of nb_buffers buffers to unmap
 * @param nb_buffers number of entries in buf
 * @param flush      if non-zero, flush the mapped ranges of every buffer
 *                   whose memory is not host-coherent before unmapping
 * @return 0 on success, AVERROR(ENOMEM) if the flush list cannot be built,
 *         AVERROR_EXTERNAL if the flush fails. The buffers are unmapped in
 *         every case.
 */
int ff_vk_unmap_buffers(AVFilterContext *avctx, FFVkBuffer *buf, int nb_buffers,
                        int flush)
{
    int err = 0;
    VkResult ret;
    VulkanFilterContext *s = avctx->priv;
    VkMappedMemoryRange *flush_list = NULL;
    int flush_count = 0;

    if (flush) {
        for (int i = 0; i < nb_buffers; i++) {
            const VkMappedMemoryRange flush_buf = {
                .sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
                .memory = buf[i].mem,
                .size   = VK_WHOLE_SIZE,
            };
            /* Coherent memory never needs an explicit flush */
            if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
                continue;
            flush_list = av_fast_realloc(s->scratch, &s->scratch_size,
                                         (flush_count + 1)*sizeof(*flush_list));
            if (!flush_list) {
                /* Skip the flush, but still fall through to the unmap loop
                 * so the mappings are not leaked. */
                err = AVERROR(ENOMEM);
                flush_count = 0;
                break;
            }
            /* The reallocation may have moved the block; store the new
             * address so the context never keeps a stale pointer. */
            s->scratch = (void *)flush_list;
            flush_list[flush_count++] = flush_buf;
        }
    }

    if (flush_count) {
        ret = vkFlushMappedMemoryRanges(s->hwctx->act_dev, flush_count,
                                        flush_list);
        if (ret != VK_SUCCESS) {
            av_log(avctx, AV_LOG_ERROR, "Failed to flush memory: %s\n",
                   ff_vk_ret2str(ret));
            err = AVERROR_EXTERNAL; /* We still want to try to unmap them */
        }
    }

    for (int i = 0; i < nb_buffers; i++)
        vkUnmapMemory(s->hwctx->act_dev, buf[i].mem);

    return err;
}
305
/**
 * Destroy a buffer and free its backing memory.
 *
 * A NULL buf is accepted and ignored; handles equal to VK_NULL_HANDLE are
 * skipped. The handles stored in buf are not reset.
 */
void ff_vk_free_buf(AVFilterContext *avctx, FFVkBuffer *buf)
{
    VulkanFilterContext *vkctx = avctx->priv;

    if (buf) {
        if (buf->buf != VK_NULL_HANDLE)
            vkDestroyBuffer(vkctx->hwctx->act_dev, buf->buf,
                            vkctx->hwctx->alloc);

        if (buf->mem != VK_NULL_HANDLE)
            vkFreeMemory(vkctx->hwctx->act_dev, buf->mem,
                         vkctx->hwctx->alloc);
    }
}
317
/**
 * Append a push constant range to a pipeline.
 *
 * @param avctx  filter context (unused here)
 * @param pl     pipeline whose push_consts array is extended by one entry
 * @param offset byte offset of the range
 * @param size   byte size of the range
 * @param stage  shader stage(s) which access the range
 * @return 0 on success, AVERROR(ENOMEM) if the array cannot be grown; on
 *         failure the existing array and its count are left untouched
 */
int ff_vk_add_push_constant(AVFilterContext *avctx, VulkanPipeline *pl,
                            int offset, int size, VkShaderStageFlagBits stage)
{
    VkPushConstantRange *pc;
    VkPushConstantRange *grown;

    /* Grow through a temporary so a failed reallocation neither leaks nor
     * loses the ranges that were already registered. */
    grown = av_realloc_array(pl->push_consts, sizeof(*pl->push_consts),
                             pl->push_consts_num + 1);
    if (!grown)
        return AVERROR(ENOMEM);
    pl->push_consts = grown;

    pc = &pl->push_consts[pl->push_consts_num++];
    memset(pc, 0, sizeof(*pc));

    pc->stageFlags = stage;
    pc->offset     = offset;
    pc->size       = size;

    return 0;
}
337
FN_CREATING(VulkanFilterContext,FFVkExecContext,exec_ctx,exec_ctx,exec_ctx_num)338 FN_CREATING(VulkanFilterContext, FFVkExecContext, exec_ctx, exec_ctx, exec_ctx_num)
339 int ff_vk_create_exec_ctx(AVFilterContext *avctx, FFVkExecContext **ctx)
340 {
341 VkResult ret;
342 FFVkExecContext *e;
343 VulkanFilterContext *s = avctx->priv;
344
345 int queue_family = s->queue_family_idx;
346 int nb_queues = s->queue_count;
347
348 VkCommandPoolCreateInfo cqueue_create = {
349 .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
350 .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
351 .queueFamilyIndex = queue_family,
352 };
353 VkCommandBufferAllocateInfo cbuf_create = {
354 .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
355 .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
356 .commandBufferCount = nb_queues,
357 };
358
359 e = create_exec_ctx(s);
360 if (!e)
361 return AVERROR(ENOMEM);
362
363 e->queues = av_mallocz(nb_queues * sizeof(*e->queues));
364 if (!e->queues)
365 return AVERROR(ENOMEM);
366
367 e->bufs = av_mallocz(nb_queues * sizeof(*e->bufs));
368 if (!e->bufs)
369 return AVERROR(ENOMEM);
370
371 /* Create command pool */
372 ret = vkCreateCommandPool(s->hwctx->act_dev, &cqueue_create,
373 s->hwctx->alloc, &e->pool);
374 if (ret != VK_SUCCESS) {
375 av_log(avctx, AV_LOG_ERROR, "Command pool creation failure: %s\n",
376 ff_vk_ret2str(ret));
377 return AVERROR_EXTERNAL;
378 }
379
380 cbuf_create.commandPool = e->pool;
381
382 /* Allocate command buffer */
383 ret = vkAllocateCommandBuffers(s->hwctx->act_dev, &cbuf_create, e->bufs);
384 if (ret != VK_SUCCESS) {
385 av_log(avctx, AV_LOG_ERROR, "Command buffer alloc failure: %s\n",
386 ff_vk_ret2str(ret));
387 return AVERROR_EXTERNAL;
388 }
389
390 for (int i = 0; i < nb_queues; i++) {
391 FFVkQueueCtx *q = &e->queues[i];
392 vkGetDeviceQueue(s->hwctx->act_dev, queue_family, i, &q->queue);
393 }
394
395 *ctx = e;
396
397 return 0;
398 }
399
/**
 * Drop every dependency attached to the current queue of an execution
 * context: buffer references are unreferenced, frames are freed, and the
 * context's wait/signal semaphore counts are reset to zero.
 */
void ff_vk_discard_exec_deps(AVFilterContext *avctx, FFVkExecContext *e)
{
    VulkanFilterContext *vkctx = avctx->priv;
    FFVkQueueCtx *queue = &e->queues[vkctx->cur_queue_idx];
    int idx;

    /* Buffer dependencies */
    for (idx = 0; idx < queue->nb_buf_deps; idx++)
        av_buffer_unref(&queue->buf_deps[idx]);
    queue->nb_buf_deps = 0;

    /* Frame dependencies */
    for (idx = 0; idx < queue->nb_frame_deps; idx++)
        av_frame_free(&queue->frame_deps[idx]);
    queue->nb_frame_deps = 0;

    /* Semaphore lists are kept allocated, only emptied */
    e->sem_wait_cnt = 0;
    e->sem_sig_cnt  = 0;
}
416
/**
 * Begin recording into the command buffer of the current queue.
 *
 * If the queue already has a fence it is waited on and reset; otherwise a
 * fence is created. The queue's dependencies are then discarded and the
 * command buffer is put into the recording state.
 *
 * @return 0 on success, AVERROR_EXTERNAL if a Vulkan call fails
 */
int ff_vk_start_exec_recording(AVFilterContext *avctx, FFVkExecContext *e)
{
    VulkanFilterContext *vkctx = avctx->priv;
    FFVkQueueCtx *queue = &e->queues[vkctx->cur_queue_idx];
    VkResult res;

    if (queue->fence) {
        /* Wait for the previous submission on this queue, then re-arm */
        vkWaitForFences(vkctx->hwctx->act_dev, 1, &queue->fence, VK_TRUE,
                        UINT64_MAX);
        vkResetFences(vkctx->hwctx->act_dev, 1, &queue->fence);
    } else {
        /* First use: create the fence and don't wait for it */
        VkFenceCreateInfo fence_info = {
            .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
        };

        res = vkCreateFence(vkctx->hwctx->act_dev, &fence_info,
                            vkctx->hwctx->alloc, &queue->fence);
        if (res != VK_SUCCESS) {
            av_log(avctx, AV_LOG_ERROR, "Failed to queue frame fence: %s\n",
                   ff_vk_ret2str(res));
            return AVERROR_EXTERNAL;
        }
    }

    /* Discard queue dependencies */
    ff_vk_discard_exec_deps(avctx, e);

    {
        VkCommandBufferBeginInfo begin_info = {
            .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
            .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
        };

        res = vkBeginCommandBuffer(e->bufs[vkctx->cur_queue_idx], &begin_info);
    }
    if (res != VK_SUCCESS) {
        av_log(avctx, AV_LOG_ERROR, "Failed to start command recoding: %s\n",
               ff_vk_ret2str(res));
        return AVERROR_EXTERNAL;
    }

    return 0;
}
457
ff_vk_get_exec_buf(AVFilterContext * avctx,FFVkExecContext * e)458 VkCommandBuffer ff_vk_get_exec_buf(AVFilterContext *avctx, FFVkExecContext *e)
459 {
460 VulkanFilterContext *s = avctx->priv;
461 return e->bufs[s->cur_queue_idx];
462 }
463
/**
 * Add a frame as a dependency of the current submission.
 *
 * For every plane of the frame's software format, the plane's semaphore is
 * appended to both the wait list (with in_wait_dst_flag as its destination
 * stage) and the signal list of the execution context. A new reference to
 * the frame is stored in the current queue's dependency list.
 *
 * @param avctx            filter context; avctx->priv is the VulkanFilterContext
 * @param e                execution context to add the dependency to
 * @param frame            frame whose data[0] is an AVVkFrame and which has
 *                         a hardware frames context
 * @param in_wait_dst_flag pipeline stage at which the semaphore waits occur
 * @return 0 on success; AVERROR(ENOMEM) on allocation failure, in which case
 *         all dependencies of the current queue are discarded
 */
int ff_vk_add_exec_dep(AVFilterContext *avctx, FFVkExecContext *e,
                       AVFrame *frame, VkPipelineStageFlagBits in_wait_dst_flag)
{
    AVFrame **dst;
    VulkanFilterContext *s = avctx->priv;
    AVVkFrame *f = (AVVkFrame *)frame->data[0];
    FFVkQueueCtx *q = &e->queues[s->cur_queue_idx];
    AVHWFramesContext *fc = (AVHWFramesContext *)frame->hw_frames_ctx->data;
    int planes = av_pix_fmt_count_planes(fc->sw_format);

    for (int i = 0; i < planes; i++) {
        /* av_fast_realloc() leaves the old block alive when it fails, so
         * each list is grown through a temporary: assigning the result
         * straight back would lose (and leak) the existing array. */
        void *tmp;

        tmp = av_fast_realloc(e->sem_wait, &e->sem_wait_alloc,
                              (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait));
        if (!tmp)
            goto fail;
        e->sem_wait = tmp;

        tmp = av_fast_realloc(e->sem_wait_dst, &e->sem_wait_dst_alloc,
                              (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait_dst));
        if (!tmp)
            goto fail;
        e->sem_wait_dst = tmp;

        tmp = av_fast_realloc(e->sem_sig, &e->sem_sig_alloc,
                              (e->sem_sig_cnt + 1)*sizeof(*e->sem_sig));
        if (!tmp)
            goto fail;
        e->sem_sig = tmp;

        e->sem_wait[e->sem_wait_cnt] = f->sem[i];
        e->sem_wait_dst[e->sem_wait_cnt] = in_wait_dst_flag;
        e->sem_wait_cnt++;

        e->sem_sig[e->sem_sig_cnt] = f->sem[i];
        e->sem_sig_cnt++;
    }

    dst = av_fast_realloc(q->frame_deps, &q->frame_deps_alloc_size,
                          (q->nb_frame_deps + 1) * sizeof(*dst));
    if (!dst)
        goto fail;

    q->frame_deps = dst;
    /* Hold our own reference until the dependencies are discarded */
    q->frame_deps[q->nb_frame_deps] = av_frame_clone(frame);
    if (!q->frame_deps[q->nb_frame_deps])
        goto fail;
    q->nb_frame_deps++;

    return 0;

fail:
    ff_vk_discard_exec_deps(avctx, e);
    return AVERROR(ENOMEM);
}
521
/**
 * Finish recording the current command buffer and submit it to the current
 * queue, using the wait/signal semaphore lists of the execution context and
 * the queue's fence. On success the filter moves on to the next queue.
 *
 * @return 0 on success, AVERROR_EXTERNAL if ending or submitting fails
 */
int ff_vk_submit_exec_queue(AVFilterContext *avctx, FFVkExecContext *e)
{
    VulkanFilterContext *vkctx = avctx->priv;
    FFVkQueueCtx *queue = &e->queues[vkctx->cur_queue_idx];
    VkResult res;

    VkSubmitInfo submit = {
        .sType                = VK_STRUCTURE_TYPE_SUBMIT_INFO,

        .waitSemaphoreCount   = e->sem_wait_cnt,
        .pWaitSemaphores      = e->sem_wait,
        .pWaitDstStageMask    = e->sem_wait_dst,

        .commandBufferCount   = 1,
        .pCommandBuffers      = &e->bufs[vkctx->cur_queue_idx],

        .signalSemaphoreCount = e->sem_sig_cnt,
        .pSignalSemaphores    = e->sem_sig,
    };

    res = vkEndCommandBuffer(e->bufs[vkctx->cur_queue_idx]);
    if (res != VK_SUCCESS) {
        av_log(avctx, AV_LOG_ERROR, "Unable to finish command buffer: %s\n",
               ff_vk_ret2str(res));
        return AVERROR_EXTERNAL;
    }

    res = vkQueueSubmit(queue->queue, 1, &submit, queue->fence);
    if (res != VK_SUCCESS) {
        av_log(avctx, AV_LOG_ERROR, "Unable to submit command buffer: %s\n",
               ff_vk_ret2str(res));
        return AVERROR_EXTERNAL;
    }

    /* Round-robin to the next queue for the following submission */
    vkctx->cur_queue_idx = (vkctx->cur_queue_idx + 1) % vkctx->queue_count;

    return 0;
}
560
/**
 * Append buffer references to the dependency list of the current queue.
 *
 * The pointers in deps are stored as they are (no new reference is taken).
 * Nothing happens when deps is NULL or nb_deps is zero.
 *
 * @return 0 on success; AVERROR(ENOMEM) if the list cannot be grown or a
 *         NULL entry is found in deps, in which case all dependencies of
 *         the current queue are discarded
 */
int ff_vk_add_dep_exec_ctx(AVFilterContext *avctx, FFVkExecContext *e,
                           AVBufferRef **deps, int nb_deps)
{
    VulkanFilterContext *vkctx = avctx->priv;
    FFVkQueueCtx *queue = &e->queues[vkctx->cur_queue_idx];
    AVBufferRef **grown;

    if (!deps || !nb_deps)
        return 0;

    grown = av_fast_realloc(queue->buf_deps, &queue->buf_deps_alloc_size,
                            (queue->nb_buf_deps + nb_deps) * sizeof(*grown));
    if (!grown) {
        ff_vk_discard_exec_deps(avctx, e);
        return AVERROR(ENOMEM);
    }

    queue->buf_deps = grown;

    for (int n = 0; n < nb_deps; n++) {
        AVBufferRef *dep = deps[n];

        queue->buf_deps[queue->nb_buf_deps] = dep;
        if (!dep) {
            /* A NULL entry is treated like an allocation failure */
            ff_vk_discard_exec_deps(avctx, e);
            return AVERROR(ENOMEM);
        }
        queue->nb_buf_deps++;
    }

    return 0;
}
591
/**
 * Advertise AV_PIX_FMT_VULKAN as the only pixel format the filter handles.
 *
 * @return the result of ff_set_common_formats(), or AVERROR(ENOMEM) if the
 *         format list cannot be created
 */
int ff_vk_filter_query_formats(AVFilterContext *avctx)
{
    static const enum AVPixelFormat supported[] = {
        AV_PIX_FMT_VULKAN,
        AV_PIX_FMT_NONE,
    };
    AVFilterFormats *list = ff_make_format_list(supported);

    return list ? ff_set_common_formats(avctx, list) : AVERROR(ENOMEM);
}
603
/**
 * Replace the filter's device reference with a new reference to `device`
 * and refresh the cached device and hardware context pointers from it.
 *
 * @return 0 on success, AVERROR(ENOMEM) if the reference cannot be created
 */
static int vulkan_filter_set_device(AVFilterContext *avctx,
                                    AVBufferRef *device)
{
    VulkanFilterContext *vkctx = avctx->priv;

    /* Release whatever was held before taking the new reference */
    av_buffer_unref(&vkctx->device_ref);
    vkctx->device_ref = av_buffer_ref(device);

    if (vkctx->device_ref) {
        vkctx->device = (AVHWDeviceContext*)vkctx->device_ref->data;
        vkctx->hwctx  = vkctx->device->hwctx;
        return 0;
    }

    return AVERROR(ENOMEM);
}
620
/**
 * Replace the filter's frames context reference with a new reference to
 * `frames`.
 *
 * @return 0 on success, AVERROR(ENOMEM) if the reference cannot be created
 */
static int vulkan_filter_set_frames(AVFilterContext *avctx,
                                    AVBufferRef *frames)
{
    VulkanFilterContext *vkctx = avctx->priv;

    /* Release whatever was held before taking the new reference */
    av_buffer_unref(&vkctx->frames_ref);
    vkctx->frames_ref = av_buffer_ref(frames);

    return vkctx->frames_ref ? 0 : AVERROR(ENOMEM);
}
634
/**
 * Configure an input link of a Vulkan filter.
 *
 * Every input must carry a hardware frames context. Only the first input is
 * used to pick up the device, the frames context and the default output
 * parameters; other inputs return 0 right after the check.
 *
 * @return 0 on success, AVERROR(EINVAL) if the link has no hardware frames
 *         context or its format is not AV_PIX_FMT_VULKAN, or the error from
 *         storing the device/frames references
 */
int ff_vk_filter_config_input(AVFilterLink *inlink)
{
    AVFilterContext *avctx = inlink->dst;
    VulkanFilterContext *vkctx = avctx->priv;
    AVHWFramesContext *in_frames;
    int ret;

    if (!inlink->hw_frames_ctx) {
        av_log(avctx, AV_LOG_ERROR, "Vulkan filtering requires a "
               "hardware frames context on the input.\n");
        return AVERROR(EINVAL);
    }

    /* Extract the device and default output format from the first input. */
    if (avctx->inputs[0] != inlink)
        return 0;

    in_frames = (AVHWFramesContext*)inlink->hw_frames_ctx->data;
    if (in_frames->format != AV_PIX_FMT_VULKAN)
        return AVERROR(EINVAL);

    ret = vulkan_filter_set_device(avctx, in_frames->device_ref);
    if (ret < 0)
        return ret;

    ret = vulkan_filter_set_frames(avctx, inlink->hw_frames_ctx);
    if (ret < 0)
        return ret;

    /* Anything the filter has not set explicitly is inherited from the input */
    vkctx->input_format = in_frames->sw_format;

    if (vkctx->output_format == AV_PIX_FMT_NONE)
        vkctx->output_format = in_frames->sw_format;

    if (!vkctx->output_width)
        vkctx->output_width = inlink->w;

    if (!vkctx->output_height)
        vkctx->output_height = inlink->h;

    return 0;
}
674
/**
 * Configure an output link which reuses the filter's stored frames context
 * instead of allocating a new one.
 *
 * If no device has been set yet, the filter's hw_device_ctx is used.
 *
 * @return 0 on success, AVERROR(EINVAL) if no device is available,
 *         AVERROR(ENOMEM) if the frames context cannot be referenced
 */
int ff_vk_filter_config_output_inplace(AVFilterLink *outlink)
{
    AVFilterContext *avctx = outlink->src;
    VulkanFilterContext *vkctx = avctx->priv;

    av_buffer_unref(&outlink->hw_frames_ctx);

    if (!vkctx->device_ref && !avctx->hw_device_ctx) {
        av_log(avctx, AV_LOG_ERROR, "Vulkan filtering requires a "
               "Vulkan device.\n");
        return AVERROR(EINVAL);
    }

    if (!vkctx->device_ref) {
        int ret = vulkan_filter_set_device(avctx, avctx->hw_device_ctx);
        if (ret < 0)
            return ret;
    }

    /* Share the stored frames context with the output */
    outlink->hw_frames_ctx = av_buffer_ref(vkctx->frames_ref);
    if (!outlink->hw_frames_ctx)
        return AVERROR(ENOMEM);

    outlink->w = vkctx->output_width;
    outlink->h = vkctx->output_height;

    return 0;
}
704
/**
 * Configure an output link with a newly allocated Vulkan frames context
 * using the filter's output format and dimensions.
 *
 * If no device has been set yet, the filter's hw_device_ctx is used.
 *
 * @return 0 on success, AVERROR(EINVAL) if no device is available,
 *         AVERROR(ENOMEM) if the frames context cannot be allocated, or the
 *         error returned by av_hwframe_ctx_init()
 */
int ff_vk_filter_config_output(AVFilterLink *outlink)
{
    AVFilterContext *avctx = outlink->src;
    VulkanFilterContext *vkctx = avctx->priv;
    AVHWFramesContext *out_frames;
    AVBufferRef *out_ref;
    int ret;

    av_buffer_unref(&outlink->hw_frames_ctx);

    if (!vkctx->device_ref && !avctx->hw_device_ctx) {
        av_log(avctx, AV_LOG_ERROR, "Vulkan filtering requires a "
               "Vulkan device.\n");
        return AVERROR(EINVAL);
    }

    if (!vkctx->device_ref) {
        ret = vulkan_filter_set_device(avctx, avctx->hw_device_ctx);
        if (ret < 0)
            return ret;
    }

    out_ref = av_hwframe_ctx_alloc(vkctx->device_ref);
    if (!out_ref)
        return AVERROR(ENOMEM);

    out_frames            = (AVHWFramesContext*)out_ref->data;
    out_frames->format    = AV_PIX_FMT_VULKAN;
    out_frames->sw_format = vkctx->output_format;
    out_frames->width     = vkctx->output_width;
    out_frames->height    = vkctx->output_height;

    ret = av_hwframe_ctx_init(out_ref);
    if (ret < 0) {
        av_log(avctx, AV_LOG_ERROR, "Failed to initialise output "
               "frames: %d.\n", ret);
        av_buffer_unref(&out_ref);
        return ret;
    }

    /* The link takes over the only reference */
    outlink->hw_frames_ctx = out_ref;
    outlink->w             = vkctx->output_width;
    outlink->h             = vkctx->output_height;

    return 0;
}
755
/**
 * Common initialisation for Vulkan filters: marks the output format as not
 * yet chosen and initialises glslang.
 *
 * @return 0 on success, AVERROR_EXTERNAL if glslang_init() reports failure
 */
int ff_vk_filter_init(AVFilterContext *avctx)
{
    VulkanFilterContext *vkctx = avctx->priv;

    vkctx->output_format = AV_PIX_FMT_NONE;

    return glslang_init() ? AVERROR_EXTERNAL : 0;
}
767
FN_CREATING(VulkanFilterContext,VkSampler,sampler,samplers,samplers_num)768 FN_CREATING(VulkanFilterContext, VkSampler, sampler, samplers, samplers_num)
769 VkSampler *ff_vk_init_sampler(AVFilterContext *avctx, int unnorm_coords,
770 VkFilter filt)
771 {
772 VkResult ret;
773 VulkanFilterContext *s = avctx->priv;
774
775 VkSamplerCreateInfo sampler_info = {
776 .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
777 .magFilter = filt,
778 .minFilter = sampler_info.magFilter,
779 .mipmapMode = unnorm_coords ? VK_SAMPLER_MIPMAP_MODE_NEAREST :
780 VK_SAMPLER_MIPMAP_MODE_LINEAR,
781 .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
782 .addressModeV = sampler_info.addressModeU,
783 .addressModeW = sampler_info.addressModeU,
784 .anisotropyEnable = VK_FALSE,
785 .compareOp = VK_COMPARE_OP_NEVER,
786 .borderColor = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK,
787 .unnormalizedCoordinates = unnorm_coords,
788 };
789
790 VkSampler *sampler = create_sampler(s);
791 if (!sampler)
792 return NULL;
793
794 ret = vkCreateSampler(s->hwctx->act_dev, &sampler_info,
795 s->hwctx->alloc, sampler);
796 if (ret != VK_SUCCESS) {
797 av_log(avctx, AV_LOG_ERROR, "Unable to init sampler: %s\n",
798 ff_vk_ret2str(ret));
799 return NULL;
800 }
801
802 return sampler;
803 }
804
ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt)805 int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt)
806 {
807 if (pix_fmt == AV_PIX_FMT_ABGR || pix_fmt == AV_PIX_FMT_BGRA ||
808 pix_fmt == AV_PIX_FMT_RGBA || pix_fmt == AV_PIX_FMT_RGB24 ||
809 pix_fmt == AV_PIX_FMT_BGR24 || pix_fmt == AV_PIX_FMT_RGB48 ||
810 pix_fmt == AV_PIX_FMT_RGBA64 || pix_fmt == AV_PIX_FMT_RGB565 ||
811 pix_fmt == AV_PIX_FMT_BGR565 || pix_fmt == AV_PIX_FMT_BGR0 ||
812 pix_fmt == AV_PIX_FMT_0BGR || pix_fmt == AV_PIX_FMT_RGB0)
813 return 1;
814 return 0;
815 }
816
ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt)817 const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt)
818 {
819 const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pixfmt);
820 const int high = desc->comp[0].depth > 8;
821 return high ? "rgba16f" : "rgba8";
822 }
823
/* Holder for a single image view handle. destroy_imageview() receives a
 * pointer to one of these as its data argument. */
typedef struct ImageViewCtx {
    VkImageView view; /* the wrapped image view */
} ImageViewCtx;
827
destroy_imageview(void * opaque,uint8_t * data)828 static void destroy_imageview(void *opaque, uint8_t *data)
829 {
830 VulkanFilterContext *s = opaque;
831 ImageViewCtx *iv = (ImageViewCtx *)data;
832 vkDestroyImageView(s->hwctx->act_dev, iv->view, s->hwctx->alloc);
833 av_free(iv);
834 }
835
/**
 * Create a 2D colour image view and tie its lifetime to the dependencies of
 * the execution context's current queue.
 *
 * The view is wrapped in an AVBufferRef whose free callback destroys it, and
 * that buffer is added to the queue's dependency list.
 *
 * @param avctx filter context; avctx->priv is the VulkanFilterContext
 * @param e     execution context which takes ownership of the view
 * @param v     receives the image view handle on success
 * @param img   image to create the view for
 * @param fmt   format of the view
 * @param map   component mapping of the view
 * @return 0 on success, AVERROR(ENOMEM) on allocation failure,
 *         AVERROR_EXTERNAL if vkCreateImageView() fails, or the error from
 *         registering the dependency
 */
int ff_vk_create_imageview(AVFilterContext *avctx, FFVkExecContext *e,
                           VkImageView *v, VkImage img, VkFormat fmt,
                           const VkComponentMapping map)
{
    int err;
    VkResult ret;
    AVBufferRef *buf;
    ImageViewCtx *iv;
    VulkanFilterContext *s = avctx->priv;
    VkImageViewCreateInfo imgview_spawn = {
        .sType      = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
        .pNext      = NULL,
        .image      = img,
        .viewType   = VK_IMAGE_VIEW_TYPE_2D,
        .format     = fmt,
        .components = map,
        .subresourceRange = {
            .aspectMask     = VK_IMAGE_ASPECT_COLOR_BIT,
            .baseMipLevel   = 0,
            .levelCount     = 1,
            .baseArrayLayer = 0,
            .layerCount     = 1,
        },
    };

    /* The allocation must be checked before &iv->view is handed to Vulkan */
    iv = av_mallocz(sizeof(*iv));
    if (!iv)
        return AVERROR(ENOMEM);

    ret = vkCreateImageView(s->hwctx->act_dev, &imgview_spawn,
                            s->hwctx->alloc, &iv->view);
    if (ret != VK_SUCCESS) {
        av_log(avctx, AV_LOG_ERROR, "Failed to create imageview: %s\n",
               ff_vk_ret2str(ret));
        /* No view was created, so only the holder needs to be released */
        av_free(iv);
        return AVERROR_EXTERNAL;
    }

    buf = av_buffer_create((uint8_t *)iv, sizeof(*iv), destroy_imageview, s, 0);
    if (!buf) {
        destroy_imageview(s, (uint8_t *)iv);
        return AVERROR(ENOMEM);
    }

    /* Add to queue dependencies */
    err = ff_vk_add_dep_exec_ctx(avctx, e, &buf, 1);
    if (err) {
        av_buffer_unref(&buf);
        return err;
    }

    *v = iv->view;

    return 0;
}
886
/* Generates create_shader(), which allocates a SPIRVShader and records it
 * in the pipeline's shaders array. */
FN_CREATING(VulkanPipeline, SPIRVShader, shader, shaders, shaders_num)
/**
 * Create a shader belonging to a pipeline and start its GLSL source.
 *
 * @param avctx filter context (not used directly in this function)
 * @param pl    pipeline which owns the new shader
 * @param name  shader name; the pointer is stored, the string is not copied
 * @param stage shader stage stored in the stage create info
 * @return the new shader, or NULL if it cannot be allocated
 */
SPIRVShader *ff_vk_init_shader(AVFilterContext *avctx, VulkanPipeline *pl,
                               const char *name, VkShaderStageFlags stage)
{
    SPIRVShader *shd = create_shader(pl);
    if (!shd)
        return NULL;

    /* Source text accumulates in an unbounded print buffer */
    av_bprint_init(&shd->src, 0, AV_BPRINT_SIZE_UNLIMITED);

    shd->shader.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
    shd->shader.stage = stage;

    shd->name = name;

    /* NOTE(review): GLSLF/GLSLC are defined outside this file; presumably
     * they stringify their arguments and append them to shd->src, which
     * would make these lines the GLSL preamble — confirm in the header. */
    GLSLF(0, #version %i                                                  ,460);
    GLSLC(0, #define IS_WITHIN(v1, v2) ((v1.x < v2.x) && (v1.y < v2.y))       );
    GLSLC(0,                                                                  );

    return shd;
}
908
/**
 * Record the compute work-group size of a shader and append the matching
 * GLSL layout declaration to its source.
 *
 * @param avctx      filter context (unused here)
 * @param shd        shader to update
 * @param local_size work-group size along x, y and z
 */
void ff_vk_set_compute_shader_sizes(AVFilterContext *avctx, SPIRVShader *shd,
                                    int local_size[3])
{
    for (int axis = 0; axis < 3; axis++)
        shd->local_size[axis] = local_size[axis];

    av_bprintf(&shd->src, "layout (local_size_x = %i, "
               "local_size_y = %i, local_size_z = %i) in;\n\n",
               shd->local_size[0], shd->local_size[1], shd->local_size[2]);
}
920
/**
 * Log the source of a shader with a line number in front of every line.
 *
 * Only newline-terminated lines are printed; text after the last newline
 * is not included.
 *
 * @param avctx logging context
 * @param shd   shader whose source is printed
 * @param prio  av_log level to print at
 */
static void print_shader(AVFilterContext *avctx, SPIRVShader *shd, int prio)
{
    int line = 0;
    const char *start = shd->src.str;

    AVBPrint buf;
    av_bprint_init(&buf, 0, AV_BPRINT_SIZE_UNLIMITED);

    /* Walk to the terminating NUL directly: calling strlen() in the loop
     * condition rescans the whole source on every iteration. */
    for (const char *p = start; *p; p++) {
        if (*p == '\n') {
            av_bprintf(&buf, "%i\t", ++line);
            /* Copy the line including its newline */
            av_bprint_append_data(&buf, start, p - start + 1);
            start = p + 1;
        }
    }

    av_log(avctx, prio, "Shader %s: \n%s", shd->name, buf.str);
    av_bprint_finalize(&buf, NULL);
}
941
/**
 * Compile the GLSL source of a shader and create its Vulkan shader module.
 *
 * @param avctx      filter context; avctx->priv is the VulkanFilterContext
 * @param shd        shader whose accumulated source is compiled
 * @param entrypoint entry point name; the pointer is stored in the stage
 *                   create info
 * @return 0 on success, AVERROR(ENOMEM) if the compiler returns no result,
 *         the compiler's error code if compilation fails, AVERROR_EXTERNAL
 *         if the shader module cannot be created
 */
int ff_vk_compile_shader(AVFilterContext *avctx, SPIRVShader *shd,
                         const char *entrypoint)
{
    VkResult ret;
    VulkanFilterContext *s = avctx->priv;
    VkShaderModuleCreateInfo shader_create;
    GLSlangResult *res;

    /* Vulkan stage bit -> glslang stage; only these three stages have an
     * entry in the table. */
    static const enum GLSlangStage emap[] = {
        [VK_SHADER_STAGE_VERTEX_BIT]   = GLSLANG_VERTEX,
        [VK_SHADER_STAGE_FRAGMENT_BIT] = GLSLANG_FRAGMENT,
        [VK_SHADER_STAGE_COMPUTE_BIT]  = GLSLANG_COMPUTE,
    };

    shd->shader.pName = entrypoint;

    res = glslang_compile(shd->src.str, emap[shd->shader.stage]);
    if (!res)
        return AVERROR(ENOMEM);

    if (res->rval) {
        /* Keep the code: the result struct is released before returning */
        int err = res->rval;

        av_log(avctx, AV_LOG_ERROR, "Error compiling shader %s: %s!\n",
               shd->name, av_err2str(err));
        print_shader(avctx, shd, AV_LOG_ERROR);
        if (res->error_msg)
            av_log(avctx, AV_LOG_ERROR, "%s", res->error_msg);
        av_free(res->error_msg);
        /* The result struct itself must be freed on this path too */
        av_free(res);
        return err;
    }

    print_shader(avctx, shd, AV_LOG_VERBOSE);

    shader_create.sType    = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
    shader_create.pNext    = NULL;
    shader_create.codeSize = res->size;
    shader_create.flags    = 0;
    shader_create.pCode    = res->data;

    ret = vkCreateShaderModule(s->hwctx->act_dev, &shader_create, NULL,
                               &shd->shader.module);

    /* Free the GLSlangResult struct */
    av_free(res->data);
    av_free(res);

    if (ret != VK_SUCCESS) {
        av_log(avctx, AV_LOG_ERROR, "Unable to create shader module: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    av_log(avctx, AV_LOG_VERBOSE, "Shader %s linked! Size: %zu bytes\n",
           shd->name, shader_create.codeSize);

    return 0;
}
998
/* Per-descriptor-type properties, indexed by VkDescriptorType value.
 * Positional initializer order is:
 *   struct_size, type, is_uniform, mem_quali, dim_needed, buf_content
 * NOTE(review): `type` looks like the GLSL keyword for the descriptor and
 * NULL for uniform buffers — confirm against the code which prints it. */
static const struct descriptor_props {
    size_t struct_size; /* Size of the opaque which updates the descriptor */
    const char *type;
    int is_uniform;
    int mem_quali;      /* Can use a memory qualifier */
    int dim_needed;     /* Must indicate dimension */
    int buf_content;    /* Must indicate buffer contents */
} descriptor_props[] = {
    [VK_DESCRIPTOR_TYPE_SAMPLER]                = { sizeof(VkDescriptorImageInfo),  "sampler",       1, 0, 0, 0, },
    [VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE]          = { sizeof(VkDescriptorImageInfo),  "texture",       1, 0, 1, 0, },
    [VK_DESCRIPTOR_TYPE_STORAGE_IMAGE]          = { sizeof(VkDescriptorImageInfo),  "image",         1, 1, 1, 0, },
    [VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT]       = { sizeof(VkDescriptorImageInfo),  "subpassInput",  1, 0, 0, 0, },
    [VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER] = { sizeof(VkDescriptorImageInfo),  "sampler",       1, 0, 1, 0, },
    [VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER]         = { sizeof(VkDescriptorBufferInfo),  NULL,           1, 0, 0, 1, },
    [VK_DESCRIPTOR_TYPE_STORAGE_BUFFER]         = { sizeof(VkDescriptorBufferInfo), "buffer",        0, 1, 0, 1, },
    [VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC] = { sizeof(VkDescriptorBufferInfo),  NULL,           1, 0, 0, 1, },
    [VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC] = { sizeof(VkDescriptorBufferInfo), "buffer",        0, 1, 0, 1, },
    [VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER]   = { sizeof(VkBufferView),           "samplerBuffer", 1, 0, 0, 0, },
    [VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER]   = { sizeof(VkBufferView),           "imageBuffer",   1, 0, 0, 0, },
};
1019
/**
 * Add a descriptor set to a pipeline and write its GLSL declarations.
 *
 * Unless only_print_to_shader is set, this creates a VkDescriptorSetLayout
 * describing the bindings, adds the bindings to the pipeline's descriptor
 * pool size list and stores the creation info for the set's update template.
 * In every case one "layout (set = ..., binding = ...)" declaration per
 * binding is written with GLSLA(), using the index of the most recently
 * added set.
 *
 * On failure nothing is committed: the pipeline's arrays stay valid,
 * desc_layout_num is unchanged and the temporary objects are released.
 *
 * @param avctx                filter context, its priv is the VulkanFilterContext
 * @param pl                   pipeline the set belongs to
 * @param shd                  shader the declarations are meant for
 *                             (not referenced directly here; presumably
 *                             picked up by the GLSLA() macro)
 * @param desc                 array of num binding descriptions
 * @param num                  number of entries in desc
 * @param only_print_to_shader non-zero to skip all Vulkan object creation
 *                             and only write the declarations
 * @return 0 on success, AVERROR(ENOMEM) or AVERROR_EXTERNAL on failure
 */
int ff_vk_add_descriptor_set(AVFilterContext *avctx, VulkanPipeline *pl,
                             SPIRVShader *shd, VulkanDescriptorSetBinding *desc,
                             int num, int only_print_to_shader)
{
    int err;
    VkResult ret;
    VkDescriptorSetLayout *layout;
    VkDescriptorUpdateTemplateEntry *des_entries = NULL;
    VulkanFilterContext *s = avctx->priv;

    if (only_print_to_shader)
        goto print;

    { /* Grow the layout array; keep the old one if the resize fails */
        void *tmp = av_realloc_array(pl->desc_layout, sizeof(*pl->desc_layout),
                                     pl->desc_layout_num + 1);
        if (!tmp)
            return AVERROR(ENOMEM);
        pl->desc_layout = tmp;
    }

    layout = &pl->desc_layout[pl->desc_layout_num];
    memset(layout, 0, sizeof(*layout));

    { /* Create descriptor set layout descriptions */
        VkDescriptorSetLayoutCreateInfo desc_create_layout = { 0 };
        VkDescriptorSetLayoutBinding *desc_binding;

        desc_binding = av_mallocz(sizeof(*desc_binding)*num);
        if (!desc_binding)
            return AVERROR(ENOMEM);

        for (int i = 0; i < num; i++) {
            desc_binding[i].binding            = i;
            desc_binding[i].descriptorType     = desc[i].type;
            desc_binding[i].descriptorCount    = FFMAX(desc[i].elems, 1);
            desc_binding[i].stageFlags         = desc[i].stages;
            desc_binding[i].pImmutableSamplers = desc[i].samplers;
        }

        desc_create_layout.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
        desc_create_layout.pBindings = desc_binding;
        desc_create_layout.bindingCount = num;

        ret = vkCreateDescriptorSetLayout(s->hwctx->act_dev, &desc_create_layout,
                                          s->hwctx->alloc, layout);
        av_free(desc_binding);
        if (ret != VK_SUCCESS) {
            av_log(avctx, AV_LOG_ERROR, "Unable to init descriptor set "
                   "layout: %s\n", ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
    }

    /* From here on the layout exists but is not yet counted in
     * desc_layout_num, so every failure must go through the fail label. */

    { /* Pool each descriptor by type and update pool counts */
        for (int i = 0; i < num; i++) {
            int j;
            for (j = 0; j < pl->pool_size_desc_num; j++)
                if (pl->pool_size_desc[j].type == desc[i].type)
                    break;
            if (j >= pl->pool_size_desc_num) {
                /* Only bump the count once the new slot really exists */
                void *tmp = av_realloc_array(pl->pool_size_desc,
                                             sizeof(*pl->pool_size_desc),
                                             pl->pool_size_desc_num + 1);
                if (!tmp) {
                    err = AVERROR(ENOMEM);
                    goto fail;
                }
                pl->pool_size_desc = tmp;
                pl->pool_size_desc_num++;
                memset(&pl->pool_size_desc[j], 0, sizeof(VkDescriptorPoolSize));
            }
            pl->pool_size_desc[j].type             = desc[i].type;
            pl->pool_size_desc[j].descriptorCount += FFMAX(desc[i].elems, 1);
        }
    }

    { /* Create template creation struct */
        VkDescriptorUpdateTemplateCreateInfo *dt;
        void *tmp;

        /* Freed after descriptor set initialization */
        des_entries = av_mallocz(num*sizeof(VkDescriptorUpdateTemplateEntry));
        if (!des_entries) {
            err = AVERROR(ENOMEM);
            goto fail;
        }

        for (int i = 0; i < num; i++) {
            des_entries[i].dstBinding      = i;
            des_entries[i].descriptorType  = desc[i].type;
            des_entries[i].descriptorCount = FFMAX(desc[i].elems, 1);
            des_entries[i].dstArrayElement = 0;
            /* The template reads its data relative to the filter context */
            des_entries[i].offset          = ((uint8_t *)desc[i].updater) - (uint8_t *)s;
            des_entries[i].stride          = descriptor_props[desc[i].type].struct_size;
        }

        tmp = av_realloc_array(pl->desc_template_info,
                               sizeof(*pl->desc_template_info),
                               pl->desc_layout_num + 1);
        if (!tmp) {
            err = AVERROR(ENOMEM);
            goto fail;
        }
        pl->desc_template_info = tmp;

        dt = &pl->desc_template_info[pl->desc_layout_num];
        memset(dt, 0, sizeof(*dt));

        dt->sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO;
        dt->templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET;
        dt->descriptorSetLayout = *layout;
        dt->pDescriptorUpdateEntries = des_entries;
        dt->descriptorUpdateEntryCount = num;
    }

    /* Commit: the layout and its template info are now owned by the pipeline */
    pl->desc_layout_num++;

print:
    /* Write shader info */
    for (int i = 0; i < num; i++) {
        const struct descriptor_props *prop = &descriptor_props[desc[i].type];
        GLSLA("layout (set = %i, binding = %i", pl->desc_layout_num - 1, i);

        if (desc[i].mem_layout)
            GLSLA(", %s", desc[i].mem_layout);
        GLSLA(")");

        if (prop->is_uniform)
            GLSLA(" uniform");

        if (prop->mem_quali && desc[i].mem_quali)
            GLSLA(" %s", desc[i].mem_quali);

        if (prop->type)
            GLSLA(" %s", prop->type);

        if (prop->dim_needed)
            GLSLA("%iD", desc[i].dimensions);

        GLSLA(" %s", desc[i].name);

        if (prop->buf_content)
            GLSLA(" {\n    %s\n}", desc[i].buf_content);
        else if (desc[i].elems > 0)
            GLSLA("[%i]", desc[i].elems);

        GLSLA(";\n");
    }
    GLSLA("\n");

    return 0;

fail:
    /* The layout was never counted, so nobody else would destroy it */
    av_free(des_entries);
    vkDestroyDescriptorSetLayout(s->hwctx->act_dev, *layout, s->hwctx->alloc);
    memset(layout, 0, sizeof(*layout));
    return err;
}
1159
/**
 * Update one descriptor set of the current queue through its update template.
 *
 * The set is picked from pl->desc_set at
 * cur_queue_idx * desc_layout_num + set_id, and the filter context itself
 * is handed to Vulkan as the data pointer the template reads from.
 *
 * @param avctx  filter context, its priv is the VulkanFilterContext
 * @param pl     pipeline owning the descriptor sets and templates
 * @param set_id index of the set within the pipeline
 */
void ff_vk_update_descriptor_set(AVFilterContext *avctx, VulkanPipeline *pl,
                                 int set_id)
{
    VulkanFilterContext *vkctx = avctx->priv;
    const int slot = vkctx->cur_queue_idx * pl->desc_layout_num + set_id;

    vkUpdateDescriptorSetWithTemplate(vkctx->hwctx->act_dev,
                                      pl->desc_set[slot],
                                      pl->desc_template[set_id],
                                      vkctx);
}
1170
/**
 * Record a push-constant update into the current queue's command buffer.
 *
 * Uses the pipeline layout of the pipeline currently bound to the execution
 * context (e->bound_pl).
 *
 * @param avctx  filter context, its priv is the VulkanFilterContext
 * @param e      execution context providing the command buffers
 * @param stage  shader stage(s) the constants are visible to
 * @param offset byte offset of the range to update
 * @param size   size in bytes of the range to update
 * @param src    data to copy into the push-constant range
 */
void ff_vk_update_push_exec(AVFilterContext *avctx, FFVkExecContext *e,
                            VkShaderStageFlagBits stage, int offset,
                            size_t size, void *src)
{
    VulkanFilterContext *vkctx = avctx->priv;
    VkCommandBuffer cmd = e->bufs[vkctx->cur_queue_idx];
    VkPipelineLayout bound_layout = e->bound_pl->pipeline_layout;

    vkCmdPushConstants(cmd, bound_layout, stage, offset, size, src);
}
1179
/**
 * Create the descriptor pool, descriptor sets, pipeline layout and
 * descriptor update templates of a pipeline.
 *
 * Every queue gets its own copy of each descriptor set, so
 * desc_layout_num * queue_count sets are allocated. The set for layout
 * `set` on queue `q` lives at index q * desc_layout_num + set, which is the
 * indexing ff_vk_update_descriptor_set() uses.
 *
 * The pool size list, the push constant list and the template creation info
 * gathered while the pipeline was described are released here once they
 * have been consumed.
 *
 * @param avctx filter context, its priv is the VulkanFilterContext
 * @param pl    pipeline to finish setting up
 * @return 0 on success, AVERROR(ENOMEM) or AVERROR_EXTERNAL on failure
 */
int ff_vk_init_pipeline_layout(AVFilterContext *avctx, VulkanPipeline *pl)
{
    VkResult ret;
    VulkanFilterContext *s = avctx->priv;

    pl->descriptor_sets_num = pl->desc_layout_num * s->queue_count;

    { /* Init descriptor set pool */
        VkDescriptorPoolCreateInfo pool_create_info = {
            .sType         = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
            .poolSizeCount = pl->pool_size_desc_num,
            .pPoolSizes    = pl->pool_size_desc,
            .maxSets       = pl->descriptor_sets_num,
        };

        /* The sizes were accumulated for a single copy of each set, but one
         * copy per queue is allocated from this pool. */
        if (pl->pool_size_desc)
            for (int i = 0; i < pl->pool_size_desc_num; i++)
                pl->pool_size_desc[i].descriptorCount *= s->queue_count;

        ret = vkCreateDescriptorPool(s->hwctx->act_dev, &pool_create_info,
                                     s->hwctx->alloc, &pl->desc_pool);
        av_freep(&pl->pool_size_desc);
        if (ret != VK_SUCCESS) {
            av_log(avctx, AV_LOG_ERROR, "Unable to init descriptor set "
                   "pool: %s\n", ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
    }

    { /* Allocate descriptor sets */
        VkDescriptorSetLayout *set_layouts;
        VkDescriptorSetAllocateInfo alloc_info = {
            .sType              = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
            .descriptorPool     = pl->desc_pool,
            .descriptorSetCount = pl->descriptor_sets_num,
        };

        /* Vulkan reads descriptorSetCount layouts, while pl->desc_layout
         * only holds desc_layout_num of them: repeat the list per queue. */
        set_layouts = av_malloc(pl->descriptor_sets_num*sizeof(*set_layouts));
        if (!set_layouts)
            return AVERROR(ENOMEM);
        for (int i = 0; i < pl->descriptor_sets_num; i++)
            set_layouts[i] = pl->desc_layout[i % pl->desc_layout_num];
        alloc_info.pSetLayouts = set_layouts;

        pl->desc_set = av_malloc(pl->descriptor_sets_num*sizeof(*pl->desc_set));
        if (!pl->desc_set) {
            av_free(set_layouts);
            return AVERROR(ENOMEM);
        }

        ret = vkAllocateDescriptorSets(s->hwctx->act_dev, &alloc_info,
                                       pl->desc_set);
        av_free(set_layouts);
        if (ret != VK_SUCCESS) {
            av_log(avctx, AV_LOG_ERROR, "Unable to allocate descriptor set: %s\n",
                   ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
    }

    { /* Finally create the pipeline layout */
        VkPipelineLayoutCreateInfo spawn_pipeline_layout = {
            .sType                  = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
            .setLayoutCount         = pl->desc_layout_num,
            .pSetLayouts            = pl->desc_layout,
            .pushConstantRangeCount = pl->push_consts_num,
            .pPushConstantRanges    = pl->push_consts,
        };

        ret = vkCreatePipelineLayout(s->hwctx->act_dev, &spawn_pipeline_layout,
                                     s->hwctx->alloc, &pl->pipeline_layout);
        av_freep(&pl->push_consts);
        pl->push_consts_num = 0;
        if (ret != VK_SUCCESS) {
            av_log(avctx, AV_LOG_ERROR, "Unable to init pipeline layout: %s\n",
                   ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
    }

    { /* Descriptor template (for tightly packed descriptors) */
        int err = 0;
        VkDescriptorUpdateTemplateCreateInfo *desc_template_info;

        /* Zeroed so that entries never created read as null handles */
        pl->desc_template = av_mallocz(pl->descriptor_sets_num*sizeof(*pl->desc_template));
        if (!pl->desc_template)
            return AVERROR(ENOMEM);

        /* Create update templates for the descriptor sets */
        for (int i = 0; i < pl->descriptor_sets_num; i++) {
            desc_template_info = &pl->desc_template_info[i % pl->desc_layout_num];
            desc_template_info->pipelineLayout = pl->pipeline_layout;
            ret = vkCreateDescriptorUpdateTemplate(s->hwctx->act_dev,
                                                   desc_template_info,
                                                   s->hwctx->alloc,
                                                   &pl->desc_template[i]);
            if (ret != VK_SUCCESS) {
                av_log(avctx, AV_LOG_ERROR, "Unable to init descriptor "
                       "template: %s\n", ff_vk_ret2str(ret));
                err = AVERROR_EXTERNAL;
                break;
            }
        }

        /* The same creation info is reused for every queue, so its entries
         * may only be released once the loop is over, and exactly once. */
        if (pl->desc_template_info) {
            for (int i = 0; i < pl->desc_layout_num; i++)
                av_free((void *)pl->desc_template_info[i].pDescriptorUpdateEntries);
            av_freep(&pl->desc_template_info);
        }

        if (err)
            return err;
    }

    return 0;
}
1274
FN_CREATING(VulkanFilterContext,VulkanPipeline,pipeline,pipelines,pipelines_num)1275 FN_CREATING(VulkanFilterContext, VulkanPipeline, pipeline, pipelines, pipelines_num)
1276 VulkanPipeline *ff_vk_create_pipeline(AVFilterContext *avctx)
1277 {
1278 return create_pipeline(avctx->priv);
1279 }
1280
/**
 * Create a compute pipeline from the first compute-stage shader of pl.
 *
 * @param avctx filter context, its priv is the VulkanFilterContext
 * @param pl    pipeline whose layout has already been created
 * @return 0 on success, AVERROR(EINVAL) if pl has no compute shader,
 *         AVERROR_EXTERNAL if Vulkan refuses to create the pipeline
 */
int ff_vk_init_compute_pipeline(AVFilterContext *avctx, VulkanPipeline *pl)
{
    VulkanFilterContext *vkctx = avctx->priv;
    VkComputePipelineCreateInfo create_info = {
        .sType  = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
        .layout = pl->pipeline_layout,
    };
    VkResult res;
    int idx;

    /* Pick the first shader flagged as a compute stage */
    for (idx = 0; idx < pl->shaders_num; idx++) {
        SPIRVShader *candidate = pl->shaders[idx];

        if (!(candidate->shader.stage & VK_SHADER_STAGE_COMPUTE_BIT))
            continue;

        create_info.stage = candidate->shader;
        break;
    }

    if (idx == pl->shaders_num) {
        av_log(avctx, AV_LOG_ERROR, "Can't init compute pipeline, no shader\n");
        return AVERROR(EINVAL);
    }

    res = vkCreateComputePipelines(vkctx->hwctx->act_dev, VK_NULL_HANDLE, 1,
                                   &create_info, vkctx->hwctx->alloc,
                                   &pl->pipeline);
    if (res != VK_SUCCESS) {
        av_log(avctx, AV_LOG_ERROR, "Unable to init compute pipeline: %s\n",
               ff_vk_ret2str(res));
        return AVERROR_EXTERNAL;
    }

    pl->bind_point = VK_PIPELINE_BIND_POINT_COMPUTE;

    return 0;
}
1315
/**
 * Record the commands binding a pipeline and its descriptor sets into the
 * current queue's command buffer, and remember it as the bound pipeline.
 *
 * Only the current queue's descriptor sets are bound: desc_layout_num sets
 * starting at index cur_queue_idx * desc_layout_num, the same slice
 * ff_vk_update_descriptor_set() writes to. The pipeline layout is created
 * with desc_layout_num set layouts, so binding more sets than that is not
 * valid.
 *
 * @param avctx filter context, its priv is the VulkanFilterContext
 * @param e     execution context providing the command buffers
 * @param pl    pipeline to bind
 */
void ff_vk_bind_pipeline_exec(AVFilterContext *avctx, FFVkExecContext *e,
                              VulkanPipeline *pl)
{
    VulkanFilterContext *s = avctx->priv;
    VkCommandBuffer cmd = e->bufs[s->cur_queue_idx];

    vkCmdBindPipeline(cmd, pl->bind_point, pl->pipeline);

    /* Nothing to bind for a pipeline without descriptor sets */
    if (pl->desc_layout_num > 0)
        vkCmdBindDescriptorSets(cmd, pl->bind_point, pl->pipeline_layout, 0,
                                pl->desc_layout_num,
                                &pl->desc_set[s->cur_queue_idx * pl->desc_layout_num],
                                0, NULL);

    e->bound_pl = pl;
}
1329
/* Release an execution context: wait for and destroy the per-queue fences,
 * drop the buffer and frame dependencies, free the command buffers and the
 * command pool, then free the context itself.
 *
 * Like bufs and pool, the queues array is allowed to be missing so that a
 * context whose setup stopped half-way can still be released. */
static void free_exec_ctx(VulkanFilterContext *s, FFVkExecContext *e)
{
    if (e->queues) {
        for (int i = 0; i < s->queue_count; i++) {
            FFVkQueueCtx *q = &e->queues[i];

            if (q->fence) {
                /* Make sure the queue has finished executing */
                vkWaitForFences(s->hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
                vkResetFences(s->hwctx->act_dev, 1, &q->fence);

                /* Free the fence */
                vkDestroyFence(s->hwctx->act_dev, q->fence, s->hwctx->alloc);
            }

            /* Free buffer dependencies */
            for (int j = 0; j < q->nb_buf_deps; j++)
                av_buffer_unref(&q->buf_deps[j]);
            av_free(q->buf_deps);

            /* Free frame dependencies */
            for (int j = 0; j < q->nb_frame_deps; j++)
                av_frame_free(&q->frame_deps[j]);
            av_free(q->frame_deps);
        }
    }

    if (e->bufs)
        vkFreeCommandBuffers(s->hwctx->act_dev, e->pool, s->queue_count, e->bufs);
    if (e->pool)
        vkDestroyCommandPool(s->hwctx->act_dev, e->pool, s->hwctx->alloc);

    av_freep(&e->bufs);
    av_freep(&e->queues);
    av_freep(&e->sem_sig);
    av_freep(&e->sem_wait);
    av_freep(&e->sem_wait_dst);
    av_free(e);
}
1368
/* Destroy every Vulkan object owned by a pipeline and free the pipeline.
 *
 * Two different counts are involved:
 *  - desc_layout_num: number of descriptor set layouts, and of entries in
 *    desc_template_info (one per added descriptor set);
 *  - descriptor_sets_num: number of descriptor sets and of update templates
 *    (one per layout and per queue). */
static void free_pipeline(VulkanFilterContext *s, VulkanPipeline *pl)
{
    for (int i = 0; i < pl->shaders_num; i++) {
        SPIRVShader *shd = pl->shaders[i];
        av_bprint_finalize(&shd->src, NULL);
        vkDestroyShaderModule(s->hwctx->act_dev, shd->shader.module,
                              s->hwctx->alloc);
        av_free(shd);
    }

    vkDestroyPipeline(s->hwctx->act_dev, pl->pipeline, s->hwctx->alloc);
    vkDestroyPipelineLayout(s->hwctx->act_dev, pl->pipeline_layout,
                            s->hwctx->alloc);

    /* One update template was created per descriptor set */
    if (pl->desc_template) {
        for (int i = 0; i < pl->descriptor_sets_num; i++) {
            if (pl->desc_template[i])
                vkDestroyDescriptorUpdateTemplate(s->hwctx->act_dev,
                                                  pl->desc_template[i],
                                                  s->hwctx->alloc);
        }
    }

    if (pl->desc_layout) {
        for (int i = 0; i < pl->desc_layout_num; i++) {
            if (pl->desc_layout[i])
                vkDestroyDescriptorSetLayout(s->hwctx->act_dev,
                                             pl->desc_layout[i],
                                             s->hwctx->alloc);
        }
    }

    /* Also frees the descriptor sets */
    if (pl->desc_pool)
        vkDestroyDescriptorPool(s->hwctx->act_dev, pl->desc_pool,
                                s->hwctx->alloc);

    av_freep(&pl->desc_set);
    av_freep(&pl->shaders);
    av_freep(&pl->desc_layout);
    av_freep(&pl->desc_template);
    av_freep(&pl->push_consts);
    pl->push_consts_num = 0;

    /* Only freed in case of failure */
    av_freep(&pl->pool_size_desc);
    if (pl->desc_template_info) {
        /* This array holds one entry per layout, not one per set */
        for (int i = 0; i < pl->desc_layout_num; i++)
            av_free((void *)pl->desc_template_info[i].pDescriptorUpdateEntries);
        av_freep(&pl->desc_template_info);
    }

    av_free(pl);
}
1414
/**
 * Tear down everything the Vulkan filter context owns.
 *
 * Order: glslang, execution contexts, samplers, pipelines, the scratch
 * buffer and finally the device and frames references.
 *
 * @param avctx filter context, its priv is the VulkanFilterContext
 */
void ff_vk_filter_uninit(AVFilterContext *avctx)
{
    VulkanFilterContext *vkctx = avctx->priv;
    int idx;

    glslang_uninit();

    /* Execution contexts */
    for (idx = 0; idx < vkctx->exec_ctx_num; idx++)
        free_exec_ctx(vkctx, vkctx->exec_ctx[idx]);
    av_freep(&vkctx->exec_ctx);

    /* Samplers: each one is a separately allocated handle */
    for (idx = 0; idx < vkctx->samplers_num; idx++) {
        vkDestroySampler(vkctx->hwctx->act_dev, *vkctx->samplers[idx],
                         vkctx->hwctx->alloc);
        av_free(vkctx->samplers[idx]);
    }
    av_freep(&vkctx->samplers);

    /* Pipelines */
    for (idx = 0; idx < vkctx->pipelines_num; idx++)
        free_pipeline(vkctx, vkctx->pipelines[idx]);
    av_freep(&vkctx->pipelines);

    /* Scratch memory */
    av_freep(&vkctx->scratch);
    vkctx->scratch_size = 0;

    /* Drop the references last */
    av_buffer_unref(&vkctx->device_ref);
    av_buffer_unref(&vkctx->frames_ref);
}
1441