/*
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "avassert.h"

#include "vulkan.h"
#include "vulkan_loader.h"

#if CONFIG_LIBGLSLANG
#include "vulkan_glslang.c"
#elif CONFIG_LIBSHADERC
#include "vulkan_shaderc.c"
#endif

/* Generic macro for creating contexts which need to keep their addresses
 * if another context is created. */
#define FN_CREATING(ctx, type, shortname, array, num)                          \
static av_always_inline type *create_ ##shortname(ctx *dctx)                   \
{                                                                              \
    type **array, *sctx = av_mallocz(sizeof(*sctx));                           \
    if (!sctx)                                                                 \
        return NULL;                                                           \
                                                                               \
    array = av_realloc_array(dctx->array, sizeof(*dctx->array), dctx->num + 1);\
    if (!array) {                                                              \
        av_free(sctx);                                                         \
        return NULL;                                                           \
    }                                                                          \
                                                                               \
    dctx->array = array;                                                       \
    dctx->array[dctx->num++] = sctx;                                           \
                                                                               \
    return sctx;                                                               \
}
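
/* For instance, FN_CREATING(FFVulkanContext, FFVkExecContext, exec_ctx,
 * exec_ctx, exec_ctx_num) below defines create_exec_ctx(), which appends a
 * heap-allocated FFVkExecContext to s->exec_ctx and returns its address,
 * which remains valid when further contexts are created. */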

const VkComponentMapping ff_comp_identity_map = {
    .r = VK_COMPONENT_SWIZZLE_IDENTITY,
    .g = VK_COMPONENT_SWIZZLE_IDENTITY,
    .b = VK_COMPONENT_SWIZZLE_IDENTITY,
    .a = VK_COMPONENT_SWIZZLE_IDENTITY,
};

/* Converts return values to strings */
const char *ff_vk_ret2str(VkResult res)
{
#define CASE(VAL) case VAL: return #VAL
    switch (res) {
    CASE(VK_SUCCESS);
    CASE(VK_NOT_READY);
    CASE(VK_TIMEOUT);
    CASE(VK_EVENT_SET);
    CASE(VK_EVENT_RESET);
    CASE(VK_INCOMPLETE);
    CASE(VK_ERROR_OUT_OF_HOST_MEMORY);
    CASE(VK_ERROR_OUT_OF_DEVICE_MEMORY);
    CASE(VK_ERROR_INITIALIZATION_FAILED);
    CASE(VK_ERROR_DEVICE_LOST);
    CASE(VK_ERROR_MEMORY_MAP_FAILED);
    CASE(VK_ERROR_LAYER_NOT_PRESENT);
    CASE(VK_ERROR_EXTENSION_NOT_PRESENT);
    CASE(VK_ERROR_FEATURE_NOT_PRESENT);
    CASE(VK_ERROR_INCOMPATIBLE_DRIVER);
    CASE(VK_ERROR_TOO_MANY_OBJECTS);
    CASE(VK_ERROR_FORMAT_NOT_SUPPORTED);
    CASE(VK_ERROR_FRAGMENTED_POOL);
    CASE(VK_ERROR_SURFACE_LOST_KHR);
    CASE(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR);
    CASE(VK_SUBOPTIMAL_KHR);
    CASE(VK_ERROR_OUT_OF_DATE_KHR);
    CASE(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR);
    CASE(VK_ERROR_VALIDATION_FAILED_EXT);
    CASE(VK_ERROR_INVALID_SHADER_NV);
    CASE(VK_ERROR_OUT_OF_POOL_MEMORY);
    CASE(VK_ERROR_INVALID_EXTERNAL_HANDLE);
    CASE(VK_ERROR_NOT_PERMITTED_EXT);
    default: return "Unknown error";
    }
#undef CASE
}

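/* Initializes qf with the device's queue family matching dev_family;
 * a nb_queues of 0 selects every queue the family exposes. */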
void ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
                   VkQueueFlagBits dev_family, int nb_queues)
{
    switch (dev_family) {
    case VK_QUEUE_GRAPHICS_BIT:
        qf->queue_family = s->hwctx->queue_family_index;
        qf->actual_queues = s->hwctx->nb_graphics_queues;
        break;
    case VK_QUEUE_COMPUTE_BIT:
        qf->queue_family = s->hwctx->queue_family_comp_index;
        qf->actual_queues = s->hwctx->nb_comp_queues;
        break;
    case VK_QUEUE_TRANSFER_BIT:
        qf->queue_family = s->hwctx->queue_family_tx_index;
        qf->actual_queues = s->hwctx->nb_tx_queues;
        break;
    case VK_QUEUE_VIDEO_ENCODE_BIT_KHR:
        qf->queue_family = s->hwctx->queue_family_encode_index;
        qf->actual_queues = s->hwctx->nb_encode_queues;
        break;
    case VK_QUEUE_VIDEO_DECODE_BIT_KHR:
        qf->queue_family = s->hwctx->queue_family_decode_index;
        qf->actual_queues = s->hwctx->nb_decode_queues;
        break;
    default:
        av_assert0(0); /* Should never happen */
    }

    if (!nb_queues)
        qf->nb_queues = qf->actual_queues;
    else
        qf->nb_queues = nb_queues;

    return;
}

void ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf)
{
    qf->cur_queue = (qf->cur_queue + 1) % qf->nb_queues;
}

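/* Allocates device memory satisfying req and req_flags, picking the first
 * (i.e. best) matching type from s->mprops; the chosen type's property
 * flags are reported back through mem_flags. */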
static int vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
                        VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
                        VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem)
{
    VkResult ret;
    int index = -1;
    FFVulkanFunctions *vk = &s->vkfn;

    VkMemoryAllocateInfo alloc_info = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
        .pNext = alloc_extension,
    };

    /* Align if we need to */
    if (req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
        req->size = FFALIGN(req->size, s->props.limits.minMemoryMapAlignment);

    alloc_info.allocationSize = req->size;

    /* The Vulkan spec requires memory types to be sorted in the "optimal"
     * order, so the first matching type we find will be the best/fastest one */
    for (int i = 0; i < s->mprops.memoryTypeCount; i++) {
        /* The memory type must be supported by the requirements (bitfield) */
        if (!(req->memoryTypeBits & (1 << i)))
            continue;

        /* The memory type flags must include our properties */
        if ((s->mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags)
            continue;

        /* Found a suitable memory type */
        index = i;
        break;
    }

    if (index < 0) {
        av_log(s, AV_LOG_ERROR, "No memory type found for flags 0x%x\n",
               req_flags);
        return AVERROR(EINVAL);
    }

    alloc_info.memoryTypeIndex = index;

    ret = vk->AllocateMemory(s->hwctx->act_dev, &alloc_info,
                             s->hwctx->alloc, mem);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Failed to allocate memory: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR(ENOMEM);
    }

    *mem_flags |= s->mprops.memoryTypes[index].propertyFlags;

    return 0;
}

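/* Creates a VkBuffer of the given size/usage and binds freshly allocated
 * memory to it, honouring the implementation's preference for dedicated
 * allocations. */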
int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
                     VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags)
{
    int err;
    VkResult ret;
    int use_ded_mem;
    FFVulkanFunctions *vk = &s->vkfn;

    VkBufferCreateInfo buf_spawn = {
        .sType       = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
        .pNext       = NULL,
        .usage       = usage,
        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
        .size        = size, /* Gets FFALIGNED during alloc if host visible
                                but should be ok */
    };

    VkBufferMemoryRequirementsInfo2 req_desc = {
        .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,
    };
    VkMemoryDedicatedAllocateInfo ded_alloc = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
        .pNext = NULL,
    };
    VkMemoryDedicatedRequirements ded_req = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
    };
    VkMemoryRequirements2 req = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
        .pNext = &ded_req,
    };

    ret = vk->CreateBuffer(s->hwctx->act_dev, &buf_spawn, NULL, &buf->buf);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Failed to create buffer: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    req_desc.buffer = buf->buf;

    vk->GetBufferMemoryRequirements2(s->hwctx->act_dev, &req_desc, &req);

    /* In case the implementation prefers/requires dedicated allocation */
    use_ded_mem = ded_req.prefersDedicatedAllocation |
                  ded_req.requiresDedicatedAllocation;
    if (use_ded_mem)
        ded_alloc.buffer = buf->buf;

    err = vk_alloc_mem(s, &req.memoryRequirements, flags,
                       use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext,
                       &buf->flags, &buf->mem);
    if (err)
        return err;

    ret = vk->BindBufferMemory(s->hwctx->act_dev, buf->buf, buf->mem, 0);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Failed to bind memory to buffer: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    return 0;
}

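/* Maps nb_buffers buffers and returns their host pointers in mem[]; if
 * invalidate is set, non-coherent allocations are invalidated so that
 * writes made by the GPU become visible to the host. */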
int ff_vk_map_buffers(FFVulkanContext *s, FFVkBuffer *buf, uint8_t *mem[],
                      int nb_buffers, int invalidate)
{
    VkResult ret;
    FFVulkanFunctions *vk = &s->vkfn;
    VkMappedMemoryRange *inval_list = NULL;
    int inval_count = 0;

    for (int i = 0; i < nb_buffers; i++) {
        ret = vk->MapMemory(s->hwctx->act_dev, buf[i].mem, 0,
                            VK_WHOLE_SIZE, 0, (void **)&mem[i]);
        if (ret != VK_SUCCESS) {
            av_log(s, AV_LOG_ERROR, "Failed to map buffer memory: %s\n",
                   ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
    }

    if (!invalidate)
        return 0;

    for (int i = 0; i < nb_buffers; i++) {
        const VkMappedMemoryRange ival_buf = {
            .sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
            .memory = buf[i].mem,
            .size   = VK_WHOLE_SIZE,
        };
        if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
            continue;
        inval_list = av_fast_realloc(s->scratch, &s->scratch_size,
                                     (++inval_count)*sizeof(*inval_list));
        if (!inval_list)
            return AVERROR(ENOMEM);
        inval_list[inval_count - 1] = ival_buf;
    }

    if (inval_count) {
        ret = vk->InvalidateMappedMemoryRanges(s->hwctx->act_dev, inval_count,
                                               inval_list);
        if (ret != VK_SUCCESS) {
            av_log(s, AV_LOG_ERROR, "Failed to invalidate memory: %s\n",
                   ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
    }

    return 0;
}

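/* Unmaps nb_buffers buffers; if flush is set, non-coherent allocations are
 * flushed first so host writes become visible to the GPU. Unmapping is
 * attempted even if the flush fails. */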
int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer *buf, int nb_buffers,
                        int flush)
{
    int err = 0;
    VkResult ret;
    FFVulkanFunctions *vk = &s->vkfn;
    VkMappedMemoryRange *flush_list = NULL;
    int flush_count = 0;

    if (flush) {
        for (int i = 0; i < nb_buffers; i++) {
            const VkMappedMemoryRange flush_buf = {
                .sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
                .memory = buf[i].mem,
                .size   = VK_WHOLE_SIZE,
            };
            if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
                continue;
            flush_list = av_fast_realloc(s->scratch, &s->scratch_size,
                                         (++flush_count)*sizeof(*flush_list));
            if (!flush_list)
                return AVERROR(ENOMEM);
            flush_list[flush_count - 1] = flush_buf;
        }
    }

    if (flush_count) {
        ret = vk->FlushMappedMemoryRanges(s->hwctx->act_dev, flush_count,
                                          flush_list);
        if (ret != VK_SUCCESS) {
            av_log(s, AV_LOG_ERROR, "Failed to flush memory: %s\n",
                   ff_vk_ret2str(ret));
            err = AVERROR_EXTERNAL; /* We still want to try to unmap them */
        }
    }

    for (int i = 0; i < nb_buffers; i++)
        vk->UnmapMemory(s->hwctx->act_dev, buf[i].mem);

    return err;
}

void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf)
{
    FFVulkanFunctions *vk = &s->vkfn;

    if (!buf || !s->hwctx)
        return;

    vk->DeviceWaitIdle(s->hwctx->act_dev);

    if (buf->buf != VK_NULL_HANDLE)
        vk->DestroyBuffer(s->hwctx->act_dev, buf->buf, s->hwctx->alloc);
    if (buf->mem != VK_NULL_HANDLE)
        vk->FreeMemory(s->hwctx->act_dev, buf->mem, s->hwctx->alloc);
}

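/* Appends a push constant range covering [offset, offset + size) for the
 * given shader stage(s) to the pipeline's layout description. */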
int ff_vk_add_push_constant(FFVulkanPipeline *pl, int offset, int size,
                            VkShaderStageFlagBits stage)
{
    VkPushConstantRange *pc;

    pl->push_consts = av_realloc_array(pl->push_consts, sizeof(*pl->push_consts),
                                       pl->push_consts_num + 1);
    if (!pl->push_consts)
        return AVERROR(ENOMEM);

    pc = &pl->push_consts[pl->push_consts_num++];
    memset(pc, 0, sizeof(*pc));

    pc->stageFlags = stage;
    pc->offset     = offset;
    pc->size       = size;

    return 0;
}

FN_CREATING(FFVulkanContext, FFVkExecContext, exec_ctx, exec_ctx, exec_ctx_num)
int ff_vk_create_exec_ctx(FFVulkanContext *s, FFVkExecContext **ctx,
                          FFVkQueueFamilyCtx *qf)
{
    VkResult ret;
    FFVkExecContext *e;
    FFVulkanFunctions *vk = &s->vkfn;

    VkCommandPoolCreateInfo cqueue_create = {
        .sType            = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
        .flags            = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
        .queueFamilyIndex = qf->queue_family,
    };
    VkCommandBufferAllocateInfo cbuf_create = {
        .sType              = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
        .level              = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
        .commandBufferCount = qf->nb_queues,
    };

    e = create_exec_ctx(s);
    if (!e)
        return AVERROR(ENOMEM);

    e->qf = qf;

    e->queues = av_mallocz(qf->nb_queues * sizeof(*e->queues));
    if (!e->queues)
        return AVERROR(ENOMEM);

    e->bufs = av_mallocz(qf->nb_queues * sizeof(*e->bufs));
    if (!e->bufs)
        return AVERROR(ENOMEM);

    /* Create command pool */
    ret = vk->CreateCommandPool(s->hwctx->act_dev, &cqueue_create,
                                s->hwctx->alloc, &e->pool);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Command pool creation failure: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    cbuf_create.commandPool = e->pool;

    /* Allocate command buffer */
    ret = vk->AllocateCommandBuffers(s->hwctx->act_dev, &cbuf_create, e->bufs);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Command buffer alloc failure: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    for (int i = 0; i < qf->nb_queues; i++) {
        FFVkQueueCtx *q = &e->queues[i];
        vk->GetDeviceQueue(s->hwctx->act_dev, qf->queue_family,
                           i % qf->actual_queues, &q->queue);
    }

    *ctx = e;

    return 0;
}

void ff_vk_discard_exec_deps(FFVkExecContext *e)
{
    FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];

    for (int j = 0; j < q->nb_buf_deps; j++)
        av_buffer_unref(&q->buf_deps[j]);
    q->nb_buf_deps = 0;

    for (int j = 0; j < q->nb_frame_deps; j++)
        av_frame_free(&q->frame_deps[j]);
    q->nb_frame_deps = 0;

    e->sem_wait_cnt = 0;
    e->sem_sig_cnt = 0;
}

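/* Begins recording on the current queue's command buffer. On first use a
 * fence is created unsignalled; on subsequent uses the fence is waited on
 * and reset, and any stale dependencies from the previous submission are
 * discarded. */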
int ff_vk_start_exec_recording(FFVulkanContext *s, FFVkExecContext *e)
{
    VkResult ret;
    FFVulkanFunctions *vk = &s->vkfn;
    FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];

    VkCommandBufferBeginInfo cmd_start = {
        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
        .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
    };

    /* Create the fence and don't wait for it initially */
    if (!q->fence) {
        VkFenceCreateInfo fence_spawn = {
            .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
        };
        ret = vk->CreateFence(s->hwctx->act_dev, &fence_spawn, s->hwctx->alloc,
                              &q->fence);
        if (ret != VK_SUCCESS) {
            av_log(s, AV_LOG_ERROR, "Failed to create frame fence: %s\n",
                   ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
    } else {
        vk->WaitForFences(s->hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
        vk->ResetFences(s->hwctx->act_dev, 1, &q->fence);
    }

    /* Discard queue dependencies */
    ff_vk_discard_exec_deps(e);

    ret = vk->BeginCommandBuffer(e->bufs[e->qf->cur_queue], &cmd_start);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Failed to start command recording: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    return 0;
}

VkCommandBuffer ff_vk_get_exec_buf(FFVkExecContext *e)
{
    return e->bufs[e->qf->cur_queue];
}

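/* Registers frame as a dependency of the current submission: its per-plane
 * timeline semaphores are added to the wait and signal lists, and a clone
 * of the frame is kept alive until the queue's work completes. */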
int ff_vk_add_exec_dep(FFVulkanContext *s, FFVkExecContext *e, AVFrame *frame,
                       VkPipelineStageFlagBits in_wait_dst_flag)
{
    AVFrame **dst;
    AVVkFrame *f = (AVVkFrame *)frame->data[0];
    FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
    AVHWFramesContext *fc = (AVHWFramesContext *)frame->hw_frames_ctx->data;
    int planes = av_pix_fmt_count_planes(fc->sw_format);

    for (int i = 0; i < planes; i++) {
        e->sem_wait = av_fast_realloc(e->sem_wait, &e->sem_wait_alloc,
                                      (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait));
        if (!e->sem_wait) {
            ff_vk_discard_exec_deps(e);
            return AVERROR(ENOMEM);
        }

        e->sem_wait_dst = av_fast_realloc(e->sem_wait_dst, &e->sem_wait_dst_alloc,
                                          (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait_dst));
        if (!e->sem_wait_dst) {
            ff_vk_discard_exec_deps(e);
            return AVERROR(ENOMEM);
        }

        e->sem_wait_val = av_fast_realloc(e->sem_wait_val, &e->sem_wait_val_alloc,
                                          (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait_val));
        if (!e->sem_wait_val) {
            ff_vk_discard_exec_deps(e);
            return AVERROR(ENOMEM);
        }

        e->sem_sig = av_fast_realloc(e->sem_sig, &e->sem_sig_alloc,
                                     (e->sem_sig_cnt + 1)*sizeof(*e->sem_sig));
        if (!e->sem_sig) {
            ff_vk_discard_exec_deps(e);
            return AVERROR(ENOMEM);
        }

        e->sem_sig_val = av_fast_realloc(e->sem_sig_val, &e->sem_sig_val_alloc,
                                         (e->sem_sig_cnt + 1)*sizeof(*e->sem_sig_val));
        if (!e->sem_sig_val) {
            ff_vk_discard_exec_deps(e);
            return AVERROR(ENOMEM);
        }

        e->sem_sig_val_dst = av_fast_realloc(e->sem_sig_val_dst, &e->sem_sig_val_dst_alloc,
                                             (e->sem_sig_cnt + 1)*sizeof(*e->sem_sig_val_dst));
        if (!e->sem_sig_val_dst) {
            ff_vk_discard_exec_deps(e);
            return AVERROR(ENOMEM);
        }

        e->sem_wait[e->sem_wait_cnt] = f->sem[i];
        e->sem_wait_dst[e->sem_wait_cnt] = in_wait_dst_flag;
        e->sem_wait_val[e->sem_wait_cnt] = f->sem_value[i];
        e->sem_wait_cnt++;

        e->sem_sig[e->sem_sig_cnt] = f->sem[i];
        e->sem_sig_val[e->sem_sig_cnt] = f->sem_value[i] + 1;
        e->sem_sig_val_dst[e->sem_sig_cnt] = &f->sem_value[i];
        e->sem_sig_cnt++;
    }

    dst = av_fast_realloc(q->frame_deps, &q->frame_deps_alloc_size,
                          (q->nb_frame_deps + 1) * sizeof(*dst));
    if (!dst) {
        ff_vk_discard_exec_deps(e);
        return AVERROR(ENOMEM);
    }

    q->frame_deps = dst;
    q->frame_deps[q->nb_frame_deps] = av_frame_clone(frame);
    if (!q->frame_deps[q->nb_frame_deps]) {
        ff_vk_discard_exec_deps(e);
        return AVERROR(ENOMEM);
    }
    q->nb_frame_deps++;

    return 0;
}

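/* Ends recording and submits the current command buffer, waiting on and
 * signalling the collected timeline semaphores; the frames' semaphore
 * values are bumped once the submission has been queued. */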
int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e)
{
    VkResult ret;
    FFVulkanFunctions *vk = &s->vkfn;
    FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];

    VkTimelineSemaphoreSubmitInfo s_timeline_sem_info = {
        .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
        .pWaitSemaphoreValues = e->sem_wait_val,
        .pSignalSemaphoreValues = e->sem_sig_val,
        .waitSemaphoreValueCount = e->sem_wait_cnt,
        .signalSemaphoreValueCount = e->sem_sig_cnt,
    };

    VkSubmitInfo s_info = {
        .sType                = VK_STRUCTURE_TYPE_SUBMIT_INFO,
        .pNext                = &s_timeline_sem_info,

        .commandBufferCount   = 1,
        .pCommandBuffers      = &e->bufs[e->qf->cur_queue],

        .pWaitSemaphores      = e->sem_wait,
        .pWaitDstStageMask    = e->sem_wait_dst,
        .waitSemaphoreCount   = e->sem_wait_cnt,

        .pSignalSemaphores    = e->sem_sig,
        .signalSemaphoreCount = e->sem_sig_cnt,
    };

    ret = vk->EndCommandBuffer(e->bufs[e->qf->cur_queue]);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Unable to finish command buffer: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    ret = vk->QueueSubmit(q->queue, 1, &s_info, q->fence);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Unable to submit command buffer: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    for (int i = 0; i < e->sem_sig_cnt; i++)
        *e->sem_sig_val_dst[i] += 1;

    return 0;
}

int ff_vk_add_dep_exec_ctx(FFVulkanContext *s, FFVkExecContext *e,
                           AVBufferRef **deps, int nb_deps)
{
    AVBufferRef **dst;
    FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];

    if (!deps || !nb_deps)
        return 0;

    dst = av_fast_realloc(q->buf_deps, &q->buf_deps_alloc_size,
                          (q->nb_buf_deps + nb_deps) * sizeof(*dst));
    if (!dst)
        goto err;

    q->buf_deps = dst;

    for (int i = 0; i < nb_deps; i++) {
        q->buf_deps[q->nb_buf_deps] = deps[i];
        if (!q->buf_deps[q->nb_buf_deps])
            goto err;
        q->nb_buf_deps++;
    }

    return 0;

err:
    ff_vk_discard_exec_deps(e);
    return AVERROR(ENOMEM);
}

FN_CREATING(FFVulkanContext, FFVkSampler, sampler, samplers, samplers_num)
FFVkSampler *ff_vk_init_sampler(FFVulkanContext *s,
                                int unnorm_coords, VkFilter filt)
{
    VkResult ret;
    FFVulkanFunctions *vk = &s->vkfn;

    VkSamplerCreateInfo sampler_info = {
        .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
        .magFilter = filt,
        .minFilter = sampler_info.magFilter,
        .mipmapMode = unnorm_coords ? VK_SAMPLER_MIPMAP_MODE_NEAREST :
                                      VK_SAMPLER_MIPMAP_MODE_LINEAR,
        .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
        .addressModeV = sampler_info.addressModeU,
        .addressModeW = sampler_info.addressModeU,
        .anisotropyEnable = VK_FALSE,
        .compareOp = VK_COMPARE_OP_NEVER,
        .borderColor = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK,
        .unnormalizedCoordinates = unnorm_coords,
    };

    FFVkSampler *sctx = create_sampler(s);
    if (!sctx)
        return NULL;

    ret = vk->CreateSampler(s->hwctx->act_dev, &sampler_info,
                            s->hwctx->alloc, &sctx->sampler[0]);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Unable to init sampler: %s\n",
               ff_vk_ret2str(ret));
        return NULL;
    }

    for (int i = 1; i < 4; i++)
        sctx->sampler[i] = sctx->sampler[0];

    return sctx;
}

int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt)
{
    if (pix_fmt == AV_PIX_FMT_ABGR   || pix_fmt == AV_PIX_FMT_BGRA   ||
        pix_fmt == AV_PIX_FMT_RGBA   || pix_fmt == AV_PIX_FMT_RGB24  ||
        pix_fmt == AV_PIX_FMT_BGR24  || pix_fmt == AV_PIX_FMT_RGB48  ||
        pix_fmt == AV_PIX_FMT_RGBA64 || pix_fmt == AV_PIX_FMT_RGB565 ||
        pix_fmt == AV_PIX_FMT_BGR565 || pix_fmt == AV_PIX_FMT_BGR0   ||
        pix_fmt == AV_PIX_FMT_0BGR   || pix_fmt == AV_PIX_FMT_RGB0)
        return 1;
    return 0;
}

const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt)
{
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pixfmt);
    const int high = desc->comp[0].depth > 8;
    return high ? "rgba16f" : "rgba8";
}

typedef struct ImageViewCtx {
    VkImageView view;
} ImageViewCtx;

static void destroy_imageview(void *opaque, uint8_t *data)
{
    FFVulkanContext *s = opaque;
    FFVulkanFunctions *vk = &s->vkfn;
    ImageViewCtx *iv = (ImageViewCtx *)data;

    vk->DestroyImageView(s->hwctx->act_dev, iv->view, s->hwctx->alloc);
    av_free(iv);
}

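/* Creates a 2D image view for img and registers it as a dependency of the
 * execution context, so it is destroyed once the queue's work completes. */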
int ff_vk_create_imageview(FFVulkanContext *s, FFVkExecContext *e,
                           VkImageView *v, VkImage img, VkFormat fmt,
                           const VkComponentMapping map)
{
    int err;
    VkResult ret;
    AVBufferRef *buf;
    FFVulkanFunctions *vk = &s->vkfn;

    VkImageViewCreateInfo imgview_spawn = {
        .sType      = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
        .pNext      = NULL,
        .image      = img,
        .viewType   = VK_IMAGE_VIEW_TYPE_2D,
        .format     = fmt,
        .components = map,
        .subresourceRange = {
            .aspectMask     = VK_IMAGE_ASPECT_COLOR_BIT,
            .baseMipLevel   = 0,
            .levelCount     = 1,
            .baseArrayLayer = 0,
            .layerCount     = 1,
        },
    };

    ImageViewCtx *iv = av_mallocz(sizeof(*iv));
    if (!iv)
        return AVERROR(ENOMEM);

    ret = vk->CreateImageView(s->hwctx->act_dev, &imgview_spawn,
                              s->hwctx->alloc, &iv->view);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Failed to create imageview: %s\n",
               ff_vk_ret2str(ret));
        av_free(iv);
        return AVERROR_EXTERNAL;
    }

    buf = av_buffer_create((uint8_t *)iv, sizeof(*iv), destroy_imageview, s, 0);
    if (!buf) {
        destroy_imageview(s, (uint8_t *)iv);
        return AVERROR(ENOMEM);
    }

    /* Add to queue dependencies */
    err = ff_vk_add_dep_exec_ctx(s, e, &buf, 1);
    if (err) {
        av_buffer_unref(&buf);
        return err;
    }

    *v = iv->view;

    return 0;
}

FN_CREATING(FFVulkanPipeline, FFVkSPIRVShader, shader, shaders, shaders_num)
FFVkSPIRVShader *ff_vk_init_shader(FFVulkanPipeline *pl, const char *name,
                                   VkShaderStageFlags stage)
{
    FFVkSPIRVShader *shd = create_shader(pl);
    if (!shd)
        return NULL;

    av_bprint_init(&shd->src, 0, AV_BPRINT_SIZE_UNLIMITED);

    shd->shader.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
    shd->shader.stage = stage;

    shd->name = name;

    GLSLF(0, #version %i ,460);
    GLSLC(0, #define IS_WITHIN(v1, v2) ((v1.x < v2.x) && (v1.y < v2.y)) );
    GLSLC(0, );

    return shd;
}

void ff_vk_set_compute_shader_sizes(FFVkSPIRVShader *shd, int local_size[3])
{
    shd->local_size[0] = local_size[0];
    shd->local_size[1] = local_size[1];
    shd->local_size[2] = local_size[2];

    av_bprintf(&shd->src, "layout (local_size_x = %i, "
               "local_size_y = %i, local_size_z = %i) in;\n\n",
               shd->local_size[0], shd->local_size[1], shd->local_size[2]);
}

void ff_vk_print_shader(void *ctx, FFVkSPIRVShader *shd, int prio)
{
    int line = 0;
    const char *p = shd->src.str;
    const char *start = p;
    const size_t len = strlen(p);

    AVBPrint buf;
    av_bprint_init(&buf, 0, AV_BPRINT_SIZE_UNLIMITED);

    for (int i = 0; i < len; i++) {
        if (p[i] == '\n') {
            av_bprintf(&buf, "%i\t", ++line);
            av_bprint_append_data(&buf, start, &p[i] - start + 1);
            start = &p[i + 1];
        }
    }

    av_log(ctx, prio, "Shader %s: \n%s", shd->name, buf.str);
    av_bprint_finalize(&buf, NULL);
}

int ff_vk_compile_shader(FFVulkanContext *s, FFVkSPIRVShader *shd,
                         const char *entrypoint)
{
    int err;
    VkResult ret;
    FFVulkanFunctions *vk = &s->vkfn;
    VkShaderModuleCreateInfo shader_create;
    uint8_t *spirv;
    size_t spirv_size;
    void *priv;

    shd->shader.pName = entrypoint;

    if (!s->spirv_compiler) {
#if CONFIG_LIBGLSLANG
        s->spirv_compiler = ff_vk_glslang_init();
#elif CONFIG_LIBSHADERC
        s->spirv_compiler = ff_vk_shaderc_init();
#else
        return AVERROR(ENOSYS);
#endif
        if (!s->spirv_compiler)
            return AVERROR(ENOMEM);
    }

    err = s->spirv_compiler->compile_shader(s->spirv_compiler, s, shd, &spirv,
                                            &spirv_size, entrypoint, &priv);
    if (err < 0)
        return err;

    av_log(s, AV_LOG_VERBOSE, "Shader %s compiled! Size: %zu bytes\n",
           shd->name, spirv_size);

    shader_create.sType    = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
    shader_create.pNext    = NULL;
    shader_create.codeSize = spirv_size;
    shader_create.flags    = 0;
    shader_create.pCode    = (void *)spirv;

    ret = vk->CreateShaderModule(s->hwctx->act_dev, &shader_create, NULL,
                                 &shd->shader.module);

    s->spirv_compiler->free_shader(s->spirv_compiler, &priv);

    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Unable to create shader module: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    return 0;
}

static const struct descriptor_props {
    size_t struct_size; /* Size of the opaque which updates the descriptor */
    const char *type;
    int is_uniform;
    int mem_quali;   /* Can use a memory qualifier */
    int dim_needed;  /* Must indicate dimension */
    int buf_content; /* Must indicate buffer contents */
} descriptor_props[] = {
    [VK_DESCRIPTOR_TYPE_SAMPLER]                = { sizeof(VkDescriptorImageInfo),  "sampler",       1, 0, 0, 0, },
    [VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE]          = { sizeof(VkDescriptorImageInfo),  "texture",       1, 0, 1, 0, },
    [VK_DESCRIPTOR_TYPE_STORAGE_IMAGE]          = { sizeof(VkDescriptorImageInfo),  "image",         1, 1, 1, 0, },
    [VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT]       = { sizeof(VkDescriptorImageInfo),  "subpassInput",  1, 0, 0, 0, },
    [VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER] = { sizeof(VkDescriptorImageInfo),  "sampler",       1, 0, 1, 0, },
    [VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER]         = { sizeof(VkDescriptorBufferInfo), NULL,            1, 0, 0, 1, },
    [VK_DESCRIPTOR_TYPE_STORAGE_BUFFER]         = { sizeof(VkDescriptorBufferInfo), "buffer",        0, 1, 0, 1, },
    [VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC] = { sizeof(VkDescriptorBufferInfo), NULL,            1, 0, 0, 1, },
    [VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC] = { sizeof(VkDescriptorBufferInfo), "buffer",        0, 1, 0, 1, },
    [VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER]   = { sizeof(VkBufferView),           "samplerBuffer", 1, 0, 0, 0, },
    [VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER]   = { sizeof(VkBufferView),           "imageBuffer",   1, 0, 0, 0, },
};

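/* Adds a descriptor set (or, with only_print_to_shader set, just its GLSL
 * declaration) to the pipeline: one layout per queue is created, pool
 * sizes are accumulated, and an update template entry is prepared for
 * each binding. */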
int ff_vk_add_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl,
                             FFVkSPIRVShader *shd, FFVulkanDescriptorSetBinding *desc,
                             int num, int only_print_to_shader)
{
    VkResult ret;
    VkDescriptorSetLayout *layout;
    FFVulkanFunctions *vk = &s->vkfn;

    if (only_print_to_shader)
        goto print;

    pl->desc_layout = av_realloc_array(pl->desc_layout, sizeof(*pl->desc_layout),
                                       pl->desc_layout_num + pl->qf->nb_queues);
    if (!pl->desc_layout)
        return AVERROR(ENOMEM);

    pl->desc_set_initialized = av_realloc_array(pl->desc_set_initialized,
                                                sizeof(*pl->desc_set_initialized),
                                                pl->descriptor_sets_num + 1);
    if (!pl->desc_set_initialized)
        return AVERROR(ENOMEM);

    pl->desc_set_initialized[pl->descriptor_sets_num] = 0;
    layout = &pl->desc_layout[pl->desc_layout_num];

    { /* Create descriptor set layout descriptions */
        VkDescriptorSetLayoutCreateInfo desc_create_layout = { 0 };
        VkDescriptorSetLayoutBinding *desc_binding;

        desc_binding = av_mallocz(sizeof(*desc_binding)*num);
        if (!desc_binding)
            return AVERROR(ENOMEM);

        for (int i = 0; i < num; i++) {
            desc_binding[i].binding            = i;
            desc_binding[i].descriptorType     = desc[i].type;
            desc_binding[i].descriptorCount    = FFMAX(desc[i].elems, 1);
            desc_binding[i].stageFlags         = desc[i].stages;
            desc_binding[i].pImmutableSamplers = desc[i].sampler ?
                                                 desc[i].sampler->sampler :
                                                 NULL;
        }

        desc_create_layout.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
        desc_create_layout.pBindings = desc_binding;
        desc_create_layout.bindingCount = num;

        for (int i = 0; i < pl->qf->nb_queues; i++) {
            ret = vk->CreateDescriptorSetLayout(s->hwctx->act_dev, &desc_create_layout,
                                                s->hwctx->alloc, &layout[i]);
            if (ret != VK_SUCCESS) {
                av_log(s, AV_LOG_ERROR, "Unable to init descriptor set "
                       "layout: %s\n", ff_vk_ret2str(ret));
                av_free(desc_binding);
                return AVERROR_EXTERNAL;
            }
        }

        av_free(desc_binding);
    }

    { /* Pool each descriptor by type and update pool counts */
        for (int i = 0; i < num; i++) {
            int j;
            for (j = 0; j < pl->pool_size_desc_num; j++)
                if (pl->pool_size_desc[j].type == desc[i].type)
                    break;
            if (j >= pl->pool_size_desc_num) {
                pl->pool_size_desc = av_realloc_array(pl->pool_size_desc,
                                                      sizeof(*pl->pool_size_desc),
                                                      ++pl->pool_size_desc_num);
                if (!pl->pool_size_desc)
                    return AVERROR(ENOMEM);
                memset(&pl->pool_size_desc[j], 0, sizeof(VkDescriptorPoolSize));
            }
            pl->pool_size_desc[j].type             = desc[i].type;
            pl->pool_size_desc[j].descriptorCount += FFMAX(desc[i].elems, 1)*pl->qf->nb_queues;
        }
    }

    { /* Create template creation struct */
        VkDescriptorUpdateTemplateCreateInfo *dt;
        VkDescriptorUpdateTemplateEntry *des_entries;

        /* Freed after descriptor set initialization */
        des_entries = av_mallocz(num*sizeof(VkDescriptorUpdateTemplateEntry));
        if (!des_entries)
            return AVERROR(ENOMEM);

        for (int i = 0; i < num; i++) {
            des_entries[i].dstBinding      = i;
            des_entries[i].descriptorType  = desc[i].type;
            des_entries[i].descriptorCount = FFMAX(desc[i].elems, 1);
            des_entries[i].dstArrayElement = 0;
            des_entries[i].offset          = ((uint8_t *)desc[i].updater) - (uint8_t *)s;
            des_entries[i].stride          = descriptor_props[desc[i].type].struct_size;
        }

        pl->desc_template_info = av_realloc_array(pl->desc_template_info,
                                                  sizeof(*pl->desc_template_info),
                                                  pl->total_descriptor_sets + pl->qf->nb_queues);
        if (!pl->desc_template_info)
            return AVERROR(ENOMEM);

        dt = &pl->desc_template_info[pl->total_descriptor_sets];
        memset(dt, 0, sizeof(*dt)*pl->qf->nb_queues);

        for (int i = 0; i < pl->qf->nb_queues; i++) {
            dt[i].sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO;
            dt[i].templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET;
            dt[i].descriptorSetLayout = layout[i];
            dt[i].pDescriptorUpdateEntries = des_entries;
            dt[i].descriptorUpdateEntryCount = num;
        }
    }

    pl->descriptor_sets_num++;

    pl->desc_layout_num += pl->qf->nb_queues;
    pl->total_descriptor_sets += pl->qf->nb_queues;

print:
    /* Write shader info */
    for (int i = 0; i < num; i++) {
        const struct descriptor_props *prop = &descriptor_props[desc[i].type];
        GLSLA("layout (set = %i, binding = %i", pl->descriptor_sets_num - 1, i);

        if (desc[i].mem_layout)
            GLSLA(", %s", desc[i].mem_layout);
        GLSLA(")");

        if (prop->is_uniform)
            GLSLA(" uniform");

        if (prop->mem_quali && desc[i].mem_quali)
            GLSLA(" %s", desc[i].mem_quali);

        if (prop->type)
            GLSLA(" %s", prop->type);

        if (prop->dim_needed)
            GLSLA("%iD", desc[i].dimensions);

        GLSLA(" %s", desc[i].name);

        if (prop->buf_content)
            GLSLA(" {\n %s\n}", desc[i].buf_content);
        else if (desc[i].elems > 0)
            GLSLA("[%i]", desc[i].elems);

        GLSLA(";\n");
    }
    GLSLA("\n");

    return 0;
}

void ff_vk_update_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl,
                                 int set_id)
{
    FFVulkanFunctions *vk = &s->vkfn;

    /* If a set has never been updated, update all queues' sets. */
    if (!pl->desc_set_initialized[set_id]) {
        for (int i = 0; i < pl->qf->nb_queues; i++) {
            int idx = set_id*pl->qf->nb_queues + i;
            vk->UpdateDescriptorSetWithTemplate(s->hwctx->act_dev,
                                                pl->desc_set[idx],
                                                pl->desc_template[idx],
                                                s);
        }
        pl->desc_set_initialized[set_id] = 1;
        return;
    }

    set_id = set_id*pl->qf->nb_queues + pl->qf->cur_queue;

    vk->UpdateDescriptorSetWithTemplate(s->hwctx->act_dev,
                                        pl->desc_set[set_id],
                                        pl->desc_template[set_id],
                                        s);
}

void ff_vk_update_push_exec(FFVulkanContext *s, FFVkExecContext *e,
                            VkShaderStageFlagBits stage, int offset,
                            size_t size, void *src)
{
    FFVulkanFunctions *vk = &s->vkfn;

    vk->CmdPushConstants(e->bufs[e->qf->cur_queue], e->bound_pl->pipeline_layout,
                         stage, offset, size, src);
}

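/* Finalizes the pipeline layout: creates the descriptor pool, allocates
 * all descriptor sets, builds the VkPipelineLayout and the per-set update
 * templates, then frees the temporary template-entry storage. */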
int ff_vk_init_pipeline_layout(FFVulkanContext *s, FFVulkanPipeline *pl)
{
    VkResult ret;
    FFVulkanFunctions *vk = &s->vkfn;

    pl->desc_staging = av_malloc(pl->descriptor_sets_num*sizeof(*pl->desc_staging));
    if (!pl->desc_staging)
        return AVERROR(ENOMEM);

    { /* Init descriptor set pool */
        VkDescriptorPoolCreateInfo pool_create_info = {
            .sType         = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
            .poolSizeCount = pl->pool_size_desc_num,
            .pPoolSizes    = pl->pool_size_desc,
            .maxSets       = pl->total_descriptor_sets,
        };

        ret = vk->CreateDescriptorPool(s->hwctx->act_dev, &pool_create_info,
                                       s->hwctx->alloc, &pl->desc_pool);
        av_freep(&pl->pool_size_desc);
        if (ret != VK_SUCCESS) {
            av_log(s, AV_LOG_ERROR, "Unable to init descriptor set "
                   "pool: %s\n", ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
    }

    { /* Allocate descriptor sets */
        VkDescriptorSetAllocateInfo alloc_info = {
            .sType              = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
            .descriptorPool     = pl->desc_pool,
            .descriptorSetCount = pl->total_descriptor_sets,
            .pSetLayouts        = pl->desc_layout,
        };

        pl->desc_set = av_malloc(pl->total_descriptor_sets*sizeof(*pl->desc_set));
        if (!pl->desc_set)
            return AVERROR(ENOMEM);

        ret = vk->AllocateDescriptorSets(s->hwctx->act_dev, &alloc_info,
                                         pl->desc_set);
        if (ret != VK_SUCCESS) {
            av_log(s, AV_LOG_ERROR, "Unable to allocate descriptor set: %s\n",
                   ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
    }

    { /* Finally create the pipeline layout */
        VkPipelineLayoutCreateInfo spawn_pipeline_layout = {
            .sType                  = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
            .pSetLayouts            = (VkDescriptorSetLayout *)pl->desc_staging,
            .pushConstantRangeCount = pl->push_consts_num,
            .pPushConstantRanges    = pl->push_consts,
        };

        for (int i = 0; i < pl->total_descriptor_sets; i += pl->qf->nb_queues)
            pl->desc_staging[spawn_pipeline_layout.setLayoutCount++] = pl->desc_layout[i];

        ret = vk->CreatePipelineLayout(s->hwctx->act_dev, &spawn_pipeline_layout,
                                       s->hwctx->alloc, &pl->pipeline_layout);
        av_freep(&pl->push_consts);
        pl->push_consts_num = 0;
        if (ret != VK_SUCCESS) {
            av_log(s, AV_LOG_ERROR, "Unable to init pipeline layout: %s\n",
                   ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
    }

    { /* Descriptor template (for tightly packed descriptors) */
        VkDescriptorUpdateTemplateCreateInfo *dt;

        pl->desc_template = av_malloc(pl->total_descriptor_sets*sizeof(*pl->desc_template));
        if (!pl->desc_template)
            return AVERROR(ENOMEM);

        /* Create update templates for the descriptor sets */
        for (int i = 0; i < pl->total_descriptor_sets; i++) {
            dt = &pl->desc_template_info[i];
            dt->pipelineLayout = pl->pipeline_layout;
            ret = vk->CreateDescriptorUpdateTemplate(s->hwctx->act_dev,
                                                     dt, s->hwctx->alloc,
                                                     &pl->desc_template[i]);
            if (ret != VK_SUCCESS) {
                av_log(s, AV_LOG_ERROR, "Unable to init descriptor "
                       "template: %s\n", ff_vk_ret2str(ret));
                return AVERROR_EXTERNAL;
            }
        }

        /* Free the duplicated memory used for the template entries */
        for (int i = 0; i < pl->total_descriptor_sets; i += pl->qf->nb_queues) {
            dt = &pl->desc_template_info[i];
            av_free((void *)dt->pDescriptorUpdateEntries);
        }

        av_freep(&pl->desc_template_info);
    }

    return 0;
}

FN_CREATING(FFVulkanContext, FFVulkanPipeline, pipeline, pipelines, pipelines_num)
FFVulkanPipeline *ff_vk_create_pipeline(FFVulkanContext *s, FFVkQueueFamilyCtx *qf)
{
    FFVulkanPipeline *pl = create_pipeline(s);
    if (pl)
        pl->qf = qf;

    return pl;
}

int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl)
{
    int i;
    VkResult ret;
    FFVulkanFunctions *vk = &s->vkfn;

    VkComputePipelineCreateInfo pipe = {
        .sType  = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
        .layout = pl->pipeline_layout,
    };

    for (i = 0; i < pl->shaders_num; i++) {
        if (pl->shaders[i]->shader.stage & VK_SHADER_STAGE_COMPUTE_BIT) {
            pipe.stage = pl->shaders[i]->shader;
            break;
        }
    }
    if (i == pl->shaders_num) {
        av_log(s, AV_LOG_ERROR, "Can't init compute pipeline, no shader\n");
        return AVERROR(EINVAL);
    }

    ret = vk->CreateComputePipelines(s->hwctx->act_dev, VK_NULL_HANDLE, 1, &pipe,
                                     s->hwctx->alloc, &pl->pipeline);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Unable to init compute pipeline: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    pl->bind_point = VK_PIPELINE_BIND_POINT_COMPUTE;

    return 0;
}

void ff_vk_bind_pipeline_exec(FFVulkanContext *s, FFVkExecContext *e,
                              FFVulkanPipeline *pl)
{
    FFVulkanFunctions *vk = &s->vkfn;

    vk->CmdBindPipeline(e->bufs[e->qf->cur_queue], pl->bind_point, pl->pipeline);

    for (int i = 0; i < pl->descriptor_sets_num; i++)
        pl->desc_staging[i] = pl->desc_set[i*pl->qf->nb_queues + pl->qf->cur_queue];

    vk->CmdBindDescriptorSets(e->bufs[e->qf->cur_queue], pl->bind_point,
                              pl->pipeline_layout, 0,
                              pl->descriptor_sets_num,
                              (VkDescriptorSet *)pl->desc_staging,
                              0, NULL);

    e->bound_pl = pl;
}

static void free_exec_ctx(FFVulkanContext *s, FFVkExecContext *e)
{
    FFVulkanFunctions *vk = &s->vkfn;

    /* Make sure all queues have finished executing */
    for (int i = 0; i < e->qf->nb_queues; i++) {
        FFVkQueueCtx *q = &e->queues[i];

        if (q->fence) {
            vk->WaitForFences(s->hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
            vk->ResetFences(s->hwctx->act_dev, 1, &q->fence);
        }

        /* Free the fence */
        if (q->fence)
            vk->DestroyFence(s->hwctx->act_dev, q->fence, s->hwctx->alloc);

        /* Free buffer dependencies */
        for (int j = 0; j < q->nb_buf_deps; j++)
            av_buffer_unref(&q->buf_deps[j]);
        av_free(q->buf_deps);

        /* Free frame dependencies */
        for (int j = 0; j < q->nb_frame_deps; j++)
            av_frame_free(&q->frame_deps[j]);
        av_free(q->frame_deps);
    }

    if (e->bufs)
        vk->FreeCommandBuffers(s->hwctx->act_dev, e->pool, e->qf->nb_queues, e->bufs);
    if (e->pool)
        vk->DestroyCommandPool(s->hwctx->act_dev, e->pool, s->hwctx->alloc);

    av_freep(&e->bufs);
    av_freep(&e->queues);
    av_freep(&e->sem_sig);
    av_freep(&e->sem_sig_val);
    av_freep(&e->sem_sig_val_dst);
    av_freep(&e->sem_wait);
    av_freep(&e->sem_wait_dst);
    av_freep(&e->sem_wait_val);
    av_free(e);
}

static void free_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl)
{
    FFVulkanFunctions *vk = &s->vkfn;

    for (int i = 0; i < pl->shaders_num; i++) {
        FFVkSPIRVShader *shd = pl->shaders[i];
        av_bprint_finalize(&shd->src, NULL);
        vk->DestroyShaderModule(s->hwctx->act_dev, shd->shader.module,
                                s->hwctx->alloc);
        av_free(shd);
    }

    vk->DestroyPipeline(s->hwctx->act_dev, pl->pipeline, s->hwctx->alloc);
    vk->DestroyPipelineLayout(s->hwctx->act_dev, pl->pipeline_layout,
                              s->hwctx->alloc);

    for (int i = 0; i < pl->desc_layout_num; i++) {
        if (pl->desc_template && pl->desc_template[i])
            vk->DestroyDescriptorUpdateTemplate(s->hwctx->act_dev, pl->desc_template[i],
                                                s->hwctx->alloc);
        if (pl->desc_layout && pl->desc_layout[i])
            vk->DestroyDescriptorSetLayout(s->hwctx->act_dev, pl->desc_layout[i],
                                           s->hwctx->alloc);
    }

    /* Also frees the descriptor sets */
    if (pl->desc_pool)
        vk->DestroyDescriptorPool(s->hwctx->act_dev, pl->desc_pool,
                                  s->hwctx->alloc);

    av_freep(&pl->desc_staging);
    av_freep(&pl->desc_set);
    av_freep(&pl->shaders);
    av_freep(&pl->desc_layout);
    av_freep(&pl->desc_template);
    av_freep(&pl->desc_set_initialized);
    av_freep(&pl->push_consts);
    pl->push_consts_num = 0;

    /* Only freed in case of failure */
    av_freep(&pl->pool_size_desc);
    if (pl->desc_template_info) {
        for (int i = 0; i < pl->total_descriptor_sets; i += pl->qf->nb_queues) {
            VkDescriptorUpdateTemplateCreateInfo *dt = &pl->desc_template_info[i];
            av_free((void *)dt->pDescriptorUpdateEntries);
        }
        av_freep(&pl->desc_template_info);
    }

    av_free(pl);
}

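/* Tears down everything owned by the context: the SPIR-V compiler,
 * execution contexts, samplers, pipelines, scratch memory and the
 * device/frames references. */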
void ff_vk_uninit(FFVulkanContext *s)
{
    FFVulkanFunctions *vk = &s->vkfn;

    if (s->spirv_compiler)
        s->spirv_compiler->uninit(&s->spirv_compiler);

    for (int i = 0; i < s->exec_ctx_num; i++)
        free_exec_ctx(s, s->exec_ctx[i]);
    av_freep(&s->exec_ctx);

    for (int i = 0; i < s->samplers_num; i++) {
        vk->DestroySampler(s->hwctx->act_dev, s->samplers[i]->sampler[0],
                           s->hwctx->alloc);
        av_free(s->samplers[i]);
    }
    av_freep(&s->samplers);

    for (int i = 0; i < s->pipelines_num; i++)
        free_pipeline(s, s->pipelines[i]);
    av_freep(&s->pipelines);

    av_freep(&s->scratch);
    s->scratch_size = 0;

    av_buffer_unref(&s->device_ref);
    av_buffer_unref(&s->frames_ref);
}