1 /*
2 * Copyright 2017 Advanced Micro Devices, Inc.
3 * Copyright 2021 Red Hat Inc.
4 * All Rights Reserved.
5 *
6 * SPDX-License-Identifier: MIT
7 */
8
9 #ifndef _WIN32
10 #include "drm-uapi/amdgpu_drm.h"
11 #endif
12
13 #include "util/vl_zscan_data.h"
14 #include "vk_video/vulkan_video_codecs_common.h"
15 #include "ac_uvd_dec.h"
16 #include "ac_vcn_av1_default.h"
17 #include "ac_vcn_dec.h"
18
19 #include "radv_buffer.h"
20 #include "radv_cs.h"
21 #include "radv_debug.h"
22 #include "radv_device_memory.h"
23 #include "radv_entrypoints.h"
24 #include "radv_image.h"
25 #include "radv_image_view.h"
26 #include "radv_video.h"
27
/* Reference picture count used when reporting H.264/H.265 capabilities. */
#define NUM_H2645_REFS 16

/* NOTE(review): the FB_* values presumably describe the firmware feedback buffer — confirm against users. */
#define FB_BUFFER_OFFSET     0x1000
#define FB_BUFFER_SIZE       2048
#define FB_BUFFER_SIZE_TONGA (2048 * 64)

#define IT_SCALING_TABLE_SIZE        992
#define RDECODE_SESSION_CONTEXT_SIZE (128 * 1024)

/* Alignment applied to every video upload allocation.
 * Not 100% sure this isn't too much, but it works.
 */
#define VID_DEFAULT_ALIGNMENT 256

/* Defined later in this file; needed early by radv_vcn_write_event(). */
static void set_reg(struct radv_cmd_buffer *cmd_buffer, unsigned reg, uint32_t val);
39
40 static bool
radv_enable_tier2(struct radv_physical_device * pdev)41 radv_enable_tier2(struct radv_physical_device *pdev)
42 {
43 const struct radv_instance *instance = radv_physical_device_instance(pdev);
44
45 if (pdev->info.vcn_ip_version >= VCN_3_0_0 && !(instance->debug_flags & RADV_DEBUG_VIDEO_ARRAY_PATH))
46 return true;
47 return false;
48 }
49
50 static uint32_t
radv_video_get_db_alignment(struct radv_physical_device * pdev,int width,bool is_h265_main_10_or_av1)51 radv_video_get_db_alignment(struct radv_physical_device *pdev, int width, bool is_h265_main_10_or_av1)
52 {
53 if (pdev->info.vcn_ip_version >= VCN_2_0_0 && width > 32 && is_h265_main_10_or_av1)
54 return 64;
55 return 32;
56 }
57
58 static bool
radv_vid_buffer_upload_alloc(struct radv_cmd_buffer * cmd_buffer,unsigned size,unsigned * out_offset,void ** ptr)59 radv_vid_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer, unsigned size, unsigned *out_offset, void **ptr)
60 {
61 return radv_cmd_buffer_upload_alloc_aligned(cmd_buffer, size, VID_DEFAULT_ALIGNMENT, out_offset, ptr);
62 }
63
64 /* vcn unified queue (sq) ib header */
65 void
radv_vcn_sq_header(struct radeon_cmdbuf * cs,struct rvcn_sq_var * sq,unsigned type,bool skip_signature)66 radv_vcn_sq_header(struct radeon_cmdbuf *cs, struct rvcn_sq_var *sq, unsigned type, bool skip_signature)
67 {
68 if (!skip_signature) {
69 /* vcn ib signature */
70 radeon_emit(cs, RADEON_VCN_SIGNATURE_SIZE);
71 radeon_emit(cs, RADEON_VCN_SIGNATURE);
72 sq->signature_ib_checksum = &cs->buf[cs->cdw];
73 radeon_emit(cs, 0);
74 sq->signature_ib_total_size_in_dw = &cs->buf[cs->cdw];
75 radeon_emit(cs, 0);
76 } else {
77 sq->signature_ib_checksum = NULL;
78 sq->signature_ib_total_size_in_dw = NULL;
79 }
80
81 /* vcn ib engine info */
82 radeon_emit(cs, RADEON_VCN_ENGINE_INFO_SIZE);
83 radeon_emit(cs, RADEON_VCN_ENGINE_INFO);
84 radeon_emit(cs, type);
85 sq->engine_ib_size_of_packages = &cs->buf[cs->cdw];
86 radeon_emit(cs, 0);
87 }
88
89 void
radv_vcn_sq_tail(struct radeon_cmdbuf * cs,struct rvcn_sq_var * sq)90 radv_vcn_sq_tail(struct radeon_cmdbuf *cs, struct rvcn_sq_var *sq)
91 {
92 uint32_t *end;
93 uint32_t size_in_dw;
94 uint32_t checksum = 0;
95
96 end = &cs->buf[cs->cdw];
97
98 if (sq->signature_ib_checksum == NULL && sq->signature_ib_total_size_in_dw == NULL) {
99 if (sq->engine_ib_size_of_packages == NULL)
100 return;
101
102 size_in_dw = end - sq->engine_ib_size_of_packages + 3; /* package_size, package_type, engine_type */
103 *sq->engine_ib_size_of_packages = size_in_dw * sizeof(uint32_t);
104 } else {
105 size_in_dw = end - sq->signature_ib_total_size_in_dw - 1;
106 *sq->signature_ib_total_size_in_dw = size_in_dw;
107 *sq->engine_ib_size_of_packages = size_in_dw * sizeof(uint32_t);
108
109 for (int i = 0; i < size_in_dw; i++)
110 checksum += *(sq->signature_ib_checksum + 2 + i);
111
112 *sq->signature_ib_checksum = checksum;
113 }
114 }
115
116 void
radv_vcn_write_event(struct radv_cmd_buffer * cmd_buffer,struct radv_event * event,unsigned value)117 radv_vcn_write_event(struct radv_cmd_buffer *cmd_buffer, struct radv_event *event, unsigned value)
118 {
119 struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
120 struct radv_physical_device *pdev = radv_device_physical(device);
121 struct rvcn_sq_var sq;
122 struct radeon_cmdbuf *cs = cmd_buffer->cs;
123
124 radv_cs_add_buffer(device->ws, cs, event->bo);
125 uint64_t va = radv_buffer_get_va(event->bo);
126
127 bool separate_queue = pdev->vid_decode_ip != AMD_IP_VCN_UNIFIED;
128 if (cmd_buffer->qf == RADV_QUEUE_VIDEO_DEC && separate_queue && pdev->vid_dec_reg.data2) {
129 radeon_check_space(device->ws, cmd_buffer->cs, 8);
130 set_reg(cmd_buffer, pdev->vid_dec_reg.data0, va & 0xffffffff);
131 set_reg(cmd_buffer, pdev->vid_dec_reg.data1, va >> 32);
132 set_reg(cmd_buffer, pdev->vid_dec_reg.data2, value);
133 set_reg(cmd_buffer, pdev->vid_dec_reg.cmd, RDECODE_CMD_WRITE_MEMORY << 1);
134 return;
135 }
136
137 radeon_check_space(device->ws, cs, 256);
138 radv_vcn_sq_header(cs, &sq, RADEON_VCN_ENGINE_TYPE_COMMON, separate_queue);
139 struct rvcn_cmn_engine_ib_package *ib_header = (struct rvcn_cmn_engine_ib_package *)&(cs->buf[cs->cdw]);
140 ib_header->package_size = sizeof(struct rvcn_cmn_engine_ib_package) + sizeof(struct rvcn_cmn_engine_op_writememory);
141 cs->cdw++;
142 ib_header->package_type = RADEON_VCN_IB_COMMON_OP_WRITEMEMORY;
143 cs->cdw++;
144
145 struct rvcn_cmn_engine_op_writememory *write_memory = (struct rvcn_cmn_engine_op_writememory *)&(cs->buf[cs->cdw]);
146 write_memory->dest_addr_lo = va & 0xffffffff;
147 write_memory->dest_addr_hi = va >> 32;
148 write_memory->data = value;
149
150 cs->cdw += sizeof(*write_memory) / 4;
151 radv_vcn_sq_tail(cs, &sq);
152 }
153
154 static void
radv_vcn_sq_start(struct radv_cmd_buffer * cmd_buffer)155 radv_vcn_sq_start(struct radv_cmd_buffer *cmd_buffer)
156 {
157 struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
158
159 radeon_check_space(device->ws, cmd_buffer->cs, 256);
160 radv_vcn_sq_header(cmd_buffer->cs, &cmd_buffer->video.sq, RADEON_VCN_ENGINE_TYPE_DECODE, false);
161 rvcn_decode_ib_package_t *ib_header = (rvcn_decode_ib_package_t *)&(cmd_buffer->cs->buf[cmd_buffer->cs->cdw]);
162 ib_header->package_size = sizeof(struct rvcn_decode_buffer_s) + sizeof(struct rvcn_decode_ib_package_s);
163 cmd_buffer->cs->cdw++;
164 ib_header->package_type = (RDECODE_IB_PARAM_DECODE_BUFFER);
165 cmd_buffer->cs->cdw++;
166 cmd_buffer->video.decode_buffer = (rvcn_decode_buffer_t *)&(cmd_buffer->cs->buf[cmd_buffer->cs->cdw]);
167 cmd_buffer->cs->cdw += sizeof(struct rvcn_decode_buffer_s) / 4;
168 memset(cmd_buffer->video.decode_buffer, 0, sizeof(struct rvcn_decode_buffer_s));
169 }
170
171 /* generate an stream handle */
172 static unsigned
radv_vid_alloc_stream_handle(struct radv_physical_device * pdev)173 radv_vid_alloc_stream_handle(struct radv_physical_device *pdev)
174 {
175 unsigned stream_handle = pdev->stream_handle_base;
176
177 stream_handle ^= ++pdev->stream_handle_counter;
178 return stream_handle;
179 }
180
181 static void
init_uvd_decoder(struct radv_physical_device * pdev)182 init_uvd_decoder(struct radv_physical_device *pdev)
183 {
184 if (pdev->info.family >= CHIP_VEGA10) {
185 pdev->vid_dec_reg.data0 = RUVD_GPCOM_VCPU_DATA0_SOC15;
186 pdev->vid_dec_reg.data1 = RUVD_GPCOM_VCPU_DATA1_SOC15;
187 pdev->vid_dec_reg.cmd = RUVD_GPCOM_VCPU_CMD_SOC15;
188 pdev->vid_dec_reg.cntl = RUVD_ENGINE_CNTL_SOC15;
189 } else {
190 pdev->vid_dec_reg.data0 = RUVD_GPCOM_VCPU_DATA0;
191 pdev->vid_dec_reg.data1 = RUVD_GPCOM_VCPU_DATA1;
192 pdev->vid_dec_reg.cmd = RUVD_GPCOM_VCPU_CMD;
193 pdev->vid_dec_reg.cntl = RUVD_ENGINE_CNTL;
194 }
195 }
196
197 static void
init_vcn_decoder(struct radv_physical_device * pdev)198 init_vcn_decoder(struct radv_physical_device *pdev)
199 {
200 switch (pdev->info.vcn_ip_version) {
201 case VCN_1_0_0:
202 case VCN_1_0_1:
203 pdev->vid_dec_reg.data0 = RDECODE_VCN1_GPCOM_VCPU_DATA0;
204 pdev->vid_dec_reg.data1 = RDECODE_VCN1_GPCOM_VCPU_DATA1;
205 pdev->vid_dec_reg.cmd = RDECODE_VCN1_GPCOM_VCPU_CMD;
206 pdev->vid_dec_reg.cntl = RDECODE_VCN1_ENGINE_CNTL;
207 break;
208 case VCN_2_0_0:
209 case VCN_2_0_2:
210 case VCN_2_0_3:
211 case VCN_2_2_0:
212 pdev->vid_dec_reg.data0 = RDECODE_VCN2_GPCOM_VCPU_DATA0;
213 pdev->vid_dec_reg.data1 = RDECODE_VCN2_GPCOM_VCPU_DATA1;
214 pdev->vid_dec_reg.data2 = RDECODE_VCN2_GPCOM_VCPU_DATA2;
215 pdev->vid_dec_reg.cmd = RDECODE_VCN2_GPCOM_VCPU_CMD;
216 pdev->vid_dec_reg.cntl = RDECODE_VCN2_ENGINE_CNTL;
217 break;
218 case VCN_2_5_0:
219 case VCN_2_6_0:
220 case VCN_3_0_0:
221 case VCN_3_0_2:
222 case VCN_3_0_16:
223 case VCN_3_0_33:
224 case VCN_3_1_1:
225 case VCN_3_1_2:
226 pdev->vid_dec_reg.data0 = RDECODE_VCN2_5_GPCOM_VCPU_DATA0;
227 pdev->vid_dec_reg.data1 = RDECODE_VCN2_5_GPCOM_VCPU_DATA1;
228 pdev->vid_dec_reg.data2 = RDECODE_VCN2_5_GPCOM_VCPU_DATA2;
229 pdev->vid_dec_reg.cmd = RDECODE_VCN2_5_GPCOM_VCPU_CMD;
230 pdev->vid_dec_reg.cntl = RDECODE_VCN2_5_ENGINE_CNTL;
231 break;
232 case VCN_4_0_3:
233 pdev->vid_addr_gfx_mode = RDECODE_ARRAY_MODE_ADDRLIB_SEL_GFX9;
234 pdev->av1_version = RDECODE_AV1_VER_1;
235 break;
236 case VCN_4_0_0:
237 case VCN_4_0_2:
238 case VCN_4_0_4:
239 case VCN_4_0_5:
240 case VCN_4_0_6:
241 pdev->vid_addr_gfx_mode = RDECODE_ARRAY_MODE_ADDRLIB_SEL_GFX11;
242 pdev->av1_version = RDECODE_AV1_VER_1;
243 break;
244 default:
245 break;
246 }
247 }
248
249 void
radv_init_physical_device_decoder(struct radv_physical_device * pdev)250 radv_init_physical_device_decoder(struct radv_physical_device *pdev)
251 {
252 if (pdev->info.vcn_ip_version >= VCN_4_0_0)
253 pdev->vid_decode_ip = AMD_IP_VCN_UNIFIED;
254 else if (radv_has_uvd(pdev))
255 pdev->vid_decode_ip = AMD_IP_UVD;
256 else
257 pdev->vid_decode_ip = AMD_IP_VCN_DEC;
258 pdev->av1_version = RDECODE_AV1_VER_0;
259
260 pdev->stream_handle_counter = 0;
261 pdev->stream_handle_base = 0;
262
263 pdev->stream_handle_base = util_bitreverse(getpid());
264
265 pdev->vid_addr_gfx_mode = RDECODE_ARRAY_MODE_LINEAR;
266
267 if (radv_has_uvd(pdev))
268 init_uvd_decoder(pdev);
269 else
270 init_vcn_decoder(pdev);
271 }
272
273 void
radv_probe_video_decode(struct radv_physical_device * pdev)274 radv_probe_video_decode(struct radv_physical_device *pdev)
275 {
276 const struct radv_instance *instance = radv_physical_device_instance(pdev);
277
278 pdev->video_decode_enabled = false;
279
280 /* TODO: Add VCN 5.0+. */
281 if (pdev->info.vcn_ip_version >= VCN_5_0_0)
282 return;
283
284 /* The support for decode events are available at the same time as encode */
285 if (pdev->info.vcn_ip_version >= VCN_4_0_0) {
286 if (pdev->info.vcn_enc_major_version > 1)
287 pdev->video_decode_enabled = true;
288 /* VCN 4 FW 1.22 has all the necessary pieces to pass CTS */
289 if (pdev->info.vcn_enc_major_version == 1 && pdev->info.vcn_enc_minor_version >= 22)
290 pdev->video_decode_enabled = true;
291 } else if (pdev->info.vcn_ip_version >= VCN_3_0_0) {
292 if (pdev->info.vcn_enc_major_version > 1)
293 pdev->video_decode_enabled = true;
294 /* VCN 3 FW 1.33 has all the necessary pieces to pass CTS */
295 if (pdev->info.vcn_enc_major_version == 1 && pdev->info.vcn_enc_minor_version >= 33)
296 pdev->video_decode_enabled = true;
297 } else if (pdev->info.vcn_ip_version >= VCN_2_0_0) {
298 if (pdev->info.vcn_enc_major_version > 1)
299 pdev->video_decode_enabled = true;
300 /* VCN 2 FW 1.24 has all the necessary pieces to pass CTS */
301 if (pdev->info.vcn_enc_major_version == 1 && pdev->info.vcn_enc_minor_version >= 24)
302 pdev->video_decode_enabled = true;
303 }
304 if (instance->perftest_flags & RADV_PERFTEST_VIDEO_DECODE) {
305 pdev->video_decode_enabled = true;
306 }
307 }
308
309 static bool
have_it(struct radv_video_session * vid)310 have_it(struct radv_video_session *vid)
311 {
312 return vid->stream_type == RDECODE_CODEC_H264_PERF || vid->stream_type == RDECODE_CODEC_H265;
313 }
314
315 static bool
have_probs(struct radv_video_session * vid)316 have_probs(struct radv_video_session *vid)
317 {
318 return vid->stream_type == RDECODE_CODEC_AV1;
319 }
320
321 static unsigned
calc_ctx_size_h264_perf(struct radv_video_session * vid)322 calc_ctx_size_h264_perf(struct radv_video_session *vid)
323 {
324 unsigned width_in_mb, height_in_mb, ctx_size;
325 unsigned width = align(vid->vk.max_coded.width, VK_VIDEO_H264_MACROBLOCK_WIDTH);
326 unsigned height = align(vid->vk.max_coded.height, VK_VIDEO_H264_MACROBLOCK_HEIGHT);
327
328 unsigned max_references = vid->vk.max_dpb_slots + 1;
329
330 /* picture width & height in 16 pixel units */
331 width_in_mb = width / VK_VIDEO_H264_MACROBLOCK_WIDTH;
332 height_in_mb = align(height / VK_VIDEO_H264_MACROBLOCK_HEIGHT, 2);
333
334 ctx_size = max_references * align(width_in_mb * height_in_mb * 192, 256);
335
336 return ctx_size;
337 }
338
339 static unsigned
calc_ctx_size_h265_main(struct radv_video_session * vid)340 calc_ctx_size_h265_main(struct radv_video_session *vid)
341 {
342 /* this is taken from radeonsi and seems correct for h265 */
343 unsigned width = align(vid->vk.max_coded.width, VK_VIDEO_H264_MACROBLOCK_WIDTH);
344 unsigned height = align(vid->vk.max_coded.height, VK_VIDEO_H264_MACROBLOCK_HEIGHT);
345
346 unsigned max_references = vid->vk.max_dpb_slots + 1;
347
348 if (vid->vk.max_coded.width * vid->vk.max_coded.height >= 4096 * 2000)
349 max_references = MAX2(max_references, 8);
350 else
351 max_references = MAX2(max_references, 17);
352
353 width = align(width, 16);
354 height = align(height, 16);
355 return ((width + 255) / 16) * ((height + 255) / 16) * 16 * max_references + 52 * 1024;
356 }
357
358 static unsigned
calc_ctx_size_h265_main10(struct radv_video_session * vid)359 calc_ctx_size_h265_main10(struct radv_video_session *vid)
360 {
361 unsigned log2_ctb_size, width_in_ctb, height_in_ctb, num_16x16_block_per_ctb;
362 unsigned context_buffer_size_per_ctb_row, cm_buffer_size, max_mb_address, db_left_tile_pxl_size;
363 unsigned db_left_tile_ctx_size = 4096 / 16 * (32 + 16 * 4);
364
365 /* this is taken from radeonsi and seems correct for h265 */
366 unsigned width = align(vid->vk.max_coded.width, VK_VIDEO_H264_MACROBLOCK_WIDTH);
367 unsigned height = align(vid->vk.max_coded.height, VK_VIDEO_H264_MACROBLOCK_HEIGHT);
368 unsigned coeff_10bit = 2;
369
370 unsigned max_references = vid->vk.max_dpb_slots + 1;
371
372 if (vid->vk.max_coded.width * vid->vk.max_coded.height >= 4096 * 2000)
373 max_references = MAX2(max_references, 8);
374 else
375 max_references = MAX2(max_references, 17);
376
377 /* 64x64 is the maximum ctb size. */
378 log2_ctb_size = 6;
379
380 width_in_ctb = (width + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size;
381 height_in_ctb = (height + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size;
382
383 num_16x16_block_per_ctb = ((1 << log2_ctb_size) >> 4) * ((1 << log2_ctb_size) >> 4);
384 context_buffer_size_per_ctb_row = align(width_in_ctb * num_16x16_block_per_ctb * 16, 256);
385 max_mb_address = (unsigned)ceil(height * 8 / 2048.0);
386
387 cm_buffer_size = max_references * context_buffer_size_per_ctb_row * height_in_ctb;
388 db_left_tile_pxl_size = coeff_10bit * (max_mb_address * 2 * 2048 + 1024);
389
390 return cm_buffer_size + db_left_tile_ctx_size + db_left_tile_pxl_size;
391 }
392
393 static unsigned
calc_ctx_size_av1(struct radv_device * device,struct radv_video_session * vid)394 calc_ctx_size_av1(struct radv_device *device, struct radv_video_session *vid)
395 {
396 const struct radv_physical_device *pdev = radv_device_physical(device);
397 return ac_vcn_dec_calc_ctx_size_av1(pdev->av1_version);
398 }
399
400 static void
radv_video_patch_session_parameters(struct vk_video_session_parameters * params)401 radv_video_patch_session_parameters(struct vk_video_session_parameters *params)
402 {
403 switch (params->op) {
404 case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR:
405 case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR:
406 default:
407 return;
408 case VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR:
409 case VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR:
410 radv_video_patch_encode_session_parameters(params);
411 break;
412 }
413 }
414
415 VKAPI_ATTR VkResult VKAPI_CALL
radv_CreateVideoSessionKHR(VkDevice _device,const VkVideoSessionCreateInfoKHR * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkVideoSessionKHR * pVideoSession)416 radv_CreateVideoSessionKHR(VkDevice _device, const VkVideoSessionCreateInfoKHR *pCreateInfo,
417 const VkAllocationCallbacks *pAllocator, VkVideoSessionKHR *pVideoSession)
418 {
419 VK_FROM_HANDLE(radv_device, device, _device);
420 struct radv_physical_device *pdev = radv_device_physical(device);
421 const struct radv_instance *instance = radv_physical_device_instance(pdev);
422
423 struct radv_video_session *vid =
424 vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*vid), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
425 if (!vid)
426 return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
427
428 memset(vid, 0, sizeof(struct radv_video_session));
429
430 VkResult result = vk_video_session_init(&device->vk, &vid->vk, pCreateInfo);
431 if (result != VK_SUCCESS) {
432 vk_free2(&device->vk.alloc, pAllocator, vid);
433 return result;
434 }
435
436 vid->dpb_type = DPB_MAX_RES;
437
438 switch (vid->vk.op) {
439 case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR:
440 vid->stream_type = RDECODE_CODEC_H264_PERF;
441 if (radv_enable_tier2(pdev))
442 vid->dpb_type = DPB_DYNAMIC_TIER_2;
443 break;
444 case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR:
445 vid->stream_type = RDECODE_CODEC_H265;
446 if (radv_enable_tier2(pdev))
447 vid->dpb_type = DPB_DYNAMIC_TIER_2;
448 break;
449 case VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR:
450 vid->stream_type = RDECODE_CODEC_AV1;
451 vid->dpb_type = DPB_DYNAMIC_TIER_2;
452 break;
453 case VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR:
454 vid->encode = true;
455 vid->enc_session.encode_standard = RENCODE_ENCODE_STANDARD_H264;
456 vid->enc_session.aligned_picture_width = align(vid->vk.max_coded.width, 16);
457 vid->enc_session.aligned_picture_height = align(vid->vk.max_coded.height, 16);
458 vid->enc_session.padding_width = vid->enc_session.aligned_picture_width - vid->vk.max_coded.width;
459 vid->enc_session.padding_height = vid->enc_session.aligned_picture_height - vid->vk.max_coded.height;
460 vid->enc_session.display_remote = 0;
461 vid->enc_session.pre_encode_mode = 0;
462 vid->enc_session.pre_encode_chroma_enabled = 0;
463 switch (vid->vk.enc_usage.tuning_mode) {
464 case VK_VIDEO_ENCODE_TUNING_MODE_DEFAULT_KHR:
465 default:
466 vid->enc_preset_mode = RENCODE_PRESET_MODE_BALANCE;
467 break;
468 case VK_VIDEO_ENCODE_TUNING_MODE_LOW_LATENCY_KHR:
469 case VK_VIDEO_ENCODE_TUNING_MODE_ULTRA_LOW_LATENCY_KHR:
470 vid->enc_preset_mode = RENCODE_PRESET_MODE_SPEED;
471 break;
472 case VK_VIDEO_ENCODE_TUNING_MODE_HIGH_QUALITY_KHR:
473 case VK_VIDEO_ENCODE_TUNING_MODE_LOSSLESS_KHR:
474 vid->enc_preset_mode = RENCODE_PRESET_MODE_QUALITY;
475 break;
476 }
477 break;
478 case VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR:
479 vid->encode = true;
480 vid->enc_session.encode_standard = RENCODE_ENCODE_STANDARD_HEVC;
481 vid->enc_session.aligned_picture_width = align(vid->vk.max_coded.width, 64);
482 vid->enc_session.aligned_picture_height = align(vid->vk.max_coded.height, 64);
483 vid->enc_session.padding_width = vid->enc_session.aligned_picture_width - vid->vk.max_coded.width;
484 vid->enc_session.padding_height = vid->enc_session.aligned_picture_height - vid->vk.max_coded.height;
485 vid->enc_session.display_remote = 0;
486 vid->enc_session.pre_encode_mode = 0;
487 vid->enc_session.pre_encode_chroma_enabled = 0;
488 switch (vid->vk.enc_usage.tuning_mode) {
489 case VK_VIDEO_ENCODE_TUNING_MODE_DEFAULT_KHR:
490 default:
491 vid->enc_preset_mode = RENCODE_PRESET_MODE_BALANCE;
492 break;
493 case VK_VIDEO_ENCODE_TUNING_MODE_LOW_LATENCY_KHR:
494 case VK_VIDEO_ENCODE_TUNING_MODE_ULTRA_LOW_LATENCY_KHR:
495 vid->enc_preset_mode = RENCODE_PRESET_MODE_SPEED;
496 break;
497 case VK_VIDEO_ENCODE_TUNING_MODE_HIGH_QUALITY_KHR:
498 case VK_VIDEO_ENCODE_TUNING_MODE_LOSSLESS_KHR:
499 vid->enc_preset_mode = RENCODE_PRESET_MODE_QUALITY;
500 break;
501 }
502 break;
503 default:
504 return VK_ERROR_FEATURE_NOT_PRESENT;
505 }
506
507 vid->stream_handle = radv_vid_alloc_stream_handle(pdev);
508 vid->dbg_frame_cnt = 0;
509 vid->db_alignment = radv_video_get_db_alignment(
510 pdev, vid->vk.max_coded.width,
511 (vid->stream_type == RDECODE_CODEC_AV1 ||
512 (vid->stream_type == RDECODE_CODEC_H265 && vid->vk.h265.profile_idc == STD_VIDEO_H265_PROFILE_IDC_MAIN_10)));
513
514 *pVideoSession = radv_video_session_to_handle(vid);
515 return VK_SUCCESS;
516 }
517
518 VKAPI_ATTR void VKAPI_CALL
radv_DestroyVideoSessionKHR(VkDevice _device,VkVideoSessionKHR _session,const VkAllocationCallbacks * pAllocator)519 radv_DestroyVideoSessionKHR(VkDevice _device, VkVideoSessionKHR _session, const VkAllocationCallbacks *pAllocator)
520 {
521 VK_FROM_HANDLE(radv_device, device, _device);
522 VK_FROM_HANDLE(radv_video_session, vid, _session);
523 if (!_session)
524 return;
525
526 vk_object_base_finish(&vid->vk.base);
527 vk_free2(&device->vk.alloc, pAllocator, vid);
528 }
529
530 VKAPI_ATTR VkResult VKAPI_CALL
radv_CreateVideoSessionParametersKHR(VkDevice _device,const VkVideoSessionParametersCreateInfoKHR * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkVideoSessionParametersKHR * pVideoSessionParameters)531 radv_CreateVideoSessionParametersKHR(VkDevice _device, const VkVideoSessionParametersCreateInfoKHR *pCreateInfo,
532 const VkAllocationCallbacks *pAllocator,
533 VkVideoSessionParametersKHR *pVideoSessionParameters)
534 {
535 VK_FROM_HANDLE(radv_device, device, _device);
536 VK_FROM_HANDLE(radv_video_session, vid, pCreateInfo->videoSession);
537 VK_FROM_HANDLE(radv_video_session_params, templ, pCreateInfo->videoSessionParametersTemplate);
538 const struct radv_physical_device *pdev = radv_device_physical(device);
539 const struct radv_instance *instance = radv_physical_device_instance(pdev);
540 struct radv_video_session_params *params =
541 vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*params), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
542 if (!params)
543 return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
544
545 VkResult result =
546 vk_video_session_parameters_init(&device->vk, ¶ms->vk, &vid->vk, templ ? &templ->vk : NULL, pCreateInfo);
547 if (result != VK_SUCCESS) {
548 vk_free2(&device->vk.alloc, pAllocator, params);
549 return result;
550 }
551
552 radv_video_patch_session_parameters(¶ms->vk);
553
554 *pVideoSessionParameters = radv_video_session_params_to_handle(params);
555 return VK_SUCCESS;
556 }
557
558 VKAPI_ATTR void VKAPI_CALL
radv_DestroyVideoSessionParametersKHR(VkDevice _device,VkVideoSessionParametersKHR _params,const VkAllocationCallbacks * pAllocator)559 radv_DestroyVideoSessionParametersKHR(VkDevice _device, VkVideoSessionParametersKHR _params,
560 const VkAllocationCallbacks *pAllocator)
561 {
562 VK_FROM_HANDLE(radv_device, device, _device);
563 VK_FROM_HANDLE(radv_video_session_params, params, _params);
564
565 vk_video_session_parameters_finish(&device->vk, ¶ms->vk);
566 vk_free2(&device->vk.alloc, pAllocator, params);
567 }
568
569 VKAPI_ATTR VkResult VKAPI_CALL
radv_GetPhysicalDeviceVideoCapabilitiesKHR(VkPhysicalDevice physicalDevice,const VkVideoProfileInfoKHR * pVideoProfile,VkVideoCapabilitiesKHR * pCapabilities)570 radv_GetPhysicalDeviceVideoCapabilitiesKHR(VkPhysicalDevice physicalDevice, const VkVideoProfileInfoKHR *pVideoProfile,
571 VkVideoCapabilitiesKHR *pCapabilities)
572 {
573 VK_FROM_HANDLE(radv_physical_device, pdev, physicalDevice);
574 const struct video_codec_cap *cap = NULL;
575 bool is_encode = false;
576
577 switch (pVideoProfile->videoCodecOperation) {
578 #ifndef _WIN32
579 case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR:
580 cap = &pdev->info.dec_caps.codec_info[AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC];
581 break;
582 case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR:
583 cap = &pdev->info.dec_caps.codec_info[AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC];
584 break;
585 case VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR:
586 cap = &pdev->info.dec_caps.codec_info[AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1];
587 break;
588 case VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR:
589 cap = &pdev->info.enc_caps.codec_info[AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC];
590 is_encode = true;
591 break;
592 case VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR:
593 cap = &pdev->info.enc_caps.codec_info[AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC];
594 is_encode = true;
595 break;
596 #endif
597 default:
598 unreachable("unsupported operation");
599 }
600
601 if (cap && !cap->valid)
602 cap = NULL;
603
604 pCapabilities->flags = 0;
605 pCapabilities->pictureAccessGranularity.width = VK_VIDEO_H264_MACROBLOCK_WIDTH;
606 pCapabilities->pictureAccessGranularity.height = VK_VIDEO_H264_MACROBLOCK_HEIGHT;
607 pCapabilities->minCodedExtent.width = VK_VIDEO_H264_MACROBLOCK_WIDTH;
608 pCapabilities->minCodedExtent.height = VK_VIDEO_H264_MACROBLOCK_HEIGHT;
609
610 struct VkVideoDecodeCapabilitiesKHR *dec_caps = NULL;
611 struct VkVideoEncodeCapabilitiesKHR *enc_caps = NULL;
612 if (!is_encode) {
613 dec_caps =
614 (struct VkVideoDecodeCapabilitiesKHR *)vk_find_struct(pCapabilities->pNext, VIDEO_DECODE_CAPABILITIES_KHR);
615 if (dec_caps)
616 dec_caps->flags = VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_DISTINCT_BIT_KHR;
617 pCapabilities->minBitstreamBufferOffsetAlignment = 128;
618 pCapabilities->minBitstreamBufferSizeAlignment = 128;
619 } else {
620 enc_caps =
621 (struct VkVideoEncodeCapabilitiesKHR *)vk_find_struct(pCapabilities->pNext, VIDEO_ENCODE_CAPABILITIES_KHR);
622
623 if (enc_caps) {
624 enc_caps->flags = 0;
625 enc_caps->rateControlModes = VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR |
626 VK_VIDEO_ENCODE_RATE_CONTROL_MODE_CBR_BIT_KHR |
627 VK_VIDEO_ENCODE_RATE_CONTROL_MODE_VBR_BIT_KHR;
628 enc_caps->maxRateControlLayers = RADV_ENC_MAX_RATE_LAYER;
629 enc_caps->maxBitrate = 1000000000;
630 enc_caps->maxQualityLevels = 2;
631 enc_caps->encodeInputPictureGranularity = pCapabilities->pictureAccessGranularity;
632 enc_caps->supportedEncodeFeedbackFlags = VK_VIDEO_ENCODE_FEEDBACK_BITSTREAM_BUFFER_OFFSET_BIT_KHR |
633 VK_VIDEO_ENCODE_FEEDBACK_BITSTREAM_BYTES_WRITTEN_BIT_KHR;
634 }
635 pCapabilities->minBitstreamBufferOffsetAlignment = 16;
636 pCapabilities->minBitstreamBufferSizeAlignment = 16;
637 }
638
639 switch (pVideoProfile->videoCodecOperation) {
640 case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR: {
641 /* H264 allows different luma and chroma bit depths */
642 if (pVideoProfile->lumaBitDepth != pVideoProfile->chromaBitDepth)
643 return VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR;
644
645 struct VkVideoDecodeH264CapabilitiesKHR *ext = (struct VkVideoDecodeH264CapabilitiesKHR *)vk_find_struct(
646 pCapabilities->pNext, VIDEO_DECODE_H264_CAPABILITIES_KHR);
647
648 const struct VkVideoDecodeH264ProfileInfoKHR *h264_profile =
649 vk_find_struct_const(pVideoProfile->pNext, VIDEO_DECODE_H264_PROFILE_INFO_KHR);
650
651 if (h264_profile->stdProfileIdc != STD_VIDEO_H264_PROFILE_IDC_BASELINE &&
652 h264_profile->stdProfileIdc != STD_VIDEO_H264_PROFILE_IDC_MAIN &&
653 h264_profile->stdProfileIdc != STD_VIDEO_H264_PROFILE_IDC_HIGH)
654 return VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR;
655
656 if (pVideoProfile->lumaBitDepth != VK_VIDEO_COMPONENT_BIT_DEPTH_8_BIT_KHR)
657 return VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR;
658
659 pCapabilities->maxDpbSlots = NUM_H2645_REFS + 1;
660 pCapabilities->maxActiveReferencePictures = NUM_H2645_REFS;
661
662 /* for h264 on navi21+ separate dpb images should work */
663 if (radv_enable_tier2(pdev))
664 pCapabilities->flags |= VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR;
665 ext->fieldOffsetGranularity.x = 0;
666 ext->fieldOffsetGranularity.y = 0;
667 ext->maxLevelIdc = STD_VIDEO_H264_LEVEL_IDC_5_1;
668 strcpy(pCapabilities->stdHeaderVersion.extensionName, VK_STD_VULKAN_VIDEO_CODEC_H264_DECODE_EXTENSION_NAME);
669 pCapabilities->stdHeaderVersion.specVersion = VK_STD_VULKAN_VIDEO_CODEC_H264_DECODE_SPEC_VERSION;
670 break;
671 }
672 case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR: {
673 /* H265 allows different luma and chroma bit depths */
674 if (pVideoProfile->lumaBitDepth != pVideoProfile->chromaBitDepth)
675 return VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR;
676
677 struct VkVideoDecodeH265CapabilitiesKHR *ext = (struct VkVideoDecodeH265CapabilitiesKHR *)vk_find_struct(
678 pCapabilities->pNext, VIDEO_DECODE_H265_CAPABILITIES_KHR);
679
680 const struct VkVideoDecodeH265ProfileInfoKHR *h265_profile =
681 vk_find_struct_const(pVideoProfile->pNext, VIDEO_DECODE_H265_PROFILE_INFO_KHR);
682
683 if (h265_profile->stdProfileIdc != STD_VIDEO_H265_PROFILE_IDC_MAIN &&
684 h265_profile->stdProfileIdc != STD_VIDEO_H265_PROFILE_IDC_MAIN_10 &&
685 h265_profile->stdProfileIdc != STD_VIDEO_H265_PROFILE_IDC_MAIN_STILL_PICTURE)
686 return VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR;
687
688 if (pVideoProfile->lumaBitDepth != VK_VIDEO_COMPONENT_BIT_DEPTH_8_BIT_KHR &&
689 pVideoProfile->lumaBitDepth != VK_VIDEO_COMPONENT_BIT_DEPTH_10_BIT_KHR)
690 return VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR;
691
692 pCapabilities->maxDpbSlots = NUM_H2645_REFS + 1;
693 pCapabilities->maxActiveReferencePictures = NUM_H2645_REFS;
694 /* for h265 on navi21+ separate dpb images should work */
695 if (radv_enable_tier2(pdev))
696 pCapabilities->flags |= VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR;
697 ext->maxLevelIdc = STD_VIDEO_H265_LEVEL_IDC_5_1;
698 strcpy(pCapabilities->stdHeaderVersion.extensionName, VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_EXTENSION_NAME);
699 pCapabilities->stdHeaderVersion.specVersion = VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_SPEC_VERSION;
700 break;
701 }
702 case VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR: {
703 const bool have_12bit = pdev->info.vcn_ip_version >= VCN_5_0_0 ||
704 pdev->info.vcn_ip_version == VCN_4_0_0;
705 /* Monochrome sampling implies an undefined chroma bit depth, and is supported in profile MAIN for AV1. */
706 if (pVideoProfile->chromaSubsampling != VK_VIDEO_CHROMA_SUBSAMPLING_MONOCHROME_BIT_KHR &&
707 pVideoProfile->lumaBitDepth != pVideoProfile->chromaBitDepth)
708 return VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR;
709 struct VkVideoDecodeAV1CapabilitiesKHR *ext =
710 vk_find_struct(pCapabilities->pNext, VIDEO_DECODE_AV1_CAPABILITIES_KHR);
711
712 const struct VkVideoDecodeAV1ProfileInfoKHR *av1_profile =
713 vk_find_struct_const(pVideoProfile->pNext, VIDEO_DECODE_AV1_PROFILE_INFO_KHR);
714
715 if (av1_profile->stdProfile != STD_VIDEO_AV1_PROFILE_MAIN &&
716 (!have_12bit || av1_profile->stdProfile != STD_VIDEO_AV1_PROFILE_PROFESSIONAL))
717 return VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR;
718
719 if (pVideoProfile->lumaBitDepth != VK_VIDEO_COMPONENT_BIT_DEPTH_8_BIT_KHR &&
720 pVideoProfile->lumaBitDepth != VK_VIDEO_COMPONENT_BIT_DEPTH_10_BIT_KHR &&
721 (!have_12bit || pVideoProfile->lumaBitDepth != VK_VIDEO_COMPONENT_BIT_DEPTH_12_BIT_KHR))
722 return VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR;
723
724 pCapabilities->maxDpbSlots = 9;
725 pCapabilities->maxActiveReferencePictures = STD_VIDEO_AV1_NUM_REF_FRAMES;
726 pCapabilities->flags |= VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR;
727 ext->maxLevel = STD_VIDEO_AV1_LEVEL_6_1; /* For VCN3/4, the only h/w currently with AV1 decode support */
728 strcpy(pCapabilities->stdHeaderVersion.extensionName, VK_STD_VULKAN_VIDEO_CODEC_AV1_DECODE_EXTENSION_NAME);
729 pCapabilities->stdHeaderVersion.specVersion = VK_STD_VULKAN_VIDEO_CODEC_AV1_DECODE_SPEC_VERSION;
730 break;
731 }
732 case VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR: {
733 struct VkVideoEncodeH264CapabilitiesKHR *ext = (struct VkVideoEncodeH264CapabilitiesKHR *)vk_find_struct(
734 pCapabilities->pNext, VIDEO_ENCODE_H264_CAPABILITIES_KHR);
735
736 const struct VkVideoEncodeH264ProfileInfoKHR *h264_profile =
737 vk_find_struct_const(pVideoProfile->pNext, VIDEO_ENCODE_H264_PROFILE_INFO_KHR);
738
739 if (h264_profile->stdProfileIdc != STD_VIDEO_H264_PROFILE_IDC_BASELINE &&
740 h264_profile->stdProfileIdc != STD_VIDEO_H264_PROFILE_IDC_MAIN &&
741 h264_profile->stdProfileIdc != STD_VIDEO_H264_PROFILE_IDC_HIGH)
742 return VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR;
743
744 if (pVideoProfile->lumaBitDepth != VK_VIDEO_COMPONENT_BIT_DEPTH_8_BIT_KHR)
745 return VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR;
746
747 pCapabilities->maxDpbSlots = NUM_H2645_REFS;
748 pCapabilities->maxActiveReferencePictures = NUM_H2645_REFS;
749 ext->flags = VK_VIDEO_ENCODE_H264_CAPABILITY_HRD_COMPLIANCE_BIT_KHR |
750 VK_VIDEO_ENCODE_H264_CAPABILITY_PER_PICTURE_TYPE_MIN_MAX_QP_BIT_KHR;
751 ext->maxLevelIdc = cap ? cap->max_level : 0;
752 ext->maxSliceCount = 1;
753 ext->maxPPictureL0ReferenceCount = 1;
754 ext->maxBPictureL0ReferenceCount = 0;
755 ext->maxL1ReferenceCount = 0;
756 ext->maxTemporalLayerCount = 4;
757 ext->expectDyadicTemporalLayerPattern = false;
758 ext->minQp = 0;
759 ext->maxQp = 51;
760 ext->prefersGopRemainingFrames = false;
761 ext->requiresGopRemainingFrames = false;
762 ext->stdSyntaxFlags = VK_VIDEO_ENCODE_H264_STD_CONSTRAINED_INTRA_PRED_FLAG_SET_BIT_KHR |
763 VK_VIDEO_ENCODE_H264_STD_ENTROPY_CODING_MODE_FLAG_UNSET_BIT_KHR |
764 VK_VIDEO_ENCODE_H264_STD_ENTROPY_CODING_MODE_FLAG_SET_BIT_KHR;
765 if (pdev->enc_hw_ver >= RADV_VIDEO_ENC_HW_3)
766 ext->stdSyntaxFlags |= VK_VIDEO_ENCODE_H264_STD_WEIGHTED_BIPRED_IDC_EXPLICIT_BIT_KHR;
767
768 strcpy(pCapabilities->stdHeaderVersion.extensionName, VK_STD_VULKAN_VIDEO_CODEC_H264_ENCODE_EXTENSION_NAME);
769 pCapabilities->stdHeaderVersion.specVersion = VK_STD_VULKAN_VIDEO_CODEC_H264_ENCODE_SPEC_VERSION;
770 break;
771 }
772 case VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR: {
773 struct VkVideoEncodeH265CapabilitiesKHR *ext = (struct VkVideoEncodeH265CapabilitiesKHR *)vk_find_struct(
774 pCapabilities->pNext, VIDEO_ENCODE_H265_CAPABILITIES_KHR);
775
776 const struct VkVideoEncodeH265ProfileInfoKHR *h265_profile =
777 vk_find_struct_const(pVideoProfile->pNext, VIDEO_ENCODE_H265_PROFILE_INFO_KHR);
778
779 if (h265_profile->stdProfileIdc != STD_VIDEO_H265_PROFILE_IDC_MAIN &&
780 (pdev->enc_hw_ver < RADV_VIDEO_ENC_HW_2 ||
781 h265_profile->stdProfileIdc != STD_VIDEO_H265_PROFILE_IDC_MAIN_10))
782 return VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR;
783
784 if (pVideoProfile->lumaBitDepth != VK_VIDEO_COMPONENT_BIT_DEPTH_8_BIT_KHR &&
785 (pdev->enc_hw_ver < RADV_VIDEO_ENC_HW_2 ||
786 pVideoProfile->lumaBitDepth != VK_VIDEO_COMPONENT_BIT_DEPTH_10_BIT_KHR))
787 return VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR;
788
789 pCapabilities->pictureAccessGranularity.width = VK_VIDEO_H265_CTU_MAX_WIDTH;
790 if (enc_caps)
791 enc_caps->encodeInputPictureGranularity = pCapabilities->pictureAccessGranularity;
792
793 pCapabilities->maxDpbSlots = NUM_H2645_REFS;
794 pCapabilities->maxActiveReferencePictures = NUM_H2645_REFS;
795 ext->flags = VK_VIDEO_ENCODE_H265_CAPABILITY_PER_PICTURE_TYPE_MIN_MAX_QP_BIT_KHR;
796 ext->maxLevelIdc = cap ? cap->max_level : 0;
797 ext->maxSliceSegmentCount = 1;
798 ext->maxTiles.width = 1;
799 ext->maxTiles.height = 1;
800 ext->ctbSizes = VK_VIDEO_ENCODE_H265_CTB_SIZE_64_BIT_KHR;
801 ext->transformBlockSizes =
802 VK_VIDEO_ENCODE_H265_TRANSFORM_BLOCK_SIZE_4_BIT_KHR | VK_VIDEO_ENCODE_H265_TRANSFORM_BLOCK_SIZE_8_BIT_KHR |
803 VK_VIDEO_ENCODE_H265_TRANSFORM_BLOCK_SIZE_16_BIT_KHR | VK_VIDEO_ENCODE_H265_TRANSFORM_BLOCK_SIZE_32_BIT_KHR;
804 ext->maxPPictureL0ReferenceCount = 1;
805 ext->maxBPictureL0ReferenceCount = 0;
806 ext->maxL1ReferenceCount = 0;
807 ext->maxSubLayerCount = 4;
808 ext->expectDyadicTemporalSubLayerPattern = false;
809 ext->minQp = 0;
810 ext->maxQp = 51;
811 ext->prefersGopRemainingFrames = false;
812 ext->requiresGopRemainingFrames = false;
813 ext->stdSyntaxFlags = VK_VIDEO_ENCODE_H265_STD_CONSTRAINED_INTRA_PRED_FLAG_SET_BIT_KHR |
814 VK_VIDEO_ENCODE_H265_STD_DEBLOCKING_FILTER_OVERRIDE_ENABLED_FLAG_SET_BIT_KHR |
815 VK_VIDEO_ENCODE_H265_STD_CONSTRAINED_INTRA_PRED_FLAG_SET_BIT_KHR |
816 VK_VIDEO_ENCODE_H265_STD_ENTROPY_CODING_SYNC_ENABLED_FLAG_SET_BIT_KHR;
817
818 if (pdev->enc_hw_ver >= RADV_VIDEO_ENC_HW_2)
819 ext->stdSyntaxFlags |= VK_VIDEO_ENCODE_H265_STD_SAMPLE_ADAPTIVE_OFFSET_ENABLED_FLAG_SET_BIT_KHR;
820
821 if (pdev->enc_hw_ver >= RADV_VIDEO_ENC_HW_3)
822 ext->stdSyntaxFlags |= VK_VIDEO_ENCODE_H265_STD_TRANSFORM_SKIP_ENABLED_FLAG_SET_BIT_KHR;
823 strcpy(pCapabilities->stdHeaderVersion.extensionName, VK_STD_VULKAN_VIDEO_CODEC_H265_ENCODE_EXTENSION_NAME);
824 pCapabilities->stdHeaderVersion.specVersion = VK_STD_VULKAN_VIDEO_CODEC_H265_ENCODE_SPEC_VERSION;
825 break;
826 }
827 default:
828 break;
829 }
830
831 if (cap) {
832 pCapabilities->maxCodedExtent.width = cap->max_width;
833 pCapabilities->maxCodedExtent.height = cap->max_height;
834 } else {
835 switch (pVideoProfile->videoCodecOperation) {
836 case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR:
837 pCapabilities->maxCodedExtent.width = (pdev->info.family < CHIP_TONGA) ? 2048 : 4096;
838 pCapabilities->maxCodedExtent.height = (pdev->info.family < CHIP_TONGA) ? 1152 : 4096;
839 break;
840 case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR:
841 pCapabilities->maxCodedExtent.width =
842 (pdev->info.family < CHIP_RENOIR) ? ((pdev->info.family < CHIP_TONGA) ? 2048 : 4096) : 8192;
843 pCapabilities->maxCodedExtent.height =
844 (pdev->info.family < CHIP_RENOIR) ? ((pdev->info.family < CHIP_TONGA) ? 1152 : 4096) : 4352;
845 break;
846 default:
847 break;
848 }
849 }
850
851 return VK_SUCCESS;
852 }
853
854 VKAPI_ATTR VkResult VKAPI_CALL
radv_GetPhysicalDeviceVideoFormatPropertiesKHR(VkPhysicalDevice physicalDevice,const VkPhysicalDeviceVideoFormatInfoKHR * pVideoFormatInfo,uint32_t * pVideoFormatPropertyCount,VkVideoFormatPropertiesKHR * pVideoFormatProperties)855 radv_GetPhysicalDeviceVideoFormatPropertiesKHR(VkPhysicalDevice physicalDevice,
856 const VkPhysicalDeviceVideoFormatInfoKHR *pVideoFormatInfo,
857 uint32_t *pVideoFormatPropertyCount,
858 VkVideoFormatPropertiesKHR *pVideoFormatProperties)
859 {
860 VK_FROM_HANDLE(radv_physical_device, pdev, physicalDevice);
861
862 if ((pVideoFormatInfo->imageUsage & (VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR |
863 VK_IMAGE_USAGE_VIDEO_ENCODE_DPB_BIT_KHR)) &&
864 !pdev->video_encode_enabled)
865 return VK_ERROR_IMAGE_USAGE_NOT_SUPPORTED_KHR;
866
867 /* radv requires separate allocates for DPB and decode video. */
868 if ((pVideoFormatInfo->imageUsage &
869 (VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR | VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR)) ==
870 (VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR | VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR))
871 return VK_ERROR_IMAGE_USAGE_NOT_SUPPORTED_KHR;
872
873 VK_OUTARRAY_MAKE_TYPED(VkVideoFormatPropertiesKHR, out, pVideoFormatProperties, pVideoFormatPropertyCount);
874
875 bool need_8bit = true;
876 bool need_10bit = false;
877 bool need_12bit = false;
878 const struct VkVideoProfileListInfoKHR *prof_list =
879 (struct VkVideoProfileListInfoKHR *)vk_find_struct_const(pVideoFormatInfo->pNext, VIDEO_PROFILE_LIST_INFO_KHR);
880 if (prof_list) {
881 for (unsigned i = 0; i < prof_list->profileCount; i++) {
882 const VkVideoProfileInfoKHR *profile = &prof_list->pProfiles[i];
883 if (profile->lumaBitDepth & VK_VIDEO_COMPONENT_BIT_DEPTH_10_BIT_KHR)
884 need_10bit = true;
885 else if (profile->lumaBitDepth & VK_VIDEO_COMPONENT_BIT_DEPTH_12_BIT_KHR)
886 need_12bit = true;
887 }
888 }
889
890 if (need_12bit) {
891 vk_outarray_append_typed(VkVideoFormatPropertiesKHR, &out, p)
892 {
893 p->format = VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16;
894 p->componentMapping.r = VK_COMPONENT_SWIZZLE_IDENTITY;
895 p->componentMapping.g = VK_COMPONENT_SWIZZLE_IDENTITY;
896 p->componentMapping.b = VK_COMPONENT_SWIZZLE_IDENTITY;
897 p->componentMapping.a = VK_COMPONENT_SWIZZLE_IDENTITY;
898 p->imageCreateFlags = 0;
899 if (pVideoFormatInfo->imageUsage & (VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR | VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR))
900 p->imageCreateFlags |= VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT | VK_IMAGE_CREATE_EXTENDED_USAGE_BIT;
901 p->imageType = VK_IMAGE_TYPE_2D;
902 p->imageTiling = VK_IMAGE_TILING_OPTIMAL;
903 p->imageUsageFlags = pVideoFormatInfo->imageUsage;
904 }
905
906 if (pVideoFormatInfo->imageUsage & (VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR)) {
907 need_8bit = false;
908 need_10bit = false;
909 }
910 }
911
912 if (need_10bit) {
913 vk_outarray_append_typed(VkVideoFormatPropertiesKHR, &out, p)
914 {
915 p->format = VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16;
916 p->componentMapping.r = VK_COMPONENT_SWIZZLE_IDENTITY;
917 p->componentMapping.g = VK_COMPONENT_SWIZZLE_IDENTITY;
918 p->componentMapping.b = VK_COMPONENT_SWIZZLE_IDENTITY;
919 p->componentMapping.a = VK_COMPONENT_SWIZZLE_IDENTITY;
920 p->imageCreateFlags = 0;
921 if (pVideoFormatInfo->imageUsage & (VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR | VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR))
922 p->imageCreateFlags |= VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT | VK_IMAGE_CREATE_EXTENDED_USAGE_BIT;
923 p->imageType = VK_IMAGE_TYPE_2D;
924 p->imageTiling = VK_IMAGE_TILING_OPTIMAL;
925 p->imageUsageFlags = pVideoFormatInfo->imageUsage;
926 }
927
928 if (pVideoFormatInfo->imageUsage & (VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR))
929 need_8bit = false;
930 }
931
932 if (need_8bit) {
933 vk_outarray_append_typed(VkVideoFormatPropertiesKHR, &out, p)
934 {
935 p->format = VK_FORMAT_G8_B8R8_2PLANE_420_UNORM;
936 p->componentMapping.r = VK_COMPONENT_SWIZZLE_IDENTITY;
937 p->componentMapping.g = VK_COMPONENT_SWIZZLE_IDENTITY;
938 p->componentMapping.b = VK_COMPONENT_SWIZZLE_IDENTITY;
939 p->componentMapping.a = VK_COMPONENT_SWIZZLE_IDENTITY;
940 p->imageCreateFlags = 0;
941 if (pVideoFormatInfo->imageUsage & (VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR | VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR))
942 p->imageCreateFlags |= VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT | VK_IMAGE_CREATE_EXTENDED_USAGE_BIT;
943 p->imageType = VK_IMAGE_TYPE_2D;
944 p->imageTiling = VK_IMAGE_TILING_OPTIMAL;
945 p->imageUsageFlags = pVideoFormatInfo->imageUsage;
946 }
947 }
948
949 return vk_outarray_status(&out);
950 }
951
952 #define RADV_BIND_SESSION_CTX 0
953 #define RADV_BIND_DECODER_CTX 1
954
955 VKAPI_ATTR VkResult VKAPI_CALL
radv_GetVideoSessionMemoryRequirementsKHR(VkDevice _device,VkVideoSessionKHR videoSession,uint32_t * pMemoryRequirementsCount,VkVideoSessionMemoryRequirementsKHR * pMemoryRequirements)956 radv_GetVideoSessionMemoryRequirementsKHR(VkDevice _device, VkVideoSessionKHR videoSession,
957 uint32_t *pMemoryRequirementsCount,
958 VkVideoSessionMemoryRequirementsKHR *pMemoryRequirements)
959 {
960 VK_FROM_HANDLE(radv_device, device, _device);
961 VK_FROM_HANDLE(radv_video_session, vid, videoSession);
962 const struct radv_physical_device *pdev = radv_device_physical(device);
963
964 uint32_t memory_type_bits = (1u << pdev->memory_properties.memoryTypeCount) - 1;
965
966 if (vid->encode) {
967 return radv_video_get_encode_session_memory_requirements(device, vid, pMemoryRequirementsCount,
968 pMemoryRequirements);
969 }
970 VK_OUTARRAY_MAKE_TYPED(VkVideoSessionMemoryRequirementsKHR, out, pMemoryRequirements, pMemoryRequirementsCount);
971 /* 1 buffer for session context */
972 if (pdev->info.family >= CHIP_POLARIS10) {
973 vk_outarray_append_typed(VkVideoSessionMemoryRequirementsKHR, &out, m)
974 {
975 m->memoryBindIndex = RADV_BIND_SESSION_CTX;
976 m->memoryRequirements.size = RDECODE_SESSION_CONTEXT_SIZE;
977 m->memoryRequirements.alignment = 0;
978 m->memoryRequirements.memoryTypeBits = memory_type_bits;
979 }
980 }
981
982 if (vid->stream_type == RDECODE_CODEC_H264_PERF && pdev->info.family >= CHIP_POLARIS10) {
983 vk_outarray_append_typed(VkVideoSessionMemoryRequirementsKHR, &out, m)
984 {
985 m->memoryBindIndex = RADV_BIND_DECODER_CTX;
986 m->memoryRequirements.size = align(calc_ctx_size_h264_perf(vid), 4096);
987 m->memoryRequirements.alignment = 0;
988 m->memoryRequirements.memoryTypeBits = memory_type_bits;
989 }
990 }
991 if (vid->stream_type == RDECODE_CODEC_H265) {
992 uint32_t ctx_size;
993
994 if (vid->vk.h265.profile_idc == STD_VIDEO_H265_PROFILE_IDC_MAIN_10)
995 ctx_size = calc_ctx_size_h265_main10(vid);
996 else
997 ctx_size = calc_ctx_size_h265_main(vid);
998 vk_outarray_append_typed(VkVideoSessionMemoryRequirementsKHR, &out, m)
999 {
1000 m->memoryBindIndex = RADV_BIND_DECODER_CTX;
1001 m->memoryRequirements.size = align(ctx_size, 4096);
1002 m->memoryRequirements.alignment = 0;
1003 m->memoryRequirements.memoryTypeBits = memory_type_bits;
1004 }
1005 }
1006 if (vid->stream_type == RDECODE_CODEC_AV1) {
1007 vk_outarray_append_typed(VkVideoSessionMemoryRequirementsKHR, &out, m)
1008 {
1009 m->memoryBindIndex = RADV_BIND_DECODER_CTX;
1010 m->memoryRequirements.size = align(calc_ctx_size_av1(device, vid), 4096);
1011 m->memoryRequirements.alignment = 0;
1012 m->memoryRequirements.memoryTypeBits = 0;
1013 for (unsigned i = 0; i < pdev->memory_properties.memoryTypeCount; i++)
1014 if (pdev->memory_properties.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
1015 m->memoryRequirements.memoryTypeBits |= (1 << i);
1016 }
1017 }
1018 return vk_outarray_status(&out);
1019 }
1020
1021 VKAPI_ATTR VkResult VKAPI_CALL
radv_UpdateVideoSessionParametersKHR(VkDevice _device,VkVideoSessionParametersKHR videoSessionParameters,const VkVideoSessionParametersUpdateInfoKHR * pUpdateInfo)1022 radv_UpdateVideoSessionParametersKHR(VkDevice _device, VkVideoSessionParametersKHR videoSessionParameters,
1023 const VkVideoSessionParametersUpdateInfoKHR *pUpdateInfo)
1024 {
1025 VK_FROM_HANDLE(radv_video_session_params, params, videoSessionParameters);
1026
1027 VkResult result = vk_video_session_parameters_update(¶ms->vk, pUpdateInfo);
1028 if (result != VK_SUCCESS)
1029 return result;
1030 radv_video_patch_session_parameters(¶ms->vk);
1031 return result;
1032 }
1033
1034 static void
copy_bind(struct radv_vid_mem * dst,const VkBindVideoSessionMemoryInfoKHR * src)1035 copy_bind(struct radv_vid_mem *dst, const VkBindVideoSessionMemoryInfoKHR *src)
1036 {
1037 dst->mem = radv_device_memory_from_handle(src->memory);
1038 dst->offset = src->memoryOffset;
1039 dst->size = src->memorySize;
1040 }
1041
1042 VKAPI_ATTR VkResult VKAPI_CALL
radv_BindVideoSessionMemoryKHR(VkDevice _device,VkVideoSessionKHR videoSession,uint32_t videoSessionBindMemoryCount,const VkBindVideoSessionMemoryInfoKHR * pBindSessionMemoryInfos)1043 radv_BindVideoSessionMemoryKHR(VkDevice _device, VkVideoSessionKHR videoSession, uint32_t videoSessionBindMemoryCount,
1044 const VkBindVideoSessionMemoryInfoKHR *pBindSessionMemoryInfos)
1045 {
1046 VK_FROM_HANDLE(radv_video_session, vid, videoSession);
1047
1048 for (unsigned i = 0; i < videoSessionBindMemoryCount; i++) {
1049 switch (pBindSessionMemoryInfos[i].memoryBindIndex) {
1050 case RADV_BIND_SESSION_CTX:
1051 copy_bind(&vid->sessionctx, &pBindSessionMemoryInfos[i]);
1052 break;
1053 case RADV_BIND_DECODER_CTX:
1054 copy_bind(&vid->ctx, &pBindSessionMemoryInfos[i]);
1055 break;
1056 default:
1057 assert(0);
1058 break;
1059 }
1060 }
1061 return VK_SUCCESS;
1062 }
1063
1064 /* add a new set register command to the IB */
1065 static void
set_reg(struct radv_cmd_buffer * cmd_buffer,unsigned reg,uint32_t val)1066 set_reg(struct radv_cmd_buffer *cmd_buffer, unsigned reg, uint32_t val)
1067 {
1068 struct radeon_cmdbuf *cs = cmd_buffer->cs;
1069 radeon_emit(cs, RDECODE_PKT0(reg >> 2, 0));
1070 radeon_emit(cs, val);
1071 }
1072
1073 static void
send_cmd(struct radv_cmd_buffer * cmd_buffer,unsigned cmd,struct radeon_winsys_bo * bo,uint32_t offset)1074 send_cmd(struct radv_cmd_buffer *cmd_buffer, unsigned cmd, struct radeon_winsys_bo *bo, uint32_t offset)
1075 {
1076 struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
1077 const struct radv_physical_device *pdev = radv_device_physical(device);
1078 uint64_t addr;
1079
1080 radv_cs_add_buffer(device->ws, cmd_buffer->cs, bo);
1081 addr = radv_buffer_get_va(bo);
1082 addr += offset;
1083
1084 if (pdev->vid_decode_ip != AMD_IP_VCN_UNIFIED) {
1085 radeon_check_space(device->ws, cmd_buffer->cs, 6);
1086 set_reg(cmd_buffer, pdev->vid_dec_reg.data0, addr);
1087 set_reg(cmd_buffer, pdev->vid_dec_reg.data1, addr >> 32);
1088 set_reg(cmd_buffer, pdev->vid_dec_reg.cmd, cmd << 1);
1089 return;
1090 }
1091 switch (cmd) {
1092 case RDECODE_CMD_MSG_BUFFER:
1093 cmd_buffer->video.decode_buffer->valid_buf_flag |= RDECODE_CMDBUF_FLAGS_MSG_BUFFER;
1094 cmd_buffer->video.decode_buffer->msg_buffer_address_hi = (addr >> 32);
1095 cmd_buffer->video.decode_buffer->msg_buffer_address_lo = (addr);
1096 break;
1097 case RDECODE_CMD_DPB_BUFFER:
1098 cmd_buffer->video.decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_DPB_BUFFER);
1099 cmd_buffer->video.decode_buffer->dpb_buffer_address_hi = (addr >> 32);
1100 cmd_buffer->video.decode_buffer->dpb_buffer_address_lo = (addr);
1101 break;
1102 case RDECODE_CMD_DECODING_TARGET_BUFFER:
1103 cmd_buffer->video.decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_DECODING_TARGET_BUFFER);
1104 cmd_buffer->video.decode_buffer->target_buffer_address_hi = (addr >> 32);
1105 cmd_buffer->video.decode_buffer->target_buffer_address_lo = (addr);
1106 break;
1107 case RDECODE_CMD_FEEDBACK_BUFFER:
1108 cmd_buffer->video.decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_FEEDBACK_BUFFER);
1109 cmd_buffer->video.decode_buffer->feedback_buffer_address_hi = (addr >> 32);
1110 cmd_buffer->video.decode_buffer->feedback_buffer_address_lo = (addr);
1111 break;
1112 case RDECODE_CMD_PROB_TBL_BUFFER:
1113 cmd_buffer->video.decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_PROB_TBL_BUFFER);
1114 cmd_buffer->video.decode_buffer->prob_tbl_buffer_address_hi = (addr >> 32);
1115 cmd_buffer->video.decode_buffer->prob_tbl_buffer_address_lo = (addr);
1116 break;
1117 case RDECODE_CMD_SESSION_CONTEXT_BUFFER:
1118 cmd_buffer->video.decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_SESSION_CONTEXT_BUFFER);
1119 cmd_buffer->video.decode_buffer->session_contex_buffer_address_hi = (addr >> 32);
1120 cmd_buffer->video.decode_buffer->session_contex_buffer_address_lo = (addr);
1121 break;
1122 case RDECODE_CMD_BITSTREAM_BUFFER:
1123 cmd_buffer->video.decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_BITSTREAM_BUFFER);
1124 cmd_buffer->video.decode_buffer->bitstream_buffer_address_hi = (addr >> 32);
1125 cmd_buffer->video.decode_buffer->bitstream_buffer_address_lo = (addr);
1126 break;
1127 case RDECODE_CMD_IT_SCALING_TABLE_BUFFER:
1128 cmd_buffer->video.decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_IT_SCALING_BUFFER);
1129 cmd_buffer->video.decode_buffer->it_sclr_table_buffer_address_hi = (addr >> 32);
1130 cmd_buffer->video.decode_buffer->it_sclr_table_buffer_address_lo = (addr);
1131 break;
1132 case RDECODE_CMD_CONTEXT_BUFFER:
1133 cmd_buffer->video.decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_CONTEXT_BUFFER);
1134 cmd_buffer->video.decode_buffer->context_buffer_address_hi = (addr >> 32);
1135 cmd_buffer->video.decode_buffer->context_buffer_address_lo = (addr);
1136 break;
1137 default:
1138 assert(0);
1139 }
1140 }
1141
1142 static void
rvcn_dec_message_create(struct radv_video_session * vid,void * ptr,uint32_t size)1143 rvcn_dec_message_create(struct radv_video_session *vid, void *ptr, uint32_t size)
1144 {
1145 rvcn_dec_message_header_t *header = ptr;
1146 rvcn_dec_message_create_t *create = (void *)((char *)ptr + sizeof(rvcn_dec_message_header_t));
1147
1148 memset(ptr, 0, size);
1149 header->header_size = sizeof(rvcn_dec_message_header_t);
1150 header->total_size = size;
1151 header->num_buffers = 1;
1152 header->msg_type = RDECODE_MSG_CREATE;
1153 header->stream_handle = vid->stream_handle;
1154 header->status_report_feedback_number = 0;
1155
1156 header->index[0].message_id = RDECODE_MESSAGE_CREATE;
1157 header->index[0].offset = sizeof(rvcn_dec_message_header_t);
1158 header->index[0].size = sizeof(rvcn_dec_message_create_t);
1159 header->index[0].filled = 0;
1160
1161 create->stream_type = vid->stream_type;
1162 create->session_flags = 0;
1163 create->width_in_samples = vid->vk.max_coded.width;
1164 create->height_in_samples = vid->vk.max_coded.height;
1165 }
1166
1167 static void
rvcn_dec_message_feedback(void * ptr)1168 rvcn_dec_message_feedback(void *ptr)
1169 {
1170 rvcn_dec_feedback_header_t *header = (void *)ptr;
1171
1172 header->header_size = sizeof(rvcn_dec_feedback_header_t);
1173 header->total_size = sizeof(rvcn_dec_feedback_header_t);
1174 header->num_buffers = 0;
1175 }
1176
1177 static const uint8_t h264_levels[] = {10, 11, 12, 13, 20, 21, 22, 30, 31, 32, 40, 41, 42, 50, 51, 52, 60, 61, 62};
1178 static uint8_t
get_h264_level(StdVideoH264LevelIdc level)1179 get_h264_level(StdVideoH264LevelIdc level)
1180 {
1181 assert(level <= STD_VIDEO_H264_LEVEL_IDC_6_2);
1182 return h264_levels[level];
1183 }
1184
1185 static void
update_h264_scaling(unsigned char scaling_list_4x4[6][16],unsigned char scaling_list_8x8[2][64],const StdVideoH264ScalingLists * scaling_lists)1186 update_h264_scaling(unsigned char scaling_list_4x4[6][16], unsigned char scaling_list_8x8[2][64],
1187 const StdVideoH264ScalingLists *scaling_lists)
1188 {
1189 for (int i = 0; i < STD_VIDEO_H264_SCALING_LIST_4X4_NUM_LISTS; i++) {
1190 for (int j = 0; j < STD_VIDEO_H264_SCALING_LIST_4X4_NUM_ELEMENTS; j++)
1191 scaling_list_4x4[i][vl_zscan_normal_16[j]] = scaling_lists->ScalingList4x4[i][j];
1192 }
1193
1194 for (int i = 0; i < 2; i++) {
1195 for (int j = 0; j < STD_VIDEO_H264_SCALING_LIST_8X8_NUM_ELEMENTS; j++)
1196 scaling_list_8x8[i][vl_zscan_normal[j]] = scaling_lists->ScalingList8x8[i][j];
1197 }
1198 }
1199
1200 static rvcn_dec_message_avc_t
get_h264_msg(struct radv_video_session * vid,struct radv_video_session_params * params,const struct VkVideoDecodeInfoKHR * frame_info,uint32_t * slice_offset,uint32_t * width_in_samples,uint32_t * height_in_samples,void * it_ptr)1201 get_h264_msg(struct radv_video_session *vid, struct radv_video_session_params *params,
1202 const struct VkVideoDecodeInfoKHR *frame_info, uint32_t *slice_offset, uint32_t *width_in_samples,
1203 uint32_t *height_in_samples, void *it_ptr)
1204 {
1205 rvcn_dec_message_avc_t result;
1206 const struct VkVideoDecodeH264PictureInfoKHR *h264_pic_info =
1207 vk_find_struct_const(frame_info->pNext, VIDEO_DECODE_H264_PICTURE_INFO_KHR);
1208
1209 *slice_offset = h264_pic_info->pSliceOffsets[0];
1210
1211 memset(&result, 0, sizeof(result));
1212
1213 assert(params->vk.h264_dec.h264_sps_count > 0);
1214 const StdVideoH264SequenceParameterSet *sps =
1215 vk_video_find_h264_dec_std_sps(¶ms->vk, h264_pic_info->pStdPictureInfo->seq_parameter_set_id);
1216 switch (sps->profile_idc) {
1217 case STD_VIDEO_H264_PROFILE_IDC_BASELINE:
1218 result.profile = RDECODE_H264_PROFILE_BASELINE;
1219 break;
1220 case STD_VIDEO_H264_PROFILE_IDC_MAIN:
1221 result.profile = RDECODE_H264_PROFILE_MAIN;
1222 break;
1223 case STD_VIDEO_H264_PROFILE_IDC_HIGH:
1224 result.profile = RDECODE_H264_PROFILE_HIGH;
1225 break;
1226 default:
1227 fprintf(stderr, "UNSUPPORTED CODEC %d\n", sps->profile_idc);
1228 result.profile = RDECODE_H264_PROFILE_MAIN;
1229 break;
1230 }
1231
1232 *width_in_samples = (sps->pic_width_in_mbs_minus1 + 1) * 16;
1233 *height_in_samples = (sps->pic_height_in_map_units_minus1 + 1) * 16;
1234 if (!sps->flags.frame_mbs_only_flag)
1235 *height_in_samples *= 2;
1236 result.level = get_h264_level(sps->level_idc);
1237
1238 result.sps_info_flags = 0;
1239
1240 result.sps_info_flags |= sps->flags.direct_8x8_inference_flag << 0;
1241 result.sps_info_flags |= sps->flags.mb_adaptive_frame_field_flag << 1;
1242 result.sps_info_flags |= sps->flags.frame_mbs_only_flag << 2;
1243 result.sps_info_flags |= sps->flags.delta_pic_order_always_zero_flag << 3;
1244 if (vid->dpb_type != DPB_DYNAMIC_TIER_2)
1245 result.sps_info_flags |= 1 << RDECODE_SPS_INFO_H264_EXTENSION_SUPPORT_FLAG_SHIFT;
1246
1247 result.bit_depth_luma_minus8 = sps->bit_depth_luma_minus8;
1248 result.bit_depth_chroma_minus8 = sps->bit_depth_chroma_minus8;
1249 result.log2_max_frame_num_minus4 = sps->log2_max_frame_num_minus4;
1250 result.pic_order_cnt_type = sps->pic_order_cnt_type;
1251 result.log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_pic_order_cnt_lsb_minus4;
1252
1253 result.chroma_format = sps->chroma_format_idc;
1254
1255 const StdVideoH264PictureParameterSet *pps =
1256 vk_video_find_h264_dec_std_pps(¶ms->vk, h264_pic_info->pStdPictureInfo->pic_parameter_set_id);
1257 result.pps_info_flags = 0;
1258 result.pps_info_flags |= pps->flags.transform_8x8_mode_flag << 0;
1259 result.pps_info_flags |= pps->flags.redundant_pic_cnt_present_flag << 1;
1260 result.pps_info_flags |= pps->flags.constrained_intra_pred_flag << 2;
1261 result.pps_info_flags |= pps->flags.deblocking_filter_control_present_flag << 3;
1262 result.pps_info_flags |= pps->weighted_bipred_idc << 4;
1263 result.pps_info_flags |= pps->flags.weighted_pred_flag << 6;
1264 result.pps_info_flags |= pps->flags.bottom_field_pic_order_in_frame_present_flag << 7;
1265 result.pps_info_flags |= pps->flags.entropy_coding_mode_flag << 8;
1266
1267 result.pic_init_qp_minus26 = pps->pic_init_qp_minus26;
1268 result.chroma_qp_index_offset = pps->chroma_qp_index_offset;
1269 result.second_chroma_qp_index_offset = pps->second_chroma_qp_index_offset;
1270
1271 StdVideoH264ScalingLists scaling_lists;
1272 vk_video_derive_h264_scaling_list(sps, pps, &scaling_lists);
1273 update_h264_scaling(result.scaling_list_4x4, result.scaling_list_8x8, &scaling_lists);
1274
1275 memset(it_ptr, 0, IT_SCALING_TABLE_SIZE);
1276 memcpy(it_ptr, result.scaling_list_4x4, 6 * 16);
1277 memcpy((char *)it_ptr + 96, result.scaling_list_8x8, 2 * 64);
1278
1279 result.num_ref_idx_l0_active_minus1 = pps->num_ref_idx_l0_default_active_minus1;
1280 result.num_ref_idx_l1_active_minus1 = pps->num_ref_idx_l1_default_active_minus1;
1281
1282 result.curr_field_order_cnt_list[0] = h264_pic_info->pStdPictureInfo->PicOrderCnt[0];
1283 result.curr_field_order_cnt_list[1] = h264_pic_info->pStdPictureInfo->PicOrderCnt[1];
1284
1285 result.frame_num = h264_pic_info->pStdPictureInfo->frame_num;
1286
1287 result.num_ref_frames = sps->max_num_ref_frames;
1288 result.non_existing_frame_flags = 0;
1289 result.used_for_reference_flags = 0;
1290
1291 memset(result.ref_frame_list, 0xff, sizeof(unsigned char) * 16);
1292 memset(result.frame_num_list, 0, sizeof(unsigned int) * 16);
1293 for (unsigned i = 0; i < frame_info->referenceSlotCount; i++) {
1294 int idx = frame_info->pReferenceSlots[i].slotIndex;
1295 const struct VkVideoDecodeH264DpbSlotInfoKHR *dpb_slot =
1296 vk_find_struct_const(frame_info->pReferenceSlots[i].pNext, VIDEO_DECODE_H264_DPB_SLOT_INFO_KHR);
1297
1298 result.frame_num_list[i] = dpb_slot->pStdReferenceInfo->FrameNum;
1299 result.field_order_cnt_list[i][0] = dpb_slot->pStdReferenceInfo->PicOrderCnt[0];
1300 result.field_order_cnt_list[i][1] = dpb_slot->pStdReferenceInfo->PicOrderCnt[1];
1301
1302 result.ref_frame_list[i] = idx;
1303
1304 if (dpb_slot->pStdReferenceInfo->flags.top_field_flag)
1305 result.used_for_reference_flags |= (1 << (2 * i));
1306 if (dpb_slot->pStdReferenceInfo->flags.bottom_field_flag)
1307 result.used_for_reference_flags |= (1 << (2 * i + 1));
1308
1309 if (!dpb_slot->pStdReferenceInfo->flags.top_field_flag && !dpb_slot->pStdReferenceInfo->flags.bottom_field_flag)
1310 result.used_for_reference_flags |= (3 << (2 * i));
1311
1312 if (dpb_slot->pStdReferenceInfo->flags.used_for_long_term_reference)
1313 result.ref_frame_list[i] |= 0x80;
1314 if (dpb_slot->pStdReferenceInfo->flags.is_non_existing)
1315 result.non_existing_frame_flags |= 1 << i;
1316 }
1317 result.curr_pic_ref_frame_num = frame_info->referenceSlotCount;
1318 result.decoded_pic_idx = frame_info->pSetupReferenceSlot->slotIndex;
1319
1320 return result;
1321 }
1322
1323 static void
update_h265_scaling(void * it_ptr,const StdVideoH265ScalingLists * scaling_lists)1324 update_h265_scaling(void *it_ptr, const StdVideoH265ScalingLists *scaling_lists)
1325 {
1326 if (scaling_lists) {
1327 memcpy(it_ptr, scaling_lists->ScalingList4x4,
1328 STD_VIDEO_H265_SCALING_LIST_4X4_NUM_LISTS * STD_VIDEO_H265_SCALING_LIST_4X4_NUM_ELEMENTS);
1329 memcpy((char *)it_ptr + 96, scaling_lists->ScalingList8x8,
1330 STD_VIDEO_H265_SCALING_LIST_8X8_NUM_LISTS * STD_VIDEO_H265_SCALING_LIST_8X8_NUM_ELEMENTS);
1331 memcpy((char *)it_ptr + 480, scaling_lists->ScalingList16x16,
1332 STD_VIDEO_H265_SCALING_LIST_16X16_NUM_LISTS * STD_VIDEO_H265_SCALING_LIST_16X16_NUM_ELEMENTS);
1333 memcpy((char *)it_ptr + 864, scaling_lists->ScalingList32x32,
1334 STD_VIDEO_H265_SCALING_LIST_32X32_NUM_LISTS * STD_VIDEO_H265_SCALING_LIST_32X32_NUM_ELEMENTS);
1335 } else {
1336 memset(it_ptr, 0, STD_VIDEO_H265_SCALING_LIST_4X4_NUM_LISTS * STD_VIDEO_H265_SCALING_LIST_4X4_NUM_ELEMENTS);
1337 memset((char *)it_ptr + 96, 0,
1338 STD_VIDEO_H265_SCALING_LIST_8X8_NUM_LISTS * STD_VIDEO_H265_SCALING_LIST_8X8_NUM_ELEMENTS);
1339 memset((char *)it_ptr + 480, 0,
1340 STD_VIDEO_H265_SCALING_LIST_16X16_NUM_LISTS * STD_VIDEO_H265_SCALING_LIST_16X16_NUM_ELEMENTS);
1341 memset((char *)it_ptr + 864, 0,
1342 STD_VIDEO_H265_SCALING_LIST_32X32_NUM_LISTS * STD_VIDEO_H265_SCALING_LIST_32X32_NUM_ELEMENTS);
1343 }
1344 }
1345
1346 static rvcn_dec_message_hevc_t
get_h265_msg(struct radv_device * device,struct radv_video_session * vid,struct radv_video_session_params * params,const struct VkVideoDecodeInfoKHR * frame_info,uint32_t * width_in_samples,uint32_t * height_in_samples,void * it_ptr)1347 get_h265_msg(struct radv_device *device, struct radv_video_session *vid, struct radv_video_session_params *params,
1348 const struct VkVideoDecodeInfoKHR *frame_info,
1349 uint32_t *width_in_samples,
1350 uint32_t *height_in_samples,
1351 void *it_ptr)
1352 {
1353 const struct radv_physical_device *pdev = radv_device_physical(device);
1354 rvcn_dec_message_hevc_t result;
1355 int i, j;
1356 const struct VkVideoDecodeH265PictureInfoKHR *h265_pic_info =
1357 vk_find_struct_const(frame_info->pNext, VIDEO_DECODE_H265_PICTURE_INFO_KHR);
1358 memset(&result, 0, sizeof(result));
1359
1360 const StdVideoH265SequenceParameterSet *sps =
1361 vk_video_find_h265_dec_std_sps(¶ms->vk, h265_pic_info->pStdPictureInfo->pps_seq_parameter_set_id);
1362 const StdVideoH265PictureParameterSet *pps =
1363 vk_video_find_h265_dec_std_pps(¶ms->vk, h265_pic_info->pStdPictureInfo->pps_pic_parameter_set_id);
1364
1365 result.sps_info_flags = 0;
1366 result.sps_info_flags |= sps->flags.scaling_list_enabled_flag << 0;
1367 result.sps_info_flags |= sps->flags.amp_enabled_flag << 1;
1368 result.sps_info_flags |= sps->flags.sample_adaptive_offset_enabled_flag << 2;
1369 result.sps_info_flags |= sps->flags.pcm_enabled_flag << 3;
1370 result.sps_info_flags |= sps->flags.pcm_loop_filter_disabled_flag << 4;
1371 result.sps_info_flags |= sps->flags.long_term_ref_pics_present_flag << 5;
1372 result.sps_info_flags |= sps->flags.sps_temporal_mvp_enabled_flag << 6;
1373 result.sps_info_flags |= sps->flags.strong_intra_smoothing_enabled_flag << 7;
1374 result.sps_info_flags |= sps->flags.separate_colour_plane_flag << 8;
1375
1376 if (pdev->info.family == CHIP_CARRIZO)
1377 result.sps_info_flags |= 1 << 9;
1378
1379 if (!h265_pic_info->pStdPictureInfo->flags.short_term_ref_pic_set_sps_flag) {
1380 result.sps_info_flags |= 1 << 11;
1381 }
1382 result.st_rps_bits = h265_pic_info->pStdPictureInfo->NumBitsForSTRefPicSetInSlice;
1383
1384 *width_in_samples = sps->pic_width_in_luma_samples;
1385 *height_in_samples = sps->pic_height_in_luma_samples;
1386 result.chroma_format = sps->chroma_format_idc;
1387 result.bit_depth_luma_minus8 = sps->bit_depth_luma_minus8;
1388 result.bit_depth_chroma_minus8 = sps->bit_depth_chroma_minus8;
1389 result.log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_pic_order_cnt_lsb_minus4;
1390 result.sps_max_dec_pic_buffering_minus1 =
1391 sps->pDecPicBufMgr->max_dec_pic_buffering_minus1[sps->sps_max_sub_layers_minus1];
1392 result.log2_min_luma_coding_block_size_minus3 = sps->log2_min_luma_coding_block_size_minus3;
1393 result.log2_diff_max_min_luma_coding_block_size = sps->log2_diff_max_min_luma_coding_block_size;
1394 result.log2_min_transform_block_size_minus2 = sps->log2_min_luma_transform_block_size_minus2;
1395 result.log2_diff_max_min_transform_block_size = sps->log2_diff_max_min_luma_transform_block_size;
1396 result.max_transform_hierarchy_depth_inter = sps->max_transform_hierarchy_depth_inter;
1397 result.max_transform_hierarchy_depth_intra = sps->max_transform_hierarchy_depth_intra;
1398 if (sps->flags.pcm_enabled_flag) {
1399 result.pcm_sample_bit_depth_luma_minus1 = sps->pcm_sample_bit_depth_luma_minus1;
1400 result.pcm_sample_bit_depth_chroma_minus1 = sps->pcm_sample_bit_depth_chroma_minus1;
1401 result.log2_min_pcm_luma_coding_block_size_minus3 = sps->log2_min_pcm_luma_coding_block_size_minus3;
1402 result.log2_diff_max_min_pcm_luma_coding_block_size = sps->log2_diff_max_min_pcm_luma_coding_block_size;
1403 }
1404 result.num_short_term_ref_pic_sets = sps->num_short_term_ref_pic_sets;
1405
1406 result.pps_info_flags = 0;
1407 result.pps_info_flags |= pps->flags.dependent_slice_segments_enabled_flag << 0;
1408 result.pps_info_flags |= pps->flags.output_flag_present_flag << 1;
1409 result.pps_info_flags |= pps->flags.sign_data_hiding_enabled_flag << 2;
1410 result.pps_info_flags |= pps->flags.cabac_init_present_flag << 3;
1411 result.pps_info_flags |= pps->flags.constrained_intra_pred_flag << 4;
1412 result.pps_info_flags |= pps->flags.transform_skip_enabled_flag << 5;
1413 result.pps_info_flags |= pps->flags.cu_qp_delta_enabled_flag << 6;
1414 result.pps_info_flags |= pps->flags.pps_slice_chroma_qp_offsets_present_flag << 7;
1415 result.pps_info_flags |= pps->flags.weighted_pred_flag << 8;
1416 result.pps_info_flags |= pps->flags.weighted_bipred_flag << 9;
1417 result.pps_info_flags |= pps->flags.transquant_bypass_enabled_flag << 10;
1418 result.pps_info_flags |= pps->flags.tiles_enabled_flag << 11;
1419 result.pps_info_flags |= pps->flags.entropy_coding_sync_enabled_flag << 12;
1420 result.pps_info_flags |= pps->flags.uniform_spacing_flag << 13;
1421 result.pps_info_flags |= pps->flags.loop_filter_across_tiles_enabled_flag << 14;
1422 result.pps_info_flags |= pps->flags.pps_loop_filter_across_slices_enabled_flag << 15;
1423 result.pps_info_flags |= pps->flags.deblocking_filter_override_enabled_flag << 16;
1424 result.pps_info_flags |= pps->flags.pps_deblocking_filter_disabled_flag << 17;
1425 result.pps_info_flags |= pps->flags.lists_modification_present_flag << 18;
1426 result.pps_info_flags |= pps->flags.slice_segment_header_extension_present_flag << 19;
1427
1428 result.num_extra_slice_header_bits = pps->num_extra_slice_header_bits;
1429 result.num_long_term_ref_pic_sps = sps->num_long_term_ref_pics_sps;
1430 result.num_ref_idx_l0_default_active_minus1 = pps->num_ref_idx_l0_default_active_minus1;
1431 result.num_ref_idx_l1_default_active_minus1 = pps->num_ref_idx_l1_default_active_minus1;
1432 result.pps_cb_qp_offset = pps->pps_cb_qp_offset;
1433 result.pps_cr_qp_offset = pps->pps_cr_qp_offset;
1434 result.pps_beta_offset_div2 = pps->pps_beta_offset_div2;
1435 result.pps_tc_offset_div2 = pps->pps_tc_offset_div2;
1436 result.diff_cu_qp_delta_depth = pps->diff_cu_qp_delta_depth;
1437 result.num_tile_columns_minus1 = pps->num_tile_columns_minus1;
1438 result.num_tile_rows_minus1 = pps->num_tile_rows_minus1;
1439 result.log2_parallel_merge_level_minus2 = pps->log2_parallel_merge_level_minus2;
1440 result.init_qp_minus26 = pps->init_qp_minus26;
1441
1442 for (i = 0; i < 19; ++i)
1443 result.column_width_minus1[i] = pps->column_width_minus1[i];
1444
1445 for (i = 0; i < 21; ++i)
1446 result.row_height_minus1[i] = pps->row_height_minus1[i];
1447
1448 result.num_delta_pocs_ref_rps_idx = h265_pic_info->pStdPictureInfo->NumDeltaPocsOfRefRpsIdx;
1449 result.curr_poc = h265_pic_info->pStdPictureInfo->PicOrderCntVal;
1450
1451 uint8_t idxs[16];
1452 memset(result.poc_list, 0, 16 * sizeof(int));
1453 memset(result.ref_pic_list, 0x7f, 16);
1454 memset(idxs, 0xff, 16);
1455 for (i = 0; i < frame_info->referenceSlotCount; i++) {
1456 const struct VkVideoDecodeH265DpbSlotInfoKHR *dpb_slot =
1457 vk_find_struct_const(frame_info->pReferenceSlots[i].pNext, VIDEO_DECODE_H265_DPB_SLOT_INFO_KHR);
1458 int idx = frame_info->pReferenceSlots[i].slotIndex;
1459 result.poc_list[i] = dpb_slot->pStdReferenceInfo->PicOrderCntVal;
1460 result.ref_pic_list[i] = idx;
1461 idxs[idx] = i;
1462 }
1463 result.curr_idx = frame_info->pSetupReferenceSlot->slotIndex;
1464
1465 #define IDXS(x) ((x) == 0xff ? 0xff : idxs[(x)])
1466 for (i = 0; i < 8; ++i)
1467 result.ref_pic_set_st_curr_before[i] = IDXS(h265_pic_info->pStdPictureInfo->RefPicSetStCurrBefore[i]);
1468
1469 for (i = 0; i < 8; ++i)
1470 result.ref_pic_set_st_curr_after[i] = IDXS(h265_pic_info->pStdPictureInfo->RefPicSetStCurrAfter[i]);
1471
1472 for (i = 0; i < 8; ++i)
1473 result.ref_pic_set_lt_curr[i] = IDXS(h265_pic_info->pStdPictureInfo->RefPicSetLtCurr[i]);
1474
1475 const StdVideoH265ScalingLists *scaling_lists = NULL;
1476 if (pps->flags.pps_scaling_list_data_present_flag)
1477 scaling_lists = pps->pScalingLists;
1478 else if (sps->flags.sps_scaling_list_data_present_flag)
1479 scaling_lists = sps->pScalingLists;
1480
1481 update_h265_scaling(it_ptr, scaling_lists);
1482
1483 if (scaling_lists) {
1484 for (i = 0; i < STD_VIDEO_H265_SCALING_LIST_16X16_NUM_LISTS; ++i)
1485 result.ucScalingListDCCoefSizeID2[i] = scaling_lists->ScalingListDCCoef16x16[i];
1486
1487 for (i = 0; i < STD_VIDEO_H265_SCALING_LIST_32X32_NUM_LISTS; ++i)
1488 result.ucScalingListDCCoefSizeID3[i] = scaling_lists->ScalingListDCCoef32x32[i];
1489 }
1490
1491 for (i = 0; i < 2; i++) {
1492 for (j = 0; j < 15; j++)
1493 result.direct_reflist[i][j] = 0xff;
1494 }
1495
1496 if (vid->vk.h265.profile_idc == STD_VIDEO_H265_PROFILE_IDC_MAIN_10) {
1497 if (vid->vk.picture_format == VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16) {
1498 result.p010_mode = 1;
1499 result.msb_mode = 1;
1500 } else {
1501 result.p010_mode = 0;
1502 result.luma_10to8 = 5;
1503 result.chroma_10to8 = 5;
1504 result.hevc_reserved[0] = 4; /* sclr_luma10to8 */
1505 result.hevc_reserved[1] = 4; /* sclr_chroma10to8 */
1506 }
1507 }
1508
1509 return result;
1510 }
1511
/* Loop-restoration filter types for AV1.
 * NOTE(review): numbering presumably follows the AV1 specification's
 * FrameRestorationType values -- confirm against the users of these names.
 */
enum {
   AV1_RESTORE_NONE = 0,       /* no loop restoration */
   AV1_RESTORE_WIENER = 1,     /* Wiener filter */
   AV1_RESTORE_SGRPROJ = 2,    /* self-guided restoration with projection */
   AV1_RESTORE_SWITCHABLE = 3, /* filter type selected per restoration unit */
};
1518
/* Super-resolution scale constants used by get_av1_msg().
 *
 * AV1_SUPERRES_NUM is the denominator reported when superres is disabled
 * (i.e. no scaling); AV1_SUPERRES_DENOM_MIN is added to the picture's
 * coded_denom to obtain the denominator when superres is enabled.
 */
#define AV1_SUPERRES_NUM       8
#define AV1_SUPERRES_DENOM_MIN 9
1521
1522 static rvcn_dec_message_av1_t
get_av1_msg(struct radv_device * device,struct radv_video_session * vid,struct radv_video_session_params * params,const struct VkVideoDecodeInfoKHR * frame_info,void * probs_ptr,int * update_reference_slot)1523 get_av1_msg(struct radv_device *device, struct radv_video_session *vid, struct radv_video_session_params *params,
1524 const struct VkVideoDecodeInfoKHR *frame_info, void *probs_ptr, int *update_reference_slot)
1525 {
1526 const struct radv_physical_device *pdev = radv_device_physical(device);
1527 rvcn_dec_message_av1_t result;
1528 unsigned i, j;
1529 const struct VkVideoDecodeAV1PictureInfoKHR *av1_pic_info =
1530 vk_find_struct_const(frame_info->pNext, VIDEO_DECODE_AV1_PICTURE_INFO_KHR);
1531 const StdVideoDecodeAV1PictureInfo *pi = av1_pic_info->pStdPictureInfo;
1532 const StdVideoAV1SequenceHeader *seq_hdr = ¶ms->vk.av1_dec.seq_hdr.base;
1533 memset(&result, 0, sizeof(result));
1534
1535 const int intra_only_decoding = vid->vk.max_dpb_slots == 0;
1536 if (intra_only_decoding)
1537 assert(frame_info->pSetupReferenceSlot == NULL);
1538
1539 *update_reference_slot = !(intra_only_decoding || pi->refresh_frame_flags == 0);
1540
1541 result.frame_header_flags = (1 /*av1_pic_info->frame_header->flags.show_frame*/
1542 << RDECODE_FRAME_HDR_INFO_AV1_SHOW_FRAME_SHIFT) &
1543 RDECODE_FRAME_HDR_INFO_AV1_SHOW_FRAME_MASK;
1544
1545 result.frame_header_flags |= (pi->flags.disable_cdf_update << RDECODE_FRAME_HDR_INFO_AV1_DISABLE_CDF_UPDATE_SHIFT) &
1546 RDECODE_FRAME_HDR_INFO_AV1_DISABLE_CDF_UPDATE_MASK;
1547
1548 result.frame_header_flags |=
1549 ((!pi->flags.disable_frame_end_update_cdf) << RDECODE_FRAME_HDR_INFO_AV1_REFRESH_FRAME_CONTEXT_SHIFT) &
1550 RDECODE_FRAME_HDR_INFO_AV1_REFRESH_FRAME_CONTEXT_MASK;
1551
1552 result.frame_header_flags |=
1553 ((pi->frame_type == STD_VIDEO_AV1_FRAME_TYPE_INTRA_ONLY) << RDECODE_FRAME_HDR_INFO_AV1_INTRA_ONLY_SHIFT) &
1554 RDECODE_FRAME_HDR_INFO_AV1_INTRA_ONLY_MASK;
1555
1556 result.frame_header_flags |= (pi->flags.allow_intrabc << RDECODE_FRAME_HDR_INFO_AV1_ALLOW_INTRABC_SHIFT) &
1557 RDECODE_FRAME_HDR_INFO_AV1_ALLOW_INTRABC_MASK;
1558
1559 result.frame_header_flags |=
1560 (pi->flags.allow_high_precision_mv << RDECODE_FRAME_HDR_INFO_AV1_ALLOW_HIGH_PRECISION_MV_SHIFT) &
1561 RDECODE_FRAME_HDR_INFO_AV1_ALLOW_HIGH_PRECISION_MV_MASK;
1562
1563 result.frame_header_flags |=
1564 (seq_hdr->pColorConfig->flags.mono_chrome << RDECODE_FRAME_HDR_INFO_AV1_MONOCHROME_SHIFT) &
1565 RDECODE_FRAME_HDR_INFO_AV1_MONOCHROME_MASK;
1566
1567 result.frame_header_flags |= (pi->flags.skip_mode_present << RDECODE_FRAME_HDR_INFO_AV1_SKIP_MODE_FLAG_SHIFT) &
1568 RDECODE_FRAME_HDR_INFO_AV1_SKIP_MODE_FLAG_MASK;
1569
1570 result.frame_header_flags |=
1571 (pi->pQuantization->flags.using_qmatrix << RDECODE_FRAME_HDR_INFO_AV1_USING_QMATRIX_SHIFT) &
1572 RDECODE_FRAME_HDR_INFO_AV1_USING_QMATRIX_MASK;
1573
1574 result.frame_header_flags |=
1575 (seq_hdr->flags.enable_filter_intra << RDECODE_FRAME_HDR_INFO_AV1_ENABLE_FILTER_INTRA_SHIFT) &
1576 RDECODE_FRAME_HDR_INFO_AV1_ENABLE_FILTER_INTRA_MASK;
1577
1578 result.frame_header_flags |=
1579 (seq_hdr->flags.enable_intra_edge_filter << RDECODE_FRAME_HDR_INFO_AV1_ENABLE_INTRA_EDGE_FILTER_SHIFT) &
1580 RDECODE_FRAME_HDR_INFO_AV1_ENABLE_INTRA_EDGE_FILTER_MASK;
1581
1582 result.frame_header_flags |=
1583 (seq_hdr->flags.enable_interintra_compound << RDECODE_FRAME_HDR_INFO_AV1_ENABLE_INTERINTRA_COMPOUND_SHIFT) &
1584 RDECODE_FRAME_HDR_INFO_AV1_ENABLE_INTERINTRA_COMPOUND_MASK;
1585
1586 result.frame_header_flags |=
1587 (seq_hdr->flags.enable_masked_compound << RDECODE_FRAME_HDR_INFO_AV1_ENABLE_MASKED_COMPOUND_SHIFT) &
1588 RDECODE_FRAME_HDR_INFO_AV1_ENABLE_MASKED_COMPOUND_MASK;
1589
1590 result.frame_header_flags |=
1591 (pi->flags.allow_warped_motion << RDECODE_FRAME_HDR_INFO_AV1_ALLOW_WARPED_MOTION_SHIFT) &
1592 RDECODE_FRAME_HDR_INFO_AV1_ALLOW_WARPED_MOTION_MASK;
1593
1594 result.frame_header_flags |=
1595 (seq_hdr->flags.enable_dual_filter << RDECODE_FRAME_HDR_INFO_AV1_ENABLE_DUAL_FILTER_SHIFT) &
1596 RDECODE_FRAME_HDR_INFO_AV1_ENABLE_DUAL_FILTER_MASK;
1597
1598 result.frame_header_flags |=
1599 (seq_hdr->flags.enable_order_hint << RDECODE_FRAME_HDR_INFO_AV1_ENABLE_ORDER_HINT_SHIFT) &
1600 RDECODE_FRAME_HDR_INFO_AV1_ENABLE_ORDER_HINT_MASK;
1601
1602 result.frame_header_flags |= (seq_hdr->flags.enable_jnt_comp << RDECODE_FRAME_HDR_INFO_AV1_ENABLE_JNT_COMP_SHIFT) &
1603 RDECODE_FRAME_HDR_INFO_AV1_ENABLE_JNT_COMP_MASK;
1604
1605 result.frame_header_flags |= (pi->flags.use_ref_frame_mvs << RDECODE_FRAME_HDR_INFO_AV1_ALLOW_REF_FRAME_MVS_SHIFT) &
1606 RDECODE_FRAME_HDR_INFO_AV1_ALLOW_REF_FRAME_MVS_MASK;
1607
1608 result.frame_header_flags |=
1609 (pi->flags.allow_screen_content_tools << RDECODE_FRAME_HDR_INFO_AV1_ALLOW_SCREEN_CONTENT_TOOLS_SHIFT) &
1610 RDECODE_FRAME_HDR_INFO_AV1_ALLOW_SCREEN_CONTENT_TOOLS_MASK;
1611
1612 result.frame_header_flags |=
1613 (pi->flags.force_integer_mv << RDECODE_FRAME_HDR_INFO_AV1_CUR_FRAME_FORCE_INTEGER_MV_SHIFT) &
1614 RDECODE_FRAME_HDR_INFO_AV1_CUR_FRAME_FORCE_INTEGER_MV_MASK;
1615
1616 result.frame_header_flags |=
1617 (pi->pLoopFilter->flags.loop_filter_delta_enabled << RDECODE_FRAME_HDR_INFO_AV1_MODE_REF_DELTA_ENABLED_SHIFT) &
1618 RDECODE_FRAME_HDR_INFO_AV1_MODE_REF_DELTA_ENABLED_MASK;
1619
1620 result.frame_header_flags |=
1621 (pi->pLoopFilter->flags.loop_filter_delta_update << RDECODE_FRAME_HDR_INFO_AV1_MODE_REF_DELTA_UPDATE_SHIFT) &
1622 RDECODE_FRAME_HDR_INFO_AV1_MODE_REF_DELTA_UPDATE_MASK;
1623
1624 result.frame_header_flags |= (pi->flags.delta_q_present << RDECODE_FRAME_HDR_INFO_AV1_DELTA_Q_PRESENT_FLAG_SHIFT) &
1625 RDECODE_FRAME_HDR_INFO_AV1_DELTA_Q_PRESENT_FLAG_MASK;
1626
1627 result.frame_header_flags |= (pi->flags.delta_lf_present << RDECODE_FRAME_HDR_INFO_AV1_DELTA_LF_PRESENT_FLAG_SHIFT) &
1628 RDECODE_FRAME_HDR_INFO_AV1_DELTA_LF_PRESENT_FLAG_MASK;
1629
1630 result.frame_header_flags |= (pi->flags.reduced_tx_set << RDECODE_FRAME_HDR_INFO_AV1_REDUCED_TX_SET_USED_SHIFT) &
1631 RDECODE_FRAME_HDR_INFO_AV1_REDUCED_TX_SET_USED_MASK;
1632
1633 result.frame_header_flags |=
1634 (pi->flags.segmentation_enabled << RDECODE_FRAME_HDR_INFO_AV1_SEGMENTATION_ENABLED_SHIFT) &
1635 RDECODE_FRAME_HDR_INFO_AV1_SEGMENTATION_ENABLED_MASK;
1636
1637 result.frame_header_flags |=
1638 (pi->flags.segmentation_update_map << RDECODE_FRAME_HDR_INFO_AV1_SEGMENTATION_UPDATE_MAP_SHIFT) &
1639 RDECODE_FRAME_HDR_INFO_AV1_SEGMENTATION_UPDATE_MAP_MASK;
1640
1641 result.frame_header_flags |=
1642 (pi->flags.segmentation_temporal_update << RDECODE_FRAME_HDR_INFO_AV1_SEGMENTATION_TEMPORAL_UPDATE_SHIFT) &
1643 RDECODE_FRAME_HDR_INFO_AV1_SEGMENTATION_TEMPORAL_UPDATE_MASK;
1644
1645 result.frame_header_flags |= (pi->flags.delta_lf_multi << RDECODE_FRAME_HDR_INFO_AV1_DELTA_LF_MULTI_SHIFT) &
1646 RDECODE_FRAME_HDR_INFO_AV1_DELTA_LF_MULTI_MASK;
1647
1648 result.frame_header_flags |=
1649 (pi->flags.is_motion_mode_switchable << RDECODE_FRAME_HDR_INFO_AV1_SWITCHABLE_SKIP_MODE_SHIFT) &
1650 RDECODE_FRAME_HDR_INFO_AV1_SWITCHABLE_SKIP_MODE_MASK;
1651
1652 result.frame_header_flags |= ((!intra_only_decoding ? !(pi->refresh_frame_flags) : 1)
1653 << RDECODE_FRAME_HDR_INFO_AV1_SKIP_REFERENCE_UPDATE_SHIFT) &
1654 RDECODE_FRAME_HDR_INFO_AV1_SKIP_REFERENCE_UPDATE_MASK;
1655
1656 result.frame_header_flags |=
1657 ((!seq_hdr->flags.enable_ref_frame_mvs) << RDECODE_FRAME_HDR_INFO_AV1_DISABLE_REF_FRAME_MVS_SHIFT) &
1658 RDECODE_FRAME_HDR_INFO_AV1_DISABLE_REF_FRAME_MVS_MASK;
1659
1660 result.current_frame_id = pi->current_frame_id;
1661 result.frame_offset = pi->OrderHint;
1662 result.profile = seq_hdr->seq_profile;
1663 result.is_annexb = 0;
1664
1665 result.frame_type = pi->frame_type;
1666 result.primary_ref_frame = pi->primary_ref_frame;
1667
1668 const struct VkVideoDecodeAV1DpbSlotInfoKHR *setup_dpb_slot =
1669 intra_only_decoding
1670 ? NULL
1671 : vk_find_struct_const(frame_info->pSetupReferenceSlot->pNext, VIDEO_DECODE_AV1_DPB_SLOT_INFO_KHR);
1672
1673 /* The AMD FW interface does not need this information, since it's
1674 * redundant with the information derivable from the current frame header,
1675 * which the FW is parsing and tracking.
1676 */
1677 (void)setup_dpb_slot;
1678 result.curr_pic_idx = intra_only_decoding ? 0 : frame_info->pSetupReferenceSlot->slotIndex;
1679
1680 result.sb_size = seq_hdr->flags.use_128x128_superblock;
1681 result.interp_filter = pi->interpolation_filter;
1682 for (i = 0; i < 2; ++i)
1683 result.filter_level[i] = pi->pLoopFilter->loop_filter_level[i];
1684 result.filter_level_u = pi->pLoopFilter->loop_filter_level[2];
1685 result.filter_level_v = pi->pLoopFilter->loop_filter_level[3];
1686 result.sharpness_level = pi->pLoopFilter->loop_filter_sharpness;
1687 for (i = 0; i < 8; ++i)
1688 result.ref_deltas[i] = pi->pLoopFilter->loop_filter_ref_deltas[i];
1689 for (i = 0; i < 2; ++i)
1690 result.mode_deltas[i] = pi->pLoopFilter->loop_filter_mode_deltas[i];
1691 result.base_qindex = pi->pQuantization->base_q_idx;
1692 result.y_dc_delta_q = pi->pQuantization->DeltaQYDc;
1693 result.u_dc_delta_q = pi->pQuantization->DeltaQUDc;
1694 result.v_dc_delta_q = pi->pQuantization->DeltaQVDc;
1695 result.u_ac_delta_q = pi->pQuantization->DeltaQUAc;
1696 result.v_ac_delta_q = pi->pQuantization->DeltaQVAc;
1697
1698 if (pi->pQuantization->flags.using_qmatrix) {
1699 result.qm_y = pi->pQuantization->qm_y | 0xf0;
1700 result.qm_u = pi->pQuantization->qm_u | 0xf0;
1701 result.qm_v = pi->pQuantization->qm_v | 0xf0;
1702 } else {
1703 result.qm_y = 0xff;
1704 result.qm_u = 0xff;
1705 result.qm_v = 0xff;
1706 }
1707 result.delta_q_res = (1 << pi->delta_q_res);
1708 result.delta_lf_res = (1 << pi->delta_lf_res);
1709 result.tile_cols = pi->pTileInfo->TileCols;
1710 result.tile_rows = pi->pTileInfo->TileRows;
1711
1712 result.tx_mode = pi->TxMode;
1713 result.reference_mode = (pi->flags.reference_select == 1) ? 2 : 0;
1714 result.chroma_format = seq_hdr->pColorConfig->flags.mono_chrome ? 0 : 1;
1715 result.tile_size_bytes = pi->pTileInfo->tile_size_bytes_minus_1;
1716 result.context_update_tile_id = pi->pTileInfo->context_update_tile_id;
1717
1718 for (i = 0; i < result.tile_cols; i++)
1719 result.tile_col_start_sb[i] = pi->pTileInfo->pMiColStarts[i];
1720 result.tile_col_start_sb[result.tile_cols] =
1721 result.tile_col_start_sb[result.tile_cols - 1] + pi->pTileInfo->pWidthInSbsMinus1[result.tile_cols - 1] + 1;
1722 for (i = 0; i < pi->pTileInfo->TileRows; i++)
1723 result.tile_row_start_sb[i] = pi->pTileInfo->pMiRowStarts[i];
1724 result.tile_row_start_sb[result.tile_rows] =
1725 result.tile_row_start_sb[result.tile_rows - 1] + pi->pTileInfo->pHeightInSbsMinus1[result.tile_rows - 1] + 1;
1726
1727 result.max_width = seq_hdr->max_frame_width_minus_1 + 1;
1728 result.max_height = seq_hdr->max_frame_height_minus_1 + 1;
1729 VkExtent2D frameExtent = frame_info->dstPictureResource.codedExtent;
1730 result.superres_scale_denominator =
1731 pi->flags.use_superres ? pi->coded_denom + AV1_SUPERRES_DENOM_MIN : AV1_SUPERRES_NUM;
1732 if (pi->flags.use_superres) {
1733 result.width =
1734 (frameExtent.width * 8 + result.superres_scale_denominator / 2) / result.superres_scale_denominator;
1735 } else {
1736 result.width = frameExtent.width;
1737 }
1738 result.height = frameExtent.height;
1739
1740 result.superres_upscaled_width = frameExtent.width;
1741
1742 result.order_hint_bits = seq_hdr->order_hint_bits_minus_1 + 1;
1743
1744 /* The VCN FW will evict references that aren't specified in
1745 * ref_frame_map, even if they are still valid. To prevent this we will
1746 * specify every possible reference in ref_frame_map.
1747 */
1748 uint16_t used_slots = (1 << result.curr_pic_idx);
1749 for (i = 0; i < frame_info->referenceSlotCount; i++) {
1750 const struct VkVideoDecodeAV1DpbSlotInfoKHR *ref_dpb_slot =
1751 vk_find_struct_const(frame_info->pReferenceSlots[i].pNext, VIDEO_DECODE_AV1_DPB_SLOT_INFO_KHR);
1752 (void)ref_dpb_slot; /* Again, the FW is tracking this information for us, so no need for it. */
1753 (void)ref_dpb_slot; /* the FW is tracking this information for us, so no need for it. */
1754 int32_t slotIndex = frame_info->pReferenceSlots[i].slotIndex;
1755 result.ref_frame_map[i] = slotIndex;
1756 used_slots |= 1 << slotIndex;
1757 }
1758 /* Go through all the slots and fill in the ones that haven't been used. */
1759 for (j = 0; j < STD_VIDEO_AV1_NUM_REF_FRAMES + 1; j++) {
1760 if ((used_slots & (1 << j)) == 0) {
1761 result.ref_frame_map[i] = j;
1762 used_slots |= 1 << j;
1763 i++;
1764 }
1765 }
1766
1767 assert(used_slots == 0x1ff && i == STD_VIDEO_AV1_NUM_REF_FRAMES);
1768
1769 for (i = 0; i < STD_VIDEO_AV1_REFS_PER_FRAME; ++i) {
1770 result.frame_refs[i] =
1771 av1_pic_info->referenceNameSlotIndices[i] == -1 ? 0x7f : av1_pic_info->referenceNameSlotIndices[i];
1772 }
1773
1774 result.bit_depth_luma_minus8 = result.bit_depth_chroma_minus8 = seq_hdr->pColorConfig->BitDepth - 8;
1775
1776 int16_t *feature_data = (int16_t *)probs_ptr;
1777 int fd_idx = 0;
1778 for (i = 0; i < 8; ++i) {
1779 result.feature_mask[i] = pi->pSegmentation->FeatureEnabled[i];
1780 for (j = 0; j < 8; ++j) {
1781 result.feature_data[i][j] = pi->pSegmentation->FeatureData[i][j];
1782 feature_data[fd_idx++] = result.feature_data[i][j];
1783 }
1784 }
1785
1786 memcpy(((char *)probs_ptr + 128), result.feature_mask, 8);
1787 result.cdef_damping = pi->pCDEF->cdef_damping_minus_3 + 3;
1788 result.cdef_bits = pi->pCDEF->cdef_bits;
1789 for (i = 0; i < 8; ++i) {
1790 result.cdef_strengths[i] = (pi->pCDEF->cdef_y_pri_strength[i] << 2) + pi->pCDEF->cdef_y_sec_strength[i];
1791 result.cdef_uv_strengths[i] = (pi->pCDEF->cdef_uv_pri_strength[i] << 2) + pi->pCDEF->cdef_uv_sec_strength[i];
1792 }
1793
1794 if (pi->flags.UsesLr) {
1795 for (int plane = 0; plane < STD_VIDEO_AV1_MAX_NUM_PLANES; plane++) {
1796 result.frame_restoration_type[plane] = pi->pLoopRestoration->FrameRestorationType[plane];
1797 result.log2_restoration_unit_size_minus5[plane] = pi->pLoopRestoration->LoopRestorationSize[plane];
1798 }
1799 }
1800
1801 if (seq_hdr->pColorConfig->BitDepth > 8) {
1802 if (vid->vk.picture_format == VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16 ||
1803 vid->vk.picture_format == VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16) {
1804 result.p010_mode = 1;
1805 result.msb_mode = 1;
1806 } else {
1807 result.luma_10to8 = 1;
1808 result.chroma_10to8 = 1;
1809 }
1810 }
1811
1812 result.preskip_segid = 0;
1813 result.last_active_segid = 0;
1814 for (i = 0; i < 8; i++) {
1815 for (j = 0; j < 8; j++) {
1816 if (result.feature_mask[i] & (1 << j)) {
1817 result.last_active_segid = i;
1818 if (j >= 5)
1819 result.preskip_segid = 1;
1820 }
1821 }
1822 }
1823 result.seg_lossless_flag = 0;
1824 for (i = 0; i < 8; ++i) {
1825 int av1_get_qindex, qindex;
1826 int segfeature_active = result.feature_mask[i] & (1 << 0);
1827 if (segfeature_active) {
1828 int seg_qindex = result.base_qindex + result.feature_data[i][0];
1829 av1_get_qindex = seg_qindex < 0 ? 0 : (seg_qindex > 255 ? 255 : seg_qindex);
1830 } else {
1831 av1_get_qindex = result.base_qindex;
1832 }
1833 qindex = pi->flags.segmentation_enabled ? av1_get_qindex : result.base_qindex;
1834 result.seg_lossless_flag |= (((qindex == 0) && result.y_dc_delta_q == 0 && result.u_dc_delta_q == 0 &&
1835 result.v_dc_delta_q == 0 && result.u_ac_delta_q == 0 && result.v_ac_delta_q == 0)
1836 << i);
1837 }
1838
1839 rvcn_dec_film_grain_params_t *fg_params = &result.film_grain;
1840 fg_params->apply_grain = pi->flags.apply_grain;
1841 if (fg_params->apply_grain) {
1842 rvcn_dec_av1_fg_init_buf_t *fg_buf = (rvcn_dec_av1_fg_init_buf_t *)((char *)probs_ptr + 256);
1843 fg_params->random_seed = pi->pFilmGrain->grain_seed;
1844 fg_params->grain_scale_shift = pi->pFilmGrain->grain_scale_shift;
1845 fg_params->scaling_shift = pi->pFilmGrain->grain_scaling_minus_8 + 8;
1846 fg_params->chroma_scaling_from_luma = pi->pFilmGrain->flags.chroma_scaling_from_luma;
1847 fg_params->num_y_points = pi->pFilmGrain->num_y_points;
1848 fg_params->num_cb_points = pi->pFilmGrain->num_cb_points;
1849 fg_params->num_cr_points = pi->pFilmGrain->num_cr_points;
1850 fg_params->cb_mult = pi->pFilmGrain->cb_mult;
1851 fg_params->cb_luma_mult = pi->pFilmGrain->cb_luma_mult;
1852 fg_params->cb_offset = pi->pFilmGrain->cb_offset;
1853 fg_params->cr_mult = pi->pFilmGrain->cr_mult;
1854 fg_params->cr_luma_mult = pi->pFilmGrain->cr_luma_mult;
1855 fg_params->cr_offset = pi->pFilmGrain->cr_offset;
1856 fg_params->bit_depth_minus_8 = result.bit_depth_luma_minus8;
1857 for (i = 0; i < fg_params->num_y_points; ++i) {
1858 fg_params->scaling_points_y[i][0] = pi->pFilmGrain->point_y_value[i];
1859 fg_params->scaling_points_y[i][1] = pi->pFilmGrain->point_y_scaling[i];
1860 }
1861 for (i = 0; i < fg_params->num_cb_points; ++i) {
1862 fg_params->scaling_points_cb[i][0] = pi->pFilmGrain->point_cb_value[i];
1863 fg_params->scaling_points_cb[i][1] = pi->pFilmGrain->point_cb_scaling[i];
1864 }
1865 for (i = 0; i < fg_params->num_cr_points; ++i) {
1866 fg_params->scaling_points_cr[i][0] = pi->pFilmGrain->point_cr_value[i];
1867 fg_params->scaling_points_cr[i][1] = pi->pFilmGrain->point_cr_scaling[i];
1868 }
1869
1870 fg_params->ar_coeff_lag = pi->pFilmGrain->ar_coeff_lag;
1871 fg_params->ar_coeff_shift = pi->pFilmGrain->ar_coeff_shift_minus_6 + 6;
1872
1873 for (i = 0; i < 24; ++i)
1874 fg_params->ar_coeffs_y[i] = pi->pFilmGrain->ar_coeffs_y_plus_128[i] - 128;
1875
1876 for (i = 0; i < 25; ++i) {
1877 fg_params->ar_coeffs_cb[i] = pi->pFilmGrain->ar_coeffs_cb_plus_128[i] - 128;
1878 fg_params->ar_coeffs_cr[i] = pi->pFilmGrain->ar_coeffs_cr_plus_128[i] - 128;
1879 }
1880
1881 fg_params->overlap_flag = pi->pFilmGrain->flags.overlap_flag;
1882 fg_params->clip_to_restricted_range = pi->pFilmGrain->flags.clip_to_restricted_range;
1883 ac_vcn_av1_init_film_grain_buffer(pdev->av1_version, fg_params, fg_buf);
1884 }
1885
1886 result.uncompressed_header_size = 0;
1887 for (i = 0; i < STD_VIDEO_AV1_NUM_REF_FRAMES; ++i) {
1888 result.global_motion[i].wmtype = pi->pGlobalMotion->GmType[i];
1889 for (j = 0; j < STD_VIDEO_AV1_GLOBAL_MOTION_PARAMS; ++j)
1890 result.global_motion[i].wmmat[j] = pi->pGlobalMotion->gm_params[i][j];
1891 }
1892 for (i = 0; i < av1_pic_info->tileCount && i < 256; ++i) {
1893 result.tile_info[i].offset = av1_pic_info->pTileOffsets[i];
1894 result.tile_info[i].size = av1_pic_info->pTileSizes[i];
1895 }
1896
1897 return result;
1898 }
1899
1900
1901 static bool
rvcn_dec_message_decode(struct radv_cmd_buffer * cmd_buffer,struct radv_video_session * vid,struct radv_video_session_params * params,void * ptr,void * it_probs_ptr,uint32_t * slice_offset,const struct VkVideoDecodeInfoKHR * frame_info)1902 rvcn_dec_message_decode(struct radv_cmd_buffer *cmd_buffer, struct radv_video_session *vid,
1903 struct radv_video_session_params *params, void *ptr, void *it_probs_ptr, uint32_t *slice_offset,
1904 const struct VkVideoDecodeInfoKHR *frame_info)
1905 {
1906 struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
1907 const struct radv_physical_device *pdev = radv_device_physical(device);
1908 rvcn_dec_message_header_t *header;
1909 rvcn_dec_message_index_t *index_codec;
1910 rvcn_dec_message_decode_t *decode;
1911 rvcn_dec_message_index_t *index_dynamic_dpb = NULL;
1912 rvcn_dec_message_dynamic_dpb_t2_t *dynamic_dpb_t2 = NULL;
1913 void *codec;
1914 unsigned sizes = 0, offset_decode, offset_codec, offset_dynamic_dpb;
1915 struct radv_image_view *dst_iv = radv_image_view_from_handle(frame_info->dstPictureResource.imageViewBinding);
1916 struct radv_image *img = dst_iv->image;
1917 struct radv_image_plane *luma = &img->planes[0];
1918 struct radv_image_plane *chroma = &img->planes[1];
1919
1920 header = ptr;
1921 sizes += sizeof(rvcn_dec_message_header_t);
1922
1923 index_codec = (void *)((char *)header + sizes);
1924 sizes += sizeof(rvcn_dec_message_index_t);
1925
1926 if (vid->dpb_type == DPB_DYNAMIC_TIER_2) {
1927 index_dynamic_dpb = (void *)((char *)header + sizes);
1928 sizes += sizeof(rvcn_dec_message_index_t);
1929 }
1930
1931 offset_decode = sizes;
1932 decode = (void *)((char *)header + sizes);
1933 sizes += sizeof(rvcn_dec_message_decode_t);
1934
1935 if (vid->dpb_type == DPB_DYNAMIC_TIER_2) {
1936 offset_dynamic_dpb = sizes;
1937 dynamic_dpb_t2 = (void *)((char *)header + sizes);
1938 sizes += sizeof(rvcn_dec_message_dynamic_dpb_t2_t);
1939 }
1940
1941 offset_codec = sizes;
1942 codec = (void *)((char *)header + sizes);
1943
1944 memset(ptr, 0, sizes);
1945
1946 header->header_size = sizeof(rvcn_dec_message_header_t);
1947 header->total_size = sizes;
1948 header->msg_type = RDECODE_MSG_DECODE;
1949 header->stream_handle = vid->stream_handle;
1950 header->status_report_feedback_number = vid->dbg_frame_cnt++;
1951
1952 header->index[0].message_id = RDECODE_MESSAGE_DECODE;
1953 header->index[0].offset = offset_decode;
1954 header->index[0].size = sizeof(rvcn_dec_message_decode_t);
1955 header->index[0].filled = 0;
1956 header->num_buffers = 1;
1957
1958 index_codec->offset = offset_codec;
1959 index_codec->filled = 0;
1960 ++header->num_buffers;
1961
1962 if (vid->dpb_type == DPB_DYNAMIC_TIER_2) {
1963 index_dynamic_dpb->message_id = RDECODE_MESSAGE_DYNAMIC_DPB;
1964 index_dynamic_dpb->offset = offset_dynamic_dpb;
1965 index_dynamic_dpb->filled = 0;
1966 ++header->num_buffers;
1967 index_dynamic_dpb->size = sizeof(rvcn_dec_message_dynamic_dpb_t2_t);
1968 }
1969
1970 decode->stream_type = vid->stream_type;
1971 decode->decode_flags = 0;
1972 decode->width_in_samples = frame_info->dstPictureResource.codedExtent.width;
1973 decode->height_in_samples = frame_info->dstPictureResource.codedExtent.height;
1974
1975 decode->bsd_size = frame_info->srcBufferRange;
1976
1977 decode->dt_size = dst_iv->image->planes[0].surface.total_size + dst_iv->image->planes[1].surface.total_size;
1978 decode->sct_size = 0;
1979 decode->sc_coeff_size = 0;
1980
1981 decode->sw_ctxt_size = RDECODE_SESSION_CONTEXT_SIZE;
1982
1983 decode->dt_pitch = luma->surface.u.gfx9.surf_pitch * luma->surface.blk_w;
1984 decode->dt_uv_pitch = chroma->surface.u.gfx9.surf_pitch * chroma->surface.blk_w;
1985
1986 if (luma->surface.meta_offset) {
1987 fprintf(stderr, "DCC SURFACES NOT SUPPORTED.\n");
1988 return false;
1989 }
1990
1991 decode->dt_tiling_mode = 0;
1992 decode->dt_swizzle_mode = luma->surface.u.gfx9.swizzle_mode;
1993 decode->dt_array_mode = pdev->vid_addr_gfx_mode;
1994 decode->dt_field_mode = 0;
1995 decode->dt_surf_tile_config = 0;
1996 decode->dt_uv_surf_tile_config = 0;
1997
1998 int dt_array_idx = frame_info->dstPictureResource.baseArrayLayer + dst_iv->vk.base_array_layer;
1999
2000 decode->dt_luma_top_offset = luma->surface.u.gfx9.surf_offset +
2001 dt_array_idx * luma->surface.u.gfx9.surf_slice_size;
2002 decode->dt_chroma_top_offset = chroma->surface.u.gfx9.surf_offset +
2003 dt_array_idx * chroma->surface.u.gfx9.surf_slice_size;
2004 decode->dt_luma_bottom_offset = decode->dt_luma_top_offset;
2005 decode->dt_chroma_bottom_offset = decode->dt_chroma_top_offset;
2006
2007 if (vid->stream_type == RDECODE_CODEC_AV1)
2008 decode->db_pitch_uv = chroma->surface.u.gfx9.surf_pitch * chroma->surface.blk_w;
2009
2010 *slice_offset = 0;
2011
2012 /* Intra-only decoding will only work without a setup slot for AV1
2013 * (non-filmgrain) currently, other codecs require the application to pass a
2014 * setup slot for this use-case, since the FW is not able to skip write-out
2015 * for H26X. In order to fix that properly, additional scratch space will
2016 * be needed in the video session just for intra-only DPB targets.
2017 */
2018 int dpb_update_required = 1;
2019
2020 switch (vid->vk.op) {
2021 case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR: {
2022 index_codec->size = sizeof(rvcn_dec_message_avc_t);
2023 rvcn_dec_message_avc_t avc = get_h264_msg(vid, params, frame_info, slice_offset, &decode->width_in_samples,
2024 &decode->height_in_samples, it_probs_ptr);
2025 memcpy(codec, (void *)&avc, sizeof(rvcn_dec_message_avc_t));
2026 index_codec->message_id = RDECODE_MESSAGE_AVC;
2027 break;
2028 }
2029 case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR: {
2030 index_codec->size = sizeof(rvcn_dec_message_hevc_t);
2031 rvcn_dec_message_hevc_t hevc = get_h265_msg(device, vid, params, frame_info,
2032 &decode->width_in_samples,
2033 &decode->height_in_samples,
2034 it_probs_ptr);
2035 memcpy(codec, (void *)&hevc, sizeof(rvcn_dec_message_hevc_t));
2036 index_codec->message_id = RDECODE_MESSAGE_HEVC;
2037 break;
2038 }
2039 case VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR: {
2040 index_codec->size = sizeof(rvcn_dec_message_av1_t);
2041 rvcn_dec_message_av1_t av1 = get_av1_msg(device, vid, params, frame_info, it_probs_ptr, &dpb_update_required);
2042 memcpy(codec, (void *)&av1, sizeof(rvcn_dec_message_av1_t));
2043 index_codec->message_id = RDECODE_MESSAGE_AV1;
2044 assert(frame_info->referenceSlotCount < 9);
2045 break;
2046 }
2047 default:
2048 unreachable("unknown operation");
2049 }
2050
2051 if (dpb_update_required)
2052 assert(frame_info->pSetupReferenceSlot != NULL);
2053
2054 struct radv_image_view *dpb_iv =
2055 frame_info->pSetupReferenceSlot
2056 ? radv_image_view_from_handle(frame_info->pSetupReferenceSlot->pPictureResource->imageViewBinding)
2057 : NULL;
2058 struct radv_image *dpb = dpb_iv ? dpb_iv->image : img;
2059
2060 int dpb_array_idx = 0;
2061 if (dpb_update_required)
2062 dpb_array_idx = frame_info->pSetupReferenceSlot->pPictureResource->baseArrayLayer + dpb_iv->vk.base_array_layer;
2063
2064 decode->dpb_size = (vid->dpb_type != DPB_DYNAMIC_TIER_2) ? dpb->size : 0;
2065 decode->db_pitch = dpb->planes[0].surface.u.gfx9.surf_pitch;
2066 decode->db_aligned_height = dpb->planes[0].surface.u.gfx9.surf_height;
2067 decode->db_swizzle_mode = dpb->planes[0].surface.u.gfx9.swizzle_mode;
2068 decode->db_array_mode = pdev->vid_addr_gfx_mode;
2069
2070 decode->hw_ctxt_size = vid->ctx.size;
2071
2072 if (vid->dpb_type != DPB_DYNAMIC_TIER_2)
2073 return true;
2074
2075 uint64_t addr;
2076 radv_cs_add_buffer(device->ws, cmd_buffer->cs, dpb->bindings[0].bo);
2077 addr = radv_buffer_get_va(dpb->bindings[0].bo) + dpb->bindings[0].offset;
2078
2079 addr += dpb_array_idx * (dpb->planes[0].surface.u.gfx9.surf_slice_size + dpb->planes[1].surface.u.gfx9.surf_slice_size);
2080 dynamic_dpb_t2->dpbCurrLo = addr;
2081 dynamic_dpb_t2->dpbCurrHi = addr >> 32;
2082
2083 if (vid->vk.op == VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR) {
2084 /* The following loop will fill in the references for the current frame,
2085 * this ensures all DPB addresses are "valid" (pointing at the current
2086 * decode target), so that the firmware doesn't evict things it should not.
2087 * It will not perform any actual writes to these dummy slots.
2088 */
2089 for (int i = 0; i < STD_VIDEO_AV1_NUM_REF_FRAMES; i++) {
2090 dynamic_dpb_t2->dpbAddrHi[i] = addr;
2091 dynamic_dpb_t2->dpbAddrLo[i] = addr >> 32;
2092 }
2093 }
2094
2095 for (int i = 0; i < frame_info->referenceSlotCount; i++) {
2096 int32_t slot_idx = frame_info->pReferenceSlots[i].slotIndex;
2097 assert(slot_idx >= 0 && slot_idx < 16);
2098 struct radv_image_view *f_dpb_iv =
2099 radv_image_view_from_handle(frame_info->pReferenceSlots[i].pPictureResource->imageViewBinding);
2100 assert(f_dpb_iv != NULL);
2101 struct radv_image *dpb_img = f_dpb_iv->image;
2102 int f_dpb_array_idx = frame_info->pReferenceSlots[i].pPictureResource->baseArrayLayer + f_dpb_iv->vk.base_array_layer;
2103
2104 radv_cs_add_buffer(device->ws, cmd_buffer->cs, dpb_img->bindings[0].bo);
2105 addr = radv_buffer_get_va(dpb_img->bindings[0].bo) + dpb_img->bindings[0].offset;
2106 addr += f_dpb_array_idx * (dpb_img->planes[0].surface.u.gfx9.surf_slice_size + dpb_img->planes[1].surface.u.gfx9.surf_slice_size);
2107 dynamic_dpb_t2->dpbAddrLo[i] = addr;
2108 dynamic_dpb_t2->dpbAddrHi[i] = addr >> 32;
2109
2110 ++dynamic_dpb_t2->dpbArraySize;
2111 }
2112
2113 radv_cs_add_buffer(device->ws, cmd_buffer->cs, dpb->bindings[0].bo);
2114 addr = radv_buffer_get_va(dpb->bindings[0].bo) + dpb->bindings[0].offset;
2115 addr += dpb_array_idx * (dpb->planes[0].surface.u.gfx9.surf_slice_size + dpb->planes[1].surface.u.gfx9.surf_slice_size);
2116 dynamic_dpb_t2->dpbCurrLo = addr;
2117 dynamic_dpb_t2->dpbCurrHi = addr >> 32;
2118
2119 decode->decode_flags = 1;
2120 dynamic_dpb_t2->dpbConfigFlags = 0;
2121
2122 dynamic_dpb_t2->dpbLumaPitch = dpb->planes[0].surface.u.gfx9.surf_pitch;
2123 dynamic_dpb_t2->dpbLumaAlignedHeight = dpb->planes[0].surface.u.gfx9.surf_height;
2124 dynamic_dpb_t2->dpbLumaAlignedSize = dpb->planes[0].surface.u.gfx9.surf_slice_size;
2125
2126 dynamic_dpb_t2->dpbChromaPitch = dpb->planes[1].surface.u.gfx9.surf_pitch;
2127 dynamic_dpb_t2->dpbChromaAlignedHeight = dpb->planes[1].surface.u.gfx9.surf_height;
2128 dynamic_dpb_t2->dpbChromaAlignedSize = dpb->planes[1].surface.u.gfx9.surf_slice_size;
2129
2130 return true;
2131 }
2132
2133 static struct ruvd_h264
get_uvd_h264_msg(struct radv_video_session * vid,struct radv_video_session_params * params,const struct VkVideoDecodeInfoKHR * frame_info,uint32_t * slice_offset,uint32_t * width_in_samples,uint32_t * height_in_samples,void * it_ptr)2134 get_uvd_h264_msg(struct radv_video_session *vid, struct radv_video_session_params *params,
2135 const struct VkVideoDecodeInfoKHR *frame_info, uint32_t *slice_offset, uint32_t *width_in_samples,
2136 uint32_t *height_in_samples, void *it_ptr)
2137 {
2138 struct ruvd_h264 result;
2139 const struct VkVideoDecodeH264PictureInfoKHR *h264_pic_info =
2140 vk_find_struct_const(frame_info->pNext, VIDEO_DECODE_H264_PICTURE_INFO_KHR);
2141
2142 *slice_offset = h264_pic_info->pSliceOffsets[0];
2143
2144 memset(&result, 0, sizeof(result));
2145
2146 const StdVideoH264SequenceParameterSet *sps =
2147 vk_video_find_h264_dec_std_sps(¶ms->vk, h264_pic_info->pStdPictureInfo->seq_parameter_set_id);
2148 switch (sps->profile_idc) {
2149 case STD_VIDEO_H264_PROFILE_IDC_BASELINE:
2150 result.profile = RUVD_H264_PROFILE_BASELINE;
2151 break;
2152 case STD_VIDEO_H264_PROFILE_IDC_MAIN:
2153 result.profile = RUVD_H264_PROFILE_MAIN;
2154 break;
2155 case STD_VIDEO_H264_PROFILE_IDC_HIGH:
2156 result.profile = RUVD_H264_PROFILE_HIGH;
2157 break;
2158 default:
2159 fprintf(stderr, "UNSUPPORTED CODEC %d\n", sps->profile_idc);
2160 result.profile = RUVD_H264_PROFILE_MAIN;
2161 break;
2162 }
2163
2164 *width_in_samples = (sps->pic_width_in_mbs_minus1 + 1) * 16;
2165 *height_in_samples = (sps->pic_height_in_map_units_minus1 + 1) * 16;
2166 if (!sps->flags.frame_mbs_only_flag)
2167 *height_in_samples *= 2;
2168 result.level = get_h264_level(sps->level_idc);
2169
2170 result.sps_info_flags = 0;
2171
2172 result.sps_info_flags |= sps->flags.direct_8x8_inference_flag << 0;
2173 result.sps_info_flags |= sps->flags.mb_adaptive_frame_field_flag << 1;
2174 result.sps_info_flags |= sps->flags.frame_mbs_only_flag << 2;
2175 result.sps_info_flags |= sps->flags.delta_pic_order_always_zero_flag << 3;
2176 result.sps_info_flags |= 1 << RDECODE_SPS_INFO_H264_EXTENSION_SUPPORT_FLAG_SHIFT;
2177
2178 result.bit_depth_luma_minus8 = sps->bit_depth_luma_minus8;
2179 result.bit_depth_chroma_minus8 = sps->bit_depth_chroma_minus8;
2180 result.log2_max_frame_num_minus4 = sps->log2_max_frame_num_minus4;
2181 result.pic_order_cnt_type = sps->pic_order_cnt_type;
2182 result.log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_pic_order_cnt_lsb_minus4;
2183
2184 result.chroma_format = sps->chroma_format_idc;
2185
2186 const StdVideoH264PictureParameterSet *pps =
2187 vk_video_find_h264_dec_std_pps(¶ms->vk, h264_pic_info->pStdPictureInfo->pic_parameter_set_id);
2188 result.pps_info_flags = 0;
2189 result.pps_info_flags |= pps->flags.transform_8x8_mode_flag << 0;
2190 result.pps_info_flags |= pps->flags.redundant_pic_cnt_present_flag << 1;
2191 result.pps_info_flags |= pps->flags.constrained_intra_pred_flag << 2;
2192 result.pps_info_flags |= pps->flags.deblocking_filter_control_present_flag << 3;
2193 result.pps_info_flags |= pps->weighted_bipred_idc << 4;
2194 result.pps_info_flags |= pps->flags.weighted_pred_flag << 6;
2195 result.pps_info_flags |= pps->flags.bottom_field_pic_order_in_frame_present_flag << 7;
2196 result.pps_info_flags |= pps->flags.entropy_coding_mode_flag << 8;
2197
2198 result.pic_init_qp_minus26 = pps->pic_init_qp_minus26;
2199 result.chroma_qp_index_offset = pps->chroma_qp_index_offset;
2200 result.second_chroma_qp_index_offset = pps->second_chroma_qp_index_offset;
2201
2202 StdVideoH264ScalingLists scaling_lists;
2203 vk_video_derive_h264_scaling_list(sps, pps, &scaling_lists);
2204 update_h264_scaling(result.scaling_list_4x4, result.scaling_list_8x8, &scaling_lists);
2205
2206 memset(it_ptr, 0, IT_SCALING_TABLE_SIZE);
2207 memcpy(it_ptr, result.scaling_list_4x4, 6 * 16);
2208 memcpy((char *)it_ptr + 96, result.scaling_list_8x8, 2 * 64);
2209
2210 result.num_ref_idx_l0_active_minus1 = pps->num_ref_idx_l0_default_active_minus1;
2211 result.num_ref_idx_l1_active_minus1 = pps->num_ref_idx_l1_default_active_minus1;
2212
2213 result.curr_field_order_cnt_list[0] = h264_pic_info->pStdPictureInfo->PicOrderCnt[0];
2214 result.curr_field_order_cnt_list[1] = h264_pic_info->pStdPictureInfo->PicOrderCnt[1];
2215
2216 result.frame_num = h264_pic_info->pStdPictureInfo->frame_num;
2217
2218 result.num_ref_frames = sps->max_num_ref_frames;
2219 memset(result.ref_frame_list, 0xff, sizeof(unsigned char) * 16);
2220 memset(result.frame_num_list, 0, sizeof(unsigned int) * 16);
2221 for (unsigned i = 0; i < frame_info->referenceSlotCount; i++) {
2222 int idx = frame_info->pReferenceSlots[i].slotIndex;
2223 const struct VkVideoDecodeH264DpbSlotInfoKHR *dpb_slot =
2224 vk_find_struct_const(frame_info->pReferenceSlots[i].pNext, VIDEO_DECODE_H264_DPB_SLOT_INFO_KHR);
2225
2226 result.frame_num_list[i] = dpb_slot->pStdReferenceInfo->FrameNum;
2227 result.field_order_cnt_list[i][0] = dpb_slot->pStdReferenceInfo->PicOrderCnt[0];
2228 result.field_order_cnt_list[i][1] = dpb_slot->pStdReferenceInfo->PicOrderCnt[1];
2229
2230 result.ref_frame_list[i] = idx;
2231
2232 if (dpb_slot->pStdReferenceInfo->flags.used_for_long_term_reference)
2233 result.ref_frame_list[i] |= 0x80;
2234 }
2235 result.curr_pic_ref_frame_num = frame_info->referenceSlotCount;
2236 result.decoded_pic_idx = frame_info->pSetupReferenceSlot->slotIndex;
2237
2238 return result;
2239 }
2240
2241 static struct ruvd_h265
get_uvd_h265_msg(struct radv_device * device,struct radv_video_session * vid,struct radv_video_session_params * params,const struct VkVideoDecodeInfoKHR * frame_info,uint32_t * width_in_samples,uint32_t * height_in_samples,void * it_ptr)2242 get_uvd_h265_msg(struct radv_device *device, struct radv_video_session *vid, struct radv_video_session_params *params,
2243 const struct VkVideoDecodeInfoKHR *frame_info, uint32_t *width_in_samples,
2244 uint32_t *height_in_samples, void *it_ptr)
2245 {
2246 const struct radv_physical_device *pdev = radv_device_physical(device);
2247 struct ruvd_h265 result;
2248 int i, j;
2249 const struct VkVideoDecodeH265PictureInfoKHR *h265_pic_info =
2250 vk_find_struct_const(frame_info->pNext, VIDEO_DECODE_H265_PICTURE_INFO_KHR);
2251
2252 memset(&result, 0, sizeof(result));
2253
2254 const StdVideoH265SequenceParameterSet *sps =
2255 vk_video_find_h265_dec_std_sps(¶ms->vk, h265_pic_info->pStdPictureInfo->pps_seq_parameter_set_id);
2256 const StdVideoH265PictureParameterSet *pps =
2257 vk_video_find_h265_dec_std_pps(¶ms->vk, h265_pic_info->pStdPictureInfo->pps_pic_parameter_set_id);
2258
2259 result.sps_info_flags = 0;
2260 result.sps_info_flags |= sps->flags.scaling_list_enabled_flag << 0;
2261 result.sps_info_flags |= sps->flags.amp_enabled_flag << 1;
2262 result.sps_info_flags |= sps->flags.sample_adaptive_offset_enabled_flag << 2;
2263 result.sps_info_flags |= sps->flags.pcm_enabled_flag << 3;
2264 result.sps_info_flags |= sps->flags.pcm_loop_filter_disabled_flag << 4;
2265 result.sps_info_flags |= sps->flags.long_term_ref_pics_present_flag << 5;
2266 result.sps_info_flags |= sps->flags.sps_temporal_mvp_enabled_flag << 6;
2267 result.sps_info_flags |= sps->flags.strong_intra_smoothing_enabled_flag << 7;
2268 result.sps_info_flags |= sps->flags.separate_colour_plane_flag << 8;
2269
2270 if (pdev->info.family == CHIP_CARRIZO)
2271 result.sps_info_flags |= 1 << 9;
2272
2273 *width_in_samples = sps->pic_width_in_luma_samples;
2274 *height_in_samples = sps->pic_height_in_luma_samples;
2275 result.chroma_format = sps->chroma_format_idc;
2276 result.bit_depth_luma_minus8 = sps->bit_depth_luma_minus8;
2277 result.bit_depth_chroma_minus8 = sps->bit_depth_chroma_minus8;
2278 result.log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_pic_order_cnt_lsb_minus4;
2279 result.sps_max_dec_pic_buffering_minus1 =
2280 sps->pDecPicBufMgr->max_dec_pic_buffering_minus1[sps->sps_max_sub_layers_minus1];
2281 result.log2_min_luma_coding_block_size_minus3 = sps->log2_min_luma_coding_block_size_minus3;
2282 result.log2_diff_max_min_luma_coding_block_size = sps->log2_diff_max_min_luma_coding_block_size;
2283 result.log2_min_transform_block_size_minus2 = sps->log2_min_luma_transform_block_size_minus2;
2284 result.log2_diff_max_min_transform_block_size = sps->log2_diff_max_min_luma_transform_block_size;
2285 result.max_transform_hierarchy_depth_inter = sps->max_transform_hierarchy_depth_inter;
2286 result.max_transform_hierarchy_depth_intra = sps->max_transform_hierarchy_depth_intra;
2287 if (sps->flags.pcm_enabled_flag) {
2288 result.pcm_sample_bit_depth_luma_minus1 = sps->pcm_sample_bit_depth_luma_minus1;
2289 result.pcm_sample_bit_depth_chroma_minus1 = sps->pcm_sample_bit_depth_chroma_minus1;
2290 result.log2_min_pcm_luma_coding_block_size_minus3 = sps->log2_min_pcm_luma_coding_block_size_minus3;
2291 result.log2_diff_max_min_pcm_luma_coding_block_size = sps->log2_diff_max_min_pcm_luma_coding_block_size;
2292 }
2293 result.num_short_term_ref_pic_sets = sps->num_short_term_ref_pic_sets;
2294
2295 result.pps_info_flags = 0;
2296 result.pps_info_flags |= pps->flags.dependent_slice_segments_enabled_flag << 0;
2297 result.pps_info_flags |= pps->flags.output_flag_present_flag << 1;
2298 result.pps_info_flags |= pps->flags.sign_data_hiding_enabled_flag << 2;
2299 result.pps_info_flags |= pps->flags.cabac_init_present_flag << 3;
2300 result.pps_info_flags |= pps->flags.constrained_intra_pred_flag << 4;
2301 result.pps_info_flags |= pps->flags.transform_skip_enabled_flag << 5;
2302 result.pps_info_flags |= pps->flags.cu_qp_delta_enabled_flag << 6;
2303 result.pps_info_flags |= pps->flags.pps_slice_chroma_qp_offsets_present_flag << 7;
2304 result.pps_info_flags |= pps->flags.weighted_pred_flag << 8;
2305 result.pps_info_flags |= pps->flags.weighted_bipred_flag << 9;
2306 result.pps_info_flags |= pps->flags.transquant_bypass_enabled_flag << 10;
2307 result.pps_info_flags |= pps->flags.tiles_enabled_flag << 11;
2308 result.pps_info_flags |= pps->flags.entropy_coding_sync_enabled_flag << 12;
2309 result.pps_info_flags |= pps->flags.uniform_spacing_flag << 13;
2310 result.pps_info_flags |= pps->flags.loop_filter_across_tiles_enabled_flag << 14;
2311 result.pps_info_flags |= pps->flags.pps_loop_filter_across_slices_enabled_flag << 15;
2312 result.pps_info_flags |= pps->flags.deblocking_filter_override_enabled_flag << 16;
2313 result.pps_info_flags |= pps->flags.pps_deblocking_filter_disabled_flag << 17;
2314 result.pps_info_flags |= pps->flags.lists_modification_present_flag << 18;
2315 result.pps_info_flags |= pps->flags.slice_segment_header_extension_present_flag << 19;
2316
2317 result.num_extra_slice_header_bits = pps->num_extra_slice_header_bits;
2318 result.num_long_term_ref_pic_sps = sps->num_long_term_ref_pics_sps;
2319 result.num_ref_idx_l0_default_active_minus1 = pps->num_ref_idx_l0_default_active_minus1;
2320 result.num_ref_idx_l1_default_active_minus1 = pps->num_ref_idx_l1_default_active_minus1;
2321 result.pps_cb_qp_offset = pps->pps_cb_qp_offset;
2322 result.pps_cr_qp_offset = pps->pps_cr_qp_offset;
2323 result.pps_beta_offset_div2 = pps->pps_beta_offset_div2;
2324 result.pps_tc_offset_div2 = pps->pps_tc_offset_div2;
2325 result.diff_cu_qp_delta_depth = pps->diff_cu_qp_delta_depth;
2326 result.num_tile_columns_minus1 = pps->num_tile_columns_minus1;
2327 result.num_tile_rows_minus1 = pps->num_tile_rows_minus1;
2328 result.log2_parallel_merge_level_minus2 = pps->log2_parallel_merge_level_minus2;
2329 result.init_qp_minus26 = pps->init_qp_minus26;
2330
2331 for (i = 0; i < 19; ++i)
2332 result.column_width_minus1[i] = pps->column_width_minus1[i];
2333
2334 for (i = 0; i < 21; ++i)
2335 result.row_height_minus1[i] = pps->row_height_minus1[i];
2336
2337 result.num_delta_pocs_ref_rps_idx = h265_pic_info->pStdPictureInfo->NumDeltaPocsOfRefRpsIdx;
2338 result.curr_poc = h265_pic_info->pStdPictureInfo->PicOrderCntVal;
2339
2340 uint8_t idxs[16];
2341 memset(result.poc_list, 0, 16 * sizeof(int));
2342 memset(result.ref_pic_list, 0x7f, 16);
2343 memset(idxs, 0xff, 16);
2344 for (i = 0; i < frame_info->referenceSlotCount; i++) {
2345 const struct VkVideoDecodeH265DpbSlotInfoKHR *dpb_slot =
2346 vk_find_struct_const(frame_info->pReferenceSlots[i].pNext, VIDEO_DECODE_H265_DPB_SLOT_INFO_KHR);
2347 int idx = frame_info->pReferenceSlots[i].slotIndex;
2348 result.poc_list[i] = dpb_slot->pStdReferenceInfo->PicOrderCntVal;
2349 result.ref_pic_list[i] = idx;
2350 idxs[idx] = i;
2351 }
2352 result.curr_idx = frame_info->pSetupReferenceSlot->slotIndex;
2353
2354 #define IDXS(x) ((x) == 0xff ? 0xff : idxs[(x)])
2355 for (i = 0; i < 8; ++i)
2356 result.ref_pic_set_st_curr_before[i] = IDXS(h265_pic_info->pStdPictureInfo->RefPicSetStCurrBefore[i]);
2357
2358 for (i = 0; i < 8; ++i)
2359 result.ref_pic_set_st_curr_after[i] = IDXS(h265_pic_info->pStdPictureInfo->RefPicSetStCurrAfter[i]);
2360
2361 for (i = 0; i < 8; ++i)
2362 result.ref_pic_set_lt_curr[i] = IDXS(h265_pic_info->pStdPictureInfo->RefPicSetLtCurr[i]);
2363
2364 const StdVideoH265ScalingLists *scaling_lists = NULL;
2365 if (pps->flags.pps_scaling_list_data_present_flag)
2366 scaling_lists = pps->pScalingLists;
2367 else if (sps->flags.sps_scaling_list_data_present_flag)
2368 scaling_lists = sps->pScalingLists;
2369
2370 update_h265_scaling(it_ptr, scaling_lists);
2371 if (scaling_lists) {
2372 for (i = 0; i < STD_VIDEO_H265_SCALING_LIST_16X16_NUM_LISTS; ++i)
2373 result.ucScalingListDCCoefSizeID2[i] = scaling_lists->ScalingListDCCoef16x16[i];
2374
2375 for (i = 0; i < STD_VIDEO_H265_SCALING_LIST_32X32_NUM_LISTS; ++i)
2376 result.ucScalingListDCCoefSizeID3[i] = scaling_lists->ScalingListDCCoef32x32[i];
2377 }
2378
2379 for (i = 0; i < 2; i++) {
2380 for (j = 0; j < 15; j++)
2381 result.direct_reflist[i][j] = 0xff;
2382 }
2383
2384 if (vid->vk.h265.profile_idc == STD_VIDEO_H265_PROFILE_IDC_MAIN_10) {
2385 if (vid->vk.picture_format == VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16) {
2386 result.p010_mode = 1;
2387 result.msb_mode = 1;
2388 } else {
2389 result.p010_mode = 0;
2390 result.luma_10to8 = 5;
2391 result.chroma_10to8 = 5;
2392 result.sclr_luma10to8 = 4;
2393 result.sclr_chroma10to8 = 4;
2394 }
2395 }
2396
2397 return result;
2398 }
2399
2400 static unsigned
texture_offset_legacy(struct radeon_surf * surface,unsigned layer)2401 texture_offset_legacy(struct radeon_surf *surface, unsigned layer)
2402 {
2403 return (uint64_t)surface->u.legacy.level[0].offset_256B * 256 +
2404 layer * (uint64_t)surface->u.legacy.level[0].slice_size_dw * 4;
2405 }
2406
2407 static bool
ruvd_dec_message_decode(struct radv_device * device,struct radv_video_session * vid,struct radv_video_session_params * params,void * ptr,void * it_ptr,uint32_t * slice_offset,const struct VkVideoDecodeInfoKHR * frame_info)2408 ruvd_dec_message_decode(struct radv_device *device, struct radv_video_session *vid,
2409 struct radv_video_session_params *params, void *ptr, void *it_ptr, uint32_t *slice_offset,
2410 const struct VkVideoDecodeInfoKHR *frame_info)
2411 {
2412 const struct radv_physical_device *pdev = radv_device_physical(device);
2413 struct ruvd_msg *msg = ptr;
2414 struct radv_image_view *dst_iv = radv_image_view_from_handle(frame_info->dstPictureResource.imageViewBinding);
2415 struct radv_image *img = dst_iv->image;
2416 struct radv_image_plane *luma = &img->planes[0];
2417 struct radv_image_plane *chroma = &img->planes[1];
2418 struct radv_image_view *dpb_iv =
2419 radv_image_view_from_handle(frame_info->pSetupReferenceSlot->pPictureResource->imageViewBinding);
2420 struct radv_image *dpb = dpb_iv->image;
2421
2422 memset(msg, 0, sizeof(struct ruvd_msg));
2423 msg->size = sizeof(*msg);
2424 msg->msg_type = RUVD_MSG_DECODE;
2425 msg->stream_handle = vid->stream_handle;
2426 msg->status_report_feedback_number = vid->dbg_frame_cnt++;
2427
2428 msg->body.decode.stream_type = vid->stream_type;
2429 msg->body.decode.decode_flags = 0x1;
2430 msg->body.decode.width_in_samples = frame_info->dstPictureResource.codedExtent.width;
2431 msg->body.decode.height_in_samples = frame_info->dstPictureResource.codedExtent.height;
2432
2433 msg->body.decode.dpb_size = (vid->dpb_type != DPB_DYNAMIC_TIER_2) ? dpb->size : 0;
2434 msg->body.decode.bsd_size = frame_info->srcBufferRange;
2435 msg->body.decode.db_pitch = align(frame_info->dstPictureResource.codedExtent.width, vid->db_alignment);
2436
2437 if (vid->stream_type == RUVD_CODEC_H264_PERF && pdev->info.family >= CHIP_POLARIS10)
2438 msg->body.decode.dpb_reserved = vid->ctx.size;
2439
2440 *slice_offset = 0;
2441 switch (vid->vk.op) {
2442 case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR: {
2443 msg->body.decode.codec.h264 =
2444 get_uvd_h264_msg(vid, params, frame_info, slice_offset, &msg->body.decode.width_in_samples,
2445 &msg->body.decode.height_in_samples, it_ptr);
2446 break;
2447 }
2448 case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR: {
2449 msg->body.decode.codec.h265 = get_uvd_h265_msg(device, vid, params, frame_info,
2450 &msg->body.decode.width_in_samples,
2451 &msg->body.decode.height_in_samples,
2452 it_ptr);
2453
2454 if (vid->ctx.mem)
2455 msg->body.decode.dpb_reserved = vid->ctx.size;
2456 break;
2457 }
2458 default:
2459 return false;
2460 }
2461
2462 msg->body.decode.dt_field_mode = false;
2463
2464 int dt_array_idx = frame_info->dstPictureResource.baseArrayLayer + dst_iv->vk.base_array_layer;
2465
2466 if (pdev->info.gfx_level >= GFX9) {
2467 msg->body.decode.dt_pitch = luma->surface.u.gfx9.surf_pitch * luma->surface.blk_w;
2468 msg->body.decode.dt_tiling_mode = RUVD_TILE_LINEAR;
2469 msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_LINEAR;
2470 msg->body.decode.dt_luma_top_offset = luma->surface.u.gfx9.surf_offset +
2471 dt_array_idx * luma->surface.u.gfx9.surf_slice_size;
2472 msg->body.decode.dt_chroma_top_offset = chroma->surface.u.gfx9.surf_offset +
2473 dt_array_idx * chroma->surface.u.gfx9.surf_slice_size;
2474 msg->body.decode.dt_luma_bottom_offset = msg->body.decode.dt_luma_top_offset;
2475 msg->body.decode.dt_chroma_bottom_offset = msg->body.decode.dt_chroma_top_offset;
2476 msg->body.decode.dt_surf_tile_config = 0;
2477 } else {
2478 msg->body.decode.dt_pitch = luma->surface.u.legacy.level[0].nblk_x * luma->surface.blk_w;
2479 switch (luma->surface.u.legacy.level[0].mode) {
2480 case RADEON_SURF_MODE_LINEAR_ALIGNED:
2481 msg->body.decode.dt_tiling_mode = RUVD_TILE_LINEAR;
2482 msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_LINEAR;
2483 break;
2484 case RADEON_SURF_MODE_1D:
2485 msg->body.decode.dt_tiling_mode = RUVD_TILE_8X8;
2486 msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_1D_THIN;
2487 break;
2488 case RADEON_SURF_MODE_2D:
2489 msg->body.decode.dt_tiling_mode = RUVD_TILE_8X8;
2490 msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_2D_THIN;
2491 break;
2492 default:
2493 assert(0);
2494 break;
2495 }
2496
2497 msg->body.decode.dt_luma_top_offset = texture_offset_legacy(&luma->surface, dt_array_idx);
2498 if (chroma)
2499 msg->body.decode.dt_chroma_top_offset = texture_offset_legacy(&chroma->surface, dt_array_idx);
2500 msg->body.decode.dt_luma_bottom_offset = msg->body.decode.dt_luma_top_offset;
2501 msg->body.decode.dt_chroma_bottom_offset = msg->body.decode.dt_chroma_top_offset;
2502
2503 if (chroma) {
2504 assert(luma->surface.u.legacy.bankw == chroma->surface.u.legacy.bankw);
2505 assert(luma->surface.u.legacy.bankh == chroma->surface.u.legacy.bankh);
2506 assert(luma->surface.u.legacy.mtilea == chroma->surface.u.legacy.mtilea);
2507 }
2508
2509 msg->body.decode.dt_surf_tile_config |= RUVD_BANK_WIDTH(util_logbase2(luma->surface.u.legacy.bankw));
2510 msg->body.decode.dt_surf_tile_config |= RUVD_BANK_HEIGHT(util_logbase2(luma->surface.u.legacy.bankh));
2511 msg->body.decode.dt_surf_tile_config |=
2512 RUVD_MACRO_TILE_ASPECT_RATIO(util_logbase2(luma->surface.u.legacy.mtilea));
2513 }
2514
2515 if (pdev->info.family >= CHIP_STONEY)
2516 msg->body.decode.dt_wa_chroma_top_offset = msg->body.decode.dt_pitch / 2;
2517
2518 msg->body.decode.db_surf_tile_config = msg->body.decode.dt_surf_tile_config;
2519 msg->body.decode.extension_support = 0x1;
2520
2521 return true;
2522 }
2523
2524 static void
ruvd_dec_message_create(struct radv_video_session * vid,void * ptr)2525 ruvd_dec_message_create(struct radv_video_session *vid, void *ptr)
2526 {
2527 struct ruvd_msg *msg = ptr;
2528
2529 memset(ptr, 0, sizeof(*msg));
2530 msg->size = sizeof(*msg);
2531 msg->msg_type = RUVD_MSG_CREATE;
2532 msg->stream_handle = vid->stream_handle;
2533 msg->body.create.stream_type = vid->stream_type;
2534 msg->body.create.width_in_samples = vid->vk.max_coded.width;
2535 msg->body.create.height_in_samples = vid->vk.max_coded.height;
2536 }
2537
2538 VKAPI_ATTR void VKAPI_CALL
radv_CmdBeginVideoCodingKHR(VkCommandBuffer commandBuffer,const VkVideoBeginCodingInfoKHR * pBeginInfo)2539 radv_CmdBeginVideoCodingKHR(VkCommandBuffer commandBuffer, const VkVideoBeginCodingInfoKHR *pBeginInfo)
2540 {
2541 VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
2542 VK_FROM_HANDLE(radv_video_session, vid, pBeginInfo->videoSession);
2543 VK_FROM_HANDLE(radv_video_session_params, params, pBeginInfo->videoSessionParameters);
2544
2545 cmd_buffer->video.vid = vid;
2546 cmd_buffer->video.params = params;
2547 }
2548
2549 static void
radv_vcn_cmd_reset(struct radv_cmd_buffer * cmd_buffer)2550 radv_vcn_cmd_reset(struct radv_cmd_buffer *cmd_buffer)
2551 {
2552 struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
2553 const struct radv_physical_device *pdev = radv_device_physical(device);
2554 struct radv_video_session *vid = cmd_buffer->video.vid;
2555 uint32_t size = sizeof(rvcn_dec_message_header_t) + sizeof(rvcn_dec_message_create_t);
2556
2557 void *ptr;
2558 uint32_t out_offset;
2559
2560 if (vid->stream_type == RDECODE_CODEC_AV1) {
2561 uint8_t *ctxptr = radv_buffer_map(device->ws, vid->ctx.mem->bo);
2562 ctxptr += vid->ctx.offset;
2563 ac_vcn_av1_init_probs(pdev->av1_version, ctxptr);
2564 device->ws->buffer_unmap(device->ws, vid->ctx.mem->bo, false);
2565 }
2566 radv_vid_buffer_upload_alloc(cmd_buffer, size, &out_offset, &ptr);
2567
2568 if (pdev->vid_decode_ip == AMD_IP_VCN_UNIFIED)
2569 radv_vcn_sq_start(cmd_buffer);
2570
2571 rvcn_dec_message_create(vid, ptr, size);
2572 send_cmd(cmd_buffer, RDECODE_CMD_SESSION_CONTEXT_BUFFER, vid->sessionctx.mem->bo, vid->sessionctx.offset);
2573 send_cmd(cmd_buffer, RDECODE_CMD_MSG_BUFFER, cmd_buffer->upload.upload_bo, out_offset);
2574 /* pad out the IB to the 16 dword boundary - otherwise the fw seems to be unhappy */
2575
2576 if (pdev->vid_decode_ip != AMD_IP_VCN_UNIFIED) {
2577 radeon_check_space(device->ws, cmd_buffer->cs, 8);
2578 for (unsigned i = 0; i < 8; i++)
2579 radeon_emit(cmd_buffer->cs, 0x81ff);
2580 } else
2581 radv_vcn_sq_tail(cmd_buffer->cs, &cmd_buffer->video.sq);
2582 }
2583
2584 static void
radv_uvd_cmd_reset(struct radv_cmd_buffer * cmd_buffer)2585 radv_uvd_cmd_reset(struct radv_cmd_buffer *cmd_buffer)
2586 {
2587 struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
2588 struct radv_video_session *vid = cmd_buffer->video.vid;
2589 uint32_t size = sizeof(struct ruvd_msg);
2590 void *ptr;
2591 uint32_t out_offset;
2592 radv_vid_buffer_upload_alloc(cmd_buffer, size, &out_offset, &ptr);
2593
2594 ruvd_dec_message_create(vid, ptr);
2595 if (vid->sessionctx.mem)
2596 send_cmd(cmd_buffer, RDECODE_CMD_SESSION_CONTEXT_BUFFER, vid->sessionctx.mem->bo, vid->sessionctx.offset);
2597 send_cmd(cmd_buffer, RDECODE_CMD_MSG_BUFFER, cmd_buffer->upload.upload_bo, out_offset);
2598
2599 /* pad out the IB to the 16 dword boundary - otherwise the fw seems to be unhappy */
2600 int padsize = vid->sessionctx.mem ? 4 : 6;
2601 radeon_check_space(device->ws, cmd_buffer->cs, padsize);
2602 for (unsigned i = 0; i < padsize; i++)
2603 radeon_emit(cmd_buffer->cs, PKT2_NOP_PAD);
2604 }
2605
2606 VKAPI_ATTR void VKAPI_CALL
radv_CmdControlVideoCodingKHR(VkCommandBuffer commandBuffer,const VkVideoCodingControlInfoKHR * pCodingControlInfo)2607 radv_CmdControlVideoCodingKHR(VkCommandBuffer commandBuffer, const VkVideoCodingControlInfoKHR *pCodingControlInfo)
2608 {
2609 VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
2610 struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
2611 struct radv_physical_device *pdev = radv_device_physical(device);
2612
2613 if (cmd_buffer->video.vid->encode) {
2614 radv_video_enc_control_video_coding(cmd_buffer, pCodingControlInfo);
2615 return;
2616 }
2617 if (pCodingControlInfo->flags & VK_VIDEO_CODING_CONTROL_RESET_BIT_KHR) {
2618 if (radv_has_uvd(pdev))
2619 radv_uvd_cmd_reset(cmd_buffer);
2620 else
2621 radv_vcn_cmd_reset(cmd_buffer);
2622 }
2623 }
2624
2625 VKAPI_ATTR void VKAPI_CALL
radv_CmdEndVideoCodingKHR(VkCommandBuffer commandBuffer,const VkVideoEndCodingInfoKHR * pEndCodingInfo)2626 radv_CmdEndVideoCodingKHR(VkCommandBuffer commandBuffer, const VkVideoEndCodingInfoKHR *pEndCodingInfo)
2627 {
2628 }
2629
2630 static void
radv_uvd_decode_video(struct radv_cmd_buffer * cmd_buffer,const VkVideoDecodeInfoKHR * frame_info)2631 radv_uvd_decode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoDecodeInfoKHR *frame_info)
2632 {
2633 VK_FROM_HANDLE(radv_buffer, src_buffer, frame_info->srcBuffer);
2634 struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
2635 const struct radv_physical_device *pdev = radv_device_physical(device);
2636 struct radv_video_session *vid = cmd_buffer->video.vid;
2637 struct radv_video_session_params *params = cmd_buffer->video.params;
2638 unsigned size = sizeof(struct ruvd_msg);
2639 void *ptr, *fb_ptr, *it_probs_ptr = NULL;
2640 uint32_t out_offset, fb_offset, it_probs_offset = 0;
2641 struct radeon_winsys_bo *msg_bo, *fb_bo, *it_probs_bo = NULL;
2642 unsigned fb_size = (pdev->info.family == CHIP_TONGA) ? FB_BUFFER_SIZE_TONGA : FB_BUFFER_SIZE;
2643
2644 radv_vid_buffer_upload_alloc(cmd_buffer, fb_size, &fb_offset, &fb_ptr);
2645 fb_bo = cmd_buffer->upload.upload_bo;
2646 if (have_it(vid)) {
2647 radv_vid_buffer_upload_alloc(cmd_buffer, IT_SCALING_TABLE_SIZE, &it_probs_offset, &it_probs_ptr);
2648 it_probs_bo = cmd_buffer->upload.upload_bo;
2649 }
2650
2651 radv_vid_buffer_upload_alloc(cmd_buffer, size, &out_offset, &ptr);
2652 msg_bo = cmd_buffer->upload.upload_bo;
2653
2654 uint32_t slice_offset;
2655 ruvd_dec_message_decode(device, vid, params, ptr, it_probs_ptr, &slice_offset, frame_info);
2656 rvcn_dec_message_feedback(fb_ptr);
2657 if (vid->sessionctx.mem)
2658 send_cmd(cmd_buffer, RDECODE_CMD_SESSION_CONTEXT_BUFFER, vid->sessionctx.mem->bo, vid->sessionctx.offset);
2659 send_cmd(cmd_buffer, RDECODE_CMD_MSG_BUFFER, msg_bo, out_offset);
2660
2661 if (vid->dpb_type != DPB_DYNAMIC_TIER_2) {
2662 struct radv_image_view *dpb_iv =
2663 radv_image_view_from_handle(frame_info->pSetupReferenceSlot->pPictureResource->imageViewBinding);
2664 struct radv_image *dpb = dpb_iv->image;
2665 send_cmd(cmd_buffer, RDECODE_CMD_DPB_BUFFER, dpb->bindings[0].bo, dpb->bindings[0].offset);
2666 }
2667
2668 if (vid->ctx.mem)
2669 send_cmd(cmd_buffer, RDECODE_CMD_CONTEXT_BUFFER, vid->ctx.mem->bo, vid->ctx.offset);
2670
2671 send_cmd(cmd_buffer, RDECODE_CMD_BITSTREAM_BUFFER, src_buffer->bo,
2672 src_buffer->offset + frame_info->srcBufferOffset + slice_offset);
2673
2674 struct radv_image_view *dst_iv = radv_image_view_from_handle(frame_info->dstPictureResource.imageViewBinding);
2675 struct radv_image *img = dst_iv->image;
2676 send_cmd(cmd_buffer, RDECODE_CMD_DECODING_TARGET_BUFFER, img->bindings[0].bo, img->bindings[0].offset);
2677 send_cmd(cmd_buffer, RDECODE_CMD_FEEDBACK_BUFFER, fb_bo, fb_offset);
2678 if (have_it(vid))
2679 send_cmd(cmd_buffer, RDECODE_CMD_IT_SCALING_TABLE_BUFFER, it_probs_bo, it_probs_offset);
2680
2681 radeon_check_space(device->ws, cmd_buffer->cs, 2);
2682 set_reg(cmd_buffer, pdev->vid_dec_reg.cntl, 1);
2683 }
2684
2685 static void
radv_vcn_decode_video(struct radv_cmd_buffer * cmd_buffer,const VkVideoDecodeInfoKHR * frame_info)2686 radv_vcn_decode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoDecodeInfoKHR *frame_info)
2687 {
2688 VK_FROM_HANDLE(radv_buffer, src_buffer, frame_info->srcBuffer);
2689 struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
2690 const struct radv_physical_device *pdev = radv_device_physical(device);
2691 struct radv_video_session *vid = cmd_buffer->video.vid;
2692 struct radv_video_session_params *params = cmd_buffer->video.params;
2693 unsigned size = 0;
2694 void *ptr, *fb_ptr, *it_probs_ptr = NULL;
2695 uint32_t out_offset, fb_offset, it_probs_offset = 0;
2696 struct radeon_winsys_bo *msg_bo, *fb_bo, *it_probs_bo = NULL;
2697
2698 size += sizeof(rvcn_dec_message_header_t); /* header */
2699 size += sizeof(rvcn_dec_message_index_t); /* codec */
2700 if (vid->dpb_type == DPB_DYNAMIC_TIER_2) {
2701 size += sizeof(rvcn_dec_message_index_t);
2702 size += sizeof(rvcn_dec_message_dynamic_dpb_t2_t);
2703 }
2704 size += sizeof(rvcn_dec_message_decode_t); /* decode */
2705 switch (vid->vk.op) {
2706 case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR:
2707 size += sizeof(rvcn_dec_message_avc_t);
2708 break;
2709 case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR:
2710 size += sizeof(rvcn_dec_message_hevc_t);
2711 break;
2712 case VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR:
2713 size += sizeof(rvcn_dec_message_av1_t);
2714 break;
2715 default:
2716 unreachable("unsupported codec.");
2717 }
2718
2719 radv_vid_buffer_upload_alloc(cmd_buffer, FB_BUFFER_SIZE, &fb_offset, &fb_ptr);
2720 fb_bo = cmd_buffer->upload.upload_bo;
2721 if (have_it(vid)) {
2722 radv_vid_buffer_upload_alloc(cmd_buffer, IT_SCALING_TABLE_SIZE, &it_probs_offset, &it_probs_ptr);
2723 it_probs_bo = cmd_buffer->upload.upload_bo;
2724 } else if (have_probs(vid)) {
2725 radv_vid_buffer_upload_alloc(cmd_buffer, sizeof(rvcn_dec_av1_segment_fg_t), &it_probs_offset, &it_probs_ptr);
2726 it_probs_bo = cmd_buffer->upload.upload_bo;
2727 }
2728
2729 radv_vid_buffer_upload_alloc(cmd_buffer, size, &out_offset, &ptr);
2730 msg_bo = cmd_buffer->upload.upload_bo;
2731
2732 if (pdev->vid_decode_ip == AMD_IP_VCN_UNIFIED)
2733 radv_vcn_sq_start(cmd_buffer);
2734
2735 uint32_t slice_offset;
2736 rvcn_dec_message_decode(cmd_buffer, vid, params, ptr, it_probs_ptr, &slice_offset, frame_info);
2737 rvcn_dec_message_feedback(fb_ptr);
2738 send_cmd(cmd_buffer, RDECODE_CMD_SESSION_CONTEXT_BUFFER, vid->sessionctx.mem->bo, vid->sessionctx.offset);
2739 send_cmd(cmd_buffer, RDECODE_CMD_MSG_BUFFER, msg_bo, out_offset);
2740
2741 if (vid->dpb_type != DPB_DYNAMIC_TIER_2) {
2742 struct radv_image_view *dpb_iv =
2743 radv_image_view_from_handle(frame_info->pSetupReferenceSlot->pPictureResource->imageViewBinding);
2744 struct radv_image *dpb = dpb_iv->image;
2745 send_cmd(cmd_buffer, RDECODE_CMD_DPB_BUFFER, dpb->bindings[0].bo, dpb->bindings[0].offset);
2746 }
2747
2748 if (vid->ctx.mem)
2749 send_cmd(cmd_buffer, RDECODE_CMD_CONTEXT_BUFFER, vid->ctx.mem->bo, vid->ctx.offset);
2750
2751 send_cmd(cmd_buffer, RDECODE_CMD_BITSTREAM_BUFFER, src_buffer->bo,
2752 src_buffer->offset + frame_info->srcBufferOffset + slice_offset);
2753
2754 struct radv_image_view *dst_iv = radv_image_view_from_handle(frame_info->dstPictureResource.imageViewBinding);
2755 struct radv_image *img = dst_iv->image;
2756 send_cmd(cmd_buffer, RDECODE_CMD_DECODING_TARGET_BUFFER, img->bindings[0].bo, img->bindings[0].offset);
2757 send_cmd(cmd_buffer, RDECODE_CMD_FEEDBACK_BUFFER, fb_bo, fb_offset);
2758 if (have_it(vid))
2759 send_cmd(cmd_buffer, RDECODE_CMD_IT_SCALING_TABLE_BUFFER, it_probs_bo, it_probs_offset);
2760 else if (have_probs(vid))
2761 send_cmd(cmd_buffer, RDECODE_CMD_PROB_TBL_BUFFER, it_probs_bo, it_probs_offset);
2762
2763 if (pdev->vid_decode_ip != AMD_IP_VCN_UNIFIED) {
2764 radeon_check_space(device->ws, cmd_buffer->cs, 2);
2765 set_reg(cmd_buffer, pdev->vid_dec_reg.cntl, 1);
2766 } else
2767 radv_vcn_sq_tail(cmd_buffer->cs, &cmd_buffer->video.sq);
2768 }
2769
2770 VKAPI_ATTR void VKAPI_CALL
radv_CmdDecodeVideoKHR(VkCommandBuffer commandBuffer,const VkVideoDecodeInfoKHR * frame_info)2771 radv_CmdDecodeVideoKHR(VkCommandBuffer commandBuffer, const VkVideoDecodeInfoKHR *frame_info)
2772 {
2773 VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
2774 struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
2775 struct radv_physical_device *pdev = radv_device_physical(device);
2776
2777 if (radv_has_uvd(pdev))
2778 radv_uvd_decode_video(cmd_buffer, frame_info);
2779 else
2780 radv_vcn_decode_video(cmd_buffer, frame_info);
2781 }
2782
2783 void
radv_video_get_profile_alignments(struct radv_physical_device * pdev,const VkVideoProfileListInfoKHR * profile_list,uint32_t * width_align_out,uint32_t * height_align_out)2784 radv_video_get_profile_alignments(struct radv_physical_device *pdev, const VkVideoProfileListInfoKHR *profile_list,
2785 uint32_t *width_align_out, uint32_t *height_align_out)
2786 {
2787 vk_video_get_profile_alignments(profile_list, width_align_out, height_align_out);
2788 bool is_h265_main_10 = false;
2789
2790 if (profile_list) {
2791 for (unsigned i = 0; i < profile_list->profileCount; i++) {
2792 if (profile_list->pProfiles[i].videoCodecOperation == VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR) {
2793 const struct VkVideoDecodeH265ProfileInfoKHR *h265_profile =
2794 vk_find_struct_const(profile_list->pProfiles[i].pNext, VIDEO_DECODE_H265_PROFILE_INFO_KHR);
2795 if (h265_profile->stdProfileIdc == STD_VIDEO_H265_PROFILE_IDC_MAIN_10)
2796 is_h265_main_10 = true;
2797 }
2798 }
2799 } else
2800 is_h265_main_10 = true;
2801
2802 uint32_t db_alignment = radv_video_get_db_alignment(pdev, 64, is_h265_main_10);
2803 *width_align_out = MAX2(*width_align_out, db_alignment);
2804 *height_align_out = MAX2(*height_align_out, db_alignment);
2805 }
2806