1 /**************************************************************************
2 *
3 * Copyright 2017 Advanced Micro Devices, Inc.
4 * Copyright 2021 Red Hat Inc.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28 #include "radv_private.h"
29
30 #ifndef _WIN32
31 #include "drm-uapi/amdgpu_drm.h"
32 #endif
33
34 #include "util/vl_zscan_data.h"
35 #include "vk_video/vulkan_video_codecs_common.h"
36 #include "ac_uvd_dec.h"
37 #include "ac_vcn_av1_default.h"
38 #include "ac_vcn_dec.h"
39
40 #include "radv_cs.h"
41 #include "radv_debug.h"
42
/* Reported as maxDpbSlots and maxActiveReferencePictures for H.264 decode,
 * and as maxDpbSlots for H.265 decode. */
#define NUM_H264_REFS 17
/* Reported as maxActiveReferencePictures for H.265 decode. */
#define NUM_H265_REFS 8
/* NOTE(review): the FB_* values presumably describe the decoder feedback
 * buffer (offset and sizes in bytes) - confirm against their users. */
#define FB_BUFFER_OFFSET     0x1000
#define FB_BUFFER_SIZE       2048
#define FB_BUFFER_SIZE_TONGA (2048 * 64)
/* NOTE(review): presumably the byte size of the IT scaling table - confirm. */
#define IT_SCALING_TABLE_SIZE 992
/* Size reported for the session-context memory binding. */
#define RDECODE_SESSION_CONTEXT_SIZE (128 * 1024)

/* Alignment used by radv_vid_buffer_upload_alloc().
 * Not 100% sure this isn't too much, but it works. */
#define VID_DEFAULT_ALIGNMENT 256
53
54 static bool
radv_enable_tier2(struct radv_physical_device * pdevice)55 radv_enable_tier2(struct radv_physical_device *pdevice)
56 {
57 if (pdevice->rad_info.vcn_ip_version >= VCN_3_0_0 && !(pdevice->instance->debug_flags & RADV_DEBUG_VIDEO_ARRAY_PATH))
58 return true;
59 return false;
60 }
61
62 static uint32_t
radv_video_get_db_alignment(struct radv_physical_device * pdevice,int width,bool is_h265_main_10_or_av1)63 radv_video_get_db_alignment(struct radv_physical_device *pdevice, int width, bool is_h265_main_10_or_av1)
64 {
65 if (pdevice->rad_info.vcn_ip_version >= VCN_2_0_0 && width > 32 && is_h265_main_10_or_av1)
66 return 64;
67 return 32;
68 }
69
70 static bool
radv_vid_buffer_upload_alloc(struct radv_cmd_buffer * cmd_buffer,unsigned size,unsigned * out_offset,void ** ptr)71 radv_vid_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer, unsigned size, unsigned *out_offset, void **ptr)
72 {
73 return radv_cmd_buffer_upload_alloc_aligned(cmd_buffer, size, VID_DEFAULT_ALIGNMENT, out_offset, ptr);
74 }
75
76 /* vcn unified queue (sq) ib header */
77 static void
radv_vcn_sq_header(struct radeon_cmdbuf * cs,struct rvcn_sq_var * sq,bool enc)78 radv_vcn_sq_header(struct radeon_cmdbuf *cs, struct rvcn_sq_var *sq, bool enc)
79 {
80 /* vcn ib signature */
81 radeon_emit(cs, RADEON_VCN_SIGNATURE_SIZE);
82 radeon_emit(cs, RADEON_VCN_SIGNATURE);
83 sq->ib_checksum = &cs->buf[cs->cdw];
84 radeon_emit(cs, 0);
85 sq->ib_total_size_in_dw = &cs->buf[cs->cdw];
86 radeon_emit(cs, 0);
87
88 /* vcn ib engine info */
89 radeon_emit(cs, RADEON_VCN_ENGINE_INFO_SIZE);
90 radeon_emit(cs, RADEON_VCN_ENGINE_INFO);
91 radeon_emit(cs, enc ? RADEON_VCN_ENGINE_TYPE_ENCODE : RADEON_VCN_ENGINE_TYPE_DECODE);
92 radeon_emit(cs, 0);
93 }
94
95 static void
radv_vcn_sq_tail(struct radeon_cmdbuf * cs,struct rvcn_sq_var * sq)96 radv_vcn_sq_tail(struct radeon_cmdbuf *cs, struct rvcn_sq_var *sq)
97 {
98 uint32_t *end;
99 uint32_t size_in_dw;
100 uint32_t checksum = 0;
101
102 if (sq->ib_checksum == NULL || sq->ib_total_size_in_dw == NULL)
103 return;
104
105 end = &cs->buf[cs->cdw];
106 size_in_dw = end - sq->ib_total_size_in_dw - 1;
107 *sq->ib_total_size_in_dw = size_in_dw;
108 *(sq->ib_total_size_in_dw + 4) = size_in_dw * sizeof(uint32_t);
109
110 for (int i = 0; i < size_in_dw; i++)
111 checksum += *(sq->ib_checksum + 2 + i);
112
113 *sq->ib_checksum = checksum;
114 }
115
116 static void
radv_vcn_sq_start(struct radv_cmd_buffer * cmd_buffer)117 radv_vcn_sq_start(struct radv_cmd_buffer *cmd_buffer)
118 {
119 radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 256);
120 radv_vcn_sq_header(cmd_buffer->cs, &cmd_buffer->video.sq, false);
121 rvcn_decode_ib_package_t *ib_header = (rvcn_decode_ib_package_t *)&(cmd_buffer->cs->buf[cmd_buffer->cs->cdw]);
122 ib_header->package_size = sizeof(struct rvcn_decode_buffer_s) + sizeof(struct rvcn_decode_ib_package_s);
123 cmd_buffer->cs->cdw++;
124 ib_header->package_type = (RDECODE_IB_PARAM_DECODE_BUFFER);
125 cmd_buffer->cs->cdw++;
126 cmd_buffer->video.decode_buffer = (rvcn_decode_buffer_t *)&(cmd_buffer->cs->buf[cmd_buffer->cs->cdw]);
127 cmd_buffer->cs->cdw += sizeof(struct rvcn_decode_buffer_s) / 4;
128 memset(cmd_buffer->video.decode_buffer, 0, sizeof(struct rvcn_decode_buffer_s));
129 }
130
131 /* generate an stream handle */
132 static unsigned
radv_vid_alloc_stream_handle(struct radv_physical_device * pdevice)133 radv_vid_alloc_stream_handle(struct radv_physical_device *pdevice)
134 {
135 unsigned stream_handle = pdevice->stream_handle_base;
136
137 stream_handle ^= ++pdevice->stream_handle_counter;
138 return stream_handle;
139 }
140
141 static void
init_uvd_decoder(struct radv_physical_device * pdevice)142 init_uvd_decoder(struct radv_physical_device *pdevice)
143 {
144 if (pdevice->rad_info.family >= CHIP_VEGA10) {
145 pdevice->vid_dec_reg.data0 = RUVD_GPCOM_VCPU_DATA0_SOC15;
146 pdevice->vid_dec_reg.data1 = RUVD_GPCOM_VCPU_DATA1_SOC15;
147 pdevice->vid_dec_reg.cmd = RUVD_GPCOM_VCPU_CMD_SOC15;
148 pdevice->vid_dec_reg.cntl = RUVD_ENGINE_CNTL_SOC15;
149 } else {
150 pdevice->vid_dec_reg.data0 = RUVD_GPCOM_VCPU_DATA0;
151 pdevice->vid_dec_reg.data1 = RUVD_GPCOM_VCPU_DATA1;
152 pdevice->vid_dec_reg.cmd = RUVD_GPCOM_VCPU_CMD;
153 pdevice->vid_dec_reg.cntl = RUVD_ENGINE_CNTL;
154 }
155 }
156
157 static void
init_vcn_decoder(struct radv_physical_device * pdevice)158 init_vcn_decoder(struct radv_physical_device *pdevice)
159 {
160 switch (pdevice->rad_info.vcn_ip_version) {
161 case VCN_1_0_0:
162 case VCN_1_0_1:
163 pdevice->vid_dec_reg.data0 = RDECODE_VCN1_GPCOM_VCPU_DATA0;
164 pdevice->vid_dec_reg.data1 = RDECODE_VCN1_GPCOM_VCPU_DATA1;
165 pdevice->vid_dec_reg.cmd = RDECODE_VCN1_GPCOM_VCPU_CMD;
166 pdevice->vid_dec_reg.cntl = RDECODE_VCN1_ENGINE_CNTL;
167 break;
168 case VCN_2_0_0:
169 case VCN_2_0_2:
170 case VCN_2_0_3:
171 case VCN_2_2_0:
172 pdevice->vid_dec_reg.data0 = RDECODE_VCN2_GPCOM_VCPU_DATA0;
173 pdevice->vid_dec_reg.data1 = RDECODE_VCN2_GPCOM_VCPU_DATA1;
174 pdevice->vid_dec_reg.cmd = RDECODE_VCN2_GPCOM_VCPU_CMD;
175 pdevice->vid_dec_reg.cntl = RDECODE_VCN2_ENGINE_CNTL;
176 break;
177 case VCN_2_5_0:
178 case VCN_2_6_0:
179 case VCN_3_0_0:
180 case VCN_3_0_16:
181 case VCN_3_0_33:
182 case VCN_3_1_1:
183 case VCN_3_1_2:
184 pdevice->vid_dec_reg.data0 = RDECODE_VCN2_5_GPCOM_VCPU_DATA0;
185 pdevice->vid_dec_reg.data1 = RDECODE_VCN2_5_GPCOM_VCPU_DATA1;
186 pdevice->vid_dec_reg.cmd = RDECODE_VCN2_5_GPCOM_VCPU_CMD;
187 pdevice->vid_dec_reg.cntl = RDECODE_VCN2_5_ENGINE_CNTL;
188 break;
189 case VCN_4_0_3:
190 pdevice->vid_addr_gfx_mode = RDECODE_ARRAY_MODE_ADDRLIB_SEL_GFX9;
191 pdevice->av1_version = RDECODE_AV1_VER_1;
192 break;
193 case VCN_4_0_0:
194 case VCN_4_0_2:
195 case VCN_4_0_4:
196 case VCN_4_0_5:
197 pdevice->vid_addr_gfx_mode = RDECODE_ARRAY_MODE_ADDRLIB_SEL_GFX11;
198 pdevice->av1_version = RDECODE_AV1_VER_1;
199 break;
200 default:
201 break;
202 }
203 }
204
205 void
radv_init_physical_device_decoder(struct radv_physical_device * pdevice)206 radv_init_physical_device_decoder(struct radv_physical_device *pdevice)
207 {
208 if (pdevice->rad_info.vcn_ip_version >= VCN_4_0_0)
209 pdevice->vid_decode_ip = AMD_IP_VCN_UNIFIED;
210 else if (radv_has_uvd(pdevice))
211 pdevice->vid_decode_ip = AMD_IP_UVD;
212 else
213 pdevice->vid_decode_ip = AMD_IP_VCN_DEC;
214 pdevice->av1_version = RDECODE_AV1_VER_0;
215
216 pdevice->stream_handle_counter = 0;
217 pdevice->stream_handle_base = 0;
218
219 pdevice->stream_handle_base = util_bitreverse(getpid());
220
221 pdevice->vid_addr_gfx_mode = RDECODE_ARRAY_MODE_LINEAR;
222
223 if (radv_has_uvd(pdevice))
224 init_uvd_decoder(pdevice);
225 else
226 init_vcn_decoder(pdevice);
227 }
228
229 static bool
have_it(struct radv_video_session * vid)230 have_it(struct radv_video_session *vid)
231 {
232 return vid->stream_type == RDECODE_CODEC_H264_PERF || vid->stream_type == RDECODE_CODEC_H265;
233 }
234
235 static bool
have_probs(struct radv_video_session * vid)236 have_probs(struct radv_video_session *vid)
237 {
238 return vid->stream_type == RDECODE_CODEC_AV1;
239 }
240
241 static unsigned
calc_ctx_size_h264_perf(struct radv_video_session * vid)242 calc_ctx_size_h264_perf(struct radv_video_session *vid)
243 {
244 unsigned width_in_mb, height_in_mb, ctx_size;
245 unsigned width = align(vid->vk.max_coded.width, VL_MACROBLOCK_WIDTH);
246 unsigned height = align(vid->vk.max_coded.height, VL_MACROBLOCK_HEIGHT);
247
248 unsigned max_references = vid->vk.max_dpb_slots + 1;
249
250 /* picture width & height in 16 pixel units */
251 width_in_mb = width / VL_MACROBLOCK_WIDTH;
252 height_in_mb = align(height / VL_MACROBLOCK_HEIGHT, 2);
253
254 ctx_size = max_references * align(width_in_mb * height_in_mb * 192, 256);
255
256 return ctx_size;
257 }
258
259 static unsigned
calc_ctx_size_h265_main(struct radv_video_session * vid)260 calc_ctx_size_h265_main(struct radv_video_session *vid)
261 {
262 unsigned width = align(vid->vk.max_coded.width, VL_MACROBLOCK_WIDTH);
263 unsigned height = align(vid->vk.max_coded.height, VL_MACROBLOCK_HEIGHT);
264
265 unsigned max_references = vid->vk.max_dpb_slots + 1;
266
267 if (vid->vk.max_coded.width * vid->vk.max_coded.height >= 4096 * 2000)
268 max_references = MAX2(max_references, 8);
269 else
270 max_references = MAX2(max_references, 17);
271
272 width = align(width, 16);
273 height = align(height, 16);
274 return ((width + 255) / 16) * ((height + 255) / 16) * 16 * max_references + 52 * 1024;
275 }
276
277 static unsigned
calc_ctx_size_h265_main10(struct radv_video_session * vid)278 calc_ctx_size_h265_main10(struct radv_video_session *vid)
279 {
280 unsigned log2_ctb_size, width_in_ctb, height_in_ctb, num_16x16_block_per_ctb;
281 unsigned context_buffer_size_per_ctb_row, cm_buffer_size, max_mb_address, db_left_tile_pxl_size;
282 unsigned db_left_tile_ctx_size = 4096 / 16 * (32 + 16 * 4);
283
284 unsigned width = align(vid->vk.max_coded.width, VL_MACROBLOCK_WIDTH);
285 unsigned height = align(vid->vk.max_coded.height, VL_MACROBLOCK_HEIGHT);
286 unsigned coeff_10bit = 2;
287
288 unsigned max_references = vid->vk.max_dpb_slots + 1;
289
290 if (vid->vk.max_coded.width * vid->vk.max_coded.height >= 4096 * 2000)
291 max_references = MAX2(max_references, 8);
292 else
293 max_references = MAX2(max_references, 17);
294
295 /* 64x64 is the maximum ctb size. */
296 log2_ctb_size = 6;
297
298 width_in_ctb = (width + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size;
299 height_in_ctb = (height + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size;
300
301 num_16x16_block_per_ctb = ((1 << log2_ctb_size) >> 4) * ((1 << log2_ctb_size) >> 4);
302 context_buffer_size_per_ctb_row = align(width_in_ctb * num_16x16_block_per_ctb * 16, 256);
303 max_mb_address = (unsigned)ceil(height * 8 / 2048.0);
304
305 cm_buffer_size = max_references * context_buffer_size_per_ctb_row * height_in_ctb;
306 db_left_tile_pxl_size = coeff_10bit * (max_mb_address * 2 * 2048 + 1024);
307
308 return cm_buffer_size + db_left_tile_ctx_size + db_left_tile_pxl_size;
309 }
310
311 static unsigned
calc_ctx_size_av1(struct radv_device * device,struct radv_video_session * vid)312 calc_ctx_size_av1(struct radv_device *device, struct radv_video_session *vid)
313 {
314 struct radv_physical_device *pdev = device->physical_device;
315 unsigned frame_ctxt_size = pdev->av1_version == RDECODE_AV1_VER_0
316 ? align(sizeof(rvcn_av1_frame_context_t), 2048)
317 : align(sizeof(rvcn_av1_vcn4_frame_context_t), 2048);
318 unsigned ctx_size = (9 + 4) * frame_ctxt_size + 9 * 64 * 34 * 512 + 9 * 64 * 34 * 256 * 5;
319
320 int num_64x64_CTB_8k = 68;
321 int num_128x128_CTB_8k = 34;
322 int sdb_pitch_64x64 = align(32 * num_64x64_CTB_8k, 256) * 2;
323 int sdb_pitch_128x128 = align(32 * num_128x128_CTB_8k, 256) * 2;
324 int sdb_lf_size_ctb_64x64 = sdb_pitch_64x64 * (align(1728, 64) / 64);
325 int sdb_lf_size_ctb_128x128 = sdb_pitch_128x128 * (align(3008, 64) / 64);
326 int sdb_superres_size_ctb_64x64 = sdb_pitch_64x64 * (align(3232, 64) / 64);
327 int sdb_superres_size_ctb_128x128 = sdb_pitch_128x128 * (align(6208, 64) / 64);
328 int sdb_output_size_ctb_64x64 = sdb_pitch_64x64 * (align(1312, 64) / 64);
329 int sdb_output_size_ctb_128x128 = sdb_pitch_128x128 * (align(2336, 64) / 64);
330 int sdb_fg_avg_luma_size_ctb_64x64 = sdb_pitch_64x64 * (align(384, 64) / 64);
331 int sdb_fg_avg_luma_size_ctb_128x128 = sdb_pitch_128x128 * (align(640, 64) / 64);
332
333 ctx_size += (MAX2(sdb_lf_size_ctb_64x64, sdb_lf_size_ctb_128x128) +
334 MAX2(sdb_superres_size_ctb_64x64, sdb_superres_size_ctb_128x128) +
335 MAX2(sdb_output_size_ctb_64x64, sdb_output_size_ctb_128x128) +
336 MAX2(sdb_fg_avg_luma_size_ctb_64x64, sdb_fg_avg_luma_size_ctb_128x128)) *
337 2 +
338 68 * 512;
339
340 return ctx_size;
341 }
342
343 VKAPI_ATTR VkResult VKAPI_CALL
radv_CreateVideoSessionKHR(VkDevice _device,const VkVideoSessionCreateInfoKHR * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkVideoSessionKHR * pVideoSession)344 radv_CreateVideoSessionKHR(VkDevice _device, const VkVideoSessionCreateInfoKHR *pCreateInfo,
345 const VkAllocationCallbacks *pAllocator, VkVideoSessionKHR *pVideoSession)
346 {
347 RADV_FROM_HANDLE(radv_device, device, _device);
348
349 struct radv_video_session *vid =
350 vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*vid), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
351 if (!vid)
352 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
353
354 memset(vid, 0, sizeof(struct radv_video_session));
355
356 VkResult result = vk_video_session_init(&device->vk, &vid->vk, pCreateInfo);
357 if (result != VK_SUCCESS) {
358 vk_free2(&device->vk.alloc, pAllocator, vid);
359 return result;
360 }
361
362 vid->interlaced = false;
363 vid->dpb_type = DPB_MAX_RES;
364
365 switch (vid->vk.op) {
366 case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR:
367 vid->stream_type = RDECODE_CODEC_H264_PERF;
368 if (radv_enable_tier2(device->physical_device))
369 vid->dpb_type = DPB_DYNAMIC_TIER_2;
370 break;
371 case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR:
372 vid->stream_type = RDECODE_CODEC_H265;
373 if (radv_enable_tier2(device->physical_device))
374 vid->dpb_type = DPB_DYNAMIC_TIER_2;
375 break;
376 case VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR:
377 vid->stream_type = RDECODE_CODEC_AV1;
378 vid->dpb_type = DPB_DYNAMIC_TIER_2;
379 break;
380 default:
381 return VK_ERROR_FEATURE_NOT_PRESENT;
382 }
383
384 vid->stream_handle = radv_vid_alloc_stream_handle(device->physical_device);
385 vid->dbg_frame_cnt = 0;
386 vid->db_alignment = radv_video_get_db_alignment(
387 device->physical_device, vid->vk.max_coded.width,
388 (vid->stream_type == RDECODE_CODEC_AV1 ||
389 (vid->stream_type == RDECODE_CODEC_H265 && vid->vk.h265.profile_idc == STD_VIDEO_H265_PROFILE_IDC_MAIN_10)));
390
391 *pVideoSession = radv_video_session_to_handle(vid);
392 return VK_SUCCESS;
393 }
394
395 VKAPI_ATTR void VKAPI_CALL
radv_DestroyVideoSessionKHR(VkDevice _device,VkVideoSessionKHR _session,const VkAllocationCallbacks * pAllocator)396 radv_DestroyVideoSessionKHR(VkDevice _device, VkVideoSessionKHR _session, const VkAllocationCallbacks *pAllocator)
397 {
398 RADV_FROM_HANDLE(radv_device, device, _device);
399 RADV_FROM_HANDLE(radv_video_session, vid, _session);
400 if (!_session)
401 return;
402
403 vk_object_base_finish(&vid->vk.base);
404 vk_free2(&device->vk.alloc, pAllocator, vid);
405 }
406
407 VKAPI_ATTR VkResult VKAPI_CALL
radv_CreateVideoSessionParametersKHR(VkDevice _device,const VkVideoSessionParametersCreateInfoKHR * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkVideoSessionParametersKHR * pVideoSessionParameters)408 radv_CreateVideoSessionParametersKHR(VkDevice _device, const VkVideoSessionParametersCreateInfoKHR *pCreateInfo,
409 const VkAllocationCallbacks *pAllocator,
410 VkVideoSessionParametersKHR *pVideoSessionParameters)
411 {
412 RADV_FROM_HANDLE(radv_device, device, _device);
413 RADV_FROM_HANDLE(radv_video_session, vid, pCreateInfo->videoSession);
414 RADV_FROM_HANDLE(radv_video_session_params, templ, pCreateInfo->videoSessionParametersTemplate);
415 struct radv_video_session_params *params =
416 vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*params), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
417 if (!params)
418 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
419
420 VkResult result =
421 vk_video_session_parameters_init(&device->vk, ¶ms->vk, &vid->vk, templ ? &templ->vk : NULL, pCreateInfo);
422 if (result != VK_SUCCESS) {
423 vk_free2(&device->vk.alloc, pAllocator, params);
424 return result;
425 }
426
427 *pVideoSessionParameters = radv_video_session_params_to_handle(params);
428 return VK_SUCCESS;
429 }
430
431 VKAPI_ATTR void VKAPI_CALL
radv_DestroyVideoSessionParametersKHR(VkDevice _device,VkVideoSessionParametersKHR _params,const VkAllocationCallbacks * pAllocator)432 radv_DestroyVideoSessionParametersKHR(VkDevice _device, VkVideoSessionParametersKHR _params,
433 const VkAllocationCallbacks *pAllocator)
434 {
435 RADV_FROM_HANDLE(radv_device, device, _device);
436 RADV_FROM_HANDLE(radv_video_session_params, params, _params);
437
438 vk_video_session_parameters_finish(&device->vk, ¶ms->vk);
439 vk_free2(&device->vk.alloc, pAllocator, params);
440 }
441
442 VKAPI_ATTR VkResult VKAPI_CALL
radv_GetPhysicalDeviceVideoCapabilitiesKHR(VkPhysicalDevice physicalDevice,const VkVideoProfileInfoKHR * pVideoProfile,VkVideoCapabilitiesKHR * pCapabilities)443 radv_GetPhysicalDeviceVideoCapabilitiesKHR(VkPhysicalDevice physicalDevice, const VkVideoProfileInfoKHR *pVideoProfile,
444 VkVideoCapabilitiesKHR *pCapabilities)
445 {
446 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
447 const struct video_codec_cap *cap = NULL;
448
449 switch (pVideoProfile->videoCodecOperation) {
450 #ifndef _WIN32
451 case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR:
452 cap = &pdevice->rad_info.dec_caps.codec_info[AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC];
453 break;
454 case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR:
455 cap = &pdevice->rad_info.dec_caps.codec_info[AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC];
456 break;
457 case VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR:
458 cap = &pdevice->rad_info.dec_caps.codec_info[AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1];
459 break;
460 #endif
461 default:
462 unreachable("unsupported operation");
463 }
464
465 if (cap && !cap->valid)
466 cap = NULL;
467
468 pCapabilities->flags = 0;
469 pCapabilities->minBitstreamBufferOffsetAlignment = 128;
470 pCapabilities->minBitstreamBufferSizeAlignment = 128;
471 pCapabilities->pictureAccessGranularity.width = VL_MACROBLOCK_WIDTH;
472 pCapabilities->pictureAccessGranularity.height = VL_MACROBLOCK_HEIGHT;
473 pCapabilities->minCodedExtent.width = VL_MACROBLOCK_WIDTH;
474 pCapabilities->minCodedExtent.height = VL_MACROBLOCK_HEIGHT;
475
476 struct VkVideoDecodeCapabilitiesKHR *dec_caps =
477 (struct VkVideoDecodeCapabilitiesKHR *)vk_find_struct(pCapabilities->pNext, VIDEO_DECODE_CAPABILITIES_KHR);
478 if (dec_caps)
479 dec_caps->flags = VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_DISTINCT_BIT_KHR;
480
481 switch (pVideoProfile->videoCodecOperation) {
482 case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR: {
483 /* H264 allows different luma and chroma bit depths */
484 if (pVideoProfile->lumaBitDepth != pVideoProfile->chromaBitDepth)
485 return VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR;
486
487 struct VkVideoDecodeH264CapabilitiesKHR *ext = (struct VkVideoDecodeH264CapabilitiesKHR *)vk_find_struct(
488 pCapabilities->pNext, VIDEO_DECODE_H264_CAPABILITIES_KHR);
489
490 const struct VkVideoDecodeH264ProfileInfoKHR *h264_profile =
491 vk_find_struct_const(pVideoProfile->pNext, VIDEO_DECODE_H264_PROFILE_INFO_KHR);
492
493 if (h264_profile->stdProfileIdc != STD_VIDEO_H264_PROFILE_IDC_BASELINE &&
494 h264_profile->stdProfileIdc != STD_VIDEO_H264_PROFILE_IDC_MAIN &&
495 h264_profile->stdProfileIdc != STD_VIDEO_H264_PROFILE_IDC_HIGH)
496 return VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR;
497
498 if (pVideoProfile->lumaBitDepth != VK_VIDEO_COMPONENT_BIT_DEPTH_8_BIT_KHR)
499 return VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR;
500
501 pCapabilities->maxDpbSlots = NUM_H264_REFS;
502 pCapabilities->maxActiveReferencePictures = NUM_H264_REFS;
503
504 /* for h264 on navi21+ separate dpb images should work */
505 if (radv_enable_tier2(pdevice))
506 pCapabilities->flags |= VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR;
507 ext->fieldOffsetGranularity.x = 0;
508 ext->fieldOffsetGranularity.y = 0;
509 ext->maxLevelIdc = STD_VIDEO_H264_LEVEL_IDC_5_1;
510 strcpy(pCapabilities->stdHeaderVersion.extensionName, VK_STD_VULKAN_VIDEO_CODEC_H264_DECODE_EXTENSION_NAME);
511 pCapabilities->stdHeaderVersion.specVersion = VK_STD_VULKAN_VIDEO_CODEC_H264_DECODE_SPEC_VERSION;
512 break;
513 }
514 case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR: {
515 /* H265 allows different luma and chroma bit depths */
516 if (pVideoProfile->lumaBitDepth != pVideoProfile->chromaBitDepth)
517 return VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR;
518
519 struct VkVideoDecodeH265CapabilitiesKHR *ext = (struct VkVideoDecodeH265CapabilitiesKHR *)vk_find_struct(
520 pCapabilities->pNext, VIDEO_DECODE_H265_CAPABILITIES_KHR);
521
522 const struct VkVideoDecodeH265ProfileInfoKHR *h265_profile =
523 vk_find_struct_const(pVideoProfile->pNext, VIDEO_DECODE_H265_PROFILE_INFO_KHR);
524
525 if (h265_profile->stdProfileIdc != STD_VIDEO_H265_PROFILE_IDC_MAIN &&
526 h265_profile->stdProfileIdc != STD_VIDEO_H265_PROFILE_IDC_MAIN_10 &&
527 h265_profile->stdProfileIdc != STD_VIDEO_H265_PROFILE_IDC_MAIN_STILL_PICTURE)
528 return VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR;
529
530 if (pVideoProfile->lumaBitDepth != VK_VIDEO_COMPONENT_BIT_DEPTH_8_BIT_KHR &&
531 pVideoProfile->lumaBitDepth != VK_VIDEO_COMPONENT_BIT_DEPTH_10_BIT_KHR)
532 return VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR;
533
534 pCapabilities->maxDpbSlots = NUM_H264_REFS;
535 pCapabilities->maxActiveReferencePictures = NUM_H265_REFS;
536 /* for h265 on navi21+ separate dpb images should work */
537 if (radv_enable_tier2(pdevice))
538 pCapabilities->flags |= VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR;
539 ext->maxLevelIdc = STD_VIDEO_H265_LEVEL_IDC_5_1;
540 strcpy(pCapabilities->stdHeaderVersion.extensionName, VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_EXTENSION_NAME);
541 pCapabilities->stdHeaderVersion.specVersion = VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_SPEC_VERSION;
542 break;
543 }
544 case VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR: {
545 /* Monochrome sampling implies an undefined chroma bit depth, and is supported in profile MAIN for AV1. */
546 if (pVideoProfile->chromaSubsampling != VK_VIDEO_CHROMA_SUBSAMPLING_MONOCHROME_BIT_KHR &&
547 pVideoProfile->lumaBitDepth != pVideoProfile->chromaBitDepth)
548 return VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR;
549 struct VkVideoDecodeAV1CapabilitiesKHR *ext =
550 vk_find_struct(pCapabilities->pNext, VIDEO_DECODE_AV1_CAPABILITIES_KHR);
551 pCapabilities->maxDpbSlots = 9;
552 pCapabilities->maxActiveReferencePictures = STD_VIDEO_AV1_NUM_REF_FRAMES;
553 pCapabilities->flags |= VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR;
554 ext->maxLevel = STD_VIDEO_AV1_LEVEL_6_1; /* For VCN3/4, the only h/w currently with AV1 decode support */
555 strcpy(pCapabilities->stdHeaderVersion.extensionName, VK_STD_VULKAN_VIDEO_CODEC_AV1_DECODE_EXTENSION_NAME);
556 pCapabilities->stdHeaderVersion.specVersion = VK_STD_VULKAN_VIDEO_CODEC_AV1_DECODE_SPEC_VERSION;
557 break;
558 }
559 default:
560 break;
561 }
562
563 if (cap) {
564 pCapabilities->maxCodedExtent.width = cap->max_width;
565 pCapabilities->maxCodedExtent.height = cap->max_height;
566 } else {
567 switch (pVideoProfile->videoCodecOperation) {
568 case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR:
569 pCapabilities->maxCodedExtent.width = (pdevice->rad_info.family < CHIP_TONGA) ? 2048 : 4096;
570 pCapabilities->maxCodedExtent.height = (pdevice->rad_info.family < CHIP_TONGA) ? 1152 : 4096;
571 break;
572 case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR:
573 pCapabilities->maxCodedExtent.width =
574 (pdevice->rad_info.family < CHIP_RENOIR) ? ((pdevice->rad_info.family < CHIP_TONGA) ? 2048 : 4096) : 8192;
575 pCapabilities->maxCodedExtent.height =
576 (pdevice->rad_info.family < CHIP_RENOIR) ? ((pdevice->rad_info.family < CHIP_TONGA) ? 1152 : 4096) : 4352;
577 break;
578 default:
579 break;
580 }
581 }
582
583 return VK_SUCCESS;
584 }
585
586 VKAPI_ATTR VkResult VKAPI_CALL
radv_GetPhysicalDeviceVideoFormatPropertiesKHR(VkPhysicalDevice physicalDevice,const VkPhysicalDeviceVideoFormatInfoKHR * pVideoFormatInfo,uint32_t * pVideoFormatPropertyCount,VkVideoFormatPropertiesKHR * pVideoFormatProperties)587 radv_GetPhysicalDeviceVideoFormatPropertiesKHR(VkPhysicalDevice physicalDevice,
588 const VkPhysicalDeviceVideoFormatInfoKHR *pVideoFormatInfo,
589 uint32_t *pVideoFormatPropertyCount,
590 VkVideoFormatPropertiesKHR *pVideoFormatProperties)
591 {
592 /* radv requires separate allocates for DPB and decode video. */
593 if ((pVideoFormatInfo->imageUsage &
594 (VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR | VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR)) ==
595 (VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR | VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR))
596 return VK_ERROR_FORMAT_NOT_SUPPORTED;
597
598 VK_OUTARRAY_MAKE_TYPED(VkVideoFormatPropertiesKHR, out, pVideoFormatProperties, pVideoFormatPropertyCount);
599
600 bool need_8bit = true;
601 bool need_10bit = false;
602 const struct VkVideoProfileListInfoKHR *prof_list =
603 (struct VkVideoProfileListInfoKHR *)vk_find_struct_const(pVideoFormatInfo->pNext, VIDEO_PROFILE_LIST_INFO_KHR);
604 if (prof_list) {
605 for (unsigned i = 0; i < prof_list->profileCount; i++) {
606 const VkVideoProfileInfoKHR *profile = &prof_list->pProfiles[i];
607 if (profile->lumaBitDepth & VK_VIDEO_COMPONENT_BIT_DEPTH_10_BIT_KHR)
608 need_10bit = true;
609 }
610 }
611
612 if (need_10bit) {
613 vk_outarray_append_typed(VkVideoFormatPropertiesKHR, &out, p)
614 {
615 p->format = VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16;
616 p->imageType = VK_IMAGE_TYPE_2D;
617 p->imageTiling = VK_IMAGE_TILING_OPTIMAL;
618 p->imageUsageFlags = pVideoFormatInfo->imageUsage;
619 }
620
621 if (pVideoFormatInfo->imageUsage & (VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR))
622 need_8bit = false;
623 }
624
625 if (need_8bit) {
626 vk_outarray_append_typed(VkVideoFormatPropertiesKHR, &out, p)
627 {
628 p->format = VK_FORMAT_G8_B8R8_2PLANE_420_UNORM;
629 p->imageType = VK_IMAGE_TYPE_2D;
630 p->imageTiling = VK_IMAGE_TILING_OPTIMAL;
631 p->imageUsageFlags = pVideoFormatInfo->imageUsage;
632 }
633 }
634
635 return vk_outarray_status(&out);
636 }
637
/* Memory bind indices reported by radv_GetVideoSessionMemoryRequirementsKHR()
 * and consumed by radv_BindVideoSessionMemoryKHR(). */
#define RADV_BIND_SESSION_CTX 0 /* session context buffer */
#define RADV_BIND_DECODER_CTX 1 /* codec-specific decoder context buffer */
640
641 VKAPI_ATTR VkResult VKAPI_CALL
radv_GetVideoSessionMemoryRequirementsKHR(VkDevice _device,VkVideoSessionKHR videoSession,uint32_t * pMemoryRequirementsCount,VkVideoSessionMemoryRequirementsKHR * pMemoryRequirements)642 radv_GetVideoSessionMemoryRequirementsKHR(VkDevice _device, VkVideoSessionKHR videoSession,
643 uint32_t *pMemoryRequirementsCount,
644 VkVideoSessionMemoryRequirementsKHR *pMemoryRequirements)
645 {
646 RADV_FROM_HANDLE(radv_device, device, _device);
647 RADV_FROM_HANDLE(radv_video_session, vid, videoSession);
648 uint32_t memory_type_bits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
649
650 VK_OUTARRAY_MAKE_TYPED(VkVideoSessionMemoryRequirementsKHR, out, pMemoryRequirements, pMemoryRequirementsCount);
651 /* 1 buffer for session context */
652 if (device->physical_device->rad_info.family >= CHIP_POLARIS10) {
653 vk_outarray_append_typed(VkVideoSessionMemoryRequirementsKHR, &out, m)
654 {
655 m->memoryBindIndex = RADV_BIND_SESSION_CTX;
656 m->memoryRequirements.size = RDECODE_SESSION_CONTEXT_SIZE;
657 m->memoryRequirements.alignment = 0;
658 m->memoryRequirements.memoryTypeBits = memory_type_bits;
659 }
660 }
661
662 if (vid->stream_type == RDECODE_CODEC_H264_PERF && device->physical_device->rad_info.family >= CHIP_POLARIS10) {
663 vk_outarray_append_typed(VkVideoSessionMemoryRequirementsKHR, &out, m)
664 {
665 m->memoryBindIndex = RADV_BIND_DECODER_CTX;
666 m->memoryRequirements.size = align(calc_ctx_size_h264_perf(vid), 4096);
667 m->memoryRequirements.alignment = 0;
668 m->memoryRequirements.memoryTypeBits = memory_type_bits;
669 }
670 }
671 if (vid->stream_type == RDECODE_CODEC_H265) {
672 uint32_t ctx_size;
673
674 if (vid->vk.h265.profile_idc == STD_VIDEO_H265_PROFILE_IDC_MAIN_10)
675 ctx_size = calc_ctx_size_h265_main10(vid);
676 else
677 ctx_size = calc_ctx_size_h265_main(vid);
678 vk_outarray_append_typed(VkVideoSessionMemoryRequirementsKHR, &out, m)
679 {
680 m->memoryBindIndex = RADV_BIND_DECODER_CTX;
681 m->memoryRequirements.size = align(ctx_size, 4096);
682 m->memoryRequirements.alignment = 0;
683 m->memoryRequirements.memoryTypeBits = memory_type_bits;
684 }
685 }
686 if (vid->stream_type == RDECODE_CODEC_AV1) {
687 vk_outarray_append_typed(VkVideoSessionMemoryRequirementsKHR, &out, m)
688 {
689 m->memoryBindIndex = RADV_BIND_DECODER_CTX;
690 m->memoryRequirements.size = align(calc_ctx_size_av1(device, vid), 4096);
691 m->memoryRequirements.alignment = 0;
692 m->memoryRequirements.memoryTypeBits = 0;
693 for (unsigned i = 0; i < device->physical_device->memory_properties.memoryTypeCount; i++)
694 if (device->physical_device->memory_properties.memoryTypes[i].propertyFlags &
695 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
696 m->memoryRequirements.memoryTypeBits |= (1 << i);
697 }
698 }
699 return vk_outarray_status(&out);
700 }
701
702 VKAPI_ATTR VkResult VKAPI_CALL
radv_UpdateVideoSessionParametersKHR(VkDevice _device,VkVideoSessionParametersKHR videoSessionParameters,const VkVideoSessionParametersUpdateInfoKHR * pUpdateInfo)703 radv_UpdateVideoSessionParametersKHR(VkDevice _device, VkVideoSessionParametersKHR videoSessionParameters,
704 const VkVideoSessionParametersUpdateInfoKHR *pUpdateInfo)
705 {
706 RADV_FROM_HANDLE(radv_video_session_params, params, videoSessionParameters);
707
708 return vk_video_session_parameters_update(¶ms->vk, pUpdateInfo);
709 }
710
711 static void
copy_bind(struct radv_vid_mem * dst,const VkBindVideoSessionMemoryInfoKHR * src)712 copy_bind(struct radv_vid_mem *dst, const VkBindVideoSessionMemoryInfoKHR *src)
713 {
714 dst->mem = radv_device_memory_from_handle(src->memory);
715 dst->offset = src->memoryOffset;
716 dst->size = src->memorySize;
717 }
718
719 VKAPI_ATTR VkResult VKAPI_CALL
radv_BindVideoSessionMemoryKHR(VkDevice _device,VkVideoSessionKHR videoSession,uint32_t videoSessionBindMemoryCount,const VkBindVideoSessionMemoryInfoKHR * pBindSessionMemoryInfos)720 radv_BindVideoSessionMemoryKHR(VkDevice _device, VkVideoSessionKHR videoSession, uint32_t videoSessionBindMemoryCount,
721 const VkBindVideoSessionMemoryInfoKHR *pBindSessionMemoryInfos)
722 {
723 RADV_FROM_HANDLE(radv_video_session, vid, videoSession);
724
725 for (unsigned i = 0; i < videoSessionBindMemoryCount; i++) {
726 switch (pBindSessionMemoryInfos[i].memoryBindIndex) {
727 case RADV_BIND_SESSION_CTX:
728 copy_bind(&vid->sessionctx, &pBindSessionMemoryInfos[i]);
729 break;
730 case RADV_BIND_DECODER_CTX:
731 copy_bind(&vid->ctx, &pBindSessionMemoryInfos[i]);
732 break;
733 default:
734 assert(0);
735 break;
736 }
737 }
738 return VK_SUCCESS;
739 }
740
741 /* add a new set register command to the IB */
742 static void
set_reg(struct radv_cmd_buffer * cmd_buffer,unsigned reg,uint32_t val)743 set_reg(struct radv_cmd_buffer *cmd_buffer, unsigned reg, uint32_t val)
744 {
745 struct radeon_cmdbuf *cs = cmd_buffer->cs;
746 radeon_emit(cs, RDECODE_PKT0(reg >> 2, 0));
747 radeon_emit(cs, val);
748 }
749
750 static void
send_cmd(struct radv_cmd_buffer * cmd_buffer,unsigned cmd,struct radeon_winsys_bo * bo,uint32_t offset)751 send_cmd(struct radv_cmd_buffer *cmd_buffer, unsigned cmd, struct radeon_winsys_bo *bo, uint32_t offset)
752 {
753 struct radv_physical_device *pdev = cmd_buffer->device->physical_device;
754 uint64_t addr;
755
756 radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, bo);
757 addr = radv_buffer_get_va(bo);
758 addr += offset;
759
760 if (cmd_buffer->device->physical_device->vid_decode_ip != AMD_IP_VCN_UNIFIED) {
761 radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 6);
762 set_reg(cmd_buffer, pdev->vid_dec_reg.data0, addr);
763 set_reg(cmd_buffer, pdev->vid_dec_reg.data1, addr >> 32);
764 set_reg(cmd_buffer, pdev->vid_dec_reg.cmd, cmd << 1);
765 return;
766 }
767 switch (cmd) {
768 case RDECODE_CMD_MSG_BUFFER:
769 cmd_buffer->video.decode_buffer->valid_buf_flag |= RDECODE_CMDBUF_FLAGS_MSG_BUFFER;
770 cmd_buffer->video.decode_buffer->msg_buffer_address_hi = (addr >> 32);
771 cmd_buffer->video.decode_buffer->msg_buffer_address_lo = (addr);
772 break;
773 case RDECODE_CMD_DPB_BUFFER:
774 cmd_buffer->video.decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_DPB_BUFFER);
775 cmd_buffer->video.decode_buffer->dpb_buffer_address_hi = (addr >> 32);
776 cmd_buffer->video.decode_buffer->dpb_buffer_address_lo = (addr);
777 break;
778 case RDECODE_CMD_DECODING_TARGET_BUFFER:
779 cmd_buffer->video.decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_DECODING_TARGET_BUFFER);
780 cmd_buffer->video.decode_buffer->target_buffer_address_hi = (addr >> 32);
781 cmd_buffer->video.decode_buffer->target_buffer_address_lo = (addr);
782 break;
783 case RDECODE_CMD_FEEDBACK_BUFFER:
784 cmd_buffer->video.decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_FEEDBACK_BUFFER);
785 cmd_buffer->video.decode_buffer->feedback_buffer_address_hi = (addr >> 32);
786 cmd_buffer->video.decode_buffer->feedback_buffer_address_lo = (addr);
787 break;
788 case RDECODE_CMD_PROB_TBL_BUFFER:
789 cmd_buffer->video.decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_PROB_TBL_BUFFER);
790 cmd_buffer->video.decode_buffer->prob_tbl_buffer_address_hi = (addr >> 32);
791 cmd_buffer->video.decode_buffer->prob_tbl_buffer_address_lo = (addr);
792 break;
793 case RDECODE_CMD_SESSION_CONTEXT_BUFFER:
794 cmd_buffer->video.decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_SESSION_CONTEXT_BUFFER);
795 cmd_buffer->video.decode_buffer->session_contex_buffer_address_hi = (addr >> 32);
796 cmd_buffer->video.decode_buffer->session_contex_buffer_address_lo = (addr);
797 break;
798 case RDECODE_CMD_BITSTREAM_BUFFER:
799 cmd_buffer->video.decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_BITSTREAM_BUFFER);
800 cmd_buffer->video.decode_buffer->bitstream_buffer_address_hi = (addr >> 32);
801 cmd_buffer->video.decode_buffer->bitstream_buffer_address_lo = (addr);
802 break;
803 case RDECODE_CMD_IT_SCALING_TABLE_BUFFER:
804 cmd_buffer->video.decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_IT_SCALING_BUFFER);
805 cmd_buffer->video.decode_buffer->it_sclr_table_buffer_address_hi = (addr >> 32);
806 cmd_buffer->video.decode_buffer->it_sclr_table_buffer_address_lo = (addr);
807 break;
808 case RDECODE_CMD_CONTEXT_BUFFER:
809 cmd_buffer->video.decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_CONTEXT_BUFFER);
810 cmd_buffer->video.decode_buffer->context_buffer_address_hi = (addr >> 32);
811 cmd_buffer->video.decode_buffer->context_buffer_address_lo = (addr);
812 break;
813 default:
814 assert(0);
815 }
816 }
817
818 static void
rvcn_dec_message_create(struct radv_video_session * vid,void * ptr,uint32_t size)819 rvcn_dec_message_create(struct radv_video_session *vid, void *ptr, uint32_t size)
820 {
821 rvcn_dec_message_header_t *header = ptr;
822 rvcn_dec_message_create_t *create = (void *)((char *)ptr + sizeof(rvcn_dec_message_header_t));
823
824 memset(ptr, 0, size);
825 header->header_size = sizeof(rvcn_dec_message_header_t);
826 header->total_size = size;
827 header->num_buffers = 1;
828 header->msg_type = RDECODE_MSG_CREATE;
829 header->stream_handle = vid->stream_handle;
830 header->status_report_feedback_number = 0;
831
832 header->index[0].message_id = RDECODE_MESSAGE_CREATE;
833 header->index[0].offset = sizeof(rvcn_dec_message_header_t);
834 header->index[0].size = sizeof(rvcn_dec_message_create_t);
835 header->index[0].filled = 0;
836
837 create->stream_type = vid->stream_type;
838 create->session_flags = 0;
839 create->width_in_samples = vid->vk.max_coded.width;
840 create->height_in_samples = vid->vk.max_coded.height;
841 }
842
843 static void
rvcn_dec_message_feedback(void * ptr)844 rvcn_dec_message_feedback(void *ptr)
845 {
846 rvcn_dec_feedback_header_t *header = (void *)ptr;
847
848 header->header_size = sizeof(rvcn_dec_feedback_header_t);
849 header->total_size = sizeof(rvcn_dec_feedback_header_t);
850 header->num_buffers = 0;
851 }
852
853 static const uint8_t h264_levels[] = {10, 11, 12, 13, 20, 21, 22, 30, 31, 32, 40, 41, 42, 50, 51, 52, 60, 61, 62};
854 static uint8_t
get_h264_level(StdVideoH264LevelIdc level)855 get_h264_level(StdVideoH264LevelIdc level)
856 {
857 assert(level <= STD_VIDEO_H264_LEVEL_IDC_6_2);
858 return h264_levels[level];
859 }
860
861 static void
update_h264_scaling(unsigned char scaling_list_4x4[6][16],unsigned char scaling_list_8x8[2][64],const StdVideoH264ScalingLists * scaling_lists)862 update_h264_scaling(unsigned char scaling_list_4x4[6][16], unsigned char scaling_list_8x8[2][64],
863 const StdVideoH264ScalingLists *scaling_lists)
864 {
865 for (int i = 0; i < STD_VIDEO_H264_SCALING_LIST_4X4_NUM_LISTS; i++) {
866 for (int j = 0; j < STD_VIDEO_H264_SCALING_LIST_4X4_NUM_ELEMENTS; j++)
867 scaling_list_4x4[i][vl_zscan_normal_16[j]] = scaling_lists->ScalingList4x4[i][j];
868 }
869
870 for (int i = 0; i < 2; i++) {
871 for (int j = 0; j < STD_VIDEO_H264_SCALING_LIST_8X8_NUM_ELEMENTS; j++)
872 scaling_list_8x8[i][vl_zscan_normal[j]] = scaling_lists->ScalingList8x8[i][j];
873 }
874 }
875
876 static rvcn_dec_message_avc_t
get_h264_msg(struct radv_video_session * vid,struct radv_video_session_params * params,const struct VkVideoDecodeInfoKHR * frame_info,uint32_t * slice_offset,uint32_t * width_in_samples,uint32_t * height_in_samples,void * it_ptr)877 get_h264_msg(struct radv_video_session *vid, struct radv_video_session_params *params,
878 const struct VkVideoDecodeInfoKHR *frame_info, uint32_t *slice_offset, uint32_t *width_in_samples,
879 uint32_t *height_in_samples, void *it_ptr)
880 {
881 rvcn_dec_message_avc_t result;
882 const struct VkVideoDecodeH264PictureInfoKHR *h264_pic_info =
883 vk_find_struct_const(frame_info->pNext, VIDEO_DECODE_H264_PICTURE_INFO_KHR);
884
885 *slice_offset = h264_pic_info->pSliceOffsets[0];
886
887 memset(&result, 0, sizeof(result));
888
889 assert(params->vk.h264_dec.h264_sps_count > 0);
890 const StdVideoH264SequenceParameterSet *sps =
891 vk_video_find_h264_dec_std_sps(¶ms->vk, h264_pic_info->pStdPictureInfo->seq_parameter_set_id);
892 switch (sps->profile_idc) {
893 case STD_VIDEO_H264_PROFILE_IDC_BASELINE:
894 result.profile = RDECODE_H264_PROFILE_BASELINE;
895 break;
896 case STD_VIDEO_H264_PROFILE_IDC_MAIN:
897 result.profile = RDECODE_H264_PROFILE_MAIN;
898 break;
899 case STD_VIDEO_H264_PROFILE_IDC_HIGH:
900 result.profile = RDECODE_H264_PROFILE_HIGH;
901 break;
902 default:
903 fprintf(stderr, "UNSUPPORTED CODEC %d\n", sps->profile_idc);
904 result.profile = RDECODE_H264_PROFILE_MAIN;
905 break;
906 }
907
908 *width_in_samples = (sps->pic_width_in_mbs_minus1 + 1) * 16;
909 *height_in_samples = (sps->pic_height_in_map_units_minus1 + 1) * 16;
910 if (!sps->flags.frame_mbs_only_flag)
911 *height_in_samples *= 2;
912 result.level = get_h264_level(sps->level_idc);
913
914 result.sps_info_flags = 0;
915
916 result.sps_info_flags |= sps->flags.direct_8x8_inference_flag << 0;
917 result.sps_info_flags |= sps->flags.mb_adaptive_frame_field_flag << 1;
918 result.sps_info_flags |= sps->flags.frame_mbs_only_flag << 2;
919 result.sps_info_flags |= sps->flags.delta_pic_order_always_zero_flag << 3;
920 if (vid->dpb_type != DPB_DYNAMIC_TIER_2)
921 result.sps_info_flags |= 1 << RDECODE_SPS_INFO_H264_EXTENSION_SUPPORT_FLAG_SHIFT;
922
923 result.bit_depth_luma_minus8 = sps->bit_depth_luma_minus8;
924 result.bit_depth_chroma_minus8 = sps->bit_depth_chroma_minus8;
925 result.log2_max_frame_num_minus4 = sps->log2_max_frame_num_minus4;
926 result.pic_order_cnt_type = sps->pic_order_cnt_type;
927 result.log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_pic_order_cnt_lsb_minus4;
928
929 result.chroma_format = sps->chroma_format_idc;
930
931 const StdVideoH264PictureParameterSet *pps =
932 vk_video_find_h264_dec_std_pps(¶ms->vk, h264_pic_info->pStdPictureInfo->pic_parameter_set_id);
933 result.pps_info_flags = 0;
934 result.pps_info_flags |= pps->flags.transform_8x8_mode_flag << 0;
935 result.pps_info_flags |= pps->flags.redundant_pic_cnt_present_flag << 1;
936 result.pps_info_flags |= pps->flags.constrained_intra_pred_flag << 2;
937 result.pps_info_flags |= pps->flags.deblocking_filter_control_present_flag << 3;
938 result.pps_info_flags |= pps->weighted_bipred_idc << 4;
939 result.pps_info_flags |= pps->flags.weighted_pred_flag << 6;
940 result.pps_info_flags |= pps->flags.bottom_field_pic_order_in_frame_present_flag << 7;
941 result.pps_info_flags |= pps->flags.entropy_coding_mode_flag << 8;
942
943 result.pic_init_qp_minus26 = pps->pic_init_qp_minus26;
944 result.chroma_qp_index_offset = pps->chroma_qp_index_offset;
945 result.second_chroma_qp_index_offset = pps->second_chroma_qp_index_offset;
946
947 StdVideoH264ScalingLists scaling_lists;
948 vk_video_derive_h264_scaling_list(sps, pps, &scaling_lists);
949 update_h264_scaling(result.scaling_list_4x4, result.scaling_list_8x8, &scaling_lists);
950
951 memset(it_ptr, 0, IT_SCALING_TABLE_SIZE);
952 memcpy(it_ptr, result.scaling_list_4x4, 6 * 16);
953 memcpy((char *)it_ptr + 96, result.scaling_list_8x8, 2 * 64);
954
955 result.num_ref_idx_l0_active_minus1 = pps->num_ref_idx_l0_default_active_minus1;
956 result.num_ref_idx_l1_active_minus1 = pps->num_ref_idx_l1_default_active_minus1;
957
958 result.curr_field_order_cnt_list[0] = h264_pic_info->pStdPictureInfo->PicOrderCnt[0];
959 result.curr_field_order_cnt_list[1] = h264_pic_info->pStdPictureInfo->PicOrderCnt[1];
960
961 result.frame_num = h264_pic_info->pStdPictureInfo->frame_num;
962
963 result.num_ref_frames = sps->max_num_ref_frames;
964 result.non_existing_frame_flags = 0;
965 result.used_for_reference_flags = 0;
966
967 memset(result.ref_frame_list, 0xff, sizeof(unsigned char) * 16);
968 memset(result.frame_num_list, 0, sizeof(unsigned int) * 16);
969 for (unsigned i = 0; i < frame_info->referenceSlotCount; i++) {
970 int idx = frame_info->pReferenceSlots[i].slotIndex;
971 const struct VkVideoDecodeH264DpbSlotInfoKHR *dpb_slot =
972 vk_find_struct_const(frame_info->pReferenceSlots[i].pNext, VIDEO_DECODE_H264_DPB_SLOT_INFO_KHR);
973
974 result.frame_num_list[i] = dpb_slot->pStdReferenceInfo->FrameNum;
975 result.field_order_cnt_list[i][0] = dpb_slot->pStdReferenceInfo->PicOrderCnt[0];
976 result.field_order_cnt_list[i][1] = dpb_slot->pStdReferenceInfo->PicOrderCnt[1];
977
978 result.ref_frame_list[i] = idx;
979
980 if (dpb_slot->pStdReferenceInfo->flags.top_field_flag)
981 result.used_for_reference_flags |= (1 << (2 * i));
982 if (dpb_slot->pStdReferenceInfo->flags.bottom_field_flag)
983 result.used_for_reference_flags |= (1 << (2 * i + 1));
984
985 if (!dpb_slot->pStdReferenceInfo->flags.top_field_flag && !dpb_slot->pStdReferenceInfo->flags.bottom_field_flag)
986 result.used_for_reference_flags |= (3 << (2 * i));
987
988 if (dpb_slot->pStdReferenceInfo->flags.used_for_long_term_reference)
989 result.ref_frame_list[i] |= 0x80;
990 if (dpb_slot->pStdReferenceInfo->flags.is_non_existing)
991 result.non_existing_frame_flags |= 1 << i;
992 }
993 result.curr_pic_ref_frame_num = frame_info->referenceSlotCount;
994 result.decoded_pic_idx = frame_info->pSetupReferenceSlot->slotIndex;
995
996 return result;
997 }
998
999 static void
update_h265_scaling(void * it_ptr,const StdVideoH265ScalingLists * scaling_lists)1000 update_h265_scaling(void *it_ptr, const StdVideoH265ScalingLists *scaling_lists)
1001 {
1002 if (scaling_lists) {
1003 memcpy(it_ptr, scaling_lists->ScalingList4x4,
1004 STD_VIDEO_H265_SCALING_LIST_4X4_NUM_LISTS * STD_VIDEO_H265_SCALING_LIST_4X4_NUM_ELEMENTS);
1005 memcpy((char *)it_ptr + 96, scaling_lists->ScalingList8x8,
1006 STD_VIDEO_H265_SCALING_LIST_8X8_NUM_LISTS * STD_VIDEO_H265_SCALING_LIST_8X8_NUM_ELEMENTS);
1007 memcpy((char *)it_ptr + 480, scaling_lists->ScalingList16x16,
1008 STD_VIDEO_H265_SCALING_LIST_16X16_NUM_LISTS * STD_VIDEO_H265_SCALING_LIST_16X16_NUM_ELEMENTS);
1009 memcpy((char *)it_ptr + 864, scaling_lists->ScalingList32x32,
1010 STD_VIDEO_H265_SCALING_LIST_32X32_NUM_LISTS * STD_VIDEO_H265_SCALING_LIST_32X32_NUM_ELEMENTS);
1011 } else {
1012 memset(it_ptr, 0, STD_VIDEO_H265_SCALING_LIST_4X4_NUM_LISTS * STD_VIDEO_H265_SCALING_LIST_4X4_NUM_ELEMENTS);
1013 memset((char *)it_ptr + 96, 0,
1014 STD_VIDEO_H265_SCALING_LIST_8X8_NUM_LISTS * STD_VIDEO_H265_SCALING_LIST_8X8_NUM_ELEMENTS);
1015 memset((char *)it_ptr + 480, 0,
1016 STD_VIDEO_H265_SCALING_LIST_16X16_NUM_LISTS * STD_VIDEO_H265_SCALING_LIST_16X16_NUM_ELEMENTS);
1017 memset((char *)it_ptr + 864, 0,
1018 STD_VIDEO_H265_SCALING_LIST_32X32_NUM_LISTS * STD_VIDEO_H265_SCALING_LIST_32X32_NUM_ELEMENTS);
1019 }
1020 }
1021
1022 static rvcn_dec_message_hevc_t
get_h265_msg(struct radv_device * device,struct radv_video_session * vid,struct radv_video_session_params * params,const struct VkVideoDecodeInfoKHR * frame_info,void * it_ptr)1023 get_h265_msg(struct radv_device *device, struct radv_video_session *vid, struct radv_video_session_params *params,
1024 const struct VkVideoDecodeInfoKHR *frame_info, void *it_ptr)
1025 {
1026 rvcn_dec_message_hevc_t result;
1027 int i, j;
1028 const struct VkVideoDecodeH265PictureInfoKHR *h265_pic_info =
1029 vk_find_struct_const(frame_info->pNext, VIDEO_DECODE_H265_PICTURE_INFO_KHR);
1030 memset(&result, 0, sizeof(result));
1031
1032 const StdVideoH265SequenceParameterSet *sps =
1033 vk_video_find_h265_dec_std_sps(¶ms->vk, h265_pic_info->pStdPictureInfo->pps_seq_parameter_set_id);
1034 const StdVideoH265PictureParameterSet *pps =
1035 vk_video_find_h265_dec_std_pps(¶ms->vk, h265_pic_info->pStdPictureInfo->pps_pic_parameter_set_id);
1036
1037 result.sps_info_flags = 0;
1038 result.sps_info_flags |= sps->flags.scaling_list_enabled_flag << 0;
1039 result.sps_info_flags |= sps->flags.amp_enabled_flag << 1;
1040 result.sps_info_flags |= sps->flags.sample_adaptive_offset_enabled_flag << 2;
1041 result.sps_info_flags |= sps->flags.pcm_enabled_flag << 3;
1042 result.sps_info_flags |= sps->flags.pcm_loop_filter_disabled_flag << 4;
1043 result.sps_info_flags |= sps->flags.long_term_ref_pics_present_flag << 5;
1044 result.sps_info_flags |= sps->flags.sps_temporal_mvp_enabled_flag << 6;
1045 result.sps_info_flags |= sps->flags.strong_intra_smoothing_enabled_flag << 7;
1046 result.sps_info_flags |= sps->flags.separate_colour_plane_flag << 8;
1047
1048 if (device->physical_device->rad_info.family == CHIP_CARRIZO)
1049 result.sps_info_flags |= 1 << 9;
1050
1051 if (!h265_pic_info->pStdPictureInfo->flags.short_term_ref_pic_set_sps_flag) {
1052 result.sps_info_flags |= 1 << 11;
1053 }
1054 result.st_rps_bits = h265_pic_info->pStdPictureInfo->NumBitsForSTRefPicSetInSlice;
1055
1056 result.chroma_format = sps->chroma_format_idc;
1057 result.bit_depth_luma_minus8 = sps->bit_depth_luma_minus8;
1058 result.bit_depth_chroma_minus8 = sps->bit_depth_chroma_minus8;
1059 result.log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_pic_order_cnt_lsb_minus4;
1060 result.sps_max_dec_pic_buffering_minus1 =
1061 sps->pDecPicBufMgr->max_dec_pic_buffering_minus1[sps->sps_max_sub_layers_minus1];
1062 result.log2_min_luma_coding_block_size_minus3 = sps->log2_min_luma_coding_block_size_minus3;
1063 result.log2_diff_max_min_luma_coding_block_size = sps->log2_diff_max_min_luma_coding_block_size;
1064 result.log2_min_transform_block_size_minus2 = sps->log2_min_luma_transform_block_size_minus2;
1065 result.log2_diff_max_min_transform_block_size = sps->log2_diff_max_min_luma_transform_block_size;
1066 result.max_transform_hierarchy_depth_inter = sps->max_transform_hierarchy_depth_inter;
1067 result.max_transform_hierarchy_depth_intra = sps->max_transform_hierarchy_depth_intra;
1068 if (sps->flags.pcm_enabled_flag) {
1069 result.pcm_sample_bit_depth_luma_minus1 = sps->pcm_sample_bit_depth_luma_minus1;
1070 result.pcm_sample_bit_depth_chroma_minus1 = sps->pcm_sample_bit_depth_chroma_minus1;
1071 result.log2_min_pcm_luma_coding_block_size_minus3 = sps->log2_min_pcm_luma_coding_block_size_minus3;
1072 result.log2_diff_max_min_pcm_luma_coding_block_size = sps->log2_diff_max_min_pcm_luma_coding_block_size;
1073 }
1074 result.num_short_term_ref_pic_sets = sps->num_short_term_ref_pic_sets;
1075
1076 result.pps_info_flags = 0;
1077 result.pps_info_flags |= pps->flags.dependent_slice_segments_enabled_flag << 0;
1078 result.pps_info_flags |= pps->flags.output_flag_present_flag << 1;
1079 result.pps_info_flags |= pps->flags.sign_data_hiding_enabled_flag << 2;
1080 result.pps_info_flags |= pps->flags.cabac_init_present_flag << 3;
1081 result.pps_info_flags |= pps->flags.constrained_intra_pred_flag << 4;
1082 result.pps_info_flags |= pps->flags.transform_skip_enabled_flag << 5;
1083 result.pps_info_flags |= pps->flags.cu_qp_delta_enabled_flag << 6;
1084 result.pps_info_flags |= pps->flags.pps_slice_chroma_qp_offsets_present_flag << 7;
1085 result.pps_info_flags |= pps->flags.weighted_pred_flag << 8;
1086 result.pps_info_flags |= pps->flags.weighted_bipred_flag << 9;
1087 result.pps_info_flags |= pps->flags.transquant_bypass_enabled_flag << 10;
1088 result.pps_info_flags |= pps->flags.tiles_enabled_flag << 11;
1089 result.pps_info_flags |= pps->flags.entropy_coding_sync_enabled_flag << 12;
1090 result.pps_info_flags |= pps->flags.uniform_spacing_flag << 13;
1091 result.pps_info_flags |= pps->flags.loop_filter_across_tiles_enabled_flag << 14;
1092 result.pps_info_flags |= pps->flags.pps_loop_filter_across_slices_enabled_flag << 15;
1093 result.pps_info_flags |= pps->flags.deblocking_filter_override_enabled_flag << 16;
1094 result.pps_info_flags |= pps->flags.pps_deblocking_filter_disabled_flag << 17;
1095 result.pps_info_flags |= pps->flags.lists_modification_present_flag << 18;
1096 result.pps_info_flags |= pps->flags.slice_segment_header_extension_present_flag << 19;
1097
1098 result.num_extra_slice_header_bits = pps->num_extra_slice_header_bits;
1099 result.num_long_term_ref_pic_sps = sps->num_long_term_ref_pics_sps;
1100 result.num_ref_idx_l0_default_active_minus1 = pps->num_ref_idx_l0_default_active_minus1;
1101 result.num_ref_idx_l1_default_active_minus1 = pps->num_ref_idx_l1_default_active_minus1;
1102 result.pps_cb_qp_offset = pps->pps_cb_qp_offset;
1103 result.pps_cr_qp_offset = pps->pps_cr_qp_offset;
1104 result.pps_beta_offset_div2 = pps->pps_beta_offset_div2;
1105 result.pps_tc_offset_div2 = pps->pps_tc_offset_div2;
1106 result.diff_cu_qp_delta_depth = pps->diff_cu_qp_delta_depth;
1107 result.num_tile_columns_minus1 = pps->num_tile_columns_minus1;
1108 result.num_tile_rows_minus1 = pps->num_tile_rows_minus1;
1109 result.log2_parallel_merge_level_minus2 = pps->log2_parallel_merge_level_minus2;
1110 result.init_qp_minus26 = pps->init_qp_minus26;
1111
1112 for (i = 0; i < 19; ++i)
1113 result.column_width_minus1[i] = pps->column_width_minus1[i];
1114
1115 for (i = 0; i < 21; ++i)
1116 result.row_height_minus1[i] = pps->row_height_minus1[i];
1117
1118 result.num_delta_pocs_ref_rps_idx = h265_pic_info->pStdPictureInfo->NumDeltaPocsOfRefRpsIdx;
1119 result.curr_poc = h265_pic_info->pStdPictureInfo->PicOrderCntVal;
1120
1121 uint8_t idxs[16];
1122 memset(result.poc_list, 0, 16 * sizeof(int));
1123 memset(result.ref_pic_list, 0x7f, 16);
1124 memset(idxs, 0xff, 16);
1125 for (i = 0; i < frame_info->referenceSlotCount; i++) {
1126 const struct VkVideoDecodeH265DpbSlotInfoKHR *dpb_slot =
1127 vk_find_struct_const(frame_info->pReferenceSlots[i].pNext, VIDEO_DECODE_H265_DPB_SLOT_INFO_KHR);
1128 int idx = frame_info->pReferenceSlots[i].slotIndex;
1129 result.poc_list[i] = dpb_slot->pStdReferenceInfo->PicOrderCntVal;
1130 result.ref_pic_list[i] = idx;
1131 idxs[idx] = i;
1132 }
1133 result.curr_idx = frame_info->pSetupReferenceSlot->slotIndex;
1134
1135 #define IDXS(x) ((x) == 0xff ? 0xff : idxs[(x)])
1136 for (i = 0; i < 8; ++i)
1137 result.ref_pic_set_st_curr_before[i] = IDXS(h265_pic_info->pStdPictureInfo->RefPicSetStCurrBefore[i]);
1138
1139 for (i = 0; i < 8; ++i)
1140 result.ref_pic_set_st_curr_after[i] = IDXS(h265_pic_info->pStdPictureInfo->RefPicSetStCurrAfter[i]);
1141
1142 for (i = 0; i < 8; ++i)
1143 result.ref_pic_set_lt_curr[i] = IDXS(h265_pic_info->pStdPictureInfo->RefPicSetLtCurr[i]);
1144
1145 const StdVideoH265ScalingLists *scaling_lists = NULL;
1146 if (pps->flags.pps_scaling_list_data_present_flag)
1147 scaling_lists = pps->pScalingLists;
1148 else if (sps->flags.sps_scaling_list_data_present_flag)
1149 scaling_lists = sps->pScalingLists;
1150
1151 update_h265_scaling(it_ptr, scaling_lists);
1152
1153 if (scaling_lists) {
1154 for (i = 0; i < STD_VIDEO_H265_SCALING_LIST_16X16_NUM_LISTS; ++i)
1155 result.ucScalingListDCCoefSizeID2[i] = scaling_lists->ScalingListDCCoef16x16[i];
1156
1157 for (i = 0; i < STD_VIDEO_H265_SCALING_LIST_32X32_NUM_LISTS; ++i)
1158 result.ucScalingListDCCoefSizeID3[i] = scaling_lists->ScalingListDCCoef32x32[i];
1159 }
1160
1161 for (i = 0; i < 2; i++) {
1162 for (j = 0; j < 15; j++)
1163 result.direct_reflist[i][j] = 0xff;
1164 }
1165
1166 if (vid->vk.h265.profile_idc == STD_VIDEO_H265_PROFILE_IDC_MAIN_10) {
1167 if (vid->vk.picture_format == VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16) {
1168 result.p010_mode = 1;
1169 result.msb_mode = 1;
1170 } else {
1171 result.p010_mode = 0;
1172 result.luma_10to8 = 5;
1173 result.chroma_10to8 = 5;
1174 result.hevc_reserved[0] = 4; /* sclr_luma10to8 */
1175 result.hevc_reserved[1] = 4; /* sclr_chroma10to8 */
1176 }
1177 }
1178
1179 return result;
1180 }
1181
/* AV1 restoration type values.
 * NOTE(review): presumably the loop-restoration filter types from the AV1
 * frame header; their users are outside this part of the file -- confirm.
 */
enum {
   AV1_RESTORE_NONE = 0,
   AV1_RESTORE_WIENER = 1,
   AV1_RESTORE_SGRPROJ = 2,
   AV1_RESTORE_SWITCHABLE = 3,
};

/* NOTE(review): these look like the AV1 super-resolution scale numerator and
 * minimum denominator; they are not used in the code nearby -- confirm.
 */
#define AV1_SUPERRES_NUM 8
#define AV1_SUPERRES_DENOM_MIN 9

/* Dimensions (rows x columns) of the intermediate luma and chroma grain
 * blocks generated by radv_vcn_av1_init_film_grain_buffer().
 */
#define LUMA_BLOCK_SIZE_Y 73
#define LUMA_BLOCK_SIZE_X 82
#define CHROMA_BLOCK_SIZE_Y 38
#define CHROMA_BLOCK_SIZE_X 44
1196
/*
 * Advance the 16-bit film-grain shift register in *seed by one step and
 * return its top `bits` bits.
 *
 * The feedback bit is the XOR of bits 0, 1, 3 and 12 of the current state;
 * the state is shifted right by one and the feedback bit inserted at bit 15.
 * The updated state is written back to *seed.
 */
static int32_t
radv_vcn_av1_film_grain_random_number(unsigned short *seed, int32_t bits)
{
   const unsigned short state = *seed;
   const unsigned short feedback = (state ^ (state >> 1) ^ (state >> 3) ^ (state >> 12)) & 1;
   const unsigned short next = (state >> 1) | (feedback << 15);

   *seed = next;

   const int32_t mask = (1 << bits) - 1;
   return (next >> (16 - bits)) & mask;
}
1209
/*
 * Expand a piecewise-linear scaling function into a 256-entry lookup table.
 *
 * scaling_points - num (x, y) pairs; x is the table index, y the value.
 * num            - number of points; with 0 points scaling_lut is left
 *                  untouched.
 * scaling_lut    - out: at least 256 entries.
 *
 * Entries below the first point's x take the first point's y, entries from
 * the last point's x upwards take the last point's y, and entries in
 * between are linearly interpolated in 16.16 fixed point.
 *
 * Segments whose x does not increase (delta_x <= 0) are skipped. A zero
 * delta_x would otherwise divide by zero; a negative one never wrote any
 * entry anyway.
 */
static void
radv_vcn_av1_film_grain_init_scaling(uint8_t scaling_points[][2], uint8_t num, short scaling_lut[])
{
   int32_t i, x, delta_x, delta_y;
   int64_t delta;

   if (num == 0)
      return;

   /* Flat section before the first point. */
   for (i = 0; i < scaling_points[0][0]; i++)
      scaling_lut[i] = scaling_points[0][1];

   for (i = 0; i < num - 1; i++) {
      delta_y = scaling_points[i + 1][1] - scaling_points[i][1];
      delta_x = scaling_points[i + 1][0] - scaling_points[i][0];

      /* Empty or reversed segment: nothing to interpolate. */
      if (delta_x <= 0)
         continue;

      /* Slope in 16.16 fixed point, using a rounded reciprocal of delta_x. */
      delta = delta_y * ((65536 + (delta_x >> 1)) / delta_x);

      for (x = 0; x < delta_x; x++)
         scaling_lut[scaling_points[i][0] + x] = (short)(scaling_points[i][1] + (int32_t)((x * delta + 32768) >> 16));
   }

   /* Flat section from the last point to the end of the table. */
   for (i = scaling_points[num - 1][0]; i < 256; i++)
      scaling_lut[i] = scaling_points[num - 1][1];
}
1235
1236 static void
radv_vcn_av1_init_film_grain_buffer(rvcn_dec_film_grain_params_t * fg_params,rvcn_dec_av1_fg_init_buf_t * fg_buf)1237 radv_vcn_av1_init_film_grain_buffer(rvcn_dec_film_grain_params_t *fg_params, rvcn_dec_av1_fg_init_buf_t *fg_buf)
1238 {
1239 const int32_t luma_block_size_y = LUMA_BLOCK_SIZE_Y;
1240 const int32_t luma_block_size_x = LUMA_BLOCK_SIZE_X;
1241 const int32_t chroma_block_size_y = CHROMA_BLOCK_SIZE_Y;
1242 const int32_t chroma_block_size_x = CHROMA_BLOCK_SIZE_X;
1243 const int32_t gauss_bits = 11;
1244 int32_t filt_luma_grain_block[LUMA_BLOCK_SIZE_Y][LUMA_BLOCK_SIZE_X];
1245 int32_t filt_cb_grain_block[CHROMA_BLOCK_SIZE_Y][CHROMA_BLOCK_SIZE_X];
1246 int32_t filt_cr_grain_block[CHROMA_BLOCK_SIZE_Y][CHROMA_BLOCK_SIZE_X];
1247 int32_t chroma_subsamp_y = 1;
1248 int32_t chroma_subsamp_x = 1;
1249 unsigned short seed = fg_params->random_seed;
1250 int32_t ar_coeff_lag = fg_params->ar_coeff_lag;
1251 int32_t bit_depth = fg_params->bit_depth_minus_8 + 8;
1252 short grain_center = 128 << (bit_depth - 8);
1253 short grain_min = 0 - grain_center;
1254 short grain_max = (256 << (bit_depth - 8)) - 1 - grain_center;
1255 int32_t shift = 12 - bit_depth + fg_params->grain_scale_shift;
1256 short luma_grain_block_tmp[64][80];
1257 short cb_grain_block_tmp[32][40];
1258 short cr_grain_block_tmp[32][40];
1259 short *align_ptr, *align_ptr0, *align_ptr1;
1260 int32_t x, y, g, i, j, c, c0, c1, delta_row, delta_col;
1261 int32_t s, s0, s1, pos, r;
1262
1263 /* generate luma grain block */
1264 memset(filt_luma_grain_block, 0, sizeof(filt_luma_grain_block));
1265 for (y = 0; y < luma_block_size_y; y++) {
1266 for (x = 0; x < luma_block_size_x; x++) {
1267 g = 0;
1268 if (fg_params->num_y_points > 0) {
1269 r = radv_vcn_av1_film_grain_random_number(&seed, gauss_bits);
1270 g = gaussian_sequence[CLAMP(r, 0, 2048 - 1)];
1271 }
1272 filt_luma_grain_block[y][x] = ROUND_POWER_OF_TWO(g, shift);
1273 }
1274 }
1275
1276 for (y = 3; y < luma_block_size_y; y++) {
1277 for (x = 3; x < luma_block_size_x - 3; x++) {
1278 s = 0;
1279 pos = 0;
1280 for (delta_row = -ar_coeff_lag; delta_row <= 0; delta_row++) {
1281 for (delta_col = -ar_coeff_lag; delta_col <= ar_coeff_lag; delta_col++) {
1282 if (delta_row == 0 && delta_col == 0)
1283 break;
1284 c = fg_params->ar_coeffs_y[pos];
1285 s += filt_luma_grain_block[y + delta_row][x + delta_col] * c;
1286 pos++;
1287 }
1288 }
1289 filt_luma_grain_block[y][x] = AV1_CLAMP(
1290 filt_luma_grain_block[y][x] + ROUND_POWER_OF_TWO(s, fg_params->ar_coeff_shift), grain_min, grain_max);
1291 }
1292 }
1293
1294 /* generate chroma grain block */
1295 memset(filt_cb_grain_block, 0, sizeof(filt_cb_grain_block));
1296 shift = 12 - bit_depth + fg_params->grain_scale_shift;
1297 seed = fg_params->random_seed ^ 0xb524;
1298 for (y = 0; y < chroma_block_size_y; y++) {
1299 for (x = 0; x < chroma_block_size_x; x++) {
1300 g = 0;
1301 if (fg_params->num_cb_points || fg_params->chroma_scaling_from_luma) {
1302 r = radv_vcn_av1_film_grain_random_number(&seed, gauss_bits);
1303 g = gaussian_sequence[CLAMP(r, 0, 2048 - 1)];
1304 }
1305 filt_cb_grain_block[y][x] = ROUND_POWER_OF_TWO(g, shift);
1306 }
1307 }
1308
1309 memset(filt_cr_grain_block, 0, sizeof(filt_cr_grain_block));
1310 seed = fg_params->random_seed ^ 0x49d8;
1311 for (y = 0; y < chroma_block_size_y; y++) {
1312 for (x = 0; x < chroma_block_size_x; x++) {
1313 g = 0;
1314 if (fg_params->num_cr_points || fg_params->chroma_scaling_from_luma) {
1315 r = radv_vcn_av1_film_grain_random_number(&seed, gauss_bits);
1316 g = gaussian_sequence[CLAMP(r, 0, 2048 - 1)];
1317 }
1318 filt_cr_grain_block[y][x] = ROUND_POWER_OF_TWO(g, shift);
1319 }
1320 }
1321
1322 for (y = 3; y < chroma_block_size_y; y++) {
1323 for (x = 3; x < chroma_block_size_x - 3; x++) {
1324 s0 = 0, s1 = 0, pos = 0;
1325 for (delta_row = -ar_coeff_lag; delta_row <= 0; delta_row++) {
1326 for (delta_col = -ar_coeff_lag; delta_col <= ar_coeff_lag; delta_col++) {
1327 c0 = fg_params->ar_coeffs_cb[pos];
1328 c1 = fg_params->ar_coeffs_cr[pos];
1329 if (delta_row == 0 && delta_col == 0) {
1330 if (fg_params->num_y_points > 0) {
1331 int luma = 0;
1332 int luma_x = ((x - 3) << chroma_subsamp_x) + 3;
1333 int luma_y = ((y - 3) << chroma_subsamp_y) + 3;
1334 for (i = 0; i <= chroma_subsamp_y; i++)
1335 for (j = 0; j <= chroma_subsamp_x; j++)
1336 luma += filt_luma_grain_block[luma_y + i][luma_x + j];
1337
1338 luma = ROUND_POWER_OF_TWO(luma, chroma_subsamp_x + chroma_subsamp_y);
1339 s0 += luma * c0;
1340 s1 += luma * c1;
1341 }
1342 break;
1343 }
1344 s0 += filt_cb_grain_block[y + delta_row][x + delta_col] * c0;
1345 s1 += filt_cr_grain_block[y + delta_row][x + delta_col] * c1;
1346 pos++;
1347 }
1348 }
1349 filt_cb_grain_block[y][x] = AV1_CLAMP(
1350 filt_cb_grain_block[y][x] + ROUND_POWER_OF_TWO(s0, fg_params->ar_coeff_shift), grain_min, grain_max);
1351 filt_cr_grain_block[y][x] = AV1_CLAMP(
1352 filt_cr_grain_block[y][x] + ROUND_POWER_OF_TWO(s1, fg_params->ar_coeff_shift), grain_min, grain_max);
1353 }
1354 }
1355
1356 for (i = 9; i < luma_block_size_y; i++)
1357 for (j = 9; j < luma_block_size_x; j++)
1358 luma_grain_block_tmp[i - 9][j - 9] = filt_luma_grain_block[i][j];
1359
1360 for (i = 6; i < chroma_block_size_y; i++)
1361 for (j = 6; j < chroma_block_size_x; j++) {
1362 cb_grain_block_tmp[i - 6][j - 6] = filt_cb_grain_block[i][j];
1363 cr_grain_block_tmp[i - 6][j - 6] = filt_cr_grain_block[i][j];
1364 }
1365
1366 align_ptr = &fg_buf->luma_grain_block[0][0];
1367 for (i = 0; i < 64; i++) {
1368 for (j = 0; j < 80; j++)
1369 *align_ptr++ = luma_grain_block_tmp[i][j];
1370
1371 if (((i + 1) % 4) == 0)
1372 align_ptr += 64;
1373 }
1374
1375 align_ptr0 = &fg_buf->cb_grain_block[0][0];
1376 align_ptr1 = &fg_buf->cr_grain_block[0][0];
1377 for (i = 0; i < 32; i++) {
1378 for (j = 0; j < 40; j++) {
1379 *align_ptr0++ = cb_grain_block_tmp[i][j];
1380 *align_ptr1++ = cr_grain_block_tmp[i][j];
1381 }
1382 if (((i + 1) % 8) == 0) {
1383 align_ptr0 += 64;
1384 align_ptr1 += 64;
1385 }
1386 }
1387
1388 memset(fg_buf->scaling_lut_y, 0, sizeof(fg_buf->scaling_lut_y));
1389 radv_vcn_av1_film_grain_init_scaling(fg_params->scaling_points_y, fg_params->num_y_points, fg_buf->scaling_lut_y);
1390 if (fg_params->chroma_scaling_from_luma) {
1391 memcpy(fg_buf->scaling_lut_cb, fg_buf->scaling_lut_y, sizeof(fg_buf->scaling_lut_y));
1392 memcpy(fg_buf->scaling_lut_cr, fg_buf->scaling_lut_y, sizeof(fg_buf->scaling_lut_y));
1393 } else {
1394 memset(fg_buf->scaling_lut_cb, 0, sizeof(fg_buf->scaling_lut_cb));
1395 memset(fg_buf->scaling_lut_cr, 0, sizeof(fg_buf->scaling_lut_cr));
1396 radv_vcn_av1_film_grain_init_scaling(fg_params->scaling_points_cb, fg_params->num_cb_points,
1397 fg_buf->scaling_lut_cb);
1398 radv_vcn_av1_film_grain_init_scaling(fg_params->scaling_points_cr, fg_params->num_cr_points,
1399 fg_buf->scaling_lut_cr);
1400 }
1401 }
1402
1403 static rvcn_dec_message_av1_t
get_av1_msg(struct radv_device * device,struct radv_video_session * vid,struct radv_video_session_params * params,const struct VkVideoDecodeInfoKHR * frame_info,void * probs_ptr,int * update_reference_slot)1404 get_av1_msg(struct radv_device *device, struct radv_video_session *vid, struct radv_video_session_params *params,
1405 const struct VkVideoDecodeInfoKHR *frame_info, void *probs_ptr, int *update_reference_slot)
1406 {
1407 rvcn_dec_message_av1_t result;
1408 unsigned i, j;
1409 const struct VkVideoDecodeAV1PictureInfoKHR *av1_pic_info =
1410 vk_find_struct_const(frame_info->pNext, VIDEO_DECODE_AV1_PICTURE_INFO_KHR);
1411 const StdVideoDecodeAV1PictureInfo *pi = av1_pic_info->pStdPictureInfo;
1412 const StdVideoAV1SequenceHeader *seq_hdr = ¶ms->vk.av1_dec.seq_hdr.base;
1413 memset(&result, 0, sizeof(result));
1414
1415 const int intra_only_decoding = vid->vk.max_dpb_slots == 0;
1416 if (intra_only_decoding)
1417 assert(frame_info->pSetupReferenceSlot == NULL);
1418
1419 *update_reference_slot = !(intra_only_decoding || pi->refresh_frame_flags == 0);
1420
1421 result.frame_header_flags = (1 /*av1_pic_info->frame_header->flags.show_frame*/
1422 << RDECODE_FRAME_HDR_INFO_AV1_SHOW_FRAME_SHIFT) &
1423 RDECODE_FRAME_HDR_INFO_AV1_SHOW_FRAME_MASK;
1424
1425 result.frame_header_flags |= (pi->flags.disable_cdf_update << RDECODE_FRAME_HDR_INFO_AV1_DISABLE_CDF_UPDATE_SHIFT) &
1426 RDECODE_FRAME_HDR_INFO_AV1_DISABLE_CDF_UPDATE_MASK;
1427
1428 result.frame_header_flags |=
1429 ((!pi->flags.disable_frame_end_update_cdf) << RDECODE_FRAME_HDR_INFO_AV1_REFRESH_FRAME_CONTEXT_SHIFT) &
1430 RDECODE_FRAME_HDR_INFO_AV1_REFRESH_FRAME_CONTEXT_MASK;
1431
1432 result.frame_header_flags |=
1433 ((pi->frame_type == STD_VIDEO_AV1_FRAME_TYPE_INTRA_ONLY) << RDECODE_FRAME_HDR_INFO_AV1_INTRA_ONLY_SHIFT) &
1434 RDECODE_FRAME_HDR_INFO_AV1_INTRA_ONLY_MASK;
1435
1436 result.frame_header_flags |= (pi->flags.allow_intrabc << RDECODE_FRAME_HDR_INFO_AV1_ALLOW_INTRABC_SHIFT) &
1437 RDECODE_FRAME_HDR_INFO_AV1_ALLOW_INTRABC_MASK;
1438
1439 result.frame_header_flags |=
1440 (pi->flags.allow_high_precision_mv << RDECODE_FRAME_HDR_INFO_AV1_ALLOW_HIGH_PRECISION_MV_SHIFT) &
1441 RDECODE_FRAME_HDR_INFO_AV1_ALLOW_HIGH_PRECISION_MV_MASK;
1442
1443 result.frame_header_flags |=
1444 (seq_hdr->pColorConfig->flags.mono_chrome << RDECODE_FRAME_HDR_INFO_AV1_MONOCHROME_SHIFT) &
1445 RDECODE_FRAME_HDR_INFO_AV1_MONOCHROME_MASK;
1446
1447 result.frame_header_flags |= (pi->flags.skip_mode_present << RDECODE_FRAME_HDR_INFO_AV1_SKIP_MODE_FLAG_SHIFT) &
1448 RDECODE_FRAME_HDR_INFO_AV1_SKIP_MODE_FLAG_MASK;
1449
1450 result.frame_header_flags |=
1451 (pi->pQuantization->flags.using_qmatrix << RDECODE_FRAME_HDR_INFO_AV1_USING_QMATRIX_SHIFT) &
1452 RDECODE_FRAME_HDR_INFO_AV1_USING_QMATRIX_MASK;
1453
1454 result.frame_header_flags |=
1455 (seq_hdr->flags.enable_filter_intra << RDECODE_FRAME_HDR_INFO_AV1_ENABLE_FILTER_INTRA_SHIFT) &
1456 RDECODE_FRAME_HDR_INFO_AV1_ENABLE_FILTER_INTRA_MASK;
1457
1458 result.frame_header_flags |=
1459 (seq_hdr->flags.enable_intra_edge_filter << RDECODE_FRAME_HDR_INFO_AV1_ENABLE_INTRA_EDGE_FILTER_SHIFT) &
1460 RDECODE_FRAME_HDR_INFO_AV1_ENABLE_INTRA_EDGE_FILTER_MASK;
1461
1462 result.frame_header_flags |=
1463 (seq_hdr->flags.enable_interintra_compound << RDECODE_FRAME_HDR_INFO_AV1_ENABLE_INTERINTRA_COMPOUND_SHIFT) &
1464 RDECODE_FRAME_HDR_INFO_AV1_ENABLE_INTERINTRA_COMPOUND_MASK;
1465
1466 result.frame_header_flags |=
1467 (seq_hdr->flags.enable_masked_compound << RDECODE_FRAME_HDR_INFO_AV1_ENABLE_MASKED_COMPOUND_SHIFT) &
1468 RDECODE_FRAME_HDR_INFO_AV1_ENABLE_MASKED_COMPOUND_MASK;
1469
1470 result.frame_header_flags |=
1471 (pi->flags.allow_warped_motion << RDECODE_FRAME_HDR_INFO_AV1_ALLOW_WARPED_MOTION_SHIFT) &
1472 RDECODE_FRAME_HDR_INFO_AV1_ALLOW_WARPED_MOTION_MASK;
1473
1474 result.frame_header_flags |=
1475 (seq_hdr->flags.enable_dual_filter << RDECODE_FRAME_HDR_INFO_AV1_ENABLE_DUAL_FILTER_SHIFT) &
1476 RDECODE_FRAME_HDR_INFO_AV1_ENABLE_DUAL_FILTER_MASK;
1477
1478 result.frame_header_flags |=
1479 (seq_hdr->flags.enable_order_hint << RDECODE_FRAME_HDR_INFO_AV1_ENABLE_ORDER_HINT_SHIFT) &
1480 RDECODE_FRAME_HDR_INFO_AV1_ENABLE_ORDER_HINT_MASK;
1481
1482 result.frame_header_flags |= (seq_hdr->flags.enable_jnt_comp << RDECODE_FRAME_HDR_INFO_AV1_ENABLE_JNT_COMP_SHIFT) &
1483 RDECODE_FRAME_HDR_INFO_AV1_ENABLE_JNT_COMP_MASK;
1484
1485 result.frame_header_flags |= (pi->flags.use_ref_frame_mvs << RDECODE_FRAME_HDR_INFO_AV1_ALLOW_REF_FRAME_MVS_SHIFT) &
1486 RDECODE_FRAME_HDR_INFO_AV1_ALLOW_REF_FRAME_MVS_MASK;
1487
1488 result.frame_header_flags |=
1489 (pi->flags.allow_screen_content_tools << RDECODE_FRAME_HDR_INFO_AV1_ALLOW_SCREEN_CONTENT_TOOLS_SHIFT) &
1490 RDECODE_FRAME_HDR_INFO_AV1_ALLOW_SCREEN_CONTENT_TOOLS_MASK;
1491
1492 result.frame_header_flags |=
1493 (pi->flags.force_integer_mv << RDECODE_FRAME_HDR_INFO_AV1_CUR_FRAME_FORCE_INTEGER_MV_SHIFT) &
1494 RDECODE_FRAME_HDR_INFO_AV1_CUR_FRAME_FORCE_INTEGER_MV_MASK;
1495
1496 result.frame_header_flags |=
1497 (pi->pLoopFilter->flags.loop_filter_delta_enabled << RDECODE_FRAME_HDR_INFO_AV1_MODE_REF_DELTA_ENABLED_SHIFT) &
1498 RDECODE_FRAME_HDR_INFO_AV1_MODE_REF_DELTA_ENABLED_MASK;
1499
1500 result.frame_header_flags |=
1501 (pi->pLoopFilter->flags.loop_filter_delta_update << RDECODE_FRAME_HDR_INFO_AV1_MODE_REF_DELTA_UPDATE_SHIFT) &
1502 RDECODE_FRAME_HDR_INFO_AV1_MODE_REF_DELTA_UPDATE_MASK;
1503
1504 result.frame_header_flags |= (pi->flags.delta_q_present << RDECODE_FRAME_HDR_INFO_AV1_DELTA_Q_PRESENT_FLAG_SHIFT) &
1505 RDECODE_FRAME_HDR_INFO_AV1_DELTA_Q_PRESENT_FLAG_MASK;
1506
1507 result.frame_header_flags |= (pi->flags.delta_lf_present << RDECODE_FRAME_HDR_INFO_AV1_DELTA_LF_PRESENT_FLAG_SHIFT) &
1508 RDECODE_FRAME_HDR_INFO_AV1_DELTA_LF_PRESENT_FLAG_MASK;
1509
1510 result.frame_header_flags |= (pi->flags.reduced_tx_set << RDECODE_FRAME_HDR_INFO_AV1_REDUCED_TX_SET_USED_SHIFT) &
1511 RDECODE_FRAME_HDR_INFO_AV1_REDUCED_TX_SET_USED_MASK;
1512
1513 result.frame_header_flags |=
1514 (pi->flags.segmentation_enabled << RDECODE_FRAME_HDR_INFO_AV1_SEGMENTATION_ENABLED_SHIFT) &
1515 RDECODE_FRAME_HDR_INFO_AV1_SEGMENTATION_ENABLED_MASK;
1516
1517 result.frame_header_flags |=
1518 (pi->flags.segmentation_update_map << RDECODE_FRAME_HDR_INFO_AV1_SEGMENTATION_UPDATE_MAP_SHIFT) &
1519 RDECODE_FRAME_HDR_INFO_AV1_SEGMENTATION_UPDATE_MAP_MASK;
1520
1521 result.frame_header_flags |=
1522 (pi->flags.segmentation_temporal_update << RDECODE_FRAME_HDR_INFO_AV1_SEGMENTATION_TEMPORAL_UPDATE_SHIFT) &
1523 RDECODE_FRAME_HDR_INFO_AV1_SEGMENTATION_TEMPORAL_UPDATE_MASK;
1524
1525 result.frame_header_flags |= (pi->flags.delta_lf_multi << RDECODE_FRAME_HDR_INFO_AV1_DELTA_LF_MULTI_SHIFT) &
1526 RDECODE_FRAME_HDR_INFO_AV1_DELTA_LF_MULTI_MASK;
1527
1528 result.frame_header_flags |=
1529 (pi->flags.is_motion_mode_switchable << RDECODE_FRAME_HDR_INFO_AV1_SWITCHABLE_SKIP_MODE_SHIFT) &
1530 RDECODE_FRAME_HDR_INFO_AV1_SWITCHABLE_SKIP_MODE_MASK;
1531
1532 result.frame_header_flags |= ((!intra_only_decoding ? !(pi->refresh_frame_flags) : 1)
1533 << RDECODE_FRAME_HDR_INFO_AV1_SKIP_REFERENCE_UPDATE_SHIFT) &
1534 RDECODE_FRAME_HDR_INFO_AV1_SKIP_REFERENCE_UPDATE_MASK;
1535
1536 result.frame_header_flags |=
1537 ((!seq_hdr->flags.enable_ref_frame_mvs) << RDECODE_FRAME_HDR_INFO_AV1_DISABLE_REF_FRAME_MVS_SHIFT) &
1538 RDECODE_FRAME_HDR_INFO_AV1_DISABLE_REF_FRAME_MVS_MASK;
1539
1540 result.current_frame_id = pi->current_frame_id;
1541 result.frame_offset = pi->OrderHint;
1542 result.profile = seq_hdr->seq_profile;
1543 result.is_annexb = 0;
1544
1545 result.frame_type = pi->frame_type;
1546 result.primary_ref_frame = pi->primary_ref_frame;
1547
1548 const struct VkVideoDecodeAV1DpbSlotInfoKHR *setup_dpb_slot =
1549 intra_only_decoding
1550 ? NULL
1551 : vk_find_struct_const(frame_info->pSetupReferenceSlot->pNext, VIDEO_DECODE_AV1_DPB_SLOT_INFO_KHR);
1552
1553 /* The AMD FW interface does not need this information, since it's
1554 * redundant with the information derivable from the current frame header,
1555 * which the FW is parsing and tracking.
1556 */
1557 (void)setup_dpb_slot;
1558 result.curr_pic_idx = intra_only_decoding ? 0 : frame_info->pSetupReferenceSlot->slotIndex;
1559
1560 result.sb_size = seq_hdr->flags.use_128x128_superblock;
1561 result.interp_filter = pi->interpolation_filter;
1562 for (i = 0; i < 2; ++i)
1563 result.filter_level[i] = pi->pLoopFilter->loop_filter_level[i];
1564 result.filter_level_u = pi->pLoopFilter->loop_filter_level[2];
1565 result.filter_level_v = pi->pLoopFilter->loop_filter_level[3];
1566 result.sharpness_level = pi->pLoopFilter->loop_filter_sharpness;
1567 for (i = 0; i < 8; ++i)
1568 result.ref_deltas[i] = pi->pLoopFilter->loop_filter_ref_deltas[i];
1569 for (i = 0; i < 2; ++i)
1570 result.mode_deltas[i] = pi->pLoopFilter->loop_filter_mode_deltas[i];
1571 result.base_qindex = pi->pQuantization->base_q_idx;
1572 result.y_dc_delta_q = pi->pQuantization->DeltaQYDc;
1573 result.u_dc_delta_q = pi->pQuantization->DeltaQUDc;
1574 result.v_dc_delta_q = pi->pQuantization->DeltaQVDc;
1575 result.u_ac_delta_q = pi->pQuantization->DeltaQUAc;
1576 result.v_ac_delta_q = pi->pQuantization->DeltaQVAc;
1577
1578 if (pi->pQuantization->flags.using_qmatrix) {
1579 result.qm_y = pi->pQuantization->qm_y | 0xf0;
1580 result.qm_u = pi->pQuantization->qm_u | 0xf0;
1581 result.qm_v = pi->pQuantization->qm_v | 0xf0;
1582 } else {
1583 result.qm_y = 0xff;
1584 result.qm_u = 0xff;
1585 result.qm_v = 0xff;
1586 }
1587 result.delta_q_res = (1 << pi->delta_q_res);
1588 result.delta_lf_res = (1 << pi->delta_lf_res);
1589 result.tile_cols = pi->pTileInfo->TileCols;
1590 result.tile_rows = pi->pTileInfo->TileRows;
1591
1592 result.tx_mode = pi->TxMode;
1593 result.reference_mode = (pi->flags.reference_select == 1) ? 2 : 0;
1594 result.chroma_format = seq_hdr->pColorConfig->flags.mono_chrome ? 0 : 1;
1595 result.tile_size_bytes = pi->pTileInfo->tile_size_bytes_minus_1;
1596 result.context_update_tile_id = pi->pTileInfo->context_update_tile_id;
1597
1598 for (i = 0; i < result.tile_cols; i++)
1599 result.tile_col_start_sb[i] = pi->pTileInfo->pMiColStarts[i];
1600 result.tile_col_start_sb[result.tile_cols] =
1601 result.tile_col_start_sb[result.tile_cols - 1] + pi->pTileInfo->pWidthInSbsMinus1[result.tile_cols - 1] + 1;
1602 for (i = 0; i < pi->pTileInfo->TileRows; i++)
1603 result.tile_row_start_sb[i] = pi->pTileInfo->pMiRowStarts[i];
1604 result.tile_row_start_sb[result.tile_rows] =
1605 result.tile_row_start_sb[result.tile_rows - 1] + pi->pTileInfo->pHeightInSbsMinus1[result.tile_rows - 1] + 1;
1606
1607 result.max_width = seq_hdr->max_frame_width_minus_1 + 1;
1608 result.max_height = seq_hdr->max_frame_height_minus_1 + 1;
1609 VkExtent2D frameExtent = frame_info->dstPictureResource.codedExtent;
1610 result.superres_scale_denominator =
1611 pi->flags.use_superres ? pi->coded_denom + AV1_SUPERRES_DENOM_MIN : AV1_SUPERRES_NUM;
1612 if (pi->flags.use_superres) {
1613 result.width =
1614 (frameExtent.width * 8 + result.superres_scale_denominator / 2) / result.superres_scale_denominator;
1615 } else {
1616 result.width = frameExtent.width;
1617 }
1618 result.height = frameExtent.height;
1619
1620 result.superres_upscaled_width = frameExtent.width;
1621
1622 result.order_hint_bits = seq_hdr->order_hint_bits_minus_1 + 1;
1623
1624 /* The VCN FW will evict references that aren't specified in
1625 * ref_frame_map, even if they are still valid. To prevent this we will
1626 * specify every possible reference in ref_frame_map.
1627 */
1628 uint16_t used_slots = (1 << result.curr_pic_idx);
1629 for (i = 0; i < frame_info->referenceSlotCount; i++) {
1630 const struct VkVideoDecodeAV1DpbSlotInfoKHR *ref_dpb_slot =
1631 vk_find_struct_const(frame_info->pReferenceSlots[i].pNext, VIDEO_DECODE_AV1_DPB_SLOT_INFO_KHR);
1632 (void)ref_dpb_slot; /* Again, the FW is tracking this information for us, so no need for it. */
1633 (void)ref_dpb_slot; /* the FW is tracking this information for us, so no need for it. */
1634 int32_t slotIndex = frame_info->pReferenceSlots[i].slotIndex;
1635 result.ref_frame_map[i] = slotIndex;
1636 used_slots |= 1 << slotIndex;
1637 }
1638 /* Go through all the slots and fill in the ones that haven't been used. */
1639 for (j = 0; j < STD_VIDEO_AV1_NUM_REF_FRAMES + 1; j++) {
1640 if ((used_slots & (1 << j)) == 0) {
1641 result.ref_frame_map[i] = j;
1642 used_slots |= 1 << j;
1643 i++;
1644 }
1645 }
1646
1647 assert(used_slots == 0x1ff && i == STD_VIDEO_AV1_NUM_REF_FRAMES);
1648
1649 for (i = 0; i < STD_VIDEO_AV1_REFS_PER_FRAME; ++i) {
1650 result.frame_refs[i] =
1651 av1_pic_info->referenceNameSlotIndices[i] == -1 ? 0x7f : av1_pic_info->referenceNameSlotIndices[i];
1652 }
1653
1654 result.bit_depth_luma_minus8 = result.bit_depth_chroma_minus8 = seq_hdr->pColorConfig->BitDepth - 8;
1655
1656 int16_t *feature_data = (int16_t *)probs_ptr;
1657 int fd_idx = 0;
1658 for (i = 0; i < 8; ++i) {
1659 result.feature_mask[i] = pi->pSegmentation->FeatureEnabled[i];
1660 for (j = 0; j < 8; ++j) {
1661 result.feature_data[i][j] = pi->pSegmentation->FeatureData[i][j];
1662 feature_data[fd_idx++] = result.feature_data[i][j];
1663 }
1664 }
1665
1666 memcpy(((char *)probs_ptr + 128), result.feature_mask, 8);
1667 result.cdef_damping = pi->pCDEF->cdef_damping_minus_3 + 3;
1668 result.cdef_bits = pi->pCDEF->cdef_bits;
1669 for (i = 0; i < 8; ++i) {
1670 result.cdef_strengths[i] = (pi->pCDEF->cdef_y_pri_strength[i] << 2) + pi->pCDEF->cdef_y_sec_strength[i];
1671 result.cdef_uv_strengths[i] = (pi->pCDEF->cdef_uv_pri_strength[i] << 2) + pi->pCDEF->cdef_uv_sec_strength[i];
1672 }
1673
1674 if (pi->flags.UsesLr) {
1675 for (int plane = 0; plane < STD_VIDEO_AV1_MAX_NUM_PLANES; plane++) {
1676 result.frame_restoration_type[plane] = pi->pLoopRestoration->FrameRestorationType[plane];
1677 result.log2_restoration_unit_size_minus5[plane] = pi->pLoopRestoration->LoopRestorationSize[plane];
1678 }
1679 }
1680
1681 if (seq_hdr->pColorConfig->BitDepth > 8) {
1682 if (vid->vk.picture_format == VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16 ||
1683 vid->vk.picture_format == VK_FORMAT_G16_B16R16_2PLANE_420_UNORM) {
1684 result.p010_mode = 1;
1685 result.msb_mode = 1;
1686 } else {
1687 result.luma_10to8 = 1;
1688 result.chroma_10to8 = 1;
1689 }
1690 }
1691
1692 result.preskip_segid = 0;
1693 result.last_active_segid = 0;
1694 for (i = 0; i < 8; i++) {
1695 for (j = 0; j < 8; j++) {
1696 if (result.feature_mask[i] & (1 << j)) {
1697 result.last_active_segid = i;
1698 if (j >= 5)
1699 result.preskip_segid = 1;
1700 }
1701 }
1702 }
1703 result.seg_lossless_flag = 0;
1704 for (i = 0; i < 8; ++i) {
1705 int av1_get_qindex, qindex;
1706 int segfeature_active = result.feature_mask[i] & (1 << 0);
1707 if (segfeature_active) {
1708 int seg_qindex = result.base_qindex + result.feature_data[i][0];
1709 av1_get_qindex = seg_qindex < 0 ? 0 : (seg_qindex > 255 ? 255 : seg_qindex);
1710 } else {
1711 av1_get_qindex = result.base_qindex;
1712 }
1713 qindex = pi->flags.segmentation_enabled ? av1_get_qindex : result.base_qindex;
1714 result.seg_lossless_flag |= (((qindex == 0) && result.y_dc_delta_q == 0 && result.u_dc_delta_q == 0 &&
1715 result.v_dc_delta_q == 0 && result.u_ac_delta_q == 0 && result.v_ac_delta_q == 0)
1716 << i);
1717 }
1718
1719 rvcn_dec_film_grain_params_t *fg_params = &result.film_grain;
1720 fg_params->apply_grain = pi->flags.apply_grain;
1721 if (fg_params->apply_grain) {
1722 rvcn_dec_av1_fg_init_buf_t *fg_buf = (rvcn_dec_av1_fg_init_buf_t *)((char *)probs_ptr + 256);
1723 fg_params->random_seed = pi->pFilmGrain->grain_seed;
1724 fg_params->grain_scale_shift = pi->pFilmGrain->grain_scale_shift;
1725 fg_params->scaling_shift = pi->pFilmGrain->grain_scaling_minus_8 + 8;
1726 fg_params->chroma_scaling_from_luma = pi->pFilmGrain->flags.chroma_scaling_from_luma;
1727 fg_params->num_y_points = pi->pFilmGrain->num_y_points;
1728 fg_params->num_cb_points = pi->pFilmGrain->num_cb_points;
1729 fg_params->num_cr_points = pi->pFilmGrain->num_cr_points;
1730 fg_params->cb_mult = pi->pFilmGrain->cb_mult;
1731 fg_params->cb_luma_mult = pi->pFilmGrain->cb_luma_mult;
1732 fg_params->cb_offset = pi->pFilmGrain->cb_offset;
1733 fg_params->cr_mult = pi->pFilmGrain->cr_mult;
1734 fg_params->cr_luma_mult = pi->pFilmGrain->cr_luma_mult;
1735 fg_params->cr_offset = pi->pFilmGrain->cr_offset;
1736 fg_params->bit_depth_minus_8 = result.bit_depth_luma_minus8;
1737 for (i = 0; i < fg_params->num_y_points; ++i) {
1738 fg_params->scaling_points_y[i][0] = pi->pFilmGrain->point_y_value[i];
1739 fg_params->scaling_points_y[i][1] = pi->pFilmGrain->point_y_scaling[i];
1740 }
1741 for (i = 0; i < fg_params->num_cb_points; ++i) {
1742 fg_params->scaling_points_cb[i][0] = pi->pFilmGrain->point_cb_value[i];
1743 fg_params->scaling_points_cb[i][1] = pi->pFilmGrain->point_cb_scaling[i];
1744 }
1745 for (i = 0; i < fg_params->num_cr_points; ++i) {
1746 fg_params->scaling_points_cr[i][0] = pi->pFilmGrain->point_cr_value[i];
1747 fg_params->scaling_points_cr[i][1] = pi->pFilmGrain->point_cr_scaling[i];
1748 }
1749
1750 fg_params->ar_coeff_lag = pi->pFilmGrain->ar_coeff_lag;
1751 fg_params->ar_coeff_shift = pi->pFilmGrain->ar_coeff_shift_minus_6 + 6;
1752
1753 for (i = 0; i < 24; ++i)
1754 fg_params->ar_coeffs_y[i] = pi->pFilmGrain->ar_coeffs_y_plus_128[i] - 128;
1755
1756 for (i = 0; i < 25; ++i) {
1757 fg_params->ar_coeffs_cb[i] = pi->pFilmGrain->ar_coeffs_cb_plus_128[i] - 128;
1758 fg_params->ar_coeffs_cr[i] = pi->pFilmGrain->ar_coeffs_cr_plus_128[i] - 128;
1759 }
1760
1761 fg_params->overlap_flag = pi->pFilmGrain->flags.overlap_flag;
1762 fg_params->clip_to_restricted_range = pi->pFilmGrain->flags.clip_to_restricted_range;
1763 radv_vcn_av1_init_film_grain_buffer(fg_params, fg_buf);
1764 }
1765
1766 result.uncompressed_header_size = 0;
1767 for (i = 0; i < STD_VIDEO_AV1_NUM_REF_FRAMES; ++i) {
1768 result.global_motion[i].wmtype = pi->pGlobalMotion->GmType[i];
1769 for (j = 0; j < STD_VIDEO_AV1_GLOBAL_MOTION_PARAMS; ++j)
1770 result.global_motion[i].wmmat[j] = pi->pGlobalMotion->gm_params[i][j];
1771 }
1772 for (i = 0; i < av1_pic_info->tileCount && i < 256; ++i) {
1773 result.tile_info[i].offset = av1_pic_info->pTileOffsets[i];
1774 result.tile_info[i].size = av1_pic_info->pTileSizes[i];
1775 }
1776
1777 return result;
1778 }
1779
1780 static void
rvcn_av1_init_mode_probs(void * prob)1781 rvcn_av1_init_mode_probs(void *prob)
1782 {
1783 rvcn_av1_frame_context_t *fc = (rvcn_av1_frame_context_t *)prob;
1784 int i;
1785
1786 memcpy(fc->palette_y_size_cdf, default_palette_y_size_cdf, sizeof(default_palette_y_size_cdf));
1787 memcpy(fc->palette_uv_size_cdf, default_palette_uv_size_cdf, sizeof(default_palette_uv_size_cdf));
1788 memcpy(fc->palette_y_color_index_cdf, default_palette_y_color_index_cdf, sizeof(default_palette_y_color_index_cdf));
1789 memcpy(fc->palette_uv_color_index_cdf, default_palette_uv_color_index_cdf,
1790 sizeof(default_palette_uv_color_index_cdf));
1791 memcpy(fc->kf_y_cdf, default_kf_y_mode_cdf, sizeof(default_kf_y_mode_cdf));
1792 memcpy(fc->angle_delta_cdf, default_angle_delta_cdf, sizeof(default_angle_delta_cdf));
1793 memcpy(fc->comp_inter_cdf, default_comp_inter_cdf, sizeof(default_comp_inter_cdf));
1794 memcpy(fc->comp_ref_type_cdf, default_comp_ref_type_cdf, sizeof(default_comp_ref_type_cdf));
1795 memcpy(fc->uni_comp_ref_cdf, default_uni_comp_ref_cdf, sizeof(default_uni_comp_ref_cdf));
1796 memcpy(fc->palette_y_mode_cdf, default_palette_y_mode_cdf, sizeof(default_palette_y_mode_cdf));
1797 memcpy(fc->palette_uv_mode_cdf, default_palette_uv_mode_cdf, sizeof(default_palette_uv_mode_cdf));
1798 memcpy(fc->comp_ref_cdf, default_comp_ref_cdf, sizeof(default_comp_ref_cdf));
1799 memcpy(fc->comp_bwdref_cdf, default_comp_bwdref_cdf, sizeof(default_comp_bwdref_cdf));
1800 memcpy(fc->single_ref_cdf, default_single_ref_cdf, sizeof(default_single_ref_cdf));
1801 memcpy(fc->txfm_partition_cdf, default_txfm_partition_cdf, sizeof(default_txfm_partition_cdf));
1802 memcpy(fc->compound_index_cdf, default_compound_idx_cdfs, sizeof(default_compound_idx_cdfs));
1803 memcpy(fc->comp_group_idx_cdf, default_comp_group_idx_cdfs, sizeof(default_comp_group_idx_cdfs));
1804 memcpy(fc->newmv_cdf, default_newmv_cdf, sizeof(default_newmv_cdf));
1805 memcpy(fc->zeromv_cdf, default_zeromv_cdf, sizeof(default_zeromv_cdf));
1806 memcpy(fc->refmv_cdf, default_refmv_cdf, sizeof(default_refmv_cdf));
1807 memcpy(fc->drl_cdf, default_drl_cdf, sizeof(default_drl_cdf));
1808 memcpy(fc->motion_mode_cdf, default_motion_mode_cdf, sizeof(default_motion_mode_cdf));
1809 memcpy(fc->obmc_cdf, default_obmc_cdf, sizeof(default_obmc_cdf));
1810 memcpy(fc->inter_compound_mode_cdf, default_inter_compound_mode_cdf, sizeof(default_inter_compound_mode_cdf));
1811 memcpy(fc->compound_type_cdf, default_compound_type_cdf, sizeof(default_compound_type_cdf));
1812 memcpy(fc->wedge_idx_cdf, default_wedge_idx_cdf, sizeof(default_wedge_idx_cdf));
1813 memcpy(fc->interintra_cdf, default_interintra_cdf, sizeof(default_interintra_cdf));
1814 memcpy(fc->wedge_interintra_cdf, default_wedge_interintra_cdf, sizeof(default_wedge_interintra_cdf));
1815 memcpy(fc->interintra_mode_cdf, default_interintra_mode_cdf, sizeof(default_interintra_mode_cdf));
1816 memcpy(fc->pred_cdf, default_segment_pred_cdf, sizeof(default_segment_pred_cdf));
1817 memcpy(fc->switchable_restore_cdf, default_switchable_restore_cdf, sizeof(default_switchable_restore_cdf));
1818 memcpy(fc->wiener_restore_cdf, default_wiener_restore_cdf, sizeof(default_wiener_restore_cdf));
1819 memcpy(fc->sgrproj_restore_cdf, default_sgrproj_restore_cdf, sizeof(default_sgrproj_restore_cdf));
1820 memcpy(fc->y_mode_cdf, default_if_y_mode_cdf, sizeof(default_if_y_mode_cdf));
1821 memcpy(fc->uv_mode_cdf, default_uv_mode_cdf, sizeof(default_uv_mode_cdf));
1822 memcpy(fc->switchable_interp_cdf, default_switchable_interp_cdf, sizeof(default_switchable_interp_cdf));
1823 memcpy(fc->partition_cdf, default_partition_cdf, sizeof(default_partition_cdf));
1824 memcpy(fc->intra_ext_tx_cdf, default_intra_ext_tx_cdf, sizeof(default_intra_ext_tx_cdf));
1825 memcpy(fc->inter_ext_tx_cdf, default_inter_ext_tx_cdf, sizeof(default_inter_ext_tx_cdf));
1826 memcpy(fc->skip_cdfs, default_skip_cdfs, sizeof(default_skip_cdfs));
1827 memcpy(fc->intra_inter_cdf, default_intra_inter_cdf, sizeof(default_intra_inter_cdf));
1828 memcpy(fc->tree_cdf, default_seg_tree_cdf, sizeof(default_seg_tree_cdf));
1829 for (i = 0; i < SPATIAL_PREDICTION_PROBS; ++i)
1830 memcpy(fc->spatial_pred_seg_cdf[i], default_spatial_pred_seg_tree_cdf[i],
1831 sizeof(default_spatial_pred_seg_tree_cdf[i]));
1832 memcpy(fc->tx_size_cdf, default_tx_size_cdf, sizeof(default_tx_size_cdf));
1833 memcpy(fc->delta_q_cdf, default_delta_q_cdf, sizeof(default_delta_q_cdf));
1834 memcpy(fc->skip_mode_cdfs, default_skip_mode_cdfs, sizeof(default_skip_mode_cdfs));
1835 memcpy(fc->delta_lf_cdf, default_delta_lf_cdf, sizeof(default_delta_lf_cdf));
1836 memcpy(fc->delta_lf_multi_cdf, default_delta_lf_multi_cdf, sizeof(default_delta_lf_multi_cdf));
1837 memcpy(fc->cfl_sign_cdf, default_cfl_sign_cdf, sizeof(default_cfl_sign_cdf));
1838 memcpy(fc->cfl_alpha_cdf, default_cfl_alpha_cdf, sizeof(default_cfl_alpha_cdf));
1839 memcpy(fc->filter_intra_cdfs, default_filter_intra_cdfs, sizeof(default_filter_intra_cdfs));
1840 memcpy(fc->filter_intra_mode_cdf, default_filter_intra_mode_cdf, sizeof(default_filter_intra_mode_cdf));
1841 memcpy(fc->intrabc_cdf, default_intrabc_cdf, sizeof(default_intrabc_cdf));
1842 }
1843
1844 static void
rvcn_av1_init_mv_probs(void * prob)1845 rvcn_av1_init_mv_probs(void *prob)
1846 {
1847 rvcn_av1_frame_context_t *fc = (rvcn_av1_frame_context_t *)prob;
1848
1849 memcpy(fc->nmvc_joints_cdf, default_nmv_context.joints_cdf, sizeof(default_nmv_context.joints_cdf));
1850 memcpy(fc->nmvc_0_bits_cdf, default_nmv_context.comps[0].bits_cdf, sizeof(default_nmv_context.comps[0].bits_cdf));
1851 memcpy(fc->nmvc_0_class0_cdf, default_nmv_context.comps[0].class0_cdf,
1852 sizeof(default_nmv_context.comps[0].class0_cdf));
1853 memcpy(fc->nmvc_0_class0_fp_cdf, default_nmv_context.comps[0].class0_fp_cdf,
1854 sizeof(default_nmv_context.comps[0].class0_fp_cdf));
1855 memcpy(fc->nmvc_0_class0_hp_cdf, default_nmv_context.comps[0].class0_hp_cdf,
1856 sizeof(default_nmv_context.comps[0].class0_hp_cdf));
1857 memcpy(fc->nmvc_0_classes_cdf, default_nmv_context.comps[0].classes_cdf,
1858 sizeof(default_nmv_context.comps[0].classes_cdf));
1859 memcpy(fc->nmvc_0_fp_cdf, default_nmv_context.comps[0].fp_cdf, sizeof(default_nmv_context.comps[0].fp_cdf));
1860 memcpy(fc->nmvc_0_hp_cdf, default_nmv_context.comps[0].hp_cdf, sizeof(default_nmv_context.comps[0].hp_cdf));
1861 memcpy(fc->nmvc_0_sign_cdf, default_nmv_context.comps[0].sign_cdf, sizeof(default_nmv_context.comps[0].sign_cdf));
1862 memcpy(fc->nmvc_1_bits_cdf, default_nmv_context.comps[1].bits_cdf, sizeof(default_nmv_context.comps[1].bits_cdf));
1863 memcpy(fc->nmvc_1_class0_cdf, default_nmv_context.comps[1].class0_cdf,
1864 sizeof(default_nmv_context.comps[1].class0_cdf));
1865 memcpy(fc->nmvc_1_class0_fp_cdf, default_nmv_context.comps[1].class0_fp_cdf,
1866 sizeof(default_nmv_context.comps[1].class0_fp_cdf));
1867 memcpy(fc->nmvc_1_class0_hp_cdf, default_nmv_context.comps[1].class0_hp_cdf,
1868 sizeof(default_nmv_context.comps[1].class0_hp_cdf));
1869 memcpy(fc->nmvc_1_classes_cdf, default_nmv_context.comps[1].classes_cdf,
1870 sizeof(default_nmv_context.comps[1].classes_cdf));
1871 memcpy(fc->nmvc_1_fp_cdf, default_nmv_context.comps[1].fp_cdf, sizeof(default_nmv_context.comps[1].fp_cdf));
1872 memcpy(fc->nmvc_1_hp_cdf, default_nmv_context.comps[1].hp_cdf, sizeof(default_nmv_context.comps[1].hp_cdf));
1873 memcpy(fc->nmvc_1_sign_cdf, default_nmv_context.comps[1].sign_cdf, sizeof(default_nmv_context.comps[1].sign_cdf));
1874 memcpy(fc->ndvc_joints_cdf, default_nmv_context.joints_cdf, sizeof(default_nmv_context.joints_cdf));
1875 memcpy(fc->ndvc_0_bits_cdf, default_nmv_context.comps[0].bits_cdf, sizeof(default_nmv_context.comps[0].bits_cdf));
1876 memcpy(fc->ndvc_0_class0_cdf, default_nmv_context.comps[0].class0_cdf,
1877 sizeof(default_nmv_context.comps[0].class0_cdf));
1878 memcpy(fc->ndvc_0_class0_fp_cdf, default_nmv_context.comps[0].class0_fp_cdf,
1879 sizeof(default_nmv_context.comps[0].class0_fp_cdf));
1880 memcpy(fc->ndvc_0_class0_hp_cdf, default_nmv_context.comps[0].class0_hp_cdf,
1881 sizeof(default_nmv_context.comps[0].class0_hp_cdf));
1882 memcpy(fc->ndvc_0_classes_cdf, default_nmv_context.comps[0].classes_cdf,
1883 sizeof(default_nmv_context.comps[0].classes_cdf));
1884 memcpy(fc->ndvc_0_fp_cdf, default_nmv_context.comps[0].fp_cdf, sizeof(default_nmv_context.comps[0].fp_cdf));
1885 memcpy(fc->ndvc_0_hp_cdf, default_nmv_context.comps[0].hp_cdf, sizeof(default_nmv_context.comps[0].hp_cdf));
1886 memcpy(fc->ndvc_0_sign_cdf, default_nmv_context.comps[0].sign_cdf, sizeof(default_nmv_context.comps[0].sign_cdf));
1887 memcpy(fc->ndvc_1_bits_cdf, default_nmv_context.comps[1].bits_cdf, sizeof(default_nmv_context.comps[1].bits_cdf));
1888 memcpy(fc->ndvc_1_class0_cdf, default_nmv_context.comps[1].class0_cdf,
1889 sizeof(default_nmv_context.comps[1].class0_cdf));
1890 memcpy(fc->ndvc_1_class0_fp_cdf, default_nmv_context.comps[1].class0_fp_cdf,
1891 sizeof(default_nmv_context.comps[1].class0_fp_cdf));
1892 memcpy(fc->ndvc_1_class0_hp_cdf, default_nmv_context.comps[1].class0_hp_cdf,
1893 sizeof(default_nmv_context.comps[1].class0_hp_cdf));
1894 memcpy(fc->ndvc_1_classes_cdf, default_nmv_context.comps[1].classes_cdf,
1895 sizeof(default_nmv_context.comps[1].classes_cdf));
1896 memcpy(fc->ndvc_1_fp_cdf, default_nmv_context.comps[1].fp_cdf, sizeof(default_nmv_context.comps[1].fp_cdf));
1897 memcpy(fc->ndvc_1_hp_cdf, default_nmv_context.comps[1].hp_cdf, sizeof(default_nmv_context.comps[1].hp_cdf));
1898 memcpy(fc->ndvc_1_sign_cdf, default_nmv_context.comps[1].sign_cdf, sizeof(default_nmv_context.comps[1].sign_cdf));
1899 }
1900
1901 static void
rvcn_av1_default_coef_probs(void * prob,int index)1902 rvcn_av1_default_coef_probs(void *prob, int index)
1903 {
1904 rvcn_av1_frame_context_t *fc = (rvcn_av1_frame_context_t *)prob;
1905
1906 memcpy(fc->txb_skip_cdf, av1_default_txb_skip_cdfs[index], sizeof(av1_default_txb_skip_cdfs[index]));
1907 memcpy(fc->eob_extra_cdf, av1_default_eob_extra_cdfs[index], sizeof(av1_default_eob_extra_cdfs[index]));
1908 memcpy(fc->dc_sign_cdf, av1_default_dc_sign_cdfs[index], sizeof(av1_default_dc_sign_cdfs[index]));
1909 memcpy(fc->coeff_br_cdf, av1_default_coeff_lps_multi_cdfs[index], sizeof(av1_default_coeff_lps_multi_cdfs[index]));
1910 memcpy(fc->coeff_base_cdf, av1_default_coeff_base_multi_cdfs[index],
1911 sizeof(av1_default_coeff_base_multi_cdfs[index]));
1912 memcpy(fc->coeff_base_eob_cdf, av1_default_coeff_base_eob_multi_cdfs[index],
1913 sizeof(av1_default_coeff_base_eob_multi_cdfs[index]));
1914 memcpy(fc->eob_flag_cdf16, av1_default_eob_multi16_cdfs[index], sizeof(av1_default_eob_multi16_cdfs[index]));
1915 memcpy(fc->eob_flag_cdf32, av1_default_eob_multi32_cdfs[index], sizeof(av1_default_eob_multi32_cdfs[index]));
1916 memcpy(fc->eob_flag_cdf64, av1_default_eob_multi64_cdfs[index], sizeof(av1_default_eob_multi64_cdfs[index]));
1917 memcpy(fc->eob_flag_cdf128, av1_default_eob_multi128_cdfs[index], sizeof(av1_default_eob_multi128_cdfs[index]));
1918 memcpy(fc->eob_flag_cdf256, av1_default_eob_multi256_cdfs[index], sizeof(av1_default_eob_multi256_cdfs[index]));
1919 memcpy(fc->eob_flag_cdf512, av1_default_eob_multi512_cdfs[index], sizeof(av1_default_eob_multi512_cdfs[index]));
1920 memcpy(fc->eob_flag_cdf1024, av1_default_eob_multi1024_cdfs[index], sizeof(av1_default_eob_multi1024_cdfs[index]));
1921 }
1922
1923 static void
rvcn_vcn4_init_mode_probs(void * prob)1924 rvcn_vcn4_init_mode_probs(void *prob)
1925 {
1926 rvcn_av1_vcn4_frame_context_t *fc = (rvcn_av1_vcn4_frame_context_t *)prob;
1927 int i;
1928
1929 memcpy(fc->palette_y_size_cdf, default_palette_y_size_cdf, sizeof(default_palette_y_size_cdf));
1930 memcpy(fc->palette_uv_size_cdf, default_palette_uv_size_cdf, sizeof(default_palette_uv_size_cdf));
1931 memcpy(fc->palette_y_color_index_cdf, default_palette_y_color_index_cdf, sizeof(default_palette_y_color_index_cdf));
1932 memcpy(fc->palette_uv_color_index_cdf, default_palette_uv_color_index_cdf,
1933 sizeof(default_palette_uv_color_index_cdf));
1934 memcpy(fc->kf_y_cdf, default_kf_y_mode_cdf, sizeof(default_kf_y_mode_cdf));
1935 memcpy(fc->angle_delta_cdf, default_angle_delta_cdf, sizeof(default_angle_delta_cdf));
1936 memcpy(fc->comp_inter_cdf, default_comp_inter_cdf, sizeof(default_comp_inter_cdf));
1937 memcpy(fc->comp_ref_type_cdf, default_comp_ref_type_cdf, sizeof(default_comp_ref_type_cdf));
1938 memcpy(fc->uni_comp_ref_cdf, default_uni_comp_ref_cdf, sizeof(default_uni_comp_ref_cdf));
1939 memcpy(fc->palette_y_mode_cdf, default_palette_y_mode_cdf, sizeof(default_palette_y_mode_cdf));
1940 memcpy(fc->palette_uv_mode_cdf, default_palette_uv_mode_cdf, sizeof(default_palette_uv_mode_cdf));
1941 memcpy(fc->comp_ref_cdf, default_comp_ref_cdf, sizeof(default_comp_ref_cdf));
1942 memcpy(fc->comp_bwdref_cdf, default_comp_bwdref_cdf, sizeof(default_comp_bwdref_cdf));
1943 memcpy(fc->single_ref_cdf, default_single_ref_cdf, sizeof(default_single_ref_cdf));
1944 memcpy(fc->txfm_partition_cdf, default_txfm_partition_cdf, sizeof(default_txfm_partition_cdf));
1945 memcpy(fc->compound_index_cdf, default_compound_idx_cdfs, sizeof(default_compound_idx_cdfs));
1946 memcpy(fc->comp_group_idx_cdf, default_comp_group_idx_cdfs, sizeof(default_comp_group_idx_cdfs));
1947 memcpy(fc->newmv_cdf, default_newmv_cdf, sizeof(default_newmv_cdf));
1948 memcpy(fc->zeromv_cdf, default_zeromv_cdf, sizeof(default_zeromv_cdf));
1949 memcpy(fc->refmv_cdf, default_refmv_cdf, sizeof(default_refmv_cdf));
1950 memcpy(fc->drl_cdf, default_drl_cdf, sizeof(default_drl_cdf));
1951 memcpy(fc->motion_mode_cdf, default_motion_mode_cdf, sizeof(default_motion_mode_cdf));
1952 memcpy(fc->obmc_cdf, default_obmc_cdf, sizeof(default_obmc_cdf));
1953 memcpy(fc->inter_compound_mode_cdf, default_inter_compound_mode_cdf, sizeof(default_inter_compound_mode_cdf));
1954 memcpy(fc->compound_type_cdf, default_compound_type_cdf, sizeof(default_compound_type_cdf));
1955 memcpy(fc->wedge_idx_cdf, default_wedge_idx_cdf, sizeof(default_wedge_idx_cdf));
1956 memcpy(fc->interintra_cdf, default_interintra_cdf, sizeof(default_interintra_cdf));
1957 memcpy(fc->wedge_interintra_cdf, default_wedge_interintra_cdf, sizeof(default_wedge_interintra_cdf));
1958 memcpy(fc->interintra_mode_cdf, default_interintra_mode_cdf, sizeof(default_interintra_mode_cdf));
1959 memcpy(fc->pred_cdf, default_segment_pred_cdf, sizeof(default_segment_pred_cdf));
1960 memcpy(fc->switchable_restore_cdf, default_switchable_restore_cdf, sizeof(default_switchable_restore_cdf));
1961 memcpy(fc->wiener_restore_cdf, default_wiener_restore_cdf, sizeof(default_wiener_restore_cdf));
1962 memcpy(fc->sgrproj_restore_cdf, default_sgrproj_restore_cdf, sizeof(default_sgrproj_restore_cdf));
1963 memcpy(fc->y_mode_cdf, default_if_y_mode_cdf, sizeof(default_if_y_mode_cdf));
1964 memcpy(fc->uv_mode_cdf, default_uv_mode_cdf, sizeof(default_uv_mode_cdf));
1965 memcpy(fc->switchable_interp_cdf, default_switchable_interp_cdf, sizeof(default_switchable_interp_cdf));
1966 memcpy(fc->partition_cdf, default_partition_cdf, sizeof(default_partition_cdf));
1967 memcpy(fc->intra_ext_tx_cdf, &default_intra_ext_tx_cdf[1], sizeof(default_intra_ext_tx_cdf[1]) * 2);
1968 memcpy(fc->inter_ext_tx_cdf, &default_inter_ext_tx_cdf[1], sizeof(default_inter_ext_tx_cdf[1]) * 3);
1969 memcpy(fc->skip_cdfs, default_skip_cdfs, sizeof(default_skip_cdfs));
1970 memcpy(fc->intra_inter_cdf, default_intra_inter_cdf, sizeof(default_intra_inter_cdf));
1971 memcpy(fc->tree_cdf, default_seg_tree_cdf, sizeof(default_seg_tree_cdf));
1972 for (i = 0; i < SPATIAL_PREDICTION_PROBS; ++i)
1973 memcpy(fc->spatial_pred_seg_cdf[i], default_spatial_pred_seg_tree_cdf[i],
1974 sizeof(default_spatial_pred_seg_tree_cdf[i]));
1975 memcpy(fc->tx_size_cdf, default_tx_size_cdf, sizeof(default_tx_size_cdf));
1976 memcpy(fc->delta_q_cdf, default_delta_q_cdf, sizeof(default_delta_q_cdf));
1977 memcpy(fc->skip_mode_cdfs, default_skip_mode_cdfs, sizeof(default_skip_mode_cdfs));
1978 memcpy(fc->delta_lf_cdf, default_delta_lf_cdf, sizeof(default_delta_lf_cdf));
1979 memcpy(fc->delta_lf_multi_cdf, default_delta_lf_multi_cdf, sizeof(default_delta_lf_multi_cdf));
1980 memcpy(fc->cfl_sign_cdf, default_cfl_sign_cdf, sizeof(default_cfl_sign_cdf));
1981 memcpy(fc->cfl_alpha_cdf, default_cfl_alpha_cdf, sizeof(default_cfl_alpha_cdf));
1982 memcpy(fc->filter_intra_cdfs, default_filter_intra_cdfs, sizeof(default_filter_intra_cdfs));
1983 memcpy(fc->filter_intra_mode_cdf, default_filter_intra_mode_cdf, sizeof(default_filter_intra_mode_cdf));
1984 memcpy(fc->intrabc_cdf, default_intrabc_cdf, sizeof(default_intrabc_cdf));
1985 }
1986
1987 static void
rvcn_vcn4_av1_init_mv_probs(void * prob)1988 rvcn_vcn4_av1_init_mv_probs(void *prob)
1989 {
1990 rvcn_av1_vcn4_frame_context_t *fc = (rvcn_av1_vcn4_frame_context_t *)prob;
1991
1992 memcpy(fc->nmvc_joints_cdf, default_nmv_context.joints_cdf, sizeof(default_nmv_context.joints_cdf));
1993 memcpy(fc->nmvc_0_bits_cdf, default_nmv_context.comps[0].bits_cdf, sizeof(default_nmv_context.comps[0].bits_cdf));
1994 memcpy(fc->nmvc_0_class0_cdf, default_nmv_context.comps[0].class0_cdf,
1995 sizeof(default_nmv_context.comps[0].class0_cdf));
1996 memcpy(fc->nmvc_0_class0_fp_cdf, default_nmv_context.comps[0].class0_fp_cdf,
1997 sizeof(default_nmv_context.comps[0].class0_fp_cdf));
1998 memcpy(fc->nmvc_0_class0_hp_cdf, default_nmv_context.comps[0].class0_hp_cdf,
1999 sizeof(default_nmv_context.comps[0].class0_hp_cdf));
2000 memcpy(fc->nmvc_0_classes_cdf, default_nmv_context.comps[0].classes_cdf,
2001 sizeof(default_nmv_context.comps[0].classes_cdf));
2002 memcpy(fc->nmvc_0_fp_cdf, default_nmv_context.comps[0].fp_cdf, sizeof(default_nmv_context.comps[0].fp_cdf));
2003 memcpy(fc->nmvc_0_hp_cdf, default_nmv_context.comps[0].hp_cdf, sizeof(default_nmv_context.comps[0].hp_cdf));
2004 memcpy(fc->nmvc_0_sign_cdf, default_nmv_context.comps[0].sign_cdf, sizeof(default_nmv_context.comps[0].sign_cdf));
2005 memcpy(fc->nmvc_1_bits_cdf, default_nmv_context.comps[1].bits_cdf, sizeof(default_nmv_context.comps[1].bits_cdf));
2006 memcpy(fc->nmvc_1_class0_cdf, default_nmv_context.comps[1].class0_cdf,
2007 sizeof(default_nmv_context.comps[1].class0_cdf));
2008 memcpy(fc->nmvc_1_class0_fp_cdf, default_nmv_context.comps[1].class0_fp_cdf,
2009 sizeof(default_nmv_context.comps[1].class0_fp_cdf));
2010 memcpy(fc->nmvc_1_class0_hp_cdf, default_nmv_context.comps[1].class0_hp_cdf,
2011 sizeof(default_nmv_context.comps[1].class0_hp_cdf));
2012 memcpy(fc->nmvc_1_classes_cdf, default_nmv_context.comps[1].classes_cdf,
2013 sizeof(default_nmv_context.comps[1].classes_cdf));
2014 memcpy(fc->nmvc_1_fp_cdf, default_nmv_context.comps[1].fp_cdf, sizeof(default_nmv_context.comps[1].fp_cdf));
2015 memcpy(fc->nmvc_1_hp_cdf, default_nmv_context.comps[1].hp_cdf, sizeof(default_nmv_context.comps[1].hp_cdf));
2016 memcpy(fc->nmvc_1_sign_cdf, default_nmv_context.comps[1].sign_cdf, sizeof(default_nmv_context.comps[1].sign_cdf));
2017 memcpy(fc->ndvc_joints_cdf, default_nmv_context.joints_cdf, sizeof(default_nmv_context.joints_cdf));
2018 memcpy(fc->ndvc_0_bits_cdf, default_nmv_context.comps[0].bits_cdf, sizeof(default_nmv_context.comps[0].bits_cdf));
2019 memcpy(fc->ndvc_0_class0_cdf, default_nmv_context.comps[0].class0_cdf,
2020 sizeof(default_nmv_context.comps[0].class0_cdf));
2021 memcpy(fc->ndvc_0_class0_fp_cdf, default_nmv_context.comps[0].class0_fp_cdf,
2022 sizeof(default_nmv_context.comps[0].class0_fp_cdf));
2023 memcpy(fc->ndvc_0_class0_hp_cdf, default_nmv_context.comps[0].class0_hp_cdf,
2024 sizeof(default_nmv_context.comps[0].class0_hp_cdf));
2025 memcpy(fc->ndvc_0_classes_cdf, default_nmv_context.comps[0].classes_cdf,
2026 sizeof(default_nmv_context.comps[0].classes_cdf));
2027 memcpy(fc->ndvc_0_fp_cdf, default_nmv_context.comps[0].fp_cdf, sizeof(default_nmv_context.comps[0].fp_cdf));
2028 memcpy(fc->ndvc_0_hp_cdf, default_nmv_context.comps[0].hp_cdf, sizeof(default_nmv_context.comps[0].hp_cdf));
2029 memcpy(fc->ndvc_0_sign_cdf, default_nmv_context.comps[0].sign_cdf, sizeof(default_nmv_context.comps[0].sign_cdf));
2030 memcpy(fc->ndvc_1_bits_cdf, default_nmv_context.comps[1].bits_cdf, sizeof(default_nmv_context.comps[1].bits_cdf));
2031 memcpy(fc->ndvc_1_class0_cdf, default_nmv_context.comps[1].class0_cdf,
2032 sizeof(default_nmv_context.comps[1].class0_cdf));
2033 memcpy(fc->ndvc_1_class0_fp_cdf, default_nmv_context.comps[1].class0_fp_cdf,
2034 sizeof(default_nmv_context.comps[1].class0_fp_cdf));
2035 memcpy(fc->ndvc_1_class0_hp_cdf, default_nmv_context.comps[1].class0_hp_cdf,
2036 sizeof(default_nmv_context.comps[1].class0_hp_cdf));
2037 memcpy(fc->ndvc_1_classes_cdf, default_nmv_context.comps[1].classes_cdf,
2038 sizeof(default_nmv_context.comps[1].classes_cdf));
2039 memcpy(fc->ndvc_1_fp_cdf, default_nmv_context.comps[1].fp_cdf, sizeof(default_nmv_context.comps[1].fp_cdf));
2040 memcpy(fc->ndvc_1_hp_cdf, default_nmv_context.comps[1].hp_cdf, sizeof(default_nmv_context.comps[1].hp_cdf));
2041 memcpy(fc->ndvc_1_sign_cdf, default_nmv_context.comps[1].sign_cdf, sizeof(default_nmv_context.comps[1].sign_cdf));
2042 }
2043
2044 static void
rvcn_vcn4_av1_default_coef_probs(void * prob,int index)2045 rvcn_vcn4_av1_default_coef_probs(void *prob, int index)
2046 {
2047 rvcn_av1_vcn4_frame_context_t *fc = (rvcn_av1_vcn4_frame_context_t *)prob;
2048 char *p;
2049 int i, j;
2050 unsigned size;
2051
2052 memcpy(fc->txb_skip_cdf, av1_default_txb_skip_cdfs[index], sizeof(av1_default_txb_skip_cdfs[index]));
2053
2054 p = (char *)fc->eob_extra_cdf;
2055 size = sizeof(av1_default_eob_extra_cdfs[0][0][0][0]) * EOB_COEF_CONTEXTS_VCN4;
2056 for (i = 0; i < AV1_TX_SIZES; i++) {
2057 for (j = 0; j < AV1_PLANE_TYPES; j++) {
2058 memcpy(p, &av1_default_eob_extra_cdfs[index][i][j][3], size);
2059 p += size;
2060 }
2061 }
2062
2063 memcpy(fc->dc_sign_cdf, av1_default_dc_sign_cdfs[index], sizeof(av1_default_dc_sign_cdfs[index]));
2064 memcpy(fc->coeff_br_cdf, av1_default_coeff_lps_multi_cdfs[index], sizeof(av1_default_coeff_lps_multi_cdfs[index]));
2065 memcpy(fc->coeff_base_cdf, av1_default_coeff_base_multi_cdfs[index],
2066 sizeof(av1_default_coeff_base_multi_cdfs[index]));
2067 memcpy(fc->coeff_base_eob_cdf, av1_default_coeff_base_eob_multi_cdfs[index],
2068 sizeof(av1_default_coeff_base_eob_multi_cdfs[index]));
2069 memcpy(fc->eob_flag_cdf16, av1_default_eob_multi16_cdfs[index], sizeof(av1_default_eob_multi16_cdfs[index]));
2070 memcpy(fc->eob_flag_cdf32, av1_default_eob_multi32_cdfs[index], sizeof(av1_default_eob_multi32_cdfs[index]));
2071 memcpy(fc->eob_flag_cdf64, av1_default_eob_multi64_cdfs[index], sizeof(av1_default_eob_multi64_cdfs[index]));
2072 memcpy(fc->eob_flag_cdf128, av1_default_eob_multi128_cdfs[index], sizeof(av1_default_eob_multi128_cdfs[index]));
2073 memcpy(fc->eob_flag_cdf256, av1_default_eob_multi256_cdfs[index], sizeof(av1_default_eob_multi256_cdfs[index]));
2074 memcpy(fc->eob_flag_cdf512, av1_default_eob_multi512_cdfs[index], sizeof(av1_default_eob_multi512_cdfs[index]));
2075 memcpy(fc->eob_flag_cdf1024, av1_default_eob_multi1024_cdfs[index], sizeof(av1_default_eob_multi1024_cdfs[index]));
2076 }
2077
2078 static bool
rvcn_dec_message_decode(struct radv_cmd_buffer * cmd_buffer,struct radv_video_session * vid,struct radv_video_session_params * params,void * ptr,void * it_probs_ptr,uint32_t * slice_offset,const struct VkVideoDecodeInfoKHR * frame_info)2079 rvcn_dec_message_decode(struct radv_cmd_buffer *cmd_buffer, struct radv_video_session *vid,
2080 struct radv_video_session_params *params, void *ptr, void *it_probs_ptr, uint32_t *slice_offset,
2081 const struct VkVideoDecodeInfoKHR *frame_info)
2082 {
2083 struct radv_device *device = cmd_buffer->device;
2084 rvcn_dec_message_header_t *header;
2085 rvcn_dec_message_index_t *index_codec;
2086 rvcn_dec_message_decode_t *decode;
2087 rvcn_dec_message_index_t *index_dynamic_dpb = NULL;
2088 rvcn_dec_message_dynamic_dpb_t2_t *dynamic_dpb_t2 = NULL;
2089 void *codec;
2090 unsigned sizes = 0, offset_decode, offset_codec, offset_dynamic_dpb;
2091 struct radv_image_view *dst_iv = radv_image_view_from_handle(frame_info->dstPictureResource.imageViewBinding);
2092 struct radv_image *img = dst_iv->image;
2093 struct radv_image_plane *luma = &img->planes[0];
2094 struct radv_image_plane *chroma = &img->planes[1];
2095
2096 header = ptr;
2097 sizes += sizeof(rvcn_dec_message_header_t);
2098
2099 index_codec = (void *)((char *)header + sizes);
2100 sizes += sizeof(rvcn_dec_message_index_t);
2101
2102 if (vid->dpb_type == DPB_DYNAMIC_TIER_2) {
2103 index_dynamic_dpb = (void *)((char *)header + sizes);
2104 sizes += sizeof(rvcn_dec_message_index_t);
2105 }
2106
2107 offset_decode = sizes;
2108 decode = (void *)((char *)header + sizes);
2109 sizes += sizeof(rvcn_dec_message_decode_t);
2110
2111 if (vid->dpb_type == DPB_DYNAMIC_TIER_2) {
2112 offset_dynamic_dpb = sizes;
2113 dynamic_dpb_t2 = (void *)((char *)header + sizes);
2114 sizes += sizeof(rvcn_dec_message_dynamic_dpb_t2_t);
2115 }
2116
2117 offset_codec = sizes;
2118 codec = (void *)((char *)header + sizes);
2119
2120 memset(ptr, 0, sizes);
2121
2122 header->header_size = sizeof(rvcn_dec_message_header_t);
2123 header->total_size = sizes;
2124 header->msg_type = RDECODE_MSG_DECODE;
2125 header->stream_handle = vid->stream_handle;
2126 header->status_report_feedback_number = vid->dbg_frame_cnt++;
2127
2128 header->index[0].message_id = RDECODE_MESSAGE_DECODE;
2129 header->index[0].offset = offset_decode;
2130 header->index[0].size = sizeof(rvcn_dec_message_decode_t);
2131 header->index[0].filled = 0;
2132 header->num_buffers = 1;
2133
2134 index_codec->offset = offset_codec;
2135 index_codec->filled = 0;
2136 ++header->num_buffers;
2137
2138 if (vid->dpb_type == DPB_DYNAMIC_TIER_2) {
2139 index_dynamic_dpb->message_id = RDECODE_MESSAGE_DYNAMIC_DPB;
2140 index_dynamic_dpb->offset = offset_dynamic_dpb;
2141 index_dynamic_dpb->filled = 0;
2142 ++header->num_buffers;
2143 index_dynamic_dpb->size = sizeof(rvcn_dec_message_dynamic_dpb_t2_t);
2144 }
2145
2146 decode->stream_type = vid->stream_type;
2147 decode->decode_flags = 0;
2148 decode->width_in_samples = frame_info->dstPictureResource.codedExtent.width;
2149 decode->height_in_samples = frame_info->dstPictureResource.codedExtent.height;
2150
2151 decode->bsd_size = frame_info->srcBufferRange;
2152
2153 decode->dt_size = dst_iv->image->planes[0].surface.total_size + dst_iv->image->planes[1].surface.total_size;
2154 decode->sct_size = 0;
2155 decode->sc_coeff_size = 0;
2156
2157 decode->sw_ctxt_size = RDECODE_SESSION_CONTEXT_SIZE;
2158
2159 decode->dt_pitch = luma->surface.u.gfx9.surf_pitch * luma->surface.blk_w;
2160 decode->dt_uv_pitch = chroma->surface.u.gfx9.surf_pitch * chroma->surface.blk_w;
2161
2162 if (luma->surface.meta_offset) {
2163 fprintf(stderr, "DCC SURFACES NOT SUPPORTED.\n");
2164 return false;
2165 }
2166
2167 decode->dt_tiling_mode = 0;
2168 decode->dt_swizzle_mode = luma->surface.u.gfx9.swizzle_mode;
2169 decode->dt_array_mode = device->physical_device->vid_addr_gfx_mode;
2170 decode->dt_field_mode = vid->interlaced ? 1 : 0;
2171 decode->dt_surf_tile_config = 0;
2172 decode->dt_uv_surf_tile_config = 0;
2173
2174 decode->dt_luma_top_offset = luma->surface.u.gfx9.surf_offset;
2175 decode->dt_chroma_top_offset = chroma->surface.u.gfx9.surf_offset;
2176
2177 if (decode->dt_field_mode) {
2178 decode->dt_luma_bottom_offset = luma->surface.u.gfx9.surf_offset + luma->surface.u.gfx9.surf_slice_size;
2179 decode->dt_chroma_bottom_offset = chroma->surface.u.gfx9.surf_offset + chroma->surface.u.gfx9.surf_slice_size;
2180 } else {
2181 decode->dt_luma_bottom_offset = decode->dt_luma_top_offset;
2182 decode->dt_chroma_bottom_offset = decode->dt_chroma_top_offset;
2183 }
2184 if (vid->stream_type == RDECODE_CODEC_AV1)
2185 decode->db_pitch_uv = chroma->surface.u.gfx9.surf_pitch * chroma->surface.blk_w;
2186
2187 *slice_offset = 0;
2188
2189 /* Intra-only decoding will only work without a setup slot for AV1
2190 * currently, other codecs require the application to pass a
2191 * setup slot for this use-case, since the FW is not able to skip write-out
2192 * for H26X. In order to fix that properly, additional scratch space will
2193 * be needed in the video session just for intra-only DPB targets.
2194 */
2195 int dpb_update_required = 1;
2196
2197 switch (vid->vk.op) {
2198 case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR: {
2199 index_codec->size = sizeof(rvcn_dec_message_avc_t);
2200 rvcn_dec_message_avc_t avc = get_h264_msg(vid, params, frame_info, slice_offset, &decode->width_in_samples,
2201 &decode->height_in_samples, it_probs_ptr);
2202 memcpy(codec, (void *)&avc, sizeof(rvcn_dec_message_avc_t));
2203 index_codec->message_id = RDECODE_MESSAGE_AVC;
2204 break;
2205 }
2206 case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR: {
2207 index_codec->size = sizeof(rvcn_dec_message_hevc_t);
2208 rvcn_dec_message_hevc_t hevc = get_h265_msg(device, vid, params, frame_info, it_probs_ptr);
2209 memcpy(codec, (void *)&hevc, sizeof(rvcn_dec_message_hevc_t));
2210 index_codec->message_id = RDECODE_MESSAGE_HEVC;
2211 break;
2212 }
2213 case VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR: {
2214 index_codec->size = sizeof(rvcn_dec_message_av1_t);
2215 rvcn_dec_message_av1_t av1 = get_av1_msg(device, vid, params, frame_info, it_probs_ptr, &dpb_update_required);
2216 memcpy(codec, (void *)&av1, sizeof(rvcn_dec_message_av1_t));
2217 index_codec->message_id = RDECODE_MESSAGE_AV1;
2218 assert(frame_info->referenceSlotCount < 9);
2219 break;
2220 }
2221 default:
2222 unreachable("unknown operation");
2223 }
2224
2225 if (dpb_update_required)
2226 assert(frame_info->pSetupReferenceSlot != NULL);
2227
2228 struct radv_image_view *dpb_iv =
2229 dpb_update_required
2230 ? radv_image_view_from_handle(frame_info->pSetupReferenceSlot->pPictureResource->imageViewBinding)
2231 : NULL;
2232 struct radv_image *dpb = dpb_update_required ? dpb_iv->image : img;
2233
2234 decode->dpb_size = (vid->dpb_type != DPB_DYNAMIC_TIER_2) ? dpb->size : 0;
2235 decode->db_pitch = dpb->planes[0].surface.u.gfx9.surf_pitch;
2236 decode->db_aligned_height = dpb->planes[0].surface.u.gfx9.surf_height;
2237 decode->db_swizzle_mode = dpb->planes[0].surface.u.gfx9.swizzle_mode;
2238 decode->db_array_mode = device->physical_device->vid_addr_gfx_mode;
2239
2240 decode->hw_ctxt_size = vid->ctx.size;
2241
2242 if (vid->dpb_type != DPB_DYNAMIC_TIER_2)
2243 return true;
2244
2245 uint64_t addr;
2246 radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, dpb->bindings[0].bo);
2247 addr = radv_buffer_get_va(dpb->bindings[0].bo) + dpb->bindings[0].offset;
2248 dynamic_dpb_t2->dpbCurrLo = addr;
2249 dynamic_dpb_t2->dpbCurrHi = addr >> 32;
2250
2251 if (vid->vk.op == VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR) {
2252 /* The following loop will fill in the references for the current frame,
2253 * this ensures all DPB addresses are "valid" (pointing at the current
2254 * decode target), so that the firmware doesn't evict things it should not.
2255 * It will not perform any actual writes to these dummy slots.
2256 */
2257 for (int i = 0; i < STD_VIDEO_AV1_NUM_REF_FRAMES; i++) {
2258 dynamic_dpb_t2->dpbAddrHi[i] = addr;
2259 dynamic_dpb_t2->dpbAddrLo[i] = addr >> 32;
2260 }
2261 }
2262
2263 for (int i = 0; i < frame_info->referenceSlotCount; i++) {
2264 int32_t slot_idx = frame_info->pReferenceSlots[i].slotIndex;
2265 assert(slot_idx >= 0 && slot_idx < 16);
2266 struct radv_image_view *f_dpb_iv =
2267 radv_image_view_from_handle(frame_info->pReferenceSlots[i].pPictureResource->imageViewBinding);
2268 assert(f_dpb_iv != NULL);
2269 struct radv_image *dpb_img = f_dpb_iv->image;
2270
2271 radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, dpb_img->bindings[0].bo);
2272 addr = radv_buffer_get_va(dpb_img->bindings[0].bo) + dpb_img->bindings[0].offset;
2273
2274 dynamic_dpb_t2->dpbAddrLo[i] = addr;
2275 dynamic_dpb_t2->dpbAddrHi[i] = addr >> 32;
2276 ++dynamic_dpb_t2->dpbArraySize;
2277 }
2278
2279 radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, dpb->bindings[0].bo);
2280 addr = radv_buffer_get_va(dpb->bindings[0].bo) + dpb->bindings[0].offset;
2281
2282 dynamic_dpb_t2->dpbCurrLo = addr;
2283 dynamic_dpb_t2->dpbCurrHi = addr >> 32;
2284
2285 decode->decode_flags = 1;
2286 dynamic_dpb_t2->dpbConfigFlags = 0;
2287 dynamic_dpb_t2->dpbLumaPitch = luma->surface.u.gfx9.surf_pitch;
2288 dynamic_dpb_t2->dpbLumaAlignedHeight = luma->surface.u.gfx9.surf_height;
2289 dynamic_dpb_t2->dpbLumaAlignedSize = luma->surface.u.gfx9.surf_slice_size;
2290
2291 dynamic_dpb_t2->dpbChromaPitch = chroma->surface.u.gfx9.surf_pitch;
2292 dynamic_dpb_t2->dpbChromaAlignedHeight = chroma->surface.u.gfx9.surf_height;
2293 dynamic_dpb_t2->dpbChromaAlignedSize = chroma->surface.u.gfx9.surf_slice_size;
2294
2295 return true;
2296 }
2297
2298 static struct ruvd_h264
get_uvd_h264_msg(struct radv_video_session * vid,struct radv_video_session_params * params,const struct VkVideoDecodeInfoKHR * frame_info,uint32_t * slice_offset,uint32_t * width_in_samples,uint32_t * height_in_samples,void * it_ptr)2299 get_uvd_h264_msg(struct radv_video_session *vid, struct radv_video_session_params *params,
2300 const struct VkVideoDecodeInfoKHR *frame_info, uint32_t *slice_offset, uint32_t *width_in_samples,
2301 uint32_t *height_in_samples, void *it_ptr)
2302 {
2303 struct ruvd_h264 result;
2304 const struct VkVideoDecodeH264PictureInfoKHR *h264_pic_info =
2305 vk_find_struct_const(frame_info->pNext, VIDEO_DECODE_H264_PICTURE_INFO_KHR);
2306
2307 *slice_offset = h264_pic_info->pSliceOffsets[0];
2308
2309 memset(&result, 0, sizeof(result));
2310
2311 const StdVideoH264SequenceParameterSet *sps =
2312 vk_video_find_h264_dec_std_sps(¶ms->vk, h264_pic_info->pStdPictureInfo->seq_parameter_set_id);
2313 switch (sps->profile_idc) {
2314 case STD_VIDEO_H264_PROFILE_IDC_BASELINE:
2315 result.profile = RUVD_H264_PROFILE_BASELINE;
2316 break;
2317 case STD_VIDEO_H264_PROFILE_IDC_MAIN:
2318 result.profile = RUVD_H264_PROFILE_MAIN;
2319 break;
2320 case STD_VIDEO_H264_PROFILE_IDC_HIGH:
2321 result.profile = RUVD_H264_PROFILE_HIGH;
2322 break;
2323 default:
2324 fprintf(stderr, "UNSUPPORTED CODEC %d\n", sps->profile_idc);
2325 result.profile = RUVD_H264_PROFILE_MAIN;
2326 break;
2327 }
2328
2329 *width_in_samples = (sps->pic_width_in_mbs_minus1 + 1) * 16;
2330 *height_in_samples = (sps->pic_height_in_map_units_minus1 + 1) * 16;
2331 if (!sps->flags.frame_mbs_only_flag)
2332 *height_in_samples *= 2;
2333 result.level = get_h264_level(sps->level_idc);
2334
2335 result.sps_info_flags = 0;
2336
2337 result.sps_info_flags |= sps->flags.direct_8x8_inference_flag << 0;
2338 result.sps_info_flags |= sps->flags.mb_adaptive_frame_field_flag << 1;
2339 result.sps_info_flags |= sps->flags.frame_mbs_only_flag << 2;
2340 result.sps_info_flags |= sps->flags.delta_pic_order_always_zero_flag << 3;
2341 result.sps_info_flags |= 1 << RDECODE_SPS_INFO_H264_EXTENSION_SUPPORT_FLAG_SHIFT;
2342
2343 result.bit_depth_luma_minus8 = sps->bit_depth_luma_minus8;
2344 result.bit_depth_chroma_minus8 = sps->bit_depth_chroma_minus8;
2345 result.log2_max_frame_num_minus4 = sps->log2_max_frame_num_minus4;
2346 result.pic_order_cnt_type = sps->pic_order_cnt_type;
2347 result.log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_pic_order_cnt_lsb_minus4;
2348
2349 result.chroma_format = sps->chroma_format_idc;
2350
2351 const StdVideoH264PictureParameterSet *pps =
2352 vk_video_find_h264_dec_std_pps(¶ms->vk, h264_pic_info->pStdPictureInfo->pic_parameter_set_id);
2353 result.pps_info_flags = 0;
2354 result.pps_info_flags |= pps->flags.transform_8x8_mode_flag << 0;
2355 result.pps_info_flags |= pps->flags.redundant_pic_cnt_present_flag << 1;
2356 result.pps_info_flags |= pps->flags.constrained_intra_pred_flag << 2;
2357 result.pps_info_flags |= pps->flags.deblocking_filter_control_present_flag << 3;
2358 result.pps_info_flags |= pps->weighted_bipred_idc << 4;
2359 result.pps_info_flags |= pps->flags.weighted_pred_flag << 6;
2360 result.pps_info_flags |= pps->flags.bottom_field_pic_order_in_frame_present_flag << 7;
2361 result.pps_info_flags |= pps->flags.entropy_coding_mode_flag << 8;
2362
2363 result.pic_init_qp_minus26 = pps->pic_init_qp_minus26;
2364 result.chroma_qp_index_offset = pps->chroma_qp_index_offset;
2365 result.second_chroma_qp_index_offset = pps->second_chroma_qp_index_offset;
2366
2367 StdVideoH264ScalingLists scaling_lists;
2368 vk_video_derive_h264_scaling_list(sps, pps, &scaling_lists);
2369 update_h264_scaling(result.scaling_list_4x4, result.scaling_list_8x8, &scaling_lists);
2370
2371 memset(it_ptr, 0, IT_SCALING_TABLE_SIZE);
2372 memcpy(it_ptr, result.scaling_list_4x4, 6 * 16);
2373 memcpy((char *)it_ptr + 96, result.scaling_list_8x8, 2 * 64);
2374
2375 result.num_ref_idx_l0_active_minus1 = pps->num_ref_idx_l0_default_active_minus1;
2376 result.num_ref_idx_l1_active_minus1 = pps->num_ref_idx_l1_default_active_minus1;
2377
2378 result.curr_field_order_cnt_list[0] = h264_pic_info->pStdPictureInfo->PicOrderCnt[0];
2379 result.curr_field_order_cnt_list[1] = h264_pic_info->pStdPictureInfo->PicOrderCnt[1];
2380
2381 result.frame_num = h264_pic_info->pStdPictureInfo->frame_num;
2382
2383 result.num_ref_frames = sps->max_num_ref_frames;
2384 memset(result.ref_frame_list, 0xff, sizeof(unsigned char) * 16);
2385 memset(result.frame_num_list, 0, sizeof(unsigned int) * 16);
2386 for (unsigned i = 0; i < frame_info->referenceSlotCount; i++) {
2387 int idx = frame_info->pReferenceSlots[i].slotIndex;
2388 const struct VkVideoDecodeH264DpbSlotInfoKHR *dpb_slot =
2389 vk_find_struct_const(frame_info->pReferenceSlots[i].pNext, VIDEO_DECODE_H264_DPB_SLOT_INFO_KHR);
2390
2391 result.frame_num_list[i] = dpb_slot->pStdReferenceInfo->FrameNum;
2392 result.field_order_cnt_list[i][0] = dpb_slot->pStdReferenceInfo->PicOrderCnt[0];
2393 result.field_order_cnt_list[i][1] = dpb_slot->pStdReferenceInfo->PicOrderCnt[1];
2394
2395 result.ref_frame_list[i] = idx;
2396
2397 if (dpb_slot->pStdReferenceInfo->flags.used_for_long_term_reference)
2398 result.ref_frame_list[i] |= 0x80;
2399 }
2400 result.curr_pic_ref_frame_num = frame_info->referenceSlotCount;
2401 result.decoded_pic_idx = frame_info->pSetupReferenceSlot->slotIndex;
2402
2403 return result;
2404 }
2405
2406 static struct ruvd_h265
get_uvd_h265_msg(struct radv_device * device,struct radv_video_session * vid,struct radv_video_session_params * params,const struct VkVideoDecodeInfoKHR * frame_info,void * it_ptr)2407 get_uvd_h265_msg(struct radv_device *device, struct radv_video_session *vid, struct radv_video_session_params *params,
2408 const struct VkVideoDecodeInfoKHR *frame_info, void *it_ptr)
2409 {
2410 struct ruvd_h265 result;
2411 int i, j;
2412 const struct VkVideoDecodeH265PictureInfoKHR *h265_pic_info =
2413 vk_find_struct_const(frame_info->pNext, VIDEO_DECODE_H265_PICTURE_INFO_KHR);
2414
2415 memset(&result, 0, sizeof(result));
2416
2417 const StdVideoH265SequenceParameterSet *sps =
2418 vk_video_find_h265_dec_std_sps(¶ms->vk, h265_pic_info->pStdPictureInfo->pps_seq_parameter_set_id);
2419 const StdVideoH265PictureParameterSet *pps =
2420 vk_video_find_h265_dec_std_pps(¶ms->vk, h265_pic_info->pStdPictureInfo->pps_pic_parameter_set_id);
2421
2422 result.sps_info_flags = 0;
2423 result.sps_info_flags |= sps->flags.scaling_list_enabled_flag << 0;
2424 result.sps_info_flags |= sps->flags.amp_enabled_flag << 1;
2425 result.sps_info_flags |= sps->flags.sample_adaptive_offset_enabled_flag << 2;
2426 result.sps_info_flags |= sps->flags.pcm_enabled_flag << 3;
2427 result.sps_info_flags |= sps->flags.pcm_loop_filter_disabled_flag << 4;
2428 result.sps_info_flags |= sps->flags.long_term_ref_pics_present_flag << 5;
2429 result.sps_info_flags |= sps->flags.sps_temporal_mvp_enabled_flag << 6;
2430 result.sps_info_flags |= sps->flags.strong_intra_smoothing_enabled_flag << 7;
2431 result.sps_info_flags |= sps->flags.separate_colour_plane_flag << 8;
2432
2433 if (device->physical_device->rad_info.family == CHIP_CARRIZO)
2434 result.sps_info_flags |= 1 << 9;
2435
2436 result.chroma_format = sps->chroma_format_idc;
2437 result.bit_depth_luma_minus8 = sps->bit_depth_luma_minus8;
2438 result.bit_depth_chroma_minus8 = sps->bit_depth_chroma_minus8;
2439 result.log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_pic_order_cnt_lsb_minus4;
2440 result.sps_max_dec_pic_buffering_minus1 =
2441 sps->pDecPicBufMgr->max_dec_pic_buffering_minus1[sps->sps_max_sub_layers_minus1];
2442 result.log2_min_luma_coding_block_size_minus3 = sps->log2_min_luma_coding_block_size_minus3;
2443 result.log2_diff_max_min_luma_coding_block_size = sps->log2_diff_max_min_luma_coding_block_size;
2444 result.log2_min_transform_block_size_minus2 = sps->log2_min_luma_transform_block_size_minus2;
2445 result.log2_diff_max_min_transform_block_size = sps->log2_diff_max_min_luma_transform_block_size;
2446 result.max_transform_hierarchy_depth_inter = sps->max_transform_hierarchy_depth_inter;
2447 result.max_transform_hierarchy_depth_intra = sps->max_transform_hierarchy_depth_intra;
2448 if (sps->flags.pcm_enabled_flag) {
2449 result.pcm_sample_bit_depth_luma_minus1 = sps->pcm_sample_bit_depth_luma_minus1;
2450 result.pcm_sample_bit_depth_chroma_minus1 = sps->pcm_sample_bit_depth_chroma_minus1;
2451 result.log2_min_pcm_luma_coding_block_size_minus3 = sps->log2_min_pcm_luma_coding_block_size_minus3;
2452 result.log2_diff_max_min_pcm_luma_coding_block_size = sps->log2_diff_max_min_pcm_luma_coding_block_size;
2453 }
2454 result.num_short_term_ref_pic_sets = sps->num_short_term_ref_pic_sets;
2455
2456 result.pps_info_flags = 0;
2457 result.pps_info_flags |= pps->flags.dependent_slice_segments_enabled_flag << 0;
2458 result.pps_info_flags |= pps->flags.output_flag_present_flag << 1;
2459 result.pps_info_flags |= pps->flags.sign_data_hiding_enabled_flag << 2;
2460 result.pps_info_flags |= pps->flags.cabac_init_present_flag << 3;
2461 result.pps_info_flags |= pps->flags.constrained_intra_pred_flag << 4;
2462 result.pps_info_flags |= pps->flags.transform_skip_enabled_flag << 5;
2463 result.pps_info_flags |= pps->flags.cu_qp_delta_enabled_flag << 6;
2464 result.pps_info_flags |= pps->flags.pps_slice_chroma_qp_offsets_present_flag << 7;
2465 result.pps_info_flags |= pps->flags.weighted_pred_flag << 8;
2466 result.pps_info_flags |= pps->flags.weighted_bipred_flag << 9;
2467 result.pps_info_flags |= pps->flags.transquant_bypass_enabled_flag << 10;
2468 result.pps_info_flags |= pps->flags.tiles_enabled_flag << 11;
2469 result.pps_info_flags |= pps->flags.entropy_coding_sync_enabled_flag << 12;
2470 result.pps_info_flags |= pps->flags.uniform_spacing_flag << 13;
2471 result.pps_info_flags |= pps->flags.loop_filter_across_tiles_enabled_flag << 14;
2472 result.pps_info_flags |= pps->flags.pps_loop_filter_across_slices_enabled_flag << 15;
2473 result.pps_info_flags |= pps->flags.deblocking_filter_override_enabled_flag << 16;
2474 result.pps_info_flags |= pps->flags.pps_deblocking_filter_disabled_flag << 17;
2475 result.pps_info_flags |= pps->flags.lists_modification_present_flag << 18;
2476 result.pps_info_flags |= pps->flags.slice_segment_header_extension_present_flag << 19;
2477
2478 result.num_extra_slice_header_bits = pps->num_extra_slice_header_bits;
2479 result.num_long_term_ref_pic_sps = sps->num_long_term_ref_pics_sps;
2480 result.num_ref_idx_l0_default_active_minus1 = pps->num_ref_idx_l0_default_active_minus1;
2481 result.num_ref_idx_l1_default_active_minus1 = pps->num_ref_idx_l1_default_active_minus1;
2482 result.pps_cb_qp_offset = pps->pps_cb_qp_offset;
2483 result.pps_cr_qp_offset = pps->pps_cr_qp_offset;
2484 result.pps_beta_offset_div2 = pps->pps_beta_offset_div2;
2485 result.pps_tc_offset_div2 = pps->pps_tc_offset_div2;
2486 result.diff_cu_qp_delta_depth = pps->diff_cu_qp_delta_depth;
2487 result.num_tile_columns_minus1 = pps->num_tile_columns_minus1;
2488 result.num_tile_rows_minus1 = pps->num_tile_rows_minus1;
2489 result.log2_parallel_merge_level_minus2 = pps->log2_parallel_merge_level_minus2;
2490 result.init_qp_minus26 = pps->init_qp_minus26;
2491
2492 for (i = 0; i < 19; ++i)
2493 result.column_width_minus1[i] = pps->column_width_minus1[i];
2494
2495 for (i = 0; i < 21; ++i)
2496 result.row_height_minus1[i] = pps->row_height_minus1[i];
2497
2498 result.num_delta_pocs_ref_rps_idx = h265_pic_info->pStdPictureInfo->NumDeltaPocsOfRefRpsIdx;
2499 result.curr_poc = h265_pic_info->pStdPictureInfo->PicOrderCntVal;
2500
2501 uint8_t idxs[16];
2502 memset(result.poc_list, 0, 16 * sizeof(int));
2503 memset(result.ref_pic_list, 0x7f, 16);
2504 memset(idxs, 0xff, 16);
2505 for (i = 0; i < frame_info->referenceSlotCount; i++) {
2506 const struct VkVideoDecodeH265DpbSlotInfoKHR *dpb_slot =
2507 vk_find_struct_const(frame_info->pReferenceSlots[i].pNext, VIDEO_DECODE_H265_DPB_SLOT_INFO_KHR);
2508 int idx = frame_info->pReferenceSlots[i].slotIndex;
2509 result.poc_list[i] = dpb_slot->pStdReferenceInfo->PicOrderCntVal;
2510 result.ref_pic_list[i] = idx;
2511 idxs[idx] = i;
2512 }
2513 result.curr_idx = frame_info->pSetupReferenceSlot->slotIndex;
2514
2515 #define IDXS(x) ((x) == 0xff ? 0xff : idxs[(x)])
2516 for (i = 0; i < 8; ++i)
2517 result.ref_pic_set_st_curr_before[i] = IDXS(h265_pic_info->pStdPictureInfo->RefPicSetStCurrBefore[i]);
2518
2519 for (i = 0; i < 8; ++i)
2520 result.ref_pic_set_st_curr_after[i] = IDXS(h265_pic_info->pStdPictureInfo->RefPicSetStCurrAfter[i]);
2521
2522 for (i = 0; i < 8; ++i)
2523 result.ref_pic_set_lt_curr[i] = IDXS(h265_pic_info->pStdPictureInfo->RefPicSetLtCurr[i]);
2524
2525 const StdVideoH265ScalingLists *scaling_lists = NULL;
2526 if (pps->flags.pps_scaling_list_data_present_flag)
2527 scaling_lists = pps->pScalingLists;
2528 else if (sps->flags.sps_scaling_list_data_present_flag)
2529 scaling_lists = sps->pScalingLists;
2530
2531 update_h265_scaling(it_ptr, scaling_lists);
2532 if (scaling_lists) {
2533 for (i = 0; i < STD_VIDEO_H265_SCALING_LIST_16X16_NUM_LISTS; ++i)
2534 result.ucScalingListDCCoefSizeID2[i] = scaling_lists->ScalingListDCCoef16x16[i];
2535
2536 for (i = 0; i < STD_VIDEO_H265_SCALING_LIST_32X32_NUM_LISTS; ++i)
2537 result.ucScalingListDCCoefSizeID3[i] = scaling_lists->ScalingListDCCoef32x32[i];
2538 }
2539
2540 for (i = 0; i < 2; i++) {
2541 for (j = 0; j < 15; j++)
2542 result.direct_reflist[i][j] = 0xff;
2543 }
2544
2545 if (vid->vk.h265.profile_idc == STD_VIDEO_H265_PROFILE_IDC_MAIN_10) {
2546 if (vid->vk.picture_format == VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16) {
2547 result.p010_mode = 1;
2548 result.msb_mode = 1;
2549 } else {
2550 result.p010_mode = 0;
2551 result.luma_10to8 = 5;
2552 result.chroma_10to8 = 5;
2553 result.sclr_luma10to8 = 4;
2554 result.sclr_chroma10to8 = 4;
2555 }
2556 }
2557
2558 return result;
2559 }
2560
2561 static unsigned
texture_offset_legacy(struct radeon_surf * surface,unsigned layer)2562 texture_offset_legacy(struct radeon_surf *surface, unsigned layer)
2563 {
2564 return (uint64_t)surface->u.legacy.level[0].offset_256B * 256 +
2565 layer * (uint64_t)surface->u.legacy.level[0].slice_size_dw * 4;
2566 }
2567
2568 static bool
ruvd_dec_message_decode(struct radv_device * device,struct radv_video_session * vid,struct radv_video_session_params * params,void * ptr,void * it_ptr,uint32_t * slice_offset,const struct VkVideoDecodeInfoKHR * frame_info)2569 ruvd_dec_message_decode(struct radv_device *device, struct radv_video_session *vid,
2570 struct radv_video_session_params *params, void *ptr, void *it_ptr, uint32_t *slice_offset,
2571 const struct VkVideoDecodeInfoKHR *frame_info)
2572 {
2573 struct ruvd_msg *msg = ptr;
2574 struct radv_image_view *dst_iv = radv_image_view_from_handle(frame_info->dstPictureResource.imageViewBinding);
2575 struct radv_image *img = dst_iv->image;
2576 struct radv_image_plane *luma = &img->planes[0];
2577 struct radv_image_plane *chroma = &img->planes[1];
2578 struct radv_image_view *dpb_iv =
2579 radv_image_view_from_handle(frame_info->pSetupReferenceSlot->pPictureResource->imageViewBinding);
2580 struct radv_image *dpb = dpb_iv->image;
2581
2582 memset(msg, 0, sizeof(struct ruvd_msg));
2583 msg->size = sizeof(*msg);
2584 msg->msg_type = RUVD_MSG_DECODE;
2585 msg->stream_handle = vid->stream_handle;
2586 msg->status_report_feedback_number = vid->dbg_frame_cnt++;
2587
2588 msg->body.decode.stream_type = vid->stream_type;
2589 msg->body.decode.decode_flags = 0x1;
2590 msg->body.decode.width_in_samples = frame_info->dstPictureResource.codedExtent.width;
2591 msg->body.decode.height_in_samples = frame_info->dstPictureResource.codedExtent.height;
2592
2593 msg->body.decode.dpb_size = (vid->dpb_type != DPB_DYNAMIC_TIER_2) ? dpb->size : 0;
2594 msg->body.decode.bsd_size = frame_info->srcBufferRange;
2595 msg->body.decode.db_pitch = align(frame_info->dstPictureResource.codedExtent.width, vid->db_alignment);
2596
2597 if (vid->stream_type == RUVD_CODEC_H264_PERF && device->physical_device->rad_info.family >= CHIP_POLARIS10)
2598 msg->body.decode.dpb_reserved = vid->ctx.size;
2599
2600 *slice_offset = 0;
2601 switch (vid->vk.op) {
2602 case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR: {
2603 msg->body.decode.codec.h264 =
2604 get_uvd_h264_msg(vid, params, frame_info, slice_offset, &msg->body.decode.width_in_samples,
2605 &msg->body.decode.height_in_samples, it_ptr);
2606 break;
2607 }
2608 case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR: {
2609 msg->body.decode.codec.h265 = get_uvd_h265_msg(device, vid, params, frame_info, it_ptr);
2610
2611 if (vid->ctx.mem)
2612 msg->body.decode.dpb_reserved = vid->ctx.size;
2613 break;
2614 }
2615 default:
2616 return false;
2617 }
2618
2619 msg->body.decode.dt_field_mode = false;
2620
2621 if (device->physical_device->rad_info.gfx_level >= GFX9) {
2622 msg->body.decode.dt_pitch = luma->surface.u.gfx9.surf_pitch * luma->surface.blk_w;
2623 msg->body.decode.dt_tiling_mode = RUVD_TILE_LINEAR;
2624 msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_LINEAR;
2625 msg->body.decode.dt_luma_top_offset = luma->surface.u.gfx9.surf_offset;
2626 msg->body.decode.dt_chroma_top_offset = chroma->surface.u.gfx9.surf_offset;
2627 if (msg->body.decode.dt_field_mode) {
2628 msg->body.decode.dt_luma_bottom_offset =
2629 luma->surface.u.gfx9.surf_offset + luma->surface.u.gfx9.surf_slice_size;
2630 msg->body.decode.dt_chroma_bottom_offset =
2631 chroma->surface.u.gfx9.surf_offset + chroma->surface.u.gfx9.surf_slice_size;
2632 } else {
2633 msg->body.decode.dt_luma_bottom_offset = msg->body.decode.dt_luma_top_offset;
2634 msg->body.decode.dt_chroma_bottom_offset = msg->body.decode.dt_chroma_top_offset;
2635 }
2636 msg->body.decode.dt_surf_tile_config = 0;
2637 } else {
2638 msg->body.decode.dt_pitch = luma->surface.u.legacy.level[0].nblk_x * luma->surface.blk_w;
2639 switch (luma->surface.u.legacy.level[0].mode) {
2640 case RADEON_SURF_MODE_LINEAR_ALIGNED:
2641 msg->body.decode.dt_tiling_mode = RUVD_TILE_LINEAR;
2642 msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_LINEAR;
2643 break;
2644 case RADEON_SURF_MODE_1D:
2645 msg->body.decode.dt_tiling_mode = RUVD_TILE_8X8;
2646 msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_1D_THIN;
2647 break;
2648 case RADEON_SURF_MODE_2D:
2649 msg->body.decode.dt_tiling_mode = RUVD_TILE_8X8;
2650 msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_2D_THIN;
2651 break;
2652 default:
2653 assert(0);
2654 break;
2655 }
2656
2657 msg->body.decode.dt_luma_top_offset = texture_offset_legacy(&luma->surface, 0);
2658 if (chroma)
2659 msg->body.decode.dt_chroma_top_offset = texture_offset_legacy(&chroma->surface, 0);
2660 if (msg->body.decode.dt_field_mode) {
2661 msg->body.decode.dt_luma_bottom_offset = texture_offset_legacy(&luma->surface, 1);
2662 if (chroma)
2663 msg->body.decode.dt_chroma_bottom_offset = texture_offset_legacy(&chroma->surface, 1);
2664 } else {
2665 msg->body.decode.dt_luma_bottom_offset = msg->body.decode.dt_luma_top_offset;
2666 msg->body.decode.dt_chroma_bottom_offset = msg->body.decode.dt_chroma_top_offset;
2667 }
2668
2669 if (chroma) {
2670 assert(luma->surface.u.legacy.bankw == chroma->surface.u.legacy.bankw);
2671 assert(luma->surface.u.legacy.bankh == chroma->surface.u.legacy.bankh);
2672 assert(luma->surface.u.legacy.mtilea == chroma->surface.u.legacy.mtilea);
2673 }
2674
2675 msg->body.decode.dt_surf_tile_config |= RUVD_BANK_WIDTH(util_logbase2(luma->surface.u.legacy.bankw));
2676 msg->body.decode.dt_surf_tile_config |= RUVD_BANK_HEIGHT(util_logbase2(luma->surface.u.legacy.bankh));
2677 msg->body.decode.dt_surf_tile_config |=
2678 RUVD_MACRO_TILE_ASPECT_RATIO(util_logbase2(luma->surface.u.legacy.mtilea));
2679 }
2680
2681 if (device->physical_device->rad_info.family >= CHIP_STONEY)
2682 msg->body.decode.dt_wa_chroma_top_offset = msg->body.decode.dt_pitch / 2;
2683
2684 msg->body.decode.db_surf_tile_config = msg->body.decode.dt_surf_tile_config;
2685 msg->body.decode.extension_support = 0x1;
2686
2687 return true;
2688 }
2689
2690 static void
ruvd_dec_message_create(struct radv_video_session * vid,void * ptr)2691 ruvd_dec_message_create(struct radv_video_session *vid, void *ptr)
2692 {
2693 struct ruvd_msg *msg = ptr;
2694
2695 memset(ptr, 0, sizeof(*msg));
2696 msg->size = sizeof(*msg);
2697 msg->msg_type = RUVD_MSG_CREATE;
2698 msg->stream_handle = vid->stream_handle;
2699 msg->body.create.stream_type = vid->stream_type;
2700 msg->body.create.width_in_samples = vid->vk.max_coded.width;
2701 msg->body.create.height_in_samples = vid->vk.max_coded.height;
2702 }
2703
2704 VKAPI_ATTR void VKAPI_CALL
radv_CmdBeginVideoCodingKHR(VkCommandBuffer commandBuffer,const VkVideoBeginCodingInfoKHR * pBeginInfo)2705 radv_CmdBeginVideoCodingKHR(VkCommandBuffer commandBuffer, const VkVideoBeginCodingInfoKHR *pBeginInfo)
2706 {
2707 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
2708 RADV_FROM_HANDLE(radv_video_session, vid, pBeginInfo->videoSession);
2709 RADV_FROM_HANDLE(radv_video_session_params, params, pBeginInfo->videoSessionParameters);
2710
2711 cmd_buffer->video.vid = vid;
2712 cmd_buffer->video.params = params;
2713 }
2714
2715 static void
radv_vcn_cmd_reset(struct radv_cmd_buffer * cmd_buffer)2716 radv_vcn_cmd_reset(struct radv_cmd_buffer *cmd_buffer)
2717 {
2718 struct radv_video_session *vid = cmd_buffer->video.vid;
2719 struct radv_physical_device *pdev = cmd_buffer->device->physical_device;
2720 uint32_t size = sizeof(rvcn_dec_message_header_t) + sizeof(rvcn_dec_message_create_t);
2721
2722 void *ptr;
2723 uint32_t out_offset;
2724
2725 if (vid->stream_type == RDECODE_CODEC_AV1) {
2726 unsigned frame_ctxt_size = pdev->av1_version == RDECODE_AV1_VER_0
2727 ? align(sizeof(rvcn_av1_frame_context_t), 2048)
2728 : align(sizeof(rvcn_av1_vcn4_frame_context_t), 2048);
2729
2730 uint8_t *ctxptr = cmd_buffer->device->ws->buffer_map(vid->ctx.mem->bo);
2731 ctxptr += vid->ctx.offset;
2732 if (pdev->av1_version == RDECODE_AV1_VER_0) {
2733 for (unsigned i = 0; i < 4; ++i) {
2734 rvcn_av1_init_mode_probs((void *)(ctxptr + i * frame_ctxt_size));
2735 rvcn_av1_init_mv_probs((void *)(ctxptr + i * frame_ctxt_size));
2736 rvcn_av1_default_coef_probs((void *)(ctxptr + i * frame_ctxt_size), i);
2737 }
2738 } else {
2739 for (unsigned i = 0; i < 4; ++i) {
2740 rvcn_vcn4_init_mode_probs((void *)(ctxptr + i * frame_ctxt_size));
2741 rvcn_vcn4_av1_init_mv_probs((void *)(ctxptr + i * frame_ctxt_size));
2742 rvcn_vcn4_av1_default_coef_probs((void *)(ctxptr + i * frame_ctxt_size), i);
2743 }
2744 }
2745 cmd_buffer->device->ws->buffer_unmap(vid->ctx.mem->bo);
2746 }
2747 radv_vid_buffer_upload_alloc(cmd_buffer, size, &out_offset, &ptr);
2748
2749 if (cmd_buffer->device->physical_device->vid_decode_ip == AMD_IP_VCN_UNIFIED)
2750 radv_vcn_sq_start(cmd_buffer);
2751
2752 rvcn_dec_message_create(vid, ptr, size);
2753 send_cmd(cmd_buffer, RDECODE_CMD_SESSION_CONTEXT_BUFFER, vid->sessionctx.mem->bo, vid->sessionctx.offset);
2754 send_cmd(cmd_buffer, RDECODE_CMD_MSG_BUFFER, cmd_buffer->upload.upload_bo, out_offset);
2755 /* pad out the IB to the 16 dword boundary - otherwise the fw seems to be unhappy */
2756
2757 if (cmd_buffer->device->physical_device->vid_decode_ip != AMD_IP_VCN_UNIFIED) {
2758 radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 8);
2759 for (unsigned i = 0; i < 8; i++)
2760 radeon_emit(cmd_buffer->cs, 0x81ff);
2761 } else
2762 radv_vcn_sq_tail(cmd_buffer->cs, &cmd_buffer->video.sq);
2763 }
2764
2765 static void
radv_uvd_cmd_reset(struct radv_cmd_buffer * cmd_buffer)2766 radv_uvd_cmd_reset(struct radv_cmd_buffer *cmd_buffer)
2767 {
2768 struct radv_video_session *vid = cmd_buffer->video.vid;
2769 uint32_t size = sizeof(struct ruvd_msg);
2770 void *ptr;
2771 uint32_t out_offset;
2772 radv_vid_buffer_upload_alloc(cmd_buffer, size, &out_offset, &ptr);
2773
2774 ruvd_dec_message_create(vid, ptr);
2775 if (vid->sessionctx.mem)
2776 send_cmd(cmd_buffer, RDECODE_CMD_SESSION_CONTEXT_BUFFER, vid->sessionctx.mem->bo, vid->sessionctx.offset);
2777 send_cmd(cmd_buffer, RDECODE_CMD_MSG_BUFFER, cmd_buffer->upload.upload_bo, out_offset);
2778
2779 /* pad out the IB to the 16 dword boundary - otherwise the fw seems to be unhappy */
2780 int padsize = vid->sessionctx.mem ? 4 : 6;
2781 radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, padsize);
2782 for (unsigned i = 0; i < padsize; i++)
2783 radeon_emit(cmd_buffer->cs, PKT2_NOP_PAD);
2784 }
2785
2786 VKAPI_ATTR void VKAPI_CALL
radv_CmdControlVideoCodingKHR(VkCommandBuffer commandBuffer,const VkVideoCodingControlInfoKHR * pCodingControlInfo)2787 radv_CmdControlVideoCodingKHR(VkCommandBuffer commandBuffer, const VkVideoCodingControlInfoKHR *pCodingControlInfo)
2788 {
2789 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
2790 if (pCodingControlInfo->flags & VK_VIDEO_CODING_CONTROL_RESET_BIT_KHR) {
2791 if (radv_has_uvd(cmd_buffer->device->physical_device))
2792 radv_uvd_cmd_reset(cmd_buffer);
2793 else
2794 radv_vcn_cmd_reset(cmd_buffer);
2795 }
2796 }
2797
2798 VKAPI_ATTR void VKAPI_CALL
radv_CmdEndVideoCodingKHR(VkCommandBuffer commandBuffer,const VkVideoEndCodingInfoKHR * pEndCodingInfo)2799 radv_CmdEndVideoCodingKHR(VkCommandBuffer commandBuffer, const VkVideoEndCodingInfoKHR *pEndCodingInfo)
2800 {
2801 }
2802
2803 static void
radv_uvd_decode_video(struct radv_cmd_buffer * cmd_buffer,const VkVideoDecodeInfoKHR * frame_info)2804 radv_uvd_decode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoDecodeInfoKHR *frame_info)
2805 {
2806 RADV_FROM_HANDLE(radv_buffer, src_buffer, frame_info->srcBuffer);
2807 struct radv_video_session *vid = cmd_buffer->video.vid;
2808 struct radv_video_session_params *params = cmd_buffer->video.params;
2809 unsigned size = sizeof(struct ruvd_msg);
2810 void *ptr, *fb_ptr, *it_probs_ptr = NULL;
2811 uint32_t out_offset, fb_offset, it_probs_offset = 0;
2812 struct radeon_winsys_bo *msg_bo, *fb_bo, *it_probs_bo = NULL;
2813 unsigned fb_size =
2814 (cmd_buffer->device->physical_device->rad_info.family == CHIP_TONGA) ? FB_BUFFER_SIZE_TONGA : FB_BUFFER_SIZE;
2815
2816 radv_vid_buffer_upload_alloc(cmd_buffer, fb_size, &fb_offset, &fb_ptr);
2817 fb_bo = cmd_buffer->upload.upload_bo;
2818 if (have_it(vid)) {
2819 radv_vid_buffer_upload_alloc(cmd_buffer, IT_SCALING_TABLE_SIZE, &it_probs_offset, &it_probs_ptr);
2820 it_probs_bo = cmd_buffer->upload.upload_bo;
2821 }
2822
2823 radv_vid_buffer_upload_alloc(cmd_buffer, size, &out_offset, &ptr);
2824 msg_bo = cmd_buffer->upload.upload_bo;
2825
2826 uint32_t slice_offset;
2827 ruvd_dec_message_decode(cmd_buffer->device, vid, params, ptr, it_probs_ptr, &slice_offset, frame_info);
2828 rvcn_dec_message_feedback(fb_ptr);
2829 if (vid->sessionctx.mem)
2830 send_cmd(cmd_buffer, RDECODE_CMD_SESSION_CONTEXT_BUFFER, vid->sessionctx.mem->bo, vid->sessionctx.offset);
2831 send_cmd(cmd_buffer, RDECODE_CMD_MSG_BUFFER, msg_bo, out_offset);
2832
2833 if (vid->dpb_type != DPB_DYNAMIC_TIER_2) {
2834 struct radv_image_view *dpb_iv =
2835 radv_image_view_from_handle(frame_info->pSetupReferenceSlot->pPictureResource->imageViewBinding);
2836 struct radv_image *dpb = dpb_iv->image;
2837 send_cmd(cmd_buffer, RDECODE_CMD_DPB_BUFFER, dpb->bindings[0].bo, dpb->bindings[0].offset);
2838 }
2839
2840 if (vid->ctx.mem)
2841 send_cmd(cmd_buffer, RDECODE_CMD_CONTEXT_BUFFER, vid->ctx.mem->bo, vid->ctx.offset);
2842
2843 send_cmd(cmd_buffer, RDECODE_CMD_BITSTREAM_BUFFER, src_buffer->bo,
2844 src_buffer->offset + frame_info->srcBufferOffset + slice_offset);
2845
2846 struct radv_image_view *dst_iv = radv_image_view_from_handle(frame_info->dstPictureResource.imageViewBinding);
2847 struct radv_image *img = dst_iv->image;
2848 send_cmd(cmd_buffer, RDECODE_CMD_DECODING_TARGET_BUFFER, img->bindings[0].bo, img->bindings[0].offset);
2849 send_cmd(cmd_buffer, RDECODE_CMD_FEEDBACK_BUFFER, fb_bo, fb_offset);
2850 if (have_it(vid))
2851 send_cmd(cmd_buffer, RDECODE_CMD_IT_SCALING_TABLE_BUFFER, it_probs_bo, it_probs_offset);
2852
2853 radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 2);
2854 set_reg(cmd_buffer, cmd_buffer->device->physical_device->vid_dec_reg.cntl, 1);
2855 }
2856
2857 static void
radv_vcn_decode_video(struct radv_cmd_buffer * cmd_buffer,const VkVideoDecodeInfoKHR * frame_info)2858 radv_vcn_decode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoDecodeInfoKHR *frame_info)
2859 {
2860 RADV_FROM_HANDLE(radv_buffer, src_buffer, frame_info->srcBuffer);
2861 struct radv_video_session *vid = cmd_buffer->video.vid;
2862 struct radv_video_session_params *params = cmd_buffer->video.params;
2863 unsigned size = 0;
2864 void *ptr, *fb_ptr, *it_probs_ptr = NULL;
2865 uint32_t out_offset, fb_offset, it_probs_offset = 0;
2866 struct radeon_winsys_bo *msg_bo, *fb_bo, *it_probs_bo = NULL;
2867
2868 size += sizeof(rvcn_dec_message_header_t); /* header */
2869 size += sizeof(rvcn_dec_message_index_t); /* codec */
2870 if (vid->dpb_type == DPB_DYNAMIC_TIER_2) {
2871 size += sizeof(rvcn_dec_message_index_t);
2872 size += sizeof(rvcn_dec_message_dynamic_dpb_t2_t);
2873 }
2874 size += sizeof(rvcn_dec_message_decode_t); /* decode */
2875 switch (vid->vk.op) {
2876 case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR:
2877 size += sizeof(rvcn_dec_message_avc_t);
2878 break;
2879 case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR:
2880 size += sizeof(rvcn_dec_message_hevc_t);
2881 break;
2882 case VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR:
2883 size += sizeof(rvcn_dec_message_av1_t);
2884 break;
2885 default:
2886 unreachable("unsupported codec.");
2887 }
2888
2889 radv_vid_buffer_upload_alloc(cmd_buffer, FB_BUFFER_SIZE, &fb_offset, &fb_ptr);
2890 fb_bo = cmd_buffer->upload.upload_bo;
2891 if (have_it(vid)) {
2892 radv_vid_buffer_upload_alloc(cmd_buffer, IT_SCALING_TABLE_SIZE, &it_probs_offset, &it_probs_ptr);
2893 it_probs_bo = cmd_buffer->upload.upload_bo;
2894 } else if (have_probs(vid)) {
2895 radv_vid_buffer_upload_alloc(cmd_buffer, sizeof(rvcn_dec_av1_segment_fg_t), &it_probs_offset, &it_probs_ptr);
2896 it_probs_bo = cmd_buffer->upload.upload_bo;
2897 }
2898
2899 radv_vid_buffer_upload_alloc(cmd_buffer, size, &out_offset, &ptr);
2900 msg_bo = cmd_buffer->upload.upload_bo;
2901
2902 if (cmd_buffer->device->physical_device->vid_decode_ip == AMD_IP_VCN_UNIFIED)
2903 radv_vcn_sq_start(cmd_buffer);
2904
2905 uint32_t slice_offset;
2906 rvcn_dec_message_decode(cmd_buffer, vid, params, ptr, it_probs_ptr, &slice_offset, frame_info);
2907 rvcn_dec_message_feedback(fb_ptr);
2908 send_cmd(cmd_buffer, RDECODE_CMD_SESSION_CONTEXT_BUFFER, vid->sessionctx.mem->bo, vid->sessionctx.offset);
2909 send_cmd(cmd_buffer, RDECODE_CMD_MSG_BUFFER, msg_bo, out_offset);
2910
2911 if (vid->dpb_type != DPB_DYNAMIC_TIER_2) {
2912 struct radv_image_view *dpb_iv =
2913 radv_image_view_from_handle(frame_info->pSetupReferenceSlot->pPictureResource->imageViewBinding);
2914 struct radv_image *dpb = dpb_iv->image;
2915 send_cmd(cmd_buffer, RDECODE_CMD_DPB_BUFFER, dpb->bindings[0].bo, dpb->bindings[0].offset);
2916 }
2917
2918 if (vid->ctx.mem)
2919 send_cmd(cmd_buffer, RDECODE_CMD_CONTEXT_BUFFER, vid->ctx.mem->bo, vid->ctx.offset);
2920
2921 send_cmd(cmd_buffer, RDECODE_CMD_BITSTREAM_BUFFER, src_buffer->bo,
2922 src_buffer->offset + frame_info->srcBufferOffset + slice_offset);
2923
2924 struct radv_image_view *dst_iv = radv_image_view_from_handle(frame_info->dstPictureResource.imageViewBinding);
2925 struct radv_image *img = dst_iv->image;
2926 send_cmd(cmd_buffer, RDECODE_CMD_DECODING_TARGET_BUFFER, img->bindings[0].bo, img->bindings[0].offset);
2927 send_cmd(cmd_buffer, RDECODE_CMD_FEEDBACK_BUFFER, fb_bo, fb_offset);
2928 if (have_it(vid))
2929 send_cmd(cmd_buffer, RDECODE_CMD_IT_SCALING_TABLE_BUFFER, it_probs_bo, it_probs_offset);
2930 else if (have_probs(vid))
2931 send_cmd(cmd_buffer, RDECODE_CMD_PROB_TBL_BUFFER, it_probs_bo, it_probs_offset);
2932
2933 if (cmd_buffer->device->physical_device->vid_decode_ip != AMD_IP_VCN_UNIFIED) {
2934 radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 2);
2935 set_reg(cmd_buffer, cmd_buffer->device->physical_device->vid_dec_reg.cntl, 1);
2936 } else
2937 radv_vcn_sq_tail(cmd_buffer->cs, &cmd_buffer->video.sq);
2938 }
2939
2940 VKAPI_ATTR void VKAPI_CALL
radv_CmdDecodeVideoKHR(VkCommandBuffer commandBuffer,const VkVideoDecodeInfoKHR * frame_info)2941 radv_CmdDecodeVideoKHR(VkCommandBuffer commandBuffer, const VkVideoDecodeInfoKHR *frame_info)
2942 {
2943 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
2944
2945 if (radv_has_uvd(cmd_buffer->device->physical_device))
2946 radv_uvd_decode_video(cmd_buffer, frame_info);
2947 else
2948 radv_vcn_decode_video(cmd_buffer, frame_info);
2949 }
2950
2951 void
radv_video_get_profile_alignments(struct radv_physical_device * pdevice,const VkVideoProfileListInfoKHR * profile_list,uint32_t * width_align_out,uint32_t * height_align_out)2952 radv_video_get_profile_alignments(struct radv_physical_device *pdevice, const VkVideoProfileListInfoKHR *profile_list,
2953 uint32_t *width_align_out, uint32_t *height_align_out)
2954 {
2955 vk_video_get_profile_alignments(profile_list, width_align_out, height_align_out);
2956 bool is_h265_main_10 = false;
2957 for (unsigned i = 0; i < profile_list->profileCount; i++) {
2958 if (profile_list->pProfiles[i].videoCodecOperation == VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR) {
2959 const struct VkVideoDecodeH265ProfileInfoKHR *h265_profile =
2960 vk_find_struct_const(profile_list->pProfiles[i].pNext, VIDEO_DECODE_H265_PROFILE_INFO_KHR);
2961 if (h265_profile->stdProfileIdc == STD_VIDEO_H265_PROFILE_IDC_MAIN_10)
2962 is_h265_main_10 = true;
2963 }
2964 }
2965
2966 uint32_t db_alignment = radv_video_get_db_alignment(pdevice, 64, is_h265_main_10);
2967 *width_align_out = MAX2(*width_align_out, db_alignment);
2968 *height_align_out = MAX2(*height_align_out, db_alignment);
2969 }
2970