• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**************************************************************************
2  *
3  * Copyright 2017 Advanced Micro Devices, Inc.
4  * Copyright 2021 Red Hat Inc.
5  * All Rights Reserved.
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the
9  * "Software"), to deal in the Software without restriction, including
10  * without limitation the rights to use, copy, modify, merge, publish,
11  * distribute, sub license, and/or sell copies of the Software, and to
12  * permit persons to whom the Software is furnished to do so, subject to
13  * the following conditions:
14  *
15  * The above copyright notice and this permission notice (including the
16  * next paragraph) shall be included in all copies or substantial portions
17  * of the Software.
18  *
19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22  * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
23  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26  *
27  **************************************************************************/
28 #include "radv_private.h"
29 
30 #ifndef _WIN32
31 #include "drm-uapi/amdgpu_drm.h"
32 #endif
33 
34 #include "util/vl_zscan_data.h"
35 #include "vk_video/vulkan_video_codecs_common.h"
36 #include "ac_uvd_dec.h"
37 #include "ac_vcn_av1_default.h"
38 #include "ac_vcn_dec.h"
39 
40 #include "radv_cs.h"
41 #include "radv_debug.h"
42 
43 #define NUM_H264_REFS                17
44 #define NUM_H265_REFS                8
45 #define FB_BUFFER_OFFSET             0x1000
46 #define FB_BUFFER_SIZE               2048
47 #define FB_BUFFER_SIZE_TONGA         (2048 * 64)
48 #define IT_SCALING_TABLE_SIZE        992
49 #define RDECODE_SESSION_CONTEXT_SIZE (128 * 1024)
50 
51 /* Not 100% sure this isn't too much but works */
52 #define VID_DEFAULT_ALIGNMENT 256
53 
54 static bool
radv_enable_tier2(struct radv_physical_device * pdevice)55 radv_enable_tier2(struct radv_physical_device *pdevice)
56 {
57    if (pdevice->rad_info.vcn_ip_version >= VCN_3_0_0 && !(pdevice->instance->debug_flags & RADV_DEBUG_VIDEO_ARRAY_PATH))
58       return true;
59    return false;
60 }
61 
62 static uint32_t
radv_video_get_db_alignment(struct radv_physical_device * pdevice,int width,bool is_h265_main_10_or_av1)63 radv_video_get_db_alignment(struct radv_physical_device *pdevice, int width, bool is_h265_main_10_or_av1)
64 {
65    if (pdevice->rad_info.vcn_ip_version >= VCN_2_0_0 && width > 32 && is_h265_main_10_or_av1)
66       return 64;
67    return 32;
68 }
69 
70 static bool
radv_vid_buffer_upload_alloc(struct radv_cmd_buffer * cmd_buffer,unsigned size,unsigned * out_offset,void ** ptr)71 radv_vid_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer, unsigned size, unsigned *out_offset, void **ptr)
72 {
73    return radv_cmd_buffer_upload_alloc_aligned(cmd_buffer, size, VID_DEFAULT_ALIGNMENT, out_offset, ptr);
74 }
75 
76 /* vcn unified queue (sq) ib header */
77 static void
radv_vcn_sq_header(struct radeon_cmdbuf * cs,struct rvcn_sq_var * sq,bool enc)78 radv_vcn_sq_header(struct radeon_cmdbuf *cs, struct rvcn_sq_var *sq, bool enc)
79 {
80    /* vcn ib signature */
81    radeon_emit(cs, RADEON_VCN_SIGNATURE_SIZE);
82    radeon_emit(cs, RADEON_VCN_SIGNATURE);
83    sq->ib_checksum = &cs->buf[cs->cdw];
84    radeon_emit(cs, 0);
85    sq->ib_total_size_in_dw = &cs->buf[cs->cdw];
86    radeon_emit(cs, 0);
87 
88    /* vcn ib engine info */
89    radeon_emit(cs, RADEON_VCN_ENGINE_INFO_SIZE);
90    radeon_emit(cs, RADEON_VCN_ENGINE_INFO);
91    radeon_emit(cs, enc ? RADEON_VCN_ENGINE_TYPE_ENCODE : RADEON_VCN_ENGINE_TYPE_DECODE);
92    radeon_emit(cs, 0);
93 }
94 
95 static void
radv_vcn_sq_tail(struct radeon_cmdbuf * cs,struct rvcn_sq_var * sq)96 radv_vcn_sq_tail(struct radeon_cmdbuf *cs, struct rvcn_sq_var *sq)
97 {
98    uint32_t *end;
99    uint32_t size_in_dw;
100    uint32_t checksum = 0;
101 
102    if (sq->ib_checksum == NULL || sq->ib_total_size_in_dw == NULL)
103       return;
104 
105    end = &cs->buf[cs->cdw];
106    size_in_dw = end - sq->ib_total_size_in_dw - 1;
107    *sq->ib_total_size_in_dw = size_in_dw;
108    *(sq->ib_total_size_in_dw + 4) = size_in_dw * sizeof(uint32_t);
109 
110    for (int i = 0; i < size_in_dw; i++)
111       checksum += *(sq->ib_checksum + 2 + i);
112 
113    *sq->ib_checksum = checksum;
114 }
115 
116 static void
radv_vcn_sq_start(struct radv_cmd_buffer * cmd_buffer)117 radv_vcn_sq_start(struct radv_cmd_buffer *cmd_buffer)
118 {
119    radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 256);
120    radv_vcn_sq_header(cmd_buffer->cs, &cmd_buffer->video.sq, false);
121    rvcn_decode_ib_package_t *ib_header = (rvcn_decode_ib_package_t *)&(cmd_buffer->cs->buf[cmd_buffer->cs->cdw]);
122    ib_header->package_size = sizeof(struct rvcn_decode_buffer_s) + sizeof(struct rvcn_decode_ib_package_s);
123    cmd_buffer->cs->cdw++;
124    ib_header->package_type = (RDECODE_IB_PARAM_DECODE_BUFFER);
125    cmd_buffer->cs->cdw++;
126    cmd_buffer->video.decode_buffer = (rvcn_decode_buffer_t *)&(cmd_buffer->cs->buf[cmd_buffer->cs->cdw]);
127    cmd_buffer->cs->cdw += sizeof(struct rvcn_decode_buffer_s) / 4;
128    memset(cmd_buffer->video.decode_buffer, 0, sizeof(struct rvcn_decode_buffer_s));
129 }
130 
131 /* generate an stream handle */
132 static unsigned
radv_vid_alloc_stream_handle(struct radv_physical_device * pdevice)133 radv_vid_alloc_stream_handle(struct radv_physical_device *pdevice)
134 {
135    unsigned stream_handle = pdevice->stream_handle_base;
136 
137    stream_handle ^= ++pdevice->stream_handle_counter;
138    return stream_handle;
139 }
140 
141 static void
init_uvd_decoder(struct radv_physical_device * pdevice)142 init_uvd_decoder(struct radv_physical_device *pdevice)
143 {
144    if (pdevice->rad_info.family >= CHIP_VEGA10) {
145       pdevice->vid_dec_reg.data0 = RUVD_GPCOM_VCPU_DATA0_SOC15;
146       pdevice->vid_dec_reg.data1 = RUVD_GPCOM_VCPU_DATA1_SOC15;
147       pdevice->vid_dec_reg.cmd = RUVD_GPCOM_VCPU_CMD_SOC15;
148       pdevice->vid_dec_reg.cntl = RUVD_ENGINE_CNTL_SOC15;
149    } else {
150       pdevice->vid_dec_reg.data0 = RUVD_GPCOM_VCPU_DATA0;
151       pdevice->vid_dec_reg.data1 = RUVD_GPCOM_VCPU_DATA1;
152       pdevice->vid_dec_reg.cmd = RUVD_GPCOM_VCPU_CMD;
153       pdevice->vid_dec_reg.cntl = RUVD_ENGINE_CNTL;
154    }
155 }
156 
157 static void
init_vcn_decoder(struct radv_physical_device * pdevice)158 init_vcn_decoder(struct radv_physical_device *pdevice)
159 {
160    switch (pdevice->rad_info.vcn_ip_version) {
161    case VCN_1_0_0:
162    case VCN_1_0_1:
163       pdevice->vid_dec_reg.data0 = RDECODE_VCN1_GPCOM_VCPU_DATA0;
164       pdevice->vid_dec_reg.data1 = RDECODE_VCN1_GPCOM_VCPU_DATA1;
165       pdevice->vid_dec_reg.cmd = RDECODE_VCN1_GPCOM_VCPU_CMD;
166       pdevice->vid_dec_reg.cntl = RDECODE_VCN1_ENGINE_CNTL;
167       break;
168    case VCN_2_0_0:
169    case VCN_2_0_2:
170    case VCN_2_0_3:
171    case VCN_2_2_0:
172       pdevice->vid_dec_reg.data0 = RDECODE_VCN2_GPCOM_VCPU_DATA0;
173       pdevice->vid_dec_reg.data1 = RDECODE_VCN2_GPCOM_VCPU_DATA1;
174       pdevice->vid_dec_reg.cmd = RDECODE_VCN2_GPCOM_VCPU_CMD;
175       pdevice->vid_dec_reg.cntl = RDECODE_VCN2_ENGINE_CNTL;
176       break;
177    case VCN_2_5_0:
178    case VCN_2_6_0:
179    case VCN_3_0_0:
180    case VCN_3_0_16:
181    case VCN_3_0_33:
182    case VCN_3_1_1:
183    case VCN_3_1_2:
184       pdevice->vid_dec_reg.data0 = RDECODE_VCN2_5_GPCOM_VCPU_DATA0;
185       pdevice->vid_dec_reg.data1 = RDECODE_VCN2_5_GPCOM_VCPU_DATA1;
186       pdevice->vid_dec_reg.cmd = RDECODE_VCN2_5_GPCOM_VCPU_CMD;
187       pdevice->vid_dec_reg.cntl = RDECODE_VCN2_5_ENGINE_CNTL;
188       break;
189    case VCN_4_0_3:
190       pdevice->vid_addr_gfx_mode = RDECODE_ARRAY_MODE_ADDRLIB_SEL_GFX9;
191       pdevice->av1_version = RDECODE_AV1_VER_1;
192       break;
193    case VCN_4_0_0:
194    case VCN_4_0_2:
195    case VCN_4_0_4:
196    case VCN_4_0_5:
197       pdevice->vid_addr_gfx_mode = RDECODE_ARRAY_MODE_ADDRLIB_SEL_GFX11;
198       pdevice->av1_version = RDECODE_AV1_VER_1;
199       break;
200    default:
201       break;
202    }
203 }
204 
205 void
radv_init_physical_device_decoder(struct radv_physical_device * pdevice)206 radv_init_physical_device_decoder(struct radv_physical_device *pdevice)
207 {
208    if (pdevice->rad_info.vcn_ip_version >= VCN_4_0_0)
209       pdevice->vid_decode_ip = AMD_IP_VCN_UNIFIED;
210    else if (radv_has_uvd(pdevice))
211       pdevice->vid_decode_ip = AMD_IP_UVD;
212    else
213       pdevice->vid_decode_ip = AMD_IP_VCN_DEC;
214    pdevice->av1_version = RDECODE_AV1_VER_0;
215 
216    pdevice->stream_handle_counter = 0;
217    pdevice->stream_handle_base = 0;
218 
219    pdevice->stream_handle_base = util_bitreverse(getpid());
220 
221    pdevice->vid_addr_gfx_mode = RDECODE_ARRAY_MODE_LINEAR;
222 
223    if (radv_has_uvd(pdevice))
224       init_uvd_decoder(pdevice);
225    else
226       init_vcn_decoder(pdevice);
227 }
228 
229 static bool
have_it(struct radv_video_session * vid)230 have_it(struct radv_video_session *vid)
231 {
232    return vid->stream_type == RDECODE_CODEC_H264_PERF || vid->stream_type == RDECODE_CODEC_H265;
233 }
234 
235 static bool
have_probs(struct radv_video_session * vid)236 have_probs(struct radv_video_session *vid)
237 {
238    return vid->stream_type == RDECODE_CODEC_AV1;
239 }
240 
241 static unsigned
calc_ctx_size_h264_perf(struct radv_video_session * vid)242 calc_ctx_size_h264_perf(struct radv_video_session *vid)
243 {
244    unsigned width_in_mb, height_in_mb, ctx_size;
245    unsigned width = align(vid->vk.max_coded.width, VL_MACROBLOCK_WIDTH);
246    unsigned height = align(vid->vk.max_coded.height, VL_MACROBLOCK_HEIGHT);
247 
248    unsigned max_references = vid->vk.max_dpb_slots + 1;
249 
250    /* picture width & height in 16 pixel units */
251    width_in_mb = width / VL_MACROBLOCK_WIDTH;
252    height_in_mb = align(height / VL_MACROBLOCK_HEIGHT, 2);
253 
254    ctx_size = max_references * align(width_in_mb * height_in_mb * 192, 256);
255 
256    return ctx_size;
257 }
258 
259 static unsigned
calc_ctx_size_h265_main(struct radv_video_session * vid)260 calc_ctx_size_h265_main(struct radv_video_session *vid)
261 {
262    unsigned width = align(vid->vk.max_coded.width, VL_MACROBLOCK_WIDTH);
263    unsigned height = align(vid->vk.max_coded.height, VL_MACROBLOCK_HEIGHT);
264 
265    unsigned max_references = vid->vk.max_dpb_slots + 1;
266 
267    if (vid->vk.max_coded.width * vid->vk.max_coded.height >= 4096 * 2000)
268       max_references = MAX2(max_references, 8);
269    else
270       max_references = MAX2(max_references, 17);
271 
272    width = align(width, 16);
273    height = align(height, 16);
274    return ((width + 255) / 16) * ((height + 255) / 16) * 16 * max_references + 52 * 1024;
275 }
276 
277 static unsigned
calc_ctx_size_h265_main10(struct radv_video_session * vid)278 calc_ctx_size_h265_main10(struct radv_video_session *vid)
279 {
280    unsigned log2_ctb_size, width_in_ctb, height_in_ctb, num_16x16_block_per_ctb;
281    unsigned context_buffer_size_per_ctb_row, cm_buffer_size, max_mb_address, db_left_tile_pxl_size;
282    unsigned db_left_tile_ctx_size = 4096 / 16 * (32 + 16 * 4);
283 
284    unsigned width = align(vid->vk.max_coded.width, VL_MACROBLOCK_WIDTH);
285    unsigned height = align(vid->vk.max_coded.height, VL_MACROBLOCK_HEIGHT);
286    unsigned coeff_10bit = 2;
287 
288    unsigned max_references = vid->vk.max_dpb_slots + 1;
289 
290    if (vid->vk.max_coded.width * vid->vk.max_coded.height >= 4096 * 2000)
291       max_references = MAX2(max_references, 8);
292    else
293       max_references = MAX2(max_references, 17);
294 
295    /* 64x64 is the maximum ctb size. */
296    log2_ctb_size = 6;
297 
298    width_in_ctb = (width + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size;
299    height_in_ctb = (height + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size;
300 
301    num_16x16_block_per_ctb = ((1 << log2_ctb_size) >> 4) * ((1 << log2_ctb_size) >> 4);
302    context_buffer_size_per_ctb_row = align(width_in_ctb * num_16x16_block_per_ctb * 16, 256);
303    max_mb_address = (unsigned)ceil(height * 8 / 2048.0);
304 
305    cm_buffer_size = max_references * context_buffer_size_per_ctb_row * height_in_ctb;
306    db_left_tile_pxl_size = coeff_10bit * (max_mb_address * 2 * 2048 + 1024);
307 
308    return cm_buffer_size + db_left_tile_ctx_size + db_left_tile_pxl_size;
309 }
310 
311 static unsigned
calc_ctx_size_av1(struct radv_device * device,struct radv_video_session * vid)312 calc_ctx_size_av1(struct radv_device *device, struct radv_video_session *vid)
313 {
314    struct radv_physical_device *pdev = device->physical_device;
315    unsigned frame_ctxt_size = pdev->av1_version == RDECODE_AV1_VER_0
316                                  ? align(sizeof(rvcn_av1_frame_context_t), 2048)
317                                  : align(sizeof(rvcn_av1_vcn4_frame_context_t), 2048);
318    unsigned ctx_size = (9 + 4) * frame_ctxt_size + 9 * 64 * 34 * 512 + 9 * 64 * 34 * 256 * 5;
319 
320    int num_64x64_CTB_8k = 68;
321    int num_128x128_CTB_8k = 34;
322    int sdb_pitch_64x64 = align(32 * num_64x64_CTB_8k, 256) * 2;
323    int sdb_pitch_128x128 = align(32 * num_128x128_CTB_8k, 256) * 2;
324    int sdb_lf_size_ctb_64x64 = sdb_pitch_64x64 * (align(1728, 64) / 64);
325    int sdb_lf_size_ctb_128x128 = sdb_pitch_128x128 * (align(3008, 64) / 64);
326    int sdb_superres_size_ctb_64x64 = sdb_pitch_64x64 * (align(3232, 64) / 64);
327    int sdb_superres_size_ctb_128x128 = sdb_pitch_128x128 * (align(6208, 64) / 64);
328    int sdb_output_size_ctb_64x64 = sdb_pitch_64x64 * (align(1312, 64) / 64);
329    int sdb_output_size_ctb_128x128 = sdb_pitch_128x128 * (align(2336, 64) / 64);
330    int sdb_fg_avg_luma_size_ctb_64x64 = sdb_pitch_64x64 * (align(384, 64) / 64);
331    int sdb_fg_avg_luma_size_ctb_128x128 = sdb_pitch_128x128 * (align(640, 64) / 64);
332 
333    ctx_size += (MAX2(sdb_lf_size_ctb_64x64, sdb_lf_size_ctb_128x128) +
334                 MAX2(sdb_superres_size_ctb_64x64, sdb_superres_size_ctb_128x128) +
335                 MAX2(sdb_output_size_ctb_64x64, sdb_output_size_ctb_128x128) +
336                 MAX2(sdb_fg_avg_luma_size_ctb_64x64, sdb_fg_avg_luma_size_ctb_128x128)) *
337                   2 +
338                68 * 512;
339 
340    return ctx_size;
341 }
342 
343 VKAPI_ATTR VkResult VKAPI_CALL
radv_CreateVideoSessionKHR(VkDevice _device,const VkVideoSessionCreateInfoKHR * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkVideoSessionKHR * pVideoSession)344 radv_CreateVideoSessionKHR(VkDevice _device, const VkVideoSessionCreateInfoKHR *pCreateInfo,
345                            const VkAllocationCallbacks *pAllocator, VkVideoSessionKHR *pVideoSession)
346 {
347    RADV_FROM_HANDLE(radv_device, device, _device);
348 
349    struct radv_video_session *vid =
350       vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*vid), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
351    if (!vid)
352       return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
353 
354    memset(vid, 0, sizeof(struct radv_video_session));
355 
356    VkResult result = vk_video_session_init(&device->vk, &vid->vk, pCreateInfo);
357    if (result != VK_SUCCESS) {
358       vk_free2(&device->vk.alloc, pAllocator, vid);
359       return result;
360    }
361 
362    vid->interlaced = false;
363    vid->dpb_type = DPB_MAX_RES;
364 
365    switch (vid->vk.op) {
366    case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR:
367       vid->stream_type = RDECODE_CODEC_H264_PERF;
368       if (radv_enable_tier2(device->physical_device))
369          vid->dpb_type = DPB_DYNAMIC_TIER_2;
370       break;
371    case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR:
372       vid->stream_type = RDECODE_CODEC_H265;
373       if (radv_enable_tier2(device->physical_device))
374          vid->dpb_type = DPB_DYNAMIC_TIER_2;
375       break;
376    case VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR:
377       vid->stream_type = RDECODE_CODEC_AV1;
378       vid->dpb_type = DPB_DYNAMIC_TIER_2;
379       break;
380    default:
381       return VK_ERROR_FEATURE_NOT_PRESENT;
382    }
383 
384    vid->stream_handle = radv_vid_alloc_stream_handle(device->physical_device);
385    vid->dbg_frame_cnt = 0;
386    vid->db_alignment = radv_video_get_db_alignment(
387       device->physical_device, vid->vk.max_coded.width,
388       (vid->stream_type == RDECODE_CODEC_AV1 ||
389        (vid->stream_type == RDECODE_CODEC_H265 && vid->vk.h265.profile_idc == STD_VIDEO_H265_PROFILE_IDC_MAIN_10)));
390 
391    *pVideoSession = radv_video_session_to_handle(vid);
392    return VK_SUCCESS;
393 }
394 
395 VKAPI_ATTR void VKAPI_CALL
radv_DestroyVideoSessionKHR(VkDevice _device,VkVideoSessionKHR _session,const VkAllocationCallbacks * pAllocator)396 radv_DestroyVideoSessionKHR(VkDevice _device, VkVideoSessionKHR _session, const VkAllocationCallbacks *pAllocator)
397 {
398    RADV_FROM_HANDLE(radv_device, device, _device);
399    RADV_FROM_HANDLE(radv_video_session, vid, _session);
400    if (!_session)
401       return;
402 
403    vk_object_base_finish(&vid->vk.base);
404    vk_free2(&device->vk.alloc, pAllocator, vid);
405 }
406 
407 VKAPI_ATTR VkResult VKAPI_CALL
radv_CreateVideoSessionParametersKHR(VkDevice _device,const VkVideoSessionParametersCreateInfoKHR * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkVideoSessionParametersKHR * pVideoSessionParameters)408 radv_CreateVideoSessionParametersKHR(VkDevice _device, const VkVideoSessionParametersCreateInfoKHR *pCreateInfo,
409                                      const VkAllocationCallbacks *pAllocator,
410                                      VkVideoSessionParametersKHR *pVideoSessionParameters)
411 {
412    RADV_FROM_HANDLE(radv_device, device, _device);
413    RADV_FROM_HANDLE(radv_video_session, vid, pCreateInfo->videoSession);
414    RADV_FROM_HANDLE(radv_video_session_params, templ, pCreateInfo->videoSessionParametersTemplate);
415    struct radv_video_session_params *params =
416       vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*params), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
417    if (!params)
418       return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
419 
420    VkResult result =
421       vk_video_session_parameters_init(&device->vk, &params->vk, &vid->vk, templ ? &templ->vk : NULL, pCreateInfo);
422    if (result != VK_SUCCESS) {
423       vk_free2(&device->vk.alloc, pAllocator, params);
424       return result;
425    }
426 
427    *pVideoSessionParameters = radv_video_session_params_to_handle(params);
428    return VK_SUCCESS;
429 }
430 
431 VKAPI_ATTR void VKAPI_CALL
radv_DestroyVideoSessionParametersKHR(VkDevice _device,VkVideoSessionParametersKHR _params,const VkAllocationCallbacks * pAllocator)432 radv_DestroyVideoSessionParametersKHR(VkDevice _device, VkVideoSessionParametersKHR _params,
433                                       const VkAllocationCallbacks *pAllocator)
434 {
435    RADV_FROM_HANDLE(radv_device, device, _device);
436    RADV_FROM_HANDLE(radv_video_session_params, params, _params);
437 
438    vk_video_session_parameters_finish(&device->vk, &params->vk);
439    vk_free2(&device->vk.alloc, pAllocator, params);
440 }
441 
442 VKAPI_ATTR VkResult VKAPI_CALL
radv_GetPhysicalDeviceVideoCapabilitiesKHR(VkPhysicalDevice physicalDevice,const VkVideoProfileInfoKHR * pVideoProfile,VkVideoCapabilitiesKHR * pCapabilities)443 radv_GetPhysicalDeviceVideoCapabilitiesKHR(VkPhysicalDevice physicalDevice, const VkVideoProfileInfoKHR *pVideoProfile,
444                                            VkVideoCapabilitiesKHR *pCapabilities)
445 {
446    RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
447    const struct video_codec_cap *cap = NULL;
448 
449    switch (pVideoProfile->videoCodecOperation) {
450 #ifndef _WIN32
451    case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR:
452       cap = &pdevice->rad_info.dec_caps.codec_info[AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC];
453       break;
454    case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR:
455       cap = &pdevice->rad_info.dec_caps.codec_info[AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC];
456       break;
457    case VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR:
458       cap = &pdevice->rad_info.dec_caps.codec_info[AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1];
459       break;
460 #endif
461    default:
462       unreachable("unsupported operation");
463    }
464 
465    if (cap && !cap->valid)
466       cap = NULL;
467 
468    pCapabilities->flags = 0;
469    pCapabilities->minBitstreamBufferOffsetAlignment = 128;
470    pCapabilities->minBitstreamBufferSizeAlignment = 128;
471    pCapabilities->pictureAccessGranularity.width = VL_MACROBLOCK_WIDTH;
472    pCapabilities->pictureAccessGranularity.height = VL_MACROBLOCK_HEIGHT;
473    pCapabilities->minCodedExtent.width = VL_MACROBLOCK_WIDTH;
474    pCapabilities->minCodedExtent.height = VL_MACROBLOCK_HEIGHT;
475 
476    struct VkVideoDecodeCapabilitiesKHR *dec_caps =
477       (struct VkVideoDecodeCapabilitiesKHR *)vk_find_struct(pCapabilities->pNext, VIDEO_DECODE_CAPABILITIES_KHR);
478    if (dec_caps)
479       dec_caps->flags = VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_DISTINCT_BIT_KHR;
480 
481    switch (pVideoProfile->videoCodecOperation) {
482    case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR: {
483       /* H264 allows different luma and chroma bit depths */
484       if (pVideoProfile->lumaBitDepth != pVideoProfile->chromaBitDepth)
485          return VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR;
486 
487       struct VkVideoDecodeH264CapabilitiesKHR *ext = (struct VkVideoDecodeH264CapabilitiesKHR *)vk_find_struct(
488          pCapabilities->pNext, VIDEO_DECODE_H264_CAPABILITIES_KHR);
489 
490       const struct VkVideoDecodeH264ProfileInfoKHR *h264_profile =
491          vk_find_struct_const(pVideoProfile->pNext, VIDEO_DECODE_H264_PROFILE_INFO_KHR);
492 
493       if (h264_profile->stdProfileIdc != STD_VIDEO_H264_PROFILE_IDC_BASELINE &&
494           h264_profile->stdProfileIdc != STD_VIDEO_H264_PROFILE_IDC_MAIN &&
495           h264_profile->stdProfileIdc != STD_VIDEO_H264_PROFILE_IDC_HIGH)
496          return VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR;
497 
498       if (pVideoProfile->lumaBitDepth != VK_VIDEO_COMPONENT_BIT_DEPTH_8_BIT_KHR)
499          return VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR;
500 
501       pCapabilities->maxDpbSlots = NUM_H264_REFS;
502       pCapabilities->maxActiveReferencePictures = NUM_H264_REFS;
503 
504       /* for h264 on navi21+ separate dpb images should work */
505       if (radv_enable_tier2(pdevice))
506          pCapabilities->flags |= VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR;
507       ext->fieldOffsetGranularity.x = 0;
508       ext->fieldOffsetGranularity.y = 0;
509       ext->maxLevelIdc = STD_VIDEO_H264_LEVEL_IDC_5_1;
510       strcpy(pCapabilities->stdHeaderVersion.extensionName, VK_STD_VULKAN_VIDEO_CODEC_H264_DECODE_EXTENSION_NAME);
511       pCapabilities->stdHeaderVersion.specVersion = VK_STD_VULKAN_VIDEO_CODEC_H264_DECODE_SPEC_VERSION;
512       break;
513    }
514    case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR: {
515       /* H265 allows different luma and chroma bit depths */
516       if (pVideoProfile->lumaBitDepth != pVideoProfile->chromaBitDepth)
517          return VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR;
518 
519       struct VkVideoDecodeH265CapabilitiesKHR *ext = (struct VkVideoDecodeH265CapabilitiesKHR *)vk_find_struct(
520          pCapabilities->pNext, VIDEO_DECODE_H265_CAPABILITIES_KHR);
521 
522       const struct VkVideoDecodeH265ProfileInfoKHR *h265_profile =
523          vk_find_struct_const(pVideoProfile->pNext, VIDEO_DECODE_H265_PROFILE_INFO_KHR);
524 
525       if (h265_profile->stdProfileIdc != STD_VIDEO_H265_PROFILE_IDC_MAIN &&
526           h265_profile->stdProfileIdc != STD_VIDEO_H265_PROFILE_IDC_MAIN_10 &&
527           h265_profile->stdProfileIdc != STD_VIDEO_H265_PROFILE_IDC_MAIN_STILL_PICTURE)
528          return VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR;
529 
530       if (pVideoProfile->lumaBitDepth != VK_VIDEO_COMPONENT_BIT_DEPTH_8_BIT_KHR &&
531           pVideoProfile->lumaBitDepth != VK_VIDEO_COMPONENT_BIT_DEPTH_10_BIT_KHR)
532          return VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR;
533 
534       pCapabilities->maxDpbSlots = NUM_H264_REFS;
535       pCapabilities->maxActiveReferencePictures = NUM_H265_REFS;
536       /* for h265 on navi21+ separate dpb images should work */
537       if (radv_enable_tier2(pdevice))
538          pCapabilities->flags |= VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR;
539       ext->maxLevelIdc = STD_VIDEO_H265_LEVEL_IDC_5_1;
540       strcpy(pCapabilities->stdHeaderVersion.extensionName, VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_EXTENSION_NAME);
541       pCapabilities->stdHeaderVersion.specVersion = VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_SPEC_VERSION;
542       break;
543    }
544    case VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR: {
545       /* Monochrome sampling implies an undefined chroma bit depth, and is supported in profile MAIN for AV1. */
546       if (pVideoProfile->chromaSubsampling != VK_VIDEO_CHROMA_SUBSAMPLING_MONOCHROME_BIT_KHR &&
547           pVideoProfile->lumaBitDepth != pVideoProfile->chromaBitDepth)
548          return VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR;
549       struct VkVideoDecodeAV1CapabilitiesKHR *ext =
550          vk_find_struct(pCapabilities->pNext, VIDEO_DECODE_AV1_CAPABILITIES_KHR);
551       pCapabilities->maxDpbSlots = 9;
552       pCapabilities->maxActiveReferencePictures = STD_VIDEO_AV1_NUM_REF_FRAMES;
553       pCapabilities->flags |= VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR;
554       ext->maxLevel = STD_VIDEO_AV1_LEVEL_6_1; /* For VCN3/4, the only h/w currently with AV1 decode support */
555       strcpy(pCapabilities->stdHeaderVersion.extensionName, VK_STD_VULKAN_VIDEO_CODEC_AV1_DECODE_EXTENSION_NAME);
556       pCapabilities->stdHeaderVersion.specVersion = VK_STD_VULKAN_VIDEO_CODEC_AV1_DECODE_SPEC_VERSION;
557       break;
558    }
559    default:
560       break;
561    }
562 
563    if (cap) {
564       pCapabilities->maxCodedExtent.width = cap->max_width;
565       pCapabilities->maxCodedExtent.height = cap->max_height;
566    } else {
567       switch (pVideoProfile->videoCodecOperation) {
568       case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR:
569          pCapabilities->maxCodedExtent.width = (pdevice->rad_info.family < CHIP_TONGA) ? 2048 : 4096;
570          pCapabilities->maxCodedExtent.height = (pdevice->rad_info.family < CHIP_TONGA) ? 1152 : 4096;
571          break;
572       case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR:
573          pCapabilities->maxCodedExtent.width =
574             (pdevice->rad_info.family < CHIP_RENOIR) ? ((pdevice->rad_info.family < CHIP_TONGA) ? 2048 : 4096) : 8192;
575          pCapabilities->maxCodedExtent.height =
576             (pdevice->rad_info.family < CHIP_RENOIR) ? ((pdevice->rad_info.family < CHIP_TONGA) ? 1152 : 4096) : 4352;
577          break;
578       default:
579          break;
580       }
581    }
582 
583    return VK_SUCCESS;
584 }
585 
586 VKAPI_ATTR VkResult VKAPI_CALL
radv_GetPhysicalDeviceVideoFormatPropertiesKHR(VkPhysicalDevice physicalDevice,const VkPhysicalDeviceVideoFormatInfoKHR * pVideoFormatInfo,uint32_t * pVideoFormatPropertyCount,VkVideoFormatPropertiesKHR * pVideoFormatProperties)587 radv_GetPhysicalDeviceVideoFormatPropertiesKHR(VkPhysicalDevice physicalDevice,
588                                                const VkPhysicalDeviceVideoFormatInfoKHR *pVideoFormatInfo,
589                                                uint32_t *pVideoFormatPropertyCount,
590                                                VkVideoFormatPropertiesKHR *pVideoFormatProperties)
591 {
592    /* radv requires separate allocates for DPB and decode video. */
593    if ((pVideoFormatInfo->imageUsage &
594         (VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR | VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR)) ==
595        (VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR | VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR))
596       return VK_ERROR_FORMAT_NOT_SUPPORTED;
597 
598    VK_OUTARRAY_MAKE_TYPED(VkVideoFormatPropertiesKHR, out, pVideoFormatProperties, pVideoFormatPropertyCount);
599 
600    bool need_8bit = true;
601    bool need_10bit = false;
602    const struct VkVideoProfileListInfoKHR *prof_list =
603       (struct VkVideoProfileListInfoKHR *)vk_find_struct_const(pVideoFormatInfo->pNext, VIDEO_PROFILE_LIST_INFO_KHR);
604    if (prof_list) {
605       for (unsigned i = 0; i < prof_list->profileCount; i++) {
606          const VkVideoProfileInfoKHR *profile = &prof_list->pProfiles[i];
607          if (profile->lumaBitDepth & VK_VIDEO_COMPONENT_BIT_DEPTH_10_BIT_KHR)
608             need_10bit = true;
609       }
610    }
611 
612    if (need_10bit) {
613       vk_outarray_append_typed(VkVideoFormatPropertiesKHR, &out, p)
614       {
615          p->format = VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16;
616          p->imageType = VK_IMAGE_TYPE_2D;
617          p->imageTiling = VK_IMAGE_TILING_OPTIMAL;
618          p->imageUsageFlags = pVideoFormatInfo->imageUsage;
619       }
620 
621       if (pVideoFormatInfo->imageUsage & (VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR))
622          need_8bit = false;
623    }
624 
625    if (need_8bit) {
626       vk_outarray_append_typed(VkVideoFormatPropertiesKHR, &out, p)
627       {
628          p->format = VK_FORMAT_G8_B8R8_2PLANE_420_UNORM;
629          p->imageType = VK_IMAGE_TYPE_2D;
630          p->imageTiling = VK_IMAGE_TILING_OPTIMAL;
631          p->imageUsageFlags = pVideoFormatInfo->imageUsage;
632       }
633    }
634 
635    return vk_outarray_status(&out);
636 }
637 
638 #define RADV_BIND_SESSION_CTX 0
639 #define RADV_BIND_DECODER_CTX 1
640 
641 VKAPI_ATTR VkResult VKAPI_CALL
radv_GetVideoSessionMemoryRequirementsKHR(VkDevice _device,VkVideoSessionKHR videoSession,uint32_t * pMemoryRequirementsCount,VkVideoSessionMemoryRequirementsKHR * pMemoryRequirements)642 radv_GetVideoSessionMemoryRequirementsKHR(VkDevice _device, VkVideoSessionKHR videoSession,
643                                           uint32_t *pMemoryRequirementsCount,
644                                           VkVideoSessionMemoryRequirementsKHR *pMemoryRequirements)
645 {
646    RADV_FROM_HANDLE(radv_device, device, _device);
647    RADV_FROM_HANDLE(radv_video_session, vid, videoSession);
648    uint32_t memory_type_bits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
649 
650    VK_OUTARRAY_MAKE_TYPED(VkVideoSessionMemoryRequirementsKHR, out, pMemoryRequirements, pMemoryRequirementsCount);
651    /* 1 buffer for session context */
652    if (device->physical_device->rad_info.family >= CHIP_POLARIS10) {
653       vk_outarray_append_typed(VkVideoSessionMemoryRequirementsKHR, &out, m)
654       {
655          m->memoryBindIndex = RADV_BIND_SESSION_CTX;
656          m->memoryRequirements.size = RDECODE_SESSION_CONTEXT_SIZE;
657          m->memoryRequirements.alignment = 0;
658          m->memoryRequirements.memoryTypeBits = memory_type_bits;
659       }
660    }
661 
662    if (vid->stream_type == RDECODE_CODEC_H264_PERF && device->physical_device->rad_info.family >= CHIP_POLARIS10) {
663       vk_outarray_append_typed(VkVideoSessionMemoryRequirementsKHR, &out, m)
664       {
665          m->memoryBindIndex = RADV_BIND_DECODER_CTX;
666          m->memoryRequirements.size = align(calc_ctx_size_h264_perf(vid), 4096);
667          m->memoryRequirements.alignment = 0;
668          m->memoryRequirements.memoryTypeBits = memory_type_bits;
669       }
670    }
671    if (vid->stream_type == RDECODE_CODEC_H265) {
672       uint32_t ctx_size;
673 
674       if (vid->vk.h265.profile_idc == STD_VIDEO_H265_PROFILE_IDC_MAIN_10)
675          ctx_size = calc_ctx_size_h265_main10(vid);
676       else
677          ctx_size = calc_ctx_size_h265_main(vid);
678       vk_outarray_append_typed(VkVideoSessionMemoryRequirementsKHR, &out, m)
679       {
680          m->memoryBindIndex = RADV_BIND_DECODER_CTX;
681          m->memoryRequirements.size = align(ctx_size, 4096);
682          m->memoryRequirements.alignment = 0;
683          m->memoryRequirements.memoryTypeBits = memory_type_bits;
684       }
685    }
686    if (vid->stream_type == RDECODE_CODEC_AV1) {
687       vk_outarray_append_typed(VkVideoSessionMemoryRequirementsKHR, &out, m)
688       {
689          m->memoryBindIndex = RADV_BIND_DECODER_CTX;
690          m->memoryRequirements.size = align(calc_ctx_size_av1(device, vid), 4096);
691          m->memoryRequirements.alignment = 0;
692          m->memoryRequirements.memoryTypeBits = 0;
693          for (unsigned i = 0; i < device->physical_device->memory_properties.memoryTypeCount; i++)
694             if (device->physical_device->memory_properties.memoryTypes[i].propertyFlags &
695                 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
696                m->memoryRequirements.memoryTypeBits |= (1 << i);
697       }
698    }
699    return vk_outarray_status(&out);
700 }
701 
702 VKAPI_ATTR VkResult VKAPI_CALL
radv_UpdateVideoSessionParametersKHR(VkDevice _device,VkVideoSessionParametersKHR videoSessionParameters,const VkVideoSessionParametersUpdateInfoKHR * pUpdateInfo)703 radv_UpdateVideoSessionParametersKHR(VkDevice _device, VkVideoSessionParametersKHR videoSessionParameters,
704                                      const VkVideoSessionParametersUpdateInfoKHR *pUpdateInfo)
705 {
706    RADV_FROM_HANDLE(radv_video_session_params, params, videoSessionParameters);
707 
708    return vk_video_session_parameters_update(&params->vk, pUpdateInfo);
709 }
710 
711 static void
copy_bind(struct radv_vid_mem * dst,const VkBindVideoSessionMemoryInfoKHR * src)712 copy_bind(struct radv_vid_mem *dst, const VkBindVideoSessionMemoryInfoKHR *src)
713 {
714    dst->mem = radv_device_memory_from_handle(src->memory);
715    dst->offset = src->memoryOffset;
716    dst->size = src->memorySize;
717 }
718 
719 VKAPI_ATTR VkResult VKAPI_CALL
radv_BindVideoSessionMemoryKHR(VkDevice _device,VkVideoSessionKHR videoSession,uint32_t videoSessionBindMemoryCount,const VkBindVideoSessionMemoryInfoKHR * pBindSessionMemoryInfos)720 radv_BindVideoSessionMemoryKHR(VkDevice _device, VkVideoSessionKHR videoSession, uint32_t videoSessionBindMemoryCount,
721                                const VkBindVideoSessionMemoryInfoKHR *pBindSessionMemoryInfos)
722 {
723    RADV_FROM_HANDLE(radv_video_session, vid, videoSession);
724 
725    for (unsigned i = 0; i < videoSessionBindMemoryCount; i++) {
726       switch (pBindSessionMemoryInfos[i].memoryBindIndex) {
727       case RADV_BIND_SESSION_CTX:
728          copy_bind(&vid->sessionctx, &pBindSessionMemoryInfos[i]);
729          break;
730       case RADV_BIND_DECODER_CTX:
731          copy_bind(&vid->ctx, &pBindSessionMemoryInfos[i]);
732          break;
733       default:
734          assert(0);
735          break;
736       }
737    }
738    return VK_SUCCESS;
739 }
740 
741 /* add a new set register command to the IB */
742 static void
set_reg(struct radv_cmd_buffer * cmd_buffer,unsigned reg,uint32_t val)743 set_reg(struct radv_cmd_buffer *cmd_buffer, unsigned reg, uint32_t val)
744 {
745    struct radeon_cmdbuf *cs = cmd_buffer->cs;
746    radeon_emit(cs, RDECODE_PKT0(reg >> 2, 0));
747    radeon_emit(cs, val);
748 }
749 
750 static void
send_cmd(struct radv_cmd_buffer * cmd_buffer,unsigned cmd,struct radeon_winsys_bo * bo,uint32_t offset)751 send_cmd(struct radv_cmd_buffer *cmd_buffer, unsigned cmd, struct radeon_winsys_bo *bo, uint32_t offset)
752 {
753    struct radv_physical_device *pdev = cmd_buffer->device->physical_device;
754    uint64_t addr;
755 
756    radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, bo);
757    addr = radv_buffer_get_va(bo);
758    addr += offset;
759 
760    if (cmd_buffer->device->physical_device->vid_decode_ip != AMD_IP_VCN_UNIFIED) {
761       radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 6);
762       set_reg(cmd_buffer, pdev->vid_dec_reg.data0, addr);
763       set_reg(cmd_buffer, pdev->vid_dec_reg.data1, addr >> 32);
764       set_reg(cmd_buffer, pdev->vid_dec_reg.cmd, cmd << 1);
765       return;
766    }
767    switch (cmd) {
768    case RDECODE_CMD_MSG_BUFFER:
769       cmd_buffer->video.decode_buffer->valid_buf_flag |= RDECODE_CMDBUF_FLAGS_MSG_BUFFER;
770       cmd_buffer->video.decode_buffer->msg_buffer_address_hi = (addr >> 32);
771       cmd_buffer->video.decode_buffer->msg_buffer_address_lo = (addr);
772       break;
773    case RDECODE_CMD_DPB_BUFFER:
774       cmd_buffer->video.decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_DPB_BUFFER);
775       cmd_buffer->video.decode_buffer->dpb_buffer_address_hi = (addr >> 32);
776       cmd_buffer->video.decode_buffer->dpb_buffer_address_lo = (addr);
777       break;
778    case RDECODE_CMD_DECODING_TARGET_BUFFER:
779       cmd_buffer->video.decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_DECODING_TARGET_BUFFER);
780       cmd_buffer->video.decode_buffer->target_buffer_address_hi = (addr >> 32);
781       cmd_buffer->video.decode_buffer->target_buffer_address_lo = (addr);
782       break;
783    case RDECODE_CMD_FEEDBACK_BUFFER:
784       cmd_buffer->video.decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_FEEDBACK_BUFFER);
785       cmd_buffer->video.decode_buffer->feedback_buffer_address_hi = (addr >> 32);
786       cmd_buffer->video.decode_buffer->feedback_buffer_address_lo = (addr);
787       break;
788    case RDECODE_CMD_PROB_TBL_BUFFER:
789       cmd_buffer->video.decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_PROB_TBL_BUFFER);
790       cmd_buffer->video.decode_buffer->prob_tbl_buffer_address_hi = (addr >> 32);
791       cmd_buffer->video.decode_buffer->prob_tbl_buffer_address_lo = (addr);
792       break;
793    case RDECODE_CMD_SESSION_CONTEXT_BUFFER:
794       cmd_buffer->video.decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_SESSION_CONTEXT_BUFFER);
795       cmd_buffer->video.decode_buffer->session_contex_buffer_address_hi = (addr >> 32);
796       cmd_buffer->video.decode_buffer->session_contex_buffer_address_lo = (addr);
797       break;
798    case RDECODE_CMD_BITSTREAM_BUFFER:
799       cmd_buffer->video.decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_BITSTREAM_BUFFER);
800       cmd_buffer->video.decode_buffer->bitstream_buffer_address_hi = (addr >> 32);
801       cmd_buffer->video.decode_buffer->bitstream_buffer_address_lo = (addr);
802       break;
803    case RDECODE_CMD_IT_SCALING_TABLE_BUFFER:
804       cmd_buffer->video.decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_IT_SCALING_BUFFER);
805       cmd_buffer->video.decode_buffer->it_sclr_table_buffer_address_hi = (addr >> 32);
806       cmd_buffer->video.decode_buffer->it_sclr_table_buffer_address_lo = (addr);
807       break;
808    case RDECODE_CMD_CONTEXT_BUFFER:
809       cmd_buffer->video.decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_CONTEXT_BUFFER);
810       cmd_buffer->video.decode_buffer->context_buffer_address_hi = (addr >> 32);
811       cmd_buffer->video.decode_buffer->context_buffer_address_lo = (addr);
812       break;
813    default:
814       assert(0);
815    }
816 }
817 
818 static void
rvcn_dec_message_create(struct radv_video_session * vid,void * ptr,uint32_t size)819 rvcn_dec_message_create(struct radv_video_session *vid, void *ptr, uint32_t size)
820 {
821    rvcn_dec_message_header_t *header = ptr;
822    rvcn_dec_message_create_t *create = (void *)((char *)ptr + sizeof(rvcn_dec_message_header_t));
823 
824    memset(ptr, 0, size);
825    header->header_size = sizeof(rvcn_dec_message_header_t);
826    header->total_size = size;
827    header->num_buffers = 1;
828    header->msg_type = RDECODE_MSG_CREATE;
829    header->stream_handle = vid->stream_handle;
830    header->status_report_feedback_number = 0;
831 
832    header->index[0].message_id = RDECODE_MESSAGE_CREATE;
833    header->index[0].offset = sizeof(rvcn_dec_message_header_t);
834    header->index[0].size = sizeof(rvcn_dec_message_create_t);
835    header->index[0].filled = 0;
836 
837    create->stream_type = vid->stream_type;
838    create->session_flags = 0;
839    create->width_in_samples = vid->vk.max_coded.width;
840    create->height_in_samples = vid->vk.max_coded.height;
841 }
842 
843 static void
rvcn_dec_message_feedback(void * ptr)844 rvcn_dec_message_feedback(void *ptr)
845 {
846    rvcn_dec_feedback_header_t *header = (void *)ptr;
847 
848    header->header_size = sizeof(rvcn_dec_feedback_header_t);
849    header->total_size = sizeof(rvcn_dec_feedback_header_t);
850    header->num_buffers = 0;
851 }
852 
853 static const uint8_t h264_levels[] = {10, 11, 12, 13, 20, 21, 22, 30, 31, 32, 40, 41, 42, 50, 51, 52, 60, 61, 62};
854 static uint8_t
get_h264_level(StdVideoH264LevelIdc level)855 get_h264_level(StdVideoH264LevelIdc level)
856 {
857    assert(level <= STD_VIDEO_H264_LEVEL_IDC_6_2);
858    return h264_levels[level];
859 }
860 
861 static void
update_h264_scaling(unsigned char scaling_list_4x4[6][16],unsigned char scaling_list_8x8[2][64],const StdVideoH264ScalingLists * scaling_lists)862 update_h264_scaling(unsigned char scaling_list_4x4[6][16], unsigned char scaling_list_8x8[2][64],
863                     const StdVideoH264ScalingLists *scaling_lists)
864 {
865    for (int i = 0; i < STD_VIDEO_H264_SCALING_LIST_4X4_NUM_LISTS; i++) {
866       for (int j = 0; j < STD_VIDEO_H264_SCALING_LIST_4X4_NUM_ELEMENTS; j++)
867          scaling_list_4x4[i][vl_zscan_normal_16[j]] = scaling_lists->ScalingList4x4[i][j];
868    }
869 
870    for (int i = 0; i < 2; i++) {
871       for (int j = 0; j < STD_VIDEO_H264_SCALING_LIST_8X8_NUM_ELEMENTS; j++)
872          scaling_list_8x8[i][vl_zscan_normal[j]] = scaling_lists->ScalingList8x8[i][j];
873    }
874 }
875 
876 static rvcn_dec_message_avc_t
get_h264_msg(struct radv_video_session * vid,struct radv_video_session_params * params,const struct VkVideoDecodeInfoKHR * frame_info,uint32_t * slice_offset,uint32_t * width_in_samples,uint32_t * height_in_samples,void * it_ptr)877 get_h264_msg(struct radv_video_session *vid, struct radv_video_session_params *params,
878              const struct VkVideoDecodeInfoKHR *frame_info, uint32_t *slice_offset, uint32_t *width_in_samples,
879              uint32_t *height_in_samples, void *it_ptr)
880 {
881    rvcn_dec_message_avc_t result;
882    const struct VkVideoDecodeH264PictureInfoKHR *h264_pic_info =
883       vk_find_struct_const(frame_info->pNext, VIDEO_DECODE_H264_PICTURE_INFO_KHR);
884 
885    *slice_offset = h264_pic_info->pSliceOffsets[0];
886 
887    memset(&result, 0, sizeof(result));
888 
889    assert(params->vk.h264_dec.h264_sps_count > 0);
890    const StdVideoH264SequenceParameterSet *sps =
891       vk_video_find_h264_dec_std_sps(&params->vk, h264_pic_info->pStdPictureInfo->seq_parameter_set_id);
892    switch (sps->profile_idc) {
893    case STD_VIDEO_H264_PROFILE_IDC_BASELINE:
894       result.profile = RDECODE_H264_PROFILE_BASELINE;
895       break;
896    case STD_VIDEO_H264_PROFILE_IDC_MAIN:
897       result.profile = RDECODE_H264_PROFILE_MAIN;
898       break;
899    case STD_VIDEO_H264_PROFILE_IDC_HIGH:
900       result.profile = RDECODE_H264_PROFILE_HIGH;
901       break;
902    default:
903       fprintf(stderr, "UNSUPPORTED CODEC %d\n", sps->profile_idc);
904       result.profile = RDECODE_H264_PROFILE_MAIN;
905       break;
906    }
907 
908    *width_in_samples = (sps->pic_width_in_mbs_minus1 + 1) * 16;
909    *height_in_samples = (sps->pic_height_in_map_units_minus1 + 1) * 16;
910    if (!sps->flags.frame_mbs_only_flag)
911       *height_in_samples *= 2;
912    result.level = get_h264_level(sps->level_idc);
913 
914    result.sps_info_flags = 0;
915 
916    result.sps_info_flags |= sps->flags.direct_8x8_inference_flag << 0;
917    result.sps_info_flags |= sps->flags.mb_adaptive_frame_field_flag << 1;
918    result.sps_info_flags |= sps->flags.frame_mbs_only_flag << 2;
919    result.sps_info_flags |= sps->flags.delta_pic_order_always_zero_flag << 3;
920    if (vid->dpb_type != DPB_DYNAMIC_TIER_2)
921       result.sps_info_flags |= 1 << RDECODE_SPS_INFO_H264_EXTENSION_SUPPORT_FLAG_SHIFT;
922 
923    result.bit_depth_luma_minus8 = sps->bit_depth_luma_minus8;
924    result.bit_depth_chroma_minus8 = sps->bit_depth_chroma_minus8;
925    result.log2_max_frame_num_minus4 = sps->log2_max_frame_num_minus4;
926    result.pic_order_cnt_type = sps->pic_order_cnt_type;
927    result.log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_pic_order_cnt_lsb_minus4;
928 
929    result.chroma_format = sps->chroma_format_idc;
930 
931    const StdVideoH264PictureParameterSet *pps =
932       vk_video_find_h264_dec_std_pps(&params->vk, h264_pic_info->pStdPictureInfo->pic_parameter_set_id);
933    result.pps_info_flags = 0;
934    result.pps_info_flags |= pps->flags.transform_8x8_mode_flag << 0;
935    result.pps_info_flags |= pps->flags.redundant_pic_cnt_present_flag << 1;
936    result.pps_info_flags |= pps->flags.constrained_intra_pred_flag << 2;
937    result.pps_info_flags |= pps->flags.deblocking_filter_control_present_flag << 3;
938    result.pps_info_flags |= pps->weighted_bipred_idc << 4;
939    result.pps_info_flags |= pps->flags.weighted_pred_flag << 6;
940    result.pps_info_flags |= pps->flags.bottom_field_pic_order_in_frame_present_flag << 7;
941    result.pps_info_flags |= pps->flags.entropy_coding_mode_flag << 8;
942 
943    result.pic_init_qp_minus26 = pps->pic_init_qp_minus26;
944    result.chroma_qp_index_offset = pps->chroma_qp_index_offset;
945    result.second_chroma_qp_index_offset = pps->second_chroma_qp_index_offset;
946 
947    StdVideoH264ScalingLists scaling_lists;
948    vk_video_derive_h264_scaling_list(sps, pps, &scaling_lists);
949    update_h264_scaling(result.scaling_list_4x4, result.scaling_list_8x8, &scaling_lists);
950 
951    memset(it_ptr, 0, IT_SCALING_TABLE_SIZE);
952    memcpy(it_ptr, result.scaling_list_4x4, 6 * 16);
953    memcpy((char *)it_ptr + 96, result.scaling_list_8x8, 2 * 64);
954 
955    result.num_ref_idx_l0_active_minus1 = pps->num_ref_idx_l0_default_active_minus1;
956    result.num_ref_idx_l1_active_minus1 = pps->num_ref_idx_l1_default_active_minus1;
957 
958    result.curr_field_order_cnt_list[0] = h264_pic_info->pStdPictureInfo->PicOrderCnt[0];
959    result.curr_field_order_cnt_list[1] = h264_pic_info->pStdPictureInfo->PicOrderCnt[1];
960 
961    result.frame_num = h264_pic_info->pStdPictureInfo->frame_num;
962 
963    result.num_ref_frames = sps->max_num_ref_frames;
964    result.non_existing_frame_flags = 0;
965    result.used_for_reference_flags = 0;
966 
967    memset(result.ref_frame_list, 0xff, sizeof(unsigned char) * 16);
968    memset(result.frame_num_list, 0, sizeof(unsigned int) * 16);
969    for (unsigned i = 0; i < frame_info->referenceSlotCount; i++) {
970       int idx = frame_info->pReferenceSlots[i].slotIndex;
971       const struct VkVideoDecodeH264DpbSlotInfoKHR *dpb_slot =
972          vk_find_struct_const(frame_info->pReferenceSlots[i].pNext, VIDEO_DECODE_H264_DPB_SLOT_INFO_KHR);
973 
974       result.frame_num_list[i] = dpb_slot->pStdReferenceInfo->FrameNum;
975       result.field_order_cnt_list[i][0] = dpb_slot->pStdReferenceInfo->PicOrderCnt[0];
976       result.field_order_cnt_list[i][1] = dpb_slot->pStdReferenceInfo->PicOrderCnt[1];
977 
978       result.ref_frame_list[i] = idx;
979 
980       if (dpb_slot->pStdReferenceInfo->flags.top_field_flag)
981          result.used_for_reference_flags |= (1 << (2 * i));
982       if (dpb_slot->pStdReferenceInfo->flags.bottom_field_flag)
983          result.used_for_reference_flags |= (1 << (2 * i + 1));
984 
985       if (!dpb_slot->pStdReferenceInfo->flags.top_field_flag && !dpb_slot->pStdReferenceInfo->flags.bottom_field_flag)
986          result.used_for_reference_flags |= (3 << (2 * i));
987 
988       if (dpb_slot->pStdReferenceInfo->flags.used_for_long_term_reference)
989          result.ref_frame_list[i] |= 0x80;
990       if (dpb_slot->pStdReferenceInfo->flags.is_non_existing)
991          result.non_existing_frame_flags |= 1 << i;
992    }
993    result.curr_pic_ref_frame_num = frame_info->referenceSlotCount;
994    result.decoded_pic_idx = frame_info->pSetupReferenceSlot->slotIndex;
995 
996    return result;
997 }
998 
999 static void
update_h265_scaling(void * it_ptr,const StdVideoH265ScalingLists * scaling_lists)1000 update_h265_scaling(void *it_ptr, const StdVideoH265ScalingLists *scaling_lists)
1001 {
1002    if (scaling_lists) {
1003       memcpy(it_ptr, scaling_lists->ScalingList4x4,
1004              STD_VIDEO_H265_SCALING_LIST_4X4_NUM_LISTS * STD_VIDEO_H265_SCALING_LIST_4X4_NUM_ELEMENTS);
1005       memcpy((char *)it_ptr + 96, scaling_lists->ScalingList8x8,
1006              STD_VIDEO_H265_SCALING_LIST_8X8_NUM_LISTS * STD_VIDEO_H265_SCALING_LIST_8X8_NUM_ELEMENTS);
1007       memcpy((char *)it_ptr + 480, scaling_lists->ScalingList16x16,
1008              STD_VIDEO_H265_SCALING_LIST_16X16_NUM_LISTS * STD_VIDEO_H265_SCALING_LIST_16X16_NUM_ELEMENTS);
1009       memcpy((char *)it_ptr + 864, scaling_lists->ScalingList32x32,
1010              STD_VIDEO_H265_SCALING_LIST_32X32_NUM_LISTS * STD_VIDEO_H265_SCALING_LIST_32X32_NUM_ELEMENTS);
1011    } else {
1012       memset(it_ptr, 0, STD_VIDEO_H265_SCALING_LIST_4X4_NUM_LISTS * STD_VIDEO_H265_SCALING_LIST_4X4_NUM_ELEMENTS);
1013       memset((char *)it_ptr + 96, 0,
1014              STD_VIDEO_H265_SCALING_LIST_8X8_NUM_LISTS * STD_VIDEO_H265_SCALING_LIST_8X8_NUM_ELEMENTS);
1015       memset((char *)it_ptr + 480, 0,
1016              STD_VIDEO_H265_SCALING_LIST_16X16_NUM_LISTS * STD_VIDEO_H265_SCALING_LIST_16X16_NUM_ELEMENTS);
1017       memset((char *)it_ptr + 864, 0,
1018              STD_VIDEO_H265_SCALING_LIST_32X32_NUM_LISTS * STD_VIDEO_H265_SCALING_LIST_32X32_NUM_ELEMENTS);
1019    }
1020 }
1021 
1022 static rvcn_dec_message_hevc_t
get_h265_msg(struct radv_device * device,struct radv_video_session * vid,struct radv_video_session_params * params,const struct VkVideoDecodeInfoKHR * frame_info,void * it_ptr)1023 get_h265_msg(struct radv_device *device, struct radv_video_session *vid, struct radv_video_session_params *params,
1024              const struct VkVideoDecodeInfoKHR *frame_info, void *it_ptr)
1025 {
1026    rvcn_dec_message_hevc_t result;
1027    int i, j;
1028    const struct VkVideoDecodeH265PictureInfoKHR *h265_pic_info =
1029       vk_find_struct_const(frame_info->pNext, VIDEO_DECODE_H265_PICTURE_INFO_KHR);
1030    memset(&result, 0, sizeof(result));
1031 
1032    const StdVideoH265SequenceParameterSet *sps =
1033       vk_video_find_h265_dec_std_sps(&params->vk, h265_pic_info->pStdPictureInfo->pps_seq_parameter_set_id);
1034    const StdVideoH265PictureParameterSet *pps =
1035       vk_video_find_h265_dec_std_pps(&params->vk, h265_pic_info->pStdPictureInfo->pps_pic_parameter_set_id);
1036 
1037    result.sps_info_flags = 0;
1038    result.sps_info_flags |= sps->flags.scaling_list_enabled_flag << 0;
1039    result.sps_info_flags |= sps->flags.amp_enabled_flag << 1;
1040    result.sps_info_flags |= sps->flags.sample_adaptive_offset_enabled_flag << 2;
1041    result.sps_info_flags |= sps->flags.pcm_enabled_flag << 3;
1042    result.sps_info_flags |= sps->flags.pcm_loop_filter_disabled_flag << 4;
1043    result.sps_info_flags |= sps->flags.long_term_ref_pics_present_flag << 5;
1044    result.sps_info_flags |= sps->flags.sps_temporal_mvp_enabled_flag << 6;
1045    result.sps_info_flags |= sps->flags.strong_intra_smoothing_enabled_flag << 7;
1046    result.sps_info_flags |= sps->flags.separate_colour_plane_flag << 8;
1047 
1048    if (device->physical_device->rad_info.family == CHIP_CARRIZO)
1049       result.sps_info_flags |= 1 << 9;
1050 
1051    if (!h265_pic_info->pStdPictureInfo->flags.short_term_ref_pic_set_sps_flag) {
1052       result.sps_info_flags |= 1 << 11;
1053    }
1054    result.st_rps_bits = h265_pic_info->pStdPictureInfo->NumBitsForSTRefPicSetInSlice;
1055 
1056    result.chroma_format = sps->chroma_format_idc;
1057    result.bit_depth_luma_minus8 = sps->bit_depth_luma_minus8;
1058    result.bit_depth_chroma_minus8 = sps->bit_depth_chroma_minus8;
1059    result.log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_pic_order_cnt_lsb_minus4;
1060    result.sps_max_dec_pic_buffering_minus1 =
1061       sps->pDecPicBufMgr->max_dec_pic_buffering_minus1[sps->sps_max_sub_layers_minus1];
1062    result.log2_min_luma_coding_block_size_minus3 = sps->log2_min_luma_coding_block_size_minus3;
1063    result.log2_diff_max_min_luma_coding_block_size = sps->log2_diff_max_min_luma_coding_block_size;
1064    result.log2_min_transform_block_size_minus2 = sps->log2_min_luma_transform_block_size_minus2;
1065    result.log2_diff_max_min_transform_block_size = sps->log2_diff_max_min_luma_transform_block_size;
1066    result.max_transform_hierarchy_depth_inter = sps->max_transform_hierarchy_depth_inter;
1067    result.max_transform_hierarchy_depth_intra = sps->max_transform_hierarchy_depth_intra;
1068    if (sps->flags.pcm_enabled_flag) {
1069       result.pcm_sample_bit_depth_luma_minus1 = sps->pcm_sample_bit_depth_luma_minus1;
1070       result.pcm_sample_bit_depth_chroma_minus1 = sps->pcm_sample_bit_depth_chroma_minus1;
1071       result.log2_min_pcm_luma_coding_block_size_minus3 = sps->log2_min_pcm_luma_coding_block_size_minus3;
1072       result.log2_diff_max_min_pcm_luma_coding_block_size = sps->log2_diff_max_min_pcm_luma_coding_block_size;
1073    }
1074    result.num_short_term_ref_pic_sets = sps->num_short_term_ref_pic_sets;
1075 
1076    result.pps_info_flags = 0;
1077    result.pps_info_flags |= pps->flags.dependent_slice_segments_enabled_flag << 0;
1078    result.pps_info_flags |= pps->flags.output_flag_present_flag << 1;
1079    result.pps_info_flags |= pps->flags.sign_data_hiding_enabled_flag << 2;
1080    result.pps_info_flags |= pps->flags.cabac_init_present_flag << 3;
1081    result.pps_info_flags |= pps->flags.constrained_intra_pred_flag << 4;
1082    result.pps_info_flags |= pps->flags.transform_skip_enabled_flag << 5;
1083    result.pps_info_flags |= pps->flags.cu_qp_delta_enabled_flag << 6;
1084    result.pps_info_flags |= pps->flags.pps_slice_chroma_qp_offsets_present_flag << 7;
1085    result.pps_info_flags |= pps->flags.weighted_pred_flag << 8;
1086    result.pps_info_flags |= pps->flags.weighted_bipred_flag << 9;
1087    result.pps_info_flags |= pps->flags.transquant_bypass_enabled_flag << 10;
1088    result.pps_info_flags |= pps->flags.tiles_enabled_flag << 11;
1089    result.pps_info_flags |= pps->flags.entropy_coding_sync_enabled_flag << 12;
1090    result.pps_info_flags |= pps->flags.uniform_spacing_flag << 13;
1091    result.pps_info_flags |= pps->flags.loop_filter_across_tiles_enabled_flag << 14;
1092    result.pps_info_flags |= pps->flags.pps_loop_filter_across_slices_enabled_flag << 15;
1093    result.pps_info_flags |= pps->flags.deblocking_filter_override_enabled_flag << 16;
1094    result.pps_info_flags |= pps->flags.pps_deblocking_filter_disabled_flag << 17;
1095    result.pps_info_flags |= pps->flags.lists_modification_present_flag << 18;
1096    result.pps_info_flags |= pps->flags.slice_segment_header_extension_present_flag << 19;
1097 
1098    result.num_extra_slice_header_bits = pps->num_extra_slice_header_bits;
1099    result.num_long_term_ref_pic_sps = sps->num_long_term_ref_pics_sps;
1100    result.num_ref_idx_l0_default_active_minus1 = pps->num_ref_idx_l0_default_active_minus1;
1101    result.num_ref_idx_l1_default_active_minus1 = pps->num_ref_idx_l1_default_active_minus1;
1102    result.pps_cb_qp_offset = pps->pps_cb_qp_offset;
1103    result.pps_cr_qp_offset = pps->pps_cr_qp_offset;
1104    result.pps_beta_offset_div2 = pps->pps_beta_offset_div2;
1105    result.pps_tc_offset_div2 = pps->pps_tc_offset_div2;
1106    result.diff_cu_qp_delta_depth = pps->diff_cu_qp_delta_depth;
1107    result.num_tile_columns_minus1 = pps->num_tile_columns_minus1;
1108    result.num_tile_rows_minus1 = pps->num_tile_rows_minus1;
1109    result.log2_parallel_merge_level_minus2 = pps->log2_parallel_merge_level_minus2;
1110    result.init_qp_minus26 = pps->init_qp_minus26;
1111 
1112    for (i = 0; i < 19; ++i)
1113       result.column_width_minus1[i] = pps->column_width_minus1[i];
1114 
1115    for (i = 0; i < 21; ++i)
1116       result.row_height_minus1[i] = pps->row_height_minus1[i];
1117 
1118    result.num_delta_pocs_ref_rps_idx = h265_pic_info->pStdPictureInfo->NumDeltaPocsOfRefRpsIdx;
1119    result.curr_poc = h265_pic_info->pStdPictureInfo->PicOrderCntVal;
1120 
1121    uint8_t idxs[16];
1122    memset(result.poc_list, 0, 16 * sizeof(int));
1123    memset(result.ref_pic_list, 0x7f, 16);
1124    memset(idxs, 0xff, 16);
1125    for (i = 0; i < frame_info->referenceSlotCount; i++) {
1126       const struct VkVideoDecodeH265DpbSlotInfoKHR *dpb_slot =
1127          vk_find_struct_const(frame_info->pReferenceSlots[i].pNext, VIDEO_DECODE_H265_DPB_SLOT_INFO_KHR);
1128       int idx = frame_info->pReferenceSlots[i].slotIndex;
1129       result.poc_list[i] = dpb_slot->pStdReferenceInfo->PicOrderCntVal;
1130       result.ref_pic_list[i] = idx;
1131       idxs[idx] = i;
1132    }
1133    result.curr_idx = frame_info->pSetupReferenceSlot->slotIndex;
1134 
1135 #define IDXS(x) ((x) == 0xff ? 0xff : idxs[(x)])
1136    for (i = 0; i < 8; ++i)
1137       result.ref_pic_set_st_curr_before[i] = IDXS(h265_pic_info->pStdPictureInfo->RefPicSetStCurrBefore[i]);
1138 
1139    for (i = 0; i < 8; ++i)
1140       result.ref_pic_set_st_curr_after[i] = IDXS(h265_pic_info->pStdPictureInfo->RefPicSetStCurrAfter[i]);
1141 
1142    for (i = 0; i < 8; ++i)
1143       result.ref_pic_set_lt_curr[i] = IDXS(h265_pic_info->pStdPictureInfo->RefPicSetLtCurr[i]);
1144 
1145    const StdVideoH265ScalingLists *scaling_lists = NULL;
1146    if (pps->flags.pps_scaling_list_data_present_flag)
1147       scaling_lists = pps->pScalingLists;
1148    else if (sps->flags.sps_scaling_list_data_present_flag)
1149       scaling_lists = sps->pScalingLists;
1150 
1151    update_h265_scaling(it_ptr, scaling_lists);
1152 
1153    if (scaling_lists) {
1154       for (i = 0; i < STD_VIDEO_H265_SCALING_LIST_16X16_NUM_LISTS; ++i)
1155          result.ucScalingListDCCoefSizeID2[i] = scaling_lists->ScalingListDCCoef16x16[i];
1156 
1157       for (i = 0; i < STD_VIDEO_H265_SCALING_LIST_32X32_NUM_LISTS; ++i)
1158          result.ucScalingListDCCoefSizeID3[i] = scaling_lists->ScalingListDCCoef32x32[i];
1159    }
1160 
1161    for (i = 0; i < 2; i++) {
1162       for (j = 0; j < 15; j++)
1163          result.direct_reflist[i][j] = 0xff;
1164    }
1165 
1166    if (vid->vk.h265.profile_idc == STD_VIDEO_H265_PROFILE_IDC_MAIN_10) {
1167       if (vid->vk.picture_format == VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16) {
1168          result.p010_mode = 1;
1169          result.msb_mode = 1;
1170       } else {
1171          result.p010_mode = 0;
1172          result.luma_10to8 = 5;
1173          result.chroma_10to8 = 5;
1174          result.hevc_reserved[0] = 4; /* sclr_luma10to8 */
1175          result.hevc_reserved[1] = 4; /* sclr_chroma10to8 */
1176       }
1177    }
1178 
1179    return result;
1180 }
1181 
/* NOTE(review): presumably the AV1 loop-restoration filter types; no user is
 * visible in this part of the file - confirm against the AV1 message code.
 */
enum {
   AV1_RESTORE_NONE = 0,
   AV1_RESTORE_WIENER = 1,
   AV1_RESTORE_SGRPROJ = 2,
   AV1_RESTORE_SWITCHABLE = 3,
};

/* NOTE(review): presumably the AV1 super-resolution scale numerator and the
 * minimum denominator; no user is visible in this part of the file - confirm.
 */
#define AV1_SUPERRES_NUM       8
#define AV1_SUPERRES_DENOM_MIN 9

/* Dimensions (rows = Y, columns = X) of the luma and chroma film-grain blocks
 * generated by radv_vcn_av1_init_film_grain_buffer().
 */
#define LUMA_BLOCK_SIZE_Y   73
#define LUMA_BLOCK_SIZE_X   82
#define CHROMA_BLOCK_SIZE_Y 38
#define CHROMA_BLOCK_SIZE_X 44
1196 
/**
 * Advance the 16-bit film-grain pseudo-random generator by one step.
 *
 * The feedback bit is the XOR of state bits 0, 1, 3 and 12; the state is
 * shifted right by one and the feedback bit becomes the new bit 15.
 *
 * \param seed  In/out generator state.
 * \param bits  Number of result bits; callers are expected to pass 0..16.
 * \return The top `bits` bits of the updated state.
 */
static int32_t
radv_vcn_av1_film_grain_random_number(unsigned short *seed, int32_t bits)
{
   const unsigned state = *seed;
   const unsigned feedback = (state ^ (state >> 1) ^ (state >> 3) ^ (state >> 12)) & 1u;
   const unsigned short next = (unsigned short)((state >> 1) | (feedback << 15));

   *seed = next;

   return (next >> (16 - bits)) & ((1 << bits) - 1);
}
1209 
/**
 * Expand a piecewise-linear film-grain scaling function into a 256-entry
 * lookup table.
 *
 * Entries below the first point take the first point's value, entries from the
 * last point onwards take the last point's value, and entries in between are
 * linearly interpolated in 16.16 fixed point with rounding.
 *
 * \param scaling_points  Control points as {x, value} pairs.
 * \param num             Number of control points; 0 leaves the table untouched.
 * \param scaling_lut     Output table with at least 256 entries.
 *
 * Segments whose x coordinates do not increase are skipped.  A repeated x
 * coordinate used to cause a division by zero; a decreasing one already wrote
 * nothing.
 */
static void
radv_vcn_av1_film_grain_init_scaling(uint8_t scaling_points[][2], uint8_t num, short scaling_lut[])
{
   int32_t i, x, delta_x, delta_y;
   int64_t delta;

   if (num == 0)
      return;

   /* Flat region before the first control point. */
   for (i = 0; i < scaling_points[0][0]; i++)
      scaling_lut[i] = scaling_points[0][1];

   for (i = 0; i < num - 1; i++) {
      delta_y = scaling_points[i + 1][1] - scaling_points[i][1];
      delta_x = scaling_points[i + 1][0] - scaling_points[i][0];

      /* Nothing to interpolate, and delta_x == 0 must not reach the division. */
      if (delta_x <= 0)
         continue;

      /* Slope in 16.16 fixed point, with the reciprocal of delta_x rounded. */
      delta = delta_y * ((65536 + (delta_x >> 1)) / delta_x);

      for (x = 0; x < delta_x; x++)
         scaling_lut[scaling_points[i][0] + x] = (short)(scaling_points[i][1] + (int32_t)((x * delta + 32768) >> 16));
   }

   /* Flat region from the last control point to the end of the table. */
   for (i = scaling_points[num - 1][0]; i < 256; i++)
      scaling_lut[i] = scaling_points[num - 1][1];
}
1235 
1236 static void
radv_vcn_av1_init_film_grain_buffer(rvcn_dec_film_grain_params_t * fg_params,rvcn_dec_av1_fg_init_buf_t * fg_buf)1237 radv_vcn_av1_init_film_grain_buffer(rvcn_dec_film_grain_params_t *fg_params, rvcn_dec_av1_fg_init_buf_t *fg_buf)
1238 {
1239    const int32_t luma_block_size_y = LUMA_BLOCK_SIZE_Y;
1240    const int32_t luma_block_size_x = LUMA_BLOCK_SIZE_X;
1241    const int32_t chroma_block_size_y = CHROMA_BLOCK_SIZE_Y;
1242    const int32_t chroma_block_size_x = CHROMA_BLOCK_SIZE_X;
1243    const int32_t gauss_bits = 11;
1244    int32_t filt_luma_grain_block[LUMA_BLOCK_SIZE_Y][LUMA_BLOCK_SIZE_X];
1245    int32_t filt_cb_grain_block[CHROMA_BLOCK_SIZE_Y][CHROMA_BLOCK_SIZE_X];
1246    int32_t filt_cr_grain_block[CHROMA_BLOCK_SIZE_Y][CHROMA_BLOCK_SIZE_X];
1247    int32_t chroma_subsamp_y = 1;
1248    int32_t chroma_subsamp_x = 1;
1249    unsigned short seed = fg_params->random_seed;
1250    int32_t ar_coeff_lag = fg_params->ar_coeff_lag;
1251    int32_t bit_depth = fg_params->bit_depth_minus_8 + 8;
1252    short grain_center = 128 << (bit_depth - 8);
1253    short grain_min = 0 - grain_center;
1254    short grain_max = (256 << (bit_depth - 8)) - 1 - grain_center;
1255    int32_t shift = 12 - bit_depth + fg_params->grain_scale_shift;
1256    short luma_grain_block_tmp[64][80];
1257    short cb_grain_block_tmp[32][40];
1258    short cr_grain_block_tmp[32][40];
1259    short *align_ptr, *align_ptr0, *align_ptr1;
1260    int32_t x, y, g, i, j, c, c0, c1, delta_row, delta_col;
1261    int32_t s, s0, s1, pos, r;
1262 
1263    /* generate luma grain block */
1264    memset(filt_luma_grain_block, 0, sizeof(filt_luma_grain_block));
1265    for (y = 0; y < luma_block_size_y; y++) {
1266       for (x = 0; x < luma_block_size_x; x++) {
1267          g = 0;
1268          if (fg_params->num_y_points > 0) {
1269             r = radv_vcn_av1_film_grain_random_number(&seed, gauss_bits);
1270             g = gaussian_sequence[CLAMP(r, 0, 2048 - 1)];
1271          }
1272          filt_luma_grain_block[y][x] = ROUND_POWER_OF_TWO(g, shift);
1273       }
1274    }
1275 
1276    for (y = 3; y < luma_block_size_y; y++) {
1277       for (x = 3; x < luma_block_size_x - 3; x++) {
1278          s = 0;
1279          pos = 0;
1280          for (delta_row = -ar_coeff_lag; delta_row <= 0; delta_row++) {
1281             for (delta_col = -ar_coeff_lag; delta_col <= ar_coeff_lag; delta_col++) {
1282                if (delta_row == 0 && delta_col == 0)
1283                   break;
1284                c = fg_params->ar_coeffs_y[pos];
1285                s += filt_luma_grain_block[y + delta_row][x + delta_col] * c;
1286                pos++;
1287             }
1288          }
1289          filt_luma_grain_block[y][x] = AV1_CLAMP(
1290             filt_luma_grain_block[y][x] + ROUND_POWER_OF_TWO(s, fg_params->ar_coeff_shift), grain_min, grain_max);
1291       }
1292    }
1293 
1294    /* generate chroma grain block */
1295    memset(filt_cb_grain_block, 0, sizeof(filt_cb_grain_block));
1296    shift = 12 - bit_depth + fg_params->grain_scale_shift;
1297    seed = fg_params->random_seed ^ 0xb524;
1298    for (y = 0; y < chroma_block_size_y; y++) {
1299       for (x = 0; x < chroma_block_size_x; x++) {
1300          g = 0;
1301          if (fg_params->num_cb_points || fg_params->chroma_scaling_from_luma) {
1302             r = radv_vcn_av1_film_grain_random_number(&seed, gauss_bits);
1303             g = gaussian_sequence[CLAMP(r, 0, 2048 - 1)];
1304          }
1305          filt_cb_grain_block[y][x] = ROUND_POWER_OF_TWO(g, shift);
1306       }
1307    }
1308 
1309    memset(filt_cr_grain_block, 0, sizeof(filt_cr_grain_block));
1310    seed = fg_params->random_seed ^ 0x49d8;
1311    for (y = 0; y < chroma_block_size_y; y++) {
1312       for (x = 0; x < chroma_block_size_x; x++) {
1313          g = 0;
1314          if (fg_params->num_cr_points || fg_params->chroma_scaling_from_luma) {
1315             r = radv_vcn_av1_film_grain_random_number(&seed, gauss_bits);
1316             g = gaussian_sequence[CLAMP(r, 0, 2048 - 1)];
1317          }
1318          filt_cr_grain_block[y][x] = ROUND_POWER_OF_TWO(g, shift);
1319       }
1320    }
1321 
1322    for (y = 3; y < chroma_block_size_y; y++) {
1323       for (x = 3; x < chroma_block_size_x - 3; x++) {
1324          s0 = 0, s1 = 0, pos = 0;
1325          for (delta_row = -ar_coeff_lag; delta_row <= 0; delta_row++) {
1326             for (delta_col = -ar_coeff_lag; delta_col <= ar_coeff_lag; delta_col++) {
1327                c0 = fg_params->ar_coeffs_cb[pos];
1328                c1 = fg_params->ar_coeffs_cr[pos];
1329                if (delta_row == 0 && delta_col == 0) {
1330                   if (fg_params->num_y_points > 0) {
1331                      int luma = 0;
1332                      int luma_x = ((x - 3) << chroma_subsamp_x) + 3;
1333                      int luma_y = ((y - 3) << chroma_subsamp_y) + 3;
1334                      for (i = 0; i <= chroma_subsamp_y; i++)
1335                         for (j = 0; j <= chroma_subsamp_x; j++)
1336                            luma += filt_luma_grain_block[luma_y + i][luma_x + j];
1337 
1338                      luma = ROUND_POWER_OF_TWO(luma, chroma_subsamp_x + chroma_subsamp_y);
1339                      s0 += luma * c0;
1340                      s1 += luma * c1;
1341                   }
1342                   break;
1343                }
1344                s0 += filt_cb_grain_block[y + delta_row][x + delta_col] * c0;
1345                s1 += filt_cr_grain_block[y + delta_row][x + delta_col] * c1;
1346                pos++;
1347             }
1348          }
1349          filt_cb_grain_block[y][x] = AV1_CLAMP(
1350             filt_cb_grain_block[y][x] + ROUND_POWER_OF_TWO(s0, fg_params->ar_coeff_shift), grain_min, grain_max);
1351          filt_cr_grain_block[y][x] = AV1_CLAMP(
1352             filt_cr_grain_block[y][x] + ROUND_POWER_OF_TWO(s1, fg_params->ar_coeff_shift), grain_min, grain_max);
1353       }
1354    }
1355 
1356    for (i = 9; i < luma_block_size_y; i++)
1357       for (j = 9; j < luma_block_size_x; j++)
1358          luma_grain_block_tmp[i - 9][j - 9] = filt_luma_grain_block[i][j];
1359 
1360    for (i = 6; i < chroma_block_size_y; i++)
1361       for (j = 6; j < chroma_block_size_x; j++) {
1362          cb_grain_block_tmp[i - 6][j - 6] = filt_cb_grain_block[i][j];
1363          cr_grain_block_tmp[i - 6][j - 6] = filt_cr_grain_block[i][j];
1364       }
1365 
1366    align_ptr = &fg_buf->luma_grain_block[0][0];
1367    for (i = 0; i < 64; i++) {
1368       for (j = 0; j < 80; j++)
1369          *align_ptr++ = luma_grain_block_tmp[i][j];
1370 
1371       if (((i + 1) % 4) == 0)
1372          align_ptr += 64;
1373    }
1374 
1375    align_ptr0 = &fg_buf->cb_grain_block[0][0];
1376    align_ptr1 = &fg_buf->cr_grain_block[0][0];
1377    for (i = 0; i < 32; i++) {
1378       for (j = 0; j < 40; j++) {
1379          *align_ptr0++ = cb_grain_block_tmp[i][j];
1380          *align_ptr1++ = cr_grain_block_tmp[i][j];
1381       }
1382       if (((i + 1) % 8) == 0) {
1383          align_ptr0 += 64;
1384          align_ptr1 += 64;
1385       }
1386    }
1387 
1388    memset(fg_buf->scaling_lut_y, 0, sizeof(fg_buf->scaling_lut_y));
1389    radv_vcn_av1_film_grain_init_scaling(fg_params->scaling_points_y, fg_params->num_y_points, fg_buf->scaling_lut_y);
1390    if (fg_params->chroma_scaling_from_luma) {
1391       memcpy(fg_buf->scaling_lut_cb, fg_buf->scaling_lut_y, sizeof(fg_buf->scaling_lut_y));
1392       memcpy(fg_buf->scaling_lut_cr, fg_buf->scaling_lut_y, sizeof(fg_buf->scaling_lut_y));
1393    } else {
1394       memset(fg_buf->scaling_lut_cb, 0, sizeof(fg_buf->scaling_lut_cb));
1395       memset(fg_buf->scaling_lut_cr, 0, sizeof(fg_buf->scaling_lut_cr));
1396       radv_vcn_av1_film_grain_init_scaling(fg_params->scaling_points_cb, fg_params->num_cb_points,
1397                                            fg_buf->scaling_lut_cb);
1398       radv_vcn_av1_film_grain_init_scaling(fg_params->scaling_points_cr, fg_params->num_cr_points,
1399                                            fg_buf->scaling_lut_cr);
1400    }
1401 }
1402 
1403 static rvcn_dec_message_av1_t
get_av1_msg(struct radv_device * device,struct radv_video_session * vid,struct radv_video_session_params * params,const struct VkVideoDecodeInfoKHR * frame_info,void * probs_ptr,int * update_reference_slot)1404 get_av1_msg(struct radv_device *device, struct radv_video_session *vid, struct radv_video_session_params *params,
1405             const struct VkVideoDecodeInfoKHR *frame_info, void *probs_ptr, int *update_reference_slot)
1406 {
1407    rvcn_dec_message_av1_t result;
1408    unsigned i, j;
1409    const struct VkVideoDecodeAV1PictureInfoKHR *av1_pic_info =
1410       vk_find_struct_const(frame_info->pNext, VIDEO_DECODE_AV1_PICTURE_INFO_KHR);
1411    const StdVideoDecodeAV1PictureInfo *pi = av1_pic_info->pStdPictureInfo;
1412    const StdVideoAV1SequenceHeader *seq_hdr = &params->vk.av1_dec.seq_hdr.base;
1413    memset(&result, 0, sizeof(result));
1414 
1415    const int intra_only_decoding = vid->vk.max_dpb_slots == 0;
1416    if (intra_only_decoding)
1417       assert(frame_info->pSetupReferenceSlot == NULL);
1418 
1419    *update_reference_slot = !(intra_only_decoding || pi->refresh_frame_flags == 0);
1420 
1421    result.frame_header_flags = (1 /*av1_pic_info->frame_header->flags.show_frame*/
1422                                 << RDECODE_FRAME_HDR_INFO_AV1_SHOW_FRAME_SHIFT) &
1423                                RDECODE_FRAME_HDR_INFO_AV1_SHOW_FRAME_MASK;
1424 
1425    result.frame_header_flags |= (pi->flags.disable_cdf_update << RDECODE_FRAME_HDR_INFO_AV1_DISABLE_CDF_UPDATE_SHIFT) &
1426                                 RDECODE_FRAME_HDR_INFO_AV1_DISABLE_CDF_UPDATE_MASK;
1427 
1428    result.frame_header_flags |=
1429       ((!pi->flags.disable_frame_end_update_cdf) << RDECODE_FRAME_HDR_INFO_AV1_REFRESH_FRAME_CONTEXT_SHIFT) &
1430       RDECODE_FRAME_HDR_INFO_AV1_REFRESH_FRAME_CONTEXT_MASK;
1431 
1432    result.frame_header_flags |=
1433       ((pi->frame_type == STD_VIDEO_AV1_FRAME_TYPE_INTRA_ONLY) << RDECODE_FRAME_HDR_INFO_AV1_INTRA_ONLY_SHIFT) &
1434       RDECODE_FRAME_HDR_INFO_AV1_INTRA_ONLY_MASK;
1435 
1436    result.frame_header_flags |= (pi->flags.allow_intrabc << RDECODE_FRAME_HDR_INFO_AV1_ALLOW_INTRABC_SHIFT) &
1437                                 RDECODE_FRAME_HDR_INFO_AV1_ALLOW_INTRABC_MASK;
1438 
1439    result.frame_header_flags |=
1440       (pi->flags.allow_high_precision_mv << RDECODE_FRAME_HDR_INFO_AV1_ALLOW_HIGH_PRECISION_MV_SHIFT) &
1441       RDECODE_FRAME_HDR_INFO_AV1_ALLOW_HIGH_PRECISION_MV_MASK;
1442 
1443    result.frame_header_flags |=
1444       (seq_hdr->pColorConfig->flags.mono_chrome << RDECODE_FRAME_HDR_INFO_AV1_MONOCHROME_SHIFT) &
1445       RDECODE_FRAME_HDR_INFO_AV1_MONOCHROME_MASK;
1446 
1447    result.frame_header_flags |= (pi->flags.skip_mode_present << RDECODE_FRAME_HDR_INFO_AV1_SKIP_MODE_FLAG_SHIFT) &
1448                                 RDECODE_FRAME_HDR_INFO_AV1_SKIP_MODE_FLAG_MASK;
1449 
1450    result.frame_header_flags |=
1451       (pi->pQuantization->flags.using_qmatrix << RDECODE_FRAME_HDR_INFO_AV1_USING_QMATRIX_SHIFT) &
1452       RDECODE_FRAME_HDR_INFO_AV1_USING_QMATRIX_MASK;
1453 
1454    result.frame_header_flags |=
1455       (seq_hdr->flags.enable_filter_intra << RDECODE_FRAME_HDR_INFO_AV1_ENABLE_FILTER_INTRA_SHIFT) &
1456       RDECODE_FRAME_HDR_INFO_AV1_ENABLE_FILTER_INTRA_MASK;
1457 
1458    result.frame_header_flags |=
1459       (seq_hdr->flags.enable_intra_edge_filter << RDECODE_FRAME_HDR_INFO_AV1_ENABLE_INTRA_EDGE_FILTER_SHIFT) &
1460       RDECODE_FRAME_HDR_INFO_AV1_ENABLE_INTRA_EDGE_FILTER_MASK;
1461 
1462    result.frame_header_flags |=
1463       (seq_hdr->flags.enable_interintra_compound << RDECODE_FRAME_HDR_INFO_AV1_ENABLE_INTERINTRA_COMPOUND_SHIFT) &
1464       RDECODE_FRAME_HDR_INFO_AV1_ENABLE_INTERINTRA_COMPOUND_MASK;
1465 
1466    result.frame_header_flags |=
1467       (seq_hdr->flags.enable_masked_compound << RDECODE_FRAME_HDR_INFO_AV1_ENABLE_MASKED_COMPOUND_SHIFT) &
1468       RDECODE_FRAME_HDR_INFO_AV1_ENABLE_MASKED_COMPOUND_MASK;
1469 
1470    result.frame_header_flags |=
1471       (pi->flags.allow_warped_motion << RDECODE_FRAME_HDR_INFO_AV1_ALLOW_WARPED_MOTION_SHIFT) &
1472       RDECODE_FRAME_HDR_INFO_AV1_ALLOW_WARPED_MOTION_MASK;
1473 
1474    result.frame_header_flags |=
1475       (seq_hdr->flags.enable_dual_filter << RDECODE_FRAME_HDR_INFO_AV1_ENABLE_DUAL_FILTER_SHIFT) &
1476       RDECODE_FRAME_HDR_INFO_AV1_ENABLE_DUAL_FILTER_MASK;
1477 
1478    result.frame_header_flags |=
1479       (seq_hdr->flags.enable_order_hint << RDECODE_FRAME_HDR_INFO_AV1_ENABLE_ORDER_HINT_SHIFT) &
1480       RDECODE_FRAME_HDR_INFO_AV1_ENABLE_ORDER_HINT_MASK;
1481 
1482    result.frame_header_flags |= (seq_hdr->flags.enable_jnt_comp << RDECODE_FRAME_HDR_INFO_AV1_ENABLE_JNT_COMP_SHIFT) &
1483                                 RDECODE_FRAME_HDR_INFO_AV1_ENABLE_JNT_COMP_MASK;
1484 
1485    result.frame_header_flags |= (pi->flags.use_ref_frame_mvs << RDECODE_FRAME_HDR_INFO_AV1_ALLOW_REF_FRAME_MVS_SHIFT) &
1486                                 RDECODE_FRAME_HDR_INFO_AV1_ALLOW_REF_FRAME_MVS_MASK;
1487 
1488    result.frame_header_flags |=
1489       (pi->flags.allow_screen_content_tools << RDECODE_FRAME_HDR_INFO_AV1_ALLOW_SCREEN_CONTENT_TOOLS_SHIFT) &
1490       RDECODE_FRAME_HDR_INFO_AV1_ALLOW_SCREEN_CONTENT_TOOLS_MASK;
1491 
1492    result.frame_header_flags |=
1493       (pi->flags.force_integer_mv << RDECODE_FRAME_HDR_INFO_AV1_CUR_FRAME_FORCE_INTEGER_MV_SHIFT) &
1494       RDECODE_FRAME_HDR_INFO_AV1_CUR_FRAME_FORCE_INTEGER_MV_MASK;
1495 
1496    result.frame_header_flags |=
1497       (pi->pLoopFilter->flags.loop_filter_delta_enabled << RDECODE_FRAME_HDR_INFO_AV1_MODE_REF_DELTA_ENABLED_SHIFT) &
1498       RDECODE_FRAME_HDR_INFO_AV1_MODE_REF_DELTA_ENABLED_MASK;
1499 
1500    result.frame_header_flags |=
1501       (pi->pLoopFilter->flags.loop_filter_delta_update << RDECODE_FRAME_HDR_INFO_AV1_MODE_REF_DELTA_UPDATE_SHIFT) &
1502       RDECODE_FRAME_HDR_INFO_AV1_MODE_REF_DELTA_UPDATE_MASK;
1503 
1504    result.frame_header_flags |= (pi->flags.delta_q_present << RDECODE_FRAME_HDR_INFO_AV1_DELTA_Q_PRESENT_FLAG_SHIFT) &
1505                                 RDECODE_FRAME_HDR_INFO_AV1_DELTA_Q_PRESENT_FLAG_MASK;
1506 
1507    result.frame_header_flags |= (pi->flags.delta_lf_present << RDECODE_FRAME_HDR_INFO_AV1_DELTA_LF_PRESENT_FLAG_SHIFT) &
1508                                 RDECODE_FRAME_HDR_INFO_AV1_DELTA_LF_PRESENT_FLAG_MASK;
1509 
1510    result.frame_header_flags |= (pi->flags.reduced_tx_set << RDECODE_FRAME_HDR_INFO_AV1_REDUCED_TX_SET_USED_SHIFT) &
1511                                 RDECODE_FRAME_HDR_INFO_AV1_REDUCED_TX_SET_USED_MASK;
1512 
1513    result.frame_header_flags |=
1514       (pi->flags.segmentation_enabled << RDECODE_FRAME_HDR_INFO_AV1_SEGMENTATION_ENABLED_SHIFT) &
1515       RDECODE_FRAME_HDR_INFO_AV1_SEGMENTATION_ENABLED_MASK;
1516 
1517    result.frame_header_flags |=
1518       (pi->flags.segmentation_update_map << RDECODE_FRAME_HDR_INFO_AV1_SEGMENTATION_UPDATE_MAP_SHIFT) &
1519       RDECODE_FRAME_HDR_INFO_AV1_SEGMENTATION_UPDATE_MAP_MASK;
1520 
1521    result.frame_header_flags |=
1522       (pi->flags.segmentation_temporal_update << RDECODE_FRAME_HDR_INFO_AV1_SEGMENTATION_TEMPORAL_UPDATE_SHIFT) &
1523       RDECODE_FRAME_HDR_INFO_AV1_SEGMENTATION_TEMPORAL_UPDATE_MASK;
1524 
1525    result.frame_header_flags |= (pi->flags.delta_lf_multi << RDECODE_FRAME_HDR_INFO_AV1_DELTA_LF_MULTI_SHIFT) &
1526                                 RDECODE_FRAME_HDR_INFO_AV1_DELTA_LF_MULTI_MASK;
1527 
1528    result.frame_header_flags |=
1529       (pi->flags.is_motion_mode_switchable << RDECODE_FRAME_HDR_INFO_AV1_SWITCHABLE_SKIP_MODE_SHIFT) &
1530       RDECODE_FRAME_HDR_INFO_AV1_SWITCHABLE_SKIP_MODE_MASK;
1531 
1532    result.frame_header_flags |= ((!intra_only_decoding ? !(pi->refresh_frame_flags) : 1)
1533                                  << RDECODE_FRAME_HDR_INFO_AV1_SKIP_REFERENCE_UPDATE_SHIFT) &
1534                                 RDECODE_FRAME_HDR_INFO_AV1_SKIP_REFERENCE_UPDATE_MASK;
1535 
1536    result.frame_header_flags |=
1537       ((!seq_hdr->flags.enable_ref_frame_mvs) << RDECODE_FRAME_HDR_INFO_AV1_DISABLE_REF_FRAME_MVS_SHIFT) &
1538       RDECODE_FRAME_HDR_INFO_AV1_DISABLE_REF_FRAME_MVS_MASK;
1539 
1540    result.current_frame_id = pi->current_frame_id;
1541    result.frame_offset = pi->OrderHint;
1542    result.profile = seq_hdr->seq_profile;
1543    result.is_annexb = 0;
1544 
1545    result.frame_type = pi->frame_type;
1546    result.primary_ref_frame = pi->primary_ref_frame;
1547 
1548    const struct VkVideoDecodeAV1DpbSlotInfoKHR *setup_dpb_slot =
1549       intra_only_decoding
1550          ? NULL
1551          : vk_find_struct_const(frame_info->pSetupReferenceSlot->pNext, VIDEO_DECODE_AV1_DPB_SLOT_INFO_KHR);
1552 
1553    /* The AMD FW interface does not need this information, since it's
1554     * redundant with the information derivable from the current frame header,
1555     * which the FW is parsing and tracking.
1556     */
1557    (void)setup_dpb_slot;
1558    result.curr_pic_idx = intra_only_decoding ? 0 : frame_info->pSetupReferenceSlot->slotIndex;
1559 
1560    result.sb_size = seq_hdr->flags.use_128x128_superblock;
1561    result.interp_filter = pi->interpolation_filter;
1562    for (i = 0; i < 2; ++i)
1563       result.filter_level[i] = pi->pLoopFilter->loop_filter_level[i];
1564    result.filter_level_u = pi->pLoopFilter->loop_filter_level[2];
1565    result.filter_level_v = pi->pLoopFilter->loop_filter_level[3];
1566    result.sharpness_level = pi->pLoopFilter->loop_filter_sharpness;
1567    for (i = 0; i < 8; ++i)
1568       result.ref_deltas[i] = pi->pLoopFilter->loop_filter_ref_deltas[i];
1569    for (i = 0; i < 2; ++i)
1570       result.mode_deltas[i] = pi->pLoopFilter->loop_filter_mode_deltas[i];
1571    result.base_qindex = pi->pQuantization->base_q_idx;
1572    result.y_dc_delta_q = pi->pQuantization->DeltaQYDc;
1573    result.u_dc_delta_q = pi->pQuantization->DeltaQUDc;
1574    result.v_dc_delta_q = pi->pQuantization->DeltaQVDc;
1575    result.u_ac_delta_q = pi->pQuantization->DeltaQUAc;
1576    result.v_ac_delta_q = pi->pQuantization->DeltaQVAc;
1577 
1578    if (pi->pQuantization->flags.using_qmatrix) {
1579       result.qm_y = pi->pQuantization->qm_y | 0xf0;
1580       result.qm_u = pi->pQuantization->qm_u | 0xf0;
1581       result.qm_v = pi->pQuantization->qm_v | 0xf0;
1582    } else {
1583       result.qm_y = 0xff;
1584       result.qm_u = 0xff;
1585       result.qm_v = 0xff;
1586    }
1587    result.delta_q_res = (1 << pi->delta_q_res);
1588    result.delta_lf_res = (1 << pi->delta_lf_res);
1589    result.tile_cols = pi->pTileInfo->TileCols;
1590    result.tile_rows = pi->pTileInfo->TileRows;
1591 
1592    result.tx_mode = pi->TxMode;
1593    result.reference_mode = (pi->flags.reference_select == 1) ? 2 : 0;
1594    result.chroma_format = seq_hdr->pColorConfig->flags.mono_chrome ? 0 : 1;
1595    result.tile_size_bytes = pi->pTileInfo->tile_size_bytes_minus_1;
1596    result.context_update_tile_id = pi->pTileInfo->context_update_tile_id;
1597 
1598    for (i = 0; i < result.tile_cols; i++)
1599       result.tile_col_start_sb[i] = pi->pTileInfo->pMiColStarts[i];
1600    result.tile_col_start_sb[result.tile_cols] =
1601       result.tile_col_start_sb[result.tile_cols - 1] + pi->pTileInfo->pWidthInSbsMinus1[result.tile_cols - 1] + 1;
1602    for (i = 0; i < pi->pTileInfo->TileRows; i++)
1603       result.tile_row_start_sb[i] = pi->pTileInfo->pMiRowStarts[i];
1604    result.tile_row_start_sb[result.tile_rows] =
1605       result.tile_row_start_sb[result.tile_rows - 1] + pi->pTileInfo->pHeightInSbsMinus1[result.tile_rows - 1] + 1;
1606 
1607    result.max_width = seq_hdr->max_frame_width_minus_1 + 1;
1608    result.max_height = seq_hdr->max_frame_height_minus_1 + 1;
1609    VkExtent2D frameExtent = frame_info->dstPictureResource.codedExtent;
1610    result.superres_scale_denominator =
1611       pi->flags.use_superres ? pi->coded_denom + AV1_SUPERRES_DENOM_MIN : AV1_SUPERRES_NUM;
1612    if (pi->flags.use_superres) {
1613       result.width =
1614          (frameExtent.width * 8 + result.superres_scale_denominator / 2) / result.superres_scale_denominator;
1615    } else {
1616       result.width = frameExtent.width;
1617    }
1618    result.height = frameExtent.height;
1619 
1620    result.superres_upscaled_width = frameExtent.width;
1621 
1622    result.order_hint_bits = seq_hdr->order_hint_bits_minus_1 + 1;
1623 
1624    /* The VCN FW will evict references that aren't specified in
1625     * ref_frame_map, even if they are still valid. To prevent this we will
1626     * specify every possible reference in ref_frame_map.
1627     */
1628    uint16_t used_slots = (1 << result.curr_pic_idx);
1629    for (i = 0; i < frame_info->referenceSlotCount; i++) {
1630       const struct VkVideoDecodeAV1DpbSlotInfoKHR *ref_dpb_slot =
1631          vk_find_struct_const(frame_info->pReferenceSlots[i].pNext, VIDEO_DECODE_AV1_DPB_SLOT_INFO_KHR);
1632       (void)ref_dpb_slot; /* Again, the FW is tracking this information for us, so no need for it. */
1633       (void)ref_dpb_slot; /* the FW is tracking this information for us, so no need for it. */
1634       int32_t slotIndex = frame_info->pReferenceSlots[i].slotIndex;
1635       result.ref_frame_map[i] = slotIndex;
1636       used_slots |= 1 << slotIndex;
1637    }
1638    /* Go through all the slots and fill in the ones that haven't been used. */
1639    for (j = 0; j < STD_VIDEO_AV1_NUM_REF_FRAMES + 1; j++) {
1640       if ((used_slots & (1 << j)) == 0) {
1641          result.ref_frame_map[i] = j;
1642          used_slots |= 1 << j;
1643          i++;
1644       }
1645    }
1646 
1647    assert(used_slots == 0x1ff && i == STD_VIDEO_AV1_NUM_REF_FRAMES);
1648 
1649    for (i = 0; i < STD_VIDEO_AV1_REFS_PER_FRAME; ++i) {
1650       result.frame_refs[i] =
1651          av1_pic_info->referenceNameSlotIndices[i] == -1 ? 0x7f : av1_pic_info->referenceNameSlotIndices[i];
1652    }
1653 
1654    result.bit_depth_luma_minus8 = result.bit_depth_chroma_minus8 = seq_hdr->pColorConfig->BitDepth - 8;
1655 
1656    int16_t *feature_data = (int16_t *)probs_ptr;
1657    int fd_idx = 0;
1658    for (i = 0; i < 8; ++i) {
1659       result.feature_mask[i] = pi->pSegmentation->FeatureEnabled[i];
1660       for (j = 0; j < 8; ++j) {
1661          result.feature_data[i][j] = pi->pSegmentation->FeatureData[i][j];
1662          feature_data[fd_idx++] = result.feature_data[i][j];
1663       }
1664    }
1665 
1666    memcpy(((char *)probs_ptr + 128), result.feature_mask, 8);
1667    result.cdef_damping = pi->pCDEF->cdef_damping_minus_3 + 3;
1668    result.cdef_bits = pi->pCDEF->cdef_bits;
1669    for (i = 0; i < 8; ++i) {
1670       result.cdef_strengths[i] = (pi->pCDEF->cdef_y_pri_strength[i] << 2) + pi->pCDEF->cdef_y_sec_strength[i];
1671       result.cdef_uv_strengths[i] = (pi->pCDEF->cdef_uv_pri_strength[i] << 2) + pi->pCDEF->cdef_uv_sec_strength[i];
1672    }
1673 
1674    if (pi->flags.UsesLr) {
1675       for (int plane = 0; plane < STD_VIDEO_AV1_MAX_NUM_PLANES; plane++) {
1676          result.frame_restoration_type[plane] = pi->pLoopRestoration->FrameRestorationType[plane];
1677          result.log2_restoration_unit_size_minus5[plane] = pi->pLoopRestoration->LoopRestorationSize[plane];
1678       }
1679    }
1680 
1681    if (seq_hdr->pColorConfig->BitDepth > 8) {
1682       if (vid->vk.picture_format == VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16 ||
1683           vid->vk.picture_format == VK_FORMAT_G16_B16R16_2PLANE_420_UNORM) {
1684          result.p010_mode = 1;
1685          result.msb_mode = 1;
1686       } else {
1687          result.luma_10to8 = 1;
1688          result.chroma_10to8 = 1;
1689       }
1690    }
1691 
1692    result.preskip_segid = 0;
1693    result.last_active_segid = 0;
1694    for (i = 0; i < 8; i++) {
1695       for (j = 0; j < 8; j++) {
1696          if (result.feature_mask[i] & (1 << j)) {
1697             result.last_active_segid = i;
1698             if (j >= 5)
1699                result.preskip_segid = 1;
1700          }
1701       }
1702    }
1703    result.seg_lossless_flag = 0;
1704    for (i = 0; i < 8; ++i) {
1705       int av1_get_qindex, qindex;
1706       int segfeature_active = result.feature_mask[i] & (1 << 0);
1707       if (segfeature_active) {
1708          int seg_qindex = result.base_qindex + result.feature_data[i][0];
1709          av1_get_qindex = seg_qindex < 0 ? 0 : (seg_qindex > 255 ? 255 : seg_qindex);
1710       } else {
1711          av1_get_qindex = result.base_qindex;
1712       }
1713       qindex = pi->flags.segmentation_enabled ? av1_get_qindex : result.base_qindex;
1714       result.seg_lossless_flag |= (((qindex == 0) && result.y_dc_delta_q == 0 && result.u_dc_delta_q == 0 &&
1715                                     result.v_dc_delta_q == 0 && result.u_ac_delta_q == 0 && result.v_ac_delta_q == 0)
1716                                    << i);
1717    }
1718 
1719    rvcn_dec_film_grain_params_t *fg_params = &result.film_grain;
1720    fg_params->apply_grain = pi->flags.apply_grain;
1721    if (fg_params->apply_grain) {
1722       rvcn_dec_av1_fg_init_buf_t *fg_buf = (rvcn_dec_av1_fg_init_buf_t *)((char *)probs_ptr + 256);
1723       fg_params->random_seed = pi->pFilmGrain->grain_seed;
1724       fg_params->grain_scale_shift = pi->pFilmGrain->grain_scale_shift;
1725       fg_params->scaling_shift = pi->pFilmGrain->grain_scaling_minus_8 + 8;
1726       fg_params->chroma_scaling_from_luma = pi->pFilmGrain->flags.chroma_scaling_from_luma;
1727       fg_params->num_y_points = pi->pFilmGrain->num_y_points;
1728       fg_params->num_cb_points = pi->pFilmGrain->num_cb_points;
1729       fg_params->num_cr_points = pi->pFilmGrain->num_cr_points;
1730       fg_params->cb_mult = pi->pFilmGrain->cb_mult;
1731       fg_params->cb_luma_mult = pi->pFilmGrain->cb_luma_mult;
1732       fg_params->cb_offset = pi->pFilmGrain->cb_offset;
1733       fg_params->cr_mult = pi->pFilmGrain->cr_mult;
1734       fg_params->cr_luma_mult = pi->pFilmGrain->cr_luma_mult;
1735       fg_params->cr_offset = pi->pFilmGrain->cr_offset;
1736       fg_params->bit_depth_minus_8 = result.bit_depth_luma_minus8;
1737       for (i = 0; i < fg_params->num_y_points; ++i) {
1738          fg_params->scaling_points_y[i][0] = pi->pFilmGrain->point_y_value[i];
1739          fg_params->scaling_points_y[i][1] = pi->pFilmGrain->point_y_scaling[i];
1740       }
1741       for (i = 0; i < fg_params->num_cb_points; ++i) {
1742          fg_params->scaling_points_cb[i][0] = pi->pFilmGrain->point_cb_value[i];
1743          fg_params->scaling_points_cb[i][1] = pi->pFilmGrain->point_cb_scaling[i];
1744       }
1745       for (i = 0; i < fg_params->num_cr_points; ++i) {
1746          fg_params->scaling_points_cr[i][0] = pi->pFilmGrain->point_cr_value[i];
1747          fg_params->scaling_points_cr[i][1] = pi->pFilmGrain->point_cr_scaling[i];
1748       }
1749 
1750       fg_params->ar_coeff_lag = pi->pFilmGrain->ar_coeff_lag;
1751       fg_params->ar_coeff_shift = pi->pFilmGrain->ar_coeff_shift_minus_6 + 6;
1752 
1753       for (i = 0; i < 24; ++i)
1754          fg_params->ar_coeffs_y[i] = pi->pFilmGrain->ar_coeffs_y_plus_128[i] - 128;
1755 
1756       for (i = 0; i < 25; ++i) {
1757          fg_params->ar_coeffs_cb[i] = pi->pFilmGrain->ar_coeffs_cb_plus_128[i] - 128;
1758          fg_params->ar_coeffs_cr[i] = pi->pFilmGrain->ar_coeffs_cr_plus_128[i] - 128;
1759       }
1760 
1761       fg_params->overlap_flag = pi->pFilmGrain->flags.overlap_flag;
1762       fg_params->clip_to_restricted_range = pi->pFilmGrain->flags.clip_to_restricted_range;
1763       radv_vcn_av1_init_film_grain_buffer(fg_params, fg_buf);
1764    }
1765 
1766    result.uncompressed_header_size = 0;
1767    for (i = 0; i < STD_VIDEO_AV1_NUM_REF_FRAMES; ++i) {
1768       result.global_motion[i].wmtype = pi->pGlobalMotion->GmType[i];
1769       for (j = 0; j < STD_VIDEO_AV1_GLOBAL_MOTION_PARAMS; ++j)
1770          result.global_motion[i].wmmat[j] = pi->pGlobalMotion->gm_params[i][j];
1771    }
1772    for (i = 0; i < av1_pic_info->tileCount && i < 256; ++i) {
1773       result.tile_info[i].offset = av1_pic_info->pTileOffsets[i];
1774       result.tile_info[i].size = av1_pic_info->pTileSizes[i];
1775    }
1776 
1777    return result;
1778 }
1779 
1780 static void
rvcn_av1_init_mode_probs(void * prob)1781 rvcn_av1_init_mode_probs(void *prob)
1782 {
1783    rvcn_av1_frame_context_t *fc = (rvcn_av1_frame_context_t *)prob;
1784    int i;
1785 
1786    memcpy(fc->palette_y_size_cdf, default_palette_y_size_cdf, sizeof(default_palette_y_size_cdf));
1787    memcpy(fc->palette_uv_size_cdf, default_palette_uv_size_cdf, sizeof(default_palette_uv_size_cdf));
1788    memcpy(fc->palette_y_color_index_cdf, default_palette_y_color_index_cdf, sizeof(default_palette_y_color_index_cdf));
1789    memcpy(fc->palette_uv_color_index_cdf, default_palette_uv_color_index_cdf,
1790           sizeof(default_palette_uv_color_index_cdf));
1791    memcpy(fc->kf_y_cdf, default_kf_y_mode_cdf, sizeof(default_kf_y_mode_cdf));
1792    memcpy(fc->angle_delta_cdf, default_angle_delta_cdf, sizeof(default_angle_delta_cdf));
1793    memcpy(fc->comp_inter_cdf, default_comp_inter_cdf, sizeof(default_comp_inter_cdf));
1794    memcpy(fc->comp_ref_type_cdf, default_comp_ref_type_cdf, sizeof(default_comp_ref_type_cdf));
1795    memcpy(fc->uni_comp_ref_cdf, default_uni_comp_ref_cdf, sizeof(default_uni_comp_ref_cdf));
1796    memcpy(fc->palette_y_mode_cdf, default_palette_y_mode_cdf, sizeof(default_palette_y_mode_cdf));
1797    memcpy(fc->palette_uv_mode_cdf, default_palette_uv_mode_cdf, sizeof(default_palette_uv_mode_cdf));
1798    memcpy(fc->comp_ref_cdf, default_comp_ref_cdf, sizeof(default_comp_ref_cdf));
1799    memcpy(fc->comp_bwdref_cdf, default_comp_bwdref_cdf, sizeof(default_comp_bwdref_cdf));
1800    memcpy(fc->single_ref_cdf, default_single_ref_cdf, sizeof(default_single_ref_cdf));
1801    memcpy(fc->txfm_partition_cdf, default_txfm_partition_cdf, sizeof(default_txfm_partition_cdf));
1802    memcpy(fc->compound_index_cdf, default_compound_idx_cdfs, sizeof(default_compound_idx_cdfs));
1803    memcpy(fc->comp_group_idx_cdf, default_comp_group_idx_cdfs, sizeof(default_comp_group_idx_cdfs));
1804    memcpy(fc->newmv_cdf, default_newmv_cdf, sizeof(default_newmv_cdf));
1805    memcpy(fc->zeromv_cdf, default_zeromv_cdf, sizeof(default_zeromv_cdf));
1806    memcpy(fc->refmv_cdf, default_refmv_cdf, sizeof(default_refmv_cdf));
1807    memcpy(fc->drl_cdf, default_drl_cdf, sizeof(default_drl_cdf));
1808    memcpy(fc->motion_mode_cdf, default_motion_mode_cdf, sizeof(default_motion_mode_cdf));
1809    memcpy(fc->obmc_cdf, default_obmc_cdf, sizeof(default_obmc_cdf));
1810    memcpy(fc->inter_compound_mode_cdf, default_inter_compound_mode_cdf, sizeof(default_inter_compound_mode_cdf));
1811    memcpy(fc->compound_type_cdf, default_compound_type_cdf, sizeof(default_compound_type_cdf));
1812    memcpy(fc->wedge_idx_cdf, default_wedge_idx_cdf, sizeof(default_wedge_idx_cdf));
1813    memcpy(fc->interintra_cdf, default_interintra_cdf, sizeof(default_interintra_cdf));
1814    memcpy(fc->wedge_interintra_cdf, default_wedge_interintra_cdf, sizeof(default_wedge_interintra_cdf));
1815    memcpy(fc->interintra_mode_cdf, default_interintra_mode_cdf, sizeof(default_interintra_mode_cdf));
1816    memcpy(fc->pred_cdf, default_segment_pred_cdf, sizeof(default_segment_pred_cdf));
1817    memcpy(fc->switchable_restore_cdf, default_switchable_restore_cdf, sizeof(default_switchable_restore_cdf));
1818    memcpy(fc->wiener_restore_cdf, default_wiener_restore_cdf, sizeof(default_wiener_restore_cdf));
1819    memcpy(fc->sgrproj_restore_cdf, default_sgrproj_restore_cdf, sizeof(default_sgrproj_restore_cdf));
1820    memcpy(fc->y_mode_cdf, default_if_y_mode_cdf, sizeof(default_if_y_mode_cdf));
1821    memcpy(fc->uv_mode_cdf, default_uv_mode_cdf, sizeof(default_uv_mode_cdf));
1822    memcpy(fc->switchable_interp_cdf, default_switchable_interp_cdf, sizeof(default_switchable_interp_cdf));
1823    memcpy(fc->partition_cdf, default_partition_cdf, sizeof(default_partition_cdf));
1824    memcpy(fc->intra_ext_tx_cdf, default_intra_ext_tx_cdf, sizeof(default_intra_ext_tx_cdf));
1825    memcpy(fc->inter_ext_tx_cdf, default_inter_ext_tx_cdf, sizeof(default_inter_ext_tx_cdf));
1826    memcpy(fc->skip_cdfs, default_skip_cdfs, sizeof(default_skip_cdfs));
1827    memcpy(fc->intra_inter_cdf, default_intra_inter_cdf, sizeof(default_intra_inter_cdf));
1828    memcpy(fc->tree_cdf, default_seg_tree_cdf, sizeof(default_seg_tree_cdf));
1829    for (i = 0; i < SPATIAL_PREDICTION_PROBS; ++i)
1830       memcpy(fc->spatial_pred_seg_cdf[i], default_spatial_pred_seg_tree_cdf[i],
1831              sizeof(default_spatial_pred_seg_tree_cdf[i]));
1832    memcpy(fc->tx_size_cdf, default_tx_size_cdf, sizeof(default_tx_size_cdf));
1833    memcpy(fc->delta_q_cdf, default_delta_q_cdf, sizeof(default_delta_q_cdf));
1834    memcpy(fc->skip_mode_cdfs, default_skip_mode_cdfs, sizeof(default_skip_mode_cdfs));
1835    memcpy(fc->delta_lf_cdf, default_delta_lf_cdf, sizeof(default_delta_lf_cdf));
1836    memcpy(fc->delta_lf_multi_cdf, default_delta_lf_multi_cdf, sizeof(default_delta_lf_multi_cdf));
1837    memcpy(fc->cfl_sign_cdf, default_cfl_sign_cdf, sizeof(default_cfl_sign_cdf));
1838    memcpy(fc->cfl_alpha_cdf, default_cfl_alpha_cdf, sizeof(default_cfl_alpha_cdf));
1839    memcpy(fc->filter_intra_cdfs, default_filter_intra_cdfs, sizeof(default_filter_intra_cdfs));
1840    memcpy(fc->filter_intra_mode_cdf, default_filter_intra_mode_cdf, sizeof(default_filter_intra_mode_cdf));
1841    memcpy(fc->intrabc_cdf, default_intrabc_cdf, sizeof(default_intrabc_cdf));
1842 }
1843 
1844 static void
rvcn_av1_init_mv_probs(void * prob)1845 rvcn_av1_init_mv_probs(void *prob)
1846 {
1847    rvcn_av1_frame_context_t *fc = (rvcn_av1_frame_context_t *)prob;
1848 
1849    memcpy(fc->nmvc_joints_cdf, default_nmv_context.joints_cdf, sizeof(default_nmv_context.joints_cdf));
1850    memcpy(fc->nmvc_0_bits_cdf, default_nmv_context.comps[0].bits_cdf, sizeof(default_nmv_context.comps[0].bits_cdf));
1851    memcpy(fc->nmvc_0_class0_cdf, default_nmv_context.comps[0].class0_cdf,
1852           sizeof(default_nmv_context.comps[0].class0_cdf));
1853    memcpy(fc->nmvc_0_class0_fp_cdf, default_nmv_context.comps[0].class0_fp_cdf,
1854           sizeof(default_nmv_context.comps[0].class0_fp_cdf));
1855    memcpy(fc->nmvc_0_class0_hp_cdf, default_nmv_context.comps[0].class0_hp_cdf,
1856           sizeof(default_nmv_context.comps[0].class0_hp_cdf));
1857    memcpy(fc->nmvc_0_classes_cdf, default_nmv_context.comps[0].classes_cdf,
1858           sizeof(default_nmv_context.comps[0].classes_cdf));
1859    memcpy(fc->nmvc_0_fp_cdf, default_nmv_context.comps[0].fp_cdf, sizeof(default_nmv_context.comps[0].fp_cdf));
1860    memcpy(fc->nmvc_0_hp_cdf, default_nmv_context.comps[0].hp_cdf, sizeof(default_nmv_context.comps[0].hp_cdf));
1861    memcpy(fc->nmvc_0_sign_cdf, default_nmv_context.comps[0].sign_cdf, sizeof(default_nmv_context.comps[0].sign_cdf));
1862    memcpy(fc->nmvc_1_bits_cdf, default_nmv_context.comps[1].bits_cdf, sizeof(default_nmv_context.comps[1].bits_cdf));
1863    memcpy(fc->nmvc_1_class0_cdf, default_nmv_context.comps[1].class0_cdf,
1864           sizeof(default_nmv_context.comps[1].class0_cdf));
1865    memcpy(fc->nmvc_1_class0_fp_cdf, default_nmv_context.comps[1].class0_fp_cdf,
1866           sizeof(default_nmv_context.comps[1].class0_fp_cdf));
1867    memcpy(fc->nmvc_1_class0_hp_cdf, default_nmv_context.comps[1].class0_hp_cdf,
1868           sizeof(default_nmv_context.comps[1].class0_hp_cdf));
1869    memcpy(fc->nmvc_1_classes_cdf, default_nmv_context.comps[1].classes_cdf,
1870           sizeof(default_nmv_context.comps[1].classes_cdf));
1871    memcpy(fc->nmvc_1_fp_cdf, default_nmv_context.comps[1].fp_cdf, sizeof(default_nmv_context.comps[1].fp_cdf));
1872    memcpy(fc->nmvc_1_hp_cdf, default_nmv_context.comps[1].hp_cdf, sizeof(default_nmv_context.comps[1].hp_cdf));
1873    memcpy(fc->nmvc_1_sign_cdf, default_nmv_context.comps[1].sign_cdf, sizeof(default_nmv_context.comps[1].sign_cdf));
1874    memcpy(fc->ndvc_joints_cdf, default_nmv_context.joints_cdf, sizeof(default_nmv_context.joints_cdf));
1875    memcpy(fc->ndvc_0_bits_cdf, default_nmv_context.comps[0].bits_cdf, sizeof(default_nmv_context.comps[0].bits_cdf));
1876    memcpy(fc->ndvc_0_class0_cdf, default_nmv_context.comps[0].class0_cdf,
1877           sizeof(default_nmv_context.comps[0].class0_cdf));
1878    memcpy(fc->ndvc_0_class0_fp_cdf, default_nmv_context.comps[0].class0_fp_cdf,
1879           sizeof(default_nmv_context.comps[0].class0_fp_cdf));
1880    memcpy(fc->ndvc_0_class0_hp_cdf, default_nmv_context.comps[0].class0_hp_cdf,
1881           sizeof(default_nmv_context.comps[0].class0_hp_cdf));
1882    memcpy(fc->ndvc_0_classes_cdf, default_nmv_context.comps[0].classes_cdf,
1883           sizeof(default_nmv_context.comps[0].classes_cdf));
1884    memcpy(fc->ndvc_0_fp_cdf, default_nmv_context.comps[0].fp_cdf, sizeof(default_nmv_context.comps[0].fp_cdf));
1885    memcpy(fc->ndvc_0_hp_cdf, default_nmv_context.comps[0].hp_cdf, sizeof(default_nmv_context.comps[0].hp_cdf));
1886    memcpy(fc->ndvc_0_sign_cdf, default_nmv_context.comps[0].sign_cdf, sizeof(default_nmv_context.comps[0].sign_cdf));
1887    memcpy(fc->ndvc_1_bits_cdf, default_nmv_context.comps[1].bits_cdf, sizeof(default_nmv_context.comps[1].bits_cdf));
1888    memcpy(fc->ndvc_1_class0_cdf, default_nmv_context.comps[1].class0_cdf,
1889           sizeof(default_nmv_context.comps[1].class0_cdf));
1890    memcpy(fc->ndvc_1_class0_fp_cdf, default_nmv_context.comps[1].class0_fp_cdf,
1891           sizeof(default_nmv_context.comps[1].class0_fp_cdf));
1892    memcpy(fc->ndvc_1_class0_hp_cdf, default_nmv_context.comps[1].class0_hp_cdf,
1893           sizeof(default_nmv_context.comps[1].class0_hp_cdf));
1894    memcpy(fc->ndvc_1_classes_cdf, default_nmv_context.comps[1].classes_cdf,
1895           sizeof(default_nmv_context.comps[1].classes_cdf));
1896    memcpy(fc->ndvc_1_fp_cdf, default_nmv_context.comps[1].fp_cdf, sizeof(default_nmv_context.comps[1].fp_cdf));
1897    memcpy(fc->ndvc_1_hp_cdf, default_nmv_context.comps[1].hp_cdf, sizeof(default_nmv_context.comps[1].hp_cdf));
1898    memcpy(fc->ndvc_1_sign_cdf, default_nmv_context.comps[1].sign_cdf, sizeof(default_nmv_context.comps[1].sign_cdf));
1899 }
1900 
1901 static void
rvcn_av1_default_coef_probs(void * prob,int index)1902 rvcn_av1_default_coef_probs(void *prob, int index)
1903 {
1904    rvcn_av1_frame_context_t *fc = (rvcn_av1_frame_context_t *)prob;
1905 
1906    memcpy(fc->txb_skip_cdf, av1_default_txb_skip_cdfs[index], sizeof(av1_default_txb_skip_cdfs[index]));
1907    memcpy(fc->eob_extra_cdf, av1_default_eob_extra_cdfs[index], sizeof(av1_default_eob_extra_cdfs[index]));
1908    memcpy(fc->dc_sign_cdf, av1_default_dc_sign_cdfs[index], sizeof(av1_default_dc_sign_cdfs[index]));
1909    memcpy(fc->coeff_br_cdf, av1_default_coeff_lps_multi_cdfs[index], sizeof(av1_default_coeff_lps_multi_cdfs[index]));
1910    memcpy(fc->coeff_base_cdf, av1_default_coeff_base_multi_cdfs[index],
1911           sizeof(av1_default_coeff_base_multi_cdfs[index]));
1912    memcpy(fc->coeff_base_eob_cdf, av1_default_coeff_base_eob_multi_cdfs[index],
1913           sizeof(av1_default_coeff_base_eob_multi_cdfs[index]));
1914    memcpy(fc->eob_flag_cdf16, av1_default_eob_multi16_cdfs[index], sizeof(av1_default_eob_multi16_cdfs[index]));
1915    memcpy(fc->eob_flag_cdf32, av1_default_eob_multi32_cdfs[index], sizeof(av1_default_eob_multi32_cdfs[index]));
1916    memcpy(fc->eob_flag_cdf64, av1_default_eob_multi64_cdfs[index], sizeof(av1_default_eob_multi64_cdfs[index]));
1917    memcpy(fc->eob_flag_cdf128, av1_default_eob_multi128_cdfs[index], sizeof(av1_default_eob_multi128_cdfs[index]));
1918    memcpy(fc->eob_flag_cdf256, av1_default_eob_multi256_cdfs[index], sizeof(av1_default_eob_multi256_cdfs[index]));
1919    memcpy(fc->eob_flag_cdf512, av1_default_eob_multi512_cdfs[index], sizeof(av1_default_eob_multi512_cdfs[index]));
1920    memcpy(fc->eob_flag_cdf1024, av1_default_eob_multi1024_cdfs[index], sizeof(av1_default_eob_multi1024_cdfs[index]));
1921 }
1922 
1923 static void
rvcn_vcn4_init_mode_probs(void * prob)1924 rvcn_vcn4_init_mode_probs(void *prob)
1925 {
1926    rvcn_av1_vcn4_frame_context_t *fc = (rvcn_av1_vcn4_frame_context_t *)prob;
1927    int i;
1928 
1929    memcpy(fc->palette_y_size_cdf, default_palette_y_size_cdf, sizeof(default_palette_y_size_cdf));
1930    memcpy(fc->palette_uv_size_cdf, default_palette_uv_size_cdf, sizeof(default_palette_uv_size_cdf));
1931    memcpy(fc->palette_y_color_index_cdf, default_palette_y_color_index_cdf, sizeof(default_palette_y_color_index_cdf));
1932    memcpy(fc->palette_uv_color_index_cdf, default_palette_uv_color_index_cdf,
1933           sizeof(default_palette_uv_color_index_cdf));
1934    memcpy(fc->kf_y_cdf, default_kf_y_mode_cdf, sizeof(default_kf_y_mode_cdf));
1935    memcpy(fc->angle_delta_cdf, default_angle_delta_cdf, sizeof(default_angle_delta_cdf));
1936    memcpy(fc->comp_inter_cdf, default_comp_inter_cdf, sizeof(default_comp_inter_cdf));
1937    memcpy(fc->comp_ref_type_cdf, default_comp_ref_type_cdf, sizeof(default_comp_ref_type_cdf));
1938    memcpy(fc->uni_comp_ref_cdf, default_uni_comp_ref_cdf, sizeof(default_uni_comp_ref_cdf));
1939    memcpy(fc->palette_y_mode_cdf, default_palette_y_mode_cdf, sizeof(default_palette_y_mode_cdf));
1940    memcpy(fc->palette_uv_mode_cdf, default_palette_uv_mode_cdf, sizeof(default_palette_uv_mode_cdf));
1941    memcpy(fc->comp_ref_cdf, default_comp_ref_cdf, sizeof(default_comp_ref_cdf));
1942    memcpy(fc->comp_bwdref_cdf, default_comp_bwdref_cdf, sizeof(default_comp_bwdref_cdf));
1943    memcpy(fc->single_ref_cdf, default_single_ref_cdf, sizeof(default_single_ref_cdf));
1944    memcpy(fc->txfm_partition_cdf, default_txfm_partition_cdf, sizeof(default_txfm_partition_cdf));
1945    memcpy(fc->compound_index_cdf, default_compound_idx_cdfs, sizeof(default_compound_idx_cdfs));
1946    memcpy(fc->comp_group_idx_cdf, default_comp_group_idx_cdfs, sizeof(default_comp_group_idx_cdfs));
1947    memcpy(fc->newmv_cdf, default_newmv_cdf, sizeof(default_newmv_cdf));
1948    memcpy(fc->zeromv_cdf, default_zeromv_cdf, sizeof(default_zeromv_cdf));
1949    memcpy(fc->refmv_cdf, default_refmv_cdf, sizeof(default_refmv_cdf));
1950    memcpy(fc->drl_cdf, default_drl_cdf, sizeof(default_drl_cdf));
1951    memcpy(fc->motion_mode_cdf, default_motion_mode_cdf, sizeof(default_motion_mode_cdf));
1952    memcpy(fc->obmc_cdf, default_obmc_cdf, sizeof(default_obmc_cdf));
1953    memcpy(fc->inter_compound_mode_cdf, default_inter_compound_mode_cdf, sizeof(default_inter_compound_mode_cdf));
1954    memcpy(fc->compound_type_cdf, default_compound_type_cdf, sizeof(default_compound_type_cdf));
1955    memcpy(fc->wedge_idx_cdf, default_wedge_idx_cdf, sizeof(default_wedge_idx_cdf));
1956    memcpy(fc->interintra_cdf, default_interintra_cdf, sizeof(default_interintra_cdf));
1957    memcpy(fc->wedge_interintra_cdf, default_wedge_interintra_cdf, sizeof(default_wedge_interintra_cdf));
1958    memcpy(fc->interintra_mode_cdf, default_interintra_mode_cdf, sizeof(default_interintra_mode_cdf));
1959    memcpy(fc->pred_cdf, default_segment_pred_cdf, sizeof(default_segment_pred_cdf));
1960    memcpy(fc->switchable_restore_cdf, default_switchable_restore_cdf, sizeof(default_switchable_restore_cdf));
1961    memcpy(fc->wiener_restore_cdf, default_wiener_restore_cdf, sizeof(default_wiener_restore_cdf));
1962    memcpy(fc->sgrproj_restore_cdf, default_sgrproj_restore_cdf, sizeof(default_sgrproj_restore_cdf));
1963    memcpy(fc->y_mode_cdf, default_if_y_mode_cdf, sizeof(default_if_y_mode_cdf));
1964    memcpy(fc->uv_mode_cdf, default_uv_mode_cdf, sizeof(default_uv_mode_cdf));
1965    memcpy(fc->switchable_interp_cdf, default_switchable_interp_cdf, sizeof(default_switchable_interp_cdf));
1966    memcpy(fc->partition_cdf, default_partition_cdf, sizeof(default_partition_cdf));
1967    memcpy(fc->intra_ext_tx_cdf, &default_intra_ext_tx_cdf[1], sizeof(default_intra_ext_tx_cdf[1]) * 2);
1968    memcpy(fc->inter_ext_tx_cdf, &default_inter_ext_tx_cdf[1], sizeof(default_inter_ext_tx_cdf[1]) * 3);
1969    memcpy(fc->skip_cdfs, default_skip_cdfs, sizeof(default_skip_cdfs));
1970    memcpy(fc->intra_inter_cdf, default_intra_inter_cdf, sizeof(default_intra_inter_cdf));
1971    memcpy(fc->tree_cdf, default_seg_tree_cdf, sizeof(default_seg_tree_cdf));
1972    for (i = 0; i < SPATIAL_PREDICTION_PROBS; ++i)
1973       memcpy(fc->spatial_pred_seg_cdf[i], default_spatial_pred_seg_tree_cdf[i],
1974              sizeof(default_spatial_pred_seg_tree_cdf[i]));
1975    memcpy(fc->tx_size_cdf, default_tx_size_cdf, sizeof(default_tx_size_cdf));
1976    memcpy(fc->delta_q_cdf, default_delta_q_cdf, sizeof(default_delta_q_cdf));
1977    memcpy(fc->skip_mode_cdfs, default_skip_mode_cdfs, sizeof(default_skip_mode_cdfs));
1978    memcpy(fc->delta_lf_cdf, default_delta_lf_cdf, sizeof(default_delta_lf_cdf));
1979    memcpy(fc->delta_lf_multi_cdf, default_delta_lf_multi_cdf, sizeof(default_delta_lf_multi_cdf));
1980    memcpy(fc->cfl_sign_cdf, default_cfl_sign_cdf, sizeof(default_cfl_sign_cdf));
1981    memcpy(fc->cfl_alpha_cdf, default_cfl_alpha_cdf, sizeof(default_cfl_alpha_cdf));
1982    memcpy(fc->filter_intra_cdfs, default_filter_intra_cdfs, sizeof(default_filter_intra_cdfs));
1983    memcpy(fc->filter_intra_mode_cdf, default_filter_intra_mode_cdf, sizeof(default_filter_intra_mode_cdf));
1984    memcpy(fc->intrabc_cdf, default_intrabc_cdf, sizeof(default_intrabc_cdf));
1985 }
1986 
1987 static void
rvcn_vcn4_av1_init_mv_probs(void * prob)1988 rvcn_vcn4_av1_init_mv_probs(void *prob)
1989 {
1990    rvcn_av1_vcn4_frame_context_t *fc = (rvcn_av1_vcn4_frame_context_t *)prob;
1991 
1992    memcpy(fc->nmvc_joints_cdf, default_nmv_context.joints_cdf, sizeof(default_nmv_context.joints_cdf));
1993    memcpy(fc->nmvc_0_bits_cdf, default_nmv_context.comps[0].bits_cdf, sizeof(default_nmv_context.comps[0].bits_cdf));
1994    memcpy(fc->nmvc_0_class0_cdf, default_nmv_context.comps[0].class0_cdf,
1995           sizeof(default_nmv_context.comps[0].class0_cdf));
1996    memcpy(fc->nmvc_0_class0_fp_cdf, default_nmv_context.comps[0].class0_fp_cdf,
1997           sizeof(default_nmv_context.comps[0].class0_fp_cdf));
1998    memcpy(fc->nmvc_0_class0_hp_cdf, default_nmv_context.comps[0].class0_hp_cdf,
1999           sizeof(default_nmv_context.comps[0].class0_hp_cdf));
2000    memcpy(fc->nmvc_0_classes_cdf, default_nmv_context.comps[0].classes_cdf,
2001           sizeof(default_nmv_context.comps[0].classes_cdf));
2002    memcpy(fc->nmvc_0_fp_cdf, default_nmv_context.comps[0].fp_cdf, sizeof(default_nmv_context.comps[0].fp_cdf));
2003    memcpy(fc->nmvc_0_hp_cdf, default_nmv_context.comps[0].hp_cdf, sizeof(default_nmv_context.comps[0].hp_cdf));
2004    memcpy(fc->nmvc_0_sign_cdf, default_nmv_context.comps[0].sign_cdf, sizeof(default_nmv_context.comps[0].sign_cdf));
2005    memcpy(fc->nmvc_1_bits_cdf, default_nmv_context.comps[1].bits_cdf, sizeof(default_nmv_context.comps[1].bits_cdf));
2006    memcpy(fc->nmvc_1_class0_cdf, default_nmv_context.comps[1].class0_cdf,
2007           sizeof(default_nmv_context.comps[1].class0_cdf));
2008    memcpy(fc->nmvc_1_class0_fp_cdf, default_nmv_context.comps[1].class0_fp_cdf,
2009           sizeof(default_nmv_context.comps[1].class0_fp_cdf));
2010    memcpy(fc->nmvc_1_class0_hp_cdf, default_nmv_context.comps[1].class0_hp_cdf,
2011           sizeof(default_nmv_context.comps[1].class0_hp_cdf));
2012    memcpy(fc->nmvc_1_classes_cdf, default_nmv_context.comps[1].classes_cdf,
2013           sizeof(default_nmv_context.comps[1].classes_cdf));
2014    memcpy(fc->nmvc_1_fp_cdf, default_nmv_context.comps[1].fp_cdf, sizeof(default_nmv_context.comps[1].fp_cdf));
2015    memcpy(fc->nmvc_1_hp_cdf, default_nmv_context.comps[1].hp_cdf, sizeof(default_nmv_context.comps[1].hp_cdf));
2016    memcpy(fc->nmvc_1_sign_cdf, default_nmv_context.comps[1].sign_cdf, sizeof(default_nmv_context.comps[1].sign_cdf));
2017    memcpy(fc->ndvc_joints_cdf, default_nmv_context.joints_cdf, sizeof(default_nmv_context.joints_cdf));
2018    memcpy(fc->ndvc_0_bits_cdf, default_nmv_context.comps[0].bits_cdf, sizeof(default_nmv_context.comps[0].bits_cdf));
2019    memcpy(fc->ndvc_0_class0_cdf, default_nmv_context.comps[0].class0_cdf,
2020           sizeof(default_nmv_context.comps[0].class0_cdf));
2021    memcpy(fc->ndvc_0_class0_fp_cdf, default_nmv_context.comps[0].class0_fp_cdf,
2022           sizeof(default_nmv_context.comps[0].class0_fp_cdf));
2023    memcpy(fc->ndvc_0_class0_hp_cdf, default_nmv_context.comps[0].class0_hp_cdf,
2024           sizeof(default_nmv_context.comps[0].class0_hp_cdf));
2025    memcpy(fc->ndvc_0_classes_cdf, default_nmv_context.comps[0].classes_cdf,
2026           sizeof(default_nmv_context.comps[0].classes_cdf));
2027    memcpy(fc->ndvc_0_fp_cdf, default_nmv_context.comps[0].fp_cdf, sizeof(default_nmv_context.comps[0].fp_cdf));
2028    memcpy(fc->ndvc_0_hp_cdf, default_nmv_context.comps[0].hp_cdf, sizeof(default_nmv_context.comps[0].hp_cdf));
2029    memcpy(fc->ndvc_0_sign_cdf, default_nmv_context.comps[0].sign_cdf, sizeof(default_nmv_context.comps[0].sign_cdf));
2030    memcpy(fc->ndvc_1_bits_cdf, default_nmv_context.comps[1].bits_cdf, sizeof(default_nmv_context.comps[1].bits_cdf));
2031    memcpy(fc->ndvc_1_class0_cdf, default_nmv_context.comps[1].class0_cdf,
2032           sizeof(default_nmv_context.comps[1].class0_cdf));
2033    memcpy(fc->ndvc_1_class0_fp_cdf, default_nmv_context.comps[1].class0_fp_cdf,
2034           sizeof(default_nmv_context.comps[1].class0_fp_cdf));
2035    memcpy(fc->ndvc_1_class0_hp_cdf, default_nmv_context.comps[1].class0_hp_cdf,
2036           sizeof(default_nmv_context.comps[1].class0_hp_cdf));
2037    memcpy(fc->ndvc_1_classes_cdf, default_nmv_context.comps[1].classes_cdf,
2038           sizeof(default_nmv_context.comps[1].classes_cdf));
2039    memcpy(fc->ndvc_1_fp_cdf, default_nmv_context.comps[1].fp_cdf, sizeof(default_nmv_context.comps[1].fp_cdf));
2040    memcpy(fc->ndvc_1_hp_cdf, default_nmv_context.comps[1].hp_cdf, sizeof(default_nmv_context.comps[1].hp_cdf));
2041    memcpy(fc->ndvc_1_sign_cdf, default_nmv_context.comps[1].sign_cdf, sizeof(default_nmv_context.comps[1].sign_cdf));
2042 }
2043 
2044 static void
rvcn_vcn4_av1_default_coef_probs(void * prob,int index)2045 rvcn_vcn4_av1_default_coef_probs(void *prob, int index)
2046 {
2047    rvcn_av1_vcn4_frame_context_t *fc = (rvcn_av1_vcn4_frame_context_t *)prob;
2048    char *p;
2049    int i, j;
2050    unsigned size;
2051 
2052    memcpy(fc->txb_skip_cdf, av1_default_txb_skip_cdfs[index], sizeof(av1_default_txb_skip_cdfs[index]));
2053 
2054    p = (char *)fc->eob_extra_cdf;
2055    size = sizeof(av1_default_eob_extra_cdfs[0][0][0][0]) * EOB_COEF_CONTEXTS_VCN4;
2056    for (i = 0; i < AV1_TX_SIZES; i++) {
2057       for (j = 0; j < AV1_PLANE_TYPES; j++) {
2058          memcpy(p, &av1_default_eob_extra_cdfs[index][i][j][3], size);
2059          p += size;
2060       }
2061    }
2062 
2063    memcpy(fc->dc_sign_cdf, av1_default_dc_sign_cdfs[index], sizeof(av1_default_dc_sign_cdfs[index]));
2064    memcpy(fc->coeff_br_cdf, av1_default_coeff_lps_multi_cdfs[index], sizeof(av1_default_coeff_lps_multi_cdfs[index]));
2065    memcpy(fc->coeff_base_cdf, av1_default_coeff_base_multi_cdfs[index],
2066           sizeof(av1_default_coeff_base_multi_cdfs[index]));
2067    memcpy(fc->coeff_base_eob_cdf, av1_default_coeff_base_eob_multi_cdfs[index],
2068           sizeof(av1_default_coeff_base_eob_multi_cdfs[index]));
2069    memcpy(fc->eob_flag_cdf16, av1_default_eob_multi16_cdfs[index], sizeof(av1_default_eob_multi16_cdfs[index]));
2070    memcpy(fc->eob_flag_cdf32, av1_default_eob_multi32_cdfs[index], sizeof(av1_default_eob_multi32_cdfs[index]));
2071    memcpy(fc->eob_flag_cdf64, av1_default_eob_multi64_cdfs[index], sizeof(av1_default_eob_multi64_cdfs[index]));
2072    memcpy(fc->eob_flag_cdf128, av1_default_eob_multi128_cdfs[index], sizeof(av1_default_eob_multi128_cdfs[index]));
2073    memcpy(fc->eob_flag_cdf256, av1_default_eob_multi256_cdfs[index], sizeof(av1_default_eob_multi256_cdfs[index]));
2074    memcpy(fc->eob_flag_cdf512, av1_default_eob_multi512_cdfs[index], sizeof(av1_default_eob_multi512_cdfs[index]));
2075    memcpy(fc->eob_flag_cdf1024, av1_default_eob_multi1024_cdfs[index], sizeof(av1_default_eob_multi1024_cdfs[index]));
2076 }
2077 
2078 static bool
rvcn_dec_message_decode(struct radv_cmd_buffer * cmd_buffer,struct radv_video_session * vid,struct radv_video_session_params * params,void * ptr,void * it_probs_ptr,uint32_t * slice_offset,const struct VkVideoDecodeInfoKHR * frame_info)2079 rvcn_dec_message_decode(struct radv_cmd_buffer *cmd_buffer, struct radv_video_session *vid,
2080                         struct radv_video_session_params *params, void *ptr, void *it_probs_ptr, uint32_t *slice_offset,
2081                         const struct VkVideoDecodeInfoKHR *frame_info)
2082 {
2083    struct radv_device *device = cmd_buffer->device;
2084    rvcn_dec_message_header_t *header;
2085    rvcn_dec_message_index_t *index_codec;
2086    rvcn_dec_message_decode_t *decode;
2087    rvcn_dec_message_index_t *index_dynamic_dpb = NULL;
2088    rvcn_dec_message_dynamic_dpb_t2_t *dynamic_dpb_t2 = NULL;
2089    void *codec;
2090    unsigned sizes = 0, offset_decode, offset_codec, offset_dynamic_dpb;
2091    struct radv_image_view *dst_iv = radv_image_view_from_handle(frame_info->dstPictureResource.imageViewBinding);
2092    struct radv_image *img = dst_iv->image;
2093    struct radv_image_plane *luma = &img->planes[0];
2094    struct radv_image_plane *chroma = &img->planes[1];
2095 
2096    header = ptr;
2097    sizes += sizeof(rvcn_dec_message_header_t);
2098 
2099    index_codec = (void *)((char *)header + sizes);
2100    sizes += sizeof(rvcn_dec_message_index_t);
2101 
2102    if (vid->dpb_type == DPB_DYNAMIC_TIER_2) {
2103       index_dynamic_dpb = (void *)((char *)header + sizes);
2104       sizes += sizeof(rvcn_dec_message_index_t);
2105    }
2106 
2107    offset_decode = sizes;
2108    decode = (void *)((char *)header + sizes);
2109    sizes += sizeof(rvcn_dec_message_decode_t);
2110 
2111    if (vid->dpb_type == DPB_DYNAMIC_TIER_2) {
2112       offset_dynamic_dpb = sizes;
2113       dynamic_dpb_t2 = (void *)((char *)header + sizes);
2114       sizes += sizeof(rvcn_dec_message_dynamic_dpb_t2_t);
2115    }
2116 
2117    offset_codec = sizes;
2118    codec = (void *)((char *)header + sizes);
2119 
2120    memset(ptr, 0, sizes);
2121 
2122    header->header_size = sizeof(rvcn_dec_message_header_t);
2123    header->total_size = sizes;
2124    header->msg_type = RDECODE_MSG_DECODE;
2125    header->stream_handle = vid->stream_handle;
2126    header->status_report_feedback_number = vid->dbg_frame_cnt++;
2127 
2128    header->index[0].message_id = RDECODE_MESSAGE_DECODE;
2129    header->index[0].offset = offset_decode;
2130    header->index[0].size = sizeof(rvcn_dec_message_decode_t);
2131    header->index[0].filled = 0;
2132    header->num_buffers = 1;
2133 
2134    index_codec->offset = offset_codec;
2135    index_codec->filled = 0;
2136    ++header->num_buffers;
2137 
2138    if (vid->dpb_type == DPB_DYNAMIC_TIER_2) {
2139       index_dynamic_dpb->message_id = RDECODE_MESSAGE_DYNAMIC_DPB;
2140       index_dynamic_dpb->offset = offset_dynamic_dpb;
2141       index_dynamic_dpb->filled = 0;
2142       ++header->num_buffers;
2143       index_dynamic_dpb->size = sizeof(rvcn_dec_message_dynamic_dpb_t2_t);
2144    }
2145 
2146    decode->stream_type = vid->stream_type;
2147    decode->decode_flags = 0;
2148    decode->width_in_samples = frame_info->dstPictureResource.codedExtent.width;
2149    decode->height_in_samples = frame_info->dstPictureResource.codedExtent.height;
2150 
2151    decode->bsd_size = frame_info->srcBufferRange;
2152 
2153    decode->dt_size = dst_iv->image->planes[0].surface.total_size + dst_iv->image->planes[1].surface.total_size;
2154    decode->sct_size = 0;
2155    decode->sc_coeff_size = 0;
2156 
2157    decode->sw_ctxt_size = RDECODE_SESSION_CONTEXT_SIZE;
2158 
2159    decode->dt_pitch = luma->surface.u.gfx9.surf_pitch * luma->surface.blk_w;
2160    decode->dt_uv_pitch = chroma->surface.u.gfx9.surf_pitch * chroma->surface.blk_w;
2161 
2162    if (luma->surface.meta_offset) {
2163       fprintf(stderr, "DCC SURFACES NOT SUPPORTED.\n");
2164       return false;
2165    }
2166 
2167    decode->dt_tiling_mode = 0;
2168    decode->dt_swizzle_mode = luma->surface.u.gfx9.swizzle_mode;
2169    decode->dt_array_mode = device->physical_device->vid_addr_gfx_mode;
2170    decode->dt_field_mode = vid->interlaced ? 1 : 0;
2171    decode->dt_surf_tile_config = 0;
2172    decode->dt_uv_surf_tile_config = 0;
2173 
2174    decode->dt_luma_top_offset = luma->surface.u.gfx9.surf_offset;
2175    decode->dt_chroma_top_offset = chroma->surface.u.gfx9.surf_offset;
2176 
2177    if (decode->dt_field_mode) {
2178       decode->dt_luma_bottom_offset = luma->surface.u.gfx9.surf_offset + luma->surface.u.gfx9.surf_slice_size;
2179       decode->dt_chroma_bottom_offset = chroma->surface.u.gfx9.surf_offset + chroma->surface.u.gfx9.surf_slice_size;
2180    } else {
2181       decode->dt_luma_bottom_offset = decode->dt_luma_top_offset;
2182       decode->dt_chroma_bottom_offset = decode->dt_chroma_top_offset;
2183    }
2184    if (vid->stream_type == RDECODE_CODEC_AV1)
2185       decode->db_pitch_uv = chroma->surface.u.gfx9.surf_pitch * chroma->surface.blk_w;
2186 
2187    *slice_offset = 0;
2188 
2189    /* Intra-only decoding will only work without a setup slot for AV1
2190     * currently, other codecs require the application to pass a
2191     * setup slot for this use-case, since the FW is not able to skip write-out
2192     * for H26X.  In order to fix that properly, additional scratch space will
2193     * be needed in the video session just for intra-only DPB targets.
2194     */
2195    int dpb_update_required = 1;
2196 
2197    switch (vid->vk.op) {
2198    case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR: {
2199       index_codec->size = sizeof(rvcn_dec_message_avc_t);
2200       rvcn_dec_message_avc_t avc = get_h264_msg(vid, params, frame_info, slice_offset, &decode->width_in_samples,
2201                                                 &decode->height_in_samples, it_probs_ptr);
2202       memcpy(codec, (void *)&avc, sizeof(rvcn_dec_message_avc_t));
2203       index_codec->message_id = RDECODE_MESSAGE_AVC;
2204       break;
2205    }
2206    case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR: {
2207       index_codec->size = sizeof(rvcn_dec_message_hevc_t);
2208       rvcn_dec_message_hevc_t hevc = get_h265_msg(device, vid, params, frame_info, it_probs_ptr);
2209       memcpy(codec, (void *)&hevc, sizeof(rvcn_dec_message_hevc_t));
2210       index_codec->message_id = RDECODE_MESSAGE_HEVC;
2211       break;
2212    }
2213    case VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR: {
2214       index_codec->size = sizeof(rvcn_dec_message_av1_t);
2215       rvcn_dec_message_av1_t av1 = get_av1_msg(device, vid, params, frame_info, it_probs_ptr, &dpb_update_required);
2216       memcpy(codec, (void *)&av1, sizeof(rvcn_dec_message_av1_t));
2217       index_codec->message_id = RDECODE_MESSAGE_AV1;
2218       assert(frame_info->referenceSlotCount < 9);
2219       break;
2220    }
2221    default:
2222       unreachable("unknown operation");
2223    }
2224 
2225    if (dpb_update_required)
2226       assert(frame_info->pSetupReferenceSlot != NULL);
2227 
2228    struct radv_image_view *dpb_iv =
2229       dpb_update_required
2230          ? radv_image_view_from_handle(frame_info->pSetupReferenceSlot->pPictureResource->imageViewBinding)
2231          : NULL;
2232    struct radv_image *dpb = dpb_update_required ? dpb_iv->image : img;
2233 
2234    decode->dpb_size = (vid->dpb_type != DPB_DYNAMIC_TIER_2) ? dpb->size : 0;
2235    decode->db_pitch = dpb->planes[0].surface.u.gfx9.surf_pitch;
2236    decode->db_aligned_height = dpb->planes[0].surface.u.gfx9.surf_height;
2237    decode->db_swizzle_mode = dpb->planes[0].surface.u.gfx9.swizzle_mode;
2238    decode->db_array_mode = device->physical_device->vid_addr_gfx_mode;
2239 
2240    decode->hw_ctxt_size = vid->ctx.size;
2241 
2242    if (vid->dpb_type != DPB_DYNAMIC_TIER_2)
2243       return true;
2244 
2245    uint64_t addr;
2246    radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, dpb->bindings[0].bo);
2247    addr = radv_buffer_get_va(dpb->bindings[0].bo) + dpb->bindings[0].offset;
2248    dynamic_dpb_t2->dpbCurrLo = addr;
2249    dynamic_dpb_t2->dpbCurrHi = addr >> 32;
2250 
2251    if (vid->vk.op == VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR) {
2252       /* The following loop will fill in the references for the current frame,
2253        * this ensures all DPB addresses are "valid" (pointing at the current
2254        * decode target), so that the firmware doesn't evict things it should not.
2255        * It will not perform any actual writes to these dummy slots.
2256        */
2257       for (int i = 0; i < STD_VIDEO_AV1_NUM_REF_FRAMES; i++) {
2258          dynamic_dpb_t2->dpbAddrHi[i] = addr;
2259          dynamic_dpb_t2->dpbAddrLo[i] = addr >> 32;
2260       }
2261    }
2262 
2263    for (int i = 0; i < frame_info->referenceSlotCount; i++) {
2264       int32_t slot_idx = frame_info->pReferenceSlots[i].slotIndex;
2265       assert(slot_idx >= 0 && slot_idx < 16);
2266       struct radv_image_view *f_dpb_iv =
2267          radv_image_view_from_handle(frame_info->pReferenceSlots[i].pPictureResource->imageViewBinding);
2268       assert(f_dpb_iv != NULL);
2269       struct radv_image *dpb_img = f_dpb_iv->image;
2270 
2271       radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, dpb_img->bindings[0].bo);
2272       addr = radv_buffer_get_va(dpb_img->bindings[0].bo) + dpb_img->bindings[0].offset;
2273 
2274       dynamic_dpb_t2->dpbAddrLo[i] = addr;
2275       dynamic_dpb_t2->dpbAddrHi[i] = addr >> 32;
2276       ++dynamic_dpb_t2->dpbArraySize;
2277    }
2278 
2279    radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, dpb->bindings[0].bo);
2280    addr = radv_buffer_get_va(dpb->bindings[0].bo) + dpb->bindings[0].offset;
2281 
2282    dynamic_dpb_t2->dpbCurrLo = addr;
2283    dynamic_dpb_t2->dpbCurrHi = addr >> 32;
2284 
2285    decode->decode_flags = 1;
2286    dynamic_dpb_t2->dpbConfigFlags = 0;
2287    dynamic_dpb_t2->dpbLumaPitch = luma->surface.u.gfx9.surf_pitch;
2288    dynamic_dpb_t2->dpbLumaAlignedHeight = luma->surface.u.gfx9.surf_height;
2289    dynamic_dpb_t2->dpbLumaAlignedSize = luma->surface.u.gfx9.surf_slice_size;
2290 
2291    dynamic_dpb_t2->dpbChromaPitch = chroma->surface.u.gfx9.surf_pitch;
2292    dynamic_dpb_t2->dpbChromaAlignedHeight = chroma->surface.u.gfx9.surf_height;
2293    dynamic_dpb_t2->dpbChromaAlignedSize = chroma->surface.u.gfx9.surf_slice_size;
2294 
2295    return true;
2296 }
2297 
2298 static struct ruvd_h264
get_uvd_h264_msg(struct radv_video_session * vid,struct radv_video_session_params * params,const struct VkVideoDecodeInfoKHR * frame_info,uint32_t * slice_offset,uint32_t * width_in_samples,uint32_t * height_in_samples,void * it_ptr)2299 get_uvd_h264_msg(struct radv_video_session *vid, struct radv_video_session_params *params,
2300                  const struct VkVideoDecodeInfoKHR *frame_info, uint32_t *slice_offset, uint32_t *width_in_samples,
2301                  uint32_t *height_in_samples, void *it_ptr)
2302 {
2303    struct ruvd_h264 result;
2304    const struct VkVideoDecodeH264PictureInfoKHR *h264_pic_info =
2305       vk_find_struct_const(frame_info->pNext, VIDEO_DECODE_H264_PICTURE_INFO_KHR);
2306 
2307    *slice_offset = h264_pic_info->pSliceOffsets[0];
2308 
2309    memset(&result, 0, sizeof(result));
2310 
2311    const StdVideoH264SequenceParameterSet *sps =
2312       vk_video_find_h264_dec_std_sps(&params->vk, h264_pic_info->pStdPictureInfo->seq_parameter_set_id);
2313    switch (sps->profile_idc) {
2314    case STD_VIDEO_H264_PROFILE_IDC_BASELINE:
2315       result.profile = RUVD_H264_PROFILE_BASELINE;
2316       break;
2317    case STD_VIDEO_H264_PROFILE_IDC_MAIN:
2318       result.profile = RUVD_H264_PROFILE_MAIN;
2319       break;
2320    case STD_VIDEO_H264_PROFILE_IDC_HIGH:
2321       result.profile = RUVD_H264_PROFILE_HIGH;
2322       break;
2323    default:
2324       fprintf(stderr, "UNSUPPORTED CODEC %d\n", sps->profile_idc);
2325       result.profile = RUVD_H264_PROFILE_MAIN;
2326       break;
2327    }
2328 
2329    *width_in_samples = (sps->pic_width_in_mbs_minus1 + 1) * 16;
2330    *height_in_samples = (sps->pic_height_in_map_units_minus1 + 1) * 16;
2331    if (!sps->flags.frame_mbs_only_flag)
2332       *height_in_samples *= 2;
2333    result.level = get_h264_level(sps->level_idc);
2334 
2335    result.sps_info_flags = 0;
2336 
2337    result.sps_info_flags |= sps->flags.direct_8x8_inference_flag << 0;
2338    result.sps_info_flags |= sps->flags.mb_adaptive_frame_field_flag << 1;
2339    result.sps_info_flags |= sps->flags.frame_mbs_only_flag << 2;
2340    result.sps_info_flags |= sps->flags.delta_pic_order_always_zero_flag << 3;
2341    result.sps_info_flags |= 1 << RDECODE_SPS_INFO_H264_EXTENSION_SUPPORT_FLAG_SHIFT;
2342 
2343    result.bit_depth_luma_minus8 = sps->bit_depth_luma_minus8;
2344    result.bit_depth_chroma_minus8 = sps->bit_depth_chroma_minus8;
2345    result.log2_max_frame_num_minus4 = sps->log2_max_frame_num_minus4;
2346    result.pic_order_cnt_type = sps->pic_order_cnt_type;
2347    result.log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_pic_order_cnt_lsb_minus4;
2348 
2349    result.chroma_format = sps->chroma_format_idc;
2350 
2351    const StdVideoH264PictureParameterSet *pps =
2352       vk_video_find_h264_dec_std_pps(&params->vk, h264_pic_info->pStdPictureInfo->pic_parameter_set_id);
2353    result.pps_info_flags = 0;
2354    result.pps_info_flags |= pps->flags.transform_8x8_mode_flag << 0;
2355    result.pps_info_flags |= pps->flags.redundant_pic_cnt_present_flag << 1;
2356    result.pps_info_flags |= pps->flags.constrained_intra_pred_flag << 2;
2357    result.pps_info_flags |= pps->flags.deblocking_filter_control_present_flag << 3;
2358    result.pps_info_flags |= pps->weighted_bipred_idc << 4;
2359    result.pps_info_flags |= pps->flags.weighted_pred_flag << 6;
2360    result.pps_info_flags |= pps->flags.bottom_field_pic_order_in_frame_present_flag << 7;
2361    result.pps_info_flags |= pps->flags.entropy_coding_mode_flag << 8;
2362 
2363    result.pic_init_qp_minus26 = pps->pic_init_qp_minus26;
2364    result.chroma_qp_index_offset = pps->chroma_qp_index_offset;
2365    result.second_chroma_qp_index_offset = pps->second_chroma_qp_index_offset;
2366 
2367    StdVideoH264ScalingLists scaling_lists;
2368    vk_video_derive_h264_scaling_list(sps, pps, &scaling_lists);
2369    update_h264_scaling(result.scaling_list_4x4, result.scaling_list_8x8, &scaling_lists);
2370 
2371    memset(it_ptr, 0, IT_SCALING_TABLE_SIZE);
2372    memcpy(it_ptr, result.scaling_list_4x4, 6 * 16);
2373    memcpy((char *)it_ptr + 96, result.scaling_list_8x8, 2 * 64);
2374 
2375    result.num_ref_idx_l0_active_minus1 = pps->num_ref_idx_l0_default_active_minus1;
2376    result.num_ref_idx_l1_active_minus1 = pps->num_ref_idx_l1_default_active_minus1;
2377 
2378    result.curr_field_order_cnt_list[0] = h264_pic_info->pStdPictureInfo->PicOrderCnt[0];
2379    result.curr_field_order_cnt_list[1] = h264_pic_info->pStdPictureInfo->PicOrderCnt[1];
2380 
2381    result.frame_num = h264_pic_info->pStdPictureInfo->frame_num;
2382 
2383    result.num_ref_frames = sps->max_num_ref_frames;
2384    memset(result.ref_frame_list, 0xff, sizeof(unsigned char) * 16);
2385    memset(result.frame_num_list, 0, sizeof(unsigned int) * 16);
2386    for (unsigned i = 0; i < frame_info->referenceSlotCount; i++) {
2387       int idx = frame_info->pReferenceSlots[i].slotIndex;
2388       const struct VkVideoDecodeH264DpbSlotInfoKHR *dpb_slot =
2389          vk_find_struct_const(frame_info->pReferenceSlots[i].pNext, VIDEO_DECODE_H264_DPB_SLOT_INFO_KHR);
2390 
2391       result.frame_num_list[i] = dpb_slot->pStdReferenceInfo->FrameNum;
2392       result.field_order_cnt_list[i][0] = dpb_slot->pStdReferenceInfo->PicOrderCnt[0];
2393       result.field_order_cnt_list[i][1] = dpb_slot->pStdReferenceInfo->PicOrderCnt[1];
2394 
2395       result.ref_frame_list[i] = idx;
2396 
2397       if (dpb_slot->pStdReferenceInfo->flags.used_for_long_term_reference)
2398          result.ref_frame_list[i] |= 0x80;
2399    }
2400    result.curr_pic_ref_frame_num = frame_info->referenceSlotCount;
2401    result.decoded_pic_idx = frame_info->pSetupReferenceSlot->slotIndex;
2402 
2403    return result;
2404 }
2405 
2406 static struct ruvd_h265
get_uvd_h265_msg(struct radv_device * device,struct radv_video_session * vid,struct radv_video_session_params * params,const struct VkVideoDecodeInfoKHR * frame_info,void * it_ptr)2407 get_uvd_h265_msg(struct radv_device *device, struct radv_video_session *vid, struct radv_video_session_params *params,
2408                  const struct VkVideoDecodeInfoKHR *frame_info, void *it_ptr)
2409 {
2410    struct ruvd_h265 result;
2411    int i, j;
2412    const struct VkVideoDecodeH265PictureInfoKHR *h265_pic_info =
2413       vk_find_struct_const(frame_info->pNext, VIDEO_DECODE_H265_PICTURE_INFO_KHR);
2414 
2415    memset(&result, 0, sizeof(result));
2416 
2417    const StdVideoH265SequenceParameterSet *sps =
2418       vk_video_find_h265_dec_std_sps(&params->vk, h265_pic_info->pStdPictureInfo->pps_seq_parameter_set_id);
2419    const StdVideoH265PictureParameterSet *pps =
2420       vk_video_find_h265_dec_std_pps(&params->vk, h265_pic_info->pStdPictureInfo->pps_pic_parameter_set_id);
2421 
2422    result.sps_info_flags = 0;
2423    result.sps_info_flags |= sps->flags.scaling_list_enabled_flag << 0;
2424    result.sps_info_flags |= sps->flags.amp_enabled_flag << 1;
2425    result.sps_info_flags |= sps->flags.sample_adaptive_offset_enabled_flag << 2;
2426    result.sps_info_flags |= sps->flags.pcm_enabled_flag << 3;
2427    result.sps_info_flags |= sps->flags.pcm_loop_filter_disabled_flag << 4;
2428    result.sps_info_flags |= sps->flags.long_term_ref_pics_present_flag << 5;
2429    result.sps_info_flags |= sps->flags.sps_temporal_mvp_enabled_flag << 6;
2430    result.sps_info_flags |= sps->flags.strong_intra_smoothing_enabled_flag << 7;
2431    result.sps_info_flags |= sps->flags.separate_colour_plane_flag << 8;
2432 
2433    if (device->physical_device->rad_info.family == CHIP_CARRIZO)
2434       result.sps_info_flags |= 1 << 9;
2435 
2436    result.chroma_format = sps->chroma_format_idc;
2437    result.bit_depth_luma_minus8 = sps->bit_depth_luma_minus8;
2438    result.bit_depth_chroma_minus8 = sps->bit_depth_chroma_minus8;
2439    result.log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_pic_order_cnt_lsb_minus4;
2440    result.sps_max_dec_pic_buffering_minus1 =
2441       sps->pDecPicBufMgr->max_dec_pic_buffering_minus1[sps->sps_max_sub_layers_minus1];
2442    result.log2_min_luma_coding_block_size_minus3 = sps->log2_min_luma_coding_block_size_minus3;
2443    result.log2_diff_max_min_luma_coding_block_size = sps->log2_diff_max_min_luma_coding_block_size;
2444    result.log2_min_transform_block_size_minus2 = sps->log2_min_luma_transform_block_size_minus2;
2445    result.log2_diff_max_min_transform_block_size = sps->log2_diff_max_min_luma_transform_block_size;
2446    result.max_transform_hierarchy_depth_inter = sps->max_transform_hierarchy_depth_inter;
2447    result.max_transform_hierarchy_depth_intra = sps->max_transform_hierarchy_depth_intra;
2448    if (sps->flags.pcm_enabled_flag) {
2449       result.pcm_sample_bit_depth_luma_minus1 = sps->pcm_sample_bit_depth_luma_minus1;
2450       result.pcm_sample_bit_depth_chroma_minus1 = sps->pcm_sample_bit_depth_chroma_minus1;
2451       result.log2_min_pcm_luma_coding_block_size_minus3 = sps->log2_min_pcm_luma_coding_block_size_minus3;
2452       result.log2_diff_max_min_pcm_luma_coding_block_size = sps->log2_diff_max_min_pcm_luma_coding_block_size;
2453    }
2454    result.num_short_term_ref_pic_sets = sps->num_short_term_ref_pic_sets;
2455 
2456    result.pps_info_flags = 0;
2457    result.pps_info_flags |= pps->flags.dependent_slice_segments_enabled_flag << 0;
2458    result.pps_info_flags |= pps->flags.output_flag_present_flag << 1;
2459    result.pps_info_flags |= pps->flags.sign_data_hiding_enabled_flag << 2;
2460    result.pps_info_flags |= pps->flags.cabac_init_present_flag << 3;
2461    result.pps_info_flags |= pps->flags.constrained_intra_pred_flag << 4;
2462    result.pps_info_flags |= pps->flags.transform_skip_enabled_flag << 5;
2463    result.pps_info_flags |= pps->flags.cu_qp_delta_enabled_flag << 6;
2464    result.pps_info_flags |= pps->flags.pps_slice_chroma_qp_offsets_present_flag << 7;
2465    result.pps_info_flags |= pps->flags.weighted_pred_flag << 8;
2466    result.pps_info_flags |= pps->flags.weighted_bipred_flag << 9;
2467    result.pps_info_flags |= pps->flags.transquant_bypass_enabled_flag << 10;
2468    result.pps_info_flags |= pps->flags.tiles_enabled_flag << 11;
2469    result.pps_info_flags |= pps->flags.entropy_coding_sync_enabled_flag << 12;
2470    result.pps_info_flags |= pps->flags.uniform_spacing_flag << 13;
2471    result.pps_info_flags |= pps->flags.loop_filter_across_tiles_enabled_flag << 14;
2472    result.pps_info_flags |= pps->flags.pps_loop_filter_across_slices_enabled_flag << 15;
2473    result.pps_info_flags |= pps->flags.deblocking_filter_override_enabled_flag << 16;
2474    result.pps_info_flags |= pps->flags.pps_deblocking_filter_disabled_flag << 17;
2475    result.pps_info_flags |= pps->flags.lists_modification_present_flag << 18;
2476    result.pps_info_flags |= pps->flags.slice_segment_header_extension_present_flag << 19;
2477 
2478    result.num_extra_slice_header_bits = pps->num_extra_slice_header_bits;
2479    result.num_long_term_ref_pic_sps = sps->num_long_term_ref_pics_sps;
2480    result.num_ref_idx_l0_default_active_minus1 = pps->num_ref_idx_l0_default_active_minus1;
2481    result.num_ref_idx_l1_default_active_minus1 = pps->num_ref_idx_l1_default_active_minus1;
2482    result.pps_cb_qp_offset = pps->pps_cb_qp_offset;
2483    result.pps_cr_qp_offset = pps->pps_cr_qp_offset;
2484    result.pps_beta_offset_div2 = pps->pps_beta_offset_div2;
2485    result.pps_tc_offset_div2 = pps->pps_tc_offset_div2;
2486    result.diff_cu_qp_delta_depth = pps->diff_cu_qp_delta_depth;
2487    result.num_tile_columns_minus1 = pps->num_tile_columns_minus1;
2488    result.num_tile_rows_minus1 = pps->num_tile_rows_minus1;
2489    result.log2_parallel_merge_level_minus2 = pps->log2_parallel_merge_level_minus2;
2490    result.init_qp_minus26 = pps->init_qp_minus26;
2491 
2492    for (i = 0; i < 19; ++i)
2493       result.column_width_minus1[i] = pps->column_width_minus1[i];
2494 
2495    for (i = 0; i < 21; ++i)
2496       result.row_height_minus1[i] = pps->row_height_minus1[i];
2497 
2498    result.num_delta_pocs_ref_rps_idx = h265_pic_info->pStdPictureInfo->NumDeltaPocsOfRefRpsIdx;
2499    result.curr_poc = h265_pic_info->pStdPictureInfo->PicOrderCntVal;
2500 
2501    uint8_t idxs[16];
2502    memset(result.poc_list, 0, 16 * sizeof(int));
2503    memset(result.ref_pic_list, 0x7f, 16);
2504    memset(idxs, 0xff, 16);
2505    for (i = 0; i < frame_info->referenceSlotCount; i++) {
2506       const struct VkVideoDecodeH265DpbSlotInfoKHR *dpb_slot =
2507          vk_find_struct_const(frame_info->pReferenceSlots[i].pNext, VIDEO_DECODE_H265_DPB_SLOT_INFO_KHR);
2508       int idx = frame_info->pReferenceSlots[i].slotIndex;
2509       result.poc_list[i] = dpb_slot->pStdReferenceInfo->PicOrderCntVal;
2510       result.ref_pic_list[i] = idx;
2511       idxs[idx] = i;
2512    }
2513    result.curr_idx = frame_info->pSetupReferenceSlot->slotIndex;
2514 
2515 #define IDXS(x) ((x) == 0xff ? 0xff : idxs[(x)])
2516    for (i = 0; i < 8; ++i)
2517       result.ref_pic_set_st_curr_before[i] = IDXS(h265_pic_info->pStdPictureInfo->RefPicSetStCurrBefore[i]);
2518 
2519    for (i = 0; i < 8; ++i)
2520       result.ref_pic_set_st_curr_after[i] = IDXS(h265_pic_info->pStdPictureInfo->RefPicSetStCurrAfter[i]);
2521 
2522    for (i = 0; i < 8; ++i)
2523       result.ref_pic_set_lt_curr[i] = IDXS(h265_pic_info->pStdPictureInfo->RefPicSetLtCurr[i]);
2524 
2525    const StdVideoH265ScalingLists *scaling_lists = NULL;
2526    if (pps->flags.pps_scaling_list_data_present_flag)
2527       scaling_lists = pps->pScalingLists;
2528    else if (sps->flags.sps_scaling_list_data_present_flag)
2529       scaling_lists = sps->pScalingLists;
2530 
2531    update_h265_scaling(it_ptr, scaling_lists);
2532    if (scaling_lists) {
2533       for (i = 0; i < STD_VIDEO_H265_SCALING_LIST_16X16_NUM_LISTS; ++i)
2534          result.ucScalingListDCCoefSizeID2[i] = scaling_lists->ScalingListDCCoef16x16[i];
2535 
2536       for (i = 0; i < STD_VIDEO_H265_SCALING_LIST_32X32_NUM_LISTS; ++i)
2537          result.ucScalingListDCCoefSizeID3[i] = scaling_lists->ScalingListDCCoef32x32[i];
2538    }
2539 
2540    for (i = 0; i < 2; i++) {
2541       for (j = 0; j < 15; j++)
2542          result.direct_reflist[i][j] = 0xff;
2543    }
2544 
2545    if (vid->vk.h265.profile_idc == STD_VIDEO_H265_PROFILE_IDC_MAIN_10) {
2546       if (vid->vk.picture_format == VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16) {
2547          result.p010_mode = 1;
2548          result.msb_mode = 1;
2549       } else {
2550          result.p010_mode = 0;
2551          result.luma_10to8 = 5;
2552          result.chroma_10to8 = 5;
2553          result.sclr_luma10to8 = 4;
2554          result.sclr_chroma10to8 = 4;
2555       }
2556    }
2557 
2558    return result;
2559 }
2560 
2561 static unsigned
texture_offset_legacy(struct radeon_surf * surface,unsigned layer)2562 texture_offset_legacy(struct radeon_surf *surface, unsigned layer)
2563 {
2564    return (uint64_t)surface->u.legacy.level[0].offset_256B * 256 +
2565           layer * (uint64_t)surface->u.legacy.level[0].slice_size_dw * 4;
2566 }
2567 
2568 static bool
ruvd_dec_message_decode(struct radv_device * device,struct radv_video_session * vid,struct radv_video_session_params * params,void * ptr,void * it_ptr,uint32_t * slice_offset,const struct VkVideoDecodeInfoKHR * frame_info)2569 ruvd_dec_message_decode(struct radv_device *device, struct radv_video_session *vid,
2570                         struct radv_video_session_params *params, void *ptr, void *it_ptr, uint32_t *slice_offset,
2571                         const struct VkVideoDecodeInfoKHR *frame_info)
2572 {
2573    struct ruvd_msg *msg = ptr;
2574    struct radv_image_view *dst_iv = radv_image_view_from_handle(frame_info->dstPictureResource.imageViewBinding);
2575    struct radv_image *img = dst_iv->image;
2576    struct radv_image_plane *luma = &img->planes[0];
2577    struct radv_image_plane *chroma = &img->planes[1];
2578    struct radv_image_view *dpb_iv =
2579       radv_image_view_from_handle(frame_info->pSetupReferenceSlot->pPictureResource->imageViewBinding);
2580    struct radv_image *dpb = dpb_iv->image;
2581 
2582    memset(msg, 0, sizeof(struct ruvd_msg));
2583    msg->size = sizeof(*msg);
2584    msg->msg_type = RUVD_MSG_DECODE;
2585    msg->stream_handle = vid->stream_handle;
2586    msg->status_report_feedback_number = vid->dbg_frame_cnt++;
2587 
2588    msg->body.decode.stream_type = vid->stream_type;
2589    msg->body.decode.decode_flags = 0x1;
2590    msg->body.decode.width_in_samples = frame_info->dstPictureResource.codedExtent.width;
2591    msg->body.decode.height_in_samples = frame_info->dstPictureResource.codedExtent.height;
2592 
2593    msg->body.decode.dpb_size = (vid->dpb_type != DPB_DYNAMIC_TIER_2) ? dpb->size : 0;
2594    msg->body.decode.bsd_size = frame_info->srcBufferRange;
2595    msg->body.decode.db_pitch = align(frame_info->dstPictureResource.codedExtent.width, vid->db_alignment);
2596 
2597    if (vid->stream_type == RUVD_CODEC_H264_PERF && device->physical_device->rad_info.family >= CHIP_POLARIS10)
2598       msg->body.decode.dpb_reserved = vid->ctx.size;
2599 
2600    *slice_offset = 0;
2601    switch (vid->vk.op) {
2602    case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR: {
2603       msg->body.decode.codec.h264 =
2604          get_uvd_h264_msg(vid, params, frame_info, slice_offset, &msg->body.decode.width_in_samples,
2605                           &msg->body.decode.height_in_samples, it_ptr);
2606       break;
2607    }
2608    case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR: {
2609       msg->body.decode.codec.h265 = get_uvd_h265_msg(device, vid, params, frame_info, it_ptr);
2610 
2611       if (vid->ctx.mem)
2612          msg->body.decode.dpb_reserved = vid->ctx.size;
2613       break;
2614    }
2615    default:
2616       return false;
2617    }
2618 
2619    msg->body.decode.dt_field_mode = false;
2620 
2621    if (device->physical_device->rad_info.gfx_level >= GFX9) {
2622       msg->body.decode.dt_pitch = luma->surface.u.gfx9.surf_pitch * luma->surface.blk_w;
2623       msg->body.decode.dt_tiling_mode = RUVD_TILE_LINEAR;
2624       msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_LINEAR;
2625       msg->body.decode.dt_luma_top_offset = luma->surface.u.gfx9.surf_offset;
2626       msg->body.decode.dt_chroma_top_offset = chroma->surface.u.gfx9.surf_offset;
2627       if (msg->body.decode.dt_field_mode) {
2628          msg->body.decode.dt_luma_bottom_offset =
2629             luma->surface.u.gfx9.surf_offset + luma->surface.u.gfx9.surf_slice_size;
2630          msg->body.decode.dt_chroma_bottom_offset =
2631             chroma->surface.u.gfx9.surf_offset + chroma->surface.u.gfx9.surf_slice_size;
2632       } else {
2633          msg->body.decode.dt_luma_bottom_offset = msg->body.decode.dt_luma_top_offset;
2634          msg->body.decode.dt_chroma_bottom_offset = msg->body.decode.dt_chroma_top_offset;
2635       }
2636       msg->body.decode.dt_surf_tile_config = 0;
2637    } else {
2638       msg->body.decode.dt_pitch = luma->surface.u.legacy.level[0].nblk_x * luma->surface.blk_w;
2639       switch (luma->surface.u.legacy.level[0].mode) {
2640       case RADEON_SURF_MODE_LINEAR_ALIGNED:
2641          msg->body.decode.dt_tiling_mode = RUVD_TILE_LINEAR;
2642          msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_LINEAR;
2643          break;
2644       case RADEON_SURF_MODE_1D:
2645          msg->body.decode.dt_tiling_mode = RUVD_TILE_8X8;
2646          msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_1D_THIN;
2647          break;
2648       case RADEON_SURF_MODE_2D:
2649          msg->body.decode.dt_tiling_mode = RUVD_TILE_8X8;
2650          msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_2D_THIN;
2651          break;
2652       default:
2653          assert(0);
2654          break;
2655       }
2656 
2657       msg->body.decode.dt_luma_top_offset = texture_offset_legacy(&luma->surface, 0);
2658       if (chroma)
2659          msg->body.decode.dt_chroma_top_offset = texture_offset_legacy(&chroma->surface, 0);
2660       if (msg->body.decode.dt_field_mode) {
2661          msg->body.decode.dt_luma_bottom_offset = texture_offset_legacy(&luma->surface, 1);
2662          if (chroma)
2663             msg->body.decode.dt_chroma_bottom_offset = texture_offset_legacy(&chroma->surface, 1);
2664       } else {
2665          msg->body.decode.dt_luma_bottom_offset = msg->body.decode.dt_luma_top_offset;
2666          msg->body.decode.dt_chroma_bottom_offset = msg->body.decode.dt_chroma_top_offset;
2667       }
2668 
2669       if (chroma) {
2670          assert(luma->surface.u.legacy.bankw == chroma->surface.u.legacy.bankw);
2671          assert(luma->surface.u.legacy.bankh == chroma->surface.u.legacy.bankh);
2672          assert(luma->surface.u.legacy.mtilea == chroma->surface.u.legacy.mtilea);
2673       }
2674 
2675       msg->body.decode.dt_surf_tile_config |= RUVD_BANK_WIDTH(util_logbase2(luma->surface.u.legacy.bankw));
2676       msg->body.decode.dt_surf_tile_config |= RUVD_BANK_HEIGHT(util_logbase2(luma->surface.u.legacy.bankh));
2677       msg->body.decode.dt_surf_tile_config |=
2678          RUVD_MACRO_TILE_ASPECT_RATIO(util_logbase2(luma->surface.u.legacy.mtilea));
2679    }
2680 
2681    if (device->physical_device->rad_info.family >= CHIP_STONEY)
2682       msg->body.decode.dt_wa_chroma_top_offset = msg->body.decode.dt_pitch / 2;
2683 
2684    msg->body.decode.db_surf_tile_config = msg->body.decode.dt_surf_tile_config;
2685    msg->body.decode.extension_support = 0x1;
2686 
2687    return true;
2688 }
2689 
2690 static void
ruvd_dec_message_create(struct radv_video_session * vid,void * ptr)2691 ruvd_dec_message_create(struct radv_video_session *vid, void *ptr)
2692 {
2693    struct ruvd_msg *msg = ptr;
2694 
2695    memset(ptr, 0, sizeof(*msg));
2696    msg->size = sizeof(*msg);
2697    msg->msg_type = RUVD_MSG_CREATE;
2698    msg->stream_handle = vid->stream_handle;
2699    msg->body.create.stream_type = vid->stream_type;
2700    msg->body.create.width_in_samples = vid->vk.max_coded.width;
2701    msg->body.create.height_in_samples = vid->vk.max_coded.height;
2702 }
2703 
2704 VKAPI_ATTR void VKAPI_CALL
radv_CmdBeginVideoCodingKHR(VkCommandBuffer commandBuffer,const VkVideoBeginCodingInfoKHR * pBeginInfo)2705 radv_CmdBeginVideoCodingKHR(VkCommandBuffer commandBuffer, const VkVideoBeginCodingInfoKHR *pBeginInfo)
2706 {
2707    RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
2708    RADV_FROM_HANDLE(radv_video_session, vid, pBeginInfo->videoSession);
2709    RADV_FROM_HANDLE(radv_video_session_params, params, pBeginInfo->videoSessionParameters);
2710 
2711    cmd_buffer->video.vid = vid;
2712    cmd_buffer->video.params = params;
2713 }
2714 
2715 static void
radv_vcn_cmd_reset(struct radv_cmd_buffer * cmd_buffer)2716 radv_vcn_cmd_reset(struct radv_cmd_buffer *cmd_buffer)
2717 {
2718    struct radv_video_session *vid = cmd_buffer->video.vid;
2719    struct radv_physical_device *pdev = cmd_buffer->device->physical_device;
2720    uint32_t size = sizeof(rvcn_dec_message_header_t) + sizeof(rvcn_dec_message_create_t);
2721 
2722    void *ptr;
2723    uint32_t out_offset;
2724 
2725    if (vid->stream_type == RDECODE_CODEC_AV1) {
2726       unsigned frame_ctxt_size = pdev->av1_version == RDECODE_AV1_VER_0
2727                                     ? align(sizeof(rvcn_av1_frame_context_t), 2048)
2728                                     : align(sizeof(rvcn_av1_vcn4_frame_context_t), 2048);
2729 
2730       uint8_t *ctxptr = cmd_buffer->device->ws->buffer_map(vid->ctx.mem->bo);
2731       ctxptr += vid->ctx.offset;
2732       if (pdev->av1_version == RDECODE_AV1_VER_0) {
2733          for (unsigned i = 0; i < 4; ++i) {
2734             rvcn_av1_init_mode_probs((void *)(ctxptr + i * frame_ctxt_size));
2735             rvcn_av1_init_mv_probs((void *)(ctxptr + i * frame_ctxt_size));
2736             rvcn_av1_default_coef_probs((void *)(ctxptr + i * frame_ctxt_size), i);
2737          }
2738       } else {
2739          for (unsigned i = 0; i < 4; ++i) {
2740             rvcn_vcn4_init_mode_probs((void *)(ctxptr + i * frame_ctxt_size));
2741             rvcn_vcn4_av1_init_mv_probs((void *)(ctxptr + i * frame_ctxt_size));
2742             rvcn_vcn4_av1_default_coef_probs((void *)(ctxptr + i * frame_ctxt_size), i);
2743          }
2744       }
2745       cmd_buffer->device->ws->buffer_unmap(vid->ctx.mem->bo);
2746    }
2747    radv_vid_buffer_upload_alloc(cmd_buffer, size, &out_offset, &ptr);
2748 
2749    if (cmd_buffer->device->physical_device->vid_decode_ip == AMD_IP_VCN_UNIFIED)
2750       radv_vcn_sq_start(cmd_buffer);
2751 
2752    rvcn_dec_message_create(vid, ptr, size);
2753    send_cmd(cmd_buffer, RDECODE_CMD_SESSION_CONTEXT_BUFFER, vid->sessionctx.mem->bo, vid->sessionctx.offset);
2754    send_cmd(cmd_buffer, RDECODE_CMD_MSG_BUFFER, cmd_buffer->upload.upload_bo, out_offset);
2755    /* pad out the IB to the 16 dword boundary - otherwise the fw seems to be unhappy */
2756 
2757    if (cmd_buffer->device->physical_device->vid_decode_ip != AMD_IP_VCN_UNIFIED) {
2758       radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 8);
2759       for (unsigned i = 0; i < 8; i++)
2760          radeon_emit(cmd_buffer->cs, 0x81ff);
2761    } else
2762       radv_vcn_sq_tail(cmd_buffer->cs, &cmd_buffer->video.sq);
2763 }
2764 
2765 static void
radv_uvd_cmd_reset(struct radv_cmd_buffer * cmd_buffer)2766 radv_uvd_cmd_reset(struct radv_cmd_buffer *cmd_buffer)
2767 {
2768    struct radv_video_session *vid = cmd_buffer->video.vid;
2769    uint32_t size = sizeof(struct ruvd_msg);
2770    void *ptr;
2771    uint32_t out_offset;
2772    radv_vid_buffer_upload_alloc(cmd_buffer, size, &out_offset, &ptr);
2773 
2774    ruvd_dec_message_create(vid, ptr);
2775    if (vid->sessionctx.mem)
2776       send_cmd(cmd_buffer, RDECODE_CMD_SESSION_CONTEXT_BUFFER, vid->sessionctx.mem->bo, vid->sessionctx.offset);
2777    send_cmd(cmd_buffer, RDECODE_CMD_MSG_BUFFER, cmd_buffer->upload.upload_bo, out_offset);
2778 
2779    /* pad out the IB to the 16 dword boundary - otherwise the fw seems to be unhappy */
2780    int padsize = vid->sessionctx.mem ? 4 : 6;
2781    radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, padsize);
2782    for (unsigned i = 0; i < padsize; i++)
2783       radeon_emit(cmd_buffer->cs, PKT2_NOP_PAD);
2784 }
2785 
2786 VKAPI_ATTR void VKAPI_CALL
radv_CmdControlVideoCodingKHR(VkCommandBuffer commandBuffer,const VkVideoCodingControlInfoKHR * pCodingControlInfo)2787 radv_CmdControlVideoCodingKHR(VkCommandBuffer commandBuffer, const VkVideoCodingControlInfoKHR *pCodingControlInfo)
2788 {
2789    RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
2790    if (pCodingControlInfo->flags & VK_VIDEO_CODING_CONTROL_RESET_BIT_KHR) {
2791       if (radv_has_uvd(cmd_buffer->device->physical_device))
2792          radv_uvd_cmd_reset(cmd_buffer);
2793       else
2794          radv_vcn_cmd_reset(cmd_buffer);
2795    }
2796 }
2797 
2798 VKAPI_ATTR void VKAPI_CALL
radv_CmdEndVideoCodingKHR(VkCommandBuffer commandBuffer,const VkVideoEndCodingInfoKHR * pEndCodingInfo)2799 radv_CmdEndVideoCodingKHR(VkCommandBuffer commandBuffer, const VkVideoEndCodingInfoKHR *pEndCodingInfo)
2800 {
2801 }
2802 
2803 static void
radv_uvd_decode_video(struct radv_cmd_buffer * cmd_buffer,const VkVideoDecodeInfoKHR * frame_info)2804 radv_uvd_decode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoDecodeInfoKHR *frame_info)
2805 {
2806    RADV_FROM_HANDLE(radv_buffer, src_buffer, frame_info->srcBuffer);
2807    struct radv_video_session *vid = cmd_buffer->video.vid;
2808    struct radv_video_session_params *params = cmd_buffer->video.params;
2809    unsigned size = sizeof(struct ruvd_msg);
2810    void *ptr, *fb_ptr, *it_probs_ptr = NULL;
2811    uint32_t out_offset, fb_offset, it_probs_offset = 0;
2812    struct radeon_winsys_bo *msg_bo, *fb_bo, *it_probs_bo = NULL;
2813    unsigned fb_size =
2814       (cmd_buffer->device->physical_device->rad_info.family == CHIP_TONGA) ? FB_BUFFER_SIZE_TONGA : FB_BUFFER_SIZE;
2815 
2816    radv_vid_buffer_upload_alloc(cmd_buffer, fb_size, &fb_offset, &fb_ptr);
2817    fb_bo = cmd_buffer->upload.upload_bo;
2818    if (have_it(vid)) {
2819       radv_vid_buffer_upload_alloc(cmd_buffer, IT_SCALING_TABLE_SIZE, &it_probs_offset, &it_probs_ptr);
2820       it_probs_bo = cmd_buffer->upload.upload_bo;
2821    }
2822 
2823    radv_vid_buffer_upload_alloc(cmd_buffer, size, &out_offset, &ptr);
2824    msg_bo = cmd_buffer->upload.upload_bo;
2825 
2826    uint32_t slice_offset;
2827    ruvd_dec_message_decode(cmd_buffer->device, vid, params, ptr, it_probs_ptr, &slice_offset, frame_info);
2828    rvcn_dec_message_feedback(fb_ptr);
2829    if (vid->sessionctx.mem)
2830       send_cmd(cmd_buffer, RDECODE_CMD_SESSION_CONTEXT_BUFFER, vid->sessionctx.mem->bo, vid->sessionctx.offset);
2831    send_cmd(cmd_buffer, RDECODE_CMD_MSG_BUFFER, msg_bo, out_offset);
2832 
2833    if (vid->dpb_type != DPB_DYNAMIC_TIER_2) {
2834       struct radv_image_view *dpb_iv =
2835          radv_image_view_from_handle(frame_info->pSetupReferenceSlot->pPictureResource->imageViewBinding);
2836       struct radv_image *dpb = dpb_iv->image;
2837       send_cmd(cmd_buffer, RDECODE_CMD_DPB_BUFFER, dpb->bindings[0].bo, dpb->bindings[0].offset);
2838    }
2839 
2840    if (vid->ctx.mem)
2841       send_cmd(cmd_buffer, RDECODE_CMD_CONTEXT_BUFFER, vid->ctx.mem->bo, vid->ctx.offset);
2842 
2843    send_cmd(cmd_buffer, RDECODE_CMD_BITSTREAM_BUFFER, src_buffer->bo,
2844             src_buffer->offset + frame_info->srcBufferOffset + slice_offset);
2845 
2846    struct radv_image_view *dst_iv = radv_image_view_from_handle(frame_info->dstPictureResource.imageViewBinding);
2847    struct radv_image *img = dst_iv->image;
2848    send_cmd(cmd_buffer, RDECODE_CMD_DECODING_TARGET_BUFFER, img->bindings[0].bo, img->bindings[0].offset);
2849    send_cmd(cmd_buffer, RDECODE_CMD_FEEDBACK_BUFFER, fb_bo, fb_offset);
2850    if (have_it(vid))
2851       send_cmd(cmd_buffer, RDECODE_CMD_IT_SCALING_TABLE_BUFFER, it_probs_bo, it_probs_offset);
2852 
2853    radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 2);
2854    set_reg(cmd_buffer, cmd_buffer->device->physical_device->vid_dec_reg.cntl, 1);
2855 }
2856 
2857 static void
radv_vcn_decode_video(struct radv_cmd_buffer * cmd_buffer,const VkVideoDecodeInfoKHR * frame_info)2858 radv_vcn_decode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoDecodeInfoKHR *frame_info)
2859 {
2860    RADV_FROM_HANDLE(radv_buffer, src_buffer, frame_info->srcBuffer);
2861    struct radv_video_session *vid = cmd_buffer->video.vid;
2862    struct radv_video_session_params *params = cmd_buffer->video.params;
2863    unsigned size = 0;
2864    void *ptr, *fb_ptr, *it_probs_ptr = NULL;
2865    uint32_t out_offset, fb_offset, it_probs_offset = 0;
2866    struct radeon_winsys_bo *msg_bo, *fb_bo, *it_probs_bo = NULL;
2867 
2868    size += sizeof(rvcn_dec_message_header_t); /* header */
2869    size += sizeof(rvcn_dec_message_index_t);  /* codec */
2870    if (vid->dpb_type == DPB_DYNAMIC_TIER_2) {
2871       size += sizeof(rvcn_dec_message_index_t);
2872       size += sizeof(rvcn_dec_message_dynamic_dpb_t2_t);
2873    }
2874    size += sizeof(rvcn_dec_message_decode_t); /* decode */
2875    switch (vid->vk.op) {
2876    case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR:
2877       size += sizeof(rvcn_dec_message_avc_t);
2878       break;
2879    case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR:
2880       size += sizeof(rvcn_dec_message_hevc_t);
2881       break;
2882    case VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR:
2883       size += sizeof(rvcn_dec_message_av1_t);
2884       break;
2885    default:
2886       unreachable("unsupported codec.");
2887    }
2888 
2889    radv_vid_buffer_upload_alloc(cmd_buffer, FB_BUFFER_SIZE, &fb_offset, &fb_ptr);
2890    fb_bo = cmd_buffer->upload.upload_bo;
2891    if (have_it(vid)) {
2892       radv_vid_buffer_upload_alloc(cmd_buffer, IT_SCALING_TABLE_SIZE, &it_probs_offset, &it_probs_ptr);
2893       it_probs_bo = cmd_buffer->upload.upload_bo;
2894    } else if (have_probs(vid)) {
2895       radv_vid_buffer_upload_alloc(cmd_buffer, sizeof(rvcn_dec_av1_segment_fg_t), &it_probs_offset, &it_probs_ptr);
2896       it_probs_bo = cmd_buffer->upload.upload_bo;
2897    }
2898 
2899    radv_vid_buffer_upload_alloc(cmd_buffer, size, &out_offset, &ptr);
2900    msg_bo = cmd_buffer->upload.upload_bo;
2901 
2902    if (cmd_buffer->device->physical_device->vid_decode_ip == AMD_IP_VCN_UNIFIED)
2903       radv_vcn_sq_start(cmd_buffer);
2904 
2905    uint32_t slice_offset;
2906    rvcn_dec_message_decode(cmd_buffer, vid, params, ptr, it_probs_ptr, &slice_offset, frame_info);
2907    rvcn_dec_message_feedback(fb_ptr);
2908    send_cmd(cmd_buffer, RDECODE_CMD_SESSION_CONTEXT_BUFFER, vid->sessionctx.mem->bo, vid->sessionctx.offset);
2909    send_cmd(cmd_buffer, RDECODE_CMD_MSG_BUFFER, msg_bo, out_offset);
2910 
2911    if (vid->dpb_type != DPB_DYNAMIC_TIER_2) {
2912       struct radv_image_view *dpb_iv =
2913          radv_image_view_from_handle(frame_info->pSetupReferenceSlot->pPictureResource->imageViewBinding);
2914       struct radv_image *dpb = dpb_iv->image;
2915       send_cmd(cmd_buffer, RDECODE_CMD_DPB_BUFFER, dpb->bindings[0].bo, dpb->bindings[0].offset);
2916    }
2917 
2918    if (vid->ctx.mem)
2919       send_cmd(cmd_buffer, RDECODE_CMD_CONTEXT_BUFFER, vid->ctx.mem->bo, vid->ctx.offset);
2920 
2921    send_cmd(cmd_buffer, RDECODE_CMD_BITSTREAM_BUFFER, src_buffer->bo,
2922             src_buffer->offset + frame_info->srcBufferOffset + slice_offset);
2923 
2924    struct radv_image_view *dst_iv = radv_image_view_from_handle(frame_info->dstPictureResource.imageViewBinding);
2925    struct radv_image *img = dst_iv->image;
2926    send_cmd(cmd_buffer, RDECODE_CMD_DECODING_TARGET_BUFFER, img->bindings[0].bo, img->bindings[0].offset);
2927    send_cmd(cmd_buffer, RDECODE_CMD_FEEDBACK_BUFFER, fb_bo, fb_offset);
2928    if (have_it(vid))
2929       send_cmd(cmd_buffer, RDECODE_CMD_IT_SCALING_TABLE_BUFFER, it_probs_bo, it_probs_offset);
2930    else if (have_probs(vid))
2931       send_cmd(cmd_buffer, RDECODE_CMD_PROB_TBL_BUFFER, it_probs_bo, it_probs_offset);
2932 
2933    if (cmd_buffer->device->physical_device->vid_decode_ip != AMD_IP_VCN_UNIFIED) {
2934       radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 2);
2935       set_reg(cmd_buffer, cmd_buffer->device->physical_device->vid_dec_reg.cntl, 1);
2936    } else
2937       radv_vcn_sq_tail(cmd_buffer->cs, &cmd_buffer->video.sq);
2938 }
2939 
2940 VKAPI_ATTR void VKAPI_CALL
radv_CmdDecodeVideoKHR(VkCommandBuffer commandBuffer,const VkVideoDecodeInfoKHR * frame_info)2941 radv_CmdDecodeVideoKHR(VkCommandBuffer commandBuffer, const VkVideoDecodeInfoKHR *frame_info)
2942 {
2943    RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
2944 
2945    if (radv_has_uvd(cmd_buffer->device->physical_device))
2946       radv_uvd_decode_video(cmd_buffer, frame_info);
2947    else
2948       radv_vcn_decode_video(cmd_buffer, frame_info);
2949 }
2950 
2951 void
radv_video_get_profile_alignments(struct radv_physical_device * pdevice,const VkVideoProfileListInfoKHR * profile_list,uint32_t * width_align_out,uint32_t * height_align_out)2952 radv_video_get_profile_alignments(struct radv_physical_device *pdevice, const VkVideoProfileListInfoKHR *profile_list,
2953                                   uint32_t *width_align_out, uint32_t *height_align_out)
2954 {
2955    vk_video_get_profile_alignments(profile_list, width_align_out, height_align_out);
2956    bool is_h265_main_10 = false;
2957    for (unsigned i = 0; i < profile_list->profileCount; i++) {
2958       if (profile_list->pProfiles[i].videoCodecOperation == VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR) {
2959          const struct VkVideoDecodeH265ProfileInfoKHR *h265_profile =
2960             vk_find_struct_const(profile_list->pProfiles[i].pNext, VIDEO_DECODE_H265_PROFILE_INFO_KHR);
2961          if (h265_profile->stdProfileIdc == STD_VIDEO_H265_PROFILE_IDC_MAIN_10)
2962             is_h265_main_10 = true;
2963       }
2964    }
2965 
2966    uint32_t db_alignment = radv_video_get_db_alignment(pdevice, 64, is_h265_main_10);
2967    *width_align_out = MAX2(*width_align_out, db_alignment);
2968    *height_align_out = MAX2(*height_align_out, db_alignment);
2969 }
2970