• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2017 Advanced Micro Devices, Inc.
3  * Copyright 2021 Red Hat Inc.
4  * All Rights Reserved.
5  *
6  * SPDX-License-Identifier: MIT
7  */
8 
9 #ifndef _WIN32
10 #include "drm-uapi/amdgpu_drm.h"
11 #endif
12 
13 #include "util/vl_zscan_data.h"
14 #include "vk_video/vulkan_video_codecs_common.h"
15 #include "ac_uvd_dec.h"
16 #include "ac_vcn_av1_default.h"
17 #include "ac_vcn_dec.h"
18 
19 #include "radv_buffer.h"
20 #include "radv_cs.h"
21 #include "radv_debug.h"
22 #include "radv_device_memory.h"
23 #include "radv_entrypoints.h"
24 #include "radv_image.h"
25 #include "radv_image_view.h"
26 #include "radv_video.h"
27 
28 #define NUM_H2645_REFS               16
29 #define FB_BUFFER_OFFSET             0x1000
30 #define FB_BUFFER_SIZE               2048
31 #define FB_BUFFER_SIZE_TONGA         (2048 * 64)
32 #define IT_SCALING_TABLE_SIZE        992
33 #define RDECODE_SESSION_CONTEXT_SIZE (128 * 1024)
34 
35 /* Not 100% sure this isn't too much but works */
36 #define VID_DEFAULT_ALIGNMENT 256
37 
38 static void set_reg(struct radv_cmd_buffer *cmd_buffer, unsigned reg, uint32_t val);
39 
40 static bool
radv_enable_tier2(struct radv_physical_device * pdev)41 radv_enable_tier2(struct radv_physical_device *pdev)
42 {
43    const struct radv_instance *instance = radv_physical_device_instance(pdev);
44 
45    if (pdev->info.vcn_ip_version >= VCN_3_0_0 && !(instance->debug_flags & RADV_DEBUG_VIDEO_ARRAY_PATH))
46       return true;
47    return false;
48 }
49 
50 static uint32_t
radv_video_get_db_alignment(struct radv_physical_device * pdev,int width,bool is_h265_main_10_or_av1)51 radv_video_get_db_alignment(struct radv_physical_device *pdev, int width, bool is_h265_main_10_or_av1)
52 {
53    if (pdev->info.vcn_ip_version >= VCN_2_0_0 && width > 32 && is_h265_main_10_or_av1)
54       return 64;
55    return 32;
56 }
57 
58 static bool
radv_vid_buffer_upload_alloc(struct radv_cmd_buffer * cmd_buffer,unsigned size,unsigned * out_offset,void ** ptr)59 radv_vid_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer, unsigned size, unsigned *out_offset, void **ptr)
60 {
61    return radv_cmd_buffer_upload_alloc_aligned(cmd_buffer, size, VID_DEFAULT_ALIGNMENT, out_offset, ptr);
62 }
63 
64 /* vcn unified queue (sq) ib header */
65 void
radv_vcn_sq_header(struct radeon_cmdbuf * cs,struct rvcn_sq_var * sq,unsigned type,bool skip_signature)66 radv_vcn_sq_header(struct radeon_cmdbuf *cs, struct rvcn_sq_var *sq, unsigned type, bool skip_signature)
67 {
68    if (!skip_signature) {
69       /* vcn ib signature */
70       radeon_emit(cs, RADEON_VCN_SIGNATURE_SIZE);
71       radeon_emit(cs, RADEON_VCN_SIGNATURE);
72       sq->signature_ib_checksum = &cs->buf[cs->cdw];
73       radeon_emit(cs, 0);
74       sq->signature_ib_total_size_in_dw = &cs->buf[cs->cdw];
75       radeon_emit(cs, 0);
76    } else {
77       sq->signature_ib_checksum = NULL;
78       sq->signature_ib_total_size_in_dw = NULL;
79    }
80 
81    /* vcn ib engine info */
82    radeon_emit(cs, RADEON_VCN_ENGINE_INFO_SIZE);
83    radeon_emit(cs, RADEON_VCN_ENGINE_INFO);
84    radeon_emit(cs, type);
85    sq->engine_ib_size_of_packages = &cs->buf[cs->cdw];
86    radeon_emit(cs, 0);
87 }
88 
89 void
radv_vcn_sq_tail(struct radeon_cmdbuf * cs,struct rvcn_sq_var * sq)90 radv_vcn_sq_tail(struct radeon_cmdbuf *cs, struct rvcn_sq_var *sq)
91 {
92    uint32_t *end;
93    uint32_t size_in_dw;
94    uint32_t checksum = 0;
95 
96    end = &cs->buf[cs->cdw];
97 
98    if (sq->signature_ib_checksum == NULL && sq->signature_ib_total_size_in_dw == NULL) {
99       if (sq->engine_ib_size_of_packages == NULL)
100          return;
101 
102       size_in_dw = end - sq->engine_ib_size_of_packages + 3; /* package_size, package_type, engine_type */
103       *sq->engine_ib_size_of_packages = size_in_dw * sizeof(uint32_t);
104    } else {
105       size_in_dw = end - sq->signature_ib_total_size_in_dw - 1;
106       *sq->signature_ib_total_size_in_dw = size_in_dw;
107       *sq->engine_ib_size_of_packages = size_in_dw * sizeof(uint32_t);
108 
109       for (int i = 0; i < size_in_dw; i++)
110          checksum += *(sq->signature_ib_checksum + 2 + i);
111 
112       *sq->signature_ib_checksum = checksum;
113    }
114 }
115 
116 void
radv_vcn_write_event(struct radv_cmd_buffer * cmd_buffer,struct radv_event * event,unsigned value)117 radv_vcn_write_event(struct radv_cmd_buffer *cmd_buffer, struct radv_event *event, unsigned value)
118 {
119    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
120    struct radv_physical_device *pdev = radv_device_physical(device);
121    struct rvcn_sq_var sq;
122    struct radeon_cmdbuf *cs = cmd_buffer->cs;
123 
124    radv_cs_add_buffer(device->ws, cs, event->bo);
125    uint64_t va = radv_buffer_get_va(event->bo);
126 
127    bool separate_queue = pdev->vid_decode_ip != AMD_IP_VCN_UNIFIED;
128    if (cmd_buffer->qf == RADV_QUEUE_VIDEO_DEC && separate_queue && pdev->vid_dec_reg.data2) {
129       radeon_check_space(device->ws, cmd_buffer->cs, 8);
130       set_reg(cmd_buffer, pdev->vid_dec_reg.data0, va & 0xffffffff);
131       set_reg(cmd_buffer, pdev->vid_dec_reg.data1, va >> 32);
132       set_reg(cmd_buffer, pdev->vid_dec_reg.data2, value);
133       set_reg(cmd_buffer, pdev->vid_dec_reg.cmd, RDECODE_CMD_WRITE_MEMORY << 1);
134       return;
135    }
136 
137    radeon_check_space(device->ws, cs, 256);
138    radv_vcn_sq_header(cs, &sq, RADEON_VCN_ENGINE_TYPE_COMMON, separate_queue);
139    struct rvcn_cmn_engine_ib_package *ib_header = (struct rvcn_cmn_engine_ib_package *)&(cs->buf[cs->cdw]);
140    ib_header->package_size = sizeof(struct rvcn_cmn_engine_ib_package) + sizeof(struct rvcn_cmn_engine_op_writememory);
141    cs->cdw++;
142    ib_header->package_type = RADEON_VCN_IB_COMMON_OP_WRITEMEMORY;
143    cs->cdw++;
144 
145    struct rvcn_cmn_engine_op_writememory *write_memory = (struct rvcn_cmn_engine_op_writememory *)&(cs->buf[cs->cdw]);
146    write_memory->dest_addr_lo = va & 0xffffffff;
147    write_memory->dest_addr_hi = va >> 32;
148    write_memory->data = value;
149 
150    cs->cdw += sizeof(*write_memory) / 4;
151    radv_vcn_sq_tail(cs, &sq);
152 }
153 
154 static void
radv_vcn_sq_start(struct radv_cmd_buffer * cmd_buffer)155 radv_vcn_sq_start(struct radv_cmd_buffer *cmd_buffer)
156 {
157    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
158 
159    radeon_check_space(device->ws, cmd_buffer->cs, 256);
160    radv_vcn_sq_header(cmd_buffer->cs, &cmd_buffer->video.sq, RADEON_VCN_ENGINE_TYPE_DECODE, false);
161    rvcn_decode_ib_package_t *ib_header = (rvcn_decode_ib_package_t *)&(cmd_buffer->cs->buf[cmd_buffer->cs->cdw]);
162    ib_header->package_size = sizeof(struct rvcn_decode_buffer_s) + sizeof(struct rvcn_decode_ib_package_s);
163    cmd_buffer->cs->cdw++;
164    ib_header->package_type = (RDECODE_IB_PARAM_DECODE_BUFFER);
165    cmd_buffer->cs->cdw++;
166    cmd_buffer->video.decode_buffer = (rvcn_decode_buffer_t *)&(cmd_buffer->cs->buf[cmd_buffer->cs->cdw]);
167    cmd_buffer->cs->cdw += sizeof(struct rvcn_decode_buffer_s) / 4;
168    memset(cmd_buffer->video.decode_buffer, 0, sizeof(struct rvcn_decode_buffer_s));
169 }
170 
171 /* generate an stream handle */
172 static unsigned
radv_vid_alloc_stream_handle(struct radv_physical_device * pdev)173 radv_vid_alloc_stream_handle(struct radv_physical_device *pdev)
174 {
175    unsigned stream_handle = pdev->stream_handle_base;
176 
177    stream_handle ^= ++pdev->stream_handle_counter;
178    return stream_handle;
179 }
180 
181 static void
init_uvd_decoder(struct radv_physical_device * pdev)182 init_uvd_decoder(struct radv_physical_device *pdev)
183 {
184    if (pdev->info.family >= CHIP_VEGA10) {
185       pdev->vid_dec_reg.data0 = RUVD_GPCOM_VCPU_DATA0_SOC15;
186       pdev->vid_dec_reg.data1 = RUVD_GPCOM_VCPU_DATA1_SOC15;
187       pdev->vid_dec_reg.cmd = RUVD_GPCOM_VCPU_CMD_SOC15;
188       pdev->vid_dec_reg.cntl = RUVD_ENGINE_CNTL_SOC15;
189    } else {
190       pdev->vid_dec_reg.data0 = RUVD_GPCOM_VCPU_DATA0;
191       pdev->vid_dec_reg.data1 = RUVD_GPCOM_VCPU_DATA1;
192       pdev->vid_dec_reg.cmd = RUVD_GPCOM_VCPU_CMD;
193       pdev->vid_dec_reg.cntl = RUVD_ENGINE_CNTL;
194    }
195 }
196 
197 static void
init_vcn_decoder(struct radv_physical_device * pdev)198 init_vcn_decoder(struct radv_physical_device *pdev)
199 {
200    switch (pdev->info.vcn_ip_version) {
201    case VCN_1_0_0:
202    case VCN_1_0_1:
203       pdev->vid_dec_reg.data0 = RDECODE_VCN1_GPCOM_VCPU_DATA0;
204       pdev->vid_dec_reg.data1 = RDECODE_VCN1_GPCOM_VCPU_DATA1;
205       pdev->vid_dec_reg.cmd = RDECODE_VCN1_GPCOM_VCPU_CMD;
206       pdev->vid_dec_reg.cntl = RDECODE_VCN1_ENGINE_CNTL;
207       break;
208    case VCN_2_0_0:
209    case VCN_2_0_2:
210    case VCN_2_0_3:
211    case VCN_2_2_0:
212       pdev->vid_dec_reg.data0 = RDECODE_VCN2_GPCOM_VCPU_DATA0;
213       pdev->vid_dec_reg.data1 = RDECODE_VCN2_GPCOM_VCPU_DATA1;
214       pdev->vid_dec_reg.data2 = RDECODE_VCN2_GPCOM_VCPU_DATA2;
215       pdev->vid_dec_reg.cmd = RDECODE_VCN2_GPCOM_VCPU_CMD;
216       pdev->vid_dec_reg.cntl = RDECODE_VCN2_ENGINE_CNTL;
217       break;
218    case VCN_2_5_0:
219    case VCN_2_6_0:
220    case VCN_3_0_0:
221    case VCN_3_0_2:
222    case VCN_3_0_16:
223    case VCN_3_0_33:
224    case VCN_3_1_1:
225    case VCN_3_1_2:
226       pdev->vid_dec_reg.data0 = RDECODE_VCN2_5_GPCOM_VCPU_DATA0;
227       pdev->vid_dec_reg.data1 = RDECODE_VCN2_5_GPCOM_VCPU_DATA1;
228       pdev->vid_dec_reg.data2 = RDECODE_VCN2_5_GPCOM_VCPU_DATA2;
229       pdev->vid_dec_reg.cmd = RDECODE_VCN2_5_GPCOM_VCPU_CMD;
230       pdev->vid_dec_reg.cntl = RDECODE_VCN2_5_ENGINE_CNTL;
231       break;
232    case VCN_4_0_3:
233       pdev->vid_addr_gfx_mode = RDECODE_ARRAY_MODE_ADDRLIB_SEL_GFX9;
234       pdev->av1_version = RDECODE_AV1_VER_1;
235       break;
236    case VCN_4_0_0:
237    case VCN_4_0_2:
238    case VCN_4_0_4:
239    case VCN_4_0_5:
240    case VCN_4_0_6:
241       pdev->vid_addr_gfx_mode = RDECODE_ARRAY_MODE_ADDRLIB_SEL_GFX11;
242       pdev->av1_version = RDECODE_AV1_VER_1;
243       break;
244    default:
245       break;
246    }
247 }
248 
249 void
radv_init_physical_device_decoder(struct radv_physical_device * pdev)250 radv_init_physical_device_decoder(struct radv_physical_device *pdev)
251 {
252    if (pdev->info.vcn_ip_version >= VCN_4_0_0)
253       pdev->vid_decode_ip = AMD_IP_VCN_UNIFIED;
254    else if (radv_has_uvd(pdev))
255       pdev->vid_decode_ip = AMD_IP_UVD;
256    else
257       pdev->vid_decode_ip = AMD_IP_VCN_DEC;
258    pdev->av1_version = RDECODE_AV1_VER_0;
259 
260    pdev->stream_handle_counter = 0;
261    pdev->stream_handle_base = 0;
262 
263    pdev->stream_handle_base = util_bitreverse(getpid());
264 
265    pdev->vid_addr_gfx_mode = RDECODE_ARRAY_MODE_LINEAR;
266 
267    if (radv_has_uvd(pdev))
268       init_uvd_decoder(pdev);
269    else
270       init_vcn_decoder(pdev);
271 }
272 
273 void
radv_probe_video_decode(struct radv_physical_device * pdev)274 radv_probe_video_decode(struct radv_physical_device *pdev)
275 {
276    const struct radv_instance *instance = radv_physical_device_instance(pdev);
277 
278    pdev->video_decode_enabled = false;
279 
280    /* The support for decode events are available at the same time as encode */
281    if (pdev->info.vcn_ip_version >= VCN_4_0_0) {
282       if (pdev->info.vcn_enc_major_version > 1)
283          pdev->video_decode_enabled = true;
284       /* VCN 4 FW 1.22 has all the necessary pieces to pass CTS */
285       if (pdev->info.vcn_enc_major_version == 1 && pdev->info.vcn_enc_minor_version >= 22)
286          pdev->video_decode_enabled = true;
287    } else if (pdev->info.vcn_ip_version >= VCN_3_0_0) {
288       if (pdev->info.vcn_enc_major_version > 1)
289          pdev->video_decode_enabled = true;
290       /* VCN 3 FW 1.33 has all the necessary pieces to pass CTS */
291       if (pdev->info.vcn_enc_major_version == 1 && pdev->info.vcn_enc_minor_version >= 33)
292          pdev->video_decode_enabled = true;
293    } else if (pdev->info.vcn_ip_version >= VCN_2_0_0) {
294       if (pdev->info.vcn_enc_major_version > 1)
295          pdev->video_decode_enabled = true;
296       /* VCN 2 FW 1.24 has all the necessary pieces to pass CTS */
297       if (pdev->info.vcn_enc_major_version == 1 && pdev->info.vcn_enc_minor_version >= 24)
298          pdev->video_decode_enabled = true;
299    }
300    if (instance->perftest_flags & RADV_PERFTEST_VIDEO_DECODE) {
301       pdev->video_decode_enabled = true;
302    }
303 }
304 
305 static bool
have_it(struct radv_video_session * vid)306 have_it(struct radv_video_session *vid)
307 {
308    return vid->stream_type == RDECODE_CODEC_H264_PERF || vid->stream_type == RDECODE_CODEC_H265;
309 }
310 
311 static bool
have_probs(struct radv_video_session * vid)312 have_probs(struct radv_video_session *vid)
313 {
314    return vid->stream_type == RDECODE_CODEC_AV1;
315 }
316 
317 static unsigned
calc_ctx_size_h264_perf(struct radv_video_session * vid)318 calc_ctx_size_h264_perf(struct radv_video_session *vid)
319 {
320    unsigned width_in_mb, height_in_mb, ctx_size;
321    unsigned width = align(vid->vk.max_coded.width, VK_VIDEO_H264_MACROBLOCK_WIDTH);
322    unsigned height = align(vid->vk.max_coded.height, VK_VIDEO_H264_MACROBLOCK_HEIGHT);
323 
324    unsigned max_references = vid->vk.max_dpb_slots + 1;
325 
326    /* picture width & height in 16 pixel units */
327    width_in_mb = width / VK_VIDEO_H264_MACROBLOCK_WIDTH;
328    height_in_mb = align(height / VK_VIDEO_H264_MACROBLOCK_HEIGHT, 2);
329 
330    ctx_size = max_references * align(width_in_mb * height_in_mb * 192, 256);
331 
332    return ctx_size;
333 }
334 
335 static unsigned
calc_ctx_size_h265_main(struct radv_video_session * vid)336 calc_ctx_size_h265_main(struct radv_video_session *vid)
337 {
338    /* this is taken from radeonsi and seems correct for h265 */
339    unsigned width = align(vid->vk.max_coded.width, VK_VIDEO_H264_MACROBLOCK_WIDTH);
340    unsigned height = align(vid->vk.max_coded.height, VK_VIDEO_H264_MACROBLOCK_HEIGHT);
341 
342    unsigned max_references = vid->vk.max_dpb_slots + 1;
343 
344    if (vid->vk.max_coded.width * vid->vk.max_coded.height >= 4096 * 2000)
345       max_references = MAX2(max_references, 8);
346    else
347       max_references = MAX2(max_references, 17);
348 
349    width = align(width, 16);
350    height = align(height, 16);
351    return ((width + 255) / 16) * ((height + 255) / 16) * 16 * max_references + 52 * 1024;
352 }
353 
354 static unsigned
calc_ctx_size_h265_main10(struct radv_video_session * vid)355 calc_ctx_size_h265_main10(struct radv_video_session *vid)
356 {
357    unsigned log2_ctb_size, width_in_ctb, height_in_ctb, num_16x16_block_per_ctb;
358    unsigned context_buffer_size_per_ctb_row, cm_buffer_size, max_mb_address, db_left_tile_pxl_size;
359    unsigned db_left_tile_ctx_size = 4096 / 16 * (32 + 16 * 4);
360 
361    /* this is taken from radeonsi and seems correct for h265 */
362    unsigned width = align(vid->vk.max_coded.width, VK_VIDEO_H264_MACROBLOCK_WIDTH);
363    unsigned height = align(vid->vk.max_coded.height, VK_VIDEO_H264_MACROBLOCK_HEIGHT);
364    unsigned coeff_10bit = 2;
365 
366    unsigned max_references = vid->vk.max_dpb_slots + 1;
367 
368    if (vid->vk.max_coded.width * vid->vk.max_coded.height >= 4096 * 2000)
369       max_references = MAX2(max_references, 8);
370    else
371       max_references = MAX2(max_references, 17);
372 
373    /* 64x64 is the maximum ctb size. */
374    log2_ctb_size = 6;
375 
376    width_in_ctb = (width + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size;
377    height_in_ctb = (height + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size;
378 
379    num_16x16_block_per_ctb = ((1 << log2_ctb_size) >> 4) * ((1 << log2_ctb_size) >> 4);
380    context_buffer_size_per_ctb_row = align(width_in_ctb * num_16x16_block_per_ctb * 16, 256);
381    max_mb_address = (unsigned)ceil(height * 8 / 2048.0);
382 
383    cm_buffer_size = max_references * context_buffer_size_per_ctb_row * height_in_ctb;
384    db_left_tile_pxl_size = coeff_10bit * (max_mb_address * 2 * 2048 + 1024);
385 
386    return cm_buffer_size + db_left_tile_ctx_size + db_left_tile_pxl_size;
387 }
388 
389 static unsigned
calc_ctx_size_av1(struct radv_device * device,struct radv_video_session * vid)390 calc_ctx_size_av1(struct radv_device *device, struct radv_video_session *vid)
391 {
392    const struct radv_physical_device *pdev = radv_device_physical(device);
393    return ac_vcn_dec_calc_ctx_size_av1(pdev->av1_version);
394 }
395 
396 static void
radv_video_patch_session_parameters(struct vk_video_session_parameters * params)397 radv_video_patch_session_parameters(struct vk_video_session_parameters *params)
398 {
399    switch (params->op) {
400    case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR:
401    case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR:
402    default:
403       return;
404    case VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR:
405    case VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR:
406       radv_video_patch_encode_session_parameters(params);
407       break;
408    }
409 }
410 
411 VKAPI_ATTR VkResult VKAPI_CALL
radv_CreateVideoSessionKHR(VkDevice _device,const VkVideoSessionCreateInfoKHR * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkVideoSessionKHR * pVideoSession)412 radv_CreateVideoSessionKHR(VkDevice _device, const VkVideoSessionCreateInfoKHR *pCreateInfo,
413                            const VkAllocationCallbacks *pAllocator, VkVideoSessionKHR *pVideoSession)
414 {
415    VK_FROM_HANDLE(radv_device, device, _device);
416    struct radv_physical_device *pdev = radv_device_physical(device);
417    const struct radv_instance *instance = radv_physical_device_instance(pdev);
418 
419    struct radv_video_session *vid =
420       vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*vid), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
421    if (!vid)
422       return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
423 
424    memset(vid, 0, sizeof(struct radv_video_session));
425 
426    VkResult result = vk_video_session_init(&device->vk, &vid->vk, pCreateInfo);
427    if (result != VK_SUCCESS) {
428       vk_free2(&device->vk.alloc, pAllocator, vid);
429       return result;
430    }
431 
432    vid->dpb_type = DPB_MAX_RES;
433 
434    switch (vid->vk.op) {
435    case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR:
436       vid->stream_type = RDECODE_CODEC_H264_PERF;
437       if (radv_enable_tier2(pdev))
438          vid->dpb_type = DPB_DYNAMIC_TIER_2;
439       break;
440    case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR:
441       vid->stream_type = RDECODE_CODEC_H265;
442       if (radv_enable_tier2(pdev))
443          vid->dpb_type = DPB_DYNAMIC_TIER_2;
444       break;
445    case VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR:
446       vid->stream_type = RDECODE_CODEC_AV1;
447       vid->dpb_type = DPB_DYNAMIC_TIER_2;
448       break;
449    case VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR:
450       vid->encode = true;
451       vid->enc_session.encode_standard = RENCODE_ENCODE_STANDARD_H264;
452       vid->enc_session.aligned_picture_width = align(vid->vk.max_coded.width, 16);
453       vid->enc_session.aligned_picture_height = align(vid->vk.max_coded.height, 16);
454       vid->enc_session.padding_width = vid->enc_session.aligned_picture_width - vid->vk.max_coded.width;
455       vid->enc_session.padding_height = vid->enc_session.aligned_picture_height - vid->vk.max_coded.height;
456       vid->enc_session.display_remote = 0;
457       vid->enc_session.pre_encode_mode = 0;
458       vid->enc_session.pre_encode_chroma_enabled = 0;
459       switch (vid->vk.enc_usage.tuning_mode) {
460       case VK_VIDEO_ENCODE_TUNING_MODE_DEFAULT_KHR:
461       default:
462          vid->enc_preset_mode = RENCODE_PRESET_MODE_BALANCE;
463          break;
464       case VK_VIDEO_ENCODE_TUNING_MODE_LOW_LATENCY_KHR:
465       case VK_VIDEO_ENCODE_TUNING_MODE_ULTRA_LOW_LATENCY_KHR:
466          vid->enc_preset_mode = RENCODE_PRESET_MODE_SPEED;
467          break;
468       case VK_VIDEO_ENCODE_TUNING_MODE_HIGH_QUALITY_KHR:
469       case VK_VIDEO_ENCODE_TUNING_MODE_LOSSLESS_KHR:
470          vid->enc_preset_mode = RENCODE_PRESET_MODE_QUALITY;
471          break;
472       }
473       break;
474    case VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR:
475       vid->encode = true;
476       vid->enc_session.encode_standard = RENCODE_ENCODE_STANDARD_HEVC;
477       vid->enc_session.aligned_picture_width = align(vid->vk.max_coded.width, 64);
478       vid->enc_session.aligned_picture_height = align(vid->vk.max_coded.height, 64);
479       vid->enc_session.padding_width = vid->enc_session.aligned_picture_width - vid->vk.max_coded.width;
480       vid->enc_session.padding_height = vid->enc_session.aligned_picture_height - vid->vk.max_coded.height;
481       vid->enc_session.display_remote = 0;
482       vid->enc_session.pre_encode_mode = 0;
483       vid->enc_session.pre_encode_chroma_enabled = 0;
484       switch (vid->vk.enc_usage.tuning_mode) {
485       case VK_VIDEO_ENCODE_TUNING_MODE_DEFAULT_KHR:
486       default:
487          vid->enc_preset_mode = RENCODE_PRESET_MODE_BALANCE;
488          break;
489       case VK_VIDEO_ENCODE_TUNING_MODE_LOW_LATENCY_KHR:
490       case VK_VIDEO_ENCODE_TUNING_MODE_ULTRA_LOW_LATENCY_KHR:
491          vid->enc_preset_mode = RENCODE_PRESET_MODE_SPEED;
492          break;
493       case VK_VIDEO_ENCODE_TUNING_MODE_HIGH_QUALITY_KHR:
494       case VK_VIDEO_ENCODE_TUNING_MODE_LOSSLESS_KHR:
495          vid->enc_preset_mode = RENCODE_PRESET_MODE_QUALITY;
496          break;
497       }
498       break;
499    default:
500       return VK_ERROR_FEATURE_NOT_PRESENT;
501    }
502 
503    vid->stream_handle = radv_vid_alloc_stream_handle(pdev);
504    vid->dbg_frame_cnt = 0;
505    vid->db_alignment = radv_video_get_db_alignment(
506       pdev, vid->vk.max_coded.width,
507       (vid->stream_type == RDECODE_CODEC_AV1 ||
508        (vid->stream_type == RDECODE_CODEC_H265 && vid->vk.h265.profile_idc == STD_VIDEO_H265_PROFILE_IDC_MAIN_10)));
509 
510    *pVideoSession = radv_video_session_to_handle(vid);
511    return VK_SUCCESS;
512 }
513 
514 VKAPI_ATTR void VKAPI_CALL
radv_DestroyVideoSessionKHR(VkDevice _device,VkVideoSessionKHR _session,const VkAllocationCallbacks * pAllocator)515 radv_DestroyVideoSessionKHR(VkDevice _device, VkVideoSessionKHR _session, const VkAllocationCallbacks *pAllocator)
516 {
517    VK_FROM_HANDLE(radv_device, device, _device);
518    VK_FROM_HANDLE(radv_video_session, vid, _session);
519    if (!_session)
520       return;
521 
522    vk_object_base_finish(&vid->vk.base);
523    vk_free2(&device->vk.alloc, pAllocator, vid);
524 }
525 
526 VKAPI_ATTR VkResult VKAPI_CALL
radv_CreateVideoSessionParametersKHR(VkDevice _device,const VkVideoSessionParametersCreateInfoKHR * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkVideoSessionParametersKHR * pVideoSessionParameters)527 radv_CreateVideoSessionParametersKHR(VkDevice _device, const VkVideoSessionParametersCreateInfoKHR *pCreateInfo,
528                                      const VkAllocationCallbacks *pAllocator,
529                                      VkVideoSessionParametersKHR *pVideoSessionParameters)
530 {
531    VK_FROM_HANDLE(radv_device, device, _device);
532    VK_FROM_HANDLE(radv_video_session, vid, pCreateInfo->videoSession);
533    VK_FROM_HANDLE(radv_video_session_params, templ, pCreateInfo->videoSessionParametersTemplate);
534    const struct radv_physical_device *pdev = radv_device_physical(device);
535    const struct radv_instance *instance = radv_physical_device_instance(pdev);
536    struct radv_video_session_params *params =
537       vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*params), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
538    if (!params)
539       return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
540 
541    VkResult result =
542       vk_video_session_parameters_init(&device->vk, &params->vk, &vid->vk, templ ? &templ->vk : NULL, pCreateInfo);
543    if (result != VK_SUCCESS) {
544       vk_free2(&device->vk.alloc, pAllocator, params);
545       return result;
546    }
547 
548    radv_video_patch_session_parameters(&params->vk);
549 
550    *pVideoSessionParameters = radv_video_session_params_to_handle(params);
551    return VK_SUCCESS;
552 }
553 
554 VKAPI_ATTR void VKAPI_CALL
radv_DestroyVideoSessionParametersKHR(VkDevice _device,VkVideoSessionParametersKHR _params,const VkAllocationCallbacks * pAllocator)555 radv_DestroyVideoSessionParametersKHR(VkDevice _device, VkVideoSessionParametersKHR _params,
556                                       const VkAllocationCallbacks *pAllocator)
557 {
558    VK_FROM_HANDLE(radv_device, device, _device);
559    VK_FROM_HANDLE(radv_video_session_params, params, _params);
560 
561    vk_video_session_parameters_finish(&device->vk, &params->vk);
562    vk_free2(&device->vk.alloc, pAllocator, params);
563 }
564 
565 VKAPI_ATTR VkResult VKAPI_CALL
radv_GetPhysicalDeviceVideoCapabilitiesKHR(VkPhysicalDevice physicalDevice,const VkVideoProfileInfoKHR * pVideoProfile,VkVideoCapabilitiesKHR * pCapabilities)566 radv_GetPhysicalDeviceVideoCapabilitiesKHR(VkPhysicalDevice physicalDevice, const VkVideoProfileInfoKHR *pVideoProfile,
567                                            VkVideoCapabilitiesKHR *pCapabilities)
568 {
569    VK_FROM_HANDLE(radv_physical_device, pdev, physicalDevice);
570    const struct video_codec_cap *cap = NULL;
571    bool is_encode = false;
572 
573    switch (pVideoProfile->videoCodecOperation) {
574 #ifndef _WIN32
575    case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR:
576       cap = &pdev->info.dec_caps.codec_info[AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC];
577       break;
578    case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR:
579       cap = &pdev->info.dec_caps.codec_info[AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC];
580       break;
581    case VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR:
582       cap = &pdev->info.dec_caps.codec_info[AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1];
583       break;
584    case VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR:
585       cap = &pdev->info.enc_caps.codec_info[AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC];
586       is_encode = true;
587       break;
588    case VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR:
589       cap = &pdev->info.enc_caps.codec_info[AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC];
590       is_encode = true;
591       break;
592 #endif
593    default:
594       unreachable("unsupported operation");
595    }
596 
597    if (cap && !cap->valid)
598       cap = NULL;
599 
600    pCapabilities->flags = 0;
601    pCapabilities->pictureAccessGranularity.width = VK_VIDEO_H264_MACROBLOCK_WIDTH;
602    pCapabilities->pictureAccessGranularity.height = VK_VIDEO_H264_MACROBLOCK_HEIGHT;
603    pCapabilities->minCodedExtent.width = VK_VIDEO_H264_MACROBLOCK_WIDTH;
604    pCapabilities->minCodedExtent.height = VK_VIDEO_H264_MACROBLOCK_HEIGHT;
605 
606    struct VkVideoDecodeCapabilitiesKHR *dec_caps = NULL;
607    struct VkVideoEncodeCapabilitiesKHR *enc_caps = NULL;
608    if (!is_encode) {
609       dec_caps =
610          (struct VkVideoDecodeCapabilitiesKHR *)vk_find_struct(pCapabilities->pNext, VIDEO_DECODE_CAPABILITIES_KHR);
611       if (dec_caps)
612          dec_caps->flags = VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_DISTINCT_BIT_KHR;
613       pCapabilities->minBitstreamBufferOffsetAlignment = 128;
614       pCapabilities->minBitstreamBufferSizeAlignment = 128;
615    } else {
616       enc_caps =
617          (struct VkVideoEncodeCapabilitiesKHR *)vk_find_struct(pCapabilities->pNext, VIDEO_ENCODE_CAPABILITIES_KHR);
618 
619       if (enc_caps) {
620          enc_caps->flags = 0;
621          enc_caps->rateControlModes = VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR |
622                                       VK_VIDEO_ENCODE_RATE_CONTROL_MODE_CBR_BIT_KHR |
623                                       VK_VIDEO_ENCODE_RATE_CONTROL_MODE_VBR_BIT_KHR;
624          enc_caps->maxRateControlLayers = RADV_ENC_MAX_RATE_LAYER;
625          enc_caps->maxBitrate = 1000000000;
626          enc_caps->maxQualityLevels = 2;
627          enc_caps->encodeInputPictureGranularity = pCapabilities->pictureAccessGranularity;
628          enc_caps->supportedEncodeFeedbackFlags = VK_VIDEO_ENCODE_FEEDBACK_BITSTREAM_BUFFER_OFFSET_BIT_KHR |
629                                                   VK_VIDEO_ENCODE_FEEDBACK_BITSTREAM_BYTES_WRITTEN_BIT_KHR;
630       }
631       pCapabilities->minBitstreamBufferOffsetAlignment = 16;
632       pCapabilities->minBitstreamBufferSizeAlignment = 16;
633    }
634 
635    switch (pVideoProfile->videoCodecOperation) {
636    case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR: {
637       /* H264 allows different luma and chroma bit depths */
638       if (pVideoProfile->lumaBitDepth != pVideoProfile->chromaBitDepth)
639          return VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR;
640 
641       struct VkVideoDecodeH264CapabilitiesKHR *ext = (struct VkVideoDecodeH264CapabilitiesKHR *)vk_find_struct(
642          pCapabilities->pNext, VIDEO_DECODE_H264_CAPABILITIES_KHR);
643 
644       const struct VkVideoDecodeH264ProfileInfoKHR *h264_profile =
645          vk_find_struct_const(pVideoProfile->pNext, VIDEO_DECODE_H264_PROFILE_INFO_KHR);
646 
647       if (h264_profile->stdProfileIdc != STD_VIDEO_H264_PROFILE_IDC_BASELINE &&
648           h264_profile->stdProfileIdc != STD_VIDEO_H264_PROFILE_IDC_MAIN &&
649           h264_profile->stdProfileIdc != STD_VIDEO_H264_PROFILE_IDC_HIGH)
650          return VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR;
651 
652       if (pVideoProfile->lumaBitDepth != VK_VIDEO_COMPONENT_BIT_DEPTH_8_BIT_KHR)
653          return VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR;
654 
655       pCapabilities->maxDpbSlots = NUM_H2645_REFS + 1;
656       pCapabilities->maxActiveReferencePictures = NUM_H2645_REFS;
657 
658       /* for h264 on navi21+ separate dpb images should work */
659       if (radv_enable_tier2(pdev))
660          pCapabilities->flags |= VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR;
661       ext->fieldOffsetGranularity.x = 0;
662       ext->fieldOffsetGranularity.y = 0;
663       ext->maxLevelIdc = STD_VIDEO_H264_LEVEL_IDC_5_1;
664       strcpy(pCapabilities->stdHeaderVersion.extensionName, VK_STD_VULKAN_VIDEO_CODEC_H264_DECODE_EXTENSION_NAME);
665       pCapabilities->stdHeaderVersion.specVersion = VK_STD_VULKAN_VIDEO_CODEC_H264_DECODE_SPEC_VERSION;
666       break;
667    }
668    case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR: {
669       /* H265 allows different luma and chroma bit depths */
670       if (pVideoProfile->lumaBitDepth != pVideoProfile->chromaBitDepth)
671          return VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR;
672 
673       struct VkVideoDecodeH265CapabilitiesKHR *ext = (struct VkVideoDecodeH265CapabilitiesKHR *)vk_find_struct(
674          pCapabilities->pNext, VIDEO_DECODE_H265_CAPABILITIES_KHR);
675 
676       const struct VkVideoDecodeH265ProfileInfoKHR *h265_profile =
677          vk_find_struct_const(pVideoProfile->pNext, VIDEO_DECODE_H265_PROFILE_INFO_KHR);
678 
679       if (h265_profile->stdProfileIdc != STD_VIDEO_H265_PROFILE_IDC_MAIN &&
680           h265_profile->stdProfileIdc != STD_VIDEO_H265_PROFILE_IDC_MAIN_10 &&
681           h265_profile->stdProfileIdc != STD_VIDEO_H265_PROFILE_IDC_MAIN_STILL_PICTURE)
682          return VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR;
683 
684       if (pVideoProfile->lumaBitDepth != VK_VIDEO_COMPONENT_BIT_DEPTH_8_BIT_KHR &&
685           pVideoProfile->lumaBitDepth != VK_VIDEO_COMPONENT_BIT_DEPTH_10_BIT_KHR)
686          return VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR;
687 
688       pCapabilities->maxDpbSlots = NUM_H2645_REFS + 1;
689       pCapabilities->maxActiveReferencePictures = NUM_H2645_REFS;
690       /* for h265 on navi21+ separate dpb images should work */
691       if (radv_enable_tier2(pdev))
692          pCapabilities->flags |= VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR;
693       ext->maxLevelIdc = STD_VIDEO_H265_LEVEL_IDC_5_1;
694       strcpy(pCapabilities->stdHeaderVersion.extensionName, VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_EXTENSION_NAME);
695       pCapabilities->stdHeaderVersion.specVersion = VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_SPEC_VERSION;
696       break;
697    }
698    case VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR: {
699       const bool have_12bit = pdev->info.vcn_ip_version >= VCN_5_0_0 ||
700                               pdev->info.vcn_ip_version == VCN_4_0_0;
701       /* Monochrome sampling implies an undefined chroma bit depth, and is supported in profile MAIN for AV1. */
702       if (pVideoProfile->chromaSubsampling != VK_VIDEO_CHROMA_SUBSAMPLING_MONOCHROME_BIT_KHR &&
703           pVideoProfile->lumaBitDepth != pVideoProfile->chromaBitDepth)
704          return VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR;
705       struct VkVideoDecodeAV1CapabilitiesKHR *ext =
706          vk_find_struct(pCapabilities->pNext, VIDEO_DECODE_AV1_CAPABILITIES_KHR);
707 
708       const struct VkVideoDecodeAV1ProfileInfoKHR *av1_profile =
709          vk_find_struct_const(pVideoProfile->pNext, VIDEO_DECODE_AV1_PROFILE_INFO_KHR);
710 
711       if (av1_profile->stdProfile != STD_VIDEO_AV1_PROFILE_MAIN &&
712           (!have_12bit || av1_profile->stdProfile != STD_VIDEO_AV1_PROFILE_PROFESSIONAL))
713          return VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR;
714 
715       if (pVideoProfile->lumaBitDepth != VK_VIDEO_COMPONENT_BIT_DEPTH_8_BIT_KHR &&
716           pVideoProfile->lumaBitDepth != VK_VIDEO_COMPONENT_BIT_DEPTH_10_BIT_KHR &&
717           (!have_12bit || pVideoProfile->lumaBitDepth != VK_VIDEO_COMPONENT_BIT_DEPTH_12_BIT_KHR))
718          return VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR;
719 
720       pCapabilities->maxDpbSlots = 9;
721       pCapabilities->maxActiveReferencePictures = STD_VIDEO_AV1_NUM_REF_FRAMES;
722       pCapabilities->flags |= VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR;
723       ext->maxLevel = STD_VIDEO_AV1_LEVEL_6_1; /* For VCN3/4, the only h/w currently with AV1 decode support */
724       strcpy(pCapabilities->stdHeaderVersion.extensionName, VK_STD_VULKAN_VIDEO_CODEC_AV1_DECODE_EXTENSION_NAME);
725       pCapabilities->stdHeaderVersion.specVersion = VK_STD_VULKAN_VIDEO_CODEC_AV1_DECODE_SPEC_VERSION;
726       break;
727    }
728    case VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR: {
729       struct VkVideoEncodeH264CapabilitiesKHR *ext = (struct VkVideoEncodeH264CapabilitiesKHR *)vk_find_struct(
730          pCapabilities->pNext, VIDEO_ENCODE_H264_CAPABILITIES_KHR);
731 
732       const struct VkVideoEncodeH264ProfileInfoKHR *h264_profile =
733          vk_find_struct_const(pVideoProfile->pNext, VIDEO_ENCODE_H264_PROFILE_INFO_KHR);
734 
735       if (h264_profile->stdProfileIdc != STD_VIDEO_H264_PROFILE_IDC_BASELINE &&
736           h264_profile->stdProfileIdc != STD_VIDEO_H264_PROFILE_IDC_MAIN &&
737           h264_profile->stdProfileIdc != STD_VIDEO_H264_PROFILE_IDC_HIGH)
738          return VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR;
739 
740       if (pVideoProfile->lumaBitDepth != VK_VIDEO_COMPONENT_BIT_DEPTH_8_BIT_KHR)
741          return VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR;
742 
743       pCapabilities->maxDpbSlots = NUM_H2645_REFS;
744       pCapabilities->maxActiveReferencePictures = NUM_H2645_REFS;
745       ext->flags = VK_VIDEO_ENCODE_H264_CAPABILITY_HRD_COMPLIANCE_BIT_KHR |
746                    VK_VIDEO_ENCODE_H264_CAPABILITY_PER_PICTURE_TYPE_MIN_MAX_QP_BIT_KHR;
747       ext->maxLevelIdc = cap ? cap->max_level : 0;
748       ext->maxSliceCount = 1;
749       ext->maxPPictureL0ReferenceCount = 1;
750       ext->maxBPictureL0ReferenceCount = 0;
751       ext->maxL1ReferenceCount = 0;
752       ext->maxTemporalLayerCount = 4;
753       ext->expectDyadicTemporalLayerPattern = false;
754       ext->minQp = 0;
755       ext->maxQp = 51;
756       ext->prefersGopRemainingFrames = false;
757       ext->requiresGopRemainingFrames = false;
758       ext->stdSyntaxFlags = VK_VIDEO_ENCODE_H264_STD_CONSTRAINED_INTRA_PRED_FLAG_SET_BIT_KHR |
759                             VK_VIDEO_ENCODE_H264_STD_ENTROPY_CODING_MODE_FLAG_UNSET_BIT_KHR |
760                             VK_VIDEO_ENCODE_H264_STD_ENTROPY_CODING_MODE_FLAG_SET_BIT_KHR;
761       if (pdev->enc_hw_ver >= RADV_VIDEO_ENC_HW_3)
762          ext->stdSyntaxFlags |= VK_VIDEO_ENCODE_H264_STD_WEIGHTED_BIPRED_IDC_EXPLICIT_BIT_KHR;
763 
764       strcpy(pCapabilities->stdHeaderVersion.extensionName, VK_STD_VULKAN_VIDEO_CODEC_H264_ENCODE_EXTENSION_NAME);
765       pCapabilities->stdHeaderVersion.specVersion = VK_STD_VULKAN_VIDEO_CODEC_H264_ENCODE_SPEC_VERSION;
766       break;
767    }
768    case VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR: {
769       struct VkVideoEncodeH265CapabilitiesKHR *ext = (struct VkVideoEncodeH265CapabilitiesKHR *)vk_find_struct(
770          pCapabilities->pNext, VIDEO_ENCODE_H265_CAPABILITIES_KHR);
771 
772       const struct VkVideoEncodeH265ProfileInfoKHR *h265_profile =
773          vk_find_struct_const(pVideoProfile->pNext, VIDEO_ENCODE_H265_PROFILE_INFO_KHR);
774 
775       if (h265_profile->stdProfileIdc != STD_VIDEO_H265_PROFILE_IDC_MAIN &&
776           (pdev->enc_hw_ver < RADV_VIDEO_ENC_HW_2 ||
777            h265_profile->stdProfileIdc != STD_VIDEO_H265_PROFILE_IDC_MAIN_10))
778          return VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR;
779 
780       if (pVideoProfile->lumaBitDepth != VK_VIDEO_COMPONENT_BIT_DEPTH_8_BIT_KHR &&
781           (pdev->enc_hw_ver < RADV_VIDEO_ENC_HW_2 ||
782            pVideoProfile->lumaBitDepth != VK_VIDEO_COMPONENT_BIT_DEPTH_10_BIT_KHR))
783          return VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR;
784 
785       pCapabilities->pictureAccessGranularity.width = VK_VIDEO_H265_CTU_MAX_WIDTH;
786       if (enc_caps)
787          enc_caps->encodeInputPictureGranularity = pCapabilities->pictureAccessGranularity;
788 
789       pCapabilities->maxDpbSlots = NUM_H2645_REFS;
790       pCapabilities->maxActiveReferencePictures = NUM_H2645_REFS;
791       ext->flags = VK_VIDEO_ENCODE_H265_CAPABILITY_PER_PICTURE_TYPE_MIN_MAX_QP_BIT_KHR;
792       ext->maxLevelIdc = cap ? cap->max_level : 0;
793       ext->maxSliceSegmentCount = 1;
794       ext->maxTiles.width = 1;
795       ext->maxTiles.height = 1;
796       ext->ctbSizes = VK_VIDEO_ENCODE_H265_CTB_SIZE_64_BIT_KHR;
797       ext->transformBlockSizes =
798          VK_VIDEO_ENCODE_H265_TRANSFORM_BLOCK_SIZE_4_BIT_KHR | VK_VIDEO_ENCODE_H265_TRANSFORM_BLOCK_SIZE_8_BIT_KHR |
799          VK_VIDEO_ENCODE_H265_TRANSFORM_BLOCK_SIZE_16_BIT_KHR | VK_VIDEO_ENCODE_H265_TRANSFORM_BLOCK_SIZE_32_BIT_KHR;
800       ext->maxPPictureL0ReferenceCount = 1;
801       ext->maxBPictureL0ReferenceCount = 0;
802       ext->maxL1ReferenceCount = 0;
803       ext->maxSubLayerCount = 4;
804       ext->expectDyadicTemporalSubLayerPattern = false;
805       ext->minQp = 0;
806       ext->maxQp = 51;
807       ext->prefersGopRemainingFrames = false;
808       ext->requiresGopRemainingFrames = false;
809       ext->stdSyntaxFlags = VK_VIDEO_ENCODE_H265_STD_CONSTRAINED_INTRA_PRED_FLAG_SET_BIT_KHR |
810                             VK_VIDEO_ENCODE_H265_STD_DEBLOCKING_FILTER_OVERRIDE_ENABLED_FLAG_SET_BIT_KHR |
811                             VK_VIDEO_ENCODE_H265_STD_CONSTRAINED_INTRA_PRED_FLAG_SET_BIT_KHR |
812                             VK_VIDEO_ENCODE_H265_STD_ENTROPY_CODING_SYNC_ENABLED_FLAG_SET_BIT_KHR;
813 
814       if (pdev->enc_hw_ver >= RADV_VIDEO_ENC_HW_2)
815          ext->stdSyntaxFlags |= VK_VIDEO_ENCODE_H265_STD_SAMPLE_ADAPTIVE_OFFSET_ENABLED_FLAG_SET_BIT_KHR;
816 
817       if (pdev->enc_hw_ver >= RADV_VIDEO_ENC_HW_3)
818          ext->stdSyntaxFlags |= VK_VIDEO_ENCODE_H265_STD_TRANSFORM_SKIP_ENABLED_FLAG_SET_BIT_KHR;
819       strcpy(pCapabilities->stdHeaderVersion.extensionName, VK_STD_VULKAN_VIDEO_CODEC_H265_ENCODE_EXTENSION_NAME);
820       pCapabilities->stdHeaderVersion.specVersion = VK_STD_VULKAN_VIDEO_CODEC_H265_ENCODE_SPEC_VERSION;
821       break;
822    }
823    default:
824       break;
825    }
826 
827    if (cap) {
828       pCapabilities->maxCodedExtent.width = cap->max_width;
829       pCapabilities->maxCodedExtent.height = cap->max_height;
830    } else {
831       switch (pVideoProfile->videoCodecOperation) {
832       case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR:
833          pCapabilities->maxCodedExtent.width = (pdev->info.family < CHIP_TONGA) ? 2048 : 4096;
834          pCapabilities->maxCodedExtent.height = (pdev->info.family < CHIP_TONGA) ? 1152 : 4096;
835          break;
836       case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR:
837          pCapabilities->maxCodedExtent.width =
838             (pdev->info.family < CHIP_RENOIR) ? ((pdev->info.family < CHIP_TONGA) ? 2048 : 4096) : 8192;
839          pCapabilities->maxCodedExtent.height =
840             (pdev->info.family < CHIP_RENOIR) ? ((pdev->info.family < CHIP_TONGA) ? 1152 : 4096) : 4352;
841          break;
842       default:
843          break;
844       }
845    }
846 
847    return VK_SUCCESS;
848 }
849 
850 VKAPI_ATTR VkResult VKAPI_CALL
radv_GetPhysicalDeviceVideoFormatPropertiesKHR(VkPhysicalDevice physicalDevice,const VkPhysicalDeviceVideoFormatInfoKHR * pVideoFormatInfo,uint32_t * pVideoFormatPropertyCount,VkVideoFormatPropertiesKHR * pVideoFormatProperties)851 radv_GetPhysicalDeviceVideoFormatPropertiesKHR(VkPhysicalDevice physicalDevice,
852                                                const VkPhysicalDeviceVideoFormatInfoKHR *pVideoFormatInfo,
853                                                uint32_t *pVideoFormatPropertyCount,
854                                                VkVideoFormatPropertiesKHR *pVideoFormatProperties)
855 {
856    VK_FROM_HANDLE(radv_physical_device, pdev, physicalDevice);
857 
858    if ((pVideoFormatInfo->imageUsage & (VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR |
859                                         VK_IMAGE_USAGE_VIDEO_ENCODE_DPB_BIT_KHR)) &&
860        !pdev->video_encode_enabled)
861       return VK_ERROR_IMAGE_USAGE_NOT_SUPPORTED_KHR;
862 
863    /* radv requires separate allocates for DPB and decode video. */
864    if ((pVideoFormatInfo->imageUsage &
865         (VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR | VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR)) ==
866        (VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR | VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR))
867       return VK_ERROR_IMAGE_USAGE_NOT_SUPPORTED_KHR;
868 
869    VK_OUTARRAY_MAKE_TYPED(VkVideoFormatPropertiesKHR, out, pVideoFormatProperties, pVideoFormatPropertyCount);
870 
871    bool need_8bit = true;
872    bool need_10bit = false;
873    bool need_12bit = false;
874    const struct VkVideoProfileListInfoKHR *prof_list =
875       (struct VkVideoProfileListInfoKHR *)vk_find_struct_const(pVideoFormatInfo->pNext, VIDEO_PROFILE_LIST_INFO_KHR);
876    if (prof_list) {
877       for (unsigned i = 0; i < prof_list->profileCount; i++) {
878          const VkVideoProfileInfoKHR *profile = &prof_list->pProfiles[i];
879          if (profile->lumaBitDepth & VK_VIDEO_COMPONENT_BIT_DEPTH_10_BIT_KHR)
880             need_10bit = true;
881          else if (profile->lumaBitDepth & VK_VIDEO_COMPONENT_BIT_DEPTH_12_BIT_KHR)
882             need_12bit = true;
883       }
884    }
885 
886    if (need_12bit) {
887       vk_outarray_append_typed(VkVideoFormatPropertiesKHR, &out, p)
888       {
889          p->format = VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16;
890          p->componentMapping.r = VK_COMPONENT_SWIZZLE_IDENTITY;
891          p->componentMapping.g = VK_COMPONENT_SWIZZLE_IDENTITY;
892          p->componentMapping.b = VK_COMPONENT_SWIZZLE_IDENTITY;
893          p->componentMapping.a = VK_COMPONENT_SWIZZLE_IDENTITY;
894          p->imageCreateFlags = 0;
895          if (pVideoFormatInfo->imageUsage & (VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR | VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR))
896             p->imageCreateFlags |= VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT | VK_IMAGE_CREATE_EXTENDED_USAGE_BIT;
897          p->imageType = VK_IMAGE_TYPE_2D;
898          p->imageTiling = VK_IMAGE_TILING_OPTIMAL;
899          p->imageUsageFlags = pVideoFormatInfo->imageUsage;
900       }
901 
902       if (pVideoFormatInfo->imageUsage & (VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR)) {
903          need_8bit = false;
904          need_10bit = false;
905       }
906    }
907 
908    if (need_10bit) {
909       vk_outarray_append_typed(VkVideoFormatPropertiesKHR, &out, p)
910       {
911          p->format = VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16;
912          p->componentMapping.r = VK_COMPONENT_SWIZZLE_IDENTITY;
913          p->componentMapping.g = VK_COMPONENT_SWIZZLE_IDENTITY;
914          p->componentMapping.b = VK_COMPONENT_SWIZZLE_IDENTITY;
915          p->componentMapping.a = VK_COMPONENT_SWIZZLE_IDENTITY;
916          p->imageCreateFlags = 0;
917          if (pVideoFormatInfo->imageUsage & (VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR | VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR))
918             p->imageCreateFlags |= VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT | VK_IMAGE_CREATE_EXTENDED_USAGE_BIT;
919          p->imageType = VK_IMAGE_TYPE_2D;
920          p->imageTiling = VK_IMAGE_TILING_OPTIMAL;
921          p->imageUsageFlags = pVideoFormatInfo->imageUsage;
922       }
923 
924       if (pVideoFormatInfo->imageUsage & (VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR))
925          need_8bit = false;
926    }
927 
928    if (need_8bit) {
929       vk_outarray_append_typed(VkVideoFormatPropertiesKHR, &out, p)
930       {
931          p->format = VK_FORMAT_G8_B8R8_2PLANE_420_UNORM;
932          p->componentMapping.r = VK_COMPONENT_SWIZZLE_IDENTITY;
933          p->componentMapping.g = VK_COMPONENT_SWIZZLE_IDENTITY;
934          p->componentMapping.b = VK_COMPONENT_SWIZZLE_IDENTITY;
935          p->componentMapping.a = VK_COMPONENT_SWIZZLE_IDENTITY;
936          p->imageCreateFlags = 0;
937          if (pVideoFormatInfo->imageUsage & (VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR | VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR))
938             p->imageCreateFlags |= VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT | VK_IMAGE_CREATE_EXTENDED_USAGE_BIT;
939          p->imageType = VK_IMAGE_TYPE_2D;
940          p->imageTiling = VK_IMAGE_TILING_OPTIMAL;
941          p->imageUsageFlags = pVideoFormatInfo->imageUsage;
942       }
943    }
944 
945    return vk_outarray_status(&out);
946 }
947 
948 #define RADV_BIND_SESSION_CTX 0
949 #define RADV_BIND_DECODER_CTX 1
950 
951 VKAPI_ATTR VkResult VKAPI_CALL
radv_GetVideoSessionMemoryRequirementsKHR(VkDevice _device,VkVideoSessionKHR videoSession,uint32_t * pMemoryRequirementsCount,VkVideoSessionMemoryRequirementsKHR * pMemoryRequirements)952 radv_GetVideoSessionMemoryRequirementsKHR(VkDevice _device, VkVideoSessionKHR videoSession,
953                                           uint32_t *pMemoryRequirementsCount,
954                                           VkVideoSessionMemoryRequirementsKHR *pMemoryRequirements)
955 {
956    VK_FROM_HANDLE(radv_device, device, _device);
957    VK_FROM_HANDLE(radv_video_session, vid, videoSession);
958    const struct radv_physical_device *pdev = radv_device_physical(device);
959 
960    uint32_t memory_type_bits = (1u << pdev->memory_properties.memoryTypeCount) - 1;
961 
962    if (vid->encode) {
963       return radv_video_get_encode_session_memory_requirements(device, vid, pMemoryRequirementsCount,
964                                                                pMemoryRequirements);
965    }
966    VK_OUTARRAY_MAKE_TYPED(VkVideoSessionMemoryRequirementsKHR, out, pMemoryRequirements, pMemoryRequirementsCount);
967    /* 1 buffer for session context */
968    if (pdev->info.family >= CHIP_POLARIS10) {
969       vk_outarray_append_typed(VkVideoSessionMemoryRequirementsKHR, &out, m)
970       {
971          m->memoryBindIndex = RADV_BIND_SESSION_CTX;
972          m->memoryRequirements.size = RDECODE_SESSION_CONTEXT_SIZE;
973          m->memoryRequirements.alignment = 0;
974          m->memoryRequirements.memoryTypeBits = memory_type_bits;
975       }
976    }
977 
978    if (vid->stream_type == RDECODE_CODEC_H264_PERF && pdev->info.family >= CHIP_POLARIS10) {
979       vk_outarray_append_typed(VkVideoSessionMemoryRequirementsKHR, &out, m)
980       {
981          m->memoryBindIndex = RADV_BIND_DECODER_CTX;
982          m->memoryRequirements.size = align(calc_ctx_size_h264_perf(vid), 4096);
983          m->memoryRequirements.alignment = 0;
984          m->memoryRequirements.memoryTypeBits = memory_type_bits;
985       }
986    }
987    if (vid->stream_type == RDECODE_CODEC_H265) {
988       uint32_t ctx_size;
989 
990       if (vid->vk.h265.profile_idc == STD_VIDEO_H265_PROFILE_IDC_MAIN_10)
991          ctx_size = calc_ctx_size_h265_main10(vid);
992       else
993          ctx_size = calc_ctx_size_h265_main(vid);
994       vk_outarray_append_typed(VkVideoSessionMemoryRequirementsKHR, &out, m)
995       {
996          m->memoryBindIndex = RADV_BIND_DECODER_CTX;
997          m->memoryRequirements.size = align(ctx_size, 4096);
998          m->memoryRequirements.alignment = 0;
999          m->memoryRequirements.memoryTypeBits = memory_type_bits;
1000       }
1001    }
1002    if (vid->stream_type == RDECODE_CODEC_AV1) {
1003       vk_outarray_append_typed(VkVideoSessionMemoryRequirementsKHR, &out, m)
1004       {
1005          m->memoryBindIndex = RADV_BIND_DECODER_CTX;
1006          m->memoryRequirements.size = align(calc_ctx_size_av1(device, vid), 4096);
1007          m->memoryRequirements.alignment = 0;
1008          m->memoryRequirements.memoryTypeBits = 0;
1009          for (unsigned i = 0; i < pdev->memory_properties.memoryTypeCount; i++)
1010             if (pdev->memory_properties.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
1011                m->memoryRequirements.memoryTypeBits |= (1 << i);
1012       }
1013    }
1014    return vk_outarray_status(&out);
1015 }
1016 
1017 VKAPI_ATTR VkResult VKAPI_CALL
radv_UpdateVideoSessionParametersKHR(VkDevice _device,VkVideoSessionParametersKHR videoSessionParameters,const VkVideoSessionParametersUpdateInfoKHR * pUpdateInfo)1018 radv_UpdateVideoSessionParametersKHR(VkDevice _device, VkVideoSessionParametersKHR videoSessionParameters,
1019                                      const VkVideoSessionParametersUpdateInfoKHR *pUpdateInfo)
1020 {
1021    VK_FROM_HANDLE(radv_video_session_params, params, videoSessionParameters);
1022 
1023    VkResult result = vk_video_session_parameters_update(&params->vk, pUpdateInfo);
1024    if (result != VK_SUCCESS)
1025       return result;
1026    radv_video_patch_session_parameters(&params->vk);
1027    return result;
1028 }
1029 
1030 static void
copy_bind(struct radv_vid_mem * dst,const VkBindVideoSessionMemoryInfoKHR * src)1031 copy_bind(struct radv_vid_mem *dst, const VkBindVideoSessionMemoryInfoKHR *src)
1032 {
1033    dst->mem = radv_device_memory_from_handle(src->memory);
1034    dst->offset = src->memoryOffset;
1035    dst->size = src->memorySize;
1036 }
1037 
1038 VKAPI_ATTR VkResult VKAPI_CALL
radv_BindVideoSessionMemoryKHR(VkDevice _device,VkVideoSessionKHR videoSession,uint32_t videoSessionBindMemoryCount,const VkBindVideoSessionMemoryInfoKHR * pBindSessionMemoryInfos)1039 radv_BindVideoSessionMemoryKHR(VkDevice _device, VkVideoSessionKHR videoSession, uint32_t videoSessionBindMemoryCount,
1040                                const VkBindVideoSessionMemoryInfoKHR *pBindSessionMemoryInfos)
1041 {
1042    VK_FROM_HANDLE(radv_video_session, vid, videoSession);
1043 
1044    for (unsigned i = 0; i < videoSessionBindMemoryCount; i++) {
1045       switch (pBindSessionMemoryInfos[i].memoryBindIndex) {
1046       case RADV_BIND_SESSION_CTX:
1047          copy_bind(&vid->sessionctx, &pBindSessionMemoryInfos[i]);
1048          break;
1049       case RADV_BIND_DECODER_CTX:
1050          copy_bind(&vid->ctx, &pBindSessionMemoryInfos[i]);
1051          break;
1052       default:
1053          assert(0);
1054          break;
1055       }
1056    }
1057    return VK_SUCCESS;
1058 }
1059 
1060 /* add a new set register command to the IB */
1061 static void
set_reg(struct radv_cmd_buffer * cmd_buffer,unsigned reg,uint32_t val)1062 set_reg(struct radv_cmd_buffer *cmd_buffer, unsigned reg, uint32_t val)
1063 {
1064    struct radeon_cmdbuf *cs = cmd_buffer->cs;
1065    radeon_emit(cs, RDECODE_PKT0(reg >> 2, 0));
1066    radeon_emit(cs, val);
1067 }
1068 
1069 static void
send_cmd(struct radv_cmd_buffer * cmd_buffer,unsigned cmd,struct radeon_winsys_bo * bo,uint32_t offset)1070 send_cmd(struct radv_cmd_buffer *cmd_buffer, unsigned cmd, struct radeon_winsys_bo *bo, uint32_t offset)
1071 {
1072    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
1073    const struct radv_physical_device *pdev = radv_device_physical(device);
1074    uint64_t addr;
1075 
1076    radv_cs_add_buffer(device->ws, cmd_buffer->cs, bo);
1077    addr = radv_buffer_get_va(bo);
1078    addr += offset;
1079 
1080    if (pdev->vid_decode_ip != AMD_IP_VCN_UNIFIED) {
1081       radeon_check_space(device->ws, cmd_buffer->cs, 6);
1082       set_reg(cmd_buffer, pdev->vid_dec_reg.data0, addr);
1083       set_reg(cmd_buffer, pdev->vid_dec_reg.data1, addr >> 32);
1084       set_reg(cmd_buffer, pdev->vid_dec_reg.cmd, cmd << 1);
1085       return;
1086    }
1087    switch (cmd) {
1088    case RDECODE_CMD_MSG_BUFFER:
1089       cmd_buffer->video.decode_buffer->valid_buf_flag |= RDECODE_CMDBUF_FLAGS_MSG_BUFFER;
1090       cmd_buffer->video.decode_buffer->msg_buffer_address_hi = (addr >> 32);
1091       cmd_buffer->video.decode_buffer->msg_buffer_address_lo = (addr);
1092       break;
1093    case RDECODE_CMD_DPB_BUFFER:
1094       cmd_buffer->video.decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_DPB_BUFFER);
1095       cmd_buffer->video.decode_buffer->dpb_buffer_address_hi = (addr >> 32);
1096       cmd_buffer->video.decode_buffer->dpb_buffer_address_lo = (addr);
1097       break;
1098    case RDECODE_CMD_DECODING_TARGET_BUFFER:
1099       cmd_buffer->video.decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_DECODING_TARGET_BUFFER);
1100       cmd_buffer->video.decode_buffer->target_buffer_address_hi = (addr >> 32);
1101       cmd_buffer->video.decode_buffer->target_buffer_address_lo = (addr);
1102       break;
1103    case RDECODE_CMD_FEEDBACK_BUFFER:
1104       cmd_buffer->video.decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_FEEDBACK_BUFFER);
1105       cmd_buffer->video.decode_buffer->feedback_buffer_address_hi = (addr >> 32);
1106       cmd_buffer->video.decode_buffer->feedback_buffer_address_lo = (addr);
1107       break;
1108    case RDECODE_CMD_PROB_TBL_BUFFER:
1109       cmd_buffer->video.decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_PROB_TBL_BUFFER);
1110       cmd_buffer->video.decode_buffer->prob_tbl_buffer_address_hi = (addr >> 32);
1111       cmd_buffer->video.decode_buffer->prob_tbl_buffer_address_lo = (addr);
1112       break;
1113    case RDECODE_CMD_SESSION_CONTEXT_BUFFER:
1114       cmd_buffer->video.decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_SESSION_CONTEXT_BUFFER);
1115       cmd_buffer->video.decode_buffer->session_contex_buffer_address_hi = (addr >> 32);
1116       cmd_buffer->video.decode_buffer->session_contex_buffer_address_lo = (addr);
1117       break;
1118    case RDECODE_CMD_BITSTREAM_BUFFER:
1119       cmd_buffer->video.decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_BITSTREAM_BUFFER);
1120       cmd_buffer->video.decode_buffer->bitstream_buffer_address_hi = (addr >> 32);
1121       cmd_buffer->video.decode_buffer->bitstream_buffer_address_lo = (addr);
1122       break;
1123    case RDECODE_CMD_IT_SCALING_TABLE_BUFFER:
1124       cmd_buffer->video.decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_IT_SCALING_BUFFER);
1125       cmd_buffer->video.decode_buffer->it_sclr_table_buffer_address_hi = (addr >> 32);
1126       cmd_buffer->video.decode_buffer->it_sclr_table_buffer_address_lo = (addr);
1127       break;
1128    case RDECODE_CMD_CONTEXT_BUFFER:
1129       cmd_buffer->video.decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_CONTEXT_BUFFER);
1130       cmd_buffer->video.decode_buffer->context_buffer_address_hi = (addr >> 32);
1131       cmd_buffer->video.decode_buffer->context_buffer_address_lo = (addr);
1132       break;
1133    default:
1134       assert(0);
1135    }
1136 }
1137 
1138 static void
rvcn_dec_message_create(struct radv_video_session * vid,void * ptr,uint32_t size)1139 rvcn_dec_message_create(struct radv_video_session *vid, void *ptr, uint32_t size)
1140 {
1141    rvcn_dec_message_header_t *header = ptr;
1142    rvcn_dec_message_create_t *create = (void *)((char *)ptr + sizeof(rvcn_dec_message_header_t));
1143 
1144    memset(ptr, 0, size);
1145    header->header_size = sizeof(rvcn_dec_message_header_t);
1146    header->total_size = size;
1147    header->num_buffers = 1;
1148    header->msg_type = RDECODE_MSG_CREATE;
1149    header->stream_handle = vid->stream_handle;
1150    header->status_report_feedback_number = 0;
1151 
1152    header->index[0].message_id = RDECODE_MESSAGE_CREATE;
1153    header->index[0].offset = sizeof(rvcn_dec_message_header_t);
1154    header->index[0].size = sizeof(rvcn_dec_message_create_t);
1155    header->index[0].filled = 0;
1156 
1157    create->stream_type = vid->stream_type;
1158    create->session_flags = 0;
1159    create->width_in_samples = vid->vk.max_coded.width;
1160    create->height_in_samples = vid->vk.max_coded.height;
1161 }
1162 
1163 static void
rvcn_dec_message_feedback(void * ptr)1164 rvcn_dec_message_feedback(void *ptr)
1165 {
1166    rvcn_dec_feedback_header_t *header = (void *)ptr;
1167 
1168    header->header_size = sizeof(rvcn_dec_feedback_header_t);
1169    header->total_size = sizeof(rvcn_dec_feedback_header_t);
1170    header->num_buffers = 0;
1171 }
1172 
1173 static const uint8_t h264_levels[] = {10, 11, 12, 13, 20, 21, 22, 30, 31, 32, 40, 41, 42, 50, 51, 52, 60, 61, 62};
1174 static uint8_t
get_h264_level(StdVideoH264LevelIdc level)1175 get_h264_level(StdVideoH264LevelIdc level)
1176 {
1177    assert(level <= STD_VIDEO_H264_LEVEL_IDC_6_2);
1178    return h264_levels[level];
1179 }
1180 
1181 static void
update_h264_scaling(unsigned char scaling_list_4x4[6][16],unsigned char scaling_list_8x8[2][64],const StdVideoH264ScalingLists * scaling_lists)1182 update_h264_scaling(unsigned char scaling_list_4x4[6][16], unsigned char scaling_list_8x8[2][64],
1183                     const StdVideoH264ScalingLists *scaling_lists)
1184 {
1185    for (int i = 0; i < STD_VIDEO_H264_SCALING_LIST_4X4_NUM_LISTS; i++) {
1186       for (int j = 0; j < STD_VIDEO_H264_SCALING_LIST_4X4_NUM_ELEMENTS; j++)
1187          scaling_list_4x4[i][vl_zscan_normal_16[j]] = scaling_lists->ScalingList4x4[i][j];
1188    }
1189 
1190    for (int i = 0; i < 2; i++) {
1191       for (int j = 0; j < STD_VIDEO_H264_SCALING_LIST_8X8_NUM_ELEMENTS; j++)
1192          scaling_list_8x8[i][vl_zscan_normal[j]] = scaling_lists->ScalingList8x8[i][j];
1193    }
1194 }
1195 
1196 static rvcn_dec_message_avc_t
get_h264_msg(struct radv_video_session * vid,struct radv_video_session_params * params,const struct VkVideoDecodeInfoKHR * frame_info,uint32_t * slice_offset,uint32_t * width_in_samples,uint32_t * height_in_samples,void * it_ptr)1197 get_h264_msg(struct radv_video_session *vid, struct radv_video_session_params *params,
1198              const struct VkVideoDecodeInfoKHR *frame_info, uint32_t *slice_offset, uint32_t *width_in_samples,
1199              uint32_t *height_in_samples, void *it_ptr)
1200 {
1201    rvcn_dec_message_avc_t result;
1202    const struct VkVideoDecodeH264PictureInfoKHR *h264_pic_info =
1203       vk_find_struct_const(frame_info->pNext, VIDEO_DECODE_H264_PICTURE_INFO_KHR);
1204 
1205    *slice_offset = h264_pic_info->pSliceOffsets[0];
1206 
1207    memset(&result, 0, sizeof(result));
1208 
1209    assert(params->vk.h264_dec.h264_sps_count > 0);
1210    const StdVideoH264SequenceParameterSet *sps =
1211       vk_video_find_h264_dec_std_sps(&params->vk, h264_pic_info->pStdPictureInfo->seq_parameter_set_id);
1212    switch (sps->profile_idc) {
1213    case STD_VIDEO_H264_PROFILE_IDC_BASELINE:
1214       result.profile = RDECODE_H264_PROFILE_BASELINE;
1215       break;
1216    case STD_VIDEO_H264_PROFILE_IDC_MAIN:
1217       result.profile = RDECODE_H264_PROFILE_MAIN;
1218       break;
1219    case STD_VIDEO_H264_PROFILE_IDC_HIGH:
1220       result.profile = RDECODE_H264_PROFILE_HIGH;
1221       break;
1222    default:
1223       fprintf(stderr, "UNSUPPORTED CODEC %d\n", sps->profile_idc);
1224       result.profile = RDECODE_H264_PROFILE_MAIN;
1225       break;
1226    }
1227 
1228    *width_in_samples = (sps->pic_width_in_mbs_minus1 + 1) * 16;
1229    *height_in_samples = (sps->pic_height_in_map_units_minus1 + 1) * 16;
1230    if (!sps->flags.frame_mbs_only_flag)
1231       *height_in_samples *= 2;
1232    result.level = get_h264_level(sps->level_idc);
1233 
1234    result.sps_info_flags = 0;
1235 
1236    result.sps_info_flags |= sps->flags.direct_8x8_inference_flag << 0;
1237    result.sps_info_flags |= sps->flags.mb_adaptive_frame_field_flag << 1;
1238    result.sps_info_flags |= sps->flags.frame_mbs_only_flag << 2;
1239    result.sps_info_flags |= sps->flags.delta_pic_order_always_zero_flag << 3;
1240    if (vid->dpb_type != DPB_DYNAMIC_TIER_2)
1241       result.sps_info_flags |= 1 << RDECODE_SPS_INFO_H264_EXTENSION_SUPPORT_FLAG_SHIFT;
1242 
1243    result.bit_depth_luma_minus8 = sps->bit_depth_luma_minus8;
1244    result.bit_depth_chroma_minus8 = sps->bit_depth_chroma_minus8;
1245    result.log2_max_frame_num_minus4 = sps->log2_max_frame_num_minus4;
1246    result.pic_order_cnt_type = sps->pic_order_cnt_type;
1247    result.log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_pic_order_cnt_lsb_minus4;
1248 
1249    result.chroma_format = sps->chroma_format_idc;
1250 
1251    const StdVideoH264PictureParameterSet *pps =
1252       vk_video_find_h264_dec_std_pps(&params->vk, h264_pic_info->pStdPictureInfo->pic_parameter_set_id);
1253    result.pps_info_flags = 0;
1254    result.pps_info_flags |= pps->flags.transform_8x8_mode_flag << 0;
1255    result.pps_info_flags |= pps->flags.redundant_pic_cnt_present_flag << 1;
1256    result.pps_info_flags |= pps->flags.constrained_intra_pred_flag << 2;
1257    result.pps_info_flags |= pps->flags.deblocking_filter_control_present_flag << 3;
1258    result.pps_info_flags |= pps->weighted_bipred_idc << 4;
1259    result.pps_info_flags |= pps->flags.weighted_pred_flag << 6;
1260    result.pps_info_flags |= pps->flags.bottom_field_pic_order_in_frame_present_flag << 7;
1261    result.pps_info_flags |= pps->flags.entropy_coding_mode_flag << 8;
1262 
1263    result.pic_init_qp_minus26 = pps->pic_init_qp_minus26;
1264    result.chroma_qp_index_offset = pps->chroma_qp_index_offset;
1265    result.second_chroma_qp_index_offset = pps->second_chroma_qp_index_offset;
1266 
1267    StdVideoH264ScalingLists scaling_lists;
1268    vk_video_derive_h264_scaling_list(sps, pps, &scaling_lists);
1269    update_h264_scaling(result.scaling_list_4x4, result.scaling_list_8x8, &scaling_lists);
1270 
1271    memset(it_ptr, 0, IT_SCALING_TABLE_SIZE);
1272    memcpy(it_ptr, result.scaling_list_4x4, 6 * 16);
1273    memcpy((char *)it_ptr + 96, result.scaling_list_8x8, 2 * 64);
1274 
1275    result.num_ref_idx_l0_active_minus1 = pps->num_ref_idx_l0_default_active_minus1;
1276    result.num_ref_idx_l1_active_minus1 = pps->num_ref_idx_l1_default_active_minus1;
1277 
1278    result.curr_field_order_cnt_list[0] = h264_pic_info->pStdPictureInfo->PicOrderCnt[0];
1279    result.curr_field_order_cnt_list[1] = h264_pic_info->pStdPictureInfo->PicOrderCnt[1];
1280 
1281    result.frame_num = h264_pic_info->pStdPictureInfo->frame_num;
1282 
1283    result.num_ref_frames = sps->max_num_ref_frames;
1284    result.non_existing_frame_flags = 0;
1285    result.used_for_reference_flags = 0;
1286 
1287    memset(result.ref_frame_list, 0xff, sizeof(unsigned char) * 16);
1288    memset(result.frame_num_list, 0, sizeof(unsigned int) * 16);
1289    for (unsigned i = 0; i < frame_info->referenceSlotCount; i++) {
1290       int idx = frame_info->pReferenceSlots[i].slotIndex;
1291       const struct VkVideoDecodeH264DpbSlotInfoKHR *dpb_slot =
1292          vk_find_struct_const(frame_info->pReferenceSlots[i].pNext, VIDEO_DECODE_H264_DPB_SLOT_INFO_KHR);
1293 
1294       result.frame_num_list[i] = dpb_slot->pStdReferenceInfo->FrameNum;
1295       result.field_order_cnt_list[i][0] = dpb_slot->pStdReferenceInfo->PicOrderCnt[0];
1296       result.field_order_cnt_list[i][1] = dpb_slot->pStdReferenceInfo->PicOrderCnt[1];
1297 
1298       result.ref_frame_list[i] = idx;
1299 
1300       if (dpb_slot->pStdReferenceInfo->flags.top_field_flag)
1301          result.used_for_reference_flags |= (1 << (2 * i));
1302       if (dpb_slot->pStdReferenceInfo->flags.bottom_field_flag)
1303          result.used_for_reference_flags |= (1 << (2 * i + 1));
1304 
1305       if (!dpb_slot->pStdReferenceInfo->flags.top_field_flag && !dpb_slot->pStdReferenceInfo->flags.bottom_field_flag)
1306          result.used_for_reference_flags |= (3 << (2 * i));
1307 
1308       if (dpb_slot->pStdReferenceInfo->flags.used_for_long_term_reference)
1309          result.ref_frame_list[i] |= 0x80;
1310       if (dpb_slot->pStdReferenceInfo->flags.is_non_existing)
1311          result.non_existing_frame_flags |= 1 << i;
1312    }
1313    result.curr_pic_ref_frame_num = frame_info->referenceSlotCount;
1314    result.decoded_pic_idx = frame_info->pSetupReferenceSlot->slotIndex;
1315 
1316    return result;
1317 }
1318 
1319 static void
update_h265_scaling(void * it_ptr,const StdVideoH265ScalingLists * scaling_lists)1320 update_h265_scaling(void *it_ptr, const StdVideoH265ScalingLists *scaling_lists)
1321 {
1322    if (scaling_lists) {
1323       memcpy(it_ptr, scaling_lists->ScalingList4x4,
1324              STD_VIDEO_H265_SCALING_LIST_4X4_NUM_LISTS * STD_VIDEO_H265_SCALING_LIST_4X4_NUM_ELEMENTS);
1325       memcpy((char *)it_ptr + 96, scaling_lists->ScalingList8x8,
1326              STD_VIDEO_H265_SCALING_LIST_8X8_NUM_LISTS * STD_VIDEO_H265_SCALING_LIST_8X8_NUM_ELEMENTS);
1327       memcpy((char *)it_ptr + 480, scaling_lists->ScalingList16x16,
1328              STD_VIDEO_H265_SCALING_LIST_16X16_NUM_LISTS * STD_VIDEO_H265_SCALING_LIST_16X16_NUM_ELEMENTS);
1329       memcpy((char *)it_ptr + 864, scaling_lists->ScalingList32x32,
1330              STD_VIDEO_H265_SCALING_LIST_32X32_NUM_LISTS * STD_VIDEO_H265_SCALING_LIST_32X32_NUM_ELEMENTS);
1331    } else {
1332       memset(it_ptr, 0, STD_VIDEO_H265_SCALING_LIST_4X4_NUM_LISTS * STD_VIDEO_H265_SCALING_LIST_4X4_NUM_ELEMENTS);
1333       memset((char *)it_ptr + 96, 0,
1334              STD_VIDEO_H265_SCALING_LIST_8X8_NUM_LISTS * STD_VIDEO_H265_SCALING_LIST_8X8_NUM_ELEMENTS);
1335       memset((char *)it_ptr + 480, 0,
1336              STD_VIDEO_H265_SCALING_LIST_16X16_NUM_LISTS * STD_VIDEO_H265_SCALING_LIST_16X16_NUM_ELEMENTS);
1337       memset((char *)it_ptr + 864, 0,
1338              STD_VIDEO_H265_SCALING_LIST_32X32_NUM_LISTS * STD_VIDEO_H265_SCALING_LIST_32X32_NUM_ELEMENTS);
1339    }
1340 }
1341 
1342 static rvcn_dec_message_hevc_t
get_h265_msg(struct radv_device * device,struct radv_video_session * vid,struct radv_video_session_params * params,const struct VkVideoDecodeInfoKHR * frame_info,uint32_t * width_in_samples,uint32_t * height_in_samples,void * it_ptr)1343 get_h265_msg(struct radv_device *device, struct radv_video_session *vid, struct radv_video_session_params *params,
1344              const struct VkVideoDecodeInfoKHR *frame_info,
1345              uint32_t *width_in_samples,
1346              uint32_t *height_in_samples,
1347              void *it_ptr)
1348 {
1349    const struct radv_physical_device *pdev = radv_device_physical(device);
1350    rvcn_dec_message_hevc_t result;
1351    int i, j;
1352    const struct VkVideoDecodeH265PictureInfoKHR *h265_pic_info =
1353       vk_find_struct_const(frame_info->pNext, VIDEO_DECODE_H265_PICTURE_INFO_KHR);
1354    memset(&result, 0, sizeof(result));
1355 
1356    const StdVideoH265SequenceParameterSet *sps =
1357       vk_video_find_h265_dec_std_sps(&params->vk, h265_pic_info->pStdPictureInfo->pps_seq_parameter_set_id);
1358    const StdVideoH265PictureParameterSet *pps =
1359       vk_video_find_h265_dec_std_pps(&params->vk, h265_pic_info->pStdPictureInfo->pps_pic_parameter_set_id);
1360 
1361    result.sps_info_flags = 0;
1362    result.sps_info_flags |= sps->flags.scaling_list_enabled_flag << 0;
1363    result.sps_info_flags |= sps->flags.amp_enabled_flag << 1;
1364    result.sps_info_flags |= sps->flags.sample_adaptive_offset_enabled_flag << 2;
1365    result.sps_info_flags |= sps->flags.pcm_enabled_flag << 3;
1366    result.sps_info_flags |= sps->flags.pcm_loop_filter_disabled_flag << 4;
1367    result.sps_info_flags |= sps->flags.long_term_ref_pics_present_flag << 5;
1368    result.sps_info_flags |= sps->flags.sps_temporal_mvp_enabled_flag << 6;
1369    result.sps_info_flags |= sps->flags.strong_intra_smoothing_enabled_flag << 7;
1370    result.sps_info_flags |= sps->flags.separate_colour_plane_flag << 8;
1371 
1372    if (pdev->info.family == CHIP_CARRIZO)
1373       result.sps_info_flags |= 1 << 9;
1374 
1375    if (!h265_pic_info->pStdPictureInfo->flags.short_term_ref_pic_set_sps_flag) {
1376       result.sps_info_flags |= 1 << 11;
1377    }
1378    result.st_rps_bits = h265_pic_info->pStdPictureInfo->NumBitsForSTRefPicSetInSlice;
1379 
1380    *width_in_samples = sps->pic_width_in_luma_samples;
1381    *height_in_samples = sps->pic_height_in_luma_samples;
1382    result.chroma_format = sps->chroma_format_idc;
1383    result.bit_depth_luma_minus8 = sps->bit_depth_luma_minus8;
1384    result.bit_depth_chroma_minus8 = sps->bit_depth_chroma_minus8;
1385    result.log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_pic_order_cnt_lsb_minus4;
1386    result.sps_max_dec_pic_buffering_minus1 =
1387       sps->pDecPicBufMgr->max_dec_pic_buffering_minus1[sps->sps_max_sub_layers_minus1];
1388    result.log2_min_luma_coding_block_size_minus3 = sps->log2_min_luma_coding_block_size_minus3;
1389    result.log2_diff_max_min_luma_coding_block_size = sps->log2_diff_max_min_luma_coding_block_size;
1390    result.log2_min_transform_block_size_minus2 = sps->log2_min_luma_transform_block_size_minus2;
1391    result.log2_diff_max_min_transform_block_size = sps->log2_diff_max_min_luma_transform_block_size;
1392    result.max_transform_hierarchy_depth_inter = sps->max_transform_hierarchy_depth_inter;
1393    result.max_transform_hierarchy_depth_intra = sps->max_transform_hierarchy_depth_intra;
1394    if (sps->flags.pcm_enabled_flag) {
1395       result.pcm_sample_bit_depth_luma_minus1 = sps->pcm_sample_bit_depth_luma_minus1;
1396       result.pcm_sample_bit_depth_chroma_minus1 = sps->pcm_sample_bit_depth_chroma_minus1;
1397       result.log2_min_pcm_luma_coding_block_size_minus3 = sps->log2_min_pcm_luma_coding_block_size_minus3;
1398       result.log2_diff_max_min_pcm_luma_coding_block_size = sps->log2_diff_max_min_pcm_luma_coding_block_size;
1399    }
1400    result.num_short_term_ref_pic_sets = sps->num_short_term_ref_pic_sets;
1401 
1402    result.pps_info_flags = 0;
1403    result.pps_info_flags |= pps->flags.dependent_slice_segments_enabled_flag << 0;
1404    result.pps_info_flags |= pps->flags.output_flag_present_flag << 1;
1405    result.pps_info_flags |= pps->flags.sign_data_hiding_enabled_flag << 2;
1406    result.pps_info_flags |= pps->flags.cabac_init_present_flag << 3;
1407    result.pps_info_flags |= pps->flags.constrained_intra_pred_flag << 4;
1408    result.pps_info_flags |= pps->flags.transform_skip_enabled_flag << 5;
1409    result.pps_info_flags |= pps->flags.cu_qp_delta_enabled_flag << 6;
1410    result.pps_info_flags |= pps->flags.pps_slice_chroma_qp_offsets_present_flag << 7;
1411    result.pps_info_flags |= pps->flags.weighted_pred_flag << 8;
1412    result.pps_info_flags |= pps->flags.weighted_bipred_flag << 9;
1413    result.pps_info_flags |= pps->flags.transquant_bypass_enabled_flag << 10;
1414    result.pps_info_flags |= pps->flags.tiles_enabled_flag << 11;
1415    result.pps_info_flags |= pps->flags.entropy_coding_sync_enabled_flag << 12;
1416    result.pps_info_flags |= pps->flags.uniform_spacing_flag << 13;
1417    result.pps_info_flags |= pps->flags.loop_filter_across_tiles_enabled_flag << 14;
1418    result.pps_info_flags |= pps->flags.pps_loop_filter_across_slices_enabled_flag << 15;
1419    result.pps_info_flags |= pps->flags.deblocking_filter_override_enabled_flag << 16;
1420    result.pps_info_flags |= pps->flags.pps_deblocking_filter_disabled_flag << 17;
1421    result.pps_info_flags |= pps->flags.lists_modification_present_flag << 18;
1422    result.pps_info_flags |= pps->flags.slice_segment_header_extension_present_flag << 19;
1423 
1424    result.num_extra_slice_header_bits = pps->num_extra_slice_header_bits;
1425    result.num_long_term_ref_pic_sps = sps->num_long_term_ref_pics_sps;
1426    result.num_ref_idx_l0_default_active_minus1 = pps->num_ref_idx_l0_default_active_minus1;
1427    result.num_ref_idx_l1_default_active_minus1 = pps->num_ref_idx_l1_default_active_minus1;
1428    result.pps_cb_qp_offset = pps->pps_cb_qp_offset;
1429    result.pps_cr_qp_offset = pps->pps_cr_qp_offset;
1430    result.pps_beta_offset_div2 = pps->pps_beta_offset_div2;
1431    result.pps_tc_offset_div2 = pps->pps_tc_offset_div2;
1432    result.diff_cu_qp_delta_depth = pps->diff_cu_qp_delta_depth;
1433    result.num_tile_columns_minus1 = pps->num_tile_columns_minus1;
1434    result.num_tile_rows_minus1 = pps->num_tile_rows_minus1;
1435    result.log2_parallel_merge_level_minus2 = pps->log2_parallel_merge_level_minus2;
1436    result.init_qp_minus26 = pps->init_qp_minus26;
1437 
1438    for (i = 0; i < 19; ++i)
1439       result.column_width_minus1[i] = pps->column_width_minus1[i];
1440 
1441    for (i = 0; i < 21; ++i)
1442       result.row_height_minus1[i] = pps->row_height_minus1[i];
1443 
1444    result.num_delta_pocs_ref_rps_idx = h265_pic_info->pStdPictureInfo->NumDeltaPocsOfRefRpsIdx;
1445    result.curr_poc = h265_pic_info->pStdPictureInfo->PicOrderCntVal;
1446 
1447    uint8_t idxs[16];
1448    memset(result.poc_list, 0, 16 * sizeof(int));
1449    memset(result.ref_pic_list, 0x7f, 16);
1450    memset(idxs, 0xff, 16);
1451    for (i = 0; i < frame_info->referenceSlotCount; i++) {
1452       const struct VkVideoDecodeH265DpbSlotInfoKHR *dpb_slot =
1453          vk_find_struct_const(frame_info->pReferenceSlots[i].pNext, VIDEO_DECODE_H265_DPB_SLOT_INFO_KHR);
1454       int idx = frame_info->pReferenceSlots[i].slotIndex;
1455       result.poc_list[i] = dpb_slot->pStdReferenceInfo->PicOrderCntVal;
1456       result.ref_pic_list[i] = idx;
1457       idxs[idx] = i;
1458    }
1459    result.curr_idx = frame_info->pSetupReferenceSlot->slotIndex;
1460 
1461 #define IDXS(x) ((x) == 0xff ? 0xff : idxs[(x)])
1462    for (i = 0; i < 8; ++i)
1463       result.ref_pic_set_st_curr_before[i] = IDXS(h265_pic_info->pStdPictureInfo->RefPicSetStCurrBefore[i]);
1464 
1465    for (i = 0; i < 8; ++i)
1466       result.ref_pic_set_st_curr_after[i] = IDXS(h265_pic_info->pStdPictureInfo->RefPicSetStCurrAfter[i]);
1467 
1468    for (i = 0; i < 8; ++i)
1469       result.ref_pic_set_lt_curr[i] = IDXS(h265_pic_info->pStdPictureInfo->RefPicSetLtCurr[i]);
1470 
1471    const StdVideoH265ScalingLists *scaling_lists = NULL;
1472    if (pps->flags.pps_scaling_list_data_present_flag)
1473       scaling_lists = pps->pScalingLists;
1474    else if (sps->flags.sps_scaling_list_data_present_flag)
1475       scaling_lists = sps->pScalingLists;
1476 
1477    update_h265_scaling(it_ptr, scaling_lists);
1478 
1479    if (scaling_lists) {
1480       for (i = 0; i < STD_VIDEO_H265_SCALING_LIST_16X16_NUM_LISTS; ++i)
1481          result.ucScalingListDCCoefSizeID2[i] = scaling_lists->ScalingListDCCoef16x16[i];
1482 
1483       for (i = 0; i < STD_VIDEO_H265_SCALING_LIST_32X32_NUM_LISTS; ++i)
1484          result.ucScalingListDCCoefSizeID3[i] = scaling_lists->ScalingListDCCoef32x32[i];
1485    }
1486 
1487    for (i = 0; i < 2; i++) {
1488       for (j = 0; j < 15; j++)
1489          result.direct_reflist[i][j] = 0xff;
1490    }
1491 
1492    if (vid->vk.h265.profile_idc == STD_VIDEO_H265_PROFILE_IDC_MAIN_10) {
1493       if (vid->vk.picture_format == VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16) {
1494          result.p010_mode = 1;
1495          result.msb_mode = 1;
1496       } else {
1497          result.p010_mode = 0;
1498          result.luma_10to8 = 5;
1499          result.chroma_10to8 = 5;
1500          result.hevc_reserved[0] = 4; /* sclr_luma10to8 */
1501          result.hevc_reserved[1] = 4; /* sclr_chroma10to8 */
1502       }
1503    }
1504 
1505    return result;
1506 }
1507 
/* AV1 loop-restoration filter types (presumably matching the values of
 * the AV1 FrameRestorationType field; confirm against the AV1 spec). */
enum {
   AV1_RESTORE_NONE       = 0, /* no restoration */
   AV1_RESTORE_WIENER     = 1, /* Wiener filter */
   AV1_RESTORE_SGRPROJ    = 2, /* self-guided projection filter */
   AV1_RESTORE_SWITCHABLE = 3, /* filter selected per restoration unit */
};

/* Super-resolution scale numerator; also used as the denominator when
 * super-resolution is off. */
#define AV1_SUPERRES_NUM       8
/* Value added to coded_denom to obtain the super-resolution denominator. */
#define AV1_SUPERRES_DENOM_MIN 9
1517 
1518 static rvcn_dec_message_av1_t
get_av1_msg(struct radv_device * device,struct radv_video_session * vid,struct radv_video_session_params * params,const struct VkVideoDecodeInfoKHR * frame_info,void * probs_ptr,int * update_reference_slot)1519 get_av1_msg(struct radv_device *device, struct radv_video_session *vid, struct radv_video_session_params *params,
1520             const struct VkVideoDecodeInfoKHR *frame_info, void *probs_ptr, int *update_reference_slot)
1521 {
1522    rvcn_dec_message_av1_t result;
1523    unsigned i, j;
1524    const struct VkVideoDecodeAV1PictureInfoKHR *av1_pic_info =
1525       vk_find_struct_const(frame_info->pNext, VIDEO_DECODE_AV1_PICTURE_INFO_KHR);
1526    const StdVideoDecodeAV1PictureInfo *pi = av1_pic_info->pStdPictureInfo;
1527    const StdVideoAV1SequenceHeader *seq_hdr = &params->vk.av1_dec.seq_hdr.base;
1528    memset(&result, 0, sizeof(result));
1529 
1530    const int intra_only_decoding = vid->vk.max_dpb_slots == 0;
1531    if (intra_only_decoding)
1532       assert(frame_info->pSetupReferenceSlot == NULL);
1533 
1534    *update_reference_slot = !(intra_only_decoding || pi->refresh_frame_flags == 0);
1535 
1536    result.frame_header_flags = (1 /*av1_pic_info->frame_header->flags.show_frame*/
1537                                 << RDECODE_FRAME_HDR_INFO_AV1_SHOW_FRAME_SHIFT) &
1538                                RDECODE_FRAME_HDR_INFO_AV1_SHOW_FRAME_MASK;
1539 
1540    result.frame_header_flags |= (pi->flags.disable_cdf_update << RDECODE_FRAME_HDR_INFO_AV1_DISABLE_CDF_UPDATE_SHIFT) &
1541                                 RDECODE_FRAME_HDR_INFO_AV1_DISABLE_CDF_UPDATE_MASK;
1542 
1543    result.frame_header_flags |=
1544       ((!pi->flags.disable_frame_end_update_cdf) << RDECODE_FRAME_HDR_INFO_AV1_REFRESH_FRAME_CONTEXT_SHIFT) &
1545       RDECODE_FRAME_HDR_INFO_AV1_REFRESH_FRAME_CONTEXT_MASK;
1546 
1547    result.frame_header_flags |=
1548       ((pi->frame_type == STD_VIDEO_AV1_FRAME_TYPE_INTRA_ONLY) << RDECODE_FRAME_HDR_INFO_AV1_INTRA_ONLY_SHIFT) &
1549       RDECODE_FRAME_HDR_INFO_AV1_INTRA_ONLY_MASK;
1550 
1551    result.frame_header_flags |= (pi->flags.allow_intrabc << RDECODE_FRAME_HDR_INFO_AV1_ALLOW_INTRABC_SHIFT) &
1552                                 RDECODE_FRAME_HDR_INFO_AV1_ALLOW_INTRABC_MASK;
1553 
1554    result.frame_header_flags |=
1555       (pi->flags.allow_high_precision_mv << RDECODE_FRAME_HDR_INFO_AV1_ALLOW_HIGH_PRECISION_MV_SHIFT) &
1556       RDECODE_FRAME_HDR_INFO_AV1_ALLOW_HIGH_PRECISION_MV_MASK;
1557 
1558    result.frame_header_flags |=
1559       (seq_hdr->pColorConfig->flags.mono_chrome << RDECODE_FRAME_HDR_INFO_AV1_MONOCHROME_SHIFT) &
1560       RDECODE_FRAME_HDR_INFO_AV1_MONOCHROME_MASK;
1561 
1562    result.frame_header_flags |= (pi->flags.skip_mode_present << RDECODE_FRAME_HDR_INFO_AV1_SKIP_MODE_FLAG_SHIFT) &
1563                                 RDECODE_FRAME_HDR_INFO_AV1_SKIP_MODE_FLAG_MASK;
1564 
1565    result.frame_header_flags |=
1566       (pi->pQuantization->flags.using_qmatrix << RDECODE_FRAME_HDR_INFO_AV1_USING_QMATRIX_SHIFT) &
1567       RDECODE_FRAME_HDR_INFO_AV1_USING_QMATRIX_MASK;
1568 
1569    result.frame_header_flags |=
1570       (seq_hdr->flags.enable_filter_intra << RDECODE_FRAME_HDR_INFO_AV1_ENABLE_FILTER_INTRA_SHIFT) &
1571       RDECODE_FRAME_HDR_INFO_AV1_ENABLE_FILTER_INTRA_MASK;
1572 
1573    result.frame_header_flags |=
1574       (seq_hdr->flags.enable_intra_edge_filter << RDECODE_FRAME_HDR_INFO_AV1_ENABLE_INTRA_EDGE_FILTER_SHIFT) &
1575       RDECODE_FRAME_HDR_INFO_AV1_ENABLE_INTRA_EDGE_FILTER_MASK;
1576 
1577    result.frame_header_flags |=
1578       (seq_hdr->flags.enable_interintra_compound << RDECODE_FRAME_HDR_INFO_AV1_ENABLE_INTERINTRA_COMPOUND_SHIFT) &
1579       RDECODE_FRAME_HDR_INFO_AV1_ENABLE_INTERINTRA_COMPOUND_MASK;
1580 
1581    result.frame_header_flags |=
1582       (seq_hdr->flags.enable_masked_compound << RDECODE_FRAME_HDR_INFO_AV1_ENABLE_MASKED_COMPOUND_SHIFT) &
1583       RDECODE_FRAME_HDR_INFO_AV1_ENABLE_MASKED_COMPOUND_MASK;
1584 
1585    result.frame_header_flags |=
1586       (pi->flags.allow_warped_motion << RDECODE_FRAME_HDR_INFO_AV1_ALLOW_WARPED_MOTION_SHIFT) &
1587       RDECODE_FRAME_HDR_INFO_AV1_ALLOW_WARPED_MOTION_MASK;
1588 
1589    result.frame_header_flags |=
1590       (seq_hdr->flags.enable_dual_filter << RDECODE_FRAME_HDR_INFO_AV1_ENABLE_DUAL_FILTER_SHIFT) &
1591       RDECODE_FRAME_HDR_INFO_AV1_ENABLE_DUAL_FILTER_MASK;
1592 
1593    result.frame_header_flags |=
1594       (seq_hdr->flags.enable_order_hint << RDECODE_FRAME_HDR_INFO_AV1_ENABLE_ORDER_HINT_SHIFT) &
1595       RDECODE_FRAME_HDR_INFO_AV1_ENABLE_ORDER_HINT_MASK;
1596 
1597    result.frame_header_flags |= (seq_hdr->flags.enable_jnt_comp << RDECODE_FRAME_HDR_INFO_AV1_ENABLE_JNT_COMP_SHIFT) &
1598                                 RDECODE_FRAME_HDR_INFO_AV1_ENABLE_JNT_COMP_MASK;
1599 
1600    result.frame_header_flags |= (pi->flags.use_ref_frame_mvs << RDECODE_FRAME_HDR_INFO_AV1_ALLOW_REF_FRAME_MVS_SHIFT) &
1601                                 RDECODE_FRAME_HDR_INFO_AV1_ALLOW_REF_FRAME_MVS_MASK;
1602 
1603    result.frame_header_flags |=
1604       (pi->flags.allow_screen_content_tools << RDECODE_FRAME_HDR_INFO_AV1_ALLOW_SCREEN_CONTENT_TOOLS_SHIFT) &
1605       RDECODE_FRAME_HDR_INFO_AV1_ALLOW_SCREEN_CONTENT_TOOLS_MASK;
1606 
1607    result.frame_header_flags |=
1608       (pi->flags.force_integer_mv << RDECODE_FRAME_HDR_INFO_AV1_CUR_FRAME_FORCE_INTEGER_MV_SHIFT) &
1609       RDECODE_FRAME_HDR_INFO_AV1_CUR_FRAME_FORCE_INTEGER_MV_MASK;
1610 
1611    result.frame_header_flags |=
1612       (pi->pLoopFilter->flags.loop_filter_delta_enabled << RDECODE_FRAME_HDR_INFO_AV1_MODE_REF_DELTA_ENABLED_SHIFT) &
1613       RDECODE_FRAME_HDR_INFO_AV1_MODE_REF_DELTA_ENABLED_MASK;
1614 
1615    result.frame_header_flags |=
1616       (pi->pLoopFilter->flags.loop_filter_delta_update << RDECODE_FRAME_HDR_INFO_AV1_MODE_REF_DELTA_UPDATE_SHIFT) &
1617       RDECODE_FRAME_HDR_INFO_AV1_MODE_REF_DELTA_UPDATE_MASK;
1618 
1619    result.frame_header_flags |= (pi->flags.delta_q_present << RDECODE_FRAME_HDR_INFO_AV1_DELTA_Q_PRESENT_FLAG_SHIFT) &
1620                                 RDECODE_FRAME_HDR_INFO_AV1_DELTA_Q_PRESENT_FLAG_MASK;
1621 
1622    result.frame_header_flags |= (pi->flags.delta_lf_present << RDECODE_FRAME_HDR_INFO_AV1_DELTA_LF_PRESENT_FLAG_SHIFT) &
1623                                 RDECODE_FRAME_HDR_INFO_AV1_DELTA_LF_PRESENT_FLAG_MASK;
1624 
1625    result.frame_header_flags |= (pi->flags.reduced_tx_set << RDECODE_FRAME_HDR_INFO_AV1_REDUCED_TX_SET_USED_SHIFT) &
1626                                 RDECODE_FRAME_HDR_INFO_AV1_REDUCED_TX_SET_USED_MASK;
1627 
1628    result.frame_header_flags |=
1629       (pi->flags.segmentation_enabled << RDECODE_FRAME_HDR_INFO_AV1_SEGMENTATION_ENABLED_SHIFT) &
1630       RDECODE_FRAME_HDR_INFO_AV1_SEGMENTATION_ENABLED_MASK;
1631 
1632    result.frame_header_flags |=
1633       (pi->flags.segmentation_update_map << RDECODE_FRAME_HDR_INFO_AV1_SEGMENTATION_UPDATE_MAP_SHIFT) &
1634       RDECODE_FRAME_HDR_INFO_AV1_SEGMENTATION_UPDATE_MAP_MASK;
1635 
1636    result.frame_header_flags |=
1637       (pi->flags.segmentation_temporal_update << RDECODE_FRAME_HDR_INFO_AV1_SEGMENTATION_TEMPORAL_UPDATE_SHIFT) &
1638       RDECODE_FRAME_HDR_INFO_AV1_SEGMENTATION_TEMPORAL_UPDATE_MASK;
1639 
1640    result.frame_header_flags |= (pi->flags.delta_lf_multi << RDECODE_FRAME_HDR_INFO_AV1_DELTA_LF_MULTI_SHIFT) &
1641                                 RDECODE_FRAME_HDR_INFO_AV1_DELTA_LF_MULTI_MASK;
1642 
1643    result.frame_header_flags |=
1644       (pi->flags.is_motion_mode_switchable << RDECODE_FRAME_HDR_INFO_AV1_SWITCHABLE_SKIP_MODE_SHIFT) &
1645       RDECODE_FRAME_HDR_INFO_AV1_SWITCHABLE_SKIP_MODE_MASK;
1646 
1647    result.frame_header_flags |= ((!intra_only_decoding ? !(pi->refresh_frame_flags) : 1)
1648                                  << RDECODE_FRAME_HDR_INFO_AV1_SKIP_REFERENCE_UPDATE_SHIFT) &
1649                                 RDECODE_FRAME_HDR_INFO_AV1_SKIP_REFERENCE_UPDATE_MASK;
1650 
1651    result.frame_header_flags |=
1652       ((!seq_hdr->flags.enable_ref_frame_mvs) << RDECODE_FRAME_HDR_INFO_AV1_DISABLE_REF_FRAME_MVS_SHIFT) &
1653       RDECODE_FRAME_HDR_INFO_AV1_DISABLE_REF_FRAME_MVS_MASK;
1654 
1655    result.current_frame_id = pi->current_frame_id;
1656    result.frame_offset = pi->OrderHint;
1657    result.profile = seq_hdr->seq_profile;
1658    result.is_annexb = 0;
1659 
1660    result.frame_type = pi->frame_type;
1661    result.primary_ref_frame = pi->primary_ref_frame;
1662 
1663    const struct VkVideoDecodeAV1DpbSlotInfoKHR *setup_dpb_slot =
1664       intra_only_decoding
1665          ? NULL
1666          : vk_find_struct_const(frame_info->pSetupReferenceSlot->pNext, VIDEO_DECODE_AV1_DPB_SLOT_INFO_KHR);
1667 
1668    /* The AMD FW interface does not need this information, since it's
1669     * redundant with the information derivable from the current frame header,
1670     * which the FW is parsing and tracking.
1671     */
1672    (void)setup_dpb_slot;
1673    result.curr_pic_idx = intra_only_decoding ? 0 : frame_info->pSetupReferenceSlot->slotIndex;
1674 
1675    result.sb_size = seq_hdr->flags.use_128x128_superblock;
1676    result.interp_filter = pi->interpolation_filter;
1677    for (i = 0; i < 2; ++i)
1678       result.filter_level[i] = pi->pLoopFilter->loop_filter_level[i];
1679    result.filter_level_u = pi->pLoopFilter->loop_filter_level[2];
1680    result.filter_level_v = pi->pLoopFilter->loop_filter_level[3];
1681    result.sharpness_level = pi->pLoopFilter->loop_filter_sharpness;
1682    for (i = 0; i < 8; ++i)
1683       result.ref_deltas[i] = pi->pLoopFilter->loop_filter_ref_deltas[i];
1684    for (i = 0; i < 2; ++i)
1685       result.mode_deltas[i] = pi->pLoopFilter->loop_filter_mode_deltas[i];
1686    result.base_qindex = pi->pQuantization->base_q_idx;
1687    result.y_dc_delta_q = pi->pQuantization->DeltaQYDc;
1688    result.u_dc_delta_q = pi->pQuantization->DeltaQUDc;
1689    result.v_dc_delta_q = pi->pQuantization->DeltaQVDc;
1690    result.u_ac_delta_q = pi->pQuantization->DeltaQUAc;
1691    result.v_ac_delta_q = pi->pQuantization->DeltaQVAc;
1692 
1693    if (pi->pQuantization->flags.using_qmatrix) {
1694       result.qm_y = pi->pQuantization->qm_y | 0xf0;
1695       result.qm_u = pi->pQuantization->qm_u | 0xf0;
1696       result.qm_v = pi->pQuantization->qm_v | 0xf0;
1697    } else {
1698       result.qm_y = 0xff;
1699       result.qm_u = 0xff;
1700       result.qm_v = 0xff;
1701    }
1702    result.delta_q_res = (1 << pi->delta_q_res);
1703    result.delta_lf_res = (1 << pi->delta_lf_res);
1704    result.tile_cols = pi->pTileInfo->TileCols;
1705    result.tile_rows = pi->pTileInfo->TileRows;
1706 
1707    result.tx_mode = pi->TxMode;
1708    result.reference_mode = (pi->flags.reference_select == 1) ? 2 : 0;
1709    result.chroma_format = seq_hdr->pColorConfig->flags.mono_chrome ? 0 : 1;
1710    result.tile_size_bytes = pi->pTileInfo->tile_size_bytes_minus_1;
1711    result.context_update_tile_id = pi->pTileInfo->context_update_tile_id;
1712 
1713    for (i = 0; i < result.tile_cols; i++)
1714       result.tile_col_start_sb[i] = pi->pTileInfo->pMiColStarts[i];
1715    result.tile_col_start_sb[result.tile_cols] =
1716       result.tile_col_start_sb[result.tile_cols - 1] + pi->pTileInfo->pWidthInSbsMinus1[result.tile_cols - 1] + 1;
1717    for (i = 0; i < pi->pTileInfo->TileRows; i++)
1718       result.tile_row_start_sb[i] = pi->pTileInfo->pMiRowStarts[i];
1719    result.tile_row_start_sb[result.tile_rows] =
1720       result.tile_row_start_sb[result.tile_rows - 1] + pi->pTileInfo->pHeightInSbsMinus1[result.tile_rows - 1] + 1;
1721 
1722    result.max_width = seq_hdr->max_frame_width_minus_1 + 1;
1723    result.max_height = seq_hdr->max_frame_height_minus_1 + 1;
1724    VkExtent2D frameExtent = frame_info->dstPictureResource.codedExtent;
1725    result.superres_scale_denominator =
1726       pi->flags.use_superres ? pi->coded_denom + AV1_SUPERRES_DENOM_MIN : AV1_SUPERRES_NUM;
1727    if (pi->flags.use_superres) {
1728       result.width =
1729          (frameExtent.width * 8 + result.superres_scale_denominator / 2) / result.superres_scale_denominator;
1730    } else {
1731       result.width = frameExtent.width;
1732    }
1733    result.height = frameExtent.height;
1734 
1735    result.superres_upscaled_width = frameExtent.width;
1736 
1737    result.order_hint_bits = seq_hdr->order_hint_bits_minus_1 + 1;
1738 
1739    /* The VCN FW will evict references that aren't specified in
1740     * ref_frame_map, even if they are still valid. To prevent this we will
1741     * specify every possible reference in ref_frame_map.
1742     */
1743    uint16_t used_slots = (1 << result.curr_pic_idx);
1744    for (i = 0; i < frame_info->referenceSlotCount; i++) {
1745       const struct VkVideoDecodeAV1DpbSlotInfoKHR *ref_dpb_slot =
1746          vk_find_struct_const(frame_info->pReferenceSlots[i].pNext, VIDEO_DECODE_AV1_DPB_SLOT_INFO_KHR);
1747       (void)ref_dpb_slot; /* Again, the FW is tracking this information for us, so no need for it. */
1748       (void)ref_dpb_slot; /* the FW is tracking this information for us, so no need for it. */
1749       int32_t slotIndex = frame_info->pReferenceSlots[i].slotIndex;
1750       result.ref_frame_map[i] = slotIndex;
1751       used_slots |= 1 << slotIndex;
1752    }
1753    /* Go through all the slots and fill in the ones that haven't been used. */
1754    for (j = 0; j < STD_VIDEO_AV1_NUM_REF_FRAMES + 1; j++) {
1755       if ((used_slots & (1 << j)) == 0) {
1756          result.ref_frame_map[i] = j;
1757          used_slots |= 1 << j;
1758          i++;
1759       }
1760    }
1761 
1762    assert(used_slots == 0x1ff && i == STD_VIDEO_AV1_NUM_REF_FRAMES);
1763 
1764    for (i = 0; i < STD_VIDEO_AV1_REFS_PER_FRAME; ++i) {
1765       result.frame_refs[i] =
1766          av1_pic_info->referenceNameSlotIndices[i] == -1 ? 0x7f : av1_pic_info->referenceNameSlotIndices[i];
1767    }
1768 
1769    result.bit_depth_luma_minus8 = result.bit_depth_chroma_minus8 = seq_hdr->pColorConfig->BitDepth - 8;
1770 
1771    int16_t *feature_data = (int16_t *)probs_ptr;
1772    int fd_idx = 0;
1773    for (i = 0; i < 8; ++i) {
1774       result.feature_mask[i] = pi->pSegmentation->FeatureEnabled[i];
1775       for (j = 0; j < 8; ++j) {
1776          result.feature_data[i][j] = pi->pSegmentation->FeatureData[i][j];
1777          feature_data[fd_idx++] = result.feature_data[i][j];
1778       }
1779    }
1780 
1781    memcpy(((char *)probs_ptr + 128), result.feature_mask, 8);
1782    result.cdef_damping = pi->pCDEF->cdef_damping_minus_3 + 3;
1783    result.cdef_bits = pi->pCDEF->cdef_bits;
1784    for (i = 0; i < 8; ++i) {
1785       result.cdef_strengths[i] = (pi->pCDEF->cdef_y_pri_strength[i] << 2) + pi->pCDEF->cdef_y_sec_strength[i];
1786       result.cdef_uv_strengths[i] = (pi->pCDEF->cdef_uv_pri_strength[i] << 2) + pi->pCDEF->cdef_uv_sec_strength[i];
1787    }
1788 
1789    if (pi->flags.UsesLr) {
1790       for (int plane = 0; plane < STD_VIDEO_AV1_MAX_NUM_PLANES; plane++) {
1791          result.frame_restoration_type[plane] = pi->pLoopRestoration->FrameRestorationType[plane];
1792          result.log2_restoration_unit_size_minus5[plane] = pi->pLoopRestoration->LoopRestorationSize[plane];
1793       }
1794    }
1795 
1796    if (seq_hdr->pColorConfig->BitDepth > 8) {
1797       if (vid->vk.picture_format == VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16 ||
1798           vid->vk.picture_format == VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16) {
1799          result.p010_mode = 1;
1800          result.msb_mode = 1;
1801       } else {
1802          result.luma_10to8 = 1;
1803          result.chroma_10to8 = 1;
1804       }
1805    }
1806 
1807    result.preskip_segid = 0;
1808    result.last_active_segid = 0;
1809    for (i = 0; i < 8; i++) {
1810       for (j = 0; j < 8; j++) {
1811          if (result.feature_mask[i] & (1 << j)) {
1812             result.last_active_segid = i;
1813             if (j >= 5)
1814                result.preskip_segid = 1;
1815          }
1816       }
1817    }
1818    result.seg_lossless_flag = 0;
1819    for (i = 0; i < 8; ++i) {
1820       int av1_get_qindex, qindex;
1821       int segfeature_active = result.feature_mask[i] & (1 << 0);
1822       if (segfeature_active) {
1823          int seg_qindex = result.base_qindex + result.feature_data[i][0];
1824          av1_get_qindex = seg_qindex < 0 ? 0 : (seg_qindex > 255 ? 255 : seg_qindex);
1825       } else {
1826          av1_get_qindex = result.base_qindex;
1827       }
1828       qindex = pi->flags.segmentation_enabled ? av1_get_qindex : result.base_qindex;
1829       result.seg_lossless_flag |= (((qindex == 0) && result.y_dc_delta_q == 0 && result.u_dc_delta_q == 0 &&
1830                                     result.v_dc_delta_q == 0 && result.u_ac_delta_q == 0 && result.v_ac_delta_q == 0)
1831                                    << i);
1832    }
1833 
1834    rvcn_dec_film_grain_params_t *fg_params = &result.film_grain;
1835    fg_params->apply_grain = pi->flags.apply_grain;
1836    if (fg_params->apply_grain) {
1837       rvcn_dec_av1_fg_init_buf_t *fg_buf = (rvcn_dec_av1_fg_init_buf_t *)((char *)probs_ptr + 256);
1838       fg_params->random_seed = pi->pFilmGrain->grain_seed;
1839       fg_params->grain_scale_shift = pi->pFilmGrain->grain_scale_shift;
1840       fg_params->scaling_shift = pi->pFilmGrain->grain_scaling_minus_8 + 8;
1841       fg_params->chroma_scaling_from_luma = pi->pFilmGrain->flags.chroma_scaling_from_luma;
1842       fg_params->num_y_points = pi->pFilmGrain->num_y_points;
1843       fg_params->num_cb_points = pi->pFilmGrain->num_cb_points;
1844       fg_params->num_cr_points = pi->pFilmGrain->num_cr_points;
1845       fg_params->cb_mult = pi->pFilmGrain->cb_mult;
1846       fg_params->cb_luma_mult = pi->pFilmGrain->cb_luma_mult;
1847       fg_params->cb_offset = pi->pFilmGrain->cb_offset;
1848       fg_params->cr_mult = pi->pFilmGrain->cr_mult;
1849       fg_params->cr_luma_mult = pi->pFilmGrain->cr_luma_mult;
1850       fg_params->cr_offset = pi->pFilmGrain->cr_offset;
1851       fg_params->bit_depth_minus_8 = result.bit_depth_luma_minus8;
1852       for (i = 0; i < fg_params->num_y_points; ++i) {
1853          fg_params->scaling_points_y[i][0] = pi->pFilmGrain->point_y_value[i];
1854          fg_params->scaling_points_y[i][1] = pi->pFilmGrain->point_y_scaling[i];
1855       }
1856       for (i = 0; i < fg_params->num_cb_points; ++i) {
1857          fg_params->scaling_points_cb[i][0] = pi->pFilmGrain->point_cb_value[i];
1858          fg_params->scaling_points_cb[i][1] = pi->pFilmGrain->point_cb_scaling[i];
1859       }
1860       for (i = 0; i < fg_params->num_cr_points; ++i) {
1861          fg_params->scaling_points_cr[i][0] = pi->pFilmGrain->point_cr_value[i];
1862          fg_params->scaling_points_cr[i][1] = pi->pFilmGrain->point_cr_scaling[i];
1863       }
1864 
1865       fg_params->ar_coeff_lag = pi->pFilmGrain->ar_coeff_lag;
1866       fg_params->ar_coeff_shift = pi->pFilmGrain->ar_coeff_shift_minus_6 + 6;
1867 
1868       for (i = 0; i < 24; ++i)
1869          fg_params->ar_coeffs_y[i] = pi->pFilmGrain->ar_coeffs_y_plus_128[i] - 128;
1870 
1871       for (i = 0; i < 25; ++i) {
1872          fg_params->ar_coeffs_cb[i] = pi->pFilmGrain->ar_coeffs_cb_plus_128[i] - 128;
1873          fg_params->ar_coeffs_cr[i] = pi->pFilmGrain->ar_coeffs_cr_plus_128[i] - 128;
1874       }
1875 
1876       fg_params->overlap_flag = pi->pFilmGrain->flags.overlap_flag;
1877       fg_params->clip_to_restricted_range = pi->pFilmGrain->flags.clip_to_restricted_range;
1878       ac_vcn_av1_init_film_grain_buffer(fg_params, fg_buf);
1879    }
1880 
1881    result.uncompressed_header_size = 0;
1882    for (i = 0; i < STD_VIDEO_AV1_NUM_REF_FRAMES; ++i) {
1883       result.global_motion[i].wmtype = pi->pGlobalMotion->GmType[i];
1884       for (j = 0; j < STD_VIDEO_AV1_GLOBAL_MOTION_PARAMS; ++j)
1885          result.global_motion[i].wmmat[j] = pi->pGlobalMotion->gm_params[i][j];
1886    }
1887    for (i = 0; i < av1_pic_info->tileCount && i < 256; ++i) {
1888       result.tile_info[i].offset = av1_pic_info->pTileOffsets[i];
1889       result.tile_info[i].size = av1_pic_info->pTileSizes[i];
1890    }
1891 
1892    return result;
1893 }
1894 
1895 
1896 static bool
rvcn_dec_message_decode(struct radv_cmd_buffer * cmd_buffer,struct radv_video_session * vid,struct radv_video_session_params * params,void * ptr,void * it_probs_ptr,uint32_t * slice_offset,const struct VkVideoDecodeInfoKHR * frame_info)1897 rvcn_dec_message_decode(struct radv_cmd_buffer *cmd_buffer, struct radv_video_session *vid,
1898                         struct radv_video_session_params *params, void *ptr, void *it_probs_ptr, uint32_t *slice_offset,
1899                         const struct VkVideoDecodeInfoKHR *frame_info)
1900 {
1901    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
1902    const struct radv_physical_device *pdev = radv_device_physical(device);
1903    rvcn_dec_message_header_t *header;
1904    rvcn_dec_message_index_t *index_codec;
1905    rvcn_dec_message_decode_t *decode;
1906    rvcn_dec_message_index_t *index_dynamic_dpb = NULL;
1907    rvcn_dec_message_dynamic_dpb_t2_t *dynamic_dpb_t2 = NULL;
1908    void *codec;
1909    unsigned sizes = 0, offset_decode, offset_codec, offset_dynamic_dpb;
1910    struct radv_image_view *dst_iv = radv_image_view_from_handle(frame_info->dstPictureResource.imageViewBinding);
1911    struct radv_image *img = dst_iv->image;
1912    struct radv_image_plane *luma = &img->planes[0];
1913    struct radv_image_plane *chroma = &img->planes[1];
1914 
1915    header = ptr;
1916    sizes += sizeof(rvcn_dec_message_header_t);
1917 
1918    index_codec = (void *)((char *)header + sizes);
1919    sizes += sizeof(rvcn_dec_message_index_t);
1920 
1921    if (vid->dpb_type == DPB_DYNAMIC_TIER_2) {
1922       index_dynamic_dpb = (void *)((char *)header + sizes);
1923       sizes += sizeof(rvcn_dec_message_index_t);
1924    }
1925 
1926    offset_decode = sizes;
1927    decode = (void *)((char *)header + sizes);
1928    sizes += sizeof(rvcn_dec_message_decode_t);
1929 
1930    if (vid->dpb_type == DPB_DYNAMIC_TIER_2) {
1931       offset_dynamic_dpb = sizes;
1932       dynamic_dpb_t2 = (void *)((char *)header + sizes);
1933       sizes += sizeof(rvcn_dec_message_dynamic_dpb_t2_t);
1934    }
1935 
1936    offset_codec = sizes;
1937    codec = (void *)((char *)header + sizes);
1938 
1939    memset(ptr, 0, sizes);
1940 
1941    header->header_size = sizeof(rvcn_dec_message_header_t);
1942    header->total_size = sizes;
1943    header->msg_type = RDECODE_MSG_DECODE;
1944    header->stream_handle = vid->stream_handle;
1945    header->status_report_feedback_number = vid->dbg_frame_cnt++;
1946 
1947    header->index[0].message_id = RDECODE_MESSAGE_DECODE;
1948    header->index[0].offset = offset_decode;
1949    header->index[0].size = sizeof(rvcn_dec_message_decode_t);
1950    header->index[0].filled = 0;
1951    header->num_buffers = 1;
1952 
1953    index_codec->offset = offset_codec;
1954    index_codec->filled = 0;
1955    ++header->num_buffers;
1956 
1957    if (vid->dpb_type == DPB_DYNAMIC_TIER_2) {
1958       index_dynamic_dpb->message_id = RDECODE_MESSAGE_DYNAMIC_DPB;
1959       index_dynamic_dpb->offset = offset_dynamic_dpb;
1960       index_dynamic_dpb->filled = 0;
1961       ++header->num_buffers;
1962       index_dynamic_dpb->size = sizeof(rvcn_dec_message_dynamic_dpb_t2_t);
1963    }
1964 
1965    decode->stream_type = vid->stream_type;
1966    decode->decode_flags = 0;
1967    decode->width_in_samples = frame_info->dstPictureResource.codedExtent.width;
1968    decode->height_in_samples = frame_info->dstPictureResource.codedExtent.height;
1969 
1970    decode->bsd_size = frame_info->srcBufferRange;
1971 
1972    decode->dt_size = dst_iv->image->planes[0].surface.total_size + dst_iv->image->planes[1].surface.total_size;
1973    decode->sct_size = 0;
1974    decode->sc_coeff_size = 0;
1975 
1976    decode->sw_ctxt_size = RDECODE_SESSION_CONTEXT_SIZE;
1977 
1978    decode->dt_pitch = luma->surface.u.gfx9.surf_pitch * luma->surface.blk_w;
1979    decode->dt_uv_pitch = chroma->surface.u.gfx9.surf_pitch * chroma->surface.blk_w;
1980 
1981    if (luma->surface.meta_offset) {
1982       fprintf(stderr, "DCC SURFACES NOT SUPPORTED.\n");
1983       return false;
1984    }
1985 
1986    decode->dt_tiling_mode = 0;
1987    decode->dt_swizzle_mode = luma->surface.u.gfx9.swizzle_mode;
1988    decode->dt_array_mode = pdev->vid_addr_gfx_mode;
1989    decode->dt_field_mode = 0;
1990    decode->dt_surf_tile_config = 0;
1991    decode->dt_uv_surf_tile_config = 0;
1992 
1993    int dt_array_idx = frame_info->dstPictureResource.baseArrayLayer + dst_iv->vk.base_array_layer;
1994 
1995    decode->dt_luma_top_offset = luma->surface.u.gfx9.surf_offset +
1996       dt_array_idx * luma->surface.u.gfx9.surf_slice_size;
1997    decode->dt_chroma_top_offset = chroma->surface.u.gfx9.surf_offset +
1998       dt_array_idx * chroma->surface.u.gfx9.surf_slice_size;
1999    decode->dt_luma_bottom_offset = decode->dt_luma_top_offset;
2000    decode->dt_chroma_bottom_offset = decode->dt_chroma_top_offset;
2001 
2002    if (vid->stream_type == RDECODE_CODEC_AV1)
2003       decode->db_pitch_uv = chroma->surface.u.gfx9.surf_pitch * chroma->surface.blk_w;
2004 
2005    *slice_offset = 0;
2006 
2007    /* Intra-only decoding will only work without a setup slot for AV1
2008     * (non-filmgrain) currently, other codecs require the application to pass a
2009     * setup slot for this use-case, since the FW is not able to skip write-out
2010     * for H26X. In order to fix that properly, additional scratch space will
2011     * be needed in the video session just for intra-only DPB targets.
2012     */
2013    int dpb_update_required = 1;
2014 
2015    switch (vid->vk.op) {
2016    case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR: {
2017       index_codec->size = sizeof(rvcn_dec_message_avc_t);
2018       rvcn_dec_message_avc_t avc = get_h264_msg(vid, params, frame_info, slice_offset, &decode->width_in_samples,
2019                                                 &decode->height_in_samples, it_probs_ptr);
2020       memcpy(codec, (void *)&avc, sizeof(rvcn_dec_message_avc_t));
2021       index_codec->message_id = RDECODE_MESSAGE_AVC;
2022       break;
2023    }
2024    case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR: {
2025       index_codec->size = sizeof(rvcn_dec_message_hevc_t);
2026       rvcn_dec_message_hevc_t hevc = get_h265_msg(device, vid, params, frame_info,
2027                                                   &decode->width_in_samples,
2028                                                   &decode->height_in_samples,
2029                                                   it_probs_ptr);
2030       memcpy(codec, (void *)&hevc, sizeof(rvcn_dec_message_hevc_t));
2031       index_codec->message_id = RDECODE_MESSAGE_HEVC;
2032       break;
2033    }
2034    case VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR: {
2035       index_codec->size = sizeof(rvcn_dec_message_av1_t);
2036       rvcn_dec_message_av1_t av1 = get_av1_msg(device, vid, params, frame_info, it_probs_ptr, &dpb_update_required);
2037       memcpy(codec, (void *)&av1, sizeof(rvcn_dec_message_av1_t));
2038       index_codec->message_id = RDECODE_MESSAGE_AV1;
2039       assert(frame_info->referenceSlotCount < 9);
2040       break;
2041    }
2042    default:
2043       unreachable("unknown operation");
2044    }
2045 
2046    if (dpb_update_required)
2047       assert(frame_info->pSetupReferenceSlot != NULL);
2048 
2049    struct radv_image_view *dpb_iv =
2050       frame_info->pSetupReferenceSlot
2051          ? radv_image_view_from_handle(frame_info->pSetupReferenceSlot->pPictureResource->imageViewBinding)
2052          : NULL;
2053    struct radv_image *dpb = dpb_iv ? dpb_iv->image : img;
2054 
2055    int dpb_array_idx = 0;
2056    if (dpb_update_required)
2057       dpb_array_idx = frame_info->pSetupReferenceSlot->pPictureResource->baseArrayLayer + dpb_iv->vk.base_array_layer;
2058 
2059    decode->dpb_size = (vid->dpb_type != DPB_DYNAMIC_TIER_2) ? dpb->size : 0;
2060    decode->db_pitch = dpb->planes[0].surface.u.gfx9.surf_pitch;
2061    decode->db_aligned_height = dpb->planes[0].surface.u.gfx9.surf_height;
2062    decode->db_swizzle_mode = dpb->planes[0].surface.u.gfx9.swizzle_mode;
2063    decode->db_array_mode = pdev->vid_addr_gfx_mode;
2064 
2065    decode->hw_ctxt_size = vid->ctx.size;
2066 
2067    if (vid->dpb_type != DPB_DYNAMIC_TIER_2)
2068       return true;
2069 
2070    uint64_t addr;
2071    radv_cs_add_buffer(device->ws, cmd_buffer->cs, dpb->bindings[0].bo);
2072    addr = radv_buffer_get_va(dpb->bindings[0].bo) + dpb->bindings[0].offset;
2073 
2074    addr += dpb_array_idx * (dpb->planes[0].surface.u.gfx9.surf_slice_size + dpb->planes[1].surface.u.gfx9.surf_slice_size);
2075    dynamic_dpb_t2->dpbCurrLo = addr;
2076    dynamic_dpb_t2->dpbCurrHi = addr >> 32;
2077 
2078    if (vid->vk.op == VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR) {
2079       /* The following loop will fill in the references for the current frame,
2080        * this ensures all DPB addresses are "valid" (pointing at the current
2081        * decode target), so that the firmware doesn't evict things it should not.
2082        * It will not perform any actual writes to these dummy slots.
2083        */
2084       for (int i = 0; i < STD_VIDEO_AV1_NUM_REF_FRAMES; i++) {
2085          dynamic_dpb_t2->dpbAddrHi[i] = addr;
2086          dynamic_dpb_t2->dpbAddrLo[i] = addr >> 32;
2087       }
2088    }
2089 
2090    for (int i = 0; i < frame_info->referenceSlotCount; i++) {
2091       int32_t slot_idx = frame_info->pReferenceSlots[i].slotIndex;
2092       assert(slot_idx >= 0 && slot_idx < 16);
2093       struct radv_image_view *f_dpb_iv =
2094          radv_image_view_from_handle(frame_info->pReferenceSlots[i].pPictureResource->imageViewBinding);
2095       assert(f_dpb_iv != NULL);
2096       struct radv_image *dpb_img = f_dpb_iv->image;
2097       int f_dpb_array_idx = frame_info->pReferenceSlots[i].pPictureResource->baseArrayLayer + f_dpb_iv->vk.base_array_layer;
2098 
2099       radv_cs_add_buffer(device->ws, cmd_buffer->cs, dpb_img->bindings[0].bo);
2100       addr = radv_buffer_get_va(dpb_img->bindings[0].bo) + dpb_img->bindings[0].offset;
2101       addr += f_dpb_array_idx * (dpb_img->planes[0].surface.u.gfx9.surf_slice_size + dpb_img->planes[1].surface.u.gfx9.surf_slice_size);
2102       dynamic_dpb_t2->dpbAddrLo[i] = addr;
2103       dynamic_dpb_t2->dpbAddrHi[i] = addr >> 32;
2104 
2105       ++dynamic_dpb_t2->dpbArraySize;
2106    }
2107 
2108    radv_cs_add_buffer(device->ws, cmd_buffer->cs, dpb->bindings[0].bo);
2109    addr = radv_buffer_get_va(dpb->bindings[0].bo) + dpb->bindings[0].offset;
2110    addr += dpb_array_idx * (dpb->planes[0].surface.u.gfx9.surf_slice_size + dpb->planes[1].surface.u.gfx9.surf_slice_size);
2111    dynamic_dpb_t2->dpbCurrLo = addr;
2112    dynamic_dpb_t2->dpbCurrHi = addr >> 32;
2113 
2114    decode->decode_flags = 1;
2115    dynamic_dpb_t2->dpbConfigFlags = 0;
2116 
2117    dynamic_dpb_t2->dpbLumaPitch = dpb->planes[0].surface.u.gfx9.surf_pitch;
2118    dynamic_dpb_t2->dpbLumaAlignedHeight = dpb->planes[0].surface.u.gfx9.surf_height;
2119    dynamic_dpb_t2->dpbLumaAlignedSize = dpb->planes[0].surface.u.gfx9.surf_slice_size;
2120 
2121    dynamic_dpb_t2->dpbChromaPitch = dpb->planes[1].surface.u.gfx9.surf_pitch;
2122    dynamic_dpb_t2->dpbChromaAlignedHeight = dpb->planes[1].surface.u.gfx9.surf_height;
2123    dynamic_dpb_t2->dpbChromaAlignedSize = dpb->planes[1].surface.u.gfx9.surf_slice_size;
2124 
2125    return true;
2126 }
2127 
2128 static struct ruvd_h264
get_uvd_h264_msg(struct radv_video_session * vid,struct radv_video_session_params * params,const struct VkVideoDecodeInfoKHR * frame_info,uint32_t * slice_offset,uint32_t * width_in_samples,uint32_t * height_in_samples,void * it_ptr)2129 get_uvd_h264_msg(struct radv_video_session *vid, struct radv_video_session_params *params,
2130                  const struct VkVideoDecodeInfoKHR *frame_info, uint32_t *slice_offset, uint32_t *width_in_samples,
2131                  uint32_t *height_in_samples, void *it_ptr)
2132 {
2133    struct ruvd_h264 result;
2134    const struct VkVideoDecodeH264PictureInfoKHR *h264_pic_info =
2135       vk_find_struct_const(frame_info->pNext, VIDEO_DECODE_H264_PICTURE_INFO_KHR);
2136 
2137    *slice_offset = h264_pic_info->pSliceOffsets[0];
2138 
2139    memset(&result, 0, sizeof(result));
2140 
2141    const StdVideoH264SequenceParameterSet *sps =
2142       vk_video_find_h264_dec_std_sps(&params->vk, h264_pic_info->pStdPictureInfo->seq_parameter_set_id);
2143    switch (sps->profile_idc) {
2144    case STD_VIDEO_H264_PROFILE_IDC_BASELINE:
2145       result.profile = RUVD_H264_PROFILE_BASELINE;
2146       break;
2147    case STD_VIDEO_H264_PROFILE_IDC_MAIN:
2148       result.profile = RUVD_H264_PROFILE_MAIN;
2149       break;
2150    case STD_VIDEO_H264_PROFILE_IDC_HIGH:
2151       result.profile = RUVD_H264_PROFILE_HIGH;
2152       break;
2153    default:
2154       fprintf(stderr, "UNSUPPORTED CODEC %d\n", sps->profile_idc);
2155       result.profile = RUVD_H264_PROFILE_MAIN;
2156       break;
2157    }
2158 
2159    *width_in_samples = (sps->pic_width_in_mbs_minus1 + 1) * 16;
2160    *height_in_samples = (sps->pic_height_in_map_units_minus1 + 1) * 16;
2161    if (!sps->flags.frame_mbs_only_flag)
2162       *height_in_samples *= 2;
2163    result.level = get_h264_level(sps->level_idc);
2164 
2165    result.sps_info_flags = 0;
2166 
2167    result.sps_info_flags |= sps->flags.direct_8x8_inference_flag << 0;
2168    result.sps_info_flags |= sps->flags.mb_adaptive_frame_field_flag << 1;
2169    result.sps_info_flags |= sps->flags.frame_mbs_only_flag << 2;
2170    result.sps_info_flags |= sps->flags.delta_pic_order_always_zero_flag << 3;
2171    result.sps_info_flags |= 1 << RDECODE_SPS_INFO_H264_EXTENSION_SUPPORT_FLAG_SHIFT;
2172 
2173    result.bit_depth_luma_minus8 = sps->bit_depth_luma_minus8;
2174    result.bit_depth_chroma_minus8 = sps->bit_depth_chroma_minus8;
2175    result.log2_max_frame_num_minus4 = sps->log2_max_frame_num_minus4;
2176    result.pic_order_cnt_type = sps->pic_order_cnt_type;
2177    result.log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_pic_order_cnt_lsb_minus4;
2178 
2179    result.chroma_format = sps->chroma_format_idc;
2180 
2181    const StdVideoH264PictureParameterSet *pps =
2182       vk_video_find_h264_dec_std_pps(&params->vk, h264_pic_info->pStdPictureInfo->pic_parameter_set_id);
2183    result.pps_info_flags = 0;
2184    result.pps_info_flags |= pps->flags.transform_8x8_mode_flag << 0;
2185    result.pps_info_flags |= pps->flags.redundant_pic_cnt_present_flag << 1;
2186    result.pps_info_flags |= pps->flags.constrained_intra_pred_flag << 2;
2187    result.pps_info_flags |= pps->flags.deblocking_filter_control_present_flag << 3;
2188    result.pps_info_flags |= pps->weighted_bipred_idc << 4;
2189    result.pps_info_flags |= pps->flags.weighted_pred_flag << 6;
2190    result.pps_info_flags |= pps->flags.bottom_field_pic_order_in_frame_present_flag << 7;
2191    result.pps_info_flags |= pps->flags.entropy_coding_mode_flag << 8;
2192 
2193    result.pic_init_qp_minus26 = pps->pic_init_qp_minus26;
2194    result.chroma_qp_index_offset = pps->chroma_qp_index_offset;
2195    result.second_chroma_qp_index_offset = pps->second_chroma_qp_index_offset;
2196 
2197    StdVideoH264ScalingLists scaling_lists;
2198    vk_video_derive_h264_scaling_list(sps, pps, &scaling_lists);
2199    update_h264_scaling(result.scaling_list_4x4, result.scaling_list_8x8, &scaling_lists);
2200 
2201    memset(it_ptr, 0, IT_SCALING_TABLE_SIZE);
2202    memcpy(it_ptr, result.scaling_list_4x4, 6 * 16);
2203    memcpy((char *)it_ptr + 96, result.scaling_list_8x8, 2 * 64);
2204 
2205    result.num_ref_idx_l0_active_minus1 = pps->num_ref_idx_l0_default_active_minus1;
2206    result.num_ref_idx_l1_active_minus1 = pps->num_ref_idx_l1_default_active_minus1;
2207 
2208    result.curr_field_order_cnt_list[0] = h264_pic_info->pStdPictureInfo->PicOrderCnt[0];
2209    result.curr_field_order_cnt_list[1] = h264_pic_info->pStdPictureInfo->PicOrderCnt[1];
2210 
2211    result.frame_num = h264_pic_info->pStdPictureInfo->frame_num;
2212 
2213    result.num_ref_frames = sps->max_num_ref_frames;
2214    memset(result.ref_frame_list, 0xff, sizeof(unsigned char) * 16);
2215    memset(result.frame_num_list, 0, sizeof(unsigned int) * 16);
2216    for (unsigned i = 0; i < frame_info->referenceSlotCount; i++) {
2217       int idx = frame_info->pReferenceSlots[i].slotIndex;
2218       const struct VkVideoDecodeH264DpbSlotInfoKHR *dpb_slot =
2219          vk_find_struct_const(frame_info->pReferenceSlots[i].pNext, VIDEO_DECODE_H264_DPB_SLOT_INFO_KHR);
2220 
2221       result.frame_num_list[i] = dpb_slot->pStdReferenceInfo->FrameNum;
2222       result.field_order_cnt_list[i][0] = dpb_slot->pStdReferenceInfo->PicOrderCnt[0];
2223       result.field_order_cnt_list[i][1] = dpb_slot->pStdReferenceInfo->PicOrderCnt[1];
2224 
2225       result.ref_frame_list[i] = idx;
2226 
2227       if (dpb_slot->pStdReferenceInfo->flags.used_for_long_term_reference)
2228          result.ref_frame_list[i] |= 0x80;
2229    }
2230    result.curr_pic_ref_frame_num = frame_info->referenceSlotCount;
2231    result.decoded_pic_idx = frame_info->pSetupReferenceSlot->slotIndex;
2232 
2233    return result;
2234 }
2235 
2236 static struct ruvd_h265
get_uvd_h265_msg(struct radv_device * device,struct radv_video_session * vid,struct radv_video_session_params * params,const struct VkVideoDecodeInfoKHR * frame_info,uint32_t * width_in_samples,uint32_t * height_in_samples,void * it_ptr)2237 get_uvd_h265_msg(struct radv_device *device, struct radv_video_session *vid, struct radv_video_session_params *params,
2238                  const struct VkVideoDecodeInfoKHR *frame_info, uint32_t *width_in_samples,
2239                  uint32_t *height_in_samples, void *it_ptr)
2240 {
2241    const struct radv_physical_device *pdev = radv_device_physical(device);
2242    struct ruvd_h265 result;
2243    int i, j;
2244    const struct VkVideoDecodeH265PictureInfoKHR *h265_pic_info =
2245       vk_find_struct_const(frame_info->pNext, VIDEO_DECODE_H265_PICTURE_INFO_KHR);
2246 
2247    memset(&result, 0, sizeof(result));
2248 
2249    const StdVideoH265SequenceParameterSet *sps =
2250       vk_video_find_h265_dec_std_sps(&params->vk, h265_pic_info->pStdPictureInfo->pps_seq_parameter_set_id);
2251    const StdVideoH265PictureParameterSet *pps =
2252       vk_video_find_h265_dec_std_pps(&params->vk, h265_pic_info->pStdPictureInfo->pps_pic_parameter_set_id);
2253 
2254    result.sps_info_flags = 0;
2255    result.sps_info_flags |= sps->flags.scaling_list_enabled_flag << 0;
2256    result.sps_info_flags |= sps->flags.amp_enabled_flag << 1;
2257    result.sps_info_flags |= sps->flags.sample_adaptive_offset_enabled_flag << 2;
2258    result.sps_info_flags |= sps->flags.pcm_enabled_flag << 3;
2259    result.sps_info_flags |= sps->flags.pcm_loop_filter_disabled_flag << 4;
2260    result.sps_info_flags |= sps->flags.long_term_ref_pics_present_flag << 5;
2261    result.sps_info_flags |= sps->flags.sps_temporal_mvp_enabled_flag << 6;
2262    result.sps_info_flags |= sps->flags.strong_intra_smoothing_enabled_flag << 7;
2263    result.sps_info_flags |= sps->flags.separate_colour_plane_flag << 8;
2264 
2265    if (pdev->info.family == CHIP_CARRIZO)
2266       result.sps_info_flags |= 1 << 9;
2267 
2268    *width_in_samples = sps->pic_width_in_luma_samples;
2269    *height_in_samples = sps->pic_height_in_luma_samples;
2270    result.chroma_format = sps->chroma_format_idc;
2271    result.bit_depth_luma_minus8 = sps->bit_depth_luma_minus8;
2272    result.bit_depth_chroma_minus8 = sps->bit_depth_chroma_minus8;
2273    result.log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_pic_order_cnt_lsb_minus4;
2274    result.sps_max_dec_pic_buffering_minus1 =
2275       sps->pDecPicBufMgr->max_dec_pic_buffering_minus1[sps->sps_max_sub_layers_minus1];
2276    result.log2_min_luma_coding_block_size_minus3 = sps->log2_min_luma_coding_block_size_minus3;
2277    result.log2_diff_max_min_luma_coding_block_size = sps->log2_diff_max_min_luma_coding_block_size;
2278    result.log2_min_transform_block_size_minus2 = sps->log2_min_luma_transform_block_size_minus2;
2279    result.log2_diff_max_min_transform_block_size = sps->log2_diff_max_min_luma_transform_block_size;
2280    result.max_transform_hierarchy_depth_inter = sps->max_transform_hierarchy_depth_inter;
2281    result.max_transform_hierarchy_depth_intra = sps->max_transform_hierarchy_depth_intra;
2282    if (sps->flags.pcm_enabled_flag) {
2283       result.pcm_sample_bit_depth_luma_minus1 = sps->pcm_sample_bit_depth_luma_minus1;
2284       result.pcm_sample_bit_depth_chroma_minus1 = sps->pcm_sample_bit_depth_chroma_minus1;
2285       result.log2_min_pcm_luma_coding_block_size_minus3 = sps->log2_min_pcm_luma_coding_block_size_minus3;
2286       result.log2_diff_max_min_pcm_luma_coding_block_size = sps->log2_diff_max_min_pcm_luma_coding_block_size;
2287    }
2288    result.num_short_term_ref_pic_sets = sps->num_short_term_ref_pic_sets;
2289 
2290    result.pps_info_flags = 0;
2291    result.pps_info_flags |= pps->flags.dependent_slice_segments_enabled_flag << 0;
2292    result.pps_info_flags |= pps->flags.output_flag_present_flag << 1;
2293    result.pps_info_flags |= pps->flags.sign_data_hiding_enabled_flag << 2;
2294    result.pps_info_flags |= pps->flags.cabac_init_present_flag << 3;
2295    result.pps_info_flags |= pps->flags.constrained_intra_pred_flag << 4;
2296    result.pps_info_flags |= pps->flags.transform_skip_enabled_flag << 5;
2297    result.pps_info_flags |= pps->flags.cu_qp_delta_enabled_flag << 6;
2298    result.pps_info_flags |= pps->flags.pps_slice_chroma_qp_offsets_present_flag << 7;
2299    result.pps_info_flags |= pps->flags.weighted_pred_flag << 8;
2300    result.pps_info_flags |= pps->flags.weighted_bipred_flag << 9;
2301    result.pps_info_flags |= pps->flags.transquant_bypass_enabled_flag << 10;
2302    result.pps_info_flags |= pps->flags.tiles_enabled_flag << 11;
2303    result.pps_info_flags |= pps->flags.entropy_coding_sync_enabled_flag << 12;
2304    result.pps_info_flags |= pps->flags.uniform_spacing_flag << 13;
2305    result.pps_info_flags |= pps->flags.loop_filter_across_tiles_enabled_flag << 14;
2306    result.pps_info_flags |= pps->flags.pps_loop_filter_across_slices_enabled_flag << 15;
2307    result.pps_info_flags |= pps->flags.deblocking_filter_override_enabled_flag << 16;
2308    result.pps_info_flags |= pps->flags.pps_deblocking_filter_disabled_flag << 17;
2309    result.pps_info_flags |= pps->flags.lists_modification_present_flag << 18;
2310    result.pps_info_flags |= pps->flags.slice_segment_header_extension_present_flag << 19;
2311 
2312    result.num_extra_slice_header_bits = pps->num_extra_slice_header_bits;
2313    result.num_long_term_ref_pic_sps = sps->num_long_term_ref_pics_sps;
2314    result.num_ref_idx_l0_default_active_minus1 = pps->num_ref_idx_l0_default_active_minus1;
2315    result.num_ref_idx_l1_default_active_minus1 = pps->num_ref_idx_l1_default_active_minus1;
2316    result.pps_cb_qp_offset = pps->pps_cb_qp_offset;
2317    result.pps_cr_qp_offset = pps->pps_cr_qp_offset;
2318    result.pps_beta_offset_div2 = pps->pps_beta_offset_div2;
2319    result.pps_tc_offset_div2 = pps->pps_tc_offset_div2;
2320    result.diff_cu_qp_delta_depth = pps->diff_cu_qp_delta_depth;
2321    result.num_tile_columns_minus1 = pps->num_tile_columns_minus1;
2322    result.num_tile_rows_minus1 = pps->num_tile_rows_minus1;
2323    result.log2_parallel_merge_level_minus2 = pps->log2_parallel_merge_level_minus2;
2324    result.init_qp_minus26 = pps->init_qp_minus26;
2325 
2326    for (i = 0; i < 19; ++i)
2327       result.column_width_minus1[i] = pps->column_width_minus1[i];
2328 
2329    for (i = 0; i < 21; ++i)
2330       result.row_height_minus1[i] = pps->row_height_minus1[i];
2331 
2332    result.num_delta_pocs_ref_rps_idx = h265_pic_info->pStdPictureInfo->NumDeltaPocsOfRefRpsIdx;
2333    result.curr_poc = h265_pic_info->pStdPictureInfo->PicOrderCntVal;
2334 
2335    uint8_t idxs[16];
2336    memset(result.poc_list, 0, 16 * sizeof(int));
2337    memset(result.ref_pic_list, 0x7f, 16);
2338    memset(idxs, 0xff, 16);
2339    for (i = 0; i < frame_info->referenceSlotCount; i++) {
2340       const struct VkVideoDecodeH265DpbSlotInfoKHR *dpb_slot =
2341          vk_find_struct_const(frame_info->pReferenceSlots[i].pNext, VIDEO_DECODE_H265_DPB_SLOT_INFO_KHR);
2342       int idx = frame_info->pReferenceSlots[i].slotIndex;
2343       result.poc_list[i] = dpb_slot->pStdReferenceInfo->PicOrderCntVal;
2344       result.ref_pic_list[i] = idx;
2345       idxs[idx] = i;
2346    }
2347    result.curr_idx = frame_info->pSetupReferenceSlot->slotIndex;
2348 
2349 #define IDXS(x) ((x) == 0xff ? 0xff : idxs[(x)])
2350    for (i = 0; i < 8; ++i)
2351       result.ref_pic_set_st_curr_before[i] = IDXS(h265_pic_info->pStdPictureInfo->RefPicSetStCurrBefore[i]);
2352 
2353    for (i = 0; i < 8; ++i)
2354       result.ref_pic_set_st_curr_after[i] = IDXS(h265_pic_info->pStdPictureInfo->RefPicSetStCurrAfter[i]);
2355 
2356    for (i = 0; i < 8; ++i)
2357       result.ref_pic_set_lt_curr[i] = IDXS(h265_pic_info->pStdPictureInfo->RefPicSetLtCurr[i]);
2358 
2359    const StdVideoH265ScalingLists *scaling_lists = NULL;
2360    if (pps->flags.pps_scaling_list_data_present_flag)
2361       scaling_lists = pps->pScalingLists;
2362    else if (sps->flags.sps_scaling_list_data_present_flag)
2363       scaling_lists = sps->pScalingLists;
2364 
2365    update_h265_scaling(it_ptr, scaling_lists);
2366    if (scaling_lists) {
2367       for (i = 0; i < STD_VIDEO_H265_SCALING_LIST_16X16_NUM_LISTS; ++i)
2368          result.ucScalingListDCCoefSizeID2[i] = scaling_lists->ScalingListDCCoef16x16[i];
2369 
2370       for (i = 0; i < STD_VIDEO_H265_SCALING_LIST_32X32_NUM_LISTS; ++i)
2371          result.ucScalingListDCCoefSizeID3[i] = scaling_lists->ScalingListDCCoef32x32[i];
2372    }
2373 
2374    for (i = 0; i < 2; i++) {
2375       for (j = 0; j < 15; j++)
2376          result.direct_reflist[i][j] = 0xff;
2377    }
2378 
2379    if (vid->vk.h265.profile_idc == STD_VIDEO_H265_PROFILE_IDC_MAIN_10) {
2380       if (vid->vk.picture_format == VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16) {
2381          result.p010_mode = 1;
2382          result.msb_mode = 1;
2383       } else {
2384          result.p010_mode = 0;
2385          result.luma_10to8 = 5;
2386          result.chroma_10to8 = 5;
2387          result.sclr_luma10to8 = 4;
2388          result.sclr_chroma10to8 = 4;
2389       }
2390    }
2391 
2392    return result;
2393 }
2394 
2395 static unsigned
texture_offset_legacy(struct radeon_surf * surface,unsigned layer)2396 texture_offset_legacy(struct radeon_surf *surface, unsigned layer)
2397 {
2398    return (uint64_t)surface->u.legacy.level[0].offset_256B * 256 +
2399           layer * (uint64_t)surface->u.legacy.level[0].slice_size_dw * 4;
2400 }
2401 
2402 static bool
ruvd_dec_message_decode(struct radv_device * device,struct radv_video_session * vid,struct radv_video_session_params * params,void * ptr,void * it_ptr,uint32_t * slice_offset,const struct VkVideoDecodeInfoKHR * frame_info)2403 ruvd_dec_message_decode(struct radv_device *device, struct radv_video_session *vid,
2404                         struct radv_video_session_params *params, void *ptr, void *it_ptr, uint32_t *slice_offset,
2405                         const struct VkVideoDecodeInfoKHR *frame_info)
2406 {
2407    const struct radv_physical_device *pdev = radv_device_physical(device);
2408    struct ruvd_msg *msg = ptr;
2409    struct radv_image_view *dst_iv = radv_image_view_from_handle(frame_info->dstPictureResource.imageViewBinding);
2410    struct radv_image *img = dst_iv->image;
2411    struct radv_image_plane *luma = &img->planes[0];
2412    struct radv_image_plane *chroma = &img->planes[1];
2413    struct radv_image_view *dpb_iv =
2414       radv_image_view_from_handle(frame_info->pSetupReferenceSlot->pPictureResource->imageViewBinding);
2415    struct radv_image *dpb = dpb_iv->image;
2416 
2417    memset(msg, 0, sizeof(struct ruvd_msg));
2418    msg->size = sizeof(*msg);
2419    msg->msg_type = RUVD_MSG_DECODE;
2420    msg->stream_handle = vid->stream_handle;
2421    msg->status_report_feedback_number = vid->dbg_frame_cnt++;
2422 
2423    msg->body.decode.stream_type = vid->stream_type;
2424    msg->body.decode.decode_flags = 0x1;
2425    msg->body.decode.width_in_samples = frame_info->dstPictureResource.codedExtent.width;
2426    msg->body.decode.height_in_samples = frame_info->dstPictureResource.codedExtent.height;
2427 
2428    msg->body.decode.dpb_size = (vid->dpb_type != DPB_DYNAMIC_TIER_2) ? dpb->size : 0;
2429    msg->body.decode.bsd_size = frame_info->srcBufferRange;
2430    msg->body.decode.db_pitch = align(frame_info->dstPictureResource.codedExtent.width, vid->db_alignment);
2431 
2432    if (vid->stream_type == RUVD_CODEC_H264_PERF && pdev->info.family >= CHIP_POLARIS10)
2433       msg->body.decode.dpb_reserved = vid->ctx.size;
2434 
2435    *slice_offset = 0;
2436    switch (vid->vk.op) {
2437    case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR: {
2438       msg->body.decode.codec.h264 =
2439          get_uvd_h264_msg(vid, params, frame_info, slice_offset, &msg->body.decode.width_in_samples,
2440                           &msg->body.decode.height_in_samples, it_ptr);
2441       break;
2442    }
2443    case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR: {
2444       msg->body.decode.codec.h265 = get_uvd_h265_msg(device, vid, params, frame_info,
2445                                                      &msg->body.decode.width_in_samples,
2446                                                      &msg->body.decode.height_in_samples,
2447                                                      it_ptr);
2448 
2449       if (vid->ctx.mem)
2450          msg->body.decode.dpb_reserved = vid->ctx.size;
2451       break;
2452    }
2453    default:
2454       return false;
2455    }
2456 
2457    msg->body.decode.dt_field_mode = false;
2458 
2459    int dt_array_idx = frame_info->dstPictureResource.baseArrayLayer + dst_iv->vk.base_array_layer;
2460 
2461    if (pdev->info.gfx_level >= GFX9) {
2462       msg->body.decode.dt_pitch = luma->surface.u.gfx9.surf_pitch * luma->surface.blk_w;
2463       msg->body.decode.dt_tiling_mode = RUVD_TILE_LINEAR;
2464       msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_LINEAR;
2465       msg->body.decode.dt_luma_top_offset = luma->surface.u.gfx9.surf_offset +
2466          dt_array_idx * luma->surface.u.gfx9.surf_slice_size;
2467       msg->body.decode.dt_chroma_top_offset = chroma->surface.u.gfx9.surf_offset +
2468          dt_array_idx * chroma->surface.u.gfx9.surf_slice_size;
2469       msg->body.decode.dt_luma_bottom_offset = msg->body.decode.dt_luma_top_offset;
2470       msg->body.decode.dt_chroma_bottom_offset = msg->body.decode.dt_chroma_top_offset;
2471       msg->body.decode.dt_surf_tile_config = 0;
2472    } else {
2473       msg->body.decode.dt_pitch = luma->surface.u.legacy.level[0].nblk_x * luma->surface.blk_w;
2474       switch (luma->surface.u.legacy.level[0].mode) {
2475       case RADEON_SURF_MODE_LINEAR_ALIGNED:
2476          msg->body.decode.dt_tiling_mode = RUVD_TILE_LINEAR;
2477          msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_LINEAR;
2478          break;
2479       case RADEON_SURF_MODE_1D:
2480          msg->body.decode.dt_tiling_mode = RUVD_TILE_8X8;
2481          msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_1D_THIN;
2482          break;
2483       case RADEON_SURF_MODE_2D:
2484          msg->body.decode.dt_tiling_mode = RUVD_TILE_8X8;
2485          msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_2D_THIN;
2486          break;
2487       default:
2488          assert(0);
2489          break;
2490       }
2491 
2492       msg->body.decode.dt_luma_top_offset = texture_offset_legacy(&luma->surface, dt_array_idx);
2493       if (chroma)
2494          msg->body.decode.dt_chroma_top_offset = texture_offset_legacy(&chroma->surface, dt_array_idx);
2495       msg->body.decode.dt_luma_bottom_offset = msg->body.decode.dt_luma_top_offset;
2496       msg->body.decode.dt_chroma_bottom_offset = msg->body.decode.dt_chroma_top_offset;
2497 
2498       if (chroma) {
2499          assert(luma->surface.u.legacy.bankw == chroma->surface.u.legacy.bankw);
2500          assert(luma->surface.u.legacy.bankh == chroma->surface.u.legacy.bankh);
2501          assert(luma->surface.u.legacy.mtilea == chroma->surface.u.legacy.mtilea);
2502       }
2503 
2504       msg->body.decode.dt_surf_tile_config |= RUVD_BANK_WIDTH(util_logbase2(luma->surface.u.legacy.bankw));
2505       msg->body.decode.dt_surf_tile_config |= RUVD_BANK_HEIGHT(util_logbase2(luma->surface.u.legacy.bankh));
2506       msg->body.decode.dt_surf_tile_config |=
2507          RUVD_MACRO_TILE_ASPECT_RATIO(util_logbase2(luma->surface.u.legacy.mtilea));
2508    }
2509 
2510    if (pdev->info.family >= CHIP_STONEY)
2511       msg->body.decode.dt_wa_chroma_top_offset = msg->body.decode.dt_pitch / 2;
2512 
2513    msg->body.decode.db_surf_tile_config = msg->body.decode.dt_surf_tile_config;
2514    msg->body.decode.extension_support = 0x1;
2515 
2516    return true;
2517 }
2518 
2519 static void
ruvd_dec_message_create(struct radv_video_session * vid,void * ptr)2520 ruvd_dec_message_create(struct radv_video_session *vid, void *ptr)
2521 {
2522    struct ruvd_msg *msg = ptr;
2523 
2524    memset(ptr, 0, sizeof(*msg));
2525    msg->size = sizeof(*msg);
2526    msg->msg_type = RUVD_MSG_CREATE;
2527    msg->stream_handle = vid->stream_handle;
2528    msg->body.create.stream_type = vid->stream_type;
2529    msg->body.create.width_in_samples = vid->vk.max_coded.width;
2530    msg->body.create.height_in_samples = vid->vk.max_coded.height;
2531 }
2532 
2533 VKAPI_ATTR void VKAPI_CALL
radv_CmdBeginVideoCodingKHR(VkCommandBuffer commandBuffer,const VkVideoBeginCodingInfoKHR * pBeginInfo)2534 radv_CmdBeginVideoCodingKHR(VkCommandBuffer commandBuffer, const VkVideoBeginCodingInfoKHR *pBeginInfo)
2535 {
2536    VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
2537    VK_FROM_HANDLE(radv_video_session, vid, pBeginInfo->videoSession);
2538    VK_FROM_HANDLE(radv_video_session_params, params, pBeginInfo->videoSessionParameters);
2539 
2540    cmd_buffer->video.vid = vid;
2541    cmd_buffer->video.params = params;
2542 
2543    if (vid->encode)
2544       radv_video_enc_begin_coding(cmd_buffer);
2545 }
2546 
2547 static void
radv_vcn_cmd_reset(struct radv_cmd_buffer * cmd_buffer)2548 radv_vcn_cmd_reset(struct radv_cmd_buffer *cmd_buffer)
2549 {
2550    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
2551    const struct radv_physical_device *pdev = radv_device_physical(device);
2552    struct radv_video_session *vid = cmd_buffer->video.vid;
2553    uint32_t size = sizeof(rvcn_dec_message_header_t) + sizeof(rvcn_dec_message_create_t);
2554 
2555    void *ptr;
2556    uint32_t out_offset;
2557 
2558    if (vid->stream_type == RDECODE_CODEC_AV1) {
2559       uint8_t *ctxptr = radv_buffer_map(device->ws, vid->ctx.mem->bo);
2560       ctxptr += vid->ctx.offset;
2561       ac_vcn_av1_init_probs(pdev->av1_version, ctxptr);
2562       device->ws->buffer_unmap(device->ws, vid->ctx.mem->bo, false);
2563    }
2564    radv_vid_buffer_upload_alloc(cmd_buffer, size, &out_offset, &ptr);
2565 
2566    if (pdev->vid_decode_ip == AMD_IP_VCN_UNIFIED)
2567       radv_vcn_sq_start(cmd_buffer);
2568 
2569    rvcn_dec_message_create(vid, ptr, size);
2570    send_cmd(cmd_buffer, RDECODE_CMD_SESSION_CONTEXT_BUFFER, vid->sessionctx.mem->bo, vid->sessionctx.offset);
2571    send_cmd(cmd_buffer, RDECODE_CMD_MSG_BUFFER, cmd_buffer->upload.upload_bo, out_offset);
2572    /* pad out the IB to the 16 dword boundary - otherwise the fw seems to be unhappy */
2573 
2574    if (pdev->vid_decode_ip != AMD_IP_VCN_UNIFIED) {
2575       radeon_check_space(device->ws, cmd_buffer->cs, 8);
2576       for (unsigned i = 0; i < 8; i++)
2577          radeon_emit(cmd_buffer->cs, 0x81ff);
2578    } else
2579       radv_vcn_sq_tail(cmd_buffer->cs, &cmd_buffer->video.sq);
2580 }
2581 
2582 static void
radv_uvd_cmd_reset(struct radv_cmd_buffer * cmd_buffer)2583 radv_uvd_cmd_reset(struct radv_cmd_buffer *cmd_buffer)
2584 {
2585    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
2586    struct radv_video_session *vid = cmd_buffer->video.vid;
2587    uint32_t size = sizeof(struct ruvd_msg);
2588    void *ptr;
2589    uint32_t out_offset;
2590    radv_vid_buffer_upload_alloc(cmd_buffer, size, &out_offset, &ptr);
2591 
2592    ruvd_dec_message_create(vid, ptr);
2593    if (vid->sessionctx.mem)
2594       send_cmd(cmd_buffer, RDECODE_CMD_SESSION_CONTEXT_BUFFER, vid->sessionctx.mem->bo, vid->sessionctx.offset);
2595    send_cmd(cmd_buffer, RDECODE_CMD_MSG_BUFFER, cmd_buffer->upload.upload_bo, out_offset);
2596 
2597    /* pad out the IB to the 16 dword boundary - otherwise the fw seems to be unhappy */
2598    int padsize = vid->sessionctx.mem ? 4 : 6;
2599    radeon_check_space(device->ws, cmd_buffer->cs, padsize);
2600    for (unsigned i = 0; i < padsize; i++)
2601       radeon_emit(cmd_buffer->cs, PKT2_NOP_PAD);
2602 }
2603 
2604 VKAPI_ATTR void VKAPI_CALL
radv_CmdControlVideoCodingKHR(VkCommandBuffer commandBuffer,const VkVideoCodingControlInfoKHR * pCodingControlInfo)2605 radv_CmdControlVideoCodingKHR(VkCommandBuffer commandBuffer, const VkVideoCodingControlInfoKHR *pCodingControlInfo)
2606 {
2607    VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
2608    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
2609    struct radv_physical_device *pdev = radv_device_physical(device);
2610 
2611    if (cmd_buffer->video.vid->encode) {
2612       radv_video_enc_control_video_coding(cmd_buffer, pCodingControlInfo);
2613       return;
2614    }
2615    if (pCodingControlInfo->flags & VK_VIDEO_CODING_CONTROL_RESET_BIT_KHR) {
2616       if (radv_has_uvd(pdev))
2617          radv_uvd_cmd_reset(cmd_buffer);
2618       else
2619          radv_vcn_cmd_reset(cmd_buffer);
2620    }
2621 }
2622 
2623 VKAPI_ATTR void VKAPI_CALL
radv_CmdEndVideoCodingKHR(VkCommandBuffer commandBuffer,const VkVideoEndCodingInfoKHR * pEndCodingInfo)2624 radv_CmdEndVideoCodingKHR(VkCommandBuffer commandBuffer, const VkVideoEndCodingInfoKHR *pEndCodingInfo)
2625 {
2626    VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
2627 
2628    if (cmd_buffer->video.vid->encode) {
2629       radv_video_enc_end_coding(cmd_buffer);
2630       return;
2631    }
2632 }
2633 
2634 static void
radv_uvd_decode_video(struct radv_cmd_buffer * cmd_buffer,const VkVideoDecodeInfoKHR * frame_info)2635 radv_uvd_decode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoDecodeInfoKHR *frame_info)
2636 {
2637    VK_FROM_HANDLE(radv_buffer, src_buffer, frame_info->srcBuffer);
2638    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
2639    const struct radv_physical_device *pdev = radv_device_physical(device);
2640    struct radv_video_session *vid = cmd_buffer->video.vid;
2641    struct radv_video_session_params *params = cmd_buffer->video.params;
2642    unsigned size = sizeof(struct ruvd_msg);
2643    void *ptr, *fb_ptr, *it_probs_ptr = NULL;
2644    uint32_t out_offset, fb_offset, it_probs_offset = 0;
2645    struct radeon_winsys_bo *msg_bo, *fb_bo, *it_probs_bo = NULL;
2646    unsigned fb_size = (pdev->info.family == CHIP_TONGA) ? FB_BUFFER_SIZE_TONGA : FB_BUFFER_SIZE;
2647 
2648    radv_vid_buffer_upload_alloc(cmd_buffer, fb_size, &fb_offset, &fb_ptr);
2649    fb_bo = cmd_buffer->upload.upload_bo;
2650    if (have_it(vid)) {
2651       radv_vid_buffer_upload_alloc(cmd_buffer, IT_SCALING_TABLE_SIZE, &it_probs_offset, &it_probs_ptr);
2652       it_probs_bo = cmd_buffer->upload.upload_bo;
2653    }
2654 
2655    radv_vid_buffer_upload_alloc(cmd_buffer, size, &out_offset, &ptr);
2656    msg_bo = cmd_buffer->upload.upload_bo;
2657 
2658    uint32_t slice_offset;
2659    ruvd_dec_message_decode(device, vid, params, ptr, it_probs_ptr, &slice_offset, frame_info);
2660    rvcn_dec_message_feedback(fb_ptr);
2661    if (vid->sessionctx.mem)
2662       send_cmd(cmd_buffer, RDECODE_CMD_SESSION_CONTEXT_BUFFER, vid->sessionctx.mem->bo, vid->sessionctx.offset);
2663    send_cmd(cmd_buffer, RDECODE_CMD_MSG_BUFFER, msg_bo, out_offset);
2664 
2665    if (vid->dpb_type != DPB_DYNAMIC_TIER_2) {
2666       struct radv_image_view *dpb_iv =
2667          radv_image_view_from_handle(frame_info->pSetupReferenceSlot->pPictureResource->imageViewBinding);
2668       struct radv_image *dpb = dpb_iv->image;
2669       send_cmd(cmd_buffer, RDECODE_CMD_DPB_BUFFER, dpb->bindings[0].bo, dpb->bindings[0].offset);
2670    }
2671 
2672    if (vid->ctx.mem)
2673       send_cmd(cmd_buffer, RDECODE_CMD_CONTEXT_BUFFER, vid->ctx.mem->bo, vid->ctx.offset);
2674 
2675    send_cmd(cmd_buffer, RDECODE_CMD_BITSTREAM_BUFFER, src_buffer->bo,
2676             src_buffer->offset + frame_info->srcBufferOffset + slice_offset);
2677 
2678    struct radv_image_view *dst_iv = radv_image_view_from_handle(frame_info->dstPictureResource.imageViewBinding);
2679    struct radv_image *img = dst_iv->image;
2680    send_cmd(cmd_buffer, RDECODE_CMD_DECODING_TARGET_BUFFER, img->bindings[0].bo, img->bindings[0].offset);
2681    send_cmd(cmd_buffer, RDECODE_CMD_FEEDBACK_BUFFER, fb_bo, fb_offset);
2682    if (have_it(vid))
2683       send_cmd(cmd_buffer, RDECODE_CMD_IT_SCALING_TABLE_BUFFER, it_probs_bo, it_probs_offset);
2684 
2685    radeon_check_space(device->ws, cmd_buffer->cs, 2);
2686    set_reg(cmd_buffer, pdev->vid_dec_reg.cntl, 1);
2687 }
2688 
2689 static void
radv_vcn_decode_video(struct radv_cmd_buffer * cmd_buffer,const VkVideoDecodeInfoKHR * frame_info)2690 radv_vcn_decode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoDecodeInfoKHR *frame_info)
2691 {
2692    VK_FROM_HANDLE(radv_buffer, src_buffer, frame_info->srcBuffer);
2693    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
2694    const struct radv_physical_device *pdev = radv_device_physical(device);
2695    struct radv_video_session *vid = cmd_buffer->video.vid;
2696    struct radv_video_session_params *params = cmd_buffer->video.params;
2697    unsigned size = 0;
2698    void *ptr, *fb_ptr, *it_probs_ptr = NULL;
2699    uint32_t out_offset, fb_offset, it_probs_offset = 0;
2700    struct radeon_winsys_bo *msg_bo, *fb_bo, *it_probs_bo = NULL;
2701 
2702    size += sizeof(rvcn_dec_message_header_t); /* header */
2703    size += sizeof(rvcn_dec_message_index_t);  /* codec */
2704    if (vid->dpb_type == DPB_DYNAMIC_TIER_2) {
2705       size += sizeof(rvcn_dec_message_index_t);
2706       size += sizeof(rvcn_dec_message_dynamic_dpb_t2_t);
2707    }
2708    size += sizeof(rvcn_dec_message_decode_t); /* decode */
2709    switch (vid->vk.op) {
2710    case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR:
2711       size += sizeof(rvcn_dec_message_avc_t);
2712       break;
2713    case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR:
2714       size += sizeof(rvcn_dec_message_hevc_t);
2715       break;
2716    case VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR:
2717       size += sizeof(rvcn_dec_message_av1_t);
2718       break;
2719    default:
2720       unreachable("unsupported codec.");
2721    }
2722 
2723    radv_vid_buffer_upload_alloc(cmd_buffer, FB_BUFFER_SIZE, &fb_offset, &fb_ptr);
2724    fb_bo = cmd_buffer->upload.upload_bo;
2725    if (have_it(vid)) {
2726       radv_vid_buffer_upload_alloc(cmd_buffer, IT_SCALING_TABLE_SIZE, &it_probs_offset, &it_probs_ptr);
2727       it_probs_bo = cmd_buffer->upload.upload_bo;
2728    } else if (have_probs(vid)) {
2729       radv_vid_buffer_upload_alloc(cmd_buffer, sizeof(rvcn_dec_av1_segment_fg_t), &it_probs_offset, &it_probs_ptr);
2730       it_probs_bo = cmd_buffer->upload.upload_bo;
2731    }
2732 
2733    radv_vid_buffer_upload_alloc(cmd_buffer, size, &out_offset, &ptr);
2734    msg_bo = cmd_buffer->upload.upload_bo;
2735 
2736    if (pdev->vid_decode_ip == AMD_IP_VCN_UNIFIED)
2737       radv_vcn_sq_start(cmd_buffer);
2738 
2739    uint32_t slice_offset;
2740    rvcn_dec_message_decode(cmd_buffer, vid, params, ptr, it_probs_ptr, &slice_offset, frame_info);
2741    rvcn_dec_message_feedback(fb_ptr);
2742    send_cmd(cmd_buffer, RDECODE_CMD_SESSION_CONTEXT_BUFFER, vid->sessionctx.mem->bo, vid->sessionctx.offset);
2743    send_cmd(cmd_buffer, RDECODE_CMD_MSG_BUFFER, msg_bo, out_offset);
2744 
2745    if (vid->dpb_type != DPB_DYNAMIC_TIER_2) {
2746       struct radv_image_view *dpb_iv =
2747          radv_image_view_from_handle(frame_info->pSetupReferenceSlot->pPictureResource->imageViewBinding);
2748       struct radv_image *dpb = dpb_iv->image;
2749       send_cmd(cmd_buffer, RDECODE_CMD_DPB_BUFFER, dpb->bindings[0].bo, dpb->bindings[0].offset);
2750    }
2751 
2752    if (vid->ctx.mem)
2753       send_cmd(cmd_buffer, RDECODE_CMD_CONTEXT_BUFFER, vid->ctx.mem->bo, vid->ctx.offset);
2754 
2755    send_cmd(cmd_buffer, RDECODE_CMD_BITSTREAM_BUFFER, src_buffer->bo,
2756             src_buffer->offset + frame_info->srcBufferOffset + slice_offset);
2757 
2758    struct radv_image_view *dst_iv = radv_image_view_from_handle(frame_info->dstPictureResource.imageViewBinding);
2759    struct radv_image *img = dst_iv->image;
2760    send_cmd(cmd_buffer, RDECODE_CMD_DECODING_TARGET_BUFFER, img->bindings[0].bo, img->bindings[0].offset);
2761    send_cmd(cmd_buffer, RDECODE_CMD_FEEDBACK_BUFFER, fb_bo, fb_offset);
2762    if (have_it(vid))
2763       send_cmd(cmd_buffer, RDECODE_CMD_IT_SCALING_TABLE_BUFFER, it_probs_bo, it_probs_offset);
2764    else if (have_probs(vid))
2765       send_cmd(cmd_buffer, RDECODE_CMD_PROB_TBL_BUFFER, it_probs_bo, it_probs_offset);
2766 
2767    if (pdev->vid_decode_ip != AMD_IP_VCN_UNIFIED) {
2768       radeon_check_space(device->ws, cmd_buffer->cs, 2);
2769       set_reg(cmd_buffer, pdev->vid_dec_reg.cntl, 1);
2770    } else
2771       radv_vcn_sq_tail(cmd_buffer->cs, &cmd_buffer->video.sq);
2772 }
2773 
2774 VKAPI_ATTR void VKAPI_CALL
radv_CmdDecodeVideoKHR(VkCommandBuffer commandBuffer,const VkVideoDecodeInfoKHR * frame_info)2775 radv_CmdDecodeVideoKHR(VkCommandBuffer commandBuffer, const VkVideoDecodeInfoKHR *frame_info)
2776 {
2777    VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
2778    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
2779    struct radv_physical_device *pdev = radv_device_physical(device);
2780 
2781    if (radv_has_uvd(pdev))
2782       radv_uvd_decode_video(cmd_buffer, frame_info);
2783    else
2784       radv_vcn_decode_video(cmd_buffer, frame_info);
2785 }
2786 
2787 void
radv_video_get_profile_alignments(struct radv_physical_device * pdev,const VkVideoProfileListInfoKHR * profile_list,uint32_t * width_align_out,uint32_t * height_align_out)2788 radv_video_get_profile_alignments(struct radv_physical_device *pdev, const VkVideoProfileListInfoKHR *profile_list,
2789                                   uint32_t *width_align_out, uint32_t *height_align_out)
2790 {
2791    vk_video_get_profile_alignments(profile_list, width_align_out, height_align_out);
2792    bool is_h265_main_10 = false;
2793 
2794    if (profile_list) {
2795       for (unsigned i = 0; i < profile_list->profileCount; i++) {
2796          if (profile_list->pProfiles[i].videoCodecOperation == VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR) {
2797             const struct VkVideoDecodeH265ProfileInfoKHR *h265_profile =
2798                vk_find_struct_const(profile_list->pProfiles[i].pNext, VIDEO_DECODE_H265_PROFILE_INFO_KHR);
2799             if (h265_profile->stdProfileIdc == STD_VIDEO_H265_PROFILE_IDC_MAIN_10)
2800                is_h265_main_10 = true;
2801          }
2802       }
2803    } else
2804       is_h265_main_10 = true;
2805 
2806    uint32_t db_alignment = radv_video_get_db_alignment(pdev, 64, is_h265_main_10);
2807    *width_align_out = MAX2(*width_align_out, db_alignment);
2808    *height_align_out = MAX2(*height_align_out, db_alignment);
2809 }
2810