1 /**************************************************************************
2 *
3 * Copyright 2017 Advanced Micro Devices, Inc.
4 *
5 * SPDX-License-Identifier: MIT
6 *
7 **************************************************************************/
8
9 #include "radeon_vcn_dec.h"
10
11 #include "pipe/p_video_codec.h"
12 #include "radeonsi/si_pipe.h"
13 #include "util/u_memory.h"
14 #include "util/u_video.h"
15 #include "vl/vl_mpeg12_decoder.h"
16 #include "vl/vl_probs_table.h"
17 #include "pspdecryptionparam.h"
18
19 #include <assert.h>
20 #include <stdio.h>
21
22 #include "ac_vcn_av1_default.h"
23 #include "ac_debug.h"
24
/* Buffer offsets and sizes.
 * NOTE(review): none of these are referenced in the part of the file reviewed
 * here; their users are presumably further down - confirm before changing. */
#define FB_BUFFER_OFFSET 0x2000
#define FB_BUFFER_SIZE 2048
#define IT_SCALING_TABLE_SIZE 992
/* VP9 probability data plus 256 extra bytes (presumably room for the
 * segmentation data written through dec->probs - TODO confirm). */
#define VP9_PROBS_TABLE_SIZE (RDECODE_VP9_PROBS_DATA_SIZE + 256)

/* Per-codec reference counts (presumably used for DPB sizing outside this
 * part of the file - confirm against the users). */
#define NUM_MPEG2_REFS 6
#define NUM_H264_REFS 17
#define NUM_VC1_REFS 5
#define NUM_VP9_REFS 8
#define NUM_AV1_REFS 8
#define NUM_AV1_REFS_PER_FRAME 7
36
/* Forward declarations of file-local helpers; their definitions are not in
 * the part of the file reviewed here. */
static enum pipe_format get_buffer_format(struct radeon_decoder *dec);
static unsigned calc_dpb_size(struct radeon_decoder *dec);
static unsigned calc_ctx_size_h264_perf(struct radeon_decoder *dec);
static unsigned calc_ctx_size_h265_main(struct radeon_decoder *dec);
static unsigned calc_ctx_size_h265_main10(struct radeon_decoder *dec,
                                          struct pipe_h265_picture_desc *pic);
43
/**
 * Destroy callback handed to vl_video_buffer_set_associated_data().
 *
 * Deliberately does nothing: the callers in this file store a small integer
 * slot index cast to a pointer, so there is no allocation to release.
 */
static void radeon_dec_destroy_associated_data(void *data)
{
   /* The pointer only carries an integer value; nothing to free. */
   (void)data;
}
48
get_current_pic_index(struct radeon_decoder * dec,struct pipe_video_buffer * target,unsigned char * curr_pic_idx)49 static void get_current_pic_index(struct radeon_decoder *dec,
50 struct pipe_video_buffer *target,
51 unsigned char *curr_pic_idx)
52 {
53 for (int i = 0; i < ARRAY_SIZE(dec->render_pic_list); ++i) {
54 if (dec->render_pic_list[i] && dec->render_pic_list[i] == target) {
55 if (target->codec != NULL)
56 *curr_pic_idx = (uintptr_t)vl_video_buffer_get_associated_data(target, &dec->base);
57 else {
58 *curr_pic_idx = i;
59 vl_video_buffer_set_associated_data(target, &dec->base, (void *)(uintptr_t)i,
60 &radeon_dec_destroy_associated_data);
61 }
62 break;
63 } else if (!dec->render_pic_list[i]) {
64 dec->render_pic_list[i] = target;
65 *curr_pic_idx = i;
66 vl_video_buffer_set_associated_data(target, &dec->base, (void *)(uintptr_t)i,
67 &radeon_dec_destroy_associated_data);
68 break;
69 }
70 }
71 }
72
get_h264_msg(struct radeon_decoder * dec,struct pipe_video_buffer * target,struct pipe_h264_picture_desc * pic)73 static rvcn_dec_message_avc_t get_h264_msg(struct radeon_decoder *dec,
74 struct pipe_video_buffer *target,
75 struct pipe_h264_picture_desc *pic)
76 {
77 rvcn_dec_message_avc_t result;
78 unsigned i, j, k;
79
80 memset(&result, 0, sizeof(result));
81 switch (pic->base.profile) {
82 case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE:
83 case PIPE_VIDEO_PROFILE_MPEG4_AVC_CONSTRAINED_BASELINE:
84 result.profile = RDECODE_H264_PROFILE_BASELINE;
85 break;
86
87 case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN:
88 result.profile = RDECODE_H264_PROFILE_MAIN;
89 break;
90
91 case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH:
92 result.profile = RDECODE_H264_PROFILE_HIGH;
93 break;
94
95 default:
96 assert(0);
97 break;
98 }
99
100 result.level = dec->base.level;
101
102 result.sps_info_flags = 0;
103 result.sps_info_flags |= pic->pps->sps->direct_8x8_inference_flag << 0;
104 result.sps_info_flags |= pic->pps->sps->mb_adaptive_frame_field_flag << 1;
105 result.sps_info_flags |= pic->pps->sps->frame_mbs_only_flag << 2;
106 result.sps_info_flags |= pic->pps->sps->delta_pic_order_always_zero_flag << 3;
107 result.sps_info_flags |= ((dec->dpb_type == DPB_DYNAMIC_TIER_2) ? 0 : 1)
108 << RDECODE_SPS_INFO_H264_EXTENSION_SUPPORT_FLAG_SHIFT;
109
110 result.chroma_format = pic->pps->sps->chroma_format_idc;
111 result.bit_depth_luma_minus8 = pic->pps->sps->bit_depth_luma_minus8;
112 result.bit_depth_chroma_minus8 = pic->pps->sps->bit_depth_chroma_minus8;
113 result.log2_max_frame_num_minus4 = pic->pps->sps->log2_max_frame_num_minus4;
114 result.pic_order_cnt_type = pic->pps->sps->pic_order_cnt_type;
115 result.log2_max_pic_order_cnt_lsb_minus4 = pic->pps->sps->log2_max_pic_order_cnt_lsb_minus4;
116
117 result.pps_info_flags = 0;
118 result.pps_info_flags |= pic->pps->transform_8x8_mode_flag << 0;
119 result.pps_info_flags |= pic->pps->redundant_pic_cnt_present_flag << 1;
120 result.pps_info_flags |= pic->pps->constrained_intra_pred_flag << 2;
121 result.pps_info_flags |= pic->pps->deblocking_filter_control_present_flag << 3;
122 result.pps_info_flags |= pic->pps->weighted_bipred_idc << 4;
123 result.pps_info_flags |= pic->pps->weighted_pred_flag << 6;
124 result.pps_info_flags |= pic->pps->bottom_field_pic_order_in_frame_present_flag << 7;
125 result.pps_info_flags |= pic->pps->entropy_coding_mode_flag << 8;
126
127 result.num_slice_groups_minus1 = pic->pps->num_slice_groups_minus1;
128 result.slice_group_map_type = pic->pps->slice_group_map_type;
129 result.slice_group_change_rate_minus1 = pic->pps->slice_group_change_rate_minus1;
130 result.pic_init_qp_minus26 = pic->pps->pic_init_qp_minus26;
131 result.chroma_qp_index_offset = pic->pps->chroma_qp_index_offset;
132 result.second_chroma_qp_index_offset = pic->pps->second_chroma_qp_index_offset;
133
134 memcpy(result.scaling_list_4x4, pic->pps->ScalingList4x4, 6 * 16);
135 memcpy(result.scaling_list_8x8, pic->pps->ScalingList8x8, 2 * 64);
136
137 memcpy(dec->it, result.scaling_list_4x4, 6 * 16);
138 memcpy((dec->it + 96), result.scaling_list_8x8, 2 * 64);
139
140 result.num_ref_frames = pic->num_ref_frames;
141
142 result.num_ref_idx_l0_active_minus1 = pic->num_ref_idx_l0_active_minus1;
143 result.num_ref_idx_l1_active_minus1 = pic->num_ref_idx_l1_active_minus1;
144
145 result.frame_num = pic->frame_num;
146 memcpy(result.frame_num_list, pic->frame_num_list, 4 * 16);
147 result.curr_field_order_cnt_list[0] = pic->field_order_cnt[0];
148 result.curr_field_order_cnt_list[1] = pic->field_order_cnt[1];
149 memcpy(result.field_order_cnt_list, pic->field_order_cnt_list, 4 * 16 * 2);
150 result.non_existing_frame_flags = 0;
151 result.used_for_reference_flags = 0;
152
153 if (dec->dpb_type != DPB_DYNAMIC_TIER_2) {
154 result.decoded_pic_idx = pic->frame_num;
155 goto end;
156 }
157
158 for (i = 0; i < ARRAY_SIZE(dec->render_pic_list); i++) {
159 for (j = 0; (j < ARRAY_SIZE(pic->ref) && (pic->ref[j] != NULL)); j++) {
160 if (dec->render_pic_list[i] == pic->ref[j])
161 break;
162 if (j == ARRAY_SIZE(pic->ref) - 1)
163 dec->render_pic_list[i] = NULL;
164 else if (pic->ref[j + 1] == NULL)
165 dec->render_pic_list[i] = NULL;
166 }
167 }
168 for (i = 0; i < ARRAY_SIZE(dec->render_pic_list); ++i) {
169 if (dec->render_pic_list[i] && dec->render_pic_list[i] == target) {
170 if (target->codec != NULL){
171 result.decoded_pic_idx =
172 (uintptr_t)vl_video_buffer_get_associated_data(target, &dec->base);
173 } else {
174 result.decoded_pic_idx = i;
175 vl_video_buffer_set_associated_data(target, &dec->base, (void *)(uintptr_t)i,
176 &radeon_dec_destroy_associated_data);
177 }
178 break;
179 }
180 }
181 if (i == ARRAY_SIZE(dec->render_pic_list)) {
182 for (i = 0; i < ARRAY_SIZE(dec->render_pic_list); ++i) {
183 if (!dec->render_pic_list[i]) {
184 dec->render_pic_list[i] = target;
185 result.decoded_pic_idx = i;
186 vl_video_buffer_set_associated_data(target, &dec->base, (void *)(uintptr_t)i,
187 &radeon_dec_destroy_associated_data);
188 break;
189 }
190 }
191 }
192 for (i = 0; i < ARRAY_SIZE(result.ref_frame_list); i++) {
193 result.ref_frame_list[i] = pic->ref[i] ?
194 (uintptr_t)vl_video_buffer_get_associated_data(pic->ref[i], &dec->base) : 0xff;
195
196 if (result.ref_frame_list[i] != 0xff) {
197 if (pic->top_is_reference[i])
198 result.used_for_reference_flags |= (1 << (2 * i));
199 if (pic->bottom_is_reference[i])
200 result.used_for_reference_flags |= (1 << (2 * i + 1));
201
202 if (pic->is_long_term[i])
203 result.ref_frame_list[i] |= 0x80;
204
205 result.curr_pic_ref_frame_num++;
206
207 for (j = 0; j < ARRAY_SIZE(dec->h264_valid_ref_num); j++) {
208 if ((dec->h264_valid_ref_num[j] != (unsigned)-1)
209 && (dec->h264_valid_ref_num[j] == result.frame_num_list[i]))
210 break;
211 }
212
213 for (k = 0; k < ARRAY_SIZE(dec->h264_valid_poc_num); k++) {
214 if ((dec->h264_valid_poc_num[k] != (unsigned)-1)
215 && ((dec->h264_valid_poc_num[k] == result.field_order_cnt_list[i][0])
216 || dec->h264_valid_poc_num[k] == result.field_order_cnt_list[i][1]))
217 break;
218 }
219 if ((j == ARRAY_SIZE(dec->h264_valid_ref_num)) && (k == ARRAY_SIZE(dec->h264_valid_poc_num))) {
220 result.non_existing_frame_flags |= 1 << i;
221 result.curr_pic_ref_frame_num--;
222 result.ref_frame_list[i] = 0xff;
223 }
224 }
225 }
226
227 /* need at least one reference for P/B frames */
228 if (result.curr_pic_ref_frame_num == 0 && pic->slice_parameter.slice_info_present) {
229 for (i = 0; i < pic->slice_count; i++) {
230 if (pic->slice_parameter.slice_type[i] % 5 != 2) {
231 result.curr_pic_ref_frame_num++;
232 result.ref_frame_list[0] = 0;
233 result.non_existing_frame_flags &= ~1;
234 break;
235 }
236 }
237 }
238
239 for (i = 0; i < ARRAY_SIZE(result.ref_frame_list); i++) {
240 if (result.ref_frame_list[i] != 0xff) {
241 dec->h264_valid_ref_num[i] = result.frame_num_list[i];
242 dec->h264_valid_poc_num[2 * i] = pic->top_is_reference[i] ?
243 result.field_order_cnt_list[i][0] : (unsigned) -1;
244 dec->h264_valid_poc_num[2 * i + 1] = pic->bottom_is_reference[i] ?
245 result.field_order_cnt_list[i][1] : (unsigned) -1;
246 } else {
247 dec->h264_valid_ref_num[i] =
248 dec->h264_valid_poc_num[2 * i] =
249 dec->h264_valid_poc_num[2 * i + 1] = (unsigned) -1;
250 }
251 }
252
253 dec->h264_valid_ref_num[ARRAY_SIZE(dec->h264_valid_ref_num) - 1] = result.frame_num;
254 dec->h264_valid_poc_num[ARRAY_SIZE(dec->h264_valid_poc_num) - 2] =
255 pic->field_pic_flag && pic->bottom_field_flag ?
256 (unsigned) -1 : result.curr_field_order_cnt_list[0];
257 dec->h264_valid_poc_num[ARRAY_SIZE(dec->h264_valid_poc_num) - 1] =
258 pic->field_pic_flag && !pic->bottom_field_flag ?
259 (unsigned) -1 : result.curr_field_order_cnt_list[1];
260
261 if (dec->dpb_type == DPB_DYNAMIC_TIER_2) {
262 dec->ref_codec.bts = CODEC_8_BITS;
263 dec->ref_codec.index = result.decoded_pic_idx;
264 dec->ref_codec.ref_size = 16;
265 dec->ref_codec.num_refs = result.curr_pic_ref_frame_num;
266 STATIC_ASSERT(sizeof(dec->ref_codec.ref_list) == sizeof(result.ref_frame_list));
267 memcpy(dec->ref_codec.ref_list, result.ref_frame_list, sizeof(result.ref_frame_list));
268 }
269
270 end:
271 return result;
272 }
273
get_h265_msg(struct radeon_decoder * dec,struct pipe_video_buffer * target,struct pipe_h265_picture_desc * pic)274 static rvcn_dec_message_hevc_t get_h265_msg(struct radeon_decoder *dec,
275 struct pipe_video_buffer *target,
276 struct pipe_h265_picture_desc *pic)
277 {
278 rvcn_dec_message_hevc_t result;
279 unsigned i, j, num_refs = 0;
280
281 memset(&result, 0, sizeof(result));
282 result.sps_info_flags = 0;
283 result.sps_info_flags |= pic->pps->sps->scaling_list_enabled_flag << 0;
284 result.sps_info_flags |= pic->pps->sps->amp_enabled_flag << 1;
285 result.sps_info_flags |= pic->pps->sps->sample_adaptive_offset_enabled_flag << 2;
286 result.sps_info_flags |= pic->pps->sps->pcm_enabled_flag << 3;
287 result.sps_info_flags |= pic->pps->sps->pcm_loop_filter_disabled_flag << 4;
288 result.sps_info_flags |= pic->pps->sps->long_term_ref_pics_present_flag << 5;
289 result.sps_info_flags |= pic->pps->sps->sps_temporal_mvp_enabled_flag << 6;
290 result.sps_info_flags |= pic->pps->sps->strong_intra_smoothing_enabled_flag << 7;
291 result.sps_info_flags |= pic->pps->sps->separate_colour_plane_flag << 8;
292 if (((struct si_screen *)dec->screen)->info.family == CHIP_CARRIZO)
293 result.sps_info_flags |= 1 << 9;
294 if (pic->UseRefPicList == true) {
295 result.sps_info_flags |= 1 << 10;
296 result.sps_info_flags |= 1 << 12;
297 }
298 if (pic->UseStRpsBits == true && pic->pps->st_rps_bits != 0) {
299 result.sps_info_flags |= 1 << 11;
300 result.st_rps_bits = pic->pps->st_rps_bits;
301 }
302
303 result.chroma_format = pic->pps->sps->chroma_format_idc;
304 result.bit_depth_luma_minus8 = pic->pps->sps->bit_depth_luma_minus8;
305 result.bit_depth_chroma_minus8 = pic->pps->sps->bit_depth_chroma_minus8;
306 result.log2_max_pic_order_cnt_lsb_minus4 = pic->pps->sps->log2_max_pic_order_cnt_lsb_minus4;
307 result.sps_max_dec_pic_buffering_minus1 = pic->pps->sps->sps_max_dec_pic_buffering_minus1;
308 result.log2_min_luma_coding_block_size_minus3 =
309 pic->pps->sps->log2_min_luma_coding_block_size_minus3;
310 result.log2_diff_max_min_luma_coding_block_size =
311 pic->pps->sps->log2_diff_max_min_luma_coding_block_size;
312 result.log2_min_transform_block_size_minus2 =
313 pic->pps->sps->log2_min_transform_block_size_minus2;
314 result.log2_diff_max_min_transform_block_size =
315 pic->pps->sps->log2_diff_max_min_transform_block_size;
316 result.max_transform_hierarchy_depth_inter = pic->pps->sps->max_transform_hierarchy_depth_inter;
317 result.max_transform_hierarchy_depth_intra = pic->pps->sps->max_transform_hierarchy_depth_intra;
318 result.pcm_sample_bit_depth_luma_minus1 = pic->pps->sps->pcm_sample_bit_depth_luma_minus1;
319 result.pcm_sample_bit_depth_chroma_minus1 = pic->pps->sps->pcm_sample_bit_depth_chroma_minus1;
320 result.log2_min_pcm_luma_coding_block_size_minus3 =
321 pic->pps->sps->log2_min_pcm_luma_coding_block_size_minus3;
322 result.log2_diff_max_min_pcm_luma_coding_block_size =
323 pic->pps->sps->log2_diff_max_min_pcm_luma_coding_block_size;
324 result.num_short_term_ref_pic_sets = pic->pps->sps->num_short_term_ref_pic_sets;
325
326 result.pps_info_flags = 0;
327 result.pps_info_flags |= pic->pps->dependent_slice_segments_enabled_flag << 0;
328 result.pps_info_flags |= pic->pps->output_flag_present_flag << 1;
329 result.pps_info_flags |= pic->pps->sign_data_hiding_enabled_flag << 2;
330 result.pps_info_flags |= pic->pps->cabac_init_present_flag << 3;
331 result.pps_info_flags |= pic->pps->constrained_intra_pred_flag << 4;
332 result.pps_info_flags |= pic->pps->transform_skip_enabled_flag << 5;
333 result.pps_info_flags |= pic->pps->cu_qp_delta_enabled_flag << 6;
334 result.pps_info_flags |= pic->pps->pps_slice_chroma_qp_offsets_present_flag << 7;
335 result.pps_info_flags |= pic->pps->weighted_pred_flag << 8;
336 result.pps_info_flags |= pic->pps->weighted_bipred_flag << 9;
337 result.pps_info_flags |= pic->pps->transquant_bypass_enabled_flag << 10;
338 result.pps_info_flags |= pic->pps->tiles_enabled_flag << 11;
339 result.pps_info_flags |= pic->pps->entropy_coding_sync_enabled_flag << 12;
340 result.pps_info_flags |= pic->pps->uniform_spacing_flag << 13;
341 result.pps_info_flags |= pic->pps->loop_filter_across_tiles_enabled_flag << 14;
342 result.pps_info_flags |= pic->pps->pps_loop_filter_across_slices_enabled_flag << 15;
343 result.pps_info_flags |= pic->pps->deblocking_filter_override_enabled_flag << 16;
344 result.pps_info_flags |= pic->pps->pps_deblocking_filter_disabled_flag << 17;
345 result.pps_info_flags |= pic->pps->lists_modification_present_flag << 18;
346 result.pps_info_flags |= pic->pps->slice_segment_header_extension_present_flag << 19;
347
348 result.num_extra_slice_header_bits = pic->pps->num_extra_slice_header_bits;
349 result.num_long_term_ref_pic_sps = pic->pps->sps->num_long_term_ref_pics_sps;
350 result.num_ref_idx_l0_default_active_minus1 = pic->pps->num_ref_idx_l0_default_active_minus1;
351 result.num_ref_idx_l1_default_active_minus1 = pic->pps->num_ref_idx_l1_default_active_minus1;
352 result.pps_cb_qp_offset = pic->pps->pps_cb_qp_offset;
353 result.pps_cr_qp_offset = pic->pps->pps_cr_qp_offset;
354 result.pps_beta_offset_div2 = pic->pps->pps_beta_offset_div2;
355 result.pps_tc_offset_div2 = pic->pps->pps_tc_offset_div2;
356 result.diff_cu_qp_delta_depth = pic->pps->diff_cu_qp_delta_depth;
357 result.num_tile_columns_minus1 = pic->pps->num_tile_columns_minus1;
358 result.num_tile_rows_minus1 = pic->pps->num_tile_rows_minus1;
359 result.log2_parallel_merge_level_minus2 = pic->pps->log2_parallel_merge_level_minus2;
360 result.init_qp_minus26 = pic->pps->init_qp_minus26;
361
362 for (i = 0; i < 19; ++i)
363 result.column_width_minus1[i] = pic->pps->column_width_minus1[i];
364
365 for (i = 0; i < 21; ++i)
366 result.row_height_minus1[i] = pic->pps->row_height_minus1[i];
367
368 result.num_delta_pocs_ref_rps_idx = pic->NumDeltaPocsOfRefRpsIdx;
369 result.curr_poc = pic->CurrPicOrderCntVal;
370
371 for (i = 0; i < ARRAY_SIZE(dec->render_pic_list); i++) {
372 for (j = 0;
373 (j < ARRAY_SIZE(pic->ref) && (pic->ref[j] != NULL));
374 j++) {
375 if (dec->render_pic_list[i] == pic->ref[j])
376 break;
377 if (j == ARRAY_SIZE(pic->ref) - 1)
378 dec->render_pic_list[i] = NULL;
379 else if (pic->ref[j + 1] == NULL)
380 dec->render_pic_list[i] = NULL;
381 }
382 }
383 for (i = 0; i < ARRAY_SIZE(dec->render_pic_list); i++) {
384 if (dec->render_pic_list[i] == NULL) {
385 dec->render_pic_list[i] = target;
386 result.curr_idx = i;
387 break;
388 }
389 }
390
391 vl_video_buffer_set_associated_data(target, &dec->base, (void *)(uintptr_t)result.curr_idx,
392 &radeon_dec_destroy_associated_data);
393
394 for (i = 0; i < 16; ++i) {
395 struct pipe_video_buffer *ref = pic->ref[i];
396 uintptr_t ref_pic = 0;
397
398 result.poc_list[i] = pic->PicOrderCntVal[i];
399
400 if (ref) {
401 ref_pic = (uintptr_t)vl_video_buffer_get_associated_data(ref, &dec->base);
402 num_refs++;
403 } else
404 ref_pic = 0x7F;
405 result.ref_pic_list[i] = ref_pic;
406 }
407
408 for (i = 0; i < 8; ++i) {
409 result.ref_pic_set_st_curr_before[i] = 0xFF;
410 result.ref_pic_set_st_curr_after[i] = 0xFF;
411 result.ref_pic_set_lt_curr[i] = 0xFF;
412 }
413
414 for (i = 0; i < pic->NumPocStCurrBefore; ++i)
415 result.ref_pic_set_st_curr_before[i] = pic->RefPicSetStCurrBefore[i];
416
417 for (i = 0; i < pic->NumPocStCurrAfter; ++i)
418 result.ref_pic_set_st_curr_after[i] = pic->RefPicSetStCurrAfter[i];
419
420 for (i = 0; i < pic->NumPocLtCurr; ++i)
421 result.ref_pic_set_lt_curr[i] = pic->RefPicSetLtCurr[i];
422
423 for (i = 0; i < 6; ++i)
424 result.ucScalingListDCCoefSizeID2[i] = pic->pps->sps->ScalingListDCCoeff16x16[i];
425
426 for (i = 0; i < 2; ++i)
427 result.ucScalingListDCCoefSizeID3[i] = pic->pps->sps->ScalingListDCCoeff32x32[i];
428
429 memcpy(dec->it, pic->pps->sps->ScalingList4x4, 6 * 16);
430 memcpy(dec->it + 96, pic->pps->sps->ScalingList8x8, 6 * 64);
431 memcpy(dec->it + 480, pic->pps->sps->ScalingList16x16, 6 * 64);
432 memcpy(dec->it + 864, pic->pps->sps->ScalingList32x32, 2 * 64);
433
434 for (i = 0; i < 2; i++) {
435 for (j = 0; j < 15; j++)
436 result.direct_reflist[i][j] = pic->RefPicList[0][i][j];
437 }
438
439 if (pic->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) {
440 if (target->buffer_format == PIPE_FORMAT_P010 || target->buffer_format == PIPE_FORMAT_P016) {
441 result.p010_mode = 1;
442 result.msb_mode = 1;
443 } else {
444 result.p010_mode = 0;
445 result.luma_10to8 = 5;
446 result.chroma_10to8 = 5;
447 result.hevc_reserved[0] = 4; /* sclr_luma10to8 */
448 result.hevc_reserved[1] = 4; /* sclr_chroma10to8 */
449 }
450 }
451
452 if (dec->dpb_type == DPB_DYNAMIC_TIER_2) {
453 dec->ref_codec.bts = (pic->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) ?
454 CODEC_10_BITS : CODEC_8_BITS;
455 dec->ref_codec.index = result.curr_idx;
456 dec->ref_codec.ref_size = 15;
457 dec->ref_codec.num_refs = num_refs;
458 STATIC_ASSERT(sizeof(dec->ref_codec.ref_list) == sizeof(result.ref_pic_list));
459 memcpy(dec->ref_codec.ref_list, result.ref_pic_list, sizeof(result.ref_pic_list));
460 }
461 return result;
462 }
463
fill_probs_table(void * ptr)464 static void fill_probs_table(void *ptr)
465 {
466 rvcn_dec_vp9_probs_t *probs = (rvcn_dec_vp9_probs_t *)ptr;
467
468 memcpy(&probs->coef_probs[0], default_coef_probs_4x4, sizeof(default_coef_probs_4x4));
469 memcpy(&probs->coef_probs[1], default_coef_probs_8x8, sizeof(default_coef_probs_8x8));
470 memcpy(&probs->coef_probs[2], default_coef_probs_16x16, sizeof(default_coef_probs_16x16));
471 memcpy(&probs->coef_probs[3], default_coef_probs_32x32, sizeof(default_coef_probs_32x32));
472 memcpy(probs->y_mode_prob, default_if_y_probs, sizeof(default_if_y_probs));
473 memcpy(probs->uv_mode_prob, default_if_uv_probs, sizeof(default_if_uv_probs));
474 memcpy(probs->single_ref_prob, default_single_ref_p, sizeof(default_single_ref_p));
475 memcpy(probs->switchable_interp_prob, default_switchable_interp_prob,
476 sizeof(default_switchable_interp_prob));
477 memcpy(probs->partition_prob, default_partition_probs, sizeof(default_partition_probs));
478 memcpy(probs->inter_mode_probs, default_inter_mode_probs, sizeof(default_inter_mode_probs));
479 memcpy(probs->mbskip_probs, default_skip_probs, sizeof(default_skip_probs));
480 memcpy(probs->intra_inter_prob, default_intra_inter_p, sizeof(default_intra_inter_p));
481 memcpy(probs->comp_inter_prob, default_comp_inter_p, sizeof(default_comp_inter_p));
482 memcpy(probs->comp_ref_prob, default_comp_ref_p, sizeof(default_comp_ref_p));
483 memcpy(probs->tx_probs_32x32, default_tx_probs_32x32, sizeof(default_tx_probs_32x32));
484 memcpy(probs->tx_probs_16x16, default_tx_probs_16x16, sizeof(default_tx_probs_16x16));
485 memcpy(probs->tx_probs_8x8, default_tx_probs_8x8, sizeof(default_tx_probs_8x8));
486 memcpy(probs->mv_joints, default_nmv_joints, sizeof(default_nmv_joints));
487 memcpy(&probs->mv_comps[0], default_nmv_components, sizeof(default_nmv_components));
488 memset(&probs->nmvc_mask, 0, sizeof(rvcn_dec_vp9_nmv_ctx_mask_t));
489 }
490
get_vp9_msg(struct radeon_decoder * dec,struct pipe_video_buffer * target,struct pipe_vp9_picture_desc * pic)491 static rvcn_dec_message_vp9_t get_vp9_msg(struct radeon_decoder *dec,
492 struct pipe_video_buffer *target,
493 struct pipe_vp9_picture_desc *pic)
494 {
495 rvcn_dec_message_vp9_t result;
496 unsigned i, j, num_refs = 0;
497
498 memset(&result, 0, sizeof(result));
499
500 /* segment table */
501 rvcn_dec_vp9_probs_segment_t *prbs = (rvcn_dec_vp9_probs_segment_t *)(dec->probs);
502
503 if (pic->picture_parameter.pic_fields.segmentation_enabled) {
504 for (i = 0; i < 8; ++i) {
505 prbs->seg.feature_data[i] =
506 (pic->slice_parameter.seg_param[i].alt_quant & 0xffff) |
507 ((pic->slice_parameter.seg_param[i].alt_lf & 0xff) << 16) |
508 ((pic->slice_parameter.seg_param[i].segment_flags.segment_reference & 0xf) << 24);
509 prbs->seg.feature_mask[i] =
510 (pic->slice_parameter.seg_param[i].alt_quant_enabled << 0) |
511 (pic->slice_parameter.seg_param[i].alt_lf_enabled << 1) |
512 (pic->slice_parameter.seg_param[i].segment_flags.segment_reference_enabled << 2) |
513 (pic->slice_parameter.seg_param[i].segment_flags.segment_reference_skipped << 3);
514 }
515
516 for (i = 0; i < 7; ++i)
517 prbs->seg.tree_probs[i] = pic->picture_parameter.mb_segment_tree_probs[i];
518
519 for (i = 0; i < 3; ++i)
520 prbs->seg.pred_probs[i] = pic->picture_parameter.segment_pred_probs[i];
521
522 prbs->seg.abs_delta = pic->picture_parameter.abs_delta;
523 } else
524 memset(prbs->segment_data, 0, sizeof(prbs->segment_data));
525
526 result.frame_header_flags = (pic->picture_parameter.pic_fields.frame_type
527 << RDECODE_FRAME_HDR_INFO_VP9_FRAME_TYPE_SHIFT) &
528 RDECODE_FRAME_HDR_INFO_VP9_FRAME_TYPE_MASK;
529
530 result.frame_header_flags |= (pic->picture_parameter.pic_fields.error_resilient_mode
531 << RDECODE_FRAME_HDR_INFO_VP9_ERROR_RESILIENT_MODE_SHIFT) &
532 RDECODE_FRAME_HDR_INFO_VP9_ERROR_RESILIENT_MODE_MASK;
533
534 result.frame_header_flags |= (pic->picture_parameter.pic_fields.intra_only
535 << RDECODE_FRAME_HDR_INFO_VP9_INTRA_ONLY_SHIFT) &
536 RDECODE_FRAME_HDR_INFO_VP9_INTRA_ONLY_MASK;
537
538 result.frame_header_flags |= (pic->picture_parameter.pic_fields.allow_high_precision_mv
539 << RDECODE_FRAME_HDR_INFO_VP9_ALLOW_HIGH_PRECISION_MV_SHIFT) &
540 RDECODE_FRAME_HDR_INFO_VP9_ALLOW_HIGH_PRECISION_MV_MASK;
541
542 result.frame_header_flags |= (pic->picture_parameter.pic_fields.frame_parallel_decoding_mode
543 << RDECODE_FRAME_HDR_INFO_VP9_FRAME_PARALLEL_DECODING_MODE_SHIFT) &
544 RDECODE_FRAME_HDR_INFO_VP9_FRAME_PARALLEL_DECODING_MODE_MASK;
545
546 result.frame_header_flags |= (pic->picture_parameter.pic_fields.refresh_frame_context
547 << RDECODE_FRAME_HDR_INFO_VP9_REFRESH_FRAME_CONTEXT_SHIFT) &
548 RDECODE_FRAME_HDR_INFO_VP9_REFRESH_FRAME_CONTEXT_MASK;
549
550 result.frame_header_flags |= (pic->picture_parameter.pic_fields.segmentation_enabled
551 << RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_ENABLED_SHIFT) &
552 RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_ENABLED_MASK;
553
554 result.frame_header_flags |= (pic->picture_parameter.pic_fields.segmentation_update_map
555 << RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_UPDATE_MAP_SHIFT) &
556 RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_UPDATE_MAP_MASK;
557
558 result.frame_header_flags |= (pic->picture_parameter.pic_fields.segmentation_temporal_update
559 << RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_TEMPORAL_UPDATE_SHIFT) &
560 RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_TEMPORAL_UPDATE_MASK;
561
562 result.frame_header_flags |= (pic->picture_parameter.mode_ref_delta_enabled
563 << RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_ENABLED_SHIFT) &
564 RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_ENABLED_MASK;
565
566 result.frame_header_flags |= (pic->picture_parameter.mode_ref_delta_update
567 << RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_UPDATE_SHIFT) &
568 RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_UPDATE_MASK;
569
570 result.frame_header_flags |=
571 ((dec->show_frame && !pic->picture_parameter.pic_fields.error_resilient_mode &&
572 dec->last_width == dec->base.width && dec->last_height == dec->base.height)
573 << RDECODE_FRAME_HDR_INFO_VP9_USE_PREV_IN_FIND_MV_REFS_SHIFT) &
574 RDECODE_FRAME_HDR_INFO_VP9_USE_PREV_IN_FIND_MV_REFS_MASK;
575 dec->show_frame = pic->picture_parameter.pic_fields.show_frame;
576
577 result.frame_header_flags |= (1 << RDECODE_FRAME_HDR_INFO_VP9_USE_UNCOMPRESSED_HEADER_SHIFT) &
578 RDECODE_FRAME_HDR_INFO_VP9_USE_UNCOMPRESSED_HEADER_MASK;
579
580 result.interp_filter = pic->picture_parameter.pic_fields.mcomp_filter_type;
581
582 result.frame_context_idx = pic->picture_parameter.pic_fields.frame_context_idx;
583 result.reset_frame_context = pic->picture_parameter.pic_fields.reset_frame_context;
584
585 result.filter_level = pic->picture_parameter.filter_level;
586 result.sharpness_level = pic->picture_parameter.sharpness_level;
587
588 for (i = 0; i < 8; ++i)
589 memcpy(result.lf_adj_level[i], pic->slice_parameter.seg_param[i].filter_level, 4 * 2);
590
591 if (pic->picture_parameter.pic_fields.lossless_flag) {
592 result.base_qindex = 0;
593 result.y_dc_delta_q = 0;
594 result.uv_ac_delta_q = 0;
595 result.uv_dc_delta_q = 0;
596 } else {
597 result.base_qindex = pic->picture_parameter.base_qindex;
598 result.y_dc_delta_q = pic->picture_parameter.y_dc_delta_q;
599 result.uv_ac_delta_q = pic->picture_parameter.uv_ac_delta_q;
600 result.uv_dc_delta_q = pic->picture_parameter.uv_dc_delta_q;
601 }
602
603 result.log2_tile_cols = pic->picture_parameter.log2_tile_columns;
604 result.log2_tile_rows = pic->picture_parameter.log2_tile_rows;
605 result.chroma_format = 1;
606 result.bit_depth_luma_minus8 = result.bit_depth_chroma_minus8 =
607 (pic->picture_parameter.bit_depth - 8);
608
609 result.vp9_frame_size = align(dec->bs_size, 128);
610 result.uncompressed_header_size = pic->picture_parameter.frame_header_length_in_bytes;
611 result.compressed_header_size = pic->picture_parameter.first_partition_size;
612
613 assert(dec->base.max_references + 1 <= ARRAY_SIZE(dec->render_pic_list));
614
615 //clear the dec->render list if it is not used as a reference
616 for (i = 0; i < ARRAY_SIZE(dec->render_pic_list); i++) {
617 if (dec->render_pic_list[i]) {
618 for (j=0;j<8;j++) {
619 if (dec->render_pic_list[i] == pic->ref[j])
620 break;
621 }
622 if (j == 8)
623 dec->render_pic_list[i] = NULL;
624 }
625 }
626
627 get_current_pic_index(dec, target, &result.curr_pic_idx);
628
629 for (i = 0; i < 8; i++) {
630 uintptr_t ref_frame;
631 if (pic->ref[i]) {
632 ref_frame = (uintptr_t)vl_video_buffer_get_associated_data(pic->ref[i], &dec->base);
633 num_refs++;
634 } else
635 ref_frame = 0x7f;
636 result.ref_frame_map[i] = ref_frame;
637 }
638
639 result.frame_refs[0] = result.ref_frame_map[pic->picture_parameter.pic_fields.last_ref_frame];
640 result.ref_frame_sign_bias[0] = pic->picture_parameter.pic_fields.last_ref_frame_sign_bias;
641 result.frame_refs[1] = result.ref_frame_map[pic->picture_parameter.pic_fields.golden_ref_frame];
642 result.ref_frame_sign_bias[1] = pic->picture_parameter.pic_fields.golden_ref_frame_sign_bias;
643 result.frame_refs[2] = result.ref_frame_map[pic->picture_parameter.pic_fields.alt_ref_frame];
644 result.ref_frame_sign_bias[2] = pic->picture_parameter.pic_fields.alt_ref_frame_sign_bias;
645
646 if (pic->base.profile == PIPE_VIDEO_PROFILE_VP9_PROFILE2) {
647 if (target->buffer_format == PIPE_FORMAT_P010 || target->buffer_format == PIPE_FORMAT_P016) {
648 result.p010_mode = 1;
649 result.msb_mode = 1;
650 } else {
651 result.p010_mode = 0;
652 result.luma_10to8 = 1;
653 result.chroma_10to8 = 1;
654 }
655 }
656
657 if (dec->dpb_type == DPB_DYNAMIC_TIER_2) {
658 dec->ref_codec.bts = (pic->base.profile == PIPE_VIDEO_PROFILE_VP9_PROFILE2) ?
659 CODEC_10_BITS : CODEC_8_BITS;
660 dec->ref_codec.index = result.curr_pic_idx;
661 dec->ref_codec.ref_size = 8;
662 dec->ref_codec.num_refs = num_refs;
663 memset(dec->ref_codec.ref_list, 0x7f, sizeof(dec->ref_codec.ref_list));
664 memcpy(dec->ref_codec.ref_list, result.ref_frame_map, sizeof(result.ref_frame_map));
665 }
666
667 dec->last_width = dec->base.width;
668 dec->last_height = dec->base.height;
669
670 return result;
671 }
672
/**
 * Copy the per-slice HEVC reference picture lists into the direct-reflist
 * message.
 *
 * @param hevc_reflist destination; num_direct_reflist is set to the slice
 *                     count and, for each slice, both lists of 15 entries
 *                     are copied
 * @param pic          picture description supplying slice_count and RefPicList
 */
static void get_h265_reflist(rvcn_dec_message_hevc_direct_ref_list_t *hevc_reflist,
                             struct pipe_h265_picture_desc *pic)
{
   hevc_reflist->num_direct_reflist = pic->slice_parameter.slice_count;

   for (int slice = 0; slice < hevc_reflist->num_direct_reflist; slice++) {
      /* Walk the 2 x 15 entries of this slice as one flat range. */
      for (int n = 0; n < 2 * 15; n++) {
         const int list = n / 15;
         const int entry = n % 15;

         hevc_reflist->multi_direct_reflist[slice][list][entry] =
            pic->RefPicList[slice][list][entry];
      }
   }
}
684
/**
 * Fill the DRM part of a decode message from the decryption parameters.
 *
 * With neither the cbc nor the ctr flag set, the command word is 0 and only
 * the bypass bit is set in drm_cntl. Otherwise bypass is cleared, the command
 * word is assembled, and the wrapped key, key and counter are copied
 * (16 bytes each).
 *
 * @param drm       DRM message to fill
 * @param decrypted decryption parameters supplying flags, session id, keys and IVs
 */
static void set_drm_keys(rvcn_dec_message_drm_t *drm, DECRYPT_PARAMETERS *decrypted)
{
   const int use_cbc = decrypted->u.s.cbc;
   const int use_ctr = decrypted->u.s.ctr;
   const int session = decrypted->u.s.drm_id;
   /* These three are always enabled. */
   const int ekc = 1;
   const int data1 = 1;
   const int data2 = 1;

   drm->drm_cmd = 0;
   drm->drm_cntl = 1 << DRM_CNTL_BYPASS_SHIFT;

   /* No encryption requested: leave the bypass setting in place. */
   if (!use_cbc && !use_ctr)
      return;

   drm->drm_cntl = 0 << DRM_CNTL_BYPASS_SHIFT;

   /* Algorithm field: 0x00 when ctr is set, 0x02 for cbc only. */
   drm->drm_cmd |= (0xff << DRM_CMD_BYTE_MASK_SHIFT) |
                   ((use_ctr ? 0x00 : 0x02) << DRM_CMD_ALGORITHM_SHIFT) |
                   (1 << DRM_CMD_GEN_MASK_SHIFT) |
                   (ekc << DRM_CMD_UNWRAP_KEY_SHIFT) |
                   (0 << DRM_CMD_OFFSET_SHIFT) |
                   (data2 << DRM_CMD_CNT_DATA_SHIFT) |
                   (data1 << DRM_CMD_CNT_KEY_SHIFT) |
                   (ekc << DRM_CMD_KEY_SHIFT) |
                   (session << DRM_CMD_SESSION_SEL_SHIFT);

   memcpy(drm->drm_wrapped_key, decrypted->encrypted_key, 16);
   memcpy(drm->drm_key, decrypted->session_iv, 16);
   memcpy(drm->drm_counter, decrypted->encrypted_iv, 16);
   drm->drm_offset = 0;
}
725
rvcn_dec_av1_film_grain_surface(struct pipe_video_buffer ** target,struct pipe_av1_picture_desc * pic)726 static void rvcn_dec_av1_film_grain_surface(struct pipe_video_buffer **target,
727 struct pipe_av1_picture_desc *pic)
728 {
729 if (!pic->picture_parameter.film_grain_info.film_grain_info_fields.apply_grain ||
730 !pic->film_grain_target)
731 return;
732
733 *target = pic->film_grain_target;
734 }
735
get_av1_msg(struct radeon_decoder * dec,struct pipe_video_buffer * target,struct pipe_av1_picture_desc * pic)736 static rvcn_dec_message_av1_t get_av1_msg(struct radeon_decoder *dec,
737 struct pipe_video_buffer *target,
738 struct pipe_av1_picture_desc *pic)
739 {
740 rvcn_dec_message_av1_t result;
741 unsigned i, j, num_refs = 0;
742 uint16_t tile_count = pic->picture_parameter.tile_cols * pic->picture_parameter.tile_rows;
743
744 memset(&result, 0, sizeof(result));
745
746 result.frame_header_flags = (pic->picture_parameter.pic_info_fields.show_frame
747 << RDECODE_FRAME_HDR_INFO_AV1_SHOW_FRAME_SHIFT) &
748 RDECODE_FRAME_HDR_INFO_AV1_SHOW_FRAME_MASK;
749
750 result.frame_header_flags |= (pic->picture_parameter.pic_info_fields.disable_cdf_update
751 << RDECODE_FRAME_HDR_INFO_AV1_DISABLE_CDF_UPDATE_SHIFT) &
752 RDECODE_FRAME_HDR_INFO_AV1_DISABLE_CDF_UPDATE_MASK;
753
754 result.frame_header_flags |= ((!pic->picture_parameter.pic_info_fields.disable_frame_end_update_cdf)
755 << RDECODE_FRAME_HDR_INFO_AV1_REFRESH_FRAME_CONTEXT_SHIFT) &
756 RDECODE_FRAME_HDR_INFO_AV1_REFRESH_FRAME_CONTEXT_MASK;
757
758 result.frame_header_flags |= ((pic->picture_parameter.pic_info_fields.frame_type ==
759 2 /* INTRA_ONLY_FRAME */) << RDECODE_FRAME_HDR_INFO_AV1_INTRA_ONLY_SHIFT) &
760 RDECODE_FRAME_HDR_INFO_AV1_INTRA_ONLY_MASK;
761
762 result.frame_header_flags |= (pic->picture_parameter.pic_info_fields.allow_intrabc
763 << RDECODE_FRAME_HDR_INFO_AV1_ALLOW_INTRABC_SHIFT) &
764 RDECODE_FRAME_HDR_INFO_AV1_ALLOW_INTRABC_MASK;
765
766 result.frame_header_flags |= (pic->picture_parameter.pic_info_fields.allow_high_precision_mv
767 << RDECODE_FRAME_HDR_INFO_AV1_ALLOW_HIGH_PRECISION_MV_SHIFT) &
768 RDECODE_FRAME_HDR_INFO_AV1_ALLOW_HIGH_PRECISION_MV_MASK;
769
770 result.frame_header_flags |= (pic->picture_parameter.seq_info_fields.mono_chrome
771 << RDECODE_FRAME_HDR_INFO_AV1_MONOCHROME_SHIFT) &
772 RDECODE_FRAME_HDR_INFO_AV1_MONOCHROME_MASK;
773
774 result.frame_header_flags |= (pic->picture_parameter.mode_control_fields.skip_mode_present
775 << RDECODE_FRAME_HDR_INFO_AV1_SKIP_MODE_FLAG_SHIFT) &
776 RDECODE_FRAME_HDR_INFO_AV1_SKIP_MODE_FLAG_MASK;
777
778 result.frame_header_flags |= (((pic->picture_parameter.qmatrix_fields.qm_y == 0xf) ? 0 : 1)
779 << RDECODE_FRAME_HDR_INFO_AV1_USING_QMATRIX_SHIFT) &
780 RDECODE_FRAME_HDR_INFO_AV1_USING_QMATRIX_MASK;
781
782 result.frame_header_flags |= (pic->picture_parameter.seq_info_fields.enable_filter_intra
783 << RDECODE_FRAME_HDR_INFO_AV1_ENABLE_FILTER_INTRA_SHIFT) &
784 RDECODE_FRAME_HDR_INFO_AV1_ENABLE_FILTER_INTRA_MASK;
785
786 result.frame_header_flags |= (pic->picture_parameter.seq_info_fields.enable_intra_edge_filter
787 << RDECODE_FRAME_HDR_INFO_AV1_ENABLE_INTRA_EDGE_FILTER_SHIFT) &
788 RDECODE_FRAME_HDR_INFO_AV1_ENABLE_INTRA_EDGE_FILTER_MASK;
789
790 result.frame_header_flags |= (pic->picture_parameter.seq_info_fields.enable_interintra_compound
791 << RDECODE_FRAME_HDR_INFO_AV1_ENABLE_INTERINTRA_COMPOUND_SHIFT) &
792 RDECODE_FRAME_HDR_INFO_AV1_ENABLE_INTERINTRA_COMPOUND_MASK;
793
794 result.frame_header_flags |= (pic->picture_parameter.seq_info_fields.enable_masked_compound
795 << RDECODE_FRAME_HDR_INFO_AV1_ENABLE_MASKED_COMPOUND_SHIFT) &
796 RDECODE_FRAME_HDR_INFO_AV1_ENABLE_MASKED_COMPOUND_MASK;
797
798 result.frame_header_flags |= (pic->picture_parameter.pic_info_fields.allow_warped_motion
799 << RDECODE_FRAME_HDR_INFO_AV1_ALLOW_WARPED_MOTION_SHIFT) &
800 RDECODE_FRAME_HDR_INFO_AV1_ALLOW_WARPED_MOTION_MASK;
801
802 result.frame_header_flags |= (pic->picture_parameter.seq_info_fields.enable_dual_filter
803 << RDECODE_FRAME_HDR_INFO_AV1_ENABLE_DUAL_FILTER_SHIFT) &
804 RDECODE_FRAME_HDR_INFO_AV1_ENABLE_DUAL_FILTER_MASK;
805
806 result.frame_header_flags |= (pic->picture_parameter.seq_info_fields.enable_order_hint
807 << RDECODE_FRAME_HDR_INFO_AV1_ENABLE_ORDER_HINT_SHIFT) &
808 RDECODE_FRAME_HDR_INFO_AV1_ENABLE_ORDER_HINT_MASK;
809
810 result.frame_header_flags |= (pic->picture_parameter.seq_info_fields.enable_jnt_comp
811 << RDECODE_FRAME_HDR_INFO_AV1_ENABLE_JNT_COMP_SHIFT) &
812 RDECODE_FRAME_HDR_INFO_AV1_ENABLE_JNT_COMP_MASK;
813
814 result.frame_header_flags |= (pic->picture_parameter.pic_info_fields.use_ref_frame_mvs
815 << RDECODE_FRAME_HDR_INFO_AV1_ALLOW_REF_FRAME_MVS_SHIFT) &
816 RDECODE_FRAME_HDR_INFO_AV1_ALLOW_REF_FRAME_MVS_MASK;
817
818 result.frame_header_flags |= (pic->picture_parameter.pic_info_fields.allow_screen_content_tools
819 << RDECODE_FRAME_HDR_INFO_AV1_ALLOW_SCREEN_CONTENT_TOOLS_SHIFT) &
820 RDECODE_FRAME_HDR_INFO_AV1_ALLOW_SCREEN_CONTENT_TOOLS_MASK;
821
822 result.frame_header_flags |= (pic->picture_parameter.pic_info_fields.force_integer_mv
823 << RDECODE_FRAME_HDR_INFO_AV1_CUR_FRAME_FORCE_INTEGER_MV_SHIFT) &
824 RDECODE_FRAME_HDR_INFO_AV1_CUR_FRAME_FORCE_INTEGER_MV_MASK;
825
826 result.frame_header_flags |= (pic->picture_parameter.loop_filter_info_fields.mode_ref_delta_enabled
827 << RDECODE_FRAME_HDR_INFO_AV1_MODE_REF_DELTA_ENABLED_SHIFT) &
828 RDECODE_FRAME_HDR_INFO_AV1_MODE_REF_DELTA_ENABLED_MASK;
829
830 result.frame_header_flags |= (pic->picture_parameter.loop_filter_info_fields.mode_ref_delta_update
831 << RDECODE_FRAME_HDR_INFO_AV1_MODE_REF_DELTA_UPDATE_SHIFT) &
832 RDECODE_FRAME_HDR_INFO_AV1_MODE_REF_DELTA_UPDATE_MASK;
833
834 result.frame_header_flags |= (pic->picture_parameter.mode_control_fields.delta_q_present_flag
835 << RDECODE_FRAME_HDR_INFO_AV1_DELTA_Q_PRESENT_FLAG_SHIFT) &
836 RDECODE_FRAME_HDR_INFO_AV1_DELTA_Q_PRESENT_FLAG_MASK;
837
838 result.frame_header_flags |= (pic->picture_parameter.mode_control_fields.delta_lf_present_flag
839 << RDECODE_FRAME_HDR_INFO_AV1_DELTA_LF_PRESENT_FLAG_SHIFT) &
840 RDECODE_FRAME_HDR_INFO_AV1_DELTA_LF_PRESENT_FLAG_MASK;
841
842 result.frame_header_flags |= (pic->picture_parameter.mode_control_fields.reduced_tx_set_used
843 << RDECODE_FRAME_HDR_INFO_AV1_REDUCED_TX_SET_USED_SHIFT) &
844 RDECODE_FRAME_HDR_INFO_AV1_REDUCED_TX_SET_USED_MASK;
845
846 result.frame_header_flags |= (pic->picture_parameter.seg_info.segment_info_fields.enabled
847 << RDECODE_FRAME_HDR_INFO_AV1_SEGMENTATION_ENABLED_SHIFT) &
848 RDECODE_FRAME_HDR_INFO_AV1_SEGMENTATION_ENABLED_MASK;
849
850 result.frame_header_flags |= (pic->picture_parameter.seg_info.segment_info_fields.update_map
851 << RDECODE_FRAME_HDR_INFO_AV1_SEGMENTATION_UPDATE_MAP_SHIFT) &
852 RDECODE_FRAME_HDR_INFO_AV1_SEGMENTATION_UPDATE_MAP_MASK;
853
854 result.frame_header_flags |= (pic->picture_parameter.seg_info.segment_info_fields.temporal_update
855 << RDECODE_FRAME_HDR_INFO_AV1_SEGMENTATION_TEMPORAL_UPDATE_SHIFT) &
856 RDECODE_FRAME_HDR_INFO_AV1_SEGMENTATION_TEMPORAL_UPDATE_MASK;
857
858 result.frame_header_flags |= (pic->picture_parameter.mode_control_fields.delta_lf_multi
859 << RDECODE_FRAME_HDR_INFO_AV1_DELTA_LF_MULTI_SHIFT) &
860 RDECODE_FRAME_HDR_INFO_AV1_DELTA_LF_MULTI_MASK;
861
862 result.frame_header_flags |= (pic->picture_parameter.pic_info_fields.is_motion_mode_switchable
863 << RDECODE_FRAME_HDR_INFO_AV1_SWITCHABLE_SKIP_MODE_SHIFT) &
864 RDECODE_FRAME_HDR_INFO_AV1_SWITCHABLE_SKIP_MODE_MASK;
865
866 result.frame_header_flags |= ((!pic->picture_parameter.refresh_frame_flags)
867 << RDECODE_FRAME_HDR_INFO_AV1_SKIP_REFERENCE_UPDATE_SHIFT) &
868 RDECODE_FRAME_HDR_INFO_AV1_SKIP_REFERENCE_UPDATE_MASK;
869
870 result.frame_header_flags |= ((!pic->picture_parameter.seq_info_fields.ref_frame_mvs)
871 << RDECODE_FRAME_HDR_INFO_AV1_DISABLE_REF_FRAME_MVS_SHIFT) &
872 RDECODE_FRAME_HDR_INFO_AV1_DISABLE_REF_FRAME_MVS_MASK;
873
874 result.current_frame_id = pic->picture_parameter.current_frame_id;
875 result.frame_offset = pic->picture_parameter.order_hint;
876
877 result.profile = pic->picture_parameter.profile;
878 result.is_annexb = 0;
879 result.frame_type = pic->picture_parameter.pic_info_fields.frame_type;
880 result.primary_ref_frame = pic->picture_parameter.primary_ref_frame;
881
882 get_current_pic_index(dec, target, &result.curr_pic_idx);
883
884 result.sb_size = pic->picture_parameter.seq_info_fields.use_128x128_superblock;
885 result.interp_filter = pic->picture_parameter.interp_filter;
886 for (i = 0; i < 2; ++i)
887 result.filter_level[i] = pic->picture_parameter.filter_level[i];
888 result.filter_level_u = pic->picture_parameter.filter_level_u;
889 result.filter_level_v = pic->picture_parameter.filter_level_v;
890 result.sharpness_level = pic->picture_parameter.loop_filter_info_fields.sharpness_level;
891 for (i = 0; i < 8; ++i)
892 result.ref_deltas[i] = pic->picture_parameter.ref_deltas[i];
893 for (i = 0; i < 2; ++i)
894 result.mode_deltas[i] = pic->picture_parameter.mode_deltas[i];
895 result.base_qindex = pic->picture_parameter.base_qindex;
896 result.y_dc_delta_q = pic->picture_parameter.y_dc_delta_q;
897 result.u_dc_delta_q = pic->picture_parameter.u_dc_delta_q;
898 result.v_dc_delta_q = pic->picture_parameter.v_dc_delta_q;
899 result.u_ac_delta_q = pic->picture_parameter.u_ac_delta_q;
900 result.v_ac_delta_q = pic->picture_parameter.v_ac_delta_q;
901 result.qm_y = pic->picture_parameter.qmatrix_fields.qm_y | 0xf0;
902 result.qm_u = pic->picture_parameter.qmatrix_fields.qm_u | 0xf0;
903 result.qm_v = pic->picture_parameter.qmatrix_fields.qm_v | 0xf0;
904 result.delta_q_res = 1 << pic->picture_parameter.mode_control_fields.log2_delta_q_res;
905 result.delta_lf_res = 1 << pic->picture_parameter.mode_control_fields.log2_delta_lf_res;
906
907 result.tile_cols = pic->picture_parameter.tile_cols;
908 result.tile_rows = pic->picture_parameter.tile_rows;
909 result.tx_mode = pic->picture_parameter.mode_control_fields.tx_mode;
910 result.reference_mode = (pic->picture_parameter.mode_control_fields.reference_select == 1) ? 2 : 0;
911 result.chroma_format = pic->picture_parameter.seq_info_fields.mono_chrome ? 0 : 1;
912 result.tile_size_bytes = 0xff;
913 result.context_update_tile_id = pic->picture_parameter.context_update_tile_id;
914 for (i = 0; i < 65; ++i) {
915 result.tile_col_start_sb[i] = pic->picture_parameter.tile_col_start_sb[i];
916 result.tile_row_start_sb[i] = pic->picture_parameter.tile_row_start_sb[i];
917 }
918 result.max_width = pic->picture_parameter.max_width;
919 result.max_height = pic->picture_parameter.max_height;
920 if (pic->picture_parameter.pic_info_fields.use_superres) {
921 result.width = (pic->picture_parameter.frame_width * 8 + pic->picture_parameter.superres_scale_denominator / 2) /
922 pic->picture_parameter.superres_scale_denominator;
923 result.superres_scale_denominator = pic->picture_parameter.superres_scale_denominator;
924 } else {
925 result.width = pic->picture_parameter.frame_width;
926 result.superres_scale_denominator = pic->picture_parameter.superres_scale_denominator;
927 }
928 result.height = pic->picture_parameter.frame_height;
929 result.superres_upscaled_width = pic->picture_parameter.frame_width;
930 result.order_hint_bits = pic->picture_parameter.order_hint_bits_minus_1 + 1;
931
932 /* Limit to target size in case applications try to decode into smaller
933 * target buffer. */
934 result.width = MIN2(target->width, result.width);
935 result.height = MIN2(target->height, result.height);
936 result.superres_upscaled_width = MIN2(target->width, result.superres_upscaled_width);
937
938 for (i = 0; i < NUM_AV1_REFS; ++i) {
939 uintptr_t ref_frame;
940 if (pic->ref[i]) {
941 ref_frame = (uintptr_t)vl_video_buffer_get_associated_data(pic->ref[i], &dec->base);
942 num_refs++;
943 } else
944 ref_frame = 0x7f;
945 result.ref_frame_map[i] = ref_frame;
946 }
947 for (i = 0; i < NUM_AV1_REFS_PER_FRAME; ++i)
948 result.frame_refs[i] = result.ref_frame_map[pic->picture_parameter.ref_frame_idx[i]];
949
950 result.bit_depth_luma_minus8 = result.bit_depth_chroma_minus8 = pic->picture_parameter.bit_depth_idx << 1;
951
952 for (i = 0; i < 8; ++i) {
953 for (j = 0; j < 8; ++j)
954 result.feature_data[i][j] = pic->picture_parameter.seg_info.feature_data[i][j];
955 result.feature_mask[i] = pic->picture_parameter.seg_info.feature_mask[i];
956 }
957 memcpy(dec->probs, &pic->picture_parameter.seg_info.feature_data, 128);
958 memcpy((dec->probs + 128), &pic->picture_parameter.seg_info.feature_mask, 8);
959
960 result.cdef_damping = pic->picture_parameter.cdef_damping_minus_3 + 3;
961 result.cdef_bits = pic->picture_parameter.cdef_bits;
962 for (i = 0; i < 8; ++i) {
963 result.cdef_strengths[i] = pic->picture_parameter.cdef_y_strengths[i];
964 result.cdef_uv_strengths[i] = pic->picture_parameter.cdef_uv_strengths[i];
965 }
966 result.frame_restoration_type[0] = pic->picture_parameter.loop_restoration_fields.yframe_restoration_type;
967 result.frame_restoration_type[1] = pic->picture_parameter.loop_restoration_fields.cbframe_restoration_type;
968 result.frame_restoration_type[2] = pic->picture_parameter.loop_restoration_fields.crframe_restoration_type;
969 for (i = 0; i < 3; ++i) {
970 int log2_num = 0;
971 int unit_size = pic->picture_parameter.lr_unit_size[i];
972 if (unit_size) {
973 while (unit_size >>= 1)
974 log2_num++;
975 result.log2_restoration_unit_size_minus5[i] = log2_num - 5;
976 } else {
977 result.log2_restoration_unit_size_minus5[i] = 0;
978 }
979 }
980
981 if (pic->picture_parameter.bit_depth_idx) {
982 if (target->buffer_format == PIPE_FORMAT_P010 ||
983 target->buffer_format == PIPE_FORMAT_P012 ||
984 target->buffer_format == PIPE_FORMAT_P016) {
985 result.p010_mode = 1;
986 result.msb_mode = 1;
987 } else {
988 result.luma_10to8 = 1;
989 result.chroma_10to8 = 1;
990 }
991 }
992
993 result.preskip_segid = 0;
994 result.last_active_segid = 0;
995 for (i = 0; i < 8; i++) {
996 for (j = 0; j < 8; j++) {
997 if (pic->picture_parameter.seg_info.feature_mask[i] & (1 << j)) {
998 result.last_active_segid = i;
999 if (j >= 5)
1000 result.preskip_segid = 1;
1001 }
1002 }
1003 }
1004
1005 result.seg_lossless_flag = 0;
1006 for (i = 0; i < 8; ++i) {
1007 int av1_get_qindex, qindex;
1008 int segfeature_active = pic->picture_parameter.seg_info.feature_mask[i] & (1 << 0);
1009 if (segfeature_active) {
1010 int seg_qindex = pic->picture_parameter.base_qindex +
1011 pic->picture_parameter.seg_info.feature_data[i][0];
1012 av1_get_qindex = seg_qindex < 0 ? 0 : (seg_qindex > 255 ? 255 : seg_qindex);
1013 } else {
1014 av1_get_qindex = pic->picture_parameter.base_qindex;
1015 }
1016 qindex = pic->picture_parameter.seg_info.segment_info_fields.enabled ?
1017 av1_get_qindex :
1018 pic->picture_parameter.base_qindex;
1019 result.seg_lossless_flag |= (((qindex == 0) && result.y_dc_delta_q == 0 &&
1020 result.u_dc_delta_q == 0 && result.v_dc_delta_q == 0 &&
1021 result.u_ac_delta_q == 0 && result.v_ac_delta_q == 0) << i);
1022 }
1023
1024 rvcn_dec_film_grain_params_t* fg_params = &result.film_grain;
1025 fg_params->apply_grain = pic->picture_parameter.film_grain_info.film_grain_info_fields.apply_grain;
1026 if (fg_params->apply_grain) {
1027 rvcn_dec_av1_fg_init_buf_t *fg_buf = (rvcn_dec_av1_fg_init_buf_t *)(dec->probs + 256);
1028
1029 fg_params->random_seed = pic->picture_parameter.film_grain_info.grain_seed;
1030 fg_params->grain_scale_shift =
1031 pic->picture_parameter.film_grain_info.film_grain_info_fields.grain_scale_shift;
1032 fg_params->scaling_shift =
1033 pic->picture_parameter.film_grain_info.film_grain_info_fields.grain_scaling_minus_8 + 8;
1034 fg_params->chroma_scaling_from_luma =
1035 pic->picture_parameter.film_grain_info.film_grain_info_fields.chroma_scaling_from_luma;
1036 fg_params->num_y_points = pic->picture_parameter.film_grain_info.num_y_points;
1037 fg_params->num_cb_points = pic->picture_parameter.film_grain_info.num_cb_points;
1038 fg_params->num_cr_points = pic->picture_parameter.film_grain_info.num_cr_points;
1039 fg_params->cb_mult = pic->picture_parameter.film_grain_info.cb_mult;
1040 fg_params->cb_luma_mult = pic->picture_parameter.film_grain_info.cb_luma_mult;
1041 fg_params->cb_offset = pic->picture_parameter.film_grain_info.cb_offset;
1042 fg_params->cr_mult = pic->picture_parameter.film_grain_info.cr_mult;
1043 fg_params->cr_luma_mult = pic->picture_parameter.film_grain_info.cr_luma_mult;
1044 fg_params->cr_offset = pic->picture_parameter.film_grain_info.cr_offset;
1045 fg_params->bit_depth_minus_8 = pic->picture_parameter.bit_depth_idx << 1;
1046
1047 for (i = 0; i < fg_params->num_y_points; ++i) {
1048 fg_params->scaling_points_y[i][0] = pic->picture_parameter.film_grain_info.point_y_value[i];
1049 fg_params->scaling_points_y[i][1] = pic->picture_parameter.film_grain_info.point_y_scaling[i];
1050 }
1051 for (i = 0; i < fg_params->num_cb_points; ++i) {
1052 fg_params->scaling_points_cb[i][0] = pic->picture_parameter.film_grain_info.point_cb_value[i];
1053 fg_params->scaling_points_cb[i][1] = pic->picture_parameter.film_grain_info.point_cb_scaling[i];
1054 }
1055 for (i = 0; i < fg_params->num_cr_points; ++i) {
1056 fg_params->scaling_points_cr[i][0] = pic->picture_parameter.film_grain_info.point_cr_value[i];
1057 fg_params->scaling_points_cr[i][1] = pic->picture_parameter.film_grain_info.point_cr_scaling[i];
1058 }
1059
1060 fg_params->ar_coeff_lag = pic->picture_parameter.film_grain_info.film_grain_info_fields.ar_coeff_lag;
1061 fg_params->ar_coeff_shift =
1062 pic->picture_parameter.film_grain_info.film_grain_info_fields.ar_coeff_shift_minus_6 + 6;
1063
1064 for (i = 0; i < 24; ++i)
1065 fg_params->ar_coeffs_y[i] = pic->picture_parameter.film_grain_info.ar_coeffs_y[i];
1066
1067 for (i = 0; i < 25; ++i) {
1068 fg_params->ar_coeffs_cb[i] = pic->picture_parameter.film_grain_info.ar_coeffs_cb[i];
1069 fg_params->ar_coeffs_cr[i] = pic->picture_parameter.film_grain_info.ar_coeffs_cr[i];
1070 }
1071
1072 fg_params->overlap_flag = pic->picture_parameter.film_grain_info.film_grain_info_fields.overlap_flag;
1073 fg_params->clip_to_restricted_range =
1074 pic->picture_parameter.film_grain_info.film_grain_info_fields.clip_to_restricted_range;
1075
1076 ac_vcn_av1_init_film_grain_buffer(dec->av1_version, fg_params, fg_buf);
1077 }
1078
1079 result.uncompressed_header_size = 0;
1080 for (i = 0; i < 7; ++i) {
1081 result.global_motion[i + 1].wmtype = (rvcn_dec_transformation_type_e)pic->picture_parameter.wm[i].wmtype;
1082 for (j = 0; j < 6; ++j)
1083 result.global_motion[i + 1].wmmat[j] = pic->picture_parameter.wm[i].wmmat[j];
1084 }
1085 for (i = 0; i < tile_count && i < 256; ++i) {
1086 result.tile_info[i].offset = pic->slice_parameter.slice_data_offset[i];
1087 result.tile_info[i].size = pic->slice_parameter.slice_data_size[i];
1088 }
1089
1090 if (dec->dpb_type == DPB_DYNAMIC_TIER_2) {
1091 dec->ref_codec.bts = pic->picture_parameter.bit_depth_idx;
1092 dec->ref_codec.index = result.curr_pic_idx;
1093 dec->ref_codec.ref_size = 8;
1094 dec->ref_codec.num_refs = num_refs;
1095 memset(dec->ref_codec.ref_list, 0x7f, sizeof(dec->ref_codec.ref_list));
1096 memcpy(dec->ref_codec.ref_list, result.ref_frame_map, sizeof(result.ref_frame_map));
1097 }
1098
1099 return result;
1100 }
1101
calc_ctx_size_h265_main(struct radeon_decoder * dec)1102 static unsigned calc_ctx_size_h265_main(struct radeon_decoder *dec)
1103 {
1104 unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH);
1105 unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT);
1106
1107 unsigned max_references = dec->base.max_references + 1;
1108
1109 if (dec->base.width * dec->base.height >= 4096 * 2000)
1110 max_references = MAX2(max_references, 8);
1111 else
1112 max_references = MAX2(max_references, 17);
1113
1114 width = align(width, 16);
1115 height = align(height, 16);
1116 return ((width + 255) / 16) * ((height + 255) / 16) * 16 * max_references + 52 * 1024;
1117 }
1118
calc_ctx_size_h265_main10(struct radeon_decoder * dec,struct pipe_h265_picture_desc * pic)1119 static unsigned calc_ctx_size_h265_main10(struct radeon_decoder *dec,
1120 struct pipe_h265_picture_desc *pic)
1121 {
1122 unsigned log2_ctb_size, width_in_ctb, height_in_ctb, num_16x16_block_per_ctb;
1123 unsigned context_buffer_size_per_ctb_row, cm_buffer_size, max_mb_address, db_left_tile_pxl_size;
1124 unsigned db_left_tile_ctx_size = 4096 / 16 * (32 + 16 * 4);
1125
1126 unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH);
1127 unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT);
1128 unsigned coeff_10bit =
1129 (pic->pps->sps->bit_depth_luma_minus8 || pic->pps->sps->bit_depth_chroma_minus8) ? 2 : 1;
1130
1131 unsigned max_references = dec->base.max_references + 1;
1132
1133 if (dec->base.width * dec->base.height >= 4096 * 2000)
1134 max_references = MAX2(max_references, 8);
1135 else
1136 max_references = MAX2(max_references, 17);
1137
1138 log2_ctb_size = pic->pps->sps->log2_min_luma_coding_block_size_minus3 + 3 +
1139 pic->pps->sps->log2_diff_max_min_luma_coding_block_size;
1140
1141 width_in_ctb = (width + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size;
1142 height_in_ctb = (height + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size;
1143
1144 num_16x16_block_per_ctb = ((1 << log2_ctb_size) >> 4) * ((1 << log2_ctb_size) >> 4);
1145 context_buffer_size_per_ctb_row = align(width_in_ctb * num_16x16_block_per_ctb * 16, 256);
1146 max_mb_address = (unsigned)ceil(height * 8 / 2048.0);
1147
1148 cm_buffer_size = max_references * context_buffer_size_per_ctb_row * height_in_ctb;
1149 db_left_tile_pxl_size = coeff_10bit * (max_mb_address * 2 * 2048 + 1024);
1150
1151 return cm_buffer_size + db_left_tile_ctx_size + db_left_tile_pxl_size;
1152 }
1153
get_vc1_msg(struct pipe_vc1_picture_desc * pic)1154 static rvcn_dec_message_vc1_t get_vc1_msg(struct pipe_vc1_picture_desc *pic)
1155 {
1156 rvcn_dec_message_vc1_t result;
1157
1158 memset(&result, 0, sizeof(result));
1159 switch (pic->base.profile) {
1160 case PIPE_VIDEO_PROFILE_VC1_SIMPLE:
1161 result.profile = RDECODE_VC1_PROFILE_SIMPLE;
1162 result.level = 1;
1163 break;
1164
1165 case PIPE_VIDEO_PROFILE_VC1_MAIN:
1166 result.profile = RDECODE_VC1_PROFILE_MAIN;
1167 result.level = 2;
1168 break;
1169
1170 case PIPE_VIDEO_PROFILE_VC1_ADVANCED:
1171 result.profile = RDECODE_VC1_PROFILE_ADVANCED;
1172 result.level = 4;
1173 break;
1174
1175 default:
1176 assert(0);
1177 }
1178
1179 result.sps_info_flags |= pic->postprocflag << 7;
1180 result.sps_info_flags |= pic->pulldown << 6;
1181 result.sps_info_flags |= pic->interlace << 5;
1182 result.sps_info_flags |= pic->tfcntrflag << 4;
1183 result.sps_info_flags |= pic->finterpflag << 3;
1184 result.sps_info_flags |= pic->psf << 1;
1185
1186 result.pps_info_flags |= pic->range_mapy_flag << 31;
1187 result.pps_info_flags |= pic->range_mapy << 28;
1188 result.pps_info_flags |= pic->range_mapuv_flag << 27;
1189 result.pps_info_flags |= pic->range_mapuv << 24;
1190 result.pps_info_flags |= pic->multires << 21;
1191 result.pps_info_flags |= pic->maxbframes << 16;
1192 result.pps_info_flags |= pic->overlap << 11;
1193 result.pps_info_flags |= pic->quantizer << 9;
1194 result.pps_info_flags |= pic->panscan_flag << 7;
1195 result.pps_info_flags |= pic->refdist_flag << 6;
1196 result.pps_info_flags |= pic->vstransform << 0;
1197
1198 if (pic->base.profile != PIPE_VIDEO_PROFILE_VC1_SIMPLE) {
1199 result.pps_info_flags |= pic->syncmarker << 20;
1200 result.pps_info_flags |= pic->rangered << 19;
1201 result.pps_info_flags |= pic->loopfilter << 5;
1202 result.pps_info_flags |= pic->fastuvmc << 4;
1203 result.pps_info_flags |= pic->extended_mv << 3;
1204 result.pps_info_flags |= pic->extended_dmv << 8;
1205 result.pps_info_flags |= pic->dquant << 1;
1206 }
1207
1208 result.chroma_format = 1;
1209
1210 return result;
1211 }
1212
get_ref_pic_idx(struct radeon_decoder * dec,struct pipe_video_buffer * ref)1213 static uint32_t get_ref_pic_idx(struct radeon_decoder *dec, struct pipe_video_buffer *ref)
1214 {
1215 uint32_t min = MAX2(dec->frame_number, NUM_MPEG2_REFS) - NUM_MPEG2_REFS;
1216 uint32_t max = MAX2(dec->frame_number, 1) - 1;
1217 uintptr_t frame;
1218
1219 /* seems to be the most sane fallback */
1220 if (!ref)
1221 return max;
1222
1223 /* get the frame number from the associated data */
1224 frame = (uintptr_t)vl_video_buffer_get_associated_data(ref, &dec->base);
1225
1226 /* limit the frame number to a valid range */
1227 return MAX2(MIN2(frame, max), min);
1228 }
1229
get_mpeg2_msg(struct radeon_decoder * dec,struct pipe_mpeg12_picture_desc * pic)1230 static rvcn_dec_message_mpeg2_vld_t get_mpeg2_msg(struct radeon_decoder *dec,
1231 struct pipe_mpeg12_picture_desc *pic)
1232 {
1233 const int *zscan = pic->alternate_scan ? vl_zscan_alternate : vl_zscan_normal;
1234 rvcn_dec_message_mpeg2_vld_t result;
1235 unsigned i;
1236
1237 memset(&result, 0, sizeof(result));
1238 result.decoded_pic_idx = dec->frame_number;
1239
1240 result.forward_ref_pic_idx = get_ref_pic_idx(dec, pic->ref[0]);
1241 result.backward_ref_pic_idx = get_ref_pic_idx(dec, pic->ref[1]);
1242
1243 if (pic->intra_matrix) {
1244 result.load_intra_quantiser_matrix = 1;
1245 for (i = 0; i < 64; ++i) {
1246 result.intra_quantiser_matrix[i] = pic->intra_matrix[zscan[i]];
1247 }
1248 }
1249 if (pic->non_intra_matrix) {
1250 result.load_nonintra_quantiser_matrix = 1;
1251 for (i = 0; i < 64; ++i) {
1252 result.nonintra_quantiser_matrix[i] = pic->non_intra_matrix[zscan[i]];
1253 }
1254 }
1255
1256 result.profile_and_level_indication = 0;
1257 result.chroma_format = 0x1;
1258
1259 result.picture_coding_type = pic->picture_coding_type;
1260 result.f_code[0][0] = pic->f_code[0][0] + 1;
1261 result.f_code[0][1] = pic->f_code[0][1] + 1;
1262 result.f_code[1][0] = pic->f_code[1][0] + 1;
1263 result.f_code[1][1] = pic->f_code[1][1] + 1;
1264 result.intra_dc_precision = pic->intra_dc_precision;
1265 result.pic_structure = pic->picture_structure;
1266 result.top_field_first = pic->top_field_first;
1267 result.frame_pred_frame_dct = pic->frame_pred_frame_dct;
1268 result.concealment_motion_vectors = pic->concealment_motion_vectors;
1269 result.q_scale_type = pic->q_scale_type;
1270 result.intra_vlc_format = pic->intra_vlc_format;
1271 result.alternate_scan = pic->alternate_scan;
1272
1273 return result;
1274 }
1275
get_mpeg4_msg(struct radeon_decoder * dec,struct pipe_mpeg4_picture_desc * pic)1276 static rvcn_dec_message_mpeg4_asp_vld_t get_mpeg4_msg(struct radeon_decoder *dec,
1277 struct pipe_mpeg4_picture_desc *pic)
1278 {
1279 rvcn_dec_message_mpeg4_asp_vld_t result;
1280 unsigned i;
1281
1282 memset(&result, 0, sizeof(result));
1283 result.decoded_pic_idx = dec->frame_number;
1284
1285 result.forward_ref_pic_idx = get_ref_pic_idx(dec, pic->ref[0]);
1286 result.backward_ref_pic_idx = get_ref_pic_idx(dec, pic->ref[1]);
1287
1288 result.variant_type = 0;
1289 result.profile_and_level_indication = 0xF0;
1290
1291 result.video_object_layer_verid = 0x5;
1292 result.video_object_layer_shape = 0x0;
1293
1294 result.video_object_layer_width = dec->base.width;
1295 result.video_object_layer_height = dec->base.height;
1296
1297 result.vop_time_increment_resolution = pic->vop_time_increment_resolution;
1298
1299 result.short_video_header = pic->short_video_header;
1300 result.interlaced = pic->interlaced;
1301 result.load_intra_quant_mat = 1;
1302 result.load_nonintra_quant_mat = 1;
1303 result.quarter_sample = pic->quarter_sample;
1304 result.complexity_estimation_disable = 1;
1305 result.resync_marker_disable = pic->resync_marker_disable;
1306 result.newpred_enable = 0;
1307 result.reduced_resolution_vop_enable = 0;
1308
1309 result.quant_type = pic->quant_type;
1310
1311 for (i = 0; i < 64; ++i) {
1312 result.intra_quant_mat[i] = pic->intra_matrix[vl_zscan_normal[i]];
1313 result.nonintra_quant_mat[i] = pic->non_intra_matrix[vl_zscan_normal[i]];
1314 }
1315
1316 return result;
1317 }
1318
rvcn_dec_message_create(struct radeon_decoder * dec)1319 static void rvcn_dec_message_create(struct radeon_decoder *dec)
1320 {
1321 rvcn_dec_message_header_t *header = dec->msg;
1322 rvcn_dec_message_create_t *create = dec->msg + sizeof(rvcn_dec_message_header_t);
1323 unsigned sizes = sizeof(rvcn_dec_message_header_t) + sizeof(rvcn_dec_message_create_t);
1324
1325 memset(dec->msg, 0, sizes);
1326 header->header_size = sizeof(rvcn_dec_message_header_t);
1327 header->total_size = sizes;
1328 header->num_buffers = 1;
1329 header->msg_type = RDECODE_MSG_CREATE;
1330 header->stream_handle = dec->stream_handle;
1331 header->status_report_feedback_number = 0;
1332
1333 header->index[0].message_id = RDECODE_MESSAGE_CREATE;
1334 header->index[0].offset = sizeof(rvcn_dec_message_header_t);
1335 header->index[0].size = sizeof(rvcn_dec_message_create_t);
1336 header->index[0].filled = 0;
1337
1338 create->stream_type = dec->stream_type;
1339 create->session_flags = 0;
1340 create->width_in_samples = dec->base.width;
1341 create->height_in_samples = dec->base.height;
1342 }
1343
/*
 * Fill the dynamic DPB (tier 2) part of a decode message.
 *
 * Steps, in order:
 *  1. Walk dec->dpb_ref_list; every buffer whose index appears in
 *     dec->ref_codec.ref_list is added to the command stream and its
 *     address stored in the matching dpbAddr slot. Unreferenced buffers are
 *     either queued for destruction (size mismatch) or marked free (0x7f).
 *  2. Pick the buffer for the current picture: one already carrying the
 *     current index at the right size, else the first free one.
 *  3. Destroy everything queued on dec->dpb_unref_list.
 *  4. Allocate a new buffer when step 2 found nothing.
 *  5. Point any still-empty reference slots at the first list entry.
 *  6. Publish the current buffer's surface layout and address.
 *
 * Returns 0 on success, 1 when the DPB entry or its video buffer cannot be
 * allocated.
 */
static unsigned rvcn_dec_dynamic_dpb_t2_message(struct radeon_decoder *dec, rvcn_dec_message_decode_t *decode,
                                                rvcn_dec_message_dynamic_dpb_t2_t *dynamic_dpb_t2, bool encrypted)
{
   struct rvcn_dec_dynamic_dpb_t2 *current = NULL;
   struct si_texture *luma_tex, *chroma_tex;
   struct si_resource *res;
   uint64_t addr;
   int i;

   const unsigned width = align(decode->width_in_samples, dec->db_alignment);
   const unsigned height = align(decode->height_in_samples, dec->db_alignment);

   /* Step 1: bind referenced buffers, retire or free the others. */
   list_for_each_entry_safe(struct rvcn_dec_dynamic_dpb_t2, entry, &dec->dpb_ref_list, list) {
      bool referenced = false;

      res = &(((struct si_texture *)((struct vl_video_buffer *)entry->vbuf)->resources[0])->buffer);
      for (i = 0; i < dec->ref_codec.ref_size; ++i) {
         /* 0x7f marks an unused reference slot. */
         if ((dec->ref_codec.ref_list[i] & 0x7f) == 0x7f)
            continue;
         if (entry->index != (dec->ref_codec.ref_list[i] & 0x7f))
            continue;

         addr = dec->ws->buffer_get_virtual_address(res->buf);
         dec->ws->cs_add_buffer(&dec->cs, res->buf, RADEON_USAGE_READWRITE | RADEON_USAGE_SYNCHRONIZED, RADEON_DOMAIN_VRAM);
         dynamic_dpb_t2->dpbAddrLo[i] = addr;
         dynamic_dpb_t2->dpbAddrHi[i] = addr >> 32;
         ++dynamic_dpb_t2->dpbArraySize;
         referenced = true;
      }

      if (referenced)
         continue;

      if (entry->vbuf->width == width && entry->vbuf->height == height) {
         /* Right size but unused: keep it around as a free buffer. */
         entry->index = 0x7f;
      } else {
         /* Wrong size: queue it for destruction. */
         list_del(&entry->list);
         list_addtail(&entry->list, &dec->dpb_unref_list);
      }
   }

   /* Step 2a: a buffer that already belongs to the current picture index. */
   list_for_each_entry_safe(struct rvcn_dec_dynamic_dpb_t2, entry, &dec->dpb_ref_list, list) {
      if (entry->vbuf->width != width || entry->vbuf->height != height)
         continue;
      if (entry->index != dec->ref_codec.index)
         continue;
      current = entry;
      break;
   }

   /* Step 2b: otherwise claim the first free buffer. */
   if (!current) {
      list_for_each_entry_safe(struct rvcn_dec_dynamic_dpb_t2, entry, &dec->dpb_ref_list, list) {
         if (entry->index != 0x7f)
            continue;
         entry->index = dec->ref_codec.index;
         current = entry;
         break;
      }
   }

   /* Step 3: release the buffers retired in step 1 (or earlier). */
   list_for_each_entry_safe(struct rvcn_dec_dynamic_dpb_t2, entry, &dec->dpb_unref_list, list) {
      list_del(&entry->list);
      entry->vbuf->destroy(entry->vbuf);
      FREE(entry);
   }

   /* Step 4: nothing reusable, allocate a fresh DPB buffer. */
   if (!current) {
      struct pipe_video_buffer templat;

      current = CALLOC_STRUCT(rvcn_dec_dynamic_dpb_t2);
      if (!current)
         return 1;
      current->index = dec->ref_codec.index;

      memset(&templat, 0, sizeof(templat));
      templat.buffer_format = get_buffer_format(dec);
      templat.width = width;
      templat.height = height;
      templat.bind = PIPE_BIND_VIDEO_DECODE_DPB;
      if (encrypted)
         templat.bind |= PIPE_BIND_PROTECTED;
      current->vbuf = dec->base.context->create_video_buffer(dec->base.context, &templat);

      if (!current->vbuf) {
         RADEON_DEC_ERR("Can't allocate dpb buffer.\n");
         FREE(current);
         return 1;
      }
      list_addtail(&current->list, &dec->dpb_ref_list);
   }

   /* Step 5: references without a matching buffer fall back to the first
    * list entry so that every expected slot carries an address. */
   if (dynamic_dpb_t2->dpbArraySize < dec->ref_codec.num_refs) {
      struct rvcn_dec_dynamic_dpb_t2 *fallback =
         list_first_entry(&dec->dpb_ref_list, struct rvcn_dec_dynamic_dpb_t2, list);

      res = &(((struct si_texture *)((struct vl_video_buffer *)fallback->vbuf)->resources[0])->buffer);
      addr = dec->ws->buffer_get_virtual_address(res->buf);
      for (i = 0; i < dec->ref_codec.num_refs; ++i) {
         const bool slot_empty = !dynamic_dpb_t2->dpbAddrLo[i] && !dynamic_dpb_t2->dpbAddrHi[i];

         if (!slot_empty)
            continue;
         dynamic_dpb_t2->dpbAddrLo[i] = addr;
         dynamic_dpb_t2->dpbAddrHi[i] = addr >> 32;
         ++dynamic_dpb_t2->dpbArraySize;
      }
      assert(dynamic_dpb_t2->dpbArraySize == dec->ref_codec.num_refs);
   }

   /* Step 6: describe the buffer the current picture decodes into. */
   luma_tex = (struct si_texture *)((struct vl_video_buffer *)current->vbuf)->resources[0];
   chroma_tex = (struct si_texture *)((struct vl_video_buffer *)current->vbuf)->resources[1];

   decode->db_swizzle_mode = luma_tex->surface.u.gfx9.swizzle_mode;

   dec->ws->cs_add_buffer(&dec->cs, (luma_tex->buffer).buf, RADEON_USAGE_READWRITE | RADEON_USAGE_SYNCHRONIZED, RADEON_DOMAIN_VRAM);
   addr = dec->ws->buffer_get_virtual_address((luma_tex->buffer).buf);

   dynamic_dpb_t2->dpbLumaPitch = luma_tex->surface.u.gfx9.surf_pitch;
   dynamic_dpb_t2->dpbLumaAlignedHeight = luma_tex->surface.u.gfx9.surf_height;
   dynamic_dpb_t2->dpbLumaAlignedSize = luma_tex->surface.u.gfx9.surf_slice_size;
   dynamic_dpb_t2->dpbChromaPitch = chroma_tex->surface.u.gfx9.surf_pitch;
   dynamic_dpb_t2->dpbChromaAlignedHeight = chroma_tex->surface.u.gfx9.surf_height;
   dynamic_dpb_t2->dpbChromaAlignedSize = chroma_tex->surface.u.gfx9.surf_slice_size;

   dynamic_dpb_t2->dpbCurrLo = addr;
   dynamic_dpb_t2->dpbCurrHi = addr >> 32;
   decode->decode_flags = 1;
   dynamic_dpb_t2->dpbConfigFlags = 0;

   return 0;
}
1467
rvcn_dec_message_decode(struct radeon_decoder * dec,struct pipe_video_buffer * target,struct pipe_picture_desc * picture)1468 static struct pb_buffer_lean *rvcn_dec_message_decode(struct radeon_decoder *dec,
1469 struct pipe_video_buffer *target,
1470 struct pipe_picture_desc *picture)
1471 {
1472 DECRYPT_PARAMETERS *decrypt = (DECRYPT_PARAMETERS *)picture->decrypt_key;
1473 bool encrypted = picture->protected_playback;
1474 struct si_texture *luma;
1475 struct si_texture *chroma;
1476 struct pipe_video_buffer *out_surf = target;
1477 ASSERTED struct si_screen *sscreen = (struct si_screen *)dec->screen;
1478 rvcn_dec_message_header_t *header;
1479 rvcn_dec_message_index_t *index_codec;
1480 rvcn_dec_message_index_t *index_drm = NULL;
1481 rvcn_dec_message_index_t *index_dynamic_dpb = NULL;
1482 rvcn_dec_message_index_t *index_hevc_direct_reflist = NULL;
1483 rvcn_dec_message_decode_t *decode;
1484 unsigned sizes = 0, offset_decode, offset_codec;
1485 unsigned offset_drm = 0, offset_dynamic_dpb = 0, offset_hevc_direct_reflist = 0;
1486 void *codec;
1487 rvcn_dec_message_drm_t *drm = NULL;
1488 rvcn_dec_message_dynamic_dpb_t *dynamic_dpb = NULL;
1489 rvcn_dec_message_dynamic_dpb_t2_t *dynamic_dpb_t2 = NULL;
1490 rvcn_dec_message_hevc_direct_ref_list_t *hevc_reflist = NULL;
1491 bool dpb_resize = false;
1492 header = dec->msg;
1493 sizes += sizeof(rvcn_dec_message_header_t);
1494
1495 index_codec = (void*)header + sizes;
1496 sizes += sizeof(rvcn_dec_message_index_t);
1497
1498 if (encrypted) {
1499 index_drm = (void*)header + sizes;
1500 sizes += sizeof(rvcn_dec_message_index_t);
1501 }
1502
1503 if (dec->dpb_type >= DPB_DYNAMIC_TIER_1) {
1504 index_dynamic_dpb = (void*)header + sizes;
1505 sizes += sizeof(rvcn_dec_message_index_t);
1506 }
1507
1508 if (u_reduce_video_profile(picture->profile) == PIPE_VIDEO_FORMAT_HEVC) {
1509 index_hevc_direct_reflist = (void*)header + sizes;
1510 sizes += sizeof(rvcn_dec_message_index_t);
1511 }
1512
1513 offset_decode = sizes;
1514 decode = (void*)header + sizes;
1515 sizes += sizeof(rvcn_dec_message_decode_t);
1516
1517 if (encrypted) {
1518 offset_drm = sizes;
1519 drm = (void*)header + sizes;
1520 sizes += sizeof(rvcn_dec_message_drm_t);
1521 }
1522
1523 if (dec->dpb_type >= DPB_DYNAMIC_TIER_1) {
1524 offset_dynamic_dpb = sizes;
1525 if (dec->dpb_type == DPB_DYNAMIC_TIER_1) {
1526 dynamic_dpb = (void*)header + sizes;
1527 sizes += sizeof(rvcn_dec_message_dynamic_dpb_t);
1528 }
1529 else if (dec->dpb_type == DPB_DYNAMIC_TIER_2) {
1530 dynamic_dpb_t2 = (void*)header + sizes;
1531 sizes += sizeof(rvcn_dec_message_dynamic_dpb_t2_t);
1532 }
1533 }
1534
1535 if (u_reduce_video_profile(picture->profile) == PIPE_VIDEO_FORMAT_HEVC) {
1536 offset_hevc_direct_reflist = sizes;
1537 hevc_reflist = (void*)header + sizes;
1538 sizes += align((4 + 2 * 15 * ((struct pipe_h265_picture_desc *)picture)->slice_parameter.slice_count), 4);
1539 }
1540
1541 offset_codec = sizes;
1542 codec = (void*)header + sizes;
1543
1544 memset(dec->msg, 0, sizes);
1545 header->header_size = sizeof(rvcn_dec_message_header_t);
1546 header->total_size = sizes;
1547 header->msg_type = RDECODE_MSG_DECODE;
1548 header->stream_handle = dec->stream_handle;
1549 header->status_report_feedback_number = dec->frame_number;
1550
1551 header->index[0].message_id = RDECODE_MESSAGE_DECODE;
1552 header->index[0].offset = offset_decode;
1553 header->index[0].size = sizeof(rvcn_dec_message_decode_t);
1554 header->index[0].filled = 0;
1555 header->num_buffers = 1;
1556
1557 index_codec->offset = offset_codec;
1558 index_codec->size = 0;
1559 index_codec->filled = 0;
1560 ++header->num_buffers;
1561
1562 if (encrypted) {
1563 index_drm->message_id = RDECODE_MESSAGE_DRM;
1564 index_drm->offset = offset_drm;
1565 index_drm->size = sizeof(rvcn_dec_message_drm_t);
1566 index_drm->filled = 0;
1567 ++header->num_buffers;
1568 }
1569
1570 if (dec->dpb_type >= DPB_DYNAMIC_TIER_1) {
1571 index_dynamic_dpb->message_id = RDECODE_MESSAGE_DYNAMIC_DPB;
1572 index_dynamic_dpb->offset = offset_dynamic_dpb;
1573 index_dynamic_dpb->filled = 0;
1574 ++header->num_buffers;
1575 if (dec->dpb_type == DPB_DYNAMIC_TIER_1)
1576 index_dynamic_dpb->size = sizeof(rvcn_dec_message_dynamic_dpb_t);
1577 else if (dec->dpb_type == DPB_DYNAMIC_TIER_2)
1578 index_dynamic_dpb->size = sizeof(rvcn_dec_message_dynamic_dpb_t2_t);
1579 }
1580
1581 if (u_reduce_video_profile(picture->profile) == PIPE_VIDEO_FORMAT_HEVC) {
1582 index_hevc_direct_reflist->message_id = RDECODE_MESSAGE_HEVC_DIRECT_REF_LIST;
1583 index_hevc_direct_reflist->offset = offset_hevc_direct_reflist;
1584 index_hevc_direct_reflist->size = align((4 + 2 * 15 * ((struct pipe_h265_picture_desc *)picture)->slice_parameter.slice_count), 4);
1585 index_hevc_direct_reflist->filled = 0;
1586 ++header->num_buffers;
1587 }
1588
1589 decode->stream_type = dec->stream_type;
1590 decode->decode_flags = 0;
1591 decode->width_in_samples = dec->base.width;
1592 decode->height_in_samples = dec->base.height;
1593
1594 decode->bsd_size = align(dec->bs_size, 128);
1595
1596 if (dec->dpb_type != DPB_DYNAMIC_TIER_2) {
1597 bool r;
1598 if (!dec->dpb.res && dec->dpb_size) {
1599 if (encrypted) {
1600 r = si_vid_create_tmz_buffer(dec->screen, &dec->dpb, dec->dpb_size, PIPE_USAGE_DEFAULT);
1601 } else {
1602 r = si_vid_create_buffer(dec->screen, &dec->dpb, dec->dpb_size, PIPE_USAGE_DEFAULT);
1603 }
1604 assert(encrypted == (bool)(dec->dpb.res->flags & RADEON_FLAG_ENCRYPTED));
1605 if (!r) {
1606 RADEON_DEC_ERR("Can't allocate dpb.\n");
1607 return NULL;
1608 }
1609 } else if (dec->dpb_type == DPB_DYNAMIC_TIER_1 && dec->dpb.res &&
1610 (dec->max_width < dec->base.width || dec->max_height < dec->base.height)) {
1611 struct rvid_buf_offset_info buf_offset_info;
1612
1613 buf_offset_info.num_units = (NUM_VP9_REFS + 1);
1614 buf_offset_info.old_offset = (align(dec->max_width, dec->db_alignment) *
1615 align(dec->max_height, dec->db_alignment) * 3 / 2);
1616 buf_offset_info.new_offset = (align(dec->base.width, dec->db_alignment) *
1617 align(dec->base.height, dec->db_alignment) * 3 / 2);
1618
1619 dec->dpb_size = calc_dpb_size(dec);
1620 r = si_vid_resize_buffer(dec->base.context, &dec->cs, &dec->dpb, dec->dpb_size, &buf_offset_info);
1621 if (!r) {
1622 RADEON_DEC_ERR("Can't resize dpb.\n");
1623 return NULL;
1624 }
1625 dec->max_width = dec->base.width;
1626 dec->max_height = dec->base.height;
1627 dpb_resize = true;
1628 }
1629 }
1630
1631 if (!dec->ctx.res) {
1632 enum pipe_video_format fmt = u_reduce_video_profile(picture->profile);
1633 if (dec->stream_type == RDECODE_CODEC_H264_PERF) {
1634 unsigned ctx_size = calc_ctx_size_h264_perf(dec);
1635 bool r;
1636 if (encrypted && dec->tmz_ctx) {
1637 r = si_vid_create_tmz_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT);
1638 } else {
1639 r = si_vid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT);
1640 }
1641 assert((encrypted && dec->tmz_ctx) == (bool)(dec->ctx.res->flags & RADEON_FLAG_ENCRYPTED));
1642
1643 if (!r) {
1644 RADEON_DEC_ERR("Can't allocate context buffer.\n");
1645 return NULL;
1646 }
1647 } else if (fmt == PIPE_VIDEO_FORMAT_VP9) {
1648 unsigned ctx_size;
1649 uint8_t *ptr;
1650 bool r;
1651
1652 /* default probability + probability data */
1653 ctx_size = 2304 * 5;
1654
1655 if (((struct si_screen *)dec->screen)->info.vcn_ip_version >= VCN_2_0_0) {
1656 /* SRE collocated context data */
1657 ctx_size += 32 * 2 * 128 * 68;
1658 /* SMP collocated context data */
1659 ctx_size += 9 * 64 * 2 * 128 * 68;
1660 /* SDB left tile pixel */
1661 ctx_size += 8 * 2 * 2 * 8192;
1662 } else {
1663 ctx_size += 32 * 2 * 64 * 64;
1664 ctx_size += 9 * 64 * 2 * 64 * 64;
1665 ctx_size += 8 * 2 * 4096;
1666 }
1667
1668 if (dec->base.profile == PIPE_VIDEO_PROFILE_VP9_PROFILE2)
1669 ctx_size += 8 * 2 * 4096;
1670
1671 if (encrypted && dec->tmz_ctx) {
1672 r = si_vid_create_tmz_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT);
1673 } else {
1674 r = si_vid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT);
1675 }
1676 if (!r) {
1677 RADEON_DEC_ERR("Can't allocate context buffer.\n");
1678 return NULL;
1679 }
1680
1681 /* ctx needs probs table */
1682 ptr = dec->ws->buffer_map(dec->ws, dec->ctx.res->buf, &dec->cs,
1683 PIPE_MAP_WRITE | RADEON_MAP_TEMPORARY);
1684 fill_probs_table(ptr);
1685 dec->ws->buffer_unmap(dec->ws, dec->ctx.res->buf);
1686 dec->bs_ptr = NULL;
1687 } else if (fmt == PIPE_VIDEO_FORMAT_HEVC) {
1688 unsigned ctx_size;
1689 bool r;
1690 if (dec->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10)
1691 ctx_size = calc_ctx_size_h265_main10(dec, (struct pipe_h265_picture_desc *)picture);
1692 else
1693 ctx_size = calc_ctx_size_h265_main(dec);
1694
1695 if (encrypted && dec->tmz_ctx) {
1696 r = si_vid_create_tmz_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT);
1697 } else {
1698 r = si_vid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT);
1699 }
1700 if (!r) {
1701 RADEON_DEC_ERR("Can't allocate context buffer.\n");
1702 return NULL;
1703 }
1704 }
1705 }
1706 if (encrypted != dec->ws->cs_is_secure(&dec->cs)) {
1707 dec->ws->cs_flush(&dec->cs, RADEON_FLUSH_TOGGLE_SECURE_SUBMISSION, NULL);
1708 }
1709
1710 if (dec->stream_type == RDECODE_CODEC_AV1)
1711 rvcn_dec_av1_film_grain_surface(&out_surf, (struct pipe_av1_picture_desc *)picture);
1712
1713 luma = (struct si_texture *)((struct vl_video_buffer *)out_surf)->resources[0];
1714 chroma = (struct si_texture *)((struct vl_video_buffer *)out_surf)->resources[1];
1715
1716 decode->dpb_size = (dec->dpb_type != DPB_DYNAMIC_TIER_2) ? dec->dpb.res->buf->size : 0;
1717
1718 /* When texture being created, the bo will be created with total size of planes,
1719 * and all planes point to the same buffer */
1720 assert(si_resource(((struct vl_video_buffer *)out_surf)->resources[0])->buf->size ==
1721 si_resource(((struct vl_video_buffer *)out_surf)->resources[1])->buf->size);
1722
1723 decode->dt_size = si_resource(((struct vl_video_buffer *)out_surf)->resources[0])->buf->size;
1724
1725 decode->sct_size = 0;
1726 decode->sc_coeff_size = 0;
1727
1728 decode->sw_ctxt_size = RDECODE_SESSION_CONTEXT_SIZE;
1729 decode->db_pitch = align(dec->base.width, dec->db_alignment);
1730
1731 if ((((struct si_screen*)dec->screen)->info.vcn_ip_version >= VCN_3_0_0) &&
1732 (dec->stream_type == RDECODE_CODEC_VP9 || dec->stream_type == RDECODE_CODEC_AV1 ||
1733 dec->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10))
1734 decode->db_aligned_height = align(dec->base.height, 64);
1735
1736 decode->db_surf_tile_config = 0;
1737 decode->db_array_mode = dec->addr_gfx_mode;
1738
1739 decode->dt_pitch = luma->surface.u.gfx9.surf_pitch * luma->surface.blk_w;
1740 decode->dt_uv_pitch = chroma->surface.u.gfx9.surf_pitch * chroma->surface.blk_w;
1741
1742 if (luma->surface.meta_offset) {
1743 RADEON_DEC_ERR("DCC surfaces not supported.\n");
1744 return NULL;
1745 }
1746
1747 decode->dt_tiling_mode = 0;
1748 decode->dt_swizzle_mode = luma->surface.u.gfx9.swizzle_mode;
1749 decode->dt_array_mode = dec->addr_gfx_mode;
1750 decode->dt_field_mode = ((struct vl_video_buffer *)out_surf)->base.interlaced;
1751 decode->dt_surf_tile_config = 0;
1752 decode->dt_uv_surf_tile_config = 0;
1753
1754 decode->dt_luma_top_offset = luma->surface.u.gfx9.surf_offset;
1755 decode->dt_chroma_top_offset = chroma->surface.u.gfx9.surf_offset;
1756 if (decode->dt_field_mode) {
1757 decode->dt_luma_bottom_offset =
1758 luma->surface.u.gfx9.surf_offset + luma->surface.u.gfx9.surf_slice_size;
1759 decode->dt_chroma_bottom_offset =
1760 chroma->surface.u.gfx9.surf_offset + chroma->surface.u.gfx9.surf_slice_size;
1761 } else {
1762 decode->dt_luma_bottom_offset = decode->dt_luma_top_offset;
1763 decode->dt_chroma_bottom_offset = decode->dt_chroma_top_offset;
1764 }
1765 decode->mif_wrc_en = sscreen->info.vcn_ip_version >= VCN_3_0_0;
1766 if (dec->stream_type == RDECODE_CODEC_AV1)
1767 decode->db_pitch_uv = chroma->surface.u.gfx9.surf_pitch * chroma->surface.blk_w;
1768
1769 if (encrypted) {
1770 assert(sscreen->info.has_tmz_support);
1771 set_drm_keys(drm, decrypt);
1772 }
1773
1774 if (dec->dpb_type == DPB_DYNAMIC_TIER_1) {
1775 decode->decode_flags |= (RDECODE_FLAGS_USE_DYNAMIC_DPB_MASK | RDECODE_FLAGS_USE_PAL_MASK);
1776 // Add decode flag for RESIZE_DPB ,when we do resize
1777 if (dpb_resize == true)
1778 decode->decode_flags |= RDECODE_FLAGS_DPB_RESIZE_MASK;
1779
1780 dynamic_dpb->dpbArraySize = NUM_VP9_REFS + 1;
1781 dynamic_dpb->dpbLumaPitch = align(dec->max_width, dec->db_alignment);
1782 dynamic_dpb->dpbLumaAlignedHeight = align(dec->max_height, dec->db_alignment);
1783 dynamic_dpb->dpbLumaAlignedSize =
1784 dynamic_dpb->dpbLumaPitch * dynamic_dpb->dpbLumaAlignedHeight;
1785 dynamic_dpb->dpbChromaPitch = dynamic_dpb->dpbLumaPitch >> 1;
1786 dynamic_dpb->dpbChromaAlignedHeight = dynamic_dpb->dpbLumaAlignedHeight >> 1;
1787 dynamic_dpb->dpbChromaAlignedSize =
1788 dynamic_dpb->dpbChromaPitch * dynamic_dpb->dpbChromaAlignedHeight * 2;
1789 dynamic_dpb->dpbReserved0[0] = dec->db_alignment;
1790
1791 if (dec->base.profile == PIPE_VIDEO_PROFILE_VP9_PROFILE2) {
1792 dynamic_dpb->dpbLumaAlignedSize = dynamic_dpb->dpbLumaAlignedSize * 3 / 2;
1793 dynamic_dpb->dpbChromaAlignedSize = dynamic_dpb->dpbChromaAlignedSize * 3 / 2;
1794 }
1795 }
1796
1797 if (u_reduce_video_profile(picture->profile) == PIPE_VIDEO_FORMAT_HEVC)
1798 get_h265_reflist(hevc_reflist, (struct pipe_h265_picture_desc *)picture);
1799
1800 switch (u_reduce_video_profile(picture->profile)) {
1801 case PIPE_VIDEO_FORMAT_MPEG4_AVC: {
1802 rvcn_dec_message_avc_t avc = get_h264_msg(dec, target, (struct pipe_h264_picture_desc *)picture);
1803 memcpy(codec, (void *)&avc, sizeof(rvcn_dec_message_avc_t));
1804 index_codec->message_id = RDECODE_MESSAGE_AVC;
1805 index_codec->size = sizeof(rvcn_dec_message_avc_t);
1806 break;
1807 }
1808 case PIPE_VIDEO_FORMAT_HEVC: {
1809 rvcn_dec_message_hevc_t hevc =
1810 get_h265_msg(dec, target, (struct pipe_h265_picture_desc *)picture);
1811
1812 memcpy(codec, (void *)&hevc, sizeof(rvcn_dec_message_hevc_t));
1813 index_codec->message_id = RDECODE_MESSAGE_HEVC;
1814 index_codec->size = sizeof(rvcn_dec_message_hevc_t);
1815 break;
1816 }
1817 case PIPE_VIDEO_FORMAT_VC1: {
1818 rvcn_dec_message_vc1_t vc1 = get_vc1_msg((struct pipe_vc1_picture_desc *)picture);
1819
1820 memcpy(codec, (void *)&vc1, sizeof(rvcn_dec_message_vc1_t));
1821 if ((picture->profile == PIPE_VIDEO_PROFILE_VC1_SIMPLE) ||
1822 (picture->profile == PIPE_VIDEO_PROFILE_VC1_MAIN)) {
1823 decode->width_in_samples = align(decode->width_in_samples, 16) / 16;
1824 decode->height_in_samples = align(decode->height_in_samples, 16) / 16;
1825 }
1826 index_codec->message_id = RDECODE_MESSAGE_VC1;
1827 index_codec->size = sizeof(rvcn_dec_message_vc1_t);
1828 break;
1829 }
1830 case PIPE_VIDEO_FORMAT_MPEG12: {
1831 rvcn_dec_message_mpeg2_vld_t mpeg2 =
1832 get_mpeg2_msg(dec, (struct pipe_mpeg12_picture_desc *)picture);
1833
1834 memcpy(codec, (void *)&mpeg2, sizeof(rvcn_dec_message_mpeg2_vld_t));
1835 index_codec->message_id = RDECODE_MESSAGE_MPEG2_VLD;
1836 index_codec->size = sizeof(rvcn_dec_message_mpeg2_vld_t);
1837 break;
1838 }
1839 case PIPE_VIDEO_FORMAT_MPEG4: {
1840 rvcn_dec_message_mpeg4_asp_vld_t mpeg4 =
1841 get_mpeg4_msg(dec, (struct pipe_mpeg4_picture_desc *)picture);
1842
1843 memcpy(codec, (void *)&mpeg4, sizeof(rvcn_dec_message_mpeg4_asp_vld_t));
1844 index_codec->message_id = RDECODE_MESSAGE_MPEG4_ASP_VLD;
1845 index_codec->size = sizeof(rvcn_dec_message_mpeg4_asp_vld_t);
1846 break;
1847 }
1848 case PIPE_VIDEO_FORMAT_VP9: {
1849 rvcn_dec_message_vp9_t vp9 =
1850 get_vp9_msg(dec, target, (struct pipe_vp9_picture_desc *)picture);
1851
1852 memcpy(codec, (void *)&vp9, sizeof(rvcn_dec_message_vp9_t));
1853 index_codec->message_id = RDECODE_MESSAGE_VP9;
1854 index_codec->size = sizeof(rvcn_dec_message_vp9_t);
1855 break;
1856 }
1857 case PIPE_VIDEO_FORMAT_AV1: {
1858 rvcn_dec_message_av1_t av1 =
1859 get_av1_msg(dec, target, (struct pipe_av1_picture_desc *)picture);
1860
1861 memcpy(codec, (void *)&av1, sizeof(rvcn_dec_message_av1_t));
1862 index_codec->message_id = RDECODE_MESSAGE_AV1;
1863 index_codec->size = sizeof(rvcn_dec_message_av1_t);
1864
1865 if (dec->ctx.res == NULL) {
1866 unsigned ctx_size = ac_vcn_dec_calc_ctx_size_av1(dec->av1_version);
1867 uint8_t *ptr;
1868
1869 if (!si_vid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT))
1870 RADEON_DEC_ERR("Can't allocate context buffer.\n");
1871
1872 ptr = dec->ws->buffer_map(dec->ws, dec->ctx.res->buf, &dec->cs, PIPE_MAP_WRITE | RADEON_MAP_TEMPORARY);
1873
1874 ac_vcn_av1_init_probs(dec->av1_version, ptr);
1875 dec->ws->buffer_unmap(dec->ws, dec->ctx.res->buf);
1876 }
1877
1878 break;
1879 }
1880 default:
1881 assert(0);
1882 return NULL;
1883 }
1884
1885 header->total_size += index_codec->size;
1886
1887 if (dec->ctx.res)
1888 decode->hw_ctxt_size = dec->ctx.res->buf->size;
1889
1890 if (dec->dpb_type == DPB_DYNAMIC_TIER_2) {
1891 if (rvcn_dec_dynamic_dpb_t2_message(dec, decode, dynamic_dpb_t2, encrypted))
1892 return NULL;
1893 } else if (((struct si_screen *)dec->screen)->info.vcn_ip_version == VCN_5_0_0 &&
1894 dec->dpb_type == DPB_MAX_RES)
1895 decode->db_swizzle_mode = RDECODE_VCN5_256B_D;
1896
1897 return luma->buffer.buf;
1898 }
1899
rvcn_dec_message_destroy(struct radeon_decoder * dec)1900 static void rvcn_dec_message_destroy(struct radeon_decoder *dec)
1901 {
1902 rvcn_dec_message_header_t *header = dec->msg;
1903
1904 memset(dec->msg, 0, sizeof(rvcn_dec_message_header_t));
1905 header->header_size = sizeof(rvcn_dec_message_header_t);
1906 header->total_size = sizeof(rvcn_dec_message_header_t) - sizeof(rvcn_dec_message_index_t);
1907 header->num_buffers = 0;
1908 header->msg_type = RDECODE_MSG_DESTROY;
1909 header->stream_handle = dec->stream_handle;
1910 header->status_report_feedback_number = 0;
1911 }
1912
rvcn_dec_message_feedback(struct radeon_decoder * dec)1913 static void rvcn_dec_message_feedback(struct radeon_decoder *dec)
1914 {
1915 rvcn_dec_feedback_header_t *header = (void *)dec->fb;
1916
1917 header->header_size = sizeof(rvcn_dec_feedback_header_t);
1918 header->total_size = sizeof(rvcn_dec_feedback_header_t);
1919 header->num_buffers = 0;
1920 }
1921
rvcn_dec_sq_tail(struct radeon_decoder * dec)1922 static void rvcn_dec_sq_tail(struct radeon_decoder *dec)
1923 {
1924 if (dec->vcn_dec_sw_ring == false)
1925 return;
1926
1927 rvcn_sq_tail(&dec->cs, &dec->sq);
1928 }
1929 /* flush IB to the hardware */
flush(struct radeon_decoder * dec,unsigned flags,struct pipe_fence_handle ** fence)1930 static int flush(struct radeon_decoder *dec, unsigned flags,
1931 struct pipe_fence_handle **fence)
1932 {
1933 struct si_screen *sscreen = (struct si_screen *)dec->screen;
1934
1935 rvcn_dec_sq_tail(dec);
1936
1937 if (sscreen->debug_flags & DBG(IB)) {
1938 struct ac_ib_parser ib_parser = {
1939 .f = stderr,
1940 .ib = dec->cs.current.buf,
1941 .num_dw = dec->cs.current.cdw,
1942 .gfx_level = sscreen->info.gfx_level,
1943 .vcn_version = sscreen->info.vcn_ip_version,
1944 .family = sscreen->info.family,
1945 .ip_type = dec->stream_type == RDECODE_CODEC_JPEG ? AMD_IP_VCN_JPEG :
1946 dec->vcn_dec_sw_ring ? AMD_IP_VCN_ENC : AMD_IP_VCN_DEC,
1947 };
1948 ac_parse_ib(&ib_parser, "IB");
1949 }
1950
1951 return dec->ws->cs_flush(&dec->cs, flags, fence);
1952 }
1953
1954 /* add a new set register command to the IB */
set_reg(struct radeon_decoder * dec,unsigned reg,uint32_t val)1955 static void set_reg(struct radeon_decoder *dec, unsigned reg, uint32_t val)
1956 {
1957 radeon_emit(&dec->cs, RDECODE_PKT0(reg >> 2, 0));
1958 radeon_emit(&dec->cs, val);
1959 }
1960
1961 /* send a command to the VCPU through the GPCOM registers */
send_cmd(struct radeon_decoder * dec,unsigned cmd,struct pb_buffer_lean * buf,uint32_t off,unsigned usage,enum radeon_bo_domain domain)1962 static void send_cmd(struct radeon_decoder *dec, unsigned cmd, struct pb_buffer_lean *buf, uint32_t off,
1963 unsigned usage, enum radeon_bo_domain domain)
1964 {
1965 uint64_t addr;
1966
1967 dec->ws->cs_add_buffer(&dec->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, domain);
1968 addr = dec->ws->buffer_get_virtual_address(buf);
1969 addr = addr + off;
1970
1971 if (dec->vcn_dec_sw_ring == false) {
1972 set_reg(dec, dec->reg.data0, addr);
1973 set_reg(dec, dec->reg.data1, addr >> 32);
1974 set_reg(dec, dec->reg.cmd, cmd << 1);
1975 return;
1976 }
1977
1978 if (!dec->cs.current.cdw) {
1979 rvcn_sq_header(&dec->cs, &dec->sq, false);
1980 rvcn_decode_ib_package_t *ib_header =
1981 (rvcn_decode_ib_package_t *)&(dec->cs.current.buf[dec->cs.current.cdw]);
1982
1983 ib_header->package_size = sizeof(struct rvcn_decode_buffer_s) +
1984 sizeof(struct rvcn_decode_ib_package_s);
1985 dec->cs.current.cdw++;
1986 ib_header->package_type = (RDECODE_IB_PARAM_DECODE_BUFFER);
1987 dec->cs.current.cdw++;
1988
1989 dec->decode_buffer =
1990 (rvcn_decode_buffer_t *)&(dec->cs.current.buf[dec->cs.current.cdw]);
1991
1992 dec->cs.current.cdw += sizeof(struct rvcn_decode_buffer_s) / 4;
1993 memset(dec->decode_buffer, 0, sizeof(struct rvcn_decode_buffer_s));
1994 }
1995
1996 switch(cmd) {
1997 case RDECODE_CMD_MSG_BUFFER:
1998 dec->decode_buffer->valid_buf_flag |= RDECODE_CMDBUF_FLAGS_MSG_BUFFER;
1999 dec->decode_buffer->msg_buffer_address_hi = (addr >> 32);
2000 dec->decode_buffer->msg_buffer_address_lo = (addr);
2001 break;
2002 case RDECODE_CMD_DPB_BUFFER:
2003 dec->decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_DPB_BUFFER);
2004 dec->decode_buffer->dpb_buffer_address_hi = (addr >> 32);
2005 dec->decode_buffer->dpb_buffer_address_lo = (addr);
2006 break;
2007 case RDECODE_CMD_DECODING_TARGET_BUFFER:
2008 dec->decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_DECODING_TARGET_BUFFER);
2009 dec->decode_buffer->target_buffer_address_hi = (addr >> 32);
2010 dec->decode_buffer->target_buffer_address_lo = (addr);
2011 break;
2012 case RDECODE_CMD_FEEDBACK_BUFFER:
2013 dec->decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_FEEDBACK_BUFFER);
2014 dec->decode_buffer->feedback_buffer_address_hi = (addr >> 32);
2015 dec->decode_buffer->feedback_buffer_address_lo = (addr);
2016 break;
2017 case RDECODE_CMD_PROB_TBL_BUFFER:
2018 dec->decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_PROB_TBL_BUFFER);
2019 dec->decode_buffer->prob_tbl_buffer_address_hi = (addr >> 32);
2020 dec->decode_buffer->prob_tbl_buffer_address_lo = (addr);
2021 break;
2022 case RDECODE_CMD_SESSION_CONTEXT_BUFFER:
2023 dec->decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_SESSION_CONTEXT_BUFFER);
2024 dec->decode_buffer->session_contex_buffer_address_hi = (addr >> 32);
2025 dec->decode_buffer->session_contex_buffer_address_lo = (addr);
2026 break;
2027 case RDECODE_CMD_BITSTREAM_BUFFER:
2028 dec->decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_BITSTREAM_BUFFER);
2029 dec->decode_buffer->bitstream_buffer_address_hi = (addr >> 32);
2030 dec->decode_buffer->bitstream_buffer_address_lo = (addr);
2031 break;
2032 case RDECODE_CMD_IT_SCALING_TABLE_BUFFER:
2033 dec->decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_IT_SCALING_BUFFER);
2034 dec->decode_buffer->it_sclr_table_buffer_address_hi = (addr >> 32);
2035 dec->decode_buffer->it_sclr_table_buffer_address_lo = (addr);
2036 break;
2037 case RDECODE_CMD_CONTEXT_BUFFER:
2038 dec->decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_CONTEXT_BUFFER);
2039 dec->decode_buffer->context_buffer_address_hi = (addr >> 32);
2040 dec->decode_buffer->context_buffer_address_lo = (addr);
2041 break;
2042 default:
2043 printf("Not Support!");
2044 }
2045 }
2046
2047 /* do the codec needs an IT buffer ?*/
have_it(struct radeon_decoder * dec)2048 static bool have_it(struct radeon_decoder *dec)
2049 {
2050 return dec->stream_type == RDECODE_CODEC_H264_PERF || dec->stream_type == RDECODE_CODEC_H265;
2051 }
2052
2053 /* do the codec needs an probs buffer? */
have_probs(struct radeon_decoder * dec)2054 static bool have_probs(struct radeon_decoder *dec)
2055 {
2056 return (dec->stream_type == RDECODE_CODEC_VP9 || dec->stream_type == RDECODE_CODEC_AV1);
2057 }
2058
2059 /* map the next available message/feedback/itscaling buffer */
map_msg_fb_it_probs_buf(struct radeon_decoder * dec)2060 static void map_msg_fb_it_probs_buf(struct radeon_decoder *dec)
2061 {
2062 struct rvid_buffer *buf;
2063 uint8_t *ptr;
2064
2065 /* grab the current message/feedback buffer */
2066 buf = &dec->msg_fb_it_probs_buffers[dec->cur_buffer];
2067
2068 /* and map it for CPU access */
2069 ptr =
2070 dec->ws->buffer_map(dec->ws, buf->res->buf, &dec->cs, PIPE_MAP_WRITE | RADEON_MAP_TEMPORARY);
2071
2072 /* calc buffer offsets */
2073 dec->msg = ptr;
2074
2075 dec->fb = (uint32_t *)(ptr + FB_BUFFER_OFFSET);
2076 if (have_it(dec))
2077 dec->it = (uint8_t *)(ptr + FB_BUFFER_OFFSET + FB_BUFFER_SIZE);
2078 else if (have_probs(dec))
2079 dec->probs = (uint8_t *)(ptr + FB_BUFFER_OFFSET + FB_BUFFER_SIZE);
2080 }
2081
2082 /* unmap and send a message command to the VCPU */
send_msg_buf(struct radeon_decoder * dec)2083 static void send_msg_buf(struct radeon_decoder *dec)
2084 {
2085 struct rvid_buffer *buf;
2086
2087 /* ignore the request if message/feedback buffer isn't mapped */
2088 if (!dec->msg || !dec->fb)
2089 return;
2090
2091 /* grab the current message buffer */
2092 buf = &dec->msg_fb_it_probs_buffers[dec->cur_buffer];
2093
2094 /* unmap the buffer */
2095 dec->ws->buffer_unmap(dec->ws, buf->res->buf);
2096 dec->bs_ptr = NULL;
2097 dec->msg = NULL;
2098 dec->fb = NULL;
2099 dec->it = NULL;
2100 dec->probs = NULL;
2101
2102 if (dec->sessionctx.res)
2103 send_cmd(dec, RDECODE_CMD_SESSION_CONTEXT_BUFFER, dec->sessionctx.res->buf, 0,
2104 RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM);
2105
2106 /* and send it to the hardware */
2107 send_cmd(dec, RDECODE_CMD_MSG_BUFFER, buf->res->buf, 0, RADEON_USAGE_READ, RADEON_DOMAIN_GTT);
2108 }
2109
2110 /* cycle to the next set of buffers */
next_buffer(struct radeon_decoder * dec)2111 static void next_buffer(struct radeon_decoder *dec)
2112 {
2113 ++dec->cur_buffer;
2114 dec->cur_buffer %= dec->num_dec_bufs;
2115 }
2116
calc_ctx_size_h264_perf(struct radeon_decoder * dec)2117 static unsigned calc_ctx_size_h264_perf(struct radeon_decoder *dec)
2118 {
2119 unsigned width_in_mb, height_in_mb, ctx_size;
2120 unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH);
2121 unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT);
2122
2123 unsigned max_references = dec->base.max_references + 1;
2124
2125 // picture width & height in 16 pixel units
2126 width_in_mb = width / VL_MACROBLOCK_WIDTH;
2127 height_in_mb = align(height / VL_MACROBLOCK_HEIGHT, 2);
2128
2129 unsigned fs_in_mb = width_in_mb * height_in_mb;
2130 unsigned num_dpb_buffer_lean;
2131 switch (dec->base.level) {
2132 case 30:
2133 num_dpb_buffer_lean = 8100 / fs_in_mb;
2134 break;
2135 case 31:
2136 num_dpb_buffer_lean = 18000 / fs_in_mb;
2137 break;
2138 case 32:
2139 num_dpb_buffer_lean = 20480 / fs_in_mb;
2140 break;
2141 case 41:
2142 num_dpb_buffer_lean = 32768 / fs_in_mb;
2143 break;
2144 case 42:
2145 num_dpb_buffer_lean = 34816 / fs_in_mb;
2146 break;
2147 case 50:
2148 num_dpb_buffer_lean = 110400 / fs_in_mb;
2149 break;
2150 case 51:
2151 num_dpb_buffer_lean = 184320 / fs_in_mb;
2152 break;
2153 default:
2154 num_dpb_buffer_lean = 184320 / fs_in_mb;
2155 break;
2156 }
2157 num_dpb_buffer_lean++;
2158 max_references = MAX2(MIN2(NUM_H264_REFS, num_dpb_buffer_lean), max_references);
2159 ctx_size = max_references * align(width_in_mb * height_in_mb * 192, 256);
2160
2161 return ctx_size;
2162 }
2163
2164 /* calculate size of reference picture buffer */
calc_dpb_size(struct radeon_decoder * dec)2165 static unsigned calc_dpb_size(struct radeon_decoder *dec)
2166 {
2167 unsigned width_in_mb, height_in_mb, image_size, dpb_size;
2168
2169 // always align them to MB size for dpb calculation
2170 unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH);
2171 unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT);
2172
2173 // always one more for currently decoded picture
2174 unsigned max_references = dec->base.max_references + 1;
2175
2176 // aligned size of a single frame
2177 image_size = align(width, dec->db_alignment) * align(height, dec->db_alignment);
2178 image_size += image_size / 2;
2179 image_size = align(image_size, 1024);
2180
2181 // picture width & height in 16 pixel units
2182 width_in_mb = width / VL_MACROBLOCK_WIDTH;
2183 height_in_mb = align(height / VL_MACROBLOCK_HEIGHT, 2);
2184
2185 switch (u_reduce_video_profile(dec->base.profile)) {
2186 case PIPE_VIDEO_FORMAT_MPEG4_AVC: {
2187 unsigned fs_in_mb = width_in_mb * height_in_mb;
2188 unsigned num_dpb_buffer_lean;
2189
2190 switch (dec->base.level) {
2191 case 30:
2192 num_dpb_buffer_lean = 8100 / fs_in_mb;
2193 break;
2194 case 31:
2195 num_dpb_buffer_lean = 18000 / fs_in_mb;
2196 break;
2197 case 32:
2198 num_dpb_buffer_lean = 20480 / fs_in_mb;
2199 break;
2200 case 41:
2201 num_dpb_buffer_lean = 32768 / fs_in_mb;
2202 break;
2203 case 42:
2204 num_dpb_buffer_lean = 34816 / fs_in_mb;
2205 break;
2206 case 50:
2207 num_dpb_buffer_lean = 110400 / fs_in_mb;
2208 break;
2209 case 51:
2210 num_dpb_buffer_lean = 184320 / fs_in_mb;
2211 break;
2212 default:
2213 num_dpb_buffer_lean = 184320 / fs_in_mb;
2214 break;
2215 }
2216 num_dpb_buffer_lean++;
2217 max_references = MAX2(MIN2(NUM_H264_REFS, num_dpb_buffer_lean), max_references);
2218 dpb_size = image_size * max_references;
2219 break;
2220 }
2221
2222 case PIPE_VIDEO_FORMAT_HEVC:
2223 if (dec->base.width * dec->base.height >= 4096 * 2000)
2224 max_references = MAX2(max_references, 8);
2225 else
2226 max_references = MAX2(max_references, 17);
2227
2228 if (dec->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10)
2229 dpb_size = align((align(width, dec->db_alignment) *
2230 align(height, dec->db_alignment) * 9) / 4, 256) * max_references;
2231 else
2232 dpb_size = align((align(width, dec->db_alignment) *
2233 align(height, dec->db_alignment) * 3) / 2, 256) * max_references;
2234 break;
2235
2236 case PIPE_VIDEO_FORMAT_VC1:
2237 // the firmware seems to always assume a minimum of ref frames
2238 max_references = MAX2(NUM_VC1_REFS, max_references);
2239
2240 // reference picture buffer
2241 dpb_size = image_size * max_references;
2242
2243 // CONTEXT_BUFFER
2244 dpb_size += width_in_mb * height_in_mb * 128;
2245
2246 // IT surface buffer
2247 dpb_size += width_in_mb * 64;
2248
2249 // DB surface buffer
2250 dpb_size += width_in_mb * 128;
2251
2252 // BP
2253 dpb_size += align(MAX2(width_in_mb, height_in_mb) * 7 * 16, 64);
2254 break;
2255
2256 case PIPE_VIDEO_FORMAT_MPEG12:
2257 // reference picture buffer, must be big enough for all frames
2258 dpb_size = image_size * NUM_MPEG2_REFS;
2259 break;
2260
2261 case PIPE_VIDEO_FORMAT_MPEG4:
2262 // reference picture buffer
2263 dpb_size = image_size * max_references;
2264
2265 // CM
2266 dpb_size += width_in_mb * height_in_mb * 64;
2267
2268 // IT surface buffer
2269 dpb_size += align(width_in_mb * height_in_mb * 32, 64);
2270
2271 dpb_size = MAX2(dpb_size, 30 * 1024 * 1024);
2272 break;
2273
2274 case PIPE_VIDEO_FORMAT_VP9:
2275 max_references = MAX2(max_references, 9);
2276
2277 if (dec->dpb_type == DPB_MAX_RES)
2278 dpb_size = (((struct si_screen *)dec->screen)->info.vcn_ip_version >= VCN_2_0_0)
2279 ? (8192 * 4320 * 3 / 2) * max_references
2280 : (4096 * 3000 * 3 / 2) * max_references;
2281 else
2282 dpb_size = (align(dec->base.width, dec->db_alignment) *
2283 align(dec->base.height, dec->db_alignment) * 3 / 2) * max_references;
2284
2285 if (dec->base.profile == PIPE_VIDEO_PROFILE_VP9_PROFILE2)
2286 dpb_size = dpb_size * 3 / 2;
2287 break;
2288
2289 case PIPE_VIDEO_FORMAT_AV1:
2290 max_references = MAX2(max_references, 9);
2291 dpb_size = 8192 * 4320 * 3 / 2 * max_references * 3 / 2;
2292 break;
2293
2294 case PIPE_VIDEO_FORMAT_JPEG:
2295 dpb_size = 0;
2296 break;
2297
2298 default:
2299 // something is missing here
2300 assert(0);
2301
2302 // at least use a sane default value
2303 dpb_size = 32 * 1024 * 1024;
2304 break;
2305 }
2306 return dpb_size;
2307 }
2308
2309 /**
2310 * destroy this video decoder
2311 */
radeon_dec_destroy(struct pipe_video_codec * decoder)2312 static void radeon_dec_destroy(struct pipe_video_codec *decoder)
2313 {
2314 struct radeon_decoder *dec = (struct radeon_decoder *)decoder;
2315 unsigned i;
2316
2317 assert(decoder);
2318
2319 if (dec->bs_ptr) {
2320 dec->ws->buffer_unmap(dec->ws, dec->bs_buffers[dec->cur_buffer].res->buf);
2321 dec->bs_ptr = NULL;
2322 }
2323
2324 if (dec->msg) {
2325 dec->ws->buffer_unmap(dec->ws, dec->msg_fb_it_probs_buffers[dec->cur_buffer].res->buf);
2326 dec->msg = NULL;
2327 }
2328
2329 if (dec->stream_type != RDECODE_CODEC_JPEG) {
2330 struct pipe_fence_handle *fence = NULL;
2331 map_msg_fb_it_probs_buf(dec);
2332 rvcn_dec_message_destroy(dec);
2333 send_msg_buf(dec);
2334 flush(dec, 0, &fence);
2335 dec->ws->fence_wait(dec->ws, fence, OS_TIMEOUT_INFINITE);
2336 dec->ws->fence_reference(dec->ws, &fence, NULL);
2337 }
2338
2339 dec->ws->cs_destroy(&dec->cs);
2340 if (dec->ectx)
2341 dec->ectx->destroy(dec->ectx);
2342
2343 if (dec->stream_type == RDECODE_CODEC_JPEG) {
2344 for (i = 0; i < dec->njctx; i++) {
2345 dec->ws->cs_destroy(&dec->jcs[i]);
2346 dec->ws->ctx_destroy(dec->jctx[i]);
2347 }
2348 }
2349
2350 if (dec->msg_fb_it_probs_buffers && dec->bs_buffers) {
2351 for (i = 0; i < dec->num_dec_bufs; ++i) {
2352 si_vid_destroy_buffer(&dec->msg_fb_it_probs_buffers[i]);
2353 si_vid_destroy_buffer(&dec->bs_buffers[i]);
2354 }
2355 FREE(dec->msg_fb_it_probs_buffers);
2356 FREE(dec->bs_buffers);
2357 }
2358 dec->num_dec_bufs = 0;
2359
2360 if (dec->dpb_type != DPB_DYNAMIC_TIER_2) {
2361 si_vid_destroy_buffer(&dec->dpb);
2362 } else {
2363 list_for_each_entry_safe(struct rvcn_dec_dynamic_dpb_t2, d, &dec->dpb_ref_list, list) {
2364 list_del(&d->list);
2365 d->vbuf->destroy(d->vbuf);
2366 FREE(d);
2367 }
2368 }
2369 si_vid_destroy_buffer(&dec->ctx);
2370 si_vid_destroy_buffer(&dec->sessionctx);
2371
2372 FREE(dec->jcs);
2373 FREE(dec->jctx);
2374 FREE(dec);
2375 }
2376
2377 /**
2378 * start decoding of a new frame
2379 */
radeon_dec_begin_frame(struct pipe_video_codec * decoder,struct pipe_video_buffer * target,struct pipe_picture_desc * picture)2380 static void radeon_dec_begin_frame(struct pipe_video_codec *decoder,
2381 struct pipe_video_buffer *target,
2382 struct pipe_picture_desc *picture)
2383 {
2384 struct radeon_decoder *dec = (struct radeon_decoder *)decoder;
2385 uintptr_t frame;
2386
2387 assert(decoder);
2388
2389 if (dec->error)
2390 return;
2391
2392 switch (dec->stream_type) {
2393 case RDECODE_CODEC_VP9: {
2394 struct pipe_vp9_picture_desc *pic = (struct pipe_vp9_picture_desc *)picture;
2395 /* Only 10 bit is supported for Profile 2 */
2396 if (pic->picture_parameter.bit_depth > 10) {
2397 dec->error = true;
2398 return;
2399 }
2400 break;
2401 }
2402 case RDECODE_CODEC_AV1: {
2403 struct pipe_av1_picture_desc *pic = (struct pipe_av1_picture_desc *)picture;
2404 /* Only 4:2:0 is supported for Profile 2 */
2405 if (!pic->picture_parameter.seq_info_fields.subsampling_x ||
2406 !pic->picture_parameter.seq_info_fields.subsampling_y) {
2407 dec->error = true;
2408 return;
2409 }
2410 break;
2411 }
2412 default:
2413 break;
2414 }
2415
2416 frame = ++dec->frame_number;
2417 if (dec->stream_type != RDECODE_CODEC_VP9 && dec->stream_type != RDECODE_CODEC_AV1
2418 && dec->stream_type != RDECODE_CODEC_H264_PERF)
2419 vl_video_buffer_set_associated_data(target, decoder, (void *)frame,
2420 &radeon_dec_destroy_associated_data);
2421
2422 dec->bs_size = 0;
2423 dec->bs_ptr = dec->ws->buffer_map(dec->ws, dec->bs_buffers[dec->cur_buffer].res->buf, &dec->cs,
2424 PIPE_MAP_WRITE | RADEON_MAP_TEMPORARY);
2425 }
2426
/**
 * Macroblock-level decode entry point.
 *
 * Not implemented: the function only asserts, so it does nothing in builds
 * where assert() is compiled out.
 */
static void radeon_dec_decode_macroblock(struct pipe_video_codec *decoder,
                                         struct pipe_video_buffer *target,
                                         struct pipe_picture_desc *picture,
                                         const struct pipe_macroblock *macroblocks,
                                         unsigned num_macroblocks)
{
   /* not supported (yet) */
   assert(0);
}
2439
2440 /**
2441 * decode a bitstream
2442 */
radeon_dec_decode_bitstream(struct pipe_video_codec * decoder,struct pipe_video_buffer * target,struct pipe_picture_desc * picture,unsigned num_buffers,const void * const * buffers,const unsigned * sizes)2443 static void radeon_dec_decode_bitstream(struct pipe_video_codec *decoder,
2444 struct pipe_video_buffer *target,
2445 struct pipe_picture_desc *picture, unsigned num_buffers,
2446 const void *const *buffers, const unsigned *sizes)
2447 {
2448 struct radeon_decoder *dec = (struct radeon_decoder *)decoder;
2449 unsigned i;
2450
2451 assert(decoder);
2452
2453 if (dec->error)
2454 return;
2455
2456 if (!dec->bs_ptr) {
2457 RADEON_DEC_ERR("Invalid bitstream ptr!\n");
2458 return;
2459 }
2460
2461 unsigned long total_bs_size = dec->bs_size;
2462 for (i = 0; i < num_buffers; ++i)
2463 total_bs_size += sizes[i];
2464
2465 struct rvid_buffer *buf = &dec->bs_buffers[dec->cur_buffer];
2466
2467 if (total_bs_size > buf->res->buf->size) {
2468 dec->ws->buffer_unmap(dec->ws, buf->res->buf);
2469 dec->bs_ptr = NULL;
2470
2471 total_bs_size = align(total_bs_size, 128);
2472
2473 if (!dec->bs_size) {
2474 struct rvid_buffer old_buf = *buf;
2475 if (!si_vid_create_buffer(dec->screen, buf, total_bs_size, buf->usage)) {
2476 RADEON_DEC_ERR("Can't create bitstream buffer!");
2477 return;
2478 }
2479 si_vid_destroy_buffer(&old_buf);
2480 } else if (!si_vid_resize_buffer(dec->base.context, &dec->cs, buf, total_bs_size, NULL)) {
2481 RADEON_DEC_ERR("Can't resize bitstream buffer!");
2482 return;
2483 }
2484
2485 dec->bs_ptr = dec->ws->buffer_map(dec->ws, buf->res->buf, &dec->cs,
2486 PIPE_MAP_WRITE | RADEON_MAP_TEMPORARY);
2487 if (!dec->bs_ptr)
2488 return;
2489
2490 dec->bs_ptr += dec->bs_size;
2491 }
2492
2493 for (i = 0; i < num_buffers; ++i) {
2494 memcpy(dec->bs_ptr, buffers[i], sizes[i]);
2495 dec->bs_size += sizes[i];
2496 dec->bs_ptr += sizes[i];
2497 }
2498 }
2499
2500 /**
2501 * send cmd for vcn dec
2502 */
send_cmd_dec(struct radeon_decoder * dec,struct pipe_video_buffer * target,struct pipe_picture_desc * picture)2503 bool send_cmd_dec(struct radeon_decoder *dec, struct pipe_video_buffer *target,
2504 struct pipe_picture_desc *picture)
2505 {
2506 struct pb_buffer_lean *dt;
2507 struct rvid_buffer *msg_fb_it_probs_buf, *bs_buf;
2508
2509 msg_fb_it_probs_buf = &dec->msg_fb_it_probs_buffers[dec->cur_buffer];
2510 bs_buf = &dec->bs_buffers[dec->cur_buffer];
2511
2512 memset(dec->bs_ptr, 0, align(dec->bs_size, 128) - dec->bs_size);
2513 dec->ws->buffer_unmap(dec->ws, bs_buf->res->buf);
2514 dec->bs_ptr = NULL;
2515
2516 map_msg_fb_it_probs_buf(dec);
2517 dt = rvcn_dec_message_decode(dec, target, picture);
2518 if (!dt)
2519 return false;
2520 rvcn_dec_message_feedback(dec);
2521 send_msg_buf(dec);
2522
2523 if (dec->dpb_type != DPB_DYNAMIC_TIER_2)
2524 send_cmd(dec, RDECODE_CMD_DPB_BUFFER, dec->dpb.res->buf, 0, RADEON_USAGE_READWRITE,
2525 RADEON_DOMAIN_VRAM);
2526 if (dec->ctx.res)
2527 send_cmd(dec, RDECODE_CMD_CONTEXT_BUFFER, dec->ctx.res->buf, 0, RADEON_USAGE_READWRITE,
2528 RADEON_DOMAIN_VRAM);
2529 send_cmd(dec, RDECODE_CMD_BITSTREAM_BUFFER, bs_buf->res->buf, 0, RADEON_USAGE_READ,
2530 RADEON_DOMAIN_GTT);
2531 send_cmd(dec, RDECODE_CMD_DECODING_TARGET_BUFFER, dt, 0, RADEON_USAGE_WRITE, RADEON_DOMAIN_VRAM);
2532 send_cmd(dec, RDECODE_CMD_FEEDBACK_BUFFER, msg_fb_it_probs_buf->res->buf, FB_BUFFER_OFFSET,
2533 RADEON_USAGE_WRITE, RADEON_DOMAIN_GTT);
2534 if (have_it(dec))
2535 send_cmd(dec, RDECODE_CMD_IT_SCALING_TABLE_BUFFER, msg_fb_it_probs_buf->res->buf,
2536 FB_BUFFER_OFFSET + FB_BUFFER_SIZE, RADEON_USAGE_READ, RADEON_DOMAIN_GTT);
2537 else if (have_probs(dec))
2538 send_cmd(dec, RDECODE_CMD_PROB_TBL_BUFFER, msg_fb_it_probs_buf->res->buf,
2539 FB_BUFFER_OFFSET + FB_BUFFER_SIZE, RADEON_USAGE_READ, RADEON_DOMAIN_GTT);
2540
2541 if (dec->vcn_dec_sw_ring == false)
2542 set_reg(dec, dec->reg.cntl, 1);
2543
2544 return true;
2545 }
2546
2547 /**
2548 * end decoding of the current frame
2549 */
radeon_dec_end_frame(struct pipe_video_codec * decoder,struct pipe_video_buffer * target,struct pipe_picture_desc * picture)2550 static int radeon_dec_end_frame(struct pipe_video_codec *decoder, struct pipe_video_buffer *target,
2551 struct pipe_picture_desc *picture)
2552 {
2553 struct radeon_decoder *dec = (struct radeon_decoder *)decoder;
2554
2555 assert(decoder);
2556
2557 if (dec->error)
2558 return 1;
2559
2560 if (!dec->send_cmd(dec, target, picture))
2561 return 1;
2562
2563 flush(dec, picture->flush_flags, picture->fence);
2564
2565 next_buffer(dec);
2566 return 0;
2567 }
2568
radeon_dec_jpeg_check_format(struct radeon_decoder * dec,enum pipe_format format,unsigned sampling_factor)2569 static bool radeon_dec_jpeg_check_format(struct radeon_decoder *dec, enum pipe_format format,
2570 unsigned sampling_factor)
2571 {
2572 enum pipe_format expected_format;
2573
2574 switch (sampling_factor) {
2575 case 0x221111:
2576 expected_format = PIPE_FORMAT_NV12;
2577 break;
2578 case 0x211111:
2579 case 0x221212:
2580 case 0x222121:
2581 expected_format = PIPE_FORMAT_YUYV;
2582 break;
2583 case 0x111111:
2584 case 0x222222:
2585 case 0x444444:
2586 expected_format = PIPE_FORMAT_Y8_U8_V8_444_UNORM;
2587 break;
2588 case 0x121111:
2589 expected_format = PIPE_FORMAT_Y8_U8_V8_440_UNORM;
2590 break;
2591 case 0x11:
2592 case 0x44:
2593 expected_format = PIPE_FORMAT_Y8_400_UNORM;
2594 break;
2595 default:
2596 RADEON_DEC_ERR("Unsupported sampling factor 0x%x\n", sampling_factor);
2597 return false;
2598 }
2599
2600 /* Format conversion */
2601 if (format == PIPE_FORMAT_R8G8B8A8_UNORM ||
2602 format == PIPE_FORMAT_A8R8G8B8_UNORM ||
2603 format == PIPE_FORMAT_R8_G8_B8_UNORM)
2604 return true;
2605
2606 return expected_format == format;
2607 }
2608
2609 /**
2610 * end decoding of the current jpeg frame
2611 */
radeon_dec_jpeg_end_frame(struct pipe_video_codec * decoder,struct pipe_video_buffer * target,struct pipe_picture_desc * picture)2612 static int radeon_dec_jpeg_end_frame(struct pipe_video_codec *decoder, struct pipe_video_buffer *target,
2613 struct pipe_picture_desc *picture)
2614 {
2615 struct radeon_decoder *dec = (struct radeon_decoder *)decoder;
2616 struct pipe_mjpeg_picture_desc *pic = (struct pipe_mjpeg_picture_desc *)picture;
2617
2618 assert(decoder);
2619
2620 if (!radeon_dec_jpeg_check_format(dec, target->buffer_format, pic->picture_parameter.sampling_factor))
2621 RADEON_DEC_ERR("Decode format check failed\n");
2622
2623 if (dec->error)
2624 return 1;
2625
2626 dec->jpg.crop_x = ROUND_DOWN_TO(pic->picture_parameter.crop_x, VL_MACROBLOCK_WIDTH);
2627 dec->jpg.crop_y = ROUND_DOWN_TO(pic->picture_parameter.crop_y, VL_MACROBLOCK_HEIGHT);
2628 dec->jpg.crop_width = align(pic->picture_parameter.crop_width, VL_MACROBLOCK_WIDTH);
2629 dec->jpg.crop_height = align(pic->picture_parameter.crop_height, VL_MACROBLOCK_HEIGHT);
2630 if (dec->jpg.crop_x + dec->jpg.crop_width > pic->picture_parameter.picture_width)
2631 dec->jpg.crop_width = 0;
2632 if (dec->jpg.crop_y + dec->jpg.crop_height > pic->picture_parameter.picture_height)
2633 dec->jpg.crop_height = 0;
2634 dec->send_cmd(dec, target, picture);
2635 dec->ws->cs_flush(&dec->jcs[dec->cb_idx], picture->flush_flags, NULL);
2636 next_buffer(dec);
2637 dec->cb_idx = (dec->cb_idx+1) % dec->njctx;
2638 return 0;
2639 }
2640
/**
 * pipe_video_codec::flush hook.
 *
 * Intentionally empty: this function submits nothing itself.
 */
static void radeon_dec_flush(struct pipe_video_codec *decoder)
{
   /* nothing to do */
}
2647
radeon_dec_fence_wait(struct pipe_video_codec * decoder,struct pipe_fence_handle * fence,uint64_t timeout)2648 static int radeon_dec_fence_wait(struct pipe_video_codec *decoder,
2649 struct pipe_fence_handle *fence,
2650 uint64_t timeout)
2651 {
2652 struct radeon_decoder *dec = (struct radeon_decoder *)decoder;
2653
2654 return dec->ws->fence_wait(dec->ws, fence, timeout);
2655 }
2656
radeon_dec_destroy_fence(struct pipe_video_codec * decoder,struct pipe_fence_handle * fence)2657 static void radeon_dec_destroy_fence(struct pipe_video_codec *decoder,
2658 struct pipe_fence_handle *fence)
2659 {
2660 struct radeon_decoder *dec = (struct radeon_decoder *)decoder;
2661
2662 dec->ws->fence_reference(dec->ws, &fence, NULL);
2663 }
2664
2665 /**
2666 * create and HW decoder
2667 */
radeon_create_decoder(struct pipe_context * context,const struct pipe_video_codec * templ)2668 struct pipe_video_codec *radeon_create_decoder(struct pipe_context *context,
2669 const struct pipe_video_codec *templ)
2670 {
2671 struct si_context *sctx = (struct si_context *)context;
2672 struct radeon_winsys *ws = sctx->ws;
2673 unsigned width = templ->width, height = templ->height;
2674 unsigned bs_buf_size, stream_type = 0, ring = AMD_IP_VCN_DEC;
2675 struct radeon_decoder *dec;
2676 int r, i;
2677
2678 switch (u_reduce_video_profile(templ->profile)) {
2679 case PIPE_VIDEO_FORMAT_MPEG12:
2680 if (templ->entrypoint > PIPE_VIDEO_ENTRYPOINT_BITSTREAM)
2681 return vl_create_mpeg12_decoder(context, templ);
2682 stream_type = RDECODE_CODEC_MPEG2_VLD;
2683 break;
2684 case PIPE_VIDEO_FORMAT_MPEG4:
2685 width = align(width, VL_MACROBLOCK_WIDTH);
2686 height = align(height, VL_MACROBLOCK_HEIGHT);
2687 stream_type = RDECODE_CODEC_MPEG4;
2688 break;
2689 case PIPE_VIDEO_FORMAT_VC1:
2690 stream_type = RDECODE_CODEC_VC1;
2691 break;
2692 case PIPE_VIDEO_FORMAT_MPEG4_AVC:
2693 width = align(width, VL_MACROBLOCK_WIDTH);
2694 height = align(height, VL_MACROBLOCK_HEIGHT);
2695 stream_type = RDECODE_CODEC_H264_PERF;
2696 break;
2697 case PIPE_VIDEO_FORMAT_HEVC:
2698 stream_type = RDECODE_CODEC_H265;
2699 break;
2700 case PIPE_VIDEO_FORMAT_VP9:
2701 stream_type = RDECODE_CODEC_VP9;
2702 break;
2703 case PIPE_VIDEO_FORMAT_AV1:
2704 stream_type = RDECODE_CODEC_AV1;
2705 break;
2706 case PIPE_VIDEO_FORMAT_JPEG:
2707 stream_type = RDECODE_CODEC_JPEG;
2708 ring = AMD_IP_VCN_JPEG;
2709 break;
2710 default:
2711 assert(0);
2712 break;
2713 }
2714
2715 dec = CALLOC_STRUCT(radeon_decoder);
2716
2717 if (!dec)
2718 return NULL;
2719
2720 if (sctx->vcn_has_ctx) {
2721 dec->ectx = context->screen->context_create(context->screen, NULL, PIPE_CONTEXT_COMPUTE_ONLY);
2722 if (!dec->ectx)
2723 sctx->vcn_has_ctx = false;
2724 }
2725
2726 dec->base = *templ;
2727 dec->base.context = (sctx->vcn_has_ctx) ? dec->ectx : context;
2728 dec->base.width = width;
2729 dec->base.height = height;
2730 dec->max_width = width;
2731 dec->max_height = height;
2732 dec->base.destroy = radeon_dec_destroy;
2733 dec->base.begin_frame = radeon_dec_begin_frame;
2734 dec->base.decode_macroblock = radeon_dec_decode_macroblock;
2735 dec->base.decode_bitstream = radeon_dec_decode_bitstream;
2736 dec->base.end_frame = radeon_dec_end_frame;
2737 dec->base.flush = radeon_dec_flush;
2738 dec->base.fence_wait = radeon_dec_fence_wait;
2739 dec->base.destroy_fence = radeon_dec_destroy_fence;
2740
2741 dec->stream_type = stream_type;
2742 dec->stream_handle = si_vid_alloc_stream_handle();
2743 dec->screen = context->screen;
2744 dec->ws = ws;
2745
2746 if (u_reduce_video_profile(templ->profile) != PIPE_VIDEO_FORMAT_JPEG &&
2747 (sctx->vcn_ip_ver >= VCN_4_0_0)) {
2748 dec->vcn_dec_sw_ring = true;
2749 ring = AMD_IP_VCN_UNIFIED;
2750 }
2751
2752 dec->sq.signature_ib_total_size_in_dw = NULL;
2753 dec->sq.signature_ib_checksum = NULL;
2754 dec->sq.engine_ib_size_of_packages = NULL;
2755
2756 if (!ws->cs_create(&dec->cs,
2757 (sctx->vcn_has_ctx) ? ((struct si_context *)dec->ectx)->ctx : sctx->ctx,
2758 ring, NULL, NULL)) {
2759 RADEON_DEC_ERR("Can't get command submission context.\n");
2760 goto error;
2761 }
2762
2763 if (dec->stream_type == RDECODE_CODEC_JPEG) {
2764
2765 if (((struct si_screen*)dec->screen)->info.ip[AMD_IP_VCN_JPEG].num_instances > 1 &&
2766 ((struct si_screen*)dec->screen)->info.ip[AMD_IP_VCN_JPEG].num_instances <= MAX_JPEG_INST)
2767 dec->njctx = ((struct si_screen*)dec->screen)->info.ip[AMD_IP_VCN_JPEG].num_instances;
2768 else
2769 dec->njctx = 1;
2770
2771 dec->jctx = (struct radeon_winsys_ctx **) CALLOC(dec->njctx,
2772 sizeof(struct radeon_winsys_ctx *));
2773 dec->jcs = (struct radeon_cmdbuf *) CALLOC(dec->njctx, sizeof(struct radeon_cmdbuf));
2774 if(!dec->jctx || !dec->jcs)
2775 goto err;
2776 for (i = 0; i < dec->njctx; i++) {
2777 /* Initialize the context handle and the command stream. */
2778 dec->jctx[i] = dec->ws->ctx_create(dec->ws, RADEON_CTX_PRIORITY_MEDIUM,
2779 sctx->context_flags & PIPE_CONTEXT_LOSE_CONTEXT_ON_RESET);
2780 if (!sctx->ctx)
2781 goto error;
2782 if (!dec->ws->cs_create(&dec->jcs[i], dec->jctx[i], ring, NULL, NULL)) {
2783 RADEON_DEC_ERR("Can't get additional command submission context for mJPEG.\n");
2784 goto error;
2785 }
2786 }
2787 dec->base.end_frame = radeon_dec_jpeg_end_frame;
2788 dec->cb_idx = 0;
2789 }
2790
2791 for (i = 0; i < ARRAY_SIZE(dec->render_pic_list); i++)
2792 dec->render_pic_list[i] = NULL;
2793
2794 if ((sctx->vcn_ip_ver >= VCN_3_0_0) && (stream_type == RDECODE_CODEC_H264_PERF)) {
2795 for (i = 0; i < ARRAY_SIZE(dec->h264_valid_ref_num); i++)
2796 dec->h264_valid_ref_num[i] = (unsigned) -1;
2797 for (i = 0; i < ARRAY_SIZE(dec->h264_valid_poc_num); i++)
2798 dec->h264_valid_poc_num[i] = (unsigned) -1;
2799 }
2800
2801 if (dec->stream_type == RDECODE_CODEC_JPEG) {
2802 if (sctx->vcn_ip_ver == VCN_4_0_3)
2803 dec->num_dec_bufs = dec->njctx;
2804 else
2805 dec->num_dec_bufs = dec->njctx * NUM_BUFFERS;
2806 } else
2807 dec->num_dec_bufs = NUM_BUFFERS;
2808
2809 bs_buf_size = align(width * height / 32, 128);
2810 dec->msg_fb_it_probs_buffers = (struct rvid_buffer *) CALLOC(dec->num_dec_bufs, sizeof(struct rvid_buffer));
2811 dec->bs_buffers = (struct rvid_buffer *) CALLOC(dec->num_dec_bufs, sizeof(struct rvid_buffer));
2812 if(!dec->msg_fb_it_probs_buffers || !dec->bs_buffers)
2813 goto error;
2814
2815 for (i = 0; i < dec->num_dec_bufs; ++i) {
2816 unsigned msg_fb_it_probs_size = FB_BUFFER_OFFSET + FB_BUFFER_SIZE;
2817 if (have_it(dec))
2818 msg_fb_it_probs_size += IT_SCALING_TABLE_SIZE;
2819 else if (have_probs(dec))
2820 msg_fb_it_probs_size += (dec->stream_type == RDECODE_CODEC_VP9) ?
2821 VP9_PROBS_TABLE_SIZE :
2822 sizeof(rvcn_dec_av1_segment_fg_t);
2823 /* use vram to improve performance, workaround an unknown bug */
2824 if (!si_vid_create_buffer(dec->screen, &dec->msg_fb_it_probs_buffers[i], msg_fb_it_probs_size,
2825 PIPE_USAGE_DEFAULT)) {
2826 RADEON_DEC_ERR("Can't allocate message buffers.\n");
2827 goto error;
2828 }
2829
2830 if (!si_vid_create_buffer(dec->screen, &dec->bs_buffers[i], bs_buf_size,
2831 PIPE_USAGE_STAGING)) {
2832 RADEON_DEC_ERR("Can't allocate bitstream buffers.\n");
2833 goto error;
2834 }
2835
2836 if (have_probs(dec) && dec->stream_type == RDECODE_CODEC_VP9) {
2837 struct rvid_buffer *buf;
2838 void *ptr;
2839
2840 buf = &dec->msg_fb_it_probs_buffers[i];
2841 ptr = dec->ws->buffer_map(dec->ws, buf->res->buf, &dec->cs,
2842 PIPE_MAP_WRITE | RADEON_MAP_TEMPORARY);
2843 ptr += FB_BUFFER_OFFSET + FB_BUFFER_SIZE;
2844 fill_probs_table(ptr);
2845 dec->ws->buffer_unmap(dec->ws, buf->res->buf);
2846 dec->bs_ptr = NULL;
2847 }
2848 }
2849
2850 if ((sctx->vcn_ip_ver >= VCN_3_0_0) &&
2851 (stream_type == RDECODE_CODEC_VP9 ||
2852 stream_type == RDECODE_CODEC_AV1 ||
2853 ((stream_type == RDECODE_CODEC_H265) && templ->expect_chunked_decode) ||
2854 ((stream_type == RDECODE_CODEC_H264_PERF) && templ->expect_chunked_decode)))
2855 dec->dpb_type = DPB_DYNAMIC_TIER_2;
2856 else if (sctx->vcn_ip_ver <= VCN_2_6_0 && stream_type == RDECODE_CODEC_VP9)
2857 dec->dpb_type = DPB_DYNAMIC_TIER_1;
2858 else
2859 dec->dpb_type = DPB_MAX_RES;
2860
2861 dec->db_alignment = (sctx->vcn_ip_ver >= VCN_2_0_0 &&
2862 dec->base.width > 32 && (dec->stream_type == RDECODE_CODEC_VP9 ||
2863 dec->stream_type == RDECODE_CODEC_AV1 ||
2864 dec->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10)) ? 64 : 32;
2865
2866 if (sctx->vcn_ip_ver >= VCN_5_0_0) {
2867 if (stream_type == RDECODE_CODEC_VP9 ||
2868 stream_type == RDECODE_CODEC_AV1 ||
2869 stream_type == RDECODE_CODEC_H265 ||
2870 stream_type == RDECODE_CODEC_H264_PERF)
2871 dec->db_alignment = 64;
2872 }
2873
2874 if (dec->dpb_type != DPB_DYNAMIC_TIER_2)
2875 dec->dpb_size = calc_dpb_size(dec);
2876
2877 if (!si_vid_create_buffer(dec->screen, &dec->sessionctx, RDECODE_SESSION_CONTEXT_SIZE,
2878 PIPE_USAGE_DEFAULT)) {
2879 RADEON_DEC_ERR("Can't allocate session ctx.\n");
2880 goto error;
2881 }
2882
2883 dec->addr_gfx_mode = RDECODE_ARRAY_MODE_LINEAR;
2884 dec->av1_version = RDECODE_AV1_VER_0;
2885
2886 switch (sctx->vcn_ip_ver) {
2887 case VCN_1_0_0:
2888 case VCN_1_0_1:
2889 dec->reg.data0 = RDECODE_VCN1_GPCOM_VCPU_DATA0;
2890 dec->reg.data1 = RDECODE_VCN1_GPCOM_VCPU_DATA1;
2891 dec->reg.cmd = RDECODE_VCN1_GPCOM_VCPU_CMD;
2892 dec->reg.cntl = RDECODE_VCN1_ENGINE_CNTL;
2893 dec->jpg_reg.version = RDECODE_JPEG_REG_VER_V1;
2894 break;
2895 case VCN_2_0_0:
2896 case VCN_2_0_2:
2897 case VCN_2_0_3:
2898 case VCN_2_2_0:
2899 dec->reg.data0 = RDECODE_VCN2_GPCOM_VCPU_DATA0;
2900 dec->reg.data1 = RDECODE_VCN2_GPCOM_VCPU_DATA1;
2901 dec->reg.cmd = RDECODE_VCN2_GPCOM_VCPU_CMD;
2902 dec->reg.cntl = RDECODE_VCN2_ENGINE_CNTL;
2903 dec->jpg_reg.version = RDECODE_JPEG_REG_VER_V2;
2904 break;
2905 case VCN_2_5_0:
2906 case VCN_2_6_0:
2907 case VCN_3_0_0:
2908 case VCN_3_0_2:
2909 case VCN_3_0_16:
2910 case VCN_3_0_33:
2911 case VCN_3_1_1:
2912 case VCN_3_1_2:
2913 dec->reg.data0 = RDECODE_VCN2_5_GPCOM_VCPU_DATA0;
2914 dec->reg.data1 = RDECODE_VCN2_5_GPCOM_VCPU_DATA1;
2915 dec->reg.cmd = RDECODE_VCN2_5_GPCOM_VCPU_CMD;
2916 dec->reg.cntl = RDECODE_VCN2_5_ENGINE_CNTL;
2917 dec->jpg_reg.version = RDECODE_JPEG_REG_VER_V2;
2918 break;
2919 case VCN_4_0_3:
2920 dec->jpg_reg.version = RDECODE_JPEG_REG_VER_V3;
2921 dec->addr_gfx_mode = RDECODE_ARRAY_MODE_ADDRLIB_SEL_GFX9;
2922 dec->av1_version = RDECODE_AV1_VER_1;
2923 break;
2924 case VCN_4_0_0:
2925 case VCN_4_0_2:
2926 case VCN_4_0_4:
2927 case VCN_4_0_5:
2928 case VCN_4_0_6:
2929 dec->jpg_reg.version = RDECODE_JPEG_REG_VER_V2;
2930 dec->addr_gfx_mode = RDECODE_ARRAY_MODE_ADDRLIB_SEL_GFX11;
2931 dec->av1_version = RDECODE_AV1_VER_1;
2932 break;
2933 case VCN_5_0_0:
2934 dec->jpg_reg.version = RDECODE_JPEG_REG_VER_V3;
2935 dec->addr_gfx_mode = RDECODE_ARRAY_MODE_ADDRLIB_SEL_GFX11;
2936 dec->av1_version = RDECODE_AV1_VER_2;
2937 break;
2938 case VCN_5_0_1:
2939 dec->jpg_reg.version = RDECODE_JPEG_REG_VER_V3;
2940 dec->addr_gfx_mode = RDECODE_ARRAY_MODE_ADDRLIB_SEL_GFX9;
2941 dec->av1_version = RDECODE_AV1_VER_2;
2942 break;
2943 default:
2944 RADEON_DEC_ERR("VCN is not supported.\n");
2945 goto error;
2946 }
2947
2948 if (dec->stream_type != RDECODE_CODEC_JPEG) {
2949 map_msg_fb_it_probs_buf(dec);
2950 rvcn_dec_message_create(dec);
2951 send_msg_buf(dec);
2952 r = flush(dec, 0, NULL);
2953 if (r)
2954 goto error;
2955 } else if (dec->jpg_reg.version != RDECODE_JPEG_REG_VER_V1) {
2956 dec->jpg_reg.jrbc_ib_cond_rd_timer = vcnipUVD_JRBC_IB_COND_RD_TIMER;
2957 dec->jpg_reg.jrbc_ib_ref_data = vcnipUVD_JRBC_IB_REF_DATA;
2958 dec->jpg_reg.jpeg_rb_base = vcnipUVD_JPEG_RB_BASE;
2959 dec->jpg_reg.jpeg_rb_size = vcnipUVD_JPEG_RB_SIZE;
2960 dec->jpg_reg.jpeg_rb_wptr = vcnipUVD_JPEG_RB_WPTR;
2961 dec->jpg_reg.jpeg_int_en = vcnipUVD_JPEG_INT_EN;
2962 dec->jpg_reg.jpeg_cntl = vcnipUVD_JPEG_CNTL;
2963 dec->jpg_reg.jpeg_rb_rptr = vcnipUVD_JPEG_RB_RPTR;
2964 if (dec->jpg_reg.version == RDECODE_JPEG_REG_VER_V2) {
2965 dec->jpg_reg.jpeg_dec_soft_rst = vcnipUVD_JPEG_DEC_SOFT_RST;
2966 dec->jpg_reg.lmi_jpeg_read_64bit_bar_high = vcnipUVD_LMI_JPEG_READ_64BIT_BAR_HIGH;
2967 dec->jpg_reg.lmi_jpeg_read_64bit_bar_low = vcnipUVD_LMI_JPEG_READ_64BIT_BAR_LOW;
2968 dec->jpg_reg.jpeg_pitch = vcnipUVD_JPEG_PITCH;
2969 dec->jpg_reg.jpeg_uv_pitch = vcnipUVD_JPEG_UV_PITCH;
2970 dec->jpg_reg.dec_addr_mode = vcnipJPEG_DEC_ADDR_MODE;
2971 dec->jpg_reg.dec_y_gfx10_tiling_surface = vcnipJPEG_DEC_Y_GFX10_TILING_SURFACE;
2972 dec->jpg_reg.dec_uv_gfx10_tiling_surface = vcnipJPEG_DEC_UV_GFX10_TILING_SURFACE;
2973 dec->jpg_reg.lmi_jpeg_write_64bit_bar_high = vcnipUVD_LMI_JPEG_WRITE_64BIT_BAR_HIGH;
2974 dec->jpg_reg.lmi_jpeg_write_64bit_bar_low = vcnipUVD_LMI_JPEG_WRITE_64BIT_BAR_LOW;
2975 dec->jpg_reg.jpeg_tier_cntl2 = vcnipUVD_JPEG_TIER_CNTL2;
2976 dec->jpg_reg.jpeg_outbuf_cntl = vcnipUVD_JPEG_OUTBUF_CNTL;
2977 dec->jpg_reg.jpeg_outbuf_rptr = vcnipUVD_JPEG_OUTBUF_RPTR;
2978 dec->jpg_reg.jpeg_outbuf_wptr = vcnipUVD_JPEG_OUTBUF_WPTR;
2979 dec->jpg_reg.jpeg_index = vcnipUVD_JPEG_INDEX;
2980 dec->jpg_reg.jpeg_data = vcnipUVD_JPEG_DATA;
2981 } else {
2982 dec->jpg_reg.jpeg_dec_soft_rst = vcnipUVD_JPEG_DEC_SOFT_RST_1;
2983 dec->jpg_reg.lmi_jpeg_read_64bit_bar_high = vcnipUVD_LMI_JPEG_READ_64BIT_BAR_HIGH_1;
2984 dec->jpg_reg.lmi_jpeg_read_64bit_bar_low = vcnipUVD_LMI_JPEG_READ_64BIT_BAR_LOW_1;
2985 dec->jpg_reg.jpeg_pitch = vcnipUVD_JPEG_PITCH_1;
2986 dec->jpg_reg.jpeg_uv_pitch = vcnipUVD_JPEG_UV_PITCH_1;
2987 dec->jpg_reg.dec_addr_mode = vcnipJPEG_DEC_ADDR_MODE_1;
2988 dec->jpg_reg.dec_y_gfx10_tiling_surface = vcnipJPEG_DEC_Y_GFX10_TILING_SURFACE_1;
2989 dec->jpg_reg.dec_uv_gfx10_tiling_surface = vcnipJPEG_DEC_UV_GFX10_TILING_SURFACE_1;
2990 dec->jpg_reg.lmi_jpeg_write_64bit_bar_high = vcnipUVD_LMI_JPEG_WRITE_64BIT_BAR_HIGH_1;
2991 dec->jpg_reg.lmi_jpeg_write_64bit_bar_low = vcnipUVD_LMI_JPEG_WRITE_64BIT_BAR_LOW_1;
2992 dec->jpg_reg.jpeg_tier_cntl2 = vcnipUVD_JPEG_TIER_CNTL2_1;
2993 dec->jpg_reg.jpeg_outbuf_cntl = vcnipUVD_JPEG_OUTBUF_CNTL_1;
2994 dec->jpg_reg.jpeg_outbuf_rptr = vcnipUVD_JPEG_OUTBUF_RPTR_1;
2995 dec->jpg_reg.jpeg_outbuf_wptr = vcnipUVD_JPEG_OUTBUF_WPTR_1;
2996 dec->jpg_reg.jpeg_luma_base0_0 = vcnipUVD_JPEG_LUMA_BASE0_0;
2997 dec->jpg_reg.jpeg_chroma_base0_0 = vcnipUVD_JPEG_CHROMA_BASE0_0;
2998 dec->jpg_reg.jpeg_chromav_base0_0 = vcnipUVD_JPEG_CHROMAV_BASE0_0;
2999 }
3000 }
3001
3002 next_buffer(dec);
3003
3004 if (stream_type == RDECODE_CODEC_JPEG)
3005 dec->send_cmd = send_cmd_jpeg;
3006 else
3007 dec->send_cmd = send_cmd_dec;
3008
3009
3010 if (dec->dpb_type == DPB_DYNAMIC_TIER_2) {
3011 list_inithead(&dec->dpb_ref_list);
3012 list_inithead(&dec->dpb_unref_list);
3013 }
3014
3015 dec->tmz_ctx = sctx->vcn_ip_ver < VCN_2_2_0 && sctx->vcn_ip_ver != VCN_UNKNOWN;
3016
3017 return &dec->base;
3018
3019 error:
3020 dec->ws->cs_destroy(&dec->cs);
3021 if (dec->ectx)
3022 dec->ectx->destroy(dec->ectx);
3023
3024 if (dec->stream_type == RDECODE_CODEC_JPEG) {
3025 for (i = 0; i < dec->njctx; i++) {
3026 dec->ws->cs_destroy(&dec->jcs[i]);
3027 dec->ws->ctx_destroy(dec->jctx[i]);
3028 }
3029 }
3030
3031 if (dec->msg_fb_it_probs_buffers && dec->bs_buffers) {
3032 for (i = 0; i < dec->num_dec_bufs; ++i) {
3033 si_vid_destroy_buffer(&dec->msg_fb_it_probs_buffers[i]);
3034 si_vid_destroy_buffer(&dec->bs_buffers[i]);
3035 }
3036 FREE(dec->msg_fb_it_probs_buffers);
3037 FREE(dec->bs_buffers);
3038 }
3039
3040 if (dec->dpb_type != DPB_DYNAMIC_TIER_2)
3041 si_vid_destroy_buffer(&dec->dpb);
3042 si_vid_destroy_buffer(&dec->ctx);
3043 si_vid_destroy_buffer(&dec->sessionctx);
3044
3045 err:
3046 if (dec->jcs)
3047 FREE(dec->jcs);
3048 if (dec->jctx)
3049 FREE(dec->jctx);
3050 FREE(dec);
3051
3052 return NULL;
3053 }
3054
get_buffer_format(struct radeon_decoder * dec)3055 static enum pipe_format get_buffer_format(struct radeon_decoder *dec)
3056 {
3057 switch (dec->ref_codec.bts) {
3058 case CODEC_10_BITS:
3059 return PIPE_FORMAT_P010;
3060 case CODEC_12_BITS:
3061 return PIPE_FORMAT_P012;
3062 default:
3063 return PIPE_FORMAT_NV12;
3064 }
3065 }
3066