1 /**************************************************************************
2 *
3 * Copyright 2017 Advanced Micro Devices, Inc.
4 *
5 * SPDX-License-Identifier: MIT
6 *
7 **************************************************************************/
8
9 #include "radeon_vcn_dec.h"
10
11 #include "pipe/p_video_codec.h"
12 #include "radeonsi/si_pipe.h"
13 #include "util/u_memory.h"
14 #include "util/u_video.h"
15 #include "vl/vl_mpeg12_decoder.h"
16 #include "vl/vl_probs_table.h"
17 #include "pspdecryptionparam.h"
18
19 #include <assert.h>
20 #include <stdio.h>
21
22 #include "ac_vcn_av1_default.h"
23
/* NOTE(review): neither FB_BUFFER_* constant is used in the part of the file
 * visible here; presumably they describe a feedback buffer — confirm at the
 * use site. */
#define FB_BUFFER_OFFSET 0x2000
#define FB_BUFFER_SIZE 2048
/* 992 = 6*16 + 6*64 + 6*64 + 2*64, which is the number of bytes
 * get_h265_msg() copies into dec->it (4x4, 8x8, 16x16 and 32x32 lists). */
#define IT_SCALING_TABLE_SIZE 992
/* Probability data plus 256 extra bytes; get_vp9_msg() clears 256 bytes of
 * segment data in the same buffer.
 * NOTE(review): presumably the extra 256 bytes are that segment area — confirm. */
#define VP9_PROBS_TABLE_SIZE (RDECODE_VP9_PROBS_DATA_SIZE + 256)

/* Per-codec reference counts.
 * NOTE(review): not used in the visible part of the file; presumably they size
 * the decoded picture buffer — confirm in calc_dpb_size(). */
#define NUM_MPEG2_REFS 6
#define NUM_H264_REFS 17
#define NUM_VC1_REFS 5
#define NUM_VP9_REFS 8
#define NUM_AV1_REFS 8
#define NUM_AV1_REFS_PER_FRAME 7

/* Forward declarations; the definitions are not in the visible part of the file. */
static unsigned calc_dpb_size(struct radeon_decoder *dec);
static unsigned calc_ctx_size_h264_perf(struct radeon_decoder *dec);
static unsigned calc_ctx_size_h265_main(struct radeon_decoder *dec);
static unsigned calc_ctx_size_h265_main10(struct radeon_decoder *dec,
                                          struct pipe_h265_picture_desc *pic);
41
/**
 * Destroy callback handed to vl_video_buffer_set_associated_data().
 *
 * The value registered as associated data in this file is a small integer
 * packed into the pointer itself (see the (void *)(uintptr_t) casts at the
 * registration sites), so there is nothing to free here.
 *
 * @param data  the associated-data value; ignored.
 */
static void radeon_dec_destroy_associated_data(void *data)
{
   /* Intentionally a no-op: the "pointer" only carries an index. */
   (void)data;
}
46
get_current_pic_index(struct radeon_decoder * dec,struct pipe_video_buffer * target,unsigned char * curr_pic_idx)47 static void get_current_pic_index(struct radeon_decoder *dec,
48 struct pipe_video_buffer *target,
49 unsigned char *curr_pic_idx)
50 {
51 for (int i = 0; i < ARRAY_SIZE(dec->render_pic_list); ++i) {
52 if (dec->render_pic_list[i] && dec->render_pic_list[i] == target) {
53 if (target->codec != NULL)
54 *curr_pic_idx = (uintptr_t)vl_video_buffer_get_associated_data(target, &dec->base);
55 else {
56 *curr_pic_idx = i;
57 vl_video_buffer_set_associated_data(target, &dec->base, (void *)(uintptr_t)i,
58 &radeon_dec_destroy_associated_data);
59 }
60 break;
61 } else if (!dec->render_pic_list[i]) {
62 dec->render_pic_list[i] = target;
63 *curr_pic_idx = i;
64 vl_video_buffer_set_associated_data(target, &dec->base, (void *)(uintptr_t)i,
65 &radeon_dec_destroy_associated_data);
66 break;
67 }
68 }
69 }
70
get_h264_msg(struct radeon_decoder * dec,struct pipe_video_buffer * target,struct pipe_h264_picture_desc * pic)71 static rvcn_dec_message_avc_t get_h264_msg(struct radeon_decoder *dec,
72 struct pipe_video_buffer *target,
73 struct pipe_h264_picture_desc *pic)
74 {
75 rvcn_dec_message_avc_t result;
76 unsigned i, j, k;
77
78 memset(&result, 0, sizeof(result));
79 switch (pic->base.profile) {
80 case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE:
81 case PIPE_VIDEO_PROFILE_MPEG4_AVC_CONSTRAINED_BASELINE:
82 result.profile = RDECODE_H264_PROFILE_BASELINE;
83 break;
84
85 case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN:
86 result.profile = RDECODE_H264_PROFILE_MAIN;
87 break;
88
89 case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH:
90 result.profile = RDECODE_H264_PROFILE_HIGH;
91 break;
92
93 default:
94 assert(0);
95 break;
96 }
97
98 result.level = dec->base.level;
99
100 result.sps_info_flags = 0;
101 result.sps_info_flags |= pic->pps->sps->direct_8x8_inference_flag << 0;
102 result.sps_info_flags |= pic->pps->sps->mb_adaptive_frame_field_flag << 1;
103 result.sps_info_flags |= pic->pps->sps->frame_mbs_only_flag << 2;
104 result.sps_info_flags |= pic->pps->sps->delta_pic_order_always_zero_flag << 3;
105 result.sps_info_flags |= ((dec->dpb_type == DPB_DYNAMIC_TIER_2) ? 0 : 1)
106 << RDECODE_SPS_INFO_H264_EXTENSION_SUPPORT_FLAG_SHIFT;
107
108 result.bit_depth_luma_minus8 = pic->pps->sps->bit_depth_luma_minus8;
109 result.bit_depth_chroma_minus8 = pic->pps->sps->bit_depth_chroma_minus8;
110 result.log2_max_frame_num_minus4 = pic->pps->sps->log2_max_frame_num_minus4;
111 result.pic_order_cnt_type = pic->pps->sps->pic_order_cnt_type;
112 result.log2_max_pic_order_cnt_lsb_minus4 = pic->pps->sps->log2_max_pic_order_cnt_lsb_minus4;
113
114 switch (dec->base.chroma_format) {
115 case PIPE_VIDEO_CHROMA_FORMAT_NONE:
116 break;
117 case PIPE_VIDEO_CHROMA_FORMAT_400:
118 result.chroma_format = 0;
119 break;
120 case PIPE_VIDEO_CHROMA_FORMAT_420:
121 result.chroma_format = 1;
122 break;
123 case PIPE_VIDEO_CHROMA_FORMAT_422:
124 result.chroma_format = 2;
125 break;
126 case PIPE_VIDEO_CHROMA_FORMAT_444:
127 result.chroma_format = 3;
128 break;
129 }
130
131 result.pps_info_flags = 0;
132 result.pps_info_flags |= pic->pps->transform_8x8_mode_flag << 0;
133 result.pps_info_flags |= pic->pps->redundant_pic_cnt_present_flag << 1;
134 result.pps_info_flags |= pic->pps->constrained_intra_pred_flag << 2;
135 result.pps_info_flags |= pic->pps->deblocking_filter_control_present_flag << 3;
136 result.pps_info_flags |= pic->pps->weighted_bipred_idc << 4;
137 result.pps_info_flags |= pic->pps->weighted_pred_flag << 6;
138 result.pps_info_flags |= pic->pps->bottom_field_pic_order_in_frame_present_flag << 7;
139 result.pps_info_flags |= pic->pps->entropy_coding_mode_flag << 8;
140
141 result.num_slice_groups_minus1 = pic->pps->num_slice_groups_minus1;
142 result.slice_group_map_type = pic->pps->slice_group_map_type;
143 result.slice_group_change_rate_minus1 = pic->pps->slice_group_change_rate_minus1;
144 result.pic_init_qp_minus26 = pic->pps->pic_init_qp_minus26;
145 result.chroma_qp_index_offset = pic->pps->chroma_qp_index_offset;
146 result.second_chroma_qp_index_offset = pic->pps->second_chroma_qp_index_offset;
147
148 memcpy(result.scaling_list_4x4, pic->pps->ScalingList4x4, 6 * 16);
149 memcpy(result.scaling_list_8x8, pic->pps->ScalingList8x8, 2 * 64);
150
151 memcpy(dec->it, result.scaling_list_4x4, 6 * 16);
152 memcpy((dec->it + 96), result.scaling_list_8x8, 2 * 64);
153
154 result.num_ref_frames = pic->num_ref_frames;
155
156 result.num_ref_idx_l0_active_minus1 = pic->num_ref_idx_l0_active_minus1;
157 result.num_ref_idx_l1_active_minus1 = pic->num_ref_idx_l1_active_minus1;
158
159 result.frame_num = pic->frame_num;
160 memcpy(result.frame_num_list, pic->frame_num_list, 4 * 16);
161 result.curr_field_order_cnt_list[0] = pic->field_order_cnt[0];
162 result.curr_field_order_cnt_list[1] = pic->field_order_cnt[1];
163 memcpy(result.field_order_cnt_list, pic->field_order_cnt_list, 4 * 16 * 2);
164 result.non_existing_frame_flags = 0;
165 result.used_for_reference_flags = 0;
166
167 if (dec->dpb_type != DPB_DYNAMIC_TIER_2) {
168 result.decoded_pic_idx = pic->frame_num;
169 goto end;
170 }
171
172 for (i = 0; i < ARRAY_SIZE(dec->render_pic_list); i++) {
173 for (j = 0; (pic->ref[j] != NULL) && (j < ARRAY_SIZE(dec->render_pic_list)); j++) {
174 if (dec->render_pic_list[i] == pic->ref[j])
175 break;
176 if (j == ARRAY_SIZE(dec->render_pic_list) - 1)
177 dec->render_pic_list[i] = NULL;
178 else if (pic->ref[j + 1] == NULL)
179 dec->render_pic_list[i] = NULL;
180 }
181 }
182 for (i = 0; i < ARRAY_SIZE(dec->render_pic_list); ++i) {
183 if (dec->render_pic_list[i] && dec->render_pic_list[i] == target) {
184 if (target->codec != NULL){
185 result.decoded_pic_idx =
186 (uintptr_t)vl_video_buffer_get_associated_data(target, &dec->base);
187 } else {
188 result.decoded_pic_idx = i;
189 vl_video_buffer_set_associated_data(target, &dec->base, (void *)(uintptr_t)i,
190 &radeon_dec_destroy_associated_data);
191 }
192 break;
193 }
194 }
195 if (i == ARRAY_SIZE(dec->render_pic_list)) {
196 for (i = 0; i < ARRAY_SIZE(dec->render_pic_list); ++i) {
197 if (!dec->render_pic_list[i]) {
198 dec->render_pic_list[i] = target;
199 result.decoded_pic_idx = i;
200 vl_video_buffer_set_associated_data(target, &dec->base, (void *)(uintptr_t)i,
201 &radeon_dec_destroy_associated_data);
202 break;
203 }
204 }
205 }
206 for (i = 0; i < ARRAY_SIZE(result.ref_frame_list); i++) {
207 result.ref_frame_list[i] = pic->ref[i] ?
208 (uintptr_t)vl_video_buffer_get_associated_data(pic->ref[i], &dec->base) : 0xff;
209
210 if (result.ref_frame_list[i] != 0xff) {
211 if (pic->top_is_reference[i])
212 result.used_for_reference_flags |= (1 << (2 * i));
213 if (pic->bottom_is_reference[i])
214 result.used_for_reference_flags |= (1 << (2 * i + 1));
215
216 if (pic->is_long_term[i])
217 result.ref_frame_list[i] |= 0x80;
218
219 result.curr_pic_ref_frame_num++;
220
221 for (j = 0; j < ARRAY_SIZE(dec->h264_valid_ref_num); j++) {
222 if ((dec->h264_valid_ref_num[j] != (unsigned)-1)
223 && (dec->h264_valid_ref_num[j] == result.frame_num_list[i]))
224 break;
225 }
226
227 for (k = 0; k < ARRAY_SIZE(dec->h264_valid_poc_num); k++) {
228 if ((dec->h264_valid_poc_num[k] != (unsigned)-1)
229 && ((dec->h264_valid_poc_num[k] == result.field_order_cnt_list[i][0])
230 || dec->h264_valid_poc_num[k] == result.field_order_cnt_list[i][1]))
231 break;
232 }
233 }
234 if (result.ref_frame_list[i] != 0xff && (j == ARRAY_SIZE(dec->h264_valid_ref_num))
235 && (k == ARRAY_SIZE(dec->h264_valid_poc_num))) {
236 result.non_existing_frame_flags |= 1 << i;
237 result.curr_pic_ref_frame_num--;
238 result.ref_frame_list[i] = 0xff;
239 }
240 }
241
242 /* if reference picture exists, however no reference picture found at the end
243 curr_pic_ref_frame_num == 0, which is not reasonable, should be corrected. */
244 if (result.used_for_reference_flags && (result.curr_pic_ref_frame_num == 0)) {
245 for (i = 0; i < ARRAY_SIZE(result.ref_frame_list); i++) {
246 result.ref_frame_list[i] = pic->ref[i] ?
247 (uintptr_t)vl_video_buffer_get_associated_data(pic->ref[i], &dec->base) : 0xff;
248 if (result.ref_frame_list[i] != 0xff) {
249 result.curr_pic_ref_frame_num++;
250 result.non_existing_frame_flags &= ~(1 << i);
251 break;
252 }
253 }
254 }
255
256 for (i = 0; i < ARRAY_SIZE(result.ref_frame_list); i++) {
257 if (result.ref_frame_list[i] != 0xff) {
258 dec->h264_valid_ref_num[i] = result.frame_num_list[i];
259 dec->h264_valid_poc_num[2 * i] = pic->top_is_reference[i] ?
260 result.field_order_cnt_list[i][0] : (unsigned) -1;
261 dec->h264_valid_poc_num[2 * i + 1] = pic->bottom_is_reference[i] ?
262 result.field_order_cnt_list[i][1] : (unsigned) -1;
263 } else {
264 dec->h264_valid_ref_num[i] =
265 dec->h264_valid_poc_num[2 * i] =
266 dec->h264_valid_poc_num[2 * i + 1] = (unsigned) -1;
267 }
268 }
269
270 dec->h264_valid_ref_num[ARRAY_SIZE(dec->h264_valid_ref_num) - 1] = result.frame_num;
271 dec->h264_valid_poc_num[ARRAY_SIZE(dec->h264_valid_poc_num) - 2] =
272 pic->field_pic_flag && pic->bottom_field_flag ?
273 (unsigned) -1 : result.curr_field_order_cnt_list[0];
274 dec->h264_valid_poc_num[ARRAY_SIZE(dec->h264_valid_poc_num) - 1] =
275 pic->field_pic_flag && !pic->bottom_field_flag ?
276 (unsigned) -1 : result.curr_field_order_cnt_list[1];
277
278 if (dec->dpb_type == DPB_DYNAMIC_TIER_2) {
279 dec->ref_codec.bts = CODEC_8_BITS;
280 dec->ref_codec.index = result.decoded_pic_idx;
281 dec->ref_codec.ref_size = 16;
282 memset(dec->ref_codec.ref_list, 0xff, sizeof(dec->ref_codec.ref_list));
283 memcpy(dec->ref_codec.ref_list, result.ref_frame_list, sizeof(result.ref_frame_list));
284 }
285
286 end:
287 return result;
288 }
289
get_h265_msg(struct radeon_decoder * dec,struct pipe_video_buffer * target,struct pipe_h265_picture_desc * pic)290 static rvcn_dec_message_hevc_t get_h265_msg(struct radeon_decoder *dec,
291 struct pipe_video_buffer *target,
292 struct pipe_h265_picture_desc *pic)
293 {
294 rvcn_dec_message_hevc_t result;
295 unsigned i, j;
296
297 memset(&result, 0, sizeof(result));
298 result.sps_info_flags = 0;
299 result.sps_info_flags |= pic->pps->sps->scaling_list_enabled_flag << 0;
300 result.sps_info_flags |= pic->pps->sps->amp_enabled_flag << 1;
301 result.sps_info_flags |= pic->pps->sps->sample_adaptive_offset_enabled_flag << 2;
302 result.sps_info_flags |= pic->pps->sps->pcm_enabled_flag << 3;
303 result.sps_info_flags |= pic->pps->sps->pcm_loop_filter_disabled_flag << 4;
304 result.sps_info_flags |= pic->pps->sps->long_term_ref_pics_present_flag << 5;
305 result.sps_info_flags |= pic->pps->sps->sps_temporal_mvp_enabled_flag << 6;
306 result.sps_info_flags |= pic->pps->sps->strong_intra_smoothing_enabled_flag << 7;
307 result.sps_info_flags |= pic->pps->sps->separate_colour_plane_flag << 8;
308 if (((struct si_screen *)dec->screen)->info.family == CHIP_CARRIZO)
309 result.sps_info_flags |= 1 << 9;
310 if (pic->UseRefPicList == true) {
311 result.sps_info_flags |= 1 << 10;
312 result.sps_info_flags |= 1 << 12;
313 }
314 if (pic->UseStRpsBits == true && pic->pps->st_rps_bits != 0) {
315 result.sps_info_flags |= 1 << 11;
316 result.st_rps_bits = pic->pps->st_rps_bits;
317 }
318
319 result.chroma_format = pic->pps->sps->chroma_format_idc;
320 result.bit_depth_luma_minus8 = pic->pps->sps->bit_depth_luma_minus8;
321 result.bit_depth_chroma_minus8 = pic->pps->sps->bit_depth_chroma_minus8;
322 result.log2_max_pic_order_cnt_lsb_minus4 = pic->pps->sps->log2_max_pic_order_cnt_lsb_minus4;
323 result.sps_max_dec_pic_buffering_minus1 = pic->pps->sps->sps_max_dec_pic_buffering_minus1;
324 result.log2_min_luma_coding_block_size_minus3 =
325 pic->pps->sps->log2_min_luma_coding_block_size_minus3;
326 result.log2_diff_max_min_luma_coding_block_size =
327 pic->pps->sps->log2_diff_max_min_luma_coding_block_size;
328 result.log2_min_transform_block_size_minus2 =
329 pic->pps->sps->log2_min_transform_block_size_minus2;
330 result.log2_diff_max_min_transform_block_size =
331 pic->pps->sps->log2_diff_max_min_transform_block_size;
332 result.max_transform_hierarchy_depth_inter = pic->pps->sps->max_transform_hierarchy_depth_inter;
333 result.max_transform_hierarchy_depth_intra = pic->pps->sps->max_transform_hierarchy_depth_intra;
334 result.pcm_sample_bit_depth_luma_minus1 = pic->pps->sps->pcm_sample_bit_depth_luma_minus1;
335 result.pcm_sample_bit_depth_chroma_minus1 = pic->pps->sps->pcm_sample_bit_depth_chroma_minus1;
336 result.log2_min_pcm_luma_coding_block_size_minus3 =
337 pic->pps->sps->log2_min_pcm_luma_coding_block_size_minus3;
338 result.log2_diff_max_min_pcm_luma_coding_block_size =
339 pic->pps->sps->log2_diff_max_min_pcm_luma_coding_block_size;
340 result.num_short_term_ref_pic_sets = pic->pps->sps->num_short_term_ref_pic_sets;
341
342 result.pps_info_flags = 0;
343 result.pps_info_flags |= pic->pps->dependent_slice_segments_enabled_flag << 0;
344 result.pps_info_flags |= pic->pps->output_flag_present_flag << 1;
345 result.pps_info_flags |= pic->pps->sign_data_hiding_enabled_flag << 2;
346 result.pps_info_flags |= pic->pps->cabac_init_present_flag << 3;
347 result.pps_info_flags |= pic->pps->constrained_intra_pred_flag << 4;
348 result.pps_info_flags |= pic->pps->transform_skip_enabled_flag << 5;
349 result.pps_info_flags |= pic->pps->cu_qp_delta_enabled_flag << 6;
350 result.pps_info_flags |= pic->pps->pps_slice_chroma_qp_offsets_present_flag << 7;
351 result.pps_info_flags |= pic->pps->weighted_pred_flag << 8;
352 result.pps_info_flags |= pic->pps->weighted_bipred_flag << 9;
353 result.pps_info_flags |= pic->pps->transquant_bypass_enabled_flag << 10;
354 result.pps_info_flags |= pic->pps->tiles_enabled_flag << 11;
355 result.pps_info_flags |= pic->pps->entropy_coding_sync_enabled_flag << 12;
356 result.pps_info_flags |= pic->pps->uniform_spacing_flag << 13;
357 result.pps_info_flags |= pic->pps->loop_filter_across_tiles_enabled_flag << 14;
358 result.pps_info_flags |= pic->pps->pps_loop_filter_across_slices_enabled_flag << 15;
359 result.pps_info_flags |= pic->pps->deblocking_filter_override_enabled_flag << 16;
360 result.pps_info_flags |= pic->pps->pps_deblocking_filter_disabled_flag << 17;
361 result.pps_info_flags |= pic->pps->lists_modification_present_flag << 18;
362 result.pps_info_flags |= pic->pps->slice_segment_header_extension_present_flag << 19;
363
364 result.num_extra_slice_header_bits = pic->pps->num_extra_slice_header_bits;
365 result.num_long_term_ref_pic_sps = pic->pps->sps->num_long_term_ref_pics_sps;
366 result.num_ref_idx_l0_default_active_minus1 = pic->pps->num_ref_idx_l0_default_active_minus1;
367 result.num_ref_idx_l1_default_active_minus1 = pic->pps->num_ref_idx_l1_default_active_minus1;
368 result.pps_cb_qp_offset = pic->pps->pps_cb_qp_offset;
369 result.pps_cr_qp_offset = pic->pps->pps_cr_qp_offset;
370 result.pps_beta_offset_div2 = pic->pps->pps_beta_offset_div2;
371 result.pps_tc_offset_div2 = pic->pps->pps_tc_offset_div2;
372 result.diff_cu_qp_delta_depth = pic->pps->diff_cu_qp_delta_depth;
373 result.num_tile_columns_minus1 = pic->pps->num_tile_columns_minus1;
374 result.num_tile_rows_minus1 = pic->pps->num_tile_rows_minus1;
375 result.log2_parallel_merge_level_minus2 = pic->pps->log2_parallel_merge_level_minus2;
376 result.init_qp_minus26 = pic->pps->init_qp_minus26;
377
378 for (i = 0; i < 19; ++i)
379 result.column_width_minus1[i] = pic->pps->column_width_minus1[i];
380
381 for (i = 0; i < 21; ++i)
382 result.row_height_minus1[i] = pic->pps->row_height_minus1[i];
383
384 result.num_delta_pocs_ref_rps_idx = pic->NumDeltaPocsOfRefRpsIdx;
385 result.curr_poc = pic->CurrPicOrderCntVal;
386
387 for (i = 0; i < ARRAY_SIZE(dec->render_pic_list); i++) {
388 for (j = 0;
389 (pic->ref[j] != NULL) && (j < ARRAY_SIZE(dec->render_pic_list));
390 j++) {
391 if (dec->render_pic_list[i] == pic->ref[j])
392 break;
393 if (j == ARRAY_SIZE(dec->render_pic_list) - 1)
394 dec->render_pic_list[i] = NULL;
395 else if (pic->ref[j + 1] == NULL)
396 dec->render_pic_list[i] = NULL;
397 }
398 }
399 for (i = 0; i < ARRAY_SIZE(dec->render_pic_list); i++) {
400 if (dec->render_pic_list[i] == NULL) {
401 dec->render_pic_list[i] = target;
402 result.curr_idx = i;
403 break;
404 }
405 }
406
407 vl_video_buffer_set_associated_data(target, &dec->base, (void *)(uintptr_t)result.curr_idx,
408 &radeon_dec_destroy_associated_data);
409
410 for (i = 0; i < 16; ++i) {
411 struct pipe_video_buffer *ref = pic->ref[i];
412 uintptr_t ref_pic = 0;
413
414 result.poc_list[i] = pic->PicOrderCntVal[i];
415
416 if (ref)
417 ref_pic = (uintptr_t)vl_video_buffer_get_associated_data(ref, &dec->base);
418 else
419 ref_pic = 0x7F;
420 result.ref_pic_list[i] = ref_pic;
421 }
422
423 for (i = 0; i < 8; ++i) {
424 result.ref_pic_set_st_curr_before[i] = 0xFF;
425 result.ref_pic_set_st_curr_after[i] = 0xFF;
426 result.ref_pic_set_lt_curr[i] = 0xFF;
427 }
428
429 for (i = 0; i < pic->NumPocStCurrBefore; ++i)
430 result.ref_pic_set_st_curr_before[i] = pic->RefPicSetStCurrBefore[i];
431
432 for (i = 0; i < pic->NumPocStCurrAfter; ++i)
433 result.ref_pic_set_st_curr_after[i] = pic->RefPicSetStCurrAfter[i];
434
435 for (i = 0; i < pic->NumPocLtCurr; ++i)
436 result.ref_pic_set_lt_curr[i] = pic->RefPicSetLtCurr[i];
437
438 for (i = 0; i < 6; ++i)
439 result.ucScalingListDCCoefSizeID2[i] = pic->pps->sps->ScalingListDCCoeff16x16[i];
440
441 for (i = 0; i < 2; ++i)
442 result.ucScalingListDCCoefSizeID3[i] = pic->pps->sps->ScalingListDCCoeff32x32[i];
443
444 memcpy(dec->it, pic->pps->sps->ScalingList4x4, 6 * 16);
445 memcpy(dec->it + 96, pic->pps->sps->ScalingList8x8, 6 * 64);
446 memcpy(dec->it + 480, pic->pps->sps->ScalingList16x16, 6 * 64);
447 memcpy(dec->it + 864, pic->pps->sps->ScalingList32x32, 2 * 64);
448
449 for (i = 0; i < 2; i++) {
450 for (j = 0; j < 15; j++)
451 result.direct_reflist[i][j] = pic->RefPicList[0][i][j];
452 }
453
454 if (pic->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) {
455 if (target->buffer_format == PIPE_FORMAT_P010 || target->buffer_format == PIPE_FORMAT_P016) {
456 result.p010_mode = 1;
457 result.msb_mode = 1;
458 } else {
459 result.p010_mode = 0;
460 result.luma_10to8 = 5;
461 result.chroma_10to8 = 5;
462 result.hevc_reserved[0] = 4; /* sclr_luma10to8 */
463 result.hevc_reserved[1] = 4; /* sclr_chroma10to8 */
464 }
465 }
466
467 if (dec->dpb_type == DPB_DYNAMIC_TIER_2) {
468 dec->ref_codec.bts = (pic->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) ?
469 CODEC_10_BITS : CODEC_8_BITS;
470 dec->ref_codec.index = result.curr_idx;
471 dec->ref_codec.ref_size = 15;
472 memset(dec->ref_codec.ref_list, 0x7f, sizeof(dec->ref_codec.ref_list));
473 memcpy(dec->ref_codec.ref_list, result.ref_pic_list, sizeof(result.ref_pic_list));
474 }
475 return result;
476 }
477
fill_probs_table(void * ptr)478 static void fill_probs_table(void *ptr)
479 {
480 rvcn_dec_vp9_probs_t *probs = (rvcn_dec_vp9_probs_t *)ptr;
481
482 memcpy(&probs->coef_probs[0], default_coef_probs_4x4, sizeof(default_coef_probs_4x4));
483 memcpy(&probs->coef_probs[1], default_coef_probs_8x8, sizeof(default_coef_probs_8x8));
484 memcpy(&probs->coef_probs[2], default_coef_probs_16x16, sizeof(default_coef_probs_16x16));
485 memcpy(&probs->coef_probs[3], default_coef_probs_32x32, sizeof(default_coef_probs_32x32));
486 memcpy(probs->y_mode_prob, default_if_y_probs, sizeof(default_if_y_probs));
487 memcpy(probs->uv_mode_prob, default_if_uv_probs, sizeof(default_if_uv_probs));
488 memcpy(probs->single_ref_prob, default_single_ref_p, sizeof(default_single_ref_p));
489 memcpy(probs->switchable_interp_prob, default_switchable_interp_prob,
490 sizeof(default_switchable_interp_prob));
491 memcpy(probs->partition_prob, default_partition_probs, sizeof(default_partition_probs));
492 memcpy(probs->inter_mode_probs, default_inter_mode_probs, sizeof(default_inter_mode_probs));
493 memcpy(probs->mbskip_probs, default_skip_probs, sizeof(default_skip_probs));
494 memcpy(probs->intra_inter_prob, default_intra_inter_p, sizeof(default_intra_inter_p));
495 memcpy(probs->comp_inter_prob, default_comp_inter_p, sizeof(default_comp_inter_p));
496 memcpy(probs->comp_ref_prob, default_comp_ref_p, sizeof(default_comp_ref_p));
497 memcpy(probs->tx_probs_32x32, default_tx_probs_32x32, sizeof(default_tx_probs_32x32));
498 memcpy(probs->tx_probs_16x16, default_tx_probs_16x16, sizeof(default_tx_probs_16x16));
499 memcpy(probs->tx_probs_8x8, default_tx_probs_8x8, sizeof(default_tx_probs_8x8));
500 memcpy(probs->mv_joints, default_nmv_joints, sizeof(default_nmv_joints));
501 memcpy(&probs->mv_comps[0], default_nmv_components, sizeof(default_nmv_components));
502 memset(&probs->nmvc_mask, 0, sizeof(rvcn_dec_vp9_nmv_ctx_mask_t));
503 }
504
get_vp9_msg(struct radeon_decoder * dec,struct pipe_video_buffer * target,struct pipe_vp9_picture_desc * pic)505 static rvcn_dec_message_vp9_t get_vp9_msg(struct radeon_decoder *dec,
506 struct pipe_video_buffer *target,
507 struct pipe_vp9_picture_desc *pic)
508 {
509 rvcn_dec_message_vp9_t result;
510 unsigned i ,j;
511
512 memset(&result, 0, sizeof(result));
513
514 /* segment table */
515 rvcn_dec_vp9_probs_segment_t *prbs = (rvcn_dec_vp9_probs_segment_t *)(dec->probs);
516
517 if (pic->picture_parameter.pic_fields.segmentation_enabled) {
518 for (i = 0; i < 8; ++i) {
519 prbs->seg.feature_data[i] =
520 (pic->slice_parameter.seg_param[i].alt_quant & 0xffff) |
521 ((pic->slice_parameter.seg_param[i].alt_lf & 0xff) << 16) |
522 ((pic->slice_parameter.seg_param[i].segment_flags.segment_reference & 0xf) << 24);
523 prbs->seg.feature_mask[i] =
524 (pic->slice_parameter.seg_param[i].alt_quant_enabled << 0) |
525 (pic->slice_parameter.seg_param[i].alt_lf_enabled << 1) |
526 (pic->slice_parameter.seg_param[i].segment_flags.segment_reference_enabled << 2) |
527 (pic->slice_parameter.seg_param[i].segment_flags.segment_reference_skipped << 3);
528 }
529
530 for (i = 0; i < 7; ++i)
531 prbs->seg.tree_probs[i] = pic->picture_parameter.mb_segment_tree_probs[i];
532
533 for (i = 0; i < 3; ++i)
534 prbs->seg.pred_probs[i] = pic->picture_parameter.segment_pred_probs[i];
535
536 prbs->seg.abs_delta = pic->picture_parameter.abs_delta;
537 } else
538 memset(&prbs->seg, 0, 256);
539
540 result.frame_header_flags = (pic->picture_parameter.pic_fields.frame_type
541 << RDECODE_FRAME_HDR_INFO_VP9_FRAME_TYPE_SHIFT) &
542 RDECODE_FRAME_HDR_INFO_VP9_FRAME_TYPE_MASK;
543
544 result.frame_header_flags |= (pic->picture_parameter.pic_fields.error_resilient_mode
545 << RDECODE_FRAME_HDR_INFO_VP9_ERROR_RESILIENT_MODE_SHIFT) &
546 RDECODE_FRAME_HDR_INFO_VP9_ERROR_RESILIENT_MODE_MASK;
547
548 result.frame_header_flags |= (pic->picture_parameter.pic_fields.intra_only
549 << RDECODE_FRAME_HDR_INFO_VP9_INTRA_ONLY_SHIFT) &
550 RDECODE_FRAME_HDR_INFO_VP9_INTRA_ONLY_MASK;
551
552 result.frame_header_flags |= (pic->picture_parameter.pic_fields.allow_high_precision_mv
553 << RDECODE_FRAME_HDR_INFO_VP9_ALLOW_HIGH_PRECISION_MV_SHIFT) &
554 RDECODE_FRAME_HDR_INFO_VP9_ALLOW_HIGH_PRECISION_MV_MASK;
555
556 result.frame_header_flags |= (pic->picture_parameter.pic_fields.frame_parallel_decoding_mode
557 << RDECODE_FRAME_HDR_INFO_VP9_FRAME_PARALLEL_DECODING_MODE_SHIFT) &
558 RDECODE_FRAME_HDR_INFO_VP9_FRAME_PARALLEL_DECODING_MODE_MASK;
559
560 result.frame_header_flags |= (pic->picture_parameter.pic_fields.refresh_frame_context
561 << RDECODE_FRAME_HDR_INFO_VP9_REFRESH_FRAME_CONTEXT_SHIFT) &
562 RDECODE_FRAME_HDR_INFO_VP9_REFRESH_FRAME_CONTEXT_MASK;
563
564 result.frame_header_flags |= (pic->picture_parameter.pic_fields.segmentation_enabled
565 << RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_ENABLED_SHIFT) &
566 RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_ENABLED_MASK;
567
568 result.frame_header_flags |= (pic->picture_parameter.pic_fields.segmentation_update_map
569 << RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_UPDATE_MAP_SHIFT) &
570 RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_UPDATE_MAP_MASK;
571
572 result.frame_header_flags |= (pic->picture_parameter.pic_fields.segmentation_temporal_update
573 << RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_TEMPORAL_UPDATE_SHIFT) &
574 RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_TEMPORAL_UPDATE_MASK;
575
576 result.frame_header_flags |= (pic->picture_parameter.mode_ref_delta_enabled
577 << RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_ENABLED_SHIFT) &
578 RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_ENABLED_MASK;
579
580 result.frame_header_flags |= (pic->picture_parameter.mode_ref_delta_update
581 << RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_UPDATE_SHIFT) &
582 RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_UPDATE_MASK;
583
584 result.frame_header_flags |=
585 ((dec->show_frame && !pic->picture_parameter.pic_fields.error_resilient_mode &&
586 dec->last_width == dec->base.width && dec->last_height == dec->base.height)
587 << RDECODE_FRAME_HDR_INFO_VP9_USE_PREV_IN_FIND_MV_REFS_SHIFT) &
588 RDECODE_FRAME_HDR_INFO_VP9_USE_PREV_IN_FIND_MV_REFS_MASK;
589 dec->show_frame = pic->picture_parameter.pic_fields.show_frame;
590
591 result.frame_header_flags |= (1 << RDECODE_FRAME_HDR_INFO_VP9_USE_UNCOMPRESSED_HEADER_SHIFT) &
592 RDECODE_FRAME_HDR_INFO_VP9_USE_UNCOMPRESSED_HEADER_MASK;
593
594 result.interp_filter = pic->picture_parameter.pic_fields.mcomp_filter_type;
595
596 result.frame_context_idx = pic->picture_parameter.pic_fields.frame_context_idx;
597 result.reset_frame_context = pic->picture_parameter.pic_fields.reset_frame_context;
598
599 result.filter_level = pic->picture_parameter.filter_level;
600 result.sharpness_level = pic->picture_parameter.sharpness_level;
601
602 for (i = 0; i < 8; ++i)
603 memcpy(result.lf_adj_level[i], pic->slice_parameter.seg_param[i].filter_level, 4 * 2);
604
605 if (pic->picture_parameter.pic_fields.lossless_flag) {
606 result.base_qindex = 0;
607 result.y_dc_delta_q = 0;
608 result.uv_ac_delta_q = 0;
609 result.uv_dc_delta_q = 0;
610 } else {
611 result.base_qindex = pic->picture_parameter.base_qindex;
612 result.y_dc_delta_q = pic->picture_parameter.y_dc_delta_q;
613 result.uv_ac_delta_q = pic->picture_parameter.uv_ac_delta_q;
614 result.uv_dc_delta_q = pic->picture_parameter.uv_dc_delta_q;
615 }
616
617 result.log2_tile_cols = pic->picture_parameter.log2_tile_columns;
618 result.log2_tile_rows = pic->picture_parameter.log2_tile_rows;
619 result.chroma_format = 1;
620 result.bit_depth_luma_minus8 = result.bit_depth_chroma_minus8 =
621 (pic->picture_parameter.bit_depth - 8);
622
623 result.vp9_frame_size = align(dec->bs_size, 128);
624 result.uncompressed_header_size = pic->picture_parameter.frame_header_length_in_bytes;
625 result.compressed_header_size = pic->picture_parameter.first_partition_size;
626
627 assert(dec->base.max_references + 1 <= ARRAY_SIZE(dec->render_pic_list));
628
629 //clear the dec->render list if it is not used as a reference
630 for (i = 0; i < ARRAY_SIZE(dec->render_pic_list); i++) {
631 if (dec->render_pic_list[i]) {
632 for (j=0;j<8;j++) {
633 if (dec->render_pic_list[i] == pic->ref[j])
634 break;
635 }
636 if (j == 8)
637 dec->render_pic_list[i] = NULL;
638 }
639 }
640
641 get_current_pic_index(dec, target, &result.curr_pic_idx);
642
643 for (i = 0; i < 8; i++) {
644 result.ref_frame_map[i] =
645 (pic->ref[i]) ? (uintptr_t)vl_video_buffer_get_associated_data(pic->ref[i], &dec->base)
646 : 0x7f;
647 }
648
649 result.frame_refs[0] = result.ref_frame_map[pic->picture_parameter.pic_fields.last_ref_frame];
650 result.ref_frame_sign_bias[0] = pic->picture_parameter.pic_fields.last_ref_frame_sign_bias;
651 result.frame_refs[1] = result.ref_frame_map[pic->picture_parameter.pic_fields.golden_ref_frame];
652 result.ref_frame_sign_bias[1] = pic->picture_parameter.pic_fields.golden_ref_frame_sign_bias;
653 result.frame_refs[2] = result.ref_frame_map[pic->picture_parameter.pic_fields.alt_ref_frame];
654 result.ref_frame_sign_bias[2] = pic->picture_parameter.pic_fields.alt_ref_frame_sign_bias;
655
656 if (pic->base.profile == PIPE_VIDEO_PROFILE_VP9_PROFILE2) {
657 if (target->buffer_format == PIPE_FORMAT_P010 || target->buffer_format == PIPE_FORMAT_P016) {
658 result.p010_mode = 1;
659 result.msb_mode = 1;
660 } else {
661 result.p010_mode = 0;
662 result.luma_10to8 = 1;
663 result.chroma_10to8 = 1;
664 }
665 }
666
667 if (dec->dpb_type == DPB_DYNAMIC_TIER_2) {
668 dec->ref_codec.bts = (pic->base.profile == PIPE_VIDEO_PROFILE_VP9_PROFILE2) ?
669 CODEC_10_BITS : CODEC_8_BITS;
670 dec->ref_codec.index = result.curr_pic_idx;
671 dec->ref_codec.ref_size = 8;
672 memset(dec->ref_codec.ref_list, 0x7f, sizeof(dec->ref_codec.ref_list));
673 memcpy(dec->ref_codec.ref_list, result.ref_frame_map, sizeof(result.ref_frame_map));
674 }
675
676 dec->last_width = dec->base.width;
677 dec->last_height = dec->base.height;
678
679 return result;
680 }
681
/**
 * Copy the per-slice reference picture lists into the direct-reflist message.
 *
 * num_direct_reflist is set to the slice count from \p pic, and for each
 * slice two lists of 15 entries are copied from pic->RefPicList.
 *
 * @param hevc_reflist  message to fill.
 * @param pic           H.265 picture description providing the lists.
 */
static void get_h265_reflist(rvcn_dec_message_hevc_direct_ref_list_t *hevc_reflist,
                             struct pipe_h265_picture_desc *pic)
{
   int slice = 0;

   hevc_reflist->num_direct_reflist = pic->slice_parameter.slice_count;

   while (slice < hevc_reflist->num_direct_reflist) {
      /* Walk both lists of the slice in order: list 0 first, then list 1. */
      for (int entry = 0; entry < 2 * 15; entry++) {
         const int list = entry / 15;
         const int pos = entry % 15;

         hevc_reflist->multi_direct_reflist[slice][list][pos] =
            pic->RefPicList[slice][list][pos];
      }
      slice++;
   }
}
693
/**
 * Fill the DRM part of the decode message from the decryption parameters.
 *
 * When neither the cbc nor the ctr bit is set in \p decrypted, only the
 * bypass bit is set in drm_cntl and drm_cmd is zero. Otherwise drm_cntl is
 * zero, drm_cmd is assembled from the fields below, and the wrapped key,
 * session IV and encrypted IV (16 bytes each) are copied into the message.
 *
 * @param drm        message section to fill.
 * @param decrypted  decryption parameters supplied by the caller.
 */
static void set_drm_keys(rvcn_dec_message_drm_t *drm, DECRYPT_PARAMETERS *decrypted)
{
   const int use_cbc = decrypted->u.s.cbc;
   const int use_ctr = decrypted->u.s.ctr;
   const int session = decrypted->u.s.drm_id;
   /* All three transfers are always enabled. */
   const int unwrap_key = 1;
   const int send_key = 1;
   const int send_counter = 1;

   drm->drm_cmd = 0;

   if (!use_cbc && !use_ctr) {
      drm->drm_cntl = 1 << DRM_CNTL_BYPASS_SHIFT;
      return;
   }

   drm->drm_cntl = 0 << DRM_CNTL_BYPASS_SHIFT;
   drm->drm_cmd |= 0xff << DRM_CMD_BYTE_MASK_SHIFT;

   /* ctr takes precedence over cbc when both bits are set. */
   drm->drm_cmd |= (use_ctr ? 0x00 : 0x02) << DRM_CMD_ALGORITHM_SHIFT;

   drm->drm_cmd |= 1 << DRM_CMD_GEN_MASK_SHIFT;
   drm->drm_cmd |= unwrap_key << DRM_CMD_UNWRAP_KEY_SHIFT;
   drm->drm_cmd |= 0 << DRM_CMD_OFFSET_SHIFT;
   drm->drm_cmd |= send_counter << DRM_CMD_CNT_DATA_SHIFT;
   drm->drm_cmd |= send_key << DRM_CMD_CNT_KEY_SHIFT;
   drm->drm_cmd |= unwrap_key << DRM_CMD_KEY_SHIFT;
   drm->drm_cmd |= session << DRM_CMD_SESSION_SEL_SHIFT;

   if (unwrap_key)
      memcpy(drm->drm_wrapped_key, decrypted->encrypted_key, 16);
   if (send_key)
      memcpy(drm->drm_key, decrypted->session_iv, 16);
   if (send_counter)
      memcpy(drm->drm_counter, decrypted->encrypted_iv, 16);

   drm->drm_offset = 0;
}
734
/**
 * Advance the 16-bit film-grain shift register and return its top bits.
 *
 * The feedback bit is the XOR of register bits 0, 1, 3 and 12; the register
 * is shifted right by one and the feedback bit enters at bit 15.
 *
 * @param seed  register state; updated in place.
 * @param bits  number of most-significant bits of the new state to return.
 * @return the top \p bits bits of the updated register.
 */
static int32_t rvcn_av1_film_grain_random_number(unsigned short *seed, int32_t bits)
{
   const unsigned short state = *seed;
   const unsigned short feedback =
      (state ^ (state >> 1) ^ (state >> 3) ^ (state >> 12)) & 1;
   const unsigned short next = (state >> 1) | (feedback << 15);

   *seed = next;

   return (next >> (16 - bits)) & ((1 << bits) - 1);
}
746
/**
 * Build a 256-entry scaling lookup table by piecewise-linear interpolation
 * between the given (x, y) points.
 *
 * Entries before the first point's x take the first point's y, entries from
 * the last point's x up to 255 take the last point's y, and entries between
 * two consecutive points are interpolated in 16.16 fixed point.
 *
 * \param scaling_points  array of \p num {x, y} pairs
 * \param num             number of points; nothing is written when it is 0
 * \param scaling_lut     output table, must hold at least 256 entries
 */
static void rvcn_av1_film_grain_init_scaling(uint8_t scaling_points[][2],
                                             uint8_t num,
                                             short scaling_lut[])
{
   int32_t i, x, delta_x, delta_y;
   int64_t delta;

   if (num == 0)
      return;

   for (i = 0; i < scaling_points[0][0]; i++)
      scaling_lut[i] = scaling_points[0][1];

   for (i = 0; i < num - 1; i++) {
      delta_y = scaling_points[i + 1][1] - scaling_points[i][1];
      delta_x = scaling_points[i + 1][0] - scaling_points[i][0];

      /* A segment whose x does not increase has nothing to interpolate;
       * skipping it also avoids dividing by zero when two points share
       * the same x. */
      if (delta_x <= 0)
         continue;

      delta = delta_y * ((65536 + (delta_x >> 1)) / delta_x);

      for (x = 0; x < delta_x; x++)
         scaling_lut[scaling_points[i][0] + x] =
            (short)(scaling_points[i][1] + (int32_t)((x * delta + 32768) >> 16));
   }

   for (i = scaling_points[num - 1][0]; i < 256; i++)
      scaling_lut[i] = scaling_points[num - 1][1];
}
774
/**
 * Fill the film grain init buffer (grain blocks and scaling tables) from the
 * film grain parameters.
 *
 * Steps:
 *  1. Generate a 73x82 luma block and two 38x44 chroma blocks from
 *     gaussian_sequence, driven by the shift-register random generator.
 *  2. Filter each block in place with the ar_coeffs_* weights over the
 *     neighbourhood selected by ar_coeff_lag; the chroma filter additionally
 *     mixes in the average of the co-located 2x2 luma samples.
 *  3. Crop the blocks (dropping the first 9 luma / 6 chroma rows and columns)
 *     and copy them into fg_buf using its padded row-group layout.
 *  4. Build the luma/cb/cr scaling lookup tables.
 *
 * \param fg_params  film grain parameters (read only by this function)
 * \param fg_buf     destination buffer
 */
static void rvcn_av1_init_film_grain_buffer(rvcn_dec_film_grain_params_t *fg_params,
                                            rvcn_dec_av1_fg_init_buf_t *fg_buf)
{
   const int32_t luma_block_size_y = 73;
   const int32_t luma_block_size_x = 82;
   const int32_t chroma_block_size_y = 38;
   const int32_t chroma_block_size_x = 44;
   const int32_t gauss_bits = 11;
   int32_t filt_luma_grain_block[luma_block_size_y][luma_block_size_x];
   int32_t filt_cb_grain_block[chroma_block_size_y][chroma_block_size_x];
   int32_t filt_cr_grain_block[chroma_block_size_y][chroma_block_size_x];
   int32_t chroma_subsamp_y = 1;
   int32_t chroma_subsamp_x = 1;
   unsigned short seed = fg_params->random_seed;
   int32_t ar_coeff_lag = fg_params->ar_coeff_lag;
   int32_t bit_depth = fg_params->bit_depth_minus_8 + 8;
   short grain_center = 128 << (bit_depth - 8);
   short grain_min = 0 - grain_center;
   short grain_max = (256 << (bit_depth - 8)) - 1 - grain_center;
   int32_t shift = 12 - bit_depth + fg_params->grain_scale_shift;
   short luma_grain_block_tmp[64][80];
   short cb_grain_block_tmp[32][40];
   short cr_grain_block_tmp[32][40];
   short *align_ptr, *align_ptr0, *align_ptr1;
   int32_t x, y, g, i, j, c, c0, c1, delta_row, delta_col;
   int32_t s, s0, s1, pos, r;

   /* The crop loops below only fill 73 of the 80 luma columns and 38 of the
    * 40 chroma columns, yet the full arrays are copied into fg_buf. Zero
    * them so the unused columns are deterministic instead of carrying
    * whatever happened to be on the stack. */
   memset(luma_grain_block_tmp, 0, sizeof(luma_grain_block_tmp));
   memset(cb_grain_block_tmp, 0, sizeof(cb_grain_block_tmp));
   memset(cr_grain_block_tmp, 0, sizeof(cr_grain_block_tmp));

   /* generate luma grain block */
   memset(filt_luma_grain_block, 0, sizeof(filt_luma_grain_block));
   for (y = 0; y < luma_block_size_y; y++) {
      for (x = 0; x < luma_block_size_x; x++) {
         g = 0;
         if (fg_params->num_y_points > 0) {
            r = rvcn_av1_film_grain_random_number(&seed, gauss_bits);
            g = gaussian_sequence[CLAMP(r, 0, 2048 - 1)];
         }
         filt_luma_grain_block[y][x] = ROUND_POWER_OF_TWO(g, shift);
      }
   }

   /* filter the luma block in place; the 3-sample border keeps every
    * neighbour index inside the block for a lag of up to 3 */
   for (y = 3; y < luma_block_size_y; y++) {
      for (x = 3; x < luma_block_size_x - 3; x++) {
         s = 0;
         pos = 0;
         for (delta_row = -ar_coeff_lag; delta_row <= 0; delta_row++) {
            for (delta_col = -ar_coeff_lag; delta_col <= ar_coeff_lag; delta_col++) {
               /* stop at the current sample: only earlier samples feed in */
               if (delta_row == 0 && delta_col == 0)
                  break;
               c = fg_params->ar_coeffs_y[pos];
               s += filt_luma_grain_block[y + delta_row][x + delta_col] * c;
               pos++;
            }
         }
         filt_luma_grain_block[y][x] =
            AV1_CLAMP(filt_luma_grain_block[y][x]
                      + ROUND_POWER_OF_TWO(s, fg_params->ar_coeff_shift),
                      grain_min, grain_max);
      }
   }

   /* generate chroma grain block */
   memset(filt_cb_grain_block, 0, sizeof(filt_cb_grain_block));
   shift = 12 - bit_depth + fg_params->grain_scale_shift;
   seed = fg_params->random_seed ^ 0xb524;
   for (y = 0; y < chroma_block_size_y; y++) {
      for (x = 0; x < chroma_block_size_x; x++) {
         g = 0;
         if (fg_params->num_cb_points || fg_params->chroma_scaling_from_luma) {
            r = rvcn_av1_film_grain_random_number(&seed, gauss_bits);
            g = gaussian_sequence[CLAMP(r, 0, 2048 - 1)];
         }
         filt_cb_grain_block[y][x] = ROUND_POWER_OF_TWO(g, shift);
      }
   }

   memset(filt_cr_grain_block, 0, sizeof(filt_cr_grain_block));
   seed = fg_params->random_seed ^ 0x49d8;
   for (y = 0; y < chroma_block_size_y; y++) {
      for (x = 0; x < chroma_block_size_x; x++) {
         g = 0;
         if (fg_params->num_cr_points || fg_params->chroma_scaling_from_luma) {
            r = rvcn_av1_film_grain_random_number(&seed, gauss_bits);
            g = gaussian_sequence[CLAMP(r, 0, 2048 - 1)];
         }
         filt_cr_grain_block[y][x] = ROUND_POWER_OF_TWO(g, shift);
      }
   }

   /* filter both chroma blocks in place */
   for (y = 3; y < chroma_block_size_y; y++) {
      for (x = 3; x < chroma_block_size_x - 3; x++) {
         s0 = 0;
         s1 = 0;
         pos = 0;
         for (delta_row = -ar_coeff_lag; delta_row <= 0; delta_row++) {
            for (delta_col = -ar_coeff_lag; delta_col <= ar_coeff_lag; delta_col++) {
               c0 = fg_params->ar_coeffs_cb[pos];
               c1 = fg_params->ar_coeffs_cr[pos];
               if (delta_row == 0 && delta_col == 0) {
                  /* the last coefficient weights the averaged co-located
                   * luma grain instead of a chroma neighbour */
                  if (fg_params->num_y_points > 0) {
                     int luma = 0;
                     int luma_x = ((x - 3) << chroma_subsamp_x) + 3;
                     int luma_y = ((y - 3) << chroma_subsamp_y) + 3;
                     for (i = 0; i <= chroma_subsamp_y; i++)
                        for (j = 0; j <= chroma_subsamp_x; j++)
                           luma += filt_luma_grain_block[luma_y + i][luma_x + j];

                     luma = ROUND_POWER_OF_TWO(luma, chroma_subsamp_x + chroma_subsamp_y);
                     s0 += luma * c0;
                     s1 += luma * c1;
                  }
                  break;
               }
               s0 += filt_cb_grain_block[y + delta_row][x + delta_col] * c0;
               s1 += filt_cr_grain_block[y + delta_row][x + delta_col] * c1;
               pos++;
            }
         }
         filt_cb_grain_block[y][x] = AV1_CLAMP(filt_cb_grain_block[y][x] +
                                               ROUND_POWER_OF_TWO(s0, fg_params->ar_coeff_shift),
                                               grain_min, grain_max);
         filt_cr_grain_block[y][x] = AV1_CLAMP(filt_cr_grain_block[y][x] +
                                               ROUND_POWER_OF_TWO(s1, fg_params->ar_coeff_shift),
                                               grain_min, grain_max);
      }
   }

   /* crop: drop the first 9 luma rows/columns and the first 6 chroma ones */
   for (i = 9; i < luma_block_size_y; i++)
      for (j = 9; j < luma_block_size_x; j++)
         luma_grain_block_tmp[i - 9][j - 9] = filt_luma_grain_block[i][j];

   for (i = 6; i < chroma_block_size_y; i++)
      for (j = 6; j < chroma_block_size_x; j++) {
         cb_grain_block_tmp[i - 6][j - 6] = filt_cb_grain_block[i][j];
         cr_grain_block_tmp[i - 6][j - 6] = filt_cr_grain_block[i][j];
      }

   /* copy luma rows; after every 4th row the destination skips 64 shorts */
   align_ptr = &fg_buf->luma_grain_block[0][0];
   for (i = 0; i < 64; i++) {
      for (j = 0; j < 80; j++)
         *align_ptr++ = luma_grain_block_tmp[i][j];

      if (((i + 1) % 4) == 0)
         align_ptr += 64;
   }

   /* copy chroma rows; after every 8th row the destination skips 64 shorts */
   align_ptr0 = &fg_buf->cb_grain_block[0][0];
   align_ptr1 = &fg_buf->cr_grain_block[0][0];
   for (i = 0; i < 32; i++) {
      for (j = 0; j < 40; j++) {
         *align_ptr0++ = cb_grain_block_tmp[i][j];
         *align_ptr1++ = cr_grain_block_tmp[i][j];
      }
      if (((i + 1) % 8) == 0) {
         align_ptr0 += 64;
         align_ptr1 += 64;
      }
   }

   /* scaling tables: chroma either reuses the luma table or gets its own */
   memset(fg_buf->scaling_lut_y, 0, sizeof(fg_buf->scaling_lut_y));
   rvcn_av1_film_grain_init_scaling(fg_params->scaling_points_y, fg_params->num_y_points,
                                    fg_buf->scaling_lut_y);
   if (fg_params->chroma_scaling_from_luma) {
      memcpy(fg_buf->scaling_lut_cb, fg_buf->scaling_lut_y, sizeof(fg_buf->scaling_lut_y));
      memcpy(fg_buf->scaling_lut_cr, fg_buf->scaling_lut_y, sizeof(fg_buf->scaling_lut_y));
   } else {
      memset(fg_buf->scaling_lut_cb, 0, sizeof(fg_buf->scaling_lut_cb));
      memset(fg_buf->scaling_lut_cr, 0, sizeof(fg_buf->scaling_lut_cr));
      rvcn_av1_film_grain_init_scaling(fg_params->scaling_points_cb, fg_params->num_cb_points,
                                       fg_buf->scaling_lut_cb);
      rvcn_av1_film_grain_init_scaling(fg_params->scaling_points_cr, fg_params->num_cr_points,
                                       fg_buf->scaling_lut_cr);
   }
}
946
rvcn_dec_av1_film_grain_surface(struct pipe_video_buffer ** target,struct pipe_av1_picture_desc * pic)947 static void rvcn_dec_av1_film_grain_surface(struct pipe_video_buffer **target,
948 struct pipe_av1_picture_desc *pic)
949 {
950 if (!pic->picture_parameter.film_grain_info.film_grain_info_fields.apply_grain ||
951 !pic->film_grain_target)
952 return;
953
954 *target = pic->film_grain_target;
955 }
956
get_av1_msg(struct radeon_decoder * dec,struct pipe_video_buffer * target,struct pipe_av1_picture_desc * pic)957 static rvcn_dec_message_av1_t get_av1_msg(struct radeon_decoder *dec,
958 struct pipe_video_buffer *target,
959 struct pipe_av1_picture_desc *pic)
960 {
961 rvcn_dec_message_av1_t result;
962 unsigned i, j;
963 uint16_t tile_count = pic->picture_parameter.tile_cols * pic->picture_parameter.tile_rows;
964
965 memset(&result, 0, sizeof(result));
966
967 result.frame_header_flags = (pic->picture_parameter.pic_info_fields.show_frame
968 << RDECODE_FRAME_HDR_INFO_AV1_SHOW_FRAME_SHIFT) &
969 RDECODE_FRAME_HDR_INFO_AV1_SHOW_FRAME_MASK;
970
971 result.frame_header_flags |= (pic->picture_parameter.pic_info_fields.disable_cdf_update
972 << RDECODE_FRAME_HDR_INFO_AV1_DISABLE_CDF_UPDATE_SHIFT) &
973 RDECODE_FRAME_HDR_INFO_AV1_DISABLE_CDF_UPDATE_MASK;
974
975 result.frame_header_flags |= ((!pic->picture_parameter.pic_info_fields.disable_frame_end_update_cdf)
976 << RDECODE_FRAME_HDR_INFO_AV1_REFRESH_FRAME_CONTEXT_SHIFT) &
977 RDECODE_FRAME_HDR_INFO_AV1_REFRESH_FRAME_CONTEXT_MASK;
978
979 result.frame_header_flags |= ((pic->picture_parameter.pic_info_fields.frame_type ==
980 2 /* INTRA_ONLY_FRAME */) << RDECODE_FRAME_HDR_INFO_AV1_INTRA_ONLY_SHIFT) &
981 RDECODE_FRAME_HDR_INFO_AV1_INTRA_ONLY_MASK;
982
983 result.frame_header_flags |= (pic->picture_parameter.pic_info_fields.allow_intrabc
984 << RDECODE_FRAME_HDR_INFO_AV1_ALLOW_INTRABC_SHIFT) &
985 RDECODE_FRAME_HDR_INFO_AV1_ALLOW_INTRABC_MASK;
986
987 result.frame_header_flags |= (pic->picture_parameter.pic_info_fields.allow_high_precision_mv
988 << RDECODE_FRAME_HDR_INFO_AV1_ALLOW_HIGH_PRECISION_MV_SHIFT) &
989 RDECODE_FRAME_HDR_INFO_AV1_ALLOW_HIGH_PRECISION_MV_MASK;
990
991 result.frame_header_flags |= (pic->picture_parameter.seq_info_fields.mono_chrome
992 << RDECODE_FRAME_HDR_INFO_AV1_MONOCHROME_SHIFT) &
993 RDECODE_FRAME_HDR_INFO_AV1_MONOCHROME_MASK;
994
995 result.frame_header_flags |= (pic->picture_parameter.mode_control_fields.skip_mode_present
996 << RDECODE_FRAME_HDR_INFO_AV1_SKIP_MODE_FLAG_SHIFT) &
997 RDECODE_FRAME_HDR_INFO_AV1_SKIP_MODE_FLAG_MASK;
998
999 result.frame_header_flags |= (((pic->picture_parameter.qmatrix_fields.qm_y == 0xf) ? 0 : 1)
1000 << RDECODE_FRAME_HDR_INFO_AV1_USING_QMATRIX_SHIFT) &
1001 RDECODE_FRAME_HDR_INFO_AV1_USING_QMATRIX_MASK;
1002
1003 result.frame_header_flags |= (pic->picture_parameter.seq_info_fields.enable_filter_intra
1004 << RDECODE_FRAME_HDR_INFO_AV1_ENABLE_FILTER_INTRA_SHIFT) &
1005 RDECODE_FRAME_HDR_INFO_AV1_ENABLE_FILTER_INTRA_MASK;
1006
1007 result.frame_header_flags |= (pic->picture_parameter.seq_info_fields.enable_intra_edge_filter
1008 << RDECODE_FRAME_HDR_INFO_AV1_ENABLE_INTRA_EDGE_FILTER_SHIFT) &
1009 RDECODE_FRAME_HDR_INFO_AV1_ENABLE_INTRA_EDGE_FILTER_MASK;
1010
1011 result.frame_header_flags |= (pic->picture_parameter.seq_info_fields.enable_interintra_compound
1012 << RDECODE_FRAME_HDR_INFO_AV1_ENABLE_INTERINTRA_COMPOUND_SHIFT) &
1013 RDECODE_FRAME_HDR_INFO_AV1_ENABLE_INTERINTRA_COMPOUND_MASK;
1014
1015 result.frame_header_flags |= (pic->picture_parameter.seq_info_fields.enable_masked_compound
1016 << RDECODE_FRAME_HDR_INFO_AV1_ENABLE_MASKED_COMPOUND_SHIFT) &
1017 RDECODE_FRAME_HDR_INFO_AV1_ENABLE_MASKED_COMPOUND_MASK;
1018
1019 result.frame_header_flags |= (pic->picture_parameter.pic_info_fields.allow_warped_motion
1020 << RDECODE_FRAME_HDR_INFO_AV1_ALLOW_WARPED_MOTION_SHIFT) &
1021 RDECODE_FRAME_HDR_INFO_AV1_ALLOW_WARPED_MOTION_MASK;
1022
1023 result.frame_header_flags |= (pic->picture_parameter.seq_info_fields.enable_dual_filter
1024 << RDECODE_FRAME_HDR_INFO_AV1_ENABLE_DUAL_FILTER_SHIFT) &
1025 RDECODE_FRAME_HDR_INFO_AV1_ENABLE_DUAL_FILTER_MASK;
1026
1027 result.frame_header_flags |= (pic->picture_parameter.seq_info_fields.enable_order_hint
1028 << RDECODE_FRAME_HDR_INFO_AV1_ENABLE_ORDER_HINT_SHIFT) &
1029 RDECODE_FRAME_HDR_INFO_AV1_ENABLE_ORDER_HINT_MASK;
1030
1031 result.frame_header_flags |= (pic->picture_parameter.seq_info_fields.enable_jnt_comp
1032 << RDECODE_FRAME_HDR_INFO_AV1_ENABLE_JNT_COMP_SHIFT) &
1033 RDECODE_FRAME_HDR_INFO_AV1_ENABLE_JNT_COMP_MASK;
1034
1035 result.frame_header_flags |= (pic->picture_parameter.pic_info_fields.use_ref_frame_mvs
1036 << RDECODE_FRAME_HDR_INFO_AV1_ALLOW_REF_FRAME_MVS_SHIFT) &
1037 RDECODE_FRAME_HDR_INFO_AV1_ALLOW_REF_FRAME_MVS_MASK;
1038
1039 result.frame_header_flags |= (pic->picture_parameter.pic_info_fields.allow_screen_content_tools
1040 << RDECODE_FRAME_HDR_INFO_AV1_ALLOW_SCREEN_CONTENT_TOOLS_SHIFT) &
1041 RDECODE_FRAME_HDR_INFO_AV1_ALLOW_SCREEN_CONTENT_TOOLS_MASK;
1042
1043 result.frame_header_flags |= (pic->picture_parameter.pic_info_fields.force_integer_mv
1044 << RDECODE_FRAME_HDR_INFO_AV1_CUR_FRAME_FORCE_INTEGER_MV_SHIFT) &
1045 RDECODE_FRAME_HDR_INFO_AV1_CUR_FRAME_FORCE_INTEGER_MV_MASK;
1046
1047 result.frame_header_flags |= (pic->picture_parameter.loop_filter_info_fields.mode_ref_delta_enabled
1048 << RDECODE_FRAME_HDR_INFO_AV1_MODE_REF_DELTA_ENABLED_SHIFT) &
1049 RDECODE_FRAME_HDR_INFO_AV1_MODE_REF_DELTA_ENABLED_MASK;
1050
1051 result.frame_header_flags |= (pic->picture_parameter.loop_filter_info_fields.mode_ref_delta_update
1052 << RDECODE_FRAME_HDR_INFO_AV1_MODE_REF_DELTA_UPDATE_SHIFT) &
1053 RDECODE_FRAME_HDR_INFO_AV1_MODE_REF_DELTA_UPDATE_MASK;
1054
1055 result.frame_header_flags |= (pic->picture_parameter.mode_control_fields.delta_q_present_flag
1056 << RDECODE_FRAME_HDR_INFO_AV1_DELTA_Q_PRESENT_FLAG_SHIFT) &
1057 RDECODE_FRAME_HDR_INFO_AV1_DELTA_Q_PRESENT_FLAG_MASK;
1058
1059 result.frame_header_flags |= (pic->picture_parameter.mode_control_fields.delta_lf_present_flag
1060 << RDECODE_FRAME_HDR_INFO_AV1_DELTA_LF_PRESENT_FLAG_SHIFT) &
1061 RDECODE_FRAME_HDR_INFO_AV1_DELTA_LF_PRESENT_FLAG_MASK;
1062
1063 result.frame_header_flags |= (pic->picture_parameter.mode_control_fields.reduced_tx_set_used
1064 << RDECODE_FRAME_HDR_INFO_AV1_REDUCED_TX_SET_USED_SHIFT) &
1065 RDECODE_FRAME_HDR_INFO_AV1_REDUCED_TX_SET_USED_MASK;
1066
1067 result.frame_header_flags |= (pic->picture_parameter.seg_info.segment_info_fields.enabled
1068 << RDECODE_FRAME_HDR_INFO_AV1_SEGMENTATION_ENABLED_SHIFT) &
1069 RDECODE_FRAME_HDR_INFO_AV1_SEGMENTATION_ENABLED_MASK;
1070
1071 result.frame_header_flags |= (pic->picture_parameter.seg_info.segment_info_fields.update_map
1072 << RDECODE_FRAME_HDR_INFO_AV1_SEGMENTATION_UPDATE_MAP_SHIFT) &
1073 RDECODE_FRAME_HDR_INFO_AV1_SEGMENTATION_UPDATE_MAP_MASK;
1074
1075 result.frame_header_flags |= (pic->picture_parameter.seg_info.segment_info_fields.temporal_update
1076 << RDECODE_FRAME_HDR_INFO_AV1_SEGMENTATION_TEMPORAL_UPDATE_SHIFT) &
1077 RDECODE_FRAME_HDR_INFO_AV1_SEGMENTATION_TEMPORAL_UPDATE_MASK;
1078
1079 result.frame_header_flags |= (pic->picture_parameter.mode_control_fields.delta_lf_multi
1080 << RDECODE_FRAME_HDR_INFO_AV1_DELTA_LF_MULTI_SHIFT) &
1081 RDECODE_FRAME_HDR_INFO_AV1_DELTA_LF_MULTI_MASK;
1082
1083 result.frame_header_flags |= (pic->picture_parameter.pic_info_fields.is_motion_mode_switchable
1084 << RDECODE_FRAME_HDR_INFO_AV1_SWITCHABLE_SKIP_MODE_SHIFT) &
1085 RDECODE_FRAME_HDR_INFO_AV1_SWITCHABLE_SKIP_MODE_MASK;
1086
1087 result.frame_header_flags |= ((!pic->picture_parameter.refresh_frame_flags)
1088 << RDECODE_FRAME_HDR_INFO_AV1_SKIP_REFERENCE_UPDATE_SHIFT) &
1089 RDECODE_FRAME_HDR_INFO_AV1_SKIP_REFERENCE_UPDATE_MASK;
1090
1091 result.frame_header_flags |= ((!pic->picture_parameter.seq_info_fields.ref_frame_mvs)
1092 << RDECODE_FRAME_HDR_INFO_AV1_DISABLE_REF_FRAME_MVS_SHIFT) &
1093 RDECODE_FRAME_HDR_INFO_AV1_DISABLE_REF_FRAME_MVS_MASK;
1094
1095 result.current_frame_id = pic->picture_parameter.current_frame_id;
1096 result.frame_offset = pic->picture_parameter.order_hint;
1097
1098 result.profile = pic->picture_parameter.profile;
1099 result.is_annexb = 0;
1100 result.frame_type = pic->picture_parameter.pic_info_fields.frame_type;
1101 result.primary_ref_frame = pic->picture_parameter.primary_ref_frame;
1102
1103 get_current_pic_index(dec, target, &result.curr_pic_idx);
1104
1105 result.sb_size = pic->picture_parameter.seq_info_fields.use_128x128_superblock;
1106 result.interp_filter = pic->picture_parameter.interp_filter;
1107 for (i = 0; i < 2; ++i)
1108 result.filter_level[i] = pic->picture_parameter.filter_level[i];
1109 result.filter_level_u = pic->picture_parameter.filter_level_u;
1110 result.filter_level_v = pic->picture_parameter.filter_level_v;
1111 result.sharpness_level = pic->picture_parameter.loop_filter_info_fields.sharpness_level;
1112 for (i = 0; i < 8; ++i)
1113 result.ref_deltas[i] = pic->picture_parameter.ref_deltas[i];
1114 for (i = 0; i < 2; ++i)
1115 result.mode_deltas[i] = pic->picture_parameter.mode_deltas[i];
1116 result.base_qindex = pic->picture_parameter.base_qindex;
1117 result.y_dc_delta_q = pic->picture_parameter.y_dc_delta_q;
1118 result.u_dc_delta_q = pic->picture_parameter.u_dc_delta_q;
1119 result.v_dc_delta_q = pic->picture_parameter.v_dc_delta_q;
1120 result.u_ac_delta_q = pic->picture_parameter.u_ac_delta_q;
1121 result.v_ac_delta_q = pic->picture_parameter.v_ac_delta_q;
1122 result.qm_y = pic->picture_parameter.qmatrix_fields.qm_y | 0xf0;
1123 result.qm_u = pic->picture_parameter.qmatrix_fields.qm_u | 0xf0;
1124 result.qm_v = pic->picture_parameter.qmatrix_fields.qm_v | 0xf0;
1125 result.delta_q_res = 1 << pic->picture_parameter.mode_control_fields.log2_delta_q_res;
1126 result.delta_lf_res = 1 << pic->picture_parameter.mode_control_fields.log2_delta_lf_res;
1127
1128 result.tile_cols = pic->picture_parameter.tile_cols;
1129 result.tile_rows = pic->picture_parameter.tile_rows;
1130 result.tx_mode = pic->picture_parameter.mode_control_fields.tx_mode;
1131 result.reference_mode = (pic->picture_parameter.mode_control_fields.reference_select == 1) ? 2 : 0;
1132 result.chroma_format = pic->picture_parameter.seq_info_fields.mono_chrome ? 0 : 1;
1133 result.tile_size_bytes = 0xff;
1134 result.context_update_tile_id = pic->picture_parameter.context_update_tile_id;
1135 for (i = 0; i < 65; ++i) {
1136 result.tile_col_start_sb[i] = pic->picture_parameter.tile_col_start_sb[i];
1137 result.tile_row_start_sb[i] = pic->picture_parameter.tile_row_start_sb[i];
1138 }
1139 result.max_width = pic->picture_parameter.max_width;
1140 result.max_height = pic->picture_parameter.max_height;
1141 if (pic->picture_parameter.pic_info_fields.use_superres) {
1142 result.width = (pic->picture_parameter.frame_width * 8 + pic->picture_parameter.superres_scale_denominator / 2) /
1143 pic->picture_parameter.superres_scale_denominator;
1144 result.superres_scale_denominator = pic->picture_parameter.superres_scale_denominator;
1145 } else {
1146 result.width = pic->picture_parameter.frame_width;
1147 result.superres_scale_denominator = pic->picture_parameter.superres_scale_denominator;
1148 }
1149 result.height = pic->picture_parameter.frame_height;
1150 result.superres_upscaled_width = pic->picture_parameter.frame_width;
1151 result.order_hint_bits = pic->picture_parameter.order_hint_bits_minus_1 + 1;
1152
1153 for (i = 0; i < NUM_AV1_REFS; ++i) {
1154 result.ref_frame_map[i] =
1155 (pic->ref[i]) ? (uintptr_t)vl_video_buffer_get_associated_data(pic->ref[i], &dec->base)
1156 : 0x7f;
1157 }
1158 for (i = 0; i < NUM_AV1_REFS_PER_FRAME; ++i)
1159 result.frame_refs[i] = result.ref_frame_map[pic->picture_parameter.ref_frame_idx[i]];
1160
1161 result.bit_depth_luma_minus8 = result.bit_depth_chroma_minus8 = pic->picture_parameter.bit_depth_idx << 1;
1162
1163 for (i = 0; i < 8; ++i) {
1164 for (j = 0; j < 8; ++j)
1165 result.feature_data[i][j] = pic->picture_parameter.seg_info.feature_data[i][j];
1166 result.feature_mask[i] = pic->picture_parameter.seg_info.feature_mask[i];
1167 }
1168 memcpy(dec->probs, &pic->picture_parameter.seg_info.feature_data, 128);
1169 memcpy((dec->probs + 128), &pic->picture_parameter.seg_info.feature_mask, 8);
1170
1171 result.cdef_damping = pic->picture_parameter.cdef_damping_minus_3 + 3;
1172 result.cdef_bits = pic->picture_parameter.cdef_bits;
1173 for (i = 0; i < 8; ++i) {
1174 result.cdef_strengths[i] = pic->picture_parameter.cdef_y_strengths[i];
1175 result.cdef_uv_strengths[i] = pic->picture_parameter.cdef_uv_strengths[i];
1176 }
1177 result.frame_restoration_type[0] = pic->picture_parameter.loop_restoration_fields.yframe_restoration_type;
1178 result.frame_restoration_type[1] = pic->picture_parameter.loop_restoration_fields.cbframe_restoration_type;
1179 result.frame_restoration_type[2] = pic->picture_parameter.loop_restoration_fields.crframe_restoration_type;
1180 for (i = 0; i < 3; ++i) {
1181 int log2_num = 0;
1182 int unit_size = pic->picture_parameter.lr_unit_size[i];
1183 if (unit_size) {
1184 while (unit_size >>= 1)
1185 log2_num++;
1186 result.log2_restoration_unit_size_minus5[i] = log2_num - 5;
1187 } else {
1188 result.log2_restoration_unit_size_minus5[i] = 0;
1189 }
1190 }
1191
1192 if (pic->picture_parameter.bit_depth_idx) {
1193 if (target->buffer_format == PIPE_FORMAT_P010 || target->buffer_format == PIPE_FORMAT_P016) {
1194 result.p010_mode = 1;
1195 result.msb_mode = 1;
1196 } else {
1197 result.luma_10to8 = 1;
1198 result.chroma_10to8 = 1;
1199 }
1200 }
1201
1202 result.preskip_segid = 0;
1203 result.last_active_segid = 0;
1204 for (i = 0; i < 8; i++) {
1205 for (j = 0; j < 8; j++) {
1206 if (pic->picture_parameter.seg_info.feature_mask[i] & (1 << j)) {
1207 result.last_active_segid = i;
1208 if (j >= 5)
1209 result.preskip_segid = 1;
1210 }
1211 }
1212 }
1213
1214 result.seg_lossless_flag = 0;
1215 for (i = 0; i < 8; ++i) {
1216 int av1_get_qindex, qindex;
1217 int segfeature_active = pic->picture_parameter.seg_info.feature_mask[i] & (1 << 0);
1218 if (segfeature_active) {
1219 int seg_qindex = pic->picture_parameter.base_qindex +
1220 pic->picture_parameter.seg_info.feature_data[i][0];
1221 av1_get_qindex = seg_qindex < 0 ? 0 : (seg_qindex > 255 ? 255 : seg_qindex);
1222 } else {
1223 av1_get_qindex = pic->picture_parameter.base_qindex;
1224 }
1225 qindex = pic->picture_parameter.seg_info.segment_info_fields.enabled ?
1226 av1_get_qindex :
1227 pic->picture_parameter.base_qindex;
1228 result.seg_lossless_flag |= (((qindex == 0) && result.y_dc_delta_q == 0 &&
1229 result.u_dc_delta_q == 0 && result.v_dc_delta_q == 0 &&
1230 result.u_ac_delta_q == 0 && result.v_ac_delta_q == 0) << i);
1231 }
1232
1233 rvcn_dec_film_grain_params_t* fg_params = &result.film_grain;
1234 fg_params->apply_grain = pic->picture_parameter.film_grain_info.film_grain_info_fields.apply_grain;
1235 if (fg_params->apply_grain) {
1236 rvcn_dec_av1_fg_init_buf_t *fg_buf = (rvcn_dec_av1_fg_init_buf_t *)(dec->probs + 256);
1237
1238 fg_params->random_seed = pic->picture_parameter.film_grain_info.grain_seed;
1239 fg_params->grain_scale_shift =
1240 pic->picture_parameter.film_grain_info.film_grain_info_fields.grain_scale_shift;
1241 fg_params->scaling_shift =
1242 pic->picture_parameter.film_grain_info.film_grain_info_fields.grain_scaling_minus_8 + 8;
1243 fg_params->chroma_scaling_from_luma =
1244 pic->picture_parameter.film_grain_info.film_grain_info_fields.chroma_scaling_from_luma;
1245 fg_params->num_y_points = pic->picture_parameter.film_grain_info.num_y_points;
1246 fg_params->num_cb_points = pic->picture_parameter.film_grain_info.num_cb_points;
1247 fg_params->num_cr_points = pic->picture_parameter.film_grain_info.num_cr_points;
1248 fg_params->cb_mult = pic->picture_parameter.film_grain_info.cb_mult;
1249 fg_params->cb_luma_mult = pic->picture_parameter.film_grain_info.cb_luma_mult;
1250 fg_params->cb_offset = pic->picture_parameter.film_grain_info.cb_offset;
1251 fg_params->cr_mult = pic->picture_parameter.film_grain_info.cr_mult;
1252 fg_params->cr_luma_mult = pic->picture_parameter.film_grain_info.cr_luma_mult;
1253 fg_params->cr_offset = pic->picture_parameter.film_grain_info.cr_offset;
1254 fg_params->bit_depth_minus_8 = pic->picture_parameter.bit_depth_idx << 1;
1255
1256 for (i = 0; i < fg_params->num_y_points; ++i) {
1257 fg_params->scaling_points_y[i][0] = pic->picture_parameter.film_grain_info.point_y_value[i];
1258 fg_params->scaling_points_y[i][1] = pic->picture_parameter.film_grain_info.point_y_scaling[i];
1259 }
1260 for (i = 0; i < fg_params->num_cb_points; ++i) {
1261 fg_params->scaling_points_cb[i][0] = pic->picture_parameter.film_grain_info.point_cb_value[i];
1262 fg_params->scaling_points_cb[i][1] = pic->picture_parameter.film_grain_info.point_cb_scaling[i];
1263 }
1264 for (i = 0; i < fg_params->num_cr_points; ++i) {
1265 fg_params->scaling_points_cr[i][0] = pic->picture_parameter.film_grain_info.point_cr_value[i];
1266 fg_params->scaling_points_cr[i][1] = pic->picture_parameter.film_grain_info.point_cr_scaling[i];
1267 }
1268
1269 fg_params->ar_coeff_lag = pic->picture_parameter.film_grain_info.film_grain_info_fields.ar_coeff_lag;
1270 fg_params->ar_coeff_shift =
1271 pic->picture_parameter.film_grain_info.film_grain_info_fields.ar_coeff_shift_minus_6 + 6;
1272
1273 for (i = 0; i < 24; ++i)
1274 fg_params->ar_coeffs_y[i] = pic->picture_parameter.film_grain_info.ar_coeffs_y[i];
1275
1276 for (i = 0; i < 25; ++i) {
1277 fg_params->ar_coeffs_cb[i] = pic->picture_parameter.film_grain_info.ar_coeffs_cb[i];
1278 fg_params->ar_coeffs_cr[i] = pic->picture_parameter.film_grain_info.ar_coeffs_cr[i];
1279 }
1280
1281 fg_params->overlap_flag = pic->picture_parameter.film_grain_info.film_grain_info_fields.overlap_flag;
1282 fg_params->clip_to_restricted_range =
1283 pic->picture_parameter.film_grain_info.film_grain_info_fields.clip_to_restricted_range;
1284
1285 rvcn_av1_init_film_grain_buffer(fg_params, fg_buf);
1286 }
1287
1288 result.uncompressed_header_size = 0;
1289 for (i = 0; i < 7; ++i) {
1290 result.global_motion[i + 1].wmtype = (rvcn_dec_transformation_type_e)pic->picture_parameter.wm[i].wmtype;
1291 for (j = 0; j < 6; ++j)
1292 result.global_motion[i + 1].wmmat[j] = pic->picture_parameter.wm[i].wmmat[j];
1293 }
1294 for (i = 0; i < tile_count && i < 256; ++i) {
1295 result.tile_info[i].offset = pic->slice_parameter.slice_data_offset[i];
1296 result.tile_info[i].size = pic->slice_parameter.slice_data_size[i];
1297 }
1298
1299 if (dec->dpb_type == DPB_DYNAMIC_TIER_2) {
1300 dec->ref_codec.bts = pic->picture_parameter.bit_depth_idx ? CODEC_10_BITS : CODEC_8_BITS;
1301 dec->ref_codec.index = result.curr_pic_idx;
1302 dec->ref_codec.ref_size = 8;
1303 memset(dec->ref_codec.ref_list, 0x7f, sizeof(dec->ref_codec.ref_list));
1304 memcpy(dec->ref_codec.ref_list, result.ref_frame_map, sizeof(result.ref_frame_map));
1305 }
1306
1307 return result;
1308 }
1309
rvcn_init_mode_probs(void * prob)1310 static void rvcn_init_mode_probs(void *prob)
1311 {
1312 rvcn_av1_frame_context_t * fc = (rvcn_av1_frame_context_t*)prob;
1313 int i;
1314
1315 memcpy(fc->palette_y_size_cdf, default_palette_y_size_cdf, sizeof(default_palette_y_size_cdf));
1316 memcpy(fc->palette_uv_size_cdf, default_palette_uv_size_cdf, sizeof(default_palette_uv_size_cdf));
1317 memcpy(fc->palette_y_color_index_cdf, default_palette_y_color_index_cdf, sizeof(default_palette_y_color_index_cdf));
1318 memcpy(fc->palette_uv_color_index_cdf, default_palette_uv_color_index_cdf, sizeof(default_palette_uv_color_index_cdf));
1319 memcpy(fc->kf_y_cdf, default_kf_y_mode_cdf, sizeof(default_kf_y_mode_cdf));
1320 memcpy(fc->angle_delta_cdf, default_angle_delta_cdf, sizeof(default_angle_delta_cdf));
1321 memcpy(fc->comp_inter_cdf, default_comp_inter_cdf, sizeof(default_comp_inter_cdf));
1322 memcpy(fc->comp_ref_type_cdf, default_comp_ref_type_cdf,sizeof(default_comp_ref_type_cdf));
1323 memcpy(fc->uni_comp_ref_cdf, default_uni_comp_ref_cdf, sizeof(default_uni_comp_ref_cdf));
1324 memcpy(fc->palette_y_mode_cdf, default_palette_y_mode_cdf, sizeof(default_palette_y_mode_cdf));
1325 memcpy(fc->palette_uv_mode_cdf, default_palette_uv_mode_cdf, sizeof(default_palette_uv_mode_cdf));
1326 memcpy(fc->comp_ref_cdf, default_comp_ref_cdf, sizeof(default_comp_ref_cdf));
1327 memcpy(fc->comp_bwdref_cdf, default_comp_bwdref_cdf, sizeof(default_comp_bwdref_cdf));
1328 memcpy(fc->single_ref_cdf, default_single_ref_cdf, sizeof(default_single_ref_cdf));
1329 memcpy(fc->txfm_partition_cdf, default_txfm_partition_cdf, sizeof(default_txfm_partition_cdf));
1330 memcpy(fc->compound_index_cdf, default_compound_idx_cdfs, sizeof(default_compound_idx_cdfs));
1331 memcpy(fc->comp_group_idx_cdf, default_comp_group_idx_cdfs, sizeof(default_comp_group_idx_cdfs));
1332 memcpy(fc->newmv_cdf, default_newmv_cdf, sizeof(default_newmv_cdf));
1333 memcpy(fc->zeromv_cdf, default_zeromv_cdf, sizeof(default_zeromv_cdf));
1334 memcpy(fc->refmv_cdf, default_refmv_cdf, sizeof(default_refmv_cdf));
1335 memcpy(fc->drl_cdf, default_drl_cdf, sizeof(default_drl_cdf));
1336 memcpy(fc->motion_mode_cdf, default_motion_mode_cdf, sizeof(default_motion_mode_cdf));
1337 memcpy(fc->obmc_cdf, default_obmc_cdf, sizeof(default_obmc_cdf));
1338 memcpy(fc->inter_compound_mode_cdf, default_inter_compound_mode_cdf, sizeof(default_inter_compound_mode_cdf));
1339 memcpy(fc->compound_type_cdf, default_compound_type_cdf, sizeof(default_compound_type_cdf));
1340 memcpy(fc->wedge_idx_cdf, default_wedge_idx_cdf, sizeof(default_wedge_idx_cdf));
1341 memcpy(fc->interintra_cdf, default_interintra_cdf, sizeof(default_interintra_cdf));
1342 memcpy(fc->wedge_interintra_cdf, default_wedge_interintra_cdf, sizeof(default_wedge_interintra_cdf));
1343 memcpy(fc->interintra_mode_cdf, default_interintra_mode_cdf, sizeof(default_interintra_mode_cdf));
1344 memcpy(fc->pred_cdf, default_segment_pred_cdf, sizeof(default_segment_pred_cdf));
1345 memcpy(fc->switchable_restore_cdf, default_switchable_restore_cdf, sizeof(default_switchable_restore_cdf));
1346 memcpy(fc->wiener_restore_cdf, default_wiener_restore_cdf, sizeof(default_wiener_restore_cdf));
1347 memcpy(fc->sgrproj_restore_cdf, default_sgrproj_restore_cdf, sizeof(default_sgrproj_restore_cdf));
1348 memcpy(fc->y_mode_cdf, default_if_y_mode_cdf, sizeof(default_if_y_mode_cdf));
1349 memcpy(fc->uv_mode_cdf, default_uv_mode_cdf, sizeof(default_uv_mode_cdf));
1350 memcpy(fc->switchable_interp_cdf, default_switchable_interp_cdf, sizeof(default_switchable_interp_cdf));
1351 memcpy(fc->partition_cdf, default_partition_cdf, sizeof(default_partition_cdf));
1352 memcpy(fc->intra_ext_tx_cdf, default_intra_ext_tx_cdf, sizeof(default_intra_ext_tx_cdf));
1353 memcpy(fc->inter_ext_tx_cdf, default_inter_ext_tx_cdf, sizeof(default_inter_ext_tx_cdf));
1354 memcpy(fc->skip_cdfs, default_skip_cdfs, sizeof(default_skip_cdfs));
1355 memcpy(fc->intra_inter_cdf, default_intra_inter_cdf, sizeof(default_intra_inter_cdf));
1356 memcpy(fc->tree_cdf, default_seg_tree_cdf, sizeof(default_seg_tree_cdf));
1357 for (i = 0; i < SPATIAL_PREDICTION_PROBS; ++i)
1358 memcpy(fc->spatial_pred_seg_cdf[i], default_spatial_pred_seg_tree_cdf[i], sizeof(default_spatial_pred_seg_tree_cdf[i]));
1359 memcpy(fc->tx_size_cdf, default_tx_size_cdf, sizeof(default_tx_size_cdf));
1360 memcpy(fc->delta_q_cdf, default_delta_q_cdf, sizeof(default_delta_q_cdf));
1361 memcpy(fc->skip_mode_cdfs, default_skip_mode_cdfs, sizeof(default_skip_mode_cdfs));
1362 memcpy(fc->delta_lf_cdf, default_delta_lf_cdf, sizeof(default_delta_lf_cdf));
1363 memcpy(fc->delta_lf_multi_cdf, default_delta_lf_multi_cdf, sizeof(default_delta_lf_multi_cdf));
1364 memcpy(fc->cfl_sign_cdf, default_cfl_sign_cdf, sizeof(default_cfl_sign_cdf));
1365 memcpy(fc->cfl_alpha_cdf, default_cfl_alpha_cdf, sizeof(default_cfl_alpha_cdf));
1366 memcpy(fc->filter_intra_cdfs, default_filter_intra_cdfs, sizeof(default_filter_intra_cdfs));
1367 memcpy(fc->filter_intra_mode_cdf, default_filter_intra_mode_cdf, sizeof(default_filter_intra_mode_cdf));
1368 memcpy(fc->intrabc_cdf, default_intrabc_cdf, sizeof(default_intrabc_cdf));
1369 }
1370
rvcn_vcn4_init_mode_probs(void * prob)1371 static void rvcn_vcn4_init_mode_probs(void *prob)
1372 {
1373 rvcn_av1_vcn4_frame_context_t * fc = (rvcn_av1_vcn4_frame_context_t*)prob;
1374 int i;
1375
1376 memcpy(fc->palette_y_size_cdf, default_palette_y_size_cdf, sizeof(default_palette_y_size_cdf));
1377 memcpy(fc->palette_uv_size_cdf, default_palette_uv_size_cdf, sizeof(default_palette_uv_size_cdf));
1378 memcpy(fc->palette_y_color_index_cdf, default_palette_y_color_index_cdf, sizeof(default_palette_y_color_index_cdf));
1379 memcpy(fc->palette_uv_color_index_cdf, default_palette_uv_color_index_cdf, sizeof(default_palette_uv_color_index_cdf));
1380 memcpy(fc->kf_y_cdf, default_kf_y_mode_cdf, sizeof(default_kf_y_mode_cdf));
1381 memcpy(fc->angle_delta_cdf, default_angle_delta_cdf, sizeof(default_angle_delta_cdf));
1382 memcpy(fc->comp_inter_cdf, default_comp_inter_cdf, sizeof(default_comp_inter_cdf));
1383 memcpy(fc->comp_ref_type_cdf, default_comp_ref_type_cdf,sizeof(default_comp_ref_type_cdf));
1384 memcpy(fc->uni_comp_ref_cdf, default_uni_comp_ref_cdf, sizeof(default_uni_comp_ref_cdf));
1385 memcpy(fc->palette_y_mode_cdf, default_palette_y_mode_cdf, sizeof(default_palette_y_mode_cdf));
1386 memcpy(fc->palette_uv_mode_cdf, default_palette_uv_mode_cdf, sizeof(default_palette_uv_mode_cdf));
1387 memcpy(fc->comp_ref_cdf, default_comp_ref_cdf, sizeof(default_comp_ref_cdf));
1388 memcpy(fc->comp_bwdref_cdf, default_comp_bwdref_cdf, sizeof(default_comp_bwdref_cdf));
1389 memcpy(fc->single_ref_cdf, default_single_ref_cdf, sizeof(default_single_ref_cdf));
1390 memcpy(fc->txfm_partition_cdf, default_txfm_partition_cdf, sizeof(default_txfm_partition_cdf));
1391 memcpy(fc->compound_index_cdf, default_compound_idx_cdfs, sizeof(default_compound_idx_cdfs));
1392 memcpy(fc->comp_group_idx_cdf, default_comp_group_idx_cdfs, sizeof(default_comp_group_idx_cdfs));
1393 memcpy(fc->newmv_cdf, default_newmv_cdf, sizeof(default_newmv_cdf));
1394 memcpy(fc->zeromv_cdf, default_zeromv_cdf, sizeof(default_zeromv_cdf));
1395 memcpy(fc->refmv_cdf, default_refmv_cdf, sizeof(default_refmv_cdf));
1396 memcpy(fc->drl_cdf, default_drl_cdf, sizeof(default_drl_cdf));
1397 memcpy(fc->motion_mode_cdf, default_motion_mode_cdf, sizeof(default_motion_mode_cdf));
1398 memcpy(fc->obmc_cdf, default_obmc_cdf, sizeof(default_obmc_cdf));
1399 memcpy(fc->inter_compound_mode_cdf, default_inter_compound_mode_cdf, sizeof(default_inter_compound_mode_cdf));
1400 memcpy(fc->compound_type_cdf, default_compound_type_cdf, sizeof(default_compound_type_cdf));
1401 memcpy(fc->wedge_idx_cdf, default_wedge_idx_cdf, sizeof(default_wedge_idx_cdf));
1402 memcpy(fc->interintra_cdf, default_interintra_cdf, sizeof(default_interintra_cdf));
1403 memcpy(fc->wedge_interintra_cdf, default_wedge_interintra_cdf, sizeof(default_wedge_interintra_cdf));
1404 memcpy(fc->interintra_mode_cdf, default_interintra_mode_cdf, sizeof(default_interintra_mode_cdf));
1405 memcpy(fc->pred_cdf, default_segment_pred_cdf, sizeof(default_segment_pred_cdf));
1406 memcpy(fc->switchable_restore_cdf, default_switchable_restore_cdf, sizeof(default_switchable_restore_cdf));
1407 memcpy(fc->wiener_restore_cdf, default_wiener_restore_cdf, sizeof(default_wiener_restore_cdf));
1408 memcpy(fc->sgrproj_restore_cdf, default_sgrproj_restore_cdf, sizeof(default_sgrproj_restore_cdf));
1409 memcpy(fc->y_mode_cdf, default_if_y_mode_cdf, sizeof(default_if_y_mode_cdf));
1410 memcpy(fc->uv_mode_cdf, default_uv_mode_cdf, sizeof(default_uv_mode_cdf));
1411 memcpy(fc->switchable_interp_cdf, default_switchable_interp_cdf, sizeof(default_switchable_interp_cdf));
1412 memcpy(fc->partition_cdf, default_partition_cdf, sizeof(default_partition_cdf));
1413 memcpy(fc->intra_ext_tx_cdf, &default_intra_ext_tx_cdf[1], sizeof(default_intra_ext_tx_cdf[1]) * 2);
1414 memcpy(fc->inter_ext_tx_cdf, &default_inter_ext_tx_cdf[1], sizeof(default_inter_ext_tx_cdf[1]) * 3);
1415 memcpy(fc->skip_cdfs, default_skip_cdfs, sizeof(default_skip_cdfs));
1416 memcpy(fc->intra_inter_cdf, default_intra_inter_cdf, sizeof(default_intra_inter_cdf));
1417 memcpy(fc->tree_cdf, default_seg_tree_cdf, sizeof(default_seg_tree_cdf));
1418 for (i = 0; i < SPATIAL_PREDICTION_PROBS; ++i)
1419 memcpy(fc->spatial_pred_seg_cdf[i], default_spatial_pred_seg_tree_cdf[i], sizeof(default_spatial_pred_seg_tree_cdf[i]));
1420 memcpy(fc->tx_size_cdf, default_tx_size_cdf, sizeof(default_tx_size_cdf));
1421 memcpy(fc->delta_q_cdf, default_delta_q_cdf, sizeof(default_delta_q_cdf));
1422 memcpy(fc->skip_mode_cdfs, default_skip_mode_cdfs, sizeof(default_skip_mode_cdfs));
1423 memcpy(fc->delta_lf_cdf, default_delta_lf_cdf, sizeof(default_delta_lf_cdf));
1424 memcpy(fc->delta_lf_multi_cdf, default_delta_lf_multi_cdf, sizeof(default_delta_lf_multi_cdf));
1425 memcpy(fc->cfl_sign_cdf, default_cfl_sign_cdf, sizeof(default_cfl_sign_cdf));
1426 memcpy(fc->cfl_alpha_cdf, default_cfl_alpha_cdf, sizeof(default_cfl_alpha_cdf));
1427 memcpy(fc->filter_intra_cdfs, default_filter_intra_cdfs, sizeof(default_filter_intra_cdfs));
1428 memcpy(fc->filter_intra_mode_cdf, default_filter_intra_mode_cdf, sizeof(default_filter_intra_mode_cdf));
1429 memcpy(fc->intrabc_cdf, default_intrabc_cdf, sizeof(default_intrabc_cdf));
1430 }
1431
rvcn_av1_init_mv_probs(void * prob)1432 static void rvcn_av1_init_mv_probs(void *prob)
1433 {
1434 rvcn_av1_frame_context_t * fc = (rvcn_av1_frame_context_t*)prob;
1435
1436 memcpy(fc->nmvc_joints_cdf, default_nmv_context.joints_cdf, sizeof(default_nmv_context.joints_cdf));
1437 memcpy(fc->nmvc_0_bits_cdf, default_nmv_context.comps[0].bits_cdf, sizeof(default_nmv_context.comps[0].bits_cdf));
1438 memcpy(fc->nmvc_0_class0_cdf, default_nmv_context.comps[0].class0_cdf, sizeof(default_nmv_context.comps[0].class0_cdf));
1439 memcpy(fc->nmvc_0_class0_fp_cdf, default_nmv_context.comps[0].class0_fp_cdf, sizeof(default_nmv_context.comps[0].class0_fp_cdf));
1440 memcpy(fc->nmvc_0_class0_hp_cdf, default_nmv_context.comps[0].class0_hp_cdf, sizeof(default_nmv_context.comps[0].class0_hp_cdf));
1441 memcpy(fc->nmvc_0_classes_cdf, default_nmv_context.comps[0].classes_cdf, sizeof(default_nmv_context.comps[0].classes_cdf));
1442 memcpy(fc->nmvc_0_fp_cdf, default_nmv_context.comps[0].fp_cdf, sizeof(default_nmv_context.comps[0].fp_cdf));
1443 memcpy(fc->nmvc_0_hp_cdf, default_nmv_context.comps[0].hp_cdf, sizeof(default_nmv_context.comps[0].hp_cdf));
1444 memcpy(fc->nmvc_0_sign_cdf, default_nmv_context.comps[0].sign_cdf, sizeof(default_nmv_context.comps[0].sign_cdf));
1445 memcpy(fc->nmvc_1_bits_cdf, default_nmv_context.comps[1].bits_cdf, sizeof(default_nmv_context.comps[1].bits_cdf));
1446 memcpy(fc->nmvc_1_class0_cdf, default_nmv_context.comps[1].class0_cdf, sizeof(default_nmv_context.comps[1].class0_cdf));
1447 memcpy(fc->nmvc_1_class0_fp_cdf, default_nmv_context.comps[1].class0_fp_cdf, sizeof(default_nmv_context.comps[1].class0_fp_cdf));
1448 memcpy(fc->nmvc_1_class0_hp_cdf, default_nmv_context.comps[1].class0_hp_cdf, sizeof(default_nmv_context.comps[1].class0_hp_cdf));
1449 memcpy(fc->nmvc_1_classes_cdf, default_nmv_context.comps[1].classes_cdf, sizeof(default_nmv_context.comps[1].classes_cdf));
1450 memcpy(fc->nmvc_1_fp_cdf, default_nmv_context.comps[1].fp_cdf, sizeof(default_nmv_context.comps[1].fp_cdf));
1451 memcpy(fc->nmvc_1_hp_cdf, default_nmv_context.comps[1].hp_cdf, sizeof(default_nmv_context.comps[1].hp_cdf));
1452 memcpy(fc->nmvc_1_sign_cdf, default_nmv_context.comps[1].sign_cdf, sizeof(default_nmv_context.comps[1].sign_cdf));
1453 memcpy(fc->ndvc_joints_cdf, default_nmv_context.joints_cdf, sizeof(default_nmv_context.joints_cdf));
1454 memcpy(fc->ndvc_0_bits_cdf, default_nmv_context.comps[0].bits_cdf, sizeof(default_nmv_context.comps[0].bits_cdf));
1455 memcpy(fc->ndvc_0_class0_cdf, default_nmv_context.comps[0].class0_cdf, sizeof(default_nmv_context.comps[0].class0_cdf));
1456 memcpy(fc->ndvc_0_class0_fp_cdf, default_nmv_context.comps[0].class0_fp_cdf, sizeof(default_nmv_context.comps[0].class0_fp_cdf));
1457 memcpy(fc->ndvc_0_class0_hp_cdf, default_nmv_context.comps[0].class0_hp_cdf, sizeof(default_nmv_context.comps[0].class0_hp_cdf));
1458 memcpy(fc->ndvc_0_classes_cdf, default_nmv_context.comps[0].classes_cdf, sizeof(default_nmv_context.comps[0].classes_cdf));
1459 memcpy(fc->ndvc_0_fp_cdf, default_nmv_context.comps[0].fp_cdf, sizeof(default_nmv_context.comps[0].fp_cdf));
1460 memcpy(fc->ndvc_0_hp_cdf, default_nmv_context.comps[0].hp_cdf, sizeof(default_nmv_context.comps[0].hp_cdf));
1461 memcpy(fc->ndvc_0_sign_cdf, default_nmv_context.comps[0].sign_cdf, sizeof(default_nmv_context.comps[0].sign_cdf));
1462 memcpy(fc->ndvc_1_bits_cdf, default_nmv_context.comps[1].bits_cdf, sizeof(default_nmv_context.comps[1].bits_cdf));
1463 memcpy(fc->ndvc_1_class0_cdf, default_nmv_context.comps[1].class0_cdf, sizeof(default_nmv_context.comps[1].class0_cdf));
1464 memcpy(fc->ndvc_1_class0_fp_cdf, default_nmv_context.comps[1].class0_fp_cdf, sizeof(default_nmv_context.comps[1].class0_fp_cdf));
1465 memcpy(fc->ndvc_1_class0_hp_cdf, default_nmv_context.comps[1].class0_hp_cdf, sizeof(default_nmv_context.comps[1].class0_hp_cdf));
1466 memcpy(fc->ndvc_1_classes_cdf, default_nmv_context.comps[1].classes_cdf, sizeof(default_nmv_context.comps[1].classes_cdf));
1467 memcpy(fc->ndvc_1_fp_cdf, default_nmv_context.comps[1].fp_cdf, sizeof(default_nmv_context.comps[1].fp_cdf));
1468 memcpy(fc->ndvc_1_hp_cdf, default_nmv_context.comps[1].hp_cdf, sizeof(default_nmv_context.comps[1].hp_cdf));
1469 memcpy(fc->ndvc_1_sign_cdf, default_nmv_context.comps[1].sign_cdf, sizeof(default_nmv_context.comps[1].sign_cdf));
1470 }
1471
rvcn_vcn4_av1_init_mv_probs(void * prob)1472 static void rvcn_vcn4_av1_init_mv_probs(void *prob)
1473 {
1474 rvcn_av1_vcn4_frame_context_t * fc = (rvcn_av1_vcn4_frame_context_t*)prob;
1475
1476 memcpy(fc->nmvc_joints_cdf, default_nmv_context.joints_cdf, sizeof(default_nmv_context.joints_cdf));
1477 memcpy(fc->nmvc_0_bits_cdf, default_nmv_context.comps[0].bits_cdf, sizeof(default_nmv_context.comps[0].bits_cdf));
1478 memcpy(fc->nmvc_0_class0_cdf, default_nmv_context.comps[0].class0_cdf, sizeof(default_nmv_context.comps[0].class0_cdf));
1479 memcpy(fc->nmvc_0_class0_fp_cdf, default_nmv_context.comps[0].class0_fp_cdf, sizeof(default_nmv_context.comps[0].class0_fp_cdf));
1480 memcpy(fc->nmvc_0_class0_hp_cdf, default_nmv_context.comps[0].class0_hp_cdf, sizeof(default_nmv_context.comps[0].class0_hp_cdf));
1481 memcpy(fc->nmvc_0_classes_cdf, default_nmv_context.comps[0].classes_cdf, sizeof(default_nmv_context.comps[0].classes_cdf));
1482 memcpy(fc->nmvc_0_fp_cdf, default_nmv_context.comps[0].fp_cdf, sizeof(default_nmv_context.comps[0].fp_cdf));
1483 memcpy(fc->nmvc_0_hp_cdf, default_nmv_context.comps[0].hp_cdf, sizeof(default_nmv_context.comps[0].hp_cdf));
1484 memcpy(fc->nmvc_0_sign_cdf, default_nmv_context.comps[0].sign_cdf, sizeof(default_nmv_context.comps[0].sign_cdf));
1485 memcpy(fc->nmvc_1_bits_cdf, default_nmv_context.comps[1].bits_cdf, sizeof(default_nmv_context.comps[1].bits_cdf));
1486 memcpy(fc->nmvc_1_class0_cdf, default_nmv_context.comps[1].class0_cdf, sizeof(default_nmv_context.comps[1].class0_cdf));
1487 memcpy(fc->nmvc_1_class0_fp_cdf, default_nmv_context.comps[1].class0_fp_cdf, sizeof(default_nmv_context.comps[1].class0_fp_cdf));
1488 memcpy(fc->nmvc_1_class0_hp_cdf, default_nmv_context.comps[1].class0_hp_cdf, sizeof(default_nmv_context.comps[1].class0_hp_cdf));
1489 memcpy(fc->nmvc_1_classes_cdf, default_nmv_context.comps[1].classes_cdf, sizeof(default_nmv_context.comps[1].classes_cdf));
1490 memcpy(fc->nmvc_1_fp_cdf, default_nmv_context.comps[1].fp_cdf, sizeof(default_nmv_context.comps[1].fp_cdf));
1491 memcpy(fc->nmvc_1_hp_cdf, default_nmv_context.comps[1].hp_cdf, sizeof(default_nmv_context.comps[1].hp_cdf));
1492 memcpy(fc->nmvc_1_sign_cdf, default_nmv_context.comps[1].sign_cdf, sizeof(default_nmv_context.comps[1].sign_cdf));
1493 memcpy(fc->ndvc_joints_cdf, default_nmv_context.joints_cdf, sizeof(default_nmv_context.joints_cdf));
1494 memcpy(fc->ndvc_0_bits_cdf, default_nmv_context.comps[0].bits_cdf, sizeof(default_nmv_context.comps[0].bits_cdf));
1495 memcpy(fc->ndvc_0_class0_cdf, default_nmv_context.comps[0].class0_cdf, sizeof(default_nmv_context.comps[0].class0_cdf));
1496 memcpy(fc->ndvc_0_class0_fp_cdf, default_nmv_context.comps[0].class0_fp_cdf, sizeof(default_nmv_context.comps[0].class0_fp_cdf));
1497 memcpy(fc->ndvc_0_class0_hp_cdf, default_nmv_context.comps[0].class0_hp_cdf, sizeof(default_nmv_context.comps[0].class0_hp_cdf));
1498 memcpy(fc->ndvc_0_classes_cdf, default_nmv_context.comps[0].classes_cdf, sizeof(default_nmv_context.comps[0].classes_cdf));
1499 memcpy(fc->ndvc_0_fp_cdf, default_nmv_context.comps[0].fp_cdf, sizeof(default_nmv_context.comps[0].fp_cdf));
1500 memcpy(fc->ndvc_0_hp_cdf, default_nmv_context.comps[0].hp_cdf, sizeof(default_nmv_context.comps[0].hp_cdf));
1501 memcpy(fc->ndvc_0_sign_cdf, default_nmv_context.comps[0].sign_cdf, sizeof(default_nmv_context.comps[0].sign_cdf));
1502 memcpy(fc->ndvc_1_bits_cdf, default_nmv_context.comps[1].bits_cdf, sizeof(default_nmv_context.comps[1].bits_cdf));
1503 memcpy(fc->ndvc_1_class0_cdf, default_nmv_context.comps[1].class0_cdf, sizeof(default_nmv_context.comps[1].class0_cdf));
1504 memcpy(fc->ndvc_1_class0_fp_cdf, default_nmv_context.comps[1].class0_fp_cdf, sizeof(default_nmv_context.comps[1].class0_fp_cdf));
1505 memcpy(fc->ndvc_1_class0_hp_cdf, default_nmv_context.comps[1].class0_hp_cdf, sizeof(default_nmv_context.comps[1].class0_hp_cdf));
1506 memcpy(fc->ndvc_1_classes_cdf, default_nmv_context.comps[1].classes_cdf, sizeof(default_nmv_context.comps[1].classes_cdf));
1507 memcpy(fc->ndvc_1_fp_cdf, default_nmv_context.comps[1].fp_cdf, sizeof(default_nmv_context.comps[1].fp_cdf));
1508 memcpy(fc->ndvc_1_hp_cdf, default_nmv_context.comps[1].hp_cdf, sizeof(default_nmv_context.comps[1].hp_cdf));
1509 memcpy(fc->ndvc_1_sign_cdf, default_nmv_context.comps[1].sign_cdf, sizeof(default_nmv_context.comps[1].sign_cdf));
1510 }
1511
rvcn_av1_default_coef_probs(void * prob,int index)1512 static void rvcn_av1_default_coef_probs(void *prob, int index)
1513 {
1514 rvcn_av1_frame_context_t * fc = (rvcn_av1_frame_context_t*)prob;
1515
1516 memcpy(fc->txb_skip_cdf, av1_default_txb_skip_cdfs[index], sizeof(av1_default_txb_skip_cdfs[index]));
1517 memcpy(fc->eob_extra_cdf, av1_default_eob_extra_cdfs[index], sizeof(av1_default_eob_extra_cdfs[index]));
1518 memcpy(fc->dc_sign_cdf, av1_default_dc_sign_cdfs[index], sizeof(av1_default_dc_sign_cdfs[index]));
1519 memcpy(fc->coeff_br_cdf, av1_default_coeff_lps_multi_cdfs[index], sizeof(av1_default_coeff_lps_multi_cdfs[index]));
1520 memcpy(fc->coeff_base_cdf, av1_default_coeff_base_multi_cdfs[index], sizeof(av1_default_coeff_base_multi_cdfs[index]));
1521 memcpy(fc->coeff_base_eob_cdf, av1_default_coeff_base_eob_multi_cdfs[index], sizeof(av1_default_coeff_base_eob_multi_cdfs[index]));
1522 memcpy(fc->eob_flag_cdf16, av1_default_eob_multi16_cdfs[index], sizeof(av1_default_eob_multi16_cdfs[index]));
1523 memcpy(fc->eob_flag_cdf32, av1_default_eob_multi32_cdfs[index], sizeof(av1_default_eob_multi32_cdfs[index]));
1524 memcpy(fc->eob_flag_cdf64, av1_default_eob_multi64_cdfs[index], sizeof(av1_default_eob_multi64_cdfs[index]));
1525 memcpy(fc->eob_flag_cdf128, av1_default_eob_multi128_cdfs[index], sizeof(av1_default_eob_multi128_cdfs[index]));
1526 memcpy(fc->eob_flag_cdf256, av1_default_eob_multi256_cdfs[index], sizeof(av1_default_eob_multi256_cdfs[index]));
1527 memcpy(fc->eob_flag_cdf512, av1_default_eob_multi512_cdfs[index], sizeof(av1_default_eob_multi512_cdfs[index]));
1528 memcpy(fc->eob_flag_cdf1024, av1_default_eob_multi1024_cdfs[index], sizeof(av1_default_eob_multi1024_cdfs[index]));
1529 }
1530
rvcn_vcn4_av1_default_coef_probs(void * prob,int index)1531 static void rvcn_vcn4_av1_default_coef_probs(void *prob, int index)
1532 {
1533 rvcn_av1_vcn4_frame_context_t *fc = (rvcn_av1_vcn4_frame_context_t*)prob;
1534 void *p;
1535 int i, j;
1536 unsigned size;
1537
1538 memcpy(fc->txb_skip_cdf, av1_default_txb_skip_cdfs[index], sizeof(av1_default_txb_skip_cdfs[index]));
1539
1540 p = (void *)fc->eob_extra_cdf;
1541 size = sizeof(av1_default_eob_extra_cdfs[0][0][0][0]) * EOB_COEF_CONTEXTS_VCN4;
1542 for (i = 0; i < AV1_TX_SIZES; i++) {
1543 for ( j = 0; j < AV1_PLANE_TYPES; j++) {
1544 memcpy(p, &av1_default_eob_extra_cdfs[index][i][j][3], size);
1545 p += size;
1546 }
1547 }
1548
1549 memcpy(fc->dc_sign_cdf, av1_default_dc_sign_cdfs[index], sizeof(av1_default_dc_sign_cdfs[index]));
1550 memcpy(fc->coeff_br_cdf, av1_default_coeff_lps_multi_cdfs[index], sizeof(av1_default_coeff_lps_multi_cdfs[index]));
1551 memcpy(fc->coeff_base_cdf, av1_default_coeff_base_multi_cdfs[index], sizeof(av1_default_coeff_base_multi_cdfs[index]));
1552 memcpy(fc->coeff_base_eob_cdf, av1_default_coeff_base_eob_multi_cdfs[index], sizeof(av1_default_coeff_base_eob_multi_cdfs[index]));
1553 memcpy(fc->eob_flag_cdf16, av1_default_eob_multi16_cdfs[index], sizeof(av1_default_eob_multi16_cdfs[index]));
1554 memcpy(fc->eob_flag_cdf32, av1_default_eob_multi32_cdfs[index], sizeof(av1_default_eob_multi32_cdfs[index]));
1555 memcpy(fc->eob_flag_cdf64, av1_default_eob_multi64_cdfs[index], sizeof(av1_default_eob_multi64_cdfs[index]));
1556 memcpy(fc->eob_flag_cdf128, av1_default_eob_multi128_cdfs[index], sizeof(av1_default_eob_multi128_cdfs[index]));
1557 memcpy(fc->eob_flag_cdf256, av1_default_eob_multi256_cdfs[index], sizeof(av1_default_eob_multi256_cdfs[index]));
1558 memcpy(fc->eob_flag_cdf512, av1_default_eob_multi512_cdfs[index], sizeof(av1_default_eob_multi512_cdfs[index]));
1559 memcpy(fc->eob_flag_cdf1024, av1_default_eob_multi1024_cdfs[index], sizeof(av1_default_eob_multi1024_cdfs[index]));
1560 }
1561
calc_ctx_size_h265_main(struct radeon_decoder * dec)1562 static unsigned calc_ctx_size_h265_main(struct radeon_decoder *dec)
1563 {
1564 unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH);
1565 unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT);
1566
1567 unsigned max_references = dec->base.max_references + 1;
1568
1569 if (dec->base.width * dec->base.height >= 4096 * 2000)
1570 max_references = MAX2(max_references, 8);
1571 else
1572 max_references = MAX2(max_references, 17);
1573
1574 width = align(width, 16);
1575 height = align(height, 16);
1576 return ((width + 255) / 16) * ((height + 255) / 16) * 16 * max_references + 52 * 1024;
1577 }
1578
calc_ctx_size_h265_main10(struct radeon_decoder * dec,struct pipe_h265_picture_desc * pic)1579 static unsigned calc_ctx_size_h265_main10(struct radeon_decoder *dec,
1580 struct pipe_h265_picture_desc *pic)
1581 {
1582 unsigned log2_ctb_size, width_in_ctb, height_in_ctb, num_16x16_block_per_ctb;
1583 unsigned context_buffer_size_per_ctb_row, cm_buffer_size, max_mb_address, db_left_tile_pxl_size;
1584 unsigned db_left_tile_ctx_size = 4096 / 16 * (32 + 16 * 4);
1585
1586 unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH);
1587 unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT);
1588 unsigned coeff_10bit =
1589 (pic->pps->sps->bit_depth_luma_minus8 || pic->pps->sps->bit_depth_chroma_minus8) ? 2 : 1;
1590
1591 unsigned max_references = dec->base.max_references + 1;
1592
1593 if (dec->base.width * dec->base.height >= 4096 * 2000)
1594 max_references = MAX2(max_references, 8);
1595 else
1596 max_references = MAX2(max_references, 17);
1597
1598 log2_ctb_size = pic->pps->sps->log2_min_luma_coding_block_size_minus3 + 3 +
1599 pic->pps->sps->log2_diff_max_min_luma_coding_block_size;
1600
1601 width_in_ctb = (width + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size;
1602 height_in_ctb = (height + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size;
1603
1604 num_16x16_block_per_ctb = ((1 << log2_ctb_size) >> 4) * ((1 << log2_ctb_size) >> 4);
1605 context_buffer_size_per_ctb_row = align(width_in_ctb * num_16x16_block_per_ctb * 16, 256);
1606 max_mb_address = (unsigned)ceil(height * 8 / 2048.0);
1607
1608 cm_buffer_size = max_references * context_buffer_size_per_ctb_row * height_in_ctb;
1609 db_left_tile_pxl_size = coeff_10bit * (max_mb_address * 2 * 2048 + 1024);
1610
1611 return cm_buffer_size + db_left_tile_ctx_size + db_left_tile_pxl_size;
1612 }
1613
get_vc1_msg(struct pipe_vc1_picture_desc * pic)1614 static rvcn_dec_message_vc1_t get_vc1_msg(struct pipe_vc1_picture_desc *pic)
1615 {
1616 rvcn_dec_message_vc1_t result;
1617
1618 memset(&result, 0, sizeof(result));
1619 switch (pic->base.profile) {
1620 case PIPE_VIDEO_PROFILE_VC1_SIMPLE:
1621 result.profile = RDECODE_VC1_PROFILE_SIMPLE;
1622 result.level = 1;
1623 break;
1624
1625 case PIPE_VIDEO_PROFILE_VC1_MAIN:
1626 result.profile = RDECODE_VC1_PROFILE_MAIN;
1627 result.level = 2;
1628 break;
1629
1630 case PIPE_VIDEO_PROFILE_VC1_ADVANCED:
1631 result.profile = RDECODE_VC1_PROFILE_ADVANCED;
1632 result.level = 4;
1633 break;
1634
1635 default:
1636 assert(0);
1637 }
1638
1639 result.sps_info_flags |= pic->postprocflag << 7;
1640 result.sps_info_flags |= pic->pulldown << 6;
1641 result.sps_info_flags |= pic->interlace << 5;
1642 result.sps_info_flags |= pic->tfcntrflag << 4;
1643 result.sps_info_flags |= pic->finterpflag << 3;
1644 result.sps_info_flags |= pic->psf << 1;
1645
1646 result.pps_info_flags |= pic->range_mapy_flag << 31;
1647 result.pps_info_flags |= pic->range_mapy << 28;
1648 result.pps_info_flags |= pic->range_mapuv_flag << 27;
1649 result.pps_info_flags |= pic->range_mapuv << 24;
1650 result.pps_info_flags |= pic->multires << 21;
1651 result.pps_info_flags |= pic->maxbframes << 16;
1652 result.pps_info_flags |= pic->overlap << 11;
1653 result.pps_info_flags |= pic->quantizer << 9;
1654 result.pps_info_flags |= pic->panscan_flag << 7;
1655 result.pps_info_flags |= pic->refdist_flag << 6;
1656 result.pps_info_flags |= pic->vstransform << 0;
1657
1658 if (pic->base.profile != PIPE_VIDEO_PROFILE_VC1_SIMPLE) {
1659 result.pps_info_flags |= pic->syncmarker << 20;
1660 result.pps_info_flags |= pic->rangered << 19;
1661 result.pps_info_flags |= pic->loopfilter << 5;
1662 result.pps_info_flags |= pic->fastuvmc << 4;
1663 result.pps_info_flags |= pic->extended_mv << 3;
1664 result.pps_info_flags |= pic->extended_dmv << 8;
1665 result.pps_info_flags |= pic->dquant << 1;
1666 }
1667
1668 result.chroma_format = 1;
1669
1670 return result;
1671 }
1672
get_ref_pic_idx(struct radeon_decoder * dec,struct pipe_video_buffer * ref)1673 static uint32_t get_ref_pic_idx(struct radeon_decoder *dec, struct pipe_video_buffer *ref)
1674 {
1675 uint32_t min = MAX2(dec->frame_number, NUM_MPEG2_REFS) - NUM_MPEG2_REFS;
1676 uint32_t max = MAX2(dec->frame_number, 1) - 1;
1677 uintptr_t frame;
1678
1679 /* seems to be the most sane fallback */
1680 if (!ref)
1681 return max;
1682
1683 /* get the frame number from the associated data */
1684 frame = (uintptr_t)vl_video_buffer_get_associated_data(ref, &dec->base);
1685
1686 /* limit the frame number to a valid range */
1687 return MAX2(MIN2(frame, max), min);
1688 }
1689
get_mpeg2_msg(struct radeon_decoder * dec,struct pipe_mpeg12_picture_desc * pic)1690 static rvcn_dec_message_mpeg2_vld_t get_mpeg2_msg(struct radeon_decoder *dec,
1691 struct pipe_mpeg12_picture_desc *pic)
1692 {
1693 const int *zscan = pic->alternate_scan ? vl_zscan_alternate : vl_zscan_normal;
1694 rvcn_dec_message_mpeg2_vld_t result;
1695 unsigned i;
1696
1697 memset(&result, 0, sizeof(result));
1698 result.decoded_pic_idx = dec->frame_number;
1699
1700 result.forward_ref_pic_idx = get_ref_pic_idx(dec, pic->ref[0]);
1701 result.backward_ref_pic_idx = get_ref_pic_idx(dec, pic->ref[1]);
1702
1703 if (pic->intra_matrix) {
1704 result.load_intra_quantiser_matrix = 1;
1705 for (i = 0; i < 64; ++i) {
1706 result.intra_quantiser_matrix[i] = pic->intra_matrix[zscan[i]];
1707 }
1708 }
1709 if (pic->non_intra_matrix) {
1710 result.load_nonintra_quantiser_matrix = 1;
1711 for (i = 0; i < 64; ++i) {
1712 result.nonintra_quantiser_matrix[i] = pic->non_intra_matrix[zscan[i]];
1713 }
1714 }
1715
1716 result.profile_and_level_indication = 0;
1717 result.chroma_format = 0x1;
1718
1719 result.picture_coding_type = pic->picture_coding_type;
1720 result.f_code[0][0] = pic->f_code[0][0] + 1;
1721 result.f_code[0][1] = pic->f_code[0][1] + 1;
1722 result.f_code[1][0] = pic->f_code[1][0] + 1;
1723 result.f_code[1][1] = pic->f_code[1][1] + 1;
1724 result.intra_dc_precision = pic->intra_dc_precision;
1725 result.pic_structure = pic->picture_structure;
1726 result.top_field_first = pic->top_field_first;
1727 result.frame_pred_frame_dct = pic->frame_pred_frame_dct;
1728 result.concealment_motion_vectors = pic->concealment_motion_vectors;
1729 result.q_scale_type = pic->q_scale_type;
1730 result.intra_vlc_format = pic->intra_vlc_format;
1731 result.alternate_scan = pic->alternate_scan;
1732
1733 return result;
1734 }
1735
get_mpeg4_msg(struct radeon_decoder * dec,struct pipe_mpeg4_picture_desc * pic)1736 static rvcn_dec_message_mpeg4_asp_vld_t get_mpeg4_msg(struct radeon_decoder *dec,
1737 struct pipe_mpeg4_picture_desc *pic)
1738 {
1739 rvcn_dec_message_mpeg4_asp_vld_t result;
1740 unsigned i;
1741
1742 memset(&result, 0, sizeof(result));
1743 result.decoded_pic_idx = dec->frame_number;
1744
1745 result.forward_ref_pic_idx = get_ref_pic_idx(dec, pic->ref[0]);
1746 result.backward_ref_pic_idx = get_ref_pic_idx(dec, pic->ref[1]);
1747
1748 result.variant_type = 0;
1749 result.profile_and_level_indication = 0xF0;
1750
1751 result.video_object_layer_verid = 0x5;
1752 result.video_object_layer_shape = 0x0;
1753
1754 result.video_object_layer_width = dec->base.width;
1755 result.video_object_layer_height = dec->base.height;
1756
1757 result.vop_time_increment_resolution = pic->vop_time_increment_resolution;
1758
1759 result.short_video_header = pic->short_video_header;
1760 result.interlaced = pic->interlaced;
1761 result.load_intra_quant_mat = 1;
1762 result.load_nonintra_quant_mat = 1;
1763 result.quarter_sample = pic->quarter_sample;
1764 result.complexity_estimation_disable = 1;
1765 result.resync_marker_disable = pic->resync_marker_disable;
1766 result.newpred_enable = 0;
1767 result.reduced_resolution_vop_enable = 0;
1768
1769 result.quant_type = pic->quant_type;
1770
1771 for (i = 0; i < 64; ++i) {
1772 result.intra_quant_mat[i] = pic->intra_matrix[vl_zscan_normal[i]];
1773 result.nonintra_quant_mat[i] = pic->non_intra_matrix[vl_zscan_normal[i]];
1774 }
1775
1776 return result;
1777 }
1778
rvcn_dec_message_create(struct radeon_decoder * dec)1779 static void rvcn_dec_message_create(struct radeon_decoder *dec)
1780 {
1781 rvcn_dec_message_header_t *header = dec->msg;
1782 rvcn_dec_message_create_t *create = dec->msg + sizeof(rvcn_dec_message_header_t);
1783 unsigned sizes = sizeof(rvcn_dec_message_header_t) + sizeof(rvcn_dec_message_create_t);
1784
1785 memset(dec->msg, 0, sizes);
1786 header->header_size = sizeof(rvcn_dec_message_header_t);
1787 header->total_size = sizes;
1788 header->num_buffers = 1;
1789 header->msg_type = RDECODE_MSG_CREATE;
1790 header->stream_handle = dec->stream_handle;
1791 header->status_report_feedback_number = 0;
1792
1793 header->index[0].message_id = RDECODE_MESSAGE_CREATE;
1794 header->index[0].offset = sizeof(rvcn_dec_message_header_t);
1795 header->index[0].size = sizeof(rvcn_dec_message_create_t);
1796 header->index[0].filled = 0;
1797
1798 create->stream_type = dec->stream_type;
1799 create->session_flags = 0;
1800 create->width_in_samples = dec->base.width;
1801 create->height_in_samples = dec->base.height;
1802 }
1803
/**
 * Fill the dynamic DPB (tier 2) message for the picture being decoded and
 * maintain the decoder's lists of DPB buffers.
 *
 * Steps, in order:
 *  1. Walk dec->dpb_ref_list.  Entries whose index appears in
 *     dec->ref_codec.ref_list get their address written to the matching
 *     dpbAddrLo/Hi slot.  Entries that are not referenced are either moved to
 *     dec->dpb_unref_list (size differs from the current one) or marked free
 *     by setting their index to 0x7f.
 *  2. Pick the buffer for the current picture: an entry that already carries
 *     dec->ref_codec.index with the right size, else the first free entry.
 *  3. Destroy everything on dec->dpb_unref_list, waiting on dec->prev_fence
 *     first when one is set.
 *  4. If no buffer was found, allocate a new one (TMZ when \p encrypted) and
 *     append it to dec->dpb_ref_list.
 *  5. Add the buffer to the command stream and fill in the current-picture
 *     address plus the pitch/height/size fields.
 *
 * \param dec             decoder state
 * \param decode          decode message; width/height are read and
 *                        decode_flags is set to 1
 * \param dynamic_dpb_t2  message to fill
 * \param encrypted       whether a newly allocated buffer must be a TMZ buffer
 * \return 0 on success, 1 when a new DPB buffer could not be allocated
 */
static unsigned rvcn_dec_dynamic_dpb_t2_message(struct radeon_decoder *dec, rvcn_dec_message_decode_t *decode,
                                                rvcn_dec_message_dynamic_dpb_t2_t *dynamic_dpb_t2, bool encrypted)
{
   struct rvcn_dec_dynamic_dpb_t2 *dpb = NULL, *dummy = NULL;
   unsigned width, height, size;
   uint64_t addr;
   int i;

   /* Size of one DPB buffer for the current dimensions; 1.5x larger again
    * when the codec is flagged as 10 bit. */
   width = align(decode->width_in_samples, dec->db_alignment);
   height = align(decode->height_in_samples, dec->db_alignment);
   size = align((width * height * 3) / 2, 256);
   if (dec->ref_codec.bts == CODEC_10_BITS)
      size = size * 3 / 2;

   list_for_each_entry_safe(struct rvcn_dec_dynamic_dpb_t2, d, &dec->dpb_ref_list, list) {
      for (i = 0; i < dec->ref_codec.ref_size; ++i) {
         /* 0x7f in the low seven bits marks an unused reference slot. */
         if (((dec->ref_codec.ref_list[i] & 0x7f) != 0x7f) && (d->index == (dec->ref_codec.ref_list[i] & 0x7f))) {
            /* Remember the first referenced buffer as a fallback address. */
            if (!dummy)
               dummy = d;

            addr = dec->ws->buffer_get_virtual_address(d->dpb.res->buf);
            if (!addr && dummy) {
               RVID_ERR("Ref list from application is incorrect, using dummy buffer instead.\n");
               addr = dec->ws->buffer_get_virtual_address(dummy->dpb.res->buf);
            }
            dynamic_dpb_t2->dpbAddrLo[i] = addr;
            dynamic_dpb_t2->dpbAddrHi[i] = addr >> 32;
            ++dynamic_dpb_t2->dpbArraySize;
            break;
         }
      }
      /* The inner loop ran to completion: this buffer is not referenced. */
      if (i == dec->ref_codec.ref_size) {
         if (d->dpb.res->b.b.width0 * d->dpb.res->b.b.height0 != size) {
            /* Wrong size for the current stream parameters: retire it. */
            list_del(&d->list);
            list_addtail(&d->list, &dec->dpb_unref_list);
         } else {
            /* Right size: keep it around as a free buffer. */
            d->index = 0x7f;
         }
      }
   }

   /* Prefer a buffer that already belongs to the current picture index. */
   list_for_each_entry_safe(struct rvcn_dec_dynamic_dpb_t2, d, &dec->dpb_ref_list, list) {
      if (d->dpb.res->b.b.width0 * d->dpb.res->b.b.height0 == size && d->index == dec->ref_codec.index) {
         dpb = d;
         break;
      }
   }

   /* Otherwise take over the first free buffer. */
   if (!dpb) {
      list_for_each_entry_safe(struct rvcn_dec_dynamic_dpb_t2, d, &dec->dpb_ref_list, list) {
         if (d->index == 0x7f) {
            d->index = dec->ref_codec.index;
            dpb = d;
            break;
         }
      }
   }

   /* Release retired buffers once the previous submission has finished. */
   list_for_each_entry_safe(struct rvcn_dec_dynamic_dpb_t2, d, &dec->dpb_unref_list, list) {
      if (dec->prev_fence)
         dec->ws->fence_wait(dec->ws, dec->prev_fence, PIPE_DEFAULT_DECODER_FEEDBACK_TIMEOUT_NS);
      list_del(&d->list);
      si_vid_destroy_buffer(&d->dpb);
      FREE(d);
   }

   if (!dpb) {
      bool r;
      dpb = CALLOC_STRUCT(rvcn_dec_dynamic_dpb_t2);
      if (!dpb)
         return 1;
      dpb->index = dec->ref_codec.index;
      if (encrypted)
         r = si_vid_create_tmz_buffer(dec->screen, &dpb->dpb, size, PIPE_USAGE_DEFAULT);
      else
         r = si_vid_create_buffer(dec->screen, &dpb->dpb, size, PIPE_USAGE_DEFAULT);

      if (!r) {
         RVID_ERR("Can't allocated dpb buffer.\n");
         FREE(dpb);
         return 1;
      }

      /* Checked only after creation succeeded: the assert dereferences
       * dpb->dpb.res, which presumably is not valid after a failed
       * creation. */
      assert(encrypted == (bool)(dpb->dpb.res->flags & RADEON_FLAG_ENCRYPTED));

      list_addtail(&dpb->list, &dec->dpb_ref_list);
   }

   dec->ws->cs_add_buffer(&dec->cs, dpb->dpb.res->buf,
      RADEON_USAGE_READWRITE | RADEON_USAGE_SYNCHRONIZED, RADEON_DOMAIN_VRAM);
   addr = dec->ws->buffer_get_virtual_address(dpb->dpb.res->buf);
   dynamic_dpb_t2->dpbCurrLo = addr;
   dynamic_dpb_t2->dpbCurrHi = addr >> 32;

   decode->decode_flags = 1;
   dynamic_dpb_t2->dpbConfigFlags = 0;
   dynamic_dpb_t2->dpbLumaPitch = align(decode->width_in_samples, dec->db_alignment);
   dynamic_dpb_t2->dpbLumaAlignedHeight = align(decode->height_in_samples, dec->db_alignment);
   dynamic_dpb_t2->dpbLumaAlignedSize = dynamic_dpb_t2->dpbLumaPitch *
      dynamic_dpb_t2->dpbLumaAlignedHeight;
   /* Chroma pitch and height are half the luma ones; the size covers two
    * such planes. */
   dynamic_dpb_t2->dpbChromaPitch = dynamic_dpb_t2->dpbLumaPitch >> 1;
   dynamic_dpb_t2->dpbChromaAlignedHeight = dynamic_dpb_t2->dpbLumaAlignedHeight >> 1;
   dynamic_dpb_t2->dpbChromaAlignedSize = dynamic_dpb_t2->dpbChromaPitch *
      dynamic_dpb_t2->dpbChromaAlignedHeight * 2;

   if (dec->ref_codec.bts == CODEC_10_BITS) {
      dynamic_dpb_t2->dpbLumaAlignedSize = dynamic_dpb_t2->dpbLumaAlignedSize * 3 / 2;
      dynamic_dpb_t2->dpbChromaAlignedSize = dynamic_dpb_t2->dpbChromaAlignedSize * 3 / 2;
   }

   return 0;
}
1914
rvcn_dec_message_decode(struct radeon_decoder * dec,struct pipe_video_buffer * target,struct pipe_picture_desc * picture)1915 static struct pb_buffer_lean *rvcn_dec_message_decode(struct radeon_decoder *dec,
1916 struct pipe_video_buffer *target,
1917 struct pipe_picture_desc *picture)
1918 {
1919 DECRYPT_PARAMETERS *decrypt = (DECRYPT_PARAMETERS *)picture->decrypt_key;
1920 bool encrypted = picture->protected_playback;
1921 struct si_texture *luma;
1922 struct si_texture *chroma;
1923 struct pipe_video_buffer *out_surf = target;
1924 ASSERTED struct si_screen *sscreen = (struct si_screen *)dec->screen;
1925 rvcn_dec_message_header_t *header;
1926 rvcn_dec_message_index_t *index_codec;
1927 rvcn_dec_message_index_t *index_drm = NULL;
1928 rvcn_dec_message_index_t *index_dynamic_dpb = NULL;
1929 rvcn_dec_message_index_t *index_hevc_direct_reflist = NULL;
1930 rvcn_dec_message_decode_t *decode;
1931 unsigned sizes = 0, offset_decode, offset_codec;
1932 unsigned offset_drm = 0, offset_dynamic_dpb = 0, offset_hevc_direct_reflist = 0;
1933 void *codec;
1934 rvcn_dec_message_drm_t *drm = NULL;
1935 rvcn_dec_message_dynamic_dpb_t *dynamic_dpb = NULL;
1936 rvcn_dec_message_dynamic_dpb_t2_t *dynamic_dpb_t2 = NULL;
1937 rvcn_dec_message_hevc_direct_ref_list_t *hevc_reflist = NULL;
1938 bool dpb_resize = false;
1939 header = dec->msg;
1940 sizes += sizeof(rvcn_dec_message_header_t);
1941
1942 index_codec = (void*)header + sizes;
1943 sizes += sizeof(rvcn_dec_message_index_t);
1944
1945 if (encrypted) {
1946 index_drm = (void*)header + sizes;
1947 sizes += sizeof(rvcn_dec_message_index_t);
1948 }
1949
1950 if (dec->dpb_type >= DPB_DYNAMIC_TIER_1) {
1951 index_dynamic_dpb = (void*)header + sizes;
1952 sizes += sizeof(rvcn_dec_message_index_t);
1953 }
1954
1955 if (u_reduce_video_profile(picture->profile) == PIPE_VIDEO_FORMAT_HEVC) {
1956 index_hevc_direct_reflist = (void*)header + sizes;
1957 sizes += sizeof(rvcn_dec_message_index_t);
1958 }
1959
1960 offset_decode = sizes;
1961 decode = (void*)header + sizes;
1962 sizes += sizeof(rvcn_dec_message_decode_t);
1963
1964 if (encrypted) {
1965 offset_drm = sizes;
1966 drm = (void*)header + sizes;
1967 sizes += sizeof(rvcn_dec_message_drm_t);
1968 }
1969
1970 if (dec->dpb_type >= DPB_DYNAMIC_TIER_1) {
1971 offset_dynamic_dpb = sizes;
1972 if (dec->dpb_type == DPB_DYNAMIC_TIER_1) {
1973 dynamic_dpb = (void*)header + sizes;
1974 sizes += sizeof(rvcn_dec_message_dynamic_dpb_t);
1975 }
1976 else if (dec->dpb_type == DPB_DYNAMIC_TIER_2) {
1977 dynamic_dpb_t2 = (void*)header + sizes;
1978 sizes += sizeof(rvcn_dec_message_dynamic_dpb_t2_t);
1979 }
1980 }
1981
1982 if (u_reduce_video_profile(picture->profile) == PIPE_VIDEO_FORMAT_HEVC) {
1983 offset_hevc_direct_reflist = sizes;
1984 hevc_reflist = (void*)header + sizes;
1985 sizes += align((4 + 2 * 15 * ((struct pipe_h265_picture_desc *)picture)->slice_parameter.slice_count), 4);
1986 }
1987
1988 offset_codec = sizes;
1989 codec = (void*)header + sizes;
1990
1991 memset(dec->msg, 0, sizes);
1992 header->header_size = sizeof(rvcn_dec_message_header_t);
1993 header->total_size = sizes;
1994 header->msg_type = RDECODE_MSG_DECODE;
1995 header->stream_handle = dec->stream_handle;
1996 header->status_report_feedback_number = dec->frame_number;
1997
1998 header->index[0].message_id = RDECODE_MESSAGE_DECODE;
1999 header->index[0].offset = offset_decode;
2000 header->index[0].size = sizeof(rvcn_dec_message_decode_t);
2001 header->index[0].filled = 0;
2002 header->num_buffers = 1;
2003
2004 index_codec->offset = offset_codec;
2005 index_codec->size = sizeof(rvcn_dec_message_avc_t);
2006 index_codec->filled = 0;
2007 ++header->num_buffers;
2008
2009 if (encrypted) {
2010 index_drm->message_id = RDECODE_MESSAGE_DRM;
2011 index_drm->offset = offset_drm;
2012 index_drm->size = sizeof(rvcn_dec_message_drm_t);
2013 index_drm->filled = 0;
2014 ++header->num_buffers;
2015 }
2016
2017 if (dec->dpb_type >= DPB_DYNAMIC_TIER_1) {
2018 index_dynamic_dpb->message_id = RDECODE_MESSAGE_DYNAMIC_DPB;
2019 index_dynamic_dpb->offset = offset_dynamic_dpb;
2020 index_dynamic_dpb->filled = 0;
2021 ++header->num_buffers;
2022 if (dec->dpb_type == DPB_DYNAMIC_TIER_1)
2023 index_dynamic_dpb->size = sizeof(rvcn_dec_message_dynamic_dpb_t);
2024 else if (dec->dpb_type == DPB_DYNAMIC_TIER_2)
2025 index_dynamic_dpb->size = sizeof(rvcn_dec_message_dynamic_dpb_t2_t);
2026 }
2027
2028 if (u_reduce_video_profile(picture->profile) == PIPE_VIDEO_FORMAT_HEVC) {
2029 index_hevc_direct_reflist->message_id = RDECODE_MESSAGE_HEVC_DIRECT_REF_LIST;
2030 index_hevc_direct_reflist->offset = offset_hevc_direct_reflist;
2031 index_hevc_direct_reflist->size = align((4 + 2 * 15 * ((struct pipe_h265_picture_desc *)picture)->slice_parameter.slice_count), 4);
2032 index_hevc_direct_reflist->filled = 0;
2033 ++header->num_buffers;
2034 }
2035
2036 decode->stream_type = dec->stream_type;
2037 decode->decode_flags = 0;
2038 decode->width_in_samples = dec->base.width;
2039 decode->height_in_samples = dec->base.height;
2040
2041 decode->bsd_size = align(dec->bs_size, 128);
2042
2043 if (dec->dpb_type != DPB_DYNAMIC_TIER_2) {
2044 bool r;
2045 if (!dec->dpb.res && dec->dpb_size) {
2046 if (encrypted) {
2047 r = si_vid_create_tmz_buffer(dec->screen, &dec->dpb, dec->dpb_size, PIPE_USAGE_DEFAULT);
2048 } else {
2049 r = si_vid_create_buffer(dec->screen, &dec->dpb, dec->dpb_size, PIPE_USAGE_DEFAULT);
2050 }
2051 assert(encrypted == (bool)(dec->dpb.res->flags & RADEON_FLAG_ENCRYPTED));
2052 if (!r) {
2053 RVID_ERR("Can't allocated dpb.\n");
2054 return NULL;
2055 }
2056 si_vid_clear_buffer(dec->base.context, &dec->dpb);
2057 } else if (dec->dpb_type == DPB_DYNAMIC_TIER_1 && dec->dpb.res &&
2058 (dec->max_width < dec->base.width || dec->max_height < dec->base.height)) {
2059 struct rvid_buf_offset_info buf_offset_info;
2060
2061 buf_offset_info.num_units = (NUM_VP9_REFS + 1);
2062 buf_offset_info.old_offset = (align(dec->max_width, dec->db_alignment) *
2063 align(dec->max_height, dec->db_alignment) * 3 / 2);
2064 buf_offset_info.new_offset = (align(dec->base.width, dec->db_alignment) *
2065 align(dec->base.height, dec->db_alignment) * 3 / 2);
2066
2067 dec->dpb_size = calc_dpb_size(dec);
2068 r = si_vid_resize_buffer(dec->screen, &dec->cs, &dec->dpb, dec->dpb_size, &buf_offset_info);
2069 if (!r) {
2070 RVID_ERR("Can't resize dpb.\n");
2071 return NULL;
2072 }
2073 dec->max_width = dec->base.width;
2074 dec->max_height = dec->base.height;
2075 dpb_resize = true;
2076 }
2077 }
2078
2079 if (!dec->ctx.res) {
2080 enum pipe_video_format fmt = u_reduce_video_profile(picture->profile);
2081 if (dec->stream_type == RDECODE_CODEC_H264_PERF) {
2082 unsigned ctx_size = calc_ctx_size_h264_perf(dec);
2083 bool r;
2084 if (encrypted && dec->tmz_ctx) {
2085 r = si_vid_create_tmz_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT);
2086 } else {
2087 r = si_vid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT);
2088 }
2089 assert((encrypted && dec->tmz_ctx) == (bool)(dec->ctx.res->flags & RADEON_FLAG_ENCRYPTED));
2090
2091 if (!r) {
2092 RVID_ERR("Can't allocated context buffer.\n");
2093 return NULL;
2094 }
2095 si_vid_clear_buffer(dec->base.context, &dec->ctx);
2096 } else if (fmt == PIPE_VIDEO_FORMAT_VP9) {
2097 unsigned ctx_size;
2098 uint8_t *ptr;
2099 bool r;
2100
2101 /* default probability + probability data */
2102 ctx_size = 2304 * 5;
2103
2104 if (((struct si_screen *)dec->screen)->info.vcn_ip_version >= VCN_2_0_0) {
2105 /* SRE collocated context data */
2106 ctx_size += 32 * 2 * 128 * 68;
2107 /* SMP collocated context data */
2108 ctx_size += 9 * 64 * 2 * 128 * 68;
2109 /* SDB left tile pixel */
2110 ctx_size += 8 * 2 * 2 * 8192;
2111 } else {
2112 ctx_size += 32 * 2 * 64 * 64;
2113 ctx_size += 9 * 64 * 2 * 64 * 64;
2114 ctx_size += 8 * 2 * 4096;
2115 }
2116
2117 if (dec->base.profile == PIPE_VIDEO_PROFILE_VP9_PROFILE2)
2118 ctx_size += 8 * 2 * 4096;
2119
2120 if (encrypted && dec->tmz_ctx) {
2121 r = si_vid_create_tmz_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT);
2122 } else {
2123 r = si_vid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT);
2124 }
2125 if (!r) {
2126 RVID_ERR("Can't allocated context buffer.\n");
2127 return NULL;
2128 }
2129 si_vid_clear_buffer(dec->base.context, &dec->ctx);
2130
2131 /* ctx needs probs table */
2132 ptr = dec->ws->buffer_map(dec->ws, dec->ctx.res->buf, &dec->cs,
2133 PIPE_MAP_WRITE | RADEON_MAP_TEMPORARY);
2134 fill_probs_table(ptr);
2135 dec->ws->buffer_unmap(dec->ws, dec->ctx.res->buf);
2136 dec->bs_ptr = NULL;
2137 } else if (fmt == PIPE_VIDEO_FORMAT_HEVC) {
2138 unsigned ctx_size;
2139 bool r;
2140 if (dec->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10)
2141 ctx_size = calc_ctx_size_h265_main10(dec, (struct pipe_h265_picture_desc *)picture);
2142 else
2143 ctx_size = calc_ctx_size_h265_main(dec);
2144
2145 if (encrypted && dec->tmz_ctx) {
2146 r = si_vid_create_tmz_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT);
2147 } else {
2148 r = si_vid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT);
2149 }
2150 if (!r) {
2151 RVID_ERR("Can't allocated context buffer.\n");
2152 return NULL;
2153 }
2154 si_vid_clear_buffer(dec->base.context, &dec->ctx);
2155 }
2156 }
2157 if (encrypted != dec->ws->cs_is_secure(&dec->cs)) {
2158 dec->ws->cs_flush(&dec->cs, RADEON_FLUSH_TOGGLE_SECURE_SUBMISSION, NULL);
2159 }
2160
2161 if (dec->stream_type == RDECODE_CODEC_AV1)
2162 rvcn_dec_av1_film_grain_surface(&out_surf, (struct pipe_av1_picture_desc *)picture);
2163
2164 luma = (struct si_texture *)((struct vl_video_buffer *)out_surf)->resources[0];
2165 chroma = (struct si_texture *)((struct vl_video_buffer *)out_surf)->resources[1];
2166
2167 decode->dpb_size = (dec->dpb_type != DPB_DYNAMIC_TIER_2) ? dec->dpb.res->buf->size : 0;
2168
2169 /* When texture being created, the bo will be created with total size of planes,
2170 * and all planes point to the same buffer */
2171 assert(si_resource(((struct vl_video_buffer *)out_surf)->resources[0])->buf->size ==
2172 si_resource(((struct vl_video_buffer *)out_surf)->resources[1])->buf->size);
2173
2174 decode->dt_size = si_resource(((struct vl_video_buffer *)out_surf)->resources[0])->buf->size;
2175
2176 decode->sct_size = 0;
2177 decode->sc_coeff_size = 0;
2178
2179 decode->sw_ctxt_size = RDECODE_SESSION_CONTEXT_SIZE;
2180 decode->db_pitch = align(dec->base.width, dec->db_alignment);
2181
2182 if ((((struct si_screen*)dec->screen)->info.vcn_ip_version >= VCN_3_0_0) &&
2183 (dec->stream_type == RDECODE_CODEC_VP9 || dec->stream_type == RDECODE_CODEC_AV1 ||
2184 dec->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10))
2185 decode->db_aligned_height = align(dec->base.height, 64);
2186
2187 decode->db_surf_tile_config = 0;
2188 decode->db_array_mode = dec->addr_gfx_mode;
2189
2190 decode->dt_pitch = luma->surface.u.gfx9.surf_pitch * luma->surface.blk_w;
2191 decode->dt_uv_pitch = chroma->surface.u.gfx9.surf_pitch * chroma->surface.blk_w;
2192
2193 if (luma->surface.meta_offset) {
2194 RVID_ERR("DCC surfaces not supported.\n");
2195 return NULL;
2196 }
2197
2198 decode->dt_tiling_mode = 0;
2199 decode->dt_swizzle_mode = luma->surface.u.gfx9.swizzle_mode;
2200 decode->dt_array_mode = dec->addr_gfx_mode;
2201 decode->dt_field_mode = ((struct vl_video_buffer *)out_surf)->base.interlaced;
2202 decode->dt_surf_tile_config = 0;
2203 decode->dt_uv_surf_tile_config = 0;
2204
2205 decode->dt_luma_top_offset = luma->surface.u.gfx9.surf_offset;
2206 decode->dt_chroma_top_offset = chroma->surface.u.gfx9.surf_offset;
2207 if (decode->dt_field_mode) {
2208 decode->dt_luma_bottom_offset =
2209 luma->surface.u.gfx9.surf_offset + luma->surface.u.gfx9.surf_slice_size;
2210 decode->dt_chroma_bottom_offset =
2211 chroma->surface.u.gfx9.surf_offset + chroma->surface.u.gfx9.surf_slice_size;
2212 } else {
2213 decode->dt_luma_bottom_offset = decode->dt_luma_top_offset;
2214 decode->dt_chroma_bottom_offset = decode->dt_chroma_top_offset;
2215 }
2216 if (dec->stream_type == RDECODE_CODEC_AV1)
2217 decode->db_pitch_uv = chroma->surface.u.gfx9.surf_pitch * chroma->surface.blk_w;
2218
2219 if (encrypted) {
2220 assert(sscreen->info.has_tmz_support);
2221 set_drm_keys(drm, decrypt);
2222 }
2223
2224 if (dec->dpb_type == DPB_DYNAMIC_TIER_1) {
2225 decode->decode_flags |= (RDECODE_FLAGS_USE_DYNAMIC_DPB_MASK | RDECODE_FLAGS_USE_PAL_MASK);
2226 // Add decode flag for RESIZE_DPB ,when we do resize
2227 if (dpb_resize == true)
2228 decode->decode_flags |= RDECODE_FLAGS_DPB_RESIZE_MASK;
2229
2230 dynamic_dpb->dpbArraySize = NUM_VP9_REFS + 1;
2231 dynamic_dpb->dpbLumaPitch = align(dec->max_width, dec->db_alignment);
2232 dynamic_dpb->dpbLumaAlignedHeight = align(dec->max_height, dec->db_alignment);
2233 dynamic_dpb->dpbLumaAlignedSize =
2234 dynamic_dpb->dpbLumaPitch * dynamic_dpb->dpbLumaAlignedHeight;
2235 dynamic_dpb->dpbChromaPitch = dynamic_dpb->dpbLumaPitch >> 1;
2236 dynamic_dpb->dpbChromaAlignedHeight = dynamic_dpb->dpbLumaAlignedHeight >> 1;
2237 dynamic_dpb->dpbChromaAlignedSize =
2238 dynamic_dpb->dpbChromaPitch * dynamic_dpb->dpbChromaAlignedHeight * 2;
2239 dynamic_dpb->dpbReserved0[0] = dec->db_alignment;
2240
2241 if (dec->base.profile == PIPE_VIDEO_PROFILE_VP9_PROFILE2) {
2242 dynamic_dpb->dpbLumaAlignedSize = dynamic_dpb->dpbLumaAlignedSize * 3 / 2;
2243 dynamic_dpb->dpbChromaAlignedSize = dynamic_dpb->dpbChromaAlignedSize * 3 / 2;
2244 }
2245 }
2246
2247 if (u_reduce_video_profile(picture->profile) == PIPE_VIDEO_FORMAT_HEVC)
2248 get_h265_reflist(hevc_reflist, (struct pipe_h265_picture_desc *)picture);
2249
2250 switch (u_reduce_video_profile(picture->profile)) {
2251 case PIPE_VIDEO_FORMAT_MPEG4_AVC: {
2252 rvcn_dec_message_avc_t avc = get_h264_msg(dec, target, (struct pipe_h264_picture_desc *)picture);
2253 memcpy(codec, (void *)&avc, sizeof(rvcn_dec_message_avc_t));
2254 index_codec->message_id = RDECODE_MESSAGE_AVC;
2255 break;
2256 }
2257 case PIPE_VIDEO_FORMAT_HEVC: {
2258 rvcn_dec_message_hevc_t hevc =
2259 get_h265_msg(dec, target, (struct pipe_h265_picture_desc *)picture);
2260
2261 memcpy(codec, (void *)&hevc, sizeof(rvcn_dec_message_hevc_t));
2262 index_codec->message_id = RDECODE_MESSAGE_HEVC;
2263 break;
2264 }
2265 case PIPE_VIDEO_FORMAT_VC1: {
2266 rvcn_dec_message_vc1_t vc1 = get_vc1_msg((struct pipe_vc1_picture_desc *)picture);
2267
2268 memcpy(codec, (void *)&vc1, sizeof(rvcn_dec_message_vc1_t));
2269 if ((picture->profile == PIPE_VIDEO_PROFILE_VC1_SIMPLE) ||
2270 (picture->profile == PIPE_VIDEO_PROFILE_VC1_MAIN)) {
2271 decode->width_in_samples = align(decode->width_in_samples, 16) / 16;
2272 decode->height_in_samples = align(decode->height_in_samples, 16) / 16;
2273 }
2274 index_codec->message_id = RDECODE_MESSAGE_VC1;
2275 break;
2276 }
2277 case PIPE_VIDEO_FORMAT_MPEG12: {
2278 rvcn_dec_message_mpeg2_vld_t mpeg2 =
2279 get_mpeg2_msg(dec, (struct pipe_mpeg12_picture_desc *)picture);
2280
2281 memcpy(codec, (void *)&mpeg2, sizeof(rvcn_dec_message_mpeg2_vld_t));
2282 index_codec->message_id = RDECODE_MESSAGE_MPEG2_VLD;
2283 break;
2284 }
2285 case PIPE_VIDEO_FORMAT_MPEG4: {
2286 rvcn_dec_message_mpeg4_asp_vld_t mpeg4 =
2287 get_mpeg4_msg(dec, (struct pipe_mpeg4_picture_desc *)picture);
2288
2289 memcpy(codec, (void *)&mpeg4, sizeof(rvcn_dec_message_mpeg4_asp_vld_t));
2290 index_codec->message_id = RDECODE_MESSAGE_MPEG4_ASP_VLD;
2291 break;
2292 }
2293 case PIPE_VIDEO_FORMAT_VP9: {
2294 rvcn_dec_message_vp9_t vp9 =
2295 get_vp9_msg(dec, target, (struct pipe_vp9_picture_desc *)picture);
2296
2297 memcpy(codec, (void *)&vp9, sizeof(rvcn_dec_message_vp9_t));
2298 index_codec->message_id = RDECODE_MESSAGE_VP9;
2299 break;
2300 }
2301 case PIPE_VIDEO_FORMAT_AV1: {
2302 rvcn_dec_message_av1_t av1 =
2303 get_av1_msg(dec, target, (struct pipe_av1_picture_desc *)picture);
2304
2305 memcpy(codec, (void *)&av1, sizeof(rvcn_dec_message_av1_t));
2306 index_codec->message_id = RDECODE_MESSAGE_AV1;
2307
2308 if (dec->ctx.res == NULL) {
2309 unsigned frame_ctxt_size = dec->av1_version == RDECODE_AV1_VER_0
2310 ? align(sizeof(rvcn_av1_frame_context_t), 2048)
2311 : align(sizeof(rvcn_av1_vcn4_frame_context_t), 2048);
2312
2313 unsigned ctx_size = (9 + 4) * frame_ctxt_size + 9 * 64 * 34 * 512 + 9 * 64 * 34 * 256 * 5;
2314 int num_64x64_CTB_8k = 68;
2315 int num_128x128_CTB_8k = 34;
2316 int sdb_pitch_64x64 = align(32 * num_64x64_CTB_8k, 256) * 2;
2317 int sdb_pitch_128x128 = align(32 * num_128x128_CTB_8k, 256) * 2;
2318 int sdb_lf_size_ctb_64x64 = sdb_pitch_64x64 * (align(1728, 64) / 64);
2319 int sdb_lf_size_ctb_128x128 = sdb_pitch_128x128 * (align(3008, 64) / 64);
2320 int sdb_superres_size_ctb_64x64 = sdb_pitch_64x64 * (align(3232, 64) / 64);
2321 int sdb_superres_size_ctb_128x128 = sdb_pitch_128x128 * (align(6208, 64) / 64);
2322 int sdb_output_size_ctb_64x64 = sdb_pitch_64x64 * (align(1312, 64) / 64);
2323 int sdb_output_size_ctb_128x128 = sdb_pitch_128x128 * (align(2336, 64) / 64);
2324 int sdb_fg_avg_luma_size_ctb_64x64 = sdb_pitch_64x64 * (align(384, 64) / 64);
2325 int sdb_fg_avg_luma_size_ctb_128x128 = sdb_pitch_128x128 * (align(640, 64) / 64);
2326 uint8_t *ptr;
2327 int i;
2328 struct rvcn_av1_prob_funcs prob;
2329
2330 if (dec->av1_version == RDECODE_AV1_VER_0) {
2331 prob.init_mode_probs = rvcn_init_mode_probs;
2332 prob.init_mv_probs = rvcn_av1_init_mv_probs;
2333 prob.default_coef_probs = rvcn_av1_default_coef_probs;
2334 } else {
2335 prob.init_mode_probs = rvcn_vcn4_init_mode_probs;
2336 prob.init_mv_probs = rvcn_vcn4_av1_init_mv_probs;
2337 prob.default_coef_probs = rvcn_vcn4_av1_default_coef_probs;
2338 }
2339
2340 ctx_size += (MAX2(sdb_lf_size_ctb_64x64, sdb_lf_size_ctb_128x128) +
2341 MAX2(sdb_superres_size_ctb_64x64, sdb_superres_size_ctb_128x128) +
2342 MAX2(sdb_output_size_ctb_64x64, sdb_output_size_ctb_128x128) +
2343 MAX2(sdb_fg_avg_luma_size_ctb_64x64, sdb_fg_avg_luma_size_ctb_128x128)) * 2 + 68 * 512;
2344
2345 if (!si_vid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT))
2346 RVID_ERR("Can't allocated context buffer.\n");
2347 si_vid_clear_buffer(dec->base.context, &dec->ctx);
2348
2349 ptr = dec->ws->buffer_map(dec->ws, dec->ctx.res->buf, &dec->cs, PIPE_MAP_WRITE | RADEON_MAP_TEMPORARY);
2350
2351 for (i = 0; i < 4; ++i) {
2352 prob.init_mode_probs((void*)(ptr + i * frame_ctxt_size));
2353 prob.init_mv_probs((void*)(ptr + i * frame_ctxt_size));
2354 prob.default_coef_probs((void*)(ptr + i * frame_ctxt_size), i);
2355 }
2356 dec->ws->buffer_unmap(dec->ws, dec->ctx.res->buf);
2357 }
2358
2359 break;
2360 }
2361 default:
2362 assert(0);
2363 return NULL;
2364 }
2365
2366 if (dec->ctx.res)
2367 decode->hw_ctxt_size = dec->ctx.res->buf->size;
2368
2369 if (dec->dpb_type == DPB_DYNAMIC_TIER_2)
2370 if (rvcn_dec_dynamic_dpb_t2_message(dec, decode, dynamic_dpb_t2, encrypted))
2371 return NULL;
2372
2373 return luma->buffer.buf;
2374 }
2375
rvcn_dec_message_destroy(struct radeon_decoder * dec)2376 static void rvcn_dec_message_destroy(struct radeon_decoder *dec)
2377 {
2378 rvcn_dec_message_header_t *header = dec->msg;
2379
2380 memset(dec->msg, 0, sizeof(rvcn_dec_message_header_t));
2381 header->header_size = sizeof(rvcn_dec_message_header_t);
2382 header->total_size = sizeof(rvcn_dec_message_header_t) - sizeof(rvcn_dec_message_index_t);
2383 header->num_buffers = 0;
2384 header->msg_type = RDECODE_MSG_DESTROY;
2385 header->stream_handle = dec->stream_handle;
2386 header->status_report_feedback_number = 0;
2387 }
2388
rvcn_dec_message_feedback(struct radeon_decoder * dec)2389 static void rvcn_dec_message_feedback(struct radeon_decoder *dec)
2390 {
2391 rvcn_dec_feedback_header_t *header = (void *)dec->fb;
2392
2393 header->header_size = sizeof(rvcn_dec_feedback_header_t);
2394 header->total_size = sizeof(rvcn_dec_feedback_header_t);
2395 header->num_buffers = 0;
2396 }
2397
rvcn_dec_sq_tail(struct radeon_decoder * dec)2398 static void rvcn_dec_sq_tail(struct radeon_decoder *dec)
2399 {
2400 if (dec->vcn_dec_sw_ring == false)
2401 return;
2402
2403 rvcn_sq_tail(&dec->cs, &dec->sq);
2404 }
2405 /* flush IB to the hardware */
flush(struct radeon_decoder * dec,unsigned flags,struct pipe_fence_handle ** fence)2406 static int flush(struct radeon_decoder *dec, unsigned flags,
2407 struct pipe_fence_handle **fence) {
2408 rvcn_dec_sq_tail(dec);
2409
2410 return dec->ws->cs_flush(&dec->cs, flags, fence);
2411 }
2412
2413 /* add a new set register command to the IB */
set_reg(struct radeon_decoder * dec,unsigned reg,uint32_t val)2414 static void set_reg(struct radeon_decoder *dec, unsigned reg, uint32_t val)
2415 {
2416 radeon_emit(&dec->cs, RDECODE_PKT0(reg >> 2, 0));
2417 radeon_emit(&dec->cs, val);
2418 }
2419
2420 /* send a command to the VCPU through the GPCOM registers */
send_cmd(struct radeon_decoder * dec,unsigned cmd,struct pb_buffer_lean * buf,uint32_t off,unsigned usage,enum radeon_bo_domain domain)2421 static void send_cmd(struct radeon_decoder *dec, unsigned cmd, struct pb_buffer_lean *buf, uint32_t off,
2422 unsigned usage, enum radeon_bo_domain domain)
2423 {
2424 uint64_t addr;
2425
2426 dec->ws->cs_add_buffer(&dec->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, domain);
2427 addr = dec->ws->buffer_get_virtual_address(buf);
2428 addr = addr + off;
2429
2430 if (dec->vcn_dec_sw_ring == false) {
2431 set_reg(dec, dec->reg.data0, addr);
2432 set_reg(dec, dec->reg.data1, addr >> 32);
2433 set_reg(dec, dec->reg.cmd, cmd << 1);
2434 return;
2435 }
2436
2437 if (!dec->cs.current.cdw) {
2438 rvcn_sq_header(&dec->cs, &dec->sq, false);
2439 rvcn_decode_ib_package_t *ib_header =
2440 (rvcn_decode_ib_package_t *)&(dec->cs.current.buf[dec->cs.current.cdw]);
2441
2442 ib_header->package_size = sizeof(struct rvcn_decode_buffer_s) +
2443 sizeof(struct rvcn_decode_ib_package_s);
2444 dec->cs.current.cdw++;
2445 ib_header->package_type = (RDECODE_IB_PARAM_DECODE_BUFFER);
2446 dec->cs.current.cdw++;
2447
2448 dec->decode_buffer =
2449 (rvcn_decode_buffer_t *)&(dec->cs.current.buf[dec->cs.current.cdw]);
2450
2451 dec->cs.current.cdw += sizeof(struct rvcn_decode_buffer_s) / 4;
2452 memset(dec->decode_buffer, 0, sizeof(struct rvcn_decode_buffer_s));
2453 }
2454
2455 switch(cmd) {
2456 case RDECODE_CMD_MSG_BUFFER:
2457 dec->decode_buffer->valid_buf_flag |= RDECODE_CMDBUF_FLAGS_MSG_BUFFER;
2458 dec->decode_buffer->msg_buffer_address_hi = (addr >> 32);
2459 dec->decode_buffer->msg_buffer_address_lo = (addr);
2460 break;
2461 case RDECODE_CMD_DPB_BUFFER:
2462 dec->decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_DPB_BUFFER);
2463 dec->decode_buffer->dpb_buffer_address_hi = (addr >> 32);
2464 dec->decode_buffer->dpb_buffer_address_lo = (addr);
2465 break;
2466 case RDECODE_CMD_DECODING_TARGET_BUFFER:
2467 dec->decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_DECODING_TARGET_BUFFER);
2468 dec->decode_buffer->target_buffer_address_hi = (addr >> 32);
2469 dec->decode_buffer->target_buffer_address_lo = (addr);
2470 break;
2471 case RDECODE_CMD_FEEDBACK_BUFFER:
2472 dec->decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_FEEDBACK_BUFFER);
2473 dec->decode_buffer->feedback_buffer_address_hi = (addr >> 32);
2474 dec->decode_buffer->feedback_buffer_address_lo = (addr);
2475 break;
2476 case RDECODE_CMD_PROB_TBL_BUFFER:
2477 dec->decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_PROB_TBL_BUFFER);
2478 dec->decode_buffer->prob_tbl_buffer_address_hi = (addr >> 32);
2479 dec->decode_buffer->prob_tbl_buffer_address_lo = (addr);
2480 break;
2481 case RDECODE_CMD_SESSION_CONTEXT_BUFFER:
2482 dec->decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_SESSION_CONTEXT_BUFFER);
2483 dec->decode_buffer->session_contex_buffer_address_hi = (addr >> 32);
2484 dec->decode_buffer->session_contex_buffer_address_lo = (addr);
2485 break;
2486 case RDECODE_CMD_BITSTREAM_BUFFER:
2487 dec->decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_BITSTREAM_BUFFER);
2488 dec->decode_buffer->bitstream_buffer_address_hi = (addr >> 32);
2489 dec->decode_buffer->bitstream_buffer_address_lo = (addr);
2490 break;
2491 case RDECODE_CMD_IT_SCALING_TABLE_BUFFER:
2492 dec->decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_IT_SCALING_BUFFER);
2493 dec->decode_buffer->it_sclr_table_buffer_address_hi = (addr >> 32);
2494 dec->decode_buffer->it_sclr_table_buffer_address_lo = (addr);
2495 break;
2496 case RDECODE_CMD_CONTEXT_BUFFER:
2497 dec->decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_CONTEXT_BUFFER);
2498 dec->decode_buffer->context_buffer_address_hi = (addr >> 32);
2499 dec->decode_buffer->context_buffer_address_lo = (addr);
2500 break;
2501 default:
2502 printf("Not Support!");
2503 }
2504 }
2505
2506 /* do the codec needs an IT buffer ?*/
have_it(struct radeon_decoder * dec)2507 static bool have_it(struct radeon_decoder *dec)
2508 {
2509 return dec->stream_type == RDECODE_CODEC_H264_PERF || dec->stream_type == RDECODE_CODEC_H265;
2510 }
2511
2512 /* do the codec needs an probs buffer? */
have_probs(struct radeon_decoder * dec)2513 static bool have_probs(struct radeon_decoder *dec)
2514 {
2515 return (dec->stream_type == RDECODE_CODEC_VP9 || dec->stream_type == RDECODE_CODEC_AV1);
2516 }
2517
2518 /* map the next available message/feedback/itscaling buffer */
map_msg_fb_it_probs_buf(struct radeon_decoder * dec)2519 static void map_msg_fb_it_probs_buf(struct radeon_decoder *dec)
2520 {
2521 struct rvid_buffer *buf;
2522 uint8_t *ptr;
2523
2524 /* grab the current message/feedback buffer */
2525 buf = &dec->msg_fb_it_probs_buffers[dec->cur_buffer];
2526
2527 /* and map it for CPU access */
2528 ptr =
2529 dec->ws->buffer_map(dec->ws, buf->res->buf, &dec->cs, PIPE_MAP_WRITE | RADEON_MAP_TEMPORARY);
2530
2531 /* calc buffer offsets */
2532 dec->msg = ptr;
2533
2534 dec->fb = (uint32_t *)(ptr + FB_BUFFER_OFFSET);
2535 if (have_it(dec))
2536 dec->it = (uint8_t *)(ptr + FB_BUFFER_OFFSET + FB_BUFFER_SIZE);
2537 else if (have_probs(dec))
2538 dec->probs = (uint8_t *)(ptr + FB_BUFFER_OFFSET + FB_BUFFER_SIZE);
2539 }
2540
2541 /* unmap and send a message command to the VCPU */
send_msg_buf(struct radeon_decoder * dec)2542 static void send_msg_buf(struct radeon_decoder *dec)
2543 {
2544 struct rvid_buffer *buf;
2545
2546 /* ignore the request if message/feedback buffer isn't mapped */
2547 if (!dec->msg || !dec->fb)
2548 return;
2549
2550 /* grab the current message buffer */
2551 buf = &dec->msg_fb_it_probs_buffers[dec->cur_buffer];
2552
2553 /* unmap the buffer */
2554 dec->ws->buffer_unmap(dec->ws, buf->res->buf);
2555 dec->bs_ptr = NULL;
2556 dec->msg = NULL;
2557 dec->fb = NULL;
2558 dec->it = NULL;
2559 dec->probs = NULL;
2560
2561 if (dec->sessionctx.res)
2562 send_cmd(dec, RDECODE_CMD_SESSION_CONTEXT_BUFFER, dec->sessionctx.res->buf, 0,
2563 RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM);
2564
2565 /* and send it to the hardware */
2566 send_cmd(dec, RDECODE_CMD_MSG_BUFFER, buf->res->buf, 0, RADEON_USAGE_READ, RADEON_DOMAIN_GTT);
2567 }
2568
2569 /* cycle to the next set of buffers */
next_buffer(struct radeon_decoder * dec)2570 static void next_buffer(struct radeon_decoder *dec)
2571 {
2572 ++dec->cur_buffer;
2573 dec->cur_buffer %= dec->num_dec_bufs;
2574 }
2575
calc_ctx_size_h264_perf(struct radeon_decoder * dec)2576 static unsigned calc_ctx_size_h264_perf(struct radeon_decoder *dec)
2577 {
2578 unsigned width_in_mb, height_in_mb, ctx_size;
2579 unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH);
2580 unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT);
2581
2582 unsigned max_references = dec->base.max_references + 1;
2583
2584 // picture width & height in 16 pixel units
2585 width_in_mb = width / VL_MACROBLOCK_WIDTH;
2586 height_in_mb = align(height / VL_MACROBLOCK_HEIGHT, 2);
2587
2588 unsigned fs_in_mb = width_in_mb * height_in_mb;
2589 unsigned num_dpb_buffer_lean;
2590 switch (dec->base.level) {
2591 case 30:
2592 num_dpb_buffer_lean = 8100 / fs_in_mb;
2593 break;
2594 case 31:
2595 num_dpb_buffer_lean = 18000 / fs_in_mb;
2596 break;
2597 case 32:
2598 num_dpb_buffer_lean = 20480 / fs_in_mb;
2599 break;
2600 case 41:
2601 num_dpb_buffer_lean = 32768 / fs_in_mb;
2602 break;
2603 case 42:
2604 num_dpb_buffer_lean = 34816 / fs_in_mb;
2605 break;
2606 case 50:
2607 num_dpb_buffer_lean = 110400 / fs_in_mb;
2608 break;
2609 case 51:
2610 num_dpb_buffer_lean = 184320 / fs_in_mb;
2611 break;
2612 default:
2613 num_dpb_buffer_lean = 184320 / fs_in_mb;
2614 break;
2615 }
2616 num_dpb_buffer_lean++;
2617 max_references = MAX2(MIN2(NUM_H264_REFS, num_dpb_buffer_lean), max_references);
2618 ctx_size = max_references * align(width_in_mb * height_in_mb * 192, 256);
2619
2620 return ctx_size;
2621 }
2622
2623 /* calculate size of reference picture buffer */
calc_dpb_size(struct radeon_decoder * dec)2624 static unsigned calc_dpb_size(struct radeon_decoder *dec)
2625 {
2626 unsigned width_in_mb, height_in_mb, image_size, dpb_size;
2627
2628 // always align them to MB size for dpb calculation
2629 unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH);
2630 unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT);
2631
2632 // always one more for currently decoded picture
2633 unsigned max_references = dec->base.max_references + 1;
2634
2635 // aligned size of a single frame
2636 image_size = align(width, 32) * height;
2637 image_size += image_size / 2;
2638 image_size = align(image_size, 1024);
2639
2640 // picture width & height in 16 pixel units
2641 width_in_mb = width / VL_MACROBLOCK_WIDTH;
2642 height_in_mb = align(height / VL_MACROBLOCK_HEIGHT, 2);
2643
2644 switch (u_reduce_video_profile(dec->base.profile)) {
2645 case PIPE_VIDEO_FORMAT_MPEG4_AVC: {
2646 unsigned fs_in_mb = width_in_mb * height_in_mb;
2647 unsigned num_dpb_buffer_lean;
2648
2649 switch (dec->base.level) {
2650 case 30:
2651 num_dpb_buffer_lean = 8100 / fs_in_mb;
2652 break;
2653 case 31:
2654 num_dpb_buffer_lean = 18000 / fs_in_mb;
2655 break;
2656 case 32:
2657 num_dpb_buffer_lean = 20480 / fs_in_mb;
2658 break;
2659 case 41:
2660 num_dpb_buffer_lean = 32768 / fs_in_mb;
2661 break;
2662 case 42:
2663 num_dpb_buffer_lean = 34816 / fs_in_mb;
2664 break;
2665 case 50:
2666 num_dpb_buffer_lean = 110400 / fs_in_mb;
2667 break;
2668 case 51:
2669 num_dpb_buffer_lean = 184320 / fs_in_mb;
2670 break;
2671 default:
2672 num_dpb_buffer_lean = 184320 / fs_in_mb;
2673 break;
2674 }
2675 num_dpb_buffer_lean++;
2676 max_references = MAX2(MIN2(NUM_H264_REFS, num_dpb_buffer_lean), max_references);
2677 dpb_size = image_size * max_references;
2678 break;
2679 }
2680
2681 case PIPE_VIDEO_FORMAT_HEVC:
2682 if (dec->base.width * dec->base.height >= 4096 * 2000)
2683 max_references = MAX2(max_references, 8);
2684 else
2685 max_references = MAX2(max_references, 17);
2686
2687 width = align(width, 16);
2688 height = align(height, 16);
2689 if (dec->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10)
2690 dpb_size = align((align(width, 64) * align(height, 64) * 9) / 4, 256) * max_references;
2691 else
2692 dpb_size = align((align(width, 32) * height * 3) / 2, 256) * max_references;
2693 break;
2694
2695 case PIPE_VIDEO_FORMAT_VC1:
2696 // the firmware seems to always assume a minimum of ref frames
2697 max_references = MAX2(NUM_VC1_REFS, max_references);
2698
2699 // reference picture buffer
2700 dpb_size = image_size * max_references;
2701
2702 // CONTEXT_BUFFER
2703 dpb_size += width_in_mb * height_in_mb * 128;
2704
2705 // IT surface buffer
2706 dpb_size += width_in_mb * 64;
2707
2708 // DB surface buffer
2709 dpb_size += width_in_mb * 128;
2710
2711 // BP
2712 dpb_size += align(MAX2(width_in_mb, height_in_mb) * 7 * 16, 64);
2713 break;
2714
2715 case PIPE_VIDEO_FORMAT_MPEG12:
2716 // reference picture buffer, must be big enough for all frames
2717 dpb_size = image_size * NUM_MPEG2_REFS;
2718 break;
2719
2720 case PIPE_VIDEO_FORMAT_MPEG4:
2721 // reference picture buffer
2722 dpb_size = image_size * max_references;
2723
2724 // CM
2725 dpb_size += width_in_mb * height_in_mb * 64;
2726
2727 // IT surface buffer
2728 dpb_size += align(width_in_mb * height_in_mb * 32, 64);
2729
2730 dpb_size = MAX2(dpb_size, 30 * 1024 * 1024);
2731 break;
2732
2733 case PIPE_VIDEO_FORMAT_VP9:
2734 max_references = MAX2(max_references, 9);
2735
2736 if (dec->dpb_type == DPB_MAX_RES)
2737 dpb_size = (((struct si_screen *)dec->screen)->info.vcn_ip_version >= VCN_2_0_0)
2738 ? (8192 * 4320 * 3 / 2) * max_references
2739 : (4096 * 3000 * 3 / 2) * max_references;
2740 else
2741 dpb_size = (align(dec->base.width, dec->db_alignment) *
2742 align(dec->base.height, dec->db_alignment) * 3 / 2) * max_references;
2743
2744 if (dec->base.profile == PIPE_VIDEO_PROFILE_VP9_PROFILE2)
2745 dpb_size = dpb_size * 3 / 2;
2746 break;
2747
2748 case PIPE_VIDEO_FORMAT_AV1:
2749 max_references = MAX2(max_references, 9);
2750 dpb_size = 8192 * 4320 * 3 / 2 * max_references * 3 / 2;
2751 break;
2752
2753 case PIPE_VIDEO_FORMAT_JPEG:
2754 dpb_size = 0;
2755 break;
2756
2757 default:
2758 // something is missing here
2759 assert(0);
2760
2761 // at least use a sane default value
2762 dpb_size = 32 * 1024 * 1024;
2763 break;
2764 }
2765 return dpb_size;
2766 }
2767
2768 /**
2769 * destroy this video decoder
2770 */
radeon_dec_destroy(struct pipe_video_codec * decoder)2771 static void radeon_dec_destroy(struct pipe_video_codec *decoder)
2772 {
2773 struct radeon_decoder *dec = (struct radeon_decoder *)decoder;
2774 unsigned i;
2775
2776 assert(decoder);
2777
2778 if (dec->stream_type != RDECODE_CODEC_JPEG) {
2779 map_msg_fb_it_probs_buf(dec);
2780 rvcn_dec_message_destroy(dec);
2781 send_msg_buf(dec);
2782 flush(dec, 0, &dec->destroy_fence);
2783 dec->ws->fence_wait(dec->ws, dec->destroy_fence, PIPE_DEFAULT_DECODER_FEEDBACK_TIMEOUT_NS);
2784 dec->ws->fence_reference(dec->ws, &dec->destroy_fence, NULL);
2785 }
2786
2787 dec->ws->fence_reference(dec->ws, &dec->prev_fence, NULL);
2788 dec->ws->cs_destroy(&dec->cs);
2789
2790 if (dec->stream_type == RDECODE_CODEC_JPEG) {
2791 for (i = 0; i < dec->njctx; i++) {
2792 dec->ws->cs_destroy(&dec->jcs[i]);
2793 dec->ws->ctx_destroy(dec->jctx[i]);
2794 }
2795 }
2796
2797 if (dec->msg_fb_it_probs_buffers && dec->bs_buffers) {
2798 for (i = 0; i < dec->num_dec_bufs; ++i) {
2799 si_vid_destroy_buffer(&dec->msg_fb_it_probs_buffers[i]);
2800 si_vid_destroy_buffer(&dec->bs_buffers[i]);
2801 }
2802 FREE(dec->msg_fb_it_probs_buffers);
2803 FREE(dec->bs_buffers);
2804 }
2805 dec->num_dec_bufs = 0;
2806
2807 if (dec->dpb_type != DPB_DYNAMIC_TIER_2) {
2808 si_vid_destroy_buffer(&dec->dpb);
2809 } else {
2810 list_for_each_entry_safe(struct rvcn_dec_dynamic_dpb_t2, d, &dec->dpb_ref_list, list) {
2811 list_del(&d->list);
2812 si_vid_destroy_buffer(&d->dpb);
2813 FREE(d);
2814 }
2815 }
2816 si_vid_destroy_buffer(&dec->ctx);
2817 si_vid_destroy_buffer(&dec->sessionctx);
2818
2819 FREE(dec->jcs);
2820 FREE(dec->jctx);
2821 FREE(dec);
2822 }
2823
2824 /**
2825 * start decoding of a new frame
2826 */
radeon_dec_begin_frame(struct pipe_video_codec * decoder,struct pipe_video_buffer * target,struct pipe_picture_desc * picture)2827 static void radeon_dec_begin_frame(struct pipe_video_codec *decoder,
2828 struct pipe_video_buffer *target,
2829 struct pipe_picture_desc *picture)
2830 {
2831 struct radeon_decoder *dec = (struct radeon_decoder *)decoder;
2832 uintptr_t frame;
2833
2834 assert(decoder);
2835
2836 frame = ++dec->frame_number;
2837 if (dec->stream_type != RDECODE_CODEC_VP9 && dec->stream_type != RDECODE_CODEC_AV1
2838 && dec->stream_type != RDECODE_CODEC_H264_PERF)
2839 vl_video_buffer_set_associated_data(target, decoder, (void *)frame,
2840 &radeon_dec_destroy_associated_data);
2841
2842 dec->bs_size = 0;
2843 dec->bs_ptr = dec->ws->buffer_map(dec->ws, dec->bs_buffers[dec->cur_buffer].res->buf, &dec->cs,
2844 PIPE_MAP_WRITE | RADEON_MAP_TEMPORARY);
2845 }
2846
/**
 * Decode a macroblock.
 *
 * Macroblock-level decoding is not implemented for this decoder; reaching
 * this callback trips an assertion in debug builds and does nothing
 * otherwise.
 */
static void radeon_dec_decode_macroblock(struct pipe_video_codec *decoder,
                                         struct pipe_video_buffer *target,
                                         struct pipe_picture_desc *picture,
                                         const struct pipe_macroblock *macroblocks,
                                         unsigned num_macroblocks)
{
   (void)decoder;
   (void)target;
   (void)picture;
   (void)macroblocks;
   (void)num_macroblocks;

   /* not supported (yet) */
   assert(0);
}
2859
2860 /**
2861 * decode a bitstream
2862 */
radeon_dec_decode_bitstream(struct pipe_video_codec * decoder,struct pipe_video_buffer * target,struct pipe_picture_desc * picture,unsigned num_buffers,const void * const * buffers,const unsigned * sizes)2863 static void radeon_dec_decode_bitstream(struct pipe_video_codec *decoder,
2864 struct pipe_video_buffer *target,
2865 struct pipe_picture_desc *picture, unsigned num_buffers,
2866 const void *const *buffers, const unsigned *sizes)
2867 {
2868 struct radeon_decoder *dec = (struct radeon_decoder *)decoder;
2869 unsigned i;
2870
2871 assert(decoder);
2872
2873 if (!dec->bs_ptr)
2874 return;
2875
2876 if (dec->bs_size && dec->stream_type == RDECODE_CODEC_AV1)
2877 return;
2878
2879 unsigned long total_bs_size = dec->bs_size;
2880 for (i = 0; i < num_buffers; ++i)
2881 total_bs_size += sizes[i];
2882
2883 struct rvid_buffer *buf = &dec->bs_buffers[dec->cur_buffer];
2884
2885 if (total_bs_size > buf->res->buf->size) {
2886 dec->ws->buffer_unmap(dec->ws, buf->res->buf);
2887 dec->bs_ptr = NULL;
2888 if (!si_vid_resize_buffer(dec->screen, &dec->cs, buf, total_bs_size, NULL)) {
2889 RVID_ERR("Can't resize bitstream buffer!");
2890 return;
2891 }
2892
2893 dec->bs_ptr = dec->ws->buffer_map(dec->ws, buf->res->buf, &dec->cs,
2894 PIPE_MAP_WRITE | RADEON_MAP_TEMPORARY);
2895 if (!dec->bs_ptr)
2896 return;
2897
2898 dec->bs_ptr += dec->bs_size;
2899 }
2900
2901 for (i = 0; i < num_buffers; ++i) {
2902 memcpy(dec->bs_ptr, buffers[i], sizes[i]);
2903 dec->bs_size += sizes[i];
2904 dec->bs_ptr += sizes[i];
2905 }
2906 }
2907
2908 /**
2909 * send cmd for vcn dec
2910 */
send_cmd_dec(struct radeon_decoder * dec,struct pipe_video_buffer * target,struct pipe_picture_desc * picture)2911 void send_cmd_dec(struct radeon_decoder *dec, struct pipe_video_buffer *target,
2912 struct pipe_picture_desc *picture)
2913 {
2914 struct pb_buffer_lean *dt;
2915 struct rvid_buffer *msg_fb_it_probs_buf, *bs_buf;
2916
2917 msg_fb_it_probs_buf = &dec->msg_fb_it_probs_buffers[dec->cur_buffer];
2918 bs_buf = &dec->bs_buffers[dec->cur_buffer];
2919
2920 memset(dec->bs_ptr, 0, align(dec->bs_size, 128) - dec->bs_size);
2921 dec->ws->buffer_unmap(dec->ws, bs_buf->res->buf);
2922 dec->bs_ptr = NULL;
2923
2924 map_msg_fb_it_probs_buf(dec);
2925 dt = rvcn_dec_message_decode(dec, target, picture);
2926 rvcn_dec_message_feedback(dec);
2927 send_msg_buf(dec);
2928
2929 if (dec->dpb_type != DPB_DYNAMIC_TIER_2)
2930 send_cmd(dec, RDECODE_CMD_DPB_BUFFER, dec->dpb.res->buf, 0, RADEON_USAGE_READWRITE,
2931 RADEON_DOMAIN_VRAM);
2932 if (dec->ctx.res)
2933 send_cmd(dec, RDECODE_CMD_CONTEXT_BUFFER, dec->ctx.res->buf, 0, RADEON_USAGE_READWRITE,
2934 RADEON_DOMAIN_VRAM);
2935 send_cmd(dec, RDECODE_CMD_BITSTREAM_BUFFER, bs_buf->res->buf, 0, RADEON_USAGE_READ,
2936 RADEON_DOMAIN_GTT);
2937 send_cmd(dec, RDECODE_CMD_DECODING_TARGET_BUFFER, dt, 0, RADEON_USAGE_WRITE, RADEON_DOMAIN_VRAM);
2938 send_cmd(dec, RDECODE_CMD_FEEDBACK_BUFFER, msg_fb_it_probs_buf->res->buf, FB_BUFFER_OFFSET,
2939 RADEON_USAGE_WRITE, RADEON_DOMAIN_GTT);
2940 if (have_it(dec))
2941 send_cmd(dec, RDECODE_CMD_IT_SCALING_TABLE_BUFFER, msg_fb_it_probs_buf->res->buf,
2942 FB_BUFFER_OFFSET + FB_BUFFER_SIZE, RADEON_USAGE_READ, RADEON_DOMAIN_GTT);
2943 else if (have_probs(dec))
2944 send_cmd(dec, RDECODE_CMD_PROB_TBL_BUFFER, msg_fb_it_probs_buf->res->buf,
2945 FB_BUFFER_OFFSET + FB_BUFFER_SIZE, RADEON_USAGE_READ, RADEON_DOMAIN_GTT);
2946
2947 if (dec->vcn_dec_sw_ring == false)
2948 set_reg(dec, dec->reg.cntl, 1);
2949 }
2950
2951 /**
2952 * end decoding of the current frame
2953 */
radeon_dec_end_frame(struct pipe_video_codec * decoder,struct pipe_video_buffer * target,struct pipe_picture_desc * picture)2954 static void radeon_dec_end_frame(struct pipe_video_codec *decoder, struct pipe_video_buffer *target,
2955 struct pipe_picture_desc *picture)
2956 {
2957 struct radeon_decoder *dec = (struct radeon_decoder *)decoder;
2958
2959 assert(decoder);
2960
2961 if (!dec->bs_ptr)
2962 return;
2963
2964 dec->send_cmd(dec, target, picture);
2965 flush(dec, PIPE_FLUSH_ASYNC, picture->fence);
2966 if (picture->fence)
2967 dec->ws->fence_reference(dec->ws, &dec->prev_fence, *picture->fence);
2968 next_buffer(dec);
2969 }
2970
2971 /**
2972 * end decoding of the current jpeg frame
2973 */
radeon_dec_jpeg_end_frame(struct pipe_video_codec * decoder,struct pipe_video_buffer * target,struct pipe_picture_desc * picture)2974 static void radeon_dec_jpeg_end_frame(struct pipe_video_codec *decoder, struct pipe_video_buffer *target,
2975 struct pipe_picture_desc *picture)
2976 {
2977 struct radeon_decoder *dec = (struct radeon_decoder *)decoder;
2978 struct pipe_mjpeg_picture_desc *pic = (struct pipe_mjpeg_picture_desc *)picture;
2979
2980 assert(decoder);
2981
2982 if (!dec->bs_ptr)
2983 return;
2984
2985 dec->jpg.crop_x = ROUND_DOWN_TO(pic->picture_parameter.crop_x, VL_MACROBLOCK_WIDTH);
2986 dec->jpg.crop_y = ROUND_DOWN_TO(pic->picture_parameter.crop_y, VL_MACROBLOCK_HEIGHT);
2987 dec->jpg.crop_width = align(pic->picture_parameter.crop_width, VL_MACROBLOCK_WIDTH);
2988 dec->jpg.crop_height = align(pic->picture_parameter.crop_height, VL_MACROBLOCK_HEIGHT);
2989 if (dec->jpg.crop_x + dec->jpg.crop_width > pic->picture_parameter.picture_width)
2990 dec->jpg.crop_width = 0;
2991 if (dec->jpg.crop_y + dec->jpg.crop_height > pic->picture_parameter.picture_height)
2992 dec->jpg.crop_height = 0;
2993 dec->send_cmd(dec, target, picture);
2994 dec->ws->cs_flush(&dec->jcs[dec->cb_idx], PIPE_FLUSH_ASYNC, NULL);
2995 next_buffer(dec);
2996 dec->cb_idx = (dec->cb_idx+1) % dec->njctx;
2997 }
2998
/**
 * Flush any outstanding command buffers to the hardware.
 *
 * Intentionally a no-op in this implementation.
 */
static void radeon_dec_flush(struct pipe_video_codec *decoder)
{
   (void)decoder;
}
3005
radeon_dec_get_decoder_fence(struct pipe_video_codec * decoder,struct pipe_fence_handle * fence,uint64_t timeout)3006 static int radeon_dec_get_decoder_fence(struct pipe_video_codec *decoder,
3007 struct pipe_fence_handle *fence,
3008 uint64_t timeout) {
3009
3010 struct radeon_decoder *dec = (struct radeon_decoder *)decoder;
3011
3012 return dec->ws->fence_wait(dec->ws, fence, timeout);
3013 }
3014
radeon_dec_destroy_fence(struct pipe_video_codec * decoder,struct pipe_fence_handle * fence)3015 static void radeon_dec_destroy_fence(struct pipe_video_codec *decoder,
3016 struct pipe_fence_handle *fence)
3017 {
3018 struct radeon_decoder *dec = (struct radeon_decoder *)decoder;
3019
3020 dec->ws->fence_reference(dec->ws, &fence, NULL);
3021 }
3022
3023 /**
3024 * update render list when target buffer got updated, use the existing
3025 * index and update the new buffer to associate with it.
3026 */
radeon_dec_update_render_list(struct pipe_video_codec * decoder,struct pipe_video_buffer * old,struct pipe_video_buffer * updated)3027 static void radeon_dec_update_render_list(struct pipe_video_codec *decoder,
3028 struct pipe_video_buffer *old,
3029 struct pipe_video_buffer *updated)
3030 {
3031 struct radeon_decoder *dec = (struct radeon_decoder *)decoder;
3032 void *index = vl_video_buffer_get_associated_data(old, decoder);
3033
3034 vl_video_buffer_set_associated_data(updated, decoder, index,
3035 old->destroy_associated_data);
3036 for (int i = 0; i < ARRAY_SIZE(dec->render_pic_list); ++i) {
3037 if (dec->render_pic_list[i] == old) {
3038 dec->render_pic_list[i] = updated;
3039 break;
3040 }
3041 }
3042 }
3043 /**
3044 * create and HW decoder
3045 */
radeon_create_decoder(struct pipe_context * context,const struct pipe_video_codec * templ)3046 struct pipe_video_codec *radeon_create_decoder(struct pipe_context *context,
3047 const struct pipe_video_codec *templ)
3048 {
3049 struct si_context *sctx = (struct si_context *)context;
3050 struct radeon_winsys *ws = sctx->ws;
3051 unsigned width = templ->width, height = templ->height;
3052 unsigned bs_buf_size, stream_type = 0, ring = AMD_IP_VCN_DEC;
3053 struct radeon_decoder *dec;
3054 int r, i;
3055
3056 switch (u_reduce_video_profile(templ->profile)) {
3057 case PIPE_VIDEO_FORMAT_MPEG12:
3058 if (templ->entrypoint > PIPE_VIDEO_ENTRYPOINT_BITSTREAM)
3059 return vl_create_mpeg12_decoder(context, templ);
3060 stream_type = RDECODE_CODEC_MPEG2_VLD;
3061 break;
3062 case PIPE_VIDEO_FORMAT_MPEG4:
3063 width = align(width, VL_MACROBLOCK_WIDTH);
3064 height = align(height, VL_MACROBLOCK_HEIGHT);
3065 stream_type = RDECODE_CODEC_MPEG4;
3066 break;
3067 case PIPE_VIDEO_FORMAT_VC1:
3068 stream_type = RDECODE_CODEC_VC1;
3069 break;
3070 case PIPE_VIDEO_FORMAT_MPEG4_AVC:
3071 width = align(width, VL_MACROBLOCK_WIDTH);
3072 height = align(height, VL_MACROBLOCK_HEIGHT);
3073 stream_type = RDECODE_CODEC_H264_PERF;
3074 break;
3075 case PIPE_VIDEO_FORMAT_HEVC:
3076 stream_type = RDECODE_CODEC_H265;
3077 break;
3078 case PIPE_VIDEO_FORMAT_VP9:
3079 stream_type = RDECODE_CODEC_VP9;
3080 break;
3081 case PIPE_VIDEO_FORMAT_AV1:
3082 stream_type = RDECODE_CODEC_AV1;
3083 break;
3084 case PIPE_VIDEO_FORMAT_JPEG:
3085 stream_type = RDECODE_CODEC_JPEG;
3086 ring = AMD_IP_VCN_JPEG;
3087 break;
3088 default:
3089 assert(0);
3090 break;
3091 }
3092
3093 dec = CALLOC_STRUCT(radeon_decoder);
3094
3095 if (!dec)
3096 return NULL;
3097
3098 dec->base = *templ;
3099 dec->base.context = context;
3100 dec->base.width = width;
3101 dec->base.height = height;
3102 dec->max_width = width;
3103 dec->max_height = height;
3104 dec->base.destroy = radeon_dec_destroy;
3105 dec->base.begin_frame = radeon_dec_begin_frame;
3106 dec->base.decode_macroblock = radeon_dec_decode_macroblock;
3107 dec->base.decode_bitstream = radeon_dec_decode_bitstream;
3108 dec->base.end_frame = radeon_dec_end_frame;
3109 dec->base.flush = radeon_dec_flush;
3110 dec->base.get_decoder_fence = radeon_dec_get_decoder_fence;
3111 dec->base.destroy_fence = radeon_dec_destroy_fence;
3112 dec->base.update_decoder_target = radeon_dec_update_render_list;
3113
3114 dec->stream_type = stream_type;
3115 dec->stream_handle = si_vid_alloc_stream_handle();
3116 dec->screen = context->screen;
3117 dec->ws = ws;
3118
3119 if (u_reduce_video_profile(templ->profile) != PIPE_VIDEO_FORMAT_JPEG &&
3120 (sctx->vcn_ip_ver >= VCN_4_0_0)) {
3121 dec->vcn_dec_sw_ring = true;
3122 ring = AMD_IP_VCN_UNIFIED;
3123 }
3124
3125 dec->sq.ib_total_size_in_dw = NULL;
3126 dec->sq.ib_checksum = NULL;
3127
3128 if (!ws->cs_create(&dec->cs, sctx->ctx, ring, NULL, NULL)) {
3129 RVID_ERR("Can't get command submission context.\n");
3130 goto error;
3131 }
3132
3133 if (dec->stream_type == RDECODE_CODEC_JPEG) {
3134
3135 if (sctx->vcn_ip_ver == VCN_2_5_0 || sctx->vcn_ip_ver == VCN_2_6_0)
3136 dec->njctx = 2;
3137 else if (sctx->vcn_ip_ver == VCN_4_0_3)
3138 dec->njctx = 24;
3139 else
3140 dec->njctx = 1;
3141
3142 dec->jctx = (struct radeon_winsys_ctx **) CALLOC(dec->njctx,
3143 sizeof(struct radeon_winsys_ctx *));
3144 dec->jcs = (struct radeon_cmdbuf *) CALLOC(dec->njctx, sizeof(struct radeon_cmdbuf));
3145 if(!dec->jctx || !dec->jcs)
3146 goto err;
3147 for (i = 0; i < dec->njctx; i++) {
3148 /* Initialize the context handle and the command stream. */
3149 dec->jctx[i] = dec->ws->ctx_create(dec->ws, RADEON_CTX_PRIORITY_MEDIUM,
3150 sctx->context_flags & PIPE_CONTEXT_LOSE_CONTEXT_ON_RESET);
3151 if (!sctx->ctx)
3152 goto error;
3153 if (!dec->ws->cs_create(&dec->jcs[i], dec->jctx[i], ring, NULL, NULL)) {
3154 RVID_ERR("Can't get additional command submission context for mJPEG.\n");
3155 goto error;
3156 }
3157 }
3158 dec->base.end_frame = radeon_dec_jpeg_end_frame;
3159 dec->cb_idx = 0;
3160 }
3161
3162 for (i = 0; i < ARRAY_SIZE(dec->render_pic_list); i++)
3163 dec->render_pic_list[i] = NULL;
3164
3165 if ((sctx->vcn_ip_ver >= VCN_3_0_0) && (stream_type == RDECODE_CODEC_H264_PERF)) {
3166 for (i = 0; i < ARRAY_SIZE(dec->h264_valid_ref_num); i++)
3167 dec->h264_valid_ref_num[i] = (unsigned) -1;
3168 for (i = 0; i < ARRAY_SIZE(dec->h264_valid_poc_num); i++)
3169 dec->h264_valid_poc_num[i] = (unsigned) -1;
3170 }
3171
3172 if (dec->stream_type == RDECODE_CODEC_JPEG) {
3173 if (sctx->vcn_ip_ver == VCN_4_0_3)
3174 dec->num_dec_bufs = dec->njctx;
3175 else
3176 dec->num_dec_bufs = dec->njctx * NUM_BUFFERS;
3177 } else
3178 dec->num_dec_bufs = NUM_BUFFERS;
3179
3180 bs_buf_size = align(width * height / 32, 128);
3181 dec->msg_fb_it_probs_buffers = (struct rvid_buffer *) CALLOC(dec->num_dec_bufs, sizeof(struct rvid_buffer));
3182 dec->bs_buffers = (struct rvid_buffer *) CALLOC(dec->num_dec_bufs, sizeof(struct rvid_buffer));
3183 if(!dec->msg_fb_it_probs_buffers || !dec->bs_buffers)
3184 goto error;
3185
3186 for (i = 0; i < dec->num_dec_bufs; ++i) {
3187 unsigned msg_fb_it_probs_size = FB_BUFFER_OFFSET + FB_BUFFER_SIZE;
3188 if (have_it(dec))
3189 msg_fb_it_probs_size += IT_SCALING_TABLE_SIZE;
3190 else if (have_probs(dec))
3191 msg_fb_it_probs_size += (dec->stream_type == RDECODE_CODEC_VP9) ?
3192 VP9_PROBS_TABLE_SIZE :
3193 sizeof(rvcn_dec_av1_segment_fg_t);
3194 /* use vram to improve performance, workaround an unknown bug */
3195 if (!si_vid_create_buffer(dec->screen, &dec->msg_fb_it_probs_buffers[i], msg_fb_it_probs_size,
3196 PIPE_USAGE_DEFAULT)) {
3197 RVID_ERR("Can't allocated message buffers.\n");
3198 goto error;
3199 }
3200
3201 if (!si_vid_create_buffer(dec->screen, &dec->bs_buffers[i], bs_buf_size,
3202 PIPE_USAGE_STAGING)) {
3203 RVID_ERR("Can't allocated bitstream buffers.\n");
3204 goto error;
3205 }
3206
3207 si_vid_clear_buffer(context, &dec->msg_fb_it_probs_buffers[i]);
3208 si_vid_clear_buffer(context, &dec->bs_buffers[i]);
3209
3210 if (have_probs(dec) && dec->stream_type == RDECODE_CODEC_VP9) {
3211 struct rvid_buffer *buf;
3212 void *ptr;
3213
3214 buf = &dec->msg_fb_it_probs_buffers[i];
3215 ptr = dec->ws->buffer_map(dec->ws, buf->res->buf, &dec->cs,
3216 PIPE_MAP_WRITE | RADEON_MAP_TEMPORARY);
3217 ptr += FB_BUFFER_OFFSET + FB_BUFFER_SIZE;
3218 fill_probs_table(ptr);
3219 dec->ws->buffer_unmap(dec->ws, buf->res->buf);
3220 dec->bs_ptr = NULL;
3221 }
3222 }
3223
3224 if ((sctx->vcn_ip_ver >= VCN_3_0_0) &&
3225 (stream_type == RDECODE_CODEC_VP9 ||
3226 stream_type == RDECODE_CODEC_AV1 ||
3227 ((stream_type == RDECODE_CODEC_H265) && templ->expect_chunked_decode) ||
3228 ((stream_type == RDECODE_CODEC_H264_PERF) && templ->expect_chunked_decode)))
3229 dec->dpb_type = DPB_DYNAMIC_TIER_2;
3230 else if (sctx->vcn_ip_ver <= VCN_2_6_0 && stream_type == RDECODE_CODEC_VP9)
3231 dec->dpb_type = DPB_DYNAMIC_TIER_1;
3232 else
3233 dec->dpb_type = DPB_MAX_RES;
3234
3235 dec->db_alignment = (((struct si_screen *)dec->screen)->info.vcn_ip_version >= VCN_2_0_0 &&
3236 dec->base.width > 32 && (dec->stream_type == RDECODE_CODEC_VP9 ||
3237 dec->stream_type == RDECODE_CODEC_AV1 ||
3238 dec->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10)) ? 64 : 32;
3239
3240 dec->dpb_size = calc_dpb_size(dec);
3241
3242 if (!si_vid_create_buffer(dec->screen, &dec->sessionctx, RDECODE_SESSION_CONTEXT_SIZE,
3243 PIPE_USAGE_DEFAULT)) {
3244 RVID_ERR("Can't allocated session ctx.\n");
3245 goto error;
3246 }
3247 si_vid_clear_buffer(context, &dec->sessionctx);
3248
3249 dec->addr_gfx_mode = RDECODE_ARRAY_MODE_LINEAR;
3250 dec->av1_version = RDECODE_AV1_VER_0;
3251
3252 switch (sctx->vcn_ip_ver) {
3253 case VCN_1_0_0:
3254 case VCN_1_0_1:
3255 dec->reg.data0 = RDECODE_VCN1_GPCOM_VCPU_DATA0;
3256 dec->reg.data1 = RDECODE_VCN1_GPCOM_VCPU_DATA1;
3257 dec->reg.cmd = RDECODE_VCN1_GPCOM_VCPU_CMD;
3258 dec->reg.cntl = RDECODE_VCN1_ENGINE_CNTL;
3259 dec->jpg_reg.version = RDECODE_JPEG_REG_VER_V1;
3260 break;
3261 case VCN_2_0_0:
3262 case VCN_2_0_2:
3263 case VCN_2_0_3:
3264 case VCN_2_2_0:
3265 dec->reg.data0 = RDECODE_VCN2_GPCOM_VCPU_DATA0;
3266 dec->reg.data1 = RDECODE_VCN2_GPCOM_VCPU_DATA1;
3267 dec->reg.cmd = RDECODE_VCN2_GPCOM_VCPU_CMD;
3268 dec->reg.cntl = RDECODE_VCN2_ENGINE_CNTL;
3269 dec->jpg_reg.version = RDECODE_JPEG_REG_VER_V2;
3270 break;
3271 case VCN_2_5_0:
3272 case VCN_2_6_0:
3273 case VCN_3_0_0:
3274 case VCN_3_0_2:
3275 case VCN_3_0_16:
3276 case VCN_3_0_33:
3277 case VCN_3_1_1:
3278 case VCN_3_1_2:
3279 dec->reg.data0 = RDECODE_VCN2_5_GPCOM_VCPU_DATA0;
3280 dec->reg.data1 = RDECODE_VCN2_5_GPCOM_VCPU_DATA1;
3281 dec->reg.cmd = RDECODE_VCN2_5_GPCOM_VCPU_CMD;
3282 dec->reg.cntl = RDECODE_VCN2_5_ENGINE_CNTL;
3283 dec->jpg_reg.version = RDECODE_JPEG_REG_VER_V2;
3284 break;
3285 case VCN_4_0_3:
3286 dec->jpg_reg.version = RDECODE_JPEG_REG_VER_V3;
3287 dec->addr_gfx_mode = RDECODE_ARRAY_MODE_ADDRLIB_SEL_GFX9;
3288 dec->av1_version = RDECODE_AV1_VER_1;
3289 break;
3290 case VCN_4_0_0:
3291 case VCN_4_0_2:
3292 case VCN_4_0_4:
3293 case VCN_4_0_5:
3294 dec->jpg_reg.version = RDECODE_JPEG_REG_VER_V2;
3295 dec->addr_gfx_mode = RDECODE_ARRAY_MODE_ADDRLIB_SEL_GFX11;
3296 dec->av1_version = RDECODE_AV1_VER_1;
3297 break;
3298 default:
3299 RVID_ERR("VCN is not supported.\n");
3300 goto error;
3301 }
3302
3303 if (dec->stream_type != RDECODE_CODEC_JPEG) {
3304 map_msg_fb_it_probs_buf(dec);
3305 rvcn_dec_message_create(dec);
3306 send_msg_buf(dec);
3307 r = flush(dec, 0, NULL);
3308 if (r)
3309 goto error;
3310 } else if (dec->jpg_reg.version != RDECODE_JPEG_REG_VER_V1) {
3311 dec->jpg_reg.jrbc_ib_cond_rd_timer = vcnipUVD_JRBC_IB_COND_RD_TIMER;
3312 dec->jpg_reg.jrbc_ib_ref_data = vcnipUVD_JRBC_IB_REF_DATA;
3313 dec->jpg_reg.jpeg_rb_base = vcnipUVD_JPEG_RB_BASE;
3314 dec->jpg_reg.jpeg_rb_size = vcnipUVD_JPEG_RB_SIZE;
3315 dec->jpg_reg.jpeg_rb_wptr = vcnipUVD_JPEG_RB_WPTR;
3316 dec->jpg_reg.jpeg_int_en = vcnipUVD_JPEG_INT_EN;
3317 dec->jpg_reg.jpeg_cntl = vcnipUVD_JPEG_CNTL;
3318 dec->jpg_reg.jpeg_rb_rptr = vcnipUVD_JPEG_RB_RPTR;
3319 if (dec->jpg_reg.version == RDECODE_JPEG_REG_VER_V2) {
3320 dec->jpg_reg.jpeg_dec_soft_rst = vcnipUVD_JPEG_DEC_SOFT_RST;
3321 dec->jpg_reg.lmi_jpeg_read_64bit_bar_high = vcnipUVD_LMI_JPEG_READ_64BIT_BAR_HIGH;
3322 dec->jpg_reg.lmi_jpeg_read_64bit_bar_low = vcnipUVD_LMI_JPEG_READ_64BIT_BAR_LOW;
3323 dec->jpg_reg.jpeg_pitch = vcnipUVD_JPEG_PITCH;
3324 dec->jpg_reg.jpeg_uv_pitch = vcnipUVD_JPEG_UV_PITCH;
3325 dec->jpg_reg.dec_addr_mode = vcnipJPEG_DEC_ADDR_MODE;
3326 dec->jpg_reg.dec_y_gfx10_tiling_surface = vcnipJPEG_DEC_Y_GFX10_TILING_SURFACE;
3327 dec->jpg_reg.dec_uv_gfx10_tiling_surface = vcnipJPEG_DEC_UV_GFX10_TILING_SURFACE;
3328 dec->jpg_reg.lmi_jpeg_write_64bit_bar_high = vcnipUVD_LMI_JPEG_WRITE_64BIT_BAR_HIGH;
3329 dec->jpg_reg.lmi_jpeg_write_64bit_bar_low = vcnipUVD_LMI_JPEG_WRITE_64BIT_BAR_LOW;
3330 dec->jpg_reg.jpeg_tier_cntl2 = vcnipUVD_JPEG_TIER_CNTL2;
3331 dec->jpg_reg.jpeg_outbuf_cntl = vcnipUVD_JPEG_OUTBUF_CNTL;
3332 dec->jpg_reg.jpeg_outbuf_rptr = vcnipUVD_JPEG_OUTBUF_RPTR;
3333 dec->jpg_reg.jpeg_outbuf_wptr = vcnipUVD_JPEG_OUTBUF_WPTR;
3334 dec->jpg_reg.jpeg_index = vcnipUVD_JPEG_INDEX;
3335 dec->jpg_reg.jpeg_data = vcnipUVD_JPEG_DATA;
3336 } else {
3337 dec->jpg_reg.jpeg_dec_soft_rst = vcnipUVD_JPEG_DEC_SOFT_RST_1;
3338 dec->jpg_reg.lmi_jpeg_read_64bit_bar_high = vcnipUVD_LMI_JPEG_READ_64BIT_BAR_HIGH_1;
3339 dec->jpg_reg.lmi_jpeg_read_64bit_bar_low = vcnipUVD_LMI_JPEG_READ_64BIT_BAR_LOW_1;
3340 dec->jpg_reg.jpeg_pitch = vcnipUVD_JPEG_PITCH_1;
3341 dec->jpg_reg.jpeg_uv_pitch = vcnipUVD_JPEG_UV_PITCH_1;
3342 dec->jpg_reg.dec_addr_mode = vcnipJPEG_DEC_ADDR_MODE_1;
3343 dec->jpg_reg.dec_y_gfx10_tiling_surface = vcnipJPEG_DEC_Y_GFX10_TILING_SURFACE_1;
3344 dec->jpg_reg.dec_uv_gfx10_tiling_surface = vcnipJPEG_DEC_UV_GFX10_TILING_SURFACE_1;
3345 dec->jpg_reg.lmi_jpeg_write_64bit_bar_high = vcnipUVD_LMI_JPEG_WRITE_64BIT_BAR_HIGH_1;
3346 dec->jpg_reg.lmi_jpeg_write_64bit_bar_low = vcnipUVD_LMI_JPEG_WRITE_64BIT_BAR_LOW_1;
3347 dec->jpg_reg.jpeg_tier_cntl2 = vcnipUVD_JPEG_TIER_CNTL2_1;
3348 dec->jpg_reg.jpeg_outbuf_cntl = vcnipUVD_JPEG_OUTBUF_CNTL_1;
3349 dec->jpg_reg.jpeg_outbuf_rptr = vcnipUVD_JPEG_OUTBUF_RPTR_1;
3350 dec->jpg_reg.jpeg_outbuf_wptr = vcnipUVD_JPEG_OUTBUF_WPTR_1;
3351 dec->jpg_reg.jpeg_luma_base0_0 = vcnipUVD_JPEG_LUMA_BASE0_0;
3352 dec->jpg_reg.jpeg_chroma_base0_0 = vcnipUVD_JPEG_CHROMA_BASE0_0;
3353 dec->jpg_reg.jpeg_chromav_base0_0 = vcnipUVD_JPEG_CHROMAV_BASE0_0;
3354 }
3355 }
3356
3357 next_buffer(dec);
3358
3359 if (stream_type == RDECODE_CODEC_JPEG)
3360 dec->send_cmd = send_cmd_jpeg;
3361 else
3362 dec->send_cmd = send_cmd_dec;
3363
3364
3365 if (dec->dpb_type == DPB_DYNAMIC_TIER_2) {
3366 list_inithead(&dec->dpb_ref_list);
3367 list_inithead(&dec->dpb_unref_list);
3368 }
3369
3370 dec->tmz_ctx = sctx->vcn_ip_ver < VCN_2_2_0 && sctx->vcn_ip_ver != VCN_UNKNOWN;
3371
3372 return &dec->base;
3373
3374 error:
3375 dec->ws->cs_destroy(&dec->cs);
3376
3377 if (dec->stream_type == RDECODE_CODEC_JPEG) {
3378 for (i = 0; i < dec->njctx; i++) {
3379 dec->ws->cs_destroy(&dec->jcs[i]);
3380 dec->ws->ctx_destroy(dec->jctx[i]);
3381 }
3382 }
3383
3384 if (dec->msg_fb_it_probs_buffers && dec->bs_buffers) {
3385 for (i = 0; i < dec->num_dec_bufs; ++i) {
3386 si_vid_destroy_buffer(&dec->msg_fb_it_probs_buffers[i]);
3387 si_vid_destroy_buffer(&dec->bs_buffers[i]);
3388 }
3389 FREE(dec->msg_fb_it_probs_buffers);
3390 FREE(dec->bs_buffers);
3391 }
3392
3393 if (dec->dpb_type != DPB_DYNAMIC_TIER_2)
3394 si_vid_destroy_buffer(&dec->dpb);
3395 si_vid_destroy_buffer(&dec->ctx);
3396 si_vid_destroy_buffer(&dec->sessionctx);
3397
3398 err:
3399 if (dec->jcs)
3400 FREE(dec->jcs);
3401 if (dec->jctx)
3402 FREE(dec->jctx);
3403 FREE(dec);
3404
3405 return NULL;
3406 }
3407