• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**************************************************************************
2  *
3  * Copyright 2017 Advanced Micro Devices, Inc.
4  *
5  * SPDX-License-Identifier: MIT
6  *
7  **************************************************************************/
8 
9 #include "radeon_vcn_dec.h"
10 
11 #include "pipe/p_video_codec.h"
12 #include "radeonsi/si_pipe.h"
13 #include "util/u_memory.h"
14 #include "util/u_video.h"
15 #include "vl/vl_mpeg12_decoder.h"
16 #include "vl/vl_probs_table.h"
17 #include "pspdecryptionparam.h"
18 
19 #include <assert.h>
20 #include <stdio.h>
21 
22 #include "ac_vcn_av1_default.h"
23 
24 #define FB_BUFFER_OFFSET             0x2000
25 #define FB_BUFFER_SIZE               2048
26 #define IT_SCALING_TABLE_SIZE        992
27 #define VP9_PROBS_TABLE_SIZE         (RDECODE_VP9_PROBS_DATA_SIZE + 256)
28 
29 #define NUM_MPEG2_REFS 6
30 #define NUM_H264_REFS  17
31 #define NUM_VC1_REFS   5
32 #define NUM_VP9_REFS   8
33 #define NUM_AV1_REFS   8
34 #define NUM_AV1_REFS_PER_FRAME 7
35 
36 static unsigned calc_dpb_size(struct radeon_decoder *dec);
37 static unsigned calc_ctx_size_h264_perf(struct radeon_decoder *dec);
38 static unsigned calc_ctx_size_h265_main(struct radeon_decoder *dec);
39 static unsigned calc_ctx_size_h265_main10(struct radeon_decoder *dec,
40                                           struct pipe_h265_picture_desc *pic);
41 
/*
 * Destructor callback handed to vl_video_buffer_set_associated_data().
 *
 * Deliberately empty: the "data" attached to a video buffer by this file is
 * just a picture index packed into the pointer value, so there is no
 * allocation to release.
 */
static void radeon_dec_destroy_associated_data(void *data)
{
   (void)data; /* nothing to free, the pointer only carries an integer */
}
46 
get_current_pic_index(struct radeon_decoder * dec,struct pipe_video_buffer * target,unsigned char * curr_pic_idx)47 static void get_current_pic_index(struct radeon_decoder *dec,
48                                     struct pipe_video_buffer *target,
49                                     unsigned char *curr_pic_idx)
50 {
51    for (int i = 0; i < ARRAY_SIZE(dec->render_pic_list); ++i) {
52       if (dec->render_pic_list[i] && dec->render_pic_list[i] == target) {
53          if (target->codec != NULL)
54             *curr_pic_idx = (uintptr_t)vl_video_buffer_get_associated_data(target, &dec->base);
55          else {
56             *curr_pic_idx = i;
57             vl_video_buffer_set_associated_data(target, &dec->base, (void *)(uintptr_t)i,
58                                                 &radeon_dec_destroy_associated_data);
59          }
60          break;
61       } else if (!dec->render_pic_list[i]) {
62          dec->render_pic_list[i] = target;
63          *curr_pic_idx = i;
64          vl_video_buffer_set_associated_data(target, &dec->base, (void *)(uintptr_t)i,
65                                              &radeon_dec_destroy_associated_data);
66          break;
67       }
68    }
69 }
70 
get_h264_msg(struct radeon_decoder * dec,struct pipe_video_buffer * target,struct pipe_h264_picture_desc * pic)71 static rvcn_dec_message_avc_t get_h264_msg(struct radeon_decoder *dec,
72                                            struct pipe_video_buffer *target,
73                                            struct pipe_h264_picture_desc *pic)
74 {
75    rvcn_dec_message_avc_t result;
76    unsigned i, j, k;
77 
78    memset(&result, 0, sizeof(result));
79    switch (pic->base.profile) {
80    case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE:
81    case PIPE_VIDEO_PROFILE_MPEG4_AVC_CONSTRAINED_BASELINE:
82       result.profile = RDECODE_H264_PROFILE_BASELINE;
83       break;
84 
85    case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN:
86       result.profile = RDECODE_H264_PROFILE_MAIN;
87       break;
88 
89    case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH:
90       result.profile = RDECODE_H264_PROFILE_HIGH;
91       break;
92 
93    default:
94       assert(0);
95       break;
96    }
97 
98    result.level = dec->base.level;
99 
100    result.sps_info_flags = 0;
101    result.sps_info_flags |= pic->pps->sps->direct_8x8_inference_flag << 0;
102    result.sps_info_flags |= pic->pps->sps->mb_adaptive_frame_field_flag << 1;
103    result.sps_info_flags |= pic->pps->sps->frame_mbs_only_flag << 2;
104    result.sps_info_flags |= pic->pps->sps->delta_pic_order_always_zero_flag << 3;
105    result.sps_info_flags |= ((dec->dpb_type == DPB_DYNAMIC_TIER_2) ? 0 : 1)
106                               << RDECODE_SPS_INFO_H264_EXTENSION_SUPPORT_FLAG_SHIFT;
107 
108    result.bit_depth_luma_minus8 = pic->pps->sps->bit_depth_luma_minus8;
109    result.bit_depth_chroma_minus8 = pic->pps->sps->bit_depth_chroma_minus8;
110    result.log2_max_frame_num_minus4 = pic->pps->sps->log2_max_frame_num_minus4;
111    result.pic_order_cnt_type = pic->pps->sps->pic_order_cnt_type;
112    result.log2_max_pic_order_cnt_lsb_minus4 = pic->pps->sps->log2_max_pic_order_cnt_lsb_minus4;
113 
114    switch (dec->base.chroma_format) {
115    case PIPE_VIDEO_CHROMA_FORMAT_NONE:
116       break;
117    case PIPE_VIDEO_CHROMA_FORMAT_400:
118       result.chroma_format = 0;
119       break;
120    case PIPE_VIDEO_CHROMA_FORMAT_420:
121       result.chroma_format = 1;
122       break;
123    case PIPE_VIDEO_CHROMA_FORMAT_422:
124       result.chroma_format = 2;
125       break;
126    case PIPE_VIDEO_CHROMA_FORMAT_444:
127       result.chroma_format = 3;
128       break;
129    }
130 
131    result.pps_info_flags = 0;
132    result.pps_info_flags |= pic->pps->transform_8x8_mode_flag << 0;
133    result.pps_info_flags |= pic->pps->redundant_pic_cnt_present_flag << 1;
134    result.pps_info_flags |= pic->pps->constrained_intra_pred_flag << 2;
135    result.pps_info_flags |= pic->pps->deblocking_filter_control_present_flag << 3;
136    result.pps_info_flags |= pic->pps->weighted_bipred_idc << 4;
137    result.pps_info_flags |= pic->pps->weighted_pred_flag << 6;
138    result.pps_info_flags |= pic->pps->bottom_field_pic_order_in_frame_present_flag << 7;
139    result.pps_info_flags |= pic->pps->entropy_coding_mode_flag << 8;
140 
141    result.num_slice_groups_minus1 = pic->pps->num_slice_groups_minus1;
142    result.slice_group_map_type = pic->pps->slice_group_map_type;
143    result.slice_group_change_rate_minus1 = pic->pps->slice_group_change_rate_minus1;
144    result.pic_init_qp_minus26 = pic->pps->pic_init_qp_minus26;
145    result.chroma_qp_index_offset = pic->pps->chroma_qp_index_offset;
146    result.second_chroma_qp_index_offset = pic->pps->second_chroma_qp_index_offset;
147 
148    memcpy(result.scaling_list_4x4, pic->pps->ScalingList4x4, 6 * 16);
149    memcpy(result.scaling_list_8x8, pic->pps->ScalingList8x8, 2 * 64);
150 
151    memcpy(dec->it, result.scaling_list_4x4, 6 * 16);
152    memcpy((dec->it + 96), result.scaling_list_8x8, 2 * 64);
153 
154    result.num_ref_frames = pic->num_ref_frames;
155 
156    result.num_ref_idx_l0_active_minus1 = pic->num_ref_idx_l0_active_minus1;
157    result.num_ref_idx_l1_active_minus1 = pic->num_ref_idx_l1_active_minus1;
158 
159    result.frame_num = pic->frame_num;
160    memcpy(result.frame_num_list, pic->frame_num_list, 4 * 16);
161    result.curr_field_order_cnt_list[0] = pic->field_order_cnt[0];
162    result.curr_field_order_cnt_list[1] = pic->field_order_cnt[1];
163    memcpy(result.field_order_cnt_list, pic->field_order_cnt_list, 4 * 16 * 2);
164    result.non_existing_frame_flags = 0;
165    result.used_for_reference_flags = 0;
166 
167    if (dec->dpb_type != DPB_DYNAMIC_TIER_2) {
168       result.decoded_pic_idx = pic->frame_num;
169       goto end;
170    }
171 
172    for (i = 0; i < ARRAY_SIZE(dec->render_pic_list); i++) {
173       for (j = 0; (pic->ref[j] != NULL) && (j < ARRAY_SIZE(dec->render_pic_list)); j++) {
174          if (dec->render_pic_list[i] == pic->ref[j])
175             break;
176          if (j == ARRAY_SIZE(dec->render_pic_list) - 1)
177             dec->render_pic_list[i] = NULL;
178          else if (pic->ref[j + 1] == NULL)
179             dec->render_pic_list[i] = NULL;
180       }
181    }
182    for (i = 0; i < ARRAY_SIZE(dec->render_pic_list); ++i) {
183       if (dec->render_pic_list[i] && dec->render_pic_list[i] == target) {
184          if (target->codec != NULL){
185             result.decoded_pic_idx =
186                (uintptr_t)vl_video_buffer_get_associated_data(target, &dec->base);
187          } else {
188             result.decoded_pic_idx = i;
189             vl_video_buffer_set_associated_data(target, &dec->base, (void *)(uintptr_t)i,
190                   &radeon_dec_destroy_associated_data);
191          }
192          break;
193       }
194    }
195    if (i == ARRAY_SIZE(dec->render_pic_list)) {
196       for (i = 0; i < ARRAY_SIZE(dec->render_pic_list); ++i) {
197          if (!dec->render_pic_list[i]) {
198             dec->render_pic_list[i] = target;
199             result.decoded_pic_idx = i;
200             vl_video_buffer_set_associated_data(target, &dec->base, (void *)(uintptr_t)i,
201                   &radeon_dec_destroy_associated_data);
202             break;
203          }
204       }
205    }
206    for (i = 0; i < ARRAY_SIZE(result.ref_frame_list); i++) {
207       result.ref_frame_list[i] = pic->ref[i] ?
208               (uintptr_t)vl_video_buffer_get_associated_data(pic->ref[i], &dec->base) : 0xff;
209 
210       if (result.ref_frame_list[i] != 0xff) {
211          if (pic->top_is_reference[i])
212             result.used_for_reference_flags |= (1 << (2 * i));
213          if (pic->bottom_is_reference[i])
214             result.used_for_reference_flags |= (1 << (2 * i + 1));
215 
216          if (pic->is_long_term[i])
217             result.ref_frame_list[i] |= 0x80;
218 
219          result.curr_pic_ref_frame_num++;
220 
221          for (j = 0; j < ARRAY_SIZE(dec->h264_valid_ref_num); j++) {
222             if ((dec->h264_valid_ref_num[j] != (unsigned)-1)
223                 && (dec->h264_valid_ref_num[j] == result.frame_num_list[i]))
224                break;
225          }
226 
227          for (k = 0; k < ARRAY_SIZE(dec->h264_valid_poc_num); k++) {
228             if ((dec->h264_valid_poc_num[k] != (unsigned)-1)
229                   && ((dec->h264_valid_poc_num[k] == result.field_order_cnt_list[i][0])
230                     || dec->h264_valid_poc_num[k] == result.field_order_cnt_list[i][1]))
231                break;
232          }
233       }
234       if (result.ref_frame_list[i] != 0xff && (j == ARRAY_SIZE(dec->h264_valid_ref_num))
235                                            && (k == ARRAY_SIZE(dec->h264_valid_poc_num))) {
236          result.non_existing_frame_flags |= 1 << i;
237          result.curr_pic_ref_frame_num--;
238          result.ref_frame_list[i] = 0xff;
239       }
240    }
241 
242    /* if reference picture exists, however no reference picture found at the end
243       curr_pic_ref_frame_num == 0, which is not reasonable, should be corrected. */
244    if (result.used_for_reference_flags && (result.curr_pic_ref_frame_num == 0)) {
245       for (i = 0; i < ARRAY_SIZE(result.ref_frame_list); i++) {
246          result.ref_frame_list[i] = pic->ref[i] ?
247                 (uintptr_t)vl_video_buffer_get_associated_data(pic->ref[i], &dec->base) : 0xff;
248          if (result.ref_frame_list[i] != 0xff) {
249             result.curr_pic_ref_frame_num++;
250             result.non_existing_frame_flags &= ~(1 << i);
251             break;
252          }
253       }
254    }
255 
256    for (i = 0; i < ARRAY_SIZE(result.ref_frame_list); i++) {
257       if (result.ref_frame_list[i] != 0xff) {
258          dec->h264_valid_ref_num[i]         = result.frame_num_list[i];
259          dec->h264_valid_poc_num[2 * i]     = pic->top_is_reference[i] ?
260                       result.field_order_cnt_list[i][0] : (unsigned) -1;
261          dec->h264_valid_poc_num[2 * i + 1] = pic->bottom_is_reference[i] ?
262                       result.field_order_cnt_list[i][1] : (unsigned) -1;
263       } else {
264          dec->h264_valid_ref_num[i]         =
265          dec->h264_valid_poc_num[2 * i]     =
266          dec->h264_valid_poc_num[2 * i + 1] = (unsigned) -1;
267       }
268    }
269 
270    dec->h264_valid_ref_num[ARRAY_SIZE(dec->h264_valid_ref_num) - 1] = result.frame_num;
271    dec->h264_valid_poc_num[ARRAY_SIZE(dec->h264_valid_poc_num) - 2] =
272                      pic->field_pic_flag && pic->bottom_field_flag ?
273                      (unsigned) -1 : result.curr_field_order_cnt_list[0];
274    dec->h264_valid_poc_num[ARRAY_SIZE(dec->h264_valid_poc_num) - 1] =
275                      pic->field_pic_flag && !pic->bottom_field_flag ?
276                      (unsigned) -1 : result.curr_field_order_cnt_list[1];
277 
278    if (dec->dpb_type == DPB_DYNAMIC_TIER_2) {
279       dec->ref_codec.bts = CODEC_8_BITS;
280       dec->ref_codec.index = result.decoded_pic_idx;
281       dec->ref_codec.ref_size = 16;
282       memset(dec->ref_codec.ref_list, 0xff, sizeof(dec->ref_codec.ref_list));
283       memcpy(dec->ref_codec.ref_list, result.ref_frame_list, sizeof(result.ref_frame_list));
284    }
285 
286 end:
287    return result;
288 }
289 
get_h265_msg(struct radeon_decoder * dec,struct pipe_video_buffer * target,struct pipe_h265_picture_desc * pic)290 static rvcn_dec_message_hevc_t get_h265_msg(struct radeon_decoder *dec,
291                                             struct pipe_video_buffer *target,
292                                             struct pipe_h265_picture_desc *pic)
293 {
294    rvcn_dec_message_hevc_t result;
295    unsigned i, j;
296 
297    memset(&result, 0, sizeof(result));
298    result.sps_info_flags = 0;
299    result.sps_info_flags |= pic->pps->sps->scaling_list_enabled_flag << 0;
300    result.sps_info_flags |= pic->pps->sps->amp_enabled_flag << 1;
301    result.sps_info_flags |= pic->pps->sps->sample_adaptive_offset_enabled_flag << 2;
302    result.sps_info_flags |= pic->pps->sps->pcm_enabled_flag << 3;
303    result.sps_info_flags |= pic->pps->sps->pcm_loop_filter_disabled_flag << 4;
304    result.sps_info_flags |= pic->pps->sps->long_term_ref_pics_present_flag << 5;
305    result.sps_info_flags |= pic->pps->sps->sps_temporal_mvp_enabled_flag << 6;
306    result.sps_info_flags |= pic->pps->sps->strong_intra_smoothing_enabled_flag << 7;
307    result.sps_info_flags |= pic->pps->sps->separate_colour_plane_flag << 8;
308    if (((struct si_screen *)dec->screen)->info.family == CHIP_CARRIZO)
309       result.sps_info_flags |= 1 << 9;
310    if (pic->UseRefPicList == true) {
311       result.sps_info_flags |= 1 << 10;
312       result.sps_info_flags |= 1 << 12;
313    }
314    if (pic->UseStRpsBits == true && pic->pps->st_rps_bits != 0) {
315       result.sps_info_flags |= 1 << 11;
316       result.st_rps_bits = pic->pps->st_rps_bits;
317    }
318 
319    result.chroma_format = pic->pps->sps->chroma_format_idc;
320    result.bit_depth_luma_minus8 = pic->pps->sps->bit_depth_luma_minus8;
321    result.bit_depth_chroma_minus8 = pic->pps->sps->bit_depth_chroma_minus8;
322    result.log2_max_pic_order_cnt_lsb_minus4 = pic->pps->sps->log2_max_pic_order_cnt_lsb_minus4;
323    result.sps_max_dec_pic_buffering_minus1 = pic->pps->sps->sps_max_dec_pic_buffering_minus1;
324    result.log2_min_luma_coding_block_size_minus3 =
325       pic->pps->sps->log2_min_luma_coding_block_size_minus3;
326    result.log2_diff_max_min_luma_coding_block_size =
327       pic->pps->sps->log2_diff_max_min_luma_coding_block_size;
328    result.log2_min_transform_block_size_minus2 =
329       pic->pps->sps->log2_min_transform_block_size_minus2;
330    result.log2_diff_max_min_transform_block_size =
331       pic->pps->sps->log2_diff_max_min_transform_block_size;
332    result.max_transform_hierarchy_depth_inter = pic->pps->sps->max_transform_hierarchy_depth_inter;
333    result.max_transform_hierarchy_depth_intra = pic->pps->sps->max_transform_hierarchy_depth_intra;
334    result.pcm_sample_bit_depth_luma_minus1 = pic->pps->sps->pcm_sample_bit_depth_luma_minus1;
335    result.pcm_sample_bit_depth_chroma_minus1 = pic->pps->sps->pcm_sample_bit_depth_chroma_minus1;
336    result.log2_min_pcm_luma_coding_block_size_minus3 =
337       pic->pps->sps->log2_min_pcm_luma_coding_block_size_minus3;
338    result.log2_diff_max_min_pcm_luma_coding_block_size =
339       pic->pps->sps->log2_diff_max_min_pcm_luma_coding_block_size;
340    result.num_short_term_ref_pic_sets = pic->pps->sps->num_short_term_ref_pic_sets;
341 
342    result.pps_info_flags = 0;
343    result.pps_info_flags |= pic->pps->dependent_slice_segments_enabled_flag << 0;
344    result.pps_info_flags |= pic->pps->output_flag_present_flag << 1;
345    result.pps_info_flags |= pic->pps->sign_data_hiding_enabled_flag << 2;
346    result.pps_info_flags |= pic->pps->cabac_init_present_flag << 3;
347    result.pps_info_flags |= pic->pps->constrained_intra_pred_flag << 4;
348    result.pps_info_flags |= pic->pps->transform_skip_enabled_flag << 5;
349    result.pps_info_flags |= pic->pps->cu_qp_delta_enabled_flag << 6;
350    result.pps_info_flags |= pic->pps->pps_slice_chroma_qp_offsets_present_flag << 7;
351    result.pps_info_flags |= pic->pps->weighted_pred_flag << 8;
352    result.pps_info_flags |= pic->pps->weighted_bipred_flag << 9;
353    result.pps_info_flags |= pic->pps->transquant_bypass_enabled_flag << 10;
354    result.pps_info_flags |= pic->pps->tiles_enabled_flag << 11;
355    result.pps_info_flags |= pic->pps->entropy_coding_sync_enabled_flag << 12;
356    result.pps_info_flags |= pic->pps->uniform_spacing_flag << 13;
357    result.pps_info_flags |= pic->pps->loop_filter_across_tiles_enabled_flag << 14;
358    result.pps_info_flags |= pic->pps->pps_loop_filter_across_slices_enabled_flag << 15;
359    result.pps_info_flags |= pic->pps->deblocking_filter_override_enabled_flag << 16;
360    result.pps_info_flags |= pic->pps->pps_deblocking_filter_disabled_flag << 17;
361    result.pps_info_flags |= pic->pps->lists_modification_present_flag << 18;
362    result.pps_info_flags |= pic->pps->slice_segment_header_extension_present_flag << 19;
363 
364    result.num_extra_slice_header_bits = pic->pps->num_extra_slice_header_bits;
365    result.num_long_term_ref_pic_sps = pic->pps->sps->num_long_term_ref_pics_sps;
366    result.num_ref_idx_l0_default_active_minus1 = pic->pps->num_ref_idx_l0_default_active_minus1;
367    result.num_ref_idx_l1_default_active_minus1 = pic->pps->num_ref_idx_l1_default_active_minus1;
368    result.pps_cb_qp_offset = pic->pps->pps_cb_qp_offset;
369    result.pps_cr_qp_offset = pic->pps->pps_cr_qp_offset;
370    result.pps_beta_offset_div2 = pic->pps->pps_beta_offset_div2;
371    result.pps_tc_offset_div2 = pic->pps->pps_tc_offset_div2;
372    result.diff_cu_qp_delta_depth = pic->pps->diff_cu_qp_delta_depth;
373    result.num_tile_columns_minus1 = pic->pps->num_tile_columns_minus1;
374    result.num_tile_rows_minus1 = pic->pps->num_tile_rows_minus1;
375    result.log2_parallel_merge_level_minus2 = pic->pps->log2_parallel_merge_level_minus2;
376    result.init_qp_minus26 = pic->pps->init_qp_minus26;
377 
378    for (i = 0; i < 19; ++i)
379       result.column_width_minus1[i] = pic->pps->column_width_minus1[i];
380 
381    for (i = 0; i < 21; ++i)
382       result.row_height_minus1[i] = pic->pps->row_height_minus1[i];
383 
384    result.num_delta_pocs_ref_rps_idx = pic->NumDeltaPocsOfRefRpsIdx;
385    result.curr_poc = pic->CurrPicOrderCntVal;
386 
387    for (i = 0; i < ARRAY_SIZE(dec->render_pic_list); i++) {
388       for (j = 0;
389            (pic->ref[j] != NULL) && (j < ARRAY_SIZE(dec->render_pic_list));
390            j++) {
391          if (dec->render_pic_list[i] == pic->ref[j])
392             break;
393          if (j == ARRAY_SIZE(dec->render_pic_list) - 1)
394             dec->render_pic_list[i] = NULL;
395          else if (pic->ref[j + 1] == NULL)
396             dec->render_pic_list[i] = NULL;
397       }
398    }
399    for (i = 0; i < ARRAY_SIZE(dec->render_pic_list); i++) {
400       if (dec->render_pic_list[i] == NULL) {
401          dec->render_pic_list[i] = target;
402          result.curr_idx = i;
403          break;
404       }
405    }
406 
407    vl_video_buffer_set_associated_data(target, &dec->base, (void *)(uintptr_t)result.curr_idx,
408                                        &radeon_dec_destroy_associated_data);
409 
410    for (i = 0; i < 16; ++i) {
411       struct pipe_video_buffer *ref = pic->ref[i];
412       uintptr_t ref_pic = 0;
413 
414       result.poc_list[i] = pic->PicOrderCntVal[i];
415 
416       if (ref)
417          ref_pic = (uintptr_t)vl_video_buffer_get_associated_data(ref, &dec->base);
418       else
419          ref_pic = 0x7F;
420       result.ref_pic_list[i] = ref_pic;
421    }
422 
423    for (i = 0; i < 8; ++i) {
424       result.ref_pic_set_st_curr_before[i] = 0xFF;
425       result.ref_pic_set_st_curr_after[i] = 0xFF;
426       result.ref_pic_set_lt_curr[i] = 0xFF;
427    }
428 
429    for (i = 0; i < pic->NumPocStCurrBefore; ++i)
430       result.ref_pic_set_st_curr_before[i] = pic->RefPicSetStCurrBefore[i];
431 
432    for (i = 0; i < pic->NumPocStCurrAfter; ++i)
433       result.ref_pic_set_st_curr_after[i] = pic->RefPicSetStCurrAfter[i];
434 
435    for (i = 0; i < pic->NumPocLtCurr; ++i)
436       result.ref_pic_set_lt_curr[i] = pic->RefPicSetLtCurr[i];
437 
438    for (i = 0; i < 6; ++i)
439       result.ucScalingListDCCoefSizeID2[i] = pic->pps->sps->ScalingListDCCoeff16x16[i];
440 
441    for (i = 0; i < 2; ++i)
442       result.ucScalingListDCCoefSizeID3[i] = pic->pps->sps->ScalingListDCCoeff32x32[i];
443 
444    memcpy(dec->it, pic->pps->sps->ScalingList4x4, 6 * 16);
445    memcpy(dec->it + 96, pic->pps->sps->ScalingList8x8, 6 * 64);
446    memcpy(dec->it + 480, pic->pps->sps->ScalingList16x16, 6 * 64);
447    memcpy(dec->it + 864, pic->pps->sps->ScalingList32x32, 2 * 64);
448 
449    for (i = 0; i < 2; i++) {
450       for (j = 0; j < 15; j++)
451          result.direct_reflist[i][j] = pic->RefPicList[0][i][j];
452    }
453 
454    if (pic->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) {
455       if (target->buffer_format == PIPE_FORMAT_P010 || target->buffer_format == PIPE_FORMAT_P016) {
456          result.p010_mode = 1;
457          result.msb_mode = 1;
458       } else {
459          result.p010_mode = 0;
460          result.luma_10to8 = 5;
461          result.chroma_10to8 = 5;
462          result.hevc_reserved[0] = 4; /* sclr_luma10to8 */
463          result.hevc_reserved[1] = 4; /* sclr_chroma10to8 */
464       }
465    }
466 
467    if (dec->dpb_type == DPB_DYNAMIC_TIER_2) {
468       dec->ref_codec.bts = (pic->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) ?
469          CODEC_10_BITS : CODEC_8_BITS;
470       dec->ref_codec.index = result.curr_idx;
471       dec->ref_codec.ref_size = 15;
472       memset(dec->ref_codec.ref_list, 0x7f, sizeof(dec->ref_codec.ref_list));
473       memcpy(dec->ref_codec.ref_list, result.ref_pic_list, sizeof(result.ref_pic_list));
474    }
475    return result;
476 }
477 
fill_probs_table(void * ptr)478 static void fill_probs_table(void *ptr)
479 {
480    rvcn_dec_vp9_probs_t *probs = (rvcn_dec_vp9_probs_t *)ptr;
481 
482    memcpy(&probs->coef_probs[0], default_coef_probs_4x4, sizeof(default_coef_probs_4x4));
483    memcpy(&probs->coef_probs[1], default_coef_probs_8x8, sizeof(default_coef_probs_8x8));
484    memcpy(&probs->coef_probs[2], default_coef_probs_16x16, sizeof(default_coef_probs_16x16));
485    memcpy(&probs->coef_probs[3], default_coef_probs_32x32, sizeof(default_coef_probs_32x32));
486    memcpy(probs->y_mode_prob, default_if_y_probs, sizeof(default_if_y_probs));
487    memcpy(probs->uv_mode_prob, default_if_uv_probs, sizeof(default_if_uv_probs));
488    memcpy(probs->single_ref_prob, default_single_ref_p, sizeof(default_single_ref_p));
489    memcpy(probs->switchable_interp_prob, default_switchable_interp_prob,
490           sizeof(default_switchable_interp_prob));
491    memcpy(probs->partition_prob, default_partition_probs, sizeof(default_partition_probs));
492    memcpy(probs->inter_mode_probs, default_inter_mode_probs, sizeof(default_inter_mode_probs));
493    memcpy(probs->mbskip_probs, default_skip_probs, sizeof(default_skip_probs));
494    memcpy(probs->intra_inter_prob, default_intra_inter_p, sizeof(default_intra_inter_p));
495    memcpy(probs->comp_inter_prob, default_comp_inter_p, sizeof(default_comp_inter_p));
496    memcpy(probs->comp_ref_prob, default_comp_ref_p, sizeof(default_comp_ref_p));
497    memcpy(probs->tx_probs_32x32, default_tx_probs_32x32, sizeof(default_tx_probs_32x32));
498    memcpy(probs->tx_probs_16x16, default_tx_probs_16x16, sizeof(default_tx_probs_16x16));
499    memcpy(probs->tx_probs_8x8, default_tx_probs_8x8, sizeof(default_tx_probs_8x8));
500    memcpy(probs->mv_joints, default_nmv_joints, sizeof(default_nmv_joints));
501    memcpy(&probs->mv_comps[0], default_nmv_components, sizeof(default_nmv_components));
502    memset(&probs->nmvc_mask, 0, sizeof(rvcn_dec_vp9_nmv_ctx_mask_t));
503 }
504 
get_vp9_msg(struct radeon_decoder * dec,struct pipe_video_buffer * target,struct pipe_vp9_picture_desc * pic)505 static rvcn_dec_message_vp9_t get_vp9_msg(struct radeon_decoder *dec,
506                                           struct pipe_video_buffer *target,
507                                           struct pipe_vp9_picture_desc *pic)
508 {
509    rvcn_dec_message_vp9_t result;
510    unsigned i ,j;
511 
512    memset(&result, 0, sizeof(result));
513 
514    /* segment table */
515    rvcn_dec_vp9_probs_segment_t *prbs = (rvcn_dec_vp9_probs_segment_t *)(dec->probs);
516 
517    if (pic->picture_parameter.pic_fields.segmentation_enabled) {
518       for (i = 0; i < 8; ++i) {
519          prbs->seg.feature_data[i] =
520             (pic->slice_parameter.seg_param[i].alt_quant & 0xffff) |
521             ((pic->slice_parameter.seg_param[i].alt_lf & 0xff) << 16) |
522             ((pic->slice_parameter.seg_param[i].segment_flags.segment_reference & 0xf) << 24);
523          prbs->seg.feature_mask[i] =
524             (pic->slice_parameter.seg_param[i].alt_quant_enabled << 0) |
525             (pic->slice_parameter.seg_param[i].alt_lf_enabled << 1) |
526             (pic->slice_parameter.seg_param[i].segment_flags.segment_reference_enabled << 2) |
527             (pic->slice_parameter.seg_param[i].segment_flags.segment_reference_skipped << 3);
528       }
529 
530       for (i = 0; i < 7; ++i)
531          prbs->seg.tree_probs[i] = pic->picture_parameter.mb_segment_tree_probs[i];
532 
533       for (i = 0; i < 3; ++i)
534          prbs->seg.pred_probs[i] = pic->picture_parameter.segment_pred_probs[i];
535 
536       prbs->seg.abs_delta = pic->picture_parameter.abs_delta;
537    } else
538       memset(&prbs->seg, 0, 256);
539 
540    result.frame_header_flags = (pic->picture_parameter.pic_fields.frame_type
541                                 << RDECODE_FRAME_HDR_INFO_VP9_FRAME_TYPE_SHIFT) &
542                                RDECODE_FRAME_HDR_INFO_VP9_FRAME_TYPE_MASK;
543 
544    result.frame_header_flags |= (pic->picture_parameter.pic_fields.error_resilient_mode
545                                  << RDECODE_FRAME_HDR_INFO_VP9_ERROR_RESILIENT_MODE_SHIFT) &
546                                 RDECODE_FRAME_HDR_INFO_VP9_ERROR_RESILIENT_MODE_MASK;
547 
548    result.frame_header_flags |= (pic->picture_parameter.pic_fields.intra_only
549                                  << RDECODE_FRAME_HDR_INFO_VP9_INTRA_ONLY_SHIFT) &
550                                 RDECODE_FRAME_HDR_INFO_VP9_INTRA_ONLY_MASK;
551 
552    result.frame_header_flags |= (pic->picture_parameter.pic_fields.allow_high_precision_mv
553                                  << RDECODE_FRAME_HDR_INFO_VP9_ALLOW_HIGH_PRECISION_MV_SHIFT) &
554                                 RDECODE_FRAME_HDR_INFO_VP9_ALLOW_HIGH_PRECISION_MV_MASK;
555 
556    result.frame_header_flags |= (pic->picture_parameter.pic_fields.frame_parallel_decoding_mode
557                                  << RDECODE_FRAME_HDR_INFO_VP9_FRAME_PARALLEL_DECODING_MODE_SHIFT) &
558                                 RDECODE_FRAME_HDR_INFO_VP9_FRAME_PARALLEL_DECODING_MODE_MASK;
559 
560    result.frame_header_flags |= (pic->picture_parameter.pic_fields.refresh_frame_context
561                                  << RDECODE_FRAME_HDR_INFO_VP9_REFRESH_FRAME_CONTEXT_SHIFT) &
562                                 RDECODE_FRAME_HDR_INFO_VP9_REFRESH_FRAME_CONTEXT_MASK;
563 
564    result.frame_header_flags |= (pic->picture_parameter.pic_fields.segmentation_enabled
565                                  << RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_ENABLED_SHIFT) &
566                                 RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_ENABLED_MASK;
567 
568    result.frame_header_flags |= (pic->picture_parameter.pic_fields.segmentation_update_map
569                                  << RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_UPDATE_MAP_SHIFT) &
570                                 RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_UPDATE_MAP_MASK;
571 
572    result.frame_header_flags |= (pic->picture_parameter.pic_fields.segmentation_temporal_update
573                                  << RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_TEMPORAL_UPDATE_SHIFT) &
574                                 RDECODE_FRAME_HDR_INFO_VP9_SEGMENTATION_TEMPORAL_UPDATE_MASK;
575 
576    result.frame_header_flags |= (pic->picture_parameter.mode_ref_delta_enabled
577                                  << RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_ENABLED_SHIFT) &
578                                 RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_ENABLED_MASK;
579 
580    result.frame_header_flags |= (pic->picture_parameter.mode_ref_delta_update
581                                  << RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_UPDATE_SHIFT) &
582                                 RDECODE_FRAME_HDR_INFO_VP9_MODE_REF_DELTA_UPDATE_MASK;
583 
584    result.frame_header_flags |=
585       ((dec->show_frame && !pic->picture_parameter.pic_fields.error_resilient_mode &&
586         dec->last_width == dec->base.width && dec->last_height == dec->base.height)
587        << RDECODE_FRAME_HDR_INFO_VP9_USE_PREV_IN_FIND_MV_REFS_SHIFT) &
588       RDECODE_FRAME_HDR_INFO_VP9_USE_PREV_IN_FIND_MV_REFS_MASK;
589    dec->show_frame = pic->picture_parameter.pic_fields.show_frame;
590 
591    result.frame_header_flags |=  (1 << RDECODE_FRAME_HDR_INFO_VP9_USE_UNCOMPRESSED_HEADER_SHIFT) &
592                                  RDECODE_FRAME_HDR_INFO_VP9_USE_UNCOMPRESSED_HEADER_MASK;
593 
594    result.interp_filter = pic->picture_parameter.pic_fields.mcomp_filter_type;
595 
596    result.frame_context_idx = pic->picture_parameter.pic_fields.frame_context_idx;
597    result.reset_frame_context = pic->picture_parameter.pic_fields.reset_frame_context;
598 
599    result.filter_level = pic->picture_parameter.filter_level;
600    result.sharpness_level = pic->picture_parameter.sharpness_level;
601 
602    for (i = 0; i < 8; ++i)
603       memcpy(result.lf_adj_level[i], pic->slice_parameter.seg_param[i].filter_level, 4 * 2);
604 
605    if (pic->picture_parameter.pic_fields.lossless_flag) {
606       result.base_qindex = 0;
607       result.y_dc_delta_q = 0;
608       result.uv_ac_delta_q = 0;
609       result.uv_dc_delta_q = 0;
610    } else {
611       result.base_qindex = pic->picture_parameter.base_qindex;
612       result.y_dc_delta_q = pic->picture_parameter.y_dc_delta_q;
613       result.uv_ac_delta_q = pic->picture_parameter.uv_ac_delta_q;
614       result.uv_dc_delta_q = pic->picture_parameter.uv_dc_delta_q;
615    }
616 
617    result.log2_tile_cols = pic->picture_parameter.log2_tile_columns;
618    result.log2_tile_rows = pic->picture_parameter.log2_tile_rows;
619    result.chroma_format = 1;
620    result.bit_depth_luma_minus8 = result.bit_depth_chroma_minus8 =
621       (pic->picture_parameter.bit_depth - 8);
622 
623    result.vp9_frame_size = align(dec->bs_size, 128);
624    result.uncompressed_header_size = pic->picture_parameter.frame_header_length_in_bytes;
625    result.compressed_header_size = pic->picture_parameter.first_partition_size;
626 
627    assert(dec->base.max_references + 1 <= ARRAY_SIZE(dec->render_pic_list));
628 
629    //clear the dec->render list if it is not used as a reference
630    for (i = 0; i < ARRAY_SIZE(dec->render_pic_list); i++) {
631       if (dec->render_pic_list[i]) {
632          for (j=0;j<8;j++) {
633             if (dec->render_pic_list[i] == pic->ref[j])
634                break;
635          }
636          if (j == 8)
637             dec->render_pic_list[i] = NULL;
638       }
639    }
640 
641    get_current_pic_index(dec, target, &result.curr_pic_idx);
642 
643    for (i = 0; i < 8; i++) {
644       result.ref_frame_map[i] =
645          (pic->ref[i]) ? (uintptr_t)vl_video_buffer_get_associated_data(pic->ref[i], &dec->base)
646                        : 0x7f;
647    }
648 
649    result.frame_refs[0] = result.ref_frame_map[pic->picture_parameter.pic_fields.last_ref_frame];
650    result.ref_frame_sign_bias[0] = pic->picture_parameter.pic_fields.last_ref_frame_sign_bias;
651    result.frame_refs[1] = result.ref_frame_map[pic->picture_parameter.pic_fields.golden_ref_frame];
652    result.ref_frame_sign_bias[1] = pic->picture_parameter.pic_fields.golden_ref_frame_sign_bias;
653    result.frame_refs[2] = result.ref_frame_map[pic->picture_parameter.pic_fields.alt_ref_frame];
654    result.ref_frame_sign_bias[2] = pic->picture_parameter.pic_fields.alt_ref_frame_sign_bias;
655 
656    if (pic->base.profile == PIPE_VIDEO_PROFILE_VP9_PROFILE2) {
657       if (target->buffer_format == PIPE_FORMAT_P010 || target->buffer_format == PIPE_FORMAT_P016) {
658          result.p010_mode = 1;
659          result.msb_mode = 1;
660       } else {
661          result.p010_mode = 0;
662          result.luma_10to8 = 1;
663          result.chroma_10to8 = 1;
664       }
665    }
666 
667    if (dec->dpb_type == DPB_DYNAMIC_TIER_2) {
668       dec->ref_codec.bts = (pic->base.profile == PIPE_VIDEO_PROFILE_VP9_PROFILE2) ?
669          CODEC_10_BITS : CODEC_8_BITS;
670       dec->ref_codec.index = result.curr_pic_idx;
671       dec->ref_codec.ref_size = 8;
672       memset(dec->ref_codec.ref_list, 0x7f, sizeof(dec->ref_codec.ref_list));
673       memcpy(dec->ref_codec.ref_list, result.ref_frame_map, sizeof(result.ref_frame_map));
674    }
675 
676    dec->last_width = dec->base.width;
677    dec->last_height = dec->base.height;
678 
679    return result;
680 }
681 
/*
 * Fill the HEVC direct reference-list message from the picture description.
 *
 * hevc_reflist: destination message. num_direct_reflist is set to
 *               pic->slice_parameter.slice_count.
 * pic:          source picture description providing RefPicList.
 *
 * For each of the num_direct_reflist entries, both lists (index 0 and 1)
 * are copied, 15 elements per list.
 */
static void get_h265_reflist(rvcn_dec_message_hevc_direct_ref_list_t *hevc_reflist,
                             struct pipe_h265_picture_desc *pic)
{
   enum { lists_per_slice = 2, entries_per_list = 15 };
   int slice = 0;

   hevc_reflist->num_direct_reflist = pic->slice_parameter.slice_count;

   while (slice < hevc_reflist->num_direct_reflist) {
      for (int list = 0; list < lists_per_slice; list++) {
         for (int entry = 0; entry < entries_per_list; entry++) {
            hevc_reflist->multi_direct_reflist[slice][list][entry] =
               pic->RefPicList[slice][list][entry];
         }
      }
      slice++;
   }
}
693 
/*
 * Program the DRM section of the decode message from the decryption
 * parameters.
 *
 * drm:       message to fill; drm_cmd and drm_cntl are always written.
 * decrypted: decryption parameters (cbc/ctr mode bits, drm_id, keys, IVs).
 *
 * When neither cbc nor ctr is set, only the bypass bit is set in drm_cntl
 * and drm_cmd is left at zero. Otherwise drm_cntl is cleared, drm_cmd is
 * assembled from its bit fields, and the three 16-byte blobs are copied
 * (encrypted_key -> drm_wrapped_key, session_iv -> drm_key,
 * encrypted_iv -> drm_counter).
 */
static void set_drm_keys(rvcn_dec_message_drm_t *drm, DECRYPT_PARAMETERS *decrypted)
{
   enum { blob_bytes = 16 };
   const int use_cbc = decrypted->u.s.cbc;
   const int use_ctr = decrypted->u.s.ctr;
   const int session_id = decrypted->u.s.drm_id;
   /* These three are unconditionally enabled in this implementation. */
   const int wrapped_key = 1;
   const int key_present = 1;
   const int counter_present = 1;
   int algorithm;

   drm->drm_cmd = 0;

   if (!use_cbc && !use_ctr) {
      /* Clear content: leave the command word empty and request bypass. */
      drm->drm_cntl = 1 << DRM_CNTL_BYPASS_SHIFT;
      return;
   }

   drm->drm_cntl = 0 << DRM_CNTL_BYPASS_SHIFT;

   /* ctr takes precedence over cbc when both are set. */
   algorithm = use_ctr ? 0x00 : 0x02;

   drm->drm_cmd |= wrapped_key << DRM_CMD_KEY_SHIFT;
   drm->drm_cmd |= key_present << DRM_CMD_CNT_KEY_SHIFT;
   drm->drm_cmd |= counter_present << DRM_CMD_CNT_DATA_SHIFT;
   drm->drm_cmd |= 0 << DRM_CMD_OFFSET_SHIFT;
   drm->drm_cmd |= session_id << DRM_CMD_SESSION_SEL_SHIFT;
   drm->drm_cmd |= wrapped_key << DRM_CMD_UNWRAP_KEY_SHIFT;
   drm->drm_cmd |= 1 << DRM_CMD_GEN_MASK_SHIFT;
   drm->drm_cmd |= algorithm << DRM_CMD_ALGORITHM_SHIFT;
   drm->drm_cmd |= 0xff << DRM_CMD_BYTE_MASK_SHIFT;

   memcpy(drm->drm_wrapped_key, decrypted->encrypted_key, blob_bytes);
   memcpy(drm->drm_key, decrypted->session_iv, blob_bytes);
   memcpy(drm->drm_counter, decrypted->encrypted_iv, blob_bytes);

   drm->drm_offset = 0;
}
734 
/*
 * Advance the 16-bit film grain pseudo-random generator by one step and
 * return the top `bits` bits of the new state.
 *
 * seed: in/out generator state; updated in place.
 * bits: number of high-order state bits to return.
 *
 * The feedback bit is the XOR of state bits 0, 1, 3 and 12; the state is
 * shifted right by one and the feedback bit is inserted at bit 15.
 */
static int32_t rvcn_av1_film_grain_random_number(unsigned short *seed, int32_t bits)
{
   const unsigned short state = *seed;
   const unsigned short feedback =
      (state ^ (state >> 1) ^ (state >> 3) ^ (state >> 12)) & 1;
   const unsigned short next = (unsigned short)((state >> 1) | (feedback << 15));

   *seed = next;

   return (next >> (16 - bits)) & ((1 << bits) - 1);
}
746 
/*
 * Build a 256-entry scaling lookup table by piecewise-linear interpolation
 * between (x, y) control points.
 *
 * scaling_points: control points; [i][0] is the x position, [i][1] the value.
 * num:            number of valid control points. If 0, scaling_lut is left
 *                 untouched.
 * scaling_lut:    output table; must have room for 256 entries.
 *
 * Entries below the first point take the first point's value, entries from
 * the last point's x up to 255 take the last point's value, and entries in
 * between are interpolated in 16.16 fixed point with rounding.
 */
static void rvcn_av1_film_grain_init_scaling(uint8_t scaling_points[][2],
                                             uint8_t num,
                                             short scaling_lut[])
{
   int32_t i, x, delta_x, delta_y;
   int64_t delta;

   if (num == 0)
      return;

   /* Flat region before the first control point. */
   for (i = 0; i < scaling_points[0][0]; i++)
      scaling_lut[i] = scaling_points[0][1];

   for (i = 0; i < num - 1; i++) {
      delta_y = scaling_points[i + 1][1] - scaling_points[i][1];
      delta_x = scaling_points[i + 1][0] - scaling_points[i][0];

      /* A segment with no positive x extent has no entries to fill.
       * Skipping it also avoids dividing by zero when two consecutive
       * points share the same x (malformed input). */
      if (delta_x <= 0)
         continue;

      /* Slope in 16.16 fixed point, using a rounded reciprocal of delta_x. */
      delta = delta_y * ((65536 + (delta_x >> 1)) / delta_x);

      for (x = 0; x < delta_x; x++)
         scaling_lut[scaling_points[i][0] + x] =
            (short)(scaling_points[i][1] + (int32_t)((x * delta + 32768) >> 16));
   }

   /* Flat region from the last control point to the end of the table. */
   for (i = scaling_points[num - 1][0]; i < 256; i++)
      scaling_lut[i] = scaling_points[num - 1][1];
}
774 
/*
 * Fill the film grain init buffer (grain blocks and scaling LUTs) from the
 * film grain parameters.
 *
 * fg_params: film grain parameters (seed, AR coefficients, scaling points...).
 * fg_buf:    destination buffer; luma/cb/cr grain blocks and the three
 *            scaling lookup tables are written.
 *
 * Steps:
 *  1. Generate a 73x82 luma and two 38x44 chroma noise blocks from the
 *     pseudo-random generator and the gaussian_sequence table.
 *  2. Apply the auto-regressive filter to each block in place (chroma also
 *     takes a contribution from the averaged co-located luma grain).
 *  3. Crop the blocks (dropping the first 9 luma / 6 chroma rows and
 *     columns) into fixed-size staging arrays.
 *  4. Copy the staging arrays into fg_buf, skipping 64 entries after every
 *     4 luma rows / 8 chroma rows.
 *  5. Build the scaling lookup tables.
 */
static void rvcn_av1_init_film_grain_buffer(rvcn_dec_film_grain_params_t *fg_params,
                                            rvcn_dec_av1_fg_init_buf_t *fg_buf)
{
   const int32_t luma_block_size_y = 73;
   const int32_t luma_block_size_x = 82;
   const int32_t chroma_block_size_y = 38;
   const int32_t chroma_block_size_x = 44;
   const int32_t gauss_bits = 11;
   int32_t filt_luma_grain_block[luma_block_size_y][luma_block_size_x];
   int32_t filt_cb_grain_block[chroma_block_size_y][chroma_block_size_x];
   int32_t filt_cr_grain_block[chroma_block_size_y][chroma_block_size_x];
   int32_t chroma_subsamp_y = 1;
   int32_t chroma_subsamp_x = 1;
   unsigned short seed = fg_params->random_seed;
   int32_t ar_coeff_lag = fg_params->ar_coeff_lag;
   int32_t bit_depth = fg_params->bit_depth_minus_8 + 8;
   short grain_center = 128 << (bit_depth - 8);
   short grain_min = 0 - grain_center;
   short grain_max = (256 << (bit_depth - 8)) - 1 - grain_center;
   int32_t shift = 12 - bit_depth + fg_params->grain_scale_shift;
   short luma_grain_block_tmp[64][80];
   short cb_grain_block_tmp[32][40];
   short cr_grain_block_tmp[32][40];
   short *align_ptr, *align_ptr0, *align_ptr1;
   int32_t x, y, g, i, j, c, c0, c1, delta_row, delta_col;
   int32_t s, s0, s1, pos, r;

   /* generate luma grain block */
   memset(filt_luma_grain_block, 0, sizeof(filt_luma_grain_block));
   for (y = 0; y < luma_block_size_y; y++) {
      for (x = 0; x < luma_block_size_x; x++) {
         g = 0;
         if (fg_params->num_y_points > 0) {
            r = rvcn_av1_film_grain_random_number(&seed, gauss_bits);
            g = gaussian_sequence[CLAMP(r, 0, 2048 - 1)];
         }
         filt_luma_grain_block[y][x] = ROUND_POWER_OF_TWO(g, shift);
      }
   }

   /* Auto-regressive filter over luma. The 3-sample border is left
    * unfiltered so every tap stays inside the block. */
   for (y = 3; y < luma_block_size_y; y++) {
      for (x = 3; x < luma_block_size_x - 3; x++) {
         s = 0;
         pos = 0;
         for (delta_row = -ar_coeff_lag; delta_row <= 0; delta_row++) {
            for (delta_col = -ar_coeff_lag; delta_col <= ar_coeff_lag; delta_col++) {
               /* Stop at the current sample: only already-filtered
                * neighbours contribute. */
               if (delta_row == 0 && delta_col == 0)
                  break;
               c = fg_params->ar_coeffs_y[pos];
               s += filt_luma_grain_block[y + delta_row][x + delta_col] * c;
               pos++;
            }
         }
         filt_luma_grain_block[y][x] =
            AV1_CLAMP(filt_luma_grain_block[y][x]
                      + ROUND_POWER_OF_TWO(s, fg_params->ar_coeff_shift),
                      grain_min, grain_max);
      }
   }

   /* generate chroma grain block */
   memset(filt_cb_grain_block, 0, sizeof(filt_cb_grain_block));
   shift = 12 - bit_depth + fg_params->grain_scale_shift;
   seed = fg_params->random_seed ^ 0xb524;
   for (y = 0; y < chroma_block_size_y; y++) {
      for (x = 0; x < chroma_block_size_x; x++) {
         g = 0;
         if (fg_params->num_cb_points || fg_params->chroma_scaling_from_luma) {
            r = rvcn_av1_film_grain_random_number(&seed, gauss_bits);
            g = gaussian_sequence[CLAMP(r, 0, 2048 - 1)];
         }
         filt_cb_grain_block[y][x] = ROUND_POWER_OF_TWO(g, shift);
      }
   }

   memset(filt_cr_grain_block, 0, sizeof(filt_cr_grain_block));
   seed = fg_params->random_seed ^ 0x49d8;
   for (y = 0; y < chroma_block_size_y; y++) {
      for (x = 0; x < chroma_block_size_x; x++) {
         g = 0;
         if (fg_params->num_cr_points || fg_params->chroma_scaling_from_luma) {
            r = rvcn_av1_film_grain_random_number(&seed, gauss_bits);
            g = gaussian_sequence[CLAMP(r, 0, 2048 - 1)];
         }
         filt_cr_grain_block[y][x] = ROUND_POWER_OF_TWO(g, shift);
      }
   }

   /* Auto-regressive filter over both chroma planes at once. */
   for (y = 3; y < chroma_block_size_y; y++) {
      for (x = 3; x < chroma_block_size_x - 3; x++) {
         s0 = 0;
         s1 = 0;
         pos = 0;
         for (delta_row = -ar_coeff_lag; delta_row <= 0; delta_row++) {
            for (delta_col = -ar_coeff_lag; delta_col <= ar_coeff_lag; delta_col++) {
               c0 = fg_params->ar_coeffs_cb[pos];
               c1 = fg_params->ar_coeffs_cr[pos];
               if (delta_row == 0 && delta_col == 0) {
                  /* The final coefficient weights the co-located luma
                   * grain, averaged over the subsampled footprint. */
                  if (fg_params->num_y_points > 0) {
                     int luma = 0;
                     int luma_x = ((x - 3) << chroma_subsamp_x) + 3;
                     int luma_y = ((y - 3) << chroma_subsamp_y) + 3;
                     for (i = 0; i <= chroma_subsamp_y; i++)
                        for (j = 0; j <= chroma_subsamp_x; j++)
                           luma += filt_luma_grain_block[luma_y + i][luma_x + j];

                     luma = ROUND_POWER_OF_TWO(luma, chroma_subsamp_x + chroma_subsamp_y);
                     s0 += luma * c0;
                     s1 += luma * c1;
                  }
                  break;
               }
               s0 += filt_cb_grain_block[y + delta_row][x + delta_col] * c0;
               s1 += filt_cr_grain_block[y + delta_row][x + delta_col] * c1;
               pos++;
            }
         }
         filt_cb_grain_block[y][x] = AV1_CLAMP(filt_cb_grain_block[y][x] +
                                       ROUND_POWER_OF_TWO(s0, fg_params->ar_coeff_shift),
                                     grain_min, grain_max);
         filt_cr_grain_block[y][x] = AV1_CLAMP(filt_cr_grain_block[y][x] +
                                       ROUND_POWER_OF_TWO(s1, fg_params->ar_coeff_shift),
                                     grain_min, grain_max);
      }
   }

   /* The crops below fill only 73 of the 80 luma columns and 38 of the 40
    * chroma columns, while the packing loops read every column. Zero the
    * staging arrays so the unfilled columns are well-defined instead of
    * leaking uninitialized stack contents into fg_buf. */
   memset(luma_grain_block_tmp, 0, sizeof(luma_grain_block_tmp));
   memset(cb_grain_block_tmp, 0, sizeof(cb_grain_block_tmp));
   memset(cr_grain_block_tmp, 0, sizeof(cr_grain_block_tmp));

   for (i = 9; i < luma_block_size_y; i++)
      for (j = 9; j < luma_block_size_x; j++)
         luma_grain_block_tmp[i - 9][j - 9] = filt_luma_grain_block[i][j];

   for (i = 6; i < chroma_block_size_y; i++)
      for (j = 6; j < chroma_block_size_x; j++) {
         cb_grain_block_tmp[i - 6][j - 6] = filt_cb_grain_block[i][j];
         cr_grain_block_tmp[i - 6][j - 6] = filt_cr_grain_block[i][j];
      }

   /* Pack luma rows back to back, skipping 64 entries after every 4 rows. */
   align_ptr = &fg_buf->luma_grain_block[0][0];
   for (i = 0; i < 64; i++) {
      for (j = 0; j < 80; j++)
         *align_ptr++ = luma_grain_block_tmp[i][j];

      if (((i + 1) % 4) == 0)
         align_ptr += 64;
   }

   /* Pack chroma rows back to back, skipping 64 entries after every 8 rows. */
   align_ptr0 = &fg_buf->cb_grain_block[0][0];
   align_ptr1 = &fg_buf->cr_grain_block[0][0];
   for (i = 0; i < 32; i++) {
      for (j = 0; j < 40; j++) {
         *align_ptr0++ = cb_grain_block_tmp[i][j];
         *align_ptr1++ = cr_grain_block_tmp[i][j];
      }
      if (((i + 1) % 8) == 0) {
         align_ptr0 += 64;
         align_ptr1 += 64;
      }
   }

   memset(fg_buf->scaling_lut_y, 0, sizeof(fg_buf->scaling_lut_y));
   rvcn_av1_film_grain_init_scaling(fg_params->scaling_points_y, fg_params->num_y_points,
                                    fg_buf->scaling_lut_y);
   if (fg_params->chroma_scaling_from_luma) {
      /* Chroma reuses the luma table verbatim. */
      memcpy(fg_buf->scaling_lut_cb, fg_buf->scaling_lut_y, sizeof(fg_buf->scaling_lut_y));
      memcpy(fg_buf->scaling_lut_cr, fg_buf->scaling_lut_y, sizeof(fg_buf->scaling_lut_y));
   } else {
      memset(fg_buf->scaling_lut_cb, 0, sizeof(fg_buf->scaling_lut_cb));
      memset(fg_buf->scaling_lut_cr, 0, sizeof(fg_buf->scaling_lut_cr));
      rvcn_av1_film_grain_init_scaling(fg_params->scaling_points_cb, fg_params->num_cb_points,
                                       fg_buf->scaling_lut_cb);
      rvcn_av1_film_grain_init_scaling(fg_params->scaling_points_cr, fg_params->num_cr_points,
                                       fg_buf->scaling_lut_cr);
   }
}
946 
rvcn_dec_av1_film_grain_surface(struct pipe_video_buffer ** target,struct pipe_av1_picture_desc * pic)947 static void rvcn_dec_av1_film_grain_surface(struct pipe_video_buffer **target,
948                                             struct pipe_av1_picture_desc *pic)
949 {
950    if (!pic->picture_parameter.film_grain_info.film_grain_info_fields.apply_grain ||
951        !pic->film_grain_target)
952       return;
953 
954    *target = pic->film_grain_target;
955 }
956 
get_av1_msg(struct radeon_decoder * dec,struct pipe_video_buffer * target,struct pipe_av1_picture_desc * pic)957 static rvcn_dec_message_av1_t get_av1_msg(struct radeon_decoder *dec,
958                                           struct pipe_video_buffer *target,
959                                           struct pipe_av1_picture_desc *pic)
960 {
961    rvcn_dec_message_av1_t result;
962    unsigned i, j;
963    uint16_t tile_count = pic->picture_parameter.tile_cols * pic->picture_parameter.tile_rows;
964 
965    memset(&result, 0, sizeof(result));
966 
967    result.frame_header_flags = (pic->picture_parameter.pic_info_fields.show_frame
968                                 << RDECODE_FRAME_HDR_INFO_AV1_SHOW_FRAME_SHIFT) &
969                                 RDECODE_FRAME_HDR_INFO_AV1_SHOW_FRAME_MASK;
970 
971    result.frame_header_flags |= (pic->picture_parameter.pic_info_fields.disable_cdf_update
972                                  << RDECODE_FRAME_HDR_INFO_AV1_DISABLE_CDF_UPDATE_SHIFT) &
973                                  RDECODE_FRAME_HDR_INFO_AV1_DISABLE_CDF_UPDATE_MASK;
974 
975    result.frame_header_flags |= ((!pic->picture_parameter.pic_info_fields.disable_frame_end_update_cdf)
976                                  << RDECODE_FRAME_HDR_INFO_AV1_REFRESH_FRAME_CONTEXT_SHIFT) &
977                                  RDECODE_FRAME_HDR_INFO_AV1_REFRESH_FRAME_CONTEXT_MASK;
978 
979    result.frame_header_flags |= ((pic->picture_parameter.pic_info_fields.frame_type ==
980                                  2 /* INTRA_ONLY_FRAME */) << RDECODE_FRAME_HDR_INFO_AV1_INTRA_ONLY_SHIFT) &
981                                  RDECODE_FRAME_HDR_INFO_AV1_INTRA_ONLY_MASK;
982 
983    result.frame_header_flags |= (pic->picture_parameter.pic_info_fields.allow_intrabc
984                                  << RDECODE_FRAME_HDR_INFO_AV1_ALLOW_INTRABC_SHIFT) &
985                                  RDECODE_FRAME_HDR_INFO_AV1_ALLOW_INTRABC_MASK;
986 
987    result.frame_header_flags |= (pic->picture_parameter.pic_info_fields.allow_high_precision_mv
988                                  << RDECODE_FRAME_HDR_INFO_AV1_ALLOW_HIGH_PRECISION_MV_SHIFT) &
989                                  RDECODE_FRAME_HDR_INFO_AV1_ALLOW_HIGH_PRECISION_MV_MASK;
990 
991    result.frame_header_flags |= (pic->picture_parameter.seq_info_fields.mono_chrome
992                                  << RDECODE_FRAME_HDR_INFO_AV1_MONOCHROME_SHIFT) &
993                                  RDECODE_FRAME_HDR_INFO_AV1_MONOCHROME_MASK;
994 
995    result.frame_header_flags |= (pic->picture_parameter.mode_control_fields.skip_mode_present
996                                  << RDECODE_FRAME_HDR_INFO_AV1_SKIP_MODE_FLAG_SHIFT) &
997                                  RDECODE_FRAME_HDR_INFO_AV1_SKIP_MODE_FLAG_MASK;
998 
999    result.frame_header_flags |= (((pic->picture_parameter.qmatrix_fields.qm_y == 0xf) ? 0 : 1)
1000                                  << RDECODE_FRAME_HDR_INFO_AV1_USING_QMATRIX_SHIFT) &
1001                                  RDECODE_FRAME_HDR_INFO_AV1_USING_QMATRIX_MASK;
1002 
1003    result.frame_header_flags |= (pic->picture_parameter.seq_info_fields.enable_filter_intra
1004                                  << RDECODE_FRAME_HDR_INFO_AV1_ENABLE_FILTER_INTRA_SHIFT) &
1005                                  RDECODE_FRAME_HDR_INFO_AV1_ENABLE_FILTER_INTRA_MASK;
1006 
1007    result.frame_header_flags |= (pic->picture_parameter.seq_info_fields.enable_intra_edge_filter
1008                                  << RDECODE_FRAME_HDR_INFO_AV1_ENABLE_INTRA_EDGE_FILTER_SHIFT) &
1009                                  RDECODE_FRAME_HDR_INFO_AV1_ENABLE_INTRA_EDGE_FILTER_MASK;
1010 
1011    result.frame_header_flags |= (pic->picture_parameter.seq_info_fields.enable_interintra_compound
1012                                  << RDECODE_FRAME_HDR_INFO_AV1_ENABLE_INTERINTRA_COMPOUND_SHIFT) &
1013                                  RDECODE_FRAME_HDR_INFO_AV1_ENABLE_INTERINTRA_COMPOUND_MASK;
1014 
1015    result.frame_header_flags |= (pic->picture_parameter.seq_info_fields.enable_masked_compound
1016                                  << RDECODE_FRAME_HDR_INFO_AV1_ENABLE_MASKED_COMPOUND_SHIFT) &
1017                                  RDECODE_FRAME_HDR_INFO_AV1_ENABLE_MASKED_COMPOUND_MASK;
1018 
1019    result.frame_header_flags |= (pic->picture_parameter.pic_info_fields.allow_warped_motion
1020                                  << RDECODE_FRAME_HDR_INFO_AV1_ALLOW_WARPED_MOTION_SHIFT) &
1021                                  RDECODE_FRAME_HDR_INFO_AV1_ALLOW_WARPED_MOTION_MASK;
1022 
1023    result.frame_header_flags |= (pic->picture_parameter.seq_info_fields.enable_dual_filter
1024                                  << RDECODE_FRAME_HDR_INFO_AV1_ENABLE_DUAL_FILTER_SHIFT) &
1025                                  RDECODE_FRAME_HDR_INFO_AV1_ENABLE_DUAL_FILTER_MASK;
1026 
1027    result.frame_header_flags |= (pic->picture_parameter.seq_info_fields.enable_order_hint
1028                                  << RDECODE_FRAME_HDR_INFO_AV1_ENABLE_ORDER_HINT_SHIFT) &
1029                                  RDECODE_FRAME_HDR_INFO_AV1_ENABLE_ORDER_HINT_MASK;
1030 
1031    result.frame_header_flags |= (pic->picture_parameter.seq_info_fields.enable_jnt_comp
1032                                  << RDECODE_FRAME_HDR_INFO_AV1_ENABLE_JNT_COMP_SHIFT) &
1033                                  RDECODE_FRAME_HDR_INFO_AV1_ENABLE_JNT_COMP_MASK;
1034 
1035    result.frame_header_flags |= (pic->picture_parameter.pic_info_fields.use_ref_frame_mvs
1036                                  << RDECODE_FRAME_HDR_INFO_AV1_ALLOW_REF_FRAME_MVS_SHIFT) &
1037                                  RDECODE_FRAME_HDR_INFO_AV1_ALLOW_REF_FRAME_MVS_MASK;
1038 
1039    result.frame_header_flags |= (pic->picture_parameter.pic_info_fields.allow_screen_content_tools
1040                                  << RDECODE_FRAME_HDR_INFO_AV1_ALLOW_SCREEN_CONTENT_TOOLS_SHIFT) &
1041                                  RDECODE_FRAME_HDR_INFO_AV1_ALLOW_SCREEN_CONTENT_TOOLS_MASK;
1042 
1043    result.frame_header_flags |= (pic->picture_parameter.pic_info_fields.force_integer_mv
1044                                  << RDECODE_FRAME_HDR_INFO_AV1_CUR_FRAME_FORCE_INTEGER_MV_SHIFT) &
1045                                  RDECODE_FRAME_HDR_INFO_AV1_CUR_FRAME_FORCE_INTEGER_MV_MASK;
1046 
1047    result.frame_header_flags |= (pic->picture_parameter.loop_filter_info_fields.mode_ref_delta_enabled
1048                                  << RDECODE_FRAME_HDR_INFO_AV1_MODE_REF_DELTA_ENABLED_SHIFT) &
1049                                  RDECODE_FRAME_HDR_INFO_AV1_MODE_REF_DELTA_ENABLED_MASK;
1050 
1051    result.frame_header_flags |= (pic->picture_parameter.loop_filter_info_fields.mode_ref_delta_update
1052                                  << RDECODE_FRAME_HDR_INFO_AV1_MODE_REF_DELTA_UPDATE_SHIFT) &
1053                                  RDECODE_FRAME_HDR_INFO_AV1_MODE_REF_DELTA_UPDATE_MASK;
1054 
1055    result.frame_header_flags |= (pic->picture_parameter.mode_control_fields.delta_q_present_flag
1056                                  << RDECODE_FRAME_HDR_INFO_AV1_DELTA_Q_PRESENT_FLAG_SHIFT) &
1057                                  RDECODE_FRAME_HDR_INFO_AV1_DELTA_Q_PRESENT_FLAG_MASK;
1058 
1059    result.frame_header_flags |= (pic->picture_parameter.mode_control_fields.delta_lf_present_flag
1060                                  << RDECODE_FRAME_HDR_INFO_AV1_DELTA_LF_PRESENT_FLAG_SHIFT) &
1061                                  RDECODE_FRAME_HDR_INFO_AV1_DELTA_LF_PRESENT_FLAG_MASK;
1062 
1063    result.frame_header_flags |= (pic->picture_parameter.mode_control_fields.reduced_tx_set_used
1064                                  << RDECODE_FRAME_HDR_INFO_AV1_REDUCED_TX_SET_USED_SHIFT) &
1065                                  RDECODE_FRAME_HDR_INFO_AV1_REDUCED_TX_SET_USED_MASK;
1066 
1067    result.frame_header_flags |= (pic->picture_parameter.seg_info.segment_info_fields.enabled
1068                                  << RDECODE_FRAME_HDR_INFO_AV1_SEGMENTATION_ENABLED_SHIFT) &
1069                                  RDECODE_FRAME_HDR_INFO_AV1_SEGMENTATION_ENABLED_MASK;
1070 
1071    result.frame_header_flags |= (pic->picture_parameter.seg_info.segment_info_fields.update_map
1072                                  << RDECODE_FRAME_HDR_INFO_AV1_SEGMENTATION_UPDATE_MAP_SHIFT) &
1073                                  RDECODE_FRAME_HDR_INFO_AV1_SEGMENTATION_UPDATE_MAP_MASK;
1074 
1075    result.frame_header_flags |= (pic->picture_parameter.seg_info.segment_info_fields.temporal_update
1076                                  << RDECODE_FRAME_HDR_INFO_AV1_SEGMENTATION_TEMPORAL_UPDATE_SHIFT) &
1077                                  RDECODE_FRAME_HDR_INFO_AV1_SEGMENTATION_TEMPORAL_UPDATE_MASK;
1078 
1079    result.frame_header_flags |= (pic->picture_parameter.mode_control_fields.delta_lf_multi
1080                                  << RDECODE_FRAME_HDR_INFO_AV1_DELTA_LF_MULTI_SHIFT) &
1081                                  RDECODE_FRAME_HDR_INFO_AV1_DELTA_LF_MULTI_MASK;
1082 
1083    result.frame_header_flags |= (pic->picture_parameter.pic_info_fields.is_motion_mode_switchable
1084                                  << RDECODE_FRAME_HDR_INFO_AV1_SWITCHABLE_SKIP_MODE_SHIFT) &
1085                                  RDECODE_FRAME_HDR_INFO_AV1_SWITCHABLE_SKIP_MODE_MASK;
1086 
1087    result.frame_header_flags |= ((!pic->picture_parameter.refresh_frame_flags)
1088                                  << RDECODE_FRAME_HDR_INFO_AV1_SKIP_REFERENCE_UPDATE_SHIFT) &
1089                                  RDECODE_FRAME_HDR_INFO_AV1_SKIP_REFERENCE_UPDATE_MASK;
1090 
1091    result.frame_header_flags |= ((!pic->picture_parameter.seq_info_fields.ref_frame_mvs)
1092                                  << RDECODE_FRAME_HDR_INFO_AV1_DISABLE_REF_FRAME_MVS_SHIFT) &
1093                                  RDECODE_FRAME_HDR_INFO_AV1_DISABLE_REF_FRAME_MVS_MASK;
1094 
1095    result.current_frame_id = pic->picture_parameter.current_frame_id;
1096    result.frame_offset = pic->picture_parameter.order_hint;
1097 
1098    result.profile = pic->picture_parameter.profile;
1099    result.is_annexb = 0;
1100    result.frame_type = pic->picture_parameter.pic_info_fields.frame_type;
1101    result.primary_ref_frame = pic->picture_parameter.primary_ref_frame;
1102 
1103    get_current_pic_index(dec, target, &result.curr_pic_idx);
1104 
1105    result.sb_size = pic->picture_parameter.seq_info_fields.use_128x128_superblock;
1106    result.interp_filter = pic->picture_parameter.interp_filter;
1107    for (i = 0; i < 2; ++i)
1108       result.filter_level[i] = pic->picture_parameter.filter_level[i];
1109    result.filter_level_u = pic->picture_parameter.filter_level_u;
1110    result.filter_level_v = pic->picture_parameter.filter_level_v;
1111    result.sharpness_level = pic->picture_parameter.loop_filter_info_fields.sharpness_level;
1112    for (i = 0; i < 8; ++i)
1113       result.ref_deltas[i] = pic->picture_parameter.ref_deltas[i];
1114    for (i = 0; i < 2; ++i)
1115       result.mode_deltas[i] = pic->picture_parameter.mode_deltas[i];
1116    result.base_qindex = pic->picture_parameter.base_qindex;
1117    result.y_dc_delta_q = pic->picture_parameter.y_dc_delta_q;
1118    result.u_dc_delta_q = pic->picture_parameter.u_dc_delta_q;
1119    result.v_dc_delta_q = pic->picture_parameter.v_dc_delta_q;
1120    result.u_ac_delta_q = pic->picture_parameter.u_ac_delta_q;
1121    result.v_ac_delta_q = pic->picture_parameter.v_ac_delta_q;
1122    result.qm_y = pic->picture_parameter.qmatrix_fields.qm_y | 0xf0;
1123    result.qm_u = pic->picture_parameter.qmatrix_fields.qm_u | 0xf0;
1124    result.qm_v = pic->picture_parameter.qmatrix_fields.qm_v | 0xf0;
1125    result.delta_q_res = 1 << pic->picture_parameter.mode_control_fields.log2_delta_q_res;
1126    result.delta_lf_res = 1 << pic->picture_parameter.mode_control_fields.log2_delta_lf_res;
1127 
1128    result.tile_cols = pic->picture_parameter.tile_cols;
1129    result.tile_rows = pic->picture_parameter.tile_rows;
1130    result.tx_mode = pic->picture_parameter.mode_control_fields.tx_mode;
1131    result.reference_mode = (pic->picture_parameter.mode_control_fields.reference_select == 1) ? 2 : 0;
1132    result.chroma_format = pic->picture_parameter.seq_info_fields.mono_chrome ? 0 : 1;
1133    result.tile_size_bytes = 0xff;
1134    result.context_update_tile_id = pic->picture_parameter.context_update_tile_id;
1135    for (i = 0; i < 65; ++i) {
1136       result.tile_col_start_sb[i] = pic->picture_parameter.tile_col_start_sb[i];
1137       result.tile_row_start_sb[i] = pic->picture_parameter.tile_row_start_sb[i];
1138    }
1139    result.max_width = pic->picture_parameter.max_width;
1140    result.max_height = pic->picture_parameter.max_height;
1141    if (pic->picture_parameter.pic_info_fields.use_superres) {
1142       result.width = (pic->picture_parameter.frame_width * 8 + pic->picture_parameter.superres_scale_denominator / 2) /
1143          pic->picture_parameter.superres_scale_denominator;
1144       result.superres_scale_denominator = pic->picture_parameter.superres_scale_denominator;
1145    } else {
1146       result.width = pic->picture_parameter.frame_width;
1147       result.superres_scale_denominator = pic->picture_parameter.superres_scale_denominator;
1148    }
1149    result.height = pic->picture_parameter.frame_height;
1150    result.superres_upscaled_width = pic->picture_parameter.frame_width;
1151    result.order_hint_bits = pic->picture_parameter.order_hint_bits_minus_1 + 1;
1152 
1153    for (i = 0; i < NUM_AV1_REFS; ++i) {
1154       result.ref_frame_map[i] =
1155          (pic->ref[i]) ? (uintptr_t)vl_video_buffer_get_associated_data(pic->ref[i], &dec->base)
1156                        : 0x7f;
1157    }
1158    for (i = 0; i < NUM_AV1_REFS_PER_FRAME; ++i)
1159        result.frame_refs[i] = result.ref_frame_map[pic->picture_parameter.ref_frame_idx[i]];
1160 
1161    result.bit_depth_luma_minus8 = result.bit_depth_chroma_minus8 = pic->picture_parameter.bit_depth_idx << 1;
1162 
1163    for (i = 0; i < 8; ++i) {
1164       for (j = 0; j < 8; ++j)
1165          result.feature_data[i][j] = pic->picture_parameter.seg_info.feature_data[i][j];
1166       result.feature_mask[i] = pic->picture_parameter.seg_info.feature_mask[i];
1167    }
1168    memcpy(dec->probs, &pic->picture_parameter.seg_info.feature_data, 128);
1169    memcpy((dec->probs + 128), &pic->picture_parameter.seg_info.feature_mask, 8);
1170 
1171    result.cdef_damping = pic->picture_parameter.cdef_damping_minus_3 + 3;
1172    result.cdef_bits = pic->picture_parameter.cdef_bits;
1173    for (i = 0; i < 8; ++i) {
1174       result.cdef_strengths[i] = pic->picture_parameter.cdef_y_strengths[i];
1175       result.cdef_uv_strengths[i] = pic->picture_parameter.cdef_uv_strengths[i];
1176    }
1177    result.frame_restoration_type[0] = pic->picture_parameter.loop_restoration_fields.yframe_restoration_type;
1178    result.frame_restoration_type[1] = pic->picture_parameter.loop_restoration_fields.cbframe_restoration_type;
1179    result.frame_restoration_type[2] = pic->picture_parameter.loop_restoration_fields.crframe_restoration_type;
1180    for (i = 0; i < 3; ++i) {
1181       int log2_num = 0;
1182       int unit_size = pic->picture_parameter.lr_unit_size[i];
1183       if (unit_size) {
1184          while (unit_size >>= 1)
1185             log2_num++;
1186          result.log2_restoration_unit_size_minus5[i] = log2_num - 5;
1187       } else {
1188          result.log2_restoration_unit_size_minus5[i] = 0;
1189       }
1190    }
1191 
1192    if (pic->picture_parameter.bit_depth_idx) {
1193       if (target->buffer_format == PIPE_FORMAT_P010 || target->buffer_format == PIPE_FORMAT_P016) {
1194          result.p010_mode = 1;
1195          result.msb_mode = 1;
1196       } else {
1197          result.luma_10to8 = 1;
1198          result.chroma_10to8 = 1;
1199       }
1200    }
1201 
1202    result.preskip_segid = 0;
1203    result.last_active_segid = 0;
1204    for (i = 0; i < 8; i++) {
1205       for (j = 0; j < 8; j++) {
1206          if (pic->picture_parameter.seg_info.feature_mask[i] & (1 << j)) {
1207             result.last_active_segid = i;
1208             if (j >= 5)
1209                result.preskip_segid = 1;
1210          }
1211       }
1212    }
1213 
1214    result.seg_lossless_flag = 0;
1215    for (i = 0; i < 8; ++i) {
1216       int av1_get_qindex, qindex;
1217       int segfeature_active = pic->picture_parameter.seg_info.feature_mask[i] & (1 << 0);
1218       if (segfeature_active) {
1219          int seg_qindex = pic->picture_parameter.base_qindex +
1220                           pic->picture_parameter.seg_info.feature_data[i][0];
1221          av1_get_qindex = seg_qindex < 0 ? 0 : (seg_qindex > 255 ? 255 : seg_qindex);
1222       } else {
1223          av1_get_qindex = pic->picture_parameter.base_qindex;
1224       }
1225       qindex = pic->picture_parameter.seg_info.segment_info_fields.enabled ?
1226                av1_get_qindex :
1227                pic->picture_parameter.base_qindex;
1228       result.seg_lossless_flag |= (((qindex == 0) && result.y_dc_delta_q == 0 &&
1229                                     result.u_dc_delta_q == 0 && result.v_dc_delta_q == 0 &&
1230                                     result.u_ac_delta_q == 0 && result.v_ac_delta_q == 0) << i);
1231    }
1232 
1233    rvcn_dec_film_grain_params_t* fg_params = &result.film_grain;
1234    fg_params->apply_grain = pic->picture_parameter.film_grain_info.film_grain_info_fields.apply_grain;
1235    if (fg_params->apply_grain) {
1236       rvcn_dec_av1_fg_init_buf_t *fg_buf = (rvcn_dec_av1_fg_init_buf_t *)(dec->probs + 256);
1237 
1238       fg_params->random_seed = pic->picture_parameter.film_grain_info.grain_seed;
1239       fg_params->grain_scale_shift =
1240          pic->picture_parameter.film_grain_info.film_grain_info_fields.grain_scale_shift;
1241       fg_params->scaling_shift =
1242          pic->picture_parameter.film_grain_info.film_grain_info_fields.grain_scaling_minus_8 + 8;
1243       fg_params->chroma_scaling_from_luma =
1244          pic->picture_parameter.film_grain_info.film_grain_info_fields.chroma_scaling_from_luma;
1245       fg_params->num_y_points = pic->picture_parameter.film_grain_info.num_y_points;
1246       fg_params->num_cb_points = pic->picture_parameter.film_grain_info.num_cb_points;
1247       fg_params->num_cr_points = pic->picture_parameter.film_grain_info.num_cr_points;
1248       fg_params->cb_mult = pic->picture_parameter.film_grain_info.cb_mult;
1249       fg_params->cb_luma_mult = pic->picture_parameter.film_grain_info.cb_luma_mult;
1250       fg_params->cb_offset = pic->picture_parameter.film_grain_info.cb_offset;
1251       fg_params->cr_mult = pic->picture_parameter.film_grain_info.cr_mult;
1252       fg_params->cr_luma_mult = pic->picture_parameter.film_grain_info.cr_luma_mult;
1253       fg_params->cr_offset = pic->picture_parameter.film_grain_info.cr_offset;
1254       fg_params->bit_depth_minus_8 = pic->picture_parameter.bit_depth_idx << 1;
1255 
1256       for (i = 0; i < fg_params->num_y_points; ++i) {
1257          fg_params->scaling_points_y[i][0] = pic->picture_parameter.film_grain_info.point_y_value[i];
1258          fg_params->scaling_points_y[i][1] = pic->picture_parameter.film_grain_info.point_y_scaling[i];
1259       }
1260       for (i = 0; i < fg_params->num_cb_points; ++i) {
1261          fg_params->scaling_points_cb[i][0] = pic->picture_parameter.film_grain_info.point_cb_value[i];
1262          fg_params->scaling_points_cb[i][1] = pic->picture_parameter.film_grain_info.point_cb_scaling[i];
1263       }
1264       for (i = 0; i < fg_params->num_cr_points; ++i) {
1265          fg_params->scaling_points_cr[i][0] = pic->picture_parameter.film_grain_info.point_cr_value[i];
1266          fg_params->scaling_points_cr[i][1] = pic->picture_parameter.film_grain_info.point_cr_scaling[i];
1267       }
1268 
1269       fg_params->ar_coeff_lag = pic->picture_parameter.film_grain_info.film_grain_info_fields.ar_coeff_lag;
1270       fg_params->ar_coeff_shift =
1271          pic->picture_parameter.film_grain_info.film_grain_info_fields.ar_coeff_shift_minus_6 + 6;
1272 
1273       for (i = 0; i < 24; ++i)
1274          fg_params->ar_coeffs_y[i] = pic->picture_parameter.film_grain_info.ar_coeffs_y[i];
1275 
1276       for (i = 0; i < 25; ++i) {
1277          fg_params->ar_coeffs_cb[i] = pic->picture_parameter.film_grain_info.ar_coeffs_cb[i];
1278          fg_params->ar_coeffs_cr[i] = pic->picture_parameter.film_grain_info.ar_coeffs_cr[i];
1279       }
1280 
1281       fg_params->overlap_flag = pic->picture_parameter.film_grain_info.film_grain_info_fields.overlap_flag;
1282       fg_params->clip_to_restricted_range =
1283          pic->picture_parameter.film_grain_info.film_grain_info_fields.clip_to_restricted_range;
1284 
1285       rvcn_av1_init_film_grain_buffer(fg_params, fg_buf);
1286    }
1287 
1288    result.uncompressed_header_size = 0;
1289    for (i = 0; i < 7; ++i) {
1290       result.global_motion[i + 1].wmtype = (rvcn_dec_transformation_type_e)pic->picture_parameter.wm[i].wmtype;
1291       for (j = 0; j < 6; ++j)
1292          result.global_motion[i + 1].wmmat[j] = pic->picture_parameter.wm[i].wmmat[j];
1293    }
1294    for (i = 0; i < tile_count && i < 256; ++i) {
1295       result.tile_info[i].offset = pic->slice_parameter.slice_data_offset[i];
1296       result.tile_info[i].size = pic->slice_parameter.slice_data_size[i];
1297    }
1298 
1299    if (dec->dpb_type == DPB_DYNAMIC_TIER_2) {
1300       dec->ref_codec.bts = pic->picture_parameter.bit_depth_idx ? CODEC_10_BITS : CODEC_8_BITS;
1301       dec->ref_codec.index = result.curr_pic_idx;
1302       dec->ref_codec.ref_size = 8;
1303       memset(dec->ref_codec.ref_list, 0x7f, sizeof(dec->ref_codec.ref_list));
1304       memcpy(dec->ref_codec.ref_list, result.ref_frame_map, sizeof(result.ref_frame_map));
1305    }
1306 
1307    return result;
1308 }
1309 
rvcn_init_mode_probs(void * prob)1310 static void rvcn_init_mode_probs(void *prob)
1311 {
1312    rvcn_av1_frame_context_t * fc = (rvcn_av1_frame_context_t*)prob;
1313    int i;
1314 
1315    memcpy(fc->palette_y_size_cdf, default_palette_y_size_cdf, sizeof(default_palette_y_size_cdf));
1316    memcpy(fc->palette_uv_size_cdf, default_palette_uv_size_cdf, sizeof(default_palette_uv_size_cdf));
1317    memcpy(fc->palette_y_color_index_cdf, default_palette_y_color_index_cdf, sizeof(default_palette_y_color_index_cdf));
1318    memcpy(fc->palette_uv_color_index_cdf, default_palette_uv_color_index_cdf, sizeof(default_palette_uv_color_index_cdf));
1319    memcpy(fc->kf_y_cdf, default_kf_y_mode_cdf, sizeof(default_kf_y_mode_cdf));
1320    memcpy(fc->angle_delta_cdf, default_angle_delta_cdf, sizeof(default_angle_delta_cdf));
1321    memcpy(fc->comp_inter_cdf, default_comp_inter_cdf, sizeof(default_comp_inter_cdf));
1322    memcpy(fc->comp_ref_type_cdf, default_comp_ref_type_cdf,sizeof(default_comp_ref_type_cdf));
1323    memcpy(fc->uni_comp_ref_cdf, default_uni_comp_ref_cdf, sizeof(default_uni_comp_ref_cdf));
1324    memcpy(fc->palette_y_mode_cdf, default_palette_y_mode_cdf, sizeof(default_palette_y_mode_cdf));
1325    memcpy(fc->palette_uv_mode_cdf, default_palette_uv_mode_cdf, sizeof(default_palette_uv_mode_cdf));
1326    memcpy(fc->comp_ref_cdf, default_comp_ref_cdf, sizeof(default_comp_ref_cdf));
1327    memcpy(fc->comp_bwdref_cdf, default_comp_bwdref_cdf, sizeof(default_comp_bwdref_cdf));
1328    memcpy(fc->single_ref_cdf, default_single_ref_cdf, sizeof(default_single_ref_cdf));
1329    memcpy(fc->txfm_partition_cdf, default_txfm_partition_cdf, sizeof(default_txfm_partition_cdf));
1330    memcpy(fc->compound_index_cdf, default_compound_idx_cdfs, sizeof(default_compound_idx_cdfs));
1331    memcpy(fc->comp_group_idx_cdf, default_comp_group_idx_cdfs, sizeof(default_comp_group_idx_cdfs));
1332    memcpy(fc->newmv_cdf, default_newmv_cdf, sizeof(default_newmv_cdf));
1333    memcpy(fc->zeromv_cdf, default_zeromv_cdf, sizeof(default_zeromv_cdf));
1334    memcpy(fc->refmv_cdf, default_refmv_cdf, sizeof(default_refmv_cdf));
1335    memcpy(fc->drl_cdf, default_drl_cdf, sizeof(default_drl_cdf));
1336    memcpy(fc->motion_mode_cdf, default_motion_mode_cdf, sizeof(default_motion_mode_cdf));
1337    memcpy(fc->obmc_cdf, default_obmc_cdf, sizeof(default_obmc_cdf));
1338    memcpy(fc->inter_compound_mode_cdf, default_inter_compound_mode_cdf, sizeof(default_inter_compound_mode_cdf));
1339    memcpy(fc->compound_type_cdf, default_compound_type_cdf, sizeof(default_compound_type_cdf));
1340    memcpy(fc->wedge_idx_cdf, default_wedge_idx_cdf, sizeof(default_wedge_idx_cdf));
1341    memcpy(fc->interintra_cdf, default_interintra_cdf, sizeof(default_interintra_cdf));
1342    memcpy(fc->wedge_interintra_cdf, default_wedge_interintra_cdf, sizeof(default_wedge_interintra_cdf));
1343    memcpy(fc->interintra_mode_cdf, default_interintra_mode_cdf, sizeof(default_interintra_mode_cdf));
1344    memcpy(fc->pred_cdf, default_segment_pred_cdf, sizeof(default_segment_pred_cdf));
1345    memcpy(fc->switchable_restore_cdf, default_switchable_restore_cdf, sizeof(default_switchable_restore_cdf));
1346    memcpy(fc->wiener_restore_cdf, default_wiener_restore_cdf, sizeof(default_wiener_restore_cdf));
1347    memcpy(fc->sgrproj_restore_cdf, default_sgrproj_restore_cdf, sizeof(default_sgrproj_restore_cdf));
1348    memcpy(fc->y_mode_cdf, default_if_y_mode_cdf, sizeof(default_if_y_mode_cdf));
1349    memcpy(fc->uv_mode_cdf, default_uv_mode_cdf, sizeof(default_uv_mode_cdf));
1350    memcpy(fc->switchable_interp_cdf, default_switchable_interp_cdf, sizeof(default_switchable_interp_cdf));
1351    memcpy(fc->partition_cdf, default_partition_cdf, sizeof(default_partition_cdf));
1352    memcpy(fc->intra_ext_tx_cdf, default_intra_ext_tx_cdf, sizeof(default_intra_ext_tx_cdf));
1353    memcpy(fc->inter_ext_tx_cdf, default_inter_ext_tx_cdf, sizeof(default_inter_ext_tx_cdf));
1354    memcpy(fc->skip_cdfs, default_skip_cdfs, sizeof(default_skip_cdfs));
1355    memcpy(fc->intra_inter_cdf, default_intra_inter_cdf, sizeof(default_intra_inter_cdf));
1356    memcpy(fc->tree_cdf, default_seg_tree_cdf, sizeof(default_seg_tree_cdf));
1357    for (i = 0; i < SPATIAL_PREDICTION_PROBS; ++i)
1358       memcpy(fc->spatial_pred_seg_cdf[i], default_spatial_pred_seg_tree_cdf[i], sizeof(default_spatial_pred_seg_tree_cdf[i]));
1359    memcpy(fc->tx_size_cdf, default_tx_size_cdf, sizeof(default_tx_size_cdf));
1360    memcpy(fc->delta_q_cdf, default_delta_q_cdf, sizeof(default_delta_q_cdf));
1361    memcpy(fc->skip_mode_cdfs, default_skip_mode_cdfs, sizeof(default_skip_mode_cdfs));
1362    memcpy(fc->delta_lf_cdf, default_delta_lf_cdf, sizeof(default_delta_lf_cdf));
1363    memcpy(fc->delta_lf_multi_cdf, default_delta_lf_multi_cdf, sizeof(default_delta_lf_multi_cdf));
1364    memcpy(fc->cfl_sign_cdf, default_cfl_sign_cdf, sizeof(default_cfl_sign_cdf));
1365    memcpy(fc->cfl_alpha_cdf, default_cfl_alpha_cdf, sizeof(default_cfl_alpha_cdf));
1366    memcpy(fc->filter_intra_cdfs, default_filter_intra_cdfs, sizeof(default_filter_intra_cdfs));
1367    memcpy(fc->filter_intra_mode_cdf, default_filter_intra_mode_cdf, sizeof(default_filter_intra_mode_cdf));
1368    memcpy(fc->intrabc_cdf, default_intrabc_cdf, sizeof(default_intrabc_cdf));
1369 }
1370 
rvcn_vcn4_init_mode_probs(void * prob)1371 static void rvcn_vcn4_init_mode_probs(void *prob)
1372 {
1373    rvcn_av1_vcn4_frame_context_t * fc = (rvcn_av1_vcn4_frame_context_t*)prob;
1374    int i;
1375 
1376    memcpy(fc->palette_y_size_cdf, default_palette_y_size_cdf, sizeof(default_palette_y_size_cdf));
1377    memcpy(fc->palette_uv_size_cdf, default_palette_uv_size_cdf, sizeof(default_palette_uv_size_cdf));
1378    memcpy(fc->palette_y_color_index_cdf, default_palette_y_color_index_cdf, sizeof(default_palette_y_color_index_cdf));
1379    memcpy(fc->palette_uv_color_index_cdf, default_palette_uv_color_index_cdf, sizeof(default_palette_uv_color_index_cdf));
1380    memcpy(fc->kf_y_cdf, default_kf_y_mode_cdf, sizeof(default_kf_y_mode_cdf));
1381    memcpy(fc->angle_delta_cdf, default_angle_delta_cdf, sizeof(default_angle_delta_cdf));
1382    memcpy(fc->comp_inter_cdf, default_comp_inter_cdf, sizeof(default_comp_inter_cdf));
1383    memcpy(fc->comp_ref_type_cdf, default_comp_ref_type_cdf,sizeof(default_comp_ref_type_cdf));
1384    memcpy(fc->uni_comp_ref_cdf, default_uni_comp_ref_cdf, sizeof(default_uni_comp_ref_cdf));
1385    memcpy(fc->palette_y_mode_cdf, default_palette_y_mode_cdf, sizeof(default_palette_y_mode_cdf));
1386    memcpy(fc->palette_uv_mode_cdf, default_palette_uv_mode_cdf, sizeof(default_palette_uv_mode_cdf));
1387    memcpy(fc->comp_ref_cdf, default_comp_ref_cdf, sizeof(default_comp_ref_cdf));
1388    memcpy(fc->comp_bwdref_cdf, default_comp_bwdref_cdf, sizeof(default_comp_bwdref_cdf));
1389    memcpy(fc->single_ref_cdf, default_single_ref_cdf, sizeof(default_single_ref_cdf));
1390    memcpy(fc->txfm_partition_cdf, default_txfm_partition_cdf, sizeof(default_txfm_partition_cdf));
1391    memcpy(fc->compound_index_cdf, default_compound_idx_cdfs, sizeof(default_compound_idx_cdfs));
1392    memcpy(fc->comp_group_idx_cdf, default_comp_group_idx_cdfs, sizeof(default_comp_group_idx_cdfs));
1393    memcpy(fc->newmv_cdf, default_newmv_cdf, sizeof(default_newmv_cdf));
1394    memcpy(fc->zeromv_cdf, default_zeromv_cdf, sizeof(default_zeromv_cdf));
1395    memcpy(fc->refmv_cdf, default_refmv_cdf, sizeof(default_refmv_cdf));
1396    memcpy(fc->drl_cdf, default_drl_cdf, sizeof(default_drl_cdf));
1397    memcpy(fc->motion_mode_cdf, default_motion_mode_cdf, sizeof(default_motion_mode_cdf));
1398    memcpy(fc->obmc_cdf, default_obmc_cdf, sizeof(default_obmc_cdf));
1399    memcpy(fc->inter_compound_mode_cdf, default_inter_compound_mode_cdf, sizeof(default_inter_compound_mode_cdf));
1400    memcpy(fc->compound_type_cdf, default_compound_type_cdf, sizeof(default_compound_type_cdf));
1401    memcpy(fc->wedge_idx_cdf, default_wedge_idx_cdf, sizeof(default_wedge_idx_cdf));
1402    memcpy(fc->interintra_cdf, default_interintra_cdf, sizeof(default_interintra_cdf));
1403    memcpy(fc->wedge_interintra_cdf, default_wedge_interintra_cdf, sizeof(default_wedge_interintra_cdf));
1404    memcpy(fc->interintra_mode_cdf, default_interintra_mode_cdf, sizeof(default_interintra_mode_cdf));
1405    memcpy(fc->pred_cdf, default_segment_pred_cdf, sizeof(default_segment_pred_cdf));
1406    memcpy(fc->switchable_restore_cdf, default_switchable_restore_cdf, sizeof(default_switchable_restore_cdf));
1407    memcpy(fc->wiener_restore_cdf, default_wiener_restore_cdf, sizeof(default_wiener_restore_cdf));
1408    memcpy(fc->sgrproj_restore_cdf, default_sgrproj_restore_cdf, sizeof(default_sgrproj_restore_cdf));
1409    memcpy(fc->y_mode_cdf, default_if_y_mode_cdf, sizeof(default_if_y_mode_cdf));
1410    memcpy(fc->uv_mode_cdf, default_uv_mode_cdf, sizeof(default_uv_mode_cdf));
1411    memcpy(fc->switchable_interp_cdf, default_switchable_interp_cdf, sizeof(default_switchable_interp_cdf));
1412    memcpy(fc->partition_cdf, default_partition_cdf, sizeof(default_partition_cdf));
1413    memcpy(fc->intra_ext_tx_cdf, &default_intra_ext_tx_cdf[1], sizeof(default_intra_ext_tx_cdf[1]) * 2);
1414    memcpy(fc->inter_ext_tx_cdf, &default_inter_ext_tx_cdf[1], sizeof(default_inter_ext_tx_cdf[1]) * 3);
1415    memcpy(fc->skip_cdfs, default_skip_cdfs, sizeof(default_skip_cdfs));
1416    memcpy(fc->intra_inter_cdf, default_intra_inter_cdf, sizeof(default_intra_inter_cdf));
1417    memcpy(fc->tree_cdf, default_seg_tree_cdf, sizeof(default_seg_tree_cdf));
1418    for (i = 0; i < SPATIAL_PREDICTION_PROBS; ++i)
1419       memcpy(fc->spatial_pred_seg_cdf[i], default_spatial_pred_seg_tree_cdf[i], sizeof(default_spatial_pred_seg_tree_cdf[i]));
1420    memcpy(fc->tx_size_cdf, default_tx_size_cdf, sizeof(default_tx_size_cdf));
1421    memcpy(fc->delta_q_cdf, default_delta_q_cdf, sizeof(default_delta_q_cdf));
1422    memcpy(fc->skip_mode_cdfs, default_skip_mode_cdfs, sizeof(default_skip_mode_cdfs));
1423    memcpy(fc->delta_lf_cdf, default_delta_lf_cdf, sizeof(default_delta_lf_cdf));
1424    memcpy(fc->delta_lf_multi_cdf, default_delta_lf_multi_cdf, sizeof(default_delta_lf_multi_cdf));
1425    memcpy(fc->cfl_sign_cdf, default_cfl_sign_cdf, sizeof(default_cfl_sign_cdf));
1426    memcpy(fc->cfl_alpha_cdf, default_cfl_alpha_cdf, sizeof(default_cfl_alpha_cdf));
1427    memcpy(fc->filter_intra_cdfs, default_filter_intra_cdfs, sizeof(default_filter_intra_cdfs));
1428    memcpy(fc->filter_intra_mode_cdf, default_filter_intra_mode_cdf, sizeof(default_filter_intra_mode_cdf));
1429    memcpy(fc->intrabc_cdf, default_intrabc_cdf, sizeof(default_intrabc_cdf));
1430 }
1431 
rvcn_av1_init_mv_probs(void * prob)1432 static void rvcn_av1_init_mv_probs(void *prob)
1433 {
1434    rvcn_av1_frame_context_t * fc = (rvcn_av1_frame_context_t*)prob;
1435 
1436    memcpy(fc->nmvc_joints_cdf, default_nmv_context.joints_cdf, sizeof(default_nmv_context.joints_cdf));
1437    memcpy(fc->nmvc_0_bits_cdf, default_nmv_context.comps[0].bits_cdf, sizeof(default_nmv_context.comps[0].bits_cdf));
1438    memcpy(fc->nmvc_0_class0_cdf, default_nmv_context.comps[0].class0_cdf, sizeof(default_nmv_context.comps[0].class0_cdf));
1439    memcpy(fc->nmvc_0_class0_fp_cdf, default_nmv_context.comps[0].class0_fp_cdf, sizeof(default_nmv_context.comps[0].class0_fp_cdf));
1440    memcpy(fc->nmvc_0_class0_hp_cdf, default_nmv_context.comps[0].class0_hp_cdf, sizeof(default_nmv_context.comps[0].class0_hp_cdf));
1441    memcpy(fc->nmvc_0_classes_cdf, default_nmv_context.comps[0].classes_cdf, sizeof(default_nmv_context.comps[0].classes_cdf));
1442    memcpy(fc->nmvc_0_fp_cdf, default_nmv_context.comps[0].fp_cdf, sizeof(default_nmv_context.comps[0].fp_cdf));
1443    memcpy(fc->nmvc_0_hp_cdf, default_nmv_context.comps[0].hp_cdf, sizeof(default_nmv_context.comps[0].hp_cdf));
1444    memcpy(fc->nmvc_0_sign_cdf, default_nmv_context.comps[0].sign_cdf, sizeof(default_nmv_context.comps[0].sign_cdf));
1445    memcpy(fc->nmvc_1_bits_cdf, default_nmv_context.comps[1].bits_cdf, sizeof(default_nmv_context.comps[1].bits_cdf));
1446    memcpy(fc->nmvc_1_class0_cdf, default_nmv_context.comps[1].class0_cdf, sizeof(default_nmv_context.comps[1].class0_cdf));
1447    memcpy(fc->nmvc_1_class0_fp_cdf, default_nmv_context.comps[1].class0_fp_cdf, sizeof(default_nmv_context.comps[1].class0_fp_cdf));
1448    memcpy(fc->nmvc_1_class0_hp_cdf, default_nmv_context.comps[1].class0_hp_cdf, sizeof(default_nmv_context.comps[1].class0_hp_cdf));
1449    memcpy(fc->nmvc_1_classes_cdf, default_nmv_context.comps[1].classes_cdf, sizeof(default_nmv_context.comps[1].classes_cdf));
1450    memcpy(fc->nmvc_1_fp_cdf, default_nmv_context.comps[1].fp_cdf, sizeof(default_nmv_context.comps[1].fp_cdf));
1451    memcpy(fc->nmvc_1_hp_cdf, default_nmv_context.comps[1].hp_cdf, sizeof(default_nmv_context.comps[1].hp_cdf));
1452    memcpy(fc->nmvc_1_sign_cdf, default_nmv_context.comps[1].sign_cdf, sizeof(default_nmv_context.comps[1].sign_cdf));
1453    memcpy(fc->ndvc_joints_cdf, default_nmv_context.joints_cdf, sizeof(default_nmv_context.joints_cdf));
1454    memcpy(fc->ndvc_0_bits_cdf, default_nmv_context.comps[0].bits_cdf, sizeof(default_nmv_context.comps[0].bits_cdf));
1455    memcpy(fc->ndvc_0_class0_cdf, default_nmv_context.comps[0].class0_cdf, sizeof(default_nmv_context.comps[0].class0_cdf));
1456    memcpy(fc->ndvc_0_class0_fp_cdf, default_nmv_context.comps[0].class0_fp_cdf, sizeof(default_nmv_context.comps[0].class0_fp_cdf));
1457    memcpy(fc->ndvc_0_class0_hp_cdf, default_nmv_context.comps[0].class0_hp_cdf, sizeof(default_nmv_context.comps[0].class0_hp_cdf));
1458    memcpy(fc->ndvc_0_classes_cdf, default_nmv_context.comps[0].classes_cdf, sizeof(default_nmv_context.comps[0].classes_cdf));
1459    memcpy(fc->ndvc_0_fp_cdf, default_nmv_context.comps[0].fp_cdf, sizeof(default_nmv_context.comps[0].fp_cdf));
1460    memcpy(fc->ndvc_0_hp_cdf, default_nmv_context.comps[0].hp_cdf, sizeof(default_nmv_context.comps[0].hp_cdf));
1461    memcpy(fc->ndvc_0_sign_cdf, default_nmv_context.comps[0].sign_cdf, sizeof(default_nmv_context.comps[0].sign_cdf));
1462    memcpy(fc->ndvc_1_bits_cdf, default_nmv_context.comps[1].bits_cdf, sizeof(default_nmv_context.comps[1].bits_cdf));
1463    memcpy(fc->ndvc_1_class0_cdf, default_nmv_context.comps[1].class0_cdf, sizeof(default_nmv_context.comps[1].class0_cdf));
1464    memcpy(fc->ndvc_1_class0_fp_cdf, default_nmv_context.comps[1].class0_fp_cdf, sizeof(default_nmv_context.comps[1].class0_fp_cdf));
1465    memcpy(fc->ndvc_1_class0_hp_cdf, default_nmv_context.comps[1].class0_hp_cdf, sizeof(default_nmv_context.comps[1].class0_hp_cdf));
1466    memcpy(fc->ndvc_1_classes_cdf, default_nmv_context.comps[1].classes_cdf, sizeof(default_nmv_context.comps[1].classes_cdf));
1467    memcpy(fc->ndvc_1_fp_cdf, default_nmv_context.comps[1].fp_cdf, sizeof(default_nmv_context.comps[1].fp_cdf));
1468    memcpy(fc->ndvc_1_hp_cdf, default_nmv_context.comps[1].hp_cdf, sizeof(default_nmv_context.comps[1].hp_cdf));
1469    memcpy(fc->ndvc_1_sign_cdf, default_nmv_context.comps[1].sign_cdf, sizeof(default_nmv_context.comps[1].sign_cdf));
1470 }
1471 
rvcn_vcn4_av1_init_mv_probs(void * prob)1472 static void rvcn_vcn4_av1_init_mv_probs(void *prob)
1473 {
1474    rvcn_av1_vcn4_frame_context_t * fc = (rvcn_av1_vcn4_frame_context_t*)prob;
1475 
1476    memcpy(fc->nmvc_joints_cdf, default_nmv_context.joints_cdf, sizeof(default_nmv_context.joints_cdf));
1477    memcpy(fc->nmvc_0_bits_cdf, default_nmv_context.comps[0].bits_cdf, sizeof(default_nmv_context.comps[0].bits_cdf));
1478    memcpy(fc->nmvc_0_class0_cdf, default_nmv_context.comps[0].class0_cdf, sizeof(default_nmv_context.comps[0].class0_cdf));
1479    memcpy(fc->nmvc_0_class0_fp_cdf, default_nmv_context.comps[0].class0_fp_cdf, sizeof(default_nmv_context.comps[0].class0_fp_cdf));
1480    memcpy(fc->nmvc_0_class0_hp_cdf, default_nmv_context.comps[0].class0_hp_cdf, sizeof(default_nmv_context.comps[0].class0_hp_cdf));
1481    memcpy(fc->nmvc_0_classes_cdf, default_nmv_context.comps[0].classes_cdf, sizeof(default_nmv_context.comps[0].classes_cdf));
1482    memcpy(fc->nmvc_0_fp_cdf, default_nmv_context.comps[0].fp_cdf, sizeof(default_nmv_context.comps[0].fp_cdf));
1483    memcpy(fc->nmvc_0_hp_cdf, default_nmv_context.comps[0].hp_cdf, sizeof(default_nmv_context.comps[0].hp_cdf));
1484    memcpy(fc->nmvc_0_sign_cdf, default_nmv_context.comps[0].sign_cdf, sizeof(default_nmv_context.comps[0].sign_cdf));
1485    memcpy(fc->nmvc_1_bits_cdf, default_nmv_context.comps[1].bits_cdf, sizeof(default_nmv_context.comps[1].bits_cdf));
1486    memcpy(fc->nmvc_1_class0_cdf, default_nmv_context.comps[1].class0_cdf, sizeof(default_nmv_context.comps[1].class0_cdf));
1487    memcpy(fc->nmvc_1_class0_fp_cdf, default_nmv_context.comps[1].class0_fp_cdf, sizeof(default_nmv_context.comps[1].class0_fp_cdf));
1488    memcpy(fc->nmvc_1_class0_hp_cdf, default_nmv_context.comps[1].class0_hp_cdf, sizeof(default_nmv_context.comps[1].class0_hp_cdf));
1489    memcpy(fc->nmvc_1_classes_cdf, default_nmv_context.comps[1].classes_cdf, sizeof(default_nmv_context.comps[1].classes_cdf));
1490    memcpy(fc->nmvc_1_fp_cdf, default_nmv_context.comps[1].fp_cdf, sizeof(default_nmv_context.comps[1].fp_cdf));
1491    memcpy(fc->nmvc_1_hp_cdf, default_nmv_context.comps[1].hp_cdf, sizeof(default_nmv_context.comps[1].hp_cdf));
1492    memcpy(fc->nmvc_1_sign_cdf, default_nmv_context.comps[1].sign_cdf, sizeof(default_nmv_context.comps[1].sign_cdf));
1493    memcpy(fc->ndvc_joints_cdf, default_nmv_context.joints_cdf, sizeof(default_nmv_context.joints_cdf));
1494    memcpy(fc->ndvc_0_bits_cdf, default_nmv_context.comps[0].bits_cdf, sizeof(default_nmv_context.comps[0].bits_cdf));
1495    memcpy(fc->ndvc_0_class0_cdf, default_nmv_context.comps[0].class0_cdf, sizeof(default_nmv_context.comps[0].class0_cdf));
1496    memcpy(fc->ndvc_0_class0_fp_cdf, default_nmv_context.comps[0].class0_fp_cdf, sizeof(default_nmv_context.comps[0].class0_fp_cdf));
1497    memcpy(fc->ndvc_0_class0_hp_cdf, default_nmv_context.comps[0].class0_hp_cdf, sizeof(default_nmv_context.comps[0].class0_hp_cdf));
1498    memcpy(fc->ndvc_0_classes_cdf, default_nmv_context.comps[0].classes_cdf, sizeof(default_nmv_context.comps[0].classes_cdf));
1499    memcpy(fc->ndvc_0_fp_cdf, default_nmv_context.comps[0].fp_cdf, sizeof(default_nmv_context.comps[0].fp_cdf));
1500    memcpy(fc->ndvc_0_hp_cdf, default_nmv_context.comps[0].hp_cdf, sizeof(default_nmv_context.comps[0].hp_cdf));
1501    memcpy(fc->ndvc_0_sign_cdf, default_nmv_context.comps[0].sign_cdf, sizeof(default_nmv_context.comps[0].sign_cdf));
1502    memcpy(fc->ndvc_1_bits_cdf, default_nmv_context.comps[1].bits_cdf, sizeof(default_nmv_context.comps[1].bits_cdf));
1503    memcpy(fc->ndvc_1_class0_cdf, default_nmv_context.comps[1].class0_cdf, sizeof(default_nmv_context.comps[1].class0_cdf));
1504    memcpy(fc->ndvc_1_class0_fp_cdf, default_nmv_context.comps[1].class0_fp_cdf, sizeof(default_nmv_context.comps[1].class0_fp_cdf));
1505    memcpy(fc->ndvc_1_class0_hp_cdf, default_nmv_context.comps[1].class0_hp_cdf, sizeof(default_nmv_context.comps[1].class0_hp_cdf));
1506    memcpy(fc->ndvc_1_classes_cdf, default_nmv_context.comps[1].classes_cdf, sizeof(default_nmv_context.comps[1].classes_cdf));
1507    memcpy(fc->ndvc_1_fp_cdf, default_nmv_context.comps[1].fp_cdf, sizeof(default_nmv_context.comps[1].fp_cdf));
1508    memcpy(fc->ndvc_1_hp_cdf, default_nmv_context.comps[1].hp_cdf, sizeof(default_nmv_context.comps[1].hp_cdf));
1509    memcpy(fc->ndvc_1_sign_cdf, default_nmv_context.comps[1].sign_cdf, sizeof(default_nmv_context.comps[1].sign_cdf));
1510 }
1511 
rvcn_av1_default_coef_probs(void * prob,int index)1512 static void rvcn_av1_default_coef_probs(void *prob, int index)
1513 {
1514    rvcn_av1_frame_context_t * fc = (rvcn_av1_frame_context_t*)prob;
1515 
1516    memcpy(fc->txb_skip_cdf, av1_default_txb_skip_cdfs[index], sizeof(av1_default_txb_skip_cdfs[index]));
1517    memcpy(fc->eob_extra_cdf, av1_default_eob_extra_cdfs[index], sizeof(av1_default_eob_extra_cdfs[index]));
1518    memcpy(fc->dc_sign_cdf, av1_default_dc_sign_cdfs[index], sizeof(av1_default_dc_sign_cdfs[index]));
1519    memcpy(fc->coeff_br_cdf, av1_default_coeff_lps_multi_cdfs[index], sizeof(av1_default_coeff_lps_multi_cdfs[index]));
1520    memcpy(fc->coeff_base_cdf, av1_default_coeff_base_multi_cdfs[index], sizeof(av1_default_coeff_base_multi_cdfs[index]));
1521    memcpy(fc->coeff_base_eob_cdf, av1_default_coeff_base_eob_multi_cdfs[index], sizeof(av1_default_coeff_base_eob_multi_cdfs[index]));
1522    memcpy(fc->eob_flag_cdf16, av1_default_eob_multi16_cdfs[index], sizeof(av1_default_eob_multi16_cdfs[index]));
1523    memcpy(fc->eob_flag_cdf32, av1_default_eob_multi32_cdfs[index], sizeof(av1_default_eob_multi32_cdfs[index]));
1524    memcpy(fc->eob_flag_cdf64, av1_default_eob_multi64_cdfs[index], sizeof(av1_default_eob_multi64_cdfs[index]));
1525    memcpy(fc->eob_flag_cdf128, av1_default_eob_multi128_cdfs[index], sizeof(av1_default_eob_multi128_cdfs[index]));
1526    memcpy(fc->eob_flag_cdf256, av1_default_eob_multi256_cdfs[index], sizeof(av1_default_eob_multi256_cdfs[index]));
1527    memcpy(fc->eob_flag_cdf512, av1_default_eob_multi512_cdfs[index], sizeof(av1_default_eob_multi512_cdfs[index]));
1528    memcpy(fc->eob_flag_cdf1024, av1_default_eob_multi1024_cdfs[index], sizeof(av1_default_eob_multi1024_cdfs[index]));
1529 }
1530 
rvcn_vcn4_av1_default_coef_probs(void * prob,int index)1531 static void rvcn_vcn4_av1_default_coef_probs(void *prob, int index)
1532 {
1533    rvcn_av1_vcn4_frame_context_t *fc = (rvcn_av1_vcn4_frame_context_t*)prob;
1534    void *p;
1535    int i, j;
1536    unsigned size;
1537 
1538    memcpy(fc->txb_skip_cdf, av1_default_txb_skip_cdfs[index], sizeof(av1_default_txb_skip_cdfs[index]));
1539 
1540    p = (void *)fc->eob_extra_cdf;
1541    size = sizeof(av1_default_eob_extra_cdfs[0][0][0][0]) * EOB_COEF_CONTEXTS_VCN4;
1542    for (i = 0; i < AV1_TX_SIZES; i++) {
1543       for ( j = 0; j < AV1_PLANE_TYPES; j++) {
1544          memcpy(p, &av1_default_eob_extra_cdfs[index][i][j][3], size);
1545          p += size;
1546       }
1547    }
1548 
1549    memcpy(fc->dc_sign_cdf, av1_default_dc_sign_cdfs[index], sizeof(av1_default_dc_sign_cdfs[index]));
1550    memcpy(fc->coeff_br_cdf, av1_default_coeff_lps_multi_cdfs[index], sizeof(av1_default_coeff_lps_multi_cdfs[index]));
1551    memcpy(fc->coeff_base_cdf, av1_default_coeff_base_multi_cdfs[index], sizeof(av1_default_coeff_base_multi_cdfs[index]));
1552    memcpy(fc->coeff_base_eob_cdf, av1_default_coeff_base_eob_multi_cdfs[index], sizeof(av1_default_coeff_base_eob_multi_cdfs[index]));
1553    memcpy(fc->eob_flag_cdf16, av1_default_eob_multi16_cdfs[index], sizeof(av1_default_eob_multi16_cdfs[index]));
1554    memcpy(fc->eob_flag_cdf32, av1_default_eob_multi32_cdfs[index], sizeof(av1_default_eob_multi32_cdfs[index]));
1555    memcpy(fc->eob_flag_cdf64, av1_default_eob_multi64_cdfs[index], sizeof(av1_default_eob_multi64_cdfs[index]));
1556    memcpy(fc->eob_flag_cdf128, av1_default_eob_multi128_cdfs[index], sizeof(av1_default_eob_multi128_cdfs[index]));
1557    memcpy(fc->eob_flag_cdf256, av1_default_eob_multi256_cdfs[index], sizeof(av1_default_eob_multi256_cdfs[index]));
1558    memcpy(fc->eob_flag_cdf512, av1_default_eob_multi512_cdfs[index], sizeof(av1_default_eob_multi512_cdfs[index]));
1559    memcpy(fc->eob_flag_cdf1024, av1_default_eob_multi1024_cdfs[index], sizeof(av1_default_eob_multi1024_cdfs[index]));
1560 }
1561 
calc_ctx_size_h265_main(struct radeon_decoder * dec)1562 static unsigned calc_ctx_size_h265_main(struct radeon_decoder *dec)
1563 {
1564    unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH);
1565    unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT);
1566 
1567    unsigned max_references = dec->base.max_references + 1;
1568 
1569    if (dec->base.width * dec->base.height >= 4096 * 2000)
1570       max_references = MAX2(max_references, 8);
1571    else
1572       max_references = MAX2(max_references, 17);
1573 
1574    width = align(width, 16);
1575    height = align(height, 16);
1576    return ((width + 255) / 16) * ((height + 255) / 16) * 16 * max_references + 52 * 1024;
1577 }
1578 
calc_ctx_size_h265_main10(struct radeon_decoder * dec,struct pipe_h265_picture_desc * pic)1579 static unsigned calc_ctx_size_h265_main10(struct radeon_decoder *dec,
1580                                           struct pipe_h265_picture_desc *pic)
1581 {
1582    unsigned log2_ctb_size, width_in_ctb, height_in_ctb, num_16x16_block_per_ctb;
1583    unsigned context_buffer_size_per_ctb_row, cm_buffer_size, max_mb_address, db_left_tile_pxl_size;
1584    unsigned db_left_tile_ctx_size = 4096 / 16 * (32 + 16 * 4);
1585 
1586    unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH);
1587    unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT);
1588    unsigned coeff_10bit =
1589       (pic->pps->sps->bit_depth_luma_minus8 || pic->pps->sps->bit_depth_chroma_minus8) ? 2 : 1;
1590 
1591    unsigned max_references = dec->base.max_references + 1;
1592 
1593    if (dec->base.width * dec->base.height >= 4096 * 2000)
1594       max_references = MAX2(max_references, 8);
1595    else
1596       max_references = MAX2(max_references, 17);
1597 
1598    log2_ctb_size = pic->pps->sps->log2_min_luma_coding_block_size_minus3 + 3 +
1599                    pic->pps->sps->log2_diff_max_min_luma_coding_block_size;
1600 
1601    width_in_ctb = (width + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size;
1602    height_in_ctb = (height + ((1 << log2_ctb_size) - 1)) >> log2_ctb_size;
1603 
1604    num_16x16_block_per_ctb = ((1 << log2_ctb_size) >> 4) * ((1 << log2_ctb_size) >> 4);
1605    context_buffer_size_per_ctb_row = align(width_in_ctb * num_16x16_block_per_ctb * 16, 256);
1606    max_mb_address = (unsigned)ceil(height * 8 / 2048.0);
1607 
1608    cm_buffer_size = max_references * context_buffer_size_per_ctb_row * height_in_ctb;
1609    db_left_tile_pxl_size = coeff_10bit * (max_mb_address * 2 * 2048 + 1024);
1610 
1611    return cm_buffer_size + db_left_tile_ctx_size + db_left_tile_pxl_size;
1612 }
1613 
get_vc1_msg(struct pipe_vc1_picture_desc * pic)1614 static rvcn_dec_message_vc1_t get_vc1_msg(struct pipe_vc1_picture_desc *pic)
1615 {
1616    rvcn_dec_message_vc1_t result;
1617 
1618    memset(&result, 0, sizeof(result));
1619    switch (pic->base.profile) {
1620    case PIPE_VIDEO_PROFILE_VC1_SIMPLE:
1621       result.profile = RDECODE_VC1_PROFILE_SIMPLE;
1622       result.level = 1;
1623       break;
1624 
1625    case PIPE_VIDEO_PROFILE_VC1_MAIN:
1626       result.profile = RDECODE_VC1_PROFILE_MAIN;
1627       result.level = 2;
1628       break;
1629 
1630    case PIPE_VIDEO_PROFILE_VC1_ADVANCED:
1631       result.profile = RDECODE_VC1_PROFILE_ADVANCED;
1632       result.level = 4;
1633       break;
1634 
1635    default:
1636       assert(0);
1637    }
1638 
1639    result.sps_info_flags |= pic->postprocflag << 7;
1640    result.sps_info_flags |= pic->pulldown << 6;
1641    result.sps_info_flags |= pic->interlace << 5;
1642    result.sps_info_flags |= pic->tfcntrflag << 4;
1643    result.sps_info_flags |= pic->finterpflag << 3;
1644    result.sps_info_flags |= pic->psf << 1;
1645 
1646    result.pps_info_flags |= pic->range_mapy_flag << 31;
1647    result.pps_info_flags |= pic->range_mapy << 28;
1648    result.pps_info_flags |= pic->range_mapuv_flag << 27;
1649    result.pps_info_flags |= pic->range_mapuv << 24;
1650    result.pps_info_flags |= pic->multires << 21;
1651    result.pps_info_flags |= pic->maxbframes << 16;
1652    result.pps_info_flags |= pic->overlap << 11;
1653    result.pps_info_flags |= pic->quantizer << 9;
1654    result.pps_info_flags |= pic->panscan_flag << 7;
1655    result.pps_info_flags |= pic->refdist_flag << 6;
1656    result.pps_info_flags |= pic->vstransform << 0;
1657 
1658    if (pic->base.profile != PIPE_VIDEO_PROFILE_VC1_SIMPLE) {
1659       result.pps_info_flags |= pic->syncmarker << 20;
1660       result.pps_info_flags |= pic->rangered << 19;
1661       result.pps_info_flags |= pic->loopfilter << 5;
1662       result.pps_info_flags |= pic->fastuvmc << 4;
1663       result.pps_info_flags |= pic->extended_mv << 3;
1664       result.pps_info_flags |= pic->extended_dmv << 8;
1665       result.pps_info_flags |= pic->dquant << 1;
1666    }
1667 
1668    result.chroma_format = 1;
1669 
1670    return result;
1671 }
1672 
get_ref_pic_idx(struct radeon_decoder * dec,struct pipe_video_buffer * ref)1673 static uint32_t get_ref_pic_idx(struct radeon_decoder *dec, struct pipe_video_buffer *ref)
1674 {
1675    uint32_t min = MAX2(dec->frame_number, NUM_MPEG2_REFS) - NUM_MPEG2_REFS;
1676    uint32_t max = MAX2(dec->frame_number, 1) - 1;
1677    uintptr_t frame;
1678 
1679    /* seems to be the most sane fallback */
1680    if (!ref)
1681       return max;
1682 
1683    /* get the frame number from the associated data */
1684    frame = (uintptr_t)vl_video_buffer_get_associated_data(ref, &dec->base);
1685 
1686    /* limit the frame number to a valid range */
1687    return MAX2(MIN2(frame, max), min);
1688 }
1689 
get_mpeg2_msg(struct radeon_decoder * dec,struct pipe_mpeg12_picture_desc * pic)1690 static rvcn_dec_message_mpeg2_vld_t get_mpeg2_msg(struct radeon_decoder *dec,
1691                                                   struct pipe_mpeg12_picture_desc *pic)
1692 {
1693    const int *zscan = pic->alternate_scan ? vl_zscan_alternate : vl_zscan_normal;
1694    rvcn_dec_message_mpeg2_vld_t result;
1695    unsigned i;
1696 
1697    memset(&result, 0, sizeof(result));
1698    result.decoded_pic_idx = dec->frame_number;
1699 
1700    result.forward_ref_pic_idx = get_ref_pic_idx(dec, pic->ref[0]);
1701    result.backward_ref_pic_idx = get_ref_pic_idx(dec, pic->ref[1]);
1702 
1703    if (pic->intra_matrix) {
1704       result.load_intra_quantiser_matrix = 1;
1705       for (i = 0; i < 64; ++i) {
1706          result.intra_quantiser_matrix[i] = pic->intra_matrix[zscan[i]];
1707       }
1708    }
1709    if (pic->non_intra_matrix) {
1710       result.load_nonintra_quantiser_matrix = 1;
1711       for (i = 0; i < 64; ++i) {
1712          result.nonintra_quantiser_matrix[i] = pic->non_intra_matrix[zscan[i]];
1713       }
1714    }
1715 
1716    result.profile_and_level_indication = 0;
1717    result.chroma_format = 0x1;
1718 
1719    result.picture_coding_type = pic->picture_coding_type;
1720    result.f_code[0][0] = pic->f_code[0][0] + 1;
1721    result.f_code[0][1] = pic->f_code[0][1] + 1;
1722    result.f_code[1][0] = pic->f_code[1][0] + 1;
1723    result.f_code[1][1] = pic->f_code[1][1] + 1;
1724    result.intra_dc_precision = pic->intra_dc_precision;
1725    result.pic_structure = pic->picture_structure;
1726    result.top_field_first = pic->top_field_first;
1727    result.frame_pred_frame_dct = pic->frame_pred_frame_dct;
1728    result.concealment_motion_vectors = pic->concealment_motion_vectors;
1729    result.q_scale_type = pic->q_scale_type;
1730    result.intra_vlc_format = pic->intra_vlc_format;
1731    result.alternate_scan = pic->alternate_scan;
1732 
1733    return result;
1734 }
1735 
get_mpeg4_msg(struct radeon_decoder * dec,struct pipe_mpeg4_picture_desc * pic)1736 static rvcn_dec_message_mpeg4_asp_vld_t get_mpeg4_msg(struct radeon_decoder *dec,
1737                                                       struct pipe_mpeg4_picture_desc *pic)
1738 {
1739    rvcn_dec_message_mpeg4_asp_vld_t result;
1740    unsigned i;
1741 
1742    memset(&result, 0, sizeof(result));
1743    result.decoded_pic_idx = dec->frame_number;
1744 
1745    result.forward_ref_pic_idx = get_ref_pic_idx(dec, pic->ref[0]);
1746    result.backward_ref_pic_idx = get_ref_pic_idx(dec, pic->ref[1]);
1747 
1748    result.variant_type = 0;
1749    result.profile_and_level_indication = 0xF0;
1750 
1751    result.video_object_layer_verid = 0x5;
1752    result.video_object_layer_shape = 0x0;
1753 
1754    result.video_object_layer_width = dec->base.width;
1755    result.video_object_layer_height = dec->base.height;
1756 
1757    result.vop_time_increment_resolution = pic->vop_time_increment_resolution;
1758 
1759    result.short_video_header = pic->short_video_header;
1760    result.interlaced = pic->interlaced;
1761    result.load_intra_quant_mat = 1;
1762    result.load_nonintra_quant_mat = 1;
1763    result.quarter_sample = pic->quarter_sample;
1764    result.complexity_estimation_disable = 1;
1765    result.resync_marker_disable = pic->resync_marker_disable;
1766    result.newpred_enable = 0;
1767    result.reduced_resolution_vop_enable = 0;
1768 
1769    result.quant_type = pic->quant_type;
1770 
1771    for (i = 0; i < 64; ++i) {
1772       result.intra_quant_mat[i] = pic->intra_matrix[vl_zscan_normal[i]];
1773       result.nonintra_quant_mat[i] = pic->non_intra_matrix[vl_zscan_normal[i]];
1774    }
1775 
1776    return result;
1777 }
1778 
rvcn_dec_message_create(struct radeon_decoder * dec)1779 static void rvcn_dec_message_create(struct radeon_decoder *dec)
1780 {
1781    rvcn_dec_message_header_t *header = dec->msg;
1782    rvcn_dec_message_create_t *create = dec->msg + sizeof(rvcn_dec_message_header_t);
1783    unsigned sizes = sizeof(rvcn_dec_message_header_t) + sizeof(rvcn_dec_message_create_t);
1784 
1785    memset(dec->msg, 0, sizes);
1786    header->header_size = sizeof(rvcn_dec_message_header_t);
1787    header->total_size = sizes;
1788    header->num_buffers = 1;
1789    header->msg_type = RDECODE_MSG_CREATE;
1790    header->stream_handle = dec->stream_handle;
1791    header->status_report_feedback_number = 0;
1792 
1793    header->index[0].message_id = RDECODE_MESSAGE_CREATE;
1794    header->index[0].offset = sizeof(rvcn_dec_message_header_t);
1795    header->index[0].size = sizeof(rvcn_dec_message_create_t);
1796    header->index[0].filled = 0;
1797 
1798    create->stream_type = dec->stream_type;
1799    create->session_flags = 0;
1800    create->width_in_samples = dec->base.width;
1801    create->height_in_samples = dec->base.height;
1802 }
1803 
/*
 * Fill the dynamic DPB (tier 2) message and maintain the decoder's lists of
 * per-picture DPB buffers.
 *
 * Steps:
 *  1. Walk dec->dpb_ref_list.  A buffer whose index matches an entry of
 *     dec->ref_codec.ref_list (low 7 bits, entries equal to 0x7f are
 *     ignored) gets its virtual address written to dpbAddrLo/Hi at that
 *     entry's position and dpbArraySize is incremented.  A buffer that is not
 *     referenced is either moved to dec->dpb_unref_list (when its
 *     width0 * height0 differs from the size needed now) or marked reusable
 *     by setting its index to 0x7f.
 *  2. Pick the buffer for the current picture: one with matching size whose
 *     index equals dec->ref_codec.index, otherwise the first reusable one.
 *  3. Destroy every buffer on the unref list, waiting on dec->prev_fence
 *     first when one is set.
 *  4. If no buffer was found, allocate a new one (TMZ when "encrypted") and
 *     append it to the ref list.
 *  5. Add the current buffer to the command stream and fill in its address
 *     and the luma/chroma layout fields.
 *
 * Returns 0 on success and 1 when a new buffer could not be allocated.
 */
static unsigned rvcn_dec_dynamic_dpb_t2_message(struct radeon_decoder *dec, rvcn_dec_message_decode_t *decode,
      rvcn_dec_message_dynamic_dpb_t2_t *dynamic_dpb_t2, bool encrypted)
{
   struct rvcn_dec_dynamic_dpb_t2 *dpb = NULL, *dummy = NULL;
   unsigned width, height, size;
   uint64_t addr;
   int i;

   width = align(decode->width_in_samples, dec->db_alignment);
   height = align(decode->height_in_samples, dec->db_alignment);
   size = align((width * height * 3) / 2, 256);
   if (dec->ref_codec.bts == CODEC_10_BITS)
      size = size * 3 / 2;

   list_for_each_entry_safe(struct rvcn_dec_dynamic_dpb_t2, d, &dec->dpb_ref_list, list) {
      for (i = 0; i < dec->ref_codec.ref_size; ++i) {
         if (((dec->ref_codec.ref_list[i] & 0x7f) != 0x7f) && (d->index == (dec->ref_codec.ref_list[i] & 0x7f))) {
            /* Remember the first referenced buffer as a stand-in. */
            if (!dummy)
               dummy = d;

            addr = dec->ws->buffer_get_virtual_address(d->dpb.res->buf);
            if (!addr && dummy) {
               RVID_ERR("Ref list from application is incorrect, using dummy buffer instead.\n");
               addr = dec->ws->buffer_get_virtual_address(dummy->dpb.res->buf);
            }
            dynamic_dpb_t2->dpbAddrLo[i] = addr;
            dynamic_dpb_t2->dpbAddrHi[i] = addr >> 32;
            ++dynamic_dpb_t2->dpbArraySize;
            break;
         }
      }
      /* The inner loop ran to completion: this buffer is not referenced. */
      if (i == dec->ref_codec.ref_size) {
         if (d->dpb.res->b.b.width0 * d->dpb.res->b.b.height0 != size) {
            list_del(&d->list);
            list_addtail(&d->list, &dec->dpb_unref_list);
         } else {
            d->index = 0x7f;
         }
      }
   }

   /* Prefer a buffer that already carries the current picture's index. */
   list_for_each_entry_safe(struct rvcn_dec_dynamic_dpb_t2, d, &dec->dpb_ref_list, list) {
      if (d->dpb.res->b.b.width0 * d->dpb.res->b.b.height0 == size && d->index == dec->ref_codec.index) {
         dpb = d;
         break;
      }
   }

   /* Otherwise recycle the first buffer that was marked reusable above. */
   if (!dpb) {
      list_for_each_entry_safe(struct rvcn_dec_dynamic_dpb_t2, d, &dec->dpb_ref_list, list) {
         if (d->index == 0x7f) {
            d->index = dec->ref_codec.index;
            dpb = d;
            break;
         }
      }
   }

   /* Release the buffers that were moved to the unref list. */
   list_for_each_entry_safe(struct rvcn_dec_dynamic_dpb_t2, d, &dec->dpb_unref_list, list) {
      if (dec->prev_fence)
         dec->ws->fence_wait(dec->ws, dec->prev_fence, PIPE_DEFAULT_DECODER_FEEDBACK_TIMEOUT_NS);
      list_del(&d->list);
      si_vid_destroy_buffer(&d->dpb);
      FREE(d);
   }

   if (!dpb) {
      bool r;
      dpb = CALLOC_STRUCT(rvcn_dec_dynamic_dpb_t2);
      if (!dpb)
         return 1;
      dpb->index = dec->ref_codec.index;
      if (encrypted)
         r = si_vid_create_tmz_buffer(dec->screen, &dpb->dpb, size, PIPE_USAGE_DEFAULT);
      else
         r = si_vid_create_buffer(dec->screen, &dpb->dpb, size, PIPE_USAGE_DEFAULT);

      if (!r) {
         RVID_ERR("Can't allocated dpb buffer.\n");
         FREE(dpb);
         return 1;
      }
      /* Only inspect the resource once the allocation is known to have
       * succeeded; checking earlier would dereference it on failure.
       */
      assert(encrypted == (bool)(dpb->dpb.res->flags & RADEON_FLAG_ENCRYPTED));
      list_addtail(&dpb->list, &dec->dpb_ref_list);
   }

   dec->ws->cs_add_buffer(&dec->cs, dpb->dpb.res->buf,
      RADEON_USAGE_READWRITE | RADEON_USAGE_SYNCHRONIZED, RADEON_DOMAIN_VRAM);
   addr = dec->ws->buffer_get_virtual_address(dpb->dpb.res->buf);
   dynamic_dpb_t2->dpbCurrLo = addr;
   dynamic_dpb_t2->dpbCurrHi = addr >> 32;

   decode->decode_flags = 1;
   dynamic_dpb_t2->dpbConfigFlags = 0;
   dynamic_dpb_t2->dpbLumaPitch = width;
   dynamic_dpb_t2->dpbLumaAlignedHeight = height;
   dynamic_dpb_t2->dpbLumaAlignedSize = dynamic_dpb_t2->dpbLumaPitch *
      dynamic_dpb_t2->dpbLumaAlignedHeight;
   dynamic_dpb_t2->dpbChromaPitch = dynamic_dpb_t2->dpbLumaPitch >> 1;
   dynamic_dpb_t2->dpbChromaAlignedHeight = dynamic_dpb_t2->dpbLumaAlignedHeight >> 1;
   dynamic_dpb_t2->dpbChromaAlignedSize = dynamic_dpb_t2->dpbChromaPitch *
      dynamic_dpb_t2->dpbChromaAlignedHeight * 2;

   /* 10-bit content uses 1.5x the size, matching the allocation above. */
   if (dec->ref_codec.bts == CODEC_10_BITS) {
      dynamic_dpb_t2->dpbLumaAlignedSize = dynamic_dpb_t2->dpbLumaAlignedSize * 3 / 2;
      dynamic_dpb_t2->dpbChromaAlignedSize = dynamic_dpb_t2->dpbChromaAlignedSize * 3 / 2;
   }

   return 0;
}
1914 
rvcn_dec_message_decode(struct radeon_decoder * dec,struct pipe_video_buffer * target,struct pipe_picture_desc * picture)1915 static struct pb_buffer_lean *rvcn_dec_message_decode(struct radeon_decoder *dec,
1916                                                       struct pipe_video_buffer *target,
1917                                                       struct pipe_picture_desc *picture)
1918 {
1919    DECRYPT_PARAMETERS *decrypt = (DECRYPT_PARAMETERS *)picture->decrypt_key;
1920    bool encrypted = picture->protected_playback;
1921    struct si_texture *luma;
1922    struct si_texture *chroma;
1923    struct pipe_video_buffer *out_surf = target;
1924    ASSERTED struct si_screen *sscreen = (struct si_screen *)dec->screen;
1925    rvcn_dec_message_header_t *header;
1926    rvcn_dec_message_index_t *index_codec;
1927    rvcn_dec_message_index_t *index_drm = NULL;
1928    rvcn_dec_message_index_t *index_dynamic_dpb = NULL;
1929    rvcn_dec_message_index_t *index_hevc_direct_reflist = NULL;
1930    rvcn_dec_message_decode_t *decode;
1931    unsigned sizes = 0, offset_decode, offset_codec;
1932    unsigned offset_drm = 0, offset_dynamic_dpb = 0, offset_hevc_direct_reflist = 0;
1933    void *codec;
1934    rvcn_dec_message_drm_t *drm = NULL;
1935    rvcn_dec_message_dynamic_dpb_t *dynamic_dpb = NULL;
1936    rvcn_dec_message_dynamic_dpb_t2_t *dynamic_dpb_t2 = NULL;
1937    rvcn_dec_message_hevc_direct_ref_list_t *hevc_reflist = NULL;
1938    bool dpb_resize = false;
1939    header = dec->msg;
1940    sizes += sizeof(rvcn_dec_message_header_t);
1941 
1942    index_codec = (void*)header + sizes;
1943    sizes += sizeof(rvcn_dec_message_index_t);
1944 
1945    if (encrypted) {
1946       index_drm = (void*)header + sizes;
1947       sizes += sizeof(rvcn_dec_message_index_t);
1948    }
1949 
1950    if (dec->dpb_type >= DPB_DYNAMIC_TIER_1) {
1951       index_dynamic_dpb = (void*)header + sizes;
1952       sizes += sizeof(rvcn_dec_message_index_t);
1953    }
1954 
1955    if (u_reduce_video_profile(picture->profile) == PIPE_VIDEO_FORMAT_HEVC) {
1956       index_hevc_direct_reflist = (void*)header + sizes;
1957       sizes += sizeof(rvcn_dec_message_index_t);
1958    }
1959 
1960    offset_decode = sizes;
1961    decode = (void*)header + sizes;
1962    sizes += sizeof(rvcn_dec_message_decode_t);
1963 
1964    if (encrypted) {
1965       offset_drm = sizes;
1966       drm = (void*)header + sizes;
1967       sizes += sizeof(rvcn_dec_message_drm_t);
1968    }
1969 
1970    if (dec->dpb_type >= DPB_DYNAMIC_TIER_1) {
1971       offset_dynamic_dpb = sizes;
1972       if (dec->dpb_type == DPB_DYNAMIC_TIER_1) {
1973          dynamic_dpb = (void*)header + sizes;
1974          sizes += sizeof(rvcn_dec_message_dynamic_dpb_t);
1975       }
1976       else if (dec->dpb_type == DPB_DYNAMIC_TIER_2) {
1977          dynamic_dpb_t2 = (void*)header + sizes;
1978          sizes += sizeof(rvcn_dec_message_dynamic_dpb_t2_t);
1979       }
1980    }
1981 
1982    if (u_reduce_video_profile(picture->profile) == PIPE_VIDEO_FORMAT_HEVC) {
1983       offset_hevc_direct_reflist = sizes;
1984       hevc_reflist = (void*)header + sizes;
1985       sizes += align((4 + 2 * 15 * ((struct pipe_h265_picture_desc *)picture)->slice_parameter.slice_count), 4);
1986    }
1987 
1988    offset_codec = sizes;
1989    codec = (void*)header + sizes;
1990 
1991    memset(dec->msg, 0, sizes);
1992    header->header_size = sizeof(rvcn_dec_message_header_t);
1993    header->total_size = sizes;
1994    header->msg_type = RDECODE_MSG_DECODE;
1995    header->stream_handle = dec->stream_handle;
1996    header->status_report_feedback_number = dec->frame_number;
1997 
1998    header->index[0].message_id = RDECODE_MESSAGE_DECODE;
1999    header->index[0].offset = offset_decode;
2000    header->index[0].size = sizeof(rvcn_dec_message_decode_t);
2001    header->index[0].filled = 0;
2002    header->num_buffers = 1;
2003 
2004    index_codec->offset = offset_codec;
2005    index_codec->size = sizeof(rvcn_dec_message_avc_t);
2006    index_codec->filled = 0;
2007    ++header->num_buffers;
2008 
2009    if (encrypted) {
2010       index_drm->message_id = RDECODE_MESSAGE_DRM;
2011       index_drm->offset = offset_drm;
2012       index_drm->size = sizeof(rvcn_dec_message_drm_t);
2013       index_drm->filled = 0;
2014       ++header->num_buffers;
2015    }
2016 
2017    if (dec->dpb_type >= DPB_DYNAMIC_TIER_1) {
2018       index_dynamic_dpb->message_id = RDECODE_MESSAGE_DYNAMIC_DPB;
2019       index_dynamic_dpb->offset = offset_dynamic_dpb;
2020       index_dynamic_dpb->filled = 0;
2021       ++header->num_buffers;
2022       if (dec->dpb_type == DPB_DYNAMIC_TIER_1)
2023          index_dynamic_dpb->size = sizeof(rvcn_dec_message_dynamic_dpb_t);
2024       else if (dec->dpb_type == DPB_DYNAMIC_TIER_2)
2025          index_dynamic_dpb->size = sizeof(rvcn_dec_message_dynamic_dpb_t2_t);
2026    }
2027 
2028    if (u_reduce_video_profile(picture->profile) == PIPE_VIDEO_FORMAT_HEVC) {
2029       index_hevc_direct_reflist->message_id = RDECODE_MESSAGE_HEVC_DIRECT_REF_LIST;
2030       index_hevc_direct_reflist->offset = offset_hevc_direct_reflist;
2031       index_hevc_direct_reflist->size = align((4 + 2 * 15 * ((struct pipe_h265_picture_desc *)picture)->slice_parameter.slice_count), 4);
2032       index_hevc_direct_reflist->filled = 0;
2033       ++header->num_buffers;
2034    }
2035 
2036    decode->stream_type = dec->stream_type;
2037    decode->decode_flags = 0;
2038    decode->width_in_samples = dec->base.width;
2039    decode->height_in_samples = dec->base.height;
2040 
2041    decode->bsd_size = align(dec->bs_size, 128);
2042 
2043    if (dec->dpb_type != DPB_DYNAMIC_TIER_2) {
2044       bool r;
2045       if (!dec->dpb.res && dec->dpb_size) {
2046          if (encrypted) {
2047             r = si_vid_create_tmz_buffer(dec->screen, &dec->dpb, dec->dpb_size, PIPE_USAGE_DEFAULT);
2048          } else {
2049             r = si_vid_create_buffer(dec->screen, &dec->dpb, dec->dpb_size, PIPE_USAGE_DEFAULT);
2050          }
2051          assert(encrypted == (bool)(dec->dpb.res->flags & RADEON_FLAG_ENCRYPTED));
2052          if (!r) {
2053             RVID_ERR("Can't allocated dpb.\n");
2054             return NULL;
2055          }
2056          si_vid_clear_buffer(dec->base.context, &dec->dpb);
2057       } else if (dec->dpb_type == DPB_DYNAMIC_TIER_1 && dec->dpb.res &&
2058                  (dec->max_width < dec->base.width || dec->max_height < dec->base.height)) {
2059          struct rvid_buf_offset_info buf_offset_info;
2060 
2061          buf_offset_info.num_units = (NUM_VP9_REFS + 1);
2062          buf_offset_info.old_offset = (align(dec->max_width, dec->db_alignment) *
2063             align(dec->max_height, dec->db_alignment) * 3 / 2);
2064          buf_offset_info.new_offset = (align(dec->base.width, dec->db_alignment) *
2065             align(dec->base.height, dec->db_alignment) * 3 / 2);
2066 
2067          dec->dpb_size = calc_dpb_size(dec);
2068          r = si_vid_resize_buffer(dec->screen, &dec->cs, &dec->dpb, dec->dpb_size, &buf_offset_info);
2069          if (!r) {
2070             RVID_ERR("Can't resize dpb.\n");
2071             return NULL;
2072 	 }
2073          dec->max_width = dec->base.width;
2074          dec->max_height = dec->base.height;
2075          dpb_resize = true;
2076       }
2077    }
2078 
2079    if (!dec->ctx.res) {
2080       enum pipe_video_format fmt = u_reduce_video_profile(picture->profile);
2081       if (dec->stream_type == RDECODE_CODEC_H264_PERF) {
2082          unsigned ctx_size = calc_ctx_size_h264_perf(dec);
2083          bool r;
2084          if (encrypted && dec->tmz_ctx) {
2085             r = si_vid_create_tmz_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT);
2086          } else {
2087             r = si_vid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT);
2088          }
2089          assert((encrypted && dec->tmz_ctx) == (bool)(dec->ctx.res->flags & RADEON_FLAG_ENCRYPTED));
2090 
2091          if (!r) {
2092             RVID_ERR("Can't allocated context buffer.\n");
2093             return NULL;
2094          }
2095          si_vid_clear_buffer(dec->base.context, &dec->ctx);
2096       } else if (fmt == PIPE_VIDEO_FORMAT_VP9) {
2097          unsigned ctx_size;
2098          uint8_t *ptr;
2099          bool r;
2100 
2101          /* default probability + probability data */
2102          ctx_size = 2304 * 5;
2103 
2104          if (((struct si_screen *)dec->screen)->info.vcn_ip_version >= VCN_2_0_0) {
2105             /* SRE collocated context data */
2106             ctx_size += 32 * 2 * 128 * 68;
2107             /* SMP collocated context data */
2108             ctx_size += 9 * 64 * 2 * 128 * 68;
2109             /* SDB left tile pixel */
2110             ctx_size += 8 * 2 * 2 * 8192;
2111          } else {
2112             ctx_size += 32 * 2 * 64 * 64;
2113             ctx_size += 9 * 64 * 2 * 64 * 64;
2114             ctx_size += 8 * 2 * 4096;
2115          }
2116 
2117          if (dec->base.profile == PIPE_VIDEO_PROFILE_VP9_PROFILE2)
2118             ctx_size += 8 * 2 * 4096;
2119 
2120          if (encrypted && dec->tmz_ctx) {
2121             r = si_vid_create_tmz_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT);
2122          } else {
2123             r = si_vid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT);
2124          }
2125          if (!r) {
2126             RVID_ERR("Can't allocated context buffer.\n");
2127             return NULL;
2128          }
2129          si_vid_clear_buffer(dec->base.context, &dec->ctx);
2130 
2131          /* ctx needs probs table */
2132          ptr = dec->ws->buffer_map(dec->ws, dec->ctx.res->buf, &dec->cs,
2133                                    PIPE_MAP_WRITE | RADEON_MAP_TEMPORARY);
2134          fill_probs_table(ptr);
2135          dec->ws->buffer_unmap(dec->ws, dec->ctx.res->buf);
2136          dec->bs_ptr = NULL;
2137       } else if (fmt == PIPE_VIDEO_FORMAT_HEVC) {
2138          unsigned ctx_size;
2139          bool r;
2140          if (dec->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10)
2141             ctx_size = calc_ctx_size_h265_main10(dec, (struct pipe_h265_picture_desc *)picture);
2142          else
2143             ctx_size = calc_ctx_size_h265_main(dec);
2144 
2145          if (encrypted && dec->tmz_ctx) {
2146             r = si_vid_create_tmz_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT);
2147          } else {
2148             r = si_vid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT);
2149          }
2150          if (!r) {
2151             RVID_ERR("Can't allocated context buffer.\n");
2152             return NULL;
2153          }
2154          si_vid_clear_buffer(dec->base.context, &dec->ctx);
2155       }
2156    }
2157    if (encrypted != dec->ws->cs_is_secure(&dec->cs)) {
2158       dec->ws->cs_flush(&dec->cs, RADEON_FLUSH_TOGGLE_SECURE_SUBMISSION, NULL);
2159    }
2160 
2161    if (dec->stream_type == RDECODE_CODEC_AV1)
2162       rvcn_dec_av1_film_grain_surface(&out_surf, (struct pipe_av1_picture_desc *)picture);
2163 
2164    luma   = (struct si_texture *)((struct vl_video_buffer *)out_surf)->resources[0];
2165    chroma = (struct si_texture *)((struct vl_video_buffer *)out_surf)->resources[1];
2166 
2167    decode->dpb_size = (dec->dpb_type != DPB_DYNAMIC_TIER_2) ? dec->dpb.res->buf->size : 0;
2168 
2169    /* When texture being created, the bo will be created with total size of planes,
2170     * and all planes point to the same buffer */
2171    assert(si_resource(((struct vl_video_buffer *)out_surf)->resources[0])->buf->size ==
2172       si_resource(((struct vl_video_buffer *)out_surf)->resources[1])->buf->size);
2173 
2174    decode->dt_size = si_resource(((struct vl_video_buffer *)out_surf)->resources[0])->buf->size;
2175 
2176    decode->sct_size = 0;
2177    decode->sc_coeff_size = 0;
2178 
2179    decode->sw_ctxt_size = RDECODE_SESSION_CONTEXT_SIZE;
2180    decode->db_pitch = align(dec->base.width, dec->db_alignment);
2181 
2182    if ((((struct si_screen*)dec->screen)->info.vcn_ip_version >= VCN_3_0_0) &&
2183        (dec->stream_type == RDECODE_CODEC_VP9 || dec->stream_type == RDECODE_CODEC_AV1 ||
2184         dec->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10))
2185       decode->db_aligned_height = align(dec->base.height, 64);
2186 
2187    decode->db_surf_tile_config = 0;
2188    decode->db_array_mode = dec->addr_gfx_mode;
2189 
2190    decode->dt_pitch = luma->surface.u.gfx9.surf_pitch * luma->surface.blk_w;
2191    decode->dt_uv_pitch = chroma->surface.u.gfx9.surf_pitch * chroma->surface.blk_w;
2192 
2193    if (luma->surface.meta_offset) {
2194       RVID_ERR("DCC surfaces not supported.\n");
2195       return NULL;
2196    }
2197 
2198    decode->dt_tiling_mode = 0;
2199    decode->dt_swizzle_mode = luma->surface.u.gfx9.swizzle_mode;
2200    decode->dt_array_mode = dec->addr_gfx_mode;
2201    decode->dt_field_mode = ((struct vl_video_buffer *)out_surf)->base.interlaced;
2202    decode->dt_surf_tile_config = 0;
2203    decode->dt_uv_surf_tile_config = 0;
2204 
2205    decode->dt_luma_top_offset = luma->surface.u.gfx9.surf_offset;
2206    decode->dt_chroma_top_offset = chroma->surface.u.gfx9.surf_offset;
2207    if (decode->dt_field_mode) {
2208       decode->dt_luma_bottom_offset =
2209          luma->surface.u.gfx9.surf_offset + luma->surface.u.gfx9.surf_slice_size;
2210       decode->dt_chroma_bottom_offset =
2211          chroma->surface.u.gfx9.surf_offset + chroma->surface.u.gfx9.surf_slice_size;
2212    } else {
2213       decode->dt_luma_bottom_offset = decode->dt_luma_top_offset;
2214       decode->dt_chroma_bottom_offset = decode->dt_chroma_top_offset;
2215    }
2216    if (dec->stream_type == RDECODE_CODEC_AV1)
2217       decode->db_pitch_uv = chroma->surface.u.gfx9.surf_pitch * chroma->surface.blk_w;
2218 
2219    if (encrypted) {
2220       assert(sscreen->info.has_tmz_support);
2221       set_drm_keys(drm, decrypt);
2222    }
2223 
2224    if (dec->dpb_type == DPB_DYNAMIC_TIER_1) {
2225       decode->decode_flags |= (RDECODE_FLAGS_USE_DYNAMIC_DPB_MASK | RDECODE_FLAGS_USE_PAL_MASK);
2226       // Add decode flag for RESIZE_DPB ,when we do resize
2227       if (dpb_resize == true)
2228         decode->decode_flags |= RDECODE_FLAGS_DPB_RESIZE_MASK;
2229 
2230       dynamic_dpb->dpbArraySize = NUM_VP9_REFS + 1;
2231       dynamic_dpb->dpbLumaPitch = align(dec->max_width, dec->db_alignment);
2232       dynamic_dpb->dpbLumaAlignedHeight = align(dec->max_height, dec->db_alignment);
2233       dynamic_dpb->dpbLumaAlignedSize =
2234          dynamic_dpb->dpbLumaPitch * dynamic_dpb->dpbLumaAlignedHeight;
2235       dynamic_dpb->dpbChromaPitch = dynamic_dpb->dpbLumaPitch >> 1;
2236       dynamic_dpb->dpbChromaAlignedHeight = dynamic_dpb->dpbLumaAlignedHeight >> 1;
2237       dynamic_dpb->dpbChromaAlignedSize =
2238          dynamic_dpb->dpbChromaPitch * dynamic_dpb->dpbChromaAlignedHeight * 2;
2239       dynamic_dpb->dpbReserved0[0] = dec->db_alignment;
2240 
2241       if (dec->base.profile == PIPE_VIDEO_PROFILE_VP9_PROFILE2) {
2242          dynamic_dpb->dpbLumaAlignedSize = dynamic_dpb->dpbLumaAlignedSize * 3 / 2;
2243          dynamic_dpb->dpbChromaAlignedSize = dynamic_dpb->dpbChromaAlignedSize * 3 / 2;
2244       }
2245    }
2246 
2247    if (u_reduce_video_profile(picture->profile) == PIPE_VIDEO_FORMAT_HEVC)
2248       get_h265_reflist(hevc_reflist, (struct pipe_h265_picture_desc *)picture);
2249 
2250    switch (u_reduce_video_profile(picture->profile)) {
2251    case PIPE_VIDEO_FORMAT_MPEG4_AVC: {
2252       rvcn_dec_message_avc_t avc = get_h264_msg(dec, target, (struct pipe_h264_picture_desc *)picture);
2253       memcpy(codec, (void *)&avc, sizeof(rvcn_dec_message_avc_t));
2254       index_codec->message_id = RDECODE_MESSAGE_AVC;
2255       break;
2256    }
2257    case PIPE_VIDEO_FORMAT_HEVC: {
2258       rvcn_dec_message_hevc_t hevc =
2259          get_h265_msg(dec, target, (struct pipe_h265_picture_desc *)picture);
2260 
2261       memcpy(codec, (void *)&hevc, sizeof(rvcn_dec_message_hevc_t));
2262       index_codec->message_id = RDECODE_MESSAGE_HEVC;
2263       break;
2264    }
2265    case PIPE_VIDEO_FORMAT_VC1: {
2266       rvcn_dec_message_vc1_t vc1 = get_vc1_msg((struct pipe_vc1_picture_desc *)picture);
2267 
2268       memcpy(codec, (void *)&vc1, sizeof(rvcn_dec_message_vc1_t));
2269       if ((picture->profile == PIPE_VIDEO_PROFILE_VC1_SIMPLE) ||
2270           (picture->profile == PIPE_VIDEO_PROFILE_VC1_MAIN)) {
2271          decode->width_in_samples = align(decode->width_in_samples, 16) / 16;
2272          decode->height_in_samples = align(decode->height_in_samples, 16) / 16;
2273       }
2274       index_codec->message_id = RDECODE_MESSAGE_VC1;
2275       break;
2276    }
2277    case PIPE_VIDEO_FORMAT_MPEG12: {
2278       rvcn_dec_message_mpeg2_vld_t mpeg2 =
2279          get_mpeg2_msg(dec, (struct pipe_mpeg12_picture_desc *)picture);
2280 
2281       memcpy(codec, (void *)&mpeg2, sizeof(rvcn_dec_message_mpeg2_vld_t));
2282       index_codec->message_id = RDECODE_MESSAGE_MPEG2_VLD;
2283       break;
2284    }
2285    case PIPE_VIDEO_FORMAT_MPEG4: {
2286       rvcn_dec_message_mpeg4_asp_vld_t mpeg4 =
2287          get_mpeg4_msg(dec, (struct pipe_mpeg4_picture_desc *)picture);
2288 
2289       memcpy(codec, (void *)&mpeg4, sizeof(rvcn_dec_message_mpeg4_asp_vld_t));
2290       index_codec->message_id = RDECODE_MESSAGE_MPEG4_ASP_VLD;
2291       break;
2292    }
2293    case PIPE_VIDEO_FORMAT_VP9: {
2294       rvcn_dec_message_vp9_t vp9 =
2295          get_vp9_msg(dec, target, (struct pipe_vp9_picture_desc *)picture);
2296 
2297       memcpy(codec, (void *)&vp9, sizeof(rvcn_dec_message_vp9_t));
2298       index_codec->message_id = RDECODE_MESSAGE_VP9;
2299       break;
2300    }
2301    case PIPE_VIDEO_FORMAT_AV1: {
2302       rvcn_dec_message_av1_t av1 =
2303          get_av1_msg(dec, target, (struct pipe_av1_picture_desc *)picture);
2304 
2305       memcpy(codec, (void *)&av1, sizeof(rvcn_dec_message_av1_t));
2306       index_codec->message_id = RDECODE_MESSAGE_AV1;
2307 
2308       if (dec->ctx.res == NULL) {
2309          unsigned frame_ctxt_size = dec->av1_version == RDECODE_AV1_VER_0
2310                                        ? align(sizeof(rvcn_av1_frame_context_t), 2048)
2311                                        : align(sizeof(rvcn_av1_vcn4_frame_context_t), 2048);
2312 
2313          unsigned ctx_size = (9 + 4) * frame_ctxt_size + 9 * 64 * 34 * 512 + 9 * 64 * 34 * 256 * 5;
2314          int num_64x64_CTB_8k = 68;
2315          int num_128x128_CTB_8k = 34;
2316          int sdb_pitch_64x64 = align(32 * num_64x64_CTB_8k, 256) * 2;
2317          int sdb_pitch_128x128 = align(32 * num_128x128_CTB_8k, 256) * 2;
2318          int sdb_lf_size_ctb_64x64 = sdb_pitch_64x64 * (align(1728, 64) / 64);
2319          int sdb_lf_size_ctb_128x128 = sdb_pitch_128x128 * (align(3008, 64) / 64);
2320          int sdb_superres_size_ctb_64x64 = sdb_pitch_64x64 * (align(3232, 64) / 64);
2321          int sdb_superres_size_ctb_128x128 = sdb_pitch_128x128 * (align(6208, 64) / 64);
2322          int sdb_output_size_ctb_64x64 = sdb_pitch_64x64 * (align(1312, 64) / 64);
2323          int sdb_output_size_ctb_128x128 = sdb_pitch_128x128 * (align(2336, 64) / 64);
2324          int sdb_fg_avg_luma_size_ctb_64x64 = sdb_pitch_64x64 * (align(384, 64) / 64);
2325          int sdb_fg_avg_luma_size_ctb_128x128 = sdb_pitch_128x128 * (align(640, 64) / 64);
2326          uint8_t *ptr;
2327          int i;
2328          struct rvcn_av1_prob_funcs prob;
2329 
2330          if (dec->av1_version == RDECODE_AV1_VER_0) {
2331             prob.init_mode_probs = rvcn_init_mode_probs;
2332             prob.init_mv_probs = rvcn_av1_init_mv_probs;
2333             prob.default_coef_probs = rvcn_av1_default_coef_probs;
2334          } else {
2335             prob.init_mode_probs = rvcn_vcn4_init_mode_probs;
2336             prob.init_mv_probs = rvcn_vcn4_av1_init_mv_probs;
2337             prob.default_coef_probs = rvcn_vcn4_av1_default_coef_probs;
2338          }
2339 
2340          ctx_size += (MAX2(sdb_lf_size_ctb_64x64, sdb_lf_size_ctb_128x128) +
2341                       MAX2(sdb_superres_size_ctb_64x64, sdb_superres_size_ctb_128x128) +
2342                       MAX2(sdb_output_size_ctb_64x64, sdb_output_size_ctb_128x128) +
2343                       MAX2(sdb_fg_avg_luma_size_ctb_64x64, sdb_fg_avg_luma_size_ctb_128x128)) * 2  + 68 * 512;
2344 
2345          if (!si_vid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT))
2346             RVID_ERR("Can't allocated context buffer.\n");
2347          si_vid_clear_buffer(dec->base.context, &dec->ctx);
2348 
2349          ptr = dec->ws->buffer_map(dec->ws, dec->ctx.res->buf, &dec->cs, PIPE_MAP_WRITE | RADEON_MAP_TEMPORARY);
2350 
2351          for (i = 0; i < 4; ++i) {
2352             prob.init_mode_probs((void*)(ptr + i * frame_ctxt_size));
2353             prob.init_mv_probs((void*)(ptr + i * frame_ctxt_size));
2354             prob.default_coef_probs((void*)(ptr + i * frame_ctxt_size), i);
2355          }
2356          dec->ws->buffer_unmap(dec->ws, dec->ctx.res->buf);
2357       }
2358 
2359       break;
2360    }
2361    default:
2362       assert(0);
2363       return NULL;
2364    }
2365 
2366    if (dec->ctx.res)
2367       decode->hw_ctxt_size = dec->ctx.res->buf->size;
2368 
2369    if (dec->dpb_type == DPB_DYNAMIC_TIER_2)
2370       if (rvcn_dec_dynamic_dpb_t2_message(dec, decode, dynamic_dpb_t2, encrypted))
2371          return NULL;
2372 
2373    return luma->buffer.buf;
2374 }
2375 
rvcn_dec_message_destroy(struct radeon_decoder * dec)2376 static void rvcn_dec_message_destroy(struct radeon_decoder *dec)
2377 {
2378    rvcn_dec_message_header_t *header = dec->msg;
2379 
2380    memset(dec->msg, 0, sizeof(rvcn_dec_message_header_t));
2381    header->header_size = sizeof(rvcn_dec_message_header_t);
2382    header->total_size = sizeof(rvcn_dec_message_header_t) - sizeof(rvcn_dec_message_index_t);
2383    header->num_buffers = 0;
2384    header->msg_type = RDECODE_MSG_DESTROY;
2385    header->stream_handle = dec->stream_handle;
2386    header->status_report_feedback_number = 0;
2387 }
2388 
rvcn_dec_message_feedback(struct radeon_decoder * dec)2389 static void rvcn_dec_message_feedback(struct radeon_decoder *dec)
2390 {
2391    rvcn_dec_feedback_header_t *header = (void *)dec->fb;
2392 
2393    header->header_size = sizeof(rvcn_dec_feedback_header_t);
2394    header->total_size = sizeof(rvcn_dec_feedback_header_t);
2395    header->num_buffers = 0;
2396 }
2397 
rvcn_dec_sq_tail(struct radeon_decoder * dec)2398 static void rvcn_dec_sq_tail(struct radeon_decoder *dec)
2399 {
2400    if (dec->vcn_dec_sw_ring == false)
2401       return;
2402 
2403    rvcn_sq_tail(&dec->cs, &dec->sq);
2404 }
2405 /* flush IB to the hardware */
flush(struct radeon_decoder * dec,unsigned flags,struct pipe_fence_handle ** fence)2406 static int flush(struct radeon_decoder *dec, unsigned flags,
2407                  struct pipe_fence_handle **fence) {
2408    rvcn_dec_sq_tail(dec);
2409 
2410    return dec->ws->cs_flush(&dec->cs, flags, fence);
2411 }
2412 
2413 /* add a new set register command to the IB */
set_reg(struct radeon_decoder * dec,unsigned reg,uint32_t val)2414 static void set_reg(struct radeon_decoder *dec, unsigned reg, uint32_t val)
2415 {
2416    radeon_emit(&dec->cs, RDECODE_PKT0(reg >> 2, 0));
2417    radeon_emit(&dec->cs, val);
2418 }
2419 
2420 /* send a command to the VCPU through the GPCOM registers */
send_cmd(struct radeon_decoder * dec,unsigned cmd,struct pb_buffer_lean * buf,uint32_t off,unsigned usage,enum radeon_bo_domain domain)2421 static void send_cmd(struct radeon_decoder *dec, unsigned cmd, struct pb_buffer_lean *buf, uint32_t off,
2422                      unsigned usage, enum radeon_bo_domain domain)
2423 {
2424    uint64_t addr;
2425 
2426    dec->ws->cs_add_buffer(&dec->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, domain);
2427    addr = dec->ws->buffer_get_virtual_address(buf);
2428    addr = addr + off;
2429 
2430    if (dec->vcn_dec_sw_ring == false) {
2431       set_reg(dec, dec->reg.data0, addr);
2432       set_reg(dec, dec->reg.data1, addr >> 32);
2433       set_reg(dec, dec->reg.cmd, cmd << 1);
2434       return;
2435    }
2436 
2437    if (!dec->cs.current.cdw) {
2438       rvcn_sq_header(&dec->cs, &dec->sq, false);
2439       rvcn_decode_ib_package_t *ib_header =
2440          (rvcn_decode_ib_package_t *)&(dec->cs.current.buf[dec->cs.current.cdw]);
2441 
2442       ib_header->package_size = sizeof(struct rvcn_decode_buffer_s) +
2443          sizeof(struct rvcn_decode_ib_package_s);
2444       dec->cs.current.cdw++;
2445       ib_header->package_type = (RDECODE_IB_PARAM_DECODE_BUFFER);
2446       dec->cs.current.cdw++;
2447 
2448       dec->decode_buffer =
2449          (rvcn_decode_buffer_t *)&(dec->cs.current.buf[dec->cs.current.cdw]);
2450 
2451       dec->cs.current.cdw += sizeof(struct rvcn_decode_buffer_s) / 4;
2452       memset(dec->decode_buffer, 0, sizeof(struct rvcn_decode_buffer_s));
2453    }
2454 
2455    switch(cmd) {
2456       case RDECODE_CMD_MSG_BUFFER:
2457             dec->decode_buffer->valid_buf_flag |= RDECODE_CMDBUF_FLAGS_MSG_BUFFER;
2458             dec->decode_buffer->msg_buffer_address_hi = (addr >> 32);
2459             dec->decode_buffer->msg_buffer_address_lo = (addr);
2460          break;
2461       case RDECODE_CMD_DPB_BUFFER:
2462             dec->decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_DPB_BUFFER);
2463             dec->decode_buffer->dpb_buffer_address_hi = (addr >> 32);
2464             dec->decode_buffer->dpb_buffer_address_lo = (addr);
2465          break;
2466       case RDECODE_CMD_DECODING_TARGET_BUFFER:
2467             dec->decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_DECODING_TARGET_BUFFER);
2468             dec->decode_buffer->target_buffer_address_hi = (addr >> 32);
2469             dec->decode_buffer->target_buffer_address_lo = (addr);
2470          break;
2471       case RDECODE_CMD_FEEDBACK_BUFFER:
2472             dec->decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_FEEDBACK_BUFFER);
2473             dec->decode_buffer->feedback_buffer_address_hi = (addr >> 32);
2474             dec->decode_buffer->feedback_buffer_address_lo = (addr);
2475          break;
2476       case RDECODE_CMD_PROB_TBL_BUFFER:
2477             dec->decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_PROB_TBL_BUFFER);
2478             dec->decode_buffer->prob_tbl_buffer_address_hi = (addr >> 32);
2479             dec->decode_buffer->prob_tbl_buffer_address_lo = (addr);
2480          break;
2481       case RDECODE_CMD_SESSION_CONTEXT_BUFFER:
2482             dec->decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_SESSION_CONTEXT_BUFFER);
2483             dec->decode_buffer->session_contex_buffer_address_hi = (addr >> 32);
2484             dec->decode_buffer->session_contex_buffer_address_lo = (addr);
2485          break;
2486       case RDECODE_CMD_BITSTREAM_BUFFER:
2487             dec->decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_BITSTREAM_BUFFER);
2488             dec->decode_buffer->bitstream_buffer_address_hi = (addr >> 32);
2489             dec->decode_buffer->bitstream_buffer_address_lo = (addr);
2490          break;
2491       case RDECODE_CMD_IT_SCALING_TABLE_BUFFER:
2492             dec->decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_IT_SCALING_BUFFER);
2493             dec->decode_buffer->it_sclr_table_buffer_address_hi = (addr >> 32);
2494             dec->decode_buffer->it_sclr_table_buffer_address_lo = (addr);
2495          break;
2496       case RDECODE_CMD_CONTEXT_BUFFER:
2497             dec->decode_buffer->valid_buf_flag |= (RDECODE_CMDBUF_FLAGS_CONTEXT_BUFFER);
2498             dec->decode_buffer->context_buffer_address_hi = (addr >> 32);
2499             dec->decode_buffer->context_buffer_address_lo = (addr);
2500          break;
2501       default:
2502             printf("Not Support!");
2503    }
2504 }
2505 
2506 /* do the codec needs an IT buffer ?*/
have_it(struct radeon_decoder * dec)2507 static bool have_it(struct radeon_decoder *dec)
2508 {
2509    return dec->stream_type == RDECODE_CODEC_H264_PERF || dec->stream_type == RDECODE_CODEC_H265;
2510 }
2511 
2512 /* do the codec needs an probs buffer? */
have_probs(struct radeon_decoder * dec)2513 static bool have_probs(struct radeon_decoder *dec)
2514 {
2515    return (dec->stream_type == RDECODE_CODEC_VP9 || dec->stream_type == RDECODE_CODEC_AV1);
2516 }
2517 
2518 /* map the next available message/feedback/itscaling buffer */
map_msg_fb_it_probs_buf(struct radeon_decoder * dec)2519 static void map_msg_fb_it_probs_buf(struct radeon_decoder *dec)
2520 {
2521    struct rvid_buffer *buf;
2522    uint8_t *ptr;
2523 
2524    /* grab the current message/feedback buffer */
2525    buf = &dec->msg_fb_it_probs_buffers[dec->cur_buffer];
2526 
2527    /* and map it for CPU access */
2528    ptr =
2529       dec->ws->buffer_map(dec->ws, buf->res->buf, &dec->cs, PIPE_MAP_WRITE | RADEON_MAP_TEMPORARY);
2530 
2531    /* calc buffer offsets */
2532    dec->msg = ptr;
2533 
2534    dec->fb = (uint32_t *)(ptr + FB_BUFFER_OFFSET);
2535    if (have_it(dec))
2536       dec->it = (uint8_t *)(ptr + FB_BUFFER_OFFSET + FB_BUFFER_SIZE);
2537    else if (have_probs(dec))
2538       dec->probs = (uint8_t *)(ptr + FB_BUFFER_OFFSET + FB_BUFFER_SIZE);
2539 }
2540 
2541 /* unmap and send a message command to the VCPU */
send_msg_buf(struct radeon_decoder * dec)2542 static void send_msg_buf(struct radeon_decoder *dec)
2543 {
2544    struct rvid_buffer *buf;
2545 
2546    /* ignore the request if message/feedback buffer isn't mapped */
2547    if (!dec->msg || !dec->fb)
2548       return;
2549 
2550    /* grab the current message buffer */
2551    buf = &dec->msg_fb_it_probs_buffers[dec->cur_buffer];
2552 
2553    /* unmap the buffer */
2554    dec->ws->buffer_unmap(dec->ws, buf->res->buf);
2555    dec->bs_ptr = NULL;
2556    dec->msg = NULL;
2557    dec->fb = NULL;
2558    dec->it = NULL;
2559    dec->probs = NULL;
2560 
2561    if (dec->sessionctx.res)
2562       send_cmd(dec, RDECODE_CMD_SESSION_CONTEXT_BUFFER, dec->sessionctx.res->buf, 0,
2563                RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM);
2564 
2565    /* and send it to the hardware */
2566    send_cmd(dec, RDECODE_CMD_MSG_BUFFER, buf->res->buf, 0, RADEON_USAGE_READ, RADEON_DOMAIN_GTT);
2567 }
2568 
2569 /* cycle to the next set of buffers */
next_buffer(struct radeon_decoder * dec)2570 static void next_buffer(struct radeon_decoder *dec)
2571 {
2572    ++dec->cur_buffer;
2573    dec->cur_buffer %= dec->num_dec_bufs;
2574 }
2575 
calc_ctx_size_h264_perf(struct radeon_decoder * dec)2576 static unsigned calc_ctx_size_h264_perf(struct radeon_decoder *dec)
2577 {
2578    unsigned width_in_mb, height_in_mb, ctx_size;
2579    unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH);
2580    unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT);
2581 
2582    unsigned max_references = dec->base.max_references + 1;
2583 
2584    // picture width & height in 16 pixel units
2585    width_in_mb = width / VL_MACROBLOCK_WIDTH;
2586    height_in_mb = align(height / VL_MACROBLOCK_HEIGHT, 2);
2587 
2588    unsigned fs_in_mb = width_in_mb * height_in_mb;
2589    unsigned num_dpb_buffer_lean;
2590    switch (dec->base.level) {
2591    case 30:
2592       num_dpb_buffer_lean = 8100 / fs_in_mb;
2593       break;
2594    case 31:
2595       num_dpb_buffer_lean = 18000 / fs_in_mb;
2596       break;
2597    case 32:
2598       num_dpb_buffer_lean = 20480 / fs_in_mb;
2599       break;
2600    case 41:
2601       num_dpb_buffer_lean = 32768 / fs_in_mb;
2602       break;
2603    case 42:
2604       num_dpb_buffer_lean = 34816 / fs_in_mb;
2605       break;
2606    case 50:
2607       num_dpb_buffer_lean = 110400 / fs_in_mb;
2608       break;
2609    case 51:
2610       num_dpb_buffer_lean = 184320 / fs_in_mb;
2611       break;
2612    default:
2613       num_dpb_buffer_lean = 184320 / fs_in_mb;
2614       break;
2615    }
2616    num_dpb_buffer_lean++;
2617    max_references = MAX2(MIN2(NUM_H264_REFS, num_dpb_buffer_lean), max_references);
2618    ctx_size = max_references * align(width_in_mb * height_in_mb * 192, 256);
2619 
2620    return ctx_size;
2621 }
2622 
2623 /* calculate size of reference picture buffer */
calc_dpb_size(struct radeon_decoder * dec)2624 static unsigned calc_dpb_size(struct radeon_decoder *dec)
2625 {
2626    unsigned width_in_mb, height_in_mb, image_size, dpb_size;
2627 
2628    // always align them to MB size for dpb calculation
2629    unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH);
2630    unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT);
2631 
2632    // always one more for currently decoded picture
2633    unsigned max_references = dec->base.max_references + 1;
2634 
2635    // aligned size of a single frame
2636    image_size = align(width, 32) * height;
2637    image_size += image_size / 2;
2638    image_size = align(image_size, 1024);
2639 
2640    // picture width & height in 16 pixel units
2641    width_in_mb = width / VL_MACROBLOCK_WIDTH;
2642    height_in_mb = align(height / VL_MACROBLOCK_HEIGHT, 2);
2643 
2644    switch (u_reduce_video_profile(dec->base.profile)) {
2645    case PIPE_VIDEO_FORMAT_MPEG4_AVC: {
2646       unsigned fs_in_mb = width_in_mb * height_in_mb;
2647       unsigned num_dpb_buffer_lean;
2648 
2649       switch (dec->base.level) {
2650       case 30:
2651          num_dpb_buffer_lean = 8100 / fs_in_mb;
2652          break;
2653       case 31:
2654          num_dpb_buffer_lean = 18000 / fs_in_mb;
2655          break;
2656       case 32:
2657          num_dpb_buffer_lean = 20480 / fs_in_mb;
2658          break;
2659       case 41:
2660          num_dpb_buffer_lean = 32768 / fs_in_mb;
2661          break;
2662       case 42:
2663          num_dpb_buffer_lean = 34816 / fs_in_mb;
2664          break;
2665       case 50:
2666          num_dpb_buffer_lean = 110400 / fs_in_mb;
2667          break;
2668       case 51:
2669          num_dpb_buffer_lean = 184320 / fs_in_mb;
2670          break;
2671       default:
2672          num_dpb_buffer_lean = 184320 / fs_in_mb;
2673          break;
2674       }
2675       num_dpb_buffer_lean++;
2676       max_references = MAX2(MIN2(NUM_H264_REFS, num_dpb_buffer_lean), max_references);
2677       dpb_size = image_size * max_references;
2678       break;
2679    }
2680 
2681    case PIPE_VIDEO_FORMAT_HEVC:
2682       if (dec->base.width * dec->base.height >= 4096 * 2000)
2683          max_references = MAX2(max_references, 8);
2684       else
2685          max_references = MAX2(max_references, 17);
2686 
2687       width = align(width, 16);
2688       height = align(height, 16);
2689       if (dec->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10)
2690          dpb_size = align((align(width, 64) * align(height, 64) * 9) / 4, 256) * max_references;
2691       else
2692          dpb_size = align((align(width, 32) * height * 3) / 2, 256) * max_references;
2693       break;
2694 
2695    case PIPE_VIDEO_FORMAT_VC1:
2696       // the firmware seems to always assume a minimum of ref frames
2697       max_references = MAX2(NUM_VC1_REFS, max_references);
2698 
2699       // reference picture buffer
2700       dpb_size = image_size * max_references;
2701 
2702       // CONTEXT_BUFFER
2703       dpb_size += width_in_mb * height_in_mb * 128;
2704 
2705       // IT surface buffer
2706       dpb_size += width_in_mb * 64;
2707 
2708       // DB surface buffer
2709       dpb_size += width_in_mb * 128;
2710 
2711       // BP
2712       dpb_size += align(MAX2(width_in_mb, height_in_mb) * 7 * 16, 64);
2713       break;
2714 
2715    case PIPE_VIDEO_FORMAT_MPEG12:
2716       // reference picture buffer, must be big enough for all frames
2717       dpb_size = image_size * NUM_MPEG2_REFS;
2718       break;
2719 
2720    case PIPE_VIDEO_FORMAT_MPEG4:
2721       // reference picture buffer
2722       dpb_size = image_size * max_references;
2723 
2724       // CM
2725       dpb_size += width_in_mb * height_in_mb * 64;
2726 
2727       // IT surface buffer
2728       dpb_size += align(width_in_mb * height_in_mb * 32, 64);
2729 
2730       dpb_size = MAX2(dpb_size, 30 * 1024 * 1024);
2731       break;
2732 
2733    case PIPE_VIDEO_FORMAT_VP9:
2734       max_references = MAX2(max_references, 9);
2735 
2736       if (dec->dpb_type == DPB_MAX_RES)
2737          dpb_size = (((struct si_screen *)dec->screen)->info.vcn_ip_version >= VCN_2_0_0)
2738             ? (8192 * 4320 * 3 / 2) * max_references
2739             : (4096 * 3000 * 3 / 2) * max_references;
2740       else
2741          dpb_size = (align(dec->base.width, dec->db_alignment) *
2742             align(dec->base.height, dec->db_alignment) * 3 / 2) * max_references;
2743 
2744       if (dec->base.profile == PIPE_VIDEO_PROFILE_VP9_PROFILE2)
2745          dpb_size = dpb_size * 3 / 2;
2746       break;
2747 
2748    case PIPE_VIDEO_FORMAT_AV1:
2749       max_references = MAX2(max_references, 9);
2750       dpb_size = 8192 * 4320 * 3 / 2 * max_references * 3 / 2;
2751       break;
2752 
2753    case PIPE_VIDEO_FORMAT_JPEG:
2754       dpb_size = 0;
2755       break;
2756 
2757    default:
2758       // something is missing here
2759       assert(0);
2760 
2761       // at least use a sane default value
2762       dpb_size = 32 * 1024 * 1024;
2763       break;
2764    }
2765    return dpb_size;
2766 }
2767 
2768 /**
2769  * destroy this video decoder
2770  */
radeon_dec_destroy(struct pipe_video_codec * decoder)2771 static void radeon_dec_destroy(struct pipe_video_codec *decoder)
2772 {
2773    struct radeon_decoder *dec = (struct radeon_decoder *)decoder;
2774    unsigned i;
2775 
2776    assert(decoder);
2777 
2778    if (dec->stream_type != RDECODE_CODEC_JPEG) {
2779       map_msg_fb_it_probs_buf(dec);
2780       rvcn_dec_message_destroy(dec);
2781       send_msg_buf(dec);
2782       flush(dec, 0, &dec->destroy_fence);
2783       dec->ws->fence_wait(dec->ws, dec->destroy_fence, PIPE_DEFAULT_DECODER_FEEDBACK_TIMEOUT_NS);
2784       dec->ws->fence_reference(dec->ws, &dec->destroy_fence, NULL);
2785    }
2786 
2787    dec->ws->fence_reference(dec->ws, &dec->prev_fence, NULL);
2788    dec->ws->cs_destroy(&dec->cs);
2789 
2790    if (dec->stream_type == RDECODE_CODEC_JPEG) {
2791       for (i = 0; i < dec->njctx; i++) {
2792          dec->ws->cs_destroy(&dec->jcs[i]);
2793          dec->ws->ctx_destroy(dec->jctx[i]);
2794       }
2795    }
2796 
2797    if (dec->msg_fb_it_probs_buffers && dec->bs_buffers) {
2798       for (i = 0; i < dec->num_dec_bufs; ++i) {
2799             si_vid_destroy_buffer(&dec->msg_fb_it_probs_buffers[i]);
2800             si_vid_destroy_buffer(&dec->bs_buffers[i]);
2801       }
2802       FREE(dec->msg_fb_it_probs_buffers);
2803       FREE(dec->bs_buffers);
2804    }
2805    dec->num_dec_bufs = 0;
2806 
2807    if (dec->dpb_type != DPB_DYNAMIC_TIER_2) {
2808       si_vid_destroy_buffer(&dec->dpb);
2809    } else {
2810       list_for_each_entry_safe(struct rvcn_dec_dynamic_dpb_t2, d, &dec->dpb_ref_list, list) {
2811          list_del(&d->list);
2812          si_vid_destroy_buffer(&d->dpb);
2813          FREE(d);
2814       }
2815    }
2816    si_vid_destroy_buffer(&dec->ctx);
2817    si_vid_destroy_buffer(&dec->sessionctx);
2818 
2819    FREE(dec->jcs);
2820    FREE(dec->jctx);
2821    FREE(dec);
2822 }
2823 
2824 /**
2825  * start decoding of a new frame
2826  */
radeon_dec_begin_frame(struct pipe_video_codec * decoder,struct pipe_video_buffer * target,struct pipe_picture_desc * picture)2827 static void radeon_dec_begin_frame(struct pipe_video_codec *decoder,
2828                                    struct pipe_video_buffer *target,
2829                                    struct pipe_picture_desc *picture)
2830 {
2831    struct radeon_decoder *dec = (struct radeon_decoder *)decoder;
2832    uintptr_t frame;
2833 
2834    assert(decoder);
2835 
2836    frame = ++dec->frame_number;
2837    if (dec->stream_type != RDECODE_CODEC_VP9 && dec->stream_type != RDECODE_CODEC_AV1
2838                                              && dec->stream_type != RDECODE_CODEC_H264_PERF)
2839       vl_video_buffer_set_associated_data(target, decoder, (void *)frame,
2840                                           &radeon_dec_destroy_associated_data);
2841 
2842    dec->bs_size = 0;
2843    dec->bs_ptr = dec->ws->buffer_map(dec->ws, dec->bs_buffers[dec->cur_buffer].res->buf, &dec->cs,
2844                                      PIPE_MAP_WRITE | RADEON_MAP_TEMPORARY);
2845 }
2846 
/**
 * Decode a macroblock.
 *
 * Macroblock-level decoding is not implemented by this decoder; the hook
 * only asserts.
 */
static void radeon_dec_decode_macroblock(struct pipe_video_codec *decoder,
                                         struct pipe_video_buffer *target,
                                         struct pipe_picture_desc *picture,
                                         const struct pipe_macroblock *macroblocks,
                                         unsigned num_macroblocks)
{
   /* unsupported entry point */
   assert(0);
}
2859 
2860 /**
2861  * decode a bitstream
2862  */
radeon_dec_decode_bitstream(struct pipe_video_codec * decoder,struct pipe_video_buffer * target,struct pipe_picture_desc * picture,unsigned num_buffers,const void * const * buffers,const unsigned * sizes)2863 static void radeon_dec_decode_bitstream(struct pipe_video_codec *decoder,
2864                                         struct pipe_video_buffer *target,
2865                                         struct pipe_picture_desc *picture, unsigned num_buffers,
2866                                         const void *const *buffers, const unsigned *sizes)
2867 {
2868    struct radeon_decoder *dec = (struct radeon_decoder *)decoder;
2869    unsigned i;
2870 
2871    assert(decoder);
2872 
2873    if (!dec->bs_ptr)
2874       return;
2875 
2876    if (dec->bs_size && dec->stream_type == RDECODE_CODEC_AV1)
2877       return;
2878 
2879    unsigned long total_bs_size = dec->bs_size;
2880    for (i = 0; i < num_buffers; ++i)
2881       total_bs_size += sizes[i];
2882 
2883    struct rvid_buffer *buf = &dec->bs_buffers[dec->cur_buffer];
2884 
2885    if (total_bs_size > buf->res->buf->size) {
2886       dec->ws->buffer_unmap(dec->ws, buf->res->buf);
2887       dec->bs_ptr = NULL;
2888       if (!si_vid_resize_buffer(dec->screen, &dec->cs, buf, total_bs_size, NULL)) {
2889          RVID_ERR("Can't resize bitstream buffer!");
2890          return;
2891       }
2892 
2893       dec->bs_ptr = dec->ws->buffer_map(dec->ws, buf->res->buf, &dec->cs,
2894                                           PIPE_MAP_WRITE | RADEON_MAP_TEMPORARY);
2895       if (!dec->bs_ptr)
2896          return;
2897 
2898       dec->bs_ptr += dec->bs_size;
2899    }
2900 
2901    for (i = 0; i < num_buffers; ++i) {
2902       memcpy(dec->bs_ptr, buffers[i], sizes[i]);
2903       dec->bs_size += sizes[i];
2904       dec->bs_ptr += sizes[i];
2905    }
2906 }
2907 
2908 /**
2909  * send cmd for vcn dec
2910  */
send_cmd_dec(struct radeon_decoder * dec,struct pipe_video_buffer * target,struct pipe_picture_desc * picture)2911 void send_cmd_dec(struct radeon_decoder *dec, struct pipe_video_buffer *target,
2912                   struct pipe_picture_desc *picture)
2913 {
2914    struct pb_buffer_lean *dt;
2915    struct rvid_buffer *msg_fb_it_probs_buf, *bs_buf;
2916 
2917    msg_fb_it_probs_buf = &dec->msg_fb_it_probs_buffers[dec->cur_buffer];
2918    bs_buf = &dec->bs_buffers[dec->cur_buffer];
2919 
2920    memset(dec->bs_ptr, 0, align(dec->bs_size, 128) - dec->bs_size);
2921    dec->ws->buffer_unmap(dec->ws, bs_buf->res->buf);
2922    dec->bs_ptr = NULL;
2923 
2924    map_msg_fb_it_probs_buf(dec);
2925    dt = rvcn_dec_message_decode(dec, target, picture);
2926    rvcn_dec_message_feedback(dec);
2927    send_msg_buf(dec);
2928 
2929    if (dec->dpb_type != DPB_DYNAMIC_TIER_2)
2930       send_cmd(dec, RDECODE_CMD_DPB_BUFFER, dec->dpb.res->buf, 0, RADEON_USAGE_READWRITE,
2931             RADEON_DOMAIN_VRAM);
2932    if (dec->ctx.res)
2933       send_cmd(dec, RDECODE_CMD_CONTEXT_BUFFER, dec->ctx.res->buf, 0, RADEON_USAGE_READWRITE,
2934                RADEON_DOMAIN_VRAM);
2935    send_cmd(dec, RDECODE_CMD_BITSTREAM_BUFFER, bs_buf->res->buf, 0, RADEON_USAGE_READ,
2936             RADEON_DOMAIN_GTT);
2937    send_cmd(dec, RDECODE_CMD_DECODING_TARGET_BUFFER, dt, 0, RADEON_USAGE_WRITE, RADEON_DOMAIN_VRAM);
2938    send_cmd(dec, RDECODE_CMD_FEEDBACK_BUFFER, msg_fb_it_probs_buf->res->buf, FB_BUFFER_OFFSET,
2939             RADEON_USAGE_WRITE, RADEON_DOMAIN_GTT);
2940    if (have_it(dec))
2941       send_cmd(dec, RDECODE_CMD_IT_SCALING_TABLE_BUFFER, msg_fb_it_probs_buf->res->buf,
2942                FB_BUFFER_OFFSET + FB_BUFFER_SIZE, RADEON_USAGE_READ, RADEON_DOMAIN_GTT);
2943    else if (have_probs(dec))
2944       send_cmd(dec, RDECODE_CMD_PROB_TBL_BUFFER, msg_fb_it_probs_buf->res->buf,
2945                FB_BUFFER_OFFSET + FB_BUFFER_SIZE, RADEON_USAGE_READ, RADEON_DOMAIN_GTT);
2946 
2947    if (dec->vcn_dec_sw_ring == false)
2948       set_reg(dec, dec->reg.cntl, 1);
2949 }
2950 
2951 /**
2952  * end decoding of the current frame
2953  */
radeon_dec_end_frame(struct pipe_video_codec * decoder,struct pipe_video_buffer * target,struct pipe_picture_desc * picture)2954 static void radeon_dec_end_frame(struct pipe_video_codec *decoder, struct pipe_video_buffer *target,
2955                                  struct pipe_picture_desc *picture)
2956 {
2957    struct radeon_decoder *dec = (struct radeon_decoder *)decoder;
2958 
2959    assert(decoder);
2960 
2961    if (!dec->bs_ptr)
2962       return;
2963 
2964    dec->send_cmd(dec, target, picture);
2965    flush(dec, PIPE_FLUSH_ASYNC, picture->fence);
2966    if (picture->fence)
2967       dec->ws->fence_reference(dec->ws, &dec->prev_fence, *picture->fence);
2968    next_buffer(dec);
2969 }
2970 
2971 /**
2972  * end decoding of the current jpeg frame
2973  */
radeon_dec_jpeg_end_frame(struct pipe_video_codec * decoder,struct pipe_video_buffer * target,struct pipe_picture_desc * picture)2974 static void radeon_dec_jpeg_end_frame(struct pipe_video_codec *decoder, struct pipe_video_buffer *target,
2975                                  struct pipe_picture_desc *picture)
2976 {
2977    struct radeon_decoder *dec = (struct radeon_decoder *)decoder;
2978    struct pipe_mjpeg_picture_desc *pic = (struct pipe_mjpeg_picture_desc *)picture;
2979 
2980    assert(decoder);
2981 
2982    if (!dec->bs_ptr)
2983       return;
2984 
2985    dec->jpg.crop_x = ROUND_DOWN_TO(pic->picture_parameter.crop_x, VL_MACROBLOCK_WIDTH);
2986    dec->jpg.crop_y = ROUND_DOWN_TO(pic->picture_parameter.crop_y, VL_MACROBLOCK_HEIGHT);
2987    dec->jpg.crop_width = align(pic->picture_parameter.crop_width, VL_MACROBLOCK_WIDTH);
2988    dec->jpg.crop_height = align(pic->picture_parameter.crop_height, VL_MACROBLOCK_HEIGHT);
2989    if (dec->jpg.crop_x + dec->jpg.crop_width > pic->picture_parameter.picture_width)
2990       dec->jpg.crop_width = 0;
2991    if (dec->jpg.crop_y + dec->jpg.crop_height > pic->picture_parameter.picture_height)
2992       dec->jpg.crop_height = 0;
2993    dec->send_cmd(dec, target, picture);
2994    dec->ws->cs_flush(&dec->jcs[dec->cb_idx], PIPE_FLUSH_ASYNC, NULL);
2995    next_buffer(dec);
2996    dec->cb_idx = (dec->cb_idx+1) % dec->njctx;
2997 }
2998 
/**
 * Flush any outstanding command buffers to the hardware.
 *
 * Intentionally empty: this hook performs no work.
 */
static void radeon_dec_flush(struct pipe_video_codec *decoder)
{
   (void)decoder;
}
3005 
radeon_dec_get_decoder_fence(struct pipe_video_codec * decoder,struct pipe_fence_handle * fence,uint64_t timeout)3006 static int radeon_dec_get_decoder_fence(struct pipe_video_codec *decoder,
3007                                         struct pipe_fence_handle *fence,
3008                                         uint64_t timeout) {
3009 
3010    struct radeon_decoder *dec = (struct radeon_decoder *)decoder;
3011 
3012    return dec->ws->fence_wait(dec->ws, fence, timeout);
3013 }
3014 
radeon_dec_destroy_fence(struct pipe_video_codec * decoder,struct pipe_fence_handle * fence)3015 static void radeon_dec_destroy_fence(struct pipe_video_codec *decoder,
3016                                      struct pipe_fence_handle *fence)
3017 {
3018    struct radeon_decoder *dec = (struct radeon_decoder *)decoder;
3019 
3020    dec->ws->fence_reference(dec->ws, &fence, NULL);
3021 }
3022 
3023 /**
3024  * update render list when target buffer got updated, use the existing
3025  * index and update the new buffer to associate with it.
3026  */
radeon_dec_update_render_list(struct pipe_video_codec * decoder,struct pipe_video_buffer * old,struct pipe_video_buffer * updated)3027 static void radeon_dec_update_render_list(struct pipe_video_codec *decoder,
3028                                           struct pipe_video_buffer *old,
3029                                           struct pipe_video_buffer *updated)
3030 {
3031    struct radeon_decoder *dec = (struct radeon_decoder *)decoder;
3032    void *index = vl_video_buffer_get_associated_data(old, decoder);
3033 
3034    vl_video_buffer_set_associated_data(updated, decoder, index,
3035                                        old->destroy_associated_data);
3036    for (int i = 0; i < ARRAY_SIZE(dec->render_pic_list); ++i) {
3037       if (dec->render_pic_list[i] == old) {
3038          dec->render_pic_list[i] = updated;
3039          break;
3040       }
3041    }
3042 }
3043 /**
 * create a HW decoder
3045  */
radeon_create_decoder(struct pipe_context * context,const struct pipe_video_codec * templ)3046 struct pipe_video_codec *radeon_create_decoder(struct pipe_context *context,
3047                                                const struct pipe_video_codec *templ)
3048 {
3049    struct si_context *sctx = (struct si_context *)context;
3050    struct radeon_winsys *ws = sctx->ws;
3051    unsigned width = templ->width, height = templ->height;
3052    unsigned bs_buf_size, stream_type = 0, ring = AMD_IP_VCN_DEC;
3053    struct radeon_decoder *dec;
3054    int r, i;
3055 
3056    switch (u_reduce_video_profile(templ->profile)) {
3057    case PIPE_VIDEO_FORMAT_MPEG12:
3058       if (templ->entrypoint > PIPE_VIDEO_ENTRYPOINT_BITSTREAM)
3059          return vl_create_mpeg12_decoder(context, templ);
3060       stream_type = RDECODE_CODEC_MPEG2_VLD;
3061       break;
3062    case PIPE_VIDEO_FORMAT_MPEG4:
3063       width = align(width, VL_MACROBLOCK_WIDTH);
3064       height = align(height, VL_MACROBLOCK_HEIGHT);
3065       stream_type = RDECODE_CODEC_MPEG4;
3066       break;
3067    case PIPE_VIDEO_FORMAT_VC1:
3068       stream_type = RDECODE_CODEC_VC1;
3069       break;
3070    case PIPE_VIDEO_FORMAT_MPEG4_AVC:
3071       width = align(width, VL_MACROBLOCK_WIDTH);
3072       height = align(height, VL_MACROBLOCK_HEIGHT);
3073       stream_type = RDECODE_CODEC_H264_PERF;
3074       break;
3075    case PIPE_VIDEO_FORMAT_HEVC:
3076       stream_type = RDECODE_CODEC_H265;
3077       break;
3078    case PIPE_VIDEO_FORMAT_VP9:
3079       stream_type = RDECODE_CODEC_VP9;
3080       break;
3081    case PIPE_VIDEO_FORMAT_AV1:
3082       stream_type = RDECODE_CODEC_AV1;
3083       break;
3084    case PIPE_VIDEO_FORMAT_JPEG:
3085       stream_type = RDECODE_CODEC_JPEG;
3086       ring = AMD_IP_VCN_JPEG;
3087       break;
3088    default:
3089       assert(0);
3090       break;
3091    }
3092 
3093    dec = CALLOC_STRUCT(radeon_decoder);
3094 
3095    if (!dec)
3096       return NULL;
3097 
3098    dec->base = *templ;
3099    dec->base.context = context;
3100    dec->base.width = width;
3101    dec->base.height = height;
3102    dec->max_width = width;
3103    dec->max_height = height;
3104    dec->base.destroy = radeon_dec_destroy;
3105    dec->base.begin_frame = radeon_dec_begin_frame;
3106    dec->base.decode_macroblock = radeon_dec_decode_macroblock;
3107    dec->base.decode_bitstream = radeon_dec_decode_bitstream;
3108    dec->base.end_frame = radeon_dec_end_frame;
3109    dec->base.flush = radeon_dec_flush;
3110    dec->base.get_decoder_fence = radeon_dec_get_decoder_fence;
3111    dec->base.destroy_fence = radeon_dec_destroy_fence;
3112    dec->base.update_decoder_target =  radeon_dec_update_render_list;
3113 
3114    dec->stream_type = stream_type;
3115    dec->stream_handle = si_vid_alloc_stream_handle();
3116    dec->screen = context->screen;
3117    dec->ws = ws;
3118 
3119    if (u_reduce_video_profile(templ->profile) != PIPE_VIDEO_FORMAT_JPEG &&
3120        (sctx->vcn_ip_ver >= VCN_4_0_0)) {
3121       dec->vcn_dec_sw_ring = true;
3122       ring = AMD_IP_VCN_UNIFIED;
3123    }
3124 
3125    dec->sq.ib_total_size_in_dw = NULL;
3126    dec->sq.ib_checksum = NULL;
3127 
3128    if (!ws->cs_create(&dec->cs, sctx->ctx, ring, NULL, NULL)) {
3129       RVID_ERR("Can't get command submission context.\n");
3130       goto error;
3131    }
3132 
3133    if (dec->stream_type == RDECODE_CODEC_JPEG) {
3134 
3135       if (sctx->vcn_ip_ver == VCN_2_5_0 || sctx->vcn_ip_ver == VCN_2_6_0)
3136          dec->njctx = 2;
3137       else if (sctx->vcn_ip_ver == VCN_4_0_3)
3138          dec->njctx = 24;
3139       else
3140          dec->njctx = 1;
3141 
3142       dec->jctx = (struct radeon_winsys_ctx **) CALLOC(dec->njctx,
3143                                                        sizeof(struct radeon_winsys_ctx *));
3144       dec->jcs = (struct radeon_cmdbuf *) CALLOC(dec->njctx, sizeof(struct radeon_cmdbuf));
3145       if(!dec->jctx || !dec->jcs)
3146          goto err;
3147       for (i = 0; i < dec->njctx; i++) {
3148       /* Initialize the context handle and the command stream. */
3149          dec->jctx[i] = dec->ws->ctx_create(dec->ws, RADEON_CTX_PRIORITY_MEDIUM,
3150                                             sctx->context_flags & PIPE_CONTEXT_LOSE_CONTEXT_ON_RESET);
3151          if (!sctx->ctx)
3152             goto error;
3153          if (!dec->ws->cs_create(&dec->jcs[i], dec->jctx[i], ring, NULL, NULL)) {
3154             RVID_ERR("Can't get additional command submission context for mJPEG.\n");
3155             goto error;
3156          }
3157       }
3158       dec->base.end_frame = radeon_dec_jpeg_end_frame;
3159       dec->cb_idx = 0;
3160    }
3161 
3162    for (i = 0; i < ARRAY_SIZE(dec->render_pic_list); i++)
3163       dec->render_pic_list[i] = NULL;
3164 
3165    if ((sctx->vcn_ip_ver >= VCN_3_0_0) && (stream_type == RDECODE_CODEC_H264_PERF)) {
3166       for (i = 0; i < ARRAY_SIZE(dec->h264_valid_ref_num); i++)
3167          dec->h264_valid_ref_num[i] = (unsigned) -1;
3168       for (i = 0; i < ARRAY_SIZE(dec->h264_valid_poc_num); i++)
3169          dec->h264_valid_poc_num[i] = (unsigned) -1;
3170    }
3171 
3172    if (dec->stream_type == RDECODE_CODEC_JPEG) {
3173       if (sctx->vcn_ip_ver == VCN_4_0_3)
3174          dec->num_dec_bufs = dec->njctx;
3175       else
3176          dec->num_dec_bufs = dec->njctx * NUM_BUFFERS;
3177    } else
3178       dec->num_dec_bufs = NUM_BUFFERS;
3179 
3180    bs_buf_size = align(width * height / 32, 128);
3181    dec->msg_fb_it_probs_buffers = (struct rvid_buffer *) CALLOC(dec->num_dec_bufs, sizeof(struct rvid_buffer));
3182    dec->bs_buffers = (struct rvid_buffer *) CALLOC(dec->num_dec_bufs, sizeof(struct rvid_buffer));
3183    if(!dec->msg_fb_it_probs_buffers || !dec->bs_buffers)
3184       goto error;
3185 
3186    for (i = 0; i < dec->num_dec_bufs; ++i) {
3187       unsigned msg_fb_it_probs_size = FB_BUFFER_OFFSET + FB_BUFFER_SIZE;
3188       if (have_it(dec))
3189          msg_fb_it_probs_size += IT_SCALING_TABLE_SIZE;
3190       else if (have_probs(dec))
3191          msg_fb_it_probs_size += (dec->stream_type == RDECODE_CODEC_VP9) ?
3192                                  VP9_PROBS_TABLE_SIZE :
3193                                  sizeof(rvcn_dec_av1_segment_fg_t);
3194       /* use vram to improve performance, workaround an unknown bug */
3195       if (!si_vid_create_buffer(dec->screen, &dec->msg_fb_it_probs_buffers[i], msg_fb_it_probs_size,
3196                                 PIPE_USAGE_DEFAULT)) {
3197          RVID_ERR("Can't allocated message buffers.\n");
3198          goto error;
3199       }
3200 
3201       if (!si_vid_create_buffer(dec->screen, &dec->bs_buffers[i], bs_buf_size,
3202                                 PIPE_USAGE_STAGING)) {
3203          RVID_ERR("Can't allocated bitstream buffers.\n");
3204          goto error;
3205       }
3206 
3207       si_vid_clear_buffer(context, &dec->msg_fb_it_probs_buffers[i]);
3208       si_vid_clear_buffer(context, &dec->bs_buffers[i]);
3209 
3210       if (have_probs(dec) && dec->stream_type == RDECODE_CODEC_VP9) {
3211          struct rvid_buffer *buf;
3212          void *ptr;
3213 
3214          buf = &dec->msg_fb_it_probs_buffers[i];
3215          ptr = dec->ws->buffer_map(dec->ws, buf->res->buf, &dec->cs,
3216                                    PIPE_MAP_WRITE | RADEON_MAP_TEMPORARY);
3217          ptr += FB_BUFFER_OFFSET + FB_BUFFER_SIZE;
3218          fill_probs_table(ptr);
3219          dec->ws->buffer_unmap(dec->ws, buf->res->buf);
3220          dec->bs_ptr = NULL;
3221       }
3222    }
3223 
3224    if ((sctx->vcn_ip_ver >= VCN_3_0_0) &&
3225          (stream_type == RDECODE_CODEC_VP9 ||
3226           stream_type == RDECODE_CODEC_AV1 ||
3227         ((stream_type == RDECODE_CODEC_H265) && templ->expect_chunked_decode) ||
3228         ((stream_type == RDECODE_CODEC_H264_PERF) && templ->expect_chunked_decode)))
3229       dec->dpb_type = DPB_DYNAMIC_TIER_2;
3230    else if (sctx->vcn_ip_ver <= VCN_2_6_0 && stream_type == RDECODE_CODEC_VP9)
3231       dec->dpb_type = DPB_DYNAMIC_TIER_1;
3232    else
3233       dec->dpb_type = DPB_MAX_RES;
3234 
3235    dec->db_alignment = (((struct si_screen *)dec->screen)->info.vcn_ip_version >= VCN_2_0_0 &&
3236                    dec->base.width > 32 && (dec->stream_type == RDECODE_CODEC_VP9 ||
3237                    dec->stream_type == RDECODE_CODEC_AV1 ||
3238                    dec->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10)) ? 64 : 32;
3239 
3240    dec->dpb_size = calc_dpb_size(dec);
3241 
3242    if (!si_vid_create_buffer(dec->screen, &dec->sessionctx, RDECODE_SESSION_CONTEXT_SIZE,
3243                              PIPE_USAGE_DEFAULT)) {
3244       RVID_ERR("Can't allocated session ctx.\n");
3245       goto error;
3246    }
3247    si_vid_clear_buffer(context, &dec->sessionctx);
3248 
3249    dec->addr_gfx_mode = RDECODE_ARRAY_MODE_LINEAR;
3250    dec->av1_version = RDECODE_AV1_VER_0;
3251 
3252    switch (sctx->vcn_ip_ver) {
3253    case VCN_1_0_0:
3254    case VCN_1_0_1:
3255       dec->reg.data0 = RDECODE_VCN1_GPCOM_VCPU_DATA0;
3256       dec->reg.data1 = RDECODE_VCN1_GPCOM_VCPU_DATA1;
3257       dec->reg.cmd = RDECODE_VCN1_GPCOM_VCPU_CMD;
3258       dec->reg.cntl = RDECODE_VCN1_ENGINE_CNTL;
3259       dec->jpg_reg.version = RDECODE_JPEG_REG_VER_V1;
3260       break;
3261    case VCN_2_0_0:
3262    case VCN_2_0_2:
3263    case VCN_2_0_3:
3264    case VCN_2_2_0:
3265       dec->reg.data0 = RDECODE_VCN2_GPCOM_VCPU_DATA0;
3266       dec->reg.data1 = RDECODE_VCN2_GPCOM_VCPU_DATA1;
3267       dec->reg.cmd = RDECODE_VCN2_GPCOM_VCPU_CMD;
3268       dec->reg.cntl = RDECODE_VCN2_ENGINE_CNTL;
3269       dec->jpg_reg.version = RDECODE_JPEG_REG_VER_V2;
3270       break;
3271    case VCN_2_5_0:
3272    case VCN_2_6_0:
3273    case VCN_3_0_0:
3274    case VCN_3_0_2:
3275    case VCN_3_0_16:
3276    case VCN_3_0_33:
3277    case VCN_3_1_1:
3278    case VCN_3_1_2:
3279       dec->reg.data0 = RDECODE_VCN2_5_GPCOM_VCPU_DATA0;
3280       dec->reg.data1 = RDECODE_VCN2_5_GPCOM_VCPU_DATA1;
3281       dec->reg.cmd = RDECODE_VCN2_5_GPCOM_VCPU_CMD;
3282       dec->reg.cntl = RDECODE_VCN2_5_ENGINE_CNTL;
3283       dec->jpg_reg.version = RDECODE_JPEG_REG_VER_V2;
3284       break;
3285    case VCN_4_0_3:
3286       dec->jpg_reg.version = RDECODE_JPEG_REG_VER_V3;
3287       dec->addr_gfx_mode = RDECODE_ARRAY_MODE_ADDRLIB_SEL_GFX9;
3288       dec->av1_version = RDECODE_AV1_VER_1;
3289       break;
3290    case VCN_4_0_0:
3291    case VCN_4_0_2:
3292    case VCN_4_0_4:
3293    case VCN_4_0_5:
3294       dec->jpg_reg.version = RDECODE_JPEG_REG_VER_V2;
3295       dec->addr_gfx_mode = RDECODE_ARRAY_MODE_ADDRLIB_SEL_GFX11;
3296       dec->av1_version = RDECODE_AV1_VER_1;
3297       break;
3298    default:
3299       RVID_ERR("VCN is not supported.\n");
3300       goto error;
3301    }
3302 
3303    if (dec->stream_type != RDECODE_CODEC_JPEG) {
3304       map_msg_fb_it_probs_buf(dec);
3305       rvcn_dec_message_create(dec);
3306       send_msg_buf(dec);
3307       r = flush(dec, 0, NULL);
3308       if (r)
3309          goto error;
3310    } else if (dec->jpg_reg.version != RDECODE_JPEG_REG_VER_V1) {
3311       dec->jpg_reg.jrbc_ib_cond_rd_timer = vcnipUVD_JRBC_IB_COND_RD_TIMER;
3312       dec->jpg_reg.jrbc_ib_ref_data = vcnipUVD_JRBC_IB_REF_DATA;
3313       dec->jpg_reg.jpeg_rb_base = vcnipUVD_JPEG_RB_BASE;
3314       dec->jpg_reg.jpeg_rb_size = vcnipUVD_JPEG_RB_SIZE;
3315       dec->jpg_reg.jpeg_rb_wptr = vcnipUVD_JPEG_RB_WPTR;
3316       dec->jpg_reg.jpeg_int_en = vcnipUVD_JPEG_INT_EN;
3317       dec->jpg_reg.jpeg_cntl = vcnipUVD_JPEG_CNTL;
3318       dec->jpg_reg.jpeg_rb_rptr = vcnipUVD_JPEG_RB_RPTR;
3319       if (dec->jpg_reg.version == RDECODE_JPEG_REG_VER_V2) {
3320          dec->jpg_reg.jpeg_dec_soft_rst = vcnipUVD_JPEG_DEC_SOFT_RST;
3321          dec->jpg_reg.lmi_jpeg_read_64bit_bar_high = vcnipUVD_LMI_JPEG_READ_64BIT_BAR_HIGH;
3322          dec->jpg_reg.lmi_jpeg_read_64bit_bar_low = vcnipUVD_LMI_JPEG_READ_64BIT_BAR_LOW;
3323          dec->jpg_reg.jpeg_pitch = vcnipUVD_JPEG_PITCH;
3324          dec->jpg_reg.jpeg_uv_pitch = vcnipUVD_JPEG_UV_PITCH;
3325          dec->jpg_reg.dec_addr_mode = vcnipJPEG_DEC_ADDR_MODE;
3326          dec->jpg_reg.dec_y_gfx10_tiling_surface = vcnipJPEG_DEC_Y_GFX10_TILING_SURFACE;
3327          dec->jpg_reg.dec_uv_gfx10_tiling_surface = vcnipJPEG_DEC_UV_GFX10_TILING_SURFACE;
3328          dec->jpg_reg.lmi_jpeg_write_64bit_bar_high = vcnipUVD_LMI_JPEG_WRITE_64BIT_BAR_HIGH;
3329          dec->jpg_reg.lmi_jpeg_write_64bit_bar_low = vcnipUVD_LMI_JPEG_WRITE_64BIT_BAR_LOW;
3330          dec->jpg_reg.jpeg_tier_cntl2 = vcnipUVD_JPEG_TIER_CNTL2;
3331          dec->jpg_reg.jpeg_outbuf_cntl = vcnipUVD_JPEG_OUTBUF_CNTL;
3332          dec->jpg_reg.jpeg_outbuf_rptr = vcnipUVD_JPEG_OUTBUF_RPTR;
3333          dec->jpg_reg.jpeg_outbuf_wptr = vcnipUVD_JPEG_OUTBUF_WPTR;
3334          dec->jpg_reg.jpeg_index = vcnipUVD_JPEG_INDEX;
3335          dec->jpg_reg.jpeg_data = vcnipUVD_JPEG_DATA;
3336       } else {
3337          dec->jpg_reg.jpeg_dec_soft_rst = vcnipUVD_JPEG_DEC_SOFT_RST_1;
3338          dec->jpg_reg.lmi_jpeg_read_64bit_bar_high = vcnipUVD_LMI_JPEG_READ_64BIT_BAR_HIGH_1;
3339          dec->jpg_reg.lmi_jpeg_read_64bit_bar_low = vcnipUVD_LMI_JPEG_READ_64BIT_BAR_LOW_1;
3340          dec->jpg_reg.jpeg_pitch = vcnipUVD_JPEG_PITCH_1;
3341          dec->jpg_reg.jpeg_uv_pitch = vcnipUVD_JPEG_UV_PITCH_1;
3342          dec->jpg_reg.dec_addr_mode = vcnipJPEG_DEC_ADDR_MODE_1;
3343          dec->jpg_reg.dec_y_gfx10_tiling_surface = vcnipJPEG_DEC_Y_GFX10_TILING_SURFACE_1;
3344          dec->jpg_reg.dec_uv_gfx10_tiling_surface = vcnipJPEG_DEC_UV_GFX10_TILING_SURFACE_1;
3345          dec->jpg_reg.lmi_jpeg_write_64bit_bar_high = vcnipUVD_LMI_JPEG_WRITE_64BIT_BAR_HIGH_1;
3346          dec->jpg_reg.lmi_jpeg_write_64bit_bar_low = vcnipUVD_LMI_JPEG_WRITE_64BIT_BAR_LOW_1;
3347          dec->jpg_reg.jpeg_tier_cntl2 = vcnipUVD_JPEG_TIER_CNTL2_1;
3348          dec->jpg_reg.jpeg_outbuf_cntl = vcnipUVD_JPEG_OUTBUF_CNTL_1;
3349          dec->jpg_reg.jpeg_outbuf_rptr = vcnipUVD_JPEG_OUTBUF_RPTR_1;
3350          dec->jpg_reg.jpeg_outbuf_wptr = vcnipUVD_JPEG_OUTBUF_WPTR_1;
3351          dec->jpg_reg.jpeg_luma_base0_0 = vcnipUVD_JPEG_LUMA_BASE0_0;
3352          dec->jpg_reg.jpeg_chroma_base0_0 = vcnipUVD_JPEG_CHROMA_BASE0_0;
3353          dec->jpg_reg.jpeg_chromav_base0_0 = vcnipUVD_JPEG_CHROMAV_BASE0_0;
3354       }
3355    }
3356 
3357    next_buffer(dec);
3358 
3359    if (stream_type == RDECODE_CODEC_JPEG)
3360       dec->send_cmd = send_cmd_jpeg;
3361    else
3362       dec->send_cmd = send_cmd_dec;
3363 
3364 
3365    if (dec->dpb_type == DPB_DYNAMIC_TIER_2) {
3366       list_inithead(&dec->dpb_ref_list);
3367       list_inithead(&dec->dpb_unref_list);
3368    }
3369 
3370    dec->tmz_ctx = sctx->vcn_ip_ver < VCN_2_2_0 && sctx->vcn_ip_ver != VCN_UNKNOWN;
3371 
3372    return &dec->base;
3373 
3374 error:
3375    dec->ws->cs_destroy(&dec->cs);
3376 
3377    if (dec->stream_type == RDECODE_CODEC_JPEG) {
3378       for (i = 0; i < dec->njctx; i++) {
3379          dec->ws->cs_destroy(&dec->jcs[i]);
3380          dec->ws->ctx_destroy(dec->jctx[i]);
3381       }
3382    }
3383 
3384    if (dec->msg_fb_it_probs_buffers && dec->bs_buffers) {
3385       for (i = 0; i < dec->num_dec_bufs; ++i) {
3386             si_vid_destroy_buffer(&dec->msg_fb_it_probs_buffers[i]);
3387             si_vid_destroy_buffer(&dec->bs_buffers[i]);
3388       }
3389       FREE(dec->msg_fb_it_probs_buffers);
3390       FREE(dec->bs_buffers);
3391    }
3392 
3393    if (dec->dpb_type != DPB_DYNAMIC_TIER_2)
3394       si_vid_destroy_buffer(&dec->dpb);
3395    si_vid_destroy_buffer(&dec->ctx);
3396    si_vid_destroy_buffer(&dec->sessionctx);
3397 
3398 err:
3399    if (dec->jcs)
3400       FREE(dec->jcs);
3401    if (dec->jctx)
3402       FREE(dec->jctx);
3403    FREE(dec);
3404 
3405    return NULL;
3406 }
3407