1 /**************************************************************************
2 *
3 * Copyright 2018 Advanced Micro Devices, Inc.
4 *
5 * SPDX-License-Identifier: MIT
6 *
7 **************************************************************************/
8
9 #include "radeon_uvd_enc.h"
10
11 #include "pipe/p_video_codec.h"
12 #include "radeon_video.h"
13 #include "radeonsi/si_pipe.h"
14 #include "util/u_memory.h"
15 #include "util/u_video.h"
16 #include "vl/vl_video_buffer.h"
17
18 #include <stdio.h>
19
radeon_uvd_enc_get_param(struct radeon_uvd_encoder * enc,struct pipe_h265_enc_picture_desc * pic)20 static void radeon_uvd_enc_get_param(struct radeon_uvd_encoder *enc,
21 struct pipe_h265_enc_picture_desc *pic)
22 {
23 enc->enc_pic.desc = pic;
24 enc->enc_pic.picture_type = pic->picture_type;
25 enc->enc_pic.nal_unit_type = pic->pic.nal_unit_type;
26 enc->enc_pic.enc_params.reference_picture_index =
27 pic->ref_list0[0] == PIPE_H2645_LIST_REF_INVALID_ENTRY ? 0xffffffff : pic->ref_list0[0];
28 enc->enc_pic.enc_params.reconstructed_picture_index = pic->dpb_curr_pic;
29
30 enc->enc_pic.session_init.pre_encode_mode =
31 pic->quality_modes.pre_encode_mode ? RENC_UVD_PREENCODE_MODE_4X : RENC_UVD_PREENCODE_MODE_NONE;
32 enc->enc_pic.session_init.pre_encode_chroma_enabled = !!enc->enc_pic.session_init.pre_encode_mode;
33 enc->enc_pic.quality_params.vbaq_mode =
34 pic->rc[0].rate_ctrl_method != PIPE_H2645_ENC_RATE_CONTROL_METHOD_DISABLE &&
35 pic->quality_modes.vbaq_mode;
36
37 enc->enc_pic.layer_ctrl.num_temporal_layers = pic->seq.num_temporal_layers ? pic->seq.num_temporal_layers : 1;
38 enc->enc_pic.layer_ctrl.max_num_temporal_layers = enc->enc_pic.layer_ctrl.num_temporal_layers;
39 enc->enc_pic.temporal_id = MIN2(pic->pic.temporal_id, enc->enc_pic.layer_ctrl.num_temporal_layers - 1);
40
41 for (uint32_t i = 0; i < enc->enc_pic.layer_ctrl.num_temporal_layers; i++) {
42 enc->enc_pic.rc_layer_init[i].target_bit_rate = pic->rc[i].target_bitrate;
43 enc->enc_pic.rc_layer_init[i].peak_bit_rate = pic->rc[i].peak_bitrate;
44 enc->enc_pic.rc_layer_init[i].frame_rate_num = pic->rc[i].frame_rate_num;
45 enc->enc_pic.rc_layer_init[i].frame_rate_den = pic->rc[i].frame_rate_den;
46 enc->enc_pic.rc_layer_init[i].vbv_buffer_size = pic->rc[i].vbv_buffer_size;
47 enc->enc_pic.rc_layer_init[i].avg_target_bits_per_picture =
48 pic->rc[i].target_bitrate * ((float)pic->rc[i].frame_rate_den / pic->rc[i].frame_rate_num);
49 enc->enc_pic.rc_layer_init[i].peak_bits_per_picture_integer =
50 pic->rc[i].peak_bitrate * ((float)pic->rc[i].frame_rate_den / pic->rc[i].frame_rate_num);
51 enc->enc_pic.rc_layer_init[i].peak_bits_per_picture_fractional =
52 (((pic->rc[i].peak_bitrate * (uint64_t)pic->rc[i].frame_rate_den) % pic->rc[i].frame_rate_num) << 32) /
53 pic->rc[i].frame_rate_num;
54 }
55 enc->enc_pic.rc_per_pic.qp = pic->rc[0].quant_i_frames;
56 enc->enc_pic.rc_per_pic.min_qp_app = pic->rc[0].min_qp;
57 enc->enc_pic.rc_per_pic.max_qp_app = pic->rc[0].max_qp ? pic->rc[0].max_qp : 51;
58 enc->enc_pic.rc_per_pic.max_au_size = pic->rc[0].max_au_size;
59 enc->enc_pic.rc_per_pic.enabled_filler_data = pic->rc[0].fill_data_enable;
60 enc->enc_pic.rc_per_pic.skip_frame_enable = false;
61 enc->enc_pic.rc_per_pic.enforce_hrd = pic->rc[0].enforce_hrd;
62 }
63
flush(struct radeon_uvd_encoder * enc,unsigned flags,struct pipe_fence_handle ** fence)64 static int flush(struct radeon_uvd_encoder *enc, unsigned flags, struct pipe_fence_handle **fence)
65 {
66 return enc->ws->cs_flush(&enc->cs, flags, fence);
67 }
68
radeon_uvd_enc_flush(struct pipe_video_codec * encoder)69 static void radeon_uvd_enc_flush(struct pipe_video_codec *encoder)
70 {
71 struct radeon_uvd_encoder *enc = (struct radeon_uvd_encoder *)encoder;
72 flush(enc, PIPE_FLUSH_ASYNC, NULL);
73 }
74
/**
 * Flush callback handed to cs_create() by radeon_uvd_create_encoder().
 * Intentionally a no-op: all three arguments are ignored.
 */
static void radeon_uvd_enc_cs_flush(void *ctx, unsigned flags, struct pipe_fence_handle **fence)
{
   (void)ctx;
   (void)flags;
   (void)fence;
}
79
setup_dpb(struct radeon_uvd_encoder * enc,uint32_t num_reconstructed_pictures)80 static uint32_t setup_dpb(struct radeon_uvd_encoder *enc, uint32_t num_reconstructed_pictures)
81 {
82 uint32_t i;
83 uint32_t alignment = 256;
84 uint32_t aligned_width = align(enc->base.width, 64);
85 uint32_t aligned_height = align(enc->base.height, 16);
86 uint32_t pitch = align(aligned_width, alignment);
87 uint32_t luma_size = align(pitch * MAX2(256, aligned_height), alignment);
88 uint32_t chroma_size = align(luma_size / 2, alignment);
89 uint32_t offset = 0;
90 uint32_t pre_encode_luma_size, pre_encode_chroma_size;
91
92 assert(num_reconstructed_pictures <= RENC_UVD_MAX_NUM_RECONSTRUCTED_PICTURES);
93
94 enc->enc_pic.ctx_buf.rec_luma_pitch = pitch;
95 enc->enc_pic.ctx_buf.rec_chroma_pitch = pitch;
96 enc->enc_pic.ctx_buf.num_reconstructed_pictures = num_reconstructed_pictures;
97
98 if (enc->enc_pic.session_init.pre_encode_mode) {
99 uint32_t pre_encode_pitch =
100 align(pitch / enc->enc_pic.session_init.pre_encode_mode, alignment);
101 uint32_t pre_encode_aligned_height =
102 align(aligned_height / enc->enc_pic.session_init.pre_encode_mode, alignment);
103 pre_encode_luma_size =
104 align(pre_encode_pitch * MAX2(256, pre_encode_aligned_height), alignment);
105 pre_encode_chroma_size = align(pre_encode_luma_size / 2, alignment);
106
107 enc->enc_pic.ctx_buf.pre_encode_picture_luma_pitch = pre_encode_pitch;
108 enc->enc_pic.ctx_buf.pre_encode_picture_chroma_pitch = pre_encode_pitch;
109
110 enc->enc_pic.ctx_buf.pre_encode_input_picture.luma_offset = offset;
111 offset += pre_encode_luma_size;
112 enc->enc_pic.ctx_buf.pre_encode_input_picture.chroma_offset = offset;
113 offset += pre_encode_chroma_size;
114 }
115
116 for (i = 0; i < num_reconstructed_pictures; i++) {
117 enc->enc_pic.ctx_buf.reconstructed_pictures[i].luma_offset = offset;
118 offset += luma_size;
119 enc->enc_pic.ctx_buf.reconstructed_pictures[i].chroma_offset = offset;
120 offset += chroma_size;
121
122 if (enc->enc_pic.session_init.pre_encode_mode) {
123 enc->enc_pic.ctx_buf.pre_encode_reconstructed_pictures[i].luma_offset = offset;
124 offset += pre_encode_luma_size;
125 enc->enc_pic.ctx_buf.pre_encode_reconstructed_pictures[i].chroma_offset = offset;
126 offset += pre_encode_chroma_size;
127 }
128 }
129
130 enc->dpb_slots = num_reconstructed_pictures;
131
132 return offset;
133 }
134
radeon_uvd_enc_begin_frame(struct pipe_video_codec * encoder,struct pipe_video_buffer * source,struct pipe_picture_desc * picture)135 static void radeon_uvd_enc_begin_frame(struct pipe_video_codec *encoder,
136 struct pipe_video_buffer *source,
137 struct pipe_picture_desc *picture)
138 {
139 struct radeon_uvd_encoder *enc = (struct radeon_uvd_encoder *)encoder;
140 struct vl_video_buffer *vid_buf = (struct vl_video_buffer *)source;
141 struct pipe_h265_enc_picture_desc *pic = (struct pipe_h265_enc_picture_desc *)picture;
142
143 enc->need_rate_control =
144 (enc->enc_pic.rc_layer_init[0].target_bit_rate != pic->rc[0].target_bitrate) ||
145 (enc->enc_pic.rc_layer_init[0].frame_rate_num != pic->rc[0].frame_rate_num) ||
146 (enc->enc_pic.rc_layer_init[0].frame_rate_den != pic->rc[0].frame_rate_den);
147
148 enc->need_rc_per_pic =
149 (enc->enc_pic.rc_per_pic.qp != pic->rc[0].quant_i_frames) ||
150 (enc->enc_pic.rc_per_pic.max_au_size != pic->rc[0].max_au_size);
151
152 radeon_uvd_enc_get_param(enc, pic);
153
154 enc->get_buffer(vid_buf->resources[0], &enc->handle, &enc->luma);
155 enc->get_buffer(vid_buf->resources[1], NULL, &enc->chroma);
156
157 enc->source = source;
158 enc->need_feedback = false;
159
160 unsigned dpb_slots = MAX2(pic->seq.sps_max_dec_pic_buffering_minus1[0] + 1, pic->dpb_size);
161
162 if (enc->dpb_slots < dpb_slots) {
163 uint32_t dpb_size = setup_dpb(enc, dpb_slots);
164 if (!enc->dpb.res) {
165 if (!si_vid_create_buffer(enc->screen, &enc->dpb, dpb_size, PIPE_USAGE_DEFAULT)) {
166 RVID_ERR("Can't create DPB buffer.\n");
167 return;
168 }
169 } else if (!si_vid_resize_buffer(enc->base.context, &enc->cs, &enc->dpb, dpb_size, NULL)) {
170 RVID_ERR("Can't resize DPB buffer.\n");
171 return;
172 }
173 }
174
175 if (!enc->stream_handle) {
176 struct rvid_buffer fb;
177 enc->stream_handle = si_vid_alloc_stream_handle();
178 enc->si = CALLOC_STRUCT(rvid_buffer);
179 si_vid_create_buffer(enc->screen, enc->si, 128 * 1024, PIPE_USAGE_DEFAULT);
180 si_vid_create_buffer(enc->screen, &fb, 4096, PIPE_USAGE_STAGING);
181 enc->fb = &fb;
182 enc->begin(enc, picture);
183 flush(enc, PIPE_FLUSH_ASYNC, NULL);
184 si_vid_destroy_buffer(&fb);
185 }
186 }
187
radeon_uvd_enc_encode_headers(struct radeon_uvd_encoder * enc)188 static void *radeon_uvd_enc_encode_headers(struct radeon_uvd_encoder *enc)
189 {
190 unsigned num_slices = 0, num_headers = 0;
191
192 util_dynarray_foreach(&enc->enc_pic.desc->raw_headers, struct pipe_enc_raw_header, header) {
193 if (header->is_slice)
194 num_slices++;
195 num_headers++;
196 }
197
198 if (!num_headers || !num_slices || num_headers == num_slices)
199 return NULL;
200
201 size_t segments_size =
202 sizeof(struct ruvd_enc_output_unit_segment) * (num_headers - num_slices + 1);
203 struct ruvd_enc_feedback_data *data =
204 CALLOC_VARIANT_LENGTH_STRUCT(ruvd_enc_feedback_data, segments_size);
205 if (!data)
206 return NULL;
207
208 uint8_t *ptr = enc->ws->buffer_map(enc->ws, enc->bs_handle, &enc->cs,
209 PIPE_MAP_WRITE | RADEON_MAP_TEMPORARY);
210 if (!ptr) {
211 RVID_ERR("Can't map bs buffer.\n");
212 FREE(data);
213 return NULL;
214 }
215
216 unsigned offset = 0;
217 struct ruvd_enc_output_unit_segment *slice_segment = NULL;
218
219 util_dynarray_foreach(&enc->enc_pic.desc->raw_headers, struct pipe_enc_raw_header, header) {
220 if (header->is_slice) {
221 if (slice_segment)
222 continue;
223 slice_segment = &data->segments[data->num_segments];
224 slice_segment->is_slice = true;
225 } else {
226 unsigned size;
227 switch (header->type) {
228 case PIPE_H265_NAL_VPS:
229 size = radeon_uvd_enc_write_vps(enc, ptr + offset);
230 break;
231 case PIPE_H265_NAL_SPS:
232 size = radeon_uvd_enc_write_sps(enc, ptr + offset);
233 break;
234 case PIPE_H265_NAL_PPS:
235 size = radeon_uvd_enc_write_pps(enc, ptr + offset);
236 break;
237 default:
238 assert(header->buffer);
239 memcpy(ptr + offset, header->buffer, header->size);
240 size = header->size;
241 break;
242 }
243 data->segments[data->num_segments].size = size;
244 data->segments[data->num_segments].offset = offset;
245 offset += size;
246 }
247 data->num_segments++;
248 }
249
250 enc->bs_offset = align(offset, 16);
251 assert(enc->bs_offset < enc->bs_size);
252
253 assert(slice_segment);
254 slice_segment->offset = enc->bs_offset;
255
256 enc->ws->buffer_unmap(enc->ws, enc->bs_handle);
257
258 return data;
259 }
260
radeon_uvd_enc_encode_bitstream(struct pipe_video_codec * encoder,struct pipe_video_buffer * source,struct pipe_resource * destination,void ** fb)261 static void radeon_uvd_enc_encode_bitstream(struct pipe_video_codec *encoder,
262 struct pipe_video_buffer *source,
263 struct pipe_resource *destination, void **fb)
264 {
265 struct radeon_uvd_encoder *enc = (struct radeon_uvd_encoder *)encoder;
266 enc->get_buffer(destination, &enc->bs_handle, NULL);
267 enc->bs_size = destination->width0;
268 enc->bs_offset = 0;
269
270 *fb = enc->fb = CALLOC_STRUCT(rvid_buffer);
271
272 if (!si_vid_create_buffer(enc->screen, enc->fb, 4096, PIPE_USAGE_STAGING)) {
273 RVID_ERR("Can't create feedback buffer.\n");
274 return;
275 }
276
277 enc->fb->user_data = radeon_uvd_enc_encode_headers(enc);
278
279 enc->need_feedback = true;
280 enc->encode(enc);
281 }
282
radeon_uvd_enc_end_frame(struct pipe_video_codec * encoder,struct pipe_video_buffer * source,struct pipe_picture_desc * picture)283 static int radeon_uvd_enc_end_frame(struct pipe_video_codec *encoder,
284 struct pipe_video_buffer *source,
285 struct pipe_picture_desc *picture)
286 {
287 struct radeon_uvd_encoder *enc = (struct radeon_uvd_encoder *)encoder;
288 return flush(enc, picture->flush_flags, picture->fence);
289 }
290
radeon_uvd_enc_destroy(struct pipe_video_codec * encoder)291 static void radeon_uvd_enc_destroy(struct pipe_video_codec *encoder)
292 {
293 struct radeon_uvd_encoder *enc = (struct radeon_uvd_encoder *)encoder;
294
295 if (enc->stream_handle) {
296 struct rvid_buffer fb;
297 enc->need_feedback = false;
298 si_vid_create_buffer(enc->screen, &fb, 512, PIPE_USAGE_STAGING);
299 enc->fb = &fb;
300 enc->destroy(enc);
301 flush(enc, PIPE_FLUSH_ASYNC, NULL);
302 if (enc->si) {
303 si_vid_destroy_buffer(enc->si);
304 FREE(enc->si);
305 }
306 si_vid_destroy_buffer(&fb);
307 }
308
309 if (enc->dpb.res)
310 si_vid_destroy_buffer(&enc->dpb);
311 enc->ws->cs_destroy(&enc->cs);
312 FREE(enc);
313 }
314
radeon_uvd_enc_get_feedback(struct pipe_video_codec * encoder,void * feedback,unsigned * size,struct pipe_enc_feedback_metadata * metadata)315 static void radeon_uvd_enc_get_feedback(struct pipe_video_codec *encoder, void *feedback,
316 unsigned *size, struct pipe_enc_feedback_metadata* metadata)
317 {
318 struct radeon_uvd_encoder *enc = (struct radeon_uvd_encoder *)encoder;
319 struct rvid_buffer *fb = feedback;
320
321 radeon_uvd_enc_feedback_t *fb_data = (radeon_uvd_enc_feedback_t *)enc->ws->buffer_map(
322 enc->ws, fb->res->buf, &enc->cs, PIPE_MAP_READ_WRITE | RADEON_MAP_TEMPORARY);
323
324 if (!fb_data->status)
325 *size = fb_data->bitstream_size;
326 else
327 *size = 0;
328
329 enc->ws->buffer_unmap(enc->ws, fb->res->buf);
330
331 metadata->present_metadata = PIPE_VIDEO_FEEDBACK_METADATA_TYPE_CODEC_UNIT_LOCATION;
332
333 if (fb->user_data) {
334 struct ruvd_enc_feedback_data *data = fb->user_data;
335 metadata->codec_unit_metadata_count = data->num_segments;
336 for (unsigned i = 0; i < data->num_segments; i++) {
337 metadata->codec_unit_metadata[i].offset = data->segments[i].offset;
338 if (data->segments[i].is_slice) {
339 metadata->codec_unit_metadata[i].size = *size;
340 metadata->codec_unit_metadata[i].flags = 0;
341 } else {
342 metadata->codec_unit_metadata[i].size = data->segments[i].size;
343 metadata->codec_unit_metadata[i].flags = PIPE_VIDEO_CODEC_UNIT_LOCATION_FLAG_SINGLE_NALU;
344 }
345 }
346 FREE(fb->user_data);
347 fb->user_data = NULL;
348 } else {
349 metadata->codec_unit_metadata_count = 1;
350 metadata->codec_unit_metadata[0].offset = 0;
351 metadata->codec_unit_metadata[0].size = *size;
352 metadata->codec_unit_metadata[0].flags = 0;
353 }
354
355 si_vid_destroy_buffer(fb);
356 FREE(fb);
357 }
358
radeon_uvd_enc_fence_wait(struct pipe_video_codec * encoder,struct pipe_fence_handle * fence,uint64_t timeout)359 static int radeon_uvd_enc_fence_wait(struct pipe_video_codec *encoder,
360 struct pipe_fence_handle *fence,
361 uint64_t timeout)
362 {
363 struct radeon_uvd_encoder *enc = (struct radeon_uvd_encoder *)encoder;
364
365 return enc->ws->fence_wait(enc->ws, fence, timeout);
366 }
367
radeon_uvd_enc_destroy_fence(struct pipe_video_codec * encoder,struct pipe_fence_handle * fence)368 static void radeon_uvd_enc_destroy_fence(struct pipe_video_codec *encoder,
369 struct pipe_fence_handle *fence)
370 {
371 struct radeon_uvd_encoder *enc = (struct radeon_uvd_encoder *)encoder;
372
373 enc->ws->fence_reference(enc->ws, &fence, NULL);
374 }
375
/**
 * Create a UVD encoder instance.
 *
 * @param context     pipe context the encoder belongs to
 * @param templ       template copied into the new codec's base
 * @param ws          winsys used for command submission
 * @param get_buffer  callback used to resolve resources to buffer handles
 * @return the new codec, or NULL when UVD encode is not supported, on
 *         allocation failure, or when the command stream cannot be created
 */
struct pipe_video_codec *radeon_uvd_create_encoder(struct pipe_context *context,
                                                   const struct pipe_video_codec *templ,
                                                   struct radeon_winsys *ws,
                                                   radeon_uvd_enc_get_buffer get_buffer)
{
   struct si_screen *sscreen = (struct si_screen *)context->screen;
   struct si_context *sctx = (struct si_context *)context;

   if (!si_radeon_uvd_enc_supported(sscreen)) {
      RVID_ERR("Unsupported UVD ENC fw version loaded!\n");
      return NULL;
   }

   struct radeon_uvd_encoder *enc = CALLOC_STRUCT(radeon_uvd_encoder);
   if (!enc)
      return NULL;

   /* Start from the caller's template, then install this driver's hooks. */
   enc->base = *templ;
   enc->base.context = context;
   enc->base.destroy = radeon_uvd_enc_destroy;
   enc->base.begin_frame = radeon_uvd_enc_begin_frame;
   enc->base.encode_bitstream = radeon_uvd_enc_encode_bitstream;
   enc->base.end_frame = radeon_uvd_enc_end_frame;
   enc->base.flush = radeon_uvd_enc_flush;
   enc->base.get_feedback = radeon_uvd_enc_get_feedback;
   enc->base.fence_wait = radeon_uvd_enc_fence_wait;
   enc->base.destroy_fence = radeon_uvd_enc_destroy_fence;

   enc->get_buffer = get_buffer;
   enc->screen = context->screen;
   enc->ws = ws;

   if (!ws->cs_create(&enc->cs, sctx->ctx, AMD_IP_UVD_ENC, radeon_uvd_enc_cs_flush, enc)) {
      RVID_ERR("Can't get command submission context.\n");
      /* Same cleanup the success path never reaches: tear down the command
       * stream object and release the encoder.
       */
      enc->ws->cs_destroy(&enc->cs);
      FREE(enc);
      return NULL;
   }

   radeon_uvd_enc_1_1_init(enc);

   return &enc->base;
}
424
si_radeon_uvd_enc_supported(struct si_screen * sscreen)425 bool si_radeon_uvd_enc_supported(struct si_screen *sscreen)
426 {
427 return sscreen->info.ip[AMD_IP_UVD_ENC].num_queues;
428 }
429