1 /**************************************************************************
2 *
3 * Copyright 2013 Advanced Micro Devices, Inc.
4 *
5 * SPDX-License-Identifier: MIT
6 *
7 **************************************************************************/
8
9 #include "radeon_vce.h"
10
11 #include "pipe/p_video_codec.h"
12 #include "radeon_video.h"
13 #include "radeonsi/si_pipe.h"
14 #include "util/u_memory.h"
15 #include "util/u_video.h"
16 #include "vl/vl_video_buffer.h"
17
18 #include <stdio.h>
19
/*
 * Firmware version constants compared against info.vce_fw_version.
 * The three components are packed into bits 31:24, 23:16 and 15:8.
 */
#define VCE_FW_VERSION(major, minor, rev) (((major) << 24) | ((minor) << 16) | ((rev) << 8))

#define FW_52_0_3 VCE_FW_VERSION(52, 0, 3)
#define FW_52_4_3 VCE_FW_VERSION(52, 4, 3)
#define FW_52_8_3 VCE_FW_VERSION(52, 8, 3)
#define FW_53     VCE_FW_VERSION(53, 0, 0)
24
25 /**
26 * flush commands to the hardware
27 */
flush(struct rvce_encoder * enc,unsigned flags,struct pipe_fence_handle ** fence)28 static void flush(struct rvce_encoder *enc, unsigned flags, struct pipe_fence_handle **fence)
29 {
30 enc->ws->cs_flush(&enc->cs, flags, fence);
31 }
32
#if 0
/**
 * Debug helper (compiled out): print the first 15 dwords of a feedback
 * buffer to stderr, one labelled hexadecimal value per line.
 *
 * The buffer is mapped and unmapped with the same winsys calling convention
 * used by rvce_get_feedback().
 */
static void dump_feedback(struct rvce_encoder *enc, struct rvid_buffer *fb)
{
   /* Labels carry their own tab padding so the values line up in a column. */
   static const char *const labels[] = {
      "encStatus:\t\t\t",
      "encHasBitstream:\t\t",
      "encHasAudioBitstream:\t\t",
      "encBitstreamOffset:\t\t",
      "encBitstreamSize:\t\t",
      "encAudioBitstreamOffset:\t",
      "encAudioBitstreamSize:\t\t",
      "encExtrabytes:\t\t\t",
      "encAudioExtrabytes:\t\t",
      "videoTimeStamp:\t\t\t",
      "audioTimeStamp:\t\t\t",
      "videoOutputType:\t\t",
      "attributeFlags:\t\t\t",
      "seiPrivatePackageOffset:\t",
      "seiPrivatePackageSize:\t\t",
   };
   uint32_t *ptr = enc->ws->buffer_map(enc->ws, fb->res->buf, &enc->cs,
                                       PIPE_MAP_READ_WRITE | RADEON_MAP_TEMPORARY);

   if (!ptr) {
      fprintf(stderr, "Can't map feedback buffer.\n");
      return;
   }

   fprintf(stderr, "\n");
   for (unsigned i = 0; i < sizeof(labels) / sizeof(labels[0]); i++)
      fprintf(stderr, "%s%08x\n", labels[i], (unsigned)ptr[i]);
   fprintf(stderr, "\n");

   enc->ws->buffer_unmap(enc->ws, fb->res->buf);
}
#endif
58
59 /**
60 * Calculate the offsets into the DPB
61 */
si_vce_frame_offset(struct rvce_encoder * enc,unsigned slot,signed * luma_offset,signed * chroma_offset)62 void si_vce_frame_offset(struct rvce_encoder *enc, unsigned slot, signed *luma_offset,
63 signed *chroma_offset)
64 {
65 struct si_screen *sscreen = (struct si_screen *)enc->screen;
66 unsigned pitch, vpitch, fsize, offset = 0;
67
68 if (enc->dual_pipe)
69 offset += RVCE_MAX_AUX_BUFFER_NUM * RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE * 2;
70
71 if (sscreen->info.gfx_level < GFX9) {
72 pitch = align(enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe, 128);
73 vpitch = align(enc->luma->u.legacy.level[0].nblk_y, 16);
74 } else {
75 pitch = align(enc->luma->u.gfx9.surf_pitch * enc->luma->bpe, 256);
76 vpitch = align(enc->luma->u.gfx9.surf_height, 16);
77 }
78 fsize = pitch * (vpitch + vpitch / 2);
79
80 *luma_offset = offset + slot * fsize;
81 *chroma_offset = *luma_offset + pitch * vpitch;
82 }
83
84 /**
85 * destroy this video encoder
86 */
rvce_destroy(struct pipe_video_codec * encoder)87 static void rvce_destroy(struct pipe_video_codec *encoder)
88 {
89 struct rvce_encoder *enc = (struct rvce_encoder *)encoder;
90 if (enc->stream_handle) {
91 struct rvid_buffer fb;
92 si_vid_create_buffer(enc->screen, &fb, 512, PIPE_USAGE_STAGING);
93 enc->fb = &fb;
94 enc->session(enc);
95 enc->destroy(enc);
96 flush(enc, PIPE_FLUSH_ASYNC, NULL);
97 si_vid_destroy_buffer(&fb);
98 }
99 si_vid_destroy_buffer(&enc->dpb);
100 enc->ws->cs_destroy(&enc->cs);
101 FREE(enc);
102 }
103
get_dpb_size(struct rvce_encoder * enc,unsigned slots)104 static unsigned get_dpb_size(struct rvce_encoder *enc, unsigned slots)
105 {
106 struct si_screen *sscreen = (struct si_screen *)enc->screen;
107 unsigned dpb_size;
108
109 dpb_size = (sscreen->info.gfx_level < GFX9)
110 ? align(enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe, 128) *
111 align(enc->luma->u.legacy.level[0].nblk_y, 32)
112 :
113
114 align(enc->luma->u.gfx9.surf_pitch * enc->luma->bpe, 256) *
115 align(enc->luma->u.gfx9.surf_height, 32);
116
117 dpb_size = dpb_size * 3 / 2;
118 dpb_size = dpb_size * slots;
119 if (enc->dual_pipe)
120 dpb_size += RVCE_MAX_AUX_BUFFER_NUM * RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE * 2;
121
122 enc->dpb_slots = slots;
123
124 return dpb_size;
125 }
126
rvce_begin_frame(struct pipe_video_codec * encoder,struct pipe_video_buffer * source,struct pipe_picture_desc * picture)127 static void rvce_begin_frame(struct pipe_video_codec *encoder, struct pipe_video_buffer *source,
128 struct pipe_picture_desc *picture)
129 {
130 struct rvce_encoder *enc = (struct rvce_encoder *)encoder;
131 struct vl_video_buffer *vid_buf = (struct vl_video_buffer *)source;
132 struct pipe_h264_enc_picture_desc *pic = (struct pipe_h264_enc_picture_desc *)picture;
133
134 bool need_rate_control =
135 enc->pic.rate_ctrl[0].rate_ctrl_method != pic->rate_ctrl[0].rate_ctrl_method ||
136 enc->pic.quant_i_frames != pic->quant_i_frames ||
137 enc->pic.quant_p_frames != pic->quant_p_frames ||
138 enc->pic.quant_b_frames != pic->quant_b_frames ||
139 enc->pic.rate_ctrl[0].target_bitrate != pic->rate_ctrl[0].target_bitrate ||
140 enc->pic.rate_ctrl[0].frame_rate_num != pic->rate_ctrl[0].frame_rate_num ||
141 enc->pic.rate_ctrl[0].frame_rate_den != pic->rate_ctrl[0].frame_rate_den;
142
143 enc->pic = *pic;
144 enc->si_get_pic_param(enc, pic);
145
146 enc->get_buffer(vid_buf->resources[0], &enc->handle, &enc->luma);
147 enc->get_buffer(vid_buf->resources[1], NULL, &enc->chroma);
148
149 unsigned dpb_slots = MAX2(pic->seq.max_num_ref_frames + 1, pic->dpb_size);
150
151 if (enc->dpb_slots < dpb_slots) {
152 unsigned dpb_size;
153
154 dpb_size = get_dpb_size(enc, dpb_slots);
155 if (!enc->dpb.res) {
156 if (!si_vid_create_buffer(enc->screen, &enc->dpb, dpb_size, PIPE_USAGE_DEFAULT)) {
157 RVID_ERR("Can't create DPB buffer.\n");
158 return;
159 }
160 } else if (!si_vid_resize_buffer(enc->base.context, &enc->cs, &enc->dpb, dpb_size, NULL)) {
161 RVID_ERR("Can't resize DPB buffer.\n");
162 return;
163 }
164 }
165
166 if (!enc->stream_handle) {
167 struct rvid_buffer fb;
168 enc->stream_handle = si_vid_alloc_stream_handle();
169 si_vid_create_buffer(enc->screen, &fb, 512, PIPE_USAGE_STAGING);
170 enc->fb = &fb;
171 enc->session(enc);
172 enc->create(enc);
173 enc->config(enc);
174 enc->feedback(enc);
175 flush(enc, PIPE_FLUSH_ASYNC, NULL);
176 // dump_feedback(enc, &fb);
177 si_vid_destroy_buffer(&fb);
178 need_rate_control = false;
179 }
180
181 if (need_rate_control) {
182 enc->session(enc);
183 enc->config(enc);
184 flush(enc, PIPE_FLUSH_ASYNC, NULL);
185 }
186 }
187
si_vce_encode_headers(struct rvce_encoder * enc)188 static void *si_vce_encode_headers(struct rvce_encoder *enc)
189 {
190 unsigned num_slices = 0, num_headers = 0;
191
192 util_dynarray_foreach(&enc->pic.raw_headers, struct pipe_enc_raw_header, header) {
193 if (header->is_slice)
194 num_slices++;
195 num_headers++;
196 }
197
198 if (!num_headers || !num_slices || num_headers == num_slices)
199 return NULL;
200
201 size_t segments_size =
202 sizeof(struct rvce_output_unit_segment) * (num_headers - num_slices + 1);
203 struct rvce_feedback_data *data =
204 CALLOC_VARIANT_LENGTH_STRUCT(rvce_feedback_data, segments_size);
205 if (!data)
206 return NULL;
207
208 uint8_t *ptr = enc->ws->buffer_map(enc->ws, enc->bs_handle, &enc->cs,
209 PIPE_MAP_WRITE | RADEON_MAP_TEMPORARY);
210 if (!ptr) {
211 RVID_ERR("Can't map bs buffer.\n");
212 FREE(data);
213 return NULL;
214 }
215
216 unsigned offset = 0;
217 struct rvce_output_unit_segment *slice_segment = NULL;
218
219 util_dynarray_foreach(&enc->pic.raw_headers, struct pipe_enc_raw_header, header) {
220 if (header->is_slice) {
221 if (slice_segment)
222 continue;
223 slice_segment = &data->segments[data->num_segments];
224 slice_segment->is_slice = true;
225 } else {
226 unsigned size;
227 /* Startcode may be 3 or 4 bytes. */
228 const uint8_t nal_byte = header->buffer[header->buffer[2] == 0x1 ? 3 : 4];
229
230 switch (header->type) {
231 case PIPE_H264_NAL_SPS:
232 size = si_vce_write_sps(enc, nal_byte, ptr + offset);
233 break;
234 case PIPE_H264_NAL_PPS:
235 size = si_vce_write_pps(enc, nal_byte, ptr + offset);
236 break;
237 default:
238 assert(header->buffer);
239 memcpy(ptr + offset, header->buffer, header->size);
240 size = header->size;
241 break;
242 }
243 data->segments[data->num_segments].size = size;
244 data->segments[data->num_segments].offset = offset;
245 offset += size;
246 }
247 data->num_segments++;
248 }
249
250 enc->bs_offset = align(offset, 16);
251 assert(enc->bs_offset < enc->bs_size);
252
253 assert(slice_segment);
254 slice_segment->offset = enc->bs_offset;
255
256 enc->ws->buffer_unmap(enc->ws, enc->bs_handle);
257
258 return data;
259 }
260
rvce_encode_bitstream(struct pipe_video_codec * encoder,struct pipe_video_buffer * source,struct pipe_resource * destination,void ** fb)261 static void rvce_encode_bitstream(struct pipe_video_codec *encoder,
262 struct pipe_video_buffer *source,
263 struct pipe_resource *destination, void **fb)
264 {
265 struct rvce_encoder *enc = (struct rvce_encoder *)encoder;
266 enc->get_buffer(destination, &enc->bs_handle, NULL);
267 enc->bs_size = destination->width0;
268 enc->bs_offset = 0;
269
270 *fb = enc->fb = CALLOC_STRUCT(rvid_buffer);
271 if (!si_vid_create_buffer(enc->screen, enc->fb, 512, PIPE_USAGE_STAGING)) {
272 RVID_ERR("Can't create feedback buffer.\n");
273 return;
274 }
275
276 enc->fb->user_data = si_vce_encode_headers(enc);
277
278 if (!radeon_emitted(&enc->cs, 0))
279 enc->session(enc);
280 enc->encode(enc);
281 enc->feedback(enc);
282 }
283
rvce_end_frame(struct pipe_video_codec * encoder,struct pipe_video_buffer * source,struct pipe_picture_desc * picture)284 static int rvce_end_frame(struct pipe_video_codec *encoder, struct pipe_video_buffer *source,
285 struct pipe_picture_desc *picture)
286 {
287 struct rvce_encoder *enc = (struct rvce_encoder *)encoder;
288
289 flush(enc, picture->flush_flags, picture->fence);
290
291 return 0;
292 }
293
rvce_get_feedback(struct pipe_video_codec * encoder,void * feedback,unsigned * size,struct pipe_enc_feedback_metadata * metadata)294 static void rvce_get_feedback(struct pipe_video_codec *encoder, void *feedback, unsigned *size,
295 struct pipe_enc_feedback_metadata* metadata)
296 {
297 struct rvce_encoder *enc = (struct rvce_encoder *)encoder;
298 struct rvid_buffer *fb = feedback;
299
300 uint32_t *ptr = enc->ws->buffer_map(enc->ws, fb->res->buf, &enc->cs,
301 PIPE_MAP_READ_WRITE | RADEON_MAP_TEMPORARY);
302
303 if (ptr[1]) {
304 *size = ptr[4] - ptr[9];
305 } else {
306 *size = 0;
307 }
308
309 enc->ws->buffer_unmap(enc->ws, fb->res->buf);
310
311 metadata->present_metadata = PIPE_VIDEO_FEEDBACK_METADATA_TYPE_CODEC_UNIT_LOCATION;
312
313 if (fb->user_data) {
314 struct rvce_feedback_data *data = fb->user_data;
315 metadata->codec_unit_metadata_count = data->num_segments;
316 for (unsigned i = 0; i < data->num_segments; i++) {
317 metadata->codec_unit_metadata[i].offset = data->segments[i].offset;
318 if (data->segments[i].is_slice) {
319 metadata->codec_unit_metadata[i].size = *size;
320 metadata->codec_unit_metadata[i].flags = 0;
321 } else {
322 metadata->codec_unit_metadata[i].size = data->segments[i].size;
323 metadata->codec_unit_metadata[i].flags = PIPE_VIDEO_CODEC_UNIT_LOCATION_FLAG_SINGLE_NALU;
324 }
325 }
326 FREE(fb->user_data);
327 fb->user_data = NULL;
328 } else {
329 metadata->codec_unit_metadata_count = 1;
330 metadata->codec_unit_metadata[0].offset = 0;
331 metadata->codec_unit_metadata[0].size = *size;
332 metadata->codec_unit_metadata[0].flags = 0;
333 }
334
335 // dump_feedback(enc, fb);
336 si_vid_destroy_buffer(fb);
337 FREE(fb);
338 }
339
rvce_fence_wait(struct pipe_video_codec * encoder,struct pipe_fence_handle * fence,uint64_t timeout)340 static int rvce_fence_wait(struct pipe_video_codec *encoder,
341 struct pipe_fence_handle *fence,
342 uint64_t timeout)
343 {
344 struct rvce_encoder *enc = (struct rvce_encoder *)encoder;
345
346 return enc->ws->fence_wait(enc->ws, fence, timeout);
347 }
348
rvce_destroy_fence(struct pipe_video_codec * encoder,struct pipe_fence_handle * fence)349 static void rvce_destroy_fence(struct pipe_video_codec *encoder,
350 struct pipe_fence_handle *fence)
351 {
352 struct rvce_encoder *enc = (struct rvce_encoder *)encoder;
353
354 enc->ws->fence_reference(enc->ws, &fence, NULL);
355 }
356
357 /**
358 * flush any outstanding command buffers to the hardware
359 */
rvce_flush(struct pipe_video_codec * encoder)360 static void rvce_flush(struct pipe_video_codec *encoder)
361 {
362 struct rvce_encoder *enc = (struct rvce_encoder *)encoder;
363
364 flush(enc, PIPE_FLUSH_ASYNC, NULL);
365 }
366
/**
 * Flush callback registered with the command stream in
 * si_vce_create_encoder(). It deliberately does nothing.
 */
static void rvce_cs_flush(void *ctx, unsigned flags, struct pipe_fence_handle **fence)
{
   /* All arguments are intentionally ignored. */
   (void)ctx;
   (void)flags;
   (void)fence;
}
371
/**
 * Create a VCE encoder object.
 *
 * Returns NULL when no VCE firmware version is reported, when the reported
 * firmware version is not supported, when the encoder cannot be allocated,
 * or when the command stream cannot be created.
 *
 * The new encoder starts as a copy of templ with its context and video
 * codec entry points replaced by the implementations in this file.
 * get_buffer is stored for later lookup of surface/buffer handles.
 */
struct pipe_video_codec *si_vce_create_encoder(struct pipe_context *context,
                                               const struct pipe_video_codec *templ,
                                               struct radeon_winsys *ws, rvce_get_buffer get_buffer)
{
   struct si_screen *sscreen = (struct si_screen *)context->screen;
   struct si_context *sctx = (struct si_context *)context;

   if (!sscreen->info.vce_fw_version) {
      RVID_ERR("Kernel doesn't supports VCE!\n");
      return NULL;
   }

   if (!si_vce_is_fw_version_supported(sscreen)) {
      RVID_ERR("Unsupported VCE fw version loaded!\n");
      return NULL;
   }

   struct rvce_encoder *enc = CALLOC_STRUCT(rvce_encoder);
   if (!enc)
      return NULL;

   if (sscreen->info.is_amdgpu)
      enc->use_vm = true;

   /* Dual pipe is enabled from TONGA on, except for the families below. */
   switch (sscreen->info.family) {
   case CHIP_STONEY:
   case CHIP_POLARIS11:
   case CHIP_POLARIS12:
   case CHIP_VEGAM:
      break;
   default:
      if (sscreen->info.family >= CHIP_TONGA)
         enc->dual_pipe = true;
      break;
   }

   enc->base = *templ;
   enc->base.context = context;

   enc->base.destroy = rvce_destroy;
   enc->base.begin_frame = rvce_begin_frame;
   enc->base.encode_bitstream = rvce_encode_bitstream;
   enc->base.end_frame = rvce_end_frame;
   enc->base.flush = rvce_flush;
   enc->base.get_feedback = rvce_get_feedback;
   enc->base.fence_wait = rvce_fence_wait;
   enc->base.destroy_fence = rvce_destroy_fence;
   enc->get_buffer = get_buffer;

   enc->screen = context->screen;
   enc->ws = ws;

   if (!ws->cs_create(&enc->cs, sctx->ctx, AMD_IP_VCE, rvce_cs_flush, enc)) {
      RVID_ERR("Can't get command submission context.\n");
      enc->ws->cs_destroy(&enc->cs);
      FREE(enc);
      return NULL;
   }

   si_vce_52_init(enc);

   return &enc->base;
}
432
433 /**
434 * check if kernel has the right fw version loaded
435 */
si_vce_is_fw_version_supported(struct si_screen * sscreen)436 bool si_vce_is_fw_version_supported(struct si_screen *sscreen)
437 {
438 switch (sscreen->info.vce_fw_version) {
439 case FW_52_0_3:
440 case FW_52_4_3:
441 case FW_52_8_3:
442 return true;
443 default:
444 if ((sscreen->info.vce_fw_version & (0xff << 24)) >= FW_53)
445 return true;
446 else
447 return false;
448 }
449 }
450
451 /**
452 * Add the buffer as relocation to the current command submission
453 */
si_vce_add_buffer(struct rvce_encoder * enc,struct pb_buffer_lean * buf,unsigned usage,enum radeon_bo_domain domain,signed offset)454 void si_vce_add_buffer(struct rvce_encoder *enc, struct pb_buffer_lean *buf, unsigned usage,
455 enum radeon_bo_domain domain, signed offset)
456 {
457 int reloc_idx;
458
459 reloc_idx = enc->ws->cs_add_buffer(&enc->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, domain);
460 if (enc->use_vm) {
461 uint64_t addr;
462 addr = enc->ws->buffer_get_virtual_address(buf);
463 addr = addr + offset;
464 RVCE_CS(addr >> 32);
465 RVCE_CS(addr);
466 } else {
467 offset += enc->ws->buffer_get_reloc_offset(buf);
468 RVCE_CS(reloc_idx * 4);
469 RVCE_CS(offset);
470 }
471 }
472