• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**************************************************************************
2  *
3  * Copyright 2013 Advanced Micro Devices, Inc.
4  *
5  * SPDX-License-Identifier: MIT
6  *
7  **************************************************************************/
8 
9 #include "radeon_vce.h"
10 
11 #include "pipe/p_video_codec.h"
12 #include "radeon_video.h"
13 #include "radeonsi/si_pipe.h"
14 #include "util/u_memory.h"
15 #include "util/u_video.h"
16 #include "vl/vl_video_buffer.h"
17 
18 #include <stdio.h>
19 
/*
 * VCE firmware version words accepted by this driver.  Each one packs three
 * numbers into bits 31:24, 23:16 and 15:8 of a 32-bit value.
 */
#define RVCE_FW_VERSION(a, b, c) (((a) << 24) | ((b) << 16) | ((c) << 8))

#define FW_52_0_3 RVCE_FW_VERSION(52, 0, 3)
#define FW_52_4_3 RVCE_FW_VERSION(52, 4, 3)
#define FW_52_8_3 RVCE_FW_VERSION(52, 8, 3)
/* Lowest version word of the 53 series; only the top byte is populated. */
#define FW_53     RVCE_FW_VERSION(53, 0, 0)
24 
25 /**
26  * flush commands to the hardware
27  */
flush(struct rvce_encoder * enc,unsigned flags,struct pipe_fence_handle ** fence)28 static void flush(struct rvce_encoder *enc, unsigned flags, struct pipe_fence_handle **fence)
29 {
30    enc->ws->cs_flush(&enc->cs, flags, fence);
31 }
32 
#if 0
/**
 * Debug helper (compiled out): print the first 15 dwords of a feedback
 * buffer to stderr, one labelled line per dword.
 *
 * The map/unmap calls mirror the ones in rvce_get_feedback() so that this
 * helper builds again when it is enabled.
 */
static void dump_feedback(struct rvce_encoder *enc, struct rvid_buffer *fb)
{
   /* Label (including its tab padding) for each consecutive dword. */
   static const char *const labels[] = {
      "encStatus:\t\t\t",
      "encHasBitstream:\t\t",
      "encHasAudioBitstream:\t\t",
      "encBitstreamOffset:\t\t",
      "encBitstreamSize:\t\t",
      "encAudioBitstreamOffset:\t",
      "encAudioBitstreamSize:\t\t",
      "encExtrabytes:\t\t\t",
      "encAudioExtrabytes:\t\t",
      "videoTimeStamp:\t\t\t",
      "audioTimeStamp:\t\t\t",
      "videoOutputType:\t\t",
      "attributeFlags:\t\t\t",
      "seiPrivatePackageOffset:\t",
      "seiPrivatePackageSize:\t\t",
   };
   uint32_t *ptr = enc->ws->buffer_map(enc->ws, fb->res->buf, &enc->cs,
                                       PIPE_MAP_READ_WRITE | RADEON_MAP_TEMPORARY);

   if (!ptr) {
      RVID_ERR("Can't map feedback buffer.\n");
      return;
   }

   fprintf(stderr, "\n");
   for (unsigned i = 0; i < sizeof(labels) / sizeof(labels[0]); i++)
      fprintf(stderr, "%s%08x\n", labels[i], ptr[i]);
   fprintf(stderr, "\n");

   enc->ws->buffer_unmap(enc->ws, fb->res->buf);
}
#endif
58 
59 /**
60  * Calculate the offsets into the DPB
61  */
si_vce_frame_offset(struct rvce_encoder * enc,unsigned slot,signed * luma_offset,signed * chroma_offset)62 void si_vce_frame_offset(struct rvce_encoder *enc, unsigned slot, signed *luma_offset,
63                          signed *chroma_offset)
64 {
65    struct si_screen *sscreen = (struct si_screen *)enc->screen;
66    unsigned pitch, vpitch, fsize, offset = 0;
67 
68    if (enc->dual_pipe)
69       offset += RVCE_MAX_AUX_BUFFER_NUM * RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE * 2;
70 
71    if (sscreen->info.gfx_level < GFX9) {
72       pitch = align(enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe, 128);
73       vpitch = align(enc->luma->u.legacy.level[0].nblk_y, 16);
74    } else {
75       pitch = align(enc->luma->u.gfx9.surf_pitch * enc->luma->bpe, 256);
76       vpitch = align(enc->luma->u.gfx9.surf_height, 16);
77    }
78    fsize = pitch * (vpitch + vpitch / 2);
79 
80    *luma_offset = offset + slot * fsize;
81    *chroma_offset = *luma_offset + pitch * vpitch;
82 }
83 
84 /**
85  * destroy this video encoder
86  */
rvce_destroy(struct pipe_video_codec * encoder)87 static void rvce_destroy(struct pipe_video_codec *encoder)
88 {
89    struct rvce_encoder *enc = (struct rvce_encoder *)encoder;
90    if (enc->stream_handle) {
91       struct rvid_buffer fb;
92       si_vid_create_buffer(enc->screen, &fb, 512, PIPE_USAGE_STAGING);
93       enc->fb = &fb;
94       enc->session(enc);
95       enc->destroy(enc);
96       flush(enc, PIPE_FLUSH_ASYNC, NULL);
97       si_vid_destroy_buffer(&fb);
98    }
99    si_vid_destroy_buffer(&enc->dpb);
100    enc->ws->cs_destroy(&enc->cs);
101    FREE(enc);
102 }
103 
get_dpb_size(struct rvce_encoder * enc,unsigned slots)104 static unsigned get_dpb_size(struct rvce_encoder *enc, unsigned slots)
105 {
106    struct si_screen *sscreen = (struct si_screen *)enc->screen;
107    unsigned dpb_size;
108 
109    dpb_size = (sscreen->info.gfx_level < GFX9)
110                  ? align(enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe, 128) *
111                       align(enc->luma->u.legacy.level[0].nblk_y, 32)
112                  :
113 
114                  align(enc->luma->u.gfx9.surf_pitch * enc->luma->bpe, 256) *
115                     align(enc->luma->u.gfx9.surf_height, 32);
116 
117    dpb_size = dpb_size * 3 / 2;
118    dpb_size = dpb_size * slots;
119    if (enc->dual_pipe)
120       dpb_size += RVCE_MAX_AUX_BUFFER_NUM * RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE * 2;
121 
122    enc->dpb_slots = slots;
123 
124    return dpb_size;
125 }
126 
rvce_begin_frame(struct pipe_video_codec * encoder,struct pipe_video_buffer * source,struct pipe_picture_desc * picture)127 static void rvce_begin_frame(struct pipe_video_codec *encoder, struct pipe_video_buffer *source,
128                              struct pipe_picture_desc *picture)
129 {
130    struct rvce_encoder *enc = (struct rvce_encoder *)encoder;
131    struct vl_video_buffer *vid_buf = (struct vl_video_buffer *)source;
132    struct pipe_h264_enc_picture_desc *pic = (struct pipe_h264_enc_picture_desc *)picture;
133 
134    bool need_rate_control =
135       enc->pic.rate_ctrl[0].rate_ctrl_method != pic->rate_ctrl[0].rate_ctrl_method ||
136       enc->pic.quant_i_frames != pic->quant_i_frames ||
137       enc->pic.quant_p_frames != pic->quant_p_frames ||
138       enc->pic.quant_b_frames != pic->quant_b_frames ||
139       enc->pic.rate_ctrl[0].target_bitrate != pic->rate_ctrl[0].target_bitrate ||
140       enc->pic.rate_ctrl[0].frame_rate_num != pic->rate_ctrl[0].frame_rate_num ||
141       enc->pic.rate_ctrl[0].frame_rate_den != pic->rate_ctrl[0].frame_rate_den;
142 
143    enc->pic = *pic;
144    enc->si_get_pic_param(enc, pic);
145 
146    enc->get_buffer(vid_buf->resources[0], &enc->handle, &enc->luma);
147    enc->get_buffer(vid_buf->resources[1], NULL, &enc->chroma);
148 
149    unsigned dpb_slots = MAX2(pic->seq.max_num_ref_frames + 1, pic->dpb_size);
150 
151    if (enc->dpb_slots < dpb_slots) {
152       unsigned dpb_size;
153 
154       dpb_size = get_dpb_size(enc, dpb_slots);
155       if (!enc->dpb.res) {
156          if (!si_vid_create_buffer(enc->screen, &enc->dpb, dpb_size, PIPE_USAGE_DEFAULT)) {
157             RVID_ERR("Can't create DPB buffer.\n");
158             return;
159          }
160       } else if (!si_vid_resize_buffer(enc->base.context, &enc->cs, &enc->dpb, dpb_size, NULL)) {
161          RVID_ERR("Can't resize DPB buffer.\n");
162          return;
163       }
164    }
165 
166    if (!enc->stream_handle) {
167       struct rvid_buffer fb;
168       enc->stream_handle = si_vid_alloc_stream_handle();
169       si_vid_create_buffer(enc->screen, &fb, 512, PIPE_USAGE_STAGING);
170       enc->fb = &fb;
171       enc->session(enc);
172       enc->create(enc);
173       enc->config(enc);
174       enc->feedback(enc);
175       flush(enc, PIPE_FLUSH_ASYNC, NULL);
176       // dump_feedback(enc, &fb);
177       si_vid_destroy_buffer(&fb);
178       need_rate_control = false;
179    }
180 
181    if (need_rate_control) {
182       enc->session(enc);
183       enc->config(enc);
184       flush(enc, PIPE_FLUSH_ASYNC, NULL);
185    }
186 }
187 
si_vce_encode_headers(struct rvce_encoder * enc)188 static void *si_vce_encode_headers(struct rvce_encoder *enc)
189 {
190    unsigned num_slices = 0, num_headers = 0;
191 
192    util_dynarray_foreach(&enc->pic.raw_headers, struct pipe_enc_raw_header, header) {
193       if (header->is_slice)
194          num_slices++;
195       num_headers++;
196    }
197 
198    if (!num_headers || !num_slices || num_headers == num_slices)
199       return NULL;
200 
201    size_t segments_size =
202       sizeof(struct rvce_output_unit_segment) * (num_headers - num_slices + 1);
203    struct rvce_feedback_data *data =
204       CALLOC_VARIANT_LENGTH_STRUCT(rvce_feedback_data, segments_size);
205    if (!data)
206       return NULL;
207 
208    uint8_t *ptr = enc->ws->buffer_map(enc->ws, enc->bs_handle, &enc->cs,
209                                       PIPE_MAP_WRITE | RADEON_MAP_TEMPORARY);
210    if (!ptr) {
211       RVID_ERR("Can't map bs buffer.\n");
212       FREE(data);
213       return NULL;
214    }
215 
216    unsigned offset = 0;
217    struct rvce_output_unit_segment *slice_segment = NULL;
218 
219    util_dynarray_foreach(&enc->pic.raw_headers, struct pipe_enc_raw_header, header) {
220       if (header->is_slice) {
221          if (slice_segment)
222             continue;
223          slice_segment = &data->segments[data->num_segments];
224          slice_segment->is_slice = true;
225       } else {
226          unsigned size;
227          /* Startcode may be 3 or 4 bytes. */
228          const uint8_t nal_byte = header->buffer[header->buffer[2] == 0x1 ? 3 : 4];
229 
230          switch (header->type) {
231          case PIPE_H264_NAL_SPS:
232             size = si_vce_write_sps(enc, nal_byte, ptr + offset);
233             break;
234          case PIPE_H264_NAL_PPS:
235             size = si_vce_write_pps(enc, nal_byte, ptr + offset);
236             break;
237          default:
238             assert(header->buffer);
239             memcpy(ptr + offset, header->buffer, header->size);
240             size = header->size;
241             break;
242          }
243          data->segments[data->num_segments].size = size;
244          data->segments[data->num_segments].offset = offset;
245          offset += size;
246       }
247       data->num_segments++;
248    }
249 
250    enc->bs_offset = align(offset, 16);
251    assert(enc->bs_offset < enc->bs_size);
252 
253    assert(slice_segment);
254    slice_segment->offset = enc->bs_offset;
255 
256    enc->ws->buffer_unmap(enc->ws, enc->bs_handle);
257 
258    return data;
259 }
260 
rvce_encode_bitstream(struct pipe_video_codec * encoder,struct pipe_video_buffer * source,struct pipe_resource * destination,void ** fb)261 static void rvce_encode_bitstream(struct pipe_video_codec *encoder,
262                                   struct pipe_video_buffer *source,
263                                   struct pipe_resource *destination, void **fb)
264 {
265    struct rvce_encoder *enc = (struct rvce_encoder *)encoder;
266    enc->get_buffer(destination, &enc->bs_handle, NULL);
267    enc->bs_size = destination->width0;
268    enc->bs_offset = 0;
269 
270    *fb = enc->fb = CALLOC_STRUCT(rvid_buffer);
271    if (!si_vid_create_buffer(enc->screen, enc->fb, 512, PIPE_USAGE_STAGING)) {
272       RVID_ERR("Can't create feedback buffer.\n");
273       return;
274    }
275 
276    enc->fb->user_data = si_vce_encode_headers(enc);
277 
278    if (!radeon_emitted(&enc->cs, 0))
279       enc->session(enc);
280    enc->encode(enc);
281    enc->feedback(enc);
282 }
283 
rvce_end_frame(struct pipe_video_codec * encoder,struct pipe_video_buffer * source,struct pipe_picture_desc * picture)284 static int rvce_end_frame(struct pipe_video_codec *encoder, struct pipe_video_buffer *source,
285                           struct pipe_picture_desc *picture)
286 {
287    struct rvce_encoder *enc = (struct rvce_encoder *)encoder;
288 
289    flush(enc, picture->flush_flags, picture->fence);
290 
291    return 0;
292 }
293 
rvce_get_feedback(struct pipe_video_codec * encoder,void * feedback,unsigned * size,struct pipe_enc_feedback_metadata * metadata)294 static void rvce_get_feedback(struct pipe_video_codec *encoder, void *feedback, unsigned *size,
295                               struct pipe_enc_feedback_metadata* metadata)
296 {
297    struct rvce_encoder *enc = (struct rvce_encoder *)encoder;
298    struct rvid_buffer *fb = feedback;
299 
300    uint32_t *ptr = enc->ws->buffer_map(enc->ws, fb->res->buf, &enc->cs,
301                                        PIPE_MAP_READ_WRITE | RADEON_MAP_TEMPORARY);
302 
303    if (ptr[1]) {
304       *size = ptr[4] - ptr[9];
305    } else {
306       *size = 0;
307    }
308 
309    enc->ws->buffer_unmap(enc->ws, fb->res->buf);
310 
311    metadata->present_metadata = PIPE_VIDEO_FEEDBACK_METADATA_TYPE_CODEC_UNIT_LOCATION;
312 
313    if (fb->user_data) {
314       struct rvce_feedback_data *data = fb->user_data;
315       metadata->codec_unit_metadata_count = data->num_segments;
316       for (unsigned i = 0; i < data->num_segments; i++) {
317          metadata->codec_unit_metadata[i].offset = data->segments[i].offset;
318          if (data->segments[i].is_slice) {
319             metadata->codec_unit_metadata[i].size = *size;
320             metadata->codec_unit_metadata[i].flags = 0;
321          } else {
322             metadata->codec_unit_metadata[i].size = data->segments[i].size;
323             metadata->codec_unit_metadata[i].flags = PIPE_VIDEO_CODEC_UNIT_LOCATION_FLAG_SINGLE_NALU;
324          }
325       }
326       FREE(fb->user_data);
327       fb->user_data = NULL;
328    } else {
329       metadata->codec_unit_metadata_count = 1;
330       metadata->codec_unit_metadata[0].offset = 0;
331       metadata->codec_unit_metadata[0].size = *size;
332       metadata->codec_unit_metadata[0].flags = 0;
333    }
334 
335    // dump_feedback(enc, fb);
336    si_vid_destroy_buffer(fb);
337    FREE(fb);
338 }
339 
rvce_fence_wait(struct pipe_video_codec * encoder,struct pipe_fence_handle * fence,uint64_t timeout)340 static int rvce_fence_wait(struct pipe_video_codec *encoder,
341                            struct pipe_fence_handle *fence,
342                            uint64_t timeout)
343 {
344    struct rvce_encoder *enc = (struct rvce_encoder *)encoder;
345 
346    return enc->ws->fence_wait(enc->ws, fence, timeout);
347 }
348 
rvce_destroy_fence(struct pipe_video_codec * encoder,struct pipe_fence_handle * fence)349 static void rvce_destroy_fence(struct pipe_video_codec *encoder,
350                                struct pipe_fence_handle *fence)
351 {
352    struct rvce_encoder *enc = (struct rvce_encoder *)encoder;
353 
354    enc->ws->fence_reference(enc->ws, &fence, NULL);
355 }
356 
357 /**
358  * flush any outstanding command buffers to the hardware
359  */
rvce_flush(struct pipe_video_codec * encoder)360 static void rvce_flush(struct pipe_video_codec *encoder)
361 {
362    struct rvce_encoder *enc = (struct rvce_encoder *)encoder;
363 
364    flush(enc, PIPE_FLUSH_ASYNC, NULL);
365 }
366 
/**
 * Flush callback registered with the command stream in
 * si_vce_create_encoder().  Intentionally does nothing.
 */
static void rvce_cs_flush(void *ctx, unsigned flags, struct pipe_fence_handle **fence)
{
   /* deliberately empty: the request is ignored */
}
371 
/**
 * Create a VCE encoder object.
 *
 * context:    pipe context the encoder belongs to
 * templ:      template copied into the encoder's base before the callbacks
 *             are installed
 * ws:         winsys used for command submission and buffer access
 * get_buffer: callback used to look up the buffers behind pipe resources
 *
 * Returns the new encoder's pipe_video_codec, or NULL when the kernel
 * reports no VCE firmware, the firmware version is unsupported, allocation
 * fails, or no command stream can be created.
 */
struct pipe_video_codec *si_vce_create_encoder(struct pipe_context *context,
                                               const struct pipe_video_codec *templ,
                                               struct radeon_winsys *ws, rvce_get_buffer get_buffer)
{
   struct si_context *sctx = (struct si_context *)context;
   struct si_screen *sscreen = (struct si_screen *)context->screen;

   if (!sscreen->info.vce_fw_version) {
      RVID_ERR("Kernel doesn't supports VCE!\n");
      return NULL;
   }

   if (!si_vce_is_fw_version_supported(sscreen)) {
      RVID_ERR("Unsupported VCE fw version loaded!\n");
      return NULL;
   }

   struct rvce_encoder *enc = CALLOC_STRUCT(rvce_encoder);
   if (!enc)
      return NULL;

   if (sscreen->info.is_amdgpu)
      enc->use_vm = true;

   /* Dual-pipe mode: Tonga and newer, minus the families listed here. */
   const bool excluded_family =
      sscreen->info.family == CHIP_STONEY || sscreen->info.family == CHIP_POLARIS11 ||
      sscreen->info.family == CHIP_POLARIS12 || sscreen->info.family == CHIP_VEGAM;
   if (sscreen->info.family >= CHIP_TONGA && !excluded_family)
      enc->dual_pipe = true;

   enc->screen = context->screen;
   enc->ws = ws;
   enc->get_buffer = get_buffer;

   /* Start from the template, then install this driver's entry points. */
   enc->base = *templ;
   enc->base.context = context;
   enc->base.destroy = rvce_destroy;
   enc->base.begin_frame = rvce_begin_frame;
   enc->base.encode_bitstream = rvce_encode_bitstream;
   enc->base.end_frame = rvce_end_frame;
   enc->base.flush = rvce_flush;
   enc->base.get_feedback = rvce_get_feedback;
   enc->base.fence_wait = rvce_fence_wait;
   enc->base.destroy_fence = rvce_destroy_fence;

   if (!ws->cs_create(&enc->cs, sctx->ctx, AMD_IP_VCE, rvce_cs_flush, enc)) {
      RVID_ERR("Can't get command submission context.\n");
      enc->ws->cs_destroy(&enc->cs);
      FREE(enc);
      return NULL;
   }

   si_vce_52_init(enc);

   return &enc->base;
}
432 
433 /**
434  * check if kernel has the right fw version loaded
435  */
si_vce_is_fw_version_supported(struct si_screen * sscreen)436 bool si_vce_is_fw_version_supported(struct si_screen *sscreen)
437 {
438    switch (sscreen->info.vce_fw_version) {
439    case FW_52_0_3:
440    case FW_52_4_3:
441    case FW_52_8_3:
442       return true;
443    default:
444       if ((sscreen->info.vce_fw_version & (0xff << 24)) >= FW_53)
445          return true;
446       else
447          return false;
448    }
449 }
450 
451 /**
452  * Add the buffer as relocation to the current command submission
453  */
si_vce_add_buffer(struct rvce_encoder * enc,struct pb_buffer_lean * buf,unsigned usage,enum radeon_bo_domain domain,signed offset)454 void si_vce_add_buffer(struct rvce_encoder *enc, struct pb_buffer_lean *buf, unsigned usage,
455                        enum radeon_bo_domain domain, signed offset)
456 {
457    int reloc_idx;
458 
459    reloc_idx = enc->ws->cs_add_buffer(&enc->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, domain);
460    if (enc->use_vm) {
461       uint64_t addr;
462       addr = enc->ws->buffer_get_virtual_address(buf);
463       addr = addr + offset;
464       RVCE_CS(addr >> 32);
465       RVCE_CS(addr);
466    } else {
467       offset += enc->ws->buffer_get_reloc_offset(buf);
468       RVCE_CS(reloc_idx * 4);
469       RVCE_CS(offset);
470    }
471 }
472