• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**************************************************************************
2  *
3  * Copyright 2013 Advanced Micro Devices, Inc.
4  *
5  * SPDX-License-Identifier: MIT
6  *
7  **************************************************************************/
8 
9 #include "radeon_vce.h"
10 
11 #include "pipe/p_video_codec.h"
12 #include "radeon_video.h"
13 #include "radeonsi/si_pipe.h"
14 #include "util/u_memory.h"
15 #include "util/u_video.h"
16 #include "vl/vl_video_buffer.h"
17 
18 #include <stdio.h>
19 
/*
 * VCE firmware versions known to this driver.  A version is packed into a
 * single word: major in bits 31:24, minor in bits 23:16, revision in bits 15:8.
 */
#define RVCE_FW_VERSION(major, minor, rev) (((major) << 24) | ((minor) << 16) | ((rev) << 8))

#define FW_40_2_2  RVCE_FW_VERSION(40, 2, 2)
#define FW_50_0_1  RVCE_FW_VERSION(50, 0, 1)
#define FW_50_1_2  RVCE_FW_VERSION(50, 1, 2)
#define FW_50_10_2 RVCE_FW_VERSION(50, 10, 2)
#define FW_50_17_3 RVCE_FW_VERSION(50, 17, 3)
#define FW_52_0_3  RVCE_FW_VERSION(52, 0, 3)
#define FW_52_4_3  RVCE_FW_VERSION(52, 4, 3)
#define FW_52_8_3  RVCE_FW_VERSION(52, 8, 3)
/* Major version only; compared against the major byte of the loaded firmware. */
#define FW_53      (53 << 24)
29 
30 /**
31  * flush commands to the hardware
32  */
flush(struct rvce_encoder * enc)33 static void flush(struct rvce_encoder *enc)
34 {
35    enc->ws->cs_flush(&enc->cs, PIPE_FLUSH_ASYNC, NULL);
36    enc->task_info_idx = 0;
37    enc->bs_idx = 0;
38 }
39 
#if 0
/**
 * Debug helper: print the first 15 dwords of a feedback buffer to stderr.
 *
 * Disabled by default.  The winsys calls mirror the ones used by
 * rvce_get_feedback() so the helper builds again when it is enabled.
 */
static void dump_feedback(struct rvce_encoder *enc, struct rvid_buffer *fb)
{
   uint32_t *ptr = enc->ws->buffer_map(enc->ws, fb->res->buf, &enc->cs,
                                       PIPE_MAP_READ_WRITE | RADEON_MAP_TEMPORARY);
   unsigned i = 0;

   /* Nothing to print if the buffer could not be mapped. */
   if (!ptr)
      return;

   fprintf(stderr, "\n");
   fprintf(stderr, "encStatus:\t\t\t%08x\n", ptr[i++]);
   fprintf(stderr, "encHasBitstream:\t\t%08x\n", ptr[i++]);
   fprintf(stderr, "encHasAudioBitstream:\t\t%08x\n", ptr[i++]);
   fprintf(stderr, "encBitstreamOffset:\t\t%08x\n", ptr[i++]);
   fprintf(stderr, "encBitstreamSize:\t\t%08x\n", ptr[i++]);
   fprintf(stderr, "encAudioBitstreamOffset:\t%08x\n", ptr[i++]);
   fprintf(stderr, "encAudioBitstreamSize:\t\t%08x\n", ptr[i++]);
   fprintf(stderr, "encExtrabytes:\t\t\t%08x\n", ptr[i++]);
   fprintf(stderr, "encAudioExtrabytes:\t\t%08x\n", ptr[i++]);
   fprintf(stderr, "videoTimeStamp:\t\t\t%08x\n", ptr[i++]);
   fprintf(stderr, "audioTimeStamp:\t\t\t%08x\n", ptr[i++]);
   fprintf(stderr, "videoOutputType:\t\t%08x\n", ptr[i++]);
   fprintf(stderr, "attributeFlags:\t\t\t%08x\n", ptr[i++]);
   fprintf(stderr, "seiPrivatePackageOffset:\t%08x\n", ptr[i++]);
   fprintf(stderr, "seiPrivatePackageSize:\t\t%08x\n", ptr[i++]);
   fprintf(stderr, "\n");
   enc->ws->buffer_unmap(enc->ws, fb->res->buf);
}
#endif
65 
66 /**
67  * reset the CPB handling
68  */
reset_cpb(struct rvce_encoder * enc)69 static void reset_cpb(struct rvce_encoder *enc)
70 {
71    unsigned i;
72 
73    list_inithead(&enc->cpb_slots);
74    for (i = 0; i < enc->cpb_num; ++i) {
75       struct rvce_cpb_slot *slot = &enc->cpb_array[i];
76       slot->index = i;
77       slot->picture_type = PIPE_H2645_ENC_PICTURE_TYPE_SKIP;
78       slot->frame_num = 0;
79       slot->pic_order_cnt = 0;
80       list_addtail(&slot->list, &enc->cpb_slots);
81    }
82 }
83 
84 /**
85  * sort l0 and l1 to the top of the list
86  */
sort_cpb(struct rvce_encoder * enc)87 static void sort_cpb(struct rvce_encoder *enc)
88 {
89    struct rvce_cpb_slot *i, *l0 = NULL, *l1 = NULL;
90 
91    LIST_FOR_EACH_ENTRY (i, &enc->cpb_slots, list) {
92       if (i->frame_num == enc->pic.ref_idx_l0_list[0])
93          l0 = i;
94 
95       if (i->frame_num == enc->pic.ref_idx_l1_list[0])
96          l1 = i;
97 
98       if (enc->pic.picture_type == PIPE_H2645_ENC_PICTURE_TYPE_P && l0)
99          break;
100 
101       if (enc->pic.picture_type == PIPE_H2645_ENC_PICTURE_TYPE_B && l0 && l1)
102          break;
103    }
104 
105    if (l1) {
106       list_del(&l1->list);
107       list_add(&l1->list, &enc->cpb_slots);
108    }
109 
110    if (l0) {
111       list_del(&l0->list);
112       list_add(&l0->list, &enc->cpb_slots);
113    }
114 }
115 
116 /**
117  * get number of cpbs based on dpb
118  */
get_cpb_num(struct rvce_encoder * enc)119 static unsigned get_cpb_num(struct rvce_encoder *enc)
120 {
121    unsigned w = align(enc->base.width, 16) / 16;
122    unsigned h = align(enc->base.height, 16) / 16;
123    unsigned dpb;
124 
125    switch (enc->base.level) {
126    case 10:
127       dpb = 396;
128       break;
129    case 11:
130       dpb = 900;
131       break;
132    case 12:
133    case 13:
134    case 20:
135       dpb = 2376;
136       break;
137    case 21:
138       dpb = 4752;
139       break;
140    case 22:
141    case 30:
142       dpb = 8100;
143       break;
144    case 31:
145       dpb = 18000;
146       break;
147    case 32:
148       dpb = 20480;
149       break;
150    case 40:
151    case 41:
152       dpb = 32768;
153       break;
154    case 42:
155       dpb = 34816;
156       break;
157    case 50:
158       dpb = 110400;
159       break;
160    default:
161    case 51:
162    case 52:
163       dpb = 184320;
164       break;
165    }
166 
167    return MIN2(dpb / (w * h), 16);
168 }
169 
170 /**
171  * Get the slot for the currently encoded frame
172  */
si_current_slot(struct rvce_encoder * enc)173 struct rvce_cpb_slot *si_current_slot(struct rvce_encoder *enc)
174 {
175    return list_entry(enc->cpb_slots.prev, struct rvce_cpb_slot, list);
176 }
177 
178 /**
179  * Get the slot for L0
180  */
si_l0_slot(struct rvce_encoder * enc)181 struct rvce_cpb_slot *si_l0_slot(struct rvce_encoder *enc)
182 {
183    return list_entry(enc->cpb_slots.next, struct rvce_cpb_slot, list);
184 }
185 
186 /**
187  * Get the slot for L1
188  */
si_l1_slot(struct rvce_encoder * enc)189 struct rvce_cpb_slot *si_l1_slot(struct rvce_encoder *enc)
190 {
191    return list_entry(enc->cpb_slots.next->next, struct rvce_cpb_slot, list);
192 }
193 
194 /**
195  * Calculate the offsets into the CPB
196  */
si_vce_frame_offset(struct rvce_encoder * enc,struct rvce_cpb_slot * slot,signed * luma_offset,signed * chroma_offset)197 void si_vce_frame_offset(struct rvce_encoder *enc, struct rvce_cpb_slot *slot, signed *luma_offset,
198                          signed *chroma_offset)
199 {
200    struct si_screen *sscreen = (struct si_screen *)enc->screen;
201    unsigned pitch, vpitch, fsize;
202 
203    if (sscreen->info.gfx_level < GFX9) {
204       pitch = align(enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe, 128);
205       vpitch = align(enc->luma->u.legacy.level[0].nblk_y, 16);
206    } else {
207       pitch = align(enc->luma->u.gfx9.surf_pitch * enc->luma->bpe, 256);
208       vpitch = align(enc->luma->u.gfx9.surf_height, 16);
209    }
210    fsize = pitch * (vpitch + vpitch / 2);
211 
212    *luma_offset = slot->index * fsize;
213    *chroma_offset = *luma_offset + pitch * vpitch;
214 }
215 
216 /**
217  * destroy this video encoder
218  */
rvce_destroy(struct pipe_video_codec * encoder)219 static void rvce_destroy(struct pipe_video_codec *encoder)
220 {
221    struct rvce_encoder *enc = (struct rvce_encoder *)encoder;
222    if (enc->stream_handle) {
223       struct rvid_buffer fb;
224       si_vid_create_buffer(enc->screen, &fb, 512, PIPE_USAGE_STAGING);
225       enc->fb = &fb;
226       enc->session(enc);
227       enc->destroy(enc);
228       flush(enc);
229       si_vid_destroy_buffer(&fb);
230    }
231    si_vid_destroy_buffer(&enc->cpb);
232    enc->ws->cs_destroy(&enc->cs);
233    FREE(enc->cpb_array);
234    FREE(enc);
235 }
236 
rvce_begin_frame(struct pipe_video_codec * encoder,struct pipe_video_buffer * source,struct pipe_picture_desc * picture)237 static void rvce_begin_frame(struct pipe_video_codec *encoder, struct pipe_video_buffer *source,
238                              struct pipe_picture_desc *picture)
239 {
240    struct rvce_encoder *enc = (struct rvce_encoder *)encoder;
241    struct vl_video_buffer *vid_buf = (struct vl_video_buffer *)source;
242    struct pipe_h264_enc_picture_desc *pic = (struct pipe_h264_enc_picture_desc *)picture;
243 
244    bool need_rate_control =
245       enc->pic.rate_ctrl[0].rate_ctrl_method != pic->rate_ctrl[0].rate_ctrl_method ||
246       enc->pic.quant_i_frames != pic->quant_i_frames ||
247       enc->pic.quant_p_frames != pic->quant_p_frames ||
248       enc->pic.quant_b_frames != pic->quant_b_frames ||
249       enc->pic.rate_ctrl[0].target_bitrate != pic->rate_ctrl[0].target_bitrate ||
250       enc->pic.rate_ctrl[0].frame_rate_num != pic->rate_ctrl[0].frame_rate_num ||
251       enc->pic.rate_ctrl[0].frame_rate_den != pic->rate_ctrl[0].frame_rate_den;
252 
253    enc->pic = *pic;
254    enc->si_get_pic_param(enc, pic);
255 
256    enc->get_buffer(vid_buf->resources[0], &enc->handle, &enc->luma);
257    enc->get_buffer(vid_buf->resources[1], NULL, &enc->chroma);
258 
259    if (pic->picture_type == PIPE_H2645_ENC_PICTURE_TYPE_IDR)
260       reset_cpb(enc);
261    else if (pic->picture_type == PIPE_H2645_ENC_PICTURE_TYPE_P ||
262             pic->picture_type == PIPE_H2645_ENC_PICTURE_TYPE_B)
263       sort_cpb(enc);
264 
265    if (!enc->stream_handle) {
266       struct rvid_buffer fb;
267       enc->stream_handle = si_vid_alloc_stream_handle();
268       si_vid_create_buffer(enc->screen, &fb, 512, PIPE_USAGE_STAGING);
269       enc->fb = &fb;
270       enc->session(enc);
271       enc->create(enc);
272       enc->config(enc);
273       enc->feedback(enc);
274       flush(enc);
275       // dump_feedback(enc, &fb);
276       si_vid_destroy_buffer(&fb);
277       need_rate_control = false;
278    }
279 
280    if (need_rate_control) {
281       enc->session(enc);
282       enc->config(enc);
283       flush(enc);
284    }
285 }
286 
rvce_encode_bitstream(struct pipe_video_codec * encoder,struct pipe_video_buffer * source,struct pipe_resource * destination,void ** fb)287 static void rvce_encode_bitstream(struct pipe_video_codec *encoder,
288                                   struct pipe_video_buffer *source,
289                                   struct pipe_resource *destination, void **fb)
290 {
291    struct rvce_encoder *enc = (struct rvce_encoder *)encoder;
292    enc->get_buffer(destination, &enc->bs_handle, NULL);
293    enc->bs_size = destination->width0;
294 
295    *fb = enc->fb = CALLOC_STRUCT(rvid_buffer);
296    if (!si_vid_create_buffer(enc->screen, enc->fb, 512, PIPE_USAGE_STAGING)) {
297       RVID_ERR("Can't create feedback buffer.\n");
298       return;
299    }
300    if (!radeon_emitted(&enc->cs, 0))
301       enc->session(enc);
302    enc->encode(enc);
303    enc->feedback(enc);
304 }
305 
rvce_end_frame(struct pipe_video_codec * encoder,struct pipe_video_buffer * source,struct pipe_picture_desc * picture)306 static void rvce_end_frame(struct pipe_video_codec *encoder, struct pipe_video_buffer *source,
307                            struct pipe_picture_desc *picture)
308 {
309    struct rvce_encoder *enc = (struct rvce_encoder *)encoder;
310    struct rvce_cpb_slot *slot = list_entry(enc->cpb_slots.prev, struct rvce_cpb_slot, list);
311 
312    if (!enc->dual_inst || enc->bs_idx > 1)
313       flush(enc);
314 
315    /* update the CPB backtrack with the just encoded frame */
316    slot->picture_type = enc->pic.picture_type;
317    slot->frame_num = enc->pic.frame_num;
318    slot->pic_order_cnt = enc->pic.pic_order_cnt;
319    if (!enc->pic.not_referenced) {
320       list_del(&slot->list);
321       list_add(&slot->list, &enc->cpb_slots);
322    }
323 }
324 
rvce_get_feedback(struct pipe_video_codec * encoder,void * feedback,unsigned * size,struct pipe_enc_feedback_metadata * metadata)325 static void rvce_get_feedback(struct pipe_video_codec *encoder, void *feedback, unsigned *size,
326                               struct pipe_enc_feedback_metadata* metadata)
327 {
328    struct rvce_encoder *enc = (struct rvce_encoder *)encoder;
329    struct rvid_buffer *fb = feedback;
330 
331    if (size) {
332       uint32_t *ptr = enc->ws->buffer_map(enc->ws, fb->res->buf, &enc->cs,
333                                           PIPE_MAP_READ_WRITE | RADEON_MAP_TEMPORARY);
334 
335       if (ptr[1]) {
336          *size = ptr[4] - ptr[9];
337       } else {
338          *size = 0;
339       }
340 
341       enc->ws->buffer_unmap(enc->ws, fb->res->buf);
342    }
343    // dump_feedback(enc, fb);
344    si_vid_destroy_buffer(fb);
345    FREE(fb);
346 }
347 
rvce_destroy_fence(struct pipe_video_codec * encoder,struct pipe_fence_handle * fence)348 static void rvce_destroy_fence(struct pipe_video_codec *encoder,
349                                struct pipe_fence_handle *fence)
350 {
351    struct rvce_encoder *enc = (struct rvce_encoder *)encoder;
352 
353    enc->ws->fence_reference(enc->ws, &fence, NULL);
354 }
355 
/**
 * pipe_video_codec::flush hook: submit any outstanding command buffers
 * to the hardware.
 */
static void rvce_flush(struct pipe_video_codec *encoder)
{
   flush((struct rvce_encoder *)encoder);
}
365 
/**
 * Flush callback registered with the command stream at creation time.
 * Deliberately a no-op: all three arguments are ignored.
 */
static void rvce_cs_flush(void *ctx, unsigned flags, struct pipe_fence_handle **fence)
{
   (void)ctx;
   (void)flags;
   (void)fence;
}
370 
/**
 * Create a VCE video encoder.
 *
 * @param context     pipe context the encoder belongs to
 * @param templ       codec template; copied into the encoder's base
 * @param ws          winsys used for command submission and buffer access
 * @param get_buffer  callback resolving a pipe resource to its buffer handle
 *                    and surface description
 * @return the new codec, or NULL when the firmware is missing/unsupported or
 *         a resource could not be allocated
 */
struct pipe_video_codec *si_vce_create_encoder(struct pipe_context *context,
                                               const struct pipe_video_codec *templ,
                                               struct radeon_winsys *ws, rvce_get_buffer get_buffer)
{
   struct si_screen *sscreen = (struct si_screen *)context->screen;
   struct si_context *sctx = (struct si_context *)context;
   struct rvce_encoder *enc;
   struct pipe_video_buffer *tmp_buf, templat = {};
   struct radeon_surf *tmp_surf;
   unsigned cpb_size;

   if (!sscreen->info.vce_fw_version) {
      RVID_ERR("Kernel doesn't supports VCE!\n");
      return NULL;

   } else if (!si_vce_is_fw_version_supported(sscreen)) {
      RVID_ERR("Unsupported VCE fw version loaded!\n");
      return NULL;
   }

   enc = CALLOC_STRUCT(rvce_encoder);
   if (!enc)
      return NULL;

   if (sscreen->info.is_amdgpu)
      enc->use_vm = true;

   enc->use_vui = true;

   if (sscreen->info.family >= CHIP_TONGA && sscreen->info.family != CHIP_STONEY &&
       sscreen->info.family != CHIP_POLARIS11 && sscreen->info.family != CHIP_POLARIS12 &&
       sscreen->info.family != CHIP_VEGAM)
      enc->dual_pipe = true;
   /* TODO enable B frame with dual instance */
   if ((sscreen->info.family >= CHIP_TONGA) && (templ->max_references == 1) &&
       (sscreen->info.vce_harvest_config == 0))
      enc->dual_inst = true;

   enc->base = *templ;
   enc->base.context = context;

   enc->base.destroy = rvce_destroy;
   enc->base.begin_frame = rvce_begin_frame;
   enc->base.encode_bitstream = rvce_encode_bitstream;
   enc->base.end_frame = rvce_end_frame;
   enc->base.flush = rvce_flush;
   enc->base.get_feedback = rvce_get_feedback;
   enc->base.destroy_fence = rvce_destroy_fence;
   enc->get_buffer = get_buffer;

   enc->screen = context->screen;
   enc->ws = ws;

   if (!ws->cs_create(&enc->cs, sctx->ctx, AMD_IP_VCE, rvce_cs_flush, enc)) {
      RVID_ERR("Can't get command submission context.\n");
      goto error;
   }

   /* The slot count depends only on enc->base, so settle it before the
    * temporary video buffer exists; bailing out afterwards would leak it. */
   enc->cpb_num = get_cpb_num(enc);
   if (!enc->cpb_num)
      goto error;

   /* A throw-away NV12 buffer of the encode size tells us the surface layout. */
   templat.buffer_format = PIPE_FORMAT_NV12;
   templat.width = enc->base.width;
   templat.height = enc->base.height;
   templat.interlaced = false;
   if (!(tmp_buf = context->create_video_buffer(context, &templat))) {
      RVID_ERR("Can't create video buffer.\n");
      goto error;
   }

   get_buffer(((struct vl_video_buffer *)tmp_buf)->resources[0], NULL, &tmp_surf);

   cpb_size = (sscreen->info.gfx_level < GFX9)
                 ? align(tmp_surf->u.legacy.level[0].nblk_x * tmp_surf->bpe, 128) *
                      align(tmp_surf->u.legacy.level[0].nblk_y, 32)
                 :

                 align(tmp_surf->u.gfx9.surf_pitch * tmp_surf->bpe, 256) *
                    align(tmp_surf->u.gfx9.surf_height, 32);

   /* Luma plus half as much again for chroma, once per slot. */
   cpb_size = cpb_size * 3 / 2;
   cpb_size = cpb_size * enc->cpb_num;
   if (enc->dual_pipe)
      cpb_size += RVCE_MAX_AUX_BUFFER_NUM * RVCE_MAX_BITSTREAM_OUTPUT_ROW_SIZE * 2;
   tmp_buf->destroy(tmp_buf);
   if (!si_vid_create_buffer(enc->screen, &enc->cpb, cpb_size, PIPE_USAGE_DEFAULT)) {
      RVID_ERR("Can't create CPB buffer.\n");
      goto error;
   }

   enc->cpb_array = CALLOC(enc->cpb_num, sizeof(struct rvce_cpb_slot));
   if (!enc->cpb_array)
      goto error;

   reset_cpb(enc);

   switch (sscreen->info.vce_fw_version) {
   case FW_40_2_2:
      si_vce_40_2_2_init(enc);
      break;

   case FW_50_0_1:
   case FW_50_1_2:
   case FW_50_10_2:
   case FW_50_17_3:
      si_vce_50_init(enc);
      break;

   case FW_52_0_3:
   case FW_52_4_3:
   case FW_52_8_3:
      si_vce_52_init(enc);
      break;

   default:
      /* Any firmware with major version >= 53 uses the 52 code path.  The
       * mask is unsigned: 0xff << 24 does not fit in a signed int. */
      if ((sscreen->info.vce_fw_version & (0xffu << 24)) >= FW_53) {
         si_vce_52_init(enc);
      } else {
         goto error;
      }
      break;
   }

   return &enc->base;

error:
   enc->ws->cs_destroy(&enc->cs);

   si_vid_destroy_buffer(&enc->cpb);

   FREE(enc->cpb_array);
   FREE(enc);
   return NULL;
}
504 
505 /**
506  * check if kernel has the right fw version loaded
507  */
si_vce_is_fw_version_supported(struct si_screen * sscreen)508 bool si_vce_is_fw_version_supported(struct si_screen *sscreen)
509 {
510    switch (sscreen->info.vce_fw_version) {
511    case FW_40_2_2:
512    case FW_50_0_1:
513    case FW_50_1_2:
514    case FW_50_10_2:
515    case FW_50_17_3:
516    case FW_52_0_3:
517    case FW_52_4_3:
518    case FW_52_8_3:
519       return true;
520    default:
521       if ((sscreen->info.vce_fw_version & (0xff << 24)) >= FW_53)
522          return true;
523       else
524          return false;
525    }
526 }
527 
528 /**
529  * Add the buffer as relocation to the current command submission
530  */
si_vce_add_buffer(struct rvce_encoder * enc,struct pb_buffer_lean * buf,unsigned usage,enum radeon_bo_domain domain,signed offset)531 void si_vce_add_buffer(struct rvce_encoder *enc, struct pb_buffer_lean *buf, unsigned usage,
532                        enum radeon_bo_domain domain, signed offset)
533 {
534    int reloc_idx;
535 
536    reloc_idx = enc->ws->cs_add_buffer(&enc->cs, buf, usage | RADEON_USAGE_SYNCHRONIZED, domain);
537    if (enc->use_vm) {
538       uint64_t addr;
539       addr = enc->ws->buffer_get_virtual_address(buf);
540       addr = addr + offset;
541       RVCE_CS(addr >> 32);
542       RVCE_CS(addr);
543    } else {
544       offset += enc->ws->buffer_get_reloc_offset(buf);
545       RVCE_CS(reloc_idx * 4);
546       RVCE_CS(offset);
547    }
548 }
549