1 // Copyright 2024 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 use std::any::Any;
6 use std::marker::PhantomData;
7 use std::rc::Rc;
8
9 use anyhow::anyhow;
10 use libva::Config;
11 use libva::Context;
12 use libva::Display;
13 use libva::EncCodedBuffer;
14 use libva::MappedCodedBuffer;
15 use libva::Picture;
16 use libva::PictureEnd;
17 use libva::Surface;
18 use libva::SurfaceMemoryDescriptor;
19 use libva::UsageHint;
20 use libva::VAEntrypoint::VAEntrypointEncSlice;
21 use libva::VAEntrypoint::VAEntrypointEncSliceLP;
22 use libva::VAProfile;
23 use libva::VASurfaceStatus;
24
25 use crate::backend::vaapi::surface_pool::PooledVaSurface;
26 use crate::backend::vaapi::surface_pool::VaSurfacePool;
27 use crate::backend::vaapi::FORMAT_MAP;
28 use crate::decoder::FramePool;
29 use crate::encoder::stateless::BackendPromise;
30 use crate::encoder::stateless::StatelessBackendError;
31 use crate::encoder::stateless::StatelessBackendResult;
32 use crate::encoder::stateless::StatelessEncoderBackendImport;
33 use crate::encoder::FrameMetadata;
34 use crate::encoder::RateControl;
35 use crate::encoder::Tunings;
36 use crate::video_frame::VideoFrame;
37 use crate::Fourcc;
38 use crate::Resolution;
39
/// Number of scratch surfaces the encoder backend adds to its scratch pool when it is created.
const INITIAL_SCRATCH_POOL_SIZE: usize = 16;
/// Upper bound on the number of surfaces managed by the scratch pool. Once the pool manages this
/// many surfaces, the backend refuses to allocate more scratch frames.
const MAX_SCRATCH_POOL_SIZE: usize = 4 * INITIAL_SCRATCH_POOL_SIZE;
45
46 impl From<libva::VaError> for StatelessBackendError {
from(value: libva::VaError) -> Self47 fn from(value: libva::VaError) -> Self {
48 Self::Other(value.into())
49 }
50 }
51
tunings_to_libva_rc<const CLAMP_MIN_QP: u32, const CLAMP_MAX_QP: u32>( tunings: &Tunings, ) -> StatelessBackendResult<libva::EncMiscParameterRateControl>52 pub(crate) fn tunings_to_libva_rc<const CLAMP_MIN_QP: u32, const CLAMP_MAX_QP: u32>(
53 tunings: &Tunings,
54 ) -> StatelessBackendResult<libva::EncMiscParameterRateControl> {
55 let bits_per_second = tunings.rate_control.bitrate_target().unwrap_or(0);
56 let bits_per_second = u32::try_from(bits_per_second).map_err(|e| anyhow::anyhow!(e))?;
57
58 // At the moment we don't support variable bitrate therefore target 100%
59 const TARGET_PERCENTAGE: u32 = 100;
60
61 // Window size in ms that the RC should apply to
62 const WINDOW_SIZE: u32 = 1_500;
63
64 // Clamp minium QP
65 let min_qp = tunings.min_quality.clamp(CLAMP_MIN_QP, CLAMP_MAX_QP);
66
67 let basic_unit_size = 0;
68
69 // Don't reset the rate controller
70 const RESET: u32 = 0;
71
72 // Don't skip frames
73 const DISABLE_FRAME_SKIP: u32 = 1;
74
75 // Allow bit stuffing
76 const DISABLE_BIT_STUFFING: u32 = 0;
77
78 // Use default
79 const MB_RATE_CONTROL: u32 = 0;
80
81 // SVC encoding is not supported for now
82 const TEMPORAL_ID: u32 = 0;
83
84 // Don't ensure intraframe size
85 const CFS_I_FRAMES: u32 = 0;
86
87 // We don't use hierarchical B frames currently
88 const ENABLE_PARALLEL_BRC: u32 = 0;
89
90 // Disable dynamic scaling
91 const ENABLE_DYNAMIC_SCALING: u32 = 0;
92
93 // Use default tolerance mode
94 const FRAME_TOLERANCE_MODE: u32 = 0;
95
96 // ICQ mode is not used
97 const ICQ_QUALITY_FACTOR: u32 = 0;
98
99 // Clamp maximum QP
100 let max_qp = tunings.max_quality.clamp(CLAMP_MIN_QP, CLAMP_MAX_QP);
101
102 // Unsed
103 const QUALITY_FACTOR: u32 = 0;
104
105 // No limits
106 const TARGET_FRAME_SIZE: u32 = 0;
107
108 // If ConstantQuality is used then set to it's value, otherwise use middle
109 let initial_qp = match tunings.rate_control {
110 RateControl::ConstantQuality(qp) => qp.clamp(min_qp, max_qp),
111 _ => (min_qp + max_qp) / 2,
112 };
113
114 Ok(libva::EncMiscParameterRateControl::new(
115 bits_per_second,
116 TARGET_PERCENTAGE,
117 WINDOW_SIZE,
118 initial_qp,
119 min_qp,
120 basic_unit_size,
121 libva::RcFlags::new(
122 RESET,
123 DISABLE_FRAME_SKIP,
124 DISABLE_BIT_STUFFING,
125 MB_RATE_CONTROL,
126 TEMPORAL_ID,
127 CFS_I_FRAMES,
128 ENABLE_PARALLEL_BRC,
129 ENABLE_DYNAMIC_SCALING,
130 FRAME_TOLERANCE_MODE,
131 ),
132 ICQ_QUALITY_FACTOR,
133 max_qp,
134 QUALITY_FACTOR,
135 TARGET_FRAME_SIZE,
136 ))
137 }
138
139 pub struct Reconstructed(PooledVaSurface<()>);
140
141 impl Reconstructed {
surface(&self) -> &Surface<()>142 pub(crate) fn surface(&self) -> &Surface<()> {
143 use std::borrow::Borrow;
144 Borrow::<Surface<()>>::borrow(&self.0)
145 }
146
surface_id(&self) -> u32147 pub(crate) fn surface_id(&self) -> u32 {
148 self.surface().id()
149 }
150 }
151
152 pub struct VaapiBackend<M, H>
153 where
154 M: SurfaceMemoryDescriptor,
155 H: std::borrow::Borrow<Surface<M>> + 'static,
156 {
157 /// VA config.
158 #[allow(dead_code)]
159 va_config: Config,
160
161 /// VA context used for encoding.
162 context: Rc<Context>,
163
164 _va_profile: VAProfile::Type,
165 scratch_pool: VaSurfacePool<()>,
166 _phantom: PhantomData<(M, H)>,
167 }
168
169 impl<M, H> VaapiBackend<M, H>
170 where
171 M: SurfaceMemoryDescriptor,
172 H: std::borrow::Borrow<Surface<M>>,
173 {
new( display: Rc<Display>, va_profile: VAProfile::Type, fourcc: Fourcc, coded_size: Resolution, bitrate_control: u32, low_power: bool, ) -> StatelessBackendResult<Self>174 pub fn new(
175 display: Rc<Display>,
176 va_profile: VAProfile::Type,
177 fourcc: Fourcc,
178 coded_size: Resolution,
179 bitrate_control: u32,
180 low_power: bool,
181 ) -> StatelessBackendResult<Self> {
182 let format_map = FORMAT_MAP
183 .iter()
184 .find(|&map| map.va_fourcc == fourcc.0)
185 .ok_or_else(|| StatelessBackendError::UnsupportedFormat)?;
186
187 let rt_format = format_map.rt_format;
188
189 let va_config = display.create_config(
190 vec![
191 libva::VAConfigAttrib {
192 type_: libva::VAConfigAttribType::VAConfigAttribRTFormat,
193 value: rt_format,
194 },
195 libva::VAConfigAttrib {
196 type_: libva::VAConfigAttribType::VAConfigAttribRateControl,
197 value: bitrate_control,
198 },
199 ],
200 va_profile,
201 if low_power { VAEntrypointEncSliceLP } else { VAEntrypointEncSlice },
202 )?;
203
204 let context = display.create_context::<M>(
205 &va_config,
206 coded_size.width,
207 coded_size.height,
208 None,
209 true,
210 )?;
211
212 let mut scratch_pool = VaSurfacePool::new(
213 Rc::clone(&display),
214 rt_format,
215 Some(UsageHint::USAGE_HINT_ENCODER),
216 coded_size,
217 );
218
219 // TODO: Allow initial size to be changed
220 scratch_pool.add_frames(vec![(); INITIAL_SCRATCH_POOL_SIZE])?;
221
222 Ok(Self {
223 va_config,
224 context,
225 scratch_pool,
226 _va_profile: va_profile,
227 _phantom: Default::default(),
228 })
229 }
230
context(&self) -> &Rc<Context>231 pub(crate) fn context(&self) -> &Rc<Context> {
232 &self.context
233 }
234
new_coded_buffer( &self, rate_control: &RateControl, ) -> StatelessBackendResult<EncCodedBuffer>235 pub(crate) fn new_coded_buffer(
236 &self,
237 rate_control: &RateControl,
238 ) -> StatelessBackendResult<EncCodedBuffer> {
239 // Coded buffer size multiplier. It's inteded to give head room for the encoder.
240 const CODED_SIZE_MUL: usize = 2;
241
242 // Default coded buffer size if bitrate control is not used.
243 const DEFAULT_CODED_SIZE: usize = 1_500_000;
244
245 let coded_size = rate_control
246 .bitrate_target()
247 .map(|e| e as usize * CODED_SIZE_MUL)
248 .unwrap_or(DEFAULT_CODED_SIZE);
249
250 Ok(self.context().create_enc_coded(coded_size)?)
251 }
252
253 // Creates an empty surface that will be filled with reconstructed picture during encoding
254 // which will be later used as frame reference
new_scratch_picture(&mut self) -> StatelessBackendResult<Reconstructed>255 pub(crate) fn new_scratch_picture(&mut self) -> StatelessBackendResult<Reconstructed> {
256 if self.scratch_pool.num_free_frames() == 0 {
257 if self.scratch_pool.num_managed_frames() >= MAX_SCRATCH_POOL_SIZE {
258 log::error!("Scratch pool is exhausted and hit the size limit");
259 return Err(StatelessBackendError::OutOfResources);
260 }
261
262 log::debug!(
263 "Scratch pool empty, allocating one more surface. (previous pool size: {})",
264 self.scratch_pool.num_managed_frames()
265 );
266 self.scratch_pool.add_frames(vec![()])?;
267 }
268
269 let surface =
270 self.scratch_pool.get_surface().ok_or(StatelessBackendError::OutOfResources)?;
271
272 Ok(Reconstructed(surface))
273 }
274 }
275
276 impl<M, Handle> StatelessEncoderBackendImport<Handle, Handle> for VaapiBackend<M, Handle>
277 where
278 M: SurfaceMemoryDescriptor,
279 Handle: std::borrow::Borrow<Surface<M>>,
280 {
import_picture( &mut self, _metadata: &FrameMetadata, handle: Handle, ) -> StatelessBackendResult<Handle>281 fn import_picture(
282 &mut self,
283 _metadata: &FrameMetadata,
284 handle: Handle,
285 ) -> StatelessBackendResult<Handle> {
286 Ok(handle)
287 }
288 }
289
290 impl<V: VideoFrame> StatelessEncoderBackendImport<V, Surface<V::MemDescriptor>>
291 for VaapiBackend<V::MemDescriptor, Surface<V::MemDescriptor>>
292 {
import_picture( &mut self, _metadata: &FrameMetadata, handle: V, ) -> StatelessBackendResult<Surface<V::MemDescriptor>>293 fn import_picture(
294 &mut self,
295 _metadata: &FrameMetadata,
296 handle: V,
297 ) -> StatelessBackendResult<Surface<V::MemDescriptor>> {
298 Ok(handle.to_native_handle(self.context.display()).map_err(|err| anyhow!(err))?.into())
299 }
300 }
301
302 /// Vaapi's implementation of [`crate::encoder::stateless::BackendPromise`]
303 pub struct CodedOutputPromise<M, P>
304 where
305 M: SurfaceMemoryDescriptor,
306 P: std::borrow::Borrow<Surface<M>>,
307 {
308 /// Currently processed picture/surface.
309 handle: Picture<PictureEnd, P>,
310
311 /// Hold reference frames/object from being dropped while `handle` is processed.
312 references: Vec<Rc<dyn Any>>,
313
314 // VaBuffer where the coded output will be present after processing
315 // is finished.
316 coded_buf: EncCodedBuffer,
317
318 /// Container for the request output. Moved from
319 /// [`crate::encoder::stateless::StatelessVideoEncoderBackend`] request. The output will be
320 /// appended to it.
321 coded_output: Vec<u8>,
322
323 _phantom: PhantomData<M>,
324 }
325
326 impl<M, P> CodedOutputPromise<M, P>
327 where
328 M: SurfaceMemoryDescriptor,
329 P: std::borrow::Borrow<Surface<M>>,
330 {
new( handle: Picture<PictureEnd, P>, references: Vec<Rc<dyn Any>>, coded_buf: EncCodedBuffer, coded_output: Vec<u8>, ) -> Self331 pub fn new(
332 handle: Picture<PictureEnd, P>,
333 references: Vec<Rc<dyn Any>>,
334 coded_buf: EncCodedBuffer,
335 coded_output: Vec<u8>,
336 ) -> Self {
337 Self { handle, references, coded_buf, coded_output, _phantom: Default::default() }
338 }
339 }
340
341 impl<M, H> BackendPromise for CodedOutputPromise<M, H>
342 where
343 M: SurfaceMemoryDescriptor,
344 H: std::borrow::Borrow<Surface<M>>,
345 {
346 type Output = Vec<u8>;
347
sync(mut self) -> StatelessBackendResult<Self::Output>348 fn sync(mut self) -> StatelessBackendResult<Self::Output> {
349 if let Err((err, _)) = self.handle.sync() {
350 // TODO consider going back to PictureEnd
351 return Err(err.into());
352 }
353
354 // Drop all references as processing is finished
355 self.references.clear();
356
357 // Map coded buffer and collect bitstream
358 let coded = MappedCodedBuffer::new(&self.coded_buf)?;
359 let mut bitstream = self.coded_output;
360 for segment in coded.segments() {
361 // TODO: Handle flags?
362 // NOTE: on flags: 0-7 bits are average QP value
363 if segment.bit_offset > 0 {
364 log::warn!("unsupported bit_offset != 0 (yet)");
365 }
366 bitstream.extend(segment.buf)
367 }
368
369 Ok(bitstream)
370 }
371
is_ready(&self) -> bool372 fn is_ready(&self) -> bool {
373 match self.handle.surface().query_status() {
374 Ok(status) => status == VASurfaceStatus::VASurfaceReady,
375 Err(_) => {
376 // An error occurred while processing or checking the status of the underlying
377 // processing, in both cases consider it is done. In either cases it will be
378 // returned with [`sync`].
379 true
380 }
381 }
382 }
383 }
384
#[cfg(test)]
pub(crate) mod tests {
    use std::borrow::Borrow;

    use libva::VA_FOURCC_NV12;
    use libva::VA_FOURCC_P010;

    use super::*;
    use crate::encoder::tests::fill_test_frame_nv12;
    use crate::encoder::tests::fill_test_frame_p010;
    use crate::encoder::tests::get_test_frame_t;
    use crate::encoder::FrameMetadata;
    use crate::FrameLayout;

    /// Creates an image of `surface` in the display's image format matching `fourcc`.
    ///
    /// Panics if the display does not report that format or if the image cannot be created.
    fn map_surface<'a, M: SurfaceMemoryDescriptor>(
        display: &Rc<Display>,
        surface: &'a Surface<M>,
        fourcc: u32,
    ) -> libva::Image<'a> {
        let image_fmts = display.query_image_formats().unwrap();
        let image_fmt = image_fmts.into_iter().find(|f| f.fourcc == fourcc).unwrap();

        libva::Image::create_from(surface, image_fmt, surface.size(), surface.size()).unwrap()
    }

    /// Creates an NV12 image of `surface`.
    fn map_surface_nv12<'a, M: SurfaceMemoryDescriptor>(
        display: &Rc<Display>,
        surface: &'a Surface<M>,
    ) -> libva::Image<'a> {
        map_surface(display, surface, VA_FOURCC_NV12)
    }

    /// Creates a P010 image of `surface`.
    fn map_surface_p010<'a, M: SurfaceMemoryDescriptor>(
        display: &Rc<Display>,
        surface: &'a Surface<M>,
    ) -> libva::Image<'a> {
        map_surface(display, surface, VA_FOURCC_P010)
    }

    /// Copies `rows` rows of `width` bytes from the tightly packed `src` into `dst`, whose rows
    /// start `dst_pitch` bytes apart.
    ///
    /// Only the bytes actually written are indexed, so a destination that ends right after the
    /// last row's `width` bytes is accepted. Panics if either slice is too short.
    fn copy_plane_rows(dst: &mut [u8], dst_pitch: usize, src: &[u8], width: usize, rows: usize) {
        for row in 0..rows {
            let dst_start = row * dst_pitch;
            let src_start = row * width;
            dst[dst_start..dst_start + width]
                .copy_from_slice(&src[src_start..src_start + width]);
        }
    }

    /// Uploads raw NV12 to Surface
    ///
    /// `data` holds `height` luma rows of `width` bytes followed by `height / 2` chroma rows of
    /// `width` bytes, with no padding between rows.
    pub fn upload_nv12_img<M: SurfaceMemoryDescriptor>(
        display: &Rc<Display>,
        surface: &Surface<M>,
        width: u32,
        height: u32,
        data: &[u8],
    ) {
        let mut image = map_surface_nv12(display, surface);

        let va_image = *image.image();
        let dest = image.as_mut();
        let width = width as usize;
        let height = height as usize;

        // Copy luma
        copy_plane_rows(
            &mut dest[va_image.offsets[0] as usize..],
            va_image.pitches[0] as usize,
            data,
            width,
            height,
        );

        // Copy chroma, which starts right after the luma plane in `data`
        copy_plane_rows(
            &mut dest[va_image.offsets[1] as usize..],
            va_image.pitches[1] as usize,
            &data[width * height..],
            width,
            height / 2,
        );

        // Release the image before syncing the surface, in the same order as the
        // `upload_test_frame_*` helpers.
        drop(image);
        surface.sync().unwrap();
    }

    /// Helper struct. [`Iterator`] to fetch frames from [`VaSurfacePool`].
    pub struct PooledFrameIterator {
        /// Timestamp assigned to the next frame; incremented for every frame produced.
        counter: u64,
        display: Rc<Display>,
        pool: VaSurfacePool<()>,
        frame_layout: FrameLayout,
    }

    impl PooledFrameIterator {
        /// Creates an iterator that takes surfaces from `pool` and labels each of them with
        /// `frame_layout`.
        pub fn new(
            display: Rc<Display>,
            pool: VaSurfacePool<()>,
            frame_layout: FrameLayout,
        ) -> Self {
            Self { counter: 0, display, pool, frame_layout }
        }
    }

    impl Iterator for PooledFrameIterator {
        type Item = (FrameMetadata, PooledVaSurface<()>);

        /// Never returns `None`; panics if the pool cannot provide a surface.
        fn next(&mut self) -> Option<Self::Item> {
            let handle = self.pool.get_surface().unwrap();

            let meta = FrameMetadata {
                layout: self.frame_layout.clone(),
                force_keyframe: false,
                timestamp: self.counter,
            };

            self.counter += 1;

            Some((meta, handle))
        }
    }

    /// Helper struct. Uses [`Iterator`] with raw chunks and uploads to pooled surface from
    /// [`VaSurfacePool`] to produce frames.
    pub struct NV12FrameProducer<'l, I>
    where
        I: Iterator<Item = &'l [u8]>,
    {
        raw_iterator: I,
        pool_iter: PooledFrameIterator,
    }

    impl<'l, I> NV12FrameProducer<'l, I>
    where
        I: Iterator<Item = &'l [u8]>,
    {
        /// Creates a producer that uploads each chunk yielded by `raw_iterator` to a surface
        /// taken from `pool`.
        #[allow(dead_code)]
        pub fn new(
            raw_iterator: I,
            display: Rc<Display>,
            pool: VaSurfacePool<()>,
            frame_layout: FrameLayout,
        ) -> Self {
            Self { raw_iterator, pool_iter: PooledFrameIterator::new(display, pool, frame_layout) }
        }
    }

    impl<'l, I> Iterator for NV12FrameProducer<'l, I>
    where
        I: Iterator<Item = &'l [u8]>,
    {
        type Item = (FrameMetadata, PooledVaSurface<()>);

        /// Ends when the raw iterator is exhausted.
        fn next(&mut self) -> Option<Self::Item> {
            let raw = self.raw_iterator.next()?;

            let (meta, handle) = self.pool_iter.next().unwrap();

            let width = meta.layout.size.width;
            let height = meta.layout.size.height;
            debug_assert_eq!((width * height + width * height / 2) as usize, raw.len());

            upload_nv12_img(&self.pool_iter.display, handle.borrow(), width, height, raw);

            Some((meta, handle))
        }
    }

    /// Fills `surface` with the NV12 test frame for parameter `t`.
    pub fn upload_test_frame_nv12<M: SurfaceMemoryDescriptor>(
        display: &Rc<Display>,
        surface: &Surface<M>,
        t: f32,
    ) {
        let mut image = map_surface_nv12(display, surface);

        let (width, height) = image.display_resolution();

        let offsets = image.image().offsets;
        let pitches = image.image().pitches;

        fill_test_frame_nv12(
            width as usize,
            height as usize,
            [pitches[0] as usize, pitches[1] as usize],
            [offsets[0] as usize, offsets[1] as usize],
            t,
            image.as_mut(),
        );

        drop(image);
        surface.sync().unwrap();
    }

    /// Fills `surface` with the P010 test frame for parameter `t`.
    pub fn upload_test_frame_p010<M: SurfaceMemoryDescriptor>(
        display: &Rc<Display>,
        surface: &Surface<M>,
        t: f32,
    ) {
        let mut image = map_surface_p010(display, surface);

        let (width, height) = image.display_resolution();

        let offsets = image.image().offsets;
        let pitches = image.image().pitches;

        fill_test_frame_p010(
            width as usize,
            height as usize,
            [pitches[0] as usize, pitches[1] as usize],
            [offsets[0] as usize, offsets[1] as usize],
            t,
            image.as_mut(),
        );

        drop(image);
        surface.sync().unwrap();
    }

    /// Helper struct. Procedurally generates NV12 or P010 frames for test purposes.
    pub struct TestFrameGenerator {
        counter: u64,
        max_count: u64,
        pool_iter: PooledFrameIterator,
        display: Rc<Display>,
        fourcc: Fourcc,
    }

    impl TestFrameGenerator {
        /// Creates a generator that takes surfaces from `pool`; the pixel format is taken from
        /// `frame_layout`.
        pub fn new(
            max_count: u64,
            display: Rc<Display>,
            pool: VaSurfacePool<()>,
            frame_layout: FrameLayout,
        ) -> Self {
            Self {
                counter: 0,
                max_count,
                fourcc: frame_layout.format.0,
                pool_iter: PooledFrameIterator::new(display.clone(), pool, frame_layout),
                display,
            }
        }
    }

    impl Iterator for TestFrameGenerator {
        type Item = (FrameMetadata, PooledVaSurface<()>);

        fn next(&mut self) -> Option<Self::Item> {
            // NOTE(review): with `>` the generator yields `max_count + 1` frames (timestamps
            // `0..=max_count`). Confirm that is intended before changing it.
            if self.counter > self.max_count {
                return None;
            }

            self.counter += 1;

            let (meta, handle) = self.pool_iter.next().unwrap();

            let surface: &Surface<()> = handle.borrow();

            let t = get_test_frame_t(meta.timestamp, self.max_count);
            match self.fourcc.0 {
                VA_FOURCC_NV12 => upload_test_frame_nv12(&self.display, surface, t),
                VA_FOURCC_P010 => upload_test_frame_p010(&self.display, surface, t),
                _ => unreachable!(),
            }

            Some((meta, handle))
        }
    }
}
652