// Copyright 2023 The ChromiumOS Authors // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. use std::os::fd::AsFd; use std::os::fd::BorrowedFd; use anyhow::anyhow; use crate::codec::av1::parser::FrameHeaderObu; use crate::codec::av1::parser::FrameObu; use crate::codec::av1::parser::FrameType; use crate::codec::av1::parser::ObuAction; use crate::codec::av1::parser::ObuType; use crate::codec::av1::parser::ParsedObu; use crate::codec::av1::parser::Parser; use crate::codec::av1::parser::StreamInfo; use crate::codec::av1::parser::TileGroupObu; use crate::codec::av1::parser::NUM_REF_FRAMES; use crate::decoder::stateless::DecodeError; use crate::decoder::stateless::DecodingState; use crate::decoder::stateless::NewPictureResult; use crate::decoder::stateless::StatelessBackendResult; use crate::decoder::stateless::StatelessCodec; use crate::decoder::stateless::StatelessDecoder; use crate::decoder::stateless::StatelessDecoderBackend; use crate::decoder::stateless::StatelessDecoderBackendPicture; use crate::decoder::stateless::StatelessVideoDecoder; use crate::decoder::BlockingMode; use crate::decoder::DecodedHandle; use crate::Resolution; #[cfg(test)] mod dummy; #[cfg(feature = "v4l2")] mod v4l2; #[cfg(feature = "vaapi")] mod vaapi; /// Stateless backend methods specific to AV1. pub trait StatelessAV1DecoderBackend: StatelessDecoderBackend + StatelessDecoderBackendPicture { /// Called when a new Sequence Header OBU is parsed. The /// `highest_spatial_layer` argument refers to the maximum layer selected by /// the client through `set_operating_point()` and the scalability /// information present in the stream, if any. fn change_stream_info(&mut self, stream_info: &StreamInfo) -> StatelessBackendResult<()>; /// Called when the decoder determines that a new picture was found. The backend allocates all /// the resources it needs to process that picture. fn new_picture( &mut self, hdr: &FrameHeaderObu, timestamp: u64, alloc_cb: &mut dyn FnMut() -> Option< <::Handle as DecodedHandle>::Frame, >, ) -> NewPictureResult; /// Called to set the global parameters of a picture. fn begin_picture( &mut self, picture: &mut Self::Picture, stream_info: &StreamInfo, hdr: &FrameHeaderObu, reference_frames: &[Option; NUM_REF_FRAMES], ) -> StatelessBackendResult<()>; /// Called to dispatch a decode operation to the backend. #[allow(clippy::too_many_arguments)] fn decode_tile_group( &mut self, picture: &mut Self::Picture, tile_group: TileGroupObu, ) -> StatelessBackendResult<()>; /// Called when the decoder wants the backend to finish the decoding /// operations for `picture`. At this point, `decode_tile` has been called /// for all tiles. fn submit_picture(&mut self, picture: Self::Picture) -> StatelessBackendResult; } /// State of the picture being currently decoded. /// /// Stored between calls to [`StatelessDecoder::decode_tile_group`] that belong to the same /// picture. enum CurrentPicState { /// A regular frame RegularFrame { /// Data for the current picture as extracted from the stream. header: FrameHeaderObu, /// Backend-specific data for that picture. backend_picture: P, }, /// A frame that has 'show_existing_frame' set. ShowExistingFrame { /// Data for the current picture as extracted from the stream. header: FrameHeaderObu, /// The handle of the reference frame that this frame points to. handle: H, }, } pub struct AV1DecoderState { /// AV1 bitstream parser. parser: Parser, /// The reference frames in use. reference_frames: [Option; NUM_REF_FRAMES], /// Keeps track of the last values seen for negotiation purposes. stream_info: Option, /// The picture currently being decoded. We need to preserve it between /// calls to `decode` because multiple tiles will be processed in different /// calls to `decode`. current_pic: Option>, /// Keep track of the number of frames we've processed for logging purposes. frame_count: u32, /// For SVC streams, we only want to output the highest layer possible given /// the choice of operating point. highest_spatial_layer: Option, } impl Default for AV1DecoderState where H: DecodedHandle, { fn default() -> Self { Self { parser: Default::default(), reference_frames: Default::default(), stream_info: Default::default(), current_pic: Default::default(), frame_count: Default::default(), highest_spatial_layer: Default::default(), } } } /// [`StatelessCodec`] structure to use in order to create a AV1 stateless decoder. /// /// # Accepted input /// /// the VP9 specification requires the last byte of the chunk to contain the superframe marker. /// Thus, a decoder using this codec processes exactly one encoded chunk per call to /// [`StatelessDecoder::decode`], and always returns the size of the passed input if successful. pub struct Av1; impl StatelessCodec for Av1 { type FormatInfo = StreamInfo; type DecoderState = AV1DecoderState; } impl StatelessDecoder where B: StatelessAV1DecoderBackend, B::Handle: Clone, { fn decode_frame_header( &mut self, frame_header: FrameHeaderObu, timestamp: u64, alloc_cb: &mut dyn FnMut() -> Option< <::Handle as DecodedHandle>::Frame, >, ) -> Result<(), DecodeError> { log::debug!("Processing frame {} with timestamp {}", self.codec.frame_count, timestamp); if frame_header.show_existing_frame { let ref_frame = self.codec.reference_frames [frame_header.frame_to_show_map_idx as usize] .as_ref() .ok_or(anyhow!("Broken stream: no reference picture to display"))?; self.codec.current_pic = Some(CurrentPicState::ShowExistingFrame { header: frame_header, handle: ref_frame.clone(), }); } else if let Some(stream_info) = &self.codec.stream_info { let mut backend_picture = self.backend.new_picture(&frame_header, timestamp, alloc_cb)?; self.backend.begin_picture( &mut backend_picture, stream_info, &frame_header, &self.codec.reference_frames, )?; self.codec.current_pic = Some(CurrentPicState::RegularFrame { header: frame_header.clone(), backend_picture, }); } else { log::warn!("invalid stream: frame header received while no valid sequence ongoing"); } Ok(()) } fn decode_tile_group(&mut self, tile_group: TileGroupObu) -> anyhow::Result<()> { let picture = match self.codec.current_pic.as_mut() { Some(CurrentPicState::RegularFrame { backend_picture, .. }) => backend_picture, Some(CurrentPicState::ShowExistingFrame { .. }) => { return Err(anyhow!("Broken stream: cannot decode a tile group for a frame with show_existing_frame set")); } None => { return Err(anyhow!( "Broken stream: cannot decode a tile group without first decoding a frame header" )) } }; self.backend.decode_tile_group(picture, tile_group)?; Ok(()) } fn decode_frame( &mut self, frame: FrameObu, timestamp: u64, alloc_cb: &mut dyn FnMut() -> Option< <::Handle as DecodedHandle>::Frame, >, ) -> Result<(), DecodeError> { let FrameObu { header, tile_group } = frame; self.decode_frame_header(header, timestamp, alloc_cb)?; self.decode_tile_group(tile_group)?; Ok(()) } fn submit_frame(&mut self, timestamp: u64) -> anyhow::Result<()> { log::debug!("Finishing frame {} with timestamp: {}", self.codec.frame_count, timestamp); let picture = self.codec.current_pic.take(); let (handle, header) = match picture { Some(CurrentPicState::RegularFrame { header, backend_picture }) => { let handle = self.backend.submit_picture(backend_picture)?; if self.blocking_mode == BlockingMode::Blocking { handle.sync()?; } (handle, header) } Some(CurrentPicState::ShowExistingFrame { header, handle }) => (handle, header), None => return Err(anyhow!("Broken stream: no picture to submit")), }; let update_refs = if header.show_existing_frame { header.frame_type == FrameType::KeyFrame } else { true }; if update_refs { let mut refresh_frame_flags = header.refresh_frame_flags; #[allow(clippy::needless_range_loop)] for i in 0..NUM_REF_FRAMES { if (refresh_frame_flags & 1) == 1 { log::debug!( "Replacing reference frame {} to new timestamp {} on frame count: {}", i, timestamp, self.codec.frame_count ); self.codec.reference_frames[i] = Some(handle.clone()); } refresh_frame_flags >>= 1; } } let show_existing_frame = header.show_existing_frame; if header.show_frame || show_existing_frame { match self.codec.highest_spatial_layer { None => self.ready_queue.push(handle), Some(highest_spatial_layer) => { if header.obu_header.spatial_id >= highest_spatial_layer { self.ready_queue.push(handle); } else { log::debug!( "Dropping frame with spatial_id {}", header.obu_header.spatial_id ); } } } } self.codec.parser.ref_frame_update(&header).map_err(|err| anyhow!(err))?; self.codec.frame_count += 1; Ok(()) } } impl StatelessVideoDecoder for StatelessDecoder where B: StatelessAV1DecoderBackend, B::Handle: Clone + 'static, { type Handle = B::Handle; /// Decode an AV1 stream. /// /// `bitstream` should initially be submitted as a whole temporal unit, however a call to this /// method will only consume a single OBU. The caller must be careful to check the return value /// and resubmit the remainder if the whole bitstream has not been consumed. fn decode( &mut self, timestamp: u64, bitstream: &[u8], alloc_cb: &mut dyn FnMut() -> Option< <::Handle as DecodedHandle>::Frame, >, ) -> Result { let obu = match self .codec .parser .read_obu(bitstream) .map_err(|err| DecodeError::ParseFrameError(err))? { ObuAction::Process(obu) => obu, // This OBU should be dropped. ObuAction::Drop(length) => return Ok(length as usize), }; let obu_length = obu.bytes_used; let is_decode_op = matches!( obu.header.obu_type, ObuType::Frame | ObuType::FrameHeader | ObuType::TileGroup ); if is_decode_op { match self.decoding_state { /* we want to be here */ DecodingState::Decoding => (), /* otherwise... */ DecodingState::AwaitingStreamInfo => { /* Skip input until we get information from the stream. */ return Ok(obu_length); } /* Ask the client to confirm the format before we can process this. */ DecodingState::FlushingForDRC | DecodingState::AwaitingFormat(_) => { // Start signaling the awaiting format event to process a format change. self.awaiting_format_event.write(1).unwrap(); return Err(DecodeError::CheckEvents); } DecodingState::Reset => { let mut parser = self.codec.parser.clone(); let is_key_frame = match obu.header.obu_type { ObuType::Frame | ObuType::FrameHeader => { let fh = parser .parse_frame_header_obu(&obu) .map_err(|err| DecodeError::ParseFrameError(err))?; fh.frame_type == FrameType::KeyFrame } _ => false, }; /* we can only resume from key frames */ if !is_key_frame { return Ok(obu_length); } else { self.decoding_state = DecodingState::Decoding; } } } } /* We are in `Decoding` state if we reached here */ match self.codec.parser.parse_obu(obu).map_err(|err| DecodeError::ParseFrameError(err))? { ParsedObu::SequenceHeader(sequence) => { let sequence_differs = match &self.codec.stream_info { Some(old_stream_info) => *old_stream_info.seq_header != *sequence, None => true, }; if matches!(self.decoding_state, DecodingState::AwaitingStreamInfo) || sequence_differs { if self.codec.current_pic.is_some() { return Err(DecodeError::DecoderError(anyhow!( "broken stream: a picture is being decoded while a new sequence header is encountered" ))); } /* make sure we sync *before* we clear any state in the backend */ for f in &mut self.ready_queue.queue { /* TODO: this fixes av1-1-b8-03-sizeup on Intel * gen12, but we apparently do not do the same in * VP9. How is it that we do not get similar crashes there? * * TODO: syncing before calling new_sequence() in VP9 may fix some tests */ f.sync()?; } log::debug!( "found new sequence, resolution: {:?}, profile: {:?}, bit depth: {:?}", Resolution::from(( sequence.max_frame_width_minus_1 as u32 + 1, sequence.max_frame_height_minus_1 as u32 + 1 )), sequence.seq_profile, sequence.bit_depth ); /* there is nothing to drain, much like vp8 and vp9 */ self.codec.highest_spatial_layer = self.codec.parser.highest_operating_point(); let stream_info = match &self.codec.parser.last_frame_header { Some(fh) => StreamInfo { seq_header: sequence.clone(), render_width: fh.render_width, render_height: fh.render_height, }, None => StreamInfo { seq_header: sequence.clone(), render_width: sequence.max_frame_width_minus_1 as u32 + 1, render_height: sequence.max_frame_height_minus_1 as u32 + 1, }, }; self.backend.change_stream_info(&stream_info)?; self.await_format_change(stream_info); } } ParsedObu::FrameHeader(frame_header) => { if self.codec.current_pic.is_some() { /* submit this frame immediately, as we need to update the * DPB and the reference info state *before* processing the * next frame */ self.submit_frame(timestamp)?; } self.decode_frame_header(frame_header, timestamp, alloc_cb)?; } ParsedObu::TileGroup(tile_group) => { self.decode_tile_group(tile_group)?; } ParsedObu::Frame(frame) => { let stream_info = self.codec.stream_info.as_ref().ok_or(DecodeError::DecoderError(anyhow!( "broken stream: a picture is being decoded without a sequence header" )))?; if stream_info.render_width != frame.header.render_width || stream_info.render_height != frame.header.render_height { let new_stream_info = StreamInfo { seq_header: stream_info.seq_header.clone(), render_width: frame.header.render_width, render_height: frame.header.render_height, }; self.backend.change_stream_info(&new_stream_info)?; } if self.codec.current_pic.is_some() { /* submit this frame immediately, as we need to update the * DPB and the reference info state *before* processing the * next frame */ self.submit_frame(timestamp)?; } self.decode_frame(frame, timestamp, alloc_cb)?; /* submit this frame immediately, as we need to update the * DPB and the reference info state *before* processing the * next frame */ self.submit_frame(timestamp)?; } ParsedObu::TileList => { return Err(DecodeError::DecoderError(anyhow!( "large tile scale mode is not supported" ))); } other => { log::debug!("skipping OBU of type {:?}", other.obu_type()); } } /* Submit the last frame if we have reached the end of the temporal unit. */ if bitstream.len() == obu_length && self.codec.current_pic.is_some() { self.submit_frame(timestamp)?; } Ok(obu_length) } fn flush(&mut self) -> Result<(), super::DecodeError> { // Note: all the submitted frames are already in the ready queue. self.codec.reference_frames = Default::default(); self.decoding_state = DecodingState::Reset; Ok(()) } fn stream_info(&self) -> Option<&crate::decoder::StreamInfo> { self.backend.stream_info() } fn next_event(&mut self) -> Option> { self.query_next_event(|decoder, stream_info| { decoder.codec.stream_info = Some(stream_info.clone()); }) } fn poll_fd(&self) -> BorrowedFd { self.epoll_fd.0.as_fd() } } #[cfg(test)] pub mod tests { use crate::bitstream_utils::IvfIterator; use crate::decoder::stateless::av1::Av1; use crate::decoder::stateless::tests::test_decode_stream; use crate::decoder::stateless::tests::TestStream; use crate::decoder::stateless::StatelessDecoder; use crate::decoder::BlockingMode; use crate::utils::simple_playback_loop; use crate::utils::simple_playback_loop_owned_frames; use crate::DecodedFormat; /// Run `test` using the dummy decoder, in both blocking and non-blocking modes. fn test_decoder_dummy(test: &TestStream, blocking_mode: BlockingMode) { let decoder = StatelessDecoder::::new_dummy(blocking_mode).unwrap(); test_decode_stream( |d, s, f| { simple_playback_loop( d, IvfIterator::new(s), f, &mut simple_playback_loop_owned_frames, DecodedFormat::NV12, blocking_mode, ) }, decoder, test, false, false, ); } /// Same as Chromium's test-25fps.av1.ivf pub const DECODE_TEST_25FPS: TestStream = TestStream { stream: include_bytes!("../../codec/av1/test_data/test-25fps.ivf.av1"), crcs: include_str!("../../codec/av1/test_data/test-25fps.ivf.av1.crc"), }; #[test] fn test_25fps_block() { test_decoder_dummy(&DECODE_TEST_25FPS, BlockingMode::Blocking); } #[test] fn test_25fps_nonblock() { test_decoder_dummy(&DECODE_TEST_25FPS, BlockingMode::NonBlocking); } }