• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2023 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 use std::rc::Rc;
6 
7 use anyhow::anyhow;
8 use anyhow::Context;
9 use libva::{
10     BufferType, Display, HevcSliceExtFlags, IQMatrix, IQMatrixBufferHEVC, PictureHEVC,
11     PictureParameterBufferHEVC, SliceParameter, SliceParameterBufferHEVC,
12     SliceParameterBufferHEVCRext,
13 };
14 
15 use crate::backend::vaapi::decoder::DecodedHandle as VADecodedHandle;
16 use crate::backend::vaapi::decoder::VaStreamInfo;
17 use crate::backend::vaapi::decoder::VaapiBackend;
18 use crate::backend::vaapi::decoder::VaapiPicture;
19 use crate::codec::h265::dpb::Dpb;
20 use crate::codec::h265::parser::NaluType;
21 use crate::codec::h265::parser::Pps;
22 use crate::codec::h265::parser::Profile;
23 use crate::codec::h265::parser::Slice;
24 use crate::codec::h265::parser::Sps;
25 use crate::codec::h265::picture::PictureData;
26 use crate::codec::h265::picture::Reference;
27 use crate::decoder::stateless::h265::RefPicListEntry;
28 use crate::decoder::stateless::h265::RefPicSet;
29 use crate::decoder::stateless::h265::StatelessH265DecoderBackend;
30 use crate::decoder::stateless::h265::H265;
31 use crate::decoder::stateless::NewPictureError;
32 use crate::decoder::stateless::NewPictureResult;
33 use crate::decoder::stateless::NewStatelessDecoderError;
34 use crate::decoder::stateless::StatelessBackendResult;
35 use crate::decoder::stateless::StatelessDecoder;
36 use crate::decoder::stateless::StatelessDecoderBackend;
37 use crate::decoder::stateless::StatelessDecoderBackendPicture;
38 use crate::decoder::BlockingMode;
39 use crate::decoder::DecodedHandle;
40 use crate::video_frame::VideoFrame;
41 use crate::Rect;
42 use crate::Resolution;
43 
/// Clip3 from equation 5-8: limits `z` to the range bounded by `x` (lower)
/// and `y` (upper).
///
/// The lower bound is tested first, so `x` is returned whenever `z < x`, even
/// if the bounds are passed in inverted order.
fn clip3(x: i32, y: i32, z: i32) -> i32 {
    match z {
        below if below < x => x,
        above if above > y => y,
        within => within,
    }
}
54 
/// Builds the up-right diagonal scan table for a `ROWS` x `ROWS` block.
/// See 6.5.3.
///
/// Entry `i` of the returned array is the raster index (`x + ROWS * y`) of the
/// `i`-th position visited by the scan. `N` must equal `ROWS * ROWS`; this is
/// asserted because generics can't be used in const operations for now, so
/// `[0; ROWS * ROWS]` is rejected by the compiler.
const fn up_right_diagonal<const N: usize, const ROWS: usize>() -> [usize; N] {
    assert!(ROWS * ROWS == N);

    let mut table = [0; N];
    let mut filled = 0;
    let mut diagonal = 0;

    // Walk the anti-diagonals in order. Each one starts in column 0 at row
    // `diagonal` and moves up and to the right; positions that fall outside
    // the block are skipped.
    while filled < N {
        let mut col = 0;
        while col <= diagonal {
            let row = diagonal - col;
            if col < ROWS && row < ROWS {
                table[filled] = col + ROWS * row;
                filled += 1;
            }
            col += 1;
        }
        diagonal += 1;
    }

    table
}
85 
// Scan tables for 4x4 and 8x8 blocks, evaluated at compile time: entry `i` is
// the raster index of the `i`-th position in up-right diagonal scan order.
const UP_RIGHT_DIAGONAL_4X4: [usize; 16] = up_right_diagonal::<16, 4>();
const UP_RIGHT_DIAGONAL_8X8: [usize; 64] = up_right_diagonal::<64, 8>();
88 
get_raster_from_up_right_diagonal_8x8(src: [u8; 64], dst: &mut [u8; 64])89 fn get_raster_from_up_right_diagonal_8x8(src: [u8; 64], dst: &mut [u8; 64]) {
90     for i in 0..64 {
91         dst[UP_RIGHT_DIAGONAL_8X8[i]] = src[i];
92     }
93 }
94 
get_raster_from_up_right_diagonal_4x4(src: [u8; 16], dst: &mut [u8; 16])95 fn get_raster_from_up_right_diagonal_4x4(src: [u8; 16], dst: &mut [u8; 16]) {
96     for i in 0..16 {
97         dst[UP_RIGHT_DIAGONAL_4X4[i]] = src[i];
98     }
99 }
100 
/// Identifies which parameter set supplies the scaling lists for a picture.
enum ScalingListType {
    /// The scaling lists stored in the SPS are used.
    Sps,
    /// The scaling lists stored in the PPS are used.
    Pps,
    /// No scaling list applies.
    None,
}
106 
107 impl VaStreamInfo for &Sps {
va_profile(&self) -> anyhow::Result<i32>108     fn va_profile(&self) -> anyhow::Result<i32> {
109         let profile_idc = self.profile_tier_level.general_profile_idc;
110         let profile = Profile::try_from(profile_idc).map_err(|err| anyhow!(err))?;
111 
112         let bit_depth =
113             std::cmp::max(self.bit_depth_luma_minus8 + 8, self.bit_depth_chroma_minus8 + 8);
114 
115         let chroma_format_idc = self.chroma_format_idc;
116         let err = Err(anyhow!(
117             "Invalid combination of profile, bit depth an chroma_format_idc: ({:?}, {}, {}",
118             profile,
119             bit_depth,
120             chroma_format_idc
121         ));
122 
123         // TODO: This can still be much improved in light of table A.2.
124         match profile {
125             Profile::Main | Profile::MainStill | Profile::Main10 => {
126                 match (bit_depth, chroma_format_idc) {
127                     (8, 0) | (8, 1) => Ok(libva::VAProfile::VAProfileHEVCMain),
128                     (8, 3) => Ok(libva::VAProfile::VAProfileHEVCMain444),
129                     (10, 0) | (10, 1) => Ok(libva::VAProfile::VAProfileHEVCMain10),
130                     (10, 2) => Ok(libva::VAProfile::VAProfileHEVCMain422_10),
131                     (12, 1) => Ok(libva::VAProfile::VAProfileHEVCMain12),
132                     (12, 2) => Ok(libva::VAProfile::VAProfileHEVCMain422_12),
133                     (12, 3) => Ok(libva::VAProfile::VAProfileHEVCMain444_12),
134                     _ => err,
135                 }
136             }
137 
138             // See table A.4.
139             Profile::ScalableMain => match (bit_depth, chroma_format_idc) {
140                 (8, 1) => Ok(libva::VAProfile::VAProfileHEVCSccMain),
141                 (8, 3) => Ok(libva::VAProfile::VAProfileHEVCSccMain444),
142                 (10, 1) => Ok(libva::VAProfile::VAProfileHEVCSccMain10),
143                 (10, 3) => Ok(libva::VAProfile::VAProfileHEVCSccMain444_10),
144                 _ => err,
145             },
146 
147             _ => unimplemented!("Adding more profile support based on A.3. is still TODO"),
148         }
149     }
150 
rt_format(&self) -> anyhow::Result<u32>151     fn rt_format(&self) -> anyhow::Result<u32> {
152         let bit_depth =
153             std::cmp::max(self.bit_depth_luma_minus8 + 8, self.bit_depth_chroma_minus8 + 8);
154 
155         let chroma_format_idc = self.chroma_format_idc;
156 
157         match (bit_depth, chroma_format_idc) {
158             (8, 0) | (8, 1) => Ok(libva::VA_RT_FORMAT_YUV420),
159             (8, 2) => Ok(libva::VA_RT_FORMAT_YUV422),
160             (8, 3) => Ok(libva::VA_RT_FORMAT_YUV444),
161             (9, 0) | (9, 1) | (10, 0) | (10, 1) => Ok(libva::VA_RT_FORMAT_YUV420_10),
162             (9, 2) | (10, 2) => Ok(libva::VA_RT_FORMAT_YUV422_10),
163             (9, 3) | (10, 3) => Ok(libva::VA_RT_FORMAT_YUV444_10),
164             (11, 0) | (11, 1) | (12, 0) | (12, 1) => Ok(libva::VA_RT_FORMAT_YUV420_12),
165             (11, 2) | (12, 2) => Ok(libva::VA_RT_FORMAT_YUV422_12),
166             (11, 3) | (12, 3) => Ok(libva::VA_RT_FORMAT_YUV444_12),
167             _ => Err(anyhow!(
168                 "unsupported bit depth/chroma format pair {}, {}",
169                 bit_depth,
170                 chroma_format_idc
171             )),
172         }
173     }
174 
min_num_surfaces(&self) -> usize175     fn min_num_surfaces(&self) -> usize {
176         self.max_dpb_size() + 4
177     }
178 
coded_size(&self) -> Resolution179     fn coded_size(&self) -> Resolution {
180         Resolution::from((self.width().into(), self.height().into()))
181     }
182 
visible_rect(&self) -> Rect183     fn visible_rect(&self) -> Rect {
184         let rect = self.visible_rectangle();
185 
186         Rect { x: rect.min.x, y: rect.min.y, width: rect.max.x, height: rect.max.y }
187     }
188 }
189 
build_slice_ref_pic_list<V: VideoFrame>( ref_pic_list: &[Option<RefPicListEntry<VADecodedHandle<V>>>; 16], va_references: &[PictureHEVC; 15], ) -> [u8; 15]190 fn build_slice_ref_pic_list<V: VideoFrame>(
191     ref_pic_list: &[Option<RefPicListEntry<VADecodedHandle<V>>>; 16],
192     va_references: &[PictureHEVC; 15],
193 ) -> [u8; 15] {
194     let mut va_refs = [0xff; 15];
195 
196     for (ref_pic_list_idx, ref_pic_list_entry) in ref_pic_list.iter().enumerate() {
197         if ref_pic_list_idx == 15 {
198             break;
199         }
200 
201         if let Some(ref_pic_list_entry) = ref_pic_list_entry {
202             for (va_ref_idx, va_ref) in va_references.iter().enumerate() {
203                 if va_ref.picture_id() == libva::VA_INVALID_ID {
204                     break;
205                 }
206 
207                 let pic_order_cnt = match ref_pic_list_entry {
208                     RefPicListEntry::CurrentPicture(p) => p.pic_order_cnt_val,
209                     RefPicListEntry::DpbEntry(p) => p.0.borrow().pic_order_cnt_val,
210                 };
211 
212                 if va_ref.pic_order_cnt() == pic_order_cnt {
213                     va_refs[ref_pic_list_idx] = va_ref_idx as u8;
214                 }
215             }
216         }
217     }
218 
219     va_refs
220 }
221 
va_rps_flag<V: VideoFrame>(hevc_pic: &PictureData, rps: &RefPicSet<VADecodedHandle<V>>) -> u32222 fn va_rps_flag<V: VideoFrame>(hevc_pic: &PictureData, rps: &RefPicSet<VADecodedHandle<V>>) -> u32 {
223     if rps
224         .ref_pic_set_st_curr_before
225         .iter()
226         .flatten()
227         .any(|dpb_entry| *dpb_entry.0.borrow() == *hevc_pic)
228     {
229         libva::VA_PICTURE_HEVC_RPS_ST_CURR_BEFORE
230     } else if rps
231         .ref_pic_set_st_curr_after
232         .iter()
233         .flatten()
234         .any(|dpb_entry| *dpb_entry.0.borrow() == *hevc_pic)
235     {
236         libva::VA_PICTURE_HEVC_RPS_ST_CURR_AFTER
237     } else if rps
238         .ref_pic_set_lt_curr
239         .iter()
240         .flatten()
241         .any(|dpb_entry| *dpb_entry.0.borrow() == *hevc_pic)
242     {
243         libva::VA_PICTURE_HEVC_RPS_LT_CURR
244     } else {
245         0
246     }
247 }
248 
249 /// Builds an invalid VaPictureHEVC. These pictures are used to fill empty
250 /// array slots there is no data to fill them with.
build_invalid_va_hevc_pic() -> libva::PictureHEVC251 fn build_invalid_va_hevc_pic() -> libva::PictureHEVC {
252     libva::PictureHEVC::new(libva::VA_INVALID_ID, 0, libva::VA_PICTURE_HEVC_INVALID)
253 }
254 
fill_va_hevc_pic<V: VideoFrame>( hevc_pic: &PictureData, surface_id: libva::VASurfaceID, rps: &RefPicSet<VADecodedHandle<V>>, ) -> libva::PictureHEVC255 fn fill_va_hevc_pic<V: VideoFrame>(
256     hevc_pic: &PictureData,
257     surface_id: libva::VASurfaceID,
258     rps: &RefPicSet<VADecodedHandle<V>>,
259 ) -> libva::PictureHEVC {
260     let mut flags = 0;
261 
262     if matches!(hevc_pic.reference(), Reference::LongTerm) {
263         flags |= libva::VA_PICTURE_HEVC_LONG_TERM_REFERENCE;
264     }
265 
266     flags |= va_rps_flag(hevc_pic, rps);
267 
268     libva::PictureHEVC::new(surface_id, hevc_pic.pic_order_cnt_val, flags)
269 }
270 
is_range_extension_profile(va_profile: libva::VAProfile::Type) -> bool271 fn is_range_extension_profile(va_profile: libva::VAProfile::Type) -> bool {
272     matches!(
273         va_profile,
274         libva::VAProfile::VAProfileHEVCMain422_10
275             | libva::VAProfile::VAProfileHEVCMain444
276             | libva::VAProfile::VAProfileHEVCMain444_10
277             | libva::VAProfile::VAProfileHEVCMain12
278             | libva::VAProfile::VAProfileHEVCMain422_12
279             | libva::VAProfile::VAProfileHEVCMain444_12
280     )
281 }
282 
is_scc_ext_profile(va_profile: libva::VAProfile::Type) -> bool283 fn is_scc_ext_profile(va_profile: libva::VAProfile::Type) -> bool {
284     matches!(
285         va_profile,
286         libva::VAProfile::VAProfileHEVCSccMain
287             | libva::VAProfile::VAProfileHEVCSccMain10
288             | libva::VAProfile::VAProfileHEVCSccMain444
289             | libva::VAProfile::VAProfileHEVCMain444_10,
290     )
291 }
292 
/// Builds the VA range extension picture parameter buffer from the range
/// extension fields of `sps` and `pps`.
///
/// Both libva constructors take their arguments positionally, so the order of
/// the arguments below is significant.
fn build_picture_rext(sps: &Sps, pps: &Pps) -> anyhow::Result<BufferType> {
    let sps_rext = &sps.range_extension;
    let pps_rext = &pps.range_extension;

    // The first nine flags come from the SPS range extension, the last two
    // from the PPS range extension.
    let range_extension_pic_fields = libva::HevcRangeExtensionPicFields::new(
        sps_rext.transform_skip_rotation_enabled_flag as u32,
        sps_rext.transform_skip_context_enabled_flag as u32,
        sps_rext.implicit_rdpcm_enabled_flag as u32,
        sps_rext.explicit_rdpcm_enabled_flag as u32,
        sps_rext.extended_precision_processing_flag as u32,
        sps_rext.intra_smoothing_disabled_flag as u32,
        sps_rext.high_precision_offsets_enabled_flag as u32,
        sps_rext.persistent_rice_adaptation_enabled_flag as u32,
        sps_rext.cabac_bypass_alignment_enabled_flag as u32,
        pps_rext.cross_component_prediction_enabled_flag as u32,
        pps_rext.chroma_qp_offset_list_enabled_flag as u32,
    );

    // The remaining parameters all come from the PPS range extension and are
    // narrowed to the 8-bit fields libva expects.
    let rext = libva::PictureParameterBufferHEVCRext::new(
        &range_extension_pic_fields,
        pps_rext.diff_cu_chroma_qp_offset_depth as u8,
        pps_rext.chroma_qp_offset_list_len_minus1 as u8,
        pps_rext.log2_sao_offset_scale_luma as u8,
        pps_rext.log2_sao_offset_scale_chroma as u8,
        pps_rext.log2_max_transform_skip_block_size_minus2 as u8,
        pps_rext.cb_qp_offset_list.map(|x| x as i8),
        pps_rext.cr_qp_offset_list.map(|x| x as i8),
    );

    Ok(BufferType::PictureParameter(libva::PictureParameter::HEVCRext(rext)))
}
324 
build_picture_scc(sps: &Sps, pps: &Pps) -> anyhow::Result<BufferType>325 fn build_picture_scc(sps: &Sps, pps: &Pps) -> anyhow::Result<BufferType> {
326     let sps_scc = &sps.scc_extension;
327     let pps_scc = &pps.scc_extension;
328 
329     let scc_pic_fields = libva::HevcScreenContentPicFields::new(
330         pps_scc.curr_pic_ref_enabled_flag as u32,
331         sps_scc.palette_mode_enabled_flag as u32,
332         sps_scc.motion_vector_resolution_control_idc as u32,
333         sps_scc.intra_boundary_filtering_disabled_flag as u32,
334         pps_scc.residual_adaptive_colour_transform_enabled_flag as u32,
335         pps_scc.slice_act_qp_offsets_present_flag as u32,
336     );
337 
338     let (predictor_palette_entries, predictor_palette_size) =
339         if pps_scc.palette_predictor_initializers_present_flag {
340             (
341                 pps_scc.palette_predictor_initializer.map(|outer| outer.map(u16::from)),
342                 pps_scc.num_palette_predictor_initializers,
343             )
344         } else if sps_scc.palette_predictor_initializers_present_flag {
345             (
346                 sps_scc.palette_predictor_initializer.map(|outer| outer.map(|inner| inner as u16)),
347                 sps_scc.num_palette_predictor_initializer_minus1 + 1,
348             )
349         } else {
350             ([[0; 128]; 3], 0)
351         };
352 
353     let scc = libva::PictureParameterBufferHEVCScc::new(
354         &scc_pic_fields,
355         sps_scc.palette_max_size,
356         sps_scc.delta_palette_max_predictor_size,
357         predictor_palette_size,
358         predictor_palette_entries,
359         pps_scc.act_y_qp_offset_plus5,
360         pps_scc.act_cb_qp_offset_plus5,
361         pps_scc.act_cr_qp_offset_plus3,
362     );
363 
364     Ok(BufferType::PictureParameter(libva::PictureParameter::HEVCScc(scc)))
365 }
366 
build_pic_param<V: VideoFrame>( _: &Slice, current_picture: &PictureData, current_surface_id: libva::VASurfaceID, dpb: &Dpb<VADecodedHandle<V>>, rps: &RefPicSet<VADecodedHandle<V>>, sps: &Sps, pps: &Pps, ) -> anyhow::Result<(BufferType, [PictureHEVC; 15])>367 fn build_pic_param<V: VideoFrame>(
368     _: &Slice,
369     current_picture: &PictureData,
370     current_surface_id: libva::VASurfaceID,
371     dpb: &Dpb<VADecodedHandle<V>>,
372     rps: &RefPicSet<VADecodedHandle<V>>,
373     sps: &Sps,
374     pps: &Pps,
375 ) -> anyhow::Result<(BufferType, [PictureHEVC; 15])> {
376     let curr_pic = fill_va_hevc_pic(current_picture, current_surface_id, rps);
377 
378     let mut reference_frames = vec![];
379 
380     for ref_pic in dpb.get_all_references() {
381         let surface_id = ref_pic.1.borrow().surface().id();
382         let ref_pic = fill_va_hevc_pic(&ref_pic.0.borrow(), surface_id, rps);
383         reference_frames.push(ref_pic);
384     }
385 
386     // RefPicListL0 and RefPicListL1 may signal that they want to refer to
387     // the current picture. We must tell VA that it is a reference as it is
388     // not in the DPB at this point.
389     if pps.scc_extension.curr_pic_ref_enabled_flag {
390         if reference_frames.len() >= 15 {
391             log::warn!(
392                 "Bug: Trying to set the current picture as a VA reference, but the VA DPB is full."
393             )
394         } else {
395             reference_frames.push(curr_pic);
396         }
397     }
398 
399     for _ in reference_frames.len()..15 {
400         reference_frames.push(build_invalid_va_hevc_pic());
401     }
402 
403     let reference_frames = reference_frames.try_into();
404     let reference_frames = match reference_frames {
405         Ok(va_refs) => va_refs,
406         Err(_) => {
407             // Can't panic, we guarantee len() == 15.
408             panic!("Bug: wrong number of references, expected 15");
409         }
410     };
411 
412     let pic_fields = libva::HevcPicFields::new(
413         sps.chroma_format_idc as u32,
414         sps.separate_colour_plane_flag as u32,
415         sps.pcm_enabled_flag as u32,
416         sps.scaling_list_enabled_flag as u32,
417         pps.transform_skip_enabled_flag as u32,
418         sps.amp_enabled_flag as u32,
419         sps.strong_intra_smoothing_enabled_flag as u32,
420         pps.sign_data_hiding_enabled_flag as u32,
421         pps.constrained_intra_pred_flag as u32,
422         pps.cu_qp_delta_enabled_flag as u32,
423         pps.weighted_pred_flag as u32,
424         pps.weighted_bipred_flag as u32,
425         pps.transquant_bypass_enabled_flag as u32,
426         pps.tiles_enabled_flag as u32,
427         pps.entropy_coding_sync_enabled_flag as u32,
428         pps.loop_filter_across_slices_enabled_flag as u32,
429         pps.loop_filter_across_tiles_enabled_flag as u32,
430         sps.pcm_loop_filter_disabled_flag as u32,
431         /* lets follow the FFMPEG and GStreamer train and set these to false */
432         0,
433         0,
434     );
435 
436     let rap_pic_flag = current_picture.nalu_type as u32 >= NaluType::BlaWLp as u32
437         && current_picture.nalu_type as u32 <= NaluType::CraNut as u32;
438 
439     let slice_parsing_fields = libva::HevcSliceParsingFields::new(
440         pps.lists_modification_present_flag as u32,
441         sps.long_term_ref_pics_present_flag as u32,
442         sps.temporal_mvp_enabled_flag as u32,
443         pps.cabac_init_present_flag as u32,
444         pps.output_flag_present_flag as u32,
445         pps.dependent_slice_segments_enabled_flag as u32,
446         pps.slice_chroma_qp_offsets_present_flag as u32,
447         sps.sample_adaptive_offset_enabled_flag as u32,
448         pps.deblocking_filter_override_enabled_flag as u32,
449         pps.deblocking_filter_disabled_flag as u32,
450         pps.slice_segment_header_extension_present_flag as u32,
451         rap_pic_flag as u32,
452         current_picture.nalu_type.is_idr() as u32,
453         current_picture.nalu_type.is_irap() as u32,
454     );
455 
456     let pic_param = PictureParameterBufferHEVC::new(
457         curr_pic,
458         reference_frames,
459         sps.pic_width_in_luma_samples,
460         sps.pic_height_in_luma_samples,
461         &pic_fields,
462         sps.max_dec_pic_buffering_minus1[usize::from(sps.max_sub_layers_minus1)],
463         sps.bit_depth_luma_minus8,
464         sps.bit_depth_chroma_minus8,
465         sps.pcm_sample_bit_depth_luma_minus1,
466         sps.pcm_sample_bit_depth_chroma_minus1,
467         sps.log2_min_luma_coding_block_size_minus3,
468         sps.log2_diff_max_min_luma_coding_block_size,
469         sps.log2_min_luma_transform_block_size_minus2,
470         sps.log2_diff_max_min_luma_transform_block_size,
471         sps.log2_min_pcm_luma_coding_block_size_minus3,
472         sps.log2_diff_max_min_pcm_luma_coding_block_size,
473         sps.max_transform_hierarchy_depth_intra,
474         sps.max_transform_hierarchy_depth_inter,
475         pps.init_qp_minus26,
476         pps.diff_cu_qp_delta_depth,
477         pps.cb_qp_offset,
478         pps.cr_qp_offset,
479         pps.log2_parallel_merge_level_minus2,
480         pps.num_tile_columns_minus1,
481         pps.num_tile_rows_minus1,
482         pps.column_width_minus1.map(|x| x as u16),
483         pps.row_height_minus1.map(|x| x as u16),
484         &slice_parsing_fields,
485         sps.log2_max_pic_order_cnt_lsb_minus4,
486         sps.num_short_term_ref_pic_sets,
487         sps.num_long_term_ref_pics_sps,
488         pps.num_ref_idx_l0_default_active_minus1,
489         pps.num_ref_idx_l1_default_active_minus1,
490         pps.beta_offset_div2,
491         pps.tc_offset_div2,
492         pps.num_extra_slice_header_bits,
493         current_picture.short_term_ref_pic_set_size_bits,
494     );
495 
496     Ok((BufferType::PictureParameter(libva::PictureParameter::HEVC(pic_param)), reference_frames))
497 }
498 
find_scaling_list(sps: &Sps, pps: &Pps) -> ScalingListType499 fn find_scaling_list(sps: &Sps, pps: &Pps) -> ScalingListType {
500     if pps.scaling_list_data_present_flag
501         || (sps.scaling_list_enabled_flag && !sps.scaling_list_data_present_flag)
502     {
503         ScalingListType::Pps
504     } else if sps.scaling_list_enabled_flag && sps.scaling_list_data_present_flag {
505         ScalingListType::Sps
506     } else {
507         ScalingListType::None
508     }
509 }
510 
build_iq_matrix(sps: &Sps, pps: &Pps) -> BufferType511 fn build_iq_matrix(sps: &Sps, pps: &Pps) -> BufferType {
512     let scaling_lists = match find_scaling_list(sps, pps) {
513         ScalingListType::Sps => &sps.scaling_list,
514         ScalingListType::Pps => &pps.scaling_list,
515         ScalingListType::None => panic!("No scaling list data available"),
516     };
517 
518     let mut scaling_list_32x32 = [[0; 64]; 2];
519 
520     for i in (0..6).step_by(3) {
521         for j in 0..64 {
522             scaling_list_32x32[i / 3][j] = scaling_lists.scaling_list_32x32[i][j];
523         }
524     }
525 
526     let mut scaling_list_dc_32x32 = [0; 2];
527     for i in (0..6).step_by(3) {
528         scaling_list_dc_32x32[i / 3] =
529             (scaling_lists.scaling_list_dc_coef_minus8_32x32[i] + 8) as u8;
530     }
531 
532     let mut scaling_list_4x4 = [[0; 16]; 6];
533     let mut scaling_list_8x8 = [[0; 64]; 6];
534     let mut scaling_list_16x16 = [[0; 64]; 6];
535     let mut scaling_list_32x32_r = [[0; 64]; 2];
536 
537     (0..6).for_each(|i| {
538         get_raster_from_up_right_diagonal_4x4(
539             scaling_lists.scaling_list_4x4[i],
540             &mut scaling_list_4x4[i],
541         );
542 
543         get_raster_from_up_right_diagonal_8x8(
544             scaling_lists.scaling_list_8x8[i],
545             &mut scaling_list_8x8[i],
546         );
547 
548         get_raster_from_up_right_diagonal_8x8(
549             scaling_lists.scaling_list_16x16[i],
550             &mut scaling_list_16x16[i],
551         );
552     });
553 
554     (0..2).for_each(|i| {
555         get_raster_from_up_right_diagonal_8x8(scaling_list_32x32[i], &mut scaling_list_32x32_r[i]);
556     });
557 
558     BufferType::IQMatrix(IQMatrix::HEVC(IQMatrixBufferHEVC::new(
559         scaling_list_4x4,
560         scaling_list_8x8,
561         scaling_list_16x16,
562         scaling_list_32x32_r,
563         scaling_lists.scaling_list_dc_coef_minus8_16x16.map(|x| (x + 8) as u8),
564         scaling_list_dc_32x32,
565     )))
566 }
567 
568 impl<V: VideoFrame> VaapiBackend<V> {
submit_last_slice( &mut self, picture: &mut <Self as StatelessDecoderBackendPicture<H265>>::Picture, ) -> anyhow::Result<()>569     fn submit_last_slice(
570         &mut self,
571         picture: &mut <Self as StatelessDecoderBackendPicture<H265>>::Picture,
572     ) -> anyhow::Result<()> {
573         if let Some(last_slice) = picture.last_slice.take() {
574             let context = &self.context;
575             let picture = &mut picture.picture;
576 
577             let slice_param = BufferType::SliceParameter(SliceParameter::HEVC(last_slice.0));
578             let slice_param = context.create_buffer(slice_param)?;
579             picture.add_buffer(slice_param);
580 
581             if let Some(slice_param_rext) = last_slice.1 {
582                 let slice_param_rext =
583                     BufferType::SliceParameter(SliceParameter::HEVCRext(slice_param_rext));
584                 let slice_param_rext = context.create_buffer(slice_param_rext)?;
585                 picture.add_buffer(slice_param_rext);
586             }
587 
588             let slice_data = BufferType::SliceData(last_slice.2);
589             let slice_data = context.create_buffer(slice_data)?;
590             picture.add_buffer(slice_data);
591         }
592 
593         Ok(())
594     }
595 }
596 
/// Backend state for one H.265 picture being decoded through VA-API.
pub struct VaapiH265Picture<Picture> {
    /// The underlying picture that the VA buffers are added to.
    picture: Picture,

    // We are always one slice behind, so that we can mark the last one in
    // submit_picture()
    /// The pending slice: its slice parameters, its optional range extension
    /// slice parameters, and its slice data.
    last_slice: Option<(SliceParameterBufferHEVC, Option<SliceParameterBufferHEVCRext>, Vec<u8>)>,

    /// The VA reference picture array that was built together with the
    /// picture parameters; slice reference lists are expressed as indices
    /// into it.
    va_references: [PictureHEVC; 15],
}
606 
/// Selects `VaapiH265Picture`, wrapping a `VaapiPicture`, as the picture type
/// this backend uses for H.265.
impl<V: VideoFrame> StatelessDecoderBackendPicture<H265> for VaapiBackend<V> {
    type Picture = VaapiH265Picture<VaapiPicture<V>>;
}
610 
611 impl<V: VideoFrame> StatelessH265DecoderBackend for VaapiBackend<V> {
    /// Forwards the new SPS to `self.new_sequence`.
    // NOTE(review): this presumably resolves to an inherent
    // `VaapiBackend::new_sequence` defined elsewhere rather than recursing
    // into this trait method. Confirm against the backend definition.
    fn new_sequence(&mut self, sps: &Sps) -> StatelessBackendResult<()> {
        self.new_sequence(sps)
    }
615 
new_picture( &mut self, timestamp: u64, alloc_cb: &mut dyn FnMut() -> Option< <<Self as StatelessDecoderBackend>::Handle as DecodedHandle>::Frame, >, ) -> NewPictureResult<Self::Picture>616     fn new_picture(
617         &mut self,
618         timestamp: u64,
619         alloc_cb: &mut dyn FnMut() -> Option<
620             <<Self as StatelessDecoderBackend>::Handle as DecodedHandle>::Frame,
621         >,
622     ) -> NewPictureResult<Self::Picture> {
623         Ok(VaapiH265Picture {
624             picture: VaapiPicture::new(
625                 timestamp,
626                 Rc::clone(&self.context),
627                 alloc_cb().ok_or(NewPictureError::OutOfOutputBuffers)?,
628             ),
629             last_slice: Default::default(),
630             va_references: Default::default(),
631         })
632     }
633 
begin_picture( &mut self, picture: &mut Self::Picture, picture_data: &PictureData, sps: &Sps, pps: &Pps, dpb: &Dpb<Self::Handle>, rps: &RefPicSet<Self::Handle>, slice: &Slice, ) -> crate::decoder::stateless::StatelessBackendResult<()>634     fn begin_picture(
635         &mut self,
636         picture: &mut Self::Picture,
637         picture_data: &PictureData,
638         sps: &Sps,
639         pps: &Pps,
640         dpb: &Dpb<Self::Handle>,
641         rps: &RefPicSet<Self::Handle>,
642         slice: &Slice,
643     ) -> crate::decoder::stateless::StatelessBackendResult<()> {
644         let context = &self.context;
645 
646         let surface_id = picture.picture.surface().id();
647 
648         let (pic_param, reference_frames) =
649             build_pic_param(slice, picture_data, surface_id, dpb, rps, sps, pps)?;
650 
651         picture.va_references = reference_frames;
652 
653         let picture = &mut picture.picture;
654 
655         let pic_param =
656             context.create_buffer(pic_param).context("while creating picture parameter buffer")?;
657 
658         picture.add_buffer(pic_param);
659 
660         if !matches!(find_scaling_list(sps, pps), ScalingListType::None) {
661             let iq_matrix = build_iq_matrix(sps, pps);
662             let iq_matrix =
663                 context.create_buffer(iq_matrix).context("while creating IQ matrix buffer")?;
664 
665             picture.add_buffer(iq_matrix);
666         }
667 
668         let va_profile = sps.va_profile()?;
669         if is_range_extension_profile(va_profile) || is_scc_ext_profile(va_profile) {
670             let rext = build_picture_rext(sps, pps)?;
671             let rext = context
672                 .create_buffer(rext)
673                 .context("while creating picture parameter range extension buffer")?;
674 
675             picture.add_buffer(rext);
676 
677             if is_scc_ext_profile(va_profile) {
678                 let scc = build_picture_scc(sps, pps)?;
679                 let scc = context
680                     .create_buffer(scc)
681                     .context("while creating picture screen content coding buffer")?;
682 
683                 picture.add_buffer(scc);
684             }
685         }
686 
687         Ok(())
688     }
689 
decode_slice( &mut self, picture: &mut Self::Picture, slice: &Slice, sps: &Sps, _: &Pps, ref_pic_list0: &[Option<RefPicListEntry<Self::Handle>>; 16], ref_pic_list1: &[Option<RefPicListEntry<Self::Handle>>; 16], ) -> crate::decoder::stateless::StatelessBackendResult<()>690     fn decode_slice(
691         &mut self,
692         picture: &mut Self::Picture,
693         slice: &Slice,
694         sps: &Sps,
695         _: &Pps,
696         ref_pic_list0: &[Option<RefPicListEntry<Self::Handle>>; 16],
697         ref_pic_list1: &[Option<RefPicListEntry<Self::Handle>>; 16],
698     ) -> crate::decoder::stateless::StatelessBackendResult<()> {
699         self.submit_last_slice(picture)?;
700         let hdr = &slice.header;
701 
702         let va_references = &picture.va_references;
703         let ref_pic_list0 = build_slice_ref_pic_list(ref_pic_list0, va_references);
704         let ref_pic_list1 = build_slice_ref_pic_list(ref_pic_list1, va_references);
705 
706         let long_slice_flags = libva::HevcLongSliceFlags::new(
707             0,
708             hdr.dependent_slice_segment_flag as u32,
709             hdr.type_ as u32,
710             hdr.colour_plane_id as u32,
711             hdr.sao_luma_flag as u32,
712             hdr.sao_chroma_flag as u32,
713             hdr.mvd_l1_zero_flag as u32,
714             hdr.cabac_init_flag as u32,
715             hdr.temporal_mvp_enabled_flag as u32,
716             hdr.deblocking_filter_disabled_flag as u32,
717             hdr.collocated_from_l0_flag as u32,
718             hdr.loop_filter_across_slices_enabled_flag as u32,
719         );
720 
721         let collocated_ref_idx =
722             if hdr.temporal_mvp_enabled_flag { hdr.collocated_ref_idx } else { 0xff };
723 
724         let pwt = &hdr.pred_weight_table;
725 
726         let mut delta_luma_weight_l0: [i8; 15usize] = Default::default();
727         let mut luma_offset_l0: [i8; 15usize] = Default::default();
728         let mut delta_chroma_weight_l0: [[i8; 2usize]; 15usize] = Default::default();
729         let mut chroma_offset_l0: [[i8; 2usize]; 15usize] = Default::default();
730         let mut delta_luma_weight_l1: [i8; 15usize] = Default::default();
731         let mut luma_offset_l1: [i8; 15usize] = Default::default();
732         let mut delta_chroma_weight_l1: [[i8; 2usize]; 15usize] = Default::default();
733         let mut chroma_offset_l1: [[i8; 2usize]; 15usize] = Default::default();
734 
735         for i in 0..15 {
736             delta_luma_weight_l0[i] = pwt.delta_luma_weight_l0[i];
737             luma_offset_l0[i] = pwt.luma_offset_l0[i];
738 
739             if hdr.type_.is_b() {
740                 delta_luma_weight_l1[i] = pwt.delta_luma_weight_l1[i];
741                 luma_offset_l1[i] = pwt.luma_offset_l1[i];
742             }
743 
744             for j in 0..2 {
745                 delta_chroma_weight_l0[i][j] = pwt.delta_chroma_weight_l0[i][j];
746                 let delta_chroma_offset = pwt.delta_chroma_offset_l0[i][j];
747 
748                 let chroma_weight_l0 = (1 << pwt.chroma_log2_weight_denom)
749                     + i32::from(pwt.delta_chroma_weight_l0[i][j]);
750 
751                 let offset = sps.wp_offset_half_range_c as i32 + delta_chroma_offset as i32
752                     - ((sps.wp_offset_half_range_c as i32 * chroma_weight_l0)
753                         >> pwt.chroma_log2_weight_denom);
754 
755                 chroma_offset_l0[i][j] = clip3(
756                     -(sps.wp_offset_half_range_c as i32),
757                     (sps.wp_offset_half_range_c - 1) as i32,
758                     offset,
759                 ) as _;
760 
761                 if hdr.type_.is_b() {
762                     delta_chroma_weight_l1[i][j] = pwt.delta_chroma_weight_l1[i][j];
763                     let delta_chroma_offset = pwt.delta_chroma_offset_l1[i][j];
764 
765                     let chroma_weight_l1 = (1 << pwt.chroma_log2_weight_denom)
766                         + i32::from(pwt.delta_chroma_weight_l1[i][j]);
767 
768                     let offset = sps.wp_offset_half_range_c as i32 + delta_chroma_offset as i32
769                         - ((sps.wp_offset_half_range_c as i32 * chroma_weight_l1)
770                             >> pwt.chroma_log2_weight_denom);
771 
772                     chroma_offset_l1[i][j] = clip3(
773                         -(sps.wp_offset_half_range_c as i32),
774                         (sps.wp_offset_half_range_c - 1) as i32,
775                         offset,
776                     ) as _;
777                 }
778             }
779         }
780 
781         let slice_param = SliceParameterBufferHEVC::new(
782             slice.nalu.size as u32,
783             0,
784             libva::VA_SLICE_DATA_FLAG_ALL,
785             (hdr.header_bit_size / 8) as _,
786             hdr.segment_address,
787             [ref_pic_list0, ref_pic_list1],
788             &long_slice_flags,
789             collocated_ref_idx,
790             hdr.num_ref_idx_l0_active_minus1,
791             hdr.num_ref_idx_l1_active_minus1,
792             hdr.qp_delta,
793             hdr.cb_qp_offset,
794             hdr.cr_qp_offset,
795             hdr.beta_offset_div2,
796             hdr.tc_offset_div2,
797             pwt.luma_log2_weight_denom,
798             pwt.delta_chroma_log2_weight_denom,
799             delta_luma_weight_l0,
800             luma_offset_l0,
801             delta_chroma_weight_l0,
802             chroma_offset_l0,
803             delta_luma_weight_l1,
804             luma_offset_l1,
805             delta_chroma_weight_l1,
806             chroma_offset_l1,
807             hdr.five_minus_max_num_merge_cand,
808             hdr.num_entry_point_offsets as _,
809             0,
810             hdr.n_emulation_prevention_bytes as _,
811         );
812 
813         let va_profile = sps.va_profile()?;
814 
815         let slice_param_ext =
816             if is_range_extension_profile(va_profile) || is_scc_ext_profile(va_profile) {
817                 let slice_ext_flags = HevcSliceExtFlags::new(
818                     hdr.cu_chroma_qp_offset_enabled_flag as u32,
819                     hdr.use_integer_mv_flag as u32,
820                 );
821 
822                 let slice_param_ext = SliceParameterBufferHEVCRext::new(
823                     luma_offset_l0.map(i16::from),
824                     chroma_offset_l0.map(|outer| outer.map(i16::from)),
825                     luma_offset_l1.map(i16::from),
826                     chroma_offset_l1.map(|outer| outer.map(i16::from)),
827                     &slice_ext_flags,
828                     hdr.slice_act_y_qp_offset,
829                     hdr.slice_act_cb_qp_offset,
830                     hdr.slice_act_cr_qp_offset,
831                 );
832 
833                 Some(slice_param_ext)
834             } else {
835                 None
836             };
837 
838         let slice_data = Vec::from(slice.nalu.as_ref());
839 
840         picture.last_slice = Some((slice_param, slice_param_ext, slice_data));
841 
842         Ok(())
843     }
844 
submit_picture( &mut self, mut picture: Self::Picture, ) -> StatelessBackendResult<Self::Handle>845     fn submit_picture(
846         &mut self,
847         mut picture: Self::Picture,
848     ) -> StatelessBackendResult<Self::Handle> {
849         if let Some(last_slice) = &mut picture.last_slice {
850             last_slice.0.set_as_last();
851         }
852         self.submit_last_slice(&mut picture)?;
853         self.process_picture::<H265>(picture.picture)
854     }
855 }
856 
857 impl<V: VideoFrame> StatelessDecoder<H265, VaapiBackend<V>> {
858     // Creates a new instance of the decoder using the VAAPI backend.
new_vaapi( display: Rc<Display>, blocking_mode: BlockingMode, ) -> Result<Self, NewStatelessDecoderError>859     pub fn new_vaapi(
860         display: Rc<Display>,
861         blocking_mode: BlockingMode,
862     ) -> Result<Self, NewStatelessDecoderError> {
863         Self::new(VaapiBackend::new(display, false), blocking_mode)
864     }
865 }
866 
#[cfg(test)]
mod tests {
    use libva::Display;

    use crate::bitstream_utils::NalIterator;
    use crate::codec::h265::parser::Nalu;
    use crate::decoder::stateless::h265::tests::DECODE_64X64_PROGRESSIVE_I;
    use crate::decoder::stateless::h265::tests::DECODE_64X64_PROGRESSIVE_I_P;
    use crate::decoder::stateless::h265::tests::DECODE_64X64_PROGRESSIVE_I_P_B_P;
    use crate::decoder::stateless::h265::H265;
    use crate::decoder::stateless::tests::test_decode_stream;
    use crate::decoder::stateless::tests::TestStream;
    use crate::decoder::stateless::StatelessDecoder;
    use crate::decoder::BlockingMode;
    use crate::utils::simple_playback_loop;
    use crate::utils::simple_playback_loop_owned_frames;
    use crate::DecodedFormat;

    /// Decodes `test` with a VAAPI-backed H.265 decoder created in the given
    /// `blocking_mode`, requesting `output_format` for the decoded frames.
    ///
    /// Each call exercises a single blocking mode; the individual tests below
    /// cover the blocking and non-blocking variants separately.
    ///
    /// Panics if the display cannot be opened or the decoder cannot be created.
    fn test_decoder_vaapi(
        test: &TestStream,
        output_format: DecodedFormat,
        blocking_mode: BlockingMode,
    ) {
        let display = Display::open().unwrap();

        // NOTE(review): `new_vaapi` as defined in this file declares no generic
        // parameters, so the `::<()>` turbofish looks stale -- confirm that
        // this module still compiles and which `VideoFrame` type is intended.
        let decoder = StatelessDecoder::<H265, _>::new_vaapi::<()>(display, blocking_mode).unwrap();

        test_decode_stream(
            // Decoding loop: split the stream into H.265 NAL units and feed
            // them through the generic playback loop.
            |dec, stream, f| {
                simple_playback_loop(
                    dec,
                    NalIterator::<Nalu>::new(stream),
                    f,
                    &mut simple_playback_loop_owned_frames,
                    output_format,
                    blocking_mode,
                )
            },
            decoder,
            test,
            // NOTE(review): the two trailing flags are interpreted by
            // `test_decode_stream` (not visible here) -- confirm their meaning.
            true,
            false,
        );
    }

    // All tests below are ignored by default because they require
    // libva-compatible hardware.

    /// I-frame-only stream, blocking decoder.
    #[test]
    #[ignore]
    fn test_64x64_progressive_i_block() {
        test_decoder_vaapi(
            &DECODE_64X64_PROGRESSIVE_I,
            DecodedFormat::NV12,
            BlockingMode::Blocking,
        );
    }

    /// I-frame-only stream, non-blocking decoder.
    #[test]
    #[ignore]
    fn test_64x64_progressive_i_nonblock() {
        test_decoder_vaapi(
            &DECODE_64X64_PROGRESSIVE_I,
            DecodedFormat::NV12,
            BlockingMode::NonBlocking,
        );
    }

    /// I/P stream, blocking decoder.
    #[test]
    #[ignore]
    fn test_64x64_progressive_i_p_block() {
        test_decoder_vaapi(
            &DECODE_64X64_PROGRESSIVE_I_P,
            DecodedFormat::NV12,
            BlockingMode::Blocking,
        );
    }

    /// I/P stream, non-blocking decoder.
    #[test]
    #[ignore]
    fn test_64x64_progressive_i_p_nonblock() {
        test_decoder_vaapi(
            &DECODE_64X64_PROGRESSIVE_I_P,
            DecodedFormat::NV12,
            BlockingMode::NonBlocking,
        );
    }

    /// I/P/B/P stream, blocking decoder.
    #[test]
    #[ignore]
    fn test_64x64_progressive_i_p_b_p_block() {
        test_decoder_vaapi(
            &DECODE_64X64_PROGRESSIVE_I_P_B_P,
            DecodedFormat::NV12,
            BlockingMode::Blocking,
        );
    }

    /// I/P/B/P stream, non-blocking decoder.
    #[test]
    #[ignore]
    fn test_64x64_progressive_i_p_b_p_nonblock() {
        test_decoder_vaapi(
            &DECODE_64X64_PROGRESSIVE_I_P_B_P,
            DecodedFormat::NV12,
            BlockingMode::NonBlocking,
        );
    }
}
981