1 // Copyright 2024 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 use std::cmp::min;
6
7 use crate::video_frame::{VideoFrame, UV_PLANE, U_PLANE, V_PLANE, Y_PLANE};
8 use crate::DecodedFormat;
9
// TODO(greenjustin): This entire file should be replaced with LibYUV.
11 use byteorder::ByteOrder;
12 use byteorder::LittleEndian;
13
14 #[cfg(feature = "v4l2")]
15 use std::arch::aarch64::*;
16
17 pub const MM21_TILE_WIDTH: usize = 16;
18 pub const MM21_TILE_HEIGHT: usize = 32;
19
/// Copies an NV12 image from `src_*` into `dst_*`, handling padding.
///
/// Rows are copied one at a time, so the source and destination may use different strides; the
/// bytes between `width` and the stride of a row are never read or written.
///
/// * `src_y` / `dst_y` - luma planes, with `src_y_stride` / `dst_y_stride` bytes per row.
/// * `src_uv` / `dst_uv` - interleaved chroma planes, with `src_uv_stride` / `dst_uv_stride`
///   bytes per row.
/// * `width` / `height` - size in pixels of the region to copy.
///
/// The chroma plane is 4:2:0 sub-sampled, so it holds `(height + 1) / 2` rows of
/// `(width + 1) & !1` bytes. Both are rounded up so that the chroma samples belonging to the last
/// column/row of an odd-sized image are copied as well.
///
/// # Panics
///
/// Panics if any of the planes is too small for the requested region.
pub fn nv12_copy(
    src_y: &[u8],
    src_y_stride: usize,
    dst_y: &mut [u8],
    dst_y_stride: usize,
    src_uv: &[u8],
    src_uv_stride: usize,
    dst_uv: &mut [u8],
    dst_uv_stride: usize,
    width: usize,
    height: usize,
) {
    for row in 0..height {
        let src_start = row * src_y_stride;
        let dst_start = row * dst_y_stride;
        dst_y[dst_start..dst_start + width].copy_from_slice(&src_y[src_start..src_start + width]);
    }

    // Every chroma sample is a (U, V) byte pair shared by a 2x2 block of pixels, so a row is an
    // even number of bytes and there is one chroma row per (possibly partial) pair of luma rows.
    let uv_row_bytes = (width + 1) & !1;
    let uv_rows = (height + 1) / 2;
    for row in 0..uv_rows {
        let src_start = row * src_uv_stride;
        let dst_start = row * dst_uv_stride;
        dst_uv[dst_start..dst_start + uv_row_bytes]
            .copy_from_slice(&src_uv[src_start..src_start + uv_row_bytes]);
    }
}
42
/// Fills the padding of an NV12 frame by repeating the pixels at the edge of the real image.
///
/// Padding that is left at 0 creates a sharp transition next to the real image data, which
/// causes compression artifacts; repeating the border pixels avoids that.
///
/// Both planes are addressed with `coded_width` bytes per row. For each plane the columns from
/// `visible_width` up to `coded_width` are filled first, then every row below the visible area
/// is overwritten with a copy of the row directly above it. The chroma plane uses
/// `visible_height / 2` visible rows and `coded_height / 2` rows in total.
///
/// # Panics
///
/// Panics if `visible_width` or `visible_height` is not greater than 1, or if a plane is too
/// small for the coded size.
pub fn extend_border_nv12(
    y_plane: &mut [u8],
    uv_plane: &mut [u8],
    visible_width: usize,
    visible_height: usize,
    coded_width: usize,
    coded_height: usize,
) {
    assert!(visible_width > 1);
    assert!(visible_height > 1);

    let chroma_visible_rows = visible_height / 2;
    let chroma_coded_rows = coded_height / 2;

    // Luma: each padding byte repeats its left neighbour.
    extend_rows_right(y_plane, visible_height, visible_width, coded_width, 1);
    extend_rows_down(y_plane, visible_height, coded_height, coded_width);

    // Chroma: look back 2 bytes because we want to actually repeat the last 2 UV values.
    extend_rows_right(uv_plane, chroma_visible_rows, visible_width, coded_width, 2);
    extend_rows_down(uv_plane, chroma_visible_rows, chroma_coded_rows, coded_width);
}

/// For the first `rows` rows of `plane`, sets every byte in columns `visible_width..pitch` to the
/// byte `lookback` positions to its left.
fn extend_rows_right(
    plane: &mut [u8],
    rows: usize,
    visible_width: usize,
    pitch: usize,
    lookback: usize,
) {
    for row in 0..rows {
        let base = row * pitch;
        for col in visible_width..pitch {
            let idx = base + col;
            plane[idx] = plane[idx - lookback];
        }
    }
}

/// Overwrites each of the rows `first_row..end_row` of `plane` with the row directly above it.
fn extend_rows_down(plane: &mut [u8], first_row: usize, end_row: usize, pitch: usize) {
    for row in first_row..end_row {
        let row_start = row * pitch;
        plane.copy_within((row_start - pitch)..row_start, row_start);
    }
}
77
/// Copies `height` rows of `width` bytes from `src` to `dst`.
///
/// Row `n` starts at byte `n * src_stride` in `src` and at byte `n * dst_stride` in `dst`. Bytes
/// past `width` in a row are neither read nor written.
///
/// # Panics
///
/// Panics if a row does not fit inside `src` or `dst`.
pub fn copy_plane(
    src: &[u8],
    src_stride: usize,
    dst: &mut [u8],
    dst_stride: usize,
    width: usize,
    height: usize,
) {
    for row in 0..height {
        let dst_start = row * dst_stride;
        let dst_row = &mut dst[dst_start..dst_start + width];
        let src_start = row * src_stride;
        dst_row.copy_from_slice(&src[src_start..src_start + width]);
    }
}
91
92 /// Copies `src` into `dst` as I4xx (YUV tri-planar).
93 ///
94 /// `sub_h` and `sub_v` enable horizontal and vertical sub-sampling, respectively. E.g, if both
95 /// `sub_h` and `sub_v` are `true` the data will be `4:2:0`, if only `sub_v` is `true` then it will be
96 /// `4:2:2`, and if both are `false` then we have `4:4:4`.
i4xx_copy( src_y: &[u8], src_y_stride: usize, dst_y: &mut [u8], dst_y_stride: usize, src_u: &[u8], src_u_stride: usize, dst_u: &mut [u8], dst_u_stride: usize, src_v: &[u8], src_v_stride: usize, dst_v: &mut [u8], dst_v_stride: usize, width: usize, height: usize, (sub_h, sub_v): (bool, bool), )97 pub fn i4xx_copy(
98 src_y: &[u8],
99 src_y_stride: usize,
100 dst_y: &mut [u8],
101 dst_y_stride: usize,
102 src_u: &[u8],
103 src_u_stride: usize,
104 dst_u: &mut [u8],
105 dst_u_stride: usize,
106 src_v: &[u8],
107 src_v_stride: usize,
108 dst_v: &mut [u8],
109 dst_v_stride: usize,
110 width: usize,
111 height: usize,
112 (sub_h, sub_v): (bool, bool),
113 ) {
114 copy_plane(src_y, src_y_stride, dst_y, dst_y_stride, width, height);
115
116 // Align width and height of UV planes to 2 if sub-sampling is used.
117 let uv_width = if sub_h { (width + 1) / 2 } else { width };
118 let uv_height = if sub_v { (height + 1) / 2 } else { height };
119
120 copy_plane(src_u, src_u_stride, dst_u, dst_u_stride, uv_width, uv_height);
121 copy_plane(src_v, src_v_stride, dst_v, dst_v_stride, uv_width, uv_height);
122 }
123
124 /// Copies `src` into `dst` as I410, removing all padding and changing the layout from packed to
125 /// triplanar. Also drops the alpha channel.
y410_to_i410( src: &[u8], dst: &mut [u8], width: usize, height: usize, strides: [usize; 3], offsets: [usize; 3], )126 pub fn y410_to_i410(
127 src: &[u8],
128 dst: &mut [u8],
129 width: usize,
130 height: usize,
131 strides: [usize; 3],
132 offsets: [usize; 3],
133 ) {
134 let src_lines = src[offsets[0]..].chunks(strides[0]).map(|line| &line[..width * 4]);
135
136 let dst_y_size = width * 2 * height;
137 let dst_u_size = width * 2 * height;
138
139 let (dst_y_plane, dst_uv_planes) = dst.split_at_mut(dst_y_size);
140 let (dst_u_plane, dst_v_plane) = dst_uv_planes.split_at_mut(dst_u_size);
141 let dst_y_lines = dst_y_plane.chunks_mut(width * 2);
142 let dst_u_lines = dst_u_plane.chunks_mut(width * 2);
143 let dst_v_lines = dst_v_plane.chunks_mut(width * 2);
144
145 for (src_line, (dst_y_line, (dst_u_line, dst_v_line))) in
146 src_lines.zip(dst_y_lines.zip(dst_u_lines.zip(dst_v_lines))).take(height)
147 {
148 for (src, (dst_y, (dst_u, dst_v))) in src_line.chunks(4).zip(
149 dst_y_line.chunks_mut(2).zip(dst_u_line.chunks_mut(2).zip(dst_v_line.chunks_mut(2))),
150 ) {
151 let y = LittleEndian::read_u16(&[src[1] >> 2 | src[2] << 6, src[2] >> 2 & 0b11]);
152 let u = LittleEndian::read_u16(&[src[0], src[1] & 0b11]);
153 let v = LittleEndian::read_u16(&[src[2] >> 4 | src[3] << 4, src[3] >> 4 & 0b11]);
154 LittleEndian::write_u16(dst_y, y);
155 LittleEndian::write_u16(dst_u, u);
156 LittleEndian::write_u16(dst_v, v);
157 }
158 }
159 }
160
/// Detiles one row of pixels whose `width` is not a multiple of [`MM21_TILE_WIDTH`].
///
/// The leading whole tiles are handled by [`detile_row`]. The trailing `width % MM21_TILE_WIDTH`
/// bytes are copied into a local tile-sized buffer, detiled from there into a second local
/// buffer, and only the valid bytes are copied to `dst`, so the 16-byte loads and stores never
/// touch memory beyond the partial tile.
///
/// # Safety
///
/// Verified by caller that |src| and |dst| is valid and not a NULL-pointer or invalid memory:
/// `dst` must be writable for `width` bytes, and `src` must be readable for
/// [`MM21_TILE_WIDTH`] bytes at every multiple of `src_tile_stride` used by the whole tiles, plus
/// `width % MM21_TILE_WIDTH` bytes at the offset of the partial tile.
#[cfg(feature = "v4l2")]
pub unsafe fn align_detile(src: *const u8, src_tile_stride: isize, dst: *mut u8, width: usize) {
    let tail_len = width % MM21_TILE_WIDTH;
    let whole_tiles_width = width - tail_len;

    if whole_tiles_width > 0 {
        detile_row(src, src_tile_stride, dst, whole_tiles_width);
    }

    let mut staged_in = [0u8; MM21_TILE_WIDTH];
    let mut staged_out = [0u8; MM21_TILE_WIDTH];

    // Byte offset in `src` of the partial tile that follows the whole tiles.
    let tail_offset = (whole_tiles_width / MM21_TILE_WIDTH * (src_tile_stride as usize)) as isize;
    let tail_src = std::slice::from_raw_parts(src.offset(tail_offset), tail_len);
    staged_in[..tail_len].copy_from_slice(tail_src);

    detile_row(staged_in.as_ptr(), src_tile_stride, staged_out.as_mut_ptr(), MM21_TILE_WIDTH);

    let tail_dst = std::slice::from_raw_parts_mut(dst.offset(whole_tiles_width as isize), tail_len);
    tail_dst.copy_from_slice(&staged_out[..tail_len]);
}
192
/// Copies one row of pixels out of a run of tiles, [`MM21_TILE_WIDTH`] bytes at a time.
///
/// Each iteration loads 16 bytes from `src`, steps `src` forward by `src_tile_stride` bytes to
/// reach the same row of the next tile, and stores the 16 bytes contiguously at `dst`.
///
/// # Safety
///
/// Verified by caller that |src| and |dst| is valid and not a NULL-pointer or invalid memory:
/// `src` must be readable for 16 bytes at each tile visited and `dst` must be writable for
/// `width` bytes. `width` must be a multiple of [`MM21_TILE_WIDTH`], otherwise the unsigned
/// countdown below underflows.
#[cfg(feature = "v4l2")]
pub unsafe fn detile_row(
    mut src: *const u8,
    src_tile_stride: isize,
    mut dst: *mut u8,
    width: usize,
) {
    let mut remaining = width;
    while remaining > 0 {
        let tile_row: uint8x16_t = vld1q_u8(src);
        src = src.offset(src_tile_stride);
        remaining -= MM21_TILE_WIDTH;
        vst1q_u8(dst, tile_row);
        dst = dst.add(MM21_TILE_WIDTH);
    }
}
211
// TODO(bchoobineh): Use a fuzzer to verify the correctness of this SIMD
// code compared to its Rust equivalent

/// Returns the number of bytes of the tiled source plane that `detile_plane` reads for an image
/// of `width` x `rows` pixels, or `None` if the size is degenerate or overflows `usize`.
#[cfg(feature = "v4l2")]
fn detile_required_src_len(
    src_stride: usize,
    width: usize,
    rows: usize,
    tile_height: usize,
) -> Option<usize> {
    let tile_bytes = MM21_TILE_WIDTH.checked_mul(tile_height)?;
    let tile_row_bytes = src_stride.checked_mul(tile_height)?;
    let last_row = rows.checked_sub(1)?;
    let last_col = width.checked_sub(1)?;
    let last_tile_row = last_row / tile_height;

    // Offset of the first byte read for the last image row.
    let last_row_base = last_tile_row
        .checked_mul(tile_row_bytes)?
        .checked_add((last_row % tile_height) * MM21_TILE_WIDTH)?;
    // Offset of the first byte read for the bottom row of the previous row of tiles. It is only
    // larger than `last_row_base` when `src_stride` is smaller than `MM21_TILE_WIDTH`.
    let prev_tile_row_base = match last_tile_row.checked_sub(1) {
        Some(tile_row) => {
            tile_row.checked_mul(tile_row_bytes)?.checked_add(tile_bytes - MM21_TILE_WIDTH)?
        }
        None => 0,
    };
    // Distance from the first byte read for an image row to one past the last byte read for it:
    // one tile stride per preceding tile plus the bytes used from the final tile.
    let row_span = (last_col / MM21_TILE_WIDTH)
        .checked_mul(tile_bytes)?
        .checked_add(last_col % MM21_TILE_WIDTH + 1)?;

    last_row_base.max(prev_tile_row_base).checked_add(row_span)
}

/// Detiles a plane of data using implementation from LibYUV::DetilePlane.
///
/// `src` holds tiles that are [`MM21_TILE_WIDTH`] bytes wide and `tile_height` rows tall, with
/// `src_stride * tile_height` bytes between consecutive rows of tiles. `dst` receives `width`
/// bytes per row, `dst_stride` bytes apart. A negative `height` inverts the image: the rows are
/// written to `dst` bottom-up.
///
/// # Errors
///
/// Returns an error if `width` or `height` is 0, if `tile_height` is not a power of 2, or if
/// `src` or `dst` is too small for the requested image (a negative `dst_stride` is reported as a
/// destination that is too small).
#[cfg(feature = "v4l2")]
pub fn detile_plane(
    src: &[u8],
    src_stride: usize,
    dst: &mut [u8],
    dst_stride: isize,
    width: usize,
    height: isize,
    tile_height: usize,
) -> Result<(), String> {
    // `is_power_of_two` is false for 0, so this also rejects a zero tile height.
    if width == 0 || height == 0 || !tile_height.is_power_of_two() {
        return Err("Invalid width, height, or tile height is not a power of 2.".to_owned());
    }

    let rows = height.unsigned_abs();
    let aligned = width % MM21_TILE_WIDTH == 0;

    // The source must hold `rows` rows of `src_stride` bytes and, because rows are scattered
    // across tiles, every byte that the tile walk below actually touches.
    let src_fits = src_stride.checked_mul(rows).map_or(false, |len| len <= src.len())
        && detile_required_src_len(src_stride, width, rows, tile_height)
            .map_or(false, |len| len <= src.len());
    if !src_fits {
        return Err("Src buffer not big enough.".to_owned());
    }

    // The destination must hold `rows` rows of `dst_stride` bytes, and the `width` bytes written
    // for the last row must end inside the buffer.
    let dst_pitch = if dst_stride < 0 { None } else { Some(dst_stride as usize) };
    let dst_fits = dst_pitch.map_or(false, |pitch| {
        pitch.checked_mul(rows).map_or(false, |len| len <= dst.len())
            && ((rows - 1) * pitch).checked_add(width).map_or(false, |end| end <= dst.len())
    });
    let dst_pitch = match dst_pitch {
        Some(pitch) if dst_fits => pitch,
        _ => return Err("Dst buffer not big enough.".to_owned()),
    };

    // Neither product can overflow: both were computed with checked arithmetic above.
    let src_tile_stride = (MM21_TILE_WIDTH * tile_height) as isize;
    let tile_row_bytes = src_stride * tile_height;

    let mut src_ptr = src.as_ptr();
    let mut dst_ptr = dst.as_mut_ptr();

    // Image inversion: start at the last destination row and walk towards the first one.
    let dst_step = if height < 0 {
        // SAFETY: The start of row `rows - 1` was verified above to be inside `dst`.
        dst_ptr = unsafe { dst_ptr.add((rows - 1) * dst_pitch) };
        -dst_stride
    } else {
        dst_stride
    };

    // Detile Plane
    for y in 0..rows {
        // SAFETY: The checks above guarantee that every tile read for row `y` is inside `src`
        // and that the `width` bytes written for it are inside `dst`.
        unsafe {
            if aligned {
                detile_row(src_ptr, src_tile_stride, dst_ptr, width);
            } else {
                align_detile(src_ptr, src_tile_stride, dst_ptr, width);
            }
        }

        // Wrapping arithmetic is used because after the final row these pointers may step
        // outside their buffers; they are not dereferenced again in that case.
        dst_ptr = dst_ptr.wrapping_offset(dst_step);
        src_ptr = src_ptr.wrapping_add(MM21_TILE_WIDTH);
        // Advance to next row of tiles.
        if y & (tile_height - 1) == tile_height - 1 {
            src_ptr = src_ptr.wrapping_add(tile_row_bytes).wrapping_sub(src_tile_stride as usize);
        }
    }

    Ok(())
}
284
/// Converts MM21 to NV12 using the implementation from LibYUV::MM21ToNV12.
///
/// The luma plane is detiled with tiles of [`MM21_TILE_HEIGHT`] rows. The chroma plane is
/// detiled with tiles of half that height, using `width` rounded up to an even number of bytes
/// and half of `height` rounded away from zero. The sign of `height` is passed on unchanged to
/// `detile_plane`.
///
/// # Errors
///
/// Returns an error if `width` is 0 or if detiling either plane fails.
#[cfg(feature = "v4l2")]
pub fn mm21_to_nv12(
    src_y: &[u8],
    src_stride_y: usize,
    dst_y: &mut [u8],
    dst_stride_y: usize,
    src_uv: &[u8],
    src_stride_uv: usize,
    dst_uv: &mut [u8],
    dst_stride_uv: usize,
    width: usize,
    height: isize,
) -> Result<(), String> {
    if width == 0 {
        return Err("Width must be greater than 0.".to_owned());
    }

    // Detile Plane Y
    let luma_stride = dst_stride_y as isize;
    detile_plane(src_y, src_stride_y, dst_y, luma_stride, width, height, MM21_TILE_HEIGHT)?;

    // Detile Plane UV
    let round_away_from_zero = if height < 0 { -1 } else { 1 };
    let chroma_width = (width + 1) & !1;
    let chroma_height = (height + round_away_from_zero) / 2;
    let chroma_stride = dst_stride_uv as isize;
    detile_plane(
        src_uv,
        src_stride_uv,
        dst_uv,
        chroma_stride,
        chroma_width,
        chroma_height,
        MM21_TILE_HEIGHT / 2,
    )
}
327
nv12_to_i420( src_y: &[u8], src_y_stride: usize, dst_y: &mut [u8], dst_y_stride: usize, src_uv: &[u8], src_uv_stride: usize, dst_u: &mut [u8], dst_u_stride: usize, dst_v: &mut [u8], dst_v_stride: usize, width: usize, height: usize, )328 pub fn nv12_to_i420(
329 src_y: &[u8],
330 src_y_stride: usize,
331 dst_y: &mut [u8],
332 dst_y_stride: usize,
333 src_uv: &[u8],
334 src_uv_stride: usize,
335 dst_u: &mut [u8],
336 dst_u_stride: usize,
337 dst_v: &mut [u8],
338 dst_v_stride: usize,
339 width: usize,
340 height: usize,
341 ) {
342 copy_plane(src_y, src_y_stride, dst_y, dst_y_stride, width, height);
343
344 // We can just assume 4:2:0 subsampling
345 let aligned_width = (width + 1) & (!1);
346 for y in 0..((height + 1) / 2) {
347 let src_row = &src_uv[(y * src_uv_stride)..(y * src_uv_stride + aligned_width)];
348 let dst_u_row = &mut dst_u[(y * dst_u_stride)..(y * dst_u_stride + aligned_width / 2)];
349 let dst_v_row = &mut dst_v[(y * dst_v_stride)..(y * dst_v_stride + aligned_width / 2)];
350 for x in 0..aligned_width {
351 if x % 2 == 0 {
352 dst_u_row[x / 2] = src_row[x];
353 } else {
354 dst_v_row[x / 2] = src_row[x];
355 }
356 }
357 }
358 }
359
/// Interleaves the `src_u` and `src_v` planes into `dst_uv`.
///
/// Byte `i` of `dst_uv` is taken from `src_u[i / 2]` when `i` is even and from `src_v[i / 2]`
/// when `i` is odd. The length of `dst_uv` decides how many bytes are produced; no strides are
/// applied.
///
/// # Panics
///
/// Panics if `src_u` or `src_v` is too short to fill `dst_uv`.
pub fn i420_to_nv12_chroma(src_u: &[u8], src_v: &[u8], dst_uv: &mut [u8]) {
    for (i, out) in dst_uv.iter_mut().enumerate() {
        let plane = if i % 2 == 0 { src_u } else { src_v };
        *out = plane[i / 2];
    }
}
369
i420_to_nv12(src_y: &[u8], dst_y: &mut [u8], src_u: &[u8], src_v: &[u8], dst_uv: &mut [u8])370 pub fn i420_to_nv12(src_y: &[u8], dst_y: &mut [u8], src_u: &[u8], src_v: &[u8], dst_uv: &mut [u8]) {
371 dst_y.copy_from_slice(src_y);
372 i420_to_nv12_chroma(src_u, src_v, dst_uv);
373 }
374
375 // TODO: Add more conversions. All supported conversion functions need to take stride parameters.
376 pub const SUPPORTED_CONVERSION: &'static [(DecodedFormat, DecodedFormat)] = &[
377 #[cfg(feature = "v4l2")]
378 (DecodedFormat::MM21, DecodedFormat::NV12),
379 (DecodedFormat::NV12, DecodedFormat::NV12),
380 (DecodedFormat::I420, DecodedFormat::I420),
381 (DecodedFormat::I422, DecodedFormat::I422),
382 (DecodedFormat::I444, DecodedFormat::I444),
383 ];
384
convert_video_frame(src: &impl VideoFrame, dst: &mut impl VideoFrame) -> Result<(), String>385 pub fn convert_video_frame(src: &impl VideoFrame, dst: &mut impl VideoFrame) -> Result<(), String> {
386 let width = min(dst.resolution().width, src.resolution().width) as usize;
387 let height = min(dst.resolution().height, src.resolution().height) as usize;
388
389 let conversion = (src.decoded_format()?, dst.decoded_format()?);
390 let src_pitches = src.get_plane_pitch();
391 let src_mapping = src.map().expect("Image processor src mapping failed!");
392 let src_planes = src_mapping.get();
393 let dst_pitches = dst.get_plane_pitch();
394 let dst_mapping = dst.map_mut().expect("Image processor dst mapping failed!");
395 let dst_planes = dst_mapping.get();
396 match conversion {
397 #[cfg(feature = "v4l2")]
398 (DecodedFormat::MM21, DecodedFormat::NV12) => mm21_to_nv12(
399 src_planes[Y_PLANE],
400 src_pitches[Y_PLANE],
401 *dst_planes[Y_PLANE].borrow_mut(),
402 dst_pitches[Y_PLANE],
403 src_planes[UV_PLANE],
404 src_pitches[UV_PLANE],
405 *dst_planes[UV_PLANE].borrow_mut(),
406 dst_pitches[UV_PLANE],
407 width,
408 height as isize,
409 ),
410 (DecodedFormat::NV12, DecodedFormat::NV12) => {
411 nv12_copy(
412 src_planes[Y_PLANE],
413 src_pitches[Y_PLANE],
414 *dst_planes[Y_PLANE].borrow_mut(),
415 dst_pitches[Y_PLANE],
416 src_planes[UV_PLANE],
417 src_pitches[UV_PLANE],
418 *dst_planes[UV_PLANE].borrow_mut(),
419 dst_pitches[UV_PLANE],
420 width,
421 height,
422 );
423 Ok(())
424 }
425 (DecodedFormat::I420, DecodedFormat::I420) => {
426 i4xx_copy(
427 src_planes[Y_PLANE],
428 src_pitches[Y_PLANE],
429 *dst_planes[Y_PLANE].borrow_mut(),
430 dst_pitches[Y_PLANE],
431 src_planes[U_PLANE],
432 src_pitches[U_PLANE],
433 *dst_planes[U_PLANE].borrow_mut(),
434 dst_pitches[U_PLANE],
435 src_planes[V_PLANE],
436 src_pitches[V_PLANE],
437 *dst_planes[V_PLANE].borrow_mut(),
438 dst_pitches[V_PLANE],
439 width,
440 height,
441 (true, true),
442 );
443 Ok(())
444 }
445 (DecodedFormat::I422, DecodedFormat::I422) => {
446 i4xx_copy(
447 src_planes[Y_PLANE],
448 src_pitches[Y_PLANE],
449 *dst_planes[Y_PLANE].borrow_mut(),
450 dst_pitches[Y_PLANE],
451 src_planes[U_PLANE],
452 src_pitches[U_PLANE],
453 *dst_planes[U_PLANE].borrow_mut(),
454 dst_pitches[U_PLANE],
455 src_planes[V_PLANE],
456 src_pitches[V_PLANE],
457 *dst_planes[V_PLANE].borrow_mut(),
458 dst_pitches[V_PLANE],
459 width,
460 height,
461 (true, false),
462 );
463 Ok(())
464 }
465 (DecodedFormat::I444, DecodedFormat::I444) => {
466 i4xx_copy(
467 src_planes[Y_PLANE],
468 src_pitches[Y_PLANE],
469 *dst_planes[Y_PLANE].borrow_mut(),
470 dst_pitches[Y_PLANE],
471 src_planes[U_PLANE],
472 src_pitches[U_PLANE],
473 *dst_planes[U_PLANE].borrow_mut(),
474 dst_pitches[U_PLANE],
475 src_planes[V_PLANE],
476 src_pitches[V_PLANE],
477 *dst_planes[V_PLANE].borrow_mut(),
478 dst_pitches[V_PLANE],
479 width,
480 height,
481 (false, false),
482 );
483 Ok(())
484 }
485 _ => Err(format!("Unsupported conversion {:?} -> {:?}", conversion.0, conversion.1)),
486 }
487 }
488
#[cfg(test)]
mod tests {
    use super::*;

    // The buffers below are 480 bytes wide and 288 rows tall with 4:2:0 chroma, i.e.
    // 480 * 288 * 3 / 2 bytes per frame. The comparisons against the reference files are
    // array-to-array, so the files under `test_data/` must have exactly that size.

    /// Detiles an MM21 frame and compares the result with the NV12 reference frame.
    #[test]
    #[cfg(feature = "v4l2")]
    fn test_mm21_to_nv12() {
        let test_input = include_bytes!("test_data/puppets-480x270_20230825.mm21.yuv");
        let test_expected_output = include_bytes!("test_data/puppets-480x270_20230825.nv12.yuv");

        let mut test_output = [0u8; 480 * 288 * 3 / 2];
        let (test_y_output, test_uv_output) = test_output.split_at_mut(480 * 288);
        mm21_to_nv12(
            &test_input[0..480 * 288],
            480,
            test_y_output,
            480,
            &test_input[480 * 288..480 * 288 * 3 / 2],
            480,
            test_uv_output,
            480,
            480,
            288,
        )
        .expect("Failed to detile!");
        assert_eq!(test_output, *test_expected_output);
    }

    /// Splits the chroma of an NV12 frame and compares the result with the I420 reference frame.
    #[test]
    fn test_nv12_to_i420() {
        let test_input = include_bytes!("test_data/puppets-480x270_20230825.nv12.yuv");
        let test_expected_output = include_bytes!("test_data/puppets-480x270_20230825.i420.yuv");

        let mut test_output = [0u8; 480 * 288 * 3 / 2];
        let (test_y_output, test_uv_output) = test_output.split_at_mut(480 * 288);
        let (test_u_output, test_v_output) = test_uv_output.split_at_mut(480 * 288 / 4);
        // All planes are tightly packed: 480 bytes per luma and interleaved chroma row, and
        // 240 bytes per row of the separate U and V planes.
        nv12_to_i420(
            &test_input[0..480 * 288],
            480,
            test_y_output,
            480,
            &test_input[480 * 288..480 * 288 * 3 / 2],
            480,
            test_u_output,
            240,
            test_v_output,
            240,
            480,
            288,
        );
        assert_eq!(test_output, *test_expected_output);
    }

    /// Interleaves the chroma of an I420 frame and compares the result with the NV12 reference.
    #[test]
    fn test_i420_to_nv12() {
        let test_input = include_bytes!("test_data/puppets-480x270_20230825.i420.yuv");
        let test_expected_output = include_bytes!("test_data/puppets-480x270_20230825.nv12.yuv");

        let mut test_output = [0u8; 480 * 288 * 3 / 2];
        let (test_y_output, test_uv_output) = test_output.split_at_mut(480 * 288);
        i420_to_nv12(
            &test_input[0..(480 * 288)],
            test_y_output,
            &test_input[(480 * 288)..(480 * 288 * 5 / 4)],
            &test_input[(480 * 288 * 5 / 4)..(480 * 288 * 3 / 2)],
            test_uv_output,
        );
        assert_eq!(test_output, *test_expected_output);
    }
}
552