1 // Copyright 2024 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 use crate::decoder::gainmap::GainMapMetadata;
16 use crate::decoder::track::*;
17 use crate::decoder::Extent;
18 use crate::decoder::GenericIO;
19 use crate::image::YuvRange;
20 use crate::image::MAX_PLANE_COUNT;
21 use crate::internal_utils::stream::*;
22 use crate::internal_utils::*;
23 use crate::utils::clap::CleanAperture;
24 use crate::*;
25
/// Size of a box payload as announced by the box header.
#[derive(Debug, PartialEq)]
pub enum BoxSize {
    /// The payload is exactly this many bytes long. The header bytes are not included.
    FixedSize(usize),
    /// The payload runs until the end of the input stream.
    UntilEndOfStream,
}
31
32 #[derive(Debug)]
33 struct BoxHeader {
34 size: BoxSize,
35 box_type: String,
36 }
37
38 impl BoxHeader {
size(&self) -> usize39 fn size(&self) -> usize {
40 match self.size {
41 BoxSize::FixedSize(size) => size, // not reached.
42 BoxSize::UntilEndOfStream => 0,
43 }
44 }
45 }
46
/// Contents of the FileTypeBox (`ftyp`).
#[derive(Debug, Default)]
pub struct FileTypeBox {
    /// Four-character major brand of the file.
    pub major_brand: String,
    // minor_version "is informative only" (section 4.3.1 of ISO/IEC 14496-12), so it is not
    // stored.
    /// Four-character brands listed after the minor version.
    compatible_brands: Vec<String>,
}
53
54 impl FileTypeBox {
has_brand(&self, brand: &str) -> bool55 fn has_brand(&self, brand: &str) -> bool {
56 // As of 2024, section 4.3.1 of ISO/IEC 14496-12 does not explictly say that the file is
57 // compliant with the specification defining the major brand, but "the major_brand should be
58 // repeated in the compatible_brands". Later versions of the specification may explicitly
59 // consider the major brand as one of the compatible brands, even if not repeated.
60 if self.major_brand.as_str() == brand {
61 return true;
62 }
63 self.compatible_brands.iter().any(|x| x.as_str() == brand)
64 }
65
has_brand_any(&self, brands: &[&str]) -> bool66 fn has_brand_any(&self, brands: &[&str]) -> bool {
67 brands.iter().any(|brand| self.has_brand(brand))
68 }
69
is_avif(&self) -> bool70 pub(crate) fn is_avif(&self) -> bool {
71 // "avio" also exists but does not identify the file as AVIF on its own. See
72 // https://aomediacodec.github.io/av1-avif/v1.1.0.html#image-and-image-collection-brand
73 self.has_brand_any(&[
74 "avif",
75 "avis",
76 #[cfg(feature = "heic")]
77 "heic",
78 #[cfg(feature = "heic")]
79 "heix",
80 #[cfg(feature = "heic")]
81 "mif1",
82 ])
83 }
84
needs_meta(&self) -> bool85 pub(crate) fn needs_meta(&self) -> bool {
86 self.has_brand_any(&[
87 "avif",
88 #[cfg(feature = "heic")]
89 "heic",
90 #[cfg(feature = "heic")]
91 "heix",
92 #[cfg(feature = "heic")]
93 "mif1",
94 ])
95 }
96
needs_moov(&self) -> bool97 pub(crate) fn needs_moov(&self) -> bool {
98 self.has_brand_any(&[
99 "avis",
100 #[cfg(feature = "heic")]
101 "hevc",
102 #[cfg(feature = "heic")]
103 "msf1",
104 ])
105 }
106
has_tmap(&self) -> bool107 pub(crate) fn has_tmap(&self) -> bool {
108 self.has_brand("tmap")
109 }
110 }
111
112 #[derive(Debug, Default)]
113 pub struct ItemLocationEntry {
114 pub item_id: u32,
115 pub construction_method: u8,
116 pub base_offset: u64,
117 pub extent_count: u16,
118 pub extents: Vec<Extent>,
119 }
120
121 #[derive(Debug, Default)]
122 pub struct ItemLocationBox {
123 offset_size: u8,
124 length_size: u8,
125 base_offset_size: u8,
126 index_size: u8,
127 pub items: Vec<ItemLocationEntry>,
128 }
129
/// Contents of the `ispe` item property: the dimensions of an image.
#[derive(Clone, Debug)]
pub struct ImageSpatialExtents {
    /// `image_width` field.
    pub width: u32,
    /// `image_height` field.
    pub height: u32,
}
135
/// Contents of the `pixi` item property.
#[derive(Clone, Debug, Default)]
pub struct PixelInformation {
    /// `bits_per_channel` of every channel, in the order they are stored in the box.
    pub plane_depths: Vec<u8>,
}
140
141 #[derive(Clone, Debug, Default, PartialEq)]
142 pub struct Av1CodecConfiguration {
143 pub seq_profile: u8,
144 pub seq_level_idx0: u8,
145 pub seq_tier0: u8,
146 pub high_bitdepth: bool,
147 pub twelve_bit: bool,
148 pub monochrome: bool,
149 pub chroma_subsampling_x: u8,
150 pub chroma_subsampling_y: u8,
151 pub chroma_sample_position: ChromaSamplePosition,
152 pub raw_data: Vec<u8>,
153 }
154
/// The subset of the HEVC decoder configuration (`hvcC`) that is retained after parsing.
#[derive(Clone, Debug, Default, PartialEq)]
pub struct HevcCodecConfiguration {
    /// Luma bit depth (`bit_depth_luma_minus8 + 8`).
    pub bitdepth: u8,
    /// Number of bytes of the NAL unit length prefix (`lengthSizeMinusOne + 1`).
    pub nal_length_size: u8,
    /// Video parameter set NAL unit (NAL unit type 32).
    pub vps: Vec<u8>,
    /// Sequence parameter set NAL unit (NAL unit type 33).
    pub sps: Vec<u8>,
    /// Picture parameter set NAL unit (NAL unit type 34).
    pub pps: Vec<u8>,
}
163
164 impl CodecConfiguration {
depth(&self) -> u8165 pub(crate) fn depth(&self) -> u8 {
166 match self {
167 Self::Av1(config) => match config.twelve_bit {
168 true => 12,
169 false => match config.high_bitdepth {
170 true => 10,
171 false => 8,
172 },
173 },
174 Self::Hevc(config) => config.bitdepth,
175 }
176 }
177
pixel_format(&self) -> PixelFormat178 pub(crate) fn pixel_format(&self) -> PixelFormat {
179 match self {
180 Self::Av1(config) => {
181 if config.monochrome {
182 PixelFormat::Yuv400
183 } else if config.chroma_subsampling_x == 1 && config.chroma_subsampling_y == 1 {
184 PixelFormat::Yuv420
185 } else if config.chroma_subsampling_x == 1 {
186 PixelFormat::Yuv422
187 } else {
188 PixelFormat::Yuv444
189 }
190 }
191 Self::Hevc(_) => {
192 // It is okay to always return Yuv420 here since that is the only format that
193 // android_mediacodec returns.
194 // TODO: b/370549923 - Identify the correct YUV subsampling type from the codec
195 // configuration data.
196 PixelFormat::Yuv420
197 }
198 }
199 }
200
chroma_sample_position(&self) -> ChromaSamplePosition201 pub(crate) fn chroma_sample_position(&self) -> ChromaSamplePosition {
202 match self {
203 Self::Av1(config) => config.chroma_sample_position,
204 Self::Hevc(_) => {
205 // It is okay to always return ChromaSamplePosition::default() here since that is
206 // the only format that android_mediacodec returns.
207 // TODO: b/370549923 - Identify the correct chroma sample position from the codec
208 // configuration data.
209 ChromaSamplePosition::default()
210 }
211 }
212 }
213
214 #[cfg(feature = "android_mediacodec")]
raw_data(&self) -> Vec<u8>215 pub(crate) fn raw_data(&self) -> Vec<u8> {
216 match self {
217 Self::Av1(config) => config.raw_data.clone(),
218 Self::Hevc(config) => {
219 // For HEVC, the codec specific data consists of the following 3 NAL units in
220 // order: VPS, SPS and PPS. Each unit should be preceded by a start code of
221 // "\x00\x00\x00\x01".
222 // https://developer.android.com/reference/android/media/MediaCodec#CSD
223 let mut data: Vec<u8> = Vec::new();
224 for nal_unit in [&config.vps, &config.sps, &config.pps] {
225 // Start code.
226 data.extend_from_slice(&[0, 0, 0, 1]);
227 // Data.
228 data.extend_from_slice(&nal_unit[..]);
229 }
230 data
231 }
232 }
233 }
234
profile(&self) -> u8235 pub fn profile(&self) -> u8 {
236 match self {
237 Self::Av1(config) => config.seq_profile,
238 Self::Hevc(_) => {
239 // TODO: b/370549923 - Identify the correct profile from the codec configuration
240 // data.
241 0
242 }
243 }
244 }
245
246 #[cfg(feature = "android_mediacodec")]
nal_length_size(&self) -> u8247 pub(crate) fn nal_length_size(&self) -> u8 {
248 match self {
249 Self::Av1(_) => 0, // Unused. This function is only used for HEVC.
250 Self::Hevc(config) => config.nal_length_size,
251 }
252 }
253
is_avif(&self) -> bool254 pub(crate) fn is_avif(&self) -> bool {
255 matches!(self, Self::Av1(_))
256 }
257
is_heic(&self) -> bool258 pub(crate) fn is_heic(&self) -> bool {
259 matches!(self, Self::Hevc(_))
260 }
261 }
262
263 #[derive(Clone, Debug)]
264 pub enum ColorInformation {
265 Icc(Vec<u8>),
266 Nclx(Nclx),
267 Unknown,
268 }
269
270 #[derive(Clone, Debug, PartialEq)]
271 pub enum CodecConfiguration {
272 Av1(Av1CodecConfiguration),
273 Hevc(HevcCodecConfiguration),
274 }
275
276 impl Default for CodecConfiguration {
default() -> Self277 fn default() -> Self {
278 Self::Av1(Av1CodecConfiguration::default())
279 }
280 }
281
282 #[derive(Clone, Debug)]
283 pub enum ItemProperty {
284 ImageSpatialExtents(ImageSpatialExtents),
285 PixelInformation(PixelInformation),
286 CodecConfiguration(CodecConfiguration),
287 ColorInformation(ColorInformation),
288 PixelAspectRatio(PixelAspectRatio),
289 AuxiliaryType(String),
290 CleanAperture(CleanAperture),
291 ImageRotation(u8),
292 ImageMirror(u8),
293 OperatingPointSelector(u8),
294 LayerSelector(u16),
295 AV1LayeredImageIndexing([usize; 3]),
296 ContentLightLevelInformation(ContentLightLevelInformation),
297 Unknown(String),
298 }
299
/// Properties associated with one item.
// Section 8.11.14 of ISO/IEC 14496-12.
#[derive(Debug, Default)]
pub struct ItemPropertyAssociation {
    /// Identifier of the item the properties apply to.
    pub item_id: u32,
    /// One `(property_index, essential)` pair per associated property.
    pub associations: Vec<(
        u16,  // 1-based property_index
        bool, // essential
    )>,
}
309
/// Description of one item of the file.
// NOTE(review): field names match the ItemInfoEntry (`infe`) syntax; its parser is not in view.
#[derive(Debug, Default)]
pub struct ItemInfo {
    /// Identifier of the item.
    pub item_id: u32,
    item_protection_index: u16,
    /// Item type string.
    pub item_type: String,
    item_name: String,
    /// Content type string.
    pub content_type: String,
}
318
319 #[derive(Debug, Default)]
320 pub struct ItemPropertyBox {
321 pub properties: Vec<ItemProperty>,
322 pub associations: Vec<ItemPropertyAssociation>,
323 }
324
/// A single typed reference between two items.
///
/// Read this reference as "{from_item_id} is a {reference_type} for {to_item_id}"
/// (except for dimg where it is in the opposite direction).
#[derive(Debug)]
pub struct ItemReference {
    /// Item the reference originates from.
    pub from_item_id: u32,
    /// Item the reference points to.
    pub to_item_id: u32,
    /// Four-character reference type.
    pub reference_type: String,
    /// 0-based index of the reference within the iref type.
    pub index: u32,
}
334
335 #[derive(Debug, Default)]
336 pub struct MetaBox {
337 pub iinf: Vec<ItemInfo>,
338 pub iloc: ItemLocationBox,
339 pub primary_item_id: u32, // pitm
340 pub iprp: ItemPropertyBox,
341 pub iref: Vec<ItemReference>,
342 pub idat: Vec<u8>,
343 }
344
345 #[derive(Debug)]
346 pub struct AvifBoxes {
347 pub ftyp: FileTypeBox,
348 pub meta: MetaBox,
349 pub tracks: Vec<Track>,
350 }
351
parse_header(stream: &mut IStream, top_level: bool) -> AvifResult<BoxHeader>352 fn parse_header(stream: &mut IStream, top_level: bool) -> AvifResult<BoxHeader> {
353 // Section 4.2.2 of ISO/IEC 14496-12.
354 let start_offset = stream.offset;
355 // unsigned int(32) size;
356 let mut size = stream.read_u32()? as u64;
357 // unsigned int(32) type = boxtype;
358 let box_type = stream.read_string(4)?;
359 if size == 1 {
360 // unsigned int(64) largesize;
361 size = stream.read_u64()?;
362 }
363 if box_type == "uuid" {
364 // unsigned int(8) usertype[16] = extended_type;
365 stream.skip(16)?;
366 }
367 if size == 0 {
368 // Section 4.2.2 of ISO/IEC 14496-12.
369 // if size is 0, then this box shall be in a top-level box (i.e. not contained in another
370 // box), and be the last box in its 'file', and its payload extends to the end of that
371 // enclosing 'file'. This is normally only used for a MediaDataBox.
372 if !top_level {
373 return Err(AvifError::BmffParseFailed(
374 "non-top-level box with size 0".into(),
375 ));
376 }
377 return Ok(BoxHeader {
378 box_type,
379 size: BoxSize::UntilEndOfStream,
380 });
381 }
382 checked_decr!(size, u64_from_usize(stream.offset - start_offset)?);
383 let size = usize_from_u64(size)?;
384 if !top_level && size > stream.bytes_left()? {
385 return Err(AvifError::BmffParseFailed("possibly truncated box".into()));
386 }
387 Ok(BoxHeader {
388 box_type,
389 size: BoxSize::FixedSize(size),
390 })
391 }
392
393 // Reads a truncated ftyp box. Populates as many brands as it can read.
parse_truncated_ftyp(stream: &mut IStream) -> FileTypeBox394 fn parse_truncated_ftyp(stream: &mut IStream) -> FileTypeBox {
395 // Section 4.3.2 of ISO/IEC 14496-12.
396 // unsigned int(32) major_brand;
397 let major_brand = match stream.read_string(4) {
398 Ok(major_brand) => major_brand,
399 Err(_) => return FileTypeBox::default(),
400 };
401 let mut compatible_brands: Vec<String> = Vec::new();
402 // unsigned int(32) compatible_brands[]; // to end of the box
403 while stream.has_bytes_left().unwrap_or_default() {
404 match stream.read_string(4) {
405 Ok(brand) => compatible_brands.push(brand),
406 Err(_) => break,
407 }
408 }
409 FileTypeBox {
410 major_brand,
411 compatible_brands,
412 }
413 }
414
parse_ftyp(stream: &mut IStream) -> AvifResult<FileTypeBox>415 fn parse_ftyp(stream: &mut IStream) -> AvifResult<FileTypeBox> {
416 // Section 4.3.2 of ISO/IEC 14496-12.
417 // unsigned int(32) major_brand;
418 let major_brand = stream.read_string(4)?;
419 // unsigned int(4) minor_version;
420 stream.skip_u32()?;
421 if stream.bytes_left()? % 4 != 0 {
422 return Err(AvifError::BmffParseFailed(format!(
423 "Box[ftyp] contains a compatible brands section that isn't divisible by 4 {}",
424 stream.bytes_left()?
425 )));
426 }
427 let mut compatible_brands: Vec<String> = create_vec_exact(stream.bytes_left()? / 4)?;
428 // unsigned int(32) compatible_brands[]; // to end of the box
429 while stream.has_bytes_left()? {
430 compatible_brands.push(stream.read_string(4)?);
431 }
432 Ok(FileTypeBox {
433 major_brand,
434 compatible_brands,
435 })
436 }
437
parse_hdlr(stream: &mut IStream) -> AvifResult<String>438 fn parse_hdlr(stream: &mut IStream) -> AvifResult<String> {
439 // Section 8.4.3.2 of ISO/IEC 14496-12.
440 let (_version, _flags) = stream.read_and_enforce_version_and_flags(0)?;
441 // unsigned int(32) pre_defined = 0;
442 let predefined = stream.read_u32()?;
443 if predefined != 0 {
444 return Err(AvifError::BmffParseFailed(
445 "Box[hdlr] contains a pre_defined value that is nonzero".into(),
446 ));
447 }
448 // unsigned int(32) handler_type;
449 let handler_type = stream.read_string(4)?;
450 // const unsigned int(32)[3] reserved = 0;
451 if stream.read_u32()? != 0 || stream.read_u32()? != 0 || stream.read_u32()? != 0 {
452 return Err(AvifError::BmffParseFailed(
453 "Box[hdlr] contains invalid reserved bits".into(),
454 ));
455 }
456 // string name;
457 // Verify that a valid string is here, but don't bother to store it:
458 // name gives a human-readable name for the track type (for debugging and inspection
459 // purposes).
460 stream.read_c_string()?;
461 Ok(handler_type)
462 }
463
parse_iloc(stream: &mut IStream) -> AvifResult<ItemLocationBox>464 fn parse_iloc(stream: &mut IStream) -> AvifResult<ItemLocationBox> {
465 // Section 8.11.3.2 of ISO/IEC 14496-12.
466 let (version, _flags) = stream.read_version_and_flags()?;
467 if version > 2 {
468 return Err(AvifError::BmffParseFailed(format!(
469 "Box[iloc] has an unsupported version: {version}"
470 )));
471 }
472 let mut iloc = ItemLocationBox::default();
473 let mut bits = stream.sub_bit_stream(2)?;
474 // unsigned int(4) offset_size;
475 iloc.offset_size = bits.read(4)? as u8;
476 // unsigned int(4) length_size;
477 iloc.length_size = bits.read(4)? as u8;
478 // unsigned int(4) base_offset_size;
479 iloc.base_offset_size = bits.read(4)? as u8;
480 iloc.index_size = if version == 1 || version == 2 {
481 // unsigned int(4) index_size;
482 bits.read(4)? as u8
483 } else {
484 // unsigned int(4) reserved;
485 bits.skip(4)?;
486 0
487 };
488 assert_eq!(bits.remaining_bits()?, 0);
489
490 // Section 8.11.3.3 of ISO/IEC 14496-12.
491 for size in [
492 iloc.offset_size,
493 iloc.length_size,
494 iloc.base_offset_size,
495 iloc.index_size,
496 ] {
497 if ![0u8, 4, 8].contains(&size) {
498 return Err(AvifError::BmffParseFailed(format!(
499 "Box[iloc] has invalid size: {size}"
500 )));
501 }
502 }
503
504 let item_count: u32 = if version < 2 {
505 // unsigned int(16) item_count;
506 stream.read_u16()? as u32
507 } else {
508 // unsigned int(32) item_count;
509 stream.read_u32()?
510 };
511 for _i in 0..item_count {
512 let mut entry = ItemLocationEntry {
513 item_id: if version < 2 {
514 // unsigned int(16) item_ID;
515 stream.read_u16()? as u32
516 } else {
517 // unsigned int(32) item_ID;
518 stream.read_u32()?
519 },
520 ..ItemLocationEntry::default()
521 };
522 if entry.item_id == 0 {
523 return Err(AvifError::BmffParseFailed(format!(
524 "Box[iloc] has invalid item id: {}",
525 entry.item_id
526 )));
527 }
528 if version == 1 || version == 2 {
529 let mut bits = stream.sub_bit_stream(2)?;
530 // unsigned int(12) reserved = 0;
531 if bits.read(12)? != 0 {
532 return Err(AvifError::BmffParseFailed(
533 "Box[iloc] has invalid reserved bits".into(),
534 ));
535 }
536 // unsigned int(4) construction_method;
537 entry.construction_method = bits.read(4)? as u8;
538 // 0: file offset, 1: idat offset, 2: item offset.
539 if entry.construction_method != 0 && entry.construction_method != 1 {
540 return Err(AvifError::BmffParseFailed(format!(
541 "Box[iloc] has unknown construction_method: {}",
542 entry.construction_method
543 )));
544 }
545 }
546 // unsigned int(16) data_reference_index;
547 stream.skip(2)?;
548 // unsigned int(base_offset_size*8) base_offset;
549 entry.base_offset = stream.read_uxx(iloc.base_offset_size)?;
550 // unsigned int(16) extent_count;
551 entry.extent_count = stream.read_u16()?;
552 for _j in 0..entry.extent_count {
553 // unsigned int(index_size*8) item_reference_index;
554 stream.skip(iloc.index_size as usize)?; // Only used for construction_method 2.
555 let extent = Extent {
556 // unsigned int(offset_size*8) extent_offset;
557 offset: stream.read_uxx(iloc.offset_size)?,
558 // unsigned int(length_size*8) extent_length;
559 size: usize_from_u64(stream.read_uxx(iloc.length_size)?)?,
560 };
561 entry.extents.push(extent);
562 }
563 iloc.items.push(entry);
564 }
565 Ok(iloc)
566 }
567
568 // Returns the primary item ID.
parse_pitm(stream: &mut IStream) -> AvifResult<u32>569 fn parse_pitm(stream: &mut IStream) -> AvifResult<u32> {
570 // Section 8.11.4.2 of ISO/IEC 14496-12.
571 let (version, _flags) = stream.read_version_and_flags()?;
572 if version == 0 {
573 // unsigned int(16) item_ID;
574 Ok(stream.read_u16()? as u32)
575 } else {
576 // unsigned int(32) item_ID;
577 Ok(stream.read_u32()?)
578 }
579 }
580
parse_ispe(stream: &mut IStream) -> AvifResult<ItemProperty>581 fn parse_ispe(stream: &mut IStream) -> AvifResult<ItemProperty> {
582 // Section 6.5.3.2 of ISO/IEC 23008-12.
583 let (_version, _flags) = stream.read_and_enforce_version_and_flags(0)?;
584 let ispe = ImageSpatialExtents {
585 // unsigned int(32) image_width;
586 width: stream.read_u32()?,
587 // unsigned int(32) image_height;
588 height: stream.read_u32()?,
589 };
590 Ok(ItemProperty::ImageSpatialExtents(ispe))
591 }
592
parse_pixi(stream: &mut IStream) -> AvifResult<ItemProperty>593 fn parse_pixi(stream: &mut IStream) -> AvifResult<ItemProperty> {
594 // Section 6.5.6.2 of ISO/IEC 23008-12.
595 let (_version, _flags) = stream.read_and_enforce_version_and_flags(0)?;
596 // unsigned int (8) num_channels;
597 let num_channels = stream.read_u8()? as usize;
598 if num_channels == 0 || num_channels > MAX_PLANE_COUNT {
599 return Err(AvifError::BmffParseFailed(format!(
600 "Invalid plane count {num_channels} in pixi box"
601 )));
602 }
603 let mut pixi = PixelInformation {
604 plane_depths: create_vec_exact(num_channels)?,
605 };
606 for _ in 0..num_channels {
607 // unsigned int (8) bits_per_channel;
608 pixi.plane_depths.push(stream.read_u8()?);
609 if pixi.plane_depths.last().unwrap() != pixi.plane_depths.first().unwrap() {
610 return Err(AvifError::UnsupportedDepth);
611 }
612 }
613 Ok(ItemProperty::PixelInformation(pixi))
614 }
615
616 #[allow(non_snake_case)]
parse_av1C(stream: &mut IStream) -> AvifResult<ItemProperty>617 fn parse_av1C(stream: &mut IStream) -> AvifResult<ItemProperty> {
618 let raw_data = stream.get_immutable_vec(stream.bytes_left()?)?;
619 // See https://aomediacodec.github.io/av1-isobmff/v1.2.0.html#av1codecconfigurationbox-syntax.
620 let mut bits = stream.sub_bit_stream(4)?;
621 // unsigned int (1) marker = 1;
622 let marker = bits.read(1)?;
623 if marker != 1 {
624 return Err(AvifError::BmffParseFailed(format!(
625 "Invalid marker ({marker}) in av1C"
626 )));
627 }
628 // unsigned int (7) version = 1;
629 let version = bits.read(7)?;
630 if version != 1 {
631 return Err(AvifError::BmffParseFailed(format!(
632 "Invalid version ({version}) in av1C"
633 )));
634 }
635 let av1C = Av1CodecConfiguration {
636 // unsigned int(3) seq_profile;
637 // unsigned int(5) seq_level_idx_0;
638 seq_profile: bits.read(3)? as u8,
639 seq_level_idx0: bits.read(5)? as u8,
640 // unsigned int(1) seq_tier_0;
641 // unsigned int(1) high_bitdepth;
642 // unsigned int(1) twelve_bit;
643 // unsigned int(1) monochrome;
644 // unsigned int(1) chroma_subsampling_x;
645 // unsigned int(1) chroma_subsampling_y;
646 // unsigned int(2) chroma_sample_position;
647 seq_tier0: bits.read(1)? as u8,
648 high_bitdepth: bits.read_bool()?,
649 twelve_bit: bits.read_bool()?,
650 monochrome: bits.read_bool()?,
651 chroma_subsampling_x: bits.read(1)? as u8,
652 chroma_subsampling_y: bits.read(1)? as u8,
653 chroma_sample_position: bits.read(2)?.into(),
654 raw_data,
655 };
656
657 // unsigned int(3) reserved = 0;
658 if bits.read(3)? != 0 {
659 return Err(AvifError::BmffParseFailed(
660 "Invalid reserved bits in av1C".into(),
661 ));
662 }
663 // unsigned int(1) initial_presentation_delay_present;
664 if bits.read(1)? == 1 {
665 // unsigned int(4) initial_presentation_delay_minus_one;
666 bits.read(4)?;
667 } else {
668 // unsigned int(4) reserved = 0;
669 if bits.read(4)? != 0 {
670 return Err(AvifError::BmffParseFailed(
671 "Invalid reserved bits in av1C".into(),
672 ));
673 }
674 }
675 assert_eq!(bits.remaining_bits()?, 0);
676
677 // https://aomediacodec.github.io/av1-avif/v1.1.0.html#av1-configuration-item-property:
678 // - Sequence Header OBUs should not be present in the AV1CodecConfigurationBox.
679 // This is ignored.
680 // - If a Sequence Header OBU is present in the AV1CodecConfigurationBox, it shall match the
681 // Sequence Header OBU in the AV1 Image Item Data.
682 // This is not enforced.
683 // - The values of the fields in the AV1CodecConfigurationBox shall match those of the
684 // Sequence Header OBU in the AV1 Image Item Data.
685 // This is not enforced (?).
686 // - Metadata OBUs, if present, shall match the values given in other item properties, such as
687 // the PixelInformationProperty or ColourInformationBox.
688 // This is not enforced.
689
690 // unsigned int(8) configOBUs[];
691
692 Ok(ItemProperty::CodecConfiguration(CodecConfiguration::Av1(
693 av1C,
694 )))
695 }
696
/// Parses an HEVC decoder configuration (`hvcC`) payload from `stream`.
///
/// Only the luma bit depth, the NAL unit length size and the VPS/SPS/PPS NAL units are kept.
/// If several NAL units of the same type are present, the last one wins.
///
/// Returns an error if the configuration version is neither 0 nor 1, if a NAL unit is empty, or
/// if the stream ends early.
#[allow(non_snake_case)]
#[cfg(feature = "heic")]
fn parse_hvcC(stream: &mut IStream) -> AvifResult<ItemProperty> {
    // unsigned int(8) configurationVersion;
    let configuration_version = stream.read_u8()?;
    if configuration_version != 0 && configuration_version != 1 {
        return Err(AvifError::BmffParseFailed(format!(
            "Unknown configurationVersion({configuration_version}) in hvcC. Expected 0 or 1."
        )));
    }
    let mut bits = stream.sub_bit_stream(21)?;
    // unsigned int(2) general_profile_space;
    // unsigned int(1) general_tier_flag;
    // unsigned int(5) general_profile_idc;
    // unsigned int(32) general_profile_compatibility_flags;
    // unsigned int(48) general_constraint_indicator_flags;
    // unsigned int(8) general_level_idc;
    // bit(4) reserved = '1111'b;
    // unsigned int(12) min_spatial_segmentation_idc;
    // bit(6) reserved = '111111'b;
    // unsigned int(2) parallelismType;
    // bit(6) reserved = '111111'b;
    // unsigned int(2) chroma_format_idc;
    // bit(5) reserved = '11111'b;
    bits.skip(2 + 1 + 5 + 32 + 48 + 8 + 4 + 12 + 6 + 2 + 6 + 2 + 5)?;
    // unsigned int(3) bit_depth_luma_minus8;
    let bitdepth = bits.read(3)? as u8 + 8;
    // bit(5) reserved = '11111'b;
    // unsigned int(3) bit_depth_chroma_minus8;
    // unsigned int(16) avgFrameRate;
    // unsigned int(2) constantFrameRate;
    // unsigned int(3) numTemporalLayers;
    // unsigned int(1) temporalIdNested;
    bits.skip(5 + 3 + 16 + 2 + 3 + 1)?;
    // unsigned int(2) lengthSizeMinusOne;
    let nal_length_size = 1 + bits.read(2)? as u8;
    assert!(bits.remaining_bits()? == 0);

    // unsigned int(8) numOfArrays;
    let num_of_arrays = stream.read_u8()?;
    let mut vps: Vec<u8> = Vec::new();
    let mut sps: Vec<u8> = Vec::new();
    let mut pps: Vec<u8> = Vec::new();
    for _i in 0..num_of_arrays {
        // unsigned int(1) array_completeness;
        // bit(1) reserved = 0;
        // unsigned int(6) NAL_unit_type;
        stream.skip(1)?;
        // unsigned int(16) numNalus;
        let num_nalus = stream.read_u16()?;
        for _j in 0..num_nalus {
            // unsigned int(16) nalUnitLength;
            let nal_unit_length = stream.read_u16()?;
            let nal_unit = stream.get_slice(nal_unit_length as usize)?;
            // nalUnitLength comes from the file and may be 0. Indexing an empty NAL unit would
            // panic, so reject it as a parse error instead.
            let first_byte = match nal_unit.first() {
                Some(byte) => *byte,
                None => {
                    return Err(AvifError::BmffParseFailed(
                        "hvcC contains an empty NAL unit".into(),
                    ));
                }
            };
            // The NAL unit type is stored in bits 1..=6 of the first header byte.
            let nal_unit_type = (first_byte >> 1) & 0x3f;
            match nal_unit_type {
                32 => vps = nal_unit.to_vec(),
                33 => sps = nal_unit.to_vec(),
                34 => pps = nal_unit.to_vec(),
                // Other NAL unit types are not needed.
                _ => {}
            }
        }
    }
    Ok(ItemProperty::CodecConfiguration(CodecConfiguration::Hevc(
        HevcCodecConfiguration {
            bitdepth,
            nal_length_size,
            vps,
            sps,
            pps,
        },
    )))
}
770
parse_colr(stream: &mut IStream) -> AvifResult<ItemProperty>771 fn parse_colr(stream: &mut IStream) -> AvifResult<ItemProperty> {
772 // Section 12.1.5.2 of ISO/IEC 14496-12.
773
774 // unsigned int(32) colour_type;
775 let color_type = stream.read_string(4)?;
776 if color_type == "rICC" || color_type == "prof" {
777 if stream.bytes_left()? == 0 {
778 // Section 12.1.5.3 of ISO/IEC 14496-12:
779 // ICC_profile: an ICC profile as defined in ISO 15076-1 or ICC.1 is supplied.
780 // Section 7.2.1 of ICC.1:2010:
781 // The profile header is 128 bytes in length and contains 18 fields.
782 // So an empty ICC profile is invalid.
783 return Err(AvifError::BmffParseFailed(format!(
784 "colr box contains 0 bytes of {color_type}"
785 )));
786 }
787 // ICC_profile; // restricted ("rICC") or unrestricted ("prof") ICC profile
788 return Ok(ItemProperty::ColorInformation(ColorInformation::Icc(
789 stream.get_slice(stream.bytes_left()?)?.to_vec(),
790 )));
791 }
792 if color_type == "nclx" {
793 let mut nclx = Nclx {
794 // unsigned int(16) colour_primaries;
795 color_primaries: stream.read_u16()?.into(),
796 // unsigned int(16) transfer_characteristics;
797 transfer_characteristics: stream.read_u16()?.into(),
798 // unsigned int(16) matrix_coefficients;
799 matrix_coefficients: stream.read_u16()?.into(),
800 ..Nclx::default()
801 };
802 let mut bits = stream.sub_bit_stream(1)?;
803 // unsigned int(1) full_range_flag;
804 nclx.yuv_range = if bits.read_bool()? { YuvRange::Full } else { YuvRange::Limited };
805 // unsigned int(7) reserved = 0;
806 if bits.read(7)? != 0 {
807 return Err(AvifError::BmffParseFailed(
808 "colr box contains invalid reserved bits".into(),
809 ));
810 }
811 return Ok(ItemProperty::ColorInformation(ColorInformation::Nclx(nclx)));
812 }
813 Ok(ItemProperty::ColorInformation(ColorInformation::Unknown))
814 }
815
parse_pasp(stream: &mut IStream) -> AvifResult<ItemProperty>816 fn parse_pasp(stream: &mut IStream) -> AvifResult<ItemProperty> {
817 // Section 12.1.4.2 of ISO/IEC 14496-12.
818 let pasp = PixelAspectRatio {
819 // unsigned int(32) hSpacing;
820 h_spacing: stream.read_u32()?,
821 // unsigned int(32) vSpacing;
822 v_spacing: stream.read_u32()?,
823 };
824 Ok(ItemProperty::PixelAspectRatio(pasp))
825 }
826
827 #[allow(non_snake_case)]
parse_auxC(stream: &mut IStream) -> AvifResult<ItemProperty>828 fn parse_auxC(stream: &mut IStream) -> AvifResult<ItemProperty> {
829 // Section 6.5.8.2 of ISO/IEC 23008-12.
830 let (_version, _flags) = stream.read_and_enforce_version_and_flags(0)?;
831 // string aux_type;
832 let auxiliary_type = stream.read_c_string()?;
833 // template unsigned int(8) aux_subtype[];
834 // until the end of the box, the semantics depend on the aux_type value
835 Ok(ItemProperty::AuxiliaryType(auxiliary_type))
836 }
837
parse_clap(stream: &mut IStream) -> AvifResult<ItemProperty>838 fn parse_clap(stream: &mut IStream) -> AvifResult<ItemProperty> {
839 // Section 12.1.4.2 of ISO/IEC 14496-12.
840 let clap = CleanAperture {
841 // unsigned int(32) cleanApertureWidthN;
842 // unsigned int(32) cleanApertureWidthD;
843 width: stream.read_ufraction()?,
844 // unsigned int(32) cleanApertureHeightN;
845 // unsigned int(32) cleanApertureHeightD;
846 height: stream.read_ufraction()?,
847 // unsigned int(32) horizOffN;
848 // unsigned int(32) horizOffD;
849 horiz_off: stream.read_ufraction()?,
850 // unsigned int(32) vertOffN;
851 // unsigned int(32) vertOffD;
852 vert_off: stream.read_ufraction()?,
853 };
854 Ok(ItemProperty::CleanAperture(clap))
855 }
856
parse_irot(stream: &mut IStream) -> AvifResult<ItemProperty>857 fn parse_irot(stream: &mut IStream) -> AvifResult<ItemProperty> {
858 // Section 6.5.10.2 of ISO/IEC 23008-12.
859 let mut bits = stream.sub_bit_stream(1)?;
860 // unsigned int (6) reserved = 0;
861 if bits.read(6)? != 0 {
862 return Err(AvifError::BmffParseFailed(
863 "invalid reserved bits in irot".into(),
864 ));
865 }
866 // unsigned int (2) angle;
867 let angle = bits.read(2)? as u8;
868 Ok(ItemProperty::ImageRotation(angle))
869 }
870
parse_imir(stream: &mut IStream) -> AvifResult<ItemProperty>871 fn parse_imir(stream: &mut IStream) -> AvifResult<ItemProperty> {
872 // Section 6.5.12.1 of ISO/IEC 23008-12.
873 let mut bits = stream.sub_bit_stream(1)?;
874 // unsigned int(7) reserved = 0;
875 if bits.read(7)? != 0 {
876 return Err(AvifError::BmffParseFailed(
877 "invalid reserved bits in imir".into(),
878 ));
879 }
880 // unsigned int(1) axis;
881 let axis = bits.read(1)? as u8;
882 Ok(ItemProperty::ImageMirror(axis))
883 }
884
parse_a1op(stream: &mut IStream) -> AvifResult<ItemProperty>885 fn parse_a1op(stream: &mut IStream) -> AvifResult<ItemProperty> {
886 // https://aomediacodec.github.io/av1-avif/v1.1.0.html#operating-point-selector-property-syntax
887
888 // unsigned int(8) op_index;
889 let op_index = stream.read_u8()?;
890 if op_index > 31 {
891 // 31 is AV1's maximum operating point value (operating_points_cnt_minus_1).
892 return Err(AvifError::BmffParseFailed(format!(
893 "Invalid op_index ({op_index}) in a1op"
894 )));
895 }
896 Ok(ItemProperty::OperatingPointSelector(op_index))
897 }
898
parse_lsel(stream: &mut IStream) -> AvifResult<ItemProperty>899 fn parse_lsel(stream: &mut IStream) -> AvifResult<ItemProperty> {
900 // Section 6.5.11.1 of ISO/IEC 23008-12.
901
902 // unsigned int(16) layer_id;
903 let layer_id = stream.read_u16()?;
904
905 // https://aomediacodec.github.io/av1-avif/v1.1.0.html#layer-selector-property:
906 // The layer_id indicates the value of the spatial_id to render. The value shall be between 0
907 // and 3, or the special value 0xFFFF.
908 if layer_id != 0xFFFF && layer_id >= 4 {
909 return Err(AvifError::BmffParseFailed(format!(
910 "Invalid layer_id ({layer_id}) in lsel"
911 )));
912 }
913 Ok(ItemProperty::LayerSelector(layer_id))
914 }
915
parse_a1lx(stream: &mut IStream) -> AvifResult<ItemProperty>916 fn parse_a1lx(stream: &mut IStream) -> AvifResult<ItemProperty> {
917 // https://aomediacodec.github.io/av1-avif/v1.1.0.html#layered-image-indexing-property-syntax
918 let mut bits = stream.sub_bit_stream(1)?;
919 // unsigned int(7) reserved = 0;
920 if bits.read(7)? != 0 {
921 return Err(AvifError::BmffParseFailed(
922 "Invalid reserved bits in a1lx".into(),
923 ));
924 }
925 // unsigned int(1) large_size;
926 let large_size = bits.read_bool()?;
927 let mut layer_sizes: [usize; 3] = [0; 3];
928 for layer_size in &mut layer_sizes {
929 if large_size {
930 *layer_size = usize_from_u32(stream.read_u32()?)?;
931 } else {
932 *layer_size = usize_from_u16(stream.read_u16()?)?;
933 }
934 }
935 Ok(ItemProperty::AV1LayeredImageIndexing(layer_sizes))
936 }
937
parse_clli(stream: &mut IStream) -> AvifResult<ItemProperty>938 fn parse_clli(stream: &mut IStream) -> AvifResult<ItemProperty> {
939 // Section 12.1.6.2 of ISO/IEC 14496-12.
940 let clli = ContentLightLevelInformation {
941 // unsigned int(16) max_content_light_level
942 max_cll: stream.read_u16()?,
943 // unsigned int(16) max_pic_average_light_level
944 max_pall: stream.read_u16()?,
945 };
946 Ok(ItemProperty::ContentLightLevelInformation(clli))
947 }
948
parse_ipco(stream: &mut IStream, is_track: bool) -> AvifResult<Vec<ItemProperty>>949 fn parse_ipco(stream: &mut IStream, is_track: bool) -> AvifResult<Vec<ItemProperty>> {
950 // Section 8.11.14.2 of ISO/IEC 14496-12.
951 let mut properties: Vec<ItemProperty> = Vec::new();
952 while stream.has_bytes_left()? {
953 let header = parse_header(stream, /*top_level=*/ false)?;
954 let mut sub_stream = stream.sub_stream(&header.size)?;
955 match header.box_type.as_str() {
956 "ispe" => properties.push(parse_ispe(&mut sub_stream)?),
957 "pixi" => properties.push(parse_pixi(&mut sub_stream)?),
958 "av1C" => properties.push(parse_av1C(&mut sub_stream)?),
959 "colr" => properties.push(parse_colr(&mut sub_stream)?),
960 "pasp" => properties.push(parse_pasp(&mut sub_stream)?),
961 "auxC" if !is_track => properties.push(parse_auxC(&mut sub_stream)?),
962 "auxi" if is_track => properties.push(parse_auxC(&mut sub_stream)?),
963 "clap" => properties.push(parse_clap(&mut sub_stream)?),
964 "irot" => properties.push(parse_irot(&mut sub_stream)?),
965 "imir" => properties.push(parse_imir(&mut sub_stream)?),
966 "a1op" => properties.push(parse_a1op(&mut sub_stream)?),
967 "lsel" => properties.push(parse_lsel(&mut sub_stream)?),
968 "a1lx" => properties.push(parse_a1lx(&mut sub_stream)?),
969 "clli" => properties.push(parse_clli(&mut sub_stream)?),
970 #[cfg(feature = "heic")]
971 "hvcC" => properties.push(parse_hvcC(&mut sub_stream)?),
972 _ => properties.push(ItemProperty::Unknown(header.box_type)),
973 }
974 }
975 Ok(properties)
976 }
977
parse_ipma(stream: &mut IStream) -> AvifResult<Vec<ItemPropertyAssociation>>978 fn parse_ipma(stream: &mut IStream) -> AvifResult<Vec<ItemPropertyAssociation>> {
979 // Section 8.11.14.2 of ISO/IEC 14496-12.
980 let (version, flags) = stream.read_version_and_flags()?;
981 // unsigned int(32) entry_count;
982 let entry_count = stream.read_u32()?;
983 let mut ipma: Vec<ItemPropertyAssociation> = create_vec_exact(usize_from_u32(entry_count)?)?;
984 for _i in 0..entry_count {
985 let mut entry = ItemPropertyAssociation::default();
986 // ISO/IEC 23008-12, First edition, 2017-12, Section 9.3.1:
987 // Each ItemPropertyAssociation box shall be ordered by increasing item_ID, and there
988 // shall be at most one association box for each item_ID, in any
989 // ItemPropertyAssociation box.
990 if version < 1 {
991 // unsigned int(16) item_ID;
992 entry.item_id = stream.read_u16()? as u32;
993 } else {
994 // unsigned int(32) item_ID;
995 entry.item_id = stream.read_u32()?;
996 }
997 if entry.item_id == 0 {
998 return Err(AvifError::BmffParseFailed(format!(
999 "invalid item id ({}) in ipma",
1000 entry.item_id
1001 )));
1002 }
1003 if !ipma.is_empty() {
1004 let previous_item_id = ipma.last().unwrap().item_id;
1005 if entry.item_id <= previous_item_id {
1006 return Err(AvifError::BmffParseFailed(
1007 "ipma item ids are not ordered by increasing id".into(),
1008 ));
1009 }
1010 }
1011 // unsigned int(8) association_count;
1012 let association_count = stream.read_u8()?;
1013 for _j in 0..association_count {
1014 let mut bits = stream.sub_bit_stream(if flags & 0x1 == 1 { 2 } else { 1 })?;
1015 // bit(1) essential;
1016 let essential = bits.read_bool()?;
1017 if flags & 0x1 == 1 {
1018 // unsigned int(15) property_index;
1019 entry.associations.push((bits.read(15)? as u16, essential));
1020 } else {
1021 //unsigned int(7) property_index;
1022 entry.associations.push((bits.read(7)? as u16, essential));
1023 }
1024 }
1025 ipma.push(entry);
1026 }
1027 Ok(ipma)
1028 }
1029
parse_iprp(stream: &mut IStream) -> AvifResult<ItemPropertyBox>1030 fn parse_iprp(stream: &mut IStream) -> AvifResult<ItemPropertyBox> {
1031 // Section 8.11.14.2 of ISO/IEC 14496-12.
1032 let header = parse_header(stream, /*top_level=*/ false)?;
1033 if header.box_type != "ipco" {
1034 return Err(AvifError::BmffParseFailed(
1035 "First box in iprp is not ipco".into(),
1036 ));
1037 }
1038 let mut iprp = ItemPropertyBox::default();
1039 // Parse ipco box.
1040 {
1041 let mut sub_stream = stream.sub_stream(&header.size)?;
1042 iprp.properties = parse_ipco(&mut sub_stream, /*is_track=*/ false)?;
1043 }
1044 // Parse ipma boxes.
1045 while stream.has_bytes_left()? {
1046 let header = parse_header(stream, /*top_level=*/ false)?;
1047 if header.box_type != "ipma" {
1048 return Err(AvifError::BmffParseFailed(
1049 "Found non ipma box in iprp".into(),
1050 ));
1051 }
1052 let mut sub_stream = stream.sub_stream(&header.size)?;
1053 iprp.associations.append(&mut parse_ipma(&mut sub_stream)?);
1054 }
1055 Ok(iprp)
1056 }
1057
parse_infe(stream: &mut IStream) -> AvifResult<ItemInfo>1058 fn parse_infe(stream: &mut IStream) -> AvifResult<ItemInfo> {
1059 // Section 8.11.6.2 of ISO/IEC 14496-12.
1060 let (version, _flags) = stream.read_version_and_flags()?;
1061 if version != 2 && version != 3 {
1062 return Err(AvifError::BmffParseFailed(
1063 "infe box version 2 or 3 expected.".into(),
1064 ));
1065 }
1066
1067 // TODO: check flags. ISO/IEC 23008-12:2017, Section 9.2 says:
1068 // The flags field of ItemInfoEntry with version greater than or equal to 2 is specified
1069 // as follows:
1070 // (flags & 1) equal to 1 indicates that the item is not intended to be a part of the
1071 // presentation. For example, when (flags & 1) is equal to 1 for an image item, the
1072 // image item should not be displayed. (flags & 1) equal to 0 indicates that the item
1073 // is intended to be a part of the presentation.
1074 //
1075 // See also Section 6.4.2.
1076 let mut entry = ItemInfo::default();
1077 if version == 2 {
1078 // unsigned int(16) item_ID;
1079 entry.item_id = stream.read_u16()? as u32;
1080 } else {
1081 // unsigned int(32) item_ID;
1082 entry.item_id = stream.read_u32()?;
1083 }
1084 if entry.item_id == 0 {
1085 return Err(AvifError::BmffParseFailed(format!(
1086 "Invalid item id ({}) found in infe",
1087 entry.item_id
1088 )));
1089 }
1090 // unsigned int(16) item_protection_index;
1091 entry.item_protection_index = stream.read_u16()?;
1092 // unsigned int(32) item_type;
1093 entry.item_type = stream.read_string(4)?;
1094
1095 // utf8string item_name;
1096 entry.item_name = stream.read_c_string()?;
1097
1098 if entry.item_type == "mime" {
1099 // utf8string content_type;
1100 entry.content_type = stream.read_c_string()?;
1101 // utf8string content_encoding; //optional
1102 }
1103 // if (item_type == 'uri ') {
1104 // utf8string item_uri_type;
1105 // }
1106 Ok(entry)
1107 }
1108
parse_iinf(stream: &mut IStream) -> AvifResult<Vec<ItemInfo>>1109 fn parse_iinf(stream: &mut IStream) -> AvifResult<Vec<ItemInfo>> {
1110 // Section 8.11.6.2 of ISO/IEC 14496-12.
1111 let (version, _flags) = stream.read_version_and_flags()?;
1112 if version > 1 {
1113 return Err(AvifError::BmffParseFailed(format!(
1114 "Unsupported version {} in iinf box",
1115 version
1116 )));
1117 }
1118 let entry_count: u32 = if version == 0 {
1119 // unsigned int(16) entry_count;
1120 stream.read_u16()? as u32
1121 } else {
1122 // unsigned int(32) entry_count;
1123 stream.read_u32()?
1124 };
1125 let mut iinf: Vec<ItemInfo> = create_vec_exact(usize_from_u32(entry_count)?)?;
1126 for _i in 0..entry_count {
1127 let header = parse_header(stream, /*top_level=*/ false)?;
1128 if header.box_type != "infe" {
1129 return Err(AvifError::BmffParseFailed(
1130 "Found non infe box in iinf".into(),
1131 ));
1132 }
1133 let mut sub_stream = stream.sub_stream(&header.size)?;
1134 iinf.push(parse_infe(&mut sub_stream)?);
1135 }
1136 Ok(iinf)
1137 }
1138
parse_iref(stream: &mut IStream) -> AvifResult<Vec<ItemReference>>1139 fn parse_iref(stream: &mut IStream) -> AvifResult<Vec<ItemReference>> {
1140 // Section 8.11.12.2 of ISO/IEC 14496-12.
1141 let (version, _flags) = stream.read_version_and_flags()?;
1142 let mut iref: Vec<ItemReference> = Vec::new();
1143 // versions > 1 are not supported. ignore them.
1144 if version > 1 {
1145 return Ok(iref);
1146 }
1147 while stream.has_bytes_left()? {
1148 let header = parse_header(stream, /*top_level=*/ false)?;
1149 let from_item_id: u32 = if version == 0 {
1150 // unsigned int(16) from_item_ID;
1151 stream.read_u16()? as u32
1152 } else {
1153 // unsigned int(32) from_item_ID;
1154 stream.read_u32()?
1155 };
1156 if from_item_id == 0 {
1157 return Err(AvifError::BmffParseFailed(
1158 "invalid from_item_id (0) in iref".into(),
1159 ));
1160 }
1161 // unsigned int(16) reference_count;
1162 let reference_count = stream.read_u16()?;
1163 for index in 0..reference_count {
1164 let to_item_id: u32 = if version == 0 {
1165 // unsigned int(16) to_item_ID;
1166 stream.read_u16()? as u32
1167 } else {
1168 // unsigned int(32) to_item_ID;
1169 stream.read_u32()?
1170 };
1171 if to_item_id == 0 {
1172 return Err(AvifError::BmffParseFailed(
1173 "invalid to_item_id (0) in iref".into(),
1174 ));
1175 }
1176 iref.push(ItemReference {
1177 from_item_id,
1178 to_item_id,
1179 reference_type: header.box_type.clone(),
1180 index: index as u32,
1181 });
1182 }
1183 }
1184 Ok(iref)
1185 }
1186
parse_idat(stream: &mut IStream) -> AvifResult<Vec<u8>>1187 fn parse_idat(stream: &mut IStream) -> AvifResult<Vec<u8>> {
1188 // Section 8.11.11.2 of ISO/IEC 14496-12.
1189 if !stream.has_bytes_left()? {
1190 return Err(AvifError::BmffParseFailed("Invalid idat size (0)".into()));
1191 }
1192 let mut idat: Vec<u8> = Vec::with_capacity(stream.bytes_left()?);
1193 idat.extend_from_slice(stream.get_slice(stream.bytes_left()?)?);
1194 Ok(idat)
1195 }
1196
parse_meta(stream: &mut IStream) -> AvifResult<MetaBox>1197 fn parse_meta(stream: &mut IStream) -> AvifResult<MetaBox> {
1198 // Section 8.11.1.2 of ISO/IEC 14496-12.
1199 let (_version, _flags) = stream.read_and_enforce_version_and_flags(0)?;
1200 let mut meta = MetaBox::default();
1201
1202 // Parse the first hdlr box.
1203 {
1204 let header = parse_header(stream, /*top_level=*/ false)?;
1205 if header.box_type != "hdlr" {
1206 return Err(AvifError::BmffParseFailed(
1207 "first box in meta is not hdlr".into(),
1208 ));
1209 }
1210 let handler_type = parse_hdlr(&mut stream.sub_stream(&header.size)?)?;
1211 if handler_type != "pict" {
1212 // Section 6.2 of ISO/IEC 23008-12:
1213 // The handler type for the MetaBox shall be 'pict'.
1214 // https://aomediacodec.github.io/av1-avif/v1.1.0.html#image-sequences does not apply
1215 // because this function is only called for the MetaBox but it would work too:
1216 // The track handler for an AV1 Image Sequence shall be pict.
1217 return Err(AvifError::BmffParseFailed(
1218 "Box[hdlr] handler_type is not 'pict'".into(),
1219 ));
1220 }
1221 }
1222
1223 let mut boxes_seen: HashSet<String> = HashSet::with_hasher(NonRandomHasherState);
1224 boxes_seen.insert(String::from("hdlr"));
1225 while stream.has_bytes_left()? {
1226 let header = parse_header(stream, /*top_level=*/ false)?;
1227 match header.box_type.as_str() {
1228 "hdlr" | "iloc" | "pitm" | "iprp" | "iinf" | "iref" | "idat" => {
1229 if boxes_seen.contains(&header.box_type) {
1230 return Err(AvifError::BmffParseFailed(format!(
1231 "duplicate {} box in meta.",
1232 header.box_type
1233 )));
1234 }
1235 boxes_seen.insert(header.box_type.clone());
1236 }
1237 _ => {}
1238 }
1239 let mut sub_stream = stream.sub_stream(&header.size)?;
1240 match header.box_type.as_str() {
1241 "iloc" => meta.iloc = parse_iloc(&mut sub_stream)?,
1242 "pitm" => meta.primary_item_id = parse_pitm(&mut sub_stream)?,
1243 "iprp" => meta.iprp = parse_iprp(&mut sub_stream)?,
1244 "iinf" => meta.iinf = parse_iinf(&mut sub_stream)?,
1245 "iref" => meta.iref = parse_iref(&mut sub_stream)?,
1246 "idat" => meta.idat = parse_idat(&mut sub_stream)?,
1247 _ => {}
1248 }
1249 }
1250 Ok(meta)
1251 }
1252
parse_tkhd(stream: &mut IStream, track: &mut Track) -> AvifResult<()>1253 fn parse_tkhd(stream: &mut IStream, track: &mut Track) -> AvifResult<()> {
1254 // Section 8.3.2.2 of ISO/IEC 14496-12.
1255 let (version, _flags) = stream.read_version_and_flags()?;
1256 if version == 1 {
1257 // unsigned int(64) creation_time;
1258 stream.skip_u64()?;
1259 // unsigned int(64) modification_time;
1260 stream.skip_u64()?;
1261 // unsigned int(32) track_ID;
1262 track.id = stream.read_u32()?;
1263 // const unsigned int(32) reserved = 0;
1264 if stream.read_u32()? != 0 {
1265 return Err(AvifError::BmffParseFailed(
1266 "Invalid reserved bits in tkhd".into(),
1267 ));
1268 }
1269 // unsigned int(64) duration;
1270 track.track_duration = stream.read_u64()?;
1271 } else if version == 0 {
1272 // unsigned int(32) creation_time;
1273 stream.skip_u32()?;
1274 // unsigned int(32) modification_time;
1275 stream.skip_u32()?;
1276 // unsigned int(32) track_ID;
1277 track.id = stream.read_u32()?;
1278 // const unsigned int(32) reserved = 0;
1279 if stream.read_u32()? != 0 {
1280 return Err(AvifError::BmffParseFailed(
1281 "Invalid reserved bits in tkhd".into(),
1282 ));
1283 }
1284 // unsigned int(32) duration;
1285 track.track_duration = stream.read_u32()? as u64;
1286 } else {
1287 return Err(AvifError::BmffParseFailed(format!(
1288 "unsupported version ({version}) in trak"
1289 )));
1290 }
1291
1292 // const unsigned int(32)[2] reserved = 0;
1293 if stream.read_u32()? != 0 || stream.read_u32()? != 0 {
1294 return Err(AvifError::BmffParseFailed(
1295 "Invalid reserved bits in tkhd".into(),
1296 ));
1297 }
1298 // The following fields should be 0 but are ignored instead.
1299 // template int(16) layer = 0;
1300 stream.skip(2)?;
1301 // template int(16) alternate_group = 0;
1302 stream.skip(2)?;
1303 // template int(16) volume = {if track_is_audio 0x0100 else 0};
1304 stream.skip(2)?;
1305 // const unsigned int(16) reserved = 0;
1306 if stream.read_u16()? != 0 {
1307 return Err(AvifError::BmffParseFailed(
1308 "Invalid reserved bits in tkhd".into(),
1309 ));
1310 }
1311 // template int(32)[9] matrix= { 0x00010000,0,0,0,0x00010000,0,0,0,0x40000000 }; // unity matrix
1312 stream.skip(4 * 9)?;
1313
1314 // unsigned int(32) width;
1315 track.width = stream.read_u32()? >> 16;
1316 // unsigned int(32) height;
1317 track.height = stream.read_u32()? >> 16;
1318
1319 Ok(())
1320 }
1321
parse_mdhd(stream: &mut IStream, track: &mut Track) -> AvifResult<()>1322 fn parse_mdhd(stream: &mut IStream, track: &mut Track) -> AvifResult<()> {
1323 // Section 8.4.2.2 of ISO/IEC 14496-12.
1324 let (version, _flags) = stream.read_version_and_flags()?;
1325 if version == 1 {
1326 // unsigned int(64) creation_time;
1327 stream.skip_u64()?;
1328 // unsigned int(64) modification_time;
1329 stream.skip_u64()?;
1330 // unsigned int(32) timescale;
1331 track.media_timescale = stream.read_u32()?;
1332 // unsigned int(64) duration;
1333 track.media_duration = stream.read_u64()?;
1334 } else if version == 0 {
1335 // unsigned int(32) creation_time;
1336 stream.skip_u32()?;
1337 // unsigned int(32) modification_time;
1338 stream.skip_u32()?;
1339 // unsigned int(32) timescale;
1340 track.media_timescale = stream.read_u32()?;
1341 // unsigned int(32) duration;
1342 track.media_duration = stream.read_u32()? as u64;
1343 } else {
1344 return Err(AvifError::BmffParseFailed(format!(
1345 "unsupported version ({version}) in mdhd"
1346 )));
1347 }
1348
1349 let mut bits = stream.sub_bit_stream(4)?;
1350 // bit(1) pad = 0;
1351 if bits.read(1)? != 0 {
1352 return Err(AvifError::BmffParseFailed(
1353 "Invalid reserved bits in mdhd".into(),
1354 ));
1355 }
1356 // unsigned int(5)[3] language; // ISO-639-2/T language code
1357 bits.skip(5 * 3)?;
1358 // unsigned int(16) pre_defined = 0; ("Readers should expect any value")
1359 bits.skip(2)?;
1360 Ok(())
1361 }
1362
parse_stco( stream: &mut IStream, sample_table: &mut SampleTable, large_offset: bool, ) -> AvifResult<()>1363 fn parse_stco(
1364 stream: &mut IStream,
1365 sample_table: &mut SampleTable,
1366 large_offset: bool,
1367 ) -> AvifResult<()> {
1368 // Section 8.7.5.2 of ISO/IEC 14496-12.
1369 let (_version, _flags) = stream.read_and_enforce_version_and_flags(0)?;
1370 // unsigned int(32) entry_count;
1371 let entry_count = usize_from_u32(stream.read_u32()?)?;
1372 sample_table.chunk_offsets = create_vec_exact(entry_count)?;
1373 for _ in 0..entry_count {
1374 let chunk_offset: u64 = if large_offset {
1375 // unsigned int(64) chunk_offset;
1376 stream.read_u64()?
1377 } else {
1378 // unsigned int(32) chunk_offset;
1379 stream.read_u32()? as u64
1380 };
1381 sample_table.chunk_offsets.push(chunk_offset);
1382 }
1383 Ok(())
1384 }
1385
parse_stsc(stream: &mut IStream, sample_table: &mut SampleTable) -> AvifResult<()>1386 fn parse_stsc(stream: &mut IStream, sample_table: &mut SampleTable) -> AvifResult<()> {
1387 // Section 8.7.4.2 of ISO/IEC 14496-12.
1388 let (_version, _flags) = stream.read_and_enforce_version_and_flags(0)?;
1389 // unsigned int(32) entry_count;
1390 let entry_count = usize_from_u32(stream.read_u32()?)?;
1391 sample_table.sample_to_chunk = create_vec_exact(entry_count)?;
1392 for i in 0..entry_count {
1393 let stsc = SampleToChunk {
1394 // unsigned int(32) first_chunk;
1395 first_chunk: stream.read_u32()?,
1396 // unsigned int(32) samples_per_chunk;
1397 samples_per_chunk: stream.read_u32()?,
1398 // unsigned int(32) sample_description_index;
1399 sample_description_index: stream.read_u32()?,
1400 };
1401 if i == 0 {
1402 if stsc.first_chunk != 1 {
1403 return Err(AvifError::BmffParseFailed(
1404 "stsc does not begin with chunk 1.".into(),
1405 ));
1406 }
1407 } else if stsc.first_chunk <= sample_table.sample_to_chunk.last().unwrap().first_chunk {
1408 return Err(AvifError::BmffParseFailed(
1409 "stsc chunks are not strictly increasing.".into(),
1410 ));
1411 }
1412 if stsc.sample_description_index == 0 {
1413 return Err(AvifError::BmffParseFailed(format!(
1414 "sample_description_index is {} in stsc chunk.",
1415 stsc.sample_description_index
1416 )));
1417 }
1418 sample_table.sample_to_chunk.push(stsc);
1419 }
1420 Ok(())
1421 }
1422
parse_stsz(stream: &mut IStream, sample_table: &mut SampleTable) -> AvifResult<()>1423 fn parse_stsz(stream: &mut IStream, sample_table: &mut SampleTable) -> AvifResult<()> {
1424 // Section 8.7.3.2.1 of ISO/IEC 14496-12.
1425 let (_version, _flags) = stream.read_and_enforce_version_and_flags(0)?;
1426 // unsigned int(32) sample_size;
1427 let sample_size = stream.read_u32()?;
1428 // unsigned int(32) sample_count;
1429 let sample_count = usize_from_u32(stream.read_u32()?)?;
1430
1431 if sample_size > 0 {
1432 sample_table.sample_size = SampleSize::FixedSize(sample_size);
1433 return Ok(());
1434 }
1435 let mut sample_sizes: Vec<u32> = create_vec_exact(sample_count)?;
1436 for _ in 0..sample_count {
1437 // unsigned int(32) entry_size;
1438 sample_sizes.push(stream.read_u32()?);
1439 }
1440 sample_table.sample_size = SampleSize::Sizes(sample_sizes);
1441 Ok(())
1442 }
1443
parse_stss(stream: &mut IStream, sample_table: &mut SampleTable) -> AvifResult<()>1444 fn parse_stss(stream: &mut IStream, sample_table: &mut SampleTable) -> AvifResult<()> {
1445 // Section 8.6.2.2 of ISO/IEC 14496-12.
1446 let (_version, _flags) = stream.read_and_enforce_version_and_flags(0)?;
1447 // unsigned int(32) entry_count;
1448 let entry_count = usize_from_u32(stream.read_u32()?)?;
1449 sample_table.sync_samples = create_vec_exact(entry_count)?;
1450 for _ in 0..entry_count {
1451 // unsigned int(32) sample_number;
1452 sample_table.sync_samples.push(stream.read_u32()?);
1453 }
1454 Ok(())
1455 }
1456
parse_stts(stream: &mut IStream, sample_table: &mut SampleTable) -> AvifResult<()>1457 fn parse_stts(stream: &mut IStream, sample_table: &mut SampleTable) -> AvifResult<()> {
1458 // Section 8.6.1.2.2 of ISO/IEC 14496-12.
1459 let (_version, _flags) = stream.read_and_enforce_version_and_flags(0)?;
1460 // unsigned int(32) entry_count;
1461 let entry_count = usize_from_u32(stream.read_u32()?)?;
1462 sample_table.time_to_sample = create_vec_exact(entry_count)?;
1463 for _ in 0..entry_count {
1464 let stts = TimeToSample {
1465 // unsigned int(32) sample_count;
1466 sample_count: stream.read_u32()?,
1467 // unsigned int(32) sample_delta;
1468 sample_delta: stream.read_u32()?,
1469 };
1470 sample_table.time_to_sample.push(stts);
1471 }
1472 Ok(())
1473 }
1474
parse_sample_entry(stream: &mut IStream, format: String) -> AvifResult<SampleDescription>1475 fn parse_sample_entry(stream: &mut IStream, format: String) -> AvifResult<SampleDescription> {
1476 // Section 8.5.2.2 of ISO/IEC 14496-12.
1477 let mut sample_entry = SampleDescription {
1478 format,
1479 ..SampleDescription::default()
1480 };
1481 // const unsigned int(8) reserved[6] = 0;
1482 if stream.read_u8()? != 0
1483 || stream.read_u8()? != 0
1484 || stream.read_u8()? != 0
1485 || stream.read_u8()? != 0
1486 || stream.read_u8()? != 0
1487 || stream.read_u8()? != 0
1488 {
1489 return Err(AvifError::BmffParseFailed(
1490 "Invalid reserved bits in SampleEntry of stsd".into(),
1491 ));
1492 }
1493 // unsigned int(16) data_reference_index;
1494 stream.skip(2)?;
1495
1496 if sample_entry.is_supported_format() {
1497 // https://aomediacodec.github.io/av1-isobmff/v1.2.0.html#av1sampleentry-syntax:
1498 // class AV1SampleEntry extends VisualSampleEntry('av01'){
1499 // AV1CodecConfigurationBox config;
1500 // }
1501 // https://aomediacodec.github.io/av1-isobmff/v1.2.0.html#av1codecconfigurationbox-syntax:
1502 // class AV1CodecConfigurationBox extends Box('av1C'){
1503 // AV1CodecConfigurationRecord av1Config;
1504 // }
1505
1506 // Section 12.1.3.2 of ISO/IEC 14496-12:
1507 // class VisualSampleEntry(codingname) extends SampleEntry(codingname)
1508
1509 // unsigned int(16) pre_defined = 0; ("Readers should expect any value")
1510 stream.skip(2)?;
1511 // const unsigned int(16) reserved = 0;
1512 if stream.read_u16()? != 0 {
1513 return Err(AvifError::BmffParseFailed(
1514 "Invalid reserved bits in VisualSampleEntry of stsd".into(),
1515 ));
1516 }
1517 // unsigned int(32) pre_defined[3] = 0;
1518 stream.skip(4 * 3)?;
1519 // unsigned int(16) width;
1520 stream.skip(2)?;
1521 // unsigned int(16) height;
1522 stream.skip(2)?;
1523 // template unsigned int(32) horizresolution = 0x00480000; // 72 dpi
1524 stream.skip_u32()?;
1525 // template unsigned int(32) vertresolution = 0x00480000; // 72 dpi
1526 stream.skip_u32()?;
1527 // const unsigned int(32) reserved = 0;
1528 if stream.read_u32()? != 0 {
1529 return Err(AvifError::BmffParseFailed(
1530 "Invalid reserved bits in VisualSampleEntry of stsd".into(),
1531 ));
1532 }
1533 // template unsigned int(16) frame_count;
1534 stream.skip(2)?;
1535 // uint(8) compressorname[32];
1536 stream.skip(32)?;
1537 // template unsigned int(16) depth = 0x0018;
1538 if stream.read_u16()? != 0x0018 {
1539 return Err(AvifError::BmffParseFailed(
1540 "Invalid depth in VisualSampleEntry of stsd".into(),
1541 ));
1542 }
1543 // unsigned int(16) pre_defined = 0; ("Readers should expect any value")
1544 stream.skip(2)?;
1545
1546 // other boxes from derived specifications
1547 // CleanApertureBox clap; // optional
1548 // PixelAspectRatioBox pasp; // optional
1549
1550 // Now read any of 'av1C', 'clap', 'pasp' etc.
1551 sample_entry.properties = parse_ipco(
1552 &mut stream.sub_stream(&BoxSize::UntilEndOfStream)?,
1553 /*is_track=*/ true,
1554 )?;
1555
1556 if !sample_entry
1557 .properties
1558 .iter()
1559 .any(|p| matches!(p, ItemProperty::CodecConfiguration(_)))
1560 {
1561 return Err(AvifError::BmffParseFailed(
1562 "AV1SampleEntry must contain an AV1CodecConfigurationRecord".into(),
1563 ));
1564 }
1565 }
1566 Ok(sample_entry)
1567 }
1568
parse_stsd(stream: &mut IStream, sample_table: &mut SampleTable) -> AvifResult<()>1569 fn parse_stsd(stream: &mut IStream, sample_table: &mut SampleTable) -> AvifResult<()> {
1570 // Section 8.5.2.2 of ISO/IEC 14496-12.
1571 let (version, _flags) = stream.read_version_and_flags()?;
1572 if version != 0 && version != 1 {
1573 // Section 8.5.2.3 of ISO/IEC 14496-12:
1574 // version is set to zero. A version number of 1 shall be treated as a version of 0.
1575 return Err(AvifError::BmffParseFailed(
1576 "stsd box version 0 or 1 expected.".into(),
1577 ));
1578 }
1579 // unsigned int(32) entry_count;
1580 let entry_count = usize_from_u32(stream.read_u32()?)?;
1581 sample_table.sample_descriptions = create_vec_exact(entry_count)?;
1582 for _ in 0..entry_count {
1583 // aligned(8) abstract class SampleEntry (unsigned int(32) format) extends Box(format)
1584 let header = parse_header(stream, /*top_level=*/ false)?;
1585 let sample_entry =
1586 parse_sample_entry(&mut stream.sub_stream(&header.size)?, header.box_type)?;
1587 sample_table.sample_descriptions.push(sample_entry);
1588 }
1589 Ok(())
1590 }
1591
parse_stbl(stream: &mut IStream, track: &mut Track) -> AvifResult<()>1592 fn parse_stbl(stream: &mut IStream, track: &mut Track) -> AvifResult<()> {
1593 // Section 8.5.1.2 of ISO/IEC 14496-12.
1594 if track.sample_table.is_some() {
1595 return Err(AvifError::BmffParseFailed(
1596 "duplicate stbl for track.".into(),
1597 ));
1598 }
1599 let mut sample_table = SampleTable::default();
1600 let mut boxes_seen: HashSet<String> = HashSet::with_hasher(NonRandomHasherState);
1601 while stream.has_bytes_left()? {
1602 let header = parse_header(stream, /*top_level=*/ false)?;
1603 if boxes_seen.contains(&header.box_type) {
1604 return Err(AvifError::BmffParseFailed(format!(
1605 "duplicate box in stbl: {}",
1606 header.box_type
1607 )));
1608 }
1609 let mut skipped_box = false;
1610 let mut sub_stream = stream.sub_stream(&header.size)?;
1611 match header.box_type.as_str() {
1612 "stco" => {
1613 if boxes_seen.contains("co64") {
1614 return Err(AvifError::BmffParseFailed(
1615 "exactly one of co64 or stco is allowed in stbl".into(),
1616 ));
1617 }
1618 parse_stco(&mut sub_stream, &mut sample_table, false)?;
1619 }
1620 "co64" => {
1621 if boxes_seen.contains("stco") {
1622 return Err(AvifError::BmffParseFailed(
1623 "exactly one of co64 or stco is allowed in stbl".into(),
1624 ));
1625 }
1626 parse_stco(&mut sub_stream, &mut sample_table, true)?;
1627 }
1628 "stsc" => parse_stsc(&mut sub_stream, &mut sample_table)?,
1629 "stsz" => parse_stsz(&mut sub_stream, &mut sample_table)?,
1630 "stss" => parse_stss(&mut sub_stream, &mut sample_table)?,
1631 "stts" => parse_stts(&mut sub_stream, &mut sample_table)?,
1632 "stsd" => parse_stsd(&mut sub_stream, &mut sample_table)?,
1633 _ => skipped_box = true,
1634 }
1635 // For boxes that are skipped, we do not need to validate if they occur exactly once or
1636 // not.
1637 if !skipped_box {
1638 boxes_seen.insert(header.box_type.clone());
1639 }
1640 }
1641 track.sample_table = Some(sample_table);
1642 Ok(())
1643 }
1644
parse_minf(stream: &mut IStream, track: &mut Track) -> AvifResult<()>1645 fn parse_minf(stream: &mut IStream, track: &mut Track) -> AvifResult<()> {
1646 // Section 8.4.4.2 of ISO/IEC 14496-12.
1647 while stream.has_bytes_left()? {
1648 let header = parse_header(stream, /*top_level=*/ false)?;
1649 let mut sub_stream = stream.sub_stream(&header.size)?;
1650 if header.box_type == "stbl" {
1651 parse_stbl(&mut sub_stream, track)?;
1652 }
1653 }
1654 Ok(())
1655 }
1656
parse_mdia(stream: &mut IStream, track: &mut Track) -> AvifResult<()>1657 fn parse_mdia(stream: &mut IStream, track: &mut Track) -> AvifResult<()> {
1658 // Section 8.4.1.2 of ISO/IEC 14496-12.
1659 while stream.has_bytes_left()? {
1660 let header = parse_header(stream, /*top_level=*/ false)?;
1661 let mut sub_stream = stream.sub_stream(&header.size)?;
1662 match header.box_type.as_str() {
1663 "mdhd" => parse_mdhd(&mut sub_stream, track)?,
1664 "minf" => parse_minf(&mut sub_stream, track)?,
1665 "hdlr" => track.handler_type = parse_hdlr(&mut sub_stream)?,
1666 _ => {}
1667 }
1668 }
1669 Ok(())
1670 }
1671
parse_tref(stream: &mut IStream, track: &mut Track) -> AvifResult<()>1672 fn parse_tref(stream: &mut IStream, track: &mut Track) -> AvifResult<()> {
1673 // Section 8.3.3.2 of ISO/IEC 14496-12.
1674
1675 // TrackReferenceTypeBox [];
1676 while stream.has_bytes_left()? {
1677 // aligned(8) class TrackReferenceTypeBox (reference_type) extends Box(reference_type)
1678 let header = parse_header(stream, /*top_level=*/ false)?;
1679 let mut sub_stream = stream.sub_stream(&header.size)?;
1680 match header.box_type.as_str() {
1681 "auxl" => {
1682 // unsigned int(32) track_IDs[];
1683 // Use only the first one and skip the rest.
1684 track.aux_for_id = Some(sub_stream.read_u32()?);
1685 }
1686 "prem" => {
1687 // unsigned int(32) track_IDs[];
1688 // Use only the first one and skip the rest.
1689 track.prem_by_id = Some(sub_stream.read_u32()?);
1690 }
1691 _ => {}
1692 }
1693 }
1694 Ok(())
1695 }
1696
parse_elst(stream: &mut IStream, track: &mut Track) -> AvifResult<()>1697 fn parse_elst(stream: &mut IStream, track: &mut Track) -> AvifResult<()> {
1698 if track.elst_seen {
1699 return Err(AvifError::BmffParseFailed(
1700 "more than one elst box was found for track".into(),
1701 ));
1702 }
1703 track.elst_seen = true;
1704
1705 // Section 8.6.6.2 of ISO/IEC 14496-12.
1706 let (version, flags) = stream.read_version_and_flags()?;
1707
1708 // Section 8.6.6.3 of ISO/IEC 14496-12:
1709 // flags - the following values are defined. The values of flags greater than 1 are reserved
1710 // RepeatEdits 1
1711 if (flags & 1) == 0 {
1712 // The only EditList feature that we support is repetition count for animated images. So in
1713 // this case, we know that the repetition count is zero and we do not care about the rest
1714 // of this box.
1715 track.is_repeating = false;
1716 return Ok(());
1717 }
1718 track.is_repeating = true;
1719
1720 // unsigned int(32) entry_count;
1721 let entry_count = stream.read_u32()?;
1722 if entry_count != 1 {
1723 return Err(AvifError::BmffParseFailed(format!(
1724 "elst has entry_count ({entry_count}) != 1"
1725 )));
1726 }
1727
1728 if version == 1 {
1729 // unsigned int(64) segment_duration;
1730 track.segment_duration = stream.read_u64()?;
1731 // int(64) media_time;
1732 stream.skip(8)?;
1733 } else if version == 0 {
1734 // unsigned int(32) segment_duration;
1735 track.segment_duration = stream.read_u32()? as u64;
1736 // int(32) media_time;
1737 stream.skip(4)?;
1738 } else {
1739 return Err(AvifError::BmffParseFailed(
1740 "unsupported version in elst".into(),
1741 ));
1742 }
1743 // int(16) media_rate_integer;
1744 stream.skip(2)?;
1745 // int(16) media_rate_fraction;
1746 stream.skip(2)?;
1747
1748 if track.segment_duration == 0 {
1749 return Err(AvifError::BmffParseFailed(
1750 "invalid value for segment_duration (0)".into(),
1751 ));
1752 }
1753 Ok(())
1754 }
1755
parse_edts(stream: &mut IStream, track: &mut Track) -> AvifResult<()>1756 fn parse_edts(stream: &mut IStream, track: &mut Track) -> AvifResult<()> {
1757 if track.elst_seen {
1758 // This function always exits with track.elst_seen set to true. So it is sufficient to
1759 // check track.elst_seen to verify the uniqueness of the edts box.
1760 return Err(AvifError::BmffParseFailed(
1761 "multiple edts boxes found for track.".into(),
1762 ));
1763 }
1764
1765 // Section 8.6.5.2 of ISO/IEC 14496-12.
1766 while stream.has_bytes_left()? {
1767 let header = parse_header(stream, /*top_level=*/ false)?;
1768 let mut sub_stream = stream.sub_stream(&header.size)?;
1769 if header.box_type == "elst" {
1770 parse_elst(&mut sub_stream, track)?;
1771 }
1772 }
1773
1774 if !track.elst_seen {
1775 return Err(AvifError::BmffParseFailed(
1776 "elst box was not found in edts".into(),
1777 ));
1778 }
1779 Ok(())
1780 }
1781
parse_trak(stream: &mut IStream) -> AvifResult<Track>1782 fn parse_trak(stream: &mut IStream) -> AvifResult<Track> {
1783 let mut track = Track::default();
1784 let mut tkhd_seen = false;
1785 // Section 8.3.1.2 of ISO/IEC 14496-12.
1786 while stream.has_bytes_left()? {
1787 let header = parse_header(stream, /*top_level=*/ false)?;
1788 let mut sub_stream = stream.sub_stream(&header.size)?;
1789 match header.box_type.as_str() {
1790 "tkhd" => {
1791 if tkhd_seen {
1792 return Err(AvifError::BmffParseFailed(
1793 "trak box contains multiple tkhd boxes".into(),
1794 ));
1795 }
1796 parse_tkhd(&mut sub_stream, &mut track)?;
1797 tkhd_seen = true;
1798 }
1799 "mdia" => parse_mdia(&mut sub_stream, &mut track)?,
1800 "tref" => parse_tref(&mut sub_stream, &mut track)?,
1801 "edts" => parse_edts(&mut sub_stream, &mut track)?,
1802 "meta" => track.meta = Some(parse_meta(&mut sub_stream)?),
1803 _ => {}
1804 }
1805 }
1806 if !tkhd_seen {
1807 return Err(AvifError::BmffParseFailed(
1808 "trak box did not contain a tkhd box".into(),
1809 ));
1810 }
1811 Ok(track)
1812 }
1813
parse_moov(stream: &mut IStream) -> AvifResult<Vec<Track>>1814 fn parse_moov(stream: &mut IStream) -> AvifResult<Vec<Track>> {
1815 let mut tracks: Vec<Track> = Vec::new();
1816 // Section 8.2.1.2 of ISO/IEC 14496-12.
1817 while stream.has_bytes_left()? {
1818 let header = parse_header(stream, /*top_level=*/ false)?;
1819 let mut sub_stream = stream.sub_stream(&header.size)?;
1820 if header.box_type == "trak" {
1821 let track = parse_trak(&mut sub_stream)?;
1822 if track.is_video_handler() && (track.width == 0 || track.height == 0) {
1823 return Err(AvifError::BmffParseFailed(
1824 "invalid track dimensions".into(),
1825 ));
1826 }
1827 tracks.push(track);
1828 }
1829 }
1830 if tracks.is_empty() {
1831 return Err(AvifError::BmffParseFailed(
1832 "moov box does not contain any tracks".into(),
1833 ));
1834 }
1835 Ok(tracks)
1836 }
1837
parse(io: &mut GenericIO) -> AvifResult<AvifBoxes>1838 pub(crate) fn parse(io: &mut GenericIO) -> AvifResult<AvifBoxes> {
1839 let mut ftyp: Option<FileTypeBox> = None;
1840 let mut meta: Option<MetaBox> = None;
1841 let mut tracks: Option<Vec<Track>> = None;
1842 let mut parse_offset: u64 = 0;
1843 loop {
1844 // Read just enough to get the longest possible valid box header (4+4+8+16 bytes).
1845 let header_data = io.read(parse_offset, 32)?;
1846 if header_data.is_empty() {
1847 // No error and size is 0. We have reached the end of the stream.
1848 break;
1849 }
1850 let mut header_stream = IStream::create(header_data);
1851 let header = parse_header(&mut header_stream, /*top_level=*/ true)?;
1852 parse_offset = parse_offset
1853 .checked_add(header_stream.offset as u64)
1854 .ok_or(AvifError::BmffParseFailed("invalid parse offset".into()))?;
1855
1856 // Read the rest of the box if necessary.
1857 match header.box_type.as_str() {
1858 "ftyp" | "meta" | "moov" => {
1859 if ftyp.is_none() && header.box_type != "ftyp" {
1860 // Section 6.3.4 of ISO/IEC 14496-12:
1861 // The FileTypeBox shall occur before any variable-length box. Only a
1862 // fixed-size box such as a file signature, if required, may precede it.
1863 return Err(AvifError::BmffParseFailed(format!(
1864 "expected ftyp box. found {}.",
1865 header.box_type,
1866 )));
1867 }
1868 let box_data = match header.size {
1869 BoxSize::UntilEndOfStream => io.read(parse_offset, usize::MAX)?,
1870 BoxSize::FixedSize(size) => io.read_exact(parse_offset, size)?,
1871 };
1872 let mut box_stream = IStream::create(box_data);
1873 match header.box_type.as_str() {
1874 "ftyp" => {
1875 ftyp = Some(parse_ftyp(&mut box_stream)?);
1876 if !ftyp.unwrap_ref().is_avif() {
1877 return Err(AvifError::InvalidFtyp);
1878 }
1879 }
1880 "meta" => meta = Some(parse_meta(&mut box_stream)?),
1881 "moov" => tracks = Some(parse_moov(&mut box_stream)?),
1882 _ => {} // Not reached.
1883 }
1884 if ftyp.is_some() {
1885 let ftyp = ftyp.unwrap_ref();
1886 if (!ftyp.needs_meta() || meta.is_some())
1887 && (!ftyp.needs_moov() || tracks.is_some())
1888 {
1889 // Enough information has been parsed to consider parse a success.
1890 break;
1891 }
1892 }
1893 }
1894 _ => {}
1895 }
1896 if header.size == BoxSize::UntilEndOfStream {
1897 // There is no other box after this one because it goes till the end of the stream.
1898 break;
1899 }
1900 parse_offset = parse_offset
1901 .checked_add(header.size() as u64)
1902 .ok_or(AvifError::BmffParseFailed("invalid parse offset".into()))?;
1903 }
1904 if ftyp.is_none() {
1905 return Err(AvifError::InvalidFtyp);
1906 }
1907 let ftyp = ftyp.unwrap();
1908 if (ftyp.needs_meta() && meta.is_none()) || (ftyp.needs_moov() && tracks.is_none()) {
1909 return Err(AvifError::TruncatedData);
1910 }
1911 Ok(AvifBoxes {
1912 ftyp,
1913 meta: meta.unwrap_or_default(),
1914 tracks: tracks.unwrap_or_default(),
1915 })
1916 }
1917
peek_compatible_file_type(data: &[u8]) -> AvifResult<bool>1918 pub(crate) fn peek_compatible_file_type(data: &[u8]) -> AvifResult<bool> {
1919 let mut stream = IStream::create(data);
1920 let header = parse_header(&mut stream, /*top_level=*/ true)?;
1921 if header.box_type != "ftyp" {
1922 // Section 6.3.4 of ISO/IEC 14496-12:
1923 // The FileTypeBox shall occur before any variable-length box.
1924 // Only a fixed-size box such as a file signature, if required, may precede it.
1925 return Ok(false);
1926 }
1927 let header_size = match header.size {
1928 BoxSize::FixedSize(size) => size,
1929 // The 'ftyp' box goes on till the end of the file. Either there is no brand requiring
1930 // anything in the file but a FileTypebox (so not AVIF), or it is invalid.
1931 BoxSize::UntilEndOfStream => return Ok(false),
1932 };
1933 let ftyp = if header_size > stream.bytes_left()? {
1934 let mut header_stream = stream.sub_stream(&BoxSize::FixedSize(stream.bytes_left()?))?;
1935 parse_truncated_ftyp(&mut header_stream)
1936 } else {
1937 let mut header_stream = stream.sub_stream(&header.size)?;
1938 parse_ftyp(&mut header_stream)?
1939 };
1940 Ok(ftyp.is_avif())
1941 }
1942
parse_tmap(stream: &mut IStream) -> AvifResult<Option<GainMapMetadata>>1943 pub(crate) fn parse_tmap(stream: &mut IStream) -> AvifResult<Option<GainMapMetadata>> {
1944 // Experimental, not yet specified.
1945
1946 // unsigned int(8) version = 0;
1947 let version = stream.read_u8()?;
1948 if version != 0 {
1949 return Ok(None); // Unsupported version.
1950 }
1951 // unsigned int(16) minimum_version;
1952 let minimum_version = stream.read_u16()?;
1953 let supported_version = 0;
1954 if minimum_version > supported_version {
1955 return Ok(None); // Unsupported version.
1956 }
1957 // unsigned int(16) writer_version;
1958 let writer_version = stream.read_u16()?;
1959
1960 let mut metadata = GainMapMetadata::default();
1961 let mut bits = stream.sub_bit_stream(1)?;
1962 // unsigned int(1) is_multichannel;
1963 let is_multichannel = bits.read_bool()?;
1964 let channel_count = if is_multichannel { 3 } else { 1 };
1965 // unsigned int(1) use_base_colour_space;
1966 metadata.use_base_color_space = bits.read_bool()?;
1967 // unsigned int(6) reserved;
1968 bits.skip(6)?;
1969
1970 // unsigned int(32) base_hdr_headroom_numerator;
1971 // unsigned int(32) base_hdr_headroom_denominator;
1972 metadata.base_hdr_headroom = stream.read_ufraction()?;
1973 // unsigned int(32) alternate_hdr_headroom_numerator;
1974 // unsigned int(32) alternate_hdr_headroom_denominator;
1975 metadata.alternate_hdr_headroom = stream.read_ufraction()?;
1976 for i in 0..channel_count {
1977 // int(32) gain_map_min_numerator;
1978 // unsigned int(32) gain_map_min_denominator
1979 metadata.min[i] = stream.read_fraction()?;
1980 // int(32) gain_map_max_numerator;
1981 // unsigned int(32) gain_map_max_denominator;
1982 metadata.max[i] = stream.read_fraction()?;
1983 // unsigned int(32) gamma_numerator;
1984 // unsigned int(32) gamma_denominator;
1985 metadata.gamma[i] = stream.read_ufraction()?;
1986 // int(32) base_offset_numerator;
1987 // unsigned int(32) base_offset_denominator;
1988 metadata.base_offset[i] = stream.read_fraction()?;
1989 // int(32) alternate_offset_numerator;
1990 // unsigned int(32) alternate_offset_denominator;
1991 metadata.alternate_offset[i] = stream.read_fraction()?;
1992 }
1993
1994 // Fill the remaining values by copying those from the first channel.
1995 for i in channel_count..3 {
1996 metadata.min[i] = metadata.min[0];
1997 metadata.max[i] = metadata.max[0];
1998 metadata.gamma[i] = metadata.gamma[0];
1999 metadata.base_offset[i] = metadata.base_offset[0];
2000 metadata.alternate_offset[i] = metadata.alternate_offset[0];
2001 }
2002 if writer_version <= supported_version && stream.has_bytes_left()? {
2003 return Err(AvifError::InvalidToneMappedImage(
2004 "invalid trailing bytes in tmap box".into(),
2005 ));
2006 }
2007 metadata.is_valid()?;
2008 Ok(Some(metadata))
2009 }
2010
#[cfg(test)]
mod tests {
    use crate::parser::mp4box;
    use crate::AvifResult;

    /// Feeds every strict prefix of a small buffer to `peek_compatible_file_type` and checks
    /// that the result becomes `true` as soon as the major brand is available.
    #[test]
    fn peek_compatible_file_type() -> AvifResult<()> {
        let buf = [
            0x00, 0x00, 0x00, 0x20, 0x66, 0x74, 0x79, 0x70, // size (0x20), "ftyp"
            0x61, 0x76, 0x69, 0x66, 0x00, 0x00, 0x00, 0x00, // "avif", 4 zero bytes
            0x61, 0x76, 0x69, 0x66, 0x6d, 0x69, 0x66, 0x31, // "avif", "mif1"
            0x6d, 0x69, 0x61, 0x66, 0x4d, 0x41, 0x31, 0x41, // "miaf", "MA1A"
            0x00, 0x00, 0x00, 0xf2, 0x6d, 0x65, 0x74, 0x61, // size (0xf2), "meta"
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x28, //
        ];
        // Peeking should succeed starting from byte length 12. Since that is the end offset of the
        // first valid AVIF brand.
        let min_required_bytes = 12;
        for len in 0..buf.len() {
            let peeked = mp4box::peek_compatible_file_type(&buf[..len]);
            if len >= min_required_bytes {
                assert!(peeked?);
            } else {
                // Not enough bytes. The return should either be an error or false.
                assert!(!peeked.unwrap_or(false));
            }
        }
        Ok(())
    }
}
2041