// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14 
15 use crate::decoder::gainmap::GainMapMetadata;
16 use crate::decoder::track::*;
17 use crate::decoder::Extent;
18 use crate::decoder::GenericIO;
19 use crate::image::YuvRange;
20 use crate::image::MAX_PLANE_COUNT;
21 use crate::internal_utils::stream::*;
22 use crate::internal_utils::*;
23 use crate::utils::clap::CleanAperture;
24 use crate::*;
25 
/// Payload size of a box as reported by its header.
#[derive(Debug, PartialEq)]
pub enum BoxSize {
    /// Payload length in bytes, header exclusive.
    FixedSize(usize),
    /// The box goes on until the end of the input stream.
    UntilEndOfStream,
}

/// Parsed box header: the payload size and the four-character box type.
#[derive(Debug)]
struct BoxHeader {
    size: BoxSize,
    box_type: String,
}

impl BoxHeader {
    /// Returns the payload size in bytes of a fixed-size box, or 0 for a box that extends to
    /// the end of the stream (such a box has no fixed size to report).
    fn size(&self) -> usize {
        if let BoxSize::FixedSize(size) = self.size {
            size
        } else {
            0
        }
    }
}
46 
/// Contents of the `ftyp` box: the major brand and the list of compatible brands.
#[derive(Debug, Default)]
pub struct FileTypeBox {
    pub major_brand: String,
    // minor_version "is informative only" (section 4.3.1 of ISO/IEC 14496-12)
    compatible_brands: Vec<String>,
}

impl FileTypeBox {
    /// Returns true if `brand` is either the major brand or one of the compatible brands.
    fn has_brand(&self, brand: &str) -> bool {
        // As of 2024, section 4.3.1 of ISO/IEC 14496-12 does not explicitly say that the file is
        // compliant with the specification defining the major brand, but "the major_brand should
        // be repeated in the compatible_brands". Later versions of the specification may
        // explicitly consider the major brand as one of the compatible brands, even if not
        // repeated. So the major brand is always treated as a compatible brand here.
        self.major_brand == brand
            || self
                .compatible_brands
                .iter()
                .map(String::as_str)
                .any(|candidate| candidate == brand)
    }

    /// Returns true if at least one entry of `brands` satisfies `has_brand`.
    fn has_brand_any(&self, brands: &[&str]) -> bool {
        for brand in brands {
            if self.has_brand(brand) {
                return true;
            }
        }
        false
    }

    /// Returns true if the brands identify the file as one this decoder handles as an image or
    /// image sequence ("avif"/"avis", plus the HEIF brands when the "heic" feature is enabled).
    pub(crate) fn is_avif(&self) -> bool {
        // "avio" also exists but does not identify the file as AVIF on its own. See
        // https://aomediacodec.github.io/av1-avif/v1.1.0.html#image-and-image-collection-brand
        let accepted: &[&str] = &[
            "avif",
            "avis",
            #[cfg(feature = "heic")]
            "heic",
            #[cfg(feature = "heic")]
            "heix",
            #[cfg(feature = "heic")]
            "mif1",
        ];
        self.has_brand_any(accepted)
    }

    /// Returns true if any of the still-image brands is present.
    pub(crate) fn needs_meta(&self) -> bool {
        let still_image_brands: &[&str] = &[
            "avif",
            #[cfg(feature = "heic")]
            "heic",
            #[cfg(feature = "heic")]
            "heix",
            #[cfg(feature = "heic")]
            "mif1",
        ];
        self.has_brand_any(still_image_brands)
    }

    /// Returns true if any of the image-sequence brands is present.
    pub(crate) fn needs_moov(&self) -> bool {
        let sequence_brands: &[&str] = &[
            "avis",
            #[cfg(feature = "heic")]
            "hevc",
            #[cfg(feature = "heic")]
            "msf1",
        ];
        self.has_brand_any(sequence_brands)
    }

    /// Returns true if the "tmap" brand is present.
    pub(crate) fn has_tmap(&self) -> bool {
        self.has_brand("tmap")
    }
}
111 
112 #[derive(Debug, Default)]
113 pub struct ItemLocationEntry {
114     pub item_id: u32,
115     pub construction_method: u8,
116     pub base_offset: u64,
117     pub extent_count: u16,
118     pub extents: Vec<Extent>,
119 }
120 
121 #[derive(Debug, Default)]
122 pub struct ItemLocationBox {
123     offset_size: u8,
124     length_size: u8,
125     base_offset_size: u8,
126     index_size: u8,
127     pub items: Vec<ItemLocationEntry>,
128 }
129 
/// `ispe` item property (section 6.5.3 of ISO/IEC 23008-12).
#[derive(Clone, Debug)]
pub struct ImageSpatialExtents {
    /// image_width field.
    pub width: u32,
    /// image_height field.
    pub height: u32,
}
135 
/// `pixi` item property (section 6.5.6 of ISO/IEC 23008-12).
#[derive(Clone, Debug, Default)]
pub struct PixelInformation {
    /// bits_per_channel of each channel, in file order. `parse_pixi` only produces vectors
    /// whose entries are all equal.
    pub plane_depths: Vec<u8>,
}
140 
141 #[derive(Clone, Debug, Default, PartialEq)]
142 pub struct Av1CodecConfiguration {
143     pub seq_profile: u8,
144     pub seq_level_idx0: u8,
145     pub seq_tier0: u8,
146     pub high_bitdepth: bool,
147     pub twelve_bit: bool,
148     pub monochrome: bool,
149     pub chroma_subsampling_x: u8,
150     pub chroma_subsampling_y: u8,
151     pub chroma_sample_position: ChromaSamplePosition,
152     pub raw_data: Vec<u8>,
153 }
154 
/// Subset of the `hvcC` box that `parse_hvcC` keeps.
#[derive(Clone, Debug, Default, PartialEq)]
pub struct HevcCodecConfiguration {
    /// bit_depth_luma_minus8 + 8.
    pub bitdepth: u8,
    /// lengthSizeMinusOne + 1.
    pub nal_length_size: u8,
    /// Last NAL unit of type 32 (VPS) found in the box; empty if there was none.
    pub vps: Vec<u8>,
    /// Last NAL unit of type 33 (SPS) found in the box; empty if there was none.
    pub sps: Vec<u8>,
    /// Last NAL unit of type 34 (PPS) found in the box; empty if there was none.
    pub pps: Vec<u8>,
}
163 
164 impl CodecConfiguration {
depth(&self) -> u8165     pub(crate) fn depth(&self) -> u8 {
166         match self {
167             Self::Av1(config) => match config.twelve_bit {
168                 true => 12,
169                 false => match config.high_bitdepth {
170                     true => 10,
171                     false => 8,
172                 },
173             },
174             Self::Hevc(config) => config.bitdepth,
175         }
176     }
177 
pixel_format(&self) -> PixelFormat178     pub(crate) fn pixel_format(&self) -> PixelFormat {
179         match self {
180             Self::Av1(config) => {
181                 if config.monochrome {
182                     PixelFormat::Yuv400
183                 } else if config.chroma_subsampling_x == 1 && config.chroma_subsampling_y == 1 {
184                     PixelFormat::Yuv420
185                 } else if config.chroma_subsampling_x == 1 {
186                     PixelFormat::Yuv422
187                 } else {
188                     PixelFormat::Yuv444
189                 }
190             }
191             Self::Hevc(_) => {
192                 // It is okay to always return Yuv420 here since that is the only format that
193                 // android_mediacodec returns.
194                 // TODO: b/370549923 - Identify the correct YUV subsampling type from the codec
195                 // configuration data.
196                 PixelFormat::Yuv420
197             }
198         }
199     }
200 
chroma_sample_position(&self) -> ChromaSamplePosition201     pub(crate) fn chroma_sample_position(&self) -> ChromaSamplePosition {
202         match self {
203             Self::Av1(config) => config.chroma_sample_position,
204             Self::Hevc(_) => {
205                 // It is okay to always return ChromaSamplePosition::default() here since that is
206                 // the only format that android_mediacodec returns.
207                 // TODO: b/370549923 - Identify the correct chroma sample position from the codec
208                 // configuration data.
209                 ChromaSamplePosition::default()
210             }
211         }
212     }
213 
214     #[cfg(feature = "android_mediacodec")]
raw_data(&self) -> Vec<u8>215     pub(crate) fn raw_data(&self) -> Vec<u8> {
216         match self {
217             Self::Av1(config) => config.raw_data.clone(),
218             Self::Hevc(config) => {
219                 // For HEVC, the codec specific data consists of the following 3 NAL units in
220                 // order: VPS, SPS and PPS. Each unit should be preceded by a start code of
221                 // "\x00\x00\x00\x01".
222                 // https://developer.android.com/reference/android/media/MediaCodec#CSD
223                 let mut data: Vec<u8> = Vec::new();
224                 for nal_unit in [&config.vps, &config.sps, &config.pps] {
225                     // Start code.
226                     data.extend_from_slice(&[0, 0, 0, 1]);
227                     // Data.
228                     data.extend_from_slice(&nal_unit[..]);
229                 }
230                 data
231             }
232         }
233     }
234 
profile(&self) -> u8235     pub fn profile(&self) -> u8 {
236         match self {
237             Self::Av1(config) => config.seq_profile,
238             Self::Hevc(_) => {
239                 // TODO: b/370549923 - Identify the correct profile from the codec configuration
240                 // data.
241                 0
242             }
243         }
244     }
245 
246     #[cfg(feature = "android_mediacodec")]
nal_length_size(&self) -> u8247     pub(crate) fn nal_length_size(&self) -> u8 {
248         match self {
249             Self::Av1(_) => 0, // Unused. This function is only used for HEVC.
250             Self::Hevc(config) => config.nal_length_size,
251         }
252     }
253 
is_avif(&self) -> bool254     pub(crate) fn is_avif(&self) -> bool {
255         matches!(self, Self::Av1(_))
256     }
257 
is_heic(&self) -> bool258     pub(crate) fn is_heic(&self) -> bool {
259         matches!(self, Self::Hevc(_))
260     }
261 }
262 
263 #[derive(Clone, Debug)]
264 pub enum ColorInformation {
265     Icc(Vec<u8>),
266     Nclx(Nclx),
267     Unknown,
268 }
269 
270 #[derive(Clone, Debug, PartialEq)]
271 pub enum CodecConfiguration {
272     Av1(Av1CodecConfiguration),
273     Hevc(HevcCodecConfiguration),
274 }
275 
276 impl Default for CodecConfiguration {
default() -> Self277     fn default() -> Self {
278         Self::Av1(Av1CodecConfiguration::default())
279     }
280 }
281 
282 #[derive(Clone, Debug)]
283 pub enum ItemProperty {
284     ImageSpatialExtents(ImageSpatialExtents),
285     PixelInformation(PixelInformation),
286     CodecConfiguration(CodecConfiguration),
287     ColorInformation(ColorInformation),
288     PixelAspectRatio(PixelAspectRatio),
289     AuxiliaryType(String),
290     CleanAperture(CleanAperture),
291     ImageRotation(u8),
292     ImageMirror(u8),
293     OperatingPointSelector(u8),
294     LayerSelector(u16),
295     AV1LayeredImageIndexing([usize; 3]),
296     ContentLightLevelInformation(ContentLightLevelInformation),
297     Unknown(String),
298 }
299 
/// Property associations of a single item. See section 8.11.14 of ISO/IEC 14496-12.
#[derive(Debug, Default)]
pub struct ItemPropertyAssociation {
    pub item_id: u32,
    /// Each element is `(property_index, essential)`, where property_index is 1-based.
    pub associations: Vec<(
        u16,  // 1-based property_index
        bool, // essential
    )>,
}
309 
/// Description of a single item. `MetaBox` stores a list of these in its `iinf` field.
#[derive(Debug, Default)]
pub struct ItemInfo {
    pub item_id: u32,
    // Not exposed outside of this module.
    item_protection_index: u16,
    pub item_type: String,
    // Not exposed outside of this module.
    item_name: String,
    pub content_type: String,
}
318 
319 #[derive(Debug, Default)]
320 pub struct ItemPropertyBox {
321     pub properties: Vec<ItemProperty>,
322     pub associations: Vec<ItemPropertyAssociation>,
323 }
324 
/// A single item reference (one element of the `iref` field of `MetaBox`).
///
/// Read this reference as "{from_item_id} is a {reference_type} for {to_item_id}" (except for
/// dimg where it is in the opposite direction).
#[derive(Debug)]
pub struct ItemReference {
    pub from_item_id: u32,
    pub to_item_id: u32,
    pub reference_type: String,
    /// 0-based index of the reference within the iref type.
    pub index: u32,
}
334 
335 #[derive(Debug, Default)]
336 pub struct MetaBox {
337     pub iinf: Vec<ItemInfo>,
338     pub iloc: ItemLocationBox,
339     pub primary_item_id: u32, // pitm
340     pub iprp: ItemPropertyBox,
341     pub iref: Vec<ItemReference>,
342     pub idat: Vec<u8>,
343 }
344 
345 #[derive(Debug)]
346 pub struct AvifBoxes {
347     pub ftyp: FileTypeBox,
348     pub meta: MetaBox,
349     pub tracks: Vec<Track>,
350 }
351 
parse_header(stream: &mut IStream, top_level: bool) -> AvifResult<BoxHeader>352 fn parse_header(stream: &mut IStream, top_level: bool) -> AvifResult<BoxHeader> {
353     // Section 4.2.2 of ISO/IEC 14496-12.
354     let start_offset = stream.offset;
355     // unsigned int(32) size;
356     let mut size = stream.read_u32()? as u64;
357     // unsigned int(32) type = boxtype;
358     let box_type = stream.read_string(4)?;
359     if size == 1 {
360         // unsigned int(64) largesize;
361         size = stream.read_u64()?;
362     }
363     if box_type == "uuid" {
364         // unsigned int(8) usertype[16] = extended_type;
365         stream.skip(16)?;
366     }
367     if size == 0 {
368         // Section 4.2.2 of ISO/IEC 14496-12.
369         //   if size is 0, then this box shall be in a top-level box (i.e. not contained in another
370         //   box), and be the last box in its 'file', and its payload extends to the end of that
371         //   enclosing 'file'. This is normally only used for a MediaDataBox.
372         if !top_level {
373             return Err(AvifError::BmffParseFailed(
374                 "non-top-level box with size 0".into(),
375             ));
376         }
377         return Ok(BoxHeader {
378             box_type,
379             size: BoxSize::UntilEndOfStream,
380         });
381     }
382     checked_decr!(size, u64_from_usize(stream.offset - start_offset)?);
383     let size = usize_from_u64(size)?;
384     if !top_level && size > stream.bytes_left()? {
385         return Err(AvifError::BmffParseFailed("possibly truncated box".into()));
386     }
387     Ok(BoxHeader {
388         box_type,
389         size: BoxSize::FixedSize(size),
390     })
391 }
392 
393 // Reads a truncated ftyp box. Populates as many brands as it can read.
parse_truncated_ftyp(stream: &mut IStream) -> FileTypeBox394 fn parse_truncated_ftyp(stream: &mut IStream) -> FileTypeBox {
395     // Section 4.3.2 of ISO/IEC 14496-12.
396     // unsigned int(32) major_brand;
397     let major_brand = match stream.read_string(4) {
398         Ok(major_brand) => major_brand,
399         Err(_) => return FileTypeBox::default(),
400     };
401     let mut compatible_brands: Vec<String> = Vec::new();
402     // unsigned int(32) compatible_brands[];  // to end of the box
403     while stream.has_bytes_left().unwrap_or_default() {
404         match stream.read_string(4) {
405             Ok(brand) => compatible_brands.push(brand),
406             Err(_) => break,
407         }
408     }
409     FileTypeBox {
410         major_brand,
411         compatible_brands,
412     }
413 }
414 
parse_ftyp(stream: &mut IStream) -> AvifResult<FileTypeBox>415 fn parse_ftyp(stream: &mut IStream) -> AvifResult<FileTypeBox> {
416     // Section 4.3.2 of ISO/IEC 14496-12.
417     // unsigned int(32) major_brand;
418     let major_brand = stream.read_string(4)?;
419     // unsigned int(4) minor_version;
420     stream.skip_u32()?;
421     if stream.bytes_left()? % 4 != 0 {
422         return Err(AvifError::BmffParseFailed(format!(
423             "Box[ftyp] contains a compatible brands section that isn't divisible by 4 {}",
424             stream.bytes_left()?
425         )));
426     }
427     let mut compatible_brands: Vec<String> = create_vec_exact(stream.bytes_left()? / 4)?;
428     // unsigned int(32) compatible_brands[];  // to end of the box
429     while stream.has_bytes_left()? {
430         compatible_brands.push(stream.read_string(4)?);
431     }
432     Ok(FileTypeBox {
433         major_brand,
434         compatible_brands,
435     })
436 }
437 
parse_hdlr(stream: &mut IStream) -> AvifResult<String>438 fn parse_hdlr(stream: &mut IStream) -> AvifResult<String> {
439     // Section 8.4.3.2 of ISO/IEC 14496-12.
440     let (_version, _flags) = stream.read_and_enforce_version_and_flags(0)?;
441     // unsigned int(32) pre_defined = 0;
442     let predefined = stream.read_u32()?;
443     if predefined != 0 {
444         return Err(AvifError::BmffParseFailed(
445             "Box[hdlr] contains a pre_defined value that is nonzero".into(),
446         ));
447     }
448     // unsigned int(32) handler_type;
449     let handler_type = stream.read_string(4)?;
450     // const unsigned int(32)[3] reserved = 0;
451     if stream.read_u32()? != 0 || stream.read_u32()? != 0 || stream.read_u32()? != 0 {
452         return Err(AvifError::BmffParseFailed(
453             "Box[hdlr] contains invalid reserved bits".into(),
454         ));
455     }
456     // string name;
457     // Verify that a valid string is here, but don't bother to store it:
458     //   name gives a human-readable name for the track type (for debugging and inspection
459     //   purposes).
460     stream.read_c_string()?;
461     Ok(handler_type)
462 }
463 
parse_iloc(stream: &mut IStream) -> AvifResult<ItemLocationBox>464 fn parse_iloc(stream: &mut IStream) -> AvifResult<ItemLocationBox> {
465     // Section 8.11.3.2 of ISO/IEC 14496-12.
466     let (version, _flags) = stream.read_version_and_flags()?;
467     if version > 2 {
468         return Err(AvifError::BmffParseFailed(format!(
469             "Box[iloc] has an unsupported version: {version}"
470         )));
471     }
472     let mut iloc = ItemLocationBox::default();
473     let mut bits = stream.sub_bit_stream(2)?;
474     // unsigned int(4) offset_size;
475     iloc.offset_size = bits.read(4)? as u8;
476     // unsigned int(4) length_size;
477     iloc.length_size = bits.read(4)? as u8;
478     // unsigned int(4) base_offset_size;
479     iloc.base_offset_size = bits.read(4)? as u8;
480     iloc.index_size = if version == 1 || version == 2 {
481         // unsigned int(4) index_size;
482         bits.read(4)? as u8
483     } else {
484         // unsigned int(4) reserved;
485         bits.skip(4)?;
486         0
487     };
488     assert_eq!(bits.remaining_bits()?, 0);
489 
490     // Section 8.11.3.3 of ISO/IEC 14496-12.
491     for size in [
492         iloc.offset_size,
493         iloc.length_size,
494         iloc.base_offset_size,
495         iloc.index_size,
496     ] {
497         if ![0u8, 4, 8].contains(&size) {
498             return Err(AvifError::BmffParseFailed(format!(
499                 "Box[iloc] has invalid size: {size}"
500             )));
501         }
502     }
503 
504     let item_count: u32 = if version < 2 {
505         // unsigned int(16) item_count;
506         stream.read_u16()? as u32
507     } else {
508         // unsigned int(32) item_count;
509         stream.read_u32()?
510     };
511     for _i in 0..item_count {
512         let mut entry = ItemLocationEntry {
513             item_id: if version < 2 {
514                 // unsigned int(16) item_ID;
515                 stream.read_u16()? as u32
516             } else {
517                 // unsigned int(32) item_ID;
518                 stream.read_u32()?
519             },
520             ..ItemLocationEntry::default()
521         };
522         if entry.item_id == 0 {
523             return Err(AvifError::BmffParseFailed(format!(
524                 "Box[iloc] has invalid item id: {}",
525                 entry.item_id
526             )));
527         }
528         if version == 1 || version == 2 {
529             let mut bits = stream.sub_bit_stream(2)?;
530             // unsigned int(12) reserved = 0;
531             if bits.read(12)? != 0 {
532                 return Err(AvifError::BmffParseFailed(
533                     "Box[iloc] has invalid reserved bits".into(),
534                 ));
535             }
536             // unsigned int(4) construction_method;
537             entry.construction_method = bits.read(4)? as u8;
538             // 0: file offset, 1: idat offset, 2: item offset.
539             if entry.construction_method != 0 && entry.construction_method != 1 {
540                 return Err(AvifError::BmffParseFailed(format!(
541                     "Box[iloc] has unknown construction_method: {}",
542                     entry.construction_method
543                 )));
544             }
545         }
546         // unsigned int(16) data_reference_index;
547         stream.skip(2)?;
548         // unsigned int(base_offset_size*8) base_offset;
549         entry.base_offset = stream.read_uxx(iloc.base_offset_size)?;
550         // unsigned int(16) extent_count;
551         entry.extent_count = stream.read_u16()?;
552         for _j in 0..entry.extent_count {
553             // unsigned int(index_size*8) item_reference_index;
554             stream.skip(iloc.index_size as usize)?; // Only used for construction_method 2.
555             let extent = Extent {
556                 // unsigned int(offset_size*8) extent_offset;
557                 offset: stream.read_uxx(iloc.offset_size)?,
558                 // unsigned int(length_size*8) extent_length;
559                 size: usize_from_u64(stream.read_uxx(iloc.length_size)?)?,
560             };
561             entry.extents.push(extent);
562         }
563         iloc.items.push(entry);
564     }
565     Ok(iloc)
566 }
567 
568 // Returns the primary item ID.
parse_pitm(stream: &mut IStream) -> AvifResult<u32>569 fn parse_pitm(stream: &mut IStream) -> AvifResult<u32> {
570     // Section 8.11.4.2 of ISO/IEC 14496-12.
571     let (version, _flags) = stream.read_version_and_flags()?;
572     if version == 0 {
573         // unsigned int(16) item_ID;
574         Ok(stream.read_u16()? as u32)
575     } else {
576         // unsigned int(32) item_ID;
577         Ok(stream.read_u32()?)
578     }
579 }
580 
parse_ispe(stream: &mut IStream) -> AvifResult<ItemProperty>581 fn parse_ispe(stream: &mut IStream) -> AvifResult<ItemProperty> {
582     // Section 6.5.3.2 of ISO/IEC 23008-12.
583     let (_version, _flags) = stream.read_and_enforce_version_and_flags(0)?;
584     let ispe = ImageSpatialExtents {
585         // unsigned int(32) image_width;
586         width: stream.read_u32()?,
587         // unsigned int(32) image_height;
588         height: stream.read_u32()?,
589     };
590     Ok(ItemProperty::ImageSpatialExtents(ispe))
591 }
592 
parse_pixi(stream: &mut IStream) -> AvifResult<ItemProperty>593 fn parse_pixi(stream: &mut IStream) -> AvifResult<ItemProperty> {
594     // Section 6.5.6.2 of ISO/IEC 23008-12.
595     let (_version, _flags) = stream.read_and_enforce_version_and_flags(0)?;
596     // unsigned int (8) num_channels;
597     let num_channels = stream.read_u8()? as usize;
598     if num_channels == 0 || num_channels > MAX_PLANE_COUNT {
599         return Err(AvifError::BmffParseFailed(format!(
600             "Invalid plane count {num_channels} in pixi box"
601         )));
602     }
603     let mut pixi = PixelInformation {
604         plane_depths: create_vec_exact(num_channels)?,
605     };
606     for _ in 0..num_channels {
607         // unsigned int (8) bits_per_channel;
608         pixi.plane_depths.push(stream.read_u8()?);
609         if pixi.plane_depths.last().unwrap() != pixi.plane_depths.first().unwrap() {
610             return Err(AvifError::UnsupportedDepth);
611         }
612     }
613     Ok(ItemProperty::PixelInformation(pixi))
614 }
615 
616 #[allow(non_snake_case)]
parse_av1C(stream: &mut IStream) -> AvifResult<ItemProperty>617 fn parse_av1C(stream: &mut IStream) -> AvifResult<ItemProperty> {
618     let raw_data = stream.get_immutable_vec(stream.bytes_left()?)?;
619     // See https://aomediacodec.github.io/av1-isobmff/v1.2.0.html#av1codecconfigurationbox-syntax.
620     let mut bits = stream.sub_bit_stream(4)?;
621     // unsigned int (1) marker = 1;
622     let marker = bits.read(1)?;
623     if marker != 1 {
624         return Err(AvifError::BmffParseFailed(format!(
625             "Invalid marker ({marker}) in av1C"
626         )));
627     }
628     // unsigned int (7) version = 1;
629     let version = bits.read(7)?;
630     if version != 1 {
631         return Err(AvifError::BmffParseFailed(format!(
632             "Invalid version ({version}) in av1C"
633         )));
634     }
635     let av1C = Av1CodecConfiguration {
636         // unsigned int(3) seq_profile;
637         // unsigned int(5) seq_level_idx_0;
638         seq_profile: bits.read(3)? as u8,
639         seq_level_idx0: bits.read(5)? as u8,
640         // unsigned int(1) seq_tier_0;
641         // unsigned int(1) high_bitdepth;
642         // unsigned int(1) twelve_bit;
643         // unsigned int(1) monochrome;
644         // unsigned int(1) chroma_subsampling_x;
645         // unsigned int(1) chroma_subsampling_y;
646         // unsigned int(2) chroma_sample_position;
647         seq_tier0: bits.read(1)? as u8,
648         high_bitdepth: bits.read_bool()?,
649         twelve_bit: bits.read_bool()?,
650         monochrome: bits.read_bool()?,
651         chroma_subsampling_x: bits.read(1)? as u8,
652         chroma_subsampling_y: bits.read(1)? as u8,
653         chroma_sample_position: bits.read(2)?.into(),
654         raw_data,
655     };
656 
657     // unsigned int(3) reserved = 0;
658     if bits.read(3)? != 0 {
659         return Err(AvifError::BmffParseFailed(
660             "Invalid reserved bits in av1C".into(),
661         ));
662     }
663     // unsigned int(1) initial_presentation_delay_present;
664     if bits.read(1)? == 1 {
665         // unsigned int(4) initial_presentation_delay_minus_one;
666         bits.read(4)?;
667     } else {
668         // unsigned int(4) reserved = 0;
669         if bits.read(4)? != 0 {
670             return Err(AvifError::BmffParseFailed(
671                 "Invalid reserved bits in av1C".into(),
672             ));
673         }
674     }
675     assert_eq!(bits.remaining_bits()?, 0);
676 
677     // https://aomediacodec.github.io/av1-avif/v1.1.0.html#av1-configuration-item-property:
678     //   - Sequence Header OBUs should not be present in the AV1CodecConfigurationBox.
679     // This is ignored.
680     //   - If a Sequence Header OBU is present in the AV1CodecConfigurationBox, it shall match the
681     //     Sequence Header OBU in the AV1 Image Item Data.
682     // This is not enforced.
683     //   - The values of the fields in the AV1CodecConfigurationBox shall match those of the
684     //     Sequence Header OBU in the AV1 Image Item Data.
685     // This is not enforced (?).
686     //   - Metadata OBUs, if present, shall match the values given in other item properties, such as
687     //     the PixelInformationProperty or ColourInformationBox.
688     // This is not enforced.
689 
690     // unsigned int(8) configOBUs[];
691 
692     Ok(ItemProperty::CodecConfiguration(CodecConfiguration::Av1(
693         av1C,
694     )))
695 }
696 
/// Parses an hvcC box (HEVC decoder configuration record).
///
/// Keeps the luma bit depth, the NAL length size and the last VPS, SPS and PPS NAL units found
/// in the arrays. All other fields are skipped.
///
/// Fails if configurationVersion is neither 0 nor 1, if the stream is too short, or if one of
/// the NAL units is empty (its type cannot be determined).
#[allow(non_snake_case)]
#[cfg(feature = "heic")]
fn parse_hvcC(stream: &mut IStream) -> AvifResult<ItemProperty> {
    // unsigned int(8) configurationVersion;
    let configuration_version = stream.read_u8()?;
    if configuration_version != 0 && configuration_version != 1 {
        return Err(AvifError::BmffParseFailed(format!(
            "Unknown configurationVersion({configuration_version}) in hvcC. Expected 0 or 1."
        )));
    }
    // The fixed-size part that follows configurationVersion is 21 bytes (168 bits) long.
    let mut bits = stream.sub_bit_stream(21)?;
    // unsigned int(2) general_profile_space;
    // unsigned int(1) general_tier_flag;
    // unsigned int(5) general_profile_idc;
    // unsigned int(32) general_profile_compatibility_flags;
    // unsigned int(48) general_constraint_indicator_flags;
    // unsigned int(8) general_level_idc;
    // bit(4) reserved = '1111'b;
    // unsigned int(12) min_spatial_segmentation_idc;
    // bit(6) reserved = '111111'b;
    // unsigned int(2) parallelismType;
    // bit(6) reserved = '111111'b;
    // unsigned int(2) chroma_format_idc;
    // bit(5) reserved = '11111'b;
    bits.skip(2 + 1 + 5 + 32 + 48 + 8 + 4 + 12 + 6 + 2 + 6 + 2 + 5)?;
    // unsigned int(3) bit_depth_luma_minus8;
    let bitdepth = bits.read(3)? as u8 + 8;
    // bit(5) reserved = '11111'b;
    // unsigned int(3) bit_depth_chroma_minus8;
    // unsigned int(16) avgFrameRate;
    // unsigned int(2) constantFrameRate;
    // unsigned int(3) numTemporalLayers;
    // unsigned int(1) temporalIdNested;
    bits.skip(5 + 3 + 16 + 2 + 3 + 1)?;
    // unsigned int(2) lengthSizeMinusOne;
    let nal_length_size = 1 + bits.read(2)? as u8;
    assert!(bits.remaining_bits()? == 0);

    // unsigned int(8) numOfArrays;
    let num_of_arrays = stream.read_u8()?;
    let mut vps: Vec<u8> = Vec::new();
    let mut sps: Vec<u8> = Vec::new();
    let mut pps: Vec<u8> = Vec::new();
    for _i in 0..num_of_arrays {
        // unsigned int(1) array_completeness;
        // bit(1) reserved = 0;
        // unsigned int(6) NAL_unit_type;
        stream.skip(1)?;
        // unsigned int(16) numNalus;
        let num_nalus = stream.read_u16()?;
        for _j in 0..num_nalus {
            // unsigned int(16) nalUnitLength;
            let nal_unit_length = stream.read_u16()?;
            let nal_unit = stream.get_slice(nal_unit_length as usize)?;
            // nalUnitLength comes from the file and may be 0. Report that as a parse error
            // instead of panicking on an out-of-bounds index.
            let header_byte = match nal_unit.first() {
                Some(byte) => *byte,
                None => {
                    return Err(AvifError::BmffParseFailed(
                        "hvcC contains an empty NAL unit".into(),
                    ))
                }
            };
            // The NAL unit type is stored in bits 1 to 6 of the first header byte.
            let nal_unit_type = (header_byte >> 1) & 0x3f;
            match nal_unit_type {
                32 => vps = nal_unit.to_vec(),
                33 => sps = nal_unit.to_vec(),
                34 => pps = nal_unit.to_vec(),
                _ => {}
            }
        }
    }
    Ok(ItemProperty::CodecConfiguration(CodecConfiguration::Hevc(
        HevcCodecConfiguration {
            bitdepth,
            nal_length_size,
            vps,
            pps,
            sps,
        },
    )))
}
770 
parse_colr(stream: &mut IStream) -> AvifResult<ItemProperty>771 fn parse_colr(stream: &mut IStream) -> AvifResult<ItemProperty> {
772     // Section 12.1.5.2 of ISO/IEC 14496-12.
773 
774     // unsigned int(32) colour_type;
775     let color_type = stream.read_string(4)?;
776     if color_type == "rICC" || color_type == "prof" {
777         if stream.bytes_left()? == 0 {
778             // Section 12.1.5.3 of ISO/IEC 14496-12:
779             //   ICC_profile: an ICC profile as defined in ISO 15076-1 or ICC.1 is supplied.
780             // Section 7.2.1 of ICC.1:2010:
781             //   The profile header is 128 bytes in length and contains 18 fields.
782             // So an empty ICC profile is invalid.
783             return Err(AvifError::BmffParseFailed(format!(
784                 "colr box contains 0 bytes of {color_type}"
785             )));
786         }
787         // ICC_profile; // restricted ("rICC") or unrestricted ("prof") ICC profile
788         return Ok(ItemProperty::ColorInformation(ColorInformation::Icc(
789             stream.get_slice(stream.bytes_left()?)?.to_vec(),
790         )));
791     }
792     if color_type == "nclx" {
793         let mut nclx = Nclx {
794             // unsigned int(16) colour_primaries;
795             color_primaries: stream.read_u16()?.into(),
796             // unsigned int(16) transfer_characteristics;
797             transfer_characteristics: stream.read_u16()?.into(),
798             // unsigned int(16) matrix_coefficients;
799             matrix_coefficients: stream.read_u16()?.into(),
800             ..Nclx::default()
801         };
802         let mut bits = stream.sub_bit_stream(1)?;
803         // unsigned int(1) full_range_flag;
804         nclx.yuv_range = if bits.read_bool()? { YuvRange::Full } else { YuvRange::Limited };
805         // unsigned int(7) reserved = 0;
806         if bits.read(7)? != 0 {
807             return Err(AvifError::BmffParseFailed(
808                 "colr box contains invalid reserved bits".into(),
809             ));
810         }
811         return Ok(ItemProperty::ColorInformation(ColorInformation::Nclx(nclx)));
812     }
813     Ok(ItemProperty::ColorInformation(ColorInformation::Unknown))
814 }
815 
parse_pasp(stream: &mut IStream) -> AvifResult<ItemProperty>816 fn parse_pasp(stream: &mut IStream) -> AvifResult<ItemProperty> {
817     // Section 12.1.4.2 of ISO/IEC 14496-12.
818     let pasp = PixelAspectRatio {
819         // unsigned int(32) hSpacing;
820         h_spacing: stream.read_u32()?,
821         // unsigned int(32) vSpacing;
822         v_spacing: stream.read_u32()?,
823     };
824     Ok(ItemProperty::PixelAspectRatio(pasp))
825 }
826 
827 #[allow(non_snake_case)]
parse_auxC(stream: &mut IStream) -> AvifResult<ItemProperty>828 fn parse_auxC(stream: &mut IStream) -> AvifResult<ItemProperty> {
829     // Section 6.5.8.2 of ISO/IEC 23008-12.
830     let (_version, _flags) = stream.read_and_enforce_version_and_flags(0)?;
831     // string aux_type;
832     let auxiliary_type = stream.read_c_string()?;
833     // template unsigned int(8) aux_subtype[];
834     // until the end of the box, the semantics depend on the aux_type value
835     Ok(ItemProperty::AuxiliaryType(auxiliary_type))
836 }
837 
parse_clap(stream: &mut IStream) -> AvifResult<ItemProperty>838 fn parse_clap(stream: &mut IStream) -> AvifResult<ItemProperty> {
839     // Section 12.1.4.2 of ISO/IEC 14496-12.
840     let clap = CleanAperture {
841         // unsigned int(32) cleanApertureWidthN;
842         // unsigned int(32) cleanApertureWidthD;
843         width: stream.read_ufraction()?,
844         // unsigned int(32) cleanApertureHeightN;
845         // unsigned int(32) cleanApertureHeightD;
846         height: stream.read_ufraction()?,
847         // unsigned int(32) horizOffN;
848         // unsigned int(32) horizOffD;
849         horiz_off: stream.read_ufraction()?,
850         // unsigned int(32) vertOffN;
851         // unsigned int(32) vertOffD;
852         vert_off: stream.read_ufraction()?,
853     };
854     Ok(ItemProperty::CleanAperture(clap))
855 }
856 
parse_irot(stream: &mut IStream) -> AvifResult<ItemProperty>857 fn parse_irot(stream: &mut IStream) -> AvifResult<ItemProperty> {
858     // Section 6.5.10.2 of ISO/IEC 23008-12.
859     let mut bits = stream.sub_bit_stream(1)?;
860     // unsigned int (6) reserved = 0;
861     if bits.read(6)? != 0 {
862         return Err(AvifError::BmffParseFailed(
863             "invalid reserved bits in irot".into(),
864         ));
865     }
866     // unsigned int (2) angle;
867     let angle = bits.read(2)? as u8;
868     Ok(ItemProperty::ImageRotation(angle))
869 }
870 
parse_imir(stream: &mut IStream) -> AvifResult<ItemProperty>871 fn parse_imir(stream: &mut IStream) -> AvifResult<ItemProperty> {
872     // Section 6.5.12.1 of ISO/IEC 23008-12.
873     let mut bits = stream.sub_bit_stream(1)?;
874     // unsigned int(7) reserved = 0;
875     if bits.read(7)? != 0 {
876         return Err(AvifError::BmffParseFailed(
877             "invalid reserved bits in imir".into(),
878         ));
879     }
880     // unsigned int(1) axis;
881     let axis = bits.read(1)? as u8;
882     Ok(ItemProperty::ImageMirror(axis))
883 }
884 
parse_a1op(stream: &mut IStream) -> AvifResult<ItemProperty>885 fn parse_a1op(stream: &mut IStream) -> AvifResult<ItemProperty> {
886     // https://aomediacodec.github.io/av1-avif/v1.1.0.html#operating-point-selector-property-syntax
887 
888     // unsigned int(8) op_index;
889     let op_index = stream.read_u8()?;
890     if op_index > 31 {
891         // 31 is AV1's maximum operating point value (operating_points_cnt_minus_1).
892         return Err(AvifError::BmffParseFailed(format!(
893             "Invalid op_index ({op_index}) in a1op"
894         )));
895     }
896     Ok(ItemProperty::OperatingPointSelector(op_index))
897 }
898 
parse_lsel(stream: &mut IStream) -> AvifResult<ItemProperty>899 fn parse_lsel(stream: &mut IStream) -> AvifResult<ItemProperty> {
900     // Section 6.5.11.1 of ISO/IEC 23008-12.
901 
902     // unsigned int(16) layer_id;
903     let layer_id = stream.read_u16()?;
904 
905     // https://aomediacodec.github.io/av1-avif/v1.1.0.html#layer-selector-property:
906     //   The layer_id indicates the value of the spatial_id to render. The value shall be between 0
907     //   and 3, or the special value 0xFFFF.
908     if layer_id != 0xFFFF && layer_id >= 4 {
909         return Err(AvifError::BmffParseFailed(format!(
910             "Invalid layer_id ({layer_id}) in lsel"
911         )));
912     }
913     Ok(ItemProperty::LayerSelector(layer_id))
914 }
915 
parse_a1lx(stream: &mut IStream) -> AvifResult<ItemProperty>916 fn parse_a1lx(stream: &mut IStream) -> AvifResult<ItemProperty> {
917     // https://aomediacodec.github.io/av1-avif/v1.1.0.html#layered-image-indexing-property-syntax
918     let mut bits = stream.sub_bit_stream(1)?;
919     // unsigned int(7) reserved = 0;
920     if bits.read(7)? != 0 {
921         return Err(AvifError::BmffParseFailed(
922             "Invalid reserved bits in a1lx".into(),
923         ));
924     }
925     // unsigned int(1) large_size;
926     let large_size = bits.read_bool()?;
927     let mut layer_sizes: [usize; 3] = [0; 3];
928     for layer_size in &mut layer_sizes {
929         if large_size {
930             *layer_size = usize_from_u32(stream.read_u32()?)?;
931         } else {
932             *layer_size = usize_from_u16(stream.read_u16()?)?;
933         }
934     }
935     Ok(ItemProperty::AV1LayeredImageIndexing(layer_sizes))
936 }
937 
parse_clli(stream: &mut IStream) -> AvifResult<ItemProperty>938 fn parse_clli(stream: &mut IStream) -> AvifResult<ItemProperty> {
939     // Section 12.1.6.2 of ISO/IEC 14496-12.
940     let clli = ContentLightLevelInformation {
941         // unsigned int(16) max_content_light_level
942         max_cll: stream.read_u16()?,
943         // unsigned int(16) max_pic_average_light_level
944         max_pall: stream.read_u16()?,
945     };
946     Ok(ItemProperty::ContentLightLevelInformation(clli))
947 }
948 
parse_ipco(stream: &mut IStream, is_track: bool) -> AvifResult<Vec<ItemProperty>>949 fn parse_ipco(stream: &mut IStream, is_track: bool) -> AvifResult<Vec<ItemProperty>> {
950     // Section 8.11.14.2 of ISO/IEC 14496-12.
951     let mut properties: Vec<ItemProperty> = Vec::new();
952     while stream.has_bytes_left()? {
953         let header = parse_header(stream, /*top_level=*/ false)?;
954         let mut sub_stream = stream.sub_stream(&header.size)?;
955         match header.box_type.as_str() {
956             "ispe" => properties.push(parse_ispe(&mut sub_stream)?),
957             "pixi" => properties.push(parse_pixi(&mut sub_stream)?),
958             "av1C" => properties.push(parse_av1C(&mut sub_stream)?),
959             "colr" => properties.push(parse_colr(&mut sub_stream)?),
960             "pasp" => properties.push(parse_pasp(&mut sub_stream)?),
961             "auxC" if !is_track => properties.push(parse_auxC(&mut sub_stream)?),
962             "auxi" if is_track => properties.push(parse_auxC(&mut sub_stream)?),
963             "clap" => properties.push(parse_clap(&mut sub_stream)?),
964             "irot" => properties.push(parse_irot(&mut sub_stream)?),
965             "imir" => properties.push(parse_imir(&mut sub_stream)?),
966             "a1op" => properties.push(parse_a1op(&mut sub_stream)?),
967             "lsel" => properties.push(parse_lsel(&mut sub_stream)?),
968             "a1lx" => properties.push(parse_a1lx(&mut sub_stream)?),
969             "clli" => properties.push(parse_clli(&mut sub_stream)?),
970             #[cfg(feature = "heic")]
971             "hvcC" => properties.push(parse_hvcC(&mut sub_stream)?),
972             _ => properties.push(ItemProperty::Unknown(header.box_type)),
973         }
974     }
975     Ok(properties)
976 }
977 
parse_ipma(stream: &mut IStream) -> AvifResult<Vec<ItemPropertyAssociation>>978 fn parse_ipma(stream: &mut IStream) -> AvifResult<Vec<ItemPropertyAssociation>> {
979     // Section 8.11.14.2 of ISO/IEC 14496-12.
980     let (version, flags) = stream.read_version_and_flags()?;
981     // unsigned int(32) entry_count;
982     let entry_count = stream.read_u32()?;
983     let mut ipma: Vec<ItemPropertyAssociation> = create_vec_exact(usize_from_u32(entry_count)?)?;
984     for _i in 0..entry_count {
985         let mut entry = ItemPropertyAssociation::default();
986         // ISO/IEC 23008-12, First edition, 2017-12, Section 9.3.1:
987         //   Each ItemPropertyAssociation box shall be ordered by increasing item_ID, and there
988         //   shall be at most one association box for each item_ID, in any
989         //   ItemPropertyAssociation box.
990         if version < 1 {
991             // unsigned int(16) item_ID;
992             entry.item_id = stream.read_u16()? as u32;
993         } else {
994             // unsigned int(32) item_ID;
995             entry.item_id = stream.read_u32()?;
996         }
997         if entry.item_id == 0 {
998             return Err(AvifError::BmffParseFailed(format!(
999                 "invalid item id ({}) in ipma",
1000                 entry.item_id
1001             )));
1002         }
1003         if !ipma.is_empty() {
1004             let previous_item_id = ipma.last().unwrap().item_id;
1005             if entry.item_id <= previous_item_id {
1006                 return Err(AvifError::BmffParseFailed(
1007                     "ipma item ids are not ordered by increasing id".into(),
1008                 ));
1009             }
1010         }
1011         // unsigned int(8) association_count;
1012         let association_count = stream.read_u8()?;
1013         for _j in 0..association_count {
1014             let mut bits = stream.sub_bit_stream(if flags & 0x1 == 1 { 2 } else { 1 })?;
1015             // bit(1) essential;
1016             let essential = bits.read_bool()?;
1017             if flags & 0x1 == 1 {
1018                 // unsigned int(15) property_index;
1019                 entry.associations.push((bits.read(15)? as u16, essential));
1020             } else {
1021                 //unsigned int(7) property_index;
1022                 entry.associations.push((bits.read(7)? as u16, essential));
1023             }
1024         }
1025         ipma.push(entry);
1026     }
1027     Ok(ipma)
1028 }
1029 
parse_iprp(stream: &mut IStream) -> AvifResult<ItemPropertyBox>1030 fn parse_iprp(stream: &mut IStream) -> AvifResult<ItemPropertyBox> {
1031     // Section 8.11.14.2 of ISO/IEC 14496-12.
1032     let header = parse_header(stream, /*top_level=*/ false)?;
1033     if header.box_type != "ipco" {
1034         return Err(AvifError::BmffParseFailed(
1035             "First box in iprp is not ipco".into(),
1036         ));
1037     }
1038     let mut iprp = ItemPropertyBox::default();
1039     // Parse ipco box.
1040     {
1041         let mut sub_stream = stream.sub_stream(&header.size)?;
1042         iprp.properties = parse_ipco(&mut sub_stream, /*is_track=*/ false)?;
1043     }
1044     // Parse ipma boxes.
1045     while stream.has_bytes_left()? {
1046         let header = parse_header(stream, /*top_level=*/ false)?;
1047         if header.box_type != "ipma" {
1048             return Err(AvifError::BmffParseFailed(
1049                 "Found non ipma box in iprp".into(),
1050             ));
1051         }
1052         let mut sub_stream = stream.sub_stream(&header.size)?;
1053         iprp.associations.append(&mut parse_ipma(&mut sub_stream)?);
1054     }
1055     Ok(iprp)
1056 }
1057 
parse_infe(stream: &mut IStream) -> AvifResult<ItemInfo>1058 fn parse_infe(stream: &mut IStream) -> AvifResult<ItemInfo> {
1059     // Section 8.11.6.2 of ISO/IEC 14496-12.
1060     let (version, _flags) = stream.read_version_and_flags()?;
1061     if version != 2 && version != 3 {
1062         return Err(AvifError::BmffParseFailed(
1063             "infe box version 2 or 3 expected.".into(),
1064         ));
1065     }
1066 
1067     // TODO: check flags. ISO/IEC 23008-12:2017, Section 9.2 says:
1068     // The flags field of ItemInfoEntry with version greater than or equal to 2 is specified
1069     // as follows:
1070     //   (flags & 1) equal to 1 indicates that the item is not intended to be a part of the
1071     //   presentation. For example, when (flags & 1) is equal to 1 for an image item, the
1072     //   image item should not be displayed. (flags & 1) equal to 0 indicates that the item
1073     //   is intended to be a part of the presentation.
1074     //
1075     // See also Section 6.4.2.
1076     let mut entry = ItemInfo::default();
1077     if version == 2 {
1078         // unsigned int(16) item_ID;
1079         entry.item_id = stream.read_u16()? as u32;
1080     } else {
1081         // unsigned int(32) item_ID;
1082         entry.item_id = stream.read_u32()?;
1083     }
1084     if entry.item_id == 0 {
1085         return Err(AvifError::BmffParseFailed(format!(
1086             "Invalid item id ({}) found in infe",
1087             entry.item_id
1088         )));
1089     }
1090     // unsigned int(16) item_protection_index;
1091     entry.item_protection_index = stream.read_u16()?;
1092     // unsigned int(32) item_type;
1093     entry.item_type = stream.read_string(4)?;
1094 
1095     // utf8string item_name;
1096     entry.item_name = stream.read_c_string()?;
1097 
1098     if entry.item_type == "mime" {
1099         // utf8string content_type;
1100         entry.content_type = stream.read_c_string()?;
1101         // utf8string content_encoding; //optional
1102     }
1103     // if (item_type == 'uri ') {
1104     //  utf8string item_uri_type;
1105     // }
1106     Ok(entry)
1107 }
1108 
parse_iinf(stream: &mut IStream) -> AvifResult<Vec<ItemInfo>>1109 fn parse_iinf(stream: &mut IStream) -> AvifResult<Vec<ItemInfo>> {
1110     // Section 8.11.6.2 of ISO/IEC 14496-12.
1111     let (version, _flags) = stream.read_version_and_flags()?;
1112     if version > 1 {
1113         return Err(AvifError::BmffParseFailed(format!(
1114             "Unsupported version {} in iinf box",
1115             version
1116         )));
1117     }
1118     let entry_count: u32 = if version == 0 {
1119         // unsigned int(16) entry_count;
1120         stream.read_u16()? as u32
1121     } else {
1122         // unsigned int(32) entry_count;
1123         stream.read_u32()?
1124     };
1125     let mut iinf: Vec<ItemInfo> = create_vec_exact(usize_from_u32(entry_count)?)?;
1126     for _i in 0..entry_count {
1127         let header = parse_header(stream, /*top_level=*/ false)?;
1128         if header.box_type != "infe" {
1129             return Err(AvifError::BmffParseFailed(
1130                 "Found non infe box in iinf".into(),
1131             ));
1132         }
1133         let mut sub_stream = stream.sub_stream(&header.size)?;
1134         iinf.push(parse_infe(&mut sub_stream)?);
1135     }
1136     Ok(iinf)
1137 }
1138 
parse_iref(stream: &mut IStream) -> AvifResult<Vec<ItemReference>>1139 fn parse_iref(stream: &mut IStream) -> AvifResult<Vec<ItemReference>> {
1140     // Section 8.11.12.2 of ISO/IEC 14496-12.
1141     let (version, _flags) = stream.read_version_and_flags()?;
1142     let mut iref: Vec<ItemReference> = Vec::new();
1143     // versions > 1 are not supported. ignore them.
1144     if version > 1 {
1145         return Ok(iref);
1146     }
1147     while stream.has_bytes_left()? {
1148         let header = parse_header(stream, /*top_level=*/ false)?;
1149         let from_item_id: u32 = if version == 0 {
1150             // unsigned int(16) from_item_ID;
1151             stream.read_u16()? as u32
1152         } else {
1153             // unsigned int(32) from_item_ID;
1154             stream.read_u32()?
1155         };
1156         if from_item_id == 0 {
1157             return Err(AvifError::BmffParseFailed(
1158                 "invalid from_item_id (0) in iref".into(),
1159             ));
1160         }
1161         // unsigned int(16) reference_count;
1162         let reference_count = stream.read_u16()?;
1163         for index in 0..reference_count {
1164             let to_item_id: u32 = if version == 0 {
1165                 // unsigned int(16) to_item_ID;
1166                 stream.read_u16()? as u32
1167             } else {
1168                 // unsigned int(32) to_item_ID;
1169                 stream.read_u32()?
1170             };
1171             if to_item_id == 0 {
1172                 return Err(AvifError::BmffParseFailed(
1173                     "invalid to_item_id (0) in iref".into(),
1174                 ));
1175             }
1176             iref.push(ItemReference {
1177                 from_item_id,
1178                 to_item_id,
1179                 reference_type: header.box_type.clone(),
1180                 index: index as u32,
1181             });
1182         }
1183     }
1184     Ok(iref)
1185 }
1186 
parse_idat(stream: &mut IStream) -> AvifResult<Vec<u8>>1187 fn parse_idat(stream: &mut IStream) -> AvifResult<Vec<u8>> {
1188     // Section 8.11.11.2 of ISO/IEC 14496-12.
1189     if !stream.has_bytes_left()? {
1190         return Err(AvifError::BmffParseFailed("Invalid idat size (0)".into()));
1191     }
1192     let mut idat: Vec<u8> = Vec::with_capacity(stream.bytes_left()?);
1193     idat.extend_from_slice(stream.get_slice(stream.bytes_left()?)?);
1194     Ok(idat)
1195 }
1196 
parse_meta(stream: &mut IStream) -> AvifResult<MetaBox>1197 fn parse_meta(stream: &mut IStream) -> AvifResult<MetaBox> {
1198     // Section 8.11.1.2 of ISO/IEC 14496-12.
1199     let (_version, _flags) = stream.read_and_enforce_version_and_flags(0)?;
1200     let mut meta = MetaBox::default();
1201 
1202     // Parse the first hdlr box.
1203     {
1204         let header = parse_header(stream, /*top_level=*/ false)?;
1205         if header.box_type != "hdlr" {
1206             return Err(AvifError::BmffParseFailed(
1207                 "first box in meta is not hdlr".into(),
1208             ));
1209         }
1210         let handler_type = parse_hdlr(&mut stream.sub_stream(&header.size)?)?;
1211         if handler_type != "pict" {
1212             // Section 6.2 of ISO/IEC 23008-12:
1213             //   The handler type for the MetaBox shall be 'pict'.
1214             // https://aomediacodec.github.io/av1-avif/v1.1.0.html#image-sequences does not apply
1215             // because this function is only called for the MetaBox but it would work too:
1216             //   The track handler for an AV1 Image Sequence shall be pict.
1217             return Err(AvifError::BmffParseFailed(
1218                 "Box[hdlr] handler_type is not 'pict'".into(),
1219             ));
1220         }
1221     }
1222 
1223     let mut boxes_seen: HashSet<String> = HashSet::with_hasher(NonRandomHasherState);
1224     boxes_seen.insert(String::from("hdlr"));
1225     while stream.has_bytes_left()? {
1226         let header = parse_header(stream, /*top_level=*/ false)?;
1227         match header.box_type.as_str() {
1228             "hdlr" | "iloc" | "pitm" | "iprp" | "iinf" | "iref" | "idat" => {
1229                 if boxes_seen.contains(&header.box_type) {
1230                     return Err(AvifError::BmffParseFailed(format!(
1231                         "duplicate {} box in meta.",
1232                         header.box_type
1233                     )));
1234                 }
1235                 boxes_seen.insert(header.box_type.clone());
1236             }
1237             _ => {}
1238         }
1239         let mut sub_stream = stream.sub_stream(&header.size)?;
1240         match header.box_type.as_str() {
1241             "iloc" => meta.iloc = parse_iloc(&mut sub_stream)?,
1242             "pitm" => meta.primary_item_id = parse_pitm(&mut sub_stream)?,
1243             "iprp" => meta.iprp = parse_iprp(&mut sub_stream)?,
1244             "iinf" => meta.iinf = parse_iinf(&mut sub_stream)?,
1245             "iref" => meta.iref = parse_iref(&mut sub_stream)?,
1246             "idat" => meta.idat = parse_idat(&mut sub_stream)?,
1247             _ => {}
1248         }
1249     }
1250     Ok(meta)
1251 }
1252 
parse_tkhd(stream: &mut IStream, track: &mut Track) -> AvifResult<()>1253 fn parse_tkhd(stream: &mut IStream, track: &mut Track) -> AvifResult<()> {
1254     // Section 8.3.2.2 of ISO/IEC 14496-12.
1255     let (version, _flags) = stream.read_version_and_flags()?;
1256     if version == 1 {
1257         // unsigned int(64) creation_time;
1258         stream.skip_u64()?;
1259         // unsigned int(64) modification_time;
1260         stream.skip_u64()?;
1261         // unsigned int(32) track_ID;
1262         track.id = stream.read_u32()?;
1263         // const unsigned int(32) reserved = 0;
1264         if stream.read_u32()? != 0 {
1265             return Err(AvifError::BmffParseFailed(
1266                 "Invalid reserved bits in tkhd".into(),
1267             ));
1268         }
1269         // unsigned int(64) duration;
1270         track.track_duration = stream.read_u64()?;
1271     } else if version == 0 {
1272         // unsigned int(32) creation_time;
1273         stream.skip_u32()?;
1274         // unsigned int(32) modification_time;
1275         stream.skip_u32()?;
1276         // unsigned int(32) track_ID;
1277         track.id = stream.read_u32()?;
1278         // const unsigned int(32) reserved = 0;
1279         if stream.read_u32()? != 0 {
1280             return Err(AvifError::BmffParseFailed(
1281                 "Invalid reserved bits in tkhd".into(),
1282             ));
1283         }
1284         // unsigned int(32) duration;
1285         track.track_duration = stream.read_u32()? as u64;
1286     } else {
1287         return Err(AvifError::BmffParseFailed(format!(
1288             "unsupported version ({version}) in trak"
1289         )));
1290     }
1291 
1292     // const unsigned int(32)[2] reserved = 0;
1293     if stream.read_u32()? != 0 || stream.read_u32()? != 0 {
1294         return Err(AvifError::BmffParseFailed(
1295             "Invalid reserved bits in tkhd".into(),
1296         ));
1297     }
1298     // The following fields should be 0 but are ignored instead.
1299     // template int(16) layer = 0;
1300     stream.skip(2)?;
1301     // template int(16) alternate_group = 0;
1302     stream.skip(2)?;
1303     // template int(16) volume = {if track_is_audio 0x0100 else 0};
1304     stream.skip(2)?;
1305     // const unsigned int(16) reserved = 0;
1306     if stream.read_u16()? != 0 {
1307         return Err(AvifError::BmffParseFailed(
1308             "Invalid reserved bits in tkhd".into(),
1309         ));
1310     }
1311     // template int(32)[9] matrix= { 0x00010000,0,0,0,0x00010000,0,0,0,0x40000000 }; // unity matrix
1312     stream.skip(4 * 9)?;
1313 
1314     // unsigned int(32) width;
1315     track.width = stream.read_u32()? >> 16;
1316     // unsigned int(32) height;
1317     track.height = stream.read_u32()? >> 16;
1318 
1319     Ok(())
1320 }
1321 
parse_mdhd(stream: &mut IStream, track: &mut Track) -> AvifResult<()>1322 fn parse_mdhd(stream: &mut IStream, track: &mut Track) -> AvifResult<()> {
1323     // Section 8.4.2.2 of ISO/IEC 14496-12.
1324     let (version, _flags) = stream.read_version_and_flags()?;
1325     if version == 1 {
1326         // unsigned int(64) creation_time;
1327         stream.skip_u64()?;
1328         // unsigned int(64) modification_time;
1329         stream.skip_u64()?;
1330         // unsigned int(32) timescale;
1331         track.media_timescale = stream.read_u32()?;
1332         // unsigned int(64) duration;
1333         track.media_duration = stream.read_u64()?;
1334     } else if version == 0 {
1335         // unsigned int(32) creation_time;
1336         stream.skip_u32()?;
1337         // unsigned int(32) modification_time;
1338         stream.skip_u32()?;
1339         // unsigned int(32) timescale;
1340         track.media_timescale = stream.read_u32()?;
1341         // unsigned int(32) duration;
1342         track.media_duration = stream.read_u32()? as u64;
1343     } else {
1344         return Err(AvifError::BmffParseFailed(format!(
1345             "unsupported version ({version}) in mdhd"
1346         )));
1347     }
1348 
1349     let mut bits = stream.sub_bit_stream(4)?;
1350     // bit(1) pad = 0;
1351     if bits.read(1)? != 0 {
1352         return Err(AvifError::BmffParseFailed(
1353             "Invalid reserved bits in mdhd".into(),
1354         ));
1355     }
1356     // unsigned int(5)[3] language; // ISO-639-2/T language code
1357     bits.skip(5 * 3)?;
1358     // unsigned int(16) pre_defined = 0; ("Readers should expect any value")
1359     bits.skip(2)?;
1360     Ok(())
1361 }
1362 
parse_stco( stream: &mut IStream, sample_table: &mut SampleTable, large_offset: bool, ) -> AvifResult<()>1363 fn parse_stco(
1364     stream: &mut IStream,
1365     sample_table: &mut SampleTable,
1366     large_offset: bool,
1367 ) -> AvifResult<()> {
1368     // Section 8.7.5.2 of ISO/IEC 14496-12.
1369     let (_version, _flags) = stream.read_and_enforce_version_and_flags(0)?;
1370     // unsigned int(32) entry_count;
1371     let entry_count = usize_from_u32(stream.read_u32()?)?;
1372     sample_table.chunk_offsets = create_vec_exact(entry_count)?;
1373     for _ in 0..entry_count {
1374         let chunk_offset: u64 = if large_offset {
1375             // unsigned int(64) chunk_offset;
1376             stream.read_u64()?
1377         } else {
1378             // unsigned int(32) chunk_offset;
1379             stream.read_u32()? as u64
1380         };
1381         sample_table.chunk_offsets.push(chunk_offset);
1382     }
1383     Ok(())
1384 }
1385 
parse_stsc(stream: &mut IStream, sample_table: &mut SampleTable) -> AvifResult<()>1386 fn parse_stsc(stream: &mut IStream, sample_table: &mut SampleTable) -> AvifResult<()> {
1387     // Section 8.7.4.2 of ISO/IEC 14496-12.
1388     let (_version, _flags) = stream.read_and_enforce_version_and_flags(0)?;
1389     // unsigned int(32) entry_count;
1390     let entry_count = usize_from_u32(stream.read_u32()?)?;
1391     sample_table.sample_to_chunk = create_vec_exact(entry_count)?;
1392     for i in 0..entry_count {
1393         let stsc = SampleToChunk {
1394             // unsigned int(32) first_chunk;
1395             first_chunk: stream.read_u32()?,
1396             // unsigned int(32) samples_per_chunk;
1397             samples_per_chunk: stream.read_u32()?,
1398             // unsigned int(32) sample_description_index;
1399             sample_description_index: stream.read_u32()?,
1400         };
1401         if i == 0 {
1402             if stsc.first_chunk != 1 {
1403                 return Err(AvifError::BmffParseFailed(
1404                     "stsc does not begin with chunk 1.".into(),
1405                 ));
1406             }
1407         } else if stsc.first_chunk <= sample_table.sample_to_chunk.last().unwrap().first_chunk {
1408             return Err(AvifError::BmffParseFailed(
1409                 "stsc chunks are not strictly increasing.".into(),
1410             ));
1411         }
1412         if stsc.sample_description_index == 0 {
1413             return Err(AvifError::BmffParseFailed(format!(
1414                 "sample_description_index is {} in stsc chunk.",
1415                 stsc.sample_description_index
1416             )));
1417         }
1418         sample_table.sample_to_chunk.push(stsc);
1419     }
1420     Ok(())
1421 }
1422 
parse_stsz(stream: &mut IStream, sample_table: &mut SampleTable) -> AvifResult<()>1423 fn parse_stsz(stream: &mut IStream, sample_table: &mut SampleTable) -> AvifResult<()> {
1424     // Section 8.7.3.2.1 of ISO/IEC 14496-12.
1425     let (_version, _flags) = stream.read_and_enforce_version_and_flags(0)?;
1426     // unsigned int(32) sample_size;
1427     let sample_size = stream.read_u32()?;
1428     // unsigned int(32) sample_count;
1429     let sample_count = usize_from_u32(stream.read_u32()?)?;
1430 
1431     if sample_size > 0 {
1432         sample_table.sample_size = SampleSize::FixedSize(sample_size);
1433         return Ok(());
1434     }
1435     let mut sample_sizes: Vec<u32> = create_vec_exact(sample_count)?;
1436     for _ in 0..sample_count {
1437         // unsigned int(32) entry_size;
1438         sample_sizes.push(stream.read_u32()?);
1439     }
1440     sample_table.sample_size = SampleSize::Sizes(sample_sizes);
1441     Ok(())
1442 }
1443 
parse_stss(stream: &mut IStream, sample_table: &mut SampleTable) -> AvifResult<()>1444 fn parse_stss(stream: &mut IStream, sample_table: &mut SampleTable) -> AvifResult<()> {
1445     // Section 8.6.2.2 of ISO/IEC 14496-12.
1446     let (_version, _flags) = stream.read_and_enforce_version_and_flags(0)?;
1447     // unsigned int(32) entry_count;
1448     let entry_count = usize_from_u32(stream.read_u32()?)?;
1449     sample_table.sync_samples = create_vec_exact(entry_count)?;
1450     for _ in 0..entry_count {
1451         // unsigned int(32) sample_number;
1452         sample_table.sync_samples.push(stream.read_u32()?);
1453     }
1454     Ok(())
1455 }
1456 
parse_stts(stream: &mut IStream, sample_table: &mut SampleTable) -> AvifResult<()>1457 fn parse_stts(stream: &mut IStream, sample_table: &mut SampleTable) -> AvifResult<()> {
1458     // Section 8.6.1.2.2 of ISO/IEC 14496-12.
1459     let (_version, _flags) = stream.read_and_enforce_version_and_flags(0)?;
1460     // unsigned int(32) entry_count;
1461     let entry_count = usize_from_u32(stream.read_u32()?)?;
1462     sample_table.time_to_sample = create_vec_exact(entry_count)?;
1463     for _ in 0..entry_count {
1464         let stts = TimeToSample {
1465             // unsigned int(32) sample_count;
1466             sample_count: stream.read_u32()?,
1467             // unsigned int(32) sample_delta;
1468             sample_delta: stream.read_u32()?,
1469         };
1470         sample_table.time_to_sample.push(stts);
1471     }
1472     Ok(())
1473 }
1474 
parse_sample_entry(stream: &mut IStream, format: String) -> AvifResult<SampleDescription>1475 fn parse_sample_entry(stream: &mut IStream, format: String) -> AvifResult<SampleDescription> {
1476     // Section 8.5.2.2 of ISO/IEC 14496-12.
1477     let mut sample_entry = SampleDescription {
1478         format,
1479         ..SampleDescription::default()
1480     };
1481     // const unsigned int(8) reserved[6] = 0;
1482     if stream.read_u8()? != 0
1483         || stream.read_u8()? != 0
1484         || stream.read_u8()? != 0
1485         || stream.read_u8()? != 0
1486         || stream.read_u8()? != 0
1487         || stream.read_u8()? != 0
1488     {
1489         return Err(AvifError::BmffParseFailed(
1490             "Invalid reserved bits in SampleEntry of stsd".into(),
1491         ));
1492     }
1493     // unsigned int(16) data_reference_index;
1494     stream.skip(2)?;
1495 
1496     if sample_entry.is_supported_format() {
1497         // https://aomediacodec.github.io/av1-isobmff/v1.2.0.html#av1sampleentry-syntax:
1498         //   class AV1SampleEntry extends VisualSampleEntry('av01'){
1499         //     AV1CodecConfigurationBox config;
1500         //   }
1501         // https://aomediacodec.github.io/av1-isobmff/v1.2.0.html#av1codecconfigurationbox-syntax:
1502         //   class AV1CodecConfigurationBox extends Box('av1C'){
1503         //     AV1CodecConfigurationRecord av1Config;
1504         //   }
1505 
1506         // Section 12.1.3.2 of ISO/IEC 14496-12:
1507         //   class VisualSampleEntry(codingname) extends SampleEntry(codingname)
1508 
1509         // unsigned int(16) pre_defined = 0; ("Readers should expect any value")
1510         stream.skip(2)?;
1511         // const unsigned int(16) reserved = 0;
1512         if stream.read_u16()? != 0 {
1513             return Err(AvifError::BmffParseFailed(
1514                 "Invalid reserved bits in VisualSampleEntry of stsd".into(),
1515             ));
1516         }
1517         // unsigned int(32) pre_defined[3] = 0;
1518         stream.skip(4 * 3)?;
1519         // unsigned int(16) width;
1520         stream.skip(2)?;
1521         // unsigned int(16) height;
1522         stream.skip(2)?;
1523         // template unsigned int(32) horizresolution = 0x00480000; // 72 dpi
1524         stream.skip_u32()?;
1525         // template unsigned int(32) vertresolution = 0x00480000; // 72 dpi
1526         stream.skip_u32()?;
1527         // const unsigned int(32) reserved = 0;
1528         if stream.read_u32()? != 0 {
1529             return Err(AvifError::BmffParseFailed(
1530                 "Invalid reserved bits in VisualSampleEntry of stsd".into(),
1531             ));
1532         }
1533         // template unsigned int(16) frame_count;
1534         stream.skip(2)?;
1535         // uint(8) compressorname[32];
1536         stream.skip(32)?;
1537         // template unsigned int(16) depth = 0x0018;
1538         if stream.read_u16()? != 0x0018 {
1539             return Err(AvifError::BmffParseFailed(
1540                 "Invalid depth in VisualSampleEntry of stsd".into(),
1541             ));
1542         }
1543         // unsigned int(16) pre_defined = 0; ("Readers should expect any value")
1544         stream.skip(2)?;
1545 
1546         // other boxes from derived specifications
1547         // CleanApertureBox clap; // optional
1548         // PixelAspectRatioBox pasp; // optional
1549 
1550         // Now read any of 'av1C', 'clap', 'pasp' etc.
1551         sample_entry.properties = parse_ipco(
1552             &mut stream.sub_stream(&BoxSize::UntilEndOfStream)?,
1553             /*is_track=*/ true,
1554         )?;
1555 
1556         if !sample_entry
1557             .properties
1558             .iter()
1559             .any(|p| matches!(p, ItemProperty::CodecConfiguration(_)))
1560         {
1561             return Err(AvifError::BmffParseFailed(
1562                 "AV1SampleEntry must contain an AV1CodecConfigurationRecord".into(),
1563             ));
1564         }
1565     }
1566     Ok(sample_entry)
1567 }
1568 
parse_stsd(stream: &mut IStream, sample_table: &mut SampleTable) -> AvifResult<()>1569 fn parse_stsd(stream: &mut IStream, sample_table: &mut SampleTable) -> AvifResult<()> {
1570     // Section 8.5.2.2 of ISO/IEC 14496-12.
1571     let (version, _flags) = stream.read_version_and_flags()?;
1572     if version != 0 && version != 1 {
1573         // Section 8.5.2.3 of ISO/IEC 14496-12:
1574         //   version is set to zero. A version number of 1 shall be treated as a version of 0.
1575         return Err(AvifError::BmffParseFailed(
1576             "stsd box version 0 or 1 expected.".into(),
1577         ));
1578     }
1579     // unsigned int(32) entry_count;
1580     let entry_count = usize_from_u32(stream.read_u32()?)?;
1581     sample_table.sample_descriptions = create_vec_exact(entry_count)?;
1582     for _ in 0..entry_count {
1583         // aligned(8) abstract class SampleEntry (unsigned int(32) format) extends Box(format)
1584         let header = parse_header(stream, /*top_level=*/ false)?;
1585         let sample_entry =
1586             parse_sample_entry(&mut stream.sub_stream(&header.size)?, header.box_type)?;
1587         sample_table.sample_descriptions.push(sample_entry);
1588     }
1589     Ok(())
1590 }
1591 
parse_stbl(stream: &mut IStream, track: &mut Track) -> AvifResult<()>1592 fn parse_stbl(stream: &mut IStream, track: &mut Track) -> AvifResult<()> {
1593     // Section 8.5.1.2 of ISO/IEC 14496-12.
1594     if track.sample_table.is_some() {
1595         return Err(AvifError::BmffParseFailed(
1596             "duplicate stbl for track.".into(),
1597         ));
1598     }
1599     let mut sample_table = SampleTable::default();
1600     let mut boxes_seen: HashSet<String> = HashSet::with_hasher(NonRandomHasherState);
1601     while stream.has_bytes_left()? {
1602         let header = parse_header(stream, /*top_level=*/ false)?;
1603         if boxes_seen.contains(&header.box_type) {
1604             return Err(AvifError::BmffParseFailed(format!(
1605                 "duplicate box in stbl: {}",
1606                 header.box_type
1607             )));
1608         }
1609         let mut skipped_box = false;
1610         let mut sub_stream = stream.sub_stream(&header.size)?;
1611         match header.box_type.as_str() {
1612             "stco" => {
1613                 if boxes_seen.contains("co64") {
1614                     return Err(AvifError::BmffParseFailed(
1615                         "exactly one of co64 or stco is allowed in stbl".into(),
1616                     ));
1617                 }
1618                 parse_stco(&mut sub_stream, &mut sample_table, false)?;
1619             }
1620             "co64" => {
1621                 if boxes_seen.contains("stco") {
1622                     return Err(AvifError::BmffParseFailed(
1623                         "exactly one of co64 or stco is allowed in stbl".into(),
1624                     ));
1625                 }
1626                 parse_stco(&mut sub_stream, &mut sample_table, true)?;
1627             }
1628             "stsc" => parse_stsc(&mut sub_stream, &mut sample_table)?,
1629             "stsz" => parse_stsz(&mut sub_stream, &mut sample_table)?,
1630             "stss" => parse_stss(&mut sub_stream, &mut sample_table)?,
1631             "stts" => parse_stts(&mut sub_stream, &mut sample_table)?,
1632             "stsd" => parse_stsd(&mut sub_stream, &mut sample_table)?,
1633             _ => skipped_box = true,
1634         }
1635         // For boxes that are skipped, we do not need to validate if they occur exactly once or
1636         // not.
1637         if !skipped_box {
1638             boxes_seen.insert(header.box_type.clone());
1639         }
1640     }
1641     track.sample_table = Some(sample_table);
1642     Ok(())
1643 }
1644 
parse_minf(stream: &mut IStream, track: &mut Track) -> AvifResult<()>1645 fn parse_minf(stream: &mut IStream, track: &mut Track) -> AvifResult<()> {
1646     // Section 8.4.4.2 of ISO/IEC 14496-12.
1647     while stream.has_bytes_left()? {
1648         let header = parse_header(stream, /*top_level=*/ false)?;
1649         let mut sub_stream = stream.sub_stream(&header.size)?;
1650         if header.box_type == "stbl" {
1651             parse_stbl(&mut sub_stream, track)?;
1652         }
1653     }
1654     Ok(())
1655 }
1656 
parse_mdia(stream: &mut IStream, track: &mut Track) -> AvifResult<()>1657 fn parse_mdia(stream: &mut IStream, track: &mut Track) -> AvifResult<()> {
1658     // Section 8.4.1.2 of ISO/IEC 14496-12.
1659     while stream.has_bytes_left()? {
1660         let header = parse_header(stream, /*top_level=*/ false)?;
1661         let mut sub_stream = stream.sub_stream(&header.size)?;
1662         match header.box_type.as_str() {
1663             "mdhd" => parse_mdhd(&mut sub_stream, track)?,
1664             "minf" => parse_minf(&mut sub_stream, track)?,
1665             "hdlr" => track.handler_type = parse_hdlr(&mut sub_stream)?,
1666             _ => {}
1667         }
1668     }
1669     Ok(())
1670 }
1671 
parse_tref(stream: &mut IStream, track: &mut Track) -> AvifResult<()>1672 fn parse_tref(stream: &mut IStream, track: &mut Track) -> AvifResult<()> {
1673     // Section 8.3.3.2 of ISO/IEC 14496-12.
1674 
1675     // TrackReferenceTypeBox [];
1676     while stream.has_bytes_left()? {
1677         // aligned(8) class TrackReferenceTypeBox (reference_type) extends Box(reference_type)
1678         let header = parse_header(stream, /*top_level=*/ false)?;
1679         let mut sub_stream = stream.sub_stream(&header.size)?;
1680         match header.box_type.as_str() {
1681             "auxl" => {
1682                 // unsigned int(32) track_IDs[];
1683                 // Use only the first one and skip the rest.
1684                 track.aux_for_id = Some(sub_stream.read_u32()?);
1685             }
1686             "prem" => {
1687                 // unsigned int(32) track_IDs[];
1688                 // Use only the first one and skip the rest.
1689                 track.prem_by_id = Some(sub_stream.read_u32()?);
1690             }
1691             _ => {}
1692         }
1693     }
1694     Ok(())
1695 }
1696 
parse_elst(stream: &mut IStream, track: &mut Track) -> AvifResult<()>1697 fn parse_elst(stream: &mut IStream, track: &mut Track) -> AvifResult<()> {
1698     if track.elst_seen {
1699         return Err(AvifError::BmffParseFailed(
1700             "more than one elst box was found for track".into(),
1701         ));
1702     }
1703     track.elst_seen = true;
1704 
1705     // Section 8.6.6.2 of ISO/IEC 14496-12.
1706     let (version, flags) = stream.read_version_and_flags()?;
1707 
1708     // Section 8.6.6.3 of ISO/IEC 14496-12:
1709     //   flags - the following values are defined. The values of flags greater than 1 are reserved
1710     //     RepeatEdits 1
1711     if (flags & 1) == 0 {
1712         // The only EditList feature that we support is repetition count for animated images. So in
1713         // this case, we know that the repetition count is zero and we do not care about the rest
1714         // of this box.
1715         track.is_repeating = false;
1716         return Ok(());
1717     }
1718     track.is_repeating = true;
1719 
1720     // unsigned int(32) entry_count;
1721     let entry_count = stream.read_u32()?;
1722     if entry_count != 1 {
1723         return Err(AvifError::BmffParseFailed(format!(
1724             "elst has entry_count ({entry_count}) != 1"
1725         )));
1726     }
1727 
1728     if version == 1 {
1729         // unsigned int(64) segment_duration;
1730         track.segment_duration = stream.read_u64()?;
1731         // int(64) media_time;
1732         stream.skip(8)?;
1733     } else if version == 0 {
1734         // unsigned int(32) segment_duration;
1735         track.segment_duration = stream.read_u32()? as u64;
1736         // int(32) media_time;
1737         stream.skip(4)?;
1738     } else {
1739         return Err(AvifError::BmffParseFailed(
1740             "unsupported version in elst".into(),
1741         ));
1742     }
1743     // int(16) media_rate_integer;
1744     stream.skip(2)?;
1745     // int(16) media_rate_fraction;
1746     stream.skip(2)?;
1747 
1748     if track.segment_duration == 0 {
1749         return Err(AvifError::BmffParseFailed(
1750             "invalid value for segment_duration (0)".into(),
1751         ));
1752     }
1753     Ok(())
1754 }
1755 
parse_edts(stream: &mut IStream, track: &mut Track) -> AvifResult<()>1756 fn parse_edts(stream: &mut IStream, track: &mut Track) -> AvifResult<()> {
1757     if track.elst_seen {
1758         // This function always exits with track.elst_seen set to true. So it is sufficient to
1759         // check track.elst_seen to verify the uniqueness of the edts box.
1760         return Err(AvifError::BmffParseFailed(
1761             "multiple edts boxes found for track.".into(),
1762         ));
1763     }
1764 
1765     // Section 8.6.5.2 of ISO/IEC 14496-12.
1766     while stream.has_bytes_left()? {
1767         let header = parse_header(stream, /*top_level=*/ false)?;
1768         let mut sub_stream = stream.sub_stream(&header.size)?;
1769         if header.box_type == "elst" {
1770             parse_elst(&mut sub_stream, track)?;
1771         }
1772     }
1773 
1774     if !track.elst_seen {
1775         return Err(AvifError::BmffParseFailed(
1776             "elst box was not found in edts".into(),
1777         ));
1778     }
1779     Ok(())
1780 }
1781 
parse_trak(stream: &mut IStream) -> AvifResult<Track>1782 fn parse_trak(stream: &mut IStream) -> AvifResult<Track> {
1783     let mut track = Track::default();
1784     let mut tkhd_seen = false;
1785     // Section 8.3.1.2 of ISO/IEC 14496-12.
1786     while stream.has_bytes_left()? {
1787         let header = parse_header(stream, /*top_level=*/ false)?;
1788         let mut sub_stream = stream.sub_stream(&header.size)?;
1789         match header.box_type.as_str() {
1790             "tkhd" => {
1791                 if tkhd_seen {
1792                     return Err(AvifError::BmffParseFailed(
1793                         "trak box contains multiple tkhd boxes".into(),
1794                     ));
1795                 }
1796                 parse_tkhd(&mut sub_stream, &mut track)?;
1797                 tkhd_seen = true;
1798             }
1799             "mdia" => parse_mdia(&mut sub_stream, &mut track)?,
1800             "tref" => parse_tref(&mut sub_stream, &mut track)?,
1801             "edts" => parse_edts(&mut sub_stream, &mut track)?,
1802             "meta" => track.meta = Some(parse_meta(&mut sub_stream)?),
1803             _ => {}
1804         }
1805     }
1806     if !tkhd_seen {
1807         return Err(AvifError::BmffParseFailed(
1808             "trak box did not contain a tkhd box".into(),
1809         ));
1810     }
1811     Ok(track)
1812 }
1813 
parse_moov(stream: &mut IStream) -> AvifResult<Vec<Track>>1814 fn parse_moov(stream: &mut IStream) -> AvifResult<Vec<Track>> {
1815     let mut tracks: Vec<Track> = Vec::new();
1816     // Section 8.2.1.2 of ISO/IEC 14496-12.
1817     while stream.has_bytes_left()? {
1818         let header = parse_header(stream, /*top_level=*/ false)?;
1819         let mut sub_stream = stream.sub_stream(&header.size)?;
1820         if header.box_type == "trak" {
1821             let track = parse_trak(&mut sub_stream)?;
1822             if track.is_video_handler() && (track.width == 0 || track.height == 0) {
1823                 return Err(AvifError::BmffParseFailed(
1824                     "invalid track dimensions".into(),
1825                 ));
1826             }
1827             tracks.push(track);
1828         }
1829     }
1830     if tracks.is_empty() {
1831         return Err(AvifError::BmffParseFailed(
1832             "moov box does not contain any tracks".into(),
1833         ));
1834     }
1835     Ok(tracks)
1836 }
1837 
parse(io: &mut GenericIO) -> AvifResult<AvifBoxes>1838 pub(crate) fn parse(io: &mut GenericIO) -> AvifResult<AvifBoxes> {
1839     let mut ftyp: Option<FileTypeBox> = None;
1840     let mut meta: Option<MetaBox> = None;
1841     let mut tracks: Option<Vec<Track>> = None;
1842     let mut parse_offset: u64 = 0;
1843     loop {
1844         // Read just enough to get the longest possible valid box header (4+4+8+16 bytes).
1845         let header_data = io.read(parse_offset, 32)?;
1846         if header_data.is_empty() {
1847             // No error and size is 0. We have reached the end of the stream.
1848             break;
1849         }
1850         let mut header_stream = IStream::create(header_data);
1851         let header = parse_header(&mut header_stream, /*top_level=*/ true)?;
1852         parse_offset = parse_offset
1853             .checked_add(header_stream.offset as u64)
1854             .ok_or(AvifError::BmffParseFailed("invalid parse offset".into()))?;
1855 
1856         // Read the rest of the box if necessary.
1857         match header.box_type.as_str() {
1858             "ftyp" | "meta" | "moov" => {
1859                 if ftyp.is_none() && header.box_type != "ftyp" {
1860                     // Section 6.3.4 of ISO/IEC 14496-12:
1861                     //   The FileTypeBox shall occur before any variable-length box. Only a
1862                     //   fixed-size box such as a file signature, if required, may precede it.
1863                     return Err(AvifError::BmffParseFailed(format!(
1864                         "expected ftyp box. found {}.",
1865                         header.box_type,
1866                     )));
1867                 }
1868                 let box_data = match header.size {
1869                     BoxSize::UntilEndOfStream => io.read(parse_offset, usize::MAX)?,
1870                     BoxSize::FixedSize(size) => io.read_exact(parse_offset, size)?,
1871                 };
1872                 let mut box_stream = IStream::create(box_data);
1873                 match header.box_type.as_str() {
1874                     "ftyp" => {
1875                         ftyp = Some(parse_ftyp(&mut box_stream)?);
1876                         if !ftyp.unwrap_ref().is_avif() {
1877                             return Err(AvifError::InvalidFtyp);
1878                         }
1879                     }
1880                     "meta" => meta = Some(parse_meta(&mut box_stream)?),
1881                     "moov" => tracks = Some(parse_moov(&mut box_stream)?),
1882                     _ => {} // Not reached.
1883                 }
1884                 if ftyp.is_some() {
1885                     let ftyp = ftyp.unwrap_ref();
1886                     if (!ftyp.needs_meta() || meta.is_some())
1887                         && (!ftyp.needs_moov() || tracks.is_some())
1888                     {
1889                         // Enough information has been parsed to consider parse a success.
1890                         break;
1891                     }
1892                 }
1893             }
1894             _ => {}
1895         }
1896         if header.size == BoxSize::UntilEndOfStream {
1897             // There is no other box after this one because it goes till the end of the stream.
1898             break;
1899         }
1900         parse_offset = parse_offset
1901             .checked_add(header.size() as u64)
1902             .ok_or(AvifError::BmffParseFailed("invalid parse offset".into()))?;
1903     }
1904     if ftyp.is_none() {
1905         return Err(AvifError::InvalidFtyp);
1906     }
1907     let ftyp = ftyp.unwrap();
1908     if (ftyp.needs_meta() && meta.is_none()) || (ftyp.needs_moov() && tracks.is_none()) {
1909         return Err(AvifError::TruncatedData);
1910     }
1911     Ok(AvifBoxes {
1912         ftyp,
1913         meta: meta.unwrap_or_default(),
1914         tracks: tracks.unwrap_or_default(),
1915     })
1916 }
1917 
peek_compatible_file_type(data: &[u8]) -> AvifResult<bool>1918 pub(crate) fn peek_compatible_file_type(data: &[u8]) -> AvifResult<bool> {
1919     let mut stream = IStream::create(data);
1920     let header = parse_header(&mut stream, /*top_level=*/ true)?;
1921     if header.box_type != "ftyp" {
1922         // Section 6.3.4 of ISO/IEC 14496-12:
1923         //   The FileTypeBox shall occur before any variable-length box.
1924         //   Only a fixed-size box such as a file signature, if required, may precede it.
1925         return Ok(false);
1926     }
1927     let header_size = match header.size {
1928         BoxSize::FixedSize(size) => size,
1929         // The 'ftyp' box goes on till the end of the file. Either there is no brand requiring
1930         // anything in the file but a FileTypebox (so not AVIF), or it is invalid.
1931         BoxSize::UntilEndOfStream => return Ok(false),
1932     };
1933     let ftyp = if header_size > stream.bytes_left()? {
1934         let mut header_stream = stream.sub_stream(&BoxSize::FixedSize(stream.bytes_left()?))?;
1935         parse_truncated_ftyp(&mut header_stream)
1936     } else {
1937         let mut header_stream = stream.sub_stream(&header.size)?;
1938         parse_ftyp(&mut header_stream)?
1939     };
1940     Ok(ftyp.is_avif())
1941 }
1942 
parse_tmap(stream: &mut IStream) -> AvifResult<Option<GainMapMetadata>>1943 pub(crate) fn parse_tmap(stream: &mut IStream) -> AvifResult<Option<GainMapMetadata>> {
1944     // Experimental, not yet specified.
1945 
1946     // unsigned int(8) version = 0;
1947     let version = stream.read_u8()?;
1948     if version != 0 {
1949         return Ok(None); // Unsupported version.
1950     }
1951     // unsigned int(16) minimum_version;
1952     let minimum_version = stream.read_u16()?;
1953     let supported_version = 0;
1954     if minimum_version > supported_version {
1955         return Ok(None); // Unsupported version.
1956     }
1957     // unsigned int(16) writer_version;
1958     let writer_version = stream.read_u16()?;
1959 
1960     let mut metadata = GainMapMetadata::default();
1961     let mut bits = stream.sub_bit_stream(1)?;
1962     // unsigned int(1) is_multichannel;
1963     let is_multichannel = bits.read_bool()?;
1964     let channel_count = if is_multichannel { 3 } else { 1 };
1965     // unsigned int(1) use_base_colour_space;
1966     metadata.use_base_color_space = bits.read_bool()?;
1967     // unsigned int(6) reserved;
1968     bits.skip(6)?;
1969 
1970     // unsigned int(32) base_hdr_headroom_numerator;
1971     // unsigned int(32) base_hdr_headroom_denominator;
1972     metadata.base_hdr_headroom = stream.read_ufraction()?;
1973     // unsigned int(32) alternate_hdr_headroom_numerator;
1974     // unsigned int(32) alternate_hdr_headroom_denominator;
1975     metadata.alternate_hdr_headroom = stream.read_ufraction()?;
1976     for i in 0..channel_count {
1977         // int(32) gain_map_min_numerator;
1978         // unsigned int(32) gain_map_min_denominator
1979         metadata.min[i] = stream.read_fraction()?;
1980         // int(32) gain_map_max_numerator;
1981         // unsigned int(32) gain_map_max_denominator;
1982         metadata.max[i] = stream.read_fraction()?;
1983         // unsigned int(32) gamma_numerator;
1984         // unsigned int(32) gamma_denominator;
1985         metadata.gamma[i] = stream.read_ufraction()?;
1986         // int(32) base_offset_numerator;
1987         // unsigned int(32) base_offset_denominator;
1988         metadata.base_offset[i] = stream.read_fraction()?;
1989         // int(32) alternate_offset_numerator;
1990         // unsigned int(32) alternate_offset_denominator;
1991         metadata.alternate_offset[i] = stream.read_fraction()?;
1992     }
1993 
1994     // Fill the remaining values by copying those from the first channel.
1995     for i in channel_count..3 {
1996         metadata.min[i] = metadata.min[0];
1997         metadata.max[i] = metadata.max[0];
1998         metadata.gamma[i] = metadata.gamma[0];
1999         metadata.base_offset[i] = metadata.base_offset[0];
2000         metadata.alternate_offset[i] = metadata.alternate_offset[0];
2001     }
2002     if writer_version <= supported_version && stream.has_bytes_left()? {
2003         return Err(AvifError::InvalidToneMappedImage(
2004             "invalid trailing bytes in tmap box".into(),
2005         ));
2006     }
2007     metadata.is_valid()?;
2008     Ok(Some(metadata))
2009 }
2010 
#[cfg(test)]
mod tests {
    use crate::parser::mp4box;
    use crate::AvifResult;

    /// Feeds every prefix of a small AVIF header (a complete 'ftyp' box followed by the start of
    /// a 'meta' box), including the complete buffer, to `peek_compatible_file_type`.
    #[test]
    fn peek_compatible_file_type() -> AvifResult<()> {
        let buf = [
            0x00, 0x00, 0x00, 0x20, 0x66, 0x74, 0x79, 0x70, //
            0x61, 0x76, 0x69, 0x66, 0x00, 0x00, 0x00, 0x00, //
            0x61, 0x76, 0x69, 0x66, 0x6d, 0x69, 0x66, 0x31, //
            0x6d, 0x69, 0x61, 0x66, 0x4d, 0x41, 0x31, 0x41, //
            0x00, 0x00, 0x00, 0xf2, 0x6d, 0x65, 0x74, 0x61, //
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x28, //
        ];
        // Peeking should succeed starting from byte length 12, since that is the end offset of
        // the first valid AVIF brand.
        let min_required_bytes = 12;
        // The range is inclusive so that the complete buffer is checked as well.
        for i in 0..=buf.len() {
            let res = mp4box::peek_compatible_file_type(&buf[..i]);
            if i < min_required_bytes {
                // Not enough bytes. The return should either be an error or false.
                assert!(res.is_err() || !res.unwrap());
            } else {
                assert!(res?);
            }
        }
        Ok(())
    }
}
2041