• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "media/base/container_names.h"
6 
7 #include <cctype>
8 #include <limits>
9 
10 #include "base/basictypes.h"
11 #include "base/logging.h"
12 #include "media/base/bit_reader.h"
13 
14 namespace media {
15 
16 namespace container_names {
17 
// Builds a 32-bit tag from four character codes. |a| ends up in the most
// significant byte and |d| in the least significant byte. Each argument is
// narrowed to uint8 before being widened, so only its low 8 bits are used.
#define TAG(a, b, c, d)                                   \
    ((static_cast<uint32>(static_cast<uint8>(d))) |       \
     (static_cast<uint32>(static_cast<uint8>(c)) << 8) |  \
     (static_cast<uint32>(static_cast<uint8>(b)) << 16) | \
     (static_cast<uint32>(static_cast<uint8>(a)) << 24))
23 
// Makes the enclosing function return false when |x| evaluates to false.
// The do/while(0) wrapper lets the macro be used like a single statement
// (e.g. as the body of an unbraced if/else).
#define RCHECK(x)       \
    do {                \
      if (!(x)) {       \
        return false;   \
      }                 \
    } while (0)
29 
// The three bytes EF BB BF, i.e. the UTF-8 encoding of the byte order mark.
#define UTF8_BYTE_ORDER_MARK "\xef\xbb\xbf"
31 
32 // Helper function to read 2 bytes (16 bits, big endian) from a buffer.
Read16(const uint8 * p)33 static int Read16(const uint8* p) {
34   return p[0] << 8 | p[1];
35 }
36 
37 // Helper function to read 3 bytes (24 bits, big endian) from a buffer.
Read24(const uint8 * p)38 static uint32 Read24(const uint8* p) {
39   return p[0] << 16 | p[1] << 8 | p[2];
40 }
41 
42 // Helper function to read 4 bytes (32 bits, big endian) from a buffer.
Read32(const uint8 * p)43 static uint32 Read32(const uint8* p) {
44   return p[0] << 24 | p[1] << 16 | p[2] << 8 | p[3];
45 }
46 
47 // Helper function to read 4 bytes (32 bits, little endian) from a buffer.
Read32LE(const uint8 * p)48 static uint32 Read32LE(const uint8* p) {
49   return p[3] << 24 | p[2] << 16 | p[1] << 8 | p[0];
50 }
51 
52 // Helper function to do buffer comparisons with a string without going off the
53 // end of the buffer.
StartsWith(const uint8 * buffer,size_t buffer_size,const char * prefix)54 static bool StartsWith(const uint8* buffer,
55                        size_t buffer_size,
56                        const char* prefix) {
57   size_t prefix_size = strlen(prefix);
58   return (prefix_size <= buffer_size &&
59           memcmp(buffer, prefix, prefix_size) == 0);
60 }
61 
62 // Helper function to do buffer comparisons with another buffer (to allow for
63 // embedded \0 in the comparison) without going off the end of the buffer.
StartsWith(const uint8 * buffer,size_t buffer_size,const uint8 * prefix,size_t prefix_size)64 static bool StartsWith(const uint8* buffer,
65                        size_t buffer_size,
66                        const uint8* prefix,
67                        size_t prefix_size) {
68   return (prefix_size <= buffer_size &&
69           memcmp(buffer, prefix, prefix_size) == 0);
70 }
71 
72 // Helper function to read up to 64 bits from a bit stream.
ReadBits(BitReader * reader,int num_bits)73 static uint64 ReadBits(BitReader* reader, int num_bits) {
74   DCHECK_GE(reader->bits_available(), num_bits);
75   DCHECK((num_bits > 0) && (num_bits <= 64));
76   uint64 value;
77   reader->ReadBits(num_bits, &value);
78   return value;
79 }
80 
// AC-3 frame sizes in bytes, indexed as [frmsizecod][fscod]. CheckAc3() adds
// the selected entry to its byte offset to step to the next frame. Values are
// the words-per-syncframe figures from ATSC A/52 Table 5.18 multiplied by 2;
// columns correspond to fscod 0, 1, 2 (48 kHz, 44.1 kHz, 32 kHz).
const int kAc3FrameSizeTable[38][3] = {
  { 128, 138, 192 },    { 128, 140, 192 },     // frmsizecod 0, 1
  { 160, 174, 240 },    { 160, 176, 240 },     // 2, 3
  { 192, 208, 288 },    { 192, 210, 288 },     // 4, 5
  { 224, 242, 336 },    { 224, 244, 336 },     // 6, 7
  { 256, 278, 384 },    { 256, 280, 384 },     // 8, 9
  { 320, 348, 480 },    { 320, 350, 480 },     // 10, 11
  { 384, 416, 576 },    { 384, 418, 576 },     // 12, 13
  { 448, 486, 672 },    { 448, 488, 672 },     // 14, 15
  { 512, 556, 768 },    { 512, 558, 768 },     // 16, 17
  { 640, 696, 960 },    { 640, 698, 960 },     // 18, 19
  { 768, 834, 1152 },   { 768, 836, 1152 },    // 20, 21
  { 896, 974, 1344 },   { 896, 976, 1344 },    // 22, 23
  { 1024, 1114, 1536 }, { 1024, 1116, 1536 },  // 24, 25
  { 1280, 1392, 1920 }, { 1280, 1394, 1920 },  // 26, 27
  { 1536, 1670, 2304 }, { 1536, 1672, 2304 },  // 28, 29
  { 1792, 1950, 2688 }, { 1792, 1952, 2688 },  // 30, 31
  { 2048, 2228, 3072 }, { 2048, 2230, 3072 },  // 32, 33
  { 2304, 2506, 3456 }, { 2304, 2508, 3456 },  // 34, 35
  // 640 kbps at 44.1 kHz is 1393 / 1394 words, i.e. 2786 / 2788 bytes.
  { 2560, 2786, 3840 }, { 2560, 2788, 3840 }   // 36, 37
};
94 
95 // Checks for an ADTS AAC container.
CheckAac(const uint8 * buffer,int buffer_size)96 static bool CheckAac(const uint8* buffer, int buffer_size) {
97   // Audio Data Transport Stream (ADTS) header is 7 or 9 bytes
98   // (from http://wiki.multimedia.cx/index.php?title=ADTS)
99   RCHECK(buffer_size > 6);
100 
101   int offset = 0;
102   while (offset + 6 < buffer_size) {
103     BitReader reader(buffer + offset, 6);
104 
105     // Syncword must be 0xfff.
106     RCHECK(ReadBits(&reader, 12) == 0xfff);
107 
108     // Skip MPEG version.
109     reader.SkipBits(1);
110 
111     // Layer is always 0.
112     RCHECK(ReadBits(&reader, 2) == 0);
113 
114     // Skip protection + profile.
115     reader.SkipBits(1 + 2);
116 
117     // Check sampling frequency index.
118     RCHECK(ReadBits(&reader, 4) != 15);  // Forbidden.
119 
120     // Skip private stream, channel configuration, originality, home,
121     // copyrighted stream, and copyright_start.
122     reader.SkipBits(1 + 3 + 1 + 1 + 1 + 1);
123 
124     // Get frame length (includes header).
125     int size = ReadBits(&reader, 13);
126     RCHECK(size > 0);
127     offset += size;
128   }
129   return true;
130 }
131 
132 const uint16 kAc3SyncWord = 0x0b77;
133 
134 // Checks for an AC3 container.
CheckAc3(const uint8 * buffer,int buffer_size)135 static bool CheckAc3(const uint8* buffer, int buffer_size) {
136   // Reference: ATSC Standard: Digital Audio Compression (AC-3, E-AC-3)
137   //            Doc. A/52:2012
138   // (http://www.atsc.org/cms/standards/A52-2012(12-17).pdf)
139 
140   // AC3 container looks like syncinfo | bsi | audblk * 6 | aux | check.
141   RCHECK(buffer_size > 6);
142 
143   int offset = 0;
144   while (offset + 6 < buffer_size) {
145     BitReader reader(buffer + offset, 6);
146 
147     // Check syncinfo.
148     RCHECK(ReadBits(&reader, 16) == kAc3SyncWord);
149 
150     // Skip crc1.
151     reader.SkipBits(16);
152 
153     // Verify fscod.
154     int sample_rate_code = ReadBits(&reader, 2);
155     RCHECK(sample_rate_code != 3);  // Reserved.
156 
157     // Verify frmsizecod.
158     int frame_size_code = ReadBits(&reader, 6);
159     RCHECK(frame_size_code < 38);  // Undefined.
160 
161     // Verify bsid.
162     RCHECK(ReadBits(&reader, 5) < 10);  // Normally 8 or 6, 16 used by EAC3.
163 
164     offset += kAc3FrameSizeTable[frame_size_code][sample_rate_code];
165   }
166   return true;
167 }
168 
169 // Checks for an EAC3 container (very similar to AC3)
CheckEac3(const uint8 * buffer,int buffer_size)170 static bool CheckEac3(const uint8* buffer, int buffer_size) {
171   // Reference: ATSC Standard: Digital Audio Compression (AC-3, E-AC-3)
172   //            Doc. A/52:2012
173   // (http://www.atsc.org/cms/standards/A52-2012(12-17).pdf)
174 
175   // EAC3 container looks like syncinfo | bsi | audfrm | audblk* | aux | check.
176   RCHECK(buffer_size > 6);
177 
178   int offset = 0;
179   while (offset + 6 < buffer_size) {
180     BitReader reader(buffer + offset, 6);
181 
182     // Check syncinfo.
183     RCHECK(ReadBits(&reader, 16) == kAc3SyncWord);
184 
185     // Verify strmtyp.
186     RCHECK(ReadBits(&reader, 2) != 3);
187 
188     // Skip substreamid.
189     reader.SkipBits(3);
190 
191     // Get frmsize. Include syncinfo size and convert to bytes.
192     int frame_size = (ReadBits(&reader, 11) + 1) * 2;
193     RCHECK(frame_size >= 7);
194 
195     // Skip fscod, fscod2, acmod, and lfeon.
196     reader.SkipBits(2 + 2 + 3 + 1);
197 
198     // Verify bsid.
199     int bit_stream_id = ReadBits(&reader, 5);
200     RCHECK(bit_stream_id >= 11 && bit_stream_id <= 16);
201 
202     offset += frame_size;
203   }
204   return true;
205 }
206 
207 // Additional checks for a BINK container.
CheckBink(const uint8 * buffer,int buffer_size)208 static bool CheckBink(const uint8* buffer, int buffer_size) {
209   // Reference: http://wiki.multimedia.cx/index.php?title=Bink_Container
210   RCHECK(buffer_size >= 44);
211 
212   // Verify number of frames specified.
213   RCHECK(Read32LE(buffer + 8) > 0);
214 
215   // Verify width in range.
216   int width = Read32LE(buffer + 20);
217   RCHECK(width > 0 && width <= 32767);
218 
219   // Verify height in range.
220   int height = Read32LE(buffer + 24);
221   RCHECK(height > 0 && height <= 32767);
222 
223   // Verify frames per second specified.
224   RCHECK(Read32LE(buffer + 28) > 0);
225 
226   // Verify video frames per second specified.
227   RCHECK(Read32LE(buffer + 32) > 0);
228 
229   // Number of audio tracks must be 256 or less.
230   return (Read32LE(buffer + 40) <= 256);
231 }
232 
233 // Additional checks for a CAF container.
CheckCaf(const uint8 * buffer,int buffer_size)234 static bool CheckCaf(const uint8* buffer, int buffer_size) {
235   // Reference: Apple Core Audio Format Specification 1.0
236   // (https://developer.apple.com/library/mac/#documentation/MusicAudio/Reference/CAFSpec/CAF_spec/CAF_spec.html)
237   RCHECK(buffer_size >= 52);
238   BitReader reader(buffer, buffer_size);
239 
240   // mFileType should be "caff".
241   RCHECK(ReadBits(&reader, 32) == TAG('c', 'a', 'f', 'f'));
242 
243   // mFileVersion should be 1.
244   RCHECK(ReadBits(&reader, 16) == 1);
245 
246   // Skip mFileFlags.
247   reader.SkipBits(16);
248 
249   // First chunk should be Audio Description chunk, size 32l.
250   RCHECK(ReadBits(&reader, 32) == TAG('d', 'e', 's', 'c'));
251   RCHECK(ReadBits(&reader, 64) == 32);
252 
253   // CAFAudioFormat.mSampleRate(float64) not 0
254   RCHECK(ReadBits(&reader, 64) != 0);
255 
256   // CAFAudioFormat.mFormatID not 0
257   RCHECK(ReadBits(&reader, 32) != 0);
258 
259   // Skip CAFAudioFormat.mBytesPerPacket and mFramesPerPacket.
260   reader.SkipBits(32 + 32);
261 
262   // CAFAudioFormat.mChannelsPerFrame not 0
263   RCHECK(ReadBits(&reader, 32) != 0);
264   return true;
265 }
266 
// Lookup tables used by CheckDts(). Both are read-only, so they are declared
// const.
//
// Indexed by the 4-bit core audio sampling frequency field; true marks a
// value CheckDts() accepts.
static const bool kSamplingFrequencyValid[16] = { false, true, true, true,
                                                  false, false, true, true,
                                                  true, false, false, true,
                                                  true, true, false, false };
// Indexed by the 3-bit extension audio descriptor field; true marks a value
// CheckDts() accepts.
static const bool kExtAudioIdValid[8] = { true, false, true, false, false,
                                          false, true, false };
273 
274 // Additional checks for a DTS container.
CheckDts(const uint8 * buffer,int buffer_size)275 static bool CheckDts(const uint8* buffer, int buffer_size) {
276   // Reference: ETSI TS 102 114 V1.3.1 (2011-08)
277   // (http://www.etsi.org/deliver/etsi_ts/102100_102199/102114/01.03.01_60/ts_102114v010301p.pdf)
278   RCHECK(buffer_size > 11);
279 
280   int offset = 0;
281   while (offset + 11 < buffer_size) {
282     BitReader reader(buffer + offset, 11);
283 
284     // Verify sync word.
285     RCHECK(ReadBits(&reader, 32) == 0x7ffe8001);
286 
287     // Skip frame type and deficit sample count.
288     reader.SkipBits(1 + 5);
289 
290     // Verify CRC present flag.
291     RCHECK(ReadBits(&reader, 1) == 0);  // CPF must be 0.
292 
293     // Verify number of PCM sample blocks.
294     RCHECK(ReadBits(&reader, 7) >= 5);
295 
296     // Verify primary frame byte size.
297     int frame_size = ReadBits(&reader, 14);
298     RCHECK(frame_size >= 95);
299 
300     // Skip audio channel arrangement.
301     reader.SkipBits(6);
302 
303     // Verify core audio sampling frequency is an allowed value.
304     RCHECK(kSamplingFrequencyValid[ReadBits(&reader, 4)]);
305 
306     // Verify transmission bit rate is valid.
307     RCHECK(ReadBits(&reader, 5) <= 25);
308 
309     // Verify reserved field is 0.
310     RCHECK(ReadBits(&reader, 1) == 0);
311 
312     // Skip dynamic range flag, time stamp flag, auxiliary data flag, and HDCD.
313     reader.SkipBits(1 + 1 + 1 + 1);
314 
315     // Verify extension audio descriptor flag is an allowed value.
316     RCHECK(kExtAudioIdValid[ReadBits(&reader, 3)]);
317 
318     // Skip extended coding flag and audio sync word insertion flag.
319     reader.SkipBits(1 + 1);
320 
321     // Verify low frequency effects flag is an allowed value.
322     RCHECK(ReadBits(&reader, 2) != 3);
323 
324     offset += frame_size + 1;
325   }
326   return true;
327 }
328 
329 // Checks for a DV container.
CheckDV(const uint8 * buffer,int buffer_size)330 static bool CheckDV(const uint8* buffer, int buffer_size) {
331   // Reference: SMPTE 314M (Annex A has differences with IEC 61834).
332   // (http://standards.smpte.org/content/978-1-61482-454-1/st-314-2005/SEC1.body.pdf)
333   RCHECK(buffer_size > 11);
334 
335   int offset = 0;
336   int current_sequence_number = -1;
337   int last_block_number[6];
338   while (offset + 11 < buffer_size) {
339     BitReader reader(buffer + offset, 11);
340 
341     // Decode ID data. Sections 5, 6, and 7 are reserved.
342     int section = ReadBits(&reader, 3);
343     RCHECK(section < 5);
344 
345     // Next bit must be 1.
346     RCHECK(ReadBits(&reader, 1) == 1);
347 
348     // Skip arbitrary bits.
349     reader.SkipBits(4);
350 
351     int sequence_number = ReadBits(&reader, 4);
352 
353     // Skip FSC.
354     reader.SkipBits(1);
355 
356     // Next 3 bits must be 1.
357     RCHECK(ReadBits(&reader, 3) == 7);
358 
359     int block_number = ReadBits(&reader, 8);
360 
361     if (section == 0) {  // Header.
362       // Validate the reserved bits in the next 8 bytes.
363       reader.SkipBits(1);
364       RCHECK(ReadBits(&reader, 1) == 0);
365       RCHECK(ReadBits(&reader, 11) == 0x7ff);
366       reader.SkipBits(4);
367       RCHECK(ReadBits(&reader, 4) == 0xf);
368       reader.SkipBits(4);
369       RCHECK(ReadBits(&reader, 4) == 0xf);
370       reader.SkipBits(4);
371       RCHECK(ReadBits(&reader, 4) == 0xf);
372       reader.SkipBits(3);
373       RCHECK(ReadBits(&reader, 24) == 0xffffff);
374       current_sequence_number = sequence_number;
375       for (size_t i = 0; i < arraysize(last_block_number); ++i)
376         last_block_number[i] = -1;
377     } else {
378       // Sequence number must match (this will also fail if no header seen).
379       RCHECK(sequence_number == current_sequence_number);
380       // Block number should be increasing.
381       RCHECK(block_number > last_block_number[section]);
382       last_block_number[section] = block_number;
383     }
384 
385     // Move to next block.
386     offset += 80;
387   }
388   return true;
389 }
390 
391 
392 // Checks for a GSM container.
CheckGsm(const uint8 * buffer,int buffer_size)393 static bool CheckGsm(const uint8* buffer, int buffer_size) {
394   // Reference: ETSI EN 300 961 V8.1.1
395   // (http://www.etsi.org/deliver/etsi_en/300900_300999/300961/08.01.01_60/en_300961v080101p.pdf)
396   // also http://tools.ietf.org/html/rfc3551#page-24
397   // GSM files have a 33 byte block, only first 4 bits are fixed.
398   RCHECK(buffer_size >= 1024);  // Need enough data to do a decent check.
399 
400   int offset = 0;
401   while (offset < buffer_size) {
402     // First 4 bits of each block are xD.
403     RCHECK((buffer[offset] & 0xf0) == 0xd0);
404     offset += 33;
405   }
406   return true;
407 }
408 
409 // Advance to the first set of |num_bits| bits that match |start_code|. |offset|
410 // is the current location in the buffer, and is updated. |bytes_needed| is the
411 // number of bytes that must remain in the buffer when |start_code| is found.
412 // Returns true if start_code found (and enough space in the buffer after it),
413 // false otherwise.
AdvanceToStartCode(const uint8 * buffer,int buffer_size,int * offset,int bytes_needed,int num_bits,uint32 start_code)414 static bool AdvanceToStartCode(const uint8* buffer,
415                                int buffer_size,
416                                int* offset,
417                                int bytes_needed,
418                                int num_bits,
419                                uint32 start_code) {
420   DCHECK_GE(bytes_needed, 3);
421   DCHECK_LE(num_bits, 24);  // Only supports up to 24 bits.
422 
423   // Create a mask to isolate |num_bits| bits, once shifted over.
424   uint32 bits_to_shift = 24 - num_bits;
425   uint32 mask = (1 << num_bits) - 1;
426   while (*offset + bytes_needed < buffer_size) {
427     uint32 next = Read24(buffer + *offset);
428     if (((next >> bits_to_shift) & mask) == start_code)
429       return true;
430     ++(*offset);
431   }
432   return false;
433 }
434 
435 // Checks for an H.261 container.
CheckH261(const uint8 * buffer,int buffer_size)436 static bool CheckH261(const uint8* buffer, int buffer_size) {
437   // Reference: ITU-T Recommendation H.261 (03/1993)
438   // (http://www.itu.int/rec/T-REC-H.261-199303-I/en)
439   RCHECK(buffer_size > 16);
440 
441   int offset = 0;
442   bool seen_start_code = false;
443   while (true) {
444     // Advance to picture_start_code, if there is one.
445     if (!AdvanceToStartCode(buffer, buffer_size, &offset, 4, 20, 0x10)) {
446       // No start code found (or off end of buffer), so success if
447       // there was at least one valid header.
448       return seen_start_code;
449     }
450 
451     // Now verify the block. AdvanceToStartCode() made sure that there are
452     // at least 4 bytes remaining in the buffer.
453     BitReader reader(buffer + offset, buffer_size - offset);
454     RCHECK(ReadBits(&reader, 20) == 0x10);
455 
456     // Skip the temporal reference and PTYPE.
457     reader.SkipBits(5 + 6);
458 
459     // Skip any extra insertion information. Since this is open-ended, if we run
460     // out of bits assume that the buffer is correctly formatted.
461     int extra = ReadBits(&reader, 1);
462     while (extra == 1) {
463       if (!reader.SkipBits(8))
464         return seen_start_code;
465       if (!reader.ReadBits(1, &extra))
466         return seen_start_code;
467     }
468 
469     // Next should be a Group of Blocks start code. Again, if we run out of
470     // bits, then assume that the buffer up to here is correct, and the buffer
471     // just happened to end in the middle of a header.
472     int next;
473     if (!reader.ReadBits(16, &next))
474       return seen_start_code;
475     RCHECK(next == 1);
476 
477     // Move to the next block.
478     seen_start_code = true;
479     offset += 4;
480   }
481 }
482 
483 // Checks for an H.263 container.
CheckH263(const uint8 * buffer,int buffer_size)484 static bool CheckH263(const uint8* buffer, int buffer_size) {
485   // Reference: ITU-T Recommendation H.263 (01/2005)
486   // (http://www.itu.int/rec/T-REC-H.263-200501-I/en)
487   // header is PSC(22b) + TR(8b) + PTYPE(8+b).
488   RCHECK(buffer_size > 16);
489 
490   int offset = 0;
491   bool seen_start_code = false;
492   while (true) {
493     // Advance to picture_start_code, if there is one.
494     if (!AdvanceToStartCode(buffer, buffer_size, &offset, 9, 22, 0x20)) {
495       // No start code found (or off end of buffer), so success if
496       // there was at least one valid header.
497       return seen_start_code;
498     }
499 
500     // Now verify the block. AdvanceToStartCode() made sure that there are
501     // at least 9 bytes remaining in the buffer.
502     BitReader reader(buffer + offset, 9);
503     RCHECK(ReadBits(&reader, 22) == 0x20);
504 
505     // Skip the temporal reference.
506     reader.SkipBits(8);
507 
508     // Verify that the first 2 bits of PTYPE are 10b.
509     RCHECK(ReadBits(&reader, 2) == 2);
510 
511     // Skip the split screen indicator, document camera indicator, and full
512     // picture freeze release.
513     reader.SkipBits(1 + 1 + 1);
514 
515     // Verify Source Format.
516     int format = ReadBits(&reader, 3);
517     RCHECK(format != 0 && format != 6);  // Forbidden or reserved.
518 
519     if (format == 7) {
520       // Verify full extended PTYPE.
521       int ufep = ReadBits(&reader, 3);
522       if (ufep == 1) {
523         // Verify the optional part of PLUSPTYPE.
524         format = ReadBits(&reader, 3);
525         RCHECK(format != 0 && format != 7);  // Reserved.
526         reader.SkipBits(11);
527         // Next 4 bits should be b1000.
528         RCHECK(ReadBits(&reader, 4) == 8);  // Not allowed.
529       } else {
530         RCHECK(ufep == 0);  // Only 0 and 1 allowed.
531       }
532 
533       // Verify picture type code is not a reserved value.
534       int picture_type_code = ReadBits(&reader, 3);
535       RCHECK(picture_type_code != 6 && picture_type_code != 7);  // Reserved.
536 
537       // Skip picture resampling mode, reduced resolution mode,
538       // and rounding type.
539       reader.SkipBits(1 + 1 + 1);
540 
541       // Next 3 bits should be b001.
542       RCHECK(ReadBits(&reader, 3) == 1);  // Not allowed.
543     }
544 
545     // Move to the next block.
546     seen_start_code = true;
547     offset += 9;
548   }
549 }
550 
551 // Checks for an H.264 container.
CheckH264(const uint8 * buffer,int buffer_size)552 static bool CheckH264(const uint8* buffer, int buffer_size) {
553   // Reference: ITU-T Recommendation H.264 (01/2012)
554   // (http://www.itu.int/rec/T-REC-H.264)
555   // Section B.1: Byte stream NAL unit syntax and semantics.
556   RCHECK(buffer_size > 4);
557 
558   int offset = 0;
559   int parameter_count = 0;
560   while (true) {
561     // Advance to picture_start_code, if there is one.
562     if (!AdvanceToStartCode(buffer, buffer_size, &offset, 4, 24, 1)) {
563       // No start code found (or off end of buffer), so success if
564       // there was at least one valid header.
565       return parameter_count > 0;
566     }
567 
568     // Now verify the block. AdvanceToStartCode() made sure that there are
569     // at least 4 bytes remaining in the buffer.
570     BitReader reader(buffer + offset, 4);
571     RCHECK(ReadBits(&reader, 24) == 1);
572 
573     // Verify forbidden_zero_bit.
574     RCHECK(ReadBits(&reader, 1) == 0);
575 
576     // Extract nal_ref_idc and nal_unit_type.
577     int nal_ref_idc = ReadBits(&reader, 2);
578     int nal_unit_type = ReadBits(&reader, 5);
579 
580     switch (nal_unit_type) {
581       case 5:  // Coded slice of an IDR picture.
582         RCHECK(nal_ref_idc != 0);
583         break;
584       case 6:   // Supplemental enhancement information (SEI).
585       case 9:   // Access unit delimiter.
586       case 10:  // End of sequence.
587       case 11:  // End of stream.
588       case 12:  // Filler data.
589         RCHECK(nal_ref_idc == 0);
590         break;
591       case 7:  // Sequence parameter set.
592       case 8:  // Picture parameter set.
593         ++parameter_count;
594         break;
595     }
596 
597     // Skip the current start_code_prefix and move to the next.
598     offset += 4;
599   }
600 }
601 
// Signature that must start the buffer for CheckHls() to consider it.
static const char kHlsSignature[] = "#EXTM3U";
// Tags CheckHls() searches for; finding any one of them is sufficient.
static const char kHls1[] = "#EXT-X-STREAM-INF:";
static const char kHls2[] = "#EXT-X-TARGETDURATION:";
static const char kHls3[] = "#EXT-X-MEDIA-SEQUENCE:";
606 
607 // Additional checks for a HLS container.
CheckHls(const uint8 * buffer,int buffer_size)608 static bool CheckHls(const uint8* buffer, int buffer_size) {
609   // HLS is simply a play list used for Apple HTTP Live Streaming.
610   // Reference: Apple HTTP Live Streaming Overview
611   // (http://goo.gl/MIwxj)
612 
613   if (StartsWith(buffer, buffer_size, kHlsSignature)) {
614     // Need to find "#EXT-X-STREAM-INF:", "#EXT-X-TARGETDURATION:", or
615     // "#EXT-X-MEDIA-SEQUENCE:" somewhere in the buffer. Other playlists (like
616     // WinAmp) only have additional lines with #EXTINF
617     // (http://en.wikipedia.org/wiki/M3U).
618     int offset = strlen(kHlsSignature);
619     while (offset < buffer_size) {
620       if (buffer[offset] == '#') {
621         if (StartsWith(buffer + offset, buffer_size - offset, kHls1) ||
622             StartsWith(buffer + offset, buffer_size - offset, kHls2) ||
623             StartsWith(buffer + offset, buffer_size - offset, kHls3)) {
624           return true;
625         }
626       }
627       ++offset;
628     }
629   }
630   return false;
631 }
632 
633 // Checks for a MJPEG stream.
CheckMJpeg(const uint8 * buffer,int buffer_size)634 static bool CheckMJpeg(const uint8* buffer, int buffer_size) {
635   // Reference: ISO/IEC 10918-1 : 1993(E), Annex B
636   // (http://www.w3.org/Graphics/JPEG/itu-t81.pdf)
637   RCHECK(buffer_size >= 16);
638 
639   int offset = 0;
640   int last_restart = -1;
641   int num_codes = 0;
642   while (offset + 5 < buffer_size) {
643     // Marker codes are always a two byte code with the first byte xFF.
644     RCHECK(buffer[offset] == 0xff);
645     uint8 code = buffer[offset + 1];
646     RCHECK(code >= 0xc0 || code == 1);
647 
648     // Skip sequences of xFF.
649     if (code == 0xff) {
650       ++offset;
651       continue;
652     }
653 
654     // Success if the next marker code is EOI (end of image)
655     if (code == 0xd9)
656       return true;
657 
658     // Check remaining codes.
659     if (code == 0xd8 || code == 1) {
660       // SOI (start of image) / TEM (private use). No other data with header.
661       offset += 2;
662     } else if (code >= 0xd0 && code <= 0xd7) {
663       // RST (restart) codes must be in sequence. No other data with header.
664       int restart = code & 0x07;
665       if (last_restart >= 0)
666         RCHECK(restart == (last_restart + 1) % 8);
667       last_restart = restart;
668       offset += 2;
669     } else {
670       // All remaining marker codes are followed by a length of the header.
671       int length = Read16(buffer + offset + 2) + 2;
672 
673       // Special handling of SOS (start of scan) marker since the entropy
674       // coded data follows the SOS. Any xFF byte in the data block must be
675       // followed by x00 in the data.
676       if (code == 0xda) {
677         int number_components = buffer[offset + 4];
678         RCHECK(length == 8 + 2 * number_components);
679 
680         // Advance to the next marker.
681         offset += length;
682         while (offset + 2 < buffer_size) {
683           if (buffer[offset] == 0xff && buffer[offset + 1] != 0)
684             break;
685           ++offset;
686         }
687       } else {
688         // Skip over the marker data for the other marker codes.
689         offset += length;
690       }
691     }
692     ++num_codes;
693   }
694   return (num_codes > 1);
695 }
696 
// Values of the byte that follows the 00 00 01 prefix, as compared against by
// CheckMpeg2ProgramStream().
enum Mpeg2StartCodes {
  PROGRAM_END_CODE = 0xb9,  // Treated as end of stream.
  PACK_START_CODE = 0xba    // Introduces a pack header.
};
701 
702 // Checks for a MPEG2 Program Stream.
CheckMpeg2ProgramStream(const uint8 * buffer,int buffer_size)703 static bool CheckMpeg2ProgramStream(const uint8* buffer, int buffer_size) {
704   // Reference: ISO/IEC 13818-1 : 2000 (E) / ITU-T Rec. H.222.0 (2000 E).
705   RCHECK(buffer_size > 14);
706 
707   int offset = 0;
708   while (offset + 14 < buffer_size) {
709     BitReader reader(buffer + offset, 14);
710 
711     // Must start with pack_start_code.
712     RCHECK(ReadBits(&reader, 24) == 1);
713     RCHECK(ReadBits(&reader, 8) == PACK_START_CODE);
714 
715     // Determine MPEG version (MPEG1 has b0010, while MPEG2 has b01).
716     int mpeg_version = ReadBits(&reader, 2);
717     if (mpeg_version == 0) {
718       // MPEG1, 10 byte header
719       // Validate rest of version code
720       RCHECK(ReadBits(&reader, 2) == 2);
721     } else {
722       RCHECK(mpeg_version == 1);
723     }
724 
725     // Skip system_clock_reference_base [32..30].
726     reader.SkipBits(3);
727 
728     // Verify marker bit.
729     RCHECK(ReadBits(&reader, 1) == 1);
730 
731     // Skip system_clock_reference_base [29..15].
732     reader.SkipBits(15);
733 
734     // Verify next marker bit.
735     RCHECK(ReadBits(&reader, 1) == 1);
736 
737     // Skip system_clock_reference_base [14..0].
738     reader.SkipBits(15);
739 
740     // Verify next marker bit.
741     RCHECK(ReadBits(&reader, 1) == 1);
742 
743     if (mpeg_version == 0) {
744       // Verify second marker bit.
745       RCHECK(ReadBits(&reader, 1) == 1);
746 
747       // Skip mux_rate.
748       reader.SkipBits(22);
749 
750       // Verify next marker bit.
751       RCHECK(ReadBits(&reader, 1) == 1);
752 
753       // Update offset to be after this header.
754       offset += 12;
755     } else {
756       // Must be MPEG2.
757       // Skip program_mux_rate.
758       reader.SkipBits(22);
759 
760       // Verify pair of marker bits.
761       RCHECK(ReadBits(&reader, 2) == 3);
762 
763       // Skip reserved.
764       reader.SkipBits(5);
765 
766       // Update offset to be after this header.
767       int pack_stuffing_length = ReadBits(&reader, 3);
768       offset += 14 + pack_stuffing_length;
769     }
770 
771     // Check for system headers and PES_packets.
772     while (offset + 6 < buffer_size && Read24(buffer + offset) == 1) {
773       // Next 8 bits determine stream type.
774       int stream_id = buffer[offset + 3];
775 
776       // Some stream types are reserved and shouldn't occur.
777       if (mpeg_version == 0)
778         RCHECK(stream_id != 0xbc && stream_id < 0xf0);
779       else
780         RCHECK(stream_id != 0xfc && stream_id != 0xfd && stream_id != 0xfe);
781 
782       // Some stream types are used for pack headers.
783       if (stream_id == PACK_START_CODE)  // back to outer loop.
784         break;
785       if (stream_id == PROGRAM_END_CODE)  // end of stream.
786         return true;
787 
788       int pes_length = Read16(buffer + offset + 4);
789       RCHECK(pes_length > 0);
790       offset = offset + 6 + pes_length;
791     }
792   }
793   // Success as we are off the end of the buffer and liked everything
794   // in the buffer.
795   return true;
796 }
797 
798 const uint8 kMpeg2SyncWord = 0x47;
799 
800 // Checks for a MPEG2 Transport Stream.
CheckMpeg2TransportStream(const uint8 * buffer,int buffer_size)801 static bool CheckMpeg2TransportStream(const uint8* buffer, int buffer_size) {
802   // Spec: ISO/IEC 13818-1 : 2000 (E) / ITU-T Rec. H.222.0 (2000 E).
803   // Normal packet size is 188 bytes. However, some systems add various error
804   // correction data at the end, resulting in packet of length 192/204/208
805   // (https://en.wikipedia.org/wiki/MPEG_transport_stream). Determine the
806   // length with the first packet.
807   RCHECK(buffer_size >= 250);  // Want more than 1 packet to check.
808 
809   int offset = 0;
810   int packet_length = -1;
811   while (buffer[offset] != kMpeg2SyncWord && offset < 20) {
812     // Skip over any header in the first 20 bytes.
813     ++offset;
814   }
815 
816   while (offset + 6 < buffer_size) {
817     BitReader reader(buffer + offset, 6);
818 
819     // Must start with sync byte.
820     RCHECK(ReadBits(&reader, 8) == kMpeg2SyncWord);
821 
822     // Skip transport_error_indicator, payload_unit_start_indicator, and
823     // transport_priority.
824     reader.SkipBits(1 + 1 + 1);
825 
826     // Verify the pid is not a reserved value.
827     int pid = ReadBits(&reader, 13);
828     RCHECK(pid < 3 || pid > 15);
829 
830     // Skip transport_scrambling_control.
831     reader.SkipBits(2);
832 
833     // Adaptation_field_control can not be 0.
834     int adaptation_field_control = ReadBits(&reader, 2);
835     RCHECK(adaptation_field_control != 0);
836 
837     // If there is an adaptation_field, verify it.
838     if (adaptation_field_control >= 2) {
839       // Skip continuity_counter.
840       reader.SkipBits(4);
841 
842       // Get adaptation_field_length and verify it.
843       int adaptation_field_length = ReadBits(&reader, 8);
844       if (adaptation_field_control == 2)
845         RCHECK(adaptation_field_length == 183);
846       else
847         RCHECK(adaptation_field_length <= 182);
848     }
849 
850     // Attempt to determine the packet length on the first packet.
851     if (packet_length < 0) {
852       if (buffer[offset + 188] == kMpeg2SyncWord)
853         packet_length = 188;
854       else if (buffer[offset + 192] == kMpeg2SyncWord)
855         packet_length = 192;
856       else if (buffer[offset + 204] == kMpeg2SyncWord)
857         packet_length = 204;
858       else
859         packet_length = 208;
860     }
861     offset += packet_length;
862   }
863   return true;
864 }
865 
// Values of the byte that follows the 00 00 01 prefix which
// CheckMpeg4BitStream() treats specially.
enum Mpeg4StartCodes {
  VISUAL_OBJECT_SEQUENCE_START_CODE = 0xB0,
  VISUAL_OBJECT_SEQUENCE_END_CODE = 0xB1,
  VISUAL_OBJECT_START_CODE = 0xB5,
  VOP_START_CODE = 0xB6
};
872 
873 // Checks for a raw MPEG4 bitstream container.
CheckMpeg4BitStream(const uint8 * buffer,int buffer_size)874 static bool CheckMpeg4BitStream(const uint8* buffer, int buffer_size) {
875   // Defined in ISO/IEC 14496-2:2001.
876   // However, no length ... simply scan for start code values.
877   // Note tags are very similar to H.264.
878   RCHECK(buffer_size > 4);
879 
880   int offset = 0;
881   int sequence_start_count = 0;
882   int sequence_end_count = 0;
883   int visual_object_count = 0;
884   int vop_count = 0;
885   while (true) {
886     // Advance to start_code, if there is one.
887     if (!AdvanceToStartCode(buffer, buffer_size, &offset, 6, 24, 1)) {
888       // Not a complete sequence in memory, so return true if we've seen a
889       // visual_object_sequence_start_code and a visual_object_start_code.
890       return (sequence_start_count > 0 && visual_object_count > 0);
891     }
892 
893     // Now verify the block. AdvanceToStartCode() made sure that there are
894     // at least 6 bytes remaining in the buffer.
895     BitReader reader(buffer + offset, 6);
896     RCHECK(ReadBits(&reader, 24) == 1);
897 
898     int start_code = ReadBits(&reader, 8);
899     RCHECK(start_code < 0x30 || start_code > 0xaf);  // 30..AF and
900     RCHECK(start_code < 0xb7 || start_code > 0xb9);  // B7..B9 reserved
901 
902     switch (start_code) {
903       case VISUAL_OBJECT_SEQUENCE_START_CODE: {
904         ++sequence_start_count;
905         // Verify profile in not one of many reserved values.
906         int profile = ReadBits(&reader, 8);
907         RCHECK(profile > 0);
908         RCHECK(profile < 0x04 || profile > 0x10);
909         RCHECK(profile < 0x13 || profile > 0x20);
910         RCHECK(profile < 0x23 || profile > 0x31);
911         RCHECK(profile < 0x35 || profile > 0x41);
912         RCHECK(profile < 0x43 || profile > 0x60);
913         RCHECK(profile < 0x65 || profile > 0x70);
914         RCHECK(profile < 0x73 || profile > 0x80);
915         RCHECK(profile < 0x83 || profile > 0x90);
916         RCHECK(profile < 0x95 || profile > 0xa0);
917         RCHECK(profile < 0xa4 || profile > 0xb0);
918         RCHECK(profile < 0xb5 || profile > 0xc0);
919         RCHECK(profile < 0xc3 || profile > 0xd0);
920         RCHECK(profile < 0xe4);
921         break;
922       }
923 
924       case VISUAL_OBJECT_SEQUENCE_END_CODE:
925         RCHECK(++sequence_end_count == sequence_start_count);
926         break;
927 
928       case VISUAL_OBJECT_START_CODE: {
929         ++visual_object_count;
930         if (ReadBits(&reader, 1) == 1) {
931           int visual_object_verid = ReadBits(&reader, 4);
932           RCHECK(visual_object_verid > 0 && visual_object_verid < 3);
933           RCHECK(ReadBits(&reader, 3) != 0);
934         }
935         int visual_object_type = ReadBits(&reader, 4);
936         RCHECK(visual_object_type > 0 && visual_object_type < 6);
937         break;
938       }
939 
940       case VOP_START_CODE:
941         RCHECK(++vop_count <= visual_object_count);
942         break;
943     }
944     // Skip this block.
945     offset += 6;
946   }
947 }
948 
949 // Additional checks for a MOV/QuickTime/MPEG4 container.
CheckMov(const uint8 * buffer,int buffer_size)950 static bool CheckMov(const uint8* buffer, int buffer_size) {
951   // Reference: ISO/IEC 14496-12:2005(E).
952   // (http://standards.iso.org/ittf/PubliclyAvailableStandards/c061988_ISO_IEC_14496-12_2012.zip)
953   RCHECK(buffer_size > 8);
954 
955   int offset = 0;
956   while (offset + 8 < buffer_size) {
957     uint32 atomsize = Read32(buffer + offset);
958     uint32 atomtype = Read32(buffer + offset + 4);
959     // Only need to check for ones that are valid at the top level.
960     switch (atomtype) {
961       case TAG('f','t','y','p'):
962       case TAG('p','d','i','n'):
963       case TAG('m','o','o','v'):
964       case TAG('m','o','o','f'):
965       case TAG('m','f','r','a'):
966       case TAG('m','d','a','t'):
967       case TAG('f','r','e','e'):
968       case TAG('s','k','i','p'):
969       case TAG('m','e','t','a'):
970       case TAG('m','e','c','o'):
971       case TAG('s','t','y','p'):
972       case TAG('s','i','d','x'):
973       case TAG('s','s','i','x'):
974       case TAG('p','r','f','t'):
975       case TAG('b','l','o','c'):
976         break;
977       default:
978         return false;
979     }
980     if (atomsize == 1) {
981       // Indicates that the length is the next 64bits.
982       if (offset + 16 > buffer_size)
983         break;
984       if (Read32(buffer + offset + 8) != 0)
985         break;  // Offset is way past buffer size.
986       atomsize = Read32(buffer + offset + 12);
987     }
988     if (atomsize == 0 || atomsize > static_cast<size_t>(buffer_size))
989       break;  // Indicates the last atom or length too big.
990     offset += atomsize;
991   }
992   return true;
993 }
994 
// Values of the 2-bit version field read by ValidMpegAudioFrameHeader().
enum MPEGVersion {
  VERSION_25 = 0,
  VERSION_RESERVED = 1,
  VERSION_2 = 2,
  VERSION_1 = 3
};
// Values of the 2-bit layer field read by ValidMpegAudioFrameHeader().
enum MPEGLayer {
  L_RESERVED = 0,
  LAYER_3 = 1,
  LAYER_2 = 2,
  LAYER_1 = 3
};
1007 
// Sample rates, indexed first by the 2-bit version field (see MPEGVersion)
// and then by the 2-bit sampling rate index. A zero marks a combination that
// has no sample rate. The table is only ever read, hence const.
static const int kSampleRateTable[4][4] = {
  { 11025, 12000, 8000, 0 },   // v2.5
  { 0, 0, 0, 0 },              // not used
  { 22050, 24000, 16000, 0 },  // v2
  { 44100, 48000, 32000, 0 }   // v1
};
1013 
// Bit rates indexed by the 4-bit bitrate index, one table per version/layer
// combination. ValidMpegAudioFrameHeader() scales the entries by 1000 when
// computing a frame size and rejects a zero entry. The tables are only ever
// read, hence const.
static const int kBitRateTableV1L1[16] = {
  0, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, 448, 0
};
static const int kBitRateTableV1L2[16] = {
  0, 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384, 0
};
static const int kBitRateTableV1L3[16] = {
  0, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 0
};
static const int kBitRateTableV2L1[16] = {
  0, 32, 48, 56, 64, 80, 96, 112, 128, 144, 160, 176, 192, 224, 256, 0
};
static const int kBitRateTableV2L23[16] = {
  0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160, 0
};
1024 
ValidMpegAudioFrameHeader(const uint8 * header,int header_size,int * framesize)1025 static bool ValidMpegAudioFrameHeader(const uint8* header,
1026                                       int header_size,
1027                                       int* framesize) {
1028   // Reference: http://mpgedit.org/mpgedit/mpeg_format/mpeghdr.htm.
1029   DCHECK_GE(header_size, 4);
1030   *framesize = 0;
1031   BitReader reader(header, 4);  // Header can only be 4 bytes long.
1032 
1033   // Verify frame sync (11 bits) are all set.
1034   RCHECK(ReadBits(&reader, 11) == 0x7ff);
1035 
1036   // Verify MPEG audio version id.
1037   int version = ReadBits(&reader, 2);
1038   RCHECK(version != 1);  // Reserved.
1039 
1040   // Verify layer.
1041   int layer = ReadBits(&reader, 2);
1042   RCHECK(layer != 0);
1043 
1044   // Skip protection bit.
1045   reader.SkipBits(1);
1046 
1047   // Verify bitrate index.
1048   int bitrate_index = ReadBits(&reader, 4);
1049   RCHECK(bitrate_index != 0xf);
1050 
1051   // Verify sampling rate frequency index.
1052   int sampling_index = ReadBits(&reader, 2);
1053   RCHECK(sampling_index != 3);
1054 
1055   // Get padding bit.
1056   int padding = ReadBits(&reader, 1);
1057 
1058   // Frame size:
1059   // For Layer I files = (12 * BitRate / SampleRate + Padding) * 4
1060   // For others = 144 * BitRate / SampleRate + Padding
1061   // Unfortunately, BitRate and SampleRate are coded.
1062   int sampling_rate = kSampleRateTable[version][sampling_index];
1063   int bitrate;
1064   if (version == VERSION_1) {
1065     if (layer == LAYER_1)
1066       bitrate = kBitRateTableV1L1[bitrate_index];
1067     else if (layer == LAYER_2)
1068       bitrate = kBitRateTableV1L2[bitrate_index];
1069     else
1070       bitrate = kBitRateTableV1L3[bitrate_index];
1071   } else {
1072     if (layer == LAYER_1)
1073       bitrate = kBitRateTableV2L1[bitrate_index];
1074     else
1075       bitrate = kBitRateTableV2L23[bitrate_index];
1076   }
1077   if (layer == LAYER_1)
1078     *framesize = ((12000 * bitrate) / sampling_rate + padding) * 4;
1079   else
1080     *framesize = (144000 * bitrate) / sampling_rate + padding;
1081   return (bitrate > 0 && sampling_rate > 0);
1082 }
1083 
1084 // Extract a size encoded the MP3 way.
GetMp3HeaderSize(const uint8 * buffer,int buffer_size)1085 static int GetMp3HeaderSize(const uint8* buffer, int buffer_size) {
1086   DCHECK_GE(buffer_size, 9);
1087   int size = ((buffer[6] & 0x7f) << 21) + ((buffer[7] & 0x7f) << 14) +
1088              ((buffer[8] & 0x7f) << 7) + (buffer[9] & 0x7f) + 10;
1089   if (buffer[5] & 0x10)  // Footer added?
1090     size += 10;
1091   return size;
1092 }
1093 
1094 // Additional checks for a MP3 container.
CheckMp3(const uint8 * buffer,int buffer_size,bool seenHeader)1095 static bool CheckMp3(const uint8* buffer, int buffer_size, bool seenHeader) {
1096   RCHECK(buffer_size >= 10);  // Must be enough to read the initial header.
1097 
1098   int framesize;
1099   int numSeen = 0;
1100   int offset = 0;
1101   if (seenHeader) {
1102     offset = GetMp3HeaderSize(buffer, buffer_size);
1103   } else {
1104     // Skip over leading 0's.
1105     while (offset < buffer_size && buffer[offset] == 0)
1106       ++offset;
1107   }
1108 
1109   while (offset + 3 < buffer_size) {
1110     RCHECK(ValidMpegAudioFrameHeader(
1111         buffer + offset, buffer_size - offset, &framesize));
1112 
1113     // Have we seen enough valid headers?
1114     if (++numSeen > 10)
1115       return true;
1116     offset += framesize;
1117   }
1118   // Off the end of the buffer, return success if a few valid headers seen.
1119   return numSeen > 2;
1120 }
1121 
1122 // Check that the next characters in |buffer| represent a number. The format
1123 // accepted is optional whitespace followed by 1 or more digits. |max_digits|
1124 // specifies the maximum number of digits to process. Returns true if a valid
1125 // number is found, false otherwise.
VerifyNumber(const uint8 * buffer,int buffer_size,int * offset,int max_digits)1126 static bool VerifyNumber(const uint8* buffer,
1127                          int buffer_size,
1128                          int* offset,
1129                          int max_digits) {
1130   RCHECK(*offset < buffer_size);
1131 
1132   // Skip over any leading space.
1133   while (isspace(buffer[*offset])) {
1134     ++(*offset);
1135     RCHECK(*offset < buffer_size);
1136   }
1137 
1138   // Need to process up to max_digits digits.
1139   int numSeen = 0;
1140   while (--max_digits >= 0 && isdigit(buffer[*offset])) {
1141     ++numSeen;
1142     ++(*offset);
1143     if (*offset >= buffer_size)
1144       return true;  // Out of space but seen a digit.
1145   }
1146 
1147   // Success if at least one digit seen.
1148   return (numSeen > 0);
1149 }
1150 
1151 // Check that the next character in |buffer| is one of |c1| or |c2|. |c2| is
1152 // optional. Returns true if there is a match, false if no match or out of
1153 // space.
VerifyCharacters(const uint8 * buffer,int buffer_size,int * offset,char c1,char c2)1154 static inline bool VerifyCharacters(const uint8* buffer,
1155                                     int buffer_size,
1156                                     int* offset,
1157                                     char c1,
1158                                     char c2) {
1159   RCHECK(*offset < buffer_size);
1160   char c = static_cast<char>(buffer[(*offset)++]);
1161   return (c == c1 || (c == c2 && c2 != 0));
1162 }
1163 
1164 // Checks for a SRT container.
CheckSrt(const uint8 * buffer,int buffer_size)1165 static bool CheckSrt(const uint8* buffer, int buffer_size) {
1166   // Reference: http://en.wikipedia.org/wiki/SubRip
1167   RCHECK(buffer_size > 20);
1168 
1169   // First line should just be the subtitle sequence number.
1170   int offset = StartsWith(buffer, buffer_size, UTF8_BYTE_ORDER_MARK) ? 3 : 0;
1171   RCHECK(VerifyNumber(buffer, buffer_size, &offset, 100));
1172   RCHECK(VerifyCharacters(buffer, buffer_size, &offset, '\n', '\r'));
1173 
1174   // Skip any additional \n\r.
1175   while (VerifyCharacters(buffer, buffer_size, &offset, '\n', '\r')) {}
1176   --offset;  // Since VerifyCharacters() gobbled up the next non-CR/LF.
1177 
1178   // Second line should look like the following:
1179   //   00:00:10,500 --> 00:00:13,000
1180   // Units separator can be , or .
1181   RCHECK(VerifyNumber(buffer, buffer_size, &offset, 100));
1182   RCHECK(VerifyCharacters(buffer, buffer_size, &offset, ':', 0));
1183   RCHECK(VerifyNumber(buffer, buffer_size, &offset, 2));
1184   RCHECK(VerifyCharacters(buffer, buffer_size, &offset, ':', 0));
1185   RCHECK(VerifyNumber(buffer, buffer_size, &offset, 2));
1186   RCHECK(VerifyCharacters(buffer, buffer_size, &offset, ',', '.'));
1187   RCHECK(VerifyNumber(buffer, buffer_size, &offset, 3));
1188   RCHECK(VerifyCharacters(buffer, buffer_size, &offset, ' ', 0));
1189   RCHECK(VerifyCharacters(buffer, buffer_size, &offset, '-', 0));
1190   RCHECK(VerifyCharacters(buffer, buffer_size, &offset, '-', 0));
1191   RCHECK(VerifyCharacters(buffer, buffer_size, &offset, '>', 0));
1192   RCHECK(VerifyCharacters(buffer, buffer_size, &offset, ' ', 0));
1193   RCHECK(VerifyNumber(buffer, buffer_size, &offset, 100));
1194   RCHECK(VerifyCharacters(buffer, buffer_size, &offset, ':', 0));
1195   RCHECK(VerifyNumber(buffer, buffer_size, &offset, 2));
1196   RCHECK(VerifyCharacters(buffer, buffer_size, &offset, ':', 0));
1197   RCHECK(VerifyNumber(buffer, buffer_size, &offset, 2));
1198   RCHECK(VerifyCharacters(buffer, buffer_size, &offset, ',', '.'));
1199   RCHECK(VerifyNumber(buffer, buffer_size, &offset, 3));
1200   return true;
1201 }
1202 
1203 // Read a Matroska Element Id.
GetElementId(BitReader * reader)1204 static int GetElementId(BitReader* reader) {
1205   // Element ID is coded with the leading zero bits (max 3) determining size.
1206   // If it is an invalid encoding or the end of the buffer is reached,
1207   // return -1 as a tag that won't be expected.
1208   if (reader->bits_available() >= 8) {
1209     int num_bits_to_read = 0;
1210     static int prefix[] = { 0x80, 0x4000, 0x200000, 0x10000000 };
1211     for (int i = 0; i < 4; ++i) {
1212       num_bits_to_read += 7;
1213       if (ReadBits(reader, 1) == 1) {
1214         if (reader->bits_available() < num_bits_to_read)
1215           break;
1216         // prefix[] adds back the bits read individually.
1217         return ReadBits(reader, num_bits_to_read) | prefix[i];
1218       }
1219     }
1220   }
1221   // Invalid encoding, return something not expected.
1222   return -1;
1223 }
1224 
1225 // Read a Matroska Unsigned Integer (VINT).
GetVint(BitReader * reader)1226 static uint64 GetVint(BitReader* reader) {
1227   // Values are coded with the leading zero bits (max 7) determining size.
1228   // If it is an invalid coding or the end of the buffer is reached,
1229   // return something that will go off the end of the buffer.
1230   if (reader->bits_available() >= 8) {
1231     int num_bits_to_read = 0;
1232     for (int i = 0; i < 8; ++i) {
1233       num_bits_to_read += 7;
1234       if (ReadBits(reader, 1) == 1) {
1235         if (reader->bits_available() < num_bits_to_read)
1236           break;
1237         return ReadBits(reader, num_bits_to_read);
1238       }
1239     }
1240   }
1241   // Incorrect format (more than 7 leading 0's) or off the end of the buffer.
1242   // Since the return value is used as a byte size, return a value that will
1243   // cause a failure when used.
1244   return (reader->bits_available() / 8) + 2;
1245 }
1246 
1247 // Additional checks for a WEBM container.
CheckWebm(const uint8 * buffer,int buffer_size)1248 static bool CheckWebm(const uint8* buffer, int buffer_size) {
1249   // Reference: http://www.matroska.org/technical/specs/index.html
1250   RCHECK(buffer_size > 12);
1251 
1252   BitReader reader(buffer, buffer_size);
1253 
1254   // Verify starting Element Id.
1255   RCHECK(GetElementId(&reader) == 0x1a45dfa3);
1256 
1257   // Get the header size, and ensure there are enough bits to check.
1258   int header_size = GetVint(&reader);
1259   RCHECK(reader.bits_available() / 8 >= header_size);
1260 
1261   // Loop through the header.
1262   while (reader.bits_available() > 0) {
1263     int tag = GetElementId(&reader);
1264     int tagsize = GetVint(&reader);
1265     switch (tag) {
1266       case 0x4286:  // EBMLVersion
1267       case 0x42f7:  // EBMLReadVersion
1268       case 0x42f2:  // EBMLMaxIdLength
1269       case 0x42f3:  // EBMLMaxSizeLength
1270       case 0x4287:  // DocTypeVersion
1271       case 0x4285:  // DocTypeReadVersion
1272       case 0xec:    // void
1273       case 0xbf:    // CRC32
1274         RCHECK(reader.SkipBits(tagsize * 8));
1275         break;
1276 
1277       case 0x4282:  // EBMLDocType
1278         // Need to see "webm" or "matroska" next.
1279         switch (ReadBits(&reader, 32)) {
1280           case TAG('w', 'e', 'b', 'm') :
1281             return true;
1282           case TAG('m', 'a', 't', 'r') :
1283             return (ReadBits(&reader, 32) == TAG('o', 's', 'k', 'a'));
1284         }
1285         return false;
1286 
1287       default:  // Unrecognized tag
1288         return false;
1289     }
1290   }
1291   return false;
1292 }
1293 
// Values of the byte that follows the 00 00 01 prefix which CheckVC1() keeps
// track of.
enum VC1StartCodes {
  VC1_FRAME_START_CODE = 0x0D,
  VC1_ENTRY_POINT_START_CODE = 0x0E,
  VC1_SEQUENCE_START_CODE = 0x0F
};
1299 
1300 // Checks for a VC1 bitstream container.
CheckVC1(const uint8 * buffer,int buffer_size)1301 static bool CheckVC1(const uint8* buffer, int buffer_size) {
1302   // Reference: SMPTE 421M
1303   // (http://standards.smpte.org/content/978-1-61482-555-5/st-421-2006/SEC1.body.pdf)
1304   // However, no length ... simply scan for start code values.
1305   // Expect to see SEQ | [ [ ENTRY ] PIC* ]*
1306   // Note tags are very similar to H.264.
1307 
1308   RCHECK(buffer_size >= 24);
1309 
1310   // First check for Bitstream Metadata Serialization (Annex L)
1311   if (buffer[0] == 0xc5 &&
1312       Read32(buffer + 4) == 0x04 &&
1313       Read32(buffer + 20) == 0x0c) {
1314     // Verify settings in STRUCT_C and STRUCT_A
1315     BitReader reader(buffer + 8, 12);
1316 
1317     int profile = ReadBits(&reader, 4);
1318     if (profile == 0 || profile == 4) {  // simple or main
1319       // Skip FRMRTQ_POSTPROC, BITRTQ_POSTPROC, and LOOPFILTER.
1320       reader.SkipBits(3 + 5 + 1);
1321 
1322       // Next bit must be 0.
1323       RCHECK(ReadBits(&reader, 1) == 0);
1324 
1325       // Skip MULTIRES.
1326       reader.SkipBits(1);
1327 
1328       // Next bit must be 1.
1329       RCHECK(ReadBits(&reader, 1) == 1);
1330 
1331       // Skip FASTUVMC, EXTENDED_MV, DQUANT, and VSTRANSFORM.
1332       reader.SkipBits(1 + 1 + 2 + 1);
1333 
1334       // Next bit must be 0.
1335       RCHECK(ReadBits(&reader, 1) == 0);
1336 
1337       // Skip OVERLAP, SYNCMARKER, RANGERED, MAXBFRAMES, QUANTIZER, and
1338       // FINTERPFLAG.
1339       reader.SkipBits(1 + 1 + 1 + 3 + 2 + 1);
1340 
1341       // Next bit must be 1.
1342       RCHECK(ReadBits(&reader, 1) == 1);
1343 
1344     } else {
1345       RCHECK(profile == 12);  // Other profile values not allowed.
1346       RCHECK(ReadBits(&reader, 28) == 0);
1347     }
1348 
1349     // Now check HORIZ_SIZE and VERT_SIZE, which must be 8192 or less.
1350     RCHECK(ReadBits(&reader, 32) <= 8192);
1351     RCHECK(ReadBits(&reader, 32) <= 8192);
1352     return true;
1353   }
1354 
1355   // Buffer isn't Bitstream Metadata, so scan for start codes.
1356   int offset = 0;
1357   int sequence_start_code = 0;
1358   int frame_start_code = 0;
1359   while (true) {
1360     // Advance to start_code, if there is one.
1361     if (!AdvanceToStartCode(buffer, buffer_size, &offset, 5, 24, 1)) {
1362       // Not a complete sequence in memory, so return true if we've seen a
1363       // sequence start and a frame start (not checking entry points since
1364       // they only occur in advanced profiles).
1365       return (sequence_start_code > 0 && frame_start_code > 0);
1366     }
1367 
1368     // Now verify the block. AdvanceToStartCode() made sure that there are
1369     // at least 5 bytes remaining in the buffer.
1370     BitReader reader(buffer + offset, 5);
1371     RCHECK(ReadBits(&reader, 24) == 1);
1372 
1373     // Keep track of the number of certain types received.
1374     switch (ReadBits(&reader, 8)) {
1375       case VC1_SEQUENCE_START_CODE: {
1376         ++sequence_start_code;
1377         switch (ReadBits(&reader, 2)) {
1378           case 0:  // simple
1379           case 1:  // main
1380             RCHECK(ReadBits(&reader, 2) == 0);
1381             break;
1382           case 2:  // complex
1383             return false;
1384           case 3:  // advanced
1385             RCHECK(ReadBits(&reader, 3) <= 4);  // Verify level = 0..4
1386             RCHECK(ReadBits(&reader, 2) == 1);  // Verify colordiff_format = 1
1387             break;
1388         }
1389         break;
1390       }
1391 
1392       case VC1_ENTRY_POINT_START_CODE:
1393         // No fields in entry data to check. However, it must occur after
1394         // sequence header.
1395         RCHECK(sequence_start_code > 0);
1396         break;
1397 
1398       case VC1_FRAME_START_CODE:
1399         ++frame_start_code;
1400         break;
1401     }
1402     offset += 5;
1403   }
1404 }
1405 
1406 // For some formats the signature is a bunch of characters. They are defined
1407 // below. Note that the first 4 characters of the string may be used as a TAG
1408 // in LookupContainerByFirst4. For signatures that contain embedded \0, use
1409 // uint8[].
1410 static const char kAmrSignature[] = "#!AMR";
1411 static const uint8 kAsfSignature[] = { 0x30, 0x26, 0xb2, 0x75, 0x8e, 0x66, 0xcf,
1412                                        0x11, 0xa6, 0xd9, 0x00, 0xaa, 0x00, 0x62,
1413                                        0xce, 0x6c };
1414 static const char kAssSignature[] = "[Script Info]";
1415 static const char kAssBomSignature[] = UTF8_BYTE_ORDER_MARK "[Script Info]";
1416 static const uint8 kWtvSignature[] = { 0xb7, 0xd8, 0x00, 0x20, 0x37, 0x49, 0xda,
1417                                        0x11, 0xa6, 0x4e, 0x00, 0x07, 0xe9, 0x5e,
1418                                        0xad, 0x8d };
1419 
1420 // Attempt to determine the container type from the buffer provided. This is
1421 // a simple pass, that uses the first 4 bytes of the buffer as an index to get
1422 // a rough idea of the container format.
LookupContainerByFirst4(const uint8 * buffer,int buffer_size)1423 static MediaContainerName LookupContainerByFirst4(const uint8* buffer,
1424                                                   int buffer_size) {
1425   // Minimum size that the code expects to exist without checking size.
1426   if (buffer_size < 12)
1427     return CONTAINER_UNKNOWN;
1428 
1429   uint32 first4 = Read32(buffer);
1430   switch (first4) {
1431     case 0x1a45dfa3:
1432       if (CheckWebm(buffer, buffer_size))
1433         return CONTAINER_WEBM;
1434       break;
1435 
1436     case 0x3026b275:
1437       if (StartsWith(buffer,
1438                      buffer_size,
1439                      kAsfSignature,
1440                      sizeof(kAsfSignature))) {
1441         return CONTAINER_ASF;
1442       }
1443       break;
1444 
1445     case TAG('#','!','A','M'):
1446       if (StartsWith(buffer, buffer_size, kAmrSignature))
1447         return CONTAINER_AMR;
1448       break;
1449 
1450     case TAG('#','E','X','T'):
1451       if (CheckHls(buffer, buffer_size))
1452         return CONTAINER_HLS;
1453       break;
1454 
1455     case TAG('.','R','M','F'):
1456       if (buffer[4] == 0 && buffer[5] == 0)
1457         return CONTAINER_RM;
1458       break;
1459 
1460     case TAG('.','r','a','\xfd'):
1461       return CONTAINER_RM;
1462 
1463     case TAG('B','I','K','b'):
1464     case TAG('B','I','K','d'):
1465     case TAG('B','I','K','f'):
1466     case TAG('B','I','K','g'):
1467     case TAG('B','I','K','h'):
1468     case TAG('B','I','K','i'):
1469       if (CheckBink(buffer, buffer_size))
1470         return CONTAINER_BINK;
1471       break;
1472 
1473     case TAG('c','a','f','f'):
1474       if (CheckCaf(buffer, buffer_size))
1475         return CONTAINER_CAF;
1476       break;
1477 
1478     case TAG('D','E','X','A'):
1479       if (buffer_size > 15 &&
1480           Read16(buffer + 11) <= 2048 &&
1481           Read16(buffer + 13) <= 2048) {
1482         return CONTAINER_DXA;
1483       }
1484       break;
1485 
1486     case TAG('D','T','S','H'):
1487       if (Read32(buffer + 4) == TAG('D','H','D','R'))
1488         return CONTAINER_DTSHD;
1489       break;
1490 
1491     case 0x64a30100:
1492     case 0x64a30200:
1493     case 0x64a30300:
1494     case 0x64a30400:
1495     case 0x0001a364:
1496     case 0x0002a364:
1497     case 0x0003a364:
1498       if (Read32(buffer + 4) != 0 && Read32(buffer + 8) != 0)
1499         return CONTAINER_IRCAM;
1500       break;
1501 
1502     case TAG('f','L','a','C'):
1503       return CONTAINER_FLAC;
1504 
1505     case TAG('F','L','V',0):
1506     case TAG('F','L','V',1):
1507     case TAG('F','L','V',2):
1508     case TAG('F','L','V',3):
1509     case TAG('F','L','V',4):
1510       if (buffer[5] == 0 && Read32(buffer + 5) > 8)
1511         return CONTAINER_FLV;
1512       break;
1513 
1514     case TAG('F','O','R','M'):
1515       switch (Read32(buffer + 8)) {
1516         case TAG('A','I','F','F'):
1517         case TAG('A','I','F','C'):
1518           return CONTAINER_AIFF;
1519       }
1520       break;
1521 
1522     case TAG('M','A','C',' '):
1523       return CONTAINER_APE;
1524 
1525     case TAG('O','N','2',' '):
1526       if (Read32(buffer + 8) == TAG('O','N','2','f'))
1527         return CONTAINER_AVI;
1528       break;
1529 
1530     case TAG('O','g','g','S'):
1531       if (buffer[5] <= 7)
1532         return CONTAINER_OGG;
1533       break;
1534 
1535     case TAG('R','F','6','4'):
1536       if (buffer_size > 16 && Read32(buffer + 12) == TAG('d','s','6','4'))
1537         return CONTAINER_WAV;
1538       break;
1539 
1540     case TAG('R','I','F','F'):
1541       switch (Read32(buffer + 8)) {
1542         case TAG('A','V','I',' '):
1543         case TAG('A','V','I','X'):
1544         case TAG('A','V','I','\x19'):
1545         case TAG('A','M','V',' '):
1546           return CONTAINER_AVI;
1547         case TAG('W','A','V','E'):
1548           return CONTAINER_WAV;
1549       }
1550       break;
1551 
1552     case TAG('[','S','c','r'):
1553       if (StartsWith(buffer, buffer_size, kAssSignature))
1554         return CONTAINER_ASS;
1555       break;
1556 
1557     case TAG('\xef','\xbb','\xbf','['):
1558       if (StartsWith(buffer, buffer_size, kAssBomSignature))
1559         return CONTAINER_ASS;
1560       break;
1561 
1562     case 0x7ffe8001:
1563     case 0xfe7f0180:
1564     case 0x1fffe800:
1565     case 0xff1f00e8:
1566       if (CheckDts(buffer, buffer_size))
1567         return CONTAINER_DTS;
1568       break;
1569 
1570     case 0xb7d80020:
1571       if (StartsWith(buffer,
1572                      buffer_size,
1573                      kWtvSignature,
1574                      sizeof(kWtvSignature))) {
1575         return CONTAINER_WTV;
1576       }
1577       break;
1578   }
1579 
1580   // Now try a few different ones that look at something other
1581   // than the first 4 bytes.
1582   uint32 first3 = first4 & 0xffffff00;
1583   switch (first3) {
1584     case TAG('C','W','S',0):
1585     case TAG('F','W','S',0):
1586       return CONTAINER_SWF;
1587 
1588     case TAG('I','D','3',0):
1589       if (CheckMp3(buffer, buffer_size, true))
1590         return CONTAINER_MP3;
1591       break;
1592   }
1593 
1594   // Maybe the first 2 characters are something we can use.
1595   uint32 first2 = Read16(buffer);
1596   switch (first2) {
1597     case kAc3SyncWord:
1598       if (CheckAc3(buffer, buffer_size))
1599         return CONTAINER_AC3;
1600       if (CheckEac3(buffer, buffer_size))
1601         return CONTAINER_EAC3;
1602       break;
1603 
1604     case 0xfff0:
1605     case 0xfff1:
1606     case 0xfff8:
1607     case 0xfff9:
1608       if (CheckAac(buffer, buffer_size))
1609         return CONTAINER_AAC;
1610       break;
1611   }
1612 
1613   // Check if the file is in MP3 format without the header.
1614   if (CheckMp3(buffer, buffer_size, false))
1615     return CONTAINER_MP3;
1616 
1617   return CONTAINER_UNKNOWN;
1618 }
1619 
1620 // Attempt to determine the container name from the buffer provided.
DetermineContainer(const uint8 * buffer,int buffer_size)1621 MediaContainerName DetermineContainer(const uint8* buffer, int buffer_size) {
1622   DCHECK(buffer);
1623 
1624   // Since MOV/QuickTime/MPEG4 streams are common, check for them first.
1625   if (CheckMov(buffer, buffer_size))
1626     return CONTAINER_MOV;
1627 
1628   // Next attempt the simple checks, that typically look at just the
1629   // first few bytes of the file.
1630   MediaContainerName result = LookupContainerByFirst4(buffer, buffer_size);
1631   if (result != CONTAINER_UNKNOWN)
1632     return result;
1633 
1634   // Additional checks that may scan a portion of the buffer.
1635   if (CheckMpeg2ProgramStream(buffer, buffer_size))
1636     return CONTAINER_MPEG2PS;
1637   if (CheckMpeg2TransportStream(buffer, buffer_size))
1638     return CONTAINER_MPEG2TS;
1639   if (CheckMJpeg(buffer, buffer_size))
1640     return CONTAINER_MJPEG;
1641   if (CheckDV(buffer, buffer_size))
1642     return CONTAINER_DV;
1643   if (CheckH261(buffer, buffer_size))
1644     return CONTAINER_H261;
1645   if (CheckH263(buffer, buffer_size))
1646     return CONTAINER_H263;
1647   if (CheckH264(buffer, buffer_size))
1648     return CONTAINER_H264;
1649   if (CheckMpeg4BitStream(buffer, buffer_size))
1650     return CONTAINER_MPEG4BS;
1651   if (CheckVC1(buffer, buffer_size))
1652     return CONTAINER_VC1;
1653   if (CheckSrt(buffer, buffer_size))
1654     return CONTAINER_SRT;
1655   if (CheckGsm(buffer, buffer_size))
1656     return CONTAINER_GSM;
1657 
1658   // AC3/EAC3 might not start at the beginning of the stream,
1659   // so scan for a start code.
1660   int offset = 1;  // No need to start at byte 0 due to First4 check.
1661   if (AdvanceToStartCode(buffer, buffer_size, &offset, 4, 16, kAc3SyncWord)) {
1662     if (CheckAc3(buffer + offset, buffer_size - offset))
1663       return CONTAINER_AC3;
1664     if (CheckEac3(buffer + offset, buffer_size - offset))
1665       return CONTAINER_EAC3;
1666   }
1667 
1668   return CONTAINER_UNKNOWN;
1669 }
1670 
1671 }  // namespace container_names
1672 
1673 }  // namespace media
1674