1 /*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 //#define LOG_NDEBUG 0
18 #define LOG_TAG "MPEG4Extractor"
19
20 #include <ctype.h>
21 #include <inttypes.h>
22 #include <stdint.h>
23 #include <stdlib.h>
24 #include <string.h>
25
26 #include <utils/Log.h>
27
28 #include "include/MPEG4Extractor.h"
29 #include "include/SampleTable.h"
30 #include "include/ESDS.h"
31
32 #include <media/stagefright/foundation/ABitReader.h>
33 #include <media/stagefright/foundation/ABuffer.h>
34 #include <media/stagefright/foundation/ADebug.h>
35 #include <media/stagefright/foundation/AMessage.h>
36 #include <media/stagefright/foundation/AUtils.h>
37 #include <media/stagefright/MediaBuffer.h>
38 #include <media/stagefright/MediaBufferGroup.h>
39 #include <media/stagefright/MediaDefs.h>
40 #include <media/stagefright/MediaSource.h>
41 #include <media/stagefright/MetaData.h>
42 #include <utils/String8.h>
43
44 #include <byteswap.h>
45 #include "include/ID3.h"
46
// Fallback for builds where the headers included above leave UINT32_MAX
// undefined. readMetaData() uses it as the upper bound on the combined size
// of the pssh data it copies into the file metadata.
#ifndef UINT32_MAX
#define UINT32_MAX (4294967295U)
#endif
50
51 namespace android {
52
// MediaSource subclass declared alongside MPEG4Extractor. Only the
// declaration is visible here; the member functions are defined elsewhere in
// this file.
// NOTE(review): presumably one instance serves the samples of a single track
// (it is constructed from one format / sample table pair) -- confirm against
// the definitions.
class MPEG4Source : public MediaSource {
public:
    // Caller retains ownership of both "dataSource" and "sampleTable".
    // "sidx" is kept by reference (see mSegments), so it must outlive this
    // object.
    MPEG4Source(const sp<MPEG4Extractor> &owner,
                const sp<MetaData> &format,
                const sp<DataSource> &dataSource,
                int32_t timeScale,
                const sp<SampleTable> &sampleTable,
                Vector<SidxEntry> &sidx,
                const Trex *trex,
                off64_t firstMoofOffset);

    virtual status_t start(MetaData *params = NULL);
    virtual status_t stop();

    virtual sp<MetaData> getFormat();

    virtual status_t read(MediaBuffer **buffer, const ReadOptions *options = NULL);
    // NOTE(review): presumably the read path for fragmented (moof-based)
    // files -- confirm against the definition.
    virtual status_t fragmentedRead(MediaBuffer **buffer, const ReadOptions *options = NULL);

protected:
    // Non-public destructor: instances are not meant to be deleted directly
    // by outside code.
    virtual ~MPEG4Source();

private:
    Mutex mLock;

    // keep the MPEG4Extractor around, since we're referencing its data
    sp<MPEG4Extractor> mOwner;
    sp<MetaData> mFormat;
    sp<DataSource> mDataSource;
    int32_t mTimescale;
    sp<SampleTable> mSampleTable;
    uint32_t mCurrentSampleIndex;
    uint32_t mCurrentFragmentIndex;
    // Reference member, bound to the "sidx" constructor argument.
    Vector<SidxEntry> &mSegments;
    const Trex *mTrex;
    off64_t mFirstMoofOffset;
    off64_t mCurrentMoofOffset;
    off64_t mNextMoofOffset;
    uint32_t mCurrentTime;
    int32_t mLastParsedTrackId;
    int32_t mTrackId;

    int32_t mCryptoMode;    // passed in from extractor
    int32_t mDefaultIVSize; // passed in from extractor
    uint8_t mCryptoKey[16]; // passed in from extractor
    // State for sample auxiliary information.
    // NOTE(review): presumably filled by the
    // parseSampleAuxiliaryInformation{Sizes,Offsets} helpers below -- confirm.
    uint32_t mCurrentAuxInfoType;
    uint32_t mCurrentAuxInfoTypeParameter;
    int32_t mCurrentDefaultSampleInfoSize;
    uint32_t mCurrentSampleInfoCount;
    uint32_t mCurrentSampleInfoAllocSize;
    uint8_t* mCurrentSampleInfoSizes;
    uint32_t mCurrentSampleInfoOffsetCount;
    uint32_t mCurrentSampleInfoOffsetsAllocSize;
    uint64_t* mCurrentSampleInfoOffsets;

    bool mIsAVC;
    bool mIsHEVC;
    size_t mNALLengthSize;

    bool mStarted;

    MediaBufferGroup *mGroup;

    MediaBuffer *mBuffer;

    bool mWantsNALFragments;

    uint8_t *mSrcBuffer;

    size_t parseNALSize(const uint8_t *data) const;
    status_t parseChunk(off64_t *offset);
    status_t parseTrackFragmentHeader(off64_t offset, off64_t size);
    status_t parseTrackFragmentRun(off64_t offset, off64_t size);
    status_t parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size);
    status_t parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size);

    // Fields of a track fragment header, plus a running data offset.
    struct TrackFragmentHeaderInfo {
        // Bits tested against mFlags; each "...Present" bit names the
        // optional field it refers to.
        enum Flags {
            kBaseDataOffsetPresent         = 0x01,
            kSampleDescriptionIndexPresent = 0x02,
            kDefaultSampleDurationPresent  = 0x08,
            kDefaultSampleSizePresent      = 0x10,
            kDefaultSampleFlagsPresent     = 0x20,
            kDurationIsEmpty               = 0x10000,
        };

        uint32_t mTrackID;
        uint32_t mFlags;
        uint64_t mBaseDataOffset;
        uint32_t mSampleDescriptionIndex;
        uint32_t mDefaultSampleDuration;
        uint32_t mDefaultSampleSize;
        uint32_t mDefaultSampleFlags;

        uint64_t mDataOffset;
    };
    TrackFragmentHeaderInfo mTrackFragmentHeaderInfo;

    // Per-sample record: location, size, timing and per-sample crypto data
    // (IV plus clear / encrypted byte counts).
    struct Sample {
        off64_t offset;
        size_t size;
        uint32_t duration;
        int32_t compositionOffset;
        uint8_t iv[16];
        Vector<size_t> clearsizes;
        Vector<size_t> encryptedsizes;
    };
    Vector<Sample> mCurrentSamples;

    // Declared but not defined here: copying is not supported.
    MPEG4Source(const MPEG4Source &);
    MPEG4Source &operator=(const MPEG4Source &);
};
166
// This custom data source wraps an existing one. Requests that fall entirely
// within a cached range are satisfied from the cache; all remaining requests
// are forwarded to the wrapped data source.
// It is used to cache the full sample table metadata for a single track.
// Sources may be wrapped repeatedly to cover all tracks, i.e. each
// MPEG4DataSource caches the sample table metadata for one track.
173
// DataSource wrapper holding one cached byte range of the wrapped source.
struct MPEG4DataSource : public DataSource {
    MPEG4DataSource(const sp<DataSource> &source);

    // initCheck(), getSize() and flags() forward to the wrapped source.
    virtual status_t initCheck() const;
    // Copies from the cache when [offset, offset + size) passes the
    // isInRange() test against the cached range; otherwise forwards the
    // request to the wrapped source.
    virtual ssize_t readAt(off64_t offset, void *data, size_t size);
    virtual status_t getSize(off64_t *size);
    virtual uint32_t flags();

    // Drops any existing cache, then allocates "size" bytes and fills them
    // from the wrapped source starting at "offset". Returns OK on success,
    // -ENOMEM if the allocation fails, or ERROR_IO (leaving the cache empty)
    // if fewer than "size" bytes could be read.
    status_t setCachedRange(off64_t offset, size_t size);

protected:
    virtual ~MPEG4DataSource();

private:
    // Held by readAt() and setCachedRange().
    Mutex mLock;

    sp<DataSource> mSource;
    off64_t mCachedOffset;  // offset of the cached range in the wrapped source
    size_t mCachedSize;     // length of the cached range; 0 when nothing is cached
    uint8_t *mCache;        // malloc'ed buffer, or NULL when nothing is cached

    // Frees mCache and resets the cached range to empty.
    void clearCache();

    // Declared but not defined here: copying is not supported.
    MPEG4DataSource(const MPEG4DataSource &);
    MPEG4DataSource &operator=(const MPEG4DataSource &);
};
200
MPEG4DataSource(const sp<DataSource> & source)201 MPEG4DataSource::MPEG4DataSource(const sp<DataSource> &source)
202 : mSource(source),
203 mCachedOffset(0),
204 mCachedSize(0),
205 mCache(NULL) {
206 }
207
~MPEG4DataSource()208 MPEG4DataSource::~MPEG4DataSource() {
209 clearCache();
210 }
211
clearCache()212 void MPEG4DataSource::clearCache() {
213 if (mCache) {
214 free(mCache);
215 mCache = NULL;
216 }
217
218 mCachedOffset = 0;
219 mCachedSize = 0;
220 }
221
initCheck() const222 status_t MPEG4DataSource::initCheck() const {
223 return mSource->initCheck();
224 }
225
readAt(off64_t offset,void * data,size_t size)226 ssize_t MPEG4DataSource::readAt(off64_t offset, void *data, size_t size) {
227 Mutex::Autolock autoLock(mLock);
228
229 if (isInRange(mCachedOffset, mCachedSize, offset, size)) {
230 memcpy(data, &mCache[offset - mCachedOffset], size);
231 return size;
232 }
233
234 return mSource->readAt(offset, data, size);
235 }
236
getSize(off64_t * size)237 status_t MPEG4DataSource::getSize(off64_t *size) {
238 return mSource->getSize(size);
239 }
240
flags()241 uint32_t MPEG4DataSource::flags() {
242 return mSource->flags();
243 }
244
setCachedRange(off64_t offset,size_t size)245 status_t MPEG4DataSource::setCachedRange(off64_t offset, size_t size) {
246 Mutex::Autolock autoLock(mLock);
247
248 clearCache();
249
250 mCache = (uint8_t *)malloc(size);
251
252 if (mCache == NULL) {
253 return -ENOMEM;
254 }
255
256 mCachedOffset = offset;
257 mCachedSize = size;
258
259 ssize_t err = mSource->readAt(mCachedOffset, mCache, mCachedSize);
260
261 if (err < (ssize_t)size) {
262 clearCache();
263
264 return ERROR_IO;
265 }
266
267 return OK;
268 }
269
270 ////////////////////////////////////////////////////////////////////////////////
271
// Debug switch: when true, parseChunk() prints every chunk it finds and
// hexdumps up to the first 256 bytes of it to stdout.
static const bool kUseHexDump = false;
273
// Writes "size" bytes starting at "_data" to stdout, 16 bytes per row. Each
// row shows its starting offset, the bytes in hex (a short final row is
// padded out) and then the same bytes as characters, with '.' standing in
// for anything isprint() rejects.
static void hexdump(const void *_data, size_t size) {
    const uint8_t *bytes = static_cast<const uint8_t *>(_data);

    for (size_t rowStart = 0; rowStart < size; rowStart += 16) {
        printf("0x%04zx ", rowStart);

        // Number of real bytes in this row (fewer than 16 on the last row).
        const size_t remaining = size - rowStart;
        const size_t rowLen = (remaining > 16) ? 16 : remaining;

        // Hex columns, with an extra gap between the two groups of eight.
        for (size_t col = 0; col < 16; ++col) {
            if (col == 8) {
                printf(" ");
            }

            if (col < rowLen) {
                printf("%02x ", bytes[rowStart + col]);
            } else {
                printf(" ");
            }
        }

        printf(" ");

        // Character column.
        for (size_t col = 0; col < rowLen; ++col) {
            const uint8_t value = bytes[rowStart + col];
            if (isprint(value)) {
                printf("%c", value);
            } else {
                printf(".");
            }
        }

        printf("\n");
    }
}
312
FourCC2MIME(uint32_t fourcc)313 static const char *FourCC2MIME(uint32_t fourcc) {
314 switch (fourcc) {
315 case FOURCC('m', 'p', '4', 'a'):
316 return MEDIA_MIMETYPE_AUDIO_AAC;
317
318 case FOURCC('s', 'a', 'm', 'r'):
319 return MEDIA_MIMETYPE_AUDIO_AMR_NB;
320
321 case FOURCC('s', 'a', 'w', 'b'):
322 return MEDIA_MIMETYPE_AUDIO_AMR_WB;
323
324 case FOURCC('m', 'p', '4', 'v'):
325 return MEDIA_MIMETYPE_VIDEO_MPEG4;
326
327 case FOURCC('s', '2', '6', '3'):
328 case FOURCC('h', '2', '6', '3'):
329 case FOURCC('H', '2', '6', '3'):
330 return MEDIA_MIMETYPE_VIDEO_H263;
331
332 case FOURCC('a', 'v', 'c', '1'):
333 return MEDIA_MIMETYPE_VIDEO_AVC;
334
335 case FOURCC('h', 'v', 'c', '1'):
336 case FOURCC('h', 'e', 'v', '1'):
337 return MEDIA_MIMETYPE_VIDEO_HEVC;
338 default:
339 CHECK(!"should not be here.");
340 return NULL;
341 }
342 }
343
AdjustChannelsAndRate(uint32_t fourcc,uint32_t * channels,uint32_t * rate)344 static bool AdjustChannelsAndRate(uint32_t fourcc, uint32_t *channels, uint32_t *rate) {
345 if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_NB, FourCC2MIME(fourcc))) {
346 // AMR NB audio is always mono, 8kHz
347 *channels = 1;
348 *rate = 8000;
349 return true;
350 } else if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_WB, FourCC2MIME(fourcc))) {
351 // AMR WB audio is always mono, 16kHz
352 *channels = 1;
353 *rate = 16000;
354 return true;
355 }
356 return false;
357 }
358
// Stores "source" and puts all parse state into its initial "nothing parsed
// yet" values. No data is read here: mInitCheck starts out as NO_INIT, which
// is what readMetaData() tests to decide whether the file still has to be
// parsed.
MPEG4Extractor::MPEG4Extractor(const sp<DataSource> &source)
    : mMoofOffset(0),
      mMoofFound(false),
      mMdatFound(false),
      mDataSource(source),
      mInitCheck(NO_INIT),
      mHasVideo(false),
      mHeaderTimescale(0),
      mFirstTrack(NULL),
      mLastTrack(NULL),
      mFileMetaData(new MetaData),
      mFirstSINF(NULL),
      mIsDrm(false) {
}
373
~MPEG4Extractor()374 MPEG4Extractor::~MPEG4Extractor() {
375 Track *track = mFirstTrack;
376 while (track) {
377 Track *next = track->next;
378
379 delete track;
380 track = next;
381 }
382 mFirstTrack = mLastTrack = NULL;
383
384 SINF *sinf = mFirstSINF;
385 while (sinf) {
386 SINF *next = sinf->next;
387 delete[] sinf->IPMPData;
388 delete sinf;
389 sinf = next;
390 }
391 mFirstSINF = NULL;
392
393 for (size_t i = 0; i < mPssh.size(); i++) {
394 delete [] mPssh[i].data;
395 }
396 }
397
flags() const398 uint32_t MPEG4Extractor::flags() const {
399 return CAN_PAUSE |
400 ((mMoofOffset == 0 || mSidxEntries.size() != 0) ?
401 (CAN_SEEK_BACKWARD | CAN_SEEK_FORWARD | CAN_SEEK) : 0);
402 }
403
getMetaData()404 sp<MetaData> MPEG4Extractor::getMetaData() {
405 status_t err;
406 if ((err = readMetaData()) != OK) {
407 return new MetaData;
408 }
409
410 return mFileMetaData;
411 }
412
countTracks()413 size_t MPEG4Extractor::countTracks() {
414 status_t err;
415 if ((err = readMetaData()) != OK) {
416 ALOGV("MPEG4Extractor::countTracks: no tracks");
417 return 0;
418 }
419
420 size_t n = 0;
421 Track *track = mFirstTrack;
422 while (track) {
423 ++n;
424 track = track->next;
425 }
426
427 ALOGV("MPEG4Extractor::countTracks: %zu tracks", n);
428 return n;
429 }
430
getTrackMetaData(size_t index,uint32_t flags)431 sp<MetaData> MPEG4Extractor::getTrackMetaData(
432 size_t index, uint32_t flags) {
433 status_t err;
434 if ((err = readMetaData()) != OK) {
435 return NULL;
436 }
437
438 Track *track = mFirstTrack;
439 while (index > 0) {
440 if (track == NULL) {
441 return NULL;
442 }
443
444 track = track->next;
445 --index;
446 }
447
448 if (track == NULL) {
449 return NULL;
450 }
451
452 if ((flags & kIncludeExtensiveMetaData)
453 && !track->includes_expensive_metadata) {
454 track->includes_expensive_metadata = true;
455
456 const char *mime;
457 CHECK(track->meta->findCString(kKeyMIMEType, &mime));
458 if (!strncasecmp("video/", mime, 6)) {
459 if (mMoofOffset > 0) {
460 int64_t duration;
461 if (track->meta->findInt64(kKeyDuration, &duration)) {
462 // nothing fancy, just pick a frame near 1/4th of the duration
463 track->meta->setInt64(
464 kKeyThumbnailTime, duration / 4);
465 }
466 } else {
467 uint32_t sampleIndex;
468 uint32_t sampleTime;
469 if (track->sampleTable->findThumbnailSample(&sampleIndex) == OK
470 && track->sampleTable->getMetaDataForSample(
471 sampleIndex, NULL /* offset */, NULL /* size */,
472 &sampleTime) == OK) {
473 track->meta->setInt64(
474 kKeyThumbnailTime,
475 ((int64_t)sampleTime * 1000000) / track->timescale);
476 }
477 }
478 }
479 }
480
481 return track->meta;
482 }
483
// Writes the four bytes of "x", most significant first, into s[0..3] and
// NUL-terminates the result. "s" must have room for at least 5 chars.
static void MakeFourCCString(uint32_t x, char *s) {
    for (int i = 0; i < 4; ++i) {
        const int shift = 8 * (3 - i);
        s[i] = static_cast<char>((x >> shift) & 0xff);
    }
    s[4] = '\0';
}
491
readMetaData()492 status_t MPEG4Extractor::readMetaData() {
493 if (mInitCheck != NO_INIT) {
494 return mInitCheck;
495 }
496
497 off64_t offset = 0;
498 status_t err;
499 bool sawMoovOrSidx = false;
500
501 while (!(sawMoovOrSidx && (mMdatFound || mMoofFound))) {
502 off64_t orig_offset = offset;
503 err = parseChunk(&offset, 0);
504
505 if (err != OK && err != UNKNOWN_ERROR) {
506 break;
507 } else if (offset <= orig_offset) {
508 // only continue parsing if the offset was advanced,
509 // otherwise we might end up in an infinite loop
510 ALOGE("did not advance: %lld->%lld", (long long)orig_offset, (long long)offset);
511 err = ERROR_MALFORMED;
512 break;
513 } else if (err == UNKNOWN_ERROR) {
514 sawMoovOrSidx = true;
515 }
516 }
517
518 if (mInitCheck == OK) {
519 if (mHasVideo) {
520 mFileMetaData->setCString(
521 kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_MPEG4);
522 } else {
523 mFileMetaData->setCString(kKeyMIMEType, "audio/mp4");
524 }
525 } else {
526 mInitCheck = err;
527 }
528
529 CHECK_NE(err, (status_t)NO_INIT);
530
531 // copy pssh data into file metadata
532 uint64_t psshsize = 0;
533 for (size_t i = 0; i < mPssh.size(); i++) {
534 psshsize += 20 + mPssh[i].datalen;
535 }
536 if (psshsize > 0 && psshsize <= UINT32_MAX) {
537 char *buf = (char*)malloc(psshsize);
538 if (!buf) {
539 ALOGE("b/28471206");
540 return NO_MEMORY;
541 }
542 char *ptr = buf;
543 for (size_t i = 0; i < mPssh.size(); i++) {
544 memcpy(ptr, mPssh[i].uuid, 20); // uuid + length
545 memcpy(ptr + 20, mPssh[i].data, mPssh[i].datalen);
546 ptr += (20 + mPssh[i].datalen);
547 }
548 mFileMetaData->setData(kKeyPssh, 'pssh', buf, psshsize);
549 free(buf);
550 }
551 return mInitCheck;
552 }
553
getDrmTrackInfo(size_t trackID,int * len)554 char* MPEG4Extractor::getDrmTrackInfo(size_t trackID, int *len) {
555 if (mFirstSINF == NULL) {
556 return NULL;
557 }
558
559 SINF *sinf = mFirstSINF;
560 while (sinf && (trackID != sinf->trackID)) {
561 sinf = sinf->next;
562 }
563
564 if (sinf == NULL) {
565 return NULL;
566 }
567
568 *len = sinf->len;
569 return sinf->IPMPData;
570 }
571
572 // Reads an encoded integer 7 bits at a time until it encounters the high bit clear.
readSize(off64_t offset,const sp<DataSource> DataSource,uint8_t * numOfBytes)573 static int32_t readSize(off64_t offset,
574 const sp<DataSource> DataSource, uint8_t *numOfBytes) {
575 uint32_t size = 0;
576 uint8_t data;
577 bool moreData = true;
578 *numOfBytes = 0;
579
580 while (moreData) {
581 if (DataSource->readAt(offset, &data, 1) < 1) {
582 return -1;
583 }
584 offset ++;
585 moreData = (data >= 128) ? true : false;
586 size = (size << 7) | (data & 0x7f); // Take last 7 bits
587 (*numOfBytes) ++;
588 }
589
590 return size;
591 }
592
parseDrmSINF(off64_t *,off64_t data_offset)593 status_t MPEG4Extractor::parseDrmSINF(
594 off64_t * /* offset */, off64_t data_offset) {
595 uint8_t updateIdTag;
596 if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) {
597 return ERROR_IO;
598 }
599 data_offset ++;
600
601 if (0x01/*OBJECT_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) {
602 return ERROR_MALFORMED;
603 }
604
605 uint8_t numOfBytes;
606 int32_t size = readSize(data_offset, mDataSource, &numOfBytes);
607 if (size < 0) {
608 return ERROR_IO;
609 }
610 data_offset += numOfBytes;
611
612 while(size >= 11 ) {
613 uint8_t descriptorTag;
614 if (mDataSource->readAt(data_offset, &descriptorTag, 1) < 1) {
615 return ERROR_IO;
616 }
617 data_offset ++;
618
619 if (0x11/*OBJECT_DESCRIPTOR_ID_TAG*/ != descriptorTag) {
620 return ERROR_MALFORMED;
621 }
622
623 uint8_t buffer[8];
624 //ObjectDescriptorID and ObjectDescriptor url flag
625 if (mDataSource->readAt(data_offset, buffer, 2) < 2) {
626 return ERROR_IO;
627 }
628 data_offset += 2;
629
630 if ((buffer[1] >> 5) & 0x0001) { //url flag is set
631 return ERROR_MALFORMED;
632 }
633
634 if (mDataSource->readAt(data_offset, buffer, 8) < 8) {
635 return ERROR_IO;
636 }
637 data_offset += 8;
638
639 if ((0x0F/*ES_ID_REF_TAG*/ != buffer[1])
640 || ( 0x0A/*IPMP_DESCRIPTOR_POINTER_ID_TAG*/ != buffer[5])) {
641 return ERROR_MALFORMED;
642 }
643
644 SINF *sinf = new SINF;
645 sinf->trackID = U16_AT(&buffer[3]);
646 sinf->IPMPDescriptorID = buffer[7];
647 sinf->next = mFirstSINF;
648 mFirstSINF = sinf;
649
650 size -= (8 + 2 + 1);
651 }
652
653 if (size != 0) {
654 return ERROR_MALFORMED;
655 }
656
657 if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) {
658 return ERROR_IO;
659 }
660 data_offset ++;
661
662 if(0x05/*IPMP_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) {
663 return ERROR_MALFORMED;
664 }
665
666 size = readSize(data_offset, mDataSource, &numOfBytes);
667 if (size < 0) {
668 return ERROR_IO;
669 }
670 data_offset += numOfBytes;
671
672 while (size > 0) {
673 uint8_t tag;
674 int32_t dataLen;
675 if (mDataSource->readAt(data_offset, &tag, 1) < 1) {
676 return ERROR_IO;
677 }
678 data_offset ++;
679
680 if (0x0B/*IPMP_DESCRIPTOR_ID_TAG*/ == tag) {
681 uint8_t id;
682 dataLen = readSize(data_offset, mDataSource, &numOfBytes);
683 if (dataLen < 0) {
684 return ERROR_IO;
685 } else if (dataLen < 4) {
686 return ERROR_MALFORMED;
687 }
688 data_offset += numOfBytes;
689
690 if (mDataSource->readAt(data_offset, &id, 1) < 1) {
691 return ERROR_IO;
692 }
693 data_offset ++;
694
695 SINF *sinf = mFirstSINF;
696 while (sinf && (sinf->IPMPDescriptorID != id)) {
697 sinf = sinf->next;
698 }
699 if (sinf == NULL) {
700 return ERROR_MALFORMED;
701 }
702 sinf->len = dataLen - 3;
703 sinf->IPMPData = new (std::nothrow) char[sinf->len];
704 if (sinf->IPMPData == NULL) {
705 return ERROR_MALFORMED;
706 }
707 data_offset += 2;
708
709 if (mDataSource->readAt(data_offset, sinf->IPMPData, sinf->len) < sinf->len) {
710 return ERROR_IO;
711 }
712 data_offset += sinf->len;
713
714 size -= (dataLen + numOfBytes + 1);
715 }
716 }
717
718 if (size != 0) {
719 return ERROR_MALFORMED;
720 }
721
722 return UNKNOWN_ERROR; // Return a dummy error.
723 }
724
725 struct PathAdder {
PathAdderandroid::PathAdder726 PathAdder(Vector<uint32_t> *path, uint32_t chunkType)
727 : mPath(path) {
728 mPath->push(chunkType);
729 }
730
~PathAdderandroid::PathAdder731 ~PathAdder() {
732 mPath->pop();
733 }
734
735 private:
736 Vector<uint32_t> *mPath;
737
738 PathAdder(const PathAdder &);
739 PathAdder &operator=(const PathAdder &);
740 };
741
underMetaDataPath(const Vector<uint32_t> & path)742 static bool underMetaDataPath(const Vector<uint32_t> &path) {
743 return path.size() >= 5
744 && path[0] == FOURCC('m', 'o', 'o', 'v')
745 && path[1] == FOURCC('u', 'd', 't', 'a')
746 && path[2] == FOURCC('m', 'e', 't', 'a')
747 && path[3] == FOURCC('i', 'l', 's', 't');
748 }
749
underQTMetaPath(const Vector<uint32_t> & path,int32_t depth)750 static bool underQTMetaPath(const Vector<uint32_t> &path, int32_t depth) {
751 return path.size() >= 2
752 && path[0] == FOURCC('m', 'o', 'o', 'v')
753 && path[1] == FOURCC('m', 'e', 't', 'a')
754 && (depth == 2
755 || (depth == 3
756 && (path[2] == FOURCC('h', 'd', 'l', 'r')
757 || path[2] == FOURCC('i', 'l', 's', 't')
758 || path[2] == FOURCC('k', 'e', 'y', 's'))));
759 }
760
761 // Given a time in seconds since Jan 1 1904, produce a human-readable string.
convertTimeToDate(int64_t time_1904,String8 * s)762 static void convertTimeToDate(int64_t time_1904, String8 *s) {
763 time_t time_1970 = time_1904 - (((66 * 365 + 17) * 24) * 3600);
764
765 char tmp[32];
766 strftime(tmp, sizeof(tmp), "%Y%m%dT%H%M%S.000Z", gmtime(&time_1970));
767
768 s->setTo(tmp);
769 }
770
parseChunk(off64_t * offset,int depth)771 status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
772 ALOGV("entering parseChunk %lld/%d", (long long)*offset, depth);
773 uint32_t hdr[2];
774 if (mDataSource->readAt(*offset, hdr, 8) < 8) {
775 return ERROR_IO;
776 }
777 uint64_t chunk_size = ntohl(hdr[0]);
778 int32_t chunk_type = ntohl(hdr[1]);
779 off64_t data_offset = *offset + 8;
780
781 if (chunk_size == 1) {
782 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
783 return ERROR_IO;
784 }
785 chunk_size = ntoh64(chunk_size);
786 data_offset += 8;
787
788 if (chunk_size < 16) {
789 // The smallest valid chunk is 16 bytes long in this case.
790 return ERROR_MALFORMED;
791 }
792 } else if (chunk_size == 0) {
793 if (depth == 0) {
794 // atom extends to end of file
795 off64_t sourceSize;
796 if (mDataSource->getSize(&sourceSize) == OK) {
797 chunk_size = (sourceSize - *offset);
798 } else {
799 // XXX could we just pick a "sufficiently large" value here?
800 ALOGE("atom size is 0, and data source has no size");
801 return ERROR_MALFORMED;
802 }
803 } else {
804 // not allowed for non-toplevel atoms, skip it
805 *offset += 4;
806 return OK;
807 }
808 } else if (chunk_size < 8) {
809 // The smallest valid chunk is 8 bytes long.
810 ALOGE("invalid chunk size: %" PRIu64, chunk_size);
811 return ERROR_MALFORMED;
812 }
813
814 char chunk[5];
815 MakeFourCCString(chunk_type, chunk);
816 ALOGV("chunk: %s @ %lld, %d", chunk, (long long)*offset, depth);
817
818 if (kUseHexDump) {
819 static const char kWhitespace[] = " ";
820 const char *indent = &kWhitespace[sizeof(kWhitespace) - 1 - 2 * depth];
821 printf("%sfound chunk '%s' of size %" PRIu64 "\n", indent, chunk, chunk_size);
822
823 char buffer[256];
824 size_t n = chunk_size;
825 if (n > sizeof(buffer)) {
826 n = sizeof(buffer);
827 }
828 if (mDataSource->readAt(*offset, buffer, n)
829 < (ssize_t)n) {
830 return ERROR_IO;
831 }
832
833 hexdump(buffer, n);
834 }
835
836 PathAdder autoAdder(&mPath, chunk_type);
837
838 off64_t chunk_data_size = *offset + chunk_size - data_offset;
839
840 if (chunk_type != FOURCC('c', 'p', 'r', 't')
841 && chunk_type != FOURCC('c', 'o', 'v', 'r')
842 && mPath.size() == 5 && underMetaDataPath(mPath)) {
843 off64_t stop_offset = *offset + chunk_size;
844 *offset = data_offset;
845 while (*offset < stop_offset) {
846 status_t err = parseChunk(offset, depth + 1);
847 if (err != OK) {
848 return err;
849 }
850 }
851
852 if (*offset != stop_offset) {
853 return ERROR_MALFORMED;
854 }
855
856 return OK;
857 }
858
859 switch(chunk_type) {
860 case FOURCC('m', 'o', 'o', 'v'):
861 case FOURCC('t', 'r', 'a', 'k'):
862 case FOURCC('m', 'd', 'i', 'a'):
863 case FOURCC('m', 'i', 'n', 'f'):
864 case FOURCC('d', 'i', 'n', 'f'):
865 case FOURCC('s', 't', 'b', 'l'):
866 case FOURCC('m', 'v', 'e', 'x'):
867 case FOURCC('m', 'o', 'o', 'f'):
868 case FOURCC('t', 'r', 'a', 'f'):
869 case FOURCC('m', 'f', 'r', 'a'):
870 case FOURCC('u', 'd', 't', 'a'):
871 case FOURCC('i', 'l', 's', 't'):
872 case FOURCC('s', 'i', 'n', 'f'):
873 case FOURCC('s', 'c', 'h', 'i'):
874 case FOURCC('e', 'd', 't', 's'):
875 {
876 if (chunk_type == FOURCC('m', 'o', 'o', 'f') && !mMoofFound) {
877 // store the offset of the first segment
878 mMoofFound = true;
879 mMoofOffset = *offset;
880 }
881
882 if (chunk_type == FOURCC('s', 't', 'b', 'l')) {
883 ALOGV("sampleTable chunk is %" PRIu64 " bytes long.", chunk_size);
884
885 if (mDataSource->flags()
886 & (DataSource::kWantsPrefetching
887 | DataSource::kIsCachingDataSource)) {
888 sp<MPEG4DataSource> cachedSource =
889 new MPEG4DataSource(mDataSource);
890
891 if (cachedSource->setCachedRange(*offset, chunk_size) == OK) {
892 mDataSource = cachedSource;
893 }
894 }
895
896 if (mLastTrack == NULL)
897 return ERROR_MALFORMED;
898
899 mLastTrack->sampleTable = new SampleTable(mDataSource);
900 }
901
902 bool isTrack = false;
903 if (chunk_type == FOURCC('t', 'r', 'a', 'k')) {
904 isTrack = true;
905
906 Track *track = new Track;
907 track->next = NULL;
908 if (mLastTrack) {
909 mLastTrack->next = track;
910 } else {
911 mFirstTrack = track;
912 }
913 mLastTrack = track;
914
915 track->meta = new MetaData;
916 track->includes_expensive_metadata = false;
917 track->skipTrack = false;
918 track->timescale = 0;
919 track->meta->setCString(kKeyMIMEType, "application/octet-stream");
920 }
921
922 off64_t stop_offset = *offset + chunk_size;
923 *offset = data_offset;
924 while (*offset < stop_offset) {
925 status_t err = parseChunk(offset, depth + 1);
926 if (err != OK) {
927 return err;
928 }
929 }
930
931 if (*offset != stop_offset) {
932 return ERROR_MALFORMED;
933 }
934
935 if (isTrack) {
936 int32_t trackId;
937 // There must be exact one track header per track.
938 if (!mLastTrack->meta->findInt32(kKeyTrackID, &trackId)) {
939 mLastTrack->skipTrack = true;
940 }
941 if (mLastTrack->skipTrack) {
942 Track *cur = mFirstTrack;
943
944 if (cur == mLastTrack) {
945 delete cur;
946 mFirstTrack = mLastTrack = NULL;
947 } else {
948 while (cur && cur->next != mLastTrack) {
949 cur = cur->next;
950 }
951 cur->next = NULL;
952 delete mLastTrack;
953 mLastTrack = cur;
954 }
955
956 return OK;
957 }
958
959 status_t err = verifyTrack(mLastTrack);
960
961 if (err != OK) {
962 return err;
963 }
964 } else if (chunk_type == FOURCC('m', 'o', 'o', 'v')) {
965 mInitCheck = OK;
966
967 if (!mIsDrm) {
968 return UNKNOWN_ERROR; // Return a dummy error.
969 } else {
970 return OK;
971 }
972 }
973 break;
974 }
975
976 case FOURCC('e', 'l', 's', 't'):
977 {
978 *offset += chunk_size;
979
980 // See 14496-12 8.6.6
981 uint8_t version;
982 if (mDataSource->readAt(data_offset, &version, 1) < 1) {
983 return ERROR_IO;
984 }
985
986 uint32_t entry_count;
987 if (!mDataSource->getUInt32(data_offset + 4, &entry_count)) {
988 return ERROR_IO;
989 }
990
991 if (entry_count != 1) {
992 // we only support a single entry at the moment, for gapless playback
993 ALOGW("ignoring edit list with %d entries", entry_count);
994 } else if (mHeaderTimescale == 0) {
995 ALOGW("ignoring edit list because timescale is 0");
996 } else {
997 off64_t entriesoffset = data_offset + 8;
998 uint64_t segment_duration;
999 int64_t media_time;
1000
1001 if (version == 1) {
1002 if (!mDataSource->getUInt64(entriesoffset, &segment_duration) ||
1003 !mDataSource->getUInt64(entriesoffset + 8, (uint64_t*)&media_time)) {
1004 return ERROR_IO;
1005 }
1006 } else if (version == 0) {
1007 uint32_t sd;
1008 int32_t mt;
1009 if (!mDataSource->getUInt32(entriesoffset, &sd) ||
1010 !mDataSource->getUInt32(entriesoffset + 4, (uint32_t*)&mt)) {
1011 return ERROR_IO;
1012 }
1013 segment_duration = sd;
1014 media_time = mt;
1015 } else {
1016 return ERROR_IO;
1017 }
1018
1019 uint64_t halfscale = mHeaderTimescale / 2;
1020 segment_duration = (segment_duration * 1000000 + halfscale)/ mHeaderTimescale;
1021 media_time = (media_time * 1000000 + halfscale) / mHeaderTimescale;
1022
1023 int64_t duration;
1024 int32_t samplerate;
1025 if (!mLastTrack) {
1026 return ERROR_MALFORMED;
1027 }
1028 if (mLastTrack->meta->findInt64(kKeyDuration, &duration) &&
1029 mLastTrack->meta->findInt32(kKeySampleRate, &samplerate)) {
1030
1031 int64_t delay = (media_time * samplerate + 500000) / 1000000;
1032 mLastTrack->meta->setInt32(kKeyEncoderDelay, delay);
1033
1034 int64_t paddingus = duration - (segment_duration + media_time);
1035 if (paddingus < 0) {
1036 // track duration from media header (which is what kKeyDuration is) might
1037 // be slightly shorter than the segment duration, which would make the
1038 // padding negative. Clamp to zero.
1039 paddingus = 0;
1040 }
1041 int64_t paddingsamples = (paddingus * samplerate + 500000) / 1000000;
1042 mLastTrack->meta->setInt32(kKeyEncoderPadding, paddingsamples);
1043 }
1044 }
1045 break;
1046 }
1047
1048 case FOURCC('f', 'r', 'm', 'a'):
1049 {
1050 *offset += chunk_size;
1051
1052 uint32_t original_fourcc;
1053 if (mDataSource->readAt(data_offset, &original_fourcc, 4) < 4) {
1054 return ERROR_IO;
1055 }
1056 original_fourcc = ntohl(original_fourcc);
1057 ALOGV("read original format: %d", original_fourcc);
1058
1059 if (mLastTrack == NULL)
1060 return ERROR_MALFORMED;
1061
1062 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(original_fourcc));
1063 uint32_t num_channels = 0;
1064 uint32_t sample_rate = 0;
1065 if (AdjustChannelsAndRate(original_fourcc, &num_channels, &sample_rate)) {
1066 mLastTrack->meta->setInt32(kKeyChannelCount, num_channels);
1067 mLastTrack->meta->setInt32(kKeySampleRate, sample_rate);
1068 }
1069 break;
1070 }
1071
1072 case FOURCC('t', 'e', 'n', 'c'):
1073 {
1074 *offset += chunk_size;
1075
1076 if (chunk_size < 32) {
1077 return ERROR_MALFORMED;
1078 }
1079
1080 // tenc box contains 1 byte version, 3 byte flags, 3 byte default algorithm id, one byte
1081 // default IV size, 16 bytes default KeyID
1082 // (ISO 23001-7)
1083 char buf[4];
1084 memset(buf, 0, 4);
1085 if (mDataSource->readAt(data_offset + 4, buf + 1, 3) < 3) {
1086 return ERROR_IO;
1087 }
1088 uint32_t defaultAlgorithmId = ntohl(*((int32_t*)buf));
1089 if (defaultAlgorithmId > 1) {
1090 // only 0 (clear) and 1 (AES-128) are valid
1091 return ERROR_MALFORMED;
1092 }
1093
1094 memset(buf, 0, 4);
1095 if (mDataSource->readAt(data_offset + 7, buf + 3, 1) < 1) {
1096 return ERROR_IO;
1097 }
1098 uint32_t defaultIVSize = ntohl(*((int32_t*)buf));
1099
1100 if ((defaultAlgorithmId == 0 && defaultIVSize != 0) ||
1101 (defaultAlgorithmId != 0 && defaultIVSize == 0)) {
1102 // only unencrypted data must have 0 IV size
1103 return ERROR_MALFORMED;
1104 } else if (defaultIVSize != 0 &&
1105 defaultIVSize != 8 &&
1106 defaultIVSize != 16) {
1107 // only supported sizes are 0, 8 and 16
1108 return ERROR_MALFORMED;
1109 }
1110
1111 uint8_t defaultKeyId[16];
1112
1113 if (mDataSource->readAt(data_offset + 8, &defaultKeyId, 16) < 16) {
1114 return ERROR_IO;
1115 }
1116
1117 if (mLastTrack == NULL)
1118 return ERROR_MALFORMED;
1119
1120 mLastTrack->meta->setInt32(kKeyCryptoMode, defaultAlgorithmId);
1121 mLastTrack->meta->setInt32(kKeyCryptoDefaultIVSize, defaultIVSize);
1122 mLastTrack->meta->setData(kKeyCryptoKey, 'tenc', defaultKeyId, 16);
1123 break;
1124 }
1125
1126 case FOURCC('t', 'k', 'h', 'd'):
1127 {
1128 *offset += chunk_size;
1129
1130 status_t err;
1131 if ((err = parseTrackHeader(data_offset, chunk_data_size)) != OK) {
1132 return err;
1133 }
1134
1135 break;
1136 }
1137
1138 case FOURCC('p', 's', 's', 'h'):
1139 {
1140 *offset += chunk_size;
1141
1142 PsshInfo pssh;
1143
1144 if (mDataSource->readAt(data_offset + 4, &pssh.uuid, 16) < 16) {
1145 return ERROR_IO;
1146 }
1147
1148 uint32_t psshdatalen = 0;
1149 if (mDataSource->readAt(data_offset + 20, &psshdatalen, 4) < 4) {
1150 return ERROR_IO;
1151 }
1152 pssh.datalen = ntohl(psshdatalen);
1153 ALOGV("pssh data size: %d", pssh.datalen);
1154 if (chunk_size < 20 || pssh.datalen > chunk_size - 20) {
1155 // pssh data length exceeds size of containing box
1156 return ERROR_MALFORMED;
1157 }
1158
1159 pssh.data = new (std::nothrow) uint8_t[pssh.datalen];
1160 if (pssh.data == NULL) {
1161 return ERROR_MALFORMED;
1162 }
1163 ALOGV("allocated pssh @ %p", pssh.data);
1164 ssize_t requested = (ssize_t) pssh.datalen;
1165 if (mDataSource->readAt(data_offset + 24, pssh.data, requested) < requested) {
1166 return ERROR_IO;
1167 }
1168 mPssh.push_back(pssh);
1169
1170 break;
1171 }
1172
1173 case FOURCC('m', 'd', 'h', 'd'):
1174 {
1175 *offset += chunk_size;
1176
1177 if (chunk_data_size < 4 || mLastTrack == NULL) {
1178 return ERROR_MALFORMED;
1179 }
1180
1181 uint8_t version;
1182 if (mDataSource->readAt(
1183 data_offset, &version, sizeof(version))
1184 < (ssize_t)sizeof(version)) {
1185 return ERROR_IO;
1186 }
1187
1188 off64_t timescale_offset;
1189
1190 if (version == 1) {
1191 timescale_offset = data_offset + 4 + 16;
1192 } else if (version == 0) {
1193 timescale_offset = data_offset + 4 + 8;
1194 } else {
1195 return ERROR_IO;
1196 }
1197
1198 uint32_t timescale;
1199 if (mDataSource->readAt(
1200 timescale_offset, ×cale, sizeof(timescale))
1201 < (ssize_t)sizeof(timescale)) {
1202 return ERROR_IO;
1203 }
1204
1205 if (!timescale) {
1206 ALOGE("timescale should not be ZERO.");
1207 return ERROR_MALFORMED;
1208 }
1209
1210 mLastTrack->timescale = ntohl(timescale);
1211
1212 // 14496-12 says all ones means indeterminate, but some files seem to use
1213 // 0 instead. We treat both the same.
1214 int64_t duration = 0;
1215 if (version == 1) {
1216 if (mDataSource->readAt(
1217 timescale_offset + 4, &duration, sizeof(duration))
1218 < (ssize_t)sizeof(duration)) {
1219 return ERROR_IO;
1220 }
1221 if (duration != -1) {
1222 duration = ntoh64(duration);
1223 }
1224 } else {
1225 uint32_t duration32;
1226 if (mDataSource->readAt(
1227 timescale_offset + 4, &duration32, sizeof(duration32))
1228 < (ssize_t)sizeof(duration32)) {
1229 return ERROR_IO;
1230 }
1231 if (duration32 != 0xffffffff) {
1232 duration = ntohl(duration32);
1233 }
1234 }
1235 if (duration != 0 && mLastTrack->timescale != 0) {
1236 mLastTrack->meta->setInt64(
1237 kKeyDuration, (duration * 1000000) / mLastTrack->timescale);
1238 }
1239
1240 uint8_t lang[2];
1241 off64_t lang_offset;
1242 if (version == 1) {
1243 lang_offset = timescale_offset + 4 + 8;
1244 } else if (version == 0) {
1245 lang_offset = timescale_offset + 4 + 4;
1246 } else {
1247 return ERROR_IO;
1248 }
1249
1250 if (mDataSource->readAt(lang_offset, &lang, sizeof(lang))
1251 < (ssize_t)sizeof(lang)) {
1252 return ERROR_IO;
1253 }
1254
1255 // To get the ISO-639-2/T three character language code
1256 // 1 bit pad followed by 3 5-bits characters. Each character
1257 // is packed as the difference between its ASCII value and 0x60.
1258 char lang_code[4];
1259 lang_code[0] = ((lang[0] >> 2) & 0x1f) + 0x60;
1260 lang_code[1] = ((lang[0] & 0x3) << 3 | (lang[1] >> 5)) + 0x60;
1261 lang_code[2] = (lang[1] & 0x1f) + 0x60;
1262 lang_code[3] = '\0';
1263
1264 mLastTrack->meta->setCString(
1265 kKeyMediaLanguage, lang_code);
1266
1267 break;
1268 }
1269
1270 case FOURCC('s', 't', 's', 'd'):
1271 {
1272 if (chunk_data_size < 8) {
1273 return ERROR_MALFORMED;
1274 }
1275
1276 uint8_t buffer[8];
1277 if (chunk_data_size < (off64_t)sizeof(buffer)) {
1278 return ERROR_MALFORMED;
1279 }
1280
1281 if (mDataSource->readAt(
1282 data_offset, buffer, 8) < 8) {
1283 return ERROR_IO;
1284 }
1285
1286 if (U32_AT(buffer) != 0) {
1287 // Should be version 0, flags 0.
1288 return ERROR_MALFORMED;
1289 }
1290
1291 uint32_t entry_count = U32_AT(&buffer[4]);
1292
1293 if (entry_count > 1) {
1294 // For 3GPP timed text, there could be multiple tx3g boxes contain
1295 // multiple text display formats. These formats will be used to
1296 // display the timed text.
1297 // For encrypted files, there may also be more than one entry.
1298 const char *mime;
1299
1300 if (mLastTrack == NULL)
1301 return ERROR_MALFORMED;
1302
1303 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime));
1304 if (strcasecmp(mime, MEDIA_MIMETYPE_TEXT_3GPP) &&
1305 strcasecmp(mime, "application/octet-stream")) {
1306 // For now we only support a single type of media per track.
1307 mLastTrack->skipTrack = true;
1308 *offset += chunk_size;
1309 break;
1310 }
1311 }
1312 off64_t stop_offset = *offset + chunk_size;
1313 *offset = data_offset + 8;
1314 for (uint32_t i = 0; i < entry_count; ++i) {
1315 status_t err = parseChunk(offset, depth + 1);
1316 if (err != OK) {
1317 return err;
1318 }
1319 }
1320
1321 if (*offset != stop_offset) {
1322 return ERROR_MALFORMED;
1323 }
1324 break;
1325 }
1326
1327 case FOURCC('m', 'p', '4', 'a'):
1328 case FOURCC('e', 'n', 'c', 'a'):
1329 case FOURCC('s', 'a', 'm', 'r'):
1330 case FOURCC('s', 'a', 'w', 'b'):
1331 {
1332 uint8_t buffer[8 + 20];
1333 if (chunk_data_size < (ssize_t)sizeof(buffer)) {
1334 // Basic AudioSampleEntry size.
1335 return ERROR_MALFORMED;
1336 }
1337
1338 if (mDataSource->readAt(
1339 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
1340 return ERROR_IO;
1341 }
1342
1343 uint16_t data_ref_index __unused = U16_AT(&buffer[6]);
1344 uint32_t num_channels = U16_AT(&buffer[16]);
1345
1346 uint16_t sample_size = U16_AT(&buffer[18]);
1347 uint32_t sample_rate = U32_AT(&buffer[24]) >> 16;
1348
1349 if (mLastTrack == NULL)
1350 return ERROR_MALFORMED;
1351
1352 if (chunk_type != FOURCC('e', 'n', 'c', 'a')) {
1353 // if the chunk type is enca, we'll get the type from the sinf/frma box later
1354 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type));
1355 AdjustChannelsAndRate(chunk_type, &num_channels, &sample_rate);
1356 }
1357 ALOGV("*** coding='%s' %d channels, size %d, rate %d\n",
1358 chunk, num_channels, sample_size, sample_rate);
1359 mLastTrack->meta->setInt32(kKeyChannelCount, num_channels);
1360 mLastTrack->meta->setInt32(kKeySampleRate, sample_rate);
1361
1362 off64_t stop_offset = *offset + chunk_size;
1363 *offset = data_offset + sizeof(buffer);
1364 while (*offset < stop_offset) {
1365 status_t err = parseChunk(offset, depth + 1);
1366 if (err != OK) {
1367 return err;
1368 }
1369 }
1370
1371 if (*offset != stop_offset) {
1372 return ERROR_MALFORMED;
1373 }
1374 break;
1375 }
1376
1377 case FOURCC('m', 'p', '4', 'v'):
1378 case FOURCC('e', 'n', 'c', 'v'):
1379 case FOURCC('s', '2', '6', '3'):
1380 case FOURCC('H', '2', '6', '3'):
1381 case FOURCC('h', '2', '6', '3'):
1382 case FOURCC('a', 'v', 'c', '1'):
1383 case FOURCC('h', 'v', 'c', '1'):
1384 case FOURCC('h', 'e', 'v', '1'):
1385 {
1386 mHasVideo = true;
1387
1388 uint8_t buffer[78];
1389 if (chunk_data_size < (ssize_t)sizeof(buffer)) {
1390 // Basic VideoSampleEntry size.
1391 return ERROR_MALFORMED;
1392 }
1393
1394 if (mDataSource->readAt(
1395 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
1396 return ERROR_IO;
1397 }
1398
1399 uint16_t data_ref_index __unused = U16_AT(&buffer[6]);
1400 uint16_t width = U16_AT(&buffer[6 + 18]);
1401 uint16_t height = U16_AT(&buffer[6 + 20]);
1402
1403 // The video sample is not standard-compliant if it has invalid dimension.
1404 // Use some default width and height value, and
1405 // let the decoder figure out the actual width and height (and thus
1406 // be prepared for INFO_FOMRAT_CHANGED event).
1407 if (width == 0) width = 352;
1408 if (height == 0) height = 288;
1409
1410 // printf("*** coding='%s' width=%d height=%d\n",
1411 // chunk, width, height);
1412
1413 if (mLastTrack == NULL)
1414 return ERROR_MALFORMED;
1415
1416 if (chunk_type != FOURCC('e', 'n', 'c', 'v')) {
1417 // if the chunk type is encv, we'll get the type from the sinf/frma box later
1418 mLastTrack->meta->setCString(kKeyMIMEType, FourCC2MIME(chunk_type));
1419 }
1420 mLastTrack->meta->setInt32(kKeyWidth, width);
1421 mLastTrack->meta->setInt32(kKeyHeight, height);
1422
1423 off64_t stop_offset = *offset + chunk_size;
1424 *offset = data_offset + sizeof(buffer);
1425 while (*offset < stop_offset) {
1426 status_t err = parseChunk(offset, depth + 1);
1427 if (err != OK) {
1428 return err;
1429 }
1430 }
1431
1432 if (*offset != stop_offset) {
1433 return ERROR_MALFORMED;
1434 }
1435 break;
1436 }
1437
1438 case FOURCC('s', 't', 'c', 'o'):
1439 case FOURCC('c', 'o', '6', '4'):
1440 {
1441 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
1442 return ERROR_MALFORMED;
1443
1444 status_t err =
1445 mLastTrack->sampleTable->setChunkOffsetParams(
1446 chunk_type, data_offset, chunk_data_size);
1447
1448 *offset += chunk_size;
1449
1450 if (err != OK) {
1451 return err;
1452 }
1453
1454 break;
1455 }
1456
1457 case FOURCC('s', 't', 's', 'c'):
1458 {
1459 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
1460 return ERROR_MALFORMED;
1461
1462 status_t err =
1463 mLastTrack->sampleTable->setSampleToChunkParams(
1464 data_offset, chunk_data_size);
1465
1466 *offset += chunk_size;
1467
1468 if (err != OK) {
1469 return err;
1470 }
1471
1472 break;
1473 }
1474
1475 case FOURCC('s', 't', 's', 'z'):
1476 case FOURCC('s', 't', 'z', '2'):
1477 {
1478 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
1479 return ERROR_MALFORMED;
1480
1481 status_t err =
1482 mLastTrack->sampleTable->setSampleSizeParams(
1483 chunk_type, data_offset, chunk_data_size);
1484
1485 *offset += chunk_size;
1486
1487 if (err != OK) {
1488 return err;
1489 }
1490
1491 size_t max_size;
1492 err = mLastTrack->sampleTable->getMaxSampleSize(&max_size);
1493
1494 if (err != OK) {
1495 return err;
1496 }
1497
1498 if (max_size != 0) {
1499 // Assume that a given buffer only contains at most 10 chunks,
1500 // each chunk originally prefixed with a 2 byte length will
1501 // have a 4 byte header (0x00 0x00 0x00 0x01) after conversion,
1502 // and thus will grow by 2 bytes per chunk.
1503 if (max_size > SIZE_MAX - 10 * 2) {
1504 ALOGE("max sample size too big: %zu", max_size);
1505 return ERROR_MALFORMED;
1506 }
1507 mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size + 10 * 2);
1508 } else {
1509 // No size was specified. Pick a conservatively large size.
1510 uint32_t width, height;
1511 if (!mLastTrack->meta->findInt32(kKeyWidth, (int32_t*)&width) ||
1512 !mLastTrack->meta->findInt32(kKeyHeight,(int32_t*) &height)) {
1513 ALOGE("No width or height, assuming worst case 1080p");
1514 width = 1920;
1515 height = 1080;
1516 } else {
1517 // A resolution was specified, check that it's not too big. The values below
1518 // were chosen so that the calculations below don't cause overflows, they're
1519 // not indicating that resolutions up to 32kx32k are actually supported.
1520 if (width > 32768 || height > 32768) {
1521 ALOGE("can't support %u x %u video", width, height);
1522 return ERROR_MALFORMED;
1523 }
1524 }
1525
1526 const char *mime;
1527 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime));
1528 if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
1529 // AVC requires compression ratio of at least 2, and uses
1530 // macroblocks
1531 max_size = ((width + 15) / 16) * ((height + 15) / 16) * 192;
1532 } else {
1533 // For all other formats there is no minimum compression
1534 // ratio. Use compression ratio of 1.
1535 max_size = width * height * 3 / 2;
1536 }
1537 mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size);
1538 }
1539
1540 // NOTE: setting another piece of metadata invalidates any pointers (such as the
1541 // mimetype) previously obtained, so don't cache them.
1542 const char *mime;
1543 CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime));
1544 // Calculate average frame rate.
1545 if (!strncasecmp("video/", mime, 6)) {
1546 size_t nSamples = mLastTrack->sampleTable->countSamples();
1547 int64_t durationUs;
1548 if (mLastTrack->meta->findInt64(kKeyDuration, &durationUs)) {
1549 if (durationUs > 0) {
1550 int32_t frameRate = (nSamples * 1000000LL +
1551 (durationUs >> 1)) / durationUs;
1552 mLastTrack->meta->setInt32(kKeyFrameRate, frameRate);
1553 }
1554 }
1555 }
1556
1557 break;
1558 }
1559
1560 case FOURCC('s', 't', 't', 's'):
1561 {
1562 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
1563 return ERROR_MALFORMED;
1564
1565 *offset += chunk_size;
1566
1567 status_t err =
1568 mLastTrack->sampleTable->setTimeToSampleParams(
1569 data_offset, chunk_data_size);
1570
1571 if (err != OK) {
1572 return err;
1573 }
1574
1575 break;
1576 }
1577
1578 case FOURCC('c', 't', 't', 's'):
1579 {
1580 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
1581 return ERROR_MALFORMED;
1582
1583 *offset += chunk_size;
1584
1585 status_t err =
1586 mLastTrack->sampleTable->setCompositionTimeToSampleParams(
1587 data_offset, chunk_data_size);
1588
1589 if (err != OK) {
1590 return err;
1591 }
1592
1593 break;
1594 }
1595
1596 case FOURCC('s', 't', 's', 's'):
1597 {
1598 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
1599 return ERROR_MALFORMED;
1600
1601 *offset += chunk_size;
1602
1603 status_t err =
1604 mLastTrack->sampleTable->setSyncSampleParams(
1605 data_offset, chunk_data_size);
1606
1607 if (err != OK) {
1608 return err;
1609 }
1610
1611 break;
1612 }
1613
1614 // \xA9xyz
1615 case FOURCC(0xA9, 'x', 'y', 'z'):
1616 {
1617 *offset += chunk_size;
1618
1619 // Best case the total data length inside "\xA9xyz" box
1620 // would be 8, for instance "\xA9xyz" + "\x00\x04\x15\xc7" + "0+0/",
1621 // where "\x00\x04" is the text string length with value = 4,
1622 // "\0x15\xc7" is the language code = en, and "0+0" is a
1623 // location (string) value with longitude = 0 and latitude = 0.
1624 if (chunk_data_size < 8) {
1625 return ERROR_MALFORMED;
1626 }
1627
1628 // Worst case the location string length would be 18,
1629 // for instance +90.0000-180.0000, without the trailing "/" and
1630 // the string length + language code.
1631 char buffer[18];
1632
1633 // Substracting 5 from the data size is because the text string length +
1634 // language code takes 4 bytes, and the trailing slash "/" takes 1 byte.
1635 off64_t location_length = chunk_data_size - 5;
1636 if (location_length >= (off64_t) sizeof(buffer)) {
1637 return ERROR_MALFORMED;
1638 }
1639
1640 if (mDataSource->readAt(
1641 data_offset + 4, buffer, location_length) < location_length) {
1642 return ERROR_IO;
1643 }
1644
1645 buffer[location_length] = '\0';
1646 mFileMetaData->setCString(kKeyLocation, buffer);
1647 break;
1648 }
1649
1650 case FOURCC('e', 's', 'd', 's'):
1651 {
1652 *offset += chunk_size;
1653
1654 if (chunk_data_size < 4) {
1655 return ERROR_MALFORMED;
1656 }
1657
1658 uint8_t buffer[256];
1659 if (chunk_data_size > (off64_t)sizeof(buffer)) {
1660 return ERROR_BUFFER_TOO_SMALL;
1661 }
1662
1663 if (mDataSource->readAt(
1664 data_offset, buffer, chunk_data_size) < chunk_data_size) {
1665 return ERROR_IO;
1666 }
1667
1668 if (U32_AT(buffer) != 0) {
1669 // Should be version 0, flags 0.
1670 return ERROR_MALFORMED;
1671 }
1672
1673 if (mLastTrack == NULL)
1674 return ERROR_MALFORMED;
1675
1676 mLastTrack->meta->setData(
1677 kKeyESDS, kTypeESDS, &buffer[4], chunk_data_size - 4);
1678
1679 if (mPath.size() >= 2
1680 && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'a')) {
1681 // Information from the ESDS must be relied on for proper
1682 // setup of sample rate and channel count for MPEG4 Audio.
1683 // The generic header appears to only contain generic
1684 // information...
1685
1686 status_t err = updateAudioTrackInfoFromESDS_MPEG4Audio(
1687 &buffer[4], chunk_data_size - 4);
1688
1689 if (err != OK) {
1690 return err;
1691 }
1692 }
1693 if (mPath.size() >= 2
1694 && mPath[mPath.size() - 2] == FOURCC('m', 'p', '4', 'v')) {
1695 // Check if the video is MPEG2
1696 ESDS esds(&buffer[4], chunk_data_size - 4);
1697
1698 uint8_t objectTypeIndication;
1699 if (esds.getObjectTypeIndication(&objectTypeIndication) == OK) {
1700 if (objectTypeIndication >= 0x60 && objectTypeIndication <= 0x65) {
1701 mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_VIDEO_MPEG2);
1702 }
1703 }
1704 }
1705 break;
1706 }
1707
1708 case FOURCC('a', 'v', 'c', 'C'):
1709 {
1710 *offset += chunk_size;
1711
1712 sp<ABuffer> buffer = new ABuffer(chunk_data_size);
1713
1714 if (buffer->data() == NULL) {
1715 ALOGE("b/28471206");
1716 return NO_MEMORY;
1717 }
1718
1719 if (mDataSource->readAt(
1720 data_offset, buffer->data(), chunk_data_size) < chunk_data_size) {
1721 return ERROR_IO;
1722 }
1723
1724 if (mLastTrack == NULL)
1725 return ERROR_MALFORMED;
1726
1727 mLastTrack->meta->setData(
1728 kKeyAVCC, kTypeAVCC, buffer->data(), chunk_data_size);
1729
1730 break;
1731 }
1732 case FOURCC('h', 'v', 'c', 'C'):
1733 {
1734 sp<ABuffer> buffer = new ABuffer(chunk_data_size);
1735
1736 if (buffer->data() == NULL) {
1737 ALOGE("b/28471206");
1738 return NO_MEMORY;
1739 }
1740
1741 if (mDataSource->readAt(
1742 data_offset, buffer->data(), chunk_data_size) < chunk_data_size) {
1743 return ERROR_IO;
1744 }
1745
1746 if (mLastTrack == NULL)
1747 return ERROR_MALFORMED;
1748
1749 mLastTrack->meta->setData(
1750 kKeyHVCC, kTypeHVCC, buffer->data(), chunk_data_size);
1751
1752 *offset += chunk_size;
1753 break;
1754 }
1755
1756 case FOURCC('d', '2', '6', '3'):
1757 {
1758 *offset += chunk_size;
1759 /*
1760 * d263 contains a fixed 7 bytes part:
1761 * vendor - 4 bytes
1762 * version - 1 byte
1763 * level - 1 byte
1764 * profile - 1 byte
1765 * optionally, "d263" box itself may contain a 16-byte
1766 * bit rate box (bitr)
1767 * average bit rate - 4 bytes
1768 * max bit rate - 4 bytes
1769 */
1770 char buffer[23];
1771 if (chunk_data_size != 7 &&
1772 chunk_data_size != 23) {
1773 ALOGE("Incorrect D263 box size %lld", (long long)chunk_data_size);
1774 return ERROR_MALFORMED;
1775 }
1776
1777 if (mDataSource->readAt(
1778 data_offset, buffer, chunk_data_size) < chunk_data_size) {
1779 return ERROR_IO;
1780 }
1781
1782 if (mLastTrack == NULL)
1783 return ERROR_MALFORMED;
1784
1785 mLastTrack->meta->setData(kKeyD263, kTypeD263, buffer, chunk_data_size);
1786
1787 break;
1788 }
1789
1790 case FOURCC('m', 'e', 't', 'a'):
1791 {
1792 off64_t stop_offset = *offset + chunk_size;
1793 *offset = data_offset;
1794 bool isParsingMetaKeys = underQTMetaPath(mPath, 2);
1795 if (!isParsingMetaKeys) {
1796 uint8_t buffer[4];
1797 if (chunk_data_size < (off64_t)sizeof(buffer)) {
1798 *offset = stop_offset;
1799 return ERROR_MALFORMED;
1800 }
1801
1802 if (mDataSource->readAt(
1803 data_offset, buffer, 4) < 4) {
1804 *offset = stop_offset;
1805 return ERROR_IO;
1806 }
1807
1808 if (U32_AT(buffer) != 0) {
1809 // Should be version 0, flags 0.
1810
1811 // If it's not, let's assume this is one of those
1812 // apparently malformed chunks that don't have flags
1813 // and completely different semantics than what's
1814 // in the MPEG4 specs and skip it.
1815 *offset = stop_offset;
1816 return OK;
1817 }
1818 *offset += sizeof(buffer);
1819 }
1820
1821 while (*offset < stop_offset) {
1822 status_t err = parseChunk(offset, depth + 1);
1823 if (err != OK) {
1824 return err;
1825 }
1826 }
1827
1828 if (*offset != stop_offset) {
1829 return ERROR_MALFORMED;
1830 }
1831 break;
1832 }
1833
1834 case FOURCC('m', 'e', 'a', 'n'):
1835 case FOURCC('n', 'a', 'm', 'e'):
1836 case FOURCC('d', 'a', 't', 'a'):
1837 {
1838 *offset += chunk_size;
1839
1840 if (mPath.size() == 6 && underMetaDataPath(mPath)) {
1841 status_t err = parseITunesMetaData(data_offset, chunk_data_size);
1842
1843 if (err != OK) {
1844 return err;
1845 }
1846 }
1847
1848 break;
1849 }
1850
1851 case FOURCC('m', 'v', 'h', 'd'):
1852 {
1853 *offset += chunk_size;
1854
1855 if (chunk_data_size < 32) {
1856 return ERROR_MALFORMED;
1857 }
1858
1859 uint8_t header[32];
1860 if (mDataSource->readAt(
1861 data_offset, header, sizeof(header))
1862 < (ssize_t)sizeof(header)) {
1863 return ERROR_IO;
1864 }
1865
1866 uint64_t creationTime;
1867 uint64_t duration = 0;
1868 if (header[0] == 1) {
1869 creationTime = U64_AT(&header[4]);
1870 mHeaderTimescale = U32_AT(&header[20]);
1871 duration = U64_AT(&header[24]);
1872 if (duration == 0xffffffffffffffff) {
1873 duration = 0;
1874 }
1875 } else if (header[0] != 0) {
1876 return ERROR_MALFORMED;
1877 } else {
1878 creationTime = U32_AT(&header[4]);
1879 mHeaderTimescale = U32_AT(&header[12]);
1880 uint32_t d32 = U32_AT(&header[16]);
1881 if (d32 == 0xffffffff) {
1882 d32 = 0;
1883 }
1884 duration = d32;
1885 }
1886 if (duration != 0 && mHeaderTimescale != 0) {
1887 mFileMetaData->setInt64(kKeyDuration, duration * 1000000 / mHeaderTimescale);
1888 }
1889
1890 String8 s;
1891 convertTimeToDate(creationTime, &s);
1892
1893 mFileMetaData->setCString(kKeyDate, s.string());
1894
1895 break;
1896 }
1897
1898 case FOURCC('m', 'e', 'h', 'd'):
1899 {
1900 *offset += chunk_size;
1901
1902 if (chunk_data_size < 8) {
1903 return ERROR_MALFORMED;
1904 }
1905
1906 uint8_t flags[4];
1907 if (mDataSource->readAt(
1908 data_offset, flags, sizeof(flags))
1909 < (ssize_t)sizeof(flags)) {
1910 return ERROR_IO;
1911 }
1912
1913 uint64_t duration = 0;
1914 if (flags[0] == 1) {
1915 // 64 bit
1916 if (chunk_data_size < 12) {
1917 return ERROR_MALFORMED;
1918 }
1919 mDataSource->getUInt64(data_offset + 4, &duration);
1920 if (duration == 0xffffffffffffffff) {
1921 duration = 0;
1922 }
1923 } else if (flags[0] == 0) {
1924 // 32 bit
1925 uint32_t d32;
1926 mDataSource->getUInt32(data_offset + 4, &d32);
1927 if (d32 == 0xffffffff) {
1928 d32 = 0;
1929 }
1930 duration = d32;
1931 } else {
1932 return ERROR_MALFORMED;
1933 }
1934
1935 if (duration != 0 && mHeaderTimescale != 0) {
1936 mFileMetaData->setInt64(kKeyDuration, duration * 1000000 / mHeaderTimescale);
1937 }
1938
1939 break;
1940 }
1941
1942 case FOURCC('m', 'd', 'a', 't'):
1943 {
1944 ALOGV("mdat chunk, drm: %d", mIsDrm);
1945
1946 mMdatFound = true;
1947
1948 if (!mIsDrm) {
1949 *offset += chunk_size;
1950 break;
1951 }
1952
1953 if (chunk_size < 8) {
1954 return ERROR_MALFORMED;
1955 }
1956
1957 return parseDrmSINF(offset, data_offset);
1958 }
1959
1960 case FOURCC('h', 'd', 'l', 'r'):
1961 {
1962 *offset += chunk_size;
1963
1964 if (underQTMetaPath(mPath, 3)) {
1965 break;
1966 }
1967
1968 uint32_t buffer;
1969 if (mDataSource->readAt(
1970 data_offset + 8, &buffer, 4) < 4) {
1971 return ERROR_IO;
1972 }
1973
1974 uint32_t type = ntohl(buffer);
1975 // For the 3GPP file format, the handler-type within the 'hdlr' box
1976 // shall be 'text'. We also want to support 'sbtl' handler type
1977 // for a practical reason as various MPEG4 containers use it.
1978 if (type == FOURCC('t', 'e', 'x', 't') || type == FOURCC('s', 'b', 't', 'l')) {
1979 if (mLastTrack != NULL) {
1980 mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_TEXT_3GPP);
1981 }
1982 }
1983
1984 break;
1985 }
1986
1987 case FOURCC('k', 'e', 'y', 's'):
1988 {
1989 *offset += chunk_size;
1990
1991 if (underQTMetaPath(mPath, 3)) {
1992 parseQTMetaKey(data_offset, chunk_data_size);
1993 }
1994 break;
1995 }
1996
1997 case FOURCC('t', 'r', 'e', 'x'):
1998 {
1999 *offset += chunk_size;
2000
2001 if (chunk_data_size < 24) {
2002 return ERROR_IO;
2003 }
2004 Trex trex;
2005 if (!mDataSource->getUInt32(data_offset + 4, &trex.track_ID) ||
2006 !mDataSource->getUInt32(data_offset + 8, &trex.default_sample_description_index) ||
2007 !mDataSource->getUInt32(data_offset + 12, &trex.default_sample_duration) ||
2008 !mDataSource->getUInt32(data_offset + 16, &trex.default_sample_size) ||
2009 !mDataSource->getUInt32(data_offset + 20, &trex.default_sample_flags)) {
2010 return ERROR_IO;
2011 }
2012 mTrex.add(trex);
2013 break;
2014 }
2015
2016 case FOURCC('t', 'x', '3', 'g'):
2017 {
2018 if (mLastTrack == NULL)
2019 return ERROR_MALFORMED;
2020
2021 uint32_t type;
2022 const void *data;
2023 size_t size = 0;
2024 if (!mLastTrack->meta->findData(
2025 kKeyTextFormatData, &type, &data, &size)) {
2026 size = 0;
2027 }
2028
2029 if ((chunk_size > SIZE_MAX) || (SIZE_MAX - chunk_size <= size)) {
2030 return ERROR_MALFORMED;
2031 }
2032
2033 uint8_t *buffer = new (std::nothrow) uint8_t[size + chunk_size];
2034 if (buffer == NULL) {
2035 return ERROR_MALFORMED;
2036 }
2037
2038 if (size > 0) {
2039 memcpy(buffer, data, size);
2040 }
2041
2042 if ((size_t)(mDataSource->readAt(*offset, buffer + size, chunk_size))
2043 < chunk_size) {
2044 delete[] buffer;
2045 buffer = NULL;
2046
2047 // advance read pointer so we don't end up reading this again
2048 *offset += chunk_size;
2049 return ERROR_IO;
2050 }
2051
2052 mLastTrack->meta->setData(
2053 kKeyTextFormatData, 0, buffer, size + chunk_size);
2054
2055 delete[] buffer;
2056
2057 *offset += chunk_size;
2058 break;
2059 }
2060
2061 case FOURCC('c', 'o', 'v', 'r'):
2062 {
2063 *offset += chunk_size;
2064
2065 if (mFileMetaData != NULL) {
2066 ALOGV("chunk_data_size = %" PRId64 " and data_offset = %" PRId64,
2067 chunk_data_size, data_offset);
2068
2069 if (chunk_data_size < 0 || static_cast<uint64_t>(chunk_data_size) >= SIZE_MAX - 1) {
2070 return ERROR_MALFORMED;
2071 }
2072 sp<ABuffer> buffer = new ABuffer(chunk_data_size + 1);
2073 if (buffer->data() == NULL) {
2074 ALOGE("b/28471206");
2075 return NO_MEMORY;
2076 }
2077 if (mDataSource->readAt(
2078 data_offset, buffer->data(), chunk_data_size) != (ssize_t)chunk_data_size) {
2079 return ERROR_IO;
2080 }
2081 const int kSkipBytesOfDataBox = 16;
2082 if (chunk_data_size <= kSkipBytesOfDataBox) {
2083 return ERROR_MALFORMED;
2084 }
2085
2086 mFileMetaData->setData(
2087 kKeyAlbumArt, MetaData::TYPE_NONE,
2088 buffer->data() + kSkipBytesOfDataBox, chunk_data_size - kSkipBytesOfDataBox);
2089 }
2090
2091 break;
2092 }
2093
2094 case FOURCC('t', 'i', 't', 'l'):
2095 case FOURCC('p', 'e', 'r', 'f'):
2096 case FOURCC('a', 'u', 't', 'h'):
2097 case FOURCC('g', 'n', 'r', 'e'):
2098 case FOURCC('a', 'l', 'b', 'm'):
2099 case FOURCC('y', 'r', 'r', 'c'):
2100 {
2101 *offset += chunk_size;
2102
2103 status_t err = parse3GPPMetaData(data_offset, chunk_data_size, depth);
2104
2105 if (err != OK) {
2106 return err;
2107 }
2108
2109 break;
2110 }
2111
2112 case FOURCC('I', 'D', '3', '2'):
2113 {
2114 *offset += chunk_size;
2115
2116 if (chunk_data_size < 6) {
2117 return ERROR_MALFORMED;
2118 }
2119
2120 parseID3v2MetaData(data_offset + 6);
2121
2122 break;
2123 }
2124
2125 case FOURCC('-', '-', '-', '-'):
2126 {
2127 mLastCommentMean.clear();
2128 mLastCommentName.clear();
2129 mLastCommentData.clear();
2130 *offset += chunk_size;
2131 break;
2132 }
2133
2134 case FOURCC('s', 'i', 'd', 'x'):
2135 {
2136 parseSegmentIndex(data_offset, chunk_data_size);
2137 *offset += chunk_size;
2138 return UNKNOWN_ERROR; // stop parsing after sidx
2139 }
2140
2141 default:
2142 {
2143 // check if we're parsing 'ilst' for meta keys
2144 // if so, treat type as a number (key-id).
2145 if (underQTMetaPath(mPath, 3)) {
2146 parseQTMetaVal(chunk_type, data_offset, chunk_data_size);
2147 }
2148
2149 *offset += chunk_size;
2150 break;
2151 }
2152 }
2153
2154 return OK;
2155 }
2156
parseSegmentIndex(off64_t offset,size_t size)2157 status_t MPEG4Extractor::parseSegmentIndex(off64_t offset, size_t size) {
2158 ALOGV("MPEG4Extractor::parseSegmentIndex");
2159
2160 if (size < 12) {
2161 return -EINVAL;
2162 }
2163
2164 uint32_t flags;
2165 if (!mDataSource->getUInt32(offset, &flags)) {
2166 return ERROR_MALFORMED;
2167 }
2168
2169 uint32_t version = flags >> 24;
2170 flags &= 0xffffff;
2171
2172 ALOGV("sidx version %d", version);
2173
2174 uint32_t referenceId;
2175 if (!mDataSource->getUInt32(offset + 4, &referenceId)) {
2176 return ERROR_MALFORMED;
2177 }
2178
2179 uint32_t timeScale;
2180 if (!mDataSource->getUInt32(offset + 8, &timeScale)) {
2181 return ERROR_MALFORMED;
2182 }
2183 ALOGV("sidx refid/timescale: %d/%d", referenceId, timeScale);
2184 if (timeScale == 0)
2185 return ERROR_MALFORMED;
2186
2187 uint64_t earliestPresentationTime;
2188 uint64_t firstOffset;
2189
2190 offset += 12;
2191 size -= 12;
2192
2193 if (version == 0) {
2194 if (size < 8) {
2195 return -EINVAL;
2196 }
2197 uint32_t tmp;
2198 if (!mDataSource->getUInt32(offset, &tmp)) {
2199 return ERROR_MALFORMED;
2200 }
2201 earliestPresentationTime = tmp;
2202 if (!mDataSource->getUInt32(offset + 4, &tmp)) {
2203 return ERROR_MALFORMED;
2204 }
2205 firstOffset = tmp;
2206 offset += 8;
2207 size -= 8;
2208 } else {
2209 if (size < 16) {
2210 return -EINVAL;
2211 }
2212 if (!mDataSource->getUInt64(offset, &earliestPresentationTime)) {
2213 return ERROR_MALFORMED;
2214 }
2215 if (!mDataSource->getUInt64(offset + 8, &firstOffset)) {
2216 return ERROR_MALFORMED;
2217 }
2218 offset += 16;
2219 size -= 16;
2220 }
2221 ALOGV("sidx pres/off: %" PRIu64 "/%" PRIu64, earliestPresentationTime, firstOffset);
2222
2223 if (size < 4) {
2224 return -EINVAL;
2225 }
2226
2227 uint16_t referenceCount;
2228 if (!mDataSource->getUInt16(offset + 2, &referenceCount)) {
2229 return ERROR_MALFORMED;
2230 }
2231 offset += 4;
2232 size -= 4;
2233 ALOGV("refcount: %d", referenceCount);
2234
2235 if (size < referenceCount * 12) {
2236 return -EINVAL;
2237 }
2238
2239 uint64_t total_duration = 0;
2240 for (unsigned int i = 0; i < referenceCount; i++) {
2241 uint32_t d1, d2, d3;
2242
2243 if (!mDataSource->getUInt32(offset, &d1) || // size
2244 !mDataSource->getUInt32(offset + 4, &d2) || // duration
2245 !mDataSource->getUInt32(offset + 8, &d3)) { // flags
2246 return ERROR_MALFORMED;
2247 }
2248
2249 if (d1 & 0x80000000) {
2250 ALOGW("sub-sidx boxes not supported yet");
2251 }
2252 bool sap = d3 & 0x80000000;
2253 uint32_t saptype = (d3 >> 28) & 7;
2254 if (!sap || (saptype != 1 && saptype != 2)) {
2255 // type 1 and 2 are sync samples
2256 ALOGW("not a stream access point, or unsupported type: %08x", d3);
2257 }
2258 total_duration += d2;
2259 offset += 12;
2260 ALOGV(" item %d, %08x %08x %08x", i, d1, d2, d3);
2261 SidxEntry se;
2262 se.mSize = d1 & 0x7fffffff;
2263 se.mDurationUs = 1000000LL * d2 / timeScale;
2264 mSidxEntries.add(se);
2265 }
2266
2267 uint64_t sidxDuration = total_duration * 1000000 / timeScale;
2268
2269 if (mLastTrack == NULL)
2270 return ERROR_MALFORMED;
2271
2272 int64_t metaDuration;
2273 if (!mLastTrack->meta->findInt64(kKeyDuration, &metaDuration) || metaDuration == 0) {
2274 mLastTrack->meta->setInt64(kKeyDuration, sidxDuration);
2275 }
2276 return OK;
2277 }
2278
parseQTMetaKey(off64_t offset,size_t size)2279 status_t MPEG4Extractor::parseQTMetaKey(off64_t offset, size_t size) {
2280 if (size < 8) {
2281 return ERROR_MALFORMED;
2282 }
2283
2284 uint32_t count;
2285 if (!mDataSource->getUInt32(offset + 4, &count)) {
2286 return ERROR_MALFORMED;
2287 }
2288
2289 if (mMetaKeyMap.size() > 0) {
2290 ALOGW("'keys' atom seen again, discarding existing entries");
2291 mMetaKeyMap.clear();
2292 }
2293
2294 off64_t keyOffset = offset + 8;
2295 off64_t stopOffset = offset + size;
2296 for (size_t i = 1; i <= count; i++) {
2297 if (keyOffset + 8 > stopOffset) {
2298 return ERROR_MALFORMED;
2299 }
2300
2301 uint32_t keySize;
2302 if (!mDataSource->getUInt32(keyOffset, &keySize)
2303 || keySize < 8
2304 || keyOffset + keySize > stopOffset) {
2305 return ERROR_MALFORMED;
2306 }
2307
2308 uint32_t type;
2309 if (!mDataSource->getUInt32(keyOffset + 4, &type)
2310 || type != FOURCC('m', 'd', 't', 'a')) {
2311 return ERROR_MALFORMED;
2312 }
2313
2314 keySize -= 8;
2315 keyOffset += 8;
2316
2317 sp<ABuffer> keyData = new ABuffer(keySize);
2318 if (keyData->data() == NULL) {
2319 return ERROR_MALFORMED;
2320 }
2321 if (mDataSource->readAt(
2322 keyOffset, keyData->data(), keySize) < (ssize_t) keySize) {
2323 return ERROR_MALFORMED;
2324 }
2325
2326 AString key((const char *)keyData->data(), keySize);
2327 mMetaKeyMap.add(i, key);
2328
2329 keyOffset += keySize;
2330 }
2331 return OK;
2332 }
2333
parseQTMetaVal(int32_t keyId,off64_t offset,size_t size)2334 status_t MPEG4Extractor::parseQTMetaVal(
2335 int32_t keyId, off64_t offset, size_t size) {
2336 ssize_t index = mMetaKeyMap.indexOfKey(keyId);
2337 if (index < 0) {
2338 // corresponding key is not present, ignore
2339 return ERROR_MALFORMED;
2340 }
2341
2342 if (size <= 16) {
2343 return ERROR_MALFORMED;
2344 }
2345 uint32_t dataSize;
2346 if (!mDataSource->getUInt32(offset, &dataSize)
2347 || dataSize > size || dataSize <= 16) {
2348 return ERROR_MALFORMED;
2349 }
2350 uint32_t atomFourCC;
2351 if (!mDataSource->getUInt32(offset + 4, &atomFourCC)
2352 || atomFourCC != FOURCC('d', 'a', 't', 'a')) {
2353 return ERROR_MALFORMED;
2354 }
2355 uint32_t dataType;
2356 if (!mDataSource->getUInt32(offset + 8, &dataType)
2357 || ((dataType & 0xff000000) != 0)) {
2358 // not well-known type
2359 return ERROR_MALFORMED;
2360 }
2361
2362 dataSize -= 16;
2363 offset += 16;
2364
2365 if (dataType == 23 && dataSize >= 4) {
2366 // BE Float32
2367 uint32_t val;
2368 if (!mDataSource->getUInt32(offset, &val)) {
2369 return ERROR_MALFORMED;
2370 }
2371 if (!strcasecmp(mMetaKeyMap[index].c_str(), "com.android.capture.fps")) {
2372 mFileMetaData->setFloat(kKeyCaptureFramerate, *(float *)&val);
2373 }
2374 } else {
2375 // add more keys if needed
2376 ALOGV("ignoring key: type %d, size %d", dataType, dataSize);
2377 }
2378
2379 return OK;
2380 }
2381
parseTrackHeader(off64_t data_offset,off64_t data_size)2382 status_t MPEG4Extractor::parseTrackHeader(
2383 off64_t data_offset, off64_t data_size) {
2384 if (data_size < 4) {
2385 return ERROR_MALFORMED;
2386 }
2387
2388 uint8_t version;
2389 if (mDataSource->readAt(data_offset, &version, 1) < 1) {
2390 return ERROR_IO;
2391 }
2392
2393 size_t dynSize = (version == 1) ? 36 : 24;
2394
2395 uint8_t buffer[36 + 60];
2396
2397 if (data_size != (off64_t)dynSize + 60) {
2398 return ERROR_MALFORMED;
2399 }
2400
2401 if (mDataSource->readAt(
2402 data_offset, buffer, data_size) < (ssize_t)data_size) {
2403 return ERROR_IO;
2404 }
2405
2406 uint64_t ctime __unused, mtime __unused, duration __unused;
2407 int32_t id;
2408
2409 if (version == 1) {
2410 ctime = U64_AT(&buffer[4]);
2411 mtime = U64_AT(&buffer[12]);
2412 id = U32_AT(&buffer[20]);
2413 duration = U64_AT(&buffer[28]);
2414 } else if (version == 0) {
2415 ctime = U32_AT(&buffer[4]);
2416 mtime = U32_AT(&buffer[8]);
2417 id = U32_AT(&buffer[12]);
2418 duration = U32_AT(&buffer[20]);
2419 } else {
2420 return ERROR_UNSUPPORTED;
2421 }
2422
2423 if (mLastTrack == NULL)
2424 return ERROR_MALFORMED;
2425
2426 mLastTrack->meta->setInt32(kKeyTrackID, id);
2427
2428 size_t matrixOffset = dynSize + 16;
2429 int32_t a00 = U32_AT(&buffer[matrixOffset]);
2430 int32_t a01 = U32_AT(&buffer[matrixOffset + 4]);
2431 int32_t a10 = U32_AT(&buffer[matrixOffset + 12]);
2432 int32_t a11 = U32_AT(&buffer[matrixOffset + 16]);
2433
2434 #if 0
2435 int32_t dx = U32_AT(&buffer[matrixOffset + 8]);
2436 int32_t dy = U32_AT(&buffer[matrixOffset + 20]);
2437
2438 ALOGI("x' = %.2f * x + %.2f * y + %.2f",
2439 a00 / 65536.0f, a01 / 65536.0f, dx / 65536.0f);
2440 ALOGI("y' = %.2f * x + %.2f * y + %.2f",
2441 a10 / 65536.0f, a11 / 65536.0f, dy / 65536.0f);
2442 #endif
2443
2444 uint32_t rotationDegrees;
2445
2446 static const int32_t kFixedOne = 0x10000;
2447 if (a00 == kFixedOne && a01 == 0 && a10 == 0 && a11 == kFixedOne) {
2448 // Identity, no rotation
2449 rotationDegrees = 0;
2450 } else if (a00 == 0 && a01 == kFixedOne && a10 == -kFixedOne && a11 == 0) {
2451 rotationDegrees = 90;
2452 } else if (a00 == 0 && a01 == -kFixedOne && a10 == kFixedOne && a11 == 0) {
2453 rotationDegrees = 270;
2454 } else if (a00 == -kFixedOne && a01 == 0 && a10 == 0 && a11 == -kFixedOne) {
2455 rotationDegrees = 180;
2456 } else {
2457 ALOGW("We only support 0,90,180,270 degree rotation matrices");
2458 rotationDegrees = 0;
2459 }
2460
2461 if (rotationDegrees != 0) {
2462 mLastTrack->meta->setInt32(kKeyRotation, rotationDegrees);
2463 }
2464
2465 // Handle presentation display size, which could be different
2466 // from the image size indicated by kKeyWidth and kKeyHeight.
2467 uint32_t width = U32_AT(&buffer[dynSize + 52]);
2468 uint32_t height = U32_AT(&buffer[dynSize + 56]);
2469 mLastTrack->meta->setInt32(kKeyDisplayWidth, width >> 16);
2470 mLastTrack->meta->setInt32(kKeyDisplayHeight, height >> 16);
2471
2472 return OK;
2473 }
2474
parseITunesMetaData(off64_t offset,size_t size)2475 status_t MPEG4Extractor::parseITunesMetaData(off64_t offset, size_t size) {
2476 if (size < 4 || size == SIZE_MAX) {
2477 return ERROR_MALFORMED;
2478 }
2479
2480 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
2481 if (buffer == NULL) {
2482 return ERROR_MALFORMED;
2483 }
2484 if (mDataSource->readAt(
2485 offset, buffer, size) != (ssize_t)size) {
2486 delete[] buffer;
2487 buffer = NULL;
2488
2489 return ERROR_IO;
2490 }
2491
2492 uint32_t flags = U32_AT(buffer);
2493
2494 uint32_t metadataKey = 0;
2495 char chunk[5];
2496 MakeFourCCString(mPath[4], chunk);
2497 ALOGV("meta: %s @ %lld", chunk, (long long)offset);
2498 switch ((int32_t)mPath[4]) {
2499 case FOURCC(0xa9, 'a', 'l', 'b'):
2500 {
2501 metadataKey = kKeyAlbum;
2502 break;
2503 }
2504 case FOURCC(0xa9, 'A', 'R', 'T'):
2505 {
2506 metadataKey = kKeyArtist;
2507 break;
2508 }
2509 case FOURCC('a', 'A', 'R', 'T'):
2510 {
2511 metadataKey = kKeyAlbumArtist;
2512 break;
2513 }
2514 case FOURCC(0xa9, 'd', 'a', 'y'):
2515 {
2516 metadataKey = kKeyYear;
2517 break;
2518 }
2519 case FOURCC(0xa9, 'n', 'a', 'm'):
2520 {
2521 metadataKey = kKeyTitle;
2522 break;
2523 }
2524 case FOURCC(0xa9, 'w', 'r', 't'):
2525 {
2526 metadataKey = kKeyWriter;
2527 break;
2528 }
2529 case FOURCC('c', 'o', 'v', 'r'):
2530 {
2531 metadataKey = kKeyAlbumArt;
2532 break;
2533 }
2534 case FOURCC('g', 'n', 'r', 'e'):
2535 {
2536 metadataKey = kKeyGenre;
2537 break;
2538 }
2539 case FOURCC(0xa9, 'g', 'e', 'n'):
2540 {
2541 metadataKey = kKeyGenre;
2542 break;
2543 }
2544 case FOURCC('c', 'p', 'i', 'l'):
2545 {
2546 if (size == 9 && flags == 21) {
2547 char tmp[16];
2548 sprintf(tmp, "%d",
2549 (int)buffer[size - 1]);
2550
2551 mFileMetaData->setCString(kKeyCompilation, tmp);
2552 }
2553 break;
2554 }
2555 case FOURCC('t', 'r', 'k', 'n'):
2556 {
2557 if (size == 16 && flags == 0) {
2558 char tmp[16];
2559 uint16_t* pTrack = (uint16_t*)&buffer[10];
2560 uint16_t* pTotalTracks = (uint16_t*)&buffer[12];
2561 sprintf(tmp, "%d/%d", ntohs(*pTrack), ntohs(*pTotalTracks));
2562
2563 mFileMetaData->setCString(kKeyCDTrackNumber, tmp);
2564 }
2565 break;
2566 }
2567 case FOURCC('d', 'i', 's', 'k'):
2568 {
2569 if ((size == 14 || size == 16) && flags == 0) {
2570 char tmp[16];
2571 uint16_t* pDisc = (uint16_t*)&buffer[10];
2572 uint16_t* pTotalDiscs = (uint16_t*)&buffer[12];
2573 sprintf(tmp, "%d/%d", ntohs(*pDisc), ntohs(*pTotalDiscs));
2574
2575 mFileMetaData->setCString(kKeyDiscNumber, tmp);
2576 }
2577 break;
2578 }
2579 case FOURCC('-', '-', '-', '-'):
2580 {
2581 buffer[size] = '\0';
2582 switch (mPath[5]) {
2583 case FOURCC('m', 'e', 'a', 'n'):
2584 mLastCommentMean.setTo((const char *)buffer + 4);
2585 break;
2586 case FOURCC('n', 'a', 'm', 'e'):
2587 mLastCommentName.setTo((const char *)buffer + 4);
2588 break;
2589 case FOURCC('d', 'a', 't', 'a'):
2590 if (size < 8) {
2591 delete[] buffer;
2592 buffer = NULL;
2593 ALOGE("b/24346430");
2594 return ERROR_MALFORMED;
2595 }
2596 mLastCommentData.setTo((const char *)buffer + 8);
2597 break;
2598 }
2599
2600 // Once we have a set of mean/name/data info, go ahead and process
2601 // it to see if its something we are interested in. Whether or not
2602 // were are interested in the specific tag, make sure to clear out
2603 // the set so we can be ready to process another tuple should one
2604 // show up later in the file.
2605 if ((mLastCommentMean.length() != 0) &&
2606 (mLastCommentName.length() != 0) &&
2607 (mLastCommentData.length() != 0)) {
2608
2609 if (mLastCommentMean == "com.apple.iTunes"
2610 && mLastCommentName == "iTunSMPB") {
2611 int32_t delay, padding;
2612 if (sscanf(mLastCommentData,
2613 " %*x %x %x %*x", &delay, &padding) == 2) {
2614 if (mLastTrack == NULL)
2615 return ERROR_MALFORMED;
2616
2617 mLastTrack->meta->setInt32(kKeyEncoderDelay, delay);
2618 mLastTrack->meta->setInt32(kKeyEncoderPadding, padding);
2619 }
2620 }
2621
2622 mLastCommentMean.clear();
2623 mLastCommentName.clear();
2624 mLastCommentData.clear();
2625 }
2626 break;
2627 }
2628
2629 default:
2630 break;
2631 }
2632
2633 if (size >= 8 && metadataKey && !mFileMetaData->hasData(metadataKey)) {
2634 if (metadataKey == kKeyAlbumArt) {
2635 mFileMetaData->setData(
2636 kKeyAlbumArt, MetaData::TYPE_NONE,
2637 buffer + 8, size - 8);
2638 } else if (metadataKey == kKeyGenre) {
2639 if (flags == 0) {
2640 // uint8_t genre code, iTunes genre codes are
2641 // the standard id3 codes, except they start
2642 // at 1 instead of 0 (e.g. Pop is 14, not 13)
2643 // We use standard id3 numbering, so subtract 1.
2644 int genrecode = (int)buffer[size - 1];
2645 genrecode--;
2646 if (genrecode < 0) {
2647 genrecode = 255; // reserved for 'unknown genre'
2648 }
2649 char genre[10];
2650 sprintf(genre, "%d", genrecode);
2651
2652 mFileMetaData->setCString(metadataKey, genre);
2653 } else if (flags == 1) {
2654 // custom genre string
2655 buffer[size] = '\0';
2656
2657 mFileMetaData->setCString(
2658 metadataKey, (const char *)buffer + 8);
2659 }
2660 } else {
2661 buffer[size] = '\0';
2662
2663 mFileMetaData->setCString(
2664 metadataKey, (const char *)buffer + 8);
2665 }
2666 }
2667
2668 delete[] buffer;
2669 buffer = NULL;
2670
2671 return OK;
2672 }
2673
parse3GPPMetaData(off64_t offset,size_t size,int depth)2674 status_t MPEG4Extractor::parse3GPPMetaData(off64_t offset, size_t size, int depth) {
2675 if (size < 4 || size == SIZE_MAX) {
2676 return ERROR_MALFORMED;
2677 }
2678
2679 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
2680 if (buffer == NULL) {
2681 return ERROR_MALFORMED;
2682 }
2683 if (mDataSource->readAt(
2684 offset, buffer, size) != (ssize_t)size) {
2685 delete[] buffer;
2686 buffer = NULL;
2687
2688 return ERROR_IO;
2689 }
2690
2691 uint32_t metadataKey = 0;
2692 switch (mPath[depth]) {
2693 case FOURCC('t', 'i', 't', 'l'):
2694 {
2695 metadataKey = kKeyTitle;
2696 break;
2697 }
2698 case FOURCC('p', 'e', 'r', 'f'):
2699 {
2700 metadataKey = kKeyArtist;
2701 break;
2702 }
2703 case FOURCC('a', 'u', 't', 'h'):
2704 {
2705 metadataKey = kKeyWriter;
2706 break;
2707 }
2708 case FOURCC('g', 'n', 'r', 'e'):
2709 {
2710 metadataKey = kKeyGenre;
2711 break;
2712 }
2713 case FOURCC('a', 'l', 'b', 'm'):
2714 {
2715 if (buffer[size - 1] != '\0') {
2716 char tmp[4];
2717 sprintf(tmp, "%u", buffer[size - 1]);
2718
2719 mFileMetaData->setCString(kKeyCDTrackNumber, tmp);
2720 }
2721
2722 metadataKey = kKeyAlbum;
2723 break;
2724 }
2725 case FOURCC('y', 'r', 'r', 'c'):
2726 {
2727 char tmp[5];
2728 uint16_t year = U16_AT(&buffer[4]);
2729
2730 if (year < 10000) {
2731 sprintf(tmp, "%u", year);
2732
2733 mFileMetaData->setCString(kKeyYear, tmp);
2734 }
2735 break;
2736 }
2737
2738 default:
2739 break;
2740 }
2741
2742 if (metadataKey > 0) {
2743 bool isUTF8 = true; // Common case
2744 char16_t *framedata = NULL;
2745 int len16 = 0; // Number of UTF-16 characters
2746
2747 // smallest possible valid UTF-16 string w BOM: 0xfe 0xff 0x00 0x00
2748 if (size < 6) {
2749 return ERROR_MALFORMED;
2750 }
2751
2752 if (size - 6 >= 4) {
2753 len16 = ((size - 6) / 2) - 1; // don't include 0x0000 terminator
2754 framedata = (char16_t *)(buffer + 6);
2755 if (0xfffe == *framedata) {
2756 // endianness marker (BOM) doesn't match host endianness
2757 for (int i = 0; i < len16; i++) {
2758 framedata[i] = bswap_16(framedata[i]);
2759 }
2760 // BOM is now swapped to 0xfeff, we will execute next block too
2761 }
2762
2763 if (0xfeff == *framedata) {
2764 // Remove the BOM
2765 framedata++;
2766 len16--;
2767 isUTF8 = false;
2768 }
2769 // else normal non-zero-length UTF-8 string
2770 // we can't handle UTF-16 without BOM as there is no other
2771 // indication of encoding.
2772 }
2773
2774 if (isUTF8) {
2775 buffer[size] = 0;
2776 mFileMetaData->setCString(metadataKey, (const char *)buffer + 6);
2777 } else {
2778 // Convert from UTF-16 string to UTF-8 string.
2779 String8 tmpUTF8str(framedata, len16);
2780 mFileMetaData->setCString(metadataKey, tmpUTF8str.string());
2781 }
2782 }
2783
2784 delete[] buffer;
2785 buffer = NULL;
2786
2787 return OK;
2788 }
2789
parseID3v2MetaData(off64_t offset)2790 void MPEG4Extractor::parseID3v2MetaData(off64_t offset) {
2791 ID3 id3(mDataSource, true /* ignorev1 */, offset);
2792
2793 if (id3.isValid()) {
2794 struct Map {
2795 int key;
2796 const char *tag1;
2797 const char *tag2;
2798 };
2799 static const Map kMap[] = {
2800 { kKeyAlbum, "TALB", "TAL" },
2801 { kKeyArtist, "TPE1", "TP1" },
2802 { kKeyAlbumArtist, "TPE2", "TP2" },
2803 { kKeyComposer, "TCOM", "TCM" },
2804 { kKeyGenre, "TCON", "TCO" },
2805 { kKeyTitle, "TIT2", "TT2" },
2806 { kKeyYear, "TYE", "TYER" },
2807 { kKeyAuthor, "TXT", "TEXT" },
2808 { kKeyCDTrackNumber, "TRK", "TRCK" },
2809 { kKeyDiscNumber, "TPA", "TPOS" },
2810 { kKeyCompilation, "TCP", "TCMP" },
2811 };
2812 static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]);
2813
2814 for (size_t i = 0; i < kNumMapEntries; ++i) {
2815 if (!mFileMetaData->hasData(kMap[i].key)) {
2816 ID3::Iterator *it = new ID3::Iterator(id3, kMap[i].tag1);
2817 if (it->done()) {
2818 delete it;
2819 it = new ID3::Iterator(id3, kMap[i].tag2);
2820 }
2821
2822 if (it->done()) {
2823 delete it;
2824 continue;
2825 }
2826
2827 String8 s;
2828 it->getString(&s);
2829 delete it;
2830
2831 mFileMetaData->setCString(kMap[i].key, s);
2832 }
2833 }
2834
2835 size_t dataSize;
2836 String8 mime;
2837 const void *data = id3.getAlbumArt(&dataSize, &mime);
2838
2839 if (data) {
2840 mFileMetaData->setData(kKeyAlbumArt, MetaData::TYPE_NONE, data, dataSize);
2841 mFileMetaData->setCString(kKeyAlbumArtMIME, mime.string());
2842 }
2843 }
2844 }
2845
getTrack(size_t index)2846 sp<MediaSource> MPEG4Extractor::getTrack(size_t index) {
2847 status_t err;
2848 if ((err = readMetaData()) != OK) {
2849 return NULL;
2850 }
2851
2852 Track *track = mFirstTrack;
2853 while (index > 0) {
2854 if (track == NULL) {
2855 return NULL;
2856 }
2857
2858 track = track->next;
2859 --index;
2860 }
2861
2862 if (track == NULL) {
2863 return NULL;
2864 }
2865
2866
2867 Trex *trex = NULL;
2868 int32_t trackId;
2869 if (track->meta->findInt32(kKeyTrackID, &trackId)) {
2870 for (size_t i = 0; i < mTrex.size(); i++) {
2871 Trex *t = &mTrex.editItemAt(index);
2872 if (t->track_ID == (uint32_t) trackId) {
2873 trex = t;
2874 break;
2875 }
2876 }
2877 } else {
2878 ALOGE("b/21657957");
2879 return NULL;
2880 }
2881
2882 ALOGV("getTrack called, pssh: %zu", mPssh.size());
2883
2884 return new MPEG4Source(this,
2885 track->meta, mDataSource, track->timescale, track->sampleTable,
2886 mSidxEntries, trex, mMoofOffset);
2887 }
2888
2889 // static
verifyTrack(Track * track)2890 status_t MPEG4Extractor::verifyTrack(Track *track) {
2891 const char *mime;
2892 CHECK(track->meta->findCString(kKeyMIMEType, &mime));
2893
2894 uint32_t type;
2895 const void *data;
2896 size_t size;
2897 if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
2898 if (!track->meta->findData(kKeyAVCC, &type, &data, &size)
2899 || type != kTypeAVCC) {
2900 return ERROR_MALFORMED;
2901 }
2902 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) {
2903 if (!track->meta->findData(kKeyHVCC, &type, &data, &size)
2904 || type != kTypeHVCC) {
2905 return ERROR_MALFORMED;
2906 }
2907 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4)
2908 || !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG2)
2909 || !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) {
2910 if (!track->meta->findData(kKeyESDS, &type, &data, &size)
2911 || type != kTypeESDS) {
2912 return ERROR_MALFORMED;
2913 }
2914 }
2915
2916 if (track->sampleTable == NULL || !track->sampleTable->isValid()) {
2917 // Make sure we have all the metadata we need.
2918 ALOGE("stbl atom missing/invalid.");
2919 return ERROR_MALFORMED;
2920 }
2921
2922 if (track->timescale == 0) {
2923 ALOGE("timescale invalid.");
2924 return ERROR_MALFORMED;
2925 }
2926
2927 return OK;
2928 }
2929
// Audio object type (AOT) codes. Only the enumerators that are enabled
// here are defined; the remaining codes are kept as comments for reference.
enum AUDIO_OBJECT_TYPE {
    // AOT_NONE             = -1,
    // AOT_NULL_OBJECT      = 0,
    // AOT_AAC_MAIN         = 1,   Main profile
    AOT_AAC_LC              = 2,   // Low Complexity object
    // AOT_AAC_SSR          = 3,
    // AOT_AAC_LTP          = 4,
    AOT_SBR                 = 5,
    // AOT_AAC_SCAL         = 6,
    // AOT_TWIN_VQ          = 7,
    // AOT_CELP             = 8,
    // AOT_HVXC             = 9,
    // AOT_RSVD_10          = 10,  (reserved)
    // AOT_RSVD_11          = 11,  (reserved)
    // AOT_TTSI             = 12,  TTSI Object
    // AOT_MAIN_SYNTH       = 13,  Main Synthetic object
    // AOT_WAV_TAB_SYNTH    = 14,  Wavetable Synthesis object
    // AOT_GEN_MIDI         = 15,  General MIDI object
    // AOT_ALG_SYNTH_AUD_FX = 16,  Algorithmic Synthesis and Audio FX object
    AOT_ER_AAC_LC           = 17,  // Error Resilient(ER) AAC Low Complexity
    // AOT_RSVD_18          = 18,  (reserved)
    // AOT_ER_AAC_LTP       = 19,  Error Resilient(ER) AAC LTP object
    AOT_ER_AAC_SCAL         = 20,  // Error Resilient(ER) AAC Scalable object
    // AOT_ER_TWIN_VQ       = 21,  Error Resilient(ER) TwinVQ object
    AOT_ER_BSAC             = 22,  // Error Resilient(ER) BSAC object
    AOT_ER_AAC_LD           = 23,  // Error Resilient(ER) AAC LowDelay object
    // AOT_ER_CELP          = 24,  Error Resilient(ER) CELP object
    // AOT_ER_HVXC          = 25,  Error Resilient(ER) HVXC object
    // AOT_ER_HILN          = 26,  Error Resilient(ER) HILN object
    // AOT_ER_PARA          = 27,  Error Resilient(ER) Parametric object
    // AOT_RSVD_28          = 28,  might become SSC
    AOT_PS                  = 29,  // PS, Parametric Stereo (includes SBR)
    // AOT_MPEGS            = 30,  MPEG Surround

    AOT_ESCAPE              = 31,  // Signal AOT uses more than 5 bits

    // AOT_MP3ONMP4_L1      = 32,  MPEG-Layer1 in mp4
    // AOT_MP3ONMP4_L2      = 33,  MPEG-Layer2 in mp4
    // AOT_MP3ONMP4_L3      = 34,  MPEG-Layer3 in mp4
    // AOT_RSVD_35          = 35,  might become DST
    // AOT_RSVD_36          = 36,  might become ALS
    // AOT_AAC_SLS          = 37,  AAC + SLS
    // AOT_SLS              = 38,  SLS
    // AOT_ER_AAC_ELD       = 39,  AAC Enhanced Low Delay

    // AOT_USAC             = 42,  USAC
    // AOT_SAOC             = 43,  SAOC
    // AOT_LD_MPEGS         = 44,  Low Delay MPEG Surround

    // AOT_RSVD50           = 50,  Interim AOT for Rsvd50
};
2981
// Refines the audio parameters of the track currently being built
// (mLastTrack) using the ESDS blob |esds_data| of |esds_size| bytes.
//
// The object type indication selects special cases first: 0xe1 switches
// the track's MIME type to QCELP and 0x6b (MP3) is rejected. Otherwise the
// codec specific data is parsed bit by bit to obtain the audio object type
// (stored as kKeyAACAOT), the sample rate and the channel count, which
// replace the values previously stored under kKeySampleRate and
// kKeyChannelCount.
//
// Returns OK on success, or when there is no codec specific data at all;
// ERROR_MALFORMED when the ESDS or the bitstream is truncated/invalid or
// there is no current track; ERROR_UNSUPPORTED for MP3, for channel
// configurations that are not handled, or when no channels are signalled.
// Aborts (CHECK) if the track has no previous sample rate or channel count.
status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio(
        const void *esds_data, size_t esds_size) {
    ESDS esds(esds_data, esds_size);

    uint8_t objectTypeIndication;
    if (esds.getObjectTypeIndication(&objectTypeIndication) != OK) {
        return ERROR_MALFORMED;
    }

    if (objectTypeIndication == 0xe1) {
        // This isn't MPEG4 audio at all, it's QCELP 14k...
        if (mLastTrack == NULL)
            return ERROR_MALFORMED;

        mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_QCELP);
        return OK;
    }

    if (objectTypeIndication == 0x6b) {
        // The media subtype is MP3 audio
        // Our software MP3 audio decoder may not be able to handle
        // packetized MP3 audio; for now, lets just return ERROR_UNSUPPORTED
        ALOGE("MP3 track in MP4/3GPP file is not supported");
        return ERROR_UNSUPPORTED;
    }

    const uint8_t *csd;
    size_t csd_size;
    if (esds.getCodecSpecificInfo(
                (const void **)&csd, &csd_size) != OK) {
        return ERROR_MALFORMED;
    }

    if (kUseHexDump) {
        printf("ESD of size %zu\n", csd_size);
        hexdump(csd, csd_size);
    }

    if (csd_size == 0) {
        // There's no further information, i.e. no codec specific data
        // Let's assume that the information provided in the mpeg4 headers
        // is accurate and hope for the best.

        return OK;
    }

    // The unchecked reads below (5 [+ 6] + 4 bits) need at most 15 bits.
    if (csd_size < 2) {
        return ERROR_MALFORMED;
    }

    // Sample rates selected by frequency indices 0..12. Indices 13 and 14
    // are rejected below and index 15 means the rate is coded explicitly.
    static uint32_t kSamplingRate[] = {
        96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050,
        16000, 12000, 11025, 8000, 7350
    };

    ABitReader br(csd, csd_size);
    uint32_t objectType = br.getBits(5);

    if (objectType == 31) {  // AAC-ELD => additional 6 bits
        objectType = 32 + br.getBits(6);
    }

    if (mLastTrack == NULL)
        return ERROR_MALFORMED;

    //keep AOT type
    mLastTrack->meta->setInt32(kKeyAACAOT, objectType);

    uint32_t freqIndex = br.getBits(4);

    int32_t sampleRate = 0;
    int32_t numChannels = 0;
    if (freqIndex == 15) {
        // Explicit 24-bit sample rate followed by the channel configuration.
        if (br.numBitsLeft() < 28) return ERROR_MALFORMED;
        sampleRate = br.getBits(24);
        numChannels = br.getBits(4);
    } else {
        if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
        numChannels = br.getBits(4);

        if (freqIndex == 13 || freqIndex == 14) {
            return ERROR_MALFORMED;
        }

        sampleRate = kSamplingRate[freqIndex];
    }

    if (objectType == AOT_SBR || objectType == AOT_PS) {//SBR specific config per 14496-3 table 1.13
        if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
        uint32_t extFreqIndex = br.getBits(4);
        int32_t extSampleRate __unused;
        if (extFreqIndex == 15) {
            if (csd_size < 8) {
                return ERROR_MALFORMED;
            }
            if (br.numBitsLeft() < 24) return ERROR_MALFORMED;
            extSampleRate = br.getBits(24);
        } else {
            if (extFreqIndex == 13 || extFreqIndex == 14) {
                return ERROR_MALFORMED;
            }
            extSampleRate = kSamplingRate[extFreqIndex];
        }
        //TODO: save the extension sampling rate value in meta data =>
        //      mLastTrack->meta->setInt32(kKeyExtSampleRate, extSampleRate);
    }

    // Translate the channel configuration code into a channel count. A
    // value of 0 is kept as is and resolved further down.
    switch (numChannels) {
        // values defined in 14496-3_2009 amendment-4 Table 1.19 - Channel Configuration
        case 0:
        case 1:// FC
        case 2:// FL FR
        case 3:// FC, FL FR
        case 4:// FC, FL FR, RC
        case 5:// FC, FL FR, SL SR
        case 6:// FC, FL FR, SL SR, LFE
            //numChannels already contains the right value
            break;
        case 11:// FC, FL FR, SL SR, RC, LFE
            numChannels = 7;
            break;
        case 7: // FC, FCL FCR, FL FR, SL SR, LFE
        case 12:// FC, FL FR, SL SR, RL RR, LFE
        case 14:// FC, FL FR, SL SR, LFE, FHL FHR
            numChannels = 8;
            break;
        default:
            return ERROR_UNSUPPORTED;
    }

    {
        if (objectType == AOT_SBR || objectType == AOT_PS) {
            // A second object type follows for SBR/PS; it replaces the
            // local objectType (kKeyAACAOT keeps the value stored above).
            if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
            objectType = br.getBits(5);

            if (objectType == AOT_ESCAPE) {
                if (br.numBitsLeft() < 6) return ERROR_MALFORMED;
                objectType = 32 + br.getBits(6);
            }
        }
        if (objectType == AOT_AAC_LC || objectType == AOT_ER_AAC_LC ||
                objectType == AOT_ER_AAC_LD || objectType == AOT_ER_AAC_SCAL ||
                objectType == AOT_ER_BSAC) {
            if (br.numBitsLeft() < 2) return ERROR_MALFORMED;
            const int32_t frameLengthFlag __unused = br.getBits(1);

            const int32_t dependsOnCoreCoder = br.getBits(1);

            if (dependsOnCoreCoder ) {
                if (br.numBitsLeft() < 14) return ERROR_MALFORMED;
                const int32_t coreCoderDelay __unused = br.getBits(14);
            }

            // The extension flag may be missing from the data; in that
            // case a per-object-type default is assumed and logged.
            int32_t extensionFlag = -1;
            if (br.numBitsLeft() > 0) {
                extensionFlag = br.getBits(1);
            } else {
                switch (objectType) {
                // 14496-3 4.5.1.1 extensionFlag
                case AOT_AAC_LC:
                    extensionFlag = 0;
                    break;
                case AOT_ER_AAC_LC:
                case AOT_ER_AAC_SCAL:
                case AOT_ER_BSAC:
                case AOT_ER_AAC_LD:
                    extensionFlag = 1;
                    break;
                default:
                    return ERROR_MALFORMED;
                    break;
                }
                ALOGW("csd missing extension flag; assuming %d for object type %u.",
                        extensionFlag, objectType);
            }

            if (numChannels == 0) {
                // Channel configuration 0: the channel layout is described
                // explicitly by the fields that follow; count the channels.
                // NOTE(review): the field names suggest a
                // program_config_element -- confirm against 14496-3.
                int32_t channelsEffectiveNum = 0;
                int32_t channelsNum = 0;
                // The fixed-size fields up to and including
                // MonoMixdownPresent take 32 bits.
                if (br.numBitsLeft() < 32) {
                    return ERROR_MALFORMED;
                }
                const int32_t ElementInstanceTag __unused = br.getBits(4);
                const int32_t Profile __unused = br.getBits(2);
                const int32_t SamplingFrequencyIndex __unused = br.getBits(4);
                const int32_t NumFrontChannelElements = br.getBits(4);
                const int32_t NumSideChannelElements = br.getBits(4);
                const int32_t NumBackChannelElements = br.getBits(4);
                const int32_t NumLfeChannelElements = br.getBits(2);
                const int32_t NumAssocDataElements __unused = br.getBits(3);
                const int32_t NumValidCcElements __unused = br.getBits(4);

                const int32_t MonoMixdownPresent = br.getBits(1);

                if (MonoMixdownPresent != 0) {
                    if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
                    const int32_t MonoMixdownElementNumber __unused = br.getBits(4);
                }

                if (br.numBitsLeft() < 1) return ERROR_MALFORMED;
                const int32_t StereoMixdownPresent = br.getBits(1);
                if (StereoMixdownPresent != 0) {
                    if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
                    const int32_t StereoMixdownElementNumber __unused = br.getBits(4);
                }

                if (br.numBitsLeft() < 1) return ERROR_MALFORMED;
                const int32_t MatrixMixdownIndexPresent = br.getBits(1);
                if (MatrixMixdownIndexPresent != 0) {
                    if (br.numBitsLeft() < 3) return ERROR_MALFORMED;
                    const int32_t MatrixMixdownIndex __unused = br.getBits(2);
                    const int32_t PseudoSurroundEnable __unused = br.getBits(1);
                }

                // Each front/side/back element contributes two channels
                // when its "is CPE" bit is set and one channel otherwise.
                int i;
                for (i=0; i < NumFrontChannelElements; i++) {
                    if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
                    const int32_t FrontElementIsCpe = br.getBits(1);
                    const int32_t FrontElementTagSelect __unused = br.getBits(4);
                    channelsNum += FrontElementIsCpe ? 2 : 1;
                }

                for (i=0; i < NumSideChannelElements; i++) {
                    if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
                    const int32_t SideElementIsCpe = br.getBits(1);
                    const int32_t SideElementTagSelect __unused = br.getBits(4);
                    channelsNum += SideElementIsCpe ? 2 : 1;
                }

                for (i=0; i < NumBackChannelElements; i++) {
                    if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
                    const int32_t BackElementIsCpe = br.getBits(1);
                    const int32_t BackElementTagSelect __unused = br.getBits(4);
                    channelsNum += BackElementIsCpe ? 2 : 1;
                }
                // Channel count before the LFE elements are added; it is
                // only logged.
                channelsEffectiveNum = channelsNum;

                for (i=0; i < NumLfeChannelElements; i++) {
                    if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
                    const int32_t LfeElementTagSelect __unused = br.getBits(4);
                    channelsNum += 1;
                }
                ALOGV("mpeg4 audio channelsNum = %d", channelsNum);
                ALOGV("mpeg4 audio channelsEffectiveNum = %d", channelsEffectiveNum);
                numChannels = channelsNum;
            }
        }
    }

    // Still no channels: nothing usable was signalled.
    if (numChannels == 0) {
        return ERROR_UNSUPPORTED;
    }

    if (mLastTrack == NULL)
        return ERROR_MALFORMED;

    // The values parsed here override the ones already stored on the
    // track; differences are only logged.
    int32_t prevSampleRate;
    CHECK(mLastTrack->meta->findInt32(kKeySampleRate, &prevSampleRate));

    if (prevSampleRate != sampleRate) {
        ALOGV("mpeg4 audio sample rate different from previous setting. "
             "was: %d, now: %d", prevSampleRate, sampleRate);
    }

    mLastTrack->meta->setInt32(kKeySampleRate, sampleRate);

    int32_t prevChannelCount;
    CHECK(mLastTrack->meta->findInt32(kKeyChannelCount, &prevChannelCount));

    if (prevChannelCount != numChannels) {
        ALOGV("mpeg4 audio channel count different from previous setting. "
             "was: %d, now: %d", prevChannelCount, numChannels);
    }

    mLastTrack->meta->setInt32(kKeyChannelCount, numChannels);

    return OK;
}
3260
3261 ////////////////////////////////////////////////////////////////////////////////
3262
MPEG4Source(const sp<MPEG4Extractor> & owner,const sp<MetaData> & format,const sp<DataSource> & dataSource,int32_t timeScale,const sp<SampleTable> & sampleTable,Vector<SidxEntry> & sidx,const Trex * trex,off64_t firstMoofOffset)3263 MPEG4Source::MPEG4Source(
3264 const sp<MPEG4Extractor> &owner,
3265 const sp<MetaData> &format,
3266 const sp<DataSource> &dataSource,
3267 int32_t timeScale,
3268 const sp<SampleTable> &sampleTable,
3269 Vector<SidxEntry> &sidx,
3270 const Trex *trex,
3271 off64_t firstMoofOffset)
3272 : mOwner(owner),
3273 mFormat(format),
3274 mDataSource(dataSource),
3275 mTimescale(timeScale),
3276 mSampleTable(sampleTable),
3277 mCurrentSampleIndex(0),
3278 mCurrentFragmentIndex(0),
3279 mSegments(sidx),
3280 mTrex(trex),
3281 mFirstMoofOffset(firstMoofOffset),
3282 mCurrentMoofOffset(firstMoofOffset),
3283 mCurrentTime(0),
3284 mCurrentSampleInfoAllocSize(0),
3285 mCurrentSampleInfoSizes(NULL),
3286 mCurrentSampleInfoOffsetsAllocSize(0),
3287 mCurrentSampleInfoOffsets(NULL),
3288 mIsAVC(false),
3289 mIsHEVC(false),
3290 mNALLengthSize(0),
3291 mStarted(false),
3292 mGroup(NULL),
3293 mBuffer(NULL),
3294 mWantsNALFragments(false),
3295 mSrcBuffer(NULL) {
3296
3297 memset(&mTrackFragmentHeaderInfo, 0, sizeof(mTrackFragmentHeaderInfo));
3298
3299 mFormat->findInt32(kKeyCryptoMode, &mCryptoMode);
3300 mDefaultIVSize = 0;
3301 mFormat->findInt32(kKeyCryptoDefaultIVSize, &mDefaultIVSize);
3302 uint32_t keytype;
3303 const void *key;
3304 size_t keysize;
3305 if (mFormat->findData(kKeyCryptoKey, &keytype, &key, &keysize)) {
3306 CHECK(keysize <= 16);
3307 memset(mCryptoKey, 0, 16);
3308 memcpy(mCryptoKey, key, keysize);
3309 }
3310
3311 const char *mime;
3312 bool success = mFormat->findCString(kKeyMIMEType, &mime);
3313 CHECK(success);
3314
3315 mIsAVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC);
3316 mIsHEVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC);
3317
3318 if (mIsAVC) {
3319 uint32_t type;
3320 const void *data;
3321 size_t size;
3322 CHECK(format->findData(kKeyAVCC, &type, &data, &size));
3323
3324 const uint8_t *ptr = (const uint8_t *)data;
3325
3326 CHECK(size >= 7);
3327 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1
3328
3329 // The number of bytes used to encode the length of a NAL unit.
3330 mNALLengthSize = 1 + (ptr[4] & 3);
3331 } else if (mIsHEVC) {
3332 uint32_t type;
3333 const void *data;
3334 size_t size;
3335 CHECK(format->findData(kKeyHVCC, &type, &data, &size));
3336
3337 const uint8_t *ptr = (const uint8_t *)data;
3338
3339 CHECK(size >= 7);
3340 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1
3341
3342 mNALLengthSize = 1 + (ptr[14 + 7] & 3);
3343 }
3344
3345 CHECK(format->findInt32(kKeyTrackID, &mTrackId));
3346
3347 if (mFirstMoofOffset != 0) {
3348 off64_t offset = mFirstMoofOffset;
3349 parseChunk(&offset);
3350 }
3351 }
3352
~MPEG4Source()3353 MPEG4Source::~MPEG4Source() {
3354 if (mStarted) {
3355 stop();
3356 }
3357 free(mCurrentSampleInfoSizes);
3358 free(mCurrentSampleInfoOffsets);
3359 }
3360
start(MetaData * params)3361 status_t MPEG4Source::start(MetaData *params) {
3362 Mutex::Autolock autoLock(mLock);
3363
3364 CHECK(!mStarted);
3365
3366 int32_t val;
3367 if (params && params->findInt32(kKeyWantsNALFragments, &val)
3368 && val != 0) {
3369 mWantsNALFragments = true;
3370 } else {
3371 mWantsNALFragments = false;
3372 }
3373
3374 int32_t tmp;
3375 CHECK(mFormat->findInt32(kKeyMaxInputSize, &tmp));
3376 size_t max_size = tmp;
3377
3378 // A somewhat arbitrary limit that should be sufficient for 8k video frames
3379 // If you see the message below for a valid input stream: increase the limit
3380 if (max_size > 64 * 1024 * 1024) {
3381 ALOGE("bogus max input size: %zu", max_size);
3382 return ERROR_MALFORMED;
3383 }
3384 mGroup = new MediaBufferGroup;
3385 mGroup->add_buffer(new MediaBuffer(max_size));
3386
3387 mSrcBuffer = new (std::nothrow) uint8_t[max_size];
3388 if (mSrcBuffer == NULL) {
3389 // file probably specified a bad max size
3390 delete mGroup;
3391 mGroup = NULL;
3392 return ERROR_MALFORMED;
3393 }
3394
3395 mStarted = true;
3396
3397 return OK;
3398 }
3399
stop()3400 status_t MPEG4Source::stop() {
3401 Mutex::Autolock autoLock(mLock);
3402
3403 CHECK(mStarted);
3404
3405 if (mBuffer != NULL) {
3406 mBuffer->release();
3407 mBuffer = NULL;
3408 }
3409
3410 delete[] mSrcBuffer;
3411 mSrcBuffer = NULL;
3412
3413 delete mGroup;
3414 mGroup = NULL;
3415
3416 mStarted = false;
3417 mCurrentSampleIndex = 0;
3418
3419 return OK;
3420 }
3421
parseChunk(off64_t * offset)3422 status_t MPEG4Source::parseChunk(off64_t *offset) {
3423 uint32_t hdr[2];
3424 if (mDataSource->readAt(*offset, hdr, 8) < 8) {
3425 return ERROR_IO;
3426 }
3427 uint64_t chunk_size = ntohl(hdr[0]);
3428 uint32_t chunk_type = ntohl(hdr[1]);
3429 off64_t data_offset = *offset + 8;
3430
3431 if (chunk_size == 1) {
3432 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
3433 return ERROR_IO;
3434 }
3435 chunk_size = ntoh64(chunk_size);
3436 data_offset += 8;
3437
3438 if (chunk_size < 16) {
3439 // The smallest valid chunk is 16 bytes long in this case.
3440 return ERROR_MALFORMED;
3441 }
3442 } else if (chunk_size < 8) {
3443 // The smallest valid chunk is 8 bytes long.
3444 return ERROR_MALFORMED;
3445 }
3446
3447 char chunk[5];
3448 MakeFourCCString(chunk_type, chunk);
3449 ALOGV("MPEG4Source chunk %s @ %#llx", chunk, (long long)*offset);
3450
3451 off64_t chunk_data_size = *offset + chunk_size - data_offset;
3452
3453 switch(chunk_type) {
3454
3455 case FOURCC('t', 'r', 'a', 'f'):
3456 case FOURCC('m', 'o', 'o', 'f'): {
3457 off64_t stop_offset = *offset + chunk_size;
3458 *offset = data_offset;
3459 while (*offset < stop_offset) {
3460 status_t err = parseChunk(offset);
3461 if (err != OK) {
3462 return err;
3463 }
3464 }
3465 if (chunk_type == FOURCC('m', 'o', 'o', 'f')) {
3466 // *offset points to the box following this moof. Find the next moof from there.
3467
3468 while (true) {
3469 if (mDataSource->readAt(*offset, hdr, 8) < 8) {
3470 return ERROR_END_OF_STREAM;
3471 }
3472 chunk_size = ntohl(hdr[0]);
3473 chunk_type = ntohl(hdr[1]);
3474 if (chunk_type == FOURCC('m', 'o', 'o', 'f')) {
3475 mNextMoofOffset = *offset;
3476 break;
3477 }
3478 *offset += chunk_size;
3479 }
3480 }
3481 break;
3482 }
3483
3484 case FOURCC('t', 'f', 'h', 'd'): {
3485 status_t err;
3486 if ((err = parseTrackFragmentHeader(data_offset, chunk_data_size)) != OK) {
3487 return err;
3488 }
3489 *offset += chunk_size;
3490 break;
3491 }
3492
3493 case FOURCC('t', 'r', 'u', 'n'): {
3494 status_t err;
3495 if (mLastParsedTrackId == mTrackId) {
3496 if ((err = parseTrackFragmentRun(data_offset, chunk_data_size)) != OK) {
3497 return err;
3498 }
3499 }
3500
3501 *offset += chunk_size;
3502 break;
3503 }
3504
3505 case FOURCC('s', 'a', 'i', 'z'): {
3506 status_t err;
3507 if ((err = parseSampleAuxiliaryInformationSizes(data_offset, chunk_data_size)) != OK) {
3508 return err;
3509 }
3510 *offset += chunk_size;
3511 break;
3512 }
3513 case FOURCC('s', 'a', 'i', 'o'): {
3514 status_t err;
3515 if ((err = parseSampleAuxiliaryInformationOffsets(data_offset, chunk_data_size)) != OK) {
3516 return err;
3517 }
3518 *offset += chunk_size;
3519 break;
3520 }
3521
3522 case FOURCC('m', 'd', 'a', 't'): {
3523 // parse DRM info if present
3524 ALOGV("MPEG4Source::parseChunk mdat");
3525 // if saiz/saoi was previously observed, do something with the sampleinfos
3526 *offset += chunk_size;
3527 break;
3528 }
3529
3530 default: {
3531 *offset += chunk_size;
3532 break;
3533 }
3534 }
3535 return OK;
3536 }
3537
parseSampleAuxiliaryInformationSizes(off64_t offset,off64_t)3538 status_t MPEG4Source::parseSampleAuxiliaryInformationSizes(
3539 off64_t offset, off64_t /* size */) {
3540 ALOGV("parseSampleAuxiliaryInformationSizes");
3541 // 14496-12 8.7.12
3542 uint8_t version;
3543 if (mDataSource->readAt(
3544 offset, &version, sizeof(version))
3545 < (ssize_t)sizeof(version)) {
3546 return ERROR_IO;
3547 }
3548
3549 if (version != 0) {
3550 return ERROR_UNSUPPORTED;
3551 }
3552 offset++;
3553
3554 uint32_t flags;
3555 if (!mDataSource->getUInt24(offset, &flags)) {
3556 return ERROR_IO;
3557 }
3558 offset += 3;
3559
3560 if (flags & 1) {
3561 uint32_t tmp;
3562 if (!mDataSource->getUInt32(offset, &tmp)) {
3563 return ERROR_MALFORMED;
3564 }
3565 mCurrentAuxInfoType = tmp;
3566 offset += 4;
3567 if (!mDataSource->getUInt32(offset, &tmp)) {
3568 return ERROR_MALFORMED;
3569 }
3570 mCurrentAuxInfoTypeParameter = tmp;
3571 offset += 4;
3572 }
3573
3574 uint8_t defsize;
3575 if (mDataSource->readAt(offset, &defsize, 1) != 1) {
3576 return ERROR_MALFORMED;
3577 }
3578 mCurrentDefaultSampleInfoSize = defsize;
3579 offset++;
3580
3581 uint32_t smplcnt;
3582 if (!mDataSource->getUInt32(offset, &smplcnt)) {
3583 return ERROR_MALFORMED;
3584 }
3585 mCurrentSampleInfoCount = smplcnt;
3586 offset += 4;
3587
3588 if (mCurrentDefaultSampleInfoSize != 0) {
3589 ALOGV("@@@@ using default sample info size of %d", mCurrentDefaultSampleInfoSize);
3590 return OK;
3591 }
3592 if (smplcnt > mCurrentSampleInfoAllocSize) {
3593 mCurrentSampleInfoSizes = (uint8_t*) realloc(mCurrentSampleInfoSizes, smplcnt);
3594 mCurrentSampleInfoAllocSize = smplcnt;
3595 }
3596
3597 mDataSource->readAt(offset, mCurrentSampleInfoSizes, smplcnt);
3598 return OK;
3599 }
3600
parseSampleAuxiliaryInformationOffsets(off64_t offset,off64_t)3601 status_t MPEG4Source::parseSampleAuxiliaryInformationOffsets(
3602 off64_t offset, off64_t /* size */) {
3603 ALOGV("parseSampleAuxiliaryInformationOffsets");
3604 // 14496-12 8.7.13
3605 uint8_t version;
3606 if (mDataSource->readAt(offset, &version, sizeof(version)) != 1) {
3607 return ERROR_IO;
3608 }
3609 offset++;
3610
3611 uint32_t flags;
3612 if (!mDataSource->getUInt24(offset, &flags)) {
3613 return ERROR_IO;
3614 }
3615 offset += 3;
3616
3617 uint32_t entrycount;
3618 if (!mDataSource->getUInt32(offset, &entrycount)) {
3619 return ERROR_IO;
3620 }
3621 offset += 4;
3622 if (entrycount == 0) {
3623 return OK;
3624 }
3625 if (entrycount > UINT32_MAX / 8) {
3626 return ERROR_MALFORMED;
3627 }
3628
3629 if (entrycount > mCurrentSampleInfoOffsetsAllocSize) {
3630 uint64_t *newPtr = (uint64_t *)realloc(mCurrentSampleInfoOffsets, entrycount * 8);
3631 if (newPtr == NULL) {
3632 return NO_MEMORY;
3633 }
3634 mCurrentSampleInfoOffsets = newPtr;
3635 mCurrentSampleInfoOffsetsAllocSize = entrycount;
3636 }
3637 mCurrentSampleInfoOffsetCount = entrycount;
3638
3639 if (mCurrentSampleInfoOffsets == NULL) {
3640 return OK;
3641 }
3642
3643 for (size_t i = 0; i < entrycount; i++) {
3644 if (version == 0) {
3645 uint32_t tmp;
3646 if (!mDataSource->getUInt32(offset, &tmp)) {
3647 return ERROR_IO;
3648 }
3649 mCurrentSampleInfoOffsets[i] = tmp;
3650 offset += 4;
3651 } else {
3652 uint64_t tmp;
3653 if (!mDataSource->getUInt64(offset, &tmp)) {
3654 return ERROR_IO;
3655 }
3656 mCurrentSampleInfoOffsets[i] = tmp;
3657 offset += 8;
3658 }
3659 }
3660
3661 // parse clear/encrypted data
3662
3663 off64_t drmoffset = mCurrentSampleInfoOffsets[0]; // from moof
3664
3665 drmoffset += mCurrentMoofOffset;
3666 int ivlength;
3667 CHECK(mFormat->findInt32(kKeyCryptoDefaultIVSize, &ivlength));
3668
3669 // only 0, 8 and 16 byte initialization vectors are supported
3670 if (ivlength != 0 && ivlength != 8 && ivlength != 16) {
3671 ALOGW("unsupported IV length: %d", ivlength);
3672 return ERROR_MALFORMED;
3673 }
3674 // read CencSampleAuxiliaryDataFormats
3675 for (size_t i = 0; i < mCurrentSampleInfoCount; i++) {
3676 if (i >= mCurrentSamples.size()) {
3677 ALOGW("too few samples");
3678 break;
3679 }
3680 Sample *smpl = &mCurrentSamples.editItemAt(i);
3681
3682 memset(smpl->iv, 0, 16);
3683 if (mDataSource->readAt(drmoffset, smpl->iv, ivlength) != ivlength) {
3684 return ERROR_IO;
3685 }
3686
3687 drmoffset += ivlength;
3688
3689 int32_t smplinfosize = mCurrentDefaultSampleInfoSize;
3690 if (smplinfosize == 0) {
3691 smplinfosize = mCurrentSampleInfoSizes[i];
3692 }
3693 if (smplinfosize > ivlength) {
3694 uint16_t numsubsamples;
3695 if (!mDataSource->getUInt16(drmoffset, &numsubsamples)) {
3696 return ERROR_IO;
3697 }
3698 drmoffset += 2;
3699 for (size_t j = 0; j < numsubsamples; j++) {
3700 uint16_t numclear;
3701 uint32_t numencrypted;
3702 if (!mDataSource->getUInt16(drmoffset, &numclear)) {
3703 return ERROR_IO;
3704 }
3705 drmoffset += 2;
3706 if (!mDataSource->getUInt32(drmoffset, &numencrypted)) {
3707 return ERROR_IO;
3708 }
3709 drmoffset += 4;
3710 smpl->clearsizes.add(numclear);
3711 smpl->encryptedsizes.add(numencrypted);
3712 }
3713 } else {
3714 smpl->clearsizes.add(0);
3715 smpl->encryptedsizes.add(smpl->size);
3716 }
3717 }
3718
3719
3720 return OK;
3721 }
3722
parseTrackFragmentHeader(off64_t offset,off64_t size)3723 status_t MPEG4Source::parseTrackFragmentHeader(off64_t offset, off64_t size) {
3724
3725 if (size < 8) {
3726 return -EINVAL;
3727 }
3728
3729 uint32_t flags;
3730 if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags
3731 return ERROR_MALFORMED;
3732 }
3733
3734 if (flags & 0xff000000) {
3735 return -EINVAL;
3736 }
3737
3738 if (!mDataSource->getUInt32(offset + 4, (uint32_t*)&mLastParsedTrackId)) {
3739 return ERROR_MALFORMED;
3740 }
3741
3742 if (mLastParsedTrackId != mTrackId) {
3743 // this is not the right track, skip it
3744 return OK;
3745 }
3746
3747 mTrackFragmentHeaderInfo.mFlags = flags;
3748 mTrackFragmentHeaderInfo.mTrackID = mLastParsedTrackId;
3749 offset += 8;
3750 size -= 8;
3751
3752 ALOGV("fragment header: %08x %08x", flags, mTrackFragmentHeaderInfo.mTrackID);
3753
3754 if (flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent) {
3755 if (size < 8) {
3756 return -EINVAL;
3757 }
3758
3759 if (!mDataSource->getUInt64(offset, &mTrackFragmentHeaderInfo.mBaseDataOffset)) {
3760 return ERROR_MALFORMED;
3761 }
3762 offset += 8;
3763 size -= 8;
3764 }
3765
3766 if (flags & TrackFragmentHeaderInfo::kSampleDescriptionIndexPresent) {
3767 if (size < 4) {
3768 return -EINVAL;
3769 }
3770
3771 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mSampleDescriptionIndex)) {
3772 return ERROR_MALFORMED;
3773 }
3774 offset += 4;
3775 size -= 4;
3776 }
3777
3778 if (flags & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
3779 if (size < 4) {
3780 return -EINVAL;
3781 }
3782
3783 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleDuration)) {
3784 return ERROR_MALFORMED;
3785 }
3786 offset += 4;
3787 size -= 4;
3788 }
3789
3790 if (flags & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) {
3791 if (size < 4) {
3792 return -EINVAL;
3793 }
3794
3795 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleSize)) {
3796 return ERROR_MALFORMED;
3797 }
3798 offset += 4;
3799 size -= 4;
3800 }
3801
3802 if (flags & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) {
3803 if (size < 4) {
3804 return -EINVAL;
3805 }
3806
3807 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleFlags)) {
3808 return ERROR_MALFORMED;
3809 }
3810 offset += 4;
3811 size -= 4;
3812 }
3813
3814 if (!(flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent)) {
3815 mTrackFragmentHeaderInfo.mBaseDataOffset = mCurrentMoofOffset;
3816 }
3817
3818 mTrackFragmentHeaderInfo.mDataOffset = 0;
3819 return OK;
3820 }
3821
parseTrackFragmentRun(off64_t offset,off64_t size)3822 status_t MPEG4Source::parseTrackFragmentRun(off64_t offset, off64_t size) {
3823
3824 ALOGV("MPEG4Extractor::parseTrackFragmentRun");
3825 if (size < 8) {
3826 return -EINVAL;
3827 }
3828
3829 enum {
3830 kDataOffsetPresent = 0x01,
3831 kFirstSampleFlagsPresent = 0x04,
3832 kSampleDurationPresent = 0x100,
3833 kSampleSizePresent = 0x200,
3834 kSampleFlagsPresent = 0x400,
3835 kSampleCompositionTimeOffsetPresent = 0x800,
3836 };
3837
3838 uint32_t flags;
3839 if (!mDataSource->getUInt32(offset, &flags)) {
3840 return ERROR_MALFORMED;
3841 }
3842 ALOGV("fragment run flags: %08x", flags);
3843
3844 if (flags & 0xff000000) {
3845 return -EINVAL;
3846 }
3847
3848 if ((flags & kFirstSampleFlagsPresent) && (flags & kSampleFlagsPresent)) {
3849 // These two shall not be used together.
3850 return -EINVAL;
3851 }
3852
3853 uint32_t sampleCount;
3854 if (!mDataSource->getUInt32(offset + 4, &sampleCount)) {
3855 return ERROR_MALFORMED;
3856 }
3857 offset += 8;
3858 size -= 8;
3859
3860 uint64_t dataOffset = mTrackFragmentHeaderInfo.mDataOffset;
3861
3862 uint32_t firstSampleFlags = 0;
3863
3864 if (flags & kDataOffsetPresent) {
3865 if (size < 4) {
3866 return -EINVAL;
3867 }
3868
3869 int32_t dataOffsetDelta;
3870 if (!mDataSource->getUInt32(offset, (uint32_t*)&dataOffsetDelta)) {
3871 return ERROR_MALFORMED;
3872 }
3873
3874 dataOffset = mTrackFragmentHeaderInfo.mBaseDataOffset + dataOffsetDelta;
3875
3876 offset += 4;
3877 size -= 4;
3878 }
3879
3880 if (flags & kFirstSampleFlagsPresent) {
3881 if (size < 4) {
3882 return -EINVAL;
3883 }
3884
3885 if (!mDataSource->getUInt32(offset, &firstSampleFlags)) {
3886 return ERROR_MALFORMED;
3887 }
3888 offset += 4;
3889 size -= 4;
3890 }
3891
3892 uint32_t sampleDuration = 0, sampleSize = 0, sampleFlags = 0,
3893 sampleCtsOffset = 0;
3894
3895 size_t bytesPerSample = 0;
3896 if (flags & kSampleDurationPresent) {
3897 bytesPerSample += 4;
3898 } else if (mTrackFragmentHeaderInfo.mFlags
3899 & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
3900 sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration;
3901 } else if (mTrex) {
3902 sampleDuration = mTrex->default_sample_duration;
3903 }
3904
3905 if (flags & kSampleSizePresent) {
3906 bytesPerSample += 4;
3907 } else if (mTrackFragmentHeaderInfo.mFlags
3908 & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) {
3909 sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize;
3910 } else {
3911 sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize;
3912 }
3913
3914 if (flags & kSampleFlagsPresent) {
3915 bytesPerSample += 4;
3916 } else if (mTrackFragmentHeaderInfo.mFlags
3917 & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) {
3918 sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags;
3919 } else {
3920 sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags;
3921 }
3922
3923 if (flags & kSampleCompositionTimeOffsetPresent) {
3924 bytesPerSample += 4;
3925 } else {
3926 sampleCtsOffset = 0;
3927 }
3928
3929 if (size < (off64_t)(sampleCount * bytesPerSample)) {
3930 return -EINVAL;
3931 }
3932
3933 Sample tmp;
3934 for (uint32_t i = 0; i < sampleCount; ++i) {
3935 if (flags & kSampleDurationPresent) {
3936 if (!mDataSource->getUInt32(offset, &sampleDuration)) {
3937 return ERROR_MALFORMED;
3938 }
3939 offset += 4;
3940 }
3941
3942 if (flags & kSampleSizePresent) {
3943 if (!mDataSource->getUInt32(offset, &sampleSize)) {
3944 return ERROR_MALFORMED;
3945 }
3946 offset += 4;
3947 }
3948
3949 if (flags & kSampleFlagsPresent) {
3950 if (!mDataSource->getUInt32(offset, &sampleFlags)) {
3951 return ERROR_MALFORMED;
3952 }
3953 offset += 4;
3954 }
3955
3956 if (flags & kSampleCompositionTimeOffsetPresent) {
3957 if (!mDataSource->getUInt32(offset, &sampleCtsOffset)) {
3958 return ERROR_MALFORMED;
3959 }
3960 offset += 4;
3961 }
3962
3963 ALOGV("adding sample %d at offset 0x%08" PRIx64 ", size %u, duration %u, "
3964 " flags 0x%08x", i + 1,
3965 dataOffset, sampleSize, sampleDuration,
3966 (flags & kFirstSampleFlagsPresent) && i == 0
3967 ? firstSampleFlags : sampleFlags);
3968 tmp.offset = dataOffset;
3969 tmp.size = sampleSize;
3970 tmp.duration = sampleDuration;
3971 tmp.compositionOffset = sampleCtsOffset;
3972 mCurrentSamples.add(tmp);
3973
3974 dataOffset += sampleSize;
3975 }
3976
3977 mTrackFragmentHeaderInfo.mDataOffset = dataOffset;
3978
3979 return OK;
3980 }
3981
getFormat()3982 sp<MetaData> MPEG4Source::getFormat() {
3983 Mutex::Autolock autoLock(mLock);
3984
3985 return mFormat;
3986 }
3987
parseNALSize(const uint8_t * data) const3988 size_t MPEG4Source::parseNALSize(const uint8_t *data) const {
3989 switch (mNALLengthSize) {
3990 case 1:
3991 return *data;
3992 case 2:
3993 return U16_AT(data);
3994 case 3:
3995 return ((size_t)data[0] << 16) | U16_AT(&data[1]);
3996 case 4:
3997 return U32_AT(data);
3998 }
3999
4000 // This cannot happen, mNALLengthSize springs to life by adding 1 to
4001 // a 2-bit integer.
4002 CHECK(!"Should not be here.");
4003
4004 return 0;
4005 }
4006
read(MediaBuffer ** out,const ReadOptions * options)4007 status_t MPEG4Source::read(
4008 MediaBuffer **out, const ReadOptions *options) {
4009 Mutex::Autolock autoLock(mLock);
4010
4011 CHECK(mStarted);
4012
4013 if (mFirstMoofOffset > 0) {
4014 return fragmentedRead(out, options);
4015 }
4016
4017 *out = NULL;
4018
4019 int64_t targetSampleTimeUs = -1;
4020
4021 int64_t seekTimeUs;
4022 ReadOptions::SeekMode mode;
4023 if (options && options->getSeekTo(&seekTimeUs, &mode)) {
4024 uint32_t findFlags = 0;
4025 switch (mode) {
4026 case ReadOptions::SEEK_PREVIOUS_SYNC:
4027 findFlags = SampleTable::kFlagBefore;
4028 break;
4029 case ReadOptions::SEEK_NEXT_SYNC:
4030 findFlags = SampleTable::kFlagAfter;
4031 break;
4032 case ReadOptions::SEEK_CLOSEST_SYNC:
4033 case ReadOptions::SEEK_CLOSEST:
4034 findFlags = SampleTable::kFlagClosest;
4035 break;
4036 default:
4037 CHECK(!"Should not be here.");
4038 break;
4039 }
4040
4041 uint32_t sampleIndex;
4042 status_t err = mSampleTable->findSampleAtTime(
4043 seekTimeUs, 1000000, mTimescale,
4044 &sampleIndex, findFlags);
4045
4046 if (mode == ReadOptions::SEEK_CLOSEST) {
4047 // We found the closest sample already, now we want the sync
4048 // sample preceding it (or the sample itself of course), even
4049 // if the subsequent sync sample is closer.
4050 findFlags = SampleTable::kFlagBefore;
4051 }
4052
4053 uint32_t syncSampleIndex;
4054 if (err == OK) {
4055 err = mSampleTable->findSyncSampleNear(
4056 sampleIndex, &syncSampleIndex, findFlags);
4057 }
4058
4059 uint32_t sampleTime;
4060 if (err == OK) {
4061 err = mSampleTable->getMetaDataForSample(
4062 sampleIndex, NULL, NULL, &sampleTime);
4063 }
4064
4065 if (err != OK) {
4066 if (err == ERROR_OUT_OF_RANGE) {
4067 // An attempt to seek past the end of the stream would
4068 // normally cause this ERROR_OUT_OF_RANGE error. Propagating
4069 // this all the way to the MediaPlayer would cause abnormal
4070 // termination. Legacy behaviour appears to be to behave as if
4071 // we had seeked to the end of stream, ending normally.
4072 err = ERROR_END_OF_STREAM;
4073 }
4074 ALOGV("end of stream");
4075 return err;
4076 }
4077
4078 if (mode == ReadOptions::SEEK_CLOSEST) {
4079 targetSampleTimeUs = (sampleTime * 1000000ll) / mTimescale;
4080 }
4081
4082 #if 0
4083 uint32_t syncSampleTime;
4084 CHECK_EQ(OK, mSampleTable->getMetaDataForSample(
4085 syncSampleIndex, NULL, NULL, &syncSampleTime));
4086
4087 ALOGI("seek to time %lld us => sample at time %lld us, "
4088 "sync sample at time %lld us",
4089 seekTimeUs,
4090 sampleTime * 1000000ll / mTimescale,
4091 syncSampleTime * 1000000ll / mTimescale);
4092 #endif
4093
4094 mCurrentSampleIndex = syncSampleIndex;
4095 if (mBuffer != NULL) {
4096 mBuffer->release();
4097 mBuffer = NULL;
4098 }
4099
4100 // fall through
4101 }
4102
4103 off64_t offset;
4104 size_t size;
4105 uint32_t cts, stts;
4106 bool isSyncSample;
4107 bool newBuffer = false;
4108 if (mBuffer == NULL) {
4109 newBuffer = true;
4110
4111 status_t err =
4112 mSampleTable->getMetaDataForSample(
4113 mCurrentSampleIndex, &offset, &size, &cts, &isSyncSample, &stts);
4114
4115 if (err != OK) {
4116 return err;
4117 }
4118
4119 err = mGroup->acquire_buffer(&mBuffer);
4120
4121 if (err != OK) {
4122 CHECK(mBuffer == NULL);
4123 return err;
4124 }
4125 if (size > mBuffer->size()) {
4126 ALOGE("buffer too small: %zu > %zu", size, mBuffer->size());
4127 return ERROR_BUFFER_TOO_SMALL;
4128 }
4129 }
4130
4131 if ((!mIsAVC && !mIsHEVC) || mWantsNALFragments) {
4132 if (newBuffer) {
4133 ssize_t num_bytes_read =
4134 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
4135
4136 if (num_bytes_read < (ssize_t)size) {
4137 mBuffer->release();
4138 mBuffer = NULL;
4139
4140 return ERROR_IO;
4141 }
4142
4143 CHECK(mBuffer != NULL);
4144 mBuffer->set_range(0, size);
4145 mBuffer->meta_data()->clear();
4146 mBuffer->meta_data()->setInt64(
4147 kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
4148 mBuffer->meta_data()->setInt64(
4149 kKeyDuration, ((int64_t)stts * 1000000) / mTimescale);
4150
4151 if (targetSampleTimeUs >= 0) {
4152 mBuffer->meta_data()->setInt64(
4153 kKeyTargetTime, targetSampleTimeUs);
4154 }
4155
4156 if (isSyncSample) {
4157 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
4158 }
4159
4160 ++mCurrentSampleIndex;
4161 }
4162
4163 if (!mIsAVC && !mIsHEVC) {
4164 *out = mBuffer;
4165 mBuffer = NULL;
4166
4167 return OK;
4168 }
4169
4170 // Each NAL unit is split up into its constituent fragments and
4171 // each one of them returned in its own buffer.
4172
4173 CHECK(mBuffer->range_length() >= mNALLengthSize);
4174
4175 const uint8_t *src =
4176 (const uint8_t *)mBuffer->data() + mBuffer->range_offset();
4177
4178 size_t nal_size = parseNALSize(src);
4179 if (mNALLengthSize > SIZE_MAX - nal_size) {
4180 ALOGE("b/24441553, b/24445122");
4181 }
4182 if (mBuffer->range_length() - mNALLengthSize < nal_size) {
4183 ALOGE("incomplete NAL unit.");
4184
4185 mBuffer->release();
4186 mBuffer = NULL;
4187
4188 return ERROR_MALFORMED;
4189 }
4190
4191 MediaBuffer *clone = mBuffer->clone();
4192 CHECK(clone != NULL);
4193 clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size);
4194
4195 CHECK(mBuffer != NULL);
4196 mBuffer->set_range(
4197 mBuffer->range_offset() + mNALLengthSize + nal_size,
4198 mBuffer->range_length() - mNALLengthSize - nal_size);
4199
4200 if (mBuffer->range_length() == 0) {
4201 mBuffer->release();
4202 mBuffer = NULL;
4203 }
4204
4205 *out = clone;
4206
4207 return OK;
4208 } else {
4209 // Whole NAL units are returned but each fragment is prefixed by
4210 // the start code (0x00 00 00 01).
4211 ssize_t num_bytes_read = 0;
4212 int32_t drm = 0;
4213 bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0);
4214 if (usesDRM) {
4215 num_bytes_read =
4216 mDataSource->readAt(offset, (uint8_t*)mBuffer->data(), size);
4217 } else {
4218 num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size);
4219 }
4220
4221 if (num_bytes_read < (ssize_t)size) {
4222 mBuffer->release();
4223 mBuffer = NULL;
4224
4225 return ERROR_IO;
4226 }
4227
4228 if (usesDRM) {
4229 CHECK(mBuffer != NULL);
4230 mBuffer->set_range(0, size);
4231
4232 } else {
4233 uint8_t *dstData = (uint8_t *)mBuffer->data();
4234 size_t srcOffset = 0;
4235 size_t dstOffset = 0;
4236
4237 while (srcOffset < size) {
4238 bool isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize);
4239 size_t nalLength = 0;
4240 if (!isMalFormed) {
4241 nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
4242 srcOffset += mNALLengthSize;
4243 isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength);
4244 }
4245
4246 if (isMalFormed) {
4247 ALOGE("Video is malformed");
4248 mBuffer->release();
4249 mBuffer = NULL;
4250 return ERROR_MALFORMED;
4251 }
4252
4253 if (nalLength == 0) {
4254 continue;
4255 }
4256
4257 if (dstOffset > SIZE_MAX - 4 ||
4258 dstOffset + 4 > SIZE_MAX - nalLength ||
4259 dstOffset + 4 + nalLength > mBuffer->size()) {
4260 ALOGE("b/27208621 : %zu %zu", dstOffset, mBuffer->size());
4261 android_errorWriteLog(0x534e4554, "27208621");
4262 mBuffer->release();
4263 mBuffer = NULL;
4264 return ERROR_MALFORMED;
4265 }
4266
4267 dstData[dstOffset++] = 0;
4268 dstData[dstOffset++] = 0;
4269 dstData[dstOffset++] = 0;
4270 dstData[dstOffset++] = 1;
4271 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
4272 srcOffset += nalLength;
4273 dstOffset += nalLength;
4274 }
4275 CHECK_EQ(srcOffset, size);
4276 CHECK(mBuffer != NULL);
4277 mBuffer->set_range(0, dstOffset);
4278 }
4279
4280 mBuffer->meta_data()->clear();
4281 mBuffer->meta_data()->setInt64(
4282 kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
4283 mBuffer->meta_data()->setInt64(
4284 kKeyDuration, ((int64_t)stts * 1000000) / mTimescale);
4285
4286 if (targetSampleTimeUs >= 0) {
4287 mBuffer->meta_data()->setInt64(
4288 kKeyTargetTime, targetSampleTimeUs);
4289 }
4290
4291 if (isSyncSample) {
4292 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
4293 }
4294
4295 ++mCurrentSampleIndex;
4296
4297 *out = mBuffer;
4298 mBuffer = NULL;
4299
4300 return OK;
4301 }
4302 }
4303
fragmentedRead(MediaBuffer ** out,const ReadOptions * options)4304 status_t MPEG4Source::fragmentedRead(
4305 MediaBuffer **out, const ReadOptions *options) {
4306
4307 ALOGV("MPEG4Source::fragmentedRead");
4308
4309 CHECK(mStarted);
4310
4311 *out = NULL;
4312
4313 int64_t targetSampleTimeUs = -1;
4314
4315 int64_t seekTimeUs;
4316 ReadOptions::SeekMode mode;
4317 if (options && options->getSeekTo(&seekTimeUs, &mode)) {
4318
4319 int numSidxEntries = mSegments.size();
4320 if (numSidxEntries != 0) {
4321 int64_t totalTime = 0;
4322 off64_t totalOffset = mFirstMoofOffset;
4323 for (int i = 0; i < numSidxEntries; i++) {
4324 const SidxEntry *se = &mSegments[i];
4325 if (totalTime + se->mDurationUs > seekTimeUs) {
4326 // The requested time is somewhere in this segment
4327 if ((mode == ReadOptions::SEEK_NEXT_SYNC && seekTimeUs > totalTime) ||
4328 (mode == ReadOptions::SEEK_CLOSEST_SYNC &&
4329 (seekTimeUs - totalTime) > (totalTime + se->mDurationUs - seekTimeUs))) {
4330 // requested next sync, or closest sync and it was closer to the end of
4331 // this segment
4332 totalTime += se->mDurationUs;
4333 totalOffset += se->mSize;
4334 }
4335 break;
4336 }
4337 totalTime += se->mDurationUs;
4338 totalOffset += se->mSize;
4339 }
4340 mCurrentMoofOffset = totalOffset;
4341 mCurrentSamples.clear();
4342 mCurrentSampleIndex = 0;
4343 parseChunk(&totalOffset);
4344 mCurrentTime = totalTime * mTimescale / 1000000ll;
4345 } else {
4346 // without sidx boxes, we can only seek to 0
4347 mCurrentMoofOffset = mFirstMoofOffset;
4348 mCurrentSamples.clear();
4349 mCurrentSampleIndex = 0;
4350 off64_t tmp = mCurrentMoofOffset;
4351 parseChunk(&tmp);
4352 mCurrentTime = 0;
4353 }
4354
4355 if (mBuffer != NULL) {
4356 mBuffer->release();
4357 mBuffer = NULL;
4358 }
4359
4360 // fall through
4361 }
4362
4363 off64_t offset = 0;
4364 size_t size = 0;
4365 uint32_t cts = 0;
4366 bool isSyncSample = false;
4367 bool newBuffer = false;
4368 if (mBuffer == NULL) {
4369 newBuffer = true;
4370
4371 if (mCurrentSampleIndex >= mCurrentSamples.size()) {
4372 // move to next fragment if there is one
4373 if (mNextMoofOffset <= mCurrentMoofOffset) {
4374 return ERROR_END_OF_STREAM;
4375 }
4376 off64_t nextMoof = mNextMoofOffset;
4377 mCurrentMoofOffset = nextMoof;
4378 mCurrentSamples.clear();
4379 mCurrentSampleIndex = 0;
4380 parseChunk(&nextMoof);
4381 if (mCurrentSampleIndex >= mCurrentSamples.size()) {
4382 return ERROR_END_OF_STREAM;
4383 }
4384 }
4385
4386 const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
4387 offset = smpl->offset;
4388 size = smpl->size;
4389 cts = mCurrentTime + smpl->compositionOffset;
4390 mCurrentTime += smpl->duration;
4391 isSyncSample = (mCurrentSampleIndex == 0); // XXX
4392
4393 status_t err = mGroup->acquire_buffer(&mBuffer);
4394
4395 if (err != OK) {
4396 CHECK(mBuffer == NULL);
4397 ALOGV("acquire_buffer returned %d", err);
4398 return err;
4399 }
4400 if (size > mBuffer->size()) {
4401 ALOGE("buffer too small: %zu > %zu", size, mBuffer->size());
4402 return ERROR_BUFFER_TOO_SMALL;
4403 }
4404 }
4405
4406 const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
4407 const sp<MetaData> bufmeta = mBuffer->meta_data();
4408 bufmeta->clear();
4409 if (smpl->encryptedsizes.size()) {
4410 // store clear/encrypted lengths in metadata
4411 bufmeta->setData(kKeyPlainSizes, 0,
4412 smpl->clearsizes.array(), smpl->clearsizes.size() * 4);
4413 bufmeta->setData(kKeyEncryptedSizes, 0,
4414 smpl->encryptedsizes.array(), smpl->encryptedsizes.size() * 4);
4415 bufmeta->setData(kKeyCryptoIV, 0, smpl->iv, 16); // use 16 or the actual size?
4416 bufmeta->setInt32(kKeyCryptoDefaultIVSize, mDefaultIVSize);
4417 bufmeta->setInt32(kKeyCryptoMode, mCryptoMode);
4418 bufmeta->setData(kKeyCryptoKey, 0, mCryptoKey, 16);
4419 }
4420
4421 if ((!mIsAVC && !mIsHEVC)|| mWantsNALFragments) {
4422 if (newBuffer) {
4423 if (!isInRange((size_t)0u, mBuffer->size(), size)) {
4424 mBuffer->release();
4425 mBuffer = NULL;
4426
4427 ALOGE("fragmentedRead ERROR_MALFORMED size %zu", size);
4428 return ERROR_MALFORMED;
4429 }
4430
4431 ssize_t num_bytes_read =
4432 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
4433
4434 if (num_bytes_read < (ssize_t)size) {
4435 mBuffer->release();
4436 mBuffer = NULL;
4437
4438 ALOGE("i/o error");
4439 return ERROR_IO;
4440 }
4441
4442 CHECK(mBuffer != NULL);
4443 mBuffer->set_range(0, size);
4444 mBuffer->meta_data()->setInt64(
4445 kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
4446 mBuffer->meta_data()->setInt64(
4447 kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale);
4448
4449 if (targetSampleTimeUs >= 0) {
4450 mBuffer->meta_data()->setInt64(
4451 kKeyTargetTime, targetSampleTimeUs);
4452 }
4453
4454 if (isSyncSample) {
4455 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
4456 }
4457
4458 ++mCurrentSampleIndex;
4459 }
4460
4461 if (!mIsAVC && !mIsHEVC) {
4462 *out = mBuffer;
4463 mBuffer = NULL;
4464
4465 return OK;
4466 }
4467
4468 // Each NAL unit is split up into its constituent fragments and
4469 // each one of them returned in its own buffer.
4470
4471 CHECK(mBuffer->range_length() >= mNALLengthSize);
4472
4473 const uint8_t *src =
4474 (const uint8_t *)mBuffer->data() + mBuffer->range_offset();
4475
4476 size_t nal_size = parseNALSize(src);
4477 if (mNALLengthSize > SIZE_MAX - nal_size) {
4478 ALOGE("b/24441553, b/24445122");
4479 }
4480
4481 if (mBuffer->range_length() - mNALLengthSize < nal_size) {
4482 ALOGE("incomplete NAL unit.");
4483
4484 mBuffer->release();
4485 mBuffer = NULL;
4486
4487 return ERROR_MALFORMED;
4488 }
4489
4490 MediaBuffer *clone = mBuffer->clone();
4491 CHECK(clone != NULL);
4492 clone->set_range(mBuffer->range_offset() + mNALLengthSize, nal_size);
4493
4494 CHECK(mBuffer != NULL);
4495 mBuffer->set_range(
4496 mBuffer->range_offset() + mNALLengthSize + nal_size,
4497 mBuffer->range_length() - mNALLengthSize - nal_size);
4498
4499 if (mBuffer->range_length() == 0) {
4500 mBuffer->release();
4501 mBuffer = NULL;
4502 }
4503
4504 *out = clone;
4505
4506 return OK;
4507 } else {
4508 ALOGV("whole NAL");
4509 // Whole NAL units are returned but each fragment is prefixed by
4510 // the start code (0x00 00 00 01).
4511 ssize_t num_bytes_read = 0;
4512 int32_t drm = 0;
4513 bool usesDRM = (mFormat->findInt32(kKeyIsDRM, &drm) && drm != 0);
4514 void *data = NULL;
4515 bool isMalFormed = false;
4516 if (usesDRM) {
4517 if (mBuffer == NULL || !isInRange((size_t)0u, mBuffer->size(), size)) {
4518 isMalFormed = true;
4519 } else {
4520 data = mBuffer->data();
4521 }
4522 } else {
4523 int32_t max_size;
4524 if (mFormat == NULL
4525 || !mFormat->findInt32(kKeyMaxInputSize, &max_size)
4526 || !isInRange((size_t)0u, (size_t)max_size, size)) {
4527 isMalFormed = true;
4528 } else {
4529 data = mSrcBuffer;
4530 }
4531 }
4532
4533 if (isMalFormed || data == NULL) {
4534 ALOGE("isMalFormed size %zu", size);
4535 if (mBuffer != NULL) {
4536 mBuffer->release();
4537 mBuffer = NULL;
4538 }
4539 return ERROR_MALFORMED;
4540 }
4541 num_bytes_read = mDataSource->readAt(offset, data, size);
4542
4543 if (num_bytes_read < (ssize_t)size) {
4544 mBuffer->release();
4545 mBuffer = NULL;
4546
4547 ALOGE("i/o error");
4548 return ERROR_IO;
4549 }
4550
4551 if (usesDRM) {
4552 CHECK(mBuffer != NULL);
4553 mBuffer->set_range(0, size);
4554
4555 } else {
4556 uint8_t *dstData = (uint8_t *)mBuffer->data();
4557 size_t srcOffset = 0;
4558 size_t dstOffset = 0;
4559
4560 while (srcOffset < size) {
4561 isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize);
4562 size_t nalLength = 0;
4563 if (!isMalFormed) {
4564 nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
4565 srcOffset += mNALLengthSize;
4566 isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength)
4567 || !isInRange((size_t)0u, mBuffer->size(), dstOffset, (size_t)4u)
4568 || !isInRange((size_t)0u, mBuffer->size(), dstOffset + 4, nalLength);
4569 }
4570
4571 if (isMalFormed) {
4572 ALOGE("Video is malformed; nalLength %zu", nalLength);
4573 mBuffer->release();
4574 mBuffer = NULL;
4575 return ERROR_MALFORMED;
4576 }
4577
4578 if (nalLength == 0) {
4579 continue;
4580 }
4581
4582 if (dstOffset > SIZE_MAX - 4 ||
4583 dstOffset + 4 > SIZE_MAX - nalLength ||
4584 dstOffset + 4 + nalLength > mBuffer->size()) {
4585 ALOGE("b/26365349 : %zu %zu", dstOffset, mBuffer->size());
4586 android_errorWriteLog(0x534e4554, "26365349");
4587 mBuffer->release();
4588 mBuffer = NULL;
4589 return ERROR_MALFORMED;
4590 }
4591
4592 dstData[dstOffset++] = 0;
4593 dstData[dstOffset++] = 0;
4594 dstData[dstOffset++] = 0;
4595 dstData[dstOffset++] = 1;
4596 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
4597 srcOffset += nalLength;
4598 dstOffset += nalLength;
4599 }
4600 CHECK_EQ(srcOffset, size);
4601 CHECK(mBuffer != NULL);
4602 mBuffer->set_range(0, dstOffset);
4603 }
4604
4605 mBuffer->meta_data()->setInt64(
4606 kKeyTime, ((int64_t)cts * 1000000) / mTimescale);
4607 mBuffer->meta_data()->setInt64(
4608 kKeyDuration, ((int64_t)smpl->duration * 1000000) / mTimescale);
4609
4610 if (targetSampleTimeUs >= 0) {
4611 mBuffer->meta_data()->setInt64(
4612 kKeyTargetTime, targetSampleTimeUs);
4613 }
4614
4615 if (isSyncSample) {
4616 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
4617 }
4618
4619 ++mCurrentSampleIndex;
4620
4621 *out = mBuffer;
4622 mBuffer = NULL;
4623
4624 return OK;
4625 }
4626 }
4627
findTrackByMimePrefix(const char * mimePrefix)4628 MPEG4Extractor::Track *MPEG4Extractor::findTrackByMimePrefix(
4629 const char *mimePrefix) {
4630 for (Track *track = mFirstTrack; track != NULL; track = track->next) {
4631 const char *mime;
4632 if (track->meta != NULL
4633 && track->meta->findCString(kKeyMIMEType, &mime)
4634 && !strncasecmp(mime, mimePrefix, strlen(mimePrefix))) {
4635 return track;
4636 }
4637 }
4638
4639 return NULL;
4640 }
4641
LegacySniffMPEG4(const sp<DataSource> & source,String8 * mimeType,float * confidence)4642 static bool LegacySniffMPEG4(
4643 const sp<DataSource> &source, String8 *mimeType, float *confidence) {
4644 uint8_t header[8];
4645
4646 ssize_t n = source->readAt(4, header, sizeof(header));
4647 if (n < (ssize_t)sizeof(header)) {
4648 return false;
4649 }
4650
4651 if (!memcmp(header, "ftyp3gp", 7) || !memcmp(header, "ftypmp42", 8)
4652 || !memcmp(header, "ftyp3gr6", 8) || !memcmp(header, "ftyp3gs6", 8)
4653 || !memcmp(header, "ftyp3ge6", 8) || !memcmp(header, "ftyp3gg6", 8)
4654 || !memcmp(header, "ftypisom", 8) || !memcmp(header, "ftypM4V ", 8)
4655 || !memcmp(header, "ftypM4A ", 8) || !memcmp(header, "ftypf4v ", 8)
4656 || !memcmp(header, "ftypkddi", 8) || !memcmp(header, "ftypM4VP", 8)) {
4657 *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4;
4658 *confidence = 0.4;
4659
4660 return true;
4661 }
4662
4663 return false;
4664 }
4665
isCompatibleBrand(uint32_t fourcc)4666 static bool isCompatibleBrand(uint32_t fourcc) {
4667 static const uint32_t kCompatibleBrands[] = {
4668 FOURCC('i', 's', 'o', 'm'),
4669 FOURCC('i', 's', 'o', '2'),
4670 FOURCC('a', 'v', 'c', '1'),
4671 FOURCC('h', 'v', 'c', '1'),
4672 FOURCC('h', 'e', 'v', '1'),
4673 FOURCC('3', 'g', 'p', '4'),
4674 FOURCC('m', 'p', '4', '1'),
4675 FOURCC('m', 'p', '4', '2'),
4676
4677 // Won't promise that the following file types can be played.
4678 // Just give these file types a chance.
4679 FOURCC('q', 't', ' ', ' '), // Apple's QuickTime
4680 FOURCC('M', 'S', 'N', 'V'), // Sony's PSP
4681
4682 FOURCC('3', 'g', '2', 'a'), // 3GPP2
4683 FOURCC('3', 'g', '2', 'b'),
4684 };
4685
4686 for (size_t i = 0;
4687 i < sizeof(kCompatibleBrands) / sizeof(kCompatibleBrands[0]);
4688 ++i) {
4689 if (kCompatibleBrands[i] == fourcc) {
4690 return true;
4691 }
4692 }
4693
4694 return false;
4695 }
4696
4697 // Attempt to actually parse the 'ftyp' atom and determine if a suitable
4698 // compatible brand is present.
4699 // Also try to identify where this file's metadata ends
4700 // (end of the 'moov' atom) and report it to the caller as part of
4701 // the metadata.
BetterSniffMPEG4(const sp<DataSource> & source,String8 * mimeType,float * confidence,sp<AMessage> * meta)4702 static bool BetterSniffMPEG4(
4703 const sp<DataSource> &source, String8 *mimeType, float *confidence,
4704 sp<AMessage> *meta) {
4705 // We scan up to 128 bytes to identify this file as an MP4.
4706 static const off64_t kMaxScanOffset = 128ll;
4707
4708 off64_t offset = 0ll;
4709 bool foundGoodFileType = false;
4710 off64_t moovAtomEndOffset = -1ll;
4711 bool done = false;
4712
4713 while (!done && offset < kMaxScanOffset) {
4714 uint32_t hdr[2];
4715 if (source->readAt(offset, hdr, 8) < 8) {
4716 return false;
4717 }
4718
4719 uint64_t chunkSize = ntohl(hdr[0]);
4720 uint32_t chunkType = ntohl(hdr[1]);
4721 off64_t chunkDataOffset = offset + 8;
4722
4723 if (chunkSize == 1) {
4724 if (source->readAt(offset + 8, &chunkSize, 8) < 8) {
4725 return false;
4726 }
4727
4728 chunkSize = ntoh64(chunkSize);
4729 chunkDataOffset += 8;
4730
4731 if (chunkSize < 16) {
4732 // The smallest valid chunk is 16 bytes long in this case.
4733 return false;
4734 }
4735 } else if (chunkSize < 8) {
4736 // The smallest valid chunk is 8 bytes long.
4737 return false;
4738 }
4739
4740 off64_t chunkDataSize = offset + chunkSize - chunkDataOffset;
4741
4742 char chunkstring[5];
4743 MakeFourCCString(chunkType, chunkstring);
4744 ALOGV("saw chunk type %s, size %" PRIu64 " @ %lld", chunkstring, chunkSize, (long long)offset);
4745 switch (chunkType) {
4746 case FOURCC('f', 't', 'y', 'p'):
4747 {
4748 if (chunkDataSize < 8) {
4749 return false;
4750 }
4751
4752 uint32_t numCompatibleBrands = (chunkDataSize - 8) / 4;
4753 for (size_t i = 0; i < numCompatibleBrands + 2; ++i) {
4754 if (i == 1) {
4755 // Skip this index, it refers to the minorVersion,
4756 // not a brand.
4757 continue;
4758 }
4759
4760 uint32_t brand;
4761 if (source->readAt(
4762 chunkDataOffset + 4 * i, &brand, 4) < 4) {
4763 return false;
4764 }
4765
4766 brand = ntohl(brand);
4767
4768 if (isCompatibleBrand(brand)) {
4769 foundGoodFileType = true;
4770 break;
4771 }
4772 }
4773
4774 if (!foundGoodFileType) {
4775 return false;
4776 }
4777
4778 break;
4779 }
4780
4781 case FOURCC('m', 'o', 'o', 'v'):
4782 {
4783 moovAtomEndOffset = offset + chunkSize;
4784
4785 done = true;
4786 break;
4787 }
4788
4789 default:
4790 break;
4791 }
4792
4793 offset += chunkSize;
4794 }
4795
4796 if (!foundGoodFileType) {
4797 return false;
4798 }
4799
4800 *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4;
4801 *confidence = 0.4f;
4802
4803 if (moovAtomEndOffset >= 0) {
4804 *meta = new AMessage;
4805 (*meta)->setInt64("meta-data-size", moovAtomEndOffset);
4806
4807 ALOGV("found metadata size: %lld", (long long)moovAtomEndOffset);
4808 }
4809
4810 return true;
4811 }
4812
SniffMPEG4(const sp<DataSource> & source,String8 * mimeType,float * confidence,sp<AMessage> * meta)4813 bool SniffMPEG4(
4814 const sp<DataSource> &source, String8 *mimeType, float *confidence,
4815 sp<AMessage> *meta) {
4816 if (BetterSniffMPEG4(source, mimeType, confidence, meta)) {
4817 return true;
4818 }
4819
4820 if (LegacySniffMPEG4(source, mimeType, confidence)) {
4821 ALOGW("Identified supported mpeg4 through LegacySniffMPEG4.");
4822 return true;
4823 }
4824
4825 return false;
4826 }
4827
4828 } // namespace android
4829