1 /*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 //#define LOG_NDEBUG 0
18 #define LOG_TAG "MPEG4Extractor"
19
#include <ctype.h>
#include <inttypes.h>
#include <algorithm>
#include <map>
#include <memory>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
28
29 #include <log/log.h>
30 #include <utils/Log.h>
31
32 #include "AC4Parser.h"
33 #include "MPEG4Extractor.h"
34 #include "SampleTable.h"
35 #include "ItemTable.h"
36
37 #include <ESDS.h>
38 #include <ID3.h>
39 #include <media/stagefright/DataSourceBase.h>
40 #include <media/ExtractorUtils.h>
41 #include <media/stagefright/foundation/ABitReader.h>
42 #include <media/stagefright/foundation/ABuffer.h>
43 #include <media/stagefright/foundation/ADebug.h>
44 #include <media/stagefright/foundation/AMessage.h>
45 #include <media/stagefright/foundation/AudioPresentationInfo.h>
46 #include <media/stagefright/foundation/AUtils.h>
47 #include <media/stagefright/foundation/ByteUtils.h>
48 #include <media/stagefright/foundation/ColorUtils.h>
49 #include <media/stagefright/foundation/avc_utils.h>
50 #include <media/stagefright/foundation/hexdump.h>
51 #include <media/stagefright/foundation/OpusHeader.h>
52 #include <media/stagefright/MediaBufferGroup.h>
53 #include <media/stagefright/MediaDefs.h>
54 #include <media/stagefright/MetaDataBase.h>
55 #include <utils/String8.h>
56
57 #include <byteswap.h>
58
59 #ifndef UINT32_MAX
60 #define UINT32_MAX (4294967295U)
61 #endif
62
63 #define ALAC_SPECIFIC_INFO_SIZE (36)
64
65 // TODO : Remove the defines once mainline media is built against NDK >= 31.
66 // The mp4 extractor is part of mainline and builds against NDK 29 as of
67 // writing. These keys are available only from NDK 31:
68 #define AMEDIAFORMAT_KEY_MPEGH_PROFILE_LEVEL_INDICATION \
69 "mpegh-profile-level-indication"
70 #define AMEDIAFORMAT_KEY_MPEGH_REFERENCE_CHANNEL_LAYOUT \
71 "mpegh-reference-channel-layout"
72 #define AMEDIAFORMAT_KEY_MPEGH_COMPATIBLE_SETS \
73 "mpegh-compatible-sets"
74
75 namespace android {
76
// Size limits applied while parsing.
enum {
    // Upper bound on the number of track header bytes handed back to the
    // caller.
    kMaxTrackHeaderSize = 32,

    // Largest atom accepted by this extractor (64 MiB). The spec permits
    // bigger atoms for some types, but nothing above this size is allowed
    // here.
    kMaxAtomSize = 64 << 20,
};
85
// Track reader for a single track of an MPEG-4 file, implemented on top of
// MediaTrackHelper. The constructor arguments correspond to the members
// declared below: the track format, the data source, the media timescale,
// the sample table, the sidx entries, the trex defaults, the offset of the
// first moof, the item table and the two edit-list derived offsets.
// Only the declaration lives here; the method bodies are defined elsewhere.
class MPEG4Source : public MediaTrackHelper {
    static const size_t kMaxPcmFrameSize = 8192;
public:
    // Caller retains ownership of both "dataSource" and "sampleTable".
    MPEG4Source(AMediaFormat *format,
                DataSourceHelper *dataSource,
                int32_t timeScale,
                const sp<SampleTable> &sampleTable,
                Vector<SidxEntry> &sidx,
                const Trex *trex,
                off64_t firstMoofOffset,
                const sp<ItemTable> &itemTable,
                uint64_t elstShiftStartTicks,
                uint64_t elstInitialEmptyEditTicks);
    virtual status_t init();

    virtual media_status_t start();
    virtual media_status_t stop();

    virtual media_status_t getFormat(AMediaFormat *);

    virtual media_status_t read(MediaBufferHelper **buffer, const ReadOptions *options = NULL);
    // This source always advertises support for non-blocking reads.
    bool supportsNonBlockingRead() override { return true; }
    virtual media_status_t fragmentedRead(
            MediaBufferHelper **buffer, const ReadOptions *options = NULL);

    virtual ~MPEG4Source();

private:
    Mutex mLock;

    AMediaFormat *mFormat;
    DataSourceHelper *mDataSource;
    int32_t mTimescale;
    sp<SampleTable> mSampleTable;
    uint32_t mCurrentSampleIndex;
    uint32_t mCurrentFragmentIndex;
    // Reference to the sidx entry list passed to the constructor; it is not
    // copied, so the referenced vector must outlive this object.
    Vector<SidxEntry> &mSegments;
    const Trex *mTrex;
    off64_t mFirstMoofOffset;
    off64_t mCurrentMoofOffset;
    off64_t mCurrentMoofSize;
    off64_t mNextMoofOffset;
    uint32_t mCurrentTime; // in media timescale ticks
    int32_t mLastParsedTrackId;
    int32_t mTrackId;

    int32_t mCryptoMode;    // passed in from extractor
    int32_t mDefaultIVSize; // passed in from extractor
    uint8_t mCryptoKey[16]; // passed in from extractor
    int32_t mDefaultEncryptedByteBlock;
    int32_t mDefaultSkipByteBlock;
    // Bookkeeping for sample auxiliary information.
    // NOTE(review): presumably filled by parseSampleAuxiliaryInformationSizes()
    // and parseSampleAuxiliaryInformationOffsets() -- confirm in their bodies.
    uint32_t mCurrentAuxInfoType;
    uint32_t mCurrentAuxInfoTypeParameter;
    int32_t mCurrentDefaultSampleInfoSize;
    uint32_t mCurrentSampleInfoCount;
    uint32_t mCurrentSampleInfoAllocSize;
    uint8_t* mCurrentSampleInfoSizes;
    uint32_t mCurrentSampleInfoOffsetCount;
    uint32_t mCurrentSampleInfoOffsetsAllocSize;
    uint64_t* mCurrentSampleInfoOffsets;

    // Codec/container kind flags for this track.
    bool mIsAVC;
    bool mIsHEVC;
    bool mIsDolbyVision;
    bool mIsAC4;
    bool mIsMpegH = false;
    bool mIsPcm;
    size_t mNALLengthSize;

    bool mStarted;

    MediaBufferHelper *mBuffer;

    size_t mSrcBufferSize;
    uint8_t *mSrcBuffer;

    bool mIsHeif;
    bool mIsAvif;
    bool mIsAudio;
    bool mIsUsac = false;
    sp<ItemTable> mItemTable;

    /* Shift start offset (move to earlier time) when media_time > 0,
     * in media time scale.
     */
    uint64_t mElstShiftStartTicks;
    /* Initial start offset (move to later time), empty edit list entry
     * in media time scale.
     */
    uint64_t mElstInitialEmptyEditTicks;

    size_t parseNALSize(const uint8_t *data) const;
    status_t parseChunk(off64_t *offset);
    status_t parseTrackFragmentHeader(off64_t offset, off64_t size);
    status_t parseTrackFragmentRun(off64_t offset, off64_t size);
    status_t parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size);
    status_t parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size);
    status_t parseClearEncryptedSizes(off64_t offset, bool isSampleEncryption,
            uint32_t flags, off64_t size);
    status_t parseSampleEncryption(off64_t offset, off64_t size);
    // returns -1 for invalid layer ID
    int32_t parseHEVCLayerId(const uint8_t *data, size_t size);

    // Values describing the current track fragment header.
    struct TrackFragmentHeaderInfo {
        // Bit values for mFlags; each "...Present" bit names the field below
        // that it refers to.
        // NOTE(review): presumably the tfhd tf_flags of ISO/IEC 14496-12 --
        // confirm against the spec.
        enum Flags {
            kBaseDataOffsetPresent         = 0x01,
            kSampleDescriptionIndexPresent = 0x02,
            kDefaultSampleDurationPresent  = 0x08,
            kDefaultSampleSizePresent      = 0x10,
            kDefaultSampleFlagsPresent     = 0x20,
            kDurationIsEmpty               = 0x10000,
        };

        uint32_t mTrackID;
        uint32_t mFlags;
        uint64_t mBaseDataOffset;
        uint32_t mSampleDescriptionIndex;
        uint32_t mDefaultSampleDuration;
        uint32_t mDefaultSampleSize;
        uint32_t mDefaultSampleFlags;

        uint64_t mDataOffset;
    };
    TrackFragmentHeaderInfo mTrackFragmentHeaderInfo;

    // Per-sample record: location and size of the sample data, its duration
    // and composition offset, plus a 16-byte IV and the clear/encrypted size
    // lists for the sample.
    struct Sample {
        off64_t offset;
        size_t size;
        uint32_t duration;
        int32_t compositionOffset;
        uint8_t iv[16];
        Vector<uint32_t> clearsizes;
        Vector<uint32_t> encryptedsizes;
    };
    Vector<Sample> mCurrentSamples;
    std::map<off64_t, uint32_t> mDrmOffsets;

    // Copy construction and assignment are declared private so that
    // instances cannot be copied from outside the class.
    MPEG4Source(const MPEG4Source &);
    MPEG4Source &operator=(const MPEG4Source &);
};
227
// This custom data source wraps an existing one. Requests that fall entirely
// within the cached range are served from the cache; all remaining requests
// are forwarded to the wrapped data source.
// It is used to cache the full sample table metadata for a single track.
// Wrappers may be stacked to cover all tracks, i.e. each
// CachedRangedDataSource caches the sample table metadata for one track.
234
// DataSourceHelper that keeps one contiguous byte range of a wrapped source
// in memory. Reads lying entirely inside that range are copied from the
// in-memory buffer; every other read, as well as getSize() and flags(), is
// delegated to the wrapped source.
class CachedRangedDataSource : public DataSourceHelper {
public:
    // Wraps "source" without taking ownership; no range is cached initially.
    explicit CachedRangedDataSource(DataSourceHelper *source);
    virtual ~CachedRangedDataSource();

    ssize_t readAt(off64_t offset, void *data, size_t size) override;
    status_t getSize(off64_t *size) override;
    uint32_t flags() override;

    // Replaces the cached range with "size" bytes starting at "offset", read
    // from the wrapped source. Returns OK on success, -ENOMEM if the buffer
    // cannot be allocated and ERROR_IO if fewer than "size" bytes could be
    // read. On success the ownership flag is set to
    // "assumeSourceOwnershipOnSuccess".
    status_t setCachedRange(off64_t offset, size_t size, bool assumeSourceOwnershipOnSuccess);


private:
    // Held by readAt() and setCachedRange().
    Mutex mLock;

    DataSourceHelper *mSource;   // wrapped source
    bool mOwnsDataSource;        // if true, the destructor deletes mSource
    off64_t mCachedOffset;       // source offset of the first cached byte
    size_t mCachedSize;          // number of cached bytes (0 when empty)
    uint8_t *mCache;             // malloc'ed buffer, NULL when empty

    // Frees the buffer and resets the cached range to empty.
    void clearCache();

    // Copying is disabled: declared private.
    CachedRangedDataSource(const CachedRangedDataSource &);
    CachedRangedDataSource &operator=(const CachedRangedDataSource &);
};
261
CachedRangedDataSource(DataSourceHelper * source)262 CachedRangedDataSource::CachedRangedDataSource(DataSourceHelper *source)
263 : DataSourceHelper(source),
264 mSource(source),
265 mOwnsDataSource(false),
266 mCachedOffset(0),
267 mCachedSize(0),
268 mCache(NULL) {
269 }
270
~CachedRangedDataSource()271 CachedRangedDataSource::~CachedRangedDataSource() {
272 clearCache();
273 if (mOwnsDataSource) {
274 delete mSource;
275 }
276 }
277
clearCache()278 void CachedRangedDataSource::clearCache() {
279 if (mCache) {
280 free(mCache);
281 mCache = NULL;
282 }
283
284 mCachedOffset = 0;
285 mCachedSize = 0;
286 }
287
readAt(off64_t offset,void * data,size_t size)288 ssize_t CachedRangedDataSource::readAt(off64_t offset, void *data, size_t size) {
289 Mutex::Autolock autoLock(mLock);
290
291 if (isInRange(mCachedOffset, mCachedSize, offset, size)) {
292 memcpy(data, &mCache[offset - mCachedOffset], size);
293 return size;
294 }
295
296 return mSource->readAt(offset, data, size);
297 }
298
getSize(off64_t * size)299 status_t CachedRangedDataSource::getSize(off64_t *size) {
300 return mSource->getSize(size);
301 }
302
flags()303 uint32_t CachedRangedDataSource::flags() {
304 return mSource->flags();
305 }
306
setCachedRange(off64_t offset,size_t size,bool assumeSourceOwnershipOnSuccess)307 status_t CachedRangedDataSource::setCachedRange(off64_t offset,
308 size_t size,
309 bool assumeSourceOwnershipOnSuccess) {
310 Mutex::Autolock autoLock(mLock);
311
312 clearCache();
313
314 mCache = (uint8_t *)malloc(size);
315
316 if (mCache == NULL) {
317 return -ENOMEM;
318 }
319
320 mCachedOffset = offset;
321 mCachedSize = size;
322
323 ssize_t err = mSource->readAt(mCachedOffset, mCache, mCachedSize);
324
325 if (err < (ssize_t)size) {
326 clearCache();
327
328 return ERROR_IO;
329 }
330 mOwnsDataSource = assumeSourceOwnershipOnSuccess;
331 return OK;
332 }
333
334 ////////////////////////////////////////////////////////////////////////////////
335
// Debugging aid: when set to true, parseChunk() prints every chunk it
// encounters together with a hexdump of up to 256 bytes read from the start
// of the chunk.
static const bool kUseHexDump = false;
337
FourCC2MIME(uint32_t fourcc)338 static const char *FourCC2MIME(uint32_t fourcc) {
339 switch (fourcc) {
340 case FOURCC("mp4a"):
341 return MEDIA_MIMETYPE_AUDIO_AAC;
342
343 case FOURCC("samr"):
344 return MEDIA_MIMETYPE_AUDIO_AMR_NB;
345
346 case FOURCC("sawb"):
347 return MEDIA_MIMETYPE_AUDIO_AMR_WB;
348
349 case FOURCC("ec-3"):
350 return MEDIA_MIMETYPE_AUDIO_EAC3;
351
352 case FOURCC("mp4v"):
353 return MEDIA_MIMETYPE_VIDEO_MPEG4;
354
355 case FOURCC("s263"):
356 case FOURCC("h263"):
357 case FOURCC("H263"):
358 return MEDIA_MIMETYPE_VIDEO_H263;
359
360 case FOURCC("avc1"):
361 return MEDIA_MIMETYPE_VIDEO_AVC;
362
363 case FOURCC("hvc1"):
364 case FOURCC("hev1"):
365 return MEDIA_MIMETYPE_VIDEO_HEVC;
366
367 case FOURCC("dvav"):
368 case FOURCC("dva1"):
369 case FOURCC("dvhe"):
370 case FOURCC("dvh1"):
371 case FOURCC("dav1"):
372 return MEDIA_MIMETYPE_VIDEO_DOLBY_VISION;
373
374 case FOURCC("ac-4"):
375 return MEDIA_MIMETYPE_AUDIO_AC4;
376 case FOURCC("Opus"):
377 return MEDIA_MIMETYPE_AUDIO_OPUS;
378
379 case FOURCC("twos"):
380 case FOURCC("sowt"):
381 return MEDIA_MIMETYPE_AUDIO_RAW;
382 case FOURCC("alac"):
383 return MEDIA_MIMETYPE_AUDIO_ALAC;
384 case FOURCC("fLaC"):
385 return MEDIA_MIMETYPE_AUDIO_FLAC;
386 case FOURCC("av01"):
387 return MEDIA_MIMETYPE_VIDEO_AV1;
388 case FOURCC("vp09"):
389 return MEDIA_MIMETYPE_VIDEO_VP9;
390 case FOURCC(".mp3"):
391 case 0x6D730055: // "ms U" mp3 audio
392 return MEDIA_MIMETYPE_AUDIO_MPEG;
393 case FOURCC("mha1"):
394 return MEDIA_MIMETYPE_AUDIO_MPEGH_MHA1;
395 case FOURCC("mhm1"):
396 return MEDIA_MIMETYPE_AUDIO_MPEGH_MHM1;
397 default:
398 ALOGW("Unknown fourcc: %c%c%c%c",
399 (fourcc >> 24) & 0xff,
400 (fourcc >> 16) & 0xff,
401 (fourcc >> 8) & 0xff,
402 fourcc & 0xff
403 );
404 return "application/octet-stream";
405 }
406 }
407
AdjustChannelsAndRate(uint32_t fourcc,uint32_t * channels,uint32_t * rate)408 static bool AdjustChannelsAndRate(uint32_t fourcc, uint32_t *channels, uint32_t *rate) {
409 if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_NB, FourCC2MIME(fourcc))) {
410 // AMR NB audio is always mono, 8kHz
411 *channels = 1;
412 *rate = 8000;
413 return true;
414 } else if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_WB, FourCC2MIME(fourcc))) {
415 // AMR WB audio is always mono, 16kHz
416 *channels = 1;
417 *rate = 16000;
418 return true;
419 }
420 return false;
421 }
422
MPEG4Extractor(DataSourceHelper * source,const char * mime)423 MPEG4Extractor::MPEG4Extractor(DataSourceHelper *source, const char *mime)
424 : mMoofOffset(0),
425 mMoofFound(false),
426 mMdatFound(false),
427 mDataSource(source),
428 mInitCheck(NO_INIT),
429 mHeaderTimescale(0),
430 mIsQT(false),
431 mIsHeif(false),
432 mHasMoovBox(false),
433 mPreferHeif(mime != NULL && !strcasecmp(mime, MEDIA_MIMETYPE_CONTAINER_HEIF)),
434 mIsAvif(false),
435 mFirstTrack(NULL),
436 mLastTrack(NULL) {
437 ALOGV("mime=%s, mPreferHeif=%d", mime, mPreferHeif);
438 mFileMetaData = AMediaFormat_new();
439 }
440
~MPEG4Extractor()441 MPEG4Extractor::~MPEG4Extractor() {
442 Track *track = mFirstTrack;
443 while (track) {
444 Track *next = track->next;
445
446 delete track;
447 track = next;
448 }
449 mFirstTrack = mLastTrack = NULL;
450
451 for (size_t i = 0; i < mPssh.size(); i++) {
452 delete [] mPssh[i].data;
453 }
454 mPssh.clear();
455
456 delete mDataSource;
457 AMediaFormat_delete(mFileMetaData);
458 }
459
flags() const460 uint32_t MPEG4Extractor::flags() const {
461 return CAN_PAUSE |
462 ((mMoofOffset == 0 || mSidxEntries.size() != 0) ?
463 (CAN_SEEK_BACKWARD | CAN_SEEK_FORWARD | CAN_SEEK) : 0);
464 }
465
getMetaData(AMediaFormat * meta)466 media_status_t MPEG4Extractor::getMetaData(AMediaFormat *meta) {
467 status_t err;
468 if ((err = readMetaData()) != OK) {
469 return AMEDIA_ERROR_UNKNOWN;
470 }
471 AMediaFormat_copy(meta, mFileMetaData);
472 return AMEDIA_OK;
473 }
474
countTracks()475 size_t MPEG4Extractor::countTracks() {
476 status_t err;
477 if ((err = readMetaData()) != OK) {
478 ALOGV("MPEG4Extractor::countTracks: no tracks");
479 return 0;
480 }
481
482 size_t n = 0;
483 Track *track = mFirstTrack;
484 while (track) {
485 ++n;
486 track = track->next;
487 }
488
489 ALOGV("MPEG4Extractor::countTracks: %zu tracks", n);
490 return n;
491 }
492
getTrackMetaData(AMediaFormat * meta,size_t index,uint32_t flags)493 media_status_t MPEG4Extractor::getTrackMetaData(
494 AMediaFormat *meta,
495 size_t index, uint32_t flags) {
496 status_t err;
497 if ((err = readMetaData()) != OK) {
498 return AMEDIA_ERROR_UNKNOWN;
499 }
500
501 Track *track = mFirstTrack;
502 while (index > 0) {
503 if (track == NULL) {
504 return AMEDIA_ERROR_UNKNOWN;
505 }
506
507 track = track->next;
508 --index;
509 }
510
511 if (track == NULL) {
512 return AMEDIA_ERROR_UNKNOWN;
513 }
514
515 [=] {
516 int64_t duration;
517 int32_t samplerate;
518 // Only for audio track.
519 if (track->elst_needs_processing && mHeaderTimescale != 0 &&
520 AMediaFormat_getInt64(track->meta, AMEDIAFORMAT_KEY_DURATION, &duration) &&
521 AMediaFormat_getInt32(track->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, &samplerate)) {
522 // Elst has to be processed only the first time this function is called.
523 track->elst_needs_processing = false;
524
525 if (track->elst_segment_duration > INT64_MAX) {
526 return;
527 }
528 int64_t segment_duration = track->elst_segment_duration;
529 int64_t media_time = track->elst_media_time;
530 int64_t halfscale = track->timescale / 2;
531
532 ALOGV("segment_duration = %" PRId64 ", media_time = %" PRId64
533 ", halfscale = %" PRId64 ", mdhd_timescale = %d, track_timescale = %u",
534 segment_duration, media_time,
535 halfscale, mHeaderTimescale, track->timescale);
536
537 if ((uint32_t)samplerate != track->timescale){
538 ALOGV("samplerate:%" PRId32 ", track->timescale and samplerate are different!",
539 samplerate);
540 }
541 // Both delay and paddingsamples have to be set inorder for either to be
542 // effective in the lower layers.
543 int64_t delay = 0;
544 if (media_time > 0) { // Gapless playback
545 // delay = ((media_time * samplerate) + halfscale) / track->timescale;
546 if (__builtin_mul_overflow(media_time, samplerate, &delay) ||
547 __builtin_add_overflow(delay, halfscale, &delay) ||
548 (delay /= track->timescale, false) ||
549 delay > INT32_MAX ||
550 delay < INT32_MIN) {
551 ALOGW("ignoring edit list with bogus values");
552 return;
553 }
554 }
555 ALOGV("delay = %" PRId64, delay);
556 AMediaFormat_setInt32(track->meta, AMEDIAFORMAT_KEY_ENCODER_DELAY, delay);
557
558 int64_t paddingsamples = 0;
559 if (segment_duration > 0) {
560 int64_t scaled_duration;
561 // scaled_duration = duration * mHeaderTimescale;
562 if (__builtin_mul_overflow(duration, mHeaderTimescale, &scaled_duration)) {
563 return;
564 }
565 ALOGV("scaled_duration = %" PRId64, scaled_duration);
566
567 int64_t segment_end;
568 int64_t padding;
569 int64_t segment_duration_e6;
570 int64_t media_time_scaled_e6;
571 int64_t media_time_scaled;
572 // padding = scaled_duration - ((segment_duration * 1000000) +
573 // ((media_time * mHeaderTimescale * 1000000)/track->timescale) )
574 // segment_duration is based on timescale in movie header box(mdhd)
575 // media_time is based on timescale track header/media timescale
576 if (__builtin_mul_overflow(segment_duration, 1000000, &segment_duration_e6) ||
577 __builtin_mul_overflow(media_time, mHeaderTimescale, &media_time_scaled) ||
578 __builtin_mul_overflow(media_time_scaled, 1000000, &media_time_scaled_e6)) {
579 return;
580 }
581 media_time_scaled_e6 /= track->timescale;
582 if (__builtin_add_overflow(segment_duration_e6, media_time_scaled_e6, &segment_end)
583 || __builtin_sub_overflow(scaled_duration, segment_end, &padding)) {
584 return;
585 }
586 ALOGV("segment_end = %" PRId64 ", padding = %" PRId64, segment_end, padding);
587 // track duration from media header (which is what AMEDIAFORMAT_KEY_DURATION is)
588 // might be slightly shorter than the segment duration, which would make the
589 // padding negative. Clamp to zero.
590 if (padding > 0) {
591 int64_t halfscale_mht = mHeaderTimescale / 2;
592 int64_t halfscale_e6;
593 int64_t timescale_e6;
594 // paddingsamples = ((padding * samplerate) + (halfscale_mht * 1000000))
595 // / (mHeaderTimescale * 1000000);
596 if (__builtin_mul_overflow(padding, samplerate, &paddingsamples) ||
597 __builtin_mul_overflow(halfscale_mht, 1000000, &halfscale_e6) ||
598 __builtin_mul_overflow(mHeaderTimescale, 1000000, ×cale_e6) ||
599 __builtin_add_overflow(paddingsamples, halfscale_e6, &paddingsamples) ||
600 (paddingsamples /= timescale_e6, false) ||
601 paddingsamples > INT32_MAX) {
602 return;
603 }
604 }
605 }
606 ALOGV("paddingsamples = %" PRId64, paddingsamples);
607 AMediaFormat_setInt32(track->meta, AMEDIAFORMAT_KEY_ENCODER_PADDING, paddingsamples);
608 }
609 }();
610
611 if ((flags & kIncludeExtensiveMetaData)
612 && !track->includes_expensive_metadata) {
613 track->includes_expensive_metadata = true;
614
615 const char *mime;
616 CHECK(AMediaFormat_getString(track->meta, AMEDIAFORMAT_KEY_MIME, &mime));
617 if (!strncasecmp("video/", mime, 6)) {
618 // MPEG2 tracks do not provide CSD, so read the stream header
619 if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG2)) {
620 off64_t offset;
621 size_t size;
622 if (track->sampleTable->getMetaDataForSample(
623 0 /* sampleIndex */, &offset, &size, NULL /* sampleTime */) == OK) {
624 if (size > kMaxTrackHeaderSize) {
625 size = kMaxTrackHeaderSize;
626 }
627 uint8_t header[kMaxTrackHeaderSize];
628 if (mDataSource->readAt(offset, &header, size) == (ssize_t)size) {
629 AMediaFormat_setBuffer(track->meta,
630 AMEDIAFORMAT_KEY_MPEG2_STREAM_HEADER, header, size);
631 }
632 }
633 }
634
635 if (mMoofOffset > 0) {
636 int64_t duration;
637 if (AMediaFormat_getInt64(track->meta,
638 AMEDIAFORMAT_KEY_DURATION, &duration)) {
639 // nothing fancy, just pick a frame near 1/4th of the duration
640 AMediaFormat_setInt64(track->meta,
641 AMEDIAFORMAT_KEY_THUMBNAIL_TIME, duration / 4);
642 }
643 } else {
644 uint32_t sampleIndex;
645 uint64_t sampleTime;
646 if (track->timescale != 0 &&
647 track->sampleTable->findThumbnailSample(&sampleIndex) == OK
648 && track->sampleTable->getMetaDataForSample(
649 sampleIndex, NULL /* offset */, NULL /* size */,
650 &sampleTime) == OK) {
651 AMediaFormat_setInt64(track->meta,
652 AMEDIAFORMAT_KEY_THUMBNAIL_TIME,
653 ((int64_t)sampleTime * 1000000) / track->timescale);
654 }
655 }
656 }
657 }
658
659 return AMediaFormat_copy(meta, track->meta);
660 }
661
// Parses the container once and caches the outcome in mInitCheck; later
// calls return the cached status immediately.
//
// Steps:
//  1. Run parseChunk() over the top-level boxes until enough of the file
//     has been seen (see the loop condition) or parsing fails.
//  2. For HEIF/AVIF files with images, publish the EXIF/XMP locations and
//     add one track per image.
//  3. Choose the container MIME type from the kinds of tracks found.
//  4. Serialize all collected pssh entries into the file metadata.
//
// Returns the resulting init status, or NO_MEMORY if the temporary pssh
// buffer cannot be allocated.
status_t MPEG4Extractor::readMetaData() {
    if (mInitCheck != NO_INIT) {
        return mInitCheck;
    }

    off64_t offset = 0;
    status_t err;
    bool sawMoovOrSidx = false;

    // Keep parsing until either
    //  - a moov box exists, the moov/sidx signal below was seen, and an mdat
    //    or moof has been found, or
    //  - this is a HEIF file that is to be treated as such (HEIF preferred
    //    or no moov box) and it has a valid item table.
    while (!((mHasMoovBox && sawMoovOrSidx && (mMdatFound || mMoofFound)) ||
             (mIsHeif && (mPreferHeif || !mHasMoovBox) &&
                     (mItemTable != NULL) && mItemTable->isValid()))) {
        off64_t orig_offset = offset;
        err = parseChunk(&offset, 0);

        if (err != OK && err != UNKNOWN_ERROR) {
            break;
        } else if (offset <= orig_offset) {
            // only continue parsing if the offset was advanced,
            // otherwise we might end up in an infinite loop
            ALOGE("did not advance: %lld->%lld", (long long)orig_offset, (long long)offset);
            err = ERROR_MALFORMED;
            break;
        } else if (err == UNKNOWN_ERROR) {
            // NOTE(review): parseChunk() appears to use UNKNOWN_ERROR as a
            // non-fatal signal that a moov or sidx box was processed
            // (judging by the flag name) -- confirm against parseChunk().
            sawMoovOrSidx = true;
        }
    }

    if ((mIsAvif || mIsHeif) && (mItemTable != NULL) && (mItemTable->countImages() > 0)) {
        off64_t exifOffset;
        size_t exifSize;
        if (mItemTable->getExifOffsetAndSize(&exifOffset, &exifSize) == OK) {
            AMediaFormat_setInt64(mFileMetaData,
                    AMEDIAFORMAT_KEY_EXIF_OFFSET, (int64_t)exifOffset);
            AMediaFormat_setInt64(mFileMetaData,
                    AMEDIAFORMAT_KEY_EXIF_SIZE, (int64_t)exifSize);
        }
        off64_t xmpOffset;
        size_t xmpSize;
        if (mItemTable->getXmpOffsetAndSize(&xmpOffset, &xmpSize) == OK) {
            // TODO(chz): b/175717339
            // Use a hard-coded string here instead of named keys. The keys are available
            // only on API 31+. The mp4 extractor is part of mainline and has min_sdk_version
            // of 29. This hard-coded string can be replaced with the named constant once
            // the mp4 extractor is built against API 31+.
            AMediaFormat_setInt64(mFileMetaData,
                    "xmp-offset" /*AMEDIAFORMAT_KEY_XMP_OFFSET*/, (int64_t)xmpOffset);
            AMediaFormat_setInt64(mFileMetaData,
                    "xmp-size" /*AMEDIAFORMAT_KEY_XMP_SIZE*/, (int64_t)xmpSize);
        }
        // Append one track per image that has metadata.
        for (uint32_t imageIndex = 0;
                imageIndex < mItemTable->countImages(); imageIndex++) {
            AMediaFormat *meta = mItemTable->getImageMeta(imageIndex);
            if (meta == NULL) {
                ALOGE("heif image %u has no meta!", imageIndex);
                continue;
            }
            // Some heif files advertise image sequence brands (eg. 'hevc') in
            // ftyp box, but don't have any valid tracks in them. Instead of
            // reporting the entire file as malformed, we override the error
            // to allow still images to be extracted.
            if (err != OK) {
                ALOGW("Extracting still images only");
                err = OK;
            }
            mInitCheck = OK;

            ALOGV("adding %s image track %u", mIsHeif ? "HEIF" : "AVIF", imageIndex);
            Track *track = new Track;
            if (mLastTrack != NULL) {
                mLastTrack->next = track;
            } else {
                mFirstTrack = track;
            }
            mLastTrack = track;

            // The image index doubles as the track id.
            track->meta = meta;
            AMediaFormat_setInt32(track->meta, AMEDIAFORMAT_KEY_TRACK_ID, imageIndex);
            track->timescale = 1000000;
        }
    }

    if (mInitCheck == OK) {
        // Container MIME type, in order of precedence: video, audio, HEIC
        // image, AVIF image, otherwise a generic binary type.
        if (findTrackByMimePrefix("video/") != NULL) {
            AMediaFormat_setString(mFileMetaData,
                    AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_CONTAINER_MPEG4);
        } else if (findTrackByMimePrefix("audio/") != NULL) {
            AMediaFormat_setString(mFileMetaData,
                    AMEDIAFORMAT_KEY_MIME, "audio/mp4");
        } else if (findTrackByMimePrefix(
                MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC) != NULL) {
            AMediaFormat_setString(mFileMetaData,
                    AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_CONTAINER_HEIF);
        } else if (findTrackByMimePrefix(
                MEDIA_MIMETYPE_IMAGE_AVIF) != NULL) {
            AMediaFormat_setString(mFileMetaData,
                    AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_IMAGE_AVIF);
        } else {
            AMediaFormat_setString(mFileMetaData,
                    AMEDIAFORMAT_KEY_MIME, "application/octet-stream");
        }
    } else {
        // Not marked OK above: record the parse result as the init status.
        mInitCheck = err;
    }

    CHECK_NE(err, (status_t)NO_INIT);

    // copy pssh data into file metadata
    uint64_t psshsize = 0;
    for (size_t i = 0; i < mPssh.size(); i++) {
        // Each entry contributes a 20-byte header (uuid + length) plus its
        // payload.
        psshsize += 20 + mPssh[i].datalen;
    }
    if (psshsize > 0 && psshsize <= UINT32_MAX) {
        char *buf = (char*)malloc(psshsize);
        if (!buf) {
            ALOGE("b/28471206");
            return NO_MEMORY;
        }
        char *ptr = buf;
        for (size_t i = 0; i < mPssh.size(); i++) {
            memcpy(ptr, mPssh[i].uuid, 20); // uuid + length
            memcpy(ptr + 20, mPssh[i].data, mPssh[i].datalen);
            ptr += (20 + mPssh[i].datalen);
        }
        AMediaFormat_setBuffer(mFileMetaData, AMEDIAFORMAT_KEY_PSSH, buf, psshsize);
        free(buf);
    }

    return mInitCheck;
}
792
793 struct PathAdder {
PathAdderandroid::PathAdder794 PathAdder(Vector<uint32_t> *path, uint32_t chunkType)
795 : mPath(path) {
796 mPath->push(chunkType);
797 }
798
~PathAdderandroid::PathAdder799 ~PathAdder() {
800 mPath->pop();
801 }
802
803 private:
804 Vector<uint32_t> *mPath;
805
806 PathAdder(const PathAdder &);
807 PathAdder &operator=(const PathAdder &);
808 };
809
underMetaDataPath(const Vector<uint32_t> & path)810 static bool underMetaDataPath(const Vector<uint32_t> &path) {
811 return path.size() >= 5
812 && path[0] == FOURCC("moov")
813 && path[1] == FOURCC("udta")
814 && path[2] == FOURCC("meta")
815 && path[3] == FOURCC("ilst");
816 }
817
underQTMetaPath(const Vector<uint32_t> & path,int32_t depth)818 static bool underQTMetaPath(const Vector<uint32_t> &path, int32_t depth) {
819 return path.size() >= 2
820 && path[0] == FOURCC("moov")
821 && path[1] == FOURCC("meta")
822 && (depth == 2
823 || (depth == 3
824 && (path[2] == FOURCC("hdlr")
825 || path[2] == FOURCC("ilst")
826 || path[2] == FOURCC("keys"))));
827 }
828
829 // Given a time in seconds since Jan 1 1904, produce a human-readable string.
convertTimeToDate(int64_t time_1904,String8 * s)830 static bool convertTimeToDate(int64_t time_1904, String8 *s) {
831 // delta between mpeg4 time and unix epoch time
832 static const int64_t delta = (((66 * 365 + 17) * 24) * 3600);
833 if (time_1904 < INT64_MIN + delta) {
834 return false;
835 }
836 time_t time_1970 = time_1904 - delta;
837
838 char tmp[32];
839 struct tm* tm = gmtime(&time_1970);
840 if (tm != NULL &&
841 strftime(tmp, sizeof(tmp), "%Y%m%dT%H%M%S.000Z", tm) > 0) {
842 s->setTo(tmp);
843 return true;
844 }
845 return false;
846 }
847
parseChunk(off64_t * offset,int depth)848 status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
849 ALOGV("entering parseChunk %lld/%d", (long long)*offset, depth);
850
851 if (*offset < 0) {
852 ALOGE("b/23540914");
853 return ERROR_MALFORMED;
854 }
855 if (depth > 100) {
856 ALOGE("b/27456299");
857 return ERROR_MALFORMED;
858 }
859 uint32_t hdr[2];
860 if (mDataSource->readAt(*offset, hdr, 8) < 8) {
861 return ERROR_IO;
862 }
863 uint64_t chunk_size = ntohl(hdr[0]);
864 int32_t chunk_type = ntohl(hdr[1]);
865 off64_t data_offset = *offset + 8;
866
867 if (chunk_size == 1) {
868 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
869 return ERROR_IO;
870 }
871 chunk_size = ntoh64(chunk_size);
872 data_offset += 8;
873
874 if (chunk_size < 16) {
875 // The smallest valid chunk is 16 bytes long in this case.
876 return ERROR_MALFORMED;
877 }
878 } else if (chunk_size == 0) {
879 if (depth == 0) {
880 // atom extends to end of file
881 off64_t sourceSize;
882 if (mDataSource->getSize(&sourceSize) == OK) {
883 chunk_size = (sourceSize - *offset);
884 } else {
885 // XXX could we just pick a "sufficiently large" value here?
886 ALOGE("atom size is 0, and data source has no size");
887 return ERROR_MALFORMED;
888 }
889 } else {
890 // not allowed for non-toplevel atoms, skip it
891 *offset += 4;
892 return OK;
893 }
894 } else if (chunk_size < 8) {
895 // The smallest valid chunk is 8 bytes long.
896 ALOGE("invalid chunk size: %" PRIu64, chunk_size);
897 return ERROR_MALFORMED;
898 }
899
900 char chunk[5];
901 MakeFourCCString(chunk_type, chunk);
902 ALOGV("chunk: %s @ %lld, %d", chunk, (long long)*offset, depth);
903
904 if (kUseHexDump) {
905 static const char kWhitespace[] = " ";
906 const char *indent = &kWhitespace[sizeof(kWhitespace) - 1 - 2 * depth];
907 printf("%sfound chunk '%s' of size %" PRIu64 "\n", indent, chunk, chunk_size);
908
909 char buffer[256];
910 size_t n = chunk_size;
911 if (n > sizeof(buffer)) {
912 n = sizeof(buffer);
913 }
914 if (mDataSource->readAt(*offset, buffer, n)
915 < (ssize_t)n) {
916 return ERROR_IO;
917 }
918
919 hexdump(buffer, n);
920 }
921
922 PathAdder autoAdder(&mPath, chunk_type);
923
924 // (data_offset - *offset) is either 8 or 16
925 off64_t chunk_data_size = chunk_size - (data_offset - *offset);
926 if (chunk_data_size < 0) {
927 ALOGE("b/23540914");
928 return ERROR_MALFORMED;
929 }
930 if (chunk_type != FOURCC("mdat") && chunk_data_size > kMaxAtomSize) {
931 char errMsg[100];
932 sprintf(errMsg, "%s atom has size %" PRId64, chunk, chunk_data_size);
933 ALOGE("%s (b/28615448)", errMsg);
934 android_errorWriteWithInfoLog(0x534e4554, "28615448", -1, errMsg, strlen(errMsg));
935 return ERROR_MALFORMED;
936 }
937
938 if (chunk_type != FOURCC("cprt")
939 && chunk_type != FOURCC("covr")
940 && mPath.size() == 5 && underMetaDataPath(mPath)) {
941 off64_t stop_offset = *offset + chunk_size;
942 *offset = data_offset;
943 while (*offset < stop_offset) {
944 status_t err = parseChunk(offset, depth + 1);
945 if (err != OK) {
946 return err;
947 }
948 }
949
950 if (*offset != stop_offset) {
951 return ERROR_MALFORMED;
952 }
953
954 return OK;
955 }
956
957 switch(chunk_type) {
958 case FOURCC("moov"):
959 case FOURCC("trak"):
960 case FOURCC("mdia"):
961 case FOURCC("minf"):
962 case FOURCC("dinf"):
963 case FOURCC("stbl"):
964 case FOURCC("mvex"):
965 case FOURCC("moof"):
966 case FOURCC("traf"):
967 case FOURCC("mfra"):
968 case FOURCC("udta"):
969 case FOURCC("ilst"):
970 case FOURCC("sinf"):
971 case FOURCC("schi"):
972 case FOURCC("edts"):
973 case FOURCC("wave"):
974 {
975 if (chunk_type == FOURCC("moov") && depth != 0) {
976 ALOGE("moov: depth %d", depth);
977 return ERROR_MALFORMED;
978 }
979
980 if (chunk_type == FOURCC("moov") && mInitCheck == OK) {
981 ALOGE("duplicate moov");
982 return ERROR_MALFORMED;
983 }
984
985 if (chunk_type == FOURCC("moof") && !mMoofFound) {
986 // store the offset of the first segment
987 mMoofFound = true;
988 mMoofOffset = *offset;
989 }
990
991 if (chunk_type == FOURCC("stbl")) {
992 ALOGV("sampleTable chunk is %" PRIu64 " bytes long.", chunk_size);
993
994 if (mDataSource->flags()
995 & (DataSourceBase::kWantsPrefetching
996 | DataSourceBase::kIsCachingDataSource)) {
997 CachedRangedDataSource *cachedSource =
998 new CachedRangedDataSource(mDataSource);
999
1000 if (cachedSource->setCachedRange(
1001 *offset, chunk_size,
1002 true /* assume ownership on success */) == OK) {
1003 mDataSource = cachedSource;
1004 } else {
1005 delete cachedSource;
1006 }
1007 }
1008
1009 if (mLastTrack == NULL) {
1010 return ERROR_MALFORMED;
1011 }
1012
1013 mLastTrack->sampleTable = new SampleTable(mDataSource);
1014 }
1015
1016 bool isTrack = false;
1017 if (chunk_type == FOURCC("trak")) {
1018 if (depth != 1) {
1019 ALOGE("trak: depth %d", depth);
1020 return ERROR_MALFORMED;
1021 }
1022 isTrack = true;
1023
1024 ALOGV("adding new track");
1025 Track *track = new Track;
1026 if (mLastTrack) {
1027 mLastTrack->next = track;
1028 } else {
1029 mFirstTrack = track;
1030 }
1031 mLastTrack = track;
1032
1033 track->meta = AMediaFormat_new();
1034 AMediaFormat_setString(track->meta,
1035 AMEDIAFORMAT_KEY_MIME, "application/octet-stream");
1036 }
1037
1038 off64_t stop_offset = *offset + chunk_size;
1039 *offset = data_offset;
1040 while (*offset < stop_offset) {
1041
1042 // pass udata terminate
1043 if (mIsQT && stop_offset - *offset == 4 && chunk_type == FOURCC("udta")) {
1044 // handle the case that udta terminates with terminate code x00000000
1045 // note that 0 terminator is optional and we just handle this case.
1046 uint32_t terminate_code = 1;
1047 mDataSource->readAt(*offset, &terminate_code, 4);
1048 if (0 == terminate_code) {
1049 *offset += 4;
1050 ALOGD("Terminal code for udta");
1051 continue;
1052 } else {
1053 ALOGW("invalid udta Terminal code");
1054 }
1055 }
1056
1057 status_t err = parseChunk(offset, depth + 1);
1058 if (err != OK) {
1059 if (isTrack) {
1060 mLastTrack->skipTrack = true;
1061 break;
1062 }
1063 return err;
1064 }
1065 }
1066
1067 if (*offset != stop_offset) {
1068 return ERROR_MALFORMED;
1069 }
1070
1071 if (isTrack) {
1072 int32_t trackId;
1073 // There must be exactly one track header per track.
1074
1075 if (!AMediaFormat_getInt32(mLastTrack->meta,
1076 AMEDIAFORMAT_KEY_TRACK_ID, &trackId)) {
1077 mLastTrack->skipTrack = true;
1078 }
1079
1080 status_t err = verifyTrack(mLastTrack);
1081 if (err != OK) {
1082 mLastTrack->skipTrack = true;
1083 }
1084
1085
1086 if (mLastTrack->skipTrack) {
1087 ALOGV("skipping this track...");
1088 Track *cur = mFirstTrack;
1089
1090 if (cur == mLastTrack) {
1091 delete cur;
1092 mFirstTrack = mLastTrack = NULL;
1093 } else {
1094 while (cur && cur->next != mLastTrack) {
1095 cur = cur->next;
1096 }
1097 if (cur) {
1098 cur->next = NULL;
1099 }
1100 delete mLastTrack;
1101 mLastTrack = cur;
1102 }
1103
1104 return OK;
1105 }
1106
1107 // place things we built elsewhere into their final locations
1108
1109 // put aggregated tx3g data into the metadata
1110 if (mLastTrack->mTx3gFilled > 0) {
1111 ALOGV("Putting %zu bytes of tx3g data into meta data",
1112 mLastTrack->mTx3gFilled);
1113 AMediaFormat_setBuffer(mLastTrack->meta,
1114 AMEDIAFORMAT_KEY_TEXT_FORMAT_DATA,
1115 mLastTrack->mTx3gBuffer, mLastTrack->mTx3gFilled);
1116 // drop it now to reduce our footprint
1117 free(mLastTrack->mTx3gBuffer);
1118 mLastTrack->mTx3gBuffer = NULL;
1119 mLastTrack->mTx3gFilled = 0;
1120 mLastTrack->mTx3gSize = 0;
1121 }
1122
1123 const char *mime;
1124 AMediaFormat_getString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, &mime);
1125
1126 if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_DOLBY_VISION)) {
1127 void *data;
1128 size_t size;
1129
1130 if (AMediaFormat_getBuffer(mLastTrack->meta, AMEDIAFORMAT_KEY_CSD_2,
1131 &data, &size)
1132 && size >= 5) {
1133 const uint8_t *ptr = (const uint8_t *)data;
1134 const uint8_t profile = ptr[2] >> 1;
1135 const uint8_t blCompatibilityId = (ptr[4]) >> 4;
1136 bool create_two_tracks = false;
1137
1138 if (blCompatibilityId && blCompatibilityId != 15) {
1139 create_two_tracks = true;
1140 }
1141
1142 if (4 == profile || 7 == profile ||
1143 (profile >= 8 && profile < 11 && create_two_tracks)) {
1144 // we need a backward compatible track
1145 ALOGV("Adding new backward compatible track");
1146 Track *track_b = new Track;
1147
1148 track_b->timescale = mLastTrack->timescale;
1149 track_b->sampleTable = mLastTrack->sampleTable;
1150 track_b->includes_expensive_metadata =
1151 mLastTrack->includes_expensive_metadata;
1152 track_b->skipTrack = mLastTrack->skipTrack;
1153 track_b->elst_needs_processing = mLastTrack->elst_needs_processing;
1154 track_b->elst_media_time = mLastTrack->elst_media_time;
1155 track_b->elst_segment_duration = mLastTrack->elst_segment_duration;
1156 track_b->elst_shift_start_ticks = mLastTrack->elst_shift_start_ticks;
1157 track_b->elst_initial_empty_edit_ticks =
1158 mLastTrack->elst_initial_empty_edit_ticks;
1159 track_b->subsample_encryption = mLastTrack->subsample_encryption;
1160
1161 track_b->mTx3gBuffer = mLastTrack->mTx3gBuffer;
1162 track_b->mTx3gSize = mLastTrack->mTx3gSize;
1163 track_b->mTx3gFilled = mLastTrack->mTx3gFilled;
1164
1165 track_b->meta = AMediaFormat_new();
1166 AMediaFormat_copy(track_b->meta, mLastTrack->meta);
1167
1168 mLastTrack->next = track_b;
1169 track_b->next = NULL;
1170
1171 // we want to remove the csd-2 key from the metadata, but
1172 // don't have an AMediaFormat_* function to do so. Settle
1173 // for replacing this csd-2 with an empty csd-2.
1174 uint8_t emptybuffer[8] = {};
1175 AMediaFormat_setBuffer(track_b->meta, AMEDIAFORMAT_KEY_CSD_2,
1176 emptybuffer, 0);
1177
1178 if (4 == profile || 7 == profile || 8 == profile ) {
1179 AMediaFormat_setString(track_b->meta,
1180 AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_VIDEO_HEVC);
1181 } else if (9 == profile) {
1182 AMediaFormat_setString(track_b->meta,
1183 AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_VIDEO_AVC);
1184 } else if (10 == profile) {
1185 AMediaFormat_setString(track_b->meta,
1186 AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_VIDEO_AV1);
1187 } // Should never get to else part
1188
1189 mLastTrack = track_b;
1190 }
1191 }
1192 }
1193 } else if (chunk_type == FOURCC("moov")) {
1194 mInitCheck = OK;
1195
1196 return UNKNOWN_ERROR; // Return a generic error.
1197 }
1198 break;
1199 }
1200
1201 case FOURCC("schm"):
1202 {
1203
1204 *offset += chunk_size;
1205 if (!mLastTrack) {
1206 return ERROR_MALFORMED;
1207 }
1208
1209 uint32_t scheme_type;
1210 if (mDataSource->readAt(data_offset + 4, &scheme_type, 4) < 4) {
1211 return ERROR_IO;
1212 }
1213 scheme_type = ntohl(scheme_type);
1214 int32_t mode = kCryptoModeUnencrypted;
1215 switch(scheme_type) {
1216 case FOURCC("cbc1"):
1217 {
1218 mode = kCryptoModeAesCbc;
1219 break;
1220 }
1221 case FOURCC("cbcs"):
1222 {
1223 mode = kCryptoModeAesCbc;
1224 mLastTrack->subsample_encryption = true;
1225 break;
1226 }
1227 case FOURCC("cenc"):
1228 {
1229 mode = kCryptoModeAesCtr;
1230 break;
1231 }
1232 case FOURCC("cens"):
1233 {
1234 mode = kCryptoModeAesCtr;
1235 mLastTrack->subsample_encryption = true;
1236 break;
1237 }
1238 }
1239 if (mode != kCryptoModeUnencrypted) {
1240 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CRYPTO_MODE, mode);
1241 }
1242 break;
1243 }
1244
1245
1246 case FOURCC("elst"):
1247 {
1248 *offset += chunk_size;
1249
1250 if (!mLastTrack) {
1251 return ERROR_MALFORMED;
1252 }
1253
1254 // See 14496-12 8.6.6
1255 uint8_t version;
1256 if (mDataSource->readAt(data_offset, &version, 1) < 1) {
1257 return ERROR_IO;
1258 }
1259
1260 uint32_t entry_count;
1261 if (!mDataSource->getUInt32(data_offset + 4, &entry_count)) {
1262 return ERROR_IO;
1263 }
1264
1265 if (entry_count > 2) {
1266 /* We support a single entry for gapless playback or negating offset for
1267 * reordering B frames, two entries (empty edit) for start offset at the moment.
1268 */
1269 ALOGW("ignoring edit list with %d entries", entry_count);
1270 } else {
1271 off64_t entriesoffset = data_offset + 8;
1272 uint64_t segment_duration;
1273 int64_t media_time;
1274 bool empty_edit_present = false;
1275 for (int i = 0; i < entry_count; ++i) {
1276 switch (version) {
1277 case 0: {
1278 uint32_t sd;
1279 int32_t mt;
1280 if (!mDataSource->getUInt32(entriesoffset, &sd) ||
1281 !mDataSource->getUInt32(entriesoffset + 4, (uint32_t*)&mt)) {
1282 return ERROR_IO;
1283 }
1284 segment_duration = sd;
1285 media_time = mt;
1286 // 4(segment duration) + 4(media time) + 4(media rate)
1287 entriesoffset += 12;
1288 break;
1289 }
1290 case 1: {
1291 if (!mDataSource->getUInt64(entriesoffset, &segment_duration) ||
1292 !mDataSource->getUInt64(entriesoffset + 8, (uint64_t*)&media_time)) {
1293 return ERROR_IO;
1294 }
1295 // 8(segment duration) + 8(media time) + 4(media rate)
1296 entriesoffset += 20;
1297 break;
1298 }
1299 default:
1300 return ERROR_IO;
1301 break;
1302 }
1303 // Empty edit entry would have to be first entry.
1304 if (media_time == -1 && i == 0) {
1305 empty_edit_present = true;
1306 ALOGV("initial empty edit ticks: %" PRIu64, segment_duration);
1307 /* In movie header timescale, and needs to be converted to media timescale
1308 * after we get that from a track's 'mdhd' atom,
1309 * which at times come after 'elst'.
1310 */
1311 mLastTrack->elst_initial_empty_edit_ticks = segment_duration;
1312 } else if (media_time >= 0 && i == 0) {
1313 ALOGV("first edit list entry - from gapless playback files");
1314 mLastTrack->elst_media_time = media_time;
1315 mLastTrack->elst_segment_duration = segment_duration;
1316 ALOGV("segment_duration: %" PRIu64 " media_time: %" PRId64,
1317 segment_duration, media_time);
1318 // media_time is in media timescale as are STTS/CTTS entries.
1319 mLastTrack->elst_shift_start_ticks = media_time;
1320 } else if (empty_edit_present && i == 1) {
1321 // Process second entry only when the first entry was an empty edit entry.
1322 ALOGV("second edit list entry");
1323 mLastTrack->elst_shift_start_ticks = media_time;
1324 } else {
1325 ALOGW("for now, unsupported entry in edit list %" PRIu32, entry_count);
1326 }
1327 }
1328 // save these for later, because the elst atom might precede
1329 // the atoms that actually gives us the duration and sample rate
1330 // needed to calculate the padding and delay values
1331 mLastTrack->elst_needs_processing = true;
1332 }
1333 break;
1334 }
1335
1336 case FOURCC("frma"):
1337 {
1338 *offset += chunk_size;
1339
1340 uint32_t original_fourcc;
1341 if (mDataSource->readAt(data_offset, &original_fourcc, 4) < 4) {
1342 return ERROR_IO;
1343 }
1344 original_fourcc = ntohl(original_fourcc);
1345 ALOGV("read original format: %d", original_fourcc);
1346
1347 if (mLastTrack == NULL) {
1348 return ERROR_MALFORMED;
1349 }
1350
1351 AMediaFormat_setString(mLastTrack->meta,
1352 AMEDIAFORMAT_KEY_MIME, FourCC2MIME(original_fourcc));
1353 uint32_t num_channels = 0;
1354 uint32_t sample_rate = 0;
1355 if (AdjustChannelsAndRate(original_fourcc, &num_channels, &sample_rate)) {
1356 AMediaFormat_setInt32(mLastTrack->meta,
1357 AMEDIAFORMAT_KEY_CHANNEL_COUNT, num_channels);
1358 AMediaFormat_setInt32(mLastTrack->meta,
1359 AMEDIAFORMAT_KEY_SAMPLE_RATE, sample_rate);
1360 }
1361
1362 if (!mIsQT && original_fourcc == FOURCC("alac")) {
1363 off64_t tmpOffset = *offset;
1364 status_t err = parseALACSampleEntry(&tmpOffset);
1365 if (err != OK) {
1366 ALOGE("parseALACSampleEntry err:%d Line:%d", err, __LINE__);
1367 return err;
1368 }
1369 *offset = tmpOffset + 8;
1370 }
1371
1372 break;
1373 }
1374
1375 case FOURCC("tenc"):
1376 {
1377 *offset += chunk_size;
1378
1379 if (chunk_size < 32) {
1380 return ERROR_MALFORMED;
1381 }
1382
1383 // tenc box contains 1 byte version, 3 byte flags, 3 byte default algorithm id, one byte
1384 // default IV size, 16 bytes default KeyID
1385 // (ISO 23001-7)
1386
1387 uint8_t version;
1388 if (mDataSource->readAt(data_offset, &version, sizeof(version))
1389 < (ssize_t)sizeof(version)) {
1390 return ERROR_IO;
1391 }
1392
1393 uint8_t buf[4];
1394 memset(buf, 0, 4);
1395 if (mDataSource->readAt(data_offset + 4, buf + 1, 3) < 3) {
1396 return ERROR_IO;
1397 }
1398
1399 if (mLastTrack == NULL) {
1400 return ERROR_MALFORMED;
1401 }
1402
1403 uint8_t defaultEncryptedByteBlock = 0;
1404 uint8_t defaultSkipByteBlock = 0;
1405 uint32_t defaultAlgorithmId = ntohl(*((int32_t*)buf));
1406 if (version == 1) {
1407 uint32_t pattern = buf[2];
1408 defaultEncryptedByteBlock = pattern >> 4;
1409 defaultSkipByteBlock = pattern & 0xf;
1410 if (defaultEncryptedByteBlock == 0 && defaultSkipByteBlock == 0) {
1411 // use (1,0) to mean "encrypt everything"
1412 defaultEncryptedByteBlock = 1;
1413 }
1414 } else if (mLastTrack->subsample_encryption) {
1415 ALOGW("subsample_encryption should be version 1");
1416 } else if (defaultAlgorithmId > 1) {
1417 // only 0 (clear) and 1 (AES-128) are valid
1418 ALOGW("defaultAlgorithmId: %u is a reserved value", defaultAlgorithmId);
1419 defaultAlgorithmId = 1;
1420 }
1421
1422 memset(buf, 0, 4);
1423 if (mDataSource->readAt(data_offset + 7, buf + 3, 1) < 1) {
1424 return ERROR_IO;
1425 }
1426 uint32_t defaultIVSize = ntohl(*((int32_t*)buf));
1427
1428 if (defaultAlgorithmId == 0 && defaultIVSize != 0) {
1429 // only unencrypted data must have 0 IV size
1430 return ERROR_MALFORMED;
1431 } else if (defaultIVSize != 0 &&
1432 defaultIVSize != 8 &&
1433 defaultIVSize != 16) {
1434 return ERROR_MALFORMED;
1435 }
1436
1437 uint8_t defaultKeyId[16];
1438
1439 if (mDataSource->readAt(data_offset + 8, &defaultKeyId, 16) < 16) {
1440 return ERROR_IO;
1441 }
1442
1443 sp<ABuffer> defaultConstantIv;
1444 if (defaultAlgorithmId != 0 && defaultIVSize == 0) {
1445
1446 uint8_t ivlength;
1447 if (mDataSource->readAt(data_offset + 24, &ivlength, sizeof(ivlength))
1448 < (ssize_t)sizeof(ivlength)) {
1449 return ERROR_IO;
1450 }
1451
1452 if (ivlength != 8 && ivlength != 16) {
1453 ALOGW("unsupported IV length: %u", ivlength);
1454 return ERROR_MALFORMED;
1455 }
1456
1457 defaultConstantIv = new ABuffer(ivlength);
1458 if (mDataSource->readAt(data_offset + 25, defaultConstantIv->data(), ivlength)
1459 < (ssize_t)ivlength) {
1460 return ERROR_IO;
1461 }
1462
1463 defaultConstantIv->setRange(0, ivlength);
1464 }
1465
1466 int32_t tmpAlgorithmId;
1467 if (!AMediaFormat_getInt32(mLastTrack->meta,
1468 AMEDIAFORMAT_KEY_CRYPTO_MODE, &tmpAlgorithmId)) {
1469 AMediaFormat_setInt32(mLastTrack->meta,
1470 AMEDIAFORMAT_KEY_CRYPTO_MODE, defaultAlgorithmId);
1471 }
1472
1473 AMediaFormat_setInt32(mLastTrack->meta,
1474 AMEDIAFORMAT_KEY_CRYPTO_DEFAULT_IV_SIZE, defaultIVSize);
1475 AMediaFormat_setBuffer(mLastTrack->meta,
1476 AMEDIAFORMAT_KEY_CRYPTO_KEY, defaultKeyId, 16);
1477 AMediaFormat_setInt32(mLastTrack->meta,
1478 AMEDIAFORMAT_KEY_CRYPTO_ENCRYPTED_BYTE_BLOCK, defaultEncryptedByteBlock);
1479 AMediaFormat_setInt32(mLastTrack->meta,
1480 AMEDIAFORMAT_KEY_CRYPTO_SKIP_BYTE_BLOCK, defaultSkipByteBlock);
1481 if (defaultConstantIv != NULL) {
1482 AMediaFormat_setBuffer(mLastTrack->meta, AMEDIAFORMAT_KEY_CRYPTO_IV,
1483 defaultConstantIv->data(), defaultConstantIv->size());
1484 }
1485 break;
1486 }
1487
1488 case FOURCC("tkhd"):
1489 {
1490 *offset += chunk_size;
1491
1492 status_t err;
1493 if ((err = parseTrackHeader(data_offset, chunk_data_size)) != OK) {
1494 return err;
1495 }
1496
1497 break;
1498 }
1499
1500 case FOURCC("tref"):
1501 {
1502 off64_t stop_offset = *offset + chunk_size;
1503 *offset = data_offset;
1504 while (*offset < stop_offset) {
1505 status_t err = parseChunk(offset, depth + 1);
1506 if (err != OK) {
1507 return err;
1508 }
1509 }
1510 if (*offset != stop_offset) {
1511 return ERROR_MALFORMED;
1512 }
1513 break;
1514 }
1515
1516 case FOURCC("thmb"):
1517 {
1518 *offset += chunk_size;
1519
1520 if (mLastTrack != NULL) {
1521 // Skip thumbnail track for now since we don't have an
1522 // API to retrieve it yet.
1523 // The thumbnail track can't be accessed by negative index or time,
1524 // because each timed sample has its own corresponding thumbnail
1525 // in the thumbnail track. We'll need a dedicated API to retrieve
1526 // thumbnail at time instead.
1527 mLastTrack->skipTrack = true;
1528 }
1529
1530 break;
1531 }
1532
1533 case FOURCC("pssh"):
1534 {
1535 *offset += chunk_size;
1536
1537 PsshInfo pssh;
1538
1539 if (mDataSource->readAt(data_offset + 4, &pssh.uuid, 16) < 16) {
1540 return ERROR_IO;
1541 }
1542
1543 uint32_t psshdatalen = 0;
1544 if (mDataSource->readAt(data_offset + 20, &psshdatalen, 4) < 4) {
1545 return ERROR_IO;
1546 }
1547 pssh.datalen = ntohl(psshdatalen);
1548 ALOGV("pssh data size: %d", pssh.datalen);
1549 if (chunk_size < 20 || pssh.datalen > chunk_size - 20) {
1550 // pssh data length exceeds size of containing box
1551 return ERROR_MALFORMED;
1552 }
1553
1554 pssh.data = new (std::nothrow) uint8_t[pssh.datalen];
1555 if (pssh.data == NULL) {
1556 return ERROR_MALFORMED;
1557 }
1558 ALOGV("allocated pssh @ %p", pssh.data);
1559 ssize_t requested = (ssize_t) pssh.datalen;
1560 if (mDataSource->readAt(data_offset + 24, pssh.data, requested) < requested) {
1561 delete[] pssh.data;
1562 return ERROR_IO;
1563 }
1564 mPssh.push_back(pssh);
1565
1566 break;
1567 }
1568
1569 case FOURCC("mdhd"):
1570 {
1571 *offset += chunk_size;
1572
1573 if (chunk_data_size < 4 || mLastTrack == NULL) {
1574 return ERROR_MALFORMED;
1575 }
1576
1577 uint8_t version;
1578 if (mDataSource->readAt(
1579 data_offset, &version, sizeof(version))
1580 < (ssize_t)sizeof(version)) {
1581 return ERROR_IO;
1582 }
1583
1584 off64_t timescale_offset;
1585
1586 if (version == 1) {
1587 timescale_offset = data_offset + 4 + 16;
1588 } else if (version == 0) {
1589 timescale_offset = data_offset + 4 + 8;
1590 } else {
1591 return ERROR_IO;
1592 }
1593
1594 uint32_t timescale;
1595 if (mDataSource->readAt(
1596 timescale_offset, ×cale, sizeof(timescale))
1597 < (ssize_t)sizeof(timescale)) {
1598 return ERROR_IO;
1599 }
1600
1601 if (!timescale) {
1602 ALOGE("timescale should not be ZERO.");
1603 return ERROR_MALFORMED;
1604 }
1605
1606 mLastTrack->timescale = ntohl(timescale);
1607
1608 // 14496-12 says all ones means indeterminate, but some files seem to use
1609 // 0 instead. We treat both the same.
1610 int64_t duration = 0;
1611 if (version == 1) {
1612 if (mDataSource->readAt(
1613 timescale_offset + 4, &duration, sizeof(duration))
1614 < (ssize_t)sizeof(duration)) {
1615 return ERROR_IO;
1616 }
1617 if (duration != -1) {
1618 duration = ntoh64(duration);
1619 }
1620 } else {
1621 uint32_t duration32;
1622 if (mDataSource->readAt(
1623 timescale_offset + 4, &duration32, sizeof(duration32))
1624 < (ssize_t)sizeof(duration32)) {
1625 return ERROR_IO;
1626 }
1627 if (duration32 != 0xffffffff) {
1628 duration = ntohl(duration32);
1629 }
1630 }
1631 if (duration != 0 && mLastTrack->timescale != 0) {
1632 long double durationUs = ((long double)duration * 1000000) / mLastTrack->timescale;
1633 if (durationUs < 0 || durationUs > INT64_MAX) {
1634 ALOGE("cannot represent %lld * 1000000 / %lld in 64 bits",
1635 (long long) duration, (long long) mLastTrack->timescale);
1636 return ERROR_MALFORMED;
1637 }
1638 AMediaFormat_setInt64(mLastTrack->meta, AMEDIAFORMAT_KEY_DURATION, durationUs);
1639 }
1640
1641 uint8_t lang[2];
1642 off64_t lang_offset;
1643 if (version == 1) {
1644 lang_offset = timescale_offset + 4 + 8;
1645 } else if (version == 0) {
1646 lang_offset = timescale_offset + 4 + 4;
1647 } else {
1648 return ERROR_IO;
1649 }
1650
1651 if (mDataSource->readAt(lang_offset, &lang, sizeof(lang))
1652 < (ssize_t)sizeof(lang)) {
1653 return ERROR_IO;
1654 }
1655
1656 // To get the ISO-639-2/T three character language code
1657 // 1 bit pad followed by 3 5-bits characters. Each character
1658 // is packed as the difference between its ASCII value and 0x60.
1659 char lang_code[4];
1660 lang_code[0] = ((lang[0] >> 2) & 0x1f) + 0x60;
1661 lang_code[1] = ((lang[0] & 0x3) << 3 | (lang[1] >> 5)) + 0x60;
1662 lang_code[2] = (lang[1] & 0x1f) + 0x60;
1663 lang_code[3] = '\0';
1664
1665 AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_LANGUAGE, lang_code);
1666
1667 break;
1668 }
1669
1670 case FOURCC("stsd"):
1671 {
1672 uint8_t buffer[8];
1673 if (chunk_data_size < (off64_t)sizeof(buffer)) {
1674 return ERROR_MALFORMED;
1675 }
1676
1677 if (mDataSource->readAt(
1678 data_offset, buffer, 8) < 8) {
1679 return ERROR_IO;
1680 }
1681
1682 if (U32_AT(buffer) != 0) {
1683 // Should be version 0, flags 0.
1684 return ERROR_MALFORMED;
1685 }
1686
1687 uint32_t entry_count = U32_AT(&buffer[4]);
1688
1689 if (entry_count > 1) {
1690 // For 3GPP timed text, there could be multiple tx3g boxes contain
1691 // multiple text display formats. These formats will be used to
1692 // display the timed text.
1693 // For encrypted files, there may also be more than one entry.
1694 const char *mime;
1695
1696 if (mLastTrack == NULL)
1697 return ERROR_MALFORMED;
1698
1699 CHECK(AMediaFormat_getString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, &mime));
1700 if (strcasecmp(mime, MEDIA_MIMETYPE_TEXT_3GPP) &&
1701 strcasecmp(mime, "application/octet-stream")) {
1702 // For now we only support a single type of media per track.
1703 mLastTrack->skipTrack = true;
1704 *offset += chunk_size;
1705 break;
1706 }
1707 }
1708 off64_t stop_offset = *offset + chunk_size;
1709 *offset = data_offset + 8;
1710 for (uint32_t i = 0; i < entry_count; ++i) {
1711 status_t err = parseChunk(offset, depth + 1);
1712 if (err != OK) {
1713 return err;
1714 }
1715 }
1716
1717 if (*offset != stop_offset) {
1718 return ERROR_MALFORMED;
1719 }
1720 break;
1721 }
1722 case FOURCC("mett"):
1723 {
1724 *offset += chunk_size;
1725
1726 // the absolute minimum size of a compliant mett box is 11 bytes:
1727 // 6 byte reserved, 2 byte index, null byte, one char mime_format, null byte
1728 // The resulting mime_format would be invalid at that size though.
1729 if (mLastTrack == NULL || chunk_data_size < 11) {
1730 return ERROR_MALFORMED;
1731 }
1732
1733 auto buffer = heapbuffer<uint8_t>(chunk_data_size);
1734 if (buffer.get() == NULL) {
1735 return NO_MEMORY;
1736 }
1737
1738 if (mDataSource->readAt(
1739 data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
1740 return ERROR_IO;
1741 }
1742
1743 // ISO-14496-12:
1744 // int8 reserved[6]; // should be all zeroes
1745 // int16_t data_reference_index;
1746 // char content_encoding[]; // null terminated, optional (= just the null byte)
1747 // char mime_format[]; // null terminated, mandatory
1748 // optional other boxes
1749 //
1750 // API < 29:
1751 // char mime_format[]; // null terminated
1752 //
1753 // API >= 29
1754 // char mime_format[]; // null terminated
1755 // char mime_format[]; // null terminated
1756
1757 // Prior to API 29, the metadata track was not compliant with ISO/IEC
1758 // 14496-12-2015. This led to some ISO-compliant parsers failing to read the
1759 // metatrack. As of API 29 and onwards, a change was made to metadata track to
1760 // make it somewhat compatible with the standard. The workaround is to write the
1761 // null-terminated mime_format string twice. This allows compliant parsers to
1762 // read the missing reserved, data_reference_index, and content_encoding fields
1763 // from the first mime_type string. The actual mime_format field would then be
1764 // read correctly from the second string. The non-compliant Android frameworks
1765 // from API 28 and earlier would still be able to read the mime_format correctly
1766 // as it would only read the first null-terminated mime_format string. To enable
1767 // reading metadata tracks generated from both the non-compliant and compliant
1768 // formats, a check needs to be done to see which format is used.
1769 const char *str = (const char*) buffer.get();
1770 size_t string_length = strnlen(str, chunk_data_size);
1771
1772 if (string_length == chunk_data_size - 1) {
1773 // This is likely a pre API 29 file, since it's a single null terminated
1774 // string filling the entire box.
1775 AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, str);
1776 } else {
1777 // This might be a fully compliant metadata track, a "double mime" compatibility
1778 // track, or anything else, including a single non-terminated string, so we need
1779 // to determine the length of each string we want to parse out of the box.
1780 size_t encoding_length = strnlen(str + 8, chunk_data_size - 8);
1781 if (encoding_length + 8 >= chunk_data_size - 2) {
1782 // the encoding extends to the end of the box, so there's no mime_format
1783 return ERROR_MALFORMED;
1784 }
1785 String8 contentEncoding(str + 8, encoding_length);
1786 String8 mimeFormat(str + 8 + encoding_length + 1,
1787 chunk_data_size - 8 - encoding_length - 1);
1788 AMediaFormat_setString(mLastTrack->meta,
1789 AMEDIAFORMAT_KEY_MIME, mimeFormat.string());
1790 }
1791 break;
1792 }
1793
1794 case FOURCC("mp4a"):
1795 case FOURCC("enca"):
1796 case FOURCC("samr"):
1797 case FOURCC("sawb"):
1798 case FOURCC("Opus"):
1799 case FOURCC("twos"):
1800 case FOURCC("sowt"):
1801 case FOURCC("alac"):
1802 case FOURCC("fLaC"):
1803 case FOURCC(".mp3"):
1804 case 0x6D730055: // "ms U" mp3 audio
1805 case FOURCC("mha1"):
1806 case FOURCC("mhm1"):
1807 {
1808 if (mIsQT && depth >= 1 && mPath[depth - 1] == FOURCC("wave")) {
1809
1810 if (chunk_type == FOURCC("alac")) {
1811 off64_t offsetTmp = *offset;
1812 status_t err = parseALACSampleEntry(&offsetTmp);
1813 if (err != OK) {
1814 ALOGE("parseALACSampleEntry err:%d Line:%d", err, __LINE__);
1815 return err;
1816 }
1817 }
1818
1819 // Ignore all atoms embedded in QT wave atom
1820 ALOGV("Ignore all atoms embedded in QT wave atom");
1821 *offset += chunk_size;
1822 break;
1823 }
1824
1825 uint8_t buffer[8 + 20];
1826 if (chunk_data_size < (ssize_t)sizeof(buffer)) {
1827 // Basic AudioSampleEntry size.
1828 return ERROR_MALFORMED;
1829 }
1830
1831 if (mDataSource->readAt(
1832 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
1833 return ERROR_IO;
1834 }
1835
1836 // we can get data_ref_index value from U16_AT(&buffer[6])
1837 uint16_t version = U16_AT(&buffer[8]);
1838 uint32_t num_channels = U16_AT(&buffer[16]);
1839
1840 uint16_t sample_size = U16_AT(&buffer[18]);
1841 uint32_t sample_rate = U32_AT(&buffer[24]) >> 16;
1842
1843 if (mLastTrack == NULL)
1844 return ERROR_MALFORMED;
1845
1846 off64_t stop_offset = *offset + chunk_size;
1847 *offset = data_offset + sizeof(buffer);
1848
1849 if (mIsQT) {
1850 if (version == 1) {
1851 if (mDataSource->readAt(*offset, buffer, 16) < 16) {
1852 return ERROR_IO;
1853 }
1854
1855 #if 0
1856 U32_AT(buffer); // samples per packet
1857 U32_AT(&buffer[4]); // bytes per packet
1858 U32_AT(&buffer[8]); // bytes per frame
1859 U32_AT(&buffer[12]); // bytes per sample
1860 #endif
1861 *offset += 16;
1862 } else if (version == 2) {
1863 uint8_t v2buffer[36];
1864 if (mDataSource->readAt(*offset, v2buffer, 36) < 36) {
1865 return ERROR_IO;
1866 }
1867
1868 #if 0
1869 U32_AT(v2buffer); // size of struct only
1870 sample_rate = (uint32_t)U64_AT(&v2buffer[4]); // audio sample rate
1871 num_channels = U32_AT(&v2buffer[12]); // num audio channels
1872 U32_AT(&v2buffer[16]); // always 0x7f000000
1873 sample_size = (uint16_t)U32_AT(&v2buffer[20]); // const bits per channel
1874 U32_AT(&v2buffer[24]); // format specifc flags
1875 U32_AT(&v2buffer[28]); // const bytes per audio packet
1876 U32_AT(&v2buffer[32]); // const LPCM frames per audio packet
1877 #endif
1878 *offset += 36;
1879 }
1880 }
1881
1882 if (chunk_type != FOURCC("enca")) {
1883 // if the chunk type is enca, we'll get the type from the frma box later
1884 AMediaFormat_setString(mLastTrack->meta,
1885 AMEDIAFORMAT_KEY_MIME, FourCC2MIME(chunk_type));
1886 AdjustChannelsAndRate(chunk_type, &num_channels, &sample_rate);
1887
1888 if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_RAW, FourCC2MIME(chunk_type))) {
1889 AMediaFormat_setInt32(mLastTrack->meta,
1890 AMEDIAFORMAT_KEY_BITS_PER_SAMPLE, sample_size);
1891 if (chunk_type == FOURCC("twos")) {
1892 AMediaFormat_setInt32(mLastTrack->meta,
1893 AMEDIAFORMAT_KEY_PCM_BIG_ENDIAN, 1);
1894 }
1895 }
1896 }
1897 ALOGV("*** coding='%s' %d channels, size %d, rate %d\n",
1898 chunk, num_channels, sample_size, sample_rate);
1899 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, num_channels);
1900 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, sample_rate);
1901
1902 if (chunk_type == FOURCC("Opus")) {
1903 uint8_t opusInfo[AOPUS_OPUSHEAD_MAXSIZE];
1904 data_offset += sizeof(buffer);
1905 size_t opusInfoSize = chunk_data_size - sizeof(buffer);
1906
1907 if (opusInfoSize < AOPUS_OPUSHEAD_MINSIZE ||
1908 opusInfoSize > AOPUS_OPUSHEAD_MAXSIZE) {
1909 return ERROR_MALFORMED;
1910 }
1911 // Read Opus Header
1912 if (mDataSource->readAt(
1913 data_offset, opusInfo, opusInfoSize) < opusInfoSize) {
1914 return ERROR_IO;
1915 }
1916
1917 // OpusHeader must start with this magic sequence, overwrite first 8 bytes
1918 // http://wiki.xiph.org/OggOpus#ID_Header
1919 strncpy((char *)opusInfo, "OpusHead", 8);
1920
1921 // Version shall be 0 as per mp4 Opus Specific Box
1922 // (https://opus-codec.org/docs/opus_in_isobmff.html#4.3.2)
1923 if (opusInfo[8]) {
1924 return ERROR_MALFORMED;
1925 }
1926 // Force version to 1 as per OpusHead definition
1927 // (http://wiki.xiph.org/OggOpus#ID_Header)
1928 opusInfo[8] = 1;
1929
1930 // Read Opus Specific Box values
1931 size_t opusOffset = 10;
1932 uint16_t pre_skip = U16_AT(&opusInfo[opusOffset]);
1933 uint32_t sample_rate = U32_AT(&opusInfo[opusOffset + 2]);
1934 uint16_t out_gain = U16_AT(&opusInfo[opusOffset + 6]);
1935
1936 // Convert Opus Specific Box values. ParseOpusHeader expects
1937 // the values in LE, however MP4 stores these values as BE
1938 // https://opus-codec.org/docs/opus_in_isobmff.html#4.3.2
1939 memcpy(&opusInfo[opusOffset], &pre_skip, sizeof(pre_skip));
1940 memcpy(&opusInfo[opusOffset + 2], &sample_rate, sizeof(sample_rate));
1941 memcpy(&opusInfo[opusOffset + 6], &out_gain, sizeof(out_gain));
1942
1943 static const int64_t kSeekPreRollNs = 80000000; // Fixed 80 msec
1944 static const int32_t kOpusSampleRate = 48000;
1945 int64_t codecDelay = pre_skip * 1000000000ll / kOpusSampleRate;
1946
1947 AMediaFormat_setBuffer(mLastTrack->meta,
1948 AMEDIAFORMAT_KEY_CSD_0, opusInfo, opusInfoSize);
1949 AMediaFormat_setBuffer(mLastTrack->meta,
1950 AMEDIAFORMAT_KEY_CSD_1, &codecDelay, sizeof(codecDelay));
1951 AMediaFormat_setBuffer(mLastTrack->meta,
1952 AMEDIAFORMAT_KEY_CSD_2, &kSeekPreRollNs, sizeof(kSeekPreRollNs));
1953
1954 data_offset += opusInfoSize;
1955 *offset = data_offset;
1956 CHECK_EQ(*offset, stop_offset);
1957 }
1958
1959 if (!mIsQT && chunk_type == FOURCC("alac")) {
1960 data_offset += sizeof(buffer);
1961
1962 status_t err = parseALACSampleEntry(&data_offset);
1963 if (err != OK) {
1964 ALOGE("parseALACSampleEntry err:%d Line:%d", err, __LINE__);
1965 return err;
1966 }
1967 *offset = data_offset;
1968 CHECK_EQ(*offset, stop_offset);
1969 }
1970
1971 if (chunk_type == FOURCC("fLaC")) {
1972
1973 // From https://github.com/xiph/flac/blob/master/doc/isoflac.txt
1974 // 4 for mime, 4 for blockType and BlockLen, 34 for metadata
1975 uint8_t flacInfo[4 + 4 + 34];
1976 // skipping dFla, version
1977 data_offset += sizeof(buffer) + 12;
1978 size_t flacOffset = 4;
1979 // Add flaC header mime type to CSD
1980 strncpy((char *)flacInfo, "fLaC", 4);
1981 if (mDataSource->readAt(
1982 data_offset, flacInfo + flacOffset, sizeof(flacInfo) - flacOffset) <
1983 (ssize_t)sizeof(flacInfo) - flacOffset) {
1984 return ERROR_IO;
1985 }
1986 data_offset += sizeof(flacInfo) - flacOffset;
1987
1988 AMediaFormat_setBuffer(mLastTrack->meta, AMEDIAFORMAT_KEY_CSD_0, flacInfo,
1989 sizeof(flacInfo));
1990 *offset = data_offset;
1991 CHECK_EQ(*offset, stop_offset);
1992 }
1993
1994 while (*offset < stop_offset) {
1995 status_t err = parseChunk(offset, depth + 1);
1996 if (err != OK) {
1997 return err;
1998 }
1999 }
2000
2001 if (*offset != stop_offset) {
2002 return ERROR_MALFORMED;
2003 }
2004 break;
2005 }
2006 case FOURCC("mhaC"):
2007 {
2008 // See ISO_IEC_23008-3;2019 MHADecoderConfigurationRecord
2009 constexpr uint32_t mhac_header_size = 4 /* size */ + 4 /* boxtype 'mhaC' */
2010 + 1 /* configurationVersion */ + 1 /* mpegh3daProfileLevelIndication */
2011 + 1 /* referenceChannelLayout */ + 2 /* mpegh3daConfigLength */;
2012 uint8_t mhac_header[mhac_header_size];
2013 off64_t data_offset = *offset;
2014
2015 if (chunk_size < sizeof(mhac_header)) {
2016 return ERROR_MALFORMED;
2017 }
2018
2019 if (mDataSource->readAt(data_offset, mhac_header, sizeof(mhac_header))
2020 < (ssize_t)sizeof(mhac_header)) {
2021 return ERROR_IO;
2022 }
2023
2024 //get mpegh3daProfileLevelIndication
2025 const uint32_t mpegh3daProfileLevelIndication = mhac_header[9];
2026 AMediaFormat_setInt32(mLastTrack->meta,
2027 AMEDIAFORMAT_KEY_MPEGH_PROFILE_LEVEL_INDICATION,
2028 mpegh3daProfileLevelIndication);
2029
2030 //get referenceChannelLayout
2031 const uint32_t referenceChannelLayout = mhac_header[10];
2032 AMediaFormat_setInt32(mLastTrack->meta,
2033 AMEDIAFORMAT_KEY_MPEGH_REFERENCE_CHANNEL_LAYOUT,
2034 referenceChannelLayout);
2035
2036 // get mpegh3daConfigLength
2037 const uint32_t mhac_config_size = U16_AT(&mhac_header[11]);
2038 if (chunk_size != sizeof(mhac_header) + mhac_config_size) {
2039 return ERROR_MALFORMED;
2040 }
2041
2042 data_offset += sizeof(mhac_header);
2043 uint8_t mhac_config[mhac_config_size];
2044 if (mDataSource->readAt(data_offset, mhac_config, sizeof(mhac_config))
2045 < (ssize_t)sizeof(mhac_config)) {
2046 return ERROR_IO;
2047 }
2048
2049 AMediaFormat_setBuffer(mLastTrack->meta,
2050 AMEDIAFORMAT_KEY_CSD_0, mhac_config, sizeof(mhac_config));
2051 data_offset += sizeof(mhac_config);
2052 *offset = data_offset;
2053 break;
2054 }
2055 case FOURCC("mhaP"):
2056 {
2057 // FDAmd_2 of ISO_IEC_23008-3;2019 MHAProfileAndLevelCompatibilitySetBox
2058 constexpr uint32_t mhap_header_size = 4 /* size */ + 4 /* boxtype 'mhaP' */
2059 + 1 /* numCompatibleSets */;
2060
2061 uint8_t mhap_header[mhap_header_size];
2062 off64_t data_offset = *offset;
2063
2064 if (chunk_size < (ssize_t)mhap_header_size) {
2065 return ERROR_MALFORMED;
2066 }
2067
2068 if (mDataSource->readAt(data_offset, mhap_header, sizeof(mhap_header))
2069 < (ssize_t)sizeof(mhap_header)) {
2070 return ERROR_IO;
2071 }
2072
2073 // mhap_compatible_sets_size = numCompatibleSets * sizeof(uint8_t)
2074 const uint32_t mhap_compatible_sets_size = mhap_header[8];
2075 if (chunk_size != sizeof(mhap_header) + mhap_compatible_sets_size) {
2076 return ERROR_MALFORMED;
2077 }
2078
2079 data_offset += sizeof(mhap_header);
2080 uint8_t mhap_compatible_sets[mhap_compatible_sets_size];
2081 if (mDataSource->readAt(
2082 data_offset, mhap_compatible_sets, sizeof(mhap_compatible_sets))
2083 < (ssize_t)sizeof(mhap_compatible_sets)) {
2084 return ERROR_IO;
2085 }
2086
2087 AMediaFormat_setBuffer(mLastTrack->meta,
2088 AMEDIAFORMAT_KEY_MPEGH_COMPATIBLE_SETS,
2089 mhap_compatible_sets, sizeof(mhap_compatible_sets));
2090 data_offset += sizeof(mhap_compatible_sets);
2091 *offset = data_offset;
2092 break;
2093 }
2094 case FOURCC("mp4v"):
2095 case FOURCC("encv"):
2096 case FOURCC("s263"):
2097 case FOURCC("H263"):
2098 case FOURCC("h263"):
2099 case FOURCC("avc1"):
2100 case FOURCC("hvc1"):
2101 case FOURCC("hev1"):
2102 case FOURCC("dvav"):
2103 case FOURCC("dva1"):
2104 case FOURCC("dvhe"):
2105 case FOURCC("dvh1"):
2106 case FOURCC("dav1"):
2107 case FOURCC("av01"):
2108 case FOURCC("vp09"):
2109 {
2110 uint8_t buffer[78];
2111 if (chunk_data_size < (ssize_t)sizeof(buffer)) {
2112 // Basic VideoSampleEntry size.
2113 return ERROR_MALFORMED;
2114 }
2115
2116 if (mDataSource->readAt(
2117 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
2118 return ERROR_IO;
2119 }
2120
2121 // we can get data_ref_index value from U16_AT(&buffer[6])
2122 uint16_t width = U16_AT(&buffer[6 + 18]);
2123 uint16_t height = U16_AT(&buffer[6 + 20]);
2124
2125 // The video sample is not standard-compliant if it has invalid dimension.
2126 // Use some default width and height value, and
2127 // let the decoder figure out the actual width and height (and thus
2128 // be prepared for INFO_FORMAT_CHANGED event).
2129 if (width == 0) width = 352;
2130 if (height == 0) height = 288;
2131
2132 // printf("*** coding='%s' width=%d height=%d\n",
2133 // chunk, width, height);
2134
2135 if (mLastTrack == NULL)
2136 return ERROR_MALFORMED;
2137
2138 if (chunk_type != FOURCC("encv")) {
2139 // if the chunk type is encv, we'll get the type from the frma box later
2140 AMediaFormat_setString(mLastTrack->meta,
2141 AMEDIAFORMAT_KEY_MIME, FourCC2MIME(chunk_type));
2142 }
2143 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_WIDTH, width);
2144 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_HEIGHT, height);
2145
2146 off64_t stop_offset = *offset + chunk_size;
2147 *offset = data_offset + sizeof(buffer);
2148 while (*offset < stop_offset) {
2149 status_t err = parseChunk(offset, depth + 1);
2150 if (err != OK) {
2151 return err;
2152 }
2153 }
2154
2155 if (*offset != stop_offset) {
2156 return ERROR_MALFORMED;
2157 }
2158 break;
2159 }
2160
2161 case FOURCC("stco"):
2162 case FOURCC("co64"):
2163 {
2164 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) {
2165 return ERROR_MALFORMED;
2166 }
2167
2168 status_t err =
2169 mLastTrack->sampleTable->setChunkOffsetParams(
2170 chunk_type, data_offset, chunk_data_size);
2171
2172 *offset += chunk_size;
2173
2174 if (err != OK) {
2175 return err;
2176 }
2177
2178 break;
2179 }
2180
2181 case FOURCC("stsc"):
2182 {
2183 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
2184 return ERROR_MALFORMED;
2185
2186 status_t err =
2187 mLastTrack->sampleTable->setSampleToChunkParams(
2188 data_offset, chunk_data_size);
2189
2190 *offset += chunk_size;
2191
2192 if (err != OK) {
2193 return err;
2194 }
2195
2196 break;
2197 }
2198
2199 case FOURCC("stsz"):
2200 case FOURCC("stz2"):
2201 {
2202 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) {
2203 return ERROR_MALFORMED;
2204 }
2205
2206 status_t err =
2207 mLastTrack->sampleTable->setSampleSizeParams(
2208 chunk_type, data_offset, chunk_data_size);
2209
2210 *offset += chunk_size;
2211
2212 if (err != OK) {
2213 return err;
2214 }
2215
2216 adjustRawDefaultFrameSize();
2217
2218 size_t max_size;
2219 err = mLastTrack->sampleTable->getMaxSampleSize(&max_size);
2220
2221 if (err != OK) {
2222 return err;
2223 }
2224
2225 if (max_size != 0) {
2226 // Assume that a given buffer only contains at most 10 chunks,
2227 // each chunk originally prefixed with a 2 byte length will
2228 // have a 4 byte header (0x00 0x00 0x00 0x01) after conversion,
2229 // and thus will grow by 2 bytes per chunk.
2230 if (max_size > SIZE_MAX - 10 * 2) {
2231 ALOGE("max sample size too big: %zu", max_size);
2232 return ERROR_MALFORMED;
2233 }
2234 AMediaFormat_setInt32(mLastTrack->meta,
2235 AMEDIAFORMAT_KEY_MAX_INPUT_SIZE, max_size + 10 * 2);
2236 } else {
2237 // No size was specified. Pick a conservatively large size.
2238 uint32_t width, height;
2239 if (!AMediaFormat_getInt32(mLastTrack->meta,
2240 AMEDIAFORMAT_KEY_WIDTH, (int32_t*)&width) ||
2241 !AMediaFormat_getInt32(mLastTrack->meta,
2242 AMEDIAFORMAT_KEY_HEIGHT,(int32_t*) &height)) {
2243 ALOGE("No width or height, assuming worst case 1080p");
2244 width = 1920;
2245 height = 1080;
2246 } else {
2247 // A resolution was specified, check that it's not too big. The values below
2248 // were chosen so that the calculations below don't cause overflows, they're
2249 // not indicating that resolutions up to 32kx32k are actually supported.
2250 if (width > 32768 || height > 32768) {
2251 ALOGE("can't support %u x %u video", width, height);
2252 return ERROR_MALFORMED;
2253 }
2254 }
2255
2256 const char *mime;
2257 CHECK(AMediaFormat_getString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, &mime));
2258 if (!strncmp(mime, "audio/", 6)) {
2259 // for audio, use 128KB
2260 max_size = 1024 * 128;
2261 } else if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)
2262 || !strcmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)
2263 || !strcmp(mime, MEDIA_MIMETYPE_VIDEO_DOLBY_VISION)) {
2264 // AVC & HEVC requires compression ratio of at least 2, and uses
2265 // macroblocks
2266 max_size = ((width + 15) / 16) * ((height + 15) / 16) * 192;
2267 } else {
2268 // For all other formats there is no minimum compression
2269 // ratio. Use compression ratio of 1.
2270 max_size = width * height * 3 / 2;
2271 }
2272 // HACK: allow 10% overhead
2273 // TODO: read sample size from traf atom for fragmented MPEG4.
2274 max_size += max_size / 10;
2275 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_MAX_INPUT_SIZE, max_size);
2276 }
2277
2278 // NOTE: setting another piece of metadata invalidates any pointers (such as the
2279 // mimetype) previously obtained, so don't cache them.
2280 const char *mime;
2281 CHECK(AMediaFormat_getString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, &mime));
2282 // Calculate average frame rate.
2283 if (!strncasecmp("video/", mime, 6)) {
2284 size_t nSamples = mLastTrack->sampleTable->countSamples();
2285 if (nSamples == 0) {
2286 int32_t trackId;
2287 if (AMediaFormat_getInt32(mLastTrack->meta,
2288 AMEDIAFORMAT_KEY_TRACK_ID, &trackId)) {
2289 for (size_t i = 0; i < mTrex.size(); i++) {
2290 Trex *t = &mTrex.editItemAt(i);
2291 if (t->track_ID == (uint32_t) trackId) {
2292 if (t->default_sample_duration > 0) {
2293 int32_t frameRate =
2294 mLastTrack->timescale / t->default_sample_duration;
2295 AMediaFormat_setInt32(mLastTrack->meta,
2296 AMEDIAFORMAT_KEY_FRAME_RATE, frameRate);
2297 }
2298 break;
2299 }
2300 }
2301 }
2302 } else {
2303 int64_t durationUs;
2304 if (AMediaFormat_getInt64(mLastTrack->meta,
2305 AMEDIAFORMAT_KEY_DURATION, &durationUs)) {
2306 if (durationUs > 0) {
2307 int32_t frameRate = (nSamples * 1000000LL +
2308 (durationUs >> 1)) / durationUs;
2309 AMediaFormat_setInt32(mLastTrack->meta,
2310 AMEDIAFORMAT_KEY_FRAME_RATE, frameRate);
2311 }
2312 }
2313 ALOGV("setting frame count %zu", nSamples);
2314 AMediaFormat_setInt32(mLastTrack->meta,
2315 AMEDIAFORMAT_KEY_FRAME_COUNT, nSamples);
2316 }
2317 }
2318
2319 break;
2320 }
2321
2322 case FOURCC("stts"):
2323 {
2324 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
2325 return ERROR_MALFORMED;
2326
2327 *offset += chunk_size;
2328
2329 if (depth >= 1 && mPath[depth - 1] != FOURCC("stbl")) {
2330 char chunk[5];
2331 MakeFourCCString(mPath[depth - 1], chunk);
2332 ALOGW("stts's parent box (%s) is not stbl, skip it.", chunk);
2333 break;
2334 }
2335
2336 status_t err =
2337 mLastTrack->sampleTable->setTimeToSampleParams(
2338 data_offset, chunk_data_size);
2339
2340 if (err != OK) {
2341 return err;
2342 }
2343
2344 break;
2345 }
2346
2347 case FOURCC("ctts"):
2348 {
2349 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
2350 return ERROR_MALFORMED;
2351
2352 *offset += chunk_size;
2353
2354 status_t err =
2355 mLastTrack->sampleTable->setCompositionTimeToSampleParams(
2356 data_offset, chunk_data_size);
2357
2358 if (err != OK) {
2359 return err;
2360 }
2361
2362 break;
2363 }
2364
2365 case FOURCC("stss"):
2366 {
2367 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
2368 return ERROR_MALFORMED;
2369
2370 *offset += chunk_size;
2371
2372 status_t err =
2373 mLastTrack->sampleTable->setSyncSampleParams(
2374 data_offset, chunk_data_size);
2375
2376 if (err != OK) {
2377 return err;
2378 }
2379
2380 break;
2381 }
2382
2383 // \xA9xyz
2384 case FOURCC("\251xyz"):
2385 {
2386 *offset += chunk_size;
2387
2388 // Best case the total data length inside "\xA9xyz" box would
2389 // be 9, for instance "\xA9xyz" + "\x00\x05\x15\xc7" + "+0+0/",
2390 // where "\x00\x05" is the text string length with value = 5,
2391 // "\x15\xc7" is the language code = en, and "+0+0/" is a
2392 // location (string) value with longitude = 0 and latitude = 0.
2393 // Since some devices encountered in the wild omit the trailing
2394 // slash, we'll allow that.
2395 if (chunk_data_size < 8) { // 8 instead of 9 to allow for missing /
2396 return ERROR_MALFORMED;
2397 }
2398
2399 uint16_t len;
2400 if (!mDataSource->getUInt16(data_offset, &len)) {
2401 return ERROR_IO;
2402 }
2403
2404 // allow "+0+0" without trailing slash
2405 if (len < 4 || len > chunk_data_size - 4) {
2406 return ERROR_MALFORMED;
2407 }
2408 // The location string following the language code is formatted
2409 // according to ISO 6709:2008 (https://en.wikipedia.org/wiki/ISO_6709).
2410 // Allocate 2 extra bytes, in case we need to add a trailing slash,
2411 // and to add a terminating 0.
2412 std::unique_ptr<char[]> buffer(new (std::nothrow) char[len+2]());
2413 if (!buffer) {
2414 return NO_MEMORY;
2415 }
2416
2417 if (mDataSource->readAt(
2418 data_offset + 4, &buffer[0], len) < len) {
2419 return ERROR_IO;
2420 }
2421
2422 len = strlen(&buffer[0]);
2423 if (len < 4) {
2424 return ERROR_MALFORMED;
2425 }
2426 // Add a trailing slash if there wasn't one.
2427 if (buffer[len - 1] != '/') {
2428 buffer[len] = '/';
2429 }
2430 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_LOCATION, &buffer[0]);
2431 break;
2432 }
2433
2434 case FOURCC("esds"):
2435 {
2436 *offset += chunk_size;
2437
2438 if (chunk_data_size < 4) {
2439 return ERROR_MALFORMED;
2440 }
2441
2442 auto tmp = heapbuffer<uint8_t>(chunk_data_size);
2443 uint8_t *buffer = tmp.get();
2444 if (buffer == NULL) {
2445 return -ENOMEM;
2446 }
2447
2448 if (mDataSource->readAt(
2449 data_offset, buffer, chunk_data_size) < chunk_data_size) {
2450 return ERROR_IO;
2451 }
2452
2453 if (U32_AT(buffer) != 0) {
2454 // Should be version 0, flags 0.
2455 return ERROR_MALFORMED;
2456 }
2457
2458 if (mLastTrack == NULL)
2459 return ERROR_MALFORMED;
2460
2461 AMediaFormat_setBuffer(mLastTrack->meta,
2462 AMEDIAFORMAT_KEY_ESDS, &buffer[4], chunk_data_size - 4);
2463
2464 if (mPath.size() >= 2
2465 && mPath[mPath.size() - 2] == FOURCC("mp4a")) {
2466 // Information from the ESDS must be relied on for proper
2467 // setup of sample rate and channel count for MPEG4 Audio.
2468 // The generic header appears to only contain generic
2469 // information...
2470
2471 status_t err = updateAudioTrackInfoFromESDS_MPEG4Audio(
2472 &buffer[4], chunk_data_size - 4);
2473
2474 if (err != OK) {
2475 return err;
2476 }
2477 }
2478 if (mPath.size() >= 2
2479 && mPath[mPath.size() - 2] == FOURCC("mp4v")) {
2480 // Check if the video is MPEG2
2481 ESDS esds(&buffer[4], chunk_data_size - 4);
2482
2483 uint8_t objectTypeIndication;
2484 if (esds.getObjectTypeIndication(&objectTypeIndication) == OK) {
2485 if (objectTypeIndication >= 0x60 && objectTypeIndication <= 0x65) {
2486 AMediaFormat_setString(mLastTrack->meta,
2487 AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_VIDEO_MPEG2);
2488 }
2489 }
2490 }
2491 break;
2492 }
2493
2494 case FOURCC("btrt"):
2495 {
2496 *offset += chunk_size;
2497 if (mLastTrack == NULL) {
2498 return ERROR_MALFORMED;
2499 }
2500
2501 uint8_t buffer[12];
2502 if (chunk_data_size != sizeof(buffer)) {
2503 return ERROR_MALFORMED;
2504 }
2505
2506 if (mDataSource->readAt(
2507 data_offset, buffer, chunk_data_size) < chunk_data_size) {
2508 return ERROR_IO;
2509 }
2510
2511 uint32_t maxBitrate = U32_AT(&buffer[4]);
2512 uint32_t avgBitrate = U32_AT(&buffer[8]);
2513 if (maxBitrate > 0 && maxBitrate < INT32_MAX) {
2514 AMediaFormat_setInt32(mLastTrack->meta,
2515 AMEDIAFORMAT_KEY_MAX_BIT_RATE, (int32_t)maxBitrate);
2516 }
2517 if (avgBitrate > 0 && avgBitrate < INT32_MAX) {
2518 AMediaFormat_setInt32(mLastTrack->meta,
2519 AMEDIAFORMAT_KEY_BIT_RATE, (int32_t)avgBitrate);
2520 }
2521 break;
2522 }
2523
2524 case FOURCC("avcC"):
2525 {
2526 *offset += chunk_size;
2527
2528 auto buffer = heapbuffer<uint8_t>(chunk_data_size);
2529
2530 if (buffer.get() == NULL) {
2531 ALOGE("b/28471206");
2532 return NO_MEMORY;
2533 }
2534
2535 if (mDataSource->readAt(
2536 data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
2537 return ERROR_IO;
2538 }
2539
2540 if (mLastTrack == NULL)
2541 return ERROR_MALFORMED;
2542
2543 AMediaFormat_setBuffer(mLastTrack->meta,
2544 AMEDIAFORMAT_KEY_CSD_AVC, buffer.get(), chunk_data_size);
2545
2546 break;
2547 }
2548 case FOURCC("hvcC"):
2549 {
2550 auto buffer = heapbuffer<uint8_t>(chunk_data_size);
2551
2552 if (buffer.get() == NULL) {
2553 ALOGE("b/28471206");
2554 return NO_MEMORY;
2555 }
2556
2557 if (mDataSource->readAt(
2558 data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
2559 return ERROR_IO;
2560 }
2561
2562 if (mLastTrack == NULL)
2563 return ERROR_MALFORMED;
2564
2565 AMediaFormat_setBuffer(mLastTrack->meta,
2566 AMEDIAFORMAT_KEY_CSD_HEVC, buffer.get(), chunk_data_size);
2567
2568 *offset += chunk_size;
2569 break;
2570 }
2571
2572 case FOURCC("vpcC"):
2573 case FOURCC("av1C"):
2574 {
2575 auto buffer = heapbuffer<uint8_t>(chunk_data_size);
2576
2577 if (buffer.get() == NULL) {
2578 ALOGE("b/28471206");
2579 return NO_MEMORY;
2580 }
2581
2582 if (mDataSource->readAt(
2583 data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
2584 return ERROR_IO;
2585 }
2586
2587 if (mLastTrack == NULL)
2588 return ERROR_MALFORMED;
2589
2590 AMediaFormat_setBuffer(mLastTrack->meta,
2591 AMEDIAFORMAT_KEY_CSD_0, buffer.get(), chunk_data_size);
2592
2593 *offset += chunk_size;
2594 break;
2595 }
2596
2597 case FOURCC("dvcC"):
2598 case FOURCC("dvvC"):
2599 case FOURCC("dvwC"):
2600 {
2601 if (chunk_data_size != 24) {
2602 return ERROR_MALFORMED;
2603 }
2604
2605 auto buffer = heapbuffer<uint8_t>(chunk_data_size);
2606
2607 if (buffer.get() == NULL) {
2608 ALOGE("b/28471206");
2609 return NO_MEMORY;
2610 }
2611
2612 if (mDataSource->readAt(data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
2613 return ERROR_IO;
2614 }
2615
2616 if (mLastTrack == NULL)
2617 return ERROR_MALFORMED;
2618
2619 AMediaFormat_setBuffer(mLastTrack->meta, AMEDIAFORMAT_KEY_CSD_2,
2620 buffer.get(), chunk_data_size);
2621 AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME,
2622 MEDIA_MIMETYPE_VIDEO_DOLBY_VISION);
2623
2624 *offset += chunk_size;
2625 break;
2626 }
2627
2628 case FOURCC("d263"):
2629 {
2630 *offset += chunk_size;
2631 /*
2632 * d263 contains a fixed 7 bytes part:
2633 * vendor - 4 bytes
2634 * version - 1 byte
2635 * level - 1 byte
2636 * profile - 1 byte
2637 * optionally, "d263" box itself may contain a 16-byte
2638 * bit rate box (bitr)
2639 * average bit rate - 4 bytes
2640 * max bit rate - 4 bytes
2641 */
2642 char buffer[23];
2643 if (chunk_data_size != 7 &&
2644 chunk_data_size != 23) {
2645 ALOGE("Incorrect D263 box size %lld", (long long)chunk_data_size);
2646 return ERROR_MALFORMED;
2647 }
2648
2649 if (mDataSource->readAt(
2650 data_offset, buffer, chunk_data_size) < chunk_data_size) {
2651 return ERROR_IO;
2652 }
2653
2654 if (mLastTrack == NULL)
2655 return ERROR_MALFORMED;
2656
2657 AMediaFormat_setBuffer(mLastTrack->meta,
2658 AMEDIAFORMAT_KEY_D263, buffer, chunk_data_size);
2659
2660 break;
2661 }
2662
2663 case FOURCC("meta"):
2664 {
2665 off64_t stop_offset = *offset + chunk_size;
2666 *offset = data_offset;
2667 bool isParsingMetaKeys = underQTMetaPath(mPath, 2);
2668 if (!isParsingMetaKeys) {
2669 uint8_t buffer[4];
2670 if (chunk_data_size < (off64_t)sizeof(buffer)) {
2671 *offset = stop_offset;
2672 return ERROR_MALFORMED;
2673 }
2674
2675 if (mDataSource->readAt(
2676 data_offset, buffer, 4) < 4) {
2677 *offset = stop_offset;
2678 return ERROR_IO;
2679 }
2680
2681 if (U32_AT(buffer) != 0) {
2682 // Should be version 0, flags 0.
2683
2684 // If it's not, let's assume this is one of those
2685 // apparently malformed chunks that don't have flags
2686 // and completely different semantics than what's
2687 // in the MPEG4 specs and skip it.
2688 *offset = stop_offset;
2689 return OK;
2690 }
2691 *offset += sizeof(buffer);
2692 }
2693
2694 while (*offset < stop_offset) {
2695 status_t err = parseChunk(offset, depth + 1);
2696 if (err != OK) {
2697 return err;
2698 }
2699 }
2700
2701 if (*offset != stop_offset) {
2702 return ERROR_MALFORMED;
2703 }
2704 break;
2705 }
2706
2707 case FOURCC("iloc"):
2708 case FOURCC("iinf"):
2709 case FOURCC("iprp"):
2710 case FOURCC("pitm"):
2711 case FOURCC("idat"):
2712 case FOURCC("iref"):
2713 case FOURCC("ipro"):
2714 {
2715 if (mIsHeif || mIsAvif) {
2716 if (mItemTable == NULL) {
2717 mItemTable = new ItemTable(mDataSource, mIsHeif);
2718 }
2719 status_t err = mItemTable->parse(
2720 chunk_type, data_offset, chunk_data_size);
2721 if (err != OK) {
2722 return err;
2723 }
2724 }
2725 *offset += chunk_size;
2726 break;
2727 }
2728
2729 case FOURCC("mean"):
2730 case FOURCC("name"):
2731 case FOURCC("data"):
2732 {
2733 *offset += chunk_size;
2734
2735 if (mPath.size() == 6 && underMetaDataPath(mPath)) {
2736 status_t err = parseITunesMetaData(data_offset, chunk_data_size);
2737
2738 if (err != OK) {
2739 return err;
2740 }
2741 }
2742
2743 break;
2744 }
2745
2746 case FOURCC("mvhd"):
2747 {
2748 *offset += chunk_size;
2749
2750 if (depth != 1) {
2751 ALOGE("mvhd: depth %d", depth);
2752 return ERROR_MALFORMED;
2753 }
2754 if (chunk_data_size < 32) {
2755 return ERROR_MALFORMED;
2756 }
2757
2758 uint8_t header[32];
2759 if (mDataSource->readAt(
2760 data_offset, header, sizeof(header))
2761 < (ssize_t)sizeof(header)) {
2762 return ERROR_IO;
2763 }
2764
2765 uint64_t creationTime;
2766 uint64_t duration = 0;
2767 if (header[0] == 1) {
2768 creationTime = U64_AT(&header[4]);
2769 mHeaderTimescale = U32_AT(&header[20]);
2770 duration = U64_AT(&header[24]);
2771 if (duration == 0xffffffffffffffff) {
2772 duration = 0;
2773 }
2774 } else if (header[0] != 0) {
2775 return ERROR_MALFORMED;
2776 } else {
2777 creationTime = U32_AT(&header[4]);
2778 mHeaderTimescale = U32_AT(&header[12]);
2779 uint32_t d32 = U32_AT(&header[16]);
2780 if (d32 == 0xffffffff) {
2781 d32 = 0;
2782 }
2783 duration = d32;
2784 }
2785 if (duration != 0 && mHeaderTimescale != 0 && duration < UINT64_MAX / 1000000) {
2786 AMediaFormat_setInt64(mFileMetaData,
2787 AMEDIAFORMAT_KEY_DURATION, duration * 1000000 / mHeaderTimescale);
2788 }
2789
2790 String8 s;
2791 if (convertTimeToDate(creationTime, &s)) {
2792 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_DATE, s.string());
2793 }
2794
2795 break;
2796 }
2797
2798 case FOURCC("mehd"):
2799 {
2800 *offset += chunk_size;
2801
2802 if (chunk_data_size < 8) {
2803 return ERROR_MALFORMED;
2804 }
2805
2806 uint8_t flags[4];
2807 if (mDataSource->readAt(
2808 data_offset, flags, sizeof(flags))
2809 < (ssize_t)sizeof(flags)) {
2810 return ERROR_IO;
2811 }
2812
2813 uint64_t duration = 0;
2814 if (flags[0] == 1) {
2815 // 64 bit
2816 if (chunk_data_size < 12) {
2817 return ERROR_MALFORMED;
2818 }
2819 mDataSource->getUInt64(data_offset + 4, &duration);
2820 if (duration == 0xffffffffffffffff) {
2821 duration = 0;
2822 }
2823 } else if (flags[0] == 0) {
2824 // 32 bit
2825 uint32_t d32;
2826 mDataSource->getUInt32(data_offset + 4, &d32);
2827 if (d32 == 0xffffffff) {
2828 d32 = 0;
2829 }
2830 duration = d32;
2831 } else {
2832 return ERROR_MALFORMED;
2833 }
2834
2835 if (duration != 0 && mHeaderTimescale != 0) {
2836 AMediaFormat_setInt64(mFileMetaData,
2837 AMEDIAFORMAT_KEY_DURATION, duration * 1000000 / mHeaderTimescale);
2838 }
2839
2840 break;
2841 }
2842
2843 case FOURCC("mdat"):
2844 {
2845 mMdatFound = true;
2846
2847 *offset += chunk_size;
2848 break;
2849 }
2850
2851 case FOURCC("hdlr"):
2852 {
2853 *offset += chunk_size;
2854
2855 if (underQTMetaPath(mPath, 3)) {
2856 break;
2857 }
2858
2859 uint32_t buffer;
2860 if (mDataSource->readAt(
2861 data_offset + 8, &buffer, 4) < 4) {
2862 return ERROR_IO;
2863 }
2864
2865 uint32_t type = ntohl(buffer);
2866 // For the 3GPP file format, the handler-type within the 'hdlr' box
2867 // shall be 'text'. We also want to support 'sbtl' handler type
2868 // for a practical reason as various MPEG4 containers use it.
2869 if (type == FOURCC("text") || type == FOURCC("sbtl")) {
2870 if (mLastTrack != NULL) {
2871 AMediaFormat_setString(mLastTrack->meta,
2872 AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_TEXT_3GPP);
2873 }
2874 }
2875
2876 break;
2877 }
2878
2879 case FOURCC("keys"):
2880 {
2881 *offset += chunk_size;
2882
2883 if (underQTMetaPath(mPath, 3)) {
2884 status_t err = parseQTMetaKey(data_offset, chunk_data_size);
2885 if (err != OK) {
2886 return err;
2887 }
2888 }
2889 break;
2890 }
2891
2892 case FOURCC("trex"):
2893 {
2894 *offset += chunk_size;
2895
2896 if (chunk_data_size < 24) {
2897 return ERROR_IO;
2898 }
2899 Trex trex;
2900 if (!mDataSource->getUInt32(data_offset + 4, &trex.track_ID) ||
2901 !mDataSource->getUInt32(data_offset + 8, &trex.default_sample_description_index) ||
2902 !mDataSource->getUInt32(data_offset + 12, &trex.default_sample_duration) ||
2903 !mDataSource->getUInt32(data_offset + 16, &trex.default_sample_size) ||
2904 !mDataSource->getUInt32(data_offset + 20, &trex.default_sample_flags)) {
2905 return ERROR_IO;
2906 }
2907 mTrex.add(trex);
2908 break;
2909 }
2910
2911 case FOURCC("tx3g"):
2912 {
2913 if (mLastTrack == NULL)
2914 return ERROR_MALFORMED;
2915
2916 // complain about ridiculous chunks
2917 if (chunk_size > kMaxAtomSize) {
2918 return ERROR_MALFORMED;
2919 }
2920
2921 // complain about empty atoms
2922 if (chunk_data_size <= 0) {
2923 ALOGE("b/124330204");
2924 android_errorWriteLog(0x534e4554, "124330204");
2925 return ERROR_MALFORMED;
2926 }
2927
2928 // should fill buffer based on "data_offset" and "chunk_data_size"
2929 // instead of *offset and chunk_size;
2930 // but we've been feeding the extra data to consumers for multiple releases and
2931 // if those apps are compensating for it, we'd break them with such a change
2932 //
2933
2934 if (mLastTrack->mTx3gBuffer == NULL) {
2935 mLastTrack->mTx3gSize = 0;
2936 mLastTrack->mTx3gFilled = 0;
2937 }
2938 if (mLastTrack->mTx3gSize - mLastTrack->mTx3gFilled < chunk_size) {
2939 size_t growth = kTx3gGrowth;
2940 if (growth < chunk_size) {
2941 growth = chunk_size;
2942 }
2943 // although this disallows 2 tx3g atoms of nearly kMaxAtomSize...
2944 if ((uint64_t) mLastTrack->mTx3gSize + growth > kMaxAtomSize) {
2945 ALOGE("b/124330204 - too much space");
2946 android_errorWriteLog(0x534e4554, "124330204");
2947 return ERROR_MALFORMED;
2948 }
2949 uint8_t *updated = (uint8_t *)realloc(mLastTrack->mTx3gBuffer,
2950 mLastTrack->mTx3gSize + growth);
2951 if (updated == NULL) {
2952 return ERROR_MALFORMED;
2953 }
2954 mLastTrack->mTx3gBuffer = updated;
2955 mLastTrack->mTx3gSize += growth;
2956 }
2957
2958 if ((size_t)(mDataSource->readAt(*offset,
2959 mLastTrack->mTx3gBuffer + mLastTrack->mTx3gFilled,
2960 chunk_size))
2961 < chunk_size) {
2962
2963 // advance read pointer so we don't end up reading this again
2964 *offset += chunk_size;
2965 return ERROR_IO;
2966 }
2967
2968 mLastTrack->mTx3gFilled += chunk_size;
2969 *offset += chunk_size;
2970 break;
2971 }
2972
2973 case FOURCC("covr"):
2974 {
2975 *offset += chunk_size;
2976
2977 ALOGV("chunk_data_size = %" PRId64 " and data_offset = %" PRId64,
2978 chunk_data_size, data_offset);
2979
2980 if (chunk_data_size < 0 || static_cast<uint64_t>(chunk_data_size) >= SIZE_MAX - 1) {
2981 return ERROR_MALFORMED;
2982 }
2983 auto buffer = heapbuffer<uint8_t>(chunk_data_size);
2984 if (buffer.get() == NULL) {
2985 ALOGE("b/28471206");
2986 return NO_MEMORY;
2987 }
2988 if (mDataSource->readAt(
2989 data_offset, buffer.get(), chunk_data_size) != (ssize_t)chunk_data_size) {
2990 return ERROR_IO;
2991 }
2992 const int kSkipBytesOfDataBox = 16;
2993 if (chunk_data_size <= kSkipBytesOfDataBox) {
2994 return ERROR_MALFORMED;
2995 }
2996
2997 AMediaFormat_setBuffer(mFileMetaData,
2998 AMEDIAFORMAT_KEY_ALBUMART,
2999 buffer.get() + kSkipBytesOfDataBox, chunk_data_size - kSkipBytesOfDataBox);
3000
3001 break;
3002 }
3003
3004 case FOURCC("colr"):
3005 {
3006 *offset += chunk_size;
3007 // this must be in a VisualSampleEntry box under the Sample Description Box ('stsd')
3008 // ignore otherwise
3009 if (depth >= 2 && mPath[depth - 2] == FOURCC("stsd")) {
3010 status_t err = parseColorInfo(data_offset, chunk_data_size);
3011 if (err != OK) {
3012 return err;
3013 }
3014 }
3015
3016 break;
3017 }
3018
3019 case FOURCC("pasp"):
3020 {
3021 *offset += chunk_size;
3022 // this must be in a VisualSampleEntry box under the Sample Description Box ('stsd')
3023 // ignore otherwise
3024 if (depth >= 2 && mPath[depth - 2] == FOURCC("stsd")) {
3025 status_t err = parsePaspBox(data_offset, chunk_data_size);
3026 if (err != OK) {
3027 return err;
3028 }
3029 }
3030
3031 break;
3032 }
3033
3034 case FOURCC("titl"):
3035 case FOURCC("perf"):
3036 case FOURCC("auth"):
3037 case FOURCC("gnre"):
3038 case FOURCC("albm"):
3039 case FOURCC("yrrc"):
3040 {
3041 *offset += chunk_size;
3042
3043 status_t err = parse3GPPMetaData(data_offset, chunk_data_size, depth);
3044
3045 if (err != OK) {
3046 return err;
3047 }
3048
3049 break;
3050 }
3051
3052 case FOURCC("ID32"):
3053 {
3054 *offset += chunk_size;
3055
3056 if (chunk_data_size < 6) {
3057 return ERROR_MALFORMED;
3058 }
3059
3060 parseID3v2MetaData(data_offset + 6, chunk_data_size - 6);
3061
3062 break;
3063 }
3064
3065 case FOURCC("----"):
3066 {
3067 mLastCommentMean.clear();
3068 mLastCommentName.clear();
3069 mLastCommentData.clear();
3070 *offset += chunk_size;
3071 break;
3072 }
3073
3074 case FOURCC("sidx"):
3075 {
3076 status_t err = parseSegmentIndex(data_offset, chunk_data_size);
3077 if (err != OK) {
3078 return err;
3079 }
3080 *offset += chunk_size;
3081 return UNKNOWN_ERROR; // stop parsing after sidx
3082 }
3083
3084 case FOURCC("ac-3"):
3085 {
3086 *offset += chunk_size;
3087 // bypass ac-3 if parse fail
3088 if (parseAC3SpecificBox(data_offset) != OK) {
3089 if (mLastTrack != NULL) {
3090 ALOGW("Fail to parse ac-3");
3091 mLastTrack->skipTrack = true;
3092 }
3093 }
3094 return OK;
3095 }
3096
3097 case FOURCC("ec-3"):
3098 {
3099 *offset += chunk_size;
3100 // bypass ec-3 if parse fail
3101 if (parseEAC3SpecificBox(data_offset) != OK) {
3102 if (mLastTrack != NULL) {
3103 ALOGW("Fail to parse ec-3");
3104 mLastTrack->skipTrack = true;
3105 }
3106 }
3107 return OK;
3108 }
3109
3110 case FOURCC("ac-4"):
3111 {
3112 *offset += chunk_size;
3113 // bypass ac-4 if parse fail
3114 if (parseAC4SpecificBox(data_offset) != OK) {
3115 if (mLastTrack != NULL) {
3116 ALOGW("Fail to parse ac-4");
3117 mLastTrack->skipTrack = true;
3118 }
3119 }
3120 return OK;
3121 }
3122
3123 case FOURCC("ftyp"):
3124 {
3125 if (chunk_data_size < 8 || depth != 0) {
3126 return ERROR_MALFORMED;
3127 }
3128
3129 off64_t stop_offset = *offset + chunk_size;
3130 uint32_t numCompatibleBrands = (chunk_data_size - 8) / 4;
3131 std::set<uint32_t> brandSet;
3132 for (size_t i = 0; i < numCompatibleBrands + 2; ++i) {
3133 if (i == 1) {
3134 // Skip this index, it refers to the minorVersion,
3135 // not a brand.
3136 continue;
3137 }
3138
3139 uint32_t brand;
3140 if (mDataSource->readAt(data_offset + 4 * i, &brand, 4) < 4) {
3141 return ERROR_MALFORMED;
3142 }
3143
3144 brand = ntohl(brand);
3145 brandSet.insert(brand);
3146 }
3147
3148 if (brandSet.count(FOURCC("qt ")) > 0) {
3149 mIsQT = true;
3150 } else {
3151 if (brandSet.count(FOURCC("mif1")) > 0
3152 && brandSet.count(FOURCC("heic")) > 0) {
3153 ALOGV("identified HEIF image");
3154
3155 mIsHeif = true;
3156 brandSet.erase(FOURCC("mif1"));
3157 brandSet.erase(FOURCC("heic"));
3158 } else if (brandSet.count(FOURCC("avif")) > 0 ||
3159 brandSet.count(FOURCC("avis")) > 0) {
3160 ALOGV("identified AVIF image");
3161 mIsAvif = true;
3162 brandSet.erase(FOURCC("avif"));
3163 brandSet.erase(FOURCC("avis"));
3164 }
3165
3166 if (!brandSet.empty()) {
3167 // This means that the file should have moov box.
3168 // It could be any iso files (mp4, heifs, etc.)
3169 mHasMoovBox = true;
3170 if (mIsHeif || mIsAvif) {
3171 ALOGV("identified %s image with other tracks", mIsHeif ? "HEIF" : "AVIF");
3172 }
3173 }
3174 }
3175
3176 *offset = stop_offset;
3177
3178 break;
3179 }
3180
3181 default:
3182 {
3183 // check if we're parsing 'ilst' for meta keys
3184 // if so, treat type as a number (key-id).
3185 if (underQTMetaPath(mPath, 3)) {
3186 status_t err = parseQTMetaVal(chunk_type, data_offset, chunk_data_size);
3187 if (err != OK) {
3188 return err;
3189 }
3190 }
3191
3192 *offset += chunk_size;
3193 break;
3194 }
3195 }
3196
3197 return OK;
3198 }
3199
parseChannelCountSampleRate(off64_t * offset,uint16_t * channelCount,uint16_t * sampleRate)3200 status_t MPEG4Extractor::parseChannelCountSampleRate(
3201 off64_t *offset, uint16_t *channelCount, uint16_t *sampleRate) {
3202 // skip 16 bytes:
3203 // + 6-byte reserved,
3204 // + 2-byte data reference index,
3205 // + 8-byte reserved
3206 *offset += 16;
3207 if (!mDataSource->getUInt16(*offset, channelCount)) {
3208 ALOGE("MPEG4Extractor: error while reading sample entry box: cannot read channel count");
3209 return ERROR_MALFORMED;
3210 }
3211 // skip 8 bytes:
3212 // + 2-byte channelCount,
3213 // + 2-byte sample size,
3214 // + 4-byte reserved
3215 *offset += 8;
3216 if (!mDataSource->getUInt16(*offset, sampleRate)) {
3217 ALOGE("MPEG4Extractor: error while reading sample entry box: cannot read sample rate");
3218 return ERROR_MALFORMED;
3219 }
3220 // skip 4 bytes:
3221 // + 2-byte sampleRate,
3222 // + 2-byte reserved
3223 *offset += 4;
3224 return OK;
3225 }
3226
parseAC4SpecificBox(off64_t offset)3227 status_t MPEG4Extractor::parseAC4SpecificBox(off64_t offset) {
3228 if (mLastTrack == NULL) {
3229 return ERROR_MALFORMED;
3230 }
3231
3232 uint16_t sampleRate, channelCount;
3233 status_t status;
3234 if ((status = parseChannelCountSampleRate(&offset, &channelCount, &sampleRate)) != OK) {
3235 return status;
3236 }
3237 uint32_t size;
3238 // + 4-byte size
3239 // + 4-byte type
3240 // + 3-byte payload
3241 const uint32_t kAC4MinimumBoxSize = 4 + 4 + 3;
3242 if (!mDataSource->getUInt32(offset, &size) || size < kAC4MinimumBoxSize) {
3243 ALOGE("MPEG4Extractor: error while reading ac-4 block: cannot read specific box size");
3244 return ERROR_MALFORMED;
3245 }
3246
3247 // + 4-byte size
3248 offset += 4;
3249 uint32_t type;
3250 if (!mDataSource->getUInt32(offset, &type) || type != FOURCC("dac4")) {
3251 ALOGE("MPEG4Extractor: error while reading ac-4 specific block: header not dac4");
3252 return ERROR_MALFORMED;
3253 }
3254
3255 // + 4-byte type
3256 offset += 4;
3257 const uint32_t kAC4SpecificBoxPayloadSize = 1176;
3258 uint8_t chunk[kAC4SpecificBoxPayloadSize];
3259 ssize_t dsiSize = size - 8; // size of box - size and type fields
3260 if (dsiSize >= (ssize_t)kAC4SpecificBoxPayloadSize ||
3261 mDataSource->readAt(offset, chunk, dsiSize) != dsiSize) {
3262 ALOGE("MPEG4Extractor: error while reading ac-4 specific block: bitstream fields");
3263 return ERROR_MALFORMED;
3264 }
3265 // + size-byte payload
3266 offset += dsiSize;
3267 ABitReader br(chunk, dsiSize);
3268 AC4DSIParser parser(br);
3269 if (!parser.parse()){
3270 ALOGE("MPEG4Extractor: error while parsing ac-4 specific block");
3271 return ERROR_MALFORMED;
3272 }
3273
3274 AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_AC4);
3275 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, channelCount);
3276 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, sampleRate);
3277
3278 AudioPresentationCollection presentations;
3279 // translate the AC4 presentation information to audio presentations for this track
3280 AC4DSIParser::AC4Presentations ac4Presentations = parser.getPresentations();
3281 if (!ac4Presentations.empty()) {
3282 for (const auto& ac4Presentation : ac4Presentations) {
3283 auto& presentation = ac4Presentation.second;
3284 if (!presentation.mEnabled) {
3285 continue;
3286 }
3287 AudioPresentationV1 ap;
3288 ap.mPresentationId = presentation.mGroupIndex;
3289 ap.mProgramId = presentation.mProgramID;
3290 ap.mLanguage = presentation.mLanguage;
3291 if (presentation.mPreVirtualized) {
3292 ap.mMasteringIndication = MASTERED_FOR_HEADPHONE;
3293 } else {
3294 switch (presentation.mChannelMode) {
3295 case AC4Parser::AC4Presentation::kChannelMode_Mono:
3296 case AC4Parser::AC4Presentation::kChannelMode_Stereo:
3297 ap.mMasteringIndication = MASTERED_FOR_STEREO;
3298 break;
3299 case AC4Parser::AC4Presentation::kChannelMode_3_0:
3300 case AC4Parser::AC4Presentation::kChannelMode_5_0:
3301 case AC4Parser::AC4Presentation::kChannelMode_5_1:
3302 case AC4Parser::AC4Presentation::kChannelMode_7_0_34:
3303 case AC4Parser::AC4Presentation::kChannelMode_7_1_34:
3304 case AC4Parser::AC4Presentation::kChannelMode_7_0_52:
3305 case AC4Parser::AC4Presentation::kChannelMode_7_1_52:
3306 ap.mMasteringIndication = MASTERED_FOR_SURROUND;
3307 break;
3308 case AC4Parser::AC4Presentation::kChannelMode_7_0_322:
3309 case AC4Parser::AC4Presentation::kChannelMode_7_1_322:
3310 case AC4Parser::AC4Presentation::kChannelMode_7_0_4:
3311 case AC4Parser::AC4Presentation::kChannelMode_7_1_4:
3312 case AC4Parser::AC4Presentation::kChannelMode_9_0_4:
3313 case AC4Parser::AC4Presentation::kChannelMode_9_1_4:
3314 case AC4Parser::AC4Presentation::kChannelMode_22_2:
3315 ap.mMasteringIndication = MASTERED_FOR_3D;
3316 break;
3317 default:
3318 ALOGE("Invalid channel mode in AC4 presentation");
3319 return ERROR_MALFORMED;
3320 }
3321 }
3322
3323 ap.mAudioDescriptionAvailable = (presentation.mContentClassifier ==
3324 AC4Parser::AC4Presentation::kVisuallyImpaired);
3325 ap.mSpokenSubtitlesAvailable = (presentation.mContentClassifier ==
3326 AC4Parser::AC4Presentation::kVoiceOver);
3327 ap.mDialogueEnhancementAvailable = presentation.mHasDialogEnhancements;
3328 if (!ap.mLanguage.empty()) {
3329 ap.mLabels.emplace(ap.mLanguage, presentation.mDescription);
3330 }
3331 presentations.push_back(std::move(ap));
3332 }
3333 }
3334
3335 if (presentations.empty()) {
3336 // Clear audio presentation info in metadata.
3337 AMediaFormat_setBuffer(
3338 mLastTrack->meta, AMEDIAFORMAT_KEY_AUDIO_PRESENTATION_INFO, nullptr, 0);
3339 } else {
3340 std::ostringstream outStream(std::ios::out);
3341 serializeAudioPresentations(presentations, &outStream);
3342 AMediaFormat_setBuffer(
3343 mLastTrack->meta, AMEDIAFORMAT_KEY_AUDIO_PRESENTATION_INFO,
3344 outStream.str().data(), outStream.str().size());
3345 }
3346 return OK;
3347 }
3348
parseEAC3SpecificBox(off64_t offset)3349 status_t MPEG4Extractor::parseEAC3SpecificBox(off64_t offset) {
3350 if (mLastTrack == NULL) {
3351 return ERROR_MALFORMED;
3352 }
3353
3354 uint16_t sampleRate, channels;
3355 status_t status;
3356 if ((status = parseChannelCountSampleRate(&offset, &channels, &sampleRate)) != OK) {
3357 return status;
3358 }
3359 uint32_t size;
3360 // + 4-byte size
3361 // + 4-byte type
3362 // + 3-byte payload
3363 const uint32_t kEAC3SpecificBoxMinSize = 11;
3364 // 13 + 3 + (8 * (2 + 5 + 5 + 3 + 1 + 3 + 4 + (14 * 9 + 1))) bits == 152 bytes theoretical max
3365 // calculated from the required bits read below as well as the maximum number of independent
3366 // and dependant sub streams you can have
3367 const uint32_t kEAC3SpecificBoxMaxSize = 152;
3368 if (!mDataSource->getUInt32(offset, &size) ||
3369 size < kEAC3SpecificBoxMinSize ||
3370 size > kEAC3SpecificBoxMaxSize) {
3371 ALOGE("MPEG4Extractor: error while reading eac-3 block: cannot read specific box size");
3372 return ERROR_MALFORMED;
3373 }
3374
3375 offset += 4;
3376 uint32_t type;
3377 if (!mDataSource->getUInt32(offset, &type) || type != FOURCC("dec3")) {
3378 ALOGE("MPEG4Extractor: error while reading eac-3 specific block: header not dec3");
3379 return ERROR_MALFORMED;
3380 }
3381
3382 offset += 4;
3383 uint8_t* chunk = new (std::nothrow) uint8_t[size];
3384 if (chunk == NULL) {
3385 return ERROR_MALFORMED;
3386 }
3387
3388 if (mDataSource->readAt(offset, chunk, size) != (ssize_t)size) {
3389 ALOGE("MPEG4Extractor: error while reading eac-3 specific block: bitstream fields");
3390 delete[] chunk;
3391 return ERROR_MALFORMED;
3392 }
3393
3394 ABitReader br(chunk, size);
3395 static const unsigned channelCountTable[] = {2, 1, 2, 3, 3, 4, 4, 5};
3396 static const unsigned sampleRateTable[] = {48000, 44100, 32000};
3397
3398 if (br.numBitsLeft() < 16) {
3399 delete[] chunk;
3400 return ERROR_MALFORMED;
3401 }
3402 unsigned data_rate = br.getBits(13);
3403 ALOGV("EAC3 data rate = %d", data_rate);
3404
3405 unsigned num_ind_sub = br.getBits(3) + 1;
3406 ALOGV("EAC3 independant substreams = %d", num_ind_sub);
3407 if (br.numBitsLeft() < (num_ind_sub * 23)) {
3408 delete[] chunk;
3409 return ERROR_MALFORMED;
3410 }
3411
3412 unsigned channelCount = 0;
3413 for (unsigned i = 0; i < num_ind_sub; i++) {
3414 unsigned fscod = br.getBits(2);
3415 if (fscod == 3) {
3416 ALOGE("Incorrect fscod (3) in EAC3 header");
3417 delete[] chunk;
3418 return ERROR_MALFORMED;
3419 }
3420 unsigned boxSampleRate = sampleRateTable[fscod];
3421 if (boxSampleRate != sampleRate) {
3422 ALOGE("sample rate mismatch: boxSampleRate = %d, sampleRate = %d",
3423 boxSampleRate, sampleRate);
3424 delete[] chunk;
3425 return ERROR_MALFORMED;
3426 }
3427
3428 unsigned bsid = br.getBits(5);
3429 if (bsid == 9 || bsid == 10) {
3430 ALOGW("EAC3 stream (bsid=%d) may be silenced by the decoder", bsid);
3431 } else if (bsid > 16) {
3432 ALOGE("EAC3 stream (bsid=%d) is not compatible with ETSI TS 102 366 v1.4.1", bsid);
3433 delete[] chunk;
3434 return ERROR_MALFORMED;
3435 }
3436
3437 // skip
3438 br.skipBits(2);
3439 unsigned bsmod = br.getBits(3);
3440 unsigned acmod = br.getBits(3);
3441 unsigned lfeon = br.getBits(1);
3442 // we currently only support the first stream
3443 if (i == 0)
3444 channelCount = channelCountTable[acmod] + lfeon;
3445 ALOGV("bsmod = %d, acmod = %d, lfeon = %d", bsmod, acmod, lfeon);
3446
3447 br.skipBits(3);
3448 unsigned num_dep_sub = br.getBits(4);
3449 ALOGV("EAC3 dependant substreams = %d", num_dep_sub);
3450 if (num_dep_sub != 0) {
3451 if (br.numBitsLeft() < 9) {
3452 delete[] chunk;
3453 return ERROR_MALFORMED;
3454 }
3455 static const char* chan_loc_tbl[] = { "Lc/Rc","Lrs/Rrs","Cs","Ts","Lsd/Rsd",
3456 "Lw/Rw","Lvh/Rvh","Cvh","Lfe2" };
3457 unsigned chan_loc = br.getBits(9);
3458 unsigned mask = 1;
3459 for (unsigned j = 0; j < 9; j++, mask <<= 1) {
3460 if ((chan_loc & mask) != 0) {
3461 // we currently only support the first stream
3462 if (i == 0) {
3463 channelCount++;
3464 // these are 2 channels in the mask
3465 if (j == 0 || j == 1 || j == 4 || j == 5 || j == 6) {
3466 channelCount++;
3467 }
3468 }
3469 ALOGV(" %s", chan_loc_tbl[j]);
3470 }
3471 }
3472 } else {
3473 if (br.numBitsLeft() == 0) {
3474 delete[] chunk;
3475 return ERROR_MALFORMED;
3476 }
3477 br.skipBits(1);
3478 }
3479 }
3480
3481 if (br.numBitsLeft() != 0) {
3482 if (br.numBitsLeft() < 8) {
3483 delete[] chunk;
3484 return ERROR_MALFORMED;
3485 }
3486 unsigned mask = br.getBits(8);
3487 for (unsigned i = 0; i < 8; i++) {
3488 if (((0x1 << i) & mask) == 0)
3489 continue;
3490
3491 if (br.numBitsLeft() < 8) {
3492 delete[] chunk;
3493 return ERROR_MALFORMED;
3494 }
3495 switch (i) {
3496 case 0: {
3497 unsigned complexity = br.getBits(8);
3498 ALOGV("Found a JOC stream with complexity = %d", complexity);
3499 }break;
3500 default: {
3501 br.skipBits(8);
3502 }break;
3503 }
3504 }
3505 }
3506 AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_EAC3);
3507 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, channelCount);
3508 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, sampleRate);
3509
3510 delete[] chunk;
3511 return OK;
3512 }
3513
parseAC3SpecificBox(off64_t offset)3514 status_t MPEG4Extractor::parseAC3SpecificBox(off64_t offset) {
3515 if (mLastTrack == NULL) {
3516 return ERROR_MALFORMED;
3517 }
3518
3519 uint16_t sampleRate, channels;
3520 status_t status;
3521 if ((status = parseChannelCountSampleRate(&offset, &channels, &sampleRate)) != OK) {
3522 return status;
3523 }
3524 uint32_t size;
3525 // + 4-byte size
3526 // + 4-byte type
3527 // + 3-byte payload
3528 const uint32_t kAC3SpecificBoxSize = 11;
3529 if (!mDataSource->getUInt32(offset, &size) || size < kAC3SpecificBoxSize) {
3530 ALOGE("MPEG4Extractor: error while reading ac-3 block: cannot read specific box size");
3531 return ERROR_MALFORMED;
3532 }
3533
3534 offset += 4;
3535 uint32_t type;
3536 if (!mDataSource->getUInt32(offset, &type) || type != FOURCC("dac3")) {
3537 ALOGE("MPEG4Extractor: error while reading ac-3 specific block: header not dac3");
3538 return ERROR_MALFORMED;
3539 }
3540
3541 offset += 4;
3542 const uint32_t kAC3SpecificBoxPayloadSize = 3;
3543 uint8_t chunk[kAC3SpecificBoxPayloadSize];
3544 if (mDataSource->readAt(offset, chunk, sizeof(chunk)) != sizeof(chunk)) {
3545 ALOGE("MPEG4Extractor: error while reading ac-3 specific block: bitstream fields");
3546 return ERROR_MALFORMED;
3547 }
3548
3549 ABitReader br(chunk, sizeof(chunk));
3550 static const unsigned channelCountTable[] = {2, 1, 2, 3, 3, 4, 4, 5};
3551 static const unsigned sampleRateTable[] = {48000, 44100, 32000};
3552
3553 unsigned fscod = br.getBits(2);
3554 if (fscod == 3) {
3555 ALOGE("Incorrect fscod (3) in AC3 header");
3556 return ERROR_MALFORMED;
3557 }
3558 unsigned boxSampleRate = sampleRateTable[fscod];
3559 if (boxSampleRate != sampleRate) {
3560 ALOGE("sample rate mismatch: boxSampleRate = %d, sampleRate = %d",
3561 boxSampleRate, sampleRate);
3562 return ERROR_MALFORMED;
3563 }
3564
3565 unsigned bsid = br.getBits(5);
3566 if (bsid > 8) {
3567 ALOGW("Incorrect bsid in AC3 header. Possibly E-AC-3?");
3568 return ERROR_MALFORMED;
3569 }
3570
3571 // skip
3572 br.skipBits(3); // bsmod
3573
3574 unsigned acmod = br.getBits(3);
3575 unsigned lfeon = br.getBits(1);
3576 unsigned channelCount = channelCountTable[acmod] + lfeon;
3577
3578 AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_AC3);
3579 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, channelCount);
3580 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, sampleRate);
3581 return OK;
3582 }
3583
parseALACSampleEntry(off64_t * offset)3584 status_t MPEG4Extractor::parseALACSampleEntry(off64_t *offset) {
3585 // See 'external/alac/ALACMagicCookieDescription.txt for the detail'.
3586 // Store ALAC magic cookie (decoder needs it).
3587 uint8_t alacInfo[12];
3588 off64_t data_offset = *offset;
3589
3590 if (mDataSource->readAt(
3591 data_offset, alacInfo, sizeof(alacInfo)) < (ssize_t)sizeof(alacInfo)) {
3592 return ERROR_IO;
3593 }
3594 uint32_t size = U32_AT(&alacInfo[0]);
3595 if ((size != ALAC_SPECIFIC_INFO_SIZE) ||
3596 (U32_AT(&alacInfo[4]) != FOURCC("alac")) ||
3597 (U32_AT(&alacInfo[8]) != 0)) {
3598 ALOGV("Size:%u, U32_AT(&alacInfo[4]):%u, U32_AT(&alacInfo[8]):%u",
3599 size, U32_AT(&alacInfo[4]), U32_AT(&alacInfo[8]));
3600 return ERROR_MALFORMED;
3601 }
3602 data_offset += sizeof(alacInfo);
3603 uint8_t cookie[size - sizeof(alacInfo)];
3604 if (mDataSource->readAt(
3605 data_offset, cookie, sizeof(cookie)) < (ssize_t)sizeof(cookie)) {
3606 return ERROR_IO;
3607 }
3608
3609 uint8_t bitsPerSample = cookie[5];
3610 AMediaFormat_setInt32(mLastTrack->meta,
3611 AMEDIAFORMAT_KEY_BITS_PER_SAMPLE, bitsPerSample);
3612 AMediaFormat_setInt32(mLastTrack->meta,
3613 AMEDIAFORMAT_KEY_CHANNEL_COUNT, cookie[9]);
3614 AMediaFormat_setInt32(mLastTrack->meta,
3615 AMEDIAFORMAT_KEY_SAMPLE_RATE, U32_AT(&cookie[20]));
3616 AMediaFormat_setBuffer(mLastTrack->meta,
3617 AMEDIAFORMAT_KEY_CSD_0, cookie, sizeof(cookie));
3618 data_offset += sizeof(cookie);
3619 *offset = data_offset;
3620 return OK;
3621 }
3622
parseSegmentIndex(off64_t offset,size_t size)3623 status_t MPEG4Extractor::parseSegmentIndex(off64_t offset, size_t size) {
3624 ALOGV("MPEG4Extractor::parseSegmentIndex");
3625
3626 if (size < 12) {
3627 return -EINVAL;
3628 }
3629
3630 uint32_t flags;
3631 if (!mDataSource->getUInt32(offset, &flags)) {
3632 return ERROR_MALFORMED;
3633 }
3634
3635 uint32_t version = flags >> 24;
3636 flags &= 0xffffff;
3637
3638 ALOGV("sidx version %d", version);
3639
3640 uint32_t referenceId;
3641 if (!mDataSource->getUInt32(offset + 4, &referenceId)) {
3642 return ERROR_MALFORMED;
3643 }
3644
3645 uint32_t timeScale;
3646 if (!mDataSource->getUInt32(offset + 8, &timeScale)) {
3647 return ERROR_MALFORMED;
3648 }
3649 ALOGV("sidx refid/timescale: %d/%d", referenceId, timeScale);
3650 if (timeScale == 0)
3651 return ERROR_MALFORMED;
3652
3653 uint64_t earliestPresentationTime;
3654 uint64_t firstOffset;
3655
3656 offset += 12;
3657 size -= 12;
3658
3659 if (version == 0) {
3660 if (size < 8) {
3661 return -EINVAL;
3662 }
3663 uint32_t tmp;
3664 if (!mDataSource->getUInt32(offset, &tmp)) {
3665 return ERROR_MALFORMED;
3666 }
3667 earliestPresentationTime = tmp;
3668 if (!mDataSource->getUInt32(offset + 4, &tmp)) {
3669 return ERROR_MALFORMED;
3670 }
3671 firstOffset = tmp;
3672 offset += 8;
3673 size -= 8;
3674 } else {
3675 if (size < 16) {
3676 return -EINVAL;
3677 }
3678 if (!mDataSource->getUInt64(offset, &earliestPresentationTime)) {
3679 return ERROR_MALFORMED;
3680 }
3681 if (!mDataSource->getUInt64(offset + 8, &firstOffset)) {
3682 return ERROR_MALFORMED;
3683 }
3684 offset += 16;
3685 size -= 16;
3686 }
3687 ALOGV("sidx pres/off: %" PRIu64 "/%" PRIu64, earliestPresentationTime, firstOffset);
3688
3689 if (size < 4) {
3690 return -EINVAL;
3691 }
3692
3693 uint16_t referenceCount;
3694 if (!mDataSource->getUInt16(offset + 2, &referenceCount)) {
3695 return ERROR_MALFORMED;
3696 }
3697 offset += 4;
3698 size -= 4;
3699 ALOGV("refcount: %d", referenceCount);
3700
3701 if (size < referenceCount * 12) {
3702 return -EINVAL;
3703 }
3704
3705 uint64_t total_duration = 0;
3706 for (unsigned int i = 0; i < referenceCount; i++) {
3707 uint32_t d1, d2, d3;
3708
3709 if (!mDataSource->getUInt32(offset, &d1) || // size
3710 !mDataSource->getUInt32(offset + 4, &d2) || // duration
3711 !mDataSource->getUInt32(offset + 8, &d3)) { // flags
3712 return ERROR_MALFORMED;
3713 }
3714
3715 if (d1 & 0x80000000) {
3716 ALOGW("sub-sidx boxes not supported yet");
3717 }
3718 bool sap = d3 & 0x80000000;
3719 uint32_t saptype = (d3 >> 28) & 7;
3720 if (!sap || (saptype != 1 && saptype != 2)) {
3721 // type 1 and 2 are sync samples
3722 ALOGW("not a stream access point, or unsupported type: %08x", d3);
3723 }
3724 total_duration += d2;
3725 offset += 12;
3726 ALOGV(" item %d, %08x %08x %08x", i, d1, d2, d3);
3727 SidxEntry se;
3728 se.mSize = d1 & 0x7fffffff;
3729 se.mDurationUs = 1000000LL * d2 / timeScale;
3730 mSidxEntries.add(se);
3731 }
3732
3733 uint64_t sidxDuration = total_duration * 1000000 / timeScale;
3734
3735 if (mLastTrack == NULL)
3736 return ERROR_MALFORMED;
3737
3738 int64_t metaDuration;
3739 if (!AMediaFormat_getInt64(mLastTrack->meta,
3740 AMEDIAFORMAT_KEY_DURATION, &metaDuration) || metaDuration == 0) {
3741 AMediaFormat_setInt64(mLastTrack->meta, AMEDIAFORMAT_KEY_DURATION, sidxDuration);
3742 }
3743 return OK;
3744 }
3745
parseQTMetaKey(off64_t offset,size_t size)3746 status_t MPEG4Extractor::parseQTMetaKey(off64_t offset, size_t size) {
3747 if (size < 8) {
3748 return ERROR_MALFORMED;
3749 }
3750
3751 uint32_t count;
3752 if (!mDataSource->getUInt32(offset + 4, &count)) {
3753 return ERROR_MALFORMED;
3754 }
3755
3756 if (mMetaKeyMap.size() > 0) {
3757 ALOGW("'keys' atom seen again, discarding existing entries");
3758 mMetaKeyMap.clear();
3759 }
3760
3761 off64_t keyOffset = offset + 8;
3762 off64_t stopOffset = offset + size;
3763 for (size_t i = 1; i <= count; i++) {
3764 if (keyOffset + 8 > stopOffset) {
3765 return ERROR_MALFORMED;
3766 }
3767
3768 uint32_t keySize;
3769 if (!mDataSource->getUInt32(keyOffset, &keySize)
3770 || keySize < 8
3771 || keyOffset + keySize > stopOffset) {
3772 return ERROR_MALFORMED;
3773 }
3774
3775 uint32_t type;
3776 if (!mDataSource->getUInt32(keyOffset + 4, &type)
3777 || type != FOURCC("mdta")) {
3778 return ERROR_MALFORMED;
3779 }
3780
3781 keySize -= 8;
3782 keyOffset += 8;
3783
3784 auto keyData = heapbuffer<uint8_t>(keySize);
3785 if (keyData.get() == NULL) {
3786 return ERROR_MALFORMED;
3787 }
3788 if (mDataSource->readAt(
3789 keyOffset, keyData.get(), keySize) < (ssize_t) keySize) {
3790 return ERROR_MALFORMED;
3791 }
3792
3793 AString key((const char *)keyData.get(), keySize);
3794 mMetaKeyMap.add(i, key);
3795
3796 keyOffset += keySize;
3797 }
3798 return OK;
3799 }
3800
parseQTMetaVal(int32_t keyId,off64_t offset,size_t size)3801 status_t MPEG4Extractor::parseQTMetaVal(
3802 int32_t keyId, off64_t offset, size_t size) {
3803 ssize_t index = mMetaKeyMap.indexOfKey(keyId);
3804 if (index < 0) {
3805 // corresponding key is not present, ignore
3806 return ERROR_MALFORMED;
3807 }
3808
3809 if (size <= 16) {
3810 return ERROR_MALFORMED;
3811 }
3812 uint32_t dataSize;
3813 if (!mDataSource->getUInt32(offset, &dataSize)
3814 || dataSize > size || dataSize <= 16) {
3815 return ERROR_MALFORMED;
3816 }
3817 uint32_t atomFourCC;
3818 if (!mDataSource->getUInt32(offset + 4, &atomFourCC)
3819 || atomFourCC != FOURCC("data")) {
3820 return ERROR_MALFORMED;
3821 }
3822 uint32_t dataType;
3823 if (!mDataSource->getUInt32(offset + 8, &dataType)
3824 || ((dataType & 0xff000000) != 0)) {
3825 // not well-known type
3826 return ERROR_MALFORMED;
3827 }
3828
3829 dataSize -= 16;
3830 offset += 16;
3831
3832 if (dataType == 23 && dataSize >= 4) {
3833 // BE Float32
3834 uint32_t val;
3835 if (!mDataSource->getUInt32(offset, &val)) {
3836 return ERROR_MALFORMED;
3837 }
3838 if (!strcasecmp(mMetaKeyMap[index].c_str(), "com.android.capture.fps")) {
3839 AMediaFormat_setFloat(mFileMetaData, AMEDIAFORMAT_KEY_CAPTURE_RATE, *(float *)&val);
3840 }
3841 } else if (dataType == 67 && dataSize >= 4) {
3842 // BE signed int32
3843 uint32_t val;
3844 if (!mDataSource->getUInt32(offset, &val)) {
3845 return ERROR_MALFORMED;
3846 }
3847 if (!strcasecmp(mMetaKeyMap[index].c_str(), "com.android.video.temporal_layers_count")) {
3848 AMediaFormat_setInt32(mFileMetaData,
3849 AMEDIAFORMAT_KEY_TEMPORAL_LAYER_COUNT, val);
3850 }
3851 } else {
3852 // add more keys if needed
3853 ALOGV("ignoring key: type %d, size %d", dataType, dataSize);
3854 }
3855
3856 return OK;
3857 }
3858
parseTrackHeader(off64_t data_offset,off64_t data_size)3859 status_t MPEG4Extractor::parseTrackHeader(
3860 off64_t data_offset, off64_t data_size) {
3861 if (data_size < 4) {
3862 return ERROR_MALFORMED;
3863 }
3864
3865 uint8_t version;
3866 if (mDataSource->readAt(data_offset, &version, 1) < 1) {
3867 return ERROR_IO;
3868 }
3869
3870 size_t dynSize = (version == 1) ? 36 : 24;
3871
3872 uint8_t buffer[36 + 60];
3873
3874 if (data_size != (off64_t)dynSize + 60) {
3875 return ERROR_MALFORMED;
3876 }
3877
3878 if (mDataSource->readAt(
3879 data_offset, buffer, data_size) < (ssize_t)data_size) {
3880 return ERROR_IO;
3881 }
3882
3883 int32_t id;
3884
3885 if (version == 1) {
3886 // we can get ctime value from U64_AT(&buffer[4])
3887 // we can get mtime value from U64_AT(&buffer[12])
3888 id = U32_AT(&buffer[20]);
3889 // we can get duration value from U64_AT(&buffer[28])
3890 } else if (version == 0) {
3891 // we can get ctime value from U32_AT(&buffer[4])
3892 // we can get mtime value from U32_AT(&buffer[8])
3893 id = U32_AT(&buffer[12]);
3894 // we can get duration value from U32_AT(&buffer[20])
3895 } else {
3896 return ERROR_UNSUPPORTED;
3897 }
3898
3899 if (mLastTrack == NULL)
3900 return ERROR_MALFORMED;
3901
3902 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_TRACK_ID, id);
3903
3904 size_t matrixOffset = dynSize + 16;
3905 int32_t a00 = U32_AT(&buffer[matrixOffset]);
3906 int32_t a01 = U32_AT(&buffer[matrixOffset + 4]);
3907 int32_t a10 = U32_AT(&buffer[matrixOffset + 12]);
3908 int32_t a11 = U32_AT(&buffer[matrixOffset + 16]);
3909
3910 #if 0
3911 int32_t dx = U32_AT(&buffer[matrixOffset + 8]);
3912 int32_t dy = U32_AT(&buffer[matrixOffset + 20]);
3913
3914 ALOGI("x' = %.2f * x + %.2f * y + %.2f",
3915 a00 / 65536.0f, a01 / 65536.0f, dx / 65536.0f);
3916 ALOGI("y' = %.2f * x + %.2f * y + %.2f",
3917 a10 / 65536.0f, a11 / 65536.0f, dy / 65536.0f);
3918 #endif
3919
3920 uint32_t rotationDegrees;
3921
3922 static const int32_t kFixedOne = 0x10000;
3923 if (a00 == kFixedOne && a01 == 0 && a10 == 0 && a11 == kFixedOne) {
3924 // Identity, no rotation
3925 rotationDegrees = 0;
3926 } else if (a00 == 0 && a01 == kFixedOne && a10 == -kFixedOne && a11 == 0) {
3927 rotationDegrees = 90;
3928 } else if (a00 == 0 && a01 == -kFixedOne && a10 == kFixedOne && a11 == 0) {
3929 rotationDegrees = 270;
3930 } else if (a00 == -kFixedOne && a01 == 0 && a10 == 0 && a11 == -kFixedOne) {
3931 rotationDegrees = 180;
3932 } else {
3933 ALOGW("We only support 0,90,180,270 degree rotation matrices");
3934 rotationDegrees = 0;
3935 }
3936
3937 if (rotationDegrees != 0) {
3938 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_ROTATION, rotationDegrees);
3939 }
3940
3941 // Handle presentation display size, which could be different
3942 // from the image size indicated by AMEDIAFORMAT_KEY_WIDTH and AMEDIAFORMAT_KEY_HEIGHT.
3943 uint32_t width = U32_AT(&buffer[dynSize + 52]);
3944 uint32_t height = U32_AT(&buffer[dynSize + 56]);
3945 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_DISPLAY_WIDTH, width >> 16);
3946 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_DISPLAY_HEIGHT, height >> 16);
3947
3948 return OK;
3949 }
3950
parseITunesMetaData(off64_t offset,size_t size)3951 status_t MPEG4Extractor::parseITunesMetaData(off64_t offset, size_t size) {
3952 if (size == 0) {
3953 return OK;
3954 }
3955
3956 if (size < 4 || size == SIZE_MAX) {
3957 return ERROR_MALFORMED;
3958 }
3959
3960 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
3961 if (buffer == NULL) {
3962 return ERROR_MALFORMED;
3963 }
3964 if (mDataSource->readAt(
3965 offset, buffer, size) != (ssize_t)size) {
3966 delete[] buffer;
3967 buffer = NULL;
3968
3969 return ERROR_IO;
3970 }
3971
3972 uint32_t flags = U32_AT(buffer);
3973
3974 const char *metadataKey = nullptr;
3975 char chunk[5];
3976 MakeFourCCString(mPath[4], chunk);
3977 ALOGV("meta: %s @ %lld", chunk, (long long)offset);
3978 switch ((int32_t)mPath[4]) {
3979 case FOURCC("\251alb"):
3980 {
3981 metadataKey = AMEDIAFORMAT_KEY_ALBUM;
3982 break;
3983 }
3984 case FOURCC("\251ART"):
3985 {
3986 metadataKey = AMEDIAFORMAT_KEY_ARTIST;
3987 break;
3988 }
3989 case FOURCC("aART"):
3990 {
3991 metadataKey = AMEDIAFORMAT_KEY_ALBUMARTIST;
3992 break;
3993 }
3994 case FOURCC("\251day"):
3995 {
3996 metadataKey = AMEDIAFORMAT_KEY_YEAR;
3997 break;
3998 }
3999 case FOURCC("\251nam"):
4000 {
4001 metadataKey = AMEDIAFORMAT_KEY_TITLE;
4002 break;
4003 }
4004 case FOURCC("\251wrt"):
4005 {
4006 // various open source taggers agree that the "©wrt" tag is for composer, not writer
4007 metadataKey = AMEDIAFORMAT_KEY_COMPOSER;
4008 break;
4009 }
4010 case FOURCC("covr"):
4011 {
4012 metadataKey = AMEDIAFORMAT_KEY_ALBUMART;
4013 break;
4014 }
4015 case FOURCC("gnre"):
4016 case FOURCC("\251gen"):
4017 {
4018 metadataKey = AMEDIAFORMAT_KEY_GENRE;
4019 break;
4020 }
4021 case FOURCC("cpil"):
4022 {
4023 if (size == 9 && flags == 21) {
4024 char tmp[16];
4025 sprintf(tmp, "%d",
4026 (int)buffer[size - 1]);
4027
4028 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_COMPILATION, tmp);
4029 }
4030 break;
4031 }
4032 case FOURCC("trkn"):
4033 {
4034 if (size == 16 && flags == 0) {
4035 char tmp[16];
4036 uint16_t* pTrack = (uint16_t*)&buffer[10];
4037 uint16_t* pTotalTracks = (uint16_t*)&buffer[12];
4038 sprintf(tmp, "%d/%d", ntohs(*pTrack), ntohs(*pTotalTracks));
4039
4040 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_CDTRACKNUMBER, tmp);
4041 }
4042 break;
4043 }
4044 case FOURCC("disk"):
4045 {
4046 if ((size == 14 || size == 16) && flags == 0) {
4047 char tmp[16];
4048 uint16_t* pDisc = (uint16_t*)&buffer[10];
4049 uint16_t* pTotalDiscs = (uint16_t*)&buffer[12];
4050 sprintf(tmp, "%d/%d", ntohs(*pDisc), ntohs(*pTotalDiscs));
4051
4052 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_DISCNUMBER, tmp);
4053 }
4054 break;
4055 }
4056 case FOURCC("----"):
4057 {
4058 buffer[size] = '\0';
4059 switch (mPath[5]) {
4060 case FOURCC("mean"):
4061 mLastCommentMean.setTo((const char *)buffer + 4);
4062 break;
4063 case FOURCC("name"):
4064 mLastCommentName.setTo((const char *)buffer + 4);
4065 break;
4066 case FOURCC("data"):
4067 if (size < 8) {
4068 delete[] buffer;
4069 buffer = NULL;
4070 ALOGE("b/24346430");
4071 return ERROR_MALFORMED;
4072 }
4073 mLastCommentData.setTo((const char *)buffer + 8);
4074 break;
4075 }
4076
4077 // Once we have a set of mean/name/data info, go ahead and process
4078 // it to see if its something we are interested in. Whether or not
4079 // were are interested in the specific tag, make sure to clear out
4080 // the set so we can be ready to process another tuple should one
4081 // show up later in the file.
4082 if ((mLastCommentMean.length() != 0) &&
4083 (mLastCommentName.length() != 0) &&
4084 (mLastCommentData.length() != 0)) {
4085
4086 if (mLastCommentMean == "com.apple.iTunes"
4087 && mLastCommentName == "iTunSMPB") {
4088 int32_t delay, padding;
4089 if (sscanf(mLastCommentData,
4090 " %*x %x %x %*x", &delay, &padding) == 2) {
4091 if (mLastTrack == NULL) {
4092 delete[] buffer;
4093 return ERROR_MALFORMED;
4094 }
4095
4096 AMediaFormat_setInt32(mLastTrack->meta,
4097 AMEDIAFORMAT_KEY_ENCODER_DELAY, delay);
4098 AMediaFormat_setInt32(mLastTrack->meta,
4099 AMEDIAFORMAT_KEY_ENCODER_PADDING, padding);
4100 }
4101 }
4102
4103 mLastCommentMean.clear();
4104 mLastCommentName.clear();
4105 mLastCommentData.clear();
4106 }
4107 break;
4108 }
4109
4110 default:
4111 break;
4112 }
4113
4114 void *tmpData;
4115 size_t tmpDataSize;
4116 const char *s;
4117 if (size >= 8 && metadataKey &&
4118 !AMediaFormat_getBuffer(mFileMetaData, metadataKey, &tmpData, &tmpDataSize) &&
4119 !AMediaFormat_getString(mFileMetaData, metadataKey, &s)) {
4120 if (!strcmp(metadataKey, "albumart")) {
4121 AMediaFormat_setBuffer(mFileMetaData, metadataKey,
4122 buffer + 8, size - 8);
4123 } else if (!strcmp(metadataKey, AMEDIAFORMAT_KEY_GENRE)) {
4124 if (flags == 0) {
4125 // uint8_t genre code, iTunes genre codes are
4126 // the standard id3 codes, except they start
4127 // at 1 instead of 0 (e.g. Pop is 14, not 13)
4128 // We use standard id3 numbering, so subtract 1.
4129 int genrecode = (int)buffer[size - 1];
4130 genrecode--;
4131 if (genrecode < 0) {
4132 genrecode = 255; // reserved for 'unknown genre'
4133 }
4134 char genre[10];
4135 sprintf(genre, "%d", genrecode);
4136
4137 AMediaFormat_setString(mFileMetaData, metadataKey, genre);
4138 } else if (flags == 1) {
4139 // custom genre string
4140 buffer[size] = '\0';
4141
4142 AMediaFormat_setString(mFileMetaData,
4143 metadataKey, (const char *)buffer + 8);
4144 }
4145 } else {
4146 buffer[size] = '\0';
4147
4148 AMediaFormat_setString(mFileMetaData,
4149 metadataKey, (const char *)buffer + 8);
4150 }
4151 }
4152
4153 delete[] buffer;
4154 buffer = NULL;
4155
4156 return OK;
4157 }
4158
parseColorInfo(off64_t offset,size_t size)4159 status_t MPEG4Extractor::parseColorInfo(off64_t offset, size_t size) {
4160 if (size < 4 || size == SIZE_MAX || mLastTrack == NULL) {
4161 return ERROR_MALFORMED;
4162 }
4163
4164 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
4165 if (buffer == NULL) {
4166 return ERROR_MALFORMED;
4167 }
4168 if (mDataSource->readAt(offset, buffer, size) != (ssize_t)size) {
4169 delete[] buffer;
4170 buffer = NULL;
4171
4172 return ERROR_IO;
4173 }
4174
4175 int32_t type = U32_AT(&buffer[0]);
4176 if ((type == FOURCC("nclx") && size >= 11)
4177 || (type == FOURCC("nclc") && size >= 10)) {
4178 // only store the first color specification
4179 int32_t existingColor;
4180 if (!AMediaFormat_getInt32(mLastTrack->meta,
4181 AMEDIAFORMAT_KEY_COLOR_RANGE, &existingColor)) {
4182 int32_t primaries = U16_AT(&buffer[4]);
4183 int32_t isotransfer = U16_AT(&buffer[6]);
4184 int32_t coeffs = U16_AT(&buffer[8]);
4185 bool fullRange = (type == FOURCC("nclx")) && (buffer[10] & 128);
4186
4187 int32_t range = 0;
4188 int32_t standard = 0;
4189 int32_t transfer = 0;
4190 ColorUtils::convertIsoColorAspectsToPlatformAspects(
4191 primaries, isotransfer, coeffs, fullRange,
4192 &range, &standard, &transfer);
4193
4194 if (range != 0) {
4195 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_COLOR_RANGE, range);
4196 }
4197 if (standard != 0) {
4198 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_COLOR_STANDARD, standard);
4199 }
4200 if (transfer != 0) {
4201 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_COLOR_TRANSFER, transfer);
4202 }
4203 }
4204 }
4205
4206 delete[] buffer;
4207 buffer = NULL;
4208
4209 return OK;
4210 }
4211
parsePaspBox(off64_t offset,size_t size)4212 status_t MPEG4Extractor::parsePaspBox(off64_t offset, size_t size) {
4213 if (size < 8 || size == SIZE_MAX || mLastTrack == NULL) {
4214 return ERROR_MALFORMED;
4215 }
4216
4217 uint32_t data[2]; // hSpacing, vSpacing
4218 if (mDataSource->readAt(offset, data, 8) < 8) {
4219 return ERROR_IO;
4220 }
4221 uint32_t hSpacing = ntohl(data[0]);
4222 uint32_t vSpacing = ntohl(data[1]);
4223
4224 if (hSpacing != 0 && vSpacing != 0) {
4225 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAR_WIDTH, hSpacing);
4226 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAR_HEIGHT, vSpacing);
4227 }
4228
4229 return OK;
4230 }
4231
parse3GPPMetaData(off64_t offset,size_t size,int depth)4232 status_t MPEG4Extractor::parse3GPPMetaData(off64_t offset, size_t size, int depth) {
4233 if (size < 4 || size == SIZE_MAX) {
4234 return ERROR_MALFORMED;
4235 }
4236
4237 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
4238 if (buffer == NULL) {
4239 return ERROR_MALFORMED;
4240 }
4241 if (mDataSource->readAt(
4242 offset, buffer, size) != (ssize_t)size) {
4243 delete[] buffer;
4244 buffer = NULL;
4245
4246 return ERROR_IO;
4247 }
4248
4249 const char *metadataKey = nullptr;
4250 switch (mPath[depth]) {
4251 case FOURCC("titl"):
4252 {
4253 metadataKey = "title";
4254 break;
4255 }
4256 case FOURCC("perf"):
4257 {
4258 metadataKey = "artist";
4259 break;
4260 }
4261 case FOURCC("auth"):
4262 {
4263 metadataKey = "writer";
4264 break;
4265 }
4266 case FOURCC("gnre"):
4267 {
4268 metadataKey = "genre";
4269 break;
4270 }
4271 case FOURCC("albm"):
4272 {
4273 if (buffer[size - 1] != '\0') {
4274 char tmp[4];
4275 sprintf(tmp, "%u", buffer[size - 1]);
4276
4277 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_CDTRACKNUMBER, tmp);
4278 }
4279
4280 metadataKey = "album";
4281 break;
4282 }
4283 case FOURCC("yrrc"):
4284 {
4285 if (size < 6) {
4286 delete[] buffer;
4287 buffer = NULL;
4288 ALOGE("b/62133227");
4289 android_errorWriteLog(0x534e4554, "62133227");
4290 return ERROR_MALFORMED;
4291 }
4292 char tmp[5];
4293 uint16_t year = U16_AT(&buffer[4]);
4294
4295 if (year < 10000) {
4296 sprintf(tmp, "%u", year);
4297
4298 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_YEAR, tmp);
4299 }
4300 break;
4301 }
4302
4303 default:
4304 break;
4305 }
4306
4307 if (metadataKey) {
4308 bool isUTF8 = true; // Common case
4309 char16_t *framedata = NULL;
4310 int len16 = 0; // Number of UTF-16 characters
4311
4312 // smallest possible valid UTF-16 string w BOM: 0xfe 0xff 0x00 0x00
4313 if (size < 6) {
4314 delete[] buffer;
4315 buffer = NULL;
4316 return ERROR_MALFORMED;
4317 }
4318
4319 if (size - 6 >= 4) {
4320 len16 = ((size - 6) / 2) - 1; // don't include 0x0000 terminator
4321 framedata = (char16_t *)(buffer + 6);
4322 if (0xfffe == *framedata) {
4323 // endianness marker (BOM) doesn't match host endianness
4324 for (int i = 0; i < len16; i++) {
4325 framedata[i] = bswap_16(framedata[i]);
4326 }
4327 // BOM is now swapped to 0xfeff, we will execute next block too
4328 }
4329
4330 if (0xfeff == *framedata) {
4331 // Remove the BOM
4332 framedata++;
4333 len16--;
4334 isUTF8 = false;
4335 }
4336 // else normal non-zero-length UTF-8 string
4337 // we can't handle UTF-16 without BOM as there is no other
4338 // indication of encoding.
4339 }
4340
4341 if (isUTF8) {
4342 buffer[size] = 0;
4343 AMediaFormat_setString(mFileMetaData, metadataKey, (const char *)buffer + 6);
4344 } else {
4345 // Convert from UTF-16 string to UTF-8 string.
4346 String8 tmpUTF8str(framedata, len16);
4347 AMediaFormat_setString(mFileMetaData, metadataKey, tmpUTF8str.string());
4348 }
4349 }
4350
4351 delete[] buffer;
4352 buffer = NULL;
4353
4354 return OK;
4355 }
4356
parseID3v2MetaData(off64_t offset,uint64_t size)4357 void MPEG4Extractor::parseID3v2MetaData(off64_t offset, uint64_t size) {
4358 uint8_t *buffer = new (std::nothrow) uint8_t[size];
4359 if (buffer == NULL) {
4360 return;
4361 }
4362 if (mDataSource->readAt(offset, buffer, size) != (ssize_t)size) {
4363 delete[] buffer;
4364 buffer = NULL;
4365 return;
4366 }
4367
4368 ID3 id3(buffer, size, true /* ignorev1 */);
4369 delete[] buffer;
4370
4371 if (id3.isValid()) {
4372 struct Map {
4373 const char *key;
4374 const char *tag1;
4375 const char *tag2;
4376 };
4377 static const Map kMap[] = {
4378 { AMEDIAFORMAT_KEY_ALBUM, "TALB", "TAL" },
4379 { AMEDIAFORMAT_KEY_ARTIST, "TPE1", "TP1" },
4380 { AMEDIAFORMAT_KEY_ALBUMARTIST, "TPE2", "TP2" },
4381 { AMEDIAFORMAT_KEY_COMPOSER, "TCOM", "TCM" },
4382 { AMEDIAFORMAT_KEY_GENRE, "TCON", "TCO" },
4383 { AMEDIAFORMAT_KEY_TITLE, "TIT2", "TT2" },
4384 { AMEDIAFORMAT_KEY_YEAR, "TYE", "TYER" },
4385 { AMEDIAFORMAT_KEY_AUTHOR, "TXT", "TEXT" },
4386 { AMEDIAFORMAT_KEY_CDTRACKNUMBER, "TRK", "TRCK" },
4387 { AMEDIAFORMAT_KEY_DISCNUMBER, "TPA", "TPOS" },
4388 { AMEDIAFORMAT_KEY_COMPILATION, "TCP", "TCMP" },
4389 };
4390 static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]);
4391
4392 for (size_t i = 0; i < kNumMapEntries; ++i) {
4393 const char *ss;
4394 if (!AMediaFormat_getString(mFileMetaData, kMap[i].key, &ss)) {
4395 ID3::Iterator *it = new ID3::Iterator(id3, kMap[i].tag1);
4396 if (it->done()) {
4397 delete it;
4398 it = new ID3::Iterator(id3, kMap[i].tag2);
4399 }
4400
4401 if (it->done()) {
4402 delete it;
4403 continue;
4404 }
4405
4406 String8 s;
4407 it->getString(&s);
4408 delete it;
4409
4410 AMediaFormat_setString(mFileMetaData, kMap[i].key, s);
4411 }
4412 }
4413
4414 size_t dataSize;
4415 String8 mime;
4416 const void *data = id3.getAlbumArt(&dataSize, &mime);
4417
4418 if (data) {
4419 AMediaFormat_setBuffer(mFileMetaData, AMEDIAFORMAT_KEY_ALBUMART, data, dataSize);
4420 }
4421 }
4422 }
4423
getTrack(size_t index)4424 MediaTrackHelper *MPEG4Extractor::getTrack(size_t index) {
4425 status_t err;
4426 if ((err = readMetaData()) != OK) {
4427 return NULL;
4428 }
4429
4430 Track *track = mFirstTrack;
4431 while (index > 0) {
4432 if (track == NULL) {
4433 return NULL;
4434 }
4435
4436 track = track->next;
4437 --index;
4438 }
4439
4440 if (track == NULL) {
4441 return NULL;
4442 }
4443
4444
4445 Trex *trex = NULL;
4446 int32_t trackId;
4447 if (AMediaFormat_getInt32(track->meta, AMEDIAFORMAT_KEY_TRACK_ID, &trackId)) {
4448 for (size_t i = 0; i < mTrex.size(); i++) {
4449 Trex *t = &mTrex.editItemAt(i);
4450 if (t->track_ID == (uint32_t) trackId) {
4451 trex = t;
4452 break;
4453 }
4454 }
4455 } else {
4456 ALOGE("b/21657957");
4457 return NULL;
4458 }
4459
4460 ALOGV("getTrack called, pssh: %zu", mPssh.size());
4461
4462 const char *mime;
4463 if (!AMediaFormat_getString(track->meta, AMEDIAFORMAT_KEY_MIME, &mime)) {
4464 return NULL;
4465 }
4466 sp<ItemTable> itemTable;
4467 if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
4468 void *data;
4469 size_t size;
4470 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_AVC, &data, &size)) {
4471 return NULL;
4472 }
4473
4474 const uint8_t *ptr = (const uint8_t *)data;
4475
4476 if (size < 7 || ptr[0] != 1) { // configurationVersion == 1
4477 return NULL;
4478 }
4479 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)
4480 || !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC)) {
4481 void *data;
4482 size_t size;
4483 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_HEVC, &data, &size)) {
4484 return NULL;
4485 }
4486
4487 const uint8_t *ptr = (const uint8_t *)data;
4488
4489 if (size < 22 || ptr[0] != 1) { // configurationVersion == 1
4490 return NULL;
4491 }
4492 if (!strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC)) {
4493 itemTable = mItemTable;
4494 }
4495 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_DOLBY_VISION)) {
4496 void *data;
4497 size_t size;
4498 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_2, &data, &size)
4499 || size != 24) {
4500 return NULL;
4501 }
4502
4503 const uint8_t *ptr = (const uint8_t *)data;
4504 // dv_major.dv_minor Should be 1.0 or 2.1
4505 if ((ptr[0] != 1 || ptr[1] != 0) && (ptr[0] != 2 || ptr[1] != 1)) {
4506 return NULL;
4507 }
4508 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AV1)
4509 || !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_AVIF)) {
4510 void *data;
4511 size_t size;
4512 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_0, &data, &size)) {
4513 return NULL;
4514 }
4515
4516 const uint8_t *ptr = (const uint8_t *)data;
4517
4518 if (size < 4 || ptr[0] != 0x81) { // configurationVersion == 1
4519 return NULL;
4520 }
4521 if (!strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_AVIF)) {
4522 itemTable = mItemTable;
4523 }
4524 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_VP9)) {
4525 void *data;
4526 size_t size;
4527 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_0, &data, &size)) {
4528 return NULL;
4529 }
4530
4531 const uint8_t *ptr = (const uint8_t *)data;
4532
4533 if (size < 5 || ptr[0] != 0x01) { // configurationVersion == 1
4534 return NULL;
4535 }
4536 }
4537
4538 ALOGV("track->elst_shift_start_ticks :%" PRIu64, track->elst_shift_start_ticks);
4539
4540 uint64_t elst_initial_empty_edit_ticks = 0;
4541 if (mHeaderTimescale != 0) {
4542 // Convert empty_edit_ticks from movie timescale to media timescale.
4543 uint64_t elst_initial_empty_edit_ticks_mul = 0, elst_initial_empty_edit_ticks_add = 0;
4544 if (__builtin_mul_overflow(track->elst_initial_empty_edit_ticks, track->timescale,
4545 &elst_initial_empty_edit_ticks_mul) ||
4546 __builtin_add_overflow(elst_initial_empty_edit_ticks_mul, (mHeaderTimescale / 2),
4547 &elst_initial_empty_edit_ticks_add)) {
4548 ALOGE("track->elst_initial_empty_edit_ticks overflow");
4549 return nullptr;
4550 }
4551 elst_initial_empty_edit_ticks = elst_initial_empty_edit_ticks_add / mHeaderTimescale;
4552 }
4553 ALOGV("elst_initial_empty_edit_ticks in MediaTimeScale :%" PRIu64,
4554 elst_initial_empty_edit_ticks);
4555
4556 MPEG4Source* source =
4557 new MPEG4Source(track->meta, mDataSource, track->timescale, track->sampleTable,
4558 mSidxEntries, trex, mMoofOffset, itemTable,
4559 track->elst_shift_start_ticks, elst_initial_empty_edit_ticks);
4560 if (source->init() != OK) {
4561 delete source;
4562 return NULL;
4563 }
4564 return source;
4565 }
4566
4567 // static
verifyTrack(Track * track)4568 status_t MPEG4Extractor::verifyTrack(Track *track) {
4569 const char *mime;
4570 CHECK(AMediaFormat_getString(track->meta, AMEDIAFORMAT_KEY_MIME, &mime));
4571
4572 void *data;
4573 size_t size;
4574 if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
4575 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_AVC, &data, &size)) {
4576 return ERROR_MALFORMED;
4577 }
4578 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) {
4579 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_HEVC, &data, &size)) {
4580 return ERROR_MALFORMED;
4581 }
4582 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_DOLBY_VISION)) {
4583 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_2, &data, &size)) {
4584 return ERROR_MALFORMED;
4585 }
4586 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AV1)) {
4587 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_0, &data, &size)) {
4588 return ERROR_MALFORMED;
4589 }
4590 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_VP9)) {
4591 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_0, &data, &size)) {
4592 return ERROR_MALFORMED;
4593 }
4594 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4)
4595 || !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG2)
4596 || !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) {
4597 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_ESDS, &data, &size)) {
4598 return ERROR_MALFORMED;
4599 }
4600 }
4601
4602 if (track->sampleTable == NULL || !track->sampleTable->isValid()) {
4603 // Make sure we have all the metadata we need.
4604 ALOGE("stbl atom missing/invalid.");
4605 return ERROR_MALFORMED;
4606 }
4607
4608 if (track->timescale == 0) {
4609 ALOGE("timescale invalid.");
4610 return ERROR_MALFORMED;
4611 }
4612
4613 return OK;
4614 }
4615
// Audio object type (AOT) identifiers compared against the object type read
// from the codec specific data. Entries that this file does not reference
// are kept as comments so the numbering stays visible.
enum AUDIO_OBJECT_TYPE {
    // AOT_NONE             = -1,
    // AOT_NULL_OBJECT      = 0,
    // AOT_AAC_MAIN         = 1,  /**< Main profile */
    AOT_AAC_LC              = 2,  /**< Low Complexity object */
    // AOT_AAC_SSR          = 3,
    // AOT_AAC_LTP          = 4,
    AOT_SBR                 = 5,
    // AOT_AAC_SCAL         = 6,
    // AOT_TWIN_VQ          = 7,
    // AOT_CELP             = 8,
    // AOT_HVXC             = 9,
    // AOT_RSVD_10          = 10, /**< (reserved) */
    // AOT_RSVD_11          = 11, /**< (reserved) */
    // AOT_TTSI             = 12, /**< TTSI Object */
    // AOT_MAIN_SYNTH       = 13, /**< Main Synthetic object */
    // AOT_WAV_TAB_SYNTH    = 14, /**< Wavetable Synthesis object */
    // AOT_GEN_MIDI         = 15, /**< General MIDI object */
    // AOT_ALG_SYNTH_AUD_FX = 16, /**< Algorithmic Synthesis and Audio FX object */
    AOT_ER_AAC_LC           = 17, /**< Error Resilient(ER) AAC Low Complexity */
    // AOT_RSVD_18          = 18, /**< (reserved) */
    // AOT_ER_AAC_LTP       = 19, /**< Error Resilient(ER) AAC LTP object */
    AOT_ER_AAC_SCAL         = 20, /**< Error Resilient(ER) AAC Scalable object */
    // AOT_ER_TWIN_VQ       = 21, /**< Error Resilient(ER) TwinVQ object */
    AOT_ER_BSAC             = 22, /**< Error Resilient(ER) BSAC object */
    AOT_ER_AAC_LD           = 23, /**< Error Resilient(ER) AAC LowDelay object */
    // AOT_ER_CELP          = 24, /**< Error Resilient(ER) CELP object */
    // AOT_ER_HVXC          = 25, /**< Error Resilient(ER) HVXC object */
    // AOT_ER_HILN          = 26, /**< Error Resilient(ER) HILN object */
    // AOT_ER_PARA          = 27, /**< Error Resilient(ER) Parametric object */
    // AOT_RSVD_28          = 28, /**< might become SSC */
    AOT_PS                  = 29, /**< PS, Parametric Stereo (includes SBR) */
    // AOT_MPEGS            = 30, /**< MPEG Surround */

    AOT_ESCAPE              = 31, /**< Signal AOT uses more than 5 bits */

    // AOT_MP3ONMP4_L1      = 32, /**< MPEG-Layer1 in mp4 */
    // AOT_MP3ONMP4_L2      = 33, /**< MPEG-Layer2 in mp4 */
    // AOT_MP3ONMP4_L3      = 34, /**< MPEG-Layer3 in mp4 */
    // AOT_RSVD_35          = 35, /**< might become DST */
    // AOT_RSVD_36          = 36, /**< might become ALS */
    // AOT_AAC_SLS          = 37, /**< AAC + SLS */
    // AOT_SLS              = 38, /**< SLS */
    // AOT_ER_AAC_ELD       = 39, /**< AAC Enhanced Low Delay */

    AOT_USAC                = 42, /**< USAC */
    // AOT_SAOC             = 43, /**< SAOC */
    // AOT_LD_MPEGS         = 44, /**< Low Delay MPEG Surround */

    // AOT_RSVD50           = 50, /**< Interim AOT for Rsvd50 */
};
4667
updateAudioTrackInfoFromESDS_MPEG4Audio(const void * esds_data,size_t esds_size)4668 status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio(
4669 const void *esds_data, size_t esds_size) {
4670 ESDS esds(esds_data, esds_size);
4671
4672 uint8_t objectTypeIndication;
4673 if (esds.getObjectTypeIndication(&objectTypeIndication) != OK) {
4674 return ERROR_MALFORMED;
4675 }
4676
4677 if (objectTypeIndication == 0xe1) {
4678 // This isn't MPEG4 audio at all, it's QCELP 14k...
4679 if (mLastTrack == NULL)
4680 return ERROR_MALFORMED;
4681
4682 AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_QCELP);
4683 return OK;
4684 }
4685
4686 if (objectTypeIndication == 0x6B || objectTypeIndication == 0x69) {
4687 // mp3 audio
4688 if (mLastTrack == NULL)
4689 return ERROR_MALFORMED;
4690
4691 AMediaFormat_setString(mLastTrack->meta,AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_MPEG);
4692 return OK;
4693 }
4694
4695 if (mLastTrack != NULL) {
4696 uint32_t maxBitrate = 0;
4697 uint32_t avgBitrate = 0;
4698 esds.getBitRate(&maxBitrate, &avgBitrate);
4699 if (maxBitrate > 0 && maxBitrate < INT32_MAX) {
4700 AMediaFormat_setInt32(mLastTrack->meta,
4701 AMEDIAFORMAT_KEY_MAX_BIT_RATE, (int32_t)maxBitrate);
4702 }
4703 if (avgBitrate > 0 && avgBitrate < INT32_MAX) {
4704 AMediaFormat_setInt32(mLastTrack->meta,
4705 AMEDIAFORMAT_KEY_BIT_RATE, (int32_t)avgBitrate);
4706 }
4707 }
4708
4709 const uint8_t *csd;
4710 size_t csd_size;
4711 if (esds.getCodecSpecificInfo(
4712 (const void **)&csd, &csd_size) != OK) {
4713 return ERROR_MALFORMED;
4714 }
4715
4716 if (kUseHexDump) {
4717 printf("ESD of size %zu\n", csd_size);
4718 hexdump(csd, csd_size);
4719 }
4720
4721 if (csd_size == 0) {
4722 // There's no further information, i.e. no codec specific data
4723 // Let's assume that the information provided in the mpeg4 headers
4724 // is accurate and hope for the best.
4725
4726 return OK;
4727 }
4728
4729 if (csd_size < 2) {
4730 return ERROR_MALFORMED;
4731 }
4732
4733 if (objectTypeIndication == 0xdd) {
4734 // vorbis audio
4735 if (csd[0] != 0x02) {
4736 return ERROR_MALFORMED;
4737 }
4738
4739 // codecInfo starts with two lengths, len1 and len2, that are
4740 // "Xiph-style-lacing encoded"..
4741
4742 size_t offset = 1;
4743 size_t len1 = 0;
4744 while (offset < csd_size && csd[offset] == 0xff) {
4745 if (__builtin_add_overflow(len1, 0xff, &len1)) {
4746 return ERROR_MALFORMED;
4747 }
4748 ++offset;
4749 }
4750 if (offset >= csd_size) {
4751 return ERROR_MALFORMED;
4752 }
4753 if (__builtin_add_overflow(len1, csd[offset], &len1)) {
4754 return ERROR_MALFORMED;
4755 }
4756 ++offset;
4757 if (len1 == 0) {
4758 return ERROR_MALFORMED;
4759 }
4760
4761 size_t len2 = 0;
4762 while (offset < csd_size && csd[offset] == 0xff) {
4763 if (__builtin_add_overflow(len2, 0xff, &len2)) {
4764 return ERROR_MALFORMED;
4765 }
4766 ++offset;
4767 }
4768 if (offset >= csd_size) {
4769 return ERROR_MALFORMED;
4770 }
4771 if (__builtin_add_overflow(len2, csd[offset], &len2)) {
4772 return ERROR_MALFORMED;
4773 }
4774 ++offset;
4775 if (len2 == 0) {
4776 return ERROR_MALFORMED;
4777 }
4778 if (offset + len1 > csd_size || csd[offset] != 0x01) {
4779 return ERROR_MALFORMED;
4780 }
4781
4782 if (mLastTrack == NULL) {
4783 return ERROR_MALFORMED;
4784 }
4785 // formerly kKeyVorbisInfo
4786 AMediaFormat_setBuffer(mLastTrack->meta,
4787 AMEDIAFORMAT_KEY_CSD_0, &csd[offset], len1);
4788
4789 if (__builtin_add_overflow(offset, len1, &offset) ||
4790 offset >= csd_size || csd[offset] != 0x03) {
4791 return ERROR_MALFORMED;
4792 }
4793
4794 if (__builtin_add_overflow(offset, len2, &offset) ||
4795 offset >= csd_size || csd[offset] != 0x05) {
4796 return ERROR_MALFORMED;
4797 }
4798
4799 // formerly kKeyVorbisBooks
4800 AMediaFormat_setBuffer(mLastTrack->meta,
4801 AMEDIAFORMAT_KEY_CSD_1, &csd[offset], csd_size - offset);
4802 AMediaFormat_setString(mLastTrack->meta,
4803 AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_VORBIS);
4804
4805 return OK;
4806 }
4807
4808 static uint32_t kSamplingRate[] = {
4809 96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050,
4810 16000, 12000, 11025, 8000, 7350
4811 };
4812
4813 ABitReader br(csd, csd_size);
4814 uint32_t objectType = br.getBits(5);
4815
4816 if (objectType == AOT_ESCAPE) { // AAC-ELD => additional 6 bits
4817 objectType = 32 + br.getBits(6);
4818 }
4819
4820 if (mLastTrack == NULL)
4821 return ERROR_MALFORMED;
4822
4823 //keep AOT type
4824 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_AAC_PROFILE, objectType);
4825
4826 uint32_t freqIndex = br.getBits(4);
4827
4828 int32_t sampleRate = 0;
4829 int32_t numChannels = 0;
4830 if (freqIndex == 15) {
4831 if (br.numBitsLeft() < 28) return ERROR_MALFORMED;
4832 sampleRate = br.getBits(24);
4833 numChannels = br.getBits(4);
4834 } else {
4835 if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
4836 numChannels = br.getBits(4);
4837
4838 if (freqIndex == 13 || freqIndex == 14) {
4839 return ERROR_MALFORMED;
4840 }
4841
4842 sampleRate = kSamplingRate[freqIndex];
4843 }
4844
4845 if (objectType == AOT_SBR || objectType == AOT_PS) {//SBR specific config per 14496-3 tbl 1.13
4846 if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
4847 uint32_t extFreqIndex = br.getBits(4);
4848 if (extFreqIndex == 15) {
4849 if (csd_size < 8) {
4850 return ERROR_MALFORMED;
4851 }
4852 if (br.numBitsLeft() < 24) return ERROR_MALFORMED;
4853 br.skipBits(24); // extSampleRate
4854 } else {
4855 if (extFreqIndex == 13 || extFreqIndex == 14) {
4856 return ERROR_MALFORMED;
4857 }
4858 //extSampleRate = kSamplingRate[extFreqIndex];
4859 }
4860 //TODO: save the extension sampling rate value in meta data =>
4861 // AMediaFormat_setInt32(mLastTrack->meta, kKeyExtSampleRate, extSampleRate);
4862 }
4863
4864 switch (numChannels) {
4865 // values defined in 14496-3_2009 amendment-4 Table 1.19 - Channel Configuration
4866 case 0:
4867 case 1:// FC
4868 case 2:// FL FR
4869 case 3:// FC, FL FR
4870 case 4:// FC, FL FR, RC
4871 case 5:// FC, FL FR, SL SR
4872 case 6:// FC, FL FR, SL SR, LFE
4873 //numChannels already contains the right value
4874 break;
4875 case 11:// FC, FL FR, SL SR, RC, LFE
4876 numChannels = 7;
4877 break;
4878 case 7: // FC, FCL FCR, FL FR, SL SR, LFE
4879 case 12:// FC, FL FR, SL SR, RL RR, LFE
4880 case 14:// FC, FL FR, SL SR, LFE, FHL FHR
4881 numChannels = 8;
4882 break;
4883 default:
4884 return ERROR_UNSUPPORTED;
4885 }
4886
4887 {
4888 if (objectType == AOT_SBR || objectType == AOT_PS) {
4889 if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
4890 objectType = br.getBits(5);
4891
4892 if (objectType == AOT_ESCAPE) {
4893 if (br.numBitsLeft() < 6) return ERROR_MALFORMED;
4894 objectType = 32 + br.getBits(6);
4895 }
4896 }
4897 if (objectType == AOT_AAC_LC || objectType == AOT_ER_AAC_LC ||
4898 objectType == AOT_ER_AAC_LD || objectType == AOT_ER_AAC_SCAL ||
4899 objectType == AOT_ER_BSAC) {
4900 if (br.numBitsLeft() < 2) return ERROR_MALFORMED;
4901 br.skipBits(1); // frameLengthFlag
4902
4903 const int32_t dependsOnCoreCoder = br.getBits(1);
4904
4905 if (dependsOnCoreCoder ) {
4906 if (br.numBitsLeft() < 14) return ERROR_MALFORMED;
4907 br.skipBits(14); // coreCoderDelay
4908 }
4909
4910 int32_t extensionFlag = -1;
4911 if (br.numBitsLeft() > 0) {
4912 extensionFlag = br.getBits(1);
4913 } else {
4914 switch (objectType) {
4915 // 14496-3 4.5.1.1 extensionFlag
4916 case AOT_AAC_LC:
4917 extensionFlag = 0;
4918 break;
4919 case AOT_ER_AAC_LC:
4920 case AOT_ER_AAC_SCAL:
4921 case AOT_ER_BSAC:
4922 case AOT_ER_AAC_LD:
4923 extensionFlag = 1;
4924 break;
4925 default:
4926 return ERROR_MALFORMED;
4927 break;
4928 }
4929 ALOGW("csd missing extension flag; assuming %d for object type %u.",
4930 extensionFlag, objectType);
4931 }
4932
4933 if (numChannels == 0) {
4934 int32_t channelsEffectiveNum = 0;
4935 int32_t channelsNum = 0;
4936 if (br.numBitsLeft() < 32) {
4937 return ERROR_MALFORMED;
4938 }
4939 br.skipBits(4); // ElementInstanceTag
4940 br.skipBits(2); // Profile
4941 br.skipBits(4); // SamplingFrequencyIndex
4942 const int32_t NumFrontChannelElements = br.getBits(4);
4943 const int32_t NumSideChannelElements = br.getBits(4);
4944 const int32_t NumBackChannelElements = br.getBits(4);
4945 const int32_t NumLfeChannelElements = br.getBits(2);
4946 br.skipBits(3); // NumAssocDataElements
4947 br.skipBits(4); // NumValidCcElements
4948
4949 const int32_t MonoMixdownPresent = br.getBits(1);
4950
4951 if (MonoMixdownPresent != 0) {
4952 if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
4953 br.skipBits(4); // MonoMixdownElementNumber
4954 }
4955
4956 if (br.numBitsLeft() < 1) return ERROR_MALFORMED;
4957 const int32_t StereoMixdownPresent = br.getBits(1);
4958 if (StereoMixdownPresent != 0) {
4959 if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
4960 br.skipBits(4); // StereoMixdownElementNumber
4961 }
4962
4963 if (br.numBitsLeft() < 1) return ERROR_MALFORMED;
4964 const int32_t MatrixMixdownIndexPresent = br.getBits(1);
4965 if (MatrixMixdownIndexPresent != 0) {
4966 if (br.numBitsLeft() < 3) return ERROR_MALFORMED;
4967 br.skipBits(2); // MatrixMixdownIndex
4968 br.skipBits(1); // PseudoSurroundEnable
4969 }
4970
4971 int i;
4972 for (i=0; i < NumFrontChannelElements; i++) {
4973 if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
4974 const int32_t FrontElementIsCpe = br.getBits(1);
4975 br.skipBits(4); // FrontElementTagSelect
4976 channelsNum += FrontElementIsCpe ? 2 : 1;
4977 }
4978
4979 for (i=0; i < NumSideChannelElements; i++) {
4980 if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
4981 const int32_t SideElementIsCpe = br.getBits(1);
4982 br.skipBits(4); // SideElementTagSelect
4983 channelsNum += SideElementIsCpe ? 2 : 1;
4984 }
4985
4986 for (i=0; i < NumBackChannelElements; i++) {
4987 if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
4988 const int32_t BackElementIsCpe = br.getBits(1);
4989 br.skipBits(4); // BackElementTagSelect
4990 channelsNum += BackElementIsCpe ? 2 : 1;
4991 }
4992 channelsEffectiveNum = channelsNum;
4993
4994 for (i=0; i < NumLfeChannelElements; i++) {
4995 if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
4996 br.skipBits(4); // LfeElementTagSelect
4997 channelsNum += 1;
4998 }
4999 ALOGV("mpeg4 audio channelsNum = %d", channelsNum);
5000 ALOGV("mpeg4 audio channelsEffectiveNum = %d", channelsEffectiveNum);
5001 numChannels = channelsNum;
5002 }
5003 }
5004 }
5005
5006 if (numChannels == 0) {
5007 return ERROR_UNSUPPORTED;
5008 }
5009
5010 if (mLastTrack == NULL)
5011 return ERROR_MALFORMED;
5012
5013 int32_t prevSampleRate;
5014 CHECK(AMediaFormat_getInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, &prevSampleRate));
5015
5016 if (prevSampleRate != sampleRate) {
5017 ALOGV("mpeg4 audio sample rate different from previous setting. "
5018 "was: %d, now: %d", prevSampleRate, sampleRate);
5019 }
5020
5021 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, sampleRate);
5022
5023 int32_t prevChannelCount;
5024 CHECK(AMediaFormat_getInt32(mLastTrack->meta,
5025 AMEDIAFORMAT_KEY_CHANNEL_COUNT, &prevChannelCount));
5026
5027 if (prevChannelCount != numChannels) {
5028 ALOGV("mpeg4 audio channel count different from previous setting. "
5029 "was: %d, now: %d", prevChannelCount, numChannels);
5030 }
5031
5032 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, numChannels);
5033
5034 return OK;
5035 }
5036
adjustRawDefaultFrameSize()5037 void MPEG4Extractor::adjustRawDefaultFrameSize() {
5038 int32_t chanCount = 0;
5039 int32_t bitWidth = 0;
5040 const char *mimeStr = NULL;
5041
5042 if(AMediaFormat_getString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, &mimeStr) &&
5043 !strcasecmp(mimeStr, MEDIA_MIMETYPE_AUDIO_RAW) &&
5044 AMediaFormat_getInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, &chanCount) &&
5045 AMediaFormat_getInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_BITS_PER_SAMPLE, &bitWidth)) {
5046 // samplesize in stsz may not right , so updade default samplesize
5047 mLastTrack->sampleTable->setPredictSampleSize(chanCount * bitWidth / 8);
5048 }
5049 }
5050
5051 ////////////////////////////////////////////////////////////////////////////////
5052
MPEG4Source(AMediaFormat * format,DataSourceHelper * dataSource,int32_t timeScale,const sp<SampleTable> & sampleTable,Vector<SidxEntry> & sidx,const Trex * trex,off64_t firstMoofOffset,const sp<ItemTable> & itemTable,uint64_t elstShiftStartTicks,uint64_t elstInitialEmptyEditTicks)5053 MPEG4Source::MPEG4Source(
5054 AMediaFormat *format,
5055 DataSourceHelper *dataSource,
5056 int32_t timeScale,
5057 const sp<SampleTable> &sampleTable,
5058 Vector<SidxEntry> &sidx,
5059 const Trex *trex,
5060 off64_t firstMoofOffset,
5061 const sp<ItemTable> &itemTable,
5062 uint64_t elstShiftStartTicks,
5063 uint64_t elstInitialEmptyEditTicks)
5064 : mFormat(format),
5065 mDataSource(dataSource),
5066 mTimescale(timeScale),
5067 mSampleTable(sampleTable),
5068 mCurrentSampleIndex(0),
5069 mCurrentFragmentIndex(0),
5070 mSegments(sidx),
5071 mTrex(trex),
5072 mFirstMoofOffset(firstMoofOffset),
5073 mCurrentMoofOffset(firstMoofOffset),
5074 mCurrentMoofSize(0),
5075 mNextMoofOffset(-1),
5076 mCurrentTime(0),
5077 mDefaultEncryptedByteBlock(0),
5078 mDefaultSkipByteBlock(0),
5079 mCurrentSampleInfoAllocSize(0),
5080 mCurrentSampleInfoSizes(NULL),
5081 mCurrentSampleInfoOffsetsAllocSize(0),
5082 mCurrentSampleInfoOffsets(NULL),
5083 mIsAVC(false),
5084 mIsHEVC(false),
5085 mIsDolbyVision(false),
5086 mIsAC4(false),
5087 mIsPcm(false),
5088 mNALLengthSize(0),
5089 mStarted(false),
5090 mBuffer(NULL),
5091 mSrcBufferSize(0),
5092 mSrcBuffer(NULL),
5093 mItemTable(itemTable),
5094 mElstShiftStartTicks(elstShiftStartTicks),
5095 mElstInitialEmptyEditTicks(elstInitialEmptyEditTicks) {
5096
5097 memset(&mTrackFragmentHeaderInfo, 0, sizeof(mTrackFragmentHeaderInfo));
5098
5099 AMediaFormat_getInt32(mFormat,
5100 AMEDIAFORMAT_KEY_CRYPTO_MODE, &mCryptoMode);
5101 mDefaultIVSize = 0;
5102 AMediaFormat_getInt32(mFormat,
5103 AMEDIAFORMAT_KEY_CRYPTO_DEFAULT_IV_SIZE, &mDefaultIVSize);
5104 void *key;
5105 size_t keysize;
5106 if (AMediaFormat_getBuffer(mFormat,
5107 AMEDIAFORMAT_KEY_CRYPTO_KEY, &key, &keysize)) {
5108 CHECK(keysize <= 16);
5109 memset(mCryptoKey, 0, 16);
5110 memcpy(mCryptoKey, key, keysize);
5111 }
5112
5113 AMediaFormat_getInt32(mFormat,
5114 AMEDIAFORMAT_KEY_CRYPTO_ENCRYPTED_BYTE_BLOCK, &mDefaultEncryptedByteBlock);
5115 AMediaFormat_getInt32(mFormat,
5116 AMEDIAFORMAT_KEY_CRYPTO_SKIP_BYTE_BLOCK, &mDefaultSkipByteBlock);
5117
5118 const char *mime;
5119 bool success = AMediaFormat_getString(mFormat, AMEDIAFORMAT_KEY_MIME, &mime);
5120 CHECK(success);
5121
5122 mIsMpegH = !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_MPEGH_MHA1) ||
5123 !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_MPEGH_MHM1);
5124 mIsAVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC);
5125 mIsHEVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC) ||
5126 !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC);
5127 mIsAC4 = !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AC4);
5128 mIsDolbyVision = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_DOLBY_VISION);
5129 mIsHeif = !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC) && mItemTable != NULL;
5130 mIsAvif = !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_AVIF) && mItemTable != NULL;
5131
5132 if (mIsAVC) {
5133 void *data;
5134 size_t size;
5135 CHECK(AMediaFormat_getBuffer(format, AMEDIAFORMAT_KEY_CSD_AVC, &data, &size));
5136
5137 const uint8_t *ptr = (const uint8_t *)data;
5138
5139 CHECK(size >= 7);
5140 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1
5141
5142 // The number of bytes used to encode the length of a NAL unit.
5143 mNALLengthSize = 1 + (ptr[4] & 3);
5144 } else if (mIsHEVC) {
5145 void *data;
5146 size_t size;
5147 CHECK(AMediaFormat_getBuffer(format, AMEDIAFORMAT_KEY_CSD_HEVC, &data, &size));
5148
5149 const uint8_t *ptr = (const uint8_t *)data;
5150
5151 CHECK(size >= 22);
5152 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1
5153
5154 mNALLengthSize = 1 + (ptr[14 + 7] & 3);
5155 } else if (mIsDolbyVision) {
5156 ALOGV("%s DolbyVision stream detected", __FUNCTION__);
5157 void *data;
5158 size_t size;
5159 CHECK(AMediaFormat_getBuffer(format, AMEDIAFORMAT_KEY_CSD_2, &data, &size));
5160
5161 const uint8_t *ptr = (const uint8_t *)data;
5162
5163 CHECK(size == 24);
5164
5165 // dv_major.dv_minor Should be 1.0 or 2.1
5166 CHECK(!((ptr[0] != 1 || ptr[1] != 0) && (ptr[0] != 2 || ptr[1] != 1)));
5167
5168 const uint8_t profile = ptr[2] >> 1;
5169 // profile == (unknown,1,9) --> AVC; profile = (2,3,4,5,6,7,8) --> HEVC;
5170 // profile == (10) --> AV1
5171 if (profile > 1 && profile < 9) {
5172 CHECK(AMediaFormat_getBuffer(format, AMEDIAFORMAT_KEY_CSD_HEVC, &data, &size));
5173
5174 const uint8_t *ptr = (const uint8_t *)data;
5175
5176 CHECK(size >= 22);
5177 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1
5178
5179 mNALLengthSize = 1 + (ptr[14 + 7] & 3);
5180 } else if (10 == profile) {
5181 /* AV1 profile nothing to do */
5182 } else {
5183 CHECK(AMediaFormat_getBuffer(format, AMEDIAFORMAT_KEY_CSD_AVC, &data, &size));
5184 const uint8_t *ptr = (const uint8_t *)data;
5185
5186 CHECK(size >= 7);
5187 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1
5188 // The number of bytes used to encode the length of a NAL unit.
5189 mNALLengthSize = 1 + (ptr[4] & 3);
5190 }
5191 }
5192
5193 mIsPcm = !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_RAW);
5194 mIsAudio = !strncasecmp(mime, "audio/", 6);
5195
5196 int32_t aacObjectType = -1;
5197
5198 if (AMediaFormat_getInt32(format, AMEDIAFORMAT_KEY_AAC_PROFILE, &aacObjectType)) {
5199 mIsUsac = (aacObjectType == AOT_USAC);
5200 }
5201
5202 if (mIsPcm) {
5203 int32_t numChannels = 0;
5204 int32_t bitsPerSample = 0;
5205 CHECK(AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_BITS_PER_SAMPLE, &bitsPerSample));
5206 CHECK(AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_CHANNEL_COUNT, &numChannels));
5207
5208 int32_t bytesPerSample = bitsPerSample >> 3;
5209 int32_t pcmSampleSize = bytesPerSample * numChannels;
5210
5211 size_t maxSampleSize;
5212 status_t err = mSampleTable->getMaxSampleSize(&maxSampleSize);
5213 if (err != OK || maxSampleSize != static_cast<size_t>(pcmSampleSize)
5214 || bitsPerSample != 16) {
5215 // Not supported
5216 mIsPcm = false;
5217 } else {
5218 AMediaFormat_setInt32(mFormat,
5219 AMEDIAFORMAT_KEY_MAX_INPUT_SIZE, pcmSampleSize * kMaxPcmFrameSize);
5220 }
5221 }
5222
5223 CHECK(AMediaFormat_getInt32(format, AMEDIAFORMAT_KEY_TRACK_ID, &mTrackId));
5224 }
5225
init()5226 status_t MPEG4Source::init() {
5227 if (mFirstMoofOffset != 0) {
5228 off64_t offset = mFirstMoofOffset;
5229 return parseChunk(&offset);
5230 }
5231 return OK;
5232 }
5233
~MPEG4Source()5234 MPEG4Source::~MPEG4Source() {
5235 if (mStarted) {
5236 stop();
5237 }
5238 free(mCurrentSampleInfoSizes);
5239 free(mCurrentSampleInfoOffsets);
5240 }
5241
start()5242 media_status_t MPEG4Source::start() {
5243 Mutex::Autolock autoLock(mLock);
5244
5245 CHECK(!mStarted);
5246
5247 int32_t tmp;
5248 CHECK(AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_MAX_INPUT_SIZE, &tmp));
5249 size_t max_size = tmp;
5250
5251 // A somewhat arbitrary limit that should be sufficient for 8k video frames
5252 // If you see the message below for a valid input stream: increase the limit
5253 const size_t kMaxBufferSize = 64 * 1024 * 1024;
5254 if (max_size > kMaxBufferSize) {
5255 ALOGE("bogus max input size: %zu > %zu", max_size, kMaxBufferSize);
5256 return AMEDIA_ERROR_MALFORMED;
5257 }
5258 if (max_size == 0) {
5259 ALOGE("zero max input size");
5260 return AMEDIA_ERROR_MALFORMED;
5261 }
5262
5263 // Allow up to kMaxBuffers, but not if the total exceeds kMaxBufferSize.
5264 const size_t kInitialBuffers = 2;
5265 const size_t kMaxBuffers = 8;
5266 const size_t realMaxBuffers = min(kMaxBufferSize / max_size, kMaxBuffers);
5267 mBufferGroup->init(kInitialBuffers, max_size, realMaxBuffers);
5268 mSrcBuffer = new (std::nothrow) uint8_t[max_size];
5269 if (mSrcBuffer == NULL) {
5270 // file probably specified a bad max size
5271 return AMEDIA_ERROR_MALFORMED;
5272 }
5273 mSrcBufferSize = max_size;
5274
5275 mStarted = true;
5276
5277 return AMEDIA_OK;
5278 }
5279
stop()5280 media_status_t MPEG4Source::stop() {
5281 Mutex::Autolock autoLock(mLock);
5282
5283 CHECK(mStarted);
5284
5285 if (mBuffer != NULL) {
5286 mBuffer->release();
5287 mBuffer = NULL;
5288 }
5289
5290 mSrcBufferSize = 0;
5291 delete[] mSrcBuffer;
5292 mSrcBuffer = NULL;
5293
5294 mStarted = false;
5295 mCurrentSampleIndex = 0;
5296
5297 return AMEDIA_OK;
5298 }
5299
parseChunk(off64_t * offset)5300 status_t MPEG4Source::parseChunk(off64_t *offset) {
5301 uint32_t hdr[2];
5302 if (mDataSource->readAt(*offset, hdr, 8) < 8) {
5303 return ERROR_IO;
5304 }
5305 uint64_t chunk_size = ntohl(hdr[0]);
5306 uint32_t chunk_type = ntohl(hdr[1]);
5307 off64_t data_offset = *offset + 8;
5308
5309 if (chunk_size == 1) {
5310 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
5311 return ERROR_IO;
5312 }
5313 chunk_size = ntoh64(chunk_size);
5314 data_offset += 8;
5315
5316 if (chunk_size < 16) {
5317 // The smallest valid chunk is 16 bytes long in this case.
5318 return ERROR_MALFORMED;
5319 }
5320 } else if (chunk_size < 8) {
5321 // The smallest valid chunk is 8 bytes long.
5322 return ERROR_MALFORMED;
5323 }
5324
5325 char chunk[5];
5326 MakeFourCCString(chunk_type, chunk);
5327 ALOGV("MPEG4Source chunk %s @ %#llx", chunk, (long long)*offset);
5328
5329 off64_t chunk_data_size = *offset + chunk_size - data_offset;
5330
5331 switch(chunk_type) {
5332
5333 case FOURCC("traf"):
5334 case FOURCC("moof"): {
5335 off64_t stop_offset = *offset + chunk_size;
5336 *offset = data_offset;
5337 if (chunk_type == FOURCC("moof")) {
5338 mCurrentMoofSize = chunk_data_size;
5339 }
5340 while (*offset < stop_offset) {
5341 status_t err = parseChunk(offset);
5342 if (err != OK) {
5343 return err;
5344 }
5345 }
5346 if (chunk_type == FOURCC("moof")) {
5347 // *offset points to the box following this moof. Find the next moof from there.
5348
5349 while (true) {
5350 if (mDataSource->readAt(*offset, hdr, 8) < 8) {
5351 // no more box to the end of file.
5352 break;
5353 }
5354 chunk_size = ntohl(hdr[0]);
5355 chunk_type = ntohl(hdr[1]);
5356 if (chunk_size == 1) {
5357 // ISO/IEC 14496-12:2012, 8.8.4 Movie Fragment Box, moof is a Box
5358 // which is defined in 4.2 Object Structure.
5359 // When chunk_size==1, 8 bytes follows as "largesize".
5360 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
5361 return ERROR_IO;
5362 }
5363 chunk_size = ntoh64(chunk_size);
5364 if (chunk_size < 16) {
5365 // The smallest valid chunk is 16 bytes long in this case.
5366 return ERROR_MALFORMED;
5367 }
5368 } else if (chunk_size == 0) {
5369 // next box extends to end of file.
5370 } else if (chunk_size < 8) {
5371 // The smallest valid chunk is 8 bytes long in this case.
5372 return ERROR_MALFORMED;
5373 }
5374
5375 if (chunk_type == FOURCC("moof")) {
5376 mNextMoofOffset = *offset;
5377 break;
5378 } else if (chunk_type == FOURCC("mdat")) {
5379 parseChunk(offset);
5380 continue;
5381 } else if (chunk_size == 0) {
5382 break;
5383 }
5384 *offset += chunk_size;
5385 }
5386 }
5387 break;
5388 }
5389
5390 case FOURCC("tfhd"): {
5391 status_t err;
5392 if ((err = parseTrackFragmentHeader(data_offset, chunk_data_size)) != OK) {
5393 return err;
5394 }
5395 *offset += chunk_size;
5396 break;
5397 }
5398
5399 case FOURCC("trun"): {
5400 status_t err;
5401 if (mLastParsedTrackId == mTrackId) {
5402 if ((err = parseTrackFragmentRun(data_offset, chunk_data_size)) != OK) {
5403 return err;
5404 }
5405 }
5406
5407 *offset += chunk_size;
5408 break;
5409 }
5410
5411 case FOURCC("saiz"): {
5412 status_t err;
5413 if ((err = parseSampleAuxiliaryInformationSizes(data_offset, chunk_data_size)) != OK) {
5414 return err;
5415 }
5416 *offset += chunk_size;
5417 break;
5418 }
5419 case FOURCC("saio"): {
5420 status_t err;
5421 if ((err = parseSampleAuxiliaryInformationOffsets(data_offset, chunk_data_size))
5422 != OK) {
5423 return err;
5424 }
5425 *offset += chunk_size;
5426 break;
5427 }
5428
5429 case FOURCC("senc"): {
5430 status_t err;
5431 if ((err = parseSampleEncryption(data_offset, chunk_data_size)) != OK) {
5432 return err;
5433 }
5434 *offset += chunk_size;
5435 break;
5436 }
5437
5438 case FOURCC("mdat"): {
5439 // parse DRM info if present
5440 ALOGV("MPEG4Source::parseChunk mdat");
5441 // if saiz/saoi was previously observed, do something with the sampleinfos
5442 status_t err = OK;
5443 auto kv = mDrmOffsets.lower_bound(*offset);
5444 if (kv != mDrmOffsets.end()) {
5445 auto drmoffset = kv->first;
5446 auto flags = kv->second;
5447 mDrmOffsets.erase(kv);
5448 ALOGV("mdat chunk_size %" PRIu64 " drmoffset %" PRId64 " offset %" PRId64,
5449 chunk_size, drmoffset, *offset);
5450 if (chunk_size >= drmoffset - *offset) {
5451 err = parseClearEncryptedSizes(drmoffset, false, flags,
5452 chunk_size - (drmoffset - *offset));
5453 }
5454 }
5455 if (err != OK) {
5456 return err;
5457 }
5458 *offset += chunk_size;
5459 break;
5460 }
5461
5462 default: {
5463 *offset += chunk_size;
5464 break;
5465 }
5466 }
5467 return OK;
5468 }
5469
parseSampleAuxiliaryInformationSizes(off64_t offset,off64_t size)5470 status_t MPEG4Source::parseSampleAuxiliaryInformationSizes(
5471 off64_t offset, off64_t size) {
5472 ALOGV("parseSampleAuxiliaryInformationSizes");
5473 if (size < 9) {
5474 return -EINVAL;
5475 }
5476 // 14496-12 8.7.12
5477 uint8_t version;
5478 if (mDataSource->readAt(
5479 offset, &version, sizeof(version))
5480 < (ssize_t)sizeof(version)) {
5481 return ERROR_IO;
5482 }
5483
5484 if (version != 0) {
5485 return ERROR_UNSUPPORTED;
5486 }
5487 offset++;
5488 size--;
5489
5490 uint32_t flags;
5491 if (!mDataSource->getUInt24(offset, &flags)) {
5492 return ERROR_IO;
5493 }
5494 offset += 3;
5495 size -= 3;
5496
5497 if (flags & 1) {
5498 if (size < 13) {
5499 return -EINVAL;
5500 }
5501 uint32_t tmp;
5502 if (!mDataSource->getUInt32(offset, &tmp)) {
5503 return ERROR_MALFORMED;
5504 }
5505 mCurrentAuxInfoType = tmp;
5506 offset += 4;
5507 size -= 4;
5508 if (!mDataSource->getUInt32(offset, &tmp)) {
5509 return ERROR_MALFORMED;
5510 }
5511 mCurrentAuxInfoTypeParameter = tmp;
5512 offset += 4;
5513 size -= 4;
5514 }
5515
5516 uint8_t defsize;
5517 if (mDataSource->readAt(offset, &defsize, 1) != 1) {
5518 return ERROR_MALFORMED;
5519 }
5520 mCurrentDefaultSampleInfoSize = defsize;
5521 offset++;
5522 size--;
5523
5524 uint32_t smplcnt;
5525 if (!mDataSource->getUInt32(offset, &smplcnt)) {
5526 return ERROR_MALFORMED;
5527 }
5528 mCurrentSampleInfoCount = smplcnt;
5529 offset += 4;
5530 size -= 4;
5531 if (mCurrentDefaultSampleInfoSize != 0) {
5532 ALOGV("@@@@ using default sample info size of %d", mCurrentDefaultSampleInfoSize);
5533 return OK;
5534 }
5535 if(smplcnt > size) {
5536 ALOGW("b/124525515 - smplcnt(%u) > size(%ld)", (unsigned int)smplcnt, (unsigned long)size);
5537 android_errorWriteLog(0x534e4554, "124525515");
5538 return -EINVAL;
5539 }
5540 if (smplcnt > mCurrentSampleInfoAllocSize) {
5541 uint8_t * newPtr = (uint8_t*) realloc(mCurrentSampleInfoSizes, smplcnt);
5542 if (newPtr == NULL) {
5543 ALOGE("failed to realloc %u -> %u", mCurrentSampleInfoAllocSize, smplcnt);
5544 return NO_MEMORY;
5545 }
5546 mCurrentSampleInfoSizes = newPtr;
5547 mCurrentSampleInfoAllocSize = smplcnt;
5548 }
5549
5550 mDataSource->readAt(offset, mCurrentSampleInfoSizes, smplcnt);
5551 return OK;
5552 }
5553
parseSampleAuxiliaryInformationOffsets(off64_t offset,off64_t size)5554 status_t MPEG4Source::parseSampleAuxiliaryInformationOffsets(
5555 off64_t offset, off64_t size) {
5556 ALOGV("parseSampleAuxiliaryInformationOffsets");
5557 if (size < 8) {
5558 return -EINVAL;
5559 }
5560 // 14496-12 8.7.13
5561 uint8_t version;
5562 if (mDataSource->readAt(offset, &version, sizeof(version)) != 1) {
5563 return ERROR_IO;
5564 }
5565 offset++;
5566 size--;
5567
5568 uint32_t flags;
5569 if (!mDataSource->getUInt24(offset, &flags)) {
5570 return ERROR_IO;
5571 }
5572 offset += 3;
5573 size -= 3;
5574
5575 uint32_t entrycount;
5576 if (!mDataSource->getUInt32(offset, &entrycount)) {
5577 return ERROR_IO;
5578 }
5579 offset += 4;
5580 size -= 4;
5581 if (entrycount == 0) {
5582 return OK;
5583 }
5584 if (entrycount > UINT32_MAX / 8) {
5585 return ERROR_MALFORMED;
5586 }
5587
5588 if (entrycount > mCurrentSampleInfoOffsetsAllocSize) {
5589 uint64_t *newPtr = (uint64_t *)realloc(mCurrentSampleInfoOffsets, entrycount * 8);
5590 if (newPtr == NULL) {
5591 ALOGE("failed to realloc %u -> %u",
5592 mCurrentSampleInfoOffsetsAllocSize, entrycount * 8);
5593 return NO_MEMORY;
5594 }
5595 mCurrentSampleInfoOffsets = newPtr;
5596 mCurrentSampleInfoOffsetsAllocSize = entrycount;
5597 }
5598 mCurrentSampleInfoOffsetCount = entrycount;
5599
5600 if (mCurrentSampleInfoOffsets == NULL) {
5601 return OK;
5602 }
5603
5604 for (size_t i = 0; i < entrycount; i++) {
5605 if (version == 0) {
5606 if (size < 4) {
5607 ALOGW("b/124526959");
5608 android_errorWriteLog(0x534e4554, "124526959");
5609 return -EINVAL;
5610 }
5611 uint32_t tmp;
5612 if (!mDataSource->getUInt32(offset, &tmp)) {
5613 return ERROR_IO;
5614 }
5615 mCurrentSampleInfoOffsets[i] = tmp;
5616 offset += 4;
5617 size -= 4;
5618 } else {
5619 if (size < 8) {
5620 ALOGW("b/124526959");
5621 android_errorWriteLog(0x534e4554, "124526959");
5622 return -EINVAL;
5623 }
5624 uint64_t tmp;
5625 if (!mDataSource->getUInt64(offset, &tmp)) {
5626 return ERROR_IO;
5627 }
5628 mCurrentSampleInfoOffsets[i] = tmp;
5629 offset += 8;
5630 size -= 8;
5631 }
5632 }
5633
5634 // parse clear/encrypted data
5635
5636 off64_t drmoffset = mCurrentSampleInfoOffsets[0]; // from moof
5637
5638 drmoffset += mCurrentMoofOffset;
5639 mDrmOffsets[drmoffset] = flags;
5640 ALOGV("saio drmoffset %" PRId64 " flags %u", drmoffset, flags);
5641
5642 return OK;
5643 }
5644
parseClearEncryptedSizes(off64_t offset,bool isSampleEncryption,uint32_t flags,off64_t size)5645 status_t MPEG4Source::parseClearEncryptedSizes(
5646 off64_t offset, bool isSampleEncryption, uint32_t flags, off64_t size) {
5647
5648 int32_t ivlength;
5649 if (!AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_CRYPTO_DEFAULT_IV_SIZE, &ivlength)) {
5650 return ERROR_MALFORMED;
5651 }
5652
5653 // only 0, 8 and 16 byte initialization vectors are supported
5654 if (ivlength != 0 && ivlength != 8 && ivlength != 16) {
5655 ALOGW("unsupported IV length: %d", ivlength);
5656 return ERROR_MALFORMED;
5657 }
5658
5659 uint32_t sampleCount = mCurrentSampleInfoCount;
5660 if (isSampleEncryption) {
5661 if (size < 4) {
5662 return ERROR_MALFORMED;
5663 }
5664 if (!mDataSource->getUInt32(offset, &sampleCount)) {
5665 return ERROR_IO;
5666 }
5667 offset += 4;
5668 size -= 4;
5669 }
5670
5671 // read CencSampleAuxiliaryDataFormats
5672 for (size_t i = 0; i < sampleCount; i++) {
5673 if (i >= mCurrentSamples.size()) {
5674 ALOGW("too few samples");
5675 break;
5676 }
5677 Sample *smpl = &mCurrentSamples.editItemAt(i);
5678 if (!smpl->clearsizes.isEmpty()) {
5679 continue;
5680 }
5681
5682 memset(smpl->iv, 0, 16);
5683 if (size < ivlength) {
5684 return ERROR_MALFORMED;
5685 }
5686 if (mDataSource->readAt(offset, smpl->iv, ivlength) != ivlength) {
5687 return ERROR_IO;
5688 }
5689
5690 offset += ivlength;
5691 size -= ivlength;
5692
5693 bool readSubsamples;
5694 if (isSampleEncryption) {
5695 readSubsamples = flags & 2;
5696 } else {
5697 int32_t smplinfosize = mCurrentDefaultSampleInfoSize;
5698 if (smplinfosize == 0) {
5699 smplinfosize = mCurrentSampleInfoSizes[i];
5700 }
5701 readSubsamples = smplinfosize > ivlength;
5702 }
5703
5704 if (readSubsamples) {
5705 uint16_t numsubsamples;
5706 if (size < 2) {
5707 return ERROR_MALFORMED;
5708 }
5709 if (!mDataSource->getUInt16(offset, &numsubsamples)) {
5710 return ERROR_IO;
5711 }
5712 offset += 2;
5713 size -= 2;
5714 for (size_t j = 0; j < numsubsamples; j++) {
5715 uint16_t numclear;
5716 uint32_t numencrypted;
5717 if (size < 6) {
5718 return ERROR_MALFORMED;
5719 }
5720 if (!mDataSource->getUInt16(offset, &numclear)) {
5721 return ERROR_IO;
5722 }
5723 offset += 2;
5724 if (!mDataSource->getUInt32(offset, &numencrypted)) {
5725 return ERROR_IO;
5726 }
5727 offset += 4;
5728 size -= 6;
5729 smpl->clearsizes.add(numclear);
5730 smpl->encryptedsizes.add(numencrypted);
5731 }
5732 } else {
5733 smpl->clearsizes.add(0);
5734 smpl->encryptedsizes.add(smpl->size);
5735 }
5736 }
5737
5738 return OK;
5739 }
5740
parseSampleEncryption(off64_t offset,off64_t chunk_data_size)5741 status_t MPEG4Source::parseSampleEncryption(off64_t offset, off64_t chunk_data_size) {
5742 uint32_t flags;
5743 if (chunk_data_size < 4) {
5744 return ERROR_MALFORMED;
5745 }
5746 if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags
5747 return ERROR_MALFORMED;
5748 }
5749 return parseClearEncryptedSizes(offset + 4, true, flags, chunk_data_size - 4);
5750 }
5751
parseTrackFragmentHeader(off64_t offset,off64_t size)5752 status_t MPEG4Source::parseTrackFragmentHeader(off64_t offset, off64_t size) {
5753
5754 if (size < 8) {
5755 return -EINVAL;
5756 }
5757
5758 uint32_t flags;
5759 if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags
5760 return ERROR_MALFORMED;
5761 }
5762
5763 if (flags & 0xff000000) {
5764 return -EINVAL;
5765 }
5766
5767 if (!mDataSource->getUInt32(offset + 4, (uint32_t*)&mLastParsedTrackId)) {
5768 return ERROR_MALFORMED;
5769 }
5770
5771 if (mLastParsedTrackId != mTrackId) {
5772 // this is not the right track, skip it
5773 return OK;
5774 }
5775
5776 mTrackFragmentHeaderInfo.mFlags = flags;
5777 mTrackFragmentHeaderInfo.mTrackID = mLastParsedTrackId;
5778 offset += 8;
5779 size -= 8;
5780
5781 ALOGV("fragment header: %08x %08x", flags, mTrackFragmentHeaderInfo.mTrackID);
5782
5783 if (flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent) {
5784 if (size < 8) {
5785 return -EINVAL;
5786 }
5787
5788 if (!mDataSource->getUInt64(offset, &mTrackFragmentHeaderInfo.mBaseDataOffset)) {
5789 return ERROR_MALFORMED;
5790 }
5791 offset += 8;
5792 size -= 8;
5793 }
5794
5795 if (flags & TrackFragmentHeaderInfo::kSampleDescriptionIndexPresent) {
5796 if (size < 4) {
5797 return -EINVAL;
5798 }
5799
5800 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mSampleDescriptionIndex)) {
5801 return ERROR_MALFORMED;
5802 }
5803 offset += 4;
5804 size -= 4;
5805 }
5806
5807 if (flags & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
5808 if (size < 4) {
5809 return -EINVAL;
5810 }
5811
5812 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleDuration)) {
5813 return ERROR_MALFORMED;
5814 }
5815 offset += 4;
5816 size -= 4;
5817 }
5818
5819 if (flags & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) {
5820 if (size < 4) {
5821 return -EINVAL;
5822 }
5823
5824 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleSize)) {
5825 return ERROR_MALFORMED;
5826 }
5827 offset += 4;
5828 size -= 4;
5829 }
5830
5831 if (flags & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) {
5832 if (size < 4) {
5833 return -EINVAL;
5834 }
5835
5836 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleFlags)) {
5837 return ERROR_MALFORMED;
5838 }
5839 offset += 4;
5840 size -= 4;
5841 }
5842
5843 if (!(flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent)) {
5844 mTrackFragmentHeaderInfo.mBaseDataOffset = mCurrentMoofOffset;
5845 }
5846
5847 mTrackFragmentHeaderInfo.mDataOffset = 0;
5848 return OK;
5849 }
5850
parseTrackFragmentRun(off64_t offset,off64_t size)5851 status_t MPEG4Source::parseTrackFragmentRun(off64_t offset, off64_t size) {
5852
5853 ALOGV("MPEG4Source::parseTrackFragmentRun");
5854 if (size < 8) {
5855 return -EINVAL;
5856 }
5857
5858 enum {
5859 kDataOffsetPresent = 0x01,
5860 kFirstSampleFlagsPresent = 0x04,
5861 kSampleDurationPresent = 0x100,
5862 kSampleSizePresent = 0x200,
5863 kSampleFlagsPresent = 0x400,
5864 kSampleCompositionTimeOffsetPresent = 0x800,
5865 };
5866
5867 uint32_t flags;
5868 if (!mDataSource->getUInt32(offset, &flags)) {
5869 return ERROR_MALFORMED;
5870 }
5871 // |version| only affects SampleCompositionTimeOffset field.
5872 // If version == 0, SampleCompositionTimeOffset is uint32_t;
5873 // Otherwise, SampleCompositionTimeOffset is int32_t.
5874 // Sample.compositionOffset is defined as int32_t.
5875 uint8_t version = flags >> 24;
5876 flags &= 0xffffff;
5877 ALOGV("fragment run version: 0x%02x, flags: 0x%06x", version, flags);
5878
5879 if ((flags & kFirstSampleFlagsPresent) && (flags & kSampleFlagsPresent)) {
5880 // These two shall not be used together.
5881 return -EINVAL;
5882 }
5883
5884 uint32_t sampleCount;
5885 if (!mDataSource->getUInt32(offset + 4, &sampleCount)) {
5886 return ERROR_MALFORMED;
5887 }
5888 offset += 8;
5889 size -= 8;
5890
5891 uint64_t dataOffset = mTrackFragmentHeaderInfo.mDataOffset;
5892
5893 uint32_t firstSampleFlags = 0;
5894
5895 if (flags & kDataOffsetPresent) {
5896 if (size < 4) {
5897 return -EINVAL;
5898 }
5899
5900 uint32_t dataOffsetDelta;
5901 if (!mDataSource->getUInt32(offset, &dataOffsetDelta)) {
5902 return ERROR_MALFORMED;
5903 }
5904
5905 if (__builtin_add_overflow(
5906 mTrackFragmentHeaderInfo.mBaseDataOffset, dataOffsetDelta, &dataOffset)) {
5907 ALOGW("b/232242894 mBaseDataOffset(%" PRIu64 ") + dataOffsetDelta(%u) overflows uint64",
5908 mTrackFragmentHeaderInfo.mBaseDataOffset, dataOffsetDelta);
5909 android_errorWriteLog(0x534e4554, "232242894");
5910 return ERROR_MALFORMED;
5911 }
5912
5913 offset += 4;
5914 size -= 4;
5915 }
5916
5917 if (flags & kFirstSampleFlagsPresent) {
5918 if (size < 4) {
5919 return -EINVAL;
5920 }
5921
5922 if (!mDataSource->getUInt32(offset, &firstSampleFlags)) {
5923 return ERROR_MALFORMED;
5924 }
5925 offset += 4;
5926 size -= 4;
5927 }
5928
5929 uint32_t sampleDuration = 0, sampleSize = 0, sampleFlags = 0,
5930 sampleCtsOffset = 0;
5931
5932 size_t bytesPerSample = 0;
5933 if (flags & kSampleDurationPresent) {
5934 bytesPerSample += 4;
5935 } else if (mTrackFragmentHeaderInfo.mFlags
5936 & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
5937 sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration;
5938 } else if (mTrex) {
5939 sampleDuration = mTrex->default_sample_duration;
5940 }
5941
5942 if (flags & kSampleSizePresent) {
5943 bytesPerSample += 4;
5944 } else {
5945 sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize;
5946 #ifdef VERY_VERY_VERBOSE_LOGGING
5947 // We don't expect this, but also want to avoid spamming the log if
5948 // we hit this case.
5949 if (!(mTrackFragmentHeaderInfo.mFlags
5950 & TrackFragmentHeaderInfo::kDefaultSampleSizePresent)) {
5951 ALOGW("No sample size specified");
5952 }
5953 #endif
5954 }
5955
5956 if (flags & kSampleFlagsPresent) {
5957 bytesPerSample += 4;
5958 } else {
5959 sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags;
5960 #ifdef VERY_VERY_VERBOSE_LOGGING
5961 // We don't expect this, but also want to avoid spamming the log if
5962 // we hit this case.
5963 if (!(mTrackFragmentHeaderInfo.mFlags
5964 & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent)) {
5965 ALOGW("No sample flags specified");
5966 }
5967 #endif
5968 }
5969
5970 if (flags & kSampleCompositionTimeOffsetPresent) {
5971 bytesPerSample += 4;
5972 } else {
5973 sampleCtsOffset = 0;
5974 }
5975
5976 if (bytesPerSample != 0) {
5977 if (size < (off64_t)sampleCount * bytesPerSample) {
5978 return -EINVAL;
5979 }
5980 } else {
5981 if (sampleDuration == 0) {
5982 ALOGW("b/123389881 sampleDuration == 0");
5983 android_errorWriteLog(0x534e4554, "124389881 zero");
5984 return -EINVAL;
5985 }
5986
5987 // apply some quick (vs strict legality) checks
5988 //
5989 static constexpr uint32_t kMaxTrunSampleCount = 10000;
5990 if (sampleCount > kMaxTrunSampleCount) {
5991 ALOGW("b/123389881 sampleCount(%u) > kMaxTrunSampleCount(%u)",
5992 sampleCount, kMaxTrunSampleCount);
5993 android_errorWriteLog(0x534e4554, "124389881 count");
5994 return -EINVAL;
5995 }
5996 }
5997
5998 Sample tmp;
5999 for (uint32_t i = 0; i < sampleCount; ++i) {
6000 if (flags & kSampleDurationPresent) {
6001 if (!mDataSource->getUInt32(offset, &sampleDuration)) {
6002 return ERROR_MALFORMED;
6003 }
6004 offset += 4;
6005 }
6006
6007 if (flags & kSampleSizePresent) {
6008 if (!mDataSource->getUInt32(offset, &sampleSize)) {
6009 return ERROR_MALFORMED;
6010 }
6011 offset += 4;
6012 }
6013
6014 if (flags & kSampleFlagsPresent) {
6015 if (!mDataSource->getUInt32(offset, &sampleFlags)) {
6016 return ERROR_MALFORMED;
6017 }
6018 offset += 4;
6019 }
6020
6021 if (flags & kSampleCompositionTimeOffsetPresent) {
6022 if (!mDataSource->getUInt32(offset, &sampleCtsOffset)) {
6023 return ERROR_MALFORMED;
6024 }
6025 offset += 4;
6026 }
6027
6028 ALOGV("adding sample %d at offset 0x%08" PRIx64 ", size %u, duration %u, "
6029 " flags 0x%08x ctsOffset %" PRIu32, i + 1,
6030 dataOffset, sampleSize, sampleDuration,
6031 (flags & kFirstSampleFlagsPresent) && i == 0
6032 ? firstSampleFlags : sampleFlags, sampleCtsOffset);
6033 tmp.offset = dataOffset;
6034 tmp.size = sampleSize;
6035 tmp.duration = sampleDuration;
6036 tmp.compositionOffset = sampleCtsOffset;
6037 memset(tmp.iv, 0, sizeof(tmp.iv));
6038 if (mCurrentSamples.add(tmp) < 0) {
6039 ALOGW("b/123389881 failed saving sample(n=%zu)", mCurrentSamples.size());
6040 android_errorWriteLog(0x534e4554, "124389881 allocation");
6041 mCurrentSamples.clear();
6042 return NO_MEMORY;
6043 }
6044
6045 if (__builtin_add_overflow(dataOffset, sampleSize, &dataOffset)) {
6046 ALOGW("b/232242894 dataOffset(%" PRIu64 ") + sampleSize(%u) overflows uint64",
6047 dataOffset, sampleSize);
6048 android_errorWriteLog(0x534e4554, "232242894");
6049 return ERROR_MALFORMED;
6050 }
6051 }
6052
6053 mTrackFragmentHeaderInfo.mDataOffset = dataOffset;
6054
6055 return OK;
6056 }
6057
getFormat(AMediaFormat * meta)6058 media_status_t MPEG4Source::getFormat(AMediaFormat *meta) {
6059 Mutex::Autolock autoLock(mLock);
6060 AMediaFormat_copy(meta, mFormat);
6061 return AMEDIA_OK;
6062 }
6063
parseNALSize(const uint8_t * data) const6064 size_t MPEG4Source::parseNALSize(const uint8_t *data) const {
6065 switch (mNALLengthSize) {
6066 case 1:
6067 return *data;
6068 case 2:
6069 return U16_AT(data);
6070 case 3:
6071 return ((size_t)data[0] << 16) | U16_AT(&data[1]);
6072 case 4:
6073 return U32_AT(data);
6074 }
6075
6076 // This cannot happen, mNALLengthSize springs to life by adding 1 to
6077 // a 2-bit integer.
6078 CHECK(!"Should not be here.");
6079
6080 return 0;
6081 }
6082
parseHEVCLayerId(const uint8_t * data,size_t size)6083 int32_t MPEG4Source::parseHEVCLayerId(const uint8_t *data, size_t size) {
6084 if (data == nullptr || size < mNALLengthSize + 2) {
6085 return -1;
6086 }
6087
6088 // HEVC NAL-header (16-bit)
6089 // 1 6 6 3
6090 // |-|uuuuuu|------|iii|
6091 // ^ ^
6092 // NAL_type layer_id + 1
6093 //
6094 // Layer-id is non-zero only for Temporal Sub-layer Access pictures (TSA)
6095 enum {
6096 TSA_N = 2,
6097 TSA_R = 3,
6098 STSA_N = 4,
6099 STSA_R = 5,
6100 };
6101
6102 data += mNALLengthSize;
6103 uint16_t nalHeader = data[0] << 8 | data[1];
6104
6105 uint16_t nalType = (nalHeader >> 9) & 0x3Fu;
6106 if (nalType == TSA_N || nalType == TSA_R || nalType == STSA_N || nalType == STSA_R) {
6107 int32_t layerIdPlusOne = nalHeader & 0x7u;
6108 ALOGD_IF(layerIdPlusOne == 0, "got layerId 0 for TSA picture");
6109 return layerIdPlusOne - 1;
6110 }
6111 return 0;
6112 }
6113
read(MediaBufferHelper ** out,const ReadOptions * options)6114 media_status_t MPEG4Source::read(
6115 MediaBufferHelper **out, const ReadOptions *options) {
6116 Mutex::Autolock autoLock(mLock);
6117
6118 CHECK(mStarted);
6119
6120 if (options != nullptr && options->getNonBlocking() && !mBufferGroup->has_buffers()) {
6121 *out = nullptr;
6122 return AMEDIA_ERROR_WOULD_BLOCK;
6123 }
6124
6125 if (mFirstMoofOffset > 0) {
6126 return fragmentedRead(out, options);
6127 }
6128
6129 *out = NULL;
6130
6131 int64_t targetSampleTimeUs = -1;
6132
6133 int64_t seekTimeUs;
6134 ReadOptions::SeekMode mode;
6135
6136 if (options && options->getSeekTo(&seekTimeUs, &mode)) {
6137 ALOGV("seekTimeUs:%" PRId64, seekTimeUs);
6138 if (mIsHeif || mIsAvif) {
6139 CHECK(mSampleTable == NULL);
6140 CHECK(mItemTable != NULL);
6141 int32_t imageIndex;
6142 if (!AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_TRACK_ID, &imageIndex)) {
6143 return AMEDIA_ERROR_MALFORMED;
6144 }
6145
6146 status_t err;
6147 if (seekTimeUs >= 0) {
6148 err = mItemTable->findImageItem(imageIndex, &mCurrentSampleIndex);
6149 } else {
6150 err = mItemTable->findThumbnailItem(imageIndex, &mCurrentSampleIndex);
6151 }
6152 if (err != OK) {
6153 return AMEDIA_ERROR_UNKNOWN;
6154 }
6155 } else {
6156 uint32_t findFlags = 0;
6157 switch (mode) {
6158 case ReadOptions::SEEK_PREVIOUS_SYNC:
6159 findFlags = SampleTable::kFlagBefore;
6160 break;
6161 case ReadOptions::SEEK_NEXT_SYNC:
6162 findFlags = SampleTable::kFlagAfter;
6163 break;
6164 case ReadOptions::SEEK_CLOSEST_SYNC:
6165 case ReadOptions::SEEK_CLOSEST:
6166 findFlags = SampleTable::kFlagClosest;
6167 break;
6168 case ReadOptions::SEEK_FRAME_INDEX:
6169 findFlags = SampleTable::kFlagFrameIndex;
6170 break;
6171 default:
6172 CHECK(!"Should not be here.");
6173 break;
6174 }
6175 if( mode != ReadOptions::SEEK_FRAME_INDEX) {
6176 int64_t elstInitialEmptyEditUs = 0, elstShiftStartUs = 0;
6177 if (mElstInitialEmptyEditTicks > 0) {
6178 elstInitialEmptyEditUs = ((long double)mElstInitialEmptyEditTicks * 1000000) /
6179 mTimescale;
6180 /* Sample's composition time from ctts/stts entries are non-negative(>=0).
6181 * Hence, lower bound on seekTimeUs is 0.
6182 */
6183 seekTimeUs = std::max(seekTimeUs - elstInitialEmptyEditUs, (int64_t)0);
6184 }
6185 if (mElstShiftStartTicks > 0) {
6186 elstShiftStartUs = ((long double)mElstShiftStartTicks * 1000000) / mTimescale;
6187 seekTimeUs += elstShiftStartUs;
6188 }
6189 ALOGV("shifted seekTimeUs:%" PRId64 ", elstInitialEmptyEditUs:%" PRIu64
6190 ", elstShiftStartUs:%" PRIu64, seekTimeUs, elstInitialEmptyEditUs,
6191 elstShiftStartUs);
6192 }
6193
6194 uint32_t sampleIndex;
6195 status_t err = mSampleTable->findSampleAtTime(
6196 seekTimeUs, 1000000, mTimescale,
6197 &sampleIndex, findFlags);
6198
6199 if (mode == ReadOptions::SEEK_CLOSEST
6200 || mode == ReadOptions::SEEK_FRAME_INDEX) {
6201 // We found the closest sample already, now we want the sync
6202 // sample preceding it (or the sample itself of course), even
6203 // if the subsequent sync sample is closer.
6204 findFlags = SampleTable::kFlagBefore;
6205 }
6206
6207 uint32_t syncSampleIndex = sampleIndex;
6208 // assume every non-USAC/non-MPEGH audio sample is a sync sample.
6209 // This works around
6210 // seek issues with files that were incorrectly written with an
6211 // empty or single-sample stss block for the audio track
6212 if (err == OK && (!mIsAudio || mIsUsac || mIsMpegH)) {
6213 err = mSampleTable->findSyncSampleNear(
6214 sampleIndex, &syncSampleIndex, findFlags);
6215 }
6216
6217 uint64_t sampleTime;
6218 if (err == OK) {
6219 err = mSampleTable->getMetaDataForSample(
6220 sampleIndex, NULL, NULL, &sampleTime);
6221 }
6222
6223 if (err != OK) {
6224 if (err == ERROR_OUT_OF_RANGE) {
6225 // An attempt to seek past the end of the stream would
6226 // normally cause this ERROR_OUT_OF_RANGE error. Propagating
6227 // this all the way to the MediaPlayer would cause abnormal
6228 // termination. Legacy behaviour appears to be to behave as if
6229 // we had seeked to the end of stream, ending normally.
6230 return AMEDIA_ERROR_END_OF_STREAM;
6231 }
6232 ALOGV("end of stream");
6233 return AMEDIA_ERROR_UNKNOWN;
6234 }
6235
6236 if (mode == ReadOptions::SEEK_CLOSEST
6237 || mode == ReadOptions::SEEK_FRAME_INDEX) {
6238 if (mElstInitialEmptyEditTicks > 0) {
6239 sampleTime += mElstInitialEmptyEditTicks;
6240 }
6241 if (mElstShiftStartTicks > 0){
6242 if (sampleTime > mElstShiftStartTicks) {
6243 sampleTime -= mElstShiftStartTicks;
6244 } else {
6245 sampleTime = 0;
6246 }
6247 }
6248 targetSampleTimeUs = (sampleTime * 1000000ll) / mTimescale;
6249 }
6250
6251 #if 0
6252 uint32_t syncSampleTime;
6253 CHECK_EQ(OK, mSampleTable->getMetaDataForSample(
6254 syncSampleIndex, NULL, NULL, &syncSampleTime));
6255
6256 ALOGI("seek to time %lld us => sample at time %lld us, "
6257 "sync sample at time %lld us",
6258 seekTimeUs,
6259 sampleTime * 1000000ll / mTimescale,
6260 syncSampleTime * 1000000ll / mTimescale);
6261 #endif
6262
6263 mCurrentSampleIndex = syncSampleIndex;
6264 }
6265
6266 if (mBuffer != NULL) {
6267 mBuffer->release();
6268 mBuffer = NULL;
6269 }
6270
6271 // fall through
6272 }
6273
6274 off64_t offset = 0;
6275 size_t size = 0;
6276 int64_t cts;
6277 uint64_t stts;
6278 bool isSyncSample;
6279 bool newBuffer = false;
6280 if (mBuffer == NULL) {
6281 newBuffer = true;
6282
6283 status_t err;
6284 if (!mIsHeif && !mIsAvif) {
6285 err = mSampleTable->getMetaDataForSample(mCurrentSampleIndex, &offset, &size,
6286 (uint64_t*)&cts, &isSyncSample, &stts);
6287 if(err == OK) {
6288 if (mElstInitialEmptyEditTicks > 0) {
6289 cts += mElstInitialEmptyEditTicks;
6290 }
6291 if (mElstShiftStartTicks > 0) {
6292 // cts can be negative. for example, initial audio samples for gapless playback.
6293 cts -= (int64_t)mElstShiftStartTicks;
6294 }
6295 }
6296 } else {
6297 err = mItemTable->getImageOffsetAndSize(
6298 options && options->getSeekTo(&seekTimeUs, &mode) ?
6299 &mCurrentSampleIndex : NULL, &offset, &size);
6300
6301 cts = stts = 0;
6302 isSyncSample = 0;
6303 ALOGV("image offset %lld, size %zu", (long long)offset, size);
6304 }
6305
6306 if (err != OK) {
6307 if (err == ERROR_END_OF_STREAM) {
6308 return AMEDIA_ERROR_END_OF_STREAM;
6309 }
6310 return AMEDIA_ERROR_UNKNOWN;
6311 }
6312
6313 err = mBufferGroup->acquire_buffer(&mBuffer);
6314
6315 if (err != OK) {
6316 CHECK(mBuffer == NULL);
6317 return AMEDIA_ERROR_UNKNOWN;
6318 }
6319 if (size > mBuffer->size()) {
6320 ALOGE("buffer too small: %zu > %zu", size, mBuffer->size());
6321 mBuffer->release();
6322 mBuffer = NULL;
6323 return AMEDIA_ERROR_UNKNOWN; // ERROR_BUFFER_TOO_SMALL
6324 }
6325 }
6326
6327 if (!mIsAVC && !mIsHEVC && !(mIsDolbyVision && mNALLengthSize) && !mIsAC4) {
6328 if (newBuffer) {
6329 if (mIsPcm) {
6330 // The twos' PCM block reader assumes that all samples has the same size.
6331 uint32_t lastSampleIndexInChunk = mSampleTable->getLastSampleIndexInChunk();
6332 if (lastSampleIndexInChunk < mCurrentSampleIndex) {
6333 mBuffer->release();
6334 mBuffer = nullptr;
6335 return AMEDIA_ERROR_UNKNOWN;
6336 }
6337 uint32_t samplesToRead = lastSampleIndexInChunk - mCurrentSampleIndex + 1;
6338 if (samplesToRead > kMaxPcmFrameSize) {
6339 samplesToRead = kMaxPcmFrameSize;
6340 }
6341
6342 ALOGV("Reading %d PCM frames of size %zu at index %d to stop of chunk at %d",
6343 samplesToRead, size, mCurrentSampleIndex,
6344 mSampleTable->getLastSampleIndexInChunk());
6345
6346 size_t totalSize = samplesToRead * size;
6347 if (mBuffer->size() < totalSize) {
6348 mBuffer->release();
6349 mBuffer = nullptr;
6350 return AMEDIA_ERROR_UNKNOWN;
6351 }
6352 uint8_t* buf = (uint8_t *)mBuffer->data();
6353 ssize_t bytesRead = mDataSource->readAt(offset, buf, totalSize);
6354 if (bytesRead < (ssize_t)totalSize) {
6355 mBuffer->release();
6356 mBuffer = NULL;
6357 return AMEDIA_ERROR_IO;
6358 }
6359
6360 AMediaFormat *meta = mBuffer->meta_data();
6361 AMediaFormat_clear(meta);
6362 AMediaFormat_setInt64(
6363 meta, AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
6364 AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
6365
6366 int32_t byteOrder = 0;
6367 bool isGetBigEndian = AMediaFormat_getInt32(mFormat,
6368 AMEDIAFORMAT_KEY_PCM_BIG_ENDIAN, &byteOrder);
6369
6370 if (isGetBigEndian && byteOrder == 1) {
6371 // Big-endian -> little-endian
6372 uint16_t *dstData = (uint16_t *)buf;
6373 uint16_t *srcData = (uint16_t *)buf;
6374
6375 for (size_t j = 0; j < bytesRead / sizeof(uint16_t); j++) {
6376 dstData[j] = ntohs(srcData[j]);
6377 }
6378 }
6379
6380 mCurrentSampleIndex += samplesToRead;
6381 mBuffer->set_range(0, totalSize);
6382 } else {
6383 ssize_t num_bytes_read =
6384 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
6385
6386 if (num_bytes_read < (ssize_t)size) {
6387 mBuffer->release();
6388 mBuffer = NULL;
6389
6390 return AMEDIA_ERROR_IO;
6391 }
6392
6393 CHECK(mBuffer != NULL);
6394 mBuffer->set_range(0, size);
6395 AMediaFormat *meta = mBuffer->meta_data();
6396 AMediaFormat_clear(meta);
6397 AMediaFormat_setInt64(
6398 meta, AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
6399 AMediaFormat_setInt64(
6400 meta, AMEDIAFORMAT_KEY_DURATION, ((long double)stts * 1000000) / mTimescale);
6401
6402 if (targetSampleTimeUs >= 0) {
6403 AMediaFormat_setInt64(
6404 meta, AMEDIAFORMAT_KEY_TARGET_TIME, targetSampleTimeUs);
6405 }
6406
6407 if (isSyncSample) {
6408 AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
6409 }
6410
6411 AMediaFormat_setInt64(
6412 meta, "sample-file-offset" /*AMEDIAFORMAT_KEY_SAMPLE_FILE_OFFSET*/,
6413 offset);
6414
6415 if (mSampleTable != nullptr &&
6416 mCurrentSampleIndex == mSampleTable->getLastSampleIndexInChunk()) {
6417 AMediaFormat_setInt64(
6418 meta,
6419 "last-sample-index-in-chunk" /*AMEDIAFORMAT_KEY_LAST_SAMPLE_INDEX_IN_CHUNK*/,
6420 mSampleTable->getLastSampleIndexInChunk());
6421 }
6422
6423 ++mCurrentSampleIndex;
6424 }
6425 }
6426
6427 *out = mBuffer;
6428 mBuffer = NULL;
6429
6430 return AMEDIA_OK;
6431
6432 } else if (mIsAC4) {
6433 CHECK(mBuffer != NULL);
6434 // Make sure there is enough space to write the sync header and the raw frame
6435 if (mBuffer->range_length() < (7 + size)) {
6436 mBuffer->release();
6437 mBuffer = NULL;
6438
6439 return AMEDIA_ERROR_IO;
6440 }
6441
6442 uint8_t *dstData = (uint8_t *)mBuffer->data();
6443 size_t dstOffset = 0;
6444 // Add AC-4 sync header to MPEG4 encapsulated AC-4 raw frame
6445 // AC40 sync word, meaning no CRC at the end of the frame
6446 dstData[dstOffset++] = 0xAC;
6447 dstData[dstOffset++] = 0x40;
6448 dstData[dstOffset++] = 0xFF;
6449 dstData[dstOffset++] = 0xFF;
6450 dstData[dstOffset++] = (uint8_t)((size >> 16) & 0xFF);
6451 dstData[dstOffset++] = (uint8_t)((size >> 8) & 0xFF);
6452 dstData[dstOffset++] = (uint8_t)((size >> 0) & 0xFF);
6453
6454 ssize_t numBytesRead = mDataSource->readAt(offset, dstData + dstOffset, size);
6455 if (numBytesRead != (ssize_t)size) {
6456 mBuffer->release();
6457 mBuffer = NULL;
6458
6459 return AMEDIA_ERROR_IO;
6460 }
6461
6462 mBuffer->set_range(0, dstOffset + size);
6463 AMediaFormat *meta = mBuffer->meta_data();
6464 AMediaFormat_clear(meta);
6465 AMediaFormat_setInt64(
6466 meta, AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
6467 AMediaFormat_setInt64(
6468 meta, AMEDIAFORMAT_KEY_DURATION, ((long double)stts * 1000000) / mTimescale);
6469
6470 if (targetSampleTimeUs >= 0) {
6471 AMediaFormat_setInt64(
6472 meta, AMEDIAFORMAT_KEY_TARGET_TIME, targetSampleTimeUs);
6473 }
6474
6475 if (isSyncSample) {
6476 AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
6477 }
6478
6479 ++mCurrentSampleIndex;
6480
6481 *out = mBuffer;
6482 mBuffer = NULL;
6483
6484 return AMEDIA_OK;
6485 } else {
6486 // Whole NAL units are returned but each fragment is prefixed by
6487 // the start code (0x00 00 00 01).
6488 ssize_t num_bytes_read = 0;
6489 bool mSrcBufferFitsDataToRead = size <= mSrcBufferSize;
6490 if (mSrcBufferFitsDataToRead) {
6491 num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size);
6492 } else {
6493 // We are trying to read a sample larger than the expected max sample size.
6494 // Fall through and let the failure be handled by the following if.
6495 android_errorWriteLog(0x534e4554, "188893559");
6496 }
6497
6498 if (num_bytes_read < (ssize_t)size) {
6499 mBuffer->release();
6500 mBuffer = NULL;
6501 return mSrcBufferFitsDataToRead ? AMEDIA_ERROR_IO : AMEDIA_ERROR_MALFORMED;
6502 }
6503
6504 uint8_t *dstData = (uint8_t *)mBuffer->data();
6505 size_t srcOffset = 0;
6506 size_t dstOffset = 0;
6507
6508 while (srcOffset < size) {
6509 bool isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize);
6510 size_t nalLength = 0;
6511 if (!isMalFormed) {
6512 nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
6513 srcOffset += mNALLengthSize;
6514 isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength);
6515 }
6516
6517 if (isMalFormed) {
6518 //if nallength abnormal,ignore it.
6519 ALOGW("abnormal nallength, ignore this NAL");
6520 srcOffset = size;
6521 break;
6522 }
6523
6524 if (nalLength == 0) {
6525 continue;
6526 }
6527
6528 if (dstOffset > SIZE_MAX - 4 ||
6529 dstOffset + 4 > SIZE_MAX - nalLength ||
6530 dstOffset + 4 + nalLength > mBuffer->size()) {
6531 ALOGE("b/27208621 : %zu %zu", dstOffset, mBuffer->size());
6532 android_errorWriteLog(0x534e4554, "27208621");
6533 mBuffer->release();
6534 mBuffer = NULL;
6535 return AMEDIA_ERROR_MALFORMED;
6536 }
6537
6538 dstData[dstOffset++] = 0;
6539 dstData[dstOffset++] = 0;
6540 dstData[dstOffset++] = 0;
6541 dstData[dstOffset++] = 1;
6542 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
6543 srcOffset += nalLength;
6544 dstOffset += nalLength;
6545 }
6546 CHECK_EQ(srcOffset, size);
6547 CHECK(mBuffer != NULL);
6548 mBuffer->set_range(0, dstOffset);
6549
6550 AMediaFormat *meta = mBuffer->meta_data();
6551 AMediaFormat_clear(meta);
6552 AMediaFormat_setInt64(
6553 meta, AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
6554 AMediaFormat_setInt64(
6555 meta, AMEDIAFORMAT_KEY_DURATION, ((long double)stts * 1000000) / mTimescale);
6556
6557 if (targetSampleTimeUs >= 0) {
6558 AMediaFormat_setInt64(
6559 meta, AMEDIAFORMAT_KEY_TARGET_TIME, targetSampleTimeUs);
6560 }
6561
6562 if (mIsAVC) {
6563 uint32_t layerId = FindAVCLayerId(
6564 (const uint8_t *)mBuffer->data(), mBuffer->range_length());
6565 AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_TEMPORAL_LAYER_ID, layerId);
6566 } else if (mIsHEVC) {
6567 int32_t layerId = parseHEVCLayerId(
6568 (const uint8_t *)mBuffer->data(), mBuffer->range_length());
6569 if (layerId >= 0) {
6570 AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_TEMPORAL_LAYER_ID, layerId);
6571 }
6572 }
6573
6574 if (isSyncSample) {
6575 AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
6576 }
6577
6578 AMediaFormat_setInt64(
6579 meta, "sample-file-offset" /*AMEDIAFORMAT_KEY_SAMPLE_FILE_OFFSET*/, offset);
6580
6581 if (mSampleTable != nullptr &&
6582 mCurrentSampleIndex == mSampleTable->getLastSampleIndexInChunk()) {
6583 AMediaFormat_setInt64(
6584 meta,
6585 "last-sample-index-in-chunk" /*AMEDIAFORMAT_KEY_LAST_SAMPLE_INDEX_IN_CHUNK*/,
6586 mSampleTable->getLastSampleIndexInChunk());
6587 }
6588
6589 ++mCurrentSampleIndex;
6590
6591 *out = mBuffer;
6592 mBuffer = NULL;
6593
6594 return AMEDIA_OK;
6595 }
6596 }
6597
fragmentedRead(MediaBufferHelper ** out,const ReadOptions * options)6598 media_status_t MPEG4Source::fragmentedRead(
6599 MediaBufferHelper **out, const ReadOptions *options) {
6600
6601 ALOGV("MPEG4Source::fragmentedRead");
6602
6603 CHECK(mStarted);
6604
6605 *out = NULL;
6606
6607 int64_t targetSampleTimeUs = -1;
6608
6609 int64_t seekTimeUs;
6610 ReadOptions::SeekMode mode;
6611 if (options && options->getSeekTo(&seekTimeUs, &mode)) {
6612 ALOGV("seekTimeUs:%" PRId64, seekTimeUs);
6613 int64_t elstInitialEmptyEditUs = 0, elstShiftStartUs = 0;
6614 if (mElstInitialEmptyEditTicks > 0) {
6615 elstInitialEmptyEditUs = ((long double)mElstInitialEmptyEditTicks * 1000000) /
6616 mTimescale;
6617 /* Sample's composition time from ctts/stts entries are non-negative(>=0).
6618 * Hence, lower bound on seekTimeUs is 0.
6619 */
6620 seekTimeUs = std::max(seekTimeUs - elstInitialEmptyEditUs, (int64_t)0);
6621 }
6622 if (mElstShiftStartTicks > 0){
6623 elstShiftStartUs = ((long double)mElstShiftStartTicks * 1000000) / mTimescale;
6624 seekTimeUs += elstShiftStartUs;
6625 }
6626 ALOGV("shifted seekTimeUs:%" PRId64 ", elstInitialEmptyEditUs:%" PRIu64
6627 ", elstShiftStartUs:%" PRIu64, seekTimeUs, elstInitialEmptyEditUs,
6628 elstShiftStartUs);
6629
6630 int numSidxEntries = mSegments.size();
6631 if (numSidxEntries != 0) {
6632 int64_t totalTime = 0;
6633 off64_t totalOffset = mFirstMoofOffset;
6634 for (int i = 0; i < numSidxEntries; i++) {
6635 const SidxEntry *se = &mSegments[i];
6636 if (totalTime + se->mDurationUs > seekTimeUs) {
6637 // The requested time is somewhere in this segment
6638 if ((mode == ReadOptions::SEEK_NEXT_SYNC && seekTimeUs > totalTime) ||
6639 (mode == ReadOptions::SEEK_CLOSEST_SYNC &&
6640 (seekTimeUs - totalTime) > (totalTime + se->mDurationUs - seekTimeUs))) {
6641 // requested next sync, or closest sync and it was closer to the end of
6642 // this segment
6643 totalTime += se->mDurationUs;
6644 totalOffset += se->mSize;
6645 }
6646 break;
6647 }
6648 totalTime += se->mDurationUs;
6649 totalOffset += se->mSize;
6650 }
6651 mCurrentMoofOffset = totalOffset;
6652 mNextMoofOffset = -1;
6653 mCurrentSamples.clear();
6654 mCurrentSampleIndex = 0;
6655 status_t err = parseChunk(&totalOffset);
6656 if (err != OK) {
6657 return AMEDIA_ERROR_UNKNOWN;
6658 }
6659 mCurrentTime = totalTime * mTimescale / 1000000ll;
6660 } else {
6661 // without sidx boxes, we can only seek to 0
6662 mCurrentMoofOffset = mFirstMoofOffset;
6663 mNextMoofOffset = -1;
6664 mCurrentSamples.clear();
6665 mCurrentSampleIndex = 0;
6666 off64_t tmp = mCurrentMoofOffset;
6667 status_t err = parseChunk(&tmp);
6668 if (err != OK) {
6669 return AMEDIA_ERROR_UNKNOWN;
6670 }
6671 mCurrentTime = 0;
6672 }
6673
6674 if (mBuffer != NULL) {
6675 mBuffer->release();
6676 mBuffer = NULL;
6677 }
6678
6679 // fall through
6680 }
6681
6682 off64_t offset = 0;
6683 size_t size = 0;
6684 int64_t cts = 0;
6685 bool isSyncSample = false;
6686 bool newBuffer = false;
6687 if (mBuffer == NULL || mCurrentSampleIndex >= mCurrentSamples.size()) {
6688 newBuffer = true;
6689
6690 if (mBuffer != NULL) {
6691 mBuffer->release();
6692 mBuffer = NULL;
6693 }
6694 if (mCurrentSampleIndex >= mCurrentSamples.size()) {
6695 // move to next fragment if there is one
6696 if (mNextMoofOffset <= mCurrentMoofOffset) {
6697 return AMEDIA_ERROR_END_OF_STREAM;
6698 }
6699 off64_t nextMoof = mNextMoofOffset;
6700 mCurrentMoofOffset = nextMoof;
6701 mCurrentSamples.clear();
6702 mCurrentSampleIndex = 0;
6703 status_t err = parseChunk(&nextMoof);
6704 if (err != OK) {
6705 return AMEDIA_ERROR_UNKNOWN;
6706 }
6707 if (mCurrentSampleIndex >= mCurrentSamples.size()) {
6708 return AMEDIA_ERROR_END_OF_STREAM;
6709 }
6710 }
6711
6712 const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
6713 offset = smpl->offset;
6714 size = smpl->size;
6715 cts = mCurrentTime + smpl->compositionOffset;
6716
6717 if (mElstInitialEmptyEditTicks > 0) {
6718 cts += mElstInitialEmptyEditTicks;
6719 }
6720 if (mElstShiftStartTicks > 0) {
6721 // cts can be negative. for example, initial audio samples for gapless playback.
6722 cts -= (int64_t)mElstShiftStartTicks;
6723 }
6724
6725 mCurrentTime += smpl->duration;
6726 isSyncSample = (mCurrentSampleIndex == 0);
6727
6728 status_t err = mBufferGroup->acquire_buffer(&mBuffer);
6729
6730 if (err != OK) {
6731 CHECK(mBuffer == NULL);
6732 ALOGV("acquire_buffer returned %d", err);
6733 return AMEDIA_ERROR_UNKNOWN;
6734 }
6735 if (size > mBuffer->size()) {
6736 ALOGE("buffer too small: %zu > %zu", size, mBuffer->size());
6737 mBuffer->release();
6738 mBuffer = NULL;
6739 return AMEDIA_ERROR_UNKNOWN;
6740 }
6741 }
6742
6743 const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
6744 AMediaFormat *bufmeta = mBuffer->meta_data();
6745 AMediaFormat_clear(bufmeta);
6746 if (smpl->encryptedsizes.size()) {
6747 // store clear/encrypted lengths in metadata
6748 AMediaFormat_setBuffer(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_PLAIN_SIZES,
6749 smpl->clearsizes.array(), smpl->clearsizes.size() * sizeof(uint32_t));
6750 AMediaFormat_setBuffer(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_ENCRYPTED_SIZES,
6751 smpl->encryptedsizes.array(), smpl->encryptedsizes.size() * sizeof(uint32_t));
6752 AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_DEFAULT_IV_SIZE, mDefaultIVSize);
6753 AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_MODE, mCryptoMode);
6754 AMediaFormat_setBuffer(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_KEY, mCryptoKey, 16);
6755 AMediaFormat_setInt32(bufmeta,
6756 AMEDIAFORMAT_KEY_CRYPTO_ENCRYPTED_BYTE_BLOCK, mDefaultEncryptedByteBlock);
6757 AMediaFormat_setInt32(bufmeta,
6758 AMEDIAFORMAT_KEY_CRYPTO_SKIP_BYTE_BLOCK, mDefaultSkipByteBlock);
6759
6760 void *iv = NULL;
6761 size_t ivlength = 0;
6762 if (!AMediaFormat_getBuffer(mFormat,
6763 "crypto-iv", &iv, &ivlength)) {
6764 iv = (void *) smpl->iv;
6765 ivlength = 16; // use 16 or the actual size?
6766 }
6767 AMediaFormat_setBuffer(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_IV, iv, ivlength);
6768 }
6769
6770 if (!mIsAVC && !mIsHEVC && !(mIsDolbyVision && mNALLengthSize)) {
6771 if (newBuffer) {
6772 if (!isInRange((size_t)0u, mBuffer->size(), size)) {
6773 mBuffer->release();
6774 mBuffer = NULL;
6775
6776 ALOGE("fragmentedRead ERROR_MALFORMED size %zu", size);
6777 return AMEDIA_ERROR_MALFORMED;
6778 }
6779
6780 ssize_t num_bytes_read =
6781 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
6782
6783 if (num_bytes_read < (ssize_t)size) {
6784 mBuffer->release();
6785 mBuffer = NULL;
6786
6787 ALOGE("i/o error");
6788 return AMEDIA_ERROR_IO;
6789 }
6790
6791 CHECK(mBuffer != NULL);
6792 mBuffer->set_range(0, size);
6793 AMediaFormat_setInt64(bufmeta,
6794 AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
6795 AMediaFormat_setInt64(bufmeta,
6796 AMEDIAFORMAT_KEY_DURATION, ((long double)smpl->duration * 1000000) / mTimescale);
6797
6798 if (targetSampleTimeUs >= 0) {
6799 AMediaFormat_setInt64(bufmeta, AMEDIAFORMAT_KEY_TARGET_TIME, targetSampleTimeUs);
6800 }
6801
6802 if (mIsAVC) {
6803 uint32_t layerId = FindAVCLayerId(
6804 (const uint8_t *)mBuffer->data(), mBuffer->range_length());
6805 AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_TEMPORAL_LAYER_ID, layerId);
6806 } else if (mIsHEVC) {
6807 int32_t layerId = parseHEVCLayerId(
6808 (const uint8_t *)mBuffer->data(), mBuffer->range_length());
6809 if (layerId >= 0) {
6810 AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_TEMPORAL_LAYER_ID, layerId);
6811 }
6812 }
6813
6814 if (isSyncSample) {
6815 AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
6816 }
6817
6818 ++mCurrentSampleIndex;
6819 }
6820
6821 *out = mBuffer;
6822 mBuffer = NULL;
6823
6824 return AMEDIA_OK;
6825
6826 } else {
6827 ALOGV("whole NAL");
6828 // Whole NAL units are returned but each fragment is prefixed by
6829 // the start code (0x00 00 00 01).
6830 ssize_t num_bytes_read = 0;
6831 void *data = NULL;
6832 bool isMalFormed = false;
6833 int32_t max_size;
6834 if (!AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_MAX_INPUT_SIZE, &max_size)
6835 || !isInRange((size_t)0u, (size_t)max_size, size)) {
6836 isMalFormed = true;
6837 } else {
6838 data = mSrcBuffer;
6839 }
6840
6841 if (isMalFormed || data == NULL) {
6842 ALOGE("isMalFormed size %zu", size);
6843 if (mBuffer != NULL) {
6844 mBuffer->release();
6845 mBuffer = NULL;
6846 }
6847 return AMEDIA_ERROR_MALFORMED;
6848 }
6849 num_bytes_read = mDataSource->readAt(offset, data, size);
6850
6851 if (num_bytes_read < (ssize_t)size) {
6852 mBuffer->release();
6853 mBuffer = NULL;
6854
6855 ALOGE("i/o error");
6856 return AMEDIA_ERROR_IO;
6857 }
6858
6859 uint8_t *dstData = (uint8_t *)mBuffer->data();
6860 size_t srcOffset = 0;
6861 size_t dstOffset = 0;
6862
6863 while (srcOffset < size) {
6864 isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize);
6865 size_t nalLength = 0;
6866 if (!isMalFormed) {
6867 nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
6868 srcOffset += mNALLengthSize;
6869 isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength)
6870 || !isInRange((size_t)0u, mBuffer->size(), dstOffset, (size_t)4u)
6871 || !isInRange((size_t)0u, mBuffer->size(), dstOffset + 4, nalLength);
6872 }
6873
6874 if (isMalFormed) {
6875 ALOGE("Video is malformed; nalLength %zu", nalLength);
6876 mBuffer->release();
6877 mBuffer = NULL;
6878 return AMEDIA_ERROR_MALFORMED;
6879 }
6880
6881 if (nalLength == 0) {
6882 continue;
6883 }
6884
6885 if (dstOffset > SIZE_MAX - 4 ||
6886 dstOffset + 4 > SIZE_MAX - nalLength ||
6887 dstOffset + 4 + nalLength > mBuffer->size()) {
6888 ALOGE("b/26365349 : %zu %zu", dstOffset, mBuffer->size());
6889 android_errorWriteLog(0x534e4554, "26365349");
6890 mBuffer->release();
6891 mBuffer = NULL;
6892 return AMEDIA_ERROR_MALFORMED;
6893 }
6894
6895 dstData[dstOffset++] = 0;
6896 dstData[dstOffset++] = 0;
6897 dstData[dstOffset++] = 0;
6898 dstData[dstOffset++] = 1;
6899 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
6900 srcOffset += nalLength;
6901 dstOffset += nalLength;
6902 }
6903 CHECK_EQ(srcOffset, size);
6904 CHECK(mBuffer != NULL);
6905 mBuffer->set_range(0, dstOffset);
6906
6907 AMediaFormat *bufmeta = mBuffer->meta_data();
6908 AMediaFormat_setInt64(bufmeta,
6909 AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
6910 AMediaFormat_setInt64(bufmeta,
6911 AMEDIAFORMAT_KEY_DURATION, ((long double)smpl->duration * 1000000) / mTimescale);
6912
6913 if (targetSampleTimeUs >= 0) {
6914 AMediaFormat_setInt64(bufmeta, AMEDIAFORMAT_KEY_TARGET_TIME, targetSampleTimeUs);
6915 }
6916
6917 if (isSyncSample) {
6918 AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
6919 }
6920
6921 ++mCurrentSampleIndex;
6922
6923 *out = mBuffer;
6924 mBuffer = NULL;
6925
6926 return AMEDIA_OK;
6927 }
6928
6929 return AMEDIA_OK;
6930 }
6931
findTrackByMimePrefix(const char * mimePrefix)6932 MPEG4Extractor::Track *MPEG4Extractor::findTrackByMimePrefix(
6933 const char *mimePrefix) {
6934 for (Track *track = mFirstTrack; track != NULL; track = track->next) {
6935 const char *mime;
6936 if (AMediaFormat_getString(track->meta, AMEDIAFORMAT_KEY_MIME, &mime)
6937 && !strncasecmp(mime, mimePrefix, strlen(mimePrefix))) {
6938 return track;
6939 }
6940 }
6941
6942 return NULL;
6943 }
6944
LegacySniffMPEG4(DataSourceHelper * source,float * confidence)6945 static bool LegacySniffMPEG4(DataSourceHelper *source, float *confidence) {
6946 uint8_t header[8];
6947
6948 ssize_t n = source->readAt(4, header, sizeof(header));
6949 if (n < (ssize_t)sizeof(header)) {
6950 return false;
6951 }
6952
6953 if (!memcmp(header, "ftyp3gp", 7) || !memcmp(header, "ftypmp42", 8)
6954 || !memcmp(header, "ftyp3gr6", 8) || !memcmp(header, "ftyp3gs6", 8)
6955 || !memcmp(header, "ftyp3ge6", 8) || !memcmp(header, "ftyp3gg6", 8)
6956 || !memcmp(header, "ftypisom", 8) || !memcmp(header, "ftypM4V ", 8)
6957 || !memcmp(header, "ftypM4A ", 8) || !memcmp(header, "ftypf4v ", 8)
6958 || !memcmp(header, "ftypkddi", 8) || !memcmp(header, "ftypM4VP", 8)
6959 || !memcmp(header, "ftypmif1", 8) || !memcmp(header, "ftypheic", 8)
6960 || !memcmp(header, "ftypmsf1", 8) || !memcmp(header, "ftyphevc", 8)
6961 || !memcmp(header, "ftypavif", 8) || !memcmp(header, "ftypavis", 8)) {
6962 *confidence = 0.4;
6963
6964 return true;
6965 }
6966
6967 return false;
6968 }
6969
isCompatibleBrand(uint32_t fourcc)6970 static bool isCompatibleBrand(uint32_t fourcc) {
6971 static const uint32_t kCompatibleBrands[] = {
6972 FOURCC("isom"),
6973 FOURCC("iso2"),
6974 FOURCC("avc1"),
6975 FOURCC("hvc1"),
6976 FOURCC("hev1"),
6977 FOURCC("av01"),
6978 FOURCC("vp09"),
6979 FOURCC("3gp4"),
6980 FOURCC("mp41"),
6981 FOURCC("mp42"),
6982 FOURCC("dash"),
6983 FOURCC("nvr1"),
6984
6985 // Won't promise that the following file types can be played.
6986 // Just give these file types a chance.
6987 FOURCC("qt "), // Apple's QuickTime
6988 FOURCC("MSNV"), // Sony's PSP
6989 FOURCC("wmf "),
6990
6991 FOURCC("3g2a"), // 3GPP2
6992 FOURCC("3g2b"),
6993 FOURCC("mif1"), // HEIF image
6994 FOURCC("heic"), // HEIF image
6995 FOURCC("msf1"), // HEIF image sequence
6996 FOURCC("hevc"), // HEIF image sequence
6997 FOURCC("avif"), // AVIF image
6998 FOURCC("avis"), // AVIF image sequence
6999 };
7000
7001 for (size_t i = 0;
7002 i < sizeof(kCompatibleBrands) / sizeof(kCompatibleBrands[0]);
7003 ++i) {
7004 if (kCompatibleBrands[i] == fourcc) {
7005 return true;
7006 }
7007 }
7008
7009 return false;
7010 }
7011
7012 // Attempt to actually parse the 'ftyp' atom and determine if a suitable
7013 // compatible brand is present.
7014 // Also try to identify where this file's metadata ends
7015 // (end of the 'moov' atom) and report it to the caller as part of
7016 // the metadata.
BetterSniffMPEG4(DataSourceHelper * source,float * confidence)7017 static bool BetterSniffMPEG4(DataSourceHelper *source, float *confidence) {
7018 // We scan up to 128 bytes to identify this file as an MP4.
7019 static const off64_t kMaxScanOffset = 128ll;
7020
7021 off64_t offset = 0ll;
7022 bool foundGoodFileType = false;
7023 off64_t moovAtomEndOffset = -1ll;
7024 bool done = false;
7025
7026 while (!done && offset < kMaxScanOffset) {
7027 uint32_t hdr[2];
7028 if (source->readAt(offset, hdr, 8) < 8) {
7029 return false;
7030 }
7031
7032 uint64_t chunkSize = ntohl(hdr[0]);
7033 uint32_t chunkType = ntohl(hdr[1]);
7034 off64_t chunkDataOffset = offset + 8;
7035
7036 if (chunkSize == 1) {
7037 if (source->readAt(offset + 8, &chunkSize, 8) < 8) {
7038 return false;
7039 }
7040
7041 chunkSize = ntoh64(chunkSize);
7042 chunkDataOffset += 8;
7043
7044 if (chunkSize < 16) {
7045 // The smallest valid chunk is 16 bytes long in this case.
7046 return false;
7047 }
7048 if (chunkSize > INT64_MAX) {
7049 // reject overly large chunk sizes that could
7050 // be interpreted as negative
7051 ALOGE("chunk size too large");
7052 return false;
7053 }
7054
7055 } else if (chunkSize < 8) {
7056 // The smallest valid chunk is 8 bytes long.
7057 return false;
7058 }
7059
7060 // (data_offset - offset) is either 8 or 16
7061 off64_t chunkDataSize = chunkSize - (chunkDataOffset - offset);
7062 if (chunkDataSize < 0) {
7063 ALOGE("b/23540914");
7064 return false;
7065 }
7066
7067 char chunkstring[5];
7068 MakeFourCCString(chunkType, chunkstring);
7069 ALOGV("saw chunk type %s, size %" PRIu64 " @ %lld",
7070 chunkstring, chunkSize, (long long)offset);
7071 switch (chunkType) {
7072 case FOURCC("ftyp"):
7073 {
7074 if (chunkDataSize < 8) {
7075 return false;
7076 }
7077
7078 uint32_t numCompatibleBrands = (chunkDataSize - 8) / 4;
7079 for (size_t i = 0; i < numCompatibleBrands + 2; ++i) {
7080 if (i == 1) {
7081 // Skip this index, it refers to the minorVersion,
7082 // not a brand.
7083 continue;
7084 }
7085
7086 uint32_t brand;
7087 if (source->readAt(
7088 chunkDataOffset + 4 * i, &brand, 4) < 4) {
7089 return false;
7090 }
7091
7092 brand = ntohl(brand);
7093
7094 if (isCompatibleBrand(brand)) {
7095 foundGoodFileType = true;
7096 break;
7097 }
7098 }
7099
7100 if (!foundGoodFileType) {
7101 return false;
7102 }
7103
7104 break;
7105 }
7106
7107 case FOURCC("moov"):
7108 {
7109 if (__builtin_add_overflow(offset, chunkSize, &moovAtomEndOffset)) {
7110 ALOGE("chunk size + offset would overflow");
7111 return false;
7112 }
7113
7114 done = true;
7115 break;
7116 }
7117
7118 default:
7119 break;
7120 }
7121
7122 if (__builtin_add_overflow(offset, chunkSize, &offset)) {
7123 ALOGE("chunk size + offset would overflow");
7124 return false;
7125 }
7126 }
7127
7128 if (!foundGoodFileType) {
7129 return false;
7130 }
7131
7132 *confidence = 0.4f;
7133
7134 return true;
7135 }
7136
CreateExtractor(CDataSource * source,void *)7137 static CMediaExtractor* CreateExtractor(CDataSource *source, void *) {
7138 return wrap(new MPEG4Extractor(new DataSourceHelper(source)));
7139 }
7140
Sniff(CDataSource * source,float * confidence,void **,FreeMetaFunc *)7141 static CreatorFunc Sniff(
7142 CDataSource *source, float *confidence, void **,
7143 FreeMetaFunc *) {
7144 DataSourceHelper helper(source);
7145 if (BetterSniffMPEG4(&helper, confidence)) {
7146 return CreateExtractor;
7147 }
7148
7149 if (LegacySniffMPEG4(&helper, confidence)) {
7150 ALOGW("Identified supported mpeg4 through LegacySniffMPEG4.");
7151 return CreateExtractor;
7152 }
7153
7154 return NULL;
7155 }
7156
// NULL-terminated list of file-name extensions, referenced from the
// ExtractorDef returned by GETEXTRACTORDEF().
static const char *extensions[] = {
    "3g2", "3ga", "3gp", "3gpp", "3gpp2",
    "m4a", "m4r", "m4v",
    "mov", "mp4", "qt",
    NULL
};
7171
7172 extern "C" {
7173 // This is the only symbol that needs to be exported
7174 __attribute__ ((visibility ("default")))
GETEXTRACTORDEF()7175 ExtractorDef GETEXTRACTORDEF() {
7176 return {
7177 EXTRACTORDEF_VERSION,
7178 UUID("27575c67-4417-4c54-8d3d-8e626985a164"),
7179 2, // version
7180 "MP4 Extractor",
7181 { .v3 = {Sniff, extensions} },
7182 };
7183 }
7184
7185 } // extern "C"
7186
7187 } // namespace android
7188