1 /*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 //#define LOG_NDEBUG 0
18 #define LOG_TAG "MPEG4Extractor"
19
20 #include <ctype.h>
21 #include <inttypes.h>
22 #include <algorithm>
23 #include <map>
24 #include <memory>
25 #include <numeric>
26 #include <stdint.h>
27 #include <stdlib.h>
28 #include <string.h>
29
30 #include <utils/Log.h>
31
32 #include <android-base/properties.h>
33 #ifdef __ANDROID__
34 #include <android/api-level.h>
35 #endif //__ANDROID__
36 #include "AC4Parser.h"
37 #include "MPEG4Extractor.h"
38 #include "SampleTable.h"
39 #include "ItemTable.h"
40
41 #include <com_android_media_extractor_flags.h>
42 #include <media/esds/ESDS.h>
43 #include <ID3.h>
44 #include <media/stagefright/DataSourceBase.h>
45 #include <media/ExtractorUtils.h>
46 #include <media/stagefright/foundation/ABitReader.h>
47 #include <media/stagefright/foundation/ABuffer.h>
48 #include <media/stagefright/foundation/ADebug.h>
49 #include <media/stagefright/foundation/AMessage.h>
50 #include <media/stagefright/foundation/AudioPresentationInfo.h>
51 #include <media/stagefright/foundation/AUtils.h>
52 #include <media/stagefright/foundation/ByteUtils.h>
53 #include <media/stagefright/foundation/ColorUtils.h>
54 #include <media/stagefright/foundation/avc_utils.h>
55 #include <media/stagefright/foundation/hexdump.h>
56 #include <media/stagefright/foundation/OpusHeader.h>
57 #include <media/stagefright/MediaBufferGroup.h>
58 #include <media/stagefright/MediaDefs.h>
59 #include <media/stagefright/MetaDataBase.h>
60 #include <media/stagefright/MetaDataUtils.h>
61 #include <utils/String8.h>
62
63 #include <byteswap.h>
64
// Fallback definition, used only when the headers included above have not
// already provided UINT32_MAX.
#ifndef UINT32_MAX
#define UINT32_MAX (4294967295U)
#endif

// NOTE(review): presumably the size in bytes of the ALAC codec-specific info;
// no use of this macro is visible in this part of the file -- confirm.
#define ALAC_SPECIFIC_INFO_SIZE (36)

// TODO : Remove the defines once mainline media is built against NDK >= 31.
// The mp4 extractor is part of mainline and builds against NDK 29 as of
// writing. These keys are available only from NDK 31:
#define AMEDIAFORMAT_KEY_MPEGH_PROFILE_LEVEL_INDICATION \
    "mpegh-profile-level-indication"
#define AMEDIAFORMAT_KEY_MPEGH_REFERENCE_CHANNEL_LAYOUT \
    "mpegh-reference-channel-layout"
#define AMEDIAFORMAT_KEY_MPEGH_COMPATIBLE_SETS \
    "mpegh-compatible-sets"
80
81 namespace android {
82
// Size limits applied while parsing.
enum {
    // max track header chunk to return
    // (getTrackMetaData() clamps the MPEG2 stream header it reads to this
    // many bytes)
    kMaxTrackHeaderSize = 32,

    // maximum size of an atom. Some atoms can be bigger according to the spec,
    // but we only allow up to this size.
    // (parseChunk() rejects any atom other than 'mdat' whose payload exceeds
    // this value)
    kMaxAtomSize = 64 * 1024 * 1024,
};
91
// Returns true when the running device reports an API level of at least
// "version", or when its build codename equals "codeName".
//
// The device API level and the "ro.build.version.codename" property are
// queried once and cached for the lifetime of the process. When not built for
// Android (__ANDROID__ undefined) the check always succeeds.
static bool isAtLeastRelease([[maybe_unused]] int version,
                             [[maybe_unused]] const std::string codeName) {
#ifdef __ANDROID__
    static std::once_flag sQueryOnce;
    static int sCachedApiLevel = 0;
    static std::string sCachedCodeName;

    std::call_once(sQueryOnce, []() {
        sCachedApiLevel = android_get_device_api_level();
        sCachedCodeName = base::GetProperty("ro.build.version.codename", "");
    });

    if (sCachedApiLevel >= version) {
        return true;
    }
    return sCachedCodeName == codeName;
#else  //__ANDROID__
    return true;
#endif  //__ANDROID__
}
107
// Track source (a MediaTrackHelper) for one track of an MP4 file.
//
// It is constructed from a track format, a data source, the track's sample
// table and fragment-related inputs (sidx entries, trex, first moof offset),
// and declares both a plain read() and a fragmentedRead() entry point together
// with the parsing helpers and per-fragment state declared in the private
// section. Only the declaration is visible here; member semantics noted below
// are limited to what the declarations and their original comments state.
class MPEG4Source : public MediaTrackHelper {
    // NOTE(review): presumably an upper bound, in bytes, for PCM reads --
    // confirm against read().
    static const size_t kMaxPcmFrameSize = 8192;
public:
    // Caller retains ownership of both "dataSource" and "sampleTable".
    MPEG4Source(AMediaFormat *format,
                DataSourceHelper *dataSource,
                int32_t timeScale,
                const sp<SampleTable> &sampleTable,
                Vector<SidxEntry> &sidx,
                const Trex *trex,
                off64_t firstMoofOffset,
                const sp<ItemTable> &itemTable,
                uint64_t elstShiftStartTicks,
                uint64_t elstInitialEmptyEditTicks);
    virtual status_t init();

    virtual media_status_t start();
    virtual media_status_t stop();

    virtual media_status_t getFormat(AMediaFormat *);

    virtual media_status_t read(MediaBufferHelper **buffer, const ReadOptions *options = NULL);
    // This source always reports support for non-blocking reads.
    bool supportsNonBlockingRead() override { return true; }
    virtual media_status_t fragmentedRead(
            MediaBufferHelper **buffer, const ReadOptions *options = NULL);

    virtual ~MPEG4Source();

private:
    Mutex mLock;

    AMediaFormat *mFormat;
    DataSourceHelper *mDataSource;
    int32_t mTimescale;
    sp<SampleTable> mSampleTable;
    uint32_t mCurrentSampleIndex;
    uint32_t mCurrentFragmentIndex;
    // Held by reference: the vector passed to the constructor must outlive
    // this object.
    Vector<SidxEntry> &mSegments;
    const Trex *mTrex;
    off64_t mFirstMoofOffset;
    off64_t mCurrentMoofOffset;
    off64_t mCurrentMoofSize;
    off64_t mNextMoofOffset;
    uint32_t mCurrentTime; // in media timescale ticks
    int32_t mLastParsedTrackId;
    int32_t mTrackId;

    int32_t mCryptoMode;    // passed in from extractor
    int32_t mDefaultIVSize; // passed in from extractor
    uint8_t mCryptoKey[16]; // passed in from extractor
    int32_t mDefaultEncryptedByteBlock;
    int32_t mDefaultSkipByteBlock;
    // NOTE(review): the mCurrentSampleInfo* / mCurrentAuxInfo* members look
    // like the state filled in by parseSampleAuxiliaryInformationSizes() and
    // parseSampleAuxiliaryInformationOffsets() -- confirm in their definitions.
    uint32_t mCurrentAuxInfoType;
    uint32_t mCurrentAuxInfoTypeParameter;
    int32_t mCurrentDefaultSampleInfoSize;
    uint32_t mCurrentSampleInfoCount;
    uint32_t mCurrentSampleInfoAllocSize;
    uint8_t* mCurrentSampleInfoSizes;
    uint32_t mCurrentSampleInfoOffsetCount;
    uint32_t mCurrentSampleInfoOffsetsAllocSize;
    uint64_t* mCurrentSampleInfoOffsets;

    // Codec/type flags for this track. Only mIsMpegH and mIsUsac have in-class
    // defaults; the others must be set by the constructor.
    bool mIsAVC;
    bool mIsHEVC;
    bool mIsAPV;
    bool mIsDolbyVision;
    bool mIsAC4;
    bool mIsMpegH = false;
    bool mIsPcm;
    size_t mNALLengthSize;

    bool mStarted;

    MediaBufferHelper *mBuffer;

    size_t mSrcBufferSize;
    uint8_t *mSrcBuffer;

    bool mIsHeif;
    bool mIsAvif;
    bool mIsAudio;
    bool mIsUsac = false;
    sp<ItemTable> mItemTable;

    /* Shift start offset (move to earlier time) when media_time > 0,
     * in media time scale.
     */
    uint64_t mElstShiftStartTicks;
    /* Initial start offset (move to later time), empty edit list entry
     * in media time scale.
     */
    uint64_t mElstInitialEmptyEditTicks;

    size_t parseNALSize(const uint8_t *data) const;
    status_t parseChunk(off64_t *offset);
    status_t parseTrackFragmentHeader(off64_t offset, off64_t size);
    status_t parseTrackFragmentRun(off64_t offset, off64_t size);
    status_t parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size);
    status_t parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size);
    status_t parseClearEncryptedSizes(off64_t offset, bool isSampleEncryption,
            uint32_t flags, off64_t size);
    status_t parseSampleEncryption(off64_t offset, off64_t size);
    // returns -1 for invalid layer ID
    int32_t parseHEVCLayerId(const uint8_t *data, size_t size);
    size_t getNALLengthSizeFromAvcCsd(const uint8_t *data, const size_t size) const;
    size_t getNALLengthSizeFromHevcCsd(const uint8_t *data, const size_t size) const;

    int64_t rescaleTime(int64_t value, int64_t scale, int64_t originScale) const;

    // Values gathered for the current track fragment header.
    struct TrackFragmentHeaderInfo {
        // Bit values for mFlags; each k...Present bit names one optional
        // field of this struct.
        enum Flags {
            kBaseDataOffsetPresent         = 0x01,
            kSampleDescriptionIndexPresent = 0x02,
            kDefaultSampleDurationPresent  = 0x08,
            kDefaultSampleSizePresent      = 0x10,
            kDefaultSampleFlagsPresent     = 0x20,
            kDurationIsEmpty               = 0x10000,
        };

        uint32_t mTrackID;
        uint32_t mFlags;
        uint64_t mBaseDataOffset;
        uint32_t mSampleDescriptionIndex;
        uint32_t mDefaultSampleDuration;
        uint32_t mDefaultSampleSize;
        uint32_t mDefaultSampleFlags;

        uint64_t mDataOffset;
    };
    TrackFragmentHeaderInfo mTrackFragmentHeaderInfo;

    // Per-sample record kept in mCurrentSamples.
    struct Sample {
        off64_t offset;
        size_t size;
        uint32_t duration;
        int32_t compositionOffset;
        uint8_t iv[16];
        Vector<uint32_t> clearsizes;
        Vector<uint32_t> encryptedsizes;
    };
    Vector<Sample> mCurrentSamples;
    std::map<off64_t, uint32_t> mDrmOffsets;

    // Copy operations are private, so code outside the class cannot copy a
    // source.
    MPEG4Source(const MPEG4Source &);
    MPEG4Source &operator=(const MPEG4Source &);
};
254
// This custom data source wraps an existing one and satisfies requests
// falling entirely within a cached range from the cache while forwarding
// all remaining requests to the wrapped datasource.
// This is used to cache the full sampletable metadata for a single track,
// possibly wrapping multiple times to cover all tracks, i.e.
// Each CachedRangedDataSource caches the sampletable metadata for a single track.

class CachedRangedDataSource : public DataSourceHelper {
public:
    // Wraps "source"; the wrapper does not own it until a setCachedRange()
    // call made with assumeSourceOwnershipOnSuccess == true succeeds.
    explicit CachedRangedDataSource(DataSourceHelper *source);
    virtual ~CachedRangedDataSource();

    // Served from the cache when [offset, offset + size) lies entirely inside
    // the cached range; otherwise forwarded to the wrapped source.
    ssize_t readAt(off64_t offset, void *data, size_t size) override;
    // getSize() and flags() are forwarded to the wrapped source.
    status_t getSize(off64_t *size) override;
    uint32_t flags() override;

    // Drops any existing cache, then reads "size" bytes at "offset" from the
    // wrapped source into a newly allocated cache. Returns OK on success,
    // -ENOMEM if the allocation fails and ERROR_IO on a short read; on
    // success the ownership flag is set to assumeSourceOwnershipOnSuccess.
    status_t setCachedRange(off64_t offset, size_t size, bool assumeSourceOwnershipOnSuccess);


private:
    Mutex mLock;

    DataSourceHelper *mSource;
    // When true, the destructor deletes mSource.
    bool mOwnsDataSource;
    // File offset and length of the bytes currently held in mCache.
    off64_t mCachedOffset;
    size_t mCachedSize;
    // malloc()-allocated cache buffer, or NULL when nothing is cached.
    uint8_t *mCache;

    // Frees mCache and resets the cached offset and size to zero.
    void clearCache();

    // Copy operations are private, so code outside the class cannot copy a
    // wrapper.
    CachedRangedDataSource(const CachedRangedDataSource &);
    CachedRangedDataSource &operator=(const CachedRangedDataSource &);
};
288
CachedRangedDataSource(DataSourceHelper * source)289 CachedRangedDataSource::CachedRangedDataSource(DataSourceHelper *source)
290 : DataSourceHelper(source),
291 mSource(source),
292 mOwnsDataSource(false),
293 mCachedOffset(0),
294 mCachedSize(0),
295 mCache(NULL) {
296 }
297
~CachedRangedDataSource()298 CachedRangedDataSource::~CachedRangedDataSource() {
299 clearCache();
300 if (mOwnsDataSource) {
301 delete mSource;
302 }
303 }
304
clearCache()305 void CachedRangedDataSource::clearCache() {
306 if (mCache) {
307 free(mCache);
308 mCache = NULL;
309 }
310
311 mCachedOffset = 0;
312 mCachedSize = 0;
313 }
314
readAt(off64_t offset,void * data,size_t size)315 ssize_t CachedRangedDataSource::readAt(off64_t offset, void *data, size_t size) {
316 Mutex::Autolock autoLock(mLock);
317
318 if (isInRange(mCachedOffset, mCachedSize, offset, size)) {
319 memcpy(data, &mCache[offset - mCachedOffset], size);
320 return size;
321 }
322
323 return mSource->readAt(offset, data, size);
324 }
325
getSize(off64_t * size)326 status_t CachedRangedDataSource::getSize(off64_t *size) {
327 return mSource->getSize(size);
328 }
329
flags()330 uint32_t CachedRangedDataSource::flags() {
331 return mSource->flags();
332 }
333
setCachedRange(off64_t offset,size_t size,bool assumeSourceOwnershipOnSuccess)334 status_t CachedRangedDataSource::setCachedRange(off64_t offset,
335 size_t size,
336 bool assumeSourceOwnershipOnSuccess) {
337 Mutex::Autolock autoLock(mLock);
338
339 clearCache();
340
341 mCache = (uint8_t *)malloc(size);
342
343 if (mCache == NULL) {
344 return -ENOMEM;
345 }
346
347 mCachedOffset = offset;
348 mCachedSize = size;
349
350 ssize_t err = mSource->readAt(mCachedOffset, mCache, mCachedSize);
351
352 if (err < (ssize_t)size) {
353 clearCache();
354
355 return ERROR_IO;
356 }
357 mOwnsDataSource = assumeSourceOwnershipOnSuccess;
358 return OK;
359 }
360
361 ////////////////////////////////////////////////////////////////////////////////
362
// Debug switch: when true, MPEG4Extractor::parseChunk() prints each chunk's
// type and size and hexdumps up to the first 256 bytes of the chunk.
static const bool kUseHexDump = false;
364
FourCC2MIME(uint32_t fourcc)365 static const char *FourCC2MIME(uint32_t fourcc) {
366 switch (fourcc) {
367 case FOURCC("mp4a"):
368 return MEDIA_MIMETYPE_AUDIO_AAC;
369
370 case FOURCC("samr"):
371 return MEDIA_MIMETYPE_AUDIO_AMR_NB;
372
373 case FOURCC("sawb"):
374 return MEDIA_MIMETYPE_AUDIO_AMR_WB;
375
376 case FOURCC("ec-3"):
377 return MEDIA_MIMETYPE_AUDIO_EAC3;
378
379 case FOURCC("mp4v"):
380 return MEDIA_MIMETYPE_VIDEO_MPEG4;
381
382 case FOURCC("s263"):
383 case FOURCC("h263"):
384 case FOURCC("H263"):
385 return MEDIA_MIMETYPE_VIDEO_H263;
386
387 case FOURCC("avc1"):
388 return MEDIA_MIMETYPE_VIDEO_AVC;
389
390 case FOURCC("hvc1"):
391 case FOURCC("hev1"):
392 return MEDIA_MIMETYPE_VIDEO_HEVC;
393
394 case FOURCC("apv1"):
395 // Enable APV codec support from Android Baklava
396 if (!(isAtLeastRelease(36, "Baklava") &&
397 com::android::media::extractor::flags::extractor_mp4_enable_apv())) {
398 ALOGV("APV support not enabled");
399 return "application/octet-stream";
400 }
401 return MEDIA_MIMETYPE_VIDEO_APV;
402
403 case FOURCC("dvav"):
404 case FOURCC("dva1"):
405 case FOURCC("dvhe"):
406 case FOURCC("dvh1"):
407 case FOURCC("dav1"):
408 return MEDIA_MIMETYPE_VIDEO_DOLBY_VISION;
409
410 case FOURCC("ac-4"):
411 return MEDIA_MIMETYPE_AUDIO_AC4;
412 case FOURCC("Opus"):
413 return MEDIA_MIMETYPE_AUDIO_OPUS;
414
415 case FOURCC("twos"):
416 case FOURCC("sowt"):
417 return MEDIA_MIMETYPE_AUDIO_RAW;
418 case FOURCC("alac"):
419 return MEDIA_MIMETYPE_AUDIO_ALAC;
420 case FOURCC("fLaC"):
421 return MEDIA_MIMETYPE_AUDIO_FLAC;
422 case FOURCC("av01"):
423 return MEDIA_MIMETYPE_VIDEO_AV1;
424 case FOURCC("vp09"):
425 return MEDIA_MIMETYPE_VIDEO_VP9;
426 case FOURCC(".mp3"):
427 case 0x6D730055: // "ms U" mp3 audio
428 return MEDIA_MIMETYPE_AUDIO_MPEG;
429 case FOURCC("mha1"):
430 return MEDIA_MIMETYPE_AUDIO_MPEGH_MHA1;
431 case FOURCC("mhm1"):
432 return MEDIA_MIMETYPE_AUDIO_MPEGH_MHM1;
433 case FOURCC("dtsc"):
434 return MEDIA_MIMETYPE_AUDIO_DTS;
435 case FOURCC("dtse"):
436 case FOURCC("dtsh"):
437 return MEDIA_MIMETYPE_AUDIO_DTS_HD;
438 case FOURCC("dtsl"):
439 return MEDIA_MIMETYPE_AUDIO_DTS_HD_MA;
440 case FOURCC("dtsx"):
441 return MEDIA_MIMETYPE_AUDIO_DTS_UHD_P2;
442 default:
443 ALOGW("Unknown fourcc: %c%c%c%c",
444 (fourcc >> 24) & 0xff,
445 (fourcc >> 16) & 0xff,
446 (fourcc >> 8) & 0xff,
447 fourcc & 0xff
448 );
449 return "application/octet-stream";
450 }
451 }
452
AdjustChannelsAndRate(uint32_t fourcc,uint32_t * channels,uint32_t * rate)453 static bool AdjustChannelsAndRate(uint32_t fourcc, uint32_t *channels, uint32_t *rate) {
454 if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_NB, FourCC2MIME(fourcc))) {
455 // AMR NB audio is always mono, 8kHz
456 *channels = 1;
457 *rate = 8000;
458 return true;
459 } else if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_WB, FourCC2MIME(fourcc))) {
460 // AMR WB audio is always mono, 16kHz
461 *channels = 1;
462 *rate = 16000;
463 return true;
464 }
465 return false;
466 }
467
MPEG4Extractor(DataSourceHelper * source,const char * mime)468 MPEG4Extractor::MPEG4Extractor(DataSourceHelper *source, const char *mime)
469 : mMoofOffset(0),
470 mMoofFound(false),
471 mMdatFound(false),
472 mDataSource(source),
473 mInitCheck(NO_INIT),
474 mHeaderTimescale(0),
475 mIsQT(false),
476 mIsHeif(false),
477 mHasMoovBox(false),
478 mPreferHeif(mime != NULL && !strcasecmp(mime, MEDIA_MIMETYPE_CONTAINER_HEIF)),
479 mIsAvif(false),
480 mFirstTrack(NULL),
481 mLastTrack(NULL) {
482 ALOGV("mime=%s, mPreferHeif=%d", mime, mPreferHeif);
483 mFileMetaData = AMediaFormat_new();
484 }
485
~MPEG4Extractor()486 MPEG4Extractor::~MPEG4Extractor() {
487 Track *track = mFirstTrack;
488 while (track) {
489 Track *next = track->next;
490
491 delete track;
492 track = next;
493 }
494 mFirstTrack = mLastTrack = NULL;
495
496 for (size_t i = 0; i < mPssh.size(); i++) {
497 delete [] mPssh[i].data;
498 }
499 mPssh.clear();
500
501 delete mDataSource;
502 AMediaFormat_delete(mFileMetaData);
503 }
504
flags() const505 uint32_t MPEG4Extractor::flags() const {
506 return CAN_PAUSE |
507 ((mMoofOffset == 0 || mSidxEntries.size() != 0) ?
508 (CAN_SEEK_BACKWARD | CAN_SEEK_FORWARD | CAN_SEEK) : 0);
509 }
510
getMetaData(AMediaFormat * meta)511 media_status_t MPEG4Extractor::getMetaData(AMediaFormat *meta) {
512 status_t err;
513 if ((err = readMetaData()) != OK) {
514 return AMEDIA_ERROR_UNKNOWN;
515 }
516 AMediaFormat_copy(meta, mFileMetaData);
517 return AMEDIA_OK;
518 }
519
countTracks()520 size_t MPEG4Extractor::countTracks() {
521 status_t err;
522 if ((err = readMetaData()) != OK) {
523 ALOGV("MPEG4Extractor::countTracks: no tracks");
524 return 0;
525 }
526
527 size_t n = 0;
528 Track *track = mFirstTrack;
529 while (track) {
530 ++n;
531 track = track->next;
532 }
533
534 ALOGV("MPEG4Extractor::countTracks: %zu tracks", n);
535 return n;
536 }
537
getTrackMetaData(AMediaFormat * meta,size_t index,uint32_t flags)538 media_status_t MPEG4Extractor::getTrackMetaData(
539 AMediaFormat *meta,
540 size_t index, uint32_t flags) {
541 status_t err;
542 if ((err = readMetaData()) != OK) {
543 return AMEDIA_ERROR_UNKNOWN;
544 }
545
546 Track *track = mFirstTrack;
547 while (index > 0) {
548 if (track == NULL) {
549 return AMEDIA_ERROR_UNKNOWN;
550 }
551
552 track = track->next;
553 --index;
554 }
555
556 if (track == NULL) {
557 return AMEDIA_ERROR_UNKNOWN;
558 }
559
560 [this, &track] {
561 int64_t duration = track->mMdhdDurationUs;
562 int32_t samplerate;
563 // Only for audio track.
564 if (track->elst_needs_processing && mHeaderTimescale != 0 && duration != 0 &&
565 AMediaFormat_getInt32(track->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, &samplerate)) {
566 // Elst has to be processed only the first time this function is called.
567 track->elst_needs_processing = false;
568
569 if (track->elst_segment_duration > INT64_MAX) {
570 return;
571 }
572 int64_t segment_duration = track->elst_segment_duration;
573 int64_t media_time = track->elst_media_time;
574 int64_t halfscale = track->timescale / 2;
575
576 ALOGV("segment_duration = %" PRId64 ", media_time = %" PRId64
577 ", halfscale = %" PRId64 ", mdhd_timescale = %d, track_timescale = %u",
578 segment_duration, media_time,
579 halfscale, mHeaderTimescale, track->timescale);
580
581 if ((uint32_t)samplerate != track->timescale){
582 ALOGV("samplerate:%" PRId32 ", track->timescale and samplerate are different!",
583 samplerate);
584 }
585 // Both delay and paddingsamples have to be set inorder for either to be
586 // effective in the lower layers.
587 int64_t delay = 0;
588 if (media_time > 0) { // Gapless playback
589 // delay = ((media_time * samplerate) + halfscale) / track->timescale;
590 if (__builtin_mul_overflow(media_time, samplerate, &delay) ||
591 __builtin_add_overflow(delay, halfscale, &delay) ||
592 (delay /= track->timescale, false) ||
593 delay > INT32_MAX ||
594 delay < INT32_MIN) {
595 ALOGW("ignoring edit list with bogus values");
596 return;
597 }
598 }
599 ALOGV("delay = %" PRId64, delay);
600 AMediaFormat_setInt32(track->meta, AMEDIAFORMAT_KEY_ENCODER_DELAY, delay);
601
602 int64_t paddingsamples = 0;
603 if (segment_duration > 0) {
604 int64_t scaled_duration;
605 // scaled_duration = duration * mHeaderTimescale;
606 if (__builtin_mul_overflow(duration, mHeaderTimescale, &scaled_duration)) {
607 return;
608 }
609 ALOGV("scaled_duration = %" PRId64, scaled_duration);
610
611 int64_t segment_end;
612 int64_t padding;
613 int64_t segment_duration_e6;
614 int64_t media_time_scaled_e6;
615 int64_t media_time_scaled;
616 // padding = scaled_duration - ((segment_duration * 1000000) +
617 // ((media_time * mHeaderTimescale * 1000000)/track->timescale) )
618 // segment_duration is based on timescale in movie header box(mdhd)
619 // media_time is based on timescale track header/media timescale
620 if (__builtin_mul_overflow(segment_duration, 1000000, &segment_duration_e6) ||
621 __builtin_mul_overflow(media_time, mHeaderTimescale, &media_time_scaled) ||
622 __builtin_mul_overflow(media_time_scaled, 1000000, &media_time_scaled_e6)) {
623 return;
624 }
625 media_time_scaled_e6 /= track->timescale;
626 if (__builtin_add_overflow(segment_duration_e6, media_time_scaled_e6, &segment_end)
627 || __builtin_sub_overflow(scaled_duration, segment_end, &padding)) {
628 return;
629 }
630 ALOGV("segment_end = %" PRId64 ", padding = %" PRId64, segment_end, padding);
631 // track duration from media header (which is what AMEDIAFORMAT_KEY_DURATION is)
632 // might be slightly shorter than the segment duration, which would make the
633 // padding negative. Clamp to zero.
634 if (padding > 0) {
635 int64_t halfscale_mht = mHeaderTimescale / 2;
636 int64_t halfscale_e6;
637 int64_t timescale_e6;
638 // paddingsamples = ((padding * samplerate) + (halfscale_mht * 1000000))
639 // / (mHeaderTimescale * 1000000);
640 if (__builtin_mul_overflow(padding, samplerate, &paddingsamples) ||
641 __builtin_mul_overflow(halfscale_mht, 1000000, &halfscale_e6) ||
642 __builtin_mul_overflow(mHeaderTimescale, 1000000, ×cale_e6) ||
643 __builtin_add_overflow(paddingsamples, halfscale_e6, &paddingsamples) ||
644 (paddingsamples /= timescale_e6, false) ||
645 paddingsamples > INT32_MAX) {
646 return;
647 }
648 }
649 }
650 ALOGV("paddingsamples = %" PRId64, paddingsamples);
651 AMediaFormat_setInt32(track->meta, AMEDIAFORMAT_KEY_ENCODER_PADDING, paddingsamples);
652 }
653 }();
654
655 if ((flags & kIncludeExtensiveMetaData)
656 && !track->includes_expensive_metadata) {
657 track->includes_expensive_metadata = true;
658
659 const char *mime;
660 CHECK(AMediaFormat_getString(track->meta, AMEDIAFORMAT_KEY_MIME, &mime));
661 if (!strncasecmp("video/", mime, 6)) {
662 // MPEG2 tracks do not provide CSD, so read the stream header
663 if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG2)) {
664 off64_t offset;
665 size_t size;
666 if (track->sampleTable->getMetaDataForSample(
667 0 /* sampleIndex */, &offset, &size, NULL /* sampleTime */) == OK) {
668 if (size > kMaxTrackHeaderSize) {
669 size = kMaxTrackHeaderSize;
670 }
671 uint8_t header[kMaxTrackHeaderSize];
672 if (mDataSource->readAt(offset, &header, size) == (ssize_t)size) {
673 AMediaFormat_setBuffer(track->meta,
674 AMEDIAFORMAT_KEY_MPEG2_STREAM_HEADER, header, size);
675 }
676 }
677 }
678
679 if (mMoofOffset > 0) {
680 int64_t duration;
681 if (AMediaFormat_getInt64(track->meta,
682 AMEDIAFORMAT_KEY_DURATION, &duration)) {
683 // nothing fancy, just pick a frame near 1/4th of the duration
684 AMediaFormat_setInt64(track->meta,
685 AMEDIAFORMAT_KEY_THUMBNAIL_TIME, duration / 4);
686 }
687 } else {
688 uint32_t sampleIndex;
689 uint64_t sampleTime;
690 if (track->timescale != 0 &&
691 track->sampleTable->findThumbnailSample(&sampleIndex) == OK
692 && track->sampleTable->getMetaDataForSample(
693 sampleIndex, NULL /* offset */, NULL /* size */,
694 &sampleTime) == OK) {
695 AMediaFormat_setInt64(track->meta,
696 AMEDIAFORMAT_KEY_THUMBNAIL_TIME,
697 ((int64_t)sampleTime * 1000000) / track->timescale);
698 }
699 }
700 }
701 }
702
703 return AMediaFormat_copy(meta, track->meta);
704 }
705
// Parses the file (once) and populates the file-level metadata.
//
// The first call walks the top-level chunks with parseChunk(), adds one track
// per image for HEIF/AVIF content, sets the container MIME type on
// mFileMetaData and serializes the collected PSSH entries into it. The outcome
// is stored in mInitCheck; later calls return that value without re-parsing.
//
// Returns mInitCheck, or NO_MEMORY if the PSSH buffer cannot be allocated.
status_t MPEG4Extractor::readMetaData() {
    // Anything other than NO_INIT means parsing has already been attempted.
    if (mInitCheck != NO_INIT) {
        return mInitCheck;
    }

    off64_t offset = 0;
    status_t err;
    bool sawMoovOrSidx = false;

    // Keep parsing top-level chunks until either
    //  - a moov box was seen, parseChunk() has returned UNKNOWN_ERROR at least
    //    once, and an mdat or moof was found; or
    //  - the file is HEIF (preferred, or without a moov box) and the item
    //    table is valid.
    while (!((mHasMoovBox && sawMoovOrSidx && (mMdatFound || mMoofFound)) ||
             (mIsHeif && (mPreferHeif || !mHasMoovBox) &&
                     (mItemTable != NULL) && mItemTable->isValid()))) {
        off64_t orig_offset = offset;
        err = parseChunk(&offset, 0);

        // UNKNOWN_ERROR is not treated as a failure here; any other error
        // ends the loop.
        if (err != OK && err != UNKNOWN_ERROR) {
            break;
        } else if (offset <= orig_offset) {
            // only continue parsing if the offset was advanced,
            // otherwise we might end up in an infinite loop
            ALOGE("did not advance: %lld->%lld", (long long)orig_offset, (long long)offset);
            err = ERROR_MALFORMED;
            break;
        } else if (err == UNKNOWN_ERROR) {
            // NOTE(review): going by the variable name, parseChunk() presumably
            // returns UNKNOWN_ERROR after a moov or sidx box -- confirm there.
            sawMoovOrSidx = true;
        }
    }

    // Still images: publish the EXIF/XMP locations and add one track per image.
    if ((mIsAvif || mIsHeif) && (mItemTable != NULL) && (mItemTable->countImages() > 0)) {
        off64_t exifOffset;
        size_t exifSize;
        if (mItemTable->getExifOffsetAndSize(&exifOffset, &exifSize) == OK) {
            AMediaFormat_setInt64(mFileMetaData,
                    AMEDIAFORMAT_KEY_EXIF_OFFSET, (int64_t)exifOffset);
            AMediaFormat_setInt64(mFileMetaData,
                    AMEDIAFORMAT_KEY_EXIF_SIZE, (int64_t)exifSize);
        }
        off64_t xmpOffset;
        size_t xmpSize;
        if (mItemTable->getXmpOffsetAndSize(&xmpOffset, &xmpSize) == OK) {
            // TODO(chz): b/175717339
            // Use a hard-coded string here instead of named keys. The keys are available
            // only on API 31+. The mp4 extractor is part of mainline and has min_sdk_version
            // of 29. This hard-coded string can be replaced with the named constant once
            // the mp4 extractor is built against API 31+.
            AMediaFormat_setInt64(mFileMetaData,
                    "xmp-offset" /*AMEDIAFORMAT_KEY_XMP_OFFSET*/, (int64_t)xmpOffset);
            AMediaFormat_setInt64(mFileMetaData,
                    "xmp-size" /*AMEDIAFORMAT_KEY_XMP_SIZE*/, (int64_t)xmpSize);
        }
        for (uint32_t imageIndex = 0;
                imageIndex < mItemTable->countImages(); imageIndex++) {
            AMediaFormat *meta = mItemTable->getImageMeta(imageIndex);
            if (meta == NULL) {
                ALOGE("heif image %u has no meta!", imageIndex);
                continue;
            }
            // Some heif files advertise image sequence brands (eg. 'hevc') in
            // ftyp box, but don't have any valid tracks in them. Instead of
            // reporting the entire file as malformed, we override the error
            // to allow still images to be extracted.
            if (err != OK) {
                ALOGW("Extracting still images only");
                err = OK;
            }
            // Reached only for an image that has metadata, so the file counts
            // as initialized as soon as one usable image exists.
            mInitCheck = OK;

            ALOGV("adding %s image track %u", mIsHeif ? "HEIF" : "AVIF", imageIndex);
            // Append the new image track to the end of the track list.
            Track *track = new Track;
            if (mLastTrack != NULL) {
                mLastTrack->next = track;
            } else {
                mFirstTrack = track;
            }
            mLastTrack = track;

            track->meta = meta;
            AMediaFormat_setInt32(track->meta, AMEDIAFORMAT_KEY_TRACK_ID, imageIndex);
            track->timescale = 1000000;
        }
    }

    // mInitCheck is OK here if the image loop above set it.
    // NOTE(review): presumably parseChunk() can also set it (its body is not
    // fully visible here) -- confirm. Otherwise the parse result becomes the
    // init status.
    if (mInitCheck == OK) {
        // Pick the container MIME type from the first matching kind of track:
        // video, then audio, then HEIC image, then AVIF image.
        if (findTrackByMimePrefix("video/") != NULL) {
            AMediaFormat_setString(mFileMetaData,
                    AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_CONTAINER_MPEG4);
        } else if (findTrackByMimePrefix("audio/") != NULL) {
            AMediaFormat_setString(mFileMetaData,
                    AMEDIAFORMAT_KEY_MIME, "audio/mp4");
        } else if (findTrackByMimePrefix(
                MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC) != NULL) {
            AMediaFormat_setString(mFileMetaData,
                    AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_CONTAINER_HEIF);
        } else if (findTrackByMimePrefix(
                MEDIA_MIMETYPE_IMAGE_AVIF) != NULL) {
            AMediaFormat_setString(mFileMetaData,
                    AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_IMAGE_AVIF);
        } else {
            AMediaFormat_setString(mFileMetaData,
                    AMEDIAFORMAT_KEY_MIME, "application/octet-stream");
        }
    } else {
        mInitCheck = err;
    }

    CHECK_NE(err, (status_t)NO_INIT);

    // copy pssh data into file metadata
    // Each entry contributes 20 header bytes plus its payload; the total is
    // accumulated in 64 bits and only used if it fits in 32 bits.
    uint64_t psshsize = 0;
    for (size_t i = 0; i < mPssh.size(); i++) {
        psshsize += 20 + mPssh[i].datalen;
    }
    if (psshsize > 0 && psshsize <= UINT32_MAX) {
        char *buf = (char*)malloc(psshsize);
        if (!buf) {
            ALOGE("b/28471206");
            return NO_MEMORY;
        }
        char *ptr = buf;
        for (size_t i = 0; i < mPssh.size(); i++) {
            // NOTE(review): copies 20 bytes starting at uuid, which relies on
            // the length field directly following uuid in the entry struct --
            // confirm against its declaration.
            memcpy(ptr, mPssh[i].uuid, 20); // uuid + length
            memcpy(ptr + 20, mPssh[i].data, mPssh[i].datalen);
            ptr += (20 + mPssh[i].datalen);
        }
        AMediaFormat_setBuffer(mFileMetaData, AMEDIAFORMAT_KEY_PSSH, buf, psshsize);
        // The temporary buffer is released as soon as it has been handed to
        // AMediaFormat_setBuffer().
        free(buf);
    }

    return mInitCheck;
}
836
837 struct PathAdder {
PathAdderandroid::PathAdder838 PathAdder(Vector<uint32_t> *path, uint32_t chunkType)
839 : mPath(path) {
840 mPath->push(chunkType);
841 }
842
~PathAdderandroid::PathAdder843 ~PathAdder() {
844 mPath->pop();
845 }
846
847 private:
848 Vector<uint32_t> *mPath;
849
850 PathAdder(const PathAdder &);
851 PathAdder &operator=(const PathAdder &);
852 };
853
underMetaDataPath(const Vector<uint32_t> & path)854 static bool underMetaDataPath(const Vector<uint32_t> &path) {
855 return path.size() >= 5
856 && path[0] == FOURCC("moov")
857 && path[1] == FOURCC("udta")
858 && path[2] == FOURCC("meta")
859 && path[3] == FOURCC("ilst");
860 }
861
underQTMetaPath(const Vector<uint32_t> & path,int32_t depth)862 static bool underQTMetaPath(const Vector<uint32_t> &path, int32_t depth) {
863 return path.size() >= 2
864 && path[0] == FOURCC("moov")
865 && path[1] == FOURCC("meta")
866 && (depth == 2
867 || (depth == 3
868 && (path[2] == FOURCC("hdlr")
869 || path[2] == FOURCC("ilst")
870 || path[2] == FOURCC("keys"))));
871 }
872
873 // Given a time in seconds since Jan 1 1904, produce a human-readable string.
convertTimeToDate(int64_t time_1904,String8 * s)874 static bool convertTimeToDate(int64_t time_1904, String8 *s) {
875 // delta between mpeg4 time and unix epoch time
876 static const int64_t delta = (((66 * 365 + 17) * 24) * 3600);
877 if (time_1904 < INT64_MIN + delta) {
878 return false;
879 }
880 time_t time_1970 = time_1904 - delta;
881
882 char tmp[32];
883 struct tm* tm = gmtime(&time_1970);
884 if (tm != NULL &&
885 strftime(tmp, sizeof(tmp), "%Y%m%dT%H%M%S.000Z", tm) > 0) {
886 *s = tmp;
887 return true;
888 }
889 return false;
890 }
891
parseChunk(off64_t * offset,int depth)892 status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
893 ALOGV("entering parseChunk %lld/%d", (long long)*offset, depth);
894
895 if (*offset < 0) {
896 ALOGE("b/23540914");
897 return ERROR_MALFORMED;
898 }
899 if (depth > 100) {
900 ALOGE("b/27456299");
901 return ERROR_MALFORMED;
902 }
903 uint32_t hdr[2];
904 if (mDataSource->readAt(*offset, hdr, 8) < 8) {
905 return ERROR_IO;
906 }
907 uint64_t chunk_size = ntohl(hdr[0]);
908 int32_t chunk_type = ntohl(hdr[1]);
909 off64_t data_offset = *offset + 8;
910
911 if (chunk_size == 1) {
912 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
913 return ERROR_IO;
914 }
915 chunk_size = ntoh64(chunk_size);
916 data_offset += 8;
917
918 if (chunk_size < 16) {
919 // The smallest valid chunk is 16 bytes long in this case.
920 return ERROR_MALFORMED;
921 }
922 } else if (chunk_size == 0) {
923 if (depth == 0) {
924 // atom extends to end of file
925 off64_t sourceSize;
926 if (mDataSource->getSize(&sourceSize) == OK) {
927 chunk_size = (sourceSize - *offset);
928 } else {
929 // XXX could we just pick a "sufficiently large" value here?
930 ALOGE("atom size is 0, and data source has no size");
931 return ERROR_MALFORMED;
932 }
933 } else {
934 // not allowed for non-toplevel atoms, skip it
935 *offset += 4;
936 return OK;
937 }
938 } else if (chunk_size < 8) {
939 // The smallest valid chunk is 8 bytes long.
940 ALOGE("invalid chunk size: %" PRIu64, chunk_size);
941 return ERROR_MALFORMED;
942 }
943
944 char chunk[5];
945 MakeFourCCString(chunk_type, chunk);
946 ALOGV("chunk: %s @ %lld, %d", chunk, (long long)*offset, depth);
947
948 if (kUseHexDump) {
949 static const char kWhitespace[] = " ";
950 const char *indent = &kWhitespace[sizeof(kWhitespace) - 1 - 2 * depth];
951 printf("%sfound chunk '%s' of size %" PRIu64 "\n", indent, chunk, chunk_size);
952
953 char buffer[256];
954 size_t n = chunk_size;
955 if (n > sizeof(buffer)) {
956 n = sizeof(buffer);
957 }
958 if (mDataSource->readAt(*offset, buffer, n)
959 < (ssize_t)n) {
960 return ERROR_IO;
961 }
962
963 hexdump(buffer, n);
964 }
965
966 PathAdder autoAdder(&mPath, chunk_type);
967
968 // (data_offset - *offset) is either 8 or 16
969 off64_t chunk_data_size = chunk_size - (data_offset - *offset);
970 if (chunk_data_size < 0) {
971 ALOGE("b/23540914");
972 return ERROR_MALFORMED;
973 }
974 if (chunk_type != FOURCC("mdat") && chunk_data_size > kMaxAtomSize) {
975 char errMsg[100];
976 sprintf(errMsg, "%s atom has size %" PRId64, chunk, chunk_data_size);
977 ALOGE("%s (b/28615448)", errMsg);
978 android_errorWriteWithInfoLog(0x534e4554, "28615448", -1, errMsg, strlen(errMsg));
979 return ERROR_MALFORMED;
980 }
981
982 if (chunk_type != FOURCC("cprt")
983 && chunk_type != FOURCC("covr")
984 && mPath.size() == 5 && underMetaDataPath(mPath)) {
985 off64_t stop_offset = *offset + chunk_size;
986 *offset = data_offset;
987 while (*offset < stop_offset) {
988 status_t err = parseChunk(offset, depth + 1);
989 if (err != OK) {
990 return err;
991 }
992 }
993
994 if (*offset != stop_offset) {
995 return ERROR_MALFORMED;
996 }
997
998 return OK;
999 }
1000
1001 switch(chunk_type) {
1002 case FOURCC("moov"):
1003 case FOURCC("trak"):
1004 case FOURCC("mdia"):
1005 case FOURCC("minf"):
1006 case FOURCC("dinf"):
1007 case FOURCC("stbl"):
1008 case FOURCC("mvex"):
1009 case FOURCC("moof"):
1010 case FOURCC("traf"):
1011 case FOURCC("mfra"):
1012 case FOURCC("udta"):
1013 case FOURCC("ilst"):
1014 case FOURCC("sinf"):
1015 case FOURCC("schi"):
1016 case FOURCC("edts"):
1017 case FOURCC("wave"):
1018 {
1019 if (chunk_type == FOURCC("moov") && depth != 0) {
1020 ALOGE("moov: depth %d", depth);
1021 return ERROR_MALFORMED;
1022 }
1023
1024 if (chunk_type == FOURCC("moov") && mInitCheck == OK) {
1025 ALOGE("duplicate moov");
1026 return ERROR_MALFORMED;
1027 }
1028
1029 if (chunk_type == FOURCC("moof") && !mMoofFound) {
1030 // store the offset of the first segment
1031 mMoofFound = true;
1032 mMoofOffset = *offset;
1033 }
1034
1035 if (chunk_type == FOURCC("stbl")) {
1036 ALOGV("sampleTable chunk is %" PRIu64 " bytes long.", chunk_size);
1037
1038 if (mDataSource->flags()
1039 & (DataSourceBase::kWantsPrefetching
1040 | DataSourceBase::kIsCachingDataSource)) {
1041 CachedRangedDataSource *cachedSource =
1042 new CachedRangedDataSource(mDataSource);
1043
1044 if (cachedSource->setCachedRange(
1045 *offset, chunk_size,
1046 true /* assume ownership on success */) == OK) {
1047 mDataSource = cachedSource;
1048 } else {
1049 delete cachedSource;
1050 }
1051 }
1052
1053 if (mLastTrack == NULL) {
1054 return ERROR_MALFORMED;
1055 }
1056
1057 mLastTrack->sampleTable = new SampleTable(mDataSource);
1058 }
1059
1060 bool isTrack = false;
1061 if (chunk_type == FOURCC("trak")) {
1062 if (depth != 1) {
1063 ALOGE("trak: depth %d", depth);
1064 return ERROR_MALFORMED;
1065 }
1066 isTrack = true;
1067
1068 ALOGV("adding new track");
1069 Track *track = new Track;
1070 if (mLastTrack) {
1071 mLastTrack->next = track;
1072 } else {
1073 mFirstTrack = track;
1074 }
1075 mLastTrack = track;
1076
1077 track->meta = AMediaFormat_new();
1078 AMediaFormat_setString(track->meta,
1079 AMEDIAFORMAT_KEY_MIME, "application/octet-stream");
1080 }
1081
1082 off64_t stop_offset = *offset + chunk_size;
1083 *offset = data_offset;
1084 while (*offset < stop_offset) {
1085
1086 // pass udata terminate
1087 if (mIsQT && stop_offset - *offset == 4 && chunk_type == FOURCC("udta")) {
1088 // handle the case that udta terminates with terminate code x00000000
1089 // note that 0 terminator is optional and we just handle this case.
1090 uint32_t terminate_code = 1;
1091 mDataSource->readAt(*offset, &terminate_code, 4);
1092 if (0 == terminate_code) {
1093 *offset += 4;
1094 ALOGD("Terminal code for udta");
1095 continue;
1096 } else {
1097 ALOGW("invalid udta Terminal code");
1098 }
1099 }
1100
1101 status_t err = parseChunk(offset, depth + 1);
1102 if (err != OK) {
1103 if (isTrack) {
1104 mLastTrack->skipTrack = true;
1105 break;
1106 }
1107 return err;
1108 }
1109 }
1110
1111 if (*offset != stop_offset) {
1112 return ERROR_MALFORMED;
1113 }
1114
1115 if (isTrack) {
1116 int32_t trackId;
1117 // There must be exactly one track header per track.
1118
1119 if (!AMediaFormat_getInt32(mLastTrack->meta,
1120 AMEDIAFORMAT_KEY_TRACK_ID, &trackId)) {
1121 mLastTrack->skipTrack = true;
1122 }
1123
1124 status_t err = verifyTrack(mLastTrack);
1125 if (err != OK) {
1126 mLastTrack->skipTrack = true;
1127 }
1128
1129
1130 if (mLastTrack->skipTrack) {
1131 ALOGV("skipping this track...");
1132 Track *cur = mFirstTrack;
1133
1134 if (cur == mLastTrack) {
1135 delete cur;
1136 mFirstTrack = mLastTrack = NULL;
1137 } else {
1138 while (cur && cur->next != mLastTrack) {
1139 cur = cur->next;
1140 }
1141 if (cur) {
1142 cur->next = NULL;
1143 }
1144 delete mLastTrack;
1145 mLastTrack = cur;
1146 }
1147
1148 return OK;
1149 }
1150
1151 // place things we built elsewhere into their final locations
1152
1153 // put aggregated tx3g data into the metadata
1154 if (mLastTrack->mTx3gFilled > 0) {
1155 ALOGV("Putting %zu bytes of tx3g data into meta data",
1156 mLastTrack->mTx3gFilled);
1157 AMediaFormat_setBuffer(mLastTrack->meta,
1158 AMEDIAFORMAT_KEY_TEXT_FORMAT_DATA,
1159 mLastTrack->mTx3gBuffer, mLastTrack->mTx3gFilled);
1160 // drop it now to reduce our footprint
1161 free(mLastTrack->mTx3gBuffer);
1162 mLastTrack->mTx3gBuffer = NULL;
1163 mLastTrack->mTx3gFilled = 0;
1164 mLastTrack->mTx3gSize = 0;
1165 }
1166
1167 const char *mime;
1168 AMediaFormat_getString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, &mime);
1169
1170 if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_DOLBY_VISION)) {
1171 void *data;
1172 size_t size;
1173
1174 if (AMediaFormat_getBuffer(mLastTrack->meta, AMEDIAFORMAT_KEY_CSD_2,
1175 &data, &size)
1176 && size >= 5) {
1177 const uint8_t *ptr = (const uint8_t *)data;
1178 const uint8_t profile = ptr[2] >> 1;
1179 const uint8_t blCompatibilityId = (ptr[4]) >> 4;
1180 bool create_two_tracks = false;
1181
1182 if (blCompatibilityId && blCompatibilityId != 15) {
1183 create_two_tracks = true;
1184 }
1185
1186 if (4 == profile || 7 == profile ||
1187 (profile >= 8 && profile < 11 && create_two_tracks)) {
1188 // we need a backward compatible track
1189 ALOGV("Adding new backward compatible track");
1190 Track *track_b = new Track;
1191
1192 track_b->timescale = mLastTrack->timescale;
1193 track_b->sampleTable = mLastTrack->sampleTable;
1194 track_b->includes_expensive_metadata =
1195 mLastTrack->includes_expensive_metadata;
1196 track_b->skipTrack = mLastTrack->skipTrack;
1197 track_b->elst_needs_processing = mLastTrack->elst_needs_processing;
1198 track_b->elst_media_time = mLastTrack->elst_media_time;
1199 track_b->elst_segment_duration = mLastTrack->elst_segment_duration;
1200 track_b->elst_shift_start_ticks = mLastTrack->elst_shift_start_ticks;
1201 track_b->elst_initial_empty_edit_ticks =
1202 mLastTrack->elst_initial_empty_edit_ticks;
1203 track_b->subsample_encryption = mLastTrack->subsample_encryption;
1204
1205 track_b->mTx3gBuffer = mLastTrack->mTx3gBuffer;
1206 track_b->mTx3gSize = mLastTrack->mTx3gSize;
1207 track_b->mTx3gFilled = mLastTrack->mTx3gFilled;
1208
1209 track_b->meta = AMediaFormat_new();
1210 AMediaFormat_copy(track_b->meta, mLastTrack->meta);
1211
1212 mLastTrack->next = track_b;
1213 track_b->next = NULL;
1214
1215 // we want to remove the csd-2 key from the metadata, but
1216 // don't have an AMediaFormat_* function to do so. Settle
1217 // for replacing this csd-2 with an empty csd-2.
1218 uint8_t emptybuffer[8] = {};
1219 AMediaFormat_setBuffer(track_b->meta, AMEDIAFORMAT_KEY_CSD_2,
1220 emptybuffer, 0);
1221
1222 if (4 == profile || 7 == profile || 8 == profile ) {
1223 AMediaFormat_setString(track_b->meta,
1224 AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_VIDEO_HEVC);
1225 } else if (9 == profile) {
1226 AMediaFormat_setString(track_b->meta,
1227 AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_VIDEO_AVC);
1228 } else if (10 == profile) {
1229 AMediaFormat_setString(track_b->meta,
1230 AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_VIDEO_AV1);
1231 } // Should never get to else part
1232
1233 mLastTrack = track_b;
1234 }
1235 }
1236 }
1237 } else if (chunk_type == FOURCC("moov")) {
1238 mInitCheck = OK;
1239
1240 return UNKNOWN_ERROR; // Return a generic error.
1241 }
1242 break;
1243 }
1244
1245 case FOURCC("schm"):
1246 {
1247
1248 *offset += chunk_size;
1249 if (!mLastTrack) {
1250 return ERROR_MALFORMED;
1251 }
1252
1253 uint32_t scheme_type;
1254 if (mDataSource->readAt(data_offset + 4, &scheme_type, 4) < 4) {
1255 return ERROR_IO;
1256 }
1257 scheme_type = ntohl(scheme_type);
1258 int32_t mode = kCryptoModeUnencrypted;
1259 switch(scheme_type) {
1260 case FOURCC("cbc1"):
1261 {
1262 mode = kCryptoModeAesCbc;
1263 break;
1264 }
1265 case FOURCC("cbcs"):
1266 {
1267 mode = kCryptoModeAesCbc;
1268 mLastTrack->subsample_encryption = true;
1269 break;
1270 }
1271 case FOURCC("cenc"):
1272 {
1273 mode = kCryptoModeAesCtr;
1274 break;
1275 }
1276 case FOURCC("cens"):
1277 {
1278 mode = kCryptoModeAesCtr;
1279 mLastTrack->subsample_encryption = true;
1280 break;
1281 }
1282 }
1283 if (mode != kCryptoModeUnencrypted) {
1284 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CRYPTO_MODE, mode);
1285 }
1286 break;
1287 }
1288
1289
1290 case FOURCC("elst"):
1291 {
1292 *offset += chunk_size;
1293
1294 if (!mLastTrack) {
1295 return ERROR_MALFORMED;
1296 }
1297
1298 // See 14496-12 8.6.6
1299 uint8_t version;
1300 if (mDataSource->readAt(data_offset, &version, 1) < 1) {
1301 return ERROR_IO;
1302 }
1303
1304 uint32_t entry_count;
1305 if (!mDataSource->getUInt32(data_offset + 4, &entry_count)) {
1306 return ERROR_IO;
1307 }
1308
1309 if (entry_count > 2) {
1310 /* We support a single entry for gapless playback or negating offset for
1311 * reordering B frames, two entries (empty edit) for start offset at the moment.
1312 */
1313 ALOGW("ignoring edit list with %d entries", entry_count);
1314 } else {
1315 off64_t entriesoffset = data_offset + 8;
1316 uint64_t segment_duration;
1317 int64_t media_time;
1318 bool empty_edit_present = false;
1319 for (int i = 0; i < entry_count; ++i) {
1320 switch (version) {
1321 case 0: {
1322 uint32_t sd;
1323 int32_t mt;
1324 if (!mDataSource->getUInt32(entriesoffset, &sd) ||
1325 !mDataSource->getUInt32(entriesoffset + 4, (uint32_t*)&mt)) {
1326 return ERROR_IO;
1327 }
1328 segment_duration = sd;
1329 media_time = mt;
1330 // 4(segment duration) + 4(media time) + 4(media rate)
1331 entriesoffset += 12;
1332 break;
1333 }
1334 case 1: {
1335 if (!mDataSource->getUInt64(entriesoffset, &segment_duration) ||
1336 !mDataSource->getUInt64(entriesoffset + 8, (uint64_t*)&media_time)) {
1337 return ERROR_IO;
1338 }
1339 // 8(segment duration) + 8(media time) + 4(media rate)
1340 entriesoffset += 20;
1341 break;
1342 }
1343 default:
1344 return ERROR_IO;
1345 break;
1346 }
1347 // Empty edit entry would have to be first entry.
1348 if (media_time == -1 && i == 0) {
1349 empty_edit_present = true;
1350 ALOGV("initial empty edit ticks: %" PRIu64, segment_duration);
1351 /* In movie header timescale, and needs to be converted to media timescale
1352 * after we get that from a track's 'mdhd' atom,
1353 * which at times come after 'elst'.
1354 */
1355 mLastTrack->elst_initial_empty_edit_ticks = segment_duration;
1356 } else if (media_time >= 0 && i == 0) {
1357 ALOGV("first edit list entry - from gapless playback files");
1358 mLastTrack->elst_media_time = media_time;
1359 mLastTrack->elst_segment_duration = segment_duration;
1360 ALOGV("segment_duration: %" PRIu64 " media_time: %" PRId64,
1361 segment_duration, media_time);
1362 // media_time is in media timescale as are STTS/CTTS entries.
1363 mLastTrack->elst_shift_start_ticks = media_time;
1364 } else if (empty_edit_present && i == 1) {
1365 // Process second entry only when the first entry was an empty edit entry.
1366 ALOGV("second edit list entry");
1367 mLastTrack->elst_shift_start_ticks = media_time;
1368 } else {
1369 ALOGW("for now, unsupported entry in edit list %" PRIu32, entry_count);
1370 }
1371 }
1372 // save these for later, because the elst atom might precede
1373 // the atoms that actually gives us the duration and sample rate
1374 // needed to calculate the padding and delay values
1375 mLastTrack->elst_needs_processing = true;
1376 }
1377 break;
1378 }
1379
1380 case FOURCC("frma"):
1381 {
1382 *offset += chunk_size;
1383
1384 uint32_t original_fourcc;
1385 if (mDataSource->readAt(data_offset, &original_fourcc, 4) < 4) {
1386 return ERROR_IO;
1387 }
1388 original_fourcc = ntohl(original_fourcc);
1389 ALOGV("read original format: %d", original_fourcc);
1390
1391 if (mLastTrack == NULL) {
1392 return ERROR_MALFORMED;
1393 }
1394
1395 AMediaFormat_setString(mLastTrack->meta,
1396 AMEDIAFORMAT_KEY_MIME, FourCC2MIME(original_fourcc));
1397 uint32_t num_channels = 0;
1398 uint32_t sample_rate = 0;
1399 if (AdjustChannelsAndRate(original_fourcc, &num_channels, &sample_rate)) {
1400 AMediaFormat_setInt32(mLastTrack->meta,
1401 AMEDIAFORMAT_KEY_CHANNEL_COUNT, num_channels);
1402 AMediaFormat_setInt32(mLastTrack->meta,
1403 AMEDIAFORMAT_KEY_SAMPLE_RATE, sample_rate);
1404 }
1405
1406 if (!mIsQT && original_fourcc == FOURCC("alac")) {
1407 off64_t tmpOffset = *offset;
1408 status_t err = parseALACSampleEntry(&tmpOffset);
1409 if (err != OK) {
1410 ALOGE("parseALACSampleEntry err:%d Line:%d", err, __LINE__);
1411 return err;
1412 }
1413 *offset = tmpOffset + 8;
1414 }
1415
1416 break;
1417 }
1418
1419 case FOURCC("tenc"):
1420 {
1421 *offset += chunk_size;
1422
1423 if (chunk_size < 32) {
1424 return ERROR_MALFORMED;
1425 }
1426
1427 // tenc box contains 1 byte version, 3 byte flags, 3 byte default algorithm id, one byte
1428 // default IV size, 16 bytes default KeyID
1429 // (ISO 23001-7)
1430
1431 uint8_t version;
1432 if (mDataSource->readAt(data_offset, &version, sizeof(version))
1433 < (ssize_t)sizeof(version)) {
1434 return ERROR_IO;
1435 }
1436
1437 uint8_t buf[4];
1438 memset(buf, 0, 4);
1439 if (mDataSource->readAt(data_offset + 4, buf + 1, 3) < 3) {
1440 return ERROR_IO;
1441 }
1442
1443 if (mLastTrack == NULL) {
1444 return ERROR_MALFORMED;
1445 }
1446
1447 uint8_t defaultEncryptedByteBlock = 0;
1448 uint8_t defaultSkipByteBlock = 0;
1449 uint32_t defaultAlgorithmId = ntohl(*((int32_t*)buf));
1450 if (version == 1) {
1451 uint32_t pattern = buf[2];
1452 defaultEncryptedByteBlock = pattern >> 4;
1453 defaultSkipByteBlock = pattern & 0xf;
1454 if (defaultEncryptedByteBlock == 0 && defaultSkipByteBlock == 0) {
1455 // use (1,0) to mean "encrypt everything"
1456 defaultEncryptedByteBlock = 1;
1457 }
1458 } else if (mLastTrack->subsample_encryption) {
1459 ALOGW("subsample_encryption should be version 1");
1460 } else if (defaultAlgorithmId > 1) {
1461 // only 0 (clear) and 1 (AES-128) are valid
1462 ALOGW("defaultAlgorithmId: %u is a reserved value", defaultAlgorithmId);
1463 defaultAlgorithmId = 1;
1464 }
1465
1466 memset(buf, 0, 4);
1467 if (mDataSource->readAt(data_offset + 7, buf + 3, 1) < 1) {
1468 return ERROR_IO;
1469 }
1470 uint32_t defaultIVSize = ntohl(*((int32_t*)buf));
1471
1472 if (defaultAlgorithmId == 0 && defaultIVSize != 0) {
1473 // only unencrypted data must have 0 IV size
1474 return ERROR_MALFORMED;
1475 } else if (defaultIVSize != 0 &&
1476 defaultIVSize != 8 &&
1477 defaultIVSize != 16) {
1478 return ERROR_MALFORMED;
1479 }
1480
1481 uint8_t defaultKeyId[16];
1482
1483 if (mDataSource->readAt(data_offset + 8, &defaultKeyId, 16) < 16) {
1484 return ERROR_IO;
1485 }
1486
1487 sp<ABuffer> defaultConstantIv;
1488 if (defaultAlgorithmId != 0 && defaultIVSize == 0) {
1489
1490 uint8_t ivlength;
1491 if (mDataSource->readAt(data_offset + 24, &ivlength, sizeof(ivlength))
1492 < (ssize_t)sizeof(ivlength)) {
1493 return ERROR_IO;
1494 }
1495
1496 if (ivlength != 8 && ivlength != 16) {
1497 ALOGW("unsupported IV length: %u", ivlength);
1498 return ERROR_MALFORMED;
1499 }
1500
1501 defaultConstantIv = new ABuffer(ivlength);
1502 if (mDataSource->readAt(data_offset + 25, defaultConstantIv->data(), ivlength)
1503 < (ssize_t)ivlength) {
1504 return ERROR_IO;
1505 }
1506
1507 defaultConstantIv->setRange(0, ivlength);
1508 }
1509
1510 int32_t tmpAlgorithmId;
1511 if (!AMediaFormat_getInt32(mLastTrack->meta,
1512 AMEDIAFORMAT_KEY_CRYPTO_MODE, &tmpAlgorithmId)) {
1513 AMediaFormat_setInt32(mLastTrack->meta,
1514 AMEDIAFORMAT_KEY_CRYPTO_MODE, defaultAlgorithmId);
1515 }
1516
1517 AMediaFormat_setInt32(mLastTrack->meta,
1518 AMEDIAFORMAT_KEY_CRYPTO_DEFAULT_IV_SIZE, defaultIVSize);
1519 AMediaFormat_setBuffer(mLastTrack->meta,
1520 AMEDIAFORMAT_KEY_CRYPTO_KEY, defaultKeyId, 16);
1521 AMediaFormat_setInt32(mLastTrack->meta,
1522 AMEDIAFORMAT_KEY_CRYPTO_ENCRYPTED_BYTE_BLOCK, defaultEncryptedByteBlock);
1523 AMediaFormat_setInt32(mLastTrack->meta,
1524 AMEDIAFORMAT_KEY_CRYPTO_SKIP_BYTE_BLOCK, defaultSkipByteBlock);
1525 if (defaultConstantIv != NULL) {
1526 AMediaFormat_setBuffer(mLastTrack->meta, AMEDIAFORMAT_KEY_CRYPTO_IV,
1527 defaultConstantIv->data(), defaultConstantIv->size());
1528 }
1529 break;
1530 }
1531
1532 case FOURCC("tkhd"):
1533 {
1534 *offset += chunk_size;
1535
1536 status_t err;
1537 if ((err = parseTrackHeader(data_offset, chunk_data_size)) != OK) {
1538 return err;
1539 }
1540
1541 break;
1542 }
1543
1544 case FOURCC("tref"):
1545 {
1546 off64_t stop_offset = *offset + chunk_size;
1547 *offset = data_offset;
1548 while (*offset < stop_offset) {
1549 status_t err = parseChunk(offset, depth + 1);
1550 if (err != OK) {
1551 return err;
1552 }
1553 }
1554 if (*offset != stop_offset) {
1555 return ERROR_MALFORMED;
1556 }
1557 break;
1558 }
1559
1560 case FOURCC("thmb"):
1561 {
1562 *offset += chunk_size;
1563
1564 if (mLastTrack != NULL) {
1565 // Skip thumbnail track for now since we don't have an
1566 // API to retrieve it yet.
1567 // The thumbnail track can't be accessed by negative index or time,
1568 // because each timed sample has its own corresponding thumbnail
1569 // in the thumbnail track. We'll need a dedicated API to retrieve
1570 // thumbnail at time instead.
1571 mLastTrack->skipTrack = true;
1572 }
1573
1574 break;
1575 }
1576
1577 case FOURCC("pssh"):
1578 {
1579 *offset += chunk_size;
1580
1581 PsshInfo pssh;
1582
1583 if (mDataSource->readAt(data_offset + 4, &pssh.uuid, 16) < 16) {
1584 return ERROR_IO;
1585 }
1586
1587 uint32_t psshdatalen = 0;
1588 if (mDataSource->readAt(data_offset + 20, &psshdatalen, 4) < 4) {
1589 return ERROR_IO;
1590 }
1591 pssh.datalen = ntohl(psshdatalen);
1592 ALOGV("pssh data size: %d", pssh.datalen);
1593 if (chunk_size < 20 || pssh.datalen > chunk_size - 20) {
1594 // pssh data length exceeds size of containing box
1595 return ERROR_MALFORMED;
1596 }
1597
1598 pssh.data = new (std::nothrow) uint8_t[pssh.datalen];
1599 if (pssh.data == NULL) {
1600 return ERROR_MALFORMED;
1601 }
1602 ALOGV("allocated pssh @ %p", pssh.data);
1603 ssize_t requested = (ssize_t) pssh.datalen;
1604 if (mDataSource->readAt(data_offset + 24, pssh.data, requested) < requested) {
1605 delete[] pssh.data;
1606 return ERROR_IO;
1607 }
1608 mPssh.push_back(pssh);
1609
1610 break;
1611 }
1612
1613 case FOURCC("mdhd"):
1614 {
1615 *offset += chunk_size;
1616
1617 if (chunk_data_size < 4 || mLastTrack == NULL) {
1618 return ERROR_MALFORMED;
1619 }
1620
1621 uint8_t version;
1622 if (mDataSource->readAt(
1623 data_offset, &version, sizeof(version))
1624 < (ssize_t)sizeof(version)) {
1625 return ERROR_IO;
1626 }
1627
1628 off64_t timescale_offset;
1629
1630 if (version == 1) {
1631 timescale_offset = data_offset + 4 + 16;
1632 } else if (version == 0) {
1633 timescale_offset = data_offset + 4 + 8;
1634 } else {
1635 return ERROR_IO;
1636 }
1637
1638 uint32_t timescale;
1639 if (mDataSource->readAt(
1640 timescale_offset, ×cale, sizeof(timescale))
1641 < (ssize_t)sizeof(timescale)) {
1642 return ERROR_IO;
1643 }
1644
1645 if (!timescale) {
1646 ALOGE("timescale should not be ZERO.");
1647 return ERROR_MALFORMED;
1648 }
1649
1650 mLastTrack->timescale = ntohl(timescale);
1651
1652 // 14496-12 says all ones means indeterminate, but some files seem to use
1653 // 0 instead. We treat both the same.
1654 int64_t duration = 0;
1655 if (version == 1) {
1656 if (mDataSource->readAt(
1657 timescale_offset + 4, &duration, sizeof(duration))
1658 < (ssize_t)sizeof(duration)) {
1659 return ERROR_IO;
1660 }
1661 if (duration != -1) {
1662 duration = ntoh64(duration);
1663 }
1664 } else {
1665 uint32_t duration32;
1666 if (mDataSource->readAt(
1667 timescale_offset + 4, &duration32, sizeof(duration32))
1668 < (ssize_t)sizeof(duration32)) {
1669 return ERROR_IO;
1670 }
1671 if (duration32 != 0xffffffff) {
1672 duration = ntohl(duration32);
1673 }
1674 }
1675 if (duration != 0 && mLastTrack->timescale != 0) {
1676 long double durationUs = ((long double)duration * 1000000) / mLastTrack->timescale;
1677 if (durationUs < 0 || durationUs > INT64_MAX) {
1678 ALOGE("cannot represent %lld * 1000000 / %lld in 64 bits",
1679 (long long) duration, (long long) mLastTrack->timescale);
1680 return ERROR_MALFORMED;
1681 }
1682 // Store this track's mdhd duration to calculate the padding.
1683 mLastTrack->mMdhdDurationUs = (int64_t)durationUs;
1684 } else {
1685 mLastTrack->mMdhdDurationUs = 0;
1686 }
1687
1688 uint8_t lang[2];
1689 off64_t lang_offset;
1690 if (version == 1) {
1691 lang_offset = timescale_offset + 4 + 8;
1692 } else if (version == 0) {
1693 lang_offset = timescale_offset + 4 + 4;
1694 } else {
1695 return ERROR_IO;
1696 }
1697
1698 if (mDataSource->readAt(lang_offset, &lang, sizeof(lang))
1699 < (ssize_t)sizeof(lang)) {
1700 return ERROR_IO;
1701 }
1702
1703 // To get the ISO-639-2/T three character language code
1704 // 1 bit pad followed by 3 5-bits characters. Each character
1705 // is packed as the difference between its ASCII value and 0x60.
1706 char lang_code[4];
1707 lang_code[0] = ((lang[0] >> 2) & 0x1f) + 0x60;
1708 lang_code[1] = ((lang[0] & 0x3) << 3 | (lang[1] >> 5)) + 0x60;
1709 lang_code[2] = (lang[1] & 0x1f) + 0x60;
1710 lang_code[3] = '\0';
1711
1712 AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_LANGUAGE, lang_code);
1713
1714 break;
1715 }
1716
1717 case FOURCC("stsd"):
1718 {
1719 uint8_t buffer[8];
1720 if (chunk_data_size < (off64_t)sizeof(buffer)) {
1721 return ERROR_MALFORMED;
1722 }
1723
1724 if (mDataSource->readAt(
1725 data_offset, buffer, 8) < 8) {
1726 return ERROR_IO;
1727 }
1728
1729 if (U32_AT(buffer) != 0) {
1730 // Should be version 0, flags 0.
1731 return ERROR_MALFORMED;
1732 }
1733
1734 uint32_t entry_count = U32_AT(&buffer[4]);
1735
1736 if (entry_count > 1) {
1737 // For 3GPP timed text, there could be multiple tx3g boxes contain
1738 // multiple text display formats. These formats will be used to
1739 // display the timed text.
1740 // For encrypted files, there may also be more than one entry.
1741 const char *mime;
1742
1743 if (mLastTrack == NULL)
1744 return ERROR_MALFORMED;
1745
1746 CHECK(AMediaFormat_getString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, &mime));
1747 if (strcasecmp(mime, MEDIA_MIMETYPE_TEXT_3GPP) &&
1748 strcasecmp(mime, "application/octet-stream")) {
1749 // For now we only support a single type of media per track.
1750 mLastTrack->skipTrack = true;
1751 *offset += chunk_size;
1752 break;
1753 }
1754 }
1755 off64_t stop_offset = *offset + chunk_size;
1756 *offset = data_offset + 8;
1757 for (uint32_t i = 0; i < entry_count; ++i) {
1758 status_t err = parseChunk(offset, depth + 1);
1759 if (err != OK) {
1760 return err;
1761 }
1762 }
1763
1764 if (*offset != stop_offset) {
1765 return ERROR_MALFORMED;
1766 }
1767 break;
1768 }
1769 case FOURCC("mett"):
1770 {
1771 *offset += chunk_size;
1772
1773 // the absolute minimum size of a compliant mett box is 11 bytes:
1774 // 6 byte reserved, 2 byte index, null byte, one char mime_format, null byte
1775 // The resulting mime_format would be invalid at that size though.
1776 if (mLastTrack == NULL || chunk_data_size < 11) {
1777 return ERROR_MALFORMED;
1778 }
1779
1780 auto buffer = heapbuffer<uint8_t>(chunk_data_size);
1781 if (buffer.get() == NULL) {
1782 return NO_MEMORY;
1783 }
1784
1785 if (mDataSource->readAt(
1786 data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
1787 return ERROR_IO;
1788 }
1789
1790 // ISO-14496-12:
1791 // int8 reserved[6]; // should be all zeroes
1792 // int16_t data_reference_index;
1793 // char content_encoding[]; // null terminated, optional (= just the null byte)
1794 // char mime_format[]; // null terminated, mandatory
1795 // optional other boxes
1796 //
1797 // API < 29:
1798 // char mime_format[]; // null terminated
1799 //
1800 // API >= 29
1801 // char mime_format[]; // null terminated
1802 // char mime_format[]; // null terminated
1803
1804 // Prior to API 29, the metadata track was not compliant with ISO/IEC
1805 // 14496-12-2015. This led to some ISO-compliant parsers failing to read the
1806 // metatrack. As of API 29 and onwards, a change was made to metadata track to
1807 // make it somewhat compatible with the standard. The workaround is to write the
1808 // null-terminated mime_format string twice. This allows compliant parsers to
1809 // read the missing reserved, data_reference_index, and content_encoding fields
1810 // from the first mime_type string. The actual mime_format field would then be
1811 // read correctly from the second string. The non-compliant Android frameworks
1812 // from API 28 and earlier would still be able to read the mime_format correctly
1813 // as it would only read the first null-terminated mime_format string. To enable
1814 // reading metadata tracks generated from both the non-compliant and compliant
1815 // formats, a check needs to be done to see which format is used.
1816 const char *str = (const char*) buffer.get();
1817 size_t string_length = strnlen(str, chunk_data_size);
1818
1819 if (string_length == chunk_data_size - 1) {
1820 // This is likely a pre API 29 file, since it's a single null terminated
1821 // string filling the entire box.
1822 AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, str);
1823 } else {
1824 // This might be a fully compliant metadata track, a "double mime" compatibility
1825 // track, or anything else, including a single non-terminated string, so we need
1826 // to determine the length of each string we want to parse out of the box.
1827 size_t encoding_length = strnlen(str + 8, chunk_data_size - 8);
1828 if (encoding_length + 8 >= chunk_data_size - 2) {
1829 // the encoding extends to the end of the box, so there's no mime_format
1830 return ERROR_MALFORMED;
1831 }
1832 String8 contentEncoding(str + 8, encoding_length);
1833 String8 mimeFormat(str + 8 + encoding_length + 1,
1834 chunk_data_size - 8 - encoding_length - 1);
1835 AMediaFormat_setString(mLastTrack->meta,
1836 AMEDIAFORMAT_KEY_MIME, mimeFormat.c_str());
1837 }
1838 break;
1839 }
1840
1841 case FOURCC("mp4a"):
1842 case FOURCC("enca"):
1843 case FOURCC("samr"):
1844 case FOURCC("sawb"):
1845 case FOURCC("Opus"):
1846 case FOURCC("twos"):
1847 case FOURCC("sowt"):
1848 case FOURCC("alac"):
1849 case FOURCC("fLaC"):
1850 case FOURCC(".mp3"):
1851 case 0x6D730055: // "ms U" mp3 audio
1852 case FOURCC("mha1"):
1853 case FOURCC("mhm1"):
1854 case FOURCC("dtsc"):
1855 case FOURCC("dtse"):
1856 case FOURCC("dtsh"):
1857 case FOURCC("dtsl"):
1858 case FOURCC("dtsx"):
1859 {
1860 if (mIsQT && depth >= 1 && mPath[depth - 1] == FOURCC("wave")) {
1861
1862 if (chunk_type == FOURCC("alac")) {
1863 off64_t offsetTmp = *offset;
1864 status_t err = parseALACSampleEntry(&offsetTmp);
1865 if (err != OK) {
1866 ALOGE("parseALACSampleEntry err:%d Line:%d", err, __LINE__);
1867 return err;
1868 }
1869 }
1870
1871 // Ignore all atoms embedded in QT wave atom
1872 ALOGV("Ignore all atoms embedded in QT wave atom");
1873 *offset += chunk_size;
1874 break;
1875 }
1876
1877 uint8_t buffer[8 + 20];
1878 if (chunk_data_size < (ssize_t)sizeof(buffer)) {
1879 // Basic AudioSampleEntry size.
1880 return ERROR_MALFORMED;
1881 }
1882
1883 if (mDataSource->readAt(
1884 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
1885 return ERROR_IO;
1886 }
1887
1888 // we can get data_ref_index value from U16_AT(&buffer[6])
1889 uint16_t version = U16_AT(&buffer[8]);
1890 uint32_t num_channels = U16_AT(&buffer[16]);
1891
1892 uint16_t sample_size = U16_AT(&buffer[18]);
1893 uint32_t sample_rate = U32_AT(&buffer[24]) >> 16;
1894
1895 if (mLastTrack == NULL)
1896 return ERROR_MALFORMED;
1897
1898 off64_t stop_offset = *offset + chunk_size;
1899 *offset = data_offset + sizeof(buffer);
1900
1901 if (mIsQT) {
1902 if (version == 1) {
1903 if (mDataSource->readAt(*offset, buffer, 16) < 16) {
1904 return ERROR_IO;
1905 }
1906
1907 #if 0
1908 U32_AT(buffer); // samples per packet
1909 U32_AT(&buffer[4]); // bytes per packet
1910 U32_AT(&buffer[8]); // bytes per frame
1911 U32_AT(&buffer[12]); // bytes per sample
1912 #endif
1913 *offset += 16;
1914 } else if (version == 2) {
1915 uint8_t v2buffer[36];
1916 if (mDataSource->readAt(*offset, v2buffer, 36) < 36) {
1917 return ERROR_IO;
1918 }
1919
1920 #if 0
1921 U32_AT(v2buffer); // size of struct only
1922 sample_rate = (uint32_t)U64_AT(&v2buffer[4]); // audio sample rate
1923 num_channels = U32_AT(&v2buffer[12]); // num audio channels
1924 U32_AT(&v2buffer[16]); // always 0x7f000000
1925 sample_size = (uint16_t)U32_AT(&v2buffer[20]); // const bits per channel
1926 U32_AT(&v2buffer[24]); // format specifc flags
1927 U32_AT(&v2buffer[28]); // const bytes per audio packet
1928 U32_AT(&v2buffer[32]); // const LPCM frames per audio packet
1929 #endif
1930 *offset += 36;
1931 }
1932 }
1933
1934 if (chunk_type != FOURCC("enca")) {
1935 // if the chunk type is enca, we'll get the type from the frma box later
1936 AMediaFormat_setString(mLastTrack->meta,
1937 AMEDIAFORMAT_KEY_MIME, FourCC2MIME(chunk_type));
1938 AdjustChannelsAndRate(chunk_type, &num_channels, &sample_rate);
1939
1940 if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_RAW, FourCC2MIME(chunk_type))) {
1941 AMediaFormat_setInt32(mLastTrack->meta,
1942 AMEDIAFORMAT_KEY_BITS_PER_SAMPLE, sample_size);
1943 if (chunk_type == FOURCC("twos")) {
1944 AMediaFormat_setInt32(mLastTrack->meta,
1945 AMEDIAFORMAT_KEY_PCM_BIG_ENDIAN, 1);
1946 }
1947 }
1948 }
1949 ALOGV("*** coding='%s' %d channels, size %d, rate %d\n",
1950 chunk, num_channels, sample_size, sample_rate);
1951 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, num_channels);
1952 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, sample_rate);
1953
1954 if (chunk_type == FOURCC("Opus")) {
1955 uint8_t opusInfo[AOPUS_OPUSHEAD_MAXSIZE];
1956 data_offset += sizeof(buffer);
1957 size_t opusInfoSize = chunk_data_size - sizeof(buffer);
1958
1959 if (opusInfoSize < AOPUS_OPUSHEAD_MINSIZE ||
1960 opusInfoSize > AOPUS_OPUSHEAD_MAXSIZE) {
1961 return ERROR_MALFORMED;
1962 }
1963 // Read Opus Header
1964 if (mDataSource->readAt(
1965 data_offset, opusInfo, opusInfoSize) < opusInfoSize) {
1966 return ERROR_IO;
1967 }
1968
1969 // OpusHeader must start with this magic sequence, overwrite first 8 bytes
1970 // http://wiki.xiph.org/OggOpus#ID_Header
1971 strncpy((char *)opusInfo, "OpusHead", 8);
1972
1973 // Version shall be 0 as per mp4 Opus Specific Box
1974 // (https://opus-codec.org/docs/opus_in_isobmff.html#4.3.2)
1975 if (opusInfo[8]) {
1976 return ERROR_MALFORMED;
1977 }
1978 // Force version to 1 as per OpusHead definition
1979 // (http://wiki.xiph.org/OggOpus#ID_Header)
1980 opusInfo[8] = 1;
1981
1982 // Read Opus Specific Box values
1983 size_t opusOffset = 10;
1984 uint16_t pre_skip = U16_AT(&opusInfo[opusOffset]);
1985 uint32_t sample_rate = U32_AT(&opusInfo[opusOffset + 2]);
1986 uint16_t out_gain = U16_AT(&opusInfo[opusOffset + 6]);
1987
1988 // Convert Opus Specific Box values. ParseOpusHeader expects
1989 // the values in LE, however MP4 stores these values as BE
1990 // https://opus-codec.org/docs/opus_in_isobmff.html#4.3.2
1991 memcpy(&opusInfo[opusOffset], &pre_skip, sizeof(pre_skip));
1992 memcpy(&opusInfo[opusOffset + 2], &sample_rate, sizeof(sample_rate));
1993 memcpy(&opusInfo[opusOffset + 6], &out_gain, sizeof(out_gain));
1994
1995 static const int64_t kSeekPreRollNs = 80000000; // Fixed 80 msec
1996 static const int32_t kOpusSampleRate = 48000;
1997 int64_t codecDelay = pre_skip * 1000000000ll / kOpusSampleRate;
1998
1999 AMediaFormat_setBuffer(mLastTrack->meta,
2000 AMEDIAFORMAT_KEY_CSD_0, opusInfo, opusInfoSize);
2001 AMediaFormat_setBuffer(mLastTrack->meta,
2002 AMEDIAFORMAT_KEY_CSD_1, &codecDelay, sizeof(codecDelay));
2003 AMediaFormat_setBuffer(mLastTrack->meta,
2004 AMEDIAFORMAT_KEY_CSD_2, &kSeekPreRollNs, sizeof(kSeekPreRollNs));
2005
2006 data_offset += opusInfoSize;
2007 *offset = data_offset;
2008 CHECK_EQ(*offset, stop_offset);
2009 }
2010
2011 if (!mIsQT && chunk_type == FOURCC("alac")) {
2012 data_offset += sizeof(buffer);
2013
2014 status_t err = parseALACSampleEntry(&data_offset);
2015 if (err != OK) {
2016 ALOGE("parseALACSampleEntry err:%d Line:%d", err, __LINE__);
2017 return err;
2018 }
2019 *offset = data_offset;
2020 CHECK_EQ(*offset, stop_offset);
2021 }
2022
2023 if (chunk_type == FOURCC("fLaC")) {
2024 data_offset += sizeof(buffer);
2025 *offset = data_offset;
2026 }
2027
2028 while (*offset < stop_offset) {
2029 status_t err = parseChunk(offset, depth + 1);
2030 if (err != OK) {
2031 return err;
2032 }
2033 }
2034
2035 if (*offset != stop_offset) {
2036 return ERROR_MALFORMED;
2037 }
2038 break;
2039 }
2040 case FOURCC("mhaC"):
2041 {
2042 // See ISO_IEC_23008-3;2019 MHADecoderConfigurationRecord
2043 constexpr uint32_t mhac_header_size = 4 /* size */ + 4 /* boxtype 'mhaC' */
2044 + 1 /* configurationVersion */ + 1 /* mpegh3daProfileLevelIndication */
2045 + 1 /* referenceChannelLayout */ + 2 /* mpegh3daConfigLength */;
2046 uint8_t mhac_header[mhac_header_size];
2047 off64_t data_offset = *offset;
2048
2049 if (mLastTrack == NULL || chunk_size < sizeof(mhac_header)) {
2050 return ERROR_MALFORMED;
2051 }
2052
2053 if (mDataSource->readAt(data_offset, mhac_header, sizeof(mhac_header))
2054 < (ssize_t)sizeof(mhac_header)) {
2055 return ERROR_IO;
2056 }
2057
2058 //get mpegh3daProfileLevelIndication
2059 const uint32_t mpegh3daProfileLevelIndication = mhac_header[9];
2060 AMediaFormat_setInt32(mLastTrack->meta,
2061 AMEDIAFORMAT_KEY_MPEGH_PROFILE_LEVEL_INDICATION,
2062 mpegh3daProfileLevelIndication);
2063
2064 //get referenceChannelLayout
2065 const uint32_t referenceChannelLayout = mhac_header[10];
2066 AMediaFormat_setInt32(mLastTrack->meta,
2067 AMEDIAFORMAT_KEY_MPEGH_REFERENCE_CHANNEL_LAYOUT,
2068 referenceChannelLayout);
2069
2070 // get mpegh3daConfigLength
2071 const uint32_t mhac_config_size = U16_AT(&mhac_header[11]);
2072 if (chunk_size != sizeof(mhac_header) + mhac_config_size) {
2073 return ERROR_MALFORMED;
2074 }
2075
2076 data_offset += sizeof(mhac_header);
2077 uint8_t mhac_config[mhac_config_size];
2078 if (mDataSource->readAt(data_offset, mhac_config, sizeof(mhac_config))
2079 < (ssize_t)sizeof(mhac_config)) {
2080 return ERROR_IO;
2081 }
2082
2083 AMediaFormat_setBuffer(mLastTrack->meta,
2084 AMEDIAFORMAT_KEY_CSD_0, mhac_config, sizeof(mhac_config));
2085 data_offset += sizeof(mhac_config);
2086 *offset = data_offset;
2087 break;
2088 }
2089 case FOURCC("mhaP"):
2090 {
2091 // FDAmd_2 of ISO_IEC_23008-3;2019 MHAProfileAndLevelCompatibilitySetBox
2092 constexpr uint32_t mhap_header_size = 4 /* size */ + 4 /* boxtype 'mhaP' */
2093 + 1 /* numCompatibleSets */;
2094
2095 uint8_t mhap_header[mhap_header_size];
2096 off64_t data_offset = *offset;
2097
2098 if (chunk_size < (ssize_t)mhap_header_size) {
2099 return ERROR_MALFORMED;
2100 }
2101
2102 if (mDataSource->readAt(data_offset, mhap_header, sizeof(mhap_header))
2103 < (ssize_t)sizeof(mhap_header)) {
2104 return ERROR_IO;
2105 }
2106
2107 // mhap_compatible_sets_size = numCompatibleSets * sizeof(uint8_t)
2108 const uint32_t mhap_compatible_sets_size = mhap_header[8];
2109 if (chunk_size != sizeof(mhap_header) + mhap_compatible_sets_size) {
2110 return ERROR_MALFORMED;
2111 }
2112
2113 data_offset += sizeof(mhap_header);
2114 uint8_t mhap_compatible_sets[mhap_compatible_sets_size];
2115 if (mDataSource->readAt(
2116 data_offset, mhap_compatible_sets, sizeof(mhap_compatible_sets))
2117 < (ssize_t)sizeof(mhap_compatible_sets)) {
2118 return ERROR_IO;
2119 }
2120
2121 AMediaFormat_setBuffer(mLastTrack->meta,
2122 AMEDIAFORMAT_KEY_MPEGH_COMPATIBLE_SETS,
2123 mhap_compatible_sets, sizeof(mhap_compatible_sets));
2124 data_offset += sizeof(mhap_compatible_sets);
2125 *offset = data_offset;
2126 break;
2127 }
2128 case FOURCC("mp4v"):
2129 case FOURCC("encv"):
2130 case FOURCC("s263"):
2131 case FOURCC("H263"):
2132 case FOURCC("h263"):
2133 case FOURCC("avc1"):
2134 case FOURCC("hvc1"):
2135 case FOURCC("hev1"):
2136 case FOURCC("dvav"):
2137 case FOURCC("dva1"):
2138 case FOURCC("dvhe"):
2139 case FOURCC("dvh1"):
2140 case FOURCC("dav1"):
2141 case FOURCC("av01"):
2142 case FOURCC("vp09"):
2143 case FOURCC("apv1"):
2144 {
2145 uint8_t buffer[78];
2146 if (chunk_data_size < (ssize_t)sizeof(buffer)) {
2147 // Basic VideoSampleEntry size.
2148 return ERROR_MALFORMED;
2149 }
2150
2151 if (mDataSource->readAt(
2152 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
2153 return ERROR_IO;
2154 }
2155
2156 // we can get data_ref_index value from U16_AT(&buffer[6])
2157 uint16_t width = U16_AT(&buffer[6 + 18]);
2158 uint16_t height = U16_AT(&buffer[6 + 20]);
2159
2160 // The video sample is not standard-compliant if it has invalid dimension.
2161 // Use some default width and height value, and
2162 // let the decoder figure out the actual width and height (and thus
2163 // be prepared for INFO_FORMAT_CHANGED event).
2164 if (width == 0) width = 352;
2165 if (height == 0) height = 288;
2166
2167 // printf("*** coding='%s' width=%d height=%d\n",
2168 // chunk, width, height);
2169
2170 if (mLastTrack == NULL)
2171 return ERROR_MALFORMED;
2172
2173 if (chunk_type != FOURCC("encv")) {
2174 // if the chunk type is encv, we'll get the type from the frma box later
2175 AMediaFormat_setString(mLastTrack->meta,
2176 AMEDIAFORMAT_KEY_MIME, FourCC2MIME(chunk_type));
2177 }
2178 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_WIDTH, width);
2179 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_HEIGHT, height);
2180
2181 off64_t stop_offset = *offset + chunk_size;
2182 *offset = data_offset + sizeof(buffer);
2183 while (*offset < stop_offset) {
2184 status_t err = parseChunk(offset, depth + 1);
2185 if (err != OK) {
2186 return err;
2187 }
2188 }
2189
2190 if (*offset != stop_offset) {
2191 return ERROR_MALFORMED;
2192 }
2193 break;
2194 }
2195
2196 case FOURCC("stco"):
2197 case FOURCC("co64"):
2198 {
2199 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) {
2200 return ERROR_MALFORMED;
2201 }
2202
2203 status_t err =
2204 mLastTrack->sampleTable->setChunkOffsetParams(
2205 chunk_type, data_offset, chunk_data_size);
2206
2207 *offset += chunk_size;
2208
2209 if (err != OK) {
2210 return err;
2211 }
2212
2213 break;
2214 }
2215
2216 case FOURCC("stsc"):
2217 {
2218 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
2219 return ERROR_MALFORMED;
2220
2221 status_t err =
2222 mLastTrack->sampleTable->setSampleToChunkParams(
2223 data_offset, chunk_data_size);
2224
2225 *offset += chunk_size;
2226
2227 if (err != OK) {
2228 return err;
2229 }
2230
2231 break;
2232 }
2233
2234 case FOURCC("stsz"):
2235 case FOURCC("stz2"):
2236 {
2237 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) {
2238 return ERROR_MALFORMED;
2239 }
2240
2241 status_t err =
2242 mLastTrack->sampleTable->setSampleSizeParams(
2243 chunk_type, data_offset, chunk_data_size);
2244
2245 *offset += chunk_size;
2246
2247 if (err != OK) {
2248 return err;
2249 }
2250
2251 adjustRawDefaultFrameSize();
2252
2253 size_t max_size;
2254 err = mLastTrack->sampleTable->getMaxSampleSize(&max_size);
2255
2256 if (err != OK) {
2257 return err;
2258 }
2259
2260 if (max_size != 0) {
2261 // Assume that a given buffer only contains at most 10 chunks,
2262 // each chunk originally prefixed with a 2 byte length will
2263 // have a 4 byte header (0x00 0x00 0x00 0x01) after conversion,
2264 // and thus will grow by 2 bytes per chunk.
2265 if (max_size > SIZE_MAX - 10 * 2) {
2266 ALOGE("max sample size too big: %zu", max_size);
2267 return ERROR_MALFORMED;
2268 }
2269 AMediaFormat_setInt32(mLastTrack->meta,
2270 AMEDIAFORMAT_KEY_MAX_INPUT_SIZE, max_size + 10 * 2);
2271 } else {
2272 // No size was specified. Pick a conservatively large size.
2273 uint32_t width, height;
2274 if (!AMediaFormat_getInt32(mLastTrack->meta,
2275 AMEDIAFORMAT_KEY_WIDTH, (int32_t*)&width) ||
2276 !AMediaFormat_getInt32(mLastTrack->meta,
2277 AMEDIAFORMAT_KEY_HEIGHT,(int32_t*) &height)) {
2278 ALOGE("No width or height, assuming worst case 1080p");
2279 width = 1920;
2280 height = 1080;
2281 } else {
2282 // A resolution was specified, check that it's not too big. The values below
2283 // were chosen so that the calculations below don't cause overflows, they're
2284 // not indicating that resolutions up to 32kx32k are actually supported.
2285 if (width > 32768 || height > 32768) {
2286 ALOGE("can't support %u x %u video", width, height);
2287 return ERROR_MALFORMED;
2288 }
2289 }
2290
2291 const char *mime;
2292 CHECK(AMediaFormat_getString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, &mime));
2293 if (!strncmp(mime, "audio/", 6)) {
2294 // for audio, use 128KB
2295 max_size = 1024 * 128;
2296 } else if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)
2297 || !strcmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)
2298 || !strcmp(mime, MEDIA_MIMETYPE_VIDEO_DOLBY_VISION)) {
2299 // AVC & HEVC requires compression ratio of at least 2, and uses
2300 // macroblocks
2301 max_size = ((width + 15) / 16) * ((height + 15) / 16) * 192;
2302 } else {
2303 // For all other formats there is no minimum compression
2304 // ratio. Use compression ratio of 1.
2305 max_size = width * height * 3 / 2;
2306 }
2307 // HACK: allow 10% overhead
2308 // TODO: read sample size from traf atom for fragmented MPEG4.
2309 max_size += max_size / 10;
2310 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_MAX_INPUT_SIZE, max_size);
2311 }
2312
2313 // NOTE: setting another piece of metadata invalidates any pointers (such as the
2314 // mimetype) previously obtained, so don't cache them.
2315 const char *mime;
2316 CHECK(AMediaFormat_getString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, &mime));
2317 // Calculate average frame rate.
2318 if (!strncasecmp("video/", mime, 6)) {
2319 size_t nSamples = mLastTrack->sampleTable->countSamples();
2320 if (nSamples == 0) {
2321 int32_t trackId;
2322 if (AMediaFormat_getInt32(mLastTrack->meta,
2323 AMEDIAFORMAT_KEY_TRACK_ID, &trackId)) {
2324 for (size_t i = 0; i < mTrex.size(); i++) {
2325 Trex *t = &mTrex.editItemAt(i);
2326 if (t->track_ID == (uint32_t) trackId) {
2327 if (t->default_sample_duration > 0) {
2328 int32_t frameRate =
2329 mLastTrack->timescale / t->default_sample_duration;
2330 AMediaFormat_setInt32(mLastTrack->meta,
2331 AMEDIAFORMAT_KEY_FRAME_RATE, frameRate);
2332 }
2333 break;
2334 }
2335 }
2336 }
2337 } else {
2338 int64_t durationUs;
2339 if (AMediaFormat_getInt64(mLastTrack->meta,
2340 AMEDIAFORMAT_KEY_DURATION, &durationUs)) {
2341 if (durationUs > 0) {
2342 int32_t frameRate = (nSamples * 1000000LL +
2343 (durationUs >> 1)) / durationUs;
2344 AMediaFormat_setInt32(mLastTrack->meta,
2345 AMEDIAFORMAT_KEY_FRAME_RATE, frameRate);
2346 }
2347 }
2348 ALOGV("setting frame count %zu", nSamples);
2349 AMediaFormat_setInt32(mLastTrack->meta,
2350 AMEDIAFORMAT_KEY_FRAME_COUNT, nSamples);
2351 }
2352 }
2353
2354 break;
2355 }
2356
2357 case FOURCC("stts"):
2358 {
2359 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
2360 return ERROR_MALFORMED;
2361
2362 *offset += chunk_size;
2363
2364 if (depth >= 1 && mPath[depth - 1] != FOURCC("stbl")) {
2365 char chunk[5];
2366 MakeFourCCString(mPath[depth - 1], chunk);
2367 ALOGW("stts's parent box (%s) is not stbl, skip it.", chunk);
2368 break;
2369 }
2370
2371 status_t err =
2372 mLastTrack->sampleTable->setTimeToSampleParams(
2373 data_offset, chunk_data_size);
2374
2375 if (err != OK) {
2376 return err;
2377 }
2378
2379 break;
2380 }
2381
2382 case FOURCC("ctts"):
2383 {
2384 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
2385 return ERROR_MALFORMED;
2386
2387 *offset += chunk_size;
2388
2389 status_t err =
2390 mLastTrack->sampleTable->setCompositionTimeToSampleParams(
2391 data_offset, chunk_data_size);
2392
2393 if (err != OK) {
2394 return err;
2395 }
2396
2397 break;
2398 }
2399
2400 case FOURCC("stss"):
2401 {
2402 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
2403 return ERROR_MALFORMED;
2404
2405 *offset += chunk_size;
2406
2407 status_t err =
2408 mLastTrack->sampleTable->setSyncSampleParams(
2409 data_offset, chunk_data_size);
2410
2411 if (err != OK) {
2412 return err;
2413 }
2414
2415 break;
2416 }
2417
2418 // \xA9xyz
2419 case FOURCC("\251xyz"):
2420 {
2421 *offset += chunk_size;
2422
2423 // Best case the total data length inside "\xA9xyz" box would
2424 // be 9, for instance "\xA9xyz" + "\x00\x05\x15\xc7" + "+0+0/",
2425 // where "\x00\x05" is the text string length with value = 5,
2426 // "\x15\xc7" is the language code = en, and "+0+0/" is a
2427 // location (string) value with longitude = 0 and latitude = 0.
2428 // Since some devices encountered in the wild omit the trailing
2429 // slash, we'll allow that.
2430 if (chunk_data_size < 8) { // 8 instead of 9 to allow for missing /
2431 return ERROR_MALFORMED;
2432 }
2433
2434 uint16_t len;
2435 if (!mDataSource->getUInt16(data_offset, &len)) {
2436 return ERROR_IO;
2437 }
2438
2439 // allow "+0+0" without trailing slash
2440 if (len < 4 || len > chunk_data_size - 4) {
2441 return ERROR_MALFORMED;
2442 }
2443 // The location string following the language code is formatted
2444 // according to ISO 6709:2008 (https://en.wikipedia.org/wiki/ISO_6709).
2445 // Allocate 2 extra bytes, in case we need to add a trailing slash,
2446 // and to add a terminating 0.
2447 std::unique_ptr<char[]> buffer(new (std::nothrow) char[len+2]());
2448 if (!buffer) {
2449 return NO_MEMORY;
2450 }
2451
2452 if (mDataSource->readAt(
2453 data_offset + 4, &buffer[0], len) < len) {
2454 return ERROR_IO;
2455 }
2456
2457 len = strlen(&buffer[0]);
2458 if (len < 4) {
2459 return ERROR_MALFORMED;
2460 }
2461 // Add a trailing slash if there wasn't one.
2462 if (buffer[len - 1] != '/') {
2463 buffer[len] = '/';
2464 }
2465 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_LOCATION, &buffer[0]);
2466 break;
2467 }
2468
2469 case FOURCC("esds"):
2470 {
2471 *offset += chunk_size;
2472
2473 if (chunk_data_size < 4) {
2474 return ERROR_MALFORMED;
2475 }
2476
2477 auto tmp = heapbuffer<uint8_t>(chunk_data_size);
2478 uint8_t *buffer = tmp.get();
2479 if (buffer == NULL) {
2480 return -ENOMEM;
2481 }
2482
2483 if (mDataSource->readAt(
2484 data_offset, buffer, chunk_data_size) < chunk_data_size) {
2485 return ERROR_IO;
2486 }
2487
2488 if (U32_AT(buffer) != 0) {
2489 // Should be version 0, flags 0.
2490 return ERROR_MALFORMED;
2491 }
2492
2493 if (mLastTrack == NULL)
2494 return ERROR_MALFORMED;
2495
2496 AMediaFormat_setBuffer(mLastTrack->meta,
2497 AMEDIAFORMAT_KEY_ESDS, &buffer[4], chunk_data_size - 4);
2498
2499 if (mPath.size() >= 2
2500 && mPath[mPath.size() - 2] == FOURCC("mp4a")) {
2501 // Information from the ESDS must be relied on for proper
2502 // setup of sample rate and channel count for MPEG4 Audio.
2503 // The generic header appears to only contain generic
2504 // information...
2505
2506 status_t err = updateAudioTrackInfoFromESDS_MPEG4Audio(
2507 &buffer[4], chunk_data_size - 4);
2508
2509 if (err != OK) {
2510 return err;
2511 }
2512 }
2513 if (mPath.size() >= 2
2514 && mPath[mPath.size() - 2] == FOURCC("mp4v")) {
2515 // Check if the video is MPEG2
2516 ESDS esds(&buffer[4], chunk_data_size - 4);
2517
2518 uint8_t objectTypeIndication;
2519 if (esds.getObjectTypeIndication(&objectTypeIndication) == OK) {
2520 if (objectTypeIndication >= 0x60 && objectTypeIndication <= 0x65) {
2521 AMediaFormat_setString(mLastTrack->meta,
2522 AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_VIDEO_MPEG2);
2523 }
2524 }
2525 }
2526 break;
2527 }
2528
2529 case FOURCC("btrt"):
2530 {
2531 *offset += chunk_size;
2532 if (mLastTrack == NULL) {
2533 return ERROR_MALFORMED;
2534 }
2535
2536 uint8_t buffer[12];
2537 if (chunk_data_size != sizeof(buffer)) {
2538 return ERROR_MALFORMED;
2539 }
2540
2541 if (mDataSource->readAt(
2542 data_offset, buffer, chunk_data_size) < chunk_data_size) {
2543 return ERROR_IO;
2544 }
2545
2546 uint32_t maxBitrate = U32_AT(&buffer[4]);
2547 uint32_t avgBitrate = U32_AT(&buffer[8]);
2548 if (maxBitrate > 0 && maxBitrate < INT32_MAX) {
2549 AMediaFormat_setInt32(mLastTrack->meta,
2550 AMEDIAFORMAT_KEY_MAX_BIT_RATE, (int32_t)maxBitrate);
2551 }
2552 if (avgBitrate > 0 && avgBitrate < INT32_MAX) {
2553 AMediaFormat_setInt32(mLastTrack->meta,
2554 AMEDIAFORMAT_KEY_BIT_RATE, (int32_t)avgBitrate);
2555 }
2556 break;
2557 }
2558
2559 case FOURCC("dfLa"):
2560 {
2561 *offset += chunk_size;
2562
2563 // From https://github.com/xiph/flac/blob/master/doc/isoflac.txt
2564 // 4 for mediaType, 4 for blockType and BlockLen, 34 for metadata
2565 uint8_t flacInfo[4 + 4 + 34];
2566
2567 if (chunk_data_size != sizeof(flacInfo)) {
2568 return ERROR_MALFORMED;
2569 }
2570
2571 data_offset += 4;
2572 size_t flacOffset = 4;
2573 // Add fLaC header mediaType to CSD
2574 strncpy((char *)flacInfo, "fLaC", 4);
2575
2576 ssize_t bytesToRead = sizeof(flacInfo) - flacOffset;
2577 if (mDataSource->readAt(
2578 data_offset, flacInfo + flacOffset, bytesToRead) < bytesToRead) {
2579 return ERROR_IO;
2580 }
2581
2582 data_offset += bytesToRead;
2583 AMediaFormat_setBuffer(mLastTrack->meta, AMEDIAFORMAT_KEY_CSD_0, flacInfo,
2584 sizeof(flacInfo));
2585 break;
2586 }
2587
2588 case FOURCC("avcC"):
2589 {
2590 *offset += chunk_size;
2591
2592 auto buffer = heapbuffer<uint8_t>(chunk_data_size);
2593
2594 if (buffer.get() == NULL) {
2595 ALOGE("b/28471206");
2596 return NO_MEMORY;
2597 }
2598
2599 if (mDataSource->readAt(
2600 data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
2601 return ERROR_IO;
2602 }
2603
2604 if (mLastTrack == NULL)
2605 return ERROR_MALFORMED;
2606
2607 AMediaFormat_setBuffer(mLastTrack->meta,
2608 AMEDIAFORMAT_KEY_CSD_AVC, buffer.get(), chunk_data_size);
2609
2610 break;
2611 }
2612 case FOURCC("hvcC"):
2613 {
2614 auto buffer = heapbuffer<uint8_t>(chunk_data_size);
2615
2616 if (buffer.get() == NULL) {
2617 ALOGE("b/28471206");
2618 return NO_MEMORY;
2619 }
2620
2621 if (mDataSource->readAt(
2622 data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
2623 return ERROR_IO;
2624 }
2625
2626 if (mLastTrack == NULL)
2627 return ERROR_MALFORMED;
2628
2629 AMediaFormat_setBuffer(mLastTrack->meta,
2630 AMEDIAFORMAT_KEY_CSD_HEVC, buffer.get(), chunk_data_size);
2631
2632 *offset += chunk_size;
2633 break;
2634 }
2635 case FOURCC("vpcC"):
2636 {
2637 if (mLastTrack == NULL) {
2638 return ERROR_MALFORMED;
2639 }
2640
2641 auto buffer = heapbuffer<uint8_t>(chunk_data_size);
2642
2643 if (buffer.get() == NULL) {
2644 ALOGE("b/28471206");
2645 return NO_MEMORY;
2646 }
2647
2648 if (mDataSource->readAt(data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
2649 return ERROR_IO;
2650 }
2651
2652 if (!MakeVP9CodecPrivateFromVpcC(mLastTrack->meta, buffer.get(), chunk_data_size)) {
2653 ALOGE("Failed to create VP9 CodecPrivate from vpcC.");
2654 return ERROR_MALFORMED;
2655 }
2656
2657 *offset += chunk_size;
2658 break;
2659 }
2660
2661 case FOURCC("apvC"): {
2662 // Enable APV codec support from Android Baklava
2663 if (!(isAtLeastRelease(36, "Baklava") &&
2664 com::android::media::extractor::flags::extractor_mp4_enable_apv())) {
2665 ALOGV("APV support not enabled");
2666 *offset += chunk_size;
2667 break;
2668 }
2669
2670 auto buffer = heapbuffer<uint8_t>(chunk_data_size);
2671
2672 if (buffer.get() == NULL) {
2673 ALOGE("b/28471206");
2674 return NO_MEMORY;
2675 }
2676
2677 if (mDataSource->readAt(data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
2678 return ERROR_IO;
2679 }
2680
2681 if (mLastTrack == NULL)
2682 return ERROR_MALFORMED;
2683
2684 int bytes_to_skip = 4;
2685 if (chunk_data_size < bytes_to_skip) {
2686 return ERROR_MALFORMED;
2687 }
2688 // apvC extends FullBox so first 4 bytes of version and flag should be zero.
2689 for (int i = 0; i < bytes_to_skip; i++) {
2690 if (buffer[i] != 0) {
2691 return ERROR_MALFORMED;
2692 }
2693 }
2694
2695 // Advance the buffer pointer by 4 bytes as it contains 4 bytes of flag and version.
2696 AMediaFormat_setBuffer(mLastTrack->meta, AMEDIAFORMAT_KEY_CSD_0,
2697 buffer.get() + bytes_to_skip, chunk_data_size - bytes_to_skip);
2698
2699 *offset += chunk_size;
2700 break;
2701 }
2702 case FOURCC("av1C"): {
2703 auto buffer = heapbuffer<uint8_t>(chunk_data_size);
2704
2705 if (buffer.get() == NULL) {
2706 ALOGE("b/28471206");
2707 return NO_MEMORY;
2708 }
2709
2710 if (mDataSource->readAt(
2711 data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
2712 return ERROR_IO;
2713 }
2714
2715 if (mLastTrack == NULL)
2716 return ERROR_MALFORMED;
2717
2718 AMediaFormat_setBuffer(mLastTrack->meta,
2719 AMEDIAFORMAT_KEY_CSD_0, buffer.get(), chunk_data_size);
2720
2721 *offset += chunk_size;
2722 break;
2723 }
2724
2725 case FOURCC("dvcC"):
2726 case FOURCC("dvvC"):
2727 case FOURCC("dvwC"):
2728 {
2729 if (chunk_data_size != 24) {
2730 return ERROR_MALFORMED;
2731 }
2732
2733 auto buffer = heapbuffer<uint8_t>(chunk_data_size);
2734
2735 if (buffer.get() == NULL) {
2736 ALOGE("b/28471206");
2737 return NO_MEMORY;
2738 }
2739
2740 if (mDataSource->readAt(data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
2741 return ERROR_IO;
2742 }
2743
2744 if (mLastTrack == NULL)
2745 return ERROR_MALFORMED;
2746
2747 AMediaFormat_setBuffer(mLastTrack->meta, AMEDIAFORMAT_KEY_CSD_2,
2748 buffer.get(), chunk_data_size);
2749 AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME,
2750 MEDIA_MIMETYPE_VIDEO_DOLBY_VISION);
2751
2752 *offset += chunk_size;
2753 break;
2754 }
2755
2756 case FOURCC("d263"):
2757 {
2758 *offset += chunk_size;
2759 /*
2760 * d263 contains a fixed 7 bytes part:
2761 * vendor - 4 bytes
2762 * version - 1 byte
2763 * level - 1 byte
2764 * profile - 1 byte
2765 * optionally, "d263" box itself may contain a 16-byte
2766 * bit rate box (bitr)
2767 * average bit rate - 4 bytes
2768 * max bit rate - 4 bytes
2769 */
2770 char buffer[23];
2771 if (chunk_data_size != 7 &&
2772 chunk_data_size != 23) {
2773 ALOGE("Incorrect D263 box size %lld", (long long)chunk_data_size);
2774 return ERROR_MALFORMED;
2775 }
2776
2777 if (mDataSource->readAt(
2778 data_offset, buffer, chunk_data_size) < chunk_data_size) {
2779 return ERROR_IO;
2780 }
2781
2782 if (mLastTrack == NULL)
2783 return ERROR_MALFORMED;
2784
2785 AMediaFormat_setBuffer(mLastTrack->meta,
2786 AMEDIAFORMAT_KEY_D263, buffer, chunk_data_size);
2787
2788 break;
2789 }
2790
2791 case FOURCC("meta"):
2792 {
2793 off64_t stop_offset = *offset + chunk_size;
2794 *offset = data_offset;
2795 bool isParsingMetaKeys = underQTMetaPath(mPath, 2);
2796 if (!isParsingMetaKeys) {
2797 uint8_t buffer[4];
2798 if (chunk_data_size < (off64_t)sizeof(buffer)) {
2799 *offset = stop_offset;
2800 return ERROR_MALFORMED;
2801 }
2802
2803 if (mDataSource->readAt(
2804 data_offset, buffer, 4) < 4) {
2805 *offset = stop_offset;
2806 return ERROR_IO;
2807 }
2808
2809 if (U32_AT(buffer) != 0) {
2810 // Should be version 0, flags 0.
2811
2812 // If it's not, let's assume this is one of those
2813 // apparently malformed chunks that don't have flags
2814 // and completely different semantics than what's
2815 // in the MPEG4 specs and skip it.
2816 *offset = stop_offset;
2817 return OK;
2818 }
2819 *offset += sizeof(buffer);
2820 }
2821
2822 while (*offset < stop_offset) {
2823 status_t err = parseChunk(offset, depth + 1);
2824 if (err != OK) {
2825 return err;
2826 }
2827 }
2828
2829 if (*offset != stop_offset) {
2830 return ERROR_MALFORMED;
2831 }
2832 break;
2833 }
2834
2835 case FOURCC("iloc"):
2836 case FOURCC("iinf"):
2837 case FOURCC("iprp"):
2838 case FOURCC("pitm"):
2839 case FOURCC("idat"):
2840 case FOURCC("iref"):
2841 case FOURCC("ipro"):
2842 {
2843 if (mIsHeif || mIsAvif) {
2844 if (mItemTable == NULL) {
2845 mItemTable = new ItemTable(mDataSource, mIsHeif);
2846 }
2847 status_t err = mItemTable->parse(
2848 chunk_type, data_offset, chunk_data_size);
2849 if (err != OK) {
2850 return err;
2851 }
2852 }
2853 *offset += chunk_size;
2854 break;
2855 }
2856
2857 case FOURCC("mean"):
2858 case FOURCC("name"):
2859 case FOURCC("data"):
2860 {
2861 *offset += chunk_size;
2862
2863 if (mPath.size() == 6 && underMetaDataPath(mPath)) {
2864 status_t err = parseITunesMetaData(data_offset, chunk_data_size);
2865
2866 if (err != OK) {
2867 return err;
2868 }
2869 }
2870
2871 break;
2872 }
2873
2874 case FOURCC("mvhd"):
2875 {
2876 *offset += chunk_size;
2877
2878 if (depth != 1) {
2879 ALOGE("mvhd: depth %d", depth);
2880 return ERROR_MALFORMED;
2881 }
2882 if (chunk_data_size < 32) {
2883 return ERROR_MALFORMED;
2884 }
2885
2886 uint8_t header[32];
2887 if (mDataSource->readAt(
2888 data_offset, header, sizeof(header))
2889 < (ssize_t)sizeof(header)) {
2890 return ERROR_IO;
2891 }
2892
2893 uint64_t creationTime;
2894 uint64_t duration = 0;
2895 if (header[0] == 1) {
2896 creationTime = U64_AT(&header[4]);
2897 mHeaderTimescale = U32_AT(&header[20]);
2898 duration = U64_AT(&header[24]);
2899 if (duration == 0xffffffffffffffff) {
2900 duration = 0;
2901 }
2902 } else if (header[0] != 0) {
2903 return ERROR_MALFORMED;
2904 } else {
2905 creationTime = U32_AT(&header[4]);
2906 mHeaderTimescale = U32_AT(&header[12]);
2907 uint32_t d32 = U32_AT(&header[16]);
2908 if (d32 == 0xffffffff) {
2909 d32 = 0;
2910 }
2911 duration = d32;
2912 }
2913 if (duration != 0 && mHeaderTimescale != 0 && duration < UINT64_MAX / 1000000) {
2914 AMediaFormat_setInt64(mFileMetaData,
2915 AMEDIAFORMAT_KEY_DURATION, duration * 1000000 / mHeaderTimescale);
2916 }
2917
2918 String8 s;
2919 if (convertTimeToDate(creationTime, &s)) {
2920 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_DATE, s.c_str());
2921 }
2922
2923 break;
2924 }
2925
2926 case FOURCC("mehd"):
2927 {
2928 *offset += chunk_size;
2929
2930 if (chunk_data_size < 8) {
2931 return ERROR_MALFORMED;
2932 }
2933
2934 uint8_t flags[4];
2935 if (mDataSource->readAt(
2936 data_offset, flags, sizeof(flags))
2937 < (ssize_t)sizeof(flags)) {
2938 return ERROR_IO;
2939 }
2940
2941 uint64_t duration = 0;
2942 if (flags[0] == 1) {
2943 // 64 bit
2944 if (chunk_data_size < 12) {
2945 return ERROR_MALFORMED;
2946 }
2947 mDataSource->getUInt64(data_offset + 4, &duration);
2948 if (duration == 0xffffffffffffffff) {
2949 duration = 0;
2950 }
2951 } else if (flags[0] == 0) {
2952 // 32 bit
2953 uint32_t d32;
2954 mDataSource->getUInt32(data_offset + 4, &d32);
2955 if (d32 == 0xffffffff) {
2956 d32 = 0;
2957 }
2958 duration = d32;
2959 } else {
2960 return ERROR_MALFORMED;
2961 }
2962
2963 if (duration != 0 && mHeaderTimescale != 0) {
2964 AMediaFormat_setInt64(mFileMetaData,
2965 AMEDIAFORMAT_KEY_DURATION, duration * 1000000 / mHeaderTimescale);
2966 }
2967
2968 break;
2969 }
2970
2971 case FOURCC("mdat"):
2972 {
2973 mMdatFound = true;
2974
2975 *offset += chunk_size;
2976 break;
2977 }
2978
2979 case FOURCC("hdlr"):
2980 {
2981 *offset += chunk_size;
2982
2983 if (underQTMetaPath(mPath, 3)) {
2984 break;
2985 }
2986
2987 uint32_t buffer;
2988 if (mDataSource->readAt(
2989 data_offset + 8, &buffer, 4) < 4) {
2990 return ERROR_IO;
2991 }
2992
2993 uint32_t type = ntohl(buffer);
2994 // For the 3GPP file format, the handler-type within the 'hdlr' box
2995 // shall be 'text'. We also want to support 'sbtl' handler type
2996 // for a practical reason as various MPEG4 containers use it.
2997 if (type == FOURCC("text") || type == FOURCC("sbtl")) {
2998 if (mLastTrack != NULL) {
2999 AMediaFormat_setString(mLastTrack->meta,
3000 AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_TEXT_3GPP);
3001 }
3002 }
3003
3004 break;
3005 }
3006
3007 case FOURCC("keys"):
3008 {
3009 *offset += chunk_size;
3010
3011 if (underQTMetaPath(mPath, 3)) {
3012 status_t err = parseQTMetaKey(data_offset, chunk_data_size);
3013 if (err != OK) {
3014 return err;
3015 }
3016 }
3017 break;
3018 }
3019
3020 case FOURCC("trex"):
3021 {
3022 *offset += chunk_size;
3023
3024 if (chunk_data_size < 24) {
3025 return ERROR_IO;
3026 }
3027 Trex trex;
3028 if (!mDataSource->getUInt32(data_offset + 4, &trex.track_ID) ||
3029 !mDataSource->getUInt32(data_offset + 8, &trex.default_sample_description_index) ||
3030 !mDataSource->getUInt32(data_offset + 12, &trex.default_sample_duration) ||
3031 !mDataSource->getUInt32(data_offset + 16, &trex.default_sample_size) ||
3032 !mDataSource->getUInt32(data_offset + 20, &trex.default_sample_flags)) {
3033 return ERROR_IO;
3034 }
3035 mTrex.add(trex);
3036 break;
3037 }
3038
3039 case FOURCC("tx3g"):
3040 {
3041 if (mLastTrack == NULL)
3042 return ERROR_MALFORMED;
3043
3044 // complain about ridiculous chunks
3045 if (chunk_size > kMaxAtomSize) {
3046 return ERROR_MALFORMED;
3047 }
3048
3049 // complain about empty atoms
3050 if (chunk_data_size <= 0) {
3051 ALOGE("b/124330204");
3052 android_errorWriteLog(0x534e4554, "124330204");
3053 return ERROR_MALFORMED;
3054 }
3055
3056 // should fill buffer based on "data_offset" and "chunk_data_size"
3057 // instead of *offset and chunk_size;
3058 // but we've been feeding the extra data to consumers for multiple releases and
3059 // if those apps are compensating for it, we'd break them with such a change
3060 //
3061
3062 if (mLastTrack->mTx3gBuffer == NULL) {
3063 mLastTrack->mTx3gSize = 0;
3064 mLastTrack->mTx3gFilled = 0;
3065 }
3066 if (mLastTrack->mTx3gSize - mLastTrack->mTx3gFilled < chunk_size) {
3067 size_t growth = kTx3gGrowth;
3068 if (growth < chunk_size) {
3069 growth = chunk_size;
3070 }
3071 // although this disallows 2 tx3g atoms of nearly kMaxAtomSize...
3072 if ((uint64_t) mLastTrack->mTx3gSize + growth > kMaxAtomSize) {
3073 ALOGE("b/124330204 - too much space");
3074 android_errorWriteLog(0x534e4554, "124330204");
3075 return ERROR_MALFORMED;
3076 }
3077 uint8_t *updated = (uint8_t *)realloc(mLastTrack->mTx3gBuffer,
3078 mLastTrack->mTx3gSize + growth);
3079 if (updated == NULL) {
3080 return ERROR_MALFORMED;
3081 }
3082 mLastTrack->mTx3gBuffer = updated;
3083 mLastTrack->mTx3gSize += growth;
3084 }
3085
3086 if ((size_t)(mDataSource->readAt(*offset,
3087 mLastTrack->mTx3gBuffer + mLastTrack->mTx3gFilled,
3088 chunk_size))
3089 < chunk_size) {
3090
3091 // advance read pointer so we don't end up reading this again
3092 *offset += chunk_size;
3093 return ERROR_IO;
3094 }
3095
3096 mLastTrack->mTx3gFilled += chunk_size;
3097 *offset += chunk_size;
3098 break;
3099 }
3100
3101 case FOURCC("covr"):
3102 {
3103 *offset += chunk_size;
3104
3105 ALOGV("chunk_data_size = %" PRId64 " and data_offset = %" PRId64,
3106 chunk_data_size, data_offset);
3107
3108 if (chunk_data_size < 0 || static_cast<uint64_t>(chunk_data_size) >= SIZE_MAX - 1) {
3109 return ERROR_MALFORMED;
3110 }
3111 auto buffer = heapbuffer<uint8_t>(chunk_data_size);
3112 if (buffer.get() == NULL) {
3113 ALOGE("b/28471206");
3114 return NO_MEMORY;
3115 }
3116 if (mDataSource->readAt(
3117 data_offset, buffer.get(), chunk_data_size) != (ssize_t)chunk_data_size) {
3118 return ERROR_IO;
3119 }
3120 const int kSkipBytesOfDataBox = 16;
3121 if (chunk_data_size <= kSkipBytesOfDataBox) {
3122 return ERROR_MALFORMED;
3123 }
3124
3125 AMediaFormat_setBuffer(mFileMetaData,
3126 AMEDIAFORMAT_KEY_ALBUMART,
3127 buffer.get() + kSkipBytesOfDataBox, chunk_data_size - kSkipBytesOfDataBox);
3128
3129 break;
3130 }
3131
3132 case FOURCC("colr"):
3133 {
3134 *offset += chunk_size;
3135 // this must be in a VisualSampleEntry box under the Sample Description Box ('stsd')
3136 // ignore otherwise
3137 if (depth >= 2 && mPath[depth - 2] == FOURCC("stsd")) {
3138 status_t err = parseColorInfo(data_offset, chunk_data_size);
3139 if (err != OK) {
3140 return err;
3141 }
3142 }
3143
3144 break;
3145 }
3146
3147 case FOURCC("pasp"):
3148 {
3149 *offset += chunk_size;
3150 // this must be in a VisualSampleEntry box under the Sample Description Box ('stsd')
3151 // ignore otherwise
3152 if (depth >= 2 && mPath[depth - 2] == FOURCC("stsd")) {
3153 status_t err = parsePaspBox(data_offset, chunk_data_size);
3154 if (err != OK) {
3155 return err;
3156 }
3157 }
3158
3159 break;
3160 }
3161
3162 case FOURCC("titl"):
3163 case FOURCC("perf"):
3164 case FOURCC("auth"):
3165 case FOURCC("gnre"):
3166 case FOURCC("albm"):
3167 case FOURCC("yrrc"):
3168 {
3169 *offset += chunk_size;
3170
3171 status_t err = parse3GPPMetaData(data_offset, chunk_data_size, depth);
3172
3173 if (err != OK) {
3174 return err;
3175 }
3176
3177 break;
3178 }
3179
3180 case FOURCC("ID32"):
3181 {
3182 *offset += chunk_size;
3183
3184 if (chunk_data_size < 6) {
3185 return ERROR_MALFORMED;
3186 }
3187
3188 parseID3v2MetaData(data_offset + 6, chunk_data_size - 6);
3189
3190 break;
3191 }
3192
3193 case FOURCC("----"):
3194 {
3195 mLastCommentMean.clear();
3196 mLastCommentName.clear();
3197 mLastCommentData.clear();
3198 *offset += chunk_size;
3199 break;
3200 }
3201
3202 case FOURCC("sidx"):
3203 {
3204 status_t err = parseSegmentIndex(data_offset, chunk_data_size);
3205 if (err != OK) {
3206 return err;
3207 }
3208 *offset += chunk_size;
3209 return UNKNOWN_ERROR; // stop parsing after sidx
3210 }
3211
3212 case FOURCC("ac-3"):
3213 {
3214 *offset += chunk_size;
3215 // bypass ac-3 if parse fail
3216 if (parseAC3SpecificBox(data_offset) != OK) {
3217 if (mLastTrack != NULL) {
3218 ALOGW("Fail to parse ac-3");
3219 mLastTrack->skipTrack = true;
3220 }
3221 }
3222 return OK;
3223 }
3224
3225 case FOURCC("ec-3"):
3226 {
3227 *offset += chunk_size;
3228 // bypass ec-3 if parse fail
3229 if (parseEAC3SpecificBox(data_offset) != OK) {
3230 if (mLastTrack != NULL) {
3231 ALOGW("Fail to parse ec-3");
3232 mLastTrack->skipTrack = true;
3233 }
3234 }
3235 return OK;
3236 }
3237
3238 case FOURCC("ac-4"):
3239 {
3240 *offset += chunk_size;
3241 // bypass ac-4 if parse fail
3242 if (parseAC4SpecificBox(data_offset) != OK) {
3243 if (mLastTrack != NULL) {
3244 ALOGW("Fail to parse ac-4");
3245 mLastTrack->skipTrack = true;
3246 }
3247 }
3248 return OK;
3249 }
3250
3251 case FOURCC("ftyp"):
3252 {
3253 if (chunk_data_size < 8 || depth != 0) {
3254 return ERROR_MALFORMED;
3255 }
3256
3257 off64_t stop_offset = *offset + chunk_size;
3258 uint32_t numCompatibleBrands = (chunk_data_size - 8) / 4;
3259 std::set<uint32_t> brandSet;
3260 for (size_t i = 0; i < numCompatibleBrands + 2; ++i) {
3261 if (i == 1) {
3262 // Skip this index, it refers to the minorVersion,
3263 // not a brand.
3264 continue;
3265 }
3266
3267 uint32_t brand;
3268 if (mDataSource->readAt(data_offset + 4 * i, &brand, 4) < 4) {
3269 return ERROR_MALFORMED;
3270 }
3271
3272 brand = ntohl(brand);
3273 brandSet.insert(brand);
3274 }
3275
3276 if (brandSet.count(FOURCC("qt ")) > 0) {
3277 mIsQT = true;
3278 } else {
3279 if (brandSet.count(FOURCC("mif1")) > 0
3280 && brandSet.count(FOURCC("heic")) > 0) {
3281 ALOGV("identified HEIF image");
3282
3283 mIsHeif = true;
3284 brandSet.erase(FOURCC("mif1"));
3285 brandSet.erase(FOURCC("heic"));
3286 } else if (brandSet.count(FOURCC("avif")) > 0 ||
3287 brandSet.count(FOURCC("avis")) > 0) {
3288 ALOGV("identified AVIF image");
3289 mIsAvif = true;
3290 brandSet.erase(FOURCC("avif"));
3291 brandSet.erase(FOURCC("avis"));
3292 }
3293
3294 if (!brandSet.empty()) {
3295 // This means that the file should have moov box.
3296 // It could be any iso files (mp4, heifs, etc.)
3297 mHasMoovBox = true;
3298 if (mIsHeif || mIsAvif) {
3299 ALOGV("identified %s image with other tracks", mIsHeif ? "HEIF" : "AVIF");
3300 }
3301 }
3302 }
3303
3304 *offset = stop_offset;
3305
3306 break;
3307 }
3308
3309 default:
3310 {
3311 // check if we're parsing 'ilst' for meta keys
3312 // if so, treat type as a number (key-id).
3313 if (underQTMetaPath(mPath, 3)) {
3314 status_t err = parseQTMetaVal(chunk_type, data_offset, chunk_data_size);
3315 if (err != OK) {
3316 return err;
3317 }
3318 }
3319
3320 *offset += chunk_size;
3321 break;
3322 }
3323 }
3324
3325 return OK;
3326 }
3327
parseChannelCountSampleRate(off64_t * offset,uint16_t * channelCount,uint16_t * sampleRate)3328 status_t MPEG4Extractor::parseChannelCountSampleRate(
3329 off64_t *offset, uint16_t *channelCount, uint16_t *sampleRate) {
3330 // skip 16 bytes:
3331 // + 6-byte reserved,
3332 // + 2-byte data reference index,
3333 // + 8-byte reserved
3334 *offset += 16;
3335 if (!mDataSource->getUInt16(*offset, channelCount)) {
3336 ALOGE("MPEG4Extractor: error while reading sample entry box: cannot read channel count");
3337 return ERROR_MALFORMED;
3338 }
3339 // skip 8 bytes:
3340 // + 2-byte channelCount,
3341 // + 2-byte sample size,
3342 // + 4-byte reserved
3343 *offset += 8;
3344 if (!mDataSource->getUInt16(*offset, sampleRate)) {
3345 ALOGE("MPEG4Extractor: error while reading sample entry box: cannot read sample rate");
3346 return ERROR_MALFORMED;
3347 }
3348 // skip 4 bytes:
3349 // + 2-byte sampleRate,
3350 // + 2-byte reserved
3351 *offset += 4;
3352 return OK;
3353 }
3354
parseAC4SpecificBox(off64_t offset)3355 status_t MPEG4Extractor::parseAC4SpecificBox(off64_t offset) {
3356 if (mLastTrack == NULL) {
3357 return ERROR_MALFORMED;
3358 }
3359
3360 uint16_t sampleRate, channelCount;
3361 status_t status;
3362 if ((status = parseChannelCountSampleRate(&offset, &channelCount, &sampleRate)) != OK) {
3363 return status;
3364 }
3365 uint32_t size;
3366 // + 4-byte size
3367 // + 4-byte type
3368 // + 3-byte payload
3369 const uint32_t kAC4MinimumBoxSize = 4 + 4 + 3;
3370 if (!mDataSource->getUInt32(offset, &size) || size < kAC4MinimumBoxSize) {
3371 ALOGE("MPEG4Extractor: error while reading ac-4 block: cannot read specific box size");
3372 return ERROR_MALFORMED;
3373 }
3374
3375 // + 4-byte size
3376 offset += 4;
3377 uint32_t type;
3378 if (!mDataSource->getUInt32(offset, &type) || type != FOURCC("dac4")) {
3379 ALOGE("MPEG4Extractor: error while reading ac-4 specific block: header not dac4");
3380 return ERROR_MALFORMED;
3381 }
3382
3383 // + 4-byte type
3384 offset += 4;
3385 const uint32_t kAC4SpecificBoxPayloadSize = 1176;
3386 uint8_t chunk[kAC4SpecificBoxPayloadSize];
3387 ssize_t dsiSize = size - 8; // size of box - size and type fields
3388 if (dsiSize >= (ssize_t)kAC4SpecificBoxPayloadSize ||
3389 mDataSource->readAt(offset, chunk, dsiSize) != dsiSize) {
3390 ALOGE("MPEG4Extractor: error while reading ac-4 specific block: bitstream fields");
3391 return ERROR_MALFORMED;
3392 }
3393 // + size-byte payload
3394 offset += dsiSize;
3395 ABitReader br(chunk, dsiSize);
3396 AC4DSIParser parser(br);
3397 if (!parser.parse()){
3398 ALOGE("MPEG4Extractor: error while parsing ac-4 specific block");
3399 return ERROR_MALFORMED;
3400 }
3401
3402 AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_AC4);
3403 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, channelCount);
3404 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, sampleRate);
3405
3406 AudioPresentationCollection presentations;
3407 // translate the AC4 presentation information to audio presentations for this track
3408 AC4DSIParser::AC4Presentations ac4Presentations = parser.getPresentations();
3409 if (!ac4Presentations.empty()) {
3410 for (const auto& ac4Presentation : ac4Presentations) {
3411 auto& presentation = ac4Presentation.second;
3412 if (!presentation.mEnabled) {
3413 continue;
3414 }
3415 AudioPresentationV1 ap;
3416 ap.mPresentationId = presentation.mGroupIndex;
3417 ap.mProgramId = presentation.mProgramID;
3418 ap.mLanguage = presentation.mLanguage;
3419 if (presentation.mPreVirtualized) {
3420 ap.mMasteringIndication = MASTERED_FOR_HEADPHONE;
3421 } else {
3422 switch (presentation.mChannelMode) {
3423 case AC4Parser::AC4Presentation::kChannelMode_Mono:
3424 case AC4Parser::AC4Presentation::kChannelMode_Stereo:
3425 ap.mMasteringIndication = MASTERED_FOR_STEREO;
3426 break;
3427 case AC4Parser::AC4Presentation::kChannelMode_3_0:
3428 case AC4Parser::AC4Presentation::kChannelMode_5_0:
3429 case AC4Parser::AC4Presentation::kChannelMode_5_1:
3430 case AC4Parser::AC4Presentation::kChannelMode_7_0_34:
3431 case AC4Parser::AC4Presentation::kChannelMode_7_1_34:
3432 case AC4Parser::AC4Presentation::kChannelMode_7_0_52:
3433 case AC4Parser::AC4Presentation::kChannelMode_7_1_52:
3434 ap.mMasteringIndication = MASTERED_FOR_SURROUND;
3435 break;
3436 case AC4Parser::AC4Presentation::kChannelMode_7_0_322:
3437 case AC4Parser::AC4Presentation::kChannelMode_7_1_322:
3438 case AC4Parser::AC4Presentation::kChannelMode_7_0_4:
3439 case AC4Parser::AC4Presentation::kChannelMode_7_1_4:
3440 case AC4Parser::AC4Presentation::kChannelMode_9_0_4:
3441 case AC4Parser::AC4Presentation::kChannelMode_9_1_4:
3442 case AC4Parser::AC4Presentation::kChannelMode_22_2:
3443 ap.mMasteringIndication = MASTERED_FOR_3D;
3444 break;
3445 default:
3446 ALOGE("Invalid channel mode in AC4 presentation");
3447 return ERROR_MALFORMED;
3448 }
3449 }
3450
3451 ap.mAudioDescriptionAvailable = (presentation.mContentClassifier ==
3452 AC4Parser::AC4Presentation::kVisuallyImpaired);
3453 ap.mSpokenSubtitlesAvailable = (presentation.mContentClassifier ==
3454 AC4Parser::AC4Presentation::kVoiceOver);
3455 ap.mDialogueEnhancementAvailable = presentation.mHasDialogEnhancements;
3456 if (!ap.mLanguage.empty()) {
3457 ap.mLabels.emplace(ap.mLanguage, presentation.mDescription);
3458 }
3459 presentations.push_back(std::move(ap));
3460 }
3461 }
3462
3463 if (presentations.empty()) {
3464 // Clear audio presentation info in metadata.
3465 AMediaFormat_setBuffer(
3466 mLastTrack->meta, AMEDIAFORMAT_KEY_AUDIO_PRESENTATION_INFO, nullptr, 0);
3467 } else {
3468 std::ostringstream outStream(std::ios::out);
3469 serializeAudioPresentations(presentations, &outStream);
3470 AMediaFormat_setBuffer(
3471 mLastTrack->meta, AMEDIAFORMAT_KEY_AUDIO_PRESENTATION_INFO,
3472 outStream.str().data(), outStream.str().size());
3473 }
3474 return OK;
3475 }
3476
parseEAC3SpecificBox(off64_t offset)3477 status_t MPEG4Extractor::parseEAC3SpecificBox(off64_t offset) {
3478 if (mLastTrack == NULL) {
3479 return ERROR_MALFORMED;
3480 }
3481
3482 uint16_t sampleRate, channels;
3483 status_t status;
3484 if ((status = parseChannelCountSampleRate(&offset, &channels, &sampleRate)) != OK) {
3485 return status;
3486 }
3487 uint32_t size;
3488 // + 4-byte size
3489 // + 4-byte type
3490 // + 3-byte payload
3491 const uint32_t kEAC3SpecificBoxMinSize = 11;
3492 // 13 + 3 + (8 * (2 + 5 + 5 + 3 + 1 + 3 + 4 + (14 * 9 + 1))) bits == 152 bytes theoretical max
3493 // calculated from the required bits read below as well as the maximum number of independent
3494 // and dependant sub streams you can have
3495 const uint32_t kEAC3SpecificBoxMaxSize = 152;
3496 if (!mDataSource->getUInt32(offset, &size) ||
3497 size < kEAC3SpecificBoxMinSize ||
3498 size > kEAC3SpecificBoxMaxSize) {
3499 ALOGE("MPEG4Extractor: error while reading eac-3 block: cannot read specific box size");
3500 return ERROR_MALFORMED;
3501 }
3502
3503 offset += 4;
3504 uint32_t type;
3505 if (!mDataSource->getUInt32(offset, &type) || type != FOURCC("dec3")) {
3506 ALOGE("MPEG4Extractor: error while reading eac-3 specific block: header not dec3");
3507 return ERROR_MALFORMED;
3508 }
3509
3510 offset += 4;
3511 uint8_t* chunk = new (std::nothrow) uint8_t[size];
3512 if (chunk == NULL) {
3513 return ERROR_MALFORMED;
3514 }
3515
3516 if (mDataSource->readAt(offset, chunk, size) != (ssize_t)size) {
3517 ALOGE("MPEG4Extractor: error while reading eac-3 specific block: bitstream fields");
3518 delete[] chunk;
3519 return ERROR_MALFORMED;
3520 }
3521
3522 ABitReader br(chunk, size);
3523 static const unsigned channelCountTable[] = {2, 1, 2, 3, 3, 4, 4, 5};
3524 static const unsigned sampleRateTable[] = {48000, 44100, 32000};
3525
3526 if (br.numBitsLeft() < 16) {
3527 delete[] chunk;
3528 return ERROR_MALFORMED;
3529 }
3530 unsigned data_rate = br.getBits(13);
3531 ALOGV("EAC3 data rate = %d", data_rate);
3532
3533 unsigned num_ind_sub = br.getBits(3) + 1;
3534 ALOGV("EAC3 independant substreams = %d", num_ind_sub);
3535 if (br.numBitsLeft() < (num_ind_sub * 23)) {
3536 delete[] chunk;
3537 return ERROR_MALFORMED;
3538 }
3539
3540 unsigned channelCount = 0;
3541 for (unsigned i = 0; i < num_ind_sub; i++) {
3542 unsigned fscod = br.getBits(2);
3543 if (fscod == 3) {
3544 ALOGE("Incorrect fscod (3) in EAC3 header");
3545 delete[] chunk;
3546 return ERROR_MALFORMED;
3547 }
3548 unsigned boxSampleRate = sampleRateTable[fscod];
3549 if (boxSampleRate != sampleRate) {
3550 ALOGE("sample rate mismatch: boxSampleRate = %d, sampleRate = %d",
3551 boxSampleRate, sampleRate);
3552 delete[] chunk;
3553 return ERROR_MALFORMED;
3554 }
3555
3556 unsigned bsid = br.getBits(5);
3557 if (bsid == 9 || bsid == 10) {
3558 ALOGW("EAC3 stream (bsid=%d) may be silenced by the decoder", bsid);
3559 } else if (bsid > 16) {
3560 ALOGE("EAC3 stream (bsid=%d) is not compatible with ETSI TS 102 366 v1.4.1", bsid);
3561 delete[] chunk;
3562 return ERROR_MALFORMED;
3563 }
3564
3565 // skip
3566 br.skipBits(2);
3567 unsigned bsmod = br.getBits(3);
3568 unsigned acmod = br.getBits(3);
3569 unsigned lfeon = br.getBits(1);
3570 // we currently only support the first stream
3571 if (i == 0)
3572 channelCount = channelCountTable[acmod] + lfeon;
3573 ALOGV("bsmod = %d, acmod = %d, lfeon = %d", bsmod, acmod, lfeon);
3574
3575 br.skipBits(3);
3576 unsigned num_dep_sub = br.getBits(4);
3577 ALOGV("EAC3 dependant substreams = %d", num_dep_sub);
3578 if (num_dep_sub != 0) {
3579 if (br.numBitsLeft() < 9) {
3580 delete[] chunk;
3581 return ERROR_MALFORMED;
3582 }
3583 static const char* chan_loc_tbl[] = { "Lc/Rc","Lrs/Rrs","Cs","Ts","Lsd/Rsd",
3584 "Lw/Rw","Lvh/Rvh","Cvh","Lfe2" };
3585 unsigned chan_loc = br.getBits(9);
3586 unsigned mask = 1;
3587 for (unsigned j = 0; j < 9; j++, mask <<= 1) {
3588 if ((chan_loc & mask) != 0) {
3589 // we currently only support the first stream
3590 if (i == 0) {
3591 channelCount++;
3592 // these are 2 channels in the mask
3593 if (j == 0 || j == 1 || j == 4 || j == 5 || j == 6) {
3594 channelCount++;
3595 }
3596 }
3597 ALOGV(" %s", chan_loc_tbl[j]);
3598 }
3599 }
3600 } else {
3601 if (br.numBitsLeft() == 0) {
3602 delete[] chunk;
3603 return ERROR_MALFORMED;
3604 }
3605 br.skipBits(1);
3606 }
3607 }
3608
3609 if (br.numBitsLeft() != 0) {
3610 if (br.numBitsLeft() < 8) {
3611 delete[] chunk;
3612 return ERROR_MALFORMED;
3613 }
3614 unsigned mask = br.getBits(8);
3615 for (unsigned i = 0; i < 8; i++) {
3616 if (((0x1 << i) & mask) == 0)
3617 continue;
3618
3619 if (br.numBitsLeft() < 8) {
3620 delete[] chunk;
3621 return ERROR_MALFORMED;
3622 }
3623 switch (i) {
3624 case 0: {
3625 unsigned complexity = br.getBits(8);
3626 ALOGV("Found a JOC stream with complexity = %d", complexity);
3627 }break;
3628 default: {
3629 br.skipBits(8);
3630 }break;
3631 }
3632 }
3633 }
3634 AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_EAC3);
3635 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, channelCount);
3636 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, sampleRate);
3637
3638 delete[] chunk;
3639 return OK;
3640 }
3641
parseAC3SpecificBox(off64_t offset)3642 status_t MPEG4Extractor::parseAC3SpecificBox(off64_t offset) {
3643 if (mLastTrack == NULL) {
3644 return ERROR_MALFORMED;
3645 }
3646
3647 uint16_t sampleRate, channels;
3648 status_t status;
3649 if ((status = parseChannelCountSampleRate(&offset, &channels, &sampleRate)) != OK) {
3650 return status;
3651 }
3652 uint32_t size;
3653 // + 4-byte size
3654 // + 4-byte type
3655 // + 3-byte payload
3656 const uint32_t kAC3SpecificBoxSize = 11;
3657 if (!mDataSource->getUInt32(offset, &size) || size < kAC3SpecificBoxSize) {
3658 ALOGE("MPEG4Extractor: error while reading ac-3 block: cannot read specific box size");
3659 return ERROR_MALFORMED;
3660 }
3661
3662 offset += 4;
3663 uint32_t type;
3664 if (!mDataSource->getUInt32(offset, &type) || type != FOURCC("dac3")) {
3665 ALOGE("MPEG4Extractor: error while reading ac-3 specific block: header not dac3");
3666 return ERROR_MALFORMED;
3667 }
3668
3669 offset += 4;
3670 const uint32_t kAC3SpecificBoxPayloadSize = 3;
3671 uint8_t chunk[kAC3SpecificBoxPayloadSize];
3672 if (mDataSource->readAt(offset, chunk, sizeof(chunk)) != sizeof(chunk)) {
3673 ALOGE("MPEG4Extractor: error while reading ac-3 specific block: bitstream fields");
3674 return ERROR_MALFORMED;
3675 }
3676
3677 ABitReader br(chunk, sizeof(chunk));
3678 static const unsigned channelCountTable[] = {2, 1, 2, 3, 3, 4, 4, 5};
3679 static const unsigned sampleRateTable[] = {48000, 44100, 32000};
3680
3681 unsigned fscod = br.getBits(2);
3682 if (fscod == 3) {
3683 ALOGE("Incorrect fscod (3) in AC3 header");
3684 return ERROR_MALFORMED;
3685 }
3686 unsigned boxSampleRate = sampleRateTable[fscod];
3687 if (boxSampleRate != sampleRate) {
3688 ALOGE("sample rate mismatch: boxSampleRate = %d, sampleRate = %d",
3689 boxSampleRate, sampleRate);
3690 return ERROR_MALFORMED;
3691 }
3692
3693 unsigned bsid = br.getBits(5);
3694 if (bsid > 8) {
3695 ALOGW("Incorrect bsid in AC3 header. Possibly E-AC-3?");
3696 return ERROR_MALFORMED;
3697 }
3698
3699 // skip
3700 br.skipBits(3); // bsmod
3701
3702 unsigned acmod = br.getBits(3);
3703 unsigned lfeon = br.getBits(1);
3704 unsigned channelCount = channelCountTable[acmod] + lfeon;
3705
3706 AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_AC3);
3707 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, channelCount);
3708 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, sampleRate);
3709 return OK;
3710 }
3711
parseALACSampleEntry(off64_t * offset)3712 status_t MPEG4Extractor::parseALACSampleEntry(off64_t *offset) {
3713 // See 'external/alac/ALACMagicCookieDescription.txt for the detail'.
3714 // Store ALAC magic cookie (decoder needs it).
3715 uint8_t alacInfo[12];
3716 off64_t data_offset = *offset;
3717
3718 if (mDataSource->readAt(
3719 data_offset, alacInfo, sizeof(alacInfo)) < (ssize_t)sizeof(alacInfo)) {
3720 return ERROR_IO;
3721 }
3722 uint32_t size = U32_AT(&alacInfo[0]);
3723 if ((size != ALAC_SPECIFIC_INFO_SIZE) ||
3724 (U32_AT(&alacInfo[4]) != FOURCC("alac")) ||
3725 (U32_AT(&alacInfo[8]) != 0)) {
3726 ALOGV("Size:%u, U32_AT(&alacInfo[4]):%u, U32_AT(&alacInfo[8]):%u",
3727 size, U32_AT(&alacInfo[4]), U32_AT(&alacInfo[8]));
3728 return ERROR_MALFORMED;
3729 }
3730 data_offset += sizeof(alacInfo);
3731 uint8_t cookie[size - sizeof(alacInfo)];
3732 if (mDataSource->readAt(
3733 data_offset, cookie, sizeof(cookie)) < (ssize_t)sizeof(cookie)) {
3734 return ERROR_IO;
3735 }
3736
3737 uint8_t bitsPerSample = cookie[5];
3738 AMediaFormat_setInt32(mLastTrack->meta,
3739 AMEDIAFORMAT_KEY_BITS_PER_SAMPLE, bitsPerSample);
3740 AMediaFormat_setInt32(mLastTrack->meta,
3741 AMEDIAFORMAT_KEY_CHANNEL_COUNT, cookie[9]);
3742 AMediaFormat_setInt32(mLastTrack->meta,
3743 AMEDIAFORMAT_KEY_SAMPLE_RATE, U32_AT(&cookie[20]));
3744 AMediaFormat_setBuffer(mLastTrack->meta,
3745 AMEDIAFORMAT_KEY_CSD_0, cookie, sizeof(cookie));
3746 data_offset += sizeof(cookie);
3747 *offset = data_offset;
3748 return OK;
3749 }
3750
parseSegmentIndex(off64_t offset,size_t size)3751 status_t MPEG4Extractor::parseSegmentIndex(off64_t offset, size_t size) {
3752 ALOGV("MPEG4Extractor::parseSegmentIndex");
3753
3754 if (size < 12) {
3755 return -EINVAL;
3756 }
3757
3758 uint32_t flags;
3759 if (!mDataSource->getUInt32(offset, &flags)) {
3760 return ERROR_MALFORMED;
3761 }
3762
3763 uint32_t version = flags >> 24;
3764 flags &= 0xffffff;
3765
3766 ALOGV("sidx version %d", version);
3767
3768 uint32_t referenceId;
3769 if (!mDataSource->getUInt32(offset + 4, &referenceId)) {
3770 return ERROR_MALFORMED;
3771 }
3772
3773 uint32_t timeScale;
3774 if (!mDataSource->getUInt32(offset + 8, &timeScale)) {
3775 return ERROR_MALFORMED;
3776 }
3777 ALOGV("sidx refid/timescale: %d/%d", referenceId, timeScale);
3778 if (timeScale == 0)
3779 return ERROR_MALFORMED;
3780
3781 uint64_t earliestPresentationTime;
3782 uint64_t firstOffset;
3783
3784 offset += 12;
3785 size -= 12;
3786
3787 if (version == 0) {
3788 if (size < 8) {
3789 return -EINVAL;
3790 }
3791 uint32_t tmp;
3792 if (!mDataSource->getUInt32(offset, &tmp)) {
3793 return ERROR_MALFORMED;
3794 }
3795 earliestPresentationTime = tmp;
3796 if (!mDataSource->getUInt32(offset + 4, &tmp)) {
3797 return ERROR_MALFORMED;
3798 }
3799 firstOffset = tmp;
3800 offset += 8;
3801 size -= 8;
3802 } else {
3803 if (size < 16) {
3804 return -EINVAL;
3805 }
3806 if (!mDataSource->getUInt64(offset, &earliestPresentationTime)) {
3807 return ERROR_MALFORMED;
3808 }
3809 if (!mDataSource->getUInt64(offset + 8, &firstOffset)) {
3810 return ERROR_MALFORMED;
3811 }
3812 offset += 16;
3813 size -= 16;
3814 }
3815 ALOGV("sidx pres/off: %" PRIu64 "/%" PRIu64, earliestPresentationTime, firstOffset);
3816
3817 if (size < 4) {
3818 return -EINVAL;
3819 }
3820
3821 uint16_t referenceCount;
3822 if (!mDataSource->getUInt16(offset + 2, &referenceCount)) {
3823 return ERROR_MALFORMED;
3824 }
3825 offset += 4;
3826 size -= 4;
3827 ALOGV("refcount: %d", referenceCount);
3828
3829 if (size < referenceCount * 12) {
3830 return -EINVAL;
3831 }
3832
3833 uint64_t total_duration = 0;
3834 for (unsigned int i = 0; i < referenceCount; i++) {
3835 uint32_t d1, d2, d3;
3836
3837 if (!mDataSource->getUInt32(offset, &d1) || // size
3838 !mDataSource->getUInt32(offset + 4, &d2) || // duration
3839 !mDataSource->getUInt32(offset + 8, &d3)) { // flags
3840 return ERROR_MALFORMED;
3841 }
3842
3843 if (d1 & 0x80000000) {
3844 ALOGW("sub-sidx boxes not supported yet");
3845 }
3846 bool sap = d3 & 0x80000000;
3847 uint32_t saptype = (d3 >> 28) & 7;
3848 if (!sap || (saptype != 1 && saptype != 2)) {
3849 // type 1 and 2 are sync samples
3850 ALOGW("not a stream access point, or unsupported type: %08x", d3);
3851 }
3852 total_duration += d2;
3853 offset += 12;
3854 ALOGV(" item %d, %08x %08x %08x", i, d1, d2, d3);
3855 SidxEntry se;
3856 se.mSize = d1 & 0x7fffffff;
3857 se.mDurationUs = 1000000LL * d2 / timeScale;
3858 mSidxEntries.add(se);
3859 }
3860
3861 uint64_t sidxDuration = total_duration * 1000000 / timeScale;
3862
3863 if (mLastTrack == NULL)
3864 return ERROR_MALFORMED;
3865
3866 int64_t metaDuration;
3867 if (!AMediaFormat_getInt64(mLastTrack->meta,
3868 AMEDIAFORMAT_KEY_DURATION, &metaDuration) || metaDuration == 0) {
3869 AMediaFormat_setInt64(mLastTrack->meta, AMEDIAFORMAT_KEY_DURATION, sidxDuration);
3870 }
3871 return OK;
3872 }
3873
parseQTMetaKey(off64_t offset,size_t size)3874 status_t MPEG4Extractor::parseQTMetaKey(off64_t offset, size_t size) {
3875 if (size < 8) {
3876 return ERROR_MALFORMED;
3877 }
3878
3879 uint32_t count;
3880 if (!mDataSource->getUInt32(offset + 4, &count)) {
3881 return ERROR_MALFORMED;
3882 }
3883
3884 if (mMetaKeyMap.size() > 0) {
3885 ALOGW("'keys' atom seen again, discarding existing entries");
3886 mMetaKeyMap.clear();
3887 }
3888
3889 off64_t keyOffset = offset + 8;
3890 off64_t stopOffset = offset + size;
3891 for (size_t i = 1; i <= count; i++) {
3892 if (keyOffset + 8 > stopOffset) {
3893 return ERROR_MALFORMED;
3894 }
3895
3896 uint32_t keySize;
3897 if (!mDataSource->getUInt32(keyOffset, &keySize)
3898 || keySize < 8
3899 || keyOffset + keySize > stopOffset) {
3900 return ERROR_MALFORMED;
3901 }
3902
3903 uint32_t type;
3904 if (!mDataSource->getUInt32(keyOffset + 4, &type)
3905 || type != FOURCC("mdta")) {
3906 return ERROR_MALFORMED;
3907 }
3908
3909 keySize -= 8;
3910 keyOffset += 8;
3911
3912 auto keyData = heapbuffer<uint8_t>(keySize);
3913 if (keyData.get() == NULL) {
3914 return ERROR_MALFORMED;
3915 }
3916 if (mDataSource->readAt(
3917 keyOffset, keyData.get(), keySize) < (ssize_t) keySize) {
3918 return ERROR_MALFORMED;
3919 }
3920
3921 AString key((const char *)keyData.get(), keySize);
3922 mMetaKeyMap.add(i, key);
3923
3924 keyOffset += keySize;
3925 }
3926 return OK;
3927 }
3928
parseQTMetaVal(int32_t keyId,off64_t offset,size_t size)3929 status_t MPEG4Extractor::parseQTMetaVal(
3930 int32_t keyId, off64_t offset, size_t size) {
3931 ssize_t index = mMetaKeyMap.indexOfKey(keyId);
3932 if (index < 0) {
3933 // corresponding key is not present, ignore
3934 return ERROR_MALFORMED;
3935 }
3936
3937 if (size <= 16) {
3938 return ERROR_MALFORMED;
3939 }
3940 uint32_t dataSize;
3941 if (!mDataSource->getUInt32(offset, &dataSize)
3942 || dataSize > size || dataSize <= 16) {
3943 return ERROR_MALFORMED;
3944 }
3945 uint32_t atomFourCC;
3946 if (!mDataSource->getUInt32(offset + 4, &atomFourCC)
3947 || atomFourCC != FOURCC("data")) {
3948 return ERROR_MALFORMED;
3949 }
3950 uint32_t dataType;
3951 if (!mDataSource->getUInt32(offset + 8, &dataType)
3952 || ((dataType & 0xff000000) != 0)) {
3953 // not well-known type
3954 return ERROR_MALFORMED;
3955 }
3956
3957 dataSize -= 16;
3958 offset += 16;
3959
3960 if (dataType == 23 && dataSize >= 4) {
3961 // BE Float32
3962 uint32_t val;
3963 if (!mDataSource->getUInt32(offset, &val)) {
3964 return ERROR_MALFORMED;
3965 }
3966 if (!strcasecmp(mMetaKeyMap[index].c_str(), "com.android.capture.fps")) {
3967 AMediaFormat_setFloat(mFileMetaData, AMEDIAFORMAT_KEY_CAPTURE_RATE, *(float *)&val);
3968 }
3969 } else if (dataType == 67 && dataSize >= 4) {
3970 // BE signed int32
3971 uint32_t val;
3972 if (!mDataSource->getUInt32(offset, &val)) {
3973 return ERROR_MALFORMED;
3974 }
3975 if (!strcasecmp(mMetaKeyMap[index].c_str(), "com.android.video.temporal_layers_count")) {
3976 AMediaFormat_setInt32(mFileMetaData,
3977 AMEDIAFORMAT_KEY_TEMPORAL_LAYER_COUNT, val);
3978 }
3979 } else {
3980 // add more keys if needed
3981 ALOGV("ignoring key: type %d, size %d", dataType, dataSize);
3982 }
3983
3984 return OK;
3985 }
3986
parseTrackHeader(off64_t data_offset,off64_t data_size)3987 status_t MPEG4Extractor::parseTrackHeader(
3988 off64_t data_offset, off64_t data_size) {
3989 if (data_size < 4) {
3990 return ERROR_MALFORMED;
3991 }
3992
3993 uint8_t version;
3994 if (mDataSource->readAt(data_offset, &version, 1) < 1) {
3995 return ERROR_IO;
3996 }
3997
3998 size_t dynSize = (version == 1) ? 36 : 24;
3999
4000 uint8_t buffer[36 + 60];
4001
4002 if (data_size != (off64_t)dynSize + 60) {
4003 return ERROR_MALFORMED;
4004 }
4005
4006 if (mDataSource->readAt(
4007 data_offset, buffer, data_size) < (ssize_t)data_size) {
4008 return ERROR_IO;
4009 }
4010
4011 int32_t id;
4012 int64_t duration;
4013
4014 if (version == 1) {
4015 // we can get ctime value from U64_AT(&buffer[4])
4016 // we can get mtime value from U64_AT(&buffer[12])
4017 id = U32_AT(&buffer[20]);
4018 duration = U64_AT(&buffer[28]);
4019 } else if (version == 0) {
4020 // we can get ctime value from U32_AT(&buffer[4])
4021 // we can get mtime value from U32_AT(&buffer[8])
4022 id = U32_AT(&buffer[12]);
4023 duration = U32_AT(&buffer[20]);
4024 } else {
4025 return ERROR_UNSUPPORTED;
4026 }
4027
4028 if (mLastTrack == NULL)
4029 return ERROR_MALFORMED;
4030
4031 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_TRACK_ID, id);
4032 if (duration != 0 && mHeaderTimescale != 0) {
4033 long double durationUs = ((long double)duration * 1000000) / mHeaderTimescale;
4034 if (durationUs < 0 || durationUs > INT64_MAX) {
4035 ALOGE("cannot represent %lld * 1000000 / %lld in 64 bits",
4036 (long long) duration, (long long) mHeaderTimescale);
4037 return ERROR_MALFORMED;
4038 }
4039 AMediaFormat_setInt64(mLastTrack->meta, AMEDIAFORMAT_KEY_DURATION, durationUs);
4040 }
4041
4042 size_t matrixOffset = dynSize + 16;
4043 int32_t a00 = U32_AT(&buffer[matrixOffset]);
4044 int32_t a01 = U32_AT(&buffer[matrixOffset + 4]);
4045 int32_t a10 = U32_AT(&buffer[matrixOffset + 12]);
4046 int32_t a11 = U32_AT(&buffer[matrixOffset + 16]);
4047
4048 #if 0
4049 int32_t dx = U32_AT(&buffer[matrixOffset + 8]);
4050 int32_t dy = U32_AT(&buffer[matrixOffset + 20]);
4051
4052 ALOGI("x' = %.2f * x + %.2f * y + %.2f",
4053 a00 / 65536.0f, a01 / 65536.0f, dx / 65536.0f);
4054 ALOGI("y' = %.2f * x + %.2f * y + %.2f",
4055 a10 / 65536.0f, a11 / 65536.0f, dy / 65536.0f);
4056 #endif
4057
4058 uint32_t rotationDegrees;
4059
4060 static const int32_t kFixedOne = 0x10000;
4061 if (a00 == kFixedOne && a01 == 0 && a10 == 0 && a11 == kFixedOne) {
4062 // Identity, no rotation
4063 rotationDegrees = 0;
4064 } else if (a00 == 0 && a01 == kFixedOne && a10 == -kFixedOne && a11 == 0) {
4065 rotationDegrees = 90;
4066 } else if (a00 == 0 && a01 == -kFixedOne && a10 == kFixedOne && a11 == 0) {
4067 rotationDegrees = 270;
4068 } else if (a00 == -kFixedOne && a01 == 0 && a10 == 0 && a11 == -kFixedOne) {
4069 rotationDegrees = 180;
4070 } else {
4071 ALOGW("We only support 0,90,180,270 degree rotation matrices");
4072 rotationDegrees = 0;
4073 }
4074
4075 if (rotationDegrees != 0) {
4076 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_ROTATION, rotationDegrees);
4077 }
4078
4079 // Handle presentation display size, which could be different
4080 // from the image size indicated by AMEDIAFORMAT_KEY_WIDTH and AMEDIAFORMAT_KEY_HEIGHT.
4081 uint32_t width = U32_AT(&buffer[dynSize + 52]);
4082 uint32_t height = U32_AT(&buffer[dynSize + 56]);
4083 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_DISPLAY_WIDTH, width >> 16);
4084 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_DISPLAY_HEIGHT, height >> 16);
4085
4086 return OK;
4087 }
4088
parseITunesMetaData(off64_t offset,size_t size)4089 status_t MPEG4Extractor::parseITunesMetaData(off64_t offset, size_t size) {
4090 if (size == 0) {
4091 return OK;
4092 }
4093
4094 if (size < 4 || size == SIZE_MAX) {
4095 return ERROR_MALFORMED;
4096 }
4097
4098 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
4099 if (buffer == NULL) {
4100 return ERROR_MALFORMED;
4101 }
4102 if (mDataSource->readAt(
4103 offset, buffer, size) != (ssize_t)size) {
4104 delete[] buffer;
4105 buffer = NULL;
4106
4107 return ERROR_IO;
4108 }
4109
4110 uint32_t flags = U32_AT(buffer);
4111
4112 const char *metadataKey = nullptr;
4113 char chunk[5];
4114 MakeFourCCString(mPath[4], chunk);
4115 ALOGV("meta: %s @ %lld", chunk, (long long)offset);
4116 switch ((int32_t)mPath[4]) {
4117 case FOURCC("\251alb"):
4118 {
4119 metadataKey = AMEDIAFORMAT_KEY_ALBUM;
4120 break;
4121 }
4122 case FOURCC("\251ART"):
4123 {
4124 metadataKey = AMEDIAFORMAT_KEY_ARTIST;
4125 break;
4126 }
4127 case FOURCC("aART"):
4128 {
4129 metadataKey = AMEDIAFORMAT_KEY_ALBUMARTIST;
4130 break;
4131 }
4132 case FOURCC("\251day"):
4133 {
4134 metadataKey = AMEDIAFORMAT_KEY_YEAR;
4135 break;
4136 }
4137 case FOURCC("\251nam"):
4138 {
4139 metadataKey = AMEDIAFORMAT_KEY_TITLE;
4140 break;
4141 }
4142 case FOURCC("\251wrt"):
4143 {
4144 // various open source taggers agree that the "©wrt" tag is for composer, not writer
4145 metadataKey = AMEDIAFORMAT_KEY_COMPOSER;
4146 break;
4147 }
4148 case FOURCC("covr"):
4149 {
4150 metadataKey = AMEDIAFORMAT_KEY_ALBUMART;
4151 break;
4152 }
4153 case FOURCC("gnre"):
4154 case FOURCC("\251gen"):
4155 {
4156 metadataKey = AMEDIAFORMAT_KEY_GENRE;
4157 break;
4158 }
4159 case FOURCC("cpil"):
4160 {
4161 if (size == 9 && flags == 21) {
4162 char tmp[16];
4163 sprintf(tmp, "%d",
4164 (int)buffer[size - 1]);
4165
4166 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_COMPILATION, tmp);
4167 }
4168 break;
4169 }
4170 case FOURCC("trkn"):
4171 {
4172 if (size == 16 && flags == 0) {
4173 char tmp[16];
4174 uint16_t* pTrack = (uint16_t*)&buffer[10];
4175 uint16_t* pTotalTracks = (uint16_t*)&buffer[12];
4176 sprintf(tmp, "%d/%d", ntohs(*pTrack), ntohs(*pTotalTracks));
4177
4178 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_CDTRACKNUMBER, tmp);
4179 }
4180 break;
4181 }
4182 case FOURCC("disk"):
4183 {
4184 if ((size == 14 || size == 16) && flags == 0) {
4185 char tmp[16];
4186 uint16_t* pDisc = (uint16_t*)&buffer[10];
4187 uint16_t* pTotalDiscs = (uint16_t*)&buffer[12];
4188 sprintf(tmp, "%d/%d", ntohs(*pDisc), ntohs(*pTotalDiscs));
4189
4190 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_DISCNUMBER, tmp);
4191 }
4192 break;
4193 }
4194 case FOURCC("----"):
4195 {
4196 buffer[size] = '\0';
4197 switch (mPath[5]) {
4198 case FOURCC("mean"):
4199 mLastCommentMean = ((const char *)buffer + 4);
4200 break;
4201 case FOURCC("name"):
4202 mLastCommentName = ((const char *)buffer + 4);
4203 break;
4204 case FOURCC("data"):
4205 if (size < 8) {
4206 delete[] buffer;
4207 buffer = NULL;
4208 ALOGE("b/24346430");
4209 return ERROR_MALFORMED;
4210 }
4211 mLastCommentData = ((const char *)buffer + 8);
4212 break;
4213 }
4214
4215 // Once we have a set of mean/name/data info, go ahead and process
4216 // it to see if its something we are interested in. Whether or not
4217 // were are interested in the specific tag, make sure to clear out
4218 // the set so we can be ready to process another tuple should one
4219 // show up later in the file.
4220 if ((mLastCommentMean.length() != 0) &&
4221 (mLastCommentName.length() != 0) &&
4222 (mLastCommentData.length() != 0)) {
4223
4224 if (mLastCommentMean == "com.apple.iTunes"
4225 && mLastCommentName == "iTunSMPB") {
4226 int32_t delay, padding;
4227 if (sscanf(mLastCommentData,
4228 " %*x %x %x %*x", &delay, &padding) == 2) {
4229 if (mLastTrack == NULL) {
4230 delete[] buffer;
4231 return ERROR_MALFORMED;
4232 }
4233
4234 AMediaFormat_setInt32(mLastTrack->meta,
4235 AMEDIAFORMAT_KEY_ENCODER_DELAY, delay);
4236 AMediaFormat_setInt32(mLastTrack->meta,
4237 AMEDIAFORMAT_KEY_ENCODER_PADDING, padding);
4238 }
4239 }
4240
4241 mLastCommentMean.clear();
4242 mLastCommentName.clear();
4243 mLastCommentData.clear();
4244 }
4245 break;
4246 }
4247
4248 default:
4249 break;
4250 }
4251
4252 void *tmpData;
4253 size_t tmpDataSize;
4254 const char *s;
4255 if (size >= 8 && metadataKey &&
4256 !AMediaFormat_getBuffer(mFileMetaData, metadataKey, &tmpData, &tmpDataSize) &&
4257 !AMediaFormat_getString(mFileMetaData, metadataKey, &s)) {
4258 if (!strcmp(metadataKey, "albumart")) {
4259 AMediaFormat_setBuffer(mFileMetaData, metadataKey,
4260 buffer + 8, size - 8);
4261 } else if (!strcmp(metadataKey, AMEDIAFORMAT_KEY_GENRE)) {
4262 if (flags == 0) {
4263 // uint8_t genre code, iTunes genre codes are
4264 // the standard id3 codes, except they start
4265 // at 1 instead of 0 (e.g. Pop is 14, not 13)
4266 // We use standard id3 numbering, so subtract 1.
4267 int genrecode = (int)buffer[size - 1];
4268 genrecode--;
4269 if (genrecode < 0) {
4270 genrecode = 255; // reserved for 'unknown genre'
4271 }
4272 char genre[10];
4273 sprintf(genre, "%d", genrecode);
4274
4275 AMediaFormat_setString(mFileMetaData, metadataKey, genre);
4276 } else if (flags == 1) {
4277 // custom genre string
4278 buffer[size] = '\0';
4279
4280 AMediaFormat_setString(mFileMetaData,
4281 metadataKey, (const char *)buffer + 8);
4282 }
4283 } else {
4284 buffer[size] = '\0';
4285
4286 AMediaFormat_setString(mFileMetaData,
4287 metadataKey, (const char *)buffer + 8);
4288 }
4289 }
4290
4291 delete[] buffer;
4292 buffer = NULL;
4293
4294 return OK;
4295 }
4296
parseColorInfo(off64_t offset,size_t size)4297 status_t MPEG4Extractor::parseColorInfo(off64_t offset, size_t size) {
4298 if (size < 4 || size == SIZE_MAX || mLastTrack == NULL) {
4299 return ERROR_MALFORMED;
4300 }
4301
4302 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
4303 if (buffer == NULL) {
4304 return ERROR_MALFORMED;
4305 }
4306 if (mDataSource->readAt(offset, buffer, size) != (ssize_t)size) {
4307 delete[] buffer;
4308 buffer = NULL;
4309
4310 return ERROR_IO;
4311 }
4312
4313 int32_t type = U32_AT(&buffer[0]);
4314 if ((type == FOURCC("nclx") && size >= 11)
4315 || (type == FOURCC("nclc") && size >= 10)) {
4316 // only store the first color specification
4317 int32_t existingColor;
4318 if (!AMediaFormat_getInt32(mLastTrack->meta,
4319 AMEDIAFORMAT_KEY_COLOR_RANGE, &existingColor)) {
4320 int32_t primaries = U16_AT(&buffer[4]);
4321 int32_t isotransfer = U16_AT(&buffer[6]);
4322 int32_t coeffs = U16_AT(&buffer[8]);
4323 bool fullRange = (type == FOURCC("nclx")) && (buffer[10] & 128);
4324
4325 int32_t range = 0;
4326 int32_t standard = 0;
4327 int32_t transfer = 0;
4328 ColorUtils::convertIsoColorAspectsToPlatformAspects(
4329 primaries, isotransfer, coeffs, fullRange,
4330 &range, &standard, &transfer);
4331
4332 if (range != 0) {
4333 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_COLOR_RANGE, range);
4334 }
4335 if (standard != 0) {
4336 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_COLOR_STANDARD, standard);
4337 }
4338 if (transfer != 0) {
4339 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_COLOR_TRANSFER, transfer);
4340 }
4341 }
4342 }
4343
4344 delete[] buffer;
4345 buffer = NULL;
4346
4347 return OK;
4348 }
4349
parsePaspBox(off64_t offset,size_t size)4350 status_t MPEG4Extractor::parsePaspBox(off64_t offset, size_t size) {
4351 if (size < 8 || size == SIZE_MAX || mLastTrack == NULL) {
4352 return ERROR_MALFORMED;
4353 }
4354
4355 uint32_t data[2]; // hSpacing, vSpacing
4356 if (mDataSource->readAt(offset, data, 8) < 8) {
4357 return ERROR_IO;
4358 }
4359 uint32_t hSpacing = ntohl(data[0]);
4360 uint32_t vSpacing = ntohl(data[1]);
4361
4362 if (hSpacing != 0 && vSpacing != 0) {
4363 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAR_WIDTH, hSpacing);
4364 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAR_HEIGHT, vSpacing);
4365 }
4366
4367 return OK;
4368 }
4369
parse3GPPMetaData(off64_t offset,size_t size,int depth)4370 status_t MPEG4Extractor::parse3GPPMetaData(off64_t offset, size_t size, int depth) {
4371 if (size < 4 || size == SIZE_MAX) {
4372 return ERROR_MALFORMED;
4373 }
4374
4375 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
4376 if (buffer == NULL) {
4377 return ERROR_MALFORMED;
4378 }
4379 if (mDataSource->readAt(
4380 offset, buffer, size) != (ssize_t)size) {
4381 delete[] buffer;
4382 buffer = NULL;
4383
4384 return ERROR_IO;
4385 }
4386
4387 const char *metadataKey = nullptr;
4388 switch (mPath[depth]) {
4389 case FOURCC("titl"):
4390 {
4391 metadataKey = "title";
4392 break;
4393 }
4394 case FOURCC("perf"):
4395 {
4396 metadataKey = "artist";
4397 break;
4398 }
4399 case FOURCC("auth"):
4400 {
4401 metadataKey = "writer";
4402 break;
4403 }
4404 case FOURCC("gnre"):
4405 {
4406 metadataKey = "genre";
4407 break;
4408 }
4409 case FOURCC("albm"):
4410 {
4411 if (buffer[size - 1] != '\0') {
4412 char tmp[4];
4413 sprintf(tmp, "%u", buffer[size - 1]);
4414
4415 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_CDTRACKNUMBER, tmp);
4416 }
4417
4418 metadataKey = "album";
4419 break;
4420 }
4421 case FOURCC("yrrc"):
4422 {
4423 if (size < 6) {
4424 delete[] buffer;
4425 buffer = NULL;
4426 ALOGE("b/62133227");
4427 android_errorWriteLog(0x534e4554, "62133227");
4428 return ERROR_MALFORMED;
4429 }
4430 char tmp[5];
4431 uint16_t year = U16_AT(&buffer[4]);
4432
4433 if (year < 10000) {
4434 sprintf(tmp, "%u", year);
4435
4436 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_YEAR, tmp);
4437 }
4438 break;
4439 }
4440
4441 default:
4442 break;
4443 }
4444
4445 if (metadataKey) {
4446 bool isUTF8 = true; // Common case
4447 char16_t *framedata = NULL;
4448 int len16 = 0; // Number of UTF-16 characters
4449
4450 // smallest possible valid UTF-16 string w BOM: 0xfe 0xff 0x00 0x00
4451 if (size < 6) {
4452 delete[] buffer;
4453 buffer = NULL;
4454 return ERROR_MALFORMED;
4455 }
4456
4457 if (size - 6 >= 4) {
4458 len16 = ((size - 6) / 2) - 1; // don't include 0x0000 terminator
4459 framedata = (char16_t *)(buffer + 6);
4460 if (0xfffe == *framedata) {
4461 // endianness marker (BOM) doesn't match host endianness
4462 for (int i = 0; i < len16; i++) {
4463 framedata[i] = bswap_16(framedata[i]);
4464 }
4465 // BOM is now swapped to 0xfeff, we will execute next block too
4466 }
4467
4468 if (0xfeff == *framedata) {
4469 // Remove the BOM
4470 framedata++;
4471 len16--;
4472 isUTF8 = false;
4473 }
4474 // else normal non-zero-length UTF-8 string
4475 // we can't handle UTF-16 without BOM as there is no other
4476 // indication of encoding.
4477 }
4478
4479 if (isUTF8) {
4480 buffer[size] = 0;
4481 AMediaFormat_setString(mFileMetaData, metadataKey, (const char *)buffer + 6);
4482 } else {
4483 // Convert from UTF-16 string to UTF-8 string.
4484 String8 tmpUTF8str(framedata, len16);
4485 AMediaFormat_setString(mFileMetaData, metadataKey, tmpUTF8str.c_str());
4486 }
4487 }
4488
4489 delete[] buffer;
4490 buffer = NULL;
4491
4492 return OK;
4493 }
4494
parseID3v2MetaData(off64_t offset,uint64_t size)4495 void MPEG4Extractor::parseID3v2MetaData(off64_t offset, uint64_t size) {
4496 uint8_t *buffer = new (std::nothrow) uint8_t[size];
4497 if (buffer == NULL) {
4498 return;
4499 }
4500 if (mDataSource->readAt(offset, buffer, size) != (ssize_t)size) {
4501 delete[] buffer;
4502 buffer = NULL;
4503 return;
4504 }
4505
4506 ID3 id3(buffer, size, true /* ignorev1 */);
4507 delete[] buffer;
4508
4509 if (id3.isValid()) {
4510 struct Map {
4511 const char *key;
4512 const char *tag1;
4513 const char *tag2;
4514 };
4515 static const Map kMap[] = {
4516 { AMEDIAFORMAT_KEY_ALBUM, "TALB", "TAL" },
4517 { AMEDIAFORMAT_KEY_ARTIST, "TPE1", "TP1" },
4518 { AMEDIAFORMAT_KEY_ALBUMARTIST, "TPE2", "TP2" },
4519 { AMEDIAFORMAT_KEY_COMPOSER, "TCOM", "TCM" },
4520 { AMEDIAFORMAT_KEY_GENRE, "TCON", "TCO" },
4521 { AMEDIAFORMAT_KEY_TITLE, "TIT2", "TT2" },
4522 { AMEDIAFORMAT_KEY_YEAR, "TYE", "TYER" },
4523 { AMEDIAFORMAT_KEY_AUTHOR, "TXT", "TEXT" },
4524 { AMEDIAFORMAT_KEY_CDTRACKNUMBER, "TRK", "TRCK" },
4525 { AMEDIAFORMAT_KEY_DISCNUMBER, "TPA", "TPOS" },
4526 { AMEDIAFORMAT_KEY_COMPILATION, "TCP", "TCMP" },
4527 };
4528 static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]);
4529
4530 for (size_t i = 0; i < kNumMapEntries; ++i) {
4531 const char *ss;
4532 if (!AMediaFormat_getString(mFileMetaData, kMap[i].key, &ss)) {
4533 ID3::Iterator *it = new ID3::Iterator(id3, kMap[i].tag1);
4534 if (it->done()) {
4535 delete it;
4536 it = new ID3::Iterator(id3, kMap[i].tag2);
4537 }
4538
4539 if (it->done()) {
4540 delete it;
4541 continue;
4542 }
4543
4544 String8 s;
4545 it->getString(&s);
4546 delete it;
4547
4548 AMediaFormat_setString(mFileMetaData, kMap[i].key, s);
4549 }
4550 }
4551
4552 size_t dataSize;
4553 String8 mime;
4554 const void *data = id3.getAlbumArt(&dataSize, &mime);
4555
4556 if (data) {
4557 AMediaFormat_setBuffer(mFileMetaData, AMEDIAFORMAT_KEY_ALBUMART, data, dataSize);
4558 }
4559 }
4560 }
4561
getTrack(size_t index)4562 MediaTrackHelper *MPEG4Extractor::getTrack(size_t index) {
4563 status_t err;
4564 if ((err = readMetaData()) != OK) {
4565 return NULL;
4566 }
4567
4568 Track *track = mFirstTrack;
4569 while (index > 0) {
4570 if (track == NULL) {
4571 return NULL;
4572 }
4573
4574 track = track->next;
4575 --index;
4576 }
4577
4578 if (track == NULL) {
4579 return NULL;
4580 }
4581
4582
4583 Trex *trex = NULL;
4584 int32_t trackId;
4585 if (AMediaFormat_getInt32(track->meta, AMEDIAFORMAT_KEY_TRACK_ID, &trackId)) {
4586 for (size_t i = 0; i < mTrex.size(); i++) {
4587 Trex *t = &mTrex.editItemAt(i);
4588 if (t->track_ID == (uint32_t) trackId) {
4589 trex = t;
4590 break;
4591 }
4592 }
4593 } else {
4594 ALOGE("b/21657957");
4595 return NULL;
4596 }
4597
4598 ALOGV("getTrack called, pssh: %zu", mPssh.size());
4599
4600 const char *mime;
4601 if (!AMediaFormat_getString(track->meta, AMEDIAFORMAT_KEY_MIME, &mime)) {
4602 return NULL;
4603 }
4604 sp<ItemTable> itemTable;
4605 if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
4606 void *data;
4607 size_t size;
4608 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_AVC, &data, &size)) {
4609 return NULL;
4610 }
4611
4612 const uint8_t *ptr = (const uint8_t *)data;
4613
4614 if (size < 7 || ptr[0] != 1) { // configurationVersion == 1
4615 return NULL;
4616 }
4617 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)
4618 || !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC)) {
4619 void *data;
4620 size_t size;
4621 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_HEVC, &data, &size)) {
4622 return NULL;
4623 }
4624
4625 const uint8_t *ptr = (const uint8_t *)data;
4626
4627 if (size < 22 || ptr[0] != 1) { // configurationVersion == 1
4628 return NULL;
4629 }
4630 if (!strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC)) {
4631 itemTable = mItemTable;
4632 }
4633 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_DOLBY_VISION)) {
4634 void *data;
4635 size_t size;
4636 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_2, &data, &size)
4637 || size != 24) {
4638 return NULL;
4639 }
4640
4641 const uint8_t *ptr = (const uint8_t *)data;
4642 // dv_major.dv_minor Should be 1.0 or 2.1
4643 if ((ptr[0] != 1 || ptr[1] != 0) && (ptr[0] != 2 || ptr[1] != 1)) {
4644 return NULL;
4645 }
4646 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AV1)
4647 || !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_AVIF)) {
4648 void *data;
4649 size_t size;
4650 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_0, &data, &size)) {
4651 return NULL;
4652 }
4653
4654 const uint8_t *ptr = (const uint8_t *)data;
4655
4656 if (size < 4 || ptr[0] != 0x81) { // configurationVersion == 1
4657 return NULL;
4658 }
4659 if (!strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_AVIF)) {
4660 itemTable = mItemTable;
4661 }
4662 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_VP9)) {
4663 void *data;
4664 size_t size;
4665 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_0, &data, &size)) {
4666 return NULL;
4667 }
4668
4669 const uint8_t *ptr = (const uint8_t *)data;
4670
4671 if (size < 5 || ptr[0] != 0x01) { // configurationVersion == 1
4672 return NULL;
4673 }
4674 }
4675
4676 ALOGV("track->elst_shift_start_ticks :%" PRIu64, track->elst_shift_start_ticks);
4677
4678 uint64_t elst_initial_empty_edit_ticks = 0;
4679 if (mHeaderTimescale != 0) {
4680 // Convert empty_edit_ticks from movie timescale to media timescale.
4681 uint64_t elst_initial_empty_edit_ticks_mul = 0, elst_initial_empty_edit_ticks_add = 0;
4682 if (__builtin_mul_overflow(track->elst_initial_empty_edit_ticks, track->timescale,
4683 &elst_initial_empty_edit_ticks_mul) ||
4684 __builtin_add_overflow(elst_initial_empty_edit_ticks_mul, (mHeaderTimescale / 2),
4685 &elst_initial_empty_edit_ticks_add)) {
4686 ALOGE("track->elst_initial_empty_edit_ticks overflow");
4687 return nullptr;
4688 }
4689 elst_initial_empty_edit_ticks = elst_initial_empty_edit_ticks_add / mHeaderTimescale;
4690 }
4691 ALOGV("elst_initial_empty_edit_ticks in MediaTimeScale :%" PRIu64,
4692 elst_initial_empty_edit_ticks);
4693
4694 MPEG4Source* source =
4695 new MPEG4Source(track->meta, mDataSource, track->timescale, track->sampleTable,
4696 mSidxEntries, trex, mMoofOffset, itemTable,
4697 track->elst_shift_start_ticks, elst_initial_empty_edit_ticks);
4698 if (source->init() != OK) {
4699 delete source;
4700 return NULL;
4701 }
4702 return source;
4703 }
4704
4705 // static
verifyTrack(Track * track)4706 status_t MPEG4Extractor::verifyTrack(Track *track) {
4707 const char *mime;
4708 CHECK(AMediaFormat_getString(track->meta, AMEDIAFORMAT_KEY_MIME, &mime));
4709
4710 void *data;
4711 size_t size;
4712 if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
4713 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_AVC, &data, &size)) {
4714 return ERROR_MALFORMED;
4715 }
4716 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) {
4717 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_HEVC, &data, &size)) {
4718 return ERROR_MALFORMED;
4719 }
4720 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_DOLBY_VISION)) {
4721 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_2, &data, &size)) {
4722 return ERROR_MALFORMED;
4723 }
4724 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AV1)) {
4725 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_0, &data, &size)) {
4726 return ERROR_MALFORMED;
4727 }
4728 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_VP9)) {
4729 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_0, &data, &size)) {
4730 return ERROR_MALFORMED;
4731 }
4732 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4)
4733 || !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG2)
4734 || !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) {
4735 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_ESDS, &data, &size)) {
4736 return ERROR_MALFORMED;
4737 }
4738 }
4739
4740 if (track->sampleTable == NULL || !track->sampleTable->isValid()) {
4741 // Make sure we have all the metadata we need.
4742 ALOGE("stbl atom missing/invalid.");
4743 return ERROR_MALFORMED;
4744 }
4745
4746 if (track->timescale == 0) {
4747 ALOGE("timescale invalid.");
4748 return ERROR_MALFORMED;
4749 }
4750
4751 return OK;
4752 }
4753
/**
 * MPEG-4 audio object types (AOT) referenced by this file.
 *
 * Only the values the parser compares against are defined. The remaining
 * entries of the numbering are kept as comments for reference.
 */
enum AUDIO_OBJECT_TYPE {
    // AOT_NONE             = -1
    // AOT_NULL_OBJECT      = 0
    // AOT_AAC_MAIN         = 1    Main profile
    AOT_AAC_LC      = 2,        // Low Complexity object
    // AOT_AAC_SSR          = 3
    // AOT_AAC_LTP          = 4
    AOT_SBR         = 5,
    // AOT_AAC_SCAL         = 6
    // AOT_TWIN_VQ          = 7
    // AOT_CELP             = 8
    // AOT_HVXC             = 9
    // AOT_RSVD_10          = 10   (reserved)
    // AOT_RSVD_11          = 11   (reserved)
    // AOT_TTSI             = 12   TTSI Object
    // AOT_MAIN_SYNTH       = 13   Main Synthetic object
    // AOT_WAV_TAB_SYNTH    = 14   Wavetable Synthesis object
    // AOT_GEN_MIDI         = 15   General MIDI object
    // AOT_ALG_SYNTH_AUD_FX = 16   Algorithmic Synthesis and Audio FX object
    AOT_ER_AAC_LC   = 17,       // Error Resilient(ER) AAC Low Complexity
    // AOT_RSVD_18          = 18   (reserved)
    // AOT_ER_AAC_LTP       = 19   Error Resilient(ER) AAC LTP object
    AOT_ER_AAC_SCAL = 20,       // Error Resilient(ER) AAC Scalable object
    // AOT_ER_TWIN_VQ       = 21   Error Resilient(ER) TwinVQ object
    AOT_ER_BSAC     = 22,       // Error Resilient(ER) BSAC object
    AOT_ER_AAC_LD   = 23,       // Error Resilient(ER) AAC LowDelay object
    // AOT_ER_CELP          = 24   Error Resilient(ER) CELP object
    // AOT_ER_HVXC          = 25   Error Resilient(ER) HVXC object
    // AOT_ER_HILN          = 26   Error Resilient(ER) HILN object
    // AOT_ER_PARA          = 27   Error Resilient(ER) Parametric object
    // AOT_RSVD_28          = 28   might become SSC
    AOT_PS          = 29,       // PS, Parametric Stereo (includes SBR)
    // AOT_MPEGS            = 30   MPEG Surround

    AOT_ESCAPE      = 31,       // Signal AOT uses more than 5 bits

    // AOT_MP3ONMP4_L1      = 32   MPEG-Layer1 in mp4
    // AOT_MP3ONMP4_L2      = 33   MPEG-Layer2 in mp4
    // AOT_MP3ONMP4_L3      = 34   MPEG-Layer3 in mp4
    // AOT_RSVD_35          = 35   might become DST
    // AOT_RSVD_36          = 36   might become ALS
    // AOT_AAC_SLS          = 37   AAC + SLS
    // AOT_SLS              = 38   SLS
    // AOT_ER_AAC_ELD       = 39   AAC Enhanced Low Delay

    AOT_USAC        = 42,       // USAC
    // AOT_SAOC             = 43   SAOC
    // AOT_LD_MPEGS         = 44   Low Delay MPEG Surround

    // AOT_RSVD50           = 50   Interim AOT for Rsvd50
};
4805
/**
 * Refines the audio format of the track currently being parsed (mLastTrack)
 * from the decoder configuration carried in an ESDS payload.
 *
 * Depending on the object type indication in the ESDS this either
 *   - re-labels the track as QCELP (0xe1) or MPEG audio (0x6B / 0x69),
 *   - splits a Vorbis configuration (0xdd) into CSD_0 / CSD_1 and re-labels
 *     the track as Vorbis, or
 *   - parses the codec specific data as an MPEG-4 audio configuration and
 *     stores the AAC profile (object type), sample rate and channel count.
 * Max/average bit rates reported by the ESDS are stored when they are
 * non-zero and below INT32_MAX.
 *
 * @param esds_data pointer to the ESDS payload.
 * @param esds_size size of the payload in bytes.
 * @return OK on success, or when the ESDS carries no codec specific data;
 *         ERROR_MALFORMED when the data is truncated or inconsistent or no
 *         track is being parsed; ERROR_UNSUPPORTED for channel configurations
 *         that cannot be mapped to a channel count.
 *
 * NOTE: the track format must already contain a sample rate and a channel
 * count when the MPEG-4 audio path completes; both are enforced with CHECK.
 */
status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio(
        const void *esds_data, size_t esds_size) {
    ESDS esds(esds_data, esds_size);

    uint8_t objectTypeIndication;
    if (esds.getObjectTypeIndication(&objectTypeIndication) != OK) {
        return ERROR_MALFORMED;
    }

    if (objectTypeIndication == 0xe1) {
        // This isn't MPEG4 audio at all, it's QCELP 14k...
        if (mLastTrack == NULL)
            return ERROR_MALFORMED;

        AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_QCELP);
        return OK;
    }

    if (objectTypeIndication == 0x6B || objectTypeIndication == 0x69) {
        // mp3 audio
        if (mLastTrack == NULL)
            return ERROR_MALFORMED;

        AMediaFormat_setString(mLastTrack->meta,AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_MPEG);
        return OK;
    }

    // Bit rates are optional information: a missing track just skips them here
    // (the paths below that need the track check for it again).
    if (mLastTrack != NULL) {
        uint32_t maxBitrate = 0;
        uint32_t avgBitrate = 0;
        esds.getBitRate(&maxBitrate, &avgBitrate);
        // Only values representable as a positive int32 are stored.
        if (maxBitrate > 0 && maxBitrate < INT32_MAX) {
            AMediaFormat_setInt32(mLastTrack->meta,
                    AMEDIAFORMAT_KEY_MAX_BIT_RATE, (int32_t)maxBitrate);
        }
        if (avgBitrate > 0 && avgBitrate < INT32_MAX) {
            AMediaFormat_setInt32(mLastTrack->meta,
                    AMEDIAFORMAT_KEY_BIT_RATE, (int32_t)avgBitrate);
        }
    }

    const uint8_t *csd;
    size_t csd_size;
    if (esds.getCodecSpecificInfo(
                (const void **)&csd, &csd_size) != OK) {
        return ERROR_MALFORMED;
    }

    if (kUseHexDump) {
        printf("ESD of size %zu\n", csd_size);
        hexdump(csd, csd_size);
    }

    if (csd_size == 0) {
        // There's no further information, i.e. no codec specific data
        // Let's assume that the information provided in the mpeg4 headers
        // is accurate and hope for the best.

        return OK;
    }

    // Everything below reads at least the first two bytes of the csd.
    if (csd_size < 2) {
        return ERROR_MALFORMED;
    }

    if (objectTypeIndication == 0xdd) {
        // vorbis audio
        if (csd[0] != 0x02) {
            return ERROR_MALFORMED;
        }

        // codecInfo starts with two lengths, len1 and len2, that are
        // "Xiph-style-lacing encoded"..
        // Each length is the sum of a run of 0xff bytes plus the first byte
        // that is not 0xff.

        size_t offset = 1;
        size_t len1 = 0;
        while (offset < csd_size && csd[offset] == 0xff) {
            if (__builtin_add_overflow(len1, 0xff, &len1)) {
                return ERROR_MALFORMED;
            }
            ++offset;
        }
        if (offset >= csd_size) {
            return ERROR_MALFORMED;
        }
        if (__builtin_add_overflow(len1, csd[offset], &len1)) {
            return ERROR_MALFORMED;
        }
        ++offset;
        if (len1 == 0) {
            return ERROR_MALFORMED;
        }

        size_t len2 = 0;
        while (offset < csd_size && csd[offset] == 0xff) {
            if (__builtin_add_overflow(len2, 0xff, &len2)) {
                return ERROR_MALFORMED;
            }
            ++offset;
        }
        if (offset >= csd_size) {
            return ERROR_MALFORMED;
        }
        if (__builtin_add_overflow(len2, csd[offset], &len2)) {
            return ERROR_MALFORMED;
        }
        ++offset;
        if (len2 == 0) {
            return ERROR_MALFORMED;
        }
        // The first block (len1 bytes) must fit in the csd and start with 0x01.
        // len1 is non-zero, so the size test fails first when offset == csd_size
        // and csd[offset] is never read out of bounds.
        if (offset + len1 > csd_size || csd[offset] != 0x01) {
            return ERROR_MALFORMED;
        }

        if (mLastTrack == NULL) {
            return ERROR_MALFORMED;
        }
        // formerly kKeyVorbisInfo
        AMediaFormat_setBuffer(mLastTrack->meta,
                AMEDIAFORMAT_KEY_CSD_0, &csd[offset], len1);

        // The second block (len2 bytes) must start with 0x03 ...
        if (__builtin_add_overflow(offset, len1, &offset) ||
                offset >= csd_size || csd[offset] != 0x03) {
            return ERROR_MALFORMED;
        }

        // ... and the remainder of the csd must start with 0x05.
        if (__builtin_add_overflow(offset, len2, &offset) ||
                offset >= csd_size || csd[offset] != 0x05) {
            return ERROR_MALFORMED;
        }

        // formerly kKeyVorbisBooks
        AMediaFormat_setBuffer(mLastTrack->meta,
                AMEDIAFORMAT_KEY_CSD_1, &csd[offset], csd_size - offset);
        AMediaFormat_setString(mLastTrack->meta,
                AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_VORBIS);

        return OK;
    }

    // Sample rates selected by the 4-bit frequency index (indices 0..12).
    static uint32_t kSamplingRate[] = {
        96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050,
        16000, 12000, 11025, 8000, 7350
    };

    // csd_size >= 2 guarantees 16 bits, enough for the object type (5 bits,
    // plus 6 when escaped) and the frequency index (4 bits) read below without
    // further length checks.
    ABitReader br(csd, csd_size);
    uint32_t objectType = br.getBits(5);

    if (objectType == AOT_ESCAPE) {  // AAC-ELD => additional 6 bits
        objectType = 32 + br.getBits(6);
    }

    if (mLastTrack == NULL)
        return ERROR_MALFORMED;

    //keep AOT type
    AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_AAC_PROFILE, objectType);

    uint32_t freqIndex = br.getBits(4);

    int32_t sampleRate = 0;
    int32_t numChannels = 0;
    if (freqIndex == 15) {
        // Index 15: the sample rate follows explicitly as a 24-bit value.
        if (br.numBitsLeft() < 28) return ERROR_MALFORMED;
        sampleRate = br.getBits(24);
        numChannels = br.getBits(4);
    } else {
        if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
        numChannels = br.getBits(4);

        // Indices 13 and 14 have no entry in kSamplingRate.
        if (freqIndex == 13 || freqIndex == 14) {
            return ERROR_MALFORMED;
        }

        sampleRate = kSamplingRate[freqIndex];
    }

    if (objectType == AOT_SBR || objectType == AOT_PS) {//SBR specific config per 14496-3 tbl 1.13
        if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
        uint32_t extFreqIndex = br.getBits(4);
        if (extFreqIndex == 15) {
            if (csd_size < 8) {
                return ERROR_MALFORMED;
            }
            if (br.numBitsLeft() < 24) return ERROR_MALFORMED;
            br.skipBits(24); // extSampleRate
        } else {
            if (extFreqIndex == 13 || extFreqIndex == 14) {
                return ERROR_MALFORMED;
            }
            //extSampleRate = kSamplingRate[extFreqIndex];
        }
        //TODO: save the extension sampling rate value in meta data =>
        //      AMediaFormat_setInt32(mLastTrack->meta, kKeyExtSampleRate, extSampleRate);
    }

    // Map the 4-bit channel configuration to a channel count. 0 means the
    // layout has to be read from the configuration data further below.
    switch (numChannels) {
        // values defined in 14496-3_2009 amendment-4 Table 1.19 - Channel Configuration
        case 0:
        case 1:// FC
        case 2:// FL FR
        case 3:// FC, FL FR
        case 4:// FC, FL FR, RC
        case 5:// FC, FL FR, SL SR
        case 6:// FC, FL FR, SL SR, LFE
            //numChannels already contains the right value
            break;
        case 11:// FC, FL FR, SL SR, RC, LFE
            numChannels = 7;
            break;
        case 7: // FC, FCL FCR, FL FR, SL SR, LFE
        case 12:// FC, FL FR, SL SR, RL RR, LFE
        case 14:// FC, FL FR, SL SR, LFE, FHL FHR
            numChannels = 8;
            break;
        default:
            return ERROR_UNSUPPORTED;
    }

    {
        if (objectType == AOT_SBR || objectType == AOT_PS) {
            // For SBR/PS a second object type follows; from here on
            // objectType holds that value (the AAC_PROFILE key stored above
            // keeps the first one).
            if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
            objectType = br.getBits(5);

            if (objectType == AOT_ESCAPE) {
                if (br.numBitsLeft() < 6) return ERROR_MALFORMED;
                objectType = 32 + br.getBits(6);
            }
        }
        if (objectType == AOT_AAC_LC || objectType == AOT_ER_AAC_LC ||
                objectType == AOT_ER_AAC_LD || objectType == AOT_ER_AAC_SCAL ||
                objectType == AOT_ER_BSAC) {
            if (br.numBitsLeft() < 2) return ERROR_MALFORMED;
            br.skipBits(1); // frameLengthFlag

            const int32_t dependsOnCoreCoder = br.getBits(1);

            if (dependsOnCoreCoder ) {
                if (br.numBitsLeft() < 14) return ERROR_MALFORMED;
                br.skipBits(14); // coreCoderDelay
            }

            // The extension flag is tolerated as missing: a default is assumed
            // per object type and a warning is logged.
            int32_t extensionFlag = -1;
            if (br.numBitsLeft() > 0) {
                extensionFlag = br.getBits(1);
            } else {
                switch (objectType) {
                // 14496-3 4.5.1.1 extensionFlag
                case AOT_AAC_LC:
                    extensionFlag = 0;
                    break;
                case AOT_ER_AAC_LC:
                case AOT_ER_AAC_SCAL:
                case AOT_ER_BSAC:
                case AOT_ER_AAC_LD:
                    extensionFlag = 1;
                    break;
                default:
                    return ERROR_MALFORMED;
                    break;
                }
                ALOGW("csd missing extension flag; assuming %d for object type %u.",
                      extensionFlag, objectType);
            }

            if (numChannels == 0) {
                // Channel configuration 0: count the channels from the element
                // lists that follow (front/side/back/LFE).
                int32_t channelsEffectiveNum = 0;
                int32_t channelsNum = 0;
                // The fixed-size fields up to and including MonoMixdownPresent
                // add up to 32 bits.
                if (br.numBitsLeft() < 32) {
                    return ERROR_MALFORMED;
                }
                br.skipBits(4); // ElementInstanceTag
                br.skipBits(2); // Profile
                br.skipBits(4); // SamplingFrequencyIndex
                const int32_t NumFrontChannelElements = br.getBits(4);
                const int32_t NumSideChannelElements = br.getBits(4);
                const int32_t NumBackChannelElements = br.getBits(4);
                const int32_t NumLfeChannelElements = br.getBits(2);
                br.skipBits(3); // NumAssocDataElements
                br.skipBits(4); // NumValidCcElements

                const int32_t MonoMixdownPresent = br.getBits(1);

                if (MonoMixdownPresent != 0) {
                    if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
                    br.skipBits(4); // MonoMixdownElementNumber
                }

                if (br.numBitsLeft() < 1) return ERROR_MALFORMED;
                const int32_t StereoMixdownPresent = br.getBits(1);
                if (StereoMixdownPresent != 0) {
                    if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
                    br.skipBits(4); // StereoMixdownElementNumber
                }

                if (br.numBitsLeft() < 1) return ERROR_MALFORMED;
                const int32_t MatrixMixdownIndexPresent = br.getBits(1);
                if (MatrixMixdownIndexPresent != 0) {
                    if (br.numBitsLeft() < 3) return ERROR_MALFORMED;
                    br.skipBits(2); // MatrixMixdownIndex
                    br.skipBits(1); // PseudoSurroundEnable
                }

                // Each front/side/back element contributes 2 channels when its
                // IsCpe bit is set, otherwise 1.
                int i;
                for (i=0; i < NumFrontChannelElements; i++) {
                    if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
                    const int32_t FrontElementIsCpe = br.getBits(1);
                    br.skipBits(4); // FrontElementTagSelect
                    channelsNum += FrontElementIsCpe ? 2 : 1;
                }

                for (i=0; i < NumSideChannelElements; i++) {
                    if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
                    const int32_t SideElementIsCpe = br.getBits(1);
                    br.skipBits(4); // SideElementTagSelect
                    channelsNum += SideElementIsCpe ? 2 : 1;
                }

                for (i=0; i < NumBackChannelElements; i++) {
                    if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
                    const int32_t BackElementIsCpe = br.getBits(1);
                    br.skipBits(4); // BackElementTagSelect
                    channelsNum += BackElementIsCpe ? 2 : 1;
                }
                // Channel count without LFE; only used for logging.
                channelsEffectiveNum = channelsNum;

                for (i=0; i < NumLfeChannelElements; i++) {
                    if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
                    br.skipBits(4); // LfeElementTagSelect
                    channelsNum += 1;
                }
                ALOGV("mpeg4 audio channelsNum = %d", channelsNum);
                ALOGV("mpeg4 audio channelsEffectiveNum = %d", channelsEffectiveNum);
                numChannels = channelsNum;
            }
        }
    }

    // Still 0 here means no channel layout could be determined.
    if (numChannels == 0) {
        return ERROR_UNSUPPORTED;
    }

    if (mLastTrack == NULL)
        return ERROR_MALFORMED;

    // The values parsed from the csd replace those already in the track
    // format; a difference is only logged.
    int32_t prevSampleRate;
    CHECK(AMediaFormat_getInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, &prevSampleRate));

    if (prevSampleRate != sampleRate) {
        ALOGV("mpeg4 audio sample rate different from previous setting. "
             "was: %d, now: %d", prevSampleRate, sampleRate);
    }

    AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, sampleRate);

    int32_t prevChannelCount;
    CHECK(AMediaFormat_getInt32(mLastTrack->meta,
            AMEDIAFORMAT_KEY_CHANNEL_COUNT, &prevChannelCount));

    if (prevChannelCount != numChannels) {
        ALOGV("mpeg4 audio channel count different from previous setting. "
             "was: %d, now: %d", prevChannelCount, numChannels);
    }

    AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, numChannels);

    return OK;
}
5174
adjustRawDefaultFrameSize()5175 void MPEG4Extractor::adjustRawDefaultFrameSize() {
5176 int32_t chanCount = 0;
5177 int32_t bitWidth = 0;
5178 const char *mimeStr = NULL;
5179
5180 if(AMediaFormat_getString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, &mimeStr) &&
5181 !strcasecmp(mimeStr, MEDIA_MIMETYPE_AUDIO_RAW) &&
5182 AMediaFormat_getInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, &chanCount) &&
5183 AMediaFormat_getInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_BITS_PER_SAMPLE, &bitWidth)) {
5184 // samplesize in stsz may not right , so updade default samplesize
5185 mLastTrack->sampleTable->setPredictSampleSize(chanCount * bitWidth / 8);
5186 }
5187 }
5188
5189 ////////////////////////////////////////////////////////////////////////////////
5190
MPEG4Source(AMediaFormat * format,DataSourceHelper * dataSource,int32_t timeScale,const sp<SampleTable> & sampleTable,Vector<SidxEntry> & sidx,const Trex * trex,off64_t firstMoofOffset,const sp<ItemTable> & itemTable,uint64_t elstShiftStartTicks,uint64_t elstInitialEmptyEditTicks)5191 MPEG4Source::MPEG4Source(
5192 AMediaFormat *format,
5193 DataSourceHelper *dataSource,
5194 int32_t timeScale,
5195 const sp<SampleTable> &sampleTable,
5196 Vector<SidxEntry> &sidx,
5197 const Trex *trex,
5198 off64_t firstMoofOffset,
5199 const sp<ItemTable> &itemTable,
5200 uint64_t elstShiftStartTicks,
5201 uint64_t elstInitialEmptyEditTicks)
5202 : mFormat(format),
5203 mDataSource(dataSource),
5204 mTimescale(timeScale),
5205 mSampleTable(sampleTable),
5206 mCurrentSampleIndex(0),
5207 mCurrentFragmentIndex(0),
5208 mSegments(sidx),
5209 mTrex(trex),
5210 mFirstMoofOffset(firstMoofOffset),
5211 mCurrentMoofOffset(firstMoofOffset),
5212 mCurrentMoofSize(0),
5213 mNextMoofOffset(-1),
5214 mCurrentTime(0),
5215 mDefaultEncryptedByteBlock(0),
5216 mDefaultSkipByteBlock(0),
5217 mCurrentSampleInfoAllocSize(0),
5218 mCurrentSampleInfoSizes(NULL),
5219 mCurrentSampleInfoOffsetsAllocSize(0),
5220 mCurrentSampleInfoOffsets(NULL),
5221 mIsAVC(false),
5222 mIsHEVC(false),
5223 mIsAPV(false),
5224 mIsDolbyVision(false),
5225 mIsAC4(false),
5226 mIsPcm(false),
5227 mNALLengthSize(0),
5228 mStarted(false),
5229 mBuffer(NULL),
5230 mSrcBufferSize(0),
5231 mSrcBuffer(NULL),
5232 mItemTable(itemTable),
5233 mElstShiftStartTicks(elstShiftStartTicks),
5234 mElstInitialEmptyEditTicks(elstInitialEmptyEditTicks) {
5235
5236 memset(&mTrackFragmentHeaderInfo, 0, sizeof(mTrackFragmentHeaderInfo));
5237
5238 AMediaFormat_getInt32(mFormat,
5239 AMEDIAFORMAT_KEY_CRYPTO_MODE, &mCryptoMode);
5240 mDefaultIVSize = 0;
5241 AMediaFormat_getInt32(mFormat,
5242 AMEDIAFORMAT_KEY_CRYPTO_DEFAULT_IV_SIZE, &mDefaultIVSize);
5243 void *key;
5244 size_t keysize;
5245 if (AMediaFormat_getBuffer(mFormat,
5246 AMEDIAFORMAT_KEY_CRYPTO_KEY, &key, &keysize)) {
5247 CHECK(keysize <= 16);
5248 memset(mCryptoKey, 0, 16);
5249 memcpy(mCryptoKey, key, keysize);
5250 }
5251
5252 AMediaFormat_getInt32(mFormat,
5253 AMEDIAFORMAT_KEY_CRYPTO_ENCRYPTED_BYTE_BLOCK, &mDefaultEncryptedByteBlock);
5254 AMediaFormat_getInt32(mFormat,
5255 AMEDIAFORMAT_KEY_CRYPTO_SKIP_BYTE_BLOCK, &mDefaultSkipByteBlock);
5256
5257 const char *mime;
5258 bool success = AMediaFormat_getString(mFormat, AMEDIAFORMAT_KEY_MIME, &mime);
5259 CHECK(success);
5260
5261 mIsMpegH = !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_MPEGH_MHA1) ||
5262 !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_MPEGH_MHM1);
5263 mIsAVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC);
5264 mIsHEVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC) ||
5265 !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC);
5266 // Enable APV codec support from Android Baklava
5267 mIsAPV = false;
5268 if (isAtLeastRelease(36, "Baklava")) {
5269 mIsAPV = com::android::media::extractor::flags::extractor_mp4_enable_apv() &&
5270 !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_APV);
5271 }
5272 mIsAC4 = !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AC4);
5273 mIsDolbyVision = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_DOLBY_VISION);
5274 mIsHeif = !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC) && mItemTable != NULL;
5275 mIsAvif = !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_AVIF) && mItemTable != NULL;
5276
5277 if (mIsAVC) {
5278 void *data;
5279 size_t size;
5280 CHECK(AMediaFormat_getBuffer(format, AMEDIAFORMAT_KEY_CSD_AVC, &data, &size));
5281
5282 mNALLengthSize = getNALLengthSizeFromAvcCsd((const uint8_t *)data, size);
5283 } else if (mIsHEVC) {
5284 void *data;
5285 size_t size;
5286 CHECK(AMediaFormat_getBuffer(format, AMEDIAFORMAT_KEY_CSD_HEVC, &data, &size));
5287
5288 mNALLengthSize = getNALLengthSizeFromHevcCsd((const uint8_t *)data, size);
5289 } else if (mIsDolbyVision) {
5290 ALOGV("%s DolbyVision stream detected", __FUNCTION__);
5291 void *data;
5292 size_t size;
5293 CHECK(AMediaFormat_getBuffer(format, AMEDIAFORMAT_KEY_CSD_2, &data, &size));
5294
5295 const uint8_t *ptr = (const uint8_t *)data;
5296
5297 CHECK(size == 24);
5298
5299 // dv_major.dv_minor Should be 1.0 or 2.1
5300 CHECK(!((ptr[0] != 1 || ptr[1] != 0) && (ptr[0] != 2 || ptr[1] != 1)));
5301
5302 const uint8_t profile = ptr[2] >> 1;
5303 // profile == (4,5,6,7,8) --> HEVC; profile == (9) --> AVC; profile == (10) --> AV1
5304 if (profile > 3 && profile < 9) {
5305 CHECK(AMediaFormat_getBuffer(format, AMEDIAFORMAT_KEY_CSD_HEVC, &data, &size));
5306
5307 mNALLengthSize = getNALLengthSizeFromHevcCsd((const uint8_t *)data, size);
5308 } else if (9 == profile) {
5309 CHECK(AMediaFormat_getBuffer(format, AMEDIAFORMAT_KEY_CSD_AVC, &data, &size));
5310
5311 mNALLengthSize = getNALLengthSizeFromAvcCsd((const uint8_t *)data, size);
5312 } else if (10 == profile) {
5313 /* AV1 profile nothing to do */
5314 } else {
5315 if (AMediaFormat_getBuffer(format, AMEDIAFORMAT_KEY_CSD_HEVC, &data, &size)) {
5316 mNALLengthSize = getNALLengthSizeFromHevcCsd((const uint8_t *)data, size);
5317 } else if (AMediaFormat_getBuffer(format, AMEDIAFORMAT_KEY_CSD_AVC, &data, &size)) {
5318 mNALLengthSize = getNALLengthSizeFromAvcCsd((const uint8_t *)data, size);
5319 } else {
5320 LOG_ALWAYS_FATAL("Invalid Dolby Vision profile = %d", profile);
5321 }
5322 }
5323 }
5324
5325 mIsPcm = !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_RAW);
5326 mIsAudio = !strncasecmp(mime, "audio/", 6);
5327
5328 int32_t aacObjectType = -1;
5329
5330 if (AMediaFormat_getInt32(format, AMEDIAFORMAT_KEY_AAC_PROFILE, &aacObjectType)) {
5331 mIsUsac = (aacObjectType == AOT_USAC);
5332 }
5333
5334 if (mIsPcm) {
5335 int32_t numChannels = 0;
5336 int32_t bitsPerSample = 0;
5337 CHECK(AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_BITS_PER_SAMPLE, &bitsPerSample));
5338 CHECK(AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_CHANNEL_COUNT, &numChannels));
5339
5340 int32_t bytesPerSample = bitsPerSample >> 3;
5341 int32_t pcmSampleSize = bytesPerSample * numChannels;
5342
5343 size_t maxSampleSize;
5344 status_t err = mSampleTable->getMaxSampleSize(&maxSampleSize);
5345 if (err != OK || maxSampleSize != static_cast<size_t>(pcmSampleSize)
5346 || bitsPerSample != 16) {
5347 // Not supported
5348 mIsPcm = false;
5349 } else {
5350 AMediaFormat_setInt32(mFormat,
5351 AMEDIAFORMAT_KEY_MAX_INPUT_SIZE, pcmSampleSize * kMaxPcmFrameSize);
5352 }
5353 }
5354
5355 CHECK(AMediaFormat_getInt32(format, AMEDIAFORMAT_KEY_TRACK_ID, &mTrackId));
5356 }
5357
init()5358 status_t MPEG4Source::init() {
5359 if (mFirstMoofOffset != 0) {
5360 off64_t offset = mFirstMoofOffset;
5361 return parseChunk(&offset);
5362 }
5363 return OK;
5364 }
5365
~MPEG4Source()5366 MPEG4Source::~MPEG4Source() {
5367 if (mStarted) {
5368 stop();
5369 }
5370 free(mCurrentSampleInfoSizes);
5371 free(mCurrentSampleInfoOffsets);
5372 }
5373
start()5374 media_status_t MPEG4Source::start() {
5375 Mutex::Autolock autoLock(mLock);
5376
5377 CHECK(!mStarted);
5378
5379 int32_t tmp;
5380 CHECK(AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_MAX_INPUT_SIZE, &tmp));
5381 size_t max_size = tmp;
5382
5383 // A somewhat arbitrary limit that should be sufficient for 8k video frames
5384 // If you see the message below for a valid input stream: increase the limit
5385 const size_t kMaxBufferSize = 64 * 1024 * 1024;
5386 if (max_size > kMaxBufferSize) {
5387 ALOGE("bogus max input size: %zu > %zu", max_size, kMaxBufferSize);
5388 return AMEDIA_ERROR_MALFORMED;
5389 }
5390 if (max_size == 0) {
5391 ALOGE("zero max input size");
5392 return AMEDIA_ERROR_MALFORMED;
5393 }
5394
5395 // Allow up to kMaxBuffers, but not if the total exceeds kMaxBufferSize.
5396 const size_t kInitialBuffers = 2;
5397 const size_t kMaxBuffers = 8;
5398 const size_t realMaxBuffers = min(kMaxBufferSize / max_size, kMaxBuffers);
5399 mBufferGroup->init(kInitialBuffers, max_size, realMaxBuffers);
5400 mSrcBuffer = new (std::nothrow) uint8_t[max_size];
5401 if (mSrcBuffer == NULL) {
5402 // file probably specified a bad max size
5403 return AMEDIA_ERROR_MALFORMED;
5404 }
5405 mSrcBufferSize = max_size;
5406
5407 mStarted = true;
5408
5409 return AMEDIA_OK;
5410 }
5411
stop()5412 media_status_t MPEG4Source::stop() {
5413 Mutex::Autolock autoLock(mLock);
5414
5415 CHECK(mStarted);
5416
5417 if (mBuffer != NULL) {
5418 mBuffer->release();
5419 mBuffer = NULL;
5420 }
5421
5422 mSrcBufferSize = 0;
5423 delete[] mSrcBuffer;
5424 mSrcBuffer = NULL;
5425
5426 mStarted = false;
5427 mCurrentSampleIndex = 0;
5428
5429 return AMEDIA_OK;
5430 }
5431
parseChunk(off64_t * offset)5432 status_t MPEG4Source::parseChunk(off64_t *offset) {
5433 uint32_t hdr[2];
5434 if (mDataSource->readAt(*offset, hdr, 8) < 8) {
5435 return ERROR_IO;
5436 }
5437 uint64_t chunk_size = ntohl(hdr[0]);
5438 uint32_t chunk_type = ntohl(hdr[1]);
5439 off64_t data_offset = *offset + 8;
5440
5441 if (chunk_size == 1) {
5442 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
5443 return ERROR_IO;
5444 }
5445 chunk_size = ntoh64(chunk_size);
5446 data_offset += 8;
5447
5448 if (chunk_size < 16) {
5449 // The smallest valid chunk is 16 bytes long in this case.
5450 return ERROR_MALFORMED;
5451 }
5452 } else if (chunk_size < 8) {
5453 // The smallest valid chunk is 8 bytes long.
5454 return ERROR_MALFORMED;
5455 }
5456
5457 char chunk[5];
5458 MakeFourCCString(chunk_type, chunk);
5459 ALOGV("MPEG4Source chunk %s @ %#llx", chunk, (long long)*offset);
5460
5461 off64_t chunk_data_size = *offset + chunk_size - data_offset;
5462
5463 switch(chunk_type) {
5464
5465 case FOURCC("traf"):
5466 case FOURCC("moof"): {
5467 off64_t stop_offset = *offset + chunk_size;
5468 *offset = data_offset;
5469 if (chunk_type == FOURCC("moof")) {
5470 mCurrentMoofSize = chunk_data_size;
5471 }
5472 while (*offset < stop_offset) {
5473 status_t err = parseChunk(offset);
5474 if (err != OK) {
5475 return err;
5476 }
5477 }
5478 if (chunk_type == FOURCC("moof")) {
5479 // *offset points to the box following this moof. Find the next moof from there.
5480
5481 while (true) {
5482 if (mDataSource->readAt(*offset, hdr, 8) < 8) {
5483 // no more box to the end of file.
5484 break;
5485 }
5486 chunk_size = ntohl(hdr[0]);
5487 chunk_type = ntohl(hdr[1]);
5488 if (chunk_size == 1) {
5489 // ISO/IEC 14496-12:2012, 8.8.4 Movie Fragment Box, moof is a Box
5490 // which is defined in 4.2 Object Structure.
5491 // When chunk_size==1, 8 bytes follows as "largesize".
5492 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
5493 return ERROR_IO;
5494 }
5495 chunk_size = ntoh64(chunk_size);
5496 if (chunk_size < 16) {
5497 // The smallest valid chunk is 16 bytes long in this case.
5498 return ERROR_MALFORMED;
5499 }
5500 } else if (chunk_size == 0) {
5501 // next box extends to end of file.
5502 } else if (chunk_size < 8) {
5503 // The smallest valid chunk is 8 bytes long in this case.
5504 return ERROR_MALFORMED;
5505 }
5506
5507 if (chunk_type == FOURCC("moof")) {
5508 mNextMoofOffset = *offset;
5509 break;
5510 } else if (chunk_type == FOURCC("mdat")) {
5511 parseChunk(offset);
5512 continue;
5513 } else if (chunk_size == 0) {
5514 break;
5515 }
5516 *offset += chunk_size;
5517 }
5518 }
5519 break;
5520 }
5521
5522 case FOURCC("tfhd"): {
5523 status_t err;
5524 if ((err = parseTrackFragmentHeader(data_offset, chunk_data_size)) != OK) {
5525 return err;
5526 }
5527 *offset += chunk_size;
5528 break;
5529 }
5530
5531 case FOURCC("trun"): {
5532 status_t err;
5533 if (mLastParsedTrackId == mTrackId) {
5534 if ((err = parseTrackFragmentRun(data_offset, chunk_data_size)) != OK) {
5535 return err;
5536 }
5537 }
5538
5539 *offset += chunk_size;
5540 break;
5541 }
5542
5543 case FOURCC("saiz"): {
5544 status_t err;
5545 if ((err = parseSampleAuxiliaryInformationSizes(data_offset, chunk_data_size)) != OK) {
5546 return err;
5547 }
5548 *offset += chunk_size;
5549 break;
5550 }
5551 case FOURCC("saio"): {
5552 status_t err;
5553 if ((err = parseSampleAuxiliaryInformationOffsets(data_offset, chunk_data_size))
5554 != OK) {
5555 return err;
5556 }
5557 *offset += chunk_size;
5558 break;
5559 }
5560
5561 case FOURCC("senc"): {
5562 status_t err;
5563 if ((err = parseSampleEncryption(data_offset, chunk_data_size)) != OK) {
5564 return err;
5565 }
5566 *offset += chunk_size;
5567 break;
5568 }
5569
5570 case FOURCC("mdat"): {
5571 // parse DRM info if present
5572 ALOGV("MPEG4Source::parseChunk mdat");
5573 // if saiz/saoi was previously observed, do something with the sampleinfos
5574 status_t err = OK;
5575 auto kv = mDrmOffsets.lower_bound(*offset);
5576 if (kv != mDrmOffsets.end()) {
5577 auto drmoffset = kv->first;
5578 auto flags = kv->second;
5579 mDrmOffsets.erase(kv);
5580 ALOGV("mdat chunk_size %" PRIu64 " drmoffset %" PRId64 " offset %" PRId64,
5581 chunk_size, drmoffset, *offset);
5582 if (chunk_size >= drmoffset - *offset) {
5583 err = parseClearEncryptedSizes(drmoffset, false, flags,
5584 chunk_size - (drmoffset - *offset));
5585 }
5586 }
5587 if (err != OK) {
5588 return err;
5589 }
5590 *offset += chunk_size;
5591 break;
5592 }
5593
5594 default: {
5595 *offset += chunk_size;
5596 break;
5597 }
5598 }
5599 return OK;
5600 }
5601
parseSampleAuxiliaryInformationSizes(off64_t offset,off64_t size)5602 status_t MPEG4Source::parseSampleAuxiliaryInformationSizes(
5603 off64_t offset, off64_t size) {
5604 ALOGV("parseSampleAuxiliaryInformationSizes");
5605 if (size < 9) {
5606 return -EINVAL;
5607 }
5608 // 14496-12 8.7.12
5609 uint8_t version;
5610 if (mDataSource->readAt(
5611 offset, &version, sizeof(version))
5612 < (ssize_t)sizeof(version)) {
5613 return ERROR_IO;
5614 }
5615
5616 if (version != 0) {
5617 return ERROR_UNSUPPORTED;
5618 }
5619 offset++;
5620 size--;
5621
5622 uint32_t flags;
5623 if (!mDataSource->getUInt24(offset, &flags)) {
5624 return ERROR_IO;
5625 }
5626 offset += 3;
5627 size -= 3;
5628
5629 if (flags & 1) {
5630 if (size < 13) {
5631 return -EINVAL;
5632 }
5633 uint32_t tmp;
5634 if (!mDataSource->getUInt32(offset, &tmp)) {
5635 return ERROR_MALFORMED;
5636 }
5637 mCurrentAuxInfoType = tmp;
5638 offset += 4;
5639 size -= 4;
5640 if (!mDataSource->getUInt32(offset, &tmp)) {
5641 return ERROR_MALFORMED;
5642 }
5643 mCurrentAuxInfoTypeParameter = tmp;
5644 offset += 4;
5645 size -= 4;
5646 }
5647
5648 uint8_t defsize;
5649 if (mDataSource->readAt(offset, &defsize, 1) != 1) {
5650 return ERROR_MALFORMED;
5651 }
5652 mCurrentDefaultSampleInfoSize = defsize;
5653 offset++;
5654 size--;
5655
5656 uint32_t smplcnt;
5657 if (!mDataSource->getUInt32(offset, &smplcnt)) {
5658 return ERROR_MALFORMED;
5659 }
5660 mCurrentSampleInfoCount = smplcnt;
5661 offset += 4;
5662 size -= 4;
5663 if (mCurrentDefaultSampleInfoSize != 0) {
5664 ALOGV("@@@@ using default sample info size of %d", mCurrentDefaultSampleInfoSize);
5665 return OK;
5666 }
5667 if(smplcnt > size) {
5668 ALOGW("b/124525515 - smplcnt(%u) > size(%ld)", (unsigned int)smplcnt, (unsigned long)size);
5669 android_errorWriteLog(0x534e4554, "124525515");
5670 return -EINVAL;
5671 }
5672 if (smplcnt > mCurrentSampleInfoAllocSize) {
5673 uint8_t * newPtr = (uint8_t*) realloc(mCurrentSampleInfoSizes, smplcnt);
5674 if (newPtr == NULL) {
5675 ALOGE("failed to realloc %u -> %u", mCurrentSampleInfoAllocSize, smplcnt);
5676 return NO_MEMORY;
5677 }
5678 mCurrentSampleInfoSizes = newPtr;
5679 mCurrentSampleInfoAllocSize = smplcnt;
5680 }
5681
5682 mDataSource->readAt(offset, mCurrentSampleInfoSizes, smplcnt);
5683 return OK;
5684 }
5685
parseSampleAuxiliaryInformationOffsets(off64_t offset,off64_t size)5686 status_t MPEG4Source::parseSampleAuxiliaryInformationOffsets(
5687 off64_t offset, off64_t size) {
5688 ALOGV("parseSampleAuxiliaryInformationOffsets");
5689 if (size < 8) {
5690 return -EINVAL;
5691 }
5692 // 14496-12 8.7.13
5693 uint8_t version;
5694 if (mDataSource->readAt(offset, &version, sizeof(version)) != 1) {
5695 return ERROR_IO;
5696 }
5697 offset++;
5698 size--;
5699
5700 uint32_t flags;
5701 if (!mDataSource->getUInt24(offset, &flags)) {
5702 return ERROR_IO;
5703 }
5704 offset += 3;
5705 size -= 3;
5706
5707 uint32_t entrycount;
5708 if (!mDataSource->getUInt32(offset, &entrycount)) {
5709 return ERROR_IO;
5710 }
5711 offset += 4;
5712 size -= 4;
5713 if (entrycount == 0) {
5714 return OK;
5715 }
5716 if (entrycount > UINT32_MAX / 8) {
5717 return ERROR_MALFORMED;
5718 }
5719
5720 if (entrycount > mCurrentSampleInfoOffsetsAllocSize) {
5721 uint64_t *newPtr = (uint64_t *)realloc(mCurrentSampleInfoOffsets, entrycount * 8);
5722 if (newPtr == NULL) {
5723 ALOGE("failed to realloc %u -> %u",
5724 mCurrentSampleInfoOffsetsAllocSize, entrycount * 8);
5725 return NO_MEMORY;
5726 }
5727 mCurrentSampleInfoOffsets = newPtr;
5728 mCurrentSampleInfoOffsetsAllocSize = entrycount;
5729 }
5730 mCurrentSampleInfoOffsetCount = entrycount;
5731
5732 if (mCurrentSampleInfoOffsets == NULL) {
5733 return OK;
5734 }
5735
5736 for (size_t i = 0; i < entrycount; i++) {
5737 if (version == 0) {
5738 if (size < 4) {
5739 ALOGW("b/124526959");
5740 android_errorWriteLog(0x534e4554, "124526959");
5741 return -EINVAL;
5742 }
5743 uint32_t tmp;
5744 if (!mDataSource->getUInt32(offset, &tmp)) {
5745 return ERROR_IO;
5746 }
5747 mCurrentSampleInfoOffsets[i] = tmp;
5748 offset += 4;
5749 size -= 4;
5750 } else {
5751 if (size < 8) {
5752 ALOGW("b/124526959");
5753 android_errorWriteLog(0x534e4554, "124526959");
5754 return -EINVAL;
5755 }
5756 uint64_t tmp;
5757 if (!mDataSource->getUInt64(offset, &tmp)) {
5758 return ERROR_IO;
5759 }
5760 mCurrentSampleInfoOffsets[i] = tmp;
5761 offset += 8;
5762 size -= 8;
5763 }
5764 }
5765
5766 // parse clear/encrypted data
5767
5768 off64_t drmoffset = mCurrentSampleInfoOffsets[0]; // from moof
5769
5770 drmoffset += mCurrentMoofOffset;
5771 mDrmOffsets[drmoffset] = flags;
5772 ALOGV("saio drmoffset %" PRId64 " flags %u", drmoffset, flags);
5773
5774 return OK;
5775 }
5776
parseClearEncryptedSizes(off64_t offset,bool isSampleEncryption,uint32_t flags,off64_t size)5777 status_t MPEG4Source::parseClearEncryptedSizes(
5778 off64_t offset, bool isSampleEncryption, uint32_t flags, off64_t size) {
5779
5780 int32_t ivlength;
5781 if (!AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_CRYPTO_DEFAULT_IV_SIZE, &ivlength)) {
5782 return ERROR_MALFORMED;
5783 }
5784
5785 // only 0, 8 and 16 byte initialization vectors are supported
5786 if (ivlength != 0 && ivlength != 8 && ivlength != 16) {
5787 ALOGW("unsupported IV length: %d", ivlength);
5788 return ERROR_MALFORMED;
5789 }
5790
5791 uint32_t sampleCount = mCurrentSampleInfoCount;
5792 if (isSampleEncryption) {
5793 if (size < 4) {
5794 return ERROR_MALFORMED;
5795 }
5796 if (!mDataSource->getUInt32(offset, &sampleCount)) {
5797 return ERROR_IO;
5798 }
5799 offset += 4;
5800 size -= 4;
5801 }
5802
5803 // read CencSampleAuxiliaryDataFormats
5804 for (size_t i = 0; i < sampleCount; i++) {
5805 if (i >= mCurrentSamples.size()) {
5806 ALOGW("too few samples");
5807 break;
5808 }
5809 Sample *smpl = &mCurrentSamples.editItemAt(i);
5810 if (!smpl->clearsizes.isEmpty()) {
5811 continue;
5812 }
5813
5814 memset(smpl->iv, 0, 16);
5815 if (size < ivlength) {
5816 return ERROR_MALFORMED;
5817 }
5818 if (mDataSource->readAt(offset, smpl->iv, ivlength) != ivlength) {
5819 return ERROR_IO;
5820 }
5821
5822 offset += ivlength;
5823 size -= ivlength;
5824
5825 bool readSubsamples;
5826 if (isSampleEncryption) {
5827 readSubsamples = flags & 2;
5828 } else {
5829 int32_t smplinfosize = mCurrentDefaultSampleInfoSize;
5830 if (smplinfosize == 0) {
5831 smplinfosize = mCurrentSampleInfoSizes[i];
5832 }
5833 readSubsamples = smplinfosize > ivlength;
5834 }
5835
5836 if (readSubsamples) {
5837 uint16_t numsubsamples;
5838 if (size < 2) {
5839 return ERROR_MALFORMED;
5840 }
5841 if (!mDataSource->getUInt16(offset, &numsubsamples)) {
5842 return ERROR_IO;
5843 }
5844 offset += 2;
5845 size -= 2;
5846 for (size_t j = 0; j < numsubsamples; j++) {
5847 uint16_t numclear;
5848 uint32_t numencrypted;
5849 if (size < 6) {
5850 return ERROR_MALFORMED;
5851 }
5852 if (!mDataSource->getUInt16(offset, &numclear)) {
5853 return ERROR_IO;
5854 }
5855 offset += 2;
5856 if (!mDataSource->getUInt32(offset, &numencrypted)) {
5857 return ERROR_IO;
5858 }
5859 offset += 4;
5860 size -= 6;
5861 smpl->clearsizes.add(numclear);
5862 smpl->encryptedsizes.add(numencrypted);
5863 }
5864 } else {
5865 smpl->clearsizes.add(0);
5866 smpl->encryptedsizes.add(smpl->size);
5867 }
5868 }
5869
5870 return OK;
5871 }
5872
parseSampleEncryption(off64_t offset,off64_t chunk_data_size)5873 status_t MPEG4Source::parseSampleEncryption(off64_t offset, off64_t chunk_data_size) {
5874 uint32_t flags;
5875 if (chunk_data_size < 4) {
5876 return ERROR_MALFORMED;
5877 }
5878 if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags
5879 return ERROR_MALFORMED;
5880 }
5881 return parseClearEncryptedSizes(offset + 4, true, flags, chunk_data_size - 4);
5882 }
5883
parseTrackFragmentHeader(off64_t offset,off64_t size)5884 status_t MPEG4Source::parseTrackFragmentHeader(off64_t offset, off64_t size) {
5885
5886 if (size < 8) {
5887 return -EINVAL;
5888 }
5889
5890 uint32_t flags;
5891 if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags
5892 return ERROR_MALFORMED;
5893 }
5894
5895 if (flags & 0xff000000) {
5896 return -EINVAL;
5897 }
5898
5899 if (!mDataSource->getUInt32(offset + 4, (uint32_t*)&mLastParsedTrackId)) {
5900 return ERROR_MALFORMED;
5901 }
5902
5903 if (mLastParsedTrackId != mTrackId) {
5904 // this is not the right track, skip it
5905 return OK;
5906 }
5907
5908 mTrackFragmentHeaderInfo.mFlags = flags;
5909 mTrackFragmentHeaderInfo.mTrackID = mLastParsedTrackId;
5910 offset += 8;
5911 size -= 8;
5912
5913 ALOGV("fragment header: %08x %08x", flags, mTrackFragmentHeaderInfo.mTrackID);
5914
5915 if (flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent) {
5916 if (size < 8) {
5917 return -EINVAL;
5918 }
5919
5920 if (!mDataSource->getUInt64(offset, &mTrackFragmentHeaderInfo.mBaseDataOffset)) {
5921 return ERROR_MALFORMED;
5922 }
5923 offset += 8;
5924 size -= 8;
5925 }
5926
5927 if (flags & TrackFragmentHeaderInfo::kSampleDescriptionIndexPresent) {
5928 if (size < 4) {
5929 return -EINVAL;
5930 }
5931
5932 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mSampleDescriptionIndex)) {
5933 return ERROR_MALFORMED;
5934 }
5935 offset += 4;
5936 size -= 4;
5937 }
5938
5939 if (flags & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
5940 if (size < 4) {
5941 return -EINVAL;
5942 }
5943
5944 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleDuration)) {
5945 return ERROR_MALFORMED;
5946 }
5947 offset += 4;
5948 size -= 4;
5949 }
5950
5951 if (flags & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) {
5952 if (size < 4) {
5953 return -EINVAL;
5954 }
5955
5956 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleSize)) {
5957 return ERROR_MALFORMED;
5958 }
5959 offset += 4;
5960 size -= 4;
5961 }
5962
5963 if (flags & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) {
5964 if (size < 4) {
5965 return -EINVAL;
5966 }
5967
5968 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleFlags)) {
5969 return ERROR_MALFORMED;
5970 }
5971 offset += 4;
5972 size -= 4;
5973 }
5974
5975 if (!(flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent)) {
5976 mTrackFragmentHeaderInfo.mBaseDataOffset = mCurrentMoofOffset;
5977 }
5978
5979 mTrackFragmentHeaderInfo.mDataOffset = 0;
5980 return OK;
5981 }
5982
parseTrackFragmentRun(off64_t offset,off64_t size)5983 status_t MPEG4Source::parseTrackFragmentRun(off64_t offset, off64_t size) {
5984
5985 ALOGV("MPEG4Source::parseTrackFragmentRun");
5986 if (size < 8) {
5987 return -EINVAL;
5988 }
5989
5990 enum {
5991 kDataOffsetPresent = 0x01,
5992 kFirstSampleFlagsPresent = 0x04,
5993 kSampleDurationPresent = 0x100,
5994 kSampleSizePresent = 0x200,
5995 kSampleFlagsPresent = 0x400,
5996 kSampleCompositionTimeOffsetPresent = 0x800,
5997 };
5998
5999 uint32_t flags;
6000 if (!mDataSource->getUInt32(offset, &flags)) {
6001 return ERROR_MALFORMED;
6002 }
6003 // |version| only affects SampleCompositionTimeOffset field.
6004 // If version == 0, SampleCompositionTimeOffset is uint32_t;
6005 // Otherwise, SampleCompositionTimeOffset is int32_t.
6006 // Sample.compositionOffset is defined as int32_t.
6007 uint8_t version = flags >> 24;
6008 flags &= 0xffffff;
6009 ALOGV("fragment run version: 0x%02x, flags: 0x%06x", version, flags);
6010
6011 if ((flags & kFirstSampleFlagsPresent) && (flags & kSampleFlagsPresent)) {
6012 // These two shall not be used together.
6013 return -EINVAL;
6014 }
6015
6016 uint32_t sampleCount;
6017 if (!mDataSource->getUInt32(offset + 4, &sampleCount)) {
6018 return ERROR_MALFORMED;
6019 }
6020 offset += 8;
6021 size -= 8;
6022
6023 uint64_t dataOffset = mTrackFragmentHeaderInfo.mDataOffset;
6024
6025 uint32_t firstSampleFlags = 0;
6026
6027 if (flags & kDataOffsetPresent) {
6028 if (size < 4) {
6029 return -EINVAL;
6030 }
6031
6032 uint32_t dataOffsetDelta;
6033 if (!mDataSource->getUInt32(offset, &dataOffsetDelta)) {
6034 return ERROR_MALFORMED;
6035 }
6036
6037 if (__builtin_add_overflow(
6038 mTrackFragmentHeaderInfo.mBaseDataOffset, dataOffsetDelta, &dataOffset)) {
6039 ALOGW("b/232242894 mBaseDataOffset(%" PRIu64 ") + dataOffsetDelta(%u) overflows uint64",
6040 mTrackFragmentHeaderInfo.mBaseDataOffset, dataOffsetDelta);
6041 android_errorWriteLog(0x534e4554, "232242894");
6042 return ERROR_MALFORMED;
6043 }
6044
6045 offset += 4;
6046 size -= 4;
6047 }
6048
6049 if (flags & kFirstSampleFlagsPresent) {
6050 if (size < 4) {
6051 return -EINVAL;
6052 }
6053
6054 if (!mDataSource->getUInt32(offset, &firstSampleFlags)) {
6055 return ERROR_MALFORMED;
6056 }
6057 offset += 4;
6058 size -= 4;
6059 }
6060
6061 uint32_t sampleDuration = 0, sampleSize = 0, sampleFlags = 0,
6062 sampleCtsOffset = 0;
6063
6064 size_t bytesPerSample = 0;
6065 if (flags & kSampleDurationPresent) {
6066 bytesPerSample += 4;
6067 } else if (mTrackFragmentHeaderInfo.mFlags
6068 & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
6069 sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration;
6070 } else if (mTrex) {
6071 sampleDuration = mTrex->default_sample_duration;
6072 }
6073
6074 if (flags & kSampleSizePresent) {
6075 bytesPerSample += 4;
6076 } else {
6077 sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize;
6078 #ifdef VERY_VERY_VERBOSE_LOGGING
6079 // We don't expect this, but also want to avoid spamming the log if
6080 // we hit this case.
6081 if (!(mTrackFragmentHeaderInfo.mFlags
6082 & TrackFragmentHeaderInfo::kDefaultSampleSizePresent)) {
6083 ALOGW("No sample size specified");
6084 }
6085 #endif
6086 }
6087
6088 if (flags & kSampleFlagsPresent) {
6089 bytesPerSample += 4;
6090 } else {
6091 sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags;
6092 #ifdef VERY_VERY_VERBOSE_LOGGING
6093 // We don't expect this, but also want to avoid spamming the log if
6094 // we hit this case.
6095 if (!(mTrackFragmentHeaderInfo.mFlags
6096 & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent)) {
6097 ALOGW("No sample flags specified");
6098 }
6099 #endif
6100 }
6101
6102 if (flags & kSampleCompositionTimeOffsetPresent) {
6103 bytesPerSample += 4;
6104 } else {
6105 sampleCtsOffset = 0;
6106 }
6107
6108 if (bytesPerSample != 0) {
6109 if (size < (off64_t)sampleCount * bytesPerSample) {
6110 return -EINVAL;
6111 }
6112 } else {
6113 if (sampleDuration == 0) {
6114 ALOGW("b/123389881 sampleDuration == 0");
6115 android_errorWriteLog(0x534e4554, "124389881 zero");
6116 return -EINVAL;
6117 }
6118
6119 // apply some quick (vs strict legality) checks
6120 //
6121 static constexpr uint32_t kMaxTrunSampleCount = 10000;
6122 if (sampleCount > kMaxTrunSampleCount) {
6123 ALOGW("b/123389881 sampleCount(%u) > kMaxTrunSampleCount(%u)",
6124 sampleCount, kMaxTrunSampleCount);
6125 android_errorWriteLog(0x534e4554, "124389881 count");
6126 return -EINVAL;
6127 }
6128 }
6129
6130 Sample tmp;
6131 for (uint32_t i = 0; i < sampleCount; ++i) {
6132 if (flags & kSampleDurationPresent) {
6133 if (!mDataSource->getUInt32(offset, &sampleDuration)) {
6134 return ERROR_MALFORMED;
6135 }
6136 offset += 4;
6137 }
6138
6139 if (flags & kSampleSizePresent) {
6140 if (!mDataSource->getUInt32(offset, &sampleSize)) {
6141 return ERROR_MALFORMED;
6142 }
6143 offset += 4;
6144 }
6145
6146 if (flags & kSampleFlagsPresent) {
6147 if (!mDataSource->getUInt32(offset, &sampleFlags)) {
6148 return ERROR_MALFORMED;
6149 }
6150 offset += 4;
6151 }
6152
6153 if (flags & kSampleCompositionTimeOffsetPresent) {
6154 if (!mDataSource->getUInt32(offset, &sampleCtsOffset)) {
6155 return ERROR_MALFORMED;
6156 }
6157 offset += 4;
6158 }
6159
6160 ALOGV("adding sample %d at offset 0x%08" PRIx64 ", size %u, duration %u, "
6161 " flags 0x%08x ctsOffset %" PRIu32, i + 1,
6162 dataOffset, sampleSize, sampleDuration,
6163 (flags & kFirstSampleFlagsPresent) && i == 0
6164 ? firstSampleFlags : sampleFlags, sampleCtsOffset);
6165 tmp.offset = dataOffset;
6166 tmp.size = sampleSize;
6167 tmp.duration = sampleDuration;
6168 tmp.compositionOffset = sampleCtsOffset;
6169 memset(tmp.iv, 0, sizeof(tmp.iv));
6170 if (mCurrentSamples.add(tmp) < 0) {
6171 ALOGW("b/123389881 failed saving sample(n=%zu)", mCurrentSamples.size());
6172 android_errorWriteLog(0x534e4554, "124389881 allocation");
6173 mCurrentSamples.clear();
6174 return NO_MEMORY;
6175 }
6176
6177 if (__builtin_add_overflow(dataOffset, sampleSize, &dataOffset)) {
6178 ALOGW("b/232242894 dataOffset(%" PRIu64 ") + sampleSize(%u) overflows uint64",
6179 dataOffset, sampleSize);
6180 android_errorWriteLog(0x534e4554, "232242894");
6181 return ERROR_MALFORMED;
6182 }
6183 }
6184
6185 mTrackFragmentHeaderInfo.mDataOffset = dataOffset;
6186
6187 return OK;
6188 }
6189
getFormat(AMediaFormat * meta)6190 media_status_t MPEG4Source::getFormat(AMediaFormat *meta) {
6191 Mutex::Autolock autoLock(mLock);
6192 AMediaFormat_copy(meta, mFormat);
6193 return AMEDIA_OK;
6194 }
6195
parseNALSize(const uint8_t * data) const6196 size_t MPEG4Source::parseNALSize(const uint8_t *data) const {
6197 switch (mNALLengthSize) {
6198 case 1:
6199 return *data;
6200 case 2:
6201 return U16_AT(data);
6202 case 3:
6203 return ((size_t)data[0] << 16) | U16_AT(&data[1]);
6204 case 4:
6205 return U32_AT(data);
6206 }
6207
6208 // This cannot happen, mNALLengthSize springs to life by adding 1 to
6209 // a 2-bit integer.
6210 CHECK(!"Should not be here.");
6211
6212 return 0;
6213 }
6214
parseHEVCLayerId(const uint8_t * data,size_t size)6215 int32_t MPEG4Source::parseHEVCLayerId(const uint8_t *data, size_t size) {
6216 if (data == nullptr || size < mNALLengthSize + 2) {
6217 return -1;
6218 }
6219
6220 // HEVC NAL-header (16-bit)
6221 // 1 6 6 3
6222 // |-|uuuuuu|------|iii|
6223 // ^ ^
6224 // NAL_type layer_id + 1
6225 //
6226 // Layer-id is non-zero only for Temporal Sub-layer Access pictures (TSA)
6227 enum {
6228 TSA_N = 2,
6229 TSA_R = 3,
6230 STSA_N = 4,
6231 STSA_R = 5,
6232 };
6233
6234 data += mNALLengthSize;
6235 uint16_t nalHeader = data[0] << 8 | data[1];
6236
6237 uint16_t nalType = (nalHeader >> 9) & 0x3Fu;
6238 if (nalType == TSA_N || nalType == TSA_R || nalType == STSA_N || nalType == STSA_R) {
6239 int32_t layerIdPlusOne = nalHeader & 0x7u;
6240 ALOGD_IF(layerIdPlusOne == 0, "got layerId 0 for TSA picture");
6241 return layerIdPlusOne - 1;
6242 }
6243 return 0;
6244 }
6245
getNALLengthSizeFromAvcCsd(const uint8_t * data,const size_t size) const6246 size_t MPEG4Source::getNALLengthSizeFromAvcCsd(const uint8_t *data, const size_t size) const {
6247 CHECK(data != nullptr);
6248 CHECK(size >= 7);
6249 CHECK_EQ((unsigned)data[0], 1u); // configurationVersion == 1
6250
6251 // The number of bytes used to encode the length of a NAL unit.
6252 return 1 + (data[4] & 3);
6253 }
6254
getNALLengthSizeFromHevcCsd(const uint8_t * data,const size_t size) const6255 size_t MPEG4Source::getNALLengthSizeFromHevcCsd(const uint8_t *data, const size_t size) const {
6256 CHECK(data != nullptr);
6257 CHECK(size >= 22);
6258 CHECK_EQ((unsigned)data[0], 1u); // configurationVersion == 1
6259
6260 // The number of bytes used to encode the length of a NAL unit.
6261 return 1 + (data[14 + 7] & 3);
6262 }
6263
rescaleTime(int64_t value,int64_t scale,int64_t originScale) const6264 int64_t MPEG4Source::rescaleTime(int64_t value, int64_t scale, int64_t originScale) const {
6265 // Rescale time: calculate value * scale / originScale
6266 if (value == 0 || scale == 0) {
6267 return 0;
6268 }
6269
6270 CHECK(value > 0);
6271 CHECK(scale > 0);
6272 CHECK(originScale > 0);
6273
6274 if (originScale >= scale && (originScale % scale) == 0) {
6275 int64_t factor = originScale / scale;
6276 return value / factor;
6277 } else if (originScale < scale && (scale % originScale) == 0) {
6278 int64_t factor = scale / originScale;
6279 if (__builtin_mul_overflow(value, factor, &value)) {
6280 return std::numeric_limits<int64_t>::max();
6281 }
6282 return value;
6283 } else if (originScale >= value && (originScale % value) == 0) {
6284 int64_t factor = originScale / value;
6285 return scale / factor;
6286 } else if (originScale < value && (value % originScale) == 0) {
6287 int64_t factor = value / originScale;
6288 if (__builtin_mul_overflow(scale, factor, &value)) {
6289 return std::numeric_limits<int64_t>::max();
6290 }
6291 return value;
6292 } else {
6293 int64_t rescaleValue;
6294 if (!__builtin_mul_overflow(value, scale, &rescaleValue)) {
6295 return rescaleValue / originScale;
6296 } else {
6297 // Divide the max gcd before calc scale/originScale
6298 int64_t gcdOfScaleAndOriginScale = std::gcd(scale, originScale);
6299 int64_t simpleScale = scale / gcdOfScaleAndOriginScale;
6300 int64_t simpleOriginScale = originScale / gcdOfScaleAndOriginScale;
6301 // Divide the max gcd before calc value/simpleOriginScale
6302 int64_t gcdOfValueAndSimpleOriginScale = std::gcd(value, simpleOriginScale);
6303 int64_t simpleValue = value / gcdOfValueAndSimpleOriginScale;
6304 simpleOriginScale /= gcdOfValueAndSimpleOriginScale;
6305
6306 if (!__builtin_mul_overflow(simpleValue, simpleScale, &simpleValue)) {
6307 return simpleValue / simpleOriginScale;
6308 } else {
6309 // Fallback using long double to calculate the rescale value
6310 long double rescale = (long double)value / originScale * scale;
6311 if (rescale > std::numeric_limits<int64_t>::max()) {
6312 return std::numeric_limits<int64_t>::max();
6313 }
6314
6315 return rescale;
6316 }
6317 }
6318 }
6319 }
6320
read(MediaBufferHelper ** out,const ReadOptions * options)6321 media_status_t MPEG4Source::read(
6322 MediaBufferHelper **out, const ReadOptions *options) {
6323 Mutex::Autolock autoLock(mLock);
6324
6325 CHECK(mStarted);
6326
6327 if (options != nullptr && options->getNonBlocking() && !mBufferGroup->has_buffers()) {
6328 *out = nullptr;
6329 return AMEDIA_ERROR_WOULD_BLOCK;
6330 }
6331
6332 if (mFirstMoofOffset > 0) {
6333 return fragmentedRead(out, options);
6334 }
6335
6336 *out = NULL;
6337
6338 int64_t targetSampleTimeUs = -1;
6339
6340 int64_t seekTimeUs;
6341 ReadOptions::SeekMode mode;
6342
6343 if (options && options->getSeekTo(&seekTimeUs, &mode)) {
6344 ALOGV("seekTimeUs:%" PRId64, seekTimeUs);
6345 if (mIsHeif || mIsAvif) {
6346 CHECK(mSampleTable == NULL);
6347 CHECK(mItemTable != NULL);
6348 int32_t imageIndex;
6349 if (!AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_TRACK_ID, &imageIndex)) {
6350 return AMEDIA_ERROR_MALFORMED;
6351 }
6352
6353 status_t err;
6354 if (seekTimeUs >= 0) {
6355 err = mItemTable->findImageItem(imageIndex, &mCurrentSampleIndex);
6356 } else {
6357 err = mItemTable->findThumbnailItem(imageIndex, &mCurrentSampleIndex);
6358 }
6359 if (err != OK) {
6360 return AMEDIA_ERROR_UNKNOWN;
6361 }
6362 } else {
6363 uint32_t findFlags = 0;
6364 switch (mode) {
6365 case ReadOptions::SEEK_PREVIOUS_SYNC:
6366 findFlags = SampleTable::kFlagBefore;
6367 break;
6368 case ReadOptions::SEEK_NEXT_SYNC:
6369 findFlags = SampleTable::kFlagAfter;
6370 break;
6371 case ReadOptions::SEEK_CLOSEST_SYNC:
6372 case ReadOptions::SEEK_CLOSEST:
6373 findFlags = SampleTable::kFlagClosest;
6374 break;
6375 case ReadOptions::SEEK_FRAME_INDEX:
6376 findFlags = SampleTable::kFlagFrameIndex;
6377 break;
6378 default:
6379 CHECK(!"Should not be here.");
6380 break;
6381 }
6382 if( mode != ReadOptions::SEEK_FRAME_INDEX) {
6383 int64_t elstInitialEmptyEditUs = 0, elstShiftStartUs = 0;
6384 if (mElstInitialEmptyEditTicks > 0) {
6385 elstInitialEmptyEditUs = rescaleTime(mElstInitialEmptyEditTicks, 1000000,
6386 mTimescale);
6387
6388 /* Sample's composition time from ctts/stts entries are non-negative(>=0).
6389 * Hence, lower bound on seekTimeUs is 0.
6390 */
6391 if (__builtin_sub_overflow(seekTimeUs, elstInitialEmptyEditUs,
6392 &seekTimeUs) || seekTimeUs < 0) {
6393 ALOGW("seekTimeUs:%" PRId64 " would be a bogus value, set to 0",
6394 seekTimeUs);
6395 seekTimeUs = 0;
6396 }
6397 }
6398 if (mElstShiftStartTicks > 0) {
6399 elstShiftStartUs = rescaleTime(mElstShiftStartTicks, 1000000, mTimescale);
6400
6401 if (__builtin_add_overflow(seekTimeUs, elstShiftStartUs, &seekTimeUs)) {
6402 ALOGW("seek + elst shift start would be overflow, round to max");
6403 seekTimeUs = std::numeric_limits<int64_t>::max();
6404 }
6405 }
6406 ALOGV("shifted seekTimeUs:%" PRId64 ", elstInitialEmptyEditUs:%" PRIu64
6407 ", elstShiftStartUs:%" PRIu64, seekTimeUs, elstInitialEmptyEditUs,
6408 elstShiftStartUs);
6409 }
6410
6411 uint32_t sampleIndex;
6412 status_t err = mSampleTable->findSampleAtTime(
6413 seekTimeUs, 1000000, mTimescale,
6414 &sampleIndex, findFlags);
6415
6416 if (mode == ReadOptions::SEEK_CLOSEST
6417 || mode == ReadOptions::SEEK_FRAME_INDEX) {
6418 // We found the closest sample already, now we want the sync
6419 // sample preceding it (or the sample itself of course), even
6420 // if the subsequent sync sample is closer.
6421 findFlags = SampleTable::kFlagBefore;
6422 }
6423
6424 uint32_t syncSampleIndex = sampleIndex;
6425 // assume every non-USAC/non-MPEGH audio sample is a sync sample.
6426 // This works around
6427 // seek issues with files that were incorrectly written with an
6428 // empty or single-sample stss block for the audio track
6429 if (err == OK && (!mIsAudio || mIsUsac || mIsMpegH)) {
6430 err = mSampleTable->findSyncSampleNear(
6431 sampleIndex, &syncSampleIndex, findFlags);
6432 }
6433
6434 uint64_t sampleTime;
6435 if (err == OK) {
6436 err = mSampleTable->getMetaDataForSample(
6437 sampleIndex, NULL, NULL, &sampleTime);
6438 }
6439
6440 if (err != OK) {
6441 if (err == ERROR_OUT_OF_RANGE) {
6442 // An attempt to seek past the end of the stream would
6443 // normally cause this ERROR_OUT_OF_RANGE error. Propagating
6444 // this all the way to the MediaPlayer would cause abnormal
6445 // termination. Legacy behaviour appears to be to behave as if
6446 // we had seeked to the end of stream, ending normally.
6447 return AMEDIA_ERROR_END_OF_STREAM;
6448 }
6449 ALOGV("end of stream");
6450 return AMEDIA_ERROR_UNKNOWN;
6451 }
6452
6453 if (mode == ReadOptions::SEEK_CLOSEST
6454 || mode == ReadOptions::SEEK_FRAME_INDEX) {
6455 if (mElstInitialEmptyEditTicks > 0) {
6456 sampleTime += mElstInitialEmptyEditTicks;
6457 }
6458 if (mElstShiftStartTicks > 0){
6459 if (sampleTime > mElstShiftStartTicks) {
6460 sampleTime -= mElstShiftStartTicks;
6461 } else {
6462 sampleTime = 0;
6463 }
6464 }
6465 targetSampleTimeUs = (sampleTime * 1000000ll) / mTimescale;
6466 }
6467
6468 #if 0
6469 uint32_t syncSampleTime;
6470 CHECK_EQ(OK, mSampleTable->getMetaDataForSample(
6471 syncSampleIndex, NULL, NULL, &syncSampleTime));
6472
6473 ALOGI("seek to time %lld us => sample at time %lld us, "
6474 "sync sample at time %lld us",
6475 seekTimeUs,
6476 sampleTime * 1000000ll / mTimescale,
6477 syncSampleTime * 1000000ll / mTimescale);
6478 #endif
6479
6480 mCurrentSampleIndex = syncSampleIndex;
6481 }
6482
6483 if (mBuffer != NULL) {
6484 mBuffer->release();
6485 mBuffer = NULL;
6486 }
6487
6488 // fall through
6489 }
6490
6491 off64_t offset = 0;
6492 size_t size = 0;
6493 int64_t cts;
6494 uint64_t stts;
6495 bool isSyncSample;
6496 bool newBuffer = false;
6497 if (mBuffer == NULL) {
6498 newBuffer = true;
6499
6500 status_t err;
6501 if (!mIsHeif && !mIsAvif) {
6502 err = mSampleTable->getMetaDataForSample(mCurrentSampleIndex, &offset, &size,
6503 (uint64_t*)&cts, &isSyncSample, &stts);
6504 if(err == OK) {
6505 if (mElstInitialEmptyEditTicks > 0) {
6506 cts += mElstInitialEmptyEditTicks;
6507 }
6508 if (mElstShiftStartTicks > 0) {
6509 // cts can be negative. for example, initial audio samples for gapless playback.
6510 cts -= (int64_t)mElstShiftStartTicks;
6511 }
6512 }
6513 } else {
6514 err = mItemTable->getImageOffsetAndSize(
6515 options && options->getSeekTo(&seekTimeUs, &mode) ?
6516 &mCurrentSampleIndex : NULL, &offset, &size);
6517
6518 cts = stts = 0;
6519 isSyncSample = 0;
6520 ALOGV("image offset %lld, size %zu", (long long)offset, size);
6521 }
6522
6523 if (err != OK) {
6524 if (err == ERROR_END_OF_STREAM) {
6525 return AMEDIA_ERROR_END_OF_STREAM;
6526 }
6527 return AMEDIA_ERROR_UNKNOWN;
6528 }
6529
6530 err = mBufferGroup->acquire_buffer(&mBuffer);
6531
6532 if (err != OK || mBuffer == nullptr) {
6533 CHECK(mBuffer == NULL);
6534 return AMEDIA_ERROR_UNKNOWN;
6535 }
6536 if (size > mBuffer->size()) {
6537 ALOGE("buffer too small: %zu > %zu", size, mBuffer->size());
6538 mBuffer->release();
6539 mBuffer = NULL;
6540 return AMEDIA_ERROR_UNKNOWN; // ERROR_BUFFER_TOO_SMALL
6541 }
6542 }
6543
6544 if (!mIsAVC && !mIsHEVC && !(mIsDolbyVision && mNALLengthSize) && !mIsAC4) {
6545 if (newBuffer) {
6546 if (mIsPcm) {
6547 // The twos' PCM block reader assumes that all samples has the same size.
6548 uint32_t lastSampleIndexInChunk = mSampleTable->getLastSampleIndexInChunk();
6549 if (lastSampleIndexInChunk < mCurrentSampleIndex) {
6550 mBuffer->release();
6551 mBuffer = nullptr;
6552 return AMEDIA_ERROR_UNKNOWN;
6553 }
6554 uint32_t samplesToRead = lastSampleIndexInChunk - mCurrentSampleIndex + 1;
6555 if (samplesToRead > kMaxPcmFrameSize) {
6556 samplesToRead = kMaxPcmFrameSize;
6557 }
6558
6559 ALOGV("Reading %d PCM frames of size %zu at index %d to stop of chunk at %d",
6560 samplesToRead, size, mCurrentSampleIndex,
6561 mSampleTable->getLastSampleIndexInChunk());
6562
6563 size_t totalSize = samplesToRead * size;
6564 if (mBuffer->size() < totalSize) {
6565 mBuffer->release();
6566 mBuffer = nullptr;
6567 return AMEDIA_ERROR_UNKNOWN;
6568 }
6569 uint8_t* buf = (uint8_t *)mBuffer->data();
6570 ssize_t bytesRead = mDataSource->readAt(offset, buf, totalSize);
6571 if (bytesRead < (ssize_t)totalSize) {
6572 mBuffer->release();
6573 mBuffer = NULL;
6574 return AMEDIA_ERROR_IO;
6575 }
6576
6577 AMediaFormat *meta = mBuffer->meta_data();
6578 AMediaFormat_clear(meta);
6579 AMediaFormat_setInt64(
6580 meta, AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
6581 AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
6582
6583 int32_t byteOrder = 0;
6584 bool isGetBigEndian = AMediaFormat_getInt32(mFormat,
6585 AMEDIAFORMAT_KEY_PCM_BIG_ENDIAN, &byteOrder);
6586
6587 if (isGetBigEndian && byteOrder == 1) {
6588 // Big-endian -> little-endian
6589 uint16_t *dstData = (uint16_t *)buf;
6590 uint16_t *srcData = (uint16_t *)buf;
6591
6592 for (size_t j = 0; j < bytesRead / sizeof(uint16_t); j++) {
6593 dstData[j] = ntohs(srcData[j]);
6594 }
6595 }
6596
6597 mCurrentSampleIndex += samplesToRead;
6598 mBuffer->set_range(0, totalSize);
6599 } else {
6600 ssize_t num_bytes_read =
6601 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
6602
6603 if (num_bytes_read < (ssize_t)size) {
6604 mBuffer->release();
6605 mBuffer = NULL;
6606
6607 return AMEDIA_ERROR_IO;
6608 }
6609
6610 CHECK(mBuffer != NULL);
6611 mBuffer->set_range(0, size);
6612 AMediaFormat *meta = mBuffer->meta_data();
6613 AMediaFormat_clear(meta);
6614 AMediaFormat_setInt64(
6615 meta, AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
6616 AMediaFormat_setInt64(
6617 meta, AMEDIAFORMAT_KEY_DURATION, ((long double)stts * 1000000) / mTimescale);
6618
6619 if (targetSampleTimeUs >= 0) {
6620 AMediaFormat_setInt64(
6621 meta, AMEDIAFORMAT_KEY_TARGET_TIME, targetSampleTimeUs);
6622 }
6623
6624 if (isSyncSample) {
6625 AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
6626 }
6627
6628 AMediaFormat_setInt64(
6629 meta, "sample-file-offset" /*AMEDIAFORMAT_KEY_SAMPLE_FILE_OFFSET*/,
6630 offset);
6631
6632 if (mSampleTable != nullptr &&
6633 mCurrentSampleIndex == mSampleTable->getLastSampleIndexInChunk()) {
6634 AMediaFormat_setInt64(
6635 meta,
6636 "last-sample-index-in-chunk" /*AMEDIAFORMAT_KEY_LAST_SAMPLE_INDEX_IN_CHUNK*/,
6637 mSampleTable->getLastSampleIndexInChunk());
6638 }
6639
6640 ++mCurrentSampleIndex;
6641 }
6642 }
6643
6644 *out = mBuffer;
6645 mBuffer = NULL;
6646
6647 return AMEDIA_OK;
6648
6649 } else if (mIsAC4) {
6650 CHECK(mBuffer != NULL);
6651 // Make sure there is enough space to write the sync header and the raw frame
6652 if (mBuffer->range_length() < (7 + size)) {
6653 mBuffer->release();
6654 mBuffer = NULL;
6655
6656 return AMEDIA_ERROR_IO;
6657 }
6658
6659 uint8_t *dstData = (uint8_t *)mBuffer->data();
6660 size_t dstOffset = 0;
6661 // Add AC-4 sync header to MPEG4 encapsulated AC-4 raw frame
6662 // AC40 sync word, meaning no CRC at the end of the frame
6663 dstData[dstOffset++] = 0xAC;
6664 dstData[dstOffset++] = 0x40;
6665 dstData[dstOffset++] = 0xFF;
6666 dstData[dstOffset++] = 0xFF;
6667 dstData[dstOffset++] = (uint8_t)((size >> 16) & 0xFF);
6668 dstData[dstOffset++] = (uint8_t)((size >> 8) & 0xFF);
6669 dstData[dstOffset++] = (uint8_t)((size >> 0) & 0xFF);
6670
6671 ssize_t numBytesRead = mDataSource->readAt(offset, dstData + dstOffset, size);
6672 if (numBytesRead != (ssize_t)size) {
6673 mBuffer->release();
6674 mBuffer = NULL;
6675
6676 return AMEDIA_ERROR_IO;
6677 }
6678
6679 mBuffer->set_range(0, dstOffset + size);
6680 AMediaFormat *meta = mBuffer->meta_data();
6681 AMediaFormat_clear(meta);
6682 AMediaFormat_setInt64(
6683 meta, AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
6684 AMediaFormat_setInt64(
6685 meta, AMEDIAFORMAT_KEY_DURATION, ((long double)stts * 1000000) / mTimescale);
6686
6687 if (targetSampleTimeUs >= 0) {
6688 AMediaFormat_setInt64(
6689 meta, AMEDIAFORMAT_KEY_TARGET_TIME, targetSampleTimeUs);
6690 }
6691
6692 if (isSyncSample) {
6693 AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
6694 }
6695
6696 void *presentationsData;
6697 size_t presentationsSize;
6698 if (AMediaFormat_getBuffer(
6699 mFormat, AMEDIAFORMAT_KEY_AUDIO_PRESENTATION_INFO,
6700 &presentationsData, &presentationsSize)) {
6701 AMediaFormat_setBuffer(
6702 meta, AMEDIAFORMAT_KEY_AUDIO_PRESENTATION_INFO,
6703 presentationsData, presentationsSize);
6704 }
6705
6706 ++mCurrentSampleIndex;
6707
6708 *out = mBuffer;
6709 mBuffer = NULL;
6710
6711 return AMEDIA_OK;
6712 } else {
6713 // Whole NAL units are returned but each fragment is prefixed by
6714 // the start code (0x00 00 00 01).
6715 ssize_t num_bytes_read = 0;
6716 bool mSrcBufferFitsDataToRead = size <= mSrcBufferSize;
6717 if (mSrcBufferFitsDataToRead) {
6718 num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size);
6719 } else {
6720 // We are trying to read a sample larger than the expected max sample size.
6721 // Fall through and let the failure be handled by the following if.
6722 android_errorWriteLog(0x534e4554, "188893559");
6723 }
6724
6725 if (num_bytes_read < (ssize_t)size) {
6726 mBuffer->release();
6727 mBuffer = NULL;
6728 return mSrcBufferFitsDataToRead ? AMEDIA_ERROR_IO : AMEDIA_ERROR_MALFORMED;
6729 }
6730
6731 uint8_t *dstData = (uint8_t *)mBuffer->data();
6732 size_t srcOffset = 0;
6733 size_t dstOffset = 0;
6734
6735 while (srcOffset < size) {
6736 bool isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize);
6737 size_t nalLength = 0;
6738 if (!isMalFormed) {
6739 nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
6740 srcOffset += mNALLengthSize;
6741 isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength);
6742 }
6743
6744 if (isMalFormed) {
6745 //if nallength abnormal,ignore it.
6746 ALOGW("abnormal nallength, ignore this NAL");
6747 srcOffset = size;
6748 break;
6749 }
6750
6751 if (nalLength == 0) {
6752 continue;
6753 }
6754
6755 if (dstOffset > SIZE_MAX - 4 ||
6756 dstOffset + 4 > SIZE_MAX - nalLength ||
6757 dstOffset + 4 + nalLength > mBuffer->size()) {
6758 ALOGE("b/27208621 : %zu %zu", dstOffset, mBuffer->size());
6759 android_errorWriteLog(0x534e4554, "27208621");
6760 mBuffer->release();
6761 mBuffer = NULL;
6762 return AMEDIA_ERROR_MALFORMED;
6763 }
6764
6765 dstData[dstOffset++] = 0;
6766 dstData[dstOffset++] = 0;
6767 dstData[dstOffset++] = 0;
6768 dstData[dstOffset++] = 1;
6769 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
6770 srcOffset += nalLength;
6771 dstOffset += nalLength;
6772 }
6773 CHECK_EQ(srcOffset, size);
6774 CHECK(mBuffer != NULL);
6775 mBuffer->set_range(0, dstOffset);
6776
6777 AMediaFormat *meta = mBuffer->meta_data();
6778 AMediaFormat_clear(meta);
6779 AMediaFormat_setInt64(
6780 meta, AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
6781 AMediaFormat_setInt64(
6782 meta, AMEDIAFORMAT_KEY_DURATION, ((long double)stts * 1000000) / mTimescale);
6783
6784 if (targetSampleTimeUs >= 0) {
6785 AMediaFormat_setInt64(
6786 meta, AMEDIAFORMAT_KEY_TARGET_TIME, targetSampleTimeUs);
6787 }
6788
6789 if (mIsAVC) {
6790 uint32_t layerId = FindAVCLayerId(
6791 (const uint8_t *)mBuffer->data(), mBuffer->range_length());
6792 AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_TEMPORAL_LAYER_ID, layerId);
6793 } else if (mIsHEVC) {
6794 int32_t layerId = parseHEVCLayerId(
6795 (const uint8_t *)mBuffer->data(), mBuffer->range_length());
6796 if (layerId >= 0) {
6797 AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_TEMPORAL_LAYER_ID, layerId);
6798 }
6799 }
6800
6801 if (isSyncSample) {
6802 AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
6803 }
6804
6805 AMediaFormat_setInt64(
6806 meta, "sample-file-offset" /*AMEDIAFORMAT_KEY_SAMPLE_FILE_OFFSET*/, offset);
6807
6808 if (mSampleTable != nullptr &&
6809 mCurrentSampleIndex == mSampleTable->getLastSampleIndexInChunk()) {
6810 AMediaFormat_setInt64(
6811 meta,
6812 "last-sample-index-in-chunk" /*AMEDIAFORMAT_KEY_LAST_SAMPLE_INDEX_IN_CHUNK*/,
6813 mSampleTable->getLastSampleIndexInChunk());
6814 }
6815
6816 ++mCurrentSampleIndex;
6817
6818 *out = mBuffer;
6819 mBuffer = NULL;
6820
6821 return AMEDIA_OK;
6822 }
6823 }
6824
fragmentedRead(MediaBufferHelper ** out,const ReadOptions * options)6825 media_status_t MPEG4Source::fragmentedRead(
6826 MediaBufferHelper **out, const ReadOptions *options) {
6827
6828 ALOGV("MPEG4Source::fragmentedRead");
6829
6830 CHECK(mStarted);
6831
6832 *out = NULL;
6833
6834 int64_t targetSampleTimeUs = -1;
6835
6836 int64_t seekTimeUs;
6837 ReadOptions::SeekMode mode;
6838 if (options && options->getSeekTo(&seekTimeUs, &mode)) {
6839 ALOGV("seekTimeUs:%" PRId64, seekTimeUs);
6840 int64_t elstInitialEmptyEditUs = 0, elstShiftStartUs = 0;
6841 if (mElstInitialEmptyEditTicks > 0) {
6842 elstInitialEmptyEditUs = rescaleTime(mElstInitialEmptyEditTicks, 1000000,
6843 mTimescale);
6844
6845 /* Sample's composition time from ctts/stts entries are non-negative(>=0).
6846 * Hence, lower bound on seekTimeUs is 0.
6847 */
6848 if (__builtin_sub_overflow(seekTimeUs, elstInitialEmptyEditUs,
6849 &seekTimeUs) || seekTimeUs < 0) {
6850 ALOGW("seekTimeUs:%" PRId64 " would be a bogus value, set to 0",
6851 seekTimeUs);
6852 seekTimeUs = 0;
6853 }
6854 }
6855 if (mElstShiftStartTicks > 0) {
6856 elstShiftStartUs = rescaleTime(mElstShiftStartTicks, 1000000, mTimescale);
6857
6858 if (__builtin_add_overflow(seekTimeUs, elstShiftStartUs, &seekTimeUs)) {
6859 ALOGW("seek + elst shift start would be overflow, round to max");
6860 seekTimeUs = std::numeric_limits<int64_t>::max();
6861 }
6862 }
6863 ALOGV("shifted seekTimeUs:%" PRId64 ", elstInitialEmptyEditUs:%" PRIu64
6864 ", elstShiftStartUs:%" PRIu64, seekTimeUs, elstInitialEmptyEditUs,
6865 elstShiftStartUs);
6866
6867 int numSidxEntries = mSegments.size();
6868 if (numSidxEntries != 0) {
6869 int64_t totalTime = 0;
6870 off64_t totalOffset = mFirstMoofOffset;
6871 for (int i = 0; i < numSidxEntries; i++) {
6872 const SidxEntry *se = &mSegments[i];
6873 if (totalTime + se->mDurationUs > seekTimeUs) {
6874 // The requested time is somewhere in this segment
6875 if ((mode == ReadOptions::SEEK_NEXT_SYNC && seekTimeUs > totalTime) ||
6876 (mode == ReadOptions::SEEK_CLOSEST_SYNC &&
6877 (seekTimeUs - totalTime) > (totalTime + se->mDurationUs - seekTimeUs))) {
6878 // requested next sync, or closest sync and it was closer to the end of
6879 // this segment
6880 totalTime += se->mDurationUs;
6881 totalOffset += se->mSize;
6882 }
6883 break;
6884 }
6885 totalTime += se->mDurationUs;
6886 totalOffset += se->mSize;
6887 }
6888 mCurrentMoofOffset = totalOffset;
6889 mNextMoofOffset = -1;
6890 mCurrentSamples.clear();
6891 mCurrentSampleIndex = 0;
6892 status_t err = parseChunk(&totalOffset);
6893 if (err != OK) {
6894 return AMEDIA_ERROR_UNKNOWN;
6895 }
6896 mCurrentTime = totalTime * mTimescale / 1000000ll;
6897 } else {
6898 // without sidx boxes, we can only seek to 0
6899 mCurrentMoofOffset = mFirstMoofOffset;
6900 mNextMoofOffset = -1;
6901 mCurrentSamples.clear();
6902 mCurrentSampleIndex = 0;
6903 off64_t tmp = mCurrentMoofOffset;
6904 status_t err = parseChunk(&tmp);
6905 if (err != OK) {
6906 return AMEDIA_ERROR_UNKNOWN;
6907 }
6908 mCurrentTime = 0;
6909 }
6910
6911 if (mBuffer != NULL) {
6912 mBuffer->release();
6913 mBuffer = NULL;
6914 }
6915
6916 // fall through
6917 }
6918
6919 off64_t offset = 0;
6920 size_t size = 0;
6921 int64_t cts = 0;
6922 bool isSyncSample = false;
6923 bool newBuffer = false;
6924 if (mBuffer == NULL || mCurrentSampleIndex >= mCurrentSamples.size()) {
6925 newBuffer = true;
6926
6927 if (mBuffer != NULL) {
6928 mBuffer->release();
6929 mBuffer = NULL;
6930 }
6931 if (mCurrentSampleIndex >= mCurrentSamples.size()) {
6932 // move to next fragment if there is one
6933 if (mNextMoofOffset <= mCurrentMoofOffset) {
6934 return AMEDIA_ERROR_END_OF_STREAM;
6935 }
6936 off64_t nextMoof = mNextMoofOffset;
6937 mCurrentMoofOffset = nextMoof;
6938 mCurrentSamples.clear();
6939 mCurrentSampleIndex = 0;
6940 status_t err = parseChunk(&nextMoof);
6941 if (err != OK) {
6942 return AMEDIA_ERROR_UNKNOWN;
6943 }
6944 if (mCurrentSampleIndex >= mCurrentSamples.size()) {
6945 return AMEDIA_ERROR_END_OF_STREAM;
6946 }
6947 }
6948
6949 const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
6950 offset = smpl->offset;
6951 size = smpl->size;
6952 cts = (int64_t)mCurrentTime + (int64_t)smpl->compositionOffset;
6953
6954 if (mElstInitialEmptyEditTicks > 0) {
6955 cts += mElstInitialEmptyEditTicks;
6956 }
6957 if (mElstShiftStartTicks > 0) {
6958 // cts can be negative. for example, initial audio samples for gapless playback.
6959 cts -= (int64_t)mElstShiftStartTicks;
6960 }
6961
6962 mCurrentTime += smpl->duration;
6963 isSyncSample = (mCurrentSampleIndex == 0);
6964
6965 status_t err = mBufferGroup->acquire_buffer(&mBuffer);
6966
6967 if (err != OK) {
6968 CHECK(mBuffer == NULL);
6969 ALOGV("acquire_buffer returned %d", err);
6970 return AMEDIA_ERROR_UNKNOWN;
6971 }
6972 if (size > mBuffer->size()) {
6973 ALOGE("buffer too small: %zu > %zu", size, mBuffer->size());
6974 mBuffer->release();
6975 mBuffer = NULL;
6976 return AMEDIA_ERROR_UNKNOWN;
6977 }
6978 }
6979
6980 const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
6981 AMediaFormat *bufmeta = mBuffer->meta_data();
6982 AMediaFormat_clear(bufmeta);
6983 if (smpl->encryptedsizes.size()) {
6984 // store clear/encrypted lengths in metadata
6985 AMediaFormat_setBuffer(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_PLAIN_SIZES,
6986 smpl->clearsizes.array(), smpl->clearsizes.size() * sizeof(uint32_t));
6987 AMediaFormat_setBuffer(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_ENCRYPTED_SIZES,
6988 smpl->encryptedsizes.array(), smpl->encryptedsizes.size() * sizeof(uint32_t));
6989 AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_DEFAULT_IV_SIZE, mDefaultIVSize);
6990 AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_MODE, mCryptoMode);
6991 AMediaFormat_setBuffer(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_KEY, mCryptoKey, 16);
6992 AMediaFormat_setInt32(bufmeta,
6993 AMEDIAFORMAT_KEY_CRYPTO_ENCRYPTED_BYTE_BLOCK, mDefaultEncryptedByteBlock);
6994 AMediaFormat_setInt32(bufmeta,
6995 AMEDIAFORMAT_KEY_CRYPTO_SKIP_BYTE_BLOCK, mDefaultSkipByteBlock);
6996
6997 void *iv = NULL;
6998 size_t ivlength = 0;
6999 if (!AMediaFormat_getBuffer(mFormat,
7000 "crypto-iv", &iv, &ivlength)) {
7001 iv = (void *) smpl->iv;
7002 ivlength = 16; // use 16 or the actual size?
7003 }
7004 AMediaFormat_setBuffer(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_IV, iv, ivlength);
7005 }
7006
7007 if (!mIsAVC && !mIsHEVC && !(mIsDolbyVision && mNALLengthSize)) {
7008 if (newBuffer) {
7009 if (!isInRange((size_t)0u, mBuffer->size(), size)) {
7010 mBuffer->release();
7011 mBuffer = NULL;
7012
7013 ALOGE("fragmentedRead ERROR_MALFORMED size %zu", size);
7014 return AMEDIA_ERROR_MALFORMED;
7015 }
7016
7017 ssize_t num_bytes_read =
7018 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
7019
7020 if (num_bytes_read < (ssize_t)size) {
7021 mBuffer->release();
7022 mBuffer = NULL;
7023
7024 ALOGE("i/o error");
7025 return AMEDIA_ERROR_IO;
7026 }
7027
7028 CHECK(mBuffer != NULL);
7029 mBuffer->set_range(0, size);
7030 AMediaFormat_setInt64(bufmeta,
7031 AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
7032 AMediaFormat_setInt64(bufmeta,
7033 AMEDIAFORMAT_KEY_DURATION, ((long double)smpl->duration * 1000000) / mTimescale);
7034
7035 if (targetSampleTimeUs >= 0) {
7036 AMediaFormat_setInt64(bufmeta, AMEDIAFORMAT_KEY_TARGET_TIME, targetSampleTimeUs);
7037 }
7038
7039 if (mIsAVC) {
7040 uint32_t layerId = FindAVCLayerId(
7041 (const uint8_t *)mBuffer->data(), mBuffer->range_length());
7042 AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_TEMPORAL_LAYER_ID, layerId);
7043 } else if (mIsHEVC) {
7044 int32_t layerId = parseHEVCLayerId(
7045 (const uint8_t *)mBuffer->data(), mBuffer->range_length());
7046 if (layerId >= 0) {
7047 AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_TEMPORAL_LAYER_ID, layerId);
7048 }
7049 }
7050
7051 if (isSyncSample) {
7052 AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
7053 }
7054
7055 ++mCurrentSampleIndex;
7056 }
7057
7058 *out = mBuffer;
7059 mBuffer = NULL;
7060
7061 return AMEDIA_OK;
7062
7063 } else {
7064 ALOGV("whole NAL");
7065 // Whole NAL units are returned but each fragment is prefixed by
7066 // the start code (0x00 00 00 01).
7067 ssize_t num_bytes_read = 0;
7068 void *data = NULL;
7069 bool isMalFormed = false;
7070 int32_t max_size;
7071 if (!AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_MAX_INPUT_SIZE, &max_size)
7072 || !isInRange((size_t)0u, (size_t)max_size, size)) {
7073 isMalFormed = true;
7074 } else {
7075 data = mSrcBuffer;
7076 }
7077
7078 if (isMalFormed || data == NULL) {
7079 ALOGE("isMalFormed size %zu", size);
7080 if (mBuffer != NULL) {
7081 mBuffer->release();
7082 mBuffer = NULL;
7083 }
7084 return AMEDIA_ERROR_MALFORMED;
7085 }
7086 num_bytes_read = mDataSource->readAt(offset, data, size);
7087
7088 if (num_bytes_read < (ssize_t)size) {
7089 mBuffer->release();
7090 mBuffer = NULL;
7091
7092 ALOGE("i/o error");
7093 return AMEDIA_ERROR_IO;
7094 }
7095
7096 uint8_t *dstData = (uint8_t *)mBuffer->data();
7097 size_t srcOffset = 0;
7098 size_t dstOffset = 0;
7099
7100 while (srcOffset < size) {
7101 isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize);
7102 size_t nalLength = 0;
7103 if (!isMalFormed) {
7104 nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
7105 srcOffset += mNALLengthSize;
7106 isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength)
7107 || !isInRange((size_t)0u, mBuffer->size(), dstOffset, (size_t)4u)
7108 || !isInRange((size_t)0u, mBuffer->size(), dstOffset + 4, nalLength);
7109 }
7110
7111 if (isMalFormed) {
7112 ALOGE("Video is malformed; nalLength %zu", nalLength);
7113 mBuffer->release();
7114 mBuffer = NULL;
7115 return AMEDIA_ERROR_MALFORMED;
7116 }
7117
7118 if (nalLength == 0) {
7119 continue;
7120 }
7121
7122 if (dstOffset > SIZE_MAX - 4 ||
7123 dstOffset + 4 > SIZE_MAX - nalLength ||
7124 dstOffset + 4 + nalLength > mBuffer->size()) {
7125 ALOGE("b/26365349 : %zu %zu", dstOffset, mBuffer->size());
7126 android_errorWriteLog(0x534e4554, "26365349");
7127 mBuffer->release();
7128 mBuffer = NULL;
7129 return AMEDIA_ERROR_MALFORMED;
7130 }
7131
7132 dstData[dstOffset++] = 0;
7133 dstData[dstOffset++] = 0;
7134 dstData[dstOffset++] = 0;
7135 dstData[dstOffset++] = 1;
7136 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
7137 srcOffset += nalLength;
7138 dstOffset += nalLength;
7139 }
7140 CHECK_EQ(srcOffset, size);
7141 CHECK(mBuffer != NULL);
7142 mBuffer->set_range(0, dstOffset);
7143
7144 AMediaFormat *bufmeta = mBuffer->meta_data();
7145 AMediaFormat_setInt64(bufmeta,
7146 AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
7147 AMediaFormat_setInt64(bufmeta,
7148 AMEDIAFORMAT_KEY_DURATION, ((long double)smpl->duration * 1000000) / mTimescale);
7149
7150 if (targetSampleTimeUs >= 0) {
7151 AMediaFormat_setInt64(bufmeta, AMEDIAFORMAT_KEY_TARGET_TIME, targetSampleTimeUs);
7152 }
7153
7154 if (isSyncSample) {
7155 AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
7156 }
7157
7158 ++mCurrentSampleIndex;
7159
7160 *out = mBuffer;
7161 mBuffer = NULL;
7162
7163 return AMEDIA_OK;
7164 }
7165
7166 return AMEDIA_OK;
7167 }
7168
findTrackByMimePrefix(const char * mimePrefix)7169 MPEG4Extractor::Track *MPEG4Extractor::findTrackByMimePrefix(
7170 const char *mimePrefix) {
7171 for (Track *track = mFirstTrack; track != NULL; track = track->next) {
7172 const char *mime;
7173 if (AMediaFormat_getString(track->meta, AMEDIAFORMAT_KEY_MIME, &mime)
7174 && !strncasecmp(mime, mimePrefix, strlen(mimePrefix))) {
7175 return track;
7176 }
7177 }
7178
7179 return NULL;
7180 }
7181
LegacySniffMPEG4(DataSourceHelper * source,float * confidence)7182 static bool LegacySniffMPEG4(DataSourceHelper *source, float *confidence) {
7183 uint8_t header[8];
7184
7185 ssize_t n = source->readAt(4, header, sizeof(header));
7186 if (n < (ssize_t)sizeof(header)) {
7187 return false;
7188 }
7189
7190 if (!memcmp(header, "ftyp3gp", 7) || !memcmp(header, "ftypmp42", 8)
7191 || !memcmp(header, "ftyp3gr6", 8) || !memcmp(header, "ftyp3gs6", 8)
7192 || !memcmp(header, "ftyp3ge6", 8) || !memcmp(header, "ftyp3gg6", 8)
7193 || !memcmp(header, "ftypisom", 8) || !memcmp(header, "ftypM4V ", 8)
7194 || !memcmp(header, "ftypM4A ", 8) || !memcmp(header, "ftypf4v ", 8)
7195 || !memcmp(header, "ftypkddi", 8) || !memcmp(header, "ftypM4VP", 8)
7196 || !memcmp(header, "ftypmif1", 8) || !memcmp(header, "ftypheic", 8)
7197 || !memcmp(header, "ftypmsf1", 8) || !memcmp(header, "ftyphevc", 8)
7198 || !memcmp(header, "ftypavif", 8) || !memcmp(header, "ftypavis", 8)) {
7199 *confidence = 0.4;
7200
7201 return true;
7202 }
7203
7204 return false;
7205 }
7206
isCompatibleBrand(uint32_t fourcc)7207 static bool isCompatibleBrand(uint32_t fourcc) {
7208 static const uint32_t kCompatibleBrands[] = {
7209 FOURCC("isom"),
7210 FOURCC("iso2"),
7211 FOURCC("avc1"),
7212 FOURCC("hvc1"),
7213 FOURCC("hev1"),
7214 FOURCC("av01"),
7215 FOURCC("vp09"),
7216 FOURCC("3gp4"),
7217 FOURCC("mp41"),
7218 FOURCC("mp42"),
7219 FOURCC("dash"),
7220 FOURCC("nvr1"),
7221
7222 // Won't promise that the following file types can be played.
7223 // Just give these file types a chance.
7224 FOURCC("qt "), // Apple's QuickTime
7225 FOURCC("MSNV"), // Sony's PSP
7226 FOURCC("wmf "),
7227
7228 FOURCC("3g2a"), // 3GPP2
7229 FOURCC("3g2b"),
7230 FOURCC("mif1"), // HEIF image
7231 FOURCC("heic"), // HEIF image
7232 FOURCC("msf1"), // HEIF image sequence
7233 FOURCC("hevc"), // HEIF image sequence
7234 FOURCC("avif"), // AVIF image
7235 FOURCC("avis"), // AVIF image sequence
7236 };
7237
7238 for (size_t i = 0;
7239 i < sizeof(kCompatibleBrands) / sizeof(kCompatibleBrands[0]);
7240 ++i) {
7241 if (kCompatibleBrands[i] == fourcc) {
7242 return true;
7243 }
7244 }
7245
7246 return false;
7247 }
7248
7249 // Attempt to actually parse the 'ftyp' atom and determine if a suitable
7250 // compatible brand is present.
7251 // Also try to identify where this file's metadata ends
7252 // (end of the 'moov' atom) and report it to the caller as part of
7253 // the metadata.
BetterSniffMPEG4(DataSourceHelper * source,float * confidence)7254 static bool BetterSniffMPEG4(DataSourceHelper *source, float *confidence) {
7255 // We scan up to 128 bytes to identify this file as an MP4.
7256 static const off64_t kMaxScanOffset = 128ll;
7257
7258 off64_t offset = 0ll;
7259 bool foundGoodFileType = false;
7260 off64_t moovAtomEndOffset = -1ll;
7261 bool done = false;
7262
7263 while (!done && offset < kMaxScanOffset) {
7264 uint32_t hdr[2];
7265 if (source->readAt(offset, hdr, 8) < 8) {
7266 return false;
7267 }
7268
7269 uint64_t chunkSize = ntohl(hdr[0]);
7270 uint32_t chunkType = ntohl(hdr[1]);
7271 off64_t chunkDataOffset = offset + 8;
7272
7273 if (chunkSize == 1) {
7274 if (source->readAt(offset + 8, &chunkSize, 8) < 8) {
7275 return false;
7276 }
7277
7278 chunkSize = ntoh64(chunkSize);
7279 chunkDataOffset += 8;
7280
7281 if (chunkSize < 16) {
7282 // The smallest valid chunk is 16 bytes long in this case.
7283 return false;
7284 }
7285 if (chunkSize > INT64_MAX) {
7286 // reject overly large chunk sizes that could
7287 // be interpreted as negative
7288 ALOGE("chunk size too large");
7289 return false;
7290 }
7291
7292 } else if (chunkSize < 8) {
7293 // The smallest valid chunk is 8 bytes long.
7294 return false;
7295 }
7296
7297 // (data_offset - offset) is either 8 or 16
7298 off64_t chunkDataSize = chunkSize - (chunkDataOffset - offset);
7299 if (chunkDataSize < 0) {
7300 ALOGE("b/23540914");
7301 return false;
7302 }
7303
7304 char chunkstring[5];
7305 MakeFourCCString(chunkType, chunkstring);
7306 ALOGV("saw chunk type %s, size %" PRIu64 " @ %lld",
7307 chunkstring, chunkSize, (long long)offset);
7308 switch (chunkType) {
7309 case FOURCC("ftyp"):
7310 {
7311 if (chunkDataSize < 8) {
7312 return false;
7313 }
7314
7315 uint32_t numCompatibleBrands = (chunkDataSize - 8) / 4;
7316 for (size_t i = 0; i < numCompatibleBrands + 2; ++i) {
7317 if (i == 1) {
7318 // Skip this index, it refers to the minorVersion,
7319 // not a brand.
7320 continue;
7321 }
7322
7323 uint32_t brand;
7324 if (source->readAt(
7325 chunkDataOffset + 4 * i, &brand, 4) < 4) {
7326 return false;
7327 }
7328
7329 brand = ntohl(brand);
7330
7331 if (isCompatibleBrand(brand)) {
7332 foundGoodFileType = true;
7333 break;
7334 }
7335 }
7336
7337 if (!foundGoodFileType) {
7338 return false;
7339 }
7340
7341 break;
7342 }
7343
7344 case FOURCC("moov"):
7345 {
7346 if (__builtin_add_overflow(offset, chunkSize, &moovAtomEndOffset)) {
7347 ALOGE("chunk size + offset would overflow");
7348 return false;
7349 }
7350
7351 done = true;
7352 break;
7353 }
7354
7355 default:
7356 break;
7357 }
7358
7359 if (__builtin_add_overflow(offset, chunkSize, &offset)) {
7360 ALOGE("chunk size + offset would overflow");
7361 return false;
7362 }
7363 }
7364
7365 if (!foundGoodFileType) {
7366 return false;
7367 }
7368
7369 *confidence = 0.4f;
7370
7371 return true;
7372 }
7373
CreateExtractor(CDataSource * source,void *)7374 static CMediaExtractor* CreateExtractor(CDataSource *source, void *) {
7375 return wrap(new MPEG4Extractor(new DataSourceHelper(source)));
7376 }
7377
Sniff(CDataSource * source,float * confidence,void **,FreeMetaFunc *)7378 static CreatorFunc Sniff(
7379 CDataSource *source, float *confidence, void **,
7380 FreeMetaFunc *) {
7381 DataSourceHelper helper(source);
7382 if (BetterSniffMPEG4(&helper, confidence)) {
7383 return CreateExtractor;
7384 }
7385
7386 if (LegacySniffMPEG4(&helper, confidence)) {
7387 ALOGW("Identified supported mpeg4 through LegacySniffMPEG4.");
7388 return CreateExtractor;
7389 }
7390
7391 return NULL;
7392 }
7393
// File extensions advertised for this extractor. The list is terminated by a
// null entry.
static const char *extensions[] = {
    "3g2", "3ga", "3gp", "3gpp", "3gpp2",
    "m4a", "m4r", "m4v",
    "mov", "mp4", "qt",
    nullptr
};
7408
7409 extern "C" {
7410 // This is the only symbol that needs to be exported
7411 __attribute__ ((visibility ("default")))
GETEXTRACTORDEF()7412 ExtractorDef GETEXTRACTORDEF() {
7413 return {
7414 EXTRACTORDEF_VERSION,
7415 UUID("27575c67-4417-4c54-8d3d-8e626985a164"),
7416 2, // version
7417 "MP4 Extractor",
7418 { .v3 = {Sniff, extensions} },
7419 };
7420 }
7421
7422 } // extern "C"
7423
7424 } // namespace android
7425