1 /*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 //#define LOG_NDEBUG 0
18 #define LOG_TAG "MPEG4Extractor"
19
20 #include <ctype.h>
21 #include <inttypes.h>
22 #include <algorithm>
23 #include <map>
24 #include <memory>
25 #include <stdint.h>
26 #include <stdlib.h>
27 #include <string.h>
28
29 #include <log/log.h>
30 #include <utils/Log.h>
31
32 #include "AC4Parser.h"
33 #include "MPEG4Extractor.h"
34 #include "SampleTable.h"
35 #include "ItemTable.h"
36
37 #include <media/esds/ESDS.h>
38 #include <ID3.h>
39 #include <media/stagefright/DataSourceBase.h>
40 #include <media/ExtractorUtils.h>
41 #include <media/stagefright/foundation/ABitReader.h>
42 #include <media/stagefright/foundation/ABuffer.h>
43 #include <media/stagefright/foundation/ADebug.h>
44 #include <media/stagefright/foundation/AMessage.h>
45 #include <media/stagefright/foundation/AudioPresentationInfo.h>
46 #include <media/stagefright/foundation/AUtils.h>
47 #include <media/stagefright/foundation/ByteUtils.h>
48 #include <media/stagefright/foundation/ColorUtils.h>
49 #include <media/stagefright/foundation/avc_utils.h>
50 #include <media/stagefright/foundation/hexdump.h>
51 #include <media/stagefright/foundation/OpusHeader.h>
52 #include <media/stagefright/MediaBufferGroup.h>
53 #include <media/stagefright/MediaDefs.h>
54 #include <media/stagefright/MetaDataBase.h>
55 #include <utils/String8.h>
56
57 #include <byteswap.h>
58
59 #ifndef UINT32_MAX
60 #define UINT32_MAX (4294967295U)
61 #endif
62
63 #define ALAC_SPECIFIC_INFO_SIZE (36)
64
65 // TODO : Remove the defines once mainline media is built against NDK >= 31.
66 // The mp4 extractor is part of mainline and builds against NDK 29 as of
67 // writing. These keys are available only from NDK 31:
68 #define AMEDIAFORMAT_KEY_MPEGH_PROFILE_LEVEL_INDICATION \
69 "mpegh-profile-level-indication"
70 #define AMEDIAFORMAT_KEY_MPEGH_REFERENCE_CHANNEL_LAYOUT \
71 "mpegh-reference-channel-layout"
72 #define AMEDIAFORMAT_KEY_MPEGH_COMPATIBLE_SETS \
73 "mpegh-compatible-sets"
74
75 namespace android {
76
enum {
    // Upper bound on the number of stream-header bytes copied out for a track.
    kMaxTrackHeaderSize = 32,

    // Largest atom payload the parser accepts (64 MiB). The spec allows some
    // atoms to be bigger, but anything above this limit is rejected.
    kMaxAtomSize = 64 * (1024 * 1024),
};
85
86 class MPEG4Source : public MediaTrackHelper {
87 static const size_t kMaxPcmFrameSize = 8192;
88 public:
89 // Caller retains ownership of both "dataSource" and "sampleTable".
90 MPEG4Source(AMediaFormat *format,
91 DataSourceHelper *dataSource,
92 int32_t timeScale,
93 const sp<SampleTable> &sampleTable,
94 Vector<SidxEntry> &sidx,
95 const Trex *trex,
96 off64_t firstMoofOffset,
97 const sp<ItemTable> &itemTable,
98 uint64_t elstShiftStartTicks,
99 uint64_t elstInitialEmptyEditTicks);
100 virtual status_t init();
101
102 virtual media_status_t start();
103 virtual media_status_t stop();
104
105 virtual media_status_t getFormat(AMediaFormat *);
106
107 virtual media_status_t read(MediaBufferHelper **buffer, const ReadOptions *options = NULL);
supportsNonBlockingRead()108 bool supportsNonBlockingRead() override { return true; }
109 virtual media_status_t fragmentedRead(
110 MediaBufferHelper **buffer, const ReadOptions *options = NULL);
111
112 virtual ~MPEG4Source();
113
114 private:
115 Mutex mLock;
116
117 AMediaFormat *mFormat;
118 DataSourceHelper *mDataSource;
119 int32_t mTimescale;
120 sp<SampleTable> mSampleTable;
121 uint32_t mCurrentSampleIndex;
122 uint32_t mCurrentFragmentIndex;
123 Vector<SidxEntry> &mSegments;
124 const Trex *mTrex;
125 off64_t mFirstMoofOffset;
126 off64_t mCurrentMoofOffset;
127 off64_t mCurrentMoofSize;
128 off64_t mNextMoofOffset;
129 uint32_t mCurrentTime; // in media timescale ticks
130 int32_t mLastParsedTrackId;
131 int32_t mTrackId;
132
133 int32_t mCryptoMode; // passed in from extractor
134 int32_t mDefaultIVSize; // passed in from extractor
135 uint8_t mCryptoKey[16]; // passed in from extractor
136 int32_t mDefaultEncryptedByteBlock;
137 int32_t mDefaultSkipByteBlock;
138 uint32_t mCurrentAuxInfoType;
139 uint32_t mCurrentAuxInfoTypeParameter;
140 int32_t mCurrentDefaultSampleInfoSize;
141 uint32_t mCurrentSampleInfoCount;
142 uint32_t mCurrentSampleInfoAllocSize;
143 uint8_t* mCurrentSampleInfoSizes;
144 uint32_t mCurrentSampleInfoOffsetCount;
145 uint32_t mCurrentSampleInfoOffsetsAllocSize;
146 uint64_t* mCurrentSampleInfoOffsets;
147
148 bool mIsAVC;
149 bool mIsHEVC;
150 bool mIsDolbyVision;
151 bool mIsAC4;
152 bool mIsMpegH = false;
153 bool mIsPcm;
154 size_t mNALLengthSize;
155
156 bool mStarted;
157
158 MediaBufferHelper *mBuffer;
159
160 size_t mSrcBufferSize;
161 uint8_t *mSrcBuffer;
162
163 bool mIsHeif;
164 bool mIsAvif;
165 bool mIsAudio;
166 bool mIsUsac = false;
167 sp<ItemTable> mItemTable;
168
169 /* Shift start offset (move to earlier time) when media_time > 0,
170 * in media time scale.
171 */
172 uint64_t mElstShiftStartTicks;
173 /* Initial start offset (move to later time), empty edit list entry
174 * in media time scale.
175 */
176 uint64_t mElstInitialEmptyEditTicks;
177
178 size_t parseNALSize(const uint8_t *data) const;
179 status_t parseChunk(off64_t *offset);
180 status_t parseTrackFragmentHeader(off64_t offset, off64_t size);
181 status_t parseTrackFragmentRun(off64_t offset, off64_t size);
182 status_t parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size);
183 status_t parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size);
184 status_t parseClearEncryptedSizes(off64_t offset, bool isSampleEncryption,
185 uint32_t flags, off64_t size);
186 status_t parseSampleEncryption(off64_t offset, off64_t size);
187 // returns -1 for invalid layer ID
188 int32_t parseHEVCLayerId(const uint8_t *data, size_t size);
189
190 struct TrackFragmentHeaderInfo {
191 enum Flags {
192 kBaseDataOffsetPresent = 0x01,
193 kSampleDescriptionIndexPresent = 0x02,
194 kDefaultSampleDurationPresent = 0x08,
195 kDefaultSampleSizePresent = 0x10,
196 kDefaultSampleFlagsPresent = 0x20,
197 kDurationIsEmpty = 0x10000,
198 };
199
200 uint32_t mTrackID;
201 uint32_t mFlags;
202 uint64_t mBaseDataOffset;
203 uint32_t mSampleDescriptionIndex;
204 uint32_t mDefaultSampleDuration;
205 uint32_t mDefaultSampleSize;
206 uint32_t mDefaultSampleFlags;
207
208 uint64_t mDataOffset;
209 };
210 TrackFragmentHeaderInfo mTrackFragmentHeaderInfo;
211
212 struct Sample {
213 off64_t offset;
214 size_t size;
215 uint32_t duration;
216 int32_t compositionOffset;
217 uint8_t iv[16];
218 Vector<uint32_t> clearsizes;
219 Vector<uint32_t> encryptedsizes;
220 };
221 Vector<Sample> mCurrentSamples;
222 std::map<off64_t, uint32_t> mDrmOffsets;
223
224 MPEG4Source(const MPEG4Source &);
225 MPEG4Source &operator=(const MPEG4Source &);
226 };
227
// This custom data source wraps an existing one. Requests that fall entirely
// within the cached range are satisfied from the cache; all other requests
// are forwarded to the wrapped data source.
// It is used to cache the full sample table metadata of a track. Each
// CachedRangedDataSource caches the sample table metadata for a single track,
// so the source may be wrapped multiple times to cover all tracks.
234
235 class CachedRangedDataSource : public DataSourceHelper {
236 public:
237 explicit CachedRangedDataSource(DataSourceHelper *source);
238 virtual ~CachedRangedDataSource();
239
240 ssize_t readAt(off64_t offset, void *data, size_t size) override;
241 status_t getSize(off64_t *size) override;
242 uint32_t flags() override;
243
244 status_t setCachedRange(off64_t offset, size_t size, bool assumeSourceOwnershipOnSuccess);
245
246
247 private:
248 Mutex mLock;
249
250 DataSourceHelper *mSource;
251 bool mOwnsDataSource;
252 off64_t mCachedOffset;
253 size_t mCachedSize;
254 uint8_t *mCache;
255
256 void clearCache();
257
258 CachedRangedDataSource(const CachedRangedDataSource &);
259 CachedRangedDataSource &operator=(const CachedRangedDataSource &);
260 };
261
CachedRangedDataSource(DataSourceHelper * source)262 CachedRangedDataSource::CachedRangedDataSource(DataSourceHelper *source)
263 : DataSourceHelper(source),
264 mSource(source),
265 mOwnsDataSource(false),
266 mCachedOffset(0),
267 mCachedSize(0),
268 mCache(NULL) {
269 }
270
~CachedRangedDataSource()271 CachedRangedDataSource::~CachedRangedDataSource() {
272 clearCache();
273 if (mOwnsDataSource) {
274 delete mSource;
275 }
276 }
277
clearCache()278 void CachedRangedDataSource::clearCache() {
279 if (mCache) {
280 free(mCache);
281 mCache = NULL;
282 }
283
284 mCachedOffset = 0;
285 mCachedSize = 0;
286 }
287
readAt(off64_t offset,void * data,size_t size)288 ssize_t CachedRangedDataSource::readAt(off64_t offset, void *data, size_t size) {
289 Mutex::Autolock autoLock(mLock);
290
291 if (isInRange(mCachedOffset, mCachedSize, offset, size)) {
292 memcpy(data, &mCache[offset - mCachedOffset], size);
293 return size;
294 }
295
296 return mSource->readAt(offset, data, size);
297 }
298
getSize(off64_t * size)299 status_t CachedRangedDataSource::getSize(off64_t *size) {
300 return mSource->getSize(size);
301 }
302
flags()303 uint32_t CachedRangedDataSource::flags() {
304 return mSource->flags();
305 }
306
setCachedRange(off64_t offset,size_t size,bool assumeSourceOwnershipOnSuccess)307 status_t CachedRangedDataSource::setCachedRange(off64_t offset,
308 size_t size,
309 bool assumeSourceOwnershipOnSuccess) {
310 Mutex::Autolock autoLock(mLock);
311
312 clearCache();
313
314 mCache = (uint8_t *)malloc(size);
315
316 if (mCache == NULL) {
317 return -ENOMEM;
318 }
319
320 mCachedOffset = offset;
321 mCachedSize = size;
322
323 ssize_t err = mSource->readAt(mCachedOffset, mCache, mCachedSize);
324
325 if (err < (ssize_t)size) {
326 clearCache();
327
328 return ERROR_IO;
329 }
330 mOwnsDataSource = assumeSourceOwnershipOnSuccess;
331 return OK;
332 }
333
334 ////////////////////////////////////////////////////////////////////////////////
335
// Compile-time switch for the chunk hex dump in parseChunk(); off by default.
static constexpr bool kUseHexDump = false;
337
FourCC2MIME(uint32_t fourcc)338 static const char *FourCC2MIME(uint32_t fourcc) {
339 switch (fourcc) {
340 case FOURCC("mp4a"):
341 return MEDIA_MIMETYPE_AUDIO_AAC;
342
343 case FOURCC("samr"):
344 return MEDIA_MIMETYPE_AUDIO_AMR_NB;
345
346 case FOURCC("sawb"):
347 return MEDIA_MIMETYPE_AUDIO_AMR_WB;
348
349 case FOURCC("ec-3"):
350 return MEDIA_MIMETYPE_AUDIO_EAC3;
351
352 case FOURCC("mp4v"):
353 return MEDIA_MIMETYPE_VIDEO_MPEG4;
354
355 case FOURCC("s263"):
356 case FOURCC("h263"):
357 case FOURCC("H263"):
358 return MEDIA_MIMETYPE_VIDEO_H263;
359
360 case FOURCC("avc1"):
361 return MEDIA_MIMETYPE_VIDEO_AVC;
362
363 case FOURCC("hvc1"):
364 case FOURCC("hev1"):
365 return MEDIA_MIMETYPE_VIDEO_HEVC;
366
367 case FOURCC("dvav"):
368 case FOURCC("dva1"):
369 case FOURCC("dvhe"):
370 case FOURCC("dvh1"):
371 case FOURCC("dav1"):
372 return MEDIA_MIMETYPE_VIDEO_DOLBY_VISION;
373
374 case FOURCC("ac-4"):
375 return MEDIA_MIMETYPE_AUDIO_AC4;
376 case FOURCC("Opus"):
377 return MEDIA_MIMETYPE_AUDIO_OPUS;
378
379 case FOURCC("twos"):
380 case FOURCC("sowt"):
381 return MEDIA_MIMETYPE_AUDIO_RAW;
382 case FOURCC("alac"):
383 return MEDIA_MIMETYPE_AUDIO_ALAC;
384 case FOURCC("fLaC"):
385 return MEDIA_MIMETYPE_AUDIO_FLAC;
386 case FOURCC("av01"):
387 return MEDIA_MIMETYPE_VIDEO_AV1;
388 case FOURCC("vp09"):
389 return MEDIA_MIMETYPE_VIDEO_VP9;
390 case FOURCC(".mp3"):
391 case 0x6D730055: // "ms U" mp3 audio
392 return MEDIA_MIMETYPE_AUDIO_MPEG;
393 case FOURCC("mha1"):
394 return MEDIA_MIMETYPE_AUDIO_MPEGH_MHA1;
395 case FOURCC("mhm1"):
396 return MEDIA_MIMETYPE_AUDIO_MPEGH_MHM1;
397 case FOURCC("dtsc"):
398 return MEDIA_MIMETYPE_AUDIO_DTS;
399 case FOURCC("dtse"):
400 case FOURCC("dtsh"):
401 return MEDIA_MIMETYPE_AUDIO_DTS_HD;
402 case FOURCC("dtsl"):
403 return MEDIA_MIMETYPE_AUDIO_DTS_HD_MA;
404 case FOURCC("dtsx"):
405 return MEDIA_MIMETYPE_AUDIO_DTS_UHD_P2;
406 default:
407 ALOGW("Unknown fourcc: %c%c%c%c",
408 (fourcc >> 24) & 0xff,
409 (fourcc >> 16) & 0xff,
410 (fourcc >> 8) & 0xff,
411 fourcc & 0xff
412 );
413 return "application/octet-stream";
414 }
415 }
416
AdjustChannelsAndRate(uint32_t fourcc,uint32_t * channels,uint32_t * rate)417 static bool AdjustChannelsAndRate(uint32_t fourcc, uint32_t *channels, uint32_t *rate) {
418 if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_NB, FourCC2MIME(fourcc))) {
419 // AMR NB audio is always mono, 8kHz
420 *channels = 1;
421 *rate = 8000;
422 return true;
423 } else if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_WB, FourCC2MIME(fourcc))) {
424 // AMR WB audio is always mono, 16kHz
425 *channels = 1;
426 *rate = 16000;
427 return true;
428 }
429 return false;
430 }
431
MPEG4Extractor(DataSourceHelper * source,const char * mime)432 MPEG4Extractor::MPEG4Extractor(DataSourceHelper *source, const char *mime)
433 : mMoofOffset(0),
434 mMoofFound(false),
435 mMdatFound(false),
436 mDataSource(source),
437 mInitCheck(NO_INIT),
438 mHeaderTimescale(0),
439 mIsQT(false),
440 mIsHeif(false),
441 mHasMoovBox(false),
442 mPreferHeif(mime != NULL && !strcasecmp(mime, MEDIA_MIMETYPE_CONTAINER_HEIF)),
443 mIsAvif(false),
444 mFirstTrack(NULL),
445 mLastTrack(NULL) {
446 ALOGV("mime=%s, mPreferHeif=%d", mime, mPreferHeif);
447 mFileMetaData = AMediaFormat_new();
448 }
449
~MPEG4Extractor()450 MPEG4Extractor::~MPEG4Extractor() {
451 Track *track = mFirstTrack;
452 while (track) {
453 Track *next = track->next;
454
455 delete track;
456 track = next;
457 }
458 mFirstTrack = mLastTrack = NULL;
459
460 for (size_t i = 0; i < mPssh.size(); i++) {
461 delete [] mPssh[i].data;
462 }
463 mPssh.clear();
464
465 delete mDataSource;
466 AMediaFormat_delete(mFileMetaData);
467 }
468
flags() const469 uint32_t MPEG4Extractor::flags() const {
470 return CAN_PAUSE |
471 ((mMoofOffset == 0 || mSidxEntries.size() != 0) ?
472 (CAN_SEEK_BACKWARD | CAN_SEEK_FORWARD | CAN_SEEK) : 0);
473 }
474
getMetaData(AMediaFormat * meta)475 media_status_t MPEG4Extractor::getMetaData(AMediaFormat *meta) {
476 status_t err;
477 if ((err = readMetaData()) != OK) {
478 return AMEDIA_ERROR_UNKNOWN;
479 }
480 AMediaFormat_copy(meta, mFileMetaData);
481 return AMEDIA_OK;
482 }
483
countTracks()484 size_t MPEG4Extractor::countTracks() {
485 status_t err;
486 if ((err = readMetaData()) != OK) {
487 ALOGV("MPEG4Extractor::countTracks: no tracks");
488 return 0;
489 }
490
491 size_t n = 0;
492 Track *track = mFirstTrack;
493 while (track) {
494 ++n;
495 track = track->next;
496 }
497
498 ALOGV("MPEG4Extractor::countTracks: %zu tracks", n);
499 return n;
500 }
501
getTrackMetaData(AMediaFormat * meta,size_t index,uint32_t flags)502 media_status_t MPEG4Extractor::getTrackMetaData(
503 AMediaFormat *meta,
504 size_t index, uint32_t flags) {
505 status_t err;
506 if ((err = readMetaData()) != OK) {
507 return AMEDIA_ERROR_UNKNOWN;
508 }
509
510 Track *track = mFirstTrack;
511 while (index > 0) {
512 if (track == NULL) {
513 return AMEDIA_ERROR_UNKNOWN;
514 }
515
516 track = track->next;
517 --index;
518 }
519
520 if (track == NULL) {
521 return AMEDIA_ERROR_UNKNOWN;
522 }
523
524 [=] {
525 int64_t duration;
526 int32_t samplerate;
527 // Only for audio track.
528 if (track->elst_needs_processing && mHeaderTimescale != 0 &&
529 AMediaFormat_getInt64(track->meta, AMEDIAFORMAT_KEY_DURATION, &duration) &&
530 AMediaFormat_getInt32(track->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, &samplerate)) {
531 // Elst has to be processed only the first time this function is called.
532 track->elst_needs_processing = false;
533
534 if (track->elst_segment_duration > INT64_MAX) {
535 return;
536 }
537 int64_t segment_duration = track->elst_segment_duration;
538 int64_t media_time = track->elst_media_time;
539 int64_t halfscale = track->timescale / 2;
540
541 ALOGV("segment_duration = %" PRId64 ", media_time = %" PRId64
542 ", halfscale = %" PRId64 ", mdhd_timescale = %d, track_timescale = %u",
543 segment_duration, media_time,
544 halfscale, mHeaderTimescale, track->timescale);
545
546 if ((uint32_t)samplerate != track->timescale){
547 ALOGV("samplerate:%" PRId32 ", track->timescale and samplerate are different!",
548 samplerate);
549 }
550 // Both delay and paddingsamples have to be set inorder for either to be
551 // effective in the lower layers.
552 int64_t delay = 0;
553 if (media_time > 0) { // Gapless playback
554 // delay = ((media_time * samplerate) + halfscale) / track->timescale;
555 if (__builtin_mul_overflow(media_time, samplerate, &delay) ||
556 __builtin_add_overflow(delay, halfscale, &delay) ||
557 (delay /= track->timescale, false) ||
558 delay > INT32_MAX ||
559 delay < INT32_MIN) {
560 ALOGW("ignoring edit list with bogus values");
561 return;
562 }
563 }
564 ALOGV("delay = %" PRId64, delay);
565 AMediaFormat_setInt32(track->meta, AMEDIAFORMAT_KEY_ENCODER_DELAY, delay);
566
567 int64_t paddingsamples = 0;
568 if (segment_duration > 0) {
569 int64_t scaled_duration;
570 // scaled_duration = duration * mHeaderTimescale;
571 if (__builtin_mul_overflow(duration, mHeaderTimescale, &scaled_duration)) {
572 return;
573 }
574 ALOGV("scaled_duration = %" PRId64, scaled_duration);
575
576 int64_t segment_end;
577 int64_t padding;
578 int64_t segment_duration_e6;
579 int64_t media_time_scaled_e6;
580 int64_t media_time_scaled;
581 // padding = scaled_duration - ((segment_duration * 1000000) +
582 // ((media_time * mHeaderTimescale * 1000000)/track->timescale) )
583 // segment_duration is based on timescale in movie header box(mdhd)
584 // media_time is based on timescale track header/media timescale
585 if (__builtin_mul_overflow(segment_duration, 1000000, &segment_duration_e6) ||
586 __builtin_mul_overflow(media_time, mHeaderTimescale, &media_time_scaled) ||
587 __builtin_mul_overflow(media_time_scaled, 1000000, &media_time_scaled_e6)) {
588 return;
589 }
590 media_time_scaled_e6 /= track->timescale;
591 if (__builtin_add_overflow(segment_duration_e6, media_time_scaled_e6, &segment_end)
592 || __builtin_sub_overflow(scaled_duration, segment_end, &padding)) {
593 return;
594 }
595 ALOGV("segment_end = %" PRId64 ", padding = %" PRId64, segment_end, padding);
596 // track duration from media header (which is what AMEDIAFORMAT_KEY_DURATION is)
597 // might be slightly shorter than the segment duration, which would make the
598 // padding negative. Clamp to zero.
599 if (padding > 0) {
600 int64_t halfscale_mht = mHeaderTimescale / 2;
601 int64_t halfscale_e6;
602 int64_t timescale_e6;
603 // paddingsamples = ((padding * samplerate) + (halfscale_mht * 1000000))
604 // / (mHeaderTimescale * 1000000);
605 if (__builtin_mul_overflow(padding, samplerate, &paddingsamples) ||
606 __builtin_mul_overflow(halfscale_mht, 1000000, &halfscale_e6) ||
607 __builtin_mul_overflow(mHeaderTimescale, 1000000, ×cale_e6) ||
608 __builtin_add_overflow(paddingsamples, halfscale_e6, &paddingsamples) ||
609 (paddingsamples /= timescale_e6, false) ||
610 paddingsamples > INT32_MAX) {
611 return;
612 }
613 }
614 }
615 ALOGV("paddingsamples = %" PRId64, paddingsamples);
616 AMediaFormat_setInt32(track->meta, AMEDIAFORMAT_KEY_ENCODER_PADDING, paddingsamples);
617 }
618 }();
619
620 if ((flags & kIncludeExtensiveMetaData)
621 && !track->includes_expensive_metadata) {
622 track->includes_expensive_metadata = true;
623
624 const char *mime;
625 CHECK(AMediaFormat_getString(track->meta, AMEDIAFORMAT_KEY_MIME, &mime));
626 if (!strncasecmp("video/", mime, 6)) {
627 // MPEG2 tracks do not provide CSD, so read the stream header
628 if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG2)) {
629 off64_t offset;
630 size_t size;
631 if (track->sampleTable->getMetaDataForSample(
632 0 /* sampleIndex */, &offset, &size, NULL /* sampleTime */) == OK) {
633 if (size > kMaxTrackHeaderSize) {
634 size = kMaxTrackHeaderSize;
635 }
636 uint8_t header[kMaxTrackHeaderSize];
637 if (mDataSource->readAt(offset, &header, size) == (ssize_t)size) {
638 AMediaFormat_setBuffer(track->meta,
639 AMEDIAFORMAT_KEY_MPEG2_STREAM_HEADER, header, size);
640 }
641 }
642 }
643
644 if (mMoofOffset > 0) {
645 int64_t duration;
646 if (AMediaFormat_getInt64(track->meta,
647 AMEDIAFORMAT_KEY_DURATION, &duration)) {
648 // nothing fancy, just pick a frame near 1/4th of the duration
649 AMediaFormat_setInt64(track->meta,
650 AMEDIAFORMAT_KEY_THUMBNAIL_TIME, duration / 4);
651 }
652 } else {
653 uint32_t sampleIndex;
654 uint64_t sampleTime;
655 if (track->timescale != 0 &&
656 track->sampleTable->findThumbnailSample(&sampleIndex) == OK
657 && track->sampleTable->getMetaDataForSample(
658 sampleIndex, NULL /* offset */, NULL /* size */,
659 &sampleTime) == OK) {
660 AMediaFormat_setInt64(track->meta,
661 AMEDIAFORMAT_KEY_THUMBNAIL_TIME,
662 ((int64_t)sampleTime * 1000000) / track->timescale);
663 }
664 }
665 }
666 }
667
668 return AMediaFormat_copy(meta, track->meta);
669 }
670
readMetaData()671 status_t MPEG4Extractor::readMetaData() {
672 if (mInitCheck != NO_INIT) {
673 return mInitCheck;
674 }
675
676 off64_t offset = 0;
677 status_t err;
678 bool sawMoovOrSidx = false;
679
680 while (!((mHasMoovBox && sawMoovOrSidx && (mMdatFound || mMoofFound)) ||
681 (mIsHeif && (mPreferHeif || !mHasMoovBox) &&
682 (mItemTable != NULL) && mItemTable->isValid()))) {
683 off64_t orig_offset = offset;
684 err = parseChunk(&offset, 0);
685
686 if (err != OK && err != UNKNOWN_ERROR) {
687 break;
688 } else if (offset <= orig_offset) {
689 // only continue parsing if the offset was advanced,
690 // otherwise we might end up in an infinite loop
691 ALOGE("did not advance: %lld->%lld", (long long)orig_offset, (long long)offset);
692 err = ERROR_MALFORMED;
693 break;
694 } else if (err == UNKNOWN_ERROR) {
695 sawMoovOrSidx = true;
696 }
697 }
698
699 if ((mIsAvif || mIsHeif) && (mItemTable != NULL) && (mItemTable->countImages() > 0)) {
700 off64_t exifOffset;
701 size_t exifSize;
702 if (mItemTable->getExifOffsetAndSize(&exifOffset, &exifSize) == OK) {
703 AMediaFormat_setInt64(mFileMetaData,
704 AMEDIAFORMAT_KEY_EXIF_OFFSET, (int64_t)exifOffset);
705 AMediaFormat_setInt64(mFileMetaData,
706 AMEDIAFORMAT_KEY_EXIF_SIZE, (int64_t)exifSize);
707 }
708 off64_t xmpOffset;
709 size_t xmpSize;
710 if (mItemTable->getXmpOffsetAndSize(&xmpOffset, &xmpSize) == OK) {
711 // TODO(chz): b/175717339
712 // Use a hard-coded string here instead of named keys. The keys are available
713 // only on API 31+. The mp4 extractor is part of mainline and has min_sdk_version
714 // of 29. This hard-coded string can be replaced with the named constant once
715 // the mp4 extractor is built against API 31+.
716 AMediaFormat_setInt64(mFileMetaData,
717 "xmp-offset" /*AMEDIAFORMAT_KEY_XMP_OFFSET*/, (int64_t)xmpOffset);
718 AMediaFormat_setInt64(mFileMetaData,
719 "xmp-size" /*AMEDIAFORMAT_KEY_XMP_SIZE*/, (int64_t)xmpSize);
720 }
721 for (uint32_t imageIndex = 0;
722 imageIndex < mItemTable->countImages(); imageIndex++) {
723 AMediaFormat *meta = mItemTable->getImageMeta(imageIndex);
724 if (meta == NULL) {
725 ALOGE("heif image %u has no meta!", imageIndex);
726 continue;
727 }
728 // Some heif files advertise image sequence brands (eg. 'hevc') in
729 // ftyp box, but don't have any valid tracks in them. Instead of
730 // reporting the entire file as malformed, we override the error
731 // to allow still images to be extracted.
732 if (err != OK) {
733 ALOGW("Extracting still images only");
734 err = OK;
735 }
736 mInitCheck = OK;
737
738 ALOGV("adding %s image track %u", mIsHeif ? "HEIF" : "AVIF", imageIndex);
739 Track *track = new Track;
740 if (mLastTrack != NULL) {
741 mLastTrack->next = track;
742 } else {
743 mFirstTrack = track;
744 }
745 mLastTrack = track;
746
747 track->meta = meta;
748 AMediaFormat_setInt32(track->meta, AMEDIAFORMAT_KEY_TRACK_ID, imageIndex);
749 track->timescale = 1000000;
750 }
751 }
752
753 if (mInitCheck == OK) {
754 if (findTrackByMimePrefix("video/") != NULL) {
755 AMediaFormat_setString(mFileMetaData,
756 AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_CONTAINER_MPEG4);
757 } else if (findTrackByMimePrefix("audio/") != NULL) {
758 AMediaFormat_setString(mFileMetaData,
759 AMEDIAFORMAT_KEY_MIME, "audio/mp4");
760 } else if (findTrackByMimePrefix(
761 MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC) != NULL) {
762 AMediaFormat_setString(mFileMetaData,
763 AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_CONTAINER_HEIF);
764 } else if (findTrackByMimePrefix(
765 MEDIA_MIMETYPE_IMAGE_AVIF) != NULL) {
766 AMediaFormat_setString(mFileMetaData,
767 AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_IMAGE_AVIF);
768 } else {
769 AMediaFormat_setString(mFileMetaData,
770 AMEDIAFORMAT_KEY_MIME, "application/octet-stream");
771 }
772 } else {
773 mInitCheck = err;
774 }
775
776 CHECK_NE(err, (status_t)NO_INIT);
777
778 // copy pssh data into file metadata
779 uint64_t psshsize = 0;
780 for (size_t i = 0; i < mPssh.size(); i++) {
781 psshsize += 20 + mPssh[i].datalen;
782 }
783 if (psshsize > 0 && psshsize <= UINT32_MAX) {
784 char *buf = (char*)malloc(psshsize);
785 if (!buf) {
786 ALOGE("b/28471206");
787 return NO_MEMORY;
788 }
789 char *ptr = buf;
790 for (size_t i = 0; i < mPssh.size(); i++) {
791 memcpy(ptr, mPssh[i].uuid, 20); // uuid + length
792 memcpy(ptr + 20, mPssh[i].data, mPssh[i].datalen);
793 ptr += (20 + mPssh[i].datalen);
794 }
795 AMediaFormat_setBuffer(mFileMetaData, AMEDIAFORMAT_KEY_PSSH, buf, psshsize);
796 free(buf);
797 }
798
799 return mInitCheck;
800 }
801
802 struct PathAdder {
PathAdderandroid::PathAdder803 PathAdder(Vector<uint32_t> *path, uint32_t chunkType)
804 : mPath(path) {
805 mPath->push(chunkType);
806 }
807
~PathAdderandroid::PathAdder808 ~PathAdder() {
809 mPath->pop();
810 }
811
812 private:
813 Vector<uint32_t> *mPath;
814
815 PathAdder(const PathAdder &);
816 PathAdder &operator=(const PathAdder &);
817 };
818
underMetaDataPath(const Vector<uint32_t> & path)819 static bool underMetaDataPath(const Vector<uint32_t> &path) {
820 return path.size() >= 5
821 && path[0] == FOURCC("moov")
822 && path[1] == FOURCC("udta")
823 && path[2] == FOURCC("meta")
824 && path[3] == FOURCC("ilst");
825 }
826
underQTMetaPath(const Vector<uint32_t> & path,int32_t depth)827 static bool underQTMetaPath(const Vector<uint32_t> &path, int32_t depth) {
828 return path.size() >= 2
829 && path[0] == FOURCC("moov")
830 && path[1] == FOURCC("meta")
831 && (depth == 2
832 || (depth == 3
833 && (path[2] == FOURCC("hdlr")
834 || path[2] == FOURCC("ilst")
835 || path[2] == FOURCC("keys"))));
836 }
837
838 // Given a time in seconds since Jan 1 1904, produce a human-readable string.
convertTimeToDate(int64_t time_1904,String8 * s)839 static bool convertTimeToDate(int64_t time_1904, String8 *s) {
840 // delta between mpeg4 time and unix epoch time
841 static const int64_t delta = (((66 * 365 + 17) * 24) * 3600);
842 if (time_1904 < INT64_MIN + delta) {
843 return false;
844 }
845 time_t time_1970 = time_1904 - delta;
846
847 char tmp[32];
848 struct tm* tm = gmtime(&time_1970);
849 if (tm != NULL &&
850 strftime(tmp, sizeof(tmp), "%Y%m%dT%H%M%S.000Z", tm) > 0) {
851 s->setTo(tmp);
852 return true;
853 }
854 return false;
855 }
856
parseChunk(off64_t * offset,int depth)857 status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
858 ALOGV("entering parseChunk %lld/%d", (long long)*offset, depth);
859
860 if (*offset < 0) {
861 ALOGE("b/23540914");
862 return ERROR_MALFORMED;
863 }
864 if (depth > 100) {
865 ALOGE("b/27456299");
866 return ERROR_MALFORMED;
867 }
868 uint32_t hdr[2];
869 if (mDataSource->readAt(*offset, hdr, 8) < 8) {
870 return ERROR_IO;
871 }
872 uint64_t chunk_size = ntohl(hdr[0]);
873 int32_t chunk_type = ntohl(hdr[1]);
874 off64_t data_offset = *offset + 8;
875
876 if (chunk_size == 1) {
877 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
878 return ERROR_IO;
879 }
880 chunk_size = ntoh64(chunk_size);
881 data_offset += 8;
882
883 if (chunk_size < 16) {
884 // The smallest valid chunk is 16 bytes long in this case.
885 return ERROR_MALFORMED;
886 }
887 } else if (chunk_size == 0) {
888 if (depth == 0) {
889 // atom extends to end of file
890 off64_t sourceSize;
891 if (mDataSource->getSize(&sourceSize) == OK) {
892 chunk_size = (sourceSize - *offset);
893 } else {
894 // XXX could we just pick a "sufficiently large" value here?
895 ALOGE("atom size is 0, and data source has no size");
896 return ERROR_MALFORMED;
897 }
898 } else {
899 // not allowed for non-toplevel atoms, skip it
900 *offset += 4;
901 return OK;
902 }
903 } else if (chunk_size < 8) {
904 // The smallest valid chunk is 8 bytes long.
905 ALOGE("invalid chunk size: %" PRIu64, chunk_size);
906 return ERROR_MALFORMED;
907 }
908
909 char chunk[5];
910 MakeFourCCString(chunk_type, chunk);
911 ALOGV("chunk: %s @ %lld, %d", chunk, (long long)*offset, depth);
912
913 if (kUseHexDump) {
914 static const char kWhitespace[] = " ";
915 const char *indent = &kWhitespace[sizeof(kWhitespace) - 1 - 2 * depth];
916 printf("%sfound chunk '%s' of size %" PRIu64 "\n", indent, chunk, chunk_size);
917
918 char buffer[256];
919 size_t n = chunk_size;
920 if (n > sizeof(buffer)) {
921 n = sizeof(buffer);
922 }
923 if (mDataSource->readAt(*offset, buffer, n)
924 < (ssize_t)n) {
925 return ERROR_IO;
926 }
927
928 hexdump(buffer, n);
929 }
930
931 PathAdder autoAdder(&mPath, chunk_type);
932
933 // (data_offset - *offset) is either 8 or 16
934 off64_t chunk_data_size = chunk_size - (data_offset - *offset);
935 if (chunk_data_size < 0) {
936 ALOGE("b/23540914");
937 return ERROR_MALFORMED;
938 }
939 if (chunk_type != FOURCC("mdat") && chunk_data_size > kMaxAtomSize) {
940 char errMsg[100];
941 sprintf(errMsg, "%s atom has size %" PRId64, chunk, chunk_data_size);
942 ALOGE("%s (b/28615448)", errMsg);
943 android_errorWriteWithInfoLog(0x534e4554, "28615448", -1, errMsg, strlen(errMsg));
944 return ERROR_MALFORMED;
945 }
946
947 if (chunk_type != FOURCC("cprt")
948 && chunk_type != FOURCC("covr")
949 && mPath.size() == 5 && underMetaDataPath(mPath)) {
950 off64_t stop_offset = *offset + chunk_size;
951 *offset = data_offset;
952 while (*offset < stop_offset) {
953 status_t err = parseChunk(offset, depth + 1);
954 if (err != OK) {
955 return err;
956 }
957 }
958
959 if (*offset != stop_offset) {
960 return ERROR_MALFORMED;
961 }
962
963 return OK;
964 }
965
966 switch(chunk_type) {
967 case FOURCC("moov"):
968 case FOURCC("trak"):
969 case FOURCC("mdia"):
970 case FOURCC("minf"):
971 case FOURCC("dinf"):
972 case FOURCC("stbl"):
973 case FOURCC("mvex"):
974 case FOURCC("moof"):
975 case FOURCC("traf"):
976 case FOURCC("mfra"):
977 case FOURCC("udta"):
978 case FOURCC("ilst"):
979 case FOURCC("sinf"):
980 case FOURCC("schi"):
981 case FOURCC("edts"):
982 case FOURCC("wave"):
983 {
984 if (chunk_type == FOURCC("moov") && depth != 0) {
985 ALOGE("moov: depth %d", depth);
986 return ERROR_MALFORMED;
987 }
988
989 if (chunk_type == FOURCC("moov") && mInitCheck == OK) {
990 ALOGE("duplicate moov");
991 return ERROR_MALFORMED;
992 }
993
994 if (chunk_type == FOURCC("moof") && !mMoofFound) {
995 // store the offset of the first segment
996 mMoofFound = true;
997 mMoofOffset = *offset;
998 }
999
1000 if (chunk_type == FOURCC("stbl")) {
1001 ALOGV("sampleTable chunk is %" PRIu64 " bytes long.", chunk_size);
1002
1003 if (mDataSource->flags()
1004 & (DataSourceBase::kWantsPrefetching
1005 | DataSourceBase::kIsCachingDataSource)) {
1006 CachedRangedDataSource *cachedSource =
1007 new CachedRangedDataSource(mDataSource);
1008
1009 if (cachedSource->setCachedRange(
1010 *offset, chunk_size,
1011 true /* assume ownership on success */) == OK) {
1012 mDataSource = cachedSource;
1013 } else {
1014 delete cachedSource;
1015 }
1016 }
1017
1018 if (mLastTrack == NULL) {
1019 return ERROR_MALFORMED;
1020 }
1021
1022 mLastTrack->sampleTable = new SampleTable(mDataSource);
1023 }
1024
1025 bool isTrack = false;
1026 if (chunk_type == FOURCC("trak")) {
1027 if (depth != 1) {
1028 ALOGE("trak: depth %d", depth);
1029 return ERROR_MALFORMED;
1030 }
1031 isTrack = true;
1032
1033 ALOGV("adding new track");
1034 Track *track = new Track;
1035 if (mLastTrack) {
1036 mLastTrack->next = track;
1037 } else {
1038 mFirstTrack = track;
1039 }
1040 mLastTrack = track;
1041
1042 track->meta = AMediaFormat_new();
1043 AMediaFormat_setString(track->meta,
1044 AMEDIAFORMAT_KEY_MIME, "application/octet-stream");
1045 }
1046
1047 off64_t stop_offset = *offset + chunk_size;
1048 *offset = data_offset;
1049 while (*offset < stop_offset) {
1050
1051 // pass udata terminate
1052 if (mIsQT && stop_offset - *offset == 4 && chunk_type == FOURCC("udta")) {
1053 // handle the case that udta terminates with terminate code x00000000
1054 // note that 0 terminator is optional and we just handle this case.
1055 uint32_t terminate_code = 1;
1056 mDataSource->readAt(*offset, &terminate_code, 4);
1057 if (0 == terminate_code) {
1058 *offset += 4;
1059 ALOGD("Terminal code for udta");
1060 continue;
1061 } else {
1062 ALOGW("invalid udta Terminal code");
1063 }
1064 }
1065
1066 status_t err = parseChunk(offset, depth + 1);
1067 if (err != OK) {
1068 if (isTrack) {
1069 mLastTrack->skipTrack = true;
1070 break;
1071 }
1072 return err;
1073 }
1074 }
1075
1076 if (*offset != stop_offset) {
1077 return ERROR_MALFORMED;
1078 }
1079
1080 if (isTrack) {
1081 int32_t trackId;
1082 // There must be exactly one track header per track.
1083
1084 if (!AMediaFormat_getInt32(mLastTrack->meta,
1085 AMEDIAFORMAT_KEY_TRACK_ID, &trackId)) {
1086 mLastTrack->skipTrack = true;
1087 }
1088
1089 status_t err = verifyTrack(mLastTrack);
1090 if (err != OK) {
1091 mLastTrack->skipTrack = true;
1092 }
1093
1094
1095 if (mLastTrack->skipTrack) {
1096 ALOGV("skipping this track...");
1097 Track *cur = mFirstTrack;
1098
1099 if (cur == mLastTrack) {
1100 delete cur;
1101 mFirstTrack = mLastTrack = NULL;
1102 } else {
1103 while (cur && cur->next != mLastTrack) {
1104 cur = cur->next;
1105 }
1106 if (cur) {
1107 cur->next = NULL;
1108 }
1109 delete mLastTrack;
1110 mLastTrack = cur;
1111 }
1112
1113 return OK;
1114 }
1115
1116 // place things we built elsewhere into their final locations
1117
1118 // put aggregated tx3g data into the metadata
1119 if (mLastTrack->mTx3gFilled > 0) {
1120 ALOGV("Putting %zu bytes of tx3g data into meta data",
1121 mLastTrack->mTx3gFilled);
1122 AMediaFormat_setBuffer(mLastTrack->meta,
1123 AMEDIAFORMAT_KEY_TEXT_FORMAT_DATA,
1124 mLastTrack->mTx3gBuffer, mLastTrack->mTx3gFilled);
1125 // drop it now to reduce our footprint
1126 free(mLastTrack->mTx3gBuffer);
1127 mLastTrack->mTx3gBuffer = NULL;
1128 mLastTrack->mTx3gFilled = 0;
1129 mLastTrack->mTx3gSize = 0;
1130 }
1131
1132 const char *mime;
1133 AMediaFormat_getString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, &mime);
1134
1135 if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_DOLBY_VISION)) {
1136 void *data;
1137 size_t size;
1138
1139 if (AMediaFormat_getBuffer(mLastTrack->meta, AMEDIAFORMAT_KEY_CSD_2,
1140 &data, &size)
1141 && size >= 5) {
1142 const uint8_t *ptr = (const uint8_t *)data;
1143 const uint8_t profile = ptr[2] >> 1;
1144 const uint8_t blCompatibilityId = (ptr[4]) >> 4;
1145 bool create_two_tracks = false;
1146
1147 if (blCompatibilityId && blCompatibilityId != 15) {
1148 create_two_tracks = true;
1149 }
1150
1151 if (4 == profile || 7 == profile ||
1152 (profile >= 8 && profile < 11 && create_two_tracks)) {
1153 // we need a backward compatible track
1154 ALOGV("Adding new backward compatible track");
1155 Track *track_b = new Track;
1156
1157 track_b->timescale = mLastTrack->timescale;
1158 track_b->sampleTable = mLastTrack->sampleTable;
1159 track_b->includes_expensive_metadata =
1160 mLastTrack->includes_expensive_metadata;
1161 track_b->skipTrack = mLastTrack->skipTrack;
1162 track_b->elst_needs_processing = mLastTrack->elst_needs_processing;
1163 track_b->elst_media_time = mLastTrack->elst_media_time;
1164 track_b->elst_segment_duration = mLastTrack->elst_segment_duration;
1165 track_b->elst_shift_start_ticks = mLastTrack->elst_shift_start_ticks;
1166 track_b->elst_initial_empty_edit_ticks =
1167 mLastTrack->elst_initial_empty_edit_ticks;
1168 track_b->subsample_encryption = mLastTrack->subsample_encryption;
1169
1170 track_b->mTx3gBuffer = mLastTrack->mTx3gBuffer;
1171 track_b->mTx3gSize = mLastTrack->mTx3gSize;
1172 track_b->mTx3gFilled = mLastTrack->mTx3gFilled;
1173
1174 track_b->meta = AMediaFormat_new();
1175 AMediaFormat_copy(track_b->meta, mLastTrack->meta);
1176
1177 mLastTrack->next = track_b;
1178 track_b->next = NULL;
1179
1180 // we want to remove the csd-2 key from the metadata, but
1181 // don't have an AMediaFormat_* function to do so. Settle
1182 // for replacing this csd-2 with an empty csd-2.
1183 uint8_t emptybuffer[8] = {};
1184 AMediaFormat_setBuffer(track_b->meta, AMEDIAFORMAT_KEY_CSD_2,
1185 emptybuffer, 0);
1186
1187 if (4 == profile || 7 == profile || 8 == profile ) {
1188 AMediaFormat_setString(track_b->meta,
1189 AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_VIDEO_HEVC);
1190 } else if (9 == profile) {
1191 AMediaFormat_setString(track_b->meta,
1192 AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_VIDEO_AVC);
1193 } else if (10 == profile) {
1194 AMediaFormat_setString(track_b->meta,
1195 AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_VIDEO_AV1);
1196 } // Should never get to else part
1197
1198 mLastTrack = track_b;
1199 }
1200 }
1201 }
1202 } else if (chunk_type == FOURCC("moov")) {
1203 mInitCheck = OK;
1204
1205 return UNKNOWN_ERROR; // Return a generic error.
1206 }
1207 break;
1208 }
1209
1210 case FOURCC("schm"):
1211 {
1212
1213 *offset += chunk_size;
1214 if (!mLastTrack) {
1215 return ERROR_MALFORMED;
1216 }
1217
1218 uint32_t scheme_type;
1219 if (mDataSource->readAt(data_offset + 4, &scheme_type, 4) < 4) {
1220 return ERROR_IO;
1221 }
1222 scheme_type = ntohl(scheme_type);
1223 int32_t mode = kCryptoModeUnencrypted;
1224 switch(scheme_type) {
1225 case FOURCC("cbc1"):
1226 {
1227 mode = kCryptoModeAesCbc;
1228 break;
1229 }
1230 case FOURCC("cbcs"):
1231 {
1232 mode = kCryptoModeAesCbc;
1233 mLastTrack->subsample_encryption = true;
1234 break;
1235 }
1236 case FOURCC("cenc"):
1237 {
1238 mode = kCryptoModeAesCtr;
1239 break;
1240 }
1241 case FOURCC("cens"):
1242 {
1243 mode = kCryptoModeAesCtr;
1244 mLastTrack->subsample_encryption = true;
1245 break;
1246 }
1247 }
1248 if (mode != kCryptoModeUnencrypted) {
1249 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CRYPTO_MODE, mode);
1250 }
1251 break;
1252 }
1253
1254
1255 case FOURCC("elst"):
1256 {
1257 *offset += chunk_size;
1258
1259 if (!mLastTrack) {
1260 return ERROR_MALFORMED;
1261 }
1262
1263 // See 14496-12 8.6.6
1264 uint8_t version;
1265 if (mDataSource->readAt(data_offset, &version, 1) < 1) {
1266 return ERROR_IO;
1267 }
1268
1269 uint32_t entry_count;
1270 if (!mDataSource->getUInt32(data_offset + 4, &entry_count)) {
1271 return ERROR_IO;
1272 }
1273
1274 if (entry_count > 2) {
1275 /* We support a single entry for gapless playback or negating offset for
1276 * reordering B frames, two entries (empty edit) for start offset at the moment.
1277 */
1278 ALOGW("ignoring edit list with %d entries", entry_count);
1279 } else {
1280 off64_t entriesoffset = data_offset + 8;
1281 uint64_t segment_duration;
1282 int64_t media_time;
1283 bool empty_edit_present = false;
1284 for (int i = 0; i < entry_count; ++i) {
1285 switch (version) {
1286 case 0: {
1287 uint32_t sd;
1288 int32_t mt;
1289 if (!mDataSource->getUInt32(entriesoffset, &sd) ||
1290 !mDataSource->getUInt32(entriesoffset + 4, (uint32_t*)&mt)) {
1291 return ERROR_IO;
1292 }
1293 segment_duration = sd;
1294 media_time = mt;
1295 // 4(segment duration) + 4(media time) + 4(media rate)
1296 entriesoffset += 12;
1297 break;
1298 }
1299 case 1: {
1300 if (!mDataSource->getUInt64(entriesoffset, &segment_duration) ||
1301 !mDataSource->getUInt64(entriesoffset + 8, (uint64_t*)&media_time)) {
1302 return ERROR_IO;
1303 }
1304 // 8(segment duration) + 8(media time) + 4(media rate)
1305 entriesoffset += 20;
1306 break;
1307 }
1308 default:
1309 return ERROR_IO;
1310 break;
1311 }
1312 // Empty edit entry would have to be first entry.
1313 if (media_time == -1 && i == 0) {
1314 empty_edit_present = true;
1315 ALOGV("initial empty edit ticks: %" PRIu64, segment_duration);
1316 /* In movie header timescale, and needs to be converted to media timescale
1317 * after we get that from a track's 'mdhd' atom,
1318 * which at times come after 'elst'.
1319 */
1320 mLastTrack->elst_initial_empty_edit_ticks = segment_duration;
1321 } else if (media_time >= 0 && i == 0) {
1322 ALOGV("first edit list entry - from gapless playback files");
1323 mLastTrack->elst_media_time = media_time;
1324 mLastTrack->elst_segment_duration = segment_duration;
1325 ALOGV("segment_duration: %" PRIu64 " media_time: %" PRId64,
1326 segment_duration, media_time);
1327 // media_time is in media timescale as are STTS/CTTS entries.
1328 mLastTrack->elst_shift_start_ticks = media_time;
1329 } else if (empty_edit_present && i == 1) {
1330 // Process second entry only when the first entry was an empty edit entry.
1331 ALOGV("second edit list entry");
1332 mLastTrack->elst_shift_start_ticks = media_time;
1333 } else {
1334 ALOGW("for now, unsupported entry in edit list %" PRIu32, entry_count);
1335 }
1336 }
1337 // save these for later, because the elst atom might precede
1338 // the atoms that actually gives us the duration and sample rate
1339 // needed to calculate the padding and delay values
1340 mLastTrack->elst_needs_processing = true;
1341 }
1342 break;
1343 }
1344
1345 case FOURCC("frma"):
1346 {
1347 *offset += chunk_size;
1348
1349 uint32_t original_fourcc;
1350 if (mDataSource->readAt(data_offset, &original_fourcc, 4) < 4) {
1351 return ERROR_IO;
1352 }
1353 original_fourcc = ntohl(original_fourcc);
1354 ALOGV("read original format: %d", original_fourcc);
1355
1356 if (mLastTrack == NULL) {
1357 return ERROR_MALFORMED;
1358 }
1359
1360 AMediaFormat_setString(mLastTrack->meta,
1361 AMEDIAFORMAT_KEY_MIME, FourCC2MIME(original_fourcc));
1362 uint32_t num_channels = 0;
1363 uint32_t sample_rate = 0;
1364 if (AdjustChannelsAndRate(original_fourcc, &num_channels, &sample_rate)) {
1365 AMediaFormat_setInt32(mLastTrack->meta,
1366 AMEDIAFORMAT_KEY_CHANNEL_COUNT, num_channels);
1367 AMediaFormat_setInt32(mLastTrack->meta,
1368 AMEDIAFORMAT_KEY_SAMPLE_RATE, sample_rate);
1369 }
1370
1371 if (!mIsQT && original_fourcc == FOURCC("alac")) {
1372 off64_t tmpOffset = *offset;
1373 status_t err = parseALACSampleEntry(&tmpOffset);
1374 if (err != OK) {
1375 ALOGE("parseALACSampleEntry err:%d Line:%d", err, __LINE__);
1376 return err;
1377 }
1378 *offset = tmpOffset + 8;
1379 }
1380
1381 break;
1382 }
1383
1384 case FOURCC("tenc"):
1385 {
1386 *offset += chunk_size;
1387
1388 if (chunk_size < 32) {
1389 return ERROR_MALFORMED;
1390 }
1391
1392 // tenc box contains 1 byte version, 3 byte flags, 3 byte default algorithm id, one byte
1393 // default IV size, 16 bytes default KeyID
1394 // (ISO 23001-7)
1395
1396 uint8_t version;
1397 if (mDataSource->readAt(data_offset, &version, sizeof(version))
1398 < (ssize_t)sizeof(version)) {
1399 return ERROR_IO;
1400 }
1401
1402 uint8_t buf[4];
1403 memset(buf, 0, 4);
1404 if (mDataSource->readAt(data_offset + 4, buf + 1, 3) < 3) {
1405 return ERROR_IO;
1406 }
1407
1408 if (mLastTrack == NULL) {
1409 return ERROR_MALFORMED;
1410 }
1411
1412 uint8_t defaultEncryptedByteBlock = 0;
1413 uint8_t defaultSkipByteBlock = 0;
1414 uint32_t defaultAlgorithmId = ntohl(*((int32_t*)buf));
1415 if (version == 1) {
1416 uint32_t pattern = buf[2];
1417 defaultEncryptedByteBlock = pattern >> 4;
1418 defaultSkipByteBlock = pattern & 0xf;
1419 if (defaultEncryptedByteBlock == 0 && defaultSkipByteBlock == 0) {
1420 // use (1,0) to mean "encrypt everything"
1421 defaultEncryptedByteBlock = 1;
1422 }
1423 } else if (mLastTrack->subsample_encryption) {
1424 ALOGW("subsample_encryption should be version 1");
1425 } else if (defaultAlgorithmId > 1) {
1426 // only 0 (clear) and 1 (AES-128) are valid
1427 ALOGW("defaultAlgorithmId: %u is a reserved value", defaultAlgorithmId);
1428 defaultAlgorithmId = 1;
1429 }
1430
1431 memset(buf, 0, 4);
1432 if (mDataSource->readAt(data_offset + 7, buf + 3, 1) < 1) {
1433 return ERROR_IO;
1434 }
1435 uint32_t defaultIVSize = ntohl(*((int32_t*)buf));
1436
1437 if (defaultAlgorithmId == 0 && defaultIVSize != 0) {
1438 // only unencrypted data must have 0 IV size
1439 return ERROR_MALFORMED;
1440 } else if (defaultIVSize != 0 &&
1441 defaultIVSize != 8 &&
1442 defaultIVSize != 16) {
1443 return ERROR_MALFORMED;
1444 }
1445
1446 uint8_t defaultKeyId[16];
1447
1448 if (mDataSource->readAt(data_offset + 8, &defaultKeyId, 16) < 16) {
1449 return ERROR_IO;
1450 }
1451
1452 sp<ABuffer> defaultConstantIv;
1453 if (defaultAlgorithmId != 0 && defaultIVSize == 0) {
1454
1455 uint8_t ivlength;
1456 if (mDataSource->readAt(data_offset + 24, &ivlength, sizeof(ivlength))
1457 < (ssize_t)sizeof(ivlength)) {
1458 return ERROR_IO;
1459 }
1460
1461 if (ivlength != 8 && ivlength != 16) {
1462 ALOGW("unsupported IV length: %u", ivlength);
1463 return ERROR_MALFORMED;
1464 }
1465
1466 defaultConstantIv = new ABuffer(ivlength);
1467 if (mDataSource->readAt(data_offset + 25, defaultConstantIv->data(), ivlength)
1468 < (ssize_t)ivlength) {
1469 return ERROR_IO;
1470 }
1471
1472 defaultConstantIv->setRange(0, ivlength);
1473 }
1474
1475 int32_t tmpAlgorithmId;
1476 if (!AMediaFormat_getInt32(mLastTrack->meta,
1477 AMEDIAFORMAT_KEY_CRYPTO_MODE, &tmpAlgorithmId)) {
1478 AMediaFormat_setInt32(mLastTrack->meta,
1479 AMEDIAFORMAT_KEY_CRYPTO_MODE, defaultAlgorithmId);
1480 }
1481
1482 AMediaFormat_setInt32(mLastTrack->meta,
1483 AMEDIAFORMAT_KEY_CRYPTO_DEFAULT_IV_SIZE, defaultIVSize);
1484 AMediaFormat_setBuffer(mLastTrack->meta,
1485 AMEDIAFORMAT_KEY_CRYPTO_KEY, defaultKeyId, 16);
1486 AMediaFormat_setInt32(mLastTrack->meta,
1487 AMEDIAFORMAT_KEY_CRYPTO_ENCRYPTED_BYTE_BLOCK, defaultEncryptedByteBlock);
1488 AMediaFormat_setInt32(mLastTrack->meta,
1489 AMEDIAFORMAT_KEY_CRYPTO_SKIP_BYTE_BLOCK, defaultSkipByteBlock);
1490 if (defaultConstantIv != NULL) {
1491 AMediaFormat_setBuffer(mLastTrack->meta, AMEDIAFORMAT_KEY_CRYPTO_IV,
1492 defaultConstantIv->data(), defaultConstantIv->size());
1493 }
1494 break;
1495 }
1496
1497 case FOURCC("tkhd"):
1498 {
1499 *offset += chunk_size;
1500
1501 status_t err;
1502 if ((err = parseTrackHeader(data_offset, chunk_data_size)) != OK) {
1503 return err;
1504 }
1505
1506 break;
1507 }
1508
1509 case FOURCC("tref"):
1510 {
1511 off64_t stop_offset = *offset + chunk_size;
1512 *offset = data_offset;
1513 while (*offset < stop_offset) {
1514 status_t err = parseChunk(offset, depth + 1);
1515 if (err != OK) {
1516 return err;
1517 }
1518 }
1519 if (*offset != stop_offset) {
1520 return ERROR_MALFORMED;
1521 }
1522 break;
1523 }
1524
1525 case FOURCC("thmb"):
1526 {
1527 *offset += chunk_size;
1528
1529 if (mLastTrack != NULL) {
1530 // Skip thumbnail track for now since we don't have an
1531 // API to retrieve it yet.
1532 // The thumbnail track can't be accessed by negative index or time,
1533 // because each timed sample has its own corresponding thumbnail
1534 // in the thumbnail track. We'll need a dedicated API to retrieve
1535 // thumbnail at time instead.
1536 mLastTrack->skipTrack = true;
1537 }
1538
1539 break;
1540 }
1541
1542 case FOURCC("pssh"):
1543 {
1544 *offset += chunk_size;
1545
1546 PsshInfo pssh;
1547
1548 if (mDataSource->readAt(data_offset + 4, &pssh.uuid, 16) < 16) {
1549 return ERROR_IO;
1550 }
1551
1552 uint32_t psshdatalen = 0;
1553 if (mDataSource->readAt(data_offset + 20, &psshdatalen, 4) < 4) {
1554 return ERROR_IO;
1555 }
1556 pssh.datalen = ntohl(psshdatalen);
1557 ALOGV("pssh data size: %d", pssh.datalen);
1558 if (chunk_size < 20 || pssh.datalen > chunk_size - 20) {
1559 // pssh data length exceeds size of containing box
1560 return ERROR_MALFORMED;
1561 }
1562
1563 pssh.data = new (std::nothrow) uint8_t[pssh.datalen];
1564 if (pssh.data == NULL) {
1565 return ERROR_MALFORMED;
1566 }
1567 ALOGV("allocated pssh @ %p", pssh.data);
1568 ssize_t requested = (ssize_t) pssh.datalen;
1569 if (mDataSource->readAt(data_offset + 24, pssh.data, requested) < requested) {
1570 delete[] pssh.data;
1571 return ERROR_IO;
1572 }
1573 mPssh.push_back(pssh);
1574
1575 break;
1576 }
1577
1578 case FOURCC("mdhd"):
1579 {
1580 *offset += chunk_size;
1581
1582 if (chunk_data_size < 4 || mLastTrack == NULL) {
1583 return ERROR_MALFORMED;
1584 }
1585
1586 uint8_t version;
1587 if (mDataSource->readAt(
1588 data_offset, &version, sizeof(version))
1589 < (ssize_t)sizeof(version)) {
1590 return ERROR_IO;
1591 }
1592
1593 off64_t timescale_offset;
1594
1595 if (version == 1) {
1596 timescale_offset = data_offset + 4 + 16;
1597 } else if (version == 0) {
1598 timescale_offset = data_offset + 4 + 8;
1599 } else {
1600 return ERROR_IO;
1601 }
1602
1603 uint32_t timescale;
1604 if (mDataSource->readAt(
1605 timescale_offset, ×cale, sizeof(timescale))
1606 < (ssize_t)sizeof(timescale)) {
1607 return ERROR_IO;
1608 }
1609
1610 if (!timescale) {
1611 ALOGE("timescale should not be ZERO.");
1612 return ERROR_MALFORMED;
1613 }
1614
1615 mLastTrack->timescale = ntohl(timescale);
1616
1617 // 14496-12 says all ones means indeterminate, but some files seem to use
1618 // 0 instead. We treat both the same.
1619 int64_t duration = 0;
1620 if (version == 1) {
1621 if (mDataSource->readAt(
1622 timescale_offset + 4, &duration, sizeof(duration))
1623 < (ssize_t)sizeof(duration)) {
1624 return ERROR_IO;
1625 }
1626 if (duration != -1) {
1627 duration = ntoh64(duration);
1628 }
1629 } else {
1630 uint32_t duration32;
1631 if (mDataSource->readAt(
1632 timescale_offset + 4, &duration32, sizeof(duration32))
1633 < (ssize_t)sizeof(duration32)) {
1634 return ERROR_IO;
1635 }
1636 if (duration32 != 0xffffffff) {
1637 duration = ntohl(duration32);
1638 }
1639 }
1640 if (duration != 0 && mLastTrack->timescale != 0) {
1641 long double durationUs = ((long double)duration * 1000000) / mLastTrack->timescale;
1642 if (durationUs < 0 || durationUs > INT64_MAX) {
1643 ALOGE("cannot represent %lld * 1000000 / %lld in 64 bits",
1644 (long long) duration, (long long) mLastTrack->timescale);
1645 return ERROR_MALFORMED;
1646 }
1647 AMediaFormat_setInt64(mLastTrack->meta, AMEDIAFORMAT_KEY_DURATION, durationUs);
1648 }
1649
1650 uint8_t lang[2];
1651 off64_t lang_offset;
1652 if (version == 1) {
1653 lang_offset = timescale_offset + 4 + 8;
1654 } else if (version == 0) {
1655 lang_offset = timescale_offset + 4 + 4;
1656 } else {
1657 return ERROR_IO;
1658 }
1659
1660 if (mDataSource->readAt(lang_offset, &lang, sizeof(lang))
1661 < (ssize_t)sizeof(lang)) {
1662 return ERROR_IO;
1663 }
1664
1665 // To get the ISO-639-2/T three character language code
1666 // 1 bit pad followed by 3 5-bits characters. Each character
1667 // is packed as the difference between its ASCII value and 0x60.
1668 char lang_code[4];
1669 lang_code[0] = ((lang[0] >> 2) & 0x1f) + 0x60;
1670 lang_code[1] = ((lang[0] & 0x3) << 3 | (lang[1] >> 5)) + 0x60;
1671 lang_code[2] = (lang[1] & 0x1f) + 0x60;
1672 lang_code[3] = '\0';
1673
1674 AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_LANGUAGE, lang_code);
1675
1676 break;
1677 }
1678
1679 case FOURCC("stsd"):
1680 {
1681 uint8_t buffer[8];
1682 if (chunk_data_size < (off64_t)sizeof(buffer)) {
1683 return ERROR_MALFORMED;
1684 }
1685
1686 if (mDataSource->readAt(
1687 data_offset, buffer, 8) < 8) {
1688 return ERROR_IO;
1689 }
1690
1691 if (U32_AT(buffer) != 0) {
1692 // Should be version 0, flags 0.
1693 return ERROR_MALFORMED;
1694 }
1695
1696 uint32_t entry_count = U32_AT(&buffer[4]);
1697
1698 if (entry_count > 1) {
1699 // For 3GPP timed text, there could be multiple tx3g boxes contain
1700 // multiple text display formats. These formats will be used to
1701 // display the timed text.
1702 // For encrypted files, there may also be more than one entry.
1703 const char *mime;
1704
1705 if (mLastTrack == NULL)
1706 return ERROR_MALFORMED;
1707
1708 CHECK(AMediaFormat_getString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, &mime));
1709 if (strcasecmp(mime, MEDIA_MIMETYPE_TEXT_3GPP) &&
1710 strcasecmp(mime, "application/octet-stream")) {
1711 // For now we only support a single type of media per track.
1712 mLastTrack->skipTrack = true;
1713 *offset += chunk_size;
1714 break;
1715 }
1716 }
1717 off64_t stop_offset = *offset + chunk_size;
1718 *offset = data_offset + 8;
1719 for (uint32_t i = 0; i < entry_count; ++i) {
1720 status_t err = parseChunk(offset, depth + 1);
1721 if (err != OK) {
1722 return err;
1723 }
1724 }
1725
1726 if (*offset != stop_offset) {
1727 return ERROR_MALFORMED;
1728 }
1729 break;
1730 }
1731 case FOURCC("mett"):
1732 {
1733 *offset += chunk_size;
1734
1735 // the absolute minimum size of a compliant mett box is 11 bytes:
1736 // 6 byte reserved, 2 byte index, null byte, one char mime_format, null byte
1737 // The resulting mime_format would be invalid at that size though.
1738 if (mLastTrack == NULL || chunk_data_size < 11) {
1739 return ERROR_MALFORMED;
1740 }
1741
1742 auto buffer = heapbuffer<uint8_t>(chunk_data_size);
1743 if (buffer.get() == NULL) {
1744 return NO_MEMORY;
1745 }
1746
1747 if (mDataSource->readAt(
1748 data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
1749 return ERROR_IO;
1750 }
1751
1752 // ISO-14496-12:
1753 // int8 reserved[6]; // should be all zeroes
1754 // int16_t data_reference_index;
1755 // char content_encoding[]; // null terminated, optional (= just the null byte)
1756 // char mime_format[]; // null terminated, mandatory
1757 // optional other boxes
1758 //
1759 // API < 29:
1760 // char mime_format[]; // null terminated
1761 //
1762 // API >= 29
1763 // char mime_format[]; // null terminated
1764 // char mime_format[]; // null terminated
1765
1766 // Prior to API 29, the metadata track was not compliant with ISO/IEC
1767 // 14496-12-2015. This led to some ISO-compliant parsers failing to read the
1768 // metatrack. As of API 29 and onwards, a change was made to metadata track to
1769 // make it somewhat compatible with the standard. The workaround is to write the
1770 // null-terminated mime_format string twice. This allows compliant parsers to
1771 // read the missing reserved, data_reference_index, and content_encoding fields
1772 // from the first mime_type string. The actual mime_format field would then be
1773 // read correctly from the second string. The non-compliant Android frameworks
1774 // from API 28 and earlier would still be able to read the mime_format correctly
1775 // as it would only read the first null-terminated mime_format string. To enable
1776 // reading metadata tracks generated from both the non-compliant and compliant
1777 // formats, a check needs to be done to see which format is used.
1778 const char *str = (const char*) buffer.get();
1779 size_t string_length = strnlen(str, chunk_data_size);
1780
1781 if (string_length == chunk_data_size - 1) {
1782 // This is likely a pre API 29 file, since it's a single null terminated
1783 // string filling the entire box.
1784 AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, str);
1785 } else {
1786 // This might be a fully compliant metadata track, a "double mime" compatibility
1787 // track, or anything else, including a single non-terminated string, so we need
1788 // to determine the length of each string we want to parse out of the box.
1789 size_t encoding_length = strnlen(str + 8, chunk_data_size - 8);
1790 if (encoding_length + 8 >= chunk_data_size - 2) {
1791 // the encoding extends to the end of the box, so there's no mime_format
1792 return ERROR_MALFORMED;
1793 }
1794 String8 contentEncoding(str + 8, encoding_length);
1795 String8 mimeFormat(str + 8 + encoding_length + 1,
1796 chunk_data_size - 8 - encoding_length - 1);
1797 AMediaFormat_setString(mLastTrack->meta,
1798 AMEDIAFORMAT_KEY_MIME, mimeFormat.string());
1799 }
1800 break;
1801 }
1802
1803 case FOURCC("mp4a"):
1804 case FOURCC("enca"):
1805 case FOURCC("samr"):
1806 case FOURCC("sawb"):
1807 case FOURCC("Opus"):
1808 case FOURCC("twos"):
1809 case FOURCC("sowt"):
1810 case FOURCC("alac"):
1811 case FOURCC("fLaC"):
1812 case FOURCC(".mp3"):
1813 case 0x6D730055: // "ms U" mp3 audio
1814 case FOURCC("mha1"):
1815 case FOURCC("mhm1"):
1816 case FOURCC("dtsc"):
1817 case FOURCC("dtse"):
1818 case FOURCC("dtsh"):
1819 case FOURCC("dtsl"):
1820 case FOURCC("dtsx"):
1821 {
1822 if (mIsQT && depth >= 1 && mPath[depth - 1] == FOURCC("wave")) {
1823
1824 if (chunk_type == FOURCC("alac")) {
1825 off64_t offsetTmp = *offset;
1826 status_t err = parseALACSampleEntry(&offsetTmp);
1827 if (err != OK) {
1828 ALOGE("parseALACSampleEntry err:%d Line:%d", err, __LINE__);
1829 return err;
1830 }
1831 }
1832
1833 // Ignore all atoms embedded in QT wave atom
1834 ALOGV("Ignore all atoms embedded in QT wave atom");
1835 *offset += chunk_size;
1836 break;
1837 }
1838
1839 uint8_t buffer[8 + 20];
1840 if (chunk_data_size < (ssize_t)sizeof(buffer)) {
1841 // Basic AudioSampleEntry size.
1842 return ERROR_MALFORMED;
1843 }
1844
1845 if (mDataSource->readAt(
1846 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
1847 return ERROR_IO;
1848 }
1849
1850 // we can get data_ref_index value from U16_AT(&buffer[6])
1851 uint16_t version = U16_AT(&buffer[8]);
1852 uint32_t num_channels = U16_AT(&buffer[16]);
1853
1854 uint16_t sample_size = U16_AT(&buffer[18]);
1855 uint32_t sample_rate = U32_AT(&buffer[24]) >> 16;
1856
1857 if (mLastTrack == NULL)
1858 return ERROR_MALFORMED;
1859
1860 off64_t stop_offset = *offset + chunk_size;
1861 *offset = data_offset + sizeof(buffer);
1862
1863 if (mIsQT) {
1864 if (version == 1) {
1865 if (mDataSource->readAt(*offset, buffer, 16) < 16) {
1866 return ERROR_IO;
1867 }
1868
1869 #if 0
1870 U32_AT(buffer); // samples per packet
1871 U32_AT(&buffer[4]); // bytes per packet
1872 U32_AT(&buffer[8]); // bytes per frame
1873 U32_AT(&buffer[12]); // bytes per sample
1874 #endif
1875 *offset += 16;
1876 } else if (version == 2) {
1877 uint8_t v2buffer[36];
1878 if (mDataSource->readAt(*offset, v2buffer, 36) < 36) {
1879 return ERROR_IO;
1880 }
1881
1882 #if 0
1883 U32_AT(v2buffer); // size of struct only
1884 sample_rate = (uint32_t)U64_AT(&v2buffer[4]); // audio sample rate
1885 num_channels = U32_AT(&v2buffer[12]); // num audio channels
1886 U32_AT(&v2buffer[16]); // always 0x7f000000
1887 sample_size = (uint16_t)U32_AT(&v2buffer[20]); // const bits per channel
1888 U32_AT(&v2buffer[24]); // format specifc flags
1889 U32_AT(&v2buffer[28]); // const bytes per audio packet
1890 U32_AT(&v2buffer[32]); // const LPCM frames per audio packet
1891 #endif
1892 *offset += 36;
1893 }
1894 }
1895
1896 if (chunk_type != FOURCC("enca")) {
1897 // if the chunk type is enca, we'll get the type from the frma box later
1898 AMediaFormat_setString(mLastTrack->meta,
1899 AMEDIAFORMAT_KEY_MIME, FourCC2MIME(chunk_type));
1900 AdjustChannelsAndRate(chunk_type, &num_channels, &sample_rate);
1901
1902 if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_RAW, FourCC2MIME(chunk_type))) {
1903 AMediaFormat_setInt32(mLastTrack->meta,
1904 AMEDIAFORMAT_KEY_BITS_PER_SAMPLE, sample_size);
1905 if (chunk_type == FOURCC("twos")) {
1906 AMediaFormat_setInt32(mLastTrack->meta,
1907 AMEDIAFORMAT_KEY_PCM_BIG_ENDIAN, 1);
1908 }
1909 }
1910 }
1911 ALOGV("*** coding='%s' %d channels, size %d, rate %d\n",
1912 chunk, num_channels, sample_size, sample_rate);
1913 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, num_channels);
1914 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, sample_rate);
1915
1916 if (chunk_type == FOURCC("Opus")) {
1917 uint8_t opusInfo[AOPUS_OPUSHEAD_MAXSIZE];
1918 data_offset += sizeof(buffer);
1919 size_t opusInfoSize = chunk_data_size - sizeof(buffer);
1920
1921 if (opusInfoSize < AOPUS_OPUSHEAD_MINSIZE ||
1922 opusInfoSize > AOPUS_OPUSHEAD_MAXSIZE) {
1923 return ERROR_MALFORMED;
1924 }
1925 // Read Opus Header
1926 if (mDataSource->readAt(
1927 data_offset, opusInfo, opusInfoSize) < opusInfoSize) {
1928 return ERROR_IO;
1929 }
1930
1931 // OpusHeader must start with this magic sequence, overwrite first 8 bytes
1932 // http://wiki.xiph.org/OggOpus#ID_Header
1933 strncpy((char *)opusInfo, "OpusHead", 8);
1934
1935 // Version shall be 0 as per mp4 Opus Specific Box
1936 // (https://opus-codec.org/docs/opus_in_isobmff.html#4.3.2)
1937 if (opusInfo[8]) {
1938 return ERROR_MALFORMED;
1939 }
1940 // Force version to 1 as per OpusHead definition
1941 // (http://wiki.xiph.org/OggOpus#ID_Header)
1942 opusInfo[8] = 1;
1943
1944 // Read Opus Specific Box values
1945 size_t opusOffset = 10;
1946 uint16_t pre_skip = U16_AT(&opusInfo[opusOffset]);
1947 uint32_t sample_rate = U32_AT(&opusInfo[opusOffset + 2]);
1948 uint16_t out_gain = U16_AT(&opusInfo[opusOffset + 6]);
1949
1950 // Convert Opus Specific Box values. ParseOpusHeader expects
1951 // the values in LE, however MP4 stores these values as BE
1952 // https://opus-codec.org/docs/opus_in_isobmff.html#4.3.2
1953 memcpy(&opusInfo[opusOffset], &pre_skip, sizeof(pre_skip));
1954 memcpy(&opusInfo[opusOffset + 2], &sample_rate, sizeof(sample_rate));
1955 memcpy(&opusInfo[opusOffset + 6], &out_gain, sizeof(out_gain));
1956
1957 static const int64_t kSeekPreRollNs = 80000000; // Fixed 80 msec
1958 static const int32_t kOpusSampleRate = 48000;
1959 int64_t codecDelay = pre_skip * 1000000000ll / kOpusSampleRate;
1960
1961 AMediaFormat_setBuffer(mLastTrack->meta,
1962 AMEDIAFORMAT_KEY_CSD_0, opusInfo, opusInfoSize);
1963 AMediaFormat_setBuffer(mLastTrack->meta,
1964 AMEDIAFORMAT_KEY_CSD_1, &codecDelay, sizeof(codecDelay));
1965 AMediaFormat_setBuffer(mLastTrack->meta,
1966 AMEDIAFORMAT_KEY_CSD_2, &kSeekPreRollNs, sizeof(kSeekPreRollNs));
1967
1968 data_offset += opusInfoSize;
1969 *offset = data_offset;
1970 CHECK_EQ(*offset, stop_offset);
1971 }
1972
1973 if (!mIsQT && chunk_type == FOURCC("alac")) {
1974 data_offset += sizeof(buffer);
1975
1976 status_t err = parseALACSampleEntry(&data_offset);
1977 if (err != OK) {
1978 ALOGE("parseALACSampleEntry err:%d Line:%d", err, __LINE__);
1979 return err;
1980 }
1981 *offset = data_offset;
1982 CHECK_EQ(*offset, stop_offset);
1983 }
1984
1985 if (chunk_type == FOURCC("fLaC")) {
1986 data_offset += sizeof(buffer);
1987 *offset = data_offset;
1988 }
1989
1990 while (*offset < stop_offset) {
1991 status_t err = parseChunk(offset, depth + 1);
1992 if (err != OK) {
1993 return err;
1994 }
1995 }
1996
1997 if (*offset != stop_offset) {
1998 return ERROR_MALFORMED;
1999 }
2000 break;
2001 }
2002 case FOURCC("mhaC"):
2003 {
2004 // See ISO_IEC_23008-3;2019 MHADecoderConfigurationRecord
2005 constexpr uint32_t mhac_header_size = 4 /* size */ + 4 /* boxtype 'mhaC' */
2006 + 1 /* configurationVersion */ + 1 /* mpegh3daProfileLevelIndication */
2007 + 1 /* referenceChannelLayout */ + 2 /* mpegh3daConfigLength */;
2008 uint8_t mhac_header[mhac_header_size];
2009 off64_t data_offset = *offset;
2010
2011 if (chunk_size < sizeof(mhac_header)) {
2012 return ERROR_MALFORMED;
2013 }
2014
2015 if (mDataSource->readAt(data_offset, mhac_header, sizeof(mhac_header))
2016 < (ssize_t)sizeof(mhac_header)) {
2017 return ERROR_IO;
2018 }
2019
2020 //get mpegh3daProfileLevelIndication
2021 const uint32_t mpegh3daProfileLevelIndication = mhac_header[9];
2022 AMediaFormat_setInt32(mLastTrack->meta,
2023 AMEDIAFORMAT_KEY_MPEGH_PROFILE_LEVEL_INDICATION,
2024 mpegh3daProfileLevelIndication);
2025
2026 //get referenceChannelLayout
2027 const uint32_t referenceChannelLayout = mhac_header[10];
2028 AMediaFormat_setInt32(mLastTrack->meta,
2029 AMEDIAFORMAT_KEY_MPEGH_REFERENCE_CHANNEL_LAYOUT,
2030 referenceChannelLayout);
2031
2032 // get mpegh3daConfigLength
2033 const uint32_t mhac_config_size = U16_AT(&mhac_header[11]);
2034 if (chunk_size != sizeof(mhac_header) + mhac_config_size) {
2035 return ERROR_MALFORMED;
2036 }
2037
2038 data_offset += sizeof(mhac_header);
2039 uint8_t mhac_config[mhac_config_size];
2040 if (mDataSource->readAt(data_offset, mhac_config, sizeof(mhac_config))
2041 < (ssize_t)sizeof(mhac_config)) {
2042 return ERROR_IO;
2043 }
2044
2045 AMediaFormat_setBuffer(mLastTrack->meta,
2046 AMEDIAFORMAT_KEY_CSD_0, mhac_config, sizeof(mhac_config));
2047 data_offset += sizeof(mhac_config);
2048 *offset = data_offset;
2049 break;
2050 }
2051 case FOURCC("mhaP"):
2052 {
2053 // FDAmd_2 of ISO_IEC_23008-3;2019 MHAProfileAndLevelCompatibilitySetBox
2054 constexpr uint32_t mhap_header_size = 4 /* size */ + 4 /* boxtype 'mhaP' */
2055 + 1 /* numCompatibleSets */;
2056
2057 uint8_t mhap_header[mhap_header_size];
2058 off64_t data_offset = *offset;
2059
2060 if (chunk_size < (ssize_t)mhap_header_size) {
2061 return ERROR_MALFORMED;
2062 }
2063
2064 if (mDataSource->readAt(data_offset, mhap_header, sizeof(mhap_header))
2065 < (ssize_t)sizeof(mhap_header)) {
2066 return ERROR_IO;
2067 }
2068
2069 // mhap_compatible_sets_size = numCompatibleSets * sizeof(uint8_t)
2070 const uint32_t mhap_compatible_sets_size = mhap_header[8];
2071 if (chunk_size != sizeof(mhap_header) + mhap_compatible_sets_size) {
2072 return ERROR_MALFORMED;
2073 }
2074
2075 data_offset += sizeof(mhap_header);
2076 uint8_t mhap_compatible_sets[mhap_compatible_sets_size];
2077 if (mDataSource->readAt(
2078 data_offset, mhap_compatible_sets, sizeof(mhap_compatible_sets))
2079 < (ssize_t)sizeof(mhap_compatible_sets)) {
2080 return ERROR_IO;
2081 }
2082
2083 AMediaFormat_setBuffer(mLastTrack->meta,
2084 AMEDIAFORMAT_KEY_MPEGH_COMPATIBLE_SETS,
2085 mhap_compatible_sets, sizeof(mhap_compatible_sets));
2086 data_offset += sizeof(mhap_compatible_sets);
2087 *offset = data_offset;
2088 break;
2089 }
2090 case FOURCC("mp4v"):
2091 case FOURCC("encv"):
2092 case FOURCC("s263"):
2093 case FOURCC("H263"):
2094 case FOURCC("h263"):
2095 case FOURCC("avc1"):
2096 case FOURCC("hvc1"):
2097 case FOURCC("hev1"):
2098 case FOURCC("dvav"):
2099 case FOURCC("dva1"):
2100 case FOURCC("dvhe"):
2101 case FOURCC("dvh1"):
2102 case FOURCC("dav1"):
2103 case FOURCC("av01"):
2104 case FOURCC("vp09"):
2105 {
2106 uint8_t buffer[78];
2107 if (chunk_data_size < (ssize_t)sizeof(buffer)) {
2108 // Basic VideoSampleEntry size.
2109 return ERROR_MALFORMED;
2110 }
2111
2112 if (mDataSource->readAt(
2113 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
2114 return ERROR_IO;
2115 }
2116
2117 // we can get data_ref_index value from U16_AT(&buffer[6])
2118 uint16_t width = U16_AT(&buffer[6 + 18]);
2119 uint16_t height = U16_AT(&buffer[6 + 20]);
2120
2121 // The video sample is not standard-compliant if it has invalid dimension.
2122 // Use some default width and height value, and
2123 // let the decoder figure out the actual width and height (and thus
2124 // be prepared for INFO_FORMAT_CHANGED event).
2125 if (width == 0) width = 352;
2126 if (height == 0) height = 288;
2127
2128 // printf("*** coding='%s' width=%d height=%d\n",
2129 // chunk, width, height);
2130
2131 if (mLastTrack == NULL)
2132 return ERROR_MALFORMED;
2133
2134 if (chunk_type != FOURCC("encv")) {
2135 // if the chunk type is encv, we'll get the type from the frma box later
2136 AMediaFormat_setString(mLastTrack->meta,
2137 AMEDIAFORMAT_KEY_MIME, FourCC2MIME(chunk_type));
2138 }
2139 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_WIDTH, width);
2140 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_HEIGHT, height);
2141
2142 off64_t stop_offset = *offset + chunk_size;
2143 *offset = data_offset + sizeof(buffer);
2144 while (*offset < stop_offset) {
2145 status_t err = parseChunk(offset, depth + 1);
2146 if (err != OK) {
2147 return err;
2148 }
2149 }
2150
2151 if (*offset != stop_offset) {
2152 return ERROR_MALFORMED;
2153 }
2154 break;
2155 }
2156
2157 case FOURCC("stco"):
2158 case FOURCC("co64"):
2159 {
2160 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) {
2161 return ERROR_MALFORMED;
2162 }
2163
2164 status_t err =
2165 mLastTrack->sampleTable->setChunkOffsetParams(
2166 chunk_type, data_offset, chunk_data_size);
2167
2168 *offset += chunk_size;
2169
2170 if (err != OK) {
2171 return err;
2172 }
2173
2174 break;
2175 }
2176
2177 case FOURCC("stsc"):
2178 {
2179 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
2180 return ERROR_MALFORMED;
2181
2182 status_t err =
2183 mLastTrack->sampleTable->setSampleToChunkParams(
2184 data_offset, chunk_data_size);
2185
2186 *offset += chunk_size;
2187
2188 if (err != OK) {
2189 return err;
2190 }
2191
2192 break;
2193 }
2194
2195 case FOURCC("stsz"):
2196 case FOURCC("stz2"):
2197 {
2198 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) {
2199 return ERROR_MALFORMED;
2200 }
2201
2202 status_t err =
2203 mLastTrack->sampleTable->setSampleSizeParams(
2204 chunk_type, data_offset, chunk_data_size);
2205
2206 *offset += chunk_size;
2207
2208 if (err != OK) {
2209 return err;
2210 }
2211
2212 adjustRawDefaultFrameSize();
2213
2214 size_t max_size;
2215 err = mLastTrack->sampleTable->getMaxSampleSize(&max_size);
2216
2217 if (err != OK) {
2218 return err;
2219 }
2220
2221 if (max_size != 0) {
2222 // Assume that a given buffer only contains at most 10 chunks,
2223 // each chunk originally prefixed with a 2 byte length will
2224 // have a 4 byte header (0x00 0x00 0x00 0x01) after conversion,
2225 // and thus will grow by 2 bytes per chunk.
2226 if (max_size > SIZE_MAX - 10 * 2) {
2227 ALOGE("max sample size too big: %zu", max_size);
2228 return ERROR_MALFORMED;
2229 }
2230 AMediaFormat_setInt32(mLastTrack->meta,
2231 AMEDIAFORMAT_KEY_MAX_INPUT_SIZE, max_size + 10 * 2);
2232 } else {
2233 // No size was specified. Pick a conservatively large size.
2234 uint32_t width, height;
2235 if (!AMediaFormat_getInt32(mLastTrack->meta,
2236 AMEDIAFORMAT_KEY_WIDTH, (int32_t*)&width) ||
2237 !AMediaFormat_getInt32(mLastTrack->meta,
2238 AMEDIAFORMAT_KEY_HEIGHT,(int32_t*) &height)) {
2239 ALOGE("No width or height, assuming worst case 1080p");
2240 width = 1920;
2241 height = 1080;
2242 } else {
2243 // A resolution was specified, check that it's not too big. The values below
2244 // were chosen so that the calculations below don't cause overflows, they're
2245 // not indicating that resolutions up to 32kx32k are actually supported.
2246 if (width > 32768 || height > 32768) {
2247 ALOGE("can't support %u x %u video", width, height);
2248 return ERROR_MALFORMED;
2249 }
2250 }
2251
2252 const char *mime;
2253 CHECK(AMediaFormat_getString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, &mime));
2254 if (!strncmp(mime, "audio/", 6)) {
2255 // for audio, use 128KB
2256 max_size = 1024 * 128;
2257 } else if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)
2258 || !strcmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)
2259 || !strcmp(mime, MEDIA_MIMETYPE_VIDEO_DOLBY_VISION)) {
2260 // AVC & HEVC requires compression ratio of at least 2, and uses
2261 // macroblocks
2262 max_size = ((width + 15) / 16) * ((height + 15) / 16) * 192;
2263 } else {
2264 // For all other formats there is no minimum compression
2265 // ratio. Use compression ratio of 1.
2266 max_size = width * height * 3 / 2;
2267 }
2268 // HACK: allow 10% overhead
2269 // TODO: read sample size from traf atom for fragmented MPEG4.
2270 max_size += max_size / 10;
2271 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_MAX_INPUT_SIZE, max_size);
2272 }
2273
2274 // NOTE: setting another piece of metadata invalidates any pointers (such as the
2275 // mimetype) previously obtained, so don't cache them.
2276 const char *mime;
2277 CHECK(AMediaFormat_getString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, &mime));
2278 // Calculate average frame rate.
2279 if (!strncasecmp("video/", mime, 6)) {
2280 size_t nSamples = mLastTrack->sampleTable->countSamples();
2281 if (nSamples == 0) {
2282 int32_t trackId;
2283 if (AMediaFormat_getInt32(mLastTrack->meta,
2284 AMEDIAFORMAT_KEY_TRACK_ID, &trackId)) {
2285 for (size_t i = 0; i < mTrex.size(); i++) {
2286 Trex *t = &mTrex.editItemAt(i);
2287 if (t->track_ID == (uint32_t) trackId) {
2288 if (t->default_sample_duration > 0) {
2289 int32_t frameRate =
2290 mLastTrack->timescale / t->default_sample_duration;
2291 AMediaFormat_setInt32(mLastTrack->meta,
2292 AMEDIAFORMAT_KEY_FRAME_RATE, frameRate);
2293 }
2294 break;
2295 }
2296 }
2297 }
2298 } else {
2299 int64_t durationUs;
2300 if (AMediaFormat_getInt64(mLastTrack->meta,
2301 AMEDIAFORMAT_KEY_DURATION, &durationUs)) {
2302 if (durationUs > 0) {
2303 int32_t frameRate = (nSamples * 1000000LL +
2304 (durationUs >> 1)) / durationUs;
2305 AMediaFormat_setInt32(mLastTrack->meta,
2306 AMEDIAFORMAT_KEY_FRAME_RATE, frameRate);
2307 }
2308 }
2309 ALOGV("setting frame count %zu", nSamples);
2310 AMediaFormat_setInt32(mLastTrack->meta,
2311 AMEDIAFORMAT_KEY_FRAME_COUNT, nSamples);
2312 }
2313 }
2314
2315 break;
2316 }
2317
2318 case FOURCC("stts"):
2319 {
2320 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
2321 return ERROR_MALFORMED;
2322
2323 *offset += chunk_size;
2324
2325 if (depth >= 1 && mPath[depth - 1] != FOURCC("stbl")) {
2326 char chunk[5];
2327 MakeFourCCString(mPath[depth - 1], chunk);
2328 ALOGW("stts's parent box (%s) is not stbl, skip it.", chunk);
2329 break;
2330 }
2331
2332 status_t err =
2333 mLastTrack->sampleTable->setTimeToSampleParams(
2334 data_offset, chunk_data_size);
2335
2336 if (err != OK) {
2337 return err;
2338 }
2339
2340 break;
2341 }
2342
2343 case FOURCC("ctts"):
2344 {
2345 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
2346 return ERROR_MALFORMED;
2347
2348 *offset += chunk_size;
2349
2350 status_t err =
2351 mLastTrack->sampleTable->setCompositionTimeToSampleParams(
2352 data_offset, chunk_data_size);
2353
2354 if (err != OK) {
2355 return err;
2356 }
2357
2358 break;
2359 }
2360
2361 case FOURCC("stss"):
2362 {
2363 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
2364 return ERROR_MALFORMED;
2365
2366 *offset += chunk_size;
2367
2368 status_t err =
2369 mLastTrack->sampleTable->setSyncSampleParams(
2370 data_offset, chunk_data_size);
2371
2372 if (err != OK) {
2373 return err;
2374 }
2375
2376 break;
2377 }
2378
2379 // \xA9xyz
2380 case FOURCC("\251xyz"):
2381 {
2382 *offset += chunk_size;
2383
2384 // Best case the total data length inside "\xA9xyz" box would
2385 // be 9, for instance "\xA9xyz" + "\x00\x05\x15\xc7" + "+0+0/",
2386 // where "\x00\x05" is the text string length with value = 5,
2387 // "\x15\xc7" is the language code = en, and "+0+0/" is a
2388 // location (string) value with longitude = 0 and latitude = 0.
2389 // Since some devices encountered in the wild omit the trailing
2390 // slash, we'll allow that.
2391 if (chunk_data_size < 8) { // 8 instead of 9 to allow for missing /
2392 return ERROR_MALFORMED;
2393 }
2394
2395 uint16_t len;
2396 if (!mDataSource->getUInt16(data_offset, &len)) {
2397 return ERROR_IO;
2398 }
2399
2400 // allow "+0+0" without trailing slash
2401 if (len < 4 || len > chunk_data_size - 4) {
2402 return ERROR_MALFORMED;
2403 }
2404 // The location string following the language code is formatted
2405 // according to ISO 6709:2008 (https://en.wikipedia.org/wiki/ISO_6709).
2406 // Allocate 2 extra bytes, in case we need to add a trailing slash,
2407 // and to add a terminating 0.
2408 std::unique_ptr<char[]> buffer(new (std::nothrow) char[len+2]());
2409 if (!buffer) {
2410 return NO_MEMORY;
2411 }
2412
2413 if (mDataSource->readAt(
2414 data_offset + 4, &buffer[0], len) < len) {
2415 return ERROR_IO;
2416 }
2417
2418 len = strlen(&buffer[0]);
2419 if (len < 4) {
2420 return ERROR_MALFORMED;
2421 }
2422 // Add a trailing slash if there wasn't one.
2423 if (buffer[len - 1] != '/') {
2424 buffer[len] = '/';
2425 }
2426 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_LOCATION, &buffer[0]);
2427 break;
2428 }
2429
2430 case FOURCC("esds"):
2431 {
2432 *offset += chunk_size;
2433
2434 if (chunk_data_size < 4) {
2435 return ERROR_MALFORMED;
2436 }
2437
2438 auto tmp = heapbuffer<uint8_t>(chunk_data_size);
2439 uint8_t *buffer = tmp.get();
2440 if (buffer == NULL) {
2441 return -ENOMEM;
2442 }
2443
2444 if (mDataSource->readAt(
2445 data_offset, buffer, chunk_data_size) < chunk_data_size) {
2446 return ERROR_IO;
2447 }
2448
2449 if (U32_AT(buffer) != 0) {
2450 // Should be version 0, flags 0.
2451 return ERROR_MALFORMED;
2452 }
2453
2454 if (mLastTrack == NULL)
2455 return ERROR_MALFORMED;
2456
2457 AMediaFormat_setBuffer(mLastTrack->meta,
2458 AMEDIAFORMAT_KEY_ESDS, &buffer[4], chunk_data_size - 4);
2459
2460 if (mPath.size() >= 2
2461 && mPath[mPath.size() - 2] == FOURCC("mp4a")) {
2462 // Information from the ESDS must be relied on for proper
2463 // setup of sample rate and channel count for MPEG4 Audio.
2464 // The generic header appears to only contain generic
2465 // information...
2466
2467 status_t err = updateAudioTrackInfoFromESDS_MPEG4Audio(
2468 &buffer[4], chunk_data_size - 4);
2469
2470 if (err != OK) {
2471 return err;
2472 }
2473 }
2474 if (mPath.size() >= 2
2475 && mPath[mPath.size() - 2] == FOURCC("mp4v")) {
2476 // Check if the video is MPEG2
2477 ESDS esds(&buffer[4], chunk_data_size - 4);
2478
2479 uint8_t objectTypeIndication;
2480 if (esds.getObjectTypeIndication(&objectTypeIndication) == OK) {
2481 if (objectTypeIndication >= 0x60 && objectTypeIndication <= 0x65) {
2482 AMediaFormat_setString(mLastTrack->meta,
2483 AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_VIDEO_MPEG2);
2484 }
2485 }
2486 }
2487 break;
2488 }
2489
2490 case FOURCC("btrt"):
2491 {
2492 *offset += chunk_size;
2493 if (mLastTrack == NULL) {
2494 return ERROR_MALFORMED;
2495 }
2496
2497 uint8_t buffer[12];
2498 if (chunk_data_size != sizeof(buffer)) {
2499 return ERROR_MALFORMED;
2500 }
2501
2502 if (mDataSource->readAt(
2503 data_offset, buffer, chunk_data_size) < chunk_data_size) {
2504 return ERROR_IO;
2505 }
2506
2507 uint32_t maxBitrate = U32_AT(&buffer[4]);
2508 uint32_t avgBitrate = U32_AT(&buffer[8]);
2509 if (maxBitrate > 0 && maxBitrate < INT32_MAX) {
2510 AMediaFormat_setInt32(mLastTrack->meta,
2511 AMEDIAFORMAT_KEY_MAX_BIT_RATE, (int32_t)maxBitrate);
2512 }
2513 if (avgBitrate > 0 && avgBitrate < INT32_MAX) {
2514 AMediaFormat_setInt32(mLastTrack->meta,
2515 AMEDIAFORMAT_KEY_BIT_RATE, (int32_t)avgBitrate);
2516 }
2517 break;
2518 }
2519
2520 case FOURCC("dfLa"):
2521 {
2522 *offset += chunk_size;
2523
2524 // From https://github.com/xiph/flac/blob/master/doc/isoflac.txt
2525 // 4 for mediaType, 4 for blockType and BlockLen, 34 for metadata
2526 uint8_t flacInfo[4 + 4 + 34];
2527
2528 if (chunk_data_size != sizeof(flacInfo)) {
2529 return ERROR_MALFORMED;
2530 }
2531
2532 data_offset += 4;
2533 size_t flacOffset = 4;
2534 // Add flaC header mediaType to CSD
2535 strncpy((char *)flacInfo, "fLaC", 4);
2536
2537 ssize_t bytesToRead = sizeof(flacInfo) - flacOffset;
2538 if (mDataSource->readAt(
2539 data_offset, flacInfo + flacOffset, bytesToRead) < bytesToRead) {
2540 return ERROR_IO;
2541 }
2542
2543 data_offset += bytesToRead;
2544 AMediaFormat_setBuffer(mLastTrack->meta, AMEDIAFORMAT_KEY_CSD_0, flacInfo,
2545 sizeof(flacInfo));
2546 break;
2547 }
2548
2549 case FOURCC("avcC"):
2550 {
2551 *offset += chunk_size;
2552
2553 auto buffer = heapbuffer<uint8_t>(chunk_data_size);
2554
2555 if (buffer.get() == NULL) {
2556 ALOGE("b/28471206");
2557 return NO_MEMORY;
2558 }
2559
2560 if (mDataSource->readAt(
2561 data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
2562 return ERROR_IO;
2563 }
2564
2565 if (mLastTrack == NULL)
2566 return ERROR_MALFORMED;
2567
2568 AMediaFormat_setBuffer(mLastTrack->meta,
2569 AMEDIAFORMAT_KEY_CSD_AVC, buffer.get(), chunk_data_size);
2570
2571 break;
2572 }
2573 case FOURCC("hvcC"):
2574 {
2575 auto buffer = heapbuffer<uint8_t>(chunk_data_size);
2576
2577 if (buffer.get() == NULL) {
2578 ALOGE("b/28471206");
2579 return NO_MEMORY;
2580 }
2581
2582 if (mDataSource->readAt(
2583 data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
2584 return ERROR_IO;
2585 }
2586
2587 if (mLastTrack == NULL)
2588 return ERROR_MALFORMED;
2589
2590 AMediaFormat_setBuffer(mLastTrack->meta,
2591 AMEDIAFORMAT_KEY_CSD_HEVC, buffer.get(), chunk_data_size);
2592
2593 *offset += chunk_size;
2594 break;
2595 }
2596
2597 case FOURCC("vpcC"):
2598 case FOURCC("av1C"):
2599 {
2600 auto buffer = heapbuffer<uint8_t>(chunk_data_size);
2601
2602 if (buffer.get() == NULL) {
2603 ALOGE("b/28471206");
2604 return NO_MEMORY;
2605 }
2606
2607 if (mDataSource->readAt(
2608 data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
2609 return ERROR_IO;
2610 }
2611
2612 if (mLastTrack == NULL)
2613 return ERROR_MALFORMED;
2614
2615 AMediaFormat_setBuffer(mLastTrack->meta,
2616 AMEDIAFORMAT_KEY_CSD_0, buffer.get(), chunk_data_size);
2617
2618 *offset += chunk_size;
2619 break;
2620 }
2621
2622 case FOURCC("dvcC"):
2623 case FOURCC("dvvC"):
2624 case FOURCC("dvwC"):
2625 {
2626 if (chunk_data_size != 24) {
2627 return ERROR_MALFORMED;
2628 }
2629
2630 auto buffer = heapbuffer<uint8_t>(chunk_data_size);
2631
2632 if (buffer.get() == NULL) {
2633 ALOGE("b/28471206");
2634 return NO_MEMORY;
2635 }
2636
2637 if (mDataSource->readAt(data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
2638 return ERROR_IO;
2639 }
2640
2641 if (mLastTrack == NULL)
2642 return ERROR_MALFORMED;
2643
2644 AMediaFormat_setBuffer(mLastTrack->meta, AMEDIAFORMAT_KEY_CSD_2,
2645 buffer.get(), chunk_data_size);
2646 AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME,
2647 MEDIA_MIMETYPE_VIDEO_DOLBY_VISION);
2648
2649 *offset += chunk_size;
2650 break;
2651 }
2652
2653 case FOURCC("d263"):
2654 {
2655 *offset += chunk_size;
2656 /*
2657 * d263 contains a fixed 7 bytes part:
2658 * vendor - 4 bytes
2659 * version - 1 byte
2660 * level - 1 byte
2661 * profile - 1 byte
2662 * optionally, "d263" box itself may contain a 16-byte
2663 * bit rate box (bitr)
2664 * average bit rate - 4 bytes
2665 * max bit rate - 4 bytes
2666 */
2667 char buffer[23];
2668 if (chunk_data_size != 7 &&
2669 chunk_data_size != 23) {
2670 ALOGE("Incorrect D263 box size %lld", (long long)chunk_data_size);
2671 return ERROR_MALFORMED;
2672 }
2673
2674 if (mDataSource->readAt(
2675 data_offset, buffer, chunk_data_size) < chunk_data_size) {
2676 return ERROR_IO;
2677 }
2678
2679 if (mLastTrack == NULL)
2680 return ERROR_MALFORMED;
2681
2682 AMediaFormat_setBuffer(mLastTrack->meta,
2683 AMEDIAFORMAT_KEY_D263, buffer, chunk_data_size);
2684
2685 break;
2686 }
2687
2688 case FOURCC("meta"):
2689 {
2690 off64_t stop_offset = *offset + chunk_size;
2691 *offset = data_offset;
2692 bool isParsingMetaKeys = underQTMetaPath(mPath, 2);
2693 if (!isParsingMetaKeys) {
2694 uint8_t buffer[4];
2695 if (chunk_data_size < (off64_t)sizeof(buffer)) {
2696 *offset = stop_offset;
2697 return ERROR_MALFORMED;
2698 }
2699
2700 if (mDataSource->readAt(
2701 data_offset, buffer, 4) < 4) {
2702 *offset = stop_offset;
2703 return ERROR_IO;
2704 }
2705
2706 if (U32_AT(buffer) != 0) {
2707 // Should be version 0, flags 0.
2708
2709 // If it's not, let's assume this is one of those
2710 // apparently malformed chunks that don't have flags
2711 // and completely different semantics than what's
2712 // in the MPEG4 specs and skip it.
2713 *offset = stop_offset;
2714 return OK;
2715 }
2716 *offset += sizeof(buffer);
2717 }
2718
2719 while (*offset < stop_offset) {
2720 status_t err = parseChunk(offset, depth + 1);
2721 if (err != OK) {
2722 return err;
2723 }
2724 }
2725
2726 if (*offset != stop_offset) {
2727 return ERROR_MALFORMED;
2728 }
2729 break;
2730 }
2731
2732 case FOURCC("iloc"):
2733 case FOURCC("iinf"):
2734 case FOURCC("iprp"):
2735 case FOURCC("pitm"):
2736 case FOURCC("idat"):
2737 case FOURCC("iref"):
2738 case FOURCC("ipro"):
2739 {
2740 if (mIsHeif || mIsAvif) {
2741 if (mItemTable == NULL) {
2742 mItemTable = new ItemTable(mDataSource, mIsHeif);
2743 }
2744 status_t err = mItemTable->parse(
2745 chunk_type, data_offset, chunk_data_size);
2746 if (err != OK) {
2747 return err;
2748 }
2749 }
2750 *offset += chunk_size;
2751 break;
2752 }
2753
2754 case FOURCC("mean"):
2755 case FOURCC("name"):
2756 case FOURCC("data"):
2757 {
2758 *offset += chunk_size;
2759
2760 if (mPath.size() == 6 && underMetaDataPath(mPath)) {
2761 status_t err = parseITunesMetaData(data_offset, chunk_data_size);
2762
2763 if (err != OK) {
2764 return err;
2765 }
2766 }
2767
2768 break;
2769 }
2770
2771 case FOURCC("mvhd"):
2772 {
2773 *offset += chunk_size;
2774
2775 if (depth != 1) {
2776 ALOGE("mvhd: depth %d", depth);
2777 return ERROR_MALFORMED;
2778 }
2779 if (chunk_data_size < 32) {
2780 return ERROR_MALFORMED;
2781 }
2782
2783 uint8_t header[32];
2784 if (mDataSource->readAt(
2785 data_offset, header, sizeof(header))
2786 < (ssize_t)sizeof(header)) {
2787 return ERROR_IO;
2788 }
2789
2790 uint64_t creationTime;
2791 uint64_t duration = 0;
2792 if (header[0] == 1) {
2793 creationTime = U64_AT(&header[4]);
2794 mHeaderTimescale = U32_AT(&header[20]);
2795 duration = U64_AT(&header[24]);
2796 if (duration == 0xffffffffffffffff) {
2797 duration = 0;
2798 }
2799 } else if (header[0] != 0) {
2800 return ERROR_MALFORMED;
2801 } else {
2802 creationTime = U32_AT(&header[4]);
2803 mHeaderTimescale = U32_AT(&header[12]);
2804 uint32_t d32 = U32_AT(&header[16]);
2805 if (d32 == 0xffffffff) {
2806 d32 = 0;
2807 }
2808 duration = d32;
2809 }
2810 if (duration != 0 && mHeaderTimescale != 0 && duration < UINT64_MAX / 1000000) {
2811 AMediaFormat_setInt64(mFileMetaData,
2812 AMEDIAFORMAT_KEY_DURATION, duration * 1000000 / mHeaderTimescale);
2813 }
2814
2815 String8 s;
2816 if (convertTimeToDate(creationTime, &s)) {
2817 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_DATE, s.string());
2818 }
2819
2820 break;
2821 }
2822
2823 case FOURCC("mehd"):
2824 {
2825 *offset += chunk_size;
2826
2827 if (chunk_data_size < 8) {
2828 return ERROR_MALFORMED;
2829 }
2830
2831 uint8_t flags[4];
2832 if (mDataSource->readAt(
2833 data_offset, flags, sizeof(flags))
2834 < (ssize_t)sizeof(flags)) {
2835 return ERROR_IO;
2836 }
2837
2838 uint64_t duration = 0;
2839 if (flags[0] == 1) {
2840 // 64 bit
2841 if (chunk_data_size < 12) {
2842 return ERROR_MALFORMED;
2843 }
2844 mDataSource->getUInt64(data_offset + 4, &duration);
2845 if (duration == 0xffffffffffffffff) {
2846 duration = 0;
2847 }
2848 } else if (flags[0] == 0) {
2849 // 32 bit
2850 uint32_t d32;
2851 mDataSource->getUInt32(data_offset + 4, &d32);
2852 if (d32 == 0xffffffff) {
2853 d32 = 0;
2854 }
2855 duration = d32;
2856 } else {
2857 return ERROR_MALFORMED;
2858 }
2859
2860 if (duration != 0 && mHeaderTimescale != 0) {
2861 AMediaFormat_setInt64(mFileMetaData,
2862 AMEDIAFORMAT_KEY_DURATION, duration * 1000000 / mHeaderTimescale);
2863 }
2864
2865 break;
2866 }
2867
2868 case FOURCC("mdat"):
2869 {
2870 mMdatFound = true;
2871
2872 *offset += chunk_size;
2873 break;
2874 }
2875
2876 case FOURCC("hdlr"):
2877 {
2878 *offset += chunk_size;
2879
2880 if (underQTMetaPath(mPath, 3)) {
2881 break;
2882 }
2883
2884 uint32_t buffer;
2885 if (mDataSource->readAt(
2886 data_offset + 8, &buffer, 4) < 4) {
2887 return ERROR_IO;
2888 }
2889
2890 uint32_t type = ntohl(buffer);
2891 // For the 3GPP file format, the handler-type within the 'hdlr' box
2892 // shall be 'text'. We also want to support 'sbtl' handler type
2893 // for a practical reason as various MPEG4 containers use it.
2894 if (type == FOURCC("text") || type == FOURCC("sbtl")) {
2895 if (mLastTrack != NULL) {
2896 AMediaFormat_setString(mLastTrack->meta,
2897 AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_TEXT_3GPP);
2898 }
2899 }
2900
2901 break;
2902 }
2903
2904 case FOURCC("keys"):
2905 {
2906 *offset += chunk_size;
2907
2908 if (underQTMetaPath(mPath, 3)) {
2909 status_t err = parseQTMetaKey(data_offset, chunk_data_size);
2910 if (err != OK) {
2911 return err;
2912 }
2913 }
2914 break;
2915 }
2916
2917 case FOURCC("trex"):
2918 {
2919 *offset += chunk_size;
2920
2921 if (chunk_data_size < 24) {
2922 return ERROR_IO;
2923 }
2924 Trex trex;
2925 if (!mDataSource->getUInt32(data_offset + 4, &trex.track_ID) ||
2926 !mDataSource->getUInt32(data_offset + 8, &trex.default_sample_description_index) ||
2927 !mDataSource->getUInt32(data_offset + 12, &trex.default_sample_duration) ||
2928 !mDataSource->getUInt32(data_offset + 16, &trex.default_sample_size) ||
2929 !mDataSource->getUInt32(data_offset + 20, &trex.default_sample_flags)) {
2930 return ERROR_IO;
2931 }
2932 mTrex.add(trex);
2933 break;
2934 }
2935
2936 case FOURCC("tx3g"):
2937 {
2938 if (mLastTrack == NULL)
2939 return ERROR_MALFORMED;
2940
2941 // complain about ridiculous chunks
2942 if (chunk_size > kMaxAtomSize) {
2943 return ERROR_MALFORMED;
2944 }
2945
2946 // complain about empty atoms
2947 if (chunk_data_size <= 0) {
2948 ALOGE("b/124330204");
2949 android_errorWriteLog(0x534e4554, "124330204");
2950 return ERROR_MALFORMED;
2951 }
2952
2953 // should fill buffer based on "data_offset" and "chunk_data_size"
2954 // instead of *offset and chunk_size;
2955 // but we've been feeding the extra data to consumers for multiple releases and
2956 // if those apps are compensating for it, we'd break them with such a change
2957 //
2958
2959 if (mLastTrack->mTx3gBuffer == NULL) {
2960 mLastTrack->mTx3gSize = 0;
2961 mLastTrack->mTx3gFilled = 0;
2962 }
2963 if (mLastTrack->mTx3gSize - mLastTrack->mTx3gFilled < chunk_size) {
2964 size_t growth = kTx3gGrowth;
2965 if (growth < chunk_size) {
2966 growth = chunk_size;
2967 }
2968 // although this disallows 2 tx3g atoms of nearly kMaxAtomSize...
2969 if ((uint64_t) mLastTrack->mTx3gSize + growth > kMaxAtomSize) {
2970 ALOGE("b/124330204 - too much space");
2971 android_errorWriteLog(0x534e4554, "124330204");
2972 return ERROR_MALFORMED;
2973 }
2974 uint8_t *updated = (uint8_t *)realloc(mLastTrack->mTx3gBuffer,
2975 mLastTrack->mTx3gSize + growth);
2976 if (updated == NULL) {
2977 return ERROR_MALFORMED;
2978 }
2979 mLastTrack->mTx3gBuffer = updated;
2980 mLastTrack->mTx3gSize += growth;
2981 }
2982
2983 if ((size_t)(mDataSource->readAt(*offset,
2984 mLastTrack->mTx3gBuffer + mLastTrack->mTx3gFilled,
2985 chunk_size))
2986 < chunk_size) {
2987
2988 // advance read pointer so we don't end up reading this again
2989 *offset += chunk_size;
2990 return ERROR_IO;
2991 }
2992
2993 mLastTrack->mTx3gFilled += chunk_size;
2994 *offset += chunk_size;
2995 break;
2996 }
2997
2998 case FOURCC("covr"):
2999 {
3000 *offset += chunk_size;
3001
3002 ALOGV("chunk_data_size = %" PRId64 " and data_offset = %" PRId64,
3003 chunk_data_size, data_offset);
3004
3005 if (chunk_data_size < 0 || static_cast<uint64_t>(chunk_data_size) >= SIZE_MAX - 1) {
3006 return ERROR_MALFORMED;
3007 }
3008 auto buffer = heapbuffer<uint8_t>(chunk_data_size);
3009 if (buffer.get() == NULL) {
3010 ALOGE("b/28471206");
3011 return NO_MEMORY;
3012 }
3013 if (mDataSource->readAt(
3014 data_offset, buffer.get(), chunk_data_size) != (ssize_t)chunk_data_size) {
3015 return ERROR_IO;
3016 }
3017 const int kSkipBytesOfDataBox = 16;
3018 if (chunk_data_size <= kSkipBytesOfDataBox) {
3019 return ERROR_MALFORMED;
3020 }
3021
3022 AMediaFormat_setBuffer(mFileMetaData,
3023 AMEDIAFORMAT_KEY_ALBUMART,
3024 buffer.get() + kSkipBytesOfDataBox, chunk_data_size - kSkipBytesOfDataBox);
3025
3026 break;
3027 }
3028
3029 case FOURCC("colr"):
3030 {
3031 *offset += chunk_size;
3032 // this must be in a VisualSampleEntry box under the Sample Description Box ('stsd')
3033 // ignore otherwise
3034 if (depth >= 2 && mPath[depth - 2] == FOURCC("stsd")) {
3035 status_t err = parseColorInfo(data_offset, chunk_data_size);
3036 if (err != OK) {
3037 return err;
3038 }
3039 }
3040
3041 break;
3042 }
3043
3044 case FOURCC("pasp"):
3045 {
3046 *offset += chunk_size;
3047 // this must be in a VisualSampleEntry box under the Sample Description Box ('stsd')
3048 // ignore otherwise
3049 if (depth >= 2 && mPath[depth - 2] == FOURCC("stsd")) {
3050 status_t err = parsePaspBox(data_offset, chunk_data_size);
3051 if (err != OK) {
3052 return err;
3053 }
3054 }
3055
3056 break;
3057 }
3058
3059 case FOURCC("titl"):
3060 case FOURCC("perf"):
3061 case FOURCC("auth"):
3062 case FOURCC("gnre"):
3063 case FOURCC("albm"):
3064 case FOURCC("yrrc"):
3065 {
3066 *offset += chunk_size;
3067
3068 status_t err = parse3GPPMetaData(data_offset, chunk_data_size, depth);
3069
3070 if (err != OK) {
3071 return err;
3072 }
3073
3074 break;
3075 }
3076
3077 case FOURCC("ID32"):
3078 {
3079 *offset += chunk_size;
3080
3081 if (chunk_data_size < 6) {
3082 return ERROR_MALFORMED;
3083 }
3084
3085 parseID3v2MetaData(data_offset + 6, chunk_data_size - 6);
3086
3087 break;
3088 }
3089
3090 case FOURCC("----"):
3091 {
3092 mLastCommentMean.clear();
3093 mLastCommentName.clear();
3094 mLastCommentData.clear();
3095 *offset += chunk_size;
3096 break;
3097 }
3098
3099 case FOURCC("sidx"):
3100 {
3101 status_t err = parseSegmentIndex(data_offset, chunk_data_size);
3102 if (err != OK) {
3103 return err;
3104 }
3105 *offset += chunk_size;
3106 return UNKNOWN_ERROR; // stop parsing after sidx
3107 }
3108
3109 case FOURCC("ac-3"):
3110 {
3111 *offset += chunk_size;
3112 // bypass ac-3 if parse fail
3113 if (parseAC3SpecificBox(data_offset) != OK) {
3114 if (mLastTrack != NULL) {
3115 ALOGW("Fail to parse ac-3");
3116 mLastTrack->skipTrack = true;
3117 }
3118 }
3119 return OK;
3120 }
3121
3122 case FOURCC("ec-3"):
3123 {
3124 *offset += chunk_size;
3125 // bypass ec-3 if parse fail
3126 if (parseEAC3SpecificBox(data_offset) != OK) {
3127 if (mLastTrack != NULL) {
3128 ALOGW("Fail to parse ec-3");
3129 mLastTrack->skipTrack = true;
3130 }
3131 }
3132 return OK;
3133 }
3134
3135 case FOURCC("ac-4"):
3136 {
3137 *offset += chunk_size;
3138 // bypass ac-4 if parse fail
3139 if (parseAC4SpecificBox(data_offset) != OK) {
3140 if (mLastTrack != NULL) {
3141 ALOGW("Fail to parse ac-4");
3142 mLastTrack->skipTrack = true;
3143 }
3144 }
3145 return OK;
3146 }
3147
3148 case FOURCC("ftyp"):
3149 {
3150 if (chunk_data_size < 8 || depth != 0) {
3151 return ERROR_MALFORMED;
3152 }
3153
3154 off64_t stop_offset = *offset + chunk_size;
3155 uint32_t numCompatibleBrands = (chunk_data_size - 8) / 4;
3156 std::set<uint32_t> brandSet;
3157 for (size_t i = 0; i < numCompatibleBrands + 2; ++i) {
3158 if (i == 1) {
3159 // Skip this index, it refers to the minorVersion,
3160 // not a brand.
3161 continue;
3162 }
3163
3164 uint32_t brand;
3165 if (mDataSource->readAt(data_offset + 4 * i, &brand, 4) < 4) {
3166 return ERROR_MALFORMED;
3167 }
3168
3169 brand = ntohl(brand);
3170 brandSet.insert(brand);
3171 }
3172
3173 if (brandSet.count(FOURCC("qt ")) > 0) {
3174 mIsQT = true;
3175 } else {
3176 if (brandSet.count(FOURCC("mif1")) > 0
3177 && brandSet.count(FOURCC("heic")) > 0) {
3178 ALOGV("identified HEIF image");
3179
3180 mIsHeif = true;
3181 brandSet.erase(FOURCC("mif1"));
3182 brandSet.erase(FOURCC("heic"));
3183 } else if (brandSet.count(FOURCC("avif")) > 0 ||
3184 brandSet.count(FOURCC("avis")) > 0) {
3185 ALOGV("identified AVIF image");
3186 mIsAvif = true;
3187 brandSet.erase(FOURCC("avif"));
3188 brandSet.erase(FOURCC("avis"));
3189 }
3190
3191 if (!brandSet.empty()) {
3192 // This means that the file should have moov box.
3193 // It could be any iso files (mp4, heifs, etc.)
3194 mHasMoovBox = true;
3195 if (mIsHeif || mIsAvif) {
3196 ALOGV("identified %s image with other tracks", mIsHeif ? "HEIF" : "AVIF");
3197 }
3198 }
3199 }
3200
3201 *offset = stop_offset;
3202
3203 break;
3204 }
3205
3206 default:
3207 {
3208 // check if we're parsing 'ilst' for meta keys
3209 // if so, treat type as a number (key-id).
3210 if (underQTMetaPath(mPath, 3)) {
3211 status_t err = parseQTMetaVal(chunk_type, data_offset, chunk_data_size);
3212 if (err != OK) {
3213 return err;
3214 }
3215 }
3216
3217 *offset += chunk_size;
3218 break;
3219 }
3220 }
3221
3222 return OK;
3223 }
3224
parseChannelCountSampleRate(off64_t * offset,uint16_t * channelCount,uint16_t * sampleRate)3225 status_t MPEG4Extractor::parseChannelCountSampleRate(
3226 off64_t *offset, uint16_t *channelCount, uint16_t *sampleRate) {
3227 // skip 16 bytes:
3228 // + 6-byte reserved,
3229 // + 2-byte data reference index,
3230 // + 8-byte reserved
3231 *offset += 16;
3232 if (!mDataSource->getUInt16(*offset, channelCount)) {
3233 ALOGE("MPEG4Extractor: error while reading sample entry box: cannot read channel count");
3234 return ERROR_MALFORMED;
3235 }
3236 // skip 8 bytes:
3237 // + 2-byte channelCount,
3238 // + 2-byte sample size,
3239 // + 4-byte reserved
3240 *offset += 8;
3241 if (!mDataSource->getUInt16(*offset, sampleRate)) {
3242 ALOGE("MPEG4Extractor: error while reading sample entry box: cannot read sample rate");
3243 return ERROR_MALFORMED;
3244 }
3245 // skip 4 bytes:
3246 // + 2-byte sampleRate,
3247 // + 2-byte reserved
3248 *offset += 4;
3249 return OK;
3250 }
3251
parseAC4SpecificBox(off64_t offset)3252 status_t MPEG4Extractor::parseAC4SpecificBox(off64_t offset) {
3253 if (mLastTrack == NULL) {
3254 return ERROR_MALFORMED;
3255 }
3256
3257 uint16_t sampleRate, channelCount;
3258 status_t status;
3259 if ((status = parseChannelCountSampleRate(&offset, &channelCount, &sampleRate)) != OK) {
3260 return status;
3261 }
3262 uint32_t size;
3263 // + 4-byte size
3264 // + 4-byte type
3265 // + 3-byte payload
3266 const uint32_t kAC4MinimumBoxSize = 4 + 4 + 3;
3267 if (!mDataSource->getUInt32(offset, &size) || size < kAC4MinimumBoxSize) {
3268 ALOGE("MPEG4Extractor: error while reading ac-4 block: cannot read specific box size");
3269 return ERROR_MALFORMED;
3270 }
3271
3272 // + 4-byte size
3273 offset += 4;
3274 uint32_t type;
3275 if (!mDataSource->getUInt32(offset, &type) || type != FOURCC("dac4")) {
3276 ALOGE("MPEG4Extractor: error while reading ac-4 specific block: header not dac4");
3277 return ERROR_MALFORMED;
3278 }
3279
3280 // + 4-byte type
3281 offset += 4;
3282 const uint32_t kAC4SpecificBoxPayloadSize = 1176;
3283 uint8_t chunk[kAC4SpecificBoxPayloadSize];
3284 ssize_t dsiSize = size - 8; // size of box - size and type fields
3285 if (dsiSize >= (ssize_t)kAC4SpecificBoxPayloadSize ||
3286 mDataSource->readAt(offset, chunk, dsiSize) != dsiSize) {
3287 ALOGE("MPEG4Extractor: error while reading ac-4 specific block: bitstream fields");
3288 return ERROR_MALFORMED;
3289 }
3290 // + size-byte payload
3291 offset += dsiSize;
3292 ABitReader br(chunk, dsiSize);
3293 AC4DSIParser parser(br);
3294 if (!parser.parse()){
3295 ALOGE("MPEG4Extractor: error while parsing ac-4 specific block");
3296 return ERROR_MALFORMED;
3297 }
3298
3299 AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_AC4);
3300 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, channelCount);
3301 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, sampleRate);
3302
3303 AudioPresentationCollection presentations;
3304 // translate the AC4 presentation information to audio presentations for this track
3305 AC4DSIParser::AC4Presentations ac4Presentations = parser.getPresentations();
3306 if (!ac4Presentations.empty()) {
3307 for (const auto& ac4Presentation : ac4Presentations) {
3308 auto& presentation = ac4Presentation.second;
3309 if (!presentation.mEnabled) {
3310 continue;
3311 }
3312 AudioPresentationV1 ap;
3313 ap.mPresentationId = presentation.mGroupIndex;
3314 ap.mProgramId = presentation.mProgramID;
3315 ap.mLanguage = presentation.mLanguage;
3316 if (presentation.mPreVirtualized) {
3317 ap.mMasteringIndication = MASTERED_FOR_HEADPHONE;
3318 } else {
3319 switch (presentation.mChannelMode) {
3320 case AC4Parser::AC4Presentation::kChannelMode_Mono:
3321 case AC4Parser::AC4Presentation::kChannelMode_Stereo:
3322 ap.mMasteringIndication = MASTERED_FOR_STEREO;
3323 break;
3324 case AC4Parser::AC4Presentation::kChannelMode_3_0:
3325 case AC4Parser::AC4Presentation::kChannelMode_5_0:
3326 case AC4Parser::AC4Presentation::kChannelMode_5_1:
3327 case AC4Parser::AC4Presentation::kChannelMode_7_0_34:
3328 case AC4Parser::AC4Presentation::kChannelMode_7_1_34:
3329 case AC4Parser::AC4Presentation::kChannelMode_7_0_52:
3330 case AC4Parser::AC4Presentation::kChannelMode_7_1_52:
3331 ap.mMasteringIndication = MASTERED_FOR_SURROUND;
3332 break;
3333 case AC4Parser::AC4Presentation::kChannelMode_7_0_322:
3334 case AC4Parser::AC4Presentation::kChannelMode_7_1_322:
3335 case AC4Parser::AC4Presentation::kChannelMode_7_0_4:
3336 case AC4Parser::AC4Presentation::kChannelMode_7_1_4:
3337 case AC4Parser::AC4Presentation::kChannelMode_9_0_4:
3338 case AC4Parser::AC4Presentation::kChannelMode_9_1_4:
3339 case AC4Parser::AC4Presentation::kChannelMode_22_2:
3340 ap.mMasteringIndication = MASTERED_FOR_3D;
3341 break;
3342 default:
3343 ALOGE("Invalid channel mode in AC4 presentation");
3344 return ERROR_MALFORMED;
3345 }
3346 }
3347
3348 ap.mAudioDescriptionAvailable = (presentation.mContentClassifier ==
3349 AC4Parser::AC4Presentation::kVisuallyImpaired);
3350 ap.mSpokenSubtitlesAvailable = (presentation.mContentClassifier ==
3351 AC4Parser::AC4Presentation::kVoiceOver);
3352 ap.mDialogueEnhancementAvailable = presentation.mHasDialogEnhancements;
3353 if (!ap.mLanguage.empty()) {
3354 ap.mLabels.emplace(ap.mLanguage, presentation.mDescription);
3355 }
3356 presentations.push_back(std::move(ap));
3357 }
3358 }
3359
3360 if (presentations.empty()) {
3361 // Clear audio presentation info in metadata.
3362 AMediaFormat_setBuffer(
3363 mLastTrack->meta, AMEDIAFORMAT_KEY_AUDIO_PRESENTATION_INFO, nullptr, 0);
3364 } else {
3365 std::ostringstream outStream(std::ios::out);
3366 serializeAudioPresentations(presentations, &outStream);
3367 AMediaFormat_setBuffer(
3368 mLastTrack->meta, AMEDIAFORMAT_KEY_AUDIO_PRESENTATION_INFO,
3369 outStream.str().data(), outStream.str().size());
3370 }
3371 return OK;
3372 }
3373
parseEAC3SpecificBox(off64_t offset)3374 status_t MPEG4Extractor::parseEAC3SpecificBox(off64_t offset) {
3375 if (mLastTrack == NULL) {
3376 return ERROR_MALFORMED;
3377 }
3378
3379 uint16_t sampleRate, channels;
3380 status_t status;
3381 if ((status = parseChannelCountSampleRate(&offset, &channels, &sampleRate)) != OK) {
3382 return status;
3383 }
3384 uint32_t size;
3385 // + 4-byte size
3386 // + 4-byte type
3387 // + 3-byte payload
3388 const uint32_t kEAC3SpecificBoxMinSize = 11;
3389 // 13 + 3 + (8 * (2 + 5 + 5 + 3 + 1 + 3 + 4 + (14 * 9 + 1))) bits == 152 bytes theoretical max
3390 // calculated from the required bits read below as well as the maximum number of independent
3391 // and dependant sub streams you can have
3392 const uint32_t kEAC3SpecificBoxMaxSize = 152;
3393 if (!mDataSource->getUInt32(offset, &size) ||
3394 size < kEAC3SpecificBoxMinSize ||
3395 size > kEAC3SpecificBoxMaxSize) {
3396 ALOGE("MPEG4Extractor: error while reading eac-3 block: cannot read specific box size");
3397 return ERROR_MALFORMED;
3398 }
3399
3400 offset += 4;
3401 uint32_t type;
3402 if (!mDataSource->getUInt32(offset, &type) || type != FOURCC("dec3")) {
3403 ALOGE("MPEG4Extractor: error while reading eac-3 specific block: header not dec3");
3404 return ERROR_MALFORMED;
3405 }
3406
3407 offset += 4;
3408 uint8_t* chunk = new (std::nothrow) uint8_t[size];
3409 if (chunk == NULL) {
3410 return ERROR_MALFORMED;
3411 }
3412
3413 if (mDataSource->readAt(offset, chunk, size) != (ssize_t)size) {
3414 ALOGE("MPEG4Extractor: error while reading eac-3 specific block: bitstream fields");
3415 delete[] chunk;
3416 return ERROR_MALFORMED;
3417 }
3418
3419 ABitReader br(chunk, size);
3420 static const unsigned channelCountTable[] = {2, 1, 2, 3, 3, 4, 4, 5};
3421 static const unsigned sampleRateTable[] = {48000, 44100, 32000};
3422
3423 if (br.numBitsLeft() < 16) {
3424 delete[] chunk;
3425 return ERROR_MALFORMED;
3426 }
3427 unsigned data_rate = br.getBits(13);
3428 ALOGV("EAC3 data rate = %d", data_rate);
3429
3430 unsigned num_ind_sub = br.getBits(3) + 1;
3431 ALOGV("EAC3 independant substreams = %d", num_ind_sub);
3432 if (br.numBitsLeft() < (num_ind_sub * 23)) {
3433 delete[] chunk;
3434 return ERROR_MALFORMED;
3435 }
3436
3437 unsigned channelCount = 0;
3438 for (unsigned i = 0; i < num_ind_sub; i++) {
3439 unsigned fscod = br.getBits(2);
3440 if (fscod == 3) {
3441 ALOGE("Incorrect fscod (3) in EAC3 header");
3442 delete[] chunk;
3443 return ERROR_MALFORMED;
3444 }
3445 unsigned boxSampleRate = sampleRateTable[fscod];
3446 if (boxSampleRate != sampleRate) {
3447 ALOGE("sample rate mismatch: boxSampleRate = %d, sampleRate = %d",
3448 boxSampleRate, sampleRate);
3449 delete[] chunk;
3450 return ERROR_MALFORMED;
3451 }
3452
3453 unsigned bsid = br.getBits(5);
3454 if (bsid == 9 || bsid == 10) {
3455 ALOGW("EAC3 stream (bsid=%d) may be silenced by the decoder", bsid);
3456 } else if (bsid > 16) {
3457 ALOGE("EAC3 stream (bsid=%d) is not compatible with ETSI TS 102 366 v1.4.1", bsid);
3458 delete[] chunk;
3459 return ERROR_MALFORMED;
3460 }
3461
3462 // skip
3463 br.skipBits(2);
3464 unsigned bsmod = br.getBits(3);
3465 unsigned acmod = br.getBits(3);
3466 unsigned lfeon = br.getBits(1);
3467 // we currently only support the first stream
3468 if (i == 0)
3469 channelCount = channelCountTable[acmod] + lfeon;
3470 ALOGV("bsmod = %d, acmod = %d, lfeon = %d", bsmod, acmod, lfeon);
3471
3472 br.skipBits(3);
3473 unsigned num_dep_sub = br.getBits(4);
3474 ALOGV("EAC3 dependant substreams = %d", num_dep_sub);
3475 if (num_dep_sub != 0) {
3476 if (br.numBitsLeft() < 9) {
3477 delete[] chunk;
3478 return ERROR_MALFORMED;
3479 }
3480 static const char* chan_loc_tbl[] = { "Lc/Rc","Lrs/Rrs","Cs","Ts","Lsd/Rsd",
3481 "Lw/Rw","Lvh/Rvh","Cvh","Lfe2" };
3482 unsigned chan_loc = br.getBits(9);
3483 unsigned mask = 1;
3484 for (unsigned j = 0; j < 9; j++, mask <<= 1) {
3485 if ((chan_loc & mask) != 0) {
3486 // we currently only support the first stream
3487 if (i == 0) {
3488 channelCount++;
3489 // these are 2 channels in the mask
3490 if (j == 0 || j == 1 || j == 4 || j == 5 || j == 6) {
3491 channelCount++;
3492 }
3493 }
3494 ALOGV(" %s", chan_loc_tbl[j]);
3495 }
3496 }
3497 } else {
3498 if (br.numBitsLeft() == 0) {
3499 delete[] chunk;
3500 return ERROR_MALFORMED;
3501 }
3502 br.skipBits(1);
3503 }
3504 }
3505
3506 if (br.numBitsLeft() != 0) {
3507 if (br.numBitsLeft() < 8) {
3508 delete[] chunk;
3509 return ERROR_MALFORMED;
3510 }
3511 unsigned mask = br.getBits(8);
3512 for (unsigned i = 0; i < 8; i++) {
3513 if (((0x1 << i) & mask) == 0)
3514 continue;
3515
3516 if (br.numBitsLeft() < 8) {
3517 delete[] chunk;
3518 return ERROR_MALFORMED;
3519 }
3520 switch (i) {
3521 case 0: {
3522 unsigned complexity = br.getBits(8);
3523 ALOGV("Found a JOC stream with complexity = %d", complexity);
3524 }break;
3525 default: {
3526 br.skipBits(8);
3527 }break;
3528 }
3529 }
3530 }
3531 AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_EAC3);
3532 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, channelCount);
3533 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, sampleRate);
3534
3535 delete[] chunk;
3536 return OK;
3537 }
3538
parseAC3SpecificBox(off64_t offset)3539 status_t MPEG4Extractor::parseAC3SpecificBox(off64_t offset) {
3540 if (mLastTrack == NULL) {
3541 return ERROR_MALFORMED;
3542 }
3543
3544 uint16_t sampleRate, channels;
3545 status_t status;
3546 if ((status = parseChannelCountSampleRate(&offset, &channels, &sampleRate)) != OK) {
3547 return status;
3548 }
3549 uint32_t size;
3550 // + 4-byte size
3551 // + 4-byte type
3552 // + 3-byte payload
3553 const uint32_t kAC3SpecificBoxSize = 11;
3554 if (!mDataSource->getUInt32(offset, &size) || size < kAC3SpecificBoxSize) {
3555 ALOGE("MPEG4Extractor: error while reading ac-3 block: cannot read specific box size");
3556 return ERROR_MALFORMED;
3557 }
3558
3559 offset += 4;
3560 uint32_t type;
3561 if (!mDataSource->getUInt32(offset, &type) || type != FOURCC("dac3")) {
3562 ALOGE("MPEG4Extractor: error while reading ac-3 specific block: header not dac3");
3563 return ERROR_MALFORMED;
3564 }
3565
3566 offset += 4;
3567 const uint32_t kAC3SpecificBoxPayloadSize = 3;
3568 uint8_t chunk[kAC3SpecificBoxPayloadSize];
3569 if (mDataSource->readAt(offset, chunk, sizeof(chunk)) != sizeof(chunk)) {
3570 ALOGE("MPEG4Extractor: error while reading ac-3 specific block: bitstream fields");
3571 return ERROR_MALFORMED;
3572 }
3573
3574 ABitReader br(chunk, sizeof(chunk));
3575 static const unsigned channelCountTable[] = {2, 1, 2, 3, 3, 4, 4, 5};
3576 static const unsigned sampleRateTable[] = {48000, 44100, 32000};
3577
3578 unsigned fscod = br.getBits(2);
3579 if (fscod == 3) {
3580 ALOGE("Incorrect fscod (3) in AC3 header");
3581 return ERROR_MALFORMED;
3582 }
3583 unsigned boxSampleRate = sampleRateTable[fscod];
3584 if (boxSampleRate != sampleRate) {
3585 ALOGE("sample rate mismatch: boxSampleRate = %d, sampleRate = %d",
3586 boxSampleRate, sampleRate);
3587 return ERROR_MALFORMED;
3588 }
3589
3590 unsigned bsid = br.getBits(5);
3591 if (bsid > 8) {
3592 ALOGW("Incorrect bsid in AC3 header. Possibly E-AC-3?");
3593 return ERROR_MALFORMED;
3594 }
3595
3596 // skip
3597 br.skipBits(3); // bsmod
3598
3599 unsigned acmod = br.getBits(3);
3600 unsigned lfeon = br.getBits(1);
3601 unsigned channelCount = channelCountTable[acmod] + lfeon;
3602
3603 AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_AC3);
3604 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, channelCount);
3605 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, sampleRate);
3606 return OK;
3607 }
3608
parseALACSampleEntry(off64_t * offset)3609 status_t MPEG4Extractor::parseALACSampleEntry(off64_t *offset) {
3610 // See 'external/alac/ALACMagicCookieDescription.txt for the detail'.
3611 // Store ALAC magic cookie (decoder needs it).
3612 uint8_t alacInfo[12];
3613 off64_t data_offset = *offset;
3614
3615 if (mDataSource->readAt(
3616 data_offset, alacInfo, sizeof(alacInfo)) < (ssize_t)sizeof(alacInfo)) {
3617 return ERROR_IO;
3618 }
3619 uint32_t size = U32_AT(&alacInfo[0]);
3620 if ((size != ALAC_SPECIFIC_INFO_SIZE) ||
3621 (U32_AT(&alacInfo[4]) != FOURCC("alac")) ||
3622 (U32_AT(&alacInfo[8]) != 0)) {
3623 ALOGV("Size:%u, U32_AT(&alacInfo[4]):%u, U32_AT(&alacInfo[8]):%u",
3624 size, U32_AT(&alacInfo[4]), U32_AT(&alacInfo[8]));
3625 return ERROR_MALFORMED;
3626 }
3627 data_offset += sizeof(alacInfo);
3628 uint8_t cookie[size - sizeof(alacInfo)];
3629 if (mDataSource->readAt(
3630 data_offset, cookie, sizeof(cookie)) < (ssize_t)sizeof(cookie)) {
3631 return ERROR_IO;
3632 }
3633
3634 uint8_t bitsPerSample = cookie[5];
3635 AMediaFormat_setInt32(mLastTrack->meta,
3636 AMEDIAFORMAT_KEY_BITS_PER_SAMPLE, bitsPerSample);
3637 AMediaFormat_setInt32(mLastTrack->meta,
3638 AMEDIAFORMAT_KEY_CHANNEL_COUNT, cookie[9]);
3639 AMediaFormat_setInt32(mLastTrack->meta,
3640 AMEDIAFORMAT_KEY_SAMPLE_RATE, U32_AT(&cookie[20]));
3641 AMediaFormat_setBuffer(mLastTrack->meta,
3642 AMEDIAFORMAT_KEY_CSD_0, cookie, sizeof(cookie));
3643 data_offset += sizeof(cookie);
3644 *offset = data_offset;
3645 return OK;
3646 }
3647
parseSegmentIndex(off64_t offset,size_t size)3648 status_t MPEG4Extractor::parseSegmentIndex(off64_t offset, size_t size) {
3649 ALOGV("MPEG4Extractor::parseSegmentIndex");
3650
3651 if (size < 12) {
3652 return -EINVAL;
3653 }
3654
3655 uint32_t flags;
3656 if (!mDataSource->getUInt32(offset, &flags)) {
3657 return ERROR_MALFORMED;
3658 }
3659
3660 uint32_t version = flags >> 24;
3661 flags &= 0xffffff;
3662
3663 ALOGV("sidx version %d", version);
3664
3665 uint32_t referenceId;
3666 if (!mDataSource->getUInt32(offset + 4, &referenceId)) {
3667 return ERROR_MALFORMED;
3668 }
3669
3670 uint32_t timeScale;
3671 if (!mDataSource->getUInt32(offset + 8, &timeScale)) {
3672 return ERROR_MALFORMED;
3673 }
3674 ALOGV("sidx refid/timescale: %d/%d", referenceId, timeScale);
3675 if (timeScale == 0)
3676 return ERROR_MALFORMED;
3677
3678 uint64_t earliestPresentationTime;
3679 uint64_t firstOffset;
3680
3681 offset += 12;
3682 size -= 12;
3683
3684 if (version == 0) {
3685 if (size < 8) {
3686 return -EINVAL;
3687 }
3688 uint32_t tmp;
3689 if (!mDataSource->getUInt32(offset, &tmp)) {
3690 return ERROR_MALFORMED;
3691 }
3692 earliestPresentationTime = tmp;
3693 if (!mDataSource->getUInt32(offset + 4, &tmp)) {
3694 return ERROR_MALFORMED;
3695 }
3696 firstOffset = tmp;
3697 offset += 8;
3698 size -= 8;
3699 } else {
3700 if (size < 16) {
3701 return -EINVAL;
3702 }
3703 if (!mDataSource->getUInt64(offset, &earliestPresentationTime)) {
3704 return ERROR_MALFORMED;
3705 }
3706 if (!mDataSource->getUInt64(offset + 8, &firstOffset)) {
3707 return ERROR_MALFORMED;
3708 }
3709 offset += 16;
3710 size -= 16;
3711 }
3712 ALOGV("sidx pres/off: %" PRIu64 "/%" PRIu64, earliestPresentationTime, firstOffset);
3713
3714 if (size < 4) {
3715 return -EINVAL;
3716 }
3717
3718 uint16_t referenceCount;
3719 if (!mDataSource->getUInt16(offset + 2, &referenceCount)) {
3720 return ERROR_MALFORMED;
3721 }
3722 offset += 4;
3723 size -= 4;
3724 ALOGV("refcount: %d", referenceCount);
3725
3726 if (size < referenceCount * 12) {
3727 return -EINVAL;
3728 }
3729
3730 uint64_t total_duration = 0;
3731 for (unsigned int i = 0; i < referenceCount; i++) {
3732 uint32_t d1, d2, d3;
3733
3734 if (!mDataSource->getUInt32(offset, &d1) || // size
3735 !mDataSource->getUInt32(offset + 4, &d2) || // duration
3736 !mDataSource->getUInt32(offset + 8, &d3)) { // flags
3737 return ERROR_MALFORMED;
3738 }
3739
3740 if (d1 & 0x80000000) {
3741 ALOGW("sub-sidx boxes not supported yet");
3742 }
3743 bool sap = d3 & 0x80000000;
3744 uint32_t saptype = (d3 >> 28) & 7;
3745 if (!sap || (saptype != 1 && saptype != 2)) {
3746 // type 1 and 2 are sync samples
3747 ALOGW("not a stream access point, or unsupported type: %08x", d3);
3748 }
3749 total_duration += d2;
3750 offset += 12;
3751 ALOGV(" item %d, %08x %08x %08x", i, d1, d2, d3);
3752 SidxEntry se;
3753 se.mSize = d1 & 0x7fffffff;
3754 se.mDurationUs = 1000000LL * d2 / timeScale;
3755 mSidxEntries.add(se);
3756 }
3757
3758 uint64_t sidxDuration = total_duration * 1000000 / timeScale;
3759
3760 if (mLastTrack == NULL)
3761 return ERROR_MALFORMED;
3762
3763 int64_t metaDuration;
3764 if (!AMediaFormat_getInt64(mLastTrack->meta,
3765 AMEDIAFORMAT_KEY_DURATION, &metaDuration) || metaDuration == 0) {
3766 AMediaFormat_setInt64(mLastTrack->meta, AMEDIAFORMAT_KEY_DURATION, sidxDuration);
3767 }
3768 return OK;
3769 }
3770
parseQTMetaKey(off64_t offset,size_t size)3771 status_t MPEG4Extractor::parseQTMetaKey(off64_t offset, size_t size) {
3772 if (size < 8) {
3773 return ERROR_MALFORMED;
3774 }
3775
3776 uint32_t count;
3777 if (!mDataSource->getUInt32(offset + 4, &count)) {
3778 return ERROR_MALFORMED;
3779 }
3780
3781 if (mMetaKeyMap.size() > 0) {
3782 ALOGW("'keys' atom seen again, discarding existing entries");
3783 mMetaKeyMap.clear();
3784 }
3785
3786 off64_t keyOffset = offset + 8;
3787 off64_t stopOffset = offset + size;
3788 for (size_t i = 1; i <= count; i++) {
3789 if (keyOffset + 8 > stopOffset) {
3790 return ERROR_MALFORMED;
3791 }
3792
3793 uint32_t keySize;
3794 if (!mDataSource->getUInt32(keyOffset, &keySize)
3795 || keySize < 8
3796 || keyOffset + keySize > stopOffset) {
3797 return ERROR_MALFORMED;
3798 }
3799
3800 uint32_t type;
3801 if (!mDataSource->getUInt32(keyOffset + 4, &type)
3802 || type != FOURCC("mdta")) {
3803 return ERROR_MALFORMED;
3804 }
3805
3806 keySize -= 8;
3807 keyOffset += 8;
3808
3809 auto keyData = heapbuffer<uint8_t>(keySize);
3810 if (keyData.get() == NULL) {
3811 return ERROR_MALFORMED;
3812 }
3813 if (mDataSource->readAt(
3814 keyOffset, keyData.get(), keySize) < (ssize_t) keySize) {
3815 return ERROR_MALFORMED;
3816 }
3817
3818 AString key((const char *)keyData.get(), keySize);
3819 mMetaKeyMap.add(i, key);
3820
3821 keyOffset += keySize;
3822 }
3823 return OK;
3824 }
3825
parseQTMetaVal(int32_t keyId,off64_t offset,size_t size)3826 status_t MPEG4Extractor::parseQTMetaVal(
3827 int32_t keyId, off64_t offset, size_t size) {
3828 ssize_t index = mMetaKeyMap.indexOfKey(keyId);
3829 if (index < 0) {
3830 // corresponding key is not present, ignore
3831 return ERROR_MALFORMED;
3832 }
3833
3834 if (size <= 16) {
3835 return ERROR_MALFORMED;
3836 }
3837 uint32_t dataSize;
3838 if (!mDataSource->getUInt32(offset, &dataSize)
3839 || dataSize > size || dataSize <= 16) {
3840 return ERROR_MALFORMED;
3841 }
3842 uint32_t atomFourCC;
3843 if (!mDataSource->getUInt32(offset + 4, &atomFourCC)
3844 || atomFourCC != FOURCC("data")) {
3845 return ERROR_MALFORMED;
3846 }
3847 uint32_t dataType;
3848 if (!mDataSource->getUInt32(offset + 8, &dataType)
3849 || ((dataType & 0xff000000) != 0)) {
3850 // not well-known type
3851 return ERROR_MALFORMED;
3852 }
3853
3854 dataSize -= 16;
3855 offset += 16;
3856
3857 if (dataType == 23 && dataSize >= 4) {
3858 // BE Float32
3859 uint32_t val;
3860 if (!mDataSource->getUInt32(offset, &val)) {
3861 return ERROR_MALFORMED;
3862 }
3863 if (!strcasecmp(mMetaKeyMap[index].c_str(), "com.android.capture.fps")) {
3864 AMediaFormat_setFloat(mFileMetaData, AMEDIAFORMAT_KEY_CAPTURE_RATE, *(float *)&val);
3865 }
3866 } else if (dataType == 67 && dataSize >= 4) {
3867 // BE signed int32
3868 uint32_t val;
3869 if (!mDataSource->getUInt32(offset, &val)) {
3870 return ERROR_MALFORMED;
3871 }
3872 if (!strcasecmp(mMetaKeyMap[index].c_str(), "com.android.video.temporal_layers_count")) {
3873 AMediaFormat_setInt32(mFileMetaData,
3874 AMEDIAFORMAT_KEY_TEMPORAL_LAYER_COUNT, val);
3875 }
3876 } else {
3877 // add more keys if needed
3878 ALOGV("ignoring key: type %d, size %d", dataType, dataSize);
3879 }
3880
3881 return OK;
3882 }
3883
parseTrackHeader(off64_t data_offset,off64_t data_size)3884 status_t MPEG4Extractor::parseTrackHeader(
3885 off64_t data_offset, off64_t data_size) {
3886 if (data_size < 4) {
3887 return ERROR_MALFORMED;
3888 }
3889
3890 uint8_t version;
3891 if (mDataSource->readAt(data_offset, &version, 1) < 1) {
3892 return ERROR_IO;
3893 }
3894
3895 size_t dynSize = (version == 1) ? 36 : 24;
3896
3897 uint8_t buffer[36 + 60];
3898
3899 if (data_size != (off64_t)dynSize + 60) {
3900 return ERROR_MALFORMED;
3901 }
3902
3903 if (mDataSource->readAt(
3904 data_offset, buffer, data_size) < (ssize_t)data_size) {
3905 return ERROR_IO;
3906 }
3907
3908 int32_t id;
3909
3910 if (version == 1) {
3911 // we can get ctime value from U64_AT(&buffer[4])
3912 // we can get mtime value from U64_AT(&buffer[12])
3913 id = U32_AT(&buffer[20]);
3914 // we can get duration value from U64_AT(&buffer[28])
3915 } else if (version == 0) {
3916 // we can get ctime value from U32_AT(&buffer[4])
3917 // we can get mtime value from U32_AT(&buffer[8])
3918 id = U32_AT(&buffer[12]);
3919 // we can get duration value from U32_AT(&buffer[20])
3920 } else {
3921 return ERROR_UNSUPPORTED;
3922 }
3923
3924 if (mLastTrack == NULL)
3925 return ERROR_MALFORMED;
3926
3927 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_TRACK_ID, id);
3928
3929 size_t matrixOffset = dynSize + 16;
3930 int32_t a00 = U32_AT(&buffer[matrixOffset]);
3931 int32_t a01 = U32_AT(&buffer[matrixOffset + 4]);
3932 int32_t a10 = U32_AT(&buffer[matrixOffset + 12]);
3933 int32_t a11 = U32_AT(&buffer[matrixOffset + 16]);
3934
3935 #if 0
3936 int32_t dx = U32_AT(&buffer[matrixOffset + 8]);
3937 int32_t dy = U32_AT(&buffer[matrixOffset + 20]);
3938
3939 ALOGI("x' = %.2f * x + %.2f * y + %.2f",
3940 a00 / 65536.0f, a01 / 65536.0f, dx / 65536.0f);
3941 ALOGI("y' = %.2f * x + %.2f * y + %.2f",
3942 a10 / 65536.0f, a11 / 65536.0f, dy / 65536.0f);
3943 #endif
3944
3945 uint32_t rotationDegrees;
3946
3947 static const int32_t kFixedOne = 0x10000;
3948 if (a00 == kFixedOne && a01 == 0 && a10 == 0 && a11 == kFixedOne) {
3949 // Identity, no rotation
3950 rotationDegrees = 0;
3951 } else if (a00 == 0 && a01 == kFixedOne && a10 == -kFixedOne && a11 == 0) {
3952 rotationDegrees = 90;
3953 } else if (a00 == 0 && a01 == -kFixedOne && a10 == kFixedOne && a11 == 0) {
3954 rotationDegrees = 270;
3955 } else if (a00 == -kFixedOne && a01 == 0 && a10 == 0 && a11 == -kFixedOne) {
3956 rotationDegrees = 180;
3957 } else {
3958 ALOGW("We only support 0,90,180,270 degree rotation matrices");
3959 rotationDegrees = 0;
3960 }
3961
3962 if (rotationDegrees != 0) {
3963 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_ROTATION, rotationDegrees);
3964 }
3965
3966 // Handle presentation display size, which could be different
3967 // from the image size indicated by AMEDIAFORMAT_KEY_WIDTH and AMEDIAFORMAT_KEY_HEIGHT.
3968 uint32_t width = U32_AT(&buffer[dynSize + 52]);
3969 uint32_t height = U32_AT(&buffer[dynSize + 56]);
3970 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_DISPLAY_WIDTH, width >> 16);
3971 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_DISPLAY_HEIGHT, height >> 16);
3972
3973 return OK;
3974 }
3975
parseITunesMetaData(off64_t offset,size_t size)3976 status_t MPEG4Extractor::parseITunesMetaData(off64_t offset, size_t size) {
3977 if (size == 0) {
3978 return OK;
3979 }
3980
3981 if (size < 4 || size == SIZE_MAX) {
3982 return ERROR_MALFORMED;
3983 }
3984
3985 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
3986 if (buffer == NULL) {
3987 return ERROR_MALFORMED;
3988 }
3989 if (mDataSource->readAt(
3990 offset, buffer, size) != (ssize_t)size) {
3991 delete[] buffer;
3992 buffer = NULL;
3993
3994 return ERROR_IO;
3995 }
3996
3997 uint32_t flags = U32_AT(buffer);
3998
3999 const char *metadataKey = nullptr;
4000 char chunk[5];
4001 MakeFourCCString(mPath[4], chunk);
4002 ALOGV("meta: %s @ %lld", chunk, (long long)offset);
4003 switch ((int32_t)mPath[4]) {
4004 case FOURCC("\251alb"):
4005 {
4006 metadataKey = AMEDIAFORMAT_KEY_ALBUM;
4007 break;
4008 }
4009 case FOURCC("\251ART"):
4010 {
4011 metadataKey = AMEDIAFORMAT_KEY_ARTIST;
4012 break;
4013 }
4014 case FOURCC("aART"):
4015 {
4016 metadataKey = AMEDIAFORMAT_KEY_ALBUMARTIST;
4017 break;
4018 }
4019 case FOURCC("\251day"):
4020 {
4021 metadataKey = AMEDIAFORMAT_KEY_YEAR;
4022 break;
4023 }
4024 case FOURCC("\251nam"):
4025 {
4026 metadataKey = AMEDIAFORMAT_KEY_TITLE;
4027 break;
4028 }
4029 case FOURCC("\251wrt"):
4030 {
4031 // various open source taggers agree that the "©wrt" tag is for composer, not writer
4032 metadataKey = AMEDIAFORMAT_KEY_COMPOSER;
4033 break;
4034 }
4035 case FOURCC("covr"):
4036 {
4037 metadataKey = AMEDIAFORMAT_KEY_ALBUMART;
4038 break;
4039 }
4040 case FOURCC("gnre"):
4041 case FOURCC("\251gen"):
4042 {
4043 metadataKey = AMEDIAFORMAT_KEY_GENRE;
4044 break;
4045 }
4046 case FOURCC("cpil"):
4047 {
4048 if (size == 9 && flags == 21) {
4049 char tmp[16];
4050 sprintf(tmp, "%d",
4051 (int)buffer[size - 1]);
4052
4053 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_COMPILATION, tmp);
4054 }
4055 break;
4056 }
4057 case FOURCC("trkn"):
4058 {
4059 if (size == 16 && flags == 0) {
4060 char tmp[16];
4061 uint16_t* pTrack = (uint16_t*)&buffer[10];
4062 uint16_t* pTotalTracks = (uint16_t*)&buffer[12];
4063 sprintf(tmp, "%d/%d", ntohs(*pTrack), ntohs(*pTotalTracks));
4064
4065 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_CDTRACKNUMBER, tmp);
4066 }
4067 break;
4068 }
4069 case FOURCC("disk"):
4070 {
4071 if ((size == 14 || size == 16) && flags == 0) {
4072 char tmp[16];
4073 uint16_t* pDisc = (uint16_t*)&buffer[10];
4074 uint16_t* pTotalDiscs = (uint16_t*)&buffer[12];
4075 sprintf(tmp, "%d/%d", ntohs(*pDisc), ntohs(*pTotalDiscs));
4076
4077 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_DISCNUMBER, tmp);
4078 }
4079 break;
4080 }
4081 case FOURCC("----"):
4082 {
4083 buffer[size] = '\0';
4084 switch (mPath[5]) {
4085 case FOURCC("mean"):
4086 mLastCommentMean.setTo((const char *)buffer + 4);
4087 break;
4088 case FOURCC("name"):
4089 mLastCommentName.setTo((const char *)buffer + 4);
4090 break;
4091 case FOURCC("data"):
4092 if (size < 8) {
4093 delete[] buffer;
4094 buffer = NULL;
4095 ALOGE("b/24346430");
4096 return ERROR_MALFORMED;
4097 }
4098 mLastCommentData.setTo((const char *)buffer + 8);
4099 break;
4100 }
4101
4102 // Once we have a set of mean/name/data info, go ahead and process
4103 // it to see if its something we are interested in. Whether or not
4104 // were are interested in the specific tag, make sure to clear out
4105 // the set so we can be ready to process another tuple should one
4106 // show up later in the file.
4107 if ((mLastCommentMean.length() != 0) &&
4108 (mLastCommentName.length() != 0) &&
4109 (mLastCommentData.length() != 0)) {
4110
4111 if (mLastCommentMean == "com.apple.iTunes"
4112 && mLastCommentName == "iTunSMPB") {
4113 int32_t delay, padding;
4114 if (sscanf(mLastCommentData,
4115 " %*x %x %x %*x", &delay, &padding) == 2) {
4116 if (mLastTrack == NULL) {
4117 delete[] buffer;
4118 return ERROR_MALFORMED;
4119 }
4120
4121 AMediaFormat_setInt32(mLastTrack->meta,
4122 AMEDIAFORMAT_KEY_ENCODER_DELAY, delay);
4123 AMediaFormat_setInt32(mLastTrack->meta,
4124 AMEDIAFORMAT_KEY_ENCODER_PADDING, padding);
4125 }
4126 }
4127
4128 mLastCommentMean.clear();
4129 mLastCommentName.clear();
4130 mLastCommentData.clear();
4131 }
4132 break;
4133 }
4134
4135 default:
4136 break;
4137 }
4138
4139 void *tmpData;
4140 size_t tmpDataSize;
4141 const char *s;
4142 if (size >= 8 && metadataKey &&
4143 !AMediaFormat_getBuffer(mFileMetaData, metadataKey, &tmpData, &tmpDataSize) &&
4144 !AMediaFormat_getString(mFileMetaData, metadataKey, &s)) {
4145 if (!strcmp(metadataKey, "albumart")) {
4146 AMediaFormat_setBuffer(mFileMetaData, metadataKey,
4147 buffer + 8, size - 8);
4148 } else if (!strcmp(metadataKey, AMEDIAFORMAT_KEY_GENRE)) {
4149 if (flags == 0) {
4150 // uint8_t genre code, iTunes genre codes are
4151 // the standard id3 codes, except they start
4152 // at 1 instead of 0 (e.g. Pop is 14, not 13)
4153 // We use standard id3 numbering, so subtract 1.
4154 int genrecode = (int)buffer[size - 1];
4155 genrecode--;
4156 if (genrecode < 0) {
4157 genrecode = 255; // reserved for 'unknown genre'
4158 }
4159 char genre[10];
4160 sprintf(genre, "%d", genrecode);
4161
4162 AMediaFormat_setString(mFileMetaData, metadataKey, genre);
4163 } else if (flags == 1) {
4164 // custom genre string
4165 buffer[size] = '\0';
4166
4167 AMediaFormat_setString(mFileMetaData,
4168 metadataKey, (const char *)buffer + 8);
4169 }
4170 } else {
4171 buffer[size] = '\0';
4172
4173 AMediaFormat_setString(mFileMetaData,
4174 metadataKey, (const char *)buffer + 8);
4175 }
4176 }
4177
4178 delete[] buffer;
4179 buffer = NULL;
4180
4181 return OK;
4182 }
4183
parseColorInfo(off64_t offset,size_t size)4184 status_t MPEG4Extractor::parseColorInfo(off64_t offset, size_t size) {
4185 if (size < 4 || size == SIZE_MAX || mLastTrack == NULL) {
4186 return ERROR_MALFORMED;
4187 }
4188
4189 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
4190 if (buffer == NULL) {
4191 return ERROR_MALFORMED;
4192 }
4193 if (mDataSource->readAt(offset, buffer, size) != (ssize_t)size) {
4194 delete[] buffer;
4195 buffer = NULL;
4196
4197 return ERROR_IO;
4198 }
4199
4200 int32_t type = U32_AT(&buffer[0]);
4201 if ((type == FOURCC("nclx") && size >= 11)
4202 || (type == FOURCC("nclc") && size >= 10)) {
4203 // only store the first color specification
4204 int32_t existingColor;
4205 if (!AMediaFormat_getInt32(mLastTrack->meta,
4206 AMEDIAFORMAT_KEY_COLOR_RANGE, &existingColor)) {
4207 int32_t primaries = U16_AT(&buffer[4]);
4208 int32_t isotransfer = U16_AT(&buffer[6]);
4209 int32_t coeffs = U16_AT(&buffer[8]);
4210 bool fullRange = (type == FOURCC("nclx")) && (buffer[10] & 128);
4211
4212 int32_t range = 0;
4213 int32_t standard = 0;
4214 int32_t transfer = 0;
4215 ColorUtils::convertIsoColorAspectsToPlatformAspects(
4216 primaries, isotransfer, coeffs, fullRange,
4217 &range, &standard, &transfer);
4218
4219 if (range != 0) {
4220 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_COLOR_RANGE, range);
4221 }
4222 if (standard != 0) {
4223 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_COLOR_STANDARD, standard);
4224 }
4225 if (transfer != 0) {
4226 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_COLOR_TRANSFER, transfer);
4227 }
4228 }
4229 }
4230
4231 delete[] buffer;
4232 buffer = NULL;
4233
4234 return OK;
4235 }
4236
parsePaspBox(off64_t offset,size_t size)4237 status_t MPEG4Extractor::parsePaspBox(off64_t offset, size_t size) {
4238 if (size < 8 || size == SIZE_MAX || mLastTrack == NULL) {
4239 return ERROR_MALFORMED;
4240 }
4241
4242 uint32_t data[2]; // hSpacing, vSpacing
4243 if (mDataSource->readAt(offset, data, 8) < 8) {
4244 return ERROR_IO;
4245 }
4246 uint32_t hSpacing = ntohl(data[0]);
4247 uint32_t vSpacing = ntohl(data[1]);
4248
4249 if (hSpacing != 0 && vSpacing != 0) {
4250 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAR_WIDTH, hSpacing);
4251 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAR_HEIGHT, vSpacing);
4252 }
4253
4254 return OK;
4255 }
4256
parse3GPPMetaData(off64_t offset,size_t size,int depth)4257 status_t MPEG4Extractor::parse3GPPMetaData(off64_t offset, size_t size, int depth) {
4258 if (size < 4 || size == SIZE_MAX) {
4259 return ERROR_MALFORMED;
4260 }
4261
4262 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
4263 if (buffer == NULL) {
4264 return ERROR_MALFORMED;
4265 }
4266 if (mDataSource->readAt(
4267 offset, buffer, size) != (ssize_t)size) {
4268 delete[] buffer;
4269 buffer = NULL;
4270
4271 return ERROR_IO;
4272 }
4273
4274 const char *metadataKey = nullptr;
4275 switch (mPath[depth]) {
4276 case FOURCC("titl"):
4277 {
4278 metadataKey = "title";
4279 break;
4280 }
4281 case FOURCC("perf"):
4282 {
4283 metadataKey = "artist";
4284 break;
4285 }
4286 case FOURCC("auth"):
4287 {
4288 metadataKey = "writer";
4289 break;
4290 }
4291 case FOURCC("gnre"):
4292 {
4293 metadataKey = "genre";
4294 break;
4295 }
4296 case FOURCC("albm"):
4297 {
4298 if (buffer[size - 1] != '\0') {
4299 char tmp[4];
4300 sprintf(tmp, "%u", buffer[size - 1]);
4301
4302 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_CDTRACKNUMBER, tmp);
4303 }
4304
4305 metadataKey = "album";
4306 break;
4307 }
4308 case FOURCC("yrrc"):
4309 {
4310 if (size < 6) {
4311 delete[] buffer;
4312 buffer = NULL;
4313 ALOGE("b/62133227");
4314 android_errorWriteLog(0x534e4554, "62133227");
4315 return ERROR_MALFORMED;
4316 }
4317 char tmp[5];
4318 uint16_t year = U16_AT(&buffer[4]);
4319
4320 if (year < 10000) {
4321 sprintf(tmp, "%u", year);
4322
4323 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_YEAR, tmp);
4324 }
4325 break;
4326 }
4327
4328 default:
4329 break;
4330 }
4331
4332 if (metadataKey) {
4333 bool isUTF8 = true; // Common case
4334 char16_t *framedata = NULL;
4335 int len16 = 0; // Number of UTF-16 characters
4336
4337 // smallest possible valid UTF-16 string w BOM: 0xfe 0xff 0x00 0x00
4338 if (size < 6) {
4339 delete[] buffer;
4340 buffer = NULL;
4341 return ERROR_MALFORMED;
4342 }
4343
4344 if (size - 6 >= 4) {
4345 len16 = ((size - 6) / 2) - 1; // don't include 0x0000 terminator
4346 framedata = (char16_t *)(buffer + 6);
4347 if (0xfffe == *framedata) {
4348 // endianness marker (BOM) doesn't match host endianness
4349 for (int i = 0; i < len16; i++) {
4350 framedata[i] = bswap_16(framedata[i]);
4351 }
4352 // BOM is now swapped to 0xfeff, we will execute next block too
4353 }
4354
4355 if (0xfeff == *framedata) {
4356 // Remove the BOM
4357 framedata++;
4358 len16--;
4359 isUTF8 = false;
4360 }
4361 // else normal non-zero-length UTF-8 string
4362 // we can't handle UTF-16 without BOM as there is no other
4363 // indication of encoding.
4364 }
4365
4366 if (isUTF8) {
4367 buffer[size] = 0;
4368 AMediaFormat_setString(mFileMetaData, metadataKey, (const char *)buffer + 6);
4369 } else {
4370 // Convert from UTF-16 string to UTF-8 string.
4371 String8 tmpUTF8str(framedata, len16);
4372 AMediaFormat_setString(mFileMetaData, metadataKey, tmpUTF8str.string());
4373 }
4374 }
4375
4376 delete[] buffer;
4377 buffer = NULL;
4378
4379 return OK;
4380 }
4381
parseID3v2MetaData(off64_t offset,uint64_t size)4382 void MPEG4Extractor::parseID3v2MetaData(off64_t offset, uint64_t size) {
4383 uint8_t *buffer = new (std::nothrow) uint8_t[size];
4384 if (buffer == NULL) {
4385 return;
4386 }
4387 if (mDataSource->readAt(offset, buffer, size) != (ssize_t)size) {
4388 delete[] buffer;
4389 buffer = NULL;
4390 return;
4391 }
4392
4393 ID3 id3(buffer, size, true /* ignorev1 */);
4394 delete[] buffer;
4395
4396 if (id3.isValid()) {
4397 struct Map {
4398 const char *key;
4399 const char *tag1;
4400 const char *tag2;
4401 };
4402 static const Map kMap[] = {
4403 { AMEDIAFORMAT_KEY_ALBUM, "TALB", "TAL" },
4404 { AMEDIAFORMAT_KEY_ARTIST, "TPE1", "TP1" },
4405 { AMEDIAFORMAT_KEY_ALBUMARTIST, "TPE2", "TP2" },
4406 { AMEDIAFORMAT_KEY_COMPOSER, "TCOM", "TCM" },
4407 { AMEDIAFORMAT_KEY_GENRE, "TCON", "TCO" },
4408 { AMEDIAFORMAT_KEY_TITLE, "TIT2", "TT2" },
4409 { AMEDIAFORMAT_KEY_YEAR, "TYE", "TYER" },
4410 { AMEDIAFORMAT_KEY_AUTHOR, "TXT", "TEXT" },
4411 { AMEDIAFORMAT_KEY_CDTRACKNUMBER, "TRK", "TRCK" },
4412 { AMEDIAFORMAT_KEY_DISCNUMBER, "TPA", "TPOS" },
4413 { AMEDIAFORMAT_KEY_COMPILATION, "TCP", "TCMP" },
4414 };
4415 static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]);
4416
4417 for (size_t i = 0; i < kNumMapEntries; ++i) {
4418 const char *ss;
4419 if (!AMediaFormat_getString(mFileMetaData, kMap[i].key, &ss)) {
4420 ID3::Iterator *it = new ID3::Iterator(id3, kMap[i].tag1);
4421 if (it->done()) {
4422 delete it;
4423 it = new ID3::Iterator(id3, kMap[i].tag2);
4424 }
4425
4426 if (it->done()) {
4427 delete it;
4428 continue;
4429 }
4430
4431 String8 s;
4432 it->getString(&s);
4433 delete it;
4434
4435 AMediaFormat_setString(mFileMetaData, kMap[i].key, s);
4436 }
4437 }
4438
4439 size_t dataSize;
4440 String8 mime;
4441 const void *data = id3.getAlbumArt(&dataSize, &mime);
4442
4443 if (data) {
4444 AMediaFormat_setBuffer(mFileMetaData, AMEDIAFORMAT_KEY_ALBUMART, data, dataSize);
4445 }
4446 }
4447 }
4448
getTrack(size_t index)4449 MediaTrackHelper *MPEG4Extractor::getTrack(size_t index) {
4450 status_t err;
4451 if ((err = readMetaData()) != OK) {
4452 return NULL;
4453 }
4454
4455 Track *track = mFirstTrack;
4456 while (index > 0) {
4457 if (track == NULL) {
4458 return NULL;
4459 }
4460
4461 track = track->next;
4462 --index;
4463 }
4464
4465 if (track == NULL) {
4466 return NULL;
4467 }
4468
4469
4470 Trex *trex = NULL;
4471 int32_t trackId;
4472 if (AMediaFormat_getInt32(track->meta, AMEDIAFORMAT_KEY_TRACK_ID, &trackId)) {
4473 for (size_t i = 0; i < mTrex.size(); i++) {
4474 Trex *t = &mTrex.editItemAt(i);
4475 if (t->track_ID == (uint32_t) trackId) {
4476 trex = t;
4477 break;
4478 }
4479 }
4480 } else {
4481 ALOGE("b/21657957");
4482 return NULL;
4483 }
4484
4485 ALOGV("getTrack called, pssh: %zu", mPssh.size());
4486
4487 const char *mime;
4488 if (!AMediaFormat_getString(track->meta, AMEDIAFORMAT_KEY_MIME, &mime)) {
4489 return NULL;
4490 }
4491 sp<ItemTable> itemTable;
4492 if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
4493 void *data;
4494 size_t size;
4495 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_AVC, &data, &size)) {
4496 return NULL;
4497 }
4498
4499 const uint8_t *ptr = (const uint8_t *)data;
4500
4501 if (size < 7 || ptr[0] != 1) { // configurationVersion == 1
4502 return NULL;
4503 }
4504 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)
4505 || !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC)) {
4506 void *data;
4507 size_t size;
4508 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_HEVC, &data, &size)) {
4509 return NULL;
4510 }
4511
4512 const uint8_t *ptr = (const uint8_t *)data;
4513
4514 if (size < 22 || ptr[0] != 1) { // configurationVersion == 1
4515 return NULL;
4516 }
4517 if (!strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC)) {
4518 itemTable = mItemTable;
4519 }
4520 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_DOLBY_VISION)) {
4521 void *data;
4522 size_t size;
4523 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_2, &data, &size)
4524 || size != 24) {
4525 return NULL;
4526 }
4527
4528 const uint8_t *ptr = (const uint8_t *)data;
4529 // dv_major.dv_minor Should be 1.0 or 2.1
4530 if ((ptr[0] != 1 || ptr[1] != 0) && (ptr[0] != 2 || ptr[1] != 1)) {
4531 return NULL;
4532 }
4533 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AV1)
4534 || !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_AVIF)) {
4535 void *data;
4536 size_t size;
4537 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_0, &data, &size)) {
4538 return NULL;
4539 }
4540
4541 const uint8_t *ptr = (const uint8_t *)data;
4542
4543 if (size < 4 || ptr[0] != 0x81) { // configurationVersion == 1
4544 return NULL;
4545 }
4546 if (!strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_AVIF)) {
4547 itemTable = mItemTable;
4548 }
4549 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_VP9)) {
4550 void *data;
4551 size_t size;
4552 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_0, &data, &size)) {
4553 return NULL;
4554 }
4555
4556 const uint8_t *ptr = (const uint8_t *)data;
4557
4558 if (size < 5 || ptr[0] != 0x01) { // configurationVersion == 1
4559 return NULL;
4560 }
4561 }
4562
4563 ALOGV("track->elst_shift_start_ticks :%" PRIu64, track->elst_shift_start_ticks);
4564
4565 uint64_t elst_initial_empty_edit_ticks = 0;
4566 if (mHeaderTimescale != 0) {
4567 // Convert empty_edit_ticks from movie timescale to media timescale.
4568 uint64_t elst_initial_empty_edit_ticks_mul = 0, elst_initial_empty_edit_ticks_add = 0;
4569 if (__builtin_mul_overflow(track->elst_initial_empty_edit_ticks, track->timescale,
4570 &elst_initial_empty_edit_ticks_mul) ||
4571 __builtin_add_overflow(elst_initial_empty_edit_ticks_mul, (mHeaderTimescale / 2),
4572 &elst_initial_empty_edit_ticks_add)) {
4573 ALOGE("track->elst_initial_empty_edit_ticks overflow");
4574 return nullptr;
4575 }
4576 elst_initial_empty_edit_ticks = elst_initial_empty_edit_ticks_add / mHeaderTimescale;
4577 }
4578 ALOGV("elst_initial_empty_edit_ticks in MediaTimeScale :%" PRIu64,
4579 elst_initial_empty_edit_ticks);
4580
4581 MPEG4Source* source =
4582 new MPEG4Source(track->meta, mDataSource, track->timescale, track->sampleTable,
4583 mSidxEntries, trex, mMoofOffset, itemTable,
4584 track->elst_shift_start_ticks, elst_initial_empty_edit_ticks);
4585 if (source->init() != OK) {
4586 delete source;
4587 return NULL;
4588 }
4589 return source;
4590 }
4591
4592 // static
verifyTrack(Track * track)4593 status_t MPEG4Extractor::verifyTrack(Track *track) {
4594 const char *mime;
4595 CHECK(AMediaFormat_getString(track->meta, AMEDIAFORMAT_KEY_MIME, &mime));
4596
4597 void *data;
4598 size_t size;
4599 if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
4600 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_AVC, &data, &size)) {
4601 return ERROR_MALFORMED;
4602 }
4603 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) {
4604 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_HEVC, &data, &size)) {
4605 return ERROR_MALFORMED;
4606 }
4607 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_DOLBY_VISION)) {
4608 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_2, &data, &size)) {
4609 return ERROR_MALFORMED;
4610 }
4611 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AV1)) {
4612 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_0, &data, &size)) {
4613 return ERROR_MALFORMED;
4614 }
4615 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_VP9)) {
4616 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_0, &data, &size)) {
4617 return ERROR_MALFORMED;
4618 }
4619 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4)
4620 || !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG2)
4621 || !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) {
4622 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_ESDS, &data, &size)) {
4623 return ERROR_MALFORMED;
4624 }
4625 }
4626
4627 if (track->sampleTable == NULL || !track->sampleTable->isValid()) {
4628 // Make sure we have all the metadata we need.
4629 ALOGE("stbl atom missing/invalid.");
4630 return ERROR_MALFORMED;
4631 }
4632
4633 if (track->timescale == 0) {
4634 ALOGE("timescale invalid.");
4635 return ERROR_MALFORMED;
4636 }
4637
4638 return OK;
4639 }
4640
// Audio object type (AOT) codes used when parsing the codec specific data of
// an MPEG-4 audio track. Only the values this file refers to are enabled; the
// remaining codes are kept as comments for reference. Values of 32 and above
// are reached through AOT_ESCAPE, which signals that more bits follow.
typedef enum {
    //AOT_NONE = -1,
    //AOT_NULL_OBJECT = 0,
    //AOT_AAC_MAIN = 1, /**< Main profile */
    AOT_AAC_LC = 2, /**< Low Complexity object */
    //AOT_AAC_SSR = 3,
    //AOT_AAC_LTP = 4,
    AOT_SBR = 5,
    //AOT_AAC_SCAL = 6,
    //AOT_TWIN_VQ = 7,
    //AOT_CELP = 8,
    //AOT_HVXC = 9,
    //AOT_RSVD_10 = 10, /**< (reserved) */
    //AOT_RSVD_11 = 11, /**< (reserved) */
    //AOT_TTSI = 12, /**< TTSI Object */
    //AOT_MAIN_SYNTH = 13, /**< Main Synthetic object */
    //AOT_WAV_TAB_SYNTH = 14, /**< Wavetable Synthesis object */
    //AOT_GEN_MIDI = 15, /**< General MIDI object */
    //AOT_ALG_SYNTH_AUD_FX = 16, /**< Algorithmic Synthesis and Audio FX object */
    AOT_ER_AAC_LC = 17, /**< Error Resilient(ER) AAC Low Complexity */
    //AOT_RSVD_18 = 18, /**< (reserved) */
    //AOT_ER_AAC_LTP = 19, /**< Error Resilient(ER) AAC LTP object */
    AOT_ER_AAC_SCAL = 20, /**< Error Resilient(ER) AAC Scalable object */
    //AOT_ER_TWIN_VQ = 21, /**< Error Resilient(ER) TwinVQ object */
    AOT_ER_BSAC = 22, /**< Error Resilient(ER) BSAC object */
    AOT_ER_AAC_LD = 23, /**< Error Resilient(ER) AAC LowDelay object */
    //AOT_ER_CELP = 24, /**< Error Resilient(ER) CELP object */
    //AOT_ER_HVXC = 25, /**< Error Resilient(ER) HVXC object */
    //AOT_ER_HILN = 26, /**< Error Resilient(ER) HILN object */
    //AOT_ER_PARA = 27, /**< Error Resilient(ER) Parametric object */
    //AOT_RSVD_28 = 28, /**< might become SSC */
    AOT_PS = 29, /**< PS, Parametric Stereo (includes SBR) */
    //AOT_MPEGS = 30, /**< MPEG Surround */

    AOT_ESCAPE = 31, /**< Signal AOT uses more than 5 bits */

    //AOT_MP3ONMP4_L1 = 32, /**< MPEG-Layer1 in mp4 */
    //AOT_MP3ONMP4_L2 = 33, /**< MPEG-Layer2 in mp4 */
    //AOT_MP3ONMP4_L3 = 34, /**< MPEG-Layer3 in mp4 */
    //AOT_RSVD_35 = 35, /**< might become DST */
    //AOT_RSVD_36 = 36, /**< might become ALS */
    //AOT_AAC_SLS = 37, /**< AAC + SLS */
    //AOT_SLS = 38, /**< SLS */
    //AOT_ER_AAC_ELD = 39, /**< AAC Enhanced Low Delay */

    AOT_USAC = 42, /**< USAC */
    //AOT_SAOC = 43, /**< SAOC */
    //AOT_LD_MPEGS = 44, /**< Low Delay MPEG Surround */

    //AOT_RSVD50 = 50, /**< Interim AOT for Rsvd50 */
} AUDIO_OBJECT_TYPE;
4692
// Refines the audio parameters of the current track (mLastTrack) from an ESDS
// blob of |esds_size| bytes at |esds_data|.
//
// Depending on the object type indication this either just rewrites the MIME
// type (0xe1 -> QCELP, 0x6B/0x69 -> MPEG audio), extracts the Vorbis headers
// into CSD_0/CSD_1 (0xdd), or parses the codec specific data bit by bit to
// obtain the audio object type, sample rate and channel count, which are then
// written to the track format.
//
// Returns OK on success (including when there is no codec specific data),
// ERROR_MALFORMED when the data is truncated/inconsistent or there is no
// current track, and ERROR_UNSUPPORTED for channel configurations that are
// not handled. Aborts via CHECK if the track format has no prior sample rate
// or channel count at the point they are updated.
status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio(
        const void *esds_data, size_t esds_size) {
    ESDS esds(esds_data, esds_size);

    uint8_t objectTypeIndication;
    if (esds.getObjectTypeIndication(&objectTypeIndication) != OK) {
        return ERROR_MALFORMED;
    }

    if (objectTypeIndication == 0xe1) {
        // This isn't MPEG4 audio at all, it's QCELP 14k...
        if (mLastTrack == NULL)
            return ERROR_MALFORMED;

        AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_QCELP);
        return OK;
    }

    if (objectTypeIndication == 0x6B || objectTypeIndication == 0x69) {
        // mp3 audio
        if (mLastTrack == NULL)
            return ERROR_MALFORMED;

        AMediaFormat_setString(mLastTrack->meta,AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_MPEG);
        return OK;
    }

    // Record the bitrates when they are non-zero and below INT32_MAX.
    if (mLastTrack != NULL) {
        uint32_t maxBitrate = 0;
        uint32_t avgBitrate = 0;
        esds.getBitRate(&maxBitrate, &avgBitrate);
        if (maxBitrate > 0 && maxBitrate < INT32_MAX) {
            AMediaFormat_setInt32(mLastTrack->meta,
                    AMEDIAFORMAT_KEY_MAX_BIT_RATE, (int32_t)maxBitrate);
        }
        if (avgBitrate > 0 && avgBitrate < INT32_MAX) {
            AMediaFormat_setInt32(mLastTrack->meta,
                    AMEDIAFORMAT_KEY_BIT_RATE, (int32_t)avgBitrate);
        }
    }

    const uint8_t *csd;
    size_t csd_size;
    if (esds.getCodecSpecificInfo(
                (const void **)&csd, &csd_size) != OK) {
        return ERROR_MALFORMED;
    }

    if (kUseHexDump) {
        printf("ESD of size %zu\n", csd_size);
        hexdump(csd, csd_size);
    }

    if (csd_size == 0) {
        // There's no further information, i.e. no codec specific data
        // Let's assume that the information provided in the mpeg4 headers
        // is accurate and hope for the best.

        return OK;
    }

    // Everything below reads at least the first two bytes of the data.
    if (csd_size < 2) {
        return ERROR_MALFORMED;
    }

    if (objectTypeIndication == 0xdd) {
        // vorbis audio
        if (csd[0] != 0x02) {
            return ERROR_MALFORMED;
        }

        // codecInfo starts with two lengths, len1 and len2, that are
        // "Xiph-style-lacing encoded"..

        // Each length is the sum of a run of 0xff bytes plus the first
        // byte that is not 0xff.
        size_t offset = 1;
        size_t len1 = 0;
        while (offset < csd_size && csd[offset] == 0xff) {
            if (__builtin_add_overflow(len1, 0xff, &len1)) {
                return ERROR_MALFORMED;
            }
            ++offset;
        }
        if (offset >= csd_size) {
            return ERROR_MALFORMED;
        }
        if (__builtin_add_overflow(len1, csd[offset], &len1)) {
            return ERROR_MALFORMED;
        }
        ++offset;
        if (len1 == 0) {
            return ERROR_MALFORMED;
        }

        size_t len2 = 0;
        while (offset < csd_size && csd[offset] == 0xff) {
            if (__builtin_add_overflow(len2, 0xff, &len2)) {
                return ERROR_MALFORMED;
            }
            ++offset;
        }
        if (offset >= csd_size) {
            return ERROR_MALFORMED;
        }
        if (__builtin_add_overflow(len2, csd[offset], &len2)) {
            return ERROR_MALFORMED;
        }
        ++offset;
        if (len2 == 0) {
            return ERROR_MALFORMED;
        }
        // len1 is non-zero here, so the size test fails (and short-circuits)
        // before csd[offset] is read whenever offset is at or past the end.
        if (offset + len1 > csd_size || csd[offset] != 0x01) {
            return ERROR_MALFORMED;
        }

        if (mLastTrack == NULL) {
            return ERROR_MALFORMED;
        }
        // formerly kKeyVorbisInfo
        AMediaFormat_setBuffer(mLastTrack->meta,
                AMEDIAFORMAT_KEY_CSD_0, &csd[offset], len1);

        // The block after the first one must start with 0x03 ...
        if (__builtin_add_overflow(offset, len1, &offset) ||
                offset >= csd_size || csd[offset] != 0x03) {
            return ERROR_MALFORMED;
        }

        // ... and the one after that with 0x05; it extends to the end of the data.
        if (__builtin_add_overflow(offset, len2, &offset) ||
                offset >= csd_size || csd[offset] != 0x05) {
            return ERROR_MALFORMED;
        }

        // formerly kKeyVorbisBooks
        AMediaFormat_setBuffer(mLastTrack->meta,
                AMEDIAFORMAT_KEY_CSD_1, &csd[offset], csd_size - offset);
        AMediaFormat_setString(mLastTrack->meta,
                AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_VORBIS);

        return OK;
    }

    // Sample rates selected by frequency index 0..12; indices 13 and 14 are
    // rejected below and 15 means an explicit 24-bit rate follows.
    static uint32_t kSamplingRate[] = {
        96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050,
        16000, 12000, 11025, 8000, 7350
    };

    // csd_size >= 2 guarantees 16 bits; the reads up to and including
    // freqIndex consume at most 5 + 6 + 4 = 15 of them.
    ABitReader br(csd, csd_size);
    uint32_t objectType = br.getBits(5);

    if (objectType == AOT_ESCAPE) {  // AAC-ELD => additional 6 bits
        objectType = 32 + br.getBits(6);
    }

    if (mLastTrack == NULL)
        return ERROR_MALFORMED;

    //keep AOT type
    AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_AAC_PROFILE, objectType);

    uint32_t freqIndex = br.getBits(4);

    int32_t sampleRate = 0;
    int32_t numChannels = 0;
    if (freqIndex == 15) {
        if (br.numBitsLeft() < 28) return ERROR_MALFORMED;
        sampleRate = br.getBits(24);
        numChannels = br.getBits(4);
    } else {
        if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
        numChannels = br.getBits(4);

        if (freqIndex == 13 || freqIndex == 14) {
            return ERROR_MALFORMED;
        }

        sampleRate = kSamplingRate[freqIndex];
    }

    if (objectType == AOT_SBR || objectType == AOT_PS) {//SBR specific config per 14496-3 tbl 1.13
        if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
        uint32_t extFreqIndex = br.getBits(4);
        if (extFreqIndex == 15) {
            if (csd_size < 8) {
                return ERROR_MALFORMED;
            }
            if (br.numBitsLeft() < 24) return ERROR_MALFORMED;
            br.skipBits(24); // extSampleRate
        } else {
            if (extFreqIndex == 13 || extFreqIndex == 14) {
                return ERROR_MALFORMED;
            }
            //extSampleRate = kSamplingRate[extFreqIndex];
        }
        //TODO: save the extension sampling rate value in meta data =>
        //      AMediaFormat_setInt32(mLastTrack->meta, kKeyExtSampleRate, extSampleRate);
    }

    // Translate the channel configuration code into a channel count.
    // Code 0 is kept as 0 here and resolved further below where possible.
    switch (numChannels) {
        // values defined in 14496-3_2009 amendment-4 Table 1.19 - Channel Configuration
        case 0:
        case 1:// FC
        case 2:// FL FR
        case 3:// FC, FL FR
        case 4:// FC, FL FR, RC
        case 5:// FC, FL FR, SL SR
        case 6:// FC, FL FR, SL SR, LFE
            //numChannels already contains the right value
            break;
        case 11:// FC, FL FR, SL SR, RC, LFE
            numChannels = 7;
            break;
        case 7: // FC, FCL FCR, FL FR, SL SR, LFE
        case 12:// FC, FL FR, SL SR, RL RR, LFE
        case 14:// FC, FL FR, SL SR, LFE, FHL FHR
            numChannels = 8;
            break;
        default:
            return ERROR_UNSUPPORTED;
    }

    {
        // For SBR/PS the object type that follows replaces the one read above.
        if (objectType == AOT_SBR || objectType == AOT_PS) {
            if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
            objectType = br.getBits(5);

            if (objectType == AOT_ESCAPE) {
                if (br.numBitsLeft() < 6) return ERROR_MALFORMED;
                objectType = 32 + br.getBits(6);
            }
        }
        if (objectType == AOT_AAC_LC || objectType == AOT_ER_AAC_LC ||
                objectType == AOT_ER_AAC_LD || objectType == AOT_ER_AAC_SCAL ||
                objectType == AOT_ER_BSAC) {
            if (br.numBitsLeft() < 2) return ERROR_MALFORMED;
            br.skipBits(1); // frameLengthFlag

            const int32_t dependsOnCoreCoder = br.getBits(1);

            if (dependsOnCoreCoder ) {
                if (br.numBitsLeft() < 14) return ERROR_MALFORMED;
                br.skipBits(14); // coreCoderDelay
            }

            // The flag is only used for the warning below; when the data ends
            // before it, a default is assumed from the object type.
            int32_t extensionFlag = -1;
            if (br.numBitsLeft() > 0) {
                extensionFlag = br.getBits(1);
            } else {
                switch (objectType) {
                // 14496-3 4.5.1.1 extensionFlag
                case AOT_AAC_LC:
                    extensionFlag = 0;
                    break;
                case AOT_ER_AAC_LC:
                case AOT_ER_AAC_SCAL:
                case AOT_ER_BSAC:
                case AOT_ER_AAC_LD:
                    extensionFlag = 1;
                    break;
                default:
                    return ERROR_MALFORMED;
                    break;
                }
                ALOGW("csd missing extension flag; assuming %d for object type %u.",
                      extensionFlag, objectType);
            }

            // Channel configuration 0: count the channels from the element
            // lists that follow in the bit stream.
            if (numChannels == 0) {
                int32_t channelsEffectiveNum = 0;
                int32_t channelsNum = 0;
                // The fixed-size fields read before the next length check
                // total 4+2+4+4+4+4+2+3+4+1 = 32 bits.
                if (br.numBitsLeft() < 32) {
                    return ERROR_MALFORMED;
                }
                br.skipBits(4); // ElementInstanceTag
                br.skipBits(2); // Profile
                br.skipBits(4); // SamplingFrequencyIndex
                const int32_t NumFrontChannelElements = br.getBits(4);
                const int32_t NumSideChannelElements = br.getBits(4);
                const int32_t NumBackChannelElements = br.getBits(4);
                const int32_t NumLfeChannelElements = br.getBits(2);
                br.skipBits(3); // NumAssocDataElements
                br.skipBits(4); // NumValidCcElements

                const int32_t MonoMixdownPresent = br.getBits(1);

                if (MonoMixdownPresent != 0) {
                    if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
                    br.skipBits(4); // MonoMixdownElementNumber
                }

                if (br.numBitsLeft() < 1) return ERROR_MALFORMED;
                const int32_t StereoMixdownPresent = br.getBits(1);
                if (StereoMixdownPresent != 0) {
                    if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
                    br.skipBits(4); // StereoMixdownElementNumber
                }

                if (br.numBitsLeft() < 1) return ERROR_MALFORMED;
                const int32_t MatrixMixdownIndexPresent = br.getBits(1);
                if (MatrixMixdownIndexPresent != 0) {
                    if (br.numBitsLeft() < 3) return ERROR_MALFORMED;
                    br.skipBits(2); // MatrixMixdownIndex
                    br.skipBits(1); // PseudoSurroundEnable
                }

                // Front/side/back elements count as two channels when flagged
                // as a pair, otherwise as one.
                int i;
                for (i=0; i < NumFrontChannelElements; i++) {
                    if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
                    const int32_t FrontElementIsCpe = br.getBits(1);
                    br.skipBits(4); // FrontElementTagSelect
                    channelsNum += FrontElementIsCpe ? 2 : 1;
                }

                for (i=0; i < NumSideChannelElements; i++) {
                    if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
                    const int32_t SideElementIsCpe = br.getBits(1);
                    br.skipBits(4); // SideElementTagSelect
                    channelsNum += SideElementIsCpe ? 2 : 1;
                }

                for (i=0; i < NumBackChannelElements; i++) {
                    if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
                    const int32_t BackElementIsCpe = br.getBits(1);
                    br.skipBits(4); // BackElementTagSelect
                    channelsNum += BackElementIsCpe ? 2 : 1;
                }
                // Count before LFE elements are added; only logged.
                channelsEffectiveNum = channelsNum;

                for (i=0; i < NumLfeChannelElements; i++) {
                    if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
                    br.skipBits(4); // LfeElementTagSelect
                    channelsNum += 1;
                }
                ALOGV("mpeg4 audio channelsNum = %d", channelsNum);
                ALOGV("mpeg4 audio channelsEffectiveNum = %d", channelsEffectiveNum);
                numChannels = channelsNum;
            }
        }
    }

    // Still no channel count: nothing usable could be derived.
    if (numChannels == 0) {
        return ERROR_UNSUPPORTED;
    }

    if (mLastTrack == NULL)
        return ERROR_MALFORMED;

    // The values parsed here override what the track format held before.
    int32_t prevSampleRate;
    CHECK(AMediaFormat_getInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, &prevSampleRate));

    if (prevSampleRate != sampleRate) {
        ALOGV("mpeg4 audio sample rate different from previous setting. "
             "was: %d, now: %d", prevSampleRate, sampleRate);
    }

    AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, sampleRate);

    int32_t prevChannelCount;
    CHECK(AMediaFormat_getInt32(mLastTrack->meta,
            AMEDIAFORMAT_KEY_CHANNEL_COUNT, &prevChannelCount));

    if (prevChannelCount != numChannels) {
        ALOGV("mpeg4 audio channel count different from previous setting. "
             "was: %d, now: %d", prevChannelCount, numChannels);
    }

    AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, numChannels);

    return OK;
}
5061
adjustRawDefaultFrameSize()5062 void MPEG4Extractor::adjustRawDefaultFrameSize() {
5063 int32_t chanCount = 0;
5064 int32_t bitWidth = 0;
5065 const char *mimeStr = NULL;
5066
5067 if(AMediaFormat_getString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, &mimeStr) &&
5068 !strcasecmp(mimeStr, MEDIA_MIMETYPE_AUDIO_RAW) &&
5069 AMediaFormat_getInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, &chanCount) &&
5070 AMediaFormat_getInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_BITS_PER_SAMPLE, &bitWidth)) {
5071 // samplesize in stsz may not right , so updade default samplesize
5072 mLastTrack->sampleTable->setPredictSampleSize(chanCount * bitWidth / 8);
5073 }
5074 }
5075
5076 ////////////////////////////////////////////////////////////////////////////////
5077
// Builds a source for one track.
//
// |format| is the track's media format; it is read for crypto settings, MIME
// type and codec configuration, and for supported raw PCM its max input size
// is written. |dataSource|, |sampleTable|, |sidx|, |trex| and |itemTable| are
// stored for later use; |firstMoofOffset| seeds both the first and current
// moof offsets. |timeScale|, |elstShiftStartTicks| and
// |elstInitialEmptyEditTicks| are stored as given.
//
// The constructor CHECK()s (aborts) when the MIME type, the track id, or the
// codec configuration needed for AVC/HEVC/Dolby Vision (or the PCM
// parameters for raw audio) are missing or malformed, so callers are
// expected to validate the format first.
MPEG4Source::MPEG4Source(
        AMediaFormat *format,
        DataSourceHelper *dataSource,
        int32_t timeScale,
        const sp<SampleTable> &sampleTable,
        Vector<SidxEntry> &sidx,
        const Trex *trex,
        off64_t firstMoofOffset,
        const sp<ItemTable> &itemTable,
        uint64_t elstShiftStartTicks,
        uint64_t elstInitialEmptyEditTicks)
    : mFormat(format),
      mDataSource(dataSource),
      mTimescale(timeScale),
      mSampleTable(sampleTable),
      mCurrentSampleIndex(0),
      mCurrentFragmentIndex(0),
      mSegments(sidx),
      mTrex(trex),
      mFirstMoofOffset(firstMoofOffset),
      mCurrentMoofOffset(firstMoofOffset),
      mCurrentMoofSize(0),
      mNextMoofOffset(-1),
      mCurrentTime(0),
      mDefaultEncryptedByteBlock(0),
      mDefaultSkipByteBlock(0),
      mCurrentSampleInfoAllocSize(0),
      mCurrentSampleInfoSizes(NULL),
      mCurrentSampleInfoOffsetsAllocSize(0),
      mCurrentSampleInfoOffsets(NULL),
      mIsAVC(false),
      mIsHEVC(false),
      mIsDolbyVision(false),
      mIsAC4(false),
      mIsPcm(false),
      mNALLengthSize(0),
      mStarted(false),
      mBuffer(NULL),
      mSrcBufferSize(0),
      mSrcBuffer(NULL),
      mItemTable(itemTable),
      mElstShiftStartTicks(elstShiftStartTicks),
      mElstInitialEmptyEditTicks(elstInitialEmptyEditTicks) {

    memset(&mTrackFragmentHeaderInfo, 0, sizeof(mTrackFragmentHeaderInfo));

    // Crypto settings are optional; the return values are not checked.
    // NOTE(review): mCryptoMode is not assigned in this constructor when the
    // key is absent - confirm it has a default in the class declaration.
    AMediaFormat_getInt32(mFormat,
            AMEDIAFORMAT_KEY_CRYPTO_MODE, &mCryptoMode);
    mDefaultIVSize = 0;
    AMediaFormat_getInt32(mFormat,
            AMEDIAFORMAT_KEY_CRYPTO_DEFAULT_IV_SIZE, &mDefaultIVSize);
    void *key;
    size_t keysize;
    if (AMediaFormat_getBuffer(mFormat,
            AMEDIAFORMAT_KEY_CRYPTO_KEY, &key, &keysize)) {
        // Keys shorter than 16 bytes are zero-padded.
        CHECK(keysize <= 16);
        memset(mCryptoKey, 0, 16);
        memcpy(mCryptoKey, key, keysize);
    }

    AMediaFormat_getInt32(mFormat,
            AMEDIAFORMAT_KEY_CRYPTO_ENCRYPTED_BYTE_BLOCK, &mDefaultEncryptedByteBlock);
    AMediaFormat_getInt32(mFormat,
            AMEDIAFORMAT_KEY_CRYPTO_SKIP_BYTE_BLOCK, &mDefaultSkipByteBlock);

    const char *mime;
    bool success = AMediaFormat_getString(mFormat, AMEDIAFORMAT_KEY_MIME, &mime);
    CHECK(success);

    // Classify the track by MIME type. HEIC images count as HEVC; the
    // HEIF/AVIF flags additionally require an item table.
    mIsMpegH = !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_MPEGH_MHA1) ||
               !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_MPEGH_MHM1);
    mIsAVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC);
    mIsHEVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC) ||
              !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC);
    mIsAC4 = !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AC4);
    mIsDolbyVision = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_DOLBY_VISION);
    mIsHeif = !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC) && mItemTable != NULL;
    mIsAvif = !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_AVIF) && mItemTable != NULL;

    if (mIsAVC) {
        void *data;
        size_t size;
        CHECK(AMediaFormat_getBuffer(format, AMEDIAFORMAT_KEY_CSD_AVC, &data, &size));

        const uint8_t *ptr = (const uint8_t *)data;

        CHECK(size >= 7);
        CHECK_EQ((unsigned)ptr[0], 1u);  // configurationVersion == 1

        // The number of bytes used to encode the length of a NAL unit.
        mNALLengthSize = 1 + (ptr[4] & 3);
    } else if (mIsHEVC) {
        void *data;
        size_t size;
        CHECK(AMediaFormat_getBuffer(format, AMEDIAFORMAT_KEY_CSD_HEVC, &data, &size));

        const uint8_t *ptr = (const uint8_t *)data;

        CHECK(size >= 22);
        CHECK_EQ((unsigned)ptr[0], 1u);  // configurationVersion == 1

        // NAL length size comes from the low two bits of byte 21.
        mNALLengthSize = 1 + (ptr[14 + 7] & 3);
    } else if (mIsDolbyVision) {
        ALOGV("%s DolbyVision stream detected", __FUNCTION__);
        void *data;
        size_t size;
        CHECK(AMediaFormat_getBuffer(format, AMEDIAFORMAT_KEY_CSD_2, &data, &size));

        const uint8_t *ptr = (const uint8_t *)data;

        CHECK(size == 24);

        // dv_major.dv_minor Should be 1.0 or 2.1
        CHECK(!((ptr[0] != 1 || ptr[1] != 0) && (ptr[0] != 2 || ptr[1] != 1)));

        // The profile selects which base-layer configuration supplies the
        // NAL length size.
        const uint8_t profile = ptr[2] >> 1;
        // profile == (unknown,1,9) --> AVC; profile = (2,3,4,5,6,7,8) --> HEVC;
        // profile == (10) --> AV1
        if (profile > 1 && profile < 9) {
            CHECK(AMediaFormat_getBuffer(format, AMEDIAFORMAT_KEY_CSD_HEVC, &data, &size));

            const uint8_t *ptr = (const uint8_t *)data;

            CHECK(size >= 22);
            CHECK_EQ((unsigned)ptr[0], 1u);  // configurationVersion == 1

            mNALLengthSize = 1 + (ptr[14 + 7] & 3);
        } else if (10 == profile) {
            /* AV1 profile nothing to do */
        } else {
            CHECK(AMediaFormat_getBuffer(format, AMEDIAFORMAT_KEY_CSD_AVC, &data, &size));
            const uint8_t *ptr = (const uint8_t *)data;

            CHECK(size >= 7);
            CHECK_EQ((unsigned)ptr[0], 1u);  // configurationVersion == 1
            // The number of bytes used to encode the length of a NAL unit.
            mNALLengthSize = 1 + (ptr[4] & 3);
        }
    }

    mIsPcm = !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_RAW);
    mIsAudio = !strncasecmp(mime, "audio/", 6);

    int32_t aacObjectType = -1;

    // NOTE(review): mIsUsac is only assigned when the AAC profile key exists -
    // confirm it has a default in the class declaration.
    if (AMediaFormat_getInt32(format, AMEDIAFORMAT_KEY_AAC_PROFILE, &aacObjectType)) {
        mIsUsac = (aacObjectType == AOT_USAC);
    }

    if (mIsPcm) {
        int32_t numChannels = 0;
        int32_t bitsPerSample = 0;
        CHECK(AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_BITS_PER_SAMPLE, &bitsPerSample));
        CHECK(AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_CHANNEL_COUNT, &numChannels));

        int32_t bytesPerSample = bitsPerSample >> 3;
        int32_t pcmSampleSize = bytesPerSample * numChannels;

        // PCM handling is kept only for 16-bit audio whose largest sample in
        // the sample table is exactly one PCM frame.
        size_t maxSampleSize;
        status_t err = mSampleTable->getMaxSampleSize(&maxSampleSize);
        if (err != OK || maxSampleSize != static_cast<size_t>(pcmSampleSize)
               || bitsPerSample != 16) {
            // Not supported
            mIsPcm = false;
        } else {
            AMediaFormat_setInt32(mFormat,
                    AMEDIAFORMAT_KEY_MAX_INPUT_SIZE, pcmSampleSize * kMaxPcmFrameSize);
        }
    }

    CHECK(AMediaFormat_getInt32(format, AMEDIAFORMAT_KEY_TRACK_ID, &mTrackId));
}
5250
init()5251 status_t MPEG4Source::init() {
5252 if (mFirstMoofOffset != 0) {
5253 off64_t offset = mFirstMoofOffset;
5254 return parseChunk(&offset);
5255 }
5256 return OK;
5257 }
5258
~MPEG4Source()5259 MPEG4Source::~MPEG4Source() {
5260 if (mStarted) {
5261 stop();
5262 }
5263 free(mCurrentSampleInfoSizes);
5264 free(mCurrentSampleInfoOffsets);
5265 }
5266
start()5267 media_status_t MPEG4Source::start() {
5268 Mutex::Autolock autoLock(mLock);
5269
5270 CHECK(!mStarted);
5271
5272 int32_t tmp;
5273 CHECK(AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_MAX_INPUT_SIZE, &tmp));
5274 size_t max_size = tmp;
5275
5276 // A somewhat arbitrary limit that should be sufficient for 8k video frames
5277 // If you see the message below for a valid input stream: increase the limit
5278 const size_t kMaxBufferSize = 64 * 1024 * 1024;
5279 if (max_size > kMaxBufferSize) {
5280 ALOGE("bogus max input size: %zu > %zu", max_size, kMaxBufferSize);
5281 return AMEDIA_ERROR_MALFORMED;
5282 }
5283 if (max_size == 0) {
5284 ALOGE("zero max input size");
5285 return AMEDIA_ERROR_MALFORMED;
5286 }
5287
5288 // Allow up to kMaxBuffers, but not if the total exceeds kMaxBufferSize.
5289 const size_t kInitialBuffers = 2;
5290 const size_t kMaxBuffers = 8;
5291 const size_t realMaxBuffers = min(kMaxBufferSize / max_size, kMaxBuffers);
5292 mBufferGroup->init(kInitialBuffers, max_size, realMaxBuffers);
5293 mSrcBuffer = new (std::nothrow) uint8_t[max_size];
5294 if (mSrcBuffer == NULL) {
5295 // file probably specified a bad max size
5296 return AMEDIA_ERROR_MALFORMED;
5297 }
5298 mSrcBufferSize = max_size;
5299
5300 mStarted = true;
5301
5302 return AMEDIA_OK;
5303 }
5304
stop()5305 media_status_t MPEG4Source::stop() {
5306 Mutex::Autolock autoLock(mLock);
5307
5308 CHECK(mStarted);
5309
5310 if (mBuffer != NULL) {
5311 mBuffer->release();
5312 mBuffer = NULL;
5313 }
5314
5315 mSrcBufferSize = 0;
5316 delete[] mSrcBuffer;
5317 mSrcBuffer = NULL;
5318
5319 mStarted = false;
5320 mCurrentSampleIndex = 0;
5321
5322 return AMEDIA_OK;
5323 }
5324
parseChunk(off64_t * offset)5325 status_t MPEG4Source::parseChunk(off64_t *offset) {
5326 uint32_t hdr[2];
5327 if (mDataSource->readAt(*offset, hdr, 8) < 8) {
5328 return ERROR_IO;
5329 }
5330 uint64_t chunk_size = ntohl(hdr[0]);
5331 uint32_t chunk_type = ntohl(hdr[1]);
5332 off64_t data_offset = *offset + 8;
5333
5334 if (chunk_size == 1) {
5335 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
5336 return ERROR_IO;
5337 }
5338 chunk_size = ntoh64(chunk_size);
5339 data_offset += 8;
5340
5341 if (chunk_size < 16) {
5342 // The smallest valid chunk is 16 bytes long in this case.
5343 return ERROR_MALFORMED;
5344 }
5345 } else if (chunk_size < 8) {
5346 // The smallest valid chunk is 8 bytes long.
5347 return ERROR_MALFORMED;
5348 }
5349
5350 char chunk[5];
5351 MakeFourCCString(chunk_type, chunk);
5352 ALOGV("MPEG4Source chunk %s @ %#llx", chunk, (long long)*offset);
5353
5354 off64_t chunk_data_size = *offset + chunk_size - data_offset;
5355
5356 switch(chunk_type) {
5357
5358 case FOURCC("traf"):
5359 case FOURCC("moof"): {
5360 off64_t stop_offset = *offset + chunk_size;
5361 *offset = data_offset;
5362 if (chunk_type == FOURCC("moof")) {
5363 mCurrentMoofSize = chunk_data_size;
5364 }
5365 while (*offset < stop_offset) {
5366 status_t err = parseChunk(offset);
5367 if (err != OK) {
5368 return err;
5369 }
5370 }
5371 if (chunk_type == FOURCC("moof")) {
5372 // *offset points to the box following this moof. Find the next moof from there.
5373
5374 while (true) {
5375 if (mDataSource->readAt(*offset, hdr, 8) < 8) {
5376 // no more box to the end of file.
5377 break;
5378 }
5379 chunk_size = ntohl(hdr[0]);
5380 chunk_type = ntohl(hdr[1]);
5381 if (chunk_size == 1) {
5382 // ISO/IEC 14496-12:2012, 8.8.4 Movie Fragment Box, moof is a Box
5383 // which is defined in 4.2 Object Structure.
5384 // When chunk_size==1, 8 bytes follows as "largesize".
5385 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
5386 return ERROR_IO;
5387 }
5388 chunk_size = ntoh64(chunk_size);
5389 if (chunk_size < 16) {
5390 // The smallest valid chunk is 16 bytes long in this case.
5391 return ERROR_MALFORMED;
5392 }
5393 } else if (chunk_size == 0) {
5394 // next box extends to end of file.
5395 } else if (chunk_size < 8) {
5396 // The smallest valid chunk is 8 bytes long in this case.
5397 return ERROR_MALFORMED;
5398 }
5399
5400 if (chunk_type == FOURCC("moof")) {
5401 mNextMoofOffset = *offset;
5402 break;
5403 } else if (chunk_type == FOURCC("mdat")) {
5404 parseChunk(offset);
5405 continue;
5406 } else if (chunk_size == 0) {
5407 break;
5408 }
5409 *offset += chunk_size;
5410 }
5411 }
5412 break;
5413 }
5414
5415 case FOURCC("tfhd"): {
5416 status_t err;
5417 if ((err = parseTrackFragmentHeader(data_offset, chunk_data_size)) != OK) {
5418 return err;
5419 }
5420 *offset += chunk_size;
5421 break;
5422 }
5423
5424 case FOURCC("trun"): {
5425 status_t err;
5426 if (mLastParsedTrackId == mTrackId) {
5427 if ((err = parseTrackFragmentRun(data_offset, chunk_data_size)) != OK) {
5428 return err;
5429 }
5430 }
5431
5432 *offset += chunk_size;
5433 break;
5434 }
5435
5436 case FOURCC("saiz"): {
5437 status_t err;
5438 if ((err = parseSampleAuxiliaryInformationSizes(data_offset, chunk_data_size)) != OK) {
5439 return err;
5440 }
5441 *offset += chunk_size;
5442 break;
5443 }
5444 case FOURCC("saio"): {
5445 status_t err;
5446 if ((err = parseSampleAuxiliaryInformationOffsets(data_offset, chunk_data_size))
5447 != OK) {
5448 return err;
5449 }
5450 *offset += chunk_size;
5451 break;
5452 }
5453
5454 case FOURCC("senc"): {
5455 status_t err;
5456 if ((err = parseSampleEncryption(data_offset, chunk_data_size)) != OK) {
5457 return err;
5458 }
5459 *offset += chunk_size;
5460 break;
5461 }
5462
5463 case FOURCC("mdat"): {
5464 // parse DRM info if present
5465 ALOGV("MPEG4Source::parseChunk mdat");
5466 // if saiz/saoi was previously observed, do something with the sampleinfos
5467 status_t err = OK;
5468 auto kv = mDrmOffsets.lower_bound(*offset);
5469 if (kv != mDrmOffsets.end()) {
5470 auto drmoffset = kv->first;
5471 auto flags = kv->second;
5472 mDrmOffsets.erase(kv);
5473 ALOGV("mdat chunk_size %" PRIu64 " drmoffset %" PRId64 " offset %" PRId64,
5474 chunk_size, drmoffset, *offset);
5475 if (chunk_size >= drmoffset - *offset) {
5476 err = parseClearEncryptedSizes(drmoffset, false, flags,
5477 chunk_size - (drmoffset - *offset));
5478 }
5479 }
5480 if (err != OK) {
5481 return err;
5482 }
5483 *offset += chunk_size;
5484 break;
5485 }
5486
5487 default: {
5488 *offset += chunk_size;
5489 break;
5490 }
5491 }
5492 return OK;
5493 }
5494
parseSampleAuxiliaryInformationSizes(off64_t offset,off64_t size)5495 status_t MPEG4Source::parseSampleAuxiliaryInformationSizes(
5496 off64_t offset, off64_t size) {
5497 ALOGV("parseSampleAuxiliaryInformationSizes");
5498 if (size < 9) {
5499 return -EINVAL;
5500 }
5501 // 14496-12 8.7.12
5502 uint8_t version;
5503 if (mDataSource->readAt(
5504 offset, &version, sizeof(version))
5505 < (ssize_t)sizeof(version)) {
5506 return ERROR_IO;
5507 }
5508
5509 if (version != 0) {
5510 return ERROR_UNSUPPORTED;
5511 }
5512 offset++;
5513 size--;
5514
5515 uint32_t flags;
5516 if (!mDataSource->getUInt24(offset, &flags)) {
5517 return ERROR_IO;
5518 }
5519 offset += 3;
5520 size -= 3;
5521
5522 if (flags & 1) {
5523 if (size < 13) {
5524 return -EINVAL;
5525 }
5526 uint32_t tmp;
5527 if (!mDataSource->getUInt32(offset, &tmp)) {
5528 return ERROR_MALFORMED;
5529 }
5530 mCurrentAuxInfoType = tmp;
5531 offset += 4;
5532 size -= 4;
5533 if (!mDataSource->getUInt32(offset, &tmp)) {
5534 return ERROR_MALFORMED;
5535 }
5536 mCurrentAuxInfoTypeParameter = tmp;
5537 offset += 4;
5538 size -= 4;
5539 }
5540
5541 uint8_t defsize;
5542 if (mDataSource->readAt(offset, &defsize, 1) != 1) {
5543 return ERROR_MALFORMED;
5544 }
5545 mCurrentDefaultSampleInfoSize = defsize;
5546 offset++;
5547 size--;
5548
5549 uint32_t smplcnt;
5550 if (!mDataSource->getUInt32(offset, &smplcnt)) {
5551 return ERROR_MALFORMED;
5552 }
5553 mCurrentSampleInfoCount = smplcnt;
5554 offset += 4;
5555 size -= 4;
5556 if (mCurrentDefaultSampleInfoSize != 0) {
5557 ALOGV("@@@@ using default sample info size of %d", mCurrentDefaultSampleInfoSize);
5558 return OK;
5559 }
5560 if(smplcnt > size) {
5561 ALOGW("b/124525515 - smplcnt(%u) > size(%ld)", (unsigned int)smplcnt, (unsigned long)size);
5562 android_errorWriteLog(0x534e4554, "124525515");
5563 return -EINVAL;
5564 }
5565 if (smplcnt > mCurrentSampleInfoAllocSize) {
5566 uint8_t * newPtr = (uint8_t*) realloc(mCurrentSampleInfoSizes, smplcnt);
5567 if (newPtr == NULL) {
5568 ALOGE("failed to realloc %u -> %u", mCurrentSampleInfoAllocSize, smplcnt);
5569 return NO_MEMORY;
5570 }
5571 mCurrentSampleInfoSizes = newPtr;
5572 mCurrentSampleInfoAllocSize = smplcnt;
5573 }
5574
5575 mDataSource->readAt(offset, mCurrentSampleInfoSizes, smplcnt);
5576 return OK;
5577 }
5578
parseSampleAuxiliaryInformationOffsets(off64_t offset,off64_t size)5579 status_t MPEG4Source::parseSampleAuxiliaryInformationOffsets(
5580 off64_t offset, off64_t size) {
5581 ALOGV("parseSampleAuxiliaryInformationOffsets");
5582 if (size < 8) {
5583 return -EINVAL;
5584 }
5585 // 14496-12 8.7.13
5586 uint8_t version;
5587 if (mDataSource->readAt(offset, &version, sizeof(version)) != 1) {
5588 return ERROR_IO;
5589 }
5590 offset++;
5591 size--;
5592
5593 uint32_t flags;
5594 if (!mDataSource->getUInt24(offset, &flags)) {
5595 return ERROR_IO;
5596 }
5597 offset += 3;
5598 size -= 3;
5599
5600 uint32_t entrycount;
5601 if (!mDataSource->getUInt32(offset, &entrycount)) {
5602 return ERROR_IO;
5603 }
5604 offset += 4;
5605 size -= 4;
5606 if (entrycount == 0) {
5607 return OK;
5608 }
5609 if (entrycount > UINT32_MAX / 8) {
5610 return ERROR_MALFORMED;
5611 }
5612
5613 if (entrycount > mCurrentSampleInfoOffsetsAllocSize) {
5614 uint64_t *newPtr = (uint64_t *)realloc(mCurrentSampleInfoOffsets, entrycount * 8);
5615 if (newPtr == NULL) {
5616 ALOGE("failed to realloc %u -> %u",
5617 mCurrentSampleInfoOffsetsAllocSize, entrycount * 8);
5618 return NO_MEMORY;
5619 }
5620 mCurrentSampleInfoOffsets = newPtr;
5621 mCurrentSampleInfoOffsetsAllocSize = entrycount;
5622 }
5623 mCurrentSampleInfoOffsetCount = entrycount;
5624
5625 if (mCurrentSampleInfoOffsets == NULL) {
5626 return OK;
5627 }
5628
5629 for (size_t i = 0; i < entrycount; i++) {
5630 if (version == 0) {
5631 if (size < 4) {
5632 ALOGW("b/124526959");
5633 android_errorWriteLog(0x534e4554, "124526959");
5634 return -EINVAL;
5635 }
5636 uint32_t tmp;
5637 if (!mDataSource->getUInt32(offset, &tmp)) {
5638 return ERROR_IO;
5639 }
5640 mCurrentSampleInfoOffsets[i] = tmp;
5641 offset += 4;
5642 size -= 4;
5643 } else {
5644 if (size < 8) {
5645 ALOGW("b/124526959");
5646 android_errorWriteLog(0x534e4554, "124526959");
5647 return -EINVAL;
5648 }
5649 uint64_t tmp;
5650 if (!mDataSource->getUInt64(offset, &tmp)) {
5651 return ERROR_IO;
5652 }
5653 mCurrentSampleInfoOffsets[i] = tmp;
5654 offset += 8;
5655 size -= 8;
5656 }
5657 }
5658
5659 // parse clear/encrypted data
5660
5661 off64_t drmoffset = mCurrentSampleInfoOffsets[0]; // from moof
5662
5663 drmoffset += mCurrentMoofOffset;
5664 mDrmOffsets[drmoffset] = flags;
5665 ALOGV("saio drmoffset %" PRId64 " flags %u", drmoffset, flags);
5666
5667 return OK;
5668 }
5669
parseClearEncryptedSizes(off64_t offset,bool isSampleEncryption,uint32_t flags,off64_t size)5670 status_t MPEG4Source::parseClearEncryptedSizes(
5671 off64_t offset, bool isSampleEncryption, uint32_t flags, off64_t size) {
5672
5673 int32_t ivlength;
5674 if (!AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_CRYPTO_DEFAULT_IV_SIZE, &ivlength)) {
5675 return ERROR_MALFORMED;
5676 }
5677
5678 // only 0, 8 and 16 byte initialization vectors are supported
5679 if (ivlength != 0 && ivlength != 8 && ivlength != 16) {
5680 ALOGW("unsupported IV length: %d", ivlength);
5681 return ERROR_MALFORMED;
5682 }
5683
5684 uint32_t sampleCount = mCurrentSampleInfoCount;
5685 if (isSampleEncryption) {
5686 if (size < 4) {
5687 return ERROR_MALFORMED;
5688 }
5689 if (!mDataSource->getUInt32(offset, &sampleCount)) {
5690 return ERROR_IO;
5691 }
5692 offset += 4;
5693 size -= 4;
5694 }
5695
5696 // read CencSampleAuxiliaryDataFormats
5697 for (size_t i = 0; i < sampleCount; i++) {
5698 if (i >= mCurrentSamples.size()) {
5699 ALOGW("too few samples");
5700 break;
5701 }
5702 Sample *smpl = &mCurrentSamples.editItemAt(i);
5703 if (!smpl->clearsizes.isEmpty()) {
5704 continue;
5705 }
5706
5707 memset(smpl->iv, 0, 16);
5708 if (size < ivlength) {
5709 return ERROR_MALFORMED;
5710 }
5711 if (mDataSource->readAt(offset, smpl->iv, ivlength) != ivlength) {
5712 return ERROR_IO;
5713 }
5714
5715 offset += ivlength;
5716 size -= ivlength;
5717
5718 bool readSubsamples;
5719 if (isSampleEncryption) {
5720 readSubsamples = flags & 2;
5721 } else {
5722 int32_t smplinfosize = mCurrentDefaultSampleInfoSize;
5723 if (smplinfosize == 0) {
5724 smplinfosize = mCurrentSampleInfoSizes[i];
5725 }
5726 readSubsamples = smplinfosize > ivlength;
5727 }
5728
5729 if (readSubsamples) {
5730 uint16_t numsubsamples;
5731 if (size < 2) {
5732 return ERROR_MALFORMED;
5733 }
5734 if (!mDataSource->getUInt16(offset, &numsubsamples)) {
5735 return ERROR_IO;
5736 }
5737 offset += 2;
5738 size -= 2;
5739 for (size_t j = 0; j < numsubsamples; j++) {
5740 uint16_t numclear;
5741 uint32_t numencrypted;
5742 if (size < 6) {
5743 return ERROR_MALFORMED;
5744 }
5745 if (!mDataSource->getUInt16(offset, &numclear)) {
5746 return ERROR_IO;
5747 }
5748 offset += 2;
5749 if (!mDataSource->getUInt32(offset, &numencrypted)) {
5750 return ERROR_IO;
5751 }
5752 offset += 4;
5753 size -= 6;
5754 smpl->clearsizes.add(numclear);
5755 smpl->encryptedsizes.add(numencrypted);
5756 }
5757 } else {
5758 smpl->clearsizes.add(0);
5759 smpl->encryptedsizes.add(smpl->size);
5760 }
5761 }
5762
5763 return OK;
5764 }
5765
parseSampleEncryption(off64_t offset,off64_t chunk_data_size)5766 status_t MPEG4Source::parseSampleEncryption(off64_t offset, off64_t chunk_data_size) {
5767 uint32_t flags;
5768 if (chunk_data_size < 4) {
5769 return ERROR_MALFORMED;
5770 }
5771 if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags
5772 return ERROR_MALFORMED;
5773 }
5774 return parseClearEncryptedSizes(offset + 4, true, flags, chunk_data_size - 4);
5775 }
5776
parseTrackFragmentHeader(off64_t offset,off64_t size)5777 status_t MPEG4Source::parseTrackFragmentHeader(off64_t offset, off64_t size) {
5778
5779 if (size < 8) {
5780 return -EINVAL;
5781 }
5782
5783 uint32_t flags;
5784 if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags
5785 return ERROR_MALFORMED;
5786 }
5787
5788 if (flags & 0xff000000) {
5789 return -EINVAL;
5790 }
5791
5792 if (!mDataSource->getUInt32(offset + 4, (uint32_t*)&mLastParsedTrackId)) {
5793 return ERROR_MALFORMED;
5794 }
5795
5796 if (mLastParsedTrackId != mTrackId) {
5797 // this is not the right track, skip it
5798 return OK;
5799 }
5800
5801 mTrackFragmentHeaderInfo.mFlags = flags;
5802 mTrackFragmentHeaderInfo.mTrackID = mLastParsedTrackId;
5803 offset += 8;
5804 size -= 8;
5805
5806 ALOGV("fragment header: %08x %08x", flags, mTrackFragmentHeaderInfo.mTrackID);
5807
5808 if (flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent) {
5809 if (size < 8) {
5810 return -EINVAL;
5811 }
5812
5813 if (!mDataSource->getUInt64(offset, &mTrackFragmentHeaderInfo.mBaseDataOffset)) {
5814 return ERROR_MALFORMED;
5815 }
5816 offset += 8;
5817 size -= 8;
5818 }
5819
5820 if (flags & TrackFragmentHeaderInfo::kSampleDescriptionIndexPresent) {
5821 if (size < 4) {
5822 return -EINVAL;
5823 }
5824
5825 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mSampleDescriptionIndex)) {
5826 return ERROR_MALFORMED;
5827 }
5828 offset += 4;
5829 size -= 4;
5830 }
5831
5832 if (flags & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
5833 if (size < 4) {
5834 return -EINVAL;
5835 }
5836
5837 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleDuration)) {
5838 return ERROR_MALFORMED;
5839 }
5840 offset += 4;
5841 size -= 4;
5842 }
5843
5844 if (flags & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) {
5845 if (size < 4) {
5846 return -EINVAL;
5847 }
5848
5849 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleSize)) {
5850 return ERROR_MALFORMED;
5851 }
5852 offset += 4;
5853 size -= 4;
5854 }
5855
5856 if (flags & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) {
5857 if (size < 4) {
5858 return -EINVAL;
5859 }
5860
5861 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleFlags)) {
5862 return ERROR_MALFORMED;
5863 }
5864 offset += 4;
5865 size -= 4;
5866 }
5867
5868 if (!(flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent)) {
5869 mTrackFragmentHeaderInfo.mBaseDataOffset = mCurrentMoofOffset;
5870 }
5871
5872 mTrackFragmentHeaderInfo.mDataOffset = 0;
5873 return OK;
5874 }
5875
parseTrackFragmentRun(off64_t offset,off64_t size)5876 status_t MPEG4Source::parseTrackFragmentRun(off64_t offset, off64_t size) {
5877
5878 ALOGV("MPEG4Source::parseTrackFragmentRun");
5879 if (size < 8) {
5880 return -EINVAL;
5881 }
5882
5883 enum {
5884 kDataOffsetPresent = 0x01,
5885 kFirstSampleFlagsPresent = 0x04,
5886 kSampleDurationPresent = 0x100,
5887 kSampleSizePresent = 0x200,
5888 kSampleFlagsPresent = 0x400,
5889 kSampleCompositionTimeOffsetPresent = 0x800,
5890 };
5891
5892 uint32_t flags;
5893 if (!mDataSource->getUInt32(offset, &flags)) {
5894 return ERROR_MALFORMED;
5895 }
5896 // |version| only affects SampleCompositionTimeOffset field.
5897 // If version == 0, SampleCompositionTimeOffset is uint32_t;
5898 // Otherwise, SampleCompositionTimeOffset is int32_t.
5899 // Sample.compositionOffset is defined as int32_t.
5900 uint8_t version = flags >> 24;
5901 flags &= 0xffffff;
5902 ALOGV("fragment run version: 0x%02x, flags: 0x%06x", version, flags);
5903
5904 if ((flags & kFirstSampleFlagsPresent) && (flags & kSampleFlagsPresent)) {
5905 // These two shall not be used together.
5906 return -EINVAL;
5907 }
5908
5909 uint32_t sampleCount;
5910 if (!mDataSource->getUInt32(offset + 4, &sampleCount)) {
5911 return ERROR_MALFORMED;
5912 }
5913 offset += 8;
5914 size -= 8;
5915
5916 uint64_t dataOffset = mTrackFragmentHeaderInfo.mDataOffset;
5917
5918 uint32_t firstSampleFlags = 0;
5919
5920 if (flags & kDataOffsetPresent) {
5921 if (size < 4) {
5922 return -EINVAL;
5923 }
5924
5925 uint32_t dataOffsetDelta;
5926 if (!mDataSource->getUInt32(offset, &dataOffsetDelta)) {
5927 return ERROR_MALFORMED;
5928 }
5929
5930 if (__builtin_add_overflow(
5931 mTrackFragmentHeaderInfo.mBaseDataOffset, dataOffsetDelta, &dataOffset)) {
5932 ALOGW("b/232242894 mBaseDataOffset(%" PRIu64 ") + dataOffsetDelta(%u) overflows uint64",
5933 mTrackFragmentHeaderInfo.mBaseDataOffset, dataOffsetDelta);
5934 android_errorWriteLog(0x534e4554, "232242894");
5935 return ERROR_MALFORMED;
5936 }
5937
5938 offset += 4;
5939 size -= 4;
5940 }
5941
5942 if (flags & kFirstSampleFlagsPresent) {
5943 if (size < 4) {
5944 return -EINVAL;
5945 }
5946
5947 if (!mDataSource->getUInt32(offset, &firstSampleFlags)) {
5948 return ERROR_MALFORMED;
5949 }
5950 offset += 4;
5951 size -= 4;
5952 }
5953
5954 uint32_t sampleDuration = 0, sampleSize = 0, sampleFlags = 0,
5955 sampleCtsOffset = 0;
5956
5957 size_t bytesPerSample = 0;
5958 if (flags & kSampleDurationPresent) {
5959 bytesPerSample += 4;
5960 } else if (mTrackFragmentHeaderInfo.mFlags
5961 & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
5962 sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration;
5963 } else if (mTrex) {
5964 sampleDuration = mTrex->default_sample_duration;
5965 }
5966
5967 if (flags & kSampleSizePresent) {
5968 bytesPerSample += 4;
5969 } else {
5970 sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize;
5971 #ifdef VERY_VERY_VERBOSE_LOGGING
5972 // We don't expect this, but also want to avoid spamming the log if
5973 // we hit this case.
5974 if (!(mTrackFragmentHeaderInfo.mFlags
5975 & TrackFragmentHeaderInfo::kDefaultSampleSizePresent)) {
5976 ALOGW("No sample size specified");
5977 }
5978 #endif
5979 }
5980
5981 if (flags & kSampleFlagsPresent) {
5982 bytesPerSample += 4;
5983 } else {
5984 sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags;
5985 #ifdef VERY_VERY_VERBOSE_LOGGING
5986 // We don't expect this, but also want to avoid spamming the log if
5987 // we hit this case.
5988 if (!(mTrackFragmentHeaderInfo.mFlags
5989 & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent)) {
5990 ALOGW("No sample flags specified");
5991 }
5992 #endif
5993 }
5994
5995 if (flags & kSampleCompositionTimeOffsetPresent) {
5996 bytesPerSample += 4;
5997 } else {
5998 sampleCtsOffset = 0;
5999 }
6000
6001 if (bytesPerSample != 0) {
6002 if (size < (off64_t)sampleCount * bytesPerSample) {
6003 return -EINVAL;
6004 }
6005 } else {
6006 if (sampleDuration == 0) {
6007 ALOGW("b/123389881 sampleDuration == 0");
6008 android_errorWriteLog(0x534e4554, "124389881 zero");
6009 return -EINVAL;
6010 }
6011
6012 // apply some quick (vs strict legality) checks
6013 //
6014 static constexpr uint32_t kMaxTrunSampleCount = 10000;
6015 if (sampleCount > kMaxTrunSampleCount) {
6016 ALOGW("b/123389881 sampleCount(%u) > kMaxTrunSampleCount(%u)",
6017 sampleCount, kMaxTrunSampleCount);
6018 android_errorWriteLog(0x534e4554, "124389881 count");
6019 return -EINVAL;
6020 }
6021 }
6022
6023 Sample tmp;
6024 for (uint32_t i = 0; i < sampleCount; ++i) {
6025 if (flags & kSampleDurationPresent) {
6026 if (!mDataSource->getUInt32(offset, &sampleDuration)) {
6027 return ERROR_MALFORMED;
6028 }
6029 offset += 4;
6030 }
6031
6032 if (flags & kSampleSizePresent) {
6033 if (!mDataSource->getUInt32(offset, &sampleSize)) {
6034 return ERROR_MALFORMED;
6035 }
6036 offset += 4;
6037 }
6038
6039 if (flags & kSampleFlagsPresent) {
6040 if (!mDataSource->getUInt32(offset, &sampleFlags)) {
6041 return ERROR_MALFORMED;
6042 }
6043 offset += 4;
6044 }
6045
6046 if (flags & kSampleCompositionTimeOffsetPresent) {
6047 if (!mDataSource->getUInt32(offset, &sampleCtsOffset)) {
6048 return ERROR_MALFORMED;
6049 }
6050 offset += 4;
6051 }
6052
6053 ALOGV("adding sample %d at offset 0x%08" PRIx64 ", size %u, duration %u, "
6054 " flags 0x%08x ctsOffset %" PRIu32, i + 1,
6055 dataOffset, sampleSize, sampleDuration,
6056 (flags & kFirstSampleFlagsPresent) && i == 0
6057 ? firstSampleFlags : sampleFlags, sampleCtsOffset);
6058 tmp.offset = dataOffset;
6059 tmp.size = sampleSize;
6060 tmp.duration = sampleDuration;
6061 tmp.compositionOffset = sampleCtsOffset;
6062 memset(tmp.iv, 0, sizeof(tmp.iv));
6063 if (mCurrentSamples.add(tmp) < 0) {
6064 ALOGW("b/123389881 failed saving sample(n=%zu)", mCurrentSamples.size());
6065 android_errorWriteLog(0x534e4554, "124389881 allocation");
6066 mCurrentSamples.clear();
6067 return NO_MEMORY;
6068 }
6069
6070 if (__builtin_add_overflow(dataOffset, sampleSize, &dataOffset)) {
6071 ALOGW("b/232242894 dataOffset(%" PRIu64 ") + sampleSize(%u) overflows uint64",
6072 dataOffset, sampleSize);
6073 android_errorWriteLog(0x534e4554, "232242894");
6074 return ERROR_MALFORMED;
6075 }
6076 }
6077
6078 mTrackFragmentHeaderInfo.mDataOffset = dataOffset;
6079
6080 return OK;
6081 }
6082
getFormat(AMediaFormat * meta)6083 media_status_t MPEG4Source::getFormat(AMediaFormat *meta) {
6084 Mutex::Autolock autoLock(mLock);
6085 AMediaFormat_copy(meta, mFormat);
6086 return AMEDIA_OK;
6087 }
6088
parseNALSize(const uint8_t * data) const6089 size_t MPEG4Source::parseNALSize(const uint8_t *data) const {
6090 switch (mNALLengthSize) {
6091 case 1:
6092 return *data;
6093 case 2:
6094 return U16_AT(data);
6095 case 3:
6096 return ((size_t)data[0] << 16) | U16_AT(&data[1]);
6097 case 4:
6098 return U32_AT(data);
6099 }
6100
6101 // This cannot happen, mNALLengthSize springs to life by adding 1 to
6102 // a 2-bit integer.
6103 CHECK(!"Should not be here.");
6104
6105 return 0;
6106 }
6107
parseHEVCLayerId(const uint8_t * data,size_t size)6108 int32_t MPEG4Source::parseHEVCLayerId(const uint8_t *data, size_t size) {
6109 if (data == nullptr || size < mNALLengthSize + 2) {
6110 return -1;
6111 }
6112
6113 // HEVC NAL-header (16-bit)
6114 // 1 6 6 3
6115 // |-|uuuuuu|------|iii|
6116 // ^ ^
6117 // NAL_type layer_id + 1
6118 //
6119 // Layer-id is non-zero only for Temporal Sub-layer Access pictures (TSA)
6120 enum {
6121 TSA_N = 2,
6122 TSA_R = 3,
6123 STSA_N = 4,
6124 STSA_R = 5,
6125 };
6126
6127 data += mNALLengthSize;
6128 uint16_t nalHeader = data[0] << 8 | data[1];
6129
6130 uint16_t nalType = (nalHeader >> 9) & 0x3Fu;
6131 if (nalType == TSA_N || nalType == TSA_R || nalType == STSA_N || nalType == STSA_R) {
6132 int32_t layerIdPlusOne = nalHeader & 0x7u;
6133 ALOGD_IF(layerIdPlusOne == 0, "got layerId 0 for TSA picture");
6134 return layerIdPlusOne - 1;
6135 }
6136 return 0;
6137 }
6138
read(MediaBufferHelper ** out,const ReadOptions * options)6139 media_status_t MPEG4Source::read(
6140 MediaBufferHelper **out, const ReadOptions *options) {
6141 Mutex::Autolock autoLock(mLock);
6142
6143 CHECK(mStarted);
6144
6145 if (options != nullptr && options->getNonBlocking() && !mBufferGroup->has_buffers()) {
6146 *out = nullptr;
6147 return AMEDIA_ERROR_WOULD_BLOCK;
6148 }
6149
6150 if (mFirstMoofOffset > 0) {
6151 return fragmentedRead(out, options);
6152 }
6153
6154 *out = NULL;
6155
6156 int64_t targetSampleTimeUs = -1;
6157
6158 int64_t seekTimeUs;
6159 ReadOptions::SeekMode mode;
6160
6161 if (options && options->getSeekTo(&seekTimeUs, &mode)) {
6162 ALOGV("seekTimeUs:%" PRId64, seekTimeUs);
6163 if (mIsHeif || mIsAvif) {
6164 CHECK(mSampleTable == NULL);
6165 CHECK(mItemTable != NULL);
6166 int32_t imageIndex;
6167 if (!AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_TRACK_ID, &imageIndex)) {
6168 return AMEDIA_ERROR_MALFORMED;
6169 }
6170
6171 status_t err;
6172 if (seekTimeUs >= 0) {
6173 err = mItemTable->findImageItem(imageIndex, &mCurrentSampleIndex);
6174 } else {
6175 err = mItemTable->findThumbnailItem(imageIndex, &mCurrentSampleIndex);
6176 }
6177 if (err != OK) {
6178 return AMEDIA_ERROR_UNKNOWN;
6179 }
6180 } else {
6181 uint32_t findFlags = 0;
6182 switch (mode) {
6183 case ReadOptions::SEEK_PREVIOUS_SYNC:
6184 findFlags = SampleTable::kFlagBefore;
6185 break;
6186 case ReadOptions::SEEK_NEXT_SYNC:
6187 findFlags = SampleTable::kFlagAfter;
6188 break;
6189 case ReadOptions::SEEK_CLOSEST_SYNC:
6190 case ReadOptions::SEEK_CLOSEST:
6191 findFlags = SampleTable::kFlagClosest;
6192 break;
6193 case ReadOptions::SEEK_FRAME_INDEX:
6194 findFlags = SampleTable::kFlagFrameIndex;
6195 break;
6196 default:
6197 CHECK(!"Should not be here.");
6198 break;
6199 }
6200 if( mode != ReadOptions::SEEK_FRAME_INDEX) {
6201 int64_t elstInitialEmptyEditUs = 0, elstShiftStartUs = 0;
6202 if (mElstInitialEmptyEditTicks > 0) {
6203 elstInitialEmptyEditUs = ((long double)mElstInitialEmptyEditTicks * 1000000) /
6204 mTimescale;
6205 /* Sample's composition time from ctts/stts entries are non-negative(>=0).
6206 * Hence, lower bound on seekTimeUs is 0.
6207 */
6208 seekTimeUs = std::max(seekTimeUs - elstInitialEmptyEditUs, (int64_t)0);
6209 }
6210 if (mElstShiftStartTicks > 0) {
6211 elstShiftStartUs = ((long double)mElstShiftStartTicks * 1000000) / mTimescale;
6212 seekTimeUs += elstShiftStartUs;
6213 }
6214 ALOGV("shifted seekTimeUs:%" PRId64 ", elstInitialEmptyEditUs:%" PRIu64
6215 ", elstShiftStartUs:%" PRIu64, seekTimeUs, elstInitialEmptyEditUs,
6216 elstShiftStartUs);
6217 }
6218
6219 uint32_t sampleIndex;
6220 status_t err = mSampleTable->findSampleAtTime(
6221 seekTimeUs, 1000000, mTimescale,
6222 &sampleIndex, findFlags);
6223
6224 if (mode == ReadOptions::SEEK_CLOSEST
6225 || mode == ReadOptions::SEEK_FRAME_INDEX) {
6226 // We found the closest sample already, now we want the sync
6227 // sample preceding it (or the sample itself of course), even
6228 // if the subsequent sync sample is closer.
6229 findFlags = SampleTable::kFlagBefore;
6230 }
6231
6232 uint32_t syncSampleIndex = sampleIndex;
6233 // assume every non-USAC/non-MPEGH audio sample is a sync sample.
6234 // This works around
6235 // seek issues with files that were incorrectly written with an
6236 // empty or single-sample stss block for the audio track
6237 if (err == OK && (!mIsAudio || mIsUsac || mIsMpegH)) {
6238 err = mSampleTable->findSyncSampleNear(
6239 sampleIndex, &syncSampleIndex, findFlags);
6240 }
6241
6242 uint64_t sampleTime;
6243 if (err == OK) {
6244 err = mSampleTable->getMetaDataForSample(
6245 sampleIndex, NULL, NULL, &sampleTime);
6246 }
6247
6248 if (err != OK) {
6249 if (err == ERROR_OUT_OF_RANGE) {
6250 // An attempt to seek past the end of the stream would
6251 // normally cause this ERROR_OUT_OF_RANGE error. Propagating
6252 // this all the way to the MediaPlayer would cause abnormal
6253 // termination. Legacy behaviour appears to be to behave as if
6254 // we had seeked to the end of stream, ending normally.
6255 return AMEDIA_ERROR_END_OF_STREAM;
6256 }
6257 ALOGV("end of stream");
6258 return AMEDIA_ERROR_UNKNOWN;
6259 }
6260
6261 if (mode == ReadOptions::SEEK_CLOSEST
6262 || mode == ReadOptions::SEEK_FRAME_INDEX) {
6263 if (mElstInitialEmptyEditTicks > 0) {
6264 sampleTime += mElstInitialEmptyEditTicks;
6265 }
6266 if (mElstShiftStartTicks > 0){
6267 if (sampleTime > mElstShiftStartTicks) {
6268 sampleTime -= mElstShiftStartTicks;
6269 } else {
6270 sampleTime = 0;
6271 }
6272 }
6273 targetSampleTimeUs = (sampleTime * 1000000ll) / mTimescale;
6274 }
6275
6276 #if 0
6277 uint32_t syncSampleTime;
6278 CHECK_EQ(OK, mSampleTable->getMetaDataForSample(
6279 syncSampleIndex, NULL, NULL, &syncSampleTime));
6280
6281 ALOGI("seek to time %lld us => sample at time %lld us, "
6282 "sync sample at time %lld us",
6283 seekTimeUs,
6284 sampleTime * 1000000ll / mTimescale,
6285 syncSampleTime * 1000000ll / mTimescale);
6286 #endif
6287
6288 mCurrentSampleIndex = syncSampleIndex;
6289 }
6290
6291 if (mBuffer != NULL) {
6292 mBuffer->release();
6293 mBuffer = NULL;
6294 }
6295
6296 // fall through
6297 }
6298
6299 off64_t offset = 0;
6300 size_t size = 0;
6301 int64_t cts;
6302 uint64_t stts;
6303 bool isSyncSample;
6304 bool newBuffer = false;
6305 if (mBuffer == NULL) {
6306 newBuffer = true;
6307
6308 status_t err;
6309 if (!mIsHeif && !mIsAvif) {
6310 err = mSampleTable->getMetaDataForSample(mCurrentSampleIndex, &offset, &size,
6311 (uint64_t*)&cts, &isSyncSample, &stts);
6312 if(err == OK) {
6313 if (mElstInitialEmptyEditTicks > 0) {
6314 cts += mElstInitialEmptyEditTicks;
6315 }
6316 if (mElstShiftStartTicks > 0) {
6317 // cts can be negative. for example, initial audio samples for gapless playback.
6318 cts -= (int64_t)mElstShiftStartTicks;
6319 }
6320 }
6321 } else {
6322 err = mItemTable->getImageOffsetAndSize(
6323 options && options->getSeekTo(&seekTimeUs, &mode) ?
6324 &mCurrentSampleIndex : NULL, &offset, &size);
6325
6326 cts = stts = 0;
6327 isSyncSample = 0;
6328 ALOGV("image offset %lld, size %zu", (long long)offset, size);
6329 }
6330
6331 if (err != OK) {
6332 if (err == ERROR_END_OF_STREAM) {
6333 return AMEDIA_ERROR_END_OF_STREAM;
6334 }
6335 return AMEDIA_ERROR_UNKNOWN;
6336 }
6337
6338 err = mBufferGroup->acquire_buffer(&mBuffer);
6339
6340 if (err != OK || mBuffer == nullptr) {
6341 CHECK(mBuffer == NULL);
6342 return AMEDIA_ERROR_UNKNOWN;
6343 }
6344 if (size > mBuffer->size()) {
6345 ALOGE("buffer too small: %zu > %zu", size, mBuffer->size());
6346 mBuffer->release();
6347 mBuffer = NULL;
6348 return AMEDIA_ERROR_UNKNOWN; // ERROR_BUFFER_TOO_SMALL
6349 }
6350 }
6351
6352 if (!mIsAVC && !mIsHEVC && !(mIsDolbyVision && mNALLengthSize) && !mIsAC4) {
6353 if (newBuffer) {
6354 if (mIsPcm) {
6355 // The twos' PCM block reader assumes that all samples has the same size.
6356 uint32_t lastSampleIndexInChunk = mSampleTable->getLastSampleIndexInChunk();
6357 if (lastSampleIndexInChunk < mCurrentSampleIndex) {
6358 mBuffer->release();
6359 mBuffer = nullptr;
6360 return AMEDIA_ERROR_UNKNOWN;
6361 }
6362 uint32_t samplesToRead = lastSampleIndexInChunk - mCurrentSampleIndex + 1;
6363 if (samplesToRead > kMaxPcmFrameSize) {
6364 samplesToRead = kMaxPcmFrameSize;
6365 }
6366
6367 ALOGV("Reading %d PCM frames of size %zu at index %d to stop of chunk at %d",
6368 samplesToRead, size, mCurrentSampleIndex,
6369 mSampleTable->getLastSampleIndexInChunk());
6370
6371 size_t totalSize = samplesToRead * size;
6372 if (mBuffer->size() < totalSize) {
6373 mBuffer->release();
6374 mBuffer = nullptr;
6375 return AMEDIA_ERROR_UNKNOWN;
6376 }
6377 uint8_t* buf = (uint8_t *)mBuffer->data();
6378 ssize_t bytesRead = mDataSource->readAt(offset, buf, totalSize);
6379 if (bytesRead < (ssize_t)totalSize) {
6380 mBuffer->release();
6381 mBuffer = NULL;
6382 return AMEDIA_ERROR_IO;
6383 }
6384
6385 AMediaFormat *meta = mBuffer->meta_data();
6386 AMediaFormat_clear(meta);
6387 AMediaFormat_setInt64(
6388 meta, AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
6389 AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
6390
6391 int32_t byteOrder = 0;
6392 bool isGetBigEndian = AMediaFormat_getInt32(mFormat,
6393 AMEDIAFORMAT_KEY_PCM_BIG_ENDIAN, &byteOrder);
6394
6395 if (isGetBigEndian && byteOrder == 1) {
6396 // Big-endian -> little-endian
6397 uint16_t *dstData = (uint16_t *)buf;
6398 uint16_t *srcData = (uint16_t *)buf;
6399
6400 for (size_t j = 0; j < bytesRead / sizeof(uint16_t); j++) {
6401 dstData[j] = ntohs(srcData[j]);
6402 }
6403 }
6404
6405 mCurrentSampleIndex += samplesToRead;
6406 mBuffer->set_range(0, totalSize);
6407 } else {
6408 ssize_t num_bytes_read =
6409 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
6410
6411 if (num_bytes_read < (ssize_t)size) {
6412 mBuffer->release();
6413 mBuffer = NULL;
6414
6415 return AMEDIA_ERROR_IO;
6416 }
6417
6418 CHECK(mBuffer != NULL);
6419 mBuffer->set_range(0, size);
6420 AMediaFormat *meta = mBuffer->meta_data();
6421 AMediaFormat_clear(meta);
6422 AMediaFormat_setInt64(
6423 meta, AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
6424 AMediaFormat_setInt64(
6425 meta, AMEDIAFORMAT_KEY_DURATION, ((long double)stts * 1000000) / mTimescale);
6426
6427 if (targetSampleTimeUs >= 0) {
6428 AMediaFormat_setInt64(
6429 meta, AMEDIAFORMAT_KEY_TARGET_TIME, targetSampleTimeUs);
6430 }
6431
6432 if (isSyncSample) {
6433 AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
6434 }
6435
6436 AMediaFormat_setInt64(
6437 meta, "sample-file-offset" /*AMEDIAFORMAT_KEY_SAMPLE_FILE_OFFSET*/,
6438 offset);
6439
6440 if (mSampleTable != nullptr &&
6441 mCurrentSampleIndex == mSampleTable->getLastSampleIndexInChunk()) {
6442 AMediaFormat_setInt64(
6443 meta,
6444 "last-sample-index-in-chunk" /*AMEDIAFORMAT_KEY_LAST_SAMPLE_INDEX_IN_CHUNK*/,
6445 mSampleTable->getLastSampleIndexInChunk());
6446 }
6447
6448 ++mCurrentSampleIndex;
6449 }
6450 }
6451
6452 *out = mBuffer;
6453 mBuffer = NULL;
6454
6455 return AMEDIA_OK;
6456
6457 } else if (mIsAC4) {
6458 CHECK(mBuffer != NULL);
6459 // Make sure there is enough space to write the sync header and the raw frame
6460 if (mBuffer->range_length() < (7 + size)) {
6461 mBuffer->release();
6462 mBuffer = NULL;
6463
6464 return AMEDIA_ERROR_IO;
6465 }
6466
6467 uint8_t *dstData = (uint8_t *)mBuffer->data();
6468 size_t dstOffset = 0;
6469 // Add AC-4 sync header to MPEG4 encapsulated AC-4 raw frame
6470 // AC40 sync word, meaning no CRC at the end of the frame
6471 dstData[dstOffset++] = 0xAC;
6472 dstData[dstOffset++] = 0x40;
6473 dstData[dstOffset++] = 0xFF;
6474 dstData[dstOffset++] = 0xFF;
6475 dstData[dstOffset++] = (uint8_t)((size >> 16) & 0xFF);
6476 dstData[dstOffset++] = (uint8_t)((size >> 8) & 0xFF);
6477 dstData[dstOffset++] = (uint8_t)((size >> 0) & 0xFF);
6478
6479 ssize_t numBytesRead = mDataSource->readAt(offset, dstData + dstOffset, size);
6480 if (numBytesRead != (ssize_t)size) {
6481 mBuffer->release();
6482 mBuffer = NULL;
6483
6484 return AMEDIA_ERROR_IO;
6485 }
6486
6487 mBuffer->set_range(0, dstOffset + size);
6488 AMediaFormat *meta = mBuffer->meta_data();
6489 AMediaFormat_clear(meta);
6490 AMediaFormat_setInt64(
6491 meta, AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
6492 AMediaFormat_setInt64(
6493 meta, AMEDIAFORMAT_KEY_DURATION, ((long double)stts * 1000000) / mTimescale);
6494
6495 if (targetSampleTimeUs >= 0) {
6496 AMediaFormat_setInt64(
6497 meta, AMEDIAFORMAT_KEY_TARGET_TIME, targetSampleTimeUs);
6498 }
6499
6500 if (isSyncSample) {
6501 AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
6502 }
6503
6504 ++mCurrentSampleIndex;
6505
6506 *out = mBuffer;
6507 mBuffer = NULL;
6508
6509 return AMEDIA_OK;
6510 } else {
6511 // Whole NAL units are returned but each fragment is prefixed by
6512 // the start code (0x00 00 00 01).
6513 ssize_t num_bytes_read = 0;
6514 bool mSrcBufferFitsDataToRead = size <= mSrcBufferSize;
6515 if (mSrcBufferFitsDataToRead) {
6516 num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size);
6517 } else {
6518 // We are trying to read a sample larger than the expected max sample size.
6519 // Fall through and let the failure be handled by the following if.
6520 android_errorWriteLog(0x534e4554, "188893559");
6521 }
6522
6523 if (num_bytes_read < (ssize_t)size) {
6524 mBuffer->release();
6525 mBuffer = NULL;
6526 return mSrcBufferFitsDataToRead ? AMEDIA_ERROR_IO : AMEDIA_ERROR_MALFORMED;
6527 }
6528
6529 uint8_t *dstData = (uint8_t *)mBuffer->data();
6530 size_t srcOffset = 0;
6531 size_t dstOffset = 0;
6532
6533 while (srcOffset < size) {
6534 bool isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize);
6535 size_t nalLength = 0;
6536 if (!isMalFormed) {
6537 nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
6538 srcOffset += mNALLengthSize;
6539 isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength);
6540 }
6541
6542 if (isMalFormed) {
6543 //if nallength abnormal,ignore it.
6544 ALOGW("abnormal nallength, ignore this NAL");
6545 srcOffset = size;
6546 break;
6547 }
6548
6549 if (nalLength == 0) {
6550 continue;
6551 }
6552
6553 if (dstOffset > SIZE_MAX - 4 ||
6554 dstOffset + 4 > SIZE_MAX - nalLength ||
6555 dstOffset + 4 + nalLength > mBuffer->size()) {
6556 ALOGE("b/27208621 : %zu %zu", dstOffset, mBuffer->size());
6557 android_errorWriteLog(0x534e4554, "27208621");
6558 mBuffer->release();
6559 mBuffer = NULL;
6560 return AMEDIA_ERROR_MALFORMED;
6561 }
6562
6563 dstData[dstOffset++] = 0;
6564 dstData[dstOffset++] = 0;
6565 dstData[dstOffset++] = 0;
6566 dstData[dstOffset++] = 1;
6567 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
6568 srcOffset += nalLength;
6569 dstOffset += nalLength;
6570 }
6571 CHECK_EQ(srcOffset, size);
6572 CHECK(mBuffer != NULL);
6573 mBuffer->set_range(0, dstOffset);
6574
6575 AMediaFormat *meta = mBuffer->meta_data();
6576 AMediaFormat_clear(meta);
6577 AMediaFormat_setInt64(
6578 meta, AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
6579 AMediaFormat_setInt64(
6580 meta, AMEDIAFORMAT_KEY_DURATION, ((long double)stts * 1000000) / mTimescale);
6581
6582 if (targetSampleTimeUs >= 0) {
6583 AMediaFormat_setInt64(
6584 meta, AMEDIAFORMAT_KEY_TARGET_TIME, targetSampleTimeUs);
6585 }
6586
6587 if (mIsAVC) {
6588 uint32_t layerId = FindAVCLayerId(
6589 (const uint8_t *)mBuffer->data(), mBuffer->range_length());
6590 AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_TEMPORAL_LAYER_ID, layerId);
6591 } else if (mIsHEVC) {
6592 int32_t layerId = parseHEVCLayerId(
6593 (const uint8_t *)mBuffer->data(), mBuffer->range_length());
6594 if (layerId >= 0) {
6595 AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_TEMPORAL_LAYER_ID, layerId);
6596 }
6597 }
6598
6599 if (isSyncSample) {
6600 AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
6601 }
6602
6603 AMediaFormat_setInt64(
6604 meta, "sample-file-offset" /*AMEDIAFORMAT_KEY_SAMPLE_FILE_OFFSET*/, offset);
6605
6606 if (mSampleTable != nullptr &&
6607 mCurrentSampleIndex == mSampleTable->getLastSampleIndexInChunk()) {
6608 AMediaFormat_setInt64(
6609 meta,
6610 "last-sample-index-in-chunk" /*AMEDIAFORMAT_KEY_LAST_SAMPLE_INDEX_IN_CHUNK*/,
6611 mSampleTable->getLastSampleIndexInChunk());
6612 }
6613
6614 ++mCurrentSampleIndex;
6615
6616 *out = mBuffer;
6617 mBuffer = NULL;
6618
6619 return AMEDIA_OK;
6620 }
6621 }
6622
fragmentedRead(MediaBufferHelper ** out,const ReadOptions * options)6623 media_status_t MPEG4Source::fragmentedRead(
6624 MediaBufferHelper **out, const ReadOptions *options) {
6625
6626 ALOGV("MPEG4Source::fragmentedRead");
6627
6628 CHECK(mStarted);
6629
6630 *out = NULL;
6631
6632 int64_t targetSampleTimeUs = -1;
6633
6634 int64_t seekTimeUs;
6635 ReadOptions::SeekMode mode;
6636 if (options && options->getSeekTo(&seekTimeUs, &mode)) {
6637 ALOGV("seekTimeUs:%" PRId64, seekTimeUs);
6638 int64_t elstInitialEmptyEditUs = 0, elstShiftStartUs = 0;
6639 if (mElstInitialEmptyEditTicks > 0) {
6640 elstInitialEmptyEditUs = ((long double)mElstInitialEmptyEditTicks * 1000000) /
6641 mTimescale;
6642 /* Sample's composition time from ctts/stts entries are non-negative(>=0).
6643 * Hence, lower bound on seekTimeUs is 0.
6644 */
6645 seekTimeUs = std::max(seekTimeUs - elstInitialEmptyEditUs, (int64_t)0);
6646 }
6647 if (mElstShiftStartTicks > 0){
6648 elstShiftStartUs = ((long double)mElstShiftStartTicks * 1000000) / mTimescale;
6649 seekTimeUs += elstShiftStartUs;
6650 }
6651 ALOGV("shifted seekTimeUs:%" PRId64 ", elstInitialEmptyEditUs:%" PRIu64
6652 ", elstShiftStartUs:%" PRIu64, seekTimeUs, elstInitialEmptyEditUs,
6653 elstShiftStartUs);
6654
6655 int numSidxEntries = mSegments.size();
6656 if (numSidxEntries != 0) {
6657 int64_t totalTime = 0;
6658 off64_t totalOffset = mFirstMoofOffset;
6659 for (int i = 0; i < numSidxEntries; i++) {
6660 const SidxEntry *se = &mSegments[i];
6661 if (totalTime + se->mDurationUs > seekTimeUs) {
6662 // The requested time is somewhere in this segment
6663 if ((mode == ReadOptions::SEEK_NEXT_SYNC && seekTimeUs > totalTime) ||
6664 (mode == ReadOptions::SEEK_CLOSEST_SYNC &&
6665 (seekTimeUs - totalTime) > (totalTime + se->mDurationUs - seekTimeUs))) {
6666 // requested next sync, or closest sync and it was closer to the end of
6667 // this segment
6668 totalTime += se->mDurationUs;
6669 totalOffset += se->mSize;
6670 }
6671 break;
6672 }
6673 totalTime += se->mDurationUs;
6674 totalOffset += se->mSize;
6675 }
6676 mCurrentMoofOffset = totalOffset;
6677 mNextMoofOffset = -1;
6678 mCurrentSamples.clear();
6679 mCurrentSampleIndex = 0;
6680 status_t err = parseChunk(&totalOffset);
6681 if (err != OK) {
6682 return AMEDIA_ERROR_UNKNOWN;
6683 }
6684 mCurrentTime = totalTime * mTimescale / 1000000ll;
6685 } else {
6686 // without sidx boxes, we can only seek to 0
6687 mCurrentMoofOffset = mFirstMoofOffset;
6688 mNextMoofOffset = -1;
6689 mCurrentSamples.clear();
6690 mCurrentSampleIndex = 0;
6691 off64_t tmp = mCurrentMoofOffset;
6692 status_t err = parseChunk(&tmp);
6693 if (err != OK) {
6694 return AMEDIA_ERROR_UNKNOWN;
6695 }
6696 mCurrentTime = 0;
6697 }
6698
6699 if (mBuffer != NULL) {
6700 mBuffer->release();
6701 mBuffer = NULL;
6702 }
6703
6704 // fall through
6705 }
6706
6707 off64_t offset = 0;
6708 size_t size = 0;
6709 int64_t cts = 0;
6710 bool isSyncSample = false;
6711 bool newBuffer = false;
6712 if (mBuffer == NULL || mCurrentSampleIndex >= mCurrentSamples.size()) {
6713 newBuffer = true;
6714
6715 if (mBuffer != NULL) {
6716 mBuffer->release();
6717 mBuffer = NULL;
6718 }
6719 if (mCurrentSampleIndex >= mCurrentSamples.size()) {
6720 // move to next fragment if there is one
6721 if (mNextMoofOffset <= mCurrentMoofOffset) {
6722 return AMEDIA_ERROR_END_OF_STREAM;
6723 }
6724 off64_t nextMoof = mNextMoofOffset;
6725 mCurrentMoofOffset = nextMoof;
6726 mCurrentSamples.clear();
6727 mCurrentSampleIndex = 0;
6728 status_t err = parseChunk(&nextMoof);
6729 if (err != OK) {
6730 return AMEDIA_ERROR_UNKNOWN;
6731 }
6732 if (mCurrentSampleIndex >= mCurrentSamples.size()) {
6733 return AMEDIA_ERROR_END_OF_STREAM;
6734 }
6735 }
6736
6737 const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
6738 offset = smpl->offset;
6739 size = smpl->size;
6740 cts = (int64_t)mCurrentTime + (int64_t)smpl->compositionOffset;
6741
6742 if (mElstInitialEmptyEditTicks > 0) {
6743 cts += mElstInitialEmptyEditTicks;
6744 }
6745 if (mElstShiftStartTicks > 0) {
6746 // cts can be negative. for example, initial audio samples for gapless playback.
6747 cts -= (int64_t)mElstShiftStartTicks;
6748 }
6749
6750 mCurrentTime += smpl->duration;
6751 isSyncSample = (mCurrentSampleIndex == 0);
6752
6753 status_t err = mBufferGroup->acquire_buffer(&mBuffer);
6754
6755 if (err != OK) {
6756 CHECK(mBuffer == NULL);
6757 ALOGV("acquire_buffer returned %d", err);
6758 return AMEDIA_ERROR_UNKNOWN;
6759 }
6760 if (size > mBuffer->size()) {
6761 ALOGE("buffer too small: %zu > %zu", size, mBuffer->size());
6762 mBuffer->release();
6763 mBuffer = NULL;
6764 return AMEDIA_ERROR_UNKNOWN;
6765 }
6766 }
6767
6768 const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
6769 AMediaFormat *bufmeta = mBuffer->meta_data();
6770 AMediaFormat_clear(bufmeta);
6771 if (smpl->encryptedsizes.size()) {
6772 // store clear/encrypted lengths in metadata
6773 AMediaFormat_setBuffer(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_PLAIN_SIZES,
6774 smpl->clearsizes.array(), smpl->clearsizes.size() * sizeof(uint32_t));
6775 AMediaFormat_setBuffer(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_ENCRYPTED_SIZES,
6776 smpl->encryptedsizes.array(), smpl->encryptedsizes.size() * sizeof(uint32_t));
6777 AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_DEFAULT_IV_SIZE, mDefaultIVSize);
6778 AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_MODE, mCryptoMode);
6779 AMediaFormat_setBuffer(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_KEY, mCryptoKey, 16);
6780 AMediaFormat_setInt32(bufmeta,
6781 AMEDIAFORMAT_KEY_CRYPTO_ENCRYPTED_BYTE_BLOCK, mDefaultEncryptedByteBlock);
6782 AMediaFormat_setInt32(bufmeta,
6783 AMEDIAFORMAT_KEY_CRYPTO_SKIP_BYTE_BLOCK, mDefaultSkipByteBlock);
6784
6785 void *iv = NULL;
6786 size_t ivlength = 0;
6787 if (!AMediaFormat_getBuffer(mFormat,
6788 "crypto-iv", &iv, &ivlength)) {
6789 iv = (void *) smpl->iv;
6790 ivlength = 16; // use 16 or the actual size?
6791 }
6792 AMediaFormat_setBuffer(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_IV, iv, ivlength);
6793 }
6794
6795 if (!mIsAVC && !mIsHEVC && !(mIsDolbyVision && mNALLengthSize)) {
6796 if (newBuffer) {
6797 if (!isInRange((size_t)0u, mBuffer->size(), size)) {
6798 mBuffer->release();
6799 mBuffer = NULL;
6800
6801 ALOGE("fragmentedRead ERROR_MALFORMED size %zu", size);
6802 return AMEDIA_ERROR_MALFORMED;
6803 }
6804
6805 ssize_t num_bytes_read =
6806 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
6807
6808 if (num_bytes_read < (ssize_t)size) {
6809 mBuffer->release();
6810 mBuffer = NULL;
6811
6812 ALOGE("i/o error");
6813 return AMEDIA_ERROR_IO;
6814 }
6815
6816 CHECK(mBuffer != NULL);
6817 mBuffer->set_range(0, size);
6818 AMediaFormat_setInt64(bufmeta,
6819 AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
6820 AMediaFormat_setInt64(bufmeta,
6821 AMEDIAFORMAT_KEY_DURATION, ((long double)smpl->duration * 1000000) / mTimescale);
6822
6823 if (targetSampleTimeUs >= 0) {
6824 AMediaFormat_setInt64(bufmeta, AMEDIAFORMAT_KEY_TARGET_TIME, targetSampleTimeUs);
6825 }
6826
6827 if (mIsAVC) {
6828 uint32_t layerId = FindAVCLayerId(
6829 (const uint8_t *)mBuffer->data(), mBuffer->range_length());
6830 AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_TEMPORAL_LAYER_ID, layerId);
6831 } else if (mIsHEVC) {
6832 int32_t layerId = parseHEVCLayerId(
6833 (const uint8_t *)mBuffer->data(), mBuffer->range_length());
6834 if (layerId >= 0) {
6835 AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_TEMPORAL_LAYER_ID, layerId);
6836 }
6837 }
6838
6839 if (isSyncSample) {
6840 AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
6841 }
6842
6843 ++mCurrentSampleIndex;
6844 }
6845
6846 *out = mBuffer;
6847 mBuffer = NULL;
6848
6849 return AMEDIA_OK;
6850
6851 } else {
6852 ALOGV("whole NAL");
6853 // Whole NAL units are returned but each fragment is prefixed by
6854 // the start code (0x00 00 00 01).
6855 ssize_t num_bytes_read = 0;
6856 void *data = NULL;
6857 bool isMalFormed = false;
6858 int32_t max_size;
6859 if (!AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_MAX_INPUT_SIZE, &max_size)
6860 || !isInRange((size_t)0u, (size_t)max_size, size)) {
6861 isMalFormed = true;
6862 } else {
6863 data = mSrcBuffer;
6864 }
6865
6866 if (isMalFormed || data == NULL) {
6867 ALOGE("isMalFormed size %zu", size);
6868 if (mBuffer != NULL) {
6869 mBuffer->release();
6870 mBuffer = NULL;
6871 }
6872 return AMEDIA_ERROR_MALFORMED;
6873 }
6874 num_bytes_read = mDataSource->readAt(offset, data, size);
6875
6876 if (num_bytes_read < (ssize_t)size) {
6877 mBuffer->release();
6878 mBuffer = NULL;
6879
6880 ALOGE("i/o error");
6881 return AMEDIA_ERROR_IO;
6882 }
6883
6884 uint8_t *dstData = (uint8_t *)mBuffer->data();
6885 size_t srcOffset = 0;
6886 size_t dstOffset = 0;
6887
6888 while (srcOffset < size) {
6889 isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize);
6890 size_t nalLength = 0;
6891 if (!isMalFormed) {
6892 nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
6893 srcOffset += mNALLengthSize;
6894 isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength)
6895 || !isInRange((size_t)0u, mBuffer->size(), dstOffset, (size_t)4u)
6896 || !isInRange((size_t)0u, mBuffer->size(), dstOffset + 4, nalLength);
6897 }
6898
6899 if (isMalFormed) {
6900 ALOGE("Video is malformed; nalLength %zu", nalLength);
6901 mBuffer->release();
6902 mBuffer = NULL;
6903 return AMEDIA_ERROR_MALFORMED;
6904 }
6905
6906 if (nalLength == 0) {
6907 continue;
6908 }
6909
6910 if (dstOffset > SIZE_MAX - 4 ||
6911 dstOffset + 4 > SIZE_MAX - nalLength ||
6912 dstOffset + 4 + nalLength > mBuffer->size()) {
6913 ALOGE("b/26365349 : %zu %zu", dstOffset, mBuffer->size());
6914 android_errorWriteLog(0x534e4554, "26365349");
6915 mBuffer->release();
6916 mBuffer = NULL;
6917 return AMEDIA_ERROR_MALFORMED;
6918 }
6919
6920 dstData[dstOffset++] = 0;
6921 dstData[dstOffset++] = 0;
6922 dstData[dstOffset++] = 0;
6923 dstData[dstOffset++] = 1;
6924 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
6925 srcOffset += nalLength;
6926 dstOffset += nalLength;
6927 }
6928 CHECK_EQ(srcOffset, size);
6929 CHECK(mBuffer != NULL);
6930 mBuffer->set_range(0, dstOffset);
6931
6932 AMediaFormat *bufmeta = mBuffer->meta_data();
6933 AMediaFormat_setInt64(bufmeta,
6934 AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
6935 AMediaFormat_setInt64(bufmeta,
6936 AMEDIAFORMAT_KEY_DURATION, ((long double)smpl->duration * 1000000) / mTimescale);
6937
6938 if (targetSampleTimeUs >= 0) {
6939 AMediaFormat_setInt64(bufmeta, AMEDIAFORMAT_KEY_TARGET_TIME, targetSampleTimeUs);
6940 }
6941
6942 if (isSyncSample) {
6943 AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
6944 }
6945
6946 ++mCurrentSampleIndex;
6947
6948 *out = mBuffer;
6949 mBuffer = NULL;
6950
6951 return AMEDIA_OK;
6952 }
6953
6954 return AMEDIA_OK;
6955 }
6956
findTrackByMimePrefix(const char * mimePrefix)6957 MPEG4Extractor::Track *MPEG4Extractor::findTrackByMimePrefix(
6958 const char *mimePrefix) {
6959 for (Track *track = mFirstTrack; track != NULL; track = track->next) {
6960 const char *mime;
6961 if (AMediaFormat_getString(track->meta, AMEDIAFORMAT_KEY_MIME, &mime)
6962 && !strncasecmp(mime, mimePrefix, strlen(mimePrefix))) {
6963 return track;
6964 }
6965 }
6966
6967 return NULL;
6968 }
6969
LegacySniffMPEG4(DataSourceHelper * source,float * confidence)6970 static bool LegacySniffMPEG4(DataSourceHelper *source, float *confidence) {
6971 uint8_t header[8];
6972
6973 ssize_t n = source->readAt(4, header, sizeof(header));
6974 if (n < (ssize_t)sizeof(header)) {
6975 return false;
6976 }
6977
6978 if (!memcmp(header, "ftyp3gp", 7) || !memcmp(header, "ftypmp42", 8)
6979 || !memcmp(header, "ftyp3gr6", 8) || !memcmp(header, "ftyp3gs6", 8)
6980 || !memcmp(header, "ftyp3ge6", 8) || !memcmp(header, "ftyp3gg6", 8)
6981 || !memcmp(header, "ftypisom", 8) || !memcmp(header, "ftypM4V ", 8)
6982 || !memcmp(header, "ftypM4A ", 8) || !memcmp(header, "ftypf4v ", 8)
6983 || !memcmp(header, "ftypkddi", 8) || !memcmp(header, "ftypM4VP", 8)
6984 || !memcmp(header, "ftypmif1", 8) || !memcmp(header, "ftypheic", 8)
6985 || !memcmp(header, "ftypmsf1", 8) || !memcmp(header, "ftyphevc", 8)
6986 || !memcmp(header, "ftypavif", 8) || !memcmp(header, "ftypavis", 8)) {
6987 *confidence = 0.4;
6988
6989 return true;
6990 }
6991
6992 return false;
6993 }
6994
isCompatibleBrand(uint32_t fourcc)6995 static bool isCompatibleBrand(uint32_t fourcc) {
6996 static const uint32_t kCompatibleBrands[] = {
6997 FOURCC("isom"),
6998 FOURCC("iso2"),
6999 FOURCC("avc1"),
7000 FOURCC("hvc1"),
7001 FOURCC("hev1"),
7002 FOURCC("av01"),
7003 FOURCC("vp09"),
7004 FOURCC("3gp4"),
7005 FOURCC("mp41"),
7006 FOURCC("mp42"),
7007 FOURCC("dash"),
7008 FOURCC("nvr1"),
7009
7010 // Won't promise that the following file types can be played.
7011 // Just give these file types a chance.
7012 FOURCC("qt "), // Apple's QuickTime
7013 FOURCC("MSNV"), // Sony's PSP
7014 FOURCC("wmf "),
7015
7016 FOURCC("3g2a"), // 3GPP2
7017 FOURCC("3g2b"),
7018 FOURCC("mif1"), // HEIF image
7019 FOURCC("heic"), // HEIF image
7020 FOURCC("msf1"), // HEIF image sequence
7021 FOURCC("hevc"), // HEIF image sequence
7022 FOURCC("avif"), // AVIF image
7023 FOURCC("avis"), // AVIF image sequence
7024 };
7025
7026 for (size_t i = 0;
7027 i < sizeof(kCompatibleBrands) / sizeof(kCompatibleBrands[0]);
7028 ++i) {
7029 if (kCompatibleBrands[i] == fourcc) {
7030 return true;
7031 }
7032 }
7033
7034 return false;
7035 }
7036
7037 // Attempt to actually parse the 'ftyp' atom and determine if a suitable
7038 // compatible brand is present.
7039 // Also try to identify where this file's metadata ends
7040 // (end of the 'moov' atom) and report it to the caller as part of
7041 // the metadata.
BetterSniffMPEG4(DataSourceHelper * source,float * confidence)7042 static bool BetterSniffMPEG4(DataSourceHelper *source, float *confidence) {
7043 // We scan up to 128 bytes to identify this file as an MP4.
7044 static const off64_t kMaxScanOffset = 128ll;
7045
7046 off64_t offset = 0ll;
7047 bool foundGoodFileType = false;
7048 off64_t moovAtomEndOffset = -1ll;
7049 bool done = false;
7050
7051 while (!done && offset < kMaxScanOffset) {
7052 uint32_t hdr[2];
7053 if (source->readAt(offset, hdr, 8) < 8) {
7054 return false;
7055 }
7056
7057 uint64_t chunkSize = ntohl(hdr[0]);
7058 uint32_t chunkType = ntohl(hdr[1]);
7059 off64_t chunkDataOffset = offset + 8;
7060
7061 if (chunkSize == 1) {
7062 if (source->readAt(offset + 8, &chunkSize, 8) < 8) {
7063 return false;
7064 }
7065
7066 chunkSize = ntoh64(chunkSize);
7067 chunkDataOffset += 8;
7068
7069 if (chunkSize < 16) {
7070 // The smallest valid chunk is 16 bytes long in this case.
7071 return false;
7072 }
7073 if (chunkSize > INT64_MAX) {
7074 // reject overly large chunk sizes that could
7075 // be interpreted as negative
7076 ALOGE("chunk size too large");
7077 return false;
7078 }
7079
7080 } else if (chunkSize < 8) {
7081 // The smallest valid chunk is 8 bytes long.
7082 return false;
7083 }
7084
7085 // (data_offset - offset) is either 8 or 16
7086 off64_t chunkDataSize = chunkSize - (chunkDataOffset - offset);
7087 if (chunkDataSize < 0) {
7088 ALOGE("b/23540914");
7089 return false;
7090 }
7091
7092 char chunkstring[5];
7093 MakeFourCCString(chunkType, chunkstring);
7094 ALOGV("saw chunk type %s, size %" PRIu64 " @ %lld",
7095 chunkstring, chunkSize, (long long)offset);
7096 switch (chunkType) {
7097 case FOURCC("ftyp"):
7098 {
7099 if (chunkDataSize < 8) {
7100 return false;
7101 }
7102
7103 uint32_t numCompatibleBrands = (chunkDataSize - 8) / 4;
7104 for (size_t i = 0; i < numCompatibleBrands + 2; ++i) {
7105 if (i == 1) {
7106 // Skip this index, it refers to the minorVersion,
7107 // not a brand.
7108 continue;
7109 }
7110
7111 uint32_t brand;
7112 if (source->readAt(
7113 chunkDataOffset + 4 * i, &brand, 4) < 4) {
7114 return false;
7115 }
7116
7117 brand = ntohl(brand);
7118
7119 if (isCompatibleBrand(brand)) {
7120 foundGoodFileType = true;
7121 break;
7122 }
7123 }
7124
7125 if (!foundGoodFileType) {
7126 return false;
7127 }
7128
7129 break;
7130 }
7131
7132 case FOURCC("moov"):
7133 {
7134 if (__builtin_add_overflow(offset, chunkSize, &moovAtomEndOffset)) {
7135 ALOGE("chunk size + offset would overflow");
7136 return false;
7137 }
7138
7139 done = true;
7140 break;
7141 }
7142
7143 default:
7144 break;
7145 }
7146
7147 if (__builtin_add_overflow(offset, chunkSize, &offset)) {
7148 ALOGE("chunk size + offset would overflow");
7149 return false;
7150 }
7151 }
7152
7153 if (!foundGoodFileType) {
7154 return false;
7155 }
7156
7157 *confidence = 0.4f;
7158
7159 return true;
7160 }
7161
CreateExtractor(CDataSource * source,void *)7162 static CMediaExtractor* CreateExtractor(CDataSource *source, void *) {
7163 return wrap(new MPEG4Extractor(new DataSourceHelper(source)));
7164 }
7165
Sniff(CDataSource * source,float * confidence,void **,FreeMetaFunc *)7166 static CreatorFunc Sniff(
7167 CDataSource *source, float *confidence, void **,
7168 FreeMetaFunc *) {
7169 DataSourceHelper helper(source);
7170 if (BetterSniffMPEG4(&helper, confidence)) {
7171 return CreateExtractor;
7172 }
7173
7174 if (LegacySniffMPEG4(&helper, confidence)) {
7175 ALOGW("Identified supported mpeg4 through LegacySniffMPEG4.");
7176 return CreateExtractor;
7177 }
7178
7179 return NULL;
7180 }
7181
// NULL-terminated list of file-name extensions; it is handed to the extractor
// definition returned by GETEXTRACTORDEF() alongside the Sniff function.
static const char *extensions[] = {
    "3g2",
    "3ga",
    "3gp",
    "3gpp",
    "3gpp2",
    "m4a",
    "m4r",
    "m4v",
    "mov",
    "mp4",
    "qt",
    NULL
};
7196
extern "C" {
// This is the only symbol that needs to be exported
// Returns the definition record for this extractor: the definition-format
// version, a UUID, the extractor's own version number (2), a display name, and
// the v3 entry pairing the Sniff function with the extensions list.
__attribute__ ((visibility ("default")))
ExtractorDef GETEXTRACTORDEF() {
    return {
        EXTRACTORDEF_VERSION,
        UUID("27575c67-4417-4c54-8d3d-8e626985a164"),
        2, // version
        "MP4 Extractor",
        { .v3 = {Sniff, extensions} },
    };
}

} // extern "C"
7211
7212 } // namespace android
7213