1 /*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 //#define LOG_NDEBUG 0
18 #define LOG_TAG "MPEG4Extractor"
19
20 #include <ctype.h>
21 #include <inttypes.h>
22 #include <algorithm>
23 #include <map>
24 #include <memory>
25 #include <stdint.h>
26 #include <stdlib.h>
27 #include <string.h>
28
29 #include <log/log.h>
30 #include <utils/Log.h>
31
32 #include "AC4Parser.h"
33 #include "MPEG4Extractor.h"
34 #include "SampleTable.h"
35 #include "ItemTable.h"
36
37 #include <ESDS.h>
38 #include <ID3.h>
39 #include <media/stagefright/DataSourceBase.h>
40 #include <media/ExtractorUtils.h>
41 #include <media/stagefright/foundation/ABitReader.h>
42 #include <media/stagefright/foundation/ABuffer.h>
43 #include <media/stagefright/foundation/ADebug.h>
44 #include <media/stagefright/foundation/AMessage.h>
45 #include <media/stagefright/foundation/AudioPresentationInfo.h>
46 #include <media/stagefright/foundation/AUtils.h>
47 #include <media/stagefright/foundation/ByteUtils.h>
48 #include <media/stagefright/foundation/ColorUtils.h>
49 #include <media/stagefright/foundation/avc_utils.h>
50 #include <media/stagefright/foundation/hexdump.h>
51 #include <media/stagefright/foundation/OpusHeader.h>
52 #include <media/stagefright/MediaBufferGroup.h>
53 #include <media/stagefright/MediaDefs.h>
54 #include <media/stagefright/MetaDataBase.h>
55 #include <utils/String8.h>
56
57 #include <byteswap.h>
58
// Fallback definition, used only when no earlier header has defined UINT32_MAX.
#ifndef UINT32_MAX
#define UINT32_MAX (4294967295U)
#endif

// NOTE(review): presumably the byte size of the ALAC codec-specific info
// blob; the use site is not visible in this part of the file -- confirm.
#define ALAC_SPECIFIC_INFO_SIZE (36)

// TODO : Remove the defines once mainline media is built against NDK >= 31.
// The mp4 extractor is part of mainline and builds against NDK 29 as of
// writing. These keys are available only from NDK 31:
#define AMEDIAFORMAT_KEY_MPEGH_PROFILE_LEVEL_INDICATION \
    "mpegh-profile-level-indication"
#define AMEDIAFORMAT_KEY_MPEGH_REFERENCE_CHANNEL_LAYOUT \
    "mpegh-reference-channel-layout"
#define AMEDIAFORMAT_KEY_MPEGH_COMPATIBLE_SETS \
    "mpegh-compatible-sets"
74
75 namespace android {
76
// File-local size limits used while parsing.
enum {
    // max track header chunk to return
    // (getTrackMetaData clamps the MPEG2 stream header it reads to this many bytes)
    kMaxTrackHeaderSize = 32,

    // maximum size of an atom. Some atoms can be bigger according to the spec,
    // but we only allow up to this size.
    // (64 MiB; parseChunk rejects any non-"mdat" atom whose payload exceeds it)
    kMaxAtomSize = 64 * 1024 * 1024,
};
85
// MediaTrackHelper subclass that reads the samples of a single track. Only the
// declaration is visible here; the member functions are defined out of line.
// NOTE(review): presumably instantiated by MPEG4Extractor when a track is
// requested -- the construction site is not visible in this part of the file.
class MPEG4Source : public MediaTrackHelper {
    static const size_t kMaxPcmFrameSize = 8192;
public:
    // Caller retains ownership of both "dataSource" and "sampleTable".
    MPEG4Source(AMediaFormat *format,
                DataSourceHelper *dataSource,
                int32_t timeScale,
                const sp<SampleTable> &sampleTable,
                Vector<SidxEntry> &sidx,
                const Trex *trex,
                off64_t firstMoofOffset,
                const sp<ItemTable> &itemTable,
                uint64_t elstShiftStartTicks,
                uint64_t elstInitialEmptyEditTicks);
    virtual status_t init();

    virtual media_status_t start();
    virtual media_status_t stop();

    virtual media_status_t getFormat(AMediaFormat *);

    virtual media_status_t read(MediaBufferHelper **buffer, const ReadOptions *options = NULL);
    // This track always reports support for non-blocking reads.
    bool supportsNonBlockingRead() override { return true; }
    virtual media_status_t fragmentedRead(
            MediaBufferHelper **buffer, const ReadOptions *options = NULL);

    virtual ~MPEG4Source();

private:
    Mutex mLock;

    AMediaFormat *mFormat;
    DataSourceHelper *mDataSource;
    int32_t mTimescale;
    sp<SampleTable> mSampleTable;
    uint32_t mCurrentSampleIndex;
    uint32_t mCurrentFragmentIndex;
    // Held by reference: the referenced vector must outlive this object.
    Vector<SidxEntry> &mSegments;
    const Trex *mTrex;
    off64_t mFirstMoofOffset;
    off64_t mCurrentMoofOffset;
    off64_t mCurrentMoofSize;
    off64_t mNextMoofOffset;
    uint32_t mCurrentTime; // in media timescale ticks
    int32_t mLastParsedTrackId;
    int32_t mTrackId;

    // Encryption-related state.
    int32_t mCryptoMode;    // passed in from extractor
    int32_t mDefaultIVSize; // passed in from extractor
    uint8_t mCryptoKey[16]; // passed in from extractor
    int32_t mDefaultEncryptedByteBlock;
    int32_t mDefaultSkipByteBlock;
    uint32_t mCurrentAuxInfoType;
    uint32_t mCurrentAuxInfoTypeParameter;
    int32_t mCurrentDefaultSampleInfoSize;
    uint32_t mCurrentSampleInfoCount;
    uint32_t mCurrentSampleInfoAllocSize;
    uint8_t* mCurrentSampleInfoSizes;
    uint32_t mCurrentSampleInfoOffsetCount;
    uint32_t mCurrentSampleInfoOffsetsAllocSize;
    uint64_t* mCurrentSampleInfoOffsets;

    // Codec/format classification flags. Only mIsMpegH and mIsUsac have
    // in-class initializers; the others carry none in this declaration.
    bool mIsAVC;
    bool mIsHEVC;
    bool mIsDolbyVision;
    bool mIsAC4;
    bool mIsMpegH = false;
    bool mIsPcm;
    size_t mNALLengthSize;

    bool mStarted;

    MediaBufferHelper *mBuffer;

    uint8_t *mSrcBuffer;

    bool mIsHeif;
    bool mIsAvif;
    bool mIsAudio;
    bool mIsUsac = false;
    sp<ItemTable> mItemTable;

    /* Shift start offset (move to earlier time) when media_time > 0,
     * in media time scale.
     */
    uint64_t mElstShiftStartTicks;
    /* Initial start offset (move to later time), empty edit list entry
     * in media time scale.
     */
    uint64_t mElstInitialEmptyEditTicks;

    size_t parseNALSize(const uint8_t *data) const;
    status_t parseChunk(off64_t *offset);
    status_t parseTrackFragmentHeader(off64_t offset, off64_t size);
    status_t parseTrackFragmentRun(off64_t offset, off64_t size);
    status_t parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size);
    status_t parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size);
    status_t parseClearEncryptedSizes(off64_t offset, bool isSampleEncryption,
            uint32_t flags, off64_t size);
    status_t parseSampleEncryption(off64_t offset, off64_t size);
    // returns -1 for invalid layer ID
    int32_t parseHEVCLayerId(const uint8_t *data, size_t size);

    // Values read from a track fragment header, plus the running data offset.
    struct TrackFragmentHeaderInfo {
        // Bit masks tested against mFlags; each "...Present" bit names the
        // optional field it corresponds to.
        enum Flags {
            kBaseDataOffsetPresent         = 0x01,
            kSampleDescriptionIndexPresent = 0x02,
            kDefaultSampleDurationPresent  = 0x08,
            kDefaultSampleSizePresent      = 0x10,
            kDefaultSampleFlagsPresent     = 0x20,
            kDurationIsEmpty               = 0x10000,
        };

        uint32_t mTrackID;
        uint32_t mFlags;
        uint64_t mBaseDataOffset;
        uint32_t mSampleDescriptionIndex;
        uint32_t mDefaultSampleDuration;
        uint32_t mDefaultSampleSize;
        uint32_t mDefaultSampleFlags;

        uint64_t mDataOffset;
    };
    TrackFragmentHeaderInfo mTrackFragmentHeaderInfo;

    // Per-sample record: location, timing and encryption layout.
    struct Sample {
        off64_t offset;
        size_t size;
        uint32_t duration;
        int32_t compositionOffset;
        uint8_t iv[16];
        Vector<uint32_t> clearsizes;
        Vector<uint32_t> encryptedsizes;
    };
    Vector<Sample> mCurrentSamples;
    std::map<off64_t, uint32_t> mDrmOffsets;

    // Copying is disallowed: declared private here with no definition in view.
    MPEG4Source(const MPEG4Source &);
    MPEG4Source &operator=(const MPEG4Source &);
};
226
// This custom data source wraps an existing one. Requests that fall entirely
// within the cached range are served from the cache; all other requests are
// forwarded to the wrapped data source.
// It is used to cache the full sample table metadata for a single track.
// Sources may be wrapped repeatedly to cover all tracks, i.e. each
// CachedRangedDataSource caches the sample table metadata for one track.
233
// DataSourceHelper that keeps one contiguous byte range of the wrapped source
// in memory and answers reads inside that range from the in-memory copy.
class CachedRangedDataSource : public DataSourceHelper {
public:
    // Wraps "source" without taking ownership; ownership can be transferred
    // later by a successful setCachedRange() call.
    explicit CachedRangedDataSource(DataSourceHelper *source);
    virtual ~CachedRangedDataSource();

    // Served from the cache when the request lies within the cached range,
    // otherwise forwarded to the wrapped source.
    ssize_t readAt(off64_t offset, void *data, size_t size) override;
    // Both simply forward to the wrapped source.
    status_t getSize(off64_t *size) override;
    uint32_t flags() override;

    // Replaces the cached range with [offset, offset + size). When
    // "assumeSourceOwnershipOnSuccess" is true and the call succeeds, the
    // wrapped source is deleted by this object's destructor.
    status_t setCachedRange(off64_t offset, size_t size, bool assumeSourceOwnershipOnSuccess);


private:
    Mutex mLock;  // taken by readAt() and setCachedRange()

    DataSourceHelper *mSource;
    bool mOwnsDataSource;   // true => destructor deletes mSource
    off64_t mCachedOffset;  // source offset of mCache[0]
    size_t mCachedSize;     // number of valid bytes in mCache
    uint8_t *mCache;        // malloc'ed buffer, or NULL when nothing is cached

    // Frees the buffer and resets the cached range to empty.
    void clearCache();

    // Copying is disallowed: declared private here with no definition in view.
    CachedRangedDataSource(const CachedRangedDataSource &);
    CachedRangedDataSource &operator=(const CachedRangedDataSource &);
};
260
CachedRangedDataSource(DataSourceHelper * source)261 CachedRangedDataSource::CachedRangedDataSource(DataSourceHelper *source)
262 : DataSourceHelper(source),
263 mSource(source),
264 mOwnsDataSource(false),
265 mCachedOffset(0),
266 mCachedSize(0),
267 mCache(NULL) {
268 }
269
~CachedRangedDataSource()270 CachedRangedDataSource::~CachedRangedDataSource() {
271 clearCache();
272 if (mOwnsDataSource) {
273 delete mSource;
274 }
275 }
276
clearCache()277 void CachedRangedDataSource::clearCache() {
278 if (mCache) {
279 free(mCache);
280 mCache = NULL;
281 }
282
283 mCachedOffset = 0;
284 mCachedSize = 0;
285 }
286
readAt(off64_t offset,void * data,size_t size)287 ssize_t CachedRangedDataSource::readAt(off64_t offset, void *data, size_t size) {
288 Mutex::Autolock autoLock(mLock);
289
290 if (isInRange(mCachedOffset, mCachedSize, offset, size)) {
291 memcpy(data, &mCache[offset - mCachedOffset], size);
292 return size;
293 }
294
295 return mSource->readAt(offset, data, size);
296 }
297
getSize(off64_t * size)298 status_t CachedRangedDataSource::getSize(off64_t *size) {
299 return mSource->getSize(size);
300 }
301
flags()302 uint32_t CachedRangedDataSource::flags() {
303 return mSource->flags();
304 }
305
setCachedRange(off64_t offset,size_t size,bool assumeSourceOwnershipOnSuccess)306 status_t CachedRangedDataSource::setCachedRange(off64_t offset,
307 size_t size,
308 bool assumeSourceOwnershipOnSuccess) {
309 Mutex::Autolock autoLock(mLock);
310
311 clearCache();
312
313 mCache = (uint8_t *)malloc(size);
314
315 if (mCache == NULL) {
316 return -ENOMEM;
317 }
318
319 mCachedOffset = offset;
320 mCachedSize = size;
321
322 ssize_t err = mSource->readAt(mCachedOffset, mCache, mCachedSize);
323
324 if (err < (ssize_t)size) {
325 clearCache();
326
327 return ERROR_IO;
328 }
329 mOwnsDataSource = assumeSourceOwnershipOnSuccess;
330 return OK;
331 }
332
333 ////////////////////////////////////////////////////////////////////////////////
334
// Compile-time switch: when true, parseChunk() prints each chunk header and a
// hexdump of its first bytes.
static constexpr bool kUseHexDump{false};
336
FourCC2MIME(uint32_t fourcc)337 static const char *FourCC2MIME(uint32_t fourcc) {
338 switch (fourcc) {
339 case FOURCC("mp4a"):
340 return MEDIA_MIMETYPE_AUDIO_AAC;
341
342 case FOURCC("samr"):
343 return MEDIA_MIMETYPE_AUDIO_AMR_NB;
344
345 case FOURCC("sawb"):
346 return MEDIA_MIMETYPE_AUDIO_AMR_WB;
347
348 case FOURCC("ec-3"):
349 return MEDIA_MIMETYPE_AUDIO_EAC3;
350
351 case FOURCC("mp4v"):
352 return MEDIA_MIMETYPE_VIDEO_MPEG4;
353
354 case FOURCC("s263"):
355 case FOURCC("h263"):
356 case FOURCC("H263"):
357 return MEDIA_MIMETYPE_VIDEO_H263;
358
359 case FOURCC("avc1"):
360 return MEDIA_MIMETYPE_VIDEO_AVC;
361
362 case FOURCC("hvc1"):
363 case FOURCC("hev1"):
364 return MEDIA_MIMETYPE_VIDEO_HEVC;
365
366 case FOURCC("dvav"):
367 case FOURCC("dva1"):
368 case FOURCC("dvhe"):
369 case FOURCC("dvh1"):
370 case FOURCC("dav1"):
371 return MEDIA_MIMETYPE_VIDEO_DOLBY_VISION;
372
373 case FOURCC("ac-4"):
374 return MEDIA_MIMETYPE_AUDIO_AC4;
375 case FOURCC("Opus"):
376 return MEDIA_MIMETYPE_AUDIO_OPUS;
377
378 case FOURCC("twos"):
379 case FOURCC("sowt"):
380 return MEDIA_MIMETYPE_AUDIO_RAW;
381 case FOURCC("alac"):
382 return MEDIA_MIMETYPE_AUDIO_ALAC;
383 case FOURCC("fLaC"):
384 return MEDIA_MIMETYPE_AUDIO_FLAC;
385 case FOURCC("av01"):
386 return MEDIA_MIMETYPE_VIDEO_AV1;
387 case FOURCC("vp09"):
388 return MEDIA_MIMETYPE_VIDEO_VP9;
389 case FOURCC(".mp3"):
390 case 0x6D730055: // "ms U" mp3 audio
391 return MEDIA_MIMETYPE_AUDIO_MPEG;
392 case FOURCC("mha1"):
393 return MEDIA_MIMETYPE_AUDIO_MPEGH_MHA1;
394 case FOURCC("mhm1"):
395 return MEDIA_MIMETYPE_AUDIO_MPEGH_MHM1;
396 default:
397 ALOGW("Unknown fourcc: %c%c%c%c",
398 (fourcc >> 24) & 0xff,
399 (fourcc >> 16) & 0xff,
400 (fourcc >> 8) & 0xff,
401 fourcc & 0xff
402 );
403 return "application/octet-stream";
404 }
405 }
406
AdjustChannelsAndRate(uint32_t fourcc,uint32_t * channels,uint32_t * rate)407 static bool AdjustChannelsAndRate(uint32_t fourcc, uint32_t *channels, uint32_t *rate) {
408 if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_NB, FourCC2MIME(fourcc))) {
409 // AMR NB audio is always mono, 8kHz
410 *channels = 1;
411 *rate = 8000;
412 return true;
413 } else if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_WB, FourCC2MIME(fourcc))) {
414 // AMR WB audio is always mono, 16kHz
415 *channels = 1;
416 *rate = 16000;
417 return true;
418 }
419 return false;
420 }
421
MPEG4Extractor(DataSourceHelper * source,const char * mime)422 MPEG4Extractor::MPEG4Extractor(DataSourceHelper *source, const char *mime)
423 : mMoofOffset(0),
424 mMoofFound(false),
425 mMdatFound(false),
426 mDataSource(source),
427 mInitCheck(NO_INIT),
428 mHeaderTimescale(0),
429 mIsQT(false),
430 mIsHeif(false),
431 mHasMoovBox(false),
432 mPreferHeif(mime != NULL && !strcasecmp(mime, MEDIA_MIMETYPE_CONTAINER_HEIF)),
433 mIsAvif(false),
434 mFirstTrack(NULL),
435 mLastTrack(NULL) {
436 ALOGV("mime=%s, mPreferHeif=%d", mime, mPreferHeif);
437 mFileMetaData = AMediaFormat_new();
438 }
439
~MPEG4Extractor()440 MPEG4Extractor::~MPEG4Extractor() {
441 Track *track = mFirstTrack;
442 while (track) {
443 Track *next = track->next;
444
445 delete track;
446 track = next;
447 }
448 mFirstTrack = mLastTrack = NULL;
449
450 for (size_t i = 0; i < mPssh.size(); i++) {
451 delete [] mPssh[i].data;
452 }
453 mPssh.clear();
454
455 delete mDataSource;
456 AMediaFormat_delete(mFileMetaData);
457 }
458
flags() const459 uint32_t MPEG4Extractor::flags() const {
460 return CAN_PAUSE |
461 ((mMoofOffset == 0 || mSidxEntries.size() != 0) ?
462 (CAN_SEEK_BACKWARD | CAN_SEEK_FORWARD | CAN_SEEK) : 0);
463 }
464
getMetaData(AMediaFormat * meta)465 media_status_t MPEG4Extractor::getMetaData(AMediaFormat *meta) {
466 status_t err;
467 if ((err = readMetaData()) != OK) {
468 return AMEDIA_ERROR_UNKNOWN;
469 }
470 AMediaFormat_copy(meta, mFileMetaData);
471 return AMEDIA_OK;
472 }
473
countTracks()474 size_t MPEG4Extractor::countTracks() {
475 status_t err;
476 if ((err = readMetaData()) != OK) {
477 ALOGV("MPEG4Extractor::countTracks: no tracks");
478 return 0;
479 }
480
481 size_t n = 0;
482 Track *track = mFirstTrack;
483 while (track) {
484 ++n;
485 track = track->next;
486 }
487
488 ALOGV("MPEG4Extractor::countTracks: %zu tracks", n);
489 return n;
490 }
491
getTrackMetaData(AMediaFormat * meta,size_t index,uint32_t flags)492 media_status_t MPEG4Extractor::getTrackMetaData(
493 AMediaFormat *meta,
494 size_t index, uint32_t flags) {
495 status_t err;
496 if ((err = readMetaData()) != OK) {
497 return AMEDIA_ERROR_UNKNOWN;
498 }
499
500 Track *track = mFirstTrack;
501 while (index > 0) {
502 if (track == NULL) {
503 return AMEDIA_ERROR_UNKNOWN;
504 }
505
506 track = track->next;
507 --index;
508 }
509
510 if (track == NULL) {
511 return AMEDIA_ERROR_UNKNOWN;
512 }
513
514 [=] {
515 int64_t duration;
516 int32_t samplerate;
517 // Only for audio track.
518 if (track->elst_needs_processing && mHeaderTimescale != 0 &&
519 AMediaFormat_getInt64(track->meta, AMEDIAFORMAT_KEY_DURATION, &duration) &&
520 AMediaFormat_getInt32(track->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, &samplerate)) {
521 // Elst has to be processed only the first time this function is called.
522 track->elst_needs_processing = false;
523
524 if (track->elst_segment_duration > INT64_MAX) {
525 return;
526 }
527 int64_t segment_duration = track->elst_segment_duration;
528 int64_t media_time = track->elst_media_time;
529 int64_t halfscale = track->timescale / 2;
530
531 ALOGV("segment_duration = %" PRId64 ", media_time = %" PRId64
532 ", halfscale = %" PRId64 ", mdhd_timescale = %d, track_timescale = %u",
533 segment_duration, media_time,
534 halfscale, mHeaderTimescale, track->timescale);
535
536 if ((uint32_t)samplerate != track->timescale){
537 ALOGV("samplerate:%" PRId32 ", track->timescale and samplerate are different!",
538 samplerate);
539 }
540 // Both delay and paddingsamples have to be set inorder for either to be
541 // effective in the lower layers.
542 int64_t delay = 0;
543 if (media_time > 0) { // Gapless playback
544 // delay = ((media_time * samplerate) + halfscale) / track->timescale;
545 if (__builtin_mul_overflow(media_time, samplerate, &delay) ||
546 __builtin_add_overflow(delay, halfscale, &delay) ||
547 (delay /= track->timescale, false) ||
548 delay > INT32_MAX ||
549 delay < INT32_MIN) {
550 ALOGW("ignoring edit list with bogus values");
551 return;
552 }
553 }
554 ALOGV("delay = %" PRId64, delay);
555 AMediaFormat_setInt32(track->meta, AMEDIAFORMAT_KEY_ENCODER_DELAY, delay);
556
557 int64_t paddingsamples = 0;
558 if (segment_duration > 0) {
559 int64_t scaled_duration;
560 // scaled_duration = duration * mHeaderTimescale;
561 if (__builtin_mul_overflow(duration, mHeaderTimescale, &scaled_duration)) {
562 return;
563 }
564 ALOGV("scaled_duration = %" PRId64, scaled_duration);
565
566 int64_t segment_end;
567 int64_t padding;
568 int64_t segment_duration_e6;
569 int64_t media_time_scaled_e6;
570 int64_t media_time_scaled;
571 // padding = scaled_duration - ((segment_duration * 1000000) +
572 // ((media_time * mHeaderTimescale * 1000000)/track->timescale) )
573 // segment_duration is based on timescale in movie header box(mdhd)
574 // media_time is based on timescale track header/media timescale
575 if (__builtin_mul_overflow(segment_duration, 1000000, &segment_duration_e6) ||
576 __builtin_mul_overflow(media_time, mHeaderTimescale, &media_time_scaled) ||
577 __builtin_mul_overflow(media_time_scaled, 1000000, &media_time_scaled_e6)) {
578 return;
579 }
580 media_time_scaled_e6 /= track->timescale;
581 if (__builtin_add_overflow(segment_duration_e6, media_time_scaled_e6, &segment_end)
582 || __builtin_sub_overflow(scaled_duration, segment_end, &padding)) {
583 return;
584 }
585 ALOGV("segment_end = %" PRId64 ", padding = %" PRId64, segment_end, padding);
586 // track duration from media header (which is what AMEDIAFORMAT_KEY_DURATION is)
587 // might be slightly shorter than the segment duration, which would make the
588 // padding negative. Clamp to zero.
589 if (padding > 0) {
590 int64_t halfscale_mht = mHeaderTimescale / 2;
591 int64_t halfscale_e6;
592 int64_t timescale_e6;
593 // paddingsamples = ((padding * samplerate) + (halfscale_mht * 1000000))
594 // / (mHeaderTimescale * 1000000);
595 if (__builtin_mul_overflow(padding, samplerate, &paddingsamples) ||
596 __builtin_mul_overflow(halfscale_mht, 1000000, &halfscale_e6) ||
597 __builtin_mul_overflow(mHeaderTimescale, 1000000, ×cale_e6) ||
598 __builtin_add_overflow(paddingsamples, halfscale_e6, &paddingsamples) ||
599 (paddingsamples /= timescale_e6, false) ||
600 paddingsamples > INT32_MAX) {
601 return;
602 }
603 }
604 }
605 ALOGV("paddingsamples = %" PRId64, paddingsamples);
606 AMediaFormat_setInt32(track->meta, AMEDIAFORMAT_KEY_ENCODER_PADDING, paddingsamples);
607 }
608 }();
609
610 if ((flags & kIncludeExtensiveMetaData)
611 && !track->includes_expensive_metadata) {
612 track->includes_expensive_metadata = true;
613
614 const char *mime;
615 CHECK(AMediaFormat_getString(track->meta, AMEDIAFORMAT_KEY_MIME, &mime));
616 if (!strncasecmp("video/", mime, 6)) {
617 // MPEG2 tracks do not provide CSD, so read the stream header
618 if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG2)) {
619 off64_t offset;
620 size_t size;
621 if (track->sampleTable->getMetaDataForSample(
622 0 /* sampleIndex */, &offset, &size, NULL /* sampleTime */) == OK) {
623 if (size > kMaxTrackHeaderSize) {
624 size = kMaxTrackHeaderSize;
625 }
626 uint8_t header[kMaxTrackHeaderSize];
627 if (mDataSource->readAt(offset, &header, size) == (ssize_t)size) {
628 AMediaFormat_setBuffer(track->meta,
629 AMEDIAFORMAT_KEY_MPEG2_STREAM_HEADER, header, size);
630 }
631 }
632 }
633
634 if (mMoofOffset > 0) {
635 int64_t duration;
636 if (AMediaFormat_getInt64(track->meta,
637 AMEDIAFORMAT_KEY_DURATION, &duration)) {
638 // nothing fancy, just pick a frame near 1/4th of the duration
639 AMediaFormat_setInt64(track->meta,
640 AMEDIAFORMAT_KEY_THUMBNAIL_TIME, duration / 4);
641 }
642 } else {
643 uint32_t sampleIndex;
644 uint64_t sampleTime;
645 if (track->timescale != 0 &&
646 track->sampleTable->findThumbnailSample(&sampleIndex) == OK
647 && track->sampleTable->getMetaDataForSample(
648 sampleIndex, NULL /* offset */, NULL /* size */,
649 &sampleTime) == OK) {
650 AMediaFormat_setInt64(track->meta,
651 AMEDIAFORMAT_KEY_THUMBNAIL_TIME,
652 ((int64_t)sampleTime * 1000000) / track->timescale);
653 }
654 }
655 }
656 }
657
658 return AMediaFormat_copy(meta, track->meta);
659 }
660
// Parses the container once and caches the outcome in mInitCheck; later
// calls return the cached status immediately.
//
// Besides walking the top-level chunks, this creates one Track per image in
// the item table for HEIF/AVIF content, sets the file-level MIME type, and
// copies all collected PSSH entries into the file metadata.
//
// Returns mInitCheck, or NO_MEMORY if the PSSH staging buffer cannot be
// allocated.
status_t MPEG4Extractor::readMetaData() {
    // Anything other than NO_INIT means a previous call already did the work.
    if (mInitCheck != NO_INIT) {
        return mInitCheck;
    }

    off64_t offset = 0;
    status_t err;
    bool sawMoovOrSidx = false;

    // Keep parsing top-level chunks until one of two conditions holds:
    //  - a moov box exists, sawMoovOrSidx is set, and an mdat or moof was found;
    //  - the file is HEIF with a valid item table, and either HEIF is
    //    preferred or there is no moov box.
    while (!((mHasMoovBox && sawMoovOrSidx && (mMdatFound || mMoofFound)) ||
             (mIsHeif && (mPreferHeif || !mHasMoovBox) &&
                     (mItemTable != NULL) && mItemTable->isValid()))) {
        off64_t orig_offset = offset;
        err = parseChunk(&offset, 0);

        // UNKNOWN_ERROR is treated as a signal rather than a failure here.
        // NOTE(review): presumably parseChunk returns it after handling a
        // moov or sidx box -- confirm in parseChunk.
        if (err != OK && err != UNKNOWN_ERROR) {
            break;
        } else if (offset <= orig_offset) {
            // only continue parsing if the offset was advanced,
            // otherwise we might end up in an infinite loop
            ALOGE("did not advance: %lld->%lld", (long long)orig_offset, (long long)offset);
            err = ERROR_MALFORMED;
            break;
        } else if (err == UNKNOWN_ERROR) {
            sawMoovOrSidx = true;
        }
    }

    // Still images: publish EXIF/XMP locations and add one track per image.
    if ((mIsAvif || mIsHeif) && (mItemTable != NULL) && (mItemTable->countImages() > 0)) {
        off64_t exifOffset;
        size_t exifSize;
        if (mItemTable->getExifOffsetAndSize(&exifOffset, &exifSize) == OK) {
            AMediaFormat_setInt64(mFileMetaData,
                    AMEDIAFORMAT_KEY_EXIF_OFFSET, (int64_t)exifOffset);
            AMediaFormat_setInt64(mFileMetaData,
                    AMEDIAFORMAT_KEY_EXIF_SIZE, (int64_t)exifSize);
        }
        off64_t xmpOffset;
        size_t xmpSize;
        if (mItemTable->getXmpOffsetAndSize(&xmpOffset, &xmpSize) == OK) {
            // TODO(chz): b/175717339
            // Use a hard-coded string here instead of named keys. The keys are available
            // only on API 31+. The mp4 extractor is part of mainline and has min_sdk_version
            // of 29. This hard-coded string can be replaced with the named constant once
            // the mp4 extractor is built against API 31+.
            AMediaFormat_setInt64(mFileMetaData,
                    "xmp-offset" /*AMEDIAFORMAT_KEY_XMP_OFFSET*/, (int64_t)xmpOffset);
            AMediaFormat_setInt64(mFileMetaData,
                    "xmp-size" /*AMEDIAFORMAT_KEY_XMP_SIZE*/, (int64_t)xmpSize);
        }
        for (uint32_t imageIndex = 0;
                imageIndex < mItemTable->countImages(); imageIndex++) {
            AMediaFormat *meta = mItemTable->getImageMeta(imageIndex);
            if (meta == NULL) {
                ALOGE("heif image %u has no meta!", imageIndex);
                continue;
            }
            // Some heif files advertise image sequence brands (eg. 'hevc') in
            // ftyp box, but don't have any valid tracks in them. Instead of
            // reporting the entire file as malformed, we override the error
            // to allow still images to be extracted.
            if (err != OK) {
                ALOGW("Extracting still images only");
                err = OK;
            }
            // Having at least one usable image is enough to count as initialized.
            mInitCheck = OK;

            ALOGV("adding %s image track %u", mIsHeif ? "HEIF" : "AVIF", imageIndex);
            // Append a new track for this image to the end of the track list.
            Track *track = new Track;
            if (mLastTrack != NULL) {
                mLastTrack->next = track;
            } else {
                mFirstTrack = track;
            }
            mLastTrack = track;

            track->meta = meta;
            // The image index is used as the track id.
            AMediaFormat_setInt32(track->meta, AMEDIAFORMAT_KEY_TRACK_ID, imageIndex);
            track->timescale = 1000000;
        }
    }

    if (mInitCheck == OK) {
        // Pick the file-level MIME type from the kinds of tracks present,
        // checked in this order: video, audio, HEIC image, AVIF image.
        if (findTrackByMimePrefix("video/") != NULL) {
            AMediaFormat_setString(mFileMetaData,
                    AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_CONTAINER_MPEG4);
        } else if (findTrackByMimePrefix("audio/") != NULL) {
            AMediaFormat_setString(mFileMetaData,
                    AMEDIAFORMAT_KEY_MIME, "audio/mp4");
        } else if (findTrackByMimePrefix(
                MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC) != NULL) {
            AMediaFormat_setString(mFileMetaData,
                    AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_CONTAINER_HEIF);
        } else if (findTrackByMimePrefix(
                MEDIA_MIMETYPE_IMAGE_AVIF) != NULL) {
            AMediaFormat_setString(mFileMetaData,
                    AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_IMAGE_AVIF);
        } else {
            AMediaFormat_setString(mFileMetaData,
                    AMEDIAFORMAT_KEY_MIME, "application/octet-stream");
        }
    } else {
        // Nothing marked the parse as successful: record the loop's last status.
        mInitCheck = err;
    }

    CHECK_NE(err, (status_t)NO_INIT);

    // copy pssh data into file metadata
    // Each entry contributes a 20-byte header (uuid + length) plus its payload.
    uint64_t psshsize = 0;
    for (size_t i = 0; i < mPssh.size(); i++) {
        psshsize += 20 + mPssh[i].datalen;
    }
    if (psshsize > 0 && psshsize <= UINT32_MAX) {
        char *buf = (char*)malloc(psshsize);
        if (!buf) {
            ALOGE("b/28471206");
            return NO_MEMORY;
        }
        char *ptr = buf;
        for (size_t i = 0; i < mPssh.size(); i++) {
            // NOTE(review): copies 20 bytes starting at "uuid"; this relies on
            // the length field directly following the uuid in the entry -- confirm.
            memcpy(ptr, mPssh[i].uuid, 20); // uuid + length
            memcpy(ptr + 20, mPssh[i].data, mPssh[i].datalen);
            ptr += (20 + mPssh[i].datalen);
        }
        AMediaFormat_setBuffer(mFileMetaData, AMEDIAFORMAT_KEY_PSSH, buf, psshsize);
        // The staging buffer is released right after being handed to the format.
        free(buf);
    }

    return mInitCheck;
}
791
792 struct PathAdder {
PathAdderandroid::PathAdder793 PathAdder(Vector<uint32_t> *path, uint32_t chunkType)
794 : mPath(path) {
795 mPath->push(chunkType);
796 }
797
~PathAdderandroid::PathAdder798 ~PathAdder() {
799 mPath->pop();
800 }
801
802 private:
803 Vector<uint32_t> *mPath;
804
805 PathAdder(const PathAdder &);
806 PathAdder &operator=(const PathAdder &);
807 };
808
underMetaDataPath(const Vector<uint32_t> & path)809 static bool underMetaDataPath(const Vector<uint32_t> &path) {
810 return path.size() >= 5
811 && path[0] == FOURCC("moov")
812 && path[1] == FOURCC("udta")
813 && path[2] == FOURCC("meta")
814 && path[3] == FOURCC("ilst");
815 }
816
underQTMetaPath(const Vector<uint32_t> & path,int32_t depth)817 static bool underQTMetaPath(const Vector<uint32_t> &path, int32_t depth) {
818 return path.size() >= 2
819 && path[0] == FOURCC("moov")
820 && path[1] == FOURCC("meta")
821 && (depth == 2
822 || (depth == 3
823 && (path[2] == FOURCC("hdlr")
824 || path[2] == FOURCC("ilst")
825 || path[2] == FOURCC("keys"))));
826 }
827
828 // Given a time in seconds since Jan 1 1904, produce a human-readable string.
convertTimeToDate(int64_t time_1904,String8 * s)829 static bool convertTimeToDate(int64_t time_1904, String8 *s) {
830 // delta between mpeg4 time and unix epoch time
831 static const int64_t delta = (((66 * 365 + 17) * 24) * 3600);
832 if (time_1904 < INT64_MIN + delta) {
833 return false;
834 }
835 time_t time_1970 = time_1904 - delta;
836
837 char tmp[32];
838 struct tm* tm = gmtime(&time_1970);
839 if (tm != NULL &&
840 strftime(tmp, sizeof(tmp), "%Y%m%dT%H%M%S.000Z", tm) > 0) {
841 s->setTo(tmp);
842 return true;
843 }
844 return false;
845 }
846
parseChunk(off64_t * offset,int depth)847 status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
848 ALOGV("entering parseChunk %lld/%d", (long long)*offset, depth);
849
850 if (*offset < 0) {
851 ALOGE("b/23540914");
852 return ERROR_MALFORMED;
853 }
854 if (depth > 100) {
855 ALOGE("b/27456299");
856 return ERROR_MALFORMED;
857 }
858 uint32_t hdr[2];
859 if (mDataSource->readAt(*offset, hdr, 8) < 8) {
860 return ERROR_IO;
861 }
862 uint64_t chunk_size = ntohl(hdr[0]);
863 int32_t chunk_type = ntohl(hdr[1]);
864 off64_t data_offset = *offset + 8;
865
866 if (chunk_size == 1) {
867 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
868 return ERROR_IO;
869 }
870 chunk_size = ntoh64(chunk_size);
871 data_offset += 8;
872
873 if (chunk_size < 16) {
874 // The smallest valid chunk is 16 bytes long in this case.
875 return ERROR_MALFORMED;
876 }
877 } else if (chunk_size == 0) {
878 if (depth == 0) {
879 // atom extends to end of file
880 off64_t sourceSize;
881 if (mDataSource->getSize(&sourceSize) == OK) {
882 chunk_size = (sourceSize - *offset);
883 } else {
884 // XXX could we just pick a "sufficiently large" value here?
885 ALOGE("atom size is 0, and data source has no size");
886 return ERROR_MALFORMED;
887 }
888 } else {
889 // not allowed for non-toplevel atoms, skip it
890 *offset += 4;
891 return OK;
892 }
893 } else if (chunk_size < 8) {
894 // The smallest valid chunk is 8 bytes long.
895 ALOGE("invalid chunk size: %" PRIu64, chunk_size);
896 return ERROR_MALFORMED;
897 }
898
899 char chunk[5];
900 MakeFourCCString(chunk_type, chunk);
901 ALOGV("chunk: %s @ %lld, %d", chunk, (long long)*offset, depth);
902
903 if (kUseHexDump) {
904 static const char kWhitespace[] = " ";
905 const char *indent = &kWhitespace[sizeof(kWhitespace) - 1 - 2 * depth];
906 printf("%sfound chunk '%s' of size %" PRIu64 "\n", indent, chunk, chunk_size);
907
908 char buffer[256];
909 size_t n = chunk_size;
910 if (n > sizeof(buffer)) {
911 n = sizeof(buffer);
912 }
913 if (mDataSource->readAt(*offset, buffer, n)
914 < (ssize_t)n) {
915 return ERROR_IO;
916 }
917
918 hexdump(buffer, n);
919 }
920
921 PathAdder autoAdder(&mPath, chunk_type);
922
923 // (data_offset - *offset) is either 8 or 16
924 off64_t chunk_data_size = chunk_size - (data_offset - *offset);
925 if (chunk_data_size < 0) {
926 ALOGE("b/23540914");
927 return ERROR_MALFORMED;
928 }
929 if (chunk_type != FOURCC("mdat") && chunk_data_size > kMaxAtomSize) {
930 char errMsg[100];
931 sprintf(errMsg, "%s atom has size %" PRId64, chunk, chunk_data_size);
932 ALOGE("%s (b/28615448)", errMsg);
933 android_errorWriteWithInfoLog(0x534e4554, "28615448", -1, errMsg, strlen(errMsg));
934 return ERROR_MALFORMED;
935 }
936
937 if (chunk_type != FOURCC("cprt")
938 && chunk_type != FOURCC("covr")
939 && mPath.size() == 5 && underMetaDataPath(mPath)) {
940 off64_t stop_offset = *offset + chunk_size;
941 *offset = data_offset;
942 while (*offset < stop_offset) {
943 status_t err = parseChunk(offset, depth + 1);
944 if (err != OK) {
945 return err;
946 }
947 }
948
949 if (*offset != stop_offset) {
950 return ERROR_MALFORMED;
951 }
952
953 return OK;
954 }
955
956 switch(chunk_type) {
957 case FOURCC("moov"):
958 case FOURCC("trak"):
959 case FOURCC("mdia"):
960 case FOURCC("minf"):
961 case FOURCC("dinf"):
962 case FOURCC("stbl"):
963 case FOURCC("mvex"):
964 case FOURCC("moof"):
965 case FOURCC("traf"):
966 case FOURCC("mfra"):
967 case FOURCC("udta"):
968 case FOURCC("ilst"):
969 case FOURCC("sinf"):
970 case FOURCC("schi"):
971 case FOURCC("edts"):
972 case FOURCC("wave"):
973 {
974 if (chunk_type == FOURCC("moov") && depth != 0) {
975 ALOGE("moov: depth %d", depth);
976 return ERROR_MALFORMED;
977 }
978
979 if (chunk_type == FOURCC("moov") && mInitCheck == OK) {
980 ALOGE("duplicate moov");
981 return ERROR_MALFORMED;
982 }
983
984 if (chunk_type == FOURCC("moof") && !mMoofFound) {
985 // store the offset of the first segment
986 mMoofFound = true;
987 mMoofOffset = *offset;
988 }
989
990 if (chunk_type == FOURCC("stbl")) {
991 ALOGV("sampleTable chunk is %" PRIu64 " bytes long.", chunk_size);
992
993 if (mDataSource->flags()
994 & (DataSourceBase::kWantsPrefetching
995 | DataSourceBase::kIsCachingDataSource)) {
996 CachedRangedDataSource *cachedSource =
997 new CachedRangedDataSource(mDataSource);
998
999 if (cachedSource->setCachedRange(
1000 *offset, chunk_size,
1001 true /* assume ownership on success */) == OK) {
1002 mDataSource = cachedSource;
1003 } else {
1004 delete cachedSource;
1005 }
1006 }
1007
1008 if (mLastTrack == NULL) {
1009 return ERROR_MALFORMED;
1010 }
1011
1012 mLastTrack->sampleTable = new SampleTable(mDataSource);
1013 }
1014
1015 bool isTrack = false;
1016 if (chunk_type == FOURCC("trak")) {
1017 if (depth != 1) {
1018 ALOGE("trak: depth %d", depth);
1019 return ERROR_MALFORMED;
1020 }
1021 isTrack = true;
1022
1023 ALOGV("adding new track");
1024 Track *track = new Track;
1025 if (mLastTrack) {
1026 mLastTrack->next = track;
1027 } else {
1028 mFirstTrack = track;
1029 }
1030 mLastTrack = track;
1031
1032 track->meta = AMediaFormat_new();
1033 AMediaFormat_setString(track->meta,
1034 AMEDIAFORMAT_KEY_MIME, "application/octet-stream");
1035 }
1036
1037 off64_t stop_offset = *offset + chunk_size;
1038 *offset = data_offset;
1039 while (*offset < stop_offset) {
1040
1041 // pass udata terminate
1042 if (mIsQT && stop_offset - *offset == 4 && chunk_type == FOURCC("udta")) {
1043 // handle the case that udta terminates with terminate code x00000000
1044 // note that 0 terminator is optional and we just handle this case.
1045 uint32_t terminate_code = 1;
1046 mDataSource->readAt(*offset, &terminate_code, 4);
1047 if (0 == terminate_code) {
1048 *offset += 4;
1049 ALOGD("Terminal code for udta");
1050 continue;
1051 } else {
1052 ALOGW("invalid udta Terminal code");
1053 }
1054 }
1055
1056 status_t err = parseChunk(offset, depth + 1);
1057 if (err != OK) {
1058 if (isTrack) {
1059 mLastTrack->skipTrack = true;
1060 break;
1061 }
1062 return err;
1063 }
1064 }
1065
1066 if (*offset != stop_offset) {
1067 return ERROR_MALFORMED;
1068 }
1069
1070 if (isTrack) {
1071 int32_t trackId;
1072 // There must be exactly one track header per track.
1073
1074 if (!AMediaFormat_getInt32(mLastTrack->meta,
1075 AMEDIAFORMAT_KEY_TRACK_ID, &trackId)) {
1076 mLastTrack->skipTrack = true;
1077 }
1078
1079 status_t err = verifyTrack(mLastTrack);
1080 if (err != OK) {
1081 mLastTrack->skipTrack = true;
1082 }
1083
1084
1085 if (mLastTrack->skipTrack) {
1086 ALOGV("skipping this track...");
1087 Track *cur = mFirstTrack;
1088
1089 if (cur == mLastTrack) {
1090 delete cur;
1091 mFirstTrack = mLastTrack = NULL;
1092 } else {
1093 while (cur && cur->next != mLastTrack) {
1094 cur = cur->next;
1095 }
1096 if (cur) {
1097 cur->next = NULL;
1098 }
1099 delete mLastTrack;
1100 mLastTrack = cur;
1101 }
1102
1103 return OK;
1104 }
1105
1106 // place things we built elsewhere into their final locations
1107
1108 // put aggregated tx3g data into the metadata
1109 if (mLastTrack->mTx3gFilled > 0) {
1110 ALOGV("Putting %zu bytes of tx3g data into meta data",
1111 mLastTrack->mTx3gFilled);
1112 AMediaFormat_setBuffer(mLastTrack->meta,
1113 AMEDIAFORMAT_KEY_TEXT_FORMAT_DATA,
1114 mLastTrack->mTx3gBuffer, mLastTrack->mTx3gFilled);
1115 // drop it now to reduce our footprint
1116 free(mLastTrack->mTx3gBuffer);
1117 mLastTrack->mTx3gBuffer = NULL;
1118 mLastTrack->mTx3gFilled = 0;
1119 mLastTrack->mTx3gSize = 0;
1120 }
1121
1122 const char *mime;
1123 AMediaFormat_getString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, &mime);
1124
1125 if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_DOLBY_VISION)) {
1126 void *data;
1127 size_t size;
1128
1129 if (AMediaFormat_getBuffer(mLastTrack->meta, AMEDIAFORMAT_KEY_CSD_2,
1130 &data, &size)
1131 && size >= 5) {
1132 const uint8_t *ptr = (const uint8_t *)data;
1133 const uint8_t profile = ptr[2] >> 1;
1134 const uint8_t bl_compatibility_id = (ptr[4]) >> 4;
1135 bool create_two_tracks = false;
1136
1137 if (bl_compatibility_id && bl_compatibility_id != 15) {
1138 create_two_tracks = true;
1139 }
1140
1141 if (4 == profile || 7 == profile ||
1142 (profile >= 8 && profile < 11 && create_two_tracks)) {
1143 // we need a backward compatible track
1144 ALOGV("Adding new backward compatible track");
1145 Track *track_b = new Track;
1146
1147 track_b->timescale = mLastTrack->timescale;
1148 track_b->sampleTable = mLastTrack->sampleTable;
1149 track_b->includes_expensive_metadata = mLastTrack->includes_expensive_metadata;
1150 track_b->skipTrack = mLastTrack->skipTrack;
1151 track_b->elst_needs_processing = mLastTrack->elst_needs_processing;
1152 track_b->elst_media_time = mLastTrack->elst_media_time;
1153 track_b->elst_segment_duration = mLastTrack->elst_segment_duration;
1154 track_b->elst_shift_start_ticks = mLastTrack->elst_shift_start_ticks;
1155 track_b->elst_initial_empty_edit_ticks = mLastTrack->elst_initial_empty_edit_ticks;
1156 track_b->subsample_encryption = mLastTrack->subsample_encryption;
1157
1158 track_b->mTx3gBuffer = mLastTrack->mTx3gBuffer;
1159 track_b->mTx3gSize = mLastTrack->mTx3gSize;
1160 track_b->mTx3gFilled = mLastTrack->mTx3gFilled;
1161
1162 track_b->meta = AMediaFormat_new();
1163 AMediaFormat_copy(track_b->meta, mLastTrack->meta);
1164
1165 mLastTrack->next = track_b;
1166 track_b->next = NULL;
1167
1168 // we want to remove the csd-2 key from the metadata, but
1169 // don't have an AMediaFormat_* function to do so. Settle
1170 // for replacing this csd-2 with an empty csd-2.
1171 uint8_t emptybuffer[8] = {};
1172 AMediaFormat_setBuffer(track_b->meta, AMEDIAFORMAT_KEY_CSD_2,
1173 emptybuffer, 0);
1174
1175 if (4 == profile || 7 == profile || 8 == profile ) {
1176 AMediaFormat_setString(track_b->meta,
1177 AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_VIDEO_HEVC);
1178 } else if (9 == profile) {
1179 AMediaFormat_setString(track_b->meta,
1180 AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_VIDEO_AVC);
1181 } else if (10 == profile) {
1182 AMediaFormat_setString(track_b->meta,
1183 AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_VIDEO_AV1);
1184 } // Should never get to else part
1185
1186 mLastTrack = track_b;
1187 }
1188 }
1189 }
1190 } else if (chunk_type == FOURCC("moov")) {
1191 mInitCheck = OK;
1192
1193 return UNKNOWN_ERROR; // Return a generic error.
1194 }
1195 break;
1196 }
1197
1198 case FOURCC("schm"):
1199 {
1200
1201 *offset += chunk_size;
1202 if (!mLastTrack) {
1203 return ERROR_MALFORMED;
1204 }
1205
1206 uint32_t scheme_type;
1207 if (mDataSource->readAt(data_offset + 4, &scheme_type, 4) < 4) {
1208 return ERROR_IO;
1209 }
1210 scheme_type = ntohl(scheme_type);
1211 int32_t mode = kCryptoModeUnencrypted;
1212 switch(scheme_type) {
1213 case FOURCC("cbc1"):
1214 {
1215 mode = kCryptoModeAesCbc;
1216 break;
1217 }
1218 case FOURCC("cbcs"):
1219 {
1220 mode = kCryptoModeAesCbc;
1221 mLastTrack->subsample_encryption = true;
1222 break;
1223 }
1224 case FOURCC("cenc"):
1225 {
1226 mode = kCryptoModeAesCtr;
1227 break;
1228 }
1229 case FOURCC("cens"):
1230 {
1231 mode = kCryptoModeAesCtr;
1232 mLastTrack->subsample_encryption = true;
1233 break;
1234 }
1235 }
1236 if (mode != kCryptoModeUnencrypted) {
1237 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CRYPTO_MODE, mode);
1238 }
1239 break;
1240 }
1241
1242
1243 case FOURCC("elst"):
1244 {
1245 *offset += chunk_size;
1246
1247 if (!mLastTrack) {
1248 return ERROR_MALFORMED;
1249 }
1250
1251 // See 14496-12 8.6.6
1252 uint8_t version;
1253 if (mDataSource->readAt(data_offset, &version, 1) < 1) {
1254 return ERROR_IO;
1255 }
1256
1257 uint32_t entry_count;
1258 if (!mDataSource->getUInt32(data_offset + 4, &entry_count)) {
1259 return ERROR_IO;
1260 }
1261
1262 if (entry_count > 2) {
1263 /* We support a single entry for gapless playback or negating offset for
1264 * reordering B frames, two entries (empty edit) for start offset at the moment.
1265 */
1266 ALOGW("ignoring edit list with %d entries", entry_count);
1267 } else {
1268 off64_t entriesoffset = data_offset + 8;
1269 uint64_t segment_duration;
1270 int64_t media_time;
1271 bool empty_edit_present = false;
1272 for (int i = 0; i < entry_count; ++i) {
1273 switch (version) {
1274 case 0: {
1275 uint32_t sd;
1276 int32_t mt;
1277 if (!mDataSource->getUInt32(entriesoffset, &sd) ||
1278 !mDataSource->getUInt32(entriesoffset + 4, (uint32_t*)&mt)) {
1279 return ERROR_IO;
1280 }
1281 segment_duration = sd;
1282 media_time = mt;
1283 // 4(segment duration) + 4(media time) + 4(media rate)
1284 entriesoffset += 12;
1285 break;
1286 }
1287 case 1: {
1288 if (!mDataSource->getUInt64(entriesoffset, &segment_duration) ||
1289 !mDataSource->getUInt64(entriesoffset + 8, (uint64_t*)&media_time)) {
1290 return ERROR_IO;
1291 }
1292 // 8(segment duration) + 8(media time) + 4(media rate)
1293 entriesoffset += 20;
1294 break;
1295 }
1296 default:
1297 return ERROR_IO;
1298 break;
1299 }
1300 // Empty edit entry would have to be first entry.
1301 if (media_time == -1 && i == 0) {
1302 empty_edit_present = true;
1303 ALOGV("initial empty edit ticks: %" PRIu64, segment_duration);
1304 /* In movie header timescale, and needs to be converted to media timescale
1305 * after we get that from a track's 'mdhd' atom,
1306 * which at times come after 'elst'.
1307 */
1308 mLastTrack->elst_initial_empty_edit_ticks = segment_duration;
1309 } else if (media_time >= 0 && i == 0) {
1310 ALOGV("first edit list entry - from gapless playback files");
1311 mLastTrack->elst_media_time = media_time;
1312 mLastTrack->elst_segment_duration = segment_duration;
1313 ALOGV("segment_duration: %" PRIu64 " media_time: %" PRId64,
1314 segment_duration, media_time);
1315 // media_time is in media timescale as are STTS/CTTS entries.
1316 mLastTrack->elst_shift_start_ticks = media_time;
1317 } else if (empty_edit_present && i == 1) {
1318 // Process second entry only when the first entry was an empty edit entry.
1319 ALOGV("second edit list entry");
1320 mLastTrack->elst_shift_start_ticks = media_time;
1321 } else {
1322 ALOGW("for now, unsupported entry in edit list %" PRIu32, entry_count);
1323 }
1324 }
1325 // save these for later, because the elst atom might precede
1326 // the atoms that actually gives us the duration and sample rate
1327 // needed to calculate the padding and delay values
1328 mLastTrack->elst_needs_processing = true;
1329 }
1330 break;
1331 }
1332
1333 case FOURCC("frma"):
1334 {
1335 *offset += chunk_size;
1336
1337 uint32_t original_fourcc;
1338 if (mDataSource->readAt(data_offset, &original_fourcc, 4) < 4) {
1339 return ERROR_IO;
1340 }
1341 original_fourcc = ntohl(original_fourcc);
1342 ALOGV("read original format: %d", original_fourcc);
1343
1344 if (mLastTrack == NULL) {
1345 return ERROR_MALFORMED;
1346 }
1347
1348 AMediaFormat_setString(mLastTrack->meta,
1349 AMEDIAFORMAT_KEY_MIME, FourCC2MIME(original_fourcc));
1350 uint32_t num_channels = 0;
1351 uint32_t sample_rate = 0;
1352 if (AdjustChannelsAndRate(original_fourcc, &num_channels, &sample_rate)) {
1353 AMediaFormat_setInt32(mLastTrack->meta,
1354 AMEDIAFORMAT_KEY_CHANNEL_COUNT, num_channels);
1355 AMediaFormat_setInt32(mLastTrack->meta,
1356 AMEDIAFORMAT_KEY_SAMPLE_RATE, sample_rate);
1357 }
1358
1359 if (!mIsQT && original_fourcc == FOURCC("alac")) {
1360 off64_t tmpOffset = *offset;
1361 status_t err = parseALACSampleEntry(&tmpOffset);
1362 if (err != OK) {
1363 ALOGE("parseALACSampleEntry err:%d Line:%d", err, __LINE__);
1364 return err;
1365 }
1366 *offset = tmpOffset + 8;
1367 }
1368
1369 break;
1370 }
1371
1372 case FOURCC("tenc"):
1373 {
1374 *offset += chunk_size;
1375
1376 if (chunk_size < 32) {
1377 return ERROR_MALFORMED;
1378 }
1379
1380 // tenc box contains 1 byte version, 3 byte flags, 3 byte default algorithm id, one byte
1381 // default IV size, 16 bytes default KeyID
1382 // (ISO 23001-7)
1383
1384 uint8_t version;
1385 if (mDataSource->readAt(data_offset, &version, sizeof(version))
1386 < (ssize_t)sizeof(version)) {
1387 return ERROR_IO;
1388 }
1389
1390 uint8_t buf[4];
1391 memset(buf, 0, 4);
1392 if (mDataSource->readAt(data_offset + 4, buf + 1, 3) < 3) {
1393 return ERROR_IO;
1394 }
1395
1396 if (mLastTrack == NULL) {
1397 return ERROR_MALFORMED;
1398 }
1399
1400 uint8_t defaultEncryptedByteBlock = 0;
1401 uint8_t defaultSkipByteBlock = 0;
1402 uint32_t defaultAlgorithmId = ntohl(*((int32_t*)buf));
1403 if (version == 1) {
1404 uint32_t pattern = buf[2];
1405 defaultEncryptedByteBlock = pattern >> 4;
1406 defaultSkipByteBlock = pattern & 0xf;
1407 if (defaultEncryptedByteBlock == 0 && defaultSkipByteBlock == 0) {
1408 // use (1,0) to mean "encrypt everything"
1409 defaultEncryptedByteBlock = 1;
1410 }
1411 } else if (mLastTrack->subsample_encryption) {
1412 ALOGW("subsample_encryption should be version 1");
1413 } else if (defaultAlgorithmId > 1) {
1414 // only 0 (clear) and 1 (AES-128) are valid
1415 ALOGW("defaultAlgorithmId: %u is a reserved value", defaultAlgorithmId);
1416 defaultAlgorithmId = 1;
1417 }
1418
1419 memset(buf, 0, 4);
1420 if (mDataSource->readAt(data_offset + 7, buf + 3, 1) < 1) {
1421 return ERROR_IO;
1422 }
1423 uint32_t defaultIVSize = ntohl(*((int32_t*)buf));
1424
1425 if (defaultAlgorithmId == 0 && defaultIVSize != 0) {
1426 // only unencrypted data must have 0 IV size
1427 return ERROR_MALFORMED;
1428 } else if (defaultIVSize != 0 &&
1429 defaultIVSize != 8 &&
1430 defaultIVSize != 16) {
1431 return ERROR_MALFORMED;
1432 }
1433
1434 uint8_t defaultKeyId[16];
1435
1436 if (mDataSource->readAt(data_offset + 8, &defaultKeyId, 16) < 16) {
1437 return ERROR_IO;
1438 }
1439
1440 sp<ABuffer> defaultConstantIv;
1441 if (defaultAlgorithmId != 0 && defaultIVSize == 0) {
1442
1443 uint8_t ivlength;
1444 if (mDataSource->readAt(data_offset + 24, &ivlength, sizeof(ivlength))
1445 < (ssize_t)sizeof(ivlength)) {
1446 return ERROR_IO;
1447 }
1448
1449 if (ivlength != 8 && ivlength != 16) {
1450 ALOGW("unsupported IV length: %u", ivlength);
1451 return ERROR_MALFORMED;
1452 }
1453
1454 defaultConstantIv = new ABuffer(ivlength);
1455 if (mDataSource->readAt(data_offset + 25, defaultConstantIv->data(), ivlength)
1456 < (ssize_t)ivlength) {
1457 return ERROR_IO;
1458 }
1459
1460 defaultConstantIv->setRange(0, ivlength);
1461 }
1462
1463 int32_t tmpAlgorithmId;
1464 if (!AMediaFormat_getInt32(mLastTrack->meta,
1465 AMEDIAFORMAT_KEY_CRYPTO_MODE, &tmpAlgorithmId)) {
1466 AMediaFormat_setInt32(mLastTrack->meta,
1467 AMEDIAFORMAT_KEY_CRYPTO_MODE, defaultAlgorithmId);
1468 }
1469
1470 AMediaFormat_setInt32(mLastTrack->meta,
1471 AMEDIAFORMAT_KEY_CRYPTO_DEFAULT_IV_SIZE, defaultIVSize);
1472 AMediaFormat_setBuffer(mLastTrack->meta,
1473 AMEDIAFORMAT_KEY_CRYPTO_KEY, defaultKeyId, 16);
1474 AMediaFormat_setInt32(mLastTrack->meta,
1475 AMEDIAFORMAT_KEY_CRYPTO_ENCRYPTED_BYTE_BLOCK, defaultEncryptedByteBlock);
1476 AMediaFormat_setInt32(mLastTrack->meta,
1477 AMEDIAFORMAT_KEY_CRYPTO_SKIP_BYTE_BLOCK, defaultSkipByteBlock);
1478 if (defaultConstantIv != NULL) {
1479 AMediaFormat_setBuffer(mLastTrack->meta, AMEDIAFORMAT_KEY_CRYPTO_IV,
1480 defaultConstantIv->data(), defaultConstantIv->size());
1481 }
1482 break;
1483 }
1484
1485 case FOURCC("tkhd"):
1486 {
1487 *offset += chunk_size;
1488
1489 status_t err;
1490 if ((err = parseTrackHeader(data_offset, chunk_data_size)) != OK) {
1491 return err;
1492 }
1493
1494 break;
1495 }
1496
1497 case FOURCC("tref"):
1498 {
1499 off64_t stop_offset = *offset + chunk_size;
1500 *offset = data_offset;
1501 while (*offset < stop_offset) {
1502 status_t err = parseChunk(offset, depth + 1);
1503 if (err != OK) {
1504 return err;
1505 }
1506 }
1507 if (*offset != stop_offset) {
1508 return ERROR_MALFORMED;
1509 }
1510 break;
1511 }
1512
1513 case FOURCC("thmb"):
1514 {
1515 *offset += chunk_size;
1516
1517 if (mLastTrack != NULL) {
1518 // Skip thumbnail track for now since we don't have an
1519 // API to retrieve it yet.
1520 // The thumbnail track can't be accessed by negative index or time,
1521 // because each timed sample has its own corresponding thumbnail
1522 // in the thumbnail track. We'll need a dedicated API to retrieve
1523 // thumbnail at time instead.
1524 mLastTrack->skipTrack = true;
1525 }
1526
1527 break;
1528 }
1529
1530 case FOURCC("pssh"):
1531 {
1532 *offset += chunk_size;
1533
1534 PsshInfo pssh;
1535
1536 if (mDataSource->readAt(data_offset + 4, &pssh.uuid, 16) < 16) {
1537 return ERROR_IO;
1538 }
1539
1540 uint32_t psshdatalen = 0;
1541 if (mDataSource->readAt(data_offset + 20, &psshdatalen, 4) < 4) {
1542 return ERROR_IO;
1543 }
1544 pssh.datalen = ntohl(psshdatalen);
1545 ALOGV("pssh data size: %d", pssh.datalen);
1546 if (chunk_size < 20 || pssh.datalen > chunk_size - 20) {
1547 // pssh data length exceeds size of containing box
1548 return ERROR_MALFORMED;
1549 }
1550
1551 pssh.data = new (std::nothrow) uint8_t[pssh.datalen];
1552 if (pssh.data == NULL) {
1553 return ERROR_MALFORMED;
1554 }
1555 ALOGV("allocated pssh @ %p", pssh.data);
1556 ssize_t requested = (ssize_t) pssh.datalen;
1557 if (mDataSource->readAt(data_offset + 24, pssh.data, requested) < requested) {
1558 delete[] pssh.data;
1559 return ERROR_IO;
1560 }
1561 mPssh.push_back(pssh);
1562
1563 break;
1564 }
1565
1566 case FOURCC("mdhd"):
1567 {
1568 *offset += chunk_size;
1569
1570 if (chunk_data_size < 4 || mLastTrack == NULL) {
1571 return ERROR_MALFORMED;
1572 }
1573
1574 uint8_t version;
1575 if (mDataSource->readAt(
1576 data_offset, &version, sizeof(version))
1577 < (ssize_t)sizeof(version)) {
1578 return ERROR_IO;
1579 }
1580
1581 off64_t timescale_offset;
1582
1583 if (version == 1) {
1584 timescale_offset = data_offset + 4 + 16;
1585 } else if (version == 0) {
1586 timescale_offset = data_offset + 4 + 8;
1587 } else {
1588 return ERROR_IO;
1589 }
1590
1591 uint32_t timescale;
1592 if (mDataSource->readAt(
1593 timescale_offset, ×cale, sizeof(timescale))
1594 < (ssize_t)sizeof(timescale)) {
1595 return ERROR_IO;
1596 }
1597
1598 if (!timescale) {
1599 ALOGE("timescale should not be ZERO.");
1600 return ERROR_MALFORMED;
1601 }
1602
1603 mLastTrack->timescale = ntohl(timescale);
1604
1605 // 14496-12 says all ones means indeterminate, but some files seem to use
1606 // 0 instead. We treat both the same.
1607 int64_t duration = 0;
1608 if (version == 1) {
1609 if (mDataSource->readAt(
1610 timescale_offset + 4, &duration, sizeof(duration))
1611 < (ssize_t)sizeof(duration)) {
1612 return ERROR_IO;
1613 }
1614 if (duration != -1) {
1615 duration = ntoh64(duration);
1616 }
1617 } else {
1618 uint32_t duration32;
1619 if (mDataSource->readAt(
1620 timescale_offset + 4, &duration32, sizeof(duration32))
1621 < (ssize_t)sizeof(duration32)) {
1622 return ERROR_IO;
1623 }
1624 if (duration32 != 0xffffffff) {
1625 duration = ntohl(duration32);
1626 }
1627 }
1628 if (duration != 0 && mLastTrack->timescale != 0) {
1629 long double durationUs = ((long double)duration * 1000000) / mLastTrack->timescale;
1630 if (durationUs < 0 || durationUs > INT64_MAX) {
1631 ALOGE("cannot represent %lld * 1000000 / %lld in 64 bits",
1632 (long long) duration, (long long) mLastTrack->timescale);
1633 return ERROR_MALFORMED;
1634 }
1635 AMediaFormat_setInt64(mLastTrack->meta, AMEDIAFORMAT_KEY_DURATION, durationUs);
1636 }
1637
1638 uint8_t lang[2];
1639 off64_t lang_offset;
1640 if (version == 1) {
1641 lang_offset = timescale_offset + 4 + 8;
1642 } else if (version == 0) {
1643 lang_offset = timescale_offset + 4 + 4;
1644 } else {
1645 return ERROR_IO;
1646 }
1647
1648 if (mDataSource->readAt(lang_offset, &lang, sizeof(lang))
1649 < (ssize_t)sizeof(lang)) {
1650 return ERROR_IO;
1651 }
1652
1653 // To get the ISO-639-2/T three character language code
1654 // 1 bit pad followed by 3 5-bits characters. Each character
1655 // is packed as the difference between its ASCII value and 0x60.
1656 char lang_code[4];
1657 lang_code[0] = ((lang[0] >> 2) & 0x1f) + 0x60;
1658 lang_code[1] = ((lang[0] & 0x3) << 3 | (lang[1] >> 5)) + 0x60;
1659 lang_code[2] = (lang[1] & 0x1f) + 0x60;
1660 lang_code[3] = '\0';
1661
1662 AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_LANGUAGE, lang_code);
1663
1664 break;
1665 }
1666
1667 case FOURCC("stsd"):
1668 {
1669 uint8_t buffer[8];
1670 if (chunk_data_size < (off64_t)sizeof(buffer)) {
1671 return ERROR_MALFORMED;
1672 }
1673
1674 if (mDataSource->readAt(
1675 data_offset, buffer, 8) < 8) {
1676 return ERROR_IO;
1677 }
1678
1679 if (U32_AT(buffer) != 0) {
1680 // Should be version 0, flags 0.
1681 return ERROR_MALFORMED;
1682 }
1683
1684 uint32_t entry_count = U32_AT(&buffer[4]);
1685
1686 if (entry_count > 1) {
1687 // For 3GPP timed text, there could be multiple tx3g boxes contain
1688 // multiple text display formats. These formats will be used to
1689 // display the timed text.
1690 // For encrypted files, there may also be more than one entry.
1691 const char *mime;
1692
1693 if (mLastTrack == NULL)
1694 return ERROR_MALFORMED;
1695
1696 CHECK(AMediaFormat_getString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, &mime));
1697 if (strcasecmp(mime, MEDIA_MIMETYPE_TEXT_3GPP) &&
1698 strcasecmp(mime, "application/octet-stream")) {
1699 // For now we only support a single type of media per track.
1700 mLastTrack->skipTrack = true;
1701 *offset += chunk_size;
1702 break;
1703 }
1704 }
1705 off64_t stop_offset = *offset + chunk_size;
1706 *offset = data_offset + 8;
1707 for (uint32_t i = 0; i < entry_count; ++i) {
1708 status_t err = parseChunk(offset, depth + 1);
1709 if (err != OK) {
1710 return err;
1711 }
1712 }
1713
1714 if (*offset != stop_offset) {
1715 return ERROR_MALFORMED;
1716 }
1717 break;
1718 }
1719 case FOURCC("mett"):
1720 {
1721 *offset += chunk_size;
1722
1723 // the absolute minimum size of a compliant mett box is 11 bytes:
1724 // 6 byte reserved, 2 byte index, null byte, one char mime_format, null byte
1725 // The resulting mime_format would be invalid at that size though.
1726 if (mLastTrack == NULL || chunk_data_size < 11) {
1727 return ERROR_MALFORMED;
1728 }
1729
1730 auto buffer = heapbuffer<uint8_t>(chunk_data_size);
1731 if (buffer.get() == NULL) {
1732 return NO_MEMORY;
1733 }
1734
1735 if (mDataSource->readAt(
1736 data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
1737 return ERROR_IO;
1738 }
1739
1740 // ISO-14496-12:
1741 // int8 reserved[6]; // should be all zeroes
1742 // int16_t data_reference_index;
1743 // char content_encoding[]; // null terminated, optional (= just the null byte)
1744 // char mime_format[]; // null terminated, mandatory
1745 // optional other boxes
1746 //
1747 // API < 29:
1748 // char mime_format[]; // null terminated
1749 //
1750 // API >= 29
1751 // char mime_format[]; // null terminated
1752 // char mime_format[]; // null terminated
1753
1754 // Prior to API 29, the metadata track was not compliant with ISO/IEC
1755 // 14496-12-2015. This led to some ISO-compliant parsers failing to read the
1756 // metatrack. As of API 29 and onwards, a change was made to metadata track to
1757 // make it somewhat compatible with the standard. The workaround is to write the
1758 // null-terminated mime_format string twice. This allows compliant parsers to
1759 // read the missing reserved, data_reference_index, and content_encoding fields
1760 // from the first mime_type string. The actual mime_format field would then be
1761 // read correctly from the second string. The non-compliant Android frameworks
1762 // from API 28 and earlier would still be able to read the mime_format correctly
1763 // as it would only read the first null-terminated mime_format string. To enable
1764 // reading metadata tracks generated from both the non-compliant and compliant
1765 // formats, a check needs to be done to see which format is used.
1766 const char *str = (const char*) buffer.get();
1767 size_t string_length = strnlen(str, chunk_data_size);
1768
1769 if (string_length == chunk_data_size - 1) {
1770 // This is likely a pre API 29 file, since it's a single null terminated
1771 // string filling the entire box.
1772 AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, str);
1773 } else {
1774 // This might be a fully compliant metadata track, a "double mime" compatibility
1775 // track, or anything else, including a single non-terminated string, so we need
1776 // to determine the length of each string we want to parse out of the box.
1777 size_t encoding_length = strnlen(str + 8, chunk_data_size - 8);
1778 if (encoding_length + 8 >= chunk_data_size - 2) {
1779 // the encoding extends to the end of the box, so there's no mime_format
1780 return ERROR_MALFORMED;
1781 }
1782 String8 contentEncoding(str + 8, encoding_length);
1783 String8 mimeFormat(str + 8 + encoding_length + 1,
1784 chunk_data_size - 8 - encoding_length - 1);
1785 AMediaFormat_setString(mLastTrack->meta,
1786 AMEDIAFORMAT_KEY_MIME, mimeFormat.string());
1787 }
1788 break;
1789 }
1790
1791 case FOURCC("mp4a"):
1792 case FOURCC("enca"):
1793 case FOURCC("samr"):
1794 case FOURCC("sawb"):
1795 case FOURCC("Opus"):
1796 case FOURCC("twos"):
1797 case FOURCC("sowt"):
1798 case FOURCC("alac"):
1799 case FOURCC("fLaC"):
1800 case FOURCC(".mp3"):
1801 case 0x6D730055: // "ms U" mp3 audio
1802 case FOURCC("mha1"):
1803 case FOURCC("mhm1"):
1804 {
1805 if (mIsQT && depth >= 1 && mPath[depth - 1] == FOURCC("wave")) {
1806
1807 if (chunk_type == FOURCC("alac")) {
1808 off64_t offsetTmp = *offset;
1809 status_t err = parseALACSampleEntry(&offsetTmp);
1810 if (err != OK) {
1811 ALOGE("parseALACSampleEntry err:%d Line:%d", err, __LINE__);
1812 return err;
1813 }
1814 }
1815
1816 // Ignore all atoms embedded in QT wave atom
1817 ALOGV("Ignore all atoms embedded in QT wave atom");
1818 *offset += chunk_size;
1819 break;
1820 }
1821
1822 uint8_t buffer[8 + 20];
1823 if (chunk_data_size < (ssize_t)sizeof(buffer)) {
1824 // Basic AudioSampleEntry size.
1825 return ERROR_MALFORMED;
1826 }
1827
1828 if (mDataSource->readAt(
1829 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
1830 return ERROR_IO;
1831 }
1832
1833 // we can get data_ref_index value from U16_AT(&buffer[6])
1834 uint16_t version = U16_AT(&buffer[8]);
1835 uint32_t num_channels = U16_AT(&buffer[16]);
1836
1837 uint16_t sample_size = U16_AT(&buffer[18]);
1838 uint32_t sample_rate = U32_AT(&buffer[24]) >> 16;
1839
1840 if (mLastTrack == NULL)
1841 return ERROR_MALFORMED;
1842
1843 off64_t stop_offset = *offset + chunk_size;
1844 *offset = data_offset + sizeof(buffer);
1845
1846 if (mIsQT) {
1847 if (version == 1) {
1848 if (mDataSource->readAt(*offset, buffer, 16) < 16) {
1849 return ERROR_IO;
1850 }
1851
1852 #if 0
1853 U32_AT(buffer); // samples per packet
1854 U32_AT(&buffer[4]); // bytes per packet
1855 U32_AT(&buffer[8]); // bytes per frame
1856 U32_AT(&buffer[12]); // bytes per sample
1857 #endif
1858 *offset += 16;
1859 } else if (version == 2) {
1860 uint8_t v2buffer[36];
1861 if (mDataSource->readAt(*offset, v2buffer, 36) < 36) {
1862 return ERROR_IO;
1863 }
1864
1865 #if 0
1866 U32_AT(v2buffer); // size of struct only
1867 sample_rate = (uint32_t)U64_AT(&v2buffer[4]); // audio sample rate
1868 num_channels = U32_AT(&v2buffer[12]); // num audio channels
1869 U32_AT(&v2buffer[16]); // always 0x7f000000
1870 sample_size = (uint16_t)U32_AT(&v2buffer[20]); // const bits per channel
1871 U32_AT(&v2buffer[24]); // format specifc flags
1872 U32_AT(&v2buffer[28]); // const bytes per audio packet
1873 U32_AT(&v2buffer[32]); // const LPCM frames per audio packet
1874 #endif
1875 *offset += 36;
1876 }
1877 }
1878
1879 if (chunk_type != FOURCC("enca")) {
1880 // if the chunk type is enca, we'll get the type from the frma box later
1881 AMediaFormat_setString(mLastTrack->meta,
1882 AMEDIAFORMAT_KEY_MIME, FourCC2MIME(chunk_type));
1883 AdjustChannelsAndRate(chunk_type, &num_channels, &sample_rate);
1884
1885 if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_RAW, FourCC2MIME(chunk_type))) {
1886 AMediaFormat_setInt32(mLastTrack->meta,
1887 AMEDIAFORMAT_KEY_BITS_PER_SAMPLE, sample_size);
1888 if (chunk_type == FOURCC("twos")) {
1889 AMediaFormat_setInt32(mLastTrack->meta,
1890 AMEDIAFORMAT_KEY_PCM_BIG_ENDIAN, 1);
1891 }
1892 }
1893 }
1894 ALOGV("*** coding='%s' %d channels, size %d, rate %d\n",
1895 chunk, num_channels, sample_size, sample_rate);
1896 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, num_channels);
1897 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, sample_rate);
1898
1899 if (chunk_type == FOURCC("Opus")) {
1900 uint8_t opusInfo[AOPUS_OPUSHEAD_MAXSIZE];
1901 data_offset += sizeof(buffer);
1902 size_t opusInfoSize = chunk_data_size - sizeof(buffer);
1903
1904 if (opusInfoSize < AOPUS_OPUSHEAD_MINSIZE ||
1905 opusInfoSize > AOPUS_OPUSHEAD_MAXSIZE) {
1906 return ERROR_MALFORMED;
1907 }
1908 // Read Opus Header
1909 if (mDataSource->readAt(
1910 data_offset, opusInfo, opusInfoSize) < opusInfoSize) {
1911 return ERROR_IO;
1912 }
1913
1914 // OpusHeader must start with this magic sequence, overwrite first 8 bytes
1915 // http://wiki.xiph.org/OggOpus#ID_Header
1916 strncpy((char *)opusInfo, "OpusHead", 8);
1917
1918 // Version shall be 0 as per mp4 Opus Specific Box
1919 // (https://opus-codec.org/docs/opus_in_isobmff.html#4.3.2)
1920 if (opusInfo[8]) {
1921 return ERROR_MALFORMED;
1922 }
1923 // Force version to 1 as per OpusHead definition
1924 // (http://wiki.xiph.org/OggOpus#ID_Header)
1925 opusInfo[8] = 1;
1926
1927 // Read Opus Specific Box values
1928 size_t opusOffset = 10;
1929 uint16_t pre_skip = U16_AT(&opusInfo[opusOffset]);
1930 uint32_t sample_rate = U32_AT(&opusInfo[opusOffset + 2]);
1931 uint16_t out_gain = U16_AT(&opusInfo[opusOffset + 6]);
1932
1933 // Convert Opus Specific Box values. ParseOpusHeader expects
1934 // the values in LE, however MP4 stores these values as BE
1935 // https://opus-codec.org/docs/opus_in_isobmff.html#4.3.2
1936 memcpy(&opusInfo[opusOffset], &pre_skip, sizeof(pre_skip));
1937 memcpy(&opusInfo[opusOffset + 2], &sample_rate, sizeof(sample_rate));
1938 memcpy(&opusInfo[opusOffset + 6], &out_gain, sizeof(out_gain));
1939
1940 static const int64_t kSeekPreRollNs = 80000000; // Fixed 80 msec
1941 static const int32_t kOpusSampleRate = 48000;
1942 int64_t codecDelay = pre_skip * 1000000000ll / kOpusSampleRate;
1943
1944 AMediaFormat_setBuffer(mLastTrack->meta,
1945 AMEDIAFORMAT_KEY_CSD_0, opusInfo, opusInfoSize);
1946 AMediaFormat_setBuffer(mLastTrack->meta,
1947 AMEDIAFORMAT_KEY_CSD_1, &codecDelay, sizeof(codecDelay));
1948 AMediaFormat_setBuffer(mLastTrack->meta,
1949 AMEDIAFORMAT_KEY_CSD_2, &kSeekPreRollNs, sizeof(kSeekPreRollNs));
1950
1951 data_offset += opusInfoSize;
1952 *offset = data_offset;
1953 CHECK_EQ(*offset, stop_offset);
1954 }
1955
1956 if (!mIsQT && chunk_type == FOURCC("alac")) {
1957 data_offset += sizeof(buffer);
1958
1959 status_t err = parseALACSampleEntry(&data_offset);
1960 if (err != OK) {
1961 ALOGE("parseALACSampleEntry err:%d Line:%d", err, __LINE__);
1962 return err;
1963 }
1964 *offset = data_offset;
1965 CHECK_EQ(*offset, stop_offset);
1966 }
1967
1968 if (chunk_type == FOURCC("fLaC")) {
1969
1970 // From https://github.com/xiph/flac/blob/master/doc/isoflac.txt
1971 // 4 for mime, 4 for blockType and BlockLen, 34 for metadata
1972 uint8_t flacInfo[4 + 4 + 34];
1973 // skipping dFla, version
1974 data_offset += sizeof(buffer) + 12;
1975 size_t flacOffset = 4;
1976 // Add flaC header mime type to CSD
1977 strncpy((char *)flacInfo, "fLaC", 4);
1978 if (mDataSource->readAt(
1979 data_offset, flacInfo + flacOffset, sizeof(flacInfo) - flacOffset) <
1980 (ssize_t)sizeof(flacInfo) - flacOffset) {
1981 return ERROR_IO;
1982 }
1983 data_offset += sizeof(flacInfo) - flacOffset;
1984
1985 AMediaFormat_setBuffer(mLastTrack->meta, AMEDIAFORMAT_KEY_CSD_0, flacInfo,
1986 sizeof(flacInfo));
1987 *offset = data_offset;
1988 CHECK_EQ(*offset, stop_offset);
1989 }
1990
1991 while (*offset < stop_offset) {
1992 status_t err = parseChunk(offset, depth + 1);
1993 if (err != OK) {
1994 return err;
1995 }
1996 }
1997
1998 if (*offset != stop_offset) {
1999 return ERROR_MALFORMED;
2000 }
2001 break;
2002 }
2003 case FOURCC("mhaC"):
2004 {
2005 // See ISO_IEC_23008-3;2019 MHADecoderConfigurationRecord
2006 constexpr uint32_t mhac_header_size = 4 /* size */ + 4 /* boxtype 'mhaC' */
2007 + 1 /* configurationVersion */ + 1 /* mpegh3daProfileLevelIndication */
2008 + 1 /* referenceChannelLayout */ + 2 /* mpegh3daConfigLength */;
2009 uint8_t mhac_header[mhac_header_size];
2010 off64_t data_offset = *offset;
2011
2012 if (chunk_size < sizeof(mhac_header)) {
2013 return ERROR_MALFORMED;
2014 }
2015
2016 if (mDataSource->readAt(data_offset, mhac_header, sizeof(mhac_header))
2017 < (ssize_t)sizeof(mhac_header)) {
2018 return ERROR_IO;
2019 }
2020
2021 //get mpegh3daProfileLevelIndication
2022 const uint32_t mpegh3daProfileLevelIndication = mhac_header[9];
2023 AMediaFormat_setInt32(mLastTrack->meta,
2024 AMEDIAFORMAT_KEY_MPEGH_PROFILE_LEVEL_INDICATION,
2025 mpegh3daProfileLevelIndication);
2026
2027 //get referenceChannelLayout
2028 const uint32_t referenceChannelLayout = mhac_header[10];
2029 AMediaFormat_setInt32(mLastTrack->meta,
2030 AMEDIAFORMAT_KEY_MPEGH_REFERENCE_CHANNEL_LAYOUT,
2031 referenceChannelLayout);
2032
2033 // get mpegh3daConfigLength
2034 const uint32_t mhac_config_size = U16_AT(&mhac_header[11]);
2035 if (chunk_size != sizeof(mhac_header) + mhac_config_size) {
2036 return ERROR_MALFORMED;
2037 }
2038
2039 data_offset += sizeof(mhac_header);
2040 uint8_t mhac_config[mhac_config_size];
2041 if (mDataSource->readAt(data_offset, mhac_config, sizeof(mhac_config))
2042 < (ssize_t)sizeof(mhac_config)) {
2043 return ERROR_IO;
2044 }
2045
2046 AMediaFormat_setBuffer(mLastTrack->meta,
2047 AMEDIAFORMAT_KEY_CSD_0, mhac_config, sizeof(mhac_config));
2048 data_offset += sizeof(mhac_config);
2049 *offset = data_offset;
2050 break;
2051 }
2052 case FOURCC("mhaP"):
2053 {
2054 // FDAmd_2 of ISO_IEC_23008-3;2019 MHAProfileAndLevelCompatibilitySetBox
2055 constexpr uint32_t mhap_header_size = 4 /* size */ + 4 /* boxtype 'mhaP' */
2056 + 1 /* numCompatibleSets */;
2057
2058 uint8_t mhap_header[mhap_header_size];
2059 off64_t data_offset = *offset;
2060
2061 if (chunk_size < (ssize_t)mhap_header_size) {
2062 return ERROR_MALFORMED;
2063 }
2064
2065 if (mDataSource->readAt(data_offset, mhap_header, sizeof(mhap_header))
2066 < (ssize_t)sizeof(mhap_header)) {
2067 return ERROR_IO;
2068 }
2069
2070 // mhap_compatible_sets_size = numCompatibleSets * sizeof(uint8_t)
2071 const uint32_t mhap_compatible_sets_size = mhap_header[8];
2072 if (chunk_size != sizeof(mhap_header) + mhap_compatible_sets_size) {
2073 return ERROR_MALFORMED;
2074 }
2075
2076 data_offset += sizeof(mhap_header);
2077 uint8_t mhap_compatible_sets[mhap_compatible_sets_size];
2078 if (mDataSource->readAt(
2079 data_offset, mhap_compatible_sets, sizeof(mhap_compatible_sets))
2080 < (ssize_t)sizeof(mhap_compatible_sets)) {
2081 return ERROR_IO;
2082 }
2083
2084 AMediaFormat_setBuffer(mLastTrack->meta,
2085 AMEDIAFORMAT_KEY_MPEGH_COMPATIBLE_SETS,
2086 mhap_compatible_sets, sizeof(mhap_compatible_sets));
2087 data_offset += sizeof(mhap_compatible_sets);
2088 *offset = data_offset;
2089 break;
2090 }
2091 case FOURCC("mp4v"):
2092 case FOURCC("encv"):
2093 case FOURCC("s263"):
2094 case FOURCC("H263"):
2095 case FOURCC("h263"):
2096 case FOURCC("avc1"):
2097 case FOURCC("hvc1"):
2098 case FOURCC("hev1"):
2099 case FOURCC("dvav"):
2100 case FOURCC("dva1"):
2101 case FOURCC("dvhe"):
2102 case FOURCC("dvh1"):
2103 case FOURCC("dav1"):
2104 case FOURCC("av01"):
2105 case FOURCC("vp09"):
2106 {
2107 uint8_t buffer[78];
2108 if (chunk_data_size < (ssize_t)sizeof(buffer)) {
2109 // Basic VideoSampleEntry size.
2110 return ERROR_MALFORMED;
2111 }
2112
2113 if (mDataSource->readAt(
2114 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
2115 return ERROR_IO;
2116 }
2117
2118 // we can get data_ref_index value from U16_AT(&buffer[6])
2119 uint16_t width = U16_AT(&buffer[6 + 18]);
2120 uint16_t height = U16_AT(&buffer[6 + 20]);
2121
2122 // The video sample is not standard-compliant if it has invalid dimension.
2123 // Use some default width and height value, and
2124 // let the decoder figure out the actual width and height (and thus
2125 // be prepared for INFO_FORMAT_CHANGED event).
2126 if (width == 0) width = 352;
2127 if (height == 0) height = 288;
2128
2129 // printf("*** coding='%s' width=%d height=%d\n",
2130 // chunk, width, height);
2131
2132 if (mLastTrack == NULL)
2133 return ERROR_MALFORMED;
2134
2135 if (chunk_type != FOURCC("encv")) {
2136 // if the chunk type is encv, we'll get the type from the frma box later
2137 AMediaFormat_setString(mLastTrack->meta,
2138 AMEDIAFORMAT_KEY_MIME, FourCC2MIME(chunk_type));
2139 }
2140 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_WIDTH, width);
2141 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_HEIGHT, height);
2142
2143 off64_t stop_offset = *offset + chunk_size;
2144 *offset = data_offset + sizeof(buffer);
2145 while (*offset < stop_offset) {
2146 status_t err = parseChunk(offset, depth + 1);
2147 if (err != OK) {
2148 return err;
2149 }
2150 }
2151
2152 if (*offset != stop_offset) {
2153 return ERROR_MALFORMED;
2154 }
2155 break;
2156 }
2157
2158 case FOURCC("stco"):
2159 case FOURCC("co64"):
2160 {
2161 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) {
2162 return ERROR_MALFORMED;
2163 }
2164
2165 status_t err =
2166 mLastTrack->sampleTable->setChunkOffsetParams(
2167 chunk_type, data_offset, chunk_data_size);
2168
2169 *offset += chunk_size;
2170
2171 if (err != OK) {
2172 return err;
2173 }
2174
2175 break;
2176 }
2177
2178 case FOURCC("stsc"):
2179 {
2180 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
2181 return ERROR_MALFORMED;
2182
2183 status_t err =
2184 mLastTrack->sampleTable->setSampleToChunkParams(
2185 data_offset, chunk_data_size);
2186
2187 *offset += chunk_size;
2188
2189 if (err != OK) {
2190 return err;
2191 }
2192
2193 break;
2194 }
2195
2196 case FOURCC("stsz"):
2197 case FOURCC("stz2"):
2198 {
2199 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) {
2200 return ERROR_MALFORMED;
2201 }
2202
2203 status_t err =
2204 mLastTrack->sampleTable->setSampleSizeParams(
2205 chunk_type, data_offset, chunk_data_size);
2206
2207 *offset += chunk_size;
2208
2209 if (err != OK) {
2210 return err;
2211 }
2212
2213 adjustRawDefaultFrameSize();
2214
2215 size_t max_size;
2216 err = mLastTrack->sampleTable->getMaxSampleSize(&max_size);
2217
2218 if (err != OK) {
2219 return err;
2220 }
2221
2222 if (max_size != 0) {
2223 // Assume that a given buffer only contains at most 10 chunks,
2224 // each chunk originally prefixed with a 2 byte length will
2225 // have a 4 byte header (0x00 0x00 0x00 0x01) after conversion,
2226 // and thus will grow by 2 bytes per chunk.
2227 if (max_size > SIZE_MAX - 10 * 2) {
2228 ALOGE("max sample size too big: %zu", max_size);
2229 return ERROR_MALFORMED;
2230 }
2231 AMediaFormat_setInt32(mLastTrack->meta,
2232 AMEDIAFORMAT_KEY_MAX_INPUT_SIZE, max_size + 10 * 2);
2233 } else {
2234 // No size was specified. Pick a conservatively large size.
2235 uint32_t width, height;
2236 if (!AMediaFormat_getInt32(mLastTrack->meta,
2237 AMEDIAFORMAT_KEY_WIDTH, (int32_t*)&width) ||
2238 !AMediaFormat_getInt32(mLastTrack->meta,
2239 AMEDIAFORMAT_KEY_HEIGHT,(int32_t*) &height)) {
2240 ALOGE("No width or height, assuming worst case 1080p");
2241 width = 1920;
2242 height = 1080;
2243 } else {
2244 // A resolution was specified, check that it's not too big. The values below
2245 // were chosen so that the calculations below don't cause overflows, they're
2246 // not indicating that resolutions up to 32kx32k are actually supported.
2247 if (width > 32768 || height > 32768) {
2248 ALOGE("can't support %u x %u video", width, height);
2249 return ERROR_MALFORMED;
2250 }
2251 }
2252
2253 const char *mime;
2254 CHECK(AMediaFormat_getString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, &mime));
2255 if (!strncmp(mime, "audio/", 6)) {
2256 // for audio, use 128KB
2257 max_size = 1024 * 128;
2258 } else if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)
2259 || !strcmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)
2260 || !strcmp(mime, MEDIA_MIMETYPE_VIDEO_DOLBY_VISION)) {
2261 // AVC & HEVC requires compression ratio of at least 2, and uses
2262 // macroblocks
2263 max_size = ((width + 15) / 16) * ((height + 15) / 16) * 192;
2264 } else {
2265 // For all other formats there is no minimum compression
2266 // ratio. Use compression ratio of 1.
2267 max_size = width * height * 3 / 2;
2268 }
2269 // HACK: allow 10% overhead
2270 // TODO: read sample size from traf atom for fragmented MPEG4.
2271 max_size += max_size / 10;
2272 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_MAX_INPUT_SIZE, max_size);
2273 }
2274
2275 // NOTE: setting another piece of metadata invalidates any pointers (such as the
2276 // mimetype) previously obtained, so don't cache them.
2277 const char *mime;
2278 CHECK(AMediaFormat_getString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, &mime));
2279 // Calculate average frame rate.
2280 if (!strncasecmp("video/", mime, 6)) {
2281 size_t nSamples = mLastTrack->sampleTable->countSamples();
2282 if (nSamples == 0) {
2283 int32_t trackId;
2284 if (AMediaFormat_getInt32(mLastTrack->meta,
2285 AMEDIAFORMAT_KEY_TRACK_ID, &trackId)) {
2286 for (size_t i = 0; i < mTrex.size(); i++) {
2287 Trex *t = &mTrex.editItemAt(i);
2288 if (t->track_ID == (uint32_t) trackId) {
2289 if (t->default_sample_duration > 0) {
2290 int32_t frameRate =
2291 mLastTrack->timescale / t->default_sample_duration;
2292 AMediaFormat_setInt32(mLastTrack->meta,
2293 AMEDIAFORMAT_KEY_FRAME_RATE, frameRate);
2294 }
2295 break;
2296 }
2297 }
2298 }
2299 } else {
2300 int64_t durationUs;
2301 if (AMediaFormat_getInt64(mLastTrack->meta,
2302 AMEDIAFORMAT_KEY_DURATION, &durationUs)) {
2303 if (durationUs > 0) {
2304 int32_t frameRate = (nSamples * 1000000LL +
2305 (durationUs >> 1)) / durationUs;
2306 AMediaFormat_setInt32(mLastTrack->meta,
2307 AMEDIAFORMAT_KEY_FRAME_RATE, frameRate);
2308 }
2309 }
2310 ALOGV("setting frame count %zu", nSamples);
2311 AMediaFormat_setInt32(mLastTrack->meta,
2312 AMEDIAFORMAT_KEY_FRAME_COUNT, nSamples);
2313 }
2314 }
2315
2316 break;
2317 }
2318
2319 case FOURCC("stts"):
2320 {
2321 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
2322 return ERROR_MALFORMED;
2323
2324 *offset += chunk_size;
2325
2326 if (depth >= 1 && mPath[depth - 1] != FOURCC("stbl")) {
2327 char chunk[5];
2328 MakeFourCCString(mPath[depth - 1], chunk);
2329 ALOGW("stts's parent box (%s) is not stbl, skip it.", chunk);
2330 break;
2331 }
2332
2333 status_t err =
2334 mLastTrack->sampleTable->setTimeToSampleParams(
2335 data_offset, chunk_data_size);
2336
2337 if (err != OK) {
2338 return err;
2339 }
2340
2341 break;
2342 }
2343
2344 case FOURCC("ctts"):
2345 {
2346 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
2347 return ERROR_MALFORMED;
2348
2349 *offset += chunk_size;
2350
2351 status_t err =
2352 mLastTrack->sampleTable->setCompositionTimeToSampleParams(
2353 data_offset, chunk_data_size);
2354
2355 if (err != OK) {
2356 return err;
2357 }
2358
2359 break;
2360 }
2361
2362 case FOURCC("stss"):
2363 {
2364 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
2365 return ERROR_MALFORMED;
2366
2367 *offset += chunk_size;
2368
2369 status_t err =
2370 mLastTrack->sampleTable->setSyncSampleParams(
2371 data_offset, chunk_data_size);
2372
2373 if (err != OK) {
2374 return err;
2375 }
2376
2377 break;
2378 }
2379
2380 // \xA9xyz
2381 case FOURCC("\251xyz"):
2382 {
2383 *offset += chunk_size;
2384
2385 // Best case the total data length inside "\xA9xyz" box would
2386 // be 9, for instance "\xA9xyz" + "\x00\x05\x15\xc7" + "+0+0/",
2387 // where "\x00\x05" is the text string length with value = 5,
2388 // "\x15\xc7" is the language code = en, and "+0+0/" is a
2389 // location (string) value with longitude = 0 and latitude = 0.
2390 // Since some devices encountered in the wild omit the trailing
2391 // slash, we'll allow that.
2392 if (chunk_data_size < 8) { // 8 instead of 9 to allow for missing /
2393 return ERROR_MALFORMED;
2394 }
2395
2396 uint16_t len;
2397 if (!mDataSource->getUInt16(data_offset, &len)) {
2398 return ERROR_IO;
2399 }
2400
2401 // allow "+0+0" without trailing slash
2402 if (len < 4 || len > chunk_data_size - 4) {
2403 return ERROR_MALFORMED;
2404 }
2405 // The location string following the language code is formatted
2406 // according to ISO 6709:2008 (https://en.wikipedia.org/wiki/ISO_6709).
2407 // Allocate 2 extra bytes, in case we need to add a trailing slash,
2408 // and to add a terminating 0.
2409 std::unique_ptr<char[]> buffer(new (std::nothrow) char[len+2]());
2410 if (!buffer) {
2411 return NO_MEMORY;
2412 }
2413
2414 if (mDataSource->readAt(
2415 data_offset + 4, &buffer[0], len) < len) {
2416 return ERROR_IO;
2417 }
2418
2419 len = strlen(&buffer[0]);
2420 if (len < 4) {
2421 return ERROR_MALFORMED;
2422 }
2423 // Add a trailing slash if there wasn't one.
2424 if (buffer[len - 1] != '/') {
2425 buffer[len] = '/';
2426 }
2427 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_LOCATION, &buffer[0]);
2428 break;
2429 }
2430
2431 case FOURCC("esds"):
2432 {
2433 *offset += chunk_size;
2434
2435 if (chunk_data_size < 4) {
2436 return ERROR_MALFORMED;
2437 }
2438
2439 auto tmp = heapbuffer<uint8_t>(chunk_data_size);
2440 uint8_t *buffer = tmp.get();
2441 if (buffer == NULL) {
2442 return -ENOMEM;
2443 }
2444
2445 if (mDataSource->readAt(
2446 data_offset, buffer, chunk_data_size) < chunk_data_size) {
2447 return ERROR_IO;
2448 }
2449
2450 if (U32_AT(buffer) != 0) {
2451 // Should be version 0, flags 0.
2452 return ERROR_MALFORMED;
2453 }
2454
2455 if (mLastTrack == NULL)
2456 return ERROR_MALFORMED;
2457
2458 AMediaFormat_setBuffer(mLastTrack->meta,
2459 AMEDIAFORMAT_KEY_ESDS, &buffer[4], chunk_data_size - 4);
2460
2461 if (mPath.size() >= 2
2462 && mPath[mPath.size() - 2] == FOURCC("mp4a")) {
2463 // Information from the ESDS must be relied on for proper
2464 // setup of sample rate and channel count for MPEG4 Audio.
2465 // The generic header appears to only contain generic
2466 // information...
2467
2468 status_t err = updateAudioTrackInfoFromESDS_MPEG4Audio(
2469 &buffer[4], chunk_data_size - 4);
2470
2471 if (err != OK) {
2472 return err;
2473 }
2474 }
2475 if (mPath.size() >= 2
2476 && mPath[mPath.size() - 2] == FOURCC("mp4v")) {
2477 // Check if the video is MPEG2
2478 ESDS esds(&buffer[4], chunk_data_size - 4);
2479
2480 uint8_t objectTypeIndication;
2481 if (esds.getObjectTypeIndication(&objectTypeIndication) == OK) {
2482 if (objectTypeIndication >= 0x60 && objectTypeIndication <= 0x65) {
2483 AMediaFormat_setString(mLastTrack->meta,
2484 AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_VIDEO_MPEG2);
2485 }
2486 }
2487 }
2488 break;
2489 }
2490
2491 case FOURCC("btrt"):
2492 {
2493 *offset += chunk_size;
2494 if (mLastTrack == NULL) {
2495 return ERROR_MALFORMED;
2496 }
2497
2498 uint8_t buffer[12];
2499 if (chunk_data_size != sizeof(buffer)) {
2500 return ERROR_MALFORMED;
2501 }
2502
2503 if (mDataSource->readAt(
2504 data_offset, buffer, chunk_data_size) < chunk_data_size) {
2505 return ERROR_IO;
2506 }
2507
2508 uint32_t maxBitrate = U32_AT(&buffer[4]);
2509 uint32_t avgBitrate = U32_AT(&buffer[8]);
2510 if (maxBitrate > 0 && maxBitrate < INT32_MAX) {
2511 AMediaFormat_setInt32(mLastTrack->meta,
2512 AMEDIAFORMAT_KEY_MAX_BIT_RATE, (int32_t)maxBitrate);
2513 }
2514 if (avgBitrate > 0 && avgBitrate < INT32_MAX) {
2515 AMediaFormat_setInt32(mLastTrack->meta,
2516 AMEDIAFORMAT_KEY_BIT_RATE, (int32_t)avgBitrate);
2517 }
2518 break;
2519 }
2520
2521 case FOURCC("avcC"):
2522 {
2523 *offset += chunk_size;
2524
2525 auto buffer = heapbuffer<uint8_t>(chunk_data_size);
2526
2527 if (buffer.get() == NULL) {
2528 ALOGE("b/28471206");
2529 return NO_MEMORY;
2530 }
2531
2532 if (mDataSource->readAt(
2533 data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
2534 return ERROR_IO;
2535 }
2536
2537 if (mLastTrack == NULL)
2538 return ERROR_MALFORMED;
2539
2540 AMediaFormat_setBuffer(mLastTrack->meta,
2541 AMEDIAFORMAT_KEY_CSD_AVC, buffer.get(), chunk_data_size);
2542
2543 break;
2544 }
2545 case FOURCC("hvcC"):
2546 {
2547 auto buffer = heapbuffer<uint8_t>(chunk_data_size);
2548
2549 if (buffer.get() == NULL) {
2550 ALOGE("b/28471206");
2551 return NO_MEMORY;
2552 }
2553
2554 if (mDataSource->readAt(
2555 data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
2556 return ERROR_IO;
2557 }
2558
2559 if (mLastTrack == NULL)
2560 return ERROR_MALFORMED;
2561
2562 AMediaFormat_setBuffer(mLastTrack->meta,
2563 AMEDIAFORMAT_KEY_CSD_HEVC, buffer.get(), chunk_data_size);
2564
2565 *offset += chunk_size;
2566 break;
2567 }
2568
2569 case FOURCC("vpcC"):
2570 case FOURCC("av1C"):
2571 {
2572 auto buffer = heapbuffer<uint8_t>(chunk_data_size);
2573
2574 if (buffer.get() == NULL) {
2575 ALOGE("b/28471206");
2576 return NO_MEMORY;
2577 }
2578
2579 if (mDataSource->readAt(
2580 data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
2581 return ERROR_IO;
2582 }
2583
2584 if (mLastTrack == NULL)
2585 return ERROR_MALFORMED;
2586
2587 AMediaFormat_setBuffer(mLastTrack->meta,
2588 AMEDIAFORMAT_KEY_CSD_0, buffer.get(), chunk_data_size);
2589
2590 *offset += chunk_size;
2591 break;
2592 }
2593 case FOURCC("dvcC"):
2594 case FOURCC("dvvC"): {
2595
2596 if (chunk_data_size != 24) {
2597 return ERROR_MALFORMED;
2598 }
2599
2600 auto buffer = heapbuffer<uint8_t>(chunk_data_size);
2601
2602 if (buffer.get() == NULL) {
2603 ALOGE("b/28471206");
2604 return NO_MEMORY;
2605 }
2606
2607 if (mDataSource->readAt(data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
2608 return ERROR_IO;
2609 }
2610
2611 if (mLastTrack == NULL)
2612 return ERROR_MALFORMED;
2613
2614 AMediaFormat_setBuffer(mLastTrack->meta, AMEDIAFORMAT_KEY_CSD_2,
2615 buffer.get(), chunk_data_size);
2616 AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME,
2617 MEDIA_MIMETYPE_VIDEO_DOLBY_VISION);
2618
2619 *offset += chunk_size;
2620 break;
2621 }
2622 case FOURCC("d263"):
2623 {
2624 *offset += chunk_size;
2625 /*
2626 * d263 contains a fixed 7 bytes part:
2627 * vendor - 4 bytes
2628 * version - 1 byte
2629 * level - 1 byte
2630 * profile - 1 byte
2631 * optionally, "d263" box itself may contain a 16-byte
2632 * bit rate box (bitr)
2633 * average bit rate - 4 bytes
2634 * max bit rate - 4 bytes
2635 */
2636 char buffer[23];
2637 if (chunk_data_size != 7 &&
2638 chunk_data_size != 23) {
2639 ALOGE("Incorrect D263 box size %lld", (long long)chunk_data_size);
2640 return ERROR_MALFORMED;
2641 }
2642
2643 if (mDataSource->readAt(
2644 data_offset, buffer, chunk_data_size) < chunk_data_size) {
2645 return ERROR_IO;
2646 }
2647
2648 if (mLastTrack == NULL)
2649 return ERROR_MALFORMED;
2650
2651 AMediaFormat_setBuffer(mLastTrack->meta,
2652 AMEDIAFORMAT_KEY_D263, buffer, chunk_data_size);
2653
2654 break;
2655 }
2656
2657 case FOURCC("meta"):
2658 {
2659 off64_t stop_offset = *offset + chunk_size;
2660 *offset = data_offset;
2661 bool isParsingMetaKeys = underQTMetaPath(mPath, 2);
2662 if (!isParsingMetaKeys) {
2663 uint8_t buffer[4];
2664 if (chunk_data_size < (off64_t)sizeof(buffer)) {
2665 *offset = stop_offset;
2666 return ERROR_MALFORMED;
2667 }
2668
2669 if (mDataSource->readAt(
2670 data_offset, buffer, 4) < 4) {
2671 *offset = stop_offset;
2672 return ERROR_IO;
2673 }
2674
2675 if (U32_AT(buffer) != 0) {
2676 // Should be version 0, flags 0.
2677
2678 // If it's not, let's assume this is one of those
2679 // apparently malformed chunks that don't have flags
2680 // and completely different semantics than what's
2681 // in the MPEG4 specs and skip it.
2682 *offset = stop_offset;
2683 return OK;
2684 }
2685 *offset += sizeof(buffer);
2686 }
2687
2688 while (*offset < stop_offset) {
2689 status_t err = parseChunk(offset, depth + 1);
2690 if (err != OK) {
2691 return err;
2692 }
2693 }
2694
2695 if (*offset != stop_offset) {
2696 return ERROR_MALFORMED;
2697 }
2698 break;
2699 }
2700
2701 case FOURCC("iloc"):
2702 case FOURCC("iinf"):
2703 case FOURCC("iprp"):
2704 case FOURCC("pitm"):
2705 case FOURCC("idat"):
2706 case FOURCC("iref"):
2707 case FOURCC("ipro"):
2708 {
2709 if (mIsHeif || mIsAvif) {
2710 if (mItemTable == NULL) {
2711 mItemTable = new ItemTable(mDataSource, mIsHeif);
2712 }
2713 status_t err = mItemTable->parse(
2714 chunk_type, data_offset, chunk_data_size);
2715 if (err != OK) {
2716 return err;
2717 }
2718 }
2719 *offset += chunk_size;
2720 break;
2721 }
2722
2723 case FOURCC("mean"):
2724 case FOURCC("name"):
2725 case FOURCC("data"):
2726 {
2727 *offset += chunk_size;
2728
2729 if (mPath.size() == 6 && underMetaDataPath(mPath)) {
2730 status_t err = parseITunesMetaData(data_offset, chunk_data_size);
2731
2732 if (err != OK) {
2733 return err;
2734 }
2735 }
2736
2737 break;
2738 }
2739
2740 case FOURCC("mvhd"):
2741 {
2742 *offset += chunk_size;
2743
2744 if (depth != 1) {
2745 ALOGE("mvhd: depth %d", depth);
2746 return ERROR_MALFORMED;
2747 }
2748 if (chunk_data_size < 32) {
2749 return ERROR_MALFORMED;
2750 }
2751
2752 uint8_t header[32];
2753 if (mDataSource->readAt(
2754 data_offset, header, sizeof(header))
2755 < (ssize_t)sizeof(header)) {
2756 return ERROR_IO;
2757 }
2758
2759 uint64_t creationTime;
2760 uint64_t duration = 0;
2761 if (header[0] == 1) {
2762 creationTime = U64_AT(&header[4]);
2763 mHeaderTimescale = U32_AT(&header[20]);
2764 duration = U64_AT(&header[24]);
2765 if (duration == 0xffffffffffffffff) {
2766 duration = 0;
2767 }
2768 } else if (header[0] != 0) {
2769 return ERROR_MALFORMED;
2770 } else {
2771 creationTime = U32_AT(&header[4]);
2772 mHeaderTimescale = U32_AT(&header[12]);
2773 uint32_t d32 = U32_AT(&header[16]);
2774 if (d32 == 0xffffffff) {
2775 d32 = 0;
2776 }
2777 duration = d32;
2778 }
2779 if (duration != 0 && mHeaderTimescale != 0 && duration < UINT64_MAX / 1000000) {
2780 AMediaFormat_setInt64(mFileMetaData,
2781 AMEDIAFORMAT_KEY_DURATION, duration * 1000000 / mHeaderTimescale);
2782 }
2783
2784 String8 s;
2785 if (convertTimeToDate(creationTime, &s)) {
2786 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_DATE, s.string());
2787 }
2788
2789 break;
2790 }
2791
2792 case FOURCC("mehd"):
2793 {
2794 *offset += chunk_size;
2795
2796 if (chunk_data_size < 8) {
2797 return ERROR_MALFORMED;
2798 }
2799
2800 uint8_t flags[4];
2801 if (mDataSource->readAt(
2802 data_offset, flags, sizeof(flags))
2803 < (ssize_t)sizeof(flags)) {
2804 return ERROR_IO;
2805 }
2806
2807 uint64_t duration = 0;
2808 if (flags[0] == 1) {
2809 // 64 bit
2810 if (chunk_data_size < 12) {
2811 return ERROR_MALFORMED;
2812 }
2813 mDataSource->getUInt64(data_offset + 4, &duration);
2814 if (duration == 0xffffffffffffffff) {
2815 duration = 0;
2816 }
2817 } else if (flags[0] == 0) {
2818 // 32 bit
2819 uint32_t d32;
2820 mDataSource->getUInt32(data_offset + 4, &d32);
2821 if (d32 == 0xffffffff) {
2822 d32 = 0;
2823 }
2824 duration = d32;
2825 } else {
2826 return ERROR_MALFORMED;
2827 }
2828
2829 if (duration != 0 && mHeaderTimescale != 0) {
2830 AMediaFormat_setInt64(mFileMetaData,
2831 AMEDIAFORMAT_KEY_DURATION, duration * 1000000 / mHeaderTimescale);
2832 }
2833
2834 break;
2835 }
2836
2837 case FOURCC("mdat"):
2838 {
2839 mMdatFound = true;
2840
2841 *offset += chunk_size;
2842 break;
2843 }
2844
2845 case FOURCC("hdlr"):
2846 {
2847 *offset += chunk_size;
2848
2849 if (underQTMetaPath(mPath, 3)) {
2850 break;
2851 }
2852
2853 uint32_t buffer;
2854 if (mDataSource->readAt(
2855 data_offset + 8, &buffer, 4) < 4) {
2856 return ERROR_IO;
2857 }
2858
2859 uint32_t type = ntohl(buffer);
2860 // For the 3GPP file format, the handler-type within the 'hdlr' box
2861 // shall be 'text'. We also want to support 'sbtl' handler type
2862 // for a practical reason as various MPEG4 containers use it.
2863 if (type == FOURCC("text") || type == FOURCC("sbtl")) {
2864 if (mLastTrack != NULL) {
2865 AMediaFormat_setString(mLastTrack->meta,
2866 AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_TEXT_3GPP);
2867 }
2868 }
2869
2870 break;
2871 }
2872
2873 case FOURCC("keys"):
2874 {
2875 *offset += chunk_size;
2876
2877 if (underQTMetaPath(mPath, 3)) {
2878 status_t err = parseQTMetaKey(data_offset, chunk_data_size);
2879 if (err != OK) {
2880 return err;
2881 }
2882 }
2883 break;
2884 }
2885
2886 case FOURCC("trex"):
2887 {
2888 *offset += chunk_size;
2889
2890 if (chunk_data_size < 24) {
2891 return ERROR_IO;
2892 }
2893 Trex trex;
2894 if (!mDataSource->getUInt32(data_offset + 4, &trex.track_ID) ||
2895 !mDataSource->getUInt32(data_offset + 8, &trex.default_sample_description_index) ||
2896 !mDataSource->getUInt32(data_offset + 12, &trex.default_sample_duration) ||
2897 !mDataSource->getUInt32(data_offset + 16, &trex.default_sample_size) ||
2898 !mDataSource->getUInt32(data_offset + 20, &trex.default_sample_flags)) {
2899 return ERROR_IO;
2900 }
2901 mTrex.add(trex);
2902 break;
2903 }
2904
2905 case FOURCC("tx3g"):
2906 {
2907 if (mLastTrack == NULL)
2908 return ERROR_MALFORMED;
2909
2910 // complain about ridiculous chunks
2911 if (chunk_size > kMaxAtomSize) {
2912 return ERROR_MALFORMED;
2913 }
2914
2915 // complain about empty atoms
2916 if (chunk_data_size <= 0) {
2917 ALOGE("b/124330204");
2918 android_errorWriteLog(0x534e4554, "124330204");
2919 return ERROR_MALFORMED;
2920 }
2921
2922 // should fill buffer based on "data_offset" and "chunk_data_size"
2923 // instead of *offset and chunk_size;
2924 // but we've been feeding the extra data to consumers for multiple releases and
2925 // if those apps are compensating for it, we'd break them with such a change
2926 //
2927
2928 if (mLastTrack->mTx3gBuffer == NULL) {
2929 mLastTrack->mTx3gSize = 0;
2930 mLastTrack->mTx3gFilled = 0;
2931 }
2932 if (mLastTrack->mTx3gSize - mLastTrack->mTx3gFilled < chunk_size) {
2933 size_t growth = kTx3gGrowth;
2934 if (growth < chunk_size) {
2935 growth = chunk_size;
2936 }
2937 // although this disallows 2 tx3g atoms of nearly kMaxAtomSize...
2938 if ((uint64_t) mLastTrack->mTx3gSize + growth > kMaxAtomSize) {
2939 ALOGE("b/124330204 - too much space");
2940 android_errorWriteLog(0x534e4554, "124330204");
2941 return ERROR_MALFORMED;
2942 }
2943 uint8_t *updated = (uint8_t *)realloc(mLastTrack->mTx3gBuffer,
2944 mLastTrack->mTx3gSize + growth);
2945 if (updated == NULL) {
2946 return ERROR_MALFORMED;
2947 }
2948 mLastTrack->mTx3gBuffer = updated;
2949 mLastTrack->mTx3gSize += growth;
2950 }
2951
2952 if ((size_t)(mDataSource->readAt(*offset,
2953 mLastTrack->mTx3gBuffer + mLastTrack->mTx3gFilled,
2954 chunk_size))
2955 < chunk_size) {
2956
2957 // advance read pointer so we don't end up reading this again
2958 *offset += chunk_size;
2959 return ERROR_IO;
2960 }
2961
2962 mLastTrack->mTx3gFilled += chunk_size;
2963 *offset += chunk_size;
2964 break;
2965 }
2966
2967 case FOURCC("covr"):
2968 {
2969 *offset += chunk_size;
2970
2971 ALOGV("chunk_data_size = %" PRId64 " and data_offset = %" PRId64,
2972 chunk_data_size, data_offset);
2973
2974 if (chunk_data_size < 0 || static_cast<uint64_t>(chunk_data_size) >= SIZE_MAX - 1) {
2975 return ERROR_MALFORMED;
2976 }
2977 auto buffer = heapbuffer<uint8_t>(chunk_data_size);
2978 if (buffer.get() == NULL) {
2979 ALOGE("b/28471206");
2980 return NO_MEMORY;
2981 }
2982 if (mDataSource->readAt(
2983 data_offset, buffer.get(), chunk_data_size) != (ssize_t)chunk_data_size) {
2984 return ERROR_IO;
2985 }
2986 const int kSkipBytesOfDataBox = 16;
2987 if (chunk_data_size <= kSkipBytesOfDataBox) {
2988 return ERROR_MALFORMED;
2989 }
2990
2991 AMediaFormat_setBuffer(mFileMetaData,
2992 AMEDIAFORMAT_KEY_ALBUMART,
2993 buffer.get() + kSkipBytesOfDataBox, chunk_data_size - kSkipBytesOfDataBox);
2994
2995 break;
2996 }
2997
2998 case FOURCC("colr"):
2999 {
3000 *offset += chunk_size;
3001 // this must be in a VisualSampleEntry box under the Sample Description Box ('stsd')
3002 // ignore otherwise
3003 if (depth >= 2 && mPath[depth - 2] == FOURCC("stsd")) {
3004 status_t err = parseColorInfo(data_offset, chunk_data_size);
3005 if (err != OK) {
3006 return err;
3007 }
3008 }
3009
3010 break;
3011 }
3012
3013 case FOURCC("pasp"):
3014 {
3015 *offset += chunk_size;
3016 // this must be in a VisualSampleEntry box under the Sample Description Box ('stsd')
3017 // ignore otherwise
3018 if (depth >= 2 && mPath[depth - 2] == FOURCC("stsd")) {
3019 status_t err = parsePaspBox(data_offset, chunk_data_size);
3020 if (err != OK) {
3021 return err;
3022 }
3023 }
3024
3025 break;
3026 }
3027
3028 case FOURCC("titl"):
3029 case FOURCC("perf"):
3030 case FOURCC("auth"):
3031 case FOURCC("gnre"):
3032 case FOURCC("albm"):
3033 case FOURCC("yrrc"):
3034 {
3035 *offset += chunk_size;
3036
3037 status_t err = parse3GPPMetaData(data_offset, chunk_data_size, depth);
3038
3039 if (err != OK) {
3040 return err;
3041 }
3042
3043 break;
3044 }
3045
3046 case FOURCC("ID32"):
3047 {
3048 *offset += chunk_size;
3049
3050 if (chunk_data_size < 6) {
3051 return ERROR_MALFORMED;
3052 }
3053
3054 parseID3v2MetaData(data_offset + 6, chunk_data_size - 6);
3055
3056 break;
3057 }
3058
3059 case FOURCC("----"):
3060 {
3061 mLastCommentMean.clear();
3062 mLastCommentName.clear();
3063 mLastCommentData.clear();
3064 *offset += chunk_size;
3065 break;
3066 }
3067
3068 case FOURCC("sidx"):
3069 {
3070 status_t err = parseSegmentIndex(data_offset, chunk_data_size);
3071 if (err != OK) {
3072 return err;
3073 }
3074 *offset += chunk_size;
3075 return UNKNOWN_ERROR; // stop parsing after sidx
3076 }
3077
3078 case FOURCC("ac-3"):
3079 {
3080 *offset += chunk_size;
3081 // bypass ac-3 if parse fail
3082 if (parseAC3SpecificBox(data_offset) != OK) {
3083 if (mLastTrack != NULL) {
3084 ALOGW("Fail to parse ac-3");
3085 mLastTrack->skipTrack = true;
3086 }
3087 }
3088 return OK;
3089 }
3090
3091 case FOURCC("ec-3"):
3092 {
3093 *offset += chunk_size;
3094 // bypass ec-3 if parse fail
3095 if (parseEAC3SpecificBox(data_offset) != OK) {
3096 if (mLastTrack != NULL) {
3097 ALOGW("Fail to parse ec-3");
3098 mLastTrack->skipTrack = true;
3099 }
3100 }
3101 return OK;
3102 }
3103
3104 case FOURCC("ac-4"):
3105 {
3106 *offset += chunk_size;
3107 // bypass ac-4 if parse fail
3108 if (parseAC4SpecificBox(data_offset) != OK) {
3109 if (mLastTrack != NULL) {
3110 ALOGW("Fail to parse ac-4");
3111 mLastTrack->skipTrack = true;
3112 }
3113 }
3114 return OK;
3115 }
3116
3117 case FOURCC("ftyp"):
3118 {
3119 if (chunk_data_size < 8 || depth != 0) {
3120 return ERROR_MALFORMED;
3121 }
3122
3123 off64_t stop_offset = *offset + chunk_size;
3124 uint32_t numCompatibleBrands = (chunk_data_size - 8) / 4;
3125 std::set<uint32_t> brandSet;
3126 for (size_t i = 0; i < numCompatibleBrands + 2; ++i) {
3127 if (i == 1) {
3128 // Skip this index, it refers to the minorVersion,
3129 // not a brand.
3130 continue;
3131 }
3132
3133 uint32_t brand;
3134 if (mDataSource->readAt(data_offset + 4 * i, &brand, 4) < 4) {
3135 return ERROR_MALFORMED;
3136 }
3137
3138 brand = ntohl(brand);
3139 brandSet.insert(brand);
3140 }
3141
3142 if (brandSet.count(FOURCC("qt ")) > 0) {
3143 mIsQT = true;
3144 } else {
3145 if (brandSet.count(FOURCC("mif1")) > 0
3146 && brandSet.count(FOURCC("heic")) > 0) {
3147 ALOGV("identified HEIF image");
3148
3149 mIsHeif = true;
3150 brandSet.erase(FOURCC("mif1"));
3151 brandSet.erase(FOURCC("heic"));
3152 } else if (brandSet.count(FOURCC("avif")) > 0 ||
3153 brandSet.count(FOURCC("avis")) > 0) {
3154 ALOGV("identified AVIF image");
3155 mIsAvif = true;
3156 brandSet.erase(FOURCC("avif"));
3157 brandSet.erase(FOURCC("avis"));
3158 }
3159
3160 if (!brandSet.empty()) {
3161 // This means that the file should have moov box.
3162 // It could be any iso files (mp4, heifs, etc.)
3163 mHasMoovBox = true;
3164 if (mIsHeif || mIsAvif) {
3165 ALOGV("identified %s image with other tracks", mIsHeif ? "HEIF" : "AVIF");
3166 }
3167 }
3168 }
3169
3170 *offset = stop_offset;
3171
3172 break;
3173 }
3174
3175 default:
3176 {
3177 // check if we're parsing 'ilst' for meta keys
3178 // if so, treat type as a number (key-id).
3179 if (underQTMetaPath(mPath, 3)) {
3180 status_t err = parseQTMetaVal(chunk_type, data_offset, chunk_data_size);
3181 if (err != OK) {
3182 return err;
3183 }
3184 }
3185
3186 *offset += chunk_size;
3187 break;
3188 }
3189 }
3190
3191 return OK;
3192 }
3193
parseChannelCountSampleRate(off64_t * offset,uint16_t * channelCount,uint16_t * sampleRate)3194 status_t MPEG4Extractor::parseChannelCountSampleRate(
3195 off64_t *offset, uint16_t *channelCount, uint16_t *sampleRate) {
3196 // skip 16 bytes:
3197 // + 6-byte reserved,
3198 // + 2-byte data reference index,
3199 // + 8-byte reserved
3200 *offset += 16;
3201 if (!mDataSource->getUInt16(*offset, channelCount)) {
3202 ALOGE("MPEG4Extractor: error while reading sample entry box: cannot read channel count");
3203 return ERROR_MALFORMED;
3204 }
3205 // skip 8 bytes:
3206 // + 2-byte channelCount,
3207 // + 2-byte sample size,
3208 // + 4-byte reserved
3209 *offset += 8;
3210 if (!mDataSource->getUInt16(*offset, sampleRate)) {
3211 ALOGE("MPEG4Extractor: error while reading sample entry box: cannot read sample rate");
3212 return ERROR_MALFORMED;
3213 }
3214 // skip 4 bytes:
3215 // + 2-byte sampleRate,
3216 // + 2-byte reserved
3217 *offset += 4;
3218 return OK;
3219 }
3220
parseAC4SpecificBox(off64_t offset)3221 status_t MPEG4Extractor::parseAC4SpecificBox(off64_t offset) {
3222 if (mLastTrack == NULL) {
3223 return ERROR_MALFORMED;
3224 }
3225
3226 uint16_t sampleRate, channelCount;
3227 status_t status;
3228 if ((status = parseChannelCountSampleRate(&offset, &channelCount, &sampleRate)) != OK) {
3229 return status;
3230 }
3231 uint32_t size;
3232 // + 4-byte size
3233 // + 4-byte type
3234 // + 3-byte payload
3235 const uint32_t kAC4MinimumBoxSize = 4 + 4 + 3;
3236 if (!mDataSource->getUInt32(offset, &size) || size < kAC4MinimumBoxSize) {
3237 ALOGE("MPEG4Extractor: error while reading ac-4 block: cannot read specific box size");
3238 return ERROR_MALFORMED;
3239 }
3240
3241 // + 4-byte size
3242 offset += 4;
3243 uint32_t type;
3244 if (!mDataSource->getUInt32(offset, &type) || type != FOURCC("dac4")) {
3245 ALOGE("MPEG4Extractor: error while reading ac-4 specific block: header not dac4");
3246 return ERROR_MALFORMED;
3247 }
3248
3249 // + 4-byte type
3250 offset += 4;
3251 const uint32_t kAC4SpecificBoxPayloadSize = 1176;
3252 uint8_t chunk[kAC4SpecificBoxPayloadSize];
3253 ssize_t dsiSize = size - 8; // size of box - size and type fields
3254 if (dsiSize >= (ssize_t)kAC4SpecificBoxPayloadSize ||
3255 mDataSource->readAt(offset, chunk, dsiSize) != dsiSize) {
3256 ALOGE("MPEG4Extractor: error while reading ac-4 specific block: bitstream fields");
3257 return ERROR_MALFORMED;
3258 }
3259 // + size-byte payload
3260 offset += dsiSize;
3261 ABitReader br(chunk, dsiSize);
3262 AC4DSIParser parser(br);
3263 if (!parser.parse()){
3264 ALOGE("MPEG4Extractor: error while parsing ac-4 specific block");
3265 return ERROR_MALFORMED;
3266 }
3267
3268 AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_AC4);
3269 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, channelCount);
3270 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, sampleRate);
3271
3272 AudioPresentationCollection presentations;
3273 // translate the AC4 presentation information to audio presentations for this track
3274 AC4DSIParser::AC4Presentations ac4Presentations = parser.getPresentations();
3275 if (!ac4Presentations.empty()) {
3276 for (const auto& ac4Presentation : ac4Presentations) {
3277 auto& presentation = ac4Presentation.second;
3278 if (!presentation.mEnabled) {
3279 continue;
3280 }
3281 AudioPresentationV1 ap;
3282 ap.mPresentationId = presentation.mGroupIndex;
3283 ap.mProgramId = presentation.mProgramID;
3284 ap.mLanguage = presentation.mLanguage;
3285 if (presentation.mPreVirtualized) {
3286 ap.mMasteringIndication = MASTERED_FOR_HEADPHONE;
3287 } else {
3288 switch (presentation.mChannelMode) {
3289 case AC4Parser::AC4Presentation::kChannelMode_Mono:
3290 case AC4Parser::AC4Presentation::kChannelMode_Stereo:
3291 ap.mMasteringIndication = MASTERED_FOR_STEREO;
3292 break;
3293 case AC4Parser::AC4Presentation::kChannelMode_3_0:
3294 case AC4Parser::AC4Presentation::kChannelMode_5_0:
3295 case AC4Parser::AC4Presentation::kChannelMode_5_1:
3296 case AC4Parser::AC4Presentation::kChannelMode_7_0_34:
3297 case AC4Parser::AC4Presentation::kChannelMode_7_1_34:
3298 case AC4Parser::AC4Presentation::kChannelMode_7_0_52:
3299 case AC4Parser::AC4Presentation::kChannelMode_7_1_52:
3300 ap.mMasteringIndication = MASTERED_FOR_SURROUND;
3301 break;
3302 case AC4Parser::AC4Presentation::kChannelMode_7_0_322:
3303 case AC4Parser::AC4Presentation::kChannelMode_7_1_322:
3304 case AC4Parser::AC4Presentation::kChannelMode_7_0_4:
3305 case AC4Parser::AC4Presentation::kChannelMode_7_1_4:
3306 case AC4Parser::AC4Presentation::kChannelMode_9_0_4:
3307 case AC4Parser::AC4Presentation::kChannelMode_9_1_4:
3308 case AC4Parser::AC4Presentation::kChannelMode_22_2:
3309 ap.mMasteringIndication = MASTERED_FOR_3D;
3310 break;
3311 default:
3312 ALOGE("Invalid channel mode in AC4 presentation");
3313 return ERROR_MALFORMED;
3314 }
3315 }
3316
3317 ap.mAudioDescriptionAvailable = (presentation.mContentClassifier ==
3318 AC4Parser::AC4Presentation::kVisuallyImpaired);
3319 ap.mSpokenSubtitlesAvailable = (presentation.mContentClassifier ==
3320 AC4Parser::AC4Presentation::kVoiceOver);
3321 ap.mDialogueEnhancementAvailable = presentation.mHasDialogEnhancements;
3322 if (!ap.mLanguage.empty()) {
3323 ap.mLabels.emplace(ap.mLanguage, presentation.mDescription);
3324 }
3325 presentations.push_back(std::move(ap));
3326 }
3327 }
3328
3329 if (presentations.empty()) {
3330 // Clear audio presentation info in metadata.
3331 AMediaFormat_setBuffer(
3332 mLastTrack->meta, AMEDIAFORMAT_KEY_AUDIO_PRESENTATION_INFO, nullptr, 0);
3333 } else {
3334 std::ostringstream outStream(std::ios::out);
3335 serializeAudioPresentations(presentations, &outStream);
3336 AMediaFormat_setBuffer(
3337 mLastTrack->meta, AMEDIAFORMAT_KEY_AUDIO_PRESENTATION_INFO,
3338 outStream.str().data(), outStream.str().size());
3339 }
3340 return OK;
3341 }
3342
parseEAC3SpecificBox(off64_t offset)3343 status_t MPEG4Extractor::parseEAC3SpecificBox(off64_t offset) {
3344 if (mLastTrack == NULL) {
3345 return ERROR_MALFORMED;
3346 }
3347
3348 uint16_t sampleRate, channels;
3349 status_t status;
3350 if ((status = parseChannelCountSampleRate(&offset, &channels, &sampleRate)) != OK) {
3351 return status;
3352 }
3353 uint32_t size;
3354 // + 4-byte size
3355 // + 4-byte type
3356 // + 3-byte payload
3357 const uint32_t kEAC3SpecificBoxMinSize = 11;
3358 // 13 + 3 + (8 * (2 + 5 + 5 + 3 + 1 + 3 + 4 + (14 * 9 + 1))) bits == 152 bytes theoretical max
3359 // calculated from the required bits read below as well as the maximum number of independent
3360 // and dependant sub streams you can have
3361 const uint32_t kEAC3SpecificBoxMaxSize = 152;
3362 if (!mDataSource->getUInt32(offset, &size) ||
3363 size < kEAC3SpecificBoxMinSize ||
3364 size > kEAC3SpecificBoxMaxSize) {
3365 ALOGE("MPEG4Extractor: error while reading eac-3 block: cannot read specific box size");
3366 return ERROR_MALFORMED;
3367 }
3368
3369 offset += 4;
3370 uint32_t type;
3371 if (!mDataSource->getUInt32(offset, &type) || type != FOURCC("dec3")) {
3372 ALOGE("MPEG4Extractor: error while reading eac-3 specific block: header not dec3");
3373 return ERROR_MALFORMED;
3374 }
3375
3376 offset += 4;
3377 uint8_t* chunk = new (std::nothrow) uint8_t[size];
3378 if (chunk == NULL) {
3379 return ERROR_MALFORMED;
3380 }
3381
3382 if (mDataSource->readAt(offset, chunk, size) != (ssize_t)size) {
3383 ALOGE("MPEG4Extractor: error while reading eac-3 specific block: bitstream fields");
3384 delete[] chunk;
3385 return ERROR_MALFORMED;
3386 }
3387
3388 ABitReader br(chunk, size);
3389 static const unsigned channelCountTable[] = {2, 1, 2, 3, 3, 4, 4, 5};
3390 static const unsigned sampleRateTable[] = {48000, 44100, 32000};
3391
3392 if (br.numBitsLeft() < 16) {
3393 delete[] chunk;
3394 return ERROR_MALFORMED;
3395 }
3396 unsigned data_rate = br.getBits(13);
3397 ALOGV("EAC3 data rate = %d", data_rate);
3398
3399 unsigned num_ind_sub = br.getBits(3) + 1;
3400 ALOGV("EAC3 independant substreams = %d", num_ind_sub);
3401 if (br.numBitsLeft() < (num_ind_sub * 23)) {
3402 delete[] chunk;
3403 return ERROR_MALFORMED;
3404 }
3405
3406 unsigned channelCount = 0;
3407 for (unsigned i = 0; i < num_ind_sub; i++) {
3408 unsigned fscod = br.getBits(2);
3409 if (fscod == 3) {
3410 ALOGE("Incorrect fscod (3) in EAC3 header");
3411 delete[] chunk;
3412 return ERROR_MALFORMED;
3413 }
3414 unsigned boxSampleRate = sampleRateTable[fscod];
3415 if (boxSampleRate != sampleRate) {
3416 ALOGE("sample rate mismatch: boxSampleRate = %d, sampleRate = %d",
3417 boxSampleRate, sampleRate);
3418 delete[] chunk;
3419 return ERROR_MALFORMED;
3420 }
3421
3422 unsigned bsid = br.getBits(5);
3423 if (bsid == 9 || bsid == 10) {
3424 ALOGW("EAC3 stream (bsid=%d) may be silenced by the decoder", bsid);
3425 } else if (bsid > 16) {
3426 ALOGE("EAC3 stream (bsid=%d) is not compatible with ETSI TS 102 366 v1.4.1", bsid);
3427 delete[] chunk;
3428 return ERROR_MALFORMED;
3429 }
3430
3431 // skip
3432 br.skipBits(2);
3433 unsigned bsmod = br.getBits(3);
3434 unsigned acmod = br.getBits(3);
3435 unsigned lfeon = br.getBits(1);
3436 // we currently only support the first stream
3437 if (i == 0)
3438 channelCount = channelCountTable[acmod] + lfeon;
3439 ALOGV("bsmod = %d, acmod = %d, lfeon = %d", bsmod, acmod, lfeon);
3440
3441 br.skipBits(3);
3442 unsigned num_dep_sub = br.getBits(4);
3443 ALOGV("EAC3 dependant substreams = %d", num_dep_sub);
3444 if (num_dep_sub != 0) {
3445 if (br.numBitsLeft() < 9) {
3446 delete[] chunk;
3447 return ERROR_MALFORMED;
3448 }
3449 static const char* chan_loc_tbl[] = { "Lc/Rc","Lrs/Rrs","Cs","Ts","Lsd/Rsd",
3450 "Lw/Rw","Lvh/Rvh","Cvh","Lfe2" };
3451 unsigned chan_loc = br.getBits(9);
3452 unsigned mask = 1;
3453 for (unsigned j = 0; j < 9; j++, mask <<= 1) {
3454 if ((chan_loc & mask) != 0) {
3455 // we currently only support the first stream
3456 if (i == 0) {
3457 channelCount++;
3458 // these are 2 channels in the mask
3459 if (j == 0 || j == 1 || j == 4 || j == 5 || j == 6) {
3460 channelCount++;
3461 }
3462 }
3463 ALOGV(" %s", chan_loc_tbl[j]);
3464 }
3465 }
3466 } else {
3467 if (br.numBitsLeft() == 0) {
3468 delete[] chunk;
3469 return ERROR_MALFORMED;
3470 }
3471 br.skipBits(1);
3472 }
3473 }
3474
3475 if (br.numBitsLeft() != 0) {
3476 if (br.numBitsLeft() < 8) {
3477 delete[] chunk;
3478 return ERROR_MALFORMED;
3479 }
3480 unsigned mask = br.getBits(8);
3481 for (unsigned i = 0; i < 8; i++) {
3482 if (((0x1 << i) && mask) == 0)
3483 continue;
3484
3485 if (br.numBitsLeft() < 8) {
3486 delete[] chunk;
3487 return ERROR_MALFORMED;
3488 }
3489 switch (i) {
3490 case 0: {
3491 unsigned complexity = br.getBits(8);
3492 ALOGV("Found a JOC stream with complexity = %d", complexity);
3493 }break;
3494 default: {
3495 br.skipBits(8);
3496 }break;
3497 }
3498 }
3499 }
3500 AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_EAC3);
3501 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, channelCount);
3502 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, sampleRate);
3503
3504 delete[] chunk;
3505 return OK;
3506 }
3507
parseAC3SpecificBox(off64_t offset)3508 status_t MPEG4Extractor::parseAC3SpecificBox(off64_t offset) {
3509 if (mLastTrack == NULL) {
3510 return ERROR_MALFORMED;
3511 }
3512
3513 uint16_t sampleRate, channels;
3514 status_t status;
3515 if ((status = parseChannelCountSampleRate(&offset, &channels, &sampleRate)) != OK) {
3516 return status;
3517 }
3518 uint32_t size;
3519 // + 4-byte size
3520 // + 4-byte type
3521 // + 3-byte payload
3522 const uint32_t kAC3SpecificBoxSize = 11;
3523 if (!mDataSource->getUInt32(offset, &size) || size < kAC3SpecificBoxSize) {
3524 ALOGE("MPEG4Extractor: error while reading ac-3 block: cannot read specific box size");
3525 return ERROR_MALFORMED;
3526 }
3527
3528 offset += 4;
3529 uint32_t type;
3530 if (!mDataSource->getUInt32(offset, &type) || type != FOURCC("dac3")) {
3531 ALOGE("MPEG4Extractor: error while reading ac-3 specific block: header not dac3");
3532 return ERROR_MALFORMED;
3533 }
3534
3535 offset += 4;
3536 const uint32_t kAC3SpecificBoxPayloadSize = 3;
3537 uint8_t chunk[kAC3SpecificBoxPayloadSize];
3538 if (mDataSource->readAt(offset, chunk, sizeof(chunk)) != sizeof(chunk)) {
3539 ALOGE("MPEG4Extractor: error while reading ac-3 specific block: bitstream fields");
3540 return ERROR_MALFORMED;
3541 }
3542
3543 ABitReader br(chunk, sizeof(chunk));
3544 static const unsigned channelCountTable[] = {2, 1, 2, 3, 3, 4, 4, 5};
3545 static const unsigned sampleRateTable[] = {48000, 44100, 32000};
3546
3547 unsigned fscod = br.getBits(2);
3548 if (fscod == 3) {
3549 ALOGE("Incorrect fscod (3) in AC3 header");
3550 return ERROR_MALFORMED;
3551 }
3552 unsigned boxSampleRate = sampleRateTable[fscod];
3553 if (boxSampleRate != sampleRate) {
3554 ALOGE("sample rate mismatch: boxSampleRate = %d, sampleRate = %d",
3555 boxSampleRate, sampleRate);
3556 return ERROR_MALFORMED;
3557 }
3558
3559 unsigned bsid = br.getBits(5);
3560 if (bsid > 8) {
3561 ALOGW("Incorrect bsid in AC3 header. Possibly E-AC-3?");
3562 return ERROR_MALFORMED;
3563 }
3564
3565 // skip
3566 br.skipBits(3); // bsmod
3567
3568 unsigned acmod = br.getBits(3);
3569 unsigned lfeon = br.getBits(1);
3570 unsigned channelCount = channelCountTable[acmod] + lfeon;
3571
3572 AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_AC3);
3573 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, channelCount);
3574 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, sampleRate);
3575 return OK;
3576 }
3577
parseALACSampleEntry(off64_t * offset)3578 status_t MPEG4Extractor::parseALACSampleEntry(off64_t *offset) {
3579 // See 'external/alac/ALACMagicCookieDescription.txt for the detail'.
3580 // Store ALAC magic cookie (decoder needs it).
3581 uint8_t alacInfo[12];
3582 off64_t data_offset = *offset;
3583
3584 if (mDataSource->readAt(
3585 data_offset, alacInfo, sizeof(alacInfo)) < (ssize_t)sizeof(alacInfo)) {
3586 return ERROR_IO;
3587 }
3588 uint32_t size = U32_AT(&alacInfo[0]);
3589 if ((size != ALAC_SPECIFIC_INFO_SIZE) ||
3590 (U32_AT(&alacInfo[4]) != FOURCC("alac")) ||
3591 (U32_AT(&alacInfo[8]) != 0)) {
3592 ALOGV("Size:%u, U32_AT(&alacInfo[4]):%u, U32_AT(&alacInfo[8]):%u",
3593 size, U32_AT(&alacInfo[4]), U32_AT(&alacInfo[8]));
3594 return ERROR_MALFORMED;
3595 }
3596 data_offset += sizeof(alacInfo);
3597 uint8_t cookie[size - sizeof(alacInfo)];
3598 if (mDataSource->readAt(
3599 data_offset, cookie, sizeof(cookie)) < (ssize_t)sizeof(cookie)) {
3600 return ERROR_IO;
3601 }
3602
3603 uint8_t bitsPerSample = cookie[5];
3604 AMediaFormat_setInt32(mLastTrack->meta,
3605 AMEDIAFORMAT_KEY_BITS_PER_SAMPLE, bitsPerSample);
3606 AMediaFormat_setInt32(mLastTrack->meta,
3607 AMEDIAFORMAT_KEY_CHANNEL_COUNT, cookie[9]);
3608 AMediaFormat_setInt32(mLastTrack->meta,
3609 AMEDIAFORMAT_KEY_SAMPLE_RATE, U32_AT(&cookie[20]));
3610 AMediaFormat_setBuffer(mLastTrack->meta,
3611 AMEDIAFORMAT_KEY_CSD_0, cookie, sizeof(cookie));
3612 data_offset += sizeof(cookie);
3613 *offset = data_offset;
3614 return OK;
3615 }
3616
parseSegmentIndex(off64_t offset,size_t size)3617 status_t MPEG4Extractor::parseSegmentIndex(off64_t offset, size_t size) {
3618 ALOGV("MPEG4Extractor::parseSegmentIndex");
3619
3620 if (size < 12) {
3621 return -EINVAL;
3622 }
3623
3624 uint32_t flags;
3625 if (!mDataSource->getUInt32(offset, &flags)) {
3626 return ERROR_MALFORMED;
3627 }
3628
3629 uint32_t version = flags >> 24;
3630 flags &= 0xffffff;
3631
3632 ALOGV("sidx version %d", version);
3633
3634 uint32_t referenceId;
3635 if (!mDataSource->getUInt32(offset + 4, &referenceId)) {
3636 return ERROR_MALFORMED;
3637 }
3638
3639 uint32_t timeScale;
3640 if (!mDataSource->getUInt32(offset + 8, &timeScale)) {
3641 return ERROR_MALFORMED;
3642 }
3643 ALOGV("sidx refid/timescale: %d/%d", referenceId, timeScale);
3644 if (timeScale == 0)
3645 return ERROR_MALFORMED;
3646
3647 uint64_t earliestPresentationTime;
3648 uint64_t firstOffset;
3649
3650 offset += 12;
3651 size -= 12;
3652
3653 if (version == 0) {
3654 if (size < 8) {
3655 return -EINVAL;
3656 }
3657 uint32_t tmp;
3658 if (!mDataSource->getUInt32(offset, &tmp)) {
3659 return ERROR_MALFORMED;
3660 }
3661 earliestPresentationTime = tmp;
3662 if (!mDataSource->getUInt32(offset + 4, &tmp)) {
3663 return ERROR_MALFORMED;
3664 }
3665 firstOffset = tmp;
3666 offset += 8;
3667 size -= 8;
3668 } else {
3669 if (size < 16) {
3670 return -EINVAL;
3671 }
3672 if (!mDataSource->getUInt64(offset, &earliestPresentationTime)) {
3673 return ERROR_MALFORMED;
3674 }
3675 if (!mDataSource->getUInt64(offset + 8, &firstOffset)) {
3676 return ERROR_MALFORMED;
3677 }
3678 offset += 16;
3679 size -= 16;
3680 }
3681 ALOGV("sidx pres/off: %" PRIu64 "/%" PRIu64, earliestPresentationTime, firstOffset);
3682
3683 if (size < 4) {
3684 return -EINVAL;
3685 }
3686
3687 uint16_t referenceCount;
3688 if (!mDataSource->getUInt16(offset + 2, &referenceCount)) {
3689 return ERROR_MALFORMED;
3690 }
3691 offset += 4;
3692 size -= 4;
3693 ALOGV("refcount: %d", referenceCount);
3694
3695 if (size < referenceCount * 12) {
3696 return -EINVAL;
3697 }
3698
3699 uint64_t total_duration = 0;
3700 for (unsigned int i = 0; i < referenceCount; i++) {
3701 uint32_t d1, d2, d3;
3702
3703 if (!mDataSource->getUInt32(offset, &d1) || // size
3704 !mDataSource->getUInt32(offset + 4, &d2) || // duration
3705 !mDataSource->getUInt32(offset + 8, &d3)) { // flags
3706 return ERROR_MALFORMED;
3707 }
3708
3709 if (d1 & 0x80000000) {
3710 ALOGW("sub-sidx boxes not supported yet");
3711 }
3712 bool sap = d3 & 0x80000000;
3713 uint32_t saptype = (d3 >> 28) & 7;
3714 if (!sap || (saptype != 1 && saptype != 2)) {
3715 // type 1 and 2 are sync samples
3716 ALOGW("not a stream access point, or unsupported type: %08x", d3);
3717 }
3718 total_duration += d2;
3719 offset += 12;
3720 ALOGV(" item %d, %08x %08x %08x", i, d1, d2, d3);
3721 SidxEntry se;
3722 se.mSize = d1 & 0x7fffffff;
3723 se.mDurationUs = 1000000LL * d2 / timeScale;
3724 mSidxEntries.add(se);
3725 }
3726
3727 uint64_t sidxDuration = total_duration * 1000000 / timeScale;
3728
3729 if (mLastTrack == NULL)
3730 return ERROR_MALFORMED;
3731
3732 int64_t metaDuration;
3733 if (!AMediaFormat_getInt64(mLastTrack->meta,
3734 AMEDIAFORMAT_KEY_DURATION, &metaDuration) || metaDuration == 0) {
3735 AMediaFormat_setInt64(mLastTrack->meta, AMEDIAFORMAT_KEY_DURATION, sidxDuration);
3736 }
3737 return OK;
3738 }
3739
parseQTMetaKey(off64_t offset,size_t size)3740 status_t MPEG4Extractor::parseQTMetaKey(off64_t offset, size_t size) {
3741 if (size < 8) {
3742 return ERROR_MALFORMED;
3743 }
3744
3745 uint32_t count;
3746 if (!mDataSource->getUInt32(offset + 4, &count)) {
3747 return ERROR_MALFORMED;
3748 }
3749
3750 if (mMetaKeyMap.size() > 0) {
3751 ALOGW("'keys' atom seen again, discarding existing entries");
3752 mMetaKeyMap.clear();
3753 }
3754
3755 off64_t keyOffset = offset + 8;
3756 off64_t stopOffset = offset + size;
3757 for (size_t i = 1; i <= count; i++) {
3758 if (keyOffset + 8 > stopOffset) {
3759 return ERROR_MALFORMED;
3760 }
3761
3762 uint32_t keySize;
3763 if (!mDataSource->getUInt32(keyOffset, &keySize)
3764 || keySize < 8
3765 || keyOffset + keySize > stopOffset) {
3766 return ERROR_MALFORMED;
3767 }
3768
3769 uint32_t type;
3770 if (!mDataSource->getUInt32(keyOffset + 4, &type)
3771 || type != FOURCC("mdta")) {
3772 return ERROR_MALFORMED;
3773 }
3774
3775 keySize -= 8;
3776 keyOffset += 8;
3777
3778 auto keyData = heapbuffer<uint8_t>(keySize);
3779 if (keyData.get() == NULL) {
3780 return ERROR_MALFORMED;
3781 }
3782 if (mDataSource->readAt(
3783 keyOffset, keyData.get(), keySize) < (ssize_t) keySize) {
3784 return ERROR_MALFORMED;
3785 }
3786
3787 AString key((const char *)keyData.get(), keySize);
3788 mMetaKeyMap.add(i, key);
3789
3790 keyOffset += keySize;
3791 }
3792 return OK;
3793 }
3794
parseQTMetaVal(int32_t keyId,off64_t offset,size_t size)3795 status_t MPEG4Extractor::parseQTMetaVal(
3796 int32_t keyId, off64_t offset, size_t size) {
3797 ssize_t index = mMetaKeyMap.indexOfKey(keyId);
3798 if (index < 0) {
3799 // corresponding key is not present, ignore
3800 return ERROR_MALFORMED;
3801 }
3802
3803 if (size <= 16) {
3804 return ERROR_MALFORMED;
3805 }
3806 uint32_t dataSize;
3807 if (!mDataSource->getUInt32(offset, &dataSize)
3808 || dataSize > size || dataSize <= 16) {
3809 return ERROR_MALFORMED;
3810 }
3811 uint32_t atomFourCC;
3812 if (!mDataSource->getUInt32(offset + 4, &atomFourCC)
3813 || atomFourCC != FOURCC("data")) {
3814 return ERROR_MALFORMED;
3815 }
3816 uint32_t dataType;
3817 if (!mDataSource->getUInt32(offset + 8, &dataType)
3818 || ((dataType & 0xff000000) != 0)) {
3819 // not well-known type
3820 return ERROR_MALFORMED;
3821 }
3822
3823 dataSize -= 16;
3824 offset += 16;
3825
3826 if (dataType == 23 && dataSize >= 4) {
3827 // BE Float32
3828 uint32_t val;
3829 if (!mDataSource->getUInt32(offset, &val)) {
3830 return ERROR_MALFORMED;
3831 }
3832 if (!strcasecmp(mMetaKeyMap[index].c_str(), "com.android.capture.fps")) {
3833 AMediaFormat_setFloat(mFileMetaData, AMEDIAFORMAT_KEY_CAPTURE_RATE, *(float *)&val);
3834 }
3835 } else if (dataType == 67 && dataSize >= 4) {
3836 // BE signed int32
3837 uint32_t val;
3838 if (!mDataSource->getUInt32(offset, &val)) {
3839 return ERROR_MALFORMED;
3840 }
3841 if (!strcasecmp(mMetaKeyMap[index].c_str(), "com.android.video.temporal_layers_count")) {
3842 AMediaFormat_setInt32(mFileMetaData,
3843 AMEDIAFORMAT_KEY_TEMPORAL_LAYER_COUNT, val);
3844 }
3845 } else {
3846 // add more keys if needed
3847 ALOGV("ignoring key: type %d, size %d", dataType, dataSize);
3848 }
3849
3850 return OK;
3851 }
3852
parseTrackHeader(off64_t data_offset,off64_t data_size)3853 status_t MPEG4Extractor::parseTrackHeader(
3854 off64_t data_offset, off64_t data_size) {
3855 if (data_size < 4) {
3856 return ERROR_MALFORMED;
3857 }
3858
3859 uint8_t version;
3860 if (mDataSource->readAt(data_offset, &version, 1) < 1) {
3861 return ERROR_IO;
3862 }
3863
3864 size_t dynSize = (version == 1) ? 36 : 24;
3865
3866 uint8_t buffer[36 + 60];
3867
3868 if (data_size != (off64_t)dynSize + 60) {
3869 return ERROR_MALFORMED;
3870 }
3871
3872 if (mDataSource->readAt(
3873 data_offset, buffer, data_size) < (ssize_t)data_size) {
3874 return ERROR_IO;
3875 }
3876
3877 int32_t id;
3878
3879 if (version == 1) {
3880 // we can get ctime value from U64_AT(&buffer[4])
3881 // we can get mtime value from U64_AT(&buffer[12])
3882 id = U32_AT(&buffer[20]);
3883 // we can get duration value from U64_AT(&buffer[28])
3884 } else if (version == 0) {
3885 // we can get ctime value from U32_AT(&buffer[4])
3886 // we can get mtime value from U32_AT(&buffer[8])
3887 id = U32_AT(&buffer[12]);
3888 // we can get duration value from U32_AT(&buffer[20])
3889 } else {
3890 return ERROR_UNSUPPORTED;
3891 }
3892
3893 if (mLastTrack == NULL)
3894 return ERROR_MALFORMED;
3895
3896 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_TRACK_ID, id);
3897
3898 size_t matrixOffset = dynSize + 16;
3899 int32_t a00 = U32_AT(&buffer[matrixOffset]);
3900 int32_t a01 = U32_AT(&buffer[matrixOffset + 4]);
3901 int32_t a10 = U32_AT(&buffer[matrixOffset + 12]);
3902 int32_t a11 = U32_AT(&buffer[matrixOffset + 16]);
3903
3904 #if 0
3905 int32_t dx = U32_AT(&buffer[matrixOffset + 8]);
3906 int32_t dy = U32_AT(&buffer[matrixOffset + 20]);
3907
3908 ALOGI("x' = %.2f * x + %.2f * y + %.2f",
3909 a00 / 65536.0f, a01 / 65536.0f, dx / 65536.0f);
3910 ALOGI("y' = %.2f * x + %.2f * y + %.2f",
3911 a10 / 65536.0f, a11 / 65536.0f, dy / 65536.0f);
3912 #endif
3913
3914 uint32_t rotationDegrees;
3915
3916 static const int32_t kFixedOne = 0x10000;
3917 if (a00 == kFixedOne && a01 == 0 && a10 == 0 && a11 == kFixedOne) {
3918 // Identity, no rotation
3919 rotationDegrees = 0;
3920 } else if (a00 == 0 && a01 == kFixedOne && a10 == -kFixedOne && a11 == 0) {
3921 rotationDegrees = 90;
3922 } else if (a00 == 0 && a01 == -kFixedOne && a10 == kFixedOne && a11 == 0) {
3923 rotationDegrees = 270;
3924 } else if (a00 == -kFixedOne && a01 == 0 && a10 == 0 && a11 == -kFixedOne) {
3925 rotationDegrees = 180;
3926 } else {
3927 ALOGW("We only support 0,90,180,270 degree rotation matrices");
3928 rotationDegrees = 0;
3929 }
3930
3931 if (rotationDegrees != 0) {
3932 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_ROTATION, rotationDegrees);
3933 }
3934
3935 // Handle presentation display size, which could be different
3936 // from the image size indicated by AMEDIAFORMAT_KEY_WIDTH and AMEDIAFORMAT_KEY_HEIGHT.
3937 uint32_t width = U32_AT(&buffer[dynSize + 52]);
3938 uint32_t height = U32_AT(&buffer[dynSize + 56]);
3939 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_DISPLAY_WIDTH, width >> 16);
3940 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_DISPLAY_HEIGHT, height >> 16);
3941
3942 return OK;
3943 }
3944
parseITunesMetaData(off64_t offset,size_t size)3945 status_t MPEG4Extractor::parseITunesMetaData(off64_t offset, size_t size) {
3946 if (size == 0) {
3947 return OK;
3948 }
3949
3950 if (size < 4 || size == SIZE_MAX) {
3951 return ERROR_MALFORMED;
3952 }
3953
3954 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
3955 if (buffer == NULL) {
3956 return ERROR_MALFORMED;
3957 }
3958 if (mDataSource->readAt(
3959 offset, buffer, size) != (ssize_t)size) {
3960 delete[] buffer;
3961 buffer = NULL;
3962
3963 return ERROR_IO;
3964 }
3965
3966 uint32_t flags = U32_AT(buffer);
3967
3968 const char *metadataKey = nullptr;
3969 char chunk[5];
3970 MakeFourCCString(mPath[4], chunk);
3971 ALOGV("meta: %s @ %lld", chunk, (long long)offset);
3972 switch ((int32_t)mPath[4]) {
3973 case FOURCC("\251alb"):
3974 {
3975 metadataKey = AMEDIAFORMAT_KEY_ALBUM;
3976 break;
3977 }
3978 case FOURCC("\251ART"):
3979 {
3980 metadataKey = AMEDIAFORMAT_KEY_ARTIST;
3981 break;
3982 }
3983 case FOURCC("aART"):
3984 {
3985 metadataKey = AMEDIAFORMAT_KEY_ALBUMARTIST;
3986 break;
3987 }
3988 case FOURCC("\251day"):
3989 {
3990 metadataKey = AMEDIAFORMAT_KEY_YEAR;
3991 break;
3992 }
3993 case FOURCC("\251nam"):
3994 {
3995 metadataKey = AMEDIAFORMAT_KEY_TITLE;
3996 break;
3997 }
3998 case FOURCC("\251wrt"):
3999 {
4000 // various open source taggers agree that the "©wrt" tag is for composer, not writer
4001 metadataKey = AMEDIAFORMAT_KEY_COMPOSER;
4002 break;
4003 }
4004 case FOURCC("covr"):
4005 {
4006 metadataKey = AMEDIAFORMAT_KEY_ALBUMART;
4007 break;
4008 }
4009 case FOURCC("gnre"):
4010 case FOURCC("\251gen"):
4011 {
4012 metadataKey = AMEDIAFORMAT_KEY_GENRE;
4013 break;
4014 }
4015 case FOURCC("cpil"):
4016 {
4017 if (size == 9 && flags == 21) {
4018 char tmp[16];
4019 sprintf(tmp, "%d",
4020 (int)buffer[size - 1]);
4021
4022 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_COMPILATION, tmp);
4023 }
4024 break;
4025 }
4026 case FOURCC("trkn"):
4027 {
4028 if (size == 16 && flags == 0) {
4029 char tmp[16];
4030 uint16_t* pTrack = (uint16_t*)&buffer[10];
4031 uint16_t* pTotalTracks = (uint16_t*)&buffer[12];
4032 sprintf(tmp, "%d/%d", ntohs(*pTrack), ntohs(*pTotalTracks));
4033
4034 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_CDTRACKNUMBER, tmp);
4035 }
4036 break;
4037 }
4038 case FOURCC("disk"):
4039 {
4040 if ((size == 14 || size == 16) && flags == 0) {
4041 char tmp[16];
4042 uint16_t* pDisc = (uint16_t*)&buffer[10];
4043 uint16_t* pTotalDiscs = (uint16_t*)&buffer[12];
4044 sprintf(tmp, "%d/%d", ntohs(*pDisc), ntohs(*pTotalDiscs));
4045
4046 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_DISCNUMBER, tmp);
4047 }
4048 break;
4049 }
4050 case FOURCC("----"):
4051 {
4052 buffer[size] = '\0';
4053 switch (mPath[5]) {
4054 case FOURCC("mean"):
4055 mLastCommentMean.setTo((const char *)buffer + 4);
4056 break;
4057 case FOURCC("name"):
4058 mLastCommentName.setTo((const char *)buffer + 4);
4059 break;
4060 case FOURCC("data"):
4061 if (size < 8) {
4062 delete[] buffer;
4063 buffer = NULL;
4064 ALOGE("b/24346430");
4065 return ERROR_MALFORMED;
4066 }
4067 mLastCommentData.setTo((const char *)buffer + 8);
4068 break;
4069 }
4070
4071 // Once we have a set of mean/name/data info, go ahead and process
4072 // it to see if its something we are interested in. Whether or not
4073 // were are interested in the specific tag, make sure to clear out
4074 // the set so we can be ready to process another tuple should one
4075 // show up later in the file.
4076 if ((mLastCommentMean.length() != 0) &&
4077 (mLastCommentName.length() != 0) &&
4078 (mLastCommentData.length() != 0)) {
4079
4080 if (mLastCommentMean == "com.apple.iTunes"
4081 && mLastCommentName == "iTunSMPB") {
4082 int32_t delay, padding;
4083 if (sscanf(mLastCommentData,
4084 " %*x %x %x %*x", &delay, &padding) == 2) {
4085 if (mLastTrack == NULL) {
4086 delete[] buffer;
4087 return ERROR_MALFORMED;
4088 }
4089
4090 AMediaFormat_setInt32(mLastTrack->meta,
4091 AMEDIAFORMAT_KEY_ENCODER_DELAY, delay);
4092 AMediaFormat_setInt32(mLastTrack->meta,
4093 AMEDIAFORMAT_KEY_ENCODER_PADDING, padding);
4094 }
4095 }
4096
4097 mLastCommentMean.clear();
4098 mLastCommentName.clear();
4099 mLastCommentData.clear();
4100 }
4101 break;
4102 }
4103
4104 default:
4105 break;
4106 }
4107
4108 void *tmpData;
4109 size_t tmpDataSize;
4110 const char *s;
4111 if (size >= 8 && metadataKey &&
4112 !AMediaFormat_getBuffer(mFileMetaData, metadataKey, &tmpData, &tmpDataSize) &&
4113 !AMediaFormat_getString(mFileMetaData, metadataKey, &s)) {
4114 if (!strcmp(metadataKey, "albumart")) {
4115 AMediaFormat_setBuffer(mFileMetaData, metadataKey,
4116 buffer + 8, size - 8);
4117 } else if (!strcmp(metadataKey, AMEDIAFORMAT_KEY_GENRE)) {
4118 if (flags == 0) {
4119 // uint8_t genre code, iTunes genre codes are
4120 // the standard id3 codes, except they start
4121 // at 1 instead of 0 (e.g. Pop is 14, not 13)
4122 // We use standard id3 numbering, so subtract 1.
4123 int genrecode = (int)buffer[size - 1];
4124 genrecode--;
4125 if (genrecode < 0) {
4126 genrecode = 255; // reserved for 'unknown genre'
4127 }
4128 char genre[10];
4129 sprintf(genre, "%d", genrecode);
4130
4131 AMediaFormat_setString(mFileMetaData, metadataKey, genre);
4132 } else if (flags == 1) {
4133 // custom genre string
4134 buffer[size] = '\0';
4135
4136 AMediaFormat_setString(mFileMetaData,
4137 metadataKey, (const char *)buffer + 8);
4138 }
4139 } else {
4140 buffer[size] = '\0';
4141
4142 AMediaFormat_setString(mFileMetaData,
4143 metadataKey, (const char *)buffer + 8);
4144 }
4145 }
4146
4147 delete[] buffer;
4148 buffer = NULL;
4149
4150 return OK;
4151 }
4152
parseColorInfo(off64_t offset,size_t size)4153 status_t MPEG4Extractor::parseColorInfo(off64_t offset, size_t size) {
4154 if (size < 4 || size == SIZE_MAX || mLastTrack == NULL) {
4155 return ERROR_MALFORMED;
4156 }
4157
4158 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
4159 if (buffer == NULL) {
4160 return ERROR_MALFORMED;
4161 }
4162 if (mDataSource->readAt(offset, buffer, size) != (ssize_t)size) {
4163 delete[] buffer;
4164 buffer = NULL;
4165
4166 return ERROR_IO;
4167 }
4168
4169 int32_t type = U32_AT(&buffer[0]);
4170 if ((type == FOURCC("nclx") && size >= 11)
4171 || (type == FOURCC("nclc") && size >= 10)) {
4172 // only store the first color specification
4173 int32_t existingColor;
4174 if (!AMediaFormat_getInt32(mLastTrack->meta,
4175 AMEDIAFORMAT_KEY_COLOR_RANGE, &existingColor)) {
4176 int32_t primaries = U16_AT(&buffer[4]);
4177 int32_t isotransfer = U16_AT(&buffer[6]);
4178 int32_t coeffs = U16_AT(&buffer[8]);
4179 bool fullRange = (type == FOURCC("nclx")) && (buffer[10] & 128);
4180
4181 int32_t range = 0;
4182 int32_t standard = 0;
4183 int32_t transfer = 0;
4184 ColorUtils::convertIsoColorAspectsToPlatformAspects(
4185 primaries, isotransfer, coeffs, fullRange,
4186 &range, &standard, &transfer);
4187
4188 if (range != 0) {
4189 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_COLOR_RANGE, range);
4190 }
4191 if (standard != 0) {
4192 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_COLOR_STANDARD, standard);
4193 }
4194 if (transfer != 0) {
4195 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_COLOR_TRANSFER, transfer);
4196 }
4197 }
4198 }
4199
4200 delete[] buffer;
4201 buffer = NULL;
4202
4203 return OK;
4204 }
4205
parsePaspBox(off64_t offset,size_t size)4206 status_t MPEG4Extractor::parsePaspBox(off64_t offset, size_t size) {
4207 if (size < 8 || size == SIZE_MAX || mLastTrack == NULL) {
4208 return ERROR_MALFORMED;
4209 }
4210
4211 uint32_t data[2]; // hSpacing, vSpacing
4212 if (mDataSource->readAt(offset, data, 8) < 8) {
4213 return ERROR_IO;
4214 }
4215 uint32_t hSpacing = ntohl(data[0]);
4216 uint32_t vSpacing = ntohl(data[1]);
4217
4218 if (hSpacing != 0 && vSpacing != 0) {
4219 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAR_WIDTH, hSpacing);
4220 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAR_HEIGHT, vSpacing);
4221 }
4222
4223 return OK;
4224 }
4225
parse3GPPMetaData(off64_t offset,size_t size,int depth)4226 status_t MPEG4Extractor::parse3GPPMetaData(off64_t offset, size_t size, int depth) {
4227 if (size < 4 || size == SIZE_MAX) {
4228 return ERROR_MALFORMED;
4229 }
4230
4231 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
4232 if (buffer == NULL) {
4233 return ERROR_MALFORMED;
4234 }
4235 if (mDataSource->readAt(
4236 offset, buffer, size) != (ssize_t)size) {
4237 delete[] buffer;
4238 buffer = NULL;
4239
4240 return ERROR_IO;
4241 }
4242
4243 const char *metadataKey = nullptr;
4244 switch (mPath[depth]) {
4245 case FOURCC("titl"):
4246 {
4247 metadataKey = "title";
4248 break;
4249 }
4250 case FOURCC("perf"):
4251 {
4252 metadataKey = "artist";
4253 break;
4254 }
4255 case FOURCC("auth"):
4256 {
4257 metadataKey = "writer";
4258 break;
4259 }
4260 case FOURCC("gnre"):
4261 {
4262 metadataKey = "genre";
4263 break;
4264 }
4265 case FOURCC("albm"):
4266 {
4267 if (buffer[size - 1] != '\0') {
4268 char tmp[4];
4269 sprintf(tmp, "%u", buffer[size - 1]);
4270
4271 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_CDTRACKNUMBER, tmp);
4272 }
4273
4274 metadataKey = "album";
4275 break;
4276 }
4277 case FOURCC("yrrc"):
4278 {
4279 if (size < 6) {
4280 delete[] buffer;
4281 buffer = NULL;
4282 ALOGE("b/62133227");
4283 android_errorWriteLog(0x534e4554, "62133227");
4284 return ERROR_MALFORMED;
4285 }
4286 char tmp[5];
4287 uint16_t year = U16_AT(&buffer[4]);
4288
4289 if (year < 10000) {
4290 sprintf(tmp, "%u", year);
4291
4292 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_YEAR, tmp);
4293 }
4294 break;
4295 }
4296
4297 default:
4298 break;
4299 }
4300
4301 if (metadataKey) {
4302 bool isUTF8 = true; // Common case
4303 char16_t *framedata = NULL;
4304 int len16 = 0; // Number of UTF-16 characters
4305
4306 // smallest possible valid UTF-16 string w BOM: 0xfe 0xff 0x00 0x00
4307 if (size < 6) {
4308 delete[] buffer;
4309 buffer = NULL;
4310 return ERROR_MALFORMED;
4311 }
4312
4313 if (size - 6 >= 4) {
4314 len16 = ((size - 6) / 2) - 1; // don't include 0x0000 terminator
4315 framedata = (char16_t *)(buffer + 6);
4316 if (0xfffe == *framedata) {
4317 // endianness marker (BOM) doesn't match host endianness
4318 for (int i = 0; i < len16; i++) {
4319 framedata[i] = bswap_16(framedata[i]);
4320 }
4321 // BOM is now swapped to 0xfeff, we will execute next block too
4322 }
4323
4324 if (0xfeff == *framedata) {
4325 // Remove the BOM
4326 framedata++;
4327 len16--;
4328 isUTF8 = false;
4329 }
4330 // else normal non-zero-length UTF-8 string
4331 // we can't handle UTF-16 without BOM as there is no other
4332 // indication of encoding.
4333 }
4334
4335 if (isUTF8) {
4336 buffer[size] = 0;
4337 AMediaFormat_setString(mFileMetaData, metadataKey, (const char *)buffer + 6);
4338 } else {
4339 // Convert from UTF-16 string to UTF-8 string.
4340 String8 tmpUTF8str(framedata, len16);
4341 AMediaFormat_setString(mFileMetaData, metadataKey, tmpUTF8str.string());
4342 }
4343 }
4344
4345 delete[] buffer;
4346 buffer = NULL;
4347
4348 return OK;
4349 }
4350
parseID3v2MetaData(off64_t offset,uint64_t size)4351 void MPEG4Extractor::parseID3v2MetaData(off64_t offset, uint64_t size) {
4352 uint8_t *buffer = new (std::nothrow) uint8_t[size];
4353 if (buffer == NULL) {
4354 return;
4355 }
4356 if (mDataSource->readAt(offset, buffer, size) != (ssize_t)size) {
4357 delete[] buffer;
4358 buffer = NULL;
4359 return;
4360 }
4361
4362 ID3 id3(buffer, size, true /* ignorev1 */);
4363 delete[] buffer;
4364
4365 if (id3.isValid()) {
4366 struct Map {
4367 const char *key;
4368 const char *tag1;
4369 const char *tag2;
4370 };
4371 static const Map kMap[] = {
4372 { AMEDIAFORMAT_KEY_ALBUM, "TALB", "TAL" },
4373 { AMEDIAFORMAT_KEY_ARTIST, "TPE1", "TP1" },
4374 { AMEDIAFORMAT_KEY_ALBUMARTIST, "TPE2", "TP2" },
4375 { AMEDIAFORMAT_KEY_COMPOSER, "TCOM", "TCM" },
4376 { AMEDIAFORMAT_KEY_GENRE, "TCON", "TCO" },
4377 { AMEDIAFORMAT_KEY_TITLE, "TIT2", "TT2" },
4378 { AMEDIAFORMAT_KEY_YEAR, "TYE", "TYER" },
4379 { AMEDIAFORMAT_KEY_AUTHOR, "TXT", "TEXT" },
4380 { AMEDIAFORMAT_KEY_CDTRACKNUMBER, "TRK", "TRCK" },
4381 { AMEDIAFORMAT_KEY_DISCNUMBER, "TPA", "TPOS" },
4382 { AMEDIAFORMAT_KEY_COMPILATION, "TCP", "TCMP" },
4383 };
4384 static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]);
4385
4386 for (size_t i = 0; i < kNumMapEntries; ++i) {
4387 const char *ss;
4388 if (!AMediaFormat_getString(mFileMetaData, kMap[i].key, &ss)) {
4389 ID3::Iterator *it = new ID3::Iterator(id3, kMap[i].tag1);
4390 if (it->done()) {
4391 delete it;
4392 it = new ID3::Iterator(id3, kMap[i].tag2);
4393 }
4394
4395 if (it->done()) {
4396 delete it;
4397 continue;
4398 }
4399
4400 String8 s;
4401 it->getString(&s);
4402 delete it;
4403
4404 AMediaFormat_setString(mFileMetaData, kMap[i].key, s);
4405 }
4406 }
4407
4408 size_t dataSize;
4409 String8 mime;
4410 const void *data = id3.getAlbumArt(&dataSize, &mime);
4411
4412 if (data) {
4413 AMediaFormat_setBuffer(mFileMetaData, AMEDIAFORMAT_KEY_ALBUMART, data, dataSize);
4414 }
4415 }
4416 }
4417
getTrack(size_t index)4418 MediaTrackHelper *MPEG4Extractor::getTrack(size_t index) {
4419 status_t err;
4420 if ((err = readMetaData()) != OK) {
4421 return NULL;
4422 }
4423
4424 Track *track = mFirstTrack;
4425 while (index > 0) {
4426 if (track == NULL) {
4427 return NULL;
4428 }
4429
4430 track = track->next;
4431 --index;
4432 }
4433
4434 if (track == NULL) {
4435 return NULL;
4436 }
4437
4438
4439 Trex *trex = NULL;
4440 int32_t trackId;
4441 if (AMediaFormat_getInt32(track->meta, AMEDIAFORMAT_KEY_TRACK_ID, &trackId)) {
4442 for (size_t i = 0; i < mTrex.size(); i++) {
4443 Trex *t = &mTrex.editItemAt(i);
4444 if (t->track_ID == (uint32_t) trackId) {
4445 trex = t;
4446 break;
4447 }
4448 }
4449 } else {
4450 ALOGE("b/21657957");
4451 return NULL;
4452 }
4453
4454 ALOGV("getTrack called, pssh: %zu", mPssh.size());
4455
4456 const char *mime;
4457 if (!AMediaFormat_getString(track->meta, AMEDIAFORMAT_KEY_MIME, &mime)) {
4458 return NULL;
4459 }
4460
4461 sp<ItemTable> itemTable;
4462 if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
4463 void *data;
4464 size_t size;
4465 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_AVC, &data, &size)) {
4466 return NULL;
4467 }
4468
4469 const uint8_t *ptr = (const uint8_t *)data;
4470
4471 if (size < 7 || ptr[0] != 1) { // configurationVersion == 1
4472 return NULL;
4473 }
4474 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)
4475 || !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC)) {
4476 void *data;
4477 size_t size;
4478 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_HEVC, &data, &size)) {
4479 return NULL;
4480 }
4481
4482 const uint8_t *ptr = (const uint8_t *)data;
4483
4484 if (size < 22 || ptr[0] != 1) { // configurationVersion == 1
4485 return NULL;
4486 }
4487 if (!strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC)) {
4488 itemTable = mItemTable;
4489 }
4490 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_DOLBY_VISION)) {
4491 void *data;
4492 size_t size;
4493 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_2, &data, &size)) {
4494 return NULL;
4495 }
4496
4497 const uint8_t *ptr = (const uint8_t *)data;
4498
4499 // dv_major.dv_minor Should be 1.0 or 2.1
4500 if (size != 24 || ((ptr[0] != 1 || ptr[1] != 0) && (ptr[0] != 2 || ptr[1] != 1))) {
4501 return NULL;
4502 }
4503 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AV1)
4504 || !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_AVIF)) {
4505 void *data;
4506 size_t size;
4507 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_0, &data, &size)) {
4508 return NULL;
4509 }
4510
4511 const uint8_t *ptr = (const uint8_t *)data;
4512
4513 if (size < 4 || ptr[0] != 0x81) { // configurationVersion == 1
4514 return NULL;
4515 }
4516 if (!strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_AVIF)) {
4517 itemTable = mItemTable;
4518 }
4519 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_VP9)) {
4520 void *data;
4521 size_t size;
4522 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_0, &data, &size)) {
4523 return NULL;
4524 }
4525
4526 const uint8_t *ptr = (const uint8_t *)data;
4527
4528 if (size < 5 || ptr[0] != 0x01) { // configurationVersion == 1
4529 return NULL;
4530 }
4531 }
4532
4533 ALOGV("track->elst_shift_start_ticks :%" PRIu64, track->elst_shift_start_ticks);
4534
4535 uint64_t elst_initial_empty_edit_ticks = 0;
4536 if (mHeaderTimescale != 0) {
4537 // Convert empty_edit_ticks from movie timescale to media timescale.
4538 uint64_t elst_initial_empty_edit_ticks_mul = 0, elst_initial_empty_edit_ticks_add = 0;
4539 if (__builtin_mul_overflow(track->elst_initial_empty_edit_ticks, track->timescale,
4540 &elst_initial_empty_edit_ticks_mul) ||
4541 __builtin_add_overflow(elst_initial_empty_edit_ticks_mul, (mHeaderTimescale / 2),
4542 &elst_initial_empty_edit_ticks_add)) {
4543 ALOGE("track->elst_initial_empty_edit_ticks overflow");
4544 return nullptr;
4545 }
4546 elst_initial_empty_edit_ticks = elst_initial_empty_edit_ticks_add / mHeaderTimescale;
4547 }
4548 ALOGV("elst_initial_empty_edit_ticks in MediaTimeScale :%" PRIu64,
4549 elst_initial_empty_edit_ticks);
4550
4551 MPEG4Source* source =
4552 new MPEG4Source(track->meta, mDataSource, track->timescale, track->sampleTable,
4553 mSidxEntries, trex, mMoofOffset, itemTable,
4554 track->elst_shift_start_ticks, elst_initial_empty_edit_ticks);
4555 if (source->init() != OK) {
4556 delete source;
4557 return NULL;
4558 }
4559 return source;
4560 }
4561
4562 // static
verifyTrack(Track * track)4563 status_t MPEG4Extractor::verifyTrack(Track *track) {
4564 const char *mime;
4565 CHECK(AMediaFormat_getString(track->meta, AMEDIAFORMAT_KEY_MIME, &mime));
4566
4567 void *data;
4568 size_t size;
4569 if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
4570 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_AVC, &data, &size)) {
4571 return ERROR_MALFORMED;
4572 }
4573 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) {
4574 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_HEVC, &data, &size)) {
4575 return ERROR_MALFORMED;
4576 }
4577 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_DOLBY_VISION)) {
4578 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_2, &data, &size)) {
4579 return ERROR_MALFORMED;
4580 }
4581 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AV1)) {
4582 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_0, &data, &size)) {
4583 return ERROR_MALFORMED;
4584 }
4585 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_VP9)) {
4586 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_0, &data, &size)) {
4587 return ERROR_MALFORMED;
4588 }
4589 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4)
4590 || !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG2)
4591 || !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) {
4592 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_ESDS, &data, &size)) {
4593 return ERROR_MALFORMED;
4594 }
4595 }
4596
4597 if (track->sampleTable == NULL || !track->sampleTable->isValid()) {
4598 // Make sure we have all the metadata we need.
4599 ALOGE("stbl atom missing/invalid.");
4600 return ERROR_MALFORMED;
4601 }
4602
4603 if (track->timescale == 0) {
4604 ALOGE("timescale invalid.");
4605 return ERROR_MALFORMED;
4606 }
4607
4608 return OK;
4609 }
4610
// Audio object type (AOT) codes used when interpreting MPEG-4 audio codec
// specific data. Only some codes are enabled as enumerators; the remaining
// entries of the table are kept as comments for reference.
enum AUDIO_OBJECT_TYPE {
    // AOT_NONE             = -1,
    // AOT_NULL_OBJECT      = 0,
    // AOT_AAC_MAIN         = 1,   // Main profile
    AOT_AAC_LC              = 2,   // Low Complexity object
    // AOT_AAC_SSR          = 3,
    // AOT_AAC_LTP          = 4,
    AOT_SBR                 = 5,
    // AOT_AAC_SCAL         = 6,
    // AOT_TWIN_VQ          = 7,
    // AOT_CELP             = 8,
    // AOT_HVXC             = 9,
    // AOT_RSVD_10          = 10,  // (reserved)
    // AOT_RSVD_11          = 11,  // (reserved)
    // AOT_TTSI             = 12,  // TTSI Object
    // AOT_MAIN_SYNTH       = 13,  // Main Synthetic object
    // AOT_WAV_TAB_SYNTH    = 14,  // Wavetable Synthesis object
    // AOT_GEN_MIDI         = 15,  // General MIDI object
    // AOT_ALG_SYNTH_AUD_FX = 16,  // Algorithmic Synthesis and Audio FX object
    AOT_ER_AAC_LC           = 17,  // Error Resilient(ER) AAC Low Complexity
    // AOT_RSVD_18          = 18,  // (reserved)
    // AOT_ER_AAC_LTP       = 19,  // Error Resilient(ER) AAC LTP object
    AOT_ER_AAC_SCAL         = 20,  // Error Resilient(ER) AAC Scalable object
    // AOT_ER_TWIN_VQ       = 21,  // Error Resilient(ER) TwinVQ object
    AOT_ER_BSAC             = 22,  // Error Resilient(ER) BSAC object
    AOT_ER_AAC_LD           = 23,  // Error Resilient(ER) AAC LowDelay object
    // AOT_ER_CELP          = 24,  // Error Resilient(ER) CELP object
    // AOT_ER_HVXC          = 25,  // Error Resilient(ER) HVXC object
    // AOT_ER_HILN          = 26,  // Error Resilient(ER) HILN object
    // AOT_ER_PARA          = 27,  // Error Resilient(ER) Parametric object
    // AOT_RSVD_28          = 28,  // might become SSC
    AOT_PS                  = 29,  // PS, Parametric Stereo (includes SBR)
    // AOT_MPEGS            = 30,  // MPEG Surround

    AOT_ESCAPE              = 31,  // Signal AOT uses more than 5 bits

    // AOT_MP3ONMP4_L1      = 32,  // MPEG-Layer1 in mp4
    // AOT_MP3ONMP4_L2      = 33,  // MPEG-Layer2 in mp4
    // AOT_MP3ONMP4_L3      = 34,  // MPEG-Layer3 in mp4
    // AOT_RSVD_35          = 35,  // might become DST
    // AOT_RSVD_36          = 36,  // might become ALS
    // AOT_AAC_SLS          = 37,  // AAC + SLS
    // AOT_SLS              = 38,  // SLS
    // AOT_ER_AAC_ELD       = 39,  // AAC Enhanced Low Delay

    AOT_USAC                = 42,  // USAC
    // AOT_SAOC             = 43,  // SAOC
    // AOT_LD_MPEGS         = 44,  // Low Delay MPEG Surround

    // AOT_RSVD50           = 50,  // Interim AOT for Rsvd50
};
4662
updateAudioTrackInfoFromESDS_MPEG4Audio(const void * esds_data,size_t esds_size)4663 status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio(
4664 const void *esds_data, size_t esds_size) {
4665 ESDS esds(esds_data, esds_size);
4666
4667 uint8_t objectTypeIndication;
4668 if (esds.getObjectTypeIndication(&objectTypeIndication) != OK) {
4669 return ERROR_MALFORMED;
4670 }
4671
4672 if (objectTypeIndication == 0xe1) {
4673 // This isn't MPEG4 audio at all, it's QCELP 14k...
4674 if (mLastTrack == NULL)
4675 return ERROR_MALFORMED;
4676
4677 AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_QCELP);
4678 return OK;
4679 }
4680
4681 if (objectTypeIndication == 0x6B || objectTypeIndication == 0x69) {
4682 // mp3 audio
4683 if (mLastTrack == NULL)
4684 return ERROR_MALFORMED;
4685
4686 AMediaFormat_setString(mLastTrack->meta,AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_MPEG);
4687 return OK;
4688 }
4689
4690 if (mLastTrack != NULL) {
4691 uint32_t maxBitrate = 0;
4692 uint32_t avgBitrate = 0;
4693 esds.getBitRate(&maxBitrate, &avgBitrate);
4694 if (maxBitrate > 0 && maxBitrate < INT32_MAX) {
4695 AMediaFormat_setInt32(mLastTrack->meta,
4696 AMEDIAFORMAT_KEY_MAX_BIT_RATE, (int32_t)maxBitrate);
4697 }
4698 if (avgBitrate > 0 && avgBitrate < INT32_MAX) {
4699 AMediaFormat_setInt32(mLastTrack->meta,
4700 AMEDIAFORMAT_KEY_BIT_RATE, (int32_t)avgBitrate);
4701 }
4702 }
4703
4704 const uint8_t *csd;
4705 size_t csd_size;
4706 if (esds.getCodecSpecificInfo(
4707 (const void **)&csd, &csd_size) != OK) {
4708 return ERROR_MALFORMED;
4709 }
4710
4711 if (kUseHexDump) {
4712 printf("ESD of size %zu\n", csd_size);
4713 hexdump(csd, csd_size);
4714 }
4715
4716 if (csd_size == 0) {
4717 // There's no further information, i.e. no codec specific data
4718 // Let's assume that the information provided in the mpeg4 headers
4719 // is accurate and hope for the best.
4720
4721 return OK;
4722 }
4723
4724 if (csd_size < 2) {
4725 return ERROR_MALFORMED;
4726 }
4727
4728 if (objectTypeIndication == 0xdd) {
4729 // vorbis audio
4730 if (csd[0] != 0x02) {
4731 return ERROR_MALFORMED;
4732 }
4733
4734 // codecInfo starts with two lengths, len1 and len2, that are
4735 // "Xiph-style-lacing encoded"..
4736
4737 size_t offset = 1;
4738 size_t len1 = 0;
4739 while (offset < csd_size && csd[offset] == 0xff) {
4740 if (__builtin_add_overflow(len1, 0xff, &len1)) {
4741 return ERROR_MALFORMED;
4742 }
4743 ++offset;
4744 }
4745 if (offset >= csd_size) {
4746 return ERROR_MALFORMED;
4747 }
4748 if (__builtin_add_overflow(len1, csd[offset], &len1)) {
4749 return ERROR_MALFORMED;
4750 }
4751 ++offset;
4752 if (len1 == 0) {
4753 return ERROR_MALFORMED;
4754 }
4755
4756 size_t len2 = 0;
4757 while (offset < csd_size && csd[offset] == 0xff) {
4758 if (__builtin_add_overflow(len2, 0xff, &len2)) {
4759 return ERROR_MALFORMED;
4760 }
4761 ++offset;
4762 }
4763 if (offset >= csd_size) {
4764 return ERROR_MALFORMED;
4765 }
4766 if (__builtin_add_overflow(len2, csd[offset], &len2)) {
4767 return ERROR_MALFORMED;
4768 }
4769 ++offset;
4770 if (len2 == 0) {
4771 return ERROR_MALFORMED;
4772 }
4773 if (offset >= csd_size || csd[offset] != 0x01) {
4774 return ERROR_MALFORMED;
4775 }
4776
4777 if (mLastTrack == NULL) {
4778 return ERROR_MALFORMED;
4779 }
4780 // formerly kKeyVorbisInfo
4781 AMediaFormat_setBuffer(mLastTrack->meta,
4782 AMEDIAFORMAT_KEY_CSD_0, &csd[offset], len1);
4783
4784 if (__builtin_add_overflow(offset, len1, &offset) ||
4785 offset >= csd_size || csd[offset] != 0x03) {
4786 return ERROR_MALFORMED;
4787 }
4788
4789 if (__builtin_add_overflow(offset, len2, &offset) ||
4790 offset >= csd_size || csd[offset] != 0x05) {
4791 return ERROR_MALFORMED;
4792 }
4793
4794 // formerly kKeyVorbisBooks
4795 AMediaFormat_setBuffer(mLastTrack->meta,
4796 AMEDIAFORMAT_KEY_CSD_1, &csd[offset], csd_size - offset);
4797 AMediaFormat_setString(mLastTrack->meta,
4798 AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_VORBIS);
4799
4800 return OK;
4801 }
4802
4803 static uint32_t kSamplingRate[] = {
4804 96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050,
4805 16000, 12000, 11025, 8000, 7350
4806 };
4807
4808 ABitReader br(csd, csd_size);
4809 uint32_t objectType = br.getBits(5);
4810
4811 if (objectType == AOT_ESCAPE) { // AAC-ELD => additional 6 bits
4812 objectType = 32 + br.getBits(6);
4813 }
4814
4815 if (mLastTrack == NULL)
4816 return ERROR_MALFORMED;
4817
4818 //keep AOT type
4819 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_AAC_PROFILE, objectType);
4820
4821 uint32_t freqIndex = br.getBits(4);
4822
4823 int32_t sampleRate = 0;
4824 int32_t numChannels = 0;
4825 if (freqIndex == 15) {
4826 if (br.numBitsLeft() < 28) return ERROR_MALFORMED;
4827 sampleRate = br.getBits(24);
4828 numChannels = br.getBits(4);
4829 } else {
4830 if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
4831 numChannels = br.getBits(4);
4832
4833 if (freqIndex == 13 || freqIndex == 14) {
4834 return ERROR_MALFORMED;
4835 }
4836
4837 sampleRate = kSamplingRate[freqIndex];
4838 }
4839
4840 if (objectType == AOT_SBR || objectType == AOT_PS) {//SBR specific config per 14496-3 tbl 1.13
4841 if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
4842 uint32_t extFreqIndex = br.getBits(4);
4843 if (extFreqIndex == 15) {
4844 if (csd_size < 8) {
4845 return ERROR_MALFORMED;
4846 }
4847 if (br.numBitsLeft() < 24) return ERROR_MALFORMED;
4848 br.skipBits(24); // extSampleRate
4849 } else {
4850 if (extFreqIndex == 13 || extFreqIndex == 14) {
4851 return ERROR_MALFORMED;
4852 }
4853 //extSampleRate = kSamplingRate[extFreqIndex];
4854 }
4855 //TODO: save the extension sampling rate value in meta data =>
4856 // AMediaFormat_setInt32(mLastTrack->meta, kKeyExtSampleRate, extSampleRate);
4857 }
4858
4859 switch (numChannels) {
4860 // values defined in 14496-3_2009 amendment-4 Table 1.19 - Channel Configuration
4861 case 0:
4862 case 1:// FC
4863 case 2:// FL FR
4864 case 3:// FC, FL FR
4865 case 4:// FC, FL FR, RC
4866 case 5:// FC, FL FR, SL SR
4867 case 6:// FC, FL FR, SL SR, LFE
4868 //numChannels already contains the right value
4869 break;
4870 case 11:// FC, FL FR, SL SR, RC, LFE
4871 numChannels = 7;
4872 break;
4873 case 7: // FC, FCL FCR, FL FR, SL SR, LFE
4874 case 12:// FC, FL FR, SL SR, RL RR, LFE
4875 case 14:// FC, FL FR, SL SR, LFE, FHL FHR
4876 numChannels = 8;
4877 break;
4878 default:
4879 return ERROR_UNSUPPORTED;
4880 }
4881
4882 {
4883 if (objectType == AOT_SBR || objectType == AOT_PS) {
4884 if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
4885 objectType = br.getBits(5);
4886
4887 if (objectType == AOT_ESCAPE) {
4888 if (br.numBitsLeft() < 6) return ERROR_MALFORMED;
4889 objectType = 32 + br.getBits(6);
4890 }
4891 }
4892 if (objectType == AOT_AAC_LC || objectType == AOT_ER_AAC_LC ||
4893 objectType == AOT_ER_AAC_LD || objectType == AOT_ER_AAC_SCAL ||
4894 objectType == AOT_ER_BSAC) {
4895 if (br.numBitsLeft() < 2) return ERROR_MALFORMED;
4896 br.skipBits(1); // frameLengthFlag
4897
4898 const int32_t dependsOnCoreCoder = br.getBits(1);
4899
4900 if (dependsOnCoreCoder ) {
4901 if (br.numBitsLeft() < 14) return ERROR_MALFORMED;
4902 br.skipBits(14); // coreCoderDelay
4903 }
4904
4905 int32_t extensionFlag = -1;
4906 if (br.numBitsLeft() > 0) {
4907 extensionFlag = br.getBits(1);
4908 } else {
4909 switch (objectType) {
4910 // 14496-3 4.5.1.1 extensionFlag
4911 case AOT_AAC_LC:
4912 extensionFlag = 0;
4913 break;
4914 case AOT_ER_AAC_LC:
4915 case AOT_ER_AAC_SCAL:
4916 case AOT_ER_BSAC:
4917 case AOT_ER_AAC_LD:
4918 extensionFlag = 1;
4919 break;
4920 default:
4921 return ERROR_MALFORMED;
4922 break;
4923 }
4924 ALOGW("csd missing extension flag; assuming %d for object type %u.",
4925 extensionFlag, objectType);
4926 }
4927
4928 if (numChannels == 0) {
4929 int32_t channelsEffectiveNum = 0;
4930 int32_t channelsNum = 0;
4931 if (br.numBitsLeft() < 32) {
4932 return ERROR_MALFORMED;
4933 }
4934 br.skipBits(4); // ElementInstanceTag
4935 br.skipBits(2); // Profile
4936 br.skipBits(4); // SamplingFrequencyIndex
4937 const int32_t NumFrontChannelElements = br.getBits(4);
4938 const int32_t NumSideChannelElements = br.getBits(4);
4939 const int32_t NumBackChannelElements = br.getBits(4);
4940 const int32_t NumLfeChannelElements = br.getBits(2);
4941 br.skipBits(3); // NumAssocDataElements
4942 br.skipBits(4); // NumValidCcElements
4943
4944 const int32_t MonoMixdownPresent = br.getBits(1);
4945
4946 if (MonoMixdownPresent != 0) {
4947 if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
4948 br.skipBits(4); // MonoMixdownElementNumber
4949 }
4950
4951 if (br.numBitsLeft() < 1) return ERROR_MALFORMED;
4952 const int32_t StereoMixdownPresent = br.getBits(1);
4953 if (StereoMixdownPresent != 0) {
4954 if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
4955 br.skipBits(4); // StereoMixdownElementNumber
4956 }
4957
4958 if (br.numBitsLeft() < 1) return ERROR_MALFORMED;
4959 const int32_t MatrixMixdownIndexPresent = br.getBits(1);
4960 if (MatrixMixdownIndexPresent != 0) {
4961 if (br.numBitsLeft() < 3) return ERROR_MALFORMED;
4962 br.skipBits(2); // MatrixMixdownIndex
4963 br.skipBits(1); // PseudoSurroundEnable
4964 }
4965
4966 int i;
4967 for (i=0; i < NumFrontChannelElements; i++) {
4968 if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
4969 const int32_t FrontElementIsCpe = br.getBits(1);
4970 br.skipBits(4); // FrontElementTagSelect
4971 channelsNum += FrontElementIsCpe ? 2 : 1;
4972 }
4973
4974 for (i=0; i < NumSideChannelElements; i++) {
4975 if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
4976 const int32_t SideElementIsCpe = br.getBits(1);
4977 br.skipBits(4); // SideElementTagSelect
4978 channelsNum += SideElementIsCpe ? 2 : 1;
4979 }
4980
4981 for (i=0; i < NumBackChannelElements; i++) {
4982 if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
4983 const int32_t BackElementIsCpe = br.getBits(1);
4984 br.skipBits(4); // BackElementTagSelect
4985 channelsNum += BackElementIsCpe ? 2 : 1;
4986 }
4987 channelsEffectiveNum = channelsNum;
4988
4989 for (i=0; i < NumLfeChannelElements; i++) {
4990 if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
4991 br.skipBits(4); // LfeElementTagSelect
4992 channelsNum += 1;
4993 }
4994 ALOGV("mpeg4 audio channelsNum = %d", channelsNum);
4995 ALOGV("mpeg4 audio channelsEffectiveNum = %d", channelsEffectiveNum);
4996 numChannels = channelsNum;
4997 }
4998 }
4999 }
5000
5001 if (numChannels == 0) {
5002 return ERROR_UNSUPPORTED;
5003 }
5004
5005 if (mLastTrack == NULL)
5006 return ERROR_MALFORMED;
5007
5008 int32_t prevSampleRate;
5009 CHECK(AMediaFormat_getInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, &prevSampleRate));
5010
5011 if (prevSampleRate != sampleRate) {
5012 ALOGV("mpeg4 audio sample rate different from previous setting. "
5013 "was: %d, now: %d", prevSampleRate, sampleRate);
5014 }
5015
5016 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, sampleRate);
5017
5018 int32_t prevChannelCount;
5019 CHECK(AMediaFormat_getInt32(mLastTrack->meta,
5020 AMEDIAFORMAT_KEY_CHANNEL_COUNT, &prevChannelCount));
5021
5022 if (prevChannelCount != numChannels) {
5023 ALOGV("mpeg4 audio channel count different from previous setting. "
5024 "was: %d, now: %d", prevChannelCount, numChannels);
5025 }
5026
5027 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, numChannels);
5028
5029 return OK;
5030 }
5031
adjustRawDefaultFrameSize()5032 void MPEG4Extractor::adjustRawDefaultFrameSize() {
5033 int32_t chanCount = 0;
5034 int32_t bitWidth = 0;
5035 const char *mimeStr = NULL;
5036
5037 if(AMediaFormat_getString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, &mimeStr) &&
5038 !strcasecmp(mimeStr, MEDIA_MIMETYPE_AUDIO_RAW) &&
5039 AMediaFormat_getInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, &chanCount) &&
5040 AMediaFormat_getInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_BITS_PER_SAMPLE, &bitWidth)) {
5041 // samplesize in stsz may not right , so updade default samplesize
5042 mLastTrack->sampleTable->setPredictSampleSize(chanCount * bitWidth / 8);
5043 }
5044 }
5045
5046 ////////////////////////////////////////////////////////////////////////////////
5047
MPEG4Source(AMediaFormat * format,DataSourceHelper * dataSource,int32_t timeScale,const sp<SampleTable> & sampleTable,Vector<SidxEntry> & sidx,const Trex * trex,off64_t firstMoofOffset,const sp<ItemTable> & itemTable,uint64_t elstShiftStartTicks,uint64_t elstInitialEmptyEditTicks)5048 MPEG4Source::MPEG4Source(
5049 AMediaFormat *format,
5050 DataSourceHelper *dataSource,
5051 int32_t timeScale,
5052 const sp<SampleTable> &sampleTable,
5053 Vector<SidxEntry> &sidx,
5054 const Trex *trex,
5055 off64_t firstMoofOffset,
5056 const sp<ItemTable> &itemTable,
5057 uint64_t elstShiftStartTicks,
5058 uint64_t elstInitialEmptyEditTicks)
5059 : mFormat(format),
5060 mDataSource(dataSource),
5061 mTimescale(timeScale),
5062 mSampleTable(sampleTable),
5063 mCurrentSampleIndex(0),
5064 mCurrentFragmentIndex(0),
5065 mSegments(sidx),
5066 mTrex(trex),
5067 mFirstMoofOffset(firstMoofOffset),
5068 mCurrentMoofOffset(firstMoofOffset),
5069 mCurrentMoofSize(0),
5070 mNextMoofOffset(-1),
5071 mCurrentTime(0),
5072 mDefaultEncryptedByteBlock(0),
5073 mDefaultSkipByteBlock(0),
5074 mCurrentSampleInfoAllocSize(0),
5075 mCurrentSampleInfoSizes(NULL),
5076 mCurrentSampleInfoOffsetsAllocSize(0),
5077 mCurrentSampleInfoOffsets(NULL),
5078 mIsAVC(false),
5079 mIsHEVC(false),
5080 mIsDolbyVision(false),
5081 mIsAC4(false),
5082 mIsPcm(false),
5083 mNALLengthSize(0),
5084 mStarted(false),
5085 mBuffer(NULL),
5086 mSrcBuffer(NULL),
5087 mItemTable(itemTable),
5088 mElstShiftStartTicks(elstShiftStartTicks),
5089 mElstInitialEmptyEditTicks(elstInitialEmptyEditTicks) {
5090
5091 memset(&mTrackFragmentHeaderInfo, 0, sizeof(mTrackFragmentHeaderInfo));
5092
5093 AMediaFormat_getInt32(mFormat,
5094 AMEDIAFORMAT_KEY_CRYPTO_MODE, &mCryptoMode);
5095 mDefaultIVSize = 0;
5096 AMediaFormat_getInt32(mFormat,
5097 AMEDIAFORMAT_KEY_CRYPTO_DEFAULT_IV_SIZE, &mDefaultIVSize);
5098 void *key;
5099 size_t keysize;
5100 if (AMediaFormat_getBuffer(mFormat,
5101 AMEDIAFORMAT_KEY_CRYPTO_KEY, &key, &keysize)) {
5102 CHECK(keysize <= 16);
5103 memset(mCryptoKey, 0, 16);
5104 memcpy(mCryptoKey, key, keysize);
5105 }
5106
5107 AMediaFormat_getInt32(mFormat,
5108 AMEDIAFORMAT_KEY_CRYPTO_ENCRYPTED_BYTE_BLOCK, &mDefaultEncryptedByteBlock);
5109 AMediaFormat_getInt32(mFormat,
5110 AMEDIAFORMAT_KEY_CRYPTO_SKIP_BYTE_BLOCK, &mDefaultSkipByteBlock);
5111
5112 const char *mime;
5113 bool success = AMediaFormat_getString(mFormat, AMEDIAFORMAT_KEY_MIME, &mime);
5114 CHECK(success);
5115
5116 mIsMpegH = !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_MPEGH_MHA1) ||
5117 !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_MPEGH_MHM1);
5118 mIsAVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC);
5119 mIsHEVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC) ||
5120 !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC);
5121 mIsAC4 = !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AC4);
5122 mIsDolbyVision = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_DOLBY_VISION);
5123 mIsHeif = !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC) && mItemTable != NULL;
5124 mIsAvif = !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_AVIF) && mItemTable != NULL;
5125
5126 if (mIsAVC) {
5127 void *data;
5128 size_t size;
5129 CHECK(AMediaFormat_getBuffer(format, AMEDIAFORMAT_KEY_CSD_AVC, &data, &size));
5130
5131 const uint8_t *ptr = (const uint8_t *)data;
5132
5133 CHECK(size >= 7);
5134 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1
5135
5136 // The number of bytes used to encode the length of a NAL unit.
5137 mNALLengthSize = 1 + (ptr[4] & 3);
5138 } else if (mIsHEVC) {
5139 void *data;
5140 size_t size;
5141 CHECK(AMediaFormat_getBuffer(format, AMEDIAFORMAT_KEY_CSD_HEVC, &data, &size));
5142
5143 const uint8_t *ptr = (const uint8_t *)data;
5144
5145 CHECK(size >= 22);
5146 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1
5147
5148 mNALLengthSize = 1 + (ptr[14 + 7] & 3);
5149 } else if (mIsDolbyVision) {
5150 ALOGV("%s DolbyVision stream detected", __FUNCTION__);
5151 void *data;
5152 size_t size;
5153 CHECK(AMediaFormat_getBuffer(format, AMEDIAFORMAT_KEY_CSD_2, &data, &size));
5154
5155 const uint8_t *ptr = (const uint8_t *)data;
5156
5157 CHECK(size == 24);
5158
5159 // dv_major.dv_minor Should be 1.0 or 2.1
5160 CHECK(!((ptr[0] != 1 || ptr[1] != 0) && (ptr[0] != 2 || ptr[1] != 1)));
5161
5162 const uint8_t profile = ptr[2] >> 1;
5163 // profile == (unknown,1,9) --> AVC; profile = (2,3,4,5,6,7,8) --> HEVC;
5164 // profile == (10) --> AV1
5165 if (profile > 1 && profile < 9) {
5166 CHECK(AMediaFormat_getBuffer(format, AMEDIAFORMAT_KEY_CSD_HEVC, &data, &size));
5167
5168 const uint8_t *ptr = (const uint8_t *)data;
5169
5170 CHECK(size >= 22);
5171 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1
5172
5173 mNALLengthSize = 1 + (ptr[14 + 7] & 3);
5174 } else if (10 == profile) {
5175 /* AV1 profile nothing to do */
5176 } else {
5177 CHECK(AMediaFormat_getBuffer(format, AMEDIAFORMAT_KEY_CSD_AVC, &data, &size));
5178 const uint8_t *ptr = (const uint8_t *)data;
5179
5180 CHECK(size >= 7);
5181 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1
5182 // The number of bytes used to encode the length of a NAL unit.
5183 mNALLengthSize = 1 + (ptr[4] & 3);
5184 }
5185 }
5186
5187 mIsPcm = !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_RAW);
5188 mIsAudio = !strncasecmp(mime, "audio/", 6);
5189
5190 int32_t aacObjectType = -1;
5191
5192 if (AMediaFormat_getInt32(format, AMEDIAFORMAT_KEY_AAC_PROFILE, &aacObjectType)) {
5193 mIsUsac = (aacObjectType == AOT_USAC);
5194 }
5195
5196 if (mIsPcm) {
5197 int32_t numChannels = 0;
5198 int32_t bitsPerSample = 0;
5199 CHECK(AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_BITS_PER_SAMPLE, &bitsPerSample));
5200 CHECK(AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_CHANNEL_COUNT, &numChannels));
5201
5202 int32_t bytesPerSample = bitsPerSample >> 3;
5203 int32_t pcmSampleSize = bytesPerSample * numChannels;
5204
5205 size_t maxSampleSize;
5206 status_t err = mSampleTable->getMaxSampleSize(&maxSampleSize);
5207 if (err != OK || maxSampleSize != static_cast<size_t>(pcmSampleSize)
5208 || bitsPerSample != 16) {
5209 // Not supported
5210 mIsPcm = false;
5211 } else {
5212 AMediaFormat_setInt32(mFormat,
5213 AMEDIAFORMAT_KEY_MAX_INPUT_SIZE, pcmSampleSize * kMaxPcmFrameSize);
5214 }
5215 }
5216
5217 CHECK(AMediaFormat_getInt32(format, AMEDIAFORMAT_KEY_TRACK_ID, &mTrackId));
5218 }
5219
init()5220 status_t MPEG4Source::init() {
5221 if (mFirstMoofOffset != 0) {
5222 off64_t offset = mFirstMoofOffset;
5223 return parseChunk(&offset);
5224 }
5225 return OK;
5226 }
5227
~MPEG4Source()5228 MPEG4Source::~MPEG4Source() {
5229 if (mStarted) {
5230 stop();
5231 }
5232 free(mCurrentSampleInfoSizes);
5233 free(mCurrentSampleInfoOffsets);
5234 }
5235
start()5236 media_status_t MPEG4Source::start() {
5237 Mutex::Autolock autoLock(mLock);
5238
5239 CHECK(!mStarted);
5240
5241 int32_t tmp;
5242 CHECK(AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_MAX_INPUT_SIZE, &tmp));
5243 size_t max_size = tmp;
5244
5245 // A somewhat arbitrary limit that should be sufficient for 8k video frames
5246 // If you see the message below for a valid input stream: increase the limit
5247 const size_t kMaxBufferSize = 64 * 1024 * 1024;
5248 if (max_size > kMaxBufferSize) {
5249 ALOGE("bogus max input size: %zu > %zu", max_size, kMaxBufferSize);
5250 return AMEDIA_ERROR_MALFORMED;
5251 }
5252 if (max_size == 0) {
5253 ALOGE("zero max input size");
5254 return AMEDIA_ERROR_MALFORMED;
5255 }
5256
5257 // Allow up to kMaxBuffers, but not if the total exceeds kMaxBufferSize.
5258 const size_t kInitialBuffers = 2;
5259 const size_t kMaxBuffers = 8;
5260 const size_t realMaxBuffers = min(kMaxBufferSize / max_size, kMaxBuffers);
5261 mBufferGroup->init(kInitialBuffers, max_size, realMaxBuffers);
5262 mSrcBuffer = new (std::nothrow) uint8_t[max_size];
5263 if (mSrcBuffer == NULL) {
5264 // file probably specified a bad max size
5265 return AMEDIA_ERROR_MALFORMED;
5266 }
5267
5268 mStarted = true;
5269
5270 return AMEDIA_OK;
5271 }
5272
stop()5273 media_status_t MPEG4Source::stop() {
5274 Mutex::Autolock autoLock(mLock);
5275
5276 CHECK(mStarted);
5277
5278 if (mBuffer != NULL) {
5279 mBuffer->release();
5280 mBuffer = NULL;
5281 }
5282
5283 delete[] mSrcBuffer;
5284 mSrcBuffer = NULL;
5285
5286 mStarted = false;
5287 mCurrentSampleIndex = 0;
5288
5289 return AMEDIA_OK;
5290 }
5291
parseChunk(off64_t * offset)5292 status_t MPEG4Source::parseChunk(off64_t *offset) {
5293 uint32_t hdr[2];
5294 if (mDataSource->readAt(*offset, hdr, 8) < 8) {
5295 return ERROR_IO;
5296 }
5297 uint64_t chunk_size = ntohl(hdr[0]);
5298 uint32_t chunk_type = ntohl(hdr[1]);
5299 off64_t data_offset = *offset + 8;
5300
5301 if (chunk_size == 1) {
5302 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
5303 return ERROR_IO;
5304 }
5305 chunk_size = ntoh64(chunk_size);
5306 data_offset += 8;
5307
5308 if (chunk_size < 16) {
5309 // The smallest valid chunk is 16 bytes long in this case.
5310 return ERROR_MALFORMED;
5311 }
5312 } else if (chunk_size < 8) {
5313 // The smallest valid chunk is 8 bytes long.
5314 return ERROR_MALFORMED;
5315 }
5316
5317 char chunk[5];
5318 MakeFourCCString(chunk_type, chunk);
5319 ALOGV("MPEG4Source chunk %s @ %#llx", chunk, (long long)*offset);
5320
5321 off64_t chunk_data_size = *offset + chunk_size - data_offset;
5322
5323 switch(chunk_type) {
5324
5325 case FOURCC("traf"):
5326 case FOURCC("moof"): {
5327 off64_t stop_offset = *offset + chunk_size;
5328 *offset = data_offset;
5329 if (chunk_type == FOURCC("moof")) {
5330 mCurrentMoofSize = chunk_data_size;
5331 }
5332 while (*offset < stop_offset) {
5333 status_t err = parseChunk(offset);
5334 if (err != OK) {
5335 return err;
5336 }
5337 }
5338 if (chunk_type == FOURCC("moof")) {
5339 // *offset points to the box following this moof. Find the next moof from there.
5340
5341 while (true) {
5342 if (mDataSource->readAt(*offset, hdr, 8) < 8) {
5343 // no more box to the end of file.
5344 break;
5345 }
5346 chunk_size = ntohl(hdr[0]);
5347 chunk_type = ntohl(hdr[1]);
5348 if (chunk_size == 1) {
5349 // ISO/IEC 14496-12:2012, 8.8.4 Movie Fragment Box, moof is a Box
5350 // which is defined in 4.2 Object Structure.
5351 // When chunk_size==1, 8 bytes follows as "largesize".
5352 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
5353 return ERROR_IO;
5354 }
5355 chunk_size = ntoh64(chunk_size);
5356 if (chunk_size < 16) {
5357 // The smallest valid chunk is 16 bytes long in this case.
5358 return ERROR_MALFORMED;
5359 }
5360 } else if (chunk_size == 0) {
5361 // next box extends to end of file.
5362 } else if (chunk_size < 8) {
5363 // The smallest valid chunk is 8 bytes long in this case.
5364 return ERROR_MALFORMED;
5365 }
5366
5367 if (chunk_type == FOURCC("moof")) {
5368 mNextMoofOffset = *offset;
5369 break;
5370 } else if (chunk_type == FOURCC("mdat")) {
5371 parseChunk(offset);
5372 continue;
5373 } else if (chunk_size == 0) {
5374 break;
5375 }
5376 *offset += chunk_size;
5377 }
5378 }
5379 break;
5380 }
5381
5382 case FOURCC("tfhd"): {
5383 status_t err;
5384 if ((err = parseTrackFragmentHeader(data_offset, chunk_data_size)) != OK) {
5385 return err;
5386 }
5387 *offset += chunk_size;
5388 break;
5389 }
5390
5391 case FOURCC("trun"): {
5392 status_t err;
5393 if (mLastParsedTrackId == mTrackId) {
5394 if ((err = parseTrackFragmentRun(data_offset, chunk_data_size)) != OK) {
5395 return err;
5396 }
5397 }
5398
5399 *offset += chunk_size;
5400 break;
5401 }
5402
5403 case FOURCC("saiz"): {
5404 status_t err;
5405 if ((err = parseSampleAuxiliaryInformationSizes(data_offset, chunk_data_size)) != OK) {
5406 return err;
5407 }
5408 *offset += chunk_size;
5409 break;
5410 }
5411 case FOURCC("saio"): {
5412 status_t err;
5413 if ((err = parseSampleAuxiliaryInformationOffsets(data_offset, chunk_data_size))
5414 != OK) {
5415 return err;
5416 }
5417 *offset += chunk_size;
5418 break;
5419 }
5420
5421 case FOURCC("senc"): {
5422 status_t err;
5423 if ((err = parseSampleEncryption(data_offset, chunk_data_size)) != OK) {
5424 return err;
5425 }
5426 *offset += chunk_size;
5427 break;
5428 }
5429
5430 case FOURCC("mdat"): {
5431 // parse DRM info if present
5432 ALOGV("MPEG4Source::parseChunk mdat");
5433 // if saiz/saoi was previously observed, do something with the sampleinfos
5434 status_t err = OK;
5435 auto kv = mDrmOffsets.lower_bound(*offset);
5436 if (kv != mDrmOffsets.end()) {
5437 auto drmoffset = kv->first;
5438 auto flags = kv->second;
5439 mDrmOffsets.erase(kv);
5440 ALOGV("mdat chunk_size %" PRIu64 " drmoffset %" PRId64 " offset %" PRId64,
5441 chunk_size, drmoffset, *offset);
5442 if (chunk_size >= drmoffset - *offset) {
5443 err = parseClearEncryptedSizes(drmoffset, false, flags,
5444 chunk_size - (drmoffset - *offset));
5445 }
5446 }
5447 if (err != OK) {
5448 return err;
5449 }
5450 *offset += chunk_size;
5451 break;
5452 }
5453
5454 default: {
5455 *offset += chunk_size;
5456 break;
5457 }
5458 }
5459 return OK;
5460 }
5461
parseSampleAuxiliaryInformationSizes(off64_t offset,off64_t size)5462 status_t MPEG4Source::parseSampleAuxiliaryInformationSizes(
5463 off64_t offset, off64_t size) {
5464 ALOGV("parseSampleAuxiliaryInformationSizes");
5465 if (size < 9) {
5466 return -EINVAL;
5467 }
5468 // 14496-12 8.7.12
5469 uint8_t version;
5470 if (mDataSource->readAt(
5471 offset, &version, sizeof(version))
5472 < (ssize_t)sizeof(version)) {
5473 return ERROR_IO;
5474 }
5475
5476 if (version != 0) {
5477 return ERROR_UNSUPPORTED;
5478 }
5479 offset++;
5480 size--;
5481
5482 uint32_t flags;
5483 if (!mDataSource->getUInt24(offset, &flags)) {
5484 return ERROR_IO;
5485 }
5486 offset += 3;
5487 size -= 3;
5488
5489 if (flags & 1) {
5490 if (size < 13) {
5491 return -EINVAL;
5492 }
5493 uint32_t tmp;
5494 if (!mDataSource->getUInt32(offset, &tmp)) {
5495 return ERROR_MALFORMED;
5496 }
5497 mCurrentAuxInfoType = tmp;
5498 offset += 4;
5499 size -= 4;
5500 if (!mDataSource->getUInt32(offset, &tmp)) {
5501 return ERROR_MALFORMED;
5502 }
5503 mCurrentAuxInfoTypeParameter = tmp;
5504 offset += 4;
5505 size -= 4;
5506 }
5507
5508 uint8_t defsize;
5509 if (mDataSource->readAt(offset, &defsize, 1) != 1) {
5510 return ERROR_MALFORMED;
5511 }
5512 mCurrentDefaultSampleInfoSize = defsize;
5513 offset++;
5514 size--;
5515
5516 uint32_t smplcnt;
5517 if (!mDataSource->getUInt32(offset, &smplcnt)) {
5518 return ERROR_MALFORMED;
5519 }
5520 mCurrentSampleInfoCount = smplcnt;
5521 offset += 4;
5522 size -= 4;
5523 if (mCurrentDefaultSampleInfoSize != 0) {
5524 ALOGV("@@@@ using default sample info size of %d", mCurrentDefaultSampleInfoSize);
5525 return OK;
5526 }
5527 if(smplcnt > size) {
5528 ALOGW("b/124525515 - smplcnt(%u) > size(%ld)", (unsigned int)smplcnt, (unsigned long)size);
5529 android_errorWriteLog(0x534e4554, "124525515");
5530 return -EINVAL;
5531 }
5532 if (smplcnt > mCurrentSampleInfoAllocSize) {
5533 uint8_t * newPtr = (uint8_t*) realloc(mCurrentSampleInfoSizes, smplcnt);
5534 if (newPtr == NULL) {
5535 ALOGE("failed to realloc %u -> %u", mCurrentSampleInfoAllocSize, smplcnt);
5536 return NO_MEMORY;
5537 }
5538 mCurrentSampleInfoSizes = newPtr;
5539 mCurrentSampleInfoAllocSize = smplcnt;
5540 }
5541
5542 mDataSource->readAt(offset, mCurrentSampleInfoSizes, smplcnt);
5543 return OK;
5544 }
5545
parseSampleAuxiliaryInformationOffsets(off64_t offset,off64_t size)5546 status_t MPEG4Source::parseSampleAuxiliaryInformationOffsets(
5547 off64_t offset, off64_t size) {
5548 ALOGV("parseSampleAuxiliaryInformationOffsets");
5549 if (size < 8) {
5550 return -EINVAL;
5551 }
5552 // 14496-12 8.7.13
5553 uint8_t version;
5554 if (mDataSource->readAt(offset, &version, sizeof(version)) != 1) {
5555 return ERROR_IO;
5556 }
5557 offset++;
5558 size--;
5559
5560 uint32_t flags;
5561 if (!mDataSource->getUInt24(offset, &flags)) {
5562 return ERROR_IO;
5563 }
5564 offset += 3;
5565 size -= 3;
5566
5567 uint32_t entrycount;
5568 if (!mDataSource->getUInt32(offset, &entrycount)) {
5569 return ERROR_IO;
5570 }
5571 offset += 4;
5572 size -= 4;
5573 if (entrycount == 0) {
5574 return OK;
5575 }
5576 if (entrycount > UINT32_MAX / 8) {
5577 return ERROR_MALFORMED;
5578 }
5579
5580 if (entrycount > mCurrentSampleInfoOffsetsAllocSize) {
5581 uint64_t *newPtr = (uint64_t *)realloc(mCurrentSampleInfoOffsets, entrycount * 8);
5582 if (newPtr == NULL) {
5583 ALOGE("failed to realloc %u -> %u",
5584 mCurrentSampleInfoOffsetsAllocSize, entrycount * 8);
5585 return NO_MEMORY;
5586 }
5587 mCurrentSampleInfoOffsets = newPtr;
5588 mCurrentSampleInfoOffsetsAllocSize = entrycount;
5589 }
5590 mCurrentSampleInfoOffsetCount = entrycount;
5591
5592 if (mCurrentSampleInfoOffsets == NULL) {
5593 return OK;
5594 }
5595
5596 for (size_t i = 0; i < entrycount; i++) {
5597 if (version == 0) {
5598 if (size < 4) {
5599 ALOGW("b/124526959");
5600 android_errorWriteLog(0x534e4554, "124526959");
5601 return -EINVAL;
5602 }
5603 uint32_t tmp;
5604 if (!mDataSource->getUInt32(offset, &tmp)) {
5605 return ERROR_IO;
5606 }
5607 mCurrentSampleInfoOffsets[i] = tmp;
5608 offset += 4;
5609 size -= 4;
5610 } else {
5611 if (size < 8) {
5612 ALOGW("b/124526959");
5613 android_errorWriteLog(0x534e4554, "124526959");
5614 return -EINVAL;
5615 }
5616 uint64_t tmp;
5617 if (!mDataSource->getUInt64(offset, &tmp)) {
5618 return ERROR_IO;
5619 }
5620 mCurrentSampleInfoOffsets[i] = tmp;
5621 offset += 8;
5622 size -= 8;
5623 }
5624 }
5625
5626 // parse clear/encrypted data
5627
5628 off64_t drmoffset = mCurrentSampleInfoOffsets[0]; // from moof
5629
5630 drmoffset += mCurrentMoofOffset;
5631 mDrmOffsets[drmoffset] = flags;
5632 ALOGV("saio drmoffset %" PRId64 " flags %u", drmoffset, flags);
5633
5634 return OK;
5635 }
5636
parseClearEncryptedSizes(off64_t offset,bool isSampleEncryption,uint32_t flags,off64_t size)5637 status_t MPEG4Source::parseClearEncryptedSizes(
5638 off64_t offset, bool isSampleEncryption, uint32_t flags, off64_t size) {
5639
5640 int32_t ivlength;
5641 if (!AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_CRYPTO_DEFAULT_IV_SIZE, &ivlength)) {
5642 return ERROR_MALFORMED;
5643 }
5644
5645 // only 0, 8 and 16 byte initialization vectors are supported
5646 if (ivlength != 0 && ivlength != 8 && ivlength != 16) {
5647 ALOGW("unsupported IV length: %d", ivlength);
5648 return ERROR_MALFORMED;
5649 }
5650
5651 uint32_t sampleCount = mCurrentSampleInfoCount;
5652 if (isSampleEncryption) {
5653 if (size < 4) {
5654 return ERROR_MALFORMED;
5655 }
5656 if (!mDataSource->getUInt32(offset, &sampleCount)) {
5657 return ERROR_IO;
5658 }
5659 offset += 4;
5660 size -= 4;
5661 }
5662
5663 // read CencSampleAuxiliaryDataFormats
5664 for (size_t i = 0; i < sampleCount; i++) {
5665 if (i >= mCurrentSamples.size()) {
5666 ALOGW("too few samples");
5667 break;
5668 }
5669 Sample *smpl = &mCurrentSamples.editItemAt(i);
5670 if (!smpl->clearsizes.isEmpty()) {
5671 continue;
5672 }
5673
5674 memset(smpl->iv, 0, 16);
5675 if (size < ivlength) {
5676 return ERROR_MALFORMED;
5677 }
5678 if (mDataSource->readAt(offset, smpl->iv, ivlength) != ivlength) {
5679 return ERROR_IO;
5680 }
5681
5682 offset += ivlength;
5683 size -= ivlength;
5684
5685 bool readSubsamples;
5686 if (isSampleEncryption) {
5687 readSubsamples = flags & 2;
5688 } else {
5689 int32_t smplinfosize = mCurrentDefaultSampleInfoSize;
5690 if (smplinfosize == 0) {
5691 smplinfosize = mCurrentSampleInfoSizes[i];
5692 }
5693 readSubsamples = smplinfosize > ivlength;
5694 }
5695
5696 if (readSubsamples) {
5697 uint16_t numsubsamples;
5698 if (size < 2) {
5699 return ERROR_MALFORMED;
5700 }
5701 if (!mDataSource->getUInt16(offset, &numsubsamples)) {
5702 return ERROR_IO;
5703 }
5704 offset += 2;
5705 size -= 2;
5706 for (size_t j = 0; j < numsubsamples; j++) {
5707 uint16_t numclear;
5708 uint32_t numencrypted;
5709 if (size < 6) {
5710 return ERROR_MALFORMED;
5711 }
5712 if (!mDataSource->getUInt16(offset, &numclear)) {
5713 return ERROR_IO;
5714 }
5715 offset += 2;
5716 if (!mDataSource->getUInt32(offset, &numencrypted)) {
5717 return ERROR_IO;
5718 }
5719 offset += 4;
5720 size -= 6;
5721 smpl->clearsizes.add(numclear);
5722 smpl->encryptedsizes.add(numencrypted);
5723 }
5724 } else {
5725 smpl->clearsizes.add(0);
5726 smpl->encryptedsizes.add(smpl->size);
5727 }
5728 }
5729
5730 return OK;
5731 }
5732
parseSampleEncryption(off64_t offset,off64_t chunk_data_size)5733 status_t MPEG4Source::parseSampleEncryption(off64_t offset, off64_t chunk_data_size) {
5734 uint32_t flags;
5735 if (chunk_data_size < 4) {
5736 return ERROR_MALFORMED;
5737 }
5738 if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags
5739 return ERROR_MALFORMED;
5740 }
5741 return parseClearEncryptedSizes(offset + 4, true, flags, chunk_data_size - 4);
5742 }
5743
parseTrackFragmentHeader(off64_t offset,off64_t size)5744 status_t MPEG4Source::parseTrackFragmentHeader(off64_t offset, off64_t size) {
5745
5746 if (size < 8) {
5747 return -EINVAL;
5748 }
5749
5750 uint32_t flags;
5751 if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags
5752 return ERROR_MALFORMED;
5753 }
5754
5755 if (flags & 0xff000000) {
5756 return -EINVAL;
5757 }
5758
5759 if (!mDataSource->getUInt32(offset + 4, (uint32_t*)&mLastParsedTrackId)) {
5760 return ERROR_MALFORMED;
5761 }
5762
5763 if (mLastParsedTrackId != mTrackId) {
5764 // this is not the right track, skip it
5765 return OK;
5766 }
5767
5768 mTrackFragmentHeaderInfo.mFlags = flags;
5769 mTrackFragmentHeaderInfo.mTrackID = mLastParsedTrackId;
5770 offset += 8;
5771 size -= 8;
5772
5773 ALOGV("fragment header: %08x %08x", flags, mTrackFragmentHeaderInfo.mTrackID);
5774
5775 if (flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent) {
5776 if (size < 8) {
5777 return -EINVAL;
5778 }
5779
5780 if (!mDataSource->getUInt64(offset, &mTrackFragmentHeaderInfo.mBaseDataOffset)) {
5781 return ERROR_MALFORMED;
5782 }
5783 offset += 8;
5784 size -= 8;
5785 }
5786
5787 if (flags & TrackFragmentHeaderInfo::kSampleDescriptionIndexPresent) {
5788 if (size < 4) {
5789 return -EINVAL;
5790 }
5791
5792 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mSampleDescriptionIndex)) {
5793 return ERROR_MALFORMED;
5794 }
5795 offset += 4;
5796 size -= 4;
5797 }
5798
5799 if (flags & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
5800 if (size < 4) {
5801 return -EINVAL;
5802 }
5803
5804 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleDuration)) {
5805 return ERROR_MALFORMED;
5806 }
5807 offset += 4;
5808 size -= 4;
5809 }
5810
5811 if (flags & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) {
5812 if (size < 4) {
5813 return -EINVAL;
5814 }
5815
5816 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleSize)) {
5817 return ERROR_MALFORMED;
5818 }
5819 offset += 4;
5820 size -= 4;
5821 }
5822
5823 if (flags & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) {
5824 if (size < 4) {
5825 return -EINVAL;
5826 }
5827
5828 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleFlags)) {
5829 return ERROR_MALFORMED;
5830 }
5831 offset += 4;
5832 size -= 4;
5833 }
5834
5835 if (!(flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent)) {
5836 mTrackFragmentHeaderInfo.mBaseDataOffset = mCurrentMoofOffset;
5837 }
5838
5839 mTrackFragmentHeaderInfo.mDataOffset = 0;
5840 return OK;
5841 }
5842
parseTrackFragmentRun(off64_t offset,off64_t size)5843 status_t MPEG4Source::parseTrackFragmentRun(off64_t offset, off64_t size) {
5844
5845 ALOGV("MPEG4Source::parseTrackFragmentRun");
5846 if (size < 8) {
5847 return -EINVAL;
5848 }
5849
5850 enum {
5851 kDataOffsetPresent = 0x01,
5852 kFirstSampleFlagsPresent = 0x04,
5853 kSampleDurationPresent = 0x100,
5854 kSampleSizePresent = 0x200,
5855 kSampleFlagsPresent = 0x400,
5856 kSampleCompositionTimeOffsetPresent = 0x800,
5857 };
5858
5859 uint32_t flags;
5860 if (!mDataSource->getUInt32(offset, &flags)) {
5861 return ERROR_MALFORMED;
5862 }
5863 // |version| only affects SampleCompositionTimeOffset field.
5864 // If version == 0, SampleCompositionTimeOffset is uint32_t;
5865 // Otherwise, SampleCompositionTimeOffset is int32_t.
5866 // Sample.compositionOffset is defined as int32_t.
5867 uint8_t version = flags >> 24;
5868 flags &= 0xffffff;
5869 ALOGV("fragment run version: 0x%02x, flags: 0x%06x", version, flags);
5870
5871 if ((flags & kFirstSampleFlagsPresent) && (flags & kSampleFlagsPresent)) {
5872 // These two shall not be used together.
5873 return -EINVAL;
5874 }
5875
5876 uint32_t sampleCount;
5877 if (!mDataSource->getUInt32(offset + 4, &sampleCount)) {
5878 return ERROR_MALFORMED;
5879 }
5880 offset += 8;
5881 size -= 8;
5882
5883 uint64_t dataOffset = mTrackFragmentHeaderInfo.mDataOffset;
5884
5885 uint32_t firstSampleFlags = 0;
5886
5887 if (flags & kDataOffsetPresent) {
5888 if (size < 4) {
5889 return -EINVAL;
5890 }
5891
5892 int32_t dataOffsetDelta;
5893 if (!mDataSource->getUInt32(offset, (uint32_t*)&dataOffsetDelta)) {
5894 return ERROR_MALFORMED;
5895 }
5896
5897 dataOffset = mTrackFragmentHeaderInfo.mBaseDataOffset + dataOffsetDelta;
5898
5899 offset += 4;
5900 size -= 4;
5901 }
5902
5903 if (flags & kFirstSampleFlagsPresent) {
5904 if (size < 4) {
5905 return -EINVAL;
5906 }
5907
5908 if (!mDataSource->getUInt32(offset, &firstSampleFlags)) {
5909 return ERROR_MALFORMED;
5910 }
5911 offset += 4;
5912 size -= 4;
5913 }
5914
5915 uint32_t sampleDuration = 0, sampleSize = 0, sampleFlags = 0,
5916 sampleCtsOffset = 0;
5917
5918 size_t bytesPerSample = 0;
5919 if (flags & kSampleDurationPresent) {
5920 bytesPerSample += 4;
5921 } else if (mTrackFragmentHeaderInfo.mFlags
5922 & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
5923 sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration;
5924 } else if (mTrex) {
5925 sampleDuration = mTrex->default_sample_duration;
5926 }
5927
5928 if (flags & kSampleSizePresent) {
5929 bytesPerSample += 4;
5930 } else {
5931 sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize;
5932 #ifdef VERY_VERY_VERBOSE_LOGGING
5933 // We don't expect this, but also want to avoid spamming the log if
5934 // we hit this case.
5935 if (!(mTrackFragmentHeaderInfo.mFlags
5936 & TrackFragmentHeaderInfo::kDefaultSampleSizePresent)) {
5937 ALOGW("No sample size specified");
5938 }
5939 #endif
5940 }
5941
5942 if (flags & kSampleFlagsPresent) {
5943 bytesPerSample += 4;
5944 } else {
5945 sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags;
5946 #ifdef VERY_VERY_VERBOSE_LOGGING
5947 // We don't expect this, but also want to avoid spamming the log if
5948 // we hit this case.
5949 if (!(mTrackFragmentHeaderInfo.mFlags
5950 & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent)) {
5951 ALOGW("No sample flags specified");
5952 }
5953 #endif
5954 }
5955
5956 if (flags & kSampleCompositionTimeOffsetPresent) {
5957 bytesPerSample += 4;
5958 } else {
5959 sampleCtsOffset = 0;
5960 }
5961
5962 if (bytesPerSample != 0) {
5963 if (size < (off64_t)sampleCount * bytesPerSample) {
5964 return -EINVAL;
5965 }
5966 } else {
5967 if (sampleDuration == 0) {
5968 ALOGW("b/123389881 sampleDuration == 0");
5969 android_errorWriteLog(0x534e4554, "124389881 zero");
5970 return -EINVAL;
5971 }
5972
5973 // apply some quick (vs strict legality) checks
5974 //
5975 static constexpr uint32_t kMaxTrunSampleCount = 10000;
5976 if (sampleCount > kMaxTrunSampleCount) {
5977 ALOGW("b/123389881 sampleCount(%u) > kMaxTrunSampleCount(%u)",
5978 sampleCount, kMaxTrunSampleCount);
5979 android_errorWriteLog(0x534e4554, "124389881 count");
5980 return -EINVAL;
5981 }
5982 }
5983
5984 Sample tmp;
5985 for (uint32_t i = 0; i < sampleCount; ++i) {
5986 if (flags & kSampleDurationPresent) {
5987 if (!mDataSource->getUInt32(offset, &sampleDuration)) {
5988 return ERROR_MALFORMED;
5989 }
5990 offset += 4;
5991 }
5992
5993 if (flags & kSampleSizePresent) {
5994 if (!mDataSource->getUInt32(offset, &sampleSize)) {
5995 return ERROR_MALFORMED;
5996 }
5997 offset += 4;
5998 }
5999
6000 if (flags & kSampleFlagsPresent) {
6001 if (!mDataSource->getUInt32(offset, &sampleFlags)) {
6002 return ERROR_MALFORMED;
6003 }
6004 offset += 4;
6005 }
6006
6007 if (flags & kSampleCompositionTimeOffsetPresent) {
6008 if (!mDataSource->getUInt32(offset, &sampleCtsOffset)) {
6009 return ERROR_MALFORMED;
6010 }
6011 offset += 4;
6012 }
6013
6014 ALOGV("adding sample %d at offset 0x%08" PRIx64 ", size %u, duration %u, "
6015 " flags 0x%08x ctsOffset %" PRIu32, i + 1,
6016 dataOffset, sampleSize, sampleDuration,
6017 (flags & kFirstSampleFlagsPresent) && i == 0
6018 ? firstSampleFlags : sampleFlags, sampleCtsOffset);
6019 tmp.offset = dataOffset;
6020 tmp.size = sampleSize;
6021 tmp.duration = sampleDuration;
6022 tmp.compositionOffset = sampleCtsOffset;
6023 memset(tmp.iv, 0, sizeof(tmp.iv));
6024 if (mCurrentSamples.add(tmp) < 0) {
6025 ALOGW("b/123389881 failed saving sample(n=%zu)", mCurrentSamples.size());
6026 android_errorWriteLog(0x534e4554, "124389881 allocation");
6027 mCurrentSamples.clear();
6028 return NO_MEMORY;
6029 }
6030
6031 dataOffset += sampleSize;
6032 }
6033
6034 mTrackFragmentHeaderInfo.mDataOffset = dataOffset;
6035
6036 return OK;
6037 }
6038
getFormat(AMediaFormat * meta)6039 media_status_t MPEG4Source::getFormat(AMediaFormat *meta) {
6040 Mutex::Autolock autoLock(mLock);
6041 AMediaFormat_copy(meta, mFormat);
6042 return AMEDIA_OK;
6043 }
6044
parseNALSize(const uint8_t * data) const6045 size_t MPEG4Source::parseNALSize(const uint8_t *data) const {
6046 switch (mNALLengthSize) {
6047 case 1:
6048 return *data;
6049 case 2:
6050 return U16_AT(data);
6051 case 3:
6052 return ((size_t)data[0] << 16) | U16_AT(&data[1]);
6053 case 4:
6054 return U32_AT(data);
6055 }
6056
6057 // This cannot happen, mNALLengthSize springs to life by adding 1 to
6058 // a 2-bit integer.
6059 CHECK(!"Should not be here.");
6060
6061 return 0;
6062 }
6063
parseHEVCLayerId(const uint8_t * data,size_t size)6064 int32_t MPEG4Source::parseHEVCLayerId(const uint8_t *data, size_t size) {
6065 if (data == nullptr || size < mNALLengthSize + 2) {
6066 return -1;
6067 }
6068
6069 // HEVC NAL-header (16-bit)
6070 // 1 6 6 3
6071 // |-|uuuuuu|------|iii|
6072 // ^ ^
6073 // NAL_type layer_id + 1
6074 //
6075 // Layer-id is non-zero only for Temporal Sub-layer Access pictures (TSA)
6076 enum {
6077 TSA_N = 2,
6078 TSA_R = 3,
6079 STSA_N = 4,
6080 STSA_R = 5,
6081 };
6082
6083 data += mNALLengthSize;
6084 uint16_t nalHeader = data[0] << 8 | data[1];
6085
6086 uint16_t nalType = (nalHeader >> 9) & 0x3Fu;
6087 if (nalType == TSA_N || nalType == TSA_R || nalType == STSA_N || nalType == STSA_R) {
6088 int32_t layerIdPlusOne = nalHeader & 0x7u;
6089 ALOGD_IF(layerIdPlusOne == 0, "got layerId 0 for TSA picture");
6090 return layerIdPlusOne - 1;
6091 }
6092 return 0;
6093 }
6094
read(MediaBufferHelper ** out,const ReadOptions * options)6095 media_status_t MPEG4Source::read(
6096 MediaBufferHelper **out, const ReadOptions *options) {
6097 Mutex::Autolock autoLock(mLock);
6098
6099 CHECK(mStarted);
6100
6101 if (options != nullptr && options->getNonBlocking() && !mBufferGroup->has_buffers()) {
6102 *out = nullptr;
6103 return AMEDIA_ERROR_WOULD_BLOCK;
6104 }
6105
6106 if (mFirstMoofOffset > 0) {
6107 return fragmentedRead(out, options);
6108 }
6109
6110 *out = NULL;
6111
6112 int64_t targetSampleTimeUs = -1;
6113
6114 int64_t seekTimeUs;
6115 ReadOptions::SeekMode mode;
6116
6117 if (options && options->getSeekTo(&seekTimeUs, &mode)) {
6118 ALOGV("seekTimeUs:%" PRId64, seekTimeUs);
6119 if (mIsHeif || mIsAvif) {
6120 CHECK(mSampleTable == NULL);
6121 CHECK(mItemTable != NULL);
6122 int32_t imageIndex;
6123 if (!AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_TRACK_ID, &imageIndex)) {
6124 return AMEDIA_ERROR_MALFORMED;
6125 }
6126
6127 status_t err;
6128 if (seekTimeUs >= 0) {
6129 err = mItemTable->findImageItem(imageIndex, &mCurrentSampleIndex);
6130 } else {
6131 err = mItemTable->findThumbnailItem(imageIndex, &mCurrentSampleIndex);
6132 }
6133 if (err != OK) {
6134 return AMEDIA_ERROR_UNKNOWN;
6135 }
6136 } else {
6137 uint32_t findFlags = 0;
6138 switch (mode) {
6139 case ReadOptions::SEEK_PREVIOUS_SYNC:
6140 findFlags = SampleTable::kFlagBefore;
6141 break;
6142 case ReadOptions::SEEK_NEXT_SYNC:
6143 findFlags = SampleTable::kFlagAfter;
6144 break;
6145 case ReadOptions::SEEK_CLOSEST_SYNC:
6146 case ReadOptions::SEEK_CLOSEST:
6147 findFlags = SampleTable::kFlagClosest;
6148 break;
6149 case ReadOptions::SEEK_FRAME_INDEX:
6150 findFlags = SampleTable::kFlagFrameIndex;
6151 break;
6152 default:
6153 CHECK(!"Should not be here.");
6154 break;
6155 }
6156 if( mode != ReadOptions::SEEK_FRAME_INDEX) {
6157 int64_t elstInitialEmptyEditUs = 0, elstShiftStartUs = 0;
6158 if (mElstInitialEmptyEditTicks > 0) {
6159 elstInitialEmptyEditUs = ((long double)mElstInitialEmptyEditTicks * 1000000) /
6160 mTimescale;
6161 /* Sample's composition time from ctts/stts entries are non-negative(>=0).
6162 * Hence, lower bound on seekTimeUs is 0.
6163 */
6164 seekTimeUs = std::max(seekTimeUs - elstInitialEmptyEditUs, (int64_t)0);
6165 }
6166 if (mElstShiftStartTicks > 0) {
6167 elstShiftStartUs = ((long double)mElstShiftStartTicks * 1000000) / mTimescale;
6168 seekTimeUs += elstShiftStartUs;
6169 }
6170 ALOGV("shifted seekTimeUs:%" PRId64 ", elstInitialEmptyEditUs:%" PRIu64
6171 ", elstShiftStartUs:%" PRIu64, seekTimeUs, elstInitialEmptyEditUs,
6172 elstShiftStartUs);
6173 }
6174
6175 uint32_t sampleIndex;
6176 status_t err = mSampleTable->findSampleAtTime(
6177 seekTimeUs, 1000000, mTimescale,
6178 &sampleIndex, findFlags);
6179
6180 if (mode == ReadOptions::SEEK_CLOSEST
6181 || mode == ReadOptions::SEEK_FRAME_INDEX) {
6182 // We found the closest sample already, now we want the sync
6183 // sample preceding it (or the sample itself of course), even
6184 // if the subsequent sync sample is closer.
6185 findFlags = SampleTable::kFlagBefore;
6186 }
6187
6188 uint32_t syncSampleIndex = sampleIndex;
6189 // assume every non-USAC/non-MPEGH audio sample is a sync sample.
6190 // This works around
6191 // seek issues with files that were incorrectly written with an
6192 // empty or single-sample stss block for the audio track
6193 if (err == OK && (!mIsAudio || mIsUsac || mIsMpegH)) {
6194 err = mSampleTable->findSyncSampleNear(
6195 sampleIndex, &syncSampleIndex, findFlags);
6196 }
6197
6198 uint64_t sampleTime;
6199 if (err == OK) {
6200 err = mSampleTable->getMetaDataForSample(
6201 sampleIndex, NULL, NULL, &sampleTime);
6202 }
6203
6204 if (err != OK) {
6205 if (err == ERROR_OUT_OF_RANGE) {
6206 // An attempt to seek past the end of the stream would
6207 // normally cause this ERROR_OUT_OF_RANGE error. Propagating
6208 // this all the way to the MediaPlayer would cause abnormal
6209 // termination. Legacy behaviour appears to be to behave as if
6210 // we had seeked to the end of stream, ending normally.
6211 return AMEDIA_ERROR_END_OF_STREAM;
6212 }
6213 ALOGV("end of stream");
6214 return AMEDIA_ERROR_UNKNOWN;
6215 }
6216
6217 if (mode == ReadOptions::SEEK_CLOSEST
6218 || mode == ReadOptions::SEEK_FRAME_INDEX) {
6219 if (mElstInitialEmptyEditTicks > 0) {
6220 sampleTime += mElstInitialEmptyEditTicks;
6221 }
6222 if (mElstShiftStartTicks > 0){
6223 if (sampleTime > mElstShiftStartTicks) {
6224 sampleTime -= mElstShiftStartTicks;
6225 } else {
6226 sampleTime = 0;
6227 }
6228 }
6229 targetSampleTimeUs = (sampleTime * 1000000ll) / mTimescale;
6230 }
6231
6232 #if 0
6233 uint32_t syncSampleTime;
6234 CHECK_EQ(OK, mSampleTable->getMetaDataForSample(
6235 syncSampleIndex, NULL, NULL, &syncSampleTime));
6236
6237 ALOGI("seek to time %lld us => sample at time %lld us, "
6238 "sync sample at time %lld us",
6239 seekTimeUs,
6240 sampleTime * 1000000ll / mTimescale,
6241 syncSampleTime * 1000000ll / mTimescale);
6242 #endif
6243
6244 mCurrentSampleIndex = syncSampleIndex;
6245 }
6246
6247 if (mBuffer != NULL) {
6248 mBuffer->release();
6249 mBuffer = NULL;
6250 }
6251
6252 // fall through
6253 }
6254
6255 off64_t offset = 0;
6256 size_t size = 0;
6257 int64_t cts;
6258 uint64_t stts;
6259 bool isSyncSample;
6260 bool newBuffer = false;
6261 if (mBuffer == NULL) {
6262 newBuffer = true;
6263
6264 status_t err;
6265 if (!mIsHeif && !mIsAvif) {
6266 err = mSampleTable->getMetaDataForSample(mCurrentSampleIndex, &offset, &size,
6267 (uint64_t*)&cts, &isSyncSample, &stts);
6268 if(err == OK) {
6269 if (mElstInitialEmptyEditTicks > 0) {
6270 cts += mElstInitialEmptyEditTicks;
6271 }
6272 if (mElstShiftStartTicks > 0) {
6273 // cts can be negative. for example, initial audio samples for gapless playback.
6274 cts -= (int64_t)mElstShiftStartTicks;
6275 }
6276 }
6277 } else {
6278 err = mItemTable->getImageOffsetAndSize(
6279 options && options->getSeekTo(&seekTimeUs, &mode) ?
6280 &mCurrentSampleIndex : NULL, &offset, &size);
6281
6282 cts = stts = 0;
6283 isSyncSample = 0;
6284 ALOGV("image offset %lld, size %zu", (long long)offset, size);
6285 }
6286
6287 if (err != OK) {
6288 if (err == ERROR_END_OF_STREAM) {
6289 return AMEDIA_ERROR_END_OF_STREAM;
6290 }
6291 return AMEDIA_ERROR_UNKNOWN;
6292 }
6293
6294 err = mBufferGroup->acquire_buffer(&mBuffer);
6295
6296 if (err != OK) {
6297 CHECK(mBuffer == NULL);
6298 return AMEDIA_ERROR_UNKNOWN;
6299 }
6300 if (size > mBuffer->size()) {
6301 ALOGE("buffer too small: %zu > %zu", size, mBuffer->size());
6302 mBuffer->release();
6303 mBuffer = NULL;
6304 return AMEDIA_ERROR_UNKNOWN; // ERROR_BUFFER_TOO_SMALL
6305 }
6306 }
6307
6308 if (!mIsAVC && !mIsHEVC && !(mIsDolbyVision && mNALLengthSize) && !mIsAC4) {
6309 if (newBuffer) {
6310 if (mIsPcm) {
6311 // The twos' PCM block reader assumes that all samples has the same size.
6312 uint32_t lastSampleIndexInChunk = mSampleTable->getLastSampleIndexInChunk();
6313 if (lastSampleIndexInChunk < mCurrentSampleIndex) {
6314 mBuffer->release();
6315 mBuffer = nullptr;
6316 return AMEDIA_ERROR_UNKNOWN;
6317 }
6318 uint32_t samplesToRead = lastSampleIndexInChunk - mCurrentSampleIndex + 1;
6319 if (samplesToRead > kMaxPcmFrameSize) {
6320 samplesToRead = kMaxPcmFrameSize;
6321 }
6322
6323 ALOGV("Reading %d PCM frames of size %zu at index %d to stop of chunk at %d",
6324 samplesToRead, size, mCurrentSampleIndex,
6325 mSampleTable->getLastSampleIndexInChunk());
6326
6327 size_t totalSize = samplesToRead * size;
6328 if (mBuffer->size() < totalSize) {
6329 mBuffer->release();
6330 mBuffer = nullptr;
6331 return AMEDIA_ERROR_UNKNOWN;
6332 }
6333 uint8_t* buf = (uint8_t *)mBuffer->data();
6334 ssize_t bytesRead = mDataSource->readAt(offset, buf, totalSize);
6335 if (bytesRead < (ssize_t)totalSize) {
6336 mBuffer->release();
6337 mBuffer = NULL;
6338 return AMEDIA_ERROR_IO;
6339 }
6340
6341 AMediaFormat *meta = mBuffer->meta_data();
6342 AMediaFormat_clear(meta);
6343 AMediaFormat_setInt64(
6344 meta, AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
6345 AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
6346
6347 int32_t byteOrder = 0;
6348 bool isGetBigEndian = AMediaFormat_getInt32(mFormat,
6349 AMEDIAFORMAT_KEY_PCM_BIG_ENDIAN, &byteOrder);
6350
6351 if (isGetBigEndian && byteOrder == 1) {
6352 // Big-endian -> little-endian
6353 uint16_t *dstData = (uint16_t *)buf;
6354 uint16_t *srcData = (uint16_t *)buf;
6355
6356 for (size_t j = 0; j < bytesRead / sizeof(uint16_t); j++) {
6357 dstData[j] = ntohs(srcData[j]);
6358 }
6359 }
6360
6361 mCurrentSampleIndex += samplesToRead;
6362 mBuffer->set_range(0, totalSize);
6363 } else {
6364 ssize_t num_bytes_read =
6365 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
6366
6367 if (num_bytes_read < (ssize_t)size) {
6368 mBuffer->release();
6369 mBuffer = NULL;
6370
6371 return AMEDIA_ERROR_IO;
6372 }
6373
6374 CHECK(mBuffer != NULL);
6375 mBuffer->set_range(0, size);
6376 AMediaFormat *meta = mBuffer->meta_data();
6377 AMediaFormat_clear(meta);
6378 AMediaFormat_setInt64(
6379 meta, AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
6380 AMediaFormat_setInt64(
6381 meta, AMEDIAFORMAT_KEY_DURATION, ((long double)stts * 1000000) / mTimescale);
6382
6383 if (targetSampleTimeUs >= 0) {
6384 AMediaFormat_setInt64(
6385 meta, AMEDIAFORMAT_KEY_TARGET_TIME, targetSampleTimeUs);
6386 }
6387
6388 if (isSyncSample) {
6389 AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
6390 }
6391
6392 AMediaFormat_setInt64(
6393 meta, "sample-file-offset" /*AMEDIAFORMAT_KEY_SAMPLE_FILE_OFFSET*/,
6394 offset);
6395
6396 if (mSampleTable != nullptr &&
6397 mCurrentSampleIndex == mSampleTable->getLastSampleIndexInChunk()) {
6398 AMediaFormat_setInt64(
6399 meta,
6400 "last-sample-index-in-chunk" /*AMEDIAFORMAT_KEY_LAST_SAMPLE_INDEX_IN_CHUNK*/,
6401 mSampleTable->getLastSampleIndexInChunk());
6402 }
6403
6404 ++mCurrentSampleIndex;
6405 }
6406 }
6407
6408 *out = mBuffer;
6409 mBuffer = NULL;
6410
6411 return AMEDIA_OK;
6412
6413 } else if (mIsAC4) {
6414 CHECK(mBuffer != NULL);
6415 // Make sure there is enough space to write the sync header and the raw frame
6416 if (mBuffer->range_length() < (7 + size)) {
6417 mBuffer->release();
6418 mBuffer = NULL;
6419
6420 return AMEDIA_ERROR_IO;
6421 }
6422
6423 uint8_t *dstData = (uint8_t *)mBuffer->data();
6424 size_t dstOffset = 0;
6425 // Add AC-4 sync header to MPEG4 encapsulated AC-4 raw frame
6426 // AC40 sync word, meaning no CRC at the end of the frame
6427 dstData[dstOffset++] = 0xAC;
6428 dstData[dstOffset++] = 0x40;
6429 dstData[dstOffset++] = 0xFF;
6430 dstData[dstOffset++] = 0xFF;
6431 dstData[dstOffset++] = (uint8_t)((size >> 16) & 0xFF);
6432 dstData[dstOffset++] = (uint8_t)((size >> 8) & 0xFF);
6433 dstData[dstOffset++] = (uint8_t)((size >> 0) & 0xFF);
6434
6435 ssize_t numBytesRead = mDataSource->readAt(offset, dstData + dstOffset, size);
6436 if (numBytesRead != (ssize_t)size) {
6437 mBuffer->release();
6438 mBuffer = NULL;
6439
6440 return AMEDIA_ERROR_IO;
6441 }
6442
6443 mBuffer->set_range(0, dstOffset + size);
6444 AMediaFormat *meta = mBuffer->meta_data();
6445 AMediaFormat_clear(meta);
6446 AMediaFormat_setInt64(
6447 meta, AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
6448 AMediaFormat_setInt64(
6449 meta, AMEDIAFORMAT_KEY_DURATION, ((long double)stts * 1000000) / mTimescale);
6450
6451 if (targetSampleTimeUs >= 0) {
6452 AMediaFormat_setInt64(
6453 meta, AMEDIAFORMAT_KEY_TARGET_TIME, targetSampleTimeUs);
6454 }
6455
6456 if (isSyncSample) {
6457 AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
6458 }
6459
6460 ++mCurrentSampleIndex;
6461
6462 *out = mBuffer;
6463 mBuffer = NULL;
6464
6465 return AMEDIA_OK;
6466 } else {
6467 // Whole NAL units are returned but each fragment is prefixed by
6468 // the start code (0x00 00 00 01).
6469 ssize_t num_bytes_read = 0;
6470 num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size);
6471
6472 if (num_bytes_read < (ssize_t)size) {
6473 mBuffer->release();
6474 mBuffer = NULL;
6475
6476 return AMEDIA_ERROR_IO;
6477 }
6478
6479 uint8_t *dstData = (uint8_t *)mBuffer->data();
6480 size_t srcOffset = 0;
6481 size_t dstOffset = 0;
6482
6483 while (srcOffset < size) {
6484 bool isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize);
6485 size_t nalLength = 0;
6486 if (!isMalFormed) {
6487 nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
6488 srcOffset += mNALLengthSize;
6489 isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength);
6490 }
6491
6492 if (isMalFormed) {
6493 //if nallength abnormal,ignore it.
6494 ALOGW("abnormal nallength, ignore this NAL");
6495 srcOffset = size;
6496 break;
6497 }
6498
6499 if (nalLength == 0) {
6500 continue;
6501 }
6502
6503 if (dstOffset > SIZE_MAX - 4 ||
6504 dstOffset + 4 > SIZE_MAX - nalLength ||
6505 dstOffset + 4 + nalLength > mBuffer->size()) {
6506 ALOGE("b/27208621 : %zu %zu", dstOffset, mBuffer->size());
6507 android_errorWriteLog(0x534e4554, "27208621");
6508 mBuffer->release();
6509 mBuffer = NULL;
6510 return AMEDIA_ERROR_MALFORMED;
6511 }
6512
6513 dstData[dstOffset++] = 0;
6514 dstData[dstOffset++] = 0;
6515 dstData[dstOffset++] = 0;
6516 dstData[dstOffset++] = 1;
6517 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
6518 srcOffset += nalLength;
6519 dstOffset += nalLength;
6520 }
6521 CHECK_EQ(srcOffset, size);
6522 CHECK(mBuffer != NULL);
6523 mBuffer->set_range(0, dstOffset);
6524
6525 AMediaFormat *meta = mBuffer->meta_data();
6526 AMediaFormat_clear(meta);
6527 AMediaFormat_setInt64(
6528 meta, AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
6529 AMediaFormat_setInt64(
6530 meta, AMEDIAFORMAT_KEY_DURATION, ((long double)stts * 1000000) / mTimescale);
6531
6532 if (targetSampleTimeUs >= 0) {
6533 AMediaFormat_setInt64(
6534 meta, AMEDIAFORMAT_KEY_TARGET_TIME, targetSampleTimeUs);
6535 }
6536
6537 if (mIsAVC) {
6538 uint32_t layerId = FindAVCLayerId(
6539 (const uint8_t *)mBuffer->data(), mBuffer->range_length());
6540 AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_TEMPORAL_LAYER_ID, layerId);
6541 } else if (mIsHEVC) {
6542 int32_t layerId = parseHEVCLayerId(
6543 (const uint8_t *)mBuffer->data(), mBuffer->range_length());
6544 if (layerId >= 0) {
6545 AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_TEMPORAL_LAYER_ID, layerId);
6546 }
6547 }
6548
6549 if (isSyncSample) {
6550 AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
6551 }
6552
6553 AMediaFormat_setInt64(
6554 meta, "sample-file-offset" /*AMEDIAFORMAT_KEY_SAMPLE_FILE_OFFSET*/, offset);
6555
6556 if (mSampleTable != nullptr &&
6557 mCurrentSampleIndex == mSampleTable->getLastSampleIndexInChunk()) {
6558 AMediaFormat_setInt64(
6559 meta,
6560 "last-sample-index-in-chunk" /*AMEDIAFORMAT_KEY_LAST_SAMPLE_INDEX_IN_CHUNK*/,
6561 mSampleTable->getLastSampleIndexInChunk());
6562 }
6563
6564 ++mCurrentSampleIndex;
6565
6566 *out = mBuffer;
6567 mBuffer = NULL;
6568
6569 return AMEDIA_OK;
6570 }
6571 }
6572
fragmentedRead(MediaBufferHelper ** out,const ReadOptions * options)6573 media_status_t MPEG4Source::fragmentedRead(
6574 MediaBufferHelper **out, const ReadOptions *options) {
6575
6576 ALOGV("MPEG4Source::fragmentedRead");
6577
6578 CHECK(mStarted);
6579
6580 *out = NULL;
6581
6582 int64_t targetSampleTimeUs = -1;
6583
6584 int64_t seekTimeUs;
6585 ReadOptions::SeekMode mode;
6586 if (options && options->getSeekTo(&seekTimeUs, &mode)) {
6587 ALOGV("seekTimeUs:%" PRId64, seekTimeUs);
6588 int64_t elstInitialEmptyEditUs = 0, elstShiftStartUs = 0;
6589 if (mElstInitialEmptyEditTicks > 0) {
6590 elstInitialEmptyEditUs = ((long double)mElstInitialEmptyEditTicks * 1000000) /
6591 mTimescale;
6592 /* Sample's composition time from ctts/stts entries are non-negative(>=0).
6593 * Hence, lower bound on seekTimeUs is 0.
6594 */
6595 seekTimeUs = std::max(seekTimeUs - elstInitialEmptyEditUs, (int64_t)0);
6596 }
6597 if (mElstShiftStartTicks > 0){
6598 elstShiftStartUs = ((long double)mElstShiftStartTicks * 1000000) / mTimescale;
6599 seekTimeUs += elstShiftStartUs;
6600 }
6601 ALOGV("shifted seekTimeUs:%" PRId64 ", elstInitialEmptyEditUs:%" PRIu64
6602 ", elstShiftStartUs:%" PRIu64, seekTimeUs, elstInitialEmptyEditUs,
6603 elstShiftStartUs);
6604
6605 int numSidxEntries = mSegments.size();
6606 if (numSidxEntries != 0) {
6607 int64_t totalTime = 0;
6608 off64_t totalOffset = mFirstMoofOffset;
6609 for (int i = 0; i < numSidxEntries; i++) {
6610 const SidxEntry *se = &mSegments[i];
6611 if (totalTime + se->mDurationUs > seekTimeUs) {
6612 // The requested time is somewhere in this segment
6613 if ((mode == ReadOptions::SEEK_NEXT_SYNC && seekTimeUs > totalTime) ||
6614 (mode == ReadOptions::SEEK_CLOSEST_SYNC &&
6615 (seekTimeUs - totalTime) > (totalTime + se->mDurationUs - seekTimeUs))) {
6616 // requested next sync, or closest sync and it was closer to the end of
6617 // this segment
6618 totalTime += se->mDurationUs;
6619 totalOffset += se->mSize;
6620 }
6621 break;
6622 }
6623 totalTime += se->mDurationUs;
6624 totalOffset += se->mSize;
6625 }
6626 mCurrentMoofOffset = totalOffset;
6627 mNextMoofOffset = -1;
6628 mCurrentSamples.clear();
6629 mCurrentSampleIndex = 0;
6630 status_t err = parseChunk(&totalOffset);
6631 if (err != OK) {
6632 return AMEDIA_ERROR_UNKNOWN;
6633 }
6634 mCurrentTime = totalTime * mTimescale / 1000000ll;
6635 } else {
6636 // without sidx boxes, we can only seek to 0
6637 mCurrentMoofOffset = mFirstMoofOffset;
6638 mNextMoofOffset = -1;
6639 mCurrentSamples.clear();
6640 mCurrentSampleIndex = 0;
6641 off64_t tmp = mCurrentMoofOffset;
6642 status_t err = parseChunk(&tmp);
6643 if (err != OK) {
6644 return AMEDIA_ERROR_UNKNOWN;
6645 }
6646 mCurrentTime = 0;
6647 }
6648
6649 if (mBuffer != NULL) {
6650 mBuffer->release();
6651 mBuffer = NULL;
6652 }
6653
6654 // fall through
6655 }
6656
6657 off64_t offset = 0;
6658 size_t size = 0;
6659 int64_t cts = 0;
6660 bool isSyncSample = false;
6661 bool newBuffer = false;
6662 if (mBuffer == NULL || mCurrentSampleIndex >= mCurrentSamples.size()) {
6663 newBuffer = true;
6664
6665 if (mBuffer != NULL) {
6666 mBuffer->release();
6667 mBuffer = NULL;
6668 }
6669 if (mCurrentSampleIndex >= mCurrentSamples.size()) {
6670 // move to next fragment if there is one
6671 if (mNextMoofOffset <= mCurrentMoofOffset) {
6672 return AMEDIA_ERROR_END_OF_STREAM;
6673 }
6674 off64_t nextMoof = mNextMoofOffset;
6675 mCurrentMoofOffset = nextMoof;
6676 mCurrentSamples.clear();
6677 mCurrentSampleIndex = 0;
6678 status_t err = parseChunk(&nextMoof);
6679 if (err != OK) {
6680 return AMEDIA_ERROR_UNKNOWN;
6681 }
6682 if (mCurrentSampleIndex >= mCurrentSamples.size()) {
6683 return AMEDIA_ERROR_END_OF_STREAM;
6684 }
6685 }
6686
6687 const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
6688 offset = smpl->offset;
6689 size = smpl->size;
6690 cts = mCurrentTime + smpl->compositionOffset;
6691
6692 if (mElstInitialEmptyEditTicks > 0) {
6693 cts += mElstInitialEmptyEditTicks;
6694 }
6695 if (mElstShiftStartTicks > 0) {
6696 // cts can be negative. for example, initial audio samples for gapless playback.
6697 cts -= (int64_t)mElstShiftStartTicks;
6698 }
6699
6700 mCurrentTime += smpl->duration;
6701 isSyncSample = (mCurrentSampleIndex == 0);
6702
6703 status_t err = mBufferGroup->acquire_buffer(&mBuffer);
6704
6705 if (err != OK) {
6706 CHECK(mBuffer == NULL);
6707 ALOGV("acquire_buffer returned %d", err);
6708 return AMEDIA_ERROR_UNKNOWN;
6709 }
6710 if (size > mBuffer->size()) {
6711 ALOGE("buffer too small: %zu > %zu", size, mBuffer->size());
6712 mBuffer->release();
6713 mBuffer = NULL;
6714 return AMEDIA_ERROR_UNKNOWN;
6715 }
6716 }
6717
6718 const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
6719 AMediaFormat *bufmeta = mBuffer->meta_data();
6720 AMediaFormat_clear(bufmeta);
6721 if (smpl->encryptedsizes.size()) {
6722 // store clear/encrypted lengths in metadata
6723 AMediaFormat_setBuffer(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_PLAIN_SIZES,
6724 smpl->clearsizes.array(), smpl->clearsizes.size() * sizeof(uint32_t));
6725 AMediaFormat_setBuffer(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_ENCRYPTED_SIZES,
6726 smpl->encryptedsizes.array(), smpl->encryptedsizes.size() * sizeof(uint32_t));
6727 AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_DEFAULT_IV_SIZE, mDefaultIVSize);
6728 AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_MODE, mCryptoMode);
6729 AMediaFormat_setBuffer(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_KEY, mCryptoKey, 16);
6730 AMediaFormat_setInt32(bufmeta,
6731 AMEDIAFORMAT_KEY_CRYPTO_ENCRYPTED_BYTE_BLOCK, mDefaultEncryptedByteBlock);
6732 AMediaFormat_setInt32(bufmeta,
6733 AMEDIAFORMAT_KEY_CRYPTO_SKIP_BYTE_BLOCK, mDefaultSkipByteBlock);
6734
6735 void *iv = NULL;
6736 size_t ivlength = 0;
6737 if (!AMediaFormat_getBuffer(mFormat,
6738 "crypto-iv", &iv, &ivlength)) {
6739 iv = (void *) smpl->iv;
6740 ivlength = 16; // use 16 or the actual size?
6741 }
6742 AMediaFormat_setBuffer(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_IV, iv, ivlength);
6743 }
6744
6745 if (!mIsAVC && !mIsHEVC && !(mIsDolbyVision && mNALLengthSize)) {
6746 if (newBuffer) {
6747 if (!isInRange((size_t)0u, mBuffer->size(), size)) {
6748 mBuffer->release();
6749 mBuffer = NULL;
6750
6751 ALOGE("fragmentedRead ERROR_MALFORMED size %zu", size);
6752 return AMEDIA_ERROR_MALFORMED;
6753 }
6754
6755 ssize_t num_bytes_read =
6756 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
6757
6758 if (num_bytes_read < (ssize_t)size) {
6759 mBuffer->release();
6760 mBuffer = NULL;
6761
6762 ALOGE("i/o error");
6763 return AMEDIA_ERROR_IO;
6764 }
6765
6766 CHECK(mBuffer != NULL);
6767 mBuffer->set_range(0, size);
6768 AMediaFormat_setInt64(bufmeta,
6769 AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
6770 AMediaFormat_setInt64(bufmeta,
6771 AMEDIAFORMAT_KEY_DURATION, ((long double)smpl->duration * 1000000) / mTimescale);
6772
6773 if (targetSampleTimeUs >= 0) {
6774 AMediaFormat_setInt64(bufmeta, AMEDIAFORMAT_KEY_TARGET_TIME, targetSampleTimeUs);
6775 }
6776
6777 if (mIsAVC) {
6778 uint32_t layerId = FindAVCLayerId(
6779 (const uint8_t *)mBuffer->data(), mBuffer->range_length());
6780 AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_TEMPORAL_LAYER_ID, layerId);
6781 } else if (mIsHEVC) {
6782 int32_t layerId = parseHEVCLayerId(
6783 (const uint8_t *)mBuffer->data(), mBuffer->range_length());
6784 if (layerId >= 0) {
6785 AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_TEMPORAL_LAYER_ID, layerId);
6786 }
6787 }
6788
6789 if (isSyncSample) {
6790 AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
6791 }
6792
6793 ++mCurrentSampleIndex;
6794 }
6795
6796 *out = mBuffer;
6797 mBuffer = NULL;
6798
6799 return AMEDIA_OK;
6800
6801 } else {
6802 ALOGV("whole NAL");
6803 // Whole NAL units are returned but each fragment is prefixed by
6804 // the start code (0x00 00 00 01).
6805 ssize_t num_bytes_read = 0;
6806 void *data = NULL;
6807 bool isMalFormed = false;
6808 int32_t max_size;
6809 if (!AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_MAX_INPUT_SIZE, &max_size)
6810 || !isInRange((size_t)0u, (size_t)max_size, size)) {
6811 isMalFormed = true;
6812 } else {
6813 data = mSrcBuffer;
6814 }
6815
6816 if (isMalFormed || data == NULL) {
6817 ALOGE("isMalFormed size %zu", size);
6818 if (mBuffer != NULL) {
6819 mBuffer->release();
6820 mBuffer = NULL;
6821 }
6822 return AMEDIA_ERROR_MALFORMED;
6823 }
6824 num_bytes_read = mDataSource->readAt(offset, data, size);
6825
6826 if (num_bytes_read < (ssize_t)size) {
6827 mBuffer->release();
6828 mBuffer = NULL;
6829
6830 ALOGE("i/o error");
6831 return AMEDIA_ERROR_IO;
6832 }
6833
6834 uint8_t *dstData = (uint8_t *)mBuffer->data();
6835 size_t srcOffset = 0;
6836 size_t dstOffset = 0;
6837
6838 while (srcOffset < size) {
6839 isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize);
6840 size_t nalLength = 0;
6841 if (!isMalFormed) {
6842 nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
6843 srcOffset += mNALLengthSize;
6844 isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength)
6845 || !isInRange((size_t)0u, mBuffer->size(), dstOffset, (size_t)4u)
6846 || !isInRange((size_t)0u, mBuffer->size(), dstOffset + 4, nalLength);
6847 }
6848
6849 if (isMalFormed) {
6850 ALOGE("Video is malformed; nalLength %zu", nalLength);
6851 mBuffer->release();
6852 mBuffer = NULL;
6853 return AMEDIA_ERROR_MALFORMED;
6854 }
6855
6856 if (nalLength == 0) {
6857 continue;
6858 }
6859
6860 if (dstOffset > SIZE_MAX - 4 ||
6861 dstOffset + 4 > SIZE_MAX - nalLength ||
6862 dstOffset + 4 + nalLength > mBuffer->size()) {
6863 ALOGE("b/26365349 : %zu %zu", dstOffset, mBuffer->size());
6864 android_errorWriteLog(0x534e4554, "26365349");
6865 mBuffer->release();
6866 mBuffer = NULL;
6867 return AMEDIA_ERROR_MALFORMED;
6868 }
6869
6870 dstData[dstOffset++] = 0;
6871 dstData[dstOffset++] = 0;
6872 dstData[dstOffset++] = 0;
6873 dstData[dstOffset++] = 1;
6874 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
6875 srcOffset += nalLength;
6876 dstOffset += nalLength;
6877 }
6878 CHECK_EQ(srcOffset, size);
6879 CHECK(mBuffer != NULL);
6880 mBuffer->set_range(0, dstOffset);
6881
6882 AMediaFormat *bufmeta = mBuffer->meta_data();
6883 AMediaFormat_setInt64(bufmeta,
6884 AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
6885 AMediaFormat_setInt64(bufmeta,
6886 AMEDIAFORMAT_KEY_DURATION, ((long double)smpl->duration * 1000000) / mTimescale);
6887
6888 if (targetSampleTimeUs >= 0) {
6889 AMediaFormat_setInt64(bufmeta, AMEDIAFORMAT_KEY_TARGET_TIME, targetSampleTimeUs);
6890 }
6891
6892 if (isSyncSample) {
6893 AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
6894 }
6895
6896 ++mCurrentSampleIndex;
6897
6898 *out = mBuffer;
6899 mBuffer = NULL;
6900
6901 return AMEDIA_OK;
6902 }
6903
6904 return AMEDIA_OK;
6905 }
6906
findTrackByMimePrefix(const char * mimePrefix)6907 MPEG4Extractor::Track *MPEG4Extractor::findTrackByMimePrefix(
6908 const char *mimePrefix) {
6909 for (Track *track = mFirstTrack; track != NULL; track = track->next) {
6910 const char *mime;
6911 if (AMediaFormat_getString(track->meta, AMEDIAFORMAT_KEY_MIME, &mime)
6912 && !strncasecmp(mime, mimePrefix, strlen(mimePrefix))) {
6913 return track;
6914 }
6915 }
6916
6917 return NULL;
6918 }
6919
LegacySniffMPEG4(DataSourceHelper * source,float * confidence)6920 static bool LegacySniffMPEG4(DataSourceHelper *source, float *confidence) {
6921 uint8_t header[8];
6922
6923 ssize_t n = source->readAt(4, header, sizeof(header));
6924 if (n < (ssize_t)sizeof(header)) {
6925 return false;
6926 }
6927
6928 if (!memcmp(header, "ftyp3gp", 7) || !memcmp(header, "ftypmp42", 8)
6929 || !memcmp(header, "ftyp3gr6", 8) || !memcmp(header, "ftyp3gs6", 8)
6930 || !memcmp(header, "ftyp3ge6", 8) || !memcmp(header, "ftyp3gg6", 8)
6931 || !memcmp(header, "ftypisom", 8) || !memcmp(header, "ftypM4V ", 8)
6932 || !memcmp(header, "ftypM4A ", 8) || !memcmp(header, "ftypf4v ", 8)
6933 || !memcmp(header, "ftypkddi", 8) || !memcmp(header, "ftypM4VP", 8)
6934 || !memcmp(header, "ftypmif1", 8) || !memcmp(header, "ftypheic", 8)
6935 || !memcmp(header, "ftypmsf1", 8) || !memcmp(header, "ftyphevc", 8)
6936 || !memcmp(header, "ftypavif", 8) || !memcmp(header, "ftypavis", 8)) {
6937 *confidence = 0.4;
6938
6939 return true;
6940 }
6941
6942 return false;
6943 }
6944
isCompatibleBrand(uint32_t fourcc)6945 static bool isCompatibleBrand(uint32_t fourcc) {
6946 static const uint32_t kCompatibleBrands[] = {
6947 FOURCC("isom"),
6948 FOURCC("iso2"),
6949 FOURCC("avc1"),
6950 FOURCC("hvc1"),
6951 FOURCC("hev1"),
6952 FOURCC("av01"),
6953 FOURCC("vp09"),
6954 FOURCC("3gp4"),
6955 FOURCC("mp41"),
6956 FOURCC("mp42"),
6957 FOURCC("dash"),
6958 FOURCC("nvr1"),
6959
6960 // Won't promise that the following file types can be played.
6961 // Just give these file types a chance.
6962 FOURCC("qt "), // Apple's QuickTime
6963 FOURCC("MSNV"), // Sony's PSP
6964 FOURCC("wmf "),
6965
6966 FOURCC("3g2a"), // 3GPP2
6967 FOURCC("3g2b"),
6968 FOURCC("mif1"), // HEIF image
6969 FOURCC("heic"), // HEIF image
6970 FOURCC("msf1"), // HEIF image sequence
6971 FOURCC("hevc"), // HEIF image sequence
6972 FOURCC("avif"), // AVIF image
6973 FOURCC("avis"), // AVIF image sequence
6974 };
6975
6976 for (size_t i = 0;
6977 i < sizeof(kCompatibleBrands) / sizeof(kCompatibleBrands[0]);
6978 ++i) {
6979 if (kCompatibleBrands[i] == fourcc) {
6980 return true;
6981 }
6982 }
6983
6984 return false;
6985 }
6986
6987 // Attempt to actually parse the 'ftyp' atom and determine if a suitable
6988 // compatible brand is present.
6989 // Also try to identify where this file's metadata ends
6990 // (end of the 'moov' atom) and report it to the caller as part of
6991 // the metadata.
BetterSniffMPEG4(DataSourceHelper * source,float * confidence)6992 static bool BetterSniffMPEG4(DataSourceHelper *source, float *confidence) {
6993 // We scan up to 128 bytes to identify this file as an MP4.
6994 static const off64_t kMaxScanOffset = 128ll;
6995
6996 off64_t offset = 0ll;
6997 bool foundGoodFileType = false;
6998 off64_t moovAtomEndOffset = -1ll;
6999 bool done = false;
7000
7001 while (!done && offset < kMaxScanOffset) {
7002 uint32_t hdr[2];
7003 if (source->readAt(offset, hdr, 8) < 8) {
7004 return false;
7005 }
7006
7007 uint64_t chunkSize = ntohl(hdr[0]);
7008 uint32_t chunkType = ntohl(hdr[1]);
7009 off64_t chunkDataOffset = offset + 8;
7010
7011 if (chunkSize == 1) {
7012 if (source->readAt(offset + 8, &chunkSize, 8) < 8) {
7013 return false;
7014 }
7015
7016 chunkSize = ntoh64(chunkSize);
7017 chunkDataOffset += 8;
7018
7019 if (chunkSize < 16) {
7020 // The smallest valid chunk is 16 bytes long in this case.
7021 return false;
7022 }
7023 if (chunkSize > INT64_MAX) {
7024 // reject overly large chunk sizes that could
7025 // be interpreted as negative
7026 ALOGE("chunk size too large");
7027 return false;
7028 }
7029
7030 } else if (chunkSize < 8) {
7031 // The smallest valid chunk is 8 bytes long.
7032 return false;
7033 }
7034
7035 // (data_offset - offset) is either 8 or 16
7036 off64_t chunkDataSize = chunkSize - (chunkDataOffset - offset);
7037 if (chunkDataSize < 0) {
7038 ALOGE("b/23540914");
7039 return false;
7040 }
7041
7042 char chunkstring[5];
7043 MakeFourCCString(chunkType, chunkstring);
7044 ALOGV("saw chunk type %s, size %" PRIu64 " @ %lld",
7045 chunkstring, chunkSize, (long long)offset);
7046 switch (chunkType) {
7047 case FOURCC("ftyp"):
7048 {
7049 if (chunkDataSize < 8) {
7050 return false;
7051 }
7052
7053 uint32_t numCompatibleBrands = (chunkDataSize - 8) / 4;
7054 for (size_t i = 0; i < numCompatibleBrands + 2; ++i) {
7055 if (i == 1) {
7056 // Skip this index, it refers to the minorVersion,
7057 // not a brand.
7058 continue;
7059 }
7060
7061 uint32_t brand;
7062 if (source->readAt(
7063 chunkDataOffset + 4 * i, &brand, 4) < 4) {
7064 return false;
7065 }
7066
7067 brand = ntohl(brand);
7068
7069 if (isCompatibleBrand(brand)) {
7070 foundGoodFileType = true;
7071 break;
7072 }
7073 }
7074
7075 if (!foundGoodFileType) {
7076 return false;
7077 }
7078
7079 break;
7080 }
7081
7082 case FOURCC("moov"):
7083 {
7084 if (__builtin_add_overflow(offset, chunkSize, &moovAtomEndOffset)) {
7085 ALOGE("chunk size + offset would overflow");
7086 return false;
7087 }
7088
7089 done = true;
7090 break;
7091 }
7092
7093 default:
7094 break;
7095 }
7096
7097 if (__builtin_add_overflow(offset, chunkSize, &offset)) {
7098 ALOGE("chunk size + offset would overflow");
7099 return false;
7100 }
7101 }
7102
7103 if (!foundGoodFileType) {
7104 return false;
7105 }
7106
7107 *confidence = 0.4f;
7108
7109 return true;
7110 }
7111
CreateExtractor(CDataSource * source,void *)7112 static CMediaExtractor* CreateExtractor(CDataSource *source, void *) {
7113 return wrap(new MPEG4Extractor(new DataSourceHelper(source)));
7114 }
7115
Sniff(CDataSource * source,float * confidence,void **,FreeMetaFunc *)7116 static CreatorFunc Sniff(
7117 CDataSource *source, float *confidence, void **,
7118 FreeMetaFunc *) {
7119 DataSourceHelper helper(source);
7120 if (BetterSniffMPEG4(&helper, confidence)) {
7121 return CreateExtractor;
7122 }
7123
7124 if (LegacySniffMPEG4(&helper, confidence)) {
7125 ALOGW("Identified supported mpeg4 through LegacySniffMPEG4.");
7126 return CreateExtractor;
7127 }
7128
7129 return NULL;
7130 }
7131
// NULL-terminated list of file extensions passed to the framework through the
// ExtractorDef returned by GETEXTRACTORDEF().
static const char *extensions[] = {
    "3g2", "3ga", "3gp", "3gpp", "3gpp2",
    "m4a", "m4r", "m4v",
    "mov", "mp4", "qt",
    NULL
};
7146
7147 extern "C" {
7148 // This is the only symbol that needs to be exported
7149 __attribute__ ((visibility ("default")))
GETEXTRACTORDEF()7150 ExtractorDef GETEXTRACTORDEF() {
7151 return {
7152 EXTRACTORDEF_VERSION,
7153 UUID("27575c67-4417-4c54-8d3d-8e626985a164"),
7154 2, // version
7155 "MP4 Extractor",
7156 { .v3 = {Sniff, extensions} },
7157 };
7158 }
7159
7160 } // extern "C"
7161
7162 } // namespace android
7163