1 /*
2 * Copyright (C) 2010 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 //#define LOG_NDEBUG 0
18 #define LOG_TAG "ESQueue"
19 #include <media/stagefright/foundation/ADebug.h>
20
21 #include "ESQueue.h"
22
23 #include <media/stagefright/foundation/hexdump.h>
24 #include <media/stagefright/foundation/ABitReader.h>
25 #include <media/stagefright/foundation/ABuffer.h>
26 #include <media/stagefright/foundation/AMessage.h>
27 #include <media/stagefright/foundation/ByteUtils.h>
28 #include <media/stagefright/foundation/avc_utils.h>
29 #include <media/stagefright/MediaErrors.h>
30 #include <media/stagefright/MediaDefs.h>
31 #include <media/stagefright/MetaData.h>
32 #include <media/stagefright/MetaDataUtils.h>
33 #include <media/cas/DescramblerAPI.h>
34 #include <media/hardware/CryptoAPI.h>
35
36 #include <inttypes.h>
37 #include <netinet/in.h>
38
39 namespace android {
40
ElementaryStreamQueue(Mode mode,uint32_t flags)41 ElementaryStreamQueue::ElementaryStreamQueue(Mode mode, uint32_t flags)
42 : mMode(mode),
43 mFlags(flags),
44 mEOSReached(false),
45 mCASystemId(0),
46 mAUIndex(0) {
47
48 ALOGV("ElementaryStreamQueue(%p) mode %x flags %x isScrambled %d isSampleEncrypted %d",
49 this, mode, flags, isScrambled(), isSampleEncrypted());
50
51 // Create the decryptor anyway since we don't know the use-case unless key is provided
52 // Won't decrypt if key info not available (e.g., scanner/extractor just parsing ts files)
53 mSampleDecryptor = isSampleEncrypted() ? new HlsSampleDecryptor : NULL;
54 }
55
getFormat()56 sp<MetaData> ElementaryStreamQueue::getFormat() {
57 return mFormat;
58 }
59
clear(bool clearFormat)60 void ElementaryStreamQueue::clear(bool clearFormat) {
61 if (mBuffer != NULL) {
62 mBuffer->setRange(0, 0);
63 }
64
65 mRangeInfos.clear();
66
67 if (mScrambledBuffer != NULL) {
68 mScrambledBuffer->setRange(0, 0);
69 }
70 mScrambledRangeInfos.clear();
71
72 if (clearFormat) {
73 mFormat.clear();
74 }
75
76 mEOSReached = false;
77 }
78
isScrambled() const79 bool ElementaryStreamQueue::isScrambled() const {
80 return (mFlags & kFlag_ScrambledData) != 0;
81 }
82
setCasInfo(int32_t systemId,const std::vector<uint8_t> & sessionId)83 void ElementaryStreamQueue::setCasInfo(
84 int32_t systemId, const std::vector<uint8_t> &sessionId) {
85 mCASystemId = systemId;
86 mCasSessionId = sessionId;
87 }
88
readVariableBits(ABitReader & bits,int32_t nbits)89 static int32_t readVariableBits(ABitReader &bits, int32_t nbits) {
90 int32_t value = 0;
91 int32_t more_bits = 1;
92
93 while (more_bits) {
94 value += bits.getBits(nbits);
95 more_bits = bits.getBits(1);
96 if (!more_bits)
97 break;
98 value++;
99 value <<= nbits;
100 }
101 return value;
102 }
103
104 // Parse AC3 header assuming the current ptr is start position of syncframe,
105 // update metadata only applicable, and return the payload size
parseAC3SyncFrame(const uint8_t * ptr,size_t size,sp<MetaData> * metaData)106 static unsigned parseAC3SyncFrame(
107 const uint8_t *ptr, size_t size, sp<MetaData> *metaData) {
108 static const unsigned channelCountTable[] = {2, 1, 2, 3, 3, 4, 4, 5};
109 static const unsigned samplingRateTable[] = {48000, 44100, 32000};
110
111 static const unsigned frameSizeTable[19][3] = {
112 { 64, 69, 96 },
113 { 80, 87, 120 },
114 { 96, 104, 144 },
115 { 112, 121, 168 },
116 { 128, 139, 192 },
117 { 160, 174, 240 },
118 { 192, 208, 288 },
119 { 224, 243, 336 },
120 { 256, 278, 384 },
121 { 320, 348, 480 },
122 { 384, 417, 576 },
123 { 448, 487, 672 },
124 { 512, 557, 768 },
125 { 640, 696, 960 },
126 { 768, 835, 1152 },
127 { 896, 975, 1344 },
128 { 1024, 1114, 1536 },
129 { 1152, 1253, 1728 },
130 { 1280, 1393, 1920 },
131 };
132
133 ABitReader bits(ptr, size);
134 if (bits.numBitsLeft() < 16) {
135 return 0;
136 }
137 if (bits.getBits(16) != 0x0B77) {
138 return 0;
139 }
140
141 if (bits.numBitsLeft() < 16 + 2 + 6 + 5 + 3 + 3) {
142 ALOGV("Not enough bits left for further parsing");
143 return 0;
144 }
145 bits.skipBits(16); // crc1
146
147 unsigned fscod = bits.getBits(2);
148 if (fscod == 3) {
149 ALOGW("Incorrect fscod in AC3 header");
150 return 0;
151 }
152
153 unsigned frmsizecod = bits.getBits(6);
154 if (frmsizecod > 37) {
155 ALOGW("Incorrect frmsizecod in AC3 header");
156 return 0;
157 }
158
159 unsigned bsid = bits.getBits(5);
160 if (bsid > 8) {
161 ALOGW("Incorrect bsid in AC3 header. Possibly E-AC-3?");
162 return 0;
163 }
164
165 unsigned bsmod __unused = bits.getBits(3);
166 unsigned acmod = bits.getBits(3);
167 unsigned cmixlev __unused = 0;
168 unsigned surmixlev __unused = 0;
169 unsigned dsurmod __unused = 0;
170
171 if ((acmod & 1) > 0 && acmod != 1) {
172 if (bits.numBitsLeft() < 2) {
173 return 0;
174 }
175 cmixlev = bits.getBits(2);
176 }
177 if ((acmod & 4) > 0) {
178 if (bits.numBitsLeft() < 2) {
179 return 0;
180 }
181 surmixlev = bits.getBits(2);
182 }
183 if (acmod == 2) {
184 if (bits.numBitsLeft() < 2) {
185 return 0;
186 }
187 dsurmod = bits.getBits(2);
188 }
189
190 if (bits.numBitsLeft() < 1) {
191 return 0;
192 }
193 unsigned lfeon = bits.getBits(1);
194
195 unsigned samplingRate = samplingRateTable[fscod];
196 unsigned payloadSize = frameSizeTable[frmsizecod >> 1][fscod];
197 if (fscod == 1) {
198 payloadSize += frmsizecod & 1;
199 }
200 payloadSize <<= 1; // convert from 16-bit words to bytes
201
202 unsigned channelCount = channelCountTable[acmod] + lfeon;
203
204 if (metaData != NULL) {
205 (*metaData)->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_AC3);
206 (*metaData)->setInt32(kKeyChannelCount, channelCount);
207 (*metaData)->setInt32(kKeySampleRate, samplingRate);
208 }
209
210 return payloadSize;
211 }
212
213 // Parse EAC3 header assuming the current ptr is start position of syncframe,
214 // update metadata only applicable, and return the payload size
215 // ATSC A/52:2012 E2.3.1
parseEAC3SyncFrame(const uint8_t * ptr,size_t size,sp<MetaData> * metaData)216 static unsigned parseEAC3SyncFrame(
217 const uint8_t *ptr, size_t size, sp<MetaData> *metaData) {
218 static const unsigned channelCountTable[] = {2, 1, 2, 3, 3, 4, 4, 5};
219 static const unsigned samplingRateTable[] = {48000, 44100, 32000};
220 static const unsigned samplingRateTable2[] = {24000, 22050, 16000};
221
222 ABitReader bits(ptr, size);
223 if (bits.numBitsLeft() < 16) {
224 ALOGE("Not enough bits left for further parsing");
225 return 0;
226 }
227 if (bits.getBits(16) != 0x0B77) {
228 ALOGE("No valid sync word in EAC3 header");
229 return 0;
230 }
231
232 // we parse up to bsid so there needs to be at least that many bits
233 if (bits.numBitsLeft() < 2 + 3 + 11 + 2 + 2 + 3 + 1 + 5) {
234 ALOGE("Not enough bits left for further parsing");
235 return 0;
236 }
237
238 unsigned strmtyp = bits.getBits(2);
239 if (strmtyp == 3) {
240 ALOGE("Incorrect strmtyp in EAC3 header");
241 return 0;
242 }
243
244 unsigned substreamid = bits.getBits(3);
245 // only the first independent stream is supported
246 if ((strmtyp == 0 || strmtyp == 2) && substreamid != 0)
247 return 0;
248
249 unsigned frmsiz = bits.getBits(11);
250 unsigned fscod = bits.getBits(2);
251
252 unsigned samplingRate = 0;
253 if (fscod == 0x3) {
254 unsigned fscod2 = bits.getBits(2);
255 if (fscod2 == 3) {
256 ALOGW("Incorrect fscod2 in EAC3 header");
257 return 0;
258 }
259 samplingRate = samplingRateTable2[fscod2];
260 } else {
261 samplingRate = samplingRateTable[fscod];
262 unsigned numblkscod __unused = bits.getBits(2);
263 }
264
265 unsigned acmod = bits.getBits(3);
266 unsigned lfeon = bits.getBits(1);
267 unsigned bsid = bits.getBits(5);
268 if (bsid < 11 || bsid > 16) {
269 ALOGW("Incorrect bsid in EAC3 header. Could be AC-3 or some unknown EAC3 format");
270 return 0;
271 }
272
273 // we currently only support the first independant stream
274 if (metaData != NULL && (strmtyp == 0 || strmtyp == 2)) {
275 unsigned channelCount = channelCountTable[acmod] + lfeon;
276 ALOGV("EAC3 channelCount = %d", channelCount);
277 ALOGV("EAC3 samplingRate = %d", samplingRate);
278 (*metaData)->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_EAC3);
279 (*metaData)->setInt32(kKeyChannelCount, channelCount);
280 (*metaData)->setInt32(kKeySampleRate, samplingRate);
281 (*metaData)->setInt32(kKeyIsSyncFrame, 1);
282 }
283
284 unsigned payloadSize = frmsiz + 1;
285 payloadSize <<= 1; // convert from 16-bit words to bytes
286
287 return payloadSize;
288 }
289
290 // Parse AC4 header assuming the current ptr is start position of syncframe
291 // and update frameSize and metadata.
parseAC4SyncFrame(const uint8_t * ptr,size_t size,unsigned & frameSize,sp<MetaData> * metaData)292 static status_t parseAC4SyncFrame(
293 const uint8_t *ptr, size_t size, unsigned &frameSize, sp<MetaData> *metaData) {
294 // ETSI TS 103 190-2 V1.1.1 (2015-09), Annex C
295 // The sync_word can be either 0xAC40 or 0xAC41.
296 static const int kSyncWordAC40 = 0xAC40;
297 static const int kSyncWordAC41 = 0xAC41;
298
299 size_t headerSize = 0;
300 ABitReader bits(ptr, size);
301 int32_t syncWord = bits.getBits(16);
302 if ((syncWord != kSyncWordAC40) && (syncWord != kSyncWordAC41)) {
303 ALOGE("Invalid syncword in AC4 header");
304 return ERROR_MALFORMED;
305 }
306 headerSize += 2;
307
308 frameSize = bits.getBits(16);
309 headerSize += 2;
310 if (frameSize == 0xFFFF) {
311 frameSize = bits.getBits(24);
312 headerSize += 3;
313 }
314
315 if (frameSize == 0) {
316 ALOGE("Invalid frame size in AC4 header");
317 return ERROR_MALFORMED;
318 }
319 frameSize += headerSize;
320 // If the sync_word is 0xAC41, a crc_word is also transmitted.
321 if (syncWord == kSyncWordAC41) {
322 frameSize += 2; // crc_word
323 }
324 ALOGV("AC4 frameSize = %u", frameSize);
325
326 // ETSI TS 103 190-2 V1.1.1 6.2.1.1
327 uint32_t bitstreamVersion = bits.getBits(2);
328 if (bitstreamVersion == 3) {
329 bitstreamVersion += readVariableBits(bits, 2);
330 }
331
332 bits.skipBits(10); // Sequence Counter
333
334 uint32_t bWaitFrames = bits.getBits(1);
335 if (bWaitFrames) {
336 uint32_t waitFrames = bits.getBits(3);
337 if (waitFrames > 0) {
338 bits.skipBits(2); // br_code;
339 }
340 }
341
342 // ETSI TS 103 190 V1.1.1 Table 82
343 bool fsIndex = bits.getBits(1);
344 uint32_t samplingRate = fsIndex ? 48000 : 44100;
345
346 if (metaData != NULL) {
347 ALOGV("dequeueAccessUnitAC4 Setting mFormat");
348 (*metaData)->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_AC4);
349 (*metaData)->setInt32(kKeyIsSyncFrame, 1);
350 // [FIXME] AC4 channel count is defined per presentation. Provide a default channel count
351 // as stereo for the entire stream.
352 (*metaData)->setInt32(kKeyChannelCount, 2);
353 (*metaData)->setInt32(kKeySampleRate, samplingRate);
354 }
355 return OK;
356 }
357
IsSeeminglyValidAC4Header(const uint8_t * ptr,size_t size,unsigned & frameSize)358 static status_t IsSeeminglyValidAC4Header(const uint8_t *ptr, size_t size, unsigned &frameSize) {
359 return parseAC4SyncFrame(ptr, size, frameSize, NULL);
360 }
361
// Heuristically checks whether |ptr| points at an ADTS (AAC) frame header.
//
//   ptr, size   - candidate bytes; at least 7 bytes are needed to inspect the header.
//   frameLength - out: the frame length announced by the header (header included).
//                 Written only when true is returned.
//
// Returns true if the sync word, layer and profile are plausible and the announced frame
// length is at least one header long and fits into |size|.
static bool IsSeeminglyValidADTSHeader(
        const uint8_t *ptr, size_t size, size_t *frameLength) {
    // Size of the ADTS header without CRC. The frame length field counts the header too,
    // so no real frame can be shorter than this.
    static const size_t kMinADTSFrameLength = 7;

    if (size < kMinADTSFrameLength) {
        // Not enough data to verify header.
        return false;
    }

    // 12-bit sync word, all ones.
    if (ptr[0] != 0xff || (ptr[1] >> 4) != 0x0f) {
        return false;
    }

    unsigned layer = (ptr[1] >> 1) & 3;

    if (layer != 0) {
        return false;
    }

    unsigned ID = (ptr[1] >> 3) & 1;
    unsigned profile_ObjectType = ptr[2] >> 6;

    if (ID == 1 && profile_ObjectType == 3) {
        // MPEG-2 profile 3 is reserved.
        return false;
    }

    // 13-bit frame length spread over bytes 3..5.
    size_t frameLengthInHeader =
            ((ptr[3] & 3) << 11) + (ptr[4] << 3) + ((ptr[5] >> 5) & 7);
    if (frameLengthInHeader < kMinADTSFrameLength) {
        // A length shorter than the header itself (including 0) cannot be a real frame;
        // treat it as a false sync match.
        return false;
    }
    if (frameLengthInHeader > size) {
        return false;
    }

    *frameLength = frameLengthInHeader;
    return true;
}
396
// Heuristically checks whether |ptr| points at an MPEG audio frame header.
// Needs at least 3 bytes. Returns true when the 11-bit sync pattern is present and none
// of the version, layer, bitrate index or sampling rate index fields holds a reserved
// value.
static bool IsSeeminglyValidMPEGAudioHeader(const uint8_t *ptr, size_t size) {
    if (size < 3) {
        // Not enough data to verify header.
        return false;
    }

    const bool hasSync = (ptr[0] == 0xff) && ((ptr[1] >> 5) == 0x07);
    if (!hasSync) {
        return false;
    }

    const unsigned versionId = (ptr[1] >> 3) & 3;
    const unsigned layer = (ptr[1] >> 1) & 3;
    const unsigned bitrateIndex = ptr[2] >> 4;
    const unsigned samplingRateIndex = (ptr[2] >> 2) & 3;

    // Each of these values is reserved in its field.
    return versionId != 1
            && layer != 0
            && bitrateIndex != 0x0f
            && samplingRateIndex != 3;
}
433
appendData(const void * data,size_t size,int64_t timeUs,int32_t payloadOffset,uint32_t pesScramblingControl)434 status_t ElementaryStreamQueue::appendData(
435 const void *data, size_t size, int64_t timeUs,
436 int32_t payloadOffset, uint32_t pesScramblingControl) {
437
438 if (mEOSReached) {
439 ALOGE("appending data after EOS");
440 return ERROR_MALFORMED;
441 }
442
443 if (!isScrambled() && (mBuffer == NULL || mBuffer->size() == 0)) {
444 switch (mMode) {
445 case H264:
446 case MPEG_VIDEO:
447 {
448 #if 0
449 if (size < 4 || memcmp("\x00\x00\x00\x01", data, 4)) {
450 return ERROR_MALFORMED;
451 }
452 #else
453 uint8_t *ptr = (uint8_t *)data;
454
455 ssize_t startOffset = -1;
456 for (size_t i = 0; i + 2 < size; ++i) {
457 if (!memcmp("\x00\x00\x01", &ptr[i], 3)) {
458 startOffset = i;
459 break;
460 }
461 }
462
463 if (startOffset < 0) {
464 return ERROR_MALFORMED;
465 }
466
467 if (mFormat == NULL && startOffset > 0) {
468 ALOGI("found something resembling an H.264/MPEG syncword "
469 "at offset %zd",
470 startOffset);
471 }
472
473 data = &ptr[startOffset];
474 size -= startOffset;
475 #endif
476 break;
477 }
478
479 case MPEG4_VIDEO:
480 {
481 #if 0
482 if (size < 3 || memcmp("\x00\x00\x01", data, 3)) {
483 return ERROR_MALFORMED;
484 }
485 #else
486 uint8_t *ptr = (uint8_t *)data;
487
488 ssize_t startOffset = -1;
489 for (size_t i = 0; i + 2 < size; ++i) {
490 if (!memcmp("\x00\x00\x01", &ptr[i], 3)) {
491 startOffset = i;
492 break;
493 }
494 }
495
496 if (startOffset < 0) {
497 return ERROR_MALFORMED;
498 }
499
500 if (startOffset > 0) {
501 ALOGI("found something resembling an H.264/MPEG syncword "
502 "at offset %zd",
503 startOffset);
504 }
505
506 data = &ptr[startOffset];
507 size -= startOffset;
508 #endif
509 break;
510 }
511
512 case AAC:
513 {
514 uint8_t *ptr = (uint8_t *)data;
515
516 #if 0
517 if (size < 2 || ptr[0] != 0xff || (ptr[1] >> 4) != 0x0f) {
518 return ERROR_MALFORMED;
519 }
520 #else
521 ssize_t startOffset = -1;
522 size_t frameLength;
523 for (size_t i = 0; i < size; ++i) {
524 if (IsSeeminglyValidADTSHeader(
525 &ptr[i], size - i, &frameLength)) {
526 startOffset = i;
527 break;
528 }
529 }
530
531 if (startOffset < 0) {
532 return ERROR_MALFORMED;
533 }
534
535 if (startOffset > 0) {
536 ALOGI("found something resembling an AAC syncword at "
537 "offset %zd",
538 startOffset);
539 }
540
541 if (frameLength != size - startOffset) {
542 ALOGV("First ADTS AAC frame length is %zd bytes, "
543 "while the buffer size is %zd bytes.",
544 frameLength, size - startOffset);
545 }
546
547 data = &ptr[startOffset];
548 size -= startOffset;
549 #endif
550 break;
551 }
552
553 case AC3:
554 case EAC3:
555 {
556 uint8_t *ptr = (uint8_t *)data;
557
558 ssize_t startOffset = -1;
559 for (size_t i = 0; i < size; ++i) {
560 unsigned payloadSize = 0;
561 if (mMode == AC3) {
562 payloadSize = parseAC3SyncFrame(&ptr[i], size - i, NULL);
563 } else if (mMode == EAC3) {
564 payloadSize = parseEAC3SyncFrame(&ptr[i], size - i, NULL);
565 }
566 if (payloadSize > 0) {
567 startOffset = i;
568 break;
569 }
570 }
571
572 if (startOffset < 0) {
573 return ERROR_MALFORMED;
574 }
575
576 if (startOffset > 0) {
577 ALOGI("found something resembling an (E)AC3 syncword at "
578 "offset %zd",
579 startOffset);
580 }
581
582 data = &ptr[startOffset];
583 size -= startOffset;
584 break;
585 }
586
587 case AC4:
588 {
589 uint8_t *ptr = (uint8_t *)data;
590 unsigned frameSize = 0;
591 ssize_t startOffset = -1;
592
593 // A valid AC4 stream should have minimum of 7 bytes in its buffer.
594 // (Sync header 4 bytes + AC4 toc 3 bytes)
595 if (size < 7) {
596 return ERROR_MALFORMED;
597 }
598 for (size_t i = 0; i < size; ++i) {
599 if (IsSeeminglyValidAC4Header(&ptr[i], size - i, frameSize) == OK) {
600 startOffset = i;
601 break;
602 }
603 }
604
605 if (startOffset < 0) {
606 return ERROR_MALFORMED;
607 }
608
609 if (startOffset > 0) {
610 ALOGI("found something resembling an AC4 syncword at "
611 "offset %zd",
612 startOffset);
613 }
614 if (frameSize != size - startOffset) {
615 ALOGV("AC4 frame size is %u bytes, while the buffer size is %zd bytes.",
616 frameSize, size - startOffset);
617 }
618
619 data = &ptr[startOffset];
620 size -= startOffset;
621 break;
622 }
623
624 case MPEG_AUDIO:
625 {
626 uint8_t *ptr = (uint8_t *)data;
627
628 ssize_t startOffset = -1;
629 for (size_t i = 0; i < size; ++i) {
630 if (IsSeeminglyValidMPEGAudioHeader(&ptr[i], size - i)) {
631 startOffset = i;
632 break;
633 }
634 }
635
636 if (startOffset < 0) {
637 return ERROR_MALFORMED;
638 }
639
640 if (startOffset > 0) {
641 ALOGI("found something resembling an MPEG audio "
642 "syncword at offset %zd",
643 startOffset);
644 }
645
646 data = &ptr[startOffset];
647 size -= startOffset;
648 break;
649 }
650
651 case PCM_AUDIO:
652 case METADATA:
653 {
654 break;
655 }
656
657 default:
658 ALOGE("Unknown mode: %d", mMode);
659 return ERROR_MALFORMED;
660 }
661 }
662
663 size_t neededSize = (mBuffer == NULL ? 0 : mBuffer->size()) + size;
664 if (mBuffer == NULL || neededSize > mBuffer->capacity()) {
665 neededSize = (neededSize + 65535) & ~65535;
666
667 ALOGV("resizing buffer to size %zu", neededSize);
668
669 sp<ABuffer> buffer = new ABuffer(neededSize);
670 if (mBuffer != NULL) {
671 memcpy(buffer->data(), mBuffer->data(), mBuffer->size());
672 buffer->setRange(0, mBuffer->size());
673 } else {
674 buffer->setRange(0, 0);
675 }
676
677 mBuffer = buffer;
678 }
679
680 memcpy(mBuffer->data() + mBuffer->size(), data, size);
681 mBuffer->setRange(0, mBuffer->size() + size);
682
683 RangeInfo info;
684 info.mLength = size;
685 info.mTimestampUs = timeUs;
686 info.mPesOffset = payloadOffset;
687 info.mPesScramblingControl = pesScramblingControl;
688 mRangeInfos.push_back(info);
689
690 #if 0
691 if (mMode == AAC) {
692 ALOGI("size = %zu, timeUs = %.2f secs", size, timeUs / 1E6);
693 hexdump(data, size);
694 }
695 #endif
696
697 return OK;
698 }
699
appendScrambledData(const void * data,size_t size,size_t leadingClearBytes,int32_t keyId,bool isSync,sp<ABuffer> clearSizes,sp<ABuffer> encSizes)700 void ElementaryStreamQueue::appendScrambledData(
701 const void *data, size_t size,
702 size_t leadingClearBytes,
703 int32_t keyId, bool isSync,
704 sp<ABuffer> clearSizes, sp<ABuffer> encSizes) {
705 if (!isScrambled()) {
706 return;
707 }
708
709 size_t neededSize = (mScrambledBuffer == NULL ? 0 : mScrambledBuffer->size()) + size;
710 if (mScrambledBuffer == NULL || neededSize > mScrambledBuffer->capacity()) {
711 neededSize = (neededSize + 65535) & ~65535;
712
713 ALOGI("resizing scrambled buffer to size %zu", neededSize);
714
715 sp<ABuffer> buffer = new ABuffer(neededSize);
716 if (mScrambledBuffer != NULL) {
717 memcpy(buffer->data(), mScrambledBuffer->data(), mScrambledBuffer->size());
718 buffer->setRange(0, mScrambledBuffer->size());
719 } else {
720 buffer->setRange(0, 0);
721 }
722
723 mScrambledBuffer = buffer;
724 }
725 memcpy(mScrambledBuffer->data() + mScrambledBuffer->size(), data, size);
726 mScrambledBuffer->setRange(0, mScrambledBuffer->size() + size);
727
728 ScrambledRangeInfo scrambledInfo;
729 scrambledInfo.mLength = size;
730 scrambledInfo.mLeadingClearBytes = leadingClearBytes;
731 scrambledInfo.mKeyId = keyId;
732 scrambledInfo.mIsSync = isSync;
733 scrambledInfo.mClearSizes = clearSizes;
734 scrambledInfo.mEncSizes = encSizes;
735
736 ALOGV("[stream %d] appending scrambled range: size=%zu", mMode, size);
737
738 mScrambledRangeInfos.push_back(scrambledInfo);
739 }
740
dequeueScrambledAccessUnit()741 sp<ABuffer> ElementaryStreamQueue::dequeueScrambledAccessUnit() {
742 size_t nextScan = mBuffer->size();
743 int32_t pesOffset = 0, pesScramblingControl = 0;
744 int64_t timeUs = fetchTimestamp(nextScan, &pesOffset, &pesScramblingControl);
745 if (timeUs < 0ll) {
746 ALOGE("Negative timeUs");
747 return NULL;
748 }
749
750 // return scrambled unit
751 int32_t keyId = pesScramblingControl, isSync = 0, scrambledLength = 0;
752 sp<ABuffer> clearSizes, encSizes;
753 size_t leadingClearBytes;
754 while (mScrambledRangeInfos.size() > mRangeInfos.size()) {
755 auto it = mScrambledRangeInfos.begin();
756 ALOGV("[stream %d] fetching scrambled range: size=%zu", mMode, it->mLength);
757
758 if (scrambledLength > 0) {
759 // This shouldn't happen since we always dequeue the entire PES.
760 ALOGW("Discarding srambled length %d", scrambledLength);
761 }
762 scrambledLength = it->mLength;
763
764 // TODO: handle key id change, use first non-zero keyId for now
765 if (keyId == 0) {
766 keyId = it->mKeyId;
767 }
768 clearSizes = it->mClearSizes;
769 encSizes = it->mEncSizes;
770 isSync = it->mIsSync;
771 leadingClearBytes = it->mLeadingClearBytes;
772 mScrambledRangeInfos.erase(it);
773 }
774 if (scrambledLength == 0) {
775 ALOGE("[stream %d] empty scrambled unit!", mMode);
776 return NULL;
777 }
778
779 // Retrieve the leading clear bytes info, and use it to set the clear
780 // range on mBuffer. Note that the leading clear bytes includes the
781 // PES header portion, while mBuffer doesn't.
782 if ((int32_t)leadingClearBytes > pesOffset) {
783 mBuffer->setRange(0, leadingClearBytes - pesOffset);
784 } else {
785 mBuffer->setRange(0, 0);
786 }
787
788 // Try to parse formats, and if unavailable set up a dummy format.
789 // Only support the following modes for scrambled content for now.
790 // (will be expanded later).
791 if (mFormat == NULL) {
792 mFormat = new MetaData;
793 switch (mMode) {
794 case H264:
795 {
796 if (!MakeAVCCodecSpecificData(
797 *mFormat, mBuffer->data(), mBuffer->size())) {
798 ALOGI("Creating dummy AVC format for scrambled content");
799
800 mFormat->setCString(kKeyMIMEType, MEDIA_MIMETYPE_VIDEO_AVC);
801 mFormat->setInt32(kKeyWidth, 1280);
802 mFormat->setInt32(kKeyHeight, 720);
803 }
804 break;
805 }
806 case AAC:
807 {
808 if (!MakeAACCodecSpecificData(
809 *mFormat, mBuffer->data(), mBuffer->size())) {
810 ALOGI("Creating dummy AAC format for scrambled content");
811
812 MakeAACCodecSpecificData(*mFormat,
813 1 /*profile*/, 7 /*sampling_freq_index*/, 1 /*channel_config*/);
814 mFormat->setInt32(kKeyIsADTS, true);
815 }
816
817 break;
818 }
819 case MPEG_VIDEO:
820 {
821 ALOGI("Creating dummy MPEG format for scrambled content");
822
823 mFormat->setCString(kKeyMIMEType, MEDIA_MIMETYPE_VIDEO_MPEG2);
824 mFormat->setInt32(kKeyWidth, 1280);
825 mFormat->setInt32(kKeyHeight, 720);
826 break;
827 }
828 default:
829 {
830 ALOGE("Unknown mode for scrambled content");
831 return NULL;
832 }
833 }
834
835 // for MediaExtractor.CasInfo
836 mFormat->setInt32(kKeyCASystemID, mCASystemId);
837 mFormat->setData(kKeyCASessionID,
838 0, mCasSessionId.data(), mCasSessionId.size());
839 }
840
841 mBuffer->setRange(0, 0);
842
843 // copy into scrambled access unit
844 sp<ABuffer> scrambledAccessUnit = ABuffer::CreateAsCopy(
845 mScrambledBuffer->data(), scrambledLength);
846
847 scrambledAccessUnit->meta()->setInt64("timeUs", timeUs);
848 if (isSync) {
849 scrambledAccessUnit->meta()->setInt32("isSync", 1);
850 }
851
852 // fill in CryptoInfo fields for AnotherPacketSource::read()
853 // MediaCas doesn't use cryptoMode, but set to non-zero value here.
854 scrambledAccessUnit->meta()->setInt32(
855 "cryptoMode", CryptoPlugin::kMode_AES_CTR);
856 scrambledAccessUnit->meta()->setInt32("cryptoKey", keyId);
857 scrambledAccessUnit->meta()->setBuffer("clearBytes", clearSizes);
858 scrambledAccessUnit->meta()->setBuffer("encBytes", encSizes);
859 scrambledAccessUnit->meta()->setInt32("pesOffset", pesOffset);
860
861 memmove(mScrambledBuffer->data(),
862 mScrambledBuffer->data() + scrambledLength,
863 mScrambledBuffer->size() - scrambledLength);
864
865 mScrambledBuffer->setRange(0, mScrambledBuffer->size() - scrambledLength);
866
867 ALOGV("[stream %d] dequeued scrambled AU: timeUs=%lld, size=%zu",
868 mMode, (long long)timeUs, scrambledAccessUnit->size());
869
870 return scrambledAccessUnit;
871 }
872
dequeueAccessUnit()873 sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnit() {
874 if (isScrambled()) {
875 return dequeueScrambledAccessUnit();
876 }
877
878 if ((mFlags & kFlag_AlignedData) && mMode == H264) {
879 if (mRangeInfos.empty()) {
880 return NULL;
881 }
882
883 RangeInfo info = *mRangeInfos.begin();
884 mRangeInfos.erase(mRangeInfos.begin());
885
886 sp<ABuffer> accessUnit = new ABuffer(info.mLength);
887 memcpy(accessUnit->data(), mBuffer->data(), info.mLength);
888 accessUnit->meta()->setInt64("timeUs", info.mTimestampUs);
889
890 memmove(mBuffer->data(),
891 mBuffer->data() + info.mLength,
892 mBuffer->size() - info.mLength);
893
894 mBuffer->setRange(0, mBuffer->size() - info.mLength);
895
896 if (mFormat == NULL) {
897 mFormat = new MetaData;
898 if (!MakeAVCCodecSpecificData(*mFormat, accessUnit->data(), accessUnit->size())) {
899 mFormat.clear();
900 }
901 }
902
903 return accessUnit;
904 }
905
906 switch (mMode) {
907 case H264:
908 return dequeueAccessUnitH264();
909 case AAC:
910 return dequeueAccessUnitAAC();
911 case AC3:
912 case EAC3:
913 return dequeueAccessUnitEAC3();
914 case AC4:
915 return dequeueAccessUnitAC4();
916 case MPEG_VIDEO:
917 return dequeueAccessUnitMPEGVideo();
918 case MPEG4_VIDEO:
919 return dequeueAccessUnitMPEG4Video();
920 case PCM_AUDIO:
921 return dequeueAccessUnitPCMAudio();
922 case METADATA:
923 return dequeueAccessUnitMetadata();
924 default:
925 if (mMode != MPEG_AUDIO) {
926 ALOGE("Unknown mode");
927 return NULL;
928 }
929 return dequeueAccessUnitMPEGAudio();
930 }
931 }
932
dequeueAccessUnitEAC3()933 sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitEAC3() {
934 unsigned syncStartPos = 0; // in bytes
935 unsigned payloadSize = 0;
936 sp<MetaData> format = new MetaData;
937
938 ALOGV("dequeueAccessUnitEAC3[%d]: mBuffer %p(%zu)", mAUIndex,
939 mBuffer->data(), mBuffer->size());
940
941 while (true) {
942 if (syncStartPos + 2 >= mBuffer->size()) {
943 return NULL;
944 }
945
946 uint8_t *ptr = mBuffer->data() + syncStartPos;
947 size_t size = mBuffer->size() - syncStartPos;
948 if (mMode == AC3) {
949 payloadSize = parseAC3SyncFrame(ptr, size, &format);
950 } else if (mMode == EAC3) {
951 payloadSize = parseEAC3SyncFrame(ptr, size, &format);
952 }
953 if (payloadSize > 0) {
954 break;
955 }
956
957 ALOGV("dequeueAccessUnitEAC3[%d]: syncStartPos %u payloadSize %u",
958 mAUIndex, syncStartPos, payloadSize);
959
960 ++syncStartPos;
961 }
962
963 if (mBuffer->size() < syncStartPos + payloadSize) {
964 ALOGV("Not enough buffer size for E/AC3");
965 return NULL;
966 }
967
968 if (mFormat == NULL) {
969 mFormat = format;
970 }
971
972 int64_t timeUs = fetchTimestamp(syncStartPos + payloadSize);
973 if (timeUs < 0ll) {
974 ALOGE("negative timeUs");
975 return NULL;
976 }
977
978 // Not decrypting if key info not available (e.g., scanner/extractor parsing ts files)
979 if (mSampleDecryptor != NULL) {
980 if (mMode == AC3) {
981 mSampleDecryptor->processAC3(mBuffer->data() + syncStartPos, payloadSize);
982 } else if (mMode == EAC3) {
983 ALOGE("EAC3 AU is encrypted and decryption is not supported");
984 return NULL;
985 }
986 }
987 mAUIndex++;
988
989 sp<ABuffer> accessUnit = new ABuffer(syncStartPos + payloadSize);
990 memcpy(accessUnit->data(), mBuffer->data(), syncStartPos + payloadSize);
991
992 accessUnit->meta()->setInt64("timeUs", timeUs);
993 accessUnit->meta()->setInt32("isSync", 1);
994
995 memmove(
996 mBuffer->data(),
997 mBuffer->data() + syncStartPos + payloadSize,
998 mBuffer->size() - syncStartPos - payloadSize);
999
1000 mBuffer->setRange(0, mBuffer->size() - syncStartPos - payloadSize);
1001
1002 return accessUnit;
1003 }
1004
dequeueAccessUnitAC4()1005 sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitAC4() {
1006 unsigned syncStartPos = 0;
1007 unsigned payloadSize = 0;
1008 sp<MetaData> format = new MetaData;
1009 ALOGV("dequeueAccessUnit_AC4[%d]: mBuffer %p(%zu)", mAUIndex, mBuffer->data(), mBuffer->size());
1010
1011 // A valid AC4 stream should have minimum of 7 bytes in its buffer.
1012 // (Sync header 4 bytes + AC4 toc 3 bytes)
1013 if (mBuffer->size() < 7) {
1014 return NULL;
1015 }
1016
1017 while (true) {
1018 if (syncStartPos + 2 >= mBuffer->size()) {
1019 return NULL;
1020 }
1021
1022 status_t status = parseAC4SyncFrame(
1023 mBuffer->data() + syncStartPos,
1024 mBuffer->size() - syncStartPos,
1025 payloadSize,
1026 &format);
1027 if (status == OK) {
1028 break;
1029 }
1030
1031 ALOGV("dequeueAccessUnit_AC4[%d]: syncStartPos %u payloadSize %u",
1032 mAUIndex, syncStartPos, payloadSize);
1033
1034 ++syncStartPos;
1035 }
1036
1037 if (mBuffer->size() < syncStartPos + payloadSize) {
1038 ALOGV("Not enough buffer size for AC4");
1039 return NULL;
1040 }
1041
1042 if (mFormat == NULL) {
1043 mFormat = format;
1044 }
1045
1046 int64_t timeUs = fetchTimestamp(syncStartPos + payloadSize);
1047 if (timeUs < 0ll) {
1048 ALOGE("negative timeUs");
1049 return NULL;
1050 }
1051 mAUIndex++;
1052
1053 sp<ABuffer> accessUnit = new ABuffer(syncStartPos + payloadSize);
1054 memcpy(accessUnit->data(), mBuffer->data(), syncStartPos + payloadSize);
1055
1056 accessUnit->meta()->setInt64("timeUs", timeUs);
1057 accessUnit->meta()->setInt32("isSync", 1);
1058
1059 memmove(
1060 mBuffer->data(),
1061 mBuffer->data() + syncStartPos + payloadSize,
1062 mBuffer->size() - syncStartPos - payloadSize);
1063
1064 mBuffer->setRange(0, mBuffer->size() - syncStartPos - payloadSize);
1065 return accessUnit;
1066 }
1067
dequeueAccessUnitPCMAudio()1068 sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitPCMAudio() {
1069 if (mBuffer->size() < 4) {
1070 return NULL;
1071 }
1072
1073 ABitReader bits(mBuffer->data(), 4);
1074 if (bits.getBits(8) != 0xa0) {
1075 ALOGE("Unexpected bit values");
1076 return NULL;
1077 }
1078 unsigned numAUs = bits.getBits(8);
1079 bits.skipBits(8);
1080 unsigned quantization_word_length __unused = bits.getBits(2);
1081 unsigned audio_sampling_frequency = bits.getBits(3);
1082 unsigned num_channels = bits.getBits(3);
1083
1084 if (audio_sampling_frequency != 2) {
1085 ALOGE("Wrong sampling freq");
1086 return NULL;
1087 }
1088 if (num_channels != 1u) {
1089 ALOGE("Wrong channel #");
1090 return NULL;
1091 }
1092
1093 if (mFormat == NULL) {
1094 mFormat = new MetaData;
1095 mFormat->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_RAW);
1096 mFormat->setInt32(kKeyChannelCount, 2);
1097 mFormat->setInt32(kKeySampleRate, 48000);
1098 mFormat->setInt32(kKeyPcmEncoding, kAudioEncodingPcm16bit);
1099 }
1100
1101 static const size_t kFramesPerAU = 80;
1102 size_t frameSize = 2 /* numChannels */ * sizeof(int16_t);
1103
1104 size_t payloadSize = numAUs * frameSize * kFramesPerAU;
1105
1106 if (mBuffer->size() < 4 + payloadSize) {
1107 return NULL;
1108 }
1109
1110 sp<ABuffer> accessUnit = new ABuffer(payloadSize);
1111 memcpy(accessUnit->data(), mBuffer->data() + 4, payloadSize);
1112
1113 int64_t timeUs = fetchTimestamp(payloadSize + 4);
1114 if (timeUs < 0LL) {
1115 ALOGE("Negative timeUs");
1116 return NULL;
1117 }
1118 accessUnit->meta()->setInt64("timeUs", timeUs);
1119 accessUnit->meta()->setInt32("isSync", 1);
1120
1121 int16_t *ptr = (int16_t *)accessUnit->data();
1122 for (size_t i = 0; i < payloadSize / sizeof(int16_t); ++i) {
1123 ptr[i] = ntohs(ptr[i]);
1124 }
1125
1126 memmove(
1127 mBuffer->data(),
1128 mBuffer->data() + 4 + payloadSize,
1129 mBuffer->size() - 4 - payloadSize);
1130
1131 mBuffer->setRange(0, mBuffer->size() - 4 - payloadSize);
1132
1133 return accessUnit;
1134 }
1135
dequeueAccessUnitAAC()1136 sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitAAC() {
1137 if (mBuffer->size() == 0) {
1138 return NULL;
1139 }
1140
1141 if (mRangeInfos.empty()) {
1142 return NULL;
1143 }
1144
1145 const RangeInfo &info = *mRangeInfos.begin();
1146 if (mBuffer->size() < info.mLength) {
1147 return NULL;
1148 }
1149
1150 if (info.mTimestampUs < 0LL) {
1151 ALOGE("Negative info.mTimestampUs");
1152 return NULL;
1153 }
1154
1155 ALOGV("dequeueAccessUnit_AAC[%d]: mBuffer %zu info.mLength %zu",
1156 mAUIndex, mBuffer->size(), info.mLength);
1157
1158 struct ADTSPosition {
1159 size_t offset;
1160 size_t headerSize;
1161 size_t length;
1162 };
1163
1164 Vector<ADTSPosition> frames;
1165
1166 // The idea here is consume all AAC frames starting at offsets before
1167 // info.mLength so we can assign a meaningful timestamp without
1168 // having to interpolate.
1169 // The final AAC frame may well extend into the next RangeInfo but
1170 // that's ok.
1171 size_t offset = 0;
1172 while (offset < info.mLength) {
1173 if (offset + 7 > mBuffer->size()) {
1174 return NULL;
1175 }
1176
1177 ABitReader bits(mBuffer->data() + offset, mBuffer->size() - offset);
1178
1179 // adts_fixed_header
1180
1181 if (bits.getBits(12) != 0xfffu) {
1182 ALOGE("Wrong atds_fixed_header");
1183 return NULL;
1184 }
1185 bits.skipBits(3); // ID, layer
1186 bool protection_absent = bits.getBits(1) != 0;
1187
1188 if (mFormat == NULL) {
1189 mFormat = new MetaData;
1190 if (!MakeAACCodecSpecificData(
1191 *mFormat, mBuffer->data() + offset, mBuffer->size() - offset)) {
1192 return NULL;
1193 }
1194
1195 int32_t sampleRate;
1196 int32_t numChannels;
1197 if (!mFormat->findInt32(kKeySampleRate, &sampleRate)) {
1198 ALOGE("SampleRate not found");
1199 return NULL;
1200 }
1201 if (!mFormat->findInt32(kKeyChannelCount, &numChannels)) {
1202 ALOGE("ChannelCount not found");
1203 return NULL;
1204 }
1205
1206 ALOGI("found AAC codec config (%d Hz, %d channels)",
1207 sampleRate, numChannels);
1208 }
1209
1210 // profile_ObjectType, sampling_frequency_index, private_bits,
1211 // channel_configuration, original_copy, home
1212 bits.skipBits(12);
1213
1214 // adts_variable_header
1215
1216 // copyright_identification_bit, copyright_identification_start
1217 bits.skipBits(2);
1218
1219 unsigned aac_frame_length = bits.getBits(13);
1220 if (aac_frame_length == 0){
1221 ALOGE("b/62673179, Invalid AAC frame length!");
1222 android_errorWriteLog(0x534e4554, "62673179");
1223 return NULL;
1224 }
1225
1226 bits.skipBits(11); // adts_buffer_fullness
1227
1228 unsigned number_of_raw_data_blocks_in_frame = bits.getBits(2);
1229
1230 if (number_of_raw_data_blocks_in_frame != 0) {
1231 // To be implemented.
1232 ALOGE("Should not reach here.");
1233 return NULL;
1234 }
1235
1236 if (offset + aac_frame_length > mBuffer->size()) {
1237 return NULL;
1238 }
1239
1240 size_t headerSize = protection_absent ? 7 : 9;
1241
1242 // tracking the frame positions first then decrypt only if an accessUnit to be generated
1243 if (mSampleDecryptor != NULL) {
1244 ADTSPosition frame = {
1245 .offset = offset,
1246 .headerSize = headerSize,
1247 .length = aac_frame_length
1248 };
1249
1250 frames.push(frame);
1251 }
1252
1253 offset += aac_frame_length;
1254 }
1255
1256 // Decrypting only if the loop didn't exit early and an accessUnit is about to be generated
1257 // Not decrypting if key info not available (e.g., scanner/extractor parsing ts files)
1258 if (mSampleDecryptor != NULL) {
1259 for (size_t frameId = 0; frameId < frames.size(); frameId++) {
1260 const ADTSPosition &frame = frames.itemAt(frameId);
1261
1262 mSampleDecryptor->processAAC(frame.headerSize,
1263 mBuffer->data() + frame.offset, frame.length);
1264 // ALOGV("dequeueAccessUnitAAC[%zu]: while offset %zu headerSize %zu frame_len %zu",
1265 // frameId, frame.offset, frame.headerSize, frame.length);
1266 }
1267 }
1268 mAUIndex++;
1269
1270 int64_t timeUs = fetchTimestamp(offset);
1271
1272 sp<ABuffer> accessUnit = new ABuffer(offset);
1273 memcpy(accessUnit->data(), mBuffer->data(), offset);
1274
1275 memmove(mBuffer->data(), mBuffer->data() + offset,
1276 mBuffer->size() - offset);
1277 mBuffer->setRange(0, mBuffer->size() - offset);
1278
1279 accessUnit->meta()->setInt64("timeUs", timeUs);
1280 accessUnit->meta()->setInt32("isSync", 1);
1281
1282 return accessUnit;
1283 }
1284
fetchTimestamp(size_t size,int32_t * pesOffset,int32_t * pesScramblingControl)1285 int64_t ElementaryStreamQueue::fetchTimestamp(
1286 size_t size, int32_t *pesOffset, int32_t *pesScramblingControl) {
1287 int64_t timeUs = -1;
1288 bool first = true;
1289
1290 while (size > 0) {
1291 if (mRangeInfos.empty()) {
1292 return timeUs;
1293 }
1294
1295 RangeInfo *info = &*mRangeInfos.begin();
1296
1297 if (first) {
1298 timeUs = info->mTimestampUs;
1299 if (pesOffset != NULL) {
1300 *pesOffset = info->mPesOffset;
1301 }
1302 if (pesScramblingControl != NULL) {
1303 *pesScramblingControl = info->mPesScramblingControl;
1304 }
1305 first = false;
1306 }
1307
1308 if (info->mLength > size) {
1309 info->mLength -= size;
1310 size = 0;
1311 } else {
1312 size -= info->mLength;
1313
1314 mRangeInfos.erase(mRangeInfos.begin());
1315 info = NULL;
1316 }
1317
1318 }
1319
1320 if (timeUs == 0LL) {
1321 ALOGV("Returning 0 timestamp");
1322 }
1323
1324 return timeUs;
1325 }
1326
dequeueAccessUnitH264()1327 sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitH264() {
1328 const uint8_t *data = mBuffer->data();
1329
1330 size_t size = mBuffer->size();
1331 Vector<NALPosition> nals;
1332
1333 size_t totalSize = 0;
1334 size_t seiCount = 0;
1335
1336 status_t err;
1337 const uint8_t *nalStart;
1338 size_t nalSize;
1339 bool foundSlice = false;
1340 bool foundIDR = false;
1341
1342 ALOGV("dequeueAccessUnit_H264[%d] %p/%zu", mAUIndex, data, size);
1343
1344 while ((err = getNextNALUnit(&data, &size, &nalStart, &nalSize)) == OK) {
1345 if (nalSize == 0) continue;
1346
1347 unsigned nalType = nalStart[0] & 0x1f;
1348 bool flush = false;
1349
1350 if (nalType == 1 || nalType == 5) {
1351 if (nalType == 5) {
1352 foundIDR = true;
1353 }
1354 if (foundSlice) {
1355 //TODO: Shouldn't this have been called with nalSize-1?
1356 ABitReader br(nalStart + 1, nalSize);
1357 unsigned first_mb_in_slice = parseUE(&br);
1358
1359 if (first_mb_in_slice == 0) {
1360 // This slice starts a new frame.
1361
1362 flush = true;
1363 }
1364 }
1365
1366 foundSlice = true;
1367 } else if ((nalType == 9 || nalType == 7) && foundSlice) {
1368 // Access unit delimiter and SPS will be associated with the
1369 // next frame.
1370
1371 flush = true;
1372 } else if (nalType == 6 && nalSize > 0) {
1373 // found non-zero sized SEI
1374 ++seiCount;
1375 }
1376
1377 if (flush) {
1378 // The access unit will contain all nal units up to, but excluding
1379 // the current one, separated by 0x00 0x00 0x00 0x01 startcodes.
1380
1381 size_t auSize = 4 * nals.size() + totalSize;
1382 sp<ABuffer> accessUnit = new ABuffer(auSize);
1383 sp<ABuffer> sei;
1384
1385 if (seiCount > 0) {
1386 sei = new ABuffer(seiCount * sizeof(NALPosition));
1387 accessUnit->meta()->setBuffer("sei", sei);
1388 }
1389
1390 #if !LOG_NDEBUG
1391 AString out;
1392 #endif
1393
1394 size_t dstOffset = 0;
1395 size_t seiIndex = 0;
1396 size_t shrunkBytes = 0;
1397 for (size_t i = 0; i < nals.size(); ++i) {
1398 const NALPosition &pos = nals.itemAt(i);
1399
1400 unsigned nalType = mBuffer->data()[pos.nalOffset] & 0x1f;
1401
1402 if (nalType == 6 && pos.nalSize > 0) {
1403 if (seiIndex >= sei->size() / sizeof(NALPosition)) {
1404 ALOGE("Wrong seiIndex");
1405 return NULL;
1406 }
1407 NALPosition &seiPos = ((NALPosition *)sei->data())[seiIndex++];
1408 seiPos.nalOffset = dstOffset + 4;
1409 seiPos.nalSize = pos.nalSize;
1410 }
1411
1412 #if !LOG_NDEBUG
1413 char tmp[128];
1414 sprintf(tmp, "0x%02x", nalType);
1415 if (i > 0) {
1416 out.append(", ");
1417 }
1418 out.append(tmp);
1419 #endif
1420
1421 memcpy(accessUnit->data() + dstOffset, "\x00\x00\x00\x01", 4);
1422
1423 if (mSampleDecryptor != NULL && (nalType == 1 || nalType == 5)) {
1424 uint8_t *nalData = mBuffer->data() + pos.nalOffset;
1425 size_t newSize = mSampleDecryptor->processNal(nalData, pos.nalSize);
1426 // Note: the data can shrink due to unescaping
1427 memcpy(accessUnit->data() + dstOffset + 4,
1428 nalData,
1429 newSize);
1430 dstOffset += newSize + 4;
1431
1432 size_t thisShrunkBytes = pos.nalSize - newSize;
1433 //ALOGV("dequeueAccessUnitH264[%d]: nalType: %d -> %zu (%zu)",
1434 // nalType, (int)pos.nalSize, newSize, thisShrunkBytes);
1435
1436 shrunkBytes += thisShrunkBytes;
1437 }
1438 else {
1439 memcpy(accessUnit->data() + dstOffset + 4,
1440 mBuffer->data() + pos.nalOffset,
1441 pos.nalSize);
1442
1443 dstOffset += pos.nalSize + 4;
1444 //ALOGV("dequeueAccessUnitH264 [%d] %d @%d",
1445 // nalType, (int)pos.nalSize, (int)pos.nalOffset);
1446 }
1447 }
1448
1449 #if !LOG_NDEBUG
1450 ALOGV("accessUnit contains nal types %s", out.c_str());
1451 #endif
1452
1453 const NALPosition &pos = nals.itemAt(nals.size() - 1);
1454 size_t nextScan = pos.nalOffset + pos.nalSize;
1455
1456 memmove(mBuffer->data(),
1457 mBuffer->data() + nextScan,
1458 mBuffer->size() - nextScan);
1459
1460 mBuffer->setRange(0, mBuffer->size() - nextScan);
1461
1462 int64_t timeUs = fetchTimestamp(nextScan);
1463 if (timeUs < 0LL) {
1464 ALOGE("Negative timeUs");
1465 return NULL;
1466 }
1467
1468 accessUnit->meta()->setInt64("timeUs", timeUs);
1469 if (foundIDR) {
1470 accessUnit->meta()->setInt32("isSync", 1);
1471 }
1472
1473 if (mFormat == NULL) {
1474 mFormat = new MetaData;
1475 if (!MakeAVCCodecSpecificData(*mFormat,
1476 accessUnit->data(),
1477 accessUnit->size())) {
1478 mFormat.clear();
1479 }
1480 }
1481
1482 if (mSampleDecryptor != NULL && shrunkBytes > 0) {
1483 size_t adjustedSize = accessUnit->size() - shrunkBytes;
1484 ALOGV("dequeueAccessUnitH264[%d]: AU size adjusted %zu -> %zu",
1485 mAUIndex, accessUnit->size(), adjustedSize);
1486 accessUnit->setRange(0, adjustedSize);
1487 }
1488
1489 ALOGV("dequeueAccessUnitH264[%d]: AU %p(%zu) dstOffset:%zu, nals:%zu, totalSize:%zu ",
1490 mAUIndex, accessUnit->data(), accessUnit->size(),
1491 dstOffset, nals.size(), totalSize);
1492 mAUIndex++;
1493
1494 return accessUnit;
1495 }
1496
1497 NALPosition pos;
1498 pos.nalOffset = nalStart - mBuffer->data();
1499 pos.nalSize = nalSize;
1500
1501 nals.push(pos);
1502
1503 totalSize += nalSize;
1504 }
1505 if (err != (status_t)-EAGAIN) {
1506 ALOGE("Unexpeted err");
1507 return NULL;
1508 }
1509
1510 return NULL;
1511 }
1512
dequeueAccessUnitMPEGAudio()1513 sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitMPEGAudio() {
1514 const uint8_t *data = mBuffer->data();
1515 size_t size = mBuffer->size();
1516
1517 if (size < 4) {
1518 return NULL;
1519 }
1520
1521 uint32_t header = U32_AT(data);
1522
1523 size_t frameSize;
1524 int samplingRate, numChannels, bitrate, numSamples;
1525 if (!GetMPEGAudioFrameSize(
1526 header, &frameSize, &samplingRate, &numChannels,
1527 &bitrate, &numSamples)) {
1528 ALOGE("Failed to get audio frame size");
1529 mBuffer->setRange(0, 0);
1530 return NULL;
1531 }
1532
1533 if (size < frameSize) {
1534 return NULL;
1535 }
1536
1537 unsigned layer = 4 - ((header >> 17) & 3);
1538
1539 sp<ABuffer> accessUnit = new ABuffer(frameSize);
1540 memcpy(accessUnit->data(), data, frameSize);
1541
1542 memmove(mBuffer->data(),
1543 mBuffer->data() + frameSize,
1544 mBuffer->size() - frameSize);
1545
1546 mBuffer->setRange(0, mBuffer->size() - frameSize);
1547
1548 int64_t timeUs = fetchTimestamp(frameSize);
1549 if (timeUs < 0LL) {
1550 ALOGE("Negative timeUs");
1551 return NULL;
1552 }
1553
1554 if (mFormat != NULL) {
1555 const char *mime;
1556 if (mFormat->findCString(kKeyMIMEType, &mime)) {
1557 if ((layer == 1) && strcmp (mime, MEDIA_MIMETYPE_AUDIO_MPEG_LAYER_I)) {
1558 ALOGE("Audio layer is not MPEG_LAYER_I");
1559 return NULL;
1560 } else if ((layer == 2) && strcmp (mime, MEDIA_MIMETYPE_AUDIO_MPEG_LAYER_II)) {
1561 ALOGE("Audio layer is not MPEG_LAYER_II");
1562 return NULL;
1563 } else if ((layer == 3) && strcmp (mime, MEDIA_MIMETYPE_AUDIO_MPEG)) {
1564 ALOGE("Audio layer is not AUDIO_MPEG");
1565 return NULL;
1566 }
1567 }
1568 }
1569
1570 accessUnit->meta()->setInt64("timeUs", timeUs);
1571 accessUnit->meta()->setInt32("isSync", 1);
1572
1573 if (mFormat == NULL) {
1574 mFormat = new MetaData;
1575
1576 switch (layer) {
1577 case 1:
1578 mFormat->setCString(
1579 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MPEG_LAYER_I);
1580 break;
1581 case 2:
1582 mFormat->setCString(
1583 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MPEG_LAYER_II);
1584 break;
1585 case 3:
1586 mFormat->setCString(
1587 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MPEG);
1588 break;
1589 default:
1590 return NULL;
1591 }
1592
1593 mFormat->setInt32(kKeySampleRate, samplingRate);
1594 mFormat->setInt32(kKeyChannelCount, numChannels);
1595 }
1596
1597 return accessUnit;
1598 }
1599
EncodeSize14(uint8_t ** _ptr,size_t size)1600 static void EncodeSize14(uint8_t **_ptr, size_t size) {
1601 if (size > 0x3fff) {
1602 ALOGE("Wrong size");
1603 return;
1604 }
1605
1606 uint8_t *ptr = *_ptr;
1607
1608 *ptr++ = 0x80 | (size >> 7);
1609 *ptr++ = size & 0x7f;
1610
1611 *_ptr = ptr;
1612 }
1613
// Wraps codec specific data in an ESDS blob made of three nested
// descriptors (tags 0x03, 0x04 and 0x05), each followed by a 2-byte size
// from EncodeSize14(). The result is 25 bytes of framing plus the csd bytes.
// NOTE(review): the tags look like the ES / DecoderConfig / DecoderSpecificInfo
// descriptors of ISO/IEC 14496-1 — confirm against the spec.
static sp<ABuffer> MakeMPEGVideoESDS(const sp<ABuffer> &csd) {
    const size_t csdSize = csd->size();

    sp<ABuffer> esds = new ABuffer(csdSize + 25);
    uint8_t *out = esds->data();

    // Outer descriptor (tag 0x03).
    *out++ = 0x03;
    EncodeSize14(&out, 22 + csdSize);

    // ES_ID (2 bytes), then streamDependenceFlag, URL_Flag, OCRstreamFlag.
    memset(out, 0x00, 3);
    out += 3;

    // Middle descriptor (tag 0x04).
    *out++ = 0x04;
    EncodeSize14(&out, 16 + csdSize);

    *out++ = 0x40;  // Audio ISO/IEC 14496-3

    // Twelve zero bytes follow the object type byte.
    memset(out, 0x00, 12);
    out += 12;

    // Inner descriptor (tag 0x05) carrying the csd itself.
    *out++ = 0x05;
    EncodeSize14(&out, csdSize);

    memcpy(out, csd->data(), csdSize);

    return esds;
}
1642
dequeueAccessUnitMPEGVideo()1643 sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitMPEGVideo() {
1644 const uint8_t *data = mBuffer->data();
1645 size_t size = mBuffer->size();
1646
1647 Vector<size_t> userDataPositions;
1648
1649 bool sawPictureStart = false;
1650 int pprevStartCode = -1;
1651 int prevStartCode = -1;
1652 int currentStartCode = -1;
1653 bool gopFound = false;
1654 bool isClosedGop = false;
1655 bool brokenLink = false;
1656
1657 size_t offset = 0;
1658 while (offset + 3 < size) {
1659 if (memcmp(&data[offset], "\x00\x00\x01", 3)) {
1660 ++offset;
1661 continue;
1662 }
1663
1664 pprevStartCode = prevStartCode;
1665 prevStartCode = currentStartCode;
1666 currentStartCode = data[offset + 3];
1667
1668 if (currentStartCode == 0xb3 && mFormat == NULL) {
1669 memmove(mBuffer->data(), mBuffer->data() + offset, size - offset);
1670 size -= offset;
1671 (void)fetchTimestamp(offset);
1672 offset = 0;
1673 mBuffer->setRange(0, size);
1674 }
1675
1676 if ((prevStartCode == 0xb3 && currentStartCode != 0xb5)
1677 || (pprevStartCode == 0xb3 && prevStartCode == 0xb5)) {
1678 // seqHeader without/with extension
1679
1680 if (mFormat == NULL) {
1681 if (size < 7u) {
1682 ALOGE("Size too small");
1683 return NULL;
1684 }
1685
1686 unsigned width =
1687 (data[4] << 4) | data[5] >> 4;
1688
1689 unsigned height =
1690 ((data[5] & 0x0f) << 8) | data[6];
1691
1692 mFormat = new MetaData;
1693 mFormat->setCString(kKeyMIMEType, MEDIA_MIMETYPE_VIDEO_MPEG2);
1694 mFormat->setInt32(kKeyWidth, width);
1695 mFormat->setInt32(kKeyHeight, height);
1696
1697 ALOGI("found MPEG2 video codec config (%d x %d)", width, height);
1698
1699 sp<ABuffer> csd = new ABuffer(offset);
1700 memcpy(csd->data(), data, offset);
1701
1702 memmove(mBuffer->data(),
1703 mBuffer->data() + offset,
1704 mBuffer->size() - offset);
1705
1706 mBuffer->setRange(0, mBuffer->size() - offset);
1707 size -= offset;
1708 (void)fetchTimestamp(offset);
1709 offset = 0;
1710
1711 // hexdump(csd->data(), csd->size());
1712
1713 sp<ABuffer> esds = MakeMPEGVideoESDS(csd);
1714 mFormat->setData(
1715 kKeyESDS, kTypeESDS, esds->data(), esds->size());
1716
1717 return NULL;
1718 }
1719 }
1720
1721 if (mFormat != NULL && currentStartCode == 0xb8) {
1722 // GOP layer
1723 if (offset + 7 >= size) {
1724 ALOGE("Size too small");
1725 return NULL;
1726 }
1727 gopFound = true;
1728 isClosedGop = (data[offset + 7] & 0x40) != 0;
1729 brokenLink = (data[offset + 7] & 0x20) != 0;
1730 }
1731
1732 if (mFormat != NULL && currentStartCode == 0xb2) {
1733 userDataPositions.add(offset);
1734 }
1735
1736 if (mFormat != NULL && currentStartCode == 0x00) {
1737 // Picture start
1738
1739 if (!sawPictureStart) {
1740 sawPictureStart = true;
1741 } else {
1742 sp<ABuffer> accessUnit = new ABuffer(offset);
1743 memcpy(accessUnit->data(), data, offset);
1744
1745 memmove(mBuffer->data(),
1746 mBuffer->data() + offset,
1747 mBuffer->size() - offset);
1748
1749 mBuffer->setRange(0, mBuffer->size() - offset);
1750
1751 int64_t timeUs = fetchTimestamp(offset);
1752 if (timeUs < 0LL) {
1753 ALOGE("Negative timeUs");
1754 return NULL;
1755 }
1756
1757 offset = 0;
1758
1759 accessUnit->meta()->setInt64("timeUs", timeUs);
1760 if (gopFound && (!brokenLink || isClosedGop)) {
1761 accessUnit->meta()->setInt32("isSync", 1);
1762 }
1763
1764 ALOGV("returning MPEG video access unit at time %" PRId64 " us",
1765 timeUs);
1766
1767 // hexdump(accessUnit->data(), accessUnit->size());
1768
1769 if (userDataPositions.size() > 0) {
1770 sp<ABuffer> mpegUserData =
1771 new ABuffer(userDataPositions.size() * sizeof(size_t));
1772 if (mpegUserData != NULL && mpegUserData->data() != NULL) {
1773 for (size_t i = 0; i < userDataPositions.size(); ++i) {
1774 memcpy(
1775 mpegUserData->data() + i * sizeof(size_t),
1776 &userDataPositions[i], sizeof(size_t));
1777 }
1778 accessUnit->meta()->setBuffer("mpeg-user-data", mpegUserData);
1779 }
1780 }
1781
1782 return accessUnit;
1783 }
1784 }
1785
1786 ++offset;
1787 }
1788
1789 return NULL;
1790 }
1791
// Returns the size of the chunk at the start of `data`, i.e. the offset of
// the next 00 00 01 prefix found at or after byte 4.
//
// Returns -EAGAIN when fewer than 7 bytes are available, when `data` does not
// begin with the prefix, or when no following prefix is found.
static ssize_t getNextChunkSize(
        const uint8_t *data, size_t size) {
    static const uint8_t kPrefix[] = { 0x00, 0x00, 0x01 };
    static const size_t kPrefixLen = sizeof(kPrefix);

    // per ISO/IEC 14496-2 6.2.1, a chunk has a 3-byte prefix + 1-byte start code
    // we need at least <prefix><start><next prefix> to successfully scan
    if (size < kPrefixLen + 1 + kPrefixLen) {
        return -EAGAIN;
    }

    if (memcmp(data, kPrefix, kPrefixLen) != 0) {
        return -EAGAIN;
    }

    // Look for the next prefix, starting right after the start code byte.
    for (size_t pos = kPrefixLen + 1; pos + kPrefixLen <= size; ++pos) {
        if (memcmp(data + pos, kPrefix, kPrefixLen) == 0) {
            return pos;
        }
    }

    return -EAGAIN;
}
1817
dequeueAccessUnitMPEG4Video()1818 sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitMPEG4Video() {
1819 uint8_t *data = mBuffer->data();
1820 size_t size = mBuffer->size();
1821
1822 enum {
1823 SKIP_TO_VISUAL_OBJECT_SEQ_START,
1824 EXPECT_VISUAL_OBJECT_START,
1825 EXPECT_VO_START,
1826 EXPECT_VOL_START,
1827 WAIT_FOR_VOP_START,
1828 SKIP_TO_VOP_START,
1829
1830 } state;
1831
1832 if (mFormat == NULL) {
1833 state = SKIP_TO_VISUAL_OBJECT_SEQ_START;
1834 } else {
1835 state = SKIP_TO_VOP_START;
1836 }
1837
1838 int32_t width = -1, height = -1;
1839
1840 size_t offset = 0;
1841 ssize_t chunkSize;
1842 while ((chunkSize = getNextChunkSize(
1843 &data[offset], size - offset)) > 0) {
1844 bool discard = false;
1845
1846 unsigned chunkType = data[offset + 3];
1847
1848 switch (state) {
1849 case SKIP_TO_VISUAL_OBJECT_SEQ_START:
1850 {
1851 if (chunkType == 0xb0) {
1852 // Discard anything before this marker.
1853
1854 state = EXPECT_VISUAL_OBJECT_START;
1855 } else {
1856 discard = true;
1857 offset += chunkSize;
1858 ALOGW("b/74114680, advance to next chunk");
1859 android_errorWriteLog(0x534e4554, "74114680");
1860 }
1861 break;
1862 }
1863
1864 case EXPECT_VISUAL_OBJECT_START:
1865 {
1866 if (chunkType != 0xb5) {
1867 ALOGE("Unexpected chunkType");
1868 return NULL;
1869 }
1870 state = EXPECT_VO_START;
1871 break;
1872 }
1873
1874 case EXPECT_VO_START:
1875 {
1876 if (chunkType > 0x1f) {
1877 ALOGE("Unexpected chunkType");
1878 return NULL;
1879 }
1880 state = EXPECT_VOL_START;
1881 break;
1882 }
1883
1884 case EXPECT_VOL_START:
1885 {
1886 if ((chunkType & 0xf0) != 0x20) {
1887 ALOGE("Wrong chunkType");
1888 return NULL;
1889 }
1890
1891 if (!ExtractDimensionsFromVOLHeader(
1892 &data[offset], chunkSize,
1893 &width, &height)) {
1894 ALOGE("Failed to get dimension");
1895 return NULL;
1896 }
1897
1898 state = WAIT_FOR_VOP_START;
1899 break;
1900 }
1901
1902 case WAIT_FOR_VOP_START:
1903 {
1904 if (chunkType == 0xb3 || chunkType == 0xb6) {
1905 // group of VOP or VOP start.
1906
1907 mFormat = new MetaData;
1908 mFormat->setCString(
1909 kKeyMIMEType, MEDIA_MIMETYPE_VIDEO_MPEG4);
1910
1911 mFormat->setInt32(kKeyWidth, width);
1912 mFormat->setInt32(kKeyHeight, height);
1913
1914 ALOGI("found MPEG4 video codec config (%d x %d)",
1915 width, height);
1916
1917 sp<ABuffer> csd = new ABuffer(offset);
1918 memcpy(csd->data(), data, offset);
1919
1920 // hexdump(csd->data(), csd->size());
1921
1922 sp<ABuffer> esds = MakeMPEGVideoESDS(csd);
1923 mFormat->setData(
1924 kKeyESDS, kTypeESDS,
1925 esds->data(), esds->size());
1926
1927 discard = true;
1928 state = SKIP_TO_VOP_START;
1929 }
1930
1931 break;
1932 }
1933
1934 case SKIP_TO_VOP_START:
1935 {
1936 if (chunkType == 0xb6) {
1937 int vopCodingType = (data[offset + 4] & 0xc0) >> 6;
1938
1939 offset += chunkSize;
1940
1941 sp<ABuffer> accessUnit = new ABuffer(offset);
1942 memcpy(accessUnit->data(), data, offset);
1943
1944 memmove(data, &data[offset], size - offset);
1945 size -= offset;
1946 mBuffer->setRange(0, size);
1947
1948 int64_t timeUs = fetchTimestamp(offset);
1949 if (timeUs < 0LL) {
1950 ALOGE("Negative timeus");
1951 return NULL;
1952 }
1953
1954 offset = 0;
1955
1956 accessUnit->meta()->setInt64("timeUs", timeUs);
1957 if (vopCodingType == 0) { // intra-coded VOP
1958 accessUnit->meta()->setInt32("isSync", 1);
1959 }
1960
1961 ALOGV("returning MPEG4 video access unit at time %" PRId64 " us",
1962 timeUs);
1963
1964 // hexdump(accessUnit->data(), accessUnit->size());
1965
1966 return accessUnit;
1967 } else if (chunkType != 0xb3) {
1968 offset += chunkSize;
1969 discard = true;
1970 }
1971
1972 break;
1973 }
1974
1975 default:
1976 ALOGE("Unknown state: %d", state);
1977 return NULL;
1978 }
1979
1980 if (discard) {
1981 (void)fetchTimestamp(offset);
1982 memmove(data, &data[offset], size - offset);
1983 size -= offset;
1984 offset = 0;
1985 mBuffer->setRange(0, size);
1986 } else {
1987 offset += chunkSize;
1988 }
1989 }
1990
1991 return NULL;
1992 }
1993
signalEOS()1994 void ElementaryStreamQueue::signalEOS() {
1995 if (!mEOSReached) {
1996 if (mMode == MPEG_VIDEO) {
1997 const char *theEnd = "\x00\x00\x01\x00";
1998 appendData(theEnd, 4, 0);
1999 }
2000 mEOSReached = true;
2001 } else {
2002 ALOGW("EOS already signaled");
2003 }
2004 }
2005
dequeueAccessUnitMetadata()2006 sp<ABuffer> ElementaryStreamQueue::dequeueAccessUnitMetadata() {
2007 size_t size = mBuffer->size();
2008 if (!size) {
2009 return NULL;
2010 }
2011
2012 sp<ABuffer> accessUnit = new ABuffer(size);
2013 int64_t timeUs = fetchTimestamp(size);
2014 accessUnit->meta()->setInt64("timeUs", timeUs);
2015
2016 memcpy(accessUnit->data(), mBuffer->data(), size);
2017 mBuffer->setRange(0, 0);
2018
2019 if (mFormat == NULL) {
2020 mFormat = new MetaData;
2021 mFormat->setCString(kKeyMIMEType, MEDIA_MIMETYPE_DATA_TIMED_ID3);
2022 }
2023
2024 return accessUnit;
2025 }
2026
signalNewSampleAesKey(const sp<AMessage> & keyItem)2027 void ElementaryStreamQueue::signalNewSampleAesKey(const sp<AMessage> &keyItem) {
2028 if (mSampleDecryptor == NULL) {
2029 ALOGE("signalNewSampleAesKey: Stream %x is not encrypted; keyItem: %p",
2030 mMode, keyItem.get());
2031 return;
2032 }
2033
2034 mSampleDecryptor->signalNewSampleAesKey(keyItem);
2035 }
2036
2037
2038 } // namespace android
2039