1 /*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 //#define LOG_NDEBUG 0
18 #define LOG_TAG "WAVExtractor"
19 #include <utils/Log.h>
20
21 #include "include/WAVExtractor.h"
22
23 #include <audio_utils/primitives.h>
24 #include <media/stagefright/foundation/ADebug.h>
25 #include <media/stagefright/DataSource.h>
26 #include <media/stagefright/MediaBufferGroup.h>
27 #include <media/stagefright/MediaDefs.h>
28 #include <media/stagefright/MediaErrors.h>
29 #include <media/stagefright/MediaSource.h>
30 #include <media/stagefright/MetaData.h>
31 #include <utils/String8.h>
32 #include <cutils/bitops.h>
33
34 #define CHANNEL_MASK_USE_CHANNEL_ORDER 0
35
36 namespace android {
37
// Format tags found in the first two bytes of a "fmt " chunk (and, for
// WAVE_FORMAT_EXTENSIBLE, in the first two bytes of the sub-format GUID).
enum {
    WAVE_FORMAT_PCM        = 0x0001,
    WAVE_FORMAT_IEEE_FLOAT = 0x0003,
    WAVE_FORMAT_ALAW       = 0x0006,
    WAVE_FORMAT_MULAW      = 0x0007,
    WAVE_FORMAT_MSGSM      = 0x0031,
    WAVE_FORMAT_EXTENSIBLE = 0xFFFE
};

// Trailing 14 bytes of the sub-format GUIDs accepted in a WAVE_FORMAT_EXTENSIBLE
// header. The leading two GUID bytes carry the sample format tag and are read
// separately, so only bytes 2..15 are compared (with memcmp, since the data
// contains embedded NUL bytes). The pointers themselves are const so the
// tables cannot be re-seated by accident.
static const char* const WAVEEXT_SUBFORMAT = "\x00\x00\x00\x00\x10\x00\x80\x00\x00\xAA\x00\x38\x9B\x71";
static const char* const AMBISONIC_SUBFORMAT = "\x00\x00\x21\x07\xD3\x11\x86\x44\xC8\xC1\xCA\x00\x00\x00";
49
// Reads a 32-bit little-endian unsigned integer from the four bytes at ptr.
// Each byte is widened to uint32_t before shifting: shifting the promoted
// (signed) int left by 24 would move a set top bit into the sign bit.
static uint32_t U32_LE_AT(const uint8_t *ptr) {
    return static_cast<uint32_t>(ptr[3]) << 24
            | static_cast<uint32_t>(ptr[2]) << 16
            | static_cast<uint32_t>(ptr[1]) << 8
            | static_cast<uint32_t>(ptr[0]);
}
53
// Reads a 16-bit little-endian unsigned integer from the two bytes at ptr.
static uint16_t U16_LE_AT(const uint8_t *ptr) {
    const unsigned lowByte = ptr[0];
    const unsigned highByte = ptr[1];
    return static_cast<uint16_t>((highByte << 8) | lowByte);
}
57
58 struct WAVSource : public MediaSource {
59 WAVSource(
60 const sp<DataSource> &dataSource,
61 const sp<MetaData> &meta,
62 uint16_t waveFormat,
63 int32_t bitsPerSample,
64 off64_t offset, size_t size);
65
66 virtual status_t start(MetaData *params = NULL);
67 virtual status_t stop();
68 virtual sp<MetaData> getFormat();
69
70 virtual status_t read(
71 MediaBuffer **buffer, const ReadOptions *options = NULL);
72
supportNonblockingReadandroid::WAVSource73 virtual bool supportNonblockingRead() { return true; }
74
75 protected:
76 virtual ~WAVSource();
77
78 private:
79 static const size_t kMaxFrameSize;
80
81 sp<DataSource> mDataSource;
82 sp<MetaData> mMeta;
83 uint16_t mWaveFormat;
84 int32_t mSampleRate;
85 int32_t mNumChannels;
86 int32_t mBitsPerSample;
87 off64_t mOffset;
88 size_t mSize;
89 bool mStarted;
90 MediaBufferGroup *mGroup;
91 off64_t mCurrentPos;
92
93 WAVSource(const WAVSource &);
94 WAVSource &operator=(const WAVSource &);
95 };
96
WAVExtractor(const sp<DataSource> & source)97 WAVExtractor::WAVExtractor(const sp<DataSource> &source)
98 : mDataSource(source),
99 mValidFormat(false),
100 mChannelMask(CHANNEL_MASK_USE_CHANNEL_ORDER) {
101 mInitCheck = init();
102 }
103
~WAVExtractor()104 WAVExtractor::~WAVExtractor() {
105 }
106
getMetaData()107 sp<MetaData> WAVExtractor::getMetaData() {
108 sp<MetaData> meta = new MetaData;
109
110 if (mInitCheck != OK) {
111 return meta;
112 }
113
114 meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_WAV);
115
116 return meta;
117 }
118
countTracks()119 size_t WAVExtractor::countTracks() {
120 return mInitCheck == OK ? 1 : 0;
121 }
122
getTrack(size_t index)123 sp<IMediaSource> WAVExtractor::getTrack(size_t index) {
124 if (mInitCheck != OK || index > 0) {
125 return NULL;
126 }
127
128 return new WAVSource(
129 mDataSource, mTrackMeta,
130 mWaveFormat, mBitsPerSample, mDataOffset, mDataSize);
131 }
132
getTrackMetaData(size_t index,uint32_t)133 sp<MetaData> WAVExtractor::getTrackMetaData(
134 size_t index, uint32_t /* flags */) {
135 if (mInitCheck != OK || index > 0) {
136 return NULL;
137 }
138
139 return mTrackMeta;
140 }
141
init()142 status_t WAVExtractor::init() {
143 uint8_t header[12];
144 if (mDataSource->readAt(
145 0, header, sizeof(header)) < (ssize_t)sizeof(header)) {
146 return NO_INIT;
147 }
148
149 if (memcmp(header, "RIFF", 4) || memcmp(&header[8], "WAVE", 4)) {
150 return NO_INIT;
151 }
152
153 size_t totalSize = U32_LE_AT(&header[4]);
154
155 off64_t offset = 12;
156 size_t remainingSize = totalSize;
157 while (remainingSize >= 8) {
158 uint8_t chunkHeader[8];
159 if (mDataSource->readAt(offset, chunkHeader, 8) < 8) {
160 return NO_INIT;
161 }
162
163 remainingSize -= 8;
164 offset += 8;
165
166 uint32_t chunkSize = U32_LE_AT(&chunkHeader[4]);
167
168 if (chunkSize > remainingSize) {
169 return NO_INIT;
170 }
171
172 if (!memcmp(chunkHeader, "fmt ", 4)) {
173 if (chunkSize < 16) {
174 return NO_INIT;
175 }
176
177 uint8_t formatSpec[40];
178 if (mDataSource->readAt(offset, formatSpec, 2) < 2) {
179 return NO_INIT;
180 }
181
182 mWaveFormat = U16_LE_AT(formatSpec);
183 if (mWaveFormat != WAVE_FORMAT_PCM
184 && mWaveFormat != WAVE_FORMAT_IEEE_FLOAT
185 && mWaveFormat != WAVE_FORMAT_ALAW
186 && mWaveFormat != WAVE_FORMAT_MULAW
187 && mWaveFormat != WAVE_FORMAT_MSGSM
188 && mWaveFormat != WAVE_FORMAT_EXTENSIBLE) {
189 return ERROR_UNSUPPORTED;
190 }
191
192 uint8_t fmtSize = 16;
193 if (mWaveFormat == WAVE_FORMAT_EXTENSIBLE) {
194 fmtSize = 40;
195 }
196 if (mDataSource->readAt(offset, formatSpec, fmtSize) < fmtSize) {
197 return NO_INIT;
198 }
199
200 mNumChannels = U16_LE_AT(&formatSpec[2]);
201
202 if (mNumChannels < 1 || mNumChannels > 8) {
203 ALOGE("Unsupported number of channels (%d)", mNumChannels);
204 return ERROR_UNSUPPORTED;
205 }
206
207 if (mWaveFormat != WAVE_FORMAT_EXTENSIBLE) {
208 if (mNumChannels != 1 && mNumChannels != 2) {
209 ALOGW("More than 2 channels (%d) in non-WAVE_EXT, unknown channel mask",
210 mNumChannels);
211 }
212 }
213
214 mSampleRate = U32_LE_AT(&formatSpec[4]);
215
216 if (mSampleRate == 0) {
217 return ERROR_MALFORMED;
218 }
219
220 mBitsPerSample = U16_LE_AT(&formatSpec[14]);
221
222 if (mWaveFormat == WAVE_FORMAT_EXTENSIBLE) {
223 uint16_t validBitsPerSample = U16_LE_AT(&formatSpec[18]);
224 if (validBitsPerSample != mBitsPerSample) {
225 if (validBitsPerSample != 0) {
226 ALOGE("validBits(%d) != bitsPerSample(%d) are not supported",
227 validBitsPerSample, mBitsPerSample);
228 return ERROR_UNSUPPORTED;
229 } else {
230 // we only support valitBitsPerSample == bitsPerSample but some WAV_EXT
231 // writers don't correctly set the valid bits value, and leave it at 0.
232 ALOGW("WAVE_EXT has 0 valid bits per sample, ignoring");
233 }
234 }
235
236 mChannelMask = U32_LE_AT(&formatSpec[20]);
237 ALOGV("numChannels=%d channelMask=0x%x", mNumChannels, mChannelMask);
238 if ((mChannelMask >> 18) != 0) {
239 ALOGE("invalid channel mask 0x%x", mChannelMask);
240 return ERROR_MALFORMED;
241 }
242
243 if ((mChannelMask != CHANNEL_MASK_USE_CHANNEL_ORDER)
244 && (popcount(mChannelMask) != mNumChannels)) {
245 ALOGE("invalid number of channels (%d) in channel mask (0x%x)",
246 popcount(mChannelMask), mChannelMask);
247 return ERROR_MALFORMED;
248 }
249
250 // In a WAVE_EXT header, the first two bytes of the GUID stored at byte 24 contain
251 // the sample format, using the same definitions as a regular WAV header
252 mWaveFormat = U16_LE_AT(&formatSpec[24]);
253 if (memcmp(&formatSpec[26], WAVEEXT_SUBFORMAT, 14) &&
254 memcmp(&formatSpec[26], AMBISONIC_SUBFORMAT, 14)) {
255 ALOGE("unsupported GUID");
256 return ERROR_UNSUPPORTED;
257 }
258 }
259
260 if (mWaveFormat == WAVE_FORMAT_PCM) {
261 if (mBitsPerSample != 8 && mBitsPerSample != 16
262 && mBitsPerSample != 24 && mBitsPerSample != 32) {
263 return ERROR_UNSUPPORTED;
264 }
265 } else if (mWaveFormat == WAVE_FORMAT_IEEE_FLOAT) {
266 if (mBitsPerSample != 32) { // TODO we don't support double
267 return ERROR_UNSUPPORTED;
268 }
269 }
270 else if (mWaveFormat == WAVE_FORMAT_MSGSM) {
271 if (mBitsPerSample != 0) {
272 return ERROR_UNSUPPORTED;
273 }
274 } else if (mWaveFormat == WAVE_FORMAT_MULAW || mWaveFormat == WAVE_FORMAT_ALAW) {
275 if (mBitsPerSample != 8) {
276 return ERROR_UNSUPPORTED;
277 }
278 } else {
279 return ERROR_UNSUPPORTED;
280 }
281
282 mValidFormat = true;
283 } else if (!memcmp(chunkHeader, "data", 4)) {
284 if (mValidFormat) {
285 mDataOffset = offset;
286 mDataSize = chunkSize;
287
288 mTrackMeta = new MetaData;
289
290 switch (mWaveFormat) {
291 case WAVE_FORMAT_PCM:
292 case WAVE_FORMAT_IEEE_FLOAT:
293 mTrackMeta->setCString(
294 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_RAW);
295 break;
296 case WAVE_FORMAT_ALAW:
297 mTrackMeta->setCString(
298 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_G711_ALAW);
299 break;
300 case WAVE_FORMAT_MSGSM:
301 mTrackMeta->setCString(
302 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MSGSM);
303 break;
304 default:
305 CHECK_EQ(mWaveFormat, (uint16_t)WAVE_FORMAT_MULAW);
306 mTrackMeta->setCString(
307 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_G711_MLAW);
308 break;
309 }
310
311 mTrackMeta->setInt32(kKeyChannelCount, mNumChannels);
312 mTrackMeta->setInt32(kKeyChannelMask, mChannelMask);
313 mTrackMeta->setInt32(kKeySampleRate, mSampleRate);
314 mTrackMeta->setInt32(kKeyPcmEncoding, kAudioEncodingPcm16bit);
315
316 int64_t durationUs = 0;
317 if (mWaveFormat == WAVE_FORMAT_MSGSM) {
318 // 65 bytes decode to 320 8kHz samples
319 durationUs =
320 1000000LL * (mDataSize / 65 * 320) / 8000;
321 } else {
322 size_t bytesPerSample = mBitsPerSample >> 3;
323
324 if (!bytesPerSample || !mNumChannels)
325 return ERROR_MALFORMED;
326
327 size_t num_samples = mDataSize / (mNumChannels * bytesPerSample);
328
329 if (!mSampleRate)
330 return ERROR_MALFORMED;
331
332 durationUs =
333 1000000LL * num_samples / mSampleRate;
334 }
335
336 mTrackMeta->setInt64(kKeyDuration, durationUs);
337
338 return OK;
339 }
340 }
341
342 offset += chunkSize;
343 }
344
345 return NO_INIT;
346 }
347
348 const size_t WAVSource::kMaxFrameSize = 32768;
349
WAVSource(const sp<DataSource> & dataSource,const sp<MetaData> & meta,uint16_t waveFormat,int32_t bitsPerSample,off64_t offset,size_t size)350 WAVSource::WAVSource(
351 const sp<DataSource> &dataSource,
352 const sp<MetaData> &meta,
353 uint16_t waveFormat,
354 int32_t bitsPerSample,
355 off64_t offset, size_t size)
356 : mDataSource(dataSource),
357 mMeta(meta),
358 mWaveFormat(waveFormat),
359 mSampleRate(0),
360 mNumChannels(0),
361 mBitsPerSample(bitsPerSample),
362 mOffset(offset),
363 mSize(size),
364 mStarted(false),
365 mGroup(NULL) {
366 CHECK(mMeta->findInt32(kKeySampleRate, &mSampleRate));
367 CHECK(mMeta->findInt32(kKeyChannelCount, &mNumChannels));
368
369 mMeta->setInt32(kKeyMaxInputSize, kMaxFrameSize);
370 }
371
~WAVSource()372 WAVSource::~WAVSource() {
373 if (mStarted) {
374 stop();
375 }
376 }
377
start(MetaData *)378 status_t WAVSource::start(MetaData * /* params */) {
379 ALOGV("WAVSource::start");
380
381 CHECK(!mStarted);
382
383 // some WAV files may have large audio buffers that use shared memory transfer.
384 mGroup = new MediaBufferGroup(4 /* buffers */, kMaxFrameSize);
385
386 if (mBitsPerSample == 8) {
387 // As a temporary buffer for 8->16 bit conversion.
388 mGroup->add_buffer(new MediaBuffer(kMaxFrameSize));
389 }
390
391 mCurrentPos = mOffset;
392
393 mStarted = true;
394
395 return OK;
396 }
397
stop()398 status_t WAVSource::stop() {
399 ALOGV("WAVSource::stop");
400
401 CHECK(mStarted);
402
403 delete mGroup;
404 mGroup = NULL;
405
406 mStarted = false;
407
408 return OK;
409 }
410
getFormat()411 sp<MetaData> WAVSource::getFormat() {
412 ALOGV("WAVSource::getFormat");
413
414 return mMeta;
415 }
416
read(MediaBuffer ** out,const ReadOptions * options)417 status_t WAVSource::read(
418 MediaBuffer **out, const ReadOptions *options) {
419 *out = NULL;
420
421 if (options != nullptr && options->getNonBlocking() && !mGroup->has_buffers()) {
422 return WOULD_BLOCK;
423 }
424
425 int64_t seekTimeUs;
426 ReadOptions::SeekMode mode;
427 if (options != NULL && options->getSeekTo(&seekTimeUs, &mode)) {
428 int64_t pos = 0;
429
430 if (mWaveFormat == WAVE_FORMAT_MSGSM) {
431 // 65 bytes decode to 320 8kHz samples
432 int64_t samplenumber = (seekTimeUs * mSampleRate) / 1000000;
433 int64_t framenumber = samplenumber / 320;
434 pos = framenumber * 65;
435 } else {
436 pos = (seekTimeUs * mSampleRate) / 1000000 * mNumChannels * (mBitsPerSample >> 3);
437 }
438 if (pos > (off64_t)mSize) {
439 pos = mSize;
440 }
441 mCurrentPos = pos + mOffset;
442 }
443
444 MediaBuffer *buffer;
445 status_t err = mGroup->acquire_buffer(&buffer);
446 if (err != OK) {
447 return err;
448 }
449
450 // make sure that maxBytesToRead is multiple of 3, in 24-bit case
451 size_t maxBytesToRead =
452 mBitsPerSample == 8 ? kMaxFrameSize / 2 :
453 (mBitsPerSample == 24 ? 3*(kMaxFrameSize/3): kMaxFrameSize);
454
455 size_t maxBytesAvailable =
456 (mCurrentPos - mOffset >= (off64_t)mSize)
457 ? 0 : mSize - (mCurrentPos - mOffset);
458
459 if (maxBytesToRead > maxBytesAvailable) {
460 maxBytesToRead = maxBytesAvailable;
461 }
462
463 if (mWaveFormat == WAVE_FORMAT_MSGSM) {
464 // Microsoft packs 2 frames into 65 bytes, rather than using separate 33-byte frames,
465 // so read multiples of 65, and use smaller buffers to account for ~10:1 expansion ratio
466 if (maxBytesToRead > 1024) {
467 maxBytesToRead = 1024;
468 }
469 maxBytesToRead = (maxBytesToRead / 65) * 65;
470 } else {
471 // read only integral amounts of audio unit frames.
472 const size_t inputUnitFrameSize = mNumChannels * mBitsPerSample / 8;
473 maxBytesToRead -= maxBytesToRead % inputUnitFrameSize;
474 }
475
476 ssize_t n = mDataSource->readAt(
477 mCurrentPos, buffer->data(),
478 maxBytesToRead);
479
480 if (n <= 0) {
481 buffer->release();
482 buffer = NULL;
483
484 return ERROR_END_OF_STREAM;
485 }
486
487 buffer->set_range(0, n);
488
489 // TODO: add capability to return data as float PCM instead of 16 bit PCM.
490 if (mWaveFormat == WAVE_FORMAT_PCM) {
491 if (mBitsPerSample == 8) {
492 // Convert 8-bit unsigned samples to 16-bit signed.
493
494 // Create new buffer with 2 byte wide samples
495 MediaBuffer *tmp;
496 CHECK_EQ(mGroup->acquire_buffer(&tmp), (status_t)OK);
497 tmp->set_range(0, 2 * n);
498
499 memcpy_to_i16_from_u8((int16_t *)tmp->data(), (const uint8_t *)buffer->data(), n);
500 buffer->release();
501 buffer = tmp;
502 } else if (mBitsPerSample == 24) {
503 // Convert 24-bit signed samples to 16-bit signed in place
504 const size_t numSamples = n / 3;
505
506 memcpy_to_i16_from_p24((int16_t *)buffer->data(), (const uint8_t *)buffer->data(), numSamples);
507 buffer->set_range(0, 2 * numSamples);
508 } else if (mBitsPerSample == 32) {
509 // Convert 32-bit signed samples to 16-bit signed in place
510 const size_t numSamples = n / 4;
511
512 memcpy_to_i16_from_i32((int16_t *)buffer->data(), (const int32_t *)buffer->data(), numSamples);
513 buffer->set_range(0, 2 * numSamples);
514 }
515 } else if (mWaveFormat == WAVE_FORMAT_IEEE_FLOAT) {
516 if (mBitsPerSample == 32) {
517 // Convert 32-bit float samples to 16-bit signed in place
518 const size_t numSamples = n / 4;
519
520 memcpy_to_i16_from_float((int16_t *)buffer->data(), (const float *)buffer->data(), numSamples);
521 buffer->set_range(0, 2 * numSamples);
522 }
523 }
524
525 int64_t timeStampUs = 0;
526
527 if (mWaveFormat == WAVE_FORMAT_MSGSM) {
528 timeStampUs = 1000000LL * (mCurrentPos - mOffset) * 320 / 65 / mSampleRate;
529 } else {
530 size_t bytesPerSample = mBitsPerSample >> 3;
531 timeStampUs = 1000000LL * (mCurrentPos - mOffset)
532 / (mNumChannels * bytesPerSample) / mSampleRate;
533 }
534
535 buffer->meta_data()->setInt64(kKeyTime, timeStampUs);
536
537 buffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
538 mCurrentPos += n;
539
540 *out = buffer;
541
542 return OK;
543 }
544
545 ////////////////////////////////////////////////////////////////////////////////
546
SniffWAV(const sp<DataSource> & source,String8 * mimeType,float * confidence,sp<AMessage> *)547 bool SniffWAV(
548 const sp<DataSource> &source, String8 *mimeType, float *confidence,
549 sp<AMessage> *) {
550 char header[12];
551 if (source->readAt(0, header, sizeof(header)) < (ssize_t)sizeof(header)) {
552 return false;
553 }
554
555 if (memcmp(header, "RIFF", 4) || memcmp(&header[8], "WAVE", 4)) {
556 return false;
557 }
558
559 sp<MediaExtractor> extractor = new WAVExtractor(source);
560 if (extractor->countTracks() == 0) {
561 return false;
562 }
563
564 *mimeType = MEDIA_MIMETYPE_CONTAINER_WAV;
565 *confidence = 0.3f;
566
567 return true;
568 }
569
570 } // namespace android
571