1 /*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 //#define LOG_NDEBUG 0
18 #define LOG_TAG "WAVExtractor"
19 #include <utils/Log.h>
20
21 #include "include/WAVExtractor.h"
22
23 #include <audio_utils/primitives.h>
24 #include <media/stagefright/foundation/ADebug.h>
25 #include <media/stagefright/DataSource.h>
26 #include <media/stagefright/MediaBufferGroup.h>
27 #include <media/stagefright/MediaDefs.h>
28 #include <media/stagefright/MediaErrors.h>
29 #include <media/stagefright/MediaSource.h>
30 #include <media/stagefright/MetaData.h>
31 #include <utils/String8.h>
32 #include <cutils/bitops.h>
33
34 #define CHANNEL_MASK_USE_CHANNEL_ORDER 0
35
36 namespace android {
37
// Values of the 16-bit format tag stored at the start of a WAV "fmt " chunk
// that this extractor recognizes. WAVE_FORMAT_EXTENSIBLE signals the 40-byte
// extended layout, whose sub-format GUID carries one of the other tags.
enum {
    WAVE_FORMAT_PCM        = 0x0001,  // integer PCM
    WAVE_FORMAT_IEEE_FLOAT = 0x0003,  // floating point PCM
    WAVE_FORMAT_ALAW       = 0x0006,  // G.711 A-law
    WAVE_FORMAT_MULAW      = 0x0007,  // G.711 mu-law
    WAVE_FORMAT_MSGSM      = 0x0031,  // Microsoft GSM
    WAVE_FORMAT_EXTENSIBLE = 0xFFFE   // extended header, see sub-format GUID
};
46
// The 14 bytes expected to follow the 2-byte format tag inside the 16-byte
// sub-format GUID of a WAVE_FORMAT_EXTENSIBLE header. Compared with memcmp()
// over exactly 14 bytes, so the embedded NULs are significant.
static const char* WAVEEXT_SUBFORMAT =
        "\x00\x00" "\x00\x00" "\x10\x00"
        "\x80\x00\x00\xAA\x00\x38\x9B\x71";
48
49
// Decodes an unsigned 32-bit little-endian value from the four bytes at ptr.
//
// Each byte is widened to uint32_t before shifting: a plain uint8_t operand
// would be promoted to (signed) int, and shifting a byte >= 0x80 left by 24
// would then move a bit into the sign position.
static uint32_t U32_LE_AT(const uint8_t *ptr) {
    return (static_cast<uint32_t>(ptr[3]) << 24)
            | (static_cast<uint32_t>(ptr[2]) << 16)
            | (static_cast<uint32_t>(ptr[1]) << 8)
            | static_cast<uint32_t>(ptr[0]);
}
53
// Decodes an unsigned 16-bit little-endian value from the two bytes at ptr.
static uint16_t U16_LE_AT(const uint8_t *ptr) {
    const int lowByte = ptr[0];
    const int highByte = ptr[1];
    return (highByte << 8) | lowByte;
}
57
58 struct WAVSource : public MediaSource {
59 WAVSource(
60 const sp<DataSource> &dataSource,
61 const sp<MetaData> &meta,
62 uint16_t waveFormat,
63 int32_t bitsPerSample,
64 off64_t offset, size_t size);
65
66 virtual status_t start(MetaData *params = NULL);
67 virtual status_t stop();
68 virtual sp<MetaData> getFormat();
69
70 virtual status_t read(
71 MediaBuffer **buffer, const ReadOptions *options = NULL);
72
73 protected:
74 virtual ~WAVSource();
75
76 private:
77 static const size_t kMaxFrameSize;
78
79 sp<DataSource> mDataSource;
80 sp<MetaData> mMeta;
81 uint16_t mWaveFormat;
82 int32_t mSampleRate;
83 int32_t mNumChannels;
84 int32_t mBitsPerSample;
85 off64_t mOffset;
86 size_t mSize;
87 bool mStarted;
88 MediaBufferGroup *mGroup;
89 off64_t mCurrentPos;
90
91 WAVSource(const WAVSource &);
92 WAVSource &operator=(const WAVSource &);
93 };
94
WAVExtractor(const sp<DataSource> & source)95 WAVExtractor::WAVExtractor(const sp<DataSource> &source)
96 : mDataSource(source),
97 mValidFormat(false),
98 mChannelMask(CHANNEL_MASK_USE_CHANNEL_ORDER) {
99 mInitCheck = init();
100 }
101
~WAVExtractor()102 WAVExtractor::~WAVExtractor() {
103 }
104
getMetaData()105 sp<MetaData> WAVExtractor::getMetaData() {
106 sp<MetaData> meta = new MetaData;
107
108 if (mInitCheck != OK) {
109 return meta;
110 }
111
112 meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_WAV);
113
114 return meta;
115 }
116
countTracks()117 size_t WAVExtractor::countTracks() {
118 return mInitCheck == OK ? 1 : 0;
119 }
120
getTrack(size_t index)121 sp<IMediaSource> WAVExtractor::getTrack(size_t index) {
122 if (mInitCheck != OK || index > 0) {
123 return NULL;
124 }
125
126 return new WAVSource(
127 mDataSource, mTrackMeta,
128 mWaveFormat, mBitsPerSample, mDataOffset, mDataSize);
129 }
130
getTrackMetaData(size_t index,uint32_t)131 sp<MetaData> WAVExtractor::getTrackMetaData(
132 size_t index, uint32_t /* flags */) {
133 if (mInitCheck != OK || index > 0) {
134 return NULL;
135 }
136
137 return mTrackMeta;
138 }
139
init()140 status_t WAVExtractor::init() {
141 uint8_t header[12];
142 if (mDataSource->readAt(
143 0, header, sizeof(header)) < (ssize_t)sizeof(header)) {
144 return NO_INIT;
145 }
146
147 if (memcmp(header, "RIFF", 4) || memcmp(&header[8], "WAVE", 4)) {
148 return NO_INIT;
149 }
150
151 size_t totalSize = U32_LE_AT(&header[4]);
152
153 off64_t offset = 12;
154 size_t remainingSize = totalSize;
155 while (remainingSize >= 8) {
156 uint8_t chunkHeader[8];
157 if (mDataSource->readAt(offset, chunkHeader, 8) < 8) {
158 return NO_INIT;
159 }
160
161 remainingSize -= 8;
162 offset += 8;
163
164 uint32_t chunkSize = U32_LE_AT(&chunkHeader[4]);
165
166 if (chunkSize > remainingSize) {
167 return NO_INIT;
168 }
169
170 if (!memcmp(chunkHeader, "fmt ", 4)) {
171 if (chunkSize < 16) {
172 return NO_INIT;
173 }
174
175 uint8_t formatSpec[40];
176 if (mDataSource->readAt(offset, formatSpec, 2) < 2) {
177 return NO_INIT;
178 }
179
180 mWaveFormat = U16_LE_AT(formatSpec);
181 if (mWaveFormat != WAVE_FORMAT_PCM
182 && mWaveFormat != WAVE_FORMAT_IEEE_FLOAT
183 && mWaveFormat != WAVE_FORMAT_ALAW
184 && mWaveFormat != WAVE_FORMAT_MULAW
185 && mWaveFormat != WAVE_FORMAT_MSGSM
186 && mWaveFormat != WAVE_FORMAT_EXTENSIBLE) {
187 return ERROR_UNSUPPORTED;
188 }
189
190 uint8_t fmtSize = 16;
191 if (mWaveFormat == WAVE_FORMAT_EXTENSIBLE) {
192 fmtSize = 40;
193 }
194 if (mDataSource->readAt(offset, formatSpec, fmtSize) < fmtSize) {
195 return NO_INIT;
196 }
197
198 mNumChannels = U16_LE_AT(&formatSpec[2]);
199
200 if (mNumChannels < 1 || mNumChannels > 8) {
201 ALOGE("Unsupported number of channels (%d)", mNumChannels);
202 return ERROR_UNSUPPORTED;
203 }
204
205 if (mWaveFormat != WAVE_FORMAT_EXTENSIBLE) {
206 if (mNumChannels != 1 && mNumChannels != 2) {
207 ALOGW("More than 2 channels (%d) in non-WAVE_EXT, unknown channel mask",
208 mNumChannels);
209 }
210 }
211
212 mSampleRate = U32_LE_AT(&formatSpec[4]);
213
214 if (mSampleRate == 0) {
215 return ERROR_MALFORMED;
216 }
217
218 mBitsPerSample = U16_LE_AT(&formatSpec[14]);
219
220 if (mWaveFormat == WAVE_FORMAT_EXTENSIBLE) {
221 uint16_t validBitsPerSample = U16_LE_AT(&formatSpec[18]);
222 if (validBitsPerSample != mBitsPerSample) {
223 if (validBitsPerSample != 0) {
224 ALOGE("validBits(%d) != bitsPerSample(%d) are not supported",
225 validBitsPerSample, mBitsPerSample);
226 return ERROR_UNSUPPORTED;
227 } else {
228 // we only support valitBitsPerSample == bitsPerSample but some WAV_EXT
229 // writers don't correctly set the valid bits value, and leave it at 0.
230 ALOGW("WAVE_EXT has 0 valid bits per sample, ignoring");
231 }
232 }
233
234 mChannelMask = U32_LE_AT(&formatSpec[20]);
235 ALOGV("numChannels=%d channelMask=0x%x", mNumChannels, mChannelMask);
236 if ((mChannelMask >> 18) != 0) {
237 ALOGE("invalid channel mask 0x%x", mChannelMask);
238 return ERROR_MALFORMED;
239 }
240
241 if ((mChannelMask != CHANNEL_MASK_USE_CHANNEL_ORDER)
242 && (popcount(mChannelMask) != mNumChannels)) {
243 ALOGE("invalid number of channels (%d) in channel mask (0x%x)",
244 popcount(mChannelMask), mChannelMask);
245 return ERROR_MALFORMED;
246 }
247
248 // In a WAVE_EXT header, the first two bytes of the GUID stored at byte 24 contain
249 // the sample format, using the same definitions as a regular WAV header
250 mWaveFormat = U16_LE_AT(&formatSpec[24]);
251 if (memcmp(&formatSpec[26], WAVEEXT_SUBFORMAT, 14)) {
252 ALOGE("unsupported GUID");
253 return ERROR_UNSUPPORTED;
254 }
255 }
256
257 if (mWaveFormat == WAVE_FORMAT_PCM) {
258 if (mBitsPerSample != 8 && mBitsPerSample != 16
259 && mBitsPerSample != 24 && mBitsPerSample != 32) {
260 return ERROR_UNSUPPORTED;
261 }
262 } else if (mWaveFormat == WAVE_FORMAT_IEEE_FLOAT) {
263 if (mBitsPerSample != 32) { // TODO we don't support double
264 return ERROR_UNSUPPORTED;
265 }
266 }
267 else if (mWaveFormat == WAVE_FORMAT_MSGSM) {
268 if (mBitsPerSample != 0) {
269 return ERROR_UNSUPPORTED;
270 }
271 } else if (mWaveFormat == WAVE_FORMAT_MULAW || mWaveFormat == WAVE_FORMAT_ALAW) {
272 if (mBitsPerSample != 8) {
273 return ERROR_UNSUPPORTED;
274 }
275 } else {
276 return ERROR_UNSUPPORTED;
277 }
278
279 mValidFormat = true;
280 } else if (!memcmp(chunkHeader, "data", 4)) {
281 if (mValidFormat) {
282 mDataOffset = offset;
283 mDataSize = chunkSize;
284
285 mTrackMeta = new MetaData;
286
287 switch (mWaveFormat) {
288 case WAVE_FORMAT_PCM:
289 case WAVE_FORMAT_IEEE_FLOAT:
290 mTrackMeta->setCString(
291 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_RAW);
292 break;
293 case WAVE_FORMAT_ALAW:
294 mTrackMeta->setCString(
295 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_G711_ALAW);
296 break;
297 case WAVE_FORMAT_MSGSM:
298 mTrackMeta->setCString(
299 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MSGSM);
300 break;
301 default:
302 CHECK_EQ(mWaveFormat, (uint16_t)WAVE_FORMAT_MULAW);
303 mTrackMeta->setCString(
304 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_G711_MLAW);
305 break;
306 }
307
308 mTrackMeta->setInt32(kKeyChannelCount, mNumChannels);
309 mTrackMeta->setInt32(kKeyChannelMask, mChannelMask);
310 mTrackMeta->setInt32(kKeySampleRate, mSampleRate);
311 mTrackMeta->setInt32(kKeyPcmEncoding, kAudioEncodingPcm16bit);
312
313 int64_t durationUs = 0;
314 if (mWaveFormat == WAVE_FORMAT_MSGSM) {
315 // 65 bytes decode to 320 8kHz samples
316 durationUs =
317 1000000LL * (mDataSize / 65 * 320) / 8000;
318 } else {
319 size_t bytesPerSample = mBitsPerSample >> 3;
320
321 if (!bytesPerSample || !mNumChannels)
322 return ERROR_MALFORMED;
323
324 size_t num_samples = mDataSize / (mNumChannels * bytesPerSample);
325
326 if (!mSampleRate)
327 return ERROR_MALFORMED;
328
329 durationUs =
330 1000000LL * num_samples / mSampleRate;
331 }
332
333 mTrackMeta->setInt64(kKeyDuration, durationUs);
334
335 return OK;
336 }
337 }
338
339 offset += chunkSize;
340 }
341
342 return NO_INIT;
343 }
344
345 const size_t WAVSource::kMaxFrameSize = 32768;
346
WAVSource(const sp<DataSource> & dataSource,const sp<MetaData> & meta,uint16_t waveFormat,int32_t bitsPerSample,off64_t offset,size_t size)347 WAVSource::WAVSource(
348 const sp<DataSource> &dataSource,
349 const sp<MetaData> &meta,
350 uint16_t waveFormat,
351 int32_t bitsPerSample,
352 off64_t offset, size_t size)
353 : mDataSource(dataSource),
354 mMeta(meta),
355 mWaveFormat(waveFormat),
356 mSampleRate(0),
357 mNumChannels(0),
358 mBitsPerSample(bitsPerSample),
359 mOffset(offset),
360 mSize(size),
361 mStarted(false),
362 mGroup(NULL) {
363 CHECK(mMeta->findInt32(kKeySampleRate, &mSampleRate));
364 CHECK(mMeta->findInt32(kKeyChannelCount, &mNumChannels));
365
366 mMeta->setInt32(kKeyMaxInputSize, kMaxFrameSize);
367 }
368
~WAVSource()369 WAVSource::~WAVSource() {
370 if (mStarted) {
371 stop();
372 }
373 }
374
start(MetaData *)375 status_t WAVSource::start(MetaData * /* params */) {
376 ALOGV("WAVSource::start");
377
378 CHECK(!mStarted);
379
380 mGroup = new MediaBufferGroup;
381 mGroup->add_buffer(new MediaBuffer(kMaxFrameSize));
382
383 if (mBitsPerSample == 8) {
384 // As a temporary buffer for 8->16 bit conversion.
385 mGroup->add_buffer(new MediaBuffer(kMaxFrameSize));
386 }
387
388 mCurrentPos = mOffset;
389
390 mStarted = true;
391
392 return OK;
393 }
394
stop()395 status_t WAVSource::stop() {
396 ALOGV("WAVSource::stop");
397
398 CHECK(mStarted);
399
400 delete mGroup;
401 mGroup = NULL;
402
403 mStarted = false;
404
405 return OK;
406 }
407
getFormat()408 sp<MetaData> WAVSource::getFormat() {
409 ALOGV("WAVSource::getFormat");
410
411 return mMeta;
412 }
413
read(MediaBuffer ** out,const ReadOptions * options)414 status_t WAVSource::read(
415 MediaBuffer **out, const ReadOptions *options) {
416 *out = NULL;
417
418 int64_t seekTimeUs;
419 ReadOptions::SeekMode mode;
420 if (options != NULL && options->getSeekTo(&seekTimeUs, &mode)) {
421 int64_t pos = 0;
422
423 if (mWaveFormat == WAVE_FORMAT_MSGSM) {
424 // 65 bytes decode to 320 8kHz samples
425 int64_t samplenumber = (seekTimeUs * mSampleRate) / 1000000;
426 int64_t framenumber = samplenumber / 320;
427 pos = framenumber * 65;
428 } else {
429 pos = (seekTimeUs * mSampleRate) / 1000000 * mNumChannels * (mBitsPerSample >> 3);
430 }
431 if (pos > (off64_t)mSize) {
432 pos = mSize;
433 }
434 mCurrentPos = pos + mOffset;
435 }
436
437 MediaBuffer *buffer;
438 status_t err = mGroup->acquire_buffer(&buffer);
439 if (err != OK) {
440 return err;
441 }
442
443 // make sure that maxBytesToRead is multiple of 3, in 24-bit case
444 size_t maxBytesToRead =
445 mBitsPerSample == 8 ? kMaxFrameSize / 2 :
446 (mBitsPerSample == 24 ? 3*(kMaxFrameSize/3): kMaxFrameSize);
447
448 size_t maxBytesAvailable =
449 (mCurrentPos - mOffset >= (off64_t)mSize)
450 ? 0 : mSize - (mCurrentPos - mOffset);
451
452 if (maxBytesToRead > maxBytesAvailable) {
453 maxBytesToRead = maxBytesAvailable;
454 }
455
456 if (mWaveFormat == WAVE_FORMAT_MSGSM) {
457 // Microsoft packs 2 frames into 65 bytes, rather than using separate 33-byte frames,
458 // so read multiples of 65, and use smaller buffers to account for ~10:1 expansion ratio
459 if (maxBytesToRead > 1024) {
460 maxBytesToRead = 1024;
461 }
462 maxBytesToRead = (maxBytesToRead / 65) * 65;
463 } else {
464 // read only integral amounts of audio unit frames.
465 const size_t inputUnitFrameSize = mNumChannels * mBitsPerSample / 8;
466 maxBytesToRead -= maxBytesToRead % inputUnitFrameSize;
467 }
468
469 ssize_t n = mDataSource->readAt(
470 mCurrentPos, buffer->data(),
471 maxBytesToRead);
472
473 if (n <= 0) {
474 buffer->release();
475 buffer = NULL;
476
477 return ERROR_END_OF_STREAM;
478 }
479
480 buffer->set_range(0, n);
481
482 // TODO: add capability to return data as float PCM instead of 16 bit PCM.
483 if (mWaveFormat == WAVE_FORMAT_PCM) {
484 if (mBitsPerSample == 8) {
485 // Convert 8-bit unsigned samples to 16-bit signed.
486
487 // Create new buffer with 2 byte wide samples
488 MediaBuffer *tmp;
489 CHECK_EQ(mGroup->acquire_buffer(&tmp), (status_t)OK);
490 tmp->set_range(0, 2 * n);
491
492 memcpy_to_i16_from_u8((int16_t *)tmp->data(), (const uint8_t *)buffer->data(), n);
493 buffer->release();
494 buffer = tmp;
495 } else if (mBitsPerSample == 24) {
496 // Convert 24-bit signed samples to 16-bit signed in place
497 const size_t numSamples = n / 3;
498
499 memcpy_to_i16_from_p24((int16_t *)buffer->data(), (const uint8_t *)buffer->data(), numSamples);
500 buffer->set_range(0, 2 * numSamples);
501 } else if (mBitsPerSample == 32) {
502 // Convert 32-bit signed samples to 16-bit signed in place
503 const size_t numSamples = n / 4;
504
505 memcpy_to_i16_from_i32((int16_t *)buffer->data(), (const int32_t *)buffer->data(), numSamples);
506 buffer->set_range(0, 2 * numSamples);
507 }
508 } else if (mWaveFormat == WAVE_FORMAT_IEEE_FLOAT) {
509 if (mBitsPerSample == 32) {
510 // Convert 32-bit float samples to 16-bit signed in place
511 const size_t numSamples = n / 4;
512
513 memcpy_to_i16_from_float((int16_t *)buffer->data(), (const float *)buffer->data(), numSamples);
514 buffer->set_range(0, 2 * numSamples);
515 }
516 }
517
518 int64_t timeStampUs = 0;
519
520 if (mWaveFormat == WAVE_FORMAT_MSGSM) {
521 timeStampUs = 1000000LL * (mCurrentPos - mOffset) * 320 / 65 / mSampleRate;
522 } else {
523 size_t bytesPerSample = mBitsPerSample >> 3;
524 timeStampUs = 1000000LL * (mCurrentPos - mOffset)
525 / (mNumChannels * bytesPerSample) / mSampleRate;
526 }
527
528 buffer->meta_data()->setInt64(kKeyTime, timeStampUs);
529
530 buffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
531 mCurrentPos += n;
532
533 *out = buffer;
534
535 return OK;
536 }
537
538 ////////////////////////////////////////////////////////////////////////////////
539
SniffWAV(const sp<DataSource> & source,String8 * mimeType,float * confidence,sp<AMessage> *)540 bool SniffWAV(
541 const sp<DataSource> &source, String8 *mimeType, float *confidence,
542 sp<AMessage> *) {
543 char header[12];
544 if (source->readAt(0, header, sizeof(header)) < (ssize_t)sizeof(header)) {
545 return false;
546 }
547
548 if (memcmp(header, "RIFF", 4) || memcmp(&header[8], "WAVE", 4)) {
549 return false;
550 }
551
552 sp<MediaExtractor> extractor = new WAVExtractor(source);
553 if (extractor->countTracks() == 0) {
554 return false;
555 }
556
557 *mimeType = MEDIA_MIMETYPE_CONTAINER_WAV;
558 *confidence = 0.3f;
559
560 return true;
561 }
562
563 } // namespace android
564