1 /*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 //#define LOG_NDEBUG 0
18 #define LOG_TAG "WAVExtractor"
19 #include <utils/Log.h>
20
21 #include "include/WAVExtractor.h"
22
23 #include <media/stagefright/foundation/ADebug.h>
24 #include <media/stagefright/DataSource.h>
25 #include <media/stagefright/MediaBufferGroup.h>
26 #include <media/stagefright/MediaDefs.h>
27 #include <media/stagefright/MediaErrors.h>
28 #include <media/stagefright/MediaSource.h>
29 #include <media/stagefright/MetaData.h>
30 #include <utils/String8.h>
31 #include <cutils/bitops.h>
32
33 #define CHANNEL_MASK_USE_CHANNEL_ORDER 0
34
35 namespace android {
36
// Format codes accepted by WAVExtractor::init(). The code is read from the
// first two bytes of the "fmt " chunk; for WAVE_FORMAT_EXTENSIBLE the
// effective code is read again from the sub-format field.
enum {
    WAVE_FORMAT_PCM        = 0x0001,
    WAVE_FORMAT_ALAW       = 0x0006,
    WAVE_FORMAT_MULAW      = 0x0007,
    WAVE_FORMAT_MSGSM      = 0x0031,
    WAVE_FORMAT_EXTENSIBLE = 0xFFFE
};
44
// The 14 bytes that must follow the two-byte format code inside the
// sub-format field of a WAVE_FORMAT_EXTENSIBLE "fmt " chunk; init() compares
// them with memcmp(), so the embedded zero bytes are significant.
static const char* WAVEEXT_SUBFORMAT =
        "\x00\x00\x00\x00\x10\x00\x80\x00\x00\xAA\x00\x38\x9B\x71";
46
47
// Decodes the little-endian unsigned 32-bit value stored in the four bytes
// starting at ptr (ptr[0] is the least significant byte).
static uint32_t U32_LE_AT(const uint8_t *ptr) {
    // Widen every byte to uint32_t before shifting: a bare uint8_t promotes
    // to signed int, and shifting a byte >= 0x80 left by 24 overflows it.
    return (static_cast<uint32_t>(ptr[3]) << 24)
            | (static_cast<uint32_t>(ptr[2]) << 16)
            | (static_cast<uint32_t>(ptr[1]) << 8)
            | static_cast<uint32_t>(ptr[0]);
}
51
// Decodes the little-endian unsigned 16-bit value stored in the two bytes
// starting at ptr (ptr[0] is the least significant byte).
static uint16_t U16_LE_AT(const uint8_t *ptr) {
    const uint16_t lowByte = ptr[0];
    const uint16_t highByte = ptr[1];
    return static_cast<uint16_t>((highByte << 8) | lowByte);
}
55
56 struct WAVSource : public MediaSource {
57 WAVSource(
58 const sp<DataSource> &dataSource,
59 const sp<MetaData> &meta,
60 uint16_t waveFormat,
61 int32_t bitsPerSample,
62 off64_t offset, size_t size);
63
64 virtual status_t start(MetaData *params = NULL);
65 virtual status_t stop();
66 virtual sp<MetaData> getFormat();
67
68 virtual status_t read(
69 MediaBuffer **buffer, const ReadOptions *options = NULL);
70
71 protected:
72 virtual ~WAVSource();
73
74 private:
75 static const size_t kMaxFrameSize;
76
77 sp<DataSource> mDataSource;
78 sp<MetaData> mMeta;
79 uint16_t mWaveFormat;
80 int32_t mSampleRate;
81 int32_t mNumChannels;
82 int32_t mBitsPerSample;
83 off64_t mOffset;
84 size_t mSize;
85 bool mStarted;
86 MediaBufferGroup *mGroup;
87 off64_t mCurrentPos;
88
89 WAVSource(const WAVSource &);
90 WAVSource &operator=(const WAVSource &);
91 };
92
WAVExtractor(const sp<DataSource> & source)93 WAVExtractor::WAVExtractor(const sp<DataSource> &source)
94 : mDataSource(source),
95 mValidFormat(false),
96 mChannelMask(CHANNEL_MASK_USE_CHANNEL_ORDER) {
97 mInitCheck = init();
98 }
99
~WAVExtractor()100 WAVExtractor::~WAVExtractor() {
101 }
102
getMetaData()103 sp<MetaData> WAVExtractor::getMetaData() {
104 sp<MetaData> meta = new MetaData;
105
106 if (mInitCheck != OK) {
107 return meta;
108 }
109
110 meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_WAV);
111
112 return meta;
113 }
114
countTracks()115 size_t WAVExtractor::countTracks() {
116 return mInitCheck == OK ? 1 : 0;
117 }
118
getTrack(size_t index)119 sp<MediaSource> WAVExtractor::getTrack(size_t index) {
120 if (mInitCheck != OK || index > 0) {
121 return NULL;
122 }
123
124 return new WAVSource(
125 mDataSource, mTrackMeta,
126 mWaveFormat, mBitsPerSample, mDataOffset, mDataSize);
127 }
128
getTrackMetaData(size_t index,uint32_t)129 sp<MetaData> WAVExtractor::getTrackMetaData(
130 size_t index, uint32_t /* flags */) {
131 if (mInitCheck != OK || index > 0) {
132 return NULL;
133 }
134
135 return mTrackMeta;
136 }
137
init()138 status_t WAVExtractor::init() {
139 uint8_t header[12];
140 if (mDataSource->readAt(
141 0, header, sizeof(header)) < (ssize_t)sizeof(header)) {
142 return NO_INIT;
143 }
144
145 if (memcmp(header, "RIFF", 4) || memcmp(&header[8], "WAVE", 4)) {
146 return NO_INIT;
147 }
148
149 size_t totalSize = U32_LE_AT(&header[4]);
150
151 off64_t offset = 12;
152 size_t remainingSize = totalSize;
153 while (remainingSize >= 8) {
154 uint8_t chunkHeader[8];
155 if (mDataSource->readAt(offset, chunkHeader, 8) < 8) {
156 return NO_INIT;
157 }
158
159 remainingSize -= 8;
160 offset += 8;
161
162 uint32_t chunkSize = U32_LE_AT(&chunkHeader[4]);
163
164 if (chunkSize > remainingSize) {
165 return NO_INIT;
166 }
167
168 if (!memcmp(chunkHeader, "fmt ", 4)) {
169 if (chunkSize < 16) {
170 return NO_INIT;
171 }
172
173 uint8_t formatSpec[40];
174 if (mDataSource->readAt(offset, formatSpec, 2) < 2) {
175 return NO_INIT;
176 }
177
178 mWaveFormat = U16_LE_AT(formatSpec);
179 if (mWaveFormat != WAVE_FORMAT_PCM
180 && mWaveFormat != WAVE_FORMAT_ALAW
181 && mWaveFormat != WAVE_FORMAT_MULAW
182 && mWaveFormat != WAVE_FORMAT_MSGSM
183 && mWaveFormat != WAVE_FORMAT_EXTENSIBLE) {
184 return ERROR_UNSUPPORTED;
185 }
186
187 uint8_t fmtSize = 16;
188 if (mWaveFormat == WAVE_FORMAT_EXTENSIBLE) {
189 fmtSize = 40;
190 }
191 if (mDataSource->readAt(offset, formatSpec, fmtSize) < fmtSize) {
192 return NO_INIT;
193 }
194
195 mNumChannels = U16_LE_AT(&formatSpec[2]);
196 if (mWaveFormat != WAVE_FORMAT_EXTENSIBLE) {
197 if (mNumChannels != 1 && mNumChannels != 2) {
198 ALOGW("More than 2 channels (%d) in non-WAVE_EXT, unknown channel mask",
199 mNumChannels);
200 }
201 } else {
202 if (mNumChannels < 1 && mNumChannels > 8) {
203 return ERROR_UNSUPPORTED;
204 }
205 }
206
207 mSampleRate = U32_LE_AT(&formatSpec[4]);
208
209 if (mSampleRate == 0) {
210 return ERROR_MALFORMED;
211 }
212
213 mBitsPerSample = U16_LE_AT(&formatSpec[14]);
214
215 if (mWaveFormat == WAVE_FORMAT_PCM
216 || mWaveFormat == WAVE_FORMAT_EXTENSIBLE) {
217 if (mBitsPerSample != 8 && mBitsPerSample != 16
218 && mBitsPerSample != 24) {
219 return ERROR_UNSUPPORTED;
220 }
221 } else if (mWaveFormat == WAVE_FORMAT_MSGSM) {
222 if (mBitsPerSample != 0) {
223 return ERROR_UNSUPPORTED;
224 }
225 } else {
226 CHECK(mWaveFormat == WAVE_FORMAT_MULAW
227 || mWaveFormat == WAVE_FORMAT_ALAW);
228 if (mBitsPerSample != 8) {
229 return ERROR_UNSUPPORTED;
230 }
231 }
232
233 if (mWaveFormat == WAVE_FORMAT_EXTENSIBLE) {
234 uint16_t validBitsPerSample = U16_LE_AT(&formatSpec[18]);
235 if (validBitsPerSample != mBitsPerSample) {
236 if (validBitsPerSample != 0) {
237 ALOGE("validBits(%d) != bitsPerSample(%d) are not supported",
238 validBitsPerSample, mBitsPerSample);
239 return ERROR_UNSUPPORTED;
240 } else {
241 // we only support valitBitsPerSample == bitsPerSample but some WAV_EXT
242 // writers don't correctly set the valid bits value, and leave it at 0.
243 ALOGW("WAVE_EXT has 0 valid bits per sample, ignoring");
244 }
245 }
246
247 mChannelMask = U32_LE_AT(&formatSpec[20]);
248 ALOGV("numChannels=%d channelMask=0x%x", mNumChannels, mChannelMask);
249 if ((mChannelMask >> 18) != 0) {
250 ALOGE("invalid channel mask 0x%x", mChannelMask);
251 return ERROR_MALFORMED;
252 }
253
254 if ((mChannelMask != CHANNEL_MASK_USE_CHANNEL_ORDER)
255 && (popcount(mChannelMask) != mNumChannels)) {
256 ALOGE("invalid number of channels (%d) in channel mask (0x%x)",
257 popcount(mChannelMask), mChannelMask);
258 return ERROR_MALFORMED;
259 }
260
261 // In a WAVE_EXT header, the first two bytes of the GUID stored at byte 24 contain
262 // the sample format, using the same definitions as a regular WAV header
263 mWaveFormat = U16_LE_AT(&formatSpec[24]);
264 if (mWaveFormat != WAVE_FORMAT_PCM
265 && mWaveFormat != WAVE_FORMAT_ALAW
266 && mWaveFormat != WAVE_FORMAT_MULAW) {
267 return ERROR_UNSUPPORTED;
268 }
269 if (memcmp(&formatSpec[26], WAVEEXT_SUBFORMAT, 14)) {
270 ALOGE("unsupported GUID");
271 return ERROR_UNSUPPORTED;
272 }
273 }
274
275 mValidFormat = true;
276 } else if (!memcmp(chunkHeader, "data", 4)) {
277 if (mValidFormat) {
278 mDataOffset = offset;
279 mDataSize = chunkSize;
280
281 mTrackMeta = new MetaData;
282
283 switch (mWaveFormat) {
284 case WAVE_FORMAT_PCM:
285 mTrackMeta->setCString(
286 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_RAW);
287 break;
288 case WAVE_FORMAT_ALAW:
289 mTrackMeta->setCString(
290 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_G711_ALAW);
291 break;
292 case WAVE_FORMAT_MSGSM:
293 mTrackMeta->setCString(
294 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MSGSM);
295 break;
296 default:
297 CHECK_EQ(mWaveFormat, (uint16_t)WAVE_FORMAT_MULAW);
298 mTrackMeta->setCString(
299 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_G711_MLAW);
300 break;
301 }
302
303 mTrackMeta->setInt32(kKeyChannelCount, mNumChannels);
304 mTrackMeta->setInt32(kKeyChannelMask, mChannelMask);
305 mTrackMeta->setInt32(kKeySampleRate, mSampleRate);
306
307 int64_t durationUs = 0;
308 if (mWaveFormat == WAVE_FORMAT_MSGSM) {
309 // 65 bytes decode to 320 8kHz samples
310 durationUs =
311 1000000LL * (mDataSize / 65 * 320) / 8000;
312 } else {
313 size_t bytesPerSample = mBitsPerSample >> 3;
314 durationUs =
315 1000000LL * (mDataSize / (mNumChannels * bytesPerSample))
316 / mSampleRate;
317 }
318
319 mTrackMeta->setInt64(kKeyDuration, durationUs);
320
321 return OK;
322 }
323 }
324
325 offset += chunkSize;
326 }
327
328 return NO_INIT;
329 }
330
331 const size_t WAVSource::kMaxFrameSize = 32768;
332
WAVSource(const sp<DataSource> & dataSource,const sp<MetaData> & meta,uint16_t waveFormat,int32_t bitsPerSample,off64_t offset,size_t size)333 WAVSource::WAVSource(
334 const sp<DataSource> &dataSource,
335 const sp<MetaData> &meta,
336 uint16_t waveFormat,
337 int32_t bitsPerSample,
338 off64_t offset, size_t size)
339 : mDataSource(dataSource),
340 mMeta(meta),
341 mWaveFormat(waveFormat),
342 mSampleRate(0),
343 mNumChannels(0),
344 mBitsPerSample(bitsPerSample),
345 mOffset(offset),
346 mSize(size),
347 mStarted(false),
348 mGroup(NULL) {
349 CHECK(mMeta->findInt32(kKeySampleRate, &mSampleRate));
350 CHECK(mMeta->findInt32(kKeyChannelCount, &mNumChannels));
351
352 mMeta->setInt32(kKeyMaxInputSize, kMaxFrameSize);
353 }
354
~WAVSource()355 WAVSource::~WAVSource() {
356 if (mStarted) {
357 stop();
358 }
359 }
360
start(MetaData *)361 status_t WAVSource::start(MetaData * /* params */) {
362 ALOGV("WAVSource::start");
363
364 CHECK(!mStarted);
365
366 mGroup = new MediaBufferGroup;
367 mGroup->add_buffer(new MediaBuffer(kMaxFrameSize));
368
369 if (mBitsPerSample == 8) {
370 // As a temporary buffer for 8->16 bit conversion.
371 mGroup->add_buffer(new MediaBuffer(kMaxFrameSize));
372 }
373
374 mCurrentPos = mOffset;
375
376 mStarted = true;
377
378 return OK;
379 }
380
stop()381 status_t WAVSource::stop() {
382 ALOGV("WAVSource::stop");
383
384 CHECK(mStarted);
385
386 delete mGroup;
387 mGroup = NULL;
388
389 mStarted = false;
390
391 return OK;
392 }
393
getFormat()394 sp<MetaData> WAVSource::getFormat() {
395 ALOGV("WAVSource::getFormat");
396
397 return mMeta;
398 }
399
read(MediaBuffer ** out,const ReadOptions * options)400 status_t WAVSource::read(
401 MediaBuffer **out, const ReadOptions *options) {
402 *out = NULL;
403
404 int64_t seekTimeUs;
405 ReadOptions::SeekMode mode;
406 if (options != NULL && options->getSeekTo(&seekTimeUs, &mode)) {
407 int64_t pos = 0;
408
409 if (mWaveFormat == WAVE_FORMAT_MSGSM) {
410 // 65 bytes decode to 320 8kHz samples
411 int64_t samplenumber = (seekTimeUs * mSampleRate) / 1000000;
412 int64_t framenumber = samplenumber / 320;
413 pos = framenumber * 65;
414 } else {
415 pos = (seekTimeUs * mSampleRate) / 1000000 * mNumChannels * (mBitsPerSample >> 3);
416 }
417 if (pos > (off64_t)mSize) {
418 pos = mSize;
419 }
420 mCurrentPos = pos + mOffset;
421 }
422
423 MediaBuffer *buffer;
424 status_t err = mGroup->acquire_buffer(&buffer);
425 if (err != OK) {
426 return err;
427 }
428
429 // make sure that maxBytesToRead is multiple of 3, in 24-bit case
430 size_t maxBytesToRead =
431 mBitsPerSample == 8 ? kMaxFrameSize / 2 :
432 (mBitsPerSample == 24 ? 3*(kMaxFrameSize/3): kMaxFrameSize);
433
434 size_t maxBytesAvailable =
435 (mCurrentPos - mOffset >= (off64_t)mSize)
436 ? 0 : mSize - (mCurrentPos - mOffset);
437
438 if (maxBytesToRead > maxBytesAvailable) {
439 maxBytesToRead = maxBytesAvailable;
440 }
441
442 if (mWaveFormat == WAVE_FORMAT_MSGSM) {
443 // Microsoft packs 2 frames into 65 bytes, rather than using separate 33-byte frames,
444 // so read multiples of 65, and use smaller buffers to account for ~10:1 expansion ratio
445 if (maxBytesToRead > 1024) {
446 maxBytesToRead = 1024;
447 }
448 maxBytesToRead = (maxBytesToRead / 65) * 65;
449 } else {
450 // read only integral amounts of audio unit frames.
451 const size_t inputUnitFrameSize = mNumChannels * mBitsPerSample / 8;
452 maxBytesToRead -= maxBytesToRead % inputUnitFrameSize;
453 }
454
455 ssize_t n = mDataSource->readAt(
456 mCurrentPos, buffer->data(),
457 maxBytesToRead);
458
459 if (n <= 0) {
460 buffer->release();
461 buffer = NULL;
462
463 return ERROR_END_OF_STREAM;
464 }
465
466 buffer->set_range(0, n);
467
468 if (mWaveFormat == WAVE_FORMAT_PCM || mWaveFormat == WAVE_FORMAT_EXTENSIBLE) {
469 if (mBitsPerSample == 8) {
470 // Convert 8-bit unsigned samples to 16-bit signed.
471
472 MediaBuffer *tmp;
473 CHECK_EQ(mGroup->acquire_buffer(&tmp), (status_t)OK);
474
475 // The new buffer holds the sample number of samples, but each
476 // one is 2 bytes wide.
477 tmp->set_range(0, 2 * n);
478
479 int16_t *dst = (int16_t *)tmp->data();
480 const uint8_t *src = (const uint8_t *)buffer->data();
481 ssize_t numBytes = n;
482
483 while (numBytes-- > 0) {
484 *dst++ = ((int16_t)(*src) - 128) * 256;
485 ++src;
486 }
487
488 buffer->release();
489 buffer = tmp;
490 } else if (mBitsPerSample == 24) {
491 // Convert 24-bit signed samples to 16-bit signed.
492
493 const uint8_t *src =
494 (const uint8_t *)buffer->data() + buffer->range_offset();
495 int16_t *dst = (int16_t *)src;
496
497 size_t numSamples = buffer->range_length() / 3;
498 for (size_t i = 0; i < numSamples; ++i) {
499 int32_t x = (int32_t)(src[0] | src[1] << 8 | src[2] << 16);
500 x = (x << 8) >> 8; // sign extension
501
502 x = x >> 8;
503 *dst++ = (int16_t)x;
504 src += 3;
505 }
506
507 buffer->set_range(buffer->range_offset(), 2 * numSamples);
508 }
509 }
510
511 int64_t timeStampUs = 0;
512
513 if (mWaveFormat == WAVE_FORMAT_MSGSM) {
514 timeStampUs = 1000000LL * (mCurrentPos - mOffset) * 320 / 65 / mSampleRate;
515 } else {
516 size_t bytesPerSample = mBitsPerSample >> 3;
517 timeStampUs = 1000000LL * (mCurrentPos - mOffset)
518 / (mNumChannels * bytesPerSample) / mSampleRate;
519 }
520
521 buffer->meta_data()->setInt64(kKeyTime, timeStampUs);
522
523 buffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
524 mCurrentPos += n;
525
526 *out = buffer;
527
528 return OK;
529 }
530
531 ////////////////////////////////////////////////////////////////////////////////
532
SniffWAV(const sp<DataSource> & source,String8 * mimeType,float * confidence,sp<AMessage> *)533 bool SniffWAV(
534 const sp<DataSource> &source, String8 *mimeType, float *confidence,
535 sp<AMessage> *) {
536 char header[12];
537 if (source->readAt(0, header, sizeof(header)) < (ssize_t)sizeof(header)) {
538 return false;
539 }
540
541 if (memcmp(header, "RIFF", 4) || memcmp(&header[8], "WAVE", 4)) {
542 return false;
543 }
544
545 sp<MediaExtractor> extractor = new WAVExtractor(source);
546 if (extractor->countTracks() == 0) {
547 return false;
548 }
549
550 *mimeType = MEDIA_MIMETYPE_CONTAINER_WAV;
551 *confidence = 0.3f;
552
553 return true;
554 }
555
556 } // namespace android
557