• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2009 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 //#define LOG_NDEBUG 0
18 #define LOG_TAG "WAVExtractor"
19 #include <utils/Log.h>
20 
21 #include "include/WAVExtractor.h"
22 
23 #include <media/stagefright/foundation/ADebug.h>
24 #include <media/stagefright/DataSource.h>
25 #include <media/stagefright/MediaBufferGroup.h>
26 #include <media/stagefright/MediaDefs.h>
27 #include <media/stagefright/MediaErrors.h>
28 #include <media/stagefright/MediaSource.h>
29 #include <media/stagefright/MetaData.h>
30 #include <utils/String8.h>
31 #include <cutils/bitops.h>
32 
33 #define CHANNEL_MASK_USE_CHANNEL_ORDER 0
34 
35 namespace android {
36 
// Values of the 16-bit format tag that init() reads from the first two bytes
// of the "fmt " chunk. Any tag not listed here makes init() return
// ERROR_UNSUPPORTED.
37 enum {
38     WAVE_FORMAT_PCM        = 0x0001,  // exposed as MEDIA_MIMETYPE_AUDIO_RAW
39     WAVE_FORMAT_ALAW       = 0x0006,  // exposed as MEDIA_MIMETYPE_AUDIO_G711_ALAW
40     WAVE_FORMAT_MULAW      = 0x0007,  // exposed as MEDIA_MIMETYPE_AUDIO_G711_MLAW
41     WAVE_FORMAT_MSGSM      = 0x0031,  // exposed as MEDIA_MIMETYPE_AUDIO_MSGSM
42     WAVE_FORMAT_EXTENSIBLE = 0xFFFE   // actual format is taken from the sub-format field
43 };
44 
// The 14 bytes init() requires at offset 26 of a WAVE_FORMAT_EXTENSIBLE
// "fmt " chunk, i.e. the sub-format GUID without its first two bytes (those
// two bytes carry the format tag and are checked separately).
45 static const char* WAVEEXT_SUBFORMAT = "\x00\x00\x00\x00\x10\x00\x80\x00\x00\xAA\x00\x38\x9B\x71";
46 
47 
// Decodes the unsigned 32-bit little-endian integer stored in ptr[0..3].
//
// Each byte is widened to uint32_t before it is shifted: a bare uint8_t is
// promoted to (signed) int, and shifting a byte >= 0x80 left by 24 bits would
// overflow that int.
static uint32_t U32_LE_AT(const uint8_t *ptr) {
    return (static_cast<uint32_t>(ptr[3]) << 24)
            | (static_cast<uint32_t>(ptr[2]) << 16)
            | (static_cast<uint32_t>(ptr[1]) << 8)
            | static_cast<uint32_t>(ptr[0]);
}
51 
// Decodes the unsigned 16-bit little-endian integer stored in ptr[0..1].
static uint16_t U16_LE_AT(const uint8_t *ptr) {
    const uint8_t lowByte = ptr[0];
    const uint8_t highByte = ptr[1];
    return highByte << 8 | lowByte;
}
55 
// MediaSource that hands out the audio payload of a WAV file in buffers of at
// most kMaxFrameSize bytes. It reads the byte range [offset, offset + size) of
// the given DataSource; for PCM data read() converts 8-bit and 24-bit samples
// to 16-bit before returning them.
56 struct WAVSource : public MediaSource {
    // dataSource:    source the sample bytes are read from.
    // meta:          track metadata; must already contain kKeySampleRate and
    //                kKeyChannelCount (the constructor CHECKs both).
    // waveFormat:    one of the WAVE_FORMAT_* tags.
    // bitsPerSample: sample width as stored in the file.
    // offset, size:  position and length in bytes of the audio data within
    //                dataSource.
57     WAVSource(
58             const sp<DataSource> &dataSource,
59             const sp<MetaData> &meta,
60             uint16_t waveFormat,
61             int32_t bitsPerSample,
62             off64_t offset, size_t size);
63 
    // start() allocates the buffer group and rewinds to the beginning of the
    // data; stop() frees the buffer group again.
64     virtual status_t start(MetaData *params = NULL);
65     virtual status_t stop();
    // Returns the metadata object passed to the constructor.
66     virtual sp<MetaData> getFormat();
67 
    // Returns the next buffer of audio in *buffer, honouring a seek request
    // carried in options, or ERROR_END_OF_STREAM when no more data is read.
68     virtual status_t read(
69             MediaBuffer **buffer, const ReadOptions *options = NULL);
70 
71 protected:
    // Stops the source if it is still started.
72     virtual ~WAVSource();
73 
74 private:
    // Size in bytes of each buffer allocated by start().
75     static const size_t kMaxFrameSize;
76 
77     sp<DataSource> mDataSource;
78     sp<MetaData> mMeta;
79     uint16_t mWaveFormat;
80     int32_t mSampleRate;      // read from mMeta (kKeySampleRate)
81     int32_t mNumChannels;     // read from mMeta (kKeyChannelCount)
82     int32_t mBitsPerSample;
83     off64_t mOffset;          // start of the audio data within mDataSource
84     size_t mSize;             // length of the audio data in bytes
85     bool mStarted;            // true between start() and stop()
86     MediaBufferGroup *mGroup; // owned; non-NULL only while started
87     off64_t mCurrentPos;      // absolute position in mDataSource of the next read
88 
    // Declared private with no definition in the visible source; presumably
    // this disallows copying.
89     WAVSource(const WAVSource &);
90     WAVSource &operator=(const WAVSource &);
91 };
92 
WAVExtractor(const sp<DataSource> & source)93 WAVExtractor::WAVExtractor(const sp<DataSource> &source)
94     : mDataSource(source),
95       mValidFormat(false),
96       mChannelMask(CHANNEL_MASK_USE_CHANNEL_ORDER) {
97     mInitCheck = init();
98 }
99 
~WAVExtractor()100 WAVExtractor::~WAVExtractor() {
101 }
102 
getMetaData()103 sp<MetaData> WAVExtractor::getMetaData() {
104     sp<MetaData> meta = new MetaData;
105 
106     if (mInitCheck != OK) {
107         return meta;
108     }
109 
110     meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_WAV);
111 
112     return meta;
113 }
114 
countTracks()115 size_t WAVExtractor::countTracks() {
116     return mInitCheck == OK ? 1 : 0;
117 }
118 
getTrack(size_t index)119 sp<MediaSource> WAVExtractor::getTrack(size_t index) {
120     if (mInitCheck != OK || index > 0) {
121         return NULL;
122     }
123 
124     return new WAVSource(
125             mDataSource, mTrackMeta,
126             mWaveFormat, mBitsPerSample, mDataOffset, mDataSize);
127 }
128 
getTrackMetaData(size_t index,uint32_t)129 sp<MetaData> WAVExtractor::getTrackMetaData(
130         size_t index, uint32_t /* flags */) {
131     if (mInitCheck != OK || index > 0) {
132         return NULL;
133     }
134 
135     return mTrackMeta;
136 }
137 
init()138 status_t WAVExtractor::init() {
139     uint8_t header[12];
140     if (mDataSource->readAt(
141                 0, header, sizeof(header)) < (ssize_t)sizeof(header)) {
142         return NO_INIT;
143     }
144 
145     if (memcmp(header, "RIFF", 4) || memcmp(&header[8], "WAVE", 4)) {
146         return NO_INIT;
147     }
148 
149     size_t totalSize = U32_LE_AT(&header[4]);
150 
151     off64_t offset = 12;
152     size_t remainingSize = totalSize;
153     while (remainingSize >= 8) {
154         uint8_t chunkHeader[8];
155         if (mDataSource->readAt(offset, chunkHeader, 8) < 8) {
156             return NO_INIT;
157         }
158 
159         remainingSize -= 8;
160         offset += 8;
161 
162         uint32_t chunkSize = U32_LE_AT(&chunkHeader[4]);
163 
164         if (chunkSize > remainingSize) {
165             return NO_INIT;
166         }
167 
168         if (!memcmp(chunkHeader, "fmt ", 4)) {
169             if (chunkSize < 16) {
170                 return NO_INIT;
171             }
172 
173             uint8_t formatSpec[40];
174             if (mDataSource->readAt(offset, formatSpec, 2) < 2) {
175                 return NO_INIT;
176             }
177 
178             mWaveFormat = U16_LE_AT(formatSpec);
179             if (mWaveFormat != WAVE_FORMAT_PCM
180                     && mWaveFormat != WAVE_FORMAT_ALAW
181                     && mWaveFormat != WAVE_FORMAT_MULAW
182                     && mWaveFormat != WAVE_FORMAT_MSGSM
183                     && mWaveFormat != WAVE_FORMAT_EXTENSIBLE) {
184                 return ERROR_UNSUPPORTED;
185             }
186 
187             uint8_t fmtSize = 16;
188             if (mWaveFormat == WAVE_FORMAT_EXTENSIBLE) {
189                 fmtSize = 40;
190             }
191             if (mDataSource->readAt(offset, formatSpec, fmtSize) < fmtSize) {
192                 return NO_INIT;
193             }
194 
195             mNumChannels = U16_LE_AT(&formatSpec[2]);
196             if (mWaveFormat != WAVE_FORMAT_EXTENSIBLE) {
197                 if (mNumChannels != 1 && mNumChannels != 2) {
198                     ALOGW("More than 2 channels (%d) in non-WAVE_EXT, unknown channel mask",
199                             mNumChannels);
200                 }
201             } else {
202                 if (mNumChannels < 1 && mNumChannels > 8) {
203                     return ERROR_UNSUPPORTED;
204                 }
205             }
206 
207             mSampleRate = U32_LE_AT(&formatSpec[4]);
208 
209             if (mSampleRate == 0) {
210                 return ERROR_MALFORMED;
211             }
212 
213             mBitsPerSample = U16_LE_AT(&formatSpec[14]);
214 
215             if (mWaveFormat == WAVE_FORMAT_PCM
216                     || mWaveFormat == WAVE_FORMAT_EXTENSIBLE) {
217                 if (mBitsPerSample != 8 && mBitsPerSample != 16
218                     && mBitsPerSample != 24) {
219                     return ERROR_UNSUPPORTED;
220                 }
221             } else if (mWaveFormat == WAVE_FORMAT_MSGSM) {
222                 if (mBitsPerSample != 0) {
223                     return ERROR_UNSUPPORTED;
224                 }
225             } else {
226                 CHECK(mWaveFormat == WAVE_FORMAT_MULAW
227                         || mWaveFormat == WAVE_FORMAT_ALAW);
228                 if (mBitsPerSample != 8) {
229                     return ERROR_UNSUPPORTED;
230                 }
231             }
232 
233             if (mWaveFormat == WAVE_FORMAT_EXTENSIBLE) {
234                 uint16_t validBitsPerSample = U16_LE_AT(&formatSpec[18]);
235                 if (validBitsPerSample != mBitsPerSample) {
236                     if (validBitsPerSample != 0) {
237                         ALOGE("validBits(%d) != bitsPerSample(%d) are not supported",
238                                 validBitsPerSample, mBitsPerSample);
239                         return ERROR_UNSUPPORTED;
240                     } else {
241                         // we only support valitBitsPerSample == bitsPerSample but some WAV_EXT
242                         // writers don't correctly set the valid bits value, and leave it at 0.
243                         ALOGW("WAVE_EXT has 0 valid bits per sample, ignoring");
244                     }
245                 }
246 
247                 mChannelMask = U32_LE_AT(&formatSpec[20]);
248                 ALOGV("numChannels=%d channelMask=0x%x", mNumChannels, mChannelMask);
249                 if ((mChannelMask >> 18) != 0) {
250                     ALOGE("invalid channel mask 0x%x", mChannelMask);
251                     return ERROR_MALFORMED;
252                 }
253 
254                 if ((mChannelMask != CHANNEL_MASK_USE_CHANNEL_ORDER)
255                         && (popcount(mChannelMask) != mNumChannels)) {
256                     ALOGE("invalid number of channels (%d) in channel mask (0x%x)",
257                             popcount(mChannelMask), mChannelMask);
258                     return ERROR_MALFORMED;
259                 }
260 
261                 // In a WAVE_EXT header, the first two bytes of the GUID stored at byte 24 contain
262                 // the sample format, using the same definitions as a regular WAV header
263                 mWaveFormat = U16_LE_AT(&formatSpec[24]);
264                 if (mWaveFormat != WAVE_FORMAT_PCM
265                         && mWaveFormat != WAVE_FORMAT_ALAW
266                         && mWaveFormat != WAVE_FORMAT_MULAW) {
267                     return ERROR_UNSUPPORTED;
268                 }
269                 if (memcmp(&formatSpec[26], WAVEEXT_SUBFORMAT, 14)) {
270                     ALOGE("unsupported GUID");
271                     return ERROR_UNSUPPORTED;
272                 }
273             }
274 
275             mValidFormat = true;
276         } else if (!memcmp(chunkHeader, "data", 4)) {
277             if (mValidFormat) {
278                 mDataOffset = offset;
279                 mDataSize = chunkSize;
280 
281                 mTrackMeta = new MetaData;
282 
283                 switch (mWaveFormat) {
284                     case WAVE_FORMAT_PCM:
285                         mTrackMeta->setCString(
286                                 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_RAW);
287                         break;
288                     case WAVE_FORMAT_ALAW:
289                         mTrackMeta->setCString(
290                                 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_G711_ALAW);
291                         break;
292                     case WAVE_FORMAT_MSGSM:
293                         mTrackMeta->setCString(
294                                 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MSGSM);
295                         break;
296                     default:
297                         CHECK_EQ(mWaveFormat, (uint16_t)WAVE_FORMAT_MULAW);
298                         mTrackMeta->setCString(
299                                 kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_G711_MLAW);
300                         break;
301                 }
302 
303                 mTrackMeta->setInt32(kKeyChannelCount, mNumChannels);
304                 mTrackMeta->setInt32(kKeyChannelMask, mChannelMask);
305                 mTrackMeta->setInt32(kKeySampleRate, mSampleRate);
306 
307                 int64_t durationUs = 0;
308                 if (mWaveFormat == WAVE_FORMAT_MSGSM) {
309                     // 65 bytes decode to 320 8kHz samples
310                     durationUs =
311                         1000000LL * (mDataSize / 65 * 320) / 8000;
312                 } else {
313                     size_t bytesPerSample = mBitsPerSample >> 3;
314                     durationUs =
315                         1000000LL * (mDataSize / (mNumChannels * bytesPerSample))
316                             / mSampleRate;
317                 }
318 
319                 mTrackMeta->setInt64(kKeyDuration, durationUs);
320 
321                 return OK;
322             }
323         }
324 
325         offset += chunkSize;
326     }
327 
328     return NO_INIT;
329 }
330 
// Size in bytes of every MediaBuffer allocated by start(); also published to
// clients as kKeyMaxInputSize by the WAVSource constructor.
331 const size_t WAVSource::kMaxFrameSize = 32768;
332 
WAVSource(const sp<DataSource> & dataSource,const sp<MetaData> & meta,uint16_t waveFormat,int32_t bitsPerSample,off64_t offset,size_t size)333 WAVSource::WAVSource(
334         const sp<DataSource> &dataSource,
335         const sp<MetaData> &meta,
336         uint16_t waveFormat,
337         int32_t bitsPerSample,
338         off64_t offset, size_t size)
339     : mDataSource(dataSource),
340       mMeta(meta),
341       mWaveFormat(waveFormat),
342       mSampleRate(0),
343       mNumChannels(0),
344       mBitsPerSample(bitsPerSample),
345       mOffset(offset),
346       mSize(size),
347       mStarted(false),
348       mGroup(NULL) {
349     CHECK(mMeta->findInt32(kKeySampleRate, &mSampleRate));
350     CHECK(mMeta->findInt32(kKeyChannelCount, &mNumChannels));
351 
352     mMeta->setInt32(kKeyMaxInputSize, kMaxFrameSize);
353 }
354 
~WAVSource()355 WAVSource::~WAVSource() {
356     if (mStarted) {
357         stop();
358     }
359 }
360 
start(MetaData *)361 status_t WAVSource::start(MetaData * /* params */) {
362     ALOGV("WAVSource::start");
363 
364     CHECK(!mStarted);
365 
366     mGroup = new MediaBufferGroup;
367     mGroup->add_buffer(new MediaBuffer(kMaxFrameSize));
368 
369     if (mBitsPerSample == 8) {
370         // As a temporary buffer for 8->16 bit conversion.
371         mGroup->add_buffer(new MediaBuffer(kMaxFrameSize));
372     }
373 
374     mCurrentPos = mOffset;
375 
376     mStarted = true;
377 
378     return OK;
379 }
380 
stop()381 status_t WAVSource::stop() {
382     ALOGV("WAVSource::stop");
383 
384     CHECK(mStarted);
385 
386     delete mGroup;
387     mGroup = NULL;
388 
389     mStarted = false;
390 
391     return OK;
392 }
393 
getFormat()394 sp<MetaData> WAVSource::getFormat() {
395     ALOGV("WAVSource::getFormat");
396 
397     return mMeta;
398 }
399 
read(MediaBuffer ** out,const ReadOptions * options)400 status_t WAVSource::read(
401         MediaBuffer **out, const ReadOptions *options) {
402     *out = NULL;
403 
404     int64_t seekTimeUs;
405     ReadOptions::SeekMode mode;
406     if (options != NULL && options->getSeekTo(&seekTimeUs, &mode)) {
407         int64_t pos = 0;
408 
409         if (mWaveFormat == WAVE_FORMAT_MSGSM) {
410             // 65 bytes decode to 320 8kHz samples
411             int64_t samplenumber = (seekTimeUs * mSampleRate) / 1000000;
412             int64_t framenumber = samplenumber / 320;
413             pos = framenumber * 65;
414         } else {
415             pos = (seekTimeUs * mSampleRate) / 1000000 * mNumChannels * (mBitsPerSample >> 3);
416         }
417         if (pos > (off64_t)mSize) {
418             pos = mSize;
419         }
420         mCurrentPos = pos + mOffset;
421     }
422 
423     MediaBuffer *buffer;
424     status_t err = mGroup->acquire_buffer(&buffer);
425     if (err != OK) {
426         return err;
427     }
428 
429     // make sure that maxBytesToRead is multiple of 3, in 24-bit case
430     size_t maxBytesToRead =
431         mBitsPerSample == 8 ? kMaxFrameSize / 2 :
432         (mBitsPerSample == 24 ? 3*(kMaxFrameSize/3): kMaxFrameSize);
433 
434     size_t maxBytesAvailable =
435         (mCurrentPos - mOffset >= (off64_t)mSize)
436             ? 0 : mSize - (mCurrentPos - mOffset);
437 
438     if (maxBytesToRead > maxBytesAvailable) {
439         maxBytesToRead = maxBytesAvailable;
440     }
441 
442     if (mWaveFormat == WAVE_FORMAT_MSGSM) {
443         // Microsoft packs 2 frames into 65 bytes, rather than using separate 33-byte frames,
444         // so read multiples of 65, and use smaller buffers to account for ~10:1 expansion ratio
445         if (maxBytesToRead > 1024) {
446             maxBytesToRead = 1024;
447         }
448         maxBytesToRead = (maxBytesToRead / 65) * 65;
449     } else {
450         // read only integral amounts of audio unit frames.
451         const size_t inputUnitFrameSize = mNumChannels * mBitsPerSample / 8;
452         maxBytesToRead -= maxBytesToRead % inputUnitFrameSize;
453     }
454 
455     ssize_t n = mDataSource->readAt(
456             mCurrentPos, buffer->data(),
457             maxBytesToRead);
458 
459     if (n <= 0) {
460         buffer->release();
461         buffer = NULL;
462 
463         return ERROR_END_OF_STREAM;
464     }
465 
466     buffer->set_range(0, n);
467 
468     if (mWaveFormat == WAVE_FORMAT_PCM || mWaveFormat == WAVE_FORMAT_EXTENSIBLE) {
469         if (mBitsPerSample == 8) {
470             // Convert 8-bit unsigned samples to 16-bit signed.
471 
472             MediaBuffer *tmp;
473             CHECK_EQ(mGroup->acquire_buffer(&tmp), (status_t)OK);
474 
475             // The new buffer holds the sample number of samples, but each
476             // one is 2 bytes wide.
477             tmp->set_range(0, 2 * n);
478 
479             int16_t *dst = (int16_t *)tmp->data();
480             const uint8_t *src = (const uint8_t *)buffer->data();
481             ssize_t numBytes = n;
482 
483             while (numBytes-- > 0) {
484                 *dst++ = ((int16_t)(*src) - 128) * 256;
485                 ++src;
486             }
487 
488             buffer->release();
489             buffer = tmp;
490         } else if (mBitsPerSample == 24) {
491             // Convert 24-bit signed samples to 16-bit signed.
492 
493             const uint8_t *src =
494                 (const uint8_t *)buffer->data() + buffer->range_offset();
495             int16_t *dst = (int16_t *)src;
496 
497             size_t numSamples = buffer->range_length() / 3;
498             for (size_t i = 0; i < numSamples; ++i) {
499                 int32_t x = (int32_t)(src[0] | src[1] << 8 | src[2] << 16);
500                 x = (x << 8) >> 8;  // sign extension
501 
502                 x = x >> 8;
503                 *dst++ = (int16_t)x;
504                 src += 3;
505             }
506 
507             buffer->set_range(buffer->range_offset(), 2 * numSamples);
508         }
509     }
510 
511     int64_t timeStampUs = 0;
512 
513     if (mWaveFormat == WAVE_FORMAT_MSGSM) {
514         timeStampUs = 1000000LL * (mCurrentPos - mOffset) * 320 / 65 / mSampleRate;
515     } else {
516         size_t bytesPerSample = mBitsPerSample >> 3;
517         timeStampUs = 1000000LL * (mCurrentPos - mOffset)
518                 / (mNumChannels * bytesPerSample) / mSampleRate;
519     }
520 
521     buffer->meta_data()->setInt64(kKeyTime, timeStampUs);
522 
523     buffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
524     mCurrentPos += n;
525 
526     *out = buffer;
527 
528     return OK;
529 }
530 
531 ////////////////////////////////////////////////////////////////////////////////
532 
SniffWAV(const sp<DataSource> & source,String8 * mimeType,float * confidence,sp<AMessage> *)533 bool SniffWAV(
534         const sp<DataSource> &source, String8 *mimeType, float *confidence,
535         sp<AMessage> *) {
536     char header[12];
537     if (source->readAt(0, header, sizeof(header)) < (ssize_t)sizeof(header)) {
538         return false;
539     }
540 
541     if (memcmp(header, "RIFF", 4) || memcmp(&header[8], "WAVE", 4)) {
542         return false;
543     }
544 
545     sp<MediaExtractor> extractor = new WAVExtractor(source);
546     if (extractor->countTracks() == 0) {
547         return false;
548     }
549 
550     *mimeType = MEDIA_MIMETYPE_CONTAINER_WAV;
551     *confidence = 0.3f;
552 
553     return true;
554 }
555 
556 }  // namespace android
557