// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "content/renderer/media/android/audio_decoder_android.h"

#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdint.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

#include <algorithm>
#include <iterator>
#include <limits>
#include <string>
#include <vector>

#include "base/file_descriptor_posix.h"
#include "base/logging.h"
#include "base/memory/shared_memory.h"
#include "base/posix/eintr_wrapper.h"
#include "base/process/process.h"
#include "content/common/view_messages.h"
#include "media/base/android/webaudio_media_codec_info.h"
#include "media/base/audio_bus.h"
#include "media/base/limits.h"
#include "third_party/WebKit/public/platform/WebAudioBus.h"

namespace content {

class AudioDecoderIO {
 public:
  AudioDecoderIO(const char* data, size_t data_size);
  ~AudioDecoderIO();
  bool ShareEncodedToProcess(base::SharedMemoryHandle* handle);

  // Returns true if AudioDecoderIO was successfully created.
  bool IsValid() const;

  int read_fd() const { return read_fd_; }
  int write_fd() const { return write_fd_; }

 private:
  // Shared memory that will hold the encoded audio data. This is
  // used by MediaCodec for decoding.
  base::SharedMemory encoded_shared_memory_;

  // A pipe used to communicate with MediaCodec. MediaCodec owns
  // write_fd_ and writes to it.
  int read_fd_;
  int write_fd_;

  DISALLOW_COPY_AND_ASSIGN(AudioDecoderIO);
};

AudioDecoderIO::AudioDecoderIO(const char* data, size_t data_size)
    : read_fd_(-1),
      write_fd_(-1) {
  // Reject null or empty input, and anything larger than 2 GiB
  // (0x80000000 bytes), which cannot be a reasonable encoded audio file.
  if (!data || !data_size || data_size > 0x80000000)
    return;

  // Create the shared memory and copy our data to it so that
  // MediaCodec can access it.
  encoded_shared_memory_.CreateAndMapAnonymous(data_size);

  if (!encoded_shared_memory_.memory())
    return;

  memcpy(encoded_shared_memory_.memory(), data, data_size);

  // Create a pipe for reading/writing the decoded PCM data.
  int pipefd[2];

  if (pipe(pipefd))
    return;

  read_fd_ = pipefd[0];
  write_fd_ = pipefd[1];
}

AudioDecoderIO::~AudioDecoderIO() {
  // Close the read end of the pipe. The write end should have been
  // closed by MediaCodec.
  if (read_fd_ >= 0 && close(read_fd_)) {
    DVLOG(1) << "Cannot close read fd " << read_fd_
             << ": " << strerror(errno);
  }
}

bool AudioDecoderIO::IsValid() const {
  return read_fd_ >= 0 && write_fd_ >= 0 &&
      encoded_shared_memory_.memory();
}

bool AudioDecoderIO::ShareEncodedToProcess(base::SharedMemoryHandle* handle) {
  return encoded_shared_memory_.ShareToProcess(
      base::Process::Current().handle(),
      handle);
}

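// Convert a signed 16-bit PCM sample to a float in [-1, 1]. Positive and
// negative samples are scaled separately because the int16_t range is
// asymmetric ([-32768, 32767]); scaling each sign by its own extreme maps
// both endpoints exactly onto -1 and +1.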
static float ConvertSampleToFloat(int16_t sample) {
  const float kMaxScale = 1.0f / std::numeric_limits<int16_t>::max();
  const float kMinScale = -1.0f / std::numeric_limits<int16_t>::min();

  return sample * (sample < 0 ? kMinScale : kMaxScale);
}

// A basic WAVE file decoder. See
// https://ccrma.stanford.edu/courses/422/projects/WaveFormat/ for a
// basic guide to the WAVE file format.
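//
// For reference, the canonical layout of the simple PCM WAVE files this
// decoder handles is roughly:
//
//   "RIFF" <chunk size> "WAVE"
//   "fmt " 16 <audio format = 1 (PCM)> <channels> <sample rate>
//          <byte rate> <block align> <bits per sample>
//   "data" <chunk size> <interleaved PCM samples>
//
// All multi-byte integer fields are little-endian.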
class WAVEDecoder {
 public:
  WAVEDecoder(const uint8_t* data, size_t data_size);
  ~WAVEDecoder();

  // Try to decode the data as a WAVE file. If the data is a supported
  // WAVE file, |destination_bus| is filled with the decoded data and
  // DecodeWAVEFile returns true. Otherwise, DecodeWAVEFile returns
  // false.
  bool DecodeWAVEFile(blink::WebAudioBus* destination_bus);

 private:
  // Minimum number of bytes in a WAVE file to hold all of the data we
  // need to interpret it as a WAVE file.
  static const unsigned kMinimumWAVLength = 44;

  // Number of bytes in the chunk ID field.
  static const unsigned kChunkIDLength = 4;

  // Number of bytes in the chunk size field.
  static const unsigned kChunkSizeLength = 4;

  // Number of bytes in the format field of the "RIFF" chunk.
  static const unsigned kFormatFieldLength = 4;

  // Number of bytes in a valid "fmt" chunk.
  static const unsigned kFMTChunkLength = 16;

  // Supported audio format in a WAVE file.
  // TODO(rtoy): Consider supporting other formats here, if necessary.
  static const int16_t kAudioFormatPCM = 1;

  // Maximum number (inclusive) of bytes per sample supported by this
  // decoder.
  static const unsigned kMaximumBytesPerSample = 3;

  // Read an unsigned integer of |length| bytes from |buffer|. The
  // integer is interpreted as being in little-endian order.
  uint32_t ReadUnsignedInteger(const uint8_t* buffer, size_t length);

  // Read a PCM sample from the WAVE data at |pcm_data|.
  int16_t ReadPCMSample(const uint8_t* pcm_data);

  // Read a WAVE chunk header including the chunk ID and chunk size.
  // Returns false if the header could not be read.
  bool ReadChunkHeader();

  // Read and parse the "fmt" chunk. Returns false if the fmt chunk
  // could not be read or contained unsupported formats.
  bool ReadFMTChunk();

  // Read the data chunk and save it to |destination_bus|. Returns false
  // if the data chunk could not be read correctly.
  bool CopyDataChunkToBus(blink::WebAudioBus* destination_bus);

  // The WAVE chunk ID that identifies the chunk.
  uint8_t chunk_id_[kChunkIDLength];

  // The number of bytes in the data portion of the chunk.
  size_t chunk_size_;

  // The total number of bytes in the encoded data.
  size_t data_size_;

  // The current position within the WAVE file.
  const uint8_t* buffer_;

  // Points one byte past the end of the in-memory WAVE file. Used for
  // detecting if we've reached the end of the file.
  const uint8_t* buffer_end_;

  // Number of bytes per sample in the WAVE data (1, 2, or 3).
  size_t bytes_per_sample_;

  // Number of channels in the WAVE data.
  uint16_t number_of_channels_;

  // Sample rate of the WAVE data, in Hz.
  uint32_t sample_rate_;

  DISALLOW_COPY_AND_ASSIGN(WAVEDecoder);
};

WAVEDecoder::WAVEDecoder(const uint8_t* encoded_data, size_t data_size)
    : data_size_(data_size),
      buffer_(encoded_data),
      buffer_end_(encoded_data + 1),
      bytes_per_sample_(0),
      number_of_channels_(0),
      sample_rate_(0) {
  if (buffer_ + data_size > buffer_)
    buffer_end_ = buffer_ + data_size;
}

WAVEDecoder::~WAVEDecoder() {}

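// All integer fields in a WAVE file are little-endian; for example, the byte
// sequence {0x34, 0x12} decodes to the value 0x1234.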
uint32_t WAVEDecoder::ReadUnsignedInteger(const uint8_t* buffer,
                                          size_t length) {
  uint32_t value = 0;

  if (length == 0 || length > sizeof(value)) {
    DCHECK(false) << "ReadUnsignedInteger: Invalid length: " << length;
    return 0;
  }

  // All integer fields in a WAVE file are little-endian.
  for (size_t k = length; k > 0; --k)
    value = (value << 8) + buffer[k - 1];

  return value;
}

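// In a WAVE file, 8-bit PCM samples are unsigned with a bias of 128, while
// 16-bit and 24-bit samples are signed two's complement; the conversions
// below account for that.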
int16_t WAVEDecoder::ReadPCMSample(const uint8_t* pcm_data) {
  uint32_t unsigned_sample = ReadUnsignedInteger(pcm_data, bytes_per_sample_);
  int16_t sample;

  // Convert the unsigned data into a 16-bit PCM sample.
  switch (bytes_per_sample_) {
    case 1:
      sample = (unsigned_sample - 128) << 8;
      break;
    case 2:
      sample = static_cast<int16_t>(unsigned_sample);
      break;
    case 3:
      // Android currently converts 24-bit WAVE data into 16-bit
      // samples by taking the high-order 16 bits without rounding.
      // We do the same here for consistency.
      sample = static_cast<int16_t>(unsigned_sample >> 8);
      break;
    default:
      sample = 0;
      break;
  }
  return sample;
}

bool WAVEDecoder::ReadChunkHeader() {
  if (buffer_ + kChunkIDLength + kChunkSizeLength >= buffer_end_)
    return false;

  memcpy(chunk_id_, buffer_, kChunkIDLength);

  chunk_size_ = ReadUnsignedInteger(buffer_ + kChunkIDLength, kChunkSizeLength);

  // RIFF chunks are word-aligned: a chunk with an odd payload size is
  // followed by one pad byte, so round the size up to skip the padding.
  if (chunk_size_ % 2)
    ++chunk_size_;

  // Check for a completely bogus chunk size.
  if (chunk_size_ > data_size_)
    return false;

  return true;
}

bool WAVEDecoder::ReadFMTChunk() {
  // The fmt chunk has basic info about the format of the audio
  // data. Only a basic PCM format is supported.
  if (chunk_size_ < kFMTChunkLength) {
    DVLOG(1) << "FMT chunk too short: " << chunk_size_;
    return false;
  }

  uint16_t audio_format = ReadUnsignedInteger(buffer_, 2);

  if (audio_format != kAudioFormatPCM) {
    DVLOG(1) << "Audio format not supported: " << audio_format;
    return false;
  }

  number_of_channels_ = ReadUnsignedInteger(buffer_ + 2, 2);
  sample_rate_ = ReadUnsignedInteger(buffer_ + 4, 4);
  unsigned bits_per_sample = ReadUnsignedInteger(buffer_ + 14, 2);

  // Sanity checks.

  if (!number_of_channels_ ||
      number_of_channels_ > media::limits::kMaxChannels) {
    DVLOG(1) << "Unsupported number of channels: " << number_of_channels_;
    return false;
  }

  if (sample_rate_ < media::limits::kMinSampleRate ||
      sample_rate_ > media::limits::kMaxSampleRate) {
    DVLOG(1) << "Unsupported sample rate: " << sample_rate_;
    return false;
  }

  // We only support 8, 16, and 24 bits per sample.
  if (bits_per_sample == 8 || bits_per_sample == 16 || bits_per_sample == 24) {
    bytes_per_sample_ = bits_per_sample / 8;
    return true;
  }

  DVLOG(1) << "Unsupported bits per sample: " << bits_per_sample;
  return false;
}

bool WAVEDecoder::CopyDataChunkToBus(blink::WebAudioBus* destination_bus) {
  // The data chunk contains the audio data itself.
  if (!bytes_per_sample_ || bytes_per_sample_ > kMaximumBytesPerSample) {
    DVLOG(1) << "WARNING: data chunk without preceding fmt chunk,"
             << " or invalid bytes per sample.";
    return false;
  }

  VLOG(0) << "Decoding WAVE file: " << number_of_channels_ << " channels, "
          << sample_rate_ << " Hz, "
          << chunk_size_ / bytes_per_sample_ / number_of_channels_
          << " frames, " << 8 * bytes_per_sample_ << " bits/sample";

  // Create the destination bus of the appropriate size and then decode
  // the data into the bus.
  size_t number_of_frames =
      chunk_size_ / bytes_per_sample_ / number_of_channels_;

  destination_bus->initialize(
      number_of_channels_, number_of_frames, sample_rate_);

  for (size_t m = 0; m < number_of_frames; ++m) {
    for (uint16_t k = 0; k < number_of_channels_; ++k) {
      int16_t sample = ReadPCMSample(buffer_);

      buffer_ += bytes_per_sample_;
      destination_bus->channelData(k)[m] = ConvertSampleToFloat(sample);
    }
  }

  return true;
}

bool WAVEDecoder::DecodeWAVEFile(blink::WebAudioBus* destination_bus) {
  // Parse and decode the WAVE file. If we can't parse it, return false.

  if (buffer_ + kMinimumWAVLength > buffer_end_) {
    DVLOG(1) << "Buffer too small to contain full WAVE header";
    return false;
  }

  // Do we have a RIFF file?
  ReadChunkHeader();
  if (memcmp(chunk_id_, "RIFF", kChunkIDLength) != 0) {
    DVLOG(1) << "RIFF missing";
    return false;
  }
  buffer_ += kChunkIDLength + kChunkSizeLength;

  // Check the format field of the RIFF chunk.
  memcpy(chunk_id_, buffer_, kFormatFieldLength);
  if (memcmp(chunk_id_, "WAVE", kFormatFieldLength) != 0) {
    DVLOG(1) << "Invalid WAVE file: missing WAVE header";
    return false;
  }
  // Advance past the format field.
  buffer_ += kFormatFieldLength;

  // We have a WAVE file. Start parsing the chunks.

  while (buffer_ < buffer_end_) {
    if (!ReadChunkHeader()) {
      DVLOG(1) << "Couldn't read chunk header";
      return false;
    }

    // Consume the chunk ID and chunk size.
    buffer_ += kChunkIDLength + kChunkSizeLength;

    // Make sure we can read all chunk_size bytes.
    if (buffer_ + chunk_size_ > buffer_end_) {
      DVLOG(1) << "Insufficient bytes to read chunk of size " << chunk_size_;
      return false;
    }

    if (memcmp(chunk_id_, "fmt ", kChunkIDLength) == 0) {
      if (!ReadFMTChunk())
        return false;
    } else if (memcmp(chunk_id_, "data", kChunkIDLength) == 0) {
      // Return after reading the data chunk, whether we succeeded or
      // not.
      return CopyDataChunkToBus(destination_bus);
    } else {
      // Ignore chunks that we don't know about. chunk_id_ is not
      // NUL-terminated, so stream only its four bytes.
      DVLOG(0) << "Ignoring WAVE chunk `"
               << std::string(chunk_id_, chunk_id_ + kChunkIDLength)
               << "' size " << chunk_size_;
    }

    // Advance to the next chunk.
    buffer_ += chunk_size_;
  }

  // If we get here, that means we didn't find a data chunk, so we
  // couldn't handle this WAVE file.

  return false;
}

// The number of frames is known, so preallocate the destination bus
// and copy the PCM data to the destination bus as it's being
// received.
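// The PCM stream coming over the pipe is interleaved (each frame holds one
// 16-bit sample per channel), so the samples are demultiplexed here into the
// planar channels of the destination bus.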
static void CopyPcmDataToBus(int input_fd,
                             blink::WebAudioBus* destination_bus,
                             size_t number_of_frames,
                             unsigned number_of_channels,
                             double file_sample_rate) {
  destination_bus->initialize(number_of_channels,
                              number_of_frames,
                              file_sample_rate);

  int16_t pipe_data[PIPE_BUF / sizeof(int16_t)];
  size_t decoded_frames = 0;
  size_t current_sample_in_frame = 0;
  ssize_t nread;

  while ((nread = HANDLE_EINTR(read(input_fd, pipe_data, sizeof(pipe_data)))) >
         0) {
    size_t samples_in_pipe = nread / sizeof(int16_t);

    // The pipe may not contain a whole number of frames. This is
    // especially true if the number of channels is greater than
    // 2. Thus, keep track of which sample in a frame is being
    // processed, so we handle the boundary at the end of the pipe
    // correctly.
    for (size_t m = 0; m < samples_in_pipe; ++m) {
      if (decoded_frames >= number_of_frames)
        break;

      destination_bus->channelData(current_sample_in_frame)[decoded_frames] =
          ConvertSampleToFloat(pipe_data[m]);
      ++current_sample_in_frame;

      if (current_sample_in_frame >= number_of_channels) {
        current_sample_in_frame = 0;
        ++decoded_frames;
      }
    }
  }

  // number_of_frames is only an estimate. Resize the buffer with the
  // actual number of received frames.
  if (decoded_frames < number_of_frames)
    destination_bus->resizeSmaller(decoded_frames);
}

// The number of frames is unknown, so keep reading and buffering
// until there's no more data and then copy the data to the
// destination bus.
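// All samples are accumulated in a vector first; the frame count is then
// derived from the total sample count, so a trailing partial frame (if the
// sample count is not a multiple of the channel count) is dropped.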
static void BufferAndCopyPcmDataToBus(int input_fd,
                                      blink::WebAudioBus* destination_bus,
                                      unsigned number_of_channels,
                                      double file_sample_rate) {
  int16_t pipe_data[PIPE_BUF / sizeof(int16_t)];
  std::vector<int16_t> decoded_samples;
  ssize_t nread;

  while ((nread = HANDLE_EINTR(read(input_fd, pipe_data, sizeof(pipe_data)))) >
         0) {
    size_t samples_in_pipe = nread / sizeof(int16_t);
    if (decoded_samples.size() + samples_in_pipe > decoded_samples.capacity()) {
      decoded_samples.reserve(std::max(samples_in_pipe,
                                       2 * decoded_samples.capacity()));
    }
    std::copy(pipe_data,
              pipe_data + samples_in_pipe,
              std::back_inserter(decoded_samples));
  }

  DVLOG(1) << "Total samples read = " << decoded_samples.size();

  // Convert the samples and save them in the audio bus.
  size_t number_of_samples = decoded_samples.size();
  size_t number_of_frames = decoded_samples.size() / number_of_channels;
  size_t decoded_frames = 0;

  destination_bus->initialize(number_of_channels,
                              number_of_frames,
                              file_sample_rate);

  // Only process complete frames so we never index past the end of
  // decoded_samples or the destination bus.
  for (size_t m = 0; m + number_of_channels <= number_of_samples;
       m += number_of_channels) {
    for (size_t k = 0; k < number_of_channels; ++k) {
      int16_t sample = decoded_samples[m + k];
      destination_bus->channelData(k)[decoded_frames] =
          ConvertSampleToFloat(sample);
    }
    ++decoded_frames;
  }

  // number_of_frames is only an estimate. Resize the buffer with the
  // actual number of received frames.
  if (decoded_frames < number_of_frames)
    destination_bus->resizeSmaller(decoded_frames);
}

static bool TryWAVEFileDecoder(blink::WebAudioBus* destination_bus,
                               const uint8_t* encoded_data,
                               size_t data_size) {
  WAVEDecoder decoder(encoded_data, data_size);

  return decoder.DecodeWAVEFile(destination_bus);
}

// To decode audio data, we want to use the Android MediaCodec class.
// But this can't run in a sandboxed process, so we need to initiate
// the request to MediaCodec in the browser. To do this, we create a
// shared memory buffer that holds the audio data. We send a message
// to the browser to start the decoder using this buffer and one end
// of a pipe. The MediaCodec class will decode the data from the
// shared memory and write the PCM samples back to us over a pipe.
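//
// The browser side first sends back a small header (the
// media::WebAudioMediaCodecInfo struct read below) giving the channel count,
// sample rate, and frame count, followed by the raw interleaved 16-bit PCM
// samples.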
bool DecodeAudioFileData(blink::WebAudioBus* destination_bus, const char* data,
                         size_t data_size,
                         scoped_refptr<ThreadSafeSender> sender) {
  // Try to decode the data as a WAVE file first. If it can't be
  // decoded, use MediaCodec. See crbug.com/259048.
  if (TryWAVEFileDecoder(
          destination_bus, reinterpret_cast<const uint8_t*>(data), data_size)) {
    return true;
  }

  AudioDecoderIO audio_decoder(data, data_size);

  if (!audio_decoder.IsValid())
    return false;

  base::SharedMemoryHandle encoded_data_handle;
  if (!audio_decoder.ShareEncodedToProcess(&encoded_data_handle))
    return false;

  // Wrap the write end of the pipe; auto_close is true so the descriptor is
  // closed on our side once the message has been sent.
  base::FileDescriptor fd(audio_decoder.write_fd(), true);

  DVLOG(1) << "DecodeAudioFileData: Starting MediaCodec";

  // Start MediaCodec processing in the browser, which will read from
  // encoded_data_handle for our shared memory and write the decoded
  // PCM samples (16-bit integer) to our pipe.

  sender->Send(new ViewHostMsg_RunWebAudioMediaCodec(
      encoded_data_handle, fd, data_size));

  // First, read the number of channels, the sample rate, the number of
  // frames, and a flag indicating if the file is an ogg/vorbis file.
  // This must be coordinated with WebAudioMediaCodecBridge!
  //
  // If we know the number of frames, we can create the destination
  // bus directly and do the conversion directly to the bus instead of
  // buffering up everything before saving the data to the bus.

  int input_fd = audio_decoder.read_fd();
  struct media::WebAudioMediaCodecInfo info;

  DVLOG(1) << "Reading audio file info from fd " << input_fd;
  ssize_t nread = HANDLE_EINTR(read(input_fd, &info, sizeof(info)));
  DVLOG(1) << "read: " << nread << " bytes:\n"
           << " 0: number of channels = " << info.channel_count << "\n"
           << " 1: sample rate = " << info.sample_rate << "\n"
           << " 2: number of frames = " << info.number_of_frames << "\n";

  if (nread != sizeof(info))
    return false;

  unsigned number_of_channels = info.channel_count;
  double file_sample_rate = static_cast<double>(info.sample_rate);
  size_t number_of_frames = info.number_of_frames;

  // Sanity checks.
  if (!number_of_channels ||
      number_of_channels > media::limits::kMaxChannels ||
      file_sample_rate < media::limits::kMinSampleRate ||
      file_sample_rate > media::limits::kMaxSampleRate) {
    return false;
  }

  if (number_of_frames > 0) {
    CopyPcmDataToBus(input_fd,
                     destination_bus,
                     number_of_frames,
                     number_of_channels,
                     file_sample_rate);
  } else {
    BufferAndCopyPcmDataToBus(input_fd,
                              destination_bus,
                              number_of_channels,
                              file_sample_rate);
  }

  return true;
}

}  // namespace content