// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "media/base/audio_splicer.h"

#include <cstdlib>
#include <deque>

#include "base/logging.h"
#include "media/base/audio_buffer.h"
#include "media/base/audio_bus.h"
#include "media/base/audio_decoder_config.h"
#include "media/base/audio_timestamp_helper.h"
#include "media/base/vector_math.h"

namespace media {

// Minimum gap size needed before the splicer will take action to
// fill a gap. This avoids periodically inserting and then dropping samples
// when the buffer timestamps are slightly off because of timestamp rounding
// in the source content. Unit is frames.
static const int kMinGapSize = 2;

// AudioBuffer::TrimStart() is not as accurate as the timestamp helper, so
// manually adjust the duration and timestamp after trimming.
static void AccurateTrimStart(int frames_to_trim,
                              const scoped_refptr<AudioBuffer> buffer,
                              const AudioTimestampHelper& timestamp_helper) {
  buffer->TrimStart(frames_to_trim);
  buffer->set_timestamp(timestamp_helper.GetTimestamp());
}

// Returns an AudioBus whose frame buffer is backed by the provided AudioBuffer.
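// The wrapper is only valid for planar float32 data; in this file it is used
// to wrap the crossfade buffer, which is allocated as kSampleFormatPlanarF32.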
static scoped_ptr<AudioBus> CreateAudioBufferWrapper(
    const scoped_refptr<AudioBuffer>& buffer) {
  scoped_ptr<AudioBus> wrapper =
      AudioBus::CreateWrapper(buffer->channel_count());
  wrapper->set_frames(buffer->frame_count());
  for (int ch = 0; ch < buffer->channel_count(); ++ch) {
    wrapper->SetChannelData(
        ch, reinterpret_cast<float*>(buffer->channel_data()[ch]));
  }
  return wrapper.Pass();
}

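// AudioStreamSanitizer enforces a continuous timeline on a single stream of
// buffers: small timestamp discrepancies are ignored, larger gaps are filled
// with silence, and overlapping frames are dropped before buffers are queued
// for output.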
class AudioStreamSanitizer {
 public:
  explicit AudioStreamSanitizer(int samples_per_second);
  ~AudioStreamSanitizer();

  // Resets the sanitizer state by clearing the output buffers queue, and
  // resetting the timestamp helper.
  void Reset();

  // Similar to Reset(), but initializes the timestamp helper with the given
  // parameters.
  void ResetTimestampState(int64 frame_count, base::TimeDelta base_timestamp);

  // Adds a new buffer full of samples or end of stream buffer to the splicer.
  // Returns true if the buffer was accepted. False is returned if an error
  // occurred.
  bool AddInput(const scoped_refptr<AudioBuffer>& input);

  // Returns true if the sanitizer has a buffer to return.
  bool HasNextBuffer() const;

  // Removes the next buffer from the output buffer queue and returns it;
  // should only be called if HasNextBuffer() returns true.
  scoped_refptr<AudioBuffer> GetNextBuffer();

  // Returns the total frame count of all buffers available for output.
  int GetFrameCount() const;

  const AudioTimestampHelper& timestamp_helper() {
    return output_timestamp_helper_;
  }

  // Transfer all buffers into |output|.  Returns false if AddInput() on the
  // |output| sanitizer fails for any buffer removed from |this|.
  bool DrainInto(AudioStreamSanitizer* output);

 private:
  void AddOutputBuffer(const scoped_refptr<AudioBuffer>& buffer);

  AudioTimestampHelper output_timestamp_helper_;
  bool received_end_of_stream_;

  typedef std::deque<scoped_refptr<AudioBuffer> > BufferQueue;
  BufferQueue output_buffers_;

  DISALLOW_ASSIGN(AudioStreamSanitizer);
};

AudioStreamSanitizer::AudioStreamSanitizer(int samples_per_second)
    : output_timestamp_helper_(samples_per_second),
      received_end_of_stream_(false) {}

AudioStreamSanitizer::~AudioStreamSanitizer() {}

void AudioStreamSanitizer::Reset() {
  ResetTimestampState(0, kNoTimestamp());
}

void AudioStreamSanitizer::ResetTimestampState(int64 frame_count,
                                               base::TimeDelta base_timestamp) {
  output_buffers_.clear();
  received_end_of_stream_ = false;
  output_timestamp_helper_.SetBaseTimestamp(base_timestamp);
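  // Only advance the helper when the caller supplies a frame count; Reset()
  // passes zero frames along with kNoTimestamp().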
  if (frame_count > 0)
    output_timestamp_helper_.AddFrames(frame_count);
}

bool AudioStreamSanitizer::AddInput(const scoped_refptr<AudioBuffer>& input) {
  DCHECK(!received_end_of_stream_ || input->end_of_stream());

  if (input->end_of_stream()) {
    output_buffers_.push_back(input);
    received_end_of_stream_ = true;
    return true;
  }

  DCHECK(input->timestamp() != kNoTimestamp());
  DCHECK(input->duration() > base::TimeDelta());
  DCHECK_GT(input->frame_count(), 0);

  if (output_timestamp_helper_.base_timestamp() == kNoTimestamp())
    output_timestamp_helper_.SetBaseTimestamp(input->timestamp());

  if (output_timestamp_helper_.base_timestamp() > input->timestamp()) {
    DVLOG(1) << "Input timestamp is before the base timestamp.";
    return false;
  }

  const base::TimeDelta timestamp = input->timestamp();
  const base::TimeDelta expected_timestamp =
      output_timestamp_helper_.GetTimestamp();
  const base::TimeDelta delta = timestamp - expected_timestamp;

  if (std::abs(delta.InMilliseconds()) >
      AudioSplicer::kMaxTimeDeltaInMilliseconds) {
    DVLOG(1) << "Timestamp delta too large: " << delta.InMicroseconds() << "us";
    return false;
  }

  int frames_to_fill = 0;
  if (delta != base::TimeDelta())
    frames_to_fill = output_timestamp_helper_.GetFramesToTarget(timestamp);

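  // A positive |frames_to_fill| means the input starts after the expected
  // timestamp (a gap); a negative value means it starts before (an overlap).
  // Discrepancies smaller than |kMinGapSize| frames are passed through as-is.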
  if (frames_to_fill == 0 || std::abs(frames_to_fill) < kMinGapSize) {
    AddOutputBuffer(input);
    return true;
  }

  if (frames_to_fill > 0) {
    DVLOG(1) << "Gap detected @ " << expected_timestamp.InMicroseconds()
             << " us: " << delta.InMicroseconds() << " us";

    // Create a buffer with enough silence samples to fill the gap and
    // add it to the output buffer.
    scoped_refptr<AudioBuffer> gap =
        AudioBuffer::CreateEmptyBuffer(input->channel_layout(),
                                       input->channel_count(),
                                       input->sample_rate(),
                                       frames_to_fill,
                                       expected_timestamp);
    AddOutputBuffer(gap);

    // Add the input buffer now that the gap has been filled.
    AddOutputBuffer(input);
    return true;
  }

  // Overlapping buffers marked as splice frames are handled by AudioSplicer,
  // but decoder and demuxer quirks may sometimes produce overlapping samples
  // which need to be sanitized.
  //
  // A crossfade can't be done here because only the current buffer is
  // available at this point, not previous buffers.
  DVLOG(1) << "Overlap detected @ " << expected_timestamp.InMicroseconds()
           << " us: " << -delta.InMicroseconds() << " us";

  const int frames_to_skip = -frames_to_fill;
  if (input->frame_count() <= frames_to_skip) {
    DVLOG(1) << "Dropping whole buffer";
    return true;
  }

  // Copy the trailing samples that do not overlap samples already output
  // into a new buffer.  Add this new buffer to the output queue.
  //
  // TODO(acolwell): Implement a cross-fade here so the transition is less
  // jarring.
  AccurateTrimStart(frames_to_skip, input, output_timestamp_helper_);
  AddOutputBuffer(input);
  return true;
}

bool AudioStreamSanitizer::HasNextBuffer() const {
  return !output_buffers_.empty();
}

scoped_refptr<AudioBuffer> AudioStreamSanitizer::GetNextBuffer() {
  scoped_refptr<AudioBuffer> ret = output_buffers_.front();
  output_buffers_.pop_front();
  return ret;
}

void AudioStreamSanitizer::AddOutputBuffer(
    const scoped_refptr<AudioBuffer>& buffer) {
  output_timestamp_helper_.AddFrames(buffer->frame_count());
  output_buffers_.push_back(buffer);
}

int AudioStreamSanitizer::GetFrameCount() const {
  int frame_count = 0;
  for (const auto& buffer : output_buffers_)
    frame_count += buffer->frame_count();
  return frame_count;
}

bool AudioStreamSanitizer::DrainInto(AudioStreamSanitizer* output) {
  while (HasNextBuffer()) {
    if (!output->AddInput(GetNextBuffer()))
      return false;
  }
  return true;
}

AudioSplicer::AudioSplicer(int samples_per_second)
    : max_crossfade_duration_(
          base::TimeDelta::FromMilliseconds(kCrossfadeDurationInMilliseconds)),
      splice_timestamp_(kNoTimestamp()),
      max_splice_end_timestamp_(kNoTimestamp()),
      output_sanitizer_(new AudioStreamSanitizer(samples_per_second)),
      pre_splice_sanitizer_(new AudioStreamSanitizer(samples_per_second)),
      post_splice_sanitizer_(new AudioStreamSanitizer(samples_per_second)),
      have_all_pre_splice_buffers_(false) {}

AudioSplicer::~AudioSplicer() {}

void AudioSplicer::Reset() {
  output_sanitizer_->Reset();
  pre_splice_sanitizer_->Reset();
  post_splice_sanitizer_->Reset();
  have_all_pre_splice_buffers_ = false;
  reset_splice_timestamps();
}

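// Routes each incoming buffer based on the current splice state: outside of a
// splice, buffers flow straight into |output_sanitizer_|; once a splice
// timestamp has been set, buffers are queued in |pre_splice_sanitizer_| and
// |post_splice_sanitizer_| until enough data is available to crossfade.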
bool AudioSplicer::AddInput(const scoped_refptr<AudioBuffer>& input) {
  // If we're not processing a splice, add the input to the output queue.
  if (splice_timestamp_ == kNoTimestamp()) {
    DCHECK(!pre_splice_sanitizer_->HasNextBuffer());
    DCHECK(!post_splice_sanitizer_->HasNextBuffer());
    return output_sanitizer_->AddInput(input);
  }

  const AudioTimestampHelper& output_ts_helper =
      output_sanitizer_->timestamp_helper();

  if (!have_all_pre_splice_buffers_) {
    DCHECK(!input->end_of_stream());

    // If the provided buffer is entirely before the splice point it can also
    // be added to the output queue.
    if (input->timestamp() + input->duration() < splice_timestamp_) {
      DCHECK(!pre_splice_sanitizer_->HasNextBuffer());
      return output_sanitizer_->AddInput(input);
    }

    // If we've encountered the first pre splice buffer, reset the pre splice
    // sanitizer based on |output_sanitizer_|.  This is done so that gaps and
    // overlaps between buffers across the sanitizers are accounted for prior
    // to calculating crossfade.
    if (!pre_splice_sanitizer_->HasNextBuffer()) {
      pre_splice_sanitizer_->ResetTimestampState(
          output_ts_helper.frame_count(), output_ts_helper.base_timestamp());
    }

    return pre_splice_sanitizer_->AddInput(input);
  }

  // The first post splice buffer is expected to match |splice_timestamp_|.
  if (!post_splice_sanitizer_->HasNextBuffer())
    CHECK(splice_timestamp_ == input->timestamp());

  // At this point we have all the fade out preroll buffers from the decoder.
  // We now need to wait until we have enough data to perform the crossfade
  // (or we receive an end of stream).
  if (!post_splice_sanitizer_->AddInput(input))
    return false;

  // Ensure |output_sanitizer_| has a valid base timestamp so we can use it for
  // timestamp calculations.
  if (output_ts_helper.base_timestamp() == kNoTimestamp()) {
    output_sanitizer_->ResetTimestampState(
        0, pre_splice_sanitizer_->timestamp_helper().base_timestamp());
  }

  // If a splice frame was incorrectly marked due to poor demuxed timestamps,
  // we may not actually have a splice.  If the splice point is already behind
  // the output position, or none of the queued pre splice frames extend past
  // it, there is nothing to crossfade, so just transfer all data to the output
  // sanitizer.
  const int frames_before_splice =
      output_ts_helper.GetFramesToTarget(splice_timestamp_);
  if (frames_before_splice < 0 ||
      pre_splice_sanitizer_->GetFrameCount() <= frames_before_splice) {
    CHECK(pre_splice_sanitizer_->DrainInto(output_sanitizer_.get()));

    // If the file contains incorrectly muxed timestamps, there may be huge
    // gaps between the demuxed and decoded timestamps.
    if (!post_splice_sanitizer_->DrainInto(output_sanitizer_.get()))
      return false;

    reset_splice_timestamps();
    return true;
  }

  // Wait until we have enough data to crossfade or end of stream.
  if (!input->end_of_stream() &&
      input->timestamp() + input->duration() < max_splice_end_timestamp_) {
    return true;
  }

  scoped_refptr<AudioBuffer> crossfade_buffer;
  scoped_ptr<AudioBus> pre_splice =
      ExtractCrossfadeFromPreSplice(&crossfade_buffer);

  // Crossfade the pre splice and post splice sections and transfer all
  // relevant buffers into |output_sanitizer_|.
  CrossfadePostSplice(pre_splice.Pass(), crossfade_buffer);

  // Clear the splice timestamp so new splices can be accepted.
  reset_splice_timestamps();
  return true;
}

bool AudioSplicer::HasNextBuffer() const {
  return output_sanitizer_->HasNextBuffer();
}

scoped_refptr<AudioBuffer> AudioSplicer::GetNextBuffer() {
  return output_sanitizer_->GetNextBuffer();
}

void AudioSplicer::SetSpliceTimestamp(base::TimeDelta splice_timestamp) {
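  // A splice timestamp of kNoTimestamp() signals that all buffers before the
  // splice point have now been received.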
  if (splice_timestamp == kNoTimestamp()) {
    DCHECK(splice_timestamp_ != kNoTimestamp());
    DCHECK(!have_all_pre_splice_buffers_);
    have_all_pre_splice_buffers_ = true;
    return;
  }

  if (splice_timestamp_ == splice_timestamp)
    return;

  // TODO(dalecurtis): We may need the concept of a future_splice_timestamp_ to
  // handle cases where another splice comes in before we've received 5ms of
  // data from the last one.  Leave this as a CHECK for now to figure out if
  // this case is possible.
  CHECK(splice_timestamp_ == kNoTimestamp());
  splice_timestamp_ = splice_timestamp;
  max_splice_end_timestamp_ = splice_timestamp_ + max_crossfade_duration_;
  pre_splice_sanitizer_->Reset();
  post_splice_sanitizer_->Reset();
  have_all_pre_splice_buffers_ = false;
}

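// Drains |pre_splice_sanitizer_|: frames that precede the splice point are
// forwarded to |output_sanitizer_| untouched, while the crossfade section is
// copied into a newly allocated AudioBus (and a matching |crossfade_buffer|
// that will receive the mixed result).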
scoped_ptr<AudioBus> AudioSplicer::ExtractCrossfadeFromPreSplice(
    scoped_refptr<AudioBuffer>* crossfade_buffer) {
  DCHECK(crossfade_buffer);
  const AudioTimestampHelper& output_ts_helper =
      output_sanitizer_->timestamp_helper();

  int frames_before_splice =
      output_ts_helper.GetFramesToTarget(splice_timestamp_);

  // Determine the crossfade frame count from the frames available in each
  // sanitizer, capped to the maximum crossfade duration.
  const int max_crossfade_frame_count =
      output_ts_helper.GetFramesToTarget(max_splice_end_timestamp_) -
      frames_before_splice;
  const int frames_to_crossfade = std::min(
      max_crossfade_frame_count,
      std::min(pre_splice_sanitizer_->GetFrameCount() - frames_before_splice,
               post_splice_sanitizer_->GetFrameCount()));
  // There must always be frames to crossfade; otherwise the splice should not
  // have been generated.
  DCHECK_GT(frames_to_crossfade, 0);

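  // Walk the pre splice queue: whole buffers that end before the splice point
  // go straight to |output_sanitizer_|, and the remainder is read into
  // |output_bus| for crossfading.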
  int frames_read = 0;
  scoped_ptr<AudioBus> output_bus;
  while (pre_splice_sanitizer_->HasNextBuffer() &&
         frames_read < frames_to_crossfade) {
    scoped_refptr<AudioBuffer> preroll = pre_splice_sanitizer_->GetNextBuffer();

    // We don't know the channel count until we see the first buffer, so wait
    // until the first buffer to allocate the output AudioBus.
    if (!output_bus) {
      output_bus =
          AudioBus::Create(preroll->channel_count(), frames_to_crossfade);
      // Allocate output buffer for crossfade.
      *crossfade_buffer = AudioBuffer::CreateBuffer(kSampleFormatPlanarF32,
                                                    preroll->channel_layout(),
                                                    preroll->channel_count(),
                                                    preroll->sample_rate(),
                                                    frames_to_crossfade);
    }

    // There may be enough of a gap introduced during decoding such that an
    // entire buffer exists before the splice point.
    if (frames_before_splice >= preroll->frame_count()) {
      // Adjust the number of frames remaining before the splice.  NOTE: This
      // is safe since |pre_splice_sanitizer_| is a continuation of the
      // timeline in |output_sanitizer_|.  As such we're guaranteed there are
      // no gaps or overlaps in the timeline between the two sanitizers.
      frames_before_splice -= preroll->frame_count();
      CHECK(output_sanitizer_->AddInput(preroll));
      continue;
    }

    const int frames_to_read =
        std::min(preroll->frame_count() - frames_before_splice,
                 output_bus->frames() - frames_read);
    preroll->ReadFrames(
        frames_to_read, frames_before_splice, frames_read, output_bus.get());
    frames_read += frames_to_read;

    // If only part of the buffer was consumed, trim it appropriately and stick
    // it into the output queue.
    if (frames_before_splice) {
      preroll->TrimEnd(preroll->frame_count() - frames_before_splice);
      CHECK(output_sanitizer_->AddInput(preroll));
      frames_before_splice = 0;
    }
  }

  // Ensure outputs were properly allocated.  The method should not have been
  // called if there is not enough data to crossfade.
  // TODO(dalecurtis): Convert to DCHECK() once http://crbug.com/356073 is
  // fixed.
  CHECK(output_bus);
  CHECK(crossfade_buffer->get());

  // All necessary buffers have been processed; it's safe to reset.
  pre_splice_sanitizer_->Reset();
  DCHECK_EQ(output_bus->frames(), frames_read);
  DCHECK_EQ(output_ts_helper.GetFramesToTarget(splice_timestamp_), 0);
  return output_bus.Pass();
}

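// Produces the crossfaded section by mixing |pre_splice_bus| with the post
// splice audio read into |crossfade_buffer|, then pushes the result and any
// remaining post splice buffers into |output_sanitizer_|.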
void AudioSplicer::CrossfadePostSplice(
    scoped_ptr<AudioBus> pre_splice_bus,
    const scoped_refptr<AudioBuffer>& crossfade_buffer) {
  // Use the calculated timestamp and duration to ensure there are no extra
  // gaps or overlaps to process when adding the buffer to |output_sanitizer_|.
  const AudioTimestampHelper& output_ts_helper =
      output_sanitizer_->timestamp_helper();
  crossfade_buffer->set_timestamp(output_ts_helper.GetTimestamp());

  // AudioBuffer::ReadFrames() only allows output into an AudioBus, so wrap
  // our AudioBuffer in one so we can avoid extra data copies.
  scoped_ptr<AudioBus> output_bus = CreateAudioBufferWrapper(crossfade_buffer);

  // Extract crossfade section from the |post_splice_sanitizer_|.
  int frames_read = 0, frames_to_trim = 0;
  scoped_refptr<AudioBuffer> remainder;
  while (post_splice_sanitizer_->HasNextBuffer() &&
         frames_read < output_bus->frames()) {
    scoped_refptr<AudioBuffer> postroll =
        post_splice_sanitizer_->GetNextBuffer();
    const int frames_to_read =
        std::min(postroll->frame_count(), output_bus->frames() - frames_read);
    postroll->ReadFrames(frames_to_read, 0, frames_read, output_bus.get());
    frames_read += frames_to_read;

    // If only part of the buffer was consumed, save it for after we've added
    // the crossfade buffer.
    if (frames_to_read < postroll->frame_count()) {
      DCHECK(!remainder.get());
      remainder.swap(postroll);
      frames_to_trim = frames_to_read;
    }
  }

  DCHECK_EQ(output_bus->frames(), frames_read);

  // Crossfade the audio into |crossfade_buffer|.
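  // vector_math::Crossfade() fades the pre splice samples into the post splice
  // samples already written to |output_bus|, which is backed by
  // |crossfade_buffer|'s memory.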
  for (int ch = 0; ch < output_bus->channels(); ++ch) {
    vector_math::Crossfade(pre_splice_bus->channel(ch),
                           pre_splice_bus->frames(),
                           output_bus->channel(ch));
  }

  CHECK(output_sanitizer_->AddInput(crossfade_buffer));
  DCHECK_EQ(crossfade_buffer->frame_count(), output_bus->frames());

  if (remainder.get()) {
    // Trim off consumed frames.
    AccurateTrimStart(frames_to_trim, remainder, output_ts_helper);
    CHECK(output_sanitizer_->AddInput(remainder));
  }

  // Transfer all remaining buffers out and reset once empty.
  CHECK(post_splice_sanitizer_->DrainInto(output_sanitizer_.get()));
  post_splice_sanitizer_->Reset();
}

}  // namespace media