• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "modules/audio_coding/codecs/cng/audio_encoder_cng.h"
12 
13 #include <cstdint>
14 #include <memory>
15 #include <utility>
16 
17 #include "absl/types/optional.h"
18 #include "api/units/time_delta.h"
19 #include "modules/audio_coding/codecs/cng/webrtc_cng.h"
20 #include "rtc_base/checks.h"
21 
22 namespace webrtc {
23 
24 namespace {
25 
// Longest packet duration, in milliseconds, that EncodeImpl() accepts when
// VAD/CNG is in use.
constexpr int kMaxFrameSizeMs = 60;
27 
28 class AudioEncoderCng final : public AudioEncoder {
29  public:
30   explicit AudioEncoderCng(AudioEncoderCngConfig&& config);
31   ~AudioEncoderCng() override;
32 
33   // Not copyable or moveable.
34   AudioEncoderCng(const AudioEncoderCng&) = delete;
35   AudioEncoderCng(AudioEncoderCng&&) = delete;
36   AudioEncoderCng& operator=(const AudioEncoderCng&) = delete;
37   AudioEncoderCng& operator=(AudioEncoderCng&&) = delete;
38 
39   int SampleRateHz() const override;
40   size_t NumChannels() const override;
41   int RtpTimestampRateHz() const override;
42   size_t Num10MsFramesInNextPacket() const override;
43   size_t Max10MsFramesInAPacket() const override;
44   int GetTargetBitrate() const override;
45   EncodedInfo EncodeImpl(uint32_t rtp_timestamp,
46                          rtc::ArrayView<const int16_t> audio,
47                          rtc::Buffer* encoded) override;
48   void Reset() override;
49   bool SetFec(bool enable) override;
50   bool SetDtx(bool enable) override;
51   bool SetApplication(Application application) override;
52   void SetMaxPlaybackRate(int frequency_hz) override;
53   rtc::ArrayView<std::unique_ptr<AudioEncoder>> ReclaimContainedEncoders()
54       override;
55   void OnReceivedUplinkPacketLossFraction(
56       float uplink_packet_loss_fraction) override;
57   void OnReceivedUplinkBandwidth(
58       int target_audio_bitrate_bps,
59       absl::optional<int64_t> bwe_period_ms) override;
60   absl::optional<std::pair<TimeDelta, TimeDelta>> GetFrameLengthRange()
61       const override;
62 
63  private:
64   EncodedInfo EncodePassive(size_t frames_to_encode, rtc::Buffer* encoded);
65   EncodedInfo EncodeActive(size_t frames_to_encode, rtc::Buffer* encoded);
66   size_t SamplesPer10msFrame() const;
67 
68   std::unique_ptr<AudioEncoder> speech_encoder_;
69   const int cng_payload_type_;
70   const int num_cng_coefficients_;
71   const int sid_frame_interval_ms_;
72   std::vector<int16_t> speech_buffer_;
73   std::vector<uint32_t> rtp_timestamps_;
74   bool last_frame_active_;
75   std::unique_ptr<Vad> vad_;
76   std::unique_ptr<ComfortNoiseEncoder> cng_encoder_;
77 };
78 
AudioEncoderCng(AudioEncoderCngConfig && config)79 AudioEncoderCng::AudioEncoderCng(AudioEncoderCngConfig&& config)
80     : speech_encoder_((static_cast<void>([&] {
81                          RTC_CHECK(config.IsOk()) << "Invalid configuration.";
82                        }()),
83                        std::move(config.speech_encoder))),
84       cng_payload_type_(config.payload_type),
85       num_cng_coefficients_(config.num_cng_coefficients),
86       sid_frame_interval_ms_(config.sid_frame_interval_ms),
87       last_frame_active_(true),
88       vad_(config.vad ? std::unique_ptr<Vad>(config.vad)
89                       : CreateVad(config.vad_mode)),
90       cng_encoder_(new ComfortNoiseEncoder(SampleRateHz(),
91                                            sid_frame_interval_ms_,
92                                            num_cng_coefficients_)) {}
93 
94 AudioEncoderCng::~AudioEncoderCng() = default;
95 
SampleRateHz() const96 int AudioEncoderCng::SampleRateHz() const {
97   return speech_encoder_->SampleRateHz();
98 }
99 
NumChannels() const100 size_t AudioEncoderCng::NumChannels() const {
101   return 1;
102 }
103 
RtpTimestampRateHz() const104 int AudioEncoderCng::RtpTimestampRateHz() const {
105   return speech_encoder_->RtpTimestampRateHz();
106 }
107 
Num10MsFramesInNextPacket() const108 size_t AudioEncoderCng::Num10MsFramesInNextPacket() const {
109   return speech_encoder_->Num10MsFramesInNextPacket();
110 }
111 
Max10MsFramesInAPacket() const112 size_t AudioEncoderCng::Max10MsFramesInAPacket() const {
113   return speech_encoder_->Max10MsFramesInAPacket();
114 }
115 
GetTargetBitrate() const116 int AudioEncoderCng::GetTargetBitrate() const {
117   return speech_encoder_->GetTargetBitrate();
118 }
119 
EncodeImpl(uint32_t rtp_timestamp,rtc::ArrayView<const int16_t> audio,rtc::Buffer * encoded)120 AudioEncoder::EncodedInfo AudioEncoderCng::EncodeImpl(
121     uint32_t rtp_timestamp,
122     rtc::ArrayView<const int16_t> audio,
123     rtc::Buffer* encoded) {
124   const size_t samples_per_10ms_frame = SamplesPer10msFrame();
125   RTC_CHECK_EQ(speech_buffer_.size(),
126                rtp_timestamps_.size() * samples_per_10ms_frame);
127   rtp_timestamps_.push_back(rtp_timestamp);
128   RTC_DCHECK_EQ(samples_per_10ms_frame, audio.size());
129   speech_buffer_.insert(speech_buffer_.end(), audio.cbegin(), audio.cend());
130   const size_t frames_to_encode = speech_encoder_->Num10MsFramesInNextPacket();
131   if (rtp_timestamps_.size() < frames_to_encode) {
132     return EncodedInfo();
133   }
134   RTC_CHECK_LE(frames_to_encode * 10, kMaxFrameSizeMs)
135       << "Frame size cannot be larger than " << kMaxFrameSizeMs
136       << " ms when using VAD/CNG.";
137 
138   // Group several 10 ms blocks per VAD call. Call VAD once or twice using the
139   // following split sizes:
140   // 10 ms = 10 + 0 ms; 20 ms = 20 + 0 ms; 30 ms = 30 + 0 ms;
141   // 40 ms = 20 + 20 ms; 50 ms = 30 + 20 ms; 60 ms = 30 + 30 ms.
142   size_t blocks_in_first_vad_call =
143       (frames_to_encode > 3 ? 3 : frames_to_encode);
144   if (frames_to_encode == 4)
145     blocks_in_first_vad_call = 2;
146   RTC_CHECK_GE(frames_to_encode, blocks_in_first_vad_call);
147   const size_t blocks_in_second_vad_call =
148       frames_to_encode - blocks_in_first_vad_call;
149 
150   // Check if all of the buffer is passive speech. Start with checking the first
151   // block.
152   Vad::Activity activity = vad_->VoiceActivity(
153       &speech_buffer_[0], samples_per_10ms_frame * blocks_in_first_vad_call,
154       SampleRateHz());
155   if (activity == Vad::kPassive && blocks_in_second_vad_call > 0) {
156     // Only check the second block if the first was passive.
157     activity = vad_->VoiceActivity(
158         &speech_buffer_[samples_per_10ms_frame * blocks_in_first_vad_call],
159         samples_per_10ms_frame * blocks_in_second_vad_call, SampleRateHz());
160   }
161 
162   EncodedInfo info;
163   switch (activity) {
164     case Vad::kPassive: {
165       info = EncodePassive(frames_to_encode, encoded);
166       last_frame_active_ = false;
167       break;
168     }
169     case Vad::kActive: {
170       info = EncodeActive(frames_to_encode, encoded);
171       last_frame_active_ = true;
172       break;
173     }
174     case Vad::kError: {
175       FATAL();  // Fails only if fed invalid data.
176       break;
177     }
178   }
179 
180   speech_buffer_.erase(
181       speech_buffer_.begin(),
182       speech_buffer_.begin() + frames_to_encode * samples_per_10ms_frame);
183   rtp_timestamps_.erase(rtp_timestamps_.begin(),
184                         rtp_timestamps_.begin() + frames_to_encode);
185   return info;
186 }
187 
Reset()188 void AudioEncoderCng::Reset() {
189   speech_encoder_->Reset();
190   speech_buffer_.clear();
191   rtp_timestamps_.clear();
192   last_frame_active_ = true;
193   vad_->Reset();
194   cng_encoder_.reset(new ComfortNoiseEncoder(
195       SampleRateHz(), sid_frame_interval_ms_, num_cng_coefficients_));
196 }
197 
SetFec(bool enable)198 bool AudioEncoderCng::SetFec(bool enable) {
199   return speech_encoder_->SetFec(enable);
200 }
201 
SetDtx(bool enable)202 bool AudioEncoderCng::SetDtx(bool enable) {
203   return speech_encoder_->SetDtx(enable);
204 }
205 
SetApplication(Application application)206 bool AudioEncoderCng::SetApplication(Application application) {
207   return speech_encoder_->SetApplication(application);
208 }
209 
SetMaxPlaybackRate(int frequency_hz)210 void AudioEncoderCng::SetMaxPlaybackRate(int frequency_hz) {
211   speech_encoder_->SetMaxPlaybackRate(frequency_hz);
212 }
213 
214 rtc::ArrayView<std::unique_ptr<AudioEncoder>>
ReclaimContainedEncoders()215 AudioEncoderCng::ReclaimContainedEncoders() {
216   return rtc::ArrayView<std::unique_ptr<AudioEncoder>>(&speech_encoder_, 1);
217 }
218 
OnReceivedUplinkPacketLossFraction(float uplink_packet_loss_fraction)219 void AudioEncoderCng::OnReceivedUplinkPacketLossFraction(
220     float uplink_packet_loss_fraction) {
221   speech_encoder_->OnReceivedUplinkPacketLossFraction(
222       uplink_packet_loss_fraction);
223 }
224 
OnReceivedUplinkBandwidth(int target_audio_bitrate_bps,absl::optional<int64_t> bwe_period_ms)225 void AudioEncoderCng::OnReceivedUplinkBandwidth(
226     int target_audio_bitrate_bps,
227     absl::optional<int64_t> bwe_period_ms) {
228   speech_encoder_->OnReceivedUplinkBandwidth(target_audio_bitrate_bps,
229                                              bwe_period_ms);
230 }
231 
232 absl::optional<std::pair<TimeDelta, TimeDelta>>
GetFrameLengthRange() const233 AudioEncoderCng::GetFrameLengthRange() const {
234   return speech_encoder_->GetFrameLengthRange();
235 }
236 
EncodePassive(size_t frames_to_encode,rtc::Buffer * encoded)237 AudioEncoder::EncodedInfo AudioEncoderCng::EncodePassive(
238     size_t frames_to_encode,
239     rtc::Buffer* encoded) {
240   bool force_sid = last_frame_active_;
241   bool output_produced = false;
242   const size_t samples_per_10ms_frame = SamplesPer10msFrame();
243   AudioEncoder::EncodedInfo info;
244 
245   for (size_t i = 0; i < frames_to_encode; ++i) {
246     // It's important not to pass &info.encoded_bytes directly to
247     // WebRtcCng_Encode(), since later loop iterations may return zero in
248     // that value, in which case we don't want to overwrite any value from
249     // an earlier iteration.
250     size_t encoded_bytes_tmp =
251         cng_encoder_->Encode(rtc::ArrayView<const int16_t>(
252                                  &speech_buffer_[i * samples_per_10ms_frame],
253                                  samples_per_10ms_frame),
254                              force_sid, encoded);
255 
256     if (encoded_bytes_tmp > 0) {
257       RTC_CHECK(!output_produced);
258       info.encoded_bytes = encoded_bytes_tmp;
259       output_produced = true;
260       force_sid = false;
261     }
262   }
263 
264   info.encoded_timestamp = rtp_timestamps_.front();
265   info.payload_type = cng_payload_type_;
266   info.send_even_if_empty = true;
267   info.speech = false;
268   return info;
269 }
270 
EncodeActive(size_t frames_to_encode,rtc::Buffer * encoded)271 AudioEncoder::EncodedInfo AudioEncoderCng::EncodeActive(size_t frames_to_encode,
272                                                         rtc::Buffer* encoded) {
273   const size_t samples_per_10ms_frame = SamplesPer10msFrame();
274   AudioEncoder::EncodedInfo info;
275   for (size_t i = 0; i < frames_to_encode; ++i) {
276     info =
277         speech_encoder_->Encode(rtp_timestamps_.front(),
278                                 rtc::ArrayView<const int16_t>(
279                                     &speech_buffer_[i * samples_per_10ms_frame],
280                                     samples_per_10ms_frame),
281                                 encoded);
282     if (i + 1 == frames_to_encode) {
283       RTC_CHECK_GT(info.encoded_bytes, 0) << "Encoder didn't deliver data.";
284     } else {
285       RTC_CHECK_EQ(info.encoded_bytes, 0)
286           << "Encoder delivered data too early.";
287     }
288   }
289   return info;
290 }
291 
SamplesPer10msFrame() const292 size_t AudioEncoderCng::SamplesPer10msFrame() const {
293   return rtc::CheckedDivExact(10 * SampleRateHz(), 1000);
294 }
295 
296 }  // namespace
297 
298 AudioEncoderCngConfig::AudioEncoderCngConfig() = default;
299 AudioEncoderCngConfig::AudioEncoderCngConfig(AudioEncoderCngConfig&&) = default;
300 AudioEncoderCngConfig::~AudioEncoderCngConfig() = default;
301 
IsOk() const302 bool AudioEncoderCngConfig::IsOk() const {
303   if (num_channels != 1)
304     return false;
305   if (!speech_encoder)
306     return false;
307   if (num_channels != speech_encoder->NumChannels())
308     return false;
309   if (sid_frame_interval_ms <
310       static_cast<int>(speech_encoder->Max10MsFramesInAPacket() * 10))
311     return false;
312   if (num_cng_coefficients > WEBRTC_CNG_MAX_LPC_ORDER ||
313       num_cng_coefficients <= 0)
314     return false;
315   return true;
316 }
317 
CreateComfortNoiseEncoder(AudioEncoderCngConfig && config)318 std::unique_ptr<AudioEncoder> CreateComfortNoiseEncoder(
319     AudioEncoderCngConfig&& config) {
320   return std::make_unique<AudioEncoderCng>(std::move(config));
321 }
322 
323 }  // namespace webrtc
324