1 /*
2 * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "modules/audio_coding/neteq/decision_logic.h"
12
13 #include <assert.h>
14 #include <stdio.h>
15
16 #include <string>
17
18 #include "absl/types/optional.h"
19 #include "modules/audio_coding/neteq/packet_buffer.h"
20 #include "rtc_base/checks.h"
21 #include "rtc_base/experiments/field_trial_parser.h"
22 #include "rtc_base/logging.h"
23 #include "rtc_base/numerics/safe_conversions.h"
24 #include "system_wrappers/include/field_trial.h"
25
namespace {

// Percentage of the target buffer level that must be buffered before decoding
// is resumed after an expand period (applied as `x * value / 100`).
constexpr int kPostponeDecodingLevel{50};

// Default value, in milliseconds, of the "target_level_window" field-trial
// parameter.
constexpr int kDefaultTargetLevelWindowMs{100};

}  // namespace
32
33 namespace webrtc {
34
DecisionLogic(NetEqController::Config config)35 DecisionLogic::DecisionLogic(NetEqController::Config config)
36 : delay_manager_(DelayManager::Create(config.max_packets_in_buffer,
37 config.base_min_delay_ms,
38 config.enable_rtx_handling,
39 config.tick_timer)),
40 tick_timer_(config.tick_timer),
41 disallow_time_stretching_(!config.allow_time_stretching),
42 timescale_countdown_(
43 tick_timer_->GetNewCountdown(kMinTimescaleInterval + 1)),
44 estimate_dtx_delay_("estimate_dtx_delay", false),
45 time_stretch_cn_("time_stretch_cn", false),
46 target_level_window_ms_("target_level_window",
47 kDefaultTargetLevelWindowMs,
48 0,
49 absl::nullopt) {
50 const std::string field_trial_name =
51 field_trial::FindFullName("WebRTC-Audio-NetEqDecisionLogicSettings");
52 ParseFieldTrial(
53 {&estimate_dtx_delay_, &time_stretch_cn_, &target_level_window_ms_},
54 field_trial_name);
55 RTC_LOG(LS_INFO) << "NetEq decision logic settings:"
56 " estimate_dtx_delay="
57 << estimate_dtx_delay_
58 << " time_stretch_cn=" << time_stretch_cn_
59 << " target_level_window_ms=" << target_level_window_ms_;
60 }
61
62 DecisionLogic::~DecisionLogic() = default;
63
Reset()64 void DecisionLogic::Reset() {
65 cng_state_ = kCngOff;
66 noise_fast_forward_ = 0;
67 packet_length_samples_ = 0;
68 sample_memory_ = 0;
69 prev_time_scale_ = false;
70 timescale_countdown_.reset();
71 num_consecutive_expands_ = 0;
72 time_stretched_cn_samples_ = 0;
73 }
74
SoftReset()75 void DecisionLogic::SoftReset() {
76 packet_length_samples_ = 0;
77 sample_memory_ = 0;
78 prev_time_scale_ = false;
79 timescale_countdown_ =
80 tick_timer_->GetNewCountdown(kMinTimescaleInterval + 1);
81 time_stretched_cn_samples_ = 0;
82 delay_manager_->Reset();
83 buffer_level_filter_.Reset();
84 }
85
SetSampleRate(int fs_hz,size_t output_size_samples)86 void DecisionLogic::SetSampleRate(int fs_hz, size_t output_size_samples) {
87 // TODO(hlundin): Change to an enumerator and skip assert.
88 assert(fs_hz == 8000 || fs_hz == 16000 || fs_hz == 32000 || fs_hz == 48000);
89 sample_rate_ = fs_hz;
90 output_size_samples_ = output_size_samples;
91 }
92
GetDecision(const NetEqStatus & status,bool * reset_decoder)93 NetEq::Operation DecisionLogic::GetDecision(const NetEqStatus& status,
94 bool* reset_decoder) {
95 // If last mode was CNG (or Expand, since this could be covering up for
96 // a lost CNG packet), remember that CNG is on. This is needed if comfort
97 // noise is interrupted by DTMF.
98 if (status.last_mode == NetEq::Mode::kRfc3389Cng) {
99 cng_state_ = kCngRfc3389On;
100 } else if (status.last_mode == NetEq::Mode::kCodecInternalCng) {
101 cng_state_ = kCngInternalOn;
102 }
103
104 size_t cur_size_samples = estimate_dtx_delay_
105 ? status.packet_buffer_info.span_samples
106 : status.packet_buffer_info.num_samples;
107 prev_time_scale_ =
108 prev_time_scale_ &&
109 (status.last_mode == NetEq::Mode::kAccelerateSuccess ||
110 status.last_mode == NetEq::Mode::kAccelerateLowEnergy ||
111 status.last_mode == NetEq::Mode::kPreemptiveExpandSuccess ||
112 status.last_mode == NetEq::Mode::kPreemptiveExpandLowEnergy);
113
114 // Do not update buffer history if currently playing CNG since it will bias
115 // the filtered buffer level.
116 if (status.last_mode != NetEq::Mode::kRfc3389Cng &&
117 status.last_mode != NetEq::Mode::kCodecInternalCng &&
118 !(status.next_packet && status.next_packet->is_dtx &&
119 !estimate_dtx_delay_)) {
120 FilterBufferLevel(cur_size_samples);
121 }
122
123 // Guard for errors, to avoid getting stuck in error mode.
124 if (status.last_mode == NetEq::Mode::kError) {
125 if (!status.next_packet) {
126 return NetEq::Operation::kExpand;
127 } else {
128 // Use kUndefined to flag for a reset.
129 return NetEq::Operation::kUndefined;
130 }
131 }
132
133 if (status.next_packet && status.next_packet->is_cng) {
134 return CngOperation(status.last_mode, status.target_timestamp,
135 status.next_packet->timestamp,
136 status.generated_noise_samples);
137 }
138
139 // Handle the case with no packet at all available (except maybe DTMF).
140 if (!status.next_packet) {
141 return NoPacket(status.play_dtmf);
142 }
143
144 // If the expand period was very long, reset NetEQ since it is likely that the
145 // sender was restarted.
146 if (num_consecutive_expands_ > kReinitAfterExpands) {
147 *reset_decoder = true;
148 return NetEq::Operation::kNormal;
149 }
150
151 // Make sure we don't restart audio too soon after an expansion to avoid
152 // running out of data right away again. We should only wait if there are no
153 // DTX or CNG packets in the buffer (otherwise we should just play out what we
154 // have, since we cannot know the exact duration of DTX or CNG packets), and
155 // if the mute factor is low enough (otherwise the expansion was short enough
156 // to not be noticable).
157 // Note that the MuteFactor is in Q14, so a value of 16384 corresponds to 1.
158 const size_t current_span =
159 estimate_dtx_delay_ ? status.packet_buffer_info.span_samples
160 : status.packet_buffer_info.span_samples_no_dtx;
161 if ((status.last_mode == NetEq::Mode::kExpand ||
162 status.last_mode == NetEq::Mode::kCodecPlc) &&
163 status.expand_mutefactor < 16384 / 2 &&
164 current_span<static_cast<size_t>(delay_manager_->TargetLevel() *
165 packet_length_samples_ *
166 kPostponeDecodingLevel / 100)>> 8 &&
167 !status.packet_buffer_info.dtx_or_cng) {
168 return NetEq::Operation::kExpand;
169 }
170
171 const uint32_t five_seconds_samples = static_cast<uint32_t>(5 * sample_rate_);
172 // Check if the required packet is available.
173 if (status.target_timestamp == status.next_packet->timestamp) {
174 return ExpectedPacketAvailable(status.last_mode, status.play_dtmf);
175 } else if (!PacketBuffer::IsObsoleteTimestamp(status.next_packet->timestamp,
176 status.target_timestamp,
177 five_seconds_samples)) {
178 return FuturePacketAvailable(
179 status.last_packet_samples, status.last_mode, status.target_timestamp,
180 status.next_packet->timestamp, status.play_dtmf,
181 status.generated_noise_samples, status.packet_buffer_info.span_samples,
182 status.packet_buffer_info.num_packets);
183 } else {
184 // This implies that available_timestamp < target_timestamp, which can
185 // happen when a new stream or codec is received. Signal for a reset.
186 return NetEq::Operation::kUndefined;
187 }
188 }
189
ExpandDecision(NetEq::Operation operation)190 void DecisionLogic::ExpandDecision(NetEq::Operation operation) {
191 if (operation == NetEq::Operation::kExpand) {
192 num_consecutive_expands_++;
193 } else {
194 num_consecutive_expands_ = 0;
195 }
196 }
197
PacketArrived(bool last_cng_or_dtmf,size_t packet_length_samples,bool should_update_stats,uint16_t main_sequence_number,uint32_t main_timestamp,int fs_hz)198 absl::optional<int> DecisionLogic::PacketArrived(bool last_cng_or_dtmf,
199 size_t packet_length_samples,
200 bool should_update_stats,
201 uint16_t main_sequence_number,
202 uint32_t main_timestamp,
203 int fs_hz) {
204 delay_manager_->LastDecodedWasCngOrDtmf(last_cng_or_dtmf);
205 absl::optional<int> relative_delay;
206 if (delay_manager_->last_pack_cng_or_dtmf() == 0) {
207 // Calculate the total speech length carried in each packet.
208 if (packet_length_samples > 0 &&
209 packet_length_samples != packet_length_samples_) {
210 packet_length_samples_ = packet_length_samples;
211 delay_manager_->SetPacketAudioLength(
212 rtc::dchecked_cast<int>((1000 * packet_length_samples) / fs_hz));
213 }
214
215 // Update statistics.
216 if (should_update_stats) {
217 relative_delay =
218 delay_manager_->Update(main_sequence_number, main_timestamp, fs_hz);
219 }
220 } else if (delay_manager_->last_pack_cng_or_dtmf() == -1) {
221 // This is first "normal" packet after CNG or DTMF.
222 // Reset packet time counter and measure time until next packet,
223 // but don't update statistics.
224 delay_manager_->set_last_pack_cng_or_dtmf(0);
225 delay_manager_->ResetPacketIatCount();
226 }
227 return relative_delay;
228 }
229
FilterBufferLevel(size_t buffer_size_samples)230 void DecisionLogic::FilterBufferLevel(size_t buffer_size_samples) {
231 buffer_level_filter_.SetTargetBufferLevel(
232 delay_manager_->base_target_level());
233
234 int time_stretched_samples = time_stretched_cn_samples_;
235 if (prev_time_scale_) {
236 time_stretched_samples += sample_memory_;
237 timescale_countdown_ = tick_timer_->GetNewCountdown(kMinTimescaleInterval);
238 }
239
240 buffer_level_filter_.Update(buffer_size_samples, time_stretched_samples);
241 prev_time_scale_ = false;
242 time_stretched_cn_samples_ = 0;
243 }
244
CngOperation(NetEq::Mode prev_mode,uint32_t target_timestamp,uint32_t available_timestamp,size_t generated_noise_samples)245 NetEq::Operation DecisionLogic::CngOperation(NetEq::Mode prev_mode,
246 uint32_t target_timestamp,
247 uint32_t available_timestamp,
248 size_t generated_noise_samples) {
249 // Signed difference between target and available timestamp.
250 int32_t timestamp_diff = static_cast<int32_t>(
251 static_cast<uint32_t>(generated_noise_samples + target_timestamp) -
252 available_timestamp);
253 int32_t optimal_level_samp = static_cast<int32_t>(
254 (delay_manager_->TargetLevel() * packet_length_samples_) >> 8);
255 const int64_t excess_waiting_time_samp =
256 -static_cast<int64_t>(timestamp_diff) - optimal_level_samp;
257
258 if (excess_waiting_time_samp > optimal_level_samp / 2) {
259 // The waiting time for this packet will be longer than 1.5
260 // times the wanted buffer delay. Apply fast-forward to cut the
261 // waiting time down to the optimal.
262 noise_fast_forward_ = rtc::saturated_cast<size_t>(noise_fast_forward_ +
263 excess_waiting_time_samp);
264 timestamp_diff =
265 rtc::saturated_cast<int32_t>(timestamp_diff + excess_waiting_time_samp);
266 }
267
268 if (timestamp_diff < 0 && prev_mode == NetEq::Mode::kRfc3389Cng) {
269 // Not time to play this packet yet. Wait another round before using this
270 // packet. Keep on playing CNG from previous CNG parameters.
271 return NetEq::Operation::kRfc3389CngNoPacket;
272 } else {
273 // Otherwise, go for the CNG packet now.
274 noise_fast_forward_ = 0;
275 return NetEq::Operation::kRfc3389Cng;
276 }
277 }
278
NoPacket(bool play_dtmf)279 NetEq::Operation DecisionLogic::NoPacket(bool play_dtmf) {
280 if (cng_state_ == kCngRfc3389On) {
281 // Keep on playing comfort noise.
282 return NetEq::Operation::kRfc3389CngNoPacket;
283 } else if (cng_state_ == kCngInternalOn) {
284 // Keep on playing codec internal comfort noise.
285 return NetEq::Operation::kCodecInternalCng;
286 } else if (play_dtmf) {
287 return NetEq::Operation::kDtmf;
288 } else {
289 // Nothing to play, do expand.
290 return NetEq::Operation::kExpand;
291 }
292 }
293
ExpectedPacketAvailable(NetEq::Mode prev_mode,bool play_dtmf)294 NetEq::Operation DecisionLogic::ExpectedPacketAvailable(NetEq::Mode prev_mode,
295 bool play_dtmf) {
296 if (!disallow_time_stretching_ && prev_mode != NetEq::Mode::kExpand &&
297 !play_dtmf) {
298 // Check criterion for time-stretching. The values are in number of packets
299 // in Q8.
300 int low_limit, high_limit;
301 delay_manager_->BufferLimits(&low_limit, &high_limit);
302 int buffer_level_packets = 0;
303 if (packet_length_samples_ > 0) {
304 buffer_level_packets =
305 ((1 << 8) * buffer_level_filter_.filtered_current_level()) /
306 packet_length_samples_;
307 }
308 if (buffer_level_packets >= high_limit << 2)
309 return NetEq::Operation::kFastAccelerate;
310 if (TimescaleAllowed()) {
311 if (buffer_level_packets >= high_limit)
312 return NetEq::Operation::kAccelerate;
313 if (buffer_level_packets < low_limit)
314 return NetEq::Operation::kPreemptiveExpand;
315 }
316 }
317 return NetEq::Operation::kNormal;
318 }
319
FuturePacketAvailable(size_t decoder_frame_length,NetEq::Mode prev_mode,uint32_t target_timestamp,uint32_t available_timestamp,bool play_dtmf,size_t generated_noise_samples,size_t span_samples_in_packet_buffer,size_t num_packets_in_packet_buffer)320 NetEq::Operation DecisionLogic::FuturePacketAvailable(
321 size_t decoder_frame_length,
322 NetEq::Mode prev_mode,
323 uint32_t target_timestamp,
324 uint32_t available_timestamp,
325 bool play_dtmf,
326 size_t generated_noise_samples,
327 size_t span_samples_in_packet_buffer,
328 size_t num_packets_in_packet_buffer) {
329 // Required packet is not available, but a future packet is.
330 // Check if we should continue with an ongoing expand because the new packet
331 // is too far into the future.
332 uint32_t timestamp_leap = available_timestamp - target_timestamp;
333 if ((prev_mode == NetEq::Mode::kExpand ||
334 prev_mode == NetEq::Mode::kCodecPlc) &&
335 !ReinitAfterExpands(timestamp_leap) && !MaxWaitForPacket() &&
336 PacketTooEarly(timestamp_leap) && UnderTargetLevel()) {
337 if (play_dtmf) {
338 // Still have DTMF to play, so do not do expand.
339 return NetEq::Operation::kDtmf;
340 } else {
341 // Nothing to play.
342 return NetEq::Operation::kExpand;
343 }
344 }
345
346 if (prev_mode == NetEq::Mode::kCodecPlc) {
347 return NetEq::Operation::kNormal;
348 }
349
350 // If previous was comfort noise, then no merge is needed.
351 if (prev_mode == NetEq::Mode::kRfc3389Cng ||
352 prev_mode == NetEq::Mode::kCodecInternalCng) {
353 size_t cur_size_samples =
354 estimate_dtx_delay_
355 ? cur_size_samples = span_samples_in_packet_buffer
356 : num_packets_in_packet_buffer * decoder_frame_length;
357 // Target level is in number of packets in Q8.
358 const size_t target_level_samples =
359 (delay_manager_->TargetLevel() * packet_length_samples_) >> 8;
360 const bool generated_enough_noise =
361 static_cast<uint32_t>(generated_noise_samples + target_timestamp) >=
362 available_timestamp;
363
364 if (time_stretch_cn_) {
365 const size_t target_threshold_samples =
366 target_level_window_ms_ / 2 * (sample_rate_ / 1000);
367 const bool above_target_window =
368 cur_size_samples > target_level_samples + target_threshold_samples;
369 const bool below_target_window =
370 target_level_samples > target_threshold_samples &&
371 cur_size_samples < target_level_samples - target_threshold_samples;
372 // Keep the delay same as before CNG, but make sure that it is within the
373 // target window.
374 if ((generated_enough_noise && !below_target_window) ||
375 above_target_window) {
376 time_stretched_cn_samples_ = timestamp_leap - generated_noise_samples;
377 return NetEq::Operation::kNormal;
378 }
379 } else {
380 // Keep the same delay as before the CNG, but make sure that the number of
381 // samples in buffer is no higher than 4 times the optimal level.
382 if (generated_enough_noise ||
383 cur_size_samples > target_level_samples * 4) {
384 // Time to play this new packet.
385 return NetEq::Operation::kNormal;
386 }
387 }
388
389 // Too early to play this new packet; keep on playing comfort noise.
390 if (prev_mode == NetEq::Mode::kRfc3389Cng) {
391 return NetEq::Operation::kRfc3389CngNoPacket;
392 }
393 // prevPlayMode == kModeCodecInternalCng.
394 return NetEq::Operation::kCodecInternalCng;
395 }
396
397 // Do not merge unless we have done an expand before.
398 if (prev_mode == NetEq::Mode::kExpand) {
399 return NetEq::Operation::kMerge;
400 } else if (play_dtmf) {
401 // Play DTMF instead of expand.
402 return NetEq::Operation::kDtmf;
403 } else {
404 return NetEq::Operation::kExpand;
405 }
406 }
407
UnderTargetLevel() const408 bool DecisionLogic::UnderTargetLevel() const {
409 int buffer_level_packets = 0;
410 if (packet_length_samples_ > 0) {
411 buffer_level_packets =
412 ((1 << 8) * buffer_level_filter_.filtered_current_level()) /
413 packet_length_samples_;
414 }
415 return buffer_level_packets <= delay_manager_->TargetLevel();
416 }
417
ReinitAfterExpands(uint32_t timestamp_leap) const418 bool DecisionLogic::ReinitAfterExpands(uint32_t timestamp_leap) const {
419 return timestamp_leap >=
420 static_cast<uint32_t>(output_size_samples_ * kReinitAfterExpands);
421 }
422
PacketTooEarly(uint32_t timestamp_leap) const423 bool DecisionLogic::PacketTooEarly(uint32_t timestamp_leap) const {
424 return timestamp_leap >
425 static_cast<uint32_t>(output_size_samples_ * num_consecutive_expands_);
426 }
427
MaxWaitForPacket() const428 bool DecisionLogic::MaxWaitForPacket() const {
429 return num_consecutive_expands_ >= kMaxWaitForPacket;
430 }
431
432 } // namespace webrtc
433