/*
 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#ifndef MODULES_AUDIO_CODING_NETEQ_NETEQ_IMPL_H_
#define MODULES_AUDIO_CODING_NETEQ_NETEQ_IMPL_H_

#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "absl/types/optional.h"
#include "api/audio/audio_frame.h"
#include "api/neteq/neteq.h"
#include "api/neteq/neteq_controller.h"
#include "api/neteq/neteq_controller_factory.h"
#include "api/neteq/tick_timer.h"
#include "api/rtp_packet_info.h"
#include "modules/audio_coding/neteq/audio_multi_vector.h"
#include "modules/audio_coding/neteq/expand_uma_logger.h"
#include "modules/audio_coding/neteq/packet.h"
#include "modules/audio_coding/neteq/random_vector.h"
#include "modules/audio_coding/neteq/statistics_calculator.h"
#include "rtc_base/constructor_magic.h"
#include "rtc_base/synchronization/mutex.h"
#include "rtc_base/thread_annotations.h"

namespace webrtc {

// Forward declarations.
class Accelerate;
class BackgroundNoise;
class Clock;
class ComfortNoise;
class DecoderDatabase;
class DtmfBuffer;
class DtmfToneGenerator;
class Expand;
class Merge;
class NackTracker;
class Normal;
class PacketBuffer;
class RedPayloadSplitter;
class PostDecodeVad;
class PreemptiveExpand;
class RandomVector;
class SyncBuffer;
class TimestampScaler;
struct AccelerateFactory;
struct DtmfEvent;
struct ExpandFactory;
struct PreemptiveExpandFactory;

// Concrete implementation of the webrtc::NetEq interface.
//
// All state except |clock_| and |mutex_| itself is annotated
// RTC_GUARDED_BY(mutex_), and every protected helper is annotated
// RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_), i.e. the caller must already hold
// |mutex_| when invoking it.
class NetEqImpl : public webrtc::NetEq {
 public:
  // Classification of the audio produced by GetAudio(); see LastOutputType().
  enum class OutputType {
    kNormalSpeech,
    kPLC,
    kCNG,
    kPLCCNG,
    kVadPassive,
    kCodecPLC
  };

  // Error codes. kNoError is 0; the remaining values are numbered
  // consecutively in declaration order.
  enum ErrorCodes {
    kNoError = 0,
    kOtherError,
    kUnknownRtpPayloadType,
    kDecoderNotFound,
    kInvalidPointer,
    kAccelerateError,
    kPreemptiveExpandError,
    kComfortNoiseErrorCode,
    kDecoderErrorCode,
    kOtherDecoderError,
    kInvalidOperation,
    kDtmfParsingError,
    kDtmfInsertError,
    kSampleUnderrun,
    kDecodedTooMuch,
    kRedundancySplitError,
    kPacketBufferCorruption
  };

  // Bundle of the components that are handed to the NetEqImpl constructor.
  struct Dependencies {
    // The constructor populates the Dependencies struct with the default
    // implementations of the objects. They can all be replaced by the user
    // before sending the struct to the NetEqImpl constructor. However, there
    // are dependencies between some of the classes inside the struct, so
    // swapping out one may make it necessary to re-create another one.
    Dependencies(const NetEq::Config& config,
                 Clock* clock,
                 const rtc::scoped_refptr<AudioDecoderFactory>& decoder_factory,
                 const NetEqControllerFactory& controller_factory);
    ~Dependencies();

    // Raw pointer; the pointer itself is const. Presumably not owned by this
    // struct -- confirm with the caller that supplies it.
    Clock* const clock;
    std::unique_ptr<TickTimer> tick_timer;
    std::unique_ptr<StatisticsCalculator> stats;
    std::unique_ptr<DecoderDatabase> decoder_database;
    std::unique_ptr<DtmfBuffer> dtmf_buffer;
    std::unique_ptr<DtmfToneGenerator> dtmf_tone_generator;
    std::unique_ptr<PacketBuffer> packet_buffer;
    std::unique_ptr<NetEqController> neteq_controller;
    std::unique_ptr<RedPayloadSplitter> red_payload_splitter;
    std::unique_ptr<TimestampScaler> timestamp_scaler;
    std::unique_ptr<AccelerateFactory> accelerate_factory;
    std::unique_ptr<ExpandFactory> expand_factory;
    std::unique_ptr<PreemptiveExpandFactory> preemptive_expand_factory;
  };

  // Creates a new NetEqImpl object. |deps| is taken by rvalue reference.
  // NOTE(review): the effect of |create_components| is not visible in this
  // header; see the constructor definition.
  NetEqImpl(const NetEq::Config& config,
            Dependencies&& deps,
            bool create_components = true);

  ~NetEqImpl() override;

  // The public methods below that are marked |override| implement the NetEq
  // interface. Where no comment is given here, the contract is the one
  // documented on the base class.

  // Inserts a new packet into NetEq. Returns 0 on success, -1 on failure.
  int InsertPacket(const RTPHeader& rtp_header,
                   rtc::ArrayView<const uint8_t> payload) override;

  void InsertEmptyPacket(const RTPHeader& rtp_header) override;

  int GetAudio(
      AudioFrame* audio_frame,
      bool* muted,
      absl::optional<Operation> action_override = absl::nullopt) override;

  void SetCodecs(const std::map<int, SdpAudioFormat>& codecs) override;

  bool RegisterPayloadType(int rtp_payload_type,
                           const SdpAudioFormat& audio_format) override;

  // Removes |rtp_payload_type| from the codec database. Returns 0 on success,
  // -1 on failure.
  int RemovePayloadType(uint8_t rtp_payload_type) override;

  void RemoveAllPayloadTypes() override;

  bool SetMinimumDelay(int delay_ms) override;

  bool SetMaximumDelay(int delay_ms) override;

  bool SetBaseMinimumDelayMs(int delay_ms) override;

  int GetBaseMinimumDelayMs() const override;

  int TargetDelayMs() const override;

  int FilteredCurrentDelayMs() const override;

  // Writes the current network statistics to |stats|. The statistics are reset
  // after the call.
  int NetworkStatistics(NetEqNetworkStatistics* stats) override;

  NetEqLifetimeStatistics GetLifetimeStatistics() const override;

  NetEqOperationsAndState GetOperationsAndState() const override;

  // Enables post-decode VAD. When enabled, GetAudio() will return
  // kOutputVADPassive when the signal contains no speech.
  // NOTE(review): no kOutputVADPassive is declared in this header; this
  // presumably refers to OutputType::kVadPassive and/or
  // AudioFrame::kVadPassive -- confirm and update the wording.
  void EnableVad() override;

  // Disables post-decode VAD.
  void DisableVad() override;

  absl::optional<uint32_t> GetPlayoutTimestamp() const override;

  int last_output_sample_rate_hz() const override;

  absl::optional<DecoderFormat> GetDecoderFormat(
      int payload_type) const override;

  // Flushes both the packet buffer and the sync buffer.
  void FlushBuffers() override;

  void EnableNack(size_t max_nack_list_size) override;

  void DisableNack() override;

  std::vector<uint16_t> GetNackList(int64_t round_trip_time_ms) const override;

  std::vector<uint32_t> LastDecodedTimestamps() const override;

  int SyncBufferSizeMs() const override;

  // These accessor methods are only intended for testing purposes.
  const SyncBuffer* sync_buffer_for_test() const;
  Operation last_operation_for_test() const;

 protected:
  static const int kOutputSizeMs = 10;
  static const size_t kMaxFrameSize = 5760;  // 120 ms @ 48 kHz.
  // TODO(hlundin): Provide a better value for kSyncBufferSize.
  // Current value is kMaxFrameSize + 60 ms * 48 kHz (5760 + 2880 = 8640
  // samples), which is enough for calculating correlations of current frame
  // against history.
  static const size_t kSyncBufferSize = kMaxFrameSize + 60 * 48;

  // Inserts a new packet into NetEq. This is used by the InsertPacket method
  // above. Returns 0 on success, otherwise an error code.
  // TODO(hlundin): Merge this with InsertPacket above?
  int InsertPacketInternal(const RTPHeader& rtp_header,
                           rtc::ArrayView<const uint8_t> payload)
      RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);

  // Delivers 10 ms of audio data. The data is written to |audio_frame|.
  // Returns 0 on success, otherwise an error code.
  int GetAudioInternal(AudioFrame* audio_frame,
                       bool* muted,
                       absl::optional<Operation> action_override)
      RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);

  // Provides a decision to the GetAudioInternal method. The decision what to
  // do is written to |operation|. Packets to decode are written to
  // |packet_list|, and a DTMF event to play is written to |dtmf_event|. When
  // DTMF should be played, |play_dtmf| is set to true by the method.
  // Returns 0 on success, otherwise an error code.
  // NOTE(review): |action_override| is not described here; presumably, when
  // set, it replaces the decision that would otherwise be made -- confirm.
  int GetDecision(Operation* operation,
                  PacketList* packet_list,
                  DtmfEvent* dtmf_event,
                  bool* play_dtmf,
                  absl::optional<Operation> action_override)
      RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);

  // Decodes the speech packets in |packet_list|, and writes the results to
  // |decoded_buffer|, which is allocated to hold |decoded_buffer_length|
  // elements. The length of the decoded data is written to |decoded_length|.
  // The speech type -- speech or (codec-internal) comfort noise -- is written
  // to |speech_type|. If |packet_list| contains any SID frames for RFC 3389
  // comfort noise, those are not decoded.
  // NOTE(review): |decoded_buffer| and |decoded_buffer_length| are not
  // parameters of this method; presumably the members |decoded_buffer_| and
  // |decoded_buffer_length_| are meant. |operation| is a non-const pointer
  // whose use is not described here -- confirm both.
  int Decode(PacketList* packet_list,
             Operation* operation,
             int* decoded_length,
             AudioDecoder::SpeechType* speech_type)
      RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);

  // Sub-method to Decode(). Performs codec internal CNG.
  int DecodeCng(AudioDecoder* decoder,
                int* decoded_length,
                AudioDecoder::SpeechType* speech_type)
      RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);

  // Sub-method to Decode(). Performs the actual decoding.
  int DecodeLoop(PacketList* packet_list,
                 const Operation& operation,
                 AudioDecoder* decoder,
                 int* decoded_length,
                 AudioDecoder::SpeechType* speech_type)
      RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);

  // Sub-method which calls the Normal class to perform the normal operation.
  void DoNormal(const int16_t* decoded_buffer,
                size_t decoded_length,
                AudioDecoder::SpeechType speech_type,
                bool play_dtmf) RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);

  // Sub-method which calls the Merge class to perform the merge operation.
  void DoMerge(int16_t* decoded_buffer,
               size_t decoded_length,
               AudioDecoder::SpeechType speech_type,
               bool play_dtmf) RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);

  // NOTE(review): undocumented in the original header. Presumably performs
  // codec-provided packet loss concealment and reports whether it was done --
  // confirm against the definition.
  bool DoCodecPlc() RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);

  // Sub-method which calls the Expand class to perform the expand operation.
  int DoExpand(bool play_dtmf) RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);

  // Sub-method which calls the Accelerate class to perform the accelerate
  // operation.
  int DoAccelerate(int16_t* decoded_buffer,
                   size_t decoded_length,
                   AudioDecoder::SpeechType speech_type,
                   bool play_dtmf,
                   bool fast_accelerate) RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);

  // Sub-method which calls the PreemptiveExpand class to perform the
  // preemptive expand operation.
  int DoPreemptiveExpand(int16_t* decoded_buffer,
                         size_t decoded_length,
                         AudioDecoder::SpeechType speech_type,
                         bool play_dtmf) RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);

  // Sub-method which calls the ComfortNoise class to generate RFC 3389 comfort
  // noise. |packet_list| can either contain one SID frame to update the
  // noise parameters, or no payload at all, in which case the previously
  // received parameters are used.
  int DoRfc3389Cng(PacketList* packet_list, bool play_dtmf)
      RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);

  // Calls the audio decoder to generate codec-internal comfort noise when
  // no packet was received.
  void DoCodecInternalCng(const int16_t* decoded_buffer, size_t decoded_length)
      RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);

  // Calls the DtmfToneGenerator class to generate DTMF tones.
  int DoDtmf(const DtmfEvent& dtmf_event, bool* play_dtmf)
      RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);

  // Overdub DTMF on top of |output|.
  int DtmfOverdub(const DtmfEvent& dtmf_event,
                  size_t num_channels,
                  int16_t* output) const RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);

  // Extracts packets from |packet_buffer_| to produce at least
  // |required_samples| samples. The packets are inserted into |packet_list|.
  // Returns the number of samples that the packets in the list will produce, or
  // -1 in case of an error.
  int ExtractPackets(size_t required_samples, PacketList* packet_list)
      RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);

  // Resets various variables and objects to new values based on the sample rate
  // |fs_hz| and |channels| number of audio channels.
  void SetSampleRateAndChannels(int fs_hz, size_t channels)
      RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);

  // Returns the output type for the audio produced by the latest call to
  // GetAudio().
  OutputType LastOutputType() RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);

  // Updates Expand and Merge. Virtual, so a subclass may override it.
  virtual void UpdatePlcComponents(int fs_hz, size_t channels)
      RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);

  // Not annotated with RTC_GUARDED_BY; the pointer itself is const.
  Clock* const clock_;

  // Lock referenced by every RTC_GUARDED_BY / RTC_EXCLUSIVE_LOCKS_REQUIRED
  // annotation in this class. Declared mutable, which allows it to be
  // acquired from const methods.
  mutable Mutex mutex_;
  // Components whose owning pointers are const (never reseated after
  // construction). Presumably most are taken from the Dependencies passed to
  // the constructor -- confirm in the constructor definition.
  const std::unique_ptr<TickTimer> tick_timer_ RTC_GUARDED_BY(mutex_);
  const std::unique_ptr<DecoderDatabase> decoder_database_
      RTC_GUARDED_BY(mutex_);
  const std::unique_ptr<DtmfBuffer> dtmf_buffer_ RTC_GUARDED_BY(mutex_);
  const std::unique_ptr<DtmfToneGenerator> dtmf_tone_generator_
      RTC_GUARDED_BY(mutex_);
  const std::unique_ptr<PacketBuffer> packet_buffer_ RTC_GUARDED_BY(mutex_);
  const std::unique_ptr<RedPayloadSplitter> red_payload_splitter_
      RTC_GUARDED_BY(mutex_);
  const std::unique_ptr<TimestampScaler> timestamp_scaler_
      RTC_GUARDED_BY(mutex_);
  const std::unique_ptr<PostDecodeVad> vad_ RTC_GUARDED_BY(mutex_);
  const std::unique_ptr<ExpandFactory> expand_factory_ RTC_GUARDED_BY(mutex_);
  const std::unique_ptr<AccelerateFactory> accelerate_factory_
      RTC_GUARDED_BY(mutex_);
  const std::unique_ptr<PreemptiveExpandFactory> preemptive_expand_factory_
      RTC_GUARDED_BY(mutex_);
  const std::unique_ptr<StatisticsCalculator> stats_ RTC_GUARDED_BY(mutex_);

  // Components held through non-const owning pointers, i.e. they can be
  // replaced during the object's lifetime.
  std::unique_ptr<BackgroundNoise> background_noise_ RTC_GUARDED_BY(mutex_);
  std::unique_ptr<NetEqController> controller_ RTC_GUARDED_BY(mutex_);
  std::unique_ptr<AudioMultiVector> algorithm_buffer_ RTC_GUARDED_BY(mutex_);
  std::unique_ptr<SyncBuffer> sync_buffer_ RTC_GUARDED_BY(mutex_);
  std::unique_ptr<Expand> expand_ RTC_GUARDED_BY(mutex_);
  std::unique_ptr<Normal> normal_ RTC_GUARDED_BY(mutex_);
  std::unique_ptr<Merge> merge_ RTC_GUARDED_BY(mutex_);
  std::unique_ptr<Accelerate> accelerate_ RTC_GUARDED_BY(mutex_);
  std::unique_ptr<PreemptiveExpand> preemptive_expand_ RTC_GUARDED_BY(mutex_);
  RandomVector random_vector_ RTC_GUARDED_BY(mutex_);
  std::unique_ptr<ComfortNoise> comfort_noise_ RTC_GUARDED_BY(mutex_);
  // The scalar members below have no in-class initializer unless one is shown;
  // they must be initialized by the constructor.
  int fs_hz_ RTC_GUARDED_BY(mutex_);
  int fs_mult_ RTC_GUARDED_BY(mutex_);
  int last_output_sample_rate_hz_ RTC_GUARDED_BY(mutex_);
  size_t output_size_samples_ RTC_GUARDED_BY(mutex_);
  size_t decoder_frame_length_ RTC_GUARDED_BY(mutex_);
  Mode last_mode_ RTC_GUARDED_BY(mutex_);
  Operation last_operation_ RTC_GUARDED_BY(mutex_);
  size_t decoded_buffer_length_ RTC_GUARDED_BY(mutex_);
  std::unique_ptr<int16_t[]> decoded_buffer_ RTC_GUARDED_BY(mutex_);
  uint32_t playout_timestamp_ RTC_GUARDED_BY(mutex_);
  bool new_codec_ RTC_GUARDED_BY(mutex_);
  uint32_t timestamp_ RTC_GUARDED_BY(mutex_);
  bool reset_decoder_ RTC_GUARDED_BY(mutex_);
  absl::optional<uint8_t> current_rtp_payload_type_ RTC_GUARDED_BY(mutex_);
  absl::optional<uint8_t> current_cng_rtp_payload_type_ RTC_GUARDED_BY(mutex_);
  bool first_packet_ RTC_GUARDED_BY(mutex_);
  bool enable_fast_accelerate_ RTC_GUARDED_BY(mutex_);
  std::unique_ptr<NackTracker> nack_ RTC_GUARDED_BY(mutex_);
  bool nack_enabled_ RTC_GUARDED_BY(mutex_);
  const bool enable_muted_state_ RTC_GUARDED_BY(mutex_);
  AudioFrame::VADActivity last_vad_activity_ RTC_GUARDED_BY(mutex_) =
      AudioFrame::kVadPassive;
  std::unique_ptr<TickTimer::Stopwatch> generated_noise_stopwatch_
      RTC_GUARDED_BY(mutex_);
  std::vector<uint32_t> last_decoded_timestamps_ RTC_GUARDED_BY(mutex_);
  std::vector<RtpPacketInfo> last_decoded_packet_infos_ RTC_GUARDED_BY(mutex_);
  ExpandUmaLogger expand_uma_logger_ RTC_GUARDED_BY(mutex_);
  ExpandUmaLogger speech_expand_uma_logger_ RTC_GUARDED_BY(mutex_);
  bool no_time_stretching_ RTC_GUARDED_BY(mutex_);  // Only used for test.
  rtc::BufferT<int16_t> concealment_audio_ RTC_GUARDED_BY(mutex_);
  const bool enable_rtx_handling_ RTC_GUARDED_BY(mutex_);
  // Data members used for adding extra delay to the output of NetEq.
  // The delay in ms (which is 10 times the number of elements in
  // output_delay_chain_).
  const int output_delay_chain_ms_ RTC_GUARDED_BY(mutex_);
  // Vector of AudioFrames which contains the delayed audio. Accessed as a
  // circular buffer.
  std::vector<AudioFrame> output_delay_chain_ RTC_GUARDED_BY(mutex_);
  // Index into output_delay_chain_.
  size_t output_delay_chain_ix_ RTC_GUARDED_BY(mutex_) = 0;
  // Did output_delay_chain_ get populated yet?
  bool output_delay_chain_empty_ RTC_GUARDED_BY(mutex_) = true;
  // Contains the sample rate of the AudioFrame last emitted from the delay
  // chain. If the extra output delay chain is not used, or if no audio has been
  // emitted yet, the variable is empty.
  absl::optional<int> delayed_last_output_sample_rate_hz_
      RTC_GUARDED_BY(mutex_);

 private:
  RTC_DISALLOW_COPY_AND_ASSIGN(NetEqImpl);
};

}  // namespace webrtc
#endif  // MODULES_AUDIO_CODING_NETEQ_NETEQ_IMPL_H_