1 /* 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_INCLUDE_NETEQ_H_ 12 #define WEBRTC_MODULES_AUDIO_CODING_NETEQ_INCLUDE_NETEQ_H_ 13 14 #include <string.h> // Provide access to size_t. 15 16 #include <string> 17 18 #include "webrtc/base/constructormagic.h" 19 #include "webrtc/common_types.h" 20 #include "webrtc/modules/audio_coding/neteq/audio_decoder_impl.h" 21 #include "webrtc/typedefs.h" 22 23 namespace webrtc { 24 25 // Forward declarations. 26 struct WebRtcRTPHeader; 27 28 struct NetEqNetworkStatistics { 29 uint16_t current_buffer_size_ms; // Current jitter buffer size in ms. 30 uint16_t preferred_buffer_size_ms; // Target buffer size in ms. 31 uint16_t jitter_peaks_found; // 1 if adding extra delay due to peaky 32 // jitter; 0 otherwise. 33 uint16_t packet_loss_rate; // Loss rate (network + late) in Q14. 34 uint16_t packet_discard_rate; // Late loss rate in Q14. 35 uint16_t expand_rate; // Fraction (of original stream) of synthesized 36 // audio inserted through expansion (in Q14). 37 uint16_t speech_expand_rate; // Fraction (of original stream) of synthesized 38 // speech inserted through expansion (in Q14). 39 uint16_t preemptive_rate; // Fraction of data inserted through pre-emptive 40 // expansion (in Q14). 41 uint16_t accelerate_rate; // Fraction of data removed through acceleration 42 // (in Q14). 43 uint16_t secondary_decoded_rate; // Fraction of data coming from secondary 44 // decoding (in Q14). 45 int32_t clockdrift_ppm; // Average clock-drift in parts-per-million 46 // (positive or negative). 47 size_t added_zero_samples; // Number of zero samples added in "off" mode. 48 // Statistics for packet waiting times, i.e., the time between a packet 49 // arrives until it is decoded. 50 int mean_waiting_time_ms; 51 int median_waiting_time_ms; 52 int min_waiting_time_ms; 53 int max_waiting_time_ms; 54 }; 55 56 enum NetEqOutputType { 57 kOutputNormal, 58 kOutputPLC, 59 kOutputCNG, 60 kOutputPLCtoCNG, 61 kOutputVADPassive 62 }; 63 64 enum NetEqPlayoutMode { 65 kPlayoutOn, 66 kPlayoutOff, 67 kPlayoutFax, 68 kPlayoutStreaming 69 }; 70 71 // This is the interface class for NetEq. 72 class NetEq { 73 public: 74 enum BackgroundNoiseMode { 75 kBgnOn, // Default behavior with eternal noise. 76 kBgnFade, // Noise fades to zero after some time. 77 kBgnOff // Background noise is always zero. 78 }; 79 80 struct Config { ConfigConfig81 Config() 82 : sample_rate_hz(16000), 83 enable_audio_classifier(false), 84 enable_post_decode_vad(false), 85 max_packets_in_buffer(50), 86 // |max_delay_ms| has the same effect as calling SetMaximumDelay(). 87 max_delay_ms(2000), 88 background_noise_mode(kBgnOff), 89 playout_mode(kPlayoutOn), 90 enable_fast_accelerate(false) {} 91 92 std::string ToString() const; 93 94 int sample_rate_hz; // Initial value. Will change with input data. 95 bool enable_audio_classifier; 96 bool enable_post_decode_vad; 97 size_t max_packets_in_buffer; 98 int max_delay_ms; 99 BackgroundNoiseMode background_noise_mode; 100 NetEqPlayoutMode playout_mode; 101 bool enable_fast_accelerate; 102 }; 103 104 enum ReturnCodes { 105 kOK = 0, 106 kFail = -1, 107 kNotImplemented = -2 108 }; 109 110 enum ErrorCodes { 111 kNoError = 0, 112 kOtherError, 113 kInvalidRtpPayloadType, 114 kUnknownRtpPayloadType, 115 kCodecNotSupported, 116 kDecoderExists, 117 kDecoderNotFound, 118 kInvalidSampleRate, 119 kInvalidPointer, 120 kAccelerateError, 121 kPreemptiveExpandError, 122 kComfortNoiseErrorCode, 123 kDecoderErrorCode, 124 kOtherDecoderError, 125 kInvalidOperation, 126 kDtmfParameterError, 127 kDtmfParsingError, 128 kDtmfInsertError, 129 kStereoNotSupported, 130 kSampleUnderrun, 131 kDecodedTooMuch, 132 kFrameSplitError, 133 kRedundancySplitError, 134 kPacketBufferCorruption, 135 kSyncPacketNotAccepted 136 }; 137 138 // Creates a new NetEq object, with parameters set in |config|. The |config| 139 // object will only have to be valid for the duration of the call to this 140 // method. 141 static NetEq* Create(const NetEq::Config& config); 142 ~NetEq()143 virtual ~NetEq() {} 144 145 // Inserts a new packet into NetEq. The |receive_timestamp| is an indication 146 // of the time when the packet was received, and should be measured with 147 // the same tick rate as the RTP timestamp of the current payload. 148 // Returns 0 on success, -1 on failure. 149 virtual int InsertPacket(const WebRtcRTPHeader& rtp_header, 150 rtc::ArrayView<const uint8_t> payload, 151 uint32_t receive_timestamp) = 0; 152 153 // Inserts a sync-packet into packet queue. Sync-packets are decoded to 154 // silence and are intended to keep AV-sync intact in an event of long packet 155 // losses when Video NACK is enabled but Audio NACK is not. Clients of NetEq 156 // might insert sync-packet when they observe that buffer level of NetEq is 157 // decreasing below a certain threshold, defined by the application. 158 // Sync-packets should have the same payload type as the last audio payload 159 // type, i.e. they cannot have DTMF or CNG payload type, nor a codec change 160 // can be implied by inserting a sync-packet. 161 // Returns kOk on success, kFail on failure. 162 virtual int InsertSyncPacket(const WebRtcRTPHeader& rtp_header, 163 uint32_t receive_timestamp) = 0; 164 165 // Instructs NetEq to deliver 10 ms of audio data. The data is written to 166 // |output_audio|, which can hold (at least) |max_length| elements. 167 // The number of channels that were written to the output is provided in 168 // the output variable |num_channels|, and each channel contains 169 // |samples_per_channel| elements. If more than one channel is written, 170 // the samples are interleaved. 171 // The speech type is written to |type|, if |type| is not NULL. 172 // Returns kOK on success, or kFail in case of an error. 173 virtual int GetAudio(size_t max_length, int16_t* output_audio, 174 size_t* samples_per_channel, size_t* num_channels, 175 NetEqOutputType* type) = 0; 176 177 // Associates |rtp_payload_type| with |codec| and |codec_name|, and stores the 178 // information in the codec database. Returns 0 on success, -1 on failure. 179 // The name is only used to provide information back to the caller about the 180 // decoders. Hence, the name is arbitrary, and may be empty. 181 virtual int RegisterPayloadType(NetEqDecoder codec, 182 const std::string& codec_name, 183 uint8_t rtp_payload_type) = 0; 184 185 // Provides an externally created decoder object |decoder| to insert in the 186 // decoder database. The decoder implements a decoder of type |codec| and 187 // associates it with |rtp_payload_type| and |codec_name|. The decoder will 188 // produce samples at the rate |sample_rate_hz|. Returns kOK on success, kFail 189 // on failure. 190 // The name is only used to provide information back to the caller about the 191 // decoders. Hence, the name is arbitrary, and may be empty. 192 virtual int RegisterExternalDecoder(AudioDecoder* decoder, 193 NetEqDecoder codec, 194 const std::string& codec_name, 195 uint8_t rtp_payload_type, 196 int sample_rate_hz) = 0; 197 198 // Removes |rtp_payload_type| from the codec database. Returns 0 on success, 199 // -1 on failure. 200 virtual int RemovePayloadType(uint8_t rtp_payload_type) = 0; 201 202 // Sets a minimum delay in millisecond for packet buffer. The minimum is 203 // maintained unless a higher latency is dictated by channel condition. 204 // Returns true if the minimum is successfully applied, otherwise false is 205 // returned. 206 virtual bool SetMinimumDelay(int delay_ms) = 0; 207 208 // Sets a maximum delay in milliseconds for packet buffer. The latency will 209 // not exceed the given value, even required delay (given the channel 210 // conditions) is higher. Calling this method has the same effect as setting 211 // the |max_delay_ms| value in the NetEq::Config struct. 212 virtual bool SetMaximumDelay(int delay_ms) = 0; 213 214 // The smallest latency required. This is computed bases on inter-arrival 215 // time and internal NetEq logic. Note that in computing this latency none of 216 // the user defined limits (applied by calling setMinimumDelay() and/or 217 // SetMaximumDelay()) are applied. 218 virtual int LeastRequiredDelayMs() const = 0; 219 220 // Not implemented. 221 virtual int SetTargetDelay() = 0; 222 223 // Not implemented. 224 virtual int TargetDelay() = 0; 225 226 // Returns the current total delay (packet buffer and sync buffer) in ms. 227 virtual int CurrentDelayMs() const = 0; 228 229 // Sets the playout mode to |mode|. 230 // Deprecated. Set the mode in the Config struct passed to the constructor. 231 // TODO(henrik.lundin) Delete. 232 virtual void SetPlayoutMode(NetEqPlayoutMode mode) = 0; 233 234 // Returns the current playout mode. 235 // Deprecated. 236 // TODO(henrik.lundin) Delete. 237 virtual NetEqPlayoutMode PlayoutMode() const = 0; 238 239 // Writes the current network statistics to |stats|. The statistics are reset 240 // after the call. 241 virtual int NetworkStatistics(NetEqNetworkStatistics* stats) = 0; 242 243 // Writes the current RTCP statistics to |stats|. The statistics are reset 244 // and a new report period is started with the call. 245 virtual void GetRtcpStatistics(RtcpStatistics* stats) = 0; 246 247 // Same as RtcpStatistics(), but does not reset anything. 248 virtual void GetRtcpStatisticsNoReset(RtcpStatistics* stats) = 0; 249 250 // Enables post-decode VAD. When enabled, GetAudio() will return 251 // kOutputVADPassive when the signal contains no speech. 252 virtual void EnableVad() = 0; 253 254 // Disables post-decode VAD. 255 virtual void DisableVad() = 0; 256 257 // Gets the RTP timestamp for the last sample delivered by GetAudio(). 258 // Returns true if the RTP timestamp is valid, otherwise false. 259 virtual bool GetPlayoutTimestamp(uint32_t* timestamp) = 0; 260 261 // Returns the sample rate in Hz of the audio produced in the last GetAudio 262 // call. If GetAudio has not been called yet, the configured sample rate 263 // (Config::sample_rate_hz) is returned. 264 virtual int last_output_sample_rate_hz() const = 0; 265 266 // Not implemented. 267 virtual int SetTargetNumberOfChannels() = 0; 268 269 // Not implemented. 270 virtual int SetTargetSampleRate() = 0; 271 272 // Returns the error code for the last occurred error. If no error has 273 // occurred, 0 is returned. 274 virtual int LastError() const = 0; 275 276 // Returns the error code last returned by a decoder (audio or comfort noise). 277 // When LastError() returns kDecoderErrorCode or kComfortNoiseErrorCode, check 278 // this method to get the decoder's error code. 279 virtual int LastDecoderError() = 0; 280 281 // Flushes both the packet buffer and the sync buffer. 282 virtual void FlushBuffers() = 0; 283 284 // Current usage of packet-buffer and it's limits. 285 virtual void PacketBufferStatistics(int* current_num_packets, 286 int* max_num_packets) const = 0; 287 288 // Enables NACK and sets the maximum size of the NACK list, which should be 289 // positive and no larger than Nack::kNackListSizeLimit. If NACK is already 290 // enabled then the maximum NACK list size is modified accordingly. 291 virtual void EnableNack(size_t max_nack_list_size) = 0; 292 293 virtual void DisableNack() = 0; 294 295 // Returns a list of RTP sequence numbers corresponding to packets to be 296 // retransmitted, given an estimate of the round-trip time in milliseconds. 297 virtual std::vector<uint16_t> GetNackList( 298 int64_t round_trip_time_ms) const = 0; 299 300 protected: NetEq()301 NetEq() {} 302 303 private: 304 RTC_DISALLOW_COPY_AND_ASSIGN(NetEq); 305 }; 306 307 } // namespace webrtc 308 #endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_INCLUDE_NETEQ_H_ 309