1 /* 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #ifndef WEBRTC_MODULES_AUDIO_CODING_MAIN_ACM2_ACM_GENERIC_CODEC_H_ 12 #define WEBRTC_MODULES_AUDIO_CODING_MAIN_ACM2_ACM_GENERIC_CODEC_H_ 13 14 #include "webrtc/modules/audio_coding/main/interface/audio_coding_module_typedefs.h" 15 #include "webrtc/modules/audio_coding/main/acm2/acm_common_defs.h" 16 #include "webrtc/modules/audio_coding/neteq/interface/neteq.h" 17 #include "webrtc/modules/audio_coding/neteq/interface/audio_decoder.h" 18 #include "webrtc/system_wrappers/interface/rw_lock_wrapper.h" 19 #include "webrtc/system_wrappers/interface/thread_annotations.h" 20 #include "webrtc/system_wrappers/interface/trace.h" 21 22 #define MAX_FRAME_SIZE_10MSEC 6 23 24 // forward declaration 25 struct WebRtcVadInst; 26 struct WebRtcCngEncInst; 27 28 namespace webrtc { 29 30 struct WebRtcACMCodecParams; 31 struct CodecInst; 32 33 namespace acm2 { 34 35 // forward declaration 36 class AcmReceiver; 37 38 class ACMGenericCodec { 39 public: 40 /////////////////////////////////////////////////////////////////////////// 41 // Constructor of the class 42 // 43 ACMGenericCodec(); 44 45 /////////////////////////////////////////////////////////////////////////// 46 // Destructor of the class. 47 // 48 virtual ~ACMGenericCodec(); 49 50 /////////////////////////////////////////////////////////////////////////// 51 // ACMGenericCodec* CreateInstance(); 52 // The function will be used for FEC. It is not implemented yet. 
53 // 54 virtual ACMGenericCodec* CreateInstance() = 0; 55 56 /////////////////////////////////////////////////////////////////////////// 57 // int16_t Encode() 58 // The function is called to perform an encoding of the audio stored in 59 // audio buffer. An encoding is performed only if enough audio, i.e. equal 60 // to the frame-size of the codec, exists. The audio frame will be processed 61 // by VAD and CN/DTX if required. There are a few different cases. 62 // 63 // A) Neither VAD nor DTX is active; the frame is encoded by the encoder. 64 // 65 // B) VAD is enabled but not DTX; in this case the audio is processed by VAD 66 // and encoded by the encoder. The "*encoding_type" will be either 67 // "kActiveNormalEncoded" or "kPassiveNormalEncoded" if frame is active or 68 // passive, respectively. 69 // 70 // C) DTX is enabled; if the codec has internal VAD/DTX we just encode the 71 // frame by the encoder. Otherwise, the frame is passed through VAD and 72 // if identified as passive, then it will be processed by CN/DTX. If the 73 // frame is active it will be encoded by the encoder. 74 // 75 // This function acquires the appropriate locks and calls EncodeSafe() for 76 // the actual processing. 77 // 78 // Outputs: 79 // -bitstream : a buffer where bit-stream will be written to. 80 // -bitstream_len_byte : contains the length of the bit-stream in 81 // bytes. 82 // -timestamp : contains the RTP timestamp, this is the 83 // sampling time of the first sample encoded 84 // (measured in number of samples). 85 // -encoding_type : contains the type of encoding applied on the 86 // audio samples. The alternatives are 87 // (c.f. acm_common_types.h) 88 // -kNoEncoding: 89 // there was not enough data to encode, or 90 // some error has happened that we could 91 // not do encoding. 92 // -kActiveNormalEncoded: 93 // the audio frame is active and encoded by 94 // the given codec. 
95 // -kPassiveNormalEncoded: 96 // the audio frame is passive but coded with 97 // the given codec (NO DTX). 98 // -kPassiveDTXWB: 99 // The audio frame is passive and used 100 // wide-band CN to encode. 101 // -kPassiveDTXNB: 102 // The audio frame is passive and used 103 // narrow-band CN to encode. 104 // 105 // Return value: 106 // -1 if an error occurred, otherwise the length of the bit-stream in 107 // bytes. 108 // 109 int16_t Encode(uint8_t* bitstream, 110 int16_t* bitstream_len_byte, 111 uint32_t* timestamp, 112 WebRtcACMEncodingType* encoding_type); 113 114 /////////////////////////////////////////////////////////////////////////// 115 // bool EncoderInitialized(); 116 // 117 // Return value: 118 // True if the encoder is successfully initialized, 119 // false otherwise. 120 // 121 bool EncoderInitialized(); 122 123 /////////////////////////////////////////////////////////////////////////// 124 // int16_t EncoderParams() 125 // It is called to get encoder parameters. It will call 126 // EncoderParamsSafe() in turn. 127 // 128 // Output: 129 // -enc_params : a buffer where the encoder parameters are 130 // written to. If the encoder is not 131 // initialized this buffer is filled with 132 // invalid values 133 // Return value: 134 // -1 if the encoder is not initialized, 135 // 0 otherwise. 136 // 137 int16_t EncoderParams(WebRtcACMCodecParams* enc_params); 138 139 /////////////////////////////////////////////////////////////////////////// 140 // int16_t InitEncoder(...) 141 // This function is called to initialize the encoder with the given 142 // parameters. 143 // 144 // Input: 145 // -codec_params : parameters of encoder. 146 // -force_initialization: if false the initialization is invoked only if 147 // the encoder is not initialized. If true the 148 // encoder is forced to (re)initialize. 149 // 150 // Return value: 151 // 0 if could initialize successfully, 152 // -1 if failed to initialize. 
153 // 154 // 155 int16_t InitEncoder(WebRtcACMCodecParams* codec_params, 156 bool force_initialization); 157 158 /////////////////////////////////////////////////////////////////////////// 159 // int32_t Add10MsData(...) 160 // This function is called to add 10 ms of audio to the audio buffer of 161 // the codec. 162 // 163 // Inputs: 164 // -timestamp : the timestamp of the 10 ms audio. the timestamp 165 // is the sampling time of the 166 // first sample measured in number of samples. 167 // -data : a buffer that contains the audio. The codec 168 // expects to get the audio in correct sampling 169 // frequency 170 // -length : the length of the audio buffer 171 // -audio_channel : 0 for mono, 1 for stereo (not supported yet) 172 // 173 // Return values: 174 // -1 if failed 175 // 0 otherwise. 176 // 177 int32_t Add10MsData(const uint32_t timestamp, 178 const int16_t* data, 179 const uint16_t length, 180 const uint8_t audio_channel); 181 182 /////////////////////////////////////////////////////////////////////////// 183 // uint32_t NoMissedSamples() 184 // This function returns the number of samples which are overwritten in 185 // the audio buffer. The audio samples are overwritten if the input audio 186 // buffer is full, but Add10MsData() is called. (We might remove this 187 // function if it is not used) 188 // 189 // Return Value: 190 // Number of samples which are overwritten. 191 // 192 uint32_t NoMissedSamples() const; 193 194 /////////////////////////////////////////////////////////////////////////// 195 // void ResetNoMissedSamples() 196 // This function resets the number of overwritten samples to zero. 197 // (We might remove this function if we remove NoMissedSamples()) 198 // 199 void ResetNoMissedSamples(); 200 201 /////////////////////////////////////////////////////////////////////////// 202 // int16_t SetBitRate() 203 // The function is called to set the encoding rate. 
204 // 205 // Input: 206 // -bitrate_bps : encoding rate in bits per second 207 // 208 // Return value: 209 // -1 if failed to set the rate, due to invalid input or given 210 // codec is not rate-adjustable. 211 // 0 if the rate is adjusted successfully 212 // 213 int16_t SetBitRate(const int32_t bitrate_bps); 214 215 /////////////////////////////////////////////////////////////////////////// 216 // DestructEncoderInst() 217 // This API is used in conferencing. It will free the memory that is pointed 218 // by |ptr_inst|. |ptr_inst| is a pointer to encoder instance, created and 219 // filled up by calling EncoderInst(...). 220 // 221 // Inputs: 222 // -ptr_inst : pointer to an encoder instance to be deleted. 223 // 224 // 225 void DestructEncoderInst(void* ptr_inst); 226 227 /////////////////////////////////////////////////////////////////////////// 228 // uint32_t EarliestTimestamp() 229 // Returns the timestamp of the first 10 ms in audio buffer. This is used 230 // to identify if a synchronization of two encoders is required. 231 // 232 // Return value: 233 // timestamp of the first 10 ms audio in the audio buffer. 234 // 235 uint32_t EarliestTimestamp() const; 236 237 /////////////////////////////////////////////////////////////////////////// 238 // int16_t SetVAD() 239 // This is called to set VAD & DTX. If the codec has internal DTX, it will 240 // be used. If DTX is enabled and the codec does not have internal DTX, 241 // WebRtc-VAD will be used to decide if the frame is active. If DTX is 242 // disabled but VAD is enabled, the audio is passed through VAD to label it 243 // as active or passive, but the frame is encoded normally. However the 244 // bit-stream is labeled properly so that ACM::Process() can use this 245 // information. In case of failure, the previous states of the VAD & DTX 246 // are kept. 247 // 248 // Inputs/Output: 249 // -enable_dtx : if true DTX will be enabled otherwise the DTX is 250 // disabled. 
If codec has internal DTX that will be 251 // used, otherwise WebRtc-CNG is used. In the latter 252 // case VAD is automatically activated. 253 // -enable_vad : if true WebRtc-VAD is enabled, otherwise VAD is 254 // disabled, except for the case that DTX is enabled 255 // but codec doesn't have internal DTX. In this case 256 // VAD is enabled regardless of the value of 257 // |enable_vad|. 258 // -mode : this specifies the aggressiveness of VAD. 259 // 260 // Return value 261 // -1 if failed to set DTX & VAD as specified, 262 // 0 if succeeded. 263 // 264 int16_t SetVAD(bool* enable_dtx, bool* enable_vad, ACMVADMode* mode); 265 266 /////////////////////////////////////////////////////////////////////////// 267 // int32_t ReplaceInternalDTX() 268 // This is called to replace the codec internal DTX with WebRtc DTX. 269 // This is only valid for G729 where the user has possibility to replace 270 // AnnexB with WebRtc DTX. For other codecs this function has no effect. 271 // 272 // Input: 273 // -replace_internal_dtx : if true the internal DTX is replaced with WebRtc. 274 // 275 // Return value 276 // -1 if failed to replace internal DTX, 277 // 0 if succeeded. 278 // 279 int32_t ReplaceInternalDTX(const bool replace_internal_dtx); 280 281 /////////////////////////////////////////////////////////////////////////// 282 // int32_t IsInternalDTXReplaced() 283 // This is called to check if the codec internal DTX is replaced by WebRtc 284 // DTX. This is only valid for G729 where the user has possibility to replace 285 // AnnexB with WebRtc DTX. For other codecs this function has no effect. 286 // 287 // Output: 288 // -internal_dtx_replaced: if true the internal DTX is replaced with WebRtc. 289 // 290 // Return value 291 // -1 if failed to check 292 // 0 if succeeded. 
293 // 294 int32_t IsInternalDTXReplaced(bool* internal_dtx_replaced); 295 296 /////////////////////////////////////////////////////////////////////////// 297 // void SetNetEqDecodeLock() 298 // Passes the NetEq lock to the codec. 299 // 300 // Input: 301 // -neteq_decode_lock : pointer to the lock associated with NetEQ of ACM. 302 // SetNetEqDecodeLock(RWLockWrapper * neteq_decode_lock)303 void SetNetEqDecodeLock(RWLockWrapper* neteq_decode_lock) { 304 neteq_decode_lock_ = neteq_decode_lock; 305 } 306 307 /////////////////////////////////////////////////////////////////////////// 308 // bool HasInternalDTX() 309 // Used to check if the codec has internal DTX. 310 // 311 // Return value: 312 // true if the codec has an internal DTX, e.g. G729, 313 // false otherwise. 314 // HasInternalDTX()315 bool HasInternalDTX() const { return has_internal_dtx_; } 316 317 /////////////////////////////////////////////////////////////////////////// 318 // int32_t GetEstimatedBandwidth() 319 // Used to get decoder estimated bandwidth. Only iSAC will provide a value. 320 // 321 // 322 // Return value: 323 // -1 if fails to get decoder estimated bandwidth, 324 // >0 estimated bandwidth in bits/sec. 325 // 326 int32_t GetEstimatedBandwidth(); 327 328 /////////////////////////////////////////////////////////////////////////// 329 // int32_t SetEstimatedBandwidth() 330 // Used to set estimated bandwidth sent out of band from other side. Only 331 // iSAC will have use for the value. 332 // 333 // Input: 334 // -estimated_bandwidth: estimated bandwidth in bits/sec 335 // 336 // Return value: 337 // -1 if fails to set estimated bandwidth, 338 // 0 on success. 339 // 340 int32_t SetEstimatedBandwidth(int32_t estimated_bandwidth); 341 342 /////////////////////////////////////////////////////////////////////////// 343 // int32_t GetRedPayload() 344 // Used to get codec specific RED payload (if such is implemented). 345 // Currently only done in iSAC. 
346 // 347 // Outputs: 348 // -red_payload : a pointer to the data for RED payload. 349 // -payload_bytes : number of bytes in RED payload. 350 // 351 // Return value: 352 // -1 if fails to get codec specific RED, 353 // 0 if succeeded. 354 // 355 int32_t GetRedPayload(uint8_t* red_payload, int16_t* payload_bytes); 356 357 /////////////////////////////////////////////////////////////////////////// 358 // int16_t ResetEncoder() 359 // By calling this function you would re-initialize the encoder with the 360 // current parameters. All the settings, e.g. VAD/DTX, frame-size... should 361 // remain unchanged. (In case of iSAC we don't want to lose BWE history.) 362 // 363 // Return value 364 // -1 if failed, 365 // 0 if succeeded. 366 // 367 int16_t ResetEncoder(); 368 369 /////////////////////////////////////////////////////////////////////////// 370 // void DestructEncoder() 371 // This function is called to delete the encoder instance, if possible, to 372 // have a fresh start. For codecs where encoder and decoder share the same 373 // instance we cannot delete the encoder and instead we will initialize the 374 // encoder. We also delete VAD and DTX if they have been created. 375 // 376 void DestructEncoder(); 377 378 /////////////////////////////////////////////////////////////////////////// 379 // int16_t SamplesLeftToEncode() 380 // Returns the number of samples required to be able to do encoding. 381 // 382 // Return value: 383 // Number of samples. 384 // 385 int16_t SamplesLeftToEncode(); 386 387 /////////////////////////////////////////////////////////////////////////// 388 // SetUniqueID() 389 // Set a unique ID for the codec to be used for tracing and debugging 390 // 391 // Input 392 // -id : A number to identify the codec. 393 // 394 void SetUniqueID(const uint32_t id); 395 396 /////////////////////////////////////////////////////////////////////////// 397 // UpdateDecoderSampFreq() 398 // For most of the codecs this function does nothing. 
It must be 399 // implemented for those codecs that one codec instance serves as the 400 // decoder for different flavors of the codec. One example is iSAC. There, 401 // iSAC 16 kHz and iSAC 32 kHz are treated as two different codecs with 402 // different payload types, however, there is only one iSAC instance to 403 // decode. The reason for that is we would like to decode and encode with 404 // the same codec instance for bandwidth estimator to work. 405 // 406 // Each time that we receive a new payload type, we call this function to 407 // prepare the decoder associated with the new payload. Normally, decoders 408 // don't have to do anything. For iSAC the decoder has to change its 409 // sampling rate. The input parameter specifies the current flavor of the 410 // codec in codec database. For instance, if we just got a SWB payload then 411 // the input parameter is ACMCodecDB::isacswb. 412 // 413 // Input: 414 // -codec_id : the ID of the codec associated with the 415 // payload type that we just received. 416 // 417 // Return value: 418 // 0 if succeeded in updating the decoder. 419 // -1 if failed to update. 420 // UpdateDecoderSampFreq(int16_t)421 virtual int16_t UpdateDecoderSampFreq(int16_t /* codec_id */) { return 0; } 422 423 /////////////////////////////////////////////////////////////////////////// 424 // UpdateEncoderSampFreq() 425 // Call this function to update the encoder sampling frequency. This 426 // is for codecs where one payload-name supports several encoder sampling 427 // frequencies. Otherwise, to change the sampling frequency we need to 428 // register new codec. ACM will consider that as registration of a new 429 // codec, not a change in parameter. For iSAC, switching from WB to SWB 430 // is treated as a change in parameter. Therefore, we need this function. 431 // 432 // Input: 433 // -samp_freq_hz : encoder sampling frequency. 434 // 435 // Return value: 436 // -1 if failed, or if this is meaningless for the given codec. 
437 // 0 if succeeded. 438 // 439 virtual int16_t UpdateEncoderSampFreq(uint16_t samp_freq_hz); 440 441 /////////////////////////////////////////////////////////////////////////// 442 // EncoderSampFreq() 443 // Get the sampling frequency that the encoder (WebRtc wrapper) expects. 444 // 445 // Output: 446 // -samp_freq_hz : sampling frequency, in Hertz, which the encoder 447 // should be fed with. 448 // 449 // Return value: 450 // -1 if failed to output sampling rate. 451 // 0 if the sample rate is returned successfully. 452 // 453 virtual int16_t EncoderSampFreq(uint16_t* samp_freq_hz); 454 455 /////////////////////////////////////////////////////////////////////////// 456 // int32_t ConfigISACBandwidthEstimator() 457 // Call this function to configure the bandwidth estimator of ISAC. 458 // During the adaptation of bit-rate, iSAC automatically adjusts the 459 // frame-size (either 30 or 60 ms) to save on RTP header. The initial 460 // frame-size can be specified by the first argument. The configuration also 461 // regards the initial estimate of bandwidths. The estimator starts from 462 // this point and converges to the actual bottleneck. This is given by the 463 // second parameter. Furthermore, it is also possible to control the 464 // adaptation of frame-size. This is specified by the last parameter. 465 // 466 // Input: 467 // -init_frame_size_msec : initial frame-size in milliseconds. For iSAC-wb 468 // 30 ms and 60 ms (default) are acceptable values, 469 // and for iSAC-swb 30 ms is the only acceptable 470 // value. Zero indicates default value. 471 // -init_rate_bps : initial estimate of the bandwidth. Values 472 // between 10000 and 58000 are acceptable. 473 // -enforce_frame_size : if true, the frame-size will not be adapted. 474 // 475 // Return value: 476 // -1 if failed to configure the bandwidth estimator, 477 // 0 if the configuration was successfully applied. 
478 // 479 virtual int32_t ConfigISACBandwidthEstimator( 480 const uint8_t init_frame_size_msec, 481 const uint16_t init_rate_bps, 482 const bool enforce_frame_size); 483 484 /////////////////////////////////////////////////////////////////////////// 485 // SetISACMaxPayloadSize() 486 // Set the maximum payload size of iSAC packets. No iSAC payload, 487 // regardless of its frame-size, may exceed the given limit. For 488 // an iSAC payload of size B bits and frame-size T sec we have; 489 // (B < max_payload_len_bytes * 8) and (B/T < max_rate_bit_per_sec), c.f. 490 // SetISACMaxRate(). 491 // 492 // Input: 493 // -max_payload_len_bytes : maximum payload size in bytes. 494 // 495 // Return value: 496 // -1 if failed to set the maximum payload-size. 497 // 0 if the given length is set successfully. 498 // 499 virtual int32_t SetISACMaxPayloadSize(const uint16_t max_payload_len_bytes); 500 501 /////////////////////////////////////////////////////////////////////////// 502 // SetISACMaxRate() 503 // Set the maximum instantaneous rate of iSAC. For a payload of B bits 504 // with a frame-size of T sec the instantaneous rate is B/T bits per 505 // second. Therefore, (B/T < max_rate_bit_per_sec) and 506 // (B < max_payload_len_bytes * 8) are always satisfied for iSAC payloads, 507 // c.f SetISACMaxPayloadSize(). 508 // 509 // Input: 510 // -max_rate_bps : maximum instantaneous bit-rate given in bits/sec. 511 // 512 // Return value: 513 // -1 if failed to set the maximum rate. 514 // 0 if the maximum rate is set successfully. 515 // 516 virtual int32_t SetISACMaxRate(const uint32_t max_rate_bps); 517 FrameSize()518 int32_t FrameSize() { return frame_len_smpl_; } 519 520 /////////////////////////////////////////////////////////////////////////// 521 // REDPayloadISAC() 522 // This is an iSAC-specific function. The function is called to get RED 523 // payload from a default-encoder. 524 // 525 // Inputs: 526 // -isac_rate : the target rate of the main payload. 
A RED 527 // payload is generated according to the rate of 528 // main payload. Note that we are not specifying the 529 // rate of RED payload, but the main payload. 530 // -isac_bw_estimate : bandwidth information should be inserted in 531 // RED payload. 532 // 533 // Output: 534 // -payload : pointer to a buffer where the RED payload will 535 // be written to. 536 // -payload_len_bytes : a place-holder to write the length of the RED 537 // payload in Bytes. 538 // 539 // Return value: 540 // -1 if an error occurs, otherwise the length of the payload (in Bytes) 541 // is returned. 542 // 543 virtual int16_t REDPayloadISAC(const int32_t isac_rate, 544 const int16_t isac_bw_estimate, 545 uint8_t* payload, 546 int16_t* payload_len_bytes); 547 548 /////////////////////////////////////////////////////////////////////////// 549 // HasFrameToEncode() 550 // Returns true if there is enough audio buffered for encoding, such that 551 // calling Encode() will return a payload. 552 // 553 bool HasFrameToEncode() const; 554 555 // 556 // Returns pointer to the AudioDecoder class of this codec. A codec which 557 // should own its own decoder (e.g. iSAC, which needs the same instance for encoding 558 // and decoding, or a codec which should access decoder instance for specific 559 // decoder setting) should implement this method. This method is called if 560 // and only if the ACMCodecDB::codec_settings[codec_id].owns_decoder is true. 561 // Decoder(int)562 virtual AudioDecoder* Decoder(int /* codec_id */) { return NULL; } 563 564 /////////////////////////////////////////////////////////////////////////// 565 // bool HasInternalFEC() 566 // Used to check if the codec has internal FEC. 567 // 568 // Return value: 569 // true if the codec has an internal FEC, e.g. Opus. 570 // false otherwise. 
571 // HasInternalFEC()572 bool HasInternalFEC() const { return has_internal_fec_; } 573 574 /////////////////////////////////////////////////////////////////////////// 575 // int SetFEC(); 576 // Sets the codec internal FEC. No effects on codecs that do not provide 577 // internal FEC. 578 // 579 // Input: 580 // -enable_fec : if true FEC will be enabled otherwise the FEC is 581 // disabled. 582 // 583 // Return value: 584 // -1 if failed, or the codec does not support FEC 585 // 0 if succeeded. 586 // SetFEC(bool)587 virtual int SetFEC(bool /* enable_fec */) { return -1; } 588 589 /////////////////////////////////////////////////////////////////////////// 590 // int SetPacketLossRate() 591 // Sets expected packet loss rate for encoding. Some encoders provide 592 // packet-loss-aware encoding to make stream less sensitive to packet losses, 593 // through e.g., FEC. No effects on codecs that do not provide such encoding. 594 // 595 // Input: 596 // -loss_rate : expected packet loss rate (0 -- 100 inclusive). 597 // 598 // Return value: 599 // -1 if failed, 600 // 0 if succeeded or packet loss rate is ignored. 601 // SetPacketLossRate(int)602 virtual int SetPacketLossRate(int /* loss_rate */) { return 0; } 603 604 protected: 605 /////////////////////////////////////////////////////////////////////////// 606 // All the functions with FunctionNameSafe(...) contain the actual 607 // implementation of FunctionName(...). FunctionName() acquires an 608 // appropriate lock and calls FunctionNameSafe() to do the actual work. 609 // Therefore, for the description of functionality, input/output arguments 610 // and return value we refer to FunctionName() 611 // 612 613 /////////////////////////////////////////////////////////////////////////// 614 // See Add10MsData() for the description of function, input(s)/output(s) 615 // and return value. 
616 // 617 virtual int32_t Add10MsDataSafe(const uint32_t timestamp, 618 const int16_t* data, 619 const uint16_t length, 620 const uint8_t audio_channel) 621 EXCLUSIVE_LOCKS_REQUIRED(codec_wrapper_lock_); 622 623 /////////////////////////////////////////////////////////////////////////// 624 // See EncoderParams() for the description of function, input(s)/output(s) 625 // and return value. 626 // 627 int16_t EncoderParamsSafe(WebRtcACMCodecParams* enc_params); 628 629 /////////////////////////////////////////////////////////////////////////// 630 // See ResetEncoder() for the description of function, input(s)/output(s) 631 // and return value. 632 // 633 int16_t ResetEncoderSafe() EXCLUSIVE_LOCKS_REQUIRED(codec_wrapper_lock_); 634 635 /////////////////////////////////////////////////////////////////////////// 636 // See InitEncoder() for the description of function, input(s)/output(s) 637 // and return value. 638 // 639 int16_t InitEncoderSafe(WebRtcACMCodecParams* codec_params, 640 bool force_initialization) 641 EXCLUSIVE_LOCKS_REQUIRED(codec_wrapper_lock_); 642 643 /////////////////////////////////////////////////////////////////////////// 644 // See InitDecoder() for the description of function, input(s)/output(s) 645 // and return value. 646 // 647 int16_t InitDecoderSafe(WebRtcACMCodecParams* codec_params, 648 bool force_initialization); 649 650 /////////////////////////////////////////////////////////////////////////// 651 // See DestructEncoder() for the description of function, 652 // input(s)/output(s) and return value. 653 // 654 virtual void DestructEncoderSafe() = 0; 655 656 /////////////////////////////////////////////////////////////////////////// 657 // See SetBitRate() for the description of function, input(s)/output(s) 658 // and return value. 659 // 660 // Any codec that can change the bit-rate has to implement this. 
661 // 662 virtual int16_t SetBitRateSafe(const int32_t bitrate_bps); 663 664 /////////////////////////////////////////////////////////////////////////// 665 // See GetEstimatedBandwidth() for the description of function, 666 // input(s)/output(s) and return value. 667 // 668 virtual int32_t GetEstimatedBandwidthSafe(); 669 670 /////////////////////////////////////////////////////////////////////////// 671 // See SetEstimatedBandwidth() for the description of function, 672 // input(s)/output(s) and return value. 673 // 674 virtual int32_t SetEstimatedBandwidthSafe(int32_t estimated_bandwidth); 675 676 /////////////////////////////////////////////////////////////////////////// 677 // See GetRedPayload() for the description of function, input(s)/output(s) 678 // and return value. 679 // 680 virtual int32_t GetRedPayloadSafe(uint8_t* red_payload, 681 int16_t* payload_bytes); 682 683 /////////////////////////////////////////////////////////////////////////// 684 // See SetVAD() for the description of function, input(s)/output(s) and 685 // return value. 686 // 687 int16_t SetVADSafe(bool* enable_dtx, bool* enable_vad, ACMVADMode* mode) 688 EXCLUSIVE_LOCKS_REQUIRED(codec_wrapper_lock_); 689 690 /////////////////////////////////////////////////////////////////////////// 691 // See ReplaceInternalDTX() for the description of function, input and 692 // return value. 693 // 694 virtual int32_t ReplaceInternalDTXSafe(const bool replace_internal_dtx); 695 696 /////////////////////////////////////////////////////////////////////////// 697 // See IsInternalDTXReplaced() for the description of function, input and 698 // return value. 699 // 700 virtual int32_t IsInternalDTXReplacedSafe(bool* internal_dtx_replaced); 701 702 /////////////////////////////////////////////////////////////////////////// 703 // int16_t CreateEncoder() 704 // Creates the encoder instance. 705 // 706 // Return value: 707 // -1 if failed, 708 // 0 if succeeded. 
709 // 710 int16_t CreateEncoder(); 711 712 /////////////////////////////////////////////////////////////////////////// 713 // int16_t EnableVAD(); 714 // Enables VAD with the given mode. The VAD instance will be created if 715 // it does not exist. 716 // 717 // Input: 718 // -mode : VAD mode c.f. audio_coding_module_typedefs.h for 719 // the options. 720 // 721 // Return value: 722 // -1 if failed, 723 // 0 if succeeded. 724 // 725 int16_t EnableVAD(ACMVADMode mode) 726 EXCLUSIVE_LOCKS_REQUIRED(codec_wrapper_lock_); 727 728 /////////////////////////////////////////////////////////////////////////// 729 // int16_t DisableVAD() 730 // Disables VAD. 731 // 732 // Return value: 733 // -1 if failed, 734 // 0 if succeeded. 735 // 736 int16_t DisableVAD() EXCLUSIVE_LOCKS_REQUIRED(codec_wrapper_lock_); 737 738 /////////////////////////////////////////////////////////////////////////// 739 // int16_t EnableDTX() 740 // Enables DTX. This method should be overridden for codecs which have 741 // internal DTX. 742 // 743 // Return value: 744 // -1 if failed, 745 // 0 if succeeded. 746 // 747 virtual int16_t EnableDTX() EXCLUSIVE_LOCKS_REQUIRED(codec_wrapper_lock_); 748 749 /////////////////////////////////////////////////////////////////////////// 750 // int16_t DisableDTX() 751 // Disables usage of DTX. This method should be overridden for codecs which 752 // have internal DTX. 753 // 754 // Return value: 755 // -1 if failed, 756 // 0 if succeeded. 757 // 758 virtual int16_t DisableDTX() EXCLUSIVE_LOCKS_REQUIRED(codec_wrapper_lock_); 759 760 /////////////////////////////////////////////////////////////////////////// 761 // int16_t InternalEncode() 762 // This is a codec-specific function called in EncodeSafe() to actually 763 // encode a frame of audio. 764 // 765 // Outputs: 766 // -bitstream : pointer to a buffer where the bit-stream is 767 // written to. 768 // -bitstream_len_byte : the length of the bit-stream in bytes, 769 // a negative value indicates error. 
770 // 771 // Return value: 772 // -1 if failed, 773 // otherwise the length of the bit-stream is returned. 774 // 775 virtual int16_t InternalEncode(uint8_t* bitstream, 776 int16_t* bitstream_len_byte) = 0; 777 778 /////////////////////////////////////////////////////////////////////////// 779 // int16_t InternalInitEncoder() 780 // This is a codec-specific function called in InitEncoderSafe(), it has to 781 // do all codec-specific operation to initialize the encoder given the 782 // encoder parameters. 783 // 784 // Input: 785 // -codec_params : pointer to a structure that contains parameters to 786 // initialize encoder. 787 // Set codec_params->codec_inst.rate to -1 for 788 // iSAC to operate in adaptive mode. 789 // (to do: if frame-length is -1 frame-length will be 790 // automatically adjusted, otherwise, given 791 // frame-length is forced) 792 // 793 // Return value: 794 // -1 if failed, 795 // 0 if succeeded. 796 // 797 virtual int16_t InternalInitEncoder(WebRtcACMCodecParams* codec_params) = 0; 798 799 /////////////////////////////////////////////////////////////////////////// 800 // void IncreaseNoMissedSamples() 801 // This method is called to increase the number of samples that are 802 // overwritten in the audio buffer. 803 // 804 // Input: 805 // -num_samples : the number of overwritten samples is incremented 806 // by this value. 807 // 808 void IncreaseNoMissedSamples(const int16_t num_samples); 809 810 /////////////////////////////////////////////////////////////////////////// 811 // int16_t InternalCreateEncoder() 812 // This is a codec-specific method called in CreateEncoderSafe() it is 813 // supposed to perform all codec-specific operations to create encoder 814 // instance. 815 // 816 // Return value: 817 // -1 if failed, 818 // 0 if succeeded. 
819 // 820 virtual int16_t InternalCreateEncoder() = 0; 821 822 /////////////////////////////////////////////////////////////////////////// 823 // void InternalDestructEncoderInst() 824 // This is a codec-specific method, used in conferencing, called from 825 // DestructEncoderInst(). The input argument is pointer to encoder instance 826 // (codec instance for codecs that encoder and decoder share the same 827 // instance). This method is called to free the memory that |ptr_inst| is 828 // pointing to. 829 // 830 // Input: 831 // -ptr_inst : pointer to encoder instance. 832 // 833 // Return value: 834 // -1 if failed, 835 // 0 if succeeded. 836 // 837 virtual void InternalDestructEncoderInst(void* ptr_inst) = 0; 838 839 /////////////////////////////////////////////////////////////////////////// 840 // int16_t InternalResetEncoder() 841 // This method is called to reset the states of encoder. However, the 842 // current parameters, e.g. frame-length, should remain as they are. For 843 // most of the codecs a re-initialization of the encoder is what needs to 844 // be down. But for iSAC we like to keep the BWE history so we cannot 845 // re-initialize. As soon as such an API is implemented in iSAC this method 846 // has to be overwritten in ACMISAC class. 847 // 848 // Return value: 849 // -1 if failed, 850 // 0 if succeeded. 851 // 852 virtual int16_t InternalResetEncoder(); 853 854 /////////////////////////////////////////////////////////////////////////// 855 // int16_t ProcessFrameVADDTX() 856 // This function is called when a full frame of audio is available. It will 857 // break the audio frame into blocks such that each block could be processed 858 // by VAD & CN/DTX. If a frame is divided into two blocks then there are two 859 // cases. First, the first block is active, the second block will not be 860 // processed by CN/DTX but only by VAD and return to caller with 861 // '*samples_processed' set to zero. 
There, the audio frame will be encoded 862 // by the encoder. Second, the first block is inactive and is processed by 863 // CN/DTX, then we stop processing the next block and return to the caller 864 // which is EncodeSafe(), with "*samples_processed" equal to the number of 865 // samples in first block. 866 // 867 // Output: 868 // -bitstream : pointer to a buffer where DTX frame, if 869 // generated, will be written to. 870 // -bitstream_len_byte : contains the length of bit-stream in bytes, if 871 // generated. Zero if no bit-stream is generated. 872 // -samples_processed : contains no of samples that actually CN has 873 // processed. Those samples processed by CN will not 874 // be encoded by the encoder, obviously. If 875 // contains zero, it means that the frame has been 876 // identified as active by VAD. Note that 877 // "*samples_processed" might be non-zero but 878 // "*bitstream_len_byte" be zero. 879 // 880 // Return value: 881 // -1 if failed, 882 // 0 if succeeded. 883 // 884 int16_t ProcessFrameVADDTX(uint8_t* bitstream, 885 int16_t* bitstream_len_byte, 886 int16_t* samples_processed) 887 EXCLUSIVE_LOCKS_REQUIRED(codec_wrapper_lock_); 888 889 /////////////////////////////////////////////////////////////////////////// 890 // CurrentRate() 891 // Call to get the current encoding rate of the encoder. This function 892 // should be overwritten for codecs which automatically change their 893 // target rate. One example is iSAC. The output of the function is the 894 // current target rate. 895 // 896 // Output: 897 // -rate_bps : the current target rate of the codec. 
898 // CurrentRate(int32_t *)899 virtual void CurrentRate(int32_t* /* rate_bps */) {} 900 901 // &in_audio_[in_audio_ix_write_] always point to where new audio can be 902 // written to 903 int16_t in_audio_ix_write_; 904 905 // &in_audio_[in_audio_ix_read_] points to where audio has to be read from 906 int16_t in_audio_ix_read_; 907 908 int16_t in_timestamp_ix_write_; 909 910 // Where the audio is stored before encoding, 911 // To save memory the following buffer can be allocated 912 // dynamically for 80 ms depending on the sampling frequency 913 // of the codec. 914 int16_t* in_audio_; 915 uint32_t* in_timestamp_; 916 917 int16_t frame_len_smpl_; 918 uint16_t num_channels_; 919 920 // This will point to a static database of the supported codecs 921 int16_t codec_id_; 922 923 // This will account for the number of samples were not encoded 924 // the case is rare, either samples are missed due to overwrite 925 // at input buffer or due to encoding error 926 uint32_t num_missed_samples_; 927 928 // True if the encoder instance created 929 bool encoder_exist_; 930 931 // True if the encoder instance initialized 932 bool encoder_initialized_; 933 934 const bool registered_in_neteq_; // TODO(henrik.lundin) Remove? 935 936 // VAD/DTX 937 bool has_internal_dtx_; 938 WebRtcVadInst* ptr_vad_inst_ GUARDED_BY(codec_wrapper_lock_); 939 bool vad_enabled_ GUARDED_BY(codec_wrapper_lock_); 940 ACMVADMode vad_mode_ GUARDED_BY(codec_wrapper_lock_); 941 int16_t vad_label_[MAX_FRAME_SIZE_10MSEC] GUARDED_BY(codec_wrapper_lock_); 942 bool dtx_enabled_ GUARDED_BY(codec_wrapper_lock_); 943 WebRtcCngEncInst* ptr_dtx_inst_ GUARDED_BY(codec_wrapper_lock_); 944 uint8_t num_lpc_params_ // TODO(henrik.lundin) Delete and 945 GUARDED_BY(codec_wrapper_lock_); // replace with kNewCNGNumLPCParams. 946 bool sent_cn_previous_ GUARDED_BY(codec_wrapper_lock_); 947 int16_t prev_frame_cng_ GUARDED_BY(codec_wrapper_lock_); 948 949 // FEC. 
950 bool has_internal_fec_; 951 952 WebRtcACMCodecParams encoder_params_; 953 954 // Used as a global lock for all available decoders 955 // so that no decoder is used when NetEQ decodes. 956 RWLockWrapper* neteq_decode_lock_; 957 958 // Used to lock wrapper internal data 959 // such as buffers and state variables. 960 RWLockWrapper& codec_wrapper_lock_; 961 962 uint32_t last_timestamp_ GUARDED_BY(codec_wrapper_lock_); 963 uint32_t unique_id_; 964 }; 965 966 } // namespace acm2 967 968 } // namespace webrtc 969 970 #endif // WEBRTC_MODULES_AUDIO_CODING_MAIN_ACM2_ACM_GENERIC_CODEC_H_ 971