1 /* 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #ifndef API_AUDIO_CODECS_AUDIO_DECODER_H_ 12 #define API_AUDIO_CODECS_AUDIO_DECODER_H_ 13 14 #include <stddef.h> 15 #include <stdint.h> 16 17 #include <memory> 18 #include <vector> 19 20 #include "absl/types/optional.h" 21 #include "api/array_view.h" 22 #include "rtc_base/buffer.h" 23 #include "rtc_base/constructor_magic.h" 24 25 namespace webrtc { 26 27 class AudioDecoder { 28 public: 29 enum SpeechType { 30 kSpeech = 1, 31 kComfortNoise = 2, 32 }; 33 34 // Used by PacketDuration below. Save the value -1 for errors. 35 enum { kNotImplemented = -2 }; 36 37 AudioDecoder() = default; 38 virtual ~AudioDecoder() = default; 39 40 class EncodedAudioFrame { 41 public: 42 struct DecodeResult { 43 size_t num_decoded_samples; 44 SpeechType speech_type; 45 }; 46 47 virtual ~EncodedAudioFrame() = default; 48 49 // Returns the duration in samples-per-channel of this audio frame. 50 // If no duration can be ascertained, returns zero. 51 virtual size_t Duration() const = 0; 52 53 // Returns true if this packet contains DTX. 54 virtual bool IsDtxPacket() const; 55 56 // Decodes this frame of audio and writes the result in |decoded|. 57 // |decoded| must be large enough to store as many samples as indicated by a 58 // call to Duration() . On success, returns an absl::optional containing the 59 // total number of samples across all channels, as well as whether the 60 // decoder produced comfort noise or speech. On failure, returns an empty 61 // absl::optional. Decode may be called at most once per frame object. 62 virtual absl::optional<DecodeResult> Decode( 63 rtc::ArrayView<int16_t> decoded) const = 0; 64 }; 65 66 struct ParseResult { 67 ParseResult(); 68 ParseResult(uint32_t timestamp, 69 int priority, 70 std::unique_ptr<EncodedAudioFrame> frame); 71 ParseResult(ParseResult&& b); 72 ~ParseResult(); 73 74 ParseResult& operator=(ParseResult&& b); 75 76 // The timestamp of the frame is in samples per channel. 77 uint32_t timestamp; 78 // The relative priority of the frame compared to other frames of the same 79 // payload and the same timeframe. A higher value means a lower priority. 80 // The highest priority is zero - negative values are not allowed. 81 int priority; 82 std::unique_ptr<EncodedAudioFrame> frame; 83 }; 84 85 // Let the decoder parse this payload and prepare zero or more decodable 86 // frames. Each frame must be between 10 ms and 120 ms long. The caller must 87 // ensure that the AudioDecoder object outlives any frame objects returned by 88 // this call. The decoder is free to swap or move the data from the |payload| 89 // buffer. |timestamp| is the input timestamp, in samples, corresponding to 90 // the start of the payload. 91 virtual std::vector<ParseResult> ParsePayload(rtc::Buffer&& payload, 92 uint32_t timestamp); 93 94 // TODO(bugs.webrtc.org/10098): The Decode and DecodeRedundant methods are 95 // obsolete; callers should call ParsePayload instead. For now, subclasses 96 // must still implement DecodeInternal. 97 98 // Decodes |encode_len| bytes from |encoded| and writes the result in 99 // |decoded|. The maximum bytes allowed to be written into |decoded| is 100 // |max_decoded_bytes|. Returns the total number of samples across all 101 // channels. If the decoder produced comfort noise, |speech_type| 102 // is set to kComfortNoise, otherwise it is kSpeech. The desired output 103 // sample rate is provided in |sample_rate_hz|, which must be valid for the 104 // codec at hand. 105 int Decode(const uint8_t* encoded, 106 size_t encoded_len, 107 int sample_rate_hz, 108 size_t max_decoded_bytes, 109 int16_t* decoded, 110 SpeechType* speech_type); 111 112 // Same as Decode(), but interfaces to the decoders redundant decode function. 113 // The default implementation simply calls the regular Decode() method. 114 int DecodeRedundant(const uint8_t* encoded, 115 size_t encoded_len, 116 int sample_rate_hz, 117 size_t max_decoded_bytes, 118 int16_t* decoded, 119 SpeechType* speech_type); 120 121 // Indicates if the decoder implements the DecodePlc method. 122 virtual bool HasDecodePlc() const; 123 124 // Calls the packet-loss concealment of the decoder to update the state after 125 // one or several lost packets. The caller has to make sure that the 126 // memory allocated in |decoded| should accommodate |num_frames| frames. 127 virtual size_t DecodePlc(size_t num_frames, int16_t* decoded); 128 129 // Asks the decoder to generate packet-loss concealment and append it to the 130 // end of |concealment_audio|. The concealment audio should be in 131 // channel-interleaved format, with as many channels as the last decoded 132 // packet produced. The implementation must produce at least 133 // requested_samples_per_channel, or nothing at all. This is a signal to the 134 // caller to conceal the loss with other means. If the implementation provides 135 // concealment samples, it is also responsible for "stitching" it together 136 // with the decoded audio on either side of the concealment. 137 // Note: The default implementation of GeneratePlc will be deleted soon. All 138 // implementations must provide their own, which can be a simple as a no-op. 139 // TODO(bugs.webrtc.org/9676): Remove default impementation. 140 virtual void GeneratePlc(size_t requested_samples_per_channel, 141 rtc::BufferT<int16_t>* concealment_audio); 142 143 // Resets the decoder state (empty buffers etc.). 144 virtual void Reset() = 0; 145 146 // Returns the last error code from the decoder. 147 virtual int ErrorCode(); 148 149 // Returns the duration in samples-per-channel of the payload in |encoded| 150 // which is |encoded_len| bytes long. Returns kNotImplemented if no duration 151 // estimate is available, or -1 in case of an error. 152 virtual int PacketDuration(const uint8_t* encoded, size_t encoded_len) const; 153 154 // Returns the duration in samples-per-channel of the redandant payload in 155 // |encoded| which is |encoded_len| bytes long. Returns kNotImplemented if no 156 // duration estimate is available, or -1 in case of an error. 157 virtual int PacketDurationRedundant(const uint8_t* encoded, 158 size_t encoded_len) const; 159 160 // Detects whether a packet has forward error correction. The packet is 161 // comprised of the samples in |encoded| which is |encoded_len| bytes long. 162 // Returns true if the packet has FEC and false otherwise. 163 virtual bool PacketHasFec(const uint8_t* encoded, size_t encoded_len) const; 164 165 // Returns the actual sample rate of the decoder's output. This value may not 166 // change during the lifetime of the decoder. 167 virtual int SampleRateHz() const = 0; 168 169 // The number of channels in the decoder's output. This value may not change 170 // during the lifetime of the decoder. 171 virtual size_t Channels() const = 0; 172 173 protected: 174 static SpeechType ConvertSpeechType(int16_t type); 175 176 virtual int DecodeInternal(const uint8_t* encoded, 177 size_t encoded_len, 178 int sample_rate_hz, 179 int16_t* decoded, 180 SpeechType* speech_type) = 0; 181 182 virtual int DecodeRedundantInternal(const uint8_t* encoded, 183 size_t encoded_len, 184 int sample_rate_hz, 185 int16_t* decoded, 186 SpeechType* speech_type); 187 188 private: 189 RTC_DISALLOW_COPY_AND_ASSIGN(AudioDecoder); 190 }; 191 192 } // namespace webrtc 193 #endif // API_AUDIO_CODECS_AUDIO_DECODER_H_ 194