1 /* 2 * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #ifndef API_AUDIO_AUDIO_FRAME_H_ 12 #define API_AUDIO_AUDIO_FRAME_H_ 13 14 #include <stddef.h> 15 #include <stdint.h> 16 17 #include <utility> 18 19 #include "api/audio/channel_layout.h" 20 #include "api/rtp_packet_infos.h" 21 #include "rtc_base/constructor_magic.h" 22 23 namespace webrtc { 24 25 /* This class holds up to 120 ms of super-wideband (32 kHz) stereo audio. It 26 * allows for adding and subtracting frames while keeping track of the resulting 27 * states. 28 * 29 * Notes 30 * - This is a de-facto api, not designed for external use. The AudioFrame class 31 * is in need of overhaul or even replacement, and anyone depending on it 32 * should be prepared for that. 33 * - The total number of samples is samples_per_channel_ * num_channels_. 34 * - Stereo data is interleaved starting with the left channel. 35 */ 36 class AudioFrame { 37 public: 38 // Using constexpr here causes linker errors unless the variable also has an 39 // out-of-class definition, which is impractical in this header-only class. 40 // (This makes no sense because it compiles as an enum value, which we most 41 // certainly cannot take the address of, just fine.) C++17 introduces inline 42 // variables which should allow us to switch to constexpr and keep this a 43 // header-only class. 44 enum : size_t { 45 // Stereo, 32 kHz, 120 ms (2 * 32 * 120) 46 // Stereo, 192 kHz, 20 ms (2 * 192 * 20) 47 kMaxDataSizeSamples = 7680, 48 kMaxDataSizeBytes = kMaxDataSizeSamples * sizeof(int16_t), 49 }; 50 51 enum VADActivity { kVadActive = 0, kVadPassive = 1, kVadUnknown = 2 }; 52 enum SpeechType { 53 kNormalSpeech = 0, 54 kPLC = 1, 55 kCNG = 2, 56 kPLCCNG = 3, 57 kCodecPLC = 5, 58 kUndefined = 4 59 }; 60 61 AudioFrame(); 62 63 friend void swap(AudioFrame& a, AudioFrame& b); 64 65 // Resets all members to their default state. 66 void Reset(); 67 // Same as Reset(), but leaves mute state unchanged. Muting a frame requires 68 // the buffer to be zeroed on the next call to mutable_data(). Callers 69 // intending to write to the buffer immediately after Reset() can instead use 70 // ResetWithoutMuting() to skip this wasteful zeroing. 71 void ResetWithoutMuting(); 72 73 void UpdateFrame(uint32_t timestamp, 74 const int16_t* data, 75 size_t samples_per_channel, 76 int sample_rate_hz, 77 SpeechType speech_type, 78 VADActivity vad_activity, 79 size_t num_channels = 1); 80 81 void CopyFrom(const AudioFrame& src); 82 83 // Sets a wall-time clock timestamp in milliseconds to be used for profiling 84 // of time between two points in the audio chain. 85 // Example: 86 // t0: UpdateProfileTimeStamp() 87 // t1: ElapsedProfileTimeMs() => t1 - t0 [msec] 88 void UpdateProfileTimeStamp(); 89 // Returns the time difference between now and when UpdateProfileTimeStamp() 90 // was last called. Returns -1 if UpdateProfileTimeStamp() has not yet been 91 // called. 92 int64_t ElapsedProfileTimeMs() const; 93 94 // data() returns a zeroed static buffer if the frame is muted. 95 // mutable_frame() always returns a non-static buffer; the first call to 96 // mutable_frame() zeros the non-static buffer and marks the frame unmuted. 97 const int16_t* data() const; 98 int16_t* mutable_data(); 99 100 // Prefer to mute frames using AudioFrameOperations::Mute. 101 void Mute(); 102 // Frame is muted by default. 103 bool muted() const; 104 max_16bit_samples()105 size_t max_16bit_samples() const { return kMaxDataSizeSamples; } samples_per_channel()106 size_t samples_per_channel() const { return samples_per_channel_; } num_channels()107 size_t num_channels() const { return num_channels_; } channel_layout()108 ChannelLayout channel_layout() const { return channel_layout_; } sample_rate_hz()109 int sample_rate_hz() const { return sample_rate_hz_; } 110 set_absolute_capture_timestamp_ms(int64_t absolute_capture_time_stamp_ms)111 void set_absolute_capture_timestamp_ms( 112 int64_t absolute_capture_time_stamp_ms) { 113 absolute_capture_timestamp_ms_ = absolute_capture_time_stamp_ms; 114 } 115 absolute_capture_timestamp_ms()116 absl::optional<int64_t> absolute_capture_timestamp_ms() const { 117 return absolute_capture_timestamp_ms_; 118 } 119 120 // RTP timestamp of the first sample in the AudioFrame. 121 uint32_t timestamp_ = 0; 122 // Time since the first frame in milliseconds. 123 // -1 represents an uninitialized value. 124 int64_t elapsed_time_ms_ = -1; 125 // NTP time of the estimated capture time in local timebase in milliseconds. 126 // -1 represents an uninitialized value. 127 int64_t ntp_time_ms_ = -1; 128 size_t samples_per_channel_ = 0; 129 int sample_rate_hz_ = 0; 130 size_t num_channels_ = 0; 131 ChannelLayout channel_layout_ = CHANNEL_LAYOUT_NONE; 132 SpeechType speech_type_ = kUndefined; 133 VADActivity vad_activity_ = kVadUnknown; 134 // Monotonically increasing timestamp intended for profiling of audio frames. 135 // Typically used for measuring elapsed time between two different points in 136 // the audio path. No lock is used to save resources and we are thread safe 137 // by design. 138 // TODO(nisse@webrtc.org): consider using absl::optional. 139 int64_t profile_timestamp_ms_ = 0; 140 141 // Information about packets used to assemble this audio frame. This is needed 142 // by |SourceTracker| when the frame is delivered to the RTCRtpReceiver's 143 // MediaStreamTrack, in order to implement getContributingSources(). See: 144 // https://w3c.github.io/webrtc-pc/#dom-rtcrtpreceiver-getcontributingsources 145 // 146 // TODO(bugs.webrtc.org/10757): 147 // Note that this information might not be fully accurate since we currently 148 // don't have a proper way to track it across the audio sync buffer. The 149 // sync buffer is the small sample-holding buffer located after the audio 150 // decoder and before where samples are assembled into output frames. 151 // 152 // |RtpPacketInfos| may also be empty if the audio samples did not come from 153 // RTP packets. E.g. if the audio were locally generated by packet loss 154 // concealment, comfort noise generation, etc. 155 RtpPacketInfos packet_infos_; 156 157 private: 158 // A permanently zeroed out buffer to represent muted frames. This is a 159 // header-only class, so the only way to avoid creating a separate empty 160 // buffer per translation unit is to wrap a static in an inline function. 161 static const int16_t* empty_data(); 162 163 int16_t data_[kMaxDataSizeSamples]; 164 bool muted_ = true; 165 166 // Absolute capture timestamp when this audio frame was originally captured. 167 // This is only valid for audio frames captured on this machine. The absolute 168 // capture timestamp of a received frame is found in |packet_infos_|. 169 // This timestamp MUST be based on the same clock as rtc::TimeMillis(). 170 absl::optional<int64_t> absolute_capture_timestamp_ms_; 171 172 RTC_DISALLOW_COPY_AND_ASSIGN(AudioFrame); 173 }; 174 175 } // namespace webrtc 176 177 #endif // API_AUDIO_AUDIO_FRAME_H_ 178