1 /*
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "modules/rtp_rtcp/source/rtp_sender_audio.h"
12
13 #include <string.h>
14
15 #include <memory>
16 #include <utility>
17
18 #include "absl/strings/match.h"
19 #include "absl/types/optional.h"
20 #include "api/audio_codecs/audio_format.h"
21 #include "api/rtp_headers.h"
22 #include "modules/audio_coding/include/audio_coding_module_typedefs.h"
23 #include "modules/rtp_rtcp/include/rtp_rtcp_defines.h"
24 #include "modules/rtp_rtcp/source/absolute_capture_time_sender.h"
25 #include "modules/rtp_rtcp/source/byte_io.h"
26 #include "modules/rtp_rtcp/source/rtp_header_extensions.h"
27 #include "modules/rtp_rtcp/source/rtp_packet.h"
28 #include "modules/rtp_rtcp/source/rtp_packet_to_send.h"
29 #include "modules/rtp_rtcp/source/time_util.h"
30 #include "rtc_base/checks.h"
31 #include "rtc_base/logging.h"
32 #include "rtc_base/trace_event.h"
33 #include "system_wrappers/include/ntp_time.h"
34
35 namespace webrtc {
36
37 namespace {
38
#if RTC_TRACE_EVENTS_ENABLED
// Maps an AudioFrameType to a short label used in async trace events.
const char* FrameTypeToString(AudioFrameType frame_type) {
  switch (frame_type) {
    case AudioFrameType::kEmptyFrame:
      return "empty";
    case AudioFrameType::kAudioFrameSpeech:
      return "audio_speech";
    case AudioFrameType::kAudioFrameCN:
      return "audio_cn";
  }
  // The switch is exhaustive over the declared enumerators (no default, so
  // -Wswitch still flags newly added values), but falling off the end of a
  // value-returning function is undefined behavior if an out-of-range value
  // is ever passed; return a sentinel instead.
  return "unknown";
}
#endif
51
52 } // namespace
53
// Constructs the audio RTP sender helper. `clock` must be non-null (DCHECKed)
// and is also handed to the absolute capture time sender used for the
// abs-capture-time header extension; `rtp_sender` performs the actual packet
// allocation and sending. Both pointers must outlive this object.
RTPSenderAudio::RTPSenderAudio(Clock* clock, RTPSender* rtp_sender)
    : clock_(clock),
      rtp_sender_(rtp_sender),
      absolute_capture_time_sender_(clock) {
  RTC_DCHECK(clock_);
}
60
~RTPSenderAudio()61 RTPSenderAudio::~RTPSenderAudio() {}
62
RegisterAudioPayload(absl::string_view payload_name,const int8_t payload_type,const uint32_t frequency,const size_t channels,const uint32_t rate)63 int32_t RTPSenderAudio::RegisterAudioPayload(absl::string_view payload_name,
64 const int8_t payload_type,
65 const uint32_t frequency,
66 const size_t channels,
67 const uint32_t rate) {
68 if (absl::EqualsIgnoreCase(payload_name, "cn")) {
69 MutexLock lock(&send_audio_mutex_);
70 // we can have multiple CNG payload types
71 switch (frequency) {
72 case 8000:
73 cngnb_payload_type_ = payload_type;
74 break;
75 case 16000:
76 cngwb_payload_type_ = payload_type;
77 break;
78 case 32000:
79 cngswb_payload_type_ = payload_type;
80 break;
81 case 48000:
82 cngfb_payload_type_ = payload_type;
83 break;
84 default:
85 return -1;
86 }
87 } else if (absl::EqualsIgnoreCase(payload_name, "telephone-event")) {
88 MutexLock lock(&send_audio_mutex_);
89 // Don't add it to the list
90 // we dont want to allow send with a DTMF payloadtype
91 dtmf_payload_type_ = payload_type;
92 dtmf_payload_freq_ = frequency;
93 return 0;
94 } else if (payload_name == "audio") {
95 MutexLock lock(&send_audio_mutex_);
96 encoder_rtp_timestamp_frequency_ = frequency;
97 return 0;
98 }
99 return 0;
100 }
101
MarkerBit(AudioFrameType frame_type,int8_t payload_type)102 bool RTPSenderAudio::MarkerBit(AudioFrameType frame_type, int8_t payload_type) {
103 MutexLock lock(&send_audio_mutex_);
104 // for audio true for first packet in a speech burst
105 bool marker_bit = false;
106 if (last_payload_type_ != payload_type) {
107 if (payload_type != -1 && (cngnb_payload_type_ == payload_type ||
108 cngwb_payload_type_ == payload_type ||
109 cngswb_payload_type_ == payload_type ||
110 cngfb_payload_type_ == payload_type)) {
111 // Only set a marker bit when we change payload type to a non CNG
112 return false;
113 }
114
115 // payload_type differ
116 if (last_payload_type_ == -1) {
117 if (frame_type != AudioFrameType::kAudioFrameCN) {
118 // first packet and NOT CNG
119 return true;
120 } else {
121 // first packet and CNG
122 inband_vad_active_ = true;
123 return false;
124 }
125 }
126
127 // not first packet AND
128 // not CNG AND
129 // payload_type changed
130
131 // set a marker bit when we change payload type
132 marker_bit = true;
133 }
134
135 // For G.723 G.729, AMR etc we can have inband VAD
136 if (frame_type == AudioFrameType::kAudioFrameCN) {
137 inband_vad_active_ = true;
138 } else if (inband_vad_active_) {
139 inband_vad_active_ = false;
140 marker_bit = true;
141 }
142 return marker_bit;
143 }
144
// Overload without an absolute capture timestamp; forwards to the full
// overload with a placeholder value of 0 ms until the real capture timestamp
// is plumbed through (see bugs.webrtc.org/10739).
bool RTPSenderAudio::SendAudio(AudioFrameType frame_type,
                               int8_t payload_type,
                               uint32_t rtp_timestamp,
                               const uint8_t* payload_data,
                               size_t payload_size) {
  return SendAudio(frame_type, payload_type, rtp_timestamp, payload_data,
                   payload_size,
                   // TODO(bugs.webrtc.org/10739) replace once plumbed.
                   /*absolute_capture_timestamp_ms=*/0);
}
155
SendAudio(AudioFrameType frame_type,int8_t payload_type,uint32_t rtp_timestamp,const uint8_t * payload_data,size_t payload_size,int64_t absolute_capture_timestamp_ms)156 bool RTPSenderAudio::SendAudio(AudioFrameType frame_type,
157 int8_t payload_type,
158 uint32_t rtp_timestamp,
159 const uint8_t* payload_data,
160 size_t payload_size,
161 int64_t absolute_capture_timestamp_ms) {
162 #if RTC_TRACE_EVENTS_ENABLED
163 TRACE_EVENT_ASYNC_STEP1("webrtc", "Audio", rtp_timestamp, "Send", "type",
164 FrameTypeToString(frame_type));
165 #endif
166
167 // From RFC 4733:
168 // A source has wide latitude as to how often it sends event updates. A
169 // natural interval is the spacing between non-event audio packets. [...]
170 // Alternatively, a source MAY decide to use a different spacing for event
171 // updates, with a value of 50 ms RECOMMENDED.
172 constexpr int kDtmfIntervalTimeMs = 50;
173 uint8_t audio_level_dbov = 0;
174 uint32_t dtmf_payload_freq = 0;
175 absl::optional<uint32_t> encoder_rtp_timestamp_frequency;
176 {
177 MutexLock lock(&send_audio_mutex_);
178 audio_level_dbov = audio_level_dbov_;
179 dtmf_payload_freq = dtmf_payload_freq_;
180 encoder_rtp_timestamp_frequency = encoder_rtp_timestamp_frequency_;
181 }
182
183 // Check if we have pending DTMFs to send
184 if (!dtmf_event_is_on_ && dtmf_queue_.PendingDtmf()) {
185 if ((clock_->TimeInMilliseconds() - dtmf_time_last_sent_) >
186 kDtmfIntervalTimeMs) {
187 // New tone to play
188 dtmf_timestamp_ = rtp_timestamp;
189 if (dtmf_queue_.NextDtmf(&dtmf_current_event_)) {
190 dtmf_event_first_packet_sent_ = false;
191 dtmf_length_samples_ =
192 dtmf_current_event_.duration_ms * (dtmf_payload_freq / 1000);
193 dtmf_event_is_on_ = true;
194 }
195 }
196 }
197
198 // A source MAY send events and coded audio packets for the same time
199 // but we don't support it
200 if (dtmf_event_is_on_) {
201 if (frame_type == AudioFrameType::kEmptyFrame) {
202 // kEmptyFrame is used to drive the DTMF when in CN mode
203 // it can be triggered more frequently than we want to send the
204 // DTMF packets.
205 const unsigned int dtmf_interval_time_rtp =
206 dtmf_payload_freq * kDtmfIntervalTimeMs / 1000;
207 if ((rtp_timestamp - dtmf_timestamp_last_sent_) <
208 dtmf_interval_time_rtp) {
209 // not time to send yet
210 return true;
211 }
212 }
213 dtmf_timestamp_last_sent_ = rtp_timestamp;
214 uint32_t dtmf_duration_samples = rtp_timestamp - dtmf_timestamp_;
215 bool ended = false;
216 bool send = true;
217
218 if (dtmf_length_samples_ > dtmf_duration_samples) {
219 if (dtmf_duration_samples <= 0) {
220 // Skip send packet at start, since we shouldn't use duration 0
221 send = false;
222 }
223 } else {
224 ended = true;
225 dtmf_event_is_on_ = false;
226 dtmf_time_last_sent_ = clock_->TimeInMilliseconds();
227 }
228 if (send) {
229 if (dtmf_duration_samples > 0xffff) {
230 // RFC 4733 2.5.2.3 Long-Duration Events
231 SendTelephoneEventPacket(ended, dtmf_timestamp_,
232 static_cast<uint16_t>(0xffff), false);
233
234 // set new timestap for this segment
235 dtmf_timestamp_ = rtp_timestamp;
236 dtmf_duration_samples -= 0xffff;
237 dtmf_length_samples_ -= 0xffff;
238
239 return SendTelephoneEventPacket(
240 ended, dtmf_timestamp_,
241 static_cast<uint16_t>(dtmf_duration_samples), false);
242 } else {
243 if (!SendTelephoneEventPacket(ended, dtmf_timestamp_,
244 dtmf_duration_samples,
245 !dtmf_event_first_packet_sent_)) {
246 return false;
247 }
248 dtmf_event_first_packet_sent_ = true;
249 return true;
250 }
251 }
252 return true;
253 }
254 if (payload_size == 0 || payload_data == NULL) {
255 if (frame_type == AudioFrameType::kEmptyFrame) {
256 // we don't send empty audio RTP packets
257 // no error since we use it to either drive DTMF when we use VAD, or
258 // enter DTX.
259 return true;
260 }
261 return false;
262 }
263
264 std::unique_ptr<RtpPacketToSend> packet = rtp_sender_->AllocatePacket();
265 packet->SetMarker(MarkerBit(frame_type, payload_type));
266 packet->SetPayloadType(payload_type);
267 packet->SetTimestamp(rtp_timestamp);
268 packet->set_capture_time_ms(clock_->TimeInMilliseconds());
269 // Update audio level extension, if included.
270 packet->SetExtension<AudioLevel>(
271 frame_type == AudioFrameType::kAudioFrameSpeech, audio_level_dbov);
272
273 // Send absolute capture time periodically in order to optimize and save
274 // network traffic. Missing absolute capture times can be interpolated on the
275 // receiving end if sending intervals are small enough.
276 auto absolute_capture_time = absolute_capture_time_sender_.OnSendPacket(
277 AbsoluteCaptureTimeSender::GetSource(packet->Ssrc(), packet->Csrcs()),
278 packet->Timestamp(),
279 // Replace missing value with 0 (invalid frequency), this will trigger
280 // absolute capture time sending.
281 encoder_rtp_timestamp_frequency.value_or(0),
282 Int64MsToUQ32x32(absolute_capture_timestamp_ms + NtpOffsetMs()),
283 /*estimated_capture_clock_offset=*/absl::nullopt);
284 if (absolute_capture_time) {
285 // It also checks that extension was registered during SDP negotiation. If
286 // not then setter won't do anything.
287 packet->SetExtension<AbsoluteCaptureTimeExtension>(*absolute_capture_time);
288 }
289
290 uint8_t* payload = packet->AllocatePayload(payload_size);
291 if (!payload) // Too large payload buffer.
292 return false;
293 memcpy(payload, payload_data, payload_size);
294
295 if (!rtp_sender_->AssignSequenceNumber(packet.get()))
296 return false;
297
298 {
299 MutexLock lock(&send_audio_mutex_);
300 last_payload_type_ = payload_type;
301 }
302 TRACE_EVENT_ASYNC_END2("webrtc", "Audio", rtp_timestamp, "timestamp",
303 packet->Timestamp(), "seqnum",
304 packet->SequenceNumber());
305 packet->set_packet_type(RtpPacketMediaType::kAudio);
306 packet->set_allow_retransmission(true);
307 bool send_result = rtp_sender_->SendToNetwork(std::move(packet));
308 if (first_packet_sent_()) {
309 RTC_LOG(LS_INFO) << "First audio RTP packet sent to pacer";
310 }
311 return send_result;
312 }
313
314 // Audio level magnitude and voice activity flag are set for each RTP packet
SetAudioLevel(uint8_t level_dbov)315 int32_t RTPSenderAudio::SetAudioLevel(uint8_t level_dbov) {
316 if (level_dbov > 127) {
317 return -1;
318 }
319 MutexLock lock(&send_audio_mutex_);
320 audio_level_dbov_ = level_dbov;
321 return 0;
322 }
323
324 // Send a TelephoneEvent tone using RFC 2833 (4733)
SendTelephoneEvent(uint8_t key,uint16_t time_ms,uint8_t level)325 int32_t RTPSenderAudio::SendTelephoneEvent(uint8_t key,
326 uint16_t time_ms,
327 uint8_t level) {
328 DtmfQueue::Event event;
329 {
330 MutexLock lock(&send_audio_mutex_);
331 if (dtmf_payload_type_ < 0) {
332 // TelephoneEvent payloadtype not configured
333 return -1;
334 }
335 event.payload_type = dtmf_payload_type_;
336 }
337 event.key = key;
338 event.duration_ms = time_ms;
339 event.level = level;
340 return dtmf_queue_.AddDtmf(event) ? 0 : -1;
341 }
342
SendTelephoneEventPacket(bool ended,uint32_t dtmf_timestamp,uint16_t duration,bool marker_bit)343 bool RTPSenderAudio::SendTelephoneEventPacket(bool ended,
344 uint32_t dtmf_timestamp,
345 uint16_t duration,
346 bool marker_bit) {
347 uint8_t send_count = 1;
348 bool result = true;
349
350 if (ended) {
351 // resend last packet in an event 3 times
352 send_count = 3;
353 }
354 do {
355 // Send DTMF data.
356 constexpr RtpPacketToSend::ExtensionManager* kNoExtensions = nullptr;
357 constexpr size_t kDtmfSize = 4;
358 std::unique_ptr<RtpPacketToSend> packet(
359 new RtpPacketToSend(kNoExtensions, kRtpHeaderSize + kDtmfSize));
360 packet->SetPayloadType(dtmf_current_event_.payload_type);
361 packet->SetMarker(marker_bit);
362 packet->SetSsrc(rtp_sender_->SSRC());
363 packet->SetTimestamp(dtmf_timestamp);
364 packet->set_capture_time_ms(clock_->TimeInMilliseconds());
365 if (!rtp_sender_->AssignSequenceNumber(packet.get()))
366 return false;
367
368 // Create DTMF data.
369 uint8_t* dtmfbuffer = packet->AllocatePayload(kDtmfSize);
370 RTC_DCHECK(dtmfbuffer);
371 /* From RFC 2833:
372 0 1 2 3
373 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
374 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
375 | event |E|R| volume | duration |
376 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
377 */
378 // R bit always cleared
379 uint8_t R = 0x00;
380 uint8_t volume = dtmf_current_event_.level;
381
382 // First packet un-ended
383 uint8_t E = ended ? 0x80 : 0x00;
384
385 // First byte is Event number, equals key number
386 dtmfbuffer[0] = dtmf_current_event_.key;
387 dtmfbuffer[1] = E | R | volume;
388 ByteWriter<uint16_t>::WriteBigEndian(dtmfbuffer + 2, duration);
389
390 packet->set_packet_type(RtpPacketMediaType::kAudio);
391 packet->set_allow_retransmission(true);
392 result = rtp_sender_->SendToNetwork(std::move(packet));
393 send_count--;
394 } while (send_count > 0 && result);
395
396 return result;
397 }
398 } // namespace webrtc
399