1 /*
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "webrtc/modules/rtp_rtcp/source/rtp_sender_audio.h"
12
13 #include <string.h>
14
15 #include "webrtc/base/trace_event.h"
16 #include "webrtc/modules/rtp_rtcp/include/rtp_rtcp_defines.h"
17 #include "webrtc/modules/rtp_rtcp/source/byte_io.h"
18 #include "webrtc/system_wrappers/include/tick_util.h"
19
20 namespace webrtc {
21
22 static const int kDtmfFrequencyHz = 8000;
23
// Constructs the audio RTP sender. |clock| supplies wall-clock time for DTMF
// pacing, |rtpSender| builds and sends the actual RTP packets, and
// |audio_feedback| (checked for null before use) is notified when a DTMF tone
// starts playing.
RTPSenderAudio::RTPSenderAudio(Clock* clock,
                               RTPSender* rtpSender,
                               RtpAudioFeedback* audio_feedback)
    : _clock(clock),
      _rtpSender(rtpSender),
      _audioFeedback(audio_feedback),
      _sendAudioCritsect(CriticalSectionWrapper::CreateCriticalSection()),
      _packetSizeSamples(160),  // 160 samples = 20 ms at the 8 kHz DTMF rate.
      _dtmfEventIsOn(false),
      _dtmfEventFirstPacketSent(false),
      _dtmfPayloadType(-1),  // -1 throughout means "not configured".
      _dtmfTimestamp(0),
      _dtmfKey(0),
      _dtmfLengthSamples(0),
      _dtmfLevel(0),
      _dtmfTimeLastSent(0),
      _dtmfTimestampLastSent(0),
      _REDPayloadType(-1),
      _inbandVADactive(false),
      _cngNBPayloadType(-1),
      _cngWBPayloadType(-1),
      _cngSWBPayloadType(-1),
      _cngFBPayloadType(-1),
      _lastPayloadType(-1),
      _audioLevel_dBov(0) {}
49
~RTPSenderAudio()50 RTPSenderAudio::~RTPSenderAudio() {}
51
// Returns the sample rate used for DTMF timestamp arithmetic; fixed at
// 8 kHz in this implementation (see kDtmfFrequencyHz).
int RTPSenderAudio::AudioFrequency() const {
  return kDtmfFrequencyHz;
}
55
56 // set audio packet size, used to determine when it's time to send a DTMF packet
57 // in silence (CNG)
SetAudioPacketSize(uint16_t packetSizeSamples)58 int32_t RTPSenderAudio::SetAudioPacketSize(uint16_t packetSizeSamples) {
59 CriticalSectionScoped cs(_sendAudioCritsect.get());
60
61 _packetSizeSamples = packetSizeSamples;
62 return 0;
63 }
64
RegisterAudioPayload(const char payloadName[RTP_PAYLOAD_NAME_SIZE],const int8_t payloadType,const uint32_t frequency,const size_t channels,const uint32_t rate,RtpUtility::Payload ** payload)65 int32_t RTPSenderAudio::RegisterAudioPayload(
66 const char payloadName[RTP_PAYLOAD_NAME_SIZE],
67 const int8_t payloadType,
68 const uint32_t frequency,
69 const size_t channels,
70 const uint32_t rate,
71 RtpUtility::Payload** payload) {
72 if (RtpUtility::StringCompare(payloadName, "cn", 2)) {
73 CriticalSectionScoped cs(_sendAudioCritsect.get());
74 // we can have multiple CNG payload types
75 switch (frequency) {
76 case 8000:
77 _cngNBPayloadType = payloadType;
78 break;
79 case 16000:
80 _cngWBPayloadType = payloadType;
81 break;
82 case 32000:
83 _cngSWBPayloadType = payloadType;
84 break;
85 case 48000:
86 _cngFBPayloadType = payloadType;
87 break;
88 default:
89 return -1;
90 }
91 } else if (RtpUtility::StringCompare(payloadName, "telephone-event", 15)) {
92 CriticalSectionScoped cs(_sendAudioCritsect.get());
93 // Don't add it to the list
94 // we dont want to allow send with a DTMF payloadtype
95 _dtmfPayloadType = payloadType;
96 return 0;
97 // The default timestamp rate is 8000 Hz, but other rates may be defined.
98 }
99 *payload = new RtpUtility::Payload;
100 (*payload)->typeSpecific.Audio.frequency = frequency;
101 (*payload)->typeSpecific.Audio.channels = channels;
102 (*payload)->typeSpecific.Audio.rate = rate;
103 (*payload)->audio = true;
104 (*payload)->name[RTP_PAYLOAD_NAME_SIZE - 1] = '\0';
105 strncpy((*payload)->name, payloadName, RTP_PAYLOAD_NAME_SIZE - 1);
106 return 0;
107 }
108
MarkerBit(FrameType frameType,int8_t payload_type)109 bool RTPSenderAudio::MarkerBit(FrameType frameType, int8_t payload_type) {
110 CriticalSectionScoped cs(_sendAudioCritsect.get());
111 // for audio true for first packet in a speech burst
112 bool markerBit = false;
113 if (_lastPayloadType != payload_type) {
114 if (payload_type != -1 && (_cngNBPayloadType == payload_type ||
115 _cngWBPayloadType == payload_type ||
116 _cngSWBPayloadType == payload_type ||
117 _cngFBPayloadType == payload_type)) {
118 // Only set a marker bit when we change payload type to a non CNG
119 return false;
120 }
121
122 // payload_type differ
123 if (_lastPayloadType == -1) {
124 if (frameType != kAudioFrameCN) {
125 // first packet and NOT CNG
126 return true;
127 } else {
128 // first packet and CNG
129 _inbandVADactive = true;
130 return false;
131 }
132 }
133
134 // not first packet AND
135 // not CNG AND
136 // payload_type changed
137
138 // set a marker bit when we change payload type
139 markerBit = true;
140 }
141
142 // For G.723 G.729, AMR etc we can have inband VAD
143 if (frameType == kAudioFrameCN) {
144 _inbandVADactive = true;
145 } else if (_inbandVADactive) {
146 _inbandVADactive = false;
147 markerBit = true;
148 }
149 return markerBit;
150 }
151
SendAudio(FrameType frameType,int8_t payloadType,uint32_t captureTimeStamp,const uint8_t * payloadData,size_t dataSize,const RTPFragmentationHeader * fragmentation)152 int32_t RTPSenderAudio::SendAudio(FrameType frameType,
153 int8_t payloadType,
154 uint32_t captureTimeStamp,
155 const uint8_t* payloadData,
156 size_t dataSize,
157 const RTPFragmentationHeader* fragmentation) {
158 // TODO(pwestin) Breakup function in smaller functions.
159 size_t payloadSize = dataSize;
160 size_t maxPayloadLength = _rtpSender->MaxPayloadLength();
161 bool dtmfToneStarted = false;
162 uint16_t dtmfLengthMS = 0;
163 uint8_t key = 0;
164 int red_payload_type;
165 uint8_t audio_level_dbov;
166 int8_t dtmf_payload_type;
167 uint16_t packet_size_samples;
168 {
169 CriticalSectionScoped cs(_sendAudioCritsect.get());
170 red_payload_type = _REDPayloadType;
171 audio_level_dbov = _audioLevel_dBov;
172 dtmf_payload_type = _dtmfPayloadType;
173 packet_size_samples = _packetSizeSamples;
174 }
175
176 // Check if we have pending DTMFs to send
177 if (!_dtmfEventIsOn && PendingDTMF()) {
178 int64_t delaySinceLastDTMF =
179 _clock->TimeInMilliseconds() - _dtmfTimeLastSent;
180
181 if (delaySinceLastDTMF > 100) {
182 // New tone to play
183 _dtmfTimestamp = captureTimeStamp;
184 if (NextDTMF(&key, &dtmfLengthMS, &_dtmfLevel) >= 0) {
185 _dtmfEventFirstPacketSent = false;
186 _dtmfKey = key;
187 _dtmfLengthSamples = (kDtmfFrequencyHz / 1000) * dtmfLengthMS;
188 dtmfToneStarted = true;
189 _dtmfEventIsOn = true;
190 }
191 }
192 }
193 if (dtmfToneStarted) {
194 if (_audioFeedback)
195 _audioFeedback->OnPlayTelephoneEvent(key, dtmfLengthMS, _dtmfLevel);
196 }
197
198 // A source MAY send events and coded audio packets for the same time
199 // but we don't support it
200 if (_dtmfEventIsOn) {
201 if (frameType == kEmptyFrame) {
202 // kEmptyFrame is used to drive the DTMF when in CN mode
203 // it can be triggered more frequently than we want to send the
204 // DTMF packets.
205 if (packet_size_samples > (captureTimeStamp - _dtmfTimestampLastSent)) {
206 // not time to send yet
207 return 0;
208 }
209 }
210 _dtmfTimestampLastSent = captureTimeStamp;
211 uint32_t dtmfDurationSamples = captureTimeStamp - _dtmfTimestamp;
212 bool ended = false;
213 bool send = true;
214
215 if (_dtmfLengthSamples > dtmfDurationSamples) {
216 if (dtmfDurationSamples <= 0) {
217 // Skip send packet at start, since we shouldn't use duration 0
218 send = false;
219 }
220 } else {
221 ended = true;
222 _dtmfEventIsOn = false;
223 _dtmfTimeLastSent = _clock->TimeInMilliseconds();
224 }
225 if (send) {
226 if (dtmfDurationSamples > 0xffff) {
227 // RFC 4733 2.5.2.3 Long-Duration Events
228 SendTelephoneEventPacket(ended, dtmf_payload_type, _dtmfTimestamp,
229 static_cast<uint16_t>(0xffff), false);
230
231 // set new timestap for this segment
232 _dtmfTimestamp = captureTimeStamp;
233 dtmfDurationSamples -= 0xffff;
234 _dtmfLengthSamples -= 0xffff;
235
236 return SendTelephoneEventPacket(
237 ended, dtmf_payload_type, _dtmfTimestamp,
238 static_cast<uint16_t>(dtmfDurationSamples), false);
239 } else {
240 if (SendTelephoneEventPacket(ended, dtmf_payload_type, _dtmfTimestamp,
241 static_cast<uint16_t>(dtmfDurationSamples),
242 !_dtmfEventFirstPacketSent) != 0) {
243 return -1;
244 }
245 _dtmfEventFirstPacketSent = true;
246 return 0;
247 }
248 }
249 return 0;
250 }
251 if (payloadSize == 0 || payloadData == NULL) {
252 if (frameType == kEmptyFrame) {
253 // we don't send empty audio RTP packets
254 // no error since we use it to drive DTMF when we use VAD
255 return 0;
256 }
257 return -1;
258 }
259 uint8_t dataBuffer[IP_PACKET_SIZE];
260 bool markerBit = MarkerBit(frameType, payloadType);
261
262 int32_t rtpHeaderLength = 0;
263 uint16_t timestampOffset = 0;
264
265 if (red_payload_type >= 0 && fragmentation && !markerBit &&
266 fragmentation->fragmentationVectorSize > 1) {
267 // have we configured RED? use its payload type
268 // we need to get the current timestamp to calc the diff
269 uint32_t oldTimeStamp = _rtpSender->Timestamp();
270 rtpHeaderLength = _rtpSender->BuildRTPheader(dataBuffer, red_payload_type,
271 markerBit, captureTimeStamp,
272 _clock->TimeInMilliseconds());
273
274 timestampOffset = uint16_t(_rtpSender->Timestamp() - oldTimeStamp);
275 } else {
276 rtpHeaderLength = _rtpSender->BuildRTPheader(dataBuffer, payloadType,
277 markerBit, captureTimeStamp,
278 _clock->TimeInMilliseconds());
279 }
280 if (rtpHeaderLength <= 0) {
281 return -1;
282 }
283 if (maxPayloadLength < (rtpHeaderLength + payloadSize)) {
284 // Too large payload buffer.
285 return -1;
286 }
287 if (red_payload_type >= 0 && // Have we configured RED?
288 fragmentation && fragmentation->fragmentationVectorSize > 1 &&
289 !markerBit) {
290 if (timestampOffset <= 0x3fff) {
291 if (fragmentation->fragmentationVectorSize != 2) {
292 // we only support 2 codecs when using RED
293 return -1;
294 }
295 // only 0x80 if we have multiple blocks
296 dataBuffer[rtpHeaderLength++] =
297 0x80 + fragmentation->fragmentationPlType[1];
298 size_t blockLength = fragmentation->fragmentationLength[1];
299
300 // sanity blockLength
301 if (blockLength > 0x3ff) { // block length 10 bits 1023 bytes
302 return -1;
303 }
304 uint32_t REDheader = (timestampOffset << 10) + blockLength;
305 ByteWriter<uint32_t>::WriteBigEndian(dataBuffer + rtpHeaderLength,
306 REDheader);
307 rtpHeaderLength += 3;
308
309 dataBuffer[rtpHeaderLength++] = fragmentation->fragmentationPlType[0];
310 // copy the RED data
311 memcpy(dataBuffer + rtpHeaderLength,
312 payloadData + fragmentation->fragmentationOffset[1],
313 fragmentation->fragmentationLength[1]);
314
315 // copy the normal data
316 memcpy(
317 dataBuffer + rtpHeaderLength + fragmentation->fragmentationLength[1],
318 payloadData + fragmentation->fragmentationOffset[0],
319 fragmentation->fragmentationLength[0]);
320
321 payloadSize = fragmentation->fragmentationLength[0] +
322 fragmentation->fragmentationLength[1];
323 } else {
324 // silence for too long send only new data
325 dataBuffer[rtpHeaderLength++] = fragmentation->fragmentationPlType[0];
326 memcpy(dataBuffer + rtpHeaderLength,
327 payloadData + fragmentation->fragmentationOffset[0],
328 fragmentation->fragmentationLength[0]);
329
330 payloadSize = fragmentation->fragmentationLength[0];
331 }
332 } else {
333 if (fragmentation && fragmentation->fragmentationVectorSize > 0) {
334 // use the fragment info if we have one
335 dataBuffer[rtpHeaderLength++] = fragmentation->fragmentationPlType[0];
336 memcpy(dataBuffer + rtpHeaderLength,
337 payloadData + fragmentation->fragmentationOffset[0],
338 fragmentation->fragmentationLength[0]);
339
340 payloadSize = fragmentation->fragmentationLength[0];
341 } else {
342 memcpy(dataBuffer + rtpHeaderLength, payloadData, payloadSize);
343 }
344 }
345 {
346 CriticalSectionScoped cs(_sendAudioCritsect.get());
347 _lastPayloadType = payloadType;
348 }
349 // Update audio level extension, if included.
350 size_t packetSize = payloadSize + rtpHeaderLength;
351 RtpUtility::RtpHeaderParser rtp_parser(dataBuffer, packetSize);
352 RTPHeader rtp_header;
353 rtp_parser.Parse(&rtp_header);
354 _rtpSender->UpdateAudioLevel(dataBuffer, packetSize, rtp_header,
355 (frameType == kAudioFrameSpeech),
356 audio_level_dbov);
357 TRACE_EVENT_ASYNC_END2("webrtc", "Audio", captureTimeStamp, "timestamp",
358 _rtpSender->Timestamp(), "seqnum",
359 _rtpSender->SequenceNumber());
360 return _rtpSender->SendToNetwork(dataBuffer, payloadSize, rtpHeaderLength,
361 TickTime::MillisecondTimestamp(),
362 kAllowRetransmission,
363 RtpPacketSender::kHighPriority);
364 }
365
366 // Audio level magnitude and voice activity flag are set for each RTP packet
SetAudioLevel(uint8_t level_dBov)367 int32_t RTPSenderAudio::SetAudioLevel(uint8_t level_dBov) {
368 if (level_dBov > 127) {
369 return -1;
370 }
371 CriticalSectionScoped cs(_sendAudioCritsect.get());
372 _audioLevel_dBov = level_dBov;
373 return 0;
374 }
375
376 // Set payload type for Redundant Audio Data RFC 2198
SetRED(int8_t payloadType)377 int32_t RTPSenderAudio::SetRED(int8_t payloadType) {
378 if (payloadType < -1) {
379 return -1;
380 }
381 CriticalSectionScoped cs(_sendAudioCritsect.get());
382 _REDPayloadType = payloadType;
383 return 0;
384 }
385
386 // Get payload type for Redundant Audio Data RFC 2198
RED(int8_t * payloadType) const387 int32_t RTPSenderAudio::RED(int8_t* payloadType) const {
388 CriticalSectionScoped cs(_sendAudioCritsect.get());
389 if (_REDPayloadType == -1) {
390 // not configured
391 return -1;
392 }
393 *payloadType = _REDPayloadType;
394 return 0;
395 }
396
397 // Send a TelephoneEvent tone using RFC 2833 (4733)
SendTelephoneEvent(uint8_t key,uint16_t time_ms,uint8_t level)398 int32_t RTPSenderAudio::SendTelephoneEvent(uint8_t key,
399 uint16_t time_ms,
400 uint8_t level) {
401 {
402 CriticalSectionScoped lock(_sendAudioCritsect.get());
403 if (_dtmfPayloadType < 0) {
404 // TelephoneEvent payloadtype not configured
405 return -1;
406 }
407 }
408 return AddDTMF(key, time_ms, level);
409 }
410
SendTelephoneEventPacket(bool ended,int8_t dtmf_payload_type,uint32_t dtmfTimeStamp,uint16_t duration,bool markerBit)411 int32_t RTPSenderAudio::SendTelephoneEventPacket(bool ended,
412 int8_t dtmf_payload_type,
413 uint32_t dtmfTimeStamp,
414 uint16_t duration,
415 bool markerBit) {
416 uint8_t dtmfbuffer[IP_PACKET_SIZE];
417 uint8_t sendCount = 1;
418 int32_t retVal = 0;
419
420 if (ended) {
421 // resend last packet in an event 3 times
422 sendCount = 3;
423 }
424 do {
425 // Send DTMF data
426 _rtpSender->BuildRTPheader(dtmfbuffer, dtmf_payload_type, markerBit,
427 dtmfTimeStamp, _clock->TimeInMilliseconds());
428
429 // reset CSRC and X bit
430 dtmfbuffer[0] &= 0xe0;
431
432 // Create DTMF data
433 /* From RFC 2833:
434
435 0 1 2 3
436 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
437 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
438 | event |E|R| volume | duration |
439 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
440 */
441 // R bit always cleared
442 uint8_t R = 0x00;
443 uint8_t volume = _dtmfLevel;
444
445 // First packet un-ended
446 uint8_t E = ended ? 0x80 : 0x00;
447
448 // First byte is Event number, equals key number
449 dtmfbuffer[12] = _dtmfKey;
450 dtmfbuffer[13] = E | R | volume;
451 ByteWriter<uint16_t>::WriteBigEndian(dtmfbuffer + 14, duration);
452
453 TRACE_EVENT_INSTANT2(TRACE_DISABLED_BY_DEFAULT("webrtc_rtp"),
454 "Audio::SendTelephoneEvent", "timestamp",
455 dtmfTimeStamp, "seqnum", _rtpSender->SequenceNumber());
456 retVal = _rtpSender->SendToNetwork(
457 dtmfbuffer, 4, 12, TickTime::MillisecondTimestamp(),
458 kAllowRetransmission, RtpPacketSender::kHighPriority);
459 sendCount--;
460 } while (sendCount > 0 && retVal == 0);
461
462 return retVal;
463 }
464 } // namespace webrtc
465