1 /*
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "webrtc/modules/rtp_rtcp/source/rtp_receiver_audio.h"
12
13 #include <assert.h> // assert
14 #include <math.h> // pow()
15 #include <string.h> // memcpy()
16
17 #include "webrtc/base/logging.h"
18 #include "webrtc/base/trace_event.h"
19 #include "webrtc/system_wrappers/include/critical_section_wrapper.h"
20
21 namespace webrtc {
CreateAudioStrategy(RtpData * data_callback,RtpAudioFeedback * incoming_messages_callback)22 RTPReceiverStrategy* RTPReceiverStrategy::CreateAudioStrategy(
23 RtpData* data_callback,
24 RtpAudioFeedback* incoming_messages_callback) {
25 return new RTPReceiverAudio(data_callback, incoming_messages_callback);
26 }
27
RTPReceiverAudio(RtpData * data_callback,RtpAudioFeedback * incoming_messages_callback)28 RTPReceiverAudio::RTPReceiverAudio(RtpData* data_callback,
29 RtpAudioFeedback* incoming_messages_callback)
30 : RTPReceiverStrategy(data_callback),
31 TelephoneEventHandler(),
32 last_received_frequency_(8000),
33 telephone_event_forward_to_decoder_(false),
34 telephone_event_payload_type_(-1),
35 cng_nb_payload_type_(-1),
36 cng_wb_payload_type_(-1),
37 cng_swb_payload_type_(-1),
38 cng_fb_payload_type_(-1),
39 cng_payload_type_(-1),
40 g722_payload_type_(-1),
41 last_received_g722_(false),
42 num_energy_(0),
43 current_remote_energy_(),
44 cb_audio_feedback_(incoming_messages_callback) {
45 last_payload_.Audio.channels = 1;
46 memset(current_remote_energy_, 0, sizeof(current_remote_energy_));
47 }
48
49 // Outband TelephoneEvent(DTMF) detection
SetTelephoneEventForwardToDecoder(bool forward_to_decoder)50 void RTPReceiverAudio::SetTelephoneEventForwardToDecoder(
51 bool forward_to_decoder) {
52 CriticalSectionScoped lock(crit_sect_.get());
53 telephone_event_forward_to_decoder_ = forward_to_decoder;
54 }
55
56 // Is forwarding of outband telephone events turned on/off?
TelephoneEventForwardToDecoder() const57 bool RTPReceiverAudio::TelephoneEventForwardToDecoder() const {
58 CriticalSectionScoped lock(crit_sect_.get());
59 return telephone_event_forward_to_decoder_;
60 }
61
TelephoneEventPayloadType(int8_t payload_type) const62 bool RTPReceiverAudio::TelephoneEventPayloadType(
63 int8_t payload_type) const {
64 CriticalSectionScoped lock(crit_sect_.get());
65 return telephone_event_payload_type_ == payload_type;
66 }
67
CNGPayloadType(int8_t payload_type,uint32_t * frequency,bool * cng_payload_type_has_changed)68 bool RTPReceiverAudio::CNGPayloadType(int8_t payload_type,
69 uint32_t* frequency,
70 bool* cng_payload_type_has_changed) {
71 CriticalSectionScoped lock(crit_sect_.get());
72 *cng_payload_type_has_changed = false;
73
74 // We can have four CNG on 8000Hz, 16000Hz, 32000Hz and 48000Hz.
75 if (cng_nb_payload_type_ == payload_type) {
76 *frequency = 8000;
77 if (cng_payload_type_ != -1 && cng_payload_type_ != cng_nb_payload_type_)
78 *cng_payload_type_has_changed = true;
79
80 cng_payload_type_ = cng_nb_payload_type_;
81 return true;
82 } else if (cng_wb_payload_type_ == payload_type) {
83 // if last received codec is G.722 we must use frequency 8000
84 if (last_received_g722_) {
85 *frequency = 8000;
86 } else {
87 *frequency = 16000;
88 }
89 if (cng_payload_type_ != -1 && cng_payload_type_ != cng_wb_payload_type_)
90 *cng_payload_type_has_changed = true;
91 cng_payload_type_ = cng_wb_payload_type_;
92 return true;
93 } else if (cng_swb_payload_type_ == payload_type) {
94 *frequency = 32000;
95 if ((cng_payload_type_ != -1) &&
96 (cng_payload_type_ != cng_swb_payload_type_))
97 *cng_payload_type_has_changed = true;
98 cng_payload_type_ = cng_swb_payload_type_;
99 return true;
100 } else if (cng_fb_payload_type_ == payload_type) {
101 *frequency = 48000;
102 if (cng_payload_type_ != -1 && cng_payload_type_ != cng_fb_payload_type_)
103 *cng_payload_type_has_changed = true;
104 cng_payload_type_ = cng_fb_payload_type_;
105 return true;
106 } else {
107 // not CNG
108 if (g722_payload_type_ == payload_type) {
109 last_received_g722_ = true;
110 } else {
111 last_received_g722_ = false;
112 }
113 }
114 return false;
115 }
116
ShouldReportCsrcChanges(uint8_t payload_type) const117 bool RTPReceiverAudio::ShouldReportCsrcChanges(uint8_t payload_type) const {
118 // Don't do this for DTMF packets, otherwise it's fine.
119 return !TelephoneEventPayloadType(payload_type);
120 }
121
// - Sample based or frame based codecs based on RFC 3551
// -
// - NOTE! There is one error in the RFC, stating G.722 uses 8 bits/sample.
// -       The correct rate is 4 bits/sample.
// -
// - name of                              sampling              default
// - encoding  sample/frame  bits/sample      rate  ms/frame  ms/packet
// -
// - Sample based audio codecs
// - DVI4      sample        4                var.                   20
// - G722      sample        4              16,000                   20
// - G726-40   sample        5               8,000                   20
// - G726-32   sample        4               8,000                   20
// - G726-24   sample        3               8,000                   20
// - G726-16   sample        2               8,000                   20
// - L8        sample        8                var.                   20
// - L16       sample        16               var.                   20
// - PCMA      sample        8                var.                   20
// - PCMU      sample        8                var.                   20
// -
// - Frame based audio codecs
// - G723      frame         N/A             8,000        30         30
// - G728      frame         N/A             8,000       2.5         20
// - G729      frame         N/A             8,000        10         20
// - G729D     frame         N/A             8,000        10         20
// - G729E     frame         N/A             8,000        10         20
// - GSM       frame         N/A             8,000        20         20
// - GSM-EFR   frame         N/A             8,000        20         20
// - LPC       frame         N/A             8,000        20         20
// - MPA       frame         N/A              var.      var.
// -
// - G7221     frame         N/A
OnNewPayloadTypeCreated(const char payload_name[RTP_PAYLOAD_NAME_SIZE],int8_t payload_type,uint32_t frequency)154 int32_t RTPReceiverAudio::OnNewPayloadTypeCreated(
155 const char payload_name[RTP_PAYLOAD_NAME_SIZE],
156 int8_t payload_type,
157 uint32_t frequency) {
158 CriticalSectionScoped lock(crit_sect_.get());
159
160 if (RtpUtility::StringCompare(payload_name, "telephone-event", 15)) {
161 telephone_event_payload_type_ = payload_type;
162 }
163 if (RtpUtility::StringCompare(payload_name, "cn", 2)) {
164 // we can have three CNG on 8000Hz, 16000Hz and 32000Hz
165 if (frequency == 8000) {
166 cng_nb_payload_type_ = payload_type;
167 } else if (frequency == 16000) {
168 cng_wb_payload_type_ = payload_type;
169 } else if (frequency == 32000) {
170 cng_swb_payload_type_ = payload_type;
171 } else if (frequency == 48000) {
172 cng_fb_payload_type_ = payload_type;
173 } else {
174 assert(false);
175 return -1;
176 }
177 }
178 return 0;
179 }
180
ParseRtpPacket(WebRtcRTPHeader * rtp_header,const PayloadUnion & specific_payload,bool is_red,const uint8_t * payload,size_t payload_length,int64_t timestamp_ms,bool is_first_packet)181 int32_t RTPReceiverAudio::ParseRtpPacket(WebRtcRTPHeader* rtp_header,
182 const PayloadUnion& specific_payload,
183 bool is_red,
184 const uint8_t* payload,
185 size_t payload_length,
186 int64_t timestamp_ms,
187 bool is_first_packet) {
188 TRACE_EVENT2(TRACE_DISABLED_BY_DEFAULT("webrtc_rtp"), "Audio::ParseRtp",
189 "seqnum", rtp_header->header.sequenceNumber, "timestamp",
190 rtp_header->header.timestamp);
191 rtp_header->type.Audio.numEnergy = rtp_header->header.numCSRCs;
192 num_energy_ = rtp_header->type.Audio.numEnergy;
193 if (rtp_header->type.Audio.numEnergy > 0 &&
194 rtp_header->type.Audio.numEnergy <= kRtpCsrcSize) {
195 memcpy(current_remote_energy_,
196 rtp_header->type.Audio.arrOfEnergy,
197 rtp_header->type.Audio.numEnergy);
198 }
199
200 return ParseAudioCodecSpecific(rtp_header,
201 payload,
202 payload_length,
203 specific_payload.Audio,
204 is_red);
205 }
206
GetPayloadTypeFrequency() const207 int RTPReceiverAudio::GetPayloadTypeFrequency() const {
208 CriticalSectionScoped lock(crit_sect_.get());
209 if (last_received_g722_) {
210 return 8000;
211 }
212 return last_received_frequency_;
213 }
214
ProcessDeadOrAlive(uint16_t last_payload_length) const215 RTPAliveType RTPReceiverAudio::ProcessDeadOrAlive(
216 uint16_t last_payload_length) const {
217
218 // Our CNG is 9 bytes; if it's a likely CNG the receiver needs to check
219 // kRtpNoRtp against NetEq speech_type kOutputPLCtoCNG.
220 if (last_payload_length < 10) { // our CNG is 9 bytes
221 return kRtpNoRtp;
222 } else {
223 return kRtpDead;
224 }
225 }
226
CheckPayloadChanged(int8_t payload_type,PayloadUnion * specific_payload,bool * should_discard_changes)227 void RTPReceiverAudio::CheckPayloadChanged(int8_t payload_type,
228 PayloadUnion* specific_payload,
229 bool* should_discard_changes) {
230 *should_discard_changes = false;
231
232 if (TelephoneEventPayloadType(payload_type)) {
233 // Don't do callbacks for DTMF packets.
234 *should_discard_changes = true;
235 return;
236 }
237 // frequency is updated for CNG
238 bool cng_payload_type_has_changed = false;
239 bool is_cng_payload_type = CNGPayloadType(payload_type,
240 &specific_payload->Audio.frequency,
241 &cng_payload_type_has_changed);
242
243 if (is_cng_payload_type) {
244 // Don't do callbacks for DTMF packets.
245 *should_discard_changes = true;
246 return;
247 }
248 }
249
Energy(uint8_t array_of_energy[kRtpCsrcSize]) const250 int RTPReceiverAudio::Energy(uint8_t array_of_energy[kRtpCsrcSize]) const {
251 CriticalSectionScoped cs(crit_sect_.get());
252
253 assert(num_energy_ <= kRtpCsrcSize);
254
255 if (num_energy_ > 0) {
256 memcpy(array_of_energy, current_remote_energy_,
257 sizeof(uint8_t) * num_energy_);
258 }
259 return num_energy_;
260 }
261
InvokeOnInitializeDecoder(RtpFeedback * callback,int8_t payload_type,const char payload_name[RTP_PAYLOAD_NAME_SIZE],const PayloadUnion & specific_payload) const262 int32_t RTPReceiverAudio::InvokeOnInitializeDecoder(
263 RtpFeedback* callback,
264 int8_t payload_type,
265 const char payload_name[RTP_PAYLOAD_NAME_SIZE],
266 const PayloadUnion& specific_payload) const {
267 if (-1 ==
268 callback->OnInitializeDecoder(
269 payload_type, payload_name, specific_payload.Audio.frequency,
270 specific_payload.Audio.channels, specific_payload.Audio.rate)) {
271 LOG(LS_ERROR) << "Failed to create decoder for payload type: "
272 << payload_name << "/" << static_cast<int>(payload_type);
273 return -1;
274 }
275 return 0;
276 }
277
278 // We are not allowed to have any critsects when calling data_callback.
ParseAudioCodecSpecific(WebRtcRTPHeader * rtp_header,const uint8_t * payload_data,size_t payload_length,const AudioPayload & audio_specific,bool is_red)279 int32_t RTPReceiverAudio::ParseAudioCodecSpecific(
280 WebRtcRTPHeader* rtp_header,
281 const uint8_t* payload_data,
282 size_t payload_length,
283 const AudioPayload& audio_specific,
284 bool is_red) {
285
286 if (payload_length == 0) {
287 return 0;
288 }
289
290 bool telephone_event_packet =
291 TelephoneEventPayloadType(rtp_header->header.payloadType);
292 if (telephone_event_packet) {
293 CriticalSectionScoped lock(crit_sect_.get());
294
295 // RFC 4733 2.3
296 // 0 1 2 3
297 // 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
298 // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
299 // | event |E|R| volume | duration |
300 // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
301 //
302 if (payload_length % 4 != 0) {
303 return -1;
304 }
305 size_t number_of_events = payload_length / 4;
306
307 // sanity
308 if (number_of_events >= MAX_NUMBER_OF_PARALLEL_TELEPHONE_EVENTS) {
309 number_of_events = MAX_NUMBER_OF_PARALLEL_TELEPHONE_EVENTS;
310 }
311 for (size_t n = 0; n < number_of_events; ++n) {
312 bool end = (payload_data[(4 * n) + 1] & 0x80) ? true : false;
313
314 std::set<uint8_t>::iterator event =
315 telephone_event_reported_.find(payload_data[4 * n]);
316
317 if (event != telephone_event_reported_.end()) {
318 // we have already seen this event
319 if (end) {
320 telephone_event_reported_.erase(payload_data[4 * n]);
321 }
322 } else {
323 if (end) {
324 // don't add if it's a end of a tone
325 } else {
326 telephone_event_reported_.insert(payload_data[4 * n]);
327 }
328 }
329 }
330
331 // RFC 4733 2.5.1.3 & 2.5.2.3 Long-Duration Events
332 // should not be a problem since we don't care about the duration
333
334 // RFC 4733 See 2.5.1.5. & 2.5.2.4. Multiple Events in a Packet
335 }
336
337 {
338 CriticalSectionScoped lock(crit_sect_.get());
339
340 if (!telephone_event_packet) {
341 last_received_frequency_ = audio_specific.frequency;
342 }
343
344 // Check if this is a CNG packet, receiver might want to know
345 uint32_t ignored;
346 bool also_ignored;
347 if (CNGPayloadType(rtp_header->header.payloadType,
348 &ignored,
349 &also_ignored)) {
350 rtp_header->type.Audio.isCNG = true;
351 rtp_header->frameType = kAudioFrameCN;
352 } else {
353 rtp_header->frameType = kAudioFrameSpeech;
354 rtp_header->type.Audio.isCNG = false;
355 }
356
357 // check if it's a DTMF event, hence something we can playout
358 if (telephone_event_packet) {
359 if (!telephone_event_forward_to_decoder_) {
360 // don't forward event to decoder
361 return 0;
362 }
363 std::set<uint8_t>::iterator first =
364 telephone_event_reported_.begin();
365 if (first != telephone_event_reported_.end() && *first > 15) {
366 // don't forward non DTMF events
367 return 0;
368 }
369 }
370 }
371 // TODO(holmer): Break this out to have RED parsing handled generically.
372 if (is_red && !(payload_data[0] & 0x80)) {
373 // we recive only one frame packed in a RED packet remove the RED wrapper
374 rtp_header->header.payloadType = payload_data[0];
375
376 // only one frame in the RED strip the one byte to help NetEq
377 return data_callback_->OnReceivedPayloadData(
378 payload_data + 1, payload_length - 1, rtp_header);
379 }
380
381 rtp_header->type.Audio.channel = audio_specific.channels;
382 return data_callback_->OnReceivedPayloadData(
383 payload_data, payload_length, rtp_header);
384 }
385 } // namespace webrtc
386