/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef WEBRTC_MODULES_INCLUDE_MODULE_COMMON_TYPES_H_
#define WEBRTC_MODULES_INCLUDE_MODULE_COMMON_TYPES_H_

#include <assert.h>
#include <string.h>  // memcpy

#include <algorithm>
#include <limits>

#include "webrtc/base/constructormagic.h"
#include "webrtc/common_types.h"
#include "webrtc/common_video/rotation.h"
#include "webrtc/typedefs.h"

namespace webrtc {

struct RTPAudioHeader {
  uint8_t numEnergy;                  // Number of valid entries in arrOfEnergy.
  uint8_t arrOfEnergy[kRtpCsrcSize];  // One energy byte (0-9) per channel.
  bool isCNG;                         // Is this CNG (comfort noise)?
  size_t channel;                     // Number of channels; 2 = stereo.
};

const int16_t kNoPictureId = -1;
const int16_t kMaxOneBytePictureId = 0x7F;    // 7 bits
const int16_t kMaxTwoBytePictureId = 0x7FFF;  // 15 bits
const int16_t kNoTl0PicIdx = -1;
const uint8_t kNoTemporalIdx = 0xFF;
const uint8_t kNoSpatialIdx = 0xFF;
const uint8_t kNoGofIdx = 0xFF;
const uint8_t kNumVp9Buffers = 8;
const size_t kMaxVp9RefPics = 3;
const size_t kMaxVp9FramesInGof = 0xFF;  // 8 bits
const size_t kMaxVp9NumberOfSpatialLayers = 8;
const int kNoKeyIdx = -1;

struct RTPVideoHeaderVP8 {
  void InitRTPVideoHeaderVP8() {
    nonReference = false;
    pictureId = kNoPictureId;
    tl0PicIdx = kNoTl0PicIdx;
    temporalIdx = kNoTemporalIdx;
    layerSync = false;
    keyIdx = kNoKeyIdx;
    partitionId = 0;
    beginningOfPartition = false;
  }

  bool nonReference;          // Frame is discardable.
  int16_t pictureId;          // Picture ID index, 15 bits;
                              // kNoPictureId if PictureID does not exist.
  int16_t tl0PicIdx;          // TL0PIC_IDX, 8 bits;
                              // kNoTl0PicIdx means no value provided.
  uint8_t temporalIdx;        // Temporal layer index, or kNoTemporalIdx.
  bool layerSync;             // This frame is a layer sync frame.
                              // Disabled if temporalIdx == kNoTemporalIdx.
  int keyIdx;                 // 5 bits; kNoKeyIdx means not used.
  int partitionId;            // VP8 partition ID.
  bool beginningOfPartition;  // True if this packet is the first in a VP8
                              // partition; otherwise false.
};

enum TemporalStructureMode {
  kTemporalStructureMode1,  // 1 temporal layer structure - i.e., IPPP...
  kTemporalStructureMode2,  // 2 temporal layers 0-1-0-1...
  kTemporalStructureMode3   // 3 temporal layers 0-2-1-2-0-2-1-2...
};

struct GofInfoVP9 {
  void SetGofInfoVP9(TemporalStructureMode tm) {
    switch (tm) {
      case kTemporalStructureMode1:
        num_frames_in_gof = 1;
        temporal_idx[0] = 0;
        temporal_up_switch[0] = false;
        num_ref_pics[0] = 1;
        pid_diff[0][0] = 1;
        break;
      case kTemporalStructureMode2:
        num_frames_in_gof = 2;
        temporal_idx[0] = 0;
        temporal_up_switch[0] = false;
        num_ref_pics[0] = 1;
        pid_diff[0][0] = 2;

        temporal_idx[1] = 1;
        temporal_up_switch[1] = true;
        num_ref_pics[1] = 1;
        pid_diff[1][0] = 1;
        break;
      case kTemporalStructureMode3:
        num_frames_in_gof = 4;
        temporal_idx[0] = 0;
        temporal_up_switch[0] = false;
        num_ref_pics[0] = 1;
        pid_diff[0][0] = 4;

        temporal_idx[1] = 2;
        temporal_up_switch[1] = true;
        num_ref_pics[1] = 1;
        pid_diff[1][0] = 1;

        temporal_idx[2] = 1;
        temporal_up_switch[2] = true;
        num_ref_pics[2] = 1;
        pid_diff[2][0] = 2;

        temporal_idx[3] = 2;
        temporal_up_switch[3] = false;
        num_ref_pics[3] = 2;
        pid_diff[3][0] = 1;
        pid_diff[3][1] = 2;
        break;
      default:
        assert(false);
    }
  }

  void CopyGofInfoVP9(const GofInfoVP9& src) {
    num_frames_in_gof = src.num_frames_in_gof;
    for (size_t i = 0; i < num_frames_in_gof; ++i) {
      temporal_idx[i] = src.temporal_idx[i];
      temporal_up_switch[i] = src.temporal_up_switch[i];
      num_ref_pics[i] = src.num_ref_pics[i];
      for (uint8_t r = 0; r < num_ref_pics[i]; ++r) {
        pid_diff[i][r] = src.pid_diff[i][r];
      }
    }
  }

  size_t num_frames_in_gof;
  uint8_t temporal_idx[kMaxVp9FramesInGof];
  bool temporal_up_switch[kMaxVp9FramesInGof];
  uint8_t num_ref_pics[kMaxVp9FramesInGof];
  uint8_t pid_diff[kMaxVp9FramesInGof][kMaxVp9RefPics];
};
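
// Illustrative sketch (not part of the original header): what SetGofInfoVP9()
// produces for kTemporalStructureMode3, i.e. a four-frame GOF with the
// 0-2-1-2 temporal-layer pattern. The comments simply restate the values the
// switch above assigns.
//
//   GofInfoVP9 gof;
//   gof.SetGofInfoVP9(kTemporalStructureMode3);
//   // gof.num_frames_in_gof == 4
//   // Frame 0: temporal layer 0, references the picture 4 PIDs back.
//   // Frame 1: temporal layer 2, references the picture 1 PID back.
//   // Frame 2: temporal layer 1, references the picture 2 PIDs back.
//   // Frame 3: temporal layer 2, references the pictures 1 and 2 PIDs back.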

struct RTPVideoHeaderVP9 {
  void InitRTPVideoHeaderVP9() {
    inter_pic_predicted = false;
    flexible_mode = false;
    beginning_of_frame = false;
    end_of_frame = false;
    ss_data_available = false;
    picture_id = kNoPictureId;
    max_picture_id = kMaxTwoBytePictureId;
    tl0_pic_idx = kNoTl0PicIdx;
    temporal_idx = kNoTemporalIdx;
    spatial_idx = kNoSpatialIdx;
    temporal_up_switch = false;
    inter_layer_predicted = false;
    gof_idx = kNoGofIdx;
    num_ref_pics = 0;
    num_spatial_layers = 1;
  }

  bool inter_pic_predicted;  // This layer frame is dependent on previously
                             // coded frame(s).
  bool flexible_mode;        // This frame is in flexible mode.
  bool beginning_of_frame;   // True if this packet is the first in a VP9 layer
                             // frame.
  bool end_of_frame;       // True if this packet is the last in a VP9 layer
                           // frame.
  bool ss_data_available;  // True if SS data is available in this payload
                           // descriptor.
  int16_t picture_id;      // PictureID index, 15 bits;
                           // kNoPictureId if PictureID does not exist.
  int16_t max_picture_id;  // Maximum picture ID index; either 0x7F or 0x7FFF.
  int16_t tl0_pic_idx;     // TL0PIC_IDX, 8 bits;
                           // kNoTl0PicIdx means no value provided.
  uint8_t temporal_idx;    // Temporal layer index, or kNoTemporalIdx.
  uint8_t spatial_idx;     // Spatial layer index, or kNoSpatialIdx.
  bool temporal_up_switch;  // True if upswitch to a higher frame rate is
                            // possible starting from this frame.
  bool inter_layer_predicted;  // Frame is dependent on the directly lower
                               // spatial layer frame.

  uint8_t gof_idx;  // Index to predefined temporal frame info in SS data.

  uint8_t num_ref_pics;  // Number of reference pictures used by this layer
                         // frame.
  uint8_t pid_diff[kMaxVp9RefPics];  // P_DIFF signaled to derive the PictureID
                                     // of the reference pictures.
  int16_t ref_picture_id[kMaxVp9RefPics];  // PictureID of reference pictures.

  // SS data.
  size_t num_spatial_layers;  // Always populated.
  bool spatial_layer_resolution_present;
  uint16_t width[kMaxVp9NumberOfSpatialLayers];
  uint16_t height[kMaxVp9NumberOfSpatialLayers];
  GofInfoVP9 gof;
};

// The packetization types that we support: single, aggregated, and fragmented.
enum H264PacketizationTypes {
  kH264SingleNalu,  // This packet contains a single NAL unit.
  kH264StapA,       // This packet contains STAP-A (single time
                    // aggregation) packets. If this packet has an
                    // associated NAL unit type, it'll be for the
                    // first such aggregated packet.
  kH264FuA,         // This packet contains a FU-A (fragmentation
                    // unit) packet, meaning it is a part of a frame
                    // that was too large to fit into a single packet.
};

struct RTPVideoHeaderH264 {
  uint8_t nalu_type;  // The NAL unit type. If this is a header for a
                      // fragmented packet, it's the NAL unit type of
                      // the original data. If this is the header for an
                      // aggregated packet, it's the NAL unit type of
                      // the first NAL unit in the packet.
  H264PacketizationTypes packetization_type;
};

union RTPVideoTypeHeader {
  RTPVideoHeaderVP8 VP8;
  RTPVideoHeaderVP9 VP9;
  RTPVideoHeaderH264 H264;
};

enum RtpVideoCodecTypes {
  kRtpVideoNone,
  kRtpVideoGeneric,
  kRtpVideoVp8,
  kRtpVideoVp9,
  kRtpVideoH264
};
// Since RTPVideoHeader is used as a member of a union, it can't have a
// non-trivial default constructor.
struct RTPVideoHeader {
  uint16_t width;  // size
  uint16_t height;
  VideoRotation rotation;

  bool isFirstPacket;    // First packet in frame.
  uint8_t simulcastIdx;  // Index of the simulcast encoder creating
                         // this frame, 0 if not using simulcast.
  RtpVideoCodecTypes codec;
  RTPVideoTypeHeader codecHeader;
};

union RTPTypeHeader {
  RTPAudioHeader Audio;
  RTPVideoHeader Video;
};

struct WebRtcRTPHeader {
  RTPHeader header;
  FrameType frameType;
  RTPTypeHeader type;
  // NTP time of the capture time in local timebase in milliseconds.
  int64_t ntp_time_ms;
};

class RTPFragmentationHeader {
 public:
  RTPFragmentationHeader()
      : fragmentationVectorSize(0),
        fragmentationOffset(NULL),
        fragmentationLength(NULL),
        fragmentationTimeDiff(NULL),
        fragmentationPlType(NULL) {}

  ~RTPFragmentationHeader() {
    delete[] fragmentationOffset;
    delete[] fragmentationLength;
    delete[] fragmentationTimeDiff;
    delete[] fragmentationPlType;
  }

  void CopyFrom(const RTPFragmentationHeader& src) {
    if (this == &src) {
      return;
    }

    if (src.fragmentationVectorSize != fragmentationVectorSize) {
      // New size of vectors.

      // Delete old.
      delete[] fragmentationOffset;
      fragmentationOffset = NULL;
      delete[] fragmentationLength;
      fragmentationLength = NULL;
      delete[] fragmentationTimeDiff;
      fragmentationTimeDiff = NULL;
      delete[] fragmentationPlType;
      fragmentationPlType = NULL;

      if (src.fragmentationVectorSize > 0) {
        // Allocate new.
        if (src.fragmentationOffset) {
          fragmentationOffset = new size_t[src.fragmentationVectorSize];
        }
        if (src.fragmentationLength) {
          fragmentationLength = new size_t[src.fragmentationVectorSize];
        }
        if (src.fragmentationTimeDiff) {
          fragmentationTimeDiff = new uint16_t[src.fragmentationVectorSize];
        }
        if (src.fragmentationPlType) {
          fragmentationPlType = new uint8_t[src.fragmentationVectorSize];
        }
      }
      // Set new size.
      fragmentationVectorSize = src.fragmentationVectorSize;
    }

    if (src.fragmentationVectorSize > 0) {
      // Copy values.
      if (src.fragmentationOffset) {
        memcpy(fragmentationOffset, src.fragmentationOffset,
               src.fragmentationVectorSize * sizeof(size_t));
      }
      if (src.fragmentationLength) {
        memcpy(fragmentationLength, src.fragmentationLength,
               src.fragmentationVectorSize * sizeof(size_t));
      }
      if (src.fragmentationTimeDiff) {
        memcpy(fragmentationTimeDiff, src.fragmentationTimeDiff,
               src.fragmentationVectorSize * sizeof(uint16_t));
      }
      if (src.fragmentationPlType) {
        memcpy(fragmentationPlType, src.fragmentationPlType,
               src.fragmentationVectorSize * sizeof(uint8_t));
      }
    }
  }

  void VerifyAndAllocateFragmentationHeader(const size_t size) {
    assert(size <= std::numeric_limits<uint16_t>::max());
    const uint16_t size16 = static_cast<uint16_t>(size);
    if (fragmentationVectorSize < size16) {
      uint16_t oldVectorSize = fragmentationVectorSize;
      {
        // Offset.
        size_t* oldOffsets = fragmentationOffset;
        fragmentationOffset = new size_t[size16];
        memset(fragmentationOffset + oldVectorSize, 0,
               sizeof(size_t) * (size16 - oldVectorSize));
        // Copy old values.
        memcpy(fragmentationOffset, oldOffsets, sizeof(size_t) * oldVectorSize);
        delete[] oldOffsets;
      }
      // Length.
      {
        size_t* oldLengths = fragmentationLength;
        fragmentationLength = new size_t[size16];
        memset(fragmentationLength + oldVectorSize, 0,
               sizeof(size_t) * (size16 - oldVectorSize));
        memcpy(fragmentationLength, oldLengths, sizeof(size_t) * oldVectorSize);
        delete[] oldLengths;
      }
      // Time diff.
      {
        uint16_t* oldTimeDiffs = fragmentationTimeDiff;
        fragmentationTimeDiff = new uint16_t[size16];
        memset(fragmentationTimeDiff + oldVectorSize, 0,
               sizeof(uint16_t) * (size16 - oldVectorSize));
        memcpy(fragmentationTimeDiff, oldTimeDiffs,
               sizeof(uint16_t) * oldVectorSize);
        delete[] oldTimeDiffs;
      }
      // Payload type.
      {
        uint8_t* oldTimePlTypes = fragmentationPlType;
        fragmentationPlType = new uint8_t[size16];
        memset(fragmentationPlType + oldVectorSize, 0,
               sizeof(uint8_t) * (size16 - oldVectorSize));
        memcpy(fragmentationPlType, oldTimePlTypes,
               sizeof(uint8_t) * oldVectorSize);
        delete[] oldTimePlTypes;
      }
      fragmentationVectorSize = size16;
    }
  }

  uint16_t fragmentationVectorSize;  // Number of fragmentations.
  size_t* fragmentationOffset;       // Offset of pointer to data for each
                                     // fragmentation.
  size_t* fragmentationLength;       // Data size for each fragmentation.
  uint16_t* fragmentationTimeDiff;   // Timestamp difference relative "now" for
                                     // each fragmentation.
  uint8_t* fragmentationPlType;      // Payload type of each fragmentation.

 private:
  RTC_DISALLOW_COPY_AND_ASSIGN(RTPFragmentationHeader);
};
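
// Illustrative usage sketch (not part of the original header): allocate room
// for two fragments and fill in their offsets and lengths. The newly grown
// region of every per-fragment array is zero-initialized by the method above.
//
//   RTPFragmentationHeader frag;
//   frag.VerifyAndAllocateFragmentationHeader(2);
//   frag.fragmentationOffset[0] = 0;
//   frag.fragmentationLength[0] = 100;
//   frag.fragmentationOffset[1] = 100;
//   frag.fragmentationLength[1] = 50;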

struct RTCPVoIPMetric {
  // RFC 3611 4.7
  uint8_t lossRate;
  uint8_t discardRate;
  uint8_t burstDensity;
  uint8_t gapDensity;
  uint16_t burstDuration;
  uint16_t gapDuration;
  uint16_t roundTripDelay;
  uint16_t endSystemDelay;
  uint8_t signalLevel;
  uint8_t noiseLevel;
  uint8_t RERL;
  uint8_t Gmin;
  uint8_t Rfactor;
  uint8_t extRfactor;
  uint8_t MOSLQ;
  uint8_t MOSCQ;
  uint8_t RXconfig;
  uint16_t JBnominal;
  uint16_t JBmax;
  uint16_t JBabsMax;
};

// Types for the FEC packet masks. The type |kFecMaskRandom| is based on a
// random loss model. The type |kFecMaskBursty| is based on a bursty/consecutive
// loss model. The packet masks are defined in
// modules/rtp_rtcp/fec_private_tables_random(bursty).h
enum FecMaskType {
  kFecMaskRandom,
  kFecMaskBursty,
};

// Struct containing forward error correction settings.
struct FecProtectionParams {
  int fec_rate;
  bool use_uep_protection;
  int max_fec_frames;
  FecMaskType fec_mask_type;
};

// Interface used by the CallStats class to distribute call statistics.
// Callbacks will be triggered as soon as the class has been registered to a
// CallStats object using RegisterStatsObserver.
class CallStatsObserver {
 public:
  virtual void OnRttUpdate(int64_t avg_rtt_ms, int64_t max_rtt_ms) = 0;

  virtual ~CallStatsObserver() {}
};

struct VideoContentMetrics {
  VideoContentMetrics()
      : motion_magnitude(0.0f),
        spatial_pred_err(0.0f),
        spatial_pred_err_h(0.0f),
        spatial_pred_err_v(0.0f) {}

  void Reset() {
    motion_magnitude = 0.0f;
    spatial_pred_err = 0.0f;
    spatial_pred_err_h = 0.0f;
    spatial_pred_err_v = 0.0f;
  }

  float motion_magnitude;
  float spatial_pred_err;
  float spatial_pred_err_h;
  float spatial_pred_err_v;
};

/* This class holds up to 60 ms of super-wideband (32 kHz) stereo audio. It
 * allows for adding and subtracting frames while keeping track of the
 * resulting states.
 *
 * Notes
 * - The total number of samples in |data_| is
 *   samples_per_channel_ * num_channels_.
 *
 * - Stereo data is interleaved starting with the left channel.
 *
 * - The +operator assumes that you would never add exactly opposite frames
 *   when deciding the resulting state. To subtract frames, use the -operator.
 */
class AudioFrame {
 public:
  // Stereo, 32 kHz, 60 ms (2 * 32 * 60).
  static const size_t kMaxDataSizeSamples = 3840;

  enum VADActivity {
    kVadActive = 0,
    kVadPassive = 1,
    kVadUnknown = 2
  };
  enum SpeechType {
    kNormalSpeech = 0,
    kPLC = 1,
    kCNG = 2,
    kPLCCNG = 3,
    kUndefined = 4
  };

  AudioFrame();
  virtual ~AudioFrame() {}

  // Resets all members to their default state (except does not modify the
  // contents of |data_|).
  void Reset();

  // |interleaved_| is not changed by this method.
  void UpdateFrame(int id, uint32_t timestamp, const int16_t* data,
                   size_t samples_per_channel, int sample_rate_hz,
                   SpeechType speech_type, VADActivity vad_activity,
                   size_t num_channels = 1, uint32_t energy = -1);

  AudioFrame& Append(const AudioFrame& rhs);

  void CopyFrom(const AudioFrame& src);

  void Mute();

  AudioFrame& operator>>=(const int rhs);
  AudioFrame& operator+=(const AudioFrame& rhs);
  AudioFrame& operator-=(const AudioFrame& rhs);

  int id_;
  // RTP timestamp of the first sample in the AudioFrame.
  uint32_t timestamp_;
  // Time since the first frame in milliseconds.
  // -1 represents an uninitialized value.
  int64_t elapsed_time_ms_;
  // NTP time of the estimated capture time in local timebase in milliseconds.
  // -1 represents an uninitialized value.
  int64_t ntp_time_ms_;
  int16_t data_[kMaxDataSizeSamples];
  size_t samples_per_channel_;
  int sample_rate_hz_;
  size_t num_channels_;
  SpeechType speech_type_;
  VADActivity vad_activity_;
  // Note that there is no guarantee that |energy_| is correct. Any user of
  // this member must verify that the value is correct.
  // TODO(henrike) Remove |energy_|.
  // See https://code.google.com/p/webrtc/issues/detail?id=3315.
  uint32_t energy_;
  bool interleaved_;

 private:
  RTC_DISALLOW_COPY_AND_ASSIGN(AudioFrame);
};

inline AudioFrame::AudioFrame()
    : data_() {
  Reset();
}

inline void AudioFrame::Reset() {
  id_ = -1;
  // TODO(wu): Zero is a valid value for |timestamp_|. We should initialize
  // it to an invalid value, or add a new member to indicate invalidity.
  timestamp_ = 0;
  elapsed_time_ms_ = -1;
  ntp_time_ms_ = -1;
  samples_per_channel_ = 0;
  sample_rate_hz_ = 0;
  num_channels_ = 0;
  speech_type_ = kUndefined;
  vad_activity_ = kVadUnknown;
  energy_ = 0xffffffff;
  interleaved_ = true;
}

inline void AudioFrame::UpdateFrame(int id,
                                    uint32_t timestamp,
                                    const int16_t* data,
                                    size_t samples_per_channel,
                                    int sample_rate_hz,
                                    SpeechType speech_type,
                                    VADActivity vad_activity,
                                    size_t num_channels,
                                    uint32_t energy) {
  id_ = id;
  timestamp_ = timestamp;
  samples_per_channel_ = samples_per_channel;
  sample_rate_hz_ = sample_rate_hz;
  speech_type_ = speech_type;
  vad_activity_ = vad_activity;
  num_channels_ = num_channels;
  energy_ = energy;

  const size_t length = samples_per_channel * num_channels;
  assert(length <= kMaxDataSizeSamples);
  if (data != NULL) {
    memcpy(data_, data, sizeof(int16_t) * length);
  } else {
    memset(data_, 0, sizeof(int16_t) * length);
  }
}
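
// Illustrative usage sketch (not part of the original header): fill a frame
// with 10 ms of mono 48 kHz audio. |pcm| is an assumed caller-owned buffer of
// 480 samples; passing NULL instead would zero-fill the frame.
//
//   int16_t pcm[480] = {0};
//   AudioFrame frame;
//   frame.UpdateFrame(/*id=*/0, /*timestamp=*/0, pcm,
//                     /*samples_per_channel=*/480, /*sample_rate_hz=*/48000,
//                     AudioFrame::kNormalSpeech, AudioFrame::kVadPassive);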

inline void AudioFrame::CopyFrom(const AudioFrame& src) {
  if (this == &src) return;

  id_ = src.id_;
  timestamp_ = src.timestamp_;
  elapsed_time_ms_ = src.elapsed_time_ms_;
  ntp_time_ms_ = src.ntp_time_ms_;
  samples_per_channel_ = src.samples_per_channel_;
  sample_rate_hz_ = src.sample_rate_hz_;
  speech_type_ = src.speech_type_;
  vad_activity_ = src.vad_activity_;
  num_channels_ = src.num_channels_;
  energy_ = src.energy_;
  interleaved_ = src.interleaved_;

  const size_t length = samples_per_channel_ * num_channels_;
  assert(length <= kMaxDataSizeSamples);
  memcpy(data_, src.data_, sizeof(int16_t) * length);
}

inline void AudioFrame::Mute() {
  memset(data_, 0, samples_per_channel_ * num_channels_ * sizeof(int16_t));
}

inline AudioFrame& AudioFrame::operator>>=(const int rhs) {
  assert((num_channels_ > 0) && (num_channels_ < 3));
  if ((num_channels_ > 2) || (num_channels_ < 1)) return *this;

  for (size_t i = 0; i < samples_per_channel_ * num_channels_; i++) {
    data_[i] = static_cast<int16_t>(data_[i] >> rhs);
  }
  return *this;
}

inline AudioFrame& AudioFrame::Append(const AudioFrame& rhs) {
  // Sanity check.
  assert((num_channels_ > 0) && (num_channels_ < 3));
  assert(interleaved_ == rhs.interleaved_);
  if ((num_channels_ > 2) || (num_channels_ < 1)) return *this;
  if (num_channels_ != rhs.num_channels_) return *this;

  if ((vad_activity_ == kVadActive) || rhs.vad_activity_ == kVadActive) {
    vad_activity_ = kVadActive;
  } else if (vad_activity_ == kVadUnknown || rhs.vad_activity_ == kVadUnknown) {
    vad_activity_ = kVadUnknown;
  }
  if (speech_type_ != rhs.speech_type_) {
    speech_type_ = kUndefined;
  }

  size_t offset = samples_per_channel_ * num_channels_;
  for (size_t i = 0; i < rhs.samples_per_channel_ * rhs.num_channels_; i++) {
    data_[offset + i] = rhs.data_[i];
  }
  samples_per_channel_ += rhs.samples_per_channel_;
  return *this;
}

namespace {
inline int16_t ClampToInt16(int32_t input) {
  if (input < -0x00008000) {
    return -0x8000;
  } else if (input > 0x00007FFF) {
    return 0x7FFF;
  } else {
    return static_cast<int16_t>(input);
  }
}
}  // namespace
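
// Worked examples (illustrative): inputs already in [-0x8000, 0x7FFF] pass
// through unchanged, e.g. ClampToInt16(123) == 123, while out-of-range inputs
// saturate: ClampToInt16(40000) == 32767 and ClampToInt16(-40000) == -32768.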

inline AudioFrame& AudioFrame::operator+=(const AudioFrame& rhs) {
  // Sanity check.
  assert((num_channels_ > 0) && (num_channels_ < 3));
  assert(interleaved_ == rhs.interleaved_);
  if ((num_channels_ > 2) || (num_channels_ < 1)) return *this;
  if (num_channels_ != rhs.num_channels_) return *this;

  bool noPrevData = false;
  if (samples_per_channel_ != rhs.samples_per_channel_) {
    if (samples_per_channel_ == 0) {
      // Special case: we have no data to start with.
      samples_per_channel_ = rhs.samples_per_channel_;
      noPrevData = true;
    } else {
      return *this;
    }
  }

  if ((vad_activity_ == kVadActive) || rhs.vad_activity_ == kVadActive) {
    vad_activity_ = kVadActive;
  } else if (vad_activity_ == kVadUnknown || rhs.vad_activity_ == kVadUnknown) {
    vad_activity_ = kVadUnknown;
  }

  if (speech_type_ != rhs.speech_type_) speech_type_ = kUndefined;

  if (noPrevData) {
    memcpy(data_, rhs.data_,
           sizeof(int16_t) * rhs.samples_per_channel_ * num_channels_);
  } else {
    // IMPROVEMENT: this can be done very fast in assembly.
    for (size_t i = 0; i < samples_per_channel_ * num_channels_; i++) {
      int32_t wrap_guard =
          static_cast<int32_t>(data_[i]) + static_cast<int32_t>(rhs.data_[i]);
      data_[i] = ClampToInt16(wrap_guard);
    }
  }
  energy_ = 0xffffffff;
  return *this;
}
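
// Note (illustrative): because each per-sample sum above passes through
// ClampToInt16(), mixing saturates rather than wraps; e.g. adding two samples
// of 30000 yields 32767, not the int16_t-wrapped value -5536.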

inline AudioFrame& AudioFrame::operator-=(const AudioFrame& rhs) {
  // Sanity check.
  assert((num_channels_ > 0) && (num_channels_ < 3));
  assert(interleaved_ == rhs.interleaved_);
  if ((num_channels_ > 2) || (num_channels_ < 1)) return *this;

  if ((samples_per_channel_ != rhs.samples_per_channel_) ||
      (num_channels_ != rhs.num_channels_)) {
    return *this;
  }
  if ((vad_activity_ != kVadPassive) || rhs.vad_activity_ != kVadPassive) {
    vad_activity_ = kVadUnknown;
  }
  speech_type_ = kUndefined;

  for (size_t i = 0; i < samples_per_channel_ * num_channels_; i++) {
    int32_t wrap_guard =
        static_cast<int32_t>(data_[i]) - static_cast<int32_t>(rhs.data_[i]);
    data_[i] = ClampToInt16(wrap_guard);
  }
  energy_ = 0xffffffff;
  return *this;
}

inline bool IsNewerSequenceNumber(uint16_t sequence_number,
                                  uint16_t prev_sequence_number) {
  // Distinguish between elements that are exactly 0x8000 apart.
  // If s1 > s2 and |s1 - s2| = 0x8000: IsNewer(s1, s2) = true,
  // IsNewer(s2, s1) = false
  // rather than having IsNewer(s1, s2) = IsNewer(s2, s1) = false.
  if (static_cast<uint16_t>(sequence_number - prev_sequence_number) == 0x8000) {
    return sequence_number > prev_sequence_number;
  }
  return sequence_number != prev_sequence_number &&
         static_cast<uint16_t>(sequence_number - prev_sequence_number) <
             0x8000;
}
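
// Worked examples (illustrative): IsNewerSequenceNumber(2, 0xFFFE) is true
// (a forward wrap; the modular distance is 4) and IsNewerSequenceNumber(
// 0xFFFE, 2) is false. For the ambiguous half-range distance, the tie-break
// above makes IsNewerSequenceNumber(0x8000, 0) true and
// IsNewerSequenceNumber(0, 0x8000) false.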

inline bool IsNewerTimestamp(uint32_t timestamp, uint32_t prev_timestamp) {
  // Distinguish between elements that are exactly 0x80000000 apart.
  // If t1 > t2 and |t1 - t2| = 0x80000000: IsNewer(t1, t2) = true,
  // IsNewer(t2, t1) = false
  // rather than having IsNewer(t1, t2) = IsNewer(t2, t1) = false.
  if (static_cast<uint32_t>(timestamp - prev_timestamp) == 0x80000000) {
    return timestamp > prev_timestamp;
  }
  return timestamp != prev_timestamp &&
         static_cast<uint32_t>(timestamp - prev_timestamp) < 0x80000000;
}

inline uint16_t LatestSequenceNumber(uint16_t sequence_number1,
                                     uint16_t sequence_number2) {
  return IsNewerSequenceNumber(sequence_number1, sequence_number2)
             ? sequence_number1
             : sequence_number2;
}

inline uint32_t LatestTimestamp(uint32_t timestamp1, uint32_t timestamp2) {
  return IsNewerTimestamp(timestamp1, timestamp2) ? timestamp1 : timestamp2;
}

// Utility class to unwrap a sequence number to a larger type, for easier
// handling of large ranges. Note that sequence numbers will never be unwrapped
// to a negative value.
class SequenceNumberUnwrapper {
 public:
  SequenceNumberUnwrapper() : last_seq_(-1) {}

  // Get the unwrapped sequence number, but don't update the internal state.
  int64_t UnwrapWithoutUpdate(uint16_t sequence_number) {
    if (last_seq_ == -1)
      return sequence_number;

    uint16_t cropped_last = static_cast<uint16_t>(last_seq_);
    int64_t delta = sequence_number - cropped_last;
    if (IsNewerSequenceNumber(sequence_number, cropped_last)) {
      if (delta < 0)
        delta += (1 << 16);  // Wrap forwards.
    } else if (delta > 0 && (last_seq_ + delta - (1 << 16)) >= 0) {
      // If sequence_number is older but delta is positive, this is a backwards
      // wrap-around. However, don't wrap backwards past 0 (unwrapped).
      delta -= (1 << 16);
    }

    return last_seq_ + delta;
  }

  // Only update the internal state to the specified last (unwrapped) sequence.
  void UpdateLast(int64_t last_sequence) { last_seq_ = last_sequence; }

  // Unwrap the sequence number and update the internal state.
  int64_t Unwrap(uint16_t sequence_number) {
    int64_t unwrapped = UnwrapWithoutUpdate(sequence_number);
    UpdateLast(unwrapped);
    return unwrapped;
  }

 private:
  int64_t last_seq_;
};
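
// Illustrative usage sketch (not part of the original header): the unwrapper
// maps 16-bit sequence numbers onto a monotonically adjusted 64-bit range.
//
//   SequenceNumberUnwrapper unwrapper;
//   int64_t a = unwrapper.Unwrap(0xFFFF);  // 65535; first value taken as-is.
//   int64_t b = unwrapper.Unwrap(0x0000);  // 65536; forward wrap detected.
//   int64_t c = unwrapper.Unwrap(0xFFFF);  // 65535; a backwards step, not a
//                                          // second wrap.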

}  // namespace webrtc

#endif  // WEBRTC_MODULES_INCLUDE_MODULE_COMMON_TYPES_H_