1 /*
2 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "modules/video_coding/decoding_state.h"
12
13 #include "common_video/h264/h264_common.h"
14 #include "modules/include/module_common_types_public.h"
15 #include "modules/video_coding/frame_buffer.h"
16 #include "modules/video_coding/jitter_buffer_common.h"
17 #include "modules/video_coding/packet.h"
18 #include "rtc_base/logging.h"
19
20 namespace webrtc {
21
VCMDecodingState()22 VCMDecodingState::VCMDecodingState()
23 : sequence_num_(0),
24 time_stamp_(0),
25 picture_id_(kNoPictureId),
26 temporal_id_(kNoTemporalIdx),
27 tl0_pic_id_(kNoTl0PicIdx),
28 full_sync_(true),
29 in_initial_state_(true) {
30 memset(frame_decoded_, 0, sizeof(frame_decoded_));
31 }
32
~VCMDecodingState()33 VCMDecodingState::~VCMDecodingState() {}
34
Reset()35 void VCMDecodingState::Reset() {
36 // TODO(mikhal): Verify - not always would want to reset the sync
37 sequence_num_ = 0;
38 time_stamp_ = 0;
39 picture_id_ = kNoPictureId;
40 temporal_id_ = kNoTemporalIdx;
41 tl0_pic_id_ = kNoTl0PicIdx;
42 full_sync_ = true;
43 in_initial_state_ = true;
44 memset(frame_decoded_, 0, sizeof(frame_decoded_));
45 received_sps_.clear();
46 received_pps_.clear();
47 }
48
time_stamp() const49 uint32_t VCMDecodingState::time_stamp() const {
50 return time_stamp_;
51 }
52
sequence_num() const53 uint16_t VCMDecodingState::sequence_num() const {
54 return sequence_num_;
55 }
56
IsOldFrame(const VCMFrameBuffer * frame) const57 bool VCMDecodingState::IsOldFrame(const VCMFrameBuffer* frame) const {
58 RTC_DCHECK(frame);
59 if (in_initial_state_)
60 return false;
61 return !IsNewerTimestamp(frame->Timestamp(), time_stamp_);
62 }
63
IsOldPacket(const VCMPacket * packet) const64 bool VCMDecodingState::IsOldPacket(const VCMPacket* packet) const {
65 RTC_DCHECK(packet);
66 if (in_initial_state_)
67 return false;
68 return !IsNewerTimestamp(packet->timestamp, time_stamp_);
69 }
70
SetState(const VCMFrameBuffer * frame)71 void VCMDecodingState::SetState(const VCMFrameBuffer* frame) {
72 RTC_DCHECK(frame);
73 RTC_CHECK_GE(frame->GetHighSeqNum(), 0);
74 if (!UsingFlexibleMode(frame))
75 UpdateSyncState(frame);
76 sequence_num_ = static_cast<uint16_t>(frame->GetHighSeqNum());
77 time_stamp_ = frame->Timestamp();
78 picture_id_ = frame->PictureId();
79 temporal_id_ = frame->TemporalId();
80 tl0_pic_id_ = frame->Tl0PicId();
81
82 for (const NaluInfo& nalu : frame->GetNaluInfos()) {
83 if (nalu.type == H264::NaluType::kPps) {
84 if (nalu.pps_id < 0) {
85 RTC_LOG(LS_WARNING) << "Received pps without pps id.";
86 } else if (nalu.sps_id < 0) {
87 RTC_LOG(LS_WARNING) << "Received pps without sps id.";
88 } else {
89 received_pps_[nalu.pps_id] = nalu.sps_id;
90 }
91 } else if (nalu.type == H264::NaluType::kSps) {
92 if (nalu.sps_id < 0) {
93 RTC_LOG(LS_WARNING) << "Received sps without sps id.";
94 } else {
95 received_sps_.insert(nalu.sps_id);
96 }
97 }
98 }
99
100 if (UsingFlexibleMode(frame)) {
101 uint16_t frame_index = picture_id_ % kFrameDecodedLength;
102 if (in_initial_state_) {
103 frame_decoded_cleared_to_ = frame_index;
104 } else if (frame->FrameType() == VideoFrameType::kVideoFrameKey) {
105 memset(frame_decoded_, 0, sizeof(frame_decoded_));
106 frame_decoded_cleared_to_ = frame_index;
107 } else {
108 if (AheadOfFramesDecodedClearedTo(frame_index)) {
109 while (frame_decoded_cleared_to_ != frame_index) {
110 frame_decoded_cleared_to_ =
111 (frame_decoded_cleared_to_ + 1) % kFrameDecodedLength;
112 frame_decoded_[frame_decoded_cleared_to_] = false;
113 }
114 }
115 }
116 frame_decoded_[frame_index] = true;
117 }
118
119 in_initial_state_ = false;
120 }
121
CopyFrom(const VCMDecodingState & state)122 void VCMDecodingState::CopyFrom(const VCMDecodingState& state) {
123 sequence_num_ = state.sequence_num_;
124 time_stamp_ = state.time_stamp_;
125 picture_id_ = state.picture_id_;
126 temporal_id_ = state.temporal_id_;
127 tl0_pic_id_ = state.tl0_pic_id_;
128 full_sync_ = state.full_sync_;
129 in_initial_state_ = state.in_initial_state_;
130 frame_decoded_cleared_to_ = state.frame_decoded_cleared_to_;
131 memcpy(frame_decoded_, state.frame_decoded_, sizeof(frame_decoded_));
132 received_sps_ = state.received_sps_;
133 received_pps_ = state.received_pps_;
134 }
135
UpdateEmptyFrame(const VCMFrameBuffer * frame)136 bool VCMDecodingState::UpdateEmptyFrame(const VCMFrameBuffer* frame) {
137 bool empty_packet = frame->GetHighSeqNum() == frame->GetLowSeqNum();
138 if (in_initial_state_ && empty_packet) {
139 // Drop empty packets as long as we are in the initial state.
140 return true;
141 }
142 if ((empty_packet && ContinuousSeqNum(frame->GetHighSeqNum())) ||
143 ContinuousFrame(frame)) {
144 // Continuous empty packets or continuous frames can be dropped if we
145 // advance the sequence number.
146 sequence_num_ = frame->GetHighSeqNum();
147 time_stamp_ = frame->Timestamp();
148 return true;
149 }
150 return false;
151 }
152
UpdateOldPacket(const VCMPacket * packet)153 void VCMDecodingState::UpdateOldPacket(const VCMPacket* packet) {
154 RTC_DCHECK(packet);
155 if (packet->timestamp == time_stamp_) {
156 // Late packet belonging to the last decoded frame - make sure we update the
157 // last decoded sequence number.
158 sequence_num_ = LatestSequenceNumber(packet->seqNum, sequence_num_);
159 }
160 }
161
SetSeqNum(uint16_t new_seq_num)162 void VCMDecodingState::SetSeqNum(uint16_t new_seq_num) {
163 sequence_num_ = new_seq_num;
164 }
165
in_initial_state() const166 bool VCMDecodingState::in_initial_state() const {
167 return in_initial_state_;
168 }
169
full_sync() const170 bool VCMDecodingState::full_sync() const {
171 return full_sync_;
172 }
173
UpdateSyncState(const VCMFrameBuffer * frame)174 void VCMDecodingState::UpdateSyncState(const VCMFrameBuffer* frame) {
175 if (in_initial_state_)
176 return;
177 if (frame->TemporalId() == kNoTemporalIdx ||
178 frame->Tl0PicId() == kNoTl0PicIdx) {
179 full_sync_ = true;
180 } else if (frame->FrameType() == VideoFrameType::kVideoFrameKey ||
181 frame->LayerSync()) {
182 full_sync_ = true;
183 } else if (full_sync_) {
184 // Verify that we are still in sync.
185 // Sync will be broken if continuity is true for layers but not for the
186 // other methods (PictureId and SeqNum).
187 if (UsingPictureId(frame)) {
188 // First check for a valid tl0PicId.
189 if (frame->Tl0PicId() - tl0_pic_id_ > 1) {
190 full_sync_ = false;
191 } else {
192 full_sync_ = ContinuousPictureId(frame->PictureId());
193 }
194 } else {
195 full_sync_ =
196 ContinuousSeqNum(static_cast<uint16_t>(frame->GetLowSeqNum()));
197 }
198 }
199 }
200
ContinuousFrame(const VCMFrameBuffer * frame) const201 bool VCMDecodingState::ContinuousFrame(const VCMFrameBuffer* frame) const {
202 // Check continuity based on the following hierarchy:
203 // - Temporal layers (stop here if out of sync).
204 // - Picture Id when available.
205 // - Sequence numbers.
206 // Return true when in initial state.
207 // Note that when a method is not applicable it will return false.
208 RTC_DCHECK(frame);
209 // A key frame is always considered continuous as it doesn't refer to any
210 // frames and therefore won't introduce any errors even if prior frames are
211 // missing.
212 if (frame->FrameType() == VideoFrameType::kVideoFrameKey &&
213 HaveSpsAndPps(frame->GetNaluInfos())) {
214 return true;
215 }
216 // When in the initial state we always require a key frame to start decoding.
217 if (in_initial_state_)
218 return false;
219 if (ContinuousLayer(frame->TemporalId(), frame->Tl0PicId()))
220 return true;
221 // tl0picId is either not used, or should remain unchanged.
222 if (frame->Tl0PicId() != tl0_pic_id_)
223 return false;
224 // Base layers are not continuous or temporal layers are inactive.
225 // In the presence of temporal layers, check for Picture ID/sequence number
226 // continuity if sync can be restored by this frame.
227 if (!full_sync_ && !frame->LayerSync())
228 return false;
229 if (UsingPictureId(frame)) {
230 if (UsingFlexibleMode(frame)) {
231 return ContinuousFrameRefs(frame);
232 } else {
233 return ContinuousPictureId(frame->PictureId());
234 }
235 } else {
236 return ContinuousSeqNum(static_cast<uint16_t>(frame->GetLowSeqNum())) &&
237 HaveSpsAndPps(frame->GetNaluInfos());
238 }
239 }
240
ContinuousPictureId(int picture_id) const241 bool VCMDecodingState::ContinuousPictureId(int picture_id) const {
242 int next_picture_id = picture_id_ + 1;
243 if (picture_id < picture_id_) {
244 // Wrap
245 if (picture_id_ >= 0x80) {
246 // 15 bits used for picture id
247 return ((next_picture_id & 0x7FFF) == picture_id);
248 } else {
249 // 7 bits used for picture id
250 return ((next_picture_id & 0x7F) == picture_id);
251 }
252 }
253 // No wrap
254 return (next_picture_id == picture_id);
255 }
256
ContinuousSeqNum(uint16_t seq_num) const257 bool VCMDecodingState::ContinuousSeqNum(uint16_t seq_num) const {
258 return seq_num == static_cast<uint16_t>(sequence_num_ + 1);
259 }
260
ContinuousLayer(int temporal_id,int tl0_pic_id) const261 bool VCMDecodingState::ContinuousLayer(int temporal_id, int tl0_pic_id) const {
262 // First, check if applicable.
263 if (temporal_id == kNoTemporalIdx || tl0_pic_id == kNoTl0PicIdx)
264 return false;
265 // If this is the first frame to use temporal layers, make sure we start
266 // from base.
267 else if (tl0_pic_id_ == kNoTl0PicIdx && temporal_id_ == kNoTemporalIdx &&
268 temporal_id == 0)
269 return true;
270
271 // Current implementation: Look for base layer continuity.
272 if (temporal_id != 0)
273 return false;
274 return (static_cast<uint8_t>(tl0_pic_id_ + 1) == tl0_pic_id);
275 }
276
ContinuousFrameRefs(const VCMFrameBuffer * frame) const277 bool VCMDecodingState::ContinuousFrameRefs(const VCMFrameBuffer* frame) const {
278 uint8_t num_refs = frame->CodecSpecific()->codecSpecific.VP9.num_ref_pics;
279 for (uint8_t r = 0; r < num_refs; ++r) {
280 uint16_t frame_ref = frame->PictureId() -
281 frame->CodecSpecific()->codecSpecific.VP9.p_diff[r];
282 uint16_t frame_index = frame_ref % kFrameDecodedLength;
283 if (AheadOfFramesDecodedClearedTo(frame_index) ||
284 !frame_decoded_[frame_index]) {
285 return false;
286 }
287 }
288 return true;
289 }
290
UsingPictureId(const VCMFrameBuffer * frame) const291 bool VCMDecodingState::UsingPictureId(const VCMFrameBuffer* frame) const {
292 return (frame->PictureId() != kNoPictureId && picture_id_ != kNoPictureId);
293 }
294
UsingFlexibleMode(const VCMFrameBuffer * frame) const295 bool VCMDecodingState::UsingFlexibleMode(const VCMFrameBuffer* frame) const {
296 bool is_flexible_mode =
297 frame->CodecSpecific()->codecType == kVideoCodecVP9 &&
298 frame->CodecSpecific()->codecSpecific.VP9.flexible_mode;
299 if (is_flexible_mode && frame->PictureId() == kNoPictureId) {
300 RTC_LOG(LS_WARNING) << "Frame is marked as using flexible mode but no"
301 "picture id is set.";
302 return false;
303 }
304 return is_flexible_mode;
305 }
306
// TODO(philipel): change how this check works; it practically
// limits the max p_diff to 64.
AheadOfFramesDecodedClearedTo(uint16_t index) const309 bool VCMDecodingState::AheadOfFramesDecodedClearedTo(uint16_t index) const {
310 // No way of knowing for sure if we are actually ahead of
311 // frame_decoded_cleared_to_. We just make the assumption
312 // that we are not trying to reference back to a very old
313 // index, but instead are referencing a newer index.
314 uint16_t diff =
315 index > frame_decoded_cleared_to_
316 ? kFrameDecodedLength - (index - frame_decoded_cleared_to_)
317 : frame_decoded_cleared_to_ - index;
318 return diff > kFrameDecodedLength / 2;
319 }
320
HaveSpsAndPps(const std::vector<NaluInfo> & nalus) const321 bool VCMDecodingState::HaveSpsAndPps(const std::vector<NaluInfo>& nalus) const {
322 std::set<int> new_sps;
323 std::map<int, int> new_pps;
324 for (const NaluInfo& nalu : nalus) {
325 // Check if this nalu actually contains sps/pps information or dependencies.
326 if (nalu.sps_id == -1 && nalu.pps_id == -1)
327 continue;
328 switch (nalu.type) {
329 case H264::NaluType::kPps:
330 if (nalu.pps_id < 0) {
331 RTC_LOG(LS_WARNING) << "Received pps without pps id.";
332 } else if (nalu.sps_id < 0) {
333 RTC_LOG(LS_WARNING) << "Received pps without sps id.";
334 } else {
335 new_pps[nalu.pps_id] = nalu.sps_id;
336 }
337 break;
338 case H264::NaluType::kSps:
339 if (nalu.sps_id < 0) {
340 RTC_LOG(LS_WARNING) << "Received sps without sps id.";
341 } else {
342 new_sps.insert(nalu.sps_id);
343 }
344 break;
345 default: {
346 int needed_sps = -1;
347 auto pps_it = new_pps.find(nalu.pps_id);
348 if (pps_it != new_pps.end()) {
349 needed_sps = pps_it->second;
350 } else {
351 auto pps_it2 = received_pps_.find(nalu.pps_id);
352 if (pps_it2 == received_pps_.end()) {
353 return false;
354 }
355 needed_sps = pps_it2->second;
356 }
357 if (new_sps.find(needed_sps) == new_sps.end() &&
358 received_sps_.find(needed_sps) == received_sps_.end()) {
359 return false;
360 }
361 break;
362 }
363 }
364 }
365 return true;
366 }
367
368 } // namespace webrtc
369