1 /*
2 * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "modules/video_coding/rtp_vp9_ref_finder.h"
12
13 #include <algorithm>
14 #include <utility>
15
16 #include "rtc_base/logging.h"
17
18 namespace webrtc {
ManageFrame(std::unique_ptr<RtpFrameObject> frame)19 RtpFrameReferenceFinder::ReturnVector RtpVp9RefFinder::ManageFrame(
20 std::unique_ptr<RtpFrameObject> frame) {
21 const RTPVideoHeaderVP9& codec_header = absl::get<RTPVideoHeaderVP9>(
22 frame->GetRtpVideoHeader().video_type_header);
23
24 if (codec_header.temporal_idx != kNoTemporalIdx)
25 frame->SetTemporalIndex(codec_header.temporal_idx);
26 frame->SetSpatialIndex(codec_header.spatial_idx);
27 frame->SetId(codec_header.picture_id & (kFrameIdLength - 1));
28
29 FrameDecision decision;
30 if (codec_header.temporal_idx >= kMaxTemporalLayers ||
31 codec_header.spatial_idx >= kMaxSpatialLayers) {
32 decision = kDrop;
33 } else if (codec_header.flexible_mode) {
34 decision = ManageFrameFlexible(frame.get(), codec_header);
35 } else {
36 if (codec_header.tl0_pic_idx == kNoTl0PicIdx) {
37 RTC_LOG(LS_WARNING) << "TL0PICIDX is expected to be present in "
38 "non-flexible mode.";
39 decision = kDrop;
40 } else {
41 int64_t unwrapped_tl0 =
42 tl0_unwrapper_.Unwrap(codec_header.tl0_pic_idx & 0xFF);
43 decision = ManageFrameGof(frame.get(), codec_header, unwrapped_tl0);
44
45 if (decision == kStash) {
46 if (stashed_frames_.size() > kMaxStashedFrames) {
47 stashed_frames_.pop_back();
48 }
49
50 stashed_frames_.push_front(
51 {.unwrapped_tl0 = unwrapped_tl0, .frame = std::move(frame)});
52 }
53 }
54 }
55
56 RtpFrameReferenceFinder::ReturnVector res;
57 switch (decision) {
58 case kStash:
59 return res;
60 case kHandOff:
61 res.push_back(std::move(frame));
62 RetryStashedFrames(res);
63 return res;
64 case kDrop:
65 return res;
66 }
67
68 return res;
69 }
70
ManageFrameFlexible(RtpFrameObject * frame,const RTPVideoHeaderVP9 & codec_header)71 RtpVp9RefFinder::FrameDecision RtpVp9RefFinder::ManageFrameFlexible(
72 RtpFrameObject* frame,
73 const RTPVideoHeaderVP9& codec_header) {
74 if (codec_header.num_ref_pics > EncodedFrame::kMaxFrameReferences) {
75 return kDrop;
76 }
77
78 frame->num_references = codec_header.num_ref_pics;
79 for (size_t i = 0; i < frame->num_references; ++i) {
80 frame->references[i] =
81 Subtract<kFrameIdLength>(frame->Id(), codec_header.pid_diff[i]);
82 }
83
84 FlattenFrameIdAndRefs(frame, codec_header.inter_layer_predicted);
85 return kHandOff;
86 }
87
ManageFrameGof(RtpFrameObject * frame,const RTPVideoHeaderVP9 & codec_header,int64_t unwrapped_tl0)88 RtpVp9RefFinder::FrameDecision RtpVp9RefFinder::ManageFrameGof(
89 RtpFrameObject* frame,
90 const RTPVideoHeaderVP9& codec_header,
91 int64_t unwrapped_tl0) {
92 GofInfo* info;
93 if (codec_header.ss_data_available) {
94 if (codec_header.temporal_idx != 0) {
95 RTC_LOG(LS_WARNING) << "Received scalability structure on a non base "
96 "layer frame. Scalability structure ignored.";
97 } else {
98 if (codec_header.gof.num_frames_in_gof > kMaxVp9FramesInGof) {
99 return kDrop;
100 }
101
102 for (size_t i = 0; i < codec_header.gof.num_frames_in_gof; ++i) {
103 if (codec_header.gof.num_ref_pics[i] > kMaxVp9RefPics) {
104 return kDrop;
105 }
106 }
107
108 GofInfoVP9 gof = codec_header.gof;
109 if (gof.num_frames_in_gof == 0) {
110 RTC_LOG(LS_WARNING) << "Number of frames in GOF is zero. Assume "
111 "that stream has only one temporal layer.";
112 gof.SetGofInfoVP9(kTemporalStructureMode1);
113 }
114
115 current_ss_idx_ = Add<kMaxGofSaved>(current_ss_idx_, 1);
116 scalability_structures_[current_ss_idx_] = gof;
117 scalability_structures_[current_ss_idx_].pid_start = frame->Id();
118 gof_info_.emplace(
119 unwrapped_tl0,
120 GofInfo(&scalability_structures_[current_ss_idx_], frame->Id()));
121 }
122
123 const auto gof_info_it = gof_info_.find(unwrapped_tl0);
124 if (gof_info_it == gof_info_.end())
125 return kStash;
126
127 info = &gof_info_it->second;
128
129 if (frame->frame_type() == VideoFrameType::kVideoFrameKey) {
130 frame->num_references = 0;
131 FrameReceivedVp9(frame->Id(), info);
132 FlattenFrameIdAndRefs(frame, codec_header.inter_layer_predicted);
133 return kHandOff;
134 }
135 } else if (frame->frame_type() == VideoFrameType::kVideoFrameKey) {
136 if (frame->SpatialIndex() == 0) {
137 RTC_LOG(LS_WARNING) << "Received keyframe without scalability structure";
138 return kDrop;
139 }
140 const auto gof_info_it = gof_info_.find(unwrapped_tl0);
141 if (gof_info_it == gof_info_.end())
142 return kStash;
143
144 info = &gof_info_it->second;
145
146 frame->num_references = 0;
147 FrameReceivedVp9(frame->Id(), info);
148 FlattenFrameIdAndRefs(frame, codec_header.inter_layer_predicted);
149 return kHandOff;
150 } else {
151 auto gof_info_it = gof_info_.find(
152 (codec_header.temporal_idx == 0) ? unwrapped_tl0 - 1 : unwrapped_tl0);
153
154 // Gof info for this frame is not available yet, stash this frame.
155 if (gof_info_it == gof_info_.end())
156 return kStash;
157
158 if (codec_header.temporal_idx == 0) {
159 gof_info_it = gof_info_
160 .emplace(unwrapped_tl0,
161 GofInfo(gof_info_it->second.gof, frame->Id()))
162 .first;
163 }
164
165 info = &gof_info_it->second;
166 }
167
168 // Clean up info for base layers that are too old.
169 int64_t old_tl0_pic_idx = unwrapped_tl0 - kMaxGofSaved;
170 auto clean_gof_info_to = gof_info_.lower_bound(old_tl0_pic_idx);
171 gof_info_.erase(gof_info_.begin(), clean_gof_info_to);
172
173 FrameReceivedVp9(frame->Id(), info);
174
175 // Make sure we don't miss any frame that could potentially have the
176 // up switch flag set.
177 if (MissingRequiredFrameVp9(frame->Id(), *info))
178 return kStash;
179
180 if (codec_header.temporal_up_switch)
181 up_switch_.emplace(frame->Id(), codec_header.temporal_idx);
182
183 // Clean out old info about up switch frames.
184 uint16_t old_picture_id = Subtract<kFrameIdLength>(frame->Id(), 50);
185 auto up_switch_erase_to = up_switch_.lower_bound(old_picture_id);
186 up_switch_.erase(up_switch_.begin(), up_switch_erase_to);
187
188 size_t diff =
189 ForwardDiff<uint16_t, kFrameIdLength>(info->gof->pid_start, frame->Id());
190 size_t gof_idx = diff % info->gof->num_frames_in_gof;
191
192 if (info->gof->num_ref_pics[gof_idx] > EncodedFrame::kMaxFrameReferences) {
193 return kDrop;
194 }
195 // Populate references according to the scalability structure.
196 frame->num_references = info->gof->num_ref_pics[gof_idx];
197 for (size_t i = 0; i < frame->num_references; ++i) {
198 frame->references[i] =
199 Subtract<kFrameIdLength>(frame->Id(), info->gof->pid_diff[gof_idx][i]);
200
201 // If this is a reference to a frame earlier than the last up switch point,
202 // then ignore this reference.
203 if (UpSwitchInIntervalVp9(frame->Id(), codec_header.temporal_idx,
204 frame->references[i])) {
205 --frame->num_references;
206 }
207 }
208
209 // Override GOF references.
210 if (!codec_header.inter_pic_predicted) {
211 frame->num_references = 0;
212 }
213
214 FlattenFrameIdAndRefs(frame, codec_header.inter_layer_predicted);
215 return kHandOff;
216 }
217
MissingRequiredFrameVp9(uint16_t picture_id,const GofInfo & info)218 bool RtpVp9RefFinder::MissingRequiredFrameVp9(uint16_t picture_id,
219 const GofInfo& info) {
220 size_t diff =
221 ForwardDiff<uint16_t, kFrameIdLength>(info.gof->pid_start, picture_id);
222 size_t gof_idx = diff % info.gof->num_frames_in_gof;
223 size_t temporal_idx = info.gof->temporal_idx[gof_idx];
224
225 if (temporal_idx >= kMaxTemporalLayers) {
226 RTC_LOG(LS_WARNING) << "At most " << kMaxTemporalLayers
227 << " temporal "
228 "layers are supported.";
229 return true;
230 }
231
232 // For every reference this frame has, check if there is a frame missing in
233 // the interval (`ref_pid`, `picture_id`) in any of the lower temporal
234 // layers. If so, we are missing a required frame.
235 uint8_t num_references = info.gof->num_ref_pics[gof_idx];
236 for (size_t i = 0; i < num_references; ++i) {
237 uint16_t ref_pid =
238 Subtract<kFrameIdLength>(picture_id, info.gof->pid_diff[gof_idx][i]);
239 for (size_t l = 0; l < temporal_idx; ++l) {
240 auto missing_frame_it = missing_frames_for_layer_[l].lower_bound(ref_pid);
241 if (missing_frame_it != missing_frames_for_layer_[l].end() &&
242 AheadOf<uint16_t, kFrameIdLength>(picture_id, *missing_frame_it)) {
243 return true;
244 }
245 }
246 }
247 return false;
248 }
249
FrameReceivedVp9(uint16_t picture_id,GofInfo * info)250 void RtpVp9RefFinder::FrameReceivedVp9(uint16_t picture_id, GofInfo* info) {
251 int last_picture_id = info->last_picture_id;
252 size_t gof_size = std::min(info->gof->num_frames_in_gof, kMaxVp9FramesInGof);
253
254 // If there is a gap, find which temporal layer the missing frames
255 // belong to and add the frame as missing for that temporal layer.
256 // Otherwise, remove this frame from the set of missing frames.
257 if (AheadOf<uint16_t, kFrameIdLength>(picture_id, last_picture_id)) {
258 size_t diff = ForwardDiff<uint16_t, kFrameIdLength>(info->gof->pid_start,
259 last_picture_id);
260 size_t gof_idx = diff % gof_size;
261
262 last_picture_id = Add<kFrameIdLength>(last_picture_id, 1);
263 while (last_picture_id != picture_id) {
264 gof_idx = (gof_idx + 1) % gof_size;
265 RTC_CHECK(gof_idx < kMaxVp9FramesInGof);
266
267 size_t temporal_idx = info->gof->temporal_idx[gof_idx];
268 if (temporal_idx >= kMaxTemporalLayers) {
269 RTC_LOG(LS_WARNING) << "At most " << kMaxTemporalLayers
270 << " temporal "
271 "layers are supported.";
272 return;
273 }
274
275 missing_frames_for_layer_[temporal_idx].insert(last_picture_id);
276 last_picture_id = Add<kFrameIdLength>(last_picture_id, 1);
277 }
278
279 info->last_picture_id = last_picture_id;
280 } else {
281 size_t diff =
282 ForwardDiff<uint16_t, kFrameIdLength>(info->gof->pid_start, picture_id);
283 size_t gof_idx = diff % gof_size;
284 RTC_CHECK(gof_idx < kMaxVp9FramesInGof);
285
286 size_t temporal_idx = info->gof->temporal_idx[gof_idx];
287 if (temporal_idx >= kMaxTemporalLayers) {
288 RTC_LOG(LS_WARNING) << "At most " << kMaxTemporalLayers
289 << " temporal "
290 "layers are supported.";
291 return;
292 }
293
294 missing_frames_for_layer_[temporal_idx].erase(picture_id);
295 }
296 }
297
UpSwitchInIntervalVp9(uint16_t picture_id,uint8_t temporal_idx,uint16_t pid_ref)298 bool RtpVp9RefFinder::UpSwitchInIntervalVp9(uint16_t picture_id,
299 uint8_t temporal_idx,
300 uint16_t pid_ref) {
301 for (auto up_switch_it = up_switch_.upper_bound(pid_ref);
302 up_switch_it != up_switch_.end() &&
303 AheadOf<uint16_t, kFrameIdLength>(picture_id, up_switch_it->first);
304 ++up_switch_it) {
305 if (up_switch_it->second < temporal_idx)
306 return true;
307 }
308
309 return false;
310 }
311
RetryStashedFrames(RtpFrameReferenceFinder::ReturnVector & res)312 void RtpVp9RefFinder::RetryStashedFrames(
313 RtpFrameReferenceFinder::ReturnVector& res) {
314 bool complete_frame = false;
315 do {
316 complete_frame = false;
317 for (auto it = stashed_frames_.begin(); it != stashed_frames_.end();) {
318 const RTPVideoHeaderVP9& codec_header = absl::get<RTPVideoHeaderVP9>(
319 it->frame->GetRtpVideoHeader().video_type_header);
320 RTC_DCHECK(!codec_header.flexible_mode);
321 FrameDecision decision =
322 ManageFrameGof(it->frame.get(), codec_header, it->unwrapped_tl0);
323
324 switch (decision) {
325 case kStash:
326 ++it;
327 break;
328 case kHandOff:
329 complete_frame = true;
330 res.push_back(std::move(it->frame));
331 [[fallthrough]];
332 case kDrop:
333 it = stashed_frames_.erase(it);
334 }
335 }
336 } while (complete_frame);
337 }
338
FlattenFrameIdAndRefs(RtpFrameObject * frame,bool inter_layer_predicted)339 void RtpVp9RefFinder::FlattenFrameIdAndRefs(RtpFrameObject* frame,
340 bool inter_layer_predicted) {
341 for (size_t i = 0; i < frame->num_references; ++i) {
342 frame->references[i] =
343 unwrapper_.Unwrap(frame->references[i]) * kMaxSpatialLayers +
344 *frame->SpatialIndex();
345 }
346 frame->SetId(unwrapper_.Unwrap(frame->Id()) * kMaxSpatialLayers +
347 *frame->SpatialIndex());
348
349 if (inter_layer_predicted &&
350 frame->num_references + 1 <= EncodedFrame::kMaxFrameReferences) {
351 frame->references[frame->num_references] = frame->Id() - 1;
352 ++frame->num_references;
353 }
354 }
355
ClearTo(uint16_t seq_num)356 void RtpVp9RefFinder::ClearTo(uint16_t seq_num) {
357 auto it = stashed_frames_.begin();
358 while (it != stashed_frames_.end()) {
359 if (AheadOf<uint16_t>(seq_num, it->frame->first_seq_num())) {
360 it = stashed_frames_.erase(it);
361 } else {
362 ++it;
363 }
364 }
365 }
366
367 } // namespace webrtc
368