/*
 *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10 
11 #include "modules/video_coding/rtp_vp9_ref_finder.h"
12 
13 #include <algorithm>
14 #include <utility>
15 
16 #include "rtc_base/logging.h"
17 
18 namespace webrtc {
ManageFrame(std::unique_ptr<RtpFrameObject> frame)19 RtpFrameReferenceFinder::ReturnVector RtpVp9RefFinder::ManageFrame(
20     std::unique_ptr<RtpFrameObject> frame) {
21   const RTPVideoHeaderVP9& codec_header = absl::get<RTPVideoHeaderVP9>(
22       frame->GetRtpVideoHeader().video_type_header);
23 
24   if (codec_header.temporal_idx != kNoTemporalIdx)
25     frame->SetTemporalIndex(codec_header.temporal_idx);
26   frame->SetSpatialIndex(codec_header.spatial_idx);
27   frame->SetId(codec_header.picture_id & (kFrameIdLength - 1));
28 
29   FrameDecision decision;
30   if (codec_header.temporal_idx >= kMaxTemporalLayers ||
31       codec_header.spatial_idx >= kMaxSpatialLayers) {
32     decision = kDrop;
33   } else if (codec_header.flexible_mode) {
34     decision = ManageFrameFlexible(frame.get(), codec_header);
35   } else {
36     if (codec_header.tl0_pic_idx == kNoTl0PicIdx) {
37       RTC_LOG(LS_WARNING) << "TL0PICIDX is expected to be present in "
38                              "non-flexible mode.";
39       decision = kDrop;
40     } else {
41       int64_t unwrapped_tl0 =
42           tl0_unwrapper_.Unwrap(codec_header.tl0_pic_idx & 0xFF);
43       decision = ManageFrameGof(frame.get(), codec_header, unwrapped_tl0);
44 
45       if (decision == kStash) {
46         if (stashed_frames_.size() > kMaxStashedFrames) {
47           stashed_frames_.pop_back();
48         }
49 
50         stashed_frames_.push_front(
51             {.unwrapped_tl0 = unwrapped_tl0, .frame = std::move(frame)});
52       }
53     }
54   }
55 
56   RtpFrameReferenceFinder::ReturnVector res;
57   switch (decision) {
58     case kStash:
59       return res;
60     case kHandOff:
61       res.push_back(std::move(frame));
62       RetryStashedFrames(res);
63       return res;
64     case kDrop:
65       return res;
66   }
67 
68   return res;
69 }
70 
ManageFrameFlexible(RtpFrameObject * frame,const RTPVideoHeaderVP9 & codec_header)71 RtpVp9RefFinder::FrameDecision RtpVp9RefFinder::ManageFrameFlexible(
72     RtpFrameObject* frame,
73     const RTPVideoHeaderVP9& codec_header) {
74   if (codec_header.num_ref_pics > EncodedFrame::kMaxFrameReferences) {
75     return kDrop;
76   }
77 
78   frame->num_references = codec_header.num_ref_pics;
79   for (size_t i = 0; i < frame->num_references; ++i) {
80     frame->references[i] =
81         Subtract<kFrameIdLength>(frame->Id(), codec_header.pid_diff[i]);
82   }
83 
84   FlattenFrameIdAndRefs(frame, codec_header.inter_layer_predicted);
85   return kHandOff;
86 }
87 
ManageFrameGof(RtpFrameObject * frame,const RTPVideoHeaderVP9 & codec_header,int64_t unwrapped_tl0)88 RtpVp9RefFinder::FrameDecision RtpVp9RefFinder::ManageFrameGof(
89     RtpFrameObject* frame,
90     const RTPVideoHeaderVP9& codec_header,
91     int64_t unwrapped_tl0) {
92   GofInfo* info;
93   if (codec_header.ss_data_available) {
94     if (codec_header.temporal_idx != 0) {
95       RTC_LOG(LS_WARNING) << "Received scalability structure on a non base "
96                              "layer frame. Scalability structure ignored.";
97     } else {
98       if (codec_header.gof.num_frames_in_gof > kMaxVp9FramesInGof) {
99         return kDrop;
100       }
101 
102       for (size_t i = 0; i < codec_header.gof.num_frames_in_gof; ++i) {
103         if (codec_header.gof.num_ref_pics[i] > kMaxVp9RefPics) {
104           return kDrop;
105         }
106       }
107 
108       GofInfoVP9 gof = codec_header.gof;
109       if (gof.num_frames_in_gof == 0) {
110         RTC_LOG(LS_WARNING) << "Number of frames in GOF is zero. Assume "
111                                "that stream has only one temporal layer.";
112         gof.SetGofInfoVP9(kTemporalStructureMode1);
113       }
114 
115       current_ss_idx_ = Add<kMaxGofSaved>(current_ss_idx_, 1);
116       scalability_structures_[current_ss_idx_] = gof;
117       scalability_structures_[current_ss_idx_].pid_start = frame->Id();
118       gof_info_.emplace(
119           unwrapped_tl0,
120           GofInfo(&scalability_structures_[current_ss_idx_], frame->Id()));
121     }
122 
123     const auto gof_info_it = gof_info_.find(unwrapped_tl0);
124     if (gof_info_it == gof_info_.end())
125       return kStash;
126 
127     info = &gof_info_it->second;
128 
129     if (frame->frame_type() == VideoFrameType::kVideoFrameKey) {
130       frame->num_references = 0;
131       FrameReceivedVp9(frame->Id(), info);
132       FlattenFrameIdAndRefs(frame, codec_header.inter_layer_predicted);
133       return kHandOff;
134     }
135   } else if (frame->frame_type() == VideoFrameType::kVideoFrameKey) {
136     if (frame->SpatialIndex() == 0) {
137       RTC_LOG(LS_WARNING) << "Received keyframe without scalability structure";
138       return kDrop;
139     }
140     const auto gof_info_it = gof_info_.find(unwrapped_tl0);
141     if (gof_info_it == gof_info_.end())
142       return kStash;
143 
144     info = &gof_info_it->second;
145 
146     frame->num_references = 0;
147     FrameReceivedVp9(frame->Id(), info);
148     FlattenFrameIdAndRefs(frame, codec_header.inter_layer_predicted);
149     return kHandOff;
150   } else {
151     auto gof_info_it = gof_info_.find(
152         (codec_header.temporal_idx == 0) ? unwrapped_tl0 - 1 : unwrapped_tl0);
153 
154     // Gof info for this frame is not available yet, stash this frame.
155     if (gof_info_it == gof_info_.end())
156       return kStash;
157 
158     if (codec_header.temporal_idx == 0) {
159       gof_info_it = gof_info_
160                         .emplace(unwrapped_tl0,
161                                  GofInfo(gof_info_it->second.gof, frame->Id()))
162                         .first;
163     }
164 
165     info = &gof_info_it->second;
166   }
167 
168   // Clean up info for base layers that are too old.
169   int64_t old_tl0_pic_idx = unwrapped_tl0 - kMaxGofSaved;
170   auto clean_gof_info_to = gof_info_.lower_bound(old_tl0_pic_idx);
171   gof_info_.erase(gof_info_.begin(), clean_gof_info_to);
172 
173   FrameReceivedVp9(frame->Id(), info);
174 
175   // Make sure we don't miss any frame that could potentially have the
176   // up switch flag set.
177   if (MissingRequiredFrameVp9(frame->Id(), *info))
178     return kStash;
179 
180   if (codec_header.temporal_up_switch)
181     up_switch_.emplace(frame->Id(), codec_header.temporal_idx);
182 
183   // Clean out old info about up switch frames.
184   uint16_t old_picture_id = Subtract<kFrameIdLength>(frame->Id(), 50);
185   auto up_switch_erase_to = up_switch_.lower_bound(old_picture_id);
186   up_switch_.erase(up_switch_.begin(), up_switch_erase_to);
187 
188   size_t diff =
189       ForwardDiff<uint16_t, kFrameIdLength>(info->gof->pid_start, frame->Id());
190   size_t gof_idx = diff % info->gof->num_frames_in_gof;
191 
192   if (info->gof->num_ref_pics[gof_idx] > EncodedFrame::kMaxFrameReferences) {
193     return kDrop;
194   }
195   // Populate references according to the scalability structure.
196   frame->num_references = info->gof->num_ref_pics[gof_idx];
197   for (size_t i = 0; i < frame->num_references; ++i) {
198     frame->references[i] =
199         Subtract<kFrameIdLength>(frame->Id(), info->gof->pid_diff[gof_idx][i]);
200 
201     // If this is a reference to a frame earlier than the last up switch point,
202     // then ignore this reference.
203     if (UpSwitchInIntervalVp9(frame->Id(), codec_header.temporal_idx,
204                               frame->references[i])) {
205       --frame->num_references;
206     }
207   }
208 
209   // Override GOF references.
210   if (!codec_header.inter_pic_predicted) {
211     frame->num_references = 0;
212   }
213 
214   FlattenFrameIdAndRefs(frame, codec_header.inter_layer_predicted);
215   return kHandOff;
216 }
217 
MissingRequiredFrameVp9(uint16_t picture_id,const GofInfo & info)218 bool RtpVp9RefFinder::MissingRequiredFrameVp9(uint16_t picture_id,
219                                               const GofInfo& info) {
220   size_t diff =
221       ForwardDiff<uint16_t, kFrameIdLength>(info.gof->pid_start, picture_id);
222   size_t gof_idx = diff % info.gof->num_frames_in_gof;
223   size_t temporal_idx = info.gof->temporal_idx[gof_idx];
224 
225   if (temporal_idx >= kMaxTemporalLayers) {
226     RTC_LOG(LS_WARNING) << "At most " << kMaxTemporalLayers
227                         << " temporal "
228                            "layers are supported.";
229     return true;
230   }
231 
232   // For every reference this frame has, check if there is a frame missing in
233   // the interval (`ref_pid`, `picture_id`) in any of the lower temporal
234   // layers. If so, we are missing a required frame.
235   uint8_t num_references = info.gof->num_ref_pics[gof_idx];
236   for (size_t i = 0; i < num_references; ++i) {
237     uint16_t ref_pid =
238         Subtract<kFrameIdLength>(picture_id, info.gof->pid_diff[gof_idx][i]);
239     for (size_t l = 0; l < temporal_idx; ++l) {
240       auto missing_frame_it = missing_frames_for_layer_[l].lower_bound(ref_pid);
241       if (missing_frame_it != missing_frames_for_layer_[l].end() &&
242           AheadOf<uint16_t, kFrameIdLength>(picture_id, *missing_frame_it)) {
243         return true;
244       }
245     }
246   }
247   return false;
248 }
249 
FrameReceivedVp9(uint16_t picture_id,GofInfo * info)250 void RtpVp9RefFinder::FrameReceivedVp9(uint16_t picture_id, GofInfo* info) {
251   int last_picture_id = info->last_picture_id;
252   size_t gof_size = std::min(info->gof->num_frames_in_gof, kMaxVp9FramesInGof);
253 
254   // If there is a gap, find which temporal layer the missing frames
255   // belong to and add the frame as missing for that temporal layer.
256   // Otherwise, remove this frame from the set of missing frames.
257   if (AheadOf<uint16_t, kFrameIdLength>(picture_id, last_picture_id)) {
258     size_t diff = ForwardDiff<uint16_t, kFrameIdLength>(info->gof->pid_start,
259                                                         last_picture_id);
260     size_t gof_idx = diff % gof_size;
261 
262     last_picture_id = Add<kFrameIdLength>(last_picture_id, 1);
263     while (last_picture_id != picture_id) {
264       gof_idx = (gof_idx + 1) % gof_size;
265       RTC_CHECK(gof_idx < kMaxVp9FramesInGof);
266 
267       size_t temporal_idx = info->gof->temporal_idx[gof_idx];
268       if (temporal_idx >= kMaxTemporalLayers) {
269         RTC_LOG(LS_WARNING) << "At most " << kMaxTemporalLayers
270                             << " temporal "
271                                "layers are supported.";
272         return;
273       }
274 
275       missing_frames_for_layer_[temporal_idx].insert(last_picture_id);
276       last_picture_id = Add<kFrameIdLength>(last_picture_id, 1);
277     }
278 
279     info->last_picture_id = last_picture_id;
280   } else {
281     size_t diff =
282         ForwardDiff<uint16_t, kFrameIdLength>(info->gof->pid_start, picture_id);
283     size_t gof_idx = diff % gof_size;
284     RTC_CHECK(gof_idx < kMaxVp9FramesInGof);
285 
286     size_t temporal_idx = info->gof->temporal_idx[gof_idx];
287     if (temporal_idx >= kMaxTemporalLayers) {
288       RTC_LOG(LS_WARNING) << "At most " << kMaxTemporalLayers
289                           << " temporal "
290                              "layers are supported.";
291       return;
292     }
293 
294     missing_frames_for_layer_[temporal_idx].erase(picture_id);
295   }
296 }
297 
UpSwitchInIntervalVp9(uint16_t picture_id,uint8_t temporal_idx,uint16_t pid_ref)298 bool RtpVp9RefFinder::UpSwitchInIntervalVp9(uint16_t picture_id,
299                                             uint8_t temporal_idx,
300                                             uint16_t pid_ref) {
301   for (auto up_switch_it = up_switch_.upper_bound(pid_ref);
302        up_switch_it != up_switch_.end() &&
303        AheadOf<uint16_t, kFrameIdLength>(picture_id, up_switch_it->first);
304        ++up_switch_it) {
305     if (up_switch_it->second < temporal_idx)
306       return true;
307   }
308 
309   return false;
310 }
311 
RetryStashedFrames(RtpFrameReferenceFinder::ReturnVector & res)312 void RtpVp9RefFinder::RetryStashedFrames(
313     RtpFrameReferenceFinder::ReturnVector& res) {
314   bool complete_frame = false;
315   do {
316     complete_frame = false;
317     for (auto it = stashed_frames_.begin(); it != stashed_frames_.end();) {
318       const RTPVideoHeaderVP9& codec_header = absl::get<RTPVideoHeaderVP9>(
319           it->frame->GetRtpVideoHeader().video_type_header);
320       RTC_DCHECK(!codec_header.flexible_mode);
321       FrameDecision decision =
322           ManageFrameGof(it->frame.get(), codec_header, it->unwrapped_tl0);
323 
324       switch (decision) {
325         case kStash:
326           ++it;
327           break;
328         case kHandOff:
329           complete_frame = true;
330           res.push_back(std::move(it->frame));
331           [[fallthrough]];
332         case kDrop:
333           it = stashed_frames_.erase(it);
334       }
335     }
336   } while (complete_frame);
337 }
338 
FlattenFrameIdAndRefs(RtpFrameObject * frame,bool inter_layer_predicted)339 void RtpVp9RefFinder::FlattenFrameIdAndRefs(RtpFrameObject* frame,
340                                             bool inter_layer_predicted) {
341   for (size_t i = 0; i < frame->num_references; ++i) {
342     frame->references[i] =
343         unwrapper_.Unwrap(frame->references[i]) * kMaxSpatialLayers +
344         *frame->SpatialIndex();
345   }
346   frame->SetId(unwrapper_.Unwrap(frame->Id()) * kMaxSpatialLayers +
347                *frame->SpatialIndex());
348 
349   if (inter_layer_predicted &&
350       frame->num_references + 1 <= EncodedFrame::kMaxFrameReferences) {
351     frame->references[frame->num_references] = frame->Id() - 1;
352     ++frame->num_references;
353   }
354 }
355 
ClearTo(uint16_t seq_num)356 void RtpVp9RefFinder::ClearTo(uint16_t seq_num) {
357   auto it = stashed_frames_.begin();
358   while (it != stashed_frames_.end()) {
359     if (AheadOf<uint16_t>(seq_num, it->frame->first_seq_num())) {
360       it = stashed_frames_.erase(it);
361     } else {
362       ++it;
363     }
364   }
365 }
366 
367 }  // namespace webrtc
368