• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "api/video/rtp_video_frame_assembler.h"
12 
13 #include <algorithm>
14 #include <cstdint>
15 #include <map>
16 #include <memory>
17 #include <utility>
18 #include <vector>
19 
20 #include "absl/container/inlined_vector.h"
21 #include "absl/types/optional.h"
22 #include "modules/rtp_rtcp/source/rtp_dependency_descriptor_extension.h"
23 #include "modules/rtp_rtcp/source/rtp_generic_frame_descriptor_extension.h"
24 #include "modules/rtp_rtcp/source/rtp_packet_received.h"
25 #include "modules/rtp_rtcp/source/video_rtp_depacketizer_av1.h"
26 #include "modules/rtp_rtcp/source/video_rtp_depacketizer_generic.h"
27 #include "modules/rtp_rtcp/source/video_rtp_depacketizer_h264.h"
28 #include "modules/rtp_rtcp/source/video_rtp_depacketizer_raw.h"
29 #include "modules/rtp_rtcp/source/video_rtp_depacketizer_vp8.h"
30 #include "modules/rtp_rtcp/source/video_rtp_depacketizer_vp9.h"
31 #include "modules/video_coding/frame_object.h"
32 #include "modules/video_coding/packet_buffer.h"
33 #include "modules/video_coding/rtp_frame_reference_finder.h"
34 #include "rtc_base/logging.h"
35 
36 namespace webrtc {
37 namespace {
CreateDepacketizer(RtpVideoFrameAssembler::PayloadFormat payload_format)38 std::unique_ptr<VideoRtpDepacketizer> CreateDepacketizer(
39     RtpVideoFrameAssembler::PayloadFormat payload_format) {
40   switch (payload_format) {
41     case RtpVideoFrameAssembler::kRaw:
42       return std::make_unique<VideoRtpDepacketizerRaw>();
43     case RtpVideoFrameAssembler::kH264:
44       return std::make_unique<VideoRtpDepacketizerH264>();
45     case RtpVideoFrameAssembler::kVp8:
46       return std::make_unique<VideoRtpDepacketizerVp8>();
47     case RtpVideoFrameAssembler::kVp9:
48       return std::make_unique<VideoRtpDepacketizerVp9>();
49     case RtpVideoFrameAssembler::kAv1:
50       return std::make_unique<VideoRtpDepacketizerAv1>();
51     case RtpVideoFrameAssembler::kGeneric:
52       return std::make_unique<VideoRtpDepacketizerGeneric>();
53   }
54   RTC_DCHECK_NOTREACHED();
55   return nullptr;
56 }
57 }  // namespace
58 
59 class RtpVideoFrameAssembler::Impl {
60  public:
61   explicit Impl(std::unique_ptr<VideoRtpDepacketizer> depacketizer);
62   ~Impl() = default;
63 
64   FrameVector InsertPacket(const RtpPacketReceived& packet);
65 
66  private:
67   using RtpFrameVector =
68       absl::InlinedVector<std::unique_ptr<RtpFrameObject>, 3>;
69 
70   RtpFrameVector AssembleFrames(
71       video_coding::PacketBuffer::InsertResult insert_result);
72   FrameVector FindReferences(RtpFrameVector frames);
73   FrameVector UpdateWithPadding(uint16_t seq_num);
74   bool ParseDependenciesDescriptorExtension(const RtpPacketReceived& rtp_packet,
75                                             RTPVideoHeader& video_header);
76   bool ParseGenericDescriptorExtension(const RtpPacketReceived& rtp_packet,
77                                        RTPVideoHeader& video_header);
78   void ClearOldData(uint16_t incoming_seq_num);
79 
80   std::unique_ptr<FrameDependencyStructure> video_structure_;
81   SeqNumUnwrapper<uint16_t> frame_id_unwrapper_;
82   absl::optional<int64_t> video_structure_frame_id_;
83   std::unique_ptr<VideoRtpDepacketizer> depacketizer_;
84   video_coding::PacketBuffer packet_buffer_;
85   RtpFrameReferenceFinder reference_finder_;
86 };
87 
Impl(std::unique_ptr<VideoRtpDepacketizer> depacketizer)88 RtpVideoFrameAssembler::Impl::Impl(
89     std::unique_ptr<VideoRtpDepacketizer> depacketizer)
90     : depacketizer_(std::move(depacketizer)),
91       packet_buffer_(/*start_buffer_size=*/2048, /*max_buffer_size=*/2048) {}
92 
InsertPacket(const RtpPacketReceived & rtp_packet)93 RtpVideoFrameAssembler::FrameVector RtpVideoFrameAssembler::Impl::InsertPacket(
94     const RtpPacketReceived& rtp_packet) {
95   if (rtp_packet.payload_size() == 0) {
96     ClearOldData(rtp_packet.SequenceNumber());
97     return UpdateWithPadding(rtp_packet.SequenceNumber());
98   }
99 
100   absl::optional<VideoRtpDepacketizer::ParsedRtpPayload> parsed_payload =
101       depacketizer_->Parse(rtp_packet.PayloadBuffer());
102 
103   if (parsed_payload == absl::nullopt) {
104     return {};
105   }
106 
107   if (rtp_packet.HasExtension<RtpDependencyDescriptorExtension>()) {
108     if (!ParseDependenciesDescriptorExtension(rtp_packet,
109                                               parsed_payload->video_header)) {
110       return {};
111     }
112   } else if (rtp_packet.HasExtension<RtpGenericFrameDescriptorExtension00>()) {
113     if (!ParseGenericDescriptorExtension(rtp_packet,
114                                          parsed_payload->video_header)) {
115       return {};
116     }
117   }
118 
119   parsed_payload->video_header.is_last_packet_in_frame |= rtp_packet.Marker();
120 
121   auto packet = std::make_unique<video_coding::PacketBuffer::Packet>(
122       rtp_packet, parsed_payload->video_header);
123   packet->video_payload = std::move(parsed_payload->video_payload);
124 
125   ClearOldData(rtp_packet.SequenceNumber());
126   return FindReferences(
127       AssembleFrames(packet_buffer_.InsertPacket(std::move(packet))));
128 }
129 
ClearOldData(uint16_t incoming_seq_num)130 void RtpVideoFrameAssembler::Impl::ClearOldData(uint16_t incoming_seq_num) {
131   constexpr uint16_t kOldSeqNumThreshold = 2000;
132   uint16_t old_seq_num = incoming_seq_num - kOldSeqNumThreshold;
133   packet_buffer_.ClearTo(old_seq_num);
134   reference_finder_.ClearTo(old_seq_num);
135 }
136 
137 RtpVideoFrameAssembler::Impl::RtpFrameVector
AssembleFrames(video_coding::PacketBuffer::InsertResult insert_result)138 RtpVideoFrameAssembler::Impl::AssembleFrames(
139     video_coding::PacketBuffer::InsertResult insert_result) {
140   video_coding::PacketBuffer::Packet* first_packet = nullptr;
141   std::vector<rtc::ArrayView<const uint8_t>> payloads;
142   RtpFrameVector result;
143 
144   for (auto& packet : insert_result.packets) {
145     if (packet->is_first_packet_in_frame()) {
146       first_packet = packet.get();
147       payloads.clear();
148     }
149     payloads.emplace_back(packet->video_payload);
150 
151     if (packet->is_last_packet_in_frame()) {
152       rtc::scoped_refptr<EncodedImageBuffer> bitstream =
153           depacketizer_->AssembleFrame(payloads);
154 
155       if (!bitstream) {
156         continue;
157       }
158 
159       const video_coding::PacketBuffer::Packet& last_packet = *packet;
160       result.push_back(std::make_unique<RtpFrameObject>(
161           first_packet->seq_num,                  //
162           last_packet.seq_num,                    //
163           last_packet.marker_bit,                 //
164           /*times_nacked=*/0,                     //
165           /*first_packet_received_time=*/0,       //
166           /*last_packet_received_time=*/0,        //
167           first_packet->timestamp,                //
168           /*ntp_time_ms=*/0,                      //
169           /*timing=*/VideoSendTiming(),           //
170           first_packet->payload_type,             //
171           first_packet->codec(),                  //
172           last_packet.video_header.rotation,      //
173           last_packet.video_header.content_type,  //
174           first_packet->video_header,             //
175           last_packet.video_header.color_space,   //
176           /*packet_infos=*/RtpPacketInfos(),      //
177           std::move(bitstream)));
178     }
179   }
180 
181   return result;
182 }
183 
184 RtpVideoFrameAssembler::FrameVector
FindReferences(RtpFrameVector frames)185 RtpVideoFrameAssembler::Impl::FindReferences(RtpFrameVector frames) {
186   FrameVector res;
187   for (auto& frame : frames) {
188     auto complete_frames = reference_finder_.ManageFrame(std::move(frame));
189     for (std::unique_ptr<RtpFrameObject>& complete_frame : complete_frames) {
190       uint16_t rtp_seq_num_start = complete_frame->first_seq_num();
191       uint16_t rtp_seq_num_end = complete_frame->last_seq_num();
192       res.emplace_back(rtp_seq_num_start, rtp_seq_num_end,
193                        std::move(complete_frame));
194     }
195   }
196   return res;
197 }
198 
199 RtpVideoFrameAssembler::FrameVector
UpdateWithPadding(uint16_t seq_num)200 RtpVideoFrameAssembler::Impl::UpdateWithPadding(uint16_t seq_num) {
201   auto res =
202       FindReferences(AssembleFrames(packet_buffer_.InsertPadding(seq_num)));
203   auto ref_finder_update = reference_finder_.PaddingReceived(seq_num);
204 
205   for (std::unique_ptr<RtpFrameObject>& complete_frame : ref_finder_update) {
206     uint16_t rtp_seq_num_start = complete_frame->first_seq_num();
207     uint16_t rtp_seq_num_end = complete_frame->last_seq_num();
208     res.emplace_back(rtp_seq_num_start, rtp_seq_num_end,
209                      std::move(complete_frame));
210   }
211 
212   return res;
213 }
214 
ParseDependenciesDescriptorExtension(const RtpPacketReceived & rtp_packet,RTPVideoHeader & video_header)215 bool RtpVideoFrameAssembler::Impl::ParseDependenciesDescriptorExtension(
216     const RtpPacketReceived& rtp_packet,
217     RTPVideoHeader& video_header) {
218   webrtc::DependencyDescriptor dependency_descriptor;
219 
220   if (!rtp_packet.GetExtension<RtpDependencyDescriptorExtension>(
221           video_structure_.get(), &dependency_descriptor)) {
222     // Descriptor is either malformed, or the template referenced is not in
223     // the `video_structure_` currently being held.
224     // TODO(bugs.webrtc.org/10342): Improve packet reordering behavior.
225     RTC_LOG(LS_WARNING) << "ssrc: " << rtp_packet.Ssrc()
226                         << " Failed to parse dependency descriptor.";
227     return false;
228   }
229 
230   if (dependency_descriptor.attached_structure != nullptr &&
231       !dependency_descriptor.first_packet_in_frame) {
232     RTC_LOG(LS_WARNING) << "ssrc: " << rtp_packet.Ssrc()
233                         << "Invalid dependency descriptor: structure "
234                            "attached to non first packet of a frame.";
235     return false;
236   }
237 
238   video_header.is_first_packet_in_frame =
239       dependency_descriptor.first_packet_in_frame;
240   video_header.is_last_packet_in_frame =
241       dependency_descriptor.last_packet_in_frame;
242 
243   int64_t frame_id =
244       frame_id_unwrapper_.Unwrap(dependency_descriptor.frame_number);
245   auto& generic_descriptor_info = video_header.generic.emplace();
246   generic_descriptor_info.frame_id = frame_id;
247   generic_descriptor_info.spatial_index =
248       dependency_descriptor.frame_dependencies.spatial_id;
249   generic_descriptor_info.temporal_index =
250       dependency_descriptor.frame_dependencies.temporal_id;
251 
252   for (int fdiff : dependency_descriptor.frame_dependencies.frame_diffs) {
253     generic_descriptor_info.dependencies.push_back(frame_id - fdiff);
254   }
255   for (int cdiff : dependency_descriptor.frame_dependencies.chain_diffs) {
256     generic_descriptor_info.chain_diffs.push_back(frame_id - cdiff);
257   }
258   generic_descriptor_info.decode_target_indications =
259       dependency_descriptor.frame_dependencies.decode_target_indications;
260   if (dependency_descriptor.resolution) {
261     video_header.width = dependency_descriptor.resolution->Width();
262     video_header.height = dependency_descriptor.resolution->Height();
263   }
264   if (dependency_descriptor.active_decode_targets_bitmask.has_value()) {
265     generic_descriptor_info.active_decode_targets =
266         *dependency_descriptor.active_decode_targets_bitmask;
267   }
268 
269   // FrameDependencyStructure is sent in the dependency descriptor of the first
270   // packet of a key frame and is required to parse all subsequent packets until
271   // the next key frame.
272   if (dependency_descriptor.attached_structure) {
273     RTC_DCHECK(dependency_descriptor.first_packet_in_frame);
274     if (video_structure_frame_id_ > frame_id) {
275       RTC_LOG(LS_WARNING)
276           << "Arrived key frame with id " << frame_id << " and structure id "
277           << dependency_descriptor.attached_structure->structure_id
278           << " is older than the latest received key frame with id "
279           << *video_structure_frame_id_ << " and structure id "
280           << video_structure_->structure_id;
281       return false;
282     }
283     video_structure_ = std::move(dependency_descriptor.attached_structure);
284     video_structure_frame_id_ = frame_id;
285     video_header.frame_type = VideoFrameType::kVideoFrameKey;
286   } else {
287     video_header.frame_type = VideoFrameType::kVideoFrameDelta;
288   }
289   return true;
290 }
291 
ParseGenericDescriptorExtension(const RtpPacketReceived & rtp_packet,RTPVideoHeader & video_header)292 bool RtpVideoFrameAssembler::Impl::ParseGenericDescriptorExtension(
293     const RtpPacketReceived& rtp_packet,
294     RTPVideoHeader& video_header) {
295   RtpGenericFrameDescriptor generic_frame_descriptor;
296   if (!rtp_packet.GetExtension<RtpGenericFrameDescriptorExtension00>(
297           &generic_frame_descriptor)) {
298     return false;
299   }
300 
301   video_header.is_first_packet_in_frame =
302       generic_frame_descriptor.FirstPacketInSubFrame();
303   video_header.is_last_packet_in_frame =
304       generic_frame_descriptor.LastPacketInSubFrame();
305 
306   if (generic_frame_descriptor.FirstPacketInSubFrame()) {
307     video_header.frame_type =
308         generic_frame_descriptor.FrameDependenciesDiffs().empty()
309             ? VideoFrameType::kVideoFrameKey
310             : VideoFrameType::kVideoFrameDelta;
311 
312     auto& generic_descriptor_info = video_header.generic.emplace();
313     int64_t frame_id =
314         frame_id_unwrapper_.Unwrap(generic_frame_descriptor.FrameId());
315     generic_descriptor_info.frame_id = frame_id;
316     generic_descriptor_info.spatial_index =
317         generic_frame_descriptor.SpatialLayer();
318     generic_descriptor_info.temporal_index =
319         generic_frame_descriptor.TemporalLayer();
320     for (uint16_t fdiff : generic_frame_descriptor.FrameDependenciesDiffs()) {
321       generic_descriptor_info.dependencies.push_back(frame_id - fdiff);
322     }
323   }
324   video_header.width = generic_frame_descriptor.Width();
325   video_header.height = generic_frame_descriptor.Height();
326   return true;
327 }
328 
RtpVideoFrameAssembler(PayloadFormat payload_format)329 RtpVideoFrameAssembler::RtpVideoFrameAssembler(PayloadFormat payload_format)
330     : impl_(std::make_unique<Impl>(CreateDepacketizer(payload_format))) {}
331 
332 RtpVideoFrameAssembler::~RtpVideoFrameAssembler() = default;
333 
InsertPacket(const RtpPacketReceived & packet)334 RtpVideoFrameAssembler::FrameVector RtpVideoFrameAssembler::InsertPacket(
335     const RtpPacketReceived& packet) {
336   return impl_->InsertPacket(packet);
337 }
338 
339 }  // namespace webrtc
340