1 /*
2 * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "modules/rtp_rtcp/source/video_rtp_depacketizer_av1.h"
12
13 #include <stddef.h>
14 #include <stdint.h>
15
16 #include <utility>
17
18 #include "modules/rtp_rtcp/source/rtp_video_header.h"
19 #include "rtc_base/byte_buffer.h"
20 #include "rtc_base/checks.h"
21 #include "rtc_base/logging.h"
22 #include "rtc_base/numerics/safe_conversions.h"
23
24 namespace webrtc {
25 namespace {
// AV1 format:
//
// RTP payload syntax:
//     0 1 2 3 4 5 6 7
//    +-+-+-+-+-+-+-+-+
//    |Z|Y| W |N|-|-|-| (REQUIRED)
//    +=+=+=+=+=+=+=+=+ (REPEATED W-1 times, or any times if W = 0)
//    |1|             |
//    +-+ OBU fragment|
//    |1|             | (REQUIRED, leb128 encoded)
//    +-+    size     |
//    |0|             |
//    +-+-+-+-+-+-+-+-+
//    |  OBU fragment |
//    |     ...       |
//    +=+=+=+=+=+=+=+=+
//    |     ...       |
//    +=+=+=+=+=+=+=+=+ if W > 0, last fragment MUST NOT have size field
//    |  OBU fragment |
//    |     ...       |
//    +=+=+=+=+=+=+=+=+
//
//
// OBU syntax:
//     0 1 2 3 4 5 6 7
//    +-+-+-+-+-+-+-+-+
//    |0| type  |X|S|-| (REQUIRED)
//    +-+-+-+-+-+-+-+-+
// X: | TID |SID|-|-|-| (OPTIONAL)
//    +-+-+-+-+-+-+-+-+
//    |1|             |
//    +-+ OBU payload |
// S: |1|             | (OPTIONAL, variable length leb128 encoded)
//    +-+    size     |
//    |0|             |
//    +-+-+-+-+-+-+-+-+
//    |  OBU payload  |
//    |     ...       |
64 class ArrayOfArrayViews {
65 public:
66 class const_iterator;
67 ArrayOfArrayViews() = default;
68 ArrayOfArrayViews(const ArrayOfArrayViews&) = default;
69 ArrayOfArrayViews& operator=(const ArrayOfArrayViews&) = default;
70 ~ArrayOfArrayViews() = default;
71
72 const_iterator begin() const;
73 const_iterator end() const;
empty() const74 bool empty() const { return data_.empty(); }
size() const75 size_t size() const { return size_; }
76 void CopyTo(uint8_t* destination, const_iterator first) const;
77
Append(const uint8_t * data,size_t size)78 void Append(const uint8_t* data, size_t size) {
79 data_.emplace_back(data, size);
80 size_ += size;
81 }
82
83 private:
84 using Storage = absl::InlinedVector<rtc::ArrayView<const uint8_t>, 2>;
85
86 size_t size_ = 0;
87 Storage data_;
88 };
89
90 class ArrayOfArrayViews::const_iterator {
91 public:
92 const_iterator() = default;
93 const_iterator(const const_iterator&) = default;
94 const_iterator& operator=(const const_iterator&) = default;
95
operator ++()96 const_iterator& operator++() {
97 if (++inner_ == outer_->size()) {
98 ++outer_;
99 inner_ = 0;
100 }
101 return *this;
102 }
operator *() const103 uint8_t operator*() const { return (*outer_)[inner_]; }
104
operator ==(const const_iterator & lhs,const const_iterator & rhs)105 friend bool operator==(const const_iterator& lhs, const const_iterator& rhs) {
106 return lhs.outer_ == rhs.outer_ && lhs.inner_ == rhs.inner_;
107 }
108
109 private:
110 friend ArrayOfArrayViews;
const_iterator(ArrayOfArrayViews::Storage::const_iterator outer,size_t inner)111 const_iterator(ArrayOfArrayViews::Storage::const_iterator outer, size_t inner)
112 : outer_(outer), inner_(inner) {}
113
114 Storage::const_iterator outer_;
115 size_t inner_;
116 };
117
begin() const118 ArrayOfArrayViews::const_iterator ArrayOfArrayViews::begin() const {
119 return const_iterator(data_.begin(), 0);
120 }
121
end() const122 ArrayOfArrayViews::const_iterator ArrayOfArrayViews::end() const {
123 return const_iterator(data_.end(), 0);
124 }
125
CopyTo(uint8_t * destination,const_iterator first) const126 void ArrayOfArrayViews::CopyTo(uint8_t* destination,
127 const_iterator first) const {
128 if (first == end()) {
129 // Empty OBU payload. E.g. Temporal Delimiters are always empty.
130 return;
131 }
132 size_t first_chunk_size = first.outer_->size() - first.inner_;
133 memcpy(destination, first.outer_->data() + first.inner_, first_chunk_size);
134 destination += first_chunk_size;
135 for (auto it = std::next(first.outer_); it != data_.end(); ++it) {
136 memcpy(destination, it->data(), it->size());
137 destination += it->size();
138 }
139 }
140
141 struct ObuInfo {
142 // Size of the obu_header and obu_size fields in the ouput frame.
143 size_t prefix_size = 0;
144 // obu_header() and obu_size (leb128 encoded payload_size).
145 // obu_header can be up to 2 bytes, obu_size - up to 5.
146 std::array<uint8_t, 7> prefix;
147 // Size of the obu payload in the output frame, i.e. excluding header
148 size_t payload_size = 0;
149 // iterator pointing to the beginning of the obu payload.
150 ArrayOfArrayViews::const_iterator payload_offset;
151 // OBU payloads as written in the rtp packet payloads.
152 ArrayOfArrayViews data;
153 };
// The majority of frames are expected to use no more than 4 obus.
// In a simple stream a delta frame consists of a single Frame OBU, while a
// key frame also has a Sequence Header OBU.
using VectorObuInfo = absl::InlinedVector<ObuInfo, 4>;
158
// Mask of the S bit (obu_has_size_field) in the first byte of the OBU header.
constexpr uint8_t kObuSizePresentBit = 0b0000'0010;
160
// Returns true when the X bit of `obu_header` is set, i.e. when the OBU header
// is followed by the extension byte.
bool ObuHasExtension(uint8_t obu_header) {
  constexpr uint8_t kExtensionPresentBit = 0b0000'0100;
  return (obu_header & kExtensionPresentBit) != 0;
}
164
ObuHasSize(uint8_t obu_header)165 bool ObuHasSize(uint8_t obu_header) {
166 return obu_header & kObuSizePresentBit;
167 }
168
// Z bit of the aggregation header: the first OBU element of the packet
// continues an OBU from the previous packet.
bool RtpStartsWithFragment(uint8_t aggregation_header) {
  constexpr uint8_t kZBit = 0b1000'0000;
  return (aggregation_header & kZBit) != 0;
}
// Y bit of the aggregation header: the last OBU element of the packet
// continues in the next packet.
bool RtpEndsWithFragment(uint8_t aggregation_header) {
  constexpr uint8_t kYBit = 0b0100'0000;
  return (aggregation_header & kYBit) != 0;
}
// W field of the aggregation header: number of OBU elements in the packet,
// or 0 when the packet may contain any number of obus.
int RtpNumObus(uint8_t aggregation_header) {
  constexpr int kWFieldShift = 4;
  constexpr int kWFieldMask = 0b11;
  return (aggregation_header >> kWFieldShift) & kWFieldMask;
}
// N bit of the aggregation header: the packet is the first packet of a new
// coded video sequence.
// Returns bool like the other aggregation header predicates, rather than the
// raw masked value.
bool RtpStartsNewCodedVideoSequence(uint8_t aggregation_header) {
  return (aggregation_header & 0b0000'1000u) != 0;
}
181
182 // Reorgonizes array of rtp payloads into array of obus:
183 // fills ObuInfo::data field.
184 // Returns empty vector on error.
ParseObus(rtc::ArrayView<const rtc::ArrayView<const uint8_t>> rtp_payloads)185 VectorObuInfo ParseObus(
186 rtc::ArrayView<const rtc::ArrayView<const uint8_t>> rtp_payloads) {
187 VectorObuInfo obu_infos;
188 bool expect_continues_obu = false;
189 for (rtc::ArrayView<const uint8_t> rtp_payload : rtp_payloads) {
190 rtc::ByteBufferReader payload(
191 reinterpret_cast<const char*>(rtp_payload.data()), rtp_payload.size());
192 uint8_t aggregation_header;
193 if (!payload.ReadUInt8(&aggregation_header)) {
194 RTC_DLOG(WARNING) << "Failed to find aggregation header in the packet.";
195 return {};
196 }
197 // Z-bit: 1 if the first OBU contained in the packet is a continuation of a
198 // previous OBU.
199 bool continues_obu = RtpStartsWithFragment(aggregation_header);
200 if (continues_obu != expect_continues_obu) {
201 RTC_DLOG(WARNING) << "Unexpected Z-bit " << continues_obu;
202 return {};
203 }
204 int num_expected_obus = RtpNumObus(aggregation_header);
205 if (payload.Length() == 0) {
206 // rtp packet has just the aggregation header. That may be valid only when
207 // there is exactly one fragment in the packet of size 0.
208 if (num_expected_obus != 1) {
209 RTC_DLOG(WARNING) << "Invalid packet with just an aggregation header.";
210 return {};
211 }
212 if (!continues_obu) {
213 // Empty packet just to notify there is a new OBU.
214 obu_infos.emplace_back();
215 }
216 expect_continues_obu = RtpEndsWithFragment(aggregation_header);
217 continue;
218 }
219
220 for (int obu_index = 1; payload.Length() > 0; ++obu_index) {
221 ObuInfo& obu_info = (obu_index == 1 && continues_obu)
222 ? obu_infos.back()
223 : obu_infos.emplace_back();
224 uint64_t fragment_size;
225 // When num_expected_obus > 0, last OBU (fragment) is not preceeded by
226 // the size field. See W field in
227 // https://aomediacodec.github.io/av1-rtp-spec/#43-av1-aggregation-header
228 bool has_fragment_size = (obu_index != num_expected_obus);
229 if (has_fragment_size) {
230 if (!payload.ReadUVarint(&fragment_size)) {
231 RTC_DLOG(WARNING) << "Failed to read fragment size for obu #"
232 << obu_index << "/" << num_expected_obus;
233 return {};
234 }
235 if (fragment_size > payload.Length()) {
236 // Malformed input: written size is larger than remaining buffer.
237 RTC_DLOG(WARNING) << "Malformed fragment size " << fragment_size
238 << " is larger than remaining size "
239 << payload.Length() << " while reading obu #"
240 << obu_index << "/" << num_expected_obus;
241 return {};
242 }
243 } else {
244 fragment_size = payload.Length();
245 }
246 // While it is in-practical to pass empty fragments, it is still possible.
247 if (fragment_size > 0) {
248 obu_info.data.Append(reinterpret_cast<const uint8_t*>(payload.Data()),
249 fragment_size);
250 payload.Consume(fragment_size);
251 }
252 }
253 // Z flag should be same as Y flag of the next packet.
254 expect_continues_obu = RtpEndsWithFragment(aggregation_header);
255 }
256 if (expect_continues_obu) {
257 RTC_DLOG(WARNING) << "Last packet shouldn't have last obu fragmented.";
258 return {};
259 }
260 return obu_infos;
261 }
262
// Encodes `value` as leb128 into `buffer`: 7 bits per byte, least significant
// group first, top bit of a byte set when more bytes follow.
// `buffer` must have room for up to 5 bytes (largest encoding of a 32 bit
// value). Returns number of bytes written.
int WriteLeb128(uint32_t value, uint8_t* buffer) {
  uint8_t* write_at = buffer;
  for (; value >= 0x80; value >>= 7) {
    *write_at++ = static_cast<uint8_t>(0x80 | (value & 0x7F));
  }
  // Last group: continuation bit is not set.
  *write_at++ = static_cast<uint8_t>(value);
  return static_cast<int>(write_at - buffer);
}
275
276 // Calculates sizes for the Obu, i.e. base on ObuInfo::data field calculates
277 // all other fields in the ObuInfo structure.
278 // Returns false if obu found to be misformed.
CalculateObuSizes(ObuInfo * obu_info)279 bool CalculateObuSizes(ObuInfo* obu_info) {
280 if (obu_info->data.empty()) {
281 RTC_DLOG(WARNING) << "Invalid bitstream: empty obu provided.";
282 return false;
283 }
284 auto it = obu_info->data.begin();
285 uint8_t obu_header = *it;
286 obu_info->prefix[0] = obu_header | kObuSizePresentBit;
287 obu_info->prefix_size = 1;
288 ++it;
289 if (ObuHasExtension(obu_header)) {
290 if (it == obu_info->data.end()) {
291 return false;
292 }
293 obu_info->prefix[1] = *it; // obu_extension_header
294 obu_info->prefix_size = 2;
295 ++it;
296 }
297 // Read, validate, and skip size, if present.
298 if (!ObuHasSize(obu_header)) {
299 obu_info->payload_size = obu_info->data.size() - obu_info->prefix_size;
300 } else {
301 // Read leb128 encoded field obu_size.
302 uint64_t obu_size_bytes = 0;
303 // Number of bytes obu_size field occupy in the bitstream.
304 int size_of_obu_size_bytes = 0;
305 uint8_t leb128_byte;
306 do {
307 if (it == obu_info->data.end() || size_of_obu_size_bytes >= 8) {
308 RTC_DLOG(WARNING)
309 << "Failed to read obu_size. obu_size field is too long: "
310 << size_of_obu_size_bytes << " bytes processed.";
311 return false;
312 }
313 leb128_byte = *it;
314 obu_size_bytes |= uint64_t{leb128_byte & 0x7Fu}
315 << (size_of_obu_size_bytes * 7);
316 ++size_of_obu_size_bytes;
317 ++it;
318 } while ((leb128_byte & 0x80) != 0);
319
320 obu_info->payload_size =
321 obu_info->data.size() - obu_info->prefix_size - size_of_obu_size_bytes;
322 if (obu_size_bytes != obu_info->payload_size) {
323 // obu_size was present in the bitstream and mismatches calculated size.
324 RTC_DLOG(WARNING) << "Mismatch in obu_size. signaled: " << obu_size_bytes
325 << ", actual: " << obu_info->payload_size;
326 return false;
327 }
328 }
329 obu_info->payload_offset = it;
330 obu_info->prefix_size +=
331 WriteLeb128(rtc::dchecked_cast<uint32_t>(obu_info->payload_size),
332 obu_info->prefix.data() + obu_info->prefix_size);
333 return true;
334 }
335
336 } // namespace
337
AssembleFrame(rtc::ArrayView<const rtc::ArrayView<const uint8_t>> rtp_payloads)338 rtc::scoped_refptr<EncodedImageBuffer> VideoRtpDepacketizerAv1::AssembleFrame(
339 rtc::ArrayView<const rtc::ArrayView<const uint8_t>> rtp_payloads) {
340 VectorObuInfo obu_infos = ParseObus(rtp_payloads);
341 if (obu_infos.empty()) {
342 return nullptr;
343 }
344
345 size_t frame_size = 0;
346 for (ObuInfo& obu_info : obu_infos) {
347 if (!CalculateObuSizes(&obu_info)) {
348 return nullptr;
349 }
350 frame_size += (obu_info.prefix_size + obu_info.payload_size);
351 }
352
353 rtc::scoped_refptr<EncodedImageBuffer> bitstream =
354 EncodedImageBuffer::Create(frame_size);
355 uint8_t* write_at = bitstream->data();
356 for (const ObuInfo& obu_info : obu_infos) {
357 // Copy the obu_header and obu_size fields.
358 memcpy(write_at, obu_info.prefix.data(), obu_info.prefix_size);
359 write_at += obu_info.prefix_size;
360 // Copy the obu payload.
361 obu_info.data.CopyTo(write_at, obu_info.payload_offset);
362 write_at += obu_info.payload_size;
363 }
364 RTC_CHECK_EQ(write_at - bitstream->data(), bitstream->size());
365 return bitstream;
366 }
367
368 absl::optional<VideoRtpDepacketizer::ParsedRtpPayload>
Parse(rtc::CopyOnWriteBuffer rtp_payload)369 VideoRtpDepacketizerAv1::Parse(rtc::CopyOnWriteBuffer rtp_payload) {
370 if (rtp_payload.size() == 0) {
371 RTC_DLOG(LS_ERROR) << "Empty rtp payload.";
372 return absl::nullopt;
373 }
374 uint8_t aggregation_header = rtp_payload.cdata()[0];
375 if (RtpStartsNewCodedVideoSequence(aggregation_header) &&
376 RtpStartsWithFragment(aggregation_header)) {
377 // new coded video sequence can't start from an OBU fragment.
378 return absl::nullopt;
379 }
380 absl::optional<ParsedRtpPayload> parsed(absl::in_place);
381
382 // To assemble frame, all of the rtp payload is required, including
383 // aggregation header.
384 parsed->video_payload = std::move(rtp_payload);
385
386 parsed->video_header.codec = VideoCodecType::kVideoCodecAV1;
387 // These are not accurate since frame may consist of several packet aligned
388 // chunks of obus, but should be good enough for most cases. It might produce
389 // frame that do not map to any real frame, but av1 decoder should be able to
390 // handle it since it promise to handle individual obus rather than full
391 // frames.
392 parsed->video_header.is_first_packet_in_frame =
393 !RtpStartsWithFragment(aggregation_header);
394 parsed->video_header.is_last_packet_in_frame =
395 !RtpEndsWithFragment(aggregation_header);
396
397 parsed->video_header.frame_type =
398 RtpStartsNewCodedVideoSequence(aggregation_header)
399 ? VideoFrameType::kVideoFrameKey
400 : VideoFrameType::kVideoFrameDelta;
401 return parsed;
402 }
403
404 } // namespace webrtc
405