1 /*
2 * RTP parser for VP9 payload format (draft version 02) - experimental
3 * Copyright (c) 2015 Thomas Volkert <thomas@homer-conferencing.com>
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22 #include "libavutil/intreadwrite.h"
23
24 #include "avio_internal.h"
25 #include "rtpdec_formats.h"
26
/* Number of bytes taken by the mandatory first octet of the payload descriptor. */
#define RTP_VP9_DESC_REQUIRED_SIZE 1
28
29 struct PayloadContext {
30 AVIOContext *buf;
31 uint32_t timestamp;
32 };
33
vp9_init(AVFormatContext * ctx,int st_index,PayloadContext * data)34 static av_cold int vp9_init(AVFormatContext *ctx, int st_index,
35 PayloadContext *data)
36 {
37 av_log(ctx, AV_LOG_WARNING,
38 "RTP/VP9 support is still experimental\n");
39
40 return 0;
41 }
42
/**
 * Depacketize one RTP/VP9 packet and append its payload to the frame that is
 * currently being reassembled.
 *
 * The payload descriptor (required octet, optional picture ID, layer indices,
 * reference fields and scalability structure) is parsed only far enough to
 * validate its length and skip over it; apart from the B/E fragment flags
 * none of the decoded descriptor values is used.
 *
 * @param ctx         format context, used for logging
 * @param rtp_vp9_ctx depacketizer state (reassembly buffer and its timestamp)
 * @param st          stream whose index is passed to ff_rtp_finalize_packet()
 * @param pkt         packet passed to ff_rtp_finalize_packet() once the frame
 *                    is complete
 * @param timestamp   RTP timestamp of the incoming packet
 * @param buf         RTP payload
 * @param len         size of the RTP payload in bytes
 * @param seq         RTP sequence number (unused)
 * @param flags       RTP flags; only RTP_FLAG_MARKER is examined
 *
 * @return 0 once the last fragment was finalized into a packet,
 *         AVERROR(EAGAIN) if more fragments are needed,
 *         AVERROR_INVALIDDATA for a truncated or inconsistent packet,
 *         AVERROR_PATCHWELCOME for a scalability structure with multiple
 *         layers, or the negative error code returned by avio_open_dyn_buf()
 *         or ff_rtp_finalize_packet()
 */
static int vp9_handle_packet(AVFormatContext *ctx, PayloadContext *rtp_vp9_ctx,
                             AVStream *st, AVPacket *pkt, uint32_t *timestamp,
                             const uint8_t *buf, int len, uint16_t seq,
                             int flags)
{
    int has_pic_id, has_layer_idc, has_ref_idc, has_ss_data;
    av_unused int pic_id = 0, non_key_frame = 0, inter_picture_layer_frame;
    av_unused int layer_temporal = -1, layer_spatial = -1, layer_quality = -1;
    int ref_fields = 0;
    int first_fragment, last_fragment;
    int rtp_m;
    int res;

    /* drop data of previous packets in case of non-continuous (lossy) packet stream */
    if (rtp_vp9_ctx->buf && rtp_vp9_ctx->timestamp != *timestamp)
        ffio_free_dyn_buf(&rtp_vp9_ctx->buf);

    /* sanity check for size of input packet: descriptor plus 1 byte payload at least */
    if (len < RTP_VP9_DESC_REQUIRED_SIZE + 1) {
        av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet, got %d bytes\n", len);
        return AVERROR_INVALIDDATA;
    }

    /*
     * decode the required VP9 payload descriptor according to section 4.2 of the spec.:
     *
     *      0 1 2 3 4 5 6 7
     *     +-+-+-+-+-+-+-+-+
     *     |I|P|L|F|B|E|V|-| (REQUIRED)
     *     +-+-+-+-+-+-+-+-+
     *
     *     I: PictureID present
     *     P: Inter-picture predicted layer frame
     *     L: Layer indices present
     *     F: Flexible mode
     *     B: Start of VP9 frame
     *     E: End of picture
     *     V: Scalability Structure (SS) present
     */
    has_pic_id                = !!(buf[0] & 0x80);
    inter_picture_layer_frame = !!(buf[0] & 0x40);
    has_layer_idc             = !!(buf[0] & 0x20);
    has_ref_idc               = !!(buf[0] & 0x10);
    first_fragment            = !!(buf[0] & 0x08);
    last_fragment             = !!(buf[0] & 0x04);
    has_ss_data               = !!(buf[0] & 0x02);

    rtp_m = !!(flags & RTP_FLAG_MARKER);

    /* sanity check for markers: E should always be equal to the RTP M marker */
    if (last_fragment != rtp_m) {
        av_log(ctx, AV_LOG_ERROR, "Invalid combination of E and M marker (%d != %d)\n", last_fragment, rtp_m);
        return AVERROR_INVALIDDATA;
    }

    /* pass the required descriptor octet */
    buf += RTP_VP9_DESC_REQUIRED_SIZE;
    len -= RTP_VP9_DESC_REQUIRED_SIZE;

    /*
     * decode the 1-byte/2-byte picture ID:
     *
     *      0 1 2 3 4 5 6 7
     *     +-+-+-+-+-+-+-+-+
     * I:  |M|PICTURE ID   | (RECOMMENDED)
     *     +-+-+-+-+-+-+-+-+
     * M:  | EXTENDED PID  | (RECOMMENDED)
     *     +-+-+-+-+-+-+-+-+
     *
     *     M: The most significant bit of the first octet is an extension flag.
     *     PictureID: 8 or 16 bits including the M bit.
     */
    if (has_pic_id) {
        /* buf[0] is readable here: the size check above leaves len >= 1 */
        if (buf[0] & 0x80) {
            /* 2-byte picture index */
            if (len < 2)
                goto too_short;
            pic_id = AV_RB16(buf) & 0x7fff;
            buf += 2;
            len -= 2;
        } else {
            /* 1-byte picture index */
            pic_id = buf[0] & 0x7f;
            buf++;
            len--;
        }
    }

    /*
     * decode layer indices
     *
     *      0 1 2 3 4 5 6 7
     *     +-+-+-+-+-+-+-+-+
     * L:  | T | S | Q | R | (CONDITIONALLY RECOMMENDED)
     *     +-+-+-+-+-+-+-+-+
     *
     *     T, S and Q are 2-bit indices for temporal, spatial, and quality layers.
     *     If "F" is set in the initial octet, R is 2 bits representing the number
     *     of reference fields this frame refers to.
     */
    if (has_layer_idc) {
        if (len < 1)
            goto too_short;
        /* shift each 2-bit field down so the variables hold indices 0..3 */
        layer_temporal =  buf[0] >> 6;
        layer_spatial  = (buf[0] >> 4) & 0x03;
        layer_quality  = (buf[0] >> 2) & 0x03;
        if (has_ref_idc) {
            ref_fields = buf[0] & 0x03;
            if (ref_fields)
                non_key_frame = 1;
        }
        buf++;
        len--;
    }

    /*
     * skip the reference fields
     *
     *      0 1 2 3 4 5 6 7
     *     +-+-+-+-+-+-+-+-+              -\
     * F:  | PID |X| RS| RQ| (OPTIONAL)    .
     *     +-+-+-+-+-+-+-+-+               . - R times
     * X:  | EXTENDED PID  | (OPTIONAL)    .
     *     +-+-+-+-+-+-+-+-+              -/
     *
     *     PID: The relative Picture ID referred to by this frame.
     *     RS and RQ: The spatial and quality layer IDs.
     *     X: 1 if this layer index has an extended relative Picture ID.
     *
     * ref_fields is only non-zero when both L and F were set, so the loop
     * does nothing otherwise.
     */
    if (has_ref_idc) {
        while (ref_fields) {
            int ref_size;

            if (len < 1)
                goto too_short;

            /* X bit set: the field is followed by an extended PID octet */
            ref_size = (buf[0] & 0x10) ? 2 : 1;
            if (len < ref_size)
                goto too_short;

            /* ignore ref. data */
            buf += ref_size;
            len -= ref_size;
            ref_fields--;
        }
    }

    /*
     * skip the scalability structure (SS); layout as parsed below:
     *
     *      0 1 2 3 4 5 6 7
     *     +-+-+-+-+-+-+-+-+
     * V:  | N_S |Y|G|-|-|-|
     *     +-+-+-+-+-+-+-+-+              -\
     * Y:  |  W (16 bits)  | (OPTIONAL)    .
     *     +-+-+-+-+-+-+-+-+               . - N_S + 1 times
     *     |  H (16 bits)  | (OPTIONAL)    .
     *     +-+-+-+-+-+-+-+-+              -/
     * G:  |      N_G      | (OPTIONAL)
     *     +-+-+-+-+-+-+-+-+                            -\
     *     |  T  |U| R |-|-| (OPTIONAL)                  .
     *     +-+-+-+-+-+-+-+-+              -\             . - N_G times
     *     |    P_DIFF     | (OPTIONAL)    . - R times   .
     *     +-+-+-+-+-+-+-+-+              -/            -/
     *
     * Only N_S == 0 is supported; all decoded values are discarded.
     */
    if (has_ss_data) {
        int n_s, y, g, i;

        if (len < 1)
            goto too_short;
        n_s = buf[0] >> 5;
        y   = !!(buf[0] & 0x10);
        g   = !!(buf[0] & 0x08);
        buf++;
        len--;
        if (n_s > 0) {
            avpriv_report_missing_feature(ctx, "VP9 scalability structure with multiple layers");
            return AVERROR_PATCHWELCOME;
        }
        if (y) {
            /* one pair of 16-bit big-endian values per entry */
            if (len < 4 * (n_s + 1))
                goto too_short;
            for (i = 0; i < n_s + 1; i++) {
                av_unused int w, h;
                w = AV_RB16(buf);
                h = AV_RB16(buf + 2);
                buf += 4;
                len -= 4;
            }
        }
        if (g) {
            int n_g;

            if (len < 1)
                goto too_short;
            n_g = buf[0];
            buf++;
            len--;
            for (i = 0; i < n_g; i++) {
                av_unused int t, u, r, j;

                if (len < 1)
                    goto too_short;
                t = buf[0] >> 5;
                u = !!(buf[0] & 0x10);
                r = (buf[0] >> 2) & 0x03;
                buf++;
                len--;
                /* r single-byte P_DIFF entries follow */
                if (len < r)
                    goto too_short;
                for (j = 0; j < r; j++) {
                    av_unused int p_diff = buf[0];
                    buf++;
                    len--;
                }
            }
        }
    }

    /*
     * decode the VP9 payload header
     *
     *  spec. is tbd
     */
    //XXX: implement when specified

    /* sanity check: 1 byte payload as minimum */
    if (len < 1)
        goto too_short;

    /* start frame buffering with new dynamic buffer */
    if (!rtp_vp9_ctx->buf) {
        /* sanity check: a new frame should have started */
        if (!first_fragment) {
            /* frame not started yet, need more packets */
            return AVERROR(EAGAIN);
        }
        res = avio_open_dyn_buf(&rtp_vp9_ctx->buf);
        if (res < 0)
            return res;
        /* remember the RTP timestamp the buffered frame belongs to */
        rtp_vp9_ctx->timestamp = *timestamp;
    }

    /* write the fragment to the dyn. buffer */
    avio_write(rtp_vp9_ctx->buf, buf, len);

    /* do we need more fragments? */
    if (!last_fragment)
        return AVERROR(EAGAIN);

    /* close frame buffering and create resulting A/V packet */
    res = ff_rtp_finalize_packet(pkt, &rtp_vp9_ctx->buf, st->index);
    if (res < 0)
        return res;

    return 0;

too_short:
    av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n");
    return AVERROR_INVALIDDATA;
}
327
/**
 * Handler close hook: releases the reassembly buffer held in the
 * depacketizer state.
 *
 * @param vp9 depacketizer state whose dynamic buffer is freed
 */
static void vp9_close_context(PayloadContext *vp9)
{
    AVIOContext **pending = &vp9->buf;

    ffio_free_dyn_buf(pending);
}
332
333 const RTPDynamicProtocolHandler ff_vp9_dynamic_handler = {
334 .enc_name = "VP9",
335 .codec_type = AVMEDIA_TYPE_VIDEO,
336 .codec_id = AV_CODEC_ID_VP9,
337 .priv_data_size = sizeof(PayloadContext),
338 .init = vp9_init,
339 .close = vp9_close_context,
340 .parse_packet = vp9_handle_packet
341 };
342