• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * RTP parser for VP9 payload format (draft version 02) - experimental
3  * Copyright (c) 2015 Thomas Volkert <thomas@homer-conferencing.com>
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 #include "libavutil/intreadwrite.h"
23 
24 #include "avio_internal.h"
25 #include "rtpdec_formats.h"
26 
27 #define RTP_VP9_DESC_REQUIRED_SIZE 1
28 
29 struct PayloadContext {
30     AVIOContext *buf;
31     uint32_t     timestamp;
32 };
33 
vp9_init(AVFormatContext * ctx,int st_index,PayloadContext * data)34 static av_cold int vp9_init(AVFormatContext *ctx, int st_index,
35                             PayloadContext *data)
36 {
37     av_log(ctx, AV_LOG_WARNING,
38            "RTP/VP9 support is still experimental\n");
39 
40     return 0;
41 }
42 
/**
 * Parse one RTP/VP9 packet and append its payload to the frame being
 * assembled.
 *
 * The VP9 payload descriptor (required byte plus the optional picture ID,
 * layer indices, reference fields and scalability structure) is parsed and
 * skipped; the remaining bytes are written to the dynamic buffer held in
 * the payload context. Once the fragment carrying the E bit arrives, the
 * buffer is turned into an AVPacket.
 *
 * @param ctx         format context, used for logging
 * @param rtp_vp9_ctx depacketizer state (frame buffer and its timestamp)
 * @param st          stream whose index is stored in the resulting packet
 * @param pkt         output packet, filled when a frame is complete
 * @param timestamp   RTP timestamp of the current packet
 * @param buf         RTP payload
 * @param len         size of the RTP payload in bytes
 * @param seq         RTP sequence number (unused)
 * @param flags       RTP flags; RTP_FLAG_MARKER is inspected
 * @return 0 when pkt holds a complete frame,
 *         AVERROR(EAGAIN) when more fragments are needed,
 *         AVERROR_INVALIDDATA on a truncated or inconsistent packet,
 *         AVERROR_PATCHWELCOME for a scalability structure with more than
 *         one layer, or the error returned by avio_open_dyn_buf() /
 *         ff_rtp_finalize_packet().
 */
static int vp9_handle_packet(AVFormatContext *ctx, PayloadContext *rtp_vp9_ctx,
                             AVStream *st, AVPacket *pkt, uint32_t *timestamp,
                             const uint8_t *buf, int len, uint16_t seq,
                             int flags)
{
    int has_pic_id, has_layer_idc, has_ref_idc, has_ss_data;
    av_unused int pic_id = 0, non_key_frame = 0, inter_picture_layer_frame;
    av_unused int layer_temporal = -1, layer_spatial = -1, layer_quality = -1;
    int ref_fields = 0;
    int first_fragment, last_fragment;
    int rtp_m;
    int res = 0;

    /* drop data of previous packets in case of non-continuous (lossy) packet stream */
    if (rtp_vp9_ctx->buf && rtp_vp9_ctx->timestamp != *timestamp)
        ffio_free_dyn_buf(&rtp_vp9_ctx->buf);

    /* sanity check for size of input packet: 1 byte payload at least */
    if (len < RTP_VP9_DESC_REQUIRED_SIZE + 1) {
        av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet, got %d bytes\n", len);
        return AVERROR_INVALIDDATA;
    }

    /*
     *     decode the required VP9 payload descriptor according to section 4.2 of the spec.:
     *
     *      0 1 2 3 4 5 6 7
     *     +-+-+-+-+-+-+-+-+
     *     |I|P|L|F|B|E|V|-| (REQUIRED)
     *     +-+-+-+-+-+-+-+-+
     *
     *     I: PictureID present
     *     P: Inter-picture predicted layer frame
     *     L: Layer indices present
     *     F: Flexible mode
     *     B: Start of VP9 frame
     *     E: End of picture
     *     V: Scalability Structure (SS) present
     */
    has_pic_id     = !!(buf[0] & 0x80);
    inter_picture_layer_frame = !!(buf[0] & 0x40);
    has_layer_idc  = !!(buf[0] & 0x20);
    has_ref_idc    = !!(buf[0] & 0x10);
    first_fragment = !!(buf[0] & 0x08);
    last_fragment  = !!(buf[0] & 0x04);
    has_ss_data    = !!(buf[0] & 0x02);

    rtp_m = !!(flags & RTP_FLAG_MARKER);

    /* sanity check for markers: E should always be equal to the RTP M marker */
    if (last_fragment != rtp_m) {
        av_log(ctx, AV_LOG_ERROR, "Invalid combination of E and M marker (%d != %d)\n", last_fragment, rtp_m);
        return AVERROR_INVALIDDATA;
    }

    /* skip the required descriptor byte; at least one byte remains (checked above) */
    buf += RTP_VP9_DESC_REQUIRED_SIZE;
    len -= RTP_VP9_DESC_REQUIRED_SIZE;

    /*
     *         decode the 1-byte/2-byte picture ID:
     *
     *          0 1 2 3 4 5 6 7
     *         +-+-+-+-+-+-+-+-+
     *   I:    |M|PICTURE ID   | (RECOMMENDED)
     *         +-+-+-+-+-+-+-+-+
     *   M:    | EXTENDED PID  | (RECOMMENDED)
     *         +-+-+-+-+-+-+-+-+
     *
     *   M: The most significant bit of the first octet is an extension flag.
     *   PictureID:  8 or 16 bits including the M bit.
     */
    if (has_pic_id) {
        /* check for 1-byte or 2-byte picture index */
        if (buf[0] & 0x80) {
            if (len < 2) {
                av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n");
                return AVERROR_INVALIDDATA;
            }
            pic_id = AV_RB16(buf) & 0x7fff;
            buf += 2;
            len -= 2;
        } else {
            pic_id = buf[0] & 0x7f;
            buf++;
            len--;
        }
    }

    /*
     *         decode layer indices
     *
     *          0 1 2 3 4 5 6 7
     *         +-+-+-+-+-+-+-+-+
     *   L:    | T | S | Q | R | (CONDITIONALLY RECOMMENDED)
     *         +-+-+-+-+-+-+-+-+
     *
     *   T, S and Q are 2-bit indices for temporal, spatial, and quality layers.
     *   If "F" is set in the initial octet, R is 2 bits representing the number
     *   of reference fields this frame refers to.
     */
    if (has_layer_idc) {
        if (len < 1) {
            av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n");
            return AVERROR_INVALIDDATA;
        }
        /* shift each field down so the stored value is the 2-bit index itself */
        layer_temporal = (buf[0] >> 6) & 0x03;
        layer_spatial  = (buf[0] >> 4) & 0x03;
        layer_quality  = (buf[0] >> 2) & 0x03;
        if (has_ref_idc) {
            ref_fields = buf[0] & 0x03;
            if (ref_fields)
                non_key_frame = 1;
        }
        buf++;
        len--;
    }

    /*
     *         decode the reference fields
     *
     *          0 1 2 3 4 5 6 7
     *         +-+-+-+-+-+-+-+-+              -\
     *   F:    | PID |X| RS| RQ| (OPTIONAL)    .
     *         +-+-+-+-+-+-+-+-+               . - R times
     *   X:    | EXTENDED PID  | (OPTIONAL)    .
     *         +-+-+-+-+-+-+-+-+              -/
     *
     *   PID:  The relative Picture ID referred to by this frame.
     *   RS and RQ:  The spatial and quality layer IDs.
     *   X: 1 if this layer index has an extended relative Picture ID.
     */
    if (has_ref_idc) {
        /* ref_fields is only non-zero when the layer indices byte was present */
        while (ref_fields) {
            int has_ref_field_ext_pic_id;

            if (len < 1) {
                av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n");
                return AVERROR_INVALIDDATA;
            }

            has_ref_field_ext_pic_id = buf[0] & 0x10;

            /* pass ref. field */
            if (has_ref_field_ext_pic_id) {
                if (len < 2) {
                    av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n");
                    return AVERROR_INVALIDDATA;
                }

                /* ignore ref. data */

                buf += 2;
                len -= 2;
            } else {

                /* ignore ref. data */

                buf++;
                len--;
            }
            ref_fields--;
        }
    }

    /*
     *         decode the scalability structure (SS)
     *
     *         Layout as parsed below (field names follow the local variables):
     *
     *          0 1 2 3 4 5 6 7
     *         +-+-+-+-+-+-+-+-+
     *   V:    | N_S |Y|G|-|-|-|
     *         +-+-+-+-+-+-+-+-+              -\
     *   Y:    |     WIDTH     | (OPTIONAL)    .
     *         +               +               .
     *         |               | (OPTIONAL)    .
     *         +-+-+-+-+-+-+-+-+               . - N_S + 1 times
     *         |     HEIGHT    | (OPTIONAL)    .
     *         +               +               .
     *         |               | (OPTIONAL)    .
     *         +-+-+-+-+-+-+-+-+              -/
     *   G:    |      N_G      | (OPTIONAL)
     *         +-+-+-+-+-+-+-+-+                            -\
     *   N_G:  |  T  |U| R |-|-| (OPTIONAL)                 .
     *         +-+-+-+-+-+-+-+-+              -\            . - N_G times
     *         |    P_DIFF     | (OPTIONAL)    . - R times  .
     *         +-+-+-+-+-+-+-+-+              -/            -/
     *
     *   All values are read and discarded; only the byte count matters here.
     */
    if (has_ss_data) {
        int n_s, y, g, i;
        if (len < 1) {
            av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n");
            return AVERROR_INVALIDDATA;
        }
        n_s = buf[0] >> 5;
        y = !!(buf[0] & 0x10);
        g = !!(buf[0] & 0x08);
        buf++;
        len--;
        if (n_s > 0) {
            avpriv_report_missing_feature(ctx, "VP9 scalability structure with multiple layers");
            return AVERROR_PATCHWELCOME;
        }
        if (y) {
            /* 16-bit width + 16-bit height per layer */
            if (len < 4 * (n_s + 1)) {
                av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n");
                return AVERROR_INVALIDDATA;
            }
            for (i = 0; i < n_s + 1; i++) {
                av_unused int w, h;
                w = AV_RB16(buf);
                h = AV_RB16(buf + 2);
                buf += 4;
                len -= 4;
            }
        }
        if (g) {
            int n_g;
            if (len < 1) {
                av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n");
                return AVERROR_INVALIDDATA;
            }
            n_g = buf[0];
            buf++;
            len--;
            for (i = 0; i < n_g; i++) {
                av_unused int t, u, r, j;
                if (len < 1) {
                    av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n");
                    return AVERROR_INVALIDDATA;
                }
                t = buf[0] >> 5;
                u = !!(buf[0] & 0x10);
                r = (buf[0] >> 2) & 0x03;
                buf++;
                len--;
                /* one P_DIFF byte follows for each of the r references */
                if (len < r) {
                    av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n");
                    return AVERROR_INVALIDDATA;
                }
                for (j = 0; j < r; j++) {
                    av_unused int p_diff = buf[0];
                    buf++;
                    len--;
                }
            }
        }
    }

    /*
     * decode the VP9 payload header
     *
     *  spec. is tbd
     */
    //XXX: implement when specified

    /* sanity check: 1 byte payload as minimum */
    if (len < 1) {
        av_log(ctx, AV_LOG_ERROR, "Too short RTP/VP9 packet\n");
        return AVERROR_INVALIDDATA;
    }

    /* start frame buffering with new dynamic buffer */
    if (!rtp_vp9_ctx->buf) {
        /* sanity check: a new frame should have started */
        if (first_fragment) {
            res = avio_open_dyn_buf(&rtp_vp9_ctx->buf);
            if (res < 0)
                return res;
            /* remember the RTP timestamp this frame buffer belongs to */
            rtp_vp9_ctx->timestamp = *timestamp;
        } else {
            /* frame not started yet, need more packets */
            return AVERROR(EAGAIN);
        }
    }

    /* write the fragment to the dyn. buffer */
    avio_write(rtp_vp9_ctx->buf, buf, len);

    /* do we need more fragments? */
    if (!last_fragment)
        return AVERROR(EAGAIN);

    /* close frame buffering and create resulting A/V packet */
    res = ff_rtp_finalize_packet(pkt, &rtp_vp9_ctx->buf, st->index);
    if (res < 0)
        return res;

    return 0;
}
327 
/**
 * Depacketizer close callback: release the frame buffer held in the
 * payload context.
 *
 * @param vp9 payload context whose dynamic buffer is freed
 */
static void vp9_close_context(PayloadContext *vp9)
{
    AVIOContext **pending = &vp9->buf;

    ffio_free_dyn_buf(pending);
}
332 
333 const RTPDynamicProtocolHandler ff_vp9_dynamic_handler = {
334     .enc_name         = "VP9",
335     .codec_type       = AVMEDIA_TYPE_VIDEO,
336     .codec_id         = AV_CODEC_ID_VP9,
337     .priv_data_size   = sizeof(PayloadContext),
338     .init             = vp9_init,
339     .close            = vp9_close_context,
340     .parse_packet     = vp9_handle_packet
341 };
342