/*
 * VP9 compatible video decoder
 *
 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
 * Copyright (C) 2013 Clément Bœsch <u pkh me>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "avcodec.h"
#include "get_bits.h"
#include "hwconfig.h"
#include "internal.h"
#include "profiles.h"
#include "thread.h"
#include "videodsp.h"
#include "vp56.h"
#include "vp9.h"
#include "vp9data.h"
#include "vp9dec.h"
#include "libavutil/avassert.h"
#include "libavutil/pixdesc.h"
#include "libavutil/video_enc_params.h"

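/* 24-bit sync code (bytes 0x49 0x83 0x42) that starts the payload of keyframe
 * and intra-only frame headers; see the get_bits(&s->gb, 24) checks below. */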
#define VP9_SYNCCODE 0x498342

#if HAVE_THREADS
static void vp9_free_entries(AVCodecContext *avctx) {
    VP9Context *s = avctx->priv_data;

    if (avctx->active_thread_type & FF_THREAD_SLICE) {
        pthread_mutex_destroy(&s->progress_mutex);
        pthread_cond_destroy(&s->progress_cond);
        av_freep(&s->entries);
    }
}

static int vp9_alloc_entries(AVCodecContext *avctx, int n) {
    VP9Context *s = avctx->priv_data;
    int i;

    if (avctx->active_thread_type & FF_THREAD_SLICE) {
        if (s->entries)
            av_freep(&s->entries);

        s->entries = av_malloc_array(n, sizeof(atomic_int));

        if (!s->entries) {
            av_freep(&s->entries);
            return AVERROR(ENOMEM);
        }

        for (i = 0; i < n; i++)
            atomic_init(&s->entries[i], 0);

        pthread_mutex_init(&s->progress_mutex, NULL);
        pthread_cond_init(&s->progress_cond, NULL);
    }
    return 0;
}

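/* Tile-thread progress tracking: each slice thread bumps entries[row] with
 * release semantics once it finishes a superblock row of its tile and wakes
 * any waiter; the loopfilter thread first polls with an acquire load (the
 * lock-free fast path) and only falls back to the mutex/condvar when the
 * row is not yet complete. */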
static void vp9_report_tile_progress(VP9Context *s, int field, int n) {
    pthread_mutex_lock(&s->progress_mutex);
    atomic_fetch_add_explicit(&s->entries[field], n, memory_order_release);
    pthread_cond_signal(&s->progress_cond);
    pthread_mutex_unlock(&s->progress_mutex);
}

static void vp9_await_tile_progress(VP9Context *s, int field, int n) {
    if (atomic_load_explicit(&s->entries[field], memory_order_acquire) >= n)
        return;

    pthread_mutex_lock(&s->progress_mutex);
    while (atomic_load_explicit(&s->entries[field], memory_order_relaxed) != n)
        pthread_cond_wait(&s->progress_cond, &s->progress_mutex);
    pthread_mutex_unlock(&s->progress_mutex);
}
#else
static void vp9_free_entries(AVCodecContext *avctx) {}
static int vp9_alloc_entries(AVCodecContext *avctx, int n) { return 0; }
#endif

static void vp9_tile_data_free(VP9TileData *td)
{
    av_freep(&td->b_base);
    av_freep(&td->block_base);
    av_freep(&td->block_structure);
}

static void vp9_frame_unref(AVCodecContext *avctx, VP9Frame *f)
{
    ff_thread_release_buffer(avctx, &f->tf);
    av_buffer_unref(&f->extradata);
    av_buffer_unref(&f->hwaccel_priv_buf);
    f->segmentation_map = NULL;
    f->hwaccel_picture_private = NULL;
}

static int vp9_frame_alloc(AVCodecContext *avctx, VP9Frame *f)
{
    VP9Context *s = avctx->priv_data;
    int ret, sz;

    ret = ff_thread_get_buffer(avctx, &f->tf, AV_GET_BUFFER_FLAG_REF);
    if (ret < 0)
        return ret;

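    /* One byte of segmentation map plus one VP9mvrefPair per 8x8 block; a
     * 64x64 superblock holds 64 such blocks, so sz counts 8x8 units. */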
    sz = 64 * s->sb_cols * s->sb_rows;
    if (sz != s->frame_extradata_pool_size) {
        av_buffer_pool_uninit(&s->frame_extradata_pool);
        s->frame_extradata_pool = av_buffer_pool_init(sz * (1 + sizeof(VP9mvrefPair)), NULL);
        if (!s->frame_extradata_pool) {
            s->frame_extradata_pool_size = 0;
            goto fail;
        }
        s->frame_extradata_pool_size = sz;
    }
    f->extradata = av_buffer_pool_get(s->frame_extradata_pool);
    if (!f->extradata) {
        goto fail;
    }
    memset(f->extradata->data, 0, f->extradata->size);

    f->segmentation_map = f->extradata->data;
    f->mv = (VP9mvrefPair *) (f->extradata->data + sz);

    if (avctx->hwaccel) {
        const AVHWAccel *hwaccel = avctx->hwaccel;
        av_assert0(!f->hwaccel_picture_private);
        if (hwaccel->frame_priv_data_size) {
            f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
            if (!f->hwaccel_priv_buf)
                goto fail;
            f->hwaccel_picture_private = f->hwaccel_priv_buf->data;
        }
    }

    return 0;

fail:
    vp9_frame_unref(avctx, f);
    return AVERROR(ENOMEM);
}

static int vp9_frame_ref(AVCodecContext *avctx, VP9Frame *dst, VP9Frame *src)
{
    int ret;

    ret = ff_thread_ref_frame(&dst->tf, &src->tf);
    if (ret < 0)
        return ret;

    dst->extradata = av_buffer_ref(src->extradata);
    if (!dst->extradata)
        goto fail;

    dst->segmentation_map = src->segmentation_map;
    dst->mv = src->mv;
    dst->uses_2pass = src->uses_2pass;

    if (src->hwaccel_picture_private) {
        dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
        if (!dst->hwaccel_priv_buf)
            goto fail;
        dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
    }

    return 0;

fail:
    vp9_frame_unref(avctx, dst);
    return AVERROR(ENOMEM);
}

static int update_size(AVCodecContext *avctx, int w, int h)
{
#define HWACCEL_MAX (CONFIG_VP9_DXVA2_HWACCEL + \
                     CONFIG_VP9_D3D11VA_HWACCEL * 2 + \
                     CONFIG_VP9_NVDEC_HWACCEL + \
                     CONFIG_VP9_VAAPI_HWACCEL + \
                     CONFIG_VP9_VDPAU_HWACCEL)
    enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmtp = pix_fmts;
    VP9Context *s = avctx->priv_data;
    uint8_t *p;
    int bytesperpixel = s->bytesperpixel, ret, cols, rows;
    int lflvl_len, i;

    av_assert0(w > 0 && h > 0);

    if (!(s->pix_fmt == s->gf_fmt && w == s->w && h == s->h)) {
        if ((ret = ff_set_dimensions(avctx, w, h)) < 0)
            return ret;

        switch (s->pix_fmt) {
        case AV_PIX_FMT_YUV420P:
        case AV_PIX_FMT_YUV420P10:
#if CONFIG_VP9_DXVA2_HWACCEL
            *fmtp++ = AV_PIX_FMT_DXVA2_VLD;
#endif
#if CONFIG_VP9_D3D11VA_HWACCEL
            *fmtp++ = AV_PIX_FMT_D3D11VA_VLD;
            *fmtp++ = AV_PIX_FMT_D3D11;
#endif
#if CONFIG_VP9_NVDEC_HWACCEL
            *fmtp++ = AV_PIX_FMT_CUDA;
#endif
#if CONFIG_VP9_VAAPI_HWACCEL
            *fmtp++ = AV_PIX_FMT_VAAPI;
#endif
#if CONFIG_VP9_VDPAU_HWACCEL
            *fmtp++ = AV_PIX_FMT_VDPAU;
#endif
            break;
        case AV_PIX_FMT_YUV420P12:
#if CONFIG_VP9_NVDEC_HWACCEL
            *fmtp++ = AV_PIX_FMT_CUDA;
#endif
#if CONFIG_VP9_VAAPI_HWACCEL
            *fmtp++ = AV_PIX_FMT_VAAPI;
#endif
#if CONFIG_VP9_VDPAU_HWACCEL
            *fmtp++ = AV_PIX_FMT_VDPAU;
#endif
            break;
        }

        *fmtp++ = s->pix_fmt;
        *fmtp = AV_PIX_FMT_NONE;

        ret = ff_thread_get_format(avctx, pix_fmts);
        if (ret < 0)
            return ret;

        avctx->pix_fmt = ret;
        s->gf_fmt = s->pix_fmt;
        s->w = w;
        s->h = h;
    }

    cols = (w + 7) >> 3;
    rows = (h + 7) >> 3;

    if (s->intra_pred_data[0] && cols == s->cols && rows == s->rows && s->pix_fmt == s->last_fmt)
        return 0;

    s->last_fmt = s->pix_fmt;
    s->sb_cols = (w + 63) >> 6;
    s->sb_rows = (h + 63) >> 6;
    s->cols = (w + 7) >> 3;
    s->rows = (h + 7) >> 3;
    lflvl_len = avctx->active_thread_type == FF_THREAD_SLICE ? s->sb_rows : 1;

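    /* Carve the per-frame "above" context arrays and the loopfilter level
     * buffer out of a single allocation; assign() advances p past each
     * sub-array, so the sizes below must stay in sync with the malloc. */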
#define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
    av_freep(&s->intra_pred_data[0]);
    // FIXME we slightly over-allocate here for subsampled chroma, but a little
    // bit of padding shouldn't affect performance...
    p = av_malloc(s->sb_cols * (128 + 192 * bytesperpixel +
                                lflvl_len * sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
    if (!p)
        return AVERROR(ENOMEM);
    assign(s->intra_pred_data[0],  uint8_t *,    64 * bytesperpixel);
    assign(s->intra_pred_data[1],  uint8_t *,    64 * bytesperpixel);
    assign(s->intra_pred_data[2],  uint8_t *,    64 * bytesperpixel);
    assign(s->above_y_nnz_ctx,     uint8_t *,    16);
    assign(s->above_mode_ctx,      uint8_t *,    16);
    assign(s->above_mv_ctx,        VP56mv(*)[2], 16);
    assign(s->above_uv_nnz_ctx[0], uint8_t *,    16);
    assign(s->above_uv_nnz_ctx[1], uint8_t *,    16);
    assign(s->above_partition_ctx, uint8_t *,     8);
    assign(s->above_skip_ctx,      uint8_t *,     8);
    assign(s->above_txfm_ctx,      uint8_t *,     8);
    assign(s->above_segpred_ctx,   uint8_t *,     8);
    assign(s->above_intra_ctx,     uint8_t *,     8);
    assign(s->above_comp_ctx,      uint8_t *,     8);
    assign(s->above_ref_ctx,       uint8_t *,     8);
    assign(s->above_filter_ctx,    uint8_t *,     8);
    assign(s->lflvl,               VP9Filter *,   lflvl_len);
#undef assign

    if (s->td) {
        for (i = 0; i < s->active_tile_cols; i++)
            vp9_tile_data_free(&s->td[i]);
    }

    if (s->s.h.bpp != s->last_bpp) {
        ff_vp9dsp_init(&s->dsp, s->s.h.bpp, avctx->flags & AV_CODEC_FLAG_BITEXACT);
        ff_videodsp_init(&s->vdsp, s->s.h.bpp);
        s->last_bpp = s->s.h.bpp;
    }

    return 0;
}

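/* (Re)allocate the block/coefficient/EOB scratch buffers. In 2-pass
 * (frame-thread) mode a single tile context needs room for a whole frame of
 * superblocks, since pass 2 replays stored data; otherwise each active tile
 * column only needs one superblock's worth of scratch space. */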
static int update_block_buffers(AVCodecContext *avctx)
{
    int i;
    VP9Context *s = avctx->priv_data;
    int chroma_blocks, chroma_eobs, bytesperpixel = s->bytesperpixel;
    VP9TileData *td = &s->td[0];

    if (td->b_base && td->block_base && s->block_alloc_using_2pass == s->s.frames[CUR_FRAME].uses_2pass)
        return 0;

    vp9_tile_data_free(td);
    chroma_blocks = 64 * 64 >> (s->ss_h + s->ss_v);
    chroma_eobs   = 16 * 16 >> (s->ss_h + s->ss_v);
    if (s->s.frames[CUR_FRAME].uses_2pass) {
        int sbs = s->sb_cols * s->sb_rows;

        td->b_base = av_malloc_array(s->cols * s->rows, sizeof(VP9Block));
        td->block_base = av_mallocz(((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
                                     16 * 16 + 2 * chroma_eobs) * sbs);
        if (!td->b_base || !td->block_base)
            return AVERROR(ENOMEM);
        td->uvblock_base[0] = td->block_base + sbs * 64 * 64 * bytesperpixel;
        td->uvblock_base[1] = td->uvblock_base[0] + sbs * chroma_blocks * bytesperpixel;
        td->eob_base = (uint8_t *) (td->uvblock_base[1] + sbs * chroma_blocks * bytesperpixel);
        td->uveob_base[0] = td->eob_base + 16 * 16 * sbs;
        td->uveob_base[1] = td->uveob_base[0] + chroma_eobs * sbs;

        if (avctx->export_side_data & AV_CODEC_EXPORT_DATA_VIDEO_ENC_PARAMS) {
            td->block_structure = av_malloc_array(s->cols * s->rows, sizeof(*td->block_structure));
            if (!td->block_structure)
                return AVERROR(ENOMEM);
        }
    } else {
        for (i = 1; i < s->active_tile_cols; i++)
            vp9_tile_data_free(&s->td[i]);

        for (i = 0; i < s->active_tile_cols; i++) {
            s->td[i].b_base = av_malloc(sizeof(VP9Block));
            s->td[i].block_base = av_mallocz((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
                                             16 * 16 + 2 * chroma_eobs);
            if (!s->td[i].b_base || !s->td[i].block_base)
                return AVERROR(ENOMEM);
            s->td[i].uvblock_base[0] = s->td[i].block_base + 64 * 64 * bytesperpixel;
            s->td[i].uvblock_base[1] = s->td[i].uvblock_base[0] + chroma_blocks * bytesperpixel;
            s->td[i].eob_base = (uint8_t *) (s->td[i].uvblock_base[1] + chroma_blocks * bytesperpixel);
            s->td[i].uveob_base[0] = s->td[i].eob_base + 16 * 16;
            s->td[i].uveob_base[1] = s->td[i].uveob_base[0] + chroma_eobs;

            if (avctx->export_side_data & AV_CODEC_EXPORT_DATA_VIDEO_ENC_PARAMS) {
                s->td[i].block_structure = av_malloc_array(s->cols * s->rows, sizeof(*td->block_structure));
                if (!s->td[i].block_structure)
                    return AVERROR(ENOMEM);
            }
        }
    }
    s->block_alloc_using_2pass = s->s.frames[CUR_FRAME].uses_2pass;

    return 0;
}

// The sign bit is at the end, not the start, of a bit sequence
static av_always_inline int get_sbits_inv(GetBitContext *gb, int n)
{
    int v = get_bits(gb, n);
    return get_bits1(gb) ? -v : v;
}

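/* Undo the "recentering" used for differential probability updates: deltas
 * close to the predictor m alternate below/above it (odd v maps below, even
 * v above), while v > 2 * m is too far to wrap and is taken literally. */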
static av_always_inline int inv_recenter_nonneg(int v, int m)
{
    if (v > 2 * m)
        return v;
    if (v & 1)
        return m - ((v + 1) >> 1);
    return m + (v >> 1);
}

// differential forward probability updates
static int update_prob(VP56RangeCoder *c, int p)
{
    static const uint8_t inv_map_table[255] = {
          7,  20,  33,  46,  59,  72,  85,  98, 111, 124, 137, 150, 163, 176,
        189, 202, 215, 228, 241, 254,   1,   2,   3,   4,   5,   6,   8,   9,
         10,  11,  12,  13,  14,  15,  16,  17,  18,  19,  21,  22,  23,  24,
         25,  26,  27,  28,  29,  30,  31,  32,  34,  35,  36,  37,  38,  39,
         40,  41,  42,  43,  44,  45,  47,  48,  49,  50,  51,  52,  53,  54,
         55,  56,  57,  58,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,
         70,  71,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83,  84,
         86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,  99, 100,
        101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
        116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
        131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
        146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
        161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
        177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
        192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
        207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
        222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
        237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
        252, 253, 253,
    };
    int d;

    /* This code is trying to do a differential probability update. For a
     * current probability A in the range [1, 255], the difference to a new
     * probability of any value can be expressed differentially as 1-A, 255-A
     * where some part of this (absolute range) exists both in positive as
     * well as the negative part, whereas another part only exists in one
     * half. We're trying to code this shared part differentially, i.e.
     * times two where the value of the lowest bit specifies the sign, and
     * the single part is then coded on top of this. This absolute difference
     * then again has a value of [0, 254], but a bigger value in this range
     * indicates that we're further away from the original value A, so we
     * can code this as a VLC code, since higher values are increasingly
     * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
     * updates vs. the 'fine, exact' updates further down the range, which
     * adds one extra dimension to this differential update model. */

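    /* The escape tree below yields d in [0,16), [16,32), [32,64) or [64,255);
     * in the last branch, raw values of 65 and up take one extra bit, so
     * larger (and thus less likely) deltas cost more bits to code. */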
    if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 4) + 0;
    } else if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 4) + 16;
    } else if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 5) + 32;
    } else {
        d = vp8_rac_get_uint(c, 7);
        if (d >= 65)
            d = (d << 1) - 65 + vp8_rac_get(c);
        d += 64;
        av_assert2(d < FF_ARRAY_ELEMS(inv_map_table));
    }

    return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
                    255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
}

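/* Parse bit depth, colorspace and subsampling. Profiles 0 and 1 are always
 * 8-bit; profiles 2 and 3 signal 10- or 12-bit with one extra bit. RGB
 * (4:4:4, full range) is only valid in the odd-numbered profiles. */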
static int read_colorspace_details(AVCodecContext *avctx)
{
    static const enum AVColorSpace colorspaces[8] = {
        AVCOL_SPC_UNSPECIFIED, AVCOL_SPC_BT470BG, AVCOL_SPC_BT709, AVCOL_SPC_SMPTE170M,
        AVCOL_SPC_SMPTE240M, AVCOL_SPC_BT2020_NCL, AVCOL_SPC_RESERVED, AVCOL_SPC_RGB,
    };
    VP9Context *s = avctx->priv_data;
    int bits = avctx->profile <= 1 ? 0 : 1 + get_bits1(&s->gb); // 0:8, 1:10, 2:12

    s->bpp_index = bits;
    s->s.h.bpp = 8 + bits * 2;
    s->bytesperpixel = (7 + s->s.h.bpp) >> 3;
    avctx->colorspace = colorspaces[get_bits(&s->gb, 3)];
    if (avctx->colorspace == AVCOL_SPC_RGB) { // RGB = profile 1
        static const enum AVPixelFormat pix_fmt_rgb[3] = {
            AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP12
        };
        s->ss_h = s->ss_v = 0;
        avctx->color_range = AVCOL_RANGE_JPEG;
        s->pix_fmt = pix_fmt_rgb[bits];
        if (avctx->profile & 1) {
            if (get_bits1(&s->gb)) {
                av_log(avctx, AV_LOG_ERROR, "Reserved bit set in RGB\n");
                return AVERROR_INVALIDDATA;
            }
        } else {
            av_log(avctx, AV_LOG_ERROR, "RGB not supported in profile %d\n",
                   avctx->profile);
            return AVERROR_INVALIDDATA;
        }
    } else {
        static const enum AVPixelFormat pix_fmt_for_ss[3][2 /* v */][2 /* h */] = {
            { { AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P },
              { AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV420P } },
            { { AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV422P10 },
              { AV_PIX_FMT_YUV440P10, AV_PIX_FMT_YUV420P10 } },
            { { AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV422P12 },
              { AV_PIX_FMT_YUV440P12, AV_PIX_FMT_YUV420P12 } }
        };
        avctx->color_range = get_bits1(&s->gb) ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
        if (avctx->profile & 1) {
            s->ss_h = get_bits1(&s->gb);
            s->ss_v = get_bits1(&s->gb);
            s->pix_fmt = pix_fmt_for_ss[bits][s->ss_v][s->ss_h];
            if (s->pix_fmt == AV_PIX_FMT_YUV420P) {
                av_log(avctx, AV_LOG_ERROR, "YUV 4:2:0 not supported in profile %d\n",
                       avctx->profile);
                return AVERROR_INVALIDDATA;
            } else if (get_bits1(&s->gb)) {
                av_log(avctx, AV_LOG_ERROR, "Profile %d color details reserved bit set\n",
                       avctx->profile);
                return AVERROR_INVALIDDATA;
            }
        } else {
            s->ss_h = s->ss_v = 1;
            s->pix_fmt = pix_fmt_for_ss[bits][1][1];
        }
    }

    return 0;
}

static int decode_frame_header(AVCodecContext *avctx,
                               const uint8_t *data, int size, int *ref)
{
    VP9Context *s = avctx->priv_data;
    int c, i, j, k, l, m, n, w, h, max, size2, ret, sharp;
    int last_invisible;
    const uint8_t *data2;

    /* general header */
    if ((ret = init_get_bits8(&s->gb, data, size)) < 0) {
        av_log(avctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
        return ret;
    }
    if (get_bits(&s->gb, 2) != 0x2) { // frame marker
        av_log(avctx, AV_LOG_ERROR, "Invalid frame marker\n");
        return AVERROR_INVALIDDATA;
    }
    avctx->profile  = get_bits1(&s->gb);
    avctx->profile |= get_bits1(&s->gb) << 1;
    if (avctx->profile == 3) avctx->profile += get_bits1(&s->gb);
    if (avctx->profile > 3) {
        av_log(avctx, AV_LOG_ERROR, "Profile %d is not yet supported\n", avctx->profile);
        return AVERROR_INVALIDDATA;
    }
    s->s.h.profile = avctx->profile;
    if (get_bits1(&s->gb)) {
        *ref = get_bits(&s->gb, 3);
        return 0;
    }

    s->last_keyframe = s->s.h.keyframe;
    s->s.h.keyframe  = !get_bits1(&s->gb);

    last_invisible   = s->s.h.invisible;
    s->s.h.invisible = !get_bits1(&s->gb);
    s->s.h.errorres  = get_bits1(&s->gb);
    s->s.h.use_last_frame_mvs = !s->s.h.errorres && !last_invisible;

    if (s->s.h.keyframe) {
        if (get_bits(&s->gb, 24) != VP9_SYNCCODE) { // synccode
            av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
            return AVERROR_INVALIDDATA;
        }
        if ((ret = read_colorspace_details(avctx)) < 0)
            return ret;
        // for profile 1, here follows the subsampling bits
        s->s.h.refreshrefmask = 0xff;
        w = get_bits(&s->gb, 16) + 1;
        h = get_bits(&s->gb, 16) + 1;
        if (get_bits1(&s->gb)) // display size
            skip_bits(&s->gb, 32);
    } else {
        s->s.h.intraonly = s->s.h.invisible ? get_bits1(&s->gb) : 0;
        s->s.h.resetctx  = s->s.h.errorres ? 0 : get_bits(&s->gb, 2);
        if (s->s.h.intraonly) {
            if (get_bits(&s->gb, 24) != VP9_SYNCCODE) { // synccode
                av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
                return AVERROR_INVALIDDATA;
            }
            if (avctx->profile >= 1) {
                if ((ret = read_colorspace_details(avctx)) < 0)
                    return ret;
            } else {
                s->ss_h = s->ss_v = 1;
                s->s.h.bpp = 8;
                s->bpp_index = 0;
                s->bytesperpixel = 1;
                s->pix_fmt = AV_PIX_FMT_YUV420P;
                avctx->colorspace = AVCOL_SPC_BT470BG;
                avctx->color_range = AVCOL_RANGE_MPEG;
            }
            s->s.h.refreshrefmask = get_bits(&s->gb, 8);
            w = get_bits(&s->gb, 16) + 1;
            h = get_bits(&s->gb, 16) + 1;
            if (get_bits1(&s->gb)) // display size
                skip_bits(&s->gb, 32);
        } else {
            s->s.h.refreshrefmask = get_bits(&s->gb, 8);
            s->s.h.refidx[0]      = get_bits(&s->gb, 3);
            s->s.h.signbias[0]    = get_bits1(&s->gb) && !s->s.h.errorres;
            s->s.h.refidx[1]      = get_bits(&s->gb, 3);
            s->s.h.signbias[1]    = get_bits1(&s->gb) && !s->s.h.errorres;
            s->s.h.refidx[2]      = get_bits(&s->gb, 3);
            s->s.h.signbias[2]    = get_bits1(&s->gb) && !s->s.h.errorres;
            if (!s->s.refs[s->s.h.refidx[0]].f->buf[0] ||
                !s->s.refs[s->s.h.refidx[1]].f->buf[0] ||
                !s->s.refs[s->s.h.refidx[2]].f->buf[0]) {
                av_log(avctx, AV_LOG_ERROR, "Not all references are available\n");
                return AVERROR_INVALIDDATA;
            }
            if (get_bits1(&s->gb)) {
                w = s->s.refs[s->s.h.refidx[0]].f->width;
                h = s->s.refs[s->s.h.refidx[0]].f->height;
            } else if (get_bits1(&s->gb)) {
                w = s->s.refs[s->s.h.refidx[1]].f->width;
                h = s->s.refs[s->s.h.refidx[1]].f->height;
            } else if (get_bits1(&s->gb)) {
                w = s->s.refs[s->s.h.refidx[2]].f->width;
                h = s->s.refs[s->s.h.refidx[2]].f->height;
            } else {
                w = get_bits(&s->gb, 16) + 1;
                h = get_bits(&s->gb, 16) + 1;
            }
            // Note that in this code, "CUR_FRAME" is actually before we
            // have formally allocated a frame, and thus actually represents
            // the _last_ frame
            s->s.h.use_last_frame_mvs &= s->s.frames[CUR_FRAME].tf.f->width  == w &&
                                         s->s.frames[CUR_FRAME].tf.f->height == h;
            if (get_bits1(&s->gb)) // display size
                skip_bits(&s->gb, 32);
            s->s.h.highprecisionmvs = get_bits1(&s->gb);
            s->s.h.filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
                                                    get_bits(&s->gb, 2);
            s->s.h.allowcompinter = s->s.h.signbias[0] != s->s.h.signbias[1] ||
                                    s->s.h.signbias[0] != s->s.h.signbias[2];
            if (s->s.h.allowcompinter) {
                if (s->s.h.signbias[0] == s->s.h.signbias[1]) {
                    s->s.h.fixcompref    = 2;
                    s->s.h.varcompref[0] = 0;
                    s->s.h.varcompref[1] = 1;
                } else if (s->s.h.signbias[0] == s->s.h.signbias[2]) {
                    s->s.h.fixcompref    = 1;
                    s->s.h.varcompref[0] = 0;
                    s->s.h.varcompref[1] = 2;
                } else {
                    s->s.h.fixcompref    = 0;
                    s->s.h.varcompref[0] = 1;
                    s->s.h.varcompref[1] = 2;
                }
            }
        }
    }
    s->s.h.refreshctx   = s->s.h.errorres ? 0 : get_bits1(&s->gb);
    s->s.h.parallelmode = s->s.h.errorres ? 1 : get_bits1(&s->gb);
    s->s.h.framectxid   = c = get_bits(&s->gb, 2);
    if (s->s.h.keyframe || s->s.h.intraonly)
        s->s.h.framectxid = 0; // BUG: libvpx ignores this field in keyframes

    /* loopfilter header data */
    if (s->s.h.keyframe || s->s.h.errorres || s->s.h.intraonly) {
        // reset loopfilter defaults
        s->s.h.lf_delta.ref[0]  = 1;
        s->s.h.lf_delta.ref[1]  = 0;
        s->s.h.lf_delta.ref[2]  = -1;
        s->s.h.lf_delta.ref[3]  = -1;
        s->s.h.lf_delta.mode[0] = 0;
        s->s.h.lf_delta.mode[1] = 0;
        memset(s->s.h.segmentation.feat, 0, sizeof(s->s.h.segmentation.feat));
    }
    s->s.h.filter.level = get_bits(&s->gb, 6);
    sharp = get_bits(&s->gb, 3);
    // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep
    // the old cache values since they are still valid
    if (s->s.h.filter.sharpness != sharp) {
        for (i = 1; i <= 63; i++) {
            int limit = i;

            if (sharp > 0) {
                limit >>= (sharp + 3) >> 2;
                limit = FFMIN(limit, 9 - sharp);
            }
            limit = FFMAX(limit, 1);

            s->filter_lut.lim_lut[i] = limit;
            s->filter_lut.mblim_lut[i] = 2 * (i + 2) + limit;
        }
    }
    s->s.h.filter.sharpness = sharp;
    if ((s->s.h.lf_delta.enabled = get_bits1(&s->gb))) {
        if ((s->s.h.lf_delta.updated = get_bits1(&s->gb))) {
            for (i = 0; i < 4; i++)
                if (get_bits1(&s->gb))
                    s->s.h.lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
            for (i = 0; i < 2; i++)
                if (get_bits1(&s->gb))
                    s->s.h.lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);
        }
    }

    /* quantization header data */
    s->s.h.yac_qi      = get_bits(&s->gb, 8);
    s->s.h.ydc_qdelta  = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
    s->s.h.uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
    s->s.h.uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
    s->s.h.lossless    = s->s.h.yac_qi == 0 && s->s.h.ydc_qdelta == 0 &&
                         s->s.h.uvdc_qdelta == 0 && s->s.h.uvac_qdelta == 0;
    if (s->s.h.lossless)
        avctx->properties |= FF_CODEC_PROPERTY_LOSSLESS;

    /* segmentation header info */
    if ((s->s.h.segmentation.enabled = get_bits1(&s->gb))) {
        if ((s->s.h.segmentation.update_map = get_bits1(&s->gb))) {
            for (i = 0; i < 7; i++)
                s->s.h.segmentation.prob[i] = get_bits1(&s->gb) ?
                                              get_bits(&s->gb, 8) : 255;
            if ((s->s.h.segmentation.temporal = get_bits1(&s->gb)))
                for (i = 0; i < 3; i++)
                    s->s.h.segmentation.pred_prob[i] = get_bits1(&s->gb) ?
                                                       get_bits(&s->gb, 8) : 255;
        }

        if (get_bits1(&s->gb)) {
            s->s.h.segmentation.absolute_vals = get_bits1(&s->gb);
            for (i = 0; i < 8; i++) {
                if ((s->s.h.segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
                    s->s.h.segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
                if ((s->s.h.segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
                    s->s.h.segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
                if ((s->s.h.segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
                    s->s.h.segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
                s->s.h.segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
            }
        }
    }

    // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
    for (i = 0; i < (s->s.h.segmentation.enabled ? 8 : 1); i++) {
        int qyac, qydc, quvac, quvdc, lflvl, sh;

        if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[i].q_enabled) {
            if (s->s.h.segmentation.absolute_vals)
                qyac = av_clip_uintp2(s->s.h.segmentation.feat[i].q_val, 8);
            else
                qyac = av_clip_uintp2(s->s.h.yac_qi + s->s.h.segmentation.feat[i].q_val, 8);
        } else {
            qyac = s->s.h.yac_qi;
        }
        qydc  = av_clip_uintp2(qyac + s->s.h.ydc_qdelta, 8);
        quvdc = av_clip_uintp2(qyac + s->s.h.uvdc_qdelta, 8);
        quvac = av_clip_uintp2(qyac + s->s.h.uvac_qdelta, 8);
        qyac  = av_clip_uintp2(qyac, 8);

        s->s.h.segmentation.feat[i].qmul[0][0] = ff_vp9_dc_qlookup[s->bpp_index][qydc];
        s->s.h.segmentation.feat[i].qmul[0][1] = ff_vp9_ac_qlookup[s->bpp_index][qyac];
        s->s.h.segmentation.feat[i].qmul[1][0] = ff_vp9_dc_qlookup[s->bpp_index][quvdc];
        s->s.h.segmentation.feat[i].qmul[1][1] = ff_vp9_ac_qlookup[s->bpp_index][quvac];

        sh = s->s.h.filter.level >= 32;
        if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[i].lf_enabled) {
            if (s->s.h.segmentation.absolute_vals)
                lflvl = av_clip_uintp2(s->s.h.segmentation.feat[i].lf_val, 6);
            else
                lflvl = av_clip_uintp2(s->s.h.filter.level + s->s.h.segmentation.feat[i].lf_val, 6);
        } else {
            lflvl = s->s.h.filter.level;
        }
        if (s->s.h.lf_delta.enabled) {
            s->s.h.segmentation.feat[i].lflvl[0][0] =
            s->s.h.segmentation.feat[i].lflvl[0][1] =
                av_clip_uintp2(lflvl + (s->s.h.lf_delta.ref[0] * (1 << sh)), 6);
            for (j = 1; j < 4; j++) {
                s->s.h.segmentation.feat[i].lflvl[j][0] =
                    av_clip_uintp2(lflvl + ((s->s.h.lf_delta.ref[j] +
                                             s->s.h.lf_delta.mode[0]) * (1 << sh)), 6);
                s->s.h.segmentation.feat[i].lflvl[j][1] =
                    av_clip_uintp2(lflvl + ((s->s.h.lf_delta.ref[j] +
                                             s->s.h.lf_delta.mode[1]) * (1 << sh)), 6);
            }
        } else {
            memset(s->s.h.segmentation.feat[i].lflvl, lflvl,
                   sizeof(s->s.h.segmentation.feat[i].lflvl));
        }
    }

    /* tiling info */
    if ((ret = update_size(avctx, w, h)) < 0) {
        av_log(avctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d @ %d\n",
               w, h, s->pix_fmt);
        return ret;
    }
    for (s->s.h.tiling.log2_tile_cols = 0;
         s->sb_cols > (64 << s->s.h.tiling.log2_tile_cols);
         s->s.h.tiling.log2_tile_cols++) ;
    for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
    max = FFMAX(0, max - 1);
    while (max > s->s.h.tiling.log2_tile_cols) {
        if (get_bits1(&s->gb))
            s->s.h.tiling.log2_tile_cols++;
        else
            break;
    }
    s->s.h.tiling.log2_tile_rows = decode012(&s->gb);
    s->s.h.tiling.tile_rows = 1 << s->s.h.tiling.log2_tile_rows;
    if (s->s.h.tiling.tile_cols != (1 << s->s.h.tiling.log2_tile_cols)) {
        int n_range_coders;
        VP56RangeCoder *rc;

        if (s->td) {
            for (i = 0; i < s->active_tile_cols; i++)
                vp9_tile_data_free(&s->td[i]);
            av_free(s->td);
        }

        s->s.h.tiling.tile_cols = 1 << s->s.h.tiling.log2_tile_cols;
        vp9_free_entries(avctx);
        s->active_tile_cols = avctx->active_thread_type == FF_THREAD_SLICE ?
                              s->s.h.tiling.tile_cols : 1;
        vp9_alloc_entries(avctx, s->sb_rows);
        if (avctx->active_thread_type == FF_THREAD_SLICE) {
            n_range_coders = 4; // max_tile_rows
        } else {
            n_range_coders = s->s.h.tiling.tile_cols;
        }
        s->td = av_mallocz_array(s->active_tile_cols, sizeof(VP9TileData) +
                                 n_range_coders * sizeof(VP56RangeCoder));
        if (!s->td)
            return AVERROR(ENOMEM);
        rc = (VP56RangeCoder *) &s->td[s->active_tile_cols];
        for (i = 0; i < s->active_tile_cols; i++) {
            s->td[i].s = s;
            s->td[i].c_b = rc;
            rc += n_range_coders;
        }
    }

    /* check reference frames */
    if (!s->s.h.keyframe && !s->s.h.intraonly) {
        int valid_ref_frame = 0;
        for (i = 0; i < 3; i++) {
            AVFrame *ref = s->s.refs[s->s.h.refidx[i]].f;
            int refw = ref->width, refh = ref->height;

            if (ref->format != avctx->pix_fmt) {
                av_log(avctx, AV_LOG_ERROR,
                       "Ref pixfmt (%s) did not match current frame (%s)",
                       av_get_pix_fmt_name(ref->format),
                       av_get_pix_fmt_name(avctx->pix_fmt));
                return AVERROR_INVALIDDATA;
            } else if (refw == w && refh == h) {
                s->mvscale[i][0] = s->mvscale[i][1] = 0;
            } else {
                /* Check to make sure at least one of frames that */
                /* this frame references has valid dimensions     */
                if (w * 2 < refw || h * 2 < refh || w > 16 * refw || h > 16 * refh) {
                    av_log(avctx, AV_LOG_WARNING,
                           "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
                           refw, refh, w, h);
                    s->mvscale[i][0] = s->mvscale[i][1] = REF_INVALID_SCALE;
                    continue;
                }
                s->mvscale[i][0] = (refw << 14) / w;
                s->mvscale[i][1] = (refh << 14) / h;
                s->mvstep[i][0] = 16 * s->mvscale[i][0] >> 14;
                s->mvstep[i][1] = 16 * s->mvscale[i][1] >> 14;
            }
            valid_ref_frame++;
        }
        if (!valid_ref_frame) {
            av_log(avctx, AV_LOG_ERROR, "No valid reference frame is found, bitstream not supported\n");
            return AVERROR_INVALIDDATA;
        }
    }

    if (s->s.h.keyframe || s->s.h.errorres || (s->s.h.intraonly && s->s.h.resetctx == 3)) {
        s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
                           s->prob_ctx[3].p = ff_vp9_default_probs;
        memcpy(s->prob_ctx[0].coef, ff_vp9_default_coef_probs,
               sizeof(ff_vp9_default_coef_probs));
        memcpy(s->prob_ctx[1].coef, ff_vp9_default_coef_probs,
               sizeof(ff_vp9_default_coef_probs));
        memcpy(s->prob_ctx[2].coef, ff_vp9_default_coef_probs,
               sizeof(ff_vp9_default_coef_probs));
        memcpy(s->prob_ctx[3].coef, ff_vp9_default_coef_probs,
               sizeof(ff_vp9_default_coef_probs));
    } else if (s->s.h.intraonly && s->s.h.resetctx == 2) {
        s->prob_ctx[c].p = ff_vp9_default_probs;
        memcpy(s->prob_ctx[c].coef, ff_vp9_default_coef_probs,
               sizeof(ff_vp9_default_coef_probs));
    }

    // next 16 bits is size of the rest of the header (arith-coded)
    s->s.h.compressed_header_size = size2 = get_bits(&s->gb, 16);
    s->s.h.uncompressed_header_size = (get_bits_count(&s->gb) + 7) / 8;

    data2 = align_get_bits(&s->gb);
    if (size2 > size - (data2 - data)) {
        av_log(avctx, AV_LOG_ERROR, "Invalid compressed header size\n");
        return AVERROR_INVALIDDATA;
    }
    ret = ff_vp56_init_range_decoder(&s->c, data2, size2);
    if (ret < 0)
        return ret;

    if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
        av_log(avctx, AV_LOG_ERROR, "Marker bit was set\n");
        return AVERROR_INVALIDDATA;
    }

    for (i = 0; i < s->active_tile_cols; i++) {
        if (s->s.h.keyframe || s->s.h.intraonly) {
            memset(s->td[i].counts.coef, 0, sizeof(s->td[0].counts.coef));
            memset(s->td[i].counts.eob,  0, sizeof(s->td[0].counts.eob));
        } else {
            memset(&s->td[i].counts, 0, sizeof(s->td[0].counts));
        }
        s->td[i].nb_block_structure = 0;
    }

    /* FIXME is it faster to not copy here, but do it down in the fw updates
     * as explicit copies if the fw update is missing (and skip the copy upon
     * fw update)? */
    s->prob.p = s->prob_ctx[c].p;

    // txfm updates
    if (s->s.h.lossless) {
        s->s.h.txfmmode = TX_4X4;
    } else {
        s->s.h.txfmmode = vp8_rac_get_uint(&s->c, 2);
        if (s->s.h.txfmmode == 3)
            s->s.h.txfmmode += vp8_rac_get(&s->c);

        if (s->s.h.txfmmode == TX_SWITCHABLE) {
            for (i = 0; i < 2; i++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
            for (i = 0; i < 2; i++)
                for (j = 0; j < 2; j++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.tx16p[i][j] =
                            update_prob(&s->c, s->prob.p.tx16p[i][j]);
            for (i = 0; i < 2; i++)
                for (j = 0; j < 3; j++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.tx32p[i][j] =
                            update_prob(&s->c, s->prob.p.tx32p[i][j]);
        }
    }

    // coef updates
    for (i = 0; i < 4; i++) {
        uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
        if (vp8_rac_get(&s->c)) {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 2; k++)
                    for (l = 0; l < 6; l++)
                        for (m = 0; m < 6; m++) {
                            uint8_t *p = s->prob.coef[i][j][k][l][m];
                            uint8_t *r = ref[j][k][l][m];
                            if (m >= 3 && l == 0) // dc only has 3 pt
                                break;
                            for (n = 0; n < 3; n++) {
                                if (vp56_rac_get_prob_branchy(&s->c, 252))
                                    p[n] = update_prob(&s->c, r[n]);
                                else
                                    p[n] = r[n];
                            }
                            memcpy(&p[3], ff_vp9_model_pareto8[p[2]], 8);
                        }
        } else {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 2; k++)
                    for (l = 0; l < 6; l++)
                        for (m = 0; m < 6; m++) {
                            uint8_t *p = s->prob.coef[i][j][k][l][m];
                            uint8_t *r = ref[j][k][l][m];
                            if (m > 3 && l == 0) // dc only has 3 pt
                                break;
                            memcpy(p, r, 3);
                            memcpy(&p[3], ff_vp9_model_pareto8[p[2]], 8);
                        }
        }
        if (s->s.h.txfmmode == i)
            break;
    }

    // mode updates
    for (i = 0; i < 3; i++)
        if (vp56_rac_get_prob_branchy(&s->c, 252))
            s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
    if (!s->s.h.keyframe && !s->s.h.intraonly) {
        for (i = 0; i < 7; i++)
            for (j = 0; j < 3; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_mode[i][j] =
                        update_prob(&s->c, s->prob.p.mv_mode[i][j]);

        if (s->s.h.filtermode == FILTER_SWITCHABLE)
            for (i = 0; i < 4; i++)
                for (j = 0; j < 2; j++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.filter[i][j] =
                            update_prob(&s->c, s->prob.p.filter[i][j]);

        for (i = 0; i < 4; i++)
            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);

        if (s->s.h.allowcompinter) {
            s->s.h.comppredmode = vp8_rac_get(&s->c);
            if (s->s.h.comppredmode)
                s->s.h.comppredmode += vp8_rac_get(&s->c);
            if (s->s.h.comppredmode == PRED_SWITCHABLE)
                for (i = 0; i < 5; i++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.comp[i] =
                            update_prob(&s->c, s->prob.p.comp[i]);
        } else {
            s->s.h.comppredmode = PRED_SINGLEREF;
        }

        if (s->s.h.comppredmode != PRED_COMPREF) {
            for (i = 0; i < 5; i++) {
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.single_ref[i][0] =
                        update_prob(&s->c, s->prob.p.single_ref[i][0]);
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.single_ref[i][1] =
                        update_prob(&s->c, s->prob.p.single_ref[i][1]);
            }
        }

        if (s->s.h.comppredmode != PRED_SINGLEREF) {
            for (i = 0; i < 5; i++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.comp_ref[i] =
                        update_prob(&s->c, s->prob.p.comp_ref[i]);
        }

        for (i = 0; i < 4; i++)
            for (j = 0; j < 9; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.y_mode[i][j] =
                        update_prob(&s->c, s->prob.p.y_mode[i][j]);

        for (i = 0; i < 4; i++)
            for (j = 0; j < 4; j++)
                for (k = 0; k < 3; k++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.partition[3 - i][j][k] =
                            update_prob(&s->c,
                                        s->prob.p.partition[3 - i][j][k]);

        // mv fields don't use the update_prob subexp model for some reason
        for (i = 0; i < 3; i++)
            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

        for (i = 0; i < 2; i++) {
            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.mv_comp[i].sign =
                    (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            for (j = 0; j < 10; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].classes[j] =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.mv_comp[i].class0 =
                    (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            for (j = 0; j < 10; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].bits[j] =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
        }

        for (i = 0; i < 2; i++) {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 3; k++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.mv_comp[i].class0_fp[j][k] =
                            (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            for (j = 0; j < 3; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].fp[j] =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
        }

        if (s->s.h.highprecisionmvs) {
            for (i = 0; i < 2; i++) {
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].class0_hp =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].hp =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
            }
        }
    }

    return (data2 - data) + size2;
}

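/* Recursively decode one superblock: starting at 64x64 (BL_8X8 is the leaf),
 * read the partition mode from the probability tree selected by the above/left
 * partition contexts and recurse into split partitions; blocks overhanging the
 * right or bottom frame edge have some partition choices forced. */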
static void decode_sb(VP9TileData *td, int row, int col, VP9Filter *lflvl,
                      ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
{
    const VP9Context *s = td->s;
    int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
            (((td->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
    const uint8_t *p = s->s.h.keyframe || s->s.h.intraonly ? ff_vp9_default_kf_partition_probs[bl][c] :
                                                             s->prob.p.partition[bl][c];
    enum BlockPartition bp;
    ptrdiff_t hbs = 4 >> bl;
    AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
    ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
    int bytesperpixel = s->bytesperpixel;

    if (bl == BL_8X8) {
        bp = vp8_rac_get_tree(td->c, ff_vp9_partition_tree, p);
        ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
    } else if (col + hbs < s->cols) { // FIXME why not <=?
        if (row + hbs < s->rows) { // FIXME why not <=?
            bp = vp8_rac_get_tree(td->c, ff_vp9_partition_tree, p);
            switch (bp) {
            case PARTITION_NONE:
                ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
                break;
            case PARTITION_H:
                ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
                yoff  += hbs * 8 * y_stride;
                uvoff += hbs * 8 * uv_stride >> s->ss_v;
                ff_vp9_decode_block(td, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
                break;
            case PARTITION_V:
                ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
                yoff  += hbs * 8 * bytesperpixel;
                uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
                ff_vp9_decode_block(td, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
                break;
            case PARTITION_SPLIT:
                decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
                decode_sb(td, row, col + hbs, lflvl,
                          yoff + 8 * hbs * bytesperpixel,
                          uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
                yoff  += hbs * 8 * y_stride;
                uvoff += hbs * 8 * uv_stride >> s->ss_v;
                decode_sb(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
                decode_sb(td, row + hbs, col + hbs, lflvl,
                          yoff + 8 * hbs * bytesperpixel,
                          uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
                break;
            default:
                av_assert0(0);
            }
        } else if (vp56_rac_get_prob_branchy(td->c, p[1])) {
            bp = PARTITION_SPLIT;
            decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
            decode_sb(td, row, col + hbs, lflvl,
                      yoff + 8 * hbs * bytesperpixel,
                      uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
        } else {
            bp = PARTITION_H;
            ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
        }
    } else if (row + hbs < s->rows) { // FIXME why not <=?
        if (vp56_rac_get_prob_branchy(td->c, p[2])) {
            bp = PARTITION_SPLIT;
            decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
            yoff  += hbs * 8 * y_stride;
            uvoff += hbs * 8 * uv_stride >> s->ss_v;
            decode_sb(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
        } else {
            bp = PARTITION_V;
            ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
        }
    } else {
        bp = PARTITION_SPLIT;
        decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
    }
    td->counts.partition[bl][c][bp]++;
}

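/* Second-pass variant of decode_sb(): the partition layout was stored in the
 * VP9Block array during pass 1, so replay it here without reading any
 * bitstream syntax. */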
static void decode_sb_mem(VP9TileData *td, int row, int col, VP9Filter *lflvl,
                          ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
{
    const VP9Context *s = td->s;
    VP9Block *b = td->b;
    ptrdiff_t hbs = 4 >> bl;
    AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
    ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
    int bytesperpixel = s->bytesperpixel;

    if (bl == BL_8X8) {
        av_assert2(b->bl == BL_8X8);
        ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
    } else if (td->b->bl == bl) {
        ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
        if (b->bp == PARTITION_H && row + hbs < s->rows) {
            yoff  += hbs * 8 * y_stride;
            uvoff += hbs * 8 * uv_stride >> s->ss_v;
            ff_vp9_decode_block(td, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
        } else if (b->bp == PARTITION_V && col + hbs < s->cols) {
            yoff  += hbs * 8 * bytesperpixel;
            uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
            ff_vp9_decode_block(td, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
        }
    } else {
        decode_sb_mem(td, row, col, lflvl, yoff, uvoff, bl + 1);
        if (col + hbs < s->cols) { // FIXME why not <=?
            if (row + hbs < s->rows) {
                decode_sb_mem(td, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
                              uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
                yoff  += hbs * 8 * y_stride;
                uvoff += hbs * 8 * uv_stride >> s->ss_v;
                decode_sb_mem(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
                decode_sb_mem(td, row + hbs, col + hbs, lflvl,
                              yoff + 8 * hbs * bytesperpixel,
                              uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
            } else {
                yoff  += hbs * 8 * bytesperpixel;
                uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
                decode_sb_mem(td, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
            }
        } else if (row + hbs < s->rows) {
            yoff  += hbs * 8 * y_stride;
            uvoff += hbs * 8 * uv_stride >> s->ss_v;
            decode_sb_mem(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
        }
    }
}

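/* Compute a tile's first and last position (in units of 8x8 blocks) along one
 * dimension: superblock boundaries are distributed evenly via the shift by
 * log2_n, then converted from superblock to block units (<< 3). */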
static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
{
    int sb_start = ( idx      * n) >> log2_n;
    int sb_end   = ((idx + 1) * n) >> log2_n;
    *start = FFMIN(sb_start, n) << 3;
    *end   = FFMIN(sb_end,   n) << 3;
}

static void free_buffers(VP9Context *s)
{
    int i;

    av_freep(&s->intra_pred_data[0]);
    for (i = 0; i < s->active_tile_cols; i++)
        vp9_tile_data_free(&s->td[i]);
}

static av_cold int vp9_decode_free(AVCodecContext *avctx)
{
    VP9Context *s = avctx->priv_data;
    int i;

    for (i = 0; i < 3; i++) {
        vp9_frame_unref(avctx, &s->s.frames[i]);
        av_frame_free(&s->s.frames[i].tf.f);
    }
    av_buffer_pool_uninit(&s->frame_extradata_pool);
    for (i = 0; i < 8; i++) {
        ff_thread_release_buffer(avctx, &s->s.refs[i]);
        av_frame_free(&s->s.refs[i].f);
        ff_thread_release_buffer(avctx, &s->next_refs[i]);
        av_frame_free(&s->next_refs[i].f);
    }

    free_buffers(s);
    vp9_free_entries(avctx);
    av_freep(&s->td);
    return 0;
}

static int decode_tiles(AVCodecContext *avctx,
                        const uint8_t *data, int size)
{
    VP9Context *s = avctx->priv_data;
    VP9TileData *td = &s->td[0];
    int row, col, tile_row, tile_col, ret;
    int bytesperpixel;
    int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
    AVFrame *f;
    ptrdiff_t yoff, uvoff, ls_y, ls_uv;

    f = s->s.frames[CUR_FRAME].tf.f;
    ls_y = f->linesize[0];
    ls_uv = f->linesize[1];
    bytesperpixel = s->bytesperpixel;

    yoff = uvoff = 0;
    for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
        set_tile_offset(&tile_row_start, &tile_row_end,
                        tile_row, s->s.h.tiling.log2_tile_rows, s->sb_rows);

        for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
            int64_t tile_size;

            if (tile_col == s->s.h.tiling.tile_cols - 1 &&
                tile_row == s->s.h.tiling.tile_rows - 1) {
                tile_size = size;
            } else {
                tile_size = AV_RB32(data);
                data += 4;
                size -= 4;
            }
            if (tile_size > size) {
                ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
                return AVERROR_INVALIDDATA;
            }
            ret = ff_vp56_init_range_decoder(&td->c_b[tile_col], data, tile_size);
            if (ret < 0)
                return ret;
            if (vp56_rac_get_prob_branchy(&td->c_b[tile_col], 128)) { // marker bit
                ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
                return AVERROR_INVALIDDATA;
            }
            data += tile_size;
            size -= tile_size;
        }

        for (row = tile_row_start; row < tile_row_end;
             row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
            VP9Filter *lflvl_ptr = s->lflvl;
            ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;

            for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
                set_tile_offset(&tile_col_start, &tile_col_end,
                                tile_col, s->s.h.tiling.log2_tile_cols, s->sb_cols);
                td->tile_col_start = tile_col_start;
                if (s->pass != 2) {
                    memset(td->left_partition_ctx, 0, 8);
                    memset(td->left_skip_ctx, 0, 8);
                    if (s->s.h.keyframe || s->s.h.intraonly) {
                        memset(td->left_mode_ctx, DC_PRED, 16);
                    } else {
                        memset(td->left_mode_ctx, NEARESTMV, 8);
                    }
                    memset(td->left_y_nnz_ctx, 0, 16);
                    memset(td->left_uv_nnz_ctx, 0, 32);
                    memset(td->left_segpred_ctx, 0, 8);

                    td->c = &td->c_b[tile_col];
                }

                for (col = tile_col_start;
                     col < tile_col_end;
                     col += 8, yoff2 += 64 * bytesperpixel,
                     uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
                    // FIXME integrate with lf code (i.e. zero after each
                    // use, similar to invtxfm coefficients, or similar)
                    if (s->pass != 1) {
                        memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
                    }

                    if (s->pass == 2) {
                        decode_sb_mem(td, row, col, lflvl_ptr,
                                      yoff2, uvoff2, BL_64X64);
                    } else {
                        if (vpX_rac_is_end(td->c)) {
                            return AVERROR_INVALIDDATA;
                        }
                        decode_sb(td, row, col, lflvl_ptr,
                                  yoff2, uvoff2, BL_64X64);
                    }
                }
            }

            if (s->pass == 1)
                continue;

            // backup pre-loopfilter reconstruction data for intra
            // prediction of next row of sb64s
            if (row + 8 < s->rows) {
                memcpy(s->intra_pred_data[0],
                       f->data[0] + yoff + 63 * ls_y,
                       8 * s->cols * bytesperpixel);
                memcpy(s->intra_pred_data[1],
                       f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
                       8 * s->cols * bytesperpixel >> s->ss_h);
                memcpy(s->intra_pred_data[2],
                       f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
                       8 * s->cols * bytesperpixel >> s->ss_h);
            }

            // loopfilter one row
            if (s->s.h.filter.level) {
                yoff2 = yoff;
                uvoff2 = uvoff;
                lflvl_ptr = s->lflvl;
                for (col = 0; col < s->cols;
                     col += 8, yoff2 += 64 * bytesperpixel,
                     uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
                    ff_vp9_loopfilter_sb(avctx, lflvl_ptr, row, col,
                                         yoff2, uvoff2);
                }
            }

            // FIXME maybe we can make this more finegrained by running the
            // loopfilter per-block instead of after each sbrow
            // In fact that would also make intra pred left preparation easier?
            ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, row >> 3, 0);
        }
    }
    return 0;
}

#if HAVE_THREADS
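/* Slice-threading worker: each job decodes one tile column (jobnr selects the
 * column) across all tile rows, reporting per-superblock-row progress so that
 * loopfilter_proc() below can follow one completed row at a time. */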
static av_always_inline
int decode_tiles_mt(AVCodecContext *avctx, void *tdata, int jobnr,
                    int threadnr)
{
    VP9Context *s = avctx->priv_data;
    VP9TileData *td = &s->td[jobnr];
    ptrdiff_t uvoff, yoff, ls_y, ls_uv;
    int bytesperpixel = s->bytesperpixel, row, col, tile_row;
    unsigned tile_cols_len;
    int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
    VP9Filter *lflvl_ptr_base;
    AVFrame *f;

    f = s->s.frames[CUR_FRAME].tf.f;
    ls_y = f->linesize[0];
    ls_uv = f->linesize[1];

    set_tile_offset(&tile_col_start, &tile_col_end,
                    jobnr, s->s.h.tiling.log2_tile_cols, s->sb_cols);
    td->tile_col_start = tile_col_start;
    uvoff = (64 * bytesperpixel >> s->ss_h) * (tile_col_start >> 3);
    yoff = (64 * bytesperpixel) * (tile_col_start >> 3);
    lflvl_ptr_base = s->lflvl + (tile_col_start >> 3);

    for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
        set_tile_offset(&tile_row_start, &tile_row_end,
                        tile_row, s->s.h.tiling.log2_tile_rows, s->sb_rows);

        td->c = &td->c_b[tile_row];
        for (row = tile_row_start; row < tile_row_end;
             row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
            ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
            VP9Filter *lflvl_ptr = lflvl_ptr_base + s->sb_cols * (row >> 3);

            memset(td->left_partition_ctx, 0, 8);
            memset(td->left_skip_ctx, 0, 8);
            if (s->s.h.keyframe || s->s.h.intraonly) {
                memset(td->left_mode_ctx, DC_PRED, 16);
            } else {
                memset(td->left_mode_ctx, NEARESTMV, 8);
            }
            memset(td->left_y_nnz_ctx, 0, 16);
            memset(td->left_uv_nnz_ctx, 0, 32);
            memset(td->left_segpred_ctx, 0, 8);

            for (col = tile_col_start;
                 col < tile_col_end;
                 col += 8, yoff2 += 64 * bytesperpixel,
                 uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
                // FIXME integrate with lf code (i.e. zero after each
                // use, similar to invtxfm coefficients, or similar)
                memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
                decode_sb(td, row, col, lflvl_ptr,
                          yoff2, uvoff2, BL_64X64);
            }

            // backup pre-loopfilter reconstruction data for intra
            // prediction of next row of sb64s
            tile_cols_len = tile_col_end - tile_col_start;
            if (row + 8 < s->rows) {
                memcpy(s->intra_pred_data[0] + (tile_col_start * 8 * bytesperpixel),
                       f->data[0] + yoff + 63 * ls_y,
                       8 * tile_cols_len * bytesperpixel);
                memcpy(s->intra_pred_data[1] + (tile_col_start * 8 * bytesperpixel >> s->ss_h),
                       f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
                       8 * tile_cols_len * bytesperpixel >> s->ss_h);
                memcpy(s->intra_pred_data[2] + (tile_col_start * 8 * bytesperpixel >> s->ss_h),
                       f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
                       8 * tile_cols_len * bytesperpixel >> s->ss_h);
            }

            vp9_report_tile_progress(s, row >> 3, 1);
        }
    }
    return 0;
}

static av_always_inline
int loopfilter_proc(AVCodecContext *avctx)
{
    VP9Context *s = avctx->priv_data;
    ptrdiff_t uvoff, yoff, ls_y, ls_uv;
    VP9Filter *lflvl_ptr;
    int bytesperpixel = s->bytesperpixel, col, i;
    AVFrame *f;

    f = s->s.frames[CUR_FRAME].tf.f;
    ls_y = f->linesize[0];
    ls_uv = f->linesize[1];

    for (i = 0; i < s->sb_rows; i++) {
        vp9_await_tile_progress(s, i, s->s.h.tiling.tile_cols);

        if (s->s.h.filter.level) {
            yoff = (ls_y * 64) * i;
            uvoff = (ls_uv * 64 >> s->ss_v) * i;
            lflvl_ptr = s->lflvl + s->sb_cols * i;
            for (col = 0; col < s->cols;
                 col += 8, yoff += 64 * bytesperpixel,
                 uvoff += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
                ff_vp9_loopfilter_sb(avctx, lflvl_ptr, i << 3, col,
                                     yoff, uvoff);
            }
        }
    }
    return 0;
}
#endif

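/* Export per-block quantiser information as AV_VIDEO_ENC_PARAMS_VP9 side data
 * on the output frame; per-block QP deltas are only emitted when segmentation
 * is enabled, since that is the only source of per-block QP changes here. */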
vp9_export_enc_params(VP9Context * s,VP9Frame * frame)1502 static int vp9_export_enc_params(VP9Context *s, VP9Frame *frame)
1503 {
1504 AVVideoEncParams *par;
1505 unsigned int tile, nb_blocks = 0;
1506
1507 if (s->s.h.segmentation.enabled) {
1508 for (tile = 0; tile < s->active_tile_cols; tile++)
1509 nb_blocks += s->td[tile].nb_block_structure;
1510 }
1511
1512 par = av_video_enc_params_create_side_data(frame->tf.f,
1513 AV_VIDEO_ENC_PARAMS_VP9, nb_blocks);
1514 if (!par)
1515 return AVERROR(ENOMEM);
1516
1517 par->qp = s->s.h.yac_qi;
1518 par->delta_qp[0][0] = s->s.h.ydc_qdelta;
1519 par->delta_qp[1][0] = s->s.h.uvdc_qdelta;
1520 par->delta_qp[2][0] = s->s.h.uvdc_qdelta;
1521 par->delta_qp[1][1] = s->s.h.uvac_qdelta;
1522 par->delta_qp[2][1] = s->s.h.uvac_qdelta;
1523
1524 if (nb_blocks) {
1525 unsigned int block = 0;
1526 unsigned int tile, block_tile;
1527
1528 for (tile = 0; tile < s->active_tile_cols; tile++) {
1529 VP9TileData *td = &s->td[tile];
1530
1531 for (block_tile = 0; block_tile < td->nb_block_structure; block_tile++) {
1532 AVVideoBlockParams *b = av_video_enc_params_block(par, block++);
1533 unsigned int row = td->block_structure[block_tile].row;
1534 unsigned int col = td->block_structure[block_tile].col;
1535 uint8_t seg_id = frame->segmentation_map[row * 8 * s->sb_cols + col];
1536
1537 b->src_x = col * 8;
1538 b->src_y = row * 8;
1539 b->w = 1 << (3 + td->block_structure[block_tile].block_size_idx_x);
1540 b->h = 1 << (3 + td->block_structure[block_tile].block_size_idx_y);
1541
1542 if (s->s.h.segmentation.feat[seg_id].q_enabled) {
1543 b->delta_qp = s->s.h.segmentation.feat[seg_id].q_val;
1544 if (s->s.h.segmentation.absolute_vals)
1545 b->delta_qp -= par->qp;
1546 }
1547 }
1548 }
1549 }
1550
1551 return 0;
1552 }
1553
vp9_decode_frame(AVCodecContext * avctx,void * frame,int * got_frame,AVPacket * pkt)1554 static int vp9_decode_frame(AVCodecContext *avctx, void *frame,
1555 int *got_frame, AVPacket *pkt)
1556 {
1557 const uint8_t *data = pkt->data;
1558 int size = pkt->size;
1559 VP9Context *s = avctx->priv_data;
1560 int ret, i, j, ref;
1561 int retain_segmap_ref = s->s.frames[REF_FRAME_SEGMAP].segmentation_map &&
1562 (!s->s.h.segmentation.enabled || !s->s.h.segmentation.update_map);
1563 AVFrame *f;
1564
1565 if ((ret = decode_frame_header(avctx, data, size, &ref)) < 0) {
1566 return ret;
1567 } else if (ret == 0) {
        if (!s->s.refs[ref].f->buf[0]) {
            av_log(avctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
            return AVERROR_INVALIDDATA;
        }
        if ((ret = av_frame_ref(frame, s->s.refs[ref].f)) < 0)
            return ret;
        ((AVFrame *)frame)->pts = pkt->pts;
#if FF_API_PKT_PTS
FF_DISABLE_DEPRECATION_WARNINGS
        ((AVFrame *)frame)->pkt_pts = pkt->pts;
FF_ENABLE_DEPRECATION_WARNINGS
#endif
        ((AVFrame *)frame)->pkt_dts = pkt->dts;
        for (i = 0; i < 8; i++) {
            if (s->next_refs[i].f->buf[0])
                ff_thread_release_buffer(avctx, &s->next_refs[i]);
            if (s->s.refs[i].f->buf[0] &&
                (ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.refs[i])) < 0)
                return ret;
        }
        *got_frame = 1;
        return pkt->size;
    }
    data += ret;
    size -= ret;

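    /* Rotate the three internal frame slots: the frame just decoded becomes
     * the motion-vector (and, unless retained, segmentation-map) reference
     * for the next frame, and a fresh buffer is allocated for CUR_FRAME. */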
    if (!retain_segmap_ref || s->s.h.keyframe || s->s.h.intraonly) {
        if (s->s.frames[REF_FRAME_SEGMAP].tf.f->buf[0])
            vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_SEGMAP]);
        if (!s->s.h.keyframe && !s->s.h.intraonly && !s->s.h.errorres && s->s.frames[CUR_FRAME].tf.f->buf[0] &&
            (ret = vp9_frame_ref(avctx, &s->s.frames[REF_FRAME_SEGMAP], &s->s.frames[CUR_FRAME])) < 0)
            return ret;
    }
    if (s->s.frames[REF_FRAME_MVPAIR].tf.f->buf[0])
        vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_MVPAIR]);
    if (!s->s.h.intraonly && !s->s.h.keyframe && !s->s.h.errorres && s->s.frames[CUR_FRAME].tf.f->buf[0] &&
        (ret = vp9_frame_ref(avctx, &s->s.frames[REF_FRAME_MVPAIR], &s->s.frames[CUR_FRAME])) < 0)
        return ret;
    if (s->s.frames[CUR_FRAME].tf.f->buf[0])
        vp9_frame_unref(avctx, &s->s.frames[CUR_FRAME]);
    if ((ret = vp9_frame_alloc(avctx, &s->s.frames[CUR_FRAME])) < 0)
        return ret;
    f = s->s.frames[CUR_FRAME].tf.f;
    f->key_frame = s->s.h.keyframe;
    f->pict_type = (s->s.h.keyframe || s->s.h.intraonly) ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;

    if (s->s.frames[REF_FRAME_SEGMAP].tf.f->buf[0] &&
        (s->s.frames[REF_FRAME_MVPAIR].tf.f->width  != s->s.frames[CUR_FRAME].tf.f->width ||
         s->s.frames[REF_FRAME_MVPAIR].tf.f->height != s->s.frames[CUR_FRAME].tf.f->height)) {
        vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_SEGMAP]);
    }

    // ref frame setup
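    /* Reference updates are staged in next_refs and only committed to
     * s->s.refs at the 'finish' label, once the frame has decoded. */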
    for (i = 0; i < 8; i++) {
        if (s->next_refs[i].f->buf[0])
            ff_thread_release_buffer(avctx, &s->next_refs[i]);
        if (s->s.h.refreshrefmask & (1 << i)) {
            ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.frames[CUR_FRAME].tf);
        } else if (s->s.refs[i].f->buf[0]) {
            ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.refs[i]);
        }
        if (ret < 0)
            return ret;
    }

    if (avctx->hwaccel) {
        ret = avctx->hwaccel->start_frame(avctx, NULL, 0);
        if (ret < 0)
            return ret;
        ret = avctx->hwaccel->decode_slice(avctx, pkt->data, pkt->size);
        if (ret < 0)
            return ret;
        ret = avctx->hwaccel->end_frame(avctx);
        if (ret < 0)
            return ret;
        goto finish;
    }

    // main tile decode loop
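    /* Reset the "above" context arrays that carry prediction state across
     * superblock rows; cols counts 8x8 blocks, sb_cols 64x64 superblocks. */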
    memset(s->above_partition_ctx, 0, s->cols);
    memset(s->above_skip_ctx, 0, s->cols);
    if (s->s.h.keyframe || s->s.h.intraonly) {
        memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
    } else {
        memset(s->above_mode_ctx, NEARESTMV, s->cols);
    }
    memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
    memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 16 >> s->ss_h);
    memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 16 >> s->ss_h);
    memset(s->above_segpred_ctx, 0, s->cols);
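    /* With frame threading, backward probability adaptation would stall the
     * next frame thread, so such frames are decoded in two passes: pass 1
     * parses symbols and adapts the probabilities, pass 2 reconstructs. */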
    s->pass = s->s.frames[CUR_FRAME].uses_2pass =
        avctx->active_thread_type == FF_THREAD_FRAME && s->s.h.refreshctx && !s->s.h.parallelmode;
    if ((ret = update_block_buffers(avctx)) < 0) {
        av_log(avctx, AV_LOG_ERROR,
               "Failed to allocate block buffers\n");
        return ret;
    }
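    /* In parallel mode the frame context is updated purely from the header
     * (no backward adaptation), so it can be stored, and frame-thread setup
     * finished, before any tile data is decoded. Coefficient probabilities
     * are only copied up to the signalled transform mode. */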
    if (s->s.h.refreshctx && s->s.h.parallelmode) {
        int j, k, l, m;

        for (i = 0; i < 4; i++) {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 2; k++)
                    for (l = 0; l < 6; l++)
                        for (m = 0; m < 6; m++)
                            memcpy(s->prob_ctx[s->s.h.framectxid].coef[i][j][k][l][m],
                                   s->prob.coef[i][j][k][l][m], 3);
            if (s->s.h.txfmmode == i)
                break;
        }
        s->prob_ctx[s->s.h.framectxid].p = s->prob.p;
        ff_thread_finish_setup(avctx);
    } else if (!s->s.h.refreshctx) {
        ff_thread_finish_setup(avctx);
    }

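    /* Reset the per-superblock-row progress counters used by
     * vp9_await_tile_progress() before the tile workers start. */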
#if HAVE_THREADS
    if (avctx->active_thread_type & FF_THREAD_SLICE) {
        for (i = 0; i < s->sb_rows; i++)
            atomic_store(&s->entries[i], 0);
    }
#endif

    do {
        for (i = 0; i < s->active_tile_cols; i++) {
            s->td[i].b = s->td[i].b_base;
            s->td[i].block = s->td[i].block_base;
            s->td[i].uvblock[0] = s->td[i].uvblock_base[0];
            s->td[i].uvblock[1] = s->td[i].uvblock_base[1];
            s->td[i].eob = s->td[i].eob_base;
            s->td[i].uveob[0] = s->td[i].uveob_base[0];
            s->td[i].uveob[1] = s->td[i].uveob_base[1];
            s->td[i].error_info = 0;
        }

#if HAVE_THREADS
        if (avctx->active_thread_type == FF_THREAD_SLICE) {
            int tile_row, tile_col;

            av_assert1(!s->pass);

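            /* Each tile is prefixed by a 32-bit big-endian size field, except
             * the last tile of the frame, which takes all remaining bytes. */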
            for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
                for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
                    int64_t tile_size;

                    if (tile_col == s->s.h.tiling.tile_cols - 1 &&
                        tile_row == s->s.h.tiling.tile_rows - 1) {
                        tile_size = size;
                    } else {
                        tile_size = AV_RB32(data);
                        data += 4;
                        size -= 4;
                    }
                    if (tile_size > size)
                        return AVERROR_INVALIDDATA;
                    ret = ff_vp56_init_range_decoder(&s->td[tile_col].c_b[tile_row], data, tile_size);
                    if (ret < 0)
                        return ret;
                    if (vp56_rac_get_prob_branchy(&s->td[tile_col].c_b[tile_row], 128)) // marker bit
                        return AVERROR_INVALIDDATA;
                    data += tile_size;
                    size -= tile_size;
                }
            }

            ff_slice_thread_execute_with_mainfunc(avctx, decode_tiles_mt, loopfilter_proc, s->td, NULL, s->s.h.tiling.tile_cols);
        } else
#endif
        {
            ret = decode_tiles(avctx, data, size);
            if (ret < 0) {
                ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
                return ret;
            }
        }

        // Sum all counts fields into td[0].counts for tile threading
        if (avctx->active_thread_type == FF_THREAD_SLICE)
            for (i = 1; i < s->s.h.tiling.tile_cols; i++)
                for (j = 0; j < sizeof(s->td[i].counts) / sizeof(unsigned); j++)
                    ((unsigned *)&s->td[0].counts)[j] += ((unsigned *)&s->td[i].counts)[j];

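        /* After the first (or only) pass, adapt the frame context from the
         * accumulated symbol counts and let the next frame thread proceed. */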
        if (s->pass < 2 && s->s.h.refreshctx && !s->s.h.parallelmode) {
            ff_vp9_adapt_probs(s);
            ff_thread_finish_setup(avctx);
        }
    } while (s->pass++ == 1);
    ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);

    if (s->td->error_info < 0) {
        av_log(avctx, AV_LOG_ERROR, "Failed to decode tile data\n");
        s->td->error_info = 0;
        return AVERROR_INVALIDDATA;
    }
    if (avctx->export_side_data & AV_CODEC_EXPORT_DATA_VIDEO_ENC_PARAMS) {
        ret = vp9_export_enc_params(s, &s->s.frames[CUR_FRAME]);
        if (ret < 0)
            return ret;
    }

finish:
    // commit the staged reference frame updates
    for (i = 0; i < 8; i++) {
        if (s->s.refs[i].f->buf[0])
            ff_thread_release_buffer(avctx, &s->s.refs[i]);
        if (s->next_refs[i].f->buf[0] &&
            (ret = ff_thread_ref_frame(&s->s.refs[i], &s->next_refs[i])) < 0)
            return ret;
    }

    if (!s->s.h.invisible) {
        if ((ret = av_frame_ref(frame, s->s.frames[CUR_FRAME].tf.f)) < 0)
            return ret;
        *got_frame = 1;
    }

    return pkt->size;
}

static void vp9_decode_flush(AVCodecContext *avctx)
{
    VP9Context *s = avctx->priv_data;
    int i;

    for (i = 0; i < 3; i++)
        vp9_frame_unref(avctx, &s->s.frames[i]);
    for (i = 0; i < 8; i++)
        ff_thread_release_buffer(avctx, &s->s.refs[i]);
}

static int init_frames(AVCodecContext *avctx)
{
    VP9Context *s = avctx->priv_data;
    int i;

    for (i = 0; i < 3; i++) {
        s->s.frames[i].tf.f = av_frame_alloc();
        if (!s->s.frames[i].tf.f) {
            vp9_decode_free(avctx);
            av_log(avctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
            return AVERROR(ENOMEM);
        }
    }
    for (i = 0; i < 8; i++) {
        s->s.refs[i].f = av_frame_alloc();
        s->next_refs[i].f = av_frame_alloc();
        if (!s->s.refs[i].f || !s->next_refs[i].f) {
            vp9_decode_free(avctx);
            av_log(avctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
            return AVERROR(ENOMEM);
        }
    }

    return 0;
}

static av_cold int vp9_decode_init(AVCodecContext *avctx)
{
    VP9Context *s = avctx->priv_data;

    s->last_bpp = 0;
    s->s.h.filter.sharpness = -1;

    return init_frames(avctx);
}

#if HAVE_THREADS
static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
{
    int i, ret;
    VP9Context *s = dst->priv_data, *ssrc = src->priv_data;

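    /* Frame-threading sync point: carry over the frame slots, the reference
     * list as it will look once the source frame completes (next_refs), and
     * the header/probability state the next frame's parsing depends on. */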
    for (i = 0; i < 3; i++) {
        if (s->s.frames[i].tf.f->buf[0])
            vp9_frame_unref(dst, &s->s.frames[i]);
        if (ssrc->s.frames[i].tf.f->buf[0]) {
            if ((ret = vp9_frame_ref(dst, &s->s.frames[i], &ssrc->s.frames[i])) < 0)
                return ret;
        }
    }
    for (i = 0; i < 8; i++) {
        if (s->s.refs[i].f->buf[0])
            ff_thread_release_buffer(dst, &s->s.refs[i]);
        if (ssrc->next_refs[i].f->buf[0]) {
            if ((ret = ff_thread_ref_frame(&s->s.refs[i], &ssrc->next_refs[i])) < 0)
                return ret;
        }
    }

    s->s.h.invisible = ssrc->s.h.invisible;
    s->s.h.keyframe = ssrc->s.h.keyframe;
    s->s.h.intraonly = ssrc->s.h.intraonly;
    s->ss_v = ssrc->ss_v;
    s->ss_h = ssrc->ss_h;
    s->s.h.segmentation.enabled = ssrc->s.h.segmentation.enabled;
    s->s.h.segmentation.update_map = ssrc->s.h.segmentation.update_map;
    s->s.h.segmentation.absolute_vals = ssrc->s.h.segmentation.absolute_vals;
    s->bytesperpixel = ssrc->bytesperpixel;
    s->gf_fmt = ssrc->gf_fmt;
    s->w = ssrc->w;
    s->h = ssrc->h;
    s->s.h.bpp = ssrc->s.h.bpp;
    s->bpp_index = ssrc->bpp_index;
    s->pix_fmt = ssrc->pix_fmt;
    memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
    memcpy(&s->s.h.lf_delta, &ssrc->s.h.lf_delta, sizeof(s->s.h.lf_delta));
    memcpy(&s->s.h.segmentation.feat, &ssrc->s.h.segmentation.feat,
           sizeof(s->s.h.segmentation.feat));

    return 0;
}
#endif

AVCodec ff_vp9_decoder = {
    .name                  = "vp9",
    .long_name             = NULL_IF_CONFIG_SMALL("Google VP9"),
    .type                  = AVMEDIA_TYPE_VIDEO,
    .id                    = AV_CODEC_ID_VP9,
    .priv_data_size        = sizeof(VP9Context),
    .init                  = vp9_decode_init,
    .close                 = vp9_decode_free,
    .decode                = vp9_decode_frame,
    .capabilities          = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_SLICE_THREADS,
    .caps_internal         = FF_CODEC_CAP_SLICE_THREAD_HAS_MF |
                             FF_CODEC_CAP_ALLOCATE_PROGRESS,
    .flush                 = vp9_decode_flush,
    .update_thread_context = ONLY_IF_THREADS_ENABLED(vp9_decode_update_thread_context),
    .profiles              = NULL_IF_CONFIG_SMALL(ff_vp9_profiles),
    .bsfs                  = "vp9_superframe_split",
    .hw_configs            = (const AVCodecHWConfigInternal *const []) {
#if CONFIG_VP9_DXVA2_HWACCEL
                               HWACCEL_DXVA2(vp9),
#endif
#if CONFIG_VP9_D3D11VA_HWACCEL
                               HWACCEL_D3D11VA(vp9),
#endif
#if CONFIG_VP9_D3D11VA2_HWACCEL
                               HWACCEL_D3D11VA2(vp9),
#endif
#if CONFIG_VP9_NVDEC_HWACCEL
                               HWACCEL_NVDEC(vp9),
#endif
#if CONFIG_VP9_VAAPI_HWACCEL
                               HWACCEL_VAAPI(vp9),
#endif
#if CONFIG_VP9_VDPAU_HWACCEL
                               HWACCEL_VDPAU(vp9),
#endif
                               NULL
                             },
};