/*
 * VP7/VP8 compatible video decoder
 *
 * Copyright (C) 2010 David Conrad
 * Copyright (C) 2010 Ronald S. Bultje
 * Copyright (C) 2010 Fiona Glaser
 * Copyright (C) 2012 Daniel Kang
 * Copyright (C) 2014 Peter Ross
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/imgutils.h"

#include "avcodec.h"
#include "hwconfig.h"
#include "internal.h"
#include "mathops.h"
#include "rectangle.h"
#include "thread.h"
#include "vp8.h"
#include "vp8data.h"

#if ARCH_ARM
#   include "arm/vp8.h"
#endif

#if CONFIG_VP7_DECODER && CONFIG_VP8_DECODER
#define VPX(vp7, f) (vp7 ? vp7_ ## f : vp8_ ## f)
#elif CONFIG_VP7_DECODER
#define VPX(vp7, f) vp7_ ## f
#else // CONFIG_VP8_DECODER
#define VPX(vp7, f) vp8_ ## f
#endif
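
/* Dispatch helper: VPX(is_vp7, some_func) expands to vp7_some_func or
 * vp8_some_func depending on which decoders are compiled in; when only one
 * decoder is enabled the selection is resolved at compile time. For example,
 * VPX(is_vp7, decode_mvs) would pick the matching MV parser (illustrative;
 * the actual call sites occur later in the file). */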

static void free_buffers(VP8Context *s)
{
    int i;
    if (s->thread_data)
        for (i = 0; i < MAX_THREADS; i++) {
#if HAVE_THREADS
            pthread_cond_destroy(&s->thread_data[i].cond);
            pthread_mutex_destroy(&s->thread_data[i].lock);
#endif
            av_freep(&s->thread_data[i].filter_strength);
        }
    av_freep(&s->thread_data);
    av_freep(&s->macroblocks_base);
    av_freep(&s->intra4x4_pred_mode_top);
    av_freep(&s->top_nnz);
    av_freep(&s->top_border);

    s->macroblocks = NULL;
}

static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
{
    int ret;
    if ((ret = ff_thread_get_buffer(s->avctx, &f->tf,
                                    ref ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
        return ret;
    if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height)))
        goto fail;
    if (s->avctx->hwaccel) {
        const AVHWAccel *hwaccel = s->avctx->hwaccel;
        if (hwaccel->frame_priv_data_size) {
            f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
            if (!f->hwaccel_priv_buf)
                goto fail;
            f->hwaccel_picture_private = f->hwaccel_priv_buf->data;
        }
    }
    return 0;

fail:
    av_buffer_unref(&f->seg_map);
    ff_thread_release_buffer(s->avctx, &f->tf);
    return AVERROR(ENOMEM);
}

static void vp8_release_frame(VP8Context *s, VP8Frame *f)
{
    av_buffer_unref(&f->seg_map);
    av_buffer_unref(&f->hwaccel_priv_buf);
    f->hwaccel_picture_private = NULL;
    ff_thread_release_buffer(s->avctx, &f->tf);
}

#if CONFIG_VP8_DECODER
static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src)
{
    int ret;

    vp8_release_frame(s, dst);

    if ((ret = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0)
        return ret;
    if (src->seg_map &&
        !(dst->seg_map = av_buffer_ref(src->seg_map))) {
        vp8_release_frame(s, dst);
        return AVERROR(ENOMEM);
    }
    if (src->hwaccel_picture_private) {
        dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
        if (!dst->hwaccel_priv_buf)
            return AVERROR(ENOMEM);
        dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
    }

    return 0;
}
#endif /* CONFIG_VP8_DECODER */

static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
{
    VP8Context *s = avctx->priv_data;
    int i;

    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
        vp8_release_frame(s, &s->frames[i]);
    memset(s->framep, 0, sizeof(s->framep));

    if (free_mem)
        free_buffers(s);
}

static void vp8_decode_flush(AVCodecContext *avctx)
{
    vp8_decode_flush_impl(avctx, 0);
}

static VP8Frame *vp8_find_free_buffer(VP8Context *s)
{
    VP8Frame *frame = NULL;
    int i;

    // find a free buffer
    for (i = 0; i < 5; i++)
        if (&s->frames[i] != s->framep[VP56_FRAME_CURRENT]  &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN]   &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
            frame = &s->frames[i];
            break;
        }
    if (i == 5) {
        av_log(s->avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
        abort();
    }
    if (frame->tf.f->buf[0])
        vp8_release_frame(s, frame);

    return frame;
}

static enum AVPixelFormat get_pixel_format(VP8Context *s)
{
    enum AVPixelFormat pix_fmts[] = {
#if CONFIG_VP8_VAAPI_HWACCEL
        AV_PIX_FMT_VAAPI,
#endif
#if CONFIG_VP8_NVDEC_HWACCEL
        AV_PIX_FMT_CUDA,
#endif
        AV_PIX_FMT_YUV420P,
        AV_PIX_FMT_NONE,
    };

    return ff_get_format(s->avctx, pix_fmts);
}

static av_always_inline
int update_dimensions(VP8Context *s, int width, int height, int is_vp7)
{
    AVCodecContext *avctx = s->avctx;
    int i, ret;

    if (width  != s->avctx->width || ((width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) && s->macroblocks_base ||
        height != s->avctx->height) {
        vp8_decode_flush_impl(s->avctx, 1);

        ret = ff_set_dimensions(s->avctx, width, height);
        if (ret < 0)
            return ret;
    }

    if (!s->actually_webp && !is_vp7) {
        s->pix_fmt = get_pixel_format(s);
        if (s->pix_fmt < 0)
            return AVERROR(EINVAL);
        avctx->pix_fmt = s->pix_fmt;
    }

    s->mb_width  = (s->avctx->coded_width  + 15) / 16;
    s->mb_height = (s->avctx->coded_height + 15) / 16;

    s->mb_layout = is_vp7 || avctx->active_thread_type == FF_THREAD_SLICE &&
                   avctx->thread_count > 1;
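    /* A sketch of the two layouts, based on how mb pointers are computed
     * later in this file: with mb_layout set, mode/MV data for the whole
     * frame is kept in a 2-D array with a one-MB guard border, since slice
     * threads (and VP7) parse all macroblocks before reconstruction.
     * Without it, only a sliding window over the current and previous row
     * is kept: each row starts two entries below the one above it, so the
     * macroblock above the current one is always at mb[2]. */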
    if (!s->mb_layout) { // Frame threading and one thread
        s->macroblocks_base       = av_mallocz((s->mb_width + s->mb_height * 2 + 1) *
                                               sizeof(*s->macroblocks));
        s->intra4x4_pred_mode_top = av_mallocz(s->mb_width * 4);
    } else // Sliced threading
        s->macroblocks_base = av_mallocz((s->mb_width + 2) * (s->mb_height + 2) *
                                         sizeof(*s->macroblocks));
    s->top_nnz     = av_mallocz(s->mb_width * sizeof(*s->top_nnz));
    s->top_border  = av_mallocz((s->mb_width + 1) * sizeof(*s->top_border));
    s->thread_data = av_mallocz(MAX_THREADS * sizeof(VP8ThreadData));

    if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
        !s->thread_data || (!s->intra4x4_pred_mode_top && !s->mb_layout)) {
        free_buffers(s);
        return AVERROR(ENOMEM);
    }

    for (i = 0; i < MAX_THREADS; i++) {
        s->thread_data[i].filter_strength =
            av_mallocz(s->mb_width * sizeof(*s->thread_data[0].filter_strength));
        if (!s->thread_data[i].filter_strength) {
            free_buffers(s);
            return AVERROR(ENOMEM);
        }
#if HAVE_THREADS
        pthread_mutex_init(&s->thread_data[i].lock, NULL);
        pthread_cond_init(&s->thread_data[i].cond, NULL);
#endif
    }

    s->macroblocks = s->macroblocks_base + 1;

    return 0;
}

static int vp7_update_dimensions(VP8Context *s, int width, int height)
{
    return update_dimensions(s, width, height, IS_VP7);
}

static int vp8_update_dimensions(VP8Context *s, int width, int height)
{
    return update_dimensions(s, width, height, IS_VP8);
}


static void parse_segment_info(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i;

    s->segmentation.update_map = vp8_rac_get(c);
    s->segmentation.update_feature_data = vp8_rac_get(c);

    if (s->segmentation.update_feature_data) {
        s->segmentation.absolute_vals = vp8_rac_get(c);

        for (i = 0; i < 4; i++)
            s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7);

        for (i = 0; i < 4; i++)
            s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
    }
    if (s->segmentation.update_map)
        for (i = 0; i < 3; i++)
            s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
}

static void update_lf_deltas(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i;

    for (i = 0; i < 4; i++) {
        if (vp8_rac_get(c)) {
            s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);

            if (vp8_rac_get(c))
                s->lf_delta.ref[i] = -s->lf_delta.ref[i];
        }
    }

    for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
        if (vp8_rac_get(c)) {
            s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);

            if (vp8_rac_get(c))
                s->lf_delta.mode[i] = -s->lf_delta.mode[i];
        }
    }
}

static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
{
    const uint8_t *sizes = buf;
    int i;
    int ret;

    s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);

    buf      += 3 * (s->num_coeff_partitions - 1);
    buf_size -= 3 * (s->num_coeff_partitions - 1);
    if (buf_size < 0)
        return -1;

    for (i = 0; i < s->num_coeff_partitions - 1; i++) {
        int size = AV_RL24(sizes + 3 * i);
        if (buf_size - size < 0)
            return -1;
        s->coeff_partition_size[i] = size;

        ret = ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
        if (ret < 0)
            return ret;
        buf      += size;
        buf_size -= size;
    }

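    /* The size of the last partition is not coded: it is simply whatever
     * remains of the frame data after the explicitly-sized partitions. */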
    s->coeff_partition_size[i] = buf_size;
    ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);

    return 0;
}

static void vp7_get_quants(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;

    int yac_qi  = vp8_rac_get_uint(c, 7);
    int ydc_qi  = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int y2dc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int y2ac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int uvdc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int uvac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;

    s->qmat[0].luma_qmul[0]    = vp7_ydc_qlookup[ydc_qi];
    s->qmat[0].luma_qmul[1]    = vp7_yac_qlookup[yac_qi];
    s->qmat[0].luma_dc_qmul[0] = vp7_y2dc_qlookup[y2dc_qi];
    s->qmat[0].luma_dc_qmul[1] = vp7_y2ac_qlookup[y2ac_qi];
    s->qmat[0].chroma_qmul[0]  = FFMIN(vp7_ydc_qlookup[uvdc_qi], 132);
    s->qmat[0].chroma_qmul[1]  = vp7_yac_qlookup[uvac_qi];
}

static void vp8_get_quants(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i, base_qi;

    s->quant.yac_qi     = vp8_rac_get_uint(c, 7);
    s->quant.ydc_delta  = vp8_rac_get_sint(c, 4);
    s->quant.y2dc_delta = vp8_rac_get_sint(c, 4);
    s->quant.y2ac_delta = vp8_rac_get_sint(c, 4);
    s->quant.uvdc_delta = vp8_rac_get_sint(c, 4);
    s->quant.uvac_delta = vp8_rac_get_sint(c, 4);

    for (i = 0; i < 4; i++) {
        if (s->segmentation.enabled) {
            base_qi = s->segmentation.base_quant[i];
            if (!s->segmentation.absolute_vals)
                base_qi += s->quant.yac_qi;
        } else
            base_qi = s->quant.yac_qi;

        s->qmat[i].luma_qmul[0]    = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.ydc_delta,  7)];
        s->qmat[i].luma_qmul[1]    = vp8_ac_qlookup[av_clip_uintp2(base_qi,                       7)];
        s->qmat[i].luma_dc_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.y2dc_delta, 7)] * 2;
        /* 101581>>16 is equivalent to 155/100 */
        s->qmat[i].luma_dc_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + s->quant.y2ac_delta, 7)] * 101581 >> 16;
        s->qmat[i].chroma_qmul[0]  = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.uvdc_delta, 7)];
        s->qmat[i].chroma_qmul[1]  = vp8_ac_qlookup[av_clip_uintp2(base_qi + s->quant.uvac_delta, 7)];

        s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
        s->qmat[i].chroma_qmul[0]  = FFMIN(s->qmat[i].chroma_qmul[0], 132);
    }
}

/**
 * Determine which buffers golden and altref should be updated with after this frame.
 * The spec isn't clear here, so I'm going by my understanding of what libvpx does.
 *
 * Intra frames update all 3 references.
 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set.
 * If the update (golden|altref) flag is set, it's updated with the current frame
 *     if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
 * If the flag is not set, the number read means:
 *     0: no update
 *     1: VP56_FRAME_PREVIOUS
 *     2: update golden with altref, or update altref with golden
 */
static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
{
    VP56RangeCoder *c = &s->c;

    if (update)
        return VP56_FRAME_CURRENT;

    switch (vp8_rac_get_uint(c, 2)) {
    case 1:
        return VP56_FRAME_PREVIOUS;
    case 2:
        return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
    }
    return VP56_FRAME_NONE;
}

static void vp78_reset_probability_tables(VP8Context *s)
{
    int i, j;
    for (i = 0; i < 4; i++)
        for (j = 0; j < 16; j++)
            memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
                   sizeof(s->prob->token[i][j]));
}

static void vp78_update_probability_tables(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i, j, k, l, m;

    for (i = 0; i < 4; i++)
        for (j = 0; j < 8; j++)
            for (k = 0; k < 3; k++)
                for (l = 0; l < NUM_DCT_TOKENS-1; l++)
                    if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
                        int prob = vp8_rac_get_uint(c, 8);
                        for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
                            s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
                    }
}

#define VP7_MVC_SIZE 17
#define VP8_MVC_SIZE 19

static void vp78_update_pred16x16_pred8x8_mvc_probabilities(VP8Context *s,
                                                            int mvc_size)
{
    VP56RangeCoder *c = &s->c;
    int i, j;

    if (vp8_rac_get(c))
        for (i = 0; i < 4; i++)
            s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
    if (vp8_rac_get(c))
        for (i = 0; i < 3; i++)
            s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8);

    // 17.2 MV probability update
    for (i = 0; i < 2; i++)
        for (j = 0; j < mvc_size; j++)
            if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
                s->prob->mvc[i][j] = vp8_rac_get_nn(c);
}

static void update_refs(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;

    int update_golden = vp8_rac_get(c);
    int update_altref = vp8_rac_get(c);

    s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
    s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
}

static void copy_chroma(AVFrame *dst, AVFrame *src, int width, int height)
{
    int i, j;

    for (j = 1; j < 3; j++) {
        for (i = 0; i < height / 2; i++)
            memcpy(dst->data[j] + i * dst->linesize[j],
                   src->data[j] + i * src->linesize[j], width / 2);
    }
}

static void fade(uint8_t *dst, ptrdiff_t dst_linesize,
                 const uint8_t *src, ptrdiff_t src_linesize,
                 int width, int height,
                 int alpha, int beta)
{
    int i, j;
    for (j = 0; j < height; j++) {
        const uint8_t *src2 = src + j * src_linesize;
        uint8_t *dst2 = dst + j * dst_linesize;
        for (i = 0; i < width; i++) {
            uint8_t y = src2[i];
            dst2[i] = av_clip_uint8(y + ((y * beta) >> 8) + alpha);
        }
    }
}

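/* VP7 fade: an interframe may signal that the previous frame should be faded
 * before being used for prediction. Only luma is faded, with
 * dst = clip_uint8(y + (y * beta >> 8) + alpha); chroma is copied unchanged
 * when a separate previous-frame buffer has to be written. */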
static int vp7_fade_frame(VP8Context *s, int alpha, int beta)
{
    int ret;

    if (!s->keyframe && (alpha || beta)) {
        int width  = s->mb_width * 16;
        int height = s->mb_height * 16;
        AVFrame *src, *dst;

        if (!s->framep[VP56_FRAME_PREVIOUS] ||
            !s->framep[VP56_FRAME_GOLDEN]) {
            av_log(s->avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
            return AVERROR_INVALIDDATA;
        }

        dst =
        src = s->framep[VP56_FRAME_PREVIOUS]->tf.f;

        /* preserve the golden frame, write a new previous frame */
        if (s->framep[VP56_FRAME_GOLDEN] == s->framep[VP56_FRAME_PREVIOUS]) {
            s->framep[VP56_FRAME_PREVIOUS] = vp8_find_free_buffer(s);
            if ((ret = vp8_alloc_frame(s, s->framep[VP56_FRAME_PREVIOUS], 1)) < 0)
                return ret;

            dst = s->framep[VP56_FRAME_PREVIOUS]->tf.f;

            copy_chroma(dst, src, width, height);
        }

        fade(dst->data[0], dst->linesize[0],
             src->data[0], src->linesize[0],
             width, height, alpha, beta);
    }

    return 0;
}

static int vp7_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
{
    VP56RangeCoder *c = &s->c;
    int part1_size, hscale, vscale, i, j, ret;
    int width  = s->avctx->width;
    int height = s->avctx->height;
    int alpha = 0;
    int beta  = 0;

    if (buf_size < 4) {
        return AVERROR_INVALIDDATA;
    }

    s->profile = (buf[0] >> 1) & 7;
    if (s->profile > 1) {
        avpriv_request_sample(s->avctx, "Unknown profile %d", s->profile);
        return AVERROR_INVALIDDATA;
    }

    s->keyframe  = !(buf[0] & 1);
    s->invisible = 0;
    part1_size   = AV_RL24(buf) >> 4;

    if (buf_size < 4 - s->profile + part1_size) {
        av_log(s->avctx, AV_LOG_ERROR, "Buffer size %d is too small, needed: %d\n", buf_size, 4 - s->profile + part1_size);
        return AVERROR_INVALIDDATA;
    }

    buf      += 4 - s->profile;
    buf_size -= 4 - s->profile;

    memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));

    ret = ff_vp56_init_range_decoder(c, buf, part1_size);
    if (ret < 0)
        return ret;
    buf      += part1_size;
    buf_size -= part1_size;

    /* A. Dimension information (keyframes only) */
    if (s->keyframe) {
        width  = vp8_rac_get_uint(c, 12);
        height = vp8_rac_get_uint(c, 12);
        hscale = vp8_rac_get_uint(c, 2);
        vscale = vp8_rac_get_uint(c, 2);
        if (hscale || vscale)
            avpriv_request_sample(s->avctx, "Upscaling");

        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        vp78_reset_probability_tables(s);
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
               sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
               sizeof(s->prob->pred8x8c));
        for (i = 0; i < 2; i++)
            memcpy(s->prob->mvc[i], vp7_mv_default_prob[i],
                   sizeof(vp7_mv_default_prob[i]));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
        memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan));
    }

    if (s->keyframe || s->profile > 0)
        memset(s->inter_dc_pred, 0, sizeof(s->inter_dc_pred));

    /* B. Decoding information for all four macroblock-level features */
    for (i = 0; i < 4; i++) {
        s->feature_enabled[i] = vp8_rac_get(c);
        if (s->feature_enabled[i]) {
            s->feature_present_prob[i] = vp8_rac_get_uint(c, 8);

            for (j = 0; j < 3; j++)
                s->feature_index_prob[i][j] =
                    vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;

            if (vp7_feature_value_size[s->profile][i])
                for (j = 0; j < 4; j++)
                    s->feature_value[i][j] =
                        vp8_rac_get(c) ? vp8_rac_get_uint(c, vp7_feature_value_size[s->profile][i]) : 0;
        }
    }

    s->segmentation.enabled    = 0;
    s->segmentation.update_map = 0;
    s->lf_delta.enabled        = 0;

    s->num_coeff_partitions = 1;
    ret = ff_vp56_init_range_decoder(&s->coeff_partition[0], buf, buf_size);
    if (ret < 0)
        return ret;

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height ||
        (width + 15) / 16 != s->mb_width || (height + 15) / 16 != s->mb_height) {
        if ((ret = vp7_update_dimensions(s, width, height)) < 0)
            return ret;
    }

    /* C. Dequantization indices */
    vp7_get_quants(s);

    /* D. Golden frame update flag (a Flag) for interframes only */
    if (!s->keyframe) {
        s->update_golden = vp8_rac_get(c) ? VP56_FRAME_CURRENT : VP56_FRAME_NONE;
        s->sign_bias[VP56_FRAME_GOLDEN] = 0;
    }

    s->update_last          = 1;
    s->update_probabilities = 1;
    s->fade_present         = 1;

    if (s->profile > 0) {
        s->update_probabilities = vp8_rac_get(c);
        if (!s->update_probabilities)
            s->prob[1] = s->prob[0];

        if (!s->keyframe)
            s->fade_present = vp8_rac_get(c);
    }

    if (vpX_rac_is_end(c))
        return AVERROR_INVALIDDATA;
    /* E. Fading information for previous frame */
    if (s->fade_present && vp8_rac_get(c)) {
        alpha = (int8_t) vp8_rac_get_uint(c, 8);
        beta  = (int8_t) vp8_rac_get_uint(c, 8);
    }

    /* F. Loop filter type */
    if (!s->profile)
        s->filter.simple = vp8_rac_get(c);

    /* G. DCT coefficient ordering specification */
    if (vp8_rac_get(c))
        for (i = 1; i < 16; i++)
            s->prob[0].scan[i] = ff_zigzag_scan[vp8_rac_get_uint(c, 4)];

    /* H. Loop filter levels */
    if (s->profile > 0)
        s->filter.simple = vp8_rac_get(c);
    s->filter.level     = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    /* I. DCT coefficient probability update; 13.3 Token Probability Updates */
    vp78_update_probability_tables(s);

    s->mbskip_enabled = 0;

    /* J. The remaining frame header data occurs ONLY FOR INTERFRAMES */
    if (!s->keyframe) {
        s->prob->intra = vp8_rac_get_uint(c, 8);
        s->prob->last  = vp8_rac_get_uint(c, 8);
        vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP7_MVC_SIZE);
    }

    if (vpX_rac_is_end(c))
        return AVERROR_INVALIDDATA;

    if ((ret = vp7_fade_frame(s, alpha, beta)) < 0)
        return ret;

    return 0;
}

static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
{
    VP56RangeCoder *c = &s->c;
    int header_size, hscale, vscale, ret;
    int width  = s->avctx->width;
    int height = s->avctx->height;

    if (buf_size < 3) {
        av_log(s->avctx, AV_LOG_ERROR, "Insufficient data (%d) for header\n", buf_size);
        return AVERROR_INVALIDDATA;
    }

    s->keyframe  = !(buf[0] & 1);
    s->profile   = (buf[0]>>1) & 7;
    s->invisible = !(buf[0] & 0x10);
    header_size  = AV_RL24(buf) >> 5;
    buf      += 3;
    buf_size -= 3;

    s->header_partition_size = header_size;

    if (s->profile > 3)
        av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);

    if (!s->profile)
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab,
               sizeof(s->put_pixels_tab));
    else // profile 1-3 use bilinear, 4+ aren't defined so whatever
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab,
               sizeof(s->put_pixels_tab));

    if (header_size > buf_size - 7 * s->keyframe) {
        av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
        return AVERROR_INVALIDDATA;
    }

    if (s->keyframe) {
        if (AV_RL24(buf) != 0x2a019d) {
            av_log(s->avctx, AV_LOG_ERROR,
                   "Invalid start code 0x%x\n", AV_RL24(buf));
            return AVERROR_INVALIDDATA;
        }
        width     = AV_RL16(buf + 3) & 0x3fff;
        height    = AV_RL16(buf + 5) & 0x3fff;
        hscale    = buf[4] >> 6;
        vscale    = buf[6] >> 6;
        buf      += 7;
        buf_size -= 7;

        if (hscale || vscale)
            avpriv_request_sample(s->avctx, "Upscaling");

        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        vp78_reset_probability_tables(s);
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
               sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
               sizeof(s->prob->pred8x8c));
        memcpy(s->prob->mvc, vp8_mv_default_prob,
               sizeof(s->prob->mvc));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
    }

    ret = ff_vp56_init_range_decoder(c, buf, header_size);
    if (ret < 0)
        return ret;
    buf      += header_size;
    buf_size -= header_size;

    if (s->keyframe) {
        s->colorspace = vp8_rac_get(c);
        if (s->colorspace)
            av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
        s->fullrange = vp8_rac_get(c);
    }

    if ((s->segmentation.enabled = vp8_rac_get(c)))
        parse_segment_info(s);
    else
        s->segmentation.update_map = 0; // FIXME: move this to some init function?

    s->filter.simple    = vp8_rac_get(c);
    s->filter.level     = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    if ((s->lf_delta.enabled = vp8_rac_get(c))) {
        s->lf_delta.update = vp8_rac_get(c);
        if (s->lf_delta.update)
            update_lf_deltas(s);
    }

    if (setup_partitions(s, buf, buf_size)) {
        av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
        return AVERROR_INVALIDDATA;
    }

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height ||
        (width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height)
        if ((ret = vp8_update_dimensions(s, width, height)) < 0)
            return ret;

    vp8_get_quants(s);

    if (!s->keyframe) {
        update_refs(s);
        s->sign_bias[VP56_FRAME_GOLDEN]               = vp8_rac_get(c);
        s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
    }

    // if we aren't saving this frame's probabilities for future frames,
    // make a copy of the current probabilities
    if (!(s->update_probabilities = vp8_rac_get(c)))
        s->prob[1] = s->prob[0];

    s->update_last = s->keyframe || vp8_rac_get(c);

    vp78_update_probability_tables(s);

    if ((s->mbskip_enabled = vp8_rac_get(c)))
        s->prob->mbskip = vp8_rac_get_uint(c, 8);

    if (!s->keyframe) {
        s->prob->intra  = vp8_rac_get_uint(c, 8);
        s->prob->last   = vp8_rac_get_uint(c, 8);
        s->prob->golden = vp8_rac_get_uint(c, 8);
        vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP8_MVC_SIZE);
    }

    // Record the entropy coder state here so that hwaccels can use it.
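    // A note on the arithmetic below, based on the vp56 range coder's
    // internals: the coder reads ahead, keeping `bits` at or below zero
    // after renormalization, so the input pointer is backed up by the
    // number of whole look-ahead bytes, and `bit_count` exposes the
    // leftover sub-byte position.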
    s->c.code_word = vp56_rac_renorm(&s->c);
    s->coder_state_at_header_end.input     = s->c.buffer - (-s->c.bits / 8);
    s->coder_state_at_header_end.range     = s->c.high;
    s->coder_state_at_header_end.value     = s->c.code_word >> 16;
    s->coder_state_at_header_end.bit_count = -s->c.bits % 8;

    return 0;
}

static av_always_inline
void clamp_mv(VP8mvbounds *s, VP56mv *dst, const VP56mv *src)
{
    dst->x = av_clip(src->x, av_clip(s->mv_min.x, INT16_MIN, INT16_MAX),
                             av_clip(s->mv_max.x, INT16_MIN, INT16_MAX));
    dst->y = av_clip(src->y, av_clip(s->mv_min.y, INT16_MIN, INT16_MAX),
                             av_clip(s->mv_max.y, INT16_MIN, INT16_MAX));
}

/**
 * Motion vector coding, 17.1.
 */
static av_always_inline int read_mv_component(VP56RangeCoder *c, const uint8_t *p, int vp7)
{
    int bit, x = 0;

    if (vp56_rac_get_prob_branchy(c, p[0])) {
        int i;

        for (i = 0; i < 3; i++)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        for (i = (vp7 ? 7 : 9); i > 3; i--)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
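        /* Bit 3 is coded last. When no bit above bit 2 is set, it is
         * implicit: the long-vector escape already implies a magnitude of
         * at least 8, so the bit is only read when it is not forced. */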
        if (!(x & (vp7 ? 0xF0 : 0xFFF0)) || vp56_rac_get_prob(c, p[12]))
            x += 8;
    } else {
        // small_mvtree
        const uint8_t *ps = p + 2;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + 3 * bit;
        x  += 4 * bit;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + bit;
        x  += 2 * bit;
        x  += vp56_rac_get_prob(c, *ps);
    }

    return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
}

static int vp7_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
{
    return read_mv_component(c, p, 1);
}

static int vp8_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
{
    return read_mv_component(c, p, 0);
}

static av_always_inline
const uint8_t *get_submv_prob(uint32_t left, uint32_t top, int is_vp7)
{
    if (is_vp7)
        return vp7_submv_prob;

    if (left == top)
        return vp8_submv_prob[4 - !!left];
    if (!top)
        return vp8_submv_prob[2];
    return vp8_submv_prob[1 - !!left];
}

/**
 * Split motion vector prediction, 16.4.
 * @returns the number of motion vectors parsed (2, 4 or 16)
 */
static av_always_inline
int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
                    int layout, int is_vp7)
{
    int part_idx;
    int n, num;
    VP8Macroblock *top_mb;
    VP8Macroblock *left_mb = &mb[-1];
    const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning];
    const uint8_t *mbsplits_top, *mbsplits_cur, *firstidx;
    VP56mv *top_mv;
    VP56mv *left_mv = left_mb->bmv;
    VP56mv *cur_mv  = mb->bmv;

    if (!layout) // layout is inlined, s->mb_layout is not
        top_mb = &mb[2];
    else
        top_mb = &mb[-s->mb_width - 1];
    mbsplits_top = vp8_mbsplits[top_mb->partitioning];
    top_mv       = top_mb->bmv;

    if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
        if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1]))
            part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
        else
            part_idx = VP8_SPLITMVMODE_8x8;
    } else {
        part_idx = VP8_SPLITMVMODE_4x4;
    }

    num              = vp8_mbsplit_count[part_idx];
    mbsplits_cur     = vp8_mbsplits[part_idx];
    firstidx         = vp8_mbfirstidx[part_idx];
    mb->partitioning = part_idx;

    for (n = 0; n < num; n++) {
        int k = firstidx[n];
        uint32_t left, above;
        const uint8_t *submv_prob;

        if (!(k & 3))
            left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
        else
            left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
        if (k <= 3)
            above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
        else
            above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);

        submv_prob = get_submv_prob(left, above, is_vp7);

        if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
            if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
                if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
                    mb->bmv[n].y = mb->mv.y +
                                   read_mv_component(c, s->prob->mvc[0], is_vp7);
                    mb->bmv[n].x = mb->mv.x +
                                   read_mv_component(c, s->prob->mvc[1], is_vp7);
                } else {
                    AV_ZERO32(&mb->bmv[n]);
                }
            } else {
                AV_WN32A(&mb->bmv[n], above);
            }
        } else {
            AV_WN32A(&mb->bmv[n], left);
        }
    }

    return num;
}

/**
 * The vp7 reference decoder uses a padding macroblock column (added to right
 * edge of the frame) to guard against illegal macroblock offsets. The
 * algorithm has bugs that permit offsets to straddle the padding column.
 * This function replicates those bugs.
 *
 * @param[out] edge_x macroblock x address
 * @param[out] edge_y macroblock y address
 *
 * @return macroblock offset legal (boolean)
 */
static int vp7_calculate_mb_offset(int mb_x, int mb_y, int mb_width,
                                   int xoffset, int yoffset, int boundary,
                                   int *edge_x, int *edge_y)
{
    int vwidth = mb_width + 1;
    int new = (mb_y + yoffset) * vwidth + mb_x + xoffset;
    if (new < boundary || new % vwidth == vwidth - 1)
        return 0;
    *edge_y = new / vwidth;
    *edge_x = new % vwidth;
    return 1;
}

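/* For SPLIT macroblocks this returns the sub-MV covering the requested
 * subblock; for all other modes bmv[0] holds the motion vector of the
 * whole macroblock. */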
static const VP56mv *get_bmv_ptr(const VP8Macroblock *mb, int subblock)
{
    return &mb->bmv[mb->mode == VP8_MVMODE_SPLIT ? vp8_mbsplits[mb->partitioning][subblock] : 0];
}

static av_always_inline
void vp7_decode_mvs(VP8Context *s, VP8Macroblock *mb,
                    int mb_x, int mb_y, int layout)
{
    VP8Macroblock *mb_edge[12];
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR };
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
    int idx = CNT_ZERO;
    VP56mv near_mv[3];
    uint8_t cnt[3] = { 0 };
    VP56RangeCoder *c = &s->c;
    int i;

    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);
    AV_ZERO32(&near_mv[2]);

    for (i = 0; i < VP7_MV_PRED_COUNT; i++) {
        const VP7MVPred *pred = &vp7_mv_pred[i];
        int edge_x, edge_y;

        if (vp7_calculate_mb_offset(mb_x, mb_y, s->mb_width, pred->xoffset,
                                    pred->yoffset, !s->profile, &edge_x, &edge_y)) {
            VP8Macroblock *edge = mb_edge[i] = (s->mb_layout == 1)
                                               ? s->macroblocks_base + 1 + edge_x +
                                                 (s->mb_width + 1) * (edge_y + 1)
                                               : s->macroblocks + edge_x +
                                                 (s->mb_height - edge_y - 1) * 2;
            uint32_t mv = AV_RN32A(get_bmv_ptr(edge, vp7_mv_pred[i].subblock));
            if (mv) {
                if (AV_RN32A(&near_mv[CNT_NEAREST])) {
                    if (mv == AV_RN32A(&near_mv[CNT_NEAREST])) {
                        idx = CNT_NEAREST;
                    } else if (AV_RN32A(&near_mv[CNT_NEAR])) {
                        if (mv != AV_RN32A(&near_mv[CNT_NEAR]))
                            continue;
                        idx = CNT_NEAR;
                    } else {
                        AV_WN32A(&near_mv[CNT_NEAR], mv);
                        idx = CNT_NEAR;
                    }
                } else {
                    AV_WN32A(&near_mv[CNT_NEAREST], mv);
                    idx = CNT_NEAREST;
                }
            } else {
                idx = CNT_ZERO;
            }
        } else {
            idx = CNT_ZERO;
        }
        cnt[idx] += vp7_mv_pred[i].score;
    }

    mb->partitioning = VP8_SPLITMVMODE_NONE;

    if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;

        if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAREST]][1])) {

            if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][2])) {

                if (cnt[CNT_NEAREST] > cnt[CNT_NEAR])
                    AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAREST] ? 0 : AV_RN32A(&near_mv[CNT_NEAREST]));
                else
                    AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAR]    ? 0 : AV_RN32A(&near_mv[CNT_NEAR]));

                if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP7) - 1];
                } else {
                    mb->mv.y += vp7_read_mv_component(c, s->prob->mvc[0]);
                    mb->mv.x += vp7_read_mv_component(c, s->prob->mvc[1]);
                    mb->bmv[0] = mb->mv;
                }
            } else {
                mb->mv = near_mv[CNT_NEAR];
                mb->bmv[0] = mb->mv;
            }
        } else {
            mb->mv = near_mv[CNT_NEAREST];
            mb->bmv[0] = mb->mv;
        }
    } else {
        mb->mode = VP8_MVMODE_ZERO;
        AV_ZERO32(&mb->mv);
        mb->bmv[0] = mb->mv;
    }
}

static av_always_inline
void vp8_decode_mvs(VP8Context *s, VP8mvbounds *mv_bounds, VP8Macroblock *mb,
                    int mb_x, int mb_y, int layout)
{
    VP8Macroblock *mb_edge[3] = { 0      /* top */,
                                  mb - 1 /* left */,
                                  0      /* top-left */ };
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
    int idx = CNT_ZERO;
    int cur_sign_bias = s->sign_bias[mb->ref_frame];
    int8_t *sign_bias = s->sign_bias;
    VP56mv near_mv[4];
    uint8_t cnt[4] = { 0 };
    VP56RangeCoder *c = &s->c;

    if (!layout) { // layout is inlined (s->mb_layout is not)
        mb_edge[0] = mb + 2;
        mb_edge[2] = mb + 1;
    } else {
        mb_edge[0] = mb - s->mb_width - 1;
        mb_edge[2] = mb - s->mb_width - 2;
    }

    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);
    AV_ZERO32(&near_mv[2]);

    /* Process MB on top, left and top-left */
#define MV_EDGE_CHECK(n)                                                  \
    {                                                                     \
        VP8Macroblock *edge = mb_edge[n];                                 \
        int edge_ref = edge->ref_frame;                                   \
        if (edge_ref != VP56_FRAME_CURRENT) {                             \
            uint32_t mv = AV_RN32A(&edge->mv);                            \
            if (mv) {                                                     \
                if (cur_sign_bias != sign_bias[edge_ref]) {               \
                    /* SWAR negate of the values in mv. */                \
                    mv = ~mv;                                             \
                    mv = ((mv & 0x7fff7fff) +                             \
                          0x00010001) ^ (mv & 0x80008000);                \
                }                                                         \
                if (!n || mv != AV_RN32A(&near_mv[idx]))                  \
                    AV_WN32A(&near_mv[++idx], mv);                        \
                cnt[idx] += 1 + (n != 2);                                 \
            } else                                                        \
                cnt[CNT_ZERO] += 1 + (n != 2);                            \
        }                                                                 \
    }

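    /* Note the weighting: the top and left neighbours (n == 0, 1) add 2 to
     * the matching counter while the top-left (n == 2) adds only 1, which
     * is what the `1 + (n != 2)` term above implements. */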
    MV_EDGE_CHECK(0)
    MV_EDGE_CHECK(1)
    MV_EDGE_CHECK(2)

    mb->partitioning = VP8_SPLITMVMODE_NONE;
    if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;

        /* If we have three distinct MVs, merge first and last if they're the same */
        if (cnt[CNT_SPLITMV] &&
            AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
            cnt[CNT_NEAREST] += 1;

        /* Swap near and nearest if necessary */
        if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
            FFSWAP(uint8_t, cnt[CNT_NEAREST],     cnt[CNT_NEAR]);
            FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
        }

        if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
            if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
                /* Choose the best mv out of 0,0 and the nearest mv */
                clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
                cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode    == VP8_MVMODE_SPLIT) +
                                    (mb_edge[VP8_EDGE_TOP]->mode     == VP8_MVMODE_SPLIT)) * 2 +
                                    (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);

                if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP8) - 1];
                } else {
                    mb->mv.y  += vp8_read_mv_component(c, s->prob->mvc[0]);
                    mb->mv.x  += vp8_read_mv_component(c, s->prob->mvc[1]);
                    mb->bmv[0] = mb->mv;
                }
            } else {
                clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_NEAR]);
                mb->bmv[0] = mb->mv;
            }
        } else {
            clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_NEAREST]);
            mb->bmv[0] = mb->mv;
        }
    } else {
        mb->mode = VP8_MVMODE_ZERO;
        AV_ZERO32(&mb->mv);
        mb->bmv[0] = mb->mv;
    }
}

static av_always_inline
void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
                           int mb_x, int keyframe, int layout)
{
    uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;

    if (layout) {
        VP8Macroblock *mb_top = mb - s->mb_width - 1;
        memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
    }
    if (keyframe) {
        int x, y;
        uint8_t *top;
        uint8_t *const left = s->intra4x4_pred_mode_left;
        if (layout)
            top = mb->intra4x4_pred_mode_top;
        else
            top = s->intra4x4_pred_mode_top + 4 * mb_x;
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
                const uint8_t *ctx;
                ctx       = vp8_pred4x4_prob_intra[top[x]][left[y]];
                *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
                left[y]   = top[x] = *intra4x4;
                intra4x4++;
            }
        }
    } else {
        int i;
        for (i = 0; i < 16; i++)
            intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree,
                                           vp8_pred4x4_prob_inter);
    }
}

static av_always_inline
void decode_mb_mode(VP8Context *s, VP8mvbounds *mv_bounds,
                    VP8Macroblock *mb, int mb_x, int mb_y,
                    uint8_t *segment, uint8_t *ref, int layout, int is_vp7)
{
    VP56RangeCoder *c = &s->c;
    static const char * const vp7_feature_name[] = { "q-index",
                                                     "lf-delta",
                                                     "partial-golden-update",
                                                     "blit-pitch" };
    if (is_vp7) {
        int i;
        *segment = 0;
        for (i = 0; i < 4; i++) {
            if (s->feature_enabled[i]) {
                if (vp56_rac_get_prob_branchy(c, s->feature_present_prob[i])) {
                    int index = vp8_rac_get_tree(c, vp7_feature_index_tree,
                                                 s->feature_index_prob[i]);
                    av_log(s->avctx, AV_LOG_WARNING,
                           "Feature %s present in macroblock (value 0x%x)\n",
                           vp7_feature_name[i], s->feature_value[i][index]);
                }
            }
        }
    } else if (s->segmentation.update_map) {
        int bit  = vp56_rac_get_prob(c, s->prob->segmentid[0]);
        *segment = vp56_rac_get_prob(c, s->prob->segmentid[1+bit]) + 2*bit;
    } else if (s->segmentation.enabled)
        *segment = ref ? *ref : *segment;
    mb->segment = *segment;

    mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;

    if (s->keyframe) {
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra,
                                    vp8_pred16x16_prob_intra);

        if (mb->mode == MODE_I4x4) {
            decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
        } else {
            const uint32_t modes = (is_vp7 ? vp7_pred4x4_mode
                                           : vp8_pred4x4_mode)[mb->mode] * 0x01010101u;
            if (s->mb_layout)
                AV_WN32A(mb->intra4x4_pred_mode_top, modes);
            else
                AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
            AV_WN32A(s->intra4x4_pred_mode_left, modes);
        }

        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
                                                vp8_pred8x8c_prob_intra);
        mb->ref_frame        = VP56_FRAME_CURRENT;
    } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
        // inter MB, 16.2
        if (vp56_rac_get_prob_branchy(c, s->prob->last))
            mb->ref_frame =
                (!is_vp7 && vp56_rac_get_prob(c, s->prob->golden)) ? VP56_FRAME_GOLDEN2 /* altref */
                                                                   : VP56_FRAME_GOLDEN;
        else
            mb->ref_frame = VP56_FRAME_PREVIOUS;
        s->ref_count[mb->ref_frame - 1]++;

        // motion vectors, 16.3
        if (is_vp7)
            vp7_decode_mvs(s, mb, mb_x, mb_y, layout);
        else
            vp8_decode_mvs(s, mv_bounds, mb, mb_x, mb_y, layout);
    } else {
        // intra MB, 16.1
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);

        if (mb->mode == MODE_I4x4)
            decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);

        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
                                                s->prob->pred8x8c);
        mb->ref_frame        = VP56_FRAME_CURRENT;
        mb->partitioning     = VP8_SPLITMVMODE_NONE;
        AV_ZERO32(&mb->bmv[0]);
    }
}

/**
 * @param r arithmetic bitstream reader context
 * @param block destination for block coefficients
 * @param probs probabilities to use when reading trees from the bitstream
 * @param i initial coeff index, 0 unless a separate DC block is coded
 * @param qmul array holding the dc/ac dequant factor at position 0/1
 *
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
static av_always_inline
int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
                                 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                                 int i, uint8_t *token_prob, int16_t qmul[2],
                                 const uint8_t scan[16], int vp7)
{
    VP56RangeCoder c = *r;
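    /* Entry jumps straight to skip_eob: the caller (decode_block_coeffs)
     * has already consumed the EOB check for the first token. After a
     * DCT_0 token VP8 never codes EOB, hence the second skip_eob jump,
     * while VP7 can, hence the restart label. */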
    goto skip_eob;
    do {
        int coeff;
restart:
        if (!vp56_rac_get_prob_branchy(&c, token_prob[0]))   // DCT_EOB
            break;

skip_eob:
        if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
            if (++i == 16)
                break; // invalid input; blocks should end with EOB
            token_prob = probs[i][0];
            if (vp7)
                goto restart;
            goto skip_eob;
        }

        if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
            coeff = 1;
            token_prob = probs[i + 1][1];
        } else {
            if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
                coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
                if (coeff)
                    coeff += vp56_rac_get_prob(&c, token_prob[5]);
                coeff += 2;
            } else {
                // DCT_CAT*
                if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
                    if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
                        coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
                    } else {                                             // DCT_CAT2
                        coeff  = 7;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
                    }
                } else { // DCT_CAT3 and up
                    int a   = vp56_rac_get_prob(&c, token_prob[8]);
                    int b   = vp56_rac_get_prob(&c, token_prob[9 + a]);
                    int cat = (a << 1) + b;
                    coeff   = 3 + (8 << cat);
                    coeff  += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
                }
            }
            token_prob = probs[i + 1][2];
        }
        block[scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
    } while (++i < 16);

    *r = c;
    return i;
}

static av_always_inline
int inter_predict_dc(int16_t block[16], int16_t pred[2])
{
    int16_t dc = block[0];
    int ret = 0;

    if (pred[1] > 3) {
        dc += pred[0];
        ret = 1;
    }

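    /* pred[0] is the running DC value and pred[1] the length of the run of
     * matching DC values; the branch below restarts the run whenever either
     * value is zero or the signs differ (the `>> 31` extracts the sign of
     * the XOR, and the bitwise | is intentional). */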
1402 if (!pred[0] | !dc | ((int32_t)pred[0] ^ (int32_t)dc) >> 31) {
1403 block[0] = pred[0] = dc;
1404 pred[1] = 0;
1405 } else {
1406 if (pred[0] == dc)
1407 pred[1]++;
1408 block[0] = pred[0] = dc;
1409 }
1410
1411 return ret;
1412 }
1413
vp7_decode_block_coeffs_internal(VP56RangeCoder * r,int16_t block[16],uint8_t probs[16][3][NUM_DCT_TOKENS-1],int i,uint8_t * token_prob,int16_t qmul[2],const uint8_t scan[16])1414 static int vp7_decode_block_coeffs_internal(VP56RangeCoder *r,
1415 int16_t block[16],
1416 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1417 int i, uint8_t *token_prob,
1418 int16_t qmul[2],
1419 const uint8_t scan[16])
1420 {
1421 return decode_block_coeffs_internal(r, block, probs, i,
1422 token_prob, qmul, scan, IS_VP7);
1423 }
1424
1425 #ifndef vp8_decode_block_coeffs_internal
vp8_decode_block_coeffs_internal(VP56RangeCoder * r,int16_t block[16],uint8_t probs[16][3][NUM_DCT_TOKENS-1],int i,uint8_t * token_prob,int16_t qmul[2])1426 static int vp8_decode_block_coeffs_internal(VP56RangeCoder *r,
1427 int16_t block[16],
1428 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1429 int i, uint8_t *token_prob,
1430 int16_t qmul[2])
1431 {
1432 return decode_block_coeffs_internal(r, block, probs, i,
1433 token_prob, qmul, ff_zigzag_scan, IS_VP8);
1434 }
1435 #endif
1436
1437 /**
1438 * @param c arithmetic bitstream reader context
1439 * @param block destination for block coefficients
1440 * @param probs probabilities to use when reading trees from the bitstream
1441 * @param i initial coeff index, 0 unless a separate DC block is coded
1442 * @param zero_nhood the initial prediction context for number of surrounding
1443 * all-zero blocks (only left/top, so 0-2)
1444 * @param qmul array holding the dc/ac dequant factor at position 0/1
1445 * @param scan scan pattern (VP7 only)
1446 *
1447 * @return 0 if no coeffs were decoded
1448 * otherwise, the index of the last coeff decoded plus one
1449 */
1450 static av_always_inline
decode_block_coeffs(VP56RangeCoder * c,int16_t block[16],uint8_t probs[16][3][NUM_DCT_TOKENS-1],int i,int zero_nhood,int16_t qmul[2],const uint8_t scan[16],int vp7)1451 int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16],
1452 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1453 int i, int zero_nhood, int16_t qmul[2],
1454 const uint8_t scan[16], int vp7)
1455 {
1456 uint8_t *token_prob = probs[i][zero_nhood];
1457 if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
1458 return 0;
1459 return vp7 ? vp7_decode_block_coeffs_internal(c, block, probs, i,
1460 token_prob, qmul, scan)
1461 : vp8_decode_block_coeffs_internal(c, block, probs, i,
1462 token_prob, qmul);
1463 }
1464
1465 static av_always_inline
decode_mb_coeffs(VP8Context * s,VP8ThreadData * td,VP56RangeCoder * c,VP8Macroblock * mb,uint8_t t_nnz[9],uint8_t l_nnz[9],int is_vp7)1466 void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c,
1467 VP8Macroblock *mb, uint8_t t_nnz[9], uint8_t l_nnz[9],
1468 int is_vp7)
1469 {
1470 int i, x, y, luma_start = 0, luma_ctx = 3;
1471 int nnz_pred, nnz, nnz_total = 0;
1472 int segment = mb->segment;
1473 int block_dc = 0;
1474
1475 if (mb->mode != MODE_I4x4 && (is_vp7 || mb->mode != VP8_MVMODE_SPLIT)) {
1476 nnz_pred = t_nnz[8] + l_nnz[8];
1477
1478 // decode DC values and do hadamard
1479 nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0,
1480 nnz_pred, s->qmat[segment].luma_dc_qmul,
1481 ff_zigzag_scan, is_vp7);
1482 l_nnz[8] = t_nnz[8] = !!nnz;
1483
1484 if (is_vp7 && mb->mode > MODE_I4x4) {
1485 nnz |= inter_predict_dc(td->block_dc,
1486 s->inter_dc_pred[mb->ref_frame - 1]);
1487 }
1488
1489 if (nnz) {
1490 nnz_total += nnz;
1491 block_dc = 1;
1492 if (nnz == 1)
1493 s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
1494 else
1495 s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
1496 }
1497 luma_start = 1;
1498 luma_ctx = 0;
1499 }
1500
1501 // luma blocks
1502 for (y = 0; y < 4; y++)
1503 for (x = 0; x < 4; x++) {
1504 nnz_pred = l_nnz[y] + t_nnz[x];
1505 nnz = decode_block_coeffs(c, td->block[y][x],
1506 s->prob->token[luma_ctx],
1507 luma_start, nnz_pred,
1508 s->qmat[segment].luma_qmul,
1509 s->prob[0].scan, is_vp7);
1510 /* nnz+block_dc may be one more than the actual last index,
1511 * but we don't care */
1512 td->non_zero_count_cache[y][x] = nnz + block_dc;
1513 t_nnz[x] = l_nnz[y] = !!nnz;
1514 nnz_total += nnz;
1515 }
1516
1517 // chroma blocks
1518 // TODO: what to do about dimensions? 2nd dim for luma is x,
1519 // but for chroma it's (y<<1)|x
1520 for (i = 4; i < 6; i++)
1521 for (y = 0; y < 2; y++)
1522 for (x = 0; x < 2; x++) {
1523 nnz_pred = l_nnz[i + 2 * y] + t_nnz[i + 2 * x];
1524 nnz = decode_block_coeffs(c, td->block[i][(y << 1) + x],
1525 s->prob->token[2], 0, nnz_pred,
1526 s->qmat[segment].chroma_qmul,
1527 s->prob[0].scan, is_vp7);
1528 td->non_zero_count_cache[i][(y << 1) + x] = nnz;
1529 t_nnz[i + 2 * x] = l_nnz[i + 2 * y] = !!nnz;
1530 nnz_total += nnz;
1531 }
1532
1533 // if there were no coded coeffs despite the macroblock not being marked skip,
1534 // we MUST not do the inner loop filter and should not do IDCT
1535 // Since skip isn't used for bitstream prediction, just manually set it.
1536 if (!nnz_total)
1537 mb->skip = 1;
1538 }
1539
1540 static av_always_inline
backup_mb_border(uint8_t * top_border,uint8_t * src_y,uint8_t * src_cb,uint8_t * src_cr,ptrdiff_t linesize,ptrdiff_t uvlinesize,int simple)1541 void backup_mb_border(uint8_t *top_border, uint8_t *src_y,
1542 uint8_t *src_cb, uint8_t *src_cr,
1543 ptrdiff_t linesize, ptrdiff_t uvlinesize, int simple)
1544 {
1545 AV_COPY128(top_border, src_y + 15 * linesize);
1546 if (!simple) {
1547 AV_COPY64(top_border + 16, src_cb + 7 * uvlinesize);
1548 AV_COPY64(top_border + 24, src_cr + 7 * uvlinesize);
1549 }
1550 }
1551
1552 static av_always_inline
xchg_mb_border(uint8_t * top_border,uint8_t * src_y,uint8_t * src_cb,uint8_t * src_cr,ptrdiff_t linesize,ptrdiff_t uvlinesize,int mb_x,int mb_y,int mb_width,int simple,int xchg)1553 void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb,
1554 uint8_t *src_cr, ptrdiff_t linesize, ptrdiff_t uvlinesize, int mb_x,
1555 int mb_y, int mb_width, int simple, int xchg)
1556 {
1557 uint8_t *top_border_m1 = top_border - 32; // for TL prediction
1558 src_y -= linesize;
1559 src_cb -= uvlinesize;
1560 src_cr -= uvlinesize;
1561
1562 #define XCHG(a, b, xchg) \
1563 do { \
1564 if (xchg) \
1565 AV_SWAP64(b, a); \
1566 else \
1567 AV_COPY64(b, a); \
1568 } while (0)
1569
1570 XCHG(top_border_m1 + 8, src_y - 8, xchg);
1571 XCHG(top_border, src_y, xchg);
1572 XCHG(top_border + 8, src_y + 8, 1);
1573 if (mb_x < mb_width - 1)
1574 XCHG(top_border + 32, src_y + 16, 1);
1575
1576 // only copy chroma for normal loop filter
1577 // or to initialize the top row to 127
1578 if (!simple || !mb_y) {
1579 XCHG(top_border_m1 + 16, src_cb - 8, xchg);
1580 XCHG(top_border_m1 + 24, src_cr - 8, xchg);
1581 XCHG(top_border + 16, src_cb, 1);
1582 XCHG(top_border + 24, src_cr, 1);
1583 }
1584 }
1585
1586 static av_always_inline
check_dc_pred8x8_mode(int mode,int mb_x,int mb_y)1587 int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
1588 {
1589 if (!mb_x)
1590 return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
1591 else
1592 return mb_y ? mode : LEFT_DC_PRED8x8;
1593 }
1594
1595 static av_always_inline
check_tm_pred8x8_mode(int mode,int mb_x,int mb_y,int vp7)1596 int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y, int vp7)
1597 {
1598 if (!mb_x)
1599 return mb_y ? VERT_PRED8x8 : (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8);
1600 else
1601 return mb_y ? mode : HOR_PRED8x8;
1602 }
1603
1604 static av_always_inline
check_intra_pred8x8_mode_emuedge(int mode,int mb_x,int mb_y,int vp7)1605 int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y, int vp7)
1606 {
1607 switch (mode) {
1608 case DC_PRED8x8:
1609 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
1610 case VERT_PRED8x8:
1611 return !mb_y ? (vp7 ? DC_128_PRED8x8 : DC_127_PRED8x8) : mode;
1612 case HOR_PRED8x8:
1613 return !mb_x ? (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8) : mode;
1614 case PLANE_PRED8x8: /* TM */
1615 return check_tm_pred8x8_mode(mode, mb_x, mb_y, vp7);
1616 }
1617 return mode;
1618 }
1619
1620 static av_always_inline
check_tm_pred4x4_mode(int mode,int mb_x,int mb_y,int vp7)1621 int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y, int vp7)
1622 {
1623 if (!mb_x) {
1624 return mb_y ? VERT_VP8_PRED : (vp7 ? DC_128_PRED : DC_129_PRED);
1625 } else {
1626 return mb_y ? mode : HOR_VP8_PRED;
1627 }
1628 }
1629
1630 static av_always_inline
check_intra_pred4x4_mode_emuedge(int mode,int mb_x,int mb_y,int * copy_buf,int vp7)1631 int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y,
1632 int *copy_buf, int vp7)
1633 {
1634 switch (mode) {
1635 case VERT_PRED:
1636 if (!mb_x && mb_y) {
1637 *copy_buf = 1;
1638 return mode;
1639 }
1640 /* fall-through */
1641 case DIAG_DOWN_LEFT_PRED:
1642 case VERT_LEFT_PRED:
1643 return !mb_y ? (vp7 ? DC_128_PRED : DC_127_PRED) : mode;
1644 case HOR_PRED:
1645 if (!mb_y) {
1646 *copy_buf = 1;
1647 return mode;
1648 }
1649 /* fall-through */
1650 case HOR_UP_PRED:
1651 return !mb_x ? (vp7 ? DC_128_PRED : DC_129_PRED) : mode;
1652 case TM_VP8_PRED:
1653 return check_tm_pred4x4_mode(mode, mb_x, mb_y, vp7);
1654 case DC_PRED: /* 4x4 DC doesn't use the same "H.264-style" exceptions
1655 * as 16x16/8x8 DC */
1656 case DIAG_DOWN_RIGHT_PRED:
1657 case VERT_RIGHT_PRED:
1658 case HOR_DOWN_PRED:
1659 if (!mb_y || !mb_x)
1660 *copy_buf = 1;
1661 return mode;
1662 }
1663 return mode;
1664 }

static av_always_inline
void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                   VP8Macroblock *mb, int mb_x, int mb_y, int is_vp7)
{
    int x, y, mode, nnz;
    uint32_t tr;

    /* for the first row, we need to run xchg_mb_border to init the top edge
     * to 127; otherwise, skip it if we aren't going to deblock */
    if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
        xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 1);

    if (mb->mode < MODE_I4x4) {
        mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y, is_vp7);
        s->hpc.pred16x16[mode](dst[0], s->linesize);
    } else {
        uint8_t *ptr = dst[0];
        uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
        const uint8_t lo = is_vp7 ? 128 : 127;
        const uint8_t hi = is_vp7 ? 128 : 129;
        uint8_t tr_top[4] = { lo, lo, lo, lo };

        // all blocks on the right edge of the macroblock use the bottom edge
        // of the top macroblock for their topright edge
        uint8_t *tr_right = ptr - s->linesize + 16;

        // if we're on the right edge of the frame, said edge is extended
        // from the top macroblock
        if (mb_y && mb_x == s->mb_width - 1) {
            tr = tr_right[-1] * 0x01010101u;
            tr_right = (uint8_t *) &tr;
        }

        if (mb->skip)
            AV_ZERO128(td->non_zero_count_cache);

        for (y = 0; y < 4; y++) {
            uint8_t *topright = ptr + 4 - s->linesize;
            for (x = 0; x < 4; x++) {
                int copy = 0;
                ptrdiff_t linesize = s->linesize;
                uint8_t *dst = ptr + 4 * x;
                LOCAL_ALIGNED(4, uint8_t, copy_dst, [5 * 8]);

                if ((y == 0 || x == 3) && mb_y == 0) {
                    topright = tr_top;
                } else if (x == 3)
                    topright = tr_right;

                mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x,
                                                        mb_y + y, &copy, is_vp7);
                if (copy) {
                    dst = copy_dst + 12;
                    linesize = 8;
                    if (!(mb_y + y)) {
                        copy_dst[3] = lo;
                        AV_WN32A(copy_dst + 4, lo * 0x01010101U);
                    } else {
                        AV_COPY32(copy_dst + 4, ptr + 4 * x - s->linesize);
                        if (!(mb_x + x)) {
                            copy_dst[3] = hi;
                        } else {
                            copy_dst[3] = ptr[4 * x - s->linesize - 1];
                        }
                    }
                    if (!(mb_x + x)) {
                        copy_dst[11] =
                        copy_dst[19] =
                        copy_dst[27] =
                        copy_dst[35] = hi;
                    } else {
                        copy_dst[11] = ptr[4 * x                   - 1];
                        copy_dst[19] = ptr[4 * x + s->linesize     - 1];
                        copy_dst[27] = ptr[4 * x + s->linesize * 2 - 1];
                        copy_dst[35] = ptr[4 * x + s->linesize * 3 - 1];
                    }
                }
                s->hpc.pred4x4[mode](dst, topright, linesize);
                if (copy) {
                    AV_COPY32(ptr + 4 * x,                   copy_dst + 12);
                    AV_COPY32(ptr + 4 * x + s->linesize,     copy_dst + 20);
                    AV_COPY32(ptr + 4 * x + s->linesize * 2, copy_dst + 28);
                    AV_COPY32(ptr + 4 * x + s->linesize * 3, copy_dst + 36);
                }

                nnz = td->non_zero_count_cache[y][x];
                if (nnz) {
                    if (nnz == 1)
                        s->vp8dsp.vp8_idct_dc_add(ptr + 4 * x,
                                                  td->block[y][x], s->linesize);
                    else
                        s->vp8dsp.vp8_idct_add(ptr + 4 * x,
                                               td->block[y][x], s->linesize);
                }
                topright += 4;
            }

            ptr += 4 * s->linesize;
            intra4x4 += 4;
        }
    }

    mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode,
                                            mb_x, mb_y, is_vp7);
    s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
    s->hpc.pred8x8[mode](dst[2], s->uvlinesize);

    if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
        xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 0);
}

static const uint8_t subpel_idx[3][8] = {
    { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
                                // also function pointer index
    { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
    { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
};
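
/* Worked example for subpel_idx, assuming the six-tap filter bank: the odd
 * phases (1, 3, 5, 7) have zero outer taps and run as 4-tap filters, needing
 * 1 extra pixel on the left and 2 on the right (3 in total); the even phases
 * (2, 4, 6) are full 6-tap, needing 2 left and 3 right (5 in total); phase 0
 * is a plain copy and needs none. So for mx == 4: subpel_idx[0][4] == 2,
 * subpel_idx[1][4] == 5, subpel_idx[2][4] == 3. The bilinear functions need
 * fewer edge pixels, so for them these counts are simply conservative. */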

/**
 * luma MC function
 *
 * @param s        VP8 decoding context
 * @param dst      target buffer for block data at block position
 * @param ref      reference picture buffer at origin (0, 0)
 * @param mv       motion vector (relative to block position) to get pixel data from
 * @param x_off    horizontal position of block from origin (0, 0)
 * @param y_off    vertical position of block from origin (0, 0)
 * @param block_w  width of block (16, 8 or 4)
 * @param block_h  height of block (always same as block_w)
 * @param width    width of src/dst plane data
 * @param height   height of src/dst plane data
 * @param linesize size of a single line of plane data, including padding
 * @param mc_func  motion compensation function pointers (bilinear or sixtap MC)
 */
static av_always_inline
void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
                 ThreadFrame *ref, const VP56mv *mv,
                 int x_off, int y_off, int block_w, int block_h,
                 int width, int height, ptrdiff_t linesize,
                 vp8_mc_func mc_func[3][3])
{
    uint8_t *src = ref->f->data[0];

    if (AV_RN32A(mv)) {
        ptrdiff_t src_linesize = linesize;

        int mx = (mv->x * 2) & 7, mx_idx = subpel_idx[0][mx];
        int my = (mv->y * 2) & 7, my_idx = subpel_idx[0][my];
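
        /* A note on units: in this decoder mv is kept in quarter-pel, so
         * mv->x >> 2 below is the integer-pel offset, while (mv->x * 2) & 7
         * above maps the two fractional bits onto the even phases of the
         * 8-phase filter bank; luma never selects the odd (eighth-pel)
         * phases. E.g. mv->x == 7 gives x_off += 1 and mx == 6. */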

        x_off += mv->x >> 2;
        y_off += mv->y >> 2;

        // edge emulation
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
        src += y_off * linesize + x_off;
        if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                     src - my_idx * linesize - mx_idx,
                                     EDGE_EMU_LINESIZE, linesize,
                                     block_w + subpel_idx[1][mx],
                                     block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx,
                                     width, height);
            src = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
            src_linesize = EDGE_EMU_LINESIZE;
        }
        mc_func[my_idx][mx_idx](dst, linesize, src, src_linesize, block_h, mx, my);
    } else {
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
        mc_func[0][0](dst, linesize, src + y_off * linesize + x_off,
                      linesize, block_h, 0, 0);
    }
}

/**
 * chroma MC function
 *
 * @param s        VP8 decoding context
 * @param dst1     target buffer for block data at block position (U plane)
 * @param dst2     target buffer for block data at block position (V plane)
 * @param ref      reference picture buffer at origin (0, 0)
 * @param mv       motion vector (relative to block position) to get pixel data from
 * @param x_off    horizontal position of block from origin (0, 0)
 * @param y_off    vertical position of block from origin (0, 0)
 * @param block_w  width of block (16, 8 or 4)
 * @param block_h  height of block (always same as block_w)
 * @param width    width of src/dst plane data
 * @param height   height of src/dst plane data
 * @param linesize size of a single line of plane data, including padding
 * @param mc_func  motion compensation function pointers (bilinear or sixtap MC)
 */
static av_always_inline
void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1,
                   uint8_t *dst2, ThreadFrame *ref, const VP56mv *mv,
                   int x_off, int y_off, int block_w, int block_h,
                   int width, int height, ptrdiff_t linesize,
                   vp8_mc_func mc_func[3][3])
{
    uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2];

    if (AV_RN32A(mv)) {
        int mx = mv->x & 7, mx_idx = subpel_idx[0][mx];
        int my = mv->y & 7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 3;
        y_off += mv->y >> 3;
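
        /* Chroma counterpart of the unit conversion in vp8_mc_luma: the
         * chroma planes are half resolution, so the same quarter-pel MV
         * value is effectively eighth-pel here; mv->x & 7 selects among all
         * eight filter phases and mv->x >> 3 is the integer-pel offset. */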

        // edge emulation
        src1 += y_off * linesize + x_off;
        src2 += y_off * linesize + x_off;
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
        if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                     src1 - my_idx * linesize - mx_idx,
                                     EDGE_EMU_LINESIZE, linesize,
                                     block_w + subpel_idx[1][mx],
                                     block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx, width, height);
            src1 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
            mc_func[my_idx][mx_idx](dst1, linesize, src1, EDGE_EMU_LINESIZE, block_h, mx, my);

            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                     src2 - my_idx * linesize - mx_idx,
                                     EDGE_EMU_LINESIZE, linesize,
                                     block_w + subpel_idx[1][mx],
                                     block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx, width, height);
            src2 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
            mc_func[my_idx][mx_idx](dst2, linesize, src2, EDGE_EMU_LINESIZE, block_h, mx, my);
        } else {
            mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
            mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
        }
    } else {
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
        mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
        mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
    }
}

static av_always_inline
void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                 ThreadFrame *ref_frame, int x_off, int y_off,
                 int bx_off, int by_off, int block_w, int block_h,
                 int width, int height, VP56mv *mv)
{
    VP56mv uvmv = *mv;

    /* Y */
    vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
                ref_frame, mv, x_off + bx_off, y_off + by_off,
                block_w, block_h, width, height, s->linesize,
                s->put_pixels_tab[block_w == 8]);

    /* U/V */
    if (s->profile == 3) {
        /* this block applies only to VP8; it is safe to check
         * only the profile, as VP7 profile <= 1 */
        uvmv.x &= ~7;
        uvmv.y &= ~7;
    }
    x_off   >>= 1;
    y_off   >>= 1;
    bx_off  >>= 1;
    by_off  >>= 1;
    width   >>= 1;
    height  >>= 1;
    block_w >>= 1;
    block_h >>= 1;
    vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
                  dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
                  &uvmv, x_off + bx_off, y_off + by_off,
                  block_w, block_h, width, height, s->uvlinesize,
                  s->put_pixels_tab[1 + (block_w == 4)]);
}

/* Fetch pixels for estimated mv 4 macroblocks ahead.
 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
static av_always_inline
void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
                     int mb_xy, int ref)
{
    /* Don't prefetch refs that haven't been used very often this frame. */
    if (s->ref_count[ref - 1] > (mb_xy >> 5)) {
        int x_off = mb_x << 4, y_off = mb_y << 4;
        int mx = (mb->mv.x >> 2) + x_off + 8;
        int my = (mb->mv.y >> 2) + y_off;
        uint8_t **src = s->framep[ref]->tf.f->data;
        int off = mx + (my + (mb_x & 3) * 4) * s->linesize + 64;
        /* For threading, a ff_thread_await_progress here might be useful, but
         * it actually slows down the decoder. Since a bad prefetch doesn't
         * generate bad decoder output, we don't run it here. */
        s->vdsp.prefetch(src[0] + off, s->linesize, 4);
        off = (mx >> 1) + ((my >> 1) + (mb_x & 7)) * s->uvlinesize + 64;
        s->vdsp.prefetch(src[1] + off, src[2] - src[1], 2);
    }
}
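
/* Reading the offsets above: the "+ 64" is 64 bytes, i.e. 4 macroblocks of
 * 8-bit luma, which is what "4 macroblocks ahead" means in practice, and the
 * ref_count gate skips references used in fewer than roughly 1 in 32 of the
 * macroblocks decoded so far this frame. */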

/**
 * Apply motion vectors to prediction buffer, chapter 18.
 */
static av_always_inline
void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                   VP8Macroblock *mb, int mb_x, int mb_y)
{
    int x_off = mb_x << 4, y_off = mb_y << 4;
    int width = 16 * s->mb_width, height = 16 * s->mb_height;
    ThreadFrame *ref = &s->framep[mb->ref_frame]->tf;
    VP56mv *bmv = mb->bmv;

    switch (mb->partitioning) {
    case VP8_SPLITMVMODE_NONE:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 16, 16, width, height, &mb->mv);
        break;
    case VP8_SPLITMVMODE_4x4: {
        int x, y;
        VP56mv uvmv;

        /* Y */
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
                vp8_mc_luma(s, td, dst[0] + 4 * y * s->linesize + x * 4,
                            ref, &bmv[4 * y + x],
                            4 * x + x_off, 4 * y + y_off, 4, 4,
                            width, height, s->linesize,
                            s->put_pixels_tab[2]);
            }
        }

        /* U/V */
        x_off  >>= 1;
        y_off  >>= 1;
        width  >>= 1;
        height >>= 1;
        for (y = 0; y < 2; y++) {
            for (x = 0; x < 2; x++) {
                uvmv.x = mb->bmv[2 * y       * 4 + 2 * x    ].x +
                         mb->bmv[2 * y       * 4 + 2 * x + 1].x +
                         mb->bmv[(2 * y + 1) * 4 + 2 * x    ].x +
                         mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].x;
                uvmv.y = mb->bmv[2 * y       * 4 + 2 * x    ].y +
                         mb->bmv[2 * y       * 4 + 2 * x + 1].y +
                         mb->bmv[(2 * y + 1) * 4 + 2 * x    ].y +
                         mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].y;
                uvmv.x = (uvmv.x + 2 + FF_SIGNBIT(uvmv.x)) >> 2;
                uvmv.y = (uvmv.y + 2 + FF_SIGNBIT(uvmv.y)) >> 2;
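                /* This averages the four luma sub-MVs with rounding to
                 * nearest: FF_SIGNBIT() is -1 for negative sums, turning
                 * the +2 bias into +1 so the arithmetic shift rounds ties
                 * away from zero symmetrically, e.g. 6 -> 2 and -6 -> -2.
                 * Since the chroma planes are half resolution, the
                 * quarter-pel average is effectively eighth-pel here. */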
                if (s->profile == 3) {
                    uvmv.x &= ~7;
                    uvmv.y &= ~7;
                }
                vp8_mc_chroma(s, td, dst[1] + 4 * y * s->uvlinesize + x * 4,
                              dst[2] + 4 * y * s->uvlinesize + x * 4, ref,
                              &uvmv, 4 * x + x_off, 4 * y + y_off, 4, 4,
                              width, height, s->uvlinesize,
                              s->put_pixels_tab[2]);
            }
        }
        break;
    }
    case VP8_SPLITMVMODE_16x8:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 16, 8, width, height, &bmv[0]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 8, 16, 8, width, height, &bmv[1]);
        break;
    case VP8_SPLITMVMODE_8x16:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 8, 16, width, height, &bmv[0]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    8, 0, 8, 16, width, height, &bmv[1]);
        break;
    case VP8_SPLITMVMODE_8x8:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 8, 8, width, height, &bmv[0]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    8, 0, 8, 8, width, height, &bmv[1]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 8, 8, 8, width, height, &bmv[2]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    8, 8, 8, 8, width, height, &bmv[3]);
        break;
    }
}

static av_always_inline
void idct_mb(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], VP8Macroblock *mb)
{
    int x, y, ch;

    if (mb->mode != MODE_I4x4) {
        uint8_t *y_dst = dst[0];
        for (y = 0; y < 4; y++) {
            uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
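            /* nnz4 packs the four per-block nonzero counts of this row of
             * 4x4 blocks into one little-endian word, one count per byte;
             * the (nnz4 & ~0x01010101) test below is a fast check for
             * "any block with more than a DC coefficient". */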
            if (nnz4) {
                if (nnz4 & ~0x01010101) {
                    for (x = 0; x < 4; x++) {
                        if ((uint8_t) nnz4 == 1)
                            s->vp8dsp.vp8_idct_dc_add(y_dst + 4 * x,
                                                      td->block[y][x],
                                                      s->linesize);
                        else if ((uint8_t) nnz4 > 1)
                            s->vp8dsp.vp8_idct_add(y_dst + 4 * x,
                                                   td->block[y][x],
                                                   s->linesize);
                        nnz4 >>= 8;
                        if (!nnz4)
                            break;
                    }
                } else {
                    s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
                }
            }
            y_dst += 4 * s->linesize;
        }
    }

    for (ch = 0; ch < 2; ch++) {
        uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4 + ch]);
        if (nnz4) {
            uint8_t *ch_dst = dst[1 + ch];
            if (nnz4 & ~0x01010101) {
                for (y = 0; y < 2; y++) {
                    for (x = 0; x < 2; x++) {
                        if ((uint8_t) nnz4 == 1)
                            s->vp8dsp.vp8_idct_dc_add(ch_dst + 4 * x,
                                                      td->block[4 + ch][(y << 1) + x],
                                                      s->uvlinesize);
                        else if ((uint8_t) nnz4 > 1)
                            s->vp8dsp.vp8_idct_add(ch_dst + 4 * x,
                                                   td->block[4 + ch][(y << 1) + x],
                                                   s->uvlinesize);
                        nnz4 >>= 8;
                        if (!nnz4)
                            goto chroma_idct_end;
                    }
                    ch_dst += 4 * s->uvlinesize;
                }
            } else {
                s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4 + ch], s->uvlinesize);
            }
        }
chroma_idct_end:
        ;
    }
}

static av_always_inline
void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb,
                         VP8FilterStrength *f, int is_vp7)
{
    int interior_limit, filter_level;

    if (s->segmentation.enabled) {
        filter_level = s->segmentation.filter_level[mb->segment];
        if (!s->segmentation.absolute_vals)
            filter_level += s->filter.level;
    } else
        filter_level = s->filter.level;

    if (s->lf_delta.enabled) {
        filter_level += s->lf_delta.ref[mb->ref_frame];
        filter_level += s->lf_delta.mode[mb->mode];
    }

    filter_level = av_clip_uintp2(filter_level, 6);

    interior_limit = filter_level;
    if (s->filter.sharpness) {
        interior_limit >>= (s->filter.sharpness + 3) >> 2;
        interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
    }
    interior_limit = FFMAX(interior_limit, 1);

    f->filter_level = filter_level;
    f->inner_limit  = interior_limit;
    f->inner_filter = is_vp7 || !mb->skip || mb->mode == MODE_I4x4 ||
                      mb->mode == VP8_MVMODE_SPLIT;
}

static av_always_inline
void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f,
               int mb_x, int mb_y, int is_vp7)
{
    int mbedge_lim, bedge_lim_y, bedge_lim_uv, hev_thresh;
    int filter_level = f->filter_level;
    int inner_limit  = f->inner_limit;
    int inner_filter = f->inner_filter;
    ptrdiff_t linesize   = s->linesize;
    ptrdiff_t uvlinesize = s->uvlinesize;
    static const uint8_t hev_thresh_lut[2][64] = {
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
          3, 3, 3, 3 },
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          2, 2, 2, 2 }
    };
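    /* hev_thresh_lut flattens the per-frame-type high-edge-variance
     * thresholds into a table indexed by [keyframe][filter_level]: for
     * inter frames (row 0) the threshold steps 0/1/2/3 at levels 15, 20
     * and 40; for key frames (row 1) it steps 0/1/2 at levels 15 and 40. */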

    if (!filter_level)
        return;

    if (is_vp7) {
        bedge_lim_y  = filter_level;
        bedge_lim_uv = filter_level * 2;
        mbedge_lim   = filter_level + 2;
    } else {
        bedge_lim_y  =
        bedge_lim_uv = filter_level * 2 + inner_limit;
        mbedge_lim   = bedge_lim_y + 4;
    }

    hev_thresh = hev_thresh_lut[s->keyframe][filter_level];

    if (mb_x) {
        s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);
    }

#define H_LOOP_FILTER_16Y_INNER(cond) \
    if (cond && inner_filter) { \
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] +  4, linesize, \
                                             bedge_lim_y, inner_limit, \
                                             hev_thresh); \
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] +  8, linesize, \
                                             bedge_lim_y, inner_limit, \
                                             hev_thresh); \
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 12, linesize, \
                                             bedge_lim_y, inner_limit, \
                                             hev_thresh); \
        s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4, \
                                             uvlinesize, bedge_lim_uv, \
                                             inner_limit, hev_thresh); \
    }

    H_LOOP_FILTER_16Y_INNER(!is_vp7)

    if (mb_y) {
        s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);
    }

    if (inner_filter) {
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] +  4 * linesize,
                                             linesize, bedge_lim_y,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] +  8 * linesize,
                                             linesize, bedge_lim_y,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 12 * linesize,
                                             linesize, bedge_lim_y,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
                                             dst[2] + 4 * uvlinesize,
                                             uvlinesize, bedge_lim_uv,
                                             inner_limit, hev_thresh);
    }

    H_LOOP_FILTER_16Y_INNER(is_vp7)
}

static av_always_inline
void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f,
                      int mb_x, int mb_y)
{
    int mbedge_lim, bedge_lim;
    int filter_level = f->filter_level;
    int inner_limit  = f->inner_limit;
    int inner_filter = f->inner_filter;
    ptrdiff_t linesize = s->linesize;

    if (!filter_level)
        return;

    bedge_lim  = 2 * filter_level + inner_limit;
    mbedge_lim = bedge_lim + 4;

    if (mb_x)
        s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
    if (inner_filter) {
        s->vp8dsp.vp8_h_loop_filter_simple(dst +  4, linesize, bedge_lim);
        s->vp8dsp.vp8_h_loop_filter_simple(dst +  8, linesize, bedge_lim);
        s->vp8dsp.vp8_h_loop_filter_simple(dst + 12, linesize, bedge_lim);
    }

    if (mb_y)
        s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
    if (inner_filter) {
        s->vp8dsp.vp8_v_loop_filter_simple(dst +  4 * linesize, linesize, bedge_lim);
        s->vp8dsp.vp8_v_loop_filter_simple(dst +  8 * linesize, linesize, bedge_lim);
        s->vp8dsp.vp8_v_loop_filter_simple(dst + 12 * linesize, linesize, bedge_lim);
    }
}

#define MARGIN (16 << 2)
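/* MARGIN is 16 pixels expressed in quarter-pel units (16 << 2 == 64): the
 * MV clamping windows below let motion vectors reach at most one macroblock
 * past the frame edge. */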
static av_always_inline
int vp78_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
                            VP8Frame *prev_frame, int is_vp7)
{
    VP8Context *s = avctx->priv_data;
    int mb_x, mb_y;

    s->mv_bounds.mv_min.y = -MARGIN;
    s->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
    for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
        VP8Macroblock *mb = s->macroblocks_base +
                            ((s->mb_width + 1) * (mb_y + 1) + 1);
        int mb_xy = mb_y * s->mb_width;

        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);

        s->mv_bounds.mv_min.x = -MARGIN;
        s->mv_bounds.mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;

        for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
            if (vpX_rac_is_end(&s->c)) {
                return AVERROR_INVALIDDATA;
            }
            if (mb_y == 0)
                AV_WN32A((mb - s->mb_width - 1)->intra4x4_pred_mode_top,
                         DC_PRED * 0x01010101);
            decode_mb_mode(s, &s->mv_bounds, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
                           prev_frame && prev_frame->seg_map ?
                           prev_frame->seg_map->data + mb_xy : NULL, 1, is_vp7);
            s->mv_bounds.mv_min.x -= 64;
            s->mv_bounds.mv_max.x -= 64;
        }
        s->mv_bounds.mv_min.y -= 64;
        s->mv_bounds.mv_max.y -= 64;
    }
    return 0;
}

static int vp7_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
                                  VP8Frame *prev_frame)
{
    return vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP7);
}

static int vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
                                  VP8Frame *prev_frame)
{
    return vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP8);
}

#if HAVE_THREADS
#define check_thread_pos(td, otd, mb_x_check, mb_y_check) \
    do { \
        int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF); \
        if (atomic_load(&otd->thread_mb_pos) < tmp) { \
            pthread_mutex_lock(&otd->lock); \
            atomic_store(&td->wait_mb_pos, tmp); \
            do { \
                if (atomic_load(&otd->thread_mb_pos) >= tmp) \
                    break; \
                pthread_cond_wait(&otd->cond, &otd->lock); \
            } while (1); \
            atomic_store(&td->wait_mb_pos, INT_MAX); \
            pthread_mutex_unlock(&otd->lock); \
        } \
    } while (0)

#define update_pos(td, mb_y, mb_x) \
    do { \
        int pos = (mb_y << 16) | (mb_x & 0xFFFF); \
        int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && \
                               (num_jobs > 1); \
        int is_null = !next_td || !prev_td; \
        int pos_check = (is_null) ? 1 : \
            (next_td != td && pos >= atomic_load(&next_td->wait_mb_pos)) || \
            (prev_td != td && pos >= atomic_load(&prev_td->wait_mb_pos)); \
        atomic_store(&td->thread_mb_pos, pos); \
        if (sliced_threading && pos_check) { \
            pthread_mutex_lock(&td->lock); \
            pthread_cond_broadcast(&td->cond); \
            pthread_mutex_unlock(&td->lock); \
        } \
    } while (0)
#else
#define check_thread_pos(td, otd, mb_x_check, mb_y_check) while(0)
#define update_pos(td, mb_y, mb_x) while(0)
#endif
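
/* Sketch of the sliced-threading handshake implemented by the two macros
 * above: each thread publishes its progress as a single int packing
 * (mb_y << 16) | mb_x, and a consumer that needs data up to some position
 * waits on the producer's condition variable under the producer's mutex.
 * A row worker that must not get ahead of the previous row would, in
 * effect, do
 *
 *     check_thread_pos(td, prev_td, x, y); // block until prev_td passes (x, y)
 *     ... decode own macroblock ...
 *     update_pos(td, own_y, own_x);        // wake anyone waiting on us
 *
 * The packed format makes "later row, or same row and later column" a
 * plain integer comparison. */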

static av_always_inline int decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
                                                    int jobnr, int threadnr, int is_vp7)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
    int mb_y = atomic_load(&td->thread_mb_pos) >> 16;
    int mb_x, mb_xy = mb_y * s->mb_width;
    int num_jobs = s->num_jobs;
    VP8Frame *curframe = s->curframe, *prev_frame = s->prev_frame;
    VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions - 1)];
    VP8Macroblock *mb;
    uint8_t *dst[3] = {
        curframe->tf.f->data[0] + 16 * mb_y * s->linesize,
        curframe->tf.f->data[1] +  8 * mb_y * s->uvlinesize,
        curframe->tf.f->data[2] +  8 * mb_y * s->uvlinesize
    };

    if (vpX_rac_is_end(c))
        return AVERROR_INVALIDDATA;

    if (mb_y == 0)
        prev_td = td;
    else
        prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
    if (mb_y == s->mb_height - 1)
        next_td = td;
    else
        next_td = &s->thread_data[(jobnr + 1) % num_jobs];
    if (s->mb_layout == 1)
        mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
    else {
        // Make sure the previous frame has read its segmentation map,
        // if we re-use the same map.
        if (prev_frame && s->segmentation.enabled &&
            !s->segmentation.update_map)
            ff_thread_await_progress(&prev_frame->tf, mb_y, 0);
        mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
        memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
    }

    if (!is_vp7 || mb_y == 0)
        memset(td->left_nnz, 0, sizeof(td->left_nnz));

    td->mv_bounds.mv_min.x = -MARGIN;
    td->mv_bounds.mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;

    for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
        if (vpX_rac_is_end(c))
            return AVERROR_INVALIDDATA;
        // Wait for previous thread to read mb_x+2, and reach mb_y-1.
        if (prev_td != td) {
            if (threadnr != 0) {
                check_thread_pos(td, prev_td,
                                 mb_x + (is_vp7 ? 2 : 1),
                                 mb_y - (is_vp7 ? 2 : 1));
            } else {
                check_thread_pos(td, prev_td,
                                 mb_x + (is_vp7 ? 2 : 1) + s->mb_width + 3,
                                 mb_y - (is_vp7 ? 2 : 1));
            }
        }

        s->vdsp.prefetch(dst[0] + (mb_x & 3) * 4 * s->linesize + 64,
                         s->linesize, 4);
        s->vdsp.prefetch(dst[1] + (mb_x & 7) * s->uvlinesize + 64,
                         dst[2] - dst[1], 2);

        if (!s->mb_layout)
            decode_mb_mode(s, &td->mv_bounds, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
                           prev_frame && prev_frame->seg_map ?
                           prev_frame->seg_map->data + mb_xy : NULL, 0, is_vp7);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);

        if (!mb->skip)
            decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz, is_vp7);

        if (mb->mode <= MODE_I4x4)
            intra_predict(s, td, dst, mb, mb_x, mb_y, is_vp7);
        else
            inter_predict(s, td, dst, mb, mb_x, mb_y);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);

        if (!mb->skip) {
            idct_mb(s, td, dst, mb);
        } else {
            AV_ZERO64(td->left_nnz);
            AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned

            /* Reset DC block predictors if they would exist
             * if the mb had coefficients */
            if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
                td->left_nnz[8]     = 0;
                s->top_nnz[mb_x][8] = 0;
            }
        }

        if (s->deblock_filter)
            filter_level_for_mb(s, mb, &td->filter_strength[mb_x], is_vp7);

        if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs - 1) {
            if (s->filter.simple)
                backup_mb_border(s->top_border[mb_x + 1], dst[0],
                                 NULL, NULL, s->linesize, 0, 1);
            else
                backup_mb_border(s->top_border[mb_x + 1], dst[0],
                                 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
        }

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);

        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;
        td->mv_bounds.mv_min.x -= 64;
        td->mv_bounds.mv_max.x -= 64;

        if (mb_x == s->mb_width + 1) {
            update_pos(td, mb_y, s->mb_width + 3);
        } else {
            update_pos(td, mb_y, mb_x);
        }
    }
    return 0;
}

static int vp7_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
                                       int jobnr, int threadnr)
{
    return decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 1);
}

static int vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
                                       int jobnr, int threadnr)
{
    return decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 0);
}

static av_always_inline void filter_mb_row(AVCodecContext *avctx, void *tdata,
                                           int jobnr, int threadnr, int is_vp7)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *td = &s->thread_data[threadnr];
    int mb_x, mb_y = atomic_load(&td->thread_mb_pos) >> 16, num_jobs = s->num_jobs;
    AVFrame *curframe = s->curframe->tf.f;
    VP8Macroblock *mb;
    VP8ThreadData *prev_td, *next_td;
    uint8_t *dst[3] = {
        curframe->data[0] + 16 * mb_y * s->linesize,
        curframe->data[1] +  8 * mb_y * s->uvlinesize,
        curframe->data[2] +  8 * mb_y * s->uvlinesize
    };

    if (s->mb_layout == 1)
        mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
    else
        mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;

    if (mb_y == 0)
        prev_td = td;
    else
        prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
    if (mb_y == s->mb_height - 1)
        next_td = td;
    else
        next_td = &s->thread_data[(jobnr + 1) % num_jobs];

    for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
        VP8FilterStrength *f = &td->filter_strength[mb_x];
        if (prev_td != td)
            check_thread_pos(td, prev_td,
                             (mb_x + 1) + (s->mb_width + 3), mb_y - 1);
        if (next_td != td)
            if (next_td != &s->thread_data[0])
                check_thread_pos(td, next_td, mb_x + 1, mb_y + 1);

        if (num_jobs == 1) {
            if (s->filter.simple)
                backup_mb_border(s->top_border[mb_x + 1], dst[0],
                                 NULL, NULL, s->linesize, 0, 1);
            else
                backup_mb_border(s->top_border[mb_x + 1], dst[0],
                                 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
        }

        if (s->filter.simple)
            filter_mb_simple(s, dst[0], f, mb_x, mb_y);
        else
            filter_mb(s, dst, f, mb_x, mb_y, is_vp7);
        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;

        update_pos(td, mb_y, (s->mb_width + 3) + mb_x);
    }
}

static void vp7_filter_mb_row(AVCodecContext *avctx, void *tdata,
                              int jobnr, int threadnr)
{
    filter_mb_row(avctx, tdata, jobnr, threadnr, 1);
}

static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
                              int jobnr, int threadnr)
{
    filter_mb_row(avctx, tdata, jobnr, threadnr, 0);
}

static av_always_inline
int vp78_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, int jobnr,
                              int threadnr, int is_vp7)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *td = &s->thread_data[jobnr];
    VP8ThreadData *next_td = NULL, *prev_td = NULL;
    VP8Frame *curframe = s->curframe;
    int mb_y, num_jobs = s->num_jobs;
    int ret;

    td->thread_nr = threadnr;
    td->mv_bounds.mv_min.y = -MARGIN - 64 * threadnr;
    td->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN - 64 * threadnr;
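    /* Rows are interleaved across jobs (thread n handles rows n,
     * n + num_jobs, ...), so each thread starts with its vertical MV clamp
     * window pre-shifted by 64 (16 pixels in quarter-pel) per row it sits
     * below row 0; the loop below then slides the window down by
     * 64 * num_jobs per iteration. */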
    for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
        atomic_store(&td->thread_mb_pos, mb_y << 16);
        ret = s->decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
        if (ret < 0) {
            update_pos(td, s->mb_height, INT_MAX & 0xFFFF);
            return ret;
        }
        if (s->deblock_filter)
            s->filter_mb_row(avctx, tdata, jobnr, threadnr);
        update_pos(td, mb_y, INT_MAX & 0xFFFF);

        td->mv_bounds.mv_min.y -= 64 * num_jobs;
        td->mv_bounds.mv_max.y -= 64 * num_jobs;

        if (avctx->active_thread_type == FF_THREAD_FRAME)
            ff_thread_report_progress(&curframe->tf, mb_y, 0);
    }

    return 0;
}

static int vp7_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
                                    int jobnr, int threadnr)
{
    return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP7);
}

static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
                                    int jobnr, int threadnr)
{
    return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP8);
}

static av_always_inline
int vp78_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                      AVPacket *avpkt, int is_vp7)
{
    VP8Context *s = avctx->priv_data;
    int ret, i, referenced, num_jobs;
    enum AVDiscard skip_thresh;
    VP8Frame *av_uninit(curframe), *prev_frame;

    if (is_vp7)
        ret = vp7_decode_frame_header(s, avpkt->data, avpkt->size);
    else
        ret = vp8_decode_frame_header(s, avpkt->data, avpkt->size);

    if (ret < 0)
        goto err;

    if (s->actually_webp) {
        // avctx->pix_fmt already set in caller.
    } else if (!is_vp7 && s->pix_fmt == AV_PIX_FMT_NONE) {
        s->pix_fmt = get_pixel_format(s);
        if (s->pix_fmt < 0) {
            ret = AVERROR(EINVAL);
            goto err;
        }
        avctx->pix_fmt = s->pix_fmt;
    }

    prev_frame = s->framep[VP56_FRAME_CURRENT];

    referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT ||
                 s->update_altref == VP56_FRAME_CURRENT;

    skip_thresh = !referenced ? AVDISCARD_NONREF
                              : !s->keyframe ? AVDISCARD_NONKEY
                                             : AVDISCARD_ALL;

    if (avctx->skip_frame >= skip_thresh) {
        s->invisible = 1;
        memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
        goto skip_decode;
    }
    s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;

    // release no longer referenced frames
    for (i = 0; i < 5; i++)
        if (s->frames[i].tf.f->buf[0] &&
            &s->frames[i] != prev_frame &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
            vp8_release_frame(s, &s->frames[i]);

    curframe = s->framep[VP56_FRAME_CURRENT] = vp8_find_free_buffer(s);

    if (!s->colorspace)
        avctx->colorspace = AVCOL_SPC_BT470BG;
    if (s->fullrange)
        avctx->color_range = AVCOL_RANGE_JPEG;
    else
        avctx->color_range = AVCOL_RANGE_MPEG;

    /* Given that arithmetic probabilities are updated every frame, it's quite
     * likely that the values we have on a random interframe are complete
     * junk if we didn't start decode on a keyframe. So just don't display
     * anything rather than junk. */
    if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
                         !s->framep[VP56_FRAME_GOLDEN]   ||
                         !s->framep[VP56_FRAME_GOLDEN2])) {
        av_log(avctx, AV_LOG_WARNING,
               "Discarding interframe without a prior keyframe!\n");
        ret = AVERROR_INVALIDDATA;
        goto err;
    }

    curframe->tf.f->key_frame = s->keyframe;
    curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I
                                            : AV_PICTURE_TYPE_P;
    if ((ret = vp8_alloc_frame(s, curframe, referenced)) < 0)
        goto err;

    // check if golden and altref are swapped
    if (s->update_altref != VP56_FRAME_NONE)
        s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
    else
        s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];

    if (s->update_golden != VP56_FRAME_NONE)
        s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
    else
        s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];

    if (s->update_last)
        s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
    else
        s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];

    s->next_framep[VP56_FRAME_CURRENT] = curframe;

    if (avctx->codec->update_thread_context)
        ff_thread_finish_setup(avctx);

    if (avctx->hwaccel) {
        ret = avctx->hwaccel->start_frame(avctx, avpkt->data, avpkt->size);
        if (ret < 0)
            goto err;

        ret = avctx->hwaccel->decode_slice(avctx, avpkt->data, avpkt->size);
        if (ret < 0)
            goto err;

        ret = avctx->hwaccel->end_frame(avctx);
        if (ret < 0)
            goto err;
    } else {
        s->linesize   = curframe->tf.f->linesize[0];
        s->uvlinesize = curframe->tf.f->linesize[1];

        memset(s->top_nnz, 0, s->mb_width * sizeof(*s->top_nnz));
        /* Zero macroblock structures for top/top-left prediction
         * from outside the frame. */
        if (!s->mb_layout)
            memset(s->macroblocks + s->mb_height * 2 - 1, 0,
                   (s->mb_width + 1) * sizeof(*s->macroblocks));
        if (!s->mb_layout && s->keyframe)
            memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width * 4);

        memset(s->ref_count, 0, sizeof(s->ref_count));

        if (s->mb_layout == 1) {
            // Make sure the previous frame has read its segmentation map,
            // if we re-use the same map.
            if (prev_frame && s->segmentation.enabled &&
                !s->segmentation.update_map)
                ff_thread_await_progress(&prev_frame->tf, 1, 0);
            if (is_vp7)
                ret = vp7_decode_mv_mb_modes(avctx, curframe, prev_frame);
            else
                ret = vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
            if (ret < 0)
                goto err;
        }

        if (avctx->active_thread_type == FF_THREAD_FRAME)
            num_jobs = 1;
        else
            num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
        s->num_jobs   = num_jobs;
        s->curframe   = curframe;
        s->prev_frame = prev_frame;
        s->mv_bounds.mv_min.y = -MARGIN;
        s->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
        for (i = 0; i < MAX_THREADS; i++) {
            VP8ThreadData *td = &s->thread_data[i];
            atomic_init(&td->thread_mb_pos, 0);
            atomic_init(&td->wait_mb_pos, INT_MAX);
        }
        if (is_vp7)
            avctx->execute2(avctx, vp7_decode_mb_row_sliced, s->thread_data, NULL,
                            num_jobs);
        else
            avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL,
                            num_jobs);
    }

    ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
    memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);

skip_decode:
    // if future frames don't use the updated probabilities,
    // reset them to the values we saved
    if (!s->update_probabilities)
        s->prob[0] = s->prob[1];

    if (!s->invisible) {
        if ((ret = av_frame_ref(data, curframe->tf.f)) < 0)
            return ret;
        *got_frame = 1;
    }

    return avpkt->size;
err:
    memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
    return ret;
}

int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                        AVPacket *avpkt)
{
    return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP8);
}

#if CONFIG_VP7_DECODER
static int vp7_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                            AVPacket *avpkt)
{
    return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP7);
}
#endif /* CONFIG_VP7_DECODER */

av_cold int ff_vp8_decode_free(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;
    int i;

    if (!s)
        return 0;

    vp8_decode_flush_impl(avctx, 1);
    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
        av_frame_free(&s->frames[i].tf.f);

    return 0;
}

static av_cold int vp8_init_frames(VP8Context *s)
{
    int i;
    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) {
        s->frames[i].tf.f = av_frame_alloc();
        if (!s->frames[i].tf.f)
            return AVERROR(ENOMEM);
    }
    return 0;
}

static av_always_inline
int vp78_decode_init(AVCodecContext *avctx, int is_vp7)
{
    VP8Context *s = avctx->priv_data;
    int ret;

    s->avctx   = avctx;
    s->vp7     = avctx->codec->id == AV_CODEC_ID_VP7;
    s->pix_fmt = AV_PIX_FMT_NONE;
    avctx->pix_fmt = AV_PIX_FMT_YUV420P;

    ff_videodsp_init(&s->vdsp, 8);

    ff_vp78dsp_init(&s->vp8dsp);
    if (CONFIG_VP7_DECODER && is_vp7) {
        ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP7, 8, 1);
        ff_vp7dsp_init(&s->vp8dsp);
        s->decode_mb_row_no_filter = vp7_decode_mb_row_no_filter;
        s->filter_mb_row           = vp7_filter_mb_row;
    } else if (CONFIG_VP8_DECODER && !is_vp7) {
        ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
        ff_vp8dsp_init(&s->vp8dsp);
        s->decode_mb_row_no_filter = vp8_decode_mb_row_no_filter;
        s->filter_mb_row           = vp8_filter_mb_row;
    }

    /* does not change for VP8 */
    memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan));

    if ((ret = vp8_init_frames(s)) < 0) {
        ff_vp8_decode_free(avctx);
        return ret;
    }

    return 0;
}

#if CONFIG_VP7_DECODER
static int vp7_decode_init(AVCodecContext *avctx)
{
    return vp78_decode_init(avctx, IS_VP7);
}
#endif /* CONFIG_VP7_DECODER */

av_cold int ff_vp8_decode_init(AVCodecContext *avctx)
{
    return vp78_decode_init(avctx, IS_VP8);
}

#if CONFIG_VP8_DECODER
#if HAVE_THREADS
#define REBASE(pic) ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL)

static int vp8_decode_update_thread_context(AVCodecContext *dst,
                                            const AVCodecContext *src)
{
    VP8Context *s = dst->priv_data, *s_src = src->priv_data;
    int i;

    if (s->macroblocks_base &&
        (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
        free_buffers(s);
        s->mb_width  = s_src->mb_width;
        s->mb_height = s_src->mb_height;
    }

    s->pix_fmt      = s_src->pix_fmt;
    s->prob[0]      = s_src->prob[!s_src->update_probabilities];
    s->segmentation = s_src->segmentation;
    s->lf_delta     = s_src->lf_delta;
    memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));

    for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
        if (s_src->frames[i].tf.f->buf[0]) {
            int ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
            if (ret < 0)
                return ret;
        }
    }

    s->framep[0] = REBASE(s_src->next_framep[0]);
    s->framep[1] = REBASE(s_src->next_framep[1]);
    s->framep[2] = REBASE(s_src->next_framep[2]);
    s->framep[3] = REBASE(s_src->next_framep[3]);

    return 0;
}
#endif /* HAVE_THREADS */
#endif /* CONFIG_VP8_DECODER */

#if CONFIG_VP7_DECODER
AVCodec ff_vp7_decoder = {
    .name           = "vp7",
    .long_name      = NULL_IF_CONFIG_SMALL("On2 VP7"),
    .type           = AVMEDIA_TYPE_VIDEO,
    .id             = AV_CODEC_ID_VP7,
    .priv_data_size = sizeof(VP8Context),
    .init           = vp7_decode_init,
    .close          = ff_vp8_decode_free,
    .decode         = vp7_decode_frame,
    .capabilities   = AV_CODEC_CAP_DR1,
    .flush          = vp8_decode_flush,
};
#endif /* CONFIG_VP7_DECODER */

#if CONFIG_VP8_DECODER
AVCodec ff_vp8_decoder = {
    .name                  = "vp8",
    .long_name             = NULL_IF_CONFIG_SMALL("On2 VP8"),
    .type                  = AVMEDIA_TYPE_VIDEO,
    .id                    = AV_CODEC_ID_VP8,
    .priv_data_size        = sizeof(VP8Context),
    .init                  = ff_vp8_decode_init,
    .close                 = ff_vp8_decode_free,
    .decode                = ff_vp8_decode_frame,
    .capabilities          = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS |
                             AV_CODEC_CAP_SLICE_THREADS,
    .flush                 = vp8_decode_flush,
    .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
    .hw_configs            = (const AVCodecHWConfigInternal *[]) {
#if CONFIG_VP8_VAAPI_HWACCEL
                                 HWACCEL_VAAPI(vp8),
#endif
#if CONFIG_VP8_NVDEC_HWACCEL
                                 HWACCEL_NVDEC(vp8),
#endif
                                 NULL
                             },
    .caps_internal         = FF_CODEC_CAP_ALLOCATE_PROGRESS,
};
#endif /* CONFIG_VP8_DECODER */