/*
 * VP7/VP8 compatible video decoder
 *
 * Copyright (C) 2010 David Conrad
 * Copyright (C) 2010 Ronald S. Bultje
 * Copyright (C) 2010 Fiona Glaser
 * Copyright (C) 2012 Daniel Kang
 * Copyright (C) 2014 Peter Ross
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/imgutils.h"
#include "libavutil/mem_internal.h"

#include "avcodec.h"
#include "hwconfig.h"
#include "internal.h"
#include "mathops.h"
#include "rectangle.h"
#include "thread.h"
#include "vp8.h"
#include "vp8data.h"

#if ARCH_ARM
#   include "arm/vp8.h"
#endif

#if CONFIG_VP7_DECODER && CONFIG_VP8_DECODER
#define VPX(vp7, f) (vp7 ? vp7_ ## f : vp8_ ## f)
#elif CONFIG_VP7_DECODER
#define VPX(vp7, f) vp7_ ## f
#else // CONFIG_VP8_DECODER
#define VPX(vp7, f) vp8_ ## f
#endif
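
/* Usage sketch: VPX(is_vp7, some_func) names vp7_some_func or
 * vp8_some_func. When both decoders are built, the choice is made at
 * run time on the first argument; when only one is built, the macro
 * collapses to a direct reference and the argument is ignored. */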

static void free_buffers(VP8Context *s)
{
    int i;
    if (s->thread_data)
        for (i = 0; i < MAX_THREADS; i++) {
#if HAVE_THREADS
            pthread_cond_destroy(&s->thread_data[i].cond);
            pthread_mutex_destroy(&s->thread_data[i].lock);
#endif
            av_freep(&s->thread_data[i].filter_strength);
        }
    av_freep(&s->thread_data);
    av_freep(&s->macroblocks_base);
    av_freep(&s->intra4x4_pred_mode_top);
    av_freep(&s->top_nnz);
    av_freep(&s->top_border);

    s->macroblocks = NULL;
}

static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
{
    int ret;
    if ((ret = ff_thread_get_buffer(s->avctx, &f->tf,
                                    ref ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
        return ret;
    if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height)))
        goto fail;
    if (s->avctx->hwaccel) {
        const AVHWAccel *hwaccel = s->avctx->hwaccel;
        if (hwaccel->frame_priv_data_size) {
            f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
            if (!f->hwaccel_priv_buf)
                goto fail;
            f->hwaccel_picture_private = f->hwaccel_priv_buf->data;
        }
    }
    return 0;

fail:
    av_buffer_unref(&f->seg_map);
    ff_thread_release_buffer(s->avctx, &f->tf);
    return AVERROR(ENOMEM);
}

static void vp8_release_frame(VP8Context *s, VP8Frame *f)
{
    av_buffer_unref(&f->seg_map);
    av_buffer_unref(&f->hwaccel_priv_buf);
    f->hwaccel_picture_private = NULL;
    ff_thread_release_buffer(s->avctx, &f->tf);
}

#if CONFIG_VP8_DECODER
static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src)
{
    int ret;

    vp8_release_frame(s, dst);

    if ((ret = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0)
        return ret;
    if (src->seg_map &&
        !(dst->seg_map = av_buffer_ref(src->seg_map))) {
        vp8_release_frame(s, dst);
        return AVERROR(ENOMEM);
    }
    if (src->hwaccel_picture_private) {
        dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
        if (!dst->hwaccel_priv_buf)
            return AVERROR(ENOMEM);
        dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
    }

    return 0;
}
#endif /* CONFIG_VP8_DECODER */

static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
{
    VP8Context *s = avctx->priv_data;
    int i;

    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
        vp8_release_frame(s, &s->frames[i]);
    memset(s->framep, 0, sizeof(s->framep));

    if (free_mem)
        free_buffers(s);
}

static void vp8_decode_flush(AVCodecContext *avctx)
{
    vp8_decode_flush_impl(avctx, 0);
}

static VP8Frame *vp8_find_free_buffer(VP8Context *s)
{
    VP8Frame *frame = NULL;
    int i;

    // find a free buffer
    for (i = 0; i < 5; i++)
        if (&s->frames[i] != s->framep[VP56_FRAME_CURRENT]  &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN]   &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
            frame = &s->frames[i];
            break;
        }
    if (i == 5) {
        av_log(s->avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
        abort();
    }
    if (frame->tf.f->buf[0])
        vp8_release_frame(s, frame);

    return frame;
}

static enum AVPixelFormat get_pixel_format(VP8Context *s)
{
    enum AVPixelFormat pix_fmts[] = {
#if CONFIG_VP8_VAAPI_HWACCEL
        AV_PIX_FMT_VAAPI,
#endif
#if CONFIG_VP8_NVDEC_HWACCEL
        AV_PIX_FMT_CUDA,
#endif
        AV_PIX_FMT_YUV420P,
        AV_PIX_FMT_NONE,
    };

    return ff_get_format(s->avctx, pix_fmts);
}

static av_always_inline
int update_dimensions(VP8Context *s, int width, int height, int is_vp7)
{
    AVCodecContext *avctx = s->avctx;
    int i, ret, dim_reset = 0;

    if (width  != s->avctx->width || ((width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) && s->macroblocks_base ||
        height != s->avctx->height) {
        vp8_decode_flush_impl(s->avctx, 1);

        ret = ff_set_dimensions(s->avctx, width, height);
        if (ret < 0)
            return ret;

        dim_reset = (s->macroblocks_base != NULL);
    }

    if ((s->pix_fmt == AV_PIX_FMT_NONE || dim_reset) &&
        !s->actually_webp && !is_vp7) {
        s->pix_fmt = get_pixel_format(s);
        if (s->pix_fmt < 0)
            return AVERROR(EINVAL);
        avctx->pix_fmt = s->pix_fmt;
    }

    s->mb_width  = (s->avctx->coded_width  + 15) / 16;
    s->mb_height = (s->avctx->coded_height + 15) / 16;

    s->mb_layout = is_vp7 || avctx->active_thread_type == FF_THREAD_SLICE &&
                             avctx->thread_count > 1;
    if (!s->mb_layout) { // Frame threading and one thread
        s->macroblocks_base       = av_mallocz((s->mb_width + s->mb_height * 2 + 1) *
                                               sizeof(*s->macroblocks));
        s->intra4x4_pred_mode_top = av_mallocz(s->mb_width * 4);
    } else // Sliced threading
        s->macroblocks_base = av_mallocz((s->mb_width + 2) * (s->mb_height + 2) *
                                         sizeof(*s->macroblocks));
    s->top_nnz     = av_mallocz(s->mb_width * sizeof(*s->top_nnz));
    s->top_border  = av_mallocz((s->mb_width + 1) * sizeof(*s->top_border));
    s->thread_data = av_mallocz(MAX_THREADS * sizeof(VP8ThreadData));

    if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
        !s->thread_data || (!s->intra4x4_pred_mode_top && !s->mb_layout)) {
        free_buffers(s);
        return AVERROR(ENOMEM);
    }

    for (i = 0; i < MAX_THREADS; i++) {
        s->thread_data[i].filter_strength =
            av_mallocz(s->mb_width * sizeof(*s->thread_data[0].filter_strength));
        if (!s->thread_data[i].filter_strength) {
            free_buffers(s);
            return AVERROR(ENOMEM);
        }
#if HAVE_THREADS
        pthread_mutex_init(&s->thread_data[i].lock, NULL);
        pthread_cond_init(&s->thread_data[i].cond, NULL);
#endif
    }

    s->macroblocks = s->macroblocks_base + 1;

    return 0;
}

static int vp7_update_dimensions(VP8Context *s, int width, int height)
{
    return update_dimensions(s, width, height, IS_VP7);
}

static int vp8_update_dimensions(VP8Context *s, int width, int height)
{
    return update_dimensions(s, width, height, IS_VP8);
}


static void parse_segment_info(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i;

    s->segmentation.update_map          = vp8_rac_get(c);
    s->segmentation.update_feature_data = vp8_rac_get(c);

    if (s->segmentation.update_feature_data) {
        s->segmentation.absolute_vals = vp8_rac_get(c);

        for (i = 0; i < 4; i++)
            s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7);

        for (i = 0; i < 4; i++)
            s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
    }
    if (s->segmentation.update_map)
        for (i = 0; i < 3; i++)
            s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
}

static void update_lf_deltas(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i;

    for (i = 0; i < 4; i++) {
        if (vp8_rac_get(c)) {
            s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);

            if (vp8_rac_get(c))
                s->lf_delta.ref[i] = -s->lf_delta.ref[i];
        }
    }

    for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
        if (vp8_rac_get(c)) {
            s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);

            if (vp8_rac_get(c))
                s->lf_delta.mode[i] = -s->lf_delta.mode[i];
        }
    }
}

static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
{
    const uint8_t *sizes = buf;
    int i;
    int ret;

    s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
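
    /* The first num_coeff_partitions - 1 partition sizes follow as 24-bit
     * little-endian values (read with AV_RL24); e.g. 4 partitions means
     * 9 bytes of size fields, and the last partition implicitly takes
     * whatever buffer remains. */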

    buf      += 3 * (s->num_coeff_partitions - 1);
    buf_size -= 3 * (s->num_coeff_partitions - 1);
    if (buf_size < 0)
        return -1;

    for (i = 0; i < s->num_coeff_partitions - 1; i++) {
        int size = AV_RL24(sizes + 3 * i);
        if (buf_size - size < 0)
            return -1;
        s->coeff_partition_size[i] = size;

        ret = ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
        if (ret < 0)
            return ret;
        buf      += size;
        buf_size -= size;
    }

    s->coeff_partition_size[i] = buf_size;
    ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);

    return 0;
}

static void vp7_get_quants(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;

    int yac_qi  = vp8_rac_get_uint(c, 7);
    int ydc_qi  = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int y2dc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int y2ac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int uvdc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int uvac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;

    s->qmat[0].luma_qmul[0]    = vp7_ydc_qlookup[ydc_qi];
    s->qmat[0].luma_qmul[1]    = vp7_yac_qlookup[yac_qi];
    s->qmat[0].luma_dc_qmul[0] = vp7_y2dc_qlookup[y2dc_qi];
    s->qmat[0].luma_dc_qmul[1] = vp7_y2ac_qlookup[y2ac_qi];
    s->qmat[0].chroma_qmul[0]  = FFMIN(vp7_ydc_qlookup[uvdc_qi], 132);
    s->qmat[0].chroma_qmul[1]  = vp7_yac_qlookup[uvac_qi];
}

static void vp8_get_quants(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i, base_qi;

    s->quant.yac_qi     = vp8_rac_get_uint(c, 7);
    s->quant.ydc_delta  = vp8_rac_get_sint(c, 4);
    s->quant.y2dc_delta = vp8_rac_get_sint(c, 4);
    s->quant.y2ac_delta = vp8_rac_get_sint(c, 4);
    s->quant.uvdc_delta = vp8_rac_get_sint(c, 4);
    s->quant.uvac_delta = vp8_rac_get_sint(c, 4);

    for (i = 0; i < 4; i++) {
        if (s->segmentation.enabled) {
            base_qi = s->segmentation.base_quant[i];
            if (!s->segmentation.absolute_vals)
                base_qi += s->quant.yac_qi;
        } else
            base_qi = s->quant.yac_qi;

        s->qmat[i].luma_qmul[0]    = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.ydc_delta,  7)];
        s->qmat[i].luma_qmul[1]    = vp8_ac_qlookup[av_clip_uintp2(base_qi,                       7)];
        s->qmat[i].luma_dc_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.y2dc_delta, 7)] * 2;
        /* 101581>>16 is equivalent to 155/100 */
        s->qmat[i].luma_dc_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + s->quant.y2ac_delta, 7)] * 101581 >> 16;
        s->qmat[i].chroma_qmul[0]  = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.uvdc_delta, 7)];
        s->qmat[i].chroma_qmul[1]  = vp8_ac_qlookup[av_clip_uintp2(base_qi + s->quant.uvac_delta, 7)];

        s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
        s->qmat[i].chroma_qmul[0]  = FFMIN(s->qmat[i].chroma_qmul[0], 132);
    }
}

/**
 * Determine which buffers golden and altref should be updated with after this frame.
 * The spec isn't clear here, so I'm going by my understanding of what libvpx does
 *
 * Intra frames update all 3 references
 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
 * If the update (golden|altref) flag is set, it's updated with the current frame
 *     if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
 * If the flag is not set, the number read means:
 *     0: no update
 *     1: VP56_FRAME_PREVIOUS
 *     2: update golden with altref, or update altref with golden
 */
static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
{
    VP56RangeCoder *c = &s->c;

    if (update)
        return VP56_FRAME_CURRENT;

    switch (vp8_rac_get_uint(c, 2)) {
    case 1:
        return VP56_FRAME_PREVIOUS;
    case 2:
        return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
    }
    return VP56_FRAME_NONE;
}

static void vp78_reset_probability_tables(VP8Context *s)
{
    int i, j;
    for (i = 0; i < 4; i++)
        for (j = 0; j < 16; j++)
            memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
                   sizeof(s->prob->token[i][j]));
}

static void vp78_update_probability_tables(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i, j, k, l, m;

    for (i = 0; i < 4; i++)
        for (j = 0; j < 8; j++)
            for (k = 0; k < 3; k++)
                for (l = 0; l < NUM_DCT_TOKENS-1; l++)
                    if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
                        int prob = vp8_rac_get_uint(c, 8);
                        for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
                            s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
                    }
}

#define VP7_MVC_SIZE 17
#define VP8_MVC_SIZE 19

static void vp78_update_pred16x16_pred8x8_mvc_probabilities(VP8Context *s,
                                                            int mvc_size)
{
    VP56RangeCoder *c = &s->c;
    int i, j;

    if (vp8_rac_get(c))
        for (i = 0; i < 4; i++)
            s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
    if (vp8_rac_get(c))
        for (i = 0; i < 3; i++)
            s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8);

    // 17.2 MV probability update
    for (i = 0; i < 2; i++)
        for (j = 0; j < mvc_size; j++)
            if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
                s->prob->mvc[i][j] = vp8_rac_get_nn(c);
}

static void update_refs(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;

    int update_golden = vp8_rac_get(c);
    int update_altref = vp8_rac_get(c);

    s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
    s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
}

static void copy_chroma(AVFrame *dst, AVFrame *src, int width, int height)
{
    int i, j;

    for (j = 1; j < 3; j++) {
        for (i = 0; i < height / 2; i++)
            memcpy(dst->data[j] + i * dst->linesize[j],
                   src->data[j] + i * src->linesize[j], width / 2);
    }
}

static void fade(uint8_t *dst, ptrdiff_t dst_linesize,
                 const uint8_t *src, ptrdiff_t src_linesize,
                 int width, int height,
                 int alpha, int beta)
{
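    /* Per-sample linear ramp: out = clip(y + y * beta / 256 + alpha).
     * E.g. alpha = 16, beta = -64 maps y = 128 to
     * clip(128 + ((128 * -64) >> 8) + 16) = 112. */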
    int i, j;
    for (j = 0; j < height; j++) {
        const uint8_t *src2 = src + j * src_linesize;
        uint8_t *dst2 = dst + j * dst_linesize;
        for (i = 0; i < width; i++) {
            uint8_t y = src2[i];
            dst2[i] = av_clip_uint8(y + ((y * beta) >> 8) + alpha);
        }
    }
}

static int vp7_fade_frame(VP8Context *s, int alpha, int beta)
{
    int ret;

    if (!s->keyframe && (alpha || beta)) {
        int width  = s->mb_width * 16;
        int height = s->mb_height * 16;
        AVFrame *src, *dst;

        if (!s->framep[VP56_FRAME_PREVIOUS] ||
            !s->framep[VP56_FRAME_GOLDEN]) {
            av_log(s->avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
            return AVERROR_INVALIDDATA;
        }

        dst =
        src = s->framep[VP56_FRAME_PREVIOUS]->tf.f;

        /* preserve the golden frame, write a new previous frame */
        if (s->framep[VP56_FRAME_GOLDEN] == s->framep[VP56_FRAME_PREVIOUS]) {
            s->framep[VP56_FRAME_PREVIOUS] = vp8_find_free_buffer(s);
            if ((ret = vp8_alloc_frame(s, s->framep[VP56_FRAME_PREVIOUS], 1)) < 0)
                return ret;

            dst = s->framep[VP56_FRAME_PREVIOUS]->tf.f;

            copy_chroma(dst, src, width, height);
        }

        fade(dst->data[0], dst->linesize[0],
             src->data[0], src->linesize[0],
             width, height, alpha, beta);
    }

    return 0;
}

static int vp7_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
{
    VP56RangeCoder *c = &s->c;
    int part1_size, hscale, vscale, i, j, ret;
    int width  = s->avctx->width;
    int height = s->avctx->height;
    int alpha  = 0;
    int beta   = 0;

    if (buf_size < 4) {
        return AVERROR_INVALIDDATA;
    }

    s->profile = (buf[0] >> 1) & 7;
    if (s->profile > 1) {
        avpriv_request_sample(s->avctx, "Unknown profile %d", s->profile);
        return AVERROR_INVALIDDATA;
    }

    s->keyframe  = !(buf[0] & 1);
    s->invisible = 0;
    part1_size   = AV_RL24(buf) >> 4;

    if (buf_size < 4 - s->profile + part1_size) {
        av_log(s->avctx, AV_LOG_ERROR, "Buffer size %d is too small, needed: %d\n", buf_size, 4 - s->profile + part1_size);
        return AVERROR_INVALIDDATA;
    }

    buf      += 4 - s->profile;
    buf_size -= 4 - s->profile;

    memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));

    ret = ff_vp56_init_range_decoder(c, buf, part1_size);
    if (ret < 0)
        return ret;
    buf      += part1_size;
    buf_size -= part1_size;

    /* A. Dimension information (keyframes only) */
    if (s->keyframe) {
        width  = vp8_rac_get_uint(c, 12);
        height = vp8_rac_get_uint(c, 12);
        hscale = vp8_rac_get_uint(c, 2);
        vscale = vp8_rac_get_uint(c, 2);
        if (hscale || vscale)
            avpriv_request_sample(s->avctx, "Upscaling");

        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        vp78_reset_probability_tables(s);
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
               sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
               sizeof(s->prob->pred8x8c));
        for (i = 0; i < 2; i++)
            memcpy(s->prob->mvc[i], vp7_mv_default_prob[i],
                   sizeof(vp7_mv_default_prob[i]));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
        memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan));
    }

    if (s->keyframe || s->profile > 0)
        memset(s->inter_dc_pred, 0, sizeof(s->inter_dc_pred));

    /* B. Decoding information for all four macroblock-level features */
    for (i = 0; i < 4; i++) {
        s->feature_enabled[i] = vp8_rac_get(c);
        if (s->feature_enabled[i]) {
            s->feature_present_prob[i] = vp8_rac_get_uint(c, 8);

            for (j = 0; j < 3; j++)
                s->feature_index_prob[i][j] =
                    vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;

            if (vp7_feature_value_size[s->profile][i])
                for (j = 0; j < 4; j++)
                    s->feature_value[i][j] =
                        vp8_rac_get(c) ? vp8_rac_get_uint(c, vp7_feature_value_size[s->profile][i]) : 0;
        }
    }

    s->segmentation.enabled    = 0;
    s->segmentation.update_map = 0;
    s->lf_delta.enabled        = 0;

    s->num_coeff_partitions = 1;
    ret = ff_vp56_init_range_decoder(&s->coeff_partition[0], buf, buf_size);
    if (ret < 0)
        return ret;

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height ||
        (width + 15) / 16 != s->mb_width || (height + 15) / 16 != s->mb_height) {
        if ((ret = vp7_update_dimensions(s, width, height)) < 0)
            return ret;
    }

    /* C. Dequantization indices */
    vp7_get_quants(s);

    /* D. Golden frame update flag (a Flag) for interframes only */
    if (!s->keyframe) {
        s->update_golden = vp8_rac_get(c) ? VP56_FRAME_CURRENT : VP56_FRAME_NONE;
        s->sign_bias[VP56_FRAME_GOLDEN] = 0;
    }

    s->update_last          = 1;
    s->update_probabilities = 1;
    s->fade_present         = 1;

    if (s->profile > 0) {
        s->update_probabilities = vp8_rac_get(c);
        if (!s->update_probabilities)
            s->prob[1] = s->prob[0];

        if (!s->keyframe)
            s->fade_present = vp8_rac_get(c);
    }

    if (vpX_rac_is_end(c))
        return AVERROR_INVALIDDATA;
    /* E. Fading information for previous frame */
    if (s->fade_present && vp8_rac_get(c)) {
        alpha = (int8_t) vp8_rac_get_uint(c, 8);
        beta  = (int8_t) vp8_rac_get_uint(c, 8);
    }

    /* F. Loop filter type */
    if (!s->profile)
        s->filter.simple = vp8_rac_get(c);

    /* G. DCT coefficient ordering specification */
    if (vp8_rac_get(c))
        for (i = 1; i < 16; i++)
            s->prob[0].scan[i] = ff_zigzag_scan[vp8_rac_get_uint(c, 4)];

    /* H. Loop filter levels */
    if (s->profile > 0)
        s->filter.simple = vp8_rac_get(c);
    s->filter.level     = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    /* I. DCT coefficient probability update; 13.3 Token Probability Updates */
    vp78_update_probability_tables(s);

    s->mbskip_enabled = 0;

    /* J. The remaining frame header data occurs ONLY FOR INTERFRAMES */
    if (!s->keyframe) {
        s->prob->intra = vp8_rac_get_uint(c, 8);
        s->prob->last  = vp8_rac_get_uint(c, 8);
        vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP7_MVC_SIZE);
    }

    if (vpX_rac_is_end(c))
        return AVERROR_INVALIDDATA;

    if ((ret = vp7_fade_frame(s, alpha, beta)) < 0)
        return ret;

    return 0;
}

static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
{
    VP56RangeCoder *c = &s->c;
    int header_size, hscale, vscale, ret;
    int width  = s->avctx->width;
    int height = s->avctx->height;

    if (buf_size < 3) {
        av_log(s->avctx, AV_LOG_ERROR, "Insufficient data (%d) for header\n", buf_size);
        return AVERROR_INVALIDDATA;
    }

    s->keyframe  = !(buf[0] & 1);
    s->profile   = (buf[0]>>1) & 7;
    s->invisible = !(buf[0] & 0x10);
    header_size  = AV_RL24(buf) >> 5;
    buf      += 3;
    buf_size -= 3;

    s->header_partition_size = header_size;

    if (s->profile > 3)
        av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);

    if (!s->profile)
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab,
               sizeof(s->put_pixels_tab));
    else // profile 1-3 use bilinear, 4+ aren't defined so whatever
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab,
               sizeof(s->put_pixels_tab));

    if (header_size > buf_size - 7 * s->keyframe) {
        av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
        return AVERROR_INVALIDDATA;
    }

    if (s->keyframe) {
        if (AV_RL24(buf) != 0x2a019d) {
            av_log(s->avctx, AV_LOG_ERROR,
                   "Invalid start code 0x%x\n", AV_RL24(buf));
            return AVERROR_INVALIDDATA;
        }
        width     = AV_RL16(buf + 3) & 0x3fff;
        height    = AV_RL16(buf + 5) & 0x3fff;
        hscale    = buf[4] >> 6;
        vscale    = buf[6] >> 6;
        buf      += 7;
        buf_size -= 7;

        if (hscale || vscale)
            avpriv_request_sample(s->avctx, "Upscaling");

        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        vp78_reset_probability_tables(s);
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
               sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
               sizeof(s->prob->pred8x8c));
        memcpy(s->prob->mvc, vp8_mv_default_prob,
               sizeof(s->prob->mvc));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
    }

    ret = ff_vp56_init_range_decoder(c, buf, header_size);
    if (ret < 0)
        return ret;
    buf      += header_size;
    buf_size -= header_size;

    if (s->keyframe) {
        s->colorspace = vp8_rac_get(c);
        if (s->colorspace)
            av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
        s->fullrange = vp8_rac_get(c);
    }

    if ((s->segmentation.enabled = vp8_rac_get(c)))
        parse_segment_info(s);
    else
        s->segmentation.update_map = 0; // FIXME: move this to some init function?

    s->filter.simple    = vp8_rac_get(c);
    s->filter.level     = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    if ((s->lf_delta.enabled = vp8_rac_get(c))) {
        s->lf_delta.update = vp8_rac_get(c);
        if (s->lf_delta.update)
            update_lf_deltas(s);
    }

    if (setup_partitions(s, buf, buf_size)) {
        av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
        return AVERROR_INVALIDDATA;
    }

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height ||
        (width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height)
        if ((ret = vp8_update_dimensions(s, width, height)) < 0)
            return ret;

    vp8_get_quants(s);

    if (!s->keyframe) {
        update_refs(s);
        s->sign_bias[VP56_FRAME_GOLDEN]               = vp8_rac_get(c);
        s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
    }

    // if we aren't saving this frame's probabilities for future frames,
    // make a copy of the current probabilities
    if (!(s->update_probabilities = vp8_rac_get(c)))
        s->prob[1] = s->prob[0];

    s->update_last = s->keyframe || vp8_rac_get(c);

    vp78_update_probability_tables(s);

    if ((s->mbskip_enabled = vp8_rac_get(c)))
        s->prob->mbskip = vp8_rac_get_uint(c, 8);

    if (!s->keyframe) {
        s->prob->intra  = vp8_rac_get_uint(c, 8);
        s->prob->last   = vp8_rac_get_uint(c, 8);
        s->prob->golden = vp8_rac_get_uint(c, 8);
        vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP8_MVC_SIZE);
    }

    // Record the entropy coder state here so that hwaccels can use it.
    s->c.code_word = vp56_rac_renorm(&s->c);
    s->coder_state_at_header_end.input     = s->c.buffer - (-s->c.bits / 8);
    s->coder_state_at_header_end.range     = s->c.high;
    s->coder_state_at_header_end.value     = s->c.code_word >> 16;
    s->coder_state_at_header_end.bit_count = -s->c.bits % 8;

    return 0;
}

static av_always_inline
void clamp_mv(VP8mvbounds *s, VP56mv *dst, const VP56mv *src)
{
    dst->x = av_clip(src->x, av_clip(s->mv_min.x, INT16_MIN, INT16_MAX),
                             av_clip(s->mv_max.x, INT16_MIN, INT16_MAX));
    dst->y = av_clip(src->y, av_clip(s->mv_min.y, INT16_MIN, INT16_MAX),
                             av_clip(s->mv_max.y, INT16_MIN, INT16_MAX));
}

/**
 * Motion vector coding, 17.1.
 */
static av_always_inline int read_mv_component(VP56RangeCoder *c, const uint8_t *p, int vp7)
{
    int bit, x = 0;

    if (vp56_rac_get_prob_branchy(c, p[0])) {
        int i;
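
        /* Long vector: the three low bits are coded LSB-first, the
         * higher bits MSB-first; bit 3 is coded last and is implied to
         * be set when no higher bit is, since long vectors are
         * always >= 8. */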

        for (i = 0; i < 3; i++)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        for (i = (vp7 ? 7 : 9); i > 3; i--)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        if (!(x & (vp7 ? 0xF0 : 0xFFF0)) || vp56_rac_get_prob(c, p[12]))
            x += 8;
    } else {
        // small_mvtree
        const uint8_t *ps = p + 2;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + 3 * bit;
        x  += 4 * bit;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + bit;
        x  += 2 * bit;
        x  += vp56_rac_get_prob(c, *ps);
    }

    return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
}

static int vp7_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
{
    return read_mv_component(c, p, 1);
}

static int vp8_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
{
    return read_mv_component(c, p, 0);
}

static av_always_inline
const uint8_t *get_submv_prob(uint32_t left, uint32_t top, int is_vp7)
{
    if (is_vp7)
        return vp7_submv_prob;

    if (left == top)
        return vp8_submv_prob[4 - !!left];
    if (!top)
        return vp8_submv_prob[2];
    return vp8_submv_prob[1 - !!left];
}

/**
 * Split motion vector prediction, 16.4.
 * @returns the number of motion vectors parsed (2, 4 or 16)
 */
static av_always_inline
int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
                    int layout, int is_vp7)
{
    int part_idx;
    int n, num;
    VP8Macroblock *top_mb;
    VP8Macroblock *left_mb = &mb[-1];
    const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning];
    const uint8_t *mbsplits_top, *mbsplits_cur, *firstidx;
    VP56mv *top_mv;
    VP56mv *left_mv = left_mb->bmv;
    VP56mv *cur_mv  = mb->bmv;

    if (!layout) // layout is inlined, s->mb_layout is not
        top_mb = &mb[2];
    else
        top_mb = &mb[-s->mb_width - 1];
    mbsplits_top = vp8_mbsplits[top_mb->partitioning];
    top_mv       = top_mb->bmv;

    if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
        if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1]))
            part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
        else
            part_idx = VP8_SPLITMVMODE_8x8;
    } else {
        part_idx = VP8_SPLITMVMODE_4x4;
    }

    num          = vp8_mbsplit_count[part_idx];
    mbsplits_cur = vp8_mbsplits[part_idx],
    firstidx     = vp8_mbfirstidx[part_idx];
    mb->partitioning = part_idx;

    for (n = 0; n < num; n++) {
        int k = firstidx[n];
        uint32_t left, above;
        const uint8_t *submv_prob;

        if (!(k & 3))
            left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
        else
            left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
        if (k <= 3)
            above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
        else
            above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);

        submv_prob = get_submv_prob(left, above, is_vp7);

        if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
            if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
                if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
                    mb->bmv[n].y = mb->mv.y +
                                   read_mv_component(c, s->prob->mvc[0], is_vp7);
                    mb->bmv[n].x = mb->mv.x +
                                   read_mv_component(c, s->prob->mvc[1], is_vp7);
                } else {
                    AV_ZERO32(&mb->bmv[n]);
                }
            } else {
                AV_WN32A(&mb->bmv[n], above);
            }
        } else {
            AV_WN32A(&mb->bmv[n], left);
        }
    }

    return num;
}

/**
 * The vp7 reference decoder uses a padding macroblock column (added to right
 * edge of the frame) to guard against illegal macroblock offsets. The
 * algorithm has bugs that permit offsets to straddle the padding column.
 * This function replicates those bugs.
 *
 * @param[out] edge_x macroblock x address
 * @param[out] edge_y macroblock y address
 *
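 * For example, with mb_width = 2 (so vwidth = 3), mb_x = 1, mb_y = 0 and
 * offset (+1, 0), the linear address is 2, which falls in the padding
 * column (2 % 3 == vwidth - 1), so the offset is rejected.
 *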
 * @return macroblock offset legal (boolean)
 */
static int vp7_calculate_mb_offset(int mb_x, int mb_y, int mb_width,
                                   int xoffset, int yoffset, int boundary,
                                   int *edge_x, int *edge_y)
{
    int vwidth = mb_width + 1;
    int new = (mb_y + yoffset) * vwidth + mb_x + xoffset;
    if (new < boundary || new % vwidth == vwidth - 1)
        return 0;
    *edge_y = new / vwidth;
    *edge_x = new % vwidth;
    return 1;
}

static const VP56mv *get_bmv_ptr(const VP8Macroblock *mb, int subblock)
{
    return &mb->bmv[mb->mode == VP8_MVMODE_SPLIT ? vp8_mbsplits[mb->partitioning][subblock] : 0];
}

static av_always_inline
void vp7_decode_mvs(VP8Context *s, VP8Macroblock *mb,
                    int mb_x, int mb_y, int layout)
{
    VP8Macroblock *mb_edge[12];
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR };
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
    int idx = CNT_ZERO;
    VP56mv near_mv[3];
    uint8_t cnt[3] = { 0 };
    VP56RangeCoder *c = &s->c;
    int i;

    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);
    AV_ZERO32(&near_mv[2]);

    for (i = 0; i < VP7_MV_PRED_COUNT; i++) {
        const VP7MVPred *pred = &vp7_mv_pred[i];
        int edge_x, edge_y;

        if (vp7_calculate_mb_offset(mb_x, mb_y, s->mb_width, pred->xoffset,
                                    pred->yoffset, !s->profile, &edge_x, &edge_y)) {
            VP8Macroblock *edge = mb_edge[i] = (s->mb_layout == 1)
                                             ? s->macroblocks_base + 1 + edge_x +
                                               (s->mb_width + 1) * (edge_y + 1)
                                             : s->macroblocks + edge_x +
                                               (s->mb_height - edge_y - 1) * 2;
            uint32_t mv = AV_RN32A(get_bmv_ptr(edge, vp7_mv_pred[i].subblock));
            if (mv) {
                if (AV_RN32A(&near_mv[CNT_NEAREST])) {
                    if (mv == AV_RN32A(&near_mv[CNT_NEAREST])) {
                        idx = CNT_NEAREST;
                    } else if (AV_RN32A(&near_mv[CNT_NEAR])) {
                        if (mv != AV_RN32A(&near_mv[CNT_NEAR]))
                            continue;
                        idx = CNT_NEAR;
                    } else {
                        AV_WN32A(&near_mv[CNT_NEAR], mv);
                        idx = CNT_NEAR;
                    }
                } else {
                    AV_WN32A(&near_mv[CNT_NEAREST], mv);
                    idx = CNT_NEAREST;
                }
            } else {
                idx = CNT_ZERO;
            }
        } else {
            idx = CNT_ZERO;
        }
        cnt[idx] += vp7_mv_pred[i].score;
    }

    mb->partitioning = VP8_SPLITMVMODE_NONE;

    if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;

        if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAREST]][1])) {

            if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][2])) {

                if (cnt[CNT_NEAREST] > cnt[CNT_NEAR])
                    AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAREST] ? 0 : AV_RN32A(&near_mv[CNT_NEAREST]));
                else
                    AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAR]    ? 0 : AV_RN32A(&near_mv[CNT_NEAR]));

                if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP7) - 1];
                } else {
                    mb->mv.y += vp7_read_mv_component(c, s->prob->mvc[0]);
                    mb->mv.x += vp7_read_mv_component(c, s->prob->mvc[1]);
                    mb->bmv[0] = mb->mv;
                }
            } else {
                mb->mv = near_mv[CNT_NEAR];
                mb->bmv[0] = mb->mv;
            }
        } else {
            mb->mv = near_mv[CNT_NEAREST];
            mb->bmv[0] = mb->mv;
        }
    } else {
        mb->mode = VP8_MVMODE_ZERO;
        AV_ZERO32(&mb->mv);
        mb->bmv[0] = mb->mv;
    }
}

static av_always_inline
void vp8_decode_mvs(VP8Context *s, VP8mvbounds *mv_bounds, VP8Macroblock *mb,
                    int mb_x, int mb_y, int layout)
{
    VP8Macroblock *mb_edge[3] = { 0      /* top */,
                                  mb - 1 /* left */,
                                  0      /* top-left */ };
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
    int idx = CNT_ZERO;
    int cur_sign_bias = s->sign_bias[mb->ref_frame];
    int8_t *sign_bias = s->sign_bias;
    VP56mv near_mv[4];
    uint8_t cnt[4] = { 0 };
    VP56RangeCoder *c = &s->c;

    if (!layout) { // layout is inlined (s->mb_layout is not)
        mb_edge[0] = mb + 2;
        mb_edge[2] = mb + 1;
    } else {
        mb_edge[0] = mb - s->mb_width - 1;
        mb_edge[2] = mb - s->mb_width - 2;
    }

    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);
    AV_ZERO32(&near_mv[2]);

    /* Process MB on top, left and top-left */
#define MV_EDGE_CHECK(n)                                                  \
    {                                                                     \
        VP8Macroblock *edge = mb_edge[n];                                 \
        int edge_ref = edge->ref_frame;                                   \
        if (edge_ref != VP56_FRAME_CURRENT) {                             \
            uint32_t mv = AV_RN32A(&edge->mv);                            \
            if (mv) {                                                     \
                if (cur_sign_bias != sign_bias[edge_ref]) {               \
                    /* SWAR negate of the values in mv. */                \
                    mv = ~mv;                                             \
                    mv = ((mv & 0x7fff7fff) +                             \
                          0x00010001) ^ (mv & 0x80008000);                \
                }                                                         \
                if (!n || mv != AV_RN32A(&near_mv[idx]))                  \
                    AV_WN32A(&near_mv[++idx], mv);                        \
                cnt[idx] += 1 + (n != 2);                                 \
            } else                                                        \
                cnt[CNT_ZERO] += 1 + (n != 2);                            \
        }                                                                 \
    }

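    /* The SWAR negate in MV_EDGE_CHECK flips both packed int16 MV
     * components in one 32-bit operation: ~mv is the ones' complement,
     * adding 0x00010001 supplies the +1 for each 16-bit lane (the sign
     * bits are masked out of the add so a carry can never ripple into
     * the next lane), and the XOR folds the sign bits back in. E.g.
     * the packed pair (-1, +1) becomes (+1, -1). */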
    MV_EDGE_CHECK(0)
    MV_EDGE_CHECK(1)
    MV_EDGE_CHECK(2)

    mb->partitioning = VP8_SPLITMVMODE_NONE;
    if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;

        /* If we have three distinct MVs, merge first and last if they're the same */
        if (cnt[CNT_SPLITMV] &&
            AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
            cnt[CNT_NEAREST] += 1;

        /* Swap near and nearest if necessary */
        if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
            FFSWAP(uint8_t, cnt[CNT_NEAREST],     cnt[CNT_NEAR]);
            FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
        }

        if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
            if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
                /* Choose the best mv out of 0,0 and the nearest mv */
                clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
                cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode    == VP8_MVMODE_SPLIT) +
                                    (mb_edge[VP8_EDGE_TOP]->mode     == VP8_MVMODE_SPLIT)) * 2 +
                                    (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);

                if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP8) - 1];
                } else {
                    mb->mv.y  += vp8_read_mv_component(c, s->prob->mvc[0]);
                    mb->mv.x  += vp8_read_mv_component(c, s->prob->mvc[1]);
                    mb->bmv[0] = mb->mv;
                }
            } else {
                clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_NEAR]);
                mb->bmv[0] = mb->mv;
            }
        } else {
            clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_NEAREST]);
            mb->bmv[0] = mb->mv;
        }
    } else {
        mb->mode = VP8_MVMODE_ZERO;
        AV_ZERO32(&mb->mv);
        mb->bmv[0] = mb->mv;
    }
}

static av_always_inline
void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
                           int mb_x, int keyframe, int layout)
{
    uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;

    if (layout) {
        VP8Macroblock *mb_top = mb - s->mb_width - 1;
        memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
    }
    if (keyframe) {
        int x, y;
        uint8_t *top;
        uint8_t *const left = s->intra4x4_pred_mode_left;
        if (layout)
            top = mb->intra4x4_pred_mode_top;
        else
            top = s->intra4x4_pred_mode_top + 4 * mb_x;
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
                const uint8_t *ctx;
                ctx       = vp8_pred4x4_prob_intra[top[x]][left[y]];
                *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
                left[y]   = top[x] = *intra4x4;
                intra4x4++;
            }
        }
    } else {
        int i;
        for (i = 0; i < 16; i++)
            intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree,
                                           vp8_pred4x4_prob_inter);
    }
}

static av_always_inline
void decode_mb_mode(VP8Context *s, VP8mvbounds *mv_bounds,
                    VP8Macroblock *mb, int mb_x, int mb_y,
                    uint8_t *segment, uint8_t *ref, int layout, int is_vp7)
{
    VP56RangeCoder *c = &s->c;
    static const char * const vp7_feature_name[] = { "q-index",
                                                     "lf-delta",
                                                     "partial-golden-update",
                                                     "blit-pitch" };
    if (is_vp7) {
        int i;
        *segment = 0;
        for (i = 0; i < 4; i++) {
            if (s->feature_enabled[i]) {
                if (vp56_rac_get_prob_branchy(c, s->feature_present_prob[i])) {
                    int index = vp8_rac_get_tree(c, vp7_feature_index_tree,
                                                 s->feature_index_prob[i]);
                    av_log(s->avctx, AV_LOG_WARNING,
                           "Feature %s present in macroblock (value 0x%x)\n",
                           vp7_feature_name[i], s->feature_value[i][index]);
                }
            }
        }
    } else if (s->segmentation.update_map) {
        int bit  = vp56_rac_get_prob(c, s->prob->segmentid[0]);
        *segment = vp56_rac_get_prob(c, s->prob->segmentid[1+bit]) + 2*bit;
    } else if (s->segmentation.enabled)
        *segment = ref ? *ref : *segment;
    mb->segment = *segment;

    mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;

    if (s->keyframe) {
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra,
                                    vp8_pred16x16_prob_intra);

        if (mb->mode == MODE_I4x4) {
            decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
        } else {
            const uint32_t modes = (is_vp7 ? vp7_pred4x4_mode
                                           : vp8_pred4x4_mode)[mb->mode] * 0x01010101u;
            if (s->mb_layout)
                AV_WN32A(mb->intra4x4_pred_mode_top, modes);
            else
                AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
            AV_WN32A(s->intra4x4_pred_mode_left, modes);
        }

        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
                                                vp8_pred8x8c_prob_intra);
        mb->ref_frame        = VP56_FRAME_CURRENT;
    } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
        // inter MB, 16.2
        if (vp56_rac_get_prob_branchy(c, s->prob->last))
            mb->ref_frame =
                (!is_vp7 && vp56_rac_get_prob(c, s->prob->golden)) ? VP56_FRAME_GOLDEN2 /* altref */
                                                                   : VP56_FRAME_GOLDEN;
        else
            mb->ref_frame = VP56_FRAME_PREVIOUS;
        s->ref_count[mb->ref_frame - 1]++;

        // motion vectors, 16.3
        if (is_vp7)
            vp7_decode_mvs(s, mb, mb_x, mb_y, layout);
        else
            vp8_decode_mvs(s, mv_bounds, mb, mb_x, mb_y, layout);
    } else {
        // intra MB, 16.1
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);

        if (mb->mode == MODE_I4x4)
            decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);

        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
                                                s->prob->pred8x8c);
        mb->ref_frame        = VP56_FRAME_CURRENT;
        mb->partitioning     = VP8_SPLITMVMODE_NONE;
        AV_ZERO32(&mb->bmv[0]);
    }
}

/**
 * @param r arithmetic bitstream reader context
 * @param block destination for block coefficients
 * @param probs probabilities to use when reading trees from the bitstream
 * @param i initial coeff index, 0 unless a separate DC block is coded
 * @param qmul array holding the dc/ac dequant factor at position 0/1
 *
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
static av_always_inline
int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
                                 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                                 int i, uint8_t *token_prob, int16_t qmul[2],
                                 const uint8_t scan[16], int vp7)
{
    VP56RangeCoder c = *r;
    goto skip_eob;
    do {
        int coeff;
restart:
        if (!vp56_rac_get_prob_branchy(&c, token_prob[0]))   // DCT_EOB
            break;

skip_eob:
        if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
            if (++i == 16)
                break; // invalid input; blocks should end with EOB
            token_prob = probs[i][0];
            if (vp7)
                goto restart;
            goto skip_eob;
        }

        if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
            coeff      = 1;
            token_prob = probs[i + 1][1];
        } else {
            if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
                coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
                if (coeff)
                    coeff += vp56_rac_get_prob(&c, token_prob[5]);
                coeff += 2;
            } else {
                // DCT_CAT*
                if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
                    if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
                        coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
                    } else {                                             // DCT_CAT2
                        coeff  = 7;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
                    }
                } else { // DCT_CAT3 and up
                    int a   = vp56_rac_get_prob(&c, token_prob[8]);
                    int b   = vp56_rac_get_prob(&c, token_prob[9 + a]);
                    int cat = (a << 1) + b;
                    coeff   = 3 + (8 << cat);
                    coeff  += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
                }
            }
            token_prob = probs[i + 1][2];
        }
        block[scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
    } while (++i < 16);

    *r = c;
    return i;
}

static av_always_inline
int inter_predict_dc(int16_t block[16], int16_t pred[2])
{
    int16_t dc = block[0];
    int ret = 0;

    if (pred[1] > 3) {
        dc  += pred[0];
        ret  = 1;
    }

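    /* Branchless test: reset the run counter if either DC is zero or the
     * two disagree in sign ((pred[0] ^ dc) >> 31 isolates the sign of
     * the XOR); bitwise | deliberately avoids three separate branches. */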
    if (!pred[0] | !dc | ((int32_t)pred[0] ^ (int32_t)dc) >> 31) {
        block[0] = pred[0] = dc;
        pred[1]  = 0;
    } else {
        if (pred[0] == dc)
            pred[1]++;
        block[0] = pred[0] = dc;
    }

    return ret;
}

static int vp7_decode_block_coeffs_internal(VP56RangeCoder *r,
                                            int16_t block[16],
                                            uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                                            int i, uint8_t *token_prob,
                                            int16_t qmul[2],
                                            const uint8_t scan[16])
{
    return decode_block_coeffs_internal(r, block, probs, i,
                                        token_prob, qmul, scan, IS_VP7);
}

#ifndef vp8_decode_block_coeffs_internal
static int vp8_decode_block_coeffs_internal(VP56RangeCoder *r,
                                            int16_t block[16],
                                            uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                                            int i, uint8_t *token_prob,
                                            int16_t qmul[2])
{
    return decode_block_coeffs_internal(r, block, probs, i,
                                        token_prob, qmul, ff_zigzag_scan, IS_VP8);
}
#endif

/**
 * @param c arithmetic bitstream reader context
 * @param block destination for block coefficients
 * @param probs probabilities to use when reading trees from the bitstream
 * @param i initial coeff index, 0 unless a separate DC block is coded
 * @param zero_nhood the initial prediction context for number of surrounding
 *                   all-zero blocks (only left/top, so 0-2)
 * @param qmul array holding the dc/ac dequant factor at position 0/1
 * @param scan scan pattern (VP7 only)
 *
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
static av_always_inline
int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16],
                        uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                        int i, int zero_nhood, int16_t qmul[2],
                        const uint8_t scan[16], int vp7)
{
    uint8_t *token_prob = probs[i][zero_nhood];
    if (!vp56_rac_get_prob_branchy(c, token_prob[0]))   // DCT_EOB
        return 0;
    return vp7 ? vp7_decode_block_coeffs_internal(c, block, probs, i,
                                                  token_prob, qmul, scan)
               : vp8_decode_block_coeffs_internal(c, block, probs, i,
                                                  token_prob, qmul);
}

static av_always_inline
void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c,
                      VP8Macroblock *mb, uint8_t t_nnz[9], uint8_t l_nnz[9],
                      int is_vp7)
{
    int i, x, y, luma_start = 0, luma_ctx = 3;
    int nnz_pred, nnz, nnz_total = 0;
    int segment = mb->segment;
    int block_dc = 0;

    if (mb->mode != MODE_I4x4 && (is_vp7 || mb->mode != VP8_MVMODE_SPLIT)) {
        nnz_pred = t_nnz[8] + l_nnz[8];

        // decode DC values and do hadamard
        nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0,
                                  nnz_pred, s->qmat[segment].luma_dc_qmul,
                                  ff_zigzag_scan, is_vp7);
        l_nnz[8] = t_nnz[8] = !!nnz;

        if (is_vp7 && mb->mode > MODE_I4x4) {
            nnz |= inter_predict_dc(td->block_dc,
                                    s->inter_dc_pred[mb->ref_frame - 1]);
        }

        if (nnz) {
            nnz_total += nnz;
            block_dc   = 1;
            if (nnz == 1)
                s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
            else
                s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
        }
        luma_start = 1;
        luma_ctx   = 0;
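        /* the 16 luma blocks below now skip coefficient 0 and use token
         * plane 0 (luma following a separate DC block) instead of the
         * default plane 3 */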
    }

    // luma blocks
    for (y = 0; y < 4; y++)
        for (x = 0; x < 4; x++) {
            nnz_pred = l_nnz[y] + t_nnz[x];
            nnz = decode_block_coeffs(c, td->block[y][x],
                                      s->prob->token[luma_ctx],
                                      luma_start, nnz_pred,
                                      s->qmat[segment].luma_qmul,
                                      s->prob[0].scan, is_vp7);
            /* nnz+block_dc may be one more than the actual last index,
             * but we don't care */
            td->non_zero_count_cache[y][x] = nnz + block_dc;
            t_nnz[x] = l_nnz[y] = !!nnz;
            nnz_total += nnz;
        }

    // chroma blocks
    // TODO: what to do about dimensions? 2nd dim for luma is x,
    // but for chroma it's (y<<1)|x
    for (i = 4; i < 6; i++)
        for (y = 0; y < 2; y++)
            for (x = 0; x < 2; x++) {
                nnz_pred = l_nnz[i + 2 * y] + t_nnz[i + 2 * x];
                nnz = decode_block_coeffs(c, td->block[i][(y << 1) + x],
                                          s->prob->token[2], 0, nnz_pred,
                                          s->qmat[segment].chroma_qmul,
                                          s->prob[0].scan, is_vp7);
                td->non_zero_count_cache[i][(y << 1) + x] = nnz;
                t_nnz[i + 2 * x] = l_nnz[i + 2 * y] = !!nnz;
                nnz_total += nnz;
            }

    // if there were no coded coeffs despite the macroblock not being marked skip,
    // we MUST not do the inner loop filter and should not do IDCT
    // Since skip isn't used for bitstream prediction, just manually set it.
    if (!nnz_total)
        mb->skip = 1;
}

static av_always_inline
void backup_mb_border(uint8_t *top_border, uint8_t *src_y,
                      uint8_t *src_cb, uint8_t *src_cr,
                      ptrdiff_t linesize, ptrdiff_t uvlinesize, int simple)
{
    AV_COPY128(top_border, src_y + 15 * linesize);
    if (!simple) {
        AV_COPY64(top_border + 16, src_cb + 7 * uvlinesize);
        AV_COPY64(top_border + 24, src_cr + 7 * uvlinesize);
    }
}

static av_always_inline
void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb,
                    uint8_t *src_cr, ptrdiff_t linesize, ptrdiff_t uvlinesize, int mb_x,
                    int mb_y, int mb_width, int simple, int xchg)
{
    uint8_t *top_border_m1 = top_border - 32;     // for TL prediction
    src_y  -= linesize;
    src_cb -= uvlinesize;
    src_cr -= uvlinesize;

#define XCHG(a, b, xchg)                                                \
    do {                                                                \
        if (xchg)                                                       \
            AV_SWAP64(b, a);                                            \
        else                                                            \
            AV_COPY64(b, a);                                            \
    } while (0)

    XCHG(top_border_m1 + 8, src_y - 8, xchg);
    XCHG(top_border,        src_y,     xchg);
    XCHG(top_border + 8,    src_y + 8, 1);
    if (mb_x < mb_width - 1)
        XCHG(top_border + 32, src_y + 16, 1);

    // only copy chroma for normal loop filter
    // or to initialize the top row to 127
    if (!simple || !mb_y) {
        XCHG(top_border_m1 + 16, src_cb - 8, xchg);
        XCHG(top_border_m1 + 24, src_cr - 8, xchg);
        XCHG(top_border + 16, src_cb, 1);
        XCHG(top_border + 24, src_cr, 1);
    }
}

static av_always_inline
int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
{
    if (!mb_x)
        return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
    else
        return mb_y ? mode : LEFT_DC_PRED8x8;
}

static av_always_inline
int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y, int vp7)
{
    if (!mb_x)
        return mb_y ? VERT_PRED8x8 : (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8);
    else
        return mb_y ? mode : HOR_PRED8x8;
}

static av_always_inline
int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y, int vp7)
{
    switch (mode) {
    case DC_PRED8x8:
        return check_dc_pred8x8_mode(mode, mb_x, mb_y);
    case VERT_PRED8x8:
        return !mb_y ? (vp7 ? DC_128_PRED8x8 : DC_127_PRED8x8) : mode;
    case HOR_PRED8x8:
        return !mb_x ? (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8) : mode;
    case PLANE_PRED8x8: /* TM */
        return check_tm_pred8x8_mode(mode, mb_x, mb_y, vp7);
    }
    return mode;
}

static av_always_inline
int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y, int vp7)
{
    if (!mb_x) {
        return mb_y ? VERT_VP8_PRED : (vp7 ? DC_128_PRED : DC_129_PRED);
    } else {
        return mb_y ? mode : HOR_VP8_PRED;
    }
}

static av_always_inline
int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y,
                                     int *copy_buf, int vp7)
{
    switch (mode) {
    case VERT_PRED:
        if (!mb_x && mb_y) {
            *copy_buf = 1;
            return mode;
        }
        /* fall-through */
    case DIAG_DOWN_LEFT_PRED:
    case VERT_LEFT_PRED:
        return !mb_y ? (vp7 ? DC_128_PRED : DC_127_PRED) : mode;
    case HOR_PRED:
        if (!mb_y) {
            *copy_buf = 1;
            return mode;
        }
        /* fall-through */
    case HOR_UP_PRED:
        return !mb_x ? (vp7 ? DC_128_PRED : DC_129_PRED) : mode;
    case TM_VP8_PRED:
        return check_tm_pred4x4_mode(mode, mb_x, mb_y, vp7);
    case DC_PRED: /* 4x4 DC doesn't use the same "H.264-style" exceptions
                   * as 16x16/8x8 DC */
    case DIAG_DOWN_RIGHT_PRED:
    case VERT_RIGHT_PRED:
    case HOR_DOWN_PRED:
        if (!mb_y || !mb_x)
            *copy_buf = 1;
        return mode;
    }
    return mode;
}

static av_always_inline
void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                   VP8Macroblock *mb, int mb_x, int mb_y, int is_vp7)
{
    int x, y, mode, nnz;
    uint32_t tr;

    /* for the first row, we need to run xchg_mb_border to init the top edge
     * to 127; otherwise, skip it if we aren't going to deblock */
    if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
        xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 1);

    if (mb->mode < MODE_I4x4) {
        mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y, is_vp7);
        s->hpc.pred16x16[mode](dst[0], s->linesize);
    } else {
        uint8_t *ptr = dst[0];
        uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
        const uint8_t lo = is_vp7 ? 128 : 127;
        const uint8_t hi = is_vp7 ? 128 : 129;
        uint8_t tr_top[4] = { lo, lo, lo, lo };

        // all blocks on the right edge of the macroblock use the bottom edge
        // of the top macroblock for their topright edge
        uint8_t *tr_right = ptr - s->linesize + 16;

        // if we're on the right edge of the frame, said edge is extended
        // from the top macroblock
        if (mb_y && mb_x == s->mb_width - 1) {
            tr = tr_right[-1] * 0x01010101u;
            tr_right = (uint8_t *) &tr;
        }

        if (mb->skip)
            AV_ZERO128(td->non_zero_count_cache);

        for (y = 0; y < 4; y++) {
            uint8_t *topright = ptr + 4 - s->linesize;
            for (x = 0; x < 4; x++) {
                int copy = 0;
                ptrdiff_t linesize = s->linesize;
                uint8_t *dst = ptr + 4 * x;
                LOCAL_ALIGNED(4, uint8_t, copy_dst, [5 * 8]);

                if ((y == 0 || x == 3) && mb_y == 0) {
                    topright = tr_top;
                } else if (x == 3)
                    topright = tr_right;

                mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x,
                                                        mb_y + y, &copy, is_vp7);
                if (copy) {
                    dst = copy_dst + 12;
                    linesize = 8;
                    if (!(mb_y + y)) {
                        copy_dst[3] = lo;
                        AV_WN32A(copy_dst + 4, lo * 0x01010101U);
                    } else {
                        AV_COPY32(copy_dst + 4, ptr + 4 * x - s->linesize);
                        if (!(mb_x + x)) {
                            copy_dst[3] = hi;
                        } else {
                            copy_dst[3] = ptr[4 * x - s->linesize - 1];
                        }
                    }
                    if (!(mb_x + x)) {
                        copy_dst[11] =
                        copy_dst[19] =
                        copy_dst[27] =
                        copy_dst[35] = hi;
                    } else {
                        copy_dst[11] = ptr[4 * x                   - 1];
                        copy_dst[19] = ptr[4 * x + s->linesize     - 1];
                        copy_dst[27] = ptr[4 * x + s->linesize * 2 - 1];
                        copy_dst[35] = ptr[4 * x + s->linesize * 3 - 1];
                    }
                }
                s->hpc.pred4x4[mode](dst, topright, linesize);
                if (copy) {
                    AV_COPY32(ptr + 4 * x,                   copy_dst + 12);
                    AV_COPY32(ptr + 4 * x + s->linesize,     copy_dst + 20);
                    AV_COPY32(ptr + 4 * x + s->linesize * 2, copy_dst + 28);
                    AV_COPY32(ptr + 4 * x + s->linesize * 3, copy_dst + 36);
                }

                nnz = td->non_zero_count_cache[y][x];
                if (nnz) {
                    if (nnz == 1)
                        s->vp8dsp.vp8_idct_dc_add(ptr + 4 * x,
                                                  td->block[y][x], s->linesize);
                    else
                        s->vp8dsp.vp8_idct_add(ptr + 4 * x,
                                               td->block[y][x], s->linesize);
                }
                topright += 4;
            }

            ptr += 4 * s->linesize;
            intra4x4 += 4;
        }
    }

    mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode,
                                            mb_x, mb_y, is_vp7);
    s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
    s->hpc.pred8x8[mode](dst[2], s->uvlinesize);

    if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
        xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 0);
}

static const uint8_t subpel_idx[3][8] = {
    { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
                                // also function pointer index
    { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
    { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
};
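
/* Editorial illustration (not decoder code): how subpel_idx is consumed by
 * the MC helpers below. The values are a worked example, assuming the
 * quarter-pel luma mv conventions of vp8_mc_luma():
 *
 *     VP56mv mv  = { .x = 7, .y = 0 };   // quarter-pel
 *     int mx     = (mv.x * 2) & 7;       // 6: eighth-pel phase
 *     int mx_idx = subpel_idx[0][mx];    // 2 extra pixels on the left;
 *                                        // doubles as the mc_func index
 *     int extra  = subpel_idx[1][mx];    // 5 extra pixels in total
 *     int right  = subpel_idx[2][mx];    // 3 extra pixels on the right
 *
 * A 16-wide block with this phase therefore reads 16 + 5 = 21 source columns
 * (2 left + 16 + 3 right), which is what the edge-emulation bounds checks
 * below account for. */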

/**
 * luma MC function
 *
 * @param s        VP8 decoding context
 * @param dst      target buffer for block data at block position
 * @param ref      reference picture buffer at origin (0, 0)
 * @param mv       motion vector (relative to block position) to get pixel data from
 * @param x_off    horizontal position of block from origin (0, 0)
 * @param y_off    vertical position of block from origin (0, 0)
 * @param block_w  width of block (16, 8 or 4)
 * @param block_h  height of block (always same as block_w)
 * @param width    width of src/dst plane data
 * @param height   height of src/dst plane data
 * @param linesize size of a single line of plane data, including padding
 * @param mc_func  motion compensation function pointers (bilinear or sixtap MC)
 */
static av_always_inline
void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
                 ThreadFrame *ref, const VP56mv *mv,
                 int x_off, int y_off, int block_w, int block_h,
                 int width, int height, ptrdiff_t linesize,
                 vp8_mc_func mc_func[3][3])
{
    uint8_t *src = ref->f->data[0];

    if (AV_RN32A(mv)) {
        ptrdiff_t src_linesize = linesize;

        int mx = (mv->x * 2) & 7, mx_idx = subpel_idx[0][mx];
        int my = (mv->y * 2) & 7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 2;
        y_off += mv->y >> 2;

        // edge emulation
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
        src += y_off * linesize + x_off;
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                     src - my_idx * linesize - mx_idx,
                                     EDGE_EMU_LINESIZE, linesize,
                                     block_w + subpel_idx[1][mx],
                                     block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx,
                                     width, height);
            src = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
            src_linesize = EDGE_EMU_LINESIZE;
        }
        mc_func[my_idx][mx_idx](dst, linesize, src, src_linesize, block_h, mx, my);
    } else {
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
        mc_func[0][0](dst, linesize, src + y_off * linesize + x_off,
                      linesize, block_h, 0, 0);
    }
}
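
/* Editorial example (not decoder code) of when the edge-emulation path above
 * triggers: with block_w = 16, width = 176, mx_idx = 2 and a right overhang
 * of subpel_idx[2][mx] = 3, any x_off outside [2, 156] makes the 21-column
 * read stray off the plane, e.g.:
 *
 *     int x_off = -1;   // mv points left of the frame
 *     // x_off < mx_idx -> the padded patch is first copied into
 *     // td->edge_emu_buffer and mc_func runs on that copy instead */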

/**
 * chroma MC function
 *
 * @param s        VP8 decoding context
 * @param dst1     target buffer for block data at block position (U plane)
 * @param dst2     target buffer for block data at block position (V plane)
 * @param ref      reference picture buffer at origin (0, 0)
 * @param mv       motion vector (relative to block position) to get pixel data from
 * @param x_off    horizontal position of block from origin (0, 0)
 * @param y_off    vertical position of block from origin (0, 0)
 * @param block_w  width of block (16, 8 or 4)
 * @param block_h  height of block (always same as block_w)
 * @param width    width of src/dst plane data
 * @param height   height of src/dst plane data
 * @param linesize size of a single line of plane data, including padding
 * @param mc_func  motion compensation function pointers (bilinear or sixtap MC)
 */
static av_always_inline
void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1,
                   uint8_t *dst2, ThreadFrame *ref, const VP56mv *mv,
                   int x_off, int y_off, int block_w, int block_h,
                   int width, int height, ptrdiff_t linesize,
                   vp8_mc_func mc_func[3][3])
{
    uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2];

    if (AV_RN32A(mv)) {
        int mx = mv->x & 7, mx_idx = subpel_idx[0][mx];
        int my = mv->y & 7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 3;
        y_off += mv->y >> 3;

        // edge emulation
        src1 += y_off * linesize + x_off;
        src2 += y_off * linesize + x_off;
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                     src1 - my_idx * linesize - mx_idx,
                                     EDGE_EMU_LINESIZE, linesize,
                                     block_w + subpel_idx[1][mx],
                                     block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx, width, height);
            src1 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
            mc_func[my_idx][mx_idx](dst1, linesize, src1, EDGE_EMU_LINESIZE, block_h, mx, my);

            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                     src2 - my_idx * linesize - mx_idx,
                                     EDGE_EMU_LINESIZE, linesize,
                                     block_w + subpel_idx[1][mx],
                                     block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx, width, height);
            src2 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
            mc_func[my_idx][mx_idx](dst2, linesize, src2, EDGE_EMU_LINESIZE, block_h, mx, my);
        } else {
            mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
            mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
        }
    } else {
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
        mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
        mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
    }
}

static av_always_inline
void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                 ThreadFrame *ref_frame, int x_off, int y_off,
                 int bx_off, int by_off, int block_w, int block_h,
                 int width, int height, VP56mv *mv)
{
    VP56mv uvmv = *mv;

    /* Y */
    vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
                ref_frame, mv, x_off + bx_off, y_off + by_off,
                block_w, block_h, width, height, s->linesize,
                s->put_pixels_tab[block_w == 8]);

    /* U/V */
    if (s->profile == 3) {
        /* this block only applies to VP8; it is safe to check
         * only the profile, as VP7 profile <= 1 */
        uvmv.x &= ~7;
        uvmv.y &= ~7;
    }
    x_off   >>= 1;
    y_off   >>= 1;
    bx_off  >>= 1;
    by_off  >>= 1;
    width   >>= 1;
    height  >>= 1;
    block_w >>= 1;
    block_h >>= 1;
    vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
                  dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
                  &uvmv, x_off + bx_off, y_off + by_off,
                  block_w, block_h, width, height, s->uvlinesize,
                  s->put_pixels_tab[1 + (block_w == 4)]);
}
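
/* Editorial note (illustrative, not decoder code): the luma mv reused above
 * is in quarter-pel units, so on the half-resolution chroma planes the same
 * value acts as an eighth-pel vector; profile 3 clears the three fractional
 * bits to force full-pel chroma MC:
 *
 *     VP56mv uvmv = { .x = 13, .y = -5 }; // eighth-pel on chroma
 *     uvmv.x &= ~7;                       // 13 ->  8 (full pel)
 *     uvmv.y &= ~7;                       // -5 -> -8 (two's-complement floor)
 */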

/* Fetch pixels for estimated mv 4 macroblocks ahead.
 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
static av_always_inline
void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
                     int mb_xy, int ref)
{
    /* Don't prefetch refs that haven't been used very often this frame. */
    if (s->ref_count[ref - 1] > (mb_xy >> 5)) {
        int x_off = mb_x << 4, y_off = mb_y << 4;
        int mx = (mb->mv.x >> 2) + x_off + 8;
        int my = (mb->mv.y >> 2) + y_off;
        uint8_t **src = s->framep[ref]->tf.f->data;
        int off = mx + (my + (mb_x & 3) * 4) * s->linesize + 64;
        /* For threading, a ff_thread_await_progress here might be useful, but
         * it actually slows down the decoder. Since a bad prefetch doesn't
         * generate bad decoder output, we don't run it here. */
        s->vdsp.prefetch(src[0] + off, s->linesize, 4);
        off = (mx >> 1) + ((my >> 1) + (mb_x & 7)) * s->uvlinesize + 64;
        s->vdsp.prefetch(src[1] + off, src[2] - src[1], 2);
    }
}

/**
 * Apply motion vectors to prediction buffer, chapter 18.
 */
static av_always_inline
void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                   VP8Macroblock *mb, int mb_x, int mb_y)
{
    int x_off = mb_x << 4, y_off = mb_y << 4;
    int width = 16 * s->mb_width, height = 16 * s->mb_height;
    ThreadFrame *ref = &s->framep[mb->ref_frame]->tf;
    VP56mv *bmv = mb->bmv;

    switch (mb->partitioning) {
    case VP8_SPLITMVMODE_NONE:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 16, 16, width, height, &mb->mv);
        break;
    case VP8_SPLITMVMODE_4x4: {
        int x, y;
        VP56mv uvmv;

        /* Y */
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
                vp8_mc_luma(s, td, dst[0] + 4 * y * s->linesize + x * 4,
                            ref, &bmv[4 * y + x],
                            4 * x + x_off, 4 * y + y_off, 4, 4,
                            width, height, s->linesize,
                            s->put_pixels_tab[2]);
            }
        }

        /* U/V */
        x_off  >>= 1;
        y_off  >>= 1;
        width  >>= 1;
        height >>= 1;
        for (y = 0; y < 2; y++) {
            for (x = 0; x < 2; x++) {
                uvmv.x = mb->bmv[2 * y       * 4 + 2 * x    ].x +
                         mb->bmv[2 * y       * 4 + 2 * x + 1].x +
                         mb->bmv[(2 * y + 1) * 4 + 2 * x    ].x +
                         mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].x;
                uvmv.y = mb->bmv[2 * y       * 4 + 2 * x    ].y +
                         mb->bmv[2 * y       * 4 + 2 * x + 1].y +
                         mb->bmv[(2 * y + 1) * 4 + 2 * x    ].y +
                         mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].y;
                uvmv.x = (uvmv.x + 2 + FF_SIGNBIT(uvmv.x)) >> 2;
                uvmv.y = (uvmv.y + 2 + FF_SIGNBIT(uvmv.y)) >> 2;
                if (s->profile == 3) {
                    uvmv.x &= ~7;
                    uvmv.y &= ~7;
                }
                vp8_mc_chroma(s, td, dst[1] + 4 * y * s->uvlinesize + x * 4,
                              dst[2] + 4 * y * s->uvlinesize + x * 4, ref,
                              &uvmv, 4 * x + x_off, 4 * y + y_off, 4, 4,
                              width, height, s->uvlinesize,
                              s->put_pixels_tab[2]);
            }
        }
        break;
    }
    case VP8_SPLITMVMODE_16x8:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 16, 8, width, height, &bmv[0]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 8, 16, 8, width, height, &bmv[1]);
        break;
    case VP8_SPLITMVMODE_8x16:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 8, 16, width, height, &bmv[0]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    8, 0, 8, 16, width, height, &bmv[1]);
        break;
    case VP8_SPLITMVMODE_8x8:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 8, 8, width, height, &bmv[0]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    8, 0, 8, 8, width, height, &bmv[1]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 8, 8, 8, width, height, &bmv[2]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    8, 8, 8, 8, width, height, &bmv[3]);
        break;
    }
}
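
/* Editorial example (not decoder code) of the 4x4-split chroma rounding
 * above: the four luma mv components are summed, then divided by four with
 * symmetric round-to-nearest. FF_SIGNBIT() is -1 for negative values, which
 * compensates for the flooring of the arithmetic shift:
 *
 *     int sum = -6;                               // sum of four mv.x values
 *     int avg = (sum + 2 + FF_SIGNBIT(sum)) >> 2; // (-6 + 2 - 1) >> 2 = -2
 *     // plain (sum + 2) >> 2 would yield -1, while +6 rounds to +2
 */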

static av_always_inline
void idct_mb(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], VP8Macroblock *mb)
{
    int x, y, ch;

    if (mb->mode != MODE_I4x4) {
        uint8_t *y_dst = dst[0];
        for (y = 0; y < 4; y++) {
            uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
            if (nnz4) {
                if (nnz4 & ~0x01010101) {
                    for (x = 0; x < 4; x++) {
                        if ((uint8_t) nnz4 == 1)
                            s->vp8dsp.vp8_idct_dc_add(y_dst + 4 * x,
                                                      td->block[y][x],
                                                      s->linesize);
                        else if ((uint8_t) nnz4 > 1)
                            s->vp8dsp.vp8_idct_add(y_dst + 4 * x,
                                                   td->block[y][x],
                                                   s->linesize);
                        nnz4 >>= 8;
                        if (!nnz4)
                            break;
                    }
                } else {
                    s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
                }
            }
            y_dst += 4 * s->linesize;
        }
    }

    for (ch = 0; ch < 2; ch++) {
        uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4 + ch]);
        if (nnz4) {
            uint8_t *ch_dst = dst[1 + ch];
            if (nnz4 & ~0x01010101) {
                for (y = 0; y < 2; y++) {
                    for (x = 0; x < 2; x++) {
                        if ((uint8_t) nnz4 == 1)
                            s->vp8dsp.vp8_idct_dc_add(ch_dst + 4 * x,
                                                      td->block[4 + ch][(y << 1) + x],
                                                      s->uvlinesize);
                        else if ((uint8_t) nnz4 > 1)
                            s->vp8dsp.vp8_idct_add(ch_dst + 4 * x,
                                                   td->block[4 + ch][(y << 1) + x],
                                                   s->uvlinesize);
                        nnz4 >>= 8;
                        if (!nnz4)
                            goto chroma_idct_end;
                    }
                    ch_dst += 4 * s->uvlinesize;
                }
            } else {
                s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4 + ch], s->uvlinesize);
            }
        }
chroma_idct_end:
        ;
    }
}
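
/* Editorial sketch (not decoder code) of the nnz4 fast path above: AV_RL32()
 * packs the four per-block coefficient counts of one row into a single
 * little-endian word, so one mask separates "DC-only everywhere" from the
 * general case:
 *
 *     uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
 *     if (nnz4 & ~0x01010101)   // some byte > 1: at least one block has AC
 *         ;                     // per-block vp8_idct_add/vp8_idct_dc_add
 *     else
 *         ;                     // batched vp8_idct_dc_add4y()
 */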

static av_always_inline
void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb,
                         VP8FilterStrength *f, int is_vp7)
{
    int interior_limit, filter_level;

    if (s->segmentation.enabled) {
        filter_level = s->segmentation.filter_level[mb->segment];
        if (!s->segmentation.absolute_vals)
            filter_level += s->filter.level;
    } else
        filter_level = s->filter.level;

    if (s->lf_delta.enabled) {
        filter_level += s->lf_delta.ref[mb->ref_frame];
        filter_level += s->lf_delta.mode[mb->mode];
    }

    filter_level = av_clip_uintp2(filter_level, 6);

    interior_limit = filter_level;
    if (s->filter.sharpness) {
        interior_limit >>= (s->filter.sharpness + 3) >> 2;
        interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
    }
    interior_limit = FFMAX(interior_limit, 1);

    f->filter_level = filter_level;
    f->inner_limit  = interior_limit;
    f->inner_filter = is_vp7 || !mb->skip || mb->mode == MODE_I4x4 ||
                      mb->mode == VP8_MVMODE_SPLIT;
}

static av_always_inline
void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f,
               int mb_x, int mb_y, int is_vp7)
{
    int mbedge_lim, bedge_lim_y, bedge_lim_uv, hev_thresh;
    int filter_level = f->filter_level;
    int inner_limit  = f->inner_limit;
    int inner_filter = f->inner_filter;
    ptrdiff_t linesize   = s->linesize;
    ptrdiff_t uvlinesize = s->uvlinesize;
    static const uint8_t hev_thresh_lut[2][64] = {
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
          3, 3, 3, 3 },
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          2, 2, 2, 2 }
    };

    if (!filter_level)
        return;

    if (is_vp7) {
        bedge_lim_y  = filter_level;
        bedge_lim_uv = filter_level * 2;
        mbedge_lim   = filter_level + 2;
    } else {
        bedge_lim_y  =
        bedge_lim_uv = filter_level * 2 + inner_limit;
        mbedge_lim   = bedge_lim_y + 4;
    }

    hev_thresh = hev_thresh_lut[s->keyframe][filter_level];

    if (mb_x) {
        s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);
    }

#define H_LOOP_FILTER_16Y_INNER(cond)                                        \
    if (cond && inner_filter) {                                              \
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 4, linesize,           \
                                             bedge_lim_y, inner_limit,       \
                                             hev_thresh);                    \
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 8, linesize,           \
                                             bedge_lim_y, inner_limit,       \
                                             hev_thresh);                    \
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 12, linesize,          \
                                             bedge_lim_y, inner_limit,       \
                                             hev_thresh);                    \
        s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,         \
                                             uvlinesize, bedge_lim_uv,       \
                                             inner_limit, hev_thresh);       \
    }

    H_LOOP_FILTER_16Y_INNER(!is_vp7)

    if (mb_y) {
        s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);
    }

    if (inner_filter) {
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 4 * linesize,
                                             linesize, bedge_lim_y,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 8 * linesize,
                                             linesize, bedge_lim_y,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 12 * linesize,
                                             linesize, bedge_lim_y,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
                                             dst[2] + 4 * uvlinesize,
                                             uvlinesize, bedge_lim_uv,
                                             inner_limit, hev_thresh);
    }

    H_LOOP_FILTER_16Y_INNER(is_vp7)
}

static av_always_inline
void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f,
                      int mb_x, int mb_y)
{
    int mbedge_lim, bedge_lim;
    int filter_level = f->filter_level;
    int inner_limit  = f->inner_limit;
    int inner_filter = f->inner_filter;
    ptrdiff_t linesize = s->linesize;

    if (!filter_level)
        return;

    bedge_lim  = 2 * filter_level + inner_limit;
    mbedge_lim = bedge_lim + 4;

    if (mb_x)
        s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
    if (inner_filter) {
        s->vp8dsp.vp8_h_loop_filter_simple(dst +  4, linesize, bedge_lim);
        s->vp8dsp.vp8_h_loop_filter_simple(dst +  8, linesize, bedge_lim);
        s->vp8dsp.vp8_h_loop_filter_simple(dst + 12, linesize, bedge_lim);
    }

    if (mb_y)
        s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
    if (inner_filter) {
        s->vp8dsp.vp8_v_loop_filter_simple(dst +  4 * linesize, linesize, bedge_lim);
        s->vp8dsp.vp8_v_loop_filter_simple(dst +  8 * linesize, linesize, bedge_lim);
        s->vp8dsp.vp8_v_loop_filter_simple(dst + 12 * linesize, linesize, bedge_lim);
    }
}

#define MARGIN (16 << 2)
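/* Editorial note: MARGIN is in quarter-pel units, i.e. 64 = 16 luma pixels
 * (one macroblock), matching the "<< 6" used below to convert a macroblock
 * index to quarter-pel. Worked example for a 40-mb-wide frame:
 *
 *     int mv_max_x = ((40 - 1) << 6) + MARGIN;   // 2496 + 64 quarter-pel
 */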
static av_always_inline
int vp78_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
                            VP8Frame *prev_frame, int is_vp7)
{
    VP8Context *s = avctx->priv_data;
    int mb_x, mb_y;

    s->mv_bounds.mv_min.y = -MARGIN;
    s->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
    for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
        VP8Macroblock *mb = s->macroblocks_base +
                            ((s->mb_width + 1) * (mb_y + 1) + 1);
        int mb_xy = mb_y * s->mb_width;

        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);

        s->mv_bounds.mv_min.x = -MARGIN;
        s->mv_bounds.mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;

        for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
            if (vpX_rac_is_end(&s->c)) {
                return AVERROR_INVALIDDATA;
            }
            if (mb_y == 0)
                AV_WN32A((mb - s->mb_width - 1)->intra4x4_pred_mode_top,
                         DC_PRED * 0x01010101);
            decode_mb_mode(s, &s->mv_bounds, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
                           prev_frame && prev_frame->seg_map ?
                           prev_frame->seg_map->data + mb_xy : NULL, 1, is_vp7);
            s->mv_bounds.mv_min.x -= 64;
            s->mv_bounds.mv_max.x -= 64;
        }
        s->mv_bounds.mv_min.y -= 64;
        s->mv_bounds.mv_max.y -= 64;
    }
    return 0;
}

static int vp7_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
                                  VP8Frame *prev_frame)
{
    return vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP7);
}

static int vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
                                  VP8Frame *prev_frame)
{
    return vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP8);
}

#if HAVE_THREADS
#define check_thread_pos(td, otd, mb_x_check, mb_y_check)                     \
    do {                                                                      \
        int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF);                 \
        if (atomic_load(&otd->thread_mb_pos) < tmp) {                         \
            pthread_mutex_lock(&otd->lock);                                   \
            atomic_store(&td->wait_mb_pos, tmp);                              \
            do {                                                              \
                if (atomic_load(&otd->thread_mb_pos) >= tmp)                  \
                    break;                                                    \
                pthread_cond_wait(&otd->cond, &otd->lock);                    \
            } while (1);                                                      \
            atomic_store(&td->wait_mb_pos, INT_MAX);                          \
            pthread_mutex_unlock(&otd->lock);                                 \
        }                                                                     \
    } while (0)

#define update_pos(td, mb_y, mb_x)                                            \
    do {                                                                      \
        int pos = (mb_y << 16) | (mb_x & 0xFFFF);                             \
        int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && \
                               (num_jobs > 1);                                \
        int is_null          = !next_td || !prev_td;                          \
        int pos_check        = (is_null) ? 1 :                                \
            (next_td != td && pos >= atomic_load(&next_td->wait_mb_pos)) ||   \
            (prev_td != td && pos >= atomic_load(&prev_td->wait_mb_pos));     \
        atomic_store(&td->thread_mb_pos, pos);                                \
        if (sliced_threading && pos_check) {                                  \
            pthread_mutex_lock(&td->lock);                                    \
            pthread_cond_broadcast(&td->cond);                                \
            pthread_mutex_unlock(&td->lock);                                  \
        }                                                                     \
    } while (0)
#else
#define check_thread_pos(td, otd, mb_x_check, mb_y_check) while(0)
#define update_pos(td, mb_y, mb_x) while(0)
#endif
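
/* Editorial sketch (not decoder code): both macros above pack a macroblock
 * position into one int, mb_y in the high 16 bits and mb_x in the low 16,
 * so a single integer compare orders positions row-major:
 *
 *     int pos_a = (3 << 16) | 5;    // row 3, column 5
 *     int pos_b = (4 << 16) | 0;    // row 4, column 0
 *     // pos_a < pos_b: any mb in row 4 is further along than row 3
 */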

static av_always_inline int decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
                                                    int jobnr, int threadnr, int is_vp7)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
    int mb_y = atomic_load(&td->thread_mb_pos) >> 16;
    int mb_x, mb_xy = mb_y * s->mb_width;
    int num_jobs = s->num_jobs;
    VP8Frame *curframe = s->curframe, *prev_frame = s->prev_frame;
    VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions - 1)];
    VP8Macroblock *mb;
    uint8_t *dst[3] = {
        curframe->tf.f->data[0] + 16 * mb_y * s->linesize,
        curframe->tf.f->data[1] +  8 * mb_y * s->uvlinesize,
        curframe->tf.f->data[2] +  8 * mb_y * s->uvlinesize
    };

    if (vpX_rac_is_end(c))
        return AVERROR_INVALIDDATA;

    if (mb_y == 0)
        prev_td = td;
    else
        prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
    if (mb_y == s->mb_height - 1)
        next_td = td;
    else
        next_td = &s->thread_data[(jobnr + 1) % num_jobs];
    if (s->mb_layout == 1)
        mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
    else {
        // Make sure the previous frame has read its segmentation map,
        // if we re-use the same map.
        if (prev_frame && s->segmentation.enabled &&
            !s->segmentation.update_map)
            ff_thread_await_progress(&prev_frame->tf, mb_y, 0);
        mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
        memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
    }

    if (!is_vp7 || mb_y == 0)
        memset(td->left_nnz, 0, sizeof(td->left_nnz));

    td->mv_bounds.mv_min.x = -MARGIN;
    td->mv_bounds.mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;

    for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
        if (vpX_rac_is_end(c))
            return AVERROR_INVALIDDATA;
        // Wait for previous thread to read mb_x+2, and reach mb_y-1.
        if (prev_td != td) {
            if (threadnr != 0) {
                check_thread_pos(td, prev_td,
                                 mb_x + (is_vp7 ? 2 : 1),
                                 mb_y - (is_vp7 ? 2 : 1));
            } else {
                check_thread_pos(td, prev_td,
                                 mb_x + (is_vp7 ? 2 : 1) + s->mb_width + 3,
                                 mb_y - (is_vp7 ? 2 : 1));
            }
        }

        s->vdsp.prefetch(dst[0] + (mb_x & 3) * 4 * s->linesize + 64,
                         s->linesize, 4);
        s->vdsp.prefetch(dst[1] + (mb_x & 7) * s->uvlinesize + 64,
                         dst[2] - dst[1], 2);

        if (!s->mb_layout)
            decode_mb_mode(s, &td->mv_bounds, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
                           prev_frame && prev_frame->seg_map ?
                           prev_frame->seg_map->data + mb_xy : NULL, 0, is_vp7);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);

        if (!mb->skip)
            decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz, is_vp7);

        if (mb->mode <= MODE_I4x4)
            intra_predict(s, td, dst, mb, mb_x, mb_y, is_vp7);
        else
            inter_predict(s, td, dst, mb, mb_x, mb_y);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);

        if (!mb->skip) {
            idct_mb(s, td, dst, mb);
        } else {
            AV_ZERO64(td->left_nnz);
            AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned

            /* Reset DC block predictors if they would exist
             * if the mb had coefficients */
            if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
                td->left_nnz[8]     = 0;
                s->top_nnz[mb_x][8] = 0;
            }
        }

        if (s->deblock_filter)
            filter_level_for_mb(s, mb, &td->filter_strength[mb_x], is_vp7);

        if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs - 1) {
            if (s->filter.simple)
                backup_mb_border(s->top_border[mb_x + 1], dst[0],
                                 NULL, NULL, s->linesize, 0, 1);
            else
                backup_mb_border(s->top_border[mb_x + 1], dst[0],
                                 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
        }

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);

        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;
        td->mv_bounds.mv_min.x -= 64;
        td->mv_bounds.mv_max.x -= 64;

        if (mb_x == s->mb_width + 1) {
            update_pos(td, mb_y, s->mb_width + 3);
        } else {
            update_pos(td, mb_y, mb_x);
        }
    }
    return 0;
}

static int vp7_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
                                       int jobnr, int threadnr)
{
    return decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 1);
}

static int vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
                                       int jobnr, int threadnr)
{
    return decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 0);
}

static av_always_inline void filter_mb_row(AVCodecContext *avctx, void *tdata,
                                           int jobnr, int threadnr, int is_vp7)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *td = &s->thread_data[threadnr];
    int mb_x, mb_y = atomic_load(&td->thread_mb_pos) >> 16, num_jobs = s->num_jobs;
    AVFrame *curframe = s->curframe->tf.f;
    VP8Macroblock *mb;
    VP8ThreadData *prev_td, *next_td;
    uint8_t *dst[3] = {
        curframe->data[0] + 16 * mb_y * s->linesize,
        curframe->data[1] +  8 * mb_y * s->uvlinesize,
        curframe->data[2] +  8 * mb_y * s->uvlinesize
    };

    if (s->mb_layout == 1)
        mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
    else
        mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;

    if (mb_y == 0)
        prev_td = td;
    else
        prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
    if (mb_y == s->mb_height - 1)
        next_td = td;
    else
        next_td = &s->thread_data[(jobnr + 1) % num_jobs];

    for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
        VP8FilterStrength *f = &td->filter_strength[mb_x];
        if (prev_td != td)
            check_thread_pos(td, prev_td,
                             (mb_x + 1) + (s->mb_width + 3), mb_y - 1);
        if (next_td != td)
            if (next_td != &s->thread_data[0])
                check_thread_pos(td, next_td, mb_x + 1, mb_y + 1);

        if (num_jobs == 1) {
            if (s->filter.simple)
                backup_mb_border(s->top_border[mb_x + 1], dst[0],
                                 NULL, NULL, s->linesize, 0, 1);
            else
                backup_mb_border(s->top_border[mb_x + 1], dst[0],
                                 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
        }

        if (s->filter.simple)
            filter_mb_simple(s, dst[0], f, mb_x, mb_y);
        else
            filter_mb(s, dst, f, mb_x, mb_y, is_vp7);
        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;

        update_pos(td, mb_y, (s->mb_width + 3) + mb_x);
    }
}

static void vp7_filter_mb_row(AVCodecContext *avctx, void *tdata,
                              int jobnr, int threadnr)
{
    filter_mb_row(avctx, tdata, jobnr, threadnr, 1);
}

static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
                              int jobnr, int threadnr)
{
    filter_mb_row(avctx, tdata, jobnr, threadnr, 0);
}

static av_always_inline
int vp78_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, int jobnr,
                              int threadnr, int is_vp7)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *td = &s->thread_data[jobnr];
    VP8ThreadData *next_td = NULL, *prev_td = NULL;
    VP8Frame *curframe = s->curframe;
    int mb_y, num_jobs = s->num_jobs;
    int ret;

    td->thread_nr = threadnr;
    td->mv_bounds.mv_min.y = -MARGIN - 64 * threadnr;
    td->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN - 64 * threadnr;
    for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
        atomic_store(&td->thread_mb_pos, mb_y << 16);
        ret = s->decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
        if (ret < 0) {
            update_pos(td, s->mb_height, INT_MAX & 0xFFFF);
            return ret;
        }
        if (s->deblock_filter)
            s->filter_mb_row(avctx, tdata, jobnr, threadnr);
        update_pos(td, mb_y, INT_MAX & 0xFFFF);

        td->mv_bounds.mv_min.y -= 64 * num_jobs;
        td->mv_bounds.mv_max.y -= 64 * num_jobs;

        if (avctx->active_thread_type == FF_THREAD_FRAME)
            ff_thread_report_progress(&curframe->tf, mb_y, 0);
    }

    return 0;
}

static int vp7_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
                                    int jobnr, int threadnr)
{
    return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP7);
}

static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
                                    int jobnr, int threadnr)
{
    return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP8);
}

static av_always_inline
int vp78_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                      const AVPacket *avpkt, int is_vp7)
{
    VP8Context *s = avctx->priv_data;
    int ret, i, referenced, num_jobs;
    enum AVDiscard skip_thresh;
    VP8Frame *av_uninit(curframe), *prev_frame;

    if (is_vp7)
        ret = vp7_decode_frame_header(s, avpkt->data, avpkt->size);
    else
        ret = vp8_decode_frame_header(s, avpkt->data, avpkt->size);

    if (ret < 0)
        goto err;

    if (s->actually_webp) {
        // avctx->pix_fmt already set in caller.
    } else if (!is_vp7 && s->pix_fmt == AV_PIX_FMT_NONE) {
        s->pix_fmt = get_pixel_format(s);
        if (s->pix_fmt < 0) {
            ret = AVERROR(EINVAL);
            goto err;
        }
        avctx->pix_fmt = s->pix_fmt;
    }

    prev_frame = s->framep[VP56_FRAME_CURRENT];

    referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT ||
                 s->update_altref == VP56_FRAME_CURRENT;

    skip_thresh = !referenced ? AVDISCARD_NONREF
                : !s->keyframe ? AVDISCARD_NONKEY
                : AVDISCARD_ALL;

    if (avctx->skip_frame >= skip_thresh) {
        s->invisible = 1;
        memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
        goto skip_decode;
    }
    s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;

    // release no longer referenced frames
    for (i = 0; i < 5; i++)
        if (s->frames[i].tf.f->buf[0] &&
            &s->frames[i] != prev_frame &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
            vp8_release_frame(s, &s->frames[i]);

    curframe = s->framep[VP56_FRAME_CURRENT] = vp8_find_free_buffer(s);

    if (!s->colorspace)
        avctx->colorspace = AVCOL_SPC_BT470BG;
    if (s->fullrange)
        avctx->color_range = AVCOL_RANGE_JPEG;
    else
        avctx->color_range = AVCOL_RANGE_MPEG;

    /* Given that arithmetic probabilities are updated every frame, it's quite
     * likely that the values we have on a random interframe are complete
     * junk if we didn't start decode on a keyframe. So just don't display
     * anything rather than junk. */
    if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
                         !s->framep[VP56_FRAME_GOLDEN]   ||
                         !s->framep[VP56_FRAME_GOLDEN2])) {
        av_log(avctx, AV_LOG_WARNING,
               "Discarding interframe without a prior keyframe!\n");
        ret = AVERROR_INVALIDDATA;
        goto err;
    }

    curframe->tf.f->key_frame = s->keyframe;
    curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I
                                            : AV_PICTURE_TYPE_P;
    if ((ret = vp8_alloc_frame(s, curframe, referenced)) < 0)
        goto err;

    // check if golden and altref are swapped
    if (s->update_altref != VP56_FRAME_NONE)
        s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
    else
        s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];

    if (s->update_golden != VP56_FRAME_NONE)
        s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
    else
        s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];

    if (s->update_last)
        s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
    else
        s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];

    s->next_framep[VP56_FRAME_CURRENT] = curframe;

    if (avctx->codec->update_thread_context)
        ff_thread_finish_setup(avctx);

    if (avctx->hwaccel) {
        ret = avctx->hwaccel->start_frame(avctx, avpkt->data, avpkt->size);
        if (ret < 0)
            goto err;

        ret = avctx->hwaccel->decode_slice(avctx, avpkt->data, avpkt->size);
        if (ret < 0)
            goto err;

        ret = avctx->hwaccel->end_frame(avctx);
        if (ret < 0)
            goto err;
    } else {
        s->linesize   = curframe->tf.f->linesize[0];
        s->uvlinesize = curframe->tf.f->linesize[1];

        memset(s->top_nnz, 0, s->mb_width * sizeof(*s->top_nnz));
        /* Zero macroblock structures for top/top-left prediction
         * from outside the frame. */
        if (!s->mb_layout)
            memset(s->macroblocks + s->mb_height * 2 - 1, 0,
                   (s->mb_width + 1) * sizeof(*s->macroblocks));
        if (!s->mb_layout && s->keyframe)
            memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width * 4);

        memset(s->ref_count, 0, sizeof(s->ref_count));

        if (s->mb_layout == 1) {
            // Make sure the previous frame has read its segmentation map,
            // if we re-use the same map.
            if (prev_frame && s->segmentation.enabled &&
                !s->segmentation.update_map)
                ff_thread_await_progress(&prev_frame->tf, 1, 0);
            if (is_vp7)
                ret = vp7_decode_mv_mb_modes(avctx, curframe, prev_frame);
            else
                ret = vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
            if (ret < 0)
                goto err;
        }

        if (avctx->active_thread_type == FF_THREAD_FRAME)
            num_jobs = 1;
        else
            num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
        s->num_jobs   = num_jobs;
        s->curframe   = curframe;
        s->prev_frame = prev_frame;
        s->mv_bounds.mv_min.y = -MARGIN;
        s->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
        for (i = 0; i < MAX_THREADS; i++) {
            VP8ThreadData *td = &s->thread_data[i];
            atomic_init(&td->thread_mb_pos, 0);
            atomic_init(&td->wait_mb_pos, INT_MAX);
        }
        if (is_vp7)
            avctx->execute2(avctx, vp7_decode_mb_row_sliced, s->thread_data, NULL,
                            num_jobs);
        else
            avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL,
                            num_jobs);
    }

    ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
    memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);

skip_decode:
    // if future frames don't use the updated probabilities,
    // reset them to the values we saved
    if (!s->update_probabilities)
        s->prob[0] = s->prob[1];

    if (!s->invisible) {
        if ((ret = av_frame_ref(data, curframe->tf.f)) < 0)
            return ret;
        *got_frame = 1;
    }

    return avpkt->size;
err:
    memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
    return ret;
}

int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                        AVPacket *avpkt)
{
    return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP8);
}

#if CONFIG_VP7_DECODER
static int vp7_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                            AVPacket *avpkt)
{
    return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP7);
}
#endif /* CONFIG_VP7_DECODER */

av_cold int ff_vp8_decode_free(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;
    int i;

    if (!s)
        return 0;

    vp8_decode_flush_impl(avctx, 1);
    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
        av_frame_free(&s->frames[i].tf.f);

    return 0;
}

static av_cold int vp8_init_frames(VP8Context *s)
{
    int i;
    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) {
        s->frames[i].tf.f = av_frame_alloc();
        if (!s->frames[i].tf.f)
            return AVERROR(ENOMEM);
    }
    return 0;
}

static av_always_inline
int vp78_decode_init(AVCodecContext *avctx, int is_vp7)
{
    VP8Context *s = avctx->priv_data;
    int ret;

    s->avctx   = avctx;
    s->vp7     = avctx->codec->id == AV_CODEC_ID_VP7;
    s->pix_fmt = AV_PIX_FMT_NONE;
    avctx->pix_fmt = AV_PIX_FMT_YUV420P;

    ff_videodsp_init(&s->vdsp, 8);

    ff_vp78dsp_init(&s->vp8dsp);
    if (CONFIG_VP7_DECODER && is_vp7) {
        ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP7, 8, 1);
        ff_vp7dsp_init(&s->vp8dsp);
        s->decode_mb_row_no_filter = vp7_decode_mb_row_no_filter;
        s->filter_mb_row           = vp7_filter_mb_row;
    } else if (CONFIG_VP8_DECODER && !is_vp7) {
        ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
        ff_vp8dsp_init(&s->vp8dsp);
        s->decode_mb_row_no_filter = vp8_decode_mb_row_no_filter;
        s->filter_mb_row           = vp8_filter_mb_row;
    }

    /* does not change for VP8 */
    memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan));

    if ((ret = vp8_init_frames(s)) < 0) {
        ff_vp8_decode_free(avctx);
        return ret;
    }

    return 0;
}

#if CONFIG_VP7_DECODER
static int vp7_decode_init(AVCodecContext *avctx)
{
    return vp78_decode_init(avctx, IS_VP7);
}
#endif /* CONFIG_VP7_DECODER */

av_cold int ff_vp8_decode_init(AVCodecContext *avctx)
{
    return vp78_decode_init(avctx, IS_VP8);
}

#if CONFIG_VP8_DECODER
#if HAVE_THREADS
#define REBASE(pic) ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL)
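
/* Editorial sketch (not decoder code): REBASE() maps a frame pointer into
 * the source thread's frames[] array onto the same slot of this thread's
 * array, preserving the index and keeping NULL as NULL:
 *
 *     VP8Frame *p = &s_src->frames[2];
 *     VP8Frame *q = REBASE(p);        // == &s->frames[2]
 *     VP8Frame *n = REBASE(NULL);     // NULL
 */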

static int vp8_decode_update_thread_context(AVCodecContext *dst,
                                            const AVCodecContext *src)
{
    VP8Context *s = dst->priv_data, *s_src = src->priv_data;
    int i;

    if (s->macroblocks_base &&
        (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
        free_buffers(s);
        s->mb_width  = s_src->mb_width;
        s->mb_height = s_src->mb_height;
    }

    s->pix_fmt      = s_src->pix_fmt;
    s->prob[0]      = s_src->prob[!s_src->update_probabilities];
    s->segmentation = s_src->segmentation;
    s->lf_delta     = s_src->lf_delta;
    memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));

    for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
        if (s_src->frames[i].tf.f->buf[0]) {
            int ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
            if (ret < 0)
                return ret;
        }
    }

    s->framep[0] = REBASE(s_src->next_framep[0]);
    s->framep[1] = REBASE(s_src->next_framep[1]);
    s->framep[2] = REBASE(s_src->next_framep[2]);
    s->framep[3] = REBASE(s_src->next_framep[3]);

    return 0;
}
#endif /* HAVE_THREADS */
#endif /* CONFIG_VP8_DECODER */

#if CONFIG_VP7_DECODER
AVCodec ff_vp7_decoder = {
    .name                  = "vp7",
    .long_name             = NULL_IF_CONFIG_SMALL("On2 VP7"),
    .type                  = AVMEDIA_TYPE_VIDEO,
    .id                    = AV_CODEC_ID_VP7,
    .priv_data_size        = sizeof(VP8Context),
    .init                  = vp7_decode_init,
    .close                 = ff_vp8_decode_free,
    .decode                = vp7_decode_frame,
    .capabilities          = AV_CODEC_CAP_DR1,
    .flush                 = vp8_decode_flush,
};
#endif /* CONFIG_VP7_DECODER */

#if CONFIG_VP8_DECODER
AVCodec ff_vp8_decoder = {
    .name                  = "vp8",
    .long_name             = NULL_IF_CONFIG_SMALL("On2 VP8"),
    .type                  = AVMEDIA_TYPE_VIDEO,
    .id                    = AV_CODEC_ID_VP8,
    .priv_data_size        = sizeof(VP8Context),
    .init                  = ff_vp8_decode_init,
    .close                 = ff_vp8_decode_free,
    .decode                = ff_vp8_decode_frame,
    .capabilities          = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS |
                             AV_CODEC_CAP_SLICE_THREADS,
    .flush                 = vp8_decode_flush,
    .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
    .hw_configs            = (const AVCodecHWConfigInternal *const []) {
#if CONFIG_VP8_VAAPI_HWACCEL
                               HWACCEL_VAAPI(vp8),
#endif
#if CONFIG_VP8_NVDEC_HWACCEL
                               HWACCEL_NVDEC(vp8),
#endif
                               NULL
                           },
    .caps_internal         = FF_CODEC_CAP_ALLOCATE_PROGRESS,
};
#endif /* CONFIG_VP8_DECODER */