1 // Copyright 2010 Google Inc.
2 //
3 // This code is licensed under the same terms as WebM:
4 // Software License Agreement: http://www.webmproject.org/license/software/
5 // Additional IP Rights Grant: http://www.webmproject.org/license/additional/
6 // -----------------------------------------------------------------------------
7 //
8 // Frame-reconstruction function. Memory allocation.
9 //
10 // Author: Skal (pascal.massimino@gmail.com)
11
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include "./vp8i.h"
14
15 #if defined(__cplusplus) || defined(c_plusplus)
16 extern "C" {
17 #endif
18
19 #define ALIGN_MASK (32 - 1)
20
21 //------------------------------------------------------------------------------
22 // Memory setup
23
// kFilterExtraRows[] = How many extra lines are needed on the MB boundary
// for caching, given a filtering level.
// Simple filter: up to 2 luma samples are read and 1 is written.
// Complex filter: up to 4 luma samples are read and 3 are written. Same for
// U/V, so it's 8 samples total (because of the 2x upsampling).
// Indexed by dec->filter_type_: 0 = no filter, 1 = simple, 2 = complex.
static const uint8_t kFilterExtraRows[3] = { 0, 2, 8 };
30
// Sets up the decoder's per-frame working memory: a single allocation carved,
// in order, into intra-prediction modes, top-row samples (y/u/v), macroblock
// info, the (aligned) YUV work buffer, coefficients, the row cache and the
// optional alpha plane. Also primes the VP8Io fields for the decoding pass.
// Returns 1 on success, or the result of VP8SetError() on allocation failure.
int VP8InitFrame(VP8Decoder* const dec, VP8Io* io) {
  const int mb_w = dec->mb_w_;
  // 4 intra prediction modes per macroblock, for the whole top row.
  const int intra_pred_mode_size = 4 * mb_w * sizeof(uint8_t);
  const int top_size = (16 + 8 + 8) * mb_w;          // y/u/v top samples
  const int info_size = (mb_w + 1) * sizeof(VP8MB);  // +1 for the left MB
  const int yuv_size = YUV_SIZE * sizeof(*dec->yuv_b_);
  const int coeffs_size = 384 * sizeof(*dec->coeffs_);
  // 16 luma rows plus 8+8 chroma rows (hence the 3/2 factor), extended by
  // the extra rows the in-loop filter needs at the macroblock boundary.
  const int cache_height = (16 + kFilterExtraRows[dec->filter_type_]) * 3 / 2;
  const int cache_size = top_size * cache_height;
  const int alpha_size =
      dec->alpha_data_ ? (dec->pic_hdr_.width_ * dec->pic_hdr_.height_) : 0;
  // NOTE(review): all these sizes are computed in 'int'; very large pictures
  // (e.g. width_ * height_) could overflow -- confirm upstream dimension caps.
  const int needed = intra_pred_mode_size
                   + top_size + info_size
                   + yuv_size + coeffs_size
                   + cache_size + alpha_size + ALIGN_MASK;
  uint8_t* mem;

  // (Re-)allocate only when the previously kept buffer is too small.
  if (needed > dec->mem_size_) {
    free(dec->mem_);
    dec->mem_size_ = 0;
    dec->mem_ = (uint8_t*)malloc(needed);
    if (dec->mem_ == NULL) {
      return VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY,
                         "no memory during frame initialization.");
    }
    dec->mem_size_ = needed;
  }

  // Carve the buffer up, in the same order as the sizes computed above.
  mem = (uint8_t*)dec->mem_;
  dec->intra_t_ = (uint8_t*)mem;
  mem += intra_pred_mode_size;

  dec->y_t_ = (uint8_t*)mem;
  mem += 16 * mb_w;
  dec->u_t_ = (uint8_t*)mem;
  mem += 8 * mb_w;
  dec->v_t_ = (uint8_t*)mem;
  mem += 8 * mb_w;

  // mb_info_[-1] is the extra 'left' macroblock, zeroed once below.
  dec->mb_info_ = ((VP8MB*)mem) + 1;
  mem += info_size;

  // Round up to the next ALIGN_MASK+1 boundary (the slack was counted in
  // 'needed' above) so that yuv_b_ is aligned.
  mem = (uint8_t*)((uintptr_t)(mem + ALIGN_MASK) & ~ALIGN_MASK);
  assert((yuv_size & ALIGN_MASK) == 0);
  dec->yuv_b_ = (uint8_t*)mem;
  mem += yuv_size;

  dec->coeffs_ = (int16_t*)mem;
  mem += coeffs_size;

  dec->cache_y_stride_ = 16 * mb_w;
  dec->cache_uv_stride_ = 8 * mb_w;
  {
    // cache_y_/u_/v_ point past the held-back filter rows, at the first
    // sample row of the current macroblock line.
    const int extra_rows = kFilterExtraRows[dec->filter_type_];
    const int extra_y = extra_rows * dec->cache_y_stride_;
    const int extra_uv = (extra_rows / 2) * dec->cache_uv_stride_;
    dec->cache_y_ = ((uint8_t*)mem) + extra_y;
    dec->cache_u_ = dec->cache_y_ + 16 * dec->cache_y_stride_ + extra_uv;
    dec->cache_v_ = dec->cache_u_ + 8 * dec->cache_uv_stride_ + extra_uv;
  }
  mem += cache_size;

  // alpha plane (only allocated when alpha data is present)
  dec->alpha_plane_ = alpha_size ? (uint8_t*)mem : NULL;
  mem += alpha_size;

  // note: left-info is initialized once for all.
  memset(dec->mb_info_ - 1, 0, (mb_w + 1) * sizeof(*dec->mb_info_));

  // initialize top intra-prediction modes to DC
  memset(dec->intra_t_, B_DC_PRED, intra_pred_mode_size);

  // prepare 'io'
  io->mb_y = 0;
  io->y = dec->cache_y_;
  io->u = dec->cache_u_;
  io->v = dec->cache_v_;
  io->y_stride = dec->cache_y_stride_;
  io->uv_stride = dec->cache_uv_stride_;
  io->fancy_upsampling = 0;    // default
  io->a = NULL;

  // Init critical function pointers and look-up tables.
  VP8DspInitTables();
  VP8DspInit();

  return 1;
}
119
120 //------------------------------------------------------------------------------
121 // Filtering
122
// Maps a filter level to the high-edge-variance threshold used by the
// complex in-loop filter. Key frames use a slightly smaller threshold
// than inter frames at the same level.
static inline int hev_thresh_from_level(int level, int keyframe) {
  if (level >= 40) {
    return keyframe ? 2 : 3;
  }
  if (level >= 20) {
    return keyframe ? 1 : 2;
  }
  if (level >= 15) {
    return 1;
  }
  return 0;
}
130
// Filters macroblock (mb_x, mb_y) in the row cache, using the strength
// parameters stored in mb_info_[mb_x] by VP8StoreBlock(). Edges shared with
// the left/top neighbors are only filtered when such a neighbor exists
// (mb_x > 0 / mb_y > 0); inner 4x4 edges when f_inner_ is set.
static void DoFilter(const VP8Decoder* const dec, int mb_x, int mb_y) {
  VP8MB* const mb = dec->mb_info_ + mb_x;
  uint8_t* const y_dst = dec->cache_y_ + mb_x * 16;
  const int y_bps = dec->cache_y_stride_;
  const int level = mb->f_level_;
  const int ilevel = mb->f_ilevel_;
  const int limit = 2 * level + ilevel;
  if (level == 0) {
    return;    // filtering disabled for this macroblock
  }
  if (dec->filter_type_ == 1) {   // simple filter: luma only
    if (mb_x > 0) {
      VP8SimpleHFilter16(y_dst, y_bps, limit + 4);   // left MB edge
    }
    if (mb->f_inner_) {
      VP8SimpleHFilter16i(y_dst, y_bps, limit);      // inner edges
    }
    if (mb_y > 0) {
      VP8SimpleVFilter16(y_dst, y_bps, limit + 4);   // top MB edge
    }
    if (mb->f_inner_) {
      VP8SimpleVFilter16i(y_dst, y_bps, limit);      // inner edges
    }
  } else {    // complex filter: luma and chroma, with hev threshold
    uint8_t* const u_dst = dec->cache_u_ + mb_x * 8;
    uint8_t* const v_dst = dec->cache_v_ + mb_x * 8;
    const int uv_bps = dec->cache_uv_stride_;
    const int hev_thresh =
        hev_thresh_from_level(level, dec->frm_hdr_.key_frame_);
    if (mb_x > 0) {   // left MB edge
      VP8HFilter16(y_dst, y_bps, limit + 4, ilevel, hev_thresh);
      VP8HFilter8(u_dst, v_dst, uv_bps, limit + 4, ilevel, hev_thresh);
    }
    if (mb->f_inner_) {   // inner edges
      VP8HFilter16i(y_dst, y_bps, limit, ilevel, hev_thresh);
      VP8HFilter8i(u_dst, v_dst, uv_bps, limit, ilevel, hev_thresh);
    }
    if (mb_y > 0) {   // top MB edge
      VP8VFilter16(y_dst, y_bps, limit + 4, ilevel, hev_thresh);
      VP8VFilter8(u_dst, v_dst, uv_bps, limit + 4, ilevel, hev_thresh);
    }
    if (mb->f_inner_) {   // inner edges
      VP8VFilter16i(y_dst, y_bps, limit, ilevel, hev_thresh);
      VP8VFilter8i(u_dst, v_dst, uv_bps, limit, ilevel, hev_thresh);
    }
  }
}
178
VP8FilterRow(const VP8Decoder * const dec)179 void VP8FilterRow(const VP8Decoder* const dec) {
180 int mb_x;
181 assert(dec->filter_type_ > 0);
182 if (dec->mb_y_ < dec->tl_mb_y_ || dec->mb_y_ > dec->br_mb_y_) {
183 return;
184 }
185 for (mb_x = dec->tl_mb_x_; mb_x < dec->br_mb_x_; ++mb_x) {
186 DoFilter(dec, mb_x, dec->mb_y_);
187 }
188 }
189
190 //------------------------------------------------------------------------------
191
// Records the filtering parameters for the current macroblock (when the
// in-loop filter is enabled) and copies the freshly reconstructed samples
// from the yuv_b_ work buffer into the row cache.
void VP8StoreBlock(VP8Decoder* const dec) {
  if (dec->filter_type_ > 0) {
    VP8MB* const info = dec->mb_info_ + dec->mb_x_;
    const int sharpness = dec->filter_hdr_.sharpness_;
    int level = dec->filter_levels_[dec->segment_];
    if (dec->filter_hdr_.use_lf_delta_) {
      // TODO(skal): only CURRENT is handled for now.
      level += dec->filter_hdr_.ref_lf_delta_[0];
      if (dec->is_i4x4_) {
        level += dec->filter_hdr_.mode_lf_delta_[0];
      }
    }
    // Clamp the edge-filter level to the valid [0, 63] range.
    if (level < 0) {
      level = 0;
    } else if (level > 63) {
      level = 63;
    }
    info->f_level_ = level;

    // Derive the inner level, reduced and capped according to sharpness.
    if (sharpness > 0) {
      level >>= (sharpness > 4) ? 2 : 1;
      if (level > 9 - sharpness) {
        level = 9 - sharpness;
      }
    }
    info->f_ilevel_ = (level < 1) ? 1 : level;
    info->f_inner_ = (!info->skip_ || dec->is_i4x4_);
  }
  {
    // Transfer samples to row cache
    int j;
    uint8_t* const y_out = dec->cache_y_ + dec->mb_x_ * 16;
    uint8_t* const u_out = dec->cache_u_ + dec->mb_x_ * 8;
    uint8_t* const v_out = dec->cache_v_ + dec->mb_x_ * 8;
    for (j = 0; j < 16; ++j) {
      memcpy(y_out + j * dec->cache_y_stride_,
             dec->yuv_b_ + Y_OFF + j * BPS, 16);
    }
    for (j = 0; j < 8; ++j) {
      memcpy(u_out + j * dec->cache_uv_stride_,
             dec->yuv_b_ + U_OFF + j * BPS, 8);
      memcpy(v_out + j * dec->cache_uv_stride_,
             dec->yuv_b_ + V_OFF + j * BPS, 8);
    }
  }
}
238
239 //------------------------------------------------------------------------------
240 // This function is called after a row of macroblocks is finished decoding.
241 // It also takes into account the following restrictions:
242 // * In case of in-loop filtering, we must hold off sending some of the bottom
243 // pixels as they are yet unfiltered. They will be when the next macroblock
244 // row is decoded. Meanwhile, we must preserve them by rotating them in the
245 // cache area. This doesn't hold for the very bottom row of the uncropped
246 // picture of course.
247 // * we must clip the remaining pixels against the cropping area. The VP8Io
248 // struct must have the following fields set correctly before calling put():
249
250 #define MACROBLOCK_VPOS(mb_y) ((mb_y) * 16) // vertical position of a MB
251
// Emits the rows made available by the just-decoded macroblock row through
// io->put(), holding back the still-unfiltered bottom rows (they get emitted
// with the next row) and clipping against the cropping rectangle; then
// rotates the held-back cache rows up for the next iteration.
// Returns 0 if io->put() aborted, 1 otherwise (or the VP8SetError() result
// if experimental alpha decoding failed).
int VP8FinishRow(VP8Decoder* const dec, VP8Io* io) {
  const int extra_y_rows = kFilterExtraRows[dec->filter_type_];
  const int ysize = extra_y_rows * dec->cache_y_stride_;
  const int uvsize = (extra_y_rows / 2) * dec->cache_uv_stride_;
  // ydst/udst/vdst point at the held-back rows just above the cache proper
  // (cache_y_ et al. were offset by these extra rows in VP8InitFrame).
  uint8_t* const ydst = dec->cache_y_ - ysize;
  uint8_t* const udst = dec->cache_u_ - uvsize;
  uint8_t* const vdst = dec->cache_v_ - uvsize;
  const int first_row = (dec->mb_y_ == 0);
  const int last_row = (dec->mb_y_ >= dec->br_mb_y_ - 1);
  int y_start = MACROBLOCK_VPOS(dec->mb_y_);
  int y_end = MACROBLOCK_VPOS(dec->mb_y_ + 1);
  if (io->put) {
    if (!first_row) {
      // Include the rows held back from the previous iteration.
      y_start -= extra_y_rows;
      io->y = ydst;
      io->u = udst;
      io->v = vdst;
    } else {
      io->y = dec->cache_y_;
      io->u = dec->cache_u_;
      io->v = dec->cache_v_;
    }

    if (!last_row) {
      // Hold back the not-yet-filtered bottom rows.
      y_end -= extra_y_rows;
    }
    if (y_end > io->crop_bottom) {
      y_end = io->crop_bottom;    // make sure we don't overflow on last row.
    }
    io->a = NULL;
#ifdef WEBP_EXPERIMENTAL_FEATURES
    if (dec->alpha_data_) {
      io->a = VP8DecompressAlphaRows(dec, y_start, y_end - y_start);
      if (io->a == NULL) {
        return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR,
                           "Could not decode alpha data.");
      }
    }
#endif
    // Clip against the top of the cropping area.
    if (y_start < io->crop_top) {
      const int delta_y = io->crop_top - y_start;
      y_start = io->crop_top;
      // delta_y must be even, for the 2x-subsampled chroma adjustment below.
      assert(!(delta_y & 1));
      io->y += dec->cache_y_stride_ * delta_y;
      io->u += dec->cache_uv_stride_ * (delta_y >> 1);
      io->v += dec->cache_uv_stride_ * (delta_y >> 1);
      if (io->a) {
        io->a += io->width * delta_y;
      }
    }
    // Anything left to emit after clipping? Adjust for the left crop and go.
    if (y_start < y_end) {
      io->y += io->crop_left;
      io->u += io->crop_left >> 1;
      io->v += io->crop_left >> 1;
      if (io->a) {
        io->a += io->crop_left;
      }
      io->mb_y = y_start - io->crop_top;    // position relative to crop area
      io->mb_w = io->crop_right - io->crop_left;
      io->mb_h = y_end - y_start;
      if (!io->put(io)) {
        return 0;
      }
    }
  }
  // rotate top samples: move the bottom of the cache into the held-back area
  if (!last_row) {
    memcpy(ydst, ydst + 16 * dec->cache_y_stride_, ysize);
    memcpy(udst, udst + 8 * dec->cache_uv_stride_, uvsize);
    memcpy(vdst, vdst + 8 * dec->cache_uv_stride_, uvsize);
  }
  return 1;
}
325
326 #undef MACROBLOCK_VPOS
327
328 //------------------------------------------------------------------------------
329 // Finish setting up the decoding parameter once user's setup() is called.
330
VP8FinishFrameSetup(VP8Decoder * const dec,VP8Io * const io)331 VP8StatusCode VP8FinishFrameSetup(VP8Decoder* const dec, VP8Io* const io) {
332 // Call setup() first. This may trigger additional decoding features on 'io'.
333 if (io->setup && !io->setup(io)) {
334 VP8SetError(dec, VP8_STATUS_USER_ABORT, "Frame setup failed");
335 return dec->status_;
336 }
337
338 // Disable filtering per user request
339 if (io->bypass_filtering) {
340 dec->filter_type_ = 0;
341 }
342 // TODO(skal): filter type / strength / sharpness forcing
343
344 // Define the area where we can skip in-loop filtering, in case of cropping.
345 //
346 // 'Simple' filter reads two luma samples outside of the macroblock and
347 // and filters one. It doesn't filter the chroma samples. Hence, we can
348 // avoid doing the in-loop filtering before crop_top/crop_left position.
349 // For the 'Complex' filter, 3 samples are read and up to 3 are filtered.
350 // Means: there's a dependency chain that goes all the way up to the
351 // top-left corner of the picture (MB #0). We must filter all the previous
352 // macroblocks.
353 // TODO(skal): add an 'approximate_decoding' option, that won't produce
354 // a 1:1 bit-exactness for complex filtering?
355 {
356 const int extra_pixels = kFilterExtraRows[dec->filter_type_];
357 if (dec->filter_type_ == 2) {
358 // For complex filter, we need to preserve the dependency chain.
359 dec->tl_mb_x_ = 0;
360 dec->tl_mb_y_ = 0;
361 } else {
362 // For simple filter, we can filter only the cropped region.
363 dec->tl_mb_y_ = io->crop_top >> 4;
364 dec->tl_mb_x_ = io->crop_left >> 4;
365 }
366 // We need some 'extra' pixels on the right/bottom.
367 dec->br_mb_y_ = (io->crop_bottom + 15 + extra_pixels) >> 4;
368 dec->br_mb_x_ = (io->crop_right + 15 + extra_pixels) >> 4;
369 if (dec->br_mb_x_ > dec->mb_w_) {
370 dec->br_mb_x_ = dec->mb_w_;
371 }
372 if (dec->br_mb_y_ > dec->mb_h_) {
373 dec->br_mb_y_ = dec->mb_h_;
374 }
375 }
376 return VP8_STATUS_OK;
377 }
378
379 //------------------------------------------------------------------------------
380 // Main reconstruction function.
381
// kScan[n]: offset of the n-th 4x4 luma sub-block within the yuv_b_ work
// buffer (raster order, 4 sub-blocks per row, BPS bytes per sample row).
static const int kScan[16] = {
  0 +  0 * BPS,  4 +  0 * BPS, 8 +  0 * BPS, 12 +  0 * BPS,
  0 +  4 * BPS,  4 +  4 * BPS, 8 +  4 * BPS, 12 +  4 * BPS,
  0 +  8 * BPS,  4 +  8 * BPS, 8 +  8 * BPS, 12 +  8 * BPS,
  0 + 12 * BPS,  4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS
};
388
// Adjusts an intra prediction mode at picture borders: B_DC_PRED is mapped
// to the variant that ignores the missing top and/or left samples when the
// current macroblock sits on the first row and/or first column.
// Any other mode is returned unchanged.
static inline int CheckMode(VP8Decoder* const dec, int mode) {
  if (mode != B_DC_PRED) {
    return mode;
  }
  if (dec->mb_x_ == 0) {
    return (dec->mb_y_ == 0) ? B_DC_PRED_NOTOPLEFT : B_DC_PRED_NOLEFT;
  }
  return (dec->mb_y_ == 0) ? B_DC_PRED_NOTOP : B_DC_PRED;
}
399
// Copies 4 bytes from 'src' to 'dst'.
// The previous direct uint32_t load/store was undefined behavior on two
// counts: it violated strict aliasing (accessing uint8_t storage through an
// incompatible uint32_t lvalue) and could perform a misaligned access on
// strict-alignment targets. memcpy has the same semantics without the UB,
// and compilers lower a fixed 4-byte memcpy to a single load/store anyway.
static inline void Copy32b(uint8_t* dst, uint8_t* src) {
  memcpy(dst, src, 4);
}
403
// Reconstructs the current macroblock into the yuv_b_ work buffer: rotates
// in the left-neighbor samples, loads the top-neighbor samples, runs intra
// prediction (per-4x4 or whole-16x16 luma, plus 8x8 chroma), adds the
// dequantized residuals, and finally stashes the bottom sample rows as
// 'top' context for the next macroblock row.
void VP8ReconstructBlock(VP8Decoder* const dec) {
  uint8_t* const y_dst = dec->yuv_b_ + Y_OFF;
  uint8_t* const u_dst = dec->yuv_b_ + U_OFF;
  uint8_t* const v_dst = dec->yuv_b_ + V_OFF;

  // Rotate in the left samples from previously decoded block. We move four
  // pixels at a time for alignment reason, and because of in-loop filter.
  if (dec->mb_x_ > 0) {
    int j;
    // j starts at -1 to also carry over the top-left corner sample.
    for (j = -1; j < 16; ++j) {
      Copy32b(&y_dst[j * BPS - 4], &y_dst[j * BPS + 12]);
    }
    for (j = -1; j < 8; ++j) {
      Copy32b(&u_dst[j * BPS - 4], &u_dst[j * BPS + 4]);
      Copy32b(&v_dst[j * BPS - 4], &v_dst[j * BPS + 4]);
    }
  } else {
    // Leftmost macroblock: seed the missing left column with the constant
    // value 129.
    int j;
    for (j = 0; j < 16; ++j) {
      y_dst[j * BPS - 1] = 129;
    }
    for (j = 0; j < 8; ++j) {
      u_dst[j * BPS - 1] = 129;
      v_dst[j * BPS - 1] = 129;
    }
    // Init top-left sample on left column too
    if (dec->mb_y_ > 0) {
      y_dst[-1 - BPS] = u_dst[-1 - BPS] = v_dst[-1 - BPS] = 129;
    }
  }
  {
    // bring top samples into the cache
    uint8_t* const top_y = dec->y_t_ + dec->mb_x_ * 16;
    uint8_t* const top_u = dec->u_t_ + dec->mb_x_ * 8;
    uint8_t* const top_v = dec->v_t_ + dec->mb_x_ * 8;
    const int16_t* coeffs = dec->coeffs_;
    int n;

    if (dec->mb_y_ > 0) {
      memcpy(y_dst - BPS, top_y, 16);
      memcpy(u_dst - BPS, top_u, 8);
      memcpy(v_dst - BPS, top_v, 8);
    } else if (dec->mb_x_ == 0) {
      // Topmost row: seed the missing top samples with 127.
      // we only need to do this init once at block (0,0).
      // Afterward, it remains valid for the whole topmost row.
      memset(y_dst - BPS - 1, 127, 16 + 4 + 1);
      memset(u_dst - BPS - 1, 127, 8 + 1);
      memset(v_dst - BPS - 1, 127, 8 + 1);
    }

    // predict and add residuals

    if (dec->is_i4x4_) {   // 4x4
      uint32_t* const top_right = (uint32_t*)(y_dst - BPS + 16);

      if (dec->mb_y_ > 0) {
        if (dec->mb_x_ >= dec->mb_w_ - 1) {    // on rightmost border
          // no top-right neighbor: replicate the last top sample 4x.
          top_right[0] = top_y[15] * 0x01010101u;
        } else {
          memcpy(top_right, top_y + 16, sizeof(*top_right));
        }
      }
      // replicate the top-right pixels below
      // NOTE(review): 32-bit accesses alias the byte buffer here; assumes
      // yuv_b_ is suitably aligned (see the ALIGN_MASK rounding in
      // VP8InitFrame).
      top_right[BPS] = top_right[2 * BPS] = top_right[3 * BPS] = top_right[0];

      // predict and add residues for all 4x4 blocks in turn.
      for (n = 0; n < 16; n++) {
        uint8_t* const dst = y_dst + kScan[n];
        VP8PredLuma4[dec->imodes_[n]](dst);
        if (dec->non_zero_ac_ & (1 << n)) {
          VP8Transform(coeffs + n * 16, dst, 0);
        } else if (dec->non_zero_ & (1 << n)) {   // only DC is present
          VP8TransformDC(coeffs + n * 16, dst);
        }
      }
    } else {    // 16x16
      const int pred_func = CheckMode(dec, dec->imodes_[0]);
      VP8PredLuma16[pred_func](y_dst);
      if (dec->non_zero_) {   // skip the transforms entirely if all-zero
        for (n = 0; n < 16; n++) {
          uint8_t* const dst = y_dst + kScan[n];
          if (dec->non_zero_ac_ & (1 << n)) {
            VP8Transform(coeffs + n * 16, dst, 0);
          } else if (dec->non_zero_ & (1 << n)) {   // only DC is present
            VP8TransformDC(coeffs + n * 16, dst);
          }
        }
      }
    }
    {
      // Chroma
      const int pred_func = CheckMode(dec, dec->uvmode_);
      VP8PredChroma8[pred_func](u_dst);
      VP8PredChroma8[pred_func](v_dst);

      // bits 16..19 of non_zero_ track the U blocks, bits 20..23 the V blocks.
      if (dec->non_zero_ & 0x0f0000) {   // chroma-U
        const int16_t* const u_coeffs = dec->coeffs_ + 16 * 16;
        if (dec->non_zero_ac_ & 0x0f0000) {
          VP8TransformUV(u_coeffs, u_dst);
        } else {
          VP8TransformDCUV(u_coeffs, u_dst);
        }
      }
      if (dec->non_zero_ & 0xf00000) {   // chroma-V
        const int16_t* const v_coeffs = dec->coeffs_ + 20 * 16;
        if (dec->non_zero_ac_ & 0xf00000) {
          VP8TransformUV(v_coeffs, v_dst);
        } else {
          VP8TransformDCUV(v_coeffs, v_dst);
        }
      }

      // stash away top samples for next block
      if (dec->mb_y_ < dec->mb_h_ - 1) {
        memcpy(top_y, y_dst + 15 * BPS, 16);
        memcpy(top_u, u_dst + 7 * BPS, 8);
        memcpy(top_v, v_dst + 7 * BPS, 8);
      }
    }
  }
}
525
526 //------------------------------------------------------------------------------
527
528 #if defined(__cplusplus) || defined(c_plusplus)
529 } // extern "C"
530 #endif
531