1 /*
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11
12 #include "./vp9_rtcd.h"
13 #include "./vpx_config.h"
14 #include "./vpx_dsp_rtcd.h"
15
16 #include "vpx_dsp/quantize.h"
17 #include "vpx_mem/vpx_mem.h"
18 #include "vpx_ports/mem.h"
19
20 #include "vp9/common/vp9_idct.h"
21 #include "vp9/common/vp9_reconinter.h"
22 #include "vp9/common/vp9_reconintra.h"
23 #include "vp9/common/vp9_scan.h"
24
25 #include "vp9/encoder/vp9_encodemb.h"
26 #include "vp9/encoder/vp9_rd.h"
27 #include "vp9/encoder/vp9_tokenize.h"
28
/*
 * Scratch entropy contexts used while encoding one block.
 * `ta` and `tl` each hold 16 entries per plane; encode_block() indexes them
 * with the transform block's x and y position respectively.
 * NOTE(review): presumably "above" and "left" contexts -- confirm against
 * vp9_get_entropy_contexts().
 */
struct optimize_ctx {
  ENTROPY_CONTEXT ta[MAX_MB_PLANE][16];
  ENTROPY_CONTEXT tl[MAX_MB_PLANE][16];
};
33
/*
 * Fills x->plane[plane].src_diff for the whole plane block.
 *
 * The source buffer and the dst buffer of the plane are handed to the
 * subtract-block routine (presumably producing source minus prediction --
 * confirm against vpx_subtract_block). The output stride equals the block
 * width in pixels.
 *
 * x      macroblock whose plane buffers are read and whose src_diff is written
 * bsize  block size before plane subsampling is applied
 * plane  plane index
 */
void vp9_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
  struct macroblock_plane *const enc_plane = &x->plane[plane];
  const struct macroblockd_plane *const dec_plane = &x->e_mbd.plane[plane];
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, dec_plane);
  /* The lookup tables count 4x4 units; scale by 4 to get pixels. */
  const int width = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int height = 4 * num_4x4_blocks_high_lookup[plane_bsize];

#if CONFIG_VP9_HIGHBITDEPTH
  if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    vpx_highbd_subtract_block(height, width,
                              enc_plane->src_diff, width,
                              enc_plane->src.buf, enc_plane->src.stride,
                              dec_plane->dst.buf, dec_plane->dst.stride,
                              x->e_mbd.bd);
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  vpx_subtract_block(height, width,
                     enc_plane->src_diff, width,
                     enc_plane->src.buf, enc_plane->src.stride,
                     dec_plane->dst.buf, dec_plane->dst.stride);
}
52
/*
 * Tie-break value used by UPDATE_RD_COST() when two RDCOST() results are
 * equal: the low 8 bits of (128 + R * RM). The DM and D arguments are
 * accepted for signature symmetry with RDCOST() but are not used.
 */
#define RDTRUNC(RM, DM, R, D) ((128 + (R) * (RM)) & 0xFF)
54
/*
 * One node of the trellis built by optimize_b(). Two of these are kept per
 * scan position (one per candidate value of the coefficient).
 *
 *   rate   accumulated rate of the path starting at this node
 *   error  accumulated squared error of the path starting at this node
 *   qc     quantized coefficient value chosen for this candidate
 *   next   scan index of the following trellis node
 *   token  token associated with this candidate
 *
 * `qc` is a plain int because optimize_b() stores an int read from the
 * quantized coefficient array; a 16-bit field silently truncates any value
 * outside [-32768, 32767].
 * NOTE(review): this matters when tran_low_t is wider than 16 bits
 * (presumably high-bitdepth builds) -- confirm.
 *
 * `next` only ever holds a scan index in [0, 1024] (the trellis array has
 * 1025 entries), so it fits in int16_t; narrowing it keeps the structure the
 * same size as before, and the on-stack trellis no larger.
 */
typedef struct vp9_token_state {
  int rate;
  int error;
  int qc;
  int16_t next;
  int16_t token;
} vp9_token_state;
62
// Per-plane-type multiplier that optimize_b() applies to mb->rdmult before
// evaluating rate-distortion costs; indexed by the block's PLANE_TYPE.
// TODO(jimbankoski): experiment to find optimal RD numbers.
static const int plane_rd_mult[PLANE_TYPES] = { 4, 2 };
65
/*
 * Recomputes rd_cost0 / rd_cost1 from (rate0, error0) and (rate1, error1)
 * using the rdmult / rddiv variables in scope at the point of use. When the
 * two RDCOST() values are equal, both are replaced by their RDTRUNC() values
 * so the caller's comparison can still tell them apart.
 *
 * Wrapped in do { } while (0) so the macro behaves as a single statement and
 * stays safe inside an unbraced if / else. Invoke it as `UPDATE_RD_COST();`.
 */
#define UPDATE_RD_COST()                                  \
  do {                                                    \
    rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);      \
    rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);      \
    if (rd_cost0 == rd_cost1) {                           \
      rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0);   \
      rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1);   \
    }                                                     \
  } while (0)
75
76 // This function is a place holder for now but may ultimately need
77 // to scan previous tokens to work out the correct context.
trellis_get_coeff_context(const int16_t * scan,const int16_t * nb,int idx,int token,uint8_t * token_cache)78 static int trellis_get_coeff_context(const int16_t *scan,
79 const int16_t *nb,
80 int idx, int token,
81 uint8_t *token_cache) {
82 int bak = token_cache[scan[idx]], pt;
83 token_cache[scan[idx]] = vp9_pt_energy_class[token];
84 pt = get_coef_context(nb, token_cache, idx + 1);
85 token_cache[scan[idx]] = bak;
86 return pt;
87 }
88
/*
 * Trellis optimization of the quantized coefficients of one transform block.
 *
 * For every non-zero quantized coefficient, walking the scan backwards from
 * the current end-of-block, two candidates are evaluated: the value as
 * quantized, and (when the "shortcut" test passes) that value moved one step
 * towards zero. Each candidate records the cheaper of the two successor
 * candidates by rate-distortion cost. The best path is then written back.
 *
 * mb       macroblock; its token_costs, rdmult and rddiv are read
 * plane    plane index of the block
 * block    block index used to locate coeff / qcoeff / dqcoeff / eobs
 * tx_size  transform size of the block
 * ctx      context index used to cost the first token of the block
 *
 * Side effects: qcoeff and dqcoeff of the block are zeroed and rewritten with
 * the chosen values, and the block's eobs entry is updated.
 *
 * Returns the new end-of-block position (the same value stored in eobs).
 */
static int optimize_b(MACROBLOCK *mb, int plane, int block,
                      TX_SIZE tx_size, int ctx) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  struct macroblock_plane *const p = &mb->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  const int ref = is_inter_block(&xd->mi[0]->mbmi);
  vp9_token_state tokens[1025][2];
  unsigned best_index[1025][2];
  uint8_t token_cache[1024];
  const tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  const int eob = p->eobs[block];
  const PLANE_TYPE type = get_plane_type(plane);
  const int default_eob = 16 << (tx_size << 1);
  const int mul = 1 + (tx_size == TX_32X32);
  const int16_t *dequant_ptr = pd->dequant;
  const uint8_t *const band_translate = get_band_translate(tx_size);
  const scan_order *const so = get_scan(xd, tx_size, type, block);
  const int16_t *const scan = so->scan;
  const int16_t *const nb = so->neighbors;
  int64_t rdmult = mb->rdmult * plane_rd_mult[type];
  int64_t rddiv = mb->rddiv;
  int64_t rd_cost0, rd_cost1;
  int rate0, rate1, error0, error1;
  int16_t t0, t1;
  EXTRABIT e0;
  int next = eob;
  int sz = 0;
  int final_eob = 0;
  int best, band, pt, i;
#if CONFIG_VP9_HIGHBITDEPTH
  const int16_t *cat6_high_cost = vp9_get_high_cost_table(xd->bd);
#else
  const int16_t *cat6_high_cost = vp9_get_high_cost_table(8);
#endif

  assert((!type && !plane) || (type && plane));
  assert(eob <= default_eob);

  /* Blocks that are not inter-coded use a reduced multiplier (9/16). */
  if (!ref)
    rdmult = (rdmult * 9) >> 4;

  /* Sentinel node at the current EOB; both candidates are identical. */
  tokens[eob][0].rate = 0;
  tokens[eob][0].error = 0;
  tokens[eob][0].next = default_eob;
  tokens[eob][0].token = EOB_TOKEN;
  tokens[eob][0].qc = 0;
  tokens[eob][1] = tokens[eob][0];

  /* Seed the token cache with the energy class of every coded coefficient. */
  for (i = 0; i < eob; i++) {
    const int pos = scan[i];
    token_cache[pos] = vp9_pt_energy_class[vp9_get_token(qcoeff[pos])];
  }

  /* Build the trellis from the last coded coefficient back to the first. */
  for (i = eob - 1; i >= 0; --i) {
    int base_bits, d2, dx, dq, shortcut;
    const int rc = scan[i];
    int x = qcoeff[rc];

    if (!x) {
      /* A zero coefficient offers no choice, so no node is added; only the
       * cost of the paths through the current head node is updated, and
       * `next` is left alone.
       */
      band = band_translate[i + 1];
      t0 = tokens[next][0].token;
      t1 = tokens[next][1].token;
      /* Only paths that are not at the EOB token pay for the zero. */
      if (t0 != EOB_TOKEN) {
        tokens[next][0].rate +=
            mb->token_costs[tx_size][type][ref][band][1][0][t0];
        tokens[next][0].token = ZERO_TOKEN;
      }
      if (t1 != EOB_TOKEN) {
        tokens[next][1].rate +=
            mb->token_costs[tx_size][type][ref][band][1][0][t1];
        tokens[next][1].token = ZERO_TOKEN;
      }
      best_index[i][0] = best_index[i][1] = 0;
      continue;
    }

    /* Dequantizer step: entry 0 for raster position 0, entry 1 otherwise. */
    dq = dequant_ptr[rc != 0];

    error0 = tokens[next][0].error;
    error1 = tokens[next][1].error;

    /* ---- Candidate 0: keep the coefficient as quantized. ---- */
    rate0 = tokens[next][0].rate;
    rate1 = tokens[next][1].rate;
    vp9_get_token_extra(x, &t0, &e0);
    /* Cost the transition into each of the two successor candidates. */
    if (next < default_eob) {
      band = band_translate[i + 1];
      pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache);
      rate0 += mb->token_costs[tx_size][type][ref][band][0][pt]
                              [tokens[next][0].token];
      rate1 += mb->token_costs[tx_size][type][ref][band][0][pt]
                              [tokens[next][1].token];
    }
    UPDATE_RD_COST();
    best = rd_cost1 < rd_cost0;
    base_bits = vp9_get_cost(t0, e0, cat6_high_cost);
    dx = mul * (dqcoeff[rc] - coeff[rc]);
#if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      dx >>= xd->bd - 8;
    }
#endif  // CONFIG_VP9_HIGHBITDEPTH
    d2 = dx * dx;
    tokens[i][0].rate = base_bits + (best ? rate1 : rate0);
    tokens[i][0].error = d2 + (best ? error1 : error0);
    tokens[i][0].next = next;
    tokens[i][0].token = t0;
    tokens[i][0].qc = x;
    best_index[i][0] = best;

    /* ---- Candidate 1: the coefficient moved one step towards zero. ---- */
    rate0 = tokens[next][0].rate;
    rate1 = tokens[next][1].rate;

    /* Only try the smaller magnitude when the reconstruction overshoots the
     * original coefficient by less than one dequantizer step.
     */
    shortcut = (abs(x) * dq > abs(coeff[rc]) * mul) &&
               (abs(x) * dq < abs(coeff[rc]) * mul + dq);

    if (shortcut) {
      sz = -(x < 0);    /* 0 for positive x, -1 for negative x */
      x -= 2 * sz + 1;  /* step the magnitude down by one */
    }

    if (x) {
      vp9_get_token_extra(x, &t0, &e0);
      t1 = t0;
    } else {
      /* The coefficient became zero: a successor already at EOB lets the
       * EOB move back to this position.
       */
      t0 = tokens[next][0].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN;
      t1 = tokens[next][1].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN;
      e0 = 0;
    }

    if (next < default_eob) {
      band = band_translate[i + 1];
      if (t0 != EOB_TOKEN) {
        pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache);
        rate0 += mb->token_costs[tx_size][type][ref][band][!x][pt]
                                [tokens[next][0].token];
      }
      if (t1 != EOB_TOKEN) {
        pt = trellis_get_coeff_context(scan, nb, i, t1, token_cache);
        rate1 += mb->token_costs[tx_size][type][ref][band][!x][pt]
                                [tokens[next][1].token];
      }
    }

    UPDATE_RD_COST();
    best = rd_cost1 < rd_cost0;
    base_bits = vp9_get_cost(t0, e0, cat6_high_cost);

    if (shortcut) {
      /* Adjust the reconstruction error for the one-step change. The step
       * is scaled down for high-bitdepth buffers, matching dx above.
       */
      int step = dq;
#if CONFIG_VP9_HIGHBITDEPTH
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        step >>= xd->bd - 8;
      }
#endif  // CONFIG_VP9_HIGHBITDEPTH
      dx -= (step + sz) ^ sz;
      d2 = dx * dx;
    }
    tokens[i][1].rate = base_bits + (best ? rate1 : rate0);
    tokens[i][1].error = d2 + (best ? error1 : error0);
    tokens[i][1].next = next;
    tokens[i][1].token = best ? t1 : t0;
    tokens[i][1].qc = x;
    best_index[i][1] = best;

    /* This position becomes the new head of the trellis. */
    next = i;
  }

  /* Choose the cheaper of the two candidates at the head of the trellis.
   * The backward loop always ends with i == -1, so the band is entry 0.
   */
  band = band_translate[0];
  rate0 = tokens[next][0].rate;
  rate1 = tokens[next][1].rate;
  error0 = tokens[next][0].error;
  error1 = tokens[next][1].error;
  t0 = tokens[next][0].token;
  t1 = tokens[next][1].token;
  rate0 += mb->token_costs[tx_size][type][ref][band][0][ctx][t0];
  rate1 += mb->token_costs[tx_size][type][ref][band][0][ctx][t1];
  UPDATE_RD_COST();
  best = rd_cost1 < rd_cost0;

  /* Clear the block, then follow the chosen path and write it back. */
  memset(qcoeff, 0, sizeof(*qcoeff) * default_eob);
  memset(dqcoeff, 0, sizeof(*dqcoeff) * default_eob);
  i = next;
  while (i < eob) {
    const vp9_token_state *const node = &tokens[i][best];
    const int x = node->qc;
    const int rc = scan[i];

    /* The EOB is one past the last non-zero coefficient on the path. */
    if (x)
      final_eob = i + 1;

    qcoeff[rc] = x;
    dqcoeff[rc] = (x * dequant_ptr[rc != 0]) / mul;

    best = best_index[i][best];
    i = node->next;
  }

  p->eobs[block] = final_eob;
  return final_eob;
}
305
fdct32x32(int rd_transform,const int16_t * src,tran_low_t * dst,int src_stride)306 static INLINE void fdct32x32(int rd_transform,
307 const int16_t *src, tran_low_t *dst,
308 int src_stride) {
309 if (rd_transform)
310 vpx_fdct32x32_rd(src, dst, src_stride);
311 else
312 vpx_fdct32x32(src, dst, src_stride);
313 }
314
#if CONFIG_VP9_HIGHBITDEPTH
/*
 * High-bitdepth counterpart of fdct32x32(): a non-zero rd_transform selects
 * vpx_highbd_fdct32x32_rd(), zero selects vpx_highbd_fdct32x32().
 */
static INLINE void highbd_fdct32x32(int rd_transform, const int16_t *src,
                                    tran_low_t *dst, int src_stride) {
  if (!rd_transform) {
    vpx_highbd_fdct32x32(src, dst, src_stride);
    return;
  }
  vpx_highbd_fdct32x32_rd(src, dst, src_stride);
}
#endif  // CONFIG_VP9_HIGHBITDEPTH
324
/*
 * Forward-transforms one transform block of the residual and quantizes it
 * with the "fp" quantizers (round_fp / quant_fp).
 *
 * x            macroblock providing src_diff, quantizer tables and outputs
 * plane        plane index
 * block        block index used to locate coeff / qcoeff / dqcoeff / eobs
 * plane_bsize  size of the plane block; determines the src_diff stride
 * tx_size      transform size; selects the transform and quantizer
 *
 * Always uses the default scan order for tx_size. Outputs go to the block's
 * coeff, qcoeff, dqcoeff and eobs entries.
 */
void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block,
                        BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  const scan_order *const so = &vp9_default_scan_orders[tx_size];
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  uint16_t *const eob = &p->eobs[block];
  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int16_t *src_diff;
  int bx, by;

  /* Locate this transform block's residual inside the plane's src_diff. */
  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &bx, &by);
  src_diff = &p->src_diff[4 * (by * diff_stride + bx)];

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    switch (tx_size) {
      case TX_4X4:
        x->fwd_txm4x4(src_diff, coeff, diff_stride);
        vp9_highbd_quantize_fp(coeff, 16, x->skip_block, p->zbin,
                               p->round_fp, p->quant_fp, p->quant_shift,
                               qcoeff, dqcoeff, pd->dequant, eob,
                               so->scan, so->iscan);
        break;
      case TX_8X8:
        vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
        vp9_highbd_quantize_fp(coeff, 64, x->skip_block, p->zbin,
                               p->round_fp, p->quant_fp, p->quant_shift,
                               qcoeff, dqcoeff, pd->dequant, eob,
                               so->scan, so->iscan);
        break;
      case TX_16X16:
        vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
        vp9_highbd_quantize_fp(coeff, 256, x->skip_block, p->zbin,
                               p->round_fp, p->quant_fp, p->quant_shift,
                               qcoeff, dqcoeff, pd->dequant, eob,
                               so->scan, so->iscan);
        break;
      case TX_32X32:
        highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
        vp9_highbd_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin,
                                     p->round_fp, p->quant_fp,
                                     p->quant_shift, qcoeff, dqcoeff,
                                     pd->dequant, eob,
                                     so->scan, so->iscan);
        break;
      default:
        assert(0);
        break;
    }
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  switch (tx_size) {
    case TX_4X4:
      x->fwd_txm4x4(src_diff, coeff, diff_stride);
      vp9_quantize_fp(coeff, 16, x->skip_block, p->zbin, p->round_fp,
                      p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
                      pd->dequant, eob, so->scan, so->iscan);
      break;
    case TX_8X8:
      /* The 8x8 path uses a combined transform + quantize routine. */
      vp9_fdct8x8_quant(src_diff, diff_stride, coeff, 64,
                        x->skip_block, p->zbin, p->round_fp,
                        p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
                        pd->dequant, eob, so->scan, so->iscan);
      break;
    case TX_16X16:
      vpx_fdct16x16(src_diff, coeff, diff_stride);
      vp9_quantize_fp(coeff, 256, x->skip_block, p->zbin, p->round_fp,
                      p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
                      pd->dequant, eob, so->scan, so->iscan);
      break;
    case TX_32X32:
      fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
      vp9_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin,
                            p->round_fp, p->quant_fp, p->quant_shift,
                            qcoeff, dqcoeff, pd->dequant, eob,
                            so->scan, so->iscan);
      break;
    default:
      assert(0);
      break;
  }
}
414
/*
 * DC-only fast path: runs the "_1" forward transforms (the regular 4x4
 * transform for TX_4X4) and quantizes with the DC quantizer routines, using
 * p->round, p->quant_fp[0] and pd->dequant[0].
 *
 * x            macroblock providing src_diff, quantizer tables and outputs
 * plane        plane index
 * block        block index used to locate coeff / qcoeff / dqcoeff / eobs
 * plane_bsize  size of the plane block; determines the src_diff stride
 * tx_size      transform size; selects the transform and quantizer
 */
void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block,
                        BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  uint16_t *const eob = &p->eobs[block];
  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int16_t *src_diff;
  int bx, by;

  /* Locate this transform block's residual inside the plane's src_diff. */
  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &bx, &by);
  src_diff = &p->src_diff[4 * (by * diff_stride + bx)];

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    switch (tx_size) {
      case TX_4X4:
        x->fwd_txm4x4(src_diff, coeff, diff_stride);
        vpx_highbd_quantize_dc(coeff, 16, x->skip_block, p->round,
                               p->quant_fp[0], qcoeff, dqcoeff,
                               pd->dequant[0], eob);
        break;
      case TX_8X8:
        vpx_highbd_fdct8x8_1(src_diff, coeff, diff_stride);
        vpx_highbd_quantize_dc(coeff, 64, x->skip_block, p->round,
                               p->quant_fp[0], qcoeff, dqcoeff,
                               pd->dequant[0], eob);
        break;
      case TX_16X16:
        vpx_highbd_fdct16x16_1(src_diff, coeff, diff_stride);
        vpx_highbd_quantize_dc(coeff, 256, x->skip_block, p->round,
                               p->quant_fp[0], qcoeff, dqcoeff,
                               pd->dequant[0], eob);
        break;
      case TX_32X32:
        vpx_highbd_fdct32x32_1(src_diff, coeff, diff_stride);
        vpx_highbd_quantize_dc_32x32(coeff, x->skip_block, p->round,
                                     p->quant_fp[0], qcoeff, dqcoeff,
                                     pd->dequant[0], eob);
        break;
      default:
        assert(0);
        break;
    }
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  switch (tx_size) {
    case TX_4X4:
      x->fwd_txm4x4(src_diff, coeff, diff_stride);
      vpx_quantize_dc(coeff, 16, x->skip_block, p->round,
                      p->quant_fp[0], qcoeff, dqcoeff,
                      pd->dequant[0], eob);
      break;
    case TX_8X8:
      vpx_fdct8x8_1(src_diff, coeff, diff_stride);
      vpx_quantize_dc(coeff, 64, x->skip_block, p->round,
                      p->quant_fp[0], qcoeff, dqcoeff,
                      pd->dequant[0], eob);
      break;
    case TX_16X16:
      vpx_fdct16x16_1(src_diff, coeff, diff_stride);
      vpx_quantize_dc(coeff, 256, x->skip_block, p->round,
                      p->quant_fp[0], qcoeff, dqcoeff,
                      pd->dequant[0], eob);
      break;
    case TX_32X32:
      vpx_fdct32x32_1(src_diff, coeff, diff_stride);
      vpx_quantize_dc_32x32(coeff, x->skip_block, p->round,
                            p->quant_fp[0], qcoeff, dqcoeff,
                            pd->dequant[0], eob);
      break;
    default:
      assert(0);
      break;
  }
}
495
/*
 * Full forward transform and quantization of one transform block, using the
 * regular "b" quantizers (round / quant / quant_shift).
 *
 * x            macroblock providing src_diff, quantizer tables and outputs
 * plane        plane index
 * block        block index used to locate coeff / qcoeff / dqcoeff / eobs
 * plane_bsize  size of the plane block; determines the src_diff stride
 * tx_size      transform size; selects the transform and quantizer
 *
 * Always uses the default scan order for tx_size. Outputs go to the block's
 * coeff, qcoeff, dqcoeff and eobs entries.
 */
void vp9_xform_quant(MACROBLOCK *x, int plane, int block,
                     BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  const scan_order *const so = &vp9_default_scan_orders[tx_size];
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  uint16_t *const eob = &p->eobs[block];
  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int16_t *src_diff;
  int bx, by;

  /* Locate this transform block's residual inside the plane's src_diff. */
  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &bx, &by);
  src_diff = &p->src_diff[4 * (by * diff_stride + bx)];

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    switch (tx_size) {
      case TX_4X4:
        x->fwd_txm4x4(src_diff, coeff, diff_stride);
        vpx_highbd_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
                              p->quant, p->quant_shift, qcoeff, dqcoeff,
                              pd->dequant, eob, so->scan, so->iscan);
        break;
      case TX_8X8:
        vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
        vpx_highbd_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round,
                              p->quant, p->quant_shift, qcoeff, dqcoeff,
                              pd->dequant, eob, so->scan, so->iscan);
        break;
      case TX_16X16:
        vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
        vpx_highbd_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
                              p->quant, p->quant_shift, qcoeff, dqcoeff,
                              pd->dequant, eob, so->scan, so->iscan);
        break;
      case TX_32X32:
        highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
        vpx_highbd_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin,
                                    p->round, p->quant, p->quant_shift,
                                    qcoeff, dqcoeff, pd->dequant, eob,
                                    so->scan, so->iscan);
        break;
      default:
        assert(0);
        break;
    }
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  switch (tx_size) {
    case TX_4X4:
      x->fwd_txm4x4(src_diff, coeff, diff_stride);
      vpx_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
                     p->quant, p->quant_shift, qcoeff, dqcoeff,
                     pd->dequant, eob, so->scan, so->iscan);
      break;
    case TX_8X8:
      vpx_fdct8x8(src_diff, coeff, diff_stride);
      vpx_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round,
                     p->quant, p->quant_shift, qcoeff, dqcoeff,
                     pd->dequant, eob, so->scan, so->iscan);
      break;
    case TX_16X16:
      vpx_fdct16x16(src_diff, coeff, diff_stride);
      vpx_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
                     p->quant, p->quant_shift, qcoeff, dqcoeff,
                     pd->dequant, eob, so->scan, so->iscan);
      break;
    case TX_32X32:
      fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
      vpx_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
                           p->quant, p->quant_shift, qcoeff, dqcoeff,
                           pd->dequant, eob, so->scan, so->iscan);
      break;
    default:
      assert(0);
      break;
  }
}
584
/*
 * Per-transform-block callback used by vp9_encode_sb().
 *
 * Steps, in order:
 *   1. Blocks flagged in zcoeff_blk (luma only) are forced empty.
 *   2. Unless skip_recode is set, the block is transformed and quantized,
 *      or forced empty, depending on quant_fp and the skip_txfm flags.
 *   3. The coefficients are optionally run through optimize_b(), and the
 *      entropy contexts at the block position are set to "has coefficients".
 *   4. If any coefficient remains, *args->skip is cleared and (unless
 *      skip_encode is set) the inverse transform is added into dst.
 *
 * arg points to a struct encode_b_args.
 */
static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize,
                         TX_SIZE tx_size, void *arg) {
  struct encode_b_args *const args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct optimize_ctx *const ctx = args->ctx;
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  ENTROPY_CONTEXT *above, *left;
  uint8_t *dst;
  int bx, by;
  int eob;

  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &bx, &by);
  dst = &pd->dst.buf[4 * by * pd->dst.stride + 4 * bx];
  above = &ctx->ta[plane][bx];
  left = &ctx->tl[plane][by];

  // TODO(jingning): per transformed block zero forcing only enabled for
  // luma component. will integrate chroma components as well.
  if (plane == 0 && x->zcoeff_blk[tx_size][block]) {
    p->eobs[block] = 0;
    *above = *left = 0;
    return;
  }

  if (!x->skip_recode) {
    if (x->quant_fp) {
      // Encoding process for rtc mode.
      if (plane == 0 && x->skip_txfm[0] == SKIP_TXFM_AC_DC) {
        // Skip the forward transform entirely.
        p->eobs[block] = 0;
        *above = *left = 0;
        return;
      }
      vp9_xform_quant_fp(x, plane, block, plane_bsize, tx_size);
    } else if (max_txsize_lookup[plane_bsize] != tx_size) {
      // The skip_txfm flags are only consulted when the transform is the
      // largest one for this plane block size.
      vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
    } else {
      const int txfm_blk_index = (plane << 2) + (block >> (tx_size << 1));
      if (x->skip_txfm[txfm_blk_index] == SKIP_TXFM_NONE) {
        // Full forward transform and quantization.
        vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
      } else if (x->skip_txfm[txfm_blk_index] == SKIP_TXFM_AC_ONLY) {
        // Fast path: DC-only forward transform and quantization.
        vp9_xform_quant_dc(x, plane, block, plane_bsize, tx_size);
      } else {
        // Skip the forward transform entirely.
        p->eobs[block] = 0;
        *above = *left = 0;
        return;
      }
    }
  }

  if (x->optimize && !(x->skip_recode && x->skip_optimize)) {
    const int coef_ctx = combine_entropy_contexts(*above, *left);
    *above = *left = optimize_b(x, plane, block, tx_size, coef_ctx) > 0;
  } else {
    *above = *left = p->eobs[block] > 0;
  }

  eob = p->eobs[block];
  if (eob)
    *(args->skip) = 0;

  if (x->skip_encode || eob == 0)
    return;

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    switch (tx_size) {
      case TX_4X4:
        // this is like vp9_short_idct4x4 but has a special case around eob<=1
        // which is significant (not just an optimization) for the lossless
        // case.
        x->highbd_itxm_add(dqcoeff, dst, pd->dst.stride, eob, xd->bd);
        break;
      case TX_8X8:
        vp9_highbd_idct8x8_add(dqcoeff, dst, pd->dst.stride, eob, xd->bd);
        break;
      case TX_16X16:
        vp9_highbd_idct16x16_add(dqcoeff, dst, pd->dst.stride, eob, xd->bd);
        break;
      case TX_32X32:
        vp9_highbd_idct32x32_add(dqcoeff, dst, pd->dst.stride, eob, xd->bd);
        break;
      default:
        assert(0 && "Invalid transform size");
        break;
    }
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  switch (tx_size) {
    case TX_4X4:
      // this is like vp9_short_idct4x4 but has a special case around eob<=1
      // which is significant (not just an optimization) for the lossless
      // case.
      x->itxm_add(dqcoeff, dst, pd->dst.stride, eob);
      break;
    case TX_8X8:
      vp9_idct8x8_add(dqcoeff, dst, pd->dst.stride, eob);
      break;
    case TX_16X16:
      vp9_idct16x16_add(dqcoeff, dst, pd->dst.stride, eob);
      break;
    case TX_32X32:
      vp9_idct32x32_add(dqcoeff, dst, pd->dst.stride, eob);
      break;
    default:
      assert(0 && "Invalid transform size");
      break;
  }
}
704
/*
 * Per-transform-block callback used by vp9_encode_sby_pass1(): runs the full
 * transform + quantization and, when the block has any coefficient, adds the
 * inverse transform into dst through the macroblock's itxm_add hook (or the
 * high-bitdepth hook for high-bitdepth buffers).
 *
 * arg points to the MACROBLOCK being encoded.
 */
static void encode_block_pass1(int plane, int block, BLOCK_SIZE plane_bsize,
                               TX_SIZE tx_size, void *arg) {
  MACROBLOCK *const x = (MACROBLOCK *)arg;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  uint8_t *dst;
  int bx, by;

  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &bx, &by);
  dst = &pd->dst.buf[4 * by * pd->dst.stride + 4 * bx];

  vp9_xform_quant(x, plane, block, plane_bsize, tx_size);

  /* Nothing to reconstruct when the block quantized to all zeros. */
  if (!(p->eobs[block] > 0))
    return;

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    x->highbd_itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block], xd->bd);
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH
  x->itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
}
729
/*
 * First-pass encode of plane 0 only: computes the residual for the plane,
 * then runs encode_block_pass1() on each of its transform blocks.
 */
void vp9_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize) {
  MACROBLOCKD *const xd = &x->e_mbd;

  vp9_subtract_plane(x, bsize, 0);
  vp9_foreach_transformed_block_in_plane(xd, bsize, 0,
                                         encode_block_pass1, x);
}
735
/*
 * Encodes every plane of the current block with encode_block().
 *
 * The mode info's skip flag is first set to 1; encode_block() clears it as
 * soon as any transform block keeps a coefficient. If x->skip is set the
 * function returns right after that, leaving the flag at 1.
 *
 * For each plane: the residual is recomputed unless skip_recode is set, and
 * the entropy contexts are loaded when trellis optimization will run.
 */
void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  struct optimize_ctx ctx;
  struct encode_b_args arg = {x, &ctx, &mbmi->skip};
  int plane;

  mbmi->skip = 1;

  if (x->skip)
    return;

  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
    if (!x->skip_recode)
      vp9_subtract_plane(x, bsize, plane);

    /* Same condition encode_block() uses to decide whether to optimize. */
    if (x->optimize && !(x->skip_recode && x->skip_optimize)) {
      const struct macroblockd_plane *const pd = &xd->plane[plane];
      TX_SIZE tx_size;

      if (plane)
        tx_size = get_uv_tx_size(mbmi, pd);
      else
        tx_size = mbmi->tx_size;

      vp9_get_entropy_contexts(bsize, tx_size, pd,
                               ctx.ta[plane], ctx.tl[plane]);
    }

    vp9_foreach_transformed_block_in_plane(xd, bsize, plane, encode_block,
                                           &arg);
  }
}
763
/*
 * Intra-codes one transform block: predict, then (unless skip_recode is set)
 * subtract, forward-transform and quantize, then (unless skip_encode is set
 * or the block is empty) add the inverse transform back into dst.
 *
 * The transform type and scan order depend on tx_size: 4x4 blocks use
 * get_tx_type_4x4(), 32x32 blocks always use DCT_DCT with the default scan
 * order, and the remaining sizes use get_tx_type().
 *
 * When skip_encode is set the prediction reads its reference from the source
 * buffer instead of dst.
 *
 * arg points to a struct encode_b_args; *args->skip is cleared when the block
 * ends up with a non-zero end-of-block.
 */
void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
                            TX_SIZE tx_size, void *arg) {
  struct encode_b_args *const args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  tran_low_t *coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  const scan_order *so;
  TX_TYPE tx_type = DCT_DCT;
  PREDICTION_MODE mode;
  const int bwl = b_width_log2_lookup[plane_bsize];
  const int diff_stride = 4 * (1 << bwl);
  const int src_stride = p->src.stride;
  const int dst_stride = pd->dst.stride;
  const int do_recode = !x->skip_recode;
  const int do_recon = !x->skip_encode;
  uint16_t *eob = &p->eobs[block];
  uint8_t *src, *dst;
  int16_t *src_diff;
  int bx, by;

  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &bx, &by);
  dst = &pd->dst.buf[4 * (by * dst_stride + bx)];
  src = &p->src.buf[4 * (by * src_stride + bx)];
  src_diff = &p->src_diff[4 * (by * diff_stride + bx)];

  /* Pick the prediction mode, transform type and scan order. */
  if (tx_size == TX_4X4) {
    tx_type = get_tx_type_4x4(get_plane_type(plane), xd, block);
    so = &vp9_scan_orders[TX_4X4][tx_type];
    mode = plane == 0 ? get_y_mode(xd->mi[0], block) : mbmi->uv_mode;
  } else if (tx_size == TX_32X32) {
    mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
    so = &vp9_default_scan_orders[TX_32X32];
  } else {
    mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
    tx_type = get_tx_type(get_plane_type(plane), xd);
    so = &vp9_scan_orders[tx_size][tx_type];
  }

  vp9_predict_intra_block(xd, bwl, tx_size, mode,
                          do_recon ? dst : src,
                          do_recon ? dst_stride : src_stride,
                          dst, dst_stride, bx, by, plane);

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    switch (tx_size) {
      case TX_4X4:
        if (do_recode) {
          vpx_highbd_subtract_block(4, 4, src_diff, diff_stride,
                                    src, src_stride, dst, dst_stride, xd->bd);
          if (tx_type == DCT_DCT)
            x->fwd_txm4x4(src_diff, coeff, diff_stride);
          else
            vp9_highbd_fht4x4(src_diff, coeff, diff_stride, tx_type);
          vpx_highbd_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
                                p->quant, p->quant_shift, qcoeff, dqcoeff,
                                pd->dequant, eob, so->scan, so->iscan);
        }
        if (do_recon && *eob) {
          if (tx_type == DCT_DCT) {
            // this is like vp9_short_idct4x4 but has a special case around
            // eob<=1 which is significant (not just an optimization) for the
            // lossless case.
            x->highbd_itxm_add(dqcoeff, dst, dst_stride, *eob, xd->bd);
          } else {
            vp9_highbd_iht4x4_16_add(dqcoeff, dst, dst_stride, tx_type,
                                     xd->bd);
          }
        }
        break;
      case TX_8X8:
        if (do_recode) {
          vpx_highbd_subtract_block(8, 8, src_diff, diff_stride,
                                    src, src_stride, dst, dst_stride, xd->bd);
          if (tx_type == DCT_DCT)
            vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
          else
            vp9_highbd_fht8x8(src_diff, coeff, diff_stride, tx_type);
          vpx_highbd_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round,
                                p->quant, p->quant_shift, qcoeff, dqcoeff,
                                pd->dequant, eob, so->scan, so->iscan);
        }
        if (do_recon && *eob) {
          vp9_highbd_iht8x8_add(tx_type, dqcoeff, dst, dst_stride, *eob,
                                xd->bd);
        }
        break;
      case TX_16X16:
        if (do_recode) {
          vpx_highbd_subtract_block(16, 16, src_diff, diff_stride,
                                    src, src_stride, dst, dst_stride, xd->bd);
          if (tx_type == DCT_DCT)
            vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
          else
            vp9_highbd_fht16x16(src_diff, coeff, diff_stride, tx_type);
          vpx_highbd_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
                                p->quant, p->quant_shift, qcoeff, dqcoeff,
                                pd->dequant, eob, so->scan, so->iscan);
        }
        if (do_recon && *eob) {
          vp9_highbd_iht16x16_add(tx_type, dqcoeff, dst, dst_stride,
                                  *eob, xd->bd);
        }
        break;
      case TX_32X32:
        if (do_recode) {
          vpx_highbd_subtract_block(32, 32, src_diff, diff_stride,
                                    src, src_stride, dst, dst_stride, xd->bd);
          highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
          vpx_highbd_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin,
                                      p->round, p->quant, p->quant_shift,
                                      qcoeff, dqcoeff, pd->dequant, eob,
                                      so->scan, so->iscan);
        }
        if (do_recon && *eob) {
          vp9_highbd_idct32x32_add(dqcoeff, dst, dst_stride, *eob, xd->bd);
        }
        break;
      default:
        /* Unlike the 8-bit path, this returns without touching the skip
         * flag.
         */
        assert(0);
        return;
    }
    if (*eob)
      *(args->skip) = 0;
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  switch (tx_size) {
    case TX_4X4:
      if (do_recode) {
        vpx_subtract_block(4, 4, src_diff, diff_stride,
                           src, src_stride, dst, dst_stride);
        if (tx_type == DCT_DCT)
          x->fwd_txm4x4(src_diff, coeff, diff_stride);
        else
          vp9_fht4x4(src_diff, coeff, diff_stride, tx_type);
        vpx_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant,
                       p->quant_shift, qcoeff, dqcoeff,
                       pd->dequant, eob, so->scan, so->iscan);
      }
      if (do_recon && *eob) {
        if (tx_type == DCT_DCT) {
          // this is like vp9_short_idct4x4 but has a special case around
          // eob<=1 which is significant (not just an optimization) for the
          // lossless case.
          x->itxm_add(dqcoeff, dst, dst_stride, *eob);
        } else {
          vp9_iht4x4_16_add(dqcoeff, dst, dst_stride, tx_type);
        }
      }
      break;
    case TX_8X8:
      if (do_recode) {
        vpx_subtract_block(8, 8, src_diff, diff_stride,
                           src, src_stride, dst, dst_stride);
        vp9_fht8x8(src_diff, coeff, diff_stride, tx_type);
        vpx_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant,
                       p->quant_shift, qcoeff, dqcoeff,
                       pd->dequant, eob, so->scan, so->iscan);
      }
      if (do_recon && *eob)
        vp9_iht8x8_add(tx_type, dqcoeff, dst, dst_stride, *eob);
      break;
    case TX_16X16:
      if (do_recode) {
        vpx_subtract_block(16, 16, src_diff, diff_stride,
                           src, src_stride, dst, dst_stride);
        vp9_fht16x16(src_diff, coeff, diff_stride, tx_type);
        vpx_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
                       p->quant, p->quant_shift, qcoeff, dqcoeff,
                       pd->dequant, eob, so->scan, so->iscan);
      }
      if (do_recon && *eob)
        vp9_iht16x16_add(tx_type, dqcoeff, dst, dst_stride, *eob);
      break;
    case TX_32X32:
      if (do_recode) {
        vpx_subtract_block(32, 32, src_diff, diff_stride,
                           src, src_stride, dst, dst_stride);
        fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
        vpx_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
                             p->quant, p->quant_shift, qcoeff, dqcoeff,
                             pd->dequant, eob, so->scan, so->iscan);
      }
      if (do_recon && *eob)
        vp9_idct32x32_add(dqcoeff, dst, dst_stride, *eob);
      break;
    default:
      assert(0);
      break;
  }
  if (*eob)
    *(args->skip) = 0;
}
968
/*
 * Runs vp9_encode_block_intra() on every transform block of one plane.
 * The callback arguments carry no optimize context (NULL) and point the
 * skip output at the skip flag of the current mode info.
 */
void vp9_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  struct encode_b_args intra_args = {x, NULL, &xd->mi[0]->mbmi.skip};

  vp9_foreach_transformed_block_in_plane(xd, bsize, plane,
                                         vp9_encode_block_intra,
                                         &intra_args);
}
976