1 /*
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "./vp9_rtcd.h"
12 #include "./vpx_config.h"
13 #include "./vpx_dsp_rtcd.h"
14
15 #include "vpx_dsp/quantize.h"
16 #include "vpx_mem/vpx_mem.h"
17 #include "vpx_ports/mem.h"
18
19 #if CONFIG_MISMATCH_DEBUG
20 #include "vpx_util/vpx_debug_util.h"
21 #endif
22
23 #include "vp9/common/vp9_idct.h"
24 #include "vp9/common/vp9_reconinter.h"
25 #include "vp9/common/vp9_reconintra.h"
26 #include "vp9/common/vp9_scan.h"
27
28 #include "vp9/encoder/vp9_encodemb.h"
29 #include "vp9/encoder/vp9_rd.h"
30 #include "vp9/encoder/vp9_tokenize.h"
31
// Above (ta) and left (tl) entropy contexts for each plane, used while
// optimizing the coefficients of a superblock (see vp9_encode_sb()).
struct optimize_ctx {
  ENTROPY_CONTEXT ta[MAX_MB_PLANE][16];
  ENTROPY_CONTEXT tl[MAX_MB_PLANE][16];
};
36
// Compute the residual (source minus prediction) for one plane of the given
// block size and store it into the plane's src_diff buffer.
void vp9_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
  struct macroblock_plane *const mbp = &x->plane[plane];
  const struct macroblockd_plane *const mbdp = &x->e_mbd.plane[plane];
  const BLOCK_SIZE pb_size = get_plane_block_size(bsize, mbdp);
  const int width = num_4x4_blocks_wide_lookup[pb_size] * 4;
  const int height = num_4x4_blocks_high_lookup[pb_size] * 4;

#if CONFIG_VP9_HIGHBITDEPTH
  if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    vpx_highbd_subtract_block(height, width, mbp->src_diff, width,
                              mbp->src.buf, mbp->src.stride, mbdp->dst.buf,
                              mbdp->dst.stride, x->e_mbd.bd);
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH
  vpx_subtract_block(height, width, mbp->src_diff, width, mbp->src.buf,
                     mbp->src.stride, mbdp->dst.buf, mbdp->dst.stride);
}
55
// Rate-distortion multipliers indexed by [ref_type][plane_type]; they scale
// mb->rdmult inside vp9_optimize_b().
static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] = {
  { 10, 6 },
  { 8, 5 },
};

// 'num' can be negative, but 'shift' must be non-negative.
// Right-shifting a negative value is implementation-defined in C, so shift
// the magnitude and restore the sign instead.
#define RIGHT_SHIFT_POSSIBLY_NEGATIVE(num, shift) \
  (((num) >= 0) ? (num) >> (shift) : -((-(num)) >> (shift)))
64
// Greedy trellis-style coefficient optimization for one transform block.
// Walks the quantized coefficients in scan order; at each position it picks
// between (i) the current quantized value and (ii) that value reduced by one
// in magnitude, whichever gives the lower rate-distortion cost, while also
// tracking the best end-of-block (eob) position. Writes the chosen
// qcoeff/dqcoeff values in place and returns the final eob (also stored in
// mb->plane[plane].eobs[block]).
int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
                   int ctx) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  struct macroblock_plane *const p = &mb->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  const int ref = is_inter_block(xd->mi[0]);
  uint8_t token_cache[1024];
  const tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  const int eob = p->eobs[block];
  const PLANE_TYPE plane_type = get_plane_type(plane);
  // Number of coefficients in this transform size (16, 64, 256 or 1024).
  const int default_eob = 16 << (tx_size << 1);
  // 32x32 dequantized values carry one extra bit of precision.
  const int shift = (tx_size == TX_32X32);
  const int16_t *const dequant_ptr = pd->dequant;
  const uint8_t *const band_translate = get_band_translate(tx_size);
  const scan_order *const so = get_scan(xd, tx_size, plane_type, block);
  const int16_t *const scan = so->scan;
  const int16_t *const nb = so->neighbors;
  const MODE_INFO *mbmi = xd->mi[0];
  const int sharpness = mb->sharpness;
  const int64_t rdadj = (int64_t)mb->rdmult * plane_rd_mult[ref][plane_type];
  // Nonzero sharpness biases the multiplier by sharpness and segment id.
  const int64_t rdmult =
      (sharpness == 0 ? rdadj >> 1
                      : (rdadj * (8 - sharpness + mbmi->segment_id)) >> 4);

  const int64_t rddiv = mb->rddiv;
  int64_t rd_cost0, rd_cost1;
  int64_t rate0, rate1;
  int16_t t0, t1;
  int i, final_eob;
  int count_high_values_after_eob = 0;
#if CONFIG_VP9_HIGHBITDEPTH
  const uint16_t *cat6_high_cost = vp9_get_high_cost_table(xd->bd);
#else
  const uint16_t *cat6_high_cost = vp9_get_high_cost_table(8);
#endif
  unsigned int(*const token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
      mb->token_costs[tx_size][plane_type][ref];
  unsigned int(*token_costs_cur)[2][COEFF_CONTEXTS][ENTROPY_TOKENS];
  int64_t eob_cost0, eob_cost1;
  const int ctx0 = ctx;
  int64_t accu_rate = 0;
  // Initialized to the worst possible error for the largest transform size.
  // This ensures that it never goes negative.
  int64_t accu_error = ((int64_t)1) << 50;
  int64_t best_block_rd_cost = INT64_MAX;
  int x_prev = 1;
  // Quantized/dequantized values chosen for the position just before the
  // best eob found so far; restored at the end.
  tran_low_t before_best_eob_qc = 0;
  tran_low_t before_best_eob_dqc = 0;

  assert((!plane_type && !plane) || (plane_type && plane));
  assert(eob <= default_eob);

  // Seed token_cache with the energy class of each coefficient up to eob;
  // get_coef_context() reads it to derive per-position contexts.
  for (i = 0; i < eob; i++) {
    const int rc = scan[i];
    token_cache[rc] = vp9_pt_energy_class[vp9_get_token(qcoeff[rc])];
  }
  final_eob = 0;

  // Initial RD cost.
  token_costs_cur = token_costs + band_translate[0];
  rate0 = (*token_costs_cur)[0][ctx0][EOB_TOKEN];
  best_block_rd_cost = RDCOST(rdmult, rddiv, rate0, accu_error);

  // For each token, pick one of two choices greedily:
  // (i) First candidate: Keep current quantized value, OR
  // (ii) Second candidate: Reduce quantized value by 1.
  for (i = 0; i < eob; i++) {
    const int rc = scan[i];
    const int x = qcoeff[rc];
    const int band_cur = band_translate[i];
    const int ctx_cur = (i == 0) ? ctx : get_coef_context(nb, token_cache, i);
    const int token_tree_sel_cur = (x_prev == 0);
    token_costs_cur = token_costs + band_cur;
    if (x == 0) {  // No need to search
      const int token = vp9_get_token(x);
      rate0 = (*token_costs_cur)[token_tree_sel_cur][ctx_cur][token];
      accu_rate += rate0;
      x_prev = 0;
      // Note: accu_error does not change.
    } else {
      const int dqv = dequant_ptr[rc != 0];
      // Compute the distortion for quantizing to 0.
      const int diff_for_zero_raw = (0 - coeff[rc]) * (1 << shift);
      const int diff_for_zero =
#if CONFIG_VP9_HIGHBITDEPTH
          (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
              ? RIGHT_SHIFT_POSSIBLY_NEGATIVE(diff_for_zero_raw, xd->bd - 8)
              :
#endif
              diff_for_zero_raw;
      const int64_t distortion_for_zero =
          (int64_t)diff_for_zero * diff_for_zero;

      // Compute the distortion for the first candidate
      const int diff0_raw = (dqcoeff[rc] - coeff[rc]) * (1 << shift);
      const int diff0 =
#if CONFIG_VP9_HIGHBITDEPTH
          (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
              ? RIGHT_SHIFT_POSSIBLY_NEGATIVE(diff0_raw, xd->bd - 8)
              :
#endif  // CONFIG_VP9_HIGHBITDEPTH
              diff0_raw;
      const int64_t distortion0 = (int64_t)diff0 * diff0;

      // Compute the distortion for the second candidate
      const int sign = -(x < 0);        // -1 if x is negative and 0 otherwise.
      const int x1 = x - 2 * sign - 1;  // abs(x1) = abs(x) - 1.
      int64_t distortion1;
      if (x1 != 0) {
        const int dqv_step =
#if CONFIG_VP9_HIGHBITDEPTH
            (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? dqv >> (xd->bd - 8)
                                                          :
#endif  // CONFIG_VP9_HIGHBITDEPTH
                                                          dqv;
        const int diff_step = (dqv_step + sign) ^ sign;
        const int diff1 = diff0 - diff_step;
        assert(dqv > 0);  // We aren't right shifting a negative number above.
        distortion1 = (int64_t)diff1 * diff1;
      } else {
        distortion1 = distortion_for_zero;
      }
      {
        // Calculate RDCost for current coeff for the two candidates.
        const int64_t base_bits0 = vp9_get_token_cost(x, &t0, cat6_high_cost);
        const int64_t base_bits1 = vp9_get_token_cost(x1, &t1, cat6_high_cost);
        rate0 =
            base_bits0 + (*token_costs_cur)[token_tree_sel_cur][ctx_cur][t0];
        rate1 =
            base_bits1 + (*token_costs_cur)[token_tree_sel_cur][ctx_cur][t1];
      }
      {
        int rdcost_better_for_x1, eob_rdcost_better_for_x1;
        int dqc0, dqc1;
        int64_t best_eob_cost_cur;
        int use_x1;

        // Calculate RD Cost effect on the next coeff for the two candidates.
        int64_t next_bits0 = 0;
        int64_t next_bits1 = 0;
        int64_t next_eob_bits0 = 0;
        int64_t next_eob_bits1 = 0;
        if (i < default_eob - 1) {
          int ctx_next, token_tree_sel_next;
          const int band_next = band_translate[i + 1];
          const int token_next =
              (i + 1 != eob) ? vp9_get_token(qcoeff[scan[i + 1]]) : EOB_TOKEN;
          unsigned int(*const token_costs_next)[2][COEFF_CONTEXTS]
                                               [ENTROPY_TOKENS] =
                                                   token_costs + band_next;
          // Temporarily write each candidate's energy class into token_cache
          // so the next position's context reflects that choice.
          token_cache[rc] = vp9_pt_energy_class[t0];
          ctx_next = get_coef_context(nb, token_cache, i + 1);
          token_tree_sel_next = (x == 0);
          next_bits0 =
              (*token_costs_next)[token_tree_sel_next][ctx_next][token_next];
          next_eob_bits0 =
              (*token_costs_next)[token_tree_sel_next][ctx_next][EOB_TOKEN];
          token_cache[rc] = vp9_pt_energy_class[t1];
          ctx_next = get_coef_context(nb, token_cache, i + 1);
          token_tree_sel_next = (x1 == 0);
          next_bits1 =
              (*token_costs_next)[token_tree_sel_next][ctx_next][token_next];
          if (x1 != 0) {
            next_eob_bits1 =
                (*token_costs_next)[token_tree_sel_next][ctx_next][EOB_TOKEN];
          }
        }

        // Compare the total RD costs for two candidates.
        rd_cost0 = RDCOST(rdmult, rddiv, (rate0 + next_bits0), distortion0);
        rd_cost1 = RDCOST(rdmult, rddiv, (rate1 + next_bits1), distortion1);
        rdcost_better_for_x1 = (rd_cost1 < rd_cost0);
        eob_cost0 = RDCOST(rdmult, rddiv, (accu_rate + rate0 + next_eob_bits0),
                           (accu_error + distortion0 - distortion_for_zero));
        eob_cost1 = eob_cost0;
        if (x1 != 0) {
          eob_cost1 =
              RDCOST(rdmult, rddiv, (accu_rate + rate1 + next_eob_bits1),
                     (accu_error + distortion1 - distortion_for_zero));
          eob_rdcost_better_for_x1 = (eob_cost1 < eob_cost0);
        } else {
          eob_rdcost_better_for_x1 = 0;
        }

        // Calculate the two candidate de-quantized values.
        dqc0 = dqcoeff[rc];
        dqc1 = 0;
        if (rdcost_better_for_x1 + eob_rdcost_better_for_x1) {
          if (x1 != 0) {
            dqc1 = RIGHT_SHIFT_POSSIBLY_NEGATIVE(x1 * dqv, shift);
          } else {
            dqc1 = 0;
          }
        }

        // Pick and record the better quantized and de-quantized values.
        if (rdcost_better_for_x1) {
          qcoeff[rc] = x1;
          dqcoeff[rc] = dqc1;
          accu_rate += rate1;
          accu_error += distortion1 - distortion_for_zero;
          assert(distortion1 <= distortion_for_zero);
          token_cache[rc] = vp9_pt_energy_class[t1];
        } else {
          accu_rate += rate0;
          accu_error += distortion0 - distortion_for_zero;
          assert(distortion0 <= distortion_for_zero);
          token_cache[rc] = vp9_pt_energy_class[t0];
        }
        if (sharpness > 0 && abs(qcoeff[rc]) > 1) count_high_values_after_eob++;
        assert(accu_error >= 0);
        x_prev = qcoeff[rc];  // Update based on selected quantized value.

        use_x1 = (x1 != 0) && eob_rdcost_better_for_x1;
        best_eob_cost_cur = use_x1 ? eob_cost1 : eob_cost0;

        // Determine whether to move the eob position to i+1
        if (best_eob_cost_cur < best_block_rd_cost) {
          best_block_rd_cost = best_eob_cost_cur;
          final_eob = i + 1;
          count_high_values_after_eob = 0;
          if (use_x1) {
            before_best_eob_qc = x1;
            before_best_eob_dqc = dqc1;
          } else {
            before_best_eob_qc = x;
            before_best_eob_dqc = dqc0;
          }
        }
      }
    }
  }
  if (count_high_values_after_eob > 0) {
    // Sharpness mode with large-magnitude coefficients beyond the RD-chosen
    // eob: keep every remaining nonzero coefficient and only trim trailing
    // zeros.
    final_eob = eob - 1;
    for (; final_eob >= 0; final_eob--) {
      const int rc = scan[final_eob];
      const int x = qcoeff[rc];
      if (x) {
        break;
      }
    }
    final_eob++;
  } else {
    // Restore the values chosen for the position just before the best eob,
    // then zero out everything at and after the final eob.
    assert(final_eob <= eob);
    if (final_eob > 0) {
      int rc;
      assert(before_best_eob_qc != 0);
      i = final_eob - 1;
      rc = scan[i];
      qcoeff[rc] = before_best_eob_qc;
      dqcoeff[rc] = before_best_eob_dqc;
    }
    for (i = final_eob; i < eob; i++) {
      int rc = scan[i];
      qcoeff[rc] = 0;
      dqcoeff[rc] = 0;
    }
  }
  mb->plane[plane].eobs[block] = final_eob;
  return final_eob;
}
#undef RIGHT_SHIFT_POSSIBLY_NEGATIVE
329
fdct32x32(int rd_transform,const int16_t * src,tran_low_t * dst,int src_stride)330 static INLINE void fdct32x32(int rd_transform, const int16_t *src,
331 tran_low_t *dst, int src_stride) {
332 if (rd_transform)
333 vpx_fdct32x32_rd(src, dst, src_stride);
334 else
335 vpx_fdct32x32(src, dst, src_stride);
336 }
337
#if CONFIG_VP9_HIGHBITDEPTH
// High-bitdepth forward 32x32 transform; selects the lower-precision "rd"
// variant when rd_transform is nonzero.
static INLINE void highbd_fdct32x32(int rd_transform, const int16_t *src,
                                    tran_low_t *dst, int src_stride) {
  if (!rd_transform) {
    vpx_highbd_fdct32x32(src, dst, src_stride);
  } else {
    vpx_highbd_fdct32x32_rd(src, dst, src_stride);
  }
}
#endif  // CONFIG_VP9_HIGHBITDEPTH
347
// Forward transform plus fast ("fp") quantization for one transform block.
// Writes coeff/qcoeff/dqcoeff and the eob count for the block.
void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, int row, int col,
                        BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  const scan_order *const so = &vp9_default_scan_orders[tx_size];
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  uint16_t *const eob = &p->eobs[block];
  const int diff_stride = num_4x4_blocks_wide_lookup[plane_bsize] * 4;
  const int16_t *const src_diff = &p->src_diff[4 * (row * diff_stride + col)];
  // skip block condition should be handled before this is called.
  assert(!x->skip_block);

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    if (tx_size == TX_32X32) {
      highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
      vp9_highbd_quantize_fp_32x32(coeff, 1024, x->skip_block, p->round_fp,
                                   p->quant_fp, qcoeff, dqcoeff, pd->dequant,
                                   eob, so->scan, so->iscan);
    } else if (tx_size == TX_16X16) {
      vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
      vp9_highbd_quantize_fp(coeff, 256, x->skip_block, p->round_fp,
                             p->quant_fp, qcoeff, dqcoeff, pd->dequant, eob,
                             so->scan, so->iscan);
    } else if (tx_size == TX_8X8) {
      vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
      vp9_highbd_quantize_fp(coeff, 64, x->skip_block, p->round_fp,
                             p->quant_fp, qcoeff, dqcoeff, pd->dequant, eob,
                             so->scan, so->iscan);
    } else {
      assert(tx_size == TX_4X4);
      x->fwd_txfm4x4(src_diff, coeff, diff_stride);
      vp9_highbd_quantize_fp(coeff, 16, x->skip_block, p->round_fp,
                             p->quant_fp, qcoeff, dqcoeff, pd->dequant, eob,
                             so->scan, so->iscan);
    }
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  if (tx_size == TX_32X32) {
    fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
    vp9_quantize_fp_32x32(coeff, 1024, x->skip_block, p->round_fp, p->quant_fp,
                          qcoeff, dqcoeff, pd->dequant, eob, so->scan,
                          so->iscan);
  } else if (tx_size == TX_16X16) {
    vpx_fdct16x16(src_diff, coeff, diff_stride);
    vp9_quantize_fp(coeff, 256, x->skip_block, p->round_fp, p->quant_fp,
                    qcoeff, dqcoeff, pd->dequant, eob, so->scan, so->iscan);
  } else if (tx_size == TX_8X8) {
    vpx_fdct8x8(src_diff, coeff, diff_stride);
    vp9_quantize_fp(coeff, 64, x->skip_block, p->round_fp, p->quant_fp,
                    qcoeff, dqcoeff, pd->dequant, eob, so->scan, so->iscan);
  } else {
    assert(tx_size == TX_4X4);
    x->fwd_txfm4x4(src_diff, coeff, diff_stride);
    vp9_quantize_fp(coeff, 16, x->skip_block, p->round_fp, p->quant_fp,
                    qcoeff, dqcoeff, pd->dequant, eob, so->scan, so->iscan);
  }
}
426
// DC-only fast path: forward transform of the DC term and quantization of
// only the DC coefficient for one transform block.
void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block, int row, int col,
                        BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  uint16_t *const eob = &p->eobs[block];
  const int diff_stride = num_4x4_blocks_wide_lookup[plane_bsize] * 4;
  const int16_t *const src_diff = &p->src_diff[4 * (row * diff_stride + col)];
  // skip block condition should be handled before this is called.
  assert(!x->skip_block);

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    if (tx_size == TX_32X32) {
      vpx_highbd_fdct32x32_1(src_diff, coeff, diff_stride);
      vpx_highbd_quantize_dc_32x32(coeff, x->skip_block, p->round,
                                   p->quant_fp[0], qcoeff, dqcoeff,
                                   pd->dequant[0], eob);
    } else if (tx_size == TX_16X16) {
      vpx_highbd_fdct16x16_1(src_diff, coeff, diff_stride);
      vpx_highbd_quantize_dc(coeff, 256, x->skip_block, p->round,
                             p->quant_fp[0], qcoeff, dqcoeff, pd->dequant[0],
                             eob);
    } else if (tx_size == TX_8X8) {
      vpx_highbd_fdct8x8_1(src_diff, coeff, diff_stride);
      vpx_highbd_quantize_dc(coeff, 64, x->skip_block, p->round,
                             p->quant_fp[0], qcoeff, dqcoeff, pd->dequant[0],
                             eob);
    } else {
      assert(tx_size == TX_4X4);
      x->fwd_txfm4x4(src_diff, coeff, diff_stride);
      vpx_highbd_quantize_dc(coeff, 16, x->skip_block, p->round,
                             p->quant_fp[0], qcoeff, dqcoeff, pd->dequant[0],
                             eob);
    }
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  if (tx_size == TX_32X32) {
    vpx_fdct32x32_1(src_diff, coeff, diff_stride);
    vpx_quantize_dc_32x32(coeff, x->skip_block, p->round, p->quant_fp[0],
                          qcoeff, dqcoeff, pd->dequant[0], eob);
  } else if (tx_size == TX_16X16) {
    vpx_fdct16x16_1(src_diff, coeff, diff_stride);
    vpx_quantize_dc(coeff, 256, x->skip_block, p->round, p->quant_fp[0],
                    qcoeff, dqcoeff, pd->dequant[0], eob);
  } else if (tx_size == TX_8X8) {
    vpx_fdct8x8_1(src_diff, coeff, diff_stride);
    vpx_quantize_dc(coeff, 64, x->skip_block, p->round, p->quant_fp[0],
                    qcoeff, dqcoeff, pd->dequant[0], eob);
  } else {
    assert(tx_size == TX_4X4);
    x->fwd_txfm4x4(src_diff, coeff, diff_stride);
    vpx_quantize_dc(coeff, 16, x->skip_block, p->round, p->quant_fp[0],
                    qcoeff, dqcoeff, pd->dequant[0], eob);
  }
}
499
// Full forward transform plus standard ("b") quantization for one transform
// block. Writes coeff/qcoeff/dqcoeff and the eob count.
void vp9_xform_quant(MACROBLOCK *x, int plane, int block, int row, int col,
                     BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  const scan_order *const so = &vp9_default_scan_orders[tx_size];
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  uint16_t *const eob = &p->eobs[block];
  const int diff_stride = num_4x4_blocks_wide_lookup[plane_bsize] * 4;
  const int16_t *const src_diff = &p->src_diff[4 * (row * diff_stride + col)];
  // skip block condition should be handled before this is called.
  assert(!x->skip_block);

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    if (tx_size == TX_32X32) {
      highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
      vpx_highbd_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin,
                                  p->round, p->quant, p->quant_shift, qcoeff,
                                  dqcoeff, pd->dequant, eob, so->scan,
                                  so->iscan);
    } else if (tx_size == TX_16X16) {
      vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
      vpx_highbd_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
                            p->quant, p->quant_shift, qcoeff, dqcoeff,
                            pd->dequant, eob, so->scan, so->iscan);
    } else if (tx_size == TX_8X8) {
      vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
      vpx_highbd_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round,
                            p->quant, p->quant_shift, qcoeff, dqcoeff,
                            pd->dequant, eob, so->scan, so->iscan);
    } else {
      assert(tx_size == TX_4X4);
      x->fwd_txfm4x4(src_diff, coeff, diff_stride);
      vpx_highbd_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
                            p->quant, p->quant_shift, qcoeff, dqcoeff,
                            pd->dequant, eob, so->scan, so->iscan);
    }
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  if (tx_size == TX_32X32) {
    fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
    vpx_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
                         p->quant, p->quant_shift, qcoeff, dqcoeff,
                         pd->dequant, eob, so->scan, so->iscan);
  } else if (tx_size == TX_16X16) {
    vpx_fdct16x16(src_diff, coeff, diff_stride);
    vpx_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, p->quant,
                   p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
                   so->scan, so->iscan);
  } else if (tx_size == TX_8X8) {
    vpx_fdct8x8(src_diff, coeff, diff_stride);
    vpx_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant,
                   p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
                   so->scan, so->iscan);
  } else {
    assert(tx_size == TX_4X4);
    x->fwd_txfm4x4(src_diff, coeff, diff_stride);
    vpx_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant,
                   p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
                   so->scan, so->iscan);
  }
}
582
// Per-transform-block encode callback: forward transform + quantization
// (full, DC-only, or skipped depending on the skip_txfm hints), optional
// coefficient optimization, then inverse transform added into the
// reconstruction buffer. Updates the above/left entropy contexts and clears
// the shared skip flag when the block has nonzero coefficients.
static void encode_block(int plane, int block, int row, int col,
                         BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) {
  struct encode_b_args *const args = arg;
#if CONFIG_MISMATCH_DEBUG
  int mi_row = args->mi_row;
  int mi_col = args->mi_col;
  int output_enabled = args->output_enabled;
#endif
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  uint8_t *dst;
  ENTROPY_CONTEXT *a, *l;
  dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col];
  a = &args->ta[col];
  l = &args->tl[row];

  // TODO(jingning): per transformed block zero forcing only enabled for
  // luma component. will integrate chroma components as well.
  if (x->zcoeff_blk[tx_size][block] && plane == 0) {
    p->eobs[block] = 0;
    *a = *l = 0;
    // With mismatch debugging the early exits below fall through to the
    // recording label instead of returning.
#if CONFIG_MISMATCH_DEBUG
    goto encode_block_end;
#else
    return;
#endif
  }

  if (!x->skip_recode) {
    if (x->quant_fp) {
      // Encoding process for rtc mode
      if (x->skip_txfm[0] == SKIP_TXFM_AC_DC && plane == 0) {
        // skip forward transform
        p->eobs[block] = 0;
        *a = *l = 0;
#if CONFIG_MISMATCH_DEBUG
        goto encode_block_end;
#else
        return;
#endif
      } else {
        vp9_xform_quant_fp(x, plane, block, row, col, plane_bsize, tx_size);
      }
    } else {
      if (max_txsize_lookup[plane_bsize] == tx_size) {
        int txfm_blk_index = (plane << 2) + (block >> (tx_size << 1));
        if (x->skip_txfm[txfm_blk_index] == SKIP_TXFM_NONE) {
          // full forward transform and quantization
          vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size);
        } else if (x->skip_txfm[txfm_blk_index] == SKIP_TXFM_AC_ONLY) {
          // fast path forward transform and quantization
          vp9_xform_quant_dc(x, plane, block, row, col, plane_bsize, tx_size);
        } else {
          // skip forward transform
          p->eobs[block] = 0;
          *a = *l = 0;
#if CONFIG_MISMATCH_DEBUG
          goto encode_block_end;
#else
          return;
#endif
        }
      } else {
        vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size);
      }
    }
  }

  // Optional trellis coefficient optimization; contexts record whether the
  // block ends up with any nonzero coefficients.
  if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
    const int ctx = combine_entropy_contexts(*a, *l);
    *a = *l = vp9_optimize_b(x, plane, block, tx_size, ctx) > 0;
  } else {
    *a = *l = p->eobs[block] > 0;
  }

  if (p->eobs[block]) *(args->skip) = 0;

  if (x->skip_encode || p->eobs[block] == 0) {
#if CONFIG_MISMATCH_DEBUG
    goto encode_block_end;
#else
    return;
#endif
  }
#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    uint16_t *const dst16 = CONVERT_TO_SHORTPTR(dst);
    switch (tx_size) {
      case TX_32X32:
        vp9_highbd_idct32x32_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
                                 xd->bd);
        break;
      case TX_16X16:
        vp9_highbd_idct16x16_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
                                 xd->bd);
        break;
      case TX_8X8:
        vp9_highbd_idct8x8_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
                               xd->bd);
        break;
      default:
        assert(tx_size == TX_4X4);
        // this is like vp9_short_idct4x4 but has a special case around eob<=1
        // which is significant (not just an optimization) for the lossless
        // case.
        x->highbd_inv_txfm_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
                               xd->bd);
        break;
    }
#if CONFIG_MISMATCH_DEBUG
    goto encode_block_end;
#else
    return;
#endif
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  switch (tx_size) {
    case TX_32X32:
      vp9_idct32x32_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
      break;
    case TX_16X16:
      vp9_idct16x16_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
      break;
    case TX_8X8:
      vp9_idct8x8_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
      break;
    default:
      assert(tx_size == TX_4X4);
      // this is like vp9_short_idct4x4 but has a special case around eob<=1
      // which is significant (not just an optimization) for the lossless
      // case.
      x->inv_txfm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
      break;
  }
#if CONFIG_MISMATCH_DEBUG
encode_block_end:
  // Record the reconstructed block for encoder/decoder mismatch debugging.
  if (output_enabled) {
    int pixel_c, pixel_r;
    int blk_w = 1 << (tx_size + TX_UNIT_SIZE_LOG2);
    int blk_h = 1 << (tx_size + TX_UNIT_SIZE_LOG2);
    mi_to_pixel_loc(&pixel_c, &pixel_r, mi_col, mi_row, col, row,
                    pd->subsampling_x, pd->subsampling_y);
    mismatch_record_block_tx(dst, pd->dst.stride, plane, pixel_c, pixel_r,
                             blk_w, blk_h,
                             xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH);
  }
#endif
}
735
// First-pass per-block encode: transform/quantize, then reconstruct via the
// inverse transform when the block has any nonzero coefficients.
static void encode_block_pass1(int plane, int block, int row, int col,
                               BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
                               void *arg) {
  MACROBLOCK *const x = (MACROBLOCK *)arg;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  uint8_t *const recon = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col];

  vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size);

  if (p->eobs[block] == 0) return;

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    x->highbd_inv_txfm_add(dqcoeff, CONVERT_TO_SHORTPTR(recon),
                           pd->dst.stride, p->eobs[block], xd->bd);
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH
  x->inv_txfm_add(dqcoeff, recon, pd->dst.stride, p->eobs[block]);
}
760
// First-pass encode of the luma plane only: compute the residual, then run
// encode_block_pass1 over every transform block in the plane.
void vp9_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize) {
  const int luma_plane = 0;
  vp9_subtract_plane(x, bsize, luma_plane);
  vp9_foreach_transformed_block_in_plane(&x->e_mbd, bsize, luma_plane,
                                         encode_block_pass1, x);
}
766
// Encode one superblock: for each plane, subtract the prediction, set up
// entropy contexts when coefficient optimization is enabled, and run
// encode_block() over every transform block. mi->skip starts at 1 and is
// cleared by encode_block() (via args->skip) when any block produces
// nonzero coefficients.
void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row, int mi_col,
                   int output_enabled) {
  MACROBLOCKD *const xd = &x->e_mbd;
  struct optimize_ctx ctx;
  MODE_INFO *mi = xd->mi[0];
  int plane;
  // The encode_b_args layout differs when mismatch debugging is compiled in.
#if CONFIG_MISMATCH_DEBUG
  struct encode_b_args arg = { x,         1,      NULL,   NULL,
                               &mi->skip, mi_row, mi_col, output_enabled };
#else
  struct encode_b_args arg = { x, 1, NULL, NULL, &mi->skip };
  (void)mi_row;
  (void)mi_col;
  (void)output_enabled;
#endif

  mi->skip = 1;

  if (x->skip) return;

  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
    if (!x->skip_recode) vp9_subtract_plane(x, bsize, plane);

    if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
      const struct macroblockd_plane *const pd = &xd->plane[plane];
      const TX_SIZE tx_size = plane ? get_uv_tx_size(mi, pd) : mi->tx_size;
      vp9_get_entropy_contexts(bsize, tx_size, pd, ctx.ta[plane],
                               ctx.tl[plane]);
      arg.enable_coeff_opt = 1;
    } else {
      arg.enable_coeff_opt = 0;
    }
    arg.ta = ctx.ta[plane];
    arg.tl = ctx.tl[plane];

    vp9_foreach_transformed_block_in_plane(xd, bsize, plane, encode_block,
                                           &arg);
  }
}
806
vp9_encode_block_intra(int plane,int block,int row,int col,BLOCK_SIZE plane_bsize,TX_SIZE tx_size,void * arg)807 void vp9_encode_block_intra(int plane, int block, int row, int col,
808 BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
809 void *arg) {
810 struct encode_b_args *const args = arg;
811 MACROBLOCK *const x = args->x;
812 MACROBLOCKD *const xd = &x->e_mbd;
813 MODE_INFO *mi = xd->mi[0];
814 struct macroblock_plane *const p = &x->plane[plane];
815 struct macroblockd_plane *const pd = &xd->plane[plane];
816 tran_low_t *coeff = BLOCK_OFFSET(p->coeff, block);
817 tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
818 tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
819 const scan_order *scan_order;
820 TX_TYPE tx_type = DCT_DCT;
821 PREDICTION_MODE mode;
822 const int bwl = b_width_log2_lookup[plane_bsize];
823 const int diff_stride = 4 * (1 << bwl);
824 uint8_t *src, *dst;
825 int16_t *src_diff;
826 uint16_t *eob = &p->eobs[block];
827 const int src_stride = p->src.stride;
828 const int dst_stride = pd->dst.stride;
829 ENTROPY_CONTEXT *a = NULL;
830 ENTROPY_CONTEXT *l = NULL;
831 int entropy_ctx = 0;
832 dst = &pd->dst.buf[4 * (row * dst_stride + col)];
833 src = &p->src.buf[4 * (row * src_stride + col)];
834 src_diff = &p->src_diff[4 * (row * diff_stride + col)];
835 if (args->enable_coeff_opt) {
836 a = &args->ta[col];
837 l = &args->tl[row];
838 entropy_ctx = combine_entropy_contexts(*a, *l);
839 }
840
841 if (tx_size == TX_4X4) {
842 tx_type = get_tx_type_4x4(get_plane_type(plane), xd, block);
843 scan_order = &vp9_scan_orders[TX_4X4][tx_type];
844 mode = plane == 0 ? get_y_mode(xd->mi[0], block) : mi->uv_mode;
845 } else {
846 mode = plane == 0 ? mi->mode : mi->uv_mode;
847 if (tx_size == TX_32X32) {
848 scan_order = &vp9_default_scan_orders[TX_32X32];
849 } else {
850 tx_type = get_tx_type(get_plane_type(plane), xd);
851 scan_order = &vp9_scan_orders[tx_size][tx_type];
852 }
853 }
854
855 vp9_predict_intra_block(
856 xd, bwl, tx_size, mode, (x->skip_encode || x->fp_src_pred) ? src : dst,
857 (x->skip_encode || x->fp_src_pred) ? src_stride : dst_stride, dst,
858 dst_stride, col, row, plane);
859
860 // skip block condition should be handled before this is called.
861 assert(!x->skip_block);
862
863 #if CONFIG_VP9_HIGHBITDEPTH
864 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
865 uint16_t *const dst16 = CONVERT_TO_SHORTPTR(dst);
866 switch (tx_size) {
867 case TX_32X32:
868 if (!x->skip_recode) {
869 vpx_highbd_subtract_block(32, 32, src_diff, diff_stride, src,
870 src_stride, dst, dst_stride, xd->bd);
871 highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
872 vpx_highbd_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin,
873 p->round, p->quant, p->quant_shift,
874 qcoeff, dqcoeff, pd->dequant, eob,
875 scan_order->scan, scan_order->iscan);
876 }
877 if (args->enable_coeff_opt && !x->skip_recode) {
878 *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
879 }
880 if (!x->skip_encode && *eob) {
881 vp9_highbd_idct32x32_add(dqcoeff, dst16, dst_stride, *eob, xd->bd);
882 }
883 break;
884 case TX_16X16:
885 if (!x->skip_recode) {
886 vpx_highbd_subtract_block(16, 16, src_diff, diff_stride, src,
887 src_stride, dst, dst_stride, xd->bd);
888 if (tx_type == DCT_DCT)
889 vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
890 else
891 vp9_highbd_fht16x16(src_diff, coeff, diff_stride, tx_type);
892 vpx_highbd_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
893 p->quant, p->quant_shift, qcoeff, dqcoeff,
894 pd->dequant, eob, scan_order->scan,
895 scan_order->iscan);
896 }
897 if (args->enable_coeff_opt && !x->skip_recode) {
898 *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
899 }
900 if (!x->skip_encode && *eob) {
901 vp9_highbd_iht16x16_add(tx_type, dqcoeff, dst16, dst_stride, *eob,
902 xd->bd);
903 }
904 break;
905 case TX_8X8:
906 if (!x->skip_recode) {
907 vpx_highbd_subtract_block(8, 8, src_diff, diff_stride, src,
908 src_stride, dst, dst_stride, xd->bd);
909 if (tx_type == DCT_DCT)
910 vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
911 else
912 vp9_highbd_fht8x8(src_diff, coeff, diff_stride, tx_type);
913 vpx_highbd_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round,
914 p->quant, p->quant_shift, qcoeff, dqcoeff,
915 pd->dequant, eob, scan_order->scan,
916 scan_order->iscan);
917 }
918 if (args->enable_coeff_opt && !x->skip_recode) {
919 *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
920 }
921 if (!x->skip_encode && *eob) {
922 vp9_highbd_iht8x8_add(tx_type, dqcoeff, dst16, dst_stride, *eob,
923 xd->bd);
924 }
925 break;
926 default:
927 assert(tx_size == TX_4X4);
928 if (!x->skip_recode) {
929 vpx_highbd_subtract_block(4, 4, src_diff, diff_stride, src,
930 src_stride, dst, dst_stride, xd->bd);
931 if (tx_type != DCT_DCT)
932 vp9_highbd_fht4x4(src_diff, coeff, diff_stride, tx_type);
933 else
934 x->fwd_txfm4x4(src_diff, coeff, diff_stride);
935 vpx_highbd_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
936 p->quant, p->quant_shift, qcoeff, dqcoeff,
937 pd->dequant, eob, scan_order->scan,
938 scan_order->iscan);
939 }
940 if (args->enable_coeff_opt && !x->skip_recode) {
941 *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
942 }
943 if (!x->skip_encode && *eob) {
944 if (tx_type == DCT_DCT) {
945 // this is like vp9_short_idct4x4 but has a special case around
946 // eob<=1 which is significant (not just an optimization) for the
947 // lossless case.
948 x->highbd_inv_txfm_add(dqcoeff, dst16, dst_stride, *eob, xd->bd);
949 } else {
950 vp9_highbd_iht4x4_16_add(dqcoeff, dst16, dst_stride, tx_type,
951 xd->bd);
952 }
953 }
954 break;
955 }
956 if (*eob) *(args->skip) = 0;
957 return;
958 }
959 #endif // CONFIG_VP9_HIGHBITDEPTH
960
961 switch (tx_size) {
962 case TX_32X32:
963 if (!x->skip_recode) {
964 vpx_subtract_block(32, 32, src_diff, diff_stride, src, src_stride, dst,
965 dst_stride);
966 fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
967 vpx_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
968 p->quant, p->quant_shift, qcoeff, dqcoeff,
969 pd->dequant, eob, scan_order->scan,
970 scan_order->iscan);
971 }
972 if (args->enable_coeff_opt && !x->skip_recode) {
973 *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
974 }
975 if (!x->skip_encode && *eob)
976 vp9_idct32x32_add(dqcoeff, dst, dst_stride, *eob);
977 break;
978 case TX_16X16:
979 if (!x->skip_recode) {
980 vpx_subtract_block(16, 16, src_diff, diff_stride, src, src_stride, dst,
981 dst_stride);
982 vp9_fht16x16(src_diff, coeff, diff_stride, tx_type);
983 vpx_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, p->quant,
984 p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
985 scan_order->scan, scan_order->iscan);
986 }
987 if (args->enable_coeff_opt && !x->skip_recode) {
988 *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
989 }
990 if (!x->skip_encode && *eob)
991 vp9_iht16x16_add(tx_type, dqcoeff, dst, dst_stride, *eob);
992 break;
993 case TX_8X8:
994 if (!x->skip_recode) {
995 vpx_subtract_block(8, 8, src_diff, diff_stride, src, src_stride, dst,
996 dst_stride);
997 vp9_fht8x8(src_diff, coeff, diff_stride, tx_type);
998 vpx_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant,
999 p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
1000 scan_order->scan, scan_order->iscan);
1001 }
1002 if (args->enable_coeff_opt && !x->skip_recode) {
1003 *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
1004 }
1005 if (!x->skip_encode && *eob)
1006 vp9_iht8x8_add(tx_type, dqcoeff, dst, dst_stride, *eob);
1007 break;
1008 default:
1009 assert(tx_size == TX_4X4);
1010 if (!x->skip_recode) {
1011 vpx_subtract_block(4, 4, src_diff, diff_stride, src, src_stride, dst,
1012 dst_stride);
1013 if (tx_type != DCT_DCT)
1014 vp9_fht4x4(src_diff, coeff, diff_stride, tx_type);
1015 else
1016 x->fwd_txfm4x4(src_diff, coeff, diff_stride);
1017 vpx_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant,
1018 p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
1019 scan_order->scan, scan_order->iscan);
1020 }
1021 if (args->enable_coeff_opt && !x->skip_recode) {
1022 *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
1023 }
1024 if (!x->skip_encode && *eob) {
1025 if (tx_type == DCT_DCT)
1026 // this is like vp9_short_idct4x4 but has a special case around eob<=1
1027 // which is significant (not just an optimization) for the lossless
1028 // case.
1029 x->inv_txfm_add(dqcoeff, dst, dst_stride, *eob);
1030 else
1031 vp9_iht4x4_16_add(dqcoeff, dst, dst_stride, tx_type);
1032 }
1033 break;
1034 }
1035 if (*eob) *(args->skip) = 0;
1036 }
1037
// Encodes all intra-predicted transform blocks of one plane: optionally seeds
// the entropy contexts used for coefficient optimization, then walks every
// transform block in the plane through vp9_encode_block_intra.
void vp9_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane,
                                  int enable_optimize_b) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  struct optimize_ctx ctx;
  // Coefficient optimization only happens when it is requested, the encoder's
  // optimize flag is set, and coefficients will actually be (re)computed.
  const int use_coeff_opt = enable_optimize_b && x->optimize &&
                            (!x->skip_recode || !x->skip_optimize);
#if CONFIG_MISMATCH_DEBUG
  // TODO(angiebird): make mismatch_debug support intra mode
  struct encode_b_args arg = { x,
                               use_coeff_opt ? enable_optimize_b : 0,
                               ctx.ta[plane],
                               ctx.tl[plane],
                               &xd->mi[0]->skip,
                               0,
                               0,
                               0 };
#else
  struct encode_b_args arg = { x, use_coeff_opt ? enable_optimize_b : 0,
                               ctx.ta[plane], ctx.tl[plane],
                               &xd->mi[0]->skip };
#endif

  if (use_coeff_opt) {
    // Fill the above/left entropy contexts for this plane; the per-block
    // encoder reads and updates them through arg.ta/arg.tl.
    const struct macroblockd_plane *const pd = &xd->plane[plane];
    const TX_SIZE tx_size =
        plane ? get_uv_tx_size(xd->mi[0], pd) : xd->mi[0]->tx_size;
    vp9_get_entropy_contexts(bsize, tx_size, pd, ctx.ta[plane], ctx.tl[plane]);
  }

  vp9_foreach_transformed_block_in_plane(xd, bsize, plane,
                                         vp9_encode_block_intra, &arg);
}
1066