1 /*
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "./vp9_rtcd.h"
12 #include "./vpx_config.h"
13 #include "./vpx_dsp_rtcd.h"
14
15 #include "vpx_dsp/quantize.h"
16 #include "vpx_mem/vpx_mem.h"
17 #include "vpx_ports/mem.h"
18
19 #if CONFIG_MISMATCH_DEBUG
20 #include "vpx_util/vpx_debug_util.h"
21 #endif
22
23 #include "vp9/common/vp9_idct.h"
24 #include "vp9/common/vp9_reconinter.h"
25 #include "vp9/common/vp9_reconintra.h"
26 #include "vp9/common/vp9_scan.h"
27
28 #include "vp9/encoder/vp9_encodemb.h"
29 #include "vp9/encoder/vp9_rd.h"
30 #include "vp9/encoder/vp9_tokenize.h"
31
// Per-plane above/left entropy contexts used during coefficient
// optimization. 16 slots cover one 4x4 unit per column (ta) / row (tl) of
// the largest supported block; encode_block() indexes them as ta[col] and
// tl[row].
struct optimize_ctx {
  ENTROPY_CONTEXT ta[MAX_MB_PLANE][16];
  ENTROPY_CONTEXT tl[MAX_MB_PLANE][16];
};
36
// Compute the prediction residual (source minus reconstruction) for one
// entire plane of the given block size, writing it to p->src_diff with a
// stride equal to the plane block width in pixels.
void vp9_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
  struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &x->e_mbd.plane[plane];
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
  const int width = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int height = 4 * num_4x4_blocks_high_lookup[plane_bsize];

#if CONFIG_VP9_HIGHBITDEPTH
  if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    // High bit-depth frames need the bit-depth-aware subtraction.
    vpx_highbd_subtract_block(height, width, p->src_diff, width, p->src.buf,
                              p->src.stride, pd->dst.buf, pd->dst.stride,
                              x->e_mbd.bd);
  } else {
    vpx_subtract_block(height, width, p->src_diff, width, p->src.buf,
                       p->src.stride, pd->dst.buf, pd->dst.stride);
  }
#else
  vpx_subtract_block(height, width, p->src_diff, width, p->src.buf,
                     p->src.stride, pd->dst.buf, pd->dst.stride);
#endif  // CONFIG_VP9_HIGHBITDEPTH
}
55
// Rate multipliers applied to the RD lambda in vp9_optimize_b(), indexed as
// [ref][plane_type]: row 0 is intra, row 1 is inter (from is_inter_block());
// column 0 is the luma plane type, column 1 is chroma.
static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] = {
  { 10, 6 },
  { 8, 5 },
};

// 'num' can be negative, but 'shift' must be non-negative.
// Right-shifting a negative value is implementation-defined in C, so shift
// the magnitude and re-apply the sign instead.
#define RIGHT_SHIFT_POSSIBLY_NEGATIVE(num, shift) \
  (((num) >= 0) ? (num) >> (shift) : -((-(num)) >> (shift)))
64
// Greedy rate-distortion optimization of one transform block's quantized
// coefficients. For every nonzero coefficient it chooses between keeping the
// quantized value and shrinking its magnitude by one, and it simultaneously
// tracks the best position at which to truncate the block (the eob).
// qcoeff/dqcoeff and p->eobs[block] are updated in place.
// Returns the new eob.
int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
                   int ctx) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  struct macroblock_plane *const p = &mb->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  const int ref = is_inter_block(xd->mi[0]);
  uint8_t token_cache[1024];
  const tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  const int eob = p->eobs[block];
  const PLANE_TYPE plane_type = get_plane_type(plane);
  // Number of coefficients for this transform size (16, 64, 256 or 1024).
  const int default_eob = 16 << (tx_size << 1);
  // TX_32X32 carries one extra bit of coefficient scaling; see the
  // (1 << shift) factors and RIGHT_SHIFT_POSSIBLY_NEGATIVE uses below.
  const int shift = (tx_size == TX_32X32);
  const int16_t *const dequant_ptr = pd->dequant;
  const uint8_t *const band_translate = get_band_translate(tx_size);
  const scan_order *const so = get_scan(xd, tx_size, plane_type, block);
  const int16_t *const scan = so->scan;
  const int16_t *const nb = so->neighbors;
  const MODE_INFO *mbmi = xd->mi[0];
  const int sharpness = mb->sharpness;
  const int64_t rdadj = (int64_t)mb->rdmult * plane_rd_mult[ref][plane_type];
  // Nonzero sharpness biases the lambda (lower rate weight) per segment.
  const int64_t rdmult =
      (sharpness == 0 ? rdadj >> 1
                      : (rdadj * (8 - sharpness + mbmi->segment_id)) >> 4);

  const int64_t rddiv = mb->rddiv;
  int64_t rd_cost0, rd_cost1;
  int64_t rate0, rate1;
  int16_t t0, t1;
  int i, final_eob;
  // Counts selected coefficients with |q| > 1 past the current best eob;
  // only maintained when sharpness > 0.
  int count_high_values_after_eob = 0;
#if CONFIG_VP9_HIGHBITDEPTH
  const uint16_t *cat6_high_cost = vp9_get_high_cost_table(xd->bd);
#else
  const uint16_t *cat6_high_cost = vp9_get_high_cost_table(8);
#endif
  unsigned int(*const token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
      mb->token_costs[tx_size][plane_type][ref];
  unsigned int(*token_costs_cur)[2][COEFF_CONTEXTS][ENTROPY_TOKENS];
  int64_t eob_cost0, eob_cost1;
  const int ctx0 = ctx;
  int64_t accu_rate = 0;
  // Initialized to the worst possible error for the largest transform size.
  // This ensures that it never goes negative.
  int64_t accu_error = ((int64_t)1) << 50;
  int64_t best_block_rd_cost = INT64_MAX;
  int x_prev = 1;
  // Quantized/dequantized value of the coefficient at the best eob so far.
  tran_low_t before_best_eob_qc = 0;
  tran_low_t before_best_eob_dqc = 0;

  assert((!plane_type && !plane) || (plane_type && plane));
  assert(eob <= default_eob);

  // Seed the token cache with energy classes of the current quantization.
  for (i = 0; i < eob; i++) {
    const int rc = scan[i];
    token_cache[rc] = vp9_pt_energy_class[vp9_get_token(qcoeff[rc])];
  }
  final_eob = 0;

  // Initial RD cost: the whole block coded as an immediate EOB.
  token_costs_cur = token_costs + band_translate[0];
  rate0 = (*token_costs_cur)[0][ctx0][EOB_TOKEN];
  best_block_rd_cost = RDCOST(rdmult, rddiv, rate0, accu_error);

  // For each token, pick one of two choices greedily:
  // (i) First candidate: Keep current quantized value, OR
  // (ii) Second candidate: Reduce quantized value by 1.
  for (i = 0; i < eob; i++) {
    const int rc = scan[i];
    const int x = qcoeff[rc];
    const int band_cur = band_translate[i];
    const int ctx_cur = (i == 0) ? ctx : get_coef_context(nb, token_cache, i);
    const int token_tree_sel_cur = (x_prev == 0);
    token_costs_cur = token_costs + band_cur;
    if (x == 0) {  // No need to search
      const int token = vp9_get_token(x);
      rate0 = (*token_costs_cur)[token_tree_sel_cur][ctx_cur][token];
      accu_rate += rate0;
      x_prev = 0;
      // Note: accu_error does not change.
    } else {
      const int dqv = dequant_ptr[rc != 0];
      // Compute the distortion for quantizing to 0.
      const int diff_for_zero_raw = (0 - coeff[rc]) * (1 << shift);
      const int diff_for_zero =
#if CONFIG_VP9_HIGHBITDEPTH
          (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
              ? RIGHT_SHIFT_POSSIBLY_NEGATIVE(diff_for_zero_raw, xd->bd - 8)
              :
#endif
              diff_for_zero_raw;
      const int64_t distortion_for_zero =
          (int64_t)diff_for_zero * diff_for_zero;

      // Compute the distortion for the first candidate
      const int diff0_raw = (dqcoeff[rc] - coeff[rc]) * (1 << shift);
      const int diff0 =
#if CONFIG_VP9_HIGHBITDEPTH
          (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
              ? RIGHT_SHIFT_POSSIBLY_NEGATIVE(diff0_raw, xd->bd - 8)
              :
#endif  // CONFIG_VP9_HIGHBITDEPTH
              diff0_raw;
      const int64_t distortion0 = (int64_t)diff0 * diff0;

      // Compute the distortion for the second candidate
      const int sign = -(x < 0);        // -1 if x is negative and 0 otherwise.
      const int x1 = x - 2 * sign - 1;  // abs(x1) = abs(x) - 1.
      int64_t distortion1;
      if (x1 != 0) {
        const int dqv_step =
#if CONFIG_VP9_HIGHBITDEPTH
            (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? dqv >> (xd->bd - 8)
                                                          :
#endif  // CONFIG_VP9_HIGHBITDEPTH
                                                          dqv;
        // diff_step = sign(x) * dqv_step; shrinking |x| by one moves the
        // reconstruction by one dequant step toward zero.
        const int diff_step = (dqv_step + sign) ^ sign;
        const int diff1 = diff0 - diff_step;
        assert(dqv > 0);  // We aren't right shifting a negative number above.
        distortion1 = (int64_t)diff1 * diff1;
      } else {
        distortion1 = distortion_for_zero;
      }
      {
        // Calculate RDCost for current coeff for the two candidates.
        const int64_t base_bits0 = vp9_get_token_cost(x, &t0, cat6_high_cost);
        const int64_t base_bits1 = vp9_get_token_cost(x1, &t1, cat6_high_cost);
        rate0 =
            base_bits0 + (*token_costs_cur)[token_tree_sel_cur][ctx_cur][t0];
        rate1 =
            base_bits1 + (*token_costs_cur)[token_tree_sel_cur][ctx_cur][t1];
      }
      {
        int rdcost_better_for_x1, eob_rdcost_better_for_x1;
        int dqc0, dqc1;
        int64_t best_eob_cost_cur;
        int use_x1;

        // Calculate RD Cost effect on the next coeff for the two candidates.
        int64_t next_bits0 = 0;
        int64_t next_bits1 = 0;
        int64_t next_eob_bits0 = 0;
        int64_t next_eob_bits1 = 0;
        if (i < default_eob - 1) {
          int ctx_next, token_tree_sel_next;
          const int band_next = band_translate[i + 1];
          const int token_next =
              (i + 1 != eob) ? vp9_get_token(qcoeff[scan[i + 1]]) : EOB_TOKEN;
          unsigned int(*const token_costs_next)[2][COEFF_CONTEXTS]
                                               [ENTROPY_TOKENS] =
              token_costs + band_next;
          // Temporarily write each candidate's energy class into the cache
          // so the next coefficient's context reflects that choice.
          token_cache[rc] = vp9_pt_energy_class[t0];
          ctx_next = get_coef_context(nb, token_cache, i + 1);
          token_tree_sel_next = (x == 0);
          next_bits0 =
              (*token_costs_next)[token_tree_sel_next][ctx_next][token_next];
          next_eob_bits0 =
              (*token_costs_next)[token_tree_sel_next][ctx_next][EOB_TOKEN];
          token_cache[rc] = vp9_pt_energy_class[t1];
          ctx_next = get_coef_context(nb, token_cache, i + 1);
          token_tree_sel_next = (x1 == 0);
          next_bits1 =
              (*token_costs_next)[token_tree_sel_next][ctx_next][token_next];
          if (x1 != 0) {
            next_eob_bits1 =
                (*token_costs_next)[token_tree_sel_next][ctx_next][EOB_TOKEN];
          }
        }

        // Compare the total RD costs for two candidates.
        rd_cost0 = RDCOST(rdmult, rddiv, (rate0 + next_bits0), distortion0);
        rd_cost1 = RDCOST(rdmult, rddiv, (rate1 + next_bits1), distortion1);
        rdcost_better_for_x1 = (rd_cost1 < rd_cost0);
        // Cost of ending the block right after this coefficient, for each
        // candidate value of the coefficient.
        eob_cost0 = RDCOST(rdmult, rddiv, (accu_rate + rate0 + next_eob_bits0),
                           (accu_error + distortion0 - distortion_for_zero));
        eob_cost1 = eob_cost0;
        if (x1 != 0) {
          eob_cost1 =
              RDCOST(rdmult, rddiv, (accu_rate + rate1 + next_eob_bits1),
                     (accu_error + distortion1 - distortion_for_zero));
          eob_rdcost_better_for_x1 = (eob_cost1 < eob_cost0);
        } else {
          eob_rdcost_better_for_x1 = 0;
        }

        // Calculate the two candidate de-quantized values.
        dqc0 = dqcoeff[rc];
        dqc1 = 0;
        if (rdcost_better_for_x1 + eob_rdcost_better_for_x1) {
          if (x1 != 0) {
            dqc1 = RIGHT_SHIFT_POSSIBLY_NEGATIVE(x1 * dqv, shift);
          } else {
            dqc1 = 0;
          }
        }

        // Pick and record the better quantized and de-quantized values.
        if (rdcost_better_for_x1) {
          qcoeff[rc] = x1;
          dqcoeff[rc] = dqc1;
          accu_rate += rate1;
          accu_error += distortion1 - distortion_for_zero;
          assert(distortion1 <= distortion_for_zero);
          token_cache[rc] = vp9_pt_energy_class[t1];
        } else {
          accu_rate += rate0;
          accu_error += distortion0 - distortion_for_zero;
          assert(distortion0 <= distortion_for_zero);
          token_cache[rc] = vp9_pt_energy_class[t0];
        }
        if (sharpness > 0 && abs(qcoeff[rc]) > 1) count_high_values_after_eob++;
        assert(accu_error >= 0);
        x_prev = qcoeff[rc];  // Update based on selected quantized value.

        use_x1 = (x1 != 0) && eob_rdcost_better_for_x1;
        best_eob_cost_cur = use_x1 ? eob_cost1 : eob_cost0;

        // Determine whether to move the eob position to i+1
        if (best_eob_cost_cur < best_block_rd_cost) {
          best_block_rd_cost = best_eob_cost_cur;
          final_eob = i + 1;
          // Everything before the new eob is kept, so reset the counter.
          count_high_values_after_eob = 0;
          if (use_x1) {
            before_best_eob_qc = x1;
            before_best_eob_dqc = dqc1;
          } else {
            before_best_eob_qc = x;
            before_best_eob_dqc = dqc0;
          }
        }
      }
    }
  }
  if (count_high_values_after_eob > 0) {
    // Sharpness mode and truncation would drop a coefficient with |q| > 1:
    // keep all trailing nonzero coefficients by moving the eob back out to
    // the last nonzero position.
    final_eob = eob - 1;
    for (; final_eob >= 0; final_eob--) {
      const int rc = scan[final_eob];
      const int x = qcoeff[rc];
      if (x) {
        break;
      }
    }
    final_eob++;
  } else {
    assert(final_eob <= eob);
    if (final_eob > 0) {
      int rc;
      assert(before_best_eob_qc != 0);
      // Restore the winning candidate at the eob position (the main loop may
      // have left the other candidate in qcoeff/dqcoeff).
      i = final_eob - 1;
      rc = scan[i];
      qcoeff[rc] = before_best_eob_qc;
      dqcoeff[rc] = before_best_eob_dqc;
    }
    // Zero out everything past the chosen eob.
    for (i = final_eob; i < eob; i++) {
      int rc = scan[i];
      qcoeff[rc] = 0;
      dqcoeff[rc] = 0;
    }
  }
  mb->plane[plane].eobs[block] = final_eob;
  return final_eob;
}
328 #undef RIGHT_SHIFT_POSSIBLY_NEGATIVE
329
fdct32x32(int rd_transform,const int16_t * src,tran_low_t * dst,int src_stride)330 static INLINE void fdct32x32(int rd_transform, const int16_t *src,
331 tran_low_t *dst, int src_stride) {
332 if (rd_transform)
333 vpx_fdct32x32_rd(src, dst, src_stride);
334 else
335 vpx_fdct32x32(src, dst, src_stride);
336 }
337
#if CONFIG_VP9_HIGHBITDEPTH
// High bit-depth counterpart of fdct32x32(): selects the `_rd` variant when
// rd_transform is nonzero.
static INLINE void highbd_fdct32x32(int rd_transform, const int16_t *src,
                                    tran_low_t *dst, int src_stride) {
  void (*const forward_dct)(const int16_t *, tran_low_t *, int) =
      rd_transform ? vpx_highbd_fdct32x32_rd : vpx_highbd_fdct32x32;
  forward_dct(src, dst, src_stride);
}
#endif  // CONFIG_VP9_HIGHBITDEPTH
347
// Forward transform + fast ("fp") quantization of one transform block.
// Reads the residual at (row, col) from p->src_diff, writes coeff, qcoeff
// and dqcoeff, and updates the block's eob. The default scan order for
// tx_size is used.
void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, int row, int col,
                        BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  const scan_order *const sc = &vp9_default_scan_orders[tx_size];
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  uint16_t *const eob = &p->eobs[block];
  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int16_t *const src_diff = &p->src_diff[4 * (row * diff_stride + col)];
  // Skipped blocks must be filtered out by the caller.
  assert(!x->skip_block);

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    if (tx_size == TX_32X32) {
      highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
      vp9_highbd_quantize_fp_32x32(coeff, 1024, p->round_fp, p->quant_fp,
                                   qcoeff, dqcoeff, pd->dequant, eob, sc->scan,
                                   sc->iscan);
    } else if (tx_size == TX_16X16) {
      vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
      vp9_highbd_quantize_fp(coeff, 256, p->round_fp, p->quant_fp, qcoeff,
                             dqcoeff, pd->dequant, eob, sc->scan, sc->iscan);
    } else if (tx_size == TX_8X8) {
      vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
      vp9_highbd_quantize_fp(coeff, 64, p->round_fp, p->quant_fp, qcoeff,
                             dqcoeff, pd->dequant, eob, sc->scan, sc->iscan);
    } else {
      assert(tx_size == TX_4X4);
      x->fwd_txfm4x4(src_diff, coeff, diff_stride);
      vp9_highbd_quantize_fp(coeff, 16, p->round_fp, p->quant_fp, qcoeff,
                             dqcoeff, pd->dequant, eob, sc->scan, sc->iscan);
    }
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  if (tx_size == TX_32X32) {
    fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
    vp9_quantize_fp_32x32(coeff, 1024, p->round_fp, p->quant_fp, qcoeff,
                          dqcoeff, pd->dequant, eob, sc->scan, sc->iscan);
  } else if (tx_size == TX_16X16) {
    vpx_fdct16x16(src_diff, coeff, diff_stride);
    vp9_quantize_fp(coeff, 256, p->round_fp, p->quant_fp, qcoeff, dqcoeff,
                    pd->dequant, eob, sc->scan, sc->iscan);
  } else if (tx_size == TX_8X8) {
    vpx_fdct8x8(src_diff, coeff, diff_stride);
    vp9_quantize_fp(coeff, 64, p->round_fp, p->quant_fp, qcoeff, dqcoeff,
                    pd->dequant, eob, sc->scan, sc->iscan);
  } else {
    assert(tx_size == TX_4X4);
    x->fwd_txfm4x4(src_diff, coeff, diff_stride);
    vp9_quantize_fp(coeff, 16, p->round_fp, p->quant_fp, qcoeff, dqcoeff,
                    pd->dequant, eob, sc->scan, sc->iscan);
  }
}
423
// DC-only fast path: the `_1` forward transforms compute just the DC term,
// and only the DC coefficient is quantized. Updates coeff/qcoeff/dqcoeff and
// the block's eob.
void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block, int row, int col,
                        BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  uint16_t *const eob = &p->eobs[block];
  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int16_t *const src_diff = &p->src_diff[4 * (row * diff_stride + col)];
  // Skipped blocks must be filtered out by the caller.
  assert(!x->skip_block);

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    if (tx_size == TX_32X32) {
      vpx_highbd_fdct32x32_1(src_diff, coeff, diff_stride);
      vpx_highbd_quantize_dc_32x32(coeff, p->round, p->quant_fp[0], qcoeff,
                                   dqcoeff, pd->dequant[0], eob);
    } else if (tx_size == TX_16X16) {
      vpx_highbd_fdct16x16_1(src_diff, coeff, diff_stride);
      vpx_highbd_quantize_dc(coeff, 256, p->round, p->quant_fp[0], qcoeff,
                             dqcoeff, pd->dequant[0], eob);
    } else if (tx_size == TX_8X8) {
      vpx_highbd_fdct8x8_1(src_diff, coeff, diff_stride);
      vpx_highbd_quantize_dc(coeff, 64, p->round, p->quant_fp[0], qcoeff,
                             dqcoeff, pd->dequant[0], eob);
    } else {
      assert(tx_size == TX_4X4);
      // 4x4 has no DC-only transform; run the full forward transform.
      x->fwd_txfm4x4(src_diff, coeff, diff_stride);
      vpx_highbd_quantize_dc(coeff, 16, p->round, p->quant_fp[0], qcoeff,
                             dqcoeff, pd->dequant[0], eob);
    }
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  if (tx_size == TX_32X32) {
    vpx_fdct32x32_1(src_diff, coeff, diff_stride);
    vpx_quantize_dc_32x32(coeff, p->round, p->quant_fp[0], qcoeff, dqcoeff,
                          pd->dequant[0], eob);
  } else if (tx_size == TX_16X16) {
    vpx_fdct16x16_1(src_diff, coeff, diff_stride);
    vpx_quantize_dc(coeff, 256, p->round, p->quant_fp[0], qcoeff, dqcoeff,
                    pd->dequant[0], eob);
  } else if (tx_size == TX_8X8) {
    vpx_fdct8x8_1(src_diff, coeff, diff_stride);
    vpx_quantize_dc(coeff, 64, p->round, p->quant_fp[0], qcoeff, dqcoeff,
                    pd->dequant[0], eob);
  } else {
    assert(tx_size == TX_4X4);
    x->fwd_txfm4x4(src_diff, coeff, diff_stride);
    vpx_quantize_dc(coeff, 16, p->round, p->quant_fp[0], qcoeff, dqcoeff,
                    pd->dequant[0], eob);
  }
}
492
// Full forward transform + regular (zbin/round/quant/quant_shift)
// quantization of one transform block. Updates coeff/qcoeff/dqcoeff and the
// block's eob using the default scan order for tx_size.
void vp9_xform_quant(MACROBLOCK *x, int plane, int block, int row, int col,
                     BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  const scan_order *const sc = &vp9_default_scan_orders[tx_size];
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  uint16_t *const eob = &p->eobs[block];
  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int16_t *const src_diff = &p->src_diff[4 * (row * diff_stride + col)];
  // Skipped blocks must be filtered out by the caller.
  assert(!x->skip_block);

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    if (tx_size == TX_32X32) {
      highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
      vpx_highbd_quantize_b_32x32(coeff, 1024, p->zbin, p->round, p->quant,
                                  p->quant_shift, qcoeff, dqcoeff, pd->dequant,
                                  eob, sc->scan, sc->iscan);
    } else if (tx_size == TX_16X16) {
      vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
      vpx_highbd_quantize_b(coeff, 256, p->zbin, p->round, p->quant,
                            p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
                            sc->scan, sc->iscan);
    } else if (tx_size == TX_8X8) {
      vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
      vpx_highbd_quantize_b(coeff, 64, p->zbin, p->round, p->quant,
                            p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
                            sc->scan, sc->iscan);
    } else {
      assert(tx_size == TX_4X4);
      x->fwd_txfm4x4(src_diff, coeff, diff_stride);
      vpx_highbd_quantize_b(coeff, 16, p->zbin, p->round, p->quant,
                            p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
                            sc->scan, sc->iscan);
    }
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  if (tx_size == TX_32X32) {
    fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
    vpx_quantize_b_32x32(coeff, 1024, p->zbin, p->round, p->quant,
                         p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
                         sc->scan, sc->iscan);
  } else if (tx_size == TX_16X16) {
    vpx_fdct16x16(src_diff, coeff, diff_stride);
    vpx_quantize_b(coeff, 256, p->zbin, p->round, p->quant, p->quant_shift,
                   qcoeff, dqcoeff, pd->dequant, eob, sc->scan, sc->iscan);
  } else if (tx_size == TX_8X8) {
    vpx_fdct8x8(src_diff, coeff, diff_stride);
    vpx_quantize_b(coeff, 64, p->zbin, p->round, p->quant, p->quant_shift,
                   qcoeff, dqcoeff, pd->dequant, eob, sc->scan, sc->iscan);
  } else {
    assert(tx_size == TX_4X4);
    x->fwd_txfm4x4(src_diff, coeff, diff_stride);
    vpx_quantize_b(coeff, 16, p->zbin, p->round, p->quant, p->quant_shift,
                   qcoeff, dqcoeff, pd->dequant, eob, sc->scan, sc->iscan);
  }
}
570
// Per-transform-block callback used by vp9_encode_sb(): chooses a forward
// transform/quantization variant based on the skip_txfm / quant_fp flags,
// optionally runs vp9_optimize_b() on the coefficients, updates the
// above/left entropy contexts and the per-block skip flag, and finally adds
// the inverse transform of the dequantized coefficients back into the
// reconstruction buffer.
static void encode_block(int plane, int block, int row, int col,
                         BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) {
  struct encode_b_args *const args = arg;
#if CONFIG_MISMATCH_DEBUG
  int mi_row = args->mi_row;
  int mi_col = args->mi_col;
  int output_enabled = args->output_enabled;
#endif
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  uint8_t *dst;
  ENTROPY_CONTEXT *a, *l;
  dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col];
  // Above/left entropy context entries for this block position.
  a = &args->ta[col];
  l = &args->tl[row];

  // TODO(jingning): per transformed block zero forcing only enabled for
  // luma component. will integrate chroma components as well.
  if (x->zcoeff_blk[tx_size][block] && plane == 0) {
    p->eobs[block] = 0;
    *a = *l = 0;
#if CONFIG_MISMATCH_DEBUG
    goto encode_block_end;
#else
    return;
#endif
  }

  if (!x->skip_recode) {
    if (x->quant_fp) {
      // Encoding process for rtc mode
      if (x->skip_txfm[0] == SKIP_TXFM_AC_DC && plane == 0) {
        // skip forward transform
        p->eobs[block] = 0;
        *a = *l = 0;
#if CONFIG_MISMATCH_DEBUG
        goto encode_block_end;
#else
        return;
#endif
      } else {
        vp9_xform_quant_fp(x, plane, block, row, col, plane_bsize, tx_size);
      }
    } else {
      if (max_txsize_lookup[plane_bsize] == tx_size) {
        // skip_txfm entries are per max-size transform block; map this
        // (plane, block) pair to its entry.
        int txfm_blk_index = (plane << 2) + (block >> (tx_size << 1));
        if (x->skip_txfm[txfm_blk_index] == SKIP_TXFM_NONE) {
          // full forward transform and quantization
          vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size);
        } else if (x->skip_txfm[txfm_blk_index] == SKIP_TXFM_AC_ONLY) {
          // fast path forward transform and quantization
          vp9_xform_quant_dc(x, plane, block, row, col, plane_bsize, tx_size);
        } else {
          // skip forward transform
          p->eobs[block] = 0;
          *a = *l = 0;
#if CONFIG_MISMATCH_DEBUG
          goto encode_block_end;
#else
          return;
#endif
        }
      } else {
        vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size);
      }
    }
  }

  // Optionally refine the quantized coefficients; the stored context is
  // whether the block still has any nonzero coefficients.
  if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
    const int ctx = combine_entropy_contexts(*a, *l);
    *a = *l = vp9_optimize_b(x, plane, block, tx_size, ctx) > 0;
  } else {
    *a = *l = p->eobs[block] > 0;
  }

  // Any nonzero coefficients anywhere in the superblock clear its skip flag.
  if (p->eobs[block]) *(args->skip) = 0;

  if (x->skip_encode || p->eobs[block] == 0) {
#if CONFIG_MISMATCH_DEBUG
    goto encode_block_end;
#else
    return;
#endif
  }
#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    uint16_t *const dst16 = CONVERT_TO_SHORTPTR(dst);
    // Inverse transform and add the residual into the destination buffer.
    switch (tx_size) {
      case TX_32X32:
        vp9_highbd_idct32x32_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
                                 xd->bd);
        break;
      case TX_16X16:
        vp9_highbd_idct16x16_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
                                 xd->bd);
        break;
      case TX_8X8:
        vp9_highbd_idct8x8_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
                               xd->bd);
        break;
      default:
        assert(tx_size == TX_4X4);
        // this is like vp9_short_idct4x4 but has a special case around eob<=1
        // which is significant (not just an optimization) for the lossless
        // case.
        x->highbd_inv_txfm_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
                               xd->bd);
        break;
    }
#if CONFIG_MISMATCH_DEBUG
    goto encode_block_end;
#else
    return;
#endif
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  // Inverse transform and add the residual into the destination buffer.
  switch (tx_size) {
    case TX_32X32:
      vp9_idct32x32_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
      break;
    case TX_16X16:
      vp9_idct16x16_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
      break;
    case TX_8X8:
      vp9_idct8x8_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
      break;
    default:
      assert(tx_size == TX_4X4);
      // this is like vp9_short_idct4x4 but has a special case around eob<=1
      // which is significant (not just an optimization) for the lossless
      // case.
      x->inv_txfm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
      break;
  }
#if CONFIG_MISMATCH_DEBUG
encode_block_end:
  // Record the reconstructed block for encoder/decoder mismatch debugging.
  if (output_enabled) {
    int pixel_c, pixel_r;
    int blk_w = 1 << (tx_size + TX_UNIT_SIZE_LOG2);
    int blk_h = 1 << (tx_size + TX_UNIT_SIZE_LOG2);
    mi_to_pixel_loc(&pixel_c, &pixel_r, mi_col, mi_row, col, row,
                    pd->subsampling_x, pd->subsampling_y);
    mismatch_record_block_tx(dst, pd->dst.stride, plane, pixel_c, pixel_r,
                             blk_w, blk_h,
                             xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH);
  }
#endif
}
723
// Pass-1 per-transform-block callback: transform and quantize the block,
// then reconstruct by adding the inverse transform whenever any nonzero
// coefficients remain.
static void encode_block_pass1(int plane, int block, int row, int col,
                               BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
                               void *arg) {
  MACROBLOCK *const x = (MACROBLOCK *)arg;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  uint8_t *const dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col];

  vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size);

  // Nothing to reconstruct if the block quantized to all zeros.
  if (p->eobs[block] == 0) return;

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    x->highbd_inv_txfm_add(dqcoeff, CONVERT_TO_SHORTPTR(dst), pd->dst.stride,
                           p->eobs[block], xd->bd);
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH
  x->inv_txfm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
}
748
// Pass-1 encode of the luma plane only: subtract the prediction for plane 0,
// then transform/quantize/reconstruct each of its transform blocks via
// encode_block_pass1().
void vp9_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize) {
  vp9_subtract_plane(x, bsize, 0);
  vp9_foreach_transformed_block_in_plane(&x->e_mbd, bsize, 0,
                                         encode_block_pass1, x);
}
754
// Encode one superblock: for every plane, subtract the prediction, then run
// encode_block() over each transform block (with entropy contexts prepared
// when coefficient optimization is enabled). mi->skip starts at 1 and is
// cleared by encode_block() as soon as any block has nonzero coefficients.
// mi_row/mi_col/output_enabled are only used by CONFIG_MISMATCH_DEBUG builds.
void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row, int mi_col,
                   int output_enabled) {
  MACROBLOCKD *const xd = &x->e_mbd;
  struct optimize_ctx ctx;
  MODE_INFO *mi = xd->mi[0];
  int plane;
#if CONFIG_MISMATCH_DEBUG
  struct encode_b_args arg = { x, 1, NULL, NULL,
                               &mi->skip, mi_row, mi_col, output_enabled };
#else
  struct encode_b_args arg = { x, 1, NULL, NULL, &mi->skip };
  (void)mi_row;
  (void)mi_col;
  (void)output_enabled;
#endif

  mi->skip = 1;

  if (x->skip) return;

  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
    if (!x->skip_recode) vp9_subtract_plane(x, bsize, plane);

    if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
      const struct macroblockd_plane *const pd = &xd->plane[plane];
      // Chroma planes may use a different (smaller) transform size.
      const TX_SIZE tx_size = plane ? get_uv_tx_size(mi, pd) : mi->tx_size;
      vp9_get_entropy_contexts(bsize, tx_size, pd, ctx.ta[plane],
                               ctx.tl[plane]);
      arg.enable_coeff_opt = 1;
    } else {
      arg.enable_coeff_opt = 0;
    }
    arg.ta = ctx.ta[plane];
    arg.tl = ctx.tl[plane];

    vp9_foreach_transformed_block_in_plane(xd, bsize, plane, encode_block,
                                           &arg);
  }
}
794
vp9_encode_block_intra(int plane,int block,int row,int col,BLOCK_SIZE plane_bsize,TX_SIZE tx_size,void * arg)795 void vp9_encode_block_intra(int plane, int block, int row, int col,
796 BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
797 void *arg) {
798 struct encode_b_args *const args = arg;
799 MACROBLOCK *const x = args->x;
800 MACROBLOCKD *const xd = &x->e_mbd;
801 MODE_INFO *mi = xd->mi[0];
802 struct macroblock_plane *const p = &x->plane[plane];
803 struct macroblockd_plane *const pd = &xd->plane[plane];
804 tran_low_t *coeff = BLOCK_OFFSET(p->coeff, block);
805 tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
806 tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
807 const scan_order *scan_order;
808 TX_TYPE tx_type = DCT_DCT;
809 PREDICTION_MODE mode;
810 const int bwl = b_width_log2_lookup[plane_bsize];
811 const int diff_stride = 4 * (1 << bwl);
812 uint8_t *src, *dst;
813 int16_t *src_diff;
814 uint16_t *eob = &p->eobs[block];
815 const int src_stride = p->src.stride;
816 const int dst_stride = pd->dst.stride;
817 ENTROPY_CONTEXT *a = NULL;
818 ENTROPY_CONTEXT *l = NULL;
819 int entropy_ctx = 0;
820 dst = &pd->dst.buf[4 * (row * dst_stride + col)];
821 src = &p->src.buf[4 * (row * src_stride + col)];
822 src_diff = &p->src_diff[4 * (row * diff_stride + col)];
823 if (args->enable_coeff_opt) {
824 a = &args->ta[col];
825 l = &args->tl[row];
826 entropy_ctx = combine_entropy_contexts(*a, *l);
827 }
828
829 if (tx_size == TX_4X4) {
830 tx_type = get_tx_type_4x4(get_plane_type(plane), xd, block);
831 scan_order = &vp9_scan_orders[TX_4X4][tx_type];
832 mode = plane == 0 ? get_y_mode(xd->mi[0], block) : mi->uv_mode;
833 } else {
834 mode = plane == 0 ? mi->mode : mi->uv_mode;
835 if (tx_size == TX_32X32) {
836 scan_order = &vp9_default_scan_orders[TX_32X32];
837 } else {
838 tx_type = get_tx_type(get_plane_type(plane), xd);
839 scan_order = &vp9_scan_orders[tx_size][tx_type];
840 }
841 }
842
843 vp9_predict_intra_block(
844 xd, bwl, tx_size, mode, (x->skip_encode || x->fp_src_pred) ? src : dst,
845 (x->skip_encode || x->fp_src_pred) ? src_stride : dst_stride, dst,
846 dst_stride, col, row, plane);
847
848 // skip block condition should be handled before this is called.
849 assert(!x->skip_block);
850
851 #if CONFIG_VP9_HIGHBITDEPTH
852 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
853 uint16_t *const dst16 = CONVERT_TO_SHORTPTR(dst);
854 switch (tx_size) {
855 case TX_32X32:
856 if (!x->skip_recode) {
857 vpx_highbd_subtract_block(32, 32, src_diff, diff_stride, src,
858 src_stride, dst, dst_stride, xd->bd);
859 highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
860 vpx_highbd_quantize_b_32x32(
861 coeff, 1024, p->zbin, p->round, p->quant, p->quant_shift, qcoeff,
862 dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan);
863 }
864 if (args->enable_coeff_opt && !x->skip_recode) {
865 *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
866 }
867 if (!x->skip_encode && *eob) {
868 vp9_highbd_idct32x32_add(dqcoeff, dst16, dst_stride, *eob, xd->bd);
869 }
870 break;
871 case TX_16X16:
872 if (!x->skip_recode) {
873 vpx_highbd_subtract_block(16, 16, src_diff, diff_stride, src,
874 src_stride, dst, dst_stride, xd->bd);
875 if (tx_type == DCT_DCT)
876 vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
877 else
878 vp9_highbd_fht16x16(src_diff, coeff, diff_stride, tx_type);
879 vpx_highbd_quantize_b(coeff, 256, p->zbin, p->round, p->quant,
880 p->quant_shift, qcoeff, dqcoeff, pd->dequant,
881 eob, scan_order->scan, scan_order->iscan);
882 }
883 if (args->enable_coeff_opt && !x->skip_recode) {
884 *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
885 }
886 if (!x->skip_encode && *eob) {
887 vp9_highbd_iht16x16_add(tx_type, dqcoeff, dst16, dst_stride, *eob,
888 xd->bd);
889 }
890 break;
891 case TX_8X8:
892 if (!x->skip_recode) {
893 vpx_highbd_subtract_block(8, 8, src_diff, diff_stride, src,
894 src_stride, dst, dst_stride, xd->bd);
895 if (tx_type == DCT_DCT)
896 vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
897 else
898 vp9_highbd_fht8x8(src_diff, coeff, diff_stride, tx_type);
899 vpx_highbd_quantize_b(coeff, 64, p->zbin, p->round, p->quant,
900 p->quant_shift, qcoeff, dqcoeff, pd->dequant,
901 eob, scan_order->scan, scan_order->iscan);
902 }
903 if (args->enable_coeff_opt && !x->skip_recode) {
904 *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
905 }
906 if (!x->skip_encode && *eob) {
907 vp9_highbd_iht8x8_add(tx_type, dqcoeff, dst16, dst_stride, *eob,
908 xd->bd);
909 }
910 break;
911 default:
912 assert(tx_size == TX_4X4);
913 if (!x->skip_recode) {
914 vpx_highbd_subtract_block(4, 4, src_diff, diff_stride, src,
915 src_stride, dst, dst_stride, xd->bd);
916 if (tx_type != DCT_DCT)
917 vp9_highbd_fht4x4(src_diff, coeff, diff_stride, tx_type);
918 else
919 x->fwd_txfm4x4(src_diff, coeff, diff_stride);
920 vpx_highbd_quantize_b(coeff, 16, p->zbin, p->round, p->quant,
921 p->quant_shift, qcoeff, dqcoeff, pd->dequant,
922 eob, scan_order->scan, scan_order->iscan);
923 }
924 if (args->enable_coeff_opt && !x->skip_recode) {
925 *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
926 }
927 if (!x->skip_encode && *eob) {
928 if (tx_type == DCT_DCT) {
929 // this is like vp9_short_idct4x4 but has a special case around
930 // eob<=1 which is significant (not just an optimization) for the
931 // lossless case.
932 x->highbd_inv_txfm_add(dqcoeff, dst16, dst_stride, *eob, xd->bd);
933 } else {
934 vp9_highbd_iht4x4_16_add(dqcoeff, dst16, dst_stride, tx_type,
935 xd->bd);
936 }
937 }
938 break;
939 }
940 if (*eob) *(args->skip) = 0;
941 return;
942 }
943 #endif // CONFIG_VP9_HIGHBITDEPTH
944
945 switch (tx_size) {
946 case TX_32X32:
947 if (!x->skip_recode) {
948 vpx_subtract_block(32, 32, src_diff, diff_stride, src, src_stride, dst,
949 dst_stride);
950 fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
951 vpx_quantize_b_32x32(coeff, 1024, p->zbin, p->round, p->quant,
952 p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
953 scan_order->scan, scan_order->iscan);
954 }
955 if (args->enable_coeff_opt && !x->skip_recode) {
956 *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
957 }
958 if (!x->skip_encode && *eob)
959 vp9_idct32x32_add(dqcoeff, dst, dst_stride, *eob);
960 break;
961 case TX_16X16:
962 if (!x->skip_recode) {
963 vpx_subtract_block(16, 16, src_diff, diff_stride, src, src_stride, dst,
964 dst_stride);
965 vp9_fht16x16(src_diff, coeff, diff_stride, tx_type);
966 vpx_quantize_b(coeff, 256, p->zbin, p->round, p->quant, p->quant_shift,
967 qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan,
968 scan_order->iscan);
969 }
970 if (args->enable_coeff_opt && !x->skip_recode) {
971 *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
972 }
973 if (!x->skip_encode && *eob)
974 vp9_iht16x16_add(tx_type, dqcoeff, dst, dst_stride, *eob);
975 break;
976 case TX_8X8:
977 if (!x->skip_recode) {
978 vpx_subtract_block(8, 8, src_diff, diff_stride, src, src_stride, dst,
979 dst_stride);
980 vp9_fht8x8(src_diff, coeff, diff_stride, tx_type);
981 vpx_quantize_b(coeff, 64, p->zbin, p->round, p->quant, p->quant_shift,
982 qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan,
983 scan_order->iscan);
984 }
985 if (args->enable_coeff_opt && !x->skip_recode) {
986 *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
987 }
988 if (!x->skip_encode && *eob)
989 vp9_iht8x8_add(tx_type, dqcoeff, dst, dst_stride, *eob);
990 break;
991 default:
992 assert(tx_size == TX_4X4);
993 if (!x->skip_recode) {
994 vpx_subtract_block(4, 4, src_diff, diff_stride, src, src_stride, dst,
995 dst_stride);
996 if (tx_type != DCT_DCT)
997 vp9_fht4x4(src_diff, coeff, diff_stride, tx_type);
998 else
999 x->fwd_txfm4x4(src_diff, coeff, diff_stride);
1000 vpx_quantize_b(coeff, 16, p->zbin, p->round, p->quant, p->quant_shift,
1001 qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan,
1002 scan_order->iscan);
1003 }
1004 if (args->enable_coeff_opt && !x->skip_recode) {
1005 *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
1006 }
1007 if (!x->skip_encode && *eob) {
1008 if (tx_type == DCT_DCT)
1009 // this is like vp9_short_idct4x4 but has a special case around eob<=1
1010 // which is significant (not just an optimization) for the lossless
1011 // case.
1012 x->inv_txfm_add(dqcoeff, dst, dst_stride, *eob);
1013 else
1014 vp9_iht4x4_16_add(dqcoeff, dst, dst_stride, tx_type);
1015 }
1016 break;
1017 }
1018 if (*eob) *(args->skip) = 0;
1019 }
1020
// Encode every intra transform block of one plane: set up (optional) trellis
// entropy contexts, then run vp9_encode_block_intra over each transform block
// via the per-plane iterator. enable_optimize_b requests the coefficient
// optimization pass; it is further gated by the encoder's speed settings.
void vp9_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane,
                                  int enable_optimize_b) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  struct optimize_ctx ctx;
  // Coefficient optimization only runs when requested, enabled on this
  // macroblock, and at least one of the recode/optimize passes is active.
  const int want_coeff_opt = enable_optimize_b && x->optimize &&
                             (!x->skip_recode || !x->skip_optimize);
#if CONFIG_MISMATCH_DEBUG
  // TODO(angiebird): make mismatch_debug support intra mode
  struct encode_b_args arg = { x,
                               want_coeff_opt,
                               ctx.ta[plane],
                               ctx.tl[plane],
                               &xd->mi[0]->skip,
                               0,
                               0,
                               0 };
#else
  struct encode_b_args arg = { x, want_coeff_opt, ctx.ta[plane], ctx.tl[plane],
                               &xd->mi[0]->skip };
#endif

  if (want_coeff_opt) {
    // Trellis optimization needs the above/left entropy contexts for this
    // plane at the plane's transform size.
    const struct macroblockd_plane *const pd = &xd->plane[plane];
    const TX_SIZE tx_size =
        plane ? get_uv_tx_size(xd->mi[0], pd) : xd->mi[0]->tx_size;
    vp9_get_entropy_contexts(bsize, tx_size, pd, ctx.ta[plane], ctx.tl[plane]);
  }

  vp9_foreach_transformed_block_in_plane(xd, bsize, plane,
                                         vp9_encode_block_intra, &arg);
}
1049