• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "./vp9_rtcd.h"
12 #include "./vpx_config.h"
13 #include "./vpx_dsp_rtcd.h"
14 
15 #include "vpx_dsp/quantize.h"
16 #include "vpx_mem/vpx_mem.h"
17 #include "vpx_ports/mem.h"
18 
19 #if CONFIG_MISMATCH_DEBUG
20 #include "vpx_util/vpx_debug_util.h"
21 #endif
22 
23 #include "vp9/common/vp9_idct.h"
24 #include "vp9/common/vp9_reconinter.h"
25 #include "vp9/common/vp9_reconintra.h"
26 #include "vp9/common/vp9_scan.h"
27 
28 #include "vp9/encoder/vp9_encodemb.h"
29 #include "vp9/encoder/vp9_rd.h"
30 #include "vp9/encoder/vp9_tokenize.h"
31 
// Per-plane scratch arrays of entropy contexts, 16 entries per plane.
// vp9_encode_sb() hands ta[plane] / tl[plane] to encode_block(), which
// indexes ta by transform-block column and tl by transform-block row.
// NOTE(review): "ta"/"tl" presumably stand for the above/left contexts --
// confirm against vp9_get_entropy_contexts().
32 struct optimize_ctx {
33   ENTROPY_CONTEXT ta[MAX_MB_PLANE][16];
34   ENTROPY_CONTEXT tl[MAX_MB_PLANE][16];
35 };
36 
// Fills x->plane[plane].src_diff with the block difference between the
// plane's source buffer and its destination buffer, as computed by
// vpx_subtract_block() (or the high-bitdepth variant when the current frame
// buffer is flagged YV12_FLAG_HIGHBITDEPTH).
void vp9_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
  struct macroblock_plane *const enc_plane = &x->plane[plane];
  const struct macroblockd_plane *const dec_plane = &x->e_mbd.plane[plane];
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, dec_plane);
  // The lookup tables count 4x4 units, so scale by 4 to get samples.
  const int width = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int height = 4 * num_4x4_blocks_high_lookup[plane_bsize];

#if CONFIG_VP9_HIGHBITDEPTH
  if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    // The residual buffer is packed, hence its stride equals the width.
    vpx_highbd_subtract_block(height, width, enc_plane->src_diff, width,
                              enc_plane->src.buf, enc_plane->src.stride,
                              dec_plane->dst.buf, dec_plane->dst.stride,
                              x->e_mbd.bd);
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  // The residual buffer is packed, hence its stride equals the width.
  vpx_subtract_block(height, width, enc_plane->src_diff, width,
                     enc_plane->src.buf, enc_plane->src.stride,
                     dec_plane->dst.buf, dec_plane->dst.stride);
}
55 
// Scale factors applied to mb->rdmult inside vp9_optimize_b(). The first
// index is the result of is_inter_block() for the current mode info, the
// second index is the plane type.
56 static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] = {
57   { 10, 6 },
58   { 8, 5 },
59 };
60 
// Right-shifts the magnitude of 'num' and then re-applies its sign, so a
// negative value is never shifted directly.
// Note: 'num' is evaluated more than once; do not pass an expression with
// side effects.
61 // 'num' can be negative, but 'shift' must be non-negative.
62 #define RIGHT_SHIFT_POSSIBLY_NEGATIVE(num, shift) \
63   (((num) >= 0) ? (num) >> (shift) : -((-(num)) >> (shift)))
64 
// Greedy rate-distortion refinement of one transform block's quantized
// coefficients.
//
// Visits the first p->eobs[block] coefficients in scan order. For every
// non-zero quantized value x it compares keeping x against the alternative
// whose magnitude is one smaller, and writes the cheaper choice back to
// qcoeff/dqcoeff. While doing so it remembers the position at which ending
// the block would have produced the lowest accumulated RD cost; coefficients
// after that position are cleared at the end (unless the sharpness rule near
// the bottom of the function decides otherwise).
//
//   mb       macroblock state; qcoeff, dqcoeff and eobs for this block are
//            modified in place.
//   plane    plane index selecting mb->plane[] / mb->e_mbd.plane[].
//   block    block offset used with BLOCK_OFFSET and as the eobs index.
//   tx_size  transform size of the block.
//   ctx      entropy context used for the first coefficient.
//
// Returns the new end-of-block position, which is also stored in
// mb->plane[plane].eobs[block].
vp9_optimize_b(MACROBLOCK * mb,int plane,int block,TX_SIZE tx_size,int ctx)65 int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
66                    int ctx) {
67   MACROBLOCKD *const xd = &mb->e_mbd;
68   struct macroblock_plane *const p = &mb->plane[plane];
69   struct macroblockd_plane *const pd = &xd->plane[plane];
70   const int ref = is_inter_block(xd->mi[0]);
71   uint8_t token_cache[1024];
72   const tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
73   tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
74   tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
75   const int eob = p->eobs[block];
76   const PLANE_TYPE plane_type = get_plane_type(plane);
      // Upper bound on eob for this transform size: 16 * 4^tx_size.
77   const int default_eob = 16 << (tx_size << 1);
      // 1 only for 32x32 transforms, 0 otherwise.
78   const int shift = (tx_size == TX_32X32);
79   const int16_t *const dequant_ptr = pd->dequant;
80   const uint8_t *const band_translate = get_band_translate(tx_size);
81   const scan_order *const so = get_scan(xd, tx_size, plane_type, block);
82   const int16_t *const scan = so->scan;
83   const int16_t *const nb = so->neighbors;
84   const MODE_INFO *mbmi = xd->mi[0];
85   const int sharpness = mb->sharpness;
86   const int64_t rdadj = (int64_t)mb->rdmult * plane_rd_mult[ref][plane_type];
      // sharpness == 0: rdadj / 2. Otherwise rdadj is scaled by
      // (8 - sharpness + segment_id) / 16.
87   const int64_t rdmult =
88       (sharpness == 0 ? rdadj >> 1
89                       : (rdadj * (8 - sharpness + mbmi->segment_id)) >> 4);
90 
91   const int64_t rddiv = mb->rddiv;
92   int64_t rd_cost0, rd_cost1;
93   int64_t rate0, rate1;
94   int16_t t0, t1;
95   int i, final_eob;
96   int count_high_values_after_eob = 0;
97 #if CONFIG_VP9_HIGHBITDEPTH
98   const uint16_t *cat6_high_cost = vp9_get_high_cost_table(xd->bd);
99 #else
100   const uint16_t *cat6_high_cost = vp9_get_high_cost_table(8);
101 #endif
102   unsigned int(*const token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
103       mb->token_costs[tx_size][plane_type][ref];
104   unsigned int(*token_costs_cur)[2][COEFF_CONTEXTS][ENTROPY_TOKENS];
105   int64_t eob_cost0, eob_cost1;
106   const int ctx0 = ctx;
107   int64_t accu_rate = 0;
108   // Initialized to the worst possible error for the largest transform size.
109   // This ensures that it never goes negative.
110   int64_t accu_error = ((int64_t)1) << 50;
111   int64_t best_block_rd_cost = INT64_MAX;
112   int x_prev = 1;
113   tran_low_t before_best_eob_qc = 0;
114   tran_low_t before_best_eob_dqc = 0;
115 
116   assert((!plane_type && !plane) || (plane_type && plane));
117   assert(eob <= default_eob);
118 
      // Seed token_cache with the energy class of every coefficient before
      // the current eob, so get_coef_context() can be used below.
119   for (i = 0; i < eob; i++) {
120     const int rc = scan[i];
121     token_cache[rc] = vp9_pt_energy_class[vp9_get_token(qcoeff[rc])];
122   }
123   final_eob = 0;
124 
125   // Initial RD cost.
      // This is the cost of signalling EOB immediately (an all-zero block).
126   token_costs_cur = token_costs + band_translate[0];
127   rate0 = (*token_costs_cur)[0][ctx0][EOB_TOKEN];
128   best_block_rd_cost = RDCOST(rdmult, rddiv, rate0, accu_error);
129 
130   // For each token, pick one of two choices greedily:
131   // (i) First candidate: Keep current quantized value, OR
132   // (ii) Second candidate: Reduce quantized value by 1.
133   for (i = 0; i < eob; i++) {
134     const int rc = scan[i];
135     const int x = qcoeff[rc];
136     const int band_cur = band_translate[i];
137     const int ctx_cur = (i == 0) ? ctx : get_coef_context(nb, token_cache, i);
        // The second index of the cost table is 1 when the previously
        // visited coefficient ended up as zero.
138     const int token_tree_sel_cur = (x_prev == 0);
139     token_costs_cur = token_costs + band_cur;
140     if (x == 0) {  // No need to search
141       const int token = vp9_get_token(x);
142       rate0 = (*token_costs_cur)[token_tree_sel_cur][ctx_cur][token];
143       accu_rate += rate0;
144       x_prev = 0;
145       // Note: accu_error does not change.
146     } else {
          // dequant_ptr[0] is used for raster position 0, dequant_ptr[1] for
          // every other position.
147       const int dqv = dequant_ptr[rc != 0];
148       // Compute the distortion for quantizing to 0.
149       const int diff_for_zero_raw = (0 - coeff[rc]) * (1 << shift);
150       const int diff_for_zero =
151 #if CONFIG_VP9_HIGHBITDEPTH
152           (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
153               ? RIGHT_SHIFT_POSSIBLY_NEGATIVE(diff_for_zero_raw, xd->bd - 8)
154               :
155 #endif
156               diff_for_zero_raw;
157       const int64_t distortion_for_zero =
158           (int64_t)diff_for_zero * diff_for_zero;
159 
160       // Compute the distortion for the first candidate
161       const int diff0_raw = (dqcoeff[rc] - coeff[rc]) * (1 << shift);
162       const int diff0 =
163 #if CONFIG_VP9_HIGHBITDEPTH
164           (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
165               ? RIGHT_SHIFT_POSSIBLY_NEGATIVE(diff0_raw, xd->bd - 8)
166               :
167 #endif  // CONFIG_VP9_HIGHBITDEPTH
168               diff0_raw;
169       const int64_t distortion0 = (int64_t)diff0 * diff0;
170 
171       // Compute the distortion for the second candidate
172       const int sign = -(x < 0);        // -1 if x is negative and 0 otherwise.
173       const int x1 = x - 2 * sign - 1;  // abs(x1) = abs(x) - 1.
174       int64_t distortion1;
175       if (x1 != 0) {
176         const int dqv_step =
177 #if CONFIG_VP9_HIGHBITDEPTH
178             (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? dqv >> (xd->bd - 8)
179                                                           :
180 #endif  // CONFIG_VP9_HIGHBITDEPTH
181                                                           dqv;
            // (v + sign) ^ sign yields v when sign == 0 and -v when
            // sign == -1, i.e. the step carries the sign of x.
182         const int diff_step = (dqv_step + sign) ^ sign;
183         const int diff1 = diff0 - diff_step;
184         assert(dqv > 0);  // We aren't right shifting a negative number above.
185         distortion1 = (int64_t)diff1 * diff1;
186       } else {
187         distortion1 = distortion_for_zero;
188       }
189       {
190         // Calculate RDCost for current coeff for the two candidates.
191         const int64_t base_bits0 = vp9_get_token_cost(x, &t0, cat6_high_cost);
192         const int64_t base_bits1 = vp9_get_token_cost(x1, &t1, cat6_high_cost);
193         rate0 =
194             base_bits0 + (*token_costs_cur)[token_tree_sel_cur][ctx_cur][t0];
195         rate1 =
196             base_bits1 + (*token_costs_cur)[token_tree_sel_cur][ctx_cur][t1];
197       }
198       {
199         int rdcost_better_for_x1, eob_rdcost_better_for_x1;
200         int dqc0, dqc1;
201         int64_t best_eob_cost_cur;
202         int use_x1;
203 
204         // Calculate RD Cost effect on the next coeff for the two candidates.
205         int64_t next_bits0 = 0;
206         int64_t next_bits1 = 0;
207         int64_t next_eob_bits0 = 0;
208         int64_t next_eob_bits1 = 0;
209         if (i < default_eob - 1) {
210           int ctx_next, token_tree_sel_next;
211           const int band_next = band_translate[i + 1];
212           const int token_next =
213               (i + 1 != eob) ? vp9_get_token(qcoeff[scan[i + 1]]) : EOB_TOKEN;
214           unsigned int(*const token_costs_next)[2][COEFF_CONTEXTS]
215                                                [ENTROPY_TOKENS] =
216                                                    token_costs + band_next;
              // token_cache[rc] is temporarily set to each candidate's energy
              // class so that the next coefficient's context reflects it.
217           token_cache[rc] = vp9_pt_energy_class[t0];
218           ctx_next = get_coef_context(nb, token_cache, i + 1);
219           token_tree_sel_next = (x == 0);
220           next_bits0 =
221               (*token_costs_next)[token_tree_sel_next][ctx_next][token_next];
222           next_eob_bits0 =
223               (*token_costs_next)[token_tree_sel_next][ctx_next][EOB_TOKEN];
224           token_cache[rc] = vp9_pt_energy_class[t1];
225           ctx_next = get_coef_context(nb, token_cache, i + 1);
226           token_tree_sel_next = (x1 == 0);
227           next_bits1 =
228               (*token_costs_next)[token_tree_sel_next][ctx_next][token_next];
229           if (x1 != 0) {
230             next_eob_bits1 =
231                 (*token_costs_next)[token_tree_sel_next][ctx_next][EOB_TOKEN];
232           }
233         }
234 
235         // Compare the total RD costs for two candidates.
236         rd_cost0 = RDCOST(rdmult, rddiv, (rate0 + next_bits0), distortion0);
237         rd_cost1 = RDCOST(rdmult, rddiv, (rate1 + next_bits1), distortion1);
238         rdcost_better_for_x1 = (rd_cost1 < rd_cost0);
            // eob_cost0 / eob_cost1: accumulated cost of the block if EOB were
            // signalled right after this coefficient, for each candidate.
239         eob_cost0 = RDCOST(rdmult, rddiv, (accu_rate + rate0 + next_eob_bits0),
240                            (accu_error + distortion0 - distortion_for_zero));
241         eob_cost1 = eob_cost0;
242         if (x1 != 0) {
243           eob_cost1 =
244               RDCOST(rdmult, rddiv, (accu_rate + rate1 + next_eob_bits1),
245                      (accu_error + distortion1 - distortion_for_zero));
246           eob_rdcost_better_for_x1 = (eob_cost1 < eob_cost0);
247         } else {
248           eob_rdcost_better_for_x1 = 0;
249         }
250 
251         // Calculate the two candidate de-quantized values.
252         dqc0 = dqcoeff[rc];
253         dqc1 = 0;
254         if (rdcost_better_for_x1 + eob_rdcost_better_for_x1) {
255           if (x1 != 0) {
256             dqc1 = RIGHT_SHIFT_POSSIBLY_NEGATIVE(x1 * dqv, shift);
257           } else {
258             dqc1 = 0;
259           }
260         }
261 
262         // Pick and record the better quantized and de-quantized values.
263         if (rdcost_better_for_x1) {
264           qcoeff[rc] = x1;
265           dqcoeff[rc] = dqc1;
266           accu_rate += rate1;
267           accu_error += distortion1 - distortion_for_zero;
268           assert(distortion1 <= distortion_for_zero);
269           token_cache[rc] = vp9_pt_energy_class[t1];
270         } else {
271           accu_rate += rate0;
272           accu_error += distortion0 - distortion_for_zero;
273           assert(distortion0 <= distortion_for_zero);
274           token_cache[rc] = vp9_pt_energy_class[t0];
275         }
            // With sharpness enabled, count coefficients of magnitude > 1 seen
            // since the best eob position last moved (reset below).
276         if (sharpness > 0 && abs(qcoeff[rc]) > 1) count_high_values_after_eob++;
277         assert(accu_error >= 0);
278         x_prev = qcoeff[rc];  // Update based on selected quantized value.
279 
280         use_x1 = (x1 != 0) && eob_rdcost_better_for_x1;
281         best_eob_cost_cur = use_x1 ? eob_cost1 : eob_cost0;
282 
283         // Determine whether to move the eob position to i+1
284         if (best_eob_cost_cur < best_block_rd_cost) {
285           best_block_rd_cost = best_eob_cost_cur;
286           final_eob = i + 1;
287           count_high_values_after_eob = 0;
288           if (use_x1) {
289             before_best_eob_qc = x1;
290             before_best_eob_dqc = dqc1;
291           } else {
292             before_best_eob_qc = x;
293             before_best_eob_dqc = dqc0;
294           }
295         }
296       }
297     }
298   }
      // If coefficients of magnitude > 1 remain after the best eob, do not
      // truncate there: the eob becomes one past the last non-zero
      // coefficient and the greedy per-coefficient choices are kept.
299   if (count_high_values_after_eob > 0) {
300     final_eob = eob - 1;
301     for (; final_eob >= 0; final_eob--) {
302       const int rc = scan[final_eob];
303       const int x = qcoeff[rc];
304       if (x) {
305         break;
306       }
307     }
308     final_eob++;
309   } else {
310     assert(final_eob <= eob);
        // Store the value that was chosen for the last kept coefficient when
        // the best eob was recorded, then clear everything after it.
311     if (final_eob > 0) {
312       int rc;
313       assert(before_best_eob_qc != 0);
314       i = final_eob - 1;
315       rc = scan[i];
316       qcoeff[rc] = before_best_eob_qc;
317       dqcoeff[rc] = before_best_eob_dqc;
318     }
319     for (i = final_eob; i < eob; i++) {
320       int rc = scan[i];
321       qcoeff[rc] = 0;
322       dqcoeff[rc] = 0;
323     }
324   }
325   mb->plane[plane].eobs[block] = final_eob;
326   return final_eob;
327 }
328 #undef RIGHT_SHIFT_POSSIBLY_NEGATIVE
329 
fdct32x32(int rd_transform,const int16_t * src,tran_low_t * dst,int src_stride)330 static INLINE void fdct32x32(int rd_transform, const int16_t *src,
331                              tran_low_t *dst, int src_stride) {
332   if (rd_transform)
333     vpx_fdct32x32_rd(src, dst, src_stride);
334   else
335     vpx_fdct32x32(src, dst, src_stride);
336 }
337 
#if CONFIG_VP9_HIGHBITDEPTH
// High-bitdepth counterpart of fdct32x32(): a non-zero rd_transform selects
// vpx_highbd_fdct32x32_rd() instead of vpx_highbd_fdct32x32().
static INLINE void highbd_fdct32x32(int rd_transform, const int16_t *src,
                                    tran_low_t *dst, int src_stride) {
  if (!rd_transform) {
    vpx_highbd_fdct32x32(src, dst, src_stride);
    return;
  }
  vpx_highbd_fdct32x32_rd(src, dst, src_stride);
}
#endif  // CONFIG_VP9_HIGHBITDEPTH
347 
// Forward-transforms one transform block of the residual and quantizes it
// with the "fp" quantizers (vp9_quantize_fp / vp9_quantize_fp_32x32 or their
// high-bitdepth variants). Results are written to the block's coeff, qcoeff
// and dqcoeff buffers and to p->eobs[block].
//
// row/col locate the block inside the plane in units of 4 samples.
// The caller must have handled x->skip_block already.
void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, int row, int col,
                        BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  const scan_order *const so = &vp9_default_scan_orders[tx_size];
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  uint16_t *const eob = &p->eobs[block];
  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int16_t *const residual =
      &p->src_diff[4 * (row * diff_stride + col)];
  int n_coeffs;  // Coefficient count for the non-32x32 quantizer call.

  // skip block condition should be handled before this is called.
  assert(!x->skip_block);

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    switch (tx_size) {
      case TX_32X32:
        // 32x32 has its own quantizer, so finish here.
        highbd_fdct32x32(x->use_lp32x32fdct, residual, coeff, diff_stride);
        vp9_highbd_quantize_fp_32x32(coeff, 1024, p->round_fp, p->quant_fp,
                                     qcoeff, dqcoeff, pd->dequant, eob,
                                     so->scan, so->iscan);
        return;
      case TX_16X16:
        vpx_highbd_fdct16x16(residual, coeff, diff_stride);
        n_coeffs = 256;
        break;
      case TX_8X8:
        vpx_highbd_fdct8x8(residual, coeff, diff_stride);
        n_coeffs = 64;
        break;
      default:
        assert(tx_size == TX_4X4);
        x->fwd_txfm4x4(residual, coeff, diff_stride);
        n_coeffs = 16;
        break;
    }
    vp9_highbd_quantize_fp(coeff, n_coeffs, p->round_fp, p->quant_fp, qcoeff,
                           dqcoeff, pd->dequant, eob, so->scan, so->iscan);
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  switch (tx_size) {
    case TX_32X32:
      // 32x32 has its own quantizer, so finish here.
      fdct32x32(x->use_lp32x32fdct, residual, coeff, diff_stride);
      vp9_quantize_fp_32x32(coeff, 1024, p->round_fp, p->quant_fp, qcoeff,
                            dqcoeff, pd->dequant, eob, so->scan, so->iscan);
      return;
    case TX_16X16:
      vpx_fdct16x16(residual, coeff, diff_stride);
      n_coeffs = 256;
      break;
    case TX_8X8:
      vpx_fdct8x8(residual, coeff, diff_stride);
      n_coeffs = 64;
      break;
    default:
      assert(tx_size == TX_4X4);
      x->fwd_txfm4x4(residual, coeff, diff_stride);
      n_coeffs = 16;
      break;
  }
  vp9_quantize_fp(coeff, n_coeffs, p->round_fp, p->quant_fp, qcoeff, dqcoeff,
                  pd->dequant, eob, so->scan, so->iscan);
}
423 
// DC-only fast path: runs the "_1" forward transform variant for the block
// and quantizes with vpx_quantize_dc / vpx_quantize_dc_32x32 (or their
// high-bitdepth variants). Results are written to the block's coeff, qcoeff
// and dqcoeff buffers and to p->eobs[block].
//
// row/col locate the block inside the plane in units of 4 samples.
// The caller must have handled x->skip_block already.
void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block, int row, int col,
                        BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  uint16_t *const eob = &p->eobs[block];
  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int16_t *const residual =
      &p->src_diff[4 * (row * diff_stride + col)];
  int n_coeffs;  // Coefficient count for the non-32x32 quantizer call.

  // skip block condition should be handled before this is called.
  assert(!x->skip_block);

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    switch (tx_size) {
      case TX_32X32:
        // The 32x32 DC quantizer takes no coefficient count; finish here.
        vpx_highbd_fdct32x32_1(residual, coeff, diff_stride);
        vpx_highbd_quantize_dc_32x32(coeff, p->round, p->quant_fp[0], qcoeff,
                                     dqcoeff, pd->dequant[0], eob);
        return;
      case TX_16X16:
        vpx_highbd_fdct16x16_1(residual, coeff, diff_stride);
        n_coeffs = 256;
        break;
      case TX_8X8:
        vpx_highbd_fdct8x8_1(residual, coeff, diff_stride);
        n_coeffs = 64;
        break;
      default:
        assert(tx_size == TX_4X4);
        x->fwd_txfm4x4(residual, coeff, diff_stride);
        n_coeffs = 16;
        break;
    }
    vpx_highbd_quantize_dc(coeff, n_coeffs, p->round, p->quant_fp[0], qcoeff,
                           dqcoeff, pd->dequant[0], eob);
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  switch (tx_size) {
    case TX_32X32:
      // The 32x32 DC quantizer takes no coefficient count; finish here.
      vpx_fdct32x32_1(residual, coeff, diff_stride);
      vpx_quantize_dc_32x32(coeff, p->round, p->quant_fp[0], qcoeff, dqcoeff,
                            pd->dequant[0], eob);
      return;
    case TX_16X16:
      vpx_fdct16x16_1(residual, coeff, diff_stride);
      n_coeffs = 256;
      break;
    case TX_8X8:
      vpx_fdct8x8_1(residual, coeff, diff_stride);
      n_coeffs = 64;
      break;
    default:
      assert(tx_size == TX_4X4);
      x->fwd_txfm4x4(residual, coeff, diff_stride);
      n_coeffs = 16;
      break;
  }
  vpx_quantize_dc(coeff, n_coeffs, p->round, p->quant_fp[0], qcoeff, dqcoeff,
                  pd->dequant[0], eob);
}
492 
// Full forward transform and quantization of one transform block, using
// vpx_quantize_b / vpx_quantize_b_32x32 (or their high-bitdepth variants).
// Results are written to the block's coeff, qcoeff and dqcoeff buffers and
// to p->eobs[block].
//
// row/col locate the block inside the plane in units of 4 samples.
// The caller must have handled x->skip_block already.
void vp9_xform_quant(MACROBLOCK *x, int plane, int block, int row, int col,
                     BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  const scan_order *const so = &vp9_default_scan_orders[tx_size];
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  uint16_t *const eob = &p->eobs[block];
  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int16_t *const residual =
      &p->src_diff[4 * (row * diff_stride + col)];
  int n_coeffs;  // Coefficient count for the non-32x32 quantizer call.

  // skip block condition should be handled before this is called.
  assert(!x->skip_block);

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    switch (tx_size) {
      case TX_32X32:
        // 32x32 has its own quantizer, so finish here.
        highbd_fdct32x32(x->use_lp32x32fdct, residual, coeff, diff_stride);
        vpx_highbd_quantize_b_32x32(
            coeff, 1024, p->zbin, p->round, p->quant, p->quant_shift, qcoeff,
            dqcoeff, pd->dequant, eob, so->scan, so->iscan);
        return;
      case TX_16X16:
        vpx_highbd_fdct16x16(residual, coeff, diff_stride);
        n_coeffs = 256;
        break;
      case TX_8X8:
        vpx_highbd_fdct8x8(residual, coeff, diff_stride);
        n_coeffs = 64;
        break;
      default:
        assert(tx_size == TX_4X4);
        x->fwd_txfm4x4(residual, coeff, diff_stride);
        n_coeffs = 16;
        break;
    }
    vpx_highbd_quantize_b(coeff, n_coeffs, p->zbin, p->round, p->quant,
                          p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
                          so->scan, so->iscan);
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  switch (tx_size) {
    case TX_32X32:
      // 32x32 has its own quantizer, so finish here.
      fdct32x32(x->use_lp32x32fdct, residual, coeff, diff_stride);
      vpx_quantize_b_32x32(coeff, 1024, p->zbin, p->round, p->quant,
                           p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
                           so->scan, so->iscan);
      return;
    case TX_16X16:
      vpx_fdct16x16(residual, coeff, diff_stride);
      n_coeffs = 256;
      break;
    case TX_8X8:
      vpx_fdct8x8(residual, coeff, diff_stride);
      n_coeffs = 64;
      break;
    default:
      assert(tx_size == TX_4X4);
      x->fwd_txfm4x4(residual, coeff, diff_stride);
      n_coeffs = 16;
      break;
  }
  vpx_quantize_b(coeff, n_coeffs, p->zbin, p->round, p->quant, p->quant_shift,
                 qcoeff, dqcoeff, pd->dequant, eob, so->scan, so->iscan);
}
570 
encode_block(int plane,int block,int row,int col,BLOCK_SIZE plane_bsize,TX_SIZE tx_size,void * arg)571 static void encode_block(int plane, int block, int row, int col,
572                          BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) {
573   struct encode_b_args *const args = arg;
574 #if CONFIG_MISMATCH_DEBUG
575   int mi_row = args->mi_row;
576   int mi_col = args->mi_col;
577   int output_enabled = args->output_enabled;
578 #endif
579   MACROBLOCK *const x = args->x;
580   MACROBLOCKD *const xd = &x->e_mbd;
581   struct macroblock_plane *const p = &x->plane[plane];
582   struct macroblockd_plane *const pd = &xd->plane[plane];
583   tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
584   uint8_t *dst;
585   ENTROPY_CONTEXT *a, *l;
586   dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col];
587   a = &args->ta[col];
588   l = &args->tl[row];
589 
590   // TODO(jingning): per transformed block zero forcing only enabled for
591   // luma component. will integrate chroma components as well.
592   if (x->zcoeff_blk[tx_size][block] && plane == 0) {
593     p->eobs[block] = 0;
594     *a = *l = 0;
595 #if CONFIG_MISMATCH_DEBUG
596     goto encode_block_end;
597 #else
598     return;
599 #endif
600   }
601 
602   if (!x->skip_recode) {
603     if (x->quant_fp) {
604       // Encoding process for rtc mode
605       if (x->skip_txfm[0] == SKIP_TXFM_AC_DC && plane == 0) {
606         // skip forward transform
607         p->eobs[block] = 0;
608         *a = *l = 0;
609 #if CONFIG_MISMATCH_DEBUG
610         goto encode_block_end;
611 #else
612         return;
613 #endif
614       } else {
615         vp9_xform_quant_fp(x, plane, block, row, col, plane_bsize, tx_size);
616       }
617     } else {
618       if (max_txsize_lookup[plane_bsize] == tx_size) {
619         int txfm_blk_index = (plane << 2) + (block >> (tx_size << 1));
620         if (x->skip_txfm[txfm_blk_index] == SKIP_TXFM_NONE) {
621           // full forward transform and quantization
622           vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size);
623         } else if (x->skip_txfm[txfm_blk_index] == SKIP_TXFM_AC_ONLY) {
624           // fast path forward transform and quantization
625           vp9_xform_quant_dc(x, plane, block, row, col, plane_bsize, tx_size);
626         } else {
627           // skip forward transform
628           p->eobs[block] = 0;
629           *a = *l = 0;
630 #if CONFIG_MISMATCH_DEBUG
631           goto encode_block_end;
632 #else
633           return;
634 #endif
635         }
636       } else {
637         vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size);
638       }
639     }
640   }
641 
642   if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
643     const int ctx = combine_entropy_contexts(*a, *l);
644     *a = *l = vp9_optimize_b(x, plane, block, tx_size, ctx) > 0;
645   } else {
646     *a = *l = p->eobs[block] > 0;
647   }
648 
649   if (p->eobs[block]) *(args->skip) = 0;
650 
651   if (x->skip_encode || p->eobs[block] == 0) {
652 #if CONFIG_MISMATCH_DEBUG
653     goto encode_block_end;
654 #else
655     return;
656 #endif
657   }
658 #if CONFIG_VP9_HIGHBITDEPTH
659   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
660     uint16_t *const dst16 = CONVERT_TO_SHORTPTR(dst);
661     switch (tx_size) {
662       case TX_32X32:
663         vp9_highbd_idct32x32_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
664                                  xd->bd);
665         break;
666       case TX_16X16:
667         vp9_highbd_idct16x16_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
668                                  xd->bd);
669         break;
670       case TX_8X8:
671         vp9_highbd_idct8x8_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
672                                xd->bd);
673         break;
674       default:
675         assert(tx_size == TX_4X4);
676         // this is like vp9_short_idct4x4 but has a special case around eob<=1
677         // which is significant (not just an optimization) for the lossless
678         // case.
679         x->highbd_inv_txfm_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
680                                xd->bd);
681         break;
682     }
683 #if CONFIG_MISMATCH_DEBUG
684     goto encode_block_end;
685 #else
686     return;
687 #endif
688   }
689 #endif  // CONFIG_VP9_HIGHBITDEPTH
690 
691   switch (tx_size) {
692     case TX_32X32:
693       vp9_idct32x32_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
694       break;
695     case TX_16X16:
696       vp9_idct16x16_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
697       break;
698     case TX_8X8:
699       vp9_idct8x8_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
700       break;
701     default:
702       assert(tx_size == TX_4X4);
703       // this is like vp9_short_idct4x4 but has a special case around eob<=1
704       // which is significant (not just an optimization) for the lossless
705       // case.
706       x->inv_txfm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
707       break;
708   }
709 #if CONFIG_MISMATCH_DEBUG
710 encode_block_end:
711   if (output_enabled) {
712     int pixel_c, pixel_r;
713     int blk_w = 1 << (tx_size + TX_UNIT_SIZE_LOG2);
714     int blk_h = 1 << (tx_size + TX_UNIT_SIZE_LOG2);
715     mi_to_pixel_loc(&pixel_c, &pixel_r, mi_col, mi_row, col, row,
716                     pd->subsampling_x, pd->subsampling_y);
717     mismatch_record_block_tx(dst, pd->dst.stride, plane, pixel_c, pixel_r,
718                              blk_w, blk_h,
719                              xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH);
720   }
721 #endif
722 }
723 
// Per-transform-block callback used by vp9_encode_sby_pass1(): always runs
// vp9_xform_quant() and, when any coefficient survives, adds the inverse
// transform back into the destination buffer through the x->inv_txfm_add
// (or x->highbd_inv_txfm_add) hook.
//
// arg points to the MACROBLOCK being encoded.
static void encode_block_pass1(int plane, int block, int row, int col,
                               BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
                               void *arg) {
  MACROBLOCK *const x = (MACROBLOCK *)arg;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  uint8_t *const dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col];

  vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size);

  // Nothing to reconstruct when every coefficient quantized to zero.
  if (p->eobs[block] == 0) return;

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    x->highbd_inv_txfm_add(dqcoeff, CONVERT_TO_SHORTPTR(dst), pd->dst.stride,
                           p->eobs[block], xd->bd);
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH
  x->inv_txfm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
}
748 
// Encodes plane 0 only: computes the residual for the whole block of size
// |bsize|, then invokes encode_block_pass1() on each of its transform blocks.
void vp9_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize) {
  const int y_plane = 0;
  MACROBLOCKD *const xd = &x->e_mbd;

  vp9_subtract_plane(x, bsize, y_plane);
  vp9_foreach_transformed_block_in_plane(xd, bsize, y_plane,
                                         encode_block_pass1, x);
}
754 
// Runs encode_block() over every transform block of every plane of the block
// of size |bsize|.
//
// mi->skip is set to 1 on entry; the callback receives a pointer to it
// (presumably to clear it when coefficients remain -- confirm in
// encode_block). When x->skip is set, nothing else is done.
//
// |mi_row|, |mi_col| and |output_enabled| are only forwarded to the callback
// in CONFIG_MISMATCH_DEBUG builds and are otherwise unused.
void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row, int mi_col,
                   int output_enabled) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mi = xd->mi[0];
  struct optimize_ctx ctx;
  int plane;
#if CONFIG_MISMATCH_DEBUG
  struct encode_b_args arg = { x,         1,      NULL,   NULL,
                               &mi->skip, mi_row, mi_col, output_enabled };
#else
  struct encode_b_args arg = { x, 1, NULL, NULL, &mi->skip };
  (void)mi_row;
  (void)mi_col;
  (void)output_enabled;
#endif

  mi->skip = 1;

  if (x->skip) return;

  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
    // The residual is only recomputed when recoding is not being skipped.
    if (!x->skip_recode) vp9_subtract_plane(x, bsize, plane);

    // Coefficient optimization needs x->optimize and is suppressed only when
    // both skip_recode and skip_optimize are set.
    arg.enable_coeff_opt =
        x->optimize && !(x->skip_recode && x->skip_optimize);

    if (arg.enable_coeff_opt) {
      const struct macroblockd_plane *const pd = &xd->plane[plane];
      const TX_SIZE tx_size =
          (plane == 0) ? mi->tx_size : get_uv_tx_size(mi, pd);
      vp9_get_entropy_contexts(bsize, tx_size, pd, ctx.ta[plane],
                               ctx.tl[plane]);
    }

    arg.ta = ctx.ta[plane];
    arg.tl = ctx.tl[plane];

    vp9_foreach_transformed_block_in_plane(xd, bsize, plane, encode_block,
                                           &arg);
  }
}
794 
// Intra-codes a single transform block. |arg| points to a
// struct encode_b_args.
//
// Steps, in order:
//   1. Select prediction mode, transform type and scan order for |tx_size|.
//   2. Run vp9_predict_intra_block() into the destination buffer.
//   3. Unless x->skip_recode is set: subtract the prediction from the source,
//      forward transform, quantize, and (if args->enable_coeff_opt) run
//      vp9_optimize_b(), storing whether its result is > 0 in both the above
//      and left entropy contexts.
//   4. Unless x->skip_encode is set or the eob is zero: add the inverse
//      transform of the dequantized coefficients to the destination.
//   5. Clear *args->skip when the block has a non-zero eob.
//
// |row| and |col| are in units of 4 pixels.
void vp9_encode_block_intra(int plane, int block, int row, int col,
                            BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
                            void *arg) {
  struct encode_b_args *const args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mi = xd->mi[0];
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  uint16_t *const eob = &p->eobs[block];
  const int bwl = b_width_log2_lookup[plane_bsize];
  const int diff_stride = 4 * (1 << bwl);
  const int src_stride = p->src.stride;
  const int dst_stride = pd->dst.stride;
  uint8_t *const src = p->src.buf + 4 * (row * src_stride + col);
  uint8_t *const dst = pd->dst.buf + 4 * (row * dst_stride + col);
  int16_t *const src_diff = p->src_diff + 4 * (row * diff_stride + col);
  // When set, the predictor reads its reference pixels from the source frame
  // instead of the reconstruction.
  const int predict_from_src = x->skip_encode || x->fp_src_pred;
  const scan_order *so;
  TX_TYPE tx_type = DCT_DCT;
  PREDICTION_MODE mode;
  ENTROPY_CONTEXT *above_ctx = NULL;
  ENTROPY_CONTEXT *left_ctx = NULL;
  int entropy_ctx = 0;
  int tx_dim;    // Transform width and height in pixels.
  int n_coeffs;  // Coefficients per transform block (tx_dim * tx_dim).

  if (args->enable_coeff_opt) {
    above_ctx = args->ta + col;
    left_ctx = args->tl + row;
    entropy_ctx = combine_entropy_contexts(*above_ctx, *left_ctx);
  }

  // Prediction mode: chroma always uses uv_mode; luma uses the per-sub-block
  // mode for 4x4 transforms and the block mode otherwise.
  if (plane != 0) {
    mode = mi->uv_mode;
  } else if (tx_size == TX_4X4) {
    mode = get_y_mode(mi, block);
  } else {
    mode = mi->mode;
  }

  // Transform type and scan order: 32x32 keeps DCT_DCT with the default scan.
  if (tx_size == TX_32X32) {
    so = &vp9_default_scan_orders[TX_32X32];
  } else {
    if (tx_size == TX_4X4) {
      tx_type = get_tx_type_4x4(get_plane_type(plane), xd, block);
    } else {
      tx_type = get_tx_type(get_plane_type(plane), xd);
    }
    so = &vp9_scan_orders[tx_size][tx_type];
  }

  vp9_predict_intra_block(xd, bwl, tx_size, mode,
                          predict_from_src ? src : dst,
                          predict_from_src ? src_stride : dst_stride, dst,
                          dst_stride, col, row, plane);

  // skip block condition should be handled before this is called.
  assert(!x->skip_block);

  switch (tx_size) {
    case TX_32X32: tx_dim = 32; break;
    case TX_16X16: tx_dim = 16; break;
    case TX_8X8: tx_dim = 8; break;
    default:
      assert(tx_size == TX_4X4);
      tx_dim = 4;
      break;
  }
  n_coeffs = tx_dim * tx_dim;

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    uint16_t *const dst16 = CONVERT_TO_SHORTPTR(dst);

    if (!x->skip_recode) {
      vpx_highbd_subtract_block(tx_dim, tx_dim, src_diff, diff_stride, src,
                                src_stride, dst, dst_stride, xd->bd);

      // Forward transform.
      switch (tx_size) {
        case TX_32X32:
          highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
          break;
        case TX_16X16:
          if (tx_type == DCT_DCT) {
            vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
          } else {
            vp9_highbd_fht16x16(src_diff, coeff, diff_stride, tx_type);
          }
          break;
        case TX_8X8:
          if (tx_type == DCT_DCT) {
            vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
          } else {
            vp9_highbd_fht8x8(src_diff, coeff, diff_stride, tx_type);
          }
          break;
        default:
          if (tx_type != DCT_DCT) {
            vp9_highbd_fht4x4(src_diff, coeff, diff_stride, tx_type);
          } else {
            x->fwd_txfm4x4(src_diff, coeff, diff_stride);
          }
          break;
      }

      // Quantization; 32x32 has its own quantizer entry point.
      if (tx_size == TX_32X32) {
        vpx_highbd_quantize_b_32x32(coeff, n_coeffs, p->zbin, p->round,
                                    p->quant, p->quant_shift, qcoeff, dqcoeff,
                                    pd->dequant, eob, so->scan, so->iscan);
      } else {
        vpx_highbd_quantize_b(coeff, n_coeffs, p->zbin, p->round, p->quant,
                              p->quant_shift, qcoeff, dqcoeff, pd->dequant,
                              eob, so->scan, so->iscan);
      }

      if (args->enable_coeff_opt) {
        *above_ctx = *left_ctx =
            vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
      }
    }

    // Reconstruction.
    if (!x->skip_encode && *eob) {
      switch (tx_size) {
        case TX_32X32:
          vp9_highbd_idct32x32_add(dqcoeff, dst16, dst_stride, *eob, xd->bd);
          break;
        case TX_16X16:
          vp9_highbd_iht16x16_add(tx_type, dqcoeff, dst16, dst_stride, *eob,
                                  xd->bd);
          break;
        case TX_8X8:
          vp9_highbd_iht8x8_add(tx_type, dqcoeff, dst16, dst_stride, *eob,
                                xd->bd);
          break;
        default:
          if (tx_type == DCT_DCT) {
            // this is like vp9_short_idct4x4 but has a special case around
            // eob<=1 which is significant (not just an optimization) for the
            // lossless case.
            x->highbd_inv_txfm_add(dqcoeff, dst16, dst_stride, *eob, xd->bd);
          } else {
            vp9_highbd_iht4x4_16_add(dqcoeff, dst16, dst_stride, tx_type,
                                     xd->bd);
          }
          break;
      }
    }

    if (*eob) *(args->skip) = 0;
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  if (!x->skip_recode) {
    vpx_subtract_block(tx_dim, tx_dim, src_diff, diff_stride, src, src_stride,
                       dst, dst_stride);

    // Forward transform.
    switch (tx_size) {
      case TX_32X32:
        fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
        break;
      case TX_16X16:
        vp9_fht16x16(src_diff, coeff, diff_stride, tx_type);
        break;
      case TX_8X8: vp9_fht8x8(src_diff, coeff, diff_stride, tx_type); break;
      default:
        if (tx_type != DCT_DCT) {
          vp9_fht4x4(src_diff, coeff, diff_stride, tx_type);
        } else {
          x->fwd_txfm4x4(src_diff, coeff, diff_stride);
        }
        break;
    }

    // Quantization; 32x32 has its own quantizer entry point.
    if (tx_size == TX_32X32) {
      vpx_quantize_b_32x32(coeff, n_coeffs, p->zbin, p->round, p->quant,
                           p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
                           so->scan, so->iscan);
    } else {
      vpx_quantize_b(coeff, n_coeffs, p->zbin, p->round, p->quant,
                     p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
                     so->scan, so->iscan);
    }

    if (args->enable_coeff_opt) {
      *above_ctx = *left_ctx =
          vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0;
    }
  }

  // Reconstruction.
  if (!x->skip_encode && *eob) {
    switch (tx_size) {
      case TX_32X32:
        vp9_idct32x32_add(dqcoeff, dst, dst_stride, *eob);
        break;
      case TX_16X16:
        vp9_iht16x16_add(tx_type, dqcoeff, dst, dst_stride, *eob);
        break;
      case TX_8X8:
        vp9_iht8x8_add(tx_type, dqcoeff, dst, dst_stride, *eob);
        break;
      default:
        if (tx_type == DCT_DCT) {
          // this is like vp9_short_idct4x4 but has a special case around
          // eob<=1 which is significant (not just an optimization) for the
          // lossless case.
          x->inv_txfm_add(dqcoeff, dst, dst_stride, *eob);
        } else {
          vp9_iht4x4_16_add(dqcoeff, dst, dst_stride, tx_type);
        }
        break;
    }
  }

  if (*eob) *(args->skip) = 0;
}
1020 
// Runs vp9_encode_block_intra() on every transform block of |plane| within
// the block of size |bsize|.
//
// Coefficient optimization is requested from the callback only when
// |enable_optimize_b| and x->optimize are both set and skip_recode and
// skip_optimize are not both set; in that case the entropy contexts are
// loaded first. Otherwise the callback's enable_coeff_opt flag is cleared.
void vp9_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane,
                                  int enable_optimize_b) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mi = xd->mi[0];
  const int run_coeff_opt = enable_optimize_b && x->optimize &&
                            !(x->skip_recode && x->skip_optimize);
  struct optimize_ctx ctx;
#if CONFIG_MISMATCH_DEBUG
  // TODO(angiebird): make mismatch_debug support intra mode
  struct encode_b_args arg = {
    x, enable_optimize_b, ctx.ta[plane], ctx.tl[plane], &mi->skip, 0, 0, 0
  };
#else
  struct encode_b_args arg = { x, enable_optimize_b, ctx.ta[plane],
                               ctx.tl[plane], &mi->skip };
#endif

  if (!run_coeff_opt) {
    arg.enable_coeff_opt = 0;
  } else {
    const struct macroblockd_plane *const pd = &xd->plane[plane];
    const TX_SIZE tx_size =
        (plane == 0) ? mi->tx_size : get_uv_tx_size(mi, pd);
    vp9_get_entropy_contexts(bsize, tx_size, pd, ctx.ta[plane], ctx.tl[plane]);
  }

  vp9_foreach_transformed_block_in_plane(xd, bsize, plane,
                                         vp9_encode_block_intra, &arg);
}
1049