1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #ifndef AOM_AV1_ENCODER_RD_H_
13 #define AOM_AV1_ENCODER_RD_H_
14
15 #include <limits.h>
16
17 #include "av1/common/blockd.h"
18
19 #include "av1/encoder/block.h"
20 #include "av1/encoder/context_tree.h"
21 #include "av1/encoder/cost.h"
22
23 #ifdef __cplusplus
24 extern "C" {
25 #endif
26
// Distortion is multiplied by (1 << RDDIV_BITS) when it is combined with the
// rate term in RDCOST() / RDCOST_DBL().
#define RDDIV_BITS 7
// Right shift applied to rdmult to derive errorperbit (set_error_per_bit()).
#define RD_EPB_SHIFT 6

// Integer rate-distortion cost.
//   RM: rate multiplier, R: rate, D: distortion.
// The rate product is formed in int64_t and reduced by AV1_PROB_COST_SHIFT
// bits via ROUND_POWER_OF_TWO(); the distortion is scaled by
// (1 << RDDIV_BITS) in whatever type D has.
#define RDCOST(RM, R, D)                                            \
  (ROUND_POWER_OF_TWO(((int64_t)(R)) * (RM), AV1_PROB_COST_SHIFT) + \
   ((D) * (1 << RDDIV_BITS)))

// Floating-point counterpart of RDCOST(): same weighting, but the rate term
// is divided (not rounded/shifted) and the whole expression is a double.
#define RDCOST_DBL(RM, R, D)                                       \
  (((((double)(R)) * (RM)) / (double)(1 << AV1_PROB_COST_SHIFT)) + \
   ((double)(D) * (1 << RDDIV_BITS)))

// NOTE(review): presumably a quantizer-index threshold used by skip
// heuristics; its users are outside this header.
#define QIDX_SKIP_THRESH 115

// NOTE(review): motion-vector cost weights; their users (and the scale they
// are expressed in) are outside this header.
#define MV_COST_WEIGHT 108
#define MV_COST_WEIGHT_SUB 120

// NOTE(review): presumably the cap and step for the adaptive RD threshold
// factor (see av1_update_rd_thresh_fact()) -- confirm against its definition.
#define RD_THRESH_MAX_FACT 64
#define RD_THRESH_INC 1

// Factor to weigh the rate for switchable interp filters.
#define SWITCHABLE_INTERP_RATE_FACTOR 1
48
// Mode-threshold indices.
//
// The order of these enumerators must stay aligned with the mode order in
// const MODE_DEFINITION av1_mode_order[MAX_MODES] used in the rd code; do not
// reorder, insert, or remove entries here without updating that table.
enum {
  // ---- Single-reference modes: [THR_NEARESTMV, LAST_SINGLE_REF_MODES] ----
  // NEAREST
  THR_NEARESTMV = 0,
  THR_NEARESTL2,
  THR_NEARESTL3,
  THR_NEARESTB,
  THR_NEARESTA2,
  THR_NEARESTA,
  THR_NEARESTG,

  // NEW
  THR_NEWMV,
  THR_NEWL2,
  THR_NEWL3,
  THR_NEWB,
  THR_NEWA2,
  THR_NEWA,
  THR_NEWG,

  // NEAR
  THR_NEARMV,
  THR_NEARL2,
  THR_NEARL3,
  THR_NEARB,
  THR_NEARA2,
  THR_NEARA,
  THR_NEARG,

  // GLOBAL
  THR_GLOBALMV,
  THR_GLOBALL2,
  THR_GLOBALL3,
  THR_GLOBALB,
  THR_GLOBALA2,
  THR_GLOBALA,
  THR_GLOBALG,

  // ---- Compound-reference modes: up to LAST_COMP_REF_MODES ----
  // NEAREST_NEAREST for every reference pair.
  THR_COMP_NEAREST_NEARESTLA,
  THR_COMP_NEAREST_NEARESTL2A,
  THR_COMP_NEAREST_NEARESTL3A,
  THR_COMP_NEAREST_NEARESTGA,
  THR_COMP_NEAREST_NEARESTLB,
  THR_COMP_NEAREST_NEARESTL2B,
  THR_COMP_NEAREST_NEARESTL3B,
  THR_COMP_NEAREST_NEARESTGB,
  THR_COMP_NEAREST_NEARESTLA2,
  THR_COMP_NEAREST_NEARESTL2A2,
  THR_COMP_NEAREST_NEARESTL3A2,
  THR_COMP_NEAREST_NEARESTGA2,
  THR_COMP_NEAREST_NEARESTLL2,
  THR_COMP_NEAREST_NEARESTLL3,
  THR_COMP_NEAREST_NEARESTLG,
  THR_COMP_NEAREST_NEARESTBA,

  // Remaining compound modes, seven per reference pair.
  // Pair: LA
  THR_COMP_NEAR_NEARLA,
  THR_COMP_NEW_NEARESTLA,
  THR_COMP_NEAREST_NEWLA,
  THR_COMP_NEW_NEARLA,
  THR_COMP_NEAR_NEWLA,
  THR_COMP_NEW_NEWLA,
  THR_COMP_GLOBAL_GLOBALLA,

  // Pair: L2A
  THR_COMP_NEAR_NEARL2A,
  THR_COMP_NEW_NEARESTL2A,
  THR_COMP_NEAREST_NEWL2A,
  THR_COMP_NEW_NEARL2A,
  THR_COMP_NEAR_NEWL2A,
  THR_COMP_NEW_NEWL2A,
  THR_COMP_GLOBAL_GLOBALL2A,

  // Pair: L3A
  THR_COMP_NEAR_NEARL3A,
  THR_COMP_NEW_NEARESTL3A,
  THR_COMP_NEAREST_NEWL3A,
  THR_COMP_NEW_NEARL3A,
  THR_COMP_NEAR_NEWL3A,
  THR_COMP_NEW_NEWL3A,
  THR_COMP_GLOBAL_GLOBALL3A,

  // Pair: GA
  THR_COMP_NEAR_NEARGA,
  THR_COMP_NEW_NEARESTGA,
  THR_COMP_NEAREST_NEWGA,
  THR_COMP_NEW_NEARGA,
  THR_COMP_NEAR_NEWGA,
  THR_COMP_NEW_NEWGA,
  THR_COMP_GLOBAL_GLOBALGA,

  // Pair: LB
  THR_COMP_NEAR_NEARLB,
  THR_COMP_NEW_NEARESTLB,
  THR_COMP_NEAREST_NEWLB,
  THR_COMP_NEW_NEARLB,
  THR_COMP_NEAR_NEWLB,
  THR_COMP_NEW_NEWLB,
  THR_COMP_GLOBAL_GLOBALLB,

  // Pair: L2B
  THR_COMP_NEAR_NEARL2B,
  THR_COMP_NEW_NEARESTL2B,
  THR_COMP_NEAREST_NEWL2B,
  THR_COMP_NEW_NEARL2B,
  THR_COMP_NEAR_NEWL2B,
  THR_COMP_NEW_NEWL2B,
  THR_COMP_GLOBAL_GLOBALL2B,

  // Pair: L3B
  THR_COMP_NEAR_NEARL3B,
  THR_COMP_NEW_NEARESTL3B,
  THR_COMP_NEAREST_NEWL3B,
  THR_COMP_NEW_NEARL3B,
  THR_COMP_NEAR_NEWL3B,
  THR_COMP_NEW_NEWL3B,
  THR_COMP_GLOBAL_GLOBALL3B,

  // Pair: GB
  THR_COMP_NEAR_NEARGB,
  THR_COMP_NEW_NEARESTGB,
  THR_COMP_NEAREST_NEWGB,
  THR_COMP_NEW_NEARGB,
  THR_COMP_NEAR_NEWGB,
  THR_COMP_NEW_NEWGB,
  THR_COMP_GLOBAL_GLOBALGB,

  // Pair: LA2
  THR_COMP_NEAR_NEARLA2,
  THR_COMP_NEW_NEARESTLA2,
  THR_COMP_NEAREST_NEWLA2,
  THR_COMP_NEW_NEARLA2,
  THR_COMP_NEAR_NEWLA2,
  THR_COMP_NEW_NEWLA2,
  THR_COMP_GLOBAL_GLOBALLA2,

  // Pair: L2A2
  THR_COMP_NEAR_NEARL2A2,
  THR_COMP_NEW_NEARESTL2A2,
  THR_COMP_NEAREST_NEWL2A2,
  THR_COMP_NEW_NEARL2A2,
  THR_COMP_NEAR_NEWL2A2,
  THR_COMP_NEW_NEWL2A2,
  THR_COMP_GLOBAL_GLOBALL2A2,

  // Pair: L3A2
  THR_COMP_NEAR_NEARL3A2,
  THR_COMP_NEW_NEARESTL3A2,
  THR_COMP_NEAREST_NEWL3A2,
  THR_COMP_NEW_NEARL3A2,
  THR_COMP_NEAR_NEWL3A2,
  THR_COMP_NEW_NEWL3A2,
  THR_COMP_GLOBAL_GLOBALL3A2,

  // Pair: GA2
  THR_COMP_NEAR_NEARGA2,
  THR_COMP_NEW_NEARESTGA2,
  THR_COMP_NEAREST_NEWGA2,
  THR_COMP_NEW_NEARGA2,
  THR_COMP_NEAR_NEWGA2,
  THR_COMP_NEW_NEWGA2,
  THR_COMP_GLOBAL_GLOBALGA2,

  // Pair: LL2
  THR_COMP_NEAR_NEARLL2,
  THR_COMP_NEW_NEARESTLL2,
  THR_COMP_NEAREST_NEWLL2,
  THR_COMP_NEW_NEARLL2,
  THR_COMP_NEAR_NEWLL2,
  THR_COMP_NEW_NEWLL2,
  THR_COMP_GLOBAL_GLOBALLL2,

  // Pair: LL3
  THR_COMP_NEAR_NEARLL3,
  THR_COMP_NEW_NEARESTLL3,
  THR_COMP_NEAREST_NEWLL3,
  THR_COMP_NEW_NEARLL3,
  THR_COMP_NEAR_NEWLL3,
  THR_COMP_NEW_NEWLL3,
  THR_COMP_GLOBAL_GLOBALLL3,

  // Pair: LG
  THR_COMP_NEAR_NEARLG,
  THR_COMP_NEW_NEARESTLG,
  THR_COMP_NEAREST_NEWLG,
  THR_COMP_NEW_NEARLG,
  THR_COMP_NEAR_NEWLG,
  THR_COMP_NEW_NEWLG,
  THR_COMP_GLOBAL_GLOBALLG,

  // Pair: BA
  THR_COMP_NEAR_NEARBA,
  THR_COMP_NEW_NEARESTBA,
  THR_COMP_NEAREST_NEWBA,
  THR_COMP_NEW_NEARBA,
  THR_COMP_NEAR_NEWBA,
  THR_COMP_NEW_NEWBA,
  THR_COMP_GLOBAL_GLOBALBA,

  // ---- Remaining entries (intra prediction modes, judging by name) ----
  THR_DC,
  THR_PAETH,
  THR_SMOOTH,
  THR_SMOOTH_V,
  THR_SMOOTH_H,
  THR_H_PRED,
  THR_V_PRED,
  THR_D135_PRED,
  THR_D203_PRED,
  THR_D157_PRED,
  THR_D67_PRED,
  THR_D113_PRED,
  THR_D45_PRED,

  // Total number of entries above.
  MAX_MODES,

  // Range markers: index of the last single-/compound-reference entry, and
  // the corresponding one-past-the-end values.
  LAST_SINGLE_REF_MODES = THR_GLOBALG,
  MAX_SINGLE_REF_MODES = LAST_SINGLE_REF_MODES + 1,
  LAST_COMP_REF_MODES = THR_COMP_GLOBAL_GLOBALBA,
  MAX_COMP_REF_MODES = LAST_COMP_REF_MODES + 1
} UENUM1BYTE(THR_MODES);
250
// Threshold indices keyed by reference configuration rather than by full
// prediction mode: one entry per single reference, one per listed compound
// pair, one for intra, then the total count.
enum {
  // Single references.
  THR_LAST = 0,
  THR_LAST2,
  THR_LAST3,
  THR_BWDR,
  THR_ALTR2,
  THR_GOLD,
  THR_ALTR,

  // Compound pairs ending in A.
  THR_COMP_LA,
  THR_COMP_L2A,
  THR_COMP_L3A,
  THR_COMP_GA,

  // Compound pairs ending in B.
  THR_COMP_LB,
  THR_COMP_L2B,
  THR_COMP_L3B,
  THR_COMP_GB,

  // Compound pairs ending in A2.
  THR_COMP_LA2,
  THR_COMP_L2A2,
  THR_COMP_L3A2,
  THR_COMP_GA2,

  THR_INTRA,

  // Number of entries above.
  MAX_REFS
} UENUM1BYTE(THR_MODES_SUB8X8);
279
280 typedef struct RD_OPT {
281 // Thresh_mult is used to set a threshold for the rd score. A higher value
282 // means that we will accept the best mode so far more often. This number
283 // is used in combination with the current block size, and thresh_freq_fact
284 // to pick a threshold.
285 int thresh_mult[MAX_MODES];
286
287 int threshes[MAX_SEGMENTS][BLOCK_SIZES_ALL][MAX_MODES];
288
289 int64_t prediction_type_threshes[REF_FRAMES][REFERENCE_MODES];
290
291 int RDMULT;
292
293 double r0;
294 } RD_OPT;
295
av1_init_rd_stats(RD_STATS * rd_stats)296 static INLINE void av1_init_rd_stats(RD_STATS *rd_stats) {
297 #if CONFIG_RD_DEBUG
298 int plane;
299 #endif
300 rd_stats->rate = 0;
301 rd_stats->dist = 0;
302 rd_stats->rdcost = 0;
303 rd_stats->sse = 0;
304 rd_stats->skip = 1;
305 rd_stats->zero_rate = 0;
306 rd_stats->invalid_rate = 0;
307 rd_stats->ref_rdcost = INT64_MAX;
308 #if CONFIG_RD_DEBUG
309 // This may run into problems when monochrome video is
310 // encoded, as there will only be 1 plane
311 for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
312 rd_stats->txb_coeff_cost[plane] = 0;
313 {
314 int r, c;
315 for (r = 0; r < TXB_COEFF_COST_MAP_SIZE; ++r)
316 for (c = 0; c < TXB_COEFF_COST_MAP_SIZE; ++c)
317 rd_stats->txb_coeff_cost_map[plane][r][c] = 0;
318 }
319 }
320 #endif
321 }
322
av1_invalid_rd_stats(RD_STATS * rd_stats)323 static INLINE void av1_invalid_rd_stats(RD_STATS *rd_stats) {
324 #if CONFIG_RD_DEBUG
325 int plane;
326 #endif
327 rd_stats->rate = INT_MAX;
328 rd_stats->dist = INT64_MAX;
329 rd_stats->rdcost = INT64_MAX;
330 rd_stats->sse = INT64_MAX;
331 rd_stats->skip = 0;
332 rd_stats->zero_rate = 0;
333 rd_stats->invalid_rate = 1;
334 rd_stats->ref_rdcost = INT64_MAX;
335 #if CONFIG_RD_DEBUG
336 // This may run into problems when monochrome video is
337 // encoded, as there will only be 1 plane
338 for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
339 rd_stats->txb_coeff_cost[plane] = INT_MAX;
340 {
341 int r, c;
342 for (r = 0; r < TXB_COEFF_COST_MAP_SIZE; ++r)
343 for (c = 0; c < TXB_COEFF_COST_MAP_SIZE; ++c)
344 rd_stats->txb_coeff_cost_map[plane][r][c] = INT_MAX;
345 }
346 }
347 #endif
348 }
349
av1_merge_rd_stats(RD_STATS * rd_stats_dst,const RD_STATS * rd_stats_src)350 static INLINE void av1_merge_rd_stats(RD_STATS *rd_stats_dst,
351 const RD_STATS *rd_stats_src) {
352 #if CONFIG_RD_DEBUG
353 int plane;
354 #endif
355 rd_stats_dst->rate += rd_stats_src->rate;
356 if (!rd_stats_dst->zero_rate)
357 rd_stats_dst->zero_rate = rd_stats_src->zero_rate;
358 rd_stats_dst->dist += rd_stats_src->dist;
359 rd_stats_dst->sse += rd_stats_src->sse;
360 rd_stats_dst->skip &= rd_stats_src->skip;
361 rd_stats_dst->invalid_rate &= rd_stats_src->invalid_rate;
362 #if CONFIG_RD_DEBUG
363 // This may run into problems when monochrome video is
364 // encoded, as there will only be 1 plane
365 for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
366 rd_stats_dst->txb_coeff_cost[plane] += rd_stats_src->txb_coeff_cost[plane];
367 {
368 // TODO(angiebird): optimize this part
369 int r, c;
370 int ref_txb_coeff_cost = 0;
371 for (r = 0; r < TXB_COEFF_COST_MAP_SIZE; ++r)
372 for (c = 0; c < TXB_COEFF_COST_MAP_SIZE; ++c) {
373 rd_stats_dst->txb_coeff_cost_map[plane][r][c] +=
374 rd_stats_src->txb_coeff_cost_map[plane][r][c];
375 ref_txb_coeff_cost += rd_stats_dst->txb_coeff_cost_map[plane][r][c];
376 }
377 assert(ref_txb_coeff_cost == rd_stats_dst->txb_coeff_cost[plane]);
378 }
379 }
380 #endif
381 }
382
383 struct TileInfo;
384 struct TileDataEnc;
385 struct AV1_COMP;
386 struct macroblock;
387
388 int av1_compute_rd_mult_based_on_qindex(const struct AV1_COMP *cpi, int qindex);
389
390 int av1_compute_rd_mult(const struct AV1_COMP *cpi, int qindex);
391
392 void av1_initialize_rd_consts(struct AV1_COMP *cpi);
393
394 void av1_initialize_cost_tables(const AV1_COMMON *const cm, MACROBLOCK *x);
395
396 void av1_initialize_me_consts(const struct AV1_COMP *cpi, MACROBLOCK *x,
397 int qindex);
398
399 void av1_model_rd_from_var_lapndz(int64_t var, unsigned int n,
400 unsigned int qstep, int *rate, int64_t *dist);
401
402 void av1_model_rd_curvfit(BLOCK_SIZE bsize, double sse_norm, double xqr,
403 double *rate_f, double *distbysse_f);
404 void av1_model_rd_surffit(BLOCK_SIZE bsize, double sse_norm, double xm,
405 double yl, double *rate_f, double *distbysse_f);
406
407 int av1_get_switchable_rate(const AV1_COMMON *const cm, MACROBLOCK *x,
408 const MACROBLOCKD *xd);
409
410 int av1_raster_block_offset(BLOCK_SIZE plane_bsize, int raster_block,
411 int stride);
412
413 int16_t *av1_raster_block_offset_int16(BLOCK_SIZE plane_bsize, int raster_block,
414 int16_t *base);
415
416 YV12_BUFFER_CONFIG *av1_get_scaled_ref_frame(const struct AV1_COMP *cpi,
417 int ref_frame);
418
419 void av1_init_me_luts(void);
420
421 void av1_set_mvcost(MACROBLOCK *x, int ref, int ref_mv_idx);
422
423 void av1_get_entropy_contexts(BLOCK_SIZE bsize,
424 const struct macroblockd_plane *pd,
425 ENTROPY_CONTEXT t_above[MAX_MIB_SIZE],
426 ENTROPY_CONTEXT t_left[MAX_MIB_SIZE]);
427
428 void av1_set_rd_speed_thresholds(struct AV1_COMP *cpi);
429
430 void av1_update_rd_thresh_fact(const AV1_COMMON *const cm,
431 int (*fact)[MAX_MODES], int rd_thresh, int bsize,
432 int best_mode_index);
433
// Returns 1 when best_rd is strictly below the scaled threshold
// (thresh * thresh_fact) >> 5, or when thresh is INT_MAX regardless of
// best_rd; returns 0 otherwise. The product is evaluated in int64_t.
static INLINE int rd_less_than_thresh(int64_t best_rd, int thresh,
                                      int thresh_fact) {
  // INT_MAX always passes the test.
  if (thresh == INT_MAX) return 1;

  const int64_t scaled_thresh = ((int64_t)thresh * thresh_fact) >> 5;
  return best_rd < scaled_thresh;
}
438
439 void av1_mv_pred(const struct AV1_COMP *cpi, MACROBLOCK *x,
440 uint8_t *ref_y_buffer, int ref_y_stride, int ref_frame,
441 BLOCK_SIZE block_size);
442
set_error_per_bit(MACROBLOCK * x,int rdmult)443 static INLINE void set_error_per_bit(MACROBLOCK *x, int rdmult) {
444 x->errorperbit = rdmult >> RD_EPB_SHIFT;
445 x->errorperbit += (x->errorperbit == 0);
446 }
447
448 void av1_setup_pred_block(const MACROBLOCKD *xd,
449 struct buf_2d dst[MAX_MB_PLANE],
450 const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
451 const struct scale_factors *scale,
452 const struct scale_factors *scale_uv,
453 const int num_planes);
454
455 int av1_get_intra_cost_penalty(int qindex, int qdelta,
456 aom_bit_depth_t bit_depth);
457
458 void av1_fill_mode_rates(AV1_COMMON *const cm, MACROBLOCK *x,
459 FRAME_CONTEXT *fc);
460
461 void av1_fill_coeff_costs(MACROBLOCK *x, FRAME_CONTEXT *fc,
462 const int num_planes);
463
464 int av1_get_adaptive_rdmult(const struct AV1_COMP *cpi, double beta);
465
466 #ifdef __cplusplus
467 } // extern "C"
468 #endif
469
470 #endif // AOM_AV1_ENCODER_RD_H_
471