1 // Copyright 2011 Google Inc.
2 //
3 // This code is licensed under the same terms as WebM:
4 // Software License Agreement: http://www.webmproject.org/license/software/
5 // Additional IP Rights Grant: http://www.webmproject.org/license/additional/
6 // -----------------------------------------------------------------------------
7 //
8 // WebP encoder: internal header.
9 //
10 // Author: Skal (pascal.massimino@gmail.com)
11
12 #ifndef WEBP_ENC_VP8ENCI_H_
13 #define WEBP_ENC_VP8ENCI_H_
14
15 #include "string.h" // for memcpy()
16 #include "webp/encode.h"
17 #include "bit_writer.h"
18
19 #if defined(__cplusplus) || defined(c_plusplus)
20 extern "C" {
21 #endif
22
23 //-----------------------------------------------------------------------------
24 // Various defines and enums
25
// Encoder version, as major / minor / revision components.
#define ENC_MAJ_VERSION   0
#define ENC_MIN_VERSION   1
#define ENC_REV_VERSION   2

// Size of the histogram used by CollectHistogram.
#define MAX_COEFF_THRESH  64
33
// Intra prediction modes.
enum {
  // 4x4 modes
  B_DC_PRED = 0,
  B_TM_PRED = 1,
  B_VE_PRED = 2,
  B_HE_PRED = 3,
  B_RD_PRED = 4,
  B_VR_PRED = 5,
  B_LD_PRED = 6,
  B_VL_PRED = 7,
  B_HD_PRED = 8,
  B_HU_PRED = 9,
  NUM_BMODES = B_HU_PRED + 1 - B_DC_PRED,   // = 10

  // Luma16 or UV modes: each one reuses the value of the matching 4x4 mode.
  DC_PRED = B_DC_PRED,
  TM_PRED = B_TM_PRED,
  V_PRED  = B_VE_PRED,
  H_PRED  = B_HE_PRED
};
51
// Dimensions of the coding tables and limits shared across the encoder.
enum {
  NUM_MB_SEGMENTS    = 4,
  MAX_NUM_PARTITIONS = 8,
  NUM_TYPES          = 4,    // 0: i16-AC,  1: i16-DC,  2:chroma-AC,  3:i4-AC
  NUM_BANDS          = 8,
  NUM_CTX            = 3,
  NUM_PROBAS         = 11,
  MAX_LF_LEVELS      = 64,   // Maximum loop filter level
  MAX_VARIABLE_LEVEL = 67    // last (inclusive) level with variable cost
};
61
// YUV-cache parameters. Cache is 16-pixels wide.
// The original or reconstructed samples can be accessed using VP8Scan[].
// The predicted blocks can be accessed using offsets to yuv_p_ and
// the arrays VP8*ModeOffsets[].
//         +----+      YUV Samples area. See VP8Scan[] for accessing the blocks.
//  Y_OFF  |YYYY| <- original samples  (enc->yuv_in_)
//         |YYYY|
//         |YYYY|
//         |YYYY|
//  U_OFF  |UUVV| V_OFF  (=U_OFF + 8)
//         |UUVV|
//         +----+
//  Y_OFF  |YYYY| <- compressed/decoded samples  ('yuv_out_')
//         |YYYY|    There are two buffers like this ('yuv_out_'/'yuv_out2_')
//         |YYYY|
//         |YYYY|
//  U_OFF  |UUVV| V_OFF
//         |UUVV|
//          x2 (for yuv_out2_)
//         +----+      Prediction area ('yuv_p_', size = PRED_SIZE)
// I16DC16 |YYYY|  Intra16 predictions (16x16 block each)
//         |YYYY|
//         |YYYY|
//         |YYYY|
// I16TM16 |YYYY|
//         |YYYY|
//         |YYYY|
//         |YYYY|
// I16VE16 |YYYY|
//         |YYYY|
//         |YYYY|
//         |YYYY|
// I16HE16 |YYYY|
//         |YYYY|
//         |YYYY|
//         |YYYY|
//         +----+  Chroma U/V predictions (16x8 block each)
// C8DC8   |UUVV|
//         |UUVV|
// C8TM8   |UUVV|
//         |UUVV|
// C8VE8   |UUVV|
//         |UUVV|
// C8HE8   |UUVV|
//         |UUVV|
//         +----+  Intra 4x4 predictions (4x4 block each)
//         |YYYY| I4DC4 I4TM4 I4VE4 I4HE4
//         |YYYY| I4RD4 I4VR4 I4LD4 I4VL4
//         |YY..| I4HD4 I4HU4 I4TMP
//         +----+
// Geometry of the sample cache. Every cached block uses the same stride.
#define BPS        16                          // this is the common stride
#define Y_SIZE     (BPS * 16)                  // 16 rows of luma
#define UV_SIZE    (BPS * 8)                   // 8 rows holding U and V
#define YUV_SIZE   (Y_SIZE + UV_SIZE)
#define PRED_SIZE  (6 * 16 * BPS + 12 * BPS)   // size of the prediction area
#define Y_OFF      (0)
#define U_OFF      (Y_SIZE)
#define V_OFF      (U_OFF + 8)
// DO_ALIGN() adds ALIGN_CST to PTR, then clears the low bits selected by
// ALIGN_CST. The result is an integer of type uintptr_t, not a pointer.
#define ALIGN_CST  15
#define DO_ALIGN(PTR) ((uintptr_t)((PTR) + ALIGN_CST) & ~ALIGN_CST)
122
123 extern const int VP8Scan[16 + 4 + 4]; // in quant.c
124 extern const int VP8UVModeOffsets[4]; // in analyze.c
125 extern const int VP8I16ModeOffsets[4];
126 extern const int VP8I4ModeOffsets[NUM_BMODES];
127
// Layout of prediction blocks: offsets of each predicted block, expressed
// with the common stride BPS.

// intra 16x16
#define I16DC16  (0 * 16 * BPS)
#define I16TM16  (1 * 16 * BPS)
#define I16VE16  (2 * 16 * BPS)
#define I16HE16  (3 * 16 * BPS)

// chroma 8x8, two U/V blocks side by side (hence: 16x8 each)
#define C8DC8    (4 * 16 * BPS)
#define C8TM8    (4 * 16 * BPS + 8 * BPS)
#define C8VE8    (5 * 16 * BPS)
#define C8HE8    (5 * 16 * BPS + 8 * BPS)

// intra 4x4: first row of blocks
#define I4DC4    (6 * 16 * BPS + 0)
#define I4TM4    (6 * 16 * BPS + 4)
#define I4VE4    (6 * 16 * BPS + 8)
#define I4HE4    (6 * 16 * BPS + 12)
// intra 4x4: second row of blocks
#define I4RD4    (6 * 16 * BPS + 4 * BPS + 0)
#define I4VR4    (6 * 16 * BPS + 4 * BPS + 4)
#define I4LD4    (6 * 16 * BPS + 4 * BPS + 8)
#define I4VL4    (6 * 16 * BPS + 4 * BPS + 12)
// intra 4x4: third row of blocks
#define I4HD4    (6 * 16 * BPS + 8 * BPS + 0)
#define I4HU4    (6 * 16 * BPS + 8 * BPS + 4)
#define I4TMP    (6 * 16 * BPS + 8 * BPS + 8)
151
// Type used for scores, rate and distortion.
typedef int64_t score_t;
// Upper bound used for costs; deliberately smaller than the int64_t maximum.
#define MAX_COST ((score_t)0x7fffffffffffffLL)
154
// Number of low bits discarded by the final right shift in QUANTDIV().
#define QFIX 17
// Shifts the bias value 'b' left by (QFIX - 8) bits.
#define BIAS(b)  ((b) << (QFIX - 8))
// Fun fact: this is the _only_ line where we're actually being lossy and
// discarding bits.
//
// Returns (n * iQ + B) >> QFIX.
// The product and sum are evaluated in 64 bits so that large operands cannot
// trigger signed 'int' overflow (undefined behavior); for operands whose
// product fits in an 'int' the result is the same as the plain 'int'
// expression.
static inline int QUANTDIV(int n, int iQ, int B) {
  const int64_t scaled = (int64_t)n * iQ + B;
  return (int)(scaled >> QFIX);
}
extern const uint8_t VP8Zigzag[16];
163
164 //-----------------------------------------------------------------------------
165 // Headers
166
167 typedef uint8_t ProbaArray[NUM_CTX][NUM_PROBAS];
168 typedef uint64_t StatsArray[NUM_CTX][NUM_PROBAS][2];
169 typedef uint16_t CostArray[NUM_CTX][MAX_VARIABLE_LEVEL + 1];
170 typedef double LFStats[NUM_MB_SEGMENTS][MAX_LF_LEVELS]; // filter stats
171
172 typedef struct VP8Encoder VP8Encoder;
173
// Segment features.
typedef struct {
  // Actual number of segments. 1 segment only = unused.
  int num_segments_;
  // Whether to update the segment map or not.
  // Must be 0 if there's only 1 segment.
  int update_map_;
  // Bit-cost for transmitting the segment map.
  int size_;
} VP8SegmentHeader;
181
182 // Struct collecting all frame-persistent probabilities.
183 typedef struct {
184 uint8_t segments_[3]; // probabilities for segment tree
185 uint8_t skip_proba_; // final probability of being skipped.
186 ProbaArray coeffs_[NUM_TYPES][NUM_BANDS]; // 924 bytes
187 StatsArray stats_[NUM_TYPES][NUM_BANDS]; // 7.4k
188 CostArray level_cost_[NUM_TYPES][NUM_BANDS]; // 11.4k
189 int use_skip_proba_; // Note: we always use skip_proba for now.
190 int nb_skip_; // number of skipped blocks
191 } VP8Proba;
192
// Filter parameters. Not actually used in the code (we don't perform
// the in-loop filtering), but filled from user's config.
typedef struct {
  // Filtering type: 0=complex, 1=simple.
  int simple_;
  // Base filter level [0..63].
  int level_;
  // Sharpness, in [0..7].
  int sharpness_;
  // Delta filter level for i4x4 relative to i16x16.
  int i4x4_lf_delta_;
} VP8FilterHeader;
201
202 //-----------------------------------------------------------------------------
// Information about the macroblocks.
204
// Per-macroblock information. The first four fields are bit-fields.
typedef struct {
  // Block type: 0=i4x4, 1=i16x16.
  uint8_t type_:2;
  // Chroma prediction mode.
  uint8_t uv_mode_:2;
  // Skip flag (single bit).
  uint8_t skip_:1;
  // Segment number.
  uint8_t segment_:2;
  // Quantization-susceptibility.
  uint8_t alpha_;
} VP8MBInfo;
213
// Quantization matrix: five tables of 16 entries, one entry per coefficient.
typedef struct {
  // Quantizer steps.
  uint16_t q_[16];
  // Reciprocals, fixed point.
  uint16_t iq_[16];
  // Rounding bias.
  uint16_t bias_[16];
  // Value under which a coefficient is zeroed.
  uint16_t zthresh_[16];
  // Frequency boosters for slight sharpening.
  uint16_t sharpen_[16];
} VP8Matrix;
221
222 typedef struct {
223 VP8Matrix y1_, y2_, uv_; // quantization matrices
224 int alpha_; // quant-susceptibility, range [-127,127]. Zero is neutral.
225 // Lower values indicate a lower risk of blurriness.
226 int beta_; // filter-susceptibility, range [0,255].
227 int quant_; // final segment quantizer.
228 int fstrength_; // final in-loop filtering strength
229 // reactivities
230 int lambda_i16_, lambda_i4_, lambda_uv_;
231 int lambda_mode_, lambda_trellis_, tlambda_;
232 int lambda_trellis_i16_, lambda_trellis_i4_, lambda_trellis_uv_;
233 } VP8SegmentInfo;
234
235 // Handy transcient struct to accumulate score and info during RD-optimization
236 // and mode evaluation.
237 typedef struct {
238 score_t D, SD, R, score; // Distortion, spectral distortion, rate, score.
239 int16_t y_dc_levels[16]; // Quantized levels for luma-DC, luma-AC, chroma.
240 int16_t y_ac_levels[16][16];
241 int16_t uv_levels[4 + 4][16];
242 int mode_i16; // mode number for intra16 prediction
243 int modes_i4[16]; // mode numbers for intra4 predictions
244 int mode_uv; // mode number of chroma prediction
245 uint32_t nz; // non-zero blocks
246 } VP8ModeScore;
247
248 // Iterator structure to iterate through macroblocks, pointing to the
249 // right neighbouring data (samples, predictions, contexts, ...)
250 typedef struct {
251 int x_, y_; // current macroblock
252 int y_offset_, uv_offset_; // offset to the luma / chroma planes
253 int y_stride_, uv_stride_; // respective strides
254 uint8_t* yuv_in_; // borrowed from enc_ (for now)
255 uint8_t* yuv_out_; // ''
256 uint8_t* yuv_out2_; // ''
257 uint8_t* yuv_p_; // ''
258 VP8Encoder* enc_; // back-pointer
259 VP8MBInfo* mb_; // current macroblock
260 VP8BitWriter* bw_; // current bit-writer
261 uint8_t* preds_; // intra mode predictors (4x4 blocks)
262 uint32_t* nz_; // non-zero pattern
263 uint8_t i4_boundary_[37]; // 32+5 boundary samples needed by intra4x4
264 uint8_t* i4_top_; // pointer to the current top boundary sample
265 int i4_; // current intra4x4 mode being tested
266 int top_nz_[9]; // top-non-zero context.
267 int left_nz_[9]; // left-non-zero. left_nz[8] is independent.
268 uint64_t bit_count_[4][3]; // bit counters for coded levels.
269 uint64_t luma_bits_; // macroblock bit-cost for luma
270 uint64_t uv_bits_; // macroblock bit-cost for chroma
271 LFStats* lf_stats_; // filter stats (borrowed from enc_)
272 int do_trellis_; // if true, perform extra level optimisation
273 int done_; // true when scan is finished
274 } VP8EncIterator;
275
276 // in iterator.c
277 // must be called first.
278 void VP8IteratorInit(VP8Encoder* const enc, VP8EncIterator* const it);
279 // restart a scan.
280 void VP8IteratorReset(VP8EncIterator* const it);
281 // import samples from source
282 void VP8IteratorImport(const VP8EncIterator* const it);
283 // export decimated samples
284 void VP8IteratorExport(const VP8EncIterator* const it);
285 // go to next macroblock. Returns !done_. If *block_to_save is non-null, will
286 // save the boundary values to top_/left_ arrays. block_to_save can be
287 // it->yuv_out_ or it->yuv_in_.
288 int VP8IteratorNext(VP8EncIterator* const it,
289 const uint8_t* const block_to_save);
290 // Intra4x4 iterations
291 void VP8IteratorStartI4(VP8EncIterator* const it);
292 // returns true if not done.
293 int VP8IteratorRotateI4(VP8EncIterator* const it,
294 const uint8_t* const yuv_out);
295
296 // Non-zero context setup/teardown
297 void VP8IteratorNzToBytes(VP8EncIterator* const it);
298 void VP8IteratorBytesToNz(VP8EncIterator* const it);
299
300 // Helper functions to set mode properties
301 void VP8SetIntra16Mode(const VP8EncIterator* const it, int mode);
302 void VP8SetIntra4Mode(const VP8EncIterator* const it, int modes[16]);
303 void VP8SetIntraUVMode(const VP8EncIterator* const it, int mode);
304 void VP8SetSkip(const VP8EncIterator* const it, int skip);
305 void VP8SetSegment(const VP8EncIterator* const it, int segment);
306 void VP8IteratorResetCosts(VP8EncIterator* const it);
307
308 //-----------------------------------------------------------------------------
309 // VP8Encoder
310
311 struct VP8Encoder {
312 const WebPConfig* config_; // user configuration and parameters
313 WebPPicture* pic_; // input / output picture
314
315 // headers
316 VP8FilterHeader filter_hdr_; // filtering information
317 VP8SegmentHeader segment_hdr_; // segment information
318
319 int profile_; // VP8's profile, deduced from Config.
320
321 // dimension, in macroblock units.
322 int mb_w_, mb_h_;
323 int preds_w_; // stride of the *preds_ prediction plane (=4*mb_w + 1)
324
325 // number of partitions (1, 2, 4 or 8 = MAX_NUM_PARTITIONS)
326 int num_parts_;
327
328 // per-partition boolean decoders.
329 VP8BitWriter bw_; // part0
330 VP8BitWriter parts_[MAX_NUM_PARTITIONS]; // token partitions
331
332 // transparency blob
333 int has_alpha_;
334 uint8_t* alpha_data_; // non-NULL if transparency is present
335 size_t alpha_data_size_;
336
337 // enhancement layer
338 int use_layer_;
339 VP8BitWriter layer_bw_;
340 uint8_t* layer_data_;
341 size_t layer_data_size_;
342
343 // quantization info (one set of DC/AC dequant factor per segment)
344 VP8SegmentInfo dqm_[NUM_MB_SEGMENTS];
345 int base_quant_; // nominal quantizer value. Only used
346 // for relative coding of segments' quant.
347 int uv_alpha_; // U/V quantization susceptibility
348 // global offset of quantizers, shared by all segments
349 int dq_y1_dc_;
350 int dq_y2_dc_, dq_y2_ac_;
351 int dq_uv_dc_, dq_uv_ac_;
352
353 // probabilities and statistics
354 VP8Proba proba_;
355 uint64_t sse_[3]; // sum of Y/U/V squared errors for all macroblocks
356 uint64_t sse_count_; // pixel count for the sse_[] stats
357 int coded_size_;
358 int residual_bytes_[3][4];
359 int block_count_[3];
360
361 // quality/speed settings
362 int method_; // 0=fastest, 6=best/slowest.
363 int rd_opt_level_; // Deduced from method_.
364
365 // Memory
366 VP8MBInfo* mb_info_; // contextual macroblock infos (mb_w_ + 1)
367 uint8_t* preds_; // predictions modes: (4*mb_w+1) * (4*mb_h+1)
368 uint32_t* nz_; // non-zero bit context: mb_w+1
369 uint8_t* yuv_in_; // input samples
370 uint8_t* yuv_out_; // output samples
371 uint8_t* yuv_out2_; // secondary scratch out-buffer. swapped with yuv_out_.
372 uint8_t* yuv_p_; // scratch buffer for prediction
373 uint8_t *y_top_; // top luma samples.
374 uint8_t *uv_top_; // top u/v samples.
375 // U and V are packed into 16 pixels (8 U + 8 V)
376 uint8_t *y_left_; // left luma samples (adressable from index -1 to 15).
377 uint8_t *u_left_; // left u samples (adressable from index -1 to 7)
378 uint8_t *v_left_; // left v samples (adressable from index -1 to 7)
379
380 LFStats *lf_stats_; // autofilter stats (if NULL, autofilter is off)
381 };
382
383 //-----------------------------------------------------------------------------
384 // internal functions. Not public.
385
386 // in tree.c
387 extern const uint8_t VP8CoeffsProba0[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS];
388 extern const uint8_t
389 VP8CoeffsUpdateProba[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS];
390 // Reset the token probabilities to their initial (default) values
391 void VP8DefaultProbas(VP8Encoder* const enc);
392 // Write the token probabilities
393 void VP8WriteProbas(VP8BitWriter* const bw, const VP8Proba* const probas);
394 // Writes the partition #0 modes (that is: all intra modes)
395 void VP8CodeIntraModes(VP8Encoder* const enc);
396
397 // in syntax.c
398 // Generates the final bitstream by coding the partition0 and headers,
399 // and appending an assembly of all the pre-coded token partitions.
400 // Return true if everything is ok.
401 int VP8EncWrite(VP8Encoder* const enc);
402
403 // in frame.c
404 extern const uint8_t VP8EncBands[16 + 1];
405 // Form all the four Intra16x16 predictions in the yuv_p_ cache
406 void VP8MakeLuma16Preds(const VP8EncIterator* const it);
407 // Form all the four Chroma8x8 predictions in the yuv_p_ cache
408 void VP8MakeChroma8Preds(const VP8EncIterator* const it);
409 // Form all the ten Intra4x4 predictions in the yuv_p_ cache
410 // for the 4x4 block it->i4_
411 void VP8MakeIntra4Preds(const VP8EncIterator* const it);
412 // Rate calculation
413 int VP8GetCostLuma16(VP8EncIterator* const it, const VP8ModeScore* const rd);
414 int VP8GetCostLuma4(VP8EncIterator* const it, const int16_t levels[16]);
415 int VP8GetCostUV(VP8EncIterator* const it, const VP8ModeScore* const rd);
416 // Main stat / coding passes
417 int VP8EncLoop(VP8Encoder* const enc);
418 int VP8StatLoop(VP8Encoder* const enc);
419
420 // in webpenc.c
421 // Assign an error code to a picture. Return false for convenience.
422 int WebPEncodingSetError(WebPPicture* const pic, WebPEncodingError error);
423 // in analysis.c
424 // Compute susceptibility based on DCT-coeff histograms:
425 // the higher, the "easier" the macroblock is to compress.
426 typedef int (*VP8CHisto)(const uint8_t* ref, const uint8_t* pred,
427 int start_block, int end_block);
428 extern VP8CHisto VP8CollectHistogram;
429 // Main analysis loop. Decides the segmentations and complexity.
430 // Assigns a first guess for Intra16 and uvmode_ prediction modes.
431 int VP8EncAnalyze(VP8Encoder* const enc);
432
433 // in quant.c
434 // Sets up segment's quantization values, base_quant_ and filter strengths.
435 void VP8SetSegmentParams(VP8Encoder* const enc, float quality);
436 // Pick best modes and fills the levels. Returns true if skipped.
437 int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd, int rd_opt);
438
439 // in alpha.c
440 void VP8EncInitAlpha(VP8Encoder* enc); // initialize alpha compression
441 void VP8EncCodeAlphaBlock(VP8EncIterator* it); // analyze or code a macroblock
442 int VP8EncFinishAlpha(VP8Encoder* enc); // finalize compressed data
443 void VP8EncDeleteAlpha(VP8Encoder* enc); // delete compressed data
444
445 // in layer.c
446 void VP8EncInitLayer(VP8Encoder* const enc); // init everything
447 void VP8EncCodeLayerBlock(VP8EncIterator* it); // code one more macroblock
448 int VP8EncFinishLayer(VP8Encoder* const enc); // finalize coding
449 void VP8EncDeleteLayer(VP8Encoder* enc); // reclaim memory
450
451 // in dsp.c
452 int VP8GetAlpha(const int histo[MAX_COEFF_THRESH + 1]);
453
// Transforms.
// VP8Idct: does one of two inverse transforms. If do_two is set, the
// transforms will be done for (ref, in, dst) and (ref + 4, in + 16, dst + 4).
typedef void (*VP8Idct)(const uint8_t* ref, const int16_t* in, uint8_t* dst,
                        int do_two);
typedef void (*VP8Fdct)(const uint8_t* src, const uint8_t* ref, int16_t* out);
typedef void (*VP8WHT)(const int16_t* in, int16_t* out);

// Function pointers for the transforms; declared here, defined elsewhere.
extern VP8Idct VP8ITransform;
extern VP8Fdct VP8FTransform;
extern VP8WHT VP8ITransformWHT;
extern VP8WHT VP8FTransformWHT;
// Predictions.
// *dst is the destination block. *top and *left can be NULL.
// NOTE(review): the earlier wording also listed *top_right as possibly NULL,
// but none of the signatures below has such a parameter -- confirm.
typedef void (*VP8IntraPreds)(uint8_t* dst, const uint8_t* left,
                              const uint8_t* top);
typedef void (*VP8Intra4Preds)(uint8_t* dst, const uint8_t* top);
extern VP8Intra4Preds VP8EncPredLuma4;
extern VP8IntraPreds VP8EncPredLuma16;
extern VP8IntraPreds VP8EncPredChroma8;

// Metrics comparing 'pix' against 'ref'; one pointer per block size.
typedef int (*VP8Metric)(const uint8_t* pix, const uint8_t* ref);
extern VP8Metric VP8SSE16x16;
extern VP8Metric VP8SSE16x8;
extern VP8Metric VP8SSE8x8;
extern VP8Metric VP8SSE4x4;
// Same as above, with an additional table of weights.
typedef int (*VP8WMetric)(const uint8_t* pix, const uint8_t* ref,
                          const uint16_t* const weights);
extern VP8WMetric VP8TDisto4x4;
extern VP8WMetric VP8TDisto16x16;

// Block copies from 'src' to 'dst'; one pointer per block size.
typedef void (*VP8BlockCopy)(const uint8_t* src, uint8_t* dst);
extern VP8BlockCopy VP8Copy4x4;
extern VP8BlockCopy VP8Copy8x8;
extern VP8BlockCopy VP8Copy16x16;
484 // Quantization
485 typedef int (*VP8QuantizeBlock)(int16_t in[16], int16_t out[16],
486 int n, const VP8Matrix* const mtx);
487 extern VP8QuantizeBlock VP8EncQuantizeBlock;
488
// CPU features that can be queried.
typedef enum {
  kSSE2,
  kSSE3
} CPUFeature;

// Returns true if the CPU supports the feature.
typedef int (*VP8CPUInfo)(CPUFeature feature);
extern VP8CPUInfo VP8EncGetCPUInfo;

// Must be called before using any of the above.
void VP8EncDspInit(void);
498
499 // in filter.c
500 extern void VP8InitFilter(VP8EncIterator* const it);
501 extern void VP8StoreFilterStats(VP8EncIterator* const it);
502 extern void VP8AdjustFilterStrength(VP8EncIterator* const it);
503
504 //-----------------------------------------------------------------------------
505
506 #if defined(__cplusplus) || defined(c_plusplus)
507 } // extern "C"
508 #endif
509
510 #endif // WEBP_ENC_VP8ENCI_H_
511