1 // Copyright 2011 Google Inc. All Rights Reserved.
2 //
3 // This code is licensed under the same terms as WebM:
4 // Software License Agreement: http://www.webmproject.org/license/software/
5 // Additional IP Rights Grant: http://www.webmproject.org/license/additional/
6 // -----------------------------------------------------------------------------
7 //
8 // WebP encoder: internal header.
9 //
10 // Author: Skal (pascal.massimino@gmail.com)
11
12 #ifndef WEBP_ENC_VP8ENCI_H_
13 #define WEBP_ENC_VP8ENCI_H_
14
15 #include <string.h> // for memcpy()
16 #include "../dsp/dsp.h"
17 #include "../utils/bit_writer.h"
18 #include "webp/encode.h"
19
20 #if defined(__cplusplus) || defined(c_plusplus)
21 extern "C" {
22 #endif
23
24 //------------------------------------------------------------------------------
25 // Various defines and enums
26
27 // version numbers
28 #define ENC_MAJ_VERSION 0
29 #define ENC_MIN_VERSION 2
30 #define ENC_REV_VERSION 0
31
32 // size of histogram used by CollectHistogram.
33 #define MAX_COEFF_THRESH 64
34
// Intra prediction modes.
enum {
  // 4x4 luma modes, numbered consecutively from B_DC_PRED.
  B_DC_PRED = 0,
  B_TM_PRED,
  B_VE_PRED,
  B_HE_PRED,
  B_RD_PRED,
  B_VR_PRED,
  B_LD_PRED,
  B_VL_PRED,
  B_HD_PRED,
  B_HU_PRED,
  NUM_BMODES = B_HU_PRED + 1 - B_DC_PRED,   // = 10

  // Luma16 or UV modes: aliases of the matching 4x4 mode values.
  DC_PRED = B_DC_PRED,
  TM_PRED = B_TM_PRED,
  V_PRED = B_VE_PRED,
  H_PRED = B_HE_PRED
};
52
// Dimensions and limits shared by the encoder's tables.
enum {
  NUM_MB_SEGMENTS = 4,
  MAX_NUM_PARTITIONS = 8,
  // Coefficient types. 0: i16-AC, 1: i16-DC, 2: chroma-AC, 3: i4-AC
  NUM_TYPES = 4,
  NUM_BANDS = 8,
  NUM_CTX = 3,
  NUM_PROBAS = 11,
  // Maximum loop filter level
  MAX_LF_LEVELS = 64,
  // Last (inclusive) level with variable cost
  MAX_VARIABLE_LEVEL = 67
};
62
63 // YUV-cache parameters. Cache is 16-pixels wide.
64 // The original or reconstructed samples can be accessed using VP8Scan[]
65 // The predicted blocks can be accessed using offsets to yuv_p_ and
66 // the arrays VP8*ModeOffsets[];
67 // +----+ YUV Samples area. See VP8Scan[] for accessing the blocks.
68 // Y_OFF |YYYY| <- original samples (enc->yuv_in_)
69 // |YYYY|
70 // |YYYY|
71 // |YYYY|
72 // U_OFF |UUVV| V_OFF (=U_OFF + 8)
73 // |UUVV|
74 // +----+
75 // Y_OFF |YYYY| <- compressed/decoded samples ('yuv_out_')
76 // |YYYY| There are two buffers like this ('yuv_out_'/'yuv_out2_')
77 // |YYYY|
78 // |YYYY|
79 // U_OFF |UUVV| V_OFF
80 // |UUVV|
81 // x2 (for yuv_out2_)
82 // +----+ Prediction area ('yuv_p_', size = PRED_SIZE)
83 // I16DC16 |YYYY| Intra16 predictions (16x16 block each)
84 // |YYYY|
85 // |YYYY|
86 // |YYYY|
87 // I16TM16 |YYYY|
88 // |YYYY|
89 // |YYYY|
90 // |YYYY|
91 // I16VE16 |YYYY|
92 // |YYYY|
93 // |YYYY|
94 // |YYYY|
95 // I16HE16 |YYYY|
96 // |YYYY|
97 // |YYYY|
98 // |YYYY|
99 // +----+ Chroma U/V predictions (16x8 block each)
100 // C8DC8 |UUVV|
101 // |UUVV|
102 // C8TM8 |UUVV|
103 // |UUVV|
104 // C8VE8 |UUVV|
105 // |UUVV|
106 // C8HE8 |UUVV|
107 // |UUVV|
108 // +----+ Intra 4x4 predictions (4x4 block each)
109 // |YYYY| I4DC4 I4TM4 I4VE4 I4HE4
110 // |YYYY| I4RD4 I4VR4 I4LD4 I4VL4
111 // |YY..| I4HD4 I4HU4 I4TMP
112 // +----+
#define BPS       16                         // this is the common stride
#define Y_SIZE    (BPS * 16)                 // luma part of one YUV buffer
#define UV_SIZE   (BPS * 8)                  // U and V, stored side by side
#define YUV_SIZE  (Y_SIZE + UV_SIZE)
#define PRED_SIZE (6 * 16 * BPS + 12 * BPS)  // size of the prediction area
#define Y_OFF     (0)
#define U_OFF     (Y_SIZE)
#define V_OFF     (U_OFF + 8)
#define ALIGN_CST 15
// Rounds PTR (a pointer or an integer address) up to the next multiple of
// ALIGN_CST + 1 bytes and yields the result as a uintptr_t.
// The conversion to uintptr_t is done *before* adding ALIGN_CST so that the
// offset is always counted in bytes, whatever the pointee type, and so that no
// out-of-range intermediate pointer is ever formed. The mask is built in
// uintptr_t as well to avoid relying on sign-extension of an int.
#define DO_ALIGN(PTR) \
    (((uintptr_t)(PTR) + ALIGN_CST) & ~(uintptr_t)ALIGN_CST)
123
124 extern const int VP8Scan[16 + 4 + 4]; // in quant.c
125 extern const int VP8UVModeOffsets[4]; // in analyze.c
126 extern const int VP8I16ModeOffsets[4];
127 extern const int VP8I4ModeOffsets[NUM_BMODES];
128
// Layout of prediction blocks: offsets are relative to the start of the
// prediction area and use the common stride BPS.
// intra 16x16: one 16-row block per mode.
#define I16DC16 (0 * 16 * BPS)
#define I16TM16 (1 * 16 * BPS)
#define I16VE16 (2 * 16 * BPS)
#define I16HE16 (3 * 16 * BPS)
// chroma 8x8, two U/V blocks side by side (hence: 16x8 each), so two modes
// fit in each 16-row slot.
#define C8DC8 (4 * 16 * BPS)
#define C8TM8 (C8DC8 + 8 * BPS)
#define C8VE8 (5 * 16 * BPS)
#define C8HE8 (C8VE8 + 8 * BPS)
// intra 4x4: four blocks per 4-row band, each 4 samples wide.
#define I4DC4 (6 * 16 * BPS)
#define I4TM4 (I4DC4 + 4)
#define I4VE4 (I4DC4 + 8)
#define I4HE4 (I4DC4 + 12)
#define I4RD4 (I4DC4 + 4 * BPS)
#define I4VR4 (I4RD4 + 4)
#define I4LD4 (I4RD4 + 8)
#define I4VL4 (I4RD4 + 12)
#define I4HD4 (I4DC4 + 8 * BPS)
#define I4HU4 (I4HD4 + 4)
#define I4TMP (I4HD4 + 8)
152
153 typedef int64_t score_t; // type used for scores, rate, distortion
154 #define MAX_COST ((score_t)0x7fffffffffffffLL)
155
#define QFIX 17                         // fixed-point precision, in bits
#define BIAS(b)  ((b) << (QFIX - 8))    // scales an 8-bit bias to QFIX bits
// Fixed-point quantization step: returns (n * iQ + B) >> QFIX, where iQ is a
// QFIX-bit fixed-point reciprocal and B the rounding bias.
// Fun fact: this is the _only_ line where we're actually being lossy and
// discarding bits.
static WEBP_INLINE int QUANTDIV(int n, int iQ, int B) {
  // Widen before multiplying: the product n * iQ can exceed the range of
  // 'int', and signed overflow is undefined behaviour. For every input that
  // did not overflow before, the result is unchanged.
  const int64_t scaled = (int64_t)n * iQ + B;
  return (int)(scaled >> QFIX);
}
163 extern const uint8_t VP8Zigzag[16];
164
165 //------------------------------------------------------------------------------
166 // Headers
167
168 typedef uint32_t proba_t; // 16b + 16b
169 typedef uint8_t ProbaArray[NUM_CTX][NUM_PROBAS];
170 typedef proba_t StatsArray[NUM_CTX][NUM_PROBAS];
171 typedef uint16_t CostArray[NUM_CTX][MAX_VARIABLE_LEVEL + 1];
172 typedef double LFStats[NUM_MB_SEGMENTS][MAX_LF_LEVELS]; // filter stats
173
174 typedef struct VP8Encoder VP8Encoder;
175
// Segment features.
typedef struct {
  // Actual number of segments. A single segment means segmentation is unused.
  int num_segments_;
  // Whether to update the segment map or not.
  // Must be 0 if there's only 1 segment.
  int update_map_;
  // Bit-cost for transmitting the segment map.
  int size_;
} VP8SegmentHeader;
183
184 // Struct collecting all frame-persistent probabilities.
185 typedef struct {
186 uint8_t segments_[3]; // probabilities for segment tree
187 uint8_t skip_proba_; // final probability of being skipped.
188 ProbaArray coeffs_[NUM_TYPES][NUM_BANDS]; // 924 bytes
189 StatsArray stats_[NUM_TYPES][NUM_BANDS]; // 4224 bytes
190 CostArray level_cost_[NUM_TYPES][NUM_BANDS]; // 11.4k
191 int dirty_; // if true, need to call VP8CalculateLevelCosts()
192 int use_skip_proba_; // Note: we always use skip_proba for now.
193 int nb_skip_; // number of skipped blocks
194 } VP8Proba;
195
// Filter parameters. Not actually used in the code (we don't perform
// the in-loop filtering), but filled from user's config.
typedef struct {
  // Filtering type: 0=complex, 1=simple
  int simple_;
  // Base filter level, in [0..63]
  int level_;
  // Sharpness, in [0..7]
  int sharpness_;
  // Delta filter level for i4x4 relative to i16x16
  int i4x4_lf_delta_;
} VP8FilterHeader;
204
205 //------------------------------------------------------------------------------
// Information about the macroblocks.
207
// Per-macroblock information, packed into bit-fields plus one byte.
typedef struct {
  // Block type: 0=i4x4, 1=i16x16
  unsigned int type_ : 2;
  unsigned int uv_mode_ : 2;
  unsigned int skip_ : 1;
  unsigned int segment_ : 2;
  // Quantization-susceptibility
  uint8_t alpha_;
} VP8MBInfo;
216
// Quantization matrix: one 16-entry table per parameter.
typedef struct VP8Matrix {
  // Quantizer steps
  uint16_t q_[16];
  // Reciprocals, fixed point
  uint16_t iq_[16];
  // Rounding bias
  uint16_t bias_[16];
  // Value under which a coefficient is zeroed
  uint16_t zthresh_[16];
  // Frequency boosters for slight sharpening
  uint16_t sharpen_[16];
} VP8Matrix;
224
225 typedef struct {
226 VP8Matrix y1_, y2_, uv_; // quantization matrices
227 int alpha_; // quant-susceptibility, range [-127,127]. Zero is neutral.
228 // Lower values indicate a lower risk of blurriness.
229 int beta_; // filter-susceptibility, range [0,255].
230 int quant_; // final segment quantizer.
231 int fstrength_; // final in-loop filtering strength
232 // reactivities
233 int lambda_i16_, lambda_i4_, lambda_uv_;
234 int lambda_mode_, lambda_trellis_, tlambda_;
235 int lambda_trellis_i16_, lambda_trellis_i4_, lambda_trellis_uv_;
236 } VP8SegmentInfo;
237
238 // Handy transcient struct to accumulate score and info during RD-optimization
239 // and mode evaluation.
240 typedef struct {
241 score_t D, SD, R, score; // Distortion, spectral distortion, rate, score.
242 int16_t y_dc_levels[16]; // Quantized levels for luma-DC, luma-AC, chroma.
243 int16_t y_ac_levels[16][16];
244 int16_t uv_levels[4 + 4][16];
245 int mode_i16; // mode number for intra16 prediction
246 uint8_t modes_i4[16]; // mode numbers for intra4 predictions
247 int mode_uv; // mode number of chroma prediction
248 uint32_t nz; // non-zero blocks
249 } VP8ModeScore;
250
251 // Iterator structure to iterate through macroblocks, pointing to the
252 // right neighbouring data (samples, predictions, contexts, ...)
253 typedef struct {
254 int x_, y_; // current macroblock
255 int y_offset_, uv_offset_; // offset to the luma / chroma planes
256 int y_stride_, uv_stride_; // respective strides
257 uint8_t* yuv_in_; // borrowed from enc_ (for now)
258 uint8_t* yuv_out_; // ''
259 uint8_t* yuv_out2_; // ''
260 uint8_t* yuv_p_; // ''
261 VP8Encoder* enc_; // back-pointer
262 VP8MBInfo* mb_; // current macroblock
263 VP8BitWriter* bw_; // current bit-writer
264 uint8_t* preds_; // intra mode predictors (4x4 blocks)
265 uint32_t* nz_; // non-zero pattern
266 uint8_t i4_boundary_[37]; // 32+5 boundary samples needed by intra4x4
267 uint8_t* i4_top_; // pointer to the current top boundary sample
268 int i4_; // current intra4x4 mode being tested
269 int top_nz_[9]; // top-non-zero context.
270 int left_nz_[9]; // left-non-zero. left_nz[8] is independent.
271 uint64_t bit_count_[4][3]; // bit counters for coded levels.
272 uint64_t luma_bits_; // macroblock bit-cost for luma
273 uint64_t uv_bits_; // macroblock bit-cost for chroma
274 LFStats* lf_stats_; // filter stats (borrowed from enc_)
275 int do_trellis_; // if true, perform extra level optimisation
276 int done_; // true when scan is finished
277 int percent0_; // saved initial progress percent
278 } VP8EncIterator;
279
280 // in iterator.c
281 // must be called first.
282 void VP8IteratorInit(VP8Encoder* const enc, VP8EncIterator* const it);
283 // restart a scan.
284 void VP8IteratorReset(VP8EncIterator* const it);
285 // import samples from source
286 void VP8IteratorImport(const VP8EncIterator* const it);
287 // export decimated samples
288 void VP8IteratorExport(const VP8EncIterator* const it);
// go to next macroblock. Returns !done_. If block_to_save is non-NULL, will
// save the boundary values to top_/left_ arrays. block_to_save can be
// it->yuv_out_ or it->yuv_in_.
292 int VP8IteratorNext(VP8EncIterator* const it,
293 const uint8_t* const block_to_save);
294 // Report progression based on macroblock rows. Return 0 for user-abort request.
295 int VP8IteratorProgress(const VP8EncIterator* const it,
296 int final_delta_percent);
297 // Intra4x4 iterations
298 void VP8IteratorStartI4(VP8EncIterator* const it);
299 // returns true if not done.
300 int VP8IteratorRotateI4(VP8EncIterator* const it,
301 const uint8_t* const yuv_out);
302
303 // Non-zero context setup/teardown
304 void VP8IteratorNzToBytes(VP8EncIterator* const it);
305 void VP8IteratorBytesToNz(VP8EncIterator* const it);
306
307 // Helper functions to set mode properties
308 void VP8SetIntra16Mode(const VP8EncIterator* const it, int mode);
309 void VP8SetIntra4Mode(const VP8EncIterator* const it, const uint8_t* modes);
310 void VP8SetIntraUVMode(const VP8EncIterator* const it, int mode);
311 void VP8SetSkip(const VP8EncIterator* const it, int skip);
312 void VP8SetSegment(const VP8EncIterator* const it, int segment);
313
314 //------------------------------------------------------------------------------
315 // Paginated token buffer
316
317 // WIP: #define USE_TOKEN_BUFFER
318
319 #ifdef USE_TOKEN_BUFFER
320
// Number of tokens stored in one page.
#define MAX_NUM_TOKEN 2048

// One page of tokens; pages are chained through next_.
typedef struct VP8Tokens VP8Tokens;
struct VP8Tokens {
  // Each token: bit #15 holds the bit, bits 0..14 hold the slot.
  uint16_t tokens_[MAX_NUM_TOKEN];
  int left_;
  VP8Tokens* next_;
};

// Paginated token buffer.
typedef struct {
  VP8Tokens* rows_;
  // Set to (*last_)->tokens_
  uint16_t* tokens_;
  VP8Tokens** last_;
  int left_;
  // True in case of malloc error
  int error_;
} VP8TBuffer;
337
338 void VP8TBufferInit(VP8TBuffer* const b); // initialize an empty buffer
339 int VP8TBufferNewPage(VP8TBuffer* const b); // allocate a new page
340 void VP8TBufferClear(VP8TBuffer* const b); // de-allocate memory
341
342 int VP8EmitTokens(const VP8TBuffer* const b, VP8BitWriter* const bw,
343 const uint8_t* const probas);
344
VP8AddToken(VP8TBuffer * const b,int bit,int proba_idx)345 static WEBP_INLINE int VP8AddToken(VP8TBuffer* const b,
346 int bit, int proba_idx) {
347 if (b->left_ > 0 || VP8TBufferNewPage(b)) {
348 const int slot = --b->left_;
349 b->tokens_[slot] = (bit << 15) | proba_idx;
350 }
351 return bit;
352 }
353
354 #endif // USE_TOKEN_BUFFER
355
356 //------------------------------------------------------------------------------
357 // VP8Encoder
358
359 struct VP8Encoder {
360 const WebPConfig* config_; // user configuration and parameters
361 WebPPicture* pic_; // input / output picture
362
363 // headers
364 VP8FilterHeader filter_hdr_; // filtering information
365 VP8SegmentHeader segment_hdr_; // segment information
366
367 int profile_; // VP8's profile, deduced from Config.
368
369 // dimension, in macroblock units.
370 int mb_w_, mb_h_;
371 int preds_w_; // stride of the *preds_ prediction plane (=4*mb_w + 1)
372
373 // number of partitions (1, 2, 4 or 8 = MAX_NUM_PARTITIONS)
374 int num_parts_;
375
376 // per-partition boolean decoders.
377 VP8BitWriter bw_; // part0
378 VP8BitWriter parts_[MAX_NUM_PARTITIONS]; // token partitions
379
380 int percent_; // for progress
381
382 // transparency blob
383 int has_alpha_;
384 uint8_t* alpha_data_; // non-NULL if transparency is present
385 uint32_t alpha_data_size_;
386
387 // enhancement layer
388 int use_layer_;
389 VP8BitWriter layer_bw_;
390 uint8_t* layer_data_;
391 size_t layer_data_size_;
392
393 // quantization info (one set of DC/AC dequant factor per segment)
394 VP8SegmentInfo dqm_[NUM_MB_SEGMENTS];
395 int base_quant_; // nominal quantizer value. Only used
396 // for relative coding of segments' quant.
397 int uv_alpha_; // U/V quantization susceptibility
398 // global offset of quantizers, shared by all segments
399 int dq_y1_dc_;
400 int dq_y2_dc_, dq_y2_ac_;
401 int dq_uv_dc_, dq_uv_ac_;
402
403 // probabilities and statistics
404 VP8Proba proba_;
405 uint64_t sse_[4]; // sum of Y/U/V/A squared errors for all macroblocks
406 uint64_t sse_count_; // pixel count for the sse_[] stats
407 int coded_size_;
408 int residual_bytes_[3][4];
409 int block_count_[3];
410
411 // quality/speed settings
412 int method_; // 0=fastest, 6=best/slowest.
413 int rd_opt_level_; // Deduced from method_.
414 int max_i4_header_bits_; // partition #0 safeness factor
415
416 // Memory
417 VP8MBInfo* mb_info_; // contextual macroblock infos (mb_w_ + 1)
418 uint8_t* preds_; // predictions modes: (4*mb_w+1) * (4*mb_h+1)
419 uint32_t* nz_; // non-zero bit context: mb_w+1
420 uint8_t* yuv_in_; // input samples
421 uint8_t* yuv_out_; // output samples
422 uint8_t* yuv_out2_; // secondary scratch out-buffer. swapped with yuv_out_.
423 uint8_t* yuv_p_; // scratch buffer for prediction
424 uint8_t *y_top_; // top luma samples.
425 uint8_t *uv_top_; // top u/v samples.
426 // U and V are packed into 16 pixels (8 U + 8 V)
427 uint8_t *y_left_; // left luma samples (adressable from index -1 to 15).
428 uint8_t *u_left_; // left u samples (adressable from index -1 to 7)
429 uint8_t *v_left_; // left v samples (adressable from index -1 to 7)
430
431 LFStats *lf_stats_; // autofilter stats (if NULL, autofilter is off)
432 };
433
434 //------------------------------------------------------------------------------
435 // internal functions. Not public.
436
437 // in tree.c
438 extern const uint8_t VP8CoeffsProba0[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS];
439 extern const uint8_t
440 VP8CoeffsUpdateProba[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS];
441 // Reset the token probabilities to their initial (default) values
442 void VP8DefaultProbas(VP8Encoder* const enc);
443 // Write the token probabilities
444 void VP8WriteProbas(VP8BitWriter* const bw, const VP8Proba* const probas);
445 // Writes the partition #0 modes (that is: all intra modes)
446 void VP8CodeIntraModes(VP8Encoder* const enc);
447
448 // in syntax.c
449 // Generates the final bitstream by coding the partition0 and headers,
450 // and appending an assembly of all the pre-coded token partitions.
451 // Return true if everything is ok.
452 int VP8EncWrite(VP8Encoder* const enc);
453 // Release memory allocated for bit-writing in VP8EncLoop & seq.
454 void VP8EncFreeBitWriters(VP8Encoder* const enc);
455
456 // in frame.c
457 extern const uint8_t VP8EncBands[16 + 1];
458 // Form all the four Intra16x16 predictions in the yuv_p_ cache
459 void VP8MakeLuma16Preds(const VP8EncIterator* const it);
460 // Form all the four Chroma8x8 predictions in the yuv_p_ cache
461 void VP8MakeChroma8Preds(const VP8EncIterator* const it);
462 // Form all the ten Intra4x4 predictions in the yuv_p_ cache
463 // for the 4x4 block it->i4_
464 void VP8MakeIntra4Preds(const VP8EncIterator* const it);
465 // Rate calculation
466 int VP8GetCostLuma16(VP8EncIterator* const it, const VP8ModeScore* const rd);
467 int VP8GetCostLuma4(VP8EncIterator* const it, const int16_t levels[16]);
468 int VP8GetCostUV(VP8EncIterator* const it, const VP8ModeScore* const rd);
469 // Main stat / coding passes
470 int VP8EncLoop(VP8Encoder* const enc);
471 int VP8StatLoop(VP8Encoder* const enc);
472
473 // in webpenc.c
474 // Assign an error code to a picture. Return false for convenience.
475 int WebPEncodingSetError(const WebPPicture* const pic, WebPEncodingError error);
476 int WebPReportProgress(const WebPPicture* const pic,
477 int percent, int* const percent_store);
478
479 // in analysis.c
480 // Main analysis loop. Decides the segmentations and complexity.
481 // Assigns a first guess for Intra16 and uvmode_ prediction modes.
482 int VP8EncAnalyze(VP8Encoder* const enc);
483
484 // in quant.c
485 // Sets up segment's quantization values, base_quant_ and filter strengths.
486 void VP8SetSegmentParams(VP8Encoder* const enc, float quality);
487 // Pick best modes and fills the levels. Returns true if skipped.
488 int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd, int rd_opt);
489
490 // in alpha.c
491 void VP8EncInitAlpha(VP8Encoder* const enc); // initialize alpha compression
492 int VP8EncFinishAlpha(VP8Encoder* const enc); // finalize compressed data
493 void VP8EncDeleteAlpha(VP8Encoder* const enc); // delete compressed data
494
495 // in layer.c
496 void VP8EncInitLayer(VP8Encoder* const enc); // init everything
497 void VP8EncCodeLayerBlock(VP8EncIterator* it); // code one more macroblock
498 int VP8EncFinishLayer(VP8Encoder* const enc); // finalize coding
499 void VP8EncDeleteLayer(VP8Encoder* enc); // reclaim memory
500
501 // in filter.c
502
503 // SSIM utils
// Statistics accumulated and consumed by the VP8SSIM*() helpers.
typedef struct {
  double w;
  double xm;
  double ym;
  double xxm;
  double xym;
  double yym;
} DistoStats;
507 void VP8SSIMAddStats(const DistoStats* const src, DistoStats* const dst);
508 void VP8SSIMAccumulatePlane(const uint8_t* src1, int stride1,
509 const uint8_t* src2, int stride2,
510 int W, int H, DistoStats* const stats);
511 double VP8SSIMGet(const DistoStats* const stats);
512 double VP8SSIMGetSquaredError(const DistoStats* const stats);
513
514 // autofilter
515 void VP8InitFilter(VP8EncIterator* const it);
516 void VP8StoreFilterStats(VP8EncIterator* const it);
517 void VP8AdjustFilterStrength(VP8EncIterator* const it);
518
519 //------------------------------------------------------------------------------
520
521 #if defined(__cplusplus) || defined(c_plusplus)
522 } // extern "C"
523 #endif
524
525 #endif /* WEBP_ENC_VP8ENCI_H_ */
526