// Copyright 2011 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Quantization
//
// Author: Skal (pascal.massimino@gmail.com)

#include <assert.h>
#include <math.h>

#include "./vp8enci.h"
#include "./cost.h"

#define DO_TRELLIS_I4  1
#define DO_TRELLIS_I16 1   // not a huge gain, but ok at low bitrate.
#define DO_TRELLIS_UV  0   // disable trellis for UV. Risky. Not worth it.
#define USE_TDISTO 1

#define MID_ALPHA 64      // neutral value for susceptibility
#define MIN_ALPHA 30      // lowest usable value for susceptibility
#define MAX_ALPHA 100     // highest meaningful value for susceptibility

#define SNS_TO_DQ 0.9     // Scaling constant between the sns value and the QP
                          // power-law modulation. Must be strictly less than 1.
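// Example (illustrative): with sns_strength == 50, the modulation amplitude
// used in VP8SetSegmentParams() is amp = 0.9 * 50 / 100. / 128. ~= 0.0035,
// so a segment with susceptibility alpha_ == 30 gets an exponent of
// expn = 1. - 0.0035 * 30 ~= 0.89 applied to the base compression factor.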

#define I4_PENALTY 4000   // Rate-penalty for quick i4/i16 decision

#define MULT_8B(a, b) (((a) * (b) + 128) >> 8)
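// Example (illustrative): MULT_8B() is a rounded Q8 fixed-point multiply,
// i.e. ~(a * b) / 256. For instance MULT_8B(128, 100) = (12800 + 128) >> 8
// = 50, matching 0.5 * 100.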

#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif

//------------------------------------------------------------------------------

static WEBP_INLINE int clip(int v, int m, int M) {
  return v < m ? m : v > M ? M : v;
}

static const uint8_t kZigzag[16] = {
  0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
};

static const uint8_t kDcTable[128] = {
  4, 5, 6, 7, 8, 9, 10, 10,
  11, 12, 13, 14, 15, 16, 17, 17,
  18, 19, 20, 20, 21, 21, 22, 22,
  23, 23, 24, 25, 25, 26, 27, 28,
  29, 30, 31, 32, 33, 34, 35, 36,
  37, 37, 38, 39, 40, 41, 42, 43,
  44, 45, 46, 46, 47, 48, 49, 50,
  51, 52, 53, 54, 55, 56, 57, 58,
  59, 60, 61, 62, 63, 64, 65, 66,
  67, 68, 69, 70, 71, 72, 73, 74,
  75, 76, 76, 77, 78, 79, 80, 81,
  82, 83, 84, 85, 86, 87, 88, 89,
  91, 93, 95, 96, 98, 100, 101, 102,
  104, 106, 108, 110, 112, 114, 116, 118,
  122, 124, 126, 128, 130, 132, 134, 136,
  138, 140, 143, 145, 148, 151, 154, 157
};

static const uint16_t kAcTable[128] = {
  4, 5, 6, 7, 8, 9, 10, 11,
  12, 13, 14, 15, 16, 17, 18, 19,
  20, 21, 22, 23, 24, 25, 26, 27,
  28, 29, 30, 31, 32, 33, 34, 35,
  36, 37, 38, 39, 40, 41, 42, 43,
  44, 45, 46, 47, 48, 49, 50, 51,
  52, 53, 54, 55, 56, 57, 58, 60,
  62, 64, 66, 68, 70, 72, 74, 76,
  78, 80, 82, 84, 86, 88, 90, 92,
  94, 96, 98, 100, 102, 104, 106, 108,
  110, 112, 114, 116, 119, 122, 125, 128,
  131, 134, 137, 140, 143, 146, 149, 152,
  155, 158, 161, 164, 167, 170, 173, 177,
  181, 185, 189, 193, 197, 201, 205, 209,
  213, 217, 221, 225, 229, 234, 239, 245,
  249, 254, 259, 264, 269, 274, 279, 284
};

static const uint16_t kAcTable2[128] = {
  8, 8, 9, 10, 12, 13, 15, 17,
  18, 20, 21, 23, 24, 26, 27, 29,
  31, 32, 34, 35, 37, 38, 40, 41,
  43, 44, 46, 48, 49, 51, 52, 54,
  55, 57, 58, 60, 62, 63, 65, 66,
  68, 69, 71, 72, 74, 75, 77, 79,
  80, 82, 83, 85, 86, 88, 89, 93,
  96, 99, 102, 105, 108, 111, 114, 117,
  120, 124, 127, 130, 133, 136, 139, 142,
  145, 148, 151, 155, 158, 161, 164, 167,
  170, 173, 176, 179, 184, 189, 193, 198,
  203, 207, 212, 217, 221, 226, 230, 235,
  240, 244, 249, 254, 258, 263, 268, 274,
  280, 286, 292, 299, 305, 311, 317, 323,
  330, 336, 342, 348, 354, 362, 370, 379,
  385, 393, 401, 409, 416, 424, 432, 440
};

static const uint16_t kCoeffThresh[16] = {
  0, 10, 20, 30,
  10, 20, 30, 30,
  20, 30, 30, 30,
  30, 30, 30, 30
};

// TODO(skal): tune more. Coeff thresholding?
static const uint8_t kBiasMatrices[3][16] = {  // [3] = [luma-ac,luma-dc,chroma]
  { 96, 96, 96, 96,
    96, 96, 96, 96,
    96, 96, 96, 96,
    96, 96, 96, 96 },
  { 96, 96, 96, 96,
    96, 96, 96, 96,
    96, 96, 96, 96,
    96, 96, 96, 96 },
  { 96, 96, 96, 96,
    96, 96, 96, 96,
    96, 96, 96, 96,
    96, 96, 96, 96 }
};

// Sharpening by (slightly) raising the hi-frequency coeffs (only for trellis).
// Hack-ish but helpful for mid-bitrate range. Use with care.
static const uint8_t kFreqSharpening[16] = {
  0, 30, 60, 90,
  30, 60, 90, 90,
  60, 90, 90, 90,
  90, 90, 90, 90
};

//------------------------------------------------------------------------------
// Initialize quantization parameters in VP8Matrix

// Returns the average quantizer
static int ExpandMatrix(VP8Matrix* const m, int type) {
  int i;
  int sum = 0;
  for (i = 2; i < 16; ++i) {
    m->q_[i] = m->q_[1];
  }
  for (i = 0; i < 16; ++i) {
    const int j = kZigzag[i];
    const int bias = kBiasMatrices[type][j];
    m->iq_[j] = (1 << QFIX) / m->q_[j];
    m->bias_[j] = BIAS(bias);
    // TODO(skal): tune kCoeffThresh[]
    m->zthresh_[j] = ((256 /*+ kCoeffThresh[j]*/ - bias) * m->q_[j] + 127) >> 8;
    m->sharpen_[j] = (kFreqSharpening[j] * m->q_[j]) >> 11;
    sum += m->q_[j];
  }
  return (sum + 8) >> 4;
}
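
// Example (illustrative, assuming QFIX == 17 and BIAS(b) == (b) << (QFIX - 8)
// as defined in vp8enci.h): with q_[j] == 24, iq_[j] = 131072 / 24 = 5461.
// A coefficient of 100 then quantizes via QUANTDIV() to
// (100 * 5461 + BIAS(96)) >> 17 = (546100 + 49152) >> 17 = 4,
// close to the exact 100 / 24 ~= 4.17 but biased slightly downward.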

static void SetupMatrices(VP8Encoder* enc) {
  int i;
  const int tlambda_scale =
    (enc->method_ >= 4) ? enc->config_->sns_strength
                        : 0;
  const int num_segments = enc->segment_hdr_.num_segments_;
  for (i = 0; i < num_segments; ++i) {
    VP8SegmentInfo* const m = &enc->dqm_[i];
    const int q = m->quant_;
    int q4, q16, quv;
    m->y1_.q_[0] = kDcTable[clip(q + enc->dq_y1_dc_, 0, 127)];
    m->y1_.q_[1] = kAcTable[clip(q, 0, 127)];

    m->y2_.q_[0] = kDcTable[ clip(q + enc->dq_y2_dc_, 0, 127)] * 2;
    m->y2_.q_[1] = kAcTable2[clip(q + enc->dq_y2_ac_, 0, 127)];

    m->uv_.q_[0] = kDcTable[clip(q + enc->dq_uv_dc_, 0, 117)];
    m->uv_.q_[1] = kAcTable[clip(q + enc->dq_uv_ac_, 0, 127)];

    q4  = ExpandMatrix(&m->y1_, 0);
    q16 = ExpandMatrix(&m->y2_, 1);
    quv = ExpandMatrix(&m->uv_, 2);

    // TODO: Switch to kLambda*[] tables?
    {
      m->lambda_i4_          = (3 * q4 * q4) >> 7;
      m->lambda_i16_         = (3 * q16 * q16);
      m->lambda_uv_          = (3 * quv * quv) >> 6;
      m->lambda_mode_        = (1 * q4 * q4) >> 7;
      m->lambda_trellis_i4_  = (7 * q4 * q4) >> 3;
      m->lambda_trellis_i16_ = (q16 * q16) >> 2;
      m->lambda_trellis_uv_  = (quv * quv) << 1;
      m->tlambda_            = (tlambda_scale * q4) >> 5;
    }
  }
}

//------------------------------------------------------------------------------
// Initialize filtering parameters

// Very small filter-strength values have close to no visual effect. So we can
// save a little decoding-CPU by turning filtering off for these.
#define FSTRENGTH_CUTOFF 3
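// Example (illustrative): with filter_strength == 60, a segment quantizer of
// 40 and beta_ == 0, level = 60 * 256 * 40 / 128 = 4800 and
// f = 4800 / 256 = 18, which lies within [FSTRENGTH_CUTOFF, 63] and is kept.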

static void SetupFilterStrength(VP8Encoder* const enc) {
  int i;
  const int level0 = enc->config_->filter_strength;
  for (i = 0; i < NUM_MB_SEGMENTS; ++i) {
    // Segments with lower quantizer will be less filtered. TODO: tune (wrt SNS)
    const int level = level0 * 256 * enc->dqm_[i].quant_ / 128;
    const int f = level / (256 + enc->dqm_[i].beta_);
    enc->dqm_[i].fstrength_ = (f < FSTRENGTH_CUTOFF) ? 0 : (f > 63) ? 63 : f;
  }
  // We record the initial strength (mainly for the case of 1-segment only).
  enc->filter_hdr_.level_ = enc->dqm_[0].fstrength_;
  enc->filter_hdr_.simple_ = (enc->config_->filter_type == 0);
  enc->filter_hdr_.sharpness_ = enc->config_->filter_sharpness;
}

//------------------------------------------------------------------------------

// Note: if you change the values below, remember that the max range
// allowed by the syntax for DQ_UV is [-16,16].
#define MAX_DQ_UV (6)
#define MIN_DQ_UV (-4)

// We want to emulate jpeg-like behaviour where the expected "good" quality
// is around q=75. Internally, our "good" middle is around c=50. So we
// map accordingly using a piece-wise linear function.
static double QualityToCompression(double c) {
  const double linear_c = (c < 0.75) ? c * (2. / 3.) : 2. * c - 1.;
  // The file size roughly scales as pow(quantizer, 3.). Actually, the
  // exponent is somewhere between 2.8 and 3.2, but we're mostly interested
  // in the mid-quant range. So we scale the compressibility inversely to
  // this power-law: quant ~= compression ^ 1/3. This law holds well for
  // low quant. Finer modelling for high-quant would make use of kAcTable[]
  // more explicitly.
  const double v = pow(linear_c, 1 / 3.);
  return v;
}
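
// Example (illustrative): quality 75 maps to c = 0.75, so
// linear_c = 2. * 0.75 - 1. = 0.5 and v = pow(0.5, 1 / 3.) ~= 0.794; with a
// neutral segment (expn ~= 1.), VP8SetSegmentParams() then yields
// q = (int)(127. * (1. - 0.794)) ~= 26.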

static double QualityToJPEGCompression(double c, double alpha) {
  // We map the complexity 'alpha' and quality setting 'c' to a compression
  // exponent empirically matched to the compression curve of libjpeg6b.
  // On average, the WebP output size will be roughly similar to that of a
  // JPEG file compressed with the same quality factor.
  const double amin = 0.30;
  const double amax = 0.85;
  const double exp_min = 0.4;
  const double exp_max = 0.9;
  const double slope = (exp_min - exp_max) / (amax - amin);
  // Linearly interpolate 'expn' from exp_min to exp_max
  // in the [amin, amax] range.
  const double expn = (alpha > amax) ? exp_min
                    : (alpha < amin) ? exp_max
                    : exp_max + slope * (alpha - amin);
  const double v = pow(c, expn);
  return v;
}
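
// Example (illustrative): for a mid-complexity image with alpha = 0.5 and
// c = 0.75, expn = 0.9 + (0.4 - 0.9) / (0.85 - 0.30) * (0.5 - 0.30) ~= 0.72,
// so v = pow(0.75, 0.72) ~= 0.81, i.e. slightly less compression than the
// non-JPEG mapping picks for the same quality.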

static int SegmentsAreEquivalent(const VP8SegmentInfo* const S1,
                                 const VP8SegmentInfo* const S2) {
  return (S1->quant_ == S2->quant_) && (S1->fstrength_ == S2->fstrength_);
}

static void SimplifySegments(VP8Encoder* const enc) {
  int map[NUM_MB_SEGMENTS] = { 0, 1, 2, 3 };
  const int num_segments = enc->segment_hdr_.num_segments_;
  int num_final_segments = 1;
  int s1, s2;
  for (s1 = 1; s1 < num_segments; ++s1) {    // find similar segments
    const VP8SegmentInfo* const S1 = &enc->dqm_[s1];
    int found = 0;
    // check if we already have a similar segment
    for (s2 = 0; s2 < num_final_segments; ++s2) {
      const VP8SegmentInfo* const S2 = &enc->dqm_[s2];
      if (SegmentsAreEquivalent(S1, S2)) {
        found = 1;
        break;
      }
    }
    map[s1] = s2;
    if (!found) {
      if (num_final_segments != s1) {
        enc->dqm_[num_final_segments] = enc->dqm_[s1];
      }
      ++num_final_segments;
    }
  }
  if (num_final_segments < num_segments) {   // Remap
    int i = enc->mb_w_ * enc->mb_h_;
    while (i-- > 0) enc->mb_info_[i].segment_ = map[enc->mb_info_[i].segment_];
    enc->segment_hdr_.num_segments_ = num_final_segments;
    // Replicate the trailing segment infos (it's mostly cosmetic)
    for (i = num_final_segments; i < num_segments; ++i) {
      enc->dqm_[i] = enc->dqm_[num_final_segments - 1];
    }
  }
}
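
// Example (illustrative): if the four segments end up with (quant_, fstrength_)
// pairs { (12, 20), (12, 20), (30, 40), (30, 40) }, only two distinct segments
// are kept, map[] becomes { 0, 0, 1, 1 }, and every macroblock's segment_ id
// is remapped accordingly.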

void VP8SetSegmentParams(VP8Encoder* const enc, float quality) {
  int i;
  int dq_uv_ac, dq_uv_dc;
  const int num_segments = enc->segment_hdr_.num_segments_;
  const double amp = SNS_TO_DQ * enc->config_->sns_strength / 100. / 128.;
  const double Q = quality / 100.;
  const double c_base = enc->config_->emulate_jpeg_size ?
      QualityToJPEGCompression(Q, enc->alpha_ / 255.) :
      QualityToCompression(Q);
  for (i = 0; i < num_segments; ++i) {
    // We modulate the base coefficient to account for the quantization
    // susceptibility and allow denser segments to be quantized more.
    const double expn = 1. - amp * enc->dqm_[i].alpha_;
    const double c = pow(c_base, expn);
    const int q = (int)(127. * (1. - c));
    assert(expn > 0.);
    enc->dqm_[i].quant_ = clip(q, 0, 127);
  }

  // purely indicative in the bitstream (except for the 1-segment case)
  enc->base_quant_ = enc->dqm_[0].quant_;

  // fill-in values for the unused segments (required by the syntax)
  for (i = num_segments; i < NUM_MB_SEGMENTS; ++i) {
    enc->dqm_[i].quant_ = enc->base_quant_;
  }

  // uv_alpha_ is normally spread around ~60. The useful range is
  // typically ~30 (quite bad) to ~100 (ok to decimate UV more).
  // We map it to the safe maximal range of MAX/MIN_DQ_UV for dq_uv.
  dq_uv_ac = (enc->uv_alpha_ - MID_ALPHA) * (MAX_DQ_UV - MIN_DQ_UV)
                                          / (MAX_ALPHA - MIN_ALPHA);
  // we rescale by the user-defined strength of adaptation
  dq_uv_ac = dq_uv_ac * enc->config_->sns_strength / 100;
  // and make it safe.
  dq_uv_ac = clip(dq_uv_ac, MIN_DQ_UV, MAX_DQ_UV);
  // We also boost the dc-uv-quant a little, based on sns-strength, since
  // U/V channels are much more reactive to high quants (flat DC-blocks
  // tend to appear, and are unpleasant).
  dq_uv_dc = -4 * enc->config_->sns_strength / 100;
  dq_uv_dc = clip(dq_uv_dc, -15, 15);   // 4bit-signed max allowed

  enc->dq_y1_dc_ = 0;       // TODO(skal): dq-lum
  enc->dq_y2_dc_ = 0;
  enc->dq_y2_ac_ = 0;
  enc->dq_uv_dc_ = dq_uv_dc;
  enc->dq_uv_ac_ = dq_uv_ac;

  SetupFilterStrength(enc);   // initialize segments' filtering, if needed

  if (num_segments > 1) SimplifySegments(enc);

  SetupMatrices(enc);         // finalize quantization matrices
}

//------------------------------------------------------------------------------
// Form the predictions in cache

// Must be ordered using {DC_PRED, TM_PRED, V_PRED, H_PRED} as index
const int VP8I16ModeOffsets[4] = { I16DC16, I16TM16, I16VE16, I16HE16 };
const int VP8UVModeOffsets[4] = { C8DC8, C8TM8, C8VE8, C8HE8 };

// Must be indexed using {B_DC_PRED -> B_HU_PRED} as index
const int VP8I4ModeOffsets[NUM_BMODES] = {
  I4DC4, I4TM4, I4VE4, I4HE4, I4RD4, I4VR4, I4LD4, I4VL4, I4HD4, I4HU4
};

void VP8MakeLuma16Preds(const VP8EncIterator* const it) {
  const VP8Encoder* const enc = it->enc_;
  const uint8_t* const left = it->x_ ? enc->y_left_ : NULL;
  const uint8_t* const top = it->y_ ? enc->y_top_ + it->x_ * 16 : NULL;
  VP8EncPredLuma16(it->yuv_p_, left, top);
}

void VP8MakeChroma8Preds(const VP8EncIterator* const it) {
  const VP8Encoder* const enc = it->enc_;
  const uint8_t* const left = it->x_ ? enc->u_left_ : NULL;
  const uint8_t* const top = it->y_ ? enc->uv_top_ + it->x_ * 16 : NULL;
  VP8EncPredChroma8(it->yuv_p_, left, top);
}

void VP8MakeIntra4Preds(const VP8EncIterator* const it) {
  VP8EncPredLuma4(it->yuv_p_, it->i4_top_);
}

//------------------------------------------------------------------------------
// Quantize

// Layout:
// +----+
// |YYYY| 0
// |YYYY| 4
// |YYYY| 8
// |YYYY| 12
// +----+
// |UUVV| 16
// |UUVV| 20
// +----+

const int VP8Scan[16 + 4 + 4] = {
  // Luma
  0 +  0 * BPS,  4 +  0 * BPS, 8 +  0 * BPS, 12 +  0 * BPS,
  0 +  4 * BPS,  4 +  4 * BPS, 8 +  4 * BPS, 12 +  4 * BPS,
  0 +  8 * BPS,  4 +  8 * BPS, 8 +  8 * BPS, 12 +  8 * BPS,
  0 + 12 * BPS,  4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS,

  0 + 0 * BPS,   4 + 0 * BPS, 0 + 4 * BPS,  4 + 4 * BPS,    // U
  8 + 0 * BPS,  12 + 0 * BPS, 8 + 4 * BPS, 12 + 4 * BPS     // V
};
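
// Example (illustrative): VP8Scan[5] == 4 + 4 * BPS is the byte offset, within
// a BPS-stride cache, of the second 4x4 luma sub-block of the second row;
// VP8Scan[16..19] and VP8Scan[20..23] address the four U and four V sub-blocks
// of the side-by-side UUVV area sketched above.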

//------------------------------------------------------------------------------
// Distortion measurement

static const uint16_t kWeightY[16] = {
  38, 32, 20, 9, 32, 28, 17, 7, 20, 17, 10, 4, 9, 7, 4, 2
};

static const uint16_t kWeightTrellis[16] = {
#if USE_TDISTO == 0
  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16
#else
  30, 27, 19, 11,
  27, 24, 17, 10,
  19, 17, 12,  8,
  11, 10,  8,  6
#endif
};

// Init/Copy the common fields in score.
static void InitScore(VP8ModeScore* const rd) {
  rd->D = 0;
  rd->SD = 0;
  rd->R = 0;
  rd->nz = 0;
  rd->score = MAX_COST;
}

static void CopyScore(VP8ModeScore* const dst, const VP8ModeScore* const src) {
  dst->D = src->D;
  dst->SD = src->SD;
  dst->R = src->R;
  dst->nz = src->nz;      // note that nz is not accumulated, but just copied.
  dst->score = src->score;
}

static void AddScore(VP8ModeScore* const dst, const VP8ModeScore* const src) {
  dst->D += src->D;
  dst->SD += src->SD;
  dst->R += src->R;
  dst->nz |= src->nz;     // here, new nz bits are accumulated.
  dst->score += src->score;
}

//------------------------------------------------------------------------------
// Performs trellis-optimized quantization.

// Trellis

typedef struct {
  int prev;        // best previous
  int level;       // level
  int sign;        // sign of coeff_i
  score_t cost;    // bit cost
  score_t error;   // distortion = sum of (|coeff_i| - level_i * Q_i)^2
  int ctx;         // context (only depends on 'level'. Could be spared.)
} Node;

// If a coefficient was quantized to a value Q (using a neutral bias),
// we test all alternate possibilities between [Q-MIN_DELTA, Q+MAX_DELTA].
// We don't test negative values though.
#define MIN_DELTA 0   // how much lower level to try
#define MAX_DELTA 1   // how much higher
#define NUM_NODES (MIN_DELTA + 1 + MAX_DELTA)
#define NODE(n, l) (nodes[(n) + 1][(l) + MIN_DELTA])
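// Example (illustrative): with MIN_DELTA == 0 and MAX_DELTA == 1, each of the
// 16 coefficient positions carries NUM_NODES == 2 candidate levels,
// {level0, level0 + 1}; NODE(first - 1, m) is the synthetic source node,
// which is why the first array index is offset by +1.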

static WEBP_INLINE void SetRDScore(int lambda, VP8ModeScore* const rd) {
  // TODO: incorporate the "* 256" in the tables?
  rd->score = rd->R * lambda + 256 * (rd->D + rd->SD);
}

static WEBP_INLINE score_t RDScoreTrellis(int lambda, score_t rate,
                                          score_t distortion) {
  return rate * lambda + 256 * distortion;
}
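
// Note (illustrative): both scores implement the usual Lagrangian
// J = R * lambda + 256 * D; the fixed 256 factor presumably keeps the
// distortion term commensurate with the fixed-point units of the bit-cost
// tables (cf. the TODO above), so a larger lambda trades distortion away
// for fewer bits.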

static int TrellisQuantizeBlock(const VP8EncIterator* const it,
                                int16_t in[16], int16_t out[16],
                                int ctx0, int coeff_type,
                                const VP8Matrix* const mtx,
                                int lambda) {
  ProbaArray* const last_costs = it->enc_->proba_.coeffs_[coeff_type];
  CostArray* const costs = it->enc_->proba_.level_cost_[coeff_type];
  const int first = (coeff_type == 0) ? 1 : 0;
  Node nodes[17][NUM_NODES];
  int best_path[3] = {-1, -1, -1};   // store best-last/best-level/best-previous
  score_t best_score;
  int best_node;
  int last = first - 1;
  int n, m, p, nz;

  {
    score_t cost;
    score_t max_error;
    const int thresh = mtx->q_[1] * mtx->q_[1] / 4;
    const int last_proba = last_costs[VP8EncBands[first]][ctx0][0];

    // compute maximal distortion.
    max_error = 0;
    for (n = first; n < 16; ++n) {
      const int j = kZigzag[n];
      const int err = in[j] * in[j];
      max_error += kWeightTrellis[j] * err;
      if (err > thresh) last = n;
    }
    // we don't need to inspect all 16 coefficients: going up to last + 1
    // (inclusive) is enough, without losing much.
    if (last < 15) ++last;

    // compute 'skip' score, i.e. the rate/distortion of dropping the whole
    // block. This is the initial score to beat.
    cost = VP8BitCost(0, last_proba);
    best_score = RDScoreTrellis(lambda, cost, max_error);

    // initialize source node.
    n = first - 1;
    for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) {
      NODE(n, m).cost = 0;
      NODE(n, m).error = max_error;
      NODE(n, m).ctx = ctx0;
    }
  }

  // traverse trellis.
  for (n = first; n <= last; ++n) {
    const int j = kZigzag[n];
    const int Q = mtx->q_[j];
    const int iQ = mtx->iq_[j];
    const int B = BIAS(0x00);   // neutral bias
    // note: it's important to take the sign of the _original_ coeff,
    // so we don't have to consider level < 0 afterward.
    const int sign = (in[j] < 0);
    int coeff0 = (sign ? -in[j] : in[j]) + mtx->sharpen_[j];
    int level0;
    if (coeff0 > 2047) coeff0 = 2047;

    level0 = QUANTDIV(coeff0, iQ, B);
    // test all alternate level values around level0.
    for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) {
      Node* const cur = &NODE(n, m);
      int delta_error, new_error;
      score_t cur_score = MAX_COST;
      int level = level0 + m;
      int last_proba;

      cur->sign = sign;
      cur->level = level;
      cur->ctx = (level == 0) ? 0 : (level == 1) ? 1 : 2;
      if (level >= 2048 || level < 0) {   // node is dead?
        cur->cost = MAX_COST;
        continue;
      }
      last_proba = last_costs[VP8EncBands[n + 1]][cur->ctx][0];

      // Compute delta_error = how much distortion coding this level
      // removes from max_error.
      new_error = coeff0 - level * Q;
      delta_error =
          kWeightTrellis[j] * (coeff0 * coeff0 - new_error * new_error);

      // Inspect all possible non-dead predecessors. Retain only the best one.
      for (p = -MIN_DELTA; p <= MAX_DELTA; ++p) {
        const Node* const prev = &NODE(n - 1, p);
        const int prev_ctx = prev->ctx;
        const uint16_t* const tcost = costs[VP8EncBands[n]][prev_ctx];
        const score_t total_error = prev->error - delta_error;
        score_t cost, base_cost, score;

        if (prev->cost >= MAX_COST) {   // dead node?
          continue;
        }

        // Base cost of both terminal/non-terminal
        base_cost = prev->cost + VP8LevelCost(tcost, level);

        // Examine node assuming it's a non-terminal one.
        cost = base_cost;
        if (level && n < 15) {
          cost += VP8BitCost(1, last_proba);
        }
        score = RDScoreTrellis(lambda, cost, total_error);
        if (score < cur_score) {
          cur_score = score;
          cur->cost = cost;
          cur->error = total_error;
          cur->prev = p;
        }

        // Now, record best terminal node (and thus best entry in the graph).
        if (level) {
          cost = base_cost;
          if (n < 15) cost += VP8BitCost(0, last_proba);
          score = RDScoreTrellis(lambda, cost, total_error);
          if (score < best_score) {
            best_score = score;
            best_path[0] = n;   // best eob position
            best_path[1] = m;   // best level
            best_path[2] = p;   // best predecessor
          }
        }
      }
    }
  }

  // Fresh start
  memset(in + first, 0, (16 - first) * sizeof(*in));
  memset(out + first, 0, (16 - first) * sizeof(*out));
  if (best_path[0] == -1) {
    return 0;   // skip!
  }

  // Unwind the best path.
  // Note: best-prev on the terminal node is not necessarily equal to the
  // best-prev for a non-terminal one. So we patch best_path[2] in.
  n = best_path[0];
  best_node = best_path[1];
  NODE(n, best_node).prev = best_path[2];   // force best-prev for terminal
  nz = 0;

  for (; n >= first; --n) {
    const Node* const node = &NODE(n, best_node);
    const int j = kZigzag[n];
    out[n] = node->sign ? -node->level : node->level;
    nz |= (node->level != 0);
    in[j] = out[n] * mtx->q_[j];
    best_node = node->prev;
  }
  return nz;
}

#undef NODE

//------------------------------------------------------------------------------
// Performs: difference, transform, quantize, back-transform, add
// all at once. Output is the reconstructed block in *yuv_out, and the
// quantized levels in *levels.
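//
// Note (illustrative): the 'nz' value assembled by the functions below is a
// bitmask of non-zero blocks: bits 0..15 flag the sixteen luma AC blocks,
// bits 16..23 the eight chroma blocks (see the '<< 16' in ReconstructUV()),
// and bit 24 the luma DC (y2) block.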

static int ReconstructIntra16(VP8EncIterator* const it,
                              VP8ModeScore* const rd,
                              uint8_t* const yuv_out,
                              int mode) {
  const VP8Encoder* const enc = it->enc_;
  const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode];
  const uint8_t* const src = it->yuv_in_ + Y_OFF;
  const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
  int nz = 0;
  int n;
  int16_t tmp[16][16], dc_tmp[16];

  for (n = 0; n < 16; ++n) {
    VP8FTransform(src + VP8Scan[n], ref + VP8Scan[n], tmp[n]);
  }
  VP8FTransformWHT(tmp[0], dc_tmp);
  nz |= VP8EncQuantizeBlock(dc_tmp, rd->y_dc_levels, 0, &dqm->y2_) << 24;

  if (DO_TRELLIS_I16 && it->do_trellis_) {
    int x, y;
    VP8IteratorNzToBytes(it);
    for (y = 0, n = 0; y < 4; ++y) {
      for (x = 0; x < 4; ++x, ++n) {
        const int ctx = it->top_nz_[x] + it->left_nz_[y];
        const int non_zero =
            TrellisQuantizeBlock(it, tmp[n], rd->y_ac_levels[n], ctx, 0,
                                 &dqm->y1_, dqm->lambda_trellis_i16_);
        it->top_nz_[x] = it->left_nz_[y] = non_zero;
        nz |= non_zero << n;
      }
    }
  } else {
    for (n = 0; n < 16; ++n) {
      nz |= VP8EncQuantizeBlock(tmp[n], rd->y_ac_levels[n], 1, &dqm->y1_) << n;
    }
  }

  // Transform back
  VP8ITransformWHT(dc_tmp, tmp[0]);
  for (n = 0; n < 16; n += 2) {
    VP8ITransform(ref + VP8Scan[n], tmp[n], yuv_out + VP8Scan[n], 1);
  }

  return nz;
}

static int ReconstructIntra4(VP8EncIterator* const it,
                             int16_t levels[16],
                             const uint8_t* const src,
                             uint8_t* const yuv_out,
                             int mode) {
  const VP8Encoder* const enc = it->enc_;
  const uint8_t* const ref = it->yuv_p_ + VP8I4ModeOffsets[mode];
  const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
  int nz = 0;
  int16_t tmp[16];

  VP8FTransform(src, ref, tmp);
  if (DO_TRELLIS_I4 && it->do_trellis_) {
    const int x = it->i4_ & 3, y = it->i4_ >> 2;
    const int ctx = it->top_nz_[x] + it->left_nz_[y];
    nz = TrellisQuantizeBlock(it, tmp, levels, ctx, 3, &dqm->y1_,
                              dqm->lambda_trellis_i4_);
  } else {
    nz = VP8EncQuantizeBlock(tmp, levels, 0, &dqm->y1_);
  }
  VP8ITransform(ref, tmp, yuv_out, 0);
  return nz;
}

static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd,
                         uint8_t* const yuv_out, int mode) {
  const VP8Encoder* const enc = it->enc_;
  const uint8_t* const ref = it->yuv_p_ + VP8UVModeOffsets[mode];
  const uint8_t* const src = it->yuv_in_ + U_OFF;
  const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
  int nz = 0;
  int n;
  int16_t tmp[8][16];

  for (n = 0; n < 8; ++n) {
    VP8FTransform(src + VP8Scan[16 + n], ref + VP8Scan[16 + n], tmp[n]);
  }
  if (DO_TRELLIS_UV && it->do_trellis_) {
    int ch, x, y;
    for (ch = 0, n = 0; ch <= 2; ch += 2) {
      for (y = 0; y < 2; ++y) {
        for (x = 0; x < 2; ++x, ++n) {
          const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y];
          const int non_zero =
              TrellisQuantizeBlock(it, tmp[n], rd->uv_levels[n], ctx, 2,
                                   &dqm->uv_, dqm->lambda_trellis_uv_);
          it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] = non_zero;
          nz |= non_zero << n;
        }
      }
    }
  } else {
    for (n = 0; n < 8; ++n) {
      nz |= VP8EncQuantizeBlock(tmp[n], rd->uv_levels[n], 0, &dqm->uv_) << n;
    }
  }

  for (n = 0; n < 8; n += 2) {
    VP8ITransform(ref + VP8Scan[16 + n], tmp[n], yuv_out + VP8Scan[16 + n], 1);
  }
  return (nz << 16);
}

//------------------------------------------------------------------------------
// RD-opt decision. Reconstruct each mode, evaluate distortion and bit-cost.
// Pick the mode with the lowest RD-cost = Rate + lambda * Distortion.

static void SwapPtr(uint8_t** a, uint8_t** b) {
  uint8_t* const tmp = *a;
  *a = *b;
  *b = tmp;
}

static void SwapOut(VP8EncIterator* const it) {
  SwapPtr(&it->yuv_out_, &it->yuv_out2_);
}

static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* const rd) {
  const VP8Encoder* const enc = it->enc_;
  const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
  const int lambda = dqm->lambda_i16_;
  const int tlambda = dqm->tlambda_;
  const uint8_t* const src = it->yuv_in_ + Y_OFF;
  VP8ModeScore rd16;
  int mode;

  rd->mode_i16 = -1;
  for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
    uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF;   // scratch buffer
    int nz;

    // Reconstruct
    nz = ReconstructIntra16(it, &rd16, tmp_dst, mode);

    // Measure RD-score
    rd16.D = VP8SSE16x16(src, tmp_dst);
    rd16.SD = tlambda ? MULT_8B(tlambda, VP8TDisto16x16(src, tmp_dst, kWeightY))
                      : 0;
    rd16.R = VP8GetCostLuma16(it, &rd16);
    rd16.R += VP8FixedCostsI16[mode];

    // Since we always examine Intra16 first, we can overwrite *rd directly.
    SetRDScore(lambda, &rd16);
    if (mode == 0 || rd16.score < rd->score) {
      CopyScore(rd, &rd16);
      rd->mode_i16 = mode;
      rd->nz = nz;
      memcpy(rd->y_ac_levels, rd16.y_ac_levels, sizeof(rd16.y_ac_levels));
      memcpy(rd->y_dc_levels, rd16.y_dc_levels, sizeof(rd16.y_dc_levels));
      SwapOut(it);
    }
  }
  SetRDScore(dqm->lambda_mode_, rd);   // finalize score for mode decision.
  VP8SetIntra16Mode(it, rd->mode_i16);
}

//------------------------------------------------------------------------------

// return the cost array corresponding to the surrounding prediction modes.
static const uint16_t* GetCostModeI4(VP8EncIterator* const it,
                                     const uint8_t modes[16]) {
  const int preds_w = it->enc_->preds_w_;
  const int x = (it->i4_ & 3), y = it->i4_ >> 2;
  const int left = (x == 0) ? it->preds_[y * preds_w - 1] : modes[it->i4_ - 1];
  const int top = (y == 0) ? it->preds_[-preds_w + x] : modes[it->i4_ - 4];
  return VP8FixedCostsI4[top][left];
}

static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) {
  const VP8Encoder* const enc = it->enc_;
  const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
  const int lambda = dqm->lambda_i4_;
  const int tlambda = dqm->tlambda_;
  const uint8_t* const src0 = it->yuv_in_ + Y_OFF;
  uint8_t* const best_blocks = it->yuv_out2_ + Y_OFF;
  int total_header_bits = 0;
  VP8ModeScore rd_best;

  if (enc->max_i4_header_bits_ == 0) {
    return 0;
  }

  InitScore(&rd_best);
  rd_best.score = 211;   // '211' is the value of VP8BitCost(0, 145)
  VP8IteratorStartI4(it);
  do {
    VP8ModeScore rd_i4;
    int mode;
    int best_mode = -1;
    const uint8_t* const src = src0 + VP8Scan[it->i4_];
    const uint16_t* const mode_costs = GetCostModeI4(it, rd->modes_i4);
    uint8_t* best_block = best_blocks + VP8Scan[it->i4_];
    uint8_t* tmp_dst = it->yuv_p_ + I4TMP;   // scratch buffer.

    InitScore(&rd_i4);
    VP8MakeIntra4Preds(it);
    for (mode = 0; mode < NUM_BMODES; ++mode) {
      VP8ModeScore rd_tmp;
      int16_t tmp_levels[16];

      // Reconstruct
      rd_tmp.nz =
          ReconstructIntra4(it, tmp_levels, src, tmp_dst, mode) << it->i4_;

      // Compute RD-score
      rd_tmp.D = VP8SSE4x4(src, tmp_dst);
      rd_tmp.SD =
          tlambda ? MULT_8B(tlambda, VP8TDisto4x4(src, tmp_dst, kWeightY))
                  : 0;
      rd_tmp.R = VP8GetCostLuma4(it, tmp_levels);
      rd_tmp.R += mode_costs[mode];

      SetRDScore(lambda, &rd_tmp);
      if (best_mode < 0 || rd_tmp.score < rd_i4.score) {
        CopyScore(&rd_i4, &rd_tmp);
        best_mode = mode;
        SwapPtr(&tmp_dst, &best_block);
        memcpy(rd_best.y_ac_levels[it->i4_], tmp_levels, sizeof(tmp_levels));
      }
    }
    SetRDScore(dqm->lambda_mode_, &rd_i4);
    AddScore(&rd_best, &rd_i4);
    total_header_bits += mode_costs[best_mode];
    if (rd_best.score >= rd->score ||
        total_header_bits > enc->max_i4_header_bits_) {
      return 0;
    }
    // Copy selected samples if not in the right place already.
    if (best_block != best_blocks + VP8Scan[it->i4_])
      VP8Copy4x4(best_block, best_blocks + VP8Scan[it->i4_]);
    rd->modes_i4[it->i4_] = best_mode;
    it->top_nz_[it->i4_ & 3] = it->left_nz_[it->i4_ >> 2] = (rd_i4.nz ? 1 : 0);
  } while (VP8IteratorRotateI4(it, best_blocks));

  // finalize state
  CopyScore(rd, &rd_best);
  VP8SetIntra4Mode(it, rd->modes_i4);
  SwapOut(it);
  memcpy(rd->y_ac_levels, rd_best.y_ac_levels, sizeof(rd->y_ac_levels));
  return 1;   // select intra4x4 over intra16x16
}

//------------------------------------------------------------------------------

static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) {
  const VP8Encoder* const enc = it->enc_;
  const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
  const int lambda = dqm->lambda_uv_;
  const uint8_t* const src = it->yuv_in_ + U_OFF;
  uint8_t* const tmp_dst = it->yuv_out2_ + U_OFF;   // scratch buffer
  uint8_t* const dst0 = it->yuv_out_ + U_OFF;
  VP8ModeScore rd_best;
  int mode;

  rd->mode_uv = -1;
  InitScore(&rd_best);
  for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
    VP8ModeScore rd_uv;

    // Reconstruct
    rd_uv.nz = ReconstructUV(it, &rd_uv, tmp_dst, mode);

    // Compute RD-score
    rd_uv.D = VP8SSE16x8(src, tmp_dst);
    rd_uv.SD = 0;   // TODO: should we call TDisto? it tends to flatten areas.
    rd_uv.R = VP8GetCostUV(it, &rd_uv);
    rd_uv.R += VP8FixedCostsUV[mode];

    SetRDScore(lambda, &rd_uv);
    if (mode == 0 || rd_uv.score < rd_best.score) {
      CopyScore(&rd_best, &rd_uv);
      rd->mode_uv = mode;
      memcpy(rd->uv_levels, rd_uv.uv_levels, sizeof(rd->uv_levels));
      memcpy(dst0, tmp_dst, UV_SIZE);   // TODO: SwapUVOut() ?
    }
  }
  VP8SetIntraUVMode(it, rd->mode_uv);
  AddScore(rd, &rd_best);
}

//------------------------------------------------------------------------------
// Final reconstruction and quantization.

static void SimpleQuantize(VP8EncIterator* const it, VP8ModeScore* const rd) {
  const VP8Encoder* const enc = it->enc_;
  const int is_i16 = (it->mb_->type_ == 1);
  int nz = 0;

  if (is_i16) {
    nz = ReconstructIntra16(it, rd, it->yuv_out_ + Y_OFF, it->preds_[0]);
  } else {
    VP8IteratorStartI4(it);
    do {
      const int mode =
          it->preds_[(it->i4_ & 3) + (it->i4_ >> 2) * enc->preds_w_];
      const uint8_t* const src = it->yuv_in_ + Y_OFF + VP8Scan[it->i4_];
      uint8_t* const dst = it->yuv_out_ + Y_OFF + VP8Scan[it->i4_];
      VP8MakeIntra4Preds(it);
      nz |= ReconstructIntra4(it, rd->y_ac_levels[it->i4_],
                              src, dst, mode) << it->i4_;
    } while (VP8IteratorRotateI4(it, it->yuv_out_ + Y_OFF));
  }

  nz |= ReconstructUV(it, rd, it->yuv_out_ + U_OFF, it->mb_->uv_mode_);
  rd->nz = nz;
}

// Refine intra16/intra4 sub-modes based on distortion only (not rate).
static void DistoRefine(VP8EncIterator* const it, int try_both_i4_i16) {
  const int is_i16 = (it->mb_->type_ == 1);
  score_t best_score = MAX_COST;

  if (try_both_i4_i16 || is_i16) {
    int mode;
    int best_mode = -1;
    for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
      const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode];
      const uint8_t* const src = it->yuv_in_ + Y_OFF;
      const score_t score = VP8SSE16x16(src, ref);
      if (score < best_score) {
        best_mode = mode;
        best_score = score;
      }
    }
    VP8SetIntra16Mode(it, best_mode);
  }
  if (try_both_i4_i16 || !is_i16) {
    uint8_t modes_i4[16];
    // We don't evaluate the rate here, but just account for it through a
    // constant penalty (i4 mode usually needs more bits compared to i16).
    score_t score_i4 = (score_t)I4_PENALTY;

    VP8IteratorStartI4(it);
    do {
      int mode;
      int best_sub_mode = -1;
      score_t best_sub_score = MAX_COST;
      const uint8_t* const src = it->yuv_in_ + Y_OFF + VP8Scan[it->i4_];

      // TODO(skal): we don't really need the prediction pixels here,
      // but just the distortion against 'src'.
      VP8MakeIntra4Preds(it);
      for (mode = 0; mode < NUM_BMODES; ++mode) {
        const uint8_t* const ref = it->yuv_p_ + VP8I4ModeOffsets[mode];
        const score_t score = VP8SSE4x4(src, ref);
        if (score < best_sub_score) {
          best_sub_mode = mode;
          best_sub_score = score;
        }
      }
      modes_i4[it->i4_] = best_sub_mode;
      score_i4 += best_sub_score;
      if (score_i4 >= best_score) break;
    } while (VP8IteratorRotateI4(it, it->yuv_in_ + Y_OFF));
    if (score_i4 < best_score) {
      VP8SetIntra4Mode(it, modes_i4);
    }
  }
}
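
// Note (illustrative): I4_PENALTY acts as a fixed rate surrogate here: since
// score_i4 starts at I4_PENALTY and then accumulates per-block SSE, intra4
// wins over intra16 only when its total prediction SSE undercuts the best
// intra16 SSE by more than 4000.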

//------------------------------------------------------------------------------
// Entry point

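// Note (illustrative): the rd_opt ladder below works as follows: above
// RD_OPT_NONE, a full rate-distortion mode search is run (with trellis
// quantization folded in from RD_OPT_TRELLIS_ALL upward, or applied as a
// final extra pass for RD_OPT_TRELLIS); at RD_OPT_NONE, modes are merely
// refined by distortion and quantized in one simple pass.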
int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd,
                VP8RDLevel rd_opt) {
  int is_skipped;
  const int method = it->enc_->method_;

  InitScore(rd);

  // We can perform predictions for Luma16x16 and Chroma8x8 already.
  // Luma4x4 predictions need to be done as-we-go.
  VP8MakeLuma16Preds(it);
  VP8MakeChroma8Preds(it);

  if (rd_opt > RD_OPT_NONE) {
    it->do_trellis_ = (rd_opt >= RD_OPT_TRELLIS_ALL);
    PickBestIntra16(it, rd);
    if (method >= 2) {
      PickBestIntra4(it, rd);
    }
    PickBestUV(it, rd);
    if (rd_opt == RD_OPT_TRELLIS) {   // finish off with trellis-optim now
      it->do_trellis_ = 1;
      SimpleQuantize(it, rd);
    }
  } else {
    // For method == 2, pick the best intra4/intra16 based on SSE (a tad
    // slower). For method <= 1, we refine intra4 or intra16 (but don't
    // re-examine the mode choice).
    DistoRefine(it, (method >= 2));
    SimpleQuantize(it, rd);
  }
  is_skipped = (rd->nz == 0);
  VP8SetSkip(it, is_skipped);
  return is_skipped;
}

#if defined(__cplusplus) || defined(c_plusplus)
}    // extern "C"
#endif