1 // Copyright 2014 Google Inc. All Rights Reserved.
2 //
3 // Use of this source code is governed by a BSD-style license
4 // that can be found in the COPYING file in the root of the source
5 // tree. An additional intellectual property rights grant can be found
6 // in the file PATENTS. All contributing project authors may
7 // be found in the AUTHORS file in the root of the source tree.
8 // -----------------------------------------------------------------------------
9 //
10 // WebPPicture utils for colorspace conversion
11 //
12 // Author: Skal (pascal.massimino@gmail.com)
13
14 #include <assert.h>
15 #include <stdlib.h>
16 #include <math.h>
17
18 #include "src/enc/vp8i_enc.h"
19 #include "src/utils/random_utils.h"
20 #include "src/utils/utils.h"
21 #include "src/dsp/dsp.h"
22 #include "src/dsp/lossless.h"
23 #include "src/dsp/yuv.h"
24
25 // Comment out to disable gamma-compression during RGB->U/V averaging
26 #define USE_GAMMA_COMPRESSION
27
28 // If defined, use a table to compute x / alpha.
29 #define USE_INVERSE_ALPHA_TABLE
30
31 #ifdef WORDS_BIGENDIAN
32 #define ALPHA_OFFSET 0 // uint32_t 0xff000000 is 0xff,00,00,00 in memory
33 #else
34 #define ALPHA_OFFSET 3 // uint32_t 0xff000000 is 0x00,00,00,ff in memory
35 #endif
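// With this convention, the alpha byte of pixel argb[i] can be read as
// ((const uint8_t*)&argb[i])[ALPHA_OFFSET]; this is how CheckNonOpaque()
// below scans an ARGB buffer with a byte step of 4.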
36
37 //------------------------------------------------------------------------------
38 // Detection of non-trivial transparency
39
40 // Returns true if alpha[] has non-0xff values.
41 static int CheckNonOpaque(const uint8_t* alpha, int width, int height,
42 int x_step, int y_step) {
43 if (alpha == NULL) return 0;
44 WebPInitAlphaProcessing();
45 if (x_step == 1) {
46 for (; height-- > 0; alpha += y_step) {
47 if (WebPHasAlpha8b(alpha, width)) return 1;
48 }
49 } else {
50 for (; height-- > 0; alpha += y_step) {
51 if (WebPHasAlpha32b(alpha, width)) return 1;
52 }
53 }
54 return 0;
55 }
56
57 // Checking for the presence of non-opaque alpha.
58 int WebPPictureHasTransparency(const WebPPicture* picture) {
59 if (picture == NULL) return 0;
60 if (!picture->use_argb) {
61 return CheckNonOpaque(picture->a, picture->width, picture->height,
62 1, picture->a_stride);
63 } else {
64 const int alpha_offset = ALPHA_OFFSET;
65 return CheckNonOpaque((const uint8_t*)picture->argb + alpha_offset,
66 picture->width, picture->height,
67 4, picture->argb_stride * sizeof(*picture->argb));
68 }
69 return 0;
70 }
71
72 //------------------------------------------------------------------------------
73 // Code for gamma correction
74
75 #if defined(USE_GAMMA_COMPRESSION)
76
77 // gamma-compensates loss of resolution during chroma subsampling
78 #define kGamma 0.80 // for now we use a different gamma value than kGammaF
79 #define kGammaFix 12 // fixed-point precision for linear values
80 #define kGammaScale ((1 << kGammaFix) - 1)
81 #define kGammaTabFix 7 // fixed-point fractional bits precision
82 #define kGammaTabScale (1 << kGammaTabFix)
83 #define kGammaTabRounder (kGammaTabScale >> 1)
84 #define kGammaTabSize (1 << (kGammaFix - kGammaTabFix))
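// Overview of the scheme below: 8-bit RGB samples are first mapped to a
// linear scale with kGammaFix (= 12) bits of precision via the 256-entry
// kGammaToLinearTab[], averaged in that linear domain, and then mapped back
// with the (kGammaTabSize + 1)-entry kLinearToGammaTab[] using linear
// interpolation between table entries (kGammaTabSize is 1 << (12 - 7) = 32).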
85
86 static int kLinearToGammaTab[kGammaTabSize + 1];
87 static uint16_t kGammaToLinearTab[256];
88 static volatile int kGammaTablesOk = 0;
89
90 static WEBP_TSAN_IGNORE_FUNCTION void InitGammaTables(void) {
91 if (!kGammaTablesOk) {
92 int v;
93 const double scale = (double)(1 << kGammaTabFix) / kGammaScale;
94 const double norm = 1. / 255.;
95 for (v = 0; v <= 255; ++v) {
96 kGammaToLinearTab[v] =
97 (uint16_t)(pow(norm * v, kGamma) * kGammaScale + .5);
98 }
99 for (v = 0; v <= kGammaTabSize; ++v) {
100 kLinearToGammaTab[v] = (int)(255. * pow(scale * v, 1. / kGamma) + .5);
101 }
102 kGammaTablesOk = 1;
103 }
104 }
105
106 static WEBP_INLINE uint32_t GammaToLinear(uint8_t v) {
107 return kGammaToLinearTab[v];
108 }
109
110 static WEBP_INLINE int Interpolate(int v) {
111 const int tab_pos = v >> (kGammaTabFix + 2); // integer part
112 const int x = v & ((kGammaTabScale << 2) - 1); // fractional part
113 const int v0 = kLinearToGammaTab[tab_pos];
114 const int v1 = kLinearToGammaTab[tab_pos + 1];
115 const int y = v1 * x + v0 * ((kGammaTabScale << 2) - x); // interpolate
116 assert(tab_pos + 1 < kGammaTabSize + 1);
117 return y;
118 }
119
120 // Convert a linear value 'v' to YUV_FIX+2 fixed-point precision
121 // U/V value, suitable for RGBToU/V calls.
122 static WEBP_INLINE int LinearToGamma(uint32_t base_value, int shift) {
123 const int y = Interpolate(base_value << shift); // final uplifted value
124 return (y + kGammaTabRounder) >> kGammaTabFix; // descale
125 }
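// Note on scaling: callers arrange for 'base_value << shift' to be on a
// 0..4*kGammaScale scale (e.g. SUM4 passes the sum of four linear samples
// with shift == 0, SUM2 passes the sum of two with shift == 1), so the
// result is the gamma-encoded average multiplied by 4 (at most ~1020).
// This matches the '<< 2' scale that RGBToU()/RGBToV() below expect.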
126
127 #else
128
129 static void InitGammaTables(void) {}
130 static WEBP_INLINE uint32_t GammaToLinear(uint8_t v) { return v; }
131 static WEBP_INLINE int LinearToGamma(uint32_t base_value, int shift) {
132 return (int)(base_value << shift);
133 }
134
135 #endif // USE_GAMMA_COMPRESSION
136
137 //------------------------------------------------------------------------------
138 // RGB -> YUV conversion
139
140 static int RGBToY(int r, int g, int b, VP8Random* const rg) {
141 return (rg == NULL) ? VP8RGBToY(r, g, b, YUV_HALF)
142 : VP8RGBToY(r, g, b, VP8RandomBits(rg, YUV_FIX));
143 }
144
145 static int RGBToU(int r, int g, int b, VP8Random* const rg) {
146 return (rg == NULL) ? VP8RGBToU(r, g, b, YUV_HALF << 2)
147 : VP8RGBToU(r, g, b, VP8RandomBits(rg, YUV_FIX + 2));
148 }
149
150 static int RGBToV(int r, int g, int b, VP8Random* const rg) {
151 return (rg == NULL) ? VP8RGBToV(r, g, b, YUV_HALF << 2)
152 : VP8RGBToV(r, g, b, VP8RandomBits(rg, YUV_FIX + 2));
153 }
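// When a VP8Random generator is passed, the fixed rounding bias (YUV_HALF,
// or YUV_HALF << 2 for U/V) is replaced by a pseudo-random bias roughly
// centered on it, whose amplitude follows the requested dithering strength.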
154
155 //------------------------------------------------------------------------------
156 // Sharp RGB->YUV conversion
157
158 static const int kNumIterations = 4;
159 static const int kMinDimensionIterativeConversion = 4;
160
161 // We could use SFIX=0 and only uint8_t for fixed_y_t, but it sometimes
162 // produces banding. Better to use the extra precision.
163 #define SFIX 2 // fixed-point precision of RGB and Y/W
164 typedef int16_t fixed_t; // signed type with extra SFIX precision for UV
165 typedef uint16_t fixed_y_t; // unsigned type with extra SFIX precision for W
166
167 #define SHALF (1 << SFIX >> 1)
168 #define MAX_Y_T ((256 << SFIX) - 1)
169 #define SROUNDER (1 << (YUV_FIX + SFIX - 1))
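// With SFIX == 2: SHALF == 2, MAX_Y_T == 1023, and UpLift() below maps an
// 8-bit sample 'a' to (a << 2) | 2, i.e. the middle of its 4-wide bin
// (for example 255 -> 1022).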
170
171 #if defined(USE_GAMMA_COMPRESSION)
172
173 // We use tables of different size and precision for the Rec709 / BT2020
174 // transfer function.
175 #define kGammaF (1./0.45)
176 static uint32_t kLinearToGammaTabS[kGammaTabSize + 2];
177 #define GAMMA_TO_LINEAR_BITS 14
178 static uint32_t kGammaToLinearTabS[MAX_Y_T + 1]; // size scales with Y_FIX
179 static volatile int kGammaTablesSOk = 0;
180
181 static WEBP_TSAN_IGNORE_FUNCTION void InitGammaTablesS(void) {
182 assert(2 * GAMMA_TO_LINEAR_BITS < 32); // we use uint32_t intermediate values
183 if (!kGammaTablesSOk) {
184 int v;
185 const double norm = 1. / MAX_Y_T;
186 const double scale = 1. / kGammaTabSize;
187 const double a = 0.09929682680944;
188 const double thresh = 0.018053968510807;
189 const double final_scale = 1 << GAMMA_TO_LINEAR_BITS;
190 for (v = 0; v <= MAX_Y_T; ++v) {
191 const double g = norm * v;
192 double value;
193 if (g <= thresh * 4.5) {
194 value = g / 4.5;
195 } else {
196 const double a_rec = 1. / (1. + a);
197 value = pow(a_rec * (g + a), kGammaF);
198 }
199 kGammaToLinearTabS[v] = (uint32_t)(value * final_scale + .5);
200 }
201 for (v = 0; v <= kGammaTabSize; ++v) {
202 const double g = scale * v;
203 double value;
204 if (g <= thresh) {
205 value = 4.5 * g;
206 } else {
207 value = (1. + a) * pow(g, 1. / kGammaF) - a;
208 }
209 // we already incorporate the 1/2 rounding constant here
210 kLinearToGammaTabS[v] =
211 (uint32_t)(MAX_Y_T * value) + (1 << GAMMA_TO_LINEAR_BITS >> 1);
212 }
213     // to prevent small rounding errors from causing a read overflow:
214 kLinearToGammaTabS[kGammaTabSize + 1] = kLinearToGammaTabS[kGammaTabSize];
215 kGammaTablesSOk = 1;
216 }
217 }
218
219 // return value has a fixed-point precision of GAMMA_TO_LINEAR_BITS
220 static WEBP_INLINE uint32_t GammaToLinearS(int v) {
221 return kGammaToLinearTabS[v];
222 }
223
224 static WEBP_INLINE uint32_t LinearToGammaS(uint32_t value) {
225 // 'value' is in GAMMA_TO_LINEAR_BITS fractional precision
226 const uint32_t v = value * kGammaTabSize;
227 const uint32_t tab_pos = v >> GAMMA_TO_LINEAR_BITS;
228 // fractional part, in GAMMA_TO_LINEAR_BITS fixed-point precision
229 const uint32_t x = v - (tab_pos << GAMMA_TO_LINEAR_BITS); // fractional part
230 // v0 / v1 are in GAMMA_TO_LINEAR_BITS fixed-point precision (range [0..1])
231 const uint32_t v0 = kLinearToGammaTabS[tab_pos + 0];
232 const uint32_t v1 = kLinearToGammaTabS[tab_pos + 1];
233 // Final interpolation. Note that rounding is already included.
234 const uint32_t v2 = (v1 - v0) * x; // note: v1 >= v0.
235 const uint32_t result = v0 + (v2 >> GAMMA_TO_LINEAR_BITS);
236 return result;
237 }
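// Note: GammaToLinearS() can return exactly 1 << GAMMA_TO_LINEAR_BITS, so
// tab_pos above can reach kGammaTabSize; together with the tab_pos + 1
// neighbour this is why kLinearToGammaTabS[] has kGammaTabSize + 2 entries
// and why the last entry is duplicated in InitGammaTablesS().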
238
239 #else
240
241 static void InitGammaTablesS(void) {}
242 static WEBP_INLINE uint32_t GammaToLinearS(int v) {
243 return (v << GAMMA_TO_LINEAR_BITS) / MAX_Y_T;
244 }
245 static WEBP_INLINE uint32_t LinearToGammaS(uint32_t value) {
246 return (MAX_Y_T * value) >> GAMMA_TO_LINEAR_BITS;
247 }
248
249 #endif // USE_GAMMA_COMPRESSION
250
251 //------------------------------------------------------------------------------
252
253 static uint8_t clip_8b(fixed_t v) {
254 return (!(v & ~0xff)) ? (uint8_t)v : (v < 0) ? 0u : 255u;
255 }
256
257 static fixed_y_t clip_y(int y) {
258 return (!(y & ~MAX_Y_T)) ? (fixed_y_t)y : (y < 0) ? 0 : MAX_Y_T;
259 }
260
261 //------------------------------------------------------------------------------
262
263 static int RGBToGray(int r, int g, int b) {
264 const int luma = 13933 * r + 46871 * g + 4732 * b + YUV_HALF;
265 return (luma >> YUV_FIX);
266 }
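// The weights above are the Rec. 709 luma coefficients (0.2126, 0.7152,
// 0.0722) scaled by 1 << YUV_FIX; they sum to exactly 65536, so the output
// stays in the same fixed-point range as the inputs.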
267
268 static uint32_t ScaleDown(int a, int b, int c, int d) {
269 const uint32_t A = GammaToLinearS(a);
270 const uint32_t B = GammaToLinearS(b);
271 const uint32_t C = GammaToLinearS(c);
272 const uint32_t D = GammaToLinearS(d);
273 return LinearToGammaS((A + B + C + D + 2) >> 2);
274 }
275
276 static WEBP_INLINE void UpdateW(const fixed_y_t* src, fixed_y_t* dst, int w) {
277 int i;
278 for (i = 0; i < w; ++i) {
279 const uint32_t R = GammaToLinearS(src[0 * w + i]);
280 const uint32_t G = GammaToLinearS(src[1 * w + i]);
281 const uint32_t B = GammaToLinearS(src[2 * w + i]);
282 const uint32_t Y = RGBToGray(R, G, B);
283 dst[i] = (fixed_y_t)LinearToGammaS(Y);
284 }
285 }
286
287 static void UpdateChroma(const fixed_y_t* src1, const fixed_y_t* src2,
288 fixed_t* dst, int uv_w) {
289 int i;
290 for (i = 0; i < uv_w; ++i) {
291 const int r = ScaleDown(src1[0 * uv_w + 0], src1[0 * uv_w + 1],
292 src2[0 * uv_w + 0], src2[0 * uv_w + 1]);
293 const int g = ScaleDown(src1[2 * uv_w + 0], src1[2 * uv_w + 1],
294 src2[2 * uv_w + 0], src2[2 * uv_w + 1]);
295 const int b = ScaleDown(src1[4 * uv_w + 0], src1[4 * uv_w + 1],
296 src2[4 * uv_w + 0], src2[4 * uv_w + 1]);
297 const int W = RGBToGray(r, g, b);
298 dst[0 * uv_w] = (fixed_t)(r - W);
299 dst[1 * uv_w] = (fixed_t)(g - W);
300 dst[2 * uv_w] = (fixed_t)(b - W);
301 dst += 1;
302 src1 += 2;
303 src2 += 2;
304 }
305 }
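// In this W/RGB representation, each 2x2 block is reduced to a gray value W
// plus signed chroma residuals (r - W, g - W, b - W) stored at half
// resolution; ConvertWRGBToYUV() below reconstructs (W + residual) per pixel
// before applying the final RGB->YUV formulas.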
306
307 static void StoreGray(const fixed_y_t* rgb, fixed_y_t* y, int w) {
308 int i;
309 for (i = 0; i < w; ++i) {
310 y[i] = RGBToGray(rgb[0 * w + i], rgb[1 * w + i], rgb[2 * w + i]);
311 }
312 }
313
314 //------------------------------------------------------------------------------
315
316 static WEBP_INLINE fixed_y_t Filter2(int A, int B, int W0) {
317 const int v0 = (A * 3 + B + 2) >> 2;
318 return clip_y(v0 + W0);
319 }
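// Filter2() is a 3/4 + 1/4 blend of the current and neighbouring chroma
// rows, re-centered by adding the per-pixel luma W0 and clipped to the
// valid fixed-point range.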
320
321 //------------------------------------------------------------------------------
322
323 static WEBP_INLINE fixed_y_t UpLift(uint8_t a) {  // 8bit -> SFIX
324 return ((fixed_y_t)a << SFIX) | SHALF;
325 }
326
327 static void ImportOneRow(const uint8_t* const r_ptr,
328 const uint8_t* const g_ptr,
329 const uint8_t* const b_ptr,
330 int step,
331 int pic_width,
332 fixed_y_t* const dst) {
333 int i;
334 const int w = (pic_width + 1) & ~1;
335 for (i = 0; i < pic_width; ++i) {
336 const int off = i * step;
337 dst[i + 0 * w] = UpLift(r_ptr[off]);
338 dst[i + 1 * w] = UpLift(g_ptr[off]);
339 dst[i + 2 * w] = UpLift(b_ptr[off]);
340 }
341 if (pic_width & 1) { // replicate rightmost pixel
342 dst[pic_width + 0 * w] = dst[pic_width + 0 * w - 1];
343 dst[pic_width + 1 * w] = dst[pic_width + 1 * w - 1];
344 dst[pic_width + 2 * w] = dst[pic_width + 2 * w - 1];
345 }
346 }
347
348 static void InterpolateTwoRows(const fixed_y_t* const best_y,
349 const fixed_t* prev_uv,
350 const fixed_t* cur_uv,
351 const fixed_t* next_uv,
352 int w,
353 fixed_y_t* out1,
354 fixed_y_t* out2) {
355 const int uv_w = w >> 1;
356 const int len = (w - 1) >> 1; // length to filter
357 int k = 3;
358   while (k-- > 0) {   // process each of the R/G/B segments in turn
359 // special boundary case for i==0
360 out1[0] = Filter2(cur_uv[0], prev_uv[0], best_y[0]);
361 out2[0] = Filter2(cur_uv[0], next_uv[0], best_y[w]);
362
363 WebPSharpYUVFilterRow(cur_uv, prev_uv, len, best_y + 0 + 1, out1 + 1);
364 WebPSharpYUVFilterRow(cur_uv, next_uv, len, best_y + w + 1, out2 + 1);
365
366 // special boundary case for i == w - 1 when w is even
367 if (!(w & 1)) {
368 out1[w - 1] = Filter2(cur_uv[uv_w - 1], prev_uv[uv_w - 1],
369 best_y[w - 1 + 0]);
370 out2[w - 1] = Filter2(cur_uv[uv_w - 1], next_uv[uv_w - 1],
371 best_y[w - 1 + w]);
372 }
373 out1 += w;
374 out2 += w;
375 prev_uv += uv_w;
376 cur_uv += uv_w;
377 next_uv += uv_w;
378 }
379 }
380
381 static WEBP_INLINE uint8_t ConvertRGBToY(int r, int g, int b) {
382 const int luma = 16839 * r + 33059 * g + 6420 * b + SROUNDER;
383 return clip_8b(16 + (luma >> (YUV_FIX + SFIX)));
384 }
385
386 static WEBP_INLINE uint8_t ConvertRGBToU(int r, int g, int b) {
387 const int u = -9719 * r - 19081 * g + 28800 * b + SROUNDER;
388 return clip_8b(128 + (u >> (YUV_FIX + SFIX)));
389 }
390
391 static WEBP_INLINE uint8_t ConvertRGBToV(int r, int g, int b) {
392 const int v = +28800 * r - 24116 * g - 4684 * b + SROUNDER;
393 return clip_8b(128 + (v >> (YUV_FIX + SFIX)));
394 }
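// These constants correspond (up to rounding) to the usual BT.601
// limited-range RGB->YCbCr matrix scaled by 1 << YUV_FIX; the extra SFIX
// input bits are absorbed by the (YUV_FIX + SFIX) descaling shift before
// the +16 / +128 offsets are applied.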
395
396 static int ConvertWRGBToYUV(const fixed_y_t* best_y, const fixed_t* best_uv,
397 WebPPicture* const picture) {
398 int i, j;
399 uint8_t* dst_y = picture->y;
400 uint8_t* dst_u = picture->u;
401 uint8_t* dst_v = picture->v;
402 const fixed_t* const best_uv_base = best_uv;
403 const int w = (picture->width + 1) & ~1;
404 const int h = (picture->height + 1) & ~1;
405 const int uv_w = w >> 1;
406 const int uv_h = h >> 1;
407 for (best_uv = best_uv_base, j = 0; j < picture->height; ++j) {
408 for (i = 0; i < picture->width; ++i) {
409 const int off = (i >> 1);
410 const int W = best_y[i];
411 const int r = best_uv[off + 0 * uv_w] + W;
412 const int g = best_uv[off + 1 * uv_w] + W;
413 const int b = best_uv[off + 2 * uv_w] + W;
414 dst_y[i] = ConvertRGBToY(r, g, b);
415 }
416 best_y += w;
417 best_uv += (j & 1) * 3 * uv_w;
418 dst_y += picture->y_stride;
419 }
420 for (best_uv = best_uv_base, j = 0; j < uv_h; ++j) {
421 for (i = 0; i < uv_w; ++i) {
422 const int off = i;
423 const int r = best_uv[off + 0 * uv_w];
424 const int g = best_uv[off + 1 * uv_w];
425 const int b = best_uv[off + 2 * uv_w];
426 dst_u[i] = ConvertRGBToU(r, g, b);
427 dst_v[i] = ConvertRGBToV(r, g, b);
428 }
429 best_uv += 3 * uv_w;
430 dst_u += picture->uv_stride;
431 dst_v += picture->uv_stride;
432 }
433 return 1;
434 }
435
436 //------------------------------------------------------------------------------
437 // Main function
438
439 #define SAFE_ALLOC(W, H, T) ((T*)WebPSafeMalloc((W) * (H), sizeof(T)))
440
441 static int PreprocessARGB(const uint8_t* r_ptr,
442 const uint8_t* g_ptr,
443 const uint8_t* b_ptr,
444 int step, int rgb_stride,
445 WebPPicture* const picture) {
446 // we expand the right/bottom border if needed
447 const int w = (picture->width + 1) & ~1;
448 const int h = (picture->height + 1) & ~1;
449 const int uv_w = w >> 1;
450 const int uv_h = h >> 1;
451 uint64_t prev_diff_y_sum = ~0;
452 int j, iter;
453
454 // TODO(skal): allocate one big memory chunk. But for now, it's easier
455 // for valgrind debugging to have several chunks.
456 fixed_y_t* const tmp_buffer = SAFE_ALLOC(w * 3, 2, fixed_y_t); // scratch
457 fixed_y_t* const best_y_base = SAFE_ALLOC(w, h, fixed_y_t);
458 fixed_y_t* const target_y_base = SAFE_ALLOC(w, h, fixed_y_t);
459 fixed_y_t* const best_rgb_y = SAFE_ALLOC(w, 2, fixed_y_t);
460 fixed_t* const best_uv_base = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t);
461 fixed_t* const target_uv_base = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t);
462 fixed_t* const best_rgb_uv = SAFE_ALLOC(uv_w * 3, 1, fixed_t);
463 fixed_y_t* best_y = best_y_base;
464 fixed_y_t* target_y = target_y_base;
465 fixed_t* best_uv = best_uv_base;
466 fixed_t* target_uv = target_uv_base;
467 const uint64_t diff_y_threshold = (uint64_t)(3.0 * w * h);
468 int ok;
469
470 if (best_y_base == NULL || best_uv_base == NULL ||
471 target_y_base == NULL || target_uv_base == NULL ||
472 best_rgb_y == NULL || best_rgb_uv == NULL ||
473 tmp_buffer == NULL) {
474 ok = WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
475 goto End;
476 }
477 assert(picture->width >= kMinDimensionIterativeConversion);
478 assert(picture->height >= kMinDimensionIterativeConversion);
479
480 WebPInitConvertARGBToYUV();
481
482 // Import RGB samples to W/RGB representation.
483 for (j = 0; j < picture->height; j += 2) {
484 const int is_last_row = (j == picture->height - 1);
485 fixed_y_t* const src1 = tmp_buffer + 0 * w;
486 fixed_y_t* const src2 = tmp_buffer + 3 * w;
487
488 // prepare two rows of input
489 ImportOneRow(r_ptr, g_ptr, b_ptr, step, picture->width, src1);
490 if (!is_last_row) {
491 ImportOneRow(r_ptr + rgb_stride, g_ptr + rgb_stride, b_ptr + rgb_stride,
492 step, picture->width, src2);
493 } else {
494 memcpy(src2, src1, 3 * w * sizeof(*src2));
495 }
496 StoreGray(src1, best_y + 0, w);
497 StoreGray(src2, best_y + w, w);
498
499 UpdateW(src1, target_y, w);
500 UpdateW(src2, target_y + w, w);
501 UpdateChroma(src1, src2, target_uv, uv_w);
502 memcpy(best_uv, target_uv, 3 * uv_w * sizeof(*best_uv));
503 best_y += 2 * w;
504 best_uv += 3 * uv_w;
505 target_y += 2 * w;
506 target_uv += 3 * uv_w;
507 r_ptr += 2 * rgb_stride;
508 g_ptr += 2 * rgb_stride;
509 b_ptr += 2 * rgb_stride;
510 }
511
512 // Iterate and resolve clipping conflicts.
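  // Each pass re-interpolates the current best W/UV planes to full
  // resolution (InterpolateTwoRows), converts them back with UpdateW() /
  // UpdateChroma(), and nudges best_y / best_uv towards the targets via
  // WebPSharpYUVUpdateY() / WebPSharpYUVUpdateRGB(). Iteration stops after
  // kNumIterations passes, or earlier once the accumulated luma error drops
  // below ~3 per pixel (diff_y_threshold) or stops decreasing.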
513 for (iter = 0; iter < kNumIterations; ++iter) {
514 const fixed_t* cur_uv = best_uv_base;
515 const fixed_t* prev_uv = best_uv_base;
516 uint64_t diff_y_sum = 0;
517
518 best_y = best_y_base;
519 best_uv = best_uv_base;
520 target_y = target_y_base;
521 target_uv = target_uv_base;
522 for (j = 0; j < h; j += 2) {
523 fixed_y_t* const src1 = tmp_buffer + 0 * w;
524 fixed_y_t* const src2 = tmp_buffer + 3 * w;
525 {
526 const fixed_t* const next_uv = cur_uv + ((j < h - 2) ? 3 * uv_w : 0);
527 InterpolateTwoRows(best_y, prev_uv, cur_uv, next_uv, w, src1, src2);
528 prev_uv = cur_uv;
529 cur_uv = next_uv;
530 }
531
532 UpdateW(src1, best_rgb_y + 0 * w, w);
533 UpdateW(src2, best_rgb_y + 1 * w, w);
534 UpdateChroma(src1, src2, best_rgb_uv, uv_w);
535
536 // update two rows of Y and one row of RGB
537 diff_y_sum += WebPSharpYUVUpdateY(target_y, best_rgb_y, best_y, 2 * w);
538 WebPSharpYUVUpdateRGB(target_uv, best_rgb_uv, best_uv, 3 * uv_w);
539
540 best_y += 2 * w;
541 best_uv += 3 * uv_w;
542 target_y += 2 * w;
543 target_uv += 3 * uv_w;
544 }
545 // test exit condition
546 if (iter > 0) {
547 if (diff_y_sum < diff_y_threshold) break;
548 if (diff_y_sum > prev_diff_y_sum) break;
549 }
550 prev_diff_y_sum = diff_y_sum;
551 }
552 // final reconstruction
553 ok = ConvertWRGBToYUV(best_y_base, best_uv_base, picture);
554
555 End:
556 WebPSafeFree(best_y_base);
557 WebPSafeFree(best_uv_base);
558 WebPSafeFree(target_y_base);
559 WebPSafeFree(target_uv_base);
560 WebPSafeFree(best_rgb_y);
561 WebPSafeFree(best_rgb_uv);
562 WebPSafeFree(tmp_buffer);
563 return ok;
564 }
565 #undef SAFE_ALLOC
566
567 //------------------------------------------------------------------------------
568 // "Fast" regular RGB->YUV
569
570 #define SUM4(ptr, step) LinearToGamma( \
571 GammaToLinear((ptr)[0]) + \
572 GammaToLinear((ptr)[(step)]) + \
573 GammaToLinear((ptr)[rgb_stride]) + \
574            GammaToLinear((ptr)[rgb_stride + (step)]), 0)
575
576 #define SUM2(ptr) \
577 LinearToGamma(GammaToLinear((ptr)[0]) + GammaToLinear((ptr)[rgb_stride]), 1)
578
579 #define SUM2ALPHA(ptr) ((ptr)[0] + (ptr)[rgb_stride])
580 #define SUM4ALPHA(ptr) (SUM2ALPHA(ptr) + SUM2ALPHA((ptr) + 4))
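// SUM4 averages a 2x2 block in linear light and returns the gamma-encoded
// result at 4x scale; SUM2 covers the rightmost column of odd-width rows by
// averaging a 2x1 block and doubling it (shift == 1) so it lands on the
// same 4x scale. SUM2ALPHA / SUM4ALPHA accumulate plain alpha values.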
581
582 #if defined(USE_INVERSE_ALPHA_TABLE)
583
584 static const int kAlphaFix = 19;
585 // Following table is (1 << kAlphaFix) / a. The (v * kInvAlpha[a]) >> kAlphaFix
586 // formula is then equal to v / a in most (99.6%) cases. Note that this table
587 // and constant are adjusted very tightly to fit 32b arithmetic.
588 // In particular, they use the fact that the operands for 'v / a' are actually
589 // derived as v = (a0.p0 + a1.p1 + a2.p2 + a3.p3) and a = a0 + a1 + a2 + a3
590 // with ai in [0..255] and pi in [0..1<<kGammaFix). The constraint to avoid
591 // overflow is: kGammaFix + kAlphaFix <= 31.
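// Worked example, with kAlphaFix == 19: for a == 2, kInvAlpha[2] is 262144,
// so v == 100 gives (100 * 262144) >> 19 == 50 == 100 / 2. Since the
// accumulated 'v' is smaller than 'a << kGammaFix', the product
// v * kInvAlpha[a] stays below 1u << (kGammaFix + kAlphaFix) <= 1u << 31.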
592 static const uint32_t kInvAlpha[4 * 0xff + 1] = {
593 0, /* alpha = 0 */
594 524288, 262144, 174762, 131072, 104857, 87381, 74898, 65536,
595 58254, 52428, 47662, 43690, 40329, 37449, 34952, 32768,
596 30840, 29127, 27594, 26214, 24966, 23831, 22795, 21845,
597 20971, 20164, 19418, 18724, 18078, 17476, 16912, 16384,
598 15887, 15420, 14979, 14563, 14169, 13797, 13443, 13107,
599 12787, 12483, 12192, 11915, 11650, 11397, 11155, 10922,
600 10699, 10485, 10280, 10082, 9892, 9709, 9532, 9362,
601 9198, 9039, 8886, 8738, 8594, 8456, 8322, 8192,
602 8065, 7943, 7825, 7710, 7598, 7489, 7384, 7281,
603 7182, 7084, 6990, 6898, 6808, 6721, 6636, 6553,
604 6472, 6393, 6316, 6241, 6168, 6096, 6026, 5957,
605 5890, 5825, 5761, 5698, 5637, 5577, 5518, 5461,
606 5405, 5349, 5295, 5242, 5190, 5140, 5090, 5041,
607 4993, 4946, 4899, 4854, 4809, 4766, 4723, 4681,
608 4639, 4599, 4559, 4519, 4481, 4443, 4405, 4369,
609 4332, 4297, 4262, 4228, 4194, 4161, 4128, 4096,
610 4064, 4032, 4002, 3971, 3942, 3912, 3883, 3855,
611 3826, 3799, 3771, 3744, 3718, 3692, 3666, 3640,
612 3615, 3591, 3566, 3542, 3518, 3495, 3472, 3449,
613 3426, 3404, 3382, 3360, 3339, 3318, 3297, 3276,
614 3256, 3236, 3216, 3196, 3177, 3158, 3139, 3120,
615 3102, 3084, 3066, 3048, 3030, 3013, 2995, 2978,
616 2962, 2945, 2928, 2912, 2896, 2880, 2864, 2849,
617 2833, 2818, 2803, 2788, 2774, 2759, 2744, 2730,
618 2716, 2702, 2688, 2674, 2661, 2647, 2634, 2621,
619 2608, 2595, 2582, 2570, 2557, 2545, 2532, 2520,
620 2508, 2496, 2484, 2473, 2461, 2449, 2438, 2427,
621 2416, 2404, 2394, 2383, 2372, 2361, 2351, 2340,
622 2330, 2319, 2309, 2299, 2289, 2279, 2269, 2259,
623 2250, 2240, 2231, 2221, 2212, 2202, 2193, 2184,
624 2175, 2166, 2157, 2148, 2139, 2131, 2122, 2114,
625 2105, 2097, 2088, 2080, 2072, 2064, 2056, 2048,
626 2040, 2032, 2024, 2016, 2008, 2001, 1993, 1985,
627 1978, 1971, 1963, 1956, 1949, 1941, 1934, 1927,
628 1920, 1913, 1906, 1899, 1892, 1885, 1879, 1872,
629 1865, 1859, 1852, 1846, 1839, 1833, 1826, 1820,
630 1814, 1807, 1801, 1795, 1789, 1783, 1777, 1771,
631 1765, 1759, 1753, 1747, 1741, 1736, 1730, 1724,
632 1718, 1713, 1707, 1702, 1696, 1691, 1685, 1680,
633 1675, 1669, 1664, 1659, 1653, 1648, 1643, 1638,
634 1633, 1628, 1623, 1618, 1613, 1608, 1603, 1598,
635 1593, 1588, 1583, 1579, 1574, 1569, 1565, 1560,
636 1555, 1551, 1546, 1542, 1537, 1533, 1528, 1524,
637 1519, 1515, 1510, 1506, 1502, 1497, 1493, 1489,
638 1485, 1481, 1476, 1472, 1468, 1464, 1460, 1456,
639 1452, 1448, 1444, 1440, 1436, 1432, 1428, 1424,
640 1420, 1416, 1413, 1409, 1405, 1401, 1398, 1394,
641 1390, 1387, 1383, 1379, 1376, 1372, 1368, 1365,
642 1361, 1358, 1354, 1351, 1347, 1344, 1340, 1337,
643 1334, 1330, 1327, 1323, 1320, 1317, 1314, 1310,
644 1307, 1304, 1300, 1297, 1294, 1291, 1288, 1285,
645 1281, 1278, 1275, 1272, 1269, 1266, 1263, 1260,
646 1257, 1254, 1251, 1248, 1245, 1242, 1239, 1236,
647 1233, 1230, 1227, 1224, 1222, 1219, 1216, 1213,
648 1210, 1208, 1205, 1202, 1199, 1197, 1194, 1191,
649 1188, 1186, 1183, 1180, 1178, 1175, 1172, 1170,
650 1167, 1165, 1162, 1159, 1157, 1154, 1152, 1149,
651 1147, 1144, 1142, 1139, 1137, 1134, 1132, 1129,
652 1127, 1125, 1122, 1120, 1117, 1115, 1113, 1110,
653 1108, 1106, 1103, 1101, 1099, 1096, 1094, 1092,
654 1089, 1087, 1085, 1083, 1081, 1078, 1076, 1074,
655 1072, 1069, 1067, 1065, 1063, 1061, 1059, 1057,
656 1054, 1052, 1050, 1048, 1046, 1044, 1042, 1040,
657 1038, 1036, 1034, 1032, 1030, 1028, 1026, 1024,
658 1022, 1020, 1018, 1016, 1014, 1012, 1010, 1008,
659 1006, 1004, 1002, 1000, 998, 996, 994, 992,
660 991, 989, 987, 985, 983, 981, 979, 978,
661 976, 974, 972, 970, 969, 967, 965, 963,
662 961, 960, 958, 956, 954, 953, 951, 949,
663 948, 946, 944, 942, 941, 939, 937, 936,
664 934, 932, 931, 929, 927, 926, 924, 923,
665 921, 919, 918, 916, 914, 913, 911, 910,
666 908, 907, 905, 903, 902, 900, 899, 897,
667 896, 894, 893, 891, 890, 888, 887, 885,
668 884, 882, 881, 879, 878, 876, 875, 873,
669 872, 870, 869, 868, 866, 865, 863, 862,
670 860, 859, 858, 856, 855, 853, 852, 851,
671 849, 848, 846, 845, 844, 842, 841, 840,
672 838, 837, 836, 834, 833, 832, 830, 829,
673 828, 826, 825, 824, 823, 821, 820, 819,
674 817, 816, 815, 814, 812, 811, 810, 809,
675 807, 806, 805, 804, 802, 801, 800, 799,
676 798, 796, 795, 794, 793, 791, 790, 789,
677 788, 787, 786, 784, 783, 782, 781, 780,
678 779, 777, 776, 775, 774, 773, 772, 771,
679 769, 768, 767, 766, 765, 764, 763, 762,
680 760, 759, 758, 757, 756, 755, 754, 753,
681 752, 751, 750, 748, 747, 746, 745, 744,
682 743, 742, 741, 740, 739, 738, 737, 736,
683 735, 734, 733, 732, 731, 730, 729, 728,
684 727, 726, 725, 724, 723, 722, 721, 720,
685 719, 718, 717, 716, 715, 714, 713, 712,
686 711, 710, 709, 708, 707, 706, 705, 704,
687 703, 702, 701, 700, 699, 699, 698, 697,
688 696, 695, 694, 693, 692, 691, 690, 689,
689 688, 688, 687, 686, 685, 684, 683, 682,
690 681, 680, 680, 679, 678, 677, 676, 675,
691 674, 673, 673, 672, 671, 670, 669, 668,
692 667, 667, 666, 665, 664, 663, 662, 661,
693 661, 660, 659, 658, 657, 657, 656, 655,
694 654, 653, 652, 652, 651, 650, 649, 648,
695 648, 647, 646, 645, 644, 644, 643, 642,
696 641, 640, 640, 639, 638, 637, 637, 636,
697 635, 634, 633, 633, 632, 631, 630, 630,
698 629, 628, 627, 627, 626, 625, 624, 624,
699 623, 622, 621, 621, 620, 619, 618, 618,
700 617, 616, 616, 615, 614, 613, 613, 612,
701 611, 611, 610, 609, 608, 608, 607, 606,
702 606, 605, 604, 604, 603, 602, 601, 601,
703 600, 599, 599, 598, 597, 597, 596, 595,
704 595, 594, 593, 593, 592, 591, 591, 590,
705 589, 589, 588, 587, 587, 586, 585, 585,
706 584, 583, 583, 582, 581, 581, 580, 579,
707 579, 578, 578, 577, 576, 576, 575, 574,
708 574, 573, 572, 572, 571, 571, 570, 569,
709 569, 568, 568, 567, 566, 566, 565, 564,
710 564, 563, 563, 562, 561, 561, 560, 560,
711 559, 558, 558, 557, 557, 556, 555, 555,
712 554, 554, 553, 553, 552, 551, 551, 550,
713 550, 549, 548, 548, 547, 547, 546, 546,
714 545, 544, 544, 543, 543, 542, 542, 541,
715 541, 540, 539, 539, 538, 538, 537, 537,
716 536, 536, 535, 534, 534, 533, 533, 532,
717 532, 531, 531, 530, 530, 529, 529, 528,
718 527, 527, 526, 526, 525, 525, 524, 524,
719 523, 523, 522, 522, 521, 521, 520, 520,
720 519, 519, 518, 518, 517, 517, 516, 516,
721 515, 515, 514, 514
722 };
723
724 // Note that LinearToGamma() expects the values to be premultiplied by 4,
725 // so we incorporate this factor of 4 inside the DIVIDE_BY_ALPHA macro directly.
726 #define DIVIDE_BY_ALPHA(sum, a) (((sum) * kInvAlpha[(a)]) >> (kAlphaFix - 2))
727
728 #else
729
730 #define DIVIDE_BY_ALPHA(sum, a) (4 * (sum) / (a))
731
732 #endif // USE_INVERSE_ALPHA_TABLE
733
734 static WEBP_INLINE int LinearToGammaWeighted(const uint8_t* src,
735 const uint8_t* a_ptr,
736 uint32_t total_a, int step,
737 int rgb_stride) {
738 const uint32_t sum =
739 a_ptr[0] * GammaToLinear(src[0]) +
740 a_ptr[step] * GammaToLinear(src[step]) +
741 a_ptr[rgb_stride] * GammaToLinear(src[rgb_stride]) +
742 a_ptr[rgb_stride + step] * GammaToLinear(src[rgb_stride + step]);
743 assert(total_a > 0 && total_a <= 4 * 0xff);
744 #if defined(USE_INVERSE_ALPHA_TABLE)
745 assert((uint64_t)sum * kInvAlpha[total_a] < ((uint64_t)1 << 32));
746 #endif
747 return LinearToGamma(DIVIDE_BY_ALPHA(sum, total_a), 0);
748 }
749
750 static WEBP_INLINE void ConvertRowToY(const uint8_t* const r_ptr,
751 const uint8_t* const g_ptr,
752 const uint8_t* const b_ptr,
753 int step,
754 uint8_t* const dst_y,
755 int width,
756 VP8Random* const rg) {
757 int i, j;
758 for (i = 0, j = 0; i < width; i += 1, j += step) {
759 dst_y[i] = RGBToY(r_ptr[j], g_ptr[j], b_ptr[j], rg);
760 }
761 }
762
763 static WEBP_INLINE void AccumulateRGBA(const uint8_t* const r_ptr,
764 const uint8_t* const g_ptr,
765 const uint8_t* const b_ptr,
766 const uint8_t* const a_ptr,
767 int rgb_stride,
768 uint16_t* dst, int width) {
769 int i, j;
770 // we loop over 2x2 blocks and produce one R/G/B/A value for each.
771 for (i = 0, j = 0; i < (width >> 1); i += 1, j += 2 * 4, dst += 4) {
772 const uint32_t a = SUM4ALPHA(a_ptr + j);
773 int r, g, b;
774 if (a == 4 * 0xff || a == 0) {
775 r = SUM4(r_ptr + j, 4);
776 g = SUM4(g_ptr + j, 4);
777 b = SUM4(b_ptr + j, 4);
778 } else {
779 r = LinearToGammaWeighted(r_ptr + j, a_ptr + j, a, 4, rgb_stride);
780 g = LinearToGammaWeighted(g_ptr + j, a_ptr + j, a, 4, rgb_stride);
781 b = LinearToGammaWeighted(b_ptr + j, a_ptr + j, a, 4, rgb_stride);
782 }
783 dst[0] = r;
784 dst[1] = g;
785 dst[2] = b;
786 dst[3] = a;
787 }
788 if (width & 1) {
789 const uint32_t a = 2u * SUM2ALPHA(a_ptr + j);
790 int r, g, b;
791 if (a == 4 * 0xff || a == 0) {
792 r = SUM2(r_ptr + j);
793 g = SUM2(g_ptr + j);
794 b = SUM2(b_ptr + j);
795 } else {
796 r = LinearToGammaWeighted(r_ptr + j, a_ptr + j, a, 0, rgb_stride);
797 g = LinearToGammaWeighted(g_ptr + j, a_ptr + j, a, 0, rgb_stride);
798 b = LinearToGammaWeighted(b_ptr + j, a_ptr + j, a, 0, rgb_stride);
799 }
800 dst[0] = r;
801 dst[1] = g;
802 dst[2] = b;
803 dst[3] = a;
804 }
805 }
806
807 static WEBP_INLINE void AccumulateRGB(const uint8_t* const r_ptr,
808 const uint8_t* const g_ptr,
809 const uint8_t* const b_ptr,
810 int step, int rgb_stride,
811 uint16_t* dst, int width) {
812 int i, j;
813 for (i = 0, j = 0; i < (width >> 1); i += 1, j += 2 * step, dst += 4) {
814 dst[0] = SUM4(r_ptr + j, step);
815 dst[1] = SUM4(g_ptr + j, step);
816 dst[2] = SUM4(b_ptr + j, step);
817 }
818 if (width & 1) {
819 dst[0] = SUM2(r_ptr + j);
820 dst[1] = SUM2(g_ptr + j);
821 dst[2] = SUM2(b_ptr + j);
822 }
823 }
824
825 static WEBP_INLINE void ConvertRowsToUV(const uint16_t* rgb,
826 uint8_t* const dst_u,
827 uint8_t* const dst_v,
828 int width,
829 VP8Random* const rg) {
830 int i;
831 for (i = 0; i < width; i += 1, rgb += 4) {
832 const int r = rgb[0], g = rgb[1], b = rgb[2];
833 dst_u[i] = RGBToU(r, g, b, rg);
834 dst_v[i] = RGBToV(r, g, b, rg);
835 }
836 }
837
838 static int ImportYUVAFromRGBA(const uint8_t* r_ptr,
839 const uint8_t* g_ptr,
840 const uint8_t* b_ptr,
841 const uint8_t* a_ptr,
842 int step, // bytes per pixel
843 int rgb_stride, // bytes per scanline
844 float dithering,
845 int use_iterative_conversion,
846 WebPPicture* const picture) {
847 int y;
848 const int width = picture->width;
849 const int height = picture->height;
850 const int has_alpha = CheckNonOpaque(a_ptr, width, height, step, rgb_stride);
851 const int is_rgb = (r_ptr < b_ptr); // otherwise it's bgr
852
853 picture->colorspace = has_alpha ? WEBP_YUV420A : WEBP_YUV420;
854 picture->use_argb = 0;
855
856 // disable smart conversion if source is too small (overkill).
857 if (width < kMinDimensionIterativeConversion ||
858 height < kMinDimensionIterativeConversion) {
859 use_iterative_conversion = 0;
860 }
861
862 if (!WebPPictureAllocYUVA(picture, width, height)) {
863 return 0;
864 }
865 if (has_alpha) {
866 assert(step == 4);
867 #if defined(USE_GAMMA_COMPRESSION) && defined(USE_INVERSE_ALPHA_TABLE)
868 assert(kAlphaFix + kGammaFix <= 31);
869 #endif
870 }
871
872 if (use_iterative_conversion) {
873 InitGammaTablesS();
874 if (!PreprocessARGB(r_ptr, g_ptr, b_ptr, step, rgb_stride, picture)) {
875 return 0;
876 }
877 if (has_alpha) {
878 WebPExtractAlpha(a_ptr, rgb_stride, width, height,
879 picture->a, picture->a_stride);
880 }
881 } else {
882 const int uv_width = (width + 1) >> 1;
883 int use_dsp = (step == 3); // use special function in this case
884 // temporary storage for accumulated R/G/B values during conversion to U/V
885 uint16_t* const tmp_rgb =
886 (uint16_t*)WebPSafeMalloc(4 * uv_width, sizeof(*tmp_rgb));
887 uint8_t* dst_y = picture->y;
888 uint8_t* dst_u = picture->u;
889 uint8_t* dst_v = picture->v;
890 uint8_t* dst_a = picture->a;
891
892 VP8Random base_rg;
893 VP8Random* rg = NULL;
894 if (dithering > 0.) {
895 VP8InitRandom(&base_rg, dithering);
896 rg = &base_rg;
897 use_dsp = 0; // can't use dsp in this case
898 }
899 WebPInitConvertARGBToYUV();
900 InitGammaTables();
901
902 if (tmp_rgb == NULL) return 0; // malloc error
903
904 // Downsample Y/U/V planes, two rows at a time
905 for (y = 0; y < (height >> 1); ++y) {
906 int rows_have_alpha = has_alpha;
907 if (use_dsp) {
908 if (is_rgb) {
909 WebPConvertRGB24ToY(r_ptr, dst_y, width);
910 WebPConvertRGB24ToY(r_ptr + rgb_stride,
911 dst_y + picture->y_stride, width);
912 } else {
913 WebPConvertBGR24ToY(b_ptr, dst_y, width);
914 WebPConvertBGR24ToY(b_ptr + rgb_stride,
915 dst_y + picture->y_stride, width);
916 }
917 } else {
918 ConvertRowToY(r_ptr, g_ptr, b_ptr, step, dst_y, width, rg);
919 ConvertRowToY(r_ptr + rgb_stride,
920 g_ptr + rgb_stride,
921 b_ptr + rgb_stride, step,
922 dst_y + picture->y_stride, width, rg);
923 }
924 dst_y += 2 * picture->y_stride;
925 if (has_alpha) {
926 rows_have_alpha &= !WebPExtractAlpha(a_ptr, rgb_stride, width, 2,
927 dst_a, picture->a_stride);
928 dst_a += 2 * picture->a_stride;
929 }
930 // Collect averaged R/G/B(/A)
931 if (!rows_have_alpha) {
932 AccumulateRGB(r_ptr, g_ptr, b_ptr, step, rgb_stride, tmp_rgb, width);
933 } else {
934 AccumulateRGBA(r_ptr, g_ptr, b_ptr, a_ptr, rgb_stride, tmp_rgb, width);
935 }
936 // Convert to U/V
937 if (rg == NULL) {
938 WebPConvertRGBA32ToUV(tmp_rgb, dst_u, dst_v, uv_width);
939 } else {
940 ConvertRowsToUV(tmp_rgb, dst_u, dst_v, uv_width, rg);
941 }
942 dst_u += picture->uv_stride;
943 dst_v += picture->uv_stride;
944 r_ptr += 2 * rgb_stride;
945 b_ptr += 2 * rgb_stride;
946 g_ptr += 2 * rgb_stride;
947 if (has_alpha) a_ptr += 2 * rgb_stride;
948 }
949 if (height & 1) { // extra last row
950 int row_has_alpha = has_alpha;
951 if (use_dsp) {
952 if (r_ptr < b_ptr) {
953 WebPConvertRGB24ToY(r_ptr, dst_y, width);
954 } else {
955 WebPConvertBGR24ToY(b_ptr, dst_y, width);
956 }
957 } else {
958 ConvertRowToY(r_ptr, g_ptr, b_ptr, step, dst_y, width, rg);
959 }
960 if (row_has_alpha) {
961 row_has_alpha &= !WebPExtractAlpha(a_ptr, 0, width, 1, dst_a, 0);
962 }
963 // Collect averaged R/G/B(/A)
964 if (!row_has_alpha) {
965 // Collect averaged R/G/B
966 AccumulateRGB(r_ptr, g_ptr, b_ptr, step, /* rgb_stride = */ 0,
967 tmp_rgb, width);
968 } else {
969 AccumulateRGBA(r_ptr, g_ptr, b_ptr, a_ptr, /* rgb_stride = */ 0,
970 tmp_rgb, width);
971 }
972 if (rg == NULL) {
973 WebPConvertRGBA32ToUV(tmp_rgb, dst_u, dst_v, uv_width);
974 } else {
975 ConvertRowsToUV(tmp_rgb, dst_u, dst_v, uv_width, rg);
976 }
977 }
978 WebPSafeFree(tmp_rgb);
979 }
980 return 1;
981 }
982
983 #undef SUM4
984 #undef SUM2
985 #undef SUM4ALPHA
986 #undef SUM2ALPHA
987
988 //------------------------------------------------------------------------------
989 // call for ARGB->YUVA conversion
990
991 static int PictureARGBToYUVA(WebPPicture* picture, WebPEncCSP colorspace,
992 float dithering, int use_iterative_conversion) {
993 if (picture == NULL) return 0;
994 if (picture->argb == NULL) {
995 return WebPEncodingSetError(picture, VP8_ENC_ERROR_NULL_PARAMETER);
996 } else if ((colorspace & WEBP_CSP_UV_MASK) != WEBP_YUV420) {
997 return WebPEncodingSetError(picture, VP8_ENC_ERROR_INVALID_CONFIGURATION);
998 } else {
999 const uint8_t* const argb = (const uint8_t*)picture->argb;
1000 const uint8_t* const a = argb + (0 ^ ALPHA_OFFSET);
1001 const uint8_t* const r = argb + (1 ^ ALPHA_OFFSET);
1002 const uint8_t* const g = argb + (2 ^ ALPHA_OFFSET);
1003 const uint8_t* const b = argb + (3 ^ ALPHA_OFFSET);
1004
1005 picture->colorspace = WEBP_YUV420;
1006 return ImportYUVAFromRGBA(r, g, b, a, 4, 4 * picture->argb_stride,
1007 dithering, use_iterative_conversion, picture);
1008 }
1009 }
1010
1011 int WebPPictureARGBToYUVADithered(WebPPicture* picture, WebPEncCSP colorspace,
1012 float dithering) {
1013 return PictureARGBToYUVA(picture, colorspace, dithering, 0);
1014 }
1015
1016 int WebPPictureARGBToYUVA(WebPPicture* picture, WebPEncCSP colorspace) {
1017 return PictureARGBToYUVA(picture, colorspace, 0.f, 0);
1018 }
1019
1020 int WebPPictureSharpARGBToYUVA(WebPPicture* picture) {
1021 return PictureARGBToYUVA(picture, WEBP_YUV420, 0.f, 1);
1022 }
1023 // for backward compatibility
1024 int WebPPictureSmartARGBToYUVA(WebPPicture* picture) {
1025 return WebPPictureSharpARGBToYUVA(picture);
1026 }
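// Typical caller-side sequence (illustrative sketch, not part of this file;
// 'width', 'height', 'rgba' and 'rgba_stride' are caller-provided):
//   WebPPicture pic;
//   if (WebPPictureInit(&pic)) {
//     pic.width = width;
//     pic.height = height;
//     pic.use_argb = 1;
//     if (WebPPictureImportRGBA(&pic, rgba, rgba_stride)) {
//       // either the fast path or the sharp (iterative) path:
//       WebPPictureARGBToYUVA(&pic, WEBP_YUV420);
//       // WebPPictureSharpARGBToYUVA(&pic);
//     }
//     WebPPictureFree(&pic);
//   }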
1027
1028 //------------------------------------------------------------------------------
1029 // call for YUVA -> ARGB conversion
1030
1031 int WebPPictureYUVAToARGB(WebPPicture* picture) {
1032 if (picture == NULL) return 0;
1033 if (picture->y == NULL || picture->u == NULL || picture->v == NULL) {
1034 return WebPEncodingSetError(picture, VP8_ENC_ERROR_NULL_PARAMETER);
1035 }
1036 if ((picture->colorspace & WEBP_CSP_ALPHA_BIT) && picture->a == NULL) {
1037 return WebPEncodingSetError(picture, VP8_ENC_ERROR_NULL_PARAMETER);
1038 }
1039 if ((picture->colorspace & WEBP_CSP_UV_MASK) != WEBP_YUV420) {
1040 return WebPEncodingSetError(picture, VP8_ENC_ERROR_INVALID_CONFIGURATION);
1041 }
1042 // Allocate a new argb buffer (discarding the previous one).
1043 if (!WebPPictureAllocARGB(picture, picture->width, picture->height)) return 0;
1044 picture->use_argb = 1;
1045
1046 // Convert
1047 {
1048 int y;
1049 const int width = picture->width;
1050 const int height = picture->height;
1051 const int argb_stride = 4 * picture->argb_stride;
1052 uint8_t* dst = (uint8_t*)picture->argb;
1053 const uint8_t *cur_u = picture->u, *cur_v = picture->v, *cur_y = picture->y;
1054 WebPUpsampleLinePairFunc upsample =
1055 WebPGetLinePairConverter(ALPHA_OFFSET > 0);
1056
1057 // First row, with replicated top samples.
1058 upsample(cur_y, NULL, cur_u, cur_v, cur_u, cur_v, dst, NULL, width);
1059 cur_y += picture->y_stride;
1060 dst += argb_stride;
1061 // Center rows.
1062 for (y = 1; y + 1 < height; y += 2) {
1063 const uint8_t* const top_u = cur_u;
1064 const uint8_t* const top_v = cur_v;
1065 cur_u += picture->uv_stride;
1066 cur_v += picture->uv_stride;
1067 upsample(cur_y, cur_y + picture->y_stride, top_u, top_v, cur_u, cur_v,
1068 dst, dst + argb_stride, width);
1069 cur_y += 2 * picture->y_stride;
1070 dst += 2 * argb_stride;
1071 }
1072 // Last row (if needed), with replicated bottom samples.
1073 if (height > 1 && !(height & 1)) {
1074 upsample(cur_y, NULL, cur_u, cur_v, cur_u, cur_v, dst, NULL, width);
1075 }
1076     // Insert alpha values if needed, in place of the default 0xff ones.
1077 if (picture->colorspace & WEBP_CSP_ALPHA_BIT) {
1078 for (y = 0; y < height; ++y) {
1079 uint32_t* const argb_dst = picture->argb + y * picture->argb_stride;
1080 const uint8_t* const src = picture->a + y * picture->a_stride;
1081 int x;
1082 for (x = 0; x < width; ++x) {
1083 argb_dst[x] = (argb_dst[x] & 0x00ffffffu) | ((uint32_t)src[x] << 24);
1084 }
1085 }
1086 }
1087 }
1088 return 1;
1089 }
1090
1091 //------------------------------------------------------------------------------
1092 // automatic import / conversion
1093
1094 static int Import(WebPPicture* const picture,
1095 const uint8_t* rgb, int rgb_stride,
1096 int step, int swap_rb, int import_alpha) {
1097 int y;
1098 // swap_rb -> b,g,r,a , !swap_rb -> r,g,b,a
1099 const uint8_t* r_ptr = rgb + (swap_rb ? 2 : 0);
1100 const uint8_t* g_ptr = rgb + 1;
1101 const uint8_t* b_ptr = rgb + (swap_rb ? 0 : 2);
1102 const int width = picture->width;
1103 const int height = picture->height;
1104
1105 if (!picture->use_argb) {
1106 const uint8_t* a_ptr = import_alpha ? rgb + 3 : NULL;
1107 return ImportYUVAFromRGBA(r_ptr, g_ptr, b_ptr, a_ptr, step, rgb_stride,
1108 0.f /* no dithering */, 0, picture);
1109 }
1110 if (!WebPPictureAlloc(picture)) return 0;
1111
1112 VP8LDspInit();
1113 WebPInitAlphaProcessing();
1114
1115 if (import_alpha) {
1116 // dst[] byte order is {a,r,g,b} for big-endian, {b,g,r,a} for little endian
1117 uint32_t* dst = picture->argb;
1118 const int do_copy = (ALPHA_OFFSET == 3) && swap_rb;
1119 assert(step == 4);
1120 if (do_copy) {
1121 for (y = 0; y < height; ++y) {
1122 memcpy(dst, rgb, width * 4);
1123 rgb += rgb_stride;
1124 dst += picture->argb_stride;
1125 }
1126 } else {
1127 for (y = 0; y < height; ++y) {
1128 #ifdef WORDS_BIGENDIAN
1129 // BGRA or RGBA input order.
1130 const uint8_t* a_ptr = rgb + 3;
1131 WebPPackARGB(a_ptr, r_ptr, g_ptr, b_ptr, width, dst);
1132 r_ptr += rgb_stride;
1133 g_ptr += rgb_stride;
1134 b_ptr += rgb_stride;
1135 #else
1136 // RGBA input order. Need to swap R and B.
1137 VP8LConvertBGRAToRGBA((const uint32_t*)rgb, width, (uint8_t*)dst);
1138 #endif
1139 rgb += rgb_stride;
1140 dst += picture->argb_stride;
1141 }
1142 }
1143 } else {
1144 uint32_t* dst = picture->argb;
1145 assert(step >= 3);
1146 for (y = 0; y < height; ++y) {
1147 WebPPackRGB(r_ptr, g_ptr, b_ptr, width, step, dst);
1148 r_ptr += rgb_stride;
1149 g_ptr += rgb_stride;
1150 b_ptr += rgb_stride;
1151 dst += picture->argb_stride;
1152 }
1153 }
1154 return 1;
1155 }
1156
1157 // Public API
1158
1159 #if !defined(WEBP_REDUCE_CSP)
1160
1161 int WebPPictureImportBGR(WebPPicture* picture,
1162 const uint8_t* rgb, int rgb_stride) {
1163 return (picture != NULL && rgb != NULL)
1164 ? Import(picture, rgb, rgb_stride, 3, 1, 0)
1165 : 0;
1166 }
1167
1168 int WebPPictureImportBGRA(WebPPicture* picture,
1169 const uint8_t* rgba, int rgba_stride) {
1170 return (picture != NULL && rgba != NULL)
1171 ? Import(picture, rgba, rgba_stride, 4, 1, 1)
1172 : 0;
1173 }
1174
1175
1176 int WebPPictureImportBGRX(WebPPicture* picture,
1177 const uint8_t* rgba, int rgba_stride) {
1178 return (picture != NULL && rgba != NULL)
1179 ? Import(picture, rgba, rgba_stride, 4, 1, 0)
1180 : 0;
1181 }
1182
1183 #endif // WEBP_REDUCE_CSP
1184
1185 int WebPPictureImportRGB(WebPPicture* picture,
1186 const uint8_t* rgb, int rgb_stride) {
1187 return (picture != NULL && rgb != NULL)
1188 ? Import(picture, rgb, rgb_stride, 3, 0, 0)
1189 : 0;
1190 }
1191
1192 int WebPPictureImportRGBA(WebPPicture* picture,
1193 const uint8_t* rgba, int rgba_stride) {
1194 return (picture != NULL && rgba != NULL)
1195 ? Import(picture, rgba, rgba_stride, 4, 0, 1)
1196 : 0;
1197 }
1198
1199 int WebPPictureImportRGBX(WebPPicture* picture,
1200 const uint8_t* rgba, int rgba_stride) {
1201 return (picture != NULL && rgba != NULL)
1202 ? Import(picture, rgba, rgba_stride, 4, 0, 0)
1203 : 0;
1204 }
1205
1206 //------------------------------------------------------------------------------
1207