• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
// Copyright 2012 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Image transforms and color space conversion methods for lossless decoder.
//
// Authors: Vikas Arora (vikaas.arora@gmail.com)
//          Jyrki Alakuijala (jyrki@google.com)
//          Urvang Joshi (urvang@google.com)
15 
16 #include "./dsp.h"
17 
18 #include <math.h>
19 #include <stdlib.h>
20 #include "../dec/vp8li_dec.h"
21 #include "../utils/endian_inl_utils.h"
22 #include "./lossless.h"
23 #include "./lossless_common.h"
24 
25 #define MAX_DIFF_COST (1e30f)
26 
//------------------------------------------------------------------------------
// Image transforms.
29 
Average2(uint32_t a0,uint32_t a1)30 static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) {
31   return (((a0 ^ a1) & 0xfefefefeu) >> 1) + (a0 & a1);
32 }
33 
Average3(uint32_t a0,uint32_t a1,uint32_t a2)34 static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) {
35   return Average2(Average2(a0, a2), a1);
36 }
37 
Average4(uint32_t a0,uint32_t a1,uint32_t a2,uint32_t a3)38 static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1,
39                                      uint32_t a2, uint32_t a3) {
40   return Average2(Average2(a0, a1), Average2(a2, a3));
41 }
42 
Clip255(uint32_t a)43 static WEBP_INLINE uint32_t Clip255(uint32_t a) {
44   if (a < 256) {
45     return a;
46   }
47   // return 0, when a is a negative integer.
48   // return 255, when a is positive.
49   return ~a >> 24;
50 }
51 
AddSubtractComponentFull(int a,int b,int c)52 static WEBP_INLINE int AddSubtractComponentFull(int a, int b, int c) {
53   return Clip255(a + b - c);
54 }
55 
ClampedAddSubtractFull(uint32_t c0,uint32_t c1,uint32_t c2)56 static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1,
57                                                    uint32_t c2) {
58   const int a = AddSubtractComponentFull(c0 >> 24, c1 >> 24, c2 >> 24);
59   const int r = AddSubtractComponentFull((c0 >> 16) & 0xff,
60                                          (c1 >> 16) & 0xff,
61                                          (c2 >> 16) & 0xff);
62   const int g = AddSubtractComponentFull((c0 >> 8) & 0xff,
63                                          (c1 >> 8) & 0xff,
64                                          (c2 >> 8) & 0xff);
65   const int b = AddSubtractComponentFull(c0 & 0xff, c1 & 0xff, c2 & 0xff);
66   return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b;
67 }
68 
AddSubtractComponentHalf(int a,int b)69 static WEBP_INLINE int AddSubtractComponentHalf(int a, int b) {
70   return Clip255(a + (a - b) / 2);
71 }
72 
ClampedAddSubtractHalf(uint32_t c0,uint32_t c1,uint32_t c2)73 static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
74                                                    uint32_t c2) {
75   const uint32_t ave = Average2(c0, c1);
76   const int a = AddSubtractComponentHalf(ave >> 24, c2 >> 24);
77   const int r = AddSubtractComponentHalf((ave >> 16) & 0xff, (c2 >> 16) & 0xff);
78   const int g = AddSubtractComponentHalf((ave >> 8) & 0xff, (c2 >> 8) & 0xff);
79   const int b = AddSubtractComponentHalf((ave >> 0) & 0xff, (c2 >> 0) & 0xff);
80   return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b;
81 }
82 
83 // gcc-4.9 on ARM generates incorrect code in Select() when Sub3() is inlined.
84 #if defined(__arm__) && LOCAL_GCC_VERSION == 0x409
85 # define LOCAL_INLINE __attribute__ ((noinline))
86 #else
87 # define LOCAL_INLINE WEBP_INLINE
88 #endif
89 
Sub3(int a,int b,int c)90 static LOCAL_INLINE int Sub3(int a, int b, int c) {
91   const int pb = b - c;
92   const int pa = a - c;
93   return abs(pb) - abs(pa);
94 }
95 
96 #undef LOCAL_INLINE
97 
Select(uint32_t a,uint32_t b,uint32_t c)98 static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
99   const int pa_minus_pb =
100       Sub3((a >> 24)       , (b >> 24)       , (c >> 24)       ) +
101       Sub3((a >> 16) & 0xff, (b >> 16) & 0xff, (c >> 16) & 0xff) +
102       Sub3((a >>  8) & 0xff, (b >>  8) & 0xff, (c >>  8) & 0xff) +
103       Sub3((a      ) & 0xff, (b      ) & 0xff, (c      ) & 0xff);
104   return (pa_minus_pb <= 0) ? a : b;
105 }
106 
//------------------------------------------------------------------------------
// Predictors
109 
Predictor0(uint32_t left,const uint32_t * const top)110 static uint32_t Predictor0(uint32_t left, const uint32_t* const top) {
111   (void)top;
112   (void)left;
113   return ARGB_BLACK;
114 }
Predictor1(uint32_t left,const uint32_t * const top)115 static uint32_t Predictor1(uint32_t left, const uint32_t* const top) {
116   (void)top;
117   return left;
118 }
Predictor2(uint32_t left,const uint32_t * const top)119 static uint32_t Predictor2(uint32_t left, const uint32_t* const top) {
120   (void)left;
121   return top[0];
122 }
Predictor3(uint32_t left,const uint32_t * const top)123 static uint32_t Predictor3(uint32_t left, const uint32_t* const top) {
124   (void)left;
125   return top[1];
126 }
Predictor4(uint32_t left,const uint32_t * const top)127 static uint32_t Predictor4(uint32_t left, const uint32_t* const top) {
128   (void)left;
129   return top[-1];
130 }
Predictor5(uint32_t left,const uint32_t * const top)131 static uint32_t Predictor5(uint32_t left, const uint32_t* const top) {
132   const uint32_t pred = Average3(left, top[0], top[1]);
133   return pred;
134 }
Predictor6(uint32_t left,const uint32_t * const top)135 static uint32_t Predictor6(uint32_t left, const uint32_t* const top) {
136   const uint32_t pred = Average2(left, top[-1]);
137   return pred;
138 }
Predictor7(uint32_t left,const uint32_t * const top)139 static uint32_t Predictor7(uint32_t left, const uint32_t* const top) {
140   const uint32_t pred = Average2(left, top[0]);
141   return pred;
142 }
Predictor8(uint32_t left,const uint32_t * const top)143 static uint32_t Predictor8(uint32_t left, const uint32_t* const top) {
144   const uint32_t pred = Average2(top[-1], top[0]);
145   (void)left;
146   return pred;
147 }
Predictor9(uint32_t left,const uint32_t * const top)148 static uint32_t Predictor9(uint32_t left, const uint32_t* const top) {
149   const uint32_t pred = Average2(top[0], top[1]);
150   (void)left;
151   return pred;
152 }
Predictor10(uint32_t left,const uint32_t * const top)153 static uint32_t Predictor10(uint32_t left, const uint32_t* const top) {
154   const uint32_t pred = Average4(left, top[-1], top[0], top[1]);
155   return pred;
156 }
Predictor11(uint32_t left,const uint32_t * const top)157 static uint32_t Predictor11(uint32_t left, const uint32_t* const top) {
158   const uint32_t pred = Select(top[0], left, top[-1]);
159   return pred;
160 }
Predictor12(uint32_t left,const uint32_t * const top)161 static uint32_t Predictor12(uint32_t left, const uint32_t* const top) {
162   const uint32_t pred = ClampedAddSubtractFull(left, top[0], top[-1]);
163   return pred;
164 }
Predictor13(uint32_t left,const uint32_t * const top)165 static uint32_t Predictor13(uint32_t left, const uint32_t* const top) {
166   const uint32_t pred = ClampedAddSubtractHalf(left, top[0], top[-1]);
167   return pred;
168 }
169 
GENERATE_PREDICTOR_ADD(Predictor0,PredictorAdd0)170 GENERATE_PREDICTOR_ADD(Predictor0, PredictorAdd0)
171 static void PredictorAdd1(const uint32_t* in, const uint32_t* upper,
172                           int num_pixels, uint32_t* out) {
173   int i;
174   uint32_t left = out[-1];
175   for (i = 0; i < num_pixels; ++i) {
176     out[i] = left = VP8LAddPixels(in[i], left);
177   }
178   (void)upper;
179 }
GENERATE_PREDICTOR_ADD(Predictor2,PredictorAdd2)180 GENERATE_PREDICTOR_ADD(Predictor2, PredictorAdd2)
181 GENERATE_PREDICTOR_ADD(Predictor3, PredictorAdd3)
182 GENERATE_PREDICTOR_ADD(Predictor4, PredictorAdd4)
183 GENERATE_PREDICTOR_ADD(Predictor5, PredictorAdd5)
184 GENERATE_PREDICTOR_ADD(Predictor6, PredictorAdd6)
185 GENERATE_PREDICTOR_ADD(Predictor7, PredictorAdd7)
186 GENERATE_PREDICTOR_ADD(Predictor8, PredictorAdd8)
187 GENERATE_PREDICTOR_ADD(Predictor9, PredictorAdd9)
188 GENERATE_PREDICTOR_ADD(Predictor10, PredictorAdd10)
189 GENERATE_PREDICTOR_ADD(Predictor11, PredictorAdd11)
190 GENERATE_PREDICTOR_ADD(Predictor12, PredictorAdd12)
191 GENERATE_PREDICTOR_ADD(Predictor13, PredictorAdd13)
192 
//------------------------------------------------------------------------------
194 
195 // Inverse prediction.
196 static void PredictorInverseTransform(const VP8LTransform* const transform,
197                                       int y_start, int y_end,
198                                       const uint32_t* in, uint32_t* out) {
199   const int width = transform->xsize_;
200   if (y_start == 0) {  // First Row follows the L (mode=1) mode.
201     PredictorAdd0(in, NULL, 1, out);
202     PredictorAdd1(in + 1, NULL, width - 1, out + 1);
203     in += width;
204     out += width;
205     ++y_start;
206   }
207 
208   {
209     int y = y_start;
210     const int tile_width = 1 << transform->bits_;
211     const int mask = tile_width - 1;
212     const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_);
213     const uint32_t* pred_mode_base =
214         transform->data_ + (y >> transform->bits_) * tiles_per_row;
215 
216     while (y < y_end) {
217       const uint32_t* pred_mode_src = pred_mode_base;
218       int x = 1;
219       // First pixel follows the T (mode=2) mode.
220       PredictorAdd2(in, out - width, 1, out);
221       // .. the rest:
222       while (x < width) {
223         const VP8LPredictorAddSubFunc pred_func =
224             VP8LPredictorsAdd[((*pred_mode_src++) >> 8) & 0xf];
225         int x_end = (x & ~mask) + tile_width;
226         if (x_end > width) x_end = width;
227         pred_func(in + x, out + x - width, x_end - x, out + x);
228         x = x_end;
229       }
230       in += width;
231       out += width;
232       ++y;
233       if ((y & mask) == 0) {   // Use the same mask, since tiles are squares.
234         pred_mode_base += tiles_per_row;
235       }
236     }
237   }
238 }
239 
// Add green to blue and red channels (i.e. perform the inverse transform of
// 'subtract green').
//   src        : num_pixels packed ARGB pixels.
//   num_pixels : number of pixels to process.
//   dst        : receives num_pixels pixels; alpha and green are copied
//                unchanged, red and blue each have green added modulo 256.
void VP8LAddGreenToBlueAndRed_C(const uint32_t* src, int num_pixels,
                                uint32_t* dst) {
  int i;
  for (i = 0; i < num_pixels; ++i) {
    const uint32_t argb = src[i];
    const uint32_t green = ((argb >> 8) & 0xff);
    // Red and blue are handled together: they sit in bytes 2 and 0, with an
    // empty byte above each to absorb the carry, which is then masked away.
    uint32_t red_blue = (argb & 0x00ff00ffu);
    red_blue += (green << 16) | green;
    red_blue &= 0x00ff00ffu;
    dst[i] = (argb & 0xff00ff00u) | red_blue;
  }
}
254 
ColorTransformDelta(int8_t color_pred,int8_t color)255 static WEBP_INLINE int ColorTransformDelta(int8_t color_pred,
256                                            int8_t color) {
257   return ((int)color_pred * color) >> 5;
258 }
259 
ColorCodeToMultipliers(uint32_t color_code,VP8LMultipliers * const m)260 static WEBP_INLINE void ColorCodeToMultipliers(uint32_t color_code,
261                                                VP8LMultipliers* const m) {
262   m->green_to_red_  = (color_code >>  0) & 0xff;
263   m->green_to_blue_ = (color_code >>  8) & 0xff;
264   m->red_to_blue_   = (color_code >> 16) & 0xff;
265 }
266 
VP8LTransformColorInverse_C(const VP8LMultipliers * const m,const uint32_t * src,int num_pixels,uint32_t * dst)267 void VP8LTransformColorInverse_C(const VP8LMultipliers* const m,
268                                  const uint32_t* src, int num_pixels,
269                                  uint32_t* dst) {
270   int i;
271   for (i = 0; i < num_pixels; ++i) {
272     const uint32_t argb = src[i];
273     const uint32_t green = argb >> 8;
274     const uint32_t red = argb >> 16;
275     int new_red = red;
276     int new_blue = argb;
277     new_red += ColorTransformDelta(m->green_to_red_, green);
278     new_red &= 0xff;
279     new_blue += ColorTransformDelta(m->green_to_blue_, green);
280     new_blue += ColorTransformDelta(m->red_to_blue_, new_red);
281     new_blue &= 0xff;
282     dst[i] = (argb & 0xff00ff00u) | (new_red << 16) | (new_blue);
283   }
284 }
285 
286 // Color space inverse transform.
ColorSpaceInverseTransform(const VP8LTransform * const transform,int y_start,int y_end,const uint32_t * src,uint32_t * dst)287 static void ColorSpaceInverseTransform(const VP8LTransform* const transform,
288                                        int y_start, int y_end,
289                                        const uint32_t* src, uint32_t* dst) {
290   const int width = transform->xsize_;
291   const int tile_width = 1 << transform->bits_;
292   const int mask = tile_width - 1;
293   const int safe_width = width & ~mask;
294   const int remaining_width = width - safe_width;
295   const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_);
296   int y = y_start;
297   const uint32_t* pred_row =
298       transform->data_ + (y >> transform->bits_) * tiles_per_row;
299 
300   while (y < y_end) {
301     const uint32_t* pred = pred_row;
302     VP8LMultipliers m = { 0, 0, 0 };
303     const uint32_t* const src_safe_end = src + safe_width;
304     const uint32_t* const src_end = src + width;
305     while (src < src_safe_end) {
306       ColorCodeToMultipliers(*pred++, &m);
307       VP8LTransformColorInverse(&m, src, tile_width, dst);
308       src += tile_width;
309       dst += tile_width;
310     }
311     if (src < src_end) {  // Left-overs using C-version.
312       ColorCodeToMultipliers(*pred++, &m);
313       VP8LTransformColorInverse(&m, src, remaining_width, dst);
314       src += remaining_width;
315       dst += remaining_width;
316     }
317     ++y;
318     if ((y & mask) == 0) pred_row += tiles_per_row;
319   }
320 }
321 
322 // Separate out pixels packed together using pixel-bundling.
323 // We define two methods for ARGB data (uint32_t) and alpha-only data (uint8_t).
324 #define COLOR_INDEX_INVERSE(FUNC_NAME, F_NAME, STATIC_DECL, TYPE, BIT_SUFFIX,  \
325                             GET_INDEX, GET_VALUE)                              \
326 static void F_NAME(const TYPE* src, const uint32_t* const color_map,           \
327                    TYPE* dst, int y_start, int y_end, int width) {             \
328   int y;                                                                       \
329   for (y = y_start; y < y_end; ++y) {                                          \
330     int x;                                                                     \
331     for (x = 0; x < width; ++x) {                                              \
332       *dst++ = GET_VALUE(color_map[GET_INDEX(*src++)]);                        \
333     }                                                                          \
334   }                                                                            \
335 }                                                                              \
336 STATIC_DECL void FUNC_NAME(const VP8LTransform* const transform,               \
337                            int y_start, int y_end, const TYPE* src,            \
338                            TYPE* dst) {                                        \
339   int y;                                                                       \
340   const int bits_per_pixel = 8 >> transform->bits_;                            \
341   const int width = transform->xsize_;                                         \
342   const uint32_t* const color_map = transform->data_;                          \
343   if (bits_per_pixel < 8) {                                                    \
344     const int pixels_per_byte = 1 << transform->bits_;                         \
345     const int count_mask = pixels_per_byte - 1;                                \
346     const uint32_t bit_mask = (1 << bits_per_pixel) - 1;                       \
347     for (y = y_start; y < y_end; ++y) {                                        \
348       uint32_t packed_pixels = 0;                                              \
349       int x;                                                                   \
350       for (x = 0; x < width; ++x) {                                            \
351         /* We need to load fresh 'packed_pixels' once every                */  \
352         /* 'pixels_per_byte' increments of x. Fortunately, pixels_per_byte */  \
353         /* is a power of 2, so can just use a mask for that, instead of    */  \
354         /* decrementing a counter.                                         */  \
355         if ((x & count_mask) == 0) packed_pixels = GET_INDEX(*src++);          \
356         *dst++ = GET_VALUE(color_map[packed_pixels & bit_mask]);               \
357         packed_pixels >>= bits_per_pixel;                                      \
358       }                                                                        \
359     }                                                                          \
360   } else {                                                                     \
361     VP8LMapColor##BIT_SUFFIX(src, color_map, dst, y_start, y_end, width);      \
362   }                                                                            \
363 }
364 
365 COLOR_INDEX_INVERSE(ColorIndexInverseTransform, MapARGB, static, uint32_t, 32b,
366                     VP8GetARGBIndex, VP8GetARGBValue)
367 COLOR_INDEX_INVERSE(VP8LColorIndexInverseTransformAlpha, MapAlpha, , uint8_t,
368                     8b, VP8GetAlphaIndex, VP8GetAlphaValue)
369 
370 #undef COLOR_INDEX_INVERSE
371 
VP8LInverseTransform(const VP8LTransform * const transform,int row_start,int row_end,const uint32_t * const in,uint32_t * const out)372 void VP8LInverseTransform(const VP8LTransform* const transform,
373                           int row_start, int row_end,
374                           const uint32_t* const in, uint32_t* const out) {
375   const int width = transform->xsize_;
376   assert(row_start < row_end);
377   assert(row_end <= transform->ysize_);
378   switch (transform->type_) {
379     case SUBTRACT_GREEN:
380       VP8LAddGreenToBlueAndRed(in, (row_end - row_start) * width, out);
381       break;
382     case PREDICTOR_TRANSFORM:
383       PredictorInverseTransform(transform, row_start, row_end, in, out);
384       if (row_end != transform->ysize_) {
385         // The last predicted row in this iteration will be the top-pred row
386         // for the first row in next iteration.
387         memcpy(out - width, out + (row_end - row_start - 1) * width,
388                width * sizeof(*out));
389       }
390       break;
391     case CROSS_COLOR_TRANSFORM:
392       ColorSpaceInverseTransform(transform, row_start, row_end, in, out);
393       break;
394     case COLOR_INDEXING_TRANSFORM:
395       if (in == out && transform->bits_ > 0) {
396         // Move packed pixels to the end of unpacked region, so that unpacking
397         // can occur seamlessly.
398         // Also, note that this is the only transform that applies on
399         // the effective width of VP8LSubSampleSize(xsize_, bits_). All other
400         // transforms work on effective width of xsize_.
401         const int out_stride = (row_end - row_start) * width;
402         const int in_stride = (row_end - row_start) *
403             VP8LSubSampleSize(transform->xsize_, transform->bits_);
404         uint32_t* const src = out + out_stride - in_stride;
405         memmove(src, out, in_stride * sizeof(*src));
406         ColorIndexInverseTransform(transform, row_start, row_end, src, out);
407       } else {
408         ColorIndexInverseTransform(transform, row_start, row_end, in, out);
409       }
410       break;
411   }
412 }
413 
//------------------------------------------------------------------------------
// Color space conversion.
416 
// Run-time endianness probe. Returns non-zero when the host stores the most
// significant byte of a 16-bit value first, 0 otherwise.
static int is_big_endian(void) {
  // Store the value 1 as a uint16_t and look at its first byte in memory:
  // it is 1 only when the least significant byte comes first.
  static const union {
    uint16_t w;
    uint8_t b[2];
  } tmp = { 1 };
  return (tmp.b[0] != 1);
}
424 
// Converts num_pixels packed pixels (alpha in bits 24..31, red in 16..23,
// green in 8..15, blue in 0..7) into 3 bytes per pixel in R, G, B order.
// Alpha is dropped. 'dst' must have room for 3 * num_pixels bytes.
void VP8LConvertBGRAToRGB_C(const uint32_t* src,
                            int num_pixels, uint8_t* dst) {
  const uint32_t* const src_end = src + num_pixels;
  while (src < src_end) {
    const uint32_t argb = *src++;
    *dst++ = (uint8_t)((argb >> 16) & 0xff);
    *dst++ = (uint8_t)((argb >>  8) & 0xff);
    *dst++ = (uint8_t)((argb >>  0) & 0xff);
  }
}
435 
// Converts num_pixels packed pixels (alpha in bits 24..31, red in 16..23,
// green in 8..15, blue in 0..7) into 4 bytes per pixel in R, G, B, A order.
// 'dst' must have room for 4 * num_pixels bytes.
void VP8LConvertBGRAToRGBA_C(const uint32_t* src,
                             int num_pixels, uint8_t* dst) {
  const uint32_t* const src_end = src + num_pixels;
  while (src < src_end) {
    const uint32_t argb = *src++;
    *dst++ = (uint8_t)((argb >> 16) & 0xff);
    *dst++ = (uint8_t)((argb >>  8) & 0xff);
    *dst++ = (uint8_t)((argb >>  0) & 0xff);
    *dst++ = (uint8_t)((argb >> 24) & 0xff);
  }
}
447 
// Converts num_pixels packed 8-bit-per-channel pixels into 2 bytes per pixel,
// keeping the top 4 bits of every channel:
//   rg = RRRRGGGG, ba = BBBBAAAA
// The bytes are written as (rg, ba), or as (ba, rg) when WEBP_SWAP_16BIT_CSP
// is defined. 'dst' must have room for 2 * num_pixels bytes.
void VP8LConvertBGRAToRGBA4444_C(const uint32_t* src,
                                 int num_pixels, uint8_t* dst) {
  const uint32_t* const src_end = src + num_pixels;
  while (src < src_end) {
    const uint32_t argb = *src++;
    const uint8_t rg =
        (uint8_t)(((argb >> 16) & 0xf0) | ((argb >> 12) & 0xf));
    const uint8_t ba =
        (uint8_t)(((argb >>  0) & 0xf0) | ((argb >> 28) & 0xf));
#ifdef WEBP_SWAP_16BIT_CSP
    *dst++ = ba;
    *dst++ = rg;
#else
    *dst++ = rg;
    *dst++ = ba;
#endif
  }
}
464 
// Converts num_pixels packed 8-bit-per-channel pixels into 2 bytes per pixel
// holding 5 bits of red, 6 bits of green and 5 bits of blue:
//   rg = RRRRRGGG (top 5 bits of red, top 3 bits of green)
//   gb = GGGBBBBB (next 3 bits of green, top 5 bits of blue)
// The bytes are written as (rg, gb), or as (gb, rg) when WEBP_SWAP_16BIT_CSP
// is defined. Alpha is dropped. 'dst' must have room for 2 * num_pixels bytes.
void VP8LConvertBGRAToRGB565_C(const uint32_t* src,
                               int num_pixels, uint8_t* dst) {
  const uint32_t* const src_end = src + num_pixels;
  while (src < src_end) {
    const uint32_t argb = *src++;
    const uint8_t rg =
        (uint8_t)(((argb >> 16) & 0xf8) | ((argb >> 13) & 0x7));
    const uint8_t gb =
        (uint8_t)(((argb >>  5) & 0xe0) | ((argb >>  3) & 0x1f));
#ifdef WEBP_SWAP_16BIT_CSP
    *dst++ = gb;
    *dst++ = rg;
#else
    *dst++ = rg;
    *dst++ = gb;
#endif
  }
}
481 
// Converts num_pixels packed pixels (alpha in bits 24..31, red in 16..23,
// green in 8..15, blue in 0..7) into 3 bytes per pixel in B, G, R order.
// Alpha is dropped. 'dst' must have room for 3 * num_pixels bytes.
void VP8LConvertBGRAToBGR_C(const uint32_t* src,
                            int num_pixels, uint8_t* dst) {
  const uint32_t* const src_end = src + num_pixels;
  while (src < src_end) {
    const uint32_t argb = *src++;
    *dst++ = (uint8_t)((argb >>  0) & 0xff);
    *dst++ = (uint8_t)((argb >>  8) & 0xff);
    *dst++ = (uint8_t)((argb >> 16) & 0xff);
  }
}
492 
// Writes num_pixels 32-bit pixels from 'src' to 'dst' (4 bytes each), either
// verbatim or with the four bytes of every pixel reversed.
//   swap_on_big_endian : the bytes are reversed when this flag equals the
//                        run-time result of is_big_endian(); otherwise the
//                        pixels are copied with memcpy().
// Callers pass 1 for the BGRA-style outputs and 0 for the ARGB-style ones.
static void CopyOrSwap(const uint32_t* src, int num_pixels, uint8_t* dst,
                       int swap_on_big_endian) {
  if (is_big_endian() == swap_on_big_endian) {
    const uint32_t* const src_end = src + num_pixels;
    while (src < src_end) {
      const uint32_t argb = *src++;

#if !defined(WORDS_BIGENDIAN)
#if !defined(WEBP_REFERENCE_IMPLEMENTATION)
      // Fast path; the reference branch below spells out the byte order this
      // is meant to produce (most significant byte first).
      WebPUint32ToMem(dst, BSwap32(argb));
#else  // WEBP_REFERENCE_IMPLEMENTATION
      dst[0] = (argb >> 24) & 0xff;
      dst[1] = (argb >> 16) & 0xff;
      dst[2] = (argb >>  8) & 0xff;
      dst[3] = (argb >>  0) & 0xff;
#endif
#else  // WORDS_BIGENDIAN
      // Least significant byte first.
      dst[0] = (argb >>  0) & 0xff;
      dst[1] = (argb >>  8) & 0xff;
      dst[2] = (argb >> 16) & 0xff;
      dst[3] = (argb >> 24) & 0xff;
#endif
      dst += sizeof(argb);
    }
  } else {
    memcpy(dst, src, num_pixels * sizeof(*src));
  }
}
521 
// Converts num_pixels decoded pixels from 'in_data' into the byte layout
// requested by 'out_colorspace' and stores them in 'rgba'.
// The lower-case "premultiplied" modes (rgbA, bgrA, Argb, rgbA_4444) run the
// same conversion as their plain counterpart and then apply
// WebPApplyAlphaMultiply*() in place on the converted row.
// NOTE(review): the second argument of WebPApplyAlphaMultiply() is 1 only for
// MODE_Argb; presumably it flags "alpha comes first" -- confirm against its
// declaration.
// Any other colorspace value is a programming error and triggers an assert.
void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels,
                         WEBP_CSP_MODE out_colorspace, uint8_t* const rgba) {
  switch (out_colorspace) {
    case MODE_RGB:
      VP8LConvertBGRAToRGB(in_data, num_pixels, rgba);
      break;
    case MODE_RGBA:
      VP8LConvertBGRAToRGBA(in_data, num_pixels, rgba);
      break;
    case MODE_rgbA:
      VP8LConvertBGRAToRGBA(in_data, num_pixels, rgba);
      WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0);
      break;
    case MODE_BGR:
      VP8LConvertBGRAToBGR(in_data, num_pixels, rgba);
      break;
    case MODE_BGRA:
      CopyOrSwap(in_data, num_pixels, rgba, 1);
      break;
    case MODE_bgrA:
      CopyOrSwap(in_data, num_pixels, rgba, 1);
      WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0);
      break;
    case MODE_ARGB:
      CopyOrSwap(in_data, num_pixels, rgba, 0);
      break;
    case MODE_Argb:
      CopyOrSwap(in_data, num_pixels, rgba, 0);
      WebPApplyAlphaMultiply(rgba, 1, num_pixels, 1, 0);
      break;
    case MODE_RGBA_4444:
      VP8LConvertBGRAToRGBA4444(in_data, num_pixels, rgba);
      break;
    case MODE_rgbA_4444:
      VP8LConvertBGRAToRGBA4444(in_data, num_pixels, rgba);
      WebPApplyAlphaMultiply4444(rgba, num_pixels, 1, 0);
      break;
    case MODE_RGB_565:
      VP8LConvertBGRAToRGB565(in_data, num_pixels, rgba);
      break;
    default:
      assert(0);          // Code flow should not reach here.
  }
}
566 
//------------------------------------------------------------------------------
568 
569 VP8LProcessDecBlueAndRedFunc VP8LAddGreenToBlueAndRed;
570 VP8LPredictorAddSubFunc VP8LPredictorsAdd[16];
571 VP8LPredictorFunc VP8LPredictors[16];
572 
573 // exposed plain-C implementations
574 VP8LPredictorAddSubFunc VP8LPredictorsAdd_C[16];
575 VP8LPredictorFunc VP8LPredictors_C[16];
576 
577 VP8LTransformColorInverseFunc VP8LTransformColorInverse;
578 
579 VP8LConvertFunc VP8LConvertBGRAToRGB;
580 VP8LConvertFunc VP8LConvertBGRAToRGBA;
581 VP8LConvertFunc VP8LConvertBGRAToRGBA4444;
582 VP8LConvertFunc VP8LConvertBGRAToRGB565;
583 VP8LConvertFunc VP8LConvertBGRAToBGR;
584 
585 VP8LMapARGBFunc VP8LMapColor32b;
586 VP8LMapAlphaFunc VP8LMapColor8b;
587 
588 extern void VP8LDspInitSSE2(void);
589 extern void VP8LDspInitNEON(void);
590 extern void VP8LDspInitMIPSdspR2(void);
591 extern void VP8LDspInitMSA(void);
592 
593 static volatile VP8CPUInfo lossless_last_cpuinfo_used =
594     (VP8CPUInfo)&lossless_last_cpuinfo_used;
595 
596 #define COPY_PREDICTOR_ARRAY(IN, OUT) do {              \
597   (OUT)[0] = IN##0;                                     \
598   (OUT)[1] = IN##1;                                     \
599   (OUT)[2] = IN##2;                                     \
600   (OUT)[3] = IN##3;                                     \
601   (OUT)[4] = IN##4;                                     \
602   (OUT)[5] = IN##5;                                     \
603   (OUT)[6] = IN##6;                                     \
604   (OUT)[7] = IN##7;                                     \
605   (OUT)[8] = IN##8;                                     \
606   (OUT)[9] = IN##9;                                     \
607   (OUT)[10] = IN##10;                                   \
608   (OUT)[11] = IN##11;                                   \
609   (OUT)[12] = IN##12;                                   \
610   (OUT)[13] = IN##13;                                   \
611   (OUT)[14] = IN##0; /* <- padding security sentinels*/ \
612   (OUT)[15] = IN##0;                                    \
613 } while (0);
614 
VP8LDspInit(void)615 WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInit(void) {
616   if (lossless_last_cpuinfo_used == VP8GetCPUInfo) return;
617 
618   COPY_PREDICTOR_ARRAY(Predictor, VP8LPredictors)
619   COPY_PREDICTOR_ARRAY(Predictor, VP8LPredictors_C)
620   COPY_PREDICTOR_ARRAY(PredictorAdd, VP8LPredictorsAdd)
621   COPY_PREDICTOR_ARRAY(PredictorAdd, VP8LPredictorsAdd_C)
622 
623   VP8LAddGreenToBlueAndRed = VP8LAddGreenToBlueAndRed_C;
624 
625   VP8LTransformColorInverse = VP8LTransformColorInverse_C;
626 
627   VP8LConvertBGRAToRGB = VP8LConvertBGRAToRGB_C;
628   VP8LConvertBGRAToRGBA = VP8LConvertBGRAToRGBA_C;
629   VP8LConvertBGRAToRGBA4444 = VP8LConvertBGRAToRGBA4444_C;
630   VP8LConvertBGRAToRGB565 = VP8LConvertBGRAToRGB565_C;
631   VP8LConvertBGRAToBGR = VP8LConvertBGRAToBGR_C;
632 
633   VP8LMapColor32b = MapARGB;
634   VP8LMapColor8b = MapAlpha;
635 
636   // If defined, use CPUInfo() to overwrite some pointers with faster versions.
637   if (VP8GetCPUInfo != NULL) {
638 #if defined(WEBP_USE_SSE2)
639     if (VP8GetCPUInfo(kSSE2)) {
640       VP8LDspInitSSE2();
641     }
642 #endif
643 #if defined(WEBP_USE_NEON)
644     if (VP8GetCPUInfo(kNEON)) {
645       VP8LDspInitNEON();
646     }
647 #endif
648 #if defined(WEBP_USE_MIPS_DSP_R2)
649     if (VP8GetCPUInfo(kMIPSdspR2)) {
650       VP8LDspInitMIPSdspR2();
651     }
652 #endif
653 #if defined(WEBP_USE_MSA)
654     if (VP8GetCPUInfo(kMSA)) {
655       VP8LDspInitMSA();
656     }
657 #endif
658   }
659   lossless_last_cpuinfo_used = VP8GetCPUInfo;
660 }
661 #undef COPY_PREDICTOR_ARRAY
662 
//------------------------------------------------------------------------------
664