• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2012 Google Inc. All Rights Reserved.
2 //
3 // This code is licensed under the same terms as WebM:
4 //  Software License Agreement:  http://www.webmproject.org/license/software/
5 //  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
6 // -----------------------------------------------------------------------------
7 //
8 // Image transforms and color space conversion methods for lossless decoder.
9 //
10 // Authors: Vikas Arora (vikaas.arora@gmail.com)
11 //          Jyrki Alakuijala (jyrki@google.com)
12 //          Urvang Joshi (urvang@google.com)
13 
14 #include "./dsp.h"
15 
16 // Define the following if target arch is sure to have SSE2
17 // #define WEBP_TARGET_HAS_SSE2
18 
19 #if defined(__cplusplus) || defined(c_plusplus)
20 extern "C" {
21 #endif
22 
23 #if defined(WEBP_TARGET_HAS_SSE2)
24 #include <emmintrin.h>
25 #endif
26 
27 #include <math.h>
28 #include <stdlib.h>
29 #include "./lossless.h"
30 #include "../dec/vp8li.h"
31 #include "./yuv.h"
32 
33 #define MAX_DIFF_COST (1e30f)
34 
35 // lookup table for small values of log2(int)
36 #define APPROX_LOG_MAX  4096
37 #define LOG_2_RECIPROCAL 1.44269504088896338700465094007086
38 const float kLog2Table[LOG_LOOKUP_IDX_MAX] = {
39   0.0000000000000000f, 0.0000000000000000f,
40   1.0000000000000000f, 1.5849625007211560f,
41   2.0000000000000000f, 2.3219280948873621f,
42   2.5849625007211560f, 2.8073549220576041f,
43   3.0000000000000000f, 3.1699250014423121f,
44   3.3219280948873621f, 3.4594316186372973f,
45   3.5849625007211560f, 3.7004397181410921f,
46   3.8073549220576041f, 3.9068905956085187f,
47   4.0000000000000000f, 4.0874628412503390f,
48   4.1699250014423121f, 4.2479275134435852f,
49   4.3219280948873626f, 4.3923174227787606f,
50   4.4594316186372973f, 4.5235619560570130f,
51   4.5849625007211560f, 4.6438561897747243f,
52   4.7004397181410917f, 4.7548875021634682f,
53   4.8073549220576037f, 4.8579809951275718f,
54   4.9068905956085187f, 4.9541963103868749f,
55   5.0000000000000000f, 5.0443941193584533f,
56   5.0874628412503390f, 5.1292830169449663f,
57   5.1699250014423121f, 5.2094533656289501f,
58   5.2479275134435852f, 5.2854022188622487f,
59   5.3219280948873626f, 5.3575520046180837f,
60   5.3923174227787606f, 5.4262647547020979f,
61   5.4594316186372973f, 5.4918530963296747f,
62   5.5235619560570130f, 5.5545888516776376f,
63   5.5849625007211560f, 5.6147098441152083f,
64   5.6438561897747243f, 5.6724253419714951f,
65   5.7004397181410917f, 5.7279204545631987f,
66   5.7548875021634682f, 5.7813597135246599f,
67   5.8073549220576037f, 5.8328900141647412f,
68   5.8579809951275718f, 5.8826430493618415f,
69   5.9068905956085187f, 5.9307373375628866f,
70   5.9541963103868749f, 5.9772799234999167f,
71   6.0000000000000000f, 6.0223678130284543f,
72   6.0443941193584533f, 6.0660891904577720f,
73   6.0874628412503390f, 6.1085244567781691f,
74   6.1292830169449663f, 6.1497471195046822f,
75   6.1699250014423121f, 6.1898245588800175f,
76   6.2094533656289501f, 6.2288186904958804f,
77   6.2479275134435852f, 6.2667865406949010f,
78   6.2854022188622487f, 6.3037807481771030f,
79   6.3219280948873626f, 6.3398500028846243f,
80   6.3575520046180837f, 6.3750394313469245f,
81   6.3923174227787606f, 6.4093909361377017f,
82   6.4262647547020979f, 6.4429434958487279f,
83   6.4594316186372973f, 6.4757334309663976f,
84   6.4918530963296747f, 6.5077946401986963f,
85   6.5235619560570130f, 6.5391588111080309f,
86   6.5545888516776376f, 6.5698556083309478f,
87   6.5849625007211560f, 6.5999128421871278f,
88   6.6147098441152083f, 6.6293566200796094f,
89   6.6438561897747243f, 6.6582114827517946f,
90   6.6724253419714951f, 6.6865005271832185f,
91   6.7004397181410917f, 6.7142455176661224f,
92   6.7279204545631987f, 6.7414669864011464f,
93   6.7548875021634682f, 6.7681843247769259f,
94   6.7813597135246599f, 6.7944158663501061f,
95   6.8073549220576037f, 6.8201789624151878f,
96   6.8328900141647412f, 6.8454900509443747f,
97   6.8579809951275718f, 6.8703647195834047f,
98   6.8826430493618415f, 6.8948177633079437f,
99   6.9068905956085187f, 6.9188632372745946f,
100   6.9307373375628866f, 6.9425145053392398f,
101   6.9541963103868749f, 6.9657842846620869f,
102   6.9772799234999167f, 6.9886846867721654f,
103   7.0000000000000000f, 7.0112272554232539f,
104   7.0223678130284543f, 7.0334230015374501f,
105   7.0443941193584533f, 7.0552824355011898f,
106   7.0660891904577720f, 7.0768155970508308f,
107   7.0874628412503390f, 7.0980320829605263f,
108   7.1085244567781691f, 7.1189410727235076f,
109   7.1292830169449663f, 7.1395513523987936f,
110   7.1497471195046822f, 7.1598713367783890f,
111   7.1699250014423121f, 7.1799090900149344f,
112   7.1898245588800175f, 7.1996723448363644f,
113   7.2094533656289501f, 7.2191685204621611f,
114   7.2288186904958804f, 7.2384047393250785f,
115   7.2479275134435852f, 7.2573878426926521f,
116   7.2667865406949010f, 7.2761244052742375f,
117   7.2854022188622487f, 7.2946207488916270f,
118   7.3037807481771030f, 7.3128829552843557f,
119   7.3219280948873626f, 7.3309168781146167f,
120   7.3398500028846243f, 7.3487281542310771f,
121   7.3575520046180837f, 7.3663222142458160f,
122   7.3750394313469245f, 7.3837042924740519f,
123   7.3923174227787606f, 7.4008794362821843f,
124   7.4093909361377017f, 7.4178525148858982f,
125   7.4262647547020979f, 7.4346282276367245f,
126   7.4429434958487279f, 7.4512111118323289f,
127   7.4594316186372973f, 7.4676055500829976f,
128   7.4757334309663976f, 7.4838157772642563f,
129   7.4918530963296747f, 7.4998458870832056f,
130   7.5077946401986963f, 7.5156998382840427f,
131   7.5235619560570130f, 7.5313814605163118f,
132   7.5391588111080309f, 7.5468944598876364f,
133   7.5545888516776376f, 7.5622424242210728f,
134   7.5698556083309478f, 7.5774288280357486f,
135   7.5849625007211560f, 7.5924570372680806f,
136   7.5999128421871278f, 7.6073303137496104f,
137   7.6147098441152083f, 7.6220518194563764f,
138   7.6293566200796094f, 7.6366246205436487f,
139   7.6438561897747243f, 7.6510516911789281f,
140   7.6582114827517946f, 7.6653359171851764f,
141   7.6724253419714951f, 7.6794800995054464f,
142   7.6865005271832185f, 7.6934869574993252f,
143   7.7004397181410917f, 7.7073591320808825f,
144   7.7142455176661224f, 7.7210991887071855f,
145   7.7279204545631987f, 7.7347096202258383f,
146   7.7414669864011464f, 7.7481928495894605f,
147   7.7548875021634682f, 7.7615512324444795f,
148   7.7681843247769259f, 7.7747870596011736f,
149   7.7813597135246599f, 7.7879025593914317f,
150   7.7944158663501061f, 7.8008998999203047f,
151   7.8073549220576037f, 7.8137811912170374f,
152   7.8201789624151878f, 7.8265484872909150f,
153   7.8328900141647412f, 7.8392037880969436f,
154   7.8454900509443747f, 7.8517490414160571f,
155   7.8579809951275718f, 7.8641861446542797f,
156   7.8703647195834047f, 7.8765169465649993f,
157   7.8826430493618415f, 7.8887432488982591f,
158   7.8948177633079437f, 7.9008668079807486f,
159   7.9068905956085187f, 7.9128893362299619f,
160   7.9188632372745946f, 7.9248125036057812f,
161   7.9307373375628866f, 7.9366379390025709f,
162   7.9425145053392398f, 7.9483672315846778f,
163   7.9541963103868749f, 7.9600019320680805f,
164   7.9657842846620869f, 7.9715435539507719f,
165   7.9772799234999167f, 7.9829935746943103f,
166   7.9886846867721654f, 7.9943534368588577f
167 };
168 
169 const float kSLog2Table[LOG_LOOKUP_IDX_MAX] = {
170   0.00000000f,    0.00000000f,  2.00000000f,   4.75488750f,
171   8.00000000f,   11.60964047f,  15.50977500f,  19.65148445f,
172   24.00000000f,  28.52932501f,  33.21928095f,  38.05374781f,
173   43.01955001f,  48.10571634f,  53.30296891f,  58.60335893f,
174   64.00000000f,  69.48686830f,  75.05865003f,  80.71062276f,
175   86.43856190f,  92.23866588f,  98.10749561f,  104.04192499f,
176   110.03910002f, 116.09640474f, 122.21143267f, 128.38196256f,
177   134.60593782f, 140.88144886f, 147.20671787f, 153.58008562f,
178   160.00000000f, 166.46500594f, 172.97373660f, 179.52490559f,
179   186.11730005f, 192.74977453f, 199.42124551f, 206.13068654f,
180   212.87712380f, 219.65963219f, 226.47733176f, 233.32938445f,
181   240.21499122f, 247.13338933f, 254.08384998f, 261.06567603f,
182   268.07820003f, 275.12078236f, 282.19280949f, 289.29369244f,
183   296.42286534f, 303.57978409f, 310.76392512f, 317.97478424f,
184   325.21187564f, 332.47473081f, 339.76289772f, 347.07593991f,
185   354.41343574f, 361.77497759f, 369.16017124f, 376.56863518f,
186   384.00000000f, 391.45390785f, 398.93001188f, 406.42797576f,
187   413.94747321f, 421.48818752f, 429.04981119f, 436.63204548f,
188   444.23460010f, 451.85719280f, 459.49954906f, 467.16140179f,
189   474.84249102f, 482.54256363f, 490.26137307f, 497.99867911f,
190   505.75424759f, 513.52785023f, 521.31926438f, 529.12827280f,
191   536.95466351f, 544.79822957f, 552.65876890f, 560.53608414f,
192   568.42998244f, 576.34027536f, 584.26677867f, 592.20931226f,
193   600.16769996f, 608.14176943f, 616.13135206f, 624.13628279f,
194   632.15640007f, 640.19154569f, 648.24156472f, 656.30630539f,
195   664.38561898f, 672.47935976f, 680.58738488f, 688.70955430f,
196   696.84573069f, 704.99577935f, 713.15956818f, 721.33696754f,
197   729.52785023f, 737.73209140f, 745.94956849f, 754.18016116f,
198   762.42375127f, 770.68022275f, 778.94946161f, 787.23135586f,
199   795.52579543f, 803.83267219f, 812.15187982f, 820.48331383f,
200   828.82687147f, 837.18245171f, 845.54995518f, 853.92928416f,
201   862.32034249f, 870.72303558f, 879.13727036f, 887.56295522f,
202   896.00000000f, 904.44831595f, 912.90781569f, 921.37841320f,
203   929.86002376f, 938.35256392f, 946.85595152f, 955.37010560f,
204   963.89494641f, 972.43039537f, 980.97637504f, 989.53280911f,
205   998.09962237f, 1006.67674069f, 1015.26409097f, 1023.86160116f,
206   1032.46920021f, 1041.08681805f, 1049.71438560f, 1058.35183469f,
207   1066.99909811f, 1075.65610955f, 1084.32280357f, 1092.99911564f,
208   1101.68498204f, 1110.38033993f, 1119.08512727f, 1127.79928282f,
209   1136.52274614f, 1145.25545758f, 1153.99735821f, 1162.74838989f,
210   1171.50849518f, 1180.27761738f, 1189.05570047f, 1197.84268914f,
211   1206.63852876f, 1215.44316535f, 1224.25654560f, 1233.07861684f,
212   1241.90932703f, 1250.74862473f, 1259.59645914f, 1268.45278005f,
213   1277.31753781f, 1286.19068338f, 1295.07216828f, 1303.96194457f,
214   1312.85996488f, 1321.76618236f, 1330.68055071f, 1339.60302413f,
215   1348.53355734f, 1357.47210556f, 1366.41862452f, 1375.37307041f,
216   1384.33539991f, 1393.30557020f, 1402.28353887f, 1411.26926400f,
217   1420.26270412f, 1429.26381818f, 1438.27256558f, 1447.28890615f,
218   1456.31280014f, 1465.34420819f, 1474.38309138f, 1483.42941118f,
219   1492.48312945f, 1501.54420843f, 1510.61261078f, 1519.68829949f,
220   1528.77123795f, 1537.86138993f, 1546.95871952f, 1556.06319119f,
221   1565.17476976f, 1574.29342040f, 1583.41910860f, 1592.55180020f,
222   1601.69146137f, 1610.83805860f, 1619.99155871f, 1629.15192882f,
223   1638.31913637f, 1647.49314911f, 1656.67393509f, 1665.86146266f,
224   1675.05570047f, 1684.25661744f, 1693.46418280f, 1702.67836605f,
225   1711.89913698f, 1721.12646563f, 1730.36032233f, 1739.60067768f,
226   1748.84750254f, 1758.10076802f, 1767.36044551f, 1776.62650662f,
227   1785.89892323f, 1795.17766747f, 1804.46271172f, 1813.75402857f,
228   1823.05159087f, 1832.35537170f, 1841.66534438f, 1850.98148244f,
229   1860.30375965f, 1869.63214999f, 1878.96662767f, 1888.30716711f,
230   1897.65374295f, 1907.00633003f, 1916.36490342f, 1925.72943838f,
231   1935.09991037f, 1944.47629506f, 1953.85856831f, 1963.24670620f,
232   1972.64068498f, 1982.04048108f, 1991.44607117f, 2000.85743204f,
233   2010.27454072f, 2019.69737440f, 2029.12591044f, 2038.56012640f
234 };
235 
VP8LFastSLog2Slow(int v)236 float VP8LFastSLog2Slow(int v) {
237   assert(v >= LOG_LOOKUP_IDX_MAX);
238   if (v < APPROX_LOG_MAX) {
239     int log_cnt = 0;
240     const float v_f = (float)v;
241     while (v >= LOG_LOOKUP_IDX_MAX) {
242       ++log_cnt;
243       v = v >> 1;
244     }
245     return v_f * (kLog2Table[v] + log_cnt);
246   } else {
247     return (float)(LOG_2_RECIPROCAL * v * log((double)v));
248   }
249 }
250 
VP8LFastLog2Slow(int v)251 float VP8LFastLog2Slow(int v) {
252   assert(v >= LOG_LOOKUP_IDX_MAX);
253   if (v < APPROX_LOG_MAX) {
254     int log_cnt = 0;
255     while (v >= LOG_LOOKUP_IDX_MAX) {
256       ++log_cnt;
257       v = v >> 1;
258     }
259     return kLog2Table[v] + log_cnt;
260   } else {
261     return (float)(LOG_2_RECIPROCAL * log((double)v));
262   }
263 }
264 
265 //------------------------------------------------------------------------------
266 // Image transforms.
267 
268 // In-place sum of each component with mod 256.
AddPixelsEq(uint32_t * a,uint32_t b)269 static WEBP_INLINE void AddPixelsEq(uint32_t* a, uint32_t b) {
270   const uint32_t alpha_and_green = (*a & 0xff00ff00u) + (b & 0xff00ff00u);
271   const uint32_t red_and_blue = (*a & 0x00ff00ffu) + (b & 0x00ff00ffu);
272   *a = (alpha_and_green & 0xff00ff00u) | (red_and_blue & 0x00ff00ffu);
273 }
274 
Average2(uint32_t a0,uint32_t a1)275 static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) {
276   return (((a0 ^ a1) & 0xfefefefeL) >> 1) + (a0 & a1);
277 }
278 
Average3(uint32_t a0,uint32_t a1,uint32_t a2)279 static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) {
280   return Average2(Average2(a0, a2), a1);
281 }
282 
Average4(uint32_t a0,uint32_t a1,uint32_t a2,uint32_t a3)283 static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1,
284                                      uint32_t a2, uint32_t a3) {
285   return Average2(Average2(a0, a1), Average2(a2, a3));
286 }
287 
288 #if defined(WEBP_TARGET_HAS_SSE2)
ClampedAddSubtractFull(uint32_t c0,uint32_t c1,uint32_t c2)289 static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1,
290                                                    uint32_t c2) {
291   const __m128i zero = _mm_setzero_si128();
292   const __m128i C0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c0), zero);
293   const __m128i C1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c1), zero);
294   const __m128i C2 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c2), zero);
295   const __m128i V1 = _mm_add_epi16(C0, C1);
296   const __m128i V2 = _mm_sub_epi16(V1, C2);
297   const __m128i b = _mm_packus_epi16(V2, V2);
298   const uint32_t output = _mm_cvtsi128_si32(b);
299   return output;
300 }
301 
ClampedAddSubtractHalf(uint32_t c0,uint32_t c1,uint32_t c2)302 static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
303                                                    uint32_t c2) {
304   const uint32_t ave = Average2(c0, c1);
305   const __m128i zero = _mm_setzero_si128();
306   const __m128i A0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ave), zero);
307   const __m128i B0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c2), zero);
308   const __m128i A1 = _mm_sub_epi16(A0, B0);
309   const __m128i BgtA = _mm_cmpgt_epi16(B0, A0);
310   const __m128i A2 = _mm_sub_epi16(A1, BgtA);
311   const __m128i A3 = _mm_srai_epi16(A2, 1);
312   const __m128i A4 = _mm_add_epi16(A0, A3);
313   const __m128i A5 = _mm_packus_epi16(A4, A4);
314   const uint32_t output = _mm_cvtsi128_si32(A5);
315   return output;
316 }
317 
Select(uint32_t a,uint32_t b,uint32_t c)318 static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
319   int pa_minus_pb;
320   const __m128i zero = _mm_setzero_si128();
321   const __m128i A0 = _mm_cvtsi32_si128(a);
322   const __m128i B0 = _mm_cvtsi32_si128(b);
323   const __m128i C0 = _mm_cvtsi32_si128(c);
324   const __m128i AC0 = _mm_subs_epu8(A0, C0);
325   const __m128i CA0 = _mm_subs_epu8(C0, A0);
326   const __m128i BC0 = _mm_subs_epu8(B0, C0);
327   const __m128i CB0 = _mm_subs_epu8(C0, B0);
328   const __m128i AC = _mm_or_si128(AC0, CA0);
329   const __m128i BC = _mm_or_si128(BC0, CB0);
330   const __m128i pa = _mm_unpacklo_epi8(AC, zero);  // |a - c|
331   const __m128i pb = _mm_unpacklo_epi8(BC, zero);  // |b - c|
332   const __m128i diff = _mm_sub_epi16(pb, pa);
333   {
334     int16_t out[8];
335     _mm_storeu_si128((__m128i*)out, diff);
336     pa_minus_pb = out[0] + out[1] + out[2] + out[3];
337   }
338   return (pa_minus_pb <= 0) ? a : b;
339 }
340 
341 #else
342 
Clip255(uint32_t a)343 static WEBP_INLINE uint32_t Clip255(uint32_t a) {
344   if (a < 256) {
345     return a;
346   }
347   // return 0, when a is a negative integer.
348   // return 255, when a is positive.
349   return ~a >> 24;
350 }
351 
AddSubtractComponentFull(int a,int b,int c)352 static WEBP_INLINE int AddSubtractComponentFull(int a, int b, int c) {
353   return Clip255(a + b - c);
354 }
355 
ClampedAddSubtractFull(uint32_t c0,uint32_t c1,uint32_t c2)356 static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1,
357                                                    uint32_t c2) {
358   const int a = AddSubtractComponentFull(c0 >> 24, c1 >> 24, c2 >> 24);
359   const int r = AddSubtractComponentFull((c0 >> 16) & 0xff,
360                                          (c1 >> 16) & 0xff,
361                                          (c2 >> 16) & 0xff);
362   const int g = AddSubtractComponentFull((c0 >> 8) & 0xff,
363                                          (c1 >> 8) & 0xff,
364                                          (c2 >> 8) & 0xff);
365   const int b = AddSubtractComponentFull(c0 & 0xff, c1 & 0xff, c2 & 0xff);
366   return (a << 24) | (r << 16) | (g << 8) | b;
367 }
368 
AddSubtractComponentHalf(int a,int b)369 static WEBP_INLINE int AddSubtractComponentHalf(int a, int b) {
370   return Clip255(a + (a - b) / 2);
371 }
372 
ClampedAddSubtractHalf(uint32_t c0,uint32_t c1,uint32_t c2)373 static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
374                                                    uint32_t c2) {
375   const uint32_t ave = Average2(c0, c1);
376   const int a = AddSubtractComponentHalf(ave >> 24, c2 >> 24);
377   const int r = AddSubtractComponentHalf((ave >> 16) & 0xff, (c2 >> 16) & 0xff);
378   const int g = AddSubtractComponentHalf((ave >> 8) & 0xff, (c2 >> 8) & 0xff);
379   const int b = AddSubtractComponentHalf((ave >> 0) & 0xff, (c2 >> 0) & 0xff);
380   return (a << 24) | (r << 16) | (g << 8) | b;
381 }
382 
Sub3(int a,int b,int c)383 static WEBP_INLINE int Sub3(int a, int b, int c) {
384   const int pb = b - c;
385   const int pa = a - c;
386   return abs(pb) - abs(pa);
387 }
388 
Select(uint32_t a,uint32_t b,uint32_t c)389 static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
390   const int pa_minus_pb =
391       Sub3((a >> 24)       , (b >> 24)       , (c >> 24)       ) +
392       Sub3((a >> 16) & 0xff, (b >> 16) & 0xff, (c >> 16) & 0xff) +
393       Sub3((a >>  8) & 0xff, (b >>  8) & 0xff, (c >>  8) & 0xff) +
394       Sub3((a      ) & 0xff, (b      ) & 0xff, (c      ) & 0xff);
395   return (pa_minus_pb <= 0) ? a : b;
396 }
397 #endif
398 
399 //------------------------------------------------------------------------------
400 // Predictors
401 
Predictor0(uint32_t left,const uint32_t * const top)402 static uint32_t Predictor0(uint32_t left, const uint32_t* const top) {
403   (void)top;
404   (void)left;
405   return ARGB_BLACK;
406 }
Predictor1(uint32_t left,const uint32_t * const top)407 static uint32_t Predictor1(uint32_t left, const uint32_t* const top) {
408   (void)top;
409   return left;
410 }
Predictor2(uint32_t left,const uint32_t * const top)411 static uint32_t Predictor2(uint32_t left, const uint32_t* const top) {
412   (void)left;
413   return top[0];
414 }
Predictor3(uint32_t left,const uint32_t * const top)415 static uint32_t Predictor3(uint32_t left, const uint32_t* const top) {
416   (void)left;
417   return top[1];
418 }
Predictor4(uint32_t left,const uint32_t * const top)419 static uint32_t Predictor4(uint32_t left, const uint32_t* const top) {
420   (void)left;
421   return top[-1];
422 }
Predictor5(uint32_t left,const uint32_t * const top)423 static uint32_t Predictor5(uint32_t left, const uint32_t* const top) {
424   const uint32_t pred = Average3(left, top[0], top[1]);
425   return pred;
426 }
Predictor6(uint32_t left,const uint32_t * const top)427 static uint32_t Predictor6(uint32_t left, const uint32_t* const top) {
428   const uint32_t pred = Average2(left, top[-1]);
429   return pred;
430 }
Predictor7(uint32_t left,const uint32_t * const top)431 static uint32_t Predictor7(uint32_t left, const uint32_t* const top) {
432   const uint32_t pred = Average2(left, top[0]);
433   return pred;
434 }
Predictor8(uint32_t left,const uint32_t * const top)435 static uint32_t Predictor8(uint32_t left, const uint32_t* const top) {
436   const uint32_t pred = Average2(top[-1], top[0]);
437   (void)left;
438   return pred;
439 }
Predictor9(uint32_t left,const uint32_t * const top)440 static uint32_t Predictor9(uint32_t left, const uint32_t* const top) {
441   const uint32_t pred = Average2(top[0], top[1]);
442   (void)left;
443   return pred;
444 }
Predictor10(uint32_t left,const uint32_t * const top)445 static uint32_t Predictor10(uint32_t left, const uint32_t* const top) {
446   const uint32_t pred = Average4(left, top[-1], top[0], top[1]);
447   return pred;
448 }
Predictor11(uint32_t left,const uint32_t * const top)449 static uint32_t Predictor11(uint32_t left, const uint32_t* const top) {
450   const uint32_t pred = Select(top[0], left, top[-1]);
451   return pred;
452 }
Predictor12(uint32_t left,const uint32_t * const top)453 static uint32_t Predictor12(uint32_t left, const uint32_t* const top) {
454   const uint32_t pred = ClampedAddSubtractFull(left, top[0], top[-1]);
455   return pred;
456 }
Predictor13(uint32_t left,const uint32_t * const top)457 static uint32_t Predictor13(uint32_t left, const uint32_t* const top) {
458   const uint32_t pred = ClampedAddSubtractHalf(left, top[0], top[-1]);
459   return pred;
460 }
461 
462 typedef uint32_t (*PredictorFunc)(uint32_t left, const uint32_t* const top);
463 static const PredictorFunc kPredictors[16] = {
464   Predictor0, Predictor1, Predictor2, Predictor3,
465   Predictor4, Predictor5, Predictor6, Predictor7,
466   Predictor8, Predictor9, Predictor10, Predictor11,
467   Predictor12, Predictor13,
468   Predictor0, Predictor0    // <- padding security sentinels
469 };
470 
471 // TODO(vikasa): Replace 256 etc with defines.
PredictionCostSpatial(const int * counts,int weight_0,double exp_val)472 static float PredictionCostSpatial(const int* counts,
473                                    int weight_0, double exp_val) {
474   const int significant_symbols = 16;
475   const double exp_decay_factor = 0.6;
476   double bits = weight_0 * counts[0];
477   int i;
478   for (i = 1; i < significant_symbols; ++i) {
479     bits += exp_val * (counts[i] + counts[256 - i]);
480     exp_val *= exp_decay_factor;
481   }
482   return (float)(-0.1 * bits);
483 }
484 
485 // Compute the combined Shanon's entropy for distribution {X} and {X+Y}
CombinedShannonEntropy(const int * const X,const int * const Y,int n)486 static float CombinedShannonEntropy(const int* const X,
487                                     const int* const Y, int n) {
488   int i;
489   double retval = 0.;
490   int sumX = 0, sumXY = 0;
491   for (i = 0; i < n; ++i) {
492     const int x = X[i];
493     const int xy = X[i] + Y[i];
494     if (x != 0) {
495       sumX += x;
496       retval -= VP8LFastSLog2(x);
497     }
498     if (xy != 0) {
499       sumXY += xy;
500       retval -= VP8LFastSLog2(xy);
501     }
502   }
503   retval += VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY);
504   return (float)retval;
505 }
506 
PredictionCostSpatialHistogram(int accumulated[4][256],int tile[4][256])507 static float PredictionCostSpatialHistogram(int accumulated[4][256],
508                                             int tile[4][256]) {
509   int i;
510   double retval = 0;
511   for (i = 0; i < 4; ++i) {
512     const double kExpValue = 0.94;
513     retval += PredictionCostSpatial(tile[i], 1, kExpValue);
514     retval += CombinedShannonEntropy(tile[i], accumulated[i], 256);
515   }
516   return (float)retval;
517 }
518 
GetBestPredictorForTile(int width,int height,int tile_x,int tile_y,int bits,int accumulated[4][256],const uint32_t * const argb_scratch)519 static int GetBestPredictorForTile(int width, int height,
520                                    int tile_x, int tile_y, int bits,
521                                    int accumulated[4][256],
522                                    const uint32_t* const argb_scratch) {
523   const int kNumPredModes = 14;
524   const int col_start = tile_x << bits;
525   const int row_start = tile_y << bits;
526   const int tile_size = 1 << bits;
527   const int ymax = (tile_size <= height - row_start) ?
528       tile_size : height - row_start;
529   const int xmax = (tile_size <= width - col_start) ?
530       tile_size : width - col_start;
531   int histo[4][256];
532   float best_diff = MAX_DIFF_COST;
533   int best_mode = 0;
534 
535   int mode;
536   for (mode = 0; mode < kNumPredModes; ++mode) {
537     const uint32_t* current_row = argb_scratch;
538     const PredictorFunc pred_func = kPredictors[mode];
539     float cur_diff;
540     int y;
541     memset(&histo[0][0], 0, sizeof(histo));
542     for (y = 0; y < ymax; ++y) {
543       int x;
544       const int row = row_start + y;
545       const uint32_t* const upper_row = current_row;
546       current_row = upper_row + width;
547       for (x = 0; x < xmax; ++x) {
548         const int col = col_start + x;
549         uint32_t predict;
550         uint32_t predict_diff;
551         if (row == 0) {
552           predict = (col == 0) ? ARGB_BLACK : current_row[col - 1];  // Left.
553         } else if (col == 0) {
554           predict = upper_row[col];  // Top.
555         } else {
556           predict = pred_func(current_row[col - 1], upper_row + col);
557         }
558         predict_diff = VP8LSubPixels(current_row[col], predict);
559         ++histo[0][predict_diff >> 24];
560         ++histo[1][((predict_diff >> 16) & 0xff)];
561         ++histo[2][((predict_diff >> 8) & 0xff)];
562         ++histo[3][(predict_diff & 0xff)];
563       }
564     }
565     cur_diff = PredictionCostSpatialHistogram(accumulated, histo);
566     if (cur_diff < best_diff) {
567       best_diff = cur_diff;
568       best_mode = mode;
569     }
570   }
571 
572   return best_mode;
573 }
574 
CopyTileWithPrediction(int width,int height,int tile_x,int tile_y,int bits,int mode,const uint32_t * const argb_scratch,uint32_t * const argb)575 static void CopyTileWithPrediction(int width, int height,
576                                    int tile_x, int tile_y, int bits, int mode,
577                                    const uint32_t* const argb_scratch,
578                                    uint32_t* const argb) {
579   const int col_start = tile_x << bits;
580   const int row_start = tile_y << bits;
581   const int tile_size = 1 << bits;
582   const int ymax = (tile_size <= height - row_start) ?
583       tile_size : height - row_start;
584   const int xmax = (tile_size <= width - col_start) ?
585       tile_size : width - col_start;
586   const PredictorFunc pred_func = kPredictors[mode];
587   const uint32_t* current_row = argb_scratch;
588 
589   int y;
590   for (y = 0; y < ymax; ++y) {
591     int x;
592     const int row = row_start + y;
593     const uint32_t* const upper_row = current_row;
594     current_row = upper_row + width;
595     for (x = 0; x < xmax; ++x) {
596       const int col = col_start + x;
597       const int pix = row * width + col;
598       uint32_t predict;
599       if (row == 0) {
600         predict = (col == 0) ? ARGB_BLACK : current_row[col - 1];  // Left.
601       } else if (col == 0) {
602         predict = upper_row[col];  // Top.
603       } else {
604         predict = pred_func(current_row[col - 1], upper_row + col);
605       }
606       argb[pix] = VP8LSubPixels(current_row[col], predict);
607     }
608   }
609 }
610 
VP8LResidualImage(int width,int height,int bits,uint32_t * const argb,uint32_t * const argb_scratch,uint32_t * const image)611 void VP8LResidualImage(int width, int height, int bits,
612                        uint32_t* const argb, uint32_t* const argb_scratch,
613                        uint32_t* const image) {
614   const int max_tile_size = 1 << bits;
615   const int tiles_per_row = VP8LSubSampleSize(width, bits);
616   const int tiles_per_col = VP8LSubSampleSize(height, bits);
617   uint32_t* const upper_row = argb_scratch;
618   uint32_t* const current_tile_rows = argb_scratch + width;
619   int tile_y;
620   int histo[4][256];
621   memset(histo, 0, sizeof(histo));
622   for (tile_y = 0; tile_y < tiles_per_col; ++tile_y) {
623     const int tile_y_offset = tile_y * max_tile_size;
624     const int this_tile_height =
625         (tile_y < tiles_per_col - 1) ? max_tile_size : height - tile_y_offset;
626     int tile_x;
627     if (tile_y > 0) {
628       memcpy(upper_row, current_tile_rows + (max_tile_size - 1) * width,
629              width * sizeof(*upper_row));
630     }
631     memcpy(current_tile_rows, &argb[tile_y_offset * width],
632            this_tile_height * width * sizeof(*current_tile_rows));
633     for (tile_x = 0; tile_x < tiles_per_row; ++tile_x) {
634       int pred;
635       int y;
636       const int tile_x_offset = tile_x * max_tile_size;
637       int all_x_max = tile_x_offset + max_tile_size;
638       if (all_x_max > width) {
639         all_x_max = width;
640       }
641       pred = GetBestPredictorForTile(width, height, tile_x, tile_y, bits, histo,
642                                      argb_scratch);
643       image[tile_y * tiles_per_row + tile_x] = 0xff000000u | (pred << 8);
644       CopyTileWithPrediction(width, height, tile_x, tile_y, bits, pred,
645                              argb_scratch, argb);
646       for (y = 0; y < max_tile_size; ++y) {
647         int ix;
648         int all_x;
649         int all_y = tile_y_offset + y;
650         if (all_y >= height) {
651           break;
652         }
653         ix = all_y * width + tile_x_offset;
654         for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
655           const uint32_t a = argb[ix];
656           ++histo[0][a >> 24];
657           ++histo[1][((a >> 16) & 0xff)];
658           ++histo[2][((a >> 8) & 0xff)];
659           ++histo[3][(a & 0xff)];
660         }
661       }
662     }
663   }
664 }
665 
666 // Inverse prediction.
PredictorInverseTransform(const VP8LTransform * const transform,int y_start,int y_end,uint32_t * data)667 static void PredictorInverseTransform(const VP8LTransform* const transform,
668                                       int y_start, int y_end, uint32_t* data) {
669   const int width = transform->xsize_;
670   if (y_start == 0) {  // First Row follows the L (mode=1) mode.
671     int x;
672     const uint32_t pred0 = Predictor0(data[-1], NULL);
673     AddPixelsEq(data, pred0);
674     for (x = 1; x < width; ++x) {
675       const uint32_t pred1 = Predictor1(data[x - 1], NULL);
676       AddPixelsEq(data + x, pred1);
677     }
678     data += width;
679     ++y_start;
680   }
681 
682   {
683     int y = y_start;
684     const int mask = (1 << transform->bits_) - 1;
685     const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_);
686     const uint32_t* pred_mode_base =
687         transform->data_ + (y >> transform->bits_) * tiles_per_row;
688 
689     while (y < y_end) {
690       int x;
691       const uint32_t pred2 = Predictor2(data[-1], data - width);
692       const uint32_t* pred_mode_src = pred_mode_base;
693       PredictorFunc pred_func;
694 
695       // First pixel follows the T (mode=2) mode.
696       AddPixelsEq(data, pred2);
697 
698       // .. the rest:
699       pred_func = kPredictors[((*pred_mode_src++) >> 8) & 0xf];
700       for (x = 1; x < width; ++x) {
701         uint32_t pred;
702         if ((x & mask) == 0) {    // start of tile. Read predictor function.
703           pred_func = kPredictors[((*pred_mode_src++) >> 8) & 0xf];
704         }
705         pred = pred_func(data[x - 1], data + x - width);
706         AddPixelsEq(data + x, pred);
707       }
708       data += width;
709       ++y;
710       if ((y & mask) == 0) {   // Use the same mask, since tiles are squares.
711         pred_mode_base += tiles_per_row;
712       }
713     }
714   }
715 }
716 
VP8LSubtractGreenFromBlueAndRed(uint32_t * argb_data,int num_pixs)717 void VP8LSubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixs) {
718   int i = 0;
719 #if defined(WEBP_TARGET_HAS_SSE2)
720   const __m128i mask = _mm_set1_epi32(0x0000ff00);
721   for (; i + 4 < num_pixs; i += 4) {
722     const __m128i in = _mm_loadu_si128((__m128i*)&argb_data[i]);
723     const __m128i in_00g0 = _mm_and_si128(in, mask);     // 00g0|00g0|...
724     const __m128i in_0g00 = _mm_slli_epi32(in_00g0, 8);  // 0g00|0g00|...
725     const __m128i in_000g = _mm_srli_epi32(in_00g0, 8);  // 000g|000g|...
726     const __m128i in_0g0g = _mm_or_si128(in_0g00, in_000g);
727     const __m128i out = _mm_sub_epi8(in, in_0g0g);
728     _mm_storeu_si128((__m128i*)&argb_data[i], out);
729   }
730   // fallthrough and finish off with plain-C
731 #endif
732   for (; i < num_pixs; ++i) {
733     const uint32_t argb = argb_data[i];
734     const uint32_t green = (argb >> 8) & 0xff;
735     const uint32_t new_r = (((argb >> 16) & 0xff) - green) & 0xff;
736     const uint32_t new_b = ((argb & 0xff) - green) & 0xff;
737     argb_data[i] = (argb & 0xff00ff00) | (new_r << 16) | new_b;
738   }
739 }
740 
741 // Add green to blue and red channels (i.e. perform the inverse transform of
742 // 'subtract green').
AddGreenToBlueAndRed(const VP8LTransform * const transform,int y_start,int y_end,uint32_t * data)743 static void AddGreenToBlueAndRed(const VP8LTransform* const transform,
744                                  int y_start, int y_end, uint32_t* data) {
745   const int width = transform->xsize_;
746   const uint32_t* const data_end = data + (y_end - y_start) * width;
747 #if defined(WEBP_TARGET_HAS_SSE2)
748   const __m128i mask = _mm_set1_epi32(0x0000ff00);
749   for (; data + 4 < data_end; data += 4) {
750     const __m128i in = _mm_loadu_si128((__m128i*)data);
751     const __m128i in_00g0 = _mm_and_si128(in, mask);     // 00g0|00g0|...
752     const __m128i in_0g00 = _mm_slli_epi32(in_00g0, 8);  // 0g00|0g00|...
753     const __m128i in_000g = _mm_srli_epi32(in_00g0, 8);  // 000g|000g|...
754     const __m128i in_0g0g = _mm_or_si128(in_0g00, in_000g);
755     const __m128i out = _mm_add_epi8(in, in_0g0g);
756     _mm_storeu_si128((__m128i*)data, out);
757   }
758   // fallthrough and finish off with plain-C
759 #endif
760   while (data < data_end) {
761     const uint32_t argb = *data;
762     const uint32_t green = ((argb >> 8) & 0xff);
763     uint32_t red_blue = (argb & 0x00ff00ffu);
764     red_blue += (green << 16) | green;
765     red_blue &= 0x00ff00ffu;
766     *data++ = (argb & 0xff00ff00u) | red_blue;
767   }
768 }
769 
770 typedef struct {
771   // Note: the members are uint8_t, so that any negative values are
772   // automatically converted to "mod 256" values.
773   uint8_t green_to_red_;
774   uint8_t green_to_blue_;
775   uint8_t red_to_blue_;
776 } Multipliers;
777 
MultipliersClear(Multipliers * m)778 static WEBP_INLINE void MultipliersClear(Multipliers* m) {
779   m->green_to_red_ = 0;
780   m->green_to_blue_ = 0;
781   m->red_to_blue_ = 0;
782 }
783 
ColorTransformDelta(int8_t color_pred,int8_t color)784 static WEBP_INLINE uint32_t ColorTransformDelta(int8_t color_pred,
785                                                 int8_t color) {
786   return (uint32_t)((int)(color_pred) * color) >> 5;
787 }
788 
ColorCodeToMultipliers(uint32_t color_code,Multipliers * const m)789 static WEBP_INLINE void ColorCodeToMultipliers(uint32_t color_code,
790                                                Multipliers* const m) {
791   m->green_to_red_  = (color_code >>  0) & 0xff;
792   m->green_to_blue_ = (color_code >>  8) & 0xff;
793   m->red_to_blue_   = (color_code >> 16) & 0xff;
794 }
795 
MultipliersToColorCode(Multipliers * const m)796 static WEBP_INLINE uint32_t MultipliersToColorCode(Multipliers* const m) {
797   return 0xff000000u |
798          ((uint32_t)(m->red_to_blue_) << 16) |
799          ((uint32_t)(m->green_to_blue_) << 8) |
800          m->green_to_red_;
801 }
802 
TransformColor(const Multipliers * const m,uint32_t argb,int inverse)803 static WEBP_INLINE uint32_t TransformColor(const Multipliers* const m,
804                                            uint32_t argb, int inverse) {
805   const uint32_t green = argb >> 8;
806   const uint32_t red = argb >> 16;
807   uint32_t new_red = red;
808   uint32_t new_blue = argb;
809 
810   if (inverse) {
811     new_red += ColorTransformDelta(m->green_to_red_, green);
812     new_red &= 0xff;
813     new_blue += ColorTransformDelta(m->green_to_blue_, green);
814     new_blue += ColorTransformDelta(m->red_to_blue_, new_red);
815     new_blue &= 0xff;
816   } else {
817     new_red -= ColorTransformDelta(m->green_to_red_, green);
818     new_red &= 0xff;
819     new_blue -= ColorTransformDelta(m->green_to_blue_, green);
820     new_blue -= ColorTransformDelta(m->red_to_blue_, red);
821     new_blue &= 0xff;
822   }
823   return (argb & 0xff00ff00u) | (new_red << 16) | (new_blue);
824 }
825 
TransformColorRed(uint8_t green_to_red,uint32_t argb)826 static WEBP_INLINE uint8_t TransformColorRed(uint8_t green_to_red,
827                                              uint32_t argb) {
828   const uint32_t green = argb >> 8;
829   uint32_t new_red = argb >> 16;
830   new_red -= ColorTransformDelta(green_to_red, green);
831   return (new_red & 0xff);
832 }
833 
TransformColorBlue(uint8_t green_to_blue,uint8_t red_to_blue,uint32_t argb)834 static WEBP_INLINE uint8_t TransformColorBlue(uint8_t green_to_blue,
835                                               uint8_t red_to_blue,
836                                               uint32_t argb) {
837   const uint32_t green = argb >> 8;
838   const uint32_t red = argb >> 16;
839   uint8_t new_blue = argb;
840   new_blue -= ColorTransformDelta(green_to_blue, green);
841   new_blue -= ColorTransformDelta(red_to_blue, red);
842   return (new_blue & 0xff);
843 }
844 
SkipRepeatedPixels(const uint32_t * const argb,int ix,int xsize)845 static WEBP_INLINE int SkipRepeatedPixels(const uint32_t* const argb,
846                                           int ix, int xsize) {
847   const uint32_t v = argb[ix];
848   if (ix >= xsize + 3) {
849     if (v == argb[ix - xsize] &&
850         argb[ix - 1] == argb[ix - xsize - 1] &&
851         argb[ix - 2] == argb[ix - xsize - 2] &&
852         argb[ix - 3] == argb[ix - xsize - 3]) {
853       return 1;
854     }
855     return v == argb[ix - 3] && v == argb[ix - 2] && v == argb[ix - 1];
856   } else if (ix >= 3) {
857     return v == argb[ix - 3] && v == argb[ix - 2] && v == argb[ix - 1];
858   }
859   return 0;
860 }
861 
PredictionCostCrossColor(const int accumulated[256],const int counts[256])862 static float PredictionCostCrossColor(const int accumulated[256],
863                                       const int counts[256]) {
864   // Favor low entropy, locally and globally.
865   // Favor small absolute values for PredictionCostSpatial
866   static const double kExpValue = 2.4;
867   return CombinedShannonEntropy(counts, accumulated, 256) +
868          PredictionCostSpatial(counts, 3, kExpValue);
869 }
870 
GetBestColorTransformForTile(int tile_x,int tile_y,int bits,Multipliers prevX,Multipliers prevY,int step,int xsize,int ysize,int * accumulated_red_histo,int * accumulated_blue_histo,const uint32_t * const argb)871 static Multipliers GetBestColorTransformForTile(
872     int tile_x, int tile_y, int bits,
873     Multipliers prevX,
874     Multipliers prevY,
875     int step, int xsize, int ysize,
876     int* accumulated_red_histo,
877     int* accumulated_blue_histo,
878     const uint32_t* const argb) {
879   float best_diff = MAX_DIFF_COST;
880   float cur_diff;
881   const int halfstep = step / 2;
882   const int max_tile_size = 1 << bits;
883   const int tile_y_offset = tile_y * max_tile_size;
884   const int tile_x_offset = tile_x * max_tile_size;
885   int green_to_red;
886   int green_to_blue;
887   int red_to_blue;
888   int all_x_max = tile_x_offset + max_tile_size;
889   int all_y_max = tile_y_offset + max_tile_size;
890   Multipliers best_tx;
891   MultipliersClear(&best_tx);
892   if (all_x_max > xsize) {
893     all_x_max = xsize;
894   }
895   if (all_y_max > ysize) {
896     all_y_max = ysize;
897   }
898 
899   for (green_to_red = -64; green_to_red <= 64; green_to_red += halfstep) {
900     int histo[256] = { 0 };
901     int all_y;
902 
903     for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) {
904       int ix = all_y * xsize + tile_x_offset;
905       int all_x;
906       for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
907         if (SkipRepeatedPixels(argb, ix, xsize)) {
908           continue;
909         }
910         ++histo[TransformColorRed(green_to_red, argb[ix])];  // red.
911       }
912     }
913     cur_diff = PredictionCostCrossColor(&accumulated_red_histo[0], &histo[0]);
914     if ((uint8_t)green_to_red == prevX.green_to_red_) {
915       cur_diff -= 3;  // favor keeping the areas locally similar
916     }
917     if ((uint8_t)green_to_red == prevY.green_to_red_) {
918       cur_diff -= 3;  // favor keeping the areas locally similar
919     }
920     if (green_to_red == 0) {
921       cur_diff -= 3;
922     }
923     if (cur_diff < best_diff) {
924       best_diff = cur_diff;
925       best_tx.green_to_red_ = green_to_red;
926     }
927   }
928   best_diff = MAX_DIFF_COST;
929   for (green_to_blue = -32; green_to_blue <= 32; green_to_blue += step) {
930     for (red_to_blue = -32; red_to_blue <= 32; red_to_blue += step) {
931       int all_y;
932       int histo[256] = { 0 };
933       for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) {
934         int all_x;
935         int ix = all_y * xsize + tile_x_offset;
936         for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
937           if (SkipRepeatedPixels(argb, ix, xsize)) {
938             continue;
939           }
940           ++histo[TransformColorBlue(green_to_blue, red_to_blue, argb[ix])];
941         }
942       }
943       cur_diff =
944           PredictionCostCrossColor(&accumulated_blue_histo[0], &histo[0]);
945       if ((uint8_t)green_to_blue == prevX.green_to_blue_) {
946         cur_diff -= 3;  // favor keeping the areas locally similar
947       }
948       if ((uint8_t)green_to_blue == prevY.green_to_blue_) {
949         cur_diff -= 3;  // favor keeping the areas locally similar
950       }
951       if ((uint8_t)red_to_blue == prevX.red_to_blue_) {
952         cur_diff -= 3;  // favor keeping the areas locally similar
953       }
954       if ((uint8_t)red_to_blue == prevY.red_to_blue_) {
955         cur_diff -= 3;  // favor keeping the areas locally similar
956       }
957       if (green_to_blue == 0) {
958         cur_diff -= 3;
959       }
960       if (red_to_blue == 0) {
961         cur_diff -= 3;
962       }
963       if (cur_diff < best_diff) {
964         best_diff = cur_diff;
965         best_tx.green_to_blue_ = green_to_blue;
966         best_tx.red_to_blue_ = red_to_blue;
967       }
968     }
969   }
970   return best_tx;
971 }
972 
CopyTileWithColorTransform(int xsize,int ysize,int tile_x,int tile_y,int bits,Multipliers color_transform,uint32_t * const argb)973 static void CopyTileWithColorTransform(int xsize, int ysize,
974                                        int tile_x, int tile_y, int bits,
975                                        Multipliers color_transform,
976                                        uint32_t* const argb) {
977   int y;
978   int xscan = 1 << bits;
979   int yscan = 1 << bits;
980   tile_x <<= bits;
981   tile_y <<= bits;
982   if (xscan > xsize - tile_x) {
983     xscan = xsize - tile_x;
984   }
985   if (yscan > ysize - tile_y) {
986     yscan = ysize - tile_y;
987   }
988   yscan += tile_y;
989   for (y = tile_y; y < yscan; ++y) {
990     int ix = y * xsize + tile_x;
991     const int end_ix = ix + xscan;
992     for (; ix < end_ix; ++ix) {
993       argb[ix] = TransformColor(&color_transform, argb[ix], 0);
994     }
995   }
996 }
997 
VP8LColorSpaceTransform(int width,int height,int bits,int step,uint32_t * const argb,uint32_t * image)998 void VP8LColorSpaceTransform(int width, int height, int bits, int step,
999                              uint32_t* const argb, uint32_t* image) {
1000   const int max_tile_size = 1 << bits;
1001   int tile_xsize = VP8LSubSampleSize(width, bits);
1002   int tile_ysize = VP8LSubSampleSize(height, bits);
1003   int accumulated_red_histo[256] = { 0 };
1004   int accumulated_blue_histo[256] = { 0 };
1005   int tile_y;
1006   int tile_x;
1007   Multipliers prevX;
1008   Multipliers prevY;
1009   MultipliersClear(&prevY);
1010   MultipliersClear(&prevX);
1011   for (tile_y = 0; tile_y < tile_ysize; ++tile_y) {
1012     for (tile_x = 0; tile_x < tile_xsize; ++tile_x) {
1013       Multipliers color_transform;
1014       int all_x_max;
1015       int y;
1016       const int tile_y_offset = tile_y * max_tile_size;
1017       const int tile_x_offset = tile_x * max_tile_size;
1018       if (tile_y != 0) {
1019         ColorCodeToMultipliers(image[tile_y * tile_xsize + tile_x - 1], &prevX);
1020         ColorCodeToMultipliers(image[(tile_y - 1) * tile_xsize + tile_x],
1021                                &prevY);
1022       } else if (tile_x != 0) {
1023         ColorCodeToMultipliers(image[tile_y * tile_xsize + tile_x - 1], &prevX);
1024       }
1025       color_transform =
1026           GetBestColorTransformForTile(tile_x, tile_y, bits,
1027                                        prevX, prevY,
1028                                        step, width, height,
1029                                        &accumulated_red_histo[0],
1030                                        &accumulated_blue_histo[0],
1031                                        argb);
1032       image[tile_y * tile_xsize + tile_x] =
1033           MultipliersToColorCode(&color_transform);
1034       CopyTileWithColorTransform(width, height, tile_x, tile_y, bits,
1035                                  color_transform, argb);
1036 
1037       // Gather accumulated histogram data.
1038       all_x_max = tile_x_offset + max_tile_size;
1039       if (all_x_max > width) {
1040         all_x_max = width;
1041       }
1042       for (y = 0; y < max_tile_size; ++y) {
1043         int ix;
1044         int all_x;
1045         int all_y = tile_y_offset + y;
1046         if (all_y >= height) {
1047           break;
1048         }
1049         ix = all_y * width + tile_x_offset;
1050         for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
1051           if (ix >= 2 &&
1052               argb[ix] == argb[ix - 2] &&
1053               argb[ix] == argb[ix - 1]) {
1054             continue;  // repeated pixels are handled by backward references
1055           }
1056           if (ix >= width + 2 &&
1057               argb[ix - 2] == argb[ix - width - 2] &&
1058               argb[ix - 1] == argb[ix - width - 1] &&
1059               argb[ix] == argb[ix - width]) {
1060             continue;  // repeated pixels are handled by backward references
1061           }
1062           ++accumulated_red_histo[(argb[ix] >> 16) & 0xff];
1063           ++accumulated_blue_histo[argb[ix] & 0xff];
1064         }
1065       }
1066     }
1067   }
1068 }
1069 
1070 // Color space inverse transform.
ColorSpaceInverseTransform(const VP8LTransform * const transform,int y_start,int y_end,uint32_t * data)1071 static void ColorSpaceInverseTransform(const VP8LTransform* const transform,
1072                                        int y_start, int y_end, uint32_t* data) {
1073   const int width = transform->xsize_;
1074   const int mask = (1 << transform->bits_) - 1;
1075   const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_);
1076   int y = y_start;
1077   const uint32_t* pred_row =
1078       transform->data_ + (y >> transform->bits_) * tiles_per_row;
1079 
1080   while (y < y_end) {
1081     const uint32_t* pred = pred_row;
1082     Multipliers m = { 0, 0, 0 };
1083     int x;
1084 
1085     for (x = 0; x < width; ++x) {
1086       if ((x & mask) == 0) ColorCodeToMultipliers(*pred++, &m);
1087       data[x] = TransformColor(&m, data[x], 1);
1088     }
1089     data += width;
1090     ++y;
1091     if ((y & mask) == 0) pred_row += tiles_per_row;;
1092   }
1093 }
1094 
1095 // Separate out pixels packed together using pixel-bundling.
ColorIndexInverseTransform(const VP8LTransform * const transform,int y_start,int y_end,const uint32_t * src,uint32_t * dst)1096 static void ColorIndexInverseTransform(
1097     const VP8LTransform* const transform,
1098     int y_start, int y_end, const uint32_t* src, uint32_t* dst) {
1099   int y;
1100   const int bits_per_pixel = 8 >> transform->bits_;
1101   const int width = transform->xsize_;
1102   const uint32_t* const color_map = transform->data_;
1103   if (bits_per_pixel < 8) {
1104     const int pixels_per_byte = 1 << transform->bits_;
1105     const int count_mask = pixels_per_byte - 1;
1106     const uint32_t bit_mask = (1 << bits_per_pixel) - 1;
1107     for (y = y_start; y < y_end; ++y) {
1108       uint32_t packed_pixels = 0;
1109       int x;
1110       for (x = 0; x < width; ++x) {
1111         // We need to load fresh 'packed_pixels' once every 'pixels_per_byte'
1112         // increments of x. Fortunately, pixels_per_byte is a power of 2, so
1113         // can just use a mask for that, instead of decrementing a counter.
1114         if ((x & count_mask) == 0) packed_pixels = ((*src++) >> 8) & 0xff;
1115         *dst++ = color_map[packed_pixels & bit_mask];
1116         packed_pixels >>= bits_per_pixel;
1117       }
1118     }
1119   } else {
1120     for (y = y_start; y < y_end; ++y) {
1121       int x;
1122       for (x = 0; x < width; ++x) {
1123         *dst++ = color_map[((*src++) >> 8) & 0xff];
1124       }
1125     }
1126   }
1127 }
1128 
VP8LInverseTransform(const VP8LTransform * const transform,int row_start,int row_end,const uint32_t * const in,uint32_t * const out)1129 void VP8LInverseTransform(const VP8LTransform* const transform,
1130                           int row_start, int row_end,
1131                           const uint32_t* const in, uint32_t* const out) {
1132   assert(row_start < row_end);
1133   assert(row_end <= transform->ysize_);
1134   switch (transform->type_) {
1135     case SUBTRACT_GREEN:
1136       AddGreenToBlueAndRed(transform, row_start, row_end, out);
1137       break;
1138     case PREDICTOR_TRANSFORM:
1139       PredictorInverseTransform(transform, row_start, row_end, out);
1140       if (row_end != transform->ysize_) {
1141         // The last predicted row in this iteration will be the top-pred row
1142         // for the first row in next iteration.
1143         const int width = transform->xsize_;
1144         memcpy(out - width, out + (row_end - row_start - 1) * width,
1145                width * sizeof(*out));
1146       }
1147       break;
1148     case CROSS_COLOR_TRANSFORM:
1149       ColorSpaceInverseTransform(transform, row_start, row_end, out);
1150       break;
1151     case COLOR_INDEXING_TRANSFORM:
1152       if (in == out && transform->bits_ > 0) {
1153         // Move packed pixels to the end of unpacked region, so that unpacking
1154         // can occur seamlessly.
1155         // Also, note that this is the only transform that applies on
1156         // the effective width of VP8LSubSampleSize(xsize_, bits_). All other
1157         // transforms work on effective width of xsize_.
1158         const int out_stride = (row_end - row_start) * transform->xsize_;
1159         const int in_stride = (row_end - row_start) *
1160             VP8LSubSampleSize(transform->xsize_, transform->bits_);
1161         uint32_t* const src = out + out_stride - in_stride;
1162         memmove(src, out, in_stride * sizeof(*src));
1163         ColorIndexInverseTransform(transform, row_start, row_end, src, out);
1164       } else {
1165         ColorIndexInverseTransform(transform, row_start, row_end, in, out);
1166       }
1167       break;
1168   }
1169 }
1170 
1171 //------------------------------------------------------------------------------
1172 // Color space conversion.
1173 
is_big_endian(void)1174 static int is_big_endian(void) {
1175   static const union {
1176     uint16_t w;
1177     uint8_t b[2];
1178   } tmp = { 1 };
1179   return (tmp.b[0] != 1);
1180 }
1181 
ConvertBGRAToRGB(const uint32_t * src,int num_pixels,uint8_t * dst)1182 static void ConvertBGRAToRGB(const uint32_t* src,
1183                              int num_pixels, uint8_t* dst) {
1184   const uint32_t* const src_end = src + num_pixels;
1185   while (src < src_end) {
1186     const uint32_t argb = *src++;
1187     *dst++ = (argb >> 16) & 0xff;
1188     *dst++ = (argb >>  8) & 0xff;
1189     *dst++ = (argb >>  0) & 0xff;
1190   }
1191 }
1192 
ConvertBGRAToRGBA(const uint32_t * src,int num_pixels,uint8_t * dst)1193 static void ConvertBGRAToRGBA(const uint32_t* src,
1194                               int num_pixels, uint8_t* dst) {
1195   const uint32_t* const src_end = src + num_pixels;
1196   while (src < src_end) {
1197     const uint32_t argb = *src++;
1198     *dst++ = (argb >> 16) & 0xff;
1199     *dst++ = (argb >>  8) & 0xff;
1200     *dst++ = (argb >>  0) & 0xff;
1201     *dst++ = (argb >> 24) & 0xff;
1202   }
1203 }
1204 
ConvertBGRAToRGBA4444(const uint32_t * src,int num_pixels,uint8_t * dst)1205 static void ConvertBGRAToRGBA4444(const uint32_t* src,
1206                                   int num_pixels, uint8_t* dst) {
1207   const uint32_t* const src_end = src + num_pixels;
1208   while (src < src_end) {
1209     const uint32_t argb = *src++;
1210     const uint8_t rg = ((argb >> 16) & 0xf0) | ((argb >> 12) & 0xf);
1211     const uint8_t ba = ((argb >>  0) & 0xf0) | ((argb >> 28) & 0xf);
1212 #ifdef WEBP_SWAP_16BIT_CSP
1213     *dst++ = ba;
1214     *dst++ = rg;
1215 #else
1216     *dst++ = rg;
1217     *dst++ = ba;
1218 #endif
1219   }
1220 }
1221 
ConvertBGRAToRGB565(const uint32_t * src,int num_pixels,uint8_t * dst)1222 static void ConvertBGRAToRGB565(const uint32_t* src,
1223                                 int num_pixels, uint8_t* dst) {
1224   const uint32_t* const src_end = src + num_pixels;
1225   while (src < src_end) {
1226     const uint32_t argb = *src++;
1227     const uint8_t rg = ((argb >> 16) & 0xf8) | ((argb >> 13) & 0x7);
1228     const uint8_t gb = ((argb >>  5) & 0xe0) | ((argb >>  3) & 0x1f);
1229 #ifdef WEBP_SWAP_16BIT_CSP
1230     *dst++ = gb;
1231     *dst++ = rg;
1232 #else
1233     *dst++ = rg;
1234     *dst++ = gb;
1235 #endif
1236   }
1237 }
1238 
ConvertBGRAToBGR(const uint32_t * src,int num_pixels,uint8_t * dst)1239 static void ConvertBGRAToBGR(const uint32_t* src,
1240                              int num_pixels, uint8_t* dst) {
1241   const uint32_t* const src_end = src + num_pixels;
1242   while (src < src_end) {
1243     const uint32_t argb = *src++;
1244     *dst++ = (argb >>  0) & 0xff;
1245     *dst++ = (argb >>  8) & 0xff;
1246     *dst++ = (argb >> 16) & 0xff;
1247   }
1248 }
1249 
CopyOrSwap(const uint32_t * src,int num_pixels,uint8_t * dst,int swap_on_big_endian)1250 static void CopyOrSwap(const uint32_t* src, int num_pixels, uint8_t* dst,
1251                        int swap_on_big_endian) {
1252   if (is_big_endian() == swap_on_big_endian) {
1253     const uint32_t* const src_end = src + num_pixels;
1254     while (src < src_end) {
1255       uint32_t argb = *src++;
1256 
1257 #if !defined(WEBP_REFERENCE_IMPLEMENTATION)
1258 #if !defined(__BIG_ENDIAN__) && (defined(__i386__) || defined(__x86_64__))
1259       __asm__ volatile("bswap %0" : "=r"(argb) : "0"(argb));
1260       *(uint32_t*)dst = argb;
1261 #elif !defined(__BIG_ENDIAN__) && defined(_MSC_VER)
1262       argb = _byteswap_ulong(argb);
1263       *(uint32_t*)dst = argb;
1264 #else
1265       dst[0] = (argb >> 24) & 0xff;
1266       dst[1] = (argb >> 16) & 0xff;
1267       dst[2] = (argb >>  8) & 0xff;
1268       dst[3] = (argb >>  0) & 0xff;
1269 #endif
1270 #else   // WEBP_REFERENCE_IMPLEMENTATION
1271       dst[0] = (argb >> 24) & 0xff;
1272       dst[1] = (argb >> 16) & 0xff;
1273       dst[2] = (argb >>  8) & 0xff;
1274       dst[3] = (argb >>  0) & 0xff;
1275 #endif
1276       dst += sizeof(argb);
1277     }
1278   } else {
1279     memcpy(dst, src, num_pixels * sizeof(*src));
1280   }
1281 }
1282 
VP8LConvertFromBGRA(const uint32_t * const in_data,int num_pixels,WEBP_CSP_MODE out_colorspace,uint8_t * const rgba)1283 void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels,
1284                          WEBP_CSP_MODE out_colorspace, uint8_t* const rgba) {
1285   switch (out_colorspace) {
1286     case MODE_RGB:
1287       ConvertBGRAToRGB(in_data, num_pixels, rgba);
1288       break;
1289     case MODE_RGBA:
1290       ConvertBGRAToRGBA(in_data, num_pixels, rgba);
1291       break;
1292     case MODE_rgbA:
1293       ConvertBGRAToRGBA(in_data, num_pixels, rgba);
1294       WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0);
1295       break;
1296     case MODE_BGR:
1297       ConvertBGRAToBGR(in_data, num_pixels, rgba);
1298       break;
1299     case MODE_BGRA:
1300       CopyOrSwap(in_data, num_pixels, rgba, 1);
1301       break;
1302     case MODE_bgrA:
1303       CopyOrSwap(in_data, num_pixels, rgba, 1);
1304       WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0);
1305       break;
1306     case MODE_ARGB:
1307       CopyOrSwap(in_data, num_pixels, rgba, 0);
1308       break;
1309     case MODE_Argb:
1310       CopyOrSwap(in_data, num_pixels, rgba, 0);
1311       WebPApplyAlphaMultiply(rgba, 1, num_pixels, 1, 0);
1312       break;
1313     case MODE_RGBA_4444:
1314       ConvertBGRAToRGBA4444(in_data, num_pixels, rgba);
1315       break;
1316     case MODE_rgbA_4444:
1317       ConvertBGRAToRGBA4444(in_data, num_pixels, rgba);
1318       WebPApplyAlphaMultiply4444(rgba, num_pixels, 1, 0);
1319       break;
1320     case MODE_RGB_565:
1321       ConvertBGRAToRGB565(in_data, num_pixels, rgba);
1322       break;
1323     default:
1324       assert(0);          // Code flow should not reach here.
1325   }
1326 }
1327 
1328 //------------------------------------------------------------------------------
1329 
1330 #if defined(__cplusplus) || defined(c_plusplus)
1331 }    // extern "C"
1332 #endif
1333