1 // Copyright 2012 Google Inc. All Rights Reserved.
2 //
3 // This code is licensed under the same terms as WebM:
4 // Software License Agreement: http://www.webmproject.org/license/software/
5 // Additional IP Rights Grant: http://www.webmproject.org/license/additional/
6 // -----------------------------------------------------------------------------
7 //
8 // Image transforms and color space conversion methods for lossless decoder.
9 //
10 // Authors: Vikas Arora (vikaas.arora@gmail.com)
11 // Jyrki Alakuijala (jyrki@google.com)
12 // Urvang Joshi (urvang@google.com)
13
14 #include "./dsp.h"
15
16 // Define the following if target arch is sure to have SSE2
17 // #define WEBP_TARGET_HAS_SSE2
18
19 #if defined(__cplusplus) || defined(c_plusplus)
20 extern "C" {
21 #endif
22
23 #if defined(WEBP_TARGET_HAS_SSE2)
24 #include <emmintrin.h>
25 #endif
26
27 #include <math.h>
28 #include <stdlib.h>
29 #include "./lossless.h"
30 #include "../dec/vp8li.h"
31 #include "./yuv.h"
32
33 #define MAX_DIFF_COST (1e30f)
34
35 // lookup table for small values of log2(int)
36 #define APPROX_LOG_MAX 4096
37 #define LOG_2_RECIPROCAL 1.44269504088896338700465094007086
38 const float kLog2Table[LOG_LOOKUP_IDX_MAX] = {
39 0.0000000000000000f, 0.0000000000000000f,
40 1.0000000000000000f, 1.5849625007211560f,
41 2.0000000000000000f, 2.3219280948873621f,
42 2.5849625007211560f, 2.8073549220576041f,
43 3.0000000000000000f, 3.1699250014423121f,
44 3.3219280948873621f, 3.4594316186372973f,
45 3.5849625007211560f, 3.7004397181410921f,
46 3.8073549220576041f, 3.9068905956085187f,
47 4.0000000000000000f, 4.0874628412503390f,
48 4.1699250014423121f, 4.2479275134435852f,
49 4.3219280948873626f, 4.3923174227787606f,
50 4.4594316186372973f, 4.5235619560570130f,
51 4.5849625007211560f, 4.6438561897747243f,
52 4.7004397181410917f, 4.7548875021634682f,
53 4.8073549220576037f, 4.8579809951275718f,
54 4.9068905956085187f, 4.9541963103868749f,
55 5.0000000000000000f, 5.0443941193584533f,
56 5.0874628412503390f, 5.1292830169449663f,
57 5.1699250014423121f, 5.2094533656289501f,
58 5.2479275134435852f, 5.2854022188622487f,
59 5.3219280948873626f, 5.3575520046180837f,
60 5.3923174227787606f, 5.4262647547020979f,
61 5.4594316186372973f, 5.4918530963296747f,
62 5.5235619560570130f, 5.5545888516776376f,
63 5.5849625007211560f, 5.6147098441152083f,
64 5.6438561897747243f, 5.6724253419714951f,
65 5.7004397181410917f, 5.7279204545631987f,
66 5.7548875021634682f, 5.7813597135246599f,
67 5.8073549220576037f, 5.8328900141647412f,
68 5.8579809951275718f, 5.8826430493618415f,
69 5.9068905956085187f, 5.9307373375628866f,
70 5.9541963103868749f, 5.9772799234999167f,
71 6.0000000000000000f, 6.0223678130284543f,
72 6.0443941193584533f, 6.0660891904577720f,
73 6.0874628412503390f, 6.1085244567781691f,
74 6.1292830169449663f, 6.1497471195046822f,
75 6.1699250014423121f, 6.1898245588800175f,
76 6.2094533656289501f, 6.2288186904958804f,
77 6.2479275134435852f, 6.2667865406949010f,
78 6.2854022188622487f, 6.3037807481771030f,
79 6.3219280948873626f, 6.3398500028846243f,
80 6.3575520046180837f, 6.3750394313469245f,
81 6.3923174227787606f, 6.4093909361377017f,
82 6.4262647547020979f, 6.4429434958487279f,
83 6.4594316186372973f, 6.4757334309663976f,
84 6.4918530963296747f, 6.5077946401986963f,
85 6.5235619560570130f, 6.5391588111080309f,
86 6.5545888516776376f, 6.5698556083309478f,
87 6.5849625007211560f, 6.5999128421871278f,
88 6.6147098441152083f, 6.6293566200796094f,
89 6.6438561897747243f, 6.6582114827517946f,
90 6.6724253419714951f, 6.6865005271832185f,
91 6.7004397181410917f, 6.7142455176661224f,
92 6.7279204545631987f, 6.7414669864011464f,
93 6.7548875021634682f, 6.7681843247769259f,
94 6.7813597135246599f, 6.7944158663501061f,
95 6.8073549220576037f, 6.8201789624151878f,
96 6.8328900141647412f, 6.8454900509443747f,
97 6.8579809951275718f, 6.8703647195834047f,
98 6.8826430493618415f, 6.8948177633079437f,
99 6.9068905956085187f, 6.9188632372745946f,
100 6.9307373375628866f, 6.9425145053392398f,
101 6.9541963103868749f, 6.9657842846620869f,
102 6.9772799234999167f, 6.9886846867721654f,
103 7.0000000000000000f, 7.0112272554232539f,
104 7.0223678130284543f, 7.0334230015374501f,
105 7.0443941193584533f, 7.0552824355011898f,
106 7.0660891904577720f, 7.0768155970508308f,
107 7.0874628412503390f, 7.0980320829605263f,
108 7.1085244567781691f, 7.1189410727235076f,
109 7.1292830169449663f, 7.1395513523987936f,
110 7.1497471195046822f, 7.1598713367783890f,
111 7.1699250014423121f, 7.1799090900149344f,
112 7.1898245588800175f, 7.1996723448363644f,
113 7.2094533656289501f, 7.2191685204621611f,
114 7.2288186904958804f, 7.2384047393250785f,
115 7.2479275134435852f, 7.2573878426926521f,
116 7.2667865406949010f, 7.2761244052742375f,
117 7.2854022188622487f, 7.2946207488916270f,
118 7.3037807481771030f, 7.3128829552843557f,
119 7.3219280948873626f, 7.3309168781146167f,
120 7.3398500028846243f, 7.3487281542310771f,
121 7.3575520046180837f, 7.3663222142458160f,
122 7.3750394313469245f, 7.3837042924740519f,
123 7.3923174227787606f, 7.4008794362821843f,
124 7.4093909361377017f, 7.4178525148858982f,
125 7.4262647547020979f, 7.4346282276367245f,
126 7.4429434958487279f, 7.4512111118323289f,
127 7.4594316186372973f, 7.4676055500829976f,
128 7.4757334309663976f, 7.4838157772642563f,
129 7.4918530963296747f, 7.4998458870832056f,
130 7.5077946401986963f, 7.5156998382840427f,
131 7.5235619560570130f, 7.5313814605163118f,
132 7.5391588111080309f, 7.5468944598876364f,
133 7.5545888516776376f, 7.5622424242210728f,
134 7.5698556083309478f, 7.5774288280357486f,
135 7.5849625007211560f, 7.5924570372680806f,
136 7.5999128421871278f, 7.6073303137496104f,
137 7.6147098441152083f, 7.6220518194563764f,
138 7.6293566200796094f, 7.6366246205436487f,
139 7.6438561897747243f, 7.6510516911789281f,
140 7.6582114827517946f, 7.6653359171851764f,
141 7.6724253419714951f, 7.6794800995054464f,
142 7.6865005271832185f, 7.6934869574993252f,
143 7.7004397181410917f, 7.7073591320808825f,
144 7.7142455176661224f, 7.7210991887071855f,
145 7.7279204545631987f, 7.7347096202258383f,
146 7.7414669864011464f, 7.7481928495894605f,
147 7.7548875021634682f, 7.7615512324444795f,
148 7.7681843247769259f, 7.7747870596011736f,
149 7.7813597135246599f, 7.7879025593914317f,
150 7.7944158663501061f, 7.8008998999203047f,
151 7.8073549220576037f, 7.8137811912170374f,
152 7.8201789624151878f, 7.8265484872909150f,
153 7.8328900141647412f, 7.8392037880969436f,
154 7.8454900509443747f, 7.8517490414160571f,
155 7.8579809951275718f, 7.8641861446542797f,
156 7.8703647195834047f, 7.8765169465649993f,
157 7.8826430493618415f, 7.8887432488982591f,
158 7.8948177633079437f, 7.9008668079807486f,
159 7.9068905956085187f, 7.9128893362299619f,
160 7.9188632372745946f, 7.9248125036057812f,
161 7.9307373375628866f, 7.9366379390025709f,
162 7.9425145053392398f, 7.9483672315846778f,
163 7.9541963103868749f, 7.9600019320680805f,
164 7.9657842846620869f, 7.9715435539507719f,
165 7.9772799234999167f, 7.9829935746943103f,
166 7.9886846867721654f, 7.9943534368588577f
167 };
168
169 const float kSLog2Table[LOG_LOOKUP_IDX_MAX] = {
170 0.00000000f, 0.00000000f, 2.00000000f, 4.75488750f,
171 8.00000000f, 11.60964047f, 15.50977500f, 19.65148445f,
172 24.00000000f, 28.52932501f, 33.21928095f, 38.05374781f,
173 43.01955001f, 48.10571634f, 53.30296891f, 58.60335893f,
174 64.00000000f, 69.48686830f, 75.05865003f, 80.71062276f,
175 86.43856190f, 92.23866588f, 98.10749561f, 104.04192499f,
176 110.03910002f, 116.09640474f, 122.21143267f, 128.38196256f,
177 134.60593782f, 140.88144886f, 147.20671787f, 153.58008562f,
178 160.00000000f, 166.46500594f, 172.97373660f, 179.52490559f,
179 186.11730005f, 192.74977453f, 199.42124551f, 206.13068654f,
180 212.87712380f, 219.65963219f, 226.47733176f, 233.32938445f,
181 240.21499122f, 247.13338933f, 254.08384998f, 261.06567603f,
182 268.07820003f, 275.12078236f, 282.19280949f, 289.29369244f,
183 296.42286534f, 303.57978409f, 310.76392512f, 317.97478424f,
184 325.21187564f, 332.47473081f, 339.76289772f, 347.07593991f,
185 354.41343574f, 361.77497759f, 369.16017124f, 376.56863518f,
186 384.00000000f, 391.45390785f, 398.93001188f, 406.42797576f,
187 413.94747321f, 421.48818752f, 429.04981119f, 436.63204548f,
188 444.23460010f, 451.85719280f, 459.49954906f, 467.16140179f,
189 474.84249102f, 482.54256363f, 490.26137307f, 497.99867911f,
190 505.75424759f, 513.52785023f, 521.31926438f, 529.12827280f,
191 536.95466351f, 544.79822957f, 552.65876890f, 560.53608414f,
192 568.42998244f, 576.34027536f, 584.26677867f, 592.20931226f,
193 600.16769996f, 608.14176943f, 616.13135206f, 624.13628279f,
194 632.15640007f, 640.19154569f, 648.24156472f, 656.30630539f,
195 664.38561898f, 672.47935976f, 680.58738488f, 688.70955430f,
196 696.84573069f, 704.99577935f, 713.15956818f, 721.33696754f,
197 729.52785023f, 737.73209140f, 745.94956849f, 754.18016116f,
198 762.42375127f, 770.68022275f, 778.94946161f, 787.23135586f,
199 795.52579543f, 803.83267219f, 812.15187982f, 820.48331383f,
200 828.82687147f, 837.18245171f, 845.54995518f, 853.92928416f,
201 862.32034249f, 870.72303558f, 879.13727036f, 887.56295522f,
202 896.00000000f, 904.44831595f, 912.90781569f, 921.37841320f,
203 929.86002376f, 938.35256392f, 946.85595152f, 955.37010560f,
204 963.89494641f, 972.43039537f, 980.97637504f, 989.53280911f,
205 998.09962237f, 1006.67674069f, 1015.26409097f, 1023.86160116f,
206 1032.46920021f, 1041.08681805f, 1049.71438560f, 1058.35183469f,
207 1066.99909811f, 1075.65610955f, 1084.32280357f, 1092.99911564f,
208 1101.68498204f, 1110.38033993f, 1119.08512727f, 1127.79928282f,
209 1136.52274614f, 1145.25545758f, 1153.99735821f, 1162.74838989f,
210 1171.50849518f, 1180.27761738f, 1189.05570047f, 1197.84268914f,
211 1206.63852876f, 1215.44316535f, 1224.25654560f, 1233.07861684f,
212 1241.90932703f, 1250.74862473f, 1259.59645914f, 1268.45278005f,
213 1277.31753781f, 1286.19068338f, 1295.07216828f, 1303.96194457f,
214 1312.85996488f, 1321.76618236f, 1330.68055071f, 1339.60302413f,
215 1348.53355734f, 1357.47210556f, 1366.41862452f, 1375.37307041f,
216 1384.33539991f, 1393.30557020f, 1402.28353887f, 1411.26926400f,
217 1420.26270412f, 1429.26381818f, 1438.27256558f, 1447.28890615f,
218 1456.31280014f, 1465.34420819f, 1474.38309138f, 1483.42941118f,
219 1492.48312945f, 1501.54420843f, 1510.61261078f, 1519.68829949f,
220 1528.77123795f, 1537.86138993f, 1546.95871952f, 1556.06319119f,
221 1565.17476976f, 1574.29342040f, 1583.41910860f, 1592.55180020f,
222 1601.69146137f, 1610.83805860f, 1619.99155871f, 1629.15192882f,
223 1638.31913637f, 1647.49314911f, 1656.67393509f, 1665.86146266f,
224 1675.05570047f, 1684.25661744f, 1693.46418280f, 1702.67836605f,
225 1711.89913698f, 1721.12646563f, 1730.36032233f, 1739.60067768f,
226 1748.84750254f, 1758.10076802f, 1767.36044551f, 1776.62650662f,
227 1785.89892323f, 1795.17766747f, 1804.46271172f, 1813.75402857f,
228 1823.05159087f, 1832.35537170f, 1841.66534438f, 1850.98148244f,
229 1860.30375965f, 1869.63214999f, 1878.96662767f, 1888.30716711f,
230 1897.65374295f, 1907.00633003f, 1916.36490342f, 1925.72943838f,
231 1935.09991037f, 1944.47629506f, 1953.85856831f, 1963.24670620f,
232 1972.64068498f, 1982.04048108f, 1991.44607117f, 2000.85743204f,
233 2010.27454072f, 2019.69737440f, 2029.12591044f, 2038.56012640f
234 };
235
VP8LFastSLog2Slow(int v)236 float VP8LFastSLog2Slow(int v) {
237 assert(v >= LOG_LOOKUP_IDX_MAX);
238 if (v < APPROX_LOG_MAX) {
239 int log_cnt = 0;
240 const float v_f = (float)v;
241 while (v >= LOG_LOOKUP_IDX_MAX) {
242 ++log_cnt;
243 v = v >> 1;
244 }
245 return v_f * (kLog2Table[v] + log_cnt);
246 } else {
247 return (float)(LOG_2_RECIPROCAL * v * log((double)v));
248 }
249 }
250
VP8LFastLog2Slow(int v)251 float VP8LFastLog2Slow(int v) {
252 assert(v >= LOG_LOOKUP_IDX_MAX);
253 if (v < APPROX_LOG_MAX) {
254 int log_cnt = 0;
255 while (v >= LOG_LOOKUP_IDX_MAX) {
256 ++log_cnt;
257 v = v >> 1;
258 }
259 return kLog2Table[v] + log_cnt;
260 } else {
261 return (float)(LOG_2_RECIPROCAL * log((double)v));
262 }
263 }
264
265 //------------------------------------------------------------------------------
266 // Image transforms.
267
268 // In-place sum of each component with mod 256.
AddPixelsEq(uint32_t * a,uint32_t b)269 static WEBP_INLINE void AddPixelsEq(uint32_t* a, uint32_t b) {
270 const uint32_t alpha_and_green = (*a & 0xff00ff00u) + (b & 0xff00ff00u);
271 const uint32_t red_and_blue = (*a & 0x00ff00ffu) + (b & 0x00ff00ffu);
272 *a = (alpha_and_green & 0xff00ff00u) | (red_and_blue & 0x00ff00ffu);
273 }
274
Average2(uint32_t a0,uint32_t a1)275 static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) {
276 return (((a0 ^ a1) & 0xfefefefeL) >> 1) + (a0 & a1);
277 }
278
Average3(uint32_t a0,uint32_t a1,uint32_t a2)279 static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) {
280 return Average2(Average2(a0, a2), a1);
281 }
282
Average4(uint32_t a0,uint32_t a1,uint32_t a2,uint32_t a3)283 static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1,
284 uint32_t a2, uint32_t a3) {
285 return Average2(Average2(a0, a1), Average2(a2, a3));
286 }
287
288 #if defined(WEBP_TARGET_HAS_SSE2)
ClampedAddSubtractFull(uint32_t c0,uint32_t c1,uint32_t c2)289 static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1,
290 uint32_t c2) {
291 const __m128i zero = _mm_setzero_si128();
292 const __m128i C0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c0), zero);
293 const __m128i C1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c1), zero);
294 const __m128i C2 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c2), zero);
295 const __m128i V1 = _mm_add_epi16(C0, C1);
296 const __m128i V2 = _mm_sub_epi16(V1, C2);
297 const __m128i b = _mm_packus_epi16(V2, V2);
298 const uint32_t output = _mm_cvtsi128_si32(b);
299 return output;
300 }
301
ClampedAddSubtractHalf(uint32_t c0,uint32_t c1,uint32_t c2)302 static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
303 uint32_t c2) {
304 const uint32_t ave = Average2(c0, c1);
305 const __m128i zero = _mm_setzero_si128();
306 const __m128i A0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ave), zero);
307 const __m128i B0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c2), zero);
308 const __m128i A1 = _mm_sub_epi16(A0, B0);
309 const __m128i BgtA = _mm_cmpgt_epi16(B0, A0);
310 const __m128i A2 = _mm_sub_epi16(A1, BgtA);
311 const __m128i A3 = _mm_srai_epi16(A2, 1);
312 const __m128i A4 = _mm_add_epi16(A0, A3);
313 const __m128i A5 = _mm_packus_epi16(A4, A4);
314 const uint32_t output = _mm_cvtsi128_si32(A5);
315 return output;
316 }
317
Select(uint32_t a,uint32_t b,uint32_t c)318 static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
319 int pa_minus_pb;
320 const __m128i zero = _mm_setzero_si128();
321 const __m128i A0 = _mm_cvtsi32_si128(a);
322 const __m128i B0 = _mm_cvtsi32_si128(b);
323 const __m128i C0 = _mm_cvtsi32_si128(c);
324 const __m128i AC0 = _mm_subs_epu8(A0, C0);
325 const __m128i CA0 = _mm_subs_epu8(C0, A0);
326 const __m128i BC0 = _mm_subs_epu8(B0, C0);
327 const __m128i CB0 = _mm_subs_epu8(C0, B0);
328 const __m128i AC = _mm_or_si128(AC0, CA0);
329 const __m128i BC = _mm_or_si128(BC0, CB0);
330 const __m128i pa = _mm_unpacklo_epi8(AC, zero); // |a - c|
331 const __m128i pb = _mm_unpacklo_epi8(BC, zero); // |b - c|
332 const __m128i diff = _mm_sub_epi16(pb, pa);
333 {
334 int16_t out[8];
335 _mm_storeu_si128((__m128i*)out, diff);
336 pa_minus_pb = out[0] + out[1] + out[2] + out[3];
337 }
338 return (pa_minus_pb <= 0) ? a : b;
339 }
340
341 #else
342
Clip255(uint32_t a)343 static WEBP_INLINE uint32_t Clip255(uint32_t a) {
344 if (a < 256) {
345 return a;
346 }
347 // return 0, when a is a negative integer.
348 // return 255, when a is positive.
349 return ~a >> 24;
350 }
351
AddSubtractComponentFull(int a,int b,int c)352 static WEBP_INLINE int AddSubtractComponentFull(int a, int b, int c) {
353 return Clip255(a + b - c);
354 }
355
ClampedAddSubtractFull(uint32_t c0,uint32_t c1,uint32_t c2)356 static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1,
357 uint32_t c2) {
358 const int a = AddSubtractComponentFull(c0 >> 24, c1 >> 24, c2 >> 24);
359 const int r = AddSubtractComponentFull((c0 >> 16) & 0xff,
360 (c1 >> 16) & 0xff,
361 (c2 >> 16) & 0xff);
362 const int g = AddSubtractComponentFull((c0 >> 8) & 0xff,
363 (c1 >> 8) & 0xff,
364 (c2 >> 8) & 0xff);
365 const int b = AddSubtractComponentFull(c0 & 0xff, c1 & 0xff, c2 & 0xff);
366 return (a << 24) | (r << 16) | (g << 8) | b;
367 }
368
AddSubtractComponentHalf(int a,int b)369 static WEBP_INLINE int AddSubtractComponentHalf(int a, int b) {
370 return Clip255(a + (a - b) / 2);
371 }
372
ClampedAddSubtractHalf(uint32_t c0,uint32_t c1,uint32_t c2)373 static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
374 uint32_t c2) {
375 const uint32_t ave = Average2(c0, c1);
376 const int a = AddSubtractComponentHalf(ave >> 24, c2 >> 24);
377 const int r = AddSubtractComponentHalf((ave >> 16) & 0xff, (c2 >> 16) & 0xff);
378 const int g = AddSubtractComponentHalf((ave >> 8) & 0xff, (c2 >> 8) & 0xff);
379 const int b = AddSubtractComponentHalf((ave >> 0) & 0xff, (c2 >> 0) & 0xff);
380 return (a << 24) | (r << 16) | (g << 8) | b;
381 }
382
Sub3(int a,int b,int c)383 static WEBP_INLINE int Sub3(int a, int b, int c) {
384 const int pb = b - c;
385 const int pa = a - c;
386 return abs(pb) - abs(pa);
387 }
388
Select(uint32_t a,uint32_t b,uint32_t c)389 static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
390 const int pa_minus_pb =
391 Sub3((a >> 24) , (b >> 24) , (c >> 24) ) +
392 Sub3((a >> 16) & 0xff, (b >> 16) & 0xff, (c >> 16) & 0xff) +
393 Sub3((a >> 8) & 0xff, (b >> 8) & 0xff, (c >> 8) & 0xff) +
394 Sub3((a ) & 0xff, (b ) & 0xff, (c ) & 0xff);
395 return (pa_minus_pb <= 0) ? a : b;
396 }
397 #endif
398
399 //------------------------------------------------------------------------------
400 // Predictors
401
Predictor0(uint32_t left,const uint32_t * const top)402 static uint32_t Predictor0(uint32_t left, const uint32_t* const top) {
403 (void)top;
404 (void)left;
405 return ARGB_BLACK;
406 }
Predictor1(uint32_t left,const uint32_t * const top)407 static uint32_t Predictor1(uint32_t left, const uint32_t* const top) {
408 (void)top;
409 return left;
410 }
Predictor2(uint32_t left,const uint32_t * const top)411 static uint32_t Predictor2(uint32_t left, const uint32_t* const top) {
412 (void)left;
413 return top[0];
414 }
Predictor3(uint32_t left,const uint32_t * const top)415 static uint32_t Predictor3(uint32_t left, const uint32_t* const top) {
416 (void)left;
417 return top[1];
418 }
Predictor4(uint32_t left,const uint32_t * const top)419 static uint32_t Predictor4(uint32_t left, const uint32_t* const top) {
420 (void)left;
421 return top[-1];
422 }
Predictor5(uint32_t left,const uint32_t * const top)423 static uint32_t Predictor5(uint32_t left, const uint32_t* const top) {
424 const uint32_t pred = Average3(left, top[0], top[1]);
425 return pred;
426 }
Predictor6(uint32_t left,const uint32_t * const top)427 static uint32_t Predictor6(uint32_t left, const uint32_t* const top) {
428 const uint32_t pred = Average2(left, top[-1]);
429 return pred;
430 }
Predictor7(uint32_t left,const uint32_t * const top)431 static uint32_t Predictor7(uint32_t left, const uint32_t* const top) {
432 const uint32_t pred = Average2(left, top[0]);
433 return pred;
434 }
Predictor8(uint32_t left,const uint32_t * const top)435 static uint32_t Predictor8(uint32_t left, const uint32_t* const top) {
436 const uint32_t pred = Average2(top[-1], top[0]);
437 (void)left;
438 return pred;
439 }
Predictor9(uint32_t left,const uint32_t * const top)440 static uint32_t Predictor9(uint32_t left, const uint32_t* const top) {
441 const uint32_t pred = Average2(top[0], top[1]);
442 (void)left;
443 return pred;
444 }
Predictor10(uint32_t left,const uint32_t * const top)445 static uint32_t Predictor10(uint32_t left, const uint32_t* const top) {
446 const uint32_t pred = Average4(left, top[-1], top[0], top[1]);
447 return pred;
448 }
Predictor11(uint32_t left,const uint32_t * const top)449 static uint32_t Predictor11(uint32_t left, const uint32_t* const top) {
450 const uint32_t pred = Select(top[0], left, top[-1]);
451 return pred;
452 }
Predictor12(uint32_t left,const uint32_t * const top)453 static uint32_t Predictor12(uint32_t left, const uint32_t* const top) {
454 const uint32_t pred = ClampedAddSubtractFull(left, top[0], top[-1]);
455 return pred;
456 }
Predictor13(uint32_t left,const uint32_t * const top)457 static uint32_t Predictor13(uint32_t left, const uint32_t* const top) {
458 const uint32_t pred = ClampedAddSubtractHalf(left, top[0], top[-1]);
459 return pred;
460 }
461
462 typedef uint32_t (*PredictorFunc)(uint32_t left, const uint32_t* const top);
463 static const PredictorFunc kPredictors[16] = {
464 Predictor0, Predictor1, Predictor2, Predictor3,
465 Predictor4, Predictor5, Predictor6, Predictor7,
466 Predictor8, Predictor9, Predictor10, Predictor11,
467 Predictor12, Predictor13,
468 Predictor0, Predictor0 // <- padding security sentinels
469 };
470
471 // TODO(vikasa): Replace 256 etc with defines.
PredictionCostSpatial(const int * counts,int weight_0,double exp_val)472 static float PredictionCostSpatial(const int* counts,
473 int weight_0, double exp_val) {
474 const int significant_symbols = 16;
475 const double exp_decay_factor = 0.6;
476 double bits = weight_0 * counts[0];
477 int i;
478 for (i = 1; i < significant_symbols; ++i) {
479 bits += exp_val * (counts[i] + counts[256 - i]);
480 exp_val *= exp_decay_factor;
481 }
482 return (float)(-0.1 * bits);
483 }
484
485 // Compute the combined Shanon's entropy for distribution {X} and {X+Y}
CombinedShannonEntropy(const int * const X,const int * const Y,int n)486 static float CombinedShannonEntropy(const int* const X,
487 const int* const Y, int n) {
488 int i;
489 double retval = 0.;
490 int sumX = 0, sumXY = 0;
491 for (i = 0; i < n; ++i) {
492 const int x = X[i];
493 const int xy = X[i] + Y[i];
494 if (x != 0) {
495 sumX += x;
496 retval -= VP8LFastSLog2(x);
497 }
498 if (xy != 0) {
499 sumXY += xy;
500 retval -= VP8LFastSLog2(xy);
501 }
502 }
503 retval += VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY);
504 return (float)retval;
505 }
506
PredictionCostSpatialHistogram(int accumulated[4][256],int tile[4][256])507 static float PredictionCostSpatialHistogram(int accumulated[4][256],
508 int tile[4][256]) {
509 int i;
510 double retval = 0;
511 for (i = 0; i < 4; ++i) {
512 const double kExpValue = 0.94;
513 retval += PredictionCostSpatial(tile[i], 1, kExpValue);
514 retval += CombinedShannonEntropy(tile[i], accumulated[i], 256);
515 }
516 return (float)retval;
517 }
518
GetBestPredictorForTile(int width,int height,int tile_x,int tile_y,int bits,int accumulated[4][256],const uint32_t * const argb_scratch)519 static int GetBestPredictorForTile(int width, int height,
520 int tile_x, int tile_y, int bits,
521 int accumulated[4][256],
522 const uint32_t* const argb_scratch) {
523 const int kNumPredModes = 14;
524 const int col_start = tile_x << bits;
525 const int row_start = tile_y << bits;
526 const int tile_size = 1 << bits;
527 const int ymax = (tile_size <= height - row_start) ?
528 tile_size : height - row_start;
529 const int xmax = (tile_size <= width - col_start) ?
530 tile_size : width - col_start;
531 int histo[4][256];
532 float best_diff = MAX_DIFF_COST;
533 int best_mode = 0;
534
535 int mode;
536 for (mode = 0; mode < kNumPredModes; ++mode) {
537 const uint32_t* current_row = argb_scratch;
538 const PredictorFunc pred_func = kPredictors[mode];
539 float cur_diff;
540 int y;
541 memset(&histo[0][0], 0, sizeof(histo));
542 for (y = 0; y < ymax; ++y) {
543 int x;
544 const int row = row_start + y;
545 const uint32_t* const upper_row = current_row;
546 current_row = upper_row + width;
547 for (x = 0; x < xmax; ++x) {
548 const int col = col_start + x;
549 uint32_t predict;
550 uint32_t predict_diff;
551 if (row == 0) {
552 predict = (col == 0) ? ARGB_BLACK : current_row[col - 1]; // Left.
553 } else if (col == 0) {
554 predict = upper_row[col]; // Top.
555 } else {
556 predict = pred_func(current_row[col - 1], upper_row + col);
557 }
558 predict_diff = VP8LSubPixels(current_row[col], predict);
559 ++histo[0][predict_diff >> 24];
560 ++histo[1][((predict_diff >> 16) & 0xff)];
561 ++histo[2][((predict_diff >> 8) & 0xff)];
562 ++histo[3][(predict_diff & 0xff)];
563 }
564 }
565 cur_diff = PredictionCostSpatialHistogram(accumulated, histo);
566 if (cur_diff < best_diff) {
567 best_diff = cur_diff;
568 best_mode = mode;
569 }
570 }
571
572 return best_mode;
573 }
574
CopyTileWithPrediction(int width,int height,int tile_x,int tile_y,int bits,int mode,const uint32_t * const argb_scratch,uint32_t * const argb)575 static void CopyTileWithPrediction(int width, int height,
576 int tile_x, int tile_y, int bits, int mode,
577 const uint32_t* const argb_scratch,
578 uint32_t* const argb) {
579 const int col_start = tile_x << bits;
580 const int row_start = tile_y << bits;
581 const int tile_size = 1 << bits;
582 const int ymax = (tile_size <= height - row_start) ?
583 tile_size : height - row_start;
584 const int xmax = (tile_size <= width - col_start) ?
585 tile_size : width - col_start;
586 const PredictorFunc pred_func = kPredictors[mode];
587 const uint32_t* current_row = argb_scratch;
588
589 int y;
590 for (y = 0; y < ymax; ++y) {
591 int x;
592 const int row = row_start + y;
593 const uint32_t* const upper_row = current_row;
594 current_row = upper_row + width;
595 for (x = 0; x < xmax; ++x) {
596 const int col = col_start + x;
597 const int pix = row * width + col;
598 uint32_t predict;
599 if (row == 0) {
600 predict = (col == 0) ? ARGB_BLACK : current_row[col - 1]; // Left.
601 } else if (col == 0) {
602 predict = upper_row[col]; // Top.
603 } else {
604 predict = pred_func(current_row[col - 1], upper_row + col);
605 }
606 argb[pix] = VP8LSubPixels(current_row[col], predict);
607 }
608 }
609 }
610
VP8LResidualImage(int width,int height,int bits,uint32_t * const argb,uint32_t * const argb_scratch,uint32_t * const image)611 void VP8LResidualImage(int width, int height, int bits,
612 uint32_t* const argb, uint32_t* const argb_scratch,
613 uint32_t* const image) {
614 const int max_tile_size = 1 << bits;
615 const int tiles_per_row = VP8LSubSampleSize(width, bits);
616 const int tiles_per_col = VP8LSubSampleSize(height, bits);
617 uint32_t* const upper_row = argb_scratch;
618 uint32_t* const current_tile_rows = argb_scratch + width;
619 int tile_y;
620 int histo[4][256];
621 memset(histo, 0, sizeof(histo));
622 for (tile_y = 0; tile_y < tiles_per_col; ++tile_y) {
623 const int tile_y_offset = tile_y * max_tile_size;
624 const int this_tile_height =
625 (tile_y < tiles_per_col - 1) ? max_tile_size : height - tile_y_offset;
626 int tile_x;
627 if (tile_y > 0) {
628 memcpy(upper_row, current_tile_rows + (max_tile_size - 1) * width,
629 width * sizeof(*upper_row));
630 }
631 memcpy(current_tile_rows, &argb[tile_y_offset * width],
632 this_tile_height * width * sizeof(*current_tile_rows));
633 for (tile_x = 0; tile_x < tiles_per_row; ++tile_x) {
634 int pred;
635 int y;
636 const int tile_x_offset = tile_x * max_tile_size;
637 int all_x_max = tile_x_offset + max_tile_size;
638 if (all_x_max > width) {
639 all_x_max = width;
640 }
641 pred = GetBestPredictorForTile(width, height, tile_x, tile_y, bits, histo,
642 argb_scratch);
643 image[tile_y * tiles_per_row + tile_x] = 0xff000000u | (pred << 8);
644 CopyTileWithPrediction(width, height, tile_x, tile_y, bits, pred,
645 argb_scratch, argb);
646 for (y = 0; y < max_tile_size; ++y) {
647 int ix;
648 int all_x;
649 int all_y = tile_y_offset + y;
650 if (all_y >= height) {
651 break;
652 }
653 ix = all_y * width + tile_x_offset;
654 for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
655 const uint32_t a = argb[ix];
656 ++histo[0][a >> 24];
657 ++histo[1][((a >> 16) & 0xff)];
658 ++histo[2][((a >> 8) & 0xff)];
659 ++histo[3][(a & 0xff)];
660 }
661 }
662 }
663 }
664 }
665
666 // Inverse prediction.
PredictorInverseTransform(const VP8LTransform * const transform,int y_start,int y_end,uint32_t * data)667 static void PredictorInverseTransform(const VP8LTransform* const transform,
668 int y_start, int y_end, uint32_t* data) {
669 const int width = transform->xsize_;
670 if (y_start == 0) { // First Row follows the L (mode=1) mode.
671 int x;
672 const uint32_t pred0 = Predictor0(data[-1], NULL);
673 AddPixelsEq(data, pred0);
674 for (x = 1; x < width; ++x) {
675 const uint32_t pred1 = Predictor1(data[x - 1], NULL);
676 AddPixelsEq(data + x, pred1);
677 }
678 data += width;
679 ++y_start;
680 }
681
682 {
683 int y = y_start;
684 const int mask = (1 << transform->bits_) - 1;
685 const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_);
686 const uint32_t* pred_mode_base =
687 transform->data_ + (y >> transform->bits_) * tiles_per_row;
688
689 while (y < y_end) {
690 int x;
691 const uint32_t pred2 = Predictor2(data[-1], data - width);
692 const uint32_t* pred_mode_src = pred_mode_base;
693 PredictorFunc pred_func;
694
695 // First pixel follows the T (mode=2) mode.
696 AddPixelsEq(data, pred2);
697
698 // .. the rest:
699 pred_func = kPredictors[((*pred_mode_src++) >> 8) & 0xf];
700 for (x = 1; x < width; ++x) {
701 uint32_t pred;
702 if ((x & mask) == 0) { // start of tile. Read predictor function.
703 pred_func = kPredictors[((*pred_mode_src++) >> 8) & 0xf];
704 }
705 pred = pred_func(data[x - 1], data + x - width);
706 AddPixelsEq(data + x, pred);
707 }
708 data += width;
709 ++y;
710 if ((y & mask) == 0) { // Use the same mask, since tiles are squares.
711 pred_mode_base += tiles_per_row;
712 }
713 }
714 }
715 }
716
VP8LSubtractGreenFromBlueAndRed(uint32_t * argb_data,int num_pixs)717 void VP8LSubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixs) {
718 int i = 0;
719 #if defined(WEBP_TARGET_HAS_SSE2)
720 const __m128i mask = _mm_set1_epi32(0x0000ff00);
721 for (; i + 4 < num_pixs; i += 4) {
722 const __m128i in = _mm_loadu_si128((__m128i*)&argb_data[i]);
723 const __m128i in_00g0 = _mm_and_si128(in, mask); // 00g0|00g0|...
724 const __m128i in_0g00 = _mm_slli_epi32(in_00g0, 8); // 0g00|0g00|...
725 const __m128i in_000g = _mm_srli_epi32(in_00g0, 8); // 000g|000g|...
726 const __m128i in_0g0g = _mm_or_si128(in_0g00, in_000g);
727 const __m128i out = _mm_sub_epi8(in, in_0g0g);
728 _mm_storeu_si128((__m128i*)&argb_data[i], out);
729 }
730 // fallthrough and finish off with plain-C
731 #endif
732 for (; i < num_pixs; ++i) {
733 const uint32_t argb = argb_data[i];
734 const uint32_t green = (argb >> 8) & 0xff;
735 const uint32_t new_r = (((argb >> 16) & 0xff) - green) & 0xff;
736 const uint32_t new_b = ((argb & 0xff) - green) & 0xff;
737 argb_data[i] = (argb & 0xff00ff00) | (new_r << 16) | new_b;
738 }
739 }
740
741 // Add green to blue and red channels (i.e. perform the inverse transform of
742 // 'subtract green').
AddGreenToBlueAndRed(const VP8LTransform * const transform,int y_start,int y_end,uint32_t * data)743 static void AddGreenToBlueAndRed(const VP8LTransform* const transform,
744 int y_start, int y_end, uint32_t* data) {
745 const int width = transform->xsize_;
746 const uint32_t* const data_end = data + (y_end - y_start) * width;
747 #if defined(WEBP_TARGET_HAS_SSE2)
748 const __m128i mask = _mm_set1_epi32(0x0000ff00);
749 for (; data + 4 < data_end; data += 4) {
750 const __m128i in = _mm_loadu_si128((__m128i*)data);
751 const __m128i in_00g0 = _mm_and_si128(in, mask); // 00g0|00g0|...
752 const __m128i in_0g00 = _mm_slli_epi32(in_00g0, 8); // 0g00|0g00|...
753 const __m128i in_000g = _mm_srli_epi32(in_00g0, 8); // 000g|000g|...
754 const __m128i in_0g0g = _mm_or_si128(in_0g00, in_000g);
755 const __m128i out = _mm_add_epi8(in, in_0g0g);
756 _mm_storeu_si128((__m128i*)data, out);
757 }
758 // fallthrough and finish off with plain-C
759 #endif
760 while (data < data_end) {
761 const uint32_t argb = *data;
762 const uint32_t green = ((argb >> 8) & 0xff);
763 uint32_t red_blue = (argb & 0x00ff00ffu);
764 red_blue += (green << 16) | green;
765 red_blue &= 0x00ff00ffu;
766 *data++ = (argb & 0xff00ff00u) | red_blue;
767 }
768 }
769
770 typedef struct {
771 // Note: the members are uint8_t, so that any negative values are
772 // automatically converted to "mod 256" values.
773 uint8_t green_to_red_;
774 uint8_t green_to_blue_;
775 uint8_t red_to_blue_;
776 } Multipliers;
777
MultipliersClear(Multipliers * m)778 static WEBP_INLINE void MultipliersClear(Multipliers* m) {
779 m->green_to_red_ = 0;
780 m->green_to_blue_ = 0;
781 m->red_to_blue_ = 0;
782 }
783
ColorTransformDelta(int8_t color_pred,int8_t color)784 static WEBP_INLINE uint32_t ColorTransformDelta(int8_t color_pred,
785 int8_t color) {
786 return (uint32_t)((int)(color_pred) * color) >> 5;
787 }
788
ColorCodeToMultipliers(uint32_t color_code,Multipliers * const m)789 static WEBP_INLINE void ColorCodeToMultipliers(uint32_t color_code,
790 Multipliers* const m) {
791 m->green_to_red_ = (color_code >> 0) & 0xff;
792 m->green_to_blue_ = (color_code >> 8) & 0xff;
793 m->red_to_blue_ = (color_code >> 16) & 0xff;
794 }
795
MultipliersToColorCode(Multipliers * const m)796 static WEBP_INLINE uint32_t MultipliersToColorCode(Multipliers* const m) {
797 return 0xff000000u |
798 ((uint32_t)(m->red_to_blue_) << 16) |
799 ((uint32_t)(m->green_to_blue_) << 8) |
800 m->green_to_red_;
801 }
802
TransformColor(const Multipliers * const m,uint32_t argb,int inverse)803 static WEBP_INLINE uint32_t TransformColor(const Multipliers* const m,
804 uint32_t argb, int inverse) {
805 const uint32_t green = argb >> 8;
806 const uint32_t red = argb >> 16;
807 uint32_t new_red = red;
808 uint32_t new_blue = argb;
809
810 if (inverse) {
811 new_red += ColorTransformDelta(m->green_to_red_, green);
812 new_red &= 0xff;
813 new_blue += ColorTransformDelta(m->green_to_blue_, green);
814 new_blue += ColorTransformDelta(m->red_to_blue_, new_red);
815 new_blue &= 0xff;
816 } else {
817 new_red -= ColorTransformDelta(m->green_to_red_, green);
818 new_red &= 0xff;
819 new_blue -= ColorTransformDelta(m->green_to_blue_, green);
820 new_blue -= ColorTransformDelta(m->red_to_blue_, red);
821 new_blue &= 0xff;
822 }
823 return (argb & 0xff00ff00u) | (new_red << 16) | (new_blue);
824 }
825
TransformColorRed(uint8_t green_to_red,uint32_t argb)826 static WEBP_INLINE uint8_t TransformColorRed(uint8_t green_to_red,
827 uint32_t argb) {
828 const uint32_t green = argb >> 8;
829 uint32_t new_red = argb >> 16;
830 new_red -= ColorTransformDelta(green_to_red, green);
831 return (new_red & 0xff);
832 }
833
TransformColorBlue(uint8_t green_to_blue,uint8_t red_to_blue,uint32_t argb)834 static WEBP_INLINE uint8_t TransformColorBlue(uint8_t green_to_blue,
835 uint8_t red_to_blue,
836 uint32_t argb) {
837 const uint32_t green = argb >> 8;
838 const uint32_t red = argb >> 16;
839 uint8_t new_blue = argb;
840 new_blue -= ColorTransformDelta(green_to_blue, green);
841 new_blue -= ColorTransformDelta(red_to_blue, red);
842 return (new_blue & 0xff);
843 }
844
SkipRepeatedPixels(const uint32_t * const argb,int ix,int xsize)845 static WEBP_INLINE int SkipRepeatedPixels(const uint32_t* const argb,
846 int ix, int xsize) {
847 const uint32_t v = argb[ix];
848 if (ix >= xsize + 3) {
849 if (v == argb[ix - xsize] &&
850 argb[ix - 1] == argb[ix - xsize - 1] &&
851 argb[ix - 2] == argb[ix - xsize - 2] &&
852 argb[ix - 3] == argb[ix - xsize - 3]) {
853 return 1;
854 }
855 return v == argb[ix - 3] && v == argb[ix - 2] && v == argb[ix - 1];
856 } else if (ix >= 3) {
857 return v == argb[ix - 3] && v == argb[ix - 2] && v == argb[ix - 1];
858 }
859 return 0;
860 }
861
PredictionCostCrossColor(const int accumulated[256],const int counts[256])862 static float PredictionCostCrossColor(const int accumulated[256],
863 const int counts[256]) {
864 // Favor low entropy, locally and globally.
865 // Favor small absolute values for PredictionCostSpatial
866 static const double kExpValue = 2.4;
867 return CombinedShannonEntropy(counts, accumulated, 256) +
868 PredictionCostSpatial(counts, 3, kExpValue);
869 }
870
GetBestColorTransformForTile(int tile_x,int tile_y,int bits,Multipliers prevX,Multipliers prevY,int step,int xsize,int ysize,int * accumulated_red_histo,int * accumulated_blue_histo,const uint32_t * const argb)871 static Multipliers GetBestColorTransformForTile(
872 int tile_x, int tile_y, int bits,
873 Multipliers prevX,
874 Multipliers prevY,
875 int step, int xsize, int ysize,
876 int* accumulated_red_histo,
877 int* accumulated_blue_histo,
878 const uint32_t* const argb) {
879 float best_diff = MAX_DIFF_COST;
880 float cur_diff;
881 const int halfstep = step / 2;
882 const int max_tile_size = 1 << bits;
883 const int tile_y_offset = tile_y * max_tile_size;
884 const int tile_x_offset = tile_x * max_tile_size;
885 int green_to_red;
886 int green_to_blue;
887 int red_to_blue;
888 int all_x_max = tile_x_offset + max_tile_size;
889 int all_y_max = tile_y_offset + max_tile_size;
890 Multipliers best_tx;
891 MultipliersClear(&best_tx);
892 if (all_x_max > xsize) {
893 all_x_max = xsize;
894 }
895 if (all_y_max > ysize) {
896 all_y_max = ysize;
897 }
898
899 for (green_to_red = -64; green_to_red <= 64; green_to_red += halfstep) {
900 int histo[256] = { 0 };
901 int all_y;
902
903 for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) {
904 int ix = all_y * xsize + tile_x_offset;
905 int all_x;
906 for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
907 if (SkipRepeatedPixels(argb, ix, xsize)) {
908 continue;
909 }
910 ++histo[TransformColorRed(green_to_red, argb[ix])]; // red.
911 }
912 }
913 cur_diff = PredictionCostCrossColor(&accumulated_red_histo[0], &histo[0]);
914 if ((uint8_t)green_to_red == prevX.green_to_red_) {
915 cur_diff -= 3; // favor keeping the areas locally similar
916 }
917 if ((uint8_t)green_to_red == prevY.green_to_red_) {
918 cur_diff -= 3; // favor keeping the areas locally similar
919 }
920 if (green_to_red == 0) {
921 cur_diff -= 3;
922 }
923 if (cur_diff < best_diff) {
924 best_diff = cur_diff;
925 best_tx.green_to_red_ = green_to_red;
926 }
927 }
928 best_diff = MAX_DIFF_COST;
929 for (green_to_blue = -32; green_to_blue <= 32; green_to_blue += step) {
930 for (red_to_blue = -32; red_to_blue <= 32; red_to_blue += step) {
931 int all_y;
932 int histo[256] = { 0 };
933 for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) {
934 int all_x;
935 int ix = all_y * xsize + tile_x_offset;
936 for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
937 if (SkipRepeatedPixels(argb, ix, xsize)) {
938 continue;
939 }
940 ++histo[TransformColorBlue(green_to_blue, red_to_blue, argb[ix])];
941 }
942 }
943 cur_diff =
944 PredictionCostCrossColor(&accumulated_blue_histo[0], &histo[0]);
945 if ((uint8_t)green_to_blue == prevX.green_to_blue_) {
946 cur_diff -= 3; // favor keeping the areas locally similar
947 }
948 if ((uint8_t)green_to_blue == prevY.green_to_blue_) {
949 cur_diff -= 3; // favor keeping the areas locally similar
950 }
951 if ((uint8_t)red_to_blue == prevX.red_to_blue_) {
952 cur_diff -= 3; // favor keeping the areas locally similar
953 }
954 if ((uint8_t)red_to_blue == prevY.red_to_blue_) {
955 cur_diff -= 3; // favor keeping the areas locally similar
956 }
957 if (green_to_blue == 0) {
958 cur_diff -= 3;
959 }
960 if (red_to_blue == 0) {
961 cur_diff -= 3;
962 }
963 if (cur_diff < best_diff) {
964 best_diff = cur_diff;
965 best_tx.green_to_blue_ = green_to_blue;
966 best_tx.red_to_blue_ = red_to_blue;
967 }
968 }
969 }
970 return best_tx;
971 }
972
CopyTileWithColorTransform(int xsize,int ysize,int tile_x,int tile_y,int bits,Multipliers color_transform,uint32_t * const argb)973 static void CopyTileWithColorTransform(int xsize, int ysize,
974 int tile_x, int tile_y, int bits,
975 Multipliers color_transform,
976 uint32_t* const argb) {
977 int y;
978 int xscan = 1 << bits;
979 int yscan = 1 << bits;
980 tile_x <<= bits;
981 tile_y <<= bits;
982 if (xscan > xsize - tile_x) {
983 xscan = xsize - tile_x;
984 }
985 if (yscan > ysize - tile_y) {
986 yscan = ysize - tile_y;
987 }
988 yscan += tile_y;
989 for (y = tile_y; y < yscan; ++y) {
990 int ix = y * xsize + tile_x;
991 const int end_ix = ix + xscan;
992 for (; ix < end_ix; ++ix) {
993 argb[ix] = TransformColor(&color_transform, argb[ix], 0);
994 }
995 }
996 }
997
VP8LColorSpaceTransform(int width,int height,int bits,int step,uint32_t * const argb,uint32_t * image)998 void VP8LColorSpaceTransform(int width, int height, int bits, int step,
999 uint32_t* const argb, uint32_t* image) {
1000 const int max_tile_size = 1 << bits;
1001 int tile_xsize = VP8LSubSampleSize(width, bits);
1002 int tile_ysize = VP8LSubSampleSize(height, bits);
1003 int accumulated_red_histo[256] = { 0 };
1004 int accumulated_blue_histo[256] = { 0 };
1005 int tile_y;
1006 int tile_x;
1007 Multipliers prevX;
1008 Multipliers prevY;
1009 MultipliersClear(&prevY);
1010 MultipliersClear(&prevX);
1011 for (tile_y = 0; tile_y < tile_ysize; ++tile_y) {
1012 for (tile_x = 0; tile_x < tile_xsize; ++tile_x) {
1013 Multipliers color_transform;
1014 int all_x_max;
1015 int y;
1016 const int tile_y_offset = tile_y * max_tile_size;
1017 const int tile_x_offset = tile_x * max_tile_size;
1018 if (tile_y != 0) {
1019 ColorCodeToMultipliers(image[tile_y * tile_xsize + tile_x - 1], &prevX);
1020 ColorCodeToMultipliers(image[(tile_y - 1) * tile_xsize + tile_x],
1021 &prevY);
1022 } else if (tile_x != 0) {
1023 ColorCodeToMultipliers(image[tile_y * tile_xsize + tile_x - 1], &prevX);
1024 }
1025 color_transform =
1026 GetBestColorTransformForTile(tile_x, tile_y, bits,
1027 prevX, prevY,
1028 step, width, height,
1029 &accumulated_red_histo[0],
1030 &accumulated_blue_histo[0],
1031 argb);
1032 image[tile_y * tile_xsize + tile_x] =
1033 MultipliersToColorCode(&color_transform);
1034 CopyTileWithColorTransform(width, height, tile_x, tile_y, bits,
1035 color_transform, argb);
1036
1037 // Gather accumulated histogram data.
1038 all_x_max = tile_x_offset + max_tile_size;
1039 if (all_x_max > width) {
1040 all_x_max = width;
1041 }
1042 for (y = 0; y < max_tile_size; ++y) {
1043 int ix;
1044 int all_x;
1045 int all_y = tile_y_offset + y;
1046 if (all_y >= height) {
1047 break;
1048 }
1049 ix = all_y * width + tile_x_offset;
1050 for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
1051 if (ix >= 2 &&
1052 argb[ix] == argb[ix - 2] &&
1053 argb[ix] == argb[ix - 1]) {
1054 continue; // repeated pixels are handled by backward references
1055 }
1056 if (ix >= width + 2 &&
1057 argb[ix - 2] == argb[ix - width - 2] &&
1058 argb[ix - 1] == argb[ix - width - 1] &&
1059 argb[ix] == argb[ix - width]) {
1060 continue; // repeated pixels are handled by backward references
1061 }
1062 ++accumulated_red_histo[(argb[ix] >> 16) & 0xff];
1063 ++accumulated_blue_histo[argb[ix] & 0xff];
1064 }
1065 }
1066 }
1067 }
1068 }
1069
1070 // Color space inverse transform.
ColorSpaceInverseTransform(const VP8LTransform * const transform,int y_start,int y_end,uint32_t * data)1071 static void ColorSpaceInverseTransform(const VP8LTransform* const transform,
1072 int y_start, int y_end, uint32_t* data) {
1073 const int width = transform->xsize_;
1074 const int mask = (1 << transform->bits_) - 1;
1075 const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_);
1076 int y = y_start;
1077 const uint32_t* pred_row =
1078 transform->data_ + (y >> transform->bits_) * tiles_per_row;
1079
1080 while (y < y_end) {
1081 const uint32_t* pred = pred_row;
1082 Multipliers m = { 0, 0, 0 };
1083 int x;
1084
1085 for (x = 0; x < width; ++x) {
1086 if ((x & mask) == 0) ColorCodeToMultipliers(*pred++, &m);
1087 data[x] = TransformColor(&m, data[x], 1);
1088 }
1089 data += width;
1090 ++y;
1091 if ((y & mask) == 0) pred_row += tiles_per_row;;
1092 }
1093 }
1094
1095 // Separate out pixels packed together using pixel-bundling.
ColorIndexInverseTransform(const VP8LTransform * const transform,int y_start,int y_end,const uint32_t * src,uint32_t * dst)1096 static void ColorIndexInverseTransform(
1097 const VP8LTransform* const transform,
1098 int y_start, int y_end, const uint32_t* src, uint32_t* dst) {
1099 int y;
1100 const int bits_per_pixel = 8 >> transform->bits_;
1101 const int width = transform->xsize_;
1102 const uint32_t* const color_map = transform->data_;
1103 if (bits_per_pixel < 8) {
1104 const int pixels_per_byte = 1 << transform->bits_;
1105 const int count_mask = pixels_per_byte - 1;
1106 const uint32_t bit_mask = (1 << bits_per_pixel) - 1;
1107 for (y = y_start; y < y_end; ++y) {
1108 uint32_t packed_pixels = 0;
1109 int x;
1110 for (x = 0; x < width; ++x) {
1111 // We need to load fresh 'packed_pixels' once every 'pixels_per_byte'
1112 // increments of x. Fortunately, pixels_per_byte is a power of 2, so
1113 // can just use a mask for that, instead of decrementing a counter.
1114 if ((x & count_mask) == 0) packed_pixels = ((*src++) >> 8) & 0xff;
1115 *dst++ = color_map[packed_pixels & bit_mask];
1116 packed_pixels >>= bits_per_pixel;
1117 }
1118 }
1119 } else {
1120 for (y = y_start; y < y_end; ++y) {
1121 int x;
1122 for (x = 0; x < width; ++x) {
1123 *dst++ = color_map[((*src++) >> 8) & 0xff];
1124 }
1125 }
1126 }
1127 }
1128
VP8LInverseTransform(const VP8LTransform * const transform,int row_start,int row_end,const uint32_t * const in,uint32_t * const out)1129 void VP8LInverseTransform(const VP8LTransform* const transform,
1130 int row_start, int row_end,
1131 const uint32_t* const in, uint32_t* const out) {
1132 assert(row_start < row_end);
1133 assert(row_end <= transform->ysize_);
1134 switch (transform->type_) {
1135 case SUBTRACT_GREEN:
1136 AddGreenToBlueAndRed(transform, row_start, row_end, out);
1137 break;
1138 case PREDICTOR_TRANSFORM:
1139 PredictorInverseTransform(transform, row_start, row_end, out);
1140 if (row_end != transform->ysize_) {
1141 // The last predicted row in this iteration will be the top-pred row
1142 // for the first row in next iteration.
1143 const int width = transform->xsize_;
1144 memcpy(out - width, out + (row_end - row_start - 1) * width,
1145 width * sizeof(*out));
1146 }
1147 break;
1148 case CROSS_COLOR_TRANSFORM:
1149 ColorSpaceInverseTransform(transform, row_start, row_end, out);
1150 break;
1151 case COLOR_INDEXING_TRANSFORM:
1152 if (in == out && transform->bits_ > 0) {
1153 // Move packed pixels to the end of unpacked region, so that unpacking
1154 // can occur seamlessly.
1155 // Also, note that this is the only transform that applies on
1156 // the effective width of VP8LSubSampleSize(xsize_, bits_). All other
1157 // transforms work on effective width of xsize_.
1158 const int out_stride = (row_end - row_start) * transform->xsize_;
1159 const int in_stride = (row_end - row_start) *
1160 VP8LSubSampleSize(transform->xsize_, transform->bits_);
1161 uint32_t* const src = out + out_stride - in_stride;
1162 memmove(src, out, in_stride * sizeof(*src));
1163 ColorIndexInverseTransform(transform, row_start, row_end, src, out);
1164 } else {
1165 ColorIndexInverseTransform(transform, row_start, row_end, in, out);
1166 }
1167 break;
1168 }
1169 }
1170
1171 //------------------------------------------------------------------------------
1172 // Color space conversion.
1173
is_big_endian(void)1174 static int is_big_endian(void) {
1175 static const union {
1176 uint16_t w;
1177 uint8_t b[2];
1178 } tmp = { 1 };
1179 return (tmp.b[0] != 1);
1180 }
1181
ConvertBGRAToRGB(const uint32_t * src,int num_pixels,uint8_t * dst)1182 static void ConvertBGRAToRGB(const uint32_t* src,
1183 int num_pixels, uint8_t* dst) {
1184 const uint32_t* const src_end = src + num_pixels;
1185 while (src < src_end) {
1186 const uint32_t argb = *src++;
1187 *dst++ = (argb >> 16) & 0xff;
1188 *dst++ = (argb >> 8) & 0xff;
1189 *dst++ = (argb >> 0) & 0xff;
1190 }
1191 }
1192
ConvertBGRAToRGBA(const uint32_t * src,int num_pixels,uint8_t * dst)1193 static void ConvertBGRAToRGBA(const uint32_t* src,
1194 int num_pixels, uint8_t* dst) {
1195 const uint32_t* const src_end = src + num_pixels;
1196 while (src < src_end) {
1197 const uint32_t argb = *src++;
1198 *dst++ = (argb >> 16) & 0xff;
1199 *dst++ = (argb >> 8) & 0xff;
1200 *dst++ = (argb >> 0) & 0xff;
1201 *dst++ = (argb >> 24) & 0xff;
1202 }
1203 }
1204
ConvertBGRAToRGBA4444(const uint32_t * src,int num_pixels,uint8_t * dst)1205 static void ConvertBGRAToRGBA4444(const uint32_t* src,
1206 int num_pixels, uint8_t* dst) {
1207 const uint32_t* const src_end = src + num_pixels;
1208 while (src < src_end) {
1209 const uint32_t argb = *src++;
1210 const uint8_t rg = ((argb >> 16) & 0xf0) | ((argb >> 12) & 0xf);
1211 const uint8_t ba = ((argb >> 0) & 0xf0) | ((argb >> 28) & 0xf);
1212 #ifdef WEBP_SWAP_16BIT_CSP
1213 *dst++ = ba;
1214 *dst++ = rg;
1215 #else
1216 *dst++ = rg;
1217 *dst++ = ba;
1218 #endif
1219 }
1220 }
1221
ConvertBGRAToRGB565(const uint32_t * src,int num_pixels,uint8_t * dst)1222 static void ConvertBGRAToRGB565(const uint32_t* src,
1223 int num_pixels, uint8_t* dst) {
1224 const uint32_t* const src_end = src + num_pixels;
1225 while (src < src_end) {
1226 const uint32_t argb = *src++;
1227 const uint8_t rg = ((argb >> 16) & 0xf8) | ((argb >> 13) & 0x7);
1228 const uint8_t gb = ((argb >> 5) & 0xe0) | ((argb >> 3) & 0x1f);
1229 #ifdef WEBP_SWAP_16BIT_CSP
1230 *dst++ = gb;
1231 *dst++ = rg;
1232 #else
1233 *dst++ = rg;
1234 *dst++ = gb;
1235 #endif
1236 }
1237 }
1238
ConvertBGRAToBGR(const uint32_t * src,int num_pixels,uint8_t * dst)1239 static void ConvertBGRAToBGR(const uint32_t* src,
1240 int num_pixels, uint8_t* dst) {
1241 const uint32_t* const src_end = src + num_pixels;
1242 while (src < src_end) {
1243 const uint32_t argb = *src++;
1244 *dst++ = (argb >> 0) & 0xff;
1245 *dst++ = (argb >> 8) & 0xff;
1246 *dst++ = (argb >> 16) & 0xff;
1247 }
1248 }
1249
CopyOrSwap(const uint32_t * src,int num_pixels,uint8_t * dst,int swap_on_big_endian)1250 static void CopyOrSwap(const uint32_t* src, int num_pixels, uint8_t* dst,
1251 int swap_on_big_endian) {
1252 if (is_big_endian() == swap_on_big_endian) {
1253 const uint32_t* const src_end = src + num_pixels;
1254 while (src < src_end) {
1255 uint32_t argb = *src++;
1256
1257 #if !defined(WEBP_REFERENCE_IMPLEMENTATION)
1258 #if !defined(__BIG_ENDIAN__) && (defined(__i386__) || defined(__x86_64__))
1259 __asm__ volatile("bswap %0" : "=r"(argb) : "0"(argb));
1260 *(uint32_t*)dst = argb;
1261 #elif !defined(__BIG_ENDIAN__) && defined(_MSC_VER)
1262 argb = _byteswap_ulong(argb);
1263 *(uint32_t*)dst = argb;
1264 #else
1265 dst[0] = (argb >> 24) & 0xff;
1266 dst[1] = (argb >> 16) & 0xff;
1267 dst[2] = (argb >> 8) & 0xff;
1268 dst[3] = (argb >> 0) & 0xff;
1269 #endif
1270 #else // WEBP_REFERENCE_IMPLEMENTATION
1271 dst[0] = (argb >> 24) & 0xff;
1272 dst[1] = (argb >> 16) & 0xff;
1273 dst[2] = (argb >> 8) & 0xff;
1274 dst[3] = (argb >> 0) & 0xff;
1275 #endif
1276 dst += sizeof(argb);
1277 }
1278 } else {
1279 memcpy(dst, src, num_pixels * sizeof(*src));
1280 }
1281 }
1282
VP8LConvertFromBGRA(const uint32_t * const in_data,int num_pixels,WEBP_CSP_MODE out_colorspace,uint8_t * const rgba)1283 void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels,
1284 WEBP_CSP_MODE out_colorspace, uint8_t* const rgba) {
1285 switch (out_colorspace) {
1286 case MODE_RGB:
1287 ConvertBGRAToRGB(in_data, num_pixels, rgba);
1288 break;
1289 case MODE_RGBA:
1290 ConvertBGRAToRGBA(in_data, num_pixels, rgba);
1291 break;
1292 case MODE_rgbA:
1293 ConvertBGRAToRGBA(in_data, num_pixels, rgba);
1294 WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0);
1295 break;
1296 case MODE_BGR:
1297 ConvertBGRAToBGR(in_data, num_pixels, rgba);
1298 break;
1299 case MODE_BGRA:
1300 CopyOrSwap(in_data, num_pixels, rgba, 1);
1301 break;
1302 case MODE_bgrA:
1303 CopyOrSwap(in_data, num_pixels, rgba, 1);
1304 WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0);
1305 break;
1306 case MODE_ARGB:
1307 CopyOrSwap(in_data, num_pixels, rgba, 0);
1308 break;
1309 case MODE_Argb:
1310 CopyOrSwap(in_data, num_pixels, rgba, 0);
1311 WebPApplyAlphaMultiply(rgba, 1, num_pixels, 1, 0);
1312 break;
1313 case MODE_RGBA_4444:
1314 ConvertBGRAToRGBA4444(in_data, num_pixels, rgba);
1315 break;
1316 case MODE_rgbA_4444:
1317 ConvertBGRAToRGBA4444(in_data, num_pixels, rgba);
1318 WebPApplyAlphaMultiply4444(rgba, num_pixels, 1, 0);
1319 break;
1320 case MODE_RGB_565:
1321 ConvertBGRAToRGB565(in_data, num_pixels, rgba);
1322 break;
1323 default:
1324 assert(0); // Code flow should not reach here.
1325 }
1326 }
1327
1328 //------------------------------------------------------------------------------
1329
1330 #if defined(__cplusplus) || defined(c_plusplus)
1331 } // extern "C"
1332 #endif
1333