1 // Copyright 2015 Google Inc. All Rights Reserved.
2 //
3 // Use of this source code is governed by a BSD-style license
4 // that can be found in the COPYING file in the root of the source
5 // tree. An additional intellectual property rights grant can be found
6 // in the file PATENTS. All contributing project authors may
7 // be found in the AUTHORS file in the root of the source tree.
8 // -----------------------------------------------------------------------------
9 //
10 // Image transform methods for lossless encoder.
11 //
12 // Authors: Vikas Arora (vikaas.arora@gmail.com)
13 // Jyrki Alakuijala (jyrki@google.com)
14 // Urvang Joshi (urvang@google.com)
15
16 #include "./dsp.h"
17
18 #include <math.h>
19 #include <stdlib.h>
20 #include "../dec/vp8li.h"
21 #include "../utils/endian_inl.h"
22 #include "./lossless.h"
23 #include "./yuv.h"
24
25 #define MAX_DIFF_COST (1e30f)
26
27 static const int kPredLowEffort = 11;
28 static const uint32_t kMaskAlpha = 0xff000000;
29
30 // lookup table for small values of log2(int)
31 const float kLog2Table[LOG_LOOKUP_IDX_MAX] = {
32 0.0000000000000000f, 0.0000000000000000f,
33 1.0000000000000000f, 1.5849625007211560f,
34 2.0000000000000000f, 2.3219280948873621f,
35 2.5849625007211560f, 2.8073549220576041f,
36 3.0000000000000000f, 3.1699250014423121f,
37 3.3219280948873621f, 3.4594316186372973f,
38 3.5849625007211560f, 3.7004397181410921f,
39 3.8073549220576041f, 3.9068905956085187f,
40 4.0000000000000000f, 4.0874628412503390f,
41 4.1699250014423121f, 4.2479275134435852f,
42 4.3219280948873626f, 4.3923174227787606f,
43 4.4594316186372973f, 4.5235619560570130f,
44 4.5849625007211560f, 4.6438561897747243f,
45 4.7004397181410917f, 4.7548875021634682f,
46 4.8073549220576037f, 4.8579809951275718f,
47 4.9068905956085187f, 4.9541963103868749f,
48 5.0000000000000000f, 5.0443941193584533f,
49 5.0874628412503390f, 5.1292830169449663f,
50 5.1699250014423121f, 5.2094533656289501f,
51 5.2479275134435852f, 5.2854022188622487f,
52 5.3219280948873626f, 5.3575520046180837f,
53 5.3923174227787606f, 5.4262647547020979f,
54 5.4594316186372973f, 5.4918530963296747f,
55 5.5235619560570130f, 5.5545888516776376f,
56 5.5849625007211560f, 5.6147098441152083f,
57 5.6438561897747243f, 5.6724253419714951f,
58 5.7004397181410917f, 5.7279204545631987f,
59 5.7548875021634682f, 5.7813597135246599f,
60 5.8073549220576037f, 5.8328900141647412f,
61 5.8579809951275718f, 5.8826430493618415f,
62 5.9068905956085187f, 5.9307373375628866f,
63 5.9541963103868749f, 5.9772799234999167f,
64 6.0000000000000000f, 6.0223678130284543f,
65 6.0443941193584533f, 6.0660891904577720f,
66 6.0874628412503390f, 6.1085244567781691f,
67 6.1292830169449663f, 6.1497471195046822f,
68 6.1699250014423121f, 6.1898245588800175f,
69 6.2094533656289501f, 6.2288186904958804f,
70 6.2479275134435852f, 6.2667865406949010f,
71 6.2854022188622487f, 6.3037807481771030f,
72 6.3219280948873626f, 6.3398500028846243f,
73 6.3575520046180837f, 6.3750394313469245f,
74 6.3923174227787606f, 6.4093909361377017f,
75 6.4262647547020979f, 6.4429434958487279f,
76 6.4594316186372973f, 6.4757334309663976f,
77 6.4918530963296747f, 6.5077946401986963f,
78 6.5235619560570130f, 6.5391588111080309f,
79 6.5545888516776376f, 6.5698556083309478f,
80 6.5849625007211560f, 6.5999128421871278f,
81 6.6147098441152083f, 6.6293566200796094f,
82 6.6438561897747243f, 6.6582114827517946f,
83 6.6724253419714951f, 6.6865005271832185f,
84 6.7004397181410917f, 6.7142455176661224f,
85 6.7279204545631987f, 6.7414669864011464f,
86 6.7548875021634682f, 6.7681843247769259f,
87 6.7813597135246599f, 6.7944158663501061f,
88 6.8073549220576037f, 6.8201789624151878f,
89 6.8328900141647412f, 6.8454900509443747f,
90 6.8579809951275718f, 6.8703647195834047f,
91 6.8826430493618415f, 6.8948177633079437f,
92 6.9068905956085187f, 6.9188632372745946f,
93 6.9307373375628866f, 6.9425145053392398f,
94 6.9541963103868749f, 6.9657842846620869f,
95 6.9772799234999167f, 6.9886846867721654f,
96 7.0000000000000000f, 7.0112272554232539f,
97 7.0223678130284543f, 7.0334230015374501f,
98 7.0443941193584533f, 7.0552824355011898f,
99 7.0660891904577720f, 7.0768155970508308f,
100 7.0874628412503390f, 7.0980320829605263f,
101 7.1085244567781691f, 7.1189410727235076f,
102 7.1292830169449663f, 7.1395513523987936f,
103 7.1497471195046822f, 7.1598713367783890f,
104 7.1699250014423121f, 7.1799090900149344f,
105 7.1898245588800175f, 7.1996723448363644f,
106 7.2094533656289501f, 7.2191685204621611f,
107 7.2288186904958804f, 7.2384047393250785f,
108 7.2479275134435852f, 7.2573878426926521f,
109 7.2667865406949010f, 7.2761244052742375f,
110 7.2854022188622487f, 7.2946207488916270f,
111 7.3037807481771030f, 7.3128829552843557f,
112 7.3219280948873626f, 7.3309168781146167f,
113 7.3398500028846243f, 7.3487281542310771f,
114 7.3575520046180837f, 7.3663222142458160f,
115 7.3750394313469245f, 7.3837042924740519f,
116 7.3923174227787606f, 7.4008794362821843f,
117 7.4093909361377017f, 7.4178525148858982f,
118 7.4262647547020979f, 7.4346282276367245f,
119 7.4429434958487279f, 7.4512111118323289f,
120 7.4594316186372973f, 7.4676055500829976f,
121 7.4757334309663976f, 7.4838157772642563f,
122 7.4918530963296747f, 7.4998458870832056f,
123 7.5077946401986963f, 7.5156998382840427f,
124 7.5235619560570130f, 7.5313814605163118f,
125 7.5391588111080309f, 7.5468944598876364f,
126 7.5545888516776376f, 7.5622424242210728f,
127 7.5698556083309478f, 7.5774288280357486f,
128 7.5849625007211560f, 7.5924570372680806f,
129 7.5999128421871278f, 7.6073303137496104f,
130 7.6147098441152083f, 7.6220518194563764f,
131 7.6293566200796094f, 7.6366246205436487f,
132 7.6438561897747243f, 7.6510516911789281f,
133 7.6582114827517946f, 7.6653359171851764f,
134 7.6724253419714951f, 7.6794800995054464f,
135 7.6865005271832185f, 7.6934869574993252f,
136 7.7004397181410917f, 7.7073591320808825f,
137 7.7142455176661224f, 7.7210991887071855f,
138 7.7279204545631987f, 7.7347096202258383f,
139 7.7414669864011464f, 7.7481928495894605f,
140 7.7548875021634682f, 7.7615512324444795f,
141 7.7681843247769259f, 7.7747870596011736f,
142 7.7813597135246599f, 7.7879025593914317f,
143 7.7944158663501061f, 7.8008998999203047f,
144 7.8073549220576037f, 7.8137811912170374f,
145 7.8201789624151878f, 7.8265484872909150f,
146 7.8328900141647412f, 7.8392037880969436f,
147 7.8454900509443747f, 7.8517490414160571f,
148 7.8579809951275718f, 7.8641861446542797f,
149 7.8703647195834047f, 7.8765169465649993f,
150 7.8826430493618415f, 7.8887432488982591f,
151 7.8948177633079437f, 7.9008668079807486f,
152 7.9068905956085187f, 7.9128893362299619f,
153 7.9188632372745946f, 7.9248125036057812f,
154 7.9307373375628866f, 7.9366379390025709f,
155 7.9425145053392398f, 7.9483672315846778f,
156 7.9541963103868749f, 7.9600019320680805f,
157 7.9657842846620869f, 7.9715435539507719f,
158 7.9772799234999167f, 7.9829935746943103f,
159 7.9886846867721654f, 7.9943534368588577f
160 };
161
162 const float kSLog2Table[LOG_LOOKUP_IDX_MAX] = {
163 0.00000000f, 0.00000000f, 2.00000000f, 4.75488750f,
164 8.00000000f, 11.60964047f, 15.50977500f, 19.65148445f,
165 24.00000000f, 28.52932501f, 33.21928095f, 38.05374781f,
166 43.01955001f, 48.10571634f, 53.30296891f, 58.60335893f,
167 64.00000000f, 69.48686830f, 75.05865003f, 80.71062276f,
168 86.43856190f, 92.23866588f, 98.10749561f, 104.04192499f,
169 110.03910002f, 116.09640474f, 122.21143267f, 128.38196256f,
170 134.60593782f, 140.88144886f, 147.20671787f, 153.58008562f,
171 160.00000000f, 166.46500594f, 172.97373660f, 179.52490559f,
172 186.11730005f, 192.74977453f, 199.42124551f, 206.13068654f,
173 212.87712380f, 219.65963219f, 226.47733176f, 233.32938445f,
174 240.21499122f, 247.13338933f, 254.08384998f, 261.06567603f,
175 268.07820003f, 275.12078236f, 282.19280949f, 289.29369244f,
176 296.42286534f, 303.57978409f, 310.76392512f, 317.97478424f,
177 325.21187564f, 332.47473081f, 339.76289772f, 347.07593991f,
178 354.41343574f, 361.77497759f, 369.16017124f, 376.56863518f,
179 384.00000000f, 391.45390785f, 398.93001188f, 406.42797576f,
180 413.94747321f, 421.48818752f, 429.04981119f, 436.63204548f,
181 444.23460010f, 451.85719280f, 459.49954906f, 467.16140179f,
182 474.84249102f, 482.54256363f, 490.26137307f, 497.99867911f,
183 505.75424759f, 513.52785023f, 521.31926438f, 529.12827280f,
184 536.95466351f, 544.79822957f, 552.65876890f, 560.53608414f,
185 568.42998244f, 576.34027536f, 584.26677867f, 592.20931226f,
186 600.16769996f, 608.14176943f, 616.13135206f, 624.13628279f,
187 632.15640007f, 640.19154569f, 648.24156472f, 656.30630539f,
188 664.38561898f, 672.47935976f, 680.58738488f, 688.70955430f,
189 696.84573069f, 704.99577935f, 713.15956818f, 721.33696754f,
190 729.52785023f, 737.73209140f, 745.94956849f, 754.18016116f,
191 762.42375127f, 770.68022275f, 778.94946161f, 787.23135586f,
192 795.52579543f, 803.83267219f, 812.15187982f, 820.48331383f,
193 828.82687147f, 837.18245171f, 845.54995518f, 853.92928416f,
194 862.32034249f, 870.72303558f, 879.13727036f, 887.56295522f,
195 896.00000000f, 904.44831595f, 912.90781569f, 921.37841320f,
196 929.86002376f, 938.35256392f, 946.85595152f, 955.37010560f,
197 963.89494641f, 972.43039537f, 980.97637504f, 989.53280911f,
198 998.09962237f, 1006.67674069f, 1015.26409097f, 1023.86160116f,
199 1032.46920021f, 1041.08681805f, 1049.71438560f, 1058.35183469f,
200 1066.99909811f, 1075.65610955f, 1084.32280357f, 1092.99911564f,
201 1101.68498204f, 1110.38033993f, 1119.08512727f, 1127.79928282f,
202 1136.52274614f, 1145.25545758f, 1153.99735821f, 1162.74838989f,
203 1171.50849518f, 1180.27761738f, 1189.05570047f, 1197.84268914f,
204 1206.63852876f, 1215.44316535f, 1224.25654560f, 1233.07861684f,
205 1241.90932703f, 1250.74862473f, 1259.59645914f, 1268.45278005f,
206 1277.31753781f, 1286.19068338f, 1295.07216828f, 1303.96194457f,
207 1312.85996488f, 1321.76618236f, 1330.68055071f, 1339.60302413f,
208 1348.53355734f, 1357.47210556f, 1366.41862452f, 1375.37307041f,
209 1384.33539991f, 1393.30557020f, 1402.28353887f, 1411.26926400f,
210 1420.26270412f, 1429.26381818f, 1438.27256558f, 1447.28890615f,
211 1456.31280014f, 1465.34420819f, 1474.38309138f, 1483.42941118f,
212 1492.48312945f, 1501.54420843f, 1510.61261078f, 1519.68829949f,
213 1528.77123795f, 1537.86138993f, 1546.95871952f, 1556.06319119f,
214 1565.17476976f, 1574.29342040f, 1583.41910860f, 1592.55180020f,
215 1601.69146137f, 1610.83805860f, 1619.99155871f, 1629.15192882f,
216 1638.31913637f, 1647.49314911f, 1656.67393509f, 1665.86146266f,
217 1675.05570047f, 1684.25661744f, 1693.46418280f, 1702.67836605f,
218 1711.89913698f, 1721.12646563f, 1730.36032233f, 1739.60067768f,
219 1748.84750254f, 1758.10076802f, 1767.36044551f, 1776.62650662f,
220 1785.89892323f, 1795.17766747f, 1804.46271172f, 1813.75402857f,
221 1823.05159087f, 1832.35537170f, 1841.66534438f, 1850.98148244f,
222 1860.30375965f, 1869.63214999f, 1878.96662767f, 1888.30716711f,
223 1897.65374295f, 1907.00633003f, 1916.36490342f, 1925.72943838f,
224 1935.09991037f, 1944.47629506f, 1953.85856831f, 1963.24670620f,
225 1972.64068498f, 1982.04048108f, 1991.44607117f, 2000.85743204f,
226 2010.27454072f, 2019.69737440f, 2029.12591044f, 2038.56012640f
227 };
228
229 const VP8LPrefixCode kPrefixEncodeCode[PREFIX_LOOKUP_IDX_MAX] = {
230 { 0, 0}, { 0, 0}, { 1, 0}, { 2, 0}, { 3, 0}, { 4, 1}, { 4, 1}, { 5, 1},
231 { 5, 1}, { 6, 2}, { 6, 2}, { 6, 2}, { 6, 2}, { 7, 2}, { 7, 2}, { 7, 2},
232 { 7, 2}, { 8, 3}, { 8, 3}, { 8, 3}, { 8, 3}, { 8, 3}, { 8, 3}, { 8, 3},
233 { 8, 3}, { 9, 3}, { 9, 3}, { 9, 3}, { 9, 3}, { 9, 3}, { 9, 3}, { 9, 3},
234 { 9, 3}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4},
235 {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4},
236 {10, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4},
237 {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4},
238 {11, 4}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5},
239 {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5},
240 {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5},
241 {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5},
242 {12, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5},
243 {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5},
244 {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5},
245 {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5},
246 {13, 5}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
247 {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
248 {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
249 {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
250 {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
251 {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
252 {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
253 {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
254 {14, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
255 {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
256 {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
257 {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
258 {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
259 {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
260 {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
261 {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
262 {15, 6}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
263 {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
264 {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
265 {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
266 {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
267 {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
268 {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
269 {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
270 {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
271 {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
272 {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
273 {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
274 {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
275 {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
276 {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
277 {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
278 {16, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
279 {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
280 {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
281 {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
282 {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
283 {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
284 {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
285 {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
286 {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
287 {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
288 {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
289 {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
290 {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
291 {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
292 {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
293 {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
294 };
295
296 const uint8_t kPrefixEncodeExtraBitsValue[PREFIX_LOOKUP_IDX_MAX] = {
297 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3,
298 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7,
299 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
300 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
301 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
302 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
303 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
304 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
305 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
306 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
307 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
308 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
309 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
310 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
311 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
312 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
313 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
314 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
315 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
316 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
317 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
318 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
319 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
320 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
321 127,
322 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
323 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
324 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
325 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
326 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
327 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
328 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
329 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126
330 };
331
FastSLog2Slow(uint32_t v)332 static float FastSLog2Slow(uint32_t v) {
333 assert(v >= LOG_LOOKUP_IDX_MAX);
334 if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
335 int log_cnt = 0;
336 uint32_t y = 1;
337 int correction = 0;
338 const float v_f = (float)v;
339 const uint32_t orig_v = v;
340 do {
341 ++log_cnt;
342 v = v >> 1;
343 y = y << 1;
344 } while (v >= LOG_LOOKUP_IDX_MAX);
345 // vf = (2^log_cnt) * Xf; where y = 2^log_cnt and Xf < 256
346 // Xf = floor(Xf) * (1 + (v % y) / v)
347 // log2(Xf) = log2(floor(Xf)) + log2(1 + (v % y) / v)
348 // The correction factor: log(1 + d) ~ d; for very small d values, so
349 // log2(1 + (v % y) / v) ~ LOG_2_RECIPROCAL * (v % y)/v
350 // LOG_2_RECIPROCAL ~ 23/16
351 correction = (23 * (orig_v & (y - 1))) >> 4;
352 return v_f * (kLog2Table[v] + log_cnt) + correction;
353 } else {
354 return (float)(LOG_2_RECIPROCAL * v * log((double)v));
355 }
356 }
357
FastLog2Slow(uint32_t v)358 static float FastLog2Slow(uint32_t v) {
359 assert(v >= LOG_LOOKUP_IDX_MAX);
360 if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
361 int log_cnt = 0;
362 uint32_t y = 1;
363 const uint32_t orig_v = v;
364 double log_2;
365 do {
366 ++log_cnt;
367 v = v >> 1;
368 y = y << 1;
369 } while (v >= LOG_LOOKUP_IDX_MAX);
370 log_2 = kLog2Table[v] + log_cnt;
371 if (orig_v >= APPROX_LOG_MAX) {
372 // Since the division is still expensive, add this correction factor only
373 // for large values of 'v'.
374 const int correction = (23 * (orig_v & (y - 1))) >> 4;
375 log_2 += (double)correction / orig_v;
376 }
377 return (float)log_2;
378 } else {
379 return (float)(LOG_2_RECIPROCAL * log((double)v));
380 }
381 }
382
383 // Mostly used to reduce code size + readability
GetMin(int a,int b)384 static WEBP_INLINE int GetMin(int a, int b) { return (a > b) ? b : a; }
385
386 //------------------------------------------------------------------------------
387 // Methods to calculate Entropy (Shannon).
388
PredictionCostSpatial(const int counts[256],int weight_0,double exp_val)389 static float PredictionCostSpatial(const int counts[256], int weight_0,
390 double exp_val) {
391 const int significant_symbols = 256 >> 4;
392 const double exp_decay_factor = 0.6;
393 double bits = weight_0 * counts[0];
394 int i;
395 for (i = 1; i < significant_symbols; ++i) {
396 bits += exp_val * (counts[i] + counts[256 - i]);
397 exp_val *= exp_decay_factor;
398 }
399 return (float)(-0.1 * bits);
400 }
401
402 // Compute the combined Shanon's entropy for distribution {X} and {X+Y}
CombinedShannonEntropy(const int X[256],const int Y[256])403 static float CombinedShannonEntropy(const int X[256], const int Y[256]) {
404 int i;
405 double retval = 0.;
406 int sumX = 0, sumXY = 0;
407 for (i = 0; i < 256; ++i) {
408 const int x = X[i];
409 if (x != 0) {
410 const int xy = x + Y[i];
411 sumX += x;
412 retval -= VP8LFastSLog2(x);
413 sumXY += xy;
414 retval -= VP8LFastSLog2(xy);
415 } else if (Y[i] != 0) {
416 sumXY += Y[i];
417 retval -= VP8LFastSLog2(Y[i]);
418 }
419 }
420 retval += VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY);
421 return (float)retval;
422 }
423
PredictionCostSpatialHistogram(const int accumulated[4][256],const int tile[4][256])424 static float PredictionCostSpatialHistogram(const int accumulated[4][256],
425 const int tile[4][256]) {
426 int i;
427 double retval = 0;
428 for (i = 0; i < 4; ++i) {
429 const double kExpValue = 0.94;
430 retval += PredictionCostSpatial(tile[i], 1, kExpValue);
431 retval += VP8LCombinedShannonEntropy(tile[i], accumulated[i]);
432 }
433 return (float)retval;
434 }
435
VP8LBitEntropyInit(VP8LBitEntropy * const entropy)436 void VP8LBitEntropyInit(VP8LBitEntropy* const entropy) {
437 entropy->entropy = 0.;
438 entropy->sum = 0;
439 entropy->nonzeros = 0;
440 entropy->max_val = 0;
441 entropy->nonzero_code = VP8L_NON_TRIVIAL_SYM;
442 }
443
VP8LBitsEntropyUnrefined(const uint32_t * const array,int n,VP8LBitEntropy * const entropy)444 void VP8LBitsEntropyUnrefined(const uint32_t* const array, int n,
445 VP8LBitEntropy* const entropy) {
446 int i;
447
448 VP8LBitEntropyInit(entropy);
449
450 for (i = 0; i < n; ++i) {
451 if (array[i] != 0) {
452 entropy->sum += array[i];
453 entropy->nonzero_code = i;
454 ++entropy->nonzeros;
455 entropy->entropy -= VP8LFastSLog2(array[i]);
456 if (entropy->max_val < array[i]) {
457 entropy->max_val = array[i];
458 }
459 }
460 }
461 entropy->entropy += VP8LFastSLog2(entropy->sum);
462 }
463
GetEntropyUnrefinedHelper(uint32_t val,int i,uint32_t * const val_prev,int * const i_prev,VP8LBitEntropy * const bit_entropy,VP8LStreaks * const stats)464 static WEBP_INLINE void GetEntropyUnrefinedHelper(
465 uint32_t val, int i, uint32_t* const val_prev, int* const i_prev,
466 VP8LBitEntropy* const bit_entropy, VP8LStreaks* const stats) {
467 const int streak = i - *i_prev;
468
469 // Gather info for the bit entropy.
470 if (*val_prev != 0) {
471 bit_entropy->sum += (*val_prev) * streak;
472 bit_entropy->nonzeros += streak;
473 bit_entropy->nonzero_code = *i_prev;
474 bit_entropy->entropy -= VP8LFastSLog2(*val_prev) * streak;
475 if (bit_entropy->max_val < *val_prev) {
476 bit_entropy->max_val = *val_prev;
477 }
478 }
479
480 // Gather info for the Huffman cost.
481 stats->counts[*val_prev != 0] += (streak > 3);
482 stats->streaks[*val_prev != 0][(streak > 3)] += streak;
483
484 *val_prev = val;
485 *i_prev = i;
486 }
487
VP8LGetEntropyUnrefined(const uint32_t * const X,int length,VP8LBitEntropy * const bit_entropy,VP8LStreaks * const stats)488 void VP8LGetEntropyUnrefined(const uint32_t* const X, int length,
489 VP8LBitEntropy* const bit_entropy,
490 VP8LStreaks* const stats) {
491 int i;
492 int i_prev = 0;
493 uint32_t x_prev = X[0];
494
495 memset(stats, 0, sizeof(*stats));
496 VP8LBitEntropyInit(bit_entropy);
497
498 for (i = 1; i < length; ++i) {
499 const uint32_t x = X[i];
500 if (x != x_prev) {
501 VP8LGetEntropyUnrefinedHelper(x, i, &x_prev, &i_prev, bit_entropy, stats);
502 }
503 }
504 VP8LGetEntropyUnrefinedHelper(0, i, &x_prev, &i_prev, bit_entropy, stats);
505
506 bit_entropy->entropy += VP8LFastSLog2(bit_entropy->sum);
507 }
508
VP8LGetCombinedEntropyUnrefined(const uint32_t * const X,const uint32_t * const Y,int length,VP8LBitEntropy * const bit_entropy,VP8LStreaks * const stats)509 void VP8LGetCombinedEntropyUnrefined(const uint32_t* const X,
510 const uint32_t* const Y, int length,
511 VP8LBitEntropy* const bit_entropy,
512 VP8LStreaks* const stats) {
513 int i = 1;
514 int i_prev = 0;
515 uint32_t xy_prev = X[0] + Y[0];
516
517 memset(stats, 0, sizeof(*stats));
518 VP8LBitEntropyInit(bit_entropy);
519
520 for (i = 1; i < length; ++i) {
521 const uint32_t xy = X[i] + Y[i];
522 if (xy != xy_prev) {
523 VP8LGetEntropyUnrefinedHelper(xy, i, &xy_prev, &i_prev, bit_entropy,
524 stats);
525 }
526 }
527 VP8LGetEntropyUnrefinedHelper(0, i, &xy_prev, &i_prev, bit_entropy, stats);
528
529 bit_entropy->entropy += VP8LFastSLog2(bit_entropy->sum);
530 }
531
UpdateHisto(int histo_argb[4][256],uint32_t argb)532 static WEBP_INLINE void UpdateHisto(int histo_argb[4][256], uint32_t argb) {
533 ++histo_argb[0][argb >> 24];
534 ++histo_argb[1][(argb >> 16) & 0xff];
535 ++histo_argb[2][(argb >> 8) & 0xff];
536 ++histo_argb[3][argb & 0xff];
537 }
538
539 //------------------------------------------------------------------------------
540
Predict(VP8LPredictorFunc pred_func,int x,int y,const uint32_t * current_row,const uint32_t * upper_row)541 static WEBP_INLINE uint32_t Predict(VP8LPredictorFunc pred_func,
542 int x, int y,
543 const uint32_t* current_row,
544 const uint32_t* upper_row) {
545 if (y == 0) {
546 return (x == 0) ? ARGB_BLACK : current_row[x - 1]; // Left.
547 } else if (x == 0) {
548 return upper_row[x]; // Top.
549 } else {
550 return pred_func(current_row[x - 1], upper_row + x);
551 }
552 }
553
554 // Returns best predictor and updates the accumulated histogram.
GetBestPredictorForTile(int width,int height,int tile_x,int tile_y,int bits,int accumulated[4][256],const uint32_t * const argb_scratch,int exact)555 static int GetBestPredictorForTile(int width, int height,
556 int tile_x, int tile_y, int bits,
557 int accumulated[4][256],
558 const uint32_t* const argb_scratch,
559 int exact) {
560 const int kNumPredModes = 14;
561 const int col_start = tile_x << bits;
562 const int row_start = tile_y << bits;
563 const int tile_size = 1 << bits;
564 const int max_y = GetMin(tile_size, height - row_start);
565 const int max_x = GetMin(tile_size, width - col_start);
566 float best_diff = MAX_DIFF_COST;
567 int best_mode = 0;
568 int mode;
569 int histo_stack_1[4][256];
570 int histo_stack_2[4][256];
571 // Need pointers to be able to swap arrays.
572 int (*histo_argb)[256] = histo_stack_1;
573 int (*best_histo)[256] = histo_stack_2;
574
575 int i, j;
576 for (mode = 0; mode < kNumPredModes; ++mode) {
577 const uint32_t* current_row = argb_scratch;
578 const VP8LPredictorFunc pred_func = VP8LPredictors[mode];
579 float cur_diff;
580 int y;
581 memset(histo_argb, 0, sizeof(histo_stack_1));
582 for (y = 0; y < max_y; ++y) {
583 int x;
584 const int row = row_start + y;
585 const uint32_t* const upper_row = current_row;
586 current_row = upper_row + width;
587 for (x = 0; x < max_x; ++x) {
588 const int col = col_start + x;
589 const uint32_t predict =
590 Predict(pred_func, col, row, current_row, upper_row);
591 uint32_t residual = VP8LSubPixels(current_row[col], predict);
592 if (!exact && (current_row[col] & kMaskAlpha) == 0) {
593 residual &= kMaskAlpha; // See CopyTileWithPrediction.
594 }
595 UpdateHisto(histo_argb, residual);
596 }
597 }
598 cur_diff = PredictionCostSpatialHistogram(
599 (const int (*)[256])accumulated, (const int (*)[256])histo_argb);
600 if (cur_diff < best_diff) {
601 int (*tmp)[256] = histo_argb;
602 histo_argb = best_histo;
603 best_histo = tmp;
604 best_diff = cur_diff;
605 best_mode = mode;
606 }
607 }
608
609 for (i = 0; i < 4; i++) {
610 for (j = 0; j < 256; j++) {
611 accumulated[i][j] += best_histo[i][j];
612 }
613 }
614
615 return best_mode;
616 }
617
CopyImageWithPrediction(int width,int height,int bits,uint32_t * const modes,uint32_t * const argb_scratch,uint32_t * const argb,int low_effort,int exact)618 static void CopyImageWithPrediction(int width, int height,
619 int bits, uint32_t* const modes,
620 uint32_t* const argb_scratch,
621 uint32_t* const argb,
622 int low_effort, int exact) {
623 const int tiles_per_row = VP8LSubSampleSize(width, bits);
624 const int mask = (1 << bits) - 1;
625 // The row size is one pixel longer to allow the top right pixel to point to
626 // the leftmost pixel of the next row when at the right edge.
627 uint32_t* current_row = argb_scratch;
628 uint32_t* upper_row = argb_scratch + width + 1;
629 int y;
630 VP8LPredictorFunc pred_func =
631 low_effort ? VP8LPredictors[kPredLowEffort] : NULL;
632
633 for (y = 0; y < height; ++y) {
634 int x;
635 uint32_t* tmp = upper_row;
636 upper_row = current_row;
637 current_row = tmp;
638 memcpy(current_row, argb + y * width, sizeof(*current_row) * width);
639 current_row[width] = (y + 1 < height) ? argb[(y + 1) * width] : ARGB_BLACK;
640
641 if (low_effort) {
642 for (x = 0; x < width; ++x) {
643 const uint32_t predict =
644 Predict(pred_func, x, y, current_row, upper_row);
645 argb[y * width + x] = VP8LSubPixels(current_row[x], predict);
646 }
647 } else {
648 for (x = 0; x < width; ++x) {
649 uint32_t predict, residual;
650 if ((x & mask) == 0) {
651 const int mode =
652 (modes[(y >> bits) * tiles_per_row + (x >> bits)] >> 8) & 0xff;
653 pred_func = VP8LPredictors[mode];
654 }
655 predict = Predict(pred_func, x, y, current_row, upper_row);
656 residual = VP8LSubPixels(current_row[x], predict);
657 if (!exact && (current_row[x] & kMaskAlpha) == 0) {
658 // If alpha is 0, cleanup RGB. We can choose the RGB values of the
659 // residual for best compression. The prediction of alpha itself can
660 // be non-zero and must be kept though. We choose RGB of the residual
661 // to be 0.
662 residual &= kMaskAlpha;
663 // Update input image so that next predictions use correct RGB value.
664 current_row[x] = predict & ~kMaskAlpha;
665 if (x == 0 && y != 0) upper_row[width] = current_row[x];
666 }
667 argb[y * width + x] = residual;
668 }
669 }
670 }
671 }
672
VP8LResidualImage(int width,int height,int bits,int low_effort,uint32_t * const argb,uint32_t * const argb_scratch,uint32_t * const image,int exact)673 void VP8LResidualImage(int width, int height, int bits, int low_effort,
674 uint32_t* const argb, uint32_t* const argb_scratch,
675 uint32_t* const image, int exact) {
676 const int max_tile_size = 1 << bits;
677 const int tiles_per_row = VP8LSubSampleSize(width, bits);
678 const int tiles_per_col = VP8LSubSampleSize(height, bits);
679 uint32_t* const upper_row = argb_scratch;
680 uint32_t* const current_tile_rows = argb_scratch + width;
681 int tile_y;
682 int histo[4][256];
683 if (low_effort) {
684 int i;
685 for (i = 0; i < tiles_per_row * tiles_per_col; ++i) {
686 image[i] = ARGB_BLACK | (kPredLowEffort << 8);
687 }
688 } else {
689 memset(histo, 0, sizeof(histo));
690 for (tile_y = 0; tile_y < tiles_per_col; ++tile_y) {
691 const int tile_y_offset = tile_y * max_tile_size;
692 const int this_tile_height =
693 (tile_y < tiles_per_col - 1) ? max_tile_size : height - tile_y_offset;
694 int tile_x;
695 if (tile_y > 0) {
696 memcpy(upper_row, current_tile_rows + (max_tile_size - 1) * width,
697 width * sizeof(*upper_row));
698 }
699 memcpy(current_tile_rows, &argb[tile_y_offset * width],
700 this_tile_height * width * sizeof(*current_tile_rows));
701 for (tile_x = 0; tile_x < tiles_per_row; ++tile_x) {
702 const int pred = GetBestPredictorForTile(width, height, tile_x, tile_y,
703 bits, (int (*)[256])histo, argb_scratch, exact);
704 image[tile_y * tiles_per_row + tile_x] = ARGB_BLACK | (pred << 8);
705 }
706 }
707 }
708
709 CopyImageWithPrediction(width, height, bits,
710 image, argb_scratch, argb, low_effort, exact);
711 }
712
VP8LSubtractGreenFromBlueAndRed_C(uint32_t * argb_data,int num_pixels)713 void VP8LSubtractGreenFromBlueAndRed_C(uint32_t* argb_data, int num_pixels) {
714 int i;
715 for (i = 0; i < num_pixels; ++i) {
716 const uint32_t argb = argb_data[i];
717 const uint32_t green = (argb >> 8) & 0xff;
718 const uint32_t new_r = (((argb >> 16) & 0xff) - green) & 0xff;
719 const uint32_t new_b = ((argb & 0xff) - green) & 0xff;
720 argb_data[i] = (argb & 0xff00ff00) | (new_r << 16) | new_b;
721 }
722 }
723
MultipliersClear(VP8LMultipliers * const m)724 static WEBP_INLINE void MultipliersClear(VP8LMultipliers* const m) {
725 m->green_to_red_ = 0;
726 m->green_to_blue_ = 0;
727 m->red_to_blue_ = 0;
728 }
729
ColorTransformDelta(int8_t color_pred,int8_t color)730 static WEBP_INLINE uint32_t ColorTransformDelta(int8_t color_pred,
731 int8_t color) {
732 return (uint32_t)((int)(color_pred) * color) >> 5;
733 }
734
ColorCodeToMultipliers(uint32_t color_code,VP8LMultipliers * const m)735 static WEBP_INLINE void ColorCodeToMultipliers(uint32_t color_code,
736 VP8LMultipliers* const m) {
737 m->green_to_red_ = (color_code >> 0) & 0xff;
738 m->green_to_blue_ = (color_code >> 8) & 0xff;
739 m->red_to_blue_ = (color_code >> 16) & 0xff;
740 }
741
MultipliersToColorCode(const VP8LMultipliers * const m)742 static WEBP_INLINE uint32_t MultipliersToColorCode(
743 const VP8LMultipliers* const m) {
744 return 0xff000000u |
745 ((uint32_t)(m->red_to_blue_) << 16) |
746 ((uint32_t)(m->green_to_blue_) << 8) |
747 m->green_to_red_;
748 }
749
VP8LTransformColor_C(const VP8LMultipliers * const m,uint32_t * data,int num_pixels)750 void VP8LTransformColor_C(const VP8LMultipliers* const m, uint32_t* data,
751 int num_pixels) {
752 int i;
753 for (i = 0; i < num_pixels; ++i) {
754 const uint32_t argb = data[i];
755 const uint32_t green = argb >> 8;
756 const uint32_t red = argb >> 16;
757 uint32_t new_red = red;
758 uint32_t new_blue = argb;
759 new_red -= ColorTransformDelta(m->green_to_red_, green);
760 new_red &= 0xff;
761 new_blue -= ColorTransformDelta(m->green_to_blue_, green);
762 new_blue -= ColorTransformDelta(m->red_to_blue_, red);
763 new_blue &= 0xff;
764 data[i] = (argb & 0xff00ff00u) | (new_red << 16) | (new_blue);
765 }
766 }
767
TransformColorRed(uint8_t green_to_red,uint32_t argb)768 static WEBP_INLINE uint8_t TransformColorRed(uint8_t green_to_red,
769 uint32_t argb) {
770 const uint32_t green = argb >> 8;
771 uint32_t new_red = argb >> 16;
772 new_red -= ColorTransformDelta(green_to_red, green);
773 return (new_red & 0xff);
774 }
775
TransformColorBlue(uint8_t green_to_blue,uint8_t red_to_blue,uint32_t argb)776 static WEBP_INLINE uint8_t TransformColorBlue(uint8_t green_to_blue,
777 uint8_t red_to_blue,
778 uint32_t argb) {
779 const uint32_t green = argb >> 8;
780 const uint32_t red = argb >> 16;
781 uint8_t new_blue = argb;
782 new_blue -= ColorTransformDelta(green_to_blue, green);
783 new_blue -= ColorTransformDelta(red_to_blue, red);
784 return (new_blue & 0xff);
785 }
786
PredictionCostCrossColor(const int accumulated[256],const int counts[256])787 static float PredictionCostCrossColor(const int accumulated[256],
788 const int counts[256]) {
789 // Favor low entropy, locally and globally.
790 // Favor small absolute values for PredictionCostSpatial
791 static const double kExpValue = 2.4;
792 return VP8LCombinedShannonEntropy(counts, accumulated) +
793 PredictionCostSpatial(counts, 3, kExpValue);
794 }
795
VP8LCollectColorRedTransforms_C(const uint32_t * argb,int stride,int tile_width,int tile_height,int green_to_red,int histo[])796 void VP8LCollectColorRedTransforms_C(const uint32_t* argb, int stride,
797 int tile_width, int tile_height,
798 int green_to_red, int histo[]) {
799 while (tile_height-- > 0) {
800 int x;
801 for (x = 0; x < tile_width; ++x) {
802 ++histo[TransformColorRed(green_to_red, argb[x])];
803 }
804 argb += stride;
805 }
806 }
807
GetPredictionCostCrossColorRed(const uint32_t * argb,int stride,int tile_width,int tile_height,VP8LMultipliers prev_x,VP8LMultipliers prev_y,int green_to_red,const int accumulated_red_histo[256])808 static float GetPredictionCostCrossColorRed(
809 const uint32_t* argb, int stride, int tile_width, int tile_height,
810 VP8LMultipliers prev_x, VP8LMultipliers prev_y, int green_to_red,
811 const int accumulated_red_histo[256]) {
812 int histo[256] = { 0 };
813 float cur_diff;
814
815 VP8LCollectColorRedTransforms(argb, stride, tile_width, tile_height,
816 green_to_red, histo);
817
818 cur_diff = PredictionCostCrossColor(accumulated_red_histo, histo);
819 if ((uint8_t)green_to_red == prev_x.green_to_red_) {
820 cur_diff -= 3; // favor keeping the areas locally similar
821 }
822 if ((uint8_t)green_to_red == prev_y.green_to_red_) {
823 cur_diff -= 3; // favor keeping the areas locally similar
824 }
825 if (green_to_red == 0) {
826 cur_diff -= 3;
827 }
828 return cur_diff;
829 }
830
GetBestGreenToRed(const uint32_t * argb,int stride,int tile_width,int tile_height,VP8LMultipliers prev_x,VP8LMultipliers prev_y,int quality,const int accumulated_red_histo[256],VP8LMultipliers * const best_tx)831 static void GetBestGreenToRed(
832 const uint32_t* argb, int stride, int tile_width, int tile_height,
833 VP8LMultipliers prev_x, VP8LMultipliers prev_y, int quality,
834 const int accumulated_red_histo[256], VP8LMultipliers* const best_tx) {
835 const int kMaxIters = 4 + ((7 * quality) >> 8); // in range [4..6]
836 int green_to_red_best = 0;
837 int iter, offset;
838 float best_diff = GetPredictionCostCrossColorRed(
839 argb, stride, tile_width, tile_height, prev_x, prev_y,
840 green_to_red_best, accumulated_red_histo);
841 for (iter = 0; iter < kMaxIters; ++iter) {
842 // ColorTransformDelta is a 3.5 bit fixed point, so 32 is equal to
843 // one in color computation. Having initial delta here as 1 is sufficient
844 // to explore the range of (-2, 2).
845 const int delta = 32 >> iter;
846 // Try a negative and a positive delta from the best known value.
847 for (offset = -delta; offset <= delta; offset += 2 * delta) {
848 const int green_to_red_cur = offset + green_to_red_best;
849 const float cur_diff = GetPredictionCostCrossColorRed(
850 argb, stride, tile_width, tile_height, prev_x, prev_y,
851 green_to_red_cur, accumulated_red_histo);
852 if (cur_diff < best_diff) {
853 best_diff = cur_diff;
854 green_to_red_best = green_to_red_cur;
855 }
856 }
857 }
858 best_tx->green_to_red_ = green_to_red_best;
859 }
860
VP8LCollectColorBlueTransforms_C(const uint32_t * argb,int stride,int tile_width,int tile_height,int green_to_blue,int red_to_blue,int histo[])861 void VP8LCollectColorBlueTransforms_C(const uint32_t* argb, int stride,
862 int tile_width, int tile_height,
863 int green_to_blue, int red_to_blue,
864 int histo[]) {
865 while (tile_height-- > 0) {
866 int x;
867 for (x = 0; x < tile_width; ++x) {
868 ++histo[TransformColorBlue(green_to_blue, red_to_blue, argb[x])];
869 }
870 argb += stride;
871 }
872 }
873
GetPredictionCostCrossColorBlue(const uint32_t * argb,int stride,int tile_width,int tile_height,VP8LMultipliers prev_x,VP8LMultipliers prev_y,int green_to_blue,int red_to_blue,const int accumulated_blue_histo[256])874 static float GetPredictionCostCrossColorBlue(
875 const uint32_t* argb, int stride, int tile_width, int tile_height,
876 VP8LMultipliers prev_x, VP8LMultipliers prev_y,
877 int green_to_blue, int red_to_blue, const int accumulated_blue_histo[256]) {
878 int histo[256] = { 0 };
879 float cur_diff;
880
881 VP8LCollectColorBlueTransforms(argb, stride, tile_width, tile_height,
882 green_to_blue, red_to_blue, histo);
883
884 cur_diff = PredictionCostCrossColor(accumulated_blue_histo, histo);
885 if ((uint8_t)green_to_blue == prev_x.green_to_blue_) {
886 cur_diff -= 3; // favor keeping the areas locally similar
887 }
888 if ((uint8_t)green_to_blue == prev_y.green_to_blue_) {
889 cur_diff -= 3; // favor keeping the areas locally similar
890 }
891 if ((uint8_t)red_to_blue == prev_x.red_to_blue_) {
892 cur_diff -= 3; // favor keeping the areas locally similar
893 }
894 if ((uint8_t)red_to_blue == prev_y.red_to_blue_) {
895 cur_diff -= 3; // favor keeping the areas locally similar
896 }
897 if (green_to_blue == 0) {
898 cur_diff -= 3;
899 }
900 if (red_to_blue == 0) {
901 cur_diff -= 3;
902 }
903 return cur_diff;
904 }
905
906 #define kGreenRedToBlueNumAxis 8
907 #define kGreenRedToBlueMaxIters 7
GetBestGreenRedToBlue(const uint32_t * argb,int stride,int tile_width,int tile_height,VP8LMultipliers prev_x,VP8LMultipliers prev_y,int quality,const int accumulated_blue_histo[256],VP8LMultipliers * const best_tx)908 static void GetBestGreenRedToBlue(
909 const uint32_t* argb, int stride, int tile_width, int tile_height,
910 VP8LMultipliers prev_x, VP8LMultipliers prev_y, int quality,
911 const int accumulated_blue_histo[256],
912 VP8LMultipliers* const best_tx) {
913 const int8_t offset[kGreenRedToBlueNumAxis][2] =
914 {{0, -1}, {0, 1}, {-1, 0}, {1, 0}, {-1, -1}, {-1, 1}, {1, -1}, {1, 1}};
915 const int8_t delta_lut[kGreenRedToBlueMaxIters] = { 16, 16, 8, 4, 2, 2, 2 };
916 const int iters =
917 (quality < 25) ? 1 : (quality > 50) ? kGreenRedToBlueMaxIters : 4;
918 int green_to_blue_best = 0;
919 int red_to_blue_best = 0;
920 int iter;
921 // Initial value at origin:
922 float best_diff = GetPredictionCostCrossColorBlue(
923 argb, stride, tile_width, tile_height, prev_x, prev_y,
924 green_to_blue_best, red_to_blue_best, accumulated_blue_histo);
925 for (iter = 0; iter < iters; ++iter) {
926 const int delta = delta_lut[iter];
927 int axis;
928 for (axis = 0; axis < kGreenRedToBlueNumAxis; ++axis) {
929 const int green_to_blue_cur =
930 offset[axis][0] * delta + green_to_blue_best;
931 const int red_to_blue_cur = offset[axis][1] * delta + red_to_blue_best;
932 const float cur_diff = GetPredictionCostCrossColorBlue(
933 argb, stride, tile_width, tile_height, prev_x, prev_y,
934 green_to_blue_cur, red_to_blue_cur, accumulated_blue_histo);
935 if (cur_diff < best_diff) {
936 best_diff = cur_diff;
937 green_to_blue_best = green_to_blue_cur;
938 red_to_blue_best = red_to_blue_cur;
939 }
940 if (quality < 25 && iter == 4) {
941 // Only axis aligned diffs for lower quality.
942 break; // next iter.
943 }
944 }
945 if (delta == 2 && green_to_blue_best == 0 && red_to_blue_best == 0) {
946 // Further iterations would not help.
947 break; // out of iter-loop.
948 }
949 }
950 best_tx->green_to_blue_ = green_to_blue_best;
951 best_tx->red_to_blue_ = red_to_blue_best;
952 }
953 #undef kGreenRedToBlueMaxIters
954 #undef kGreenRedToBlueNumAxis
955
GetBestColorTransformForTile(int tile_x,int tile_y,int bits,VP8LMultipliers prev_x,VP8LMultipliers prev_y,int quality,int xsize,int ysize,const int accumulated_red_histo[256],const int accumulated_blue_histo[256],const uint32_t * const argb)956 static VP8LMultipliers GetBestColorTransformForTile(
957 int tile_x, int tile_y, int bits,
958 VP8LMultipliers prev_x,
959 VP8LMultipliers prev_y,
960 int quality, int xsize, int ysize,
961 const int accumulated_red_histo[256],
962 const int accumulated_blue_histo[256],
963 const uint32_t* const argb) {
964 const int max_tile_size = 1 << bits;
965 const int tile_y_offset = tile_y * max_tile_size;
966 const int tile_x_offset = tile_x * max_tile_size;
967 const int all_x_max = GetMin(tile_x_offset + max_tile_size, xsize);
968 const int all_y_max = GetMin(tile_y_offset + max_tile_size, ysize);
969 const int tile_width = all_x_max - tile_x_offset;
970 const int tile_height = all_y_max - tile_y_offset;
971 const uint32_t* const tile_argb = argb + tile_y_offset * xsize
972 + tile_x_offset;
973 VP8LMultipliers best_tx;
974 MultipliersClear(&best_tx);
975
976 GetBestGreenToRed(tile_argb, xsize, tile_width, tile_height,
977 prev_x, prev_y, quality, accumulated_red_histo, &best_tx);
978 GetBestGreenRedToBlue(tile_argb, xsize, tile_width, tile_height,
979 prev_x, prev_y, quality, accumulated_blue_histo,
980 &best_tx);
981 return best_tx;
982 }
983
CopyTileWithColorTransform(int xsize,int ysize,int tile_x,int tile_y,int max_tile_size,VP8LMultipliers color_transform,uint32_t * argb)984 static void CopyTileWithColorTransform(int xsize, int ysize,
985 int tile_x, int tile_y,
986 int max_tile_size,
987 VP8LMultipliers color_transform,
988 uint32_t* argb) {
989 const int xscan = GetMin(max_tile_size, xsize - tile_x);
990 int yscan = GetMin(max_tile_size, ysize - tile_y);
991 argb += tile_y * xsize + tile_x;
992 while (yscan-- > 0) {
993 VP8LTransformColor(&color_transform, argb, xscan);
994 argb += xsize;
995 }
996 }
997
VP8LColorSpaceTransform(int width,int height,int bits,int quality,uint32_t * const argb,uint32_t * image)998 void VP8LColorSpaceTransform(int width, int height, int bits, int quality,
999 uint32_t* const argb, uint32_t* image) {
1000 const int max_tile_size = 1 << bits;
1001 const int tile_xsize = VP8LSubSampleSize(width, bits);
1002 const int tile_ysize = VP8LSubSampleSize(height, bits);
1003 int accumulated_red_histo[256] = { 0 };
1004 int accumulated_blue_histo[256] = { 0 };
1005 int tile_x, tile_y;
1006 VP8LMultipliers prev_x, prev_y;
1007 MultipliersClear(&prev_y);
1008 MultipliersClear(&prev_x);
1009 for (tile_y = 0; tile_y < tile_ysize; ++tile_y) {
1010 for (tile_x = 0; tile_x < tile_xsize; ++tile_x) {
1011 int y;
1012 const int tile_x_offset = tile_x * max_tile_size;
1013 const int tile_y_offset = tile_y * max_tile_size;
1014 const int all_x_max = GetMin(tile_x_offset + max_tile_size, width);
1015 const int all_y_max = GetMin(tile_y_offset + max_tile_size, height);
1016 const int offset = tile_y * tile_xsize + tile_x;
1017 if (tile_y != 0) {
1018 ColorCodeToMultipliers(image[offset - tile_xsize], &prev_y);
1019 }
1020 prev_x = GetBestColorTransformForTile(tile_x, tile_y, bits,
1021 prev_x, prev_y,
1022 quality, width, height,
1023 accumulated_red_histo,
1024 accumulated_blue_histo,
1025 argb);
1026 image[offset] = MultipliersToColorCode(&prev_x);
1027 CopyTileWithColorTransform(width, height, tile_x_offset, tile_y_offset,
1028 max_tile_size, prev_x, argb);
1029
1030 // Gather accumulated histogram data.
1031 for (y = tile_y_offset; y < all_y_max; ++y) {
1032 int ix = y * width + tile_x_offset;
1033 const int ix_end = ix + all_x_max - tile_x_offset;
1034 for (; ix < ix_end; ++ix) {
1035 const uint32_t pix = argb[ix];
1036 if (ix >= 2 &&
1037 pix == argb[ix - 2] &&
1038 pix == argb[ix - 1]) {
1039 continue; // repeated pixels are handled by backward references
1040 }
1041 if (ix >= width + 2 &&
1042 argb[ix - 2] == argb[ix - width - 2] &&
1043 argb[ix - 1] == argb[ix - width - 1] &&
1044 pix == argb[ix - width]) {
1045 continue; // repeated pixels are handled by backward references
1046 }
1047 ++accumulated_red_histo[(pix >> 16) & 0xff];
1048 ++accumulated_blue_histo[(pix >> 0) & 0xff];
1049 }
1050 }
1051 }
1052 }
1053 }
1054
1055 //------------------------------------------------------------------------------
1056 // Bundles multiple (1, 2, 4 or 8) pixels into a single pixel.
VP8LBundleColorMap(const uint8_t * const row,int width,int xbits,uint32_t * const dst)1057 void VP8LBundleColorMap(const uint8_t* const row, int width,
1058 int xbits, uint32_t* const dst) {
1059 int x;
1060 if (xbits > 0) {
1061 const int bit_depth = 1 << (3 - xbits);
1062 const int mask = (1 << xbits) - 1;
1063 uint32_t code = 0xff000000;
1064 for (x = 0; x < width; ++x) {
1065 const int xsub = x & mask;
1066 if (xsub == 0) {
1067 code = 0xff000000;
1068 }
1069 code |= row[x] << (8 + bit_depth * xsub);
1070 dst[x >> xbits] = code;
1071 }
1072 } else {
1073 for (x = 0; x < width; ++x) dst[x] = 0xff000000 | (row[x] << 8);
1074 }
1075 }
1076
1077 //------------------------------------------------------------------------------
1078
ExtraCost(const uint32_t * population,int length)1079 static double ExtraCost(const uint32_t* population, int length) {
1080 int i;
1081 double cost = 0.;
1082 for (i = 2; i < length - 2; ++i) cost += (i >> 1) * population[i + 2];
1083 return cost;
1084 }
1085
ExtraCostCombined(const uint32_t * X,const uint32_t * Y,int length)1086 static double ExtraCostCombined(const uint32_t* X, const uint32_t* Y,
1087 int length) {
1088 int i;
1089 double cost = 0.;
1090 for (i = 2; i < length - 2; ++i) {
1091 const int xy = X[i + 2] + Y[i + 2];
1092 cost += (i >> 1) * xy;
1093 }
1094 return cost;
1095 }
1096
1097 //------------------------------------------------------------------------------
1098
HistogramAdd(const VP8LHistogram * const a,const VP8LHistogram * const b,VP8LHistogram * const out)1099 static void HistogramAdd(const VP8LHistogram* const a,
1100 const VP8LHistogram* const b,
1101 VP8LHistogram* const out) {
1102 int i;
1103 const int literal_size = VP8LHistogramNumCodes(a->palette_code_bits_);
1104 assert(a->palette_code_bits_ == b->palette_code_bits_);
1105 if (b != out) {
1106 for (i = 0; i < literal_size; ++i) {
1107 out->literal_[i] = a->literal_[i] + b->literal_[i];
1108 }
1109 for (i = 0; i < NUM_DISTANCE_CODES; ++i) {
1110 out->distance_[i] = a->distance_[i] + b->distance_[i];
1111 }
1112 for (i = 0; i < NUM_LITERAL_CODES; ++i) {
1113 out->red_[i] = a->red_[i] + b->red_[i];
1114 out->blue_[i] = a->blue_[i] + b->blue_[i];
1115 out->alpha_[i] = a->alpha_[i] + b->alpha_[i];
1116 }
1117 } else {
1118 for (i = 0; i < literal_size; ++i) {
1119 out->literal_[i] += a->literal_[i];
1120 }
1121 for (i = 0; i < NUM_DISTANCE_CODES; ++i) {
1122 out->distance_[i] += a->distance_[i];
1123 }
1124 for (i = 0; i < NUM_LITERAL_CODES; ++i) {
1125 out->red_[i] += a->red_[i];
1126 out->blue_[i] += a->blue_[i];
1127 out->alpha_[i] += a->alpha_[i];
1128 }
1129 }
1130 }
1131
1132 //------------------------------------------------------------------------------
1133
1134 VP8LProcessBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed;
1135
1136 VP8LTransformColorFunc VP8LTransformColor;
1137
1138 VP8LCollectColorBlueTransformsFunc VP8LCollectColorBlueTransforms;
1139 VP8LCollectColorRedTransformsFunc VP8LCollectColorRedTransforms;
1140
1141 VP8LFastLog2SlowFunc VP8LFastLog2Slow;
1142 VP8LFastLog2SlowFunc VP8LFastSLog2Slow;
1143
1144 VP8LCostFunc VP8LExtraCost;
1145 VP8LCostCombinedFunc VP8LExtraCostCombined;
1146 VP8LCombinedShannonEntropyFunc VP8LCombinedShannonEntropy;
1147
1148 GetEntropyUnrefinedHelperFunc VP8LGetEntropyUnrefinedHelper;
1149
1150 VP8LHistogramAddFunc VP8LHistogramAdd;
1151
1152 extern void VP8LEncDspInitSSE2(void);
1153 extern void VP8LEncDspInitSSE41(void);
1154 extern void VP8LEncDspInitNEON(void);
1155 extern void VP8LEncDspInitMIPS32(void);
1156 extern void VP8LEncDspInitMIPSdspR2(void);
1157
1158 static volatile VP8CPUInfo lossless_enc_last_cpuinfo_used =
1159 (VP8CPUInfo)&lossless_enc_last_cpuinfo_used;
1160
VP8LEncDspInit(void)1161 WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInit(void) {
1162 if (lossless_enc_last_cpuinfo_used == VP8GetCPUInfo) return;
1163
1164 VP8LDspInit();
1165
1166 VP8LSubtractGreenFromBlueAndRed = VP8LSubtractGreenFromBlueAndRed_C;
1167
1168 VP8LTransformColor = VP8LTransformColor_C;
1169
1170 VP8LCollectColorBlueTransforms = VP8LCollectColorBlueTransforms_C;
1171 VP8LCollectColorRedTransforms = VP8LCollectColorRedTransforms_C;
1172
1173 VP8LFastLog2Slow = FastLog2Slow;
1174 VP8LFastSLog2Slow = FastSLog2Slow;
1175
1176 VP8LExtraCost = ExtraCost;
1177 VP8LExtraCostCombined = ExtraCostCombined;
1178 VP8LCombinedShannonEntropy = CombinedShannonEntropy;
1179
1180 VP8LGetEntropyUnrefinedHelper = GetEntropyUnrefinedHelper;
1181
1182 VP8LHistogramAdd = HistogramAdd;
1183
1184 // If defined, use CPUInfo() to overwrite some pointers with faster versions.
1185 if (VP8GetCPUInfo != NULL) {
1186 #if defined(WEBP_USE_SSE2)
1187 if (VP8GetCPUInfo(kSSE2)) {
1188 VP8LEncDspInitSSE2();
1189 #if defined(WEBP_USE_SSE41)
1190 if (VP8GetCPUInfo(kSSE4_1)) {
1191 VP8LEncDspInitSSE41();
1192 }
1193 #endif
1194 }
1195 #endif
1196 #if defined(WEBP_USE_NEON)
1197 if (VP8GetCPUInfo(kNEON)) {
1198 VP8LEncDspInitNEON();
1199 }
1200 #endif
1201 #if defined(WEBP_USE_MIPS32)
1202 if (VP8GetCPUInfo(kMIPS32)) {
1203 VP8LEncDspInitMIPS32();
1204 }
1205 #endif
1206 #if defined(WEBP_USE_MIPS_DSP_R2)
1207 if (VP8GetCPUInfo(kMIPSdspR2)) {
1208 VP8LEncDspInitMIPSdspR2();
1209 }
1210 #endif
1211 }
1212 lossless_enc_last_cpuinfo_used = VP8GetCPUInfo;
1213 }
1214
1215 //------------------------------------------------------------------------------
1216