1 /*
2 Copyright 2011 Google Inc. All Rights Reserved.
3
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
7
8 http://www.apache.org/licenses/LICENSE-2.0
9
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
15
16 Author: lode.vandevenne@gmail.com (Lode Vandevenne)
17 Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala)
18 */
19
20 #include "blocksplitter.h"
21
22 #include <assert.h>
23 #include <stdio.h>
24 #include <stdlib.h>
25
26 #include "deflate.h"
27 #include "lz77.h"
28 #include "squeeze.h"
29 #include "tree.h"
30 #include "util.h"
31
32 /*
33 The "f" for the FindMinimum function below.
34 i: the current parameter of f(i)
35 context: for your implementation
36 */
37 typedef double FindMinimumFun(size_t i, void* context);
38
39 /*
40 Finds minimum of function f(i) where is is of type size_t, f(i) is of type
41 double, i is in range start-end (excluding end).
42 */
FindMinimum(FindMinimumFun f,void * context,size_t start,size_t end)43 static size_t FindMinimum(FindMinimumFun f, void* context,
44 size_t start, size_t end) {
45 if (end - start < 1024) {
46 double best = ZOPFLI_LARGE_FLOAT;
47 size_t result = start;
48 size_t i;
49 for (i = start; i < end; i++) {
50 double v = f(i, context);
51 if (v < best) {
52 best = v;
53 result = i;
54 }
55 }
56 return result;
57 } else {
58 /* Try to find minimum faster by recursively checking multiple points. */
59 #define NUM 9 /* Good value: 9. */
60 size_t i;
61 size_t p[NUM];
62 double vp[NUM];
63 size_t besti;
64 double best;
65 double lastbest = ZOPFLI_LARGE_FLOAT;
66 size_t pos = start;
67
68 for (;;) {
69 if (end - start <= NUM) break;
70
71 for (i = 0; i < NUM; i++) {
72 p[i] = start + (i + 1) * ((end - start) / (NUM + 1));
73 vp[i] = f(p[i], context);
74 }
75 besti = 0;
76 best = vp[0];
77 for (i = 1; i < NUM; i++) {
78 if (vp[i] < best) {
79 best = vp[i];
80 besti = i;
81 }
82 }
83 if (best > lastbest) break;
84
85 start = besti == 0 ? start : p[besti - 1];
86 end = besti == NUM - 1 ? end : p[besti + 1];
87
88 pos = p[besti];
89 lastbest = best;
90 }
91 return pos;
92 #undef NUM
93 }
94 }
95
96 /*
97 Returns estimated cost of a block in bits. It includes the size to encode the
98 tree and the size to encode all literal, length and distance symbols and their
99 extra bits.
100
101 litlens: lz77 lit/lengths
102 dists: ll77 distances
103 lstart: start of block
104 lend: end of block (not inclusive)
105 */
EstimateCost(const unsigned short * litlens,const unsigned short * dists,size_t lstart,size_t lend)106 static double EstimateCost(const unsigned short* litlens,
107 const unsigned short* dists,
108 size_t lstart, size_t lend) {
109 return ZopfliCalculateBlockSize(litlens, dists, lstart, lend, 2);
110 }
111
112 typedef struct SplitCostContext {
113 const unsigned short* litlens;
114 const unsigned short* dists;
115 size_t llsize;
116 size_t start;
117 size_t end;
118 } SplitCostContext;
119
120
121 /*
122 Gets the cost which is the sum of the cost of the left and the right section
123 of the data.
124 type: FindMinimumFun
125 */
SplitCost(size_t i,void * context)126 static double SplitCost(size_t i, void* context) {
127 SplitCostContext* c = (SplitCostContext*)context;
128 return EstimateCost(c->litlens, c->dists, c->start, i) +
129 EstimateCost(c->litlens, c->dists, i, c->end);
130 }
131
AddSorted(size_t value,size_t ** out,size_t * outsize)132 static void AddSorted(size_t value, size_t** out, size_t* outsize) {
133 size_t i;
134 ZOPFLI_APPEND_DATA(value, out, outsize);
135 for (i = 0; i + 1 < *outsize; i++) {
136 if ((*out)[i] > value) {
137 size_t j;
138 for (j = *outsize - 1; j > i; j--) {
139 (*out)[j] = (*out)[j - 1];
140 }
141 (*out)[i] = value;
142 break;
143 }
144 }
145 }
146
147 /*
148 Prints the block split points as decimal and hex values in the terminal.
149 */
PrintBlockSplitPoints(const unsigned short * litlens,const unsigned short * dists,size_t llsize,const size_t * lz77splitpoints,size_t nlz77points)150 static void PrintBlockSplitPoints(const unsigned short* litlens,
151 const unsigned short* dists,
152 size_t llsize, const size_t* lz77splitpoints,
153 size_t nlz77points) {
154 size_t* splitpoints = 0;
155 size_t npoints = 0;
156 size_t i;
157 /* The input is given as lz77 indices, but we want to see the uncompressed
158 index values. */
159 size_t pos = 0;
160 if (nlz77points > 0) {
161 for (i = 0; i < llsize; i++) {
162 size_t length = dists[i] == 0 ? 1 : litlens[i];
163 if (lz77splitpoints[npoints] == i) {
164 ZOPFLI_APPEND_DATA(pos, &splitpoints, &npoints);
165 if (npoints == nlz77points) break;
166 }
167 pos += length;
168 }
169 }
170 assert(npoints == nlz77points);
171
172 fprintf(stderr, "block split points: ");
173 for (i = 0; i < npoints; i++) {
174 fprintf(stderr, "%d ", (int)splitpoints[i]);
175 }
176 fprintf(stderr, "(hex:");
177 for (i = 0; i < npoints; i++) {
178 fprintf(stderr, " %x", (int)splitpoints[i]);
179 }
180 fprintf(stderr, ")\n");
181
182 free(splitpoints);
183 }
184
185 /*
186 Finds next block to try to split, the largest of the available ones.
187 The largest is chosen to make sure that if only a limited amount of blocks is
188 requested, their sizes are spread evenly.
189 llsize: the size of the LL77 data, which is the size of the done array here.
190 done: array indicating which blocks starting at that position are no longer
191 splittable (splitting them increases rather than decreases cost).
192 splitpoints: the splitpoints found so far.
193 npoints: the amount of splitpoints found so far.
194 lstart: output variable, giving start of block.
195 lend: output variable, giving end of block.
196 returns 1 if a block was found, 0 if no block found (all are done).
197 */
FindLargestSplittableBlock(size_t llsize,const unsigned char * done,const size_t * splitpoints,size_t npoints,size_t * lstart,size_t * lend)198 static int FindLargestSplittableBlock(
199 size_t llsize, const unsigned char* done,
200 const size_t* splitpoints, size_t npoints,
201 size_t* lstart, size_t* lend) {
202 size_t longest = 0;
203 int found = 0;
204 size_t i;
205 for (i = 0; i <= npoints; i++) {
206 size_t start = i == 0 ? 0 : splitpoints[i - 1];
207 size_t end = i == npoints ? llsize - 1 : splitpoints[i];
208 if (!done[start] && end - start > longest) {
209 *lstart = start;
210 *lend = end;
211 found = 1;
212 longest = end - start;
213 }
214 }
215 return found;
216 }
217
ZopfliBlockSplitLZ77(const ZopfliOptions * options,const unsigned short * litlens,const unsigned short * dists,size_t llsize,size_t maxblocks,size_t ** splitpoints,size_t * npoints)218 void ZopfliBlockSplitLZ77(const ZopfliOptions* options,
219 const unsigned short* litlens,
220 const unsigned short* dists,
221 size_t llsize, size_t maxblocks,
222 size_t** splitpoints, size_t* npoints) {
223 size_t lstart, lend;
224 size_t i;
225 size_t llpos = 0;
226 size_t numblocks = 1;
227 unsigned char* done;
228 double splitcost, origcost;
229
230 if (llsize < 10) return; /* This code fails on tiny files. */
231
232 done = (unsigned char*)malloc(llsize);
233 if (!done) exit(-1); /* Allocation failed. */
234 for (i = 0; i < llsize; i++) done[i] = 0;
235
236 lstart = 0;
237 lend = llsize;
238 for (;;) {
239 SplitCostContext c;
240
241 if (maxblocks > 0 && numblocks >= maxblocks) {
242 break;
243 }
244
245 c.litlens = litlens;
246 c.dists = dists;
247 c.llsize = llsize;
248 c.start = lstart;
249 c.end = lend;
250 assert(lstart < lend);
251 llpos = FindMinimum(SplitCost, &c, lstart + 1, lend);
252
253 assert(llpos > lstart);
254 assert(llpos < lend);
255
256 splitcost = EstimateCost(litlens, dists, lstart, llpos) +
257 EstimateCost(litlens, dists, llpos, lend);
258 origcost = EstimateCost(litlens, dists, lstart, lend);
259
260 if (splitcost > origcost || llpos == lstart + 1 || llpos == lend) {
261 done[lstart] = 1;
262 } else {
263 AddSorted(llpos, splitpoints, npoints);
264 numblocks++;
265 }
266
267 if (!FindLargestSplittableBlock(
268 llsize, done, *splitpoints, *npoints, &lstart, &lend)) {
269 break; /* No further split will probably reduce compression. */
270 }
271
272 if (lend - lstart < 10) {
273 break;
274 }
275 }
276
277 if (options->verbose) {
278 PrintBlockSplitPoints(litlens, dists, llsize, *splitpoints, *npoints);
279 }
280
281 free(done);
282 }
283
ZopfliBlockSplit(const ZopfliOptions * options,const unsigned char * in,size_t instart,size_t inend,size_t maxblocks,size_t ** splitpoints,size_t * npoints)284 void ZopfliBlockSplit(const ZopfliOptions* options,
285 const unsigned char* in, size_t instart, size_t inend,
286 size_t maxblocks, size_t** splitpoints, size_t* npoints) {
287 size_t pos = 0;
288 size_t i;
289 ZopfliBlockState s;
290 size_t* lz77splitpoints = 0;
291 size_t nlz77points = 0;
292 ZopfliLZ77Store store;
293
294 ZopfliInitLZ77Store(&store);
295
296 s.options = options;
297 s.blockstart = instart;
298 s.blockend = inend;
299 #ifdef ZOPFLI_LONGEST_MATCH_CACHE
300 s.lmc = 0;
301 #endif
302
303 *npoints = 0;
304 *splitpoints = 0;
305
306 /* Unintuitively, Using a simple LZ77 method here instead of ZopfliLZ77Optimal
307 results in better blocks. */
308 ZopfliLZ77Greedy(&s, in, instart, inend, &store);
309
310 ZopfliBlockSplitLZ77(options,
311 store.litlens, store.dists, store.size, maxblocks,
312 &lz77splitpoints, &nlz77points);
313
314 /* Convert LZ77 positions to positions in the uncompressed input. */
315 pos = instart;
316 if (nlz77points > 0) {
317 for (i = 0; i < store.size; i++) {
318 size_t length = store.dists[i] == 0 ? 1 : store.litlens[i];
319 if (lz77splitpoints[*npoints] == i) {
320 ZOPFLI_APPEND_DATA(pos, splitpoints, npoints);
321 if (*npoints == nlz77points) break;
322 }
323 pos += length;
324 }
325 }
326 assert(*npoints == nlz77points);
327
328 free(lz77splitpoints);
329 ZopfliCleanLZ77Store(&store);
330 }
331
ZopfliBlockSplitSimple(const unsigned char * in,size_t instart,size_t inend,size_t blocksize,size_t ** splitpoints,size_t * npoints)332 void ZopfliBlockSplitSimple(const unsigned char* in,
333 size_t instart, size_t inend,
334 size_t blocksize,
335 size_t** splitpoints, size_t* npoints) {
336 size_t i = instart;
337 while (i < inend) {
338 ZOPFLI_APPEND_DATA(i, splitpoints, npoints);
339 i += blocksize;
340 }
341 (void)in;
342 }
343