• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0
2 /*
3  * erofs-utils/lib/kite_deflate.c
4  *
5  * Copyright (C) 2023, Alibaba Cloud
6  * Copyright (C) 2023, Gao Xiang <xiang@kernel.org>
7  */
8 #include "erofs/defs.h"
9 #include "erofs/print.h"
10 #include <errno.h>
11 #include <stdlib.h>
12 #include <string.h>
13 #include <ctype.h>
14 #include <stdio.h>
15 
16 unsigned long erofs_memcmp2(const u8 *s1, const u8 *s2,
17 			    unsigned long sz);
18 
19 #ifdef TEST
20 #define kite_dbg(x, ...)	fprintf(stderr, x "\n", ##__VA_ARGS__)
21 #else
22 #define kite_dbg(x, ...)
23 #endif
24 
25 #define kHistorySize32		(1U << 15)
26 
27 #define kNumLenSymbols32	256
28 #define kNumLenSymbolsMax	kNumLenSymbols32
29 
30 #define kSymbolEndOfBlock	256
31 #define kSymbolMatch		(kSymbolEndOfBlock + 1)
32 #define kNumLenSlots		29
33 #define kMainTableSize		(kSymbolMatch + kNumLenSlots)
34 
35 #define kFixedLenTableSize	(kSymbolMatch + 31)
36 #define FixedDistTableSize	32
37 
38 #define kMainTableSize		(kSymbolMatch + kNumLenSlots)
39 #define kDistTableSize32	30
40 
41 #define kNumLitLenCodesMin	257
42 #define kNumDistCodesMin	1
43 
44 #define kNumLensCodesMin	4
45 #define kLensTableSize		19
46 
47 #define kMatchMinLen		3
48 #define kMatchMaxLen32		kNumLenSymbols32 + kMatchMinLen - 1
49 
50 #define kTableDirectLevels      16
51 #define kBitLensRepNumber_3_6   kTableDirectLevels
52 #define kBitLens0Number_3_10    (kBitLensRepNumber_3_6 + 1)
53 #define kBitLens0Number_11_138  (kBitLens0Number_3_10 + 1)
54 
55 static u32 kstaticHuff_mainCodes[kFixedLenTableSize];
56 static const u8 kstaticHuff_litLenLevels[kFixedLenTableSize] = {
57 	[0   ... 143] = 8, [144 ... 255] = 9,
58 	[256 ... 279] = 7, [280 ... 287] = 8,
59 };
60 static u32 kstaticHuff_distCodes[kFixedLenTableSize];
61 
62 const u8 kLenStart32[kNumLenSlots] =
63 	{0,1,2,3,4,5,6,7,8,10,12,14,16,20,24,28,32,40,48,56,64,80,96,112,128,160,192,224, 255};
64 
65 const u8 kLenExtraBits32[kNumLenSlots] =
66 	{0,0,0,0,0,0,0,0,1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4,  4,  5,
67 	 5,  5,  5, 0};
68 
69 /* First normalized distance for each code (0 = distance of 1) */
70 const u32 kDistStart[kDistTableSize32] =
71 	{0,1,2,3,4,6,8,12,16,24,32,48,64,96,128,192,256,384,512,768,
72 	 1024,1536,2048,3072,4096,6144,8192,12288,16384,24576};
73 
74 /* extra bits for each distance code */
75 const u8 kDistExtraBits[kDistTableSize32] =
76 	{0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13};
77 
78 const u8 kCodeLengthAlphabetOrder[kLensTableSize] =
79 	{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
80 
81 const u8 kLevelExtraBits[3] = {2, 3, 7};
82 
83 #define kStored			0
84 #define kFixedHuffman		1
85 #define kDynamicHuffman		2
86 
87 struct kite_deflate_symbol {
88 	u16 len, dist;
89 };
90 
91 struct kite_deflate_table {
92 	u32  mainCodes[kMainTableSize];
93 	u8   litLenLevels[kMainTableSize];
94 	u32  distCodes[kDistTableSize32];
95 	u8   distLevels[kDistTableSize32];
96 	u32  levelCodes[kLensTableSize];
97 	u8   levelLens[kLensTableSize];
98 
99 	u8   numdistlens, numblcodes;
100 	u16  numlitlens;
101 };
102 
103 struct kite_deflate {
104 	struct kite_deflate_table  *tab;
105 	const u8   *in;
106 	u8         *out;
107 
108 	u32  inlen, outlen;
109 	u32  pos_in, pos_out;
110 	u32  inflightbits;
111 	u8   bitpos;
112 	u8   numHuffBits;
113 	u32  symbols;
114 
115 	u32  costbits, startpos;
116 	u8   encode_mode;
117 	bool freq_changed, lastblock;
118 
119 	/* Previous match for lazy matching */
120 	bool prev_valid;
121 	u16 prev_longest;
122 
123 	u32  mainFreqs[kMainTableSize];
124 	u32  distFreqs[kDistTableSize32];
125 	struct kite_deflate_table tables[2];
126 
127 	/* don't reset the following fields */
128 	struct kite_matchfinder *mf;
129 	struct kite_deflate_symbol *sym;
130 	u32 max_symbols;
131 	bool lazy_search;
132 };
133 
134 #define ZLIB_DISTANCE_TOO_FAR	4096
135 
136 static u8 g_LenSlots[kNumLenSymbolsMax];
137 
138 #define kNumLogBits 9		// do not change it
139 static u8 g_FastPos[1 << kNumLogBits];
140 
writebits(struct kite_deflate * s,unsigned int v,u8 bits)141 static void writebits(struct kite_deflate *s, unsigned int v, u8 bits)
142 {
143 	unsigned int rem = sizeof(s->inflightbits) * 8 - s->bitpos;
144 
145 	s->inflightbits |= (v << s->bitpos) & (!rem - 1);
146 	if (bits > rem) {
147 		u8 *out = s->out + s->pos_out;
148 
149 		out[0] = s->inflightbits & 0xff;
150 		out[1] = (s->inflightbits >> 8) & 0xff;
151 		out[2] = (s->inflightbits >> 16) & 0xff;
152 		out[3] = (s->inflightbits >> 24) & 0xff;
153 		s->pos_out += 4;
154 		DBG_BUGON(s->pos_out > s->outlen);
155 		s->inflightbits = v >> rem;
156 		s->bitpos = bits - rem;
157 		return;
158 	}
159 	s->bitpos += bits;
160 }
161 
flushbits(struct kite_deflate * s)162 static void flushbits(struct kite_deflate *s)
163 {
164 	u8 *out = s->out + s->pos_out;
165 
166 	if (!s->bitpos)
167 		return;
168 	out[0] = s->inflightbits & 0xff;
169 	if (s->bitpos >= 8) {
170 		out[1] = (s->inflightbits >> 8) & 0xff;
171 		if (s->bitpos >= 16) {
172 			out[2] = (s->inflightbits >> 16) & 0xff;
173 			if (s->bitpos >= 24)
174 				out[3] = (s->inflightbits >> 24) & 0xff;
175 		}
176 	}
177 	s->pos_out += round_up(s->bitpos, 8) >> 3;
178 	DBG_BUGON(s->pos_out > s->outlen);
179 	s->bitpos = 0;
180 	s->inflightbits = 0;
181 }
182 
183 #define kMaxLen 16
184 
deflate_genhuffcodes(const u8 * lens,u32 * p,unsigned int nr_codes,const u32 * bl_count)185 static void deflate_genhuffcodes(const u8 *lens, u32 *p, unsigned int nr_codes,
186 				 const u32 *bl_count)
187 {
188 	u32 nextCodes[kMaxLen + 1];	/* next code value for each bit length */
189 	unsigned int code = 0;		/* running code value */
190 	unsigned int bits, k;
191 
192 	for (bits = 1; bits <= kMaxLen; ++bits) {
193 		code = (code + bl_count[bits - 1]) << 1;
194 		nextCodes[bits] = code;
195 	}
196 
197 	DBG_BUGON(code + bl_count[kMaxLen] != 1 << kMaxLen);
198 
199 	for (k = 0; k < nr_codes; ++k)
200 		p[k] = nextCodes[lens[k]]++;
201 }
202 
deflate_reversebits_one(u32 code,u8 bits)203 static u32 deflate_reversebits_one(u32 code, u8 bits)
204 {
205 	unsigned int x = code;
206 
207 	x = ((x & 0x5555) << 1) | ((x & 0xAAAA) >> 1);
208 	x = ((x & 0x3333) << 2) | ((x & 0xCCCC) >> 2);
209 	x = ((x & 0x0F0F) << 4) | ((x & 0xF0F0) >> 4);
210 
211 	return (((x & 0x00FF) << 8) | ((x & 0xFF00) >> 8)) >> (16 - bits);
212 }
213 
Huffman_ReverseBits(u32 * codes,const u8 * lens,unsigned int n)214 static void Huffman_ReverseBits(u32 *codes, const u8 *lens, unsigned int n)
215 {
216 	while (n) {
217 		u32 code = *codes;
218 
219 		*codes++ = deflate_reversebits_one(code, *lens++);
220 		--n;
221 	}
222 }
223 
kite_deflate_init_once(void)224 static void kite_deflate_init_once(void)
225 {
226 	static const u32 static_bl_count[kMaxLen + 1] = {
227 		[7] = 279 - 256 + 1,
228 		[8] = (143 + 1) + (287 - 280 + 1),
229 		[9] = 255 - 144 + 1,
230 	};
231 	unsigned int i, c, j, k;
232 
233 	if (kstaticHuff_distCodes[31])
234 		return;
235 	deflate_genhuffcodes(kstaticHuff_litLenLevels, kstaticHuff_mainCodes,
236 			     kFixedLenTableSize, static_bl_count);
237 	Huffman_ReverseBits(kstaticHuff_mainCodes, kstaticHuff_litLenLevels,
238 			    kFixedLenTableSize);
239 
240 	for (i = 0; i < ARRAY_SIZE(kstaticHuff_distCodes); ++i)
241 		kstaticHuff_distCodes[i] = deflate_reversebits_one(i, 5);
242 
243 	for (i = 0; i < kNumLenSlots; i++) {
244 		c = kLenStart32[i];
245 		j = 1 << kLenExtraBits32[i];
246 
247 		for (k = 0; k < j; k++, c++)
248 			g_LenSlots[c] = (u8)i;
249 	}
250 
251 	c = 0;
252 	for (i = 0; i < /*kFastSlots*/ kNumLogBits * 2; i++) {
253 		k = 1 << kDistExtraBits[i];
254 		for (j = 0; j < k; j++)
255 			g_FastPos[c++] = i;
256 	}
257 }
258 
kite_deflate_scanlens(unsigned int numlens,u8 * lens,u32 * freqs)259 static void kite_deflate_scanlens(unsigned int numlens, u8 *lens, u32 *freqs)
260 {
261 	int n;				/* iterates over all tree elements */
262 	int prevlen = -1;		/* last emitted length */
263 	int curlen;			/* length of current code */
264 	int nextlen = lens[0];		/* length of next code */
265 	int count = 0;			/* repeat count of the current code */
266 	int max_count = 7;		/* max repeat count */
267 	int min_count = 4;		/* min repeat count */
268 
269 	if (!nextlen)
270 		max_count = 138, min_count = 3;
271 
272 	for (n = 0; n < numlens; n++) {
273 		curlen = nextlen;
274 		nextlen = n + 1 < numlens ? lens[n + 1] : -1;
275 		++count;
276 
277 		if (count < max_count && curlen == nextlen)
278 			continue;
279 		if (count < min_count) {
280 			freqs[curlen] += count;
281 		} else if (curlen != 0) {
282 			if (curlen != prevlen)
283 				freqs[curlen]++;
284 			freqs[kBitLensRepNumber_3_6]++;
285 		} else if (count <= 10) {
286 			freqs[kBitLens0Number_3_10]++;
287 		} else {
288 			freqs[kBitLens0Number_11_138]++;
289 		}
290 
291 		count = 0;
292 		prevlen = curlen;
293 		if (!nextlen)
294 			max_count = 138, min_count = 3;
295 		else if (curlen == nextlen)
296 			max_count = 6, min_count = 3;
297 		else
298 			max_count = 7, min_count = 4;
299 	}
300 }
301 
kite_deflate_sendtree(struct kite_deflate * s,const u8 * lens,unsigned int numlens)302 static void kite_deflate_sendtree(struct kite_deflate *s, const u8 *lens,
303 				  unsigned int numlens)
304 {
305 	int n;				/* iterates over all tree elements */
306 	int prevlen = -1;		/* last emitted length */
307 	int curlen;			/* length of current code */
308 	int nextlen = lens[0];		/* length of next code */
309 	int count = 0;			/* repeat count of the current code */
310 	int max_count = 7;		/* max repeat count */
311 	int min_count = 4;		/* min repeat count */
312 	const u8 *bl_lens = s->tab->levelLens;
313 	const u32 *bl_codes = s->tab->levelCodes;
314 
315 	if (!nextlen)
316 		max_count = 138, min_count = 3;
317 
318 	for (n = 0; n < numlens; n++) {
319 		curlen = nextlen;
320 		nextlen = n + 1 < numlens ? lens[n + 1] : -1;
321 		++count;
322 
323 		if (count < max_count && curlen == nextlen)
324 			continue;
325 		if (count < min_count) {
326 			do {
327 				writebits(s, bl_codes[curlen], bl_lens[curlen]);
328 			} while (--count);
329 		} else if (curlen) {
330 			if (curlen != prevlen) {
331 				writebits(s, bl_codes[curlen], bl_lens[curlen]);
332 				count--;
333 			}
334 			writebits(s, bl_codes[kBitLensRepNumber_3_6],
335 				  bl_lens[kBitLensRepNumber_3_6]);
336 			writebits(s, count - 3, 2);
337 		} else if (count <= 10) {
338 			writebits(s, bl_codes[kBitLens0Number_3_10],
339 				  bl_lens[kBitLens0Number_3_10]);
340 			writebits(s, count - 3, 3);
341 		} else {
342 			writebits(s, bl_codes[kBitLens0Number_11_138],
343 				  bl_lens[kBitLens0Number_11_138]);
344 			writebits(s, count - 11, 7);
345 		}
346 
347 		count = 0;
348 		prevlen = curlen;
349 		if (!nextlen)
350 			max_count = 138, min_count = 3;
351 		else if (curlen == nextlen)
352 			max_count = 6, min_count = 3;
353 		else
354 			max_count = 7, min_count = 4;
355 	}
356 }
357 
kite_deflate_setfixedtrees(struct kite_deflate * s)358 static void kite_deflate_setfixedtrees(struct kite_deflate *s)
359 {
360 	writebits(s, (kFixedHuffman << 1) + s->lastblock, 3);
361 }
362 
kite_deflate_sendtrees(struct kite_deflate * s)363 static void kite_deflate_sendtrees(struct kite_deflate *s)
364 {
365 	struct kite_deflate_table *t = s->tab;
366 	unsigned int i;
367 
368 	writebits(s, (kDynamicHuffman << 1) + s->lastblock, 3);
369 	writebits(s, t->numlitlens - kNumLitLenCodesMin, 5);
370 	writebits(s, t->numdistlens - kNumDistCodesMin,  5);
371 	writebits(s, t->numblcodes - kNumLensCodesMin,   4);
372 
373 	for (i = 0; i < t->numblcodes; i++)
374 		writebits(s, t->levelLens[kCodeLengthAlphabetOrder[i]], 3);
375 
376 	Huffman_ReverseBits(t->levelCodes, t->levelLens, kLensTableSize);
377 	kite_deflate_sendtree(s, t->litLenLevels, t->numlitlens);
378 	kite_deflate_sendtree(s, t->distLevels, t->numdistlens);
379 }
380 
deflateDistSlot(unsigned int pos)381 static inline unsigned int deflateDistSlot(unsigned int pos)
382 {
383 	const unsigned int zz = (kNumLogBits - 1) &
384 		((((1U << kNumLogBits) - 1) - pos) >> (31 - 3));
385 
386 	return g_FastPos[pos >> zz] + (zz * 2);
387 }
388 
kite_deflate_writeblock(struct kite_deflate * s,bool fixed)389 static void kite_deflate_writeblock(struct kite_deflate *s, bool fixed)
390 {
391 	int i;
392 	u32 *mainCodes, *distCodes;
393 	const u8 *litLenLevels, *distLevels;
394 
395 	if (!fixed) {
396 		struct kite_deflate_table *t = s->tab;
397 
398 		mainCodes = t->mainCodes; distCodes = t->distCodes;
399 		litLenLevels = t->litLenLevels;	distLevels = t->distLevels;
400 
401 		Huffman_ReverseBits(mainCodes, litLenLevels, kMainTableSize);
402 		Huffman_ReverseBits(distCodes, distLevels, kDistTableSize32);
403 	} else {
404 		mainCodes = kstaticHuff_mainCodes;
405 		distCodes = kstaticHuff_distCodes;
406 
407 		litLenLevels = kstaticHuff_litLenLevels;
408 		distLevels = NULL;
409 	}
410 
411 	for (i = 0; i < s->symbols; ++i) {
412 		struct kite_deflate_symbol *sym = &s->sym[i];
413 
414 		if (sym->len < kMatchMinLen) {		/* literal */
415 			writebits(s, mainCodes[sym->dist],
416 				  litLenLevels[sym->dist]);
417 		} else {
418 			unsigned int lenSlot, distSlot;
419 			unsigned int lc = sym->len - kMatchMinLen;
420 
421 			lenSlot = g_LenSlots[lc];
422 			writebits(s, mainCodes[kSymbolMatch + lenSlot],
423 				  litLenLevels[kSymbolMatch + lenSlot]);
424 			writebits(s, lc - kLenStart32[lenSlot],
425 				  kLenExtraBits32[lenSlot]);
426 
427 			distSlot = deflateDistSlot(sym->dist - 1);
428 			writebits(s, distCodes[distSlot],
429 				  fixed ? 5 : distLevels[distSlot]);
430 			writebits(s, sym->dist - 1 - kDistStart[distSlot],
431 				  kDistExtraBits[distSlot]);
432 		}
433 	}
434 	writebits(s, mainCodes[kSymbolEndOfBlock],
435 		  litLenLevels[kSymbolEndOfBlock]);
436 }
437 
Huffman_GetPrice(const u32 * freqs,const u8 * lens,u32 num)438 static u32 Huffman_GetPrice(const u32 *freqs, const u8 *lens, u32 num)
439 {
440 	u32 price = 0;
441 
442 	while (num) {
443 		price += (*lens++) * (*freqs++);
444 		--num;
445 	}
446 	return price;
447 }
448 
Huffman_GetPriceEx(const u32 * freqs,const u8 * lens,u32 num,const u8 * extraBits,u32 extraBase)449 static u32 Huffman_GetPriceEx(const u32 *freqs, const u8 *lens, u32 num,
450 			      const u8 *extraBits, u32 extraBase)
451 {
452 	return Huffman_GetPrice(freqs, lens, num) +
453 		Huffman_GetPrice(freqs + extraBase, extraBits, num - extraBase);
454 }
455 
456 /* Adapted from C/HuffEnc.c (7zip) for now */
457 #define HeapSortDown(p, k, size, temp) \
458   { for (;;) { \
459     size_t s = (k << 1); \
460     if (s > size) break; \
461     if (s < size && p[s + 1] > p[s]) s++; \
462     if (temp >= p[s]) break; \
463     p[k] = p[s]; k = s; \
464   } p[k] = temp; }
465 
HeapSort(u32 * p,size_t size)466 static void HeapSort(u32 *p, size_t size)
467 {
468   if (size <= 1)
469     return;
470   p--;
471   {
472     size_t i = size / 2;
473     do
474     {
475       u32 temp = p[i];
476       size_t k = i;
477       HeapSortDown(p, k, size, temp)
478     }
479     while (--i != 0);
480   }
481   /*
482   do
483   {
484     size_t k = 1;
485     UInt32 temp = p[size];
486     p[size--] = p[1];
487     HeapSortDown(p, k, size, temp)
488   }
489   while (size > 1);
490   */
491   while (size > 3)
492   {
493     u32 temp = p[size];
494     size_t k = (p[3] > p[2]) ? 3 : 2;
495     p[size--] = p[1];
496     p[1] = p[k];
497     HeapSortDown(p, k, size, temp)
498   }
499   {
500     u32 temp = p[size];
501     p[size] = p[1];
502     if (size > 2 && p[2] < temp)
503     {
504       p[1] = p[2];
505       p[2] = temp;
506     }
507     else
508       p[1] = temp;
509   }
510 }
511 
512 #define NUM_BITS 10
513 #define MASK (((unsigned)1 << NUM_BITS) - 1)
514 
Huffman_Generate(const u32 * freqs,u32 * p,u8 * lens,unsigned int numSymbols,unsigned int maxLen)515 static void Huffman_Generate(const u32 *freqs, u32 *p, u8 *lens,
516 			     unsigned int numSymbols, unsigned int maxLen)
517 {
518 	u32 num, i;
519 
520 	num = 0;
521 	/* if (maxLen > 10) maxLen = 10; */
522 
523 	for (i = 0; i < numSymbols; i++) {
524 		u32 freq = freqs[i];
525 
526 		if (!freq)
527 			lens[i] = 0;
528 		else
529 			p[num++] = i | (freq << NUM_BITS);
530 	}
531 	HeapSort(p, num);
532 
533 	if (num < 2) {
534 		unsigned int minCode = 0, maxCode = 1;
535 
536 		if (num == 1) {
537 			maxCode = (unsigned int)p[0] & MASK;
538 			if (!maxCode)
539 				maxCode++;
540 		}
541 		p[minCode] = 0;
542 		p[maxCode] = 1;
543 		lens[minCode] = lens[maxCode] = 1;
544 		return;
545 	}
546 
547 	{
548 		u32 b, e, i;
549 
550 		i = b = e = 0;
551 		do {
552 			u32 n, m, freq;
553 
554 			n = (i != num && (b == e || (p[i] >> NUM_BITS) <= (p[b] >> NUM_BITS))) ? i++ : b++;
555 			freq = (p[n] & ~MASK);
556 			p[n] = (p[n] & MASK) | (e << NUM_BITS);
557 			m = (i != num && (b == e || (p[i] >> NUM_BITS) <= (p[b] >> NUM_BITS))) ? i++ : b++;
558 			freq += (p[m] & ~MASK);
559 			p[m] = (p[m] & MASK) | (e << NUM_BITS);
560 			p[e] = (p[e] & MASK) | freq;
561 			e++;
562 		} while (num - e > 1);
563 
564 		{
565 			u32 lenCounters[kMaxLen + 1];
566 
567 			for (i = 0; i <= kMaxLen; i++)
568 				lenCounters[i] = 0;
569 
570 			p[--e] &= MASK;
571 			lenCounters[1] = 2;
572 			while (e > 0) {
573 				u32 len = (p[p[--e] >> NUM_BITS] >> NUM_BITS) + 1;
574 
575 				p[e] = (p[e] & MASK) | (len << NUM_BITS);
576 				if (len >= maxLen)
577 					for (len = maxLen - 1; lenCounters[len] == 0; len--);
578 				lenCounters[len]--;
579 				lenCounters[(size_t)len + 1] += 2;
580 			}
581 
582 			{
583 				u32 len;
584 
585 				i = 0;
586 				for (len = maxLen; len != 0; len--) {
587 					u32 k;
588 					for (k = lenCounters[len]; k != 0; k--)
589 						lens[p[i++] & MASK] = (u8)len;
590 				}
591 			}
592 			deflate_genhuffcodes(lens, p, numSymbols, lenCounters);
593 		}
594 	}
595 }
596 
kite_deflate_fixdynblock(struct kite_deflate * s)597 static void kite_deflate_fixdynblock(struct kite_deflate *s)
598 {
599 	struct kite_deflate_table *t = s->tab;
600 	unsigned int numlitlens, numdistlens, numblcodes;
601 	u32 levelFreqs[kLensTableSize] = {0};
602 	u32 opt_mainlen;
603 
604 	if (!s->freq_changed)
605 		return;
606 
607 	/* in order to match zlib */
608 	s->numHuffBits = kMaxLen;
609 //	s->numHuffBits = (s->symbols > 18000 ? 12 :
610 //		(s->symbols > 7000 ? 11 : (s->symbols > 2000 ? 10 : 9)));
611 
612 	Huffman_Generate(s->mainFreqs, t->mainCodes, t->litLenLevels,
613 			 kMainTableSize, s->numHuffBits);
614 	Huffman_Generate(s->distFreqs, t->distCodes, t->distLevels,
615 			 kDistTableSize32, s->numHuffBits);
616 
617 	/* code lengths for the literal/length alphabet */
618 	numlitlens = kMainTableSize;
619 	while (numlitlens > kNumLitLenCodesMin &&
620 	       !t->litLenLevels[numlitlens - 1])
621 		--numlitlens;
622 
623 	/* code lengths for the distance alphabet */
624 	numdistlens = kDistTableSize32;
625 	while (numdistlens > kNumDistCodesMin &&
626 	       !t->distLevels[numdistlens - 1])
627 		--numdistlens;
628 
629 	kite_deflate_scanlens(numlitlens, t->litLenLevels, levelFreqs);
630 	kite_deflate_scanlens(numdistlens, t->distLevels, levelFreqs);
631 	Huffman_Generate(levelFreqs, t->levelCodes, t->levelLens,
632 			 kLensTableSize, 7);
633 	numblcodes = kLensTableSize;
634 	while (numblcodes > kNumLensCodesMin &&
635 	       !t->levelLens[kCodeLengthAlphabetOrder[numblcodes - 1]])
636 		--numblcodes;
637 
638 	t->numlitlens = numlitlens;
639 	t->numdistlens = numdistlens;
640 	t->numblcodes = numblcodes;
641 
642 	opt_mainlen = Huffman_GetPriceEx(s->mainFreqs, t->litLenLevels,
643 			kMainTableSize, kLenExtraBits32, kSymbolMatch) +
644 		Huffman_GetPriceEx(s->distFreqs, t->distLevels,
645 			kDistTableSize32, kDistExtraBits, 0);
646 	s->costbits = 3 + 5 + 5 + 4 + 3 * numblcodes +
647 		Huffman_GetPriceEx(levelFreqs, t->levelLens,
648 			kLensTableSize, kLevelExtraBits, kTableDirectLevels) +
649 		opt_mainlen;
650 	s->freq_changed = false;
651 }
652 
653 
654 /*
655  * an array used used by the LZ-based encoder to hold the length-distance pairs
656  * found by LZ matchfinder.
657  */
658 struct kite_match {
659 	unsigned int len;
660 	unsigned int dist;
661 };
662 
663 struct kite_matchfinder {
664 	/* pointer to buffer with data to be compressed */
665 	const u8 *buffer;
666 
667 	/* indicate the first byte that doesn't contain valid input data */
668 	const u8 *end;
669 
670 	/* LZ matchfinder hash chain representation */
671 	u32 *hash, *chain;
672 
673 	u32 base;
674 
675 	/* indicate the next byte to run through the match finder */
676 	u32 offset;
677 
678 	u32 cyclic_pos;
679 
680 	/* maximum length of a match that the matchfinder will try to find. */
681 	u16 nice_len;
682 
683 	/* the total sliding window size */
684 	u16 wsiz;
685 
686 	/* how many rounds a matchfinder searches on a hash chain for */
687 	u16 depth;
688 
689 	/* do not perform lazy search no less than this match length */
690 	u16 max_lazy;
691 
692 	/* reduce lazy search no less than this match length */
693 	u8  good_len;
694 
695 	/* current match for lazy matching */
696 	struct kite_match *matches;
697 	struct kite_match matches_matrix[2][4];
698 };
699 
700 /*
701  * This mysterious table is just the CRC of each possible byte. It can be
702  * computed using the standard bit-at-a-time methods. The polynomial can
703  * be seen in entry 128, 0x8408. This corresponds to x^0 + x^5 + x^12.
704  * Add the implicit x^16, and you have the standard CRC-CCITT.
705  */
706 u16 const crc_ccitt_table[256] __attribute__((__aligned__(128))) = {
707 	0x0000, 0x1189, 0x2312, 0x329b, 0x4624, 0x57ad, 0x6536, 0x74bf,
708 	0x8c48, 0x9dc1, 0xaf5a, 0xbed3, 0xca6c, 0xdbe5, 0xe97e, 0xf8f7,
709 	0x1081, 0x0108, 0x3393, 0x221a, 0x56a5, 0x472c, 0x75b7, 0x643e,
710 	0x9cc9, 0x8d40, 0xbfdb, 0xae52, 0xdaed, 0xcb64, 0xf9ff, 0xe876,
711 	0x2102, 0x308b, 0x0210, 0x1399, 0x6726, 0x76af, 0x4434, 0x55bd,
712 	0xad4a, 0xbcc3, 0x8e58, 0x9fd1, 0xeb6e, 0xfae7, 0xc87c, 0xd9f5,
713 	0x3183, 0x200a, 0x1291, 0x0318, 0x77a7, 0x662e, 0x54b5, 0x453c,
714 	0xbdcb, 0xac42, 0x9ed9, 0x8f50, 0xfbef, 0xea66, 0xd8fd, 0xc974,
715 	0x4204, 0x538d, 0x6116, 0x709f, 0x0420, 0x15a9, 0x2732, 0x36bb,
716 	0xce4c, 0xdfc5, 0xed5e, 0xfcd7, 0x8868, 0x99e1, 0xab7a, 0xbaf3,
717 	0x5285, 0x430c, 0x7197, 0x601e, 0x14a1, 0x0528, 0x37b3, 0x263a,
718 	0xdecd, 0xcf44, 0xfddf, 0xec56, 0x98e9, 0x8960, 0xbbfb, 0xaa72,
719 	0x6306, 0x728f, 0x4014, 0x519d, 0x2522, 0x34ab, 0x0630, 0x17b9,
720 	0xef4e, 0xfec7, 0xcc5c, 0xddd5, 0xa96a, 0xb8e3, 0x8a78, 0x9bf1,
721 	0x7387, 0x620e, 0x5095, 0x411c, 0x35a3, 0x242a, 0x16b1, 0x0738,
722 	0xffcf, 0xee46, 0xdcdd, 0xcd54, 0xb9eb, 0xa862, 0x9af9, 0x8b70,
723 	0x8408, 0x9581, 0xa71a, 0xb693, 0xc22c, 0xd3a5, 0xe13e, 0xf0b7,
724 	0x0840, 0x19c9, 0x2b52, 0x3adb, 0x4e64, 0x5fed, 0x6d76, 0x7cff,
725 	0x9489, 0x8500, 0xb79b, 0xa612, 0xd2ad, 0xc324, 0xf1bf, 0xe036,
726 	0x18c1, 0x0948, 0x3bd3, 0x2a5a, 0x5ee5, 0x4f6c, 0x7df7, 0x6c7e,
727 	0xa50a, 0xb483, 0x8618, 0x9791, 0xe32e, 0xf2a7, 0xc03c, 0xd1b5,
728 	0x2942, 0x38cb, 0x0a50, 0x1bd9, 0x6f66, 0x7eef, 0x4c74, 0x5dfd,
729 	0xb58b, 0xa402, 0x9699, 0x8710, 0xf3af, 0xe226, 0xd0bd, 0xc134,
730 	0x39c3, 0x284a, 0x1ad1, 0x0b58, 0x7fe7, 0x6e6e, 0x5cf5, 0x4d7c,
731 	0xc60c, 0xd785, 0xe51e, 0xf497, 0x8028, 0x91a1, 0xa33a, 0xb2b3,
732 	0x4a44, 0x5bcd, 0x6956, 0x78df, 0x0c60, 0x1de9, 0x2f72, 0x3efb,
733 	0xd68d, 0xc704, 0xf59f, 0xe416, 0x90a9, 0x8120, 0xb3bb, 0xa232,
734 	0x5ac5, 0x4b4c, 0x79d7, 0x685e, 0x1ce1, 0x0d68, 0x3ff3, 0x2e7a,
735 	0xe70e, 0xf687, 0xc41c, 0xd595, 0xa12a, 0xb0a3, 0x8238, 0x93b1,
736 	0x6b46, 0x7acf, 0x4854, 0x59dd, 0x2d62, 0x3ceb, 0x0e70, 0x1ff9,
737 	0xf78f, 0xe606, 0xd49d, 0xc514, 0xb1ab, 0xa022, 0x92b9, 0x8330,
738 	0x7bc7, 0x6a4e, 0x58d5, 0x495c, 0x3de3, 0x2c6a, 0x1ef1, 0x0f78
739 };
740 
kite_mf_getmatches_hc3(struct kite_matchfinder * mf,u16 depth,u16 bestlen)741 int kite_mf_getmatches_hc3(struct kite_matchfinder *mf, u16 depth, u16 bestlen)
742 {
743 	const u8 *cur = mf->buffer + mf->offset;
744 	const u8 *qbase = mf->buffer - mf->base;
745 	u32 curMatch;
746 	unsigned int v, hv, i, k, p, wsiz;
747 
748 	if (mf->end - cur < bestlen + 1)
749 		return 0;
750 
751 	v = get_unaligned((u16 *)cur);
752 	hv = v ^ crc_ccitt_table[cur[2]];
753 	curMatch = mf->hash[hv];
754 	p = mf->base + mf->offset;
755 	mf->hash[hv] = p;
756 	mf->chain[mf->cyclic_pos] = curMatch;
757 	wsiz = mf->wsiz;
758 	k = 1;
759 
760 	if (depth) {
761 		unsigned int wpos = wsiz + mf->cyclic_pos;
762 
763 		hv = min_t(unsigned int, mf->nice_len, mf->end - cur);
764 		DBG_BUGON(hv > kMatchMaxLen32);
765 		do {
766 			unsigned int diff = p - curMatch;
767 			const u8 *q;
768 
769 			if (diff >= wsiz)
770 				break;
771 
772 			q = qbase + curMatch;
773 			curMatch = mf->chain[(wpos - diff) & (wsiz - 1)];
774 			if (v == get_unaligned((u16 *)q) && (bestlen < 3 || (
775 			    get_unaligned((u16 *)(cur + bestlen - 1)) ==
776 			    get_unaligned((u16 *)(q + bestlen - 1)) &&
777 			    !memcmp(cur + 3, q + 3, bestlen - 3)))) {
778 				DBG_BUGON(cur[2] != q[2]);
779 				i = erofs_memcmp2(cur + bestlen + 1,
780 					q + bestlen + 1, hv - bestlen - 1);
781 				bestlen += 1 + i;
782 
783 				k -= (k >= ARRAY_SIZE(mf->matches_matrix[0]));
784 				mf->matches[k++] = (struct kite_match) {
785 					.len = bestlen,
786 					.dist = diff,
787 				};
788 				if (bestlen >= hv)
789 					break;
790 			}
791 		} while (--depth);
792 	}
793 	mf->offset++;
794 	mf->cyclic_pos = (mf->cyclic_pos + 1) & (wsiz - 1);
795 	return k - 1;
796 }
797 
798 /* let's align with zlib */
799 static const struct kite_matchfinder_cfg {
800 	u16  good_length;	/* reduce lazy search above this match length */
801 	u16  max_lazy;	/* do not perform lazy search above this match length */
802 	u16  nice_length;	/* quit search above this match length */
803 	u16  depth;
804 	bool lazy_search;
805 } kite_mfcfg[10] = {
806 /*      good lazy nice depth */
807 /* 0 */ {0,    0,  0,    0, false},	/* store only [unsupported] */
808 /* 1 */ {4,    4,  8,    4, false},	/* maximum speed, no lazy matches */
809 /* 2 */ {4,    5, 16,    8, false},
810 /* 3 */ {4,    6, 32,   32, false},
811 
812 /* 4 */ {4,    4,  16,   16, true},	/* lazy matches */
813 /* 5 */ {8,   16,  32,   32, true},
814 /* 6 */ {8,   16, 128,  128, true},
815 /* 7 */ {8,   32, 128,  256, true},
816 /* 8 */ {32, 128, 258, 1024, true},
817 /* 9 */ {32, 258, 258, 4096, true},	/* maximum compression */
818 };
819 
kite_mf_init(struct kite_matchfinder * mf,int wsiz,int level)820 static int kite_mf_init(struct kite_matchfinder *mf, int wsiz, int level)
821 {
822 	const struct kite_matchfinder_cfg *cfg;
823 
824 	if (!level || level >= ARRAY_SIZE(kite_mfcfg))
825 		return -EINVAL;
826 	cfg = &kite_mfcfg[level];
827 
828 	if (wsiz > kHistorySize32 || (1 << ilog2(wsiz)) != wsiz)
829 		return -EINVAL;
830 
831 	mf->hash = calloc(0x10000, sizeof(mf->hash[0]));
832 	if (!mf->hash)
833 		return -ENOMEM;
834 
835 	mf->chain = malloc(sizeof(mf->chain[0]) * wsiz);
836 	if (!mf->chain) {
837 		free(mf->hash);
838 		mf->hash = NULL;
839 		return -ENOMEM;
840 	}
841 	mf->wsiz = wsiz;
842 
843 	mf->good_len = cfg->good_length;
844 	mf->nice_len = cfg->nice_length;
845 	mf->depth = cfg->depth;
846 	mf->max_lazy = cfg->max_lazy;
847 	return cfg->lazy_search;
848 }
849 
kite_mf_reset(struct kite_matchfinder * mf,const void * buffer,const void * end)850 static void kite_mf_reset(struct kite_matchfinder *mf,
851 			  const void *buffer, const void *end)
852 {
853 	mf->buffer = buffer;
854 	mf->end = end;
855 
856 	/*
857 	 * Set the initial value as max_distance + 1.  This would avoid hash
858 	 * zero initialization.
859 	 */
860 	mf->base += mf->offset + kHistorySize32 + 1;
861 
862 	mf->offset = 0;
863 	mf->cyclic_pos = 0;
864 
865 	mf->matches = mf->matches_matrix[0];
866 	mf->matches_matrix[0][0].len =
867 		mf->matches_matrix[1][0].len = kMatchMinLen - 1;
868 }
869 
deflate_count_code(struct kite_deflate * s,bool literal,unsigned int lenSlot,unsigned int distSlot)870 static bool deflate_count_code(struct kite_deflate *s, bool literal,
871 			       unsigned int lenSlot, unsigned int distSlot)
872 {
873 	struct kite_deflate_table *t = s->tab;
874 	unsigned int lenbase = (literal ? 0 : kSymbolMatch);
875 	u64 rem = (s->outlen - s->pos_out) * 8 - s->bitpos;
876 	bool recalc = false;
877 	unsigned int bits;
878 
879 	s->freq_changed = true;
880 	++s->mainFreqs[lenbase + lenSlot];
881 	if (!literal)
882 		++s->distFreqs[distSlot];
883 
884 	if (s->encode_mode == 1) {
885 		if (literal) {
886 			bits = kstaticHuff_litLenLevels[lenSlot];
887 			goto out;
888 		}
889 		bits = kstaticHuff_litLenLevels[kSymbolMatch + lenSlot] +
890 			kLenExtraBits32[lenSlot] + 5 + kDistExtraBits[distSlot];
891 		goto out;
892 	}
893 
894 	/* XXX: more ideas to be done later */
895 	recalc |= (!literal && !t->distLevels[distSlot]);
896 	recalc |= !t->litLenLevels[lenbase + lenSlot];
897 	if (recalc) {
898 		kite_dbg("recalc %c lS %u dS %u", literal ? 'l' : 'm',
899 			 lenSlot, distSlot);
900 		s->tab = s->tables + (s->tab == s->tables);
901 		kite_deflate_fixdynblock(s);
902 		bits = 0;
903 		goto out;
904 	}
905 
906 	if (literal) {
907 		bits = t->litLenLevels[lenSlot];
908 		goto out;
909 	}
910 
911 	bits = t->distLevels[distSlot] + kDistExtraBits[distSlot] +
912 		t->litLenLevels[kSymbolMatch + lenSlot] +
913 		kLenExtraBits32[lenSlot];
914 out:
915 	if (rem < s->costbits + bits) {
916 		--s->mainFreqs[lenbase + lenSlot];
917 		if (!literal)
918 			--s->distFreqs[distSlot];
919 		if (recalc)
920 			s->tab = s->tables + (s->tab == s->tables);
921 		return false;
922 	}
923 	s->costbits += bits;
924 	return true;
925 }
926 
kite_deflate_tally(struct kite_deflate * s,struct kite_match * match)927 static bool kite_deflate_tally(struct kite_deflate *s,
928 			       struct kite_match *match)
929 {
930 	struct kite_deflate_symbol *sym = s->sym + s->symbols;
931 	u32 fixedcost = ~0;
932 	bool hassp;
933 
934 	*sym = (struct kite_deflate_symbol) {
935 		.len = match->len,
936 		.dist = match->dist,
937 	};
938 
939 retry:
940 	if (sym->len < kMatchMinLen) {
941 		hassp = deflate_count_code(s, true, sym->dist, 0);
942 	} else {
943 		unsigned int lc = sym->len - kMatchMinLen;
944 		unsigned int lenSlot = g_LenSlots[lc];
945 		unsigned int distSlot = deflateDistSlot(sym->dist - 1);
946 
947 		hassp = deflate_count_code(s, false, lenSlot, distSlot);
948 	}
949 
950 	if (!hassp) {
951 		if (s->encode_mode == 1) {
952 			fixedcost = s->costbits;
953 			s->encode_mode = 2;
954 			goto retry;
955 		}
956 		s->lastblock = true;
957 		if (fixedcost <= s->costbits)
958 			s->encode_mode = 1;
959 		return true;
960 	}
961 	++s->symbols;
962 	return false;
963 }
964 
kite_deflate_writestore(struct kite_deflate * s)965 static void kite_deflate_writestore(struct kite_deflate *s)
966 {
967 	bool fb = !s->startpos && !s->bitpos;
968 	unsigned int totalsiz = s->pos_in - s->prev_valid - s->startpos;
969 
970 	do {
971 		unsigned int len = min_t(unsigned int, totalsiz, 65535);
972 
973 		totalsiz -= len;
974 		writebits(s, (fb << 3) | (kStored << 1) |
975 			  (s->lastblock && !totalsiz), 3 + fb);
976 		flushbits(s);
977 		writebits(s, len, 16);
978 		writebits(s, len ^ 0xffff, 16);
979 		flushbits(s);
980 		memcpy(s->out + s->pos_out, s->in + s->startpos, len);
981 		s->pos_out += len;
982 		s->startpos += len;
983 	} while (totalsiz);
984 }
985 
kite_deflate_endblock(struct kite_deflate * s)986 static void kite_deflate_endblock(struct kite_deflate *s)
987 {
988 	if (s->encode_mode == 1) {
989 		u32 fixedcost = s->costbits;
990 		unsigned int storelen, storeblocks, storecost;
991 
992 		kite_deflate_fixdynblock(s);
993 		if (fixedcost > s->costbits)
994 			s->encode_mode = 2;
995 		else
996 			s->costbits = fixedcost;
997 
998 		storelen = s->pos_in - s->prev_valid - s->startpos;
999 		storeblocks = max(DIV_ROUND_UP(storelen, 65535), 1U);
1000 		storecost = (8 - s->bitpos) + storeblocks - 1 +
1001 			storeblocks * 32 + storelen * 8;
1002 		if (s->costbits > storecost) {
1003 			s->costbits = storecost;
1004 			s->encode_mode = 0;
1005 		}
1006 	}
1007 
1008 	s->lastblock |= (s->costbits + s->bitpos >=
1009 			(s->outlen - s->pos_out) * 8);
1010 }
1011 
kite_deflate_startblock(struct kite_deflate * s)1012 static void kite_deflate_startblock(struct kite_deflate *s)
1013 {
1014 	memset(s->mainFreqs, 0, sizeof(s->mainFreqs));
1015 	memset(s->distFreqs, 0, sizeof(s->distFreqs));
1016 	memset(s->tables, 0, sizeof(s->tables[0]));
1017 	s->symbols = 0;
1018 	s->mainFreqs[kSymbolEndOfBlock]++;
1019 	s->encode_mode = 1;
1020 	s->tab = s->tables;
1021 	s->costbits = 3 + kstaticHuff_litLenLevels[kSymbolEndOfBlock];
1022 }
1023 
kite_deflate_commitblock(struct kite_deflate * s)1024 static bool kite_deflate_commitblock(struct kite_deflate *s)
1025 {
1026 	if (s->encode_mode == 1) {
1027 		kite_deflate_setfixedtrees(s);
1028 		kite_deflate_writeblock(s, true);
1029 	} else if (s->encode_mode == 2) {
1030 		kite_deflate_sendtrees(s);
1031 		kite_deflate_writeblock(s, false);
1032 	} else {
1033 		kite_deflate_writestore(s);
1034 	}
1035 	s->startpos = s->pos_in - s->prev_valid;
1036 	return s->lastblock;
1037 }
1038 
kite_deflate_fast(struct kite_deflate * s)1039 static bool kite_deflate_fast(struct kite_deflate *s)
1040 {
1041 	struct kite_matchfinder *mf = s->mf;
1042 
1043 	kite_deflate_startblock(s);
1044 	while (1) {
1045 		int matches = kite_mf_getmatches_hc3(mf, mf->depth,
1046 				kMatchMinLen - 1);
1047 
1048 		if (matches) {
1049 			unsigned int len = mf->matches[matches].len;
1050 			unsigned int dist = mf->matches[matches].dist;
1051 
1052 			if (len == kMatchMinLen && dist > ZLIB_DISTANCE_TOO_FAR)
1053 				goto nomatch;
1054 
1055 			kite_dbg("%u matches found: longest [%u,%u] of distance %u",
1056 				 matches, s->pos_in, s->pos_in + len - 1, dist);
1057 
1058 			if (kite_deflate_tally(s, mf->matches + matches))
1059 				break;
1060 			s->pos_in += len;
1061 			/* skip the rest bytes */
1062 			while (--len)
1063 				(void)kite_mf_getmatches_hc3(mf, 0, 0);
1064 		} else {
1065 nomatch:
1066 			mf->matches[0].dist = s->in[s->pos_in];
1067 			if (isprint(s->in[s->pos_in]))
1068 				kite_dbg("literal %c pos_in %u", s->in[s->pos_in], s->pos_in);
1069 			else
1070 				kite_dbg("literal %x pos_in %u", s->in[s->pos_in], s->pos_in);
1071 
1072 			if (kite_deflate_tally(s, mf->matches))
1073 				break;
1074 			++s->pos_in;
1075 		}
1076 
1077 		s->lastblock |= (s->pos_in >= s->inlen);
1078 		if (s->pos_in >= s->inlen || s->symbols >= s->max_symbols) {
1079 			kite_deflate_endblock(s);
1080 			break;
1081 		}
1082 	}
1083 	return kite_deflate_commitblock(s);
1084 }
1085 
kite_deflate_slow(struct kite_deflate * s)1086 static bool kite_deflate_slow(struct kite_deflate *s)
1087 {
1088 	struct kite_matchfinder *mf = s->mf;
1089 	bool flush = false;
1090 
1091 	kite_deflate_startblock(s);
1092 	while (1) {
1093 		struct kite_match *prev_matches = mf->matches;
1094 		unsigned int len = kMatchMinLen - 1;
1095 		int matches;
1096 		unsigned int len0;
1097 
1098 		mf->matches = mf->matches_matrix[
1099 				mf->matches == mf->matches_matrix[0]];
1100 		mf->matches[0].dist = s->in[s->pos_in];
1101 
1102 		len0 = prev_matches[s->prev_longest].len;
1103 		if (len0 < mf->max_lazy) {
1104 			matches = kite_mf_getmatches_hc3(mf, mf->depth >>
1105 				(len0 >= mf->good_len), len0);
1106 			if (matches) {
1107 				len = mf->matches[matches].len;
1108 				if (len == kMatchMinLen &&
1109 				    mf->matches[matches].dist > ZLIB_DISTANCE_TOO_FAR) {
1110 					matches = 0;
1111 					len = kMatchMinLen - 1;
1112 				}
1113 			}
1114 		} else {
1115 			matches = 0;
1116 			(void)kite_mf_getmatches_hc3(mf, 0, 0);
1117 		}
1118 
1119 		if (len < len0) {
1120 			if (kite_deflate_tally(s,
1121 					prev_matches + s->prev_longest))
1122 				break;
1123 
1124 			s->pos_in += --len0;
1125 			/* skip the rest bytes */
1126 			while (--len0)
1127 				(void)kite_mf_getmatches_hc3(mf, 0, 0);
1128 			s->prev_valid = false;
1129 			s->prev_longest = 0;
1130 		} else {
1131 			if (!s->prev_valid)
1132 				s->prev_valid = true;
1133 			else if (kite_deflate_tally(s, prev_matches))
1134 				break;
1135 			++s->pos_in;
1136 			s->prev_longest = matches;
1137 		}
1138 
1139 		s->lastblock |= (s->pos_in >= s->inlen);
1140 		if (s->pos_in >= s->inlen) {
1141 			flush = true;
1142 			break;
1143 		}
1144 		if (s->symbols >= s->max_symbols) {
1145 			kite_deflate_endblock(s);
1146 			break;
1147 		}
1148 	}
1149 
1150 	if (flush && s->prev_valid) {
1151 		(void)kite_deflate_tally(s, mf->matches + s->prev_longest);
1152 		s->prev_valid = false;
1153 	}
1154 	return kite_deflate_commitblock(s);
1155 }
1156 
kite_deflate_end(struct kite_deflate * s)1157 void kite_deflate_end(struct kite_deflate *s)
1158 {
1159 	if (s->mf) {
1160 		if (s->mf->hash)
1161 			free(s->mf->hash);
1162 		if (s->mf->chain)
1163 			free(s->mf->chain);
1164 		free(s->mf);
1165 	}
1166 	if (s->sym)
1167 		free(s->sym);
1168 	free(s);
1169 }
1170 
kite_deflate_init(int level,unsigned int dict_size)1171 struct kite_deflate *kite_deflate_init(int level, unsigned int dict_size)
1172 {
1173 	struct kite_deflate *s;
1174 	int err;
1175 
1176 	kite_deflate_init_once();
1177 	s = calloc(1, sizeof(*s));
1178 	if (!s)
1179 		return ERR_PTR(-ENOMEM);
1180 
1181 	s->max_symbols = 16384;
1182 	s->sym = malloc(sizeof(s->sym[0]) * s->max_symbols);
1183 	if (!s->sym) {
1184 		err = -ENOMEM;
1185 		goto err_out;
1186 	}
1187 
1188 	s->mf = malloc(sizeof(*s->mf));
1189 	if (!s->mf) {
1190 		err = -ENOMEM;
1191 		goto err_out;
1192 	}
1193 
1194 	if (!dict_size)
1195 		dict_size = kHistorySize32;
1196 
1197 	err = kite_mf_init(s->mf, dict_size, level);
1198 	if (err < 0)
1199 		goto err_out;
1200 
1201 	s->lazy_search = err;
1202 	return s;
1203 err_out:
1204 	if (s->mf)
1205 		free(s->mf);
1206 	if (s->sym)
1207 		free(s->sym);
1208 	free(s);
1209 	return ERR_PTR(err);
1210 }
1211 
kite_deflate_destsize(struct kite_deflate * s,const u8 * in,u8 * out,unsigned int * srcsize,unsigned int target_dstsize)1212 int kite_deflate_destsize(struct kite_deflate *s, const u8 *in, u8 *out,
1213 			   unsigned int *srcsize, unsigned int target_dstsize)
1214 {
1215 	memset(s, 0, offsetof(struct kite_deflate, mainFreqs));
1216 	s->in = in;
1217 	s->inlen = *srcsize;
1218 	s->out = out;
1219 	s->outlen = target_dstsize;
1220 	kite_mf_reset(s->mf, in, in + s->inlen);
1221 
1222 	if (s->lazy_search)
1223 		while (!kite_deflate_slow(s));
1224 	else
1225 		while (!kite_deflate_fast(s));
1226 	flushbits(s);
1227 
1228 	*srcsize = s->startpos;
1229 	return s->pos_out;
1230 }
1231 
1232 #if TEST
1233 #include <unistd.h>
1234 #include <fcntl.h>
1235 #include <sys/mman.h>
1236 
main(int argc,char * argv[])1237 int main(int argc, char *argv[])
1238 {
1239 	int fd;
1240 	u64 filelength;
1241 	u8 out[1048576], *buf;
1242 	int dstsize = 4096;
1243 	unsigned int srcsize, outsize;
1244 	struct kite_deflate *s;
1245 
1246 	fd = open(argv[1], O_RDONLY);
1247 	if (fd < 0)
1248 		return -errno;
1249 	if (argc > 2)
1250 		dstsize = atoi(argv[2]);
1251 	filelength = lseek(fd, 0, SEEK_END);
1252 
1253 	s = kite_deflate_init(9, 0);
1254 	if (IS_ERR(s))
1255 		return PTR_ERR(s);
1256 
1257 	filelength = lseek(fd, 0, SEEK_END);
1258 	buf = mmap(NULL, filelength, PROT_READ, MAP_SHARED, fd, 0);
1259 	if (buf == MAP_FAILED)
1260 		return -errno;
1261 	close(fd);
1262 
1263 	srcsize = filelength;
1264 	outsize = kite_deflate_destsize(s, buf, out, &srcsize, dstsize);
1265 	fd = open("out.txt", O_WRONLY | O_CREAT | O_TRUNC, 0644);
1266 	write(fd, out, outsize);
1267 	close(fd);
1268 	kite_deflate_end(s);
1269 	return 0;
1270 }
1271 #endif
1272