• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * FSE : Finite State Entropy encoder
3  * Copyright (C) 2013-2015, Yann Collet.
4  *
5  * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are
9  * met:
10  *
11  *   * Redistributions of source code must retain the above copyright
12  * notice, this list of conditions and the following disclaimer.
13  *   * Redistributions in binary form must reproduce the above
14  * copyright notice, this list of conditions and the following disclaimer
15  * in the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  *
30  * This program is free software; you can redistribute it and/or modify it under
31  * the terms of the GNU General Public License version 2 as published by the
32  * Free Software Foundation. This program is dual-licensed; you may select
33  * either version 2 of the GNU General Public License ("GPL") or BSD license
34  * ("BSD").
35  *
36  * You can contact the author at :
37  * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
38  */
39 
40 /* **************************************************************
41 *  Compiler specifics
42 ****************************************************************/
43 #define FORCE_INLINE static __always_inline
44 
45 /* **************************************************************
46 *  Includes
47 ****************************************************************/
48 #include "bitstream.h"
49 #include "fse.h"
50 #include <linux/compiler.h>
51 #include <linux/kernel.h>
52 #include <linux/math64.h>
53 #include <linux/string.h> /* memcpy, memset */
54 
55 /* **************************************************************
56 *  Error Management
57 ****************************************************************/
58 #define FSE_STATIC_ASSERT(c)                                   \
59 	{                                                      \
60 		enum { FSE_static_assert = 1 / (int)(!!(c)) }; \
61 	} /* use only *after* variable declarations */
62 
63 /* **************************************************************
64 *  Templates
65 ****************************************************************/
66 /*
67   designed to be included
68   for type-specific functions (template emulation in C)
69   Objective is to write these functions only once, for improved maintenance
70 */
71 
72 /* safety checks */
73 #ifndef FSE_FUNCTION_EXTENSION
74 #error "FSE_FUNCTION_EXTENSION must be defined"
75 #endif
76 #ifndef FSE_FUNCTION_TYPE
77 #error "FSE_FUNCTION_TYPE must be defined"
78 #endif
79 
80 /* Function names */
81 #define FSE_CAT(X, Y) X##Y
82 #define FSE_FUNCTION_NAME(X, Y) FSE_CAT(X, Y)
83 #define FSE_TYPE_NAME(X, Y) FSE_CAT(X, Y)
84 
85 /* Function templates */
86 
87 /* FSE_buildCTable_wksp() :
88  * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
89  * wkspSize should be sized to handle worst case situation, which is `1<<max_tableLog * sizeof(FSE_FUNCTION_TYPE)`
90  * workSpace must also be properly aligned with FSE_FUNCTION_TYPE requirements
91  */
FSE_buildCTable_wksp(FSE_CTable * ct,const short * normalizedCounter,unsigned maxSymbolValue,unsigned tableLog,void * workspace,size_t workspaceSize)92 size_t FSE_buildCTable_wksp(FSE_CTable *ct, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void *workspace, size_t workspaceSize)
93 {
94 	U32 const tableSize = 1 << tableLog;
95 	U32 const tableMask = tableSize - 1;
96 	void *const ptr = ct;
97 	U16 *const tableU16 = ((U16 *)ptr) + 2;
98 	void *const FSCT = ((U32 *)ptr) + 1 /* header */ + (tableLog ? tableSize >> 1 : 1);
99 	FSE_symbolCompressionTransform *const symbolTT = (FSE_symbolCompressionTransform *)(FSCT);
100 	U32 const step = FSE_TABLESTEP(tableSize);
101 	U32 highThreshold = tableSize - 1;
102 
103 	U32 *cumul;
104 	FSE_FUNCTION_TYPE *tableSymbol;
105 	size_t spaceUsed32 = 0;
106 
107 	cumul = (U32 *)workspace + spaceUsed32;
108 	spaceUsed32 += FSE_MAX_SYMBOL_VALUE + 2;
109 	tableSymbol = (FSE_FUNCTION_TYPE *)((U32 *)workspace + spaceUsed32);
110 	spaceUsed32 += ALIGN(sizeof(FSE_FUNCTION_TYPE) * ((size_t)1 << tableLog), sizeof(U32)) >> 2;
111 
112 	if ((spaceUsed32 << 2) > workspaceSize)
113 		return ERROR(tableLog_tooLarge);
114 	workspace = (U32 *)workspace + spaceUsed32;
115 	workspaceSize -= (spaceUsed32 << 2);
116 
117 	/* CTable header */
118 	tableU16[-2] = (U16)tableLog;
119 	tableU16[-1] = (U16)maxSymbolValue;
120 
121 	/* For explanations on how to distribute symbol values over the table :
122 	*  http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
123 
124 	/* symbol start positions */
125 	{
126 		U32 u;
127 		cumul[0] = 0;
128 		for (u = 1; u <= maxSymbolValue + 1; u++) {
129 			if (normalizedCounter[u - 1] == -1) { /* Low proba symbol */
130 				cumul[u] = cumul[u - 1] + 1;
131 				tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u - 1);
132 			} else {
133 				cumul[u] = cumul[u - 1] + normalizedCounter[u - 1];
134 			}
135 		}
136 		cumul[maxSymbolValue + 1] = tableSize + 1;
137 	}
138 
139 	/* Spread symbols */
140 	{
141 		U32 position = 0;
142 		U32 symbol;
143 		for (symbol = 0; symbol <= maxSymbolValue; symbol++) {
144 			int nbOccurences;
145 			for (nbOccurences = 0; nbOccurences < normalizedCounter[symbol]; nbOccurences++) {
146 				tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol;
147 				position = (position + step) & tableMask;
148 				while (position > highThreshold)
149 					position = (position + step) & tableMask; /* Low proba area */
150 			}
151 		}
152 
153 		if (position != 0)
154 			return ERROR(GENERIC); /* Must have gone through all positions */
155 	}
156 
157 	/* Build table */
158 	{
159 		U32 u;
160 		for (u = 0; u < tableSize; u++) {
161 			FSE_FUNCTION_TYPE s = tableSymbol[u];	/* note : static analyzer may not understand tableSymbol is properly initialized */
162 			tableU16[cumul[s]++] = (U16)(tableSize + u); /* TableU16 : sorted by symbol order; gives next state value */
163 		}
164 	}
165 
166 	/* Build Symbol Transformation Table */
167 	{
168 		unsigned total = 0;
169 		unsigned s;
170 		for (s = 0; s <= maxSymbolValue; s++) {
171 			switch (normalizedCounter[s]) {
172 			case 0: break;
173 
174 			case -1:
175 			case 1:
176 				symbolTT[s].deltaNbBits = (tableLog << 16) - (1 << tableLog);
177 				symbolTT[s].deltaFindState = total - 1;
178 				total++;
179 				break;
180 			default: {
181 				U32 const maxBitsOut = tableLog - BIT_highbit32(normalizedCounter[s] - 1);
182 				U32 const minStatePlus = normalizedCounter[s] << maxBitsOut;
183 				symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
184 				symbolTT[s].deltaFindState = total - normalizedCounter[s];
185 				total += normalizedCounter[s];
186 			}
187 			}
188 		}
189 	}
190 
191 	return 0;
192 }
193 
194 /*-**************************************************************
195 *  FSE NCount encoding-decoding
196 ****************************************************************/
FSE_NCountWriteBound(unsigned maxSymbolValue,unsigned tableLog)197 size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
198 {
199 	size_t const maxHeaderSize = (((maxSymbolValue + 1) * tableLog) >> 3) + 3;
200 	return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */
201 }
202 
FSE_writeNCount_generic(void * header,size_t headerBufferSize,const short * normalizedCounter,unsigned maxSymbolValue,unsigned tableLog,unsigned writeIsSafe)203 static size_t FSE_writeNCount_generic(void *header, size_t headerBufferSize, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
204 				      unsigned writeIsSafe)
205 {
206 	BYTE *const ostart = (BYTE *)header;
207 	BYTE *out = ostart;
208 	BYTE *const oend = ostart + headerBufferSize;
209 	int nbBits;
210 	const int tableSize = 1 << tableLog;
211 	int remaining;
212 	int threshold;
213 	U32 bitStream;
214 	int bitCount;
215 	unsigned charnum = 0;
216 	int previous0 = 0;
217 
218 	bitStream = 0;
219 	bitCount = 0;
220 	/* Table Size */
221 	bitStream += (tableLog - FSE_MIN_TABLELOG) << bitCount;
222 	bitCount += 4;
223 
224 	/* Init */
225 	remaining = tableSize + 1; /* +1 for extra accuracy */
226 	threshold = tableSize;
227 	nbBits = tableLog + 1;
228 
229 	while (remaining > 1) { /* stops at 1 */
230 		if (previous0) {
231 			unsigned start = charnum;
232 			while (!normalizedCounter[charnum])
233 				charnum++;
234 			while (charnum >= start + 24) {
235 				start += 24;
236 				bitStream += 0xFFFFU << bitCount;
237 				if ((!writeIsSafe) && (out > oend - 2))
238 					return ERROR(dstSize_tooSmall); /* Buffer overflow */
239 				out[0] = (BYTE)bitStream;
240 				out[1] = (BYTE)(bitStream >> 8);
241 				out += 2;
242 				bitStream >>= 16;
243 			}
244 			while (charnum >= start + 3) {
245 				start += 3;
246 				bitStream += 3 << bitCount;
247 				bitCount += 2;
248 			}
249 			bitStream += (charnum - start) << bitCount;
250 			bitCount += 2;
251 			if (bitCount > 16) {
252 				if ((!writeIsSafe) && (out > oend - 2))
253 					return ERROR(dstSize_tooSmall); /* Buffer overflow */
254 				out[0] = (BYTE)bitStream;
255 				out[1] = (BYTE)(bitStream >> 8);
256 				out += 2;
257 				bitStream >>= 16;
258 				bitCount -= 16;
259 			}
260 		}
261 		{
262 			int count = normalizedCounter[charnum++];
263 			int const max = (2 * threshold - 1) - remaining;
264 			remaining -= count < 0 ? -count : count;
265 			count++; /* +1 for extra accuracy */
266 			if (count >= threshold)
267 				count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
268 			bitStream += count << bitCount;
269 			bitCount += nbBits;
270 			bitCount -= (count < max);
271 			previous0 = (count == 1);
272 			if (remaining < 1)
273 				return ERROR(GENERIC);
274 			while (remaining < threshold)
275 				nbBits--, threshold >>= 1;
276 		}
277 		if (bitCount > 16) {
278 			if ((!writeIsSafe) && (out > oend - 2))
279 				return ERROR(dstSize_tooSmall); /* Buffer overflow */
280 			out[0] = (BYTE)bitStream;
281 			out[1] = (BYTE)(bitStream >> 8);
282 			out += 2;
283 			bitStream >>= 16;
284 			bitCount -= 16;
285 		}
286 	}
287 
288 	/* flush remaining bitStream */
289 	if ((!writeIsSafe) && (out > oend - 2))
290 		return ERROR(dstSize_tooSmall); /* Buffer overflow */
291 	out[0] = (BYTE)bitStream;
292 	out[1] = (BYTE)(bitStream >> 8);
293 	out += (bitCount + 7) / 8;
294 
295 	if (charnum > maxSymbolValue + 1)
296 		return ERROR(GENERIC);
297 
298 	return (out - ostart);
299 }
300 
FSE_writeNCount(void * buffer,size_t bufferSize,const short * normalizedCounter,unsigned maxSymbolValue,unsigned tableLog)301 size_t FSE_writeNCount(void *buffer, size_t bufferSize, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
302 {
303 	if (tableLog > FSE_MAX_TABLELOG)
304 		return ERROR(tableLog_tooLarge); /* Unsupported */
305 	if (tableLog < FSE_MIN_TABLELOG)
306 		return ERROR(GENERIC); /* Unsupported */
307 
308 	if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog))
309 		return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0);
310 
311 	return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1);
312 }
313 
314 /*-**************************************************************
315 *  Counting histogram
316 ****************************************************************/
317 /*! FSE_count_simple
318 	This function counts byte values within `src`, and store the histogram into table `count`.
319 	It doesn't use any additional memory.
320 	But this function is unsafe : it doesn't check that all values within `src` can fit into `count`.
321 	For this reason, prefer using a table `count` with 256 elements.
322 	@return : count of most numerous element
323 */
FSE_count_simple(unsigned * count,unsigned * maxSymbolValuePtr,const void * src,size_t srcSize)324 size_t FSE_count_simple(unsigned *count, unsigned *maxSymbolValuePtr, const void *src, size_t srcSize)
325 {
326 	const BYTE *ip = (const BYTE *)src;
327 	const BYTE *const end = ip + srcSize;
328 	unsigned maxSymbolValue = *maxSymbolValuePtr;
329 	unsigned max = 0;
330 
331 	memset(count, 0, (maxSymbolValue + 1) * sizeof(*count));
332 	if (srcSize == 0) {
333 		*maxSymbolValuePtr = 0;
334 		return 0;
335 	}
336 
337 	while (ip < end)
338 		count[*ip++]++;
339 
340 	while (!count[maxSymbolValue])
341 		maxSymbolValue--;
342 	*maxSymbolValuePtr = maxSymbolValue;
343 
344 	{
345 		U32 s;
346 		for (s = 0; s <= maxSymbolValue; s++)
347 			if (count[s] > max)
348 				max = count[s];
349 	}
350 
351 	return (size_t)max;
352 }
353 
354 /* FSE_count_parallel_wksp() :
355  * Same as FSE_count_parallel(), but using an externally provided scratch buffer.
356  * `workSpace` size must be a minimum of `1024 * sizeof(unsigned)`` */
FSE_count_parallel_wksp(unsigned * count,unsigned * maxSymbolValuePtr,const void * source,size_t sourceSize,unsigned checkMax,unsigned * const workSpace)357 static size_t FSE_count_parallel_wksp(unsigned *count, unsigned *maxSymbolValuePtr, const void *source, size_t sourceSize, unsigned checkMax,
358 				      unsigned *const workSpace)
359 {
360 	const BYTE *ip = (const BYTE *)source;
361 	const BYTE *const iend = ip + sourceSize;
362 	unsigned maxSymbolValue = *maxSymbolValuePtr;
363 	unsigned max = 0;
364 	U32 *const Counting1 = workSpace;
365 	U32 *const Counting2 = Counting1 + 256;
366 	U32 *const Counting3 = Counting2 + 256;
367 	U32 *const Counting4 = Counting3 + 256;
368 
369 	memset(Counting1, 0, 4 * 256 * sizeof(unsigned));
370 
371 	/* safety checks */
372 	if (!sourceSize) {
373 		memset(count, 0, maxSymbolValue + 1);
374 		*maxSymbolValuePtr = 0;
375 		return 0;
376 	}
377 	if (!maxSymbolValue)
378 		maxSymbolValue = 255; /* 0 == default */
379 
380 	/* by stripes of 16 bytes */
381 	{
382 		U32 cached = ZSTD_read32(ip);
383 		ip += 4;
384 		while (ip < iend - 15) {
385 			U32 c = cached;
386 			cached = ZSTD_read32(ip);
387 			ip += 4;
388 			Counting1[(BYTE)c]++;
389 			Counting2[(BYTE)(c >> 8)]++;
390 			Counting3[(BYTE)(c >> 16)]++;
391 			Counting4[c >> 24]++;
392 			c = cached;
393 			cached = ZSTD_read32(ip);
394 			ip += 4;
395 			Counting1[(BYTE)c]++;
396 			Counting2[(BYTE)(c >> 8)]++;
397 			Counting3[(BYTE)(c >> 16)]++;
398 			Counting4[c >> 24]++;
399 			c = cached;
400 			cached = ZSTD_read32(ip);
401 			ip += 4;
402 			Counting1[(BYTE)c]++;
403 			Counting2[(BYTE)(c >> 8)]++;
404 			Counting3[(BYTE)(c >> 16)]++;
405 			Counting4[c >> 24]++;
406 			c = cached;
407 			cached = ZSTD_read32(ip);
408 			ip += 4;
409 			Counting1[(BYTE)c]++;
410 			Counting2[(BYTE)(c >> 8)]++;
411 			Counting3[(BYTE)(c >> 16)]++;
412 			Counting4[c >> 24]++;
413 		}
414 		ip -= 4;
415 	}
416 
417 	/* finish last symbols */
418 	while (ip < iend)
419 		Counting1[*ip++]++;
420 
421 	if (checkMax) { /* verify stats will fit into destination table */
422 		U32 s;
423 		for (s = 255; s > maxSymbolValue; s--) {
424 			Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
425 			if (Counting1[s])
426 				return ERROR(maxSymbolValue_tooSmall);
427 		}
428 	}
429 
430 	{
431 		U32 s;
432 		for (s = 0; s <= maxSymbolValue; s++) {
433 			count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
434 			if (count[s] > max)
435 				max = count[s];
436 		}
437 	}
438 
439 	while (!count[maxSymbolValue])
440 		maxSymbolValue--;
441 	*maxSymbolValuePtr = maxSymbolValue;
442 	return (size_t)max;
443 }
444 
445 /* FSE_countFast_wksp() :
446  * Same as FSE_countFast(), but using an externally provided scratch buffer.
447  * `workSpace` size must be table of >= `1024` unsigned */
FSE_countFast_wksp(unsigned * count,unsigned * maxSymbolValuePtr,const void * source,size_t sourceSize,unsigned * workSpace)448 size_t FSE_countFast_wksp(unsigned *count, unsigned *maxSymbolValuePtr, const void *source, size_t sourceSize, unsigned *workSpace)
449 {
450 	if (sourceSize < 1500)
451 		return FSE_count_simple(count, maxSymbolValuePtr, source, sourceSize);
452 	return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 0, workSpace);
453 }
454 
455 /* FSE_count_wksp() :
456  * Same as FSE_count(), but using an externally provided scratch buffer.
457  * `workSpace` size must be table of >= `1024` unsigned */
FSE_count_wksp(unsigned * count,unsigned * maxSymbolValuePtr,const void * source,size_t sourceSize,unsigned * workSpace)458 size_t FSE_count_wksp(unsigned *count, unsigned *maxSymbolValuePtr, const void *source, size_t sourceSize, unsigned *workSpace)
459 {
460 	if (*maxSymbolValuePtr < 255)
461 		return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 1, workSpace);
462 	*maxSymbolValuePtr = 255;
463 	return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace);
464 }
465 
466 /*-**************************************************************
467 *  FSE Compression Code
468 ****************************************************************/
469 /*! FSE_sizeof_CTable() :
470 	FSE_CTable is a variable size structure which contains :
471 	`U16 tableLog;`
472 	`U16 maxSymbolValue;`
473 	`U16 nextStateNumber[1 << tableLog];`                         // This size is variable
474 	`FSE_symbolCompressionTransform symbolTT[maxSymbolValue+1];`  // This size is variable
475 Allocation is manual (C standard does not support variable-size structures).
476 */
FSE_sizeof_CTable(unsigned maxSymbolValue,unsigned tableLog)477 size_t FSE_sizeof_CTable(unsigned maxSymbolValue, unsigned tableLog)
478 {
479 	if (tableLog > FSE_MAX_TABLELOG)
480 		return ERROR(tableLog_tooLarge);
481 	return FSE_CTABLE_SIZE_U32(tableLog, maxSymbolValue) * sizeof(U32);
482 }
483 
484 /* provides the minimum logSize to safely represent a distribution */
FSE_minTableLog(size_t srcSize,unsigned maxSymbolValue)485 static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
486 {
487 	U32 minBitsSrc = BIT_highbit32((U32)(srcSize - 1)) + 1;
488 	U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2;
489 	U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
490 	return minBits;
491 }
492 
FSE_optimalTableLog_internal(unsigned maxTableLog,size_t srcSize,unsigned maxSymbolValue,unsigned minus)493 unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus)
494 {
495 	U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus;
496 	U32 tableLog = maxTableLog;
497 	U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue);
498 	if (tableLog == 0)
499 		tableLog = FSE_DEFAULT_TABLELOG;
500 	if (maxBitsSrc < tableLog)
501 		tableLog = maxBitsSrc; /* Accuracy can be reduced */
502 	if (minBits > tableLog)
503 		tableLog = minBits; /* Need a minimum to safely represent all symbol values */
504 	if (tableLog < FSE_MIN_TABLELOG)
505 		tableLog = FSE_MIN_TABLELOG;
506 	if (tableLog > FSE_MAX_TABLELOG)
507 		tableLog = FSE_MAX_TABLELOG;
508 	return tableLog;
509 }
510 
FSE_optimalTableLog(unsigned maxTableLog,size_t srcSize,unsigned maxSymbolValue)511 unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
512 {
513 	return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2);
514 }
515 
516 /* Secondary normalization method.
517    To be used when primary method fails. */
518 
FSE_normalizeM2(short * norm,U32 tableLog,const unsigned * count,size_t total,U32 maxSymbolValue)519 static size_t FSE_normalizeM2(short *norm, U32 tableLog, const unsigned *count, size_t total, U32 maxSymbolValue)
520 {
521 	short const NOT_YET_ASSIGNED = -2;
522 	U32 s;
523 	U32 distributed = 0;
524 	U32 ToDistribute;
525 
526 	/* Init */
527 	U32 const lowThreshold = (U32)(total >> tableLog);
528 	U32 lowOne = (U32)((total * 3) >> (tableLog + 1));
529 
530 	for (s = 0; s <= maxSymbolValue; s++) {
531 		if (count[s] == 0) {
532 			norm[s] = 0;
533 			continue;
534 		}
535 		if (count[s] <= lowThreshold) {
536 			norm[s] = -1;
537 			distributed++;
538 			total -= count[s];
539 			continue;
540 		}
541 		if (count[s] <= lowOne) {
542 			norm[s] = 1;
543 			distributed++;
544 			total -= count[s];
545 			continue;
546 		}
547 
548 		norm[s] = NOT_YET_ASSIGNED;
549 	}
550 	ToDistribute = (1 << tableLog) - distributed;
551 
552 	if ((total / ToDistribute) > lowOne) {
553 		/* risk of rounding to zero */
554 		lowOne = (U32)((total * 3) / (ToDistribute * 2));
555 		for (s = 0; s <= maxSymbolValue; s++) {
556 			if ((norm[s] == NOT_YET_ASSIGNED) && (count[s] <= lowOne)) {
557 				norm[s] = 1;
558 				distributed++;
559 				total -= count[s];
560 				continue;
561 			}
562 		}
563 		ToDistribute = (1 << tableLog) - distributed;
564 	}
565 
566 	if (distributed == maxSymbolValue + 1) {
567 		/* all values are pretty poor;
568 		   probably incompressible data (should have already been detected);
569 		   find max, then give all remaining points to max */
570 		U32 maxV = 0, maxC = 0;
571 		for (s = 0; s <= maxSymbolValue; s++)
572 			if (count[s] > maxC)
573 				maxV = s, maxC = count[s];
574 		norm[maxV] += (short)ToDistribute;
575 		return 0;
576 	}
577 
578 	if (total == 0) {
579 		/* all of the symbols were low enough for the lowOne or lowThreshold */
580 		for (s = 0; ToDistribute > 0; s = (s + 1) % (maxSymbolValue + 1))
581 			if (norm[s] > 0)
582 				ToDistribute--, norm[s]++;
583 		return 0;
584 	}
585 
586 	{
587 		U64 const vStepLog = 62 - tableLog;
588 		U64 const mid = (1ULL << (vStepLog - 1)) - 1;
589 		U64 const rStep = div_u64((((U64)1 << vStepLog) * ToDistribute) + mid, (U32)total); /* scale on remaining */
590 		U64 tmpTotal = mid;
591 		for (s = 0; s <= maxSymbolValue; s++) {
592 			if (norm[s] == NOT_YET_ASSIGNED) {
593 				U64 const end = tmpTotal + (count[s] * rStep);
594 				U32 const sStart = (U32)(tmpTotal >> vStepLog);
595 				U32 const sEnd = (U32)(end >> vStepLog);
596 				U32 const weight = sEnd - sStart;
597 				if (weight < 1)
598 					return ERROR(GENERIC);
599 				norm[s] = (short)weight;
600 				tmpTotal = end;
601 			}
602 		}
603 	}
604 
605 	return 0;
606 }
607 
FSE_normalizeCount(short * normalizedCounter,unsigned tableLog,const unsigned * count,size_t total,unsigned maxSymbolValue)608 size_t FSE_normalizeCount(short *normalizedCounter, unsigned tableLog, const unsigned *count, size_t total, unsigned maxSymbolValue)
609 {
610 	/* Sanity checks */
611 	if (tableLog == 0)
612 		tableLog = FSE_DEFAULT_TABLELOG;
613 	if (tableLog < FSE_MIN_TABLELOG)
614 		return ERROR(GENERIC); /* Unsupported size */
615 	if (tableLog > FSE_MAX_TABLELOG)
616 		return ERROR(tableLog_tooLarge); /* Unsupported size */
617 	if (tableLog < FSE_minTableLog(total, maxSymbolValue))
618 		return ERROR(GENERIC); /* Too small tableLog, compression potentially impossible */
619 
620 	{
621 		U32 const rtbTable[] = {0, 473195, 504333, 520860, 550000, 700000, 750000, 830000};
622 		U64 const scale = 62 - tableLog;
623 		U64 const step = div_u64((U64)1 << 62, (U32)total); /* <== here, one division ! */
624 		U64 const vStep = 1ULL << (scale - 20);
625 		int stillToDistribute = 1 << tableLog;
626 		unsigned s;
627 		unsigned largest = 0;
628 		short largestP = 0;
629 		U32 lowThreshold = (U32)(total >> tableLog);
630 
631 		for (s = 0; s <= maxSymbolValue; s++) {
632 			if (count[s] == total)
633 				return 0; /* rle special case */
634 			if (count[s] == 0) {
635 				normalizedCounter[s] = 0;
636 				continue;
637 			}
638 			if (count[s] <= lowThreshold) {
639 				normalizedCounter[s] = -1;
640 				stillToDistribute--;
641 			} else {
642 				short proba = (short)((count[s] * step) >> scale);
643 				if (proba < 8) {
644 					U64 restToBeat = vStep * rtbTable[proba];
645 					proba += (count[s] * step) - ((U64)proba << scale) > restToBeat;
646 				}
647 				if (proba > largestP)
648 					largestP = proba, largest = s;
649 				normalizedCounter[s] = proba;
650 				stillToDistribute -= proba;
651 			}
652 		}
653 		if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) {
654 			/* corner case, need another normalization method */
655 			size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue);
656 			if (FSE_isError(errorCode))
657 				return errorCode;
658 		} else
659 			normalizedCounter[largest] += (short)stillToDistribute;
660 	}
661 
662 	return tableLog;
663 }
664 
665 /* fake FSE_CTable, for raw (uncompressed) input */
FSE_buildCTable_raw(FSE_CTable * ct,unsigned nbBits)666 size_t FSE_buildCTable_raw(FSE_CTable *ct, unsigned nbBits)
667 {
668 	const unsigned tableSize = 1 << nbBits;
669 	const unsigned tableMask = tableSize - 1;
670 	const unsigned maxSymbolValue = tableMask;
671 	void *const ptr = ct;
672 	U16 *const tableU16 = ((U16 *)ptr) + 2;
673 	void *const FSCT = ((U32 *)ptr) + 1 /* header */ + (tableSize >> 1); /* assumption : tableLog >= 1 */
674 	FSE_symbolCompressionTransform *const symbolTT = (FSE_symbolCompressionTransform *)(FSCT);
675 	unsigned s;
676 
677 	/* Sanity checks */
678 	if (nbBits < 1)
679 		return ERROR(GENERIC); /* min size */
680 
681 	/* header */
682 	tableU16[-2] = (U16)nbBits;
683 	tableU16[-1] = (U16)maxSymbolValue;
684 
685 	/* Build table */
686 	for (s = 0; s < tableSize; s++)
687 		tableU16[s] = (U16)(tableSize + s);
688 
689 	/* Build Symbol Transformation Table */
690 	{
691 		const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits);
692 		for (s = 0; s <= maxSymbolValue; s++) {
693 			symbolTT[s].deltaNbBits = deltaNbBits;
694 			symbolTT[s].deltaFindState = s - 1;
695 		}
696 	}
697 
698 	return 0;
699 }
700 
701 /* fake FSE_CTable, for rle input (always same symbol) */
FSE_buildCTable_rle(FSE_CTable * ct,BYTE symbolValue)702 size_t FSE_buildCTable_rle(FSE_CTable *ct, BYTE symbolValue)
703 {
704 	void *ptr = ct;
705 	U16 *tableU16 = ((U16 *)ptr) + 2;
706 	void *FSCTptr = (U32 *)ptr + 2;
707 	FSE_symbolCompressionTransform *symbolTT = (FSE_symbolCompressionTransform *)FSCTptr;
708 
709 	/* header */
710 	tableU16[-2] = (U16)0;
711 	tableU16[-1] = (U16)symbolValue;
712 
713 	/* Build table */
714 	tableU16[0] = 0;
715 	tableU16[1] = 0; /* just in case */
716 
717 	/* Build Symbol Transformation Table */
718 	symbolTT[symbolValue].deltaNbBits = 0;
719 	symbolTT[symbolValue].deltaFindState = 0;
720 
721 	return 0;
722 }
723 
FSE_compress_usingCTable_generic(void * dst,size_t dstSize,const void * src,size_t srcSize,const FSE_CTable * ct,const unsigned fast)724 static size_t FSE_compress_usingCTable_generic(void *dst, size_t dstSize, const void *src, size_t srcSize, const FSE_CTable *ct, const unsigned fast)
725 {
726 	const BYTE *const istart = (const BYTE *)src;
727 	const BYTE *const iend = istart + srcSize;
728 	const BYTE *ip = iend;
729 
730 	BIT_CStream_t bitC;
731 	FSE_CState_t CState1, CState2;
732 
733 	/* init */
734 	if (srcSize <= 2)
735 		return 0;
736 	{
737 		size_t const initError = BIT_initCStream(&bitC, dst, dstSize);
738 		if (FSE_isError(initError))
739 			return 0; /* not enough space available to write a bitstream */
740 	}
741 
742 #define FSE_FLUSHBITS(s) (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s))
743 
744 	if (srcSize & 1) {
745 		FSE_initCState2(&CState1, ct, *--ip);
746 		FSE_initCState2(&CState2, ct, *--ip);
747 		FSE_encodeSymbol(&bitC, &CState1, *--ip);
748 		FSE_FLUSHBITS(&bitC);
749 	} else {
750 		FSE_initCState2(&CState2, ct, *--ip);
751 		FSE_initCState2(&CState1, ct, *--ip);
752 	}
753 
754 	/* join to mod 4 */
755 	srcSize -= 2;
756 	if ((sizeof(bitC.bitContainer) * 8 > FSE_MAX_TABLELOG * 4 + 7) && (srcSize & 2)) { /* test bit 2 */
757 		FSE_encodeSymbol(&bitC, &CState2, *--ip);
758 		FSE_encodeSymbol(&bitC, &CState1, *--ip);
759 		FSE_FLUSHBITS(&bitC);
760 	}
761 
762 	/* 2 or 4 encoding per loop */
763 	while (ip > istart) {
764 
765 		FSE_encodeSymbol(&bitC, &CState2, *--ip);
766 
767 		if (sizeof(bitC.bitContainer) * 8 < FSE_MAX_TABLELOG * 2 + 7) /* this test must be static */
768 			FSE_FLUSHBITS(&bitC);
769 
770 		FSE_encodeSymbol(&bitC, &CState1, *--ip);
771 
772 		if (sizeof(bitC.bitContainer) * 8 > FSE_MAX_TABLELOG * 4 + 7) { /* this test must be static */
773 			FSE_encodeSymbol(&bitC, &CState2, *--ip);
774 			FSE_encodeSymbol(&bitC, &CState1, *--ip);
775 		}
776 
777 		FSE_FLUSHBITS(&bitC);
778 	}
779 
780 	FSE_flushCState(&bitC, &CState2);
781 	FSE_flushCState(&bitC, &CState1);
782 	return BIT_closeCStream(&bitC);
783 }
784 
FSE_compress_usingCTable(void * dst,size_t dstSize,const void * src,size_t srcSize,const FSE_CTable * ct)785 size_t FSE_compress_usingCTable(void *dst, size_t dstSize, const void *src, size_t srcSize, const FSE_CTable *ct)
786 {
787 	unsigned const fast = (dstSize >= FSE_BLOCKBOUND(srcSize));
788 
789 	if (fast)
790 		return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1);
791 	else
792 		return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0);
793 }
794 
FSE_compressBound(size_t size)795 size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }
796