1 /* libFLAC - Free Lossless Audio Codec library
2  * Copyright (C) 2001-2009  Josh Coalson
3  * Copyright (C) 2011-2022  Xiph.Org Foundation
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * - Redistributions of source code must retain the above copyright
10  * notice, this list of conditions and the following disclaimer.
11  *
12  * - Redistributions in binary form must reproduce the above copyright
13  * notice, this list of conditions and the following disclaimer in the
14  * documentation and/or other materials provided with the distribution.
15  *
16  * - Neither the name of the Xiph.org Foundation nor the names of its
17  * contributors may be used to endorse or promote products derived from
18  * this software without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23  * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
24  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
25  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
26  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
27  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
28  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
29  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 #ifndef FLAC__PRIVATE__BITMATH_H
34 #define FLAC__PRIVATE__BITMATH_H
35 
36 #include "FLAC/ordinals.h"
37 #include "FLAC/assert.h"
38 
39 #include "share/compat.h"
40 
41 #if defined(_MSC_VER)
42 #include <intrin.h> /* for _BitScanReverse* */
43 #endif
44 
45 /* Will never be emitted for MSVC, GCC, Intel compilers */
FLAC__clz_soft_uint32(FLAC__uint32 word)46 static inline uint32_t FLAC__clz_soft_uint32(FLAC__uint32 word)
47 {
48 	static const uint8_t byte_to_unary_table[] = {
49 	8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
50 	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
51 	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
52 	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
53 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
54 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
55 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
58 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
59 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
60 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
61 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
62 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
63 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
64 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
65 	};
66 
67 	return word > 0xffffff ? byte_to_unary_table[word >> 24] :
68 		word > 0xffff ? byte_to_unary_table[word >> 16] + 8 :
69 		word > 0xff ? byte_to_unary_table[word >> 8] + 16 :
70 		byte_to_unary_table[word] + 24;
71 }
72 
FLAC__clz_uint32(FLAC__uint32 v)73 static inline uint32_t FLAC__clz_uint32(FLAC__uint32 v)
74 {
75 /* Never used with input 0 */
76 	FLAC__ASSERT(v > 0);
77 #if defined(__INTEL_COMPILER)
78 	return _bit_scan_reverse(v) ^ 31U;
79 #elif defined(__GNUC__) && (__GNUC__ >= 4 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
80 /* This will translate either to (bsr ^ 31U), clz , ctlz, cntlz, lzcnt depending on
81  * -march= setting or to a software routine in exotic machines. */
82 	return __builtin_clz(v);
83 #elif defined(_MSC_VER)
84 	{
85 		uint32_t idx;
86 		_BitScanReverse(&idx, v);
87 		return idx ^ 31U;
88 	}
89 #else
90 	return FLAC__clz_soft_uint32(v);
91 #endif
92 }
93 
94 /* Used when 64-bit bsr/clz is unavailable; can use 32-bit bsr/clz when possible */
FLAC__clz_soft_uint64(FLAC__uint64 word)95 static inline uint32_t FLAC__clz_soft_uint64(FLAC__uint64 word)
96 {
97 	return (FLAC__uint32)(word>>32) ? FLAC__clz_uint32((FLAC__uint32)(word>>32)) :
98 		FLAC__clz_uint32((FLAC__uint32)word) + 32;
99 }
100 
FLAC__clz_uint64(FLAC__uint64 v)101 static inline uint32_t FLAC__clz_uint64(FLAC__uint64 v)
102 {
103 	/* Never used with input 0 */
104 	FLAC__ASSERT(v > 0);
105 #if defined(__GNUC__) && (__GNUC__ >= 4 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
106 	return __builtin_clzll(v);
107 #elif (defined(__INTEL_COMPILER) || defined(_MSC_VER)) && (defined(_M_IA64) || defined(_M_X64))
108 	{
109 		uint32_t idx;
110 		_BitScanReverse64(&idx, v);
111 		return idx ^ 63U;
112 	}
113 #else
114 	return FLAC__clz_soft_uint64(v);
115 #endif
116 }
117 
118 /* These two functions work with input 0 */
FLAC__clz2_uint32(FLAC__uint32 v)119 static inline uint32_t FLAC__clz2_uint32(FLAC__uint32 v)
120 {
121 	if (!v)
122 		return 32;
123 	return FLAC__clz_uint32(v);
124 }
125 
FLAC__clz2_uint64(FLAC__uint64 v)126 static inline uint32_t FLAC__clz2_uint64(FLAC__uint64 v)
127 {
128 	if (!v)
129 		return 64;
130 	return FLAC__clz_uint64(v);
131 }
132 
133 /* An example of what FLAC__bitmath_ilog2() computes:
134  *
135  * ilog2( 0) = assertion failure
136  * ilog2( 1) = 0
137  * ilog2( 2) = 1
138  * ilog2( 3) = 1
139  * ilog2( 4) = 2
140  * ilog2( 5) = 2
141  * ilog2( 6) = 2
142  * ilog2( 7) = 2
143  * ilog2( 8) = 3
144  * ilog2( 9) = 3
145  * ilog2(10) = 3
146  * ilog2(11) = 3
147  * ilog2(12) = 3
148  * ilog2(13) = 3
149  * ilog2(14) = 3
150  * ilog2(15) = 3
151  * ilog2(16) = 4
152  * ilog2(17) = 4
153  * ilog2(18) = 4
154  */
155 
FLAC__bitmath_ilog2(FLAC__uint32 v)156 static inline uint32_t FLAC__bitmath_ilog2(FLAC__uint32 v)
157 {
158 	FLAC__ASSERT(v > 0);
159 #if defined(__INTEL_COMPILER)
160 	return _bit_scan_reverse(v);
161 #elif defined(_MSC_VER)
162 	{
163 		uint32_t idx;
164 		_BitScanReverse(&idx, v);
165 		return idx;
166 	}
167 #else
168 	return FLAC__clz_uint32(v) ^ 31U;
169 #endif
170 }
171 
FLAC__bitmath_ilog2_wide(FLAC__uint64 v)172 static inline uint32_t FLAC__bitmath_ilog2_wide(FLAC__uint64 v)
173 {
174 	FLAC__ASSERT(v > 0);
175 #if defined(__GNUC__) && (__GNUC__ >= 4 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
176 	return __builtin_clzll(v) ^ 63U;
177 /* Sorry, only supported in x64/Itanium.. and both have fast FPU which makes integer-only encoder pointless */
178 #elif (defined(__INTEL_COMPILER) || defined(_MSC_VER)) && (defined(_M_IA64) || defined(_M_X64))
179 	{
180 		uint32_t idx;
181 		_BitScanReverse64(&idx, v);
182 		return idx;
183 	}
184 #else
185 /*  Brain-damaged compilers will use the fastest possible way that is,
186 	de Bruijn sequences (http://supertech.csail.mit.edu/papers/debruijn.pdf)
187 	(C) Timothy B. Terriberry (tterribe@xiph.org) 2001-2009 CC0 (Public domain).
188 */
189 	{
190 		static const uint8_t DEBRUIJN_IDX64[64]={
191 			0, 1, 2, 7, 3,13, 8,19, 4,25,14,28, 9,34,20,40,
192 			5,17,26,38,15,46,29,48,10,31,35,54,21,50,41,57,
193 			63, 6,12,18,24,27,33,39,16,37,45,47,30,53,49,56,
194 			62,11,23,32,36,44,52,55,61,22,43,51,60,42,59,58
195 		};
196 		v|= v>>1;
197 		v|= v>>2;
198 		v|= v>>4;
199 		v|= v>>8;
200 		v|= v>>16;
201 		v|= v>>32;
202 		v= (v>>1)+1;
203 		return DEBRUIJN_IDX64[v*FLAC__U64L(0x218A392CD3D5DBF)>>58&0x3F];
204 	}
205 #endif
206 }
207 
208 uint32_t FLAC__bitmath_silog2(FLAC__int64 v);
209 
210 #endif
211