• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Bit and bytes utilities.
2 
3    Bytes swap functions, reverse order of bytes:
4 
5    - _Py_bswap16(uint16_t)
6    - _Py_bswap32(uint32_t)
7    - _Py_bswap64(uint64_t)
8 */
9 
10 #ifndef Py_INTERNAL_BITUTILS_H
11 #define Py_INTERNAL_BITUTILS_H
12 #ifdef __cplusplus
13 extern "C" {
14 #endif
15 
16 #ifndef Py_BUILD_CORE
17 #  error "this header requires Py_BUILD_CORE define"
18 #endif
19 
20 #if defined(__GNUC__) \
21       && ((__GNUC__ >= 5) || (__GNUC__ == 4) && (__GNUC_MINOR__ >= 8))
22    /* __builtin_bswap16() is available since GCC 4.8,
23       __builtin_bswap32() is available since GCC 4.3,
24       __builtin_bswap64() is available since GCC 4.3. */
25 #  define _PY_HAVE_BUILTIN_BSWAP
26 #endif
27 
28 #ifdef _MSC_VER
29    /* Get _byteswap_ushort(), _byteswap_ulong(), _byteswap_uint64() */
30 #  include <intrin.h>
31 #endif
32 
33 static inline uint16_t
_Py_bswap16(uint16_t word)34 _Py_bswap16(uint16_t word)
35 {
36 #if defined(_PY_HAVE_BUILTIN_BSWAP) || _Py__has_builtin(__builtin_bswap16)
37     return __builtin_bswap16(word);
38 #elif defined(_MSC_VER)
39     Py_BUILD_ASSERT(sizeof(word) == sizeof(unsigned short));
40     return _byteswap_ushort(word);
41 #else
42     // Portable implementation which doesn't rely on circular bit shift
43     return ( ((word & UINT16_C(0x00FF)) << 8)
44            | ((word & UINT16_C(0xFF00)) >> 8));
45 #endif
46 }
47 
48 static inline uint32_t
_Py_bswap32(uint32_t word)49 _Py_bswap32(uint32_t word)
50 {
51 #if defined(_PY_HAVE_BUILTIN_BSWAP) || _Py__has_builtin(__builtin_bswap32)
52     return __builtin_bswap32(word);
53 #elif defined(_MSC_VER)
54     Py_BUILD_ASSERT(sizeof(word) == sizeof(unsigned long));
55     return _byteswap_ulong(word);
56 #else
57     // Portable implementation which doesn't rely on circular bit shift
58     return ( ((word & UINT32_C(0x000000FF)) << 24)
59            | ((word & UINT32_C(0x0000FF00)) <<  8)
60            | ((word & UINT32_C(0x00FF0000)) >>  8)
61            | ((word & UINT32_C(0xFF000000)) >> 24));
62 #endif
63 }
64 
65 static inline uint64_t
_Py_bswap64(uint64_t word)66 _Py_bswap64(uint64_t word)
67 {
68 #if defined(_PY_HAVE_BUILTIN_BSWAP) || _Py__has_builtin(__builtin_bswap64)
69     return __builtin_bswap64(word);
70 #elif defined(_MSC_VER)
71     return _byteswap_uint64(word);
72 #else
73     // Portable implementation which doesn't rely on circular bit shift
74     return ( ((word & UINT64_C(0x00000000000000FF)) << 56)
75            | ((word & UINT64_C(0x000000000000FF00)) << 40)
76            | ((word & UINT64_C(0x0000000000FF0000)) << 24)
77            | ((word & UINT64_C(0x00000000FF000000)) <<  8)
78            | ((word & UINT64_C(0x000000FF00000000)) >>  8)
79            | ((word & UINT64_C(0x0000FF0000000000)) >> 24)
80            | ((word & UINT64_C(0x00FF000000000000)) >> 40)
81            | ((word & UINT64_C(0xFF00000000000000)) >> 56));
82 #endif
83 }
84 
85 
86 // Population count: count the number of 1's in 'x'
87 // (number of bits set to 1), also known as the hamming weight.
88 //
89 // Implementation note. CPUID is not used, to test if x86 POPCNT instruction
90 // can be used, to keep the implementation simple. For example, Visual Studio
91 // __popcnt() is not used this reason. The clang and GCC builtin function can
92 // use the x86 POPCNT instruction if the target architecture has SSE4a or
93 // newer.
94 static inline int
_Py_popcount32(uint32_t x)95 _Py_popcount32(uint32_t x)
96 {
97 #if (defined(__clang__) || defined(__GNUC__))
98 
99 #if SIZEOF_INT >= 4
100     Py_BUILD_ASSERT(sizeof(x) <= sizeof(unsigned int));
101     return __builtin_popcount(x);
102 #else
103     // The C standard guarantees that unsigned long will always be big enough
104     // to hold a uint32_t value without losing information.
105     Py_BUILD_ASSERT(sizeof(x) <= sizeof(unsigned long));
106     return __builtin_popcountl(x);
107 #endif
108 
109 #else
110     // 32-bit SWAR (SIMD Within A Register) popcount
111 
112     // Binary: 0 1 0 1 ...
113     const uint32_t M1 = 0x55555555;
114     // Binary: 00 11 00 11. ..
115     const uint32_t M2 = 0x33333333;
116     // Binary: 0000 1111 0000 1111 ...
117     const uint32_t M4 = 0x0F0F0F0F;
118     // 256**4 + 256**3 + 256**2 + 256**1
119     const uint32_t SUM = 0x01010101;
120 
121     // Put count of each 2 bits into those 2 bits
122     x = x - ((x >> 1) & M1);
123     // Put count of each 4 bits into those 4 bits
124     x = (x & M2) + ((x >> 2) & M2);
125     // Put count of each 8 bits into those 8 bits
126     x = (x + (x >> 4)) & M4;
127     // Sum of the 4 byte counts
128     return (uint32_t)((uint64_t)x * (uint64_t)SUM) >> 24;
129 #endif
130 }
131 
132 
133 // Return the index of the most significant 1 bit in 'x'. This is the smallest
134 // integer k such that x < 2**k. Equivalent to floor(log2(x)) + 1 for x != 0.
135 static inline int
_Py_bit_length(unsigned long x)136 _Py_bit_length(unsigned long x)
137 {
138 #if (defined(__clang__) || defined(__GNUC__))
139     if (x != 0) {
140         // __builtin_clzl() is available since GCC 3.4.
141         // Undefined behavior for x == 0.
142         return (int)sizeof(unsigned long) * 8 - __builtin_clzl(x);
143     }
144     else {
145         return 0;
146     }
147 #elif defined(_MSC_VER)
148     // _BitScanReverse() is documented to search 32 bits.
149     Py_BUILD_ASSERT(sizeof(unsigned long) <= 4);
150     unsigned long msb;
151     if (_BitScanReverse(&msb, x)) {
152         return (int)msb + 1;
153     }
154     else {
155         return 0;
156     }
157 #else
158     const int BIT_LENGTH_TABLE[32] = {
159         0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
160         5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5
161     };
162     int msb = 0;
163     while (x >= 32) {
164         msb += 6;
165         x >>= 6;
166     }
167     msb += BIT_LENGTH_TABLE[x];
168     return msb;
169 #endif
170 }
171 
172 
173 #ifdef __cplusplus
174 }
175 #endif
176 #endif /* !Py_INTERNAL_BITUTILS_H */
177