1 2 #include "pffft.h" 3 4 #include <stdlib.h> 5 6 /* SSE and co like 16-bytes aligned pointers 7 * with a 64-byte alignment, we are even aligned on L2 cache lines... */ 8 #define MALLOC_V4SF_ALIGNMENT 64 9 Valigned_malloc(size_t nb_bytes)10static void * Valigned_malloc(size_t nb_bytes) { 11 void *p, *p0 = malloc(nb_bytes + MALLOC_V4SF_ALIGNMENT); 12 if (!p0) return (void *) 0; 13 p = (void *) (((size_t) p0 + MALLOC_V4SF_ALIGNMENT) & (~((size_t) (MALLOC_V4SF_ALIGNMENT-1)))); 14 *((void **) p - 1) = p0; 15 return p; 16 } 17 Valigned_free(void * p)18static void Valigned_free(void *p) { 19 if (p) free(*((void **) p - 1)); 20 } 21 22 next_power_of_two(int N)23static int next_power_of_two(int N) { 24 /* https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 */ 25 /* compute the next highest power of 2 of 32-bit v */ 26 unsigned v = N; 27 v--; 28 v |= v >> 1; 29 v |= v >> 2; 30 v |= v >> 4; 31 v |= v >> 8; 32 v |= v >> 16; 33 v++; 34 return v; 35 } 36 is_power_of_two(int N)37static int is_power_of_two(int N) { 38 /* https://graphics.stanford.edu/~seander/bithacks.html#DetermineIfPowerOf2 */ 39 int f = N && !(N & (N - 1)); 40 return f; 41 } 42 min_fft_size(pffft_transform_t transform)43static int min_fft_size(pffft_transform_t transform) { 44 /* unfortunately, the fft size must be a multiple of 16 for complex FFTs 45 and 32 for real FFTs -- a lot of stuff would need to be rewritten to 46 handle other cases (or maybe just switch to a scalar fft, I don't know..) */ 47 int simdSz = pffft_simd_size(); 48 if (transform == PFFFT_REAL) 49 return ( 2 * simdSz * simdSz ); 50 else if (transform == PFFFT_COMPLEX) 51 return ( simdSz * simdSz ); 52 else 53 return 1; 54 } 55 56 pffft_aligned_malloc(size_t nb_bytes)57void *pffft_aligned_malloc(size_t nb_bytes) { return Valigned_malloc(nb_bytes); } pffft_aligned_free(void * p)58void pffft_aligned_free(void *p) { Valigned_free(p); } pffft_next_power_of_two(int N)59int pffft_next_power_of_two(int N) { return next_power_of_two(N); } pffft_is_power_of_two(int N)60int pffft_is_power_of_two(int N) { return is_power_of_two(N); } pffft_min_fft_size(pffft_transform_t transform)61int pffft_min_fft_size(pffft_transform_t transform) { return min_fft_size(transform); } 62 pffftd_aligned_malloc(size_t nb_bytes)63void *pffftd_aligned_malloc(size_t nb_bytes) { return Valigned_malloc(nb_bytes); } pffftd_aligned_free(void * p)64void pffftd_aligned_free(void *p) { Valigned_free(p); } pffftd_next_power_of_two(int N)65int pffftd_next_power_of_two(int N) { return next_power_of_two(N); } pffftd_is_power_of_two(int N)66int pffftd_is_power_of_two(int N) { return is_power_of_two(N); } pffftd_min_fft_size(pffft_transform_t transform)67int pffftd_min_fft_size(pffft_transform_t transform) { return min_fft_size(transform); } 68 69