• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
#include "pffft.h"

#include <stdint.h>
#include <stdlib.h>

/* SSE and similar SIMD instruction sets want pointers aligned on 16 bytes;
 * with a 64-byte alignment we are even aligned on L2 cache lines.
 * The value must be a power of two: Valigned_malloc() rounds addresses with
 * a bit mask built from (MALLOC_V4SF_ALIGNMENT - 1). */
#define MALLOC_V4SF_ALIGNMENT 64

/*
 * Allocate nb_bytes bytes at an address that is a multiple of
 * MALLOC_V4SF_ALIGNMENT.
 *
 * The request is over-allocated by MALLOC_V4SF_ALIGNMENT bytes and the
 * returned address is the first alignment boundary strictly above the start
 * of the malloc()ed chunk. The pointer obtained from malloc() is saved in the
 * pointer-sized slot just below the returned address so that Valigned_free()
 * can hand it back to free().
 *
 * Returns NULL if malloc() fails or if the padded size would not fit in a
 * size_t. Release the result with Valigned_free(), never with free().
 */
static void *Valigned_malloc(size_t nb_bytes) {
  void *raw;
  void *aligned;
  uintptr_t addr;

  /* nb_bytes + MALLOC_V4SF_ALIGNMENT must not wrap around, otherwise a huge
   * request would silently turn into a tiny allocation */
  if (nb_bytes > SIZE_MAX - MALLOC_V4SF_ALIGNMENT) return NULL;

  raw = malloc(nb_bytes + MALLOC_V4SF_ALIGNMENT);
  if (!raw) return NULL;

  /* adding the full alignment before masking guarantees aligned > raw;
   * NOTE: the gap is assumed to be at least sizeof(void *), which holds when
   * malloc() returns storage aligned for a void * */
  addr = ((uintptr_t) raw + MALLOC_V4SF_ALIGNMENT)
       & ~((uintptr_t) (MALLOC_V4SF_ALIGNMENT - 1));
  aligned = (void *) addr;

  ((void **) aligned)[-1] = raw;
  return aligned;
}

/*
 * Release a buffer obtained from Valigned_malloc().
 *
 * Reads back the malloc() pointer stored just below p and frees it.
 * Passing NULL is a no-op; the check is required here because the slot
 * below p is dereferenced before free() is called.
 */
static void Valigned_free(void *p) {
  if (p) {
    void *raw = ((void **) p)[-1];
    free(raw);
  }
}

/*
 * Round N up to a power of two; a value that already is a power of two is
 * returned unchanged.
 *
 * The computation is done on an unsigned copy of N and smears the highest
 * set bit of (N - 1) into every lower position before adding one back. The
 * shift sequence stops at 16, so it covers a 32-bit unsigned.
 *
 * N is expected to be positive. N == 0 yields 0 because the decrement wraps
 * to all ones and the final increment wraps back to zero.
 * NOTE(review): for N above 2^30 the rounded value does not fit in an int -
 * confirm callers never pass such sizes.
 */
static int next_power_of_two(int N) {
  /* https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 */
  unsigned v = (unsigned) N;
  unsigned shift;

  v--;
  /* shifts of 1, 2, 4, 8 and 16 bits */
  for (shift = 1; shift <= 16; shift <<= 1) {
    v |= v >> shift;
  }
  v++;

  return (int) v;
}

/*
 * Return 1 when N is a positive power of two, 0 otherwise.
 *
 * A power of two has exactly one bit set, so clearing its lowest set bit
 * with N & (N - 1) leaves zero. Zero and negative values are rejected up
 * front, which also keeps N - 1 from overflowing when N is INT_MIN.
 */
static int is_power_of_two(int N) {
  /* https://graphics.stanford.edu/~seander/bithacks.html#DetermineIfPowerOf2 */
  return N > 0 && (N & (N - 1)) == 0;
}

/*
 * Smallest FFT length supported for the given transform kind, derived from
 * the SIMD width reported by pffft_simd_size():
 *   PFFFT_REAL    -> 2 * simd_size * simd_size
 *   PFFFT_COMPLEX ->     simd_size * simd_size
 *   anything else -> 1
 */
static int min_fft_size(pffft_transform_t transform) {
  /* unfortunately, the fft size must be a multiple of 16 for complex FFTs
     and 32 for real FFTs -- a lot of stuff would need to be rewritten to
     handle other cases (or maybe just switch to a scalar fft, I don't know..)
     (16 and 32 are what the formulas below give for a SIMD size of 4) */
  const int simd_width = pffft_simd_size();
  const int simd_width_sq = simd_width * simd_width;

  if (transform == PFFFT_REAL) {
    return 2 * simd_width_sq;
  }
  if (transform == PFFFT_COMPLEX) {
    return simd_width_sq;
  }
  return 1;
}

/* pffft_-prefixed entry point with external linkage; forwards to the
 * file-local Valigned_malloc(). */
void *pffft_aligned_malloc(size_t nb_bytes) { return Valigned_malloc(nb_bytes); }
/* pffft_-prefixed entry point with external linkage; forwards to the
 * file-local Valigned_free(). */
void pffft_aligned_free(void *p) { Valigned_free(p); }
/* pffft_-prefixed entry point with external linkage; forwards to the
 * file-local next_power_of_two(). */
int pffft_next_power_of_two(int N) { return next_power_of_two(N); }
/* pffft_-prefixed entry point with external linkage; forwards to the
 * file-local is_power_of_two(). */
int pffft_is_power_of_two(int N) { return is_power_of_two(N); }
/* pffft_-prefixed entry point with external linkage; forwards to the
 * file-local min_fft_size(). */
int pffft_min_fft_size(pffft_transform_t transform) { return min_fft_size(transform); }

/* pffftd_-prefixed entry point with external linkage; forwards to the same
 * file-local Valigned_malloc() as the pffft_ variant. */
void *pffftd_aligned_malloc(size_t nb_bytes) { return Valigned_malloc(nb_bytes); }
/* pffftd_-prefixed entry point with external linkage; forwards to the same
 * file-local Valigned_free() as the pffft_ variant. */
void pffftd_aligned_free(void *p) { Valigned_free(p); }
/* pffftd_-prefixed entry point with external linkage; forwards to the same
 * file-local next_power_of_two() as the pffft_ variant. */
int pffftd_next_power_of_two(int N) { return next_power_of_two(N); }
/* pffftd_-prefixed entry point with external linkage; forwards to the same
 * file-local is_power_of_two() as the pffft_ variant. */
int pffftd_is_power_of_two(int N) { return is_power_of_two(N); }
/* pffftd_-prefixed entry point with external linkage; forwards to the same
 * file-local min_fft_size() as the pffft_ variant. */
int pffftd_min_fft_size(pffft_transform_t transform) { return min_fft_size(transform); }