1 /* Copyright (c) 2013 Julien Pommier ( pommier@modartt.com ) 2 Copyright (c) 2020 Hayati Ayguen ( h_ayguen@web.de ) 3 Copyright (c) 2020 Dario Mambro ( dario.mambro@gmail.com ) 4 5 Based on original fortran 77 code from FFTPACKv4 from NETLIB 6 (http://www.netlib.org/fftpack), authored by Dr Paul Swarztrauber 7 of NCAR, in 1985. 8 9 As confirmed by the NCAR fftpack software curators, the following 10 FFTPACKv5 license applies to FFTPACKv4 sources. My changes are 11 released under the same terms. 12 13 FFTPACK license: 14 15 http://www.cisl.ucar.edu/css/software/fftpack5/ftpk.html 16 17 Copyright (c) 2004 the University Corporation for Atmospheric 18 Research ("UCAR"). All rights reserved. Developed by NCAR's 19 Computational and Information Systems Laboratory, UCAR, 20 www.cisl.ucar.edu. 21 22 Redistribution and use of the Software in source and binary forms, 23 with or without modification, is permitted provided that the 24 following conditions are met: 25 26 - Neither the names of NCAR's Computational and Information Systems 27 Laboratory, the University Corporation for Atmospheric Research, 28 nor the names of its sponsors or contributors may be used to 29 endorse or promote products derived from this Software without 30 specific prior written permission. 31 32 - Redistributions of source code must retain the above copyright 33 notices, this list of conditions, and the disclaimer below. 34 35 - Redistributions in binary form must reproduce the above copyright 36 notice, this list of conditions, and the disclaimer below in the 37 documentation and/or other materials provided with the 38 distribution. 39 40 THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 41 EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO THE WARRANTIES OF 42 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 43 NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT 44 HOLDERS BE LIABLE FOR ANY CLAIM, INDIRECT, INCIDENTAL, SPECIAL, 45 EXEMPLARY, OR CONSEQUENTIAL DAMAGES OR OTHER LIABILITY, WHETHER IN AN 46 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 47 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE 48 SOFTWARE. 49 50 51 PFFFT : a Pretty Fast FFT. 52 53 This file is largerly based on the original FFTPACK implementation, modified in 54 order to take advantage of SIMD instructions of modern CPUs. 55 */ 56 57 /* 58 NOTE: This file is adapted from Julien Pommier's original PFFFT, 59 which works on 32 bit floating point precision using SSE instructions, 60 to work with 64 bit floating point precision using AVX instructions. 61 Author: Dario Mambro @ https://github.com/unevens/pffft 62 */ 63 64 #include "pffft_double.h" 65 66 /* detect compiler flavour */ 67 #if defined(_MSC_VER) 68 # define COMPILER_MSVC 69 #elif defined(__GNUC__) 70 # define COMPILER_GCC 71 #endif 72 73 #ifdef COMPILER_MSVC 74 # define _USE_MATH_DEFINES 75 # include <malloc.h> 76 #elif defined(__MINGW32__) || defined(__MINGW64__) 77 # include <malloc.h> 78 #else 79 # include <alloca.h> 80 #endif 81 82 #include <stdlib.h> 83 #include <stdint.h> 84 #include <stdio.h> 85 #include <math.h> 86 #include <assert.h> 87 88 #if defined(COMPILER_GCC) 89 # define ALWAYS_INLINE(return_type) inline return_type __attribute__ ((always_inline)) 90 # define NEVER_INLINE(return_type) return_type __attribute__ ((noinline)) 91 # define RESTRICT __restrict 92 # define VLA_ARRAY_ON_STACK(type__, varname__, size__) type__ varname__[size__]; 93 #elif defined(COMPILER_MSVC) 94 # define ALWAYS_INLINE(return_type) __forceinline return_type 95 # define NEVER_INLINE(return_type) __declspec(noinline) return_type 96 # define RESTRICT __restrict 97 # define VLA_ARRAY_ON_STACK(type__, varname__, size__) type__ *varname__ = (type__*)_alloca(size__ * sizeof(type__)) 98 #endif 99 100 101 #ifdef COMPILER_MSVC 102 #pragma warning( disable : 4244 4305 4204 4456 ) 103 #endif 104 105 /* 106 vector support macros: the rest of the code is independant of 107 AVX -- adding support for other platforms with 4-element 108 vectors should be limited to these macros 109 */ 110 #include "simd/pf_double.h" 111 112 /* have code comparable with this definition */ 113 #define float double 114 #define SETUP_STRUCT PFFFTD_Setup 115 #define FUNC_NEW_SETUP pffftd_new_setup 116 #define FUNC_DESTROY pffftd_destroy_setup 117 #define FUNC_TRANSFORM_UNORDRD pffftd_transform 118 #define FUNC_TRANSFORM_ORDERED pffftd_transform_ordered 119 #define FUNC_ZREORDER pffftd_zreorder 120 #define FUNC_ZCONVOLVE_ACCUMULATE pffftd_zconvolve_accumulate 121 #define FUNC_ZCONVOLVE_NO_ACCU pffftd_zconvolve_no_accu 122 123 #define FUNC_ALIGNED_MALLOC pffftd_aligned_malloc 124 #define FUNC_ALIGNED_FREE pffftd_aligned_free 125 #define FUNC_SIMD_SIZE pffftd_simd_size 126 #define FUNC_MIN_FFT_SIZE pffftd_min_fft_size 127 #define FUNC_IS_VALID_SIZE pffftd_is_valid_size 128 #define FUNC_NEAREST_SIZE pffftd_nearest_transform_size 129 #define FUNC_SIMD_ARCH pffftd_simd_arch 130 #define FUNC_VALIDATE_SIMD_A validate_pffftd_simd 131 #define FUNC_VALIDATE_SIMD_EX validate_pffftd_simd_ex 132 133 #define FUNC_CPLX_FINALIZE pffftd_cplx_finalize 134 #define FUNC_CPLX_PREPROCESS pffftd_cplx_preprocess 135 #define FUNC_REAL_PREPROCESS_4X4 pffftd_real_preprocess_4x4 136 #define FUNC_REAL_PREPROCESS pffftd_real_preprocess 137 #define FUNC_REAL_FINALIZE_4X4 pffftd_real_finalize_4x4 138 #define FUNC_REAL_FINALIZE pffftd_real_finalize 139 #define FUNC_TRANSFORM_INTERNAL pffftd_transform_internal 140 141 #define FUNC_COS cos 142 #define FUNC_SIN sin 143 144 145 #include "pffft_priv_impl.h" 146 147 148