/* Copyright (c) 2013  Julien Pommier ( pommier@modartt.com )
   Copyright (c) 2020  Hayati Ayguen ( h_ayguen@web.de )

   Redistribution and use of the Software in source and binary forms,
   with or without modification, is permitted provided that the
   following conditions are met:

   - Neither the names of NCAR's Computational and Information Systems
   Laboratory, the University Corporation for Atmospheric Research,
   nor the names of its sponsors or contributors may be used to
   endorse or promote products derived from this Software without
   specific prior written permission.

   - Redistributions of source code must retain the above copyright
   notices, this list of conditions, and the disclaimer below.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions, and the disclaimer below in the
   documentation and/or other materials provided with the
   distribution.

   THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO THE WARRANTIES OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
   NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT
   HOLDERS BE LIABLE FOR ANY CLAIM, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES OR OTHER LIABILITY, WHETHER IN AN
   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
   SOFTWARE.
*/

#ifndef PF_SCAL_FLT_H
#define PF_SCAL_FLT_H

/*
  Fallback mode(s) for situations where SSE/AVX/NEON/Altivec are not
  available: plain scalar code is used instead.
*/

#if !defined(SIMD_SZ) && defined(PFFFT_SCALVEC_ENABLED)
#pragma message( __FILE__ ": float SCALAR4 macros are defined" )

/* emulate a 4-wide SIMD register with four scalar lanes */
typedef struct {
  vsfscalar a;
  vsfscalar b;
  vsfscalar c;
  vsfscalar d;
} v4sf;

# define SIMD_SZ 4

typedef union v4sf_union {
  v4sf v;
  vsfscalar f[SIMD_SZ];
} v4sf_union;

# define VARCH "4xScalar"
# define VREQUIRES_ALIGN 0

/* vector of four zeros */
static ALWAYS_INLINE(v4sf) VZERO() {
  v4sf r = { 0.f, 0.f, 0.f, 0.f };
  return r;
}

/* element-wise multiply */
static ALWAYS_INLINE(v4sf) VMUL(v4sf A, v4sf B) {
  v4sf r = { A.a * B.a, A.b * B.b, A.c * B.c, A.d * B.d };
  return r;
}

/* element-wise add */
static ALWAYS_INLINE(v4sf) VADD(v4sf A, v4sf B) {
  v4sf r = { A.a + B.a, A.b + B.b, A.c + B.c, A.d + B.d };
  return r;
}

/* element-wise multiply-add: A*B + C */
static ALWAYS_INLINE(v4sf) VMADD(v4sf A, v4sf B, v4sf C) {
  v4sf r = { A.a * B.a + C.a, A.b * B.b + C.b, A.c * B.c + C.c, A.d * B.d + C.d };
  return r;
}

/* element-wise subtract */
static ALWAYS_INLINE(v4sf) VSUB(v4sf A, v4sf B) {
  v4sf r = { A.a - B.a, A.b - B.b, A.c - B.c, A.d - B.d };
  return r;
}

/* broadcast a scalar to all four lanes */
static ALWAYS_INLINE(v4sf) LD_PS1(vsfscalar v) {
  v4sf r = { v, v, v, v };
  return r;
}

# define VLOAD_UNALIGNED(ptr)  (*((v4sf*)(ptr)))

# define VLOAD_ALIGNED(ptr)    (*((v4sf*)(ptr)))

# define VALIGNED(ptr) ((((uintptr_t)(ptr)) & (sizeof(v4sf)-1) ) == 0)


/* INTERLEAVE2(): merge two vectors into interleaved pair order */
#define INTERLEAVE2(A, B, C, D) \
  do { \
    v4sf Cr = { A.a, B.a, A.b, B.b }; \
    v4sf Dr = { A.c, B.c, A.d, B.d }; \
    C = Cr; \
    D = Dr; \
  } while (0)


/* UNINTERLEAVE2(): split interleaved pairs back into two vectors */
#define UNINTERLEAVE2(A, B, C, D) \
  do { \
    v4sf Cr = { A.a, A.c, B.a, B.c }; \
    v4sf Dr = { A.b, A.d, B.b, B.d }; \
    C = Cr; \
    D = Dr; \
  } while (0)
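
/*
  Usage sketch (illustration only, not part of this header's API; the
  variable names are hypothetical): with the 4xScalar layout above,
  UNINTERLEAVE2() splits interleaved complex data into a vector of real
  parts and a vector of imaginary parts, and INTERLEAVE2() is its inverse:

      v4sf a = { 1.f, 2.f, 3.f, 4.f };   // re0, im0, re1, im1
      v4sf b = { 5.f, 6.f, 7.f, 8.f };   // re2, im2, re3, im3
      v4sf re, im;
      UNINTERLEAVE2(a, b, re, im);       // re = {1,3,5,7}, im = {2,4,6,8}
      INTERLEAVE2(re, im, a, b);         // a, b restored to original values
*/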

/* VTRANSPOSE4(): 4x4 transpose of the lanes of A, B, C, D */
#define VTRANSPOSE4(A, B, C, D) \
  do { \
    v4sf Ar = { A.a, B.a, C.a, D.a }; \
    v4sf Br = { A.b, B.b, C.b, D.b }; \
    v4sf Cr = { A.c, B.c, C.c, D.c }; \
    v4sf Dr = { A.d, B.d, C.d, D.d }; \
    A = Ar; \
    B = Br; \
    C = Cr; \
    D = Dr; \
  } while (0)


/* VSWAPHL(): low half from B, high half from A */
static ALWAYS_INLINE(v4sf) VSWAPHL(v4sf A, v4sf B) {
  v4sf r = { B.a, B.b, A.c, A.d };
  return r;
}


/* reverse/flip all floats */
static ALWAYS_INLINE(v4sf) VREV_S(v4sf A) {
  v4sf r = { A.d, A.c, A.b, A.a };
  return r;
}

/* reverse/flip complex floats */
static ALWAYS_INLINE(v4sf) VREV_C(v4sf A) {
  v4sf r = { A.c, A.d, A.a, A.b };
  return r;
}

#else
/* #pragma message( __FILE__ ": float SCALAR4 macros are not defined" ) */
#endif


#if !defined(SIMD_SZ)
#pragma message( __FILE__ ": float SCALAR1 macros are defined" )
typedef vsfscalar v4sf;

# define SIMD_SZ 1

typedef union v4sf_union {
  v4sf v;
  vsfscalar f[SIMD_SZ];
} v4sf_union;

# define VARCH "Scalar"
# define VREQUIRES_ALIGN 0
# define VZERO() 0.f
# define VMUL(a,b) ((a)*(b))
# define VADD(a,b) ((a)+(b))
# define VMADD(a,b,c) ((a)*(b)+(c))
# define VSUB(a,b) ((a)-(b))
# define LD_PS1(p) (p)
# define VLOAD_UNALIGNED(ptr) (*(ptr))
# define VLOAD_ALIGNED(ptr) (*(ptr))
# define VALIGNED(ptr) ((((uintptr_t)(ptr)) & (sizeof(vsfscalar)-1) ) == 0)

#else
/* #pragma message( __FILE__ ": float SCALAR1 macros are not defined" ) */
#endif


#endif /* PF_SCAL_FLT_H */
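
/*
  Usage sketch (a minimal illustration, not part of pffft; the helper name
  cplx_mul is hypothetical): because the macros above hide the lane layout,
  code written against them works unchanged for both SIMD_SZ == 4 and
  SIMD_SZ == 1. A complex multiply (ar + i*ai) * (br + i*bi) on split
  real/imaginary vectors could read:

      static void cplx_mul(v4sf ar, v4sf ai, v4sf br, v4sf bi,
                           v4sf *cr, v4sf *ci) {
          *cr = VSUB(VMUL(ar, br), VMUL(ai, bi));  // real: ar*br - ai*bi
          *ci = VMADD(ar, bi, VMUL(ai, br));       // imag: ar*bi + ai*br
      }
*/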