
/* Copyright (c) 2013  Julien Pommier ( pommier@modartt.com )
   Copyright (c) 2020  Hayati Ayguen ( h_ayguen@web.de )

   Redistribution and use of the Software in source and binary forms,
   with or without modification, is permitted provided that the
   following conditions are met:

   - Neither the names of NCAR's Computational and Information Systems
   Laboratory, the University Corporation for Atmospheric Research,
   nor the names of its sponsors or contributors may be used to
   endorse or promote products derived from this Software without
   specific prior written permission.

   - Redistributions of source code must retain the above copyright
   notices, this list of conditions, and the disclaimer below.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions, and the disclaimer below in the
   documentation and/or other materials provided with the
   distribution.

   THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO THE WARRANTIES OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
   NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT
   HOLDERS BE LIABLE FOR ANY CLAIM, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES OR OTHER LIABILITY, WHETHER IN AN
   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
   SOFTWARE.
*/

#ifndef PF_SCAL_FLT_H
#define PF_SCAL_FLT_H

/*
  Fallback mode(s) for situations where SSE/AVX/NEON/Altivec are not available:
  plain scalar code is used instead.
*/
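
/*
  Illustrative example (comment only; assumes the SCALAR4 variant below is the
  one selected): a v4sf then simply carries four independent floats, e.g.
      v4sf x = LD_PS1(2.f);             broadcast     -> {2, 2, 2, 2}
      v4sf y = VMADD(x, x, VZERO());    multiply-add  -> {4, 4, 4, 4}
  Builds with SSE/AVX/NEON/Altivec are expected to provide the same names via
  real intrinsics, so calling code does not need to know which variant is active.
*/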

#if !defined(SIMD_SZ) && defined(PFFFT_SCALVEC_ENABLED)
#pragma message( __FILE__ ": float SCALAR4 macros are defined" )

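/* Emulated 4-lane float "vector": four independent vsfscalar lanes held in a
   plain struct, so the SIMD-style code paths compile without any intrinsics. */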
typedef struct {
  vsfscalar a;
  vsfscalar b;
  vsfscalar c;
  vsfscalar d;
} v4sf;

#  define SIMD_SZ 4

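/* Union view of a v4sf: the same data is accessible either as the vector type
   or as an array of SIMD_SZ individual scalar lanes. */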
typedef union v4sf_union {
  v4sf  v;
  vsfscalar f[SIMD_SZ];
} v4sf_union;

#  define VARCH "4xScalar"
#  define VREQUIRES_ALIGN 0

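  /* VZERO(): vector with all four lanes set to 0.f */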
  static ALWAYS_INLINE(v4sf) VZERO() {
    v4sf r = { 0.f, 0.f, 0.f, 0.f };
    return r;
  }

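  /* VMUL(): element-wise (lane-by-lane) multiply */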
  static ALWAYS_INLINE(v4sf) VMUL(v4sf A, v4sf B) {
    v4sf r = { A.a * B.a, A.b * B.b, A.c * B.c, A.d * B.d };
    return r;
  }

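  /* VADD(): element-wise add */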
  static ALWAYS_INLINE(v4sf) VADD(v4sf A, v4sf B) {
    v4sf r = { A.a + B.a, A.b + B.b, A.c + B.c, A.d + B.d };
    return r;
  }

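  /* VMADD(): element-wise multiply-add, r = A*B + C */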
  static ALWAYS_INLINE(v4sf) VMADD(v4sf A, v4sf B, v4sf C) {
    v4sf r = { A.a * B.a + C.a, A.b * B.b + C.b, A.c * B.c + C.c, A.d * B.d + C.d };
    return r;
  }

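  /* VSUB(): element-wise subtract */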
  static ALWAYS_INLINE(v4sf) VSUB(v4sf A, v4sf B) {
    v4sf r = { A.a - B.a, A.b - B.b, A.c - B.c, A.d - B.d };
    return r;
  }

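  /* LD_PS1(): broadcast a single scalar into all four lanes */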
  static ALWAYS_INLINE(v4sf) LD_PS1(vsfscalar v) {
    v4sf r = { v, v, v, v };
    return r;
  }

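/* Loads: a v4sf is just four contiguous scalars, so both the "unaligned" and
   the "aligned" load reduce to a plain struct copy through a cast. */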
#  define VLOAD_UNALIGNED(ptr)  (*((v4sf*)(ptr)))

#  define VLOAD_ALIGNED(ptr)    (*((v4sf*)(ptr)))

#  define VALIGNED(ptr) ((((uintptr_t)(ptr)) & (sizeof(v4sf)-1) ) == 0)


  /* INTERLEAVE2(): interleave the lanes of A and B pairwise,
     C = { A.a, B.a, A.b, B.b }, D = { A.c, B.c, A.d, B.d } */
  #define INTERLEAVE2( A, B, C, D) \
  do { \
    v4sf Cr = { A.a, B.a, A.b, B.b }; \
    v4sf Dr = { A.c, B.c, A.d, B.d }; \
    C = Cr; \
    D = Dr; \
  } while (0)


  /* UNINTERLEAVE2(): inverse of INTERLEAVE2(), de-interleave the pairs,
     C = { A.a, A.c, B.a, B.c }, D = { A.b, A.d, B.b, B.d } */
  #define UNINTERLEAVE2(A, B, C, D) \
  do { \
    v4sf Cr = { A.a, A.c, B.a, B.c }; \
    v4sf Dr = { A.b, A.d, B.b, B.d }; \
    C = Cr; \
    D = Dr; \
  } while (0)


  /* VTRANSPOSE4(): transpose, in place, the 4x4 matrix whose rows are A, B, C, D */
  #define VTRANSPOSE4(A, B, C, D) \
  do { \
    v4sf Ar = { A.a, B.a, C.a, D.a }; \
    v4sf Br = { A.b, B.b, C.b, D.b }; \
    v4sf Cr = { A.c, B.c, C.c, D.c }; \
    v4sf Dr = { A.d, B.d, C.d, D.d }; \
    A = Ar; \
    B = Br; \
    C = Cr; \
    D = Dr; \
  } while (0)


  /* VSWAPHL(): combine the first two lanes of B with the last two lanes of A,
     r = { B.a, B.b, A.c, A.d } */
  static ALWAYS_INLINE(v4sf) VSWAPHL(v4sf A, v4sf B) {
    v4sf r = { B.a, B.b, A.c, A.d };
    return r;
  }


  /* reverse/flip all floats */
  static ALWAYS_INLINE(v4sf) VREV_S(v4sf A) {
    v4sf r = { A.d, A.c, A.b, A.a };
    return r;
  }

  /* reverse/flip complex floats */
  static ALWAYS_INLINE(v4sf) VREV_C(v4sf A) {
    v4sf r = { A.c, A.d, A.a, A.b };
    return r;
  }

#else
/* #pragma message( __FILE__ ": float SCALAR4 macros are not defined" ) */
#endif


#if !defined(SIMD_SZ)
#pragma message( __FILE__ ": float SCALAR1 macros are defined" )
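/* Plain scalar mode: a "vector" is a single vsfscalar, so every vector
   operation degenerates to ordinary float arithmetic. */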
typedef vsfscalar v4sf;

#  define SIMD_SZ 1

typedef union v4sf_union {
  v4sf  v;
  vsfscalar f[SIMD_SZ];
} v4sf_union;

#  define VARCH "Scalar"
#  define VREQUIRES_ALIGN 0
#  define VZERO() 0.f
#  define VMUL(a,b) ((a)*(b))
#  define VADD(a,b) ((a)+(b))
#  define VMADD(a,b,c) ((a)*(b)+(c))
#  define VSUB(a,b) ((a)-(b))
#  define LD_PS1(p) (p)
#  define VLOAD_UNALIGNED(ptr)  (*(ptr))
#  define VLOAD_ALIGNED(ptr)    (*(ptr))
#  define VALIGNED(ptr) ((((uintptr_t)(ptr)) & (sizeof(vsfscalar)-1) ) == 0)

#else
/* #pragma message( __FILE__ ": float SCALAR1 macros are not defined" ) */
#endif


#endif /* PF_SCAL_FLT_H */
