• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 #include "pf_conv.h"
3 
4 #include <string.h>
5 #include <assert.h>
6 
7 #include <algorithm>
8 
// Debug tracing switch: change the 0 below to 1 to make DPRINT() write to stderr.
#if 0
#include <stdio.h>

#define DPRINT(...) fprintf(stderr, __VA_ARGS__)

#else
// Tracing disabled: DPRINT() expands to an empty statement, its arguments are dropped.
#define DPRINT(...) do { } while (0)
#endif
17 
18 
19 #ifdef HAVE_MIPP
20 #include <mipp.h>
21 #endif
22 
23 
24 #ifndef CONV_ARCH_POST
25 #error CONV_ARCH_POST not defined
26 #endif
27 
28 #define PP_STRINGIFY(X) #X
29 #define PP_TOSTRING(X)  PP_STRINGIFY(X)
30 #define PP_CONCAT_IMPL(x, y) x##y
31 #define PP_CONCAT(x, y) PP_CONCAT_IMPL( x, y )
32 
33 #define ARCHFUNCNAME(X) PP_CONCAT(X##_,CONV_ARCH_POST)
34 
35 
ARCHFUNCNAME(id)36 const char * ARCHFUNCNAME(id)()
37 {
38     return PP_TOSTRING(CONV_ARCH_POST);
39 }
40 
41 
ARCHFUNCNAME(conv_float_simd_size)42 int ARCHFUNCNAME(conv_float_simd_size)()
43 {
44 #if defined(MIPP_NO_INTRINSICS) || !defined(HAVE_MIPP)
45     // have a completely MIPP independent implementation
46     return 1;
47 #else
48     return mipp::N<float>();
49 #endif
50 }
51 
52 
ARCHFUNCNAME(conv_float_move_rest)53 void ARCHFUNCNAME(conv_float_move_rest)(float * RESTRICT s, conv_buffer_state * RESTRICT state)
54 {
55     int R = state->size - state->offset;    // this many samples from prev conv_float were not processed
56     if (R > 0)
57     {
58         // memmove(s, &s[state->offset], R * sizeof(s[0]));   // move them to the begin
59         std::copy(&s[state->offset], &s[state->size], s);
60     }
61     else
62         R = 0;
63     state->offset = 0;      // data - to be processed - is at begin
64     state->size = R;        // this many unprocessed samples
65 }
66 
67 
ARCHFUNCNAME(conv_cplx_move_rest)68 void ARCHFUNCNAME(conv_cplx_move_rest)(complexf * RESTRICT s, conv_buffer_state * RESTRICT state)
69 {
70     int R = state->size - state->offset;    // this many samples from prev conv_float were not processed
71     if (R > 0)
72     {
73         // memmove(s, &s[state->offset], R * sizeof(s[0]));   // move them to the begin
74         std::copy(&s[state->offset], &s[state->size], s);
75     }
76     else
77         R = 0;
78     state->offset = 0;      // data - to be processed - is at begin
79     state->size = R;        // this many unprocessed samples
80 }
81 
82 
83 #if defined(MIPP_NO_INTRINSICS)
84 // have a completely MIPP independent implementation
85 // #error missing HAVE_MIPP: there is no MIPP-independent implementation
86 
ARCHFUNCNAME(conv_float_inplace)87 int ARCHFUNCNAME(conv_float_inplace)(
88         float * RESTRICT s, conv_buffer_state * RESTRICT state,
89         const float * RESTRICT filter, const int sz_filter
90         )
91 {
92     const int off0 = state->offset;
93     const int sz_s = state->size;
94     int offset;
95 
96     for ( offset = off0; offset + sz_filter <= sz_s; ++offset)
97     {
98         float accu = 0.0F;
99         for (int k = 0; k < sz_filter; ++k)
100             accu += s[offset+k] * filter[k];
101         s[offset] = accu;
102     }
103 
104     state->offset = offset;
105     return offset - off0;
106 }
107 
108 
ARCHFUNCNAME(conv_float_oop)109 int ARCHFUNCNAME(conv_float_oop)(
110         const float * RESTRICT s, conv_buffer_state * RESTRICT state,
111         const float * RESTRICT filter, const int sz_filter,
112         float * RESTRICT y
113         )
114 {
115     const int off0 = state->offset;
116     const int sz_s = state->size;
117     int offset;
118 
119     for ( offset = off0; offset + sz_filter <= sz_s; ++offset)
120     {
121         float accu = 0.0F;
122         for (int k = 0; k < sz_filter; ++k)
123             accu += s[offset+k] * filter[k];
124         y[offset] = accu;
125     }
126 
127     state->offset = offset;
128     return offset - off0;
129 }
130 
131 
ARCHFUNCNAME(conv_cplx_float_oop)132 int ARCHFUNCNAME(conv_cplx_float_oop)(
133         const complexf * RESTRICT s_cplx, conv_buffer_state * RESTRICT state,
134         const float * RESTRICT filter, const int sz_filter,
135         complexf * RESTRICT y_cplx
136         )
137 {
138     const int off0 = state->offset;
139     const int sz_s = state->size;
140     const int sz_f = sz_filter;
141     int offset;
142 
143     for ( offset = off0; offset + sz_f <= sz_s; ++offset)
144     {
145         float accu_re = 0.0F;
146         float accu_im = 0.0F;
147         for (int k = 0; k < sz_filter; ++k)
148         {
149             accu_re = s_cplx[offset+k].i * filter[k];   // accu += rS * rH;
150             accu_im = s_cplx[offset+k].q * filter[k];   // accu += rS * rH;
151         }
152         y_cplx[offset].i = accu_re;  // == hadd() == sum of real parts
153         y_cplx[offset].q = accu_im;  // == hadd() == sum of imag parts
154     }
155 
156     state->offset = offset;
157     return offset - off0;
158 }
159 
160 
161 #elif defined(HAVE_MIPP)
162 
163 
ARCHFUNCNAME(conv_float_inplace)164 int ARCHFUNCNAME(conv_float_inplace)(
165         float * RESTRICT s, conv_buffer_state * RESTRICT state,
166         const float * RESTRICT filter, const int sz_filter
167         )
168 {
169     assert( (sz_filter % mipp::N<float>()) == 0 );  // size of filter must be divisible by conv_float_simd_size()
170 
171     mipp::Reg<float> accu, rS, rH;
172     const int off0 = state->offset;
173     const int sz_s = state->size;
174     int offset;
175 
176     for ( offset = off0; offset + sz_filter <= sz_s; ++offset)
177     {
178         accu.set0();
179         for (int k = 0; k < sz_filter; k += mipp::N<float>())
180         {
181             rS.load(&s[offset+k]);
182             rH.load(&filter[k]);
183             accu = mipp::fmadd(rS, rH, accu);   // accu += rS * rH;
184         }
185         s[offset] = accu.sum();    // == hadd()
186     }
187 
188     state->offset = offset;
189     return offset - off0;
190 }
191 
192 
ARCHFUNCNAME(conv_float_oop)193 int ARCHFUNCNAME(conv_float_oop)(
194         const float * RESTRICT s, conv_buffer_state * RESTRICT state,
195         const float * RESTRICT filter, const int sz_filter,
196         float * RESTRICT y
197         )
198 {
199     assert( (sz_filter % mipp::N<float>()) == 0 );  // size of filter must be divisible by conv_float_simd_size()
200 
201     mipp::Reg<float> accu, rS, rH;
202     const int off0 = state->offset;
203     const int sz_s = state->size;
204     int offset;
205 
206     for ( offset = off0; offset + sz_filter <= sz_s; ++offset)
207     {
208         accu.set0();
209         for (int k = 0; k < sz_filter; k += mipp::N<float>())
210         {
211             rS.loadu(&s[offset+k]);
212             rH.load(&filter[k]);
213             accu = mipp::fmadd(rS, rH, accu);   // accu += rS * rH;
214         }
215         y[offset] = accu.sum();    // == hadd()
216     }
217 
218     state->offset = offset;
219     return offset - off0;
220 }
221 
222 
ARCHFUNCNAME(conv_cplx_float_oop)223 int ARCHFUNCNAME(conv_cplx_float_oop)(
224         const complexf * RESTRICT s_cplx, conv_buffer_state * RESTRICT state,
225         const float * RESTRICT filter, const int sz_filter,
226         complexf * RESTRICT y_cplx
227         )
228 {
229     assert( (sz_filter % mipp::N<float>()) == 0 );  // size of filter must be divisible by conv_float_simd_size()
230     const float * RESTRICT s = &(s_cplx[0].i);
231     float * RESTRICT y = &(y_cplx[0].i);
232 
233     mipp::Regx2<float> accu_x2, rS_x2, H_x2;
234     const int off0 = 2 * state->offset;
235     const int sz_s = 2 * state->size;
236     const int sz_f2 = 2 * sz_filter;
237     int offset;
238 
239     for ( offset = off0; offset + sz_f2 <= sz_s; offset += 2)
240     {
241         accu_x2.val[0].set0();
242         accu_x2.val[1].set0();
243         for (int k = 0; k < sz_filter; k += mipp::N<float>())
244         {
245             mipp::Reg<float> rH;
246             rS_x2.loadu(&s[offset+2*k]);
247             rH.load(&filter[k]);
248             H_x2 = mipp::interleave<float>(rH, rH);
249             accu_x2.val[0] = mipp::fmadd(rS_x2.val[0], H_x2.val[0], accu_x2.val[0]);   // accu += rS * rH;
250             accu_x2.val[1] = mipp::fmadd(rS_x2.val[1], H_x2.val[1], accu_x2.val[1]);   // accu += rS * rH;
251         }
252         H_x2 = mipp::deinterleave(accu_x2);
253         y[offset]   = H_x2.val[0].sum();  // == hadd() == sum of real parts
254         y[offset+1] = H_x2.val[1].sum();  // == hadd() == sum of imag parts
255     }
256 
257     state->offset = offset /2;
258     return (offset - off0) / 2;
259 }
260 
261 #endif
262 
263 
264 static const conv_f_ptrs conv_ptrs =
265 {
266     PP_TOSTRING(CONV_ARCH_POST),
267 #ifndef MIPP_NO_INTRINSICS
268     1,
269 #else
270     0,
271 #endif
272 
273     ARCHFUNCNAME(id),
274     ARCHFUNCNAME(conv_float_simd_size),
275 
276 #if defined(MIPP_NO_INTRINSICS) || defined(HAVE_MIPP)
277     ARCHFUNCNAME(conv_float_move_rest),
278     ARCHFUNCNAME(conv_float_inplace),
279     ARCHFUNCNAME(conv_float_oop),
280 
281     ARCHFUNCNAME(conv_cplx_move_rest),
282     ARCHFUNCNAME(conv_cplx_float_oop)
283 #else
284     nullptr,
285     nullptr,
286     nullptr,
287 
288     nullptr,
289     nullptr
290 #endif
291 };
292 
293 
ARCHFUNCNAME(conv_ptrs)294 const conv_f_ptrs* ARCHFUNCNAME(conv_ptrs)()
295 {
296     DPRINT("arch pointer for '%s':\n", conv_ptrs.id);
297     if (!strcmp(conv_ptrs.id, "none"))
298         return &conv_ptrs;
299 
300 #if defined(MIPP_NO_INTRINSICS)
301     DPRINT("arch pointer for '%s' - BUT defined(MIPP_NO_INTRINSICS)\n", conv_ptrs.id);
302     return &conv_ptrs;
303 #elif defined(HAVE_MIPP)
304     DPRINT("arch pointer for '%s' - defined(HAVE_MIPP)\n", conv_ptrs.id);
305     DPRINT("'%s': conv_ptrs.using_mipp %d\n", conv_ptrs.id, conv_ptrs.using_mipp);
306     DPRINT("'%s': simd_size() %d\n", conv_ptrs.id, conv_ptrs.fp_conv_float_simd_size());
307     if (conv_ptrs.using_mipp && conv_ptrs.fp_conv_float_simd_size() > 1)
308         return &conv_ptrs;
309     else
310         DPRINT("arch pointer for '%s': HAVE_MIPP BUT using_mipp %d, float_simd_size %d\n", conv_ptrs.id, conv_ptrs.using_mipp, conv_ptrs.fp_conv_float_simd_size());
311 #else
312     DPRINT("arch pointer for '%s': neither MIPP_NO_INTRINSICS nor HAVE_MIPP\n", conv_ptrs.id);
313 #endif
314     DPRINT("arch pointer for '%s' => nullptr\n", conv_ptrs.id);
315     return nullptr;
316 }
317 
318 #if defined(__cplusplus) && (__cplusplus >= 201703L)
319 [[maybe_unused]]
320 #endif
321 static f_conv_ptrs test_f_ptrs = ARCHFUNCNAME(conv_ptrs);
322 
323