• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #pragma once
2 
3 /* pf_conv.h/.cpp implements linear "slow" convolution.
4  * this code is primarily for test/demonstration of runtime dispatching.
5  * each "kernel" is compiled with different compiler/architecture options,
6  * that activates different implementations in the MIPP headers.
7  *
8  * the dispatcher library 'pf_conv_dispatcher' collects (links against)
9  * all the pf_conv_arch_<opt> libraries ..
10  * and provides the  get_all_conv_arch_ptrs() function,
11  * which delivers an array of pointers to the struct (conv_f_ptrs)
12  * containing the function pointers for the different implementations.
13  *
14  * requirement(s):
15  * - installed MIPP headers
16  * - compiler definitions for the different architecture types:
17  *   see CMakeLists.txt CONV_ARCH_MSVC_AMD64, CONV_ARCH_GCC_ARM32NEON, ..
18  * - one cmake library target pf_conv_arch_<opt> for each architecture option.
19  *   each one gets its specific architecture/compiler options
20  *    utilizing the target_set_cxx_arch_option() macro in the CMakeLists.txt
21  */
22 
23 #include "pf_cplx.h"
24 
/* RESTRICT: portable spelling of the "no aliasing" pointer qualifier.
 * Compilers that define _MSC_VER or __GNUC__ get the __restrict keyword;
 * for every other compiler the macro expands to nothing, so declarations
 * using it still compile (just without the aliasing hint). */
#if defined(_MSC_VER) || defined(__GNUC__)
#  define RESTRICT __restrict
#else
#  define RESTRICT
#endif
32 
33 
/* Bookkeeping for a sample buffer: where the data to process starts and
 * how many samples the buffer holds. */
struct conv_buffer_state
{
    /* sample index where the data (to process) starts */
    int offset;

    /* actual - or previous - size, as an amount of samples counted from
     * the buffer start (NOT from offset) */
    int size;
};
39 
40 // declare provided function pointer types
41 
/* Pointer to a function that takes no arguments and returns a C string.
 * NOTE(review): presumably the identifier of the implementation - confirm. */
typedef const char * (*f_conv_id)();

/* Pointer to a function that takes no arguments and returns an int.
 * NOTE(review): by its name presumably the number of floats per SIMD
 * register of the implementation - confirm. */
typedef int (*f_conv_float_simd_size)();
45 
46 typedef void (*f_conv_float_move_rest)(float * RESTRICT s, conv_buffer_state * RESTRICT state);
47 typedef void (*f_conv_cplx_move_rest)(complexf * RESTRICT s, conv_buffer_state * RESTRICT state);
48 
49 typedef int  (*f_conv_float_inplace)(
50         float * RESTRICT s, conv_buffer_state * RESTRICT state,
51         const float * RESTRICT filter, const int sz_filter
52         );
53 
54 typedef int  (*f_conv_float_oop)(
55         const float * RESTRICT s, conv_buffer_state * RESTRICT state,
56         const float * RESTRICT filter, const int sz_filter,
57         float * RESTRICT y
58         );
59 
60 typedef int  (*f_conv_cplx_float_oop)(
61         const complexf * RESTRICT s, conv_buffer_state * RESTRICT state,
62         const float * RESTRICT filter, const int sz_filter,
63         complexf * RESTRICT y
64         );
65 
66 
67 // struct with the provided function pointers
68 struct conv_f_ptrs
69 {
70     const char * id;
71     const int using_mipp;
72     f_conv_id               fp_id;
73     f_conv_float_simd_size  fp_conv_float_simd_size;
74 
75     f_conv_float_move_rest  fp_conv_float_move_rest;
76     f_conv_float_inplace    fp_conv_float_inplace;
77     f_conv_float_oop        fp_conv_float_oop;
78 
79     f_conv_cplx_move_rest   fp_conv_cplx_move_rest;
80     f_conv_cplx_float_oop   fp_conv_cplx_float_oop;
81 };
82 
83 typedef const conv_f_ptrs * ptr_to_conv_f_ptrs;
84 
85 // function pointer type, delivering the struct with the function pointers
86 typedef const conv_f_ptrs* (*f_conv_ptrs)();
87 
88 
/* Helper for systematic function names: pastes FN and ARCH together with
 * an underscore in between, e.g.
 *   CONV_FN_ARCH(conv_ptrs, sse3)  ->  conv_ptrs_sse3
 * Both arguments are operands of ##, so they are pasted as written and not
 * macro-expanded first. */
#define CONV_FN_ARCH(FN, ARCH) FN ## _ ## ARCH
91 
92 // declare all functions - returning the structs with the function pointers
93 extern const conv_f_ptrs* CONV_FN_ARCH(conv_ptrs, none)();  // = conv_ptrs_none()
94 extern const conv_f_ptrs* CONV_FN_ARCH(conv_ptrs, dflt)();  // simd / mipp is activated
95 
96 extern const conv_f_ptrs* CONV_FN_ARCH(conv_ptrs, sse3)();  // = conv_ptrs_sse3()
97 extern const conv_f_ptrs* CONV_FN_ARCH(conv_ptrs, sse4)();
98 extern const conv_f_ptrs* CONV_FN_ARCH(conv_ptrs, avx)();
99 extern const conv_f_ptrs* CONV_FN_ARCH(conv_ptrs, avx2)();
100 
101 extern const conv_f_ptrs* CONV_FN_ARCH(conv_ptrs, sse2)();
102 //extern const conv_f_ptrs* CONV_FN_ARCH(conv_ptrs, avx)();  // already declared
103 //extern const conv_f_ptrs* CONV_FN_ARCH(conv_ptrs, avx2)(); // already declared
104 
105 extern const conv_f_ptrs* CONV_FN_ARCH(conv_ptrs, neon_vfpv4)();    // for armv7l / 32-bit ARM
106 extern const conv_f_ptrs* CONV_FN_ARCH(conv_ptrs, neon_rpi3_a53)();
107 extern const conv_f_ptrs* CONV_FN_ARCH(conv_ptrs, neon_rpi4_a72)();
108 
109 extern const conv_f_ptrs* CONV_FN_ARCH(conv_ptrs, armv8a)();  // for aarch64
110