1 #pragma once 2 3 /* pf_conv.h/.cpp implements linear "slow" convolution. 4 * this code is primarily for test/demonstration of runtime dispatching. 5 * each "kernel" is compiled with different compiler/architecture options, 6 * that activates different implementations in the MIPP headers. 7 * 8 * the dispatcher library 'pf_conv_dispatcher' collects (links agains) 9 * all the pf_conv_arch_<opt> libraries .. 10 * and provides the get_all_conv_arch_ptrs() function, 11 * which delivers an array of pointers to the struct (conv_f_ptrs) 12 * containing the function pointers for the different implementations. 13 * 14 * requirement(s): 15 * - installed MIPP headers 16 * - compiler definitions for the different architecture types: 17 * see CMakeLists.txt CONV_ARCH_MSVC_AMD64, CONV_ARCH_GCC_ARM32NEON, .. 18 * - one cmake library target pf_conv_arch_<opt> for each architecture option. 19 * each one gets it's specific architecture/compiler options 20 * utilizing the target_set_cxx_arch_option() macro in the CMakeLists.txt 21 */ 22 23 #include "pf_cplx.h" 24 25 #if defined(_MSC_VER) 26 # define RESTRICT __restrict 27 #elif defined(__GNUC__) 28 # define RESTRICT __restrict 29 #else 30 # define RESTRICT 31 #endif 32 33 34 struct conv_buffer_state 35 { 36 int offset; // sample index where data (to process) starts 37 int size; // actual - or previous - size in amount of samples from buffer start (NOT offset) 38 }; 39 40 // declare provided function pointer types 41 42 typedef const char * (*f_conv_id)(); 43 44 typedef int (*f_conv_float_simd_size)(); 45 46 typedef void (*f_conv_float_move_rest)(float * RESTRICT s, conv_buffer_state * RESTRICT state); 47 typedef void (*f_conv_cplx_move_rest)(complexf * RESTRICT s, conv_buffer_state * RESTRICT state); 48 49 typedef int (*f_conv_float_inplace)( 50 float * RESTRICT s, conv_buffer_state * RESTRICT state, 51 const float * RESTRICT filter, const int sz_filter 52 ); 53 54 typedef int (*f_conv_float_oop)( 55 const float * RESTRICT s, conv_buffer_state * RESTRICT state, 56 const float * RESTRICT filter, const int sz_filter, 57 float * RESTRICT y 58 ); 59 60 typedef int (*f_conv_cplx_float_oop)( 61 const complexf * RESTRICT s, conv_buffer_state * RESTRICT state, 62 const float * RESTRICT filter, const int sz_filter, 63 complexf * RESTRICT y 64 ); 65 66 67 // struct with the provided function pointers 68 struct conv_f_ptrs 69 { 70 const char * id; 71 const int using_mipp; 72 f_conv_id fp_id; 73 f_conv_float_simd_size fp_conv_float_simd_size; 74 75 f_conv_float_move_rest fp_conv_float_move_rest; 76 f_conv_float_inplace fp_conv_float_inplace; 77 f_conv_float_oop fp_conv_float_oop; 78 79 f_conv_cplx_move_rest fp_conv_cplx_move_rest; 80 f_conv_cplx_float_oop fp_conv_cplx_float_oop; 81 }; 82 83 typedef const conv_f_ptrs * ptr_to_conv_f_ptrs; 84 85 // function pointer type, delivering the struct with the function pointers 86 typedef const conv_f_ptrs* (*f_conv_ptrs)(); 87 88 89 // helper for systematic function names 90 #define CONV_FN_ARCH(FN, ARCH) FN##_##ARCH 91 92 // declare all functions - returning the structs with the function pointers 93 extern const conv_f_ptrs* CONV_FN_ARCH(conv_ptrs, none)(); // = conv_ptrs_none() 94 extern const conv_f_ptrs* CONV_FN_ARCH(conv_ptrs, dflt)(); // simd / mipp is activated 95 96 extern const conv_f_ptrs* CONV_FN_ARCH(conv_ptrs, sse3)(); // = conv_ptrs_sse3() 97 extern const conv_f_ptrs* CONV_FN_ARCH(conv_ptrs, sse4)(); 98 extern const conv_f_ptrs* CONV_FN_ARCH(conv_ptrs, avx)(); 99 extern const conv_f_ptrs* CONV_FN_ARCH(conv_ptrs, avx2)(); 100 101 extern const conv_f_ptrs* CONV_FN_ARCH(conv_ptrs, sse2)(); 102 //extern const conv_f_ptrs* CONV_FN_ARCH(conv_ptrs, avx)(); // already declared 103 //extern const conv_f_ptrs* CONV_FN_ARCH(conv_ptrs, avx2)(); // already declared 104 105 extern const conv_f_ptrs* CONV_FN_ARCH(conv_ptrs, neon_vfpv4)(); // for armv7l / 32-bit ARM 106 extern const conv_f_ptrs* CONV_FN_ARCH(conv_ptrs, neon_rpi3_a53)(); 107 extern const conv_f_ptrs* CONV_FN_ARCH(conv_ptrs, neon_rpi4_a72)(); 108 109 extern const conv_f_ptrs* CONV_FN_ARCH(conv_ptrs, armv8a)(); // for aarch64 110