1 // Copyright 2015 The Gemmlowp Authors. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // kernel_default.h: Chooses default GEMM and GEMV kernels for the 16 // host platform. 17 18 #ifndef GEMMLOWP_INTERNAL_KERNEL_DEFAULT_H_ 19 #define GEMMLOWP_INTERNAL_KERNEL_DEFAULT_H_ 20 21 #ifndef GEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK 22 #define GEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK 23 #endif 24 25 #include "../public/bit_depth.h" 26 #include "common.h" 27 #include "kernel_reference.h" 28 29 namespace gemmlowp { 30 31 template <bool MaxProductIsLessThan4096, 32 bool LhsAlwaysNonzero> 33 struct DefaultKernelImpl {}; 34 35 // Partial specialization implementing the logic that if we want to use 36 // a kernel for LhsAlwaysNonzero but do not have such a kernel, then we fall 37 // back to a generic kernel not taking advantage of LhsAlwaysNonzero. 38 template <bool LhsAlwaysNonzero> 39 struct DefaultKernelImpl<true, LhsAlwaysNonzero> 40 : DefaultKernelImpl<false, LhsAlwaysNonzero> {}; 41 42 // Partial specialization implementing the logic that if we want to use 43 // a kernel for MaxProductIsLessThan4096 but do not have such a kernel, then we 44 // fall back to a generic kernel not taking advantage of 45 // MaxProductIsLessThan4096. 46 template <bool MaxProductIsLessThan4096> 47 struct DefaultKernelImpl<MaxProductIsLessThan4096, true> 48 : DefaultKernelImpl<MaxProductIsLessThan4096, false> {}; 49 50 template <typename BitDepthParams> 51 struct DefaultKernel 52 : DefaultKernelImpl<(BitDepthParams::LhsRange::kMaxValue * 53 BitDepthParams::RhsRange::kMaxValue < 54 4096), 55 (BitDepthParams::LhsRange::kMinValue > 0)> {}; 56 57 } // end namespace gemmlowp 58 59 #define GEMMLOWP_SET_DEFAULT_KERNEL(MaxProductIsLessThan4096, \ 60 LhsAlwaysNonzero, Kernel) \ 61 namespace gemmlowp { \ 62 template <> \ 63 struct DefaultKernelImpl<MaxProductIsLessThan4096, \ 64 LhsAlwaysNonzero> : Kernel {}; \ 65 } 66 67 #if defined GEMMLOWP_NEON_32 68 #include "kernel_neon.h" 69 GEMMLOWP_SET_DEFAULT_KERNEL(false, false, NEON_32_Kernel12x4Depth2) 70 GEMMLOWP_SET_DEFAULT_KERNEL(true, false, 71 NEON_32_Kernel12x4Depth2Assuming12BitProducts) 72 GEMMLOWP_SET_DEFAULT_KERNEL(false, true, 73 NEON_32bit_GEMM_Int8Operands_LhsNonzero) 74 #elif defined GEMMLOWP_NEON_64 75 #include "kernel_neon.h" 76 GEMMLOWP_SET_DEFAULT_KERNEL(false, false, NEON_64_Kernel12x8Depth2) 77 GEMMLOWP_SET_DEFAULT_KERNEL(false, true, 78 NEON_64bit_GEMM_Int8Operands_LhsNonzero) 79 #elif defined GEMMLOWP_SSE4_32 80 #include "kernel_sse.h" 81 GEMMLOWP_SET_DEFAULT_KERNEL(false, false, SSE4_32_Kernel4x4Depth2) 82 #elif defined GEMMLOWP_SSE4_64 83 #include "kernel_sse.h" 84 GEMMLOWP_SET_DEFAULT_KERNEL(false, false, SSE4_64_Kernel12x4Depth2) 85 #else 86 #ifndef GEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK 87 #if defined __ARM_ARCH_5TE__ 88 // SIMD is not available on this platform. The slow fallback will be used. 89 // Don't require GEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK because there's nothing 90 // the user can do about it. 91 #else 92 #error \ 93 "SIMD not enabled, you'd be getting a slow software fallback. Consider \ 94 enabling SIMD extensions (for example using -msse4 if you're on modern x86). \ 95 If that's not an option, and you would like to continue with the \ 96 slow fallback, define GEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK." 97 #endif 98 #endif 99 #include "kernel_reference.h" 100 namespace gemmlowp { 101 typedef ReferenceKernel<KernelFormat< 102 KernelSideFormat<CellFormat<4, 16, CellOrder::WidthMajor>, 1>, 103 KernelSideFormat<CellFormat<4, 16, CellOrder::WidthMajor>, 1> > > 104 DefaultReferenceKernel; 105 } 106 GEMMLOWP_SET_DEFAULT_KERNEL(false, false, DefaultReferenceKernel) 107 #endif 108 109 #endif // GEMMLOWP_INTERNAL_KERNEL_DEFAULT_H_ 110