// Copyright 2018 The Gemmlowp Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// detect_platform.h: Sets up macros that control architecture-specific
// features of gemmlowp's implementation.

#ifndef GEMMLOWP_INTERNAL_DETECT_PLATFORM_H_
#define GEMMLOWP_INTERNAL_DETECT_PLATFORM_H_

// Our inline assembly paths assume GCC/Clang syntax.
// Native Client doesn't seem to support inline assembly(?).
#if defined(__GNUC__) && !defined(__native_client__)
#define GEMMLOWP_ALLOW_INLINE_ASM
#endif

// Define macro statement that avoids inlining for GCC.
// For non-GCC, define as empty macro.
#if defined(__GNUC__)
#define GEMMLOWP_NOINLINE __attribute__((noinline))
#else
#define GEMMLOWP_NOINLINE
#endif

// Detect ARM, 32-bit or 64-bit
#ifdef __arm__
#define GEMMLOWP_ARM_32
#endif

#ifdef __aarch64__
#define GEMMLOWP_ARM_64
#endif

#if defined(GEMMLOWP_ARM_32) || defined(GEMMLOWP_ARM_64)
#define GEMMLOWP_ARM
#endif

// Detect MIPS, 32-bit or 64-bit
#if defined(__mips) && !defined(__LP64__)
#define GEMMLOWP_MIPS_32
#endif

#if defined(__mips) && defined(__LP64__)
#define GEMMLOWP_MIPS_64
#endif

#if defined(GEMMLOWP_MIPS_32) || defined(GEMMLOWP_MIPS_64)
#define GEMMLOWP_MIPS
#endif

// Detect x86, 32-bit or 64-bit
#if defined(__i386__) || defined(_M_IX86) || defined(_X86_) || defined(__i386)
#define GEMMLOWP_X86_32
#endif

#if defined(__x86_64__) || defined(_M_X64) || defined(__amd64)
#define GEMMLOWP_X86_64
#endif

#if defined(GEMMLOWP_X86_32) || defined(GEMMLOWP_X86_64)
#define GEMMLOWP_X86
#endif

// Detect WebAssembly SIMD.
#if defined(__wasm_simd128__)
#define GEMMLOWP_WASMSIMD
#endif

// Some of our optimized paths use inline assembly and for
// now we don't bother enabling some other optimized paths using intrinsics
// where we can't use inline assembly paths.
#ifdef GEMMLOWP_ALLOW_INLINE_ASM

// Detect NEON. It's important to check for both tokens.
#if (defined __ARM_NEON) || (defined __ARM_NEON__)
#define GEMMLOWP_NEON
#endif

// Convenience NEON tokens for 32-bit or 64-bit
#if defined(GEMMLOWP_NEON) && defined(GEMMLOWP_ARM_32)
#define GEMMLOWP_NEON_32
#endif

#if defined(GEMMLOWP_NEON) && defined(GEMMLOWP_ARM_64)
#define GEMMLOWP_NEON_64
#endif

// Detect MIPS MSA.
// Limit MSA optimizations to little-endian CPUs for now.
// TODO: Perhaps, eventually support MSA optimizations on big-endian CPUs?
#if defined(GEMMLOWP_MIPS) && (__mips_isa_rev >= 5) && defined(__mips_msa) && \
    defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
#define GEMMLOWP_MSA
#endif

// Convenience MIPS MSA tokens for 32-bit or 64-bit.
#if defined(GEMMLOWP_MSA) && defined(GEMMLOWP_MIPS_32)
#define GEMMLOWP_MSA_32
#endif

#if defined(GEMMLOWP_MSA) && defined(GEMMLOWP_MIPS_64)
#define GEMMLOWP_MSA_64
#endif

// compiler define for AVX2 -D GEMMLOWP_ENABLE_AVX2
// Detect AVX2
#if defined(__AVX2__) && defined(GEMMLOWP_ENABLE_AVX2)
#define GEMMLOWP_AVX2
// Detect SSE4.
// MSVC does not have __SSE4_1__ macro, but will enable SSE4
// when AVX is turned on.
#elif defined(__SSE4_1__) || (defined(_MSC_VER) && defined(__AVX__))
#define GEMMLOWP_SSE4
// Detect SSE3.
#elif defined(__SSE3__)
#define GEMMLOWP_SSE3
#endif

// Convenience SSE4 tokens for 32-bit or 64-bit
#if defined(GEMMLOWP_SSE4) && defined(GEMMLOWP_X86_32) && \
    !defined(GEMMLOWP_DISABLE_SSE4)
#define GEMMLOWP_SSE4_32
#endif

#if defined(GEMMLOWP_SSE3) && defined(GEMMLOWP_X86_32)
#define GEMMLOWP_SSE3_32
#endif

#if defined(GEMMLOWP_SSE4) && defined(GEMMLOWP_X86_64) && \
    !defined(GEMMLOWP_DISABLE_SSE4)
#define GEMMLOWP_SSE4_64
#endif

#if defined(GEMMLOWP_SSE3) && defined(GEMMLOWP_X86_64)
#define GEMMLOWP_SSE3_64
#endif

#if defined(GEMMLOWP_AVX2) && defined(GEMMLOWP_X86_64)
#define GEMMLOWP_AVX2_64
#endif

#if defined(__has_feature)
#if __has_feature(memory_sanitizer)
#include <sanitizer/msan_interface.h>
#define GEMMLOWP_MARK_MEMORY_AS_INITIALIZED __msan_unpoison
#elif __has_feature(address_sanitizer)
#include <sanitizer/asan_interface.h>
#define GEMMLOWP_MARK_MEMORY_AS_INITIALIZED __asan_unpoison_memory_region
#endif
#endif

#endif  // GEMMLOWP_ALLOW_INLINE_ASM

// Detect Android. Don't conflate with ARM - we care about tuning
// for non-ARM Android devices too. This can be used in conjunction
// with x86 to tune differently for mobile x86 CPUs (Atom) vs. desktop x86 CPUs.
#if defined(__ANDROID__) || defined(ANDROID)
#define GEMMLOWP_ANDROID
#endif

#endif  // GEMMLOWP_INTERNAL_DETECT_PLATFORM_H_