// Copyright 2018 The Gemmlowp Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// detect_platform.h: Sets up macros that control architecture-specific
// features of gemmlowp's implementation.

#ifndef GEMMLOWP_INTERNAL_DETECT_PLATFORM_H_
#define GEMMLOWP_INTERNAL_DETECT_PLATFORM_H_

// Our inline assembly paths assume GCC/Clang syntax.
// Native Client doesn't seem to support inline assembly.
#if defined(__GNUC__) && !defined(__native_client__)
#define GEMMLOWP_ALLOW_INLINE_ASM
#endif

// Define a macro statement that avoids inlining for GCC.
// For non-GCC, define it as an empty macro.
#if defined(__GNUC__)
#define GEMMLOWP_NOINLINE __attribute__((noinline))
#else
#define GEMMLOWP_NOINLINE
#endif

// Detect ARM, 32-bit or 64-bit.
#ifdef __arm__
#define GEMMLOWP_ARM_32
#endif

#ifdef __aarch64__
#define GEMMLOWP_ARM_64
#endif

#if defined(GEMMLOWP_ARM_32) || defined(GEMMLOWP_ARM_64)
#define GEMMLOWP_ARM
#endif

// Detect MIPS, 32-bit or 64-bit.
#if defined(__mips) && !defined(__LP64__)
#define GEMMLOWP_MIPS_32
#endif

#if defined(__mips) && defined(__LP64__)
#define GEMMLOWP_MIPS_64
#endif

#if defined(GEMMLOWP_MIPS_32) || defined(GEMMLOWP_MIPS_64)
#define GEMMLOWP_MIPS
#endif

// Detect x86, 32-bit or 64-bit.
#if defined(__i386__) || defined(_M_IX86) || defined(_X86_) || defined(__i386)
#define GEMMLOWP_X86_32
#endif

#if defined(__x86_64__) || defined(_M_X64) || defined(__amd64)
#define GEMMLOWP_X86_64
#endif

#if defined(GEMMLOWP_X86_32) || defined(GEMMLOWP_X86_64)
#define GEMMLOWP_X86
#endif

// Some of our optimized paths use inline assembly, and for now we don't
// bother enabling the other optimized paths, which use intrinsics, on
// platforms where we can't use inline assembly.
#ifdef GEMMLOWP_ALLOW_INLINE_ASM

// Detect NEON. It's important to check for both tokens.
#if (defined __ARM_NEON) || (defined __ARM_NEON__)
#define GEMMLOWP_NEON
#endif

// Convenience NEON tokens for 32-bit or 64-bit.
#if defined(GEMMLOWP_NEON) && defined(GEMMLOWP_ARM_32)
#define GEMMLOWP_NEON_32
#endif

#if defined(GEMMLOWP_NEON) && defined(GEMMLOWP_ARM_64)
#define GEMMLOWP_NEON_64
#endif

// Detect MIPS MSA.
// Limit MSA optimizations to little-endian CPUs for now.
// TODO: Perhaps eventually support MSA optimizations on big-endian CPUs?
#if defined(GEMMLOWP_MIPS) && (__mips_isa_rev >= 5) && defined(__mips_msa) && \
    defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
#define GEMMLOWP_MSA
#endif

// Convenience MIPS MSA tokens for 32-bit or 64-bit.
#if defined(GEMMLOWP_MSA) && defined(GEMMLOWP_MIPS_32)
#define GEMMLOWP_MSA_32
#endif

#if defined(GEMMLOWP_MSA) && defined(GEMMLOWP_MIPS_64)
#define GEMMLOWP_MSA_64
#endif

// Detect AVX2. This additionally requires the compiler define
// -D GEMMLOWP_ENABLE_AVX2.
#if defined(__AVX2__) && defined(GEMMLOWP_ENABLE_AVX2)
#define GEMMLOWP_AVX2
// Detect SSE4. MSVC does not define the __SSE4_1__ macro, but it does
// enable SSE4 when AVX is turned on.
#elif defined(__SSE4_1__) || (defined(_MSC_VER) && defined(__AVX__))
#define GEMMLOWP_SSE4
// Detect SSE3.
#elif defined(__SSE3__)
#define GEMMLOWP_SSE3
#endif

// Convenience SSE4 tokens for 32-bit or 64-bit.
#if defined(GEMMLOWP_SSE4) && defined(GEMMLOWP_X86_32) && \
    !defined(GEMMLOWP_DISABLE_SSE4)
#define GEMMLOWP_SSE4_32
#endif

#if defined(GEMMLOWP_SSE3) && defined(GEMMLOWP_X86_32)
#define GEMMLOWP_SSE3_32
#endif

#if defined(GEMMLOWP_SSE4) && defined(GEMMLOWP_X86_64) && \
    !defined(GEMMLOWP_DISABLE_SSE4)
#define GEMMLOWP_SSE4_64
#endif

#if defined(GEMMLOWP_SSE3) && defined(GEMMLOWP_X86_64)
#define GEMMLOWP_SSE3_64
#endif

#if defined(GEMMLOWP_AVX2) && defined(GEMMLOWP_X86_64)
#define GEMMLOWP_AVX2_64
#endif
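// Illustrative sketch, not part of the original header: kernels elsewhere in
// gemmlowp are selected from the convenience tokens above by preprocessor
// ladders roughly like the one below. GEMMLOWP_DETECT_PLATFORM_EXAMPLE and
// GEMMLOWP_EXAMPLE_KERNEL are hypothetical names; the block is guarded so it
// has no effect in real builds.
#ifdef GEMMLOWP_DETECT_PLATFORM_EXAMPLE
#if defined(GEMMLOWP_NEON_64)
#define GEMMLOWP_EXAMPLE_KERNEL "NEON, 64-bit ARM"
#elif defined(GEMMLOWP_NEON_32)
#define GEMMLOWP_EXAMPLE_KERNEL "NEON, 32-bit ARM"
#elif defined(GEMMLOWP_MSA)
#define GEMMLOWP_EXAMPLE_KERNEL "MIPS MSA"
#elif defined(GEMMLOWP_AVX2_64)
#define GEMMLOWP_EXAMPLE_KERNEL "AVX2, 64-bit x86"
#elif defined(GEMMLOWP_SSE4_32) || defined(GEMMLOWP_SSE4_64)
#define GEMMLOWP_EXAMPLE_KERNEL "SSE4"
#else
#define GEMMLOWP_EXAMPLE_KERNEL "portable reference C++"
#endif
#endif  // GEMMLOWP_DETECT_PLATFORM_EXAMPLE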
#if defined(__has_feature)
#if __has_feature(memory_sanitizer)
#include <sanitizer/msan_interface.h>
#define GEMMLOWP_MARK_MEMORY_AS_INITIALIZED __msan_unpoison
#elif __has_feature(address_sanitizer)
#include <sanitizer/asan_interface.h>
#define GEMMLOWP_MARK_MEMORY_AS_INITIALIZED __asan_unpoison_memory_region
#endif
#endif

#endif  // GEMMLOWP_ALLOW_INLINE_ASM

// Detect Android. Don't conflate with ARM - we care about tuning
// for non-ARM Android devices too. This can be used in conjunction
// with x86 to tune differently for mobile x86 CPUs (Atom) vs. desktop x86 CPUs.
#if defined(__ANDROID__) || defined(ANDROID)
#define GEMMLOWP_ANDROID
#endif

#endif  // GEMMLOWP_INTERNAL_DETECT_PLATFORM_H_
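// A minimal self-check sketch, not part of gemmlowp: copy the body of the
// #if 0 block below into its own .cc file (the name detect_platform_check.cc
// is hypothetical) and build it with your usual flags, e.g.
//   g++ -O2 -march=native detect_platform_check.cc
// to see which of the tokens above your toolchain and flags produce. Note
// that the SIMD tokens are only defined under GEMMLOWP_ALLOW_INLINE_ASM,
// i.e. with GCC/Clang outside Native Client. Kept under #if 0 here so it
// never affects a real build of this header.
#if 0
#include <cstdio>

#include "detect_platform.h"

int main() {
#if defined(GEMMLOWP_NEON)
  std::printf("NEON paths enabled\n");
#elif defined(GEMMLOWP_MSA)
  std::printf("MIPS MSA paths enabled\n");
#elif defined(GEMMLOWP_AVX2)
  std::printf("AVX2 paths enabled\n");
#elif defined(GEMMLOWP_SSE4)
  std::printf("SSE4 paths enabled\n");
#elif defined(GEMMLOWP_SSE3)
  std::printf("SSE3 paths enabled\n");
#else
  std::printf("portable reference paths only\n");
#endif
  return 0;
}
#endif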