• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
// Copyright 2018 The Gemmlowp Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14 
15 // detect_platform.h: Sets up macros that control architecture-specific
16 // features of gemmlowp's implementation.
17 
18 #ifndef GEMMLOWP_INTERNAL_DETECT_PLATFORM_H_
19 #define GEMMLOWP_INTERNAL_DETECT_PLATFORM_H_
20 
21 // Our inline assembly path assume GCC/Clang syntax.
22 // Native Client doesn't seem to support inline assembly(?).
23 #if defined(__GNUC__) && !defined(__native_client__)
24 #define GEMMLOWP_ALLOW_INLINE_ASM
25 #endif
26 
27 // Define macro statement that avoids inlining for GCC.
28 // For non-GCC, define as empty macro.
29 #if defined(__GNUC__)
30 #define GEMMLOWP_NOINLINE __attribute__((noinline))
31 #else
32 #define GEMMLOWP_NOINLINE
33 #endif
34 
35 // Detect ARM, 32-bit or 64-bit
36 #ifdef __arm__
37 #define GEMMLOWP_ARM_32
38 #endif
39 
40 #ifdef __aarch64__
41 #define GEMMLOWP_ARM_64
42 #endif
43 
44 #if defined(GEMMLOWP_ARM_32) || defined(GEMMLOWP_ARM_64)
45 #define GEMMLOWP_ARM
46 #endif
47 
48 // Detect MIPS, 32-bit or 64-bit
49 #if defined(__mips) && !defined(__LP64__)
50 #define GEMMLOWP_MIPS_32
51 #endif
52 
53 #if defined(__mips) && defined(__LP64__)
54 #define GEMMLOWP_MIPS_64
55 #endif
56 
57 #if defined(GEMMLOWP_MIPS_32) || defined(GEMMLOWP_MIPS_64)
58 #define GEMMLOWP_MIPS
59 #endif
60 
61 // Detect x86, 32-bit or 64-bit
62 #if defined(__i386__) || defined(_M_IX86) || defined(_X86_) || defined(__i386)
63 #define GEMMLOWP_X86_32
64 #endif
65 
66 #if defined(__x86_64__) || defined(_M_X64) || defined(__amd64)
67 #define GEMMLOWP_X86_64
68 #endif
69 
70 #if defined(GEMMLOWP_X86_32) || defined(GEMMLOWP_X86_64)
71 #define GEMMLOWP_X86
72 #endif
73 
74 // Detect WebAssembly SIMD.
75 #if defined(__wasm_simd128__)
76 #define GEMMLOWP_WASMSIMD
77 #endif
78 
79 // Some of our optimized paths use inline assembly and for
80 // now we don't bother enabling some other optimized paths using intrinddics
81 // where we can't use inline assembly paths.
82 #ifdef GEMMLOWP_ALLOW_INLINE_ASM
83 
84 // Detect NEON. It's important to check for both tokens.
85 #if (defined __ARM_NEON) || (defined __ARM_NEON__)
86 #define GEMMLOWP_NEON
87 #endif
88 
89 // Convenience NEON tokens for 32-bit or 64-bit
90 #if defined(GEMMLOWP_NEON) && defined(GEMMLOWP_ARM_32)
91 #define GEMMLOWP_NEON_32
92 #endif
93 
94 #if defined(GEMMLOWP_NEON) && defined(GEMMLOWP_ARM_64)
95 #define GEMMLOWP_NEON_64
96 #endif
97 
98 // Detect MIPS MSA.
99 // Limit MSA optimizations to little-endian CPUs for now.
100 // TODO: Perhaps, eventually support MSA optimizations on big-endian CPUs?
101 #if defined(GEMMLOWP_MIPS) && (__mips_isa_rev >= 5) && defined(__mips_msa) && \
102     defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
103 #define GEMMLOWP_MSA
104 #endif
105 
106 // Convenience MIPS MSA tokens for 32-bit or 64-bit.
107 #if defined(GEMMLOWP_MSA) && defined(GEMMLOWP_MIPS_32)
108 #define GEMMLOWP_MSA_32
109 #endif
110 
111 #if defined(GEMMLOWP_MSA) && defined(GEMMLOWP_MIPS_64)
112 #define GEMMLOWP_MSA_64
113 #endif
114 
115 // compiler define for AVX2 -D GEMMLOWP_ENABLE_AVX2
116 // Detect AVX2
117 #if defined(__AVX2__) && defined(GEMMLOWP_ENABLE_AVX2)
118 #define GEMMLOWP_AVX2
119 // Detect SSE4.
120 // MSVC does not have __SSE4_1__ macro, but will enable SSE4
121 // when AVX is turned on.
122 #elif defined(__SSE4_1__) || (defined(_MSC_VER) && defined(__AVX__))
123 #define GEMMLOWP_SSE4
124 // Detect SSE3.
125 #elif defined(__SSE3__)
126 #define GEMMLOWP_SSE3
127 #endif
128 
129 // Convenience SSE4 tokens for 32-bit or 64-bit
130 #if defined(GEMMLOWP_SSE4) && defined(GEMMLOWP_X86_32) && \
131     !defined(GEMMLOWP_DISABLE_SSE4)
132 #define GEMMLOWP_SSE4_32
133 #endif
134 
135 #if defined(GEMMLOWP_SSE3) && defined(GEMMLOWP_X86_32)
136 #define GEMMLOWP_SSE3_32
137 #endif
138 
139 #if defined(GEMMLOWP_SSE4) && defined(GEMMLOWP_X86_64) && \
140     !defined(GEMMLOWP_DISABLE_SSE4)
141 #define GEMMLOWP_SSE4_64
142 #endif
143 
144 #if defined(GEMMLOWP_SSE3) && defined(GEMMLOWP_X86_64)
145 #define GEMMLOWP_SSE3_64
146 #endif
147 
148 #if defined(GEMMLOWP_AVX2) && defined(GEMMLOWP_X86_64)
149 #define GEMMLOWP_AVX2_64
150 #endif
151 
152 #if defined(__has_feature)
153 #if __has_feature(memory_sanitizer)
154 #include <sanitizer/msan_interface.h>
155 #define GEMMLOWP_MARK_MEMORY_AS_INITIALIZED __msan_unpoison
156 #elif __has_feature(address_sanitizer)
157 #include <sanitizer/asan_interface.h>
158 #define GEMMLOWP_MARK_MEMORY_AS_INITIALIZED __asan_unpoison_memory_region
159 #endif
160 #endif
161 
162 #endif  // GEMMLOWP_ALLOW_INLINE_ASM
163 
164 // Detect Android. Don't conflate with ARM - we care about tuning
165 // for non-ARM Android devices too. This can be used in conjunction
166 // with x86 to tune differently for mobile x86 CPUs (Atom) vs. desktop x86 CPUs.
167 #if defined(__ANDROID__) || defined(ANDROID)
168 #define GEMMLOWP_ANDROID
169 #endif
170 
171 #endif  // GEMMLOWP_INTERNAL_DETECT_PLATFORM_H_
172