1 // 2 // Copyright 2017 The Abseil Authors. 3 // 4 // Licensed under the Apache License, Version 2.0 (the "License"); 5 // you may not use this file except in compliance with the License. 6 // You may obtain a copy of the License at 7 // 8 // https://www.apache.org/licenses/LICENSE-2.0 9 // 10 // Unless required by applicable law or agreed to in writing, software 11 // distributed under the License is distributed on an "AS IS" BASIS, 12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 // See the License for the specific language governing permissions and 14 // limitations under the License. 15 // 16 // ----------------------------------------------------------------------------- 17 // File: optimization.h 18 // ----------------------------------------------------------------------------- 19 // 20 // This header file defines portable macros for performance optimization. 21 22 #ifndef ABSL_BASE_OPTIMIZATION_H_ 23 #define ABSL_BASE_OPTIMIZATION_H_ 24 25 #include <assert.h> 26 27 #include "absl/base/config.h" 28 29 // ABSL_BLOCK_TAIL_CALL_OPTIMIZATION 30 // 31 // Instructs the compiler to avoid optimizing tail-call recursion. This macro is 32 // useful when you wish to preserve the existing function order within a stack 33 // trace for logging, debugging, or profiling purposes. 34 // 35 // Example: 36 // 37 // int f() { 38 // int result = g(); 39 // ABSL_BLOCK_TAIL_CALL_OPTIMIZATION(); 40 // return result; 41 // } 42 #if defined(__pnacl__) 43 #define ABSL_BLOCK_TAIL_CALL_OPTIMIZATION() if (volatile int x = 0) { (void)x; } 44 #elif defined(__clang__) 45 // Clang will not tail call given inline volatile assembly. 46 #define ABSL_BLOCK_TAIL_CALL_OPTIMIZATION() __asm__ __volatile__("") 47 #elif defined(__GNUC__) 48 // GCC will not tail call given inline volatile assembly. 49 #define ABSL_BLOCK_TAIL_CALL_OPTIMIZATION() __asm__ __volatile__("") 50 #elif defined(_MSC_VER) 51 #include <intrin.h> 52 // The __nop() intrinsic blocks the optimisation. 53 #define ABSL_BLOCK_TAIL_CALL_OPTIMIZATION() __nop() 54 #else 55 #define ABSL_BLOCK_TAIL_CALL_OPTIMIZATION() if (volatile int x = 0) { (void)x; } 56 #endif 57 58 // ABSL_CACHELINE_SIZE 59 // 60 // Explicitly defines the size of the L1 cache for purposes of alignment. 61 // Setting the cacheline size allows you to specify that certain objects be 62 // aligned on a cacheline boundary with `ABSL_CACHELINE_ALIGNED` declarations. 63 // (See below.) 64 // 65 // NOTE: this macro should be replaced with the following C++17 features, when 66 // those are generally available: 67 // 68 // * `std::hardware_constructive_interference_size` 69 // * `std::hardware_destructive_interference_size` 70 // 71 // See http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2016/p0154r1.html 72 // for more information. 73 #if defined(__GNUC__) 74 // Cache line alignment 75 #if defined(__i386__) || defined(__x86_64__) 76 #define ABSL_CACHELINE_SIZE 64 77 #elif defined(__powerpc64__) 78 #define ABSL_CACHELINE_SIZE 128 79 #elif defined(__aarch64__) 80 // We would need to read special register ctr_el0 to find out L1 dcache size. 81 // This value is a good estimate based on a real aarch64 machine. 82 #define ABSL_CACHELINE_SIZE 64 83 #elif defined(__arm__) 84 // Cache line sizes for ARM: These values are not strictly correct since 85 // cache line sizes depend on implementations, not architectures. There 86 // are even implementations with cache line sizes configurable at boot 87 // time. 88 #if defined(__ARM_ARCH_5T__) 89 #define ABSL_CACHELINE_SIZE 32 90 #elif defined(__ARM_ARCH_7A__) 91 #define ABSL_CACHELINE_SIZE 64 92 #endif 93 #endif 94 #endif 95 96 #ifndef ABSL_CACHELINE_SIZE 97 // A reasonable default guess. Note that overestimates tend to waste more 98 // space, while underestimates tend to waste more time. 99 #define ABSL_CACHELINE_SIZE 64 100 #endif 101 102 // ABSL_CACHELINE_ALIGNED 103 // 104 // Indicates that the declared object be cache aligned using 105 // `ABSL_CACHELINE_SIZE` (see above). Cacheline aligning objects allows you to 106 // load a set of related objects in the L1 cache for performance improvements. 107 // Cacheline aligning objects properly allows constructive memory sharing and 108 // prevents destructive (or "false") memory sharing. 109 // 110 // NOTE: callers should replace uses of this macro with `alignas()` using 111 // `std::hardware_constructive_interference_size` and/or 112 // `std::hardware_destructive_interference_size` when C++17 becomes available to 113 // them. 114 // 115 // See http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2016/p0154r1.html 116 // for more information. 117 // 118 // On some compilers, `ABSL_CACHELINE_ALIGNED` expands to an `__attribute__` 119 // or `__declspec` attribute. For compilers where this is not known to work, 120 // the macro expands to nothing. 121 // 122 // No further guarantees are made here. The result of applying the macro 123 // to variables and types is always implementation-defined. 124 // 125 // WARNING: It is easy to use this attribute incorrectly, even to the point 126 // of causing bugs that are difficult to diagnose, crash, etc. It does not 127 // of itself guarantee that objects are aligned to a cache line. 128 // 129 // NOTE: Some compilers are picky about the locations of annotations such as 130 // this attribute, so prefer to put it at the beginning of your declaration. 131 // For example, 132 // 133 // ABSL_CACHELINE_ALIGNED static Foo* foo = ... 134 // 135 // class ABSL_CACHELINE_ALIGNED Bar { ... 136 // 137 // Recommendations: 138 // 139 // 1) Consult compiler documentation; this comment is not kept in sync as 140 // toolchains evolve. 141 // 2) Verify your use has the intended effect. This often requires inspecting 142 // the generated machine code. 143 // 3) Prefer applying this attribute to individual variables. Avoid 144 // applying it to types. This tends to localize the effect. 145 #if defined(__clang__) || defined(__GNUC__) 146 #define ABSL_CACHELINE_ALIGNED __attribute__((aligned(ABSL_CACHELINE_SIZE))) 147 #elif defined(_MSC_VER) 148 #define ABSL_CACHELINE_ALIGNED __declspec(align(ABSL_CACHELINE_SIZE)) 149 #else 150 #define ABSL_CACHELINE_ALIGNED 151 #endif 152 153 // ABSL_PREDICT_TRUE, ABSL_PREDICT_FALSE 154 // 155 // Enables the compiler to prioritize compilation using static analysis for 156 // likely paths within a boolean branch. 157 // 158 // Example: 159 // 160 // if (ABSL_PREDICT_TRUE(expression)) { 161 // return result; // Faster if more likely 162 // } else { 163 // return 0; 164 // } 165 // 166 // Compilers can use the information that a certain branch is not likely to be 167 // taken (for instance, a CHECK failure) to optimize for the common case in 168 // the absence of better information (ie. compiling gcc with `-fprofile-arcs`). 169 // 170 // Recommendation: Modern CPUs dynamically predict branch execution paths, 171 // typically with accuracy greater than 97%. As a result, annotating every 172 // branch in a codebase is likely counterproductive; however, annotating 173 // specific branches that are both hot and consistently mispredicted is likely 174 // to yield performance improvements. 175 #if ABSL_HAVE_BUILTIN(__builtin_expect) || \ 176 (defined(__GNUC__) && !defined(__clang__)) 177 #define ABSL_PREDICT_FALSE(x) (__builtin_expect(false || (x), false)) 178 #define ABSL_PREDICT_TRUE(x) (__builtin_expect(false || (x), true)) 179 #else 180 #define ABSL_PREDICT_FALSE(x) (x) 181 #define ABSL_PREDICT_TRUE(x) (x) 182 #endif 183 184 // `ABSL_INTERNAL_IMMEDIATE_ABORT_IMPL()` aborts the program in the fastest 185 // possible way, with no attempt at logging. One use is to implement hardening 186 // aborts with ABSL_OPTION_HARDENED. Since this is an internal symbol, it 187 // should not be used directly outside of Abseil. 188 #if ABSL_HAVE_BUILTIN(__builtin_trap) || \ 189 (defined(__GNUC__) && !defined(__clang__)) 190 #define ABSL_INTERNAL_IMMEDIATE_ABORT_IMPL() __builtin_trap() 191 #else 192 #define ABSL_INTERNAL_IMMEDIATE_ABORT_IMPL() abort() 193 #endif 194 195 // `ABSL_INTERNAL_UNREACHABLE_IMPL()` is the platform specific directive to 196 // indicate that a statement is unreachable, and to allow the compiler to 197 // optimize accordingly. Clients should use `ABSL_UNREACHABLE()`, which is 198 // defined below. 199 #if defined(__cpp_lib_unreachable) && __cpp_lib_unreachable >= 202202L 200 #define ABSL_INTERNAL_UNREACHABLE_IMPL() std::unreachable() 201 #elif defined(__GNUC__) || ABSL_HAVE_BUILTIN(__builtin_unreachable) 202 #define ABSL_INTERNAL_UNREACHABLE_IMPL() __builtin_unreachable() 203 #elif ABSL_HAVE_BUILTIN(__builtin_assume) 204 #define ABSL_INTERNAL_UNREACHABLE_IMPL() __builtin_assume(false) 205 #elif defined(_MSC_VER) 206 #define ABSL_INTERNAL_UNREACHABLE_IMPL() __assume(false) 207 #else 208 #define ABSL_INTERNAL_UNREACHABLE_IMPL() 209 #endif 210 211 // `ABSL_UNREACHABLE()` is an unreachable statement. A program which reaches 212 // one has undefined behavior, and the compiler may optimize accordingly. 213 #if ABSL_OPTION_HARDENED == 1 && defined(NDEBUG) 214 // Abort in hardened mode to avoid dangerous undefined behavior. 215 #define ABSL_UNREACHABLE() \ 216 do { \ 217 ABSL_INTERNAL_IMMEDIATE_ABORT_IMPL(); \ 218 ABSL_INTERNAL_UNREACHABLE_IMPL(); \ 219 } while (false) 220 #else 221 // The assert only fires in debug mode to aid in debugging. 222 // When NDEBUG is defined, reaching ABSL_UNREACHABLE() is undefined behavior. 223 #define ABSL_UNREACHABLE() \ 224 do { \ 225 /* NOLINTNEXTLINE: misc-static-assert */ \ 226 assert(false && "ABSL_UNREACHABLE reached"); \ 227 ABSL_INTERNAL_UNREACHABLE_IMPL(); \ 228 } while (false) 229 #endif 230 231 // ABSL_ASSUME(cond) 232 // 233 // Informs the compiler that a condition is always true and that it can assume 234 // it to be true for optimization purposes. 235 // 236 // WARNING: If the condition is false, the program can produce undefined and 237 // potentially dangerous behavior. 238 // 239 // In !NDEBUG mode, the condition is checked with an assert(). 240 // 241 // NOTE: The expression must not have side effects, as it may only be evaluated 242 // in some compilation modes and not others. Some compilers may issue a warning 243 // if the compiler cannot prove the expression has no side effects. For example, 244 // the expression should not use a function call since the compiler cannot prove 245 // that a function call does not have side effects. 246 // 247 // Example: 248 // 249 // int x = ...; 250 // ABSL_ASSUME(x >= 0); 251 // // The compiler can optimize the division to a simple right shift using the 252 // // assumption specified above. 253 // int y = x / 16; 254 // 255 #if !defined(NDEBUG) 256 #define ABSL_ASSUME(cond) assert(cond) 257 #elif ABSL_HAVE_BUILTIN(__builtin_assume) 258 #define ABSL_ASSUME(cond) __builtin_assume(cond) 259 #elif defined(_MSC_VER) 260 #define ABSL_ASSUME(cond) __assume(cond) 261 #elif defined(__cpp_lib_unreachable) && __cpp_lib_unreachable >= 202202L 262 #define ABSL_ASSUME(cond) \ 263 do { \ 264 if (!(cond)) std::unreachable(); \ 265 } while (false) 266 #elif defined(__GNUC__) || ABSL_HAVE_BUILTIN(__builtin_unreachable) 267 #define ABSL_ASSUME(cond) \ 268 do { \ 269 if (!(cond)) __builtin_unreachable(); \ 270 } while (false) 271 #else 272 #define ABSL_ASSUME(cond) \ 273 do { \ 274 static_cast<void>(false && (cond)); \ 275 } while (false) 276 #endif 277 278 // ABSL_INTERNAL_UNIQUE_SMALL_NAME(cond) 279 // This macro forces small unique name on a static file level symbols like 280 // static local variables or static functions. This is intended to be used in 281 // macro definitions to optimize the cost of generated code. Do NOT use it on 282 // symbols exported from translation unit since it may cause a link time 283 // conflict. 284 // 285 // Example: 286 // 287 // #define MY_MACRO(txt) 288 // namespace { 289 // char VeryVeryLongVarName[] ABSL_INTERNAL_UNIQUE_SMALL_NAME() = txt; 290 // const char* VeryVeryLongFuncName() ABSL_INTERNAL_UNIQUE_SMALL_NAME(); 291 // const char* VeryVeryLongFuncName() { return txt; } 292 // } 293 // 294 295 #if defined(__GNUC__) 296 #define ABSL_INTERNAL_UNIQUE_SMALL_NAME2(x) #x 297 #define ABSL_INTERNAL_UNIQUE_SMALL_NAME1(x) ABSL_INTERNAL_UNIQUE_SMALL_NAME2(x) 298 #define ABSL_INTERNAL_UNIQUE_SMALL_NAME() \ 299 asm(ABSL_INTERNAL_UNIQUE_SMALL_NAME1(.absl.__COUNTER__)) 300 #else 301 #define ABSL_INTERNAL_UNIQUE_SMALL_NAME() 302 #endif 303 304 #endif // ABSL_BASE_OPTIMIZATION_H_ 305