// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// -----------------------------------------------------------------------------
// File: optimization.h
// -----------------------------------------------------------------------------
//
// This header file defines portable macros for performance optimization.

#ifndef ABSL_BASE_OPTIMIZATION_H_
#define ABSL_BASE_OPTIMIZATION_H_

#include <assert.h>

#include "absl/base/config.h"

// ABSL_BLOCK_TAIL_CALL_OPTIMIZATION
//
// Instructs the compiler to avoid optimizing tail-call recursion. This macro
// is useful when you wish to preserve the existing function order within a
// stack trace for logging, debugging, or profiling purposes.
//
// Example:
//
//   int f() {
//     int result = g();
//     ABSL_BLOCK_TAIL_CALL_OPTIMIZATION();
//     return result;
//   }
#if defined(__pnacl__)
#define ABSL_BLOCK_TAIL_CALL_OPTIMIZATION() if (volatile int x = 0) { (void)x; }
#elif defined(__clang__)
// Clang will not tail call given inline volatile assembly.
#define ABSL_BLOCK_TAIL_CALL_OPTIMIZATION() __asm__ __volatile__("")
#elif defined(__GNUC__)
// GCC will not tail call given inline volatile assembly.
#define ABSL_BLOCK_TAIL_CALL_OPTIMIZATION() __asm__ __volatile__("")
#elif defined(_MSC_VER)
#include <intrin.h>
// The __nop() intrinsic blocks the optimization of tail calls.
#define ABSL_BLOCK_TAIL_CALL_OPTIMIZATION() __nop()
#else
#define ABSL_BLOCK_TAIL_CALL_OPTIMIZATION() if (volatile int x = 0) { (void)x; }
#endif

// ABSL_CACHELINE_SIZE
//
// Explicitly defines the size of the L1 cache for purposes of alignment.
// Setting the cacheline size allows you to specify that certain objects be
// aligned on a cacheline boundary with `ABSL_CACHELINE_ALIGNED` declarations.
// (See below.)
//
// NOTE: this macro should be replaced with the following C++17 features, when
// those are generally available:
//
//   * `std::hardware_constructive_interference_size`
//   * `std::hardware_destructive_interference_size`
//
// See http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2016/p0154r1.html
// for more information.
#if defined(__GNUC__)
// Cache line alignment
#if defined(__i386__) || defined(__x86_64__)
#define ABSL_CACHELINE_SIZE 64
#elif defined(__powerpc64__)
#define ABSL_CACHELINE_SIZE 128
#elif defined(__aarch64__)
// We would need to read special register ctr_el0 to find out L1 dcache size.
// This value is a good estimate based on a real aarch64 machine.
#define ABSL_CACHELINE_SIZE 64
#elif defined(__arm__)
// Cache line sizes for ARM: These values are not strictly correct since
// cache line sizes depend on implementations, not architectures. There
// are even implementations with cache line sizes configurable at boot
// time.
#if defined(__ARM_ARCH_5T__)
#define ABSL_CACHELINE_SIZE 32
#elif defined(__ARM_ARCH_7A__)
#define ABSL_CACHELINE_SIZE 64
#endif
#endif

#ifndef ABSL_CACHELINE_SIZE
// A reasonable default guess. Note that overestimates tend to waste more
// space, while underestimates tend to waste more time.
#define ABSL_CACHELINE_SIZE 64
#endif

// ABSL_CACHELINE_ALIGNED
//
// Indicates that the declared object should be cache-aligned using
// `ABSL_CACHELINE_SIZE` (see above). Cacheline aligning objects allows you to
// load a set of related objects in the L1 cache for performance improvements.
// Cacheline aligning objects properly allows constructive memory sharing and
// prevents destructive (or "false") memory sharing.
//
// NOTE: this macro should be replaced with usage of `alignas()` using
// `std::hardware_constructive_interference_size` and/or
// `std::hardware_destructive_interference_size` when available within C++17.
//
// See http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2016/p0154r1.html
// for more information.
//
// On some compilers, `ABSL_CACHELINE_ALIGNED` expands to an `__attribute__`
// or `__declspec` attribute. For compilers where this is not known to work,
// the macro expands to nothing.
//
// No further guarantees are made here. The result of applying the macro
// to variables and types is always implementation-defined.
//
// WARNING: It is easy to use this attribute incorrectly, even to the point
// of causing bugs that are difficult to diagnose, crash, etc. It does not
// of itself guarantee that objects are aligned to a cache line.
//
// NOTE: Some compilers are picky about the locations of annotations such as
// this attribute, so prefer to put it at the beginning of your declaration.
// For example,
//
//   ABSL_CACHELINE_ALIGNED static Foo* foo = ...
//
//   class ABSL_CACHELINE_ALIGNED Bar { ...
//
// Recommendations:
//
// 1) Consult compiler documentation; this comment is not kept in sync as
//    toolchains evolve.
// 2) Verify your use has the intended effect. This often requires inspecting
//    the generated machine code.
// 3) Prefer applying this attribute to individual variables. Avoid
//    applying it to types. This tends to localize the effect.
#define ABSL_CACHELINE_ALIGNED __attribute__((aligned(ABSL_CACHELINE_SIZE)))
#elif defined(_MSC_VER)
#define ABSL_CACHELINE_SIZE 64
#define ABSL_CACHELINE_ALIGNED __declspec(align(ABSL_CACHELINE_SIZE))
#else
#define ABSL_CACHELINE_SIZE 64
#define ABSL_CACHELINE_ALIGNED
#endif

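// For illustration, a minimal sketch of the false-sharing use case described
// above. The variable names are hypothetical, and `<atomic>`/`<cstdint>` are
// assumed to be included by the user. Two counters written by different
// threads can be placed on separate cache lines so that writes to one do not
// invalidate the cache line holding the other:
//
//   ABSL_CACHELINE_ALIGNED std::atomic<int64_t> producer_count{0};
//   ABSL_CACHELINE_ALIGNED std::atomic<int64_t> consumer_count{0};
//
// Per recommendation (3) above, the attribute is applied to the individual
// variables rather than to a shared wrapper type.
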
// ABSL_PREDICT_TRUE, ABSL_PREDICT_FALSE
//
// Enables the compiler to prioritize compilation using static analysis for
// likely paths within a boolean branch.
//
// Example:
//
//   if (ABSL_PREDICT_TRUE(expression)) {
//     return result;  // Faster if more likely
//   } else {
//     return 0;
//   }
//
// Compilers can use the information that a certain branch is not likely to be
// taken (for instance, a CHECK failure) to optimize for the common case in
// the absence of better information (e.g., profile feedback from compiling
// with gcc's `-fprofile-arcs`).
//
// Recommendation: Modern CPUs dynamically predict branch execution paths,
// typically with accuracy greater than 97%. As a result, annotating every
// branch in a codebase is likely counterproductive; however, annotating
// specific branches that are both hot and consistently mispredicted is likely
// to yield performance improvements.
#if ABSL_HAVE_BUILTIN(__builtin_expect) || \
    (defined(__GNUC__) && !defined(__clang__))
#define ABSL_PREDICT_FALSE(x) (__builtin_expect(false || (x), false))
#define ABSL_PREDICT_TRUE(x) (__builtin_expect(false || (x), true))
#else
#define ABSL_PREDICT_FALSE(x) (x)
#define ABSL_PREDICT_TRUE(x) (x)
#endif

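// As a complementary sketch (hypothetical names, not part of this header),
// an unlikely error path can be annotated with `ABSL_PREDICT_FALSE` so the
// compiler lays out the hot path as the fall-through case:
//
//   if (ABSL_PREDICT_FALSE(buffer == nullptr)) {
//     return HandleAllocationFailure();  // Cold path
//   }
//   // Hot path continues here.
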
// ABSL_INTERNAL_ASSUME(cond)
// Informs the compiler that a condition is always true and that it can assume
// it to be true for optimization purposes. The call has undefined behavior if
// the condition is false.
// In !NDEBUG mode, the condition is checked with an assert().
// NOTE: The expression must not have side effects, as it will only be
// evaluated in some compilation modes and not others.
//
// Example:
//
//   int x = ...;
//   ABSL_INTERNAL_ASSUME(x >= 0);
//   // The compiler can optimize the division to a simple right shift using
//   // the assumption specified above.
//   int y = x / 16;
//
#if !defined(NDEBUG)
#define ABSL_INTERNAL_ASSUME(cond) assert(cond)
#elif ABSL_HAVE_BUILTIN(__builtin_assume)
#define ABSL_INTERNAL_ASSUME(cond) __builtin_assume(cond)
#elif defined(__GNUC__) || ABSL_HAVE_BUILTIN(__builtin_unreachable)
#define ABSL_INTERNAL_ASSUME(cond)        \
  do {                                    \
    if (!(cond)) __builtin_unreachable(); \
  } while (0)
#elif defined(_MSC_VER)
#define ABSL_INTERNAL_ASSUME(cond) __assume(cond)
#else
#define ABSL_INTERNAL_ASSUME(cond)      \
  do {                                  \
    static_cast<void>(false && (cond)); \
  } while (0)
#endif

// ABSL_INTERNAL_UNIQUE_SMALL_NAME()
// This macro forces a small, unique name onto file-level static symbols such
// as static local variables or static functions. It is intended to be used in
// macro definitions to optimize the cost of generated code. Do NOT use it on
// symbols exported from a translation unit, since it may cause a link-time
// conflict.
//
// Example:
//
//   #define MY_MACRO(txt)                                                   \
//     namespace {                                                           \
//     char VeryVeryLongVarName[] ABSL_INTERNAL_UNIQUE_SMALL_NAME() = txt;   \
//     const char* VeryVeryLongFuncName() ABSL_INTERNAL_UNIQUE_SMALL_NAME(); \
//     const char* VeryVeryLongFuncName() { return txt; }                    \
//     }
//

#if defined(__GNUC__)
#define ABSL_INTERNAL_UNIQUE_SMALL_NAME2(x) #x
#define ABSL_INTERNAL_UNIQUE_SMALL_NAME1(x) ABSL_INTERNAL_UNIQUE_SMALL_NAME2(x)
#define ABSL_INTERNAL_UNIQUE_SMALL_NAME() \
  asm(ABSL_INTERNAL_UNIQUE_SMALL_NAME1(.absl.__COUNTER__))
#else
#define ABSL_INTERNAL_UNIQUE_SMALL_NAME()
#endif

#endif  // ABSL_BASE_OPTIMIZATION_H_