1 // 2 // Copyright 2017 The Abseil Authors. 3 // 4 // Licensed under the Apache License, Version 2.0 (the "License"); 5 // you may not use this file except in compliance with the License. 6 // You may obtain a copy of the License at 7 // 8 // https://www.apache.org/licenses/LICENSE-2.0 9 // 10 // Unless required by applicable law or agreed to in writing, software 11 // distributed under the License is distributed on an "AS IS" BASIS, 12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 // See the License for the specific language governing permissions and 14 // limitations under the License. 15 // 16 // ----------------------------------------------------------------------------- 17 // File: optimization.h 18 // ----------------------------------------------------------------------------- 19 // 20 // This header file defines portable macros for performance optimization. 21 22 #ifndef ABSL_BASE_OPTIMIZATION_H_ 23 #define ABSL_BASE_OPTIMIZATION_H_ 24 25 #include <assert.h> 26 27 #include "absl/base/config.h" 28 29 // ABSL_BLOCK_TAIL_CALL_OPTIMIZATION 30 // 31 // Instructs the compiler to avoid optimizing tail-call recursion. This macro is 32 // useful when you wish to preserve the existing function order within a stack 33 // trace for logging, debugging, or profiling purposes. 34 // 35 // Example: 36 // 37 // int f() { 38 // int result = g(); 39 // ABSL_BLOCK_TAIL_CALL_OPTIMIZATION(); 40 // return result; 41 // } 42 #if defined(__pnacl__) 43 #define ABSL_BLOCK_TAIL_CALL_OPTIMIZATION() if (volatile int x = 0) { (void)x; } 44 #elif defined(__clang__) 45 // Clang will not tail call given inline volatile assembly. 46 #define ABSL_BLOCK_TAIL_CALL_OPTIMIZATION() __asm__ __volatile__("") 47 #elif defined(__GNUC__) 48 // GCC will not tail call given inline volatile assembly. 49 #define ABSL_BLOCK_TAIL_CALL_OPTIMIZATION() __asm__ __volatile__("") 50 #elif defined(_MSC_VER) 51 #include <intrin.h> 52 // The __nop() intrinsic blocks the optimisation. 53 #define ABSL_BLOCK_TAIL_CALL_OPTIMIZATION() __nop() 54 #else 55 #define ABSL_BLOCK_TAIL_CALL_OPTIMIZATION() if (volatile int x = 0) { (void)x; } 56 #endif 57 58 // ABSL_CACHELINE_SIZE 59 // 60 // Explicitly defines the size of the L1 cache for purposes of alignment. 61 // Setting the cacheline size allows you to specify that certain objects be 62 // aligned on a cacheline boundary with `ABSL_CACHELINE_ALIGNED` declarations. 63 // (See below.) 64 // 65 // NOTE: this macro should be replaced with the following C++17 features, when 66 // those are generally available: 67 // 68 // * `std::hardware_constructive_interference_size` 69 // * `std::hardware_destructive_interference_size` 70 // 71 // See http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2016/p0154r1.html 72 // for more information. 73 #if defined(__GNUC__) 74 // Cache line alignment 75 #if defined(__i386__) || defined(__x86_64__) 76 #define ABSL_CACHELINE_SIZE 64 77 #elif defined(__powerpc64__) 78 #define ABSL_CACHELINE_SIZE 128 79 #elif defined(__aarch64__) 80 // We would need to read special register ctr_el0 to find out L1 dcache size. 81 // This value is a good estimate based on a real aarch64 machine. 82 #define ABSL_CACHELINE_SIZE 64 83 #elif defined(__arm__) 84 // Cache line sizes for ARM: These values are not strictly correct since 85 // cache line sizes depend on implementations, not architectures. There 86 // are even implementations with cache line sizes configurable at boot 87 // time. 88 #if defined(__ARM_ARCH_5T__) 89 #define ABSL_CACHELINE_SIZE 32 90 #elif defined(__ARM_ARCH_7A__) 91 #define ABSL_CACHELINE_SIZE 64 92 #endif 93 #endif 94 95 #ifndef ABSL_CACHELINE_SIZE 96 // A reasonable default guess. Note that overestimates tend to waste more 97 // space, while underestimates tend to waste more time. 98 #define ABSL_CACHELINE_SIZE 64 99 #endif 100 101 // ABSL_CACHELINE_ALIGNED 102 // 103 // Indicates that the declared object be cache aligned using 104 // `ABSL_CACHELINE_SIZE` (see above). Cacheline aligning objects allows you to 105 // load a set of related objects in the L1 cache for performance improvements. 106 // Cacheline aligning objects properly allows constructive memory sharing and 107 // prevents destructive (or "false") memory sharing. 108 // 109 // NOTE: callers should replace uses of this macro with `alignas()` using 110 // `std::hardware_constructive_interference_size` and/or 111 // `std::hardware_destructive_interference_size` when C++17 becomes available to 112 // them. 113 // 114 // See http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2016/p0154r1.html 115 // for more information. 116 // 117 // On some compilers, `ABSL_CACHELINE_ALIGNED` expands to an `__attribute__` 118 // or `__declspec` attribute. For compilers where this is not known to work, 119 // the macro expands to nothing. 120 // 121 // No further guarantees are made here. The result of applying the macro 122 // to variables and types is always implementation-defined. 123 // 124 // WARNING: It is easy to use this attribute incorrectly, even to the point 125 // of causing bugs that are difficult to diagnose, crash, etc. It does not 126 // of itself guarantee that objects are aligned to a cache line. 127 // 128 // NOTE: Some compilers are picky about the locations of annotations such as 129 // this attribute, so prefer to put it at the beginning of your declaration. 130 // For example, 131 // 132 // ABSL_CACHELINE_ALIGNED static Foo* foo = ... 133 // 134 // class ABSL_CACHELINE_ALIGNED Bar { ... 135 // 136 // Recommendations: 137 // 138 // 1) Consult compiler documentation; this comment is not kept in sync as 139 // toolchains evolve. 140 // 2) Verify your use has the intended effect. This often requires inspecting 141 // the generated machine code. 142 // 3) Prefer applying this attribute to individual variables. Avoid 143 // applying it to types. This tends to localize the effect. 144 #define ABSL_CACHELINE_ALIGNED __attribute__((aligned(ABSL_CACHELINE_SIZE))) 145 #elif defined(_MSC_VER) 146 #define ABSL_CACHELINE_SIZE 64 147 #define ABSL_CACHELINE_ALIGNED __declspec(align(ABSL_CACHELINE_SIZE)) 148 #else 149 #define ABSL_CACHELINE_SIZE 64 150 #define ABSL_CACHELINE_ALIGNED 151 #endif 152 153 // ABSL_PREDICT_TRUE, ABSL_PREDICT_FALSE 154 // 155 // Enables the compiler to prioritize compilation using static analysis for 156 // likely paths within a boolean branch. 157 // 158 // Example: 159 // 160 // if (ABSL_PREDICT_TRUE(expression)) { 161 // return result; // Faster if more likely 162 // } else { 163 // return 0; 164 // } 165 // 166 // Compilers can use the information that a certain branch is not likely to be 167 // taken (for instance, a CHECK failure) to optimize for the common case in 168 // the absence of better information (ie. compiling gcc with `-fprofile-arcs`). 169 // 170 // Recommendation: Modern CPUs dynamically predict branch execution paths, 171 // typically with accuracy greater than 97%. As a result, annotating every 172 // branch in a codebase is likely counterproductive; however, annotating 173 // specific branches that are both hot and consistently mispredicted is likely 174 // to yield performance improvements. 175 #if ABSL_HAVE_BUILTIN(__builtin_expect) || \ 176 (defined(__GNUC__) && !defined(__clang__)) 177 #define ABSL_PREDICT_FALSE(x) (__builtin_expect(false || (x), false)) 178 #define ABSL_PREDICT_TRUE(x) (__builtin_expect(false || (x), true)) 179 #else 180 #define ABSL_PREDICT_FALSE(x) (x) 181 #define ABSL_PREDICT_TRUE(x) (x) 182 #endif 183 184 // ABSL_INTERNAL_ASSUME(cond) 185 // Informs the compiler that a condition is always true and that it can assume 186 // it to be true for optimization purposes. The call has undefined behavior if 187 // the condition is false. 188 // In !NDEBUG mode, the condition is checked with an assert(). 189 // NOTE: The expression must not have side effects, as it will only be evaluated 190 // in some compilation modes and not others. 191 // 192 // Example: 193 // 194 // int x = ...; 195 // ABSL_INTERNAL_ASSUME(x >= 0); 196 // // The compiler can optimize the division to a simple right shift using the 197 // // assumption specified above. 198 // int y = x / 16; 199 // 200 #if !defined(NDEBUG) 201 #define ABSL_INTERNAL_ASSUME(cond) assert(cond) 202 #elif ABSL_HAVE_BUILTIN(__builtin_assume) 203 #define ABSL_INTERNAL_ASSUME(cond) __builtin_assume(cond) 204 #elif defined(__GNUC__) || ABSL_HAVE_BUILTIN(__builtin_unreachable) 205 #define ABSL_INTERNAL_ASSUME(cond) \ 206 do { \ 207 if (!(cond)) __builtin_unreachable(); \ 208 } while (0) 209 #elif defined(_MSC_VER) 210 #define ABSL_INTERNAL_ASSUME(cond) __assume(cond) 211 #else 212 #define ABSL_INTERNAL_ASSUME(cond) \ 213 do { \ 214 static_cast<void>(false && (cond)); \ 215 } while (0) 216 #endif 217 218 // ABSL_INTERNAL_UNIQUE_SMALL_NAME(cond) 219 // This macro forces small unique name on a static file level symbols like 220 // static local variables or static functions. This is intended to be used in 221 // macro definitions to optimize the cost of generated code. Do NOT use it on 222 // symbols exported from translation unit since it may cause a link time 223 // conflict. 224 // 225 // Example: 226 // 227 // #define MY_MACRO(txt) 228 // namespace { 229 // char VeryVeryLongVarName[] ABSL_INTERNAL_UNIQUE_SMALL_NAME() = txt; 230 // const char* VeryVeryLongFuncName() ABSL_INTERNAL_UNIQUE_SMALL_NAME(); 231 // const char* VeryVeryLongFuncName() { return txt; } 232 // } 233 // 234 235 #if defined(__GNUC__) 236 #define ABSL_INTERNAL_UNIQUE_SMALL_NAME2(x) #x 237 #define ABSL_INTERNAL_UNIQUE_SMALL_NAME1(x) ABSL_INTERNAL_UNIQUE_SMALL_NAME2(x) 238 #define ABSL_INTERNAL_UNIQUE_SMALL_NAME() \ 239 asm(ABSL_INTERNAL_UNIQUE_SMALL_NAME1(.absl.__COUNTER__)) 240 #else 241 #define ABSL_INTERNAL_UNIQUE_SMALL_NAME() 242 #endif 243 244 #endif // ABSL_BASE_OPTIMIZATION_H_ 245