//
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// -----------------------------------------------------------------------------
// File: optimization.h
// -----------------------------------------------------------------------------
//
// This header file defines portable macros for performance optimization.

#ifndef ABSL_BASE_OPTIMIZATION_H_
#define ABSL_BASE_OPTIMIZATION_H_

#include "absl/base/config.h"

// ABSL_BLOCK_TAIL_CALL_OPTIMIZATION
//
// Instructs the compiler to avoid optimizing tail-call recursion. Use of this
// macro is useful when you wish to preserve the existing function order within
// a stack trace for logging, debugging, or profiling purposes.
//
// Example:
//
//   int f() {
//     int result = g();
//     ABSL_BLOCK_TAIL_CALL_OPTIMIZATION();
//     return result;
//   }
#if defined(__pnacl__)
#define ABSL_BLOCK_TAIL_CALL_OPTIMIZATION() if (volatile int x = 0) { (void)x; }
#elif defined(__clang__)
// Clang will not tail call given inline volatile assembly.
#define ABSL_BLOCK_TAIL_CALL_OPTIMIZATION() __asm__ __volatile__("")
#elif defined(__GNUC__)
// GCC will not tail call given inline volatile assembly.
#define ABSL_BLOCK_TAIL_CALL_OPTIMIZATION() __asm__ __volatile__("")
#elif defined(_MSC_VER)
#include <intrin.h>
// The __nop() intrinsic blocks the optimisation.
#define ABSL_BLOCK_TAIL_CALL_OPTIMIZATION() __nop()
#else
#define ABSL_BLOCK_TAIL_CALL_OPTIMIZATION() if (volatile int x = 0) { (void)x; }
#endif
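
// A hedged, illustrative sketch (not part of the original documentation) of
// the recursive case mentioned above: without the macro a compiler may turn
// the self-recursive call into a loop, so only a single frame would appear
// when a debugger or profiler samples the stack. `CountDown` and `DoWork`
// are hypothetical names, not Abseil APIs:
//
//   void CountDown(int n) {
//     if (n == 0) return;
//     DoWork(n);
//     CountDown(n - 1);
//     ABSL_BLOCK_TAIL_CALL_OPTIMIZATION();
//   }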

// ABSL_CACHELINE_SIZE
//
// Explicitly defines the size of the L1 cache for purposes of alignment.
// Setting the cacheline size allows you to specify that certain objects be
// aligned on a cacheline boundary with `ABSL_CACHELINE_ALIGNED` declarations.
// (See below.)
//
// NOTE: this macro should be replaced with the following C++17 features, when
// those are generally available:
//
//   * `std::hardware_constructive_interference_size`
//   * `std::hardware_destructive_interference_size`
//
// See http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2016/p0154r1.html
// for more information.
#if defined(__GNUC__)
// Cache line alignment
#if defined(__i386__) || defined(__x86_64__)
#define ABSL_CACHELINE_SIZE 64
#elif defined(__powerpc64__)
#define ABSL_CACHELINE_SIZE 128
#elif defined(__aarch64__)
// We would need to read special register ctr_el0 to find out L1 dcache size.
// This value is a good estimate based on a real aarch64 machine.
#define ABSL_CACHELINE_SIZE 64
#elif defined(__arm__)
// Cache line sizes for ARM: These values are not strictly correct since
// cache line sizes depend on implementations, not architectures. There
// are even implementations with cache line sizes configurable at boot
// time.
#if defined(__ARM_ARCH_5T__)
#define ABSL_CACHELINE_SIZE 32
#elif defined(__ARM_ARCH_7A__)
#define ABSL_CACHELINE_SIZE 64
#endif
#endif

#ifndef ABSL_CACHELINE_SIZE
// A reasonable default guess. Note that overestimates tend to waste more
// space, while underestimates tend to waste more time.
#define ABSL_CACHELINE_SIZE 64
#endif

// ABSL_CACHELINE_ALIGNED
//
// Indicates that the declared object should be cache-aligned using
// `ABSL_CACHELINE_SIZE` (see above). Cacheline-aligning objects allows you to
// load a set of related objects in the L1 cache for performance improvements.
// Cacheline-aligning objects properly allows constructive memory sharing and
// prevents destructive (or "false") memory sharing.
//
// NOTE: this macro should be replaced with usage of `alignas()` using
// `std::hardware_constructive_interference_size` and/or
// `std::hardware_destructive_interference_size` when available within C++17.
//
// See http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2016/p0154r1.html
// for more information.
//
// On some compilers, `ABSL_CACHELINE_ALIGNED` expands to an `__attribute__`
// or `__declspec` attribute. For compilers where this is not known to work,
// the macro expands to nothing.
//
// No further guarantees are made here. The result of applying the macro
// to variables and types is always implementation-defined.
//
// WARNING: It is easy to use this attribute incorrectly, even to the point
// of causing bugs that are difficult to diagnose, crash, etc. It does not
// of itself guarantee that objects are aligned to a cache line.
//
// NOTE: Some compilers are picky about the locations of annotations such as
// this attribute, so prefer to put it at the beginning of your declaration.
// For example,
//
//   ABSL_CACHELINE_ALIGNED static Foo* foo = ...
//
//   class ABSL_CACHELINE_ALIGNED Bar { ...
//
// Recommendations:
//
// 1) Consult compiler documentation; this comment is not kept in sync as
//    toolchains evolve.
// 2) Verify your use has the intended effect. This often requires inspecting
//    the generated machine code.
// 3) Prefer applying this attribute to individual variables. Avoid
//    applying it to types. This tends to localize the effect.
#define ABSL_CACHELINE_ALIGNED __attribute__((aligned(ABSL_CACHELINE_SIZE)))
#elif defined(_MSC_VER)
#define ABSL_CACHELINE_SIZE 64
#define ABSL_CACHELINE_ALIGNED __declspec(align(ABSL_CACHELINE_SIZE))
#else
#define ABSL_CACHELINE_SIZE 64
#define ABSL_CACHELINE_ALIGNED
#endif
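
// A hedged, illustrative sketch (not part of the original documentation) of
// using `ABSL_CACHELINE_ALIGNED` to keep two independently-updated counters
// on separate cache lines, so that threads writing one counter do not
// invalidate the line holding the other ("false" sharing). The struct and
// member names are hypothetical, and the example assumes `<atomic>` and
// `<cstdint>` have been included:
//
//   struct Counters {
//     ABSL_CACHELINE_ALIGNED std::atomic<int64_t> produced;
//     ABSL_CACHELINE_ALIGNED std::atomic<int64_t> consumed;
//   };
//
// Per recommendations (2) and (3) above, the attribute is applied to the
// individual members rather than to the type, and the resulting layout should
// still be verified (e.g. by inspecting generated code), since on compilers
// where `ABSL_CACHELINE_ALIGNED` expands to nothing the members receive no
// special alignment.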

// ABSL_PREDICT_TRUE, ABSL_PREDICT_FALSE
//
// Tells the compiler which outcome of a boolean branch is expected to be the
// common case, so that it can optimize code generation and layout for the
// likely path.
//
// Example:
//
//   if (ABSL_PREDICT_TRUE(expression)) {
//     return result;  // Faster if more likely
//   } else {
//     return 0;
//   }
//
// Compilers can use the information that a certain branch is not likely to be
// taken (for instance, a CHECK failure) to optimize for the common case in
// the absence of better information (e.g. profile data gathered by compiling
// with gcc's `-fprofile-arcs`).
//
// Recommendation: Modern CPUs dynamically predict branch execution paths,
// typically with accuracy greater than 97%. As a result, annotating every
// branch in a codebase is likely counterproductive; however, annotating
// specific branches that are both hot and consistently mispredicted is likely
// to yield performance improvements.
#if ABSL_HAVE_BUILTIN(__builtin_expect) || \
    (defined(__GNUC__) && !defined(__clang__))
#define ABSL_PREDICT_FALSE(x) (__builtin_expect(x, 0))
#define ABSL_PREDICT_TRUE(x) (__builtin_expect(false || (x), true))
#else
#define ABSL_PREDICT_FALSE(x) (x)
#define ABSL_PREDICT_TRUE(x) (x)
#endif

#endif  // ABSL_BASE_OPTIMIZATION_H_