/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_CORE_PLATFORM_PLATFORM_STRINGS_H_
#define TENSORFLOW_CORE_PLATFORM_PLATFORM_STRINGS_H_

// This header defines the macro TF_PLATFORM_STRINGS() which should be used
// once in each dynamically loadable TensorFlow module. It embeds static
// strings into the compilation unit that allow TensorFlow to determine what
// compilation options were in effect when the compilation unit was built. All
// compilation units within the same dynamically loadable library should be
// built with the same options (or at least, the strings should be embedded in
// the compilation unit built with the most restrictive options).

// The platform strings embedded into a binary may be retrieved with the
// GetPlatformStrings function.

// Rationale:
// We wish to load only those libraries that this CPU can execute. For
// example, we should not load a library compiled with avx256 instructions on a
// CPU that cannot execute them.
//
// One might think that one could dlopen() the library, and call a routine that
// would return which cpu type it was compiled for. Alas, this does not work,
// because at dlopen() time, a library containing C++ will execute constructors
// of class variables with static storage class.
// Even code that looks innocuous may use optional platform-specific
// instructions. For example, the fastest way to zero a region of memory
// might use optional instructions.
//
// One might think one could run a tool such as "objdump" to read flags from
// the libraries' headers, or perhaps disassemble each library to look for
// particular instructions. Unfortunately, the desired flags are not present
// in the headers, and disassembly can be prohibitively slow ("objdump -d" is
// very slow, for example). Moreover, a tool to examine the library may not
// be present on the system unless the user has installed special packages (for
// example, on Windows).
//
// Instead, we adopt a crude but straightforward solution: We require
// developers to use the macro TF_PLATFORM_STRINGS() in their library, to
// embed the compilation options as constant strings. The compiler's
// predefined macros pick which strings are included. We then search for the
// strings in the files, and then dlopen() only those libraries that have or
// lack strings as needed.
//
// We adopt the approach of placing in the binary a fairly raw copy of the
// predefined macros, rather than trying to interpret them in complex ways at
// compile time. This allows the loading binary to alter its interpretation of
// the strings without library developers having to recompile.

#include <stdio.h>

#include <string>
#include <vector>

// Aside from the header guard, the internal macros defined here have the form:
//   TF_PLAT_STR_*

// Version of the list of tested macros. TF_PLATFORM_STRINGS() embeds it in
// the binary as the record "TF_PLAT_STR_VERSION=<version>".
//
// If a macro is removed from the list of tested macros, the major version in
// the following version number should be incremented, and the minor version
// set to zero. Otherwise, if a macro is added to the list of tested macros,
// the minor number should be incremented.
#define TF_PLAT_STR_VERSION_ "1.0"

// Prefix of each option string indicator in the binary.
// After the prefix, such strings have the form:
//    [A-Za-z_0-9]+=<value>
// followed by a terminating nul. To simplify searching, this prefix is all
// ASCII, starts with a nul, and contains no character twice.
#define TF_PLAT_STR_MAGIC_PREFIX_ "\0S\\s\":^p*L}"

// A helper macro for TF_PLAT_STR_AS_STR_(): stringifies x exactly as written,
// without expanding it first.
#define TF_PLAT_STR_STR_1_(x) #x

// Yield a constant string corresponding to x, after macro expansion. The
// extra level of indirection is what lets x be expanded before it reaches
// the # operator.
#define TF_PLAT_STR_AS_STR_(x) TF_PLAT_STR_STR_1_(x)

// An empty definition to make lists more uniform.
#define TF_PLAT_STR_TERMINATOR_

// TF_PLAT_STR_(x) introduces a constant string indicating whether a
// particular compilation option has been turned on. The result is the
// concatenation of the magic prefix, the macro's name, "=", and the macro's
// expansion, e.g. <prefix>"__FOO__=1" when __FOO__ is defined as 1.
//
// In gcc and clang, we might imagine using something like
// #define TF_PLAT_STR_(x) \
//     (sizeof (#x) != sizeof (TF_PLAT_STR_AS_STR_ (x))? \
//      TF_PLAT_STR_MAGIC_PREFIX_ #x "=" TF_PLAT_STR_AS_STR_ (x) : \
//      TF_PLAT_STR_MAGIC_PREFIX_ #x "=0"),
// but some compilers (notably MSVC) place both "foo" and "bar" in the binary
// when presented with
//    (true? "foo" : "bar")
// so we must use #if to select the strings we need, which is rather verbose.
#define TF_PLAT_STR_(x) TF_PLAT_STR_MAGIC_PREFIX_ #x "=" TF_PLAT_STR_AS_STR_(x)

// Include the #if machinery that sets the macros used below.
// platform_strings_computed.h can be generated by filtering this header file
// through:
// awk '
// header == "" { print; }
// /\*\// && header == "" {
//     print "// Generated from platform_strings.h.";
//     print "";
//     print "#ifndef TENSORFLOW_CORE_PLATFORM_PLATFORM_STRINGS_COMPUTED_H_";
//     print "#define TENSORFLOW_CORE_PLATFORM_PLATFORM_STRINGS_COMPUTED_H_";
//     print "";
//     header = 1;
// }
// /^#define TF_PLAT_STR_LIST_[a-zA-Z0-9_]*\(\) *\\$/ { active = 1; }
// /TF_PLAT_STR_TERMINATOR_/ { active = 0; }
// /^ *TF_PLAT_STR_[A-Za-z0-9_]* *\\$/ && active {
//     x = $0;
//     sub(/^ *TF_PLAT_STR_/, "", x);
//     sub(/ *\\$/, "", x);
//     printf ("#if defined(%s)\n", x);
//     printf ("#define TF_PLAT_STR_%s TF_PLAT_STR_(%s)\n", x, x);
//     printf ("#else\n");
//     printf ("#define TF_PLAT_STR_%s\n", x);
//     printf ("#endif\n");
// }
// END {
//     print "";
//     print "#endif  // TENSORFLOW_CORE_PLATFORM_PLATFORM_STRINGS_COMPUTED_H_";
// }'
#include "tensorflow/core/platform/platform_strings_computed.h"

// clang-format butchers the following lines.
// clang-format off

// x86_64 and x86_32 optional features.
// Candidate x86 feature macros. Each TF_PLAT_STR_<macro> entry is defined by
// platform_strings_computed.h: it expands to the tagged string for <macro>
// when the compiler predefines <macro>, and to nothing otherwise. This list
// is replaced by an empty one further down when the target is not x86.
//
// Keep one entry per line, each ending in a backslash: the awk script that
// generates platform_strings_computed.h matches on exactly that layout.
#define TF_PLAT_STR_LIST___x86_64__() \
        TF_PLAT_STR__M_IX86_FP \
        TF_PLAT_STR__NO_PREFETCHW \
        TF_PLAT_STR___3dNOW_A__ \
        TF_PLAT_STR___3dNOW__ \
        TF_PLAT_STR___ABM__ \
        TF_PLAT_STR___ADX__ \
        TF_PLAT_STR___AES__ \
        TF_PLAT_STR___AVX2__ \
        TF_PLAT_STR___AVX512BW__ \
        TF_PLAT_STR___AVX512CD__ \
        TF_PLAT_STR___AVX512DQ__ \
        TF_PLAT_STR___AVX512ER__ \
        TF_PLAT_STR___AVX512F__ \
        TF_PLAT_STR___AVX512IFMA__ \
        TF_PLAT_STR___AVX512PF__ \
        TF_PLAT_STR___AVX512VBMI__ \
        TF_PLAT_STR___AVX512VL__ \
        TF_PLAT_STR___AVX__ \
        TF_PLAT_STR___BMI2__ \
        TF_PLAT_STR___BMI__ \
        TF_PLAT_STR___CLFLUSHOPT__ \
        TF_PLAT_STR___CLZERO__ \
        TF_PLAT_STR___F16C__ \
        TF_PLAT_STR___FMA4__ \
        TF_PLAT_STR___FMA__ \
        TF_PLAT_STR___FP_FAST_FMA \
        TF_PLAT_STR___FP_FAST_FMAF \
        TF_PLAT_STR___FSGSBASE__ \
        TF_PLAT_STR___FXSR__ \
        TF_PLAT_STR___LWP__ \
        TF_PLAT_STR___LZCNT__ \
        TF_PLAT_STR___MMX__ \
        TF_PLAT_STR___MWAITX__ \
        TF_PLAT_STR___PCLMUL__ \
        TF_PLAT_STR___PKU__ \
        TF_PLAT_STR___POPCNT__ \
        TF_PLAT_STR___PRFCHW__ \
        TF_PLAT_STR___RDRND__ \
        TF_PLAT_STR___RDSEED__ \
        TF_PLAT_STR___RTM__ \
        TF_PLAT_STR___SHA__ \
        TF_PLAT_STR___SSE2_MATH__ \
        TF_PLAT_STR___SSE2__ \
        TF_PLAT_STR___SSE_MATH__ \
        TF_PLAT_STR___SSE__ \
        TF_PLAT_STR___SSE3__ \
        TF_PLAT_STR___SSE4A__ \
        TF_PLAT_STR___SSE4_1__ \
        TF_PLAT_STR___SSE4_2__ \
        TF_PLAT_STR___SSSE3__ \
        TF_PLAT_STR___TBM__ \
        TF_PLAT_STR___XOP__ \
        TF_PLAT_STR___XSAVEC__ \
        TF_PLAT_STR___XSAVEOPT__ \
        TF_PLAT_STR___XSAVES__ \
        TF_PLAT_STR___XSAVE__ \
        TF_PLAT_STR_TERMINATOR_

// PowerPC (64- and 32-bit) optional features.
// Candidate PowerPC feature macros. Each TF_PLAT_STR_<macro> entry comes from
// platform_strings_computed.h and expands either to the tagged string for
// <macro> or to nothing. Keep one entry per line, each ending in a backslash,
// because the awk generator for that header matches on this layout.
#define TF_PLAT_STR_LIST___powerpc64__() \
        TF_PLAT_STR__SOFT_DOUBLE \
        TF_PLAT_STR__SOFT_FLOAT \
        TF_PLAT_STR___ALTIVEC__ \
        TF_PLAT_STR___APPLE_ALTIVEC__ \
        TF_PLAT_STR___CRYPTO__ \
        TF_PLAT_STR___FLOAT128_HARDWARE__ \
        TF_PLAT_STR___FLOAT128_TYPE__ \
        TF_PLAT_STR___FP_FAST_FMA \
        TF_PLAT_STR___FP_FAST_FMAF \
        TF_PLAT_STR___HTM__ \
        TF_PLAT_STR___NO_FPRS__ \
        TF_PLAT_STR___NO_LWSYNC__ \
        TF_PLAT_STR___POWER8_VECTOR__ \
        TF_PLAT_STR___POWER9_VECTOR__ \
        TF_PLAT_STR___PPC405__ \
        TF_PLAT_STR___QUAD_MEMORY_ATOMIC__ \
        TF_PLAT_STR___RECIPF__ \
        TF_PLAT_STR___RECIP_PRECISION__ \
        TF_PLAT_STR___RECIP__ \
        TF_PLAT_STR___RSQRTEF__ \
        TF_PLAT_STR___RSQRTE__ \
        TF_PLAT_STR___TM_FENCE__ \
        TF_PLAT_STR___UPPER_REGS_DF__ \
        TF_PLAT_STR___UPPER_REGS_SF__ \
        TF_PLAT_STR___VEC__ \
        TF_PLAT_STR___VSX__ \
        TF_PLAT_STR_TERMINATOR_

// aarch64 and 32-bit arm optional features.
// Each macro is listed exactly once; a repeated entry would embed the same
// record in the binary twice.
#define TF_PLAT_STR_LIST___aarch64__() \
        TF_PLAT_STR___ARM_ARCH \
        TF_PLAT_STR___ARM_FEATURE_CLZ \
        TF_PLAT_STR___ARM_FEATURE_CRC32 \
        TF_PLAT_STR___ARM_FEATURE_CRYPTO \
        TF_PLAT_STR___ARM_FEATURE_DIRECTED_ROUNDING \
        TF_PLAT_STR___ARM_FEATURE_DSP \
        TF_PLAT_STR___ARM_FEATURE_FMA \
        TF_PLAT_STR___ARM_FEATURE_IDIV \
        TF_PLAT_STR___ARM_FEATURE_LDREX \
        TF_PLAT_STR___ARM_FEATURE_NUMERIC_MAXMIN \
        TF_PLAT_STR___ARM_FEATURE_QBIT \
        TF_PLAT_STR___ARM_FEATURE_QRDMX \
        TF_PLAT_STR___ARM_FEATURE_SAT \
        TF_PLAT_STR___ARM_FEATURE_SIMD32 \
        TF_PLAT_STR___ARM_FEATURE_UNALIGNED \
        TF_PLAT_STR___ARM_FP \
        TF_PLAT_STR___ARM_NEON_FP \
        TF_PLAT_STR___ARM_NEON__ \
        TF_PLAT_STR___ARM_WMMX \
        TF_PLAT_STR___IWMMXT2__ \
        TF_PLAT_STR___IWMMXT__ \
        TF_PLAT_STR___VFP_FP__ \
        TF_PLAT_STR_TERMINATOR_

// Generic features, including indication of architecture and OS.
// The _M_* macros are defined by Visual Studio.
// It doesn't define __LITTLE_ENDIAN__ or __BYTE_ORDER__;
// Windows is assumed to be little endian.
//
// Unlike the architecture-specific lists, this list is never emptied by the
// #if blocks that follow it.
//
// NOTE(review): the entries TF_PLAT_STR_____MSYS__ and TF_PLAT_STR___riscv___
// test the macros ____MSYS__ and __riscv___ respectively. These look like
// misspellings of __MSYS__ and __riscv, in which case the two records are
// probably never emitted -- confirm against the toolchains. They are left
// unchanged here because renaming an entry requires regenerating
// platform_strings_computed.h at the same time.
#define TF_PLAT_STR_LIST___generic__() \
        TF_PLAT_STR_TARGET_IPHONE_SIMULATOR \
        TF_PLAT_STR_TARGET_OS_IOS \
        TF_PLAT_STR_TARGET_OS_IPHONE \
        TF_PLAT_STR__MSC_VER \
        TF_PLAT_STR__M_ARM \
        TF_PLAT_STR__M_ARM64 \
        TF_PLAT_STR__M_ARM_ARMV7VE \
        TF_PLAT_STR__M_ARM_FP \
        TF_PLAT_STR__M_IX86 \
        TF_PLAT_STR__M_X64 \
        TF_PLAT_STR__WIN32 \
        TF_PLAT_STR__WIN64 \
        TF_PLAT_STR___ANDROID__ \
        TF_PLAT_STR___APPLE__ \
        TF_PLAT_STR___BYTE_ORDER__ \
        TF_PLAT_STR___CYGWIN__ \
        TF_PLAT_STR___FreeBSD__ \
        TF_PLAT_STR___LITTLE_ENDIAN__ \
        TF_PLAT_STR___NetBSD__ \
        TF_PLAT_STR___OpenBSD__ \
        TF_PLAT_STR_____MSYS__ \
        TF_PLAT_STR___aarch64__ \
        TF_PLAT_STR___alpha__ \
        TF_PLAT_STR___arm__ \
        TF_PLAT_STR___i386__ \
        TF_PLAT_STR___i686__ \
        TF_PLAT_STR___ia64__ \
        TF_PLAT_STR___linux__ \
        TF_PLAT_STR___mips32__ \
        TF_PLAT_STR___mips64__ \
        TF_PLAT_STR___powerpc64__ \
        TF_PLAT_STR___powerpc__ \
        TF_PLAT_STR___riscv___ \
        TF_PLAT_STR___s390x__ \
        TF_PLAT_STR___sparc64__ \
        TF_PLAT_STR___sparc__ \
        TF_PLAT_STR___x86_64__ \
        TF_PLAT_STR_TERMINATOR_

// Replace each architecture-specific list with an empty one when the target
// is not that architecture, so that only the relevant family's feature
// strings are embedded.
#if !defined(__x86_64__) && !defined(_M_X64) && \
    !defined(__i386__) && !defined(_M_IX86)
#undef TF_PLAT_STR_LIST___x86_64__
#define TF_PLAT_STR_LIST___x86_64__()
#endif
#if !defined(__powerpc64__) && !defined(__powerpc__)
#undef TF_PLAT_STR_LIST___powerpc64__
#define TF_PLAT_STR_LIST___powerpc64__()
#endif
#if !defined(__aarch64__) && !defined(_M_ARM64) && \
    !defined(__arm__) && !defined(_M_ARM)
#undef TF_PLAT_STR_LIST___aarch64__
#define TF_PLAT_STR_LIST___aarch64__()
#endif

// Macro to be used in each dynamically loadable library.
317 // 318 // The BSS global variable tf_cpu_option_global and the class 319 // instance tf_cpu_option_avoid_omit_class are needed to prevent 320 // compilers/linkers such as clang from omitting the static variable 321 // tf_cpu_option[], which would otherwise appear to be unused. We cannot make 322 // tf_cpu_option[] global, because we then might get multiply-defined symbols 323 // if TF_PLAT_STR() is used twice in the same library. 324 // (tf_cpu_option_global doesn't see such errors because it is 325 // defined in BSS, so multiple definitions are combined by the linker.) gcc's 326 // __attribute__((used)) is insufficient because it seems to be ignored by 327 // linkers. 328 #define TF_PLATFORM_STRINGS() \ 329 static const char tf_cpu_option[] = \ 330 TF_PLAT_STR_MAGIC_PREFIX_ "TF_PLAT_STR_VERSION=" TF_PLAT_STR_VERSION_ \ 331 TF_PLAT_STR_LIST___x86_64__() \ 332 TF_PLAT_STR_LIST___powerpc64__() \ 333 TF_PLAT_STR_LIST___aarch64__() \ 334 TF_PLAT_STR_LIST___generic__() \ 335 ; \ 336 const char *tf_cpu_option_global; \ 337 namespace { \ 338 class TFCPUOptionHelper { \ 339 public: \ 340 TFCPUOptionHelper() { \ 341 /* Compilers/linkers remove unused variables aggressively. The */ \ 342 /* following gyrations subvert most such optimizations. */ \ 343 tf_cpu_option_global = tf_cpu_option; \ 344 /* Nothing is printed because the string starts with a nul. */ \ 345 printf("%s", tf_cpu_option); \ 346 } \ 347 } tf_cpu_option_avoid_omit_class; \ 348 } /* anonymous namespace */ 349 // clang-format on 350 351 namespace tensorflow { 352 353 class Status; 354 355 // Retrieves the platform strings from the file at the given path and appends 356 // them to the given vector. If the returned int is non-zero, an error occurred 357 // reading the file and vector may or may not be modified. The returned error 358 // code is suitable for use with strerror(). 
359 int GetPlatformStrings(const std::string& path, 360 std::vector<std::string>* found); 361 362 } // namespace tensorflow 363 364 #endif // TENSORFLOW_CORE_PLATFORM_PLATFORM_STRINGS_H_ 365