• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/core/platform/cpu_feature_guard.h"
17 
18 #include <mutex>
19 #include <string>
20 
21 #include "absl/base/call_once.h"
22 #include "tensorflow/core/platform/byte_order.h"
23 #include "tensorflow/core/platform/cpu_info.h"
24 #include "tensorflow/core/platform/logging.h"
25 
26 namespace tensorflow {
27 namespace port {
28 namespace {
29 
30 // If the CPU feature isn't present, log a fatal error.
CheckFeatureOrDie(CPUFeature feature,const string & feature_name)31 void CheckFeatureOrDie(CPUFeature feature, const string& feature_name) {
32   if (!TestCPUFeature(feature)) {
33 #ifdef __ANDROID__
34     // Some Android emulators seem to indicate they don't support SSE, so to
35     // avoid crashes when testing, switch this to a warning.
36     LOG(WARNING)
37 #else
38     LOG(FATAL)
39 #endif
40         << "The TensorFlow library was compiled to use " << feature_name
41         << " instructions, but these aren't available on your machine.";
42   }
43 }
44 
45 // Check if CPU feature is included in the TensorFlow binary.
CheckIfFeatureUnused(CPUFeature feature,const string & feature_name,string & missing_instructions)46 void CheckIfFeatureUnused(CPUFeature feature, const string& feature_name,
47                           string& missing_instructions) {
48   if (TestCPUFeature(feature)) {
49     missing_instructions.append(" ");
50     missing_instructions.append(feature_name);
51   }
52 }
53 
54 // Raises an error if the binary has been compiled for a CPU feature (like AVX)
55 // that isn't available on the current machine. It also warns of performance
56 // loss if there's a feature available that's not being used.
57 // Depending on the compiler and initialization order, a SIGILL exception may
58 // occur before this code is reached, but this at least offers a chance to give
59 // a more meaningful error message.
60 class CPUFeatureGuard {
61  public:
CPUFeatureGuard()62   CPUFeatureGuard() {
63 #ifdef __SSE__
64     CheckFeatureOrDie(CPUFeature::SSE, "SSE");
65 #endif  // __SSE__
66 #ifdef __SSE2__
67     CheckFeatureOrDie(CPUFeature::SSE2, "SSE2");
68 #endif  // __SSE2__
69 #ifdef __SSE3__
70     CheckFeatureOrDie(CPUFeature::SSE3, "SSE3");
71 #endif  // __SSE3__
72 #ifdef __SSE4_1__
73     CheckFeatureOrDie(CPUFeature::SSE4_1, "SSE4.1");
74 #endif  // __SSE4_1__
75 #ifdef __SSE4_2__
76     CheckFeatureOrDie(CPUFeature::SSE4_2, "SSE4.2");
77 #endif  // __SSE4_2__
78 #ifdef __AVX__
79     CheckFeatureOrDie(CPUFeature::AVX, "AVX");
80 #endif  // __AVX__
81 #ifdef __AVX2__
82     CheckFeatureOrDie(CPUFeature::AVX2, "AVX2");
83 #endif  // __AVX2__
84 #ifdef __AVX512F__
85     CheckFeatureOrDie(CPUFeature::AVX512F, "AVX512F");
86 #endif  // __AVX512F__
87 #ifdef __FMA__
88     CheckFeatureOrDie(CPUFeature::FMA, "FMA");
89 #endif  // __FMA__
90   }
91 };
92 
// File-local guard instance. Constructing this namespace-scope object runs
// the CPUFeatureGuard constructor, and with it the compiled-feature checks,
// without any explicit call from other code.
CPUFeatureGuard g_cpu_feature_guard_singleton;

// Flag handed to absl::call_once by InfoAboutUnusedCPUFeatures() so that the
// unused-feature report is produced only once.
absl::once_flag g_cpu_feature_guard_warn_once_flag;
96 
97 }  // namespace
98 
InfoAboutUnusedCPUFeatures()99 void InfoAboutUnusedCPUFeatures() {
100   absl::call_once(g_cpu_feature_guard_warn_once_flag, [] {
101     string missing_instructions;
102 #if defined(_MSC_VER) && !defined(__clang__)
103 
104 #ifndef __AVX__
105     CheckIfFeatureUnused(CPUFeature::AVX, "AVX", missing_instructions);
106 #endif  // __AVX__
107 #ifndef __AVX2__
108     CheckIfFeatureUnused(CPUFeature::AVX2, "AVX2", missing_instructions);
109 #endif  // __AVX2__
110 
111 #else  // if defined(_MSC_VER) && !defined(__clang__)
112 
113 #ifndef __SSE__
114     CheckIfFeatureUnused(CPUFeature::SSE, "SSE", missing_instructions);
115 #endif  // __SSE__
116 #ifndef __SSE2__
117     CheckIfFeatureUnused(CPUFeature::SSE2, "SSE2", missing_instructions);
118 #endif  // __SSE2__
119 #ifndef __SSE3__
120     CheckIfFeatureUnused(CPUFeature::SSE3, "SSE3", missing_instructions);
121 #endif  // __SSE3__
122 #ifndef __SSE4_1__
123     CheckIfFeatureUnused(CPUFeature::SSE4_1, "SSE4.1", missing_instructions);
124 #endif  // __SSE4_1__
125 #ifndef __SSE4_2__
126     CheckIfFeatureUnused(CPUFeature::SSE4_2, "SSE4.2", missing_instructions);
127 #endif  // __SSE4_2__
128 #ifndef __AVX__
129     CheckIfFeatureUnused(CPUFeature::AVX, "AVX", missing_instructions);
130 #endif  // __AVX__
131 #ifndef __AVX2__
132     CheckIfFeatureUnused(CPUFeature::AVX2, "AVX2", missing_instructions);
133 #endif  // __AVX2__
134 #ifndef __AVX512F__
135     CheckIfFeatureUnused(CPUFeature::AVX512F, "AVX512F", missing_instructions);
136 #endif  // __AVX512F__
137 #ifndef __FMA__
138     CheckIfFeatureUnused(CPUFeature::FMA, "FMA", missing_instructions);
139 #endif  // __FMA__
140 #endif  // else of if defined(_MSC_VER) && !defined(__clang__)
141     if (!missing_instructions.empty()) {
142 #ifndef INTEL_MKL
143       LOG(INFO) << "Your CPU supports instructions that this TensorFlow "
144                 << "binary was not compiled to use:" << missing_instructions;
145 #else
146       LOG(INFO) << "This TensorFlow binary is optimized with Intel(R) MKL-DNN "
147                 << "to use the following CPU instructions in performance "
148                 << "critical operations: " << missing_instructions << std::endl
149                 << "To enable them in non-MKL-DNN operations, rebuild "
150                 << "TensorFlow with the appropriate compiler flags.";
151 #endif
152     }
153   });
154 }
155 
156 }  // namespace port
157 }  // namespace tensorflow
158