/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_CORE_GRAPH_MKL_GRAPH_UTIL_H_
#define TENSORFLOW_CORE_GRAPH_MKL_GRAPH_UTIL_H_
#ifdef INTEL_MKL

#include "absl/base/call_once.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/types.pb.h"
#include "tensorflow/core/graph/graph.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/platform/cpu_info.h"
#include "tensorflow/core/util/env_var.h"
namespace tensorflow {
// Since our ops are going to produce and also consume N additional (Mkl)
// tensors for N Tensorflow tensors, we can have the following different
// orderings among these 2N tensors.
//
// E.g., for Tensorflow tensors A, B, and C, our ops will additionally
// produce and consume A_m, B_m, and C_m.
//
// INTERLEAVED: in this case the 2N tensors are interleaved. For the above
//              example, the ordering looks like: A, A_m, B, B_m, C, C_m.
//
// CONTIGUOUS: in this case the N Tensorflow tensors are contiguous, followed
//             by the N Mkl tensors. For the above example, the ordering
//             looks like: A, B, C, A_m, B_m, C_m.
//
// The following APIs map the index of an original Tensorflow tensor to its
// appropriate position based on the selected ordering. For contiguous
// ordering, we need to know the total number of tensors (parameter
// 'total_tensors').
//
typedef enum { TENSORS_INTERLEAVED, TENSORS_CONTIGUOUS } MklTfTensorOrdering;
// NOTE: Currently, we use contiguous ordering. If you change this, then you
// would need to change the Mkl op definitions in nn_ops.cc.
static const MklTfTensorOrdering kTensorOrdering = TENSORS_CONTIGUOUS;

// Get the index of the MetaData tensor from index 'n' of the Data tensor.
inline int DataIndexToMetaDataIndex(int n, int total_tensors) {
  if (kTensorOrdering == MklTfTensorOrdering::TENSORS_INTERLEAVED) {
    // For interleaved ordering, the Mkl tensor follows immediately after
    // the Tensorflow tensor.
    return n + 1;
  } else {
    CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS);
    // For contiguous ordering, the Mkl tensor is at index
    // n + total_tensors / 2.
    return n + total_tensors / 2;
  }
}

inline int GetTensorDataIndex(int n, int total_tensors) {
  if (kTensorOrdering == MklTfTensorOrdering::TENSORS_INTERLEAVED) {
    return 2 * n;  // index corresponding to the nth input/output tensor
  } else {
    CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS);
    return n;
  }
}

inline int GetTensorMetaDataIndex(int n, int total_tensors) {
  // Get the index for TensorData first, and then use the mapping function
  // to get the TensorMetaData index from the TensorData index.
  int tidx = GetTensorDataIndex(n, total_tensors);
  return DataIndexToMetaDataIndex(tidx, total_tensors);
}
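
// Illustrative example: with the default contiguous ordering and three
// Tensorflow tensors A, B, C (so total_tensors == 6 and the ordering is
// A, B, C, A_m, B_m, C_m):
//   GetTensorDataIndex(1, 6)      returns 1  // B
//   GetTensorMetaDataIndex(1, 6)  returns 4  // B_m
// Under TENSORS_INTERLEAVED (A, A_m, B, B_m, C, C_m), the same calls would
// return 2 and 3, respectively.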

// Check if a control edge between the src and dst nodes already exists.
inline bool DoesControlEdgeExist(const Node* src, const Node* dst) {
  for (const Edge* edge : src->out_edges()) {
    if (edge->IsControlEdge() && edge->dst() == dst) {
      return true;
    }
  }
  return false;
}
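
// A typical use during graph rewrite is to guard control-edge insertion so
// that duplicates are not added, e.g.:
//   if (!DoesControlEdgeExist(src, dst)) {
//     graph->AddControlEdge(src, dst);
//   }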

// Check if the graph should run in layout-dependent mode or native format
// mode based on an environment variable setting. Native format mode is the
// default. Users can set TF_ENABLE_MKL_NATIVE_FORMAT=0 to disable native
// format mode.
inline bool NativeFormatEnabled() {
#ifndef ENABLE_MKL
  return true;
#else
  static bool native_fmt_enabled = true;
  static absl::once_flag once;
  absl::call_once(once, [&] {
    TF_CHECK_OK(ReadBoolFromEnvVar("TF_ENABLE_MKL_NATIVE_FORMAT",
                                   /*default_value*/ true,
                                   &native_fmt_enabled));
  });
  return native_fmt_enabled;
#endif
}
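
// For example, to force layout-dependent mode for a run (assuming an
// ENABLE_MKL build; the script name is illustrative):
//   $ TF_ENABLE_MKL_NATIVE_FORMAT=0 python train.py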

// Check if the data_format attribute in the node def represents a 5D tensor.
inline bool Check5DFormat(const NodeDef& ndef) {
  string data_format;
  TF_CHECK_OK(GetNodeAttr(ndef, "data_format", &data_format));
  return (data_format.compare("NCDHW") == 0 ||
          data_format.compare("NDHWC") == 0);
}
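
// E.g., this returns true for a Conv3D node with data_format "NCDHW" or
// "NDHWC", and false for 4D formats such as "NHWC". Note that the node is
// expected to carry a data_format attribute; TF_CHECK_OK fails otherwise.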

namespace mkl_op_registry {
// MKL operators whose kernels are registered with the 'MklLayoutDependentOp'
// label (e.g., MklConv2D) understand input tensors in MKL layout. These
// operators get additional meta-tensors for their actual input tensors.
static const char* kMklLayoutDependentOpLabel = "MklLayoutDependentOp";
static const char* kMklLayoutDependentOpLabelPattern =
    "label='MklLayoutDependentOp'";
// MKL operators whose kernels are registered with the 'MklNameChangeOp' label
// (e.g., MklMatMul, MklTranspose) do not understand input tensors in MKL
// layout. These operators do not get additional meta-tensors. The signatures
// of these operators are the same as those of the original TensorFlow
// operators they correspond to, so these ops just go through a name change
// during the graph rewrite pass.
static const char* kMklNameChangeOpLabel = "MklNameChangeOp";
static const char* kMklNameChangeOpLabelPattern = "label='MklNameChangeOp'";
static const char* kMklQuantizedOpLabel = "QuantizedMklOp";
static const char* kMklQuantizedOpLabelPattern = "label='QuantizedMklOp'";

// Prefix that we add to a Tensorflow op name to construct the Mkl op name.
static const char* const kMklOpPrefix = "_Mkl";
// TODO(intel-tf): PR review feedback (penpornk)
// Can we add eager_mode (or is_eager) as an op attribute instead?
// This way we don't need to rename the op just to pass eager_mode
// through a template parameter.
static const char* const kMklEagerOpPrefix = "_MklEager";

// Prefix that we add to a TF op name to construct an MKL op that does not
// depend on layout propagation. It is used in both Eager and graph modes
// unless there is a reason to have an additional op name with the
// _MklEager prefix.
static const char* const kMklNativeOpPrefix = "_MklNative";

// Get the name of the Mkl Native (does not depend on layout propagation) op
// from the original TensorFlow op.
inline string GetMklNativeOpName(const string& name) {
  // There are a few operators that don't depend on layout propagation but
  // are prefixed with _Mkl instead of _MklNative.
  bool result =
      (0 == name.compare("ConjugateTranspose") ||
       0 == name.compare("BatchMatMul") || 0 == name.compare("BatchMatMulV2") ||
       0 == name.compare("Einsum") || 0 == name.compare("MatMul") ||
       0 == name.compare("Transpose") || 0 == name.compare("QuantizeV2") ||
       0 == name.compare("Dequantize") || 0 == name.rfind("Quantized", 0));

  if (result) {
    return string(kMklOpPrefix) + name;
  } else {
    return string(kMklNativeOpPrefix) + name;
  }
}
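
// E.g., GetMklNativeOpName("MatMul") returns "_MklMatMul" (MatMul is on the
// exception list above), while GetMklNativeOpName("Conv2D") returns
// "_MklNativeConv2D".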

// Get the name of the Mkl op from the original TensorFlow op.
// We prefix the original op name with _Mkl or _MklNative to get the Mkl op
// name.
inline string GetMklOpName(const string& name) {
  if (!NativeFormatEnabled()) {
    return string(kMklOpPrefix) + name;
  } else {
    return GetMklNativeOpName(name);
  }
}
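
// E.g., with native format mode enabled (the default), GetMklOpName("Conv2D")
// returns "_MklNativeConv2D"; with it disabled, it returns "_MklConv2D".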

// Get the name of the Mkl Eager op from the original TensorFlow op.
// We prefix '_MklEager' to the original op name to get the Mkl Eager op name.
inline string GetMklEagerOpName(const string& name) {
  return string(kMklEagerOpPrefix) + name;
}
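
// E.g., GetMklEagerOpName("Conv2D") returns "_MklEagerConv2D".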

static inline bool IsBF16SupportedByOneDNNOnThisCPU() {
  return port::TestCPUFeature(port::CPUFeature::AVX512F);
}

static inline void BF16UnsupportedWarning() {
  static absl::once_flag cpu_bfloat16_warn_once_flag;
  absl::call_once(cpu_bfloat16_warn_once_flag, [] {
    LOG(ERROR) << "oneDNN BFloat16 support is only available on platforms "
                  "with AVX512. Falling back to the default implementation "
                  "if present.";
  });
}

// Check whether 'op_name' with type T is registered as an MKL operator
// that can accept input tensors in MKL layout.
//
// @input: name of the op
// @input: T datatype to be used for checking the op
// @return: true if op_name is registered as an Mkl-layout dependent op;
// false otherwise
static inline bool IsMklLayoutDependentOp(const string& op_name, DataType T) {
  string kernel = KernelsRegisteredForOp(op_name);

  // Restrict regular ops to FLOAT and BFLOAT16.
  if (kernel.find(kMklLayoutDependentOpLabelPattern) != string::npos) {
    if (T == DT_FLOAT) return true;
    if (T == DT_BFLOAT16) {
      if (IsBF16SupportedByOneDNNOnThisCPU()) {
        return true;
      } else {
        // Restrict bfloat16 ops to platforms with at least AVX512 support;
        // fall back to the Eigen implementation otherwise.
        BF16UnsupportedWarning();
        return false;
      }
    }
    return false;
  }
  return false;
}
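
// E.g., IsMklLayoutDependentOp("_MklConv2D", DT_FLOAT) returns true when the
// _MklConv2D kernel is registered with the 'MklLayoutDependentOp' label,
// while any datatype other than DT_FLOAT/DT_BFLOAT16 yields false.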

// TODO(mdfaijul): QuantizedConv2D is registered with input: QUINT8
// filter:QINT8 for mkldnn integration. First a dummy kernel is created
// and then it is replaced by an actual kernel.
static inline bool IsMklQuantizedOp(const string& op_name, DataType Tinput,
                                    DataType Tfilter) {
  string kernel = KernelsRegisteredForOp(op_name);

  // Restrict quantized ops to QINT8 filters for now; Tinput is currently
  // unused.
  if (kernel.find(kMklQuantizedOpLabelPattern) != string::npos) {
    return (Tfilter == DT_QINT8);
  }
  return false;
}

// Check whether 'op_name' with type T is registered as an MKL operator that
// will go through a name change.
//
// @input: name of the op
// @input: T datatype to be used for checking the op
// @return: true if op_name is registered as an MKL op that will go through a
// name change; false otherwise
static inline bool IsMklNameChangeOp(const string& op_name, DataType T) {
  string kernel = KernelsRegisteredForOp(op_name);
  // The string returned by KernelsRegisteredForOp looks like:
  //
  // Op = _MklMatMul, kernels =
  // device='CPU'; label='MklNameChangeOp'; T in [DT_COMPLEX128]
  // device='CPU'; label='MklNameChangeOp'; T in [DT_COMPLEX64]
  // device='CPU'; label='MklNameChangeOp'; T in [DT_DOUBLE]
  // device='CPU'; label='MklNameChangeOp'; T in [DT_FLOAT]

  if (kernel.find(kMklQuantizedOpLabelPattern) != string::npos) {
    // Restrict quantized ops to QUINT8, QINT8 and QINT32.
    return (T == DT_QUINT8 || T == DT_QINT8 || T == DT_QINT32);
  }

  // Now we just construct a search string to match what we are looking for.
  string search_string = kMklNameChangeOpLabelPattern;
  search_string += string(";") + string(" T in [");
  search_string += DataType_Name(T) + string("]");

  // Temporarily replacing the earlier check with a type-specific check so
  // that we can selectively decide which types are supported by MKL
  // operators. That way, kernel registration does not decide which operators
  // we support. We are using this change to temporarily disable BFLOAT16
  // support. Once we want to enable it, we will go back to the earlier check.
  bool isTypeAllowed = false;
  if (kernel.find(search_string) != string::npos) {
    isTypeAllowed = (T == DT_COMPLEX128 || T == DT_COMPLEX64 ||
                     T == DT_DOUBLE || T == DT_FLOAT);
    if (!isTypeAllowed) {
      if (T == DT_BFLOAT16) {
        if (IsBF16SupportedByOneDNNOnThisCPU()) {
          isTypeAllowed = true;
        } else {
          // Restrict bfloat16 ops to platforms with at least AVX512 support;
          // fall back to the Eigen implementation otherwise.
          BF16UnsupportedWarning();
          isTypeAllowed = false;
        }
      }
    }
    return isTypeAllowed;
  }
  return false;
}
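
// E.g., for T == DT_FLOAT, the constructed search string is
// "label='MklNameChangeOp'; T in [DT_FLOAT]", which matches the
// _MklMatMul registration shown above.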

// Check if the operator with 'op_name' and type 'T' is an MKL operator that
// will either understand input tensors in MKL layout or will go through the
// name change that some operators go through.
static inline bool IsMklOp(const string& op_name, DataType T) {
  return IsMklLayoutDependentOp(op_name, T) || IsMklNameChangeOp(op_name, T);
}

static inline bool IsMklOp(const Node* n) {
  DataType T;
  return GetNodeAttr(n->def(), "T", &T).ok() && IsMklOp(n->type_string(), T);
}
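
// A typical call site in a graph pass, e.g.:
//   for (Node* n : graph->op_nodes()) {
//     if (IsMklOp(n)) { /* node already runs an MKL kernel */ }
//   }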

// Check whether 'op_name' with type T is registered as an MKL-compliant op
// that is element-wise.
//
// @input: name of the op
// @input: T datatype to be used for checking the op
// @return: true if op_name is registered as an element-wise Mkl op;
// false otherwise
static inline bool IsMklElementWiseOp(const string& op_name, DataType T) {
  if (!IsMklOp(op_name, T)) {
    return false;
  }
  bool result = (0 == op_name.compare(GetMklOpName("Add")) ||
                 0 == op_name.compare(GetMklOpName("AddV2")) ||
                 0 == op_name.compare(GetMklOpName("Sub")) ||
                 0 == op_name.compare(GetMklOpName("Mul")) ||
                 0 == op_name.compare(GetMklOpName("Maximum")) ||
                 0 == op_name.compare(GetMklOpName("SquaredDifference")));

  return result;
}
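
// E.g., IsMklElementWiseOp(GetMklOpName("Add"), DT_FLOAT) returns true,
// provided the corresponding Mkl kernel is registered.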
}  // namespace mkl_op_registry
}  // namespace tensorflow
#endif  // INTEL_MKL
#endif  // TENSORFLOW_CORE_GRAPH_MKL_GRAPH_UTIL_H_