• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/stream_executor/dnn.h"
17 
18 #include "tensorflow/stream_executor/lib/strcat.h"
19 #include "tensorflow/stream_executor/lib/stringprintf.h"
20 
21 namespace perftools {
22 namespace gputools {
23 namespace dnn {
24 
// Base-class stub: returns false without touching `out_algorithms`,
// signalling that no forward-convolution algorithm list is available.
// Presumably overridden by concrete backends (e.g. cuDNN) — confirm in dnn.h.
bool DnnSupport::GetConvolveAlgorithms(
    bool with_winograd_nonfused, int cc_major, int cc_minor,
    std::vector<AlgorithmDesc>* out_algorithms) {
  return false;
}
30 
// Base-class stub: returns false without touching `out_algorithms`,
// signalling that no backward-data algorithm list is available.
bool DnnSupport::GetConvolveBackwardDataAlgorithms(
    bool with_winograd_nonfused, int cc_major, int cc_minor,
    std::vector<AlgorithmDesc>* out_algorithms) {
  return false;
}
36 
// Base-class stub: returns false without touching `out_algorithms`,
// signalling that no backward-filter algorithm list is available.
bool DnnSupport::GetConvolveBackwardFilterAlgorithms(
    bool with_winograd_nonfused, int cc_major, int cc_minor,
    std::vector<AlgorithmDesc>* out_algorithms) {
  return false;
}
42 
QuantizedActivationModeString(QuantizedActivationMode mode)43 string QuantizedActivationModeString(QuantizedActivationMode mode) {
44   switch (mode) {
45     case dnn::QuantizedActivationMode::k8Bit:
46       return "uint8";
47     case dnn::QuantizedActivationMode::k16Bit:
48       return "uint16";
49     case dnn::QuantizedActivationMode::k32Bit:
50       return "int32";
51     default:
52       LOG(FATAL) << "Unknown quantized_activation_mode "
53                  << static_cast<int32>(mode);
54   }
55   return "unknown quantized_activation_mode";
56 }
57 
ActivationModeString(ActivationMode mode)58 string ActivationModeString(ActivationMode mode) {
59   switch (mode) {
60     case ActivationMode::kSigmoid:
61       return "sigmoid";
62     case ActivationMode::kRelu:
63       return "relu";
64     case ActivationMode::kRelu6:
65       return "relu6";
66     case ActivationMode::kReluX:
67       return "reluX";
68     case ActivationMode::kTanh:
69       return "tanh";
70     case ActivationMode::kBandPass:
71       return "bandpass";
72     default:
73       LOG(FATAL) << "Unknown activation_mode " << static_cast<int32>(mode);
74   }
75   return "unknown activation_mode";
76 }
77 
ElementwiseOperationString(ElementwiseOperation op)78 string ElementwiseOperationString(ElementwiseOperation op) {
79   switch (op) {
80     case ElementwiseOperation::kAdd:
81       return "add";
82     case ElementwiseOperation::kMultiply:
83       return "multiply";
84     default:
85       LOG(FATAL) << "Unknown elementwise op " << static_cast<int32>(op);
86   }
87   return "unknown element wise op";
88 }
89 
DataLayoutString(DataLayout layout)90 string DataLayoutString(DataLayout layout) {
91   switch (layout) {
92     case DataLayout::kYXDepthBatch:
93       return "YXDepthBatch";
94     case DataLayout::kYXBatchDepth:
95       return "YXBatchDepth";
96     case DataLayout::kBatchYXDepth:
97       return "BatchYXDepth";
98     case DataLayout::kBatchDepthYX:
99       return "BatchDepthYX";
100     case DataLayout::kBatchDepthYX4:
101       return "BatchDepthYX4";
102     default:
103       LOG(FATAL) << "Unknown data layout " << static_cast<int32>(layout);
104   }
105   return "unknown data layout";
106 }
107 
FilterLayoutString(FilterLayout layout)108 string FilterLayoutString(FilterLayout layout) {
109   switch (layout) {
110     case FilterLayout::kOutputInputYX:
111       return "OutputInputYX";
112     case FilterLayout::kOutputInputYX4:
113       return "OutputInputYX4";
114     case FilterLayout::kInputYXOutput:
115       return "InputYXOutput";
116     case FilterLayout::kYXInputOutput:
117       return "YXInputOutput";
118     default:
119       LOG(FATAL) << "Unknown filter layout " << static_cast<int32>(layout);
120   }
121   return "unknown filter layout";
122 }
123 
PadAlignmentString(PadAlignment alignment)124 string PadAlignmentString(PadAlignment alignment) {
125   switch (alignment) {
126     case PadAlignment::kDefault:
127       return "default";
128     case PadAlignment::kCudnnPadding:
129       return "cuDNN padding";
130     case PadAlignment::kTensorFlowPadding:
131       return "TensorFlow padding";
132   }
133   return "unknown pad alignment";
134 }
135 
ShortPoolingModeString(PoolingMode mode)136 string ShortPoolingModeString(PoolingMode mode) {
137   switch (mode) {
138     case PoolingMode::kMaximum:
139       return "Max";
140     case PoolingMode::kAverage:
141       return "Avg";
142     default:
143       LOG(FATAL) << "Unknown filter layout " << static_cast<int32>(mode);
144   }
145   return "unknown filter layout";
146 }
147 
GetDimIndices(const DataLayout & layout,const int data_dims)148 std::tuple<int, int, int> GetDimIndices(const DataLayout& layout,
149                                         const int data_dims) {
150   int depth_idx, batch_idx, spatial_idx;
151   switch (layout) {
152     case DataLayout::kYXBatchDepth:
153       depth_idx = data_dims - 1;
154       batch_idx = data_dims - 2;
155       spatial_idx = 0;
156       break;
157 
158     case DataLayout::kYXDepthBatch:
159       depth_idx = data_dims - 2;
160       batch_idx = data_dims - 1;
161       spatial_idx = 0;
162       break;
163 
164     case DataLayout::kBatchYXDepth:
165       depth_idx = data_dims - 1;
166       batch_idx = 0;
167       spatial_idx = 1;
168       break;
169 
170     case DataLayout::kBatchDepthYX:
171     case DataLayout::kBatchDepthYX4:
172       depth_idx = 1;
173       batch_idx = 0;
174       spatial_idx = 2;
175       break;
176   }
177 
178   return std::make_tuple(depth_idx, batch_idx, spatial_idx);
179 }
180 
// Permutes `input` — a dims vector ordered according to layout `from` —
// into the ordering implied by layout `to`.  The batch and depth entries
// are moved individually; the remaining input.size() - 2 spatial entries
// are copied as a contiguous run starting at each layout's first spatial
// index.  Assumes input.size() >= 2 (batch + depth plus zero or more
// spatial dims) — TODO confirm callers never pass fewer.
std::vector<int64> ReorderDims(const std::vector<int64>& input,
                               const DataLayout& from, const DataLayout& to) {
  // Identical layouts: nothing to permute.
  if (from == to) return input;

  int d_idx_from, b_idx_from, spatial_idx_from;
  int d_idx_to, b_idx_to, spatial_idx_to;

  std::tie(d_idx_from, b_idx_from, spatial_idx_from) =
      GetDimIndices(from, input.size());
  std::tie(d_idx_to, b_idx_to, spatial_idx_to) =
      GetDimIndices(to, input.size());

  std::vector<int64> reordered(input.size());
  reordered[b_idx_to] = input[b_idx_from];
  reordered[d_idx_to] = input[d_idx_from];

  // Spatial dims keep their relative order; only the run's start moves.
  for (size_t i = 0; i < input.size() - 2;
       i++, spatial_idx_from++, spatial_idx_to++) {
    reordered[spatial_idx_to] = input[spatial_idx_from];
  }

  return reordered;
}
204 
205 // -- AlgorithmConfig
206 
// Renders the config as "<algo_id>, <no-scratch algo_id>".
string AlgorithmConfig::ToString() const {
  return port::StrCat(algorithm_.algo_id(), ", ",
                      algorithm_no_scratch_.algo_id());
}
211 
212 // -- BatchDescriptor
213 
// Zero-initializes a descriptor with `ndims` spatial dimensions.
// Defaults: layout kYXDepthBatch, 8-bit quantized activations, and
// value_min_ == value_max_ == 0 (ToShortString omits the value range
// when the two are equal).
BatchDescriptor::BatchDescriptor(int ndims)
    : count_(0),
      feature_map_count_(0),
      spatial_size_(ndims, 0),
      value_max_(0.0),
      value_min_(0.0),
      layout_(DataLayout::kYXDepthBatch),
      ndims_(ndims),
      quantized_activation_mode_(QuantizedActivationMode::k8Bit) {}
223 
// Default: 2 spatial dimensions (i.e. a 4-D batch descriptor).
BatchDescriptor::BatchDescriptor() : BatchDescriptor(/*ndims=*/2) {}
225 
full_dims(const DataLayout & layout) const226 std::vector<int64> BatchDescriptor::full_dims(const DataLayout& layout) const {
227   std::vector<int64> bdyx_dims(ndims_ + 2);
228   bdyx_dims[0] = count();
229   bdyx_dims[1] = feature_map_count();
230   std::copy(spatial_size_.begin(), spatial_size_.end(), bdyx_dims.begin() + 2);
231   return ReorderDims(bdyx_dims, DataLayout::kBatchDepthYX, layout);
232 }
233 
// Returns per-dimension strides (in elements) for a densely packed tensor
// in this descriptor's own layout_, permuted into the requested `layout`.
// LOG(FATAL)s for kBatchDepthYX4, which has no Nd-descriptor stride form.
std::vector<int64> BatchDescriptor::full_strides(
    const DataLayout& layout) const {
  if (layout_ == DataLayout::kBatchDepthYX4) {
    LOG(FATAL)
        << "Cannot compute full strides for batch descriptor " << ToString()
        << ", because its layout is kBatchDepthYX4. In fact, "
           "cudnnSetTensorNdDescriptor doesn't work for kBatchDepthYX4 at all. "
           "Use cudnnSetTensor4DDescriptor to set cudnnTensorDescriptor_t "
           "instead.";
  }
  std::vector<int64> phys_dims = full_dims(layout_);
  std::vector<int64> phys_strides(phys_dims.size());
  // Innermost dimension is contiguous; each outer stride is the product of
  // all inner dimension sizes.
  phys_strides[ndims_ + 1] = 1;
  for (int i = ndims_; i >= 0; i--) {
    phys_strides[i] = phys_strides[i + 1] * phys_dims[i + 1];
  }
  return ReorderDims(phys_strides, layout_, layout);
}
252 
CloneFrom(const BatchDescriptor & other)253 void BatchDescriptor::CloneFrom(const BatchDescriptor& other) {
254   count_ = other.count_;
255   feature_map_count_ = other.feature_map_count_;
256   spatial_size_ = other.spatial_size_;
257   value_max_ = other.value_max_;
258   value_min_ = other.value_min_;
259   layout_ = other.layout_;
260   ndims_ = other.ndims_;
261   quantized_activation_mode_ = other.quantized_activation_mode_;
262 }
263 
ToString() const264 string BatchDescriptor::ToString() const {
265   string spatial;
266   for (int i = 0; i < ndims_; i++) {
267     port::Appendf(&spatial, "%lld ", spatial_size_[i]);
268   }
269   return port::Printf(
270       "{count: %lld feature_map_count: %lld spatial: %s "
271       "value_min: %f value_max: %f layout: %s}",
272       count_, feature_map_count_, spatial.c_str(), value_min_, value_max_,
273       DataLayoutString(layout_).c_str());
274 }
275 
ToShortString() const276 string BatchDescriptor::ToShortString() const {
277   // All the constituent strings are less than 15 characters, so the
278   // small string optimization ensures that there will be at most one
279   // heap memory allocation.
280   string depth = port::StrCat("d", feature_map_count());
281   string batch = port::StrCat("b", count());
282 
283   string spatial = "s";
284   for (int i = 0; i < ndims_; i++) {
285     port::Appendf(&spatial, "%lld ", spatial_size_[i]);
286   }
287 
288   string suffix;
289   if (value_min() != value_max()) {
290     port::StrAppend(&suffix, "[", value_min(), ";", value_max(), "]");
291   }
292   if (quantized_activation_mode() == QuantizedActivationMode::k16Bit) {
293     suffix += "_16bit";
294   }
295 
296   switch (layout()) {
297     case DataLayout::kYXDepthBatch:
298       return port::StrCat(spatial, depth, batch, suffix);
299     case DataLayout::kYXBatchDepth:
300       return port::StrCat(spatial, batch, depth, suffix);
301     case DataLayout::kBatchYXDepth:
302       return port::StrCat(batch, spatial, depth, suffix);
303     case DataLayout::kBatchDepthYX:
304       return port::StrCat(batch, depth, spatial, suffix);
305     case DataLayout::kBatchDepthYX4:
306       return port::StrCat(batch, depth, spatial, suffix, "(VECT_C)");
307     default:
308       LOG(FATAL) << "Unknown layout " << static_cast<int32>(layout());
309       return "";  // Avoid return warning (unreachable)
310   }
311 }
312 
NodesPerFeatureMap() const313 int64 BatchDescriptor::NodesPerFeatureMap() const {
314   int64 ret = 1;
315   for (int i = 0; i < ndims_; i++) {
316     ret *= spatial_size_[i];
317   }
318   return ret;
319 }
320 
// Total elements in a single batch item: spatial product * feature maps.
int64 BatchDescriptor::NodesAcrossFeatureMaps() const {
  return NodesPerFeatureMap() * feature_map_count_;
}
324 
// Total elements across the whole batch: count * feature maps * spatial.
int64 BatchDescriptor::ElementCount() const {
  return count_ * feature_map_count_ * NodesPerFeatureMap();
}
328 
// Weight count for a dense layer mapping `input` to `output`: one weight
// per (input node, output node) pair.
int64 BatchDescriptor::FullyConnectedWeightCount(
    const BatchDescriptor& input, const BatchDescriptor& output) {
  return input.NodesAcrossFeatureMaps() * output.NodesAcrossFeatureMaps();
}
333 
// Bias count for a dense layer: one bias per output node.
int64 BatchDescriptor::FullyConnectedBiasCount(const BatchDescriptor& output) {
  return output.NodesAcrossFeatureMaps();
}
337 
DepthConcatenateOutputDescriptor(port::ArraySlice<dnn::BatchDescriptor> inputs)338 BatchDescriptor BatchDescriptor::DepthConcatenateOutputDescriptor(
339     port::ArraySlice<dnn::BatchDescriptor> inputs) {
340   if (inputs.empty()) {
341     return BatchDescriptor();
342   }
343   int feature_map_count = 0;
344   for (const auto& dimensions : inputs) {
345     feature_map_count += dimensions.feature_map_count();
346   }
347   BatchDescriptor output = inputs[0];
348   output.set_feature_map_count(feature_map_count);
349   return output;
350 }
351 
352 // -- FilterDescriptor
353 
// Zero-initializes a filter descriptor with `ndims` spatial dimensions and
// the default kOutputInputYX layout.
FilterDescriptor::FilterDescriptor(int ndims)
    : output_feature_map_count_(0),
      input_feature_map_count_(0),
      input_filter_dims_(ndims, 0),
      ndims_(ndims),
      layout_(FilterLayout::kOutputInputYX) {}
360 
// Default: 2 spatial dimensions (a 4-D filter).
FilterDescriptor::FilterDescriptor() : FilterDescriptor(/*ndims=*/2) {}
362 
// No owned resources beyond the members; nothing to release explicitly.
FilterDescriptor::~FilterDescriptor() {}
364 
CloneFrom(const FilterDescriptor & other)365 void FilterDescriptor::CloneFrom(const FilterDescriptor& other) {
366   set_output_feature_map_count(other.output_feature_map_count())
367       .set_input_feature_map_count(other.input_feature_map_count())
368       .set_layout(other.layout());
369   input_filter_dims_ = other.input_filter_dims_;
370   ndims_ = other.ndims_;
371 }
372 
ToString() const373 string FilterDescriptor::ToString() const {
374   string desc = port::Printf(
375       "{output_feature_map_count: %lld input_feature_map_count: %lld "
376       "layout: %s shape: ",
377       output_feature_map_count_, input_feature_map_count_,
378       FilterLayoutString(layout_).c_str());
379   for (int i = 0; i < ndims_; i++) {
380     port::Appendf(&desc, "%lld ", input_filter_dims_[i]);
381   }
382   port::StrAppend(&desc, "}");
383 
384   return desc;
385 }
386 
ToShortString() const387 string FilterDescriptor::ToShortString() const {
388   // All the constituent strings are less than 15 characters, so the
389   // small string optimization ensures that there will be at most one
390   // heap memory allocation.
391   string od = port::StrCat("od", output_feature_map_count_);
392   string id = port::StrCat("id", input_feature_map_count_);
393 
394   string spatial = "s";
395   for (int i = 0; i < ndims_; i++) {
396     port::Appendf(&spatial, "%lld ", input_filter_dims_[i]);
397   }
398 
399   switch (layout_) {
400     case FilterLayout::kOutputInputYX:
401       return port::StrCat(od, id, spatial);
402     case FilterLayout::kOutputInputYX4:
403       return port::StrCat(od, id, spatial, "(VECT_C)");
404     case FilterLayout::kInputYXOutput:
405       return port::StrCat(id, spatial, od);
406     case FilterLayout::kYXInputOutput:
407       return port::StrCat(spatial, id, od);
408     default:
409       LOG(FATAL) << "Unknown layout " << static_cast<int32>(layout_);
410       return "";  // Avoid return warning (unreachable)
411   }
412 }
413 
ComputeWeightCount() const414 int64 FilterDescriptor::ComputeWeightCount() const {
415   int64 ret = output_feature_map_count_ * input_feature_map_count_;
416   for (int i = 0; i < ndims_; i++) {
417     ret *= input_filter_dims_[i];
418   }
419   return ret;
420 }
421 
422 // -- ConvolutionDescriptor
423 
// Initializes a convolution with `ndims` spatial dimensions: zero padding,
// unit strides, unit dilation, and default pad alignment.
ConvolutionDescriptor::ConvolutionDescriptor(int ndims)
    : zero_padding_(ndims, 0),
      filter_strides_(ndims, 1),
      dilation_rates_(ndims, 1),
      pad_alignment_(PadAlignment::kDefault),
      ndims_(ndims) {}
430 
// Default: 2 spatial dimensions.
ConvolutionDescriptor::ConvolutionDescriptor()
    : ConvolutionDescriptor(/*ndims=*/2) {}
433 
// No owned resources beyond the members; nothing to release explicitly.
ConvolutionDescriptor::~ConvolutionDescriptor() {}
435 
ToString() const436 string ConvolutionDescriptor::ToString() const {
437   string padding;
438   string strides;
439   string dilations;
440   for (int i = 0; i < ndims_; i++) {
441     port::Appendf(&padding, "%lld ", zero_padding_[i]);
442     port::Appendf(&strides, "%lld ", filter_strides_[i]);
443     port::Appendf(&dilations, "%lld ", dilation_rates_[i]);
444   }
445 
446   return port::Printf(
447       "{zero_padding: %s pad_alignment: %s filter_strides: %s dilation_rates: "
448       "%s}",
449       padding.c_str(), PadAlignmentString(pad_alignment_).c_str(),
450       strides.c_str(), dilations.c_str());
451 }
452 
ToShortString() const453 string ConvolutionDescriptor::ToShortString() const {
454   string desc;
455   for (int i = 0; i < ndims_; i++) {
456     if (i > 0) port::Appendf(&desc, "_");
457     port::Appendf(&desc, "p%d:%lld", i, zero_padding_[i]);
458   }
459   for (int i = 0; i < ndims_; i++) {
460     port::Appendf(&desc, "_s%d:%lld", i, filter_strides_[i]);
461   }
462   for (int i = 0; i < ndims_; i++) {
463     port::Appendf(&desc, "_d%d:%lld", i, dilation_rates_[i]);
464   }
465   return desc;
466 }
467 
468 // -- PoolingDescriptor
469 
// Initializes a pooling descriptor with `ndims` spatial dimensions:
// max pooling, zero-sized window, zero padding, unit strides, and NaN
// propagation off.
PoolingDescriptor::PoolingDescriptor(int ndims)
    : mode_(dnn::PoolingMode::kMaximum),
      ndims_(ndims),
      propagate_nans_(false),
      window_(ndims, 0),
      padding_(ndims, 0),
      strides_(ndims, 1) {}
477 
// Default: 2 spatial dimensions.
PoolingDescriptor::PoolingDescriptor() : PoolingDescriptor(/*ndims=*/2) {}
479 
CloneFrom(const PoolingDescriptor & other)480 void PoolingDescriptor::CloneFrom(const PoolingDescriptor& other) {
481   mode_ = other.mode_;
482   ndims_ = other.ndims_;
483   window_ = other.window_;
484   padding_ = other.padding_;
485   strides_ = other.strides_;
486   propagate_nans_ = other.propagate_nans_;
487 }
488 
ToString() const489 string PoolingDescriptor::ToString() const {
490   const char* mode_string =
491       mode_ == dnn::PoolingMode::kMaximum ? "kMaximum" : "kAverage";
492 
493   string window, strides, padding;
494   for (int i = 0; i < ndims_; i++) {
495     port::Appendf(&window, "%lld ", window_[i]);
496     port::Appendf(&strides, "%lld ", strides_[i]);
497     port::Appendf(&padding, "%lld", padding_[i]);
498   }
499 
500   const char* propagate_string = propagate_nans_ ? "Yes" : "No";
501 
502   return port::Printf(
503       "{mode: %s window: %s strides: %s padding: %s propagate NaNs: %s}",
504       mode_string, window.c_str(), strides.c_str(), padding.c_str(),
505       propagate_string);
506 }
507 
ToShortString() const508 string PoolingDescriptor::ToShortString() const {
509   string window, strides, padding;
510   for (int i = 0; i < ndims_; i++) {
511     port::Appendf(&window, "_w%d:%lld", i, window_[i]);
512     port::Appendf(&strides, "_s%d:%lld", i, strides_[i]);
513     port::Appendf(&padding, "_p%d:%lld", i, padding_[i]);
514   }
515   return port::StrCat(mode_ == dnn::PoolingMode::kMaximum ? "max" : "avg",
516                       window, strides, padding,
517                       propagate_nans_ ? "propagate_nans" : "ignore_nans");
518 }
519 
520 // -- NormalizeDescriptor
521 
// Zero-initializes every normalization parameter.
NormalizeDescriptor::NormalizeDescriptor()
    : bias_(0.0),
      range_(0),
      alpha_(0.0),
      beta_(0.0),
      wrap_around_(false),
      segment_size_(0) {}
529 
CloneFrom(const NormalizeDescriptor & other)530 void NormalizeDescriptor::CloneFrom(const NormalizeDescriptor& other) {
531   bias_ = other.bias_;
532   range_ = other.range_;
533   alpha_ = other.alpha_;
534   beta_ = other.beta_;
535   wrap_around_ = other.wrap_around_;
536   segment_size_ = other.segment_size_;
537 }
538 
// Verbose human-readable dump of all normalization parameters.
string NormalizeDescriptor::ToString() const {
  return port::Printf(
      "{bias: %f range: %d alpha: %f beta: %f wrap_around: %d "
      "segment_size: %d}",
      bias_, range_, alpha_, beta_, wrap_around_, segment_size_);
}
545 
// Compact underscore-separated dump of all normalization parameters.
string NormalizeDescriptor::ToShortString() const {
  return port::StrCat("bias:", bias_, "_range:", range_, "_alpha:", alpha_,
                      "_beta:", beta_, "_wrap:", wrap_around_, "_size:",
                      segment_size_);
}
551 
552 }  // namespace dnn
553 }  // namespace gputools
554 }  // namespace perftools
555