// LINT: LEGACY_NAMES
syntax = "proto3";

package stream_executor.dnn;

import "google/protobuf/wrappers.proto";

option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/stream_executor";

// Specifies the data type used by an operation.
enum DataType {
  kFloat = 0;
  kDouble = 1;
  kHalf = 2;
  kInt8 = 3;
  kInt32 = 4;
  kComplexFloat = 5;
  kComplexDouble = 6;
  kBF16 = 7;
}

// Describes how a convolution input or output layer's data is formatted.
enum DataLayout {
  // Naming convention:
  // Y <-> row or height
  // X <-> column or width
  // Batch <-> batch, or N
  // Depth <-> feature, or channel
  // TODO(timshen): turn them into cuDNN names, e.g. kNCHW.
  //
  // Note: in cuDNN, kBatchDepthYX4 and kBatchDepthYX32 are the same layout
  // (namely, NCHW_VECT_C); cuDNN differentiates between the two by using
  // different data types (int8x4 vs. int8x32). In StreamExecutor we use
  // distinct layouts instead, because we don't usually pass an explicit data
  // type to StreamExecutor functions.
  kYXDepthBatch = 0;
  kYXBatchDepth = 1;
  kBatchYXDepth = 2;    // cuDNN's NHWC layout
  kBatchDepthYX = 3;    // cuDNN's NCHW layout
  kBatchDepthYX4 = 4;   // cuDNN's NCHW_VECT_C with 4-elem vectors (e.g. int8x4)
  kBatchDepthYX32 = 5;  // cuDNN's NCHW_VECT_C with 32-elem vectors (e.g. int8x32)
}
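
// Illustrative note (not part of the original comments): under the vectorized
// NCHW_VECT_C layouts, the channel axis is tiled into fixed-size vectors. For
// a logical NCHW shape [N=1, C=8, H=4, W=4], kBatchDepthYX4 arranges the data
// as N x C/4 x H x W x 4 (here 1 x 2 x 4 x 4 x 4), so C must be a multiple of
// the vector width (4 for kBatchDepthYX4, 32 for kBatchDepthYX32).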

// Describes how a convolution filter is laid out in memory.
enum FilterLayout {
  // Naming convention:
  // Y <-> row or height
  // X <-> column or width
  // Output <-> output feature, or N
  // Input <-> input feature, or C
  // TODO(timshen): turn them into cuDNN names, e.g. kNCHW.
  kOutputInputYX = 0;    // cuDNN's NCHW layout
  kOutputYXInput = 1;    // cuDNN's NHWC layout
  kOutputInputYX4 = 2;   // cuDNN's NCHW_VECT_C layout with 4-elem vectors
  kOutputInputYX32 = 5;  // cuDNN's NCHW_VECT_C layout with 32-elem vectors
  kInputYXOutput = 3;
  kYXInputOutput = 4;
}
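
// Illustrative note (not part of the original comments): each enum name lists
// axes from outermost (slowest-varying) to innermost (fastest-varying). For
// example, kOutputInputYX orders a filter with K output features, C input
// features, and RxS spatial extent as K x C x R x S (matching cuDNN's NCHW
// filter format above), while kOutputYXInput orders it K x R x S x C (NHWC).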

// Describes a kind of non-linearity (threshold-like mathematical function).
enum ActivationMode {
  kNone = 0;
  kSigmoid = 1;
  // Rectified linear activation: f(x) = x < 0 ? 0 : x
  kRelu = 2;
  // Rectified linear activation with an upper bound of 6.0.
  kRelu6 = 3;
  // Rectified linear activation with the upper bound specified by
  // BatchDescriptor::value_max().
  kReluX = 4;
  kTanh = 5;
  // Like kReluX, but passes all values in the range [-X, X].
  kBandPass = 6;
  // Exponential linear activation: f(x) = x < 0 ? e^x - 1 : x
  kElu = 7;
  // Leaky rectified linear activation: f(x) = x < 0 ? alpha * x : x
  kLeakyRelu = 8;
}
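
// For reference (standard definitions, not in the original comments):
// kSigmoid computes f(x) = 1 / (1 + e^-x), squashing inputs into (0, 1), and
// kTanh computes f(x) = (e^x - e^-x) / (e^x + e^-x), squashing them into
// (-1, 1); kRelu6 is thus f(x) = min(max(0, x), 6) and kReluX is
// f(x) = min(max(0, x), X).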

// Describes the math definition of the convolution op. The popular behavior
// is actually called cross-correlation in math, even though the operation is
// often referred to as convolution. See cuDNN's cudnnConvolutionMode_t.
enum ConvolutionMode {
  CROSS_CORRELATION = 0;
  CONVOLUTION = 1;
}
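
// Illustrative 1-D example (not in the original comments): for input x and a
// K-tap filter w, CROSS_CORRELATION computes
//   y[i] = sum_{k=0..K-1} x[i+k] * w[k],
// while CONVOLUTION flips the filter first:
//   y[i] = sum_{k=0..K-1} x[i+k] * w[K-1-k].
// The two differ only for filters that are not symmetric.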

enum ConvolutionKind {
  INVALID = 0;
  FORWARD = 1;
  BACKWARD_FILTER = 2;
  BACKWARD_DATA = 3;
  FORWARD_BIAS_ACTIVATION = 4;
}

// Generic tensor representation.
message TensorDescriptorProto {
  repeated int64 dimensions = 1;
  DataType data_type = 2;
  oneof layout_oneof {
    DataLayout data_layout = 3;
    FilterLayout filter_layout = 4;
  }
}
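
// Illustrative text-format example (values assumed, not from the original
// file): a float activation tensor with batch 1, 28x28 spatial extent, and 64
// channels in cuDNN's NHWC layout, dimensions listed in layout order:
//   dimensions: [1, 28, 28, 64]
//   data_type: kFloat
//   data_layout: kBatchYXDepth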

// Generic algorithm representation.
message AlgorithmProto {
  enum MathType {
    DEFAULT_MATH = 0;
    // The GPU may use 4x4 matrix FMA (tensor op) instructions.
    // See cuDNN's documentation for CUDNN_TENSOR_OP_MATH.
    TENSOR_OP_MATH = 1;
  }
  int64 algo_id = 1;
  MathType math_type = 2;
  reserved 3;

  map<int64, int64> tuning_knobs = 4;
  // Legacy algorithm enums and cuDNN Frontend engine numbers need to coexist
  // in the same proto medium-term, until we can be confident of no longer
  // needing the legacy cuDNN convolution API. Once the migration is complete,
  // we can stop producing legacy algorithm enums and remove this field.
  bool is_cudnn_frontend = 5;

  // For ROCm only, it's impossible to re-query the required workspace size
  // after running the algorithm search, so we must store the workspace size
  // along with the choice of algorithm. For consistency and convenience,
  // cuDNN uses this field in the same way, even though it would be possible
  // to re-query the workspace size from cuDNN at each use.
  //
  // Since this message is persisted in files, we need to be able to
  // distinguish a workspace size of 0 from an unknown workspace size in an
  // old message, so this is a wrapper message field rather than a plain
  // uint64.
  google.protobuf.UInt64Value workspace_size = 6;
}
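
// Illustrative text-format examples (IDs and sizes assumed, not from the
// original file). A legacy cuDNN algorithm:
//   algo_id: 1
//   math_type: TENSOR_OP_MATH
//   workspace_size { value: 4194304 }
// A cuDNN Frontend engine with tuning knobs:
//   algo_id: 12
//   math_type: DEFAULT_MATH
//   is_cudnn_frontend: true
//   tuning_knobs { key: 2 value: 3 }
//   workspace_size { value: 0 }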

// Proto definition of AlgorithmConfig in "dnn.h".
// TODO(ruochengw): After cl/380702564 is submitted, add support for algorithm
// configs with cuDNN Frontend APIs.
message AlgorithmConfigProto {
  // Use oneof to emulate proto2's optional field semantics, since older
  // versions of proto3 cannot distinguish an unset field from a
  // default-valued field.
  oneof optional_algorithm {
    AlgorithmProto algorithm = 1;
  }
  oneof optional_algorithm_no_scratch {
    AlgorithmProto algorithm_no_scratch = 2;
  }
  oneof optional_scratch_size {
    int64 scratch_size = 3;
  }
}
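
// Illustrative text-format example (values assumed, not from the original
// file): a config recording both a scratch-using algorithm and a no-scratch
// fallback, plus the associated scratch size:
//   algorithm { algo_id: 7 math_type: TENSOR_OP_MATH }
//   algorithm_no_scratch { algo_id: 1 math_type: DEFAULT_MATH }
//   scratch_size: 8388608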

// Convolution-specific parameters.
message ConvolutionDescriptorProto {
  repeated int64 paddings = 1;
  repeated int64 strides = 2;
  repeated int64 dilations = 3;
  // The "accumulator" type. For example, use F32 as an accumulator for F16
  // convolutions.
  // See the computeType parameter of cuDNN's cudnnSetConvolutionNdDescriptor.
  DataType compute_mode = 4;
  // See cuDNN's group count.
  int32 group_count = 5;
  ConvolutionMode convolution_mode = 6;
  // TensorFlow node name, same as in NodeDef; for debugging purposes.
  string name = 7;
}
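
// Illustrative text-format example (values assumed, not from the original
// file): a 3x3 "same" convolution with unit strides and dilations, F32
// accumulation, and no grouping:
//   paddings: [1, 1]
//   strides: [1, 1]
//   dilations: [1, 1]
//   compute_mode: kFloat
//   group_count: 1
//   convolution_mode: CROSS_CORRELATION
//   name: "conv2d_1"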