// LINT: LEGACY_NAMES
syntax = "proto3";

package stream_executor.dnn;

import "google/protobuf/wrappers.proto";

option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/stream_executor";

// Specifies the data type used by an operation.
enum DataType {
  kFloat = 0;
  kDouble = 1;
  kHalf = 2;
  kInt8 = 3;
  kInt32 = 4;
  kComplexFloat = 5;
  kComplexDouble = 6;
  kBF16 = 7;
}

// Describes how a convolution input or output layer's data is formatted.
enum DataLayout {
  // Naming convention:
  // Y <-> row or height
  // X <-> column or width
  // Batch <-> batch, or N
  // Depth <-> feature, or channel
  // TODO(timshen): turn them into cuDNN names, e.g. kNCHW.
  //
  // Note: In cuDNN, kBatchDepthYX4 and kBatchDepthYX32 are the same layout
  // (namely, NCHW_VECT_C). cuDNN differentiates between the two by using a
  // different data type (int8x4 vs. int8x32). In StreamExecutor we use
  // different layouts for these, because we don't usually pass an explicit
  // data type to StreamExecutor functions.
  kYXDepthBatch = 0;
  kYXBatchDepth = 1;
  kBatchYXDepth = 2;    // cuDNN's NHWC layout
  kBatchDepthYX = 3;    // cuDNN's NCHW layout
  kBatchDepthYX4 = 4;   // cuDNN's NCHW_VECT_C with 4-element vectors (e.g. int8x4)
  kBatchDepthYX32 = 5;  // cuDNN's NCHW_VECT_C with 32-element vectors (e.g. int8x32)
}

// Describes how a convolution filter is laid out in memory.
enum FilterLayout {
  // Naming convention:
  // Y <-> row or height
  // X <-> column or width
  // Output <-> output feature, or N
  // Input <-> input feature, or C
  // TODO(timshen): turn them into cuDNN names, e.g. kNCHW.
  kOutputInputYX = 0;    // cuDNN's NCHW layout
  kOutputYXInput = 1;    // cuDNN's NHWC layout
  kOutputInputYX4 = 2;   // cuDNN's NCHW_VECT_C layout with 4-element vectors
  kOutputInputYX32 = 5;  // cuDNN's NCHW_VECT_C layout with 32-element vectors
  kInputYXOutput = 3;
  kYXInputOutput = 4;
}

// Describes a kind of non-linearity (threshold-like mathematical function).
enum ActivationMode {
  kNone = 0;
  kSigmoid = 1;
  // Rectified linear activation: f(x) = x < 0 ? 0 : x
  kRelu = 2;
  // Rectified linear activation, with an upper maximum of 6.0.
  kRelu6 = 3;
  // Rectified linear activation, with the upper maximum specified by
  // BatchDescriptor::value_max().
  kReluX = 4;
  kTanh = 5;
  // Like kReluX, but passes all values in the range [-X, X].
  kBandPass = 6;
  // Exponential linear activation: f(x) = x < 0 ? e^x - 1 : x
  kElu = 7;
  // Leaky rectified linear activation: f(x) = x < 0 ? alpha * x : x
  kLeakyRelu = 8;
}

// Describes the math definition for the conv op. The popular behavior is
// actually called cross-correlation in math, even though the operation is
// often referred to as convolution. See cuDNN's cudnnConvolutionMode_t.
enum ConvolutionMode {
  CROSS_CORRELATION = 0;
  CONVOLUTION = 1;
}

enum ConvolutionKind {
  INVALID = 0;
  FORWARD = 1;
  BACKWARD_FILTER = 2;
  BACKWARD_DATA = 3;
  FORWARD_BIAS_ACTIVATION = 4;
}

// Generic tensor representation.
message TensorDescriptorProto {
  repeated int64 dimensions = 1;
  DataType data_type = 2;
  oneof layout_oneof {
    DataLayout data_layout = 3;
    FilterLayout filter_layout = 4;
  }
}
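
// Purely illustrative sketch (proto text format; the values and the
// dimension-ordering convention shown here are assumptions, since the
// canonical ordering is defined by the producers in "dnn.h", not by this
// file). An activation tensor fills the data_layout arm of layout_oneof:
//
//   dimensions: [32, 224, 224, 3]
//   data_type: kFloat
//   data_layout: kBatchYXDepth
//
// whereas a filter tensor would instead set the filter_layout arm, e.g.
// filter_layout: kOutputYXInput.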

// Generic algorithm representation.
message AlgorithmProto {
  enum MathType {
    DEFAULT_MATH = 0;
    // The GPU may perform 4x4 matrix FMA (fused multiply-add) operations.
    // See cuDNN's documentation for CUDNN_TENSOR_OP_MATH.
    TENSOR_OP_MATH = 1;
  }
  int64 algo_id = 1;
  MathType math_type = 2;
  reserved 3;

  map<int64, int64> tuning_knobs = 4;
  // Legacy algorithm enums and cuDNN Frontend engine numbers need to coexist
  // in the same proto medium-term, until we can be confident of no longer
  // needing the legacy cuDNN convolution API. Once the migration is complete,
  // we can stop producing legacy algorithm enums and remove this field.
  bool is_cudnn_frontend = 5;

  // For ROCm only, it's impossible to re-query the required workspace size
  // after running the algorithm search, so we must store the workspace size
  // along with the choice of algorithm. For consistency and convenience,
  // cuDNN uses this field in the same way, even though it would be possible
  // to re-query the workspace size from cuDNN at each use.
  //
  // Since this message is persisted in files, we need to be able to
  // distinguish a 0 workspace size from an unknown workspace size in an old
  // message, so this is a wrapper message field rather than a plain uint64.
  google.protobuf.UInt64Value workspace_size = 6;
}

// Proto definition of AlgorithmConfig in "dnn.h".
// TODO(ruochengw): After cl/380702564 is submitted, add support for algorithm
// configs with cuDNN Frontend APIs.
message AlgorithmConfigProto {
  // Use oneof to emulate the optional-field semantics of proto2, since older
  // versions of proto3 cannot distinguish an unset field from a
  // default-valued field.
  oneof optional_algorithm {
    AlgorithmProto algorithm = 1;
  }
  oneof optional_algorithm_no_scratch {
    AlgorithmProto algorithm_no_scratch = 2;
  }
  oneof optional_scratch_size {
    int64 scratch_size = 3;
  }
}

// Convolution-specific parameters.
message ConvolutionDescriptorProto {
  repeated int64 paddings = 1;
  repeated int64 strides = 2;
  repeated int64 dilations = 3;
  // The "accumulator" type. For example, use F32 as an accumulator for F16
  // convolutions.
  // See cuDNN's cudnnDataType_t.
  DataType compute_mode = 4;
  // See cuDNN's group count.
  int32 group_count = 5;
  ConvolutionMode convolution_mode = 6;
  // TensorFlow node name, same as in NodeDef, for debugging purposes.
  string name = 7;
}
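
// Purely illustrative sketch (proto text format; all values are made up, and
// the assumption that paddings/strides/dilations carry one entry per spatial
// dimension follows typical 2D usage, with the authoritative semantics in
// "dnn.h"). A forward convolution with padding 1, stride 2, no dilation, and
// an F32 accumulator for F16 data might be described as:
//
//   ConvolutionDescriptorProto:
//     paddings: [1, 1]
//     strides: [2, 2]
//     dilations: [1, 1]
//     compute_mode: kFloat
//     group_count: 1
//     convolution_mode: CROSS_CORRELATION
//     name: "conv2d_1"
//
//   AlgorithmConfigProto (the presence or absence of each oneof arm is what
//   distinguishes "set" from "unset"):
//     algorithm {
//       algo_id: 1
//       math_type: TENSOR_OP_MATH
//       workspace_size { value: 4194304 }
//     }
//     scratch_size: 4194304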