1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASK_GPU_OPERATION_H_ 17 #define TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASK_GPU_OPERATION_H_ 18 19 #include <string> 20 #include <vector> 21 22 #include "tensorflow/lite/delegates/gpu/common/data_type.h" 23 #include "tensorflow/lite/delegates/gpu/common/gpu_info.h" 24 #include "tensorflow/lite/delegates/gpu/common/kernel_info.h" 25 #include "tensorflow/lite/delegates/gpu/common/precision.h" 26 #include "tensorflow/lite/delegates/gpu/common/status.h" 27 #include "tensorflow/lite/delegates/gpu/common/task/arguments.h" 28 #include "tensorflow/lite/delegates/gpu/common/task/buffer_desc.h" 29 #include "tensorflow/lite/delegates/gpu/common/task/compiler_options.h" 30 #include "tensorflow/lite/delegates/gpu/common/task/gpu_tensor.h" 31 #include "tensorflow/lite/delegates/gpu/common/task/serialization_base_generated.h" 32 #include "tensorflow/lite/delegates/gpu/common/task/tensor_desc.h" 33 #include "tensorflow/lite/delegates/gpu/common/task/texture2d_desc.h" 34 #include "tensorflow/lite/delegates/gpu/common/task/tuning_type.h" 35 #include "tensorflow/lite/delegates/gpu/common/types.h" 36 37 namespace tflite { 38 namespace gpu { 39 namespace cl { 40 class ClOperation; 41 } 42 namespace metal { 43 class ComputeTask; 44 struct ComputeTaskDescriptor; 45 } 46 47 // kCustom: default value 48 // GPUOperation::GetGridSize must be overloaded 49 // kWBToX_HDToY_SToZ: 50 // grid_x = dst_[0]->Width() * dst_[0]->Batch(); 51 // grid_y = dst_[0]->Height() * dst_[0]->Depth(); 52 // grid_z = dst_[0]->Slices(); 53 // kWBToX_HDToY_ZIs1: 54 // grid_x = dst_[0]->Width() * dst_[0]->Batch(); 55 // grid_y = dst_[0]->Height() * dst_[0]->Depth(); 56 // grid_z = 1; 57 // kWBToX_HToY_DToZ: 58 // grid_x = dst_[0]->Width() * dst_[0]->Batch(); 59 // grid_y = dst_[0]->Height(); 60 // grid_z = dst_[0]->Depth(); 61 // kBToX_YIs1_ZIs1: 62 // grid_x = dst_[0]->Batch(); 63 // grid_y = 1; 64 // grid_z = 1; 65 enum class TensorToGrid { 66 kCustom, 67 kWBToX_HDToY_SToZ, 68 kWBToX_HDToY_ZIs1, 69 kWBToX_HToY_DToZ, 70 kBToX_YIs1_ZIs1 71 }; 72 73 struct OperationDef { 74 CalculationsPrecision precision; 75 std::vector<TensorDescriptor> src_tensors; 76 std::vector<TensorDescriptor> dst_tensors; 77 78 // returns FLOAT32 for F32 precision and FLOAT16 for F16 precision 79 DataType GetDataType() const; 80 // Primary means the first src tensor, because first tensor usually defines 81 // the structure of kernel, all other resources(biases) types and etc. 82 DataType GetPrimaryDataType() const; 83 TensorStorageType GetPrimaryStorageType() const; 84 bool IsBatchSupported() const; 85 }; 86 87 // GPUOperation represents some implementation of neural network operation on 88 // GPU. GPUOperation can contain another GPU operations with flag elementwise_. 89 // When GPUOperation contains another GPU ops, this GPUoperation replaces 90 // some sequence of operations Op + op0 + op1 + ... 91 // Because of this abilities of GPUOperation, usage scenario is next: 92 // Create instance of GPUOperation. 93 // Create all instances of GPUOperations that we will(probably) attach 94 // to GPUOperation. Attach all GPUOperations to GPUOperation. Call 95 // GPUOperation.Compile(). Don't call GPUOperations.Compile() if it 96 // attached, it useless(and may be error) 97 class GPUOperation { 98 public: 99 GPUOperation() = default; 100 explicit GPUOperation(const OperationDef& definition); 101 virtual ~GPUOperation() = default; 102 // Move only 103 GPUOperation(GPUOperation&& operation); 104 GPUOperation& operator=(GPUOperation&& operation); 105 GPUOperation(const GPUOperation&) = delete; 106 GPUOperation& operator=(const GPUOperation&) = delete; 107 108 absl::Status AddOperation(GPUOperation* operation); 109 110 void SetSrc(GpuSpatialTensor* ptr, int index = 0); 111 void SetDst(GpuSpatialTensor* ptr, int index = 0); 112 113 virtual void GetPossibleKernelWorkGroups( 114 TuningType tuning_type, const GpuInfo& gpu_info, 115 const KernelInfo& kernel_info, std::vector<int3>* work_groups) const; 116 117 void AssembleCode(const GpuInfo& gpu_info); 118 PostCompileCheck(const GpuInfo & gpu_info,const KernelInfo & kernel_info)119 virtual absl::Status PostCompileCheck(const GpuInfo& gpu_info, 120 const KernelInfo& kernel_info) { 121 return absl::OkStatus(); 122 } 123 GetDefinition()124 const OperationDef& GetDefinition() const { return definition_; } 125 126 void AddSrcTensor(const std::string& tensor_name, 127 const TensorDescriptor& desc); 128 void AddSrcBuffer(const std::string& buffer_name, 129 const BufferDescriptor& desc); 130 void AddSrcTexture2D(const std::string& texture_name, 131 const Texture2DDescriptor& desc); 132 void AddDstTensor(const std::string& tensor_name, 133 const TensorDescriptor& desc); 134 IsLinkable()135 bool IsLinkable() const { return elementwise_ && linkable_; } 136 137 // for linking 138 void AddUniquePostfix(const std::string& unique_postfix); 139 140 Arguments args_; 141 std::string code_; 142 int3 work_group_size_ = int3(8, 4, 1); 143 std::vector<CompilerOptions> compiler_options_; 144 // not applicable to elementwise 145 TensorToGrid tensor_to_grid_ = TensorToGrid::kCustom; 146 147 bool elementwise_ = false; 148 // applicable only with elementwise_ = true; 149 bool linkable_ = true; // by default every elementwise is linkable 150 // applicable only with elementwise_ = true; 151 bool check_src_channels_size_ = false; 152 153 protected: 154 friend class cl::ClOperation; 155 friend class metal::ComputeTask; 156 friend struct metal::ComputeTaskDescriptor; 157 friend flatbuffers::Offset<tflite::gpu::data::GPUOperation> Encode( 158 const GPUOperation& op, flatbuffers::FlatBufferBuilder* builder); 159 friend absl::Status Decode(const tflite::gpu::data::GPUOperation* fb_op, 160 GPUOperation* op); 161 BindArguments(ArgumentsBinder * args)162 virtual absl::Status BindArguments(ArgumentsBinder* args) { 163 return absl::OkStatus(); 164 } 165 virtual int3 GetGridSize() const; 166 167 // Defines operation calculation precision and format of src/dst tensors. 168 OperationDef definition_; 169 std::vector<GpuSpatialTensor*> src_; 170 std::vector<GpuSpatialTensor*> dst_; 171 int grid_dimension_ = 3; // can be 1, 2 or 3 172 int3 work_group_launch_order_ = int3(0, 1, 2); 173 int3 grid_size_ = int3(0, 0, 0); 174 std::vector<std::string> src_tensors_names_; 175 std::vector<std::string> dst_tensors_names_; 176 177 private: 178 int3 work_groups_count_ = int3(0, 0, 0); 179 int linkable_count_ = 0; 180 std::string elementwise_code_; // temporary, used during op construction 181 }; 182 183 } // namespace gpu 184 } // namespace tflite 185 186 #endif // TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASK_GPU_OPERATION_H_ 187