/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASK_GPU_OPERATION_H_
#define TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASK_GPU_OPERATION_H_

#include <string>
#include <vector>

#include "tensorflow/lite/delegates/gpu/common/data_type.h"
#include "tensorflow/lite/delegates/gpu/common/gpu_info.h"
#include "tensorflow/lite/delegates/gpu/common/kernel_info.h"
#include "tensorflow/lite/delegates/gpu/common/precision.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
#include "tensorflow/lite/delegates/gpu/common/task/arguments.h"
#include "tensorflow/lite/delegates/gpu/common/task/buffer_desc.h"
#include "tensorflow/lite/delegates/gpu/common/task/compiler_options.h"
#include "tensorflow/lite/delegates/gpu/common/task/gpu_tensor.h"
#include "tensorflow/lite/delegates/gpu/common/task/serialization_base_generated.h"
#include "tensorflow/lite/delegates/gpu/common/task/tensor_desc.h"
#include "tensorflow/lite/delegates/gpu/common/task/texture2d_desc.h"
#include "tensorflow/lite/delegates/gpu/common/task/tuning_type.h"
#include "tensorflow/lite/delegates/gpu/common/types.h"

namespace tflite {
namespace gpu {

// Forward declarations of the backend-specific wrapper types that are granted
// friend access to GPUOperation (declared later in this header).
namespace cl {
class ClOperation;
}
namespace metal {
class ComputeTask;
struct ComputeTaskDescriptor;
}
namespace gl {
class GlOperation;
}
namespace vulkan {
class VulkanOperation;
}

// Describes how the dispatch grid is derived from the shape of the first dst
// tensor (dst_[0]) of a GPUOperation.
// kCustom: default value
//   GPUOperation::GetGridSize must be overloaded
// kWBToX_HDToY_SToZ:
//   grid_x = dst_[0]->Width() * dst_[0]->Batch();
//   grid_y = dst_[0]->Height() * dst_[0]->Depth();
//   grid_z = dst_[0]->Slices();
// kWBToX_HDToY_ZIs1:
//   grid_x = dst_[0]->Width() * dst_[0]->Batch();
//   grid_y = dst_[0]->Height() * dst_[0]->Depth();
//   grid_z = 1;
// kWBToX_HToY_DToZ:
//   grid_x = dst_[0]->Width() * dst_[0]->Batch();
//   grid_y = dst_[0]->Height();
//   grid_z = dst_[0]->Depth();
// kBToX_YIs1_ZIs1:
//   grid_x = dst_[0]->Batch();
//   grid_y = 1;
//   grid_z = 1;
enum class TensorToGrid {
  kCustom,
  kWBToX_HDToY_SToZ,
  kWBToX_HDToY_ZIs1,
  kWBToX_HToY_DToZ,
  kBToX_YIs1_ZIs1
};

// Bundles the calculation precision together with the descriptors of all
// src/dst tensors of an operation.
struct OperationDef {
  CalculationsPrecision precision;
  std::vector<TensorDescriptor> src_tensors;
  std::vector<TensorDescriptor> dst_tensors;

  // returns FLOAT32 for F32 precision and FLOAT16 for F16 precision
  DataType GetDataType() const;
  // "Primary" means the first src tensor, because the first tensor usually
  // defines the structure of the kernel and the types of all other
  // resources (biases, etc.).
  DataType GetPrimaryDataType() const;
  TensorStorageType GetPrimaryStorageType() const;
  bool IsBatchSupported() const;
};

// GPUOperation represents some implementation of a neural network operation
// on the GPU. A GPUOperation can contain other GPU operations when the
// elementwise_ flag is set; in that case this GPUOperation replaces a
// sequence of operations Op + op0 + op1 + ...
// Because of this ability, the intended usage scenario is:
//   Create an instance of GPUOperation, create all GPUOperations that will
//   (probably) be attached to it, and attach them all to the GPUOperation.
// Call Compile() only on the top-level GPUOperation; calling Compile() on an
// operation that has been attached to another one is useless (and may be an
// error).
class GPUOperation {
 public:
  GPUOperation() = default;
  explicit GPUOperation(const OperationDef& definition);
  virtual ~GPUOperation() = default;
  // Move only
  GPUOperation(GPUOperation&& operation);
  GPUOperation& operator=(GPUOperation&& operation);
  GPUOperation(const GPUOperation&) = delete;
  GPUOperation& operator=(const GPUOperation&) = delete;

  // Attaches another GPUOperation to this one (see the class comment above).
  absl::Status AddOperation(GPUOperation* operation);

  // Binds the runtime tensor for the given src/dst slot. The pointer is
  // stored, not owned; the caller presumably keeps the tensor alive for the
  // lifetime of this operation.
  void SetSrc(GpuSpatialTensor* ptr, int index = 0);
  void SetDst(GpuSpatialTensor* ptr, int index = 0);

  // Fills work_groups with candidate work-group sizes for the given tuning
  // strategy and device/kernel properties.
  virtual void GetPossibleKernelWorkGroups(
      TuningType tuning_type, const GpuInfo& gpu_info,
      const KernelInfo& kernel_info, std::vector<int3>* work_groups) const;

  void AssembleCode(const GpuInfo& gpu_info);

  // Hook for subclasses to validate the compiled kernel; the default
  // implementation accepts everything.
  virtual absl::Status PostCompileCheck(const GpuInfo& gpu_info,
                                        const KernelInfo& kernel_info) {
    return absl::OkStatus();
  }

  const OperationDef& GetDefinition() const { return definition_; }

  // Declare the named src/dst resources referenced by the generated code.
  void AddSrcTensor(const std::string& tensor_name,
                    const TensorDescriptor& desc);
  void AddSrcBuffer(const std::string& buffer_name,
                    const BufferDescriptor& desc);
  void AddSrcTexture2D(const std::string& texture_name,
                       const Texture2DDescriptor& desc);
  void AddDstTensor(const std::string& tensor_name,
                    const TensorDescriptor& desc);

  // True when this operation can be attached to another one as an
  // elementwise step.
  bool IsLinkable() const { return elementwise_ && linkable_; }

  // for linking
  void AddUniquePostfix(const std::string& unique_postfix);

  Arguments args_;
  std::string code_;
  int3 work_group_size_ = int3(8, 4, 1);
  std::vector<CompilerOptions> compiler_options_;
  // not applicable to elementwise
  TensorToGrid tensor_to_grid_ = TensorToGrid::kCustom;

  bool elementwise_ = false;
  // applicable only with elementwise_ = true;
  bool linkable_ = true;  // by default every elementwise is linkable
  // applicable only with elementwise_ = true;
  bool check_src_channels_size_ = false;

 protected:
  // Backend wrappers and the (de)serialization routines need access to the
  // protected state below.
  friend class cl::ClOperation;
  friend class gl::GlOperation;
  friend class metal::ComputeTask;
  friend struct metal::ComputeTaskDescriptor;
  friend class vulkan::VulkanOperation;
  friend flatbuffers::Offset<tflite::gpu::data::GPUOperation> Encode(
      const GPUOperation& op, flatbuffers::FlatBufferBuilder* builder);
  friend absl::Status Decode(const tflite::gpu::data::GPUOperation* fb_op,
                             GPUOperation* op);

  // Hook for subclasses to bind extra kernel arguments; the default
  // implementation binds nothing.
  virtual absl::Status BindArguments(ArgumentsBinder* args) {
    return absl::OkStatus();
  }
  virtual int3 GetGridSize() const;

  // Defines operation calculation precision and format of src/dst tensors.
  OperationDef definition_;
  // Runtime tensors bound via SetSrc/SetDst; non-owning.
  std::vector<GpuSpatialTensor*> src_;
  std::vector<GpuSpatialTensor*> dst_;
  int grid_dimension_ = 3;  // can be 1, 2 or 3
  int3 work_group_launch_order_ = int3(0, 1, 2);
  int3 grid_size_ = int3(0, 0, 0);
  // Names registered via AddSrcTensor/AddDstTensor.
  std::vector<std::string> src_tensors_names_;
  std::vector<std::string> dst_tensors_names_;

 private:
  int3 work_groups_count_ = int3(0, 0, 0);
  int linkable_count_ = 0;
  std::string elementwise_code_;  // temporary, used during op construction
};

}  // namespace gpu
}  // namespace tflite

#endif  // TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASK_GPU_OPERATION_H_