• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASK_GPU_OPERATION_H_
17 #define TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASK_GPU_OPERATION_H_
18 
19 #include <string>
20 #include <vector>
21 
22 #include "tensorflow/lite/delegates/gpu/common/data_type.h"
23 #include "tensorflow/lite/delegates/gpu/common/gpu_info.h"
24 #include "tensorflow/lite/delegates/gpu/common/kernel_info.h"
25 #include "tensorflow/lite/delegates/gpu/common/precision.h"
26 #include "tensorflow/lite/delegates/gpu/common/status.h"
27 #include "tensorflow/lite/delegates/gpu/common/task/arguments.h"
28 #include "tensorflow/lite/delegates/gpu/common/task/buffer_desc.h"
29 #include "tensorflow/lite/delegates/gpu/common/task/compiler_options.h"
30 #include "tensorflow/lite/delegates/gpu/common/task/gpu_tensor.h"
31 #include "tensorflow/lite/delegates/gpu/common/task/serialization_base_generated.h"
32 #include "tensorflow/lite/delegates/gpu/common/task/tensor_desc.h"
33 #include "tensorflow/lite/delegates/gpu/common/task/texture2d_desc.h"
34 #include "tensorflow/lite/delegates/gpu/common/task/tuning_type.h"
35 #include "tensorflow/lite/delegates/gpu/common/types.h"
36 
37 namespace tflite {
38 namespace gpu {
39 namespace cl {
40 class ClOperation;
41 }
42 namespace metal {
43 class ComputeTask;
44 struct ComputeTaskDescriptor;
45 }
46 namespace gl {
47 class GlOperation;
48 }
49 namespace vulkan {
50 class VulkanOperation;
51 }
52 
53 // kCustom: default value
54 //   GPUOperation::GetGridSize must be overloaded
55 // kWBToX_HDToY_SToZ:
56 //   grid_x = dst_[0]->Width() * dst_[0]->Batch();
57 //   grid_y = dst_[0]->Height() * dst_[0]->Depth();
58 //   grid_z = dst_[0]->Slices();
59 // kWBToX_HDToY_ZIs1:
60 //   grid_x = dst_[0]->Width() * dst_[0]->Batch();
61 //   grid_y = dst_[0]->Height() * dst_[0]->Depth();
62 //   grid_z = 1;
63 // kWBToX_HToY_DToZ:
64 //   grid_x = dst_[0]->Width() * dst_[0]->Batch();
65 //   grid_y = dst_[0]->Height();
66 //   grid_z = dst_[0]->Depth();
67 // kBToX_YIs1_ZIs1:
68 //   grid_x = dst_[0]->Batch();
69 //   grid_y = 1;
70 //   grid_z = 1;
71 enum class TensorToGrid {
72   kCustom,
73   kWBToX_HDToY_SToZ,
74   kWBToX_HDToY_ZIs1,
75   kWBToX_HToY_DToZ,
76   kBToX_YIs1_ZIs1
77 };
78 
79 struct OperationDef {
80   CalculationsPrecision precision;
81   std::vector<TensorDescriptor> src_tensors;
82   std::vector<TensorDescriptor> dst_tensors;
83 
84   // returns FLOAT32 for F32 precision and FLOAT16 for F16 precision
85   DataType GetDataType() const;
86   // Primary means the first src tensor, because first tensor usually defines
87   // the structure of kernel, all other resources(biases) types and etc.
88   DataType GetPrimaryDataType() const;
89   TensorStorageType GetPrimaryStorageType() const;
90   bool IsBatchSupported() const;
91 };
92 
93 // GPUOperation represents some implementation of neural network operation on
94 // GPU. GPUOperation can contain another GPU operations with flag elementwise_.
95 // When GPUOperation contains another GPU ops, this GPUoperation replaces
96 // some sequence of operations Op + op0 + op1 + ...
97 // Because of this abilities of GPUOperation, usage scenario is next:
98 // Create instance of GPUOperation.
99 // Create all instances of GPUOperations that we will(probably) attach
100 // to GPUOperation. Attach all GPUOperations to GPUOperation. Call
101 // GPUOperation.Compile(). Don't call GPUOperations.Compile() if it
102 // attached, it useless(and may be error)
103 class GPUOperation {
104  public:
105   GPUOperation() = default;
106   explicit GPUOperation(const OperationDef& definition);
107   virtual ~GPUOperation() = default;
108   // Move only
109   GPUOperation(GPUOperation&& operation);
110   GPUOperation& operator=(GPUOperation&& operation);
111   GPUOperation(const GPUOperation&) = delete;
112   GPUOperation& operator=(const GPUOperation&) = delete;
113 
114   absl::Status AddOperation(GPUOperation* operation);
115 
116   void SetSrc(GpuSpatialTensor* ptr, int index = 0);
117   void SetDst(GpuSpatialTensor* ptr, int index = 0);
118 
119   virtual void GetPossibleKernelWorkGroups(
120       TuningType tuning_type, const GpuInfo& gpu_info,
121       const KernelInfo& kernel_info, std::vector<int3>* work_groups) const;
122 
123   void AssembleCode(const GpuInfo& gpu_info);
124 
PostCompileCheck(const GpuInfo & gpu_info,const KernelInfo & kernel_info)125   virtual absl::Status PostCompileCheck(const GpuInfo& gpu_info,
126                                         const KernelInfo& kernel_info) {
127     return absl::OkStatus();
128   }
129 
GetDefinition()130   const OperationDef& GetDefinition() const { return definition_; }
131 
132   void AddSrcTensor(const std::string& tensor_name,
133                     const TensorDescriptor& desc);
134   void AddSrcBuffer(const std::string& buffer_name,
135                     const BufferDescriptor& desc);
136   void AddSrcTexture2D(const std::string& texture_name,
137                        const Texture2DDescriptor& desc);
138   void AddDstTensor(const std::string& tensor_name,
139                     const TensorDescriptor& desc);
140 
IsLinkable()141   bool IsLinkable() const { return elementwise_ && linkable_; }
142 
143   // for linking
144   void AddUniquePostfix(const std::string& unique_postfix);
145 
146   Arguments args_;
147   std::string code_;
148   int3 work_group_size_ = int3(8, 4, 1);
149   std::vector<CompilerOptions> compiler_options_;
150   // not applicable to elementwise
151   TensorToGrid tensor_to_grid_ = TensorToGrid::kCustom;
152 
153   bool elementwise_ = false;
154   // applicable only with elementwise_ = true;
155   bool linkable_ = true;  // by default every elementwise is linkable
156   // applicable only with elementwise_ = true;
157   bool check_src_channels_size_ = false;
158 
159  protected:
160   friend class cl::ClOperation;
161   friend class gl::GlOperation;
162   friend class metal::ComputeTask;
163   friend struct metal::ComputeTaskDescriptor;
164   friend class vulkan::VulkanOperation;
165   friend flatbuffers::Offset<tflite::gpu::data::GPUOperation> Encode(
166       const GPUOperation& op, flatbuffers::FlatBufferBuilder* builder);
167   friend absl::Status Decode(const tflite::gpu::data::GPUOperation* fb_op,
168                              GPUOperation* op);
169 
BindArguments(ArgumentsBinder * args)170   virtual absl::Status BindArguments(ArgumentsBinder* args) {
171     return absl::OkStatus();
172   }
173   virtual int3 GetGridSize() const;
174 
175   // Defines operation calculation precision and format of src/dst tensors.
176   OperationDef definition_;
177   std::vector<GpuSpatialTensor*> src_;
178   std::vector<GpuSpatialTensor*> dst_;
179   int grid_dimension_ = 3;  // can be 1, 2 or 3
180   int3 work_group_launch_order_ = int3(0, 1, 2);
181   int3 grid_size_ = int3(0, 0, 0);
182   std::vector<std::string> src_tensors_names_;
183   std::vector<std::string> dst_tensors_names_;
184 
185  private:
186   int3 work_groups_count_ = int3(0, 0, 0);
187   int linkable_count_ = 0;
188   std::string elementwise_code_;  // temporary, used during op construction
189 };
190 
191 }  // namespace gpu
192 }  // namespace tflite
193 
194 #endif  // TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASK_GPU_OPERATION_H_
195