/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/delegates/gpu/common/task/gpu_operation.h"

#include "absl/strings/substitute.h"
#include "tensorflow/lite/delegates/gpu/common/access_type.h"
#include "tensorflow/lite/delegates/gpu/common/task/work_group_picking.h"

namespace tflite {
namespace gpu {
namespace {
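// Builds the kernel source for a standalone elementwise operation: each work
// item reads one FLT4 from src_tensor and writes it to dst_tensor. When
// check_src_slices is true, the read is guarded so that slices outside
// src_tensor's range yield zeros instead of an out-of-bounds access.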
std::string GetElementWiseCode(const OperationDef& op_def,
                               bool check_src_slices) {
  std::string c;
  c += "MAIN_FUNCTION(\n";
  c += "$0) {\n";
  c += "  int X = GLOBAL_ID_0;\n";
  c += "  int Y = GLOBAL_ID_1;\n";
  c += "  int Z = GLOBAL_ID_2;\n";
  c += "  if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height() || "
       "Z >= args.dst_tensor.Slices()) return; \n";
  if (check_src_slices) {
    c += "  FLT4 src = INIT_FLT4(0.0f);\n";
    c += "  if (Z < args.src_tensor.Slices()) {\n";
    c += "    src = args.src_tensor.Read(X, Y, Z);\n";
    c += "  }\n";
  } else {
    c += "  FLT4 src = args.src_tensor.Read(X, Y, Z);\n";
  }
  c += "  args.dst_tensor.Write(src, X, Y, Z);\n";
  c += "} \n";
  return c;
}

}  // namespace

DataType OperationDef::GetDataType() const {
  return DeduceDataTypeFromPrecision(precision);
}

DataType OperationDef::GetPrimaryDataType() const {
  return src_tensors[0].data_type;
}
TensorStorageType OperationDef::GetPrimaryStorageType() const {
  return src_tensors[0].storage_type;
}

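// An operation supports batching if any of its source or destination tensors
// has a BATCH axis in its layout.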
bool OperationDef::IsBatchSupported() const {
  for (const auto& src : src_tensors) {
    if (HasAxis(src.layout, Axis::BATCH)) {
      return true;
    }
  }
  for (const auto& dst : dst_tensors) {
    if (HasAxis(dst.layout, Axis::BATCH)) {
      return true;
    }
  }
  return false;
}

GPUOperation::GPUOperation(const OperationDef& definition)
    : definition_(definition) {}

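// SetSrc/SetDst bind a tensor to the given source or destination slot,
// growing the corresponding list with null entries if the index is past the
// current end.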
void GPUOperation::SetSrc(GpuSpatialTensor* ptr, int index) {
  if (index >= src_.size()) {
    src_.resize(index + 1, nullptr);
  }
  src_[index] = ptr;
}

void GPUOperation::SetDst(GpuSpatialTensor* ptr, int index) {
  if (index >= dst_.size()) {
    dst_.resize(index + 1, nullptr);
  }
  dst_[index] = ptr;
}

GPUOperation::GPUOperation(GPUOperation&& operation)
    : args_(std::move(operation.args_)),
      code_(std::move(operation.code_)),
      work_group_size_(operation.work_group_size_),
      compiler_options_(std::move(operation.compiler_options_)),
      tensor_to_grid_(operation.tensor_to_grid_),
      elementwise_(operation.elementwise_),
      linkable_(operation.linkable_),
      check_src_channels_size_(operation.check_src_channels_size_),
      definition_(std::move(operation.definition_)),
      src_(std::move(operation.src_)),
      dst_(std::move(operation.dst_)),
      grid_dimension_(operation.grid_dimension_),
      work_group_launch_order_(operation.work_group_launch_order_),
      grid_size_(operation.grid_size_),
      src_tensors_names_(std::move(operation.src_tensors_names_)),
      dst_tensors_names_(std::move(operation.dst_tensors_names_)),
      work_groups_count_(operation.work_groups_count_),
      linkable_count_(operation.linkable_count_),
      elementwise_code_(std::move(operation.elementwise_code_)) {}

GPUOperation& GPUOperation::operator=(GPUOperation&& operation) {
  if (this != &operation) {
    args_ = std::move(operation.args_);
    code_ = std::move(operation.code_);
    std::swap(work_group_size_, operation.work_group_size_);
    compiler_options_ = std::move(operation.compiler_options_);
    tensor_to_grid_ = operation.tensor_to_grid_;
    elementwise_ = operation.elementwise_;
    linkable_ = operation.linkable_;
    check_src_channels_size_ = operation.check_src_channels_size_;
    definition_ = std::move(operation.definition_);
    src_ = std::move(operation.src_);
    dst_ = std::move(operation.dst_);
    std::swap(grid_dimension_, operation.grid_dimension_);
    std::swap(work_group_launch_order_, operation.work_group_launch_order_);
    std::swap(grid_size_, operation.grid_size_);
    src_tensors_names_ = std::move(operation.src_tensors_names_);
    dst_tensors_names_ = std::move(operation.dst_tensors_names_);
    std::swap(work_groups_count_, operation.work_groups_count_);
    std::swap(linkable_count_, operation.linkable_count_);
    elementwise_code_ = std::move(operation.elementwise_code_);
  }
  return *this;
}

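// Fuses another elementwise operation into this one: the linked operation's
// code is appended to elementwise_code_ under a unique "_linkN" postfix, its
// arguments are merged into args_, and its additional tensors (all but its
// primary input) are carried over with the same postfix so their names stay
// unique.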
absl::Status GPUOperation::AddOperation(GPUOperation* operation) {
  linkable_count_ += 1;
  std::string code = operation->code_;
  std::string unique_postfix = absl::StrCat("_link", linkable_count_);
  operation->args_.RenameArgs(unique_postfix, &code);
  elementwise_code_ += "{\n" + code + "\n}\n";
  RETURN_IF_ERROR(args_.Merge(std::move(operation->args_), unique_postfix));
  for (int i = 0; i < operation->src_tensors_names_.size(); ++i) {
    definition_.src_tensors.push_back(
        operation->definition_.src_tensors[i + 1]);
    src_tensors_names_.push_back(operation->src_tensors_names_[i] +
                                 unique_postfix);
  }
  for (int i = 0; i < operation->dst_tensors_names_.size(); ++i) {
    dst_tensors_names_.push_back(operation->dst_tensors_names_[i] +
                                 unique_postfix);
  }
  return absl::OkStatus();
}

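// The AddSrc*/AddDst* helpers register a named tensor, buffer, or 2D texture
// as a READ (or WRITE) object reference in args_ and record its name so it
// can be addressed from the generated code.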
void GPUOperation::AddSrcTensor(const std::string& tensor_name,
                                const TensorDescriptor& desc) {
  src_tensors_names_.push_back(tensor_name);
  auto desc_new = absl::make_unique<TensorDescriptor>(desc);
  args_.AddObjectRef(tensor_name, AccessType::READ, std::move(desc_new));
}

void GPUOperation::AddSrcBuffer(const std::string& buffer_name,
                                const BufferDescriptor& desc) {
  src_tensors_names_.push_back(buffer_name);
  auto desc_new = absl::make_unique<BufferDescriptor>(desc);
  args_.AddObjectRef(buffer_name, AccessType::READ, std::move(desc_new));
}

void GPUOperation::AddSrcTexture2D(const std::string& texture_name,
                                   const Texture2DDescriptor& desc) {
  src_tensors_names_.push_back(texture_name);
  auto desc_new = absl::make_unique<Texture2DDescriptor>(desc);
  args_.AddObjectRef(texture_name, AccessType::READ, std::move(desc_new));
}

void GPUOperation::AddDstTensor(const std::string& tensor_name,
                                const TensorDescriptor& desc) {
  dst_tensors_names_.push_back(tensor_name);
  auto desc_new = absl::make_unique<TensorDescriptor>(desc);
  args_.AddObjectRef(tensor_name, AccessType::WRITE, std::move(desc_new));
}

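// For elementwise operations, registers the primary src/dst tensors as
// "src_tensor"/"dst_tensor" object refs (with batched width when the
// definition supports batch), prepends the operation's own code to
// elementwise_code_, and replaces code_ with the generic per-element kernel
// from GetElementWiseCode().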
void GPUOperation::AssembleCode(const GpuInfo& gpu_info) {
  if (elementwise_) {
    auto src_desc =
        absl::make_unique<TensorDescriptor>(definition_.src_tensors[0]);
    if (definition_.IsBatchSupported()) {
      src_desc->SetStateVar("BatchedWidth", "true");
    }
    src_tensors_names_.insert(src_tensors_names_.begin(), "src_tensor");
    args_.AddObjectRef("src_tensor", AccessType::READ, std::move(src_desc));

    auto dst_desc =
        absl::make_unique<TensorDescriptor>(definition_.dst_tensors[0]);
    if (definition_.IsBatchSupported()) {
      dst_desc->SetStateVar("BatchedWidth", "true");
    }
    dst_tensors_names_.insert(dst_tensors_names_.begin(), "dst_tensor");
    args_.AddObjectRef("dst_tensor", AccessType::WRITE, std::move(dst_desc));

    elementwise_code_ = "{\n" + code_ + "\n}\n" + elementwise_code_;
    code_ = GetElementWiseCode(definition_, check_src_channels_size_);
  }
}

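// Delegates work group selection to the generic picker, using the current
// grid size.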
void GPUOperation::GetPossibleKernelWorkGroups(
    TuningType tuning_type, const GpuInfo& gpu_info,
    const KernelInfo& kernel_info, std::vector<int3>* work_groups) const {
  GetPossibleWorkGroups(tuning_type, gpu_info, kernel_info, grid_size_,
                        work_groups);
}

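// Maps the destination tensor's shape to a 3D dispatch grid according to
// tensor_to_grid_; elementwise operations always use the
// width*batch / height*depth / slices mapping. Falls back to the explicitly
// set grid_size_ when no mapping is selected.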
int3 GPUOperation::GetGridSize() const {
  if (elementwise_ || tensor_to_grid_ == TensorToGrid::kWBToX_HDToY_SToZ) {
    const int grid_x = dst_[0]->Width() * dst_[0]->Batch();
    const int grid_y = dst_[0]->Height() * dst_[0]->Depth();
    const int grid_z = dst_[0]->Slices();
    return int3(grid_x, grid_y, grid_z);
  }
  if (tensor_to_grid_ == TensorToGrid::kWBToX_HDToY_ZIs1) {
    const int grid_x = dst_[0]->Width() * dst_[0]->Batch();
    const int grid_y = dst_[0]->Height() * dst_[0]->Depth();
    const int grid_z = 1;
    return int3(grid_x, grid_y, grid_z);
  }
  if (tensor_to_grid_ == TensorToGrid::kWBToX_HToY_DToZ) {
    const int grid_x = dst_[0]->Width() * dst_[0]->Batch();
    const int grid_y = dst_[0]->Height();
    const int grid_z = dst_[0]->Depth();
    return int3(grid_x, grid_y, grid_z);
  }
  if (tensor_to_grid_ == TensorToGrid::kBToX_YIs1_ZIs1) {
    const int grid_x = dst_[0]->Batch();
    const int grid_y = 1;
    const int grid_z = 1;
    return int3(grid_x, grid_y, grid_z);
  }
  return grid_size_;
}

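// Appends unique_postfix to all recorded source and destination tensor names.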
void GPUOperation::AddUniquePostfix(const std::string& unique_postfix) {
  for (int i = 0; i < src_tensors_names_.size(); ++i) {
    src_tensors_names_[i] += unique_postfix;
  }
  for (int i = 0; i < dst_tensors_names_.size(); ++i) {
    dst_tensors_names_[i] += unique_postfix;
  }
}

}  // namespace gpu
}  // namespace tflite