• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/lite/delegates/gpu/common/tasks/elementwise.h"
17 
18 #include <string>
19 
20 #include "absl/strings/str_cat.h"
21 #include "absl/strings/substitute.h"
22 #include "tensorflow/lite/delegates/gpu/common/task/storage_type_util.h"
23 
24 namespace tflite {
25 namespace gpu {
26 
27 namespace {
GetOneInputCode(const GpuInfo & gpu_info,const OperationType & op_type,CalculationsPrecision precision,const std::string & input0)28 std::string GetOneInputCode(const GpuInfo& gpu_info,
29                             const OperationType& op_type,
30                             CalculationsPrecision precision,
31                             const std::string& input0) {
32   std::string result;
33   switch (op_type) {
34     case OperationType::ABS:
35       result = "$0 = fabs($0);\n";
36       break;
37     case OperationType::COS:
38       result = "$0 = cos($0);\n";
39       break;
40     case OperationType::COPY:
41       // No op as inout_value will be copied to dest automatically.
42       result = "\n";
43       break;
44     case OperationType::ELU:
45       if (gpu_info.IsApiOpenCl()) {
46         result = R"(
47 $0.x = $0.x < INIT_FLT(0.0f) ? expm1($0.x) : $0.x;
48 $0.y = $0.y < INIT_FLT(0.0f) ? expm1($0.y) : $0.y;
49 $0.z = $0.z < INIT_FLT(0.0f) ? expm1($0.z) : $0.z;
50 $0.w = $0.w < INIT_FLT(0.0f) ? expm1($0.w) : $0.w;)";
51       } else {
52         result = R"(
53 $0.x = $0.x < INIT_FLT(0.0f) ? exp($0.x) - INIT_FLT(1.0f) : $0.x;
54 $0.y = $0.y < INIT_FLT(0.0f) ? exp($0.y) - INIT_FLT(1.0f) : $0.y;
55 $0.z = $0.z < INIT_FLT(0.0f) ? exp($0.z) - INIT_FLT(1.0f) : $0.z;
56 $0.w = $0.w < INIT_FLT(0.0f) ? exp($0.w) - INIT_FLT(1.0f) : $0.w;)";
57       }
58       break;
59     case OperationType::EXP:
60       result = "$0 = exp($0);\n";
61       break;
62     case OperationType::FLOOR:
63       result = "$0 = floor($0);\n";
64       break;
65     case OperationType::HARD_SWISH:
66       result =
67           "$0 *= clamp($0 * INIT_FLT(0.16666667f) + INIT_FLT(0.5f), "
68           "INIT_FLT4(0.0f), "
69           "INIT_FLT4(1.0f));\n";
70       break;
71     case OperationType::LOG:
72       result = "$0 = log($0);\n";
73       break;
74     case OperationType::NEG:
75       result = "$0 = -($0);\n";
76       break;
77     case OperationType::RSQRT:
78       result = "$0 = rsqrt($0);\n";
79       break;
80     case OperationType::SIGMOID:
81       if (gpu_info.IsApiOpenCl() && precision != CalculationsPrecision::F32) {
82         result =
83             "$0 = convert_half4(native_recip(1.0f + "
84             "native_exp(convert_float4(-$0))));\n";
85       } else {
86         result = "$0 = INIT_FLT4(1.0f) / (INIT_FLT4(1.0f) + exp(-($0)));\n";
87       }
88       break;
89     case OperationType::SIN:
90       result = "$0 = sin($0);\n";
91       break;
92     case OperationType::SQRT:
93       result = "$0 = sqrt($0);\n";
94       break;
95     case OperationType::SQUARE:
96       result = "$0 *= $0;\n";
97       break;
98     case OperationType::TANH:
99       result = "$0 = tanh($0);\n";
100       break;
101     default:
102       return "Unknown operation type;\n";
103   }
104   return absl::Substitute(result, input0);
105 }
106 
GetTwoInputCode(const OperationType & op_type,const std::string & result_var,const std::string & input0,const std::string & input1,bool swap_inputs=false)107 std::string GetTwoInputCode(const OperationType& op_type,
108                             const std::string& result_var,
109                             const std::string& input0,
110                             const std::string& input1,
111                             bool swap_inputs = false) {
112   std::string result;
113   switch (op_type) {
114     case OperationType::ADD:
115       result += "$0 = $1 + $2;\n";
116       break;
117     case OperationType::DIV:
118       result += "$0 = $1 / $2;\n";
119       break;
120     case OperationType::FLOOR_DIV:
121       result = "$0 = floor($1 / $2);\n";
122       break;
123     case OperationType::FLOOR_MOD:
124       result = "$0 = $1 - floor($1 / $2) * $2;\n";
125       break;
126     case OperationType::MAXIMUM:
127       result += "$0 = max($1, $2);\n";
128       break;
129     case OperationType::MINIMUM:
130       result += "$0 = min($1, $2);\n";
131       break;
132     case OperationType::MUL:
133       result += "$0 = $1 * $2;\n";
134       break;
135     case OperationType::POW:
136       result += "$0 = pow($1, $2);\n";
137       break;
138     case OperationType::SQUARED_DIFF:
139       result += "$0 = ($1 - $2) * ($1 - $2);\n";
140       break;
141     case OperationType::SUB:
142       result += "$0 = $1 - $2;\n";
143       break;
144     // Comparison operators
145     case OperationType::LESS:
146       result = "$0.x = $1.x < $2.x ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
147       result += "$0.y = $1.y < $2.y ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
148       result += "$0.z = $1.z < $2.z ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
149       result += "$0.w = $1.w < $2.w ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
150       break;
151     case OperationType::LESS_EQUAL:
152       result = "$0.x = $1.x <= $2.x ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
153       result += "$0.y = $1.y <= $2.y ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
154       result += "$0.z = $1.z <= $2.z ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
155       result += "$0.w = $1.w <= $2.w ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
156       break;
157     case OperationType::GREATER:
158       result = "$0.x = $1.x > $2.x ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
159       result += "$0.y = $1.y > $2.y ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
160       result += "$0.z = $1.z > $2.z ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
161       result += "$0.w = $1.w > $2.w ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
162       break;
163     case OperationType::GREATER_EQUAL:
164       result = "$0.x = $1.x >= $2.x ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
165       result += "$0.y = $1.y >= $2.y ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
166       result += "$0.z = $1.z >= $2.z ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
167       result += "$0.w = $1.w >= $2.w ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
168       break;
169     case OperationType::EQUAL:
170       result = "$0.x = $1.x == $2.x ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
171       result += "$0.y = $1.y == $2.y ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
172       result += "$0.z = $1.z == $2.z ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
173       result += "$0.w = $1.w == $2.w ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
174       break;
175     case OperationType::NOT_EQUAL:
176       result = "$0.x = $1.x != $2.x ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
177       result += "$0.y = $1.y != $2.y ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
178       result += "$0.z = $1.z != $2.z ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
179       result += "$0.w = $1.w != $2.w ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
180       break;
181     default:
182       return "Unknown operation type;\n";
183   }
184   if (swap_inputs) {
185     return absl::Substitute(result, result_var, input1, input0);
186   } else {
187     return absl::Substitute(result, result_var, input0, input1);
188   }
189 }
190 
191 // Creates simple two input (first input is runtime tensor and second input is
192 // scalar argument) operation, for example sub, div, pow, etc.
CreateElementwiseOneRuntimeOneScalar(const OperationDef & definition,const OperationType & op_type,float scalar_parameter,bool swap_inputs)193 GPUOperation CreateElementwiseOneRuntimeOneScalar(
194     const OperationDef& definition, const OperationType& op_type,
195     float scalar_parameter, bool swap_inputs) {
196   GPUOperation op(definition);
197   op.elementwise_ = true;
198   if (definition.precision == CalculationsPrecision::F32) {
199     op.args_.AddFloat("scalar", scalar_parameter);
200   } else {
201     op.args_.AddHalf("scalar", half(scalar_parameter));
202   }
203   op.code_ = "FLT4 second_val = INIT_FLT4(args.scalar);\n";
204   op.code_ += GetTwoInputCode(op_type, "in_out_value", "in_out_value",
205                               "second_val", swap_inputs);
206   return op;
207 }
208 
209 // Creates simple two input(first input is runtime tensor and second input is
210 // constant linear tensor) operation, for example sub, div and etc.
CreateElementwiseTwoInput(const GpuInfo & gpu_info,const OperationDef & definition,const OperationType & op_type,const tflite::gpu::Tensor<Linear,DataType::FLOAT32> & constant_tensor,bool swap_inputs)211 GPUOperation CreateElementwiseTwoInput(
212     const GpuInfo& gpu_info, const OperationDef& definition,
213     const OperationType& op_type,
214     const tflite::gpu::Tensor<Linear, DataType::FLOAT32>& constant_tensor,
215     bool swap_inputs) {
216   const BHWC shape = BHWC(1, 1, 1, constant_tensor.shape.v);
217   TensorStorageType storage_type;
218   auto status = SelectBestStorageType(
219       gpu_info, shape, definition.GetPrimaryStorageType(),
220       definition.GetDataType(), Layout::HWC, &storage_type);
221   if (!status.ok()) {
222     storage_type = TensorStorageType::BUFFER;
223   }
224   TensorDescriptor desc{definition.GetDataType(), storage_type, Layout::HWC};
225   desc.UploadData(constant_tensor);
226 
227   GPUOperation result(definition);
228   result.elementwise_ = true;
229   result.args_.AddObject("second_tensor",
230                          absl::make_unique<TensorDescriptor>(std::move(desc)));
231   const std::string s_coord = shape.c == 1 ? "0" : "S_COORD";
232   result.code_ = absl::StrCat(
233       "FLT4 second_val = args.second_tensor.Read(0, 0, ", s_coord, ");\n");
234   if (shape.c == 1) {
235     result.code_ += "  second_val.y = second_val.x;\n";
236     result.code_ += "  second_val.z = second_val.x;\n";
237     result.code_ += "  second_val.w = second_val.x;\n";
238   }
239   result.code_ += GetTwoInputCode(op_type, "in_out_value", "in_out_value",
240                                   "second_val", swap_inputs);
241   return result;
242 }
243 
244 // Creates simple two input(first input is runtime tensor and second input is
245 // constant HWC tensor) operation, for example sub, div and etc.
CreateElementwiseTwoInput(const GpuInfo & gpu_info,const OperationDef & definition,const OperationType & op_type,const tflite::gpu::Tensor<HWC,DataType::FLOAT32> & constant_tensor,bool swap_inputs)246 GPUOperation CreateElementwiseTwoInput(
247     const GpuInfo& gpu_info, const OperationDef& definition,
248     const OperationType& op_type,
249     const tflite::gpu::Tensor<HWC, DataType::FLOAT32>& constant_tensor,
250     bool swap_inputs) {
251   const BHWC shape = BHWC(1, constant_tensor.shape.h, constant_tensor.shape.w,
252                           constant_tensor.shape.c);
253   TensorStorageType storage_type;
254   auto status = SelectBestStorageType(
255       gpu_info, shape, definition.GetPrimaryStorageType(),
256       definition.GetDataType(), Layout::HWC, &storage_type);
257   if (!status.ok()) {
258     storage_type = TensorStorageType::BUFFER;
259   }
260   TensorDescriptor desc{definition.GetDataType(), storage_type, Layout::HWC};
261   desc.UploadData(constant_tensor);
262 
263   GPUOperation result(definition);
264   result.elementwise_ = true;
265   result.args_.AddObject("second_tensor",
266                          absl::make_unique<TensorDescriptor>(std::move(desc)));
267   const std::string x_coord = shape.w == 1 ? "0" : "X_COORD";
268   const std::string y_coord = shape.h == 1 ? "0" : "Y_COORD";
269   const std::string s_coord = shape.c == 1 ? "0" : "S_COORD";
270   result.code_ = absl::StrCat("FLT4 second_val = args.second_tensor.Read(",
271                               x_coord, ", ", y_coord, ", ", s_coord, ");\n");
272   if (shape.c == 1) {
273     result.code_ += "  second_val.y = second_val.x;\n";
274     result.code_ += "  second_val.z = second_val.x;\n";
275     result.code_ += "  second_val.w = second_val.x;\n";
276   }
277   result.code_ += GetTwoInputCode(op_type, "in_out_value", "in_out_value",
278                                   "second_val", swap_inputs);
279 
280   return result;
281 }
282 
283 }  // namespace
284 
CreateElementwiseOneInput(const GpuInfo & gpu_info,const OperationDef & definition,const OperationType & op_type)285 GPUOperation CreateElementwiseOneInput(const GpuInfo& gpu_info,
286                                        const OperationDef& definition,
287                                        const OperationType& op_type) {
288   GPUOperation op(definition);
289   op.elementwise_ = true;
290   op.code_ =
291       GetOneInputCode(gpu_info, op_type, definition.precision, "in_out_value");
292   return op;
293 }
294 
CreateElementwise(const GpuInfo & gpu_info,const OperationDef & definition,const OperationType & op_type,const ElementwiseAttributes & attr)295 GPUOperation CreateElementwise(const GpuInfo& gpu_info,
296                                const OperationDef& definition,
297                                const OperationType& op_type,
298                                const ElementwiseAttributes& attr) {
299   const float* scalar = absl::get_if<float>(&attr.param);
300   const auto* linear_tensor =
301       absl::get_if<tflite::gpu::Tensor<Linear, DataType::FLOAT32>>(&attr.param);
302   const auto* hwc_tensor =
303       absl::get_if<tflite::gpu::Tensor<HWC, DataType::FLOAT32>>(&attr.param);
304 
305   if (scalar) {
306     return CreateElementwiseOneRuntimeOneScalar(definition, op_type, *scalar,
307                                                 attr.runtime_tensor_is_second);
308   } else if (linear_tensor) {
309     return CreateElementwiseTwoInput(gpu_info, definition, op_type,
310                                      *linear_tensor,
311                                      attr.runtime_tensor_is_second);
312   } else if (hwc_tensor) {
313     return CreateElementwiseTwoInput(gpu_info, definition, op_type, *hwc_tensor,
314                                      attr.runtime_tensor_is_second);
315   } else {
316     return GPUOperation(definition);
317   }
318 }
319 
CreateElementwiseTwoInput(const OperationDef & definition,const OperationType & op_type,const BHWC & shape)320 GPUOperation CreateElementwiseTwoInput(const OperationDef& definition,
321                                        const OperationType& op_type,
322                                        const BHWC& shape) {
323   GPUOperation op(definition);
324   op.elementwise_ = true;
325   auto src_desc = definition.src_tensors[1];
326   if (definition.IsBatchSupported()) {
327     src_desc.SetStateVar("BatchedWidth", "true");
328   }
329   op.AddSrcTensor("second_tensor", src_desc);
330   const std::string x_coord = shape.w == 1 ? "0" : "X_COORD";
331   const std::string y_coord = shape.h == 1 ? "0" : "Y_COORD";
332   const std::string s_coord = shape.c == 1 ? "0" : "S_COORD";
333   op.code_ = absl::StrCat("FLT4 second_val = args.second_tensor.Read(", x_coord,
334                           ", ", y_coord, ", ", s_coord, ");\n");
335   if (shape.c == 1) {
336     op.code_ += "  second_val.y = second_val.x;\n";
337     op.code_ += "  second_val.z = second_val.x;\n";
338     op.code_ += "  second_val.w = second_val.x;\n";
339   }
340   op.code_ += GetTwoInputCode(op_type, "in_out_value", "in_out_value",
341                               "second_val", false);
342   return op;
343 }
344 
345 }  // namespace gpu
346 }  // namespace tflite
347