1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include "tensorflow/lite/delegates/gpu/common/tasks/elementwise.h"
17
18 #include <string>
19
20 #include "absl/strings/str_cat.h"
21 #include "absl/strings/substitute.h"
22 #include "tensorflow/lite/delegates/gpu/common/task/storage_type_util.h"
23
24 namespace tflite {
25 namespace gpu {
26
27 namespace {
GetOneInputCode(const GpuInfo & gpu_info,const OperationType & op_type,CalculationsPrecision precision,const std::string & input0)28 std::string GetOneInputCode(const GpuInfo& gpu_info,
29 const OperationType& op_type,
30 CalculationsPrecision precision,
31 const std::string& input0) {
32 std::string result;
33 switch (op_type) {
34 case OperationType::ABS:
35 result = "$0 = fabs($0);\n";
36 break;
37 case OperationType::COS:
38 result = "$0 = cos($0);\n";
39 break;
40 case OperationType::COPY:
41 // No op as inout_value will be copied to dest automatically.
42 result = "\n";
43 break;
44 case OperationType::ELU:
45 if (gpu_info.IsApiOpenCl()) {
46 result = R"(
47 $0.x = $0.x < INIT_FLT(0.0f) ? expm1($0.x) : $0.x;
48 $0.y = $0.y < INIT_FLT(0.0f) ? expm1($0.y) : $0.y;
49 $0.z = $0.z < INIT_FLT(0.0f) ? expm1($0.z) : $0.z;
50 $0.w = $0.w < INIT_FLT(0.0f) ? expm1($0.w) : $0.w;)";
51 } else {
52 result = R"(
53 $0.x = $0.x < INIT_FLT(0.0f) ? exp($0.x) - INIT_FLT(1.0f) : $0.x;
54 $0.y = $0.y < INIT_FLT(0.0f) ? exp($0.y) - INIT_FLT(1.0f) : $0.y;
55 $0.z = $0.z < INIT_FLT(0.0f) ? exp($0.z) - INIT_FLT(1.0f) : $0.z;
56 $0.w = $0.w < INIT_FLT(0.0f) ? exp($0.w) - INIT_FLT(1.0f) : $0.w;)";
57 }
58 break;
59 case OperationType::EXP:
60 result = "$0 = exp($0);\n";
61 break;
62 case OperationType::HARD_SWISH:
63 result =
64 "$0 *= clamp($0 * INIT_FLT(0.16666667f) + INIT_FLT(0.5f), "
65 "INIT_FLT4(0.0f), "
66 "INIT_FLT4(1.0f));\n";
67 break;
68 case OperationType::LOG:
69 result = "$0 = log($0);\n";
70 break;
71 case OperationType::NEG:
72 result = "$0 = -($0);\n";
73 break;
74 case OperationType::RSQRT:
75 result = "$0 = rsqrt($0);\n";
76 break;
77 case OperationType::SIGMOID:
78 if (gpu_info.IsApiOpenCl() && precision != CalculationsPrecision::F32) {
79 result =
80 "$0 = convert_half4(native_recip(1.0f + "
81 "native_exp(convert_float4(-$0))));\n";
82 } else {
83 result = "$0 = INIT_FLT4(1.0f) / (INIT_FLT4(1.0f) + exp(-($0)));\n";
84 }
85 break;
86 case OperationType::SIN:
87 result = "$0 = sin($0);\n";
88 break;
89 case OperationType::SQRT:
90 result = "$0 = sqrt($0);\n";
91 break;
92 case OperationType::SQUARE:
93 result = "$0 *= $0;\n";
94 break;
95 case OperationType::TANH:
96 result = "$0 = tanh($0);\n";
97 break;
98 default:
99 return "Unknown operation type;\n";
100 }
101 return absl::Substitute(result, input0);
102 }
103
GetTwoInputCode(const OperationType & op_type,const std::string & result_var,const std::string & input0,const std::string & input1,bool swap_inputs=false)104 std::string GetTwoInputCode(const OperationType& op_type,
105 const std::string& result_var,
106 const std::string& input0,
107 const std::string& input1,
108 bool swap_inputs = false) {
109 std::string result;
110 switch (op_type) {
111 case OperationType::ADD:
112 result += "$0 = $1 + $2;\n";
113 break;
114 case OperationType::DIV:
115 result += "$0 = $1 / $2;\n";
116 break;
117 case OperationType::MAXIMUM:
118 result += "$0 = max($1, $2);\n";
119 break;
120 case OperationType::MINIMUM:
121 result += "$0 = min($1, $2);\n";
122 break;
123 case OperationType::MUL:
124 result += "$0 = $1 * $2;\n";
125 break;
126 case OperationType::POW:
127 result += "$0 = pow($1, $2);\n";
128 break;
129 case OperationType::SQUARED_DIFF:
130 result += "$0 = ($1 - $2) * ($1 - $2);\n";
131 break;
132 case OperationType::SUB:
133 result += "$0 = $1 - $2;\n";
134 break;
135 // Comparison operators
136 case OperationType::LESS:
137 result = "$0.x = $1.x < $2.x ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
138 result += "$0.y = $1.y < $2.y ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
139 result += "$0.z = $1.z < $2.z ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
140 result += "$0.w = $1.w < $2.w ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
141 break;
142 case OperationType::LESS_EQUAL:
143 result = "$0.x = $1.x <= $2.x ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
144 result += "$0.y = $1.y <= $2.y ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
145 result += "$0.z = $1.z <= $2.z ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
146 result += "$0.w = $1.w <= $2.w ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
147 break;
148 case OperationType::GREATER:
149 result = "$0.x = $1.x > $2.x ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
150 result += "$0.y = $1.y > $2.y ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
151 result += "$0.z = $1.z > $2.z ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
152 result += "$0.w = $1.w > $2.w ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
153 break;
154 case OperationType::GREATER_EQUAL:
155 result = "$0.x = $1.x >= $2.x ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
156 result += "$0.y = $1.y >= $2.y ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
157 result += "$0.z = $1.z >= $2.z ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
158 result += "$0.w = $1.w >= $2.w ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
159 break;
160 case OperationType::EQUAL:
161 result = "$0.x = $1.x == $2.x ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
162 result += "$0.y = $1.y == $2.y ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
163 result += "$0.z = $1.z == $2.z ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
164 result += "$0.w = $1.w == $2.w ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
165 break;
166 case OperationType::NOT_EQUAL:
167 result = "$0.x = $1.x != $2.x ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
168 result += "$0.y = $1.y != $2.y ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
169 result += "$0.z = $1.z != $2.z ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
170 result += "$0.w = $1.w != $2.w ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
171 break;
172 default:
173 return "Unknown operation type;\n";
174 }
175 if (swap_inputs) {
176 return absl::Substitute(result, result_var, input1, input0);
177 } else {
178 return absl::Substitute(result, result_var, input0, input1);
179 }
180 }
181
182 // Creates simple two input (first input is runtime tensor and second input is
183 // scalar argument) operation, for example sub, div, pow, etc.
CreateElementwiseOneRuntimeOneScalar(const OperationDef & definition,const OperationType & op_type,float scalar_parameter,bool swap_inputs)184 GPUOperation CreateElementwiseOneRuntimeOneScalar(
185 const OperationDef& definition, const OperationType& op_type,
186 float scalar_parameter, bool swap_inputs) {
187 GPUOperation op(definition);
188 op.elementwise_ = true;
189 if (definition.precision == CalculationsPrecision::F32) {
190 op.args_.AddFloat("scalar", scalar_parameter);
191 } else {
192 op.args_.AddHalf("scalar", half(scalar_parameter));
193 }
194 op.code_ = "FLT4 second_val = INIT_FLT4(args.scalar);\n";
195 op.code_ += GetTwoInputCode(op_type, "in_out_value", "in_out_value",
196 "second_val", swap_inputs);
197 return op;
198 }
199
200 // Creates simple two input(first input is runtime tensor and second input is
201 // constant linear tensor) operation, for example sub, div and etc.
CreateElementwiseTwoInput(const GpuInfo & gpu_info,const OperationDef & definition,const OperationType & op_type,const tflite::gpu::Tensor<Linear,DataType::FLOAT32> & constant_tensor,bool swap_inputs)202 GPUOperation CreateElementwiseTwoInput(
203 const GpuInfo& gpu_info, const OperationDef& definition,
204 const OperationType& op_type,
205 const tflite::gpu::Tensor<Linear, DataType::FLOAT32>& constant_tensor,
206 bool swap_inputs) {
207 const BHWC shape = BHWC(1, 1, 1, constant_tensor.shape.v);
208 TensorStorageType storage_type =
209 SelectBestStorageType(gpu_info, shape, definition.GetPrimaryStorageType(),
210 definition.GetDataType(), Layout::HWC);
211 TensorDescriptor desc{definition.GetDataType(), storage_type, Layout::HWC};
212 desc.UploadData(constant_tensor);
213
214 GPUOperation result(definition);
215 result.elementwise_ = true;
216 result.args_.AddObject("second_tensor",
217 absl::make_unique<TensorDescriptor>(std::move(desc)));
218 const std::string s_coord = shape.c == 1 ? "0" : "S_COORD";
219 result.code_ = absl::StrCat(
220 "FLT4 second_val = args.second_tensor.Read(0, 0, ", s_coord, ");\n");
221 if (shape.c == 1) {
222 result.code_ += " second_val.y = second_val.x;\n";
223 result.code_ += " second_val.z = second_val.x;\n";
224 result.code_ += " second_val.w = second_val.x;\n";
225 }
226 result.code_ += GetTwoInputCode(op_type, "in_out_value", "in_out_value",
227 "second_val", swap_inputs);
228 return result;
229 }
230
231 // Creates simple two input(first input is runtime tensor and second input is
232 // constant HWC tensor) operation, for example sub, div and etc.
CreateElementwiseTwoInput(const GpuInfo & gpu_info,const OperationDef & definition,const OperationType & op_type,const tflite::gpu::Tensor<HWC,DataType::FLOAT32> & constant_tensor,bool swap_inputs)233 GPUOperation CreateElementwiseTwoInput(
234 const GpuInfo& gpu_info, const OperationDef& definition,
235 const OperationType& op_type,
236 const tflite::gpu::Tensor<HWC, DataType::FLOAT32>& constant_tensor,
237 bool swap_inputs) {
238 const BHWC shape = BHWC(1, constant_tensor.shape.h, constant_tensor.shape.w,
239 constant_tensor.shape.c);
240 TensorStorageType storage_type =
241 SelectBestStorageType(gpu_info, shape, definition.GetPrimaryStorageType(),
242 definition.GetDataType(), Layout::HWC);
243 TensorDescriptor desc{definition.GetDataType(), storage_type, Layout::HWC};
244 desc.UploadData(constant_tensor);
245
246 GPUOperation result(definition);
247 result.elementwise_ = true;
248 result.args_.AddObject("second_tensor",
249 absl::make_unique<TensorDescriptor>(std::move(desc)));
250 const std::string x_coord = shape.w == 1 ? "0" : "X_COORD";
251 const std::string y_coord = shape.h == 1 ? "0" : "Y_COORD";
252 const std::string s_coord = shape.c == 1 ? "0" : "S_COORD";
253 result.code_ = absl::StrCat("FLT4 second_val = args.second_tensor.Read(",
254 x_coord, ", ", y_coord, ", ", s_coord, ");\n");
255 if (shape.c == 1) {
256 result.code_ += " second_val.y = second_val.x;\n";
257 result.code_ += " second_val.z = second_val.x;\n";
258 result.code_ += " second_val.w = second_val.x;\n";
259 }
260 result.code_ += GetTwoInputCode(op_type, "in_out_value", "in_out_value",
261 "second_val", swap_inputs);
262
263 return result;
264 }
265
266 } // namespace
267
CreateElementwiseOneInput(const GpuInfo & gpu_info,const OperationDef & definition,const OperationType & op_type)268 GPUOperation CreateElementwiseOneInput(const GpuInfo& gpu_info,
269 const OperationDef& definition,
270 const OperationType& op_type) {
271 GPUOperation op(definition);
272 op.elementwise_ = true;
273 op.code_ =
274 GetOneInputCode(gpu_info, op_type, definition.precision, "in_out_value");
275 return op;
276 }
277
CreateElementwise(const GpuInfo & gpu_info,const OperationDef & definition,const OperationType & op_type,const ElementwiseAttributes & attr)278 GPUOperation CreateElementwise(const GpuInfo& gpu_info,
279 const OperationDef& definition,
280 const OperationType& op_type,
281 const ElementwiseAttributes& attr) {
282 const float* scalar = absl::get_if<float>(&attr.param);
283 const auto* linear_tensor =
284 absl::get_if<tflite::gpu::Tensor<Linear, DataType::FLOAT32>>(&attr.param);
285 const auto* hwc_tensor =
286 absl::get_if<tflite::gpu::Tensor<HWC, DataType::FLOAT32>>(&attr.param);
287
288 if (scalar) {
289 return CreateElementwiseOneRuntimeOneScalar(definition, op_type, *scalar,
290 attr.runtime_tensor_is_second);
291 } else if (linear_tensor) {
292 return CreateElementwiseTwoInput(gpu_info, definition, op_type,
293 *linear_tensor,
294 attr.runtime_tensor_is_second);
295 } else if (hwc_tensor) {
296 return CreateElementwiseTwoInput(gpu_info, definition, op_type, *hwc_tensor,
297 attr.runtime_tensor_is_second);
298 } else {
299 return GPUOperation(definition);
300 }
301 }
302
CreateElementwiseTwoInput(const OperationDef & definition,const OperationType & op_type,const BHWC & shape)303 GPUOperation CreateElementwiseTwoInput(const OperationDef& definition,
304 const OperationType& op_type,
305 const BHWC& shape) {
306 GPUOperation op(definition);
307 op.elementwise_ = true;
308 auto src_desc = definition.src_tensors[1];
309 if (definition.IsBatchSupported()) {
310 src_desc.SetStateVar("BatchedWidth", "true");
311 }
312 op.AddSrcTensor("second_tensor", src_desc);
313 const std::string x_coord = shape.w == 1 ? "0" : "X_COORD";
314 const std::string y_coord = shape.h == 1 ? "0" : "Y_COORD";
315 const std::string s_coord = shape.c == 1 ? "0" : "S_COORD";
316 op.code_ = absl::StrCat("FLT4 second_val = args.second_tensor.Read(", x_coord,
317 ", ", y_coord, ", ", s_coord, ");\n");
318 if (shape.c == 1) {
319 op.code_ += " second_val.y = second_val.x;\n";
320 op.code_ += " second_val.z = second_val.x;\n";
321 op.code_ += " second_val.w = second_val.x;\n";
322 }
323 op.code_ += GetTwoInputCode(op_type, "in_out_value", "in_out_value",
324 "second_val", false);
325 return op;
326 }
327
328 } // namespace gpu
329 } // namespace tflite
330