1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include "tensorflow/lite/delegates/gpu/common/tasks/elementwise.h"
17
18 #include <string>
19
20 #include "absl/strings/str_cat.h"
21 #include "absl/strings/substitute.h"
22 #include "tensorflow/lite/delegates/gpu/common/task/storage_type_util.h"
23
24 namespace tflite {
25 namespace gpu {
26
27 namespace {
GetOneInputCode(const GpuInfo & gpu_info,const OperationType & op_type,CalculationsPrecision precision,const std::string & input0)28 std::string GetOneInputCode(const GpuInfo& gpu_info,
29 const OperationType& op_type,
30 CalculationsPrecision precision,
31 const std::string& input0) {
32 std::string result;
33 switch (op_type) {
34 case OperationType::ABS:
35 result = "$0 = fabs($0);\n";
36 break;
37 case OperationType::COS:
38 result = "$0 = cos($0);\n";
39 break;
40 case OperationType::COPY:
41 // No op as inout_value will be copied to dest automatically.
42 result = "\n";
43 break;
44 case OperationType::ELU:
45 if (gpu_info.IsApiOpenCl()) {
46 result = R"(
47 $0.x = $0.x < INIT_FLT(0.0f) ? expm1($0.x) : $0.x;
48 $0.y = $0.y < INIT_FLT(0.0f) ? expm1($0.y) : $0.y;
49 $0.z = $0.z < INIT_FLT(0.0f) ? expm1($0.z) : $0.z;
50 $0.w = $0.w < INIT_FLT(0.0f) ? expm1($0.w) : $0.w;)";
51 } else {
52 result = R"(
53 $0.x = $0.x < INIT_FLT(0.0f) ? exp($0.x) - INIT_FLT(1.0f) : $0.x;
54 $0.y = $0.y < INIT_FLT(0.0f) ? exp($0.y) - INIT_FLT(1.0f) : $0.y;
55 $0.z = $0.z < INIT_FLT(0.0f) ? exp($0.z) - INIT_FLT(1.0f) : $0.z;
56 $0.w = $0.w < INIT_FLT(0.0f) ? exp($0.w) - INIT_FLT(1.0f) : $0.w;)";
57 }
58 break;
59 case OperationType::EXP:
60 result = "$0 = exp($0);\n";
61 break;
62 case OperationType::FLOOR:
63 result = "$0 = floor($0);\n";
64 break;
65 case OperationType::HARD_SWISH:
66 result =
67 "$0 *= clamp($0 * INIT_FLT(0.16666667f) + INIT_FLT(0.5f), "
68 "INIT_FLT4(0.0f), "
69 "INIT_FLT4(1.0f));\n";
70 break;
71 case OperationType::LOG:
72 result = "$0 = log($0);\n";
73 break;
74 case OperationType::NEG:
75 result = "$0 = -($0);\n";
76 break;
77 case OperationType::RSQRT:
78 result = "$0 = rsqrt($0);\n";
79 break;
80 case OperationType::SIGMOID:
81 if (gpu_info.IsApiOpenCl() && precision != CalculationsPrecision::F32) {
82 result =
83 "$0 = convert_half4(native_recip(1.0f + "
84 "native_exp(convert_float4(-$0))));\n";
85 } else {
86 result = "$0 = INIT_FLT4(1.0f) / (INIT_FLT4(1.0f) + exp(-($0)));\n";
87 }
88 break;
89 case OperationType::SIN:
90 result = "$0 = sin($0);\n";
91 break;
92 case OperationType::SQRT:
93 result = "$0 = sqrt($0);\n";
94 break;
95 case OperationType::SQUARE:
96 result = "$0 *= $0;\n";
97 break;
98 case OperationType::TANH:
99 result = "$0 = tanh($0);\n";
100 break;
101 default:
102 return "Unknown operation type;\n";
103 }
104 return absl::Substitute(result, input0);
105 }
106
GetTwoInputCode(const OperationType & op_type,const std::string & result_var,const std::string & input0,const std::string & input1,bool swap_inputs=false)107 std::string GetTwoInputCode(const OperationType& op_type,
108 const std::string& result_var,
109 const std::string& input0,
110 const std::string& input1,
111 bool swap_inputs = false) {
112 std::string result;
113 switch (op_type) {
114 case OperationType::ADD:
115 result += "$0 = $1 + $2;\n";
116 break;
117 case OperationType::DIV:
118 result += "$0 = $1 / $2;\n";
119 break;
120 case OperationType::FLOOR_DIV:
121 result = "$0 = floor($1 / $2);\n";
122 break;
123 case OperationType::FLOOR_MOD:
124 result = "$0 = $1 - floor($1 / $2) * $2;\n";
125 break;
126 case OperationType::MAXIMUM:
127 result += "$0 = max($1, $2);\n";
128 break;
129 case OperationType::MINIMUM:
130 result += "$0 = min($1, $2);\n";
131 break;
132 case OperationType::MUL:
133 result += "$0 = $1 * $2;\n";
134 break;
135 case OperationType::POW:
136 result += "$0 = pow($1, $2);\n";
137 break;
138 case OperationType::SQUARED_DIFF:
139 result += "$0 = ($1 - $2) * ($1 - $2);\n";
140 break;
141 case OperationType::SUB:
142 result += "$0 = $1 - $2;\n";
143 break;
144 // Comparison operators
145 case OperationType::LESS:
146 result = "$0.x = $1.x < $2.x ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
147 result += "$0.y = $1.y < $2.y ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
148 result += "$0.z = $1.z < $2.z ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
149 result += "$0.w = $1.w < $2.w ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
150 break;
151 case OperationType::LESS_EQUAL:
152 result = "$0.x = $1.x <= $2.x ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
153 result += "$0.y = $1.y <= $2.y ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
154 result += "$0.z = $1.z <= $2.z ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
155 result += "$0.w = $1.w <= $2.w ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
156 break;
157 case OperationType::GREATER:
158 result = "$0.x = $1.x > $2.x ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
159 result += "$0.y = $1.y > $2.y ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
160 result += "$0.z = $1.z > $2.z ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
161 result += "$0.w = $1.w > $2.w ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
162 break;
163 case OperationType::GREATER_EQUAL:
164 result = "$0.x = $1.x >= $2.x ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
165 result += "$0.y = $1.y >= $2.y ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
166 result += "$0.z = $1.z >= $2.z ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
167 result += "$0.w = $1.w >= $2.w ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
168 break;
169 case OperationType::EQUAL:
170 result = "$0.x = $1.x == $2.x ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
171 result += "$0.y = $1.y == $2.y ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
172 result += "$0.z = $1.z == $2.z ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
173 result += "$0.w = $1.w == $2.w ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
174 break;
175 case OperationType::NOT_EQUAL:
176 result = "$0.x = $1.x != $2.x ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
177 result += "$0.y = $1.y != $2.y ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
178 result += "$0.z = $1.z != $2.z ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
179 result += "$0.w = $1.w != $2.w ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
180 break;
181 default:
182 return "Unknown operation type;\n";
183 }
184 if (swap_inputs) {
185 return absl::Substitute(result, result_var, input1, input0);
186 } else {
187 return absl::Substitute(result, result_var, input0, input1);
188 }
189 }
190
191 // Creates simple two input (first input is runtime tensor and second input is
192 // scalar argument) operation, for example sub, div, pow, etc.
CreateElementwiseOneRuntimeOneScalar(const OperationDef & definition,const OperationType & op_type,float scalar_parameter,bool swap_inputs)193 GPUOperation CreateElementwiseOneRuntimeOneScalar(
194 const OperationDef& definition, const OperationType& op_type,
195 float scalar_parameter, bool swap_inputs) {
196 GPUOperation op(definition);
197 op.elementwise_ = true;
198 if (definition.precision == CalculationsPrecision::F32) {
199 op.args_.AddFloat("scalar", scalar_parameter);
200 } else {
201 op.args_.AddHalf("scalar", half(scalar_parameter));
202 }
203 op.code_ = "FLT4 second_val = INIT_FLT4(args.scalar);\n";
204 op.code_ += GetTwoInputCode(op_type, "in_out_value", "in_out_value",
205 "second_val", swap_inputs);
206 return op;
207 }
208
209 // Creates simple two input(first input is runtime tensor and second input is
210 // constant linear tensor) operation, for example sub, div and etc.
CreateElementwiseTwoInput(const GpuInfo & gpu_info,const OperationDef & definition,const OperationType & op_type,const tflite::gpu::Tensor<Linear,DataType::FLOAT32> & constant_tensor,bool swap_inputs)211 GPUOperation CreateElementwiseTwoInput(
212 const GpuInfo& gpu_info, const OperationDef& definition,
213 const OperationType& op_type,
214 const tflite::gpu::Tensor<Linear, DataType::FLOAT32>& constant_tensor,
215 bool swap_inputs) {
216 const BHWC shape = BHWC(1, 1, 1, constant_tensor.shape.v);
217 TensorStorageType storage_type;
218 auto status = SelectBestStorageType(
219 gpu_info, shape, definition.GetPrimaryStorageType(),
220 definition.GetDataType(), Layout::HWC, &storage_type);
221 if (!status.ok()) {
222 storage_type = TensorStorageType::BUFFER;
223 }
224 TensorDescriptor desc{definition.GetDataType(), storage_type, Layout::HWC};
225 desc.UploadData(constant_tensor);
226
227 GPUOperation result(definition);
228 result.elementwise_ = true;
229 result.args_.AddObject("second_tensor",
230 absl::make_unique<TensorDescriptor>(std::move(desc)));
231 const std::string s_coord = shape.c == 1 ? "0" : "S_COORD";
232 result.code_ = absl::StrCat(
233 "FLT4 second_val = args.second_tensor.Read(0, 0, ", s_coord, ");\n");
234 if (shape.c == 1) {
235 result.code_ += " second_val.y = second_val.x;\n";
236 result.code_ += " second_val.z = second_val.x;\n";
237 result.code_ += " second_val.w = second_val.x;\n";
238 }
239 result.code_ += GetTwoInputCode(op_type, "in_out_value", "in_out_value",
240 "second_val", swap_inputs);
241 return result;
242 }
243
244 // Creates simple two input(first input is runtime tensor and second input is
245 // constant HWC tensor) operation, for example sub, div and etc.
CreateElementwiseTwoInput(const GpuInfo & gpu_info,const OperationDef & definition,const OperationType & op_type,const tflite::gpu::Tensor<HWC,DataType::FLOAT32> & constant_tensor,bool swap_inputs)246 GPUOperation CreateElementwiseTwoInput(
247 const GpuInfo& gpu_info, const OperationDef& definition,
248 const OperationType& op_type,
249 const tflite::gpu::Tensor<HWC, DataType::FLOAT32>& constant_tensor,
250 bool swap_inputs) {
251 const BHWC shape = BHWC(1, constant_tensor.shape.h, constant_tensor.shape.w,
252 constant_tensor.shape.c);
253 TensorStorageType storage_type;
254 auto status = SelectBestStorageType(
255 gpu_info, shape, definition.GetPrimaryStorageType(),
256 definition.GetDataType(), Layout::HWC, &storage_type);
257 if (!status.ok()) {
258 storage_type = TensorStorageType::BUFFER;
259 }
260 TensorDescriptor desc{definition.GetDataType(), storage_type, Layout::HWC};
261 desc.UploadData(constant_tensor);
262
263 GPUOperation result(definition);
264 result.elementwise_ = true;
265 result.args_.AddObject("second_tensor",
266 absl::make_unique<TensorDescriptor>(std::move(desc)));
267 const std::string x_coord = shape.w == 1 ? "0" : "X_COORD";
268 const std::string y_coord = shape.h == 1 ? "0" : "Y_COORD";
269 const std::string s_coord = shape.c == 1 ? "0" : "S_COORD";
270 result.code_ = absl::StrCat("FLT4 second_val = args.second_tensor.Read(",
271 x_coord, ", ", y_coord, ", ", s_coord, ");\n");
272 if (shape.c == 1) {
273 result.code_ += " second_val.y = second_val.x;\n";
274 result.code_ += " second_val.z = second_val.x;\n";
275 result.code_ += " second_val.w = second_val.x;\n";
276 }
277 result.code_ += GetTwoInputCode(op_type, "in_out_value", "in_out_value",
278 "second_val", swap_inputs);
279
280 return result;
281 }
282
283 } // namespace
284
CreateElementwiseOneInput(const GpuInfo & gpu_info,const OperationDef & definition,const OperationType & op_type)285 GPUOperation CreateElementwiseOneInput(const GpuInfo& gpu_info,
286 const OperationDef& definition,
287 const OperationType& op_type) {
288 GPUOperation op(definition);
289 op.elementwise_ = true;
290 op.code_ =
291 GetOneInputCode(gpu_info, op_type, definition.precision, "in_out_value");
292 return op;
293 }
294
CreateElementwise(const GpuInfo & gpu_info,const OperationDef & definition,const OperationType & op_type,const ElementwiseAttributes & attr)295 GPUOperation CreateElementwise(const GpuInfo& gpu_info,
296 const OperationDef& definition,
297 const OperationType& op_type,
298 const ElementwiseAttributes& attr) {
299 const float* scalar = absl::get_if<float>(&attr.param);
300 const auto* linear_tensor =
301 absl::get_if<tflite::gpu::Tensor<Linear, DataType::FLOAT32>>(&attr.param);
302 const auto* hwc_tensor =
303 absl::get_if<tflite::gpu::Tensor<HWC, DataType::FLOAT32>>(&attr.param);
304
305 if (scalar) {
306 return CreateElementwiseOneRuntimeOneScalar(definition, op_type, *scalar,
307 attr.runtime_tensor_is_second);
308 } else if (linear_tensor) {
309 return CreateElementwiseTwoInput(gpu_info, definition, op_type,
310 *linear_tensor,
311 attr.runtime_tensor_is_second);
312 } else if (hwc_tensor) {
313 return CreateElementwiseTwoInput(gpu_info, definition, op_type, *hwc_tensor,
314 attr.runtime_tensor_is_second);
315 } else {
316 return GPUOperation(definition);
317 }
318 }
319
CreateElementwiseTwoInput(const OperationDef & definition,const OperationType & op_type,const BHWC & shape)320 GPUOperation CreateElementwiseTwoInput(const OperationDef& definition,
321 const OperationType& op_type,
322 const BHWC& shape) {
323 GPUOperation op(definition);
324 op.elementwise_ = true;
325 auto src_desc = definition.src_tensors[1];
326 if (definition.IsBatchSupported()) {
327 src_desc.SetStateVar("BatchedWidth", "true");
328 }
329 op.AddSrcTensor("second_tensor", src_desc);
330 const std::string x_coord = shape.w == 1 ? "0" : "X_COORD";
331 const std::string y_coord = shape.h == 1 ? "0" : "Y_COORD";
332 const std::string s_coord = shape.c == 1 ? "0" : "S_COORD";
333 op.code_ = absl::StrCat("FLT4 second_val = args.second_tensor.Read(", x_coord,
334 ", ", y_coord, ", ", s_coord, ");\n");
335 if (shape.c == 1) {
336 op.code_ += " second_val.y = second_val.x;\n";
337 op.code_ += " second_val.z = second_val.x;\n";
338 op.code_ += " second_val.w = second_val.x;\n";
339 }
340 op.code_ += GetTwoInputCode(op_type, "in_out_value", "in_out_value",
341 "second_val", false);
342 return op;
343 }
344
345 } // namespace gpu
346 } // namespace tflite
347