/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/delegates/gpu/common/selectors/operation_selector.h"

#include "absl/strings/str_cat.h"
#include "absl/types/any.h"
#include "tensorflow/lite/delegates/gpu/common/data_type.h"
#include "tensorflow/lite/delegates/gpu/common/gpu_info.h"
#include "tensorflow/lite/delegates/gpu/common/operations.h"
#include "tensorflow/lite/delegates/gpu/common/selectors/convolution_selector.h"
#include "tensorflow/lite/delegates/gpu/common/selectors/convolution_transposed_selector.h"
#include "tensorflow/lite/delegates/gpu/common/selectors/default_selector.h"
#include "tensorflow/lite/delegates/gpu/common/selectors/dw_convolution_selector.h"
#include "tensorflow/lite/delegates/gpu/common/selectors/fully_connected_selector.h"
#include "tensorflow/lite/delegates/gpu/common/selectors/simple_selectors.h"
#include "tensorflow/lite/delegates/gpu/common/shape.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
#include "tensorflow/lite/delegates/gpu/common/task/storage_type_util.h"
#include "tensorflow/lite/delegates/gpu/common/task/tensor_desc.h"
#include "tensorflow/lite/delegates/gpu/common/task/weights_conversion.h"
#include "tensorflow/lite/delegates/gpu/common/tasks/elementwise.h"
#include "tensorflow/lite/delegates/gpu/common/tasks/mean_stddev_normalization.h"
#include "tensorflow/lite/delegates/gpu/common/tasks/transpose.h"
#include "tensorflow/lite/delegates/gpu/common/tensor.h"
#include "tensorflow/lite/delegates/gpu/common/winograd_util.h"

namespace tflite {
namespace gpu {
namespace {
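// Heuristic for choosing the Winograd4x4To6x6 path: both the spatial output
// (counted in 4x4 tiles) and the channel depth (counted in 4-channel slices)
// must be large enough to amortize the transform overhead. The thresholds
// below are tuned per GPU vendor.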
bool IsRecommendedForWinograd4x4To6x6(const Convolution2DAttributes& attr,
                                      const GpuInfo& gpu_info,
                                      const BHWC& dst_shape) {
  const int tiles_x = DivideRoundUp(dst_shape.w, 4);
  const int tiles_y = DivideRoundUp(dst_shape.h, 4);
  const int total_tiles = tiles_x * tiles_y;
  const int src_depth = DivideRoundUp(attr.weights.shape.i, 4);
  const int dst_depth = DivideRoundUp(attr.weights.shape.o, 4);
  int min_depth = 16;
  if (gpu_info.IsAdreno() || gpu_info.IsAMD()) {
    min_depth = 32;
  }
  int min_tiles = 32;
  if (gpu_info.IsAdreno()) {
    if (gpu_info.adreno_info.IsAdreno6xx()) {
      min_tiles = 128;
    } else {
      min_tiles = 64;
    }
  }
  if (gpu_info.IsAMD()) {
    min_tiles = 64;
  }
  if (total_tiles >= min_tiles * 8) {
    min_depth /= 4;
    min_depth = std::max(min_depth, 8);
  } else if (total_tiles >= min_tiles * 4) {
    min_depth /= 2;
    min_depth = std::max(min_depth, 8);
  }
  const bool recommended_channels =
      src_depth >= min_depth && dst_depth >= min_depth;
  const bool recommended_hw = total_tiles >= min_tiles;
  return recommended_channels && recommended_hw;
}

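// Tries to replace a convolution suitable for the Winograd4x4To6x6 transform
// with a three-operation subgraph:
//   1) Winograd4x4To36 input transform    -> temporary tensor -1,
//   2) convolution in the Winograd domain -> temporary tensor -2,
//   3) Winograd36To4x4 output transform   -> node output.
// The negative ids refer to the temporary tensors registered in
// gpu_subgraph->new_tensors. Returns UnimplementedError when the transform is
// not suitable or not recommended for this shape/GPU.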
absl::Status WinogradFromNode(const GpuInfo& gpu_info,
                              const std::vector<Value*>& inputs,
                              const std::vector<Value*>& outputs,
                              const OperationDef& op_def, ModelHints hints,
                              const BHWC& input_shape, const BHWC& output_shape,
                              const Convolution2DAttributes& attr,
                              GPUOperationsSubgraph* gpu_subgraph) {
  if (!IsSuitableForWinograd4x4To6x6(attr)) {
    return absl::UnimplementedError("No implementation for this case.");
  }
  if (!IsRecommendedForWinograd4x4To6x6(attr, gpu_info, output_shape)) {
    return absl::UnimplementedError("Not recommended for this case.");
  }

  const int tiles_x = DivideRoundUp(output_shape.w, 4);
  const int tiles_y = DivideRoundUp(output_shape.h, 4);
  const BHWC shape_0{input_shape.b, 36, tiles_x * tiles_y, input_shape.c};
  const BHWC shape_1{input_shape.b, 36, tiles_x * tiles_y, output_shape.c};
  TensorDescriptor td_0;
  td_0.storage_type = SelectBestStorageType(
      gpu_info, shape_0, op_def.src_tensors[0].storage_type,
      op_def.src_tensors[0].data_type, op_def.src_tensors[0].layout);
  td_0.data_type = op_def.src_tensors[0].data_type;
  td_0.layout = op_def.src_tensors[0].layout;
  TensorDescriptor td_1;
  td_1.storage_type = SelectBestStorageType(
      gpu_info, shape_1, op_def.src_tensors[0].storage_type,
      op_def.src_tensors[0].data_type, op_def.src_tensors[0].layout);
  td_1.data_type = op_def.src_tensors[0].data_type;
  td_1.layout = op_def.src_tensors[0].layout;
  gpu_subgraph->new_tensors = {{shape_0, td_0}, {shape_1, td_1}};
  gpu_subgraph->operations.clear();
  gpu_subgraph->operations.resize(3);

  OperationDef winograd_up_def;
  winograd_up_def.precision = op_def.precision;
  winograd_up_def.src_tensors.push_back(op_def.src_tensors[0]);
  winograd_up_def.dst_tensors.push_back(td_0);
  auto& winograd_up = gpu_subgraph->operations[0];
  winograd_up.operation =
      SelectWinograd4x4To36(gpu_info, attr.padding, winograd_up_def);
  winograd_up.input_ids = {static_cast<int>(inputs[0]->id)};
  winograd_up.output_ids = {-1};

  OperationDef conv_def;
  conv_def.precision = op_def.precision;
  conv_def.src_tensors.push_back(td_0);
  conv_def.dst_tensors.push_back(td_1);
  auto& conv = gpu_subgraph->operations[1];
  conv.input_ids = {-1};
  conv.output_ids = {-2};
  conv.operation = SelectConvolutionForWinograd(attr, input_shape, gpu_info,
                                                conv_def, hints);

  OperationDef winograd_down_def;
  winograd_down_def.precision = op_def.precision;
  winograd_down_def.src_tensors.push_back(td_1);
  winograd_down_def.dst_tensors.push_back(op_def.dst_tensors[0]);
  auto& winograd_down = gpu_subgraph->operations[2];
  winograd_down.input_ids = {-2};
  winograd_down.output_ids = {static_cast<int>(outputs[0]->id)};
  auto bias_copy = attr.bias;
  if (bias_copy.shape.v < attr.weights.shape.o) {
    bias_copy.shape = Linear(attr.weights.shape.o);
    bias_copy.data.resize(attr.weights.shape.o);
  }
  winograd_down.operation =
      SelectWinograd36To4x4(gpu_info, winograd_down_def, bias_copy);
  return absl::OkStatus();
}

}  // namespace

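// Converts a single model Node into one or more GPUOperations. Most operation
// types map to a single GPU task via InitSingleOpSubgraph; a few (Winograd
// convolution, convolutions with runtime weights, BATCHED_MATMUL) are expanded
// into multi-operation subgraphs that communicate through temporary tensors.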
absl::Status GPUOperationFromNode(const GpuInfo& gpu_info,
                                  const OperationDef& op_def, ModelHints hints,
                                  const std::vector<Value*>& inputs,
                                  const std::vector<Value*>& outputs,
                                  const Node& node,
                                  GPUOperationsSubgraph* gpu_subgraph) {
  std::unique_ptr<GPUOperation>* gpu_op =
      InitSingleOpSubgraph(inputs, outputs, gpu_subgraph);
  auto op_type = OperationTypeFromString(node.operation.type);
  switch (op_type) {
    case OperationType::ADD: {
      if (inputs.size() == 2 &&
          (inputs[0]->tensor.shape.c == inputs[1]->tensor.shape.c ||
           inputs[1]->tensor.shape.c == 1)) {
        GPUOperation operation =
            CreateElementwiseTwoInput(op_def, op_type, inputs[1]->tensor.shape);
        *gpu_op = absl::make_unique<GPUOperation>(std::move(operation));
        return absl::OkStatus();
      } else if (inputs.size() >= 2) {
        auto output = outputs[0];
        std::vector<int> channels(inputs.size());
        for (int i = 0; i < inputs.size(); ++i) {
          channels[i] = inputs[i]->tensor.shape.c;
        }
        SelectAdd(op_def, channels, output->tensor.shape.c, gpu_op);
        return absl::OkStatus();
      } else if (inputs.size() == 1 && node.operation.attributes.has_value()) {
        auto attr =
            absl::any_cast<ElementwiseAttributes>(node.operation.attributes);
        GPUOperation operation =
            CreateElementwise(gpu_info, op_def, op_type, attr);
        *gpu_op = absl::make_unique<GPUOperation>(std::move(operation));
        return absl::OkStatus();
      }
      return absl::UnimplementedError(absl::StrCat(
          "No support of ", node.operation.type, " with these parameters"));
    }
    case OperationType::BATCHED_MATMUL: {
      // Currently only batch = 1 is supported.
      // The MatMul is replaced with this sequence:
      //   1) Transpose the second tensor (weights): (1x1xHxW) -> (Wx1x1xH).
      //   2) Convert the transposed weights from step 1 to convolution weights.
      //   3) Run a usual convolution.
      auto second_shape = inputs[1]->tensor.shape;
      auto dst_shape = outputs[0]->tensor.shape;
      if (dst_shape.b != 1) {
        return absl::UnimplementedError(
            "Currently only batch = 1 supported for BATCHED_MATMUL.");
      }
      BHWC weights_shape(second_shape.c, 1, 1, second_shape.w);
      Convolution2DAttributes attr;
      attr.strides = HW(1, 1);
      attr.dilations = HW(1, 1);
      attr.padding.appended = HW(0, 0);
      attr.padding.prepended = HW(0, 0);
      attr.bias.shape = Linear(weights_shape.b);
      attr.bias.data.resize(weights_shape.b, 0.0f);

      TensorDescriptor transposed_desc = {op_def.src_tensors[1].data_type,
                                          op_def.src_tensors[1].storage_type,
                                          Layout::BHWC};
      transposed_desc.storage_type = SelectBestStorageType(
          gpu_info, weights_shape, transposed_desc.storage_type,
          transposed_desc.data_type, transposed_desc.layout);
      TensorDescriptor weights_desc = {op_def.src_tensors[1].data_type,
                                       TensorStorageType::BUFFER, Layout::BHWC};
      gpu_subgraph->operations.clear();
      gpu_subgraph->operations.resize(3);
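      // Subgraph wiring: the transpose writes temporary tensor -2, the weights
      // converter reads -2 and writes -1, and the convolution reads the node
      // input together with -1. The negative ids refer to the entries of
      // gpu_subgraph->new_tensors declared below.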
      auto& transpose_op = gpu_subgraph->operations[0];
      auto& converter_op = gpu_subgraph->operations[1];
      auto& conv_op = gpu_subgraph->operations[2];
      conv_op.input_ids = {static_cast<int>(inputs[0]->id), -1};
      conv_op.output_ids = {static_cast<int>(outputs[0]->id)};
      OperationDef conv_def = op_def;
      conv_def.src_tensors[1] = weights_desc;
      WeightsDescription conv_weights_desc;
      conv_op.operation = SelectConvolutionWithDynamicWeights(
          attr, weights_shape, dst_shape, gpu_info, conv_def, hints,
          &conv_weights_desc);

      int aligned_output =
          AlignByN(weights_shape.b, conv_weights_desc.GetOutputGroupSize() * 4);
      int aligned_input = AlignByN(weights_shape.c, 4);
      gpu_subgraph->new_tensors = {{BHWC(1, 1, 1,
                                         aligned_output * aligned_input *
                                             weights_shape.h * weights_shape.w),
                                    weights_desc},
                                   {weights_shape, transposed_desc}};
      OperationDef converter_def;
      converter_def.precision = op_def.precision;
      converter_def.src_tensors.push_back(transposed_desc);
      converter_def.dst_tensors.push_back(weights_desc);

      converter_op.input_ids = {-2};
      converter_op.output_ids = {-1};
      converter_op.operation =
          SelectConverterToConvWeights(conv_weights_desc, converter_def, hints);

      OperationDef transpose_def;
      transpose_def.precision = op_def.precision;
      transpose_def.src_tensors.push_back(op_def.src_tensors[1]);
      transpose_def.dst_tensors.push_back(transposed_desc);

      transpose_op.input_ids = {static_cast<int>(inputs[1]->id)};
      transpose_op.output_ids = {-2};
      TransposeAttributes transpose_attr;
      transpose_attr.perm = BHWC(3, 0, 1, 2);
      transpose_op.operation = absl::make_unique<GPUOperation>(
          CreateTranspose(transpose_def, transpose_attr));
      return absl::OkStatus();
    }
    case OperationType::CONCAT: {
      auto attr = absl::any_cast<ConcatAttributes>(node.operation.attributes);
      std::vector<int> channels(inputs.size());
      for (int i = 0; i < inputs.size(); ++i) {
        channels[i] = inputs[i]->tensor.shape.c;
      }
      return SelectConcat(attr, channels, op_def, gpu_info, gpu_op);
    }
    case OperationType::CONVOLUTION_2D: {
      auto attr =
          absl::any_cast<Convolution2DAttributes>(node.operation.attributes);
      auto input_shape = inputs[0]->tensor.shape;
      auto output_shape = outputs[0]->tensor.shape;
      if (inputs.size() == 1) {
        if (WinogradFromNode(gpu_info, inputs, outputs, op_def, hints,
                             input_shape, output_shape, attr, gpu_subgraph)
                .ok()) {
          return absl::OkStatus();
        } else {
          gpu_op = InitSingleOpSubgraph(inputs, outputs, gpu_subgraph);
          *gpu_op =
              SelectConvolution(attr, output_shape, gpu_info, op_def, hints);
          return absl::OkStatus();
        }
      } else {
        auto weights_shape = inputs[1]->tensor.shape;
        if (attr.bias.data.empty()) {
          attr.bias.shape = Linear(weights_shape.b);
          attr.bias.data.resize(weights_shape.b, 0.0f);
        }
        TensorDescriptor weights_desc = {op_def.src_tensors[1].data_type,
                                         TensorStorageType::BUFFER,
                                         Layout::BHWC};
        gpu_subgraph->operations.clear();
        gpu_subgraph->operations.resize(2);
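        // Two-operation subgraph: the converter rewrites the runtime weights
        // tensor into the buffer layout expected by the convolution
        // (temporary tensor -1); the convolution then reads the node input
        // together with -1.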
        auto& converter_op = gpu_subgraph->operations[0];
        auto& conv_op = gpu_subgraph->operations[1];
        conv_op.input_ids = {static_cast<int>(inputs[0]->id), -1};
        conv_op.output_ids = {static_cast<int>(outputs[0]->id)};
        OperationDef conv_def = op_def;
        conv_def.src_tensors[1] = weights_desc;
        WeightsDescription conv_weights_desc;
        conv_op.operation = SelectConvolutionWithDynamicWeights(
            attr, weights_shape, output_shape, gpu_info, conv_def, hints,
            &conv_weights_desc);

        int aligned_output = AlignByN(
            weights_shape.b, conv_weights_desc.GetOutputGroupSize() * 4);
        int aligned_input = AlignByN(weights_shape.c, 4);
        gpu_subgraph->new_tensors = {
            {BHWC(1, 1, 1,
                  aligned_output * aligned_input * weights_shape.h *
                      weights_shape.w),
             weights_desc}};
        OperationDef converter_def;
        converter_def.precision = op_def.precision;
        converter_def.src_tensors.push_back(op_def.src_tensors[1]);
        converter_def.dst_tensors.push_back(weights_desc);

        converter_op.input_ids = {static_cast<int>(inputs[1]->id)};
        converter_op.output_ids = {-1};
        converter_op.operation = SelectConverterToConvWeights(
            conv_weights_desc, converter_def, hints);
        return absl::OkStatus();
      }
    }
    case OperationType::CONVOLUTION_TRANSPOSED: {
      auto attr = absl::any_cast<ConvolutionTransposedAttributes>(
          node.operation.attributes);
      if (inputs.size() == 1) {
        *gpu_op = SelectConvolutionTransposed(attr, gpu_info, op_def);
        return absl::OkStatus();
      } else {
        // CONVOLUTION_TRANSPOSED with runtime weights.
        OHWI weights_shape =
            OHWI(inputs[1]->tensor.shape.b, inputs[1]->tensor.shape.h,
                 inputs[1]->tensor.shape.w, inputs[1]->tensor.shape.c);
        if (attr.bias.data.empty()) {
          attr.bias.shape = Linear(weights_shape.o);
          attr.bias.data.resize(weights_shape.o, 0.0f);
        }
        gpu_subgraph->operations.clear();
        gpu_subgraph->operations.resize(2);
        auto& converter_op = gpu_subgraph->operations[0];
        auto& conv_op = gpu_subgraph->operations[1];
        WeightsDescription weights_desc;
        conv_op.operation = SelectConvolutionTransposedWithDynamicWeights(
            attr, gpu_info, op_def, &weights_desc);
        conv_op.output_ids = {static_cast<int>(outputs[0]->id)};

        const int dst_depth = AlignByN(DivideRoundUp(weights_shape.o, 4),
                                       weights_desc.GetOutputGroupSize());
        const int src_depth = DivideRoundUp(weights_shape.i, 4);
        const int kernel_x = weights_shape.w;
        const int kernel_y = weights_shape.h;
        if (weights_desc.layout ==
                WeightsLayout::k2DX4I4YIsHWIAndXIsOOGroupO4 ||
            weights_desc.layout ==
                WeightsLayout::k2DX4O4YIsHWIAndXIsOOGroupI4) {
          // Weights are four 2D textures.
          conv_op.input_ids = {static_cast<int>(inputs[0]->id), -1, -2, -3, -4};
          int texture_width = dst_depth;
          int texture_height = src_depth * kernel_x * kernel_y;
          for (int i = 0; i < 4; ++i) {
            gpu_subgraph->new_tensors.push_back(
                {BHWC(1, texture_height, texture_width, 4),
                 TensorDescriptor(op_def.GetDataType(),
                                  TensorStorageType::TEXTURE_2D, Layout::HWC)});
          }
        } else {
          // Weights are a single buffer.
          conv_op.input_ids = {static_cast<int>(inputs[0]->id), -1};
          gpu_subgraph->new_tensors = {
              {BHWC(
                   1, 1, 1,
                   GetTotalElementsCountForLayout(weights_desc, weights_shape)),
               TensorDescriptor(op_def.GetDataType(), TensorStorageType::BUFFER,
                                Layout::HWC)}};
        }
        OperationDef conv_def = conv_op.operation->GetDefinition();
        OperationDef converter_def;
        converter_def.precision = op_def.precision;
        converter_def.src_tensors.push_back(op_def.src_tensors[1]);
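        // The converter emits one output per extra weights tensor that the
        // selected transposed convolution expects; the temporary ids pushed
        // below match the extra entries wired into conv_op.input_ids above.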
        for (int i = 1; i < conv_def.src_tensors.size(); ++i) {
          converter_def.dst_tensors.push_back(conv_def.src_tensors[i]);
          converter_op.output_ids.push_back(-i);
        }

        converter_op.input_ids = {static_cast<int>(inputs[1]->id)};
        converter_op.operation =
            SelectConverterToConvWeights(weights_desc, converter_def, hints);
        return absl::OkStatus();
      }
    }
    case OperationType::DEPTHWISE_CONVOLUTION: {
      auto attr = absl::any_cast<DepthwiseConvolution2DAttributes>(
          node.operation.attributes);
      if (inputs.size() == 1) {
        *gpu_op = SelectDWConvolution(attr, gpu_info, op_def);
      } else {
        if (inputs[1]->tensor.shape.b != 1) {
          return absl::UnimplementedError(
              "No support of depthwise runtime weights with channel multiplier "
              "!= 1");
        }
        *gpu_op = SelectDWConvolutionDynamicWeights(attr, gpu_info, op_def);
      }
      return absl::OkStatus();
    }
    case OperationType::FULLY_CONNECTED: {
      auto attr =
          absl::any_cast<FullyConnectedAttributes>(node.operation.attributes);
      *gpu_op = SelectFullyConnected(attr, gpu_info, op_def,
                                     inputs[0]->tensor.shape.b);
      return absl::OkStatus();
    }
    case OperationType::LSTM: {
      *gpu_op = SelectLSTM(op_def, gpu_info);
      return absl::OkStatus();
    }
    case OperationType::MAX_UNPOOLING_2D: {
      auto attr =
          absl::any_cast<MaxUnpooling2DAttributes>(node.operation.attributes);
      *gpu_op = SelectMaxUnpooling(attr, op_def);
      return absl::OkStatus();
    }
    case OperationType::MEAN: {
      auto attr = absl::any_cast<MeanAttributes>(node.operation.attributes);
      *gpu_op = SelectReduce(attr.dims, inputs[0]->tensor.shape, op_type,
                             op_def, gpu_info);
      return absl::OkStatus();
    }
    case OperationType::MEAN_STDDEV_NORMALIZATION: {
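      // (c + 3) / 4 is the channel count rounded up to 4-channel slices.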
      MeanStdDevNormalization operation = CreateMeanStdDevNormalization(
          op_def, gpu_info, (inputs[0]->tensor.shape.c + 3) / 4);
      *gpu_op =
          absl::make_unique<MeanStdDevNormalization>(std::move(operation));
      return absl::OkStatus();
    }
    case OperationType::PAD: {
      auto attr = absl::any_cast<PadAttributes>(node.operation.attributes);
      SelectPadding(attr, op_def, gpu_op);
      return absl::OkStatus();
    }
    case OperationType::POOLING_2D: {
      auto attr =
          absl::any_cast<Pooling2DAttributes>(node.operation.attributes);
      *gpu_op = SelectPooling(attr, op_def);
      return absl::OkStatus();
    }
    case OperationType::PRELU: {
      auto attr = absl::any_cast<PReLUAttributes>(node.operation.attributes);
      *gpu_op = SelectPReLU(attr, gpu_info, op_def);
      return absl::OkStatus();
    }
    case OperationType::QUANTIZE_AND_DEQUANTIZE: {
      auto attr = absl::any_cast<QuantizeAndDequantizeAttributes>(
          node.operation.attributes);
      *gpu_op = SelectQuantizeAndDequantize(attr, op_def);
      return absl::OkStatus();
    }
    case OperationType::RELU: {
      auto attr = absl::any_cast<ReLUAttributes>(node.operation.attributes);
      *gpu_op = SelectReLU(attr, op_def);
      return absl::OkStatus();
    }
    case OperationType::RESHAPE: {
      const int src_channels = inputs[0]->tensor.shape.c;
      auto attr = absl::any_cast<ReshapeAttributes>(node.operation.attributes);
      SelectReshape(src_channels, attr.new_shape.c, op_def, gpu_op);
      return absl::OkStatus();
    }
    case OperationType::RESIZE: {
      auto attr = absl::any_cast<Resize2DAttributes>(node.operation.attributes);
      return SelectResize(attr, op_def, gpu_op);
    }
    case OperationType::SLICE: {
      auto attr = absl::any_cast<SliceAttributes>(node.operation.attributes);
      SelectStridedSlice(attr, op_def, gpu_op);
      return absl::OkStatus();
    }
    case OperationType::SOFTMAX: {
      SelectSoftmax(inputs[0]->tensor.shape, op_def, gpu_op);
      return absl::OkStatus();
    }
    case OperationType::SPACE_TO_DEPTH: {
      auto attr =
          absl::any_cast<SpaceToDepthAttributes>(node.operation.attributes);
      SelectSpaceToDepth(attr, op_def, gpu_op);
      return absl::OkStatus();
    }
    case OperationType::SPLIT: {
      auto attr = absl::any_cast<SplitAttributes>(node.operation.attributes);
      RETURN_IF_ERROR(SelectSplit(attr, op_def, gpu_op));
      return absl::OkStatus();
    }
    case OperationType::TRANSPOSE: {
      auto attr =
          absl::any_cast<TransposeAttributes>(node.operation.attributes);
      SelectTranspose(attr, op_def, gpu_op);
      return absl::OkStatus();
    }
    case OperationType::ABS:
    case OperationType::COPY:
    case OperationType::COS:
    case OperationType::ELU:
    case OperationType::EXP:
    case OperationType::HARD_SWISH:
    case OperationType::LOG:
    case OperationType::NEG:
    case OperationType::RSQRT:
    case OperationType::SIGMOID:
    case OperationType::SIN:
    case OperationType::SQRT:
    case OperationType::SQUARE:
    case OperationType::TANH: {
      GPUOperation operation =
          CreateElementwiseOneInput(gpu_info, op_def, op_type);
      *gpu_op = absl::make_unique<GPUOperation>(std::move(operation));
      return absl::OkStatus();
    }
    case OperationType::DIV:
    case OperationType::EQUAL:
    case OperationType::GREATER:
    case OperationType::GREATER_EQUAL:
    case OperationType::LESS:
    case OperationType::LESS_EQUAL:
    case OperationType::MAXIMUM:
    case OperationType::MINIMUM:
    case OperationType::MUL:
    case OperationType::NOT_EQUAL:
    case OperationType::POW:
    case OperationType::SQUARED_DIFF:
    case OperationType::SUB: {
      if (inputs.size() == 2) {
        GPUOperation operation =
            CreateElementwiseTwoInput(op_def, op_type, inputs[1]->tensor.shape);
        *gpu_op = absl::make_unique<GPUOperation>(std::move(operation));
        return absl::OkStatus();
      } else if (inputs.size() == 1 && node.operation.attributes.has_value()) {
        auto attr =
            absl::any_cast<ElementwiseAttributes>(node.operation.attributes);
        GPUOperation operation =
            CreateElementwise(gpu_info, op_def, op_type, attr);
        *gpu_op = absl::make_unique<GPUOperation>(std::move(operation));
        return absl::OkStatus();
      }
      return absl::UnimplementedError(absl::StrCat(
          "No support of ", node.operation.type, " with these parameters"));
    }
    case OperationType::REDUCE_MAXIMUM:
    case OperationType::REDUCE_MINIMUM:
    case OperationType::REDUCE_PRODUCT:
    case OperationType::REDUCE_SUM: {
      auto attr = absl::any_cast<ReduceAttributes>(node.operation.attributes);
      *gpu_op = SelectReduce(attr.dims, inputs[0]->tensor.shape, op_type,
                             op_def, gpu_info);
      return absl::OkStatus();
    }
    default:
      return SelectDefault(gpu_info, op_def, hints, inputs, outputs, node,
                           gpu_subgraph);
  }
}

}  // namespace gpu
}  // namespace tflite