/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/delegates/gpu/common/selectors/operation_selector.h"

#include <algorithm>
#include <memory>
#include <utility>
#include <vector>

#include "absl/memory/memory.h"
#include "absl/strings/str_cat.h"
#include "absl/types/any.h"
#include "tensorflow/lite/delegates/gpu/common/data_type.h"
#include "tensorflow/lite/delegates/gpu/common/gpu_info.h"
#include "tensorflow/lite/delegates/gpu/common/operations.h"
#include "tensorflow/lite/delegates/gpu/common/selectors/convolution_selector.h"
#include "tensorflow/lite/delegates/gpu/common/selectors/convolution_transposed_selector.h"
#include "tensorflow/lite/delegates/gpu/common/selectors/default_selector.h"
#include "tensorflow/lite/delegates/gpu/common/selectors/dw_convolution_selector.h"
#include "tensorflow/lite/delegates/gpu/common/selectors/fully_connected_selector.h"
#include "tensorflow/lite/delegates/gpu/common/selectors/simple_selectors.h"
#include "tensorflow/lite/delegates/gpu/common/shape.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
#include "tensorflow/lite/delegates/gpu/common/task/storage_type_util.h"
#include "tensorflow/lite/delegates/gpu/common/task/tensor_desc.h"
#include "tensorflow/lite/delegates/gpu/common/task/weights_conversion.h"
#include "tensorflow/lite/delegates/gpu/common/tasks/elementwise.h"
#include "tensorflow/lite/delegates/gpu/common/tasks/mean_stddev_normalization.h"
#include "tensorflow/lite/delegates/gpu/common/tasks/transpose.h"
#include "tensorflow/lite/delegates/gpu/common/tensor.h"
#include "tensorflow/lite/delegates/gpu/common/winograd_util.h"

namespace tflite {
namespace gpu {
namespace {
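// Heuristic gate for the Winograd 4x4-to-6x6 path: it pays off only when the
// output is large enough (enough 4x4 tiles) and the convolution is deep
// enough (enough 4-channel slices), with vendor-specific thresholds.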
bool IsRecommendedForWinograd4x4To6x6(const Convolution2DAttributes& attr,
                                      const GpuInfo& gpu_info,
                                      const BHWC& dst_shape) {
  const int tiles_x = DivideRoundUp(dst_shape.w, 4);
  const int tiles_y = DivideRoundUp(dst_shape.h, 4);
  const int total_tiles = tiles_x * tiles_y;
  const int src_depth = DivideRoundUp(attr.weights.shape.i, 4);
  const int dst_depth = DivideRoundUp(attr.weights.shape.o, 4);
  int min_depth = 16;
  if (gpu_info.IsAdreno() || gpu_info.IsAMD()) {
    min_depth = 32;
  }
  int min_tiles = 32;
  if (gpu_info.IsAdreno()) {
    if (gpu_info.adreno_info.IsAdreno6xx()) {
      min_tiles = 128;
    } else {
      min_tiles = 64;
    }
  }
  if (gpu_info.IsAMD()) {
    min_tiles = 64;
  }
  if (total_tiles >= min_tiles * 8) {
    min_depth /= 4;
    min_depth = std::max(min_depth, 8);
  } else if (total_tiles >= min_tiles * 4) {
    min_depth /= 2;
    min_depth = std::max(min_depth, 8);
  }
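  // Illustrative numbers (not from the original source): a 128x128 output
  // yields tiles_x = tiles_y = 32, so total_tiles = 1024. On Adreno 6xx
  // (min_tiles = 128) that satisfies total_tiles >= min_tiles * 8, so
  // min_depth relaxes from 32 to max(32 / 4, 8) = 8.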
  const bool recommended_channels =
      src_depth >= min_depth && dst_depth >= min_depth;
  const bool recommended_hw = total_tiles >= min_tiles;
  return recommended_channels && recommended_hw;
}

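// Replaces a suitable CONVOLUTION_2D node with a three-operation subgraph:
//   1) Winograd 4x4-to-36 input transform into a temporary tensor,
//   2) convolution in the Winograd domain,
//   3) Winograd 36-to-4x4 output transform (which also applies the bias).
// Both temporaries have shape (batch, 36, tiles_x * tiles_y, channels).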
absl::Status WinogradFromNode(const GpuInfo& gpu_info,
                              const std::vector<Value*>& inputs,
                              const std::vector<Value*>& outputs,
                              const OperationDef& op_def, ModelHints hints,
                              const BHWC& input_shape, const BHWC& output_shape,
                              const Convolution2DAttributes& attr,
                              GPUOperationsSubgraph* gpu_subgraph) {
  if (!IsSuitableForWinograd4x4To6x6(attr)) {
    return absl::UnimplementedError("No implementation for this case.");
  }
  if (!IsRecommendedForWinograd4x4To6x6(attr, gpu_info, output_shape)) {
    return absl::UnimplementedError("Not recommended for this case.");
  }

  const int tiles_x = DivideRoundUp(output_shape.w, 4);
  const int tiles_y = DivideRoundUp(output_shape.h, 4);
  const BHWC shape_0{input_shape.b, 36, tiles_x * tiles_y, input_shape.c};
  const BHWC shape_1{input_shape.b, 36, tiles_x * tiles_y, output_shape.c};
  TensorDescriptor td_0;
  RETURN_IF_ERROR(SelectBestStorageType(
      gpu_info, shape_0, op_def.src_tensors[0].storage_type,
      op_def.src_tensors[0].data_type, op_def.src_tensors[0].layout,
      &td_0.storage_type));
  td_0.data_type = op_def.src_tensors[0].data_type;
  td_0.layout = op_def.src_tensors[0].layout;
  TensorDescriptor td_1;
  RETURN_IF_ERROR(SelectBestStorageType(
      gpu_info, shape_1, op_def.src_tensors[0].storage_type,
      op_def.src_tensors[0].data_type, op_def.src_tensors[0].layout,
      &td_1.storage_type));
  td_1.data_type = op_def.src_tensors[0].data_type;
  td_1.layout = op_def.src_tensors[0].layout;
  gpu_subgraph->new_tensors = {{shape_0, td_0}, {shape_1, td_1}};
  gpu_subgraph->operations.clear();
  gpu_subgraph->operations.resize(3);
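  // The three operations are chained through the temporaries declared in
  // new_tensors above: id -1 refers to {shape_0, td_0} and id -2 to
  // {shape_1, td_1}, while real graph tensors keep their non-negative ids.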

  OperationDef winograd_up_def;
  winograd_up_def.precision = op_def.precision;
  winograd_up_def.src_tensors.push_back(op_def.src_tensors[0]);
  winograd_up_def.dst_tensors.push_back(td_0);
  auto& winograd_up = gpu_subgraph->operations[0];
  winograd_up.operation =
      SelectWinograd4x4To36(gpu_info, attr.padding, winograd_up_def);
  winograd_up.input_ids = {static_cast<int>(inputs[0]->id)};
  winograd_up.output_ids = {-1};

  OperationDef conv_def;
  conv_def.precision = op_def.precision;
  conv_def.src_tensors.push_back(td_0);
  conv_def.dst_tensors.push_back(td_1);
  auto& conv = gpu_subgraph->operations[1];
  conv.input_ids = {-1};
  conv.output_ids = {-2};
  conv.operation = SelectConvolutionForWinograd(attr, input_shape, gpu_info,
                                                conv_def, hints);

  OperationDef winograd_down_def;
  winograd_down_def.precision = op_def.precision;
  winograd_down_def.src_tensors.push_back(td_1);
  winograd_down_def.dst_tensors.push_back(op_def.dst_tensors[0]);
  auto& winograd_down = gpu_subgraph->operations[2];
  winograd_down.input_ids = {-2};
  winograd_down.output_ids = {static_cast<int>(outputs[0]->id)};
  auto bias_copy = attr.bias;
  if (bias_copy.shape.v < attr.weights.shape.o) {
    bias_copy.shape = Linear(attr.weights.shape.o);
    bias_copy.data.resize(attr.weights.shape.o);
  }
  winograd_down.operation =
      SelectWinograd36To4x4(gpu_info, winograd_down_def, bias_copy);
  return absl::OkStatus();
}

}  // namespace

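// Maps a single graph Node onto one or more GPUOperations. Simple operations
// fill the preallocated single-op subgraph; composite lowerings (Winograd,
// dynamic-weight convolutions, grouped concats) build multi-op subgraphs that
// pass intermediates through negative temporary-tensor ids.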
absl::Status GPUOperationFromNode(const GpuInfo& gpu_info,
                                  const OperationDef& op_def, ModelHints hints,
                                  const std::vector<Value*>& inputs,
                                  const std::vector<Value*>& outputs,
                                  const Node& node,
                                  GPUOperationsSubgraph* gpu_subgraph) {
  std::unique_ptr<GPUOperation>* gpu_op =
      InitSingleOpSubgraph(inputs, outputs, gpu_subgraph);
  auto op_type = OperationTypeFromString(node.operation.type);
  switch (op_type) {
    case OperationType::ADD: {
      if (inputs.size() == 2 &&
          (inputs[0]->tensor.shape.c == inputs[1]->tensor.shape.c ||
           inputs[1]->tensor.shape.c == 1)) {
        GPUOperation operation =
            CreateElementwiseTwoInput(op_def, op_type, inputs[1]->tensor.shape);
        *gpu_op = absl::make_unique<GPUOperation>(std::move(operation));
        return absl::OkStatus();
      } else if (inputs.size() >= 2) {
        auto output = outputs[0];
        std::vector<int> channels(inputs.size());
        for (int i = 0; i < inputs.size(); ++i) {
          channels[i] = inputs[i]->tensor.shape.c;
        }
        SelectAdd(op_def, channels, output->tensor.shape.c, gpu_op);
        return absl::OkStatus();
      } else if (inputs.size() == 1 && node.operation.attributes.has_value()) {
        auto attr =
            absl::any_cast<ElementwiseAttributes>(node.operation.attributes);
        GPUOperation operation =
            CreateElementwise(gpu_info, op_def, op_type, attr);
        *gpu_op = absl::make_unique<GPUOperation>(std::move(operation));
        return absl::OkStatus();
      }
      return absl::UnimplementedError(absl::StrCat(
          "No support of ", node.operation.type, " with these parameters"));
    }
    case OperationType::BATCHED_MATMUL: {
      // Currently only batch = 1 is supported.
      // MatMul is replaced with this sequence:
      //   1) Transpose the second tensor (weights): (1x1xHxW) -> (Wx1x1xH).
      //   2) Convert the transposed tensor from 1) to convolution weights.
      //   3) Run a usual convolution.
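      // Illustrative shapes (assumed, not from the original comment): a
      // (1, 1, 256, 128) BHWC weights tensor is transposed with
      // perm = BHWC(3, 0, 1, 2) into (128, 1, 1, 256), i.e. 128 output
      // channels by 256 input channels, before being repacked by the
      // converter below.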
      auto second_shape = inputs[1]->tensor.shape;
      auto dst_shape = outputs[0]->tensor.shape;
      if (dst_shape.b != 1) {
        return absl::UnimplementedError(
            "Currently only batch = 1 supported for BATCHED_MATMUL.");
      }
      BHWC weights_shape(second_shape.c, 1, 1, second_shape.w);
      Convolution2DAttributes attr;
      attr.strides = HW(1, 1);
      attr.dilations = HW(1, 1);
      attr.padding.appended = HW(0, 0);
      attr.padding.prepended = HW(0, 0);
      attr.bias.shape = Linear(weights_shape.b);
      attr.bias.data.resize(weights_shape.b, 0.0f);

      TensorDescriptor transposed_desc = {op_def.src_tensors[1].data_type,
                                          op_def.src_tensors[1].storage_type,
                                          Layout::BHWC};
      RETURN_IF_ERROR(SelectBestStorageType(
          gpu_info, weights_shape, transposed_desc.storage_type,
          transposed_desc.data_type, transposed_desc.layout,
          &transposed_desc.storage_type));
      TensorDescriptor weights_desc = {op_def.src_tensors[1].data_type,
                                       TensorStorageType::BUFFER, Layout::BHWC};
      gpu_subgraph->operations.clear();
      gpu_subgraph->operations.resize(3);
      auto& transpose_op = gpu_subgraph->operations[0];
      auto& converter_op = gpu_subgraph->operations[1];
      auto& conv_op = gpu_subgraph->operations[2];
      conv_op.input_ids = {static_cast<int>(inputs[0]->id), -1};
      conv_op.output_ids = {static_cast<int>(outputs[0]->id)};
      OperationDef conv_def = op_def;
      conv_def.src_tensors[1] = weights_desc;
      WeightsDescription conv_weights_desc;
      conv_op.operation = SelectConvolutionWithDynamicWeights(
          attr, weights_shape, dst_shape, gpu_info, conv_def, hints,
          &conv_weights_desc);

      int aligned_output =
          AlignByN(weights_shape.b, conv_weights_desc.GetOutputGroupSize() * 4);
      int aligned_input = AlignByN(weights_shape.c, 4);
      gpu_subgraph->new_tensors = {{BHWC(1, 1, 1,
                                         aligned_output * aligned_input *
                                             weights_shape.h * weights_shape.w),
                                    weights_desc},
                                   {weights_shape, transposed_desc}};
      OperationDef converter_def;
      converter_def.precision = op_def.precision;
      converter_def.src_tensors.push_back(transposed_desc);
      converter_def.dst_tensors.push_back(weights_desc);

      converter_op.input_ids = {-2};
      converter_op.output_ids = {-1};
      converter_op.operation =
          SelectConverterToConvWeights(conv_weights_desc, converter_def, hints);

      OperationDef transpose_def;
      transpose_def.precision = op_def.precision;
      transpose_def.src_tensors.push_back(op_def.src_tensors[1]);
      transpose_def.dst_tensors.push_back(transposed_desc);

      transpose_op.input_ids = {static_cast<int>(inputs[1]->id)};
      transpose_op.output_ids = {-2};
      TransposeAttributes transpose_attr;
      transpose_attr.perm = BHWC(3, 0, 1, 2);
      transpose_op.operation = absl::make_unique<GPUOperation>(
          CreateTranspose(transpose_def, transpose_attr));
      return absl::OkStatus();
    }
    case OperationType::CONCAT: {
      auto attr = absl::any_cast<ConcatAttributes>(node.operation.attributes);
      const int max_inputs = gpu_info.GetMaxImageArguments() - 8;
      if (inputs.size() >= max_inputs) {
        int groups = DivideRoundUp(inputs.size(), max_inputs);
        gpu_subgraph->operations.clear();
        gpu_subgraph->operations.resize(groups);
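        // Illustrative grouping (assumed numbers): with 40 inputs and
        // max_inputs = 8, groups = 5. Each group g > 0 consumes the previous
        // group's result through temporary id -g, so the concats chain
        // through intermediates -1..-4 before the last group writes the
        // real output tensor.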
        BHWC concatenated_shape = inputs[0]->tensor.shape;
        concatenated_shape.set(attr.axis, 0);
        for (int g = 0; g < groups; ++g) {
          std::vector<int> channels;
          auto& concat_op = gpu_subgraph->operations[g];
          OperationDef new_def;
          new_def.precision = op_def.precision;
          if (g != 0) {
            // Concatenated tensor from previous concats.
            new_def.src_tensors.push_back(op_def.dst_tensors[0]);
            concat_op.input_ids = {-g};
            channels.push_back(concatenated_shape.c);
          }
          for (int i = 0; i < max_inputs; ++i) {
            int src_index = g * max_inputs + i;
            if (src_index >= op_def.src_tensors.size()) {
              break;
            }
            new_def.src_tensors.push_back(op_def.src_tensors[src_index]);
            concat_op.input_ids.push_back(inputs[src_index]->id);
            channels.push_back(inputs[src_index]->tensor.shape.c);
            int current_size = concatenated_shape.get(attr.axis);
            concatenated_shape.set(
                attr.axis,
                current_size + inputs[src_index]->tensor.shape.get(attr.axis));
          }
          new_def.dst_tensors.push_back(op_def.dst_tensors[0]);
          if (g == groups - 1) {
            // Last concat; writes the real output tensor.
            concat_op.output_ids = {static_cast<int>(outputs[0]->id)};
          } else {
            // Intermediate concat; create a new temporary tensor for it.
            concat_op.output_ids = {-(g + 1)};
            gpu_subgraph->new_tensors.push_back(
                {concatenated_shape, op_def.dst_tensors[0]});
          }
          RETURN_IF_ERROR(SelectConcat(attr, channels, new_def, gpu_info,
                                       &concat_op.operation));
        }
        return absl::OkStatus();
      } else {
        std::vector<int> channels(inputs.size());
        for (int i = 0; i < inputs.size(); ++i) {
          channels[i] = inputs[i]->tensor.shape.c;
        }
        return SelectConcat(attr, channels, op_def, gpu_info, gpu_op);
      }
    }
    case OperationType::CONVOLUTION_2D: {
      auto attr =
          absl::any_cast<Convolution2DAttributes>(node.operation.attributes);
      auto input_shape = inputs[0]->tensor.shape;
      auto output_shape = outputs[0]->tensor.shape;
      if (inputs.size() == 1) {
        if (!hints.Check(ModelHints::kNoWinogradOptimizations) &&
            WinogradFromNode(gpu_info, inputs, outputs, op_def, hints,
                             input_shape, output_shape, attr, gpu_subgraph)
                .ok()) {
          return absl::OkStatus();
        } else {
          gpu_op = InitSingleOpSubgraph(inputs, outputs, gpu_subgraph);
          *gpu_op =
              SelectConvolution(attr, output_shape, gpu_info, op_def, hints);
          return absl::OkStatus();
        }
      } else {
        auto weights_shape = inputs[1]->tensor.shape;
        if (attr.bias.data.empty()) {
          attr.bias.shape = Linear(weights_shape.b);
          attr.bias.data.resize(weights_shape.b, 0.0f);
        }
        TensorDescriptor weights_desc = {op_def.src_tensors[1].data_type,
                                         TensorStorageType::BUFFER,
                                         Layout::BHWC};
        gpu_subgraph->operations.clear();
        gpu_subgraph->operations.resize(2);
        auto& converter_op = gpu_subgraph->operations[0];
        auto& conv_op = gpu_subgraph->operations[1];
        conv_op.input_ids = {static_cast<int>(inputs[0]->id), -1};
        conv_op.output_ids = {static_cast<int>(outputs[0]->id)};
        OperationDef conv_def = op_def;
        conv_def.src_tensors[1] = weights_desc;
        WeightsDescription conv_weights_desc;
        conv_op.operation = SelectConvolutionWithDynamicWeights(
            attr, weights_shape, output_shape, gpu_info, conv_def, hints,
            &conv_weights_desc);

        int aligned_output = AlignByN(
            weights_shape.b, conv_weights_desc.GetOutputGroupSize() * 4);
        int aligned_input = AlignByN(weights_shape.c, 4);
        gpu_subgraph->new_tensors = {
            {BHWC(1, 1, 1,
                  aligned_output * aligned_input * weights_shape.h *
                      weights_shape.w),
             weights_desc}};
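        // Buffer sizing with illustrative numbers (assumed, not from the
        // original source): for runtime weights with b = 17 output channels,
        // c = 20 input channels, a 3x3 kernel, and an output group size of 2,
        // aligned_output = AlignByN(17, 8) = 24 and
        // aligned_input = AlignByN(20, 4) = 20, so the packed buffer holds
        // 24 * 20 * 3 * 3 = 4320 elements.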
        OperationDef converter_def;
        converter_def.precision = op_def.precision;
        converter_def.src_tensors.push_back(op_def.src_tensors[1]);
        converter_def.dst_tensors.push_back(weights_desc);

        converter_op.input_ids = {static_cast<int>(inputs[1]->id)};
        converter_op.output_ids = {-1};
        converter_op.operation = SelectConverterToConvWeights(
            conv_weights_desc, converter_def, hints);
        return absl::OkStatus();
      }
    }
    case OperationType::CONVOLUTION_TRANSPOSED: {
      auto attr = absl::any_cast<ConvolutionTransposedAttributes>(
          node.operation.attributes);
      if (inputs.size() == 1) {
        *gpu_op = SelectConvolutionTransposed(attr, gpu_info, op_def);
        return absl::OkStatus();
      } else {
        // CONVOLUTION_TRANSPOSED with runtime weights.
        OHWI weights_shape =
            OHWI(inputs[1]->tensor.shape.b, inputs[1]->tensor.shape.h,
                 inputs[1]->tensor.shape.w, inputs[1]->tensor.shape.c);
        if (attr.bias.data.empty()) {
          attr.bias.shape = Linear(weights_shape.o);
          attr.bias.data.resize(weights_shape.o, 0.0f);
        }
        gpu_subgraph->operations.clear();
        gpu_subgraph->operations.resize(2);
        auto& converter_op = gpu_subgraph->operations[0];
        auto& conv_op = gpu_subgraph->operations[1];
        WeightsDescription weights_desc;
        conv_op.operation = SelectConvolutionTransposedWithDynamicWeights(
            attr, gpu_info, op_def, &weights_desc);
        conv_op.output_ids = {static_cast<int>(outputs[0]->id)};

        const int dst_depth = AlignByN(DivideRoundUp(weights_shape.o, 4),
                                       weights_desc.GetOutputGroupSize());
        const int src_depth = DivideRoundUp(weights_shape.i, 4);
        const int kernel_x = weights_shape.w;
        const int kernel_y = weights_shape.h;
        if (weights_desc.layout ==
                WeightsLayout::k2DX4I4YIsSpatialIAndXIsOOGroupO4 ||
            weights_desc.layout ==
                WeightsLayout::k2DX4O4YIsSpatialIAndXIsOOGroupI4) {
          // Weights are packed as four 2D textures.
          conv_op.input_ids = {static_cast<int>(inputs[0]->id), -1, -2, -3, -4};
          int texture_width = dst_depth;
          int texture_height = src_depth * kernel_x * kernel_y;
          for (int i = 0; i < 4; ++i) {
            gpu_subgraph->new_tensors.push_back(
                {BHWC(1, texture_height, texture_width, 4),
                 TensorDescriptor(op_def.GetDataType(),
                                  TensorStorageType::TEXTURE_2D, Layout::HWC)});
          }
        } else {
          // Weights are packed as a single buffer.
          conv_op.input_ids = {static_cast<int>(inputs[0]->id), -1};
          gpu_subgraph->new_tensors = {
              {BHWC(
                   1, 1, 1,
                   GetTotalElementsCountForLayout(weights_desc, weights_shape)),
               TensorDescriptor(op_def.GetDataType(), TensorStorageType::BUFFER,
                                Layout::HWC)}};
        }
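        // Wire the converter: it reads the runtime weights tensor and writes
        // one destination per extra convolution input, i.e. ids -1..-4 for
        // the four-texture layout or just -1 for the single-buffer layout.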
        OperationDef conv_def = conv_op.operation->GetDefinition();
        OperationDef converter_def;
        converter_def.precision = op_def.precision;
        converter_def.src_tensors.push_back(op_def.src_tensors[1]);
        for (int i = 1; i < conv_def.src_tensors.size(); ++i) {
          converter_def.dst_tensors.push_back(conv_def.src_tensors[i]);
          converter_op.output_ids.push_back(-i);
        }

        converter_op.input_ids = {static_cast<int>(inputs[1]->id)};
        converter_op.operation =
            SelectConverterToConvWeights(weights_desc, converter_def, hints);
        return absl::OkStatus();
      }
    }
    case OperationType::DEPTHWISE_CONVOLUTION: {
      auto attr = absl::any_cast<DepthwiseConvolution2DAttributes>(
          node.operation.attributes);
      if (inputs.size() == 1) {
        *gpu_op = SelectDWConvolution(attr, gpu_info, op_def);
      } else {
        if (inputs[1]->tensor.shape.b != 1) {
          return absl::UnimplementedError(
              "No support of depthwise runtime weights with channel multiplier "
              "!= 1");
        }
        *gpu_op = SelectDWConvolutionDynamicWeights(attr, gpu_info, op_def);
      }
      return absl::OkStatus();
    }
    case OperationType::DEPTH_TO_SPACE: {
      auto attr =
          absl::any_cast<SpaceToDepthAttributes>(node.operation.attributes);
      SelectDepthToSpace(attr, op_def, gpu_op);
      return absl::OkStatus();
    }
    case OperationType::FULLY_CONNECTED: {
      auto attr =
          absl::any_cast<FullyConnectedAttributes>(node.operation.attributes);
      *gpu_op = SelectFullyConnected(attr, gpu_info, op_def,
                                     inputs[0]->tensor.shape.b);
      return absl::OkStatus();
    }
    case OperationType::FULLY_CONNECTED_INT8: {
      auto attr = absl::any_cast<FullyConnectedInt8Attributes>(
          node.operation.attributes);
      *gpu_op = SelectFullyConnected(attr, gpu_info, op_def);
      return absl::OkStatus();
    }
    case OperationType::GATHER: {
      auto attr = absl::any_cast<GatherAttributes>(node.operation.attributes);
      RETURN_IF_ERROR(SelectGather(attr, op_def, gpu_op));
      return absl::OkStatus();
    }
    case OperationType::LSTM: {
      *gpu_op = SelectLSTM(op_def, gpu_info);
      return absl::OkStatus();
    }
    case OperationType::MAX_UNPOOLING_2D: {
      auto attr =
          absl::any_cast<MaxUnpooling2DAttributes>(node.operation.attributes);
      *gpu_op = SelectMaxUnpooling(attr, op_def);
      return absl::OkStatus();
    }
    case OperationType::MEAN: {
      auto attr = absl::any_cast<MeanAttributes>(node.operation.attributes);
      *gpu_op = SelectReduce(attr.dims, inputs[0]->tensor.shape, op_type,
                             op_def, gpu_info);
      return absl::OkStatus();
    }
    case OperationType::MEAN_STDDEV_NORMALIZATION: {
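      // (c + 3) / 4 is DivideRoundUp(c, 4): the number of 4-channel slices
      // the normalization kernel processes.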
      MeanStdDevNormalization operation = CreateMeanStdDevNormalization(
          op_def, gpu_info, (inputs[0]->tensor.shape.c + 3) / 4);
      *gpu_op =
          absl::make_unique<MeanStdDevNormalization>(std::move(operation));
      return absl::OkStatus();
    }
    case OperationType::PAD: {
      auto attr = absl::any_cast<PadAttributes>(node.operation.attributes);
      SelectPadding(attr, op_def, gpu_op);
      return absl::OkStatus();
    }
    case OperationType::POOLING_2D: {
      auto attr =
          absl::any_cast<Pooling2DAttributes>(node.operation.attributes);
      *gpu_op = SelectPooling(attr, op_def);
      return absl::OkStatus();
    }
    case OperationType::PRELU: {
      auto attr = absl::any_cast<PReLUAttributes>(node.operation.attributes);
      *gpu_op = SelectPReLU(attr, gpu_info, op_def);
      return absl::OkStatus();
    }
    case OperationType::QUANTIZE_AND_DEQUANTIZE: {
      auto attr = absl::any_cast<QuantizeAndDequantizeAttributes>(
          node.operation.attributes);
      *gpu_op = SelectQuantizeAndDequantize(attr, op_def);
      return absl::OkStatus();
    }
    case OperationType::RELU: {
      auto attr = absl::any_cast<ReLUAttributes>(node.operation.attributes);
      *gpu_op = SelectReLU(attr, op_def);
      return absl::OkStatus();
    }
    case OperationType::RESAMPLER: {
      *gpu_op = SelectResampler(op_def);
      return absl::OkStatus();
    }
    case OperationType::RESHAPE: {
      const int src_channels = inputs[0]->tensor.shape.c;
      auto attr = absl::any_cast<ReshapeAttributes>(node.operation.attributes);
      SelectReshape(src_channels, attr.new_shape.c, op_def, gpu_op);
      return absl::OkStatus();
    }
    case OperationType::RESIZE: {
      auto attr = absl::any_cast<Resize2DAttributes>(node.operation.attributes);
      return SelectResize(attr, op_def, gpu_op);
    }
    case OperationType::SLICE: {
      auto attr = absl::any_cast<SliceAttributes>(node.operation.attributes);
      SelectStridedSlice(attr, op_def, gpu_op);
      return absl::OkStatus();
    }
    case OperationType::SOFTMAX: {
      SelectSoftmax(inputs[0]->tensor.shape, op_def, gpu_op);
      return absl::OkStatus();
    }
    case OperationType::SPACE_TO_DEPTH: {
      auto attr =
          absl::any_cast<SpaceToDepthAttributes>(node.operation.attributes);
      SelectSpaceToDepth(attr, op_def, gpu_op);
      return absl::OkStatus();
    }
    case OperationType::SPLIT: {
      auto attr = absl::any_cast<SplitAttributes>(node.operation.attributes);
      SelectSplit(attr, op_def, gpu_op);
      return absl::OkStatus();
    }
    case OperationType::TILE: {
      *gpu_op = SelectTile(op_def, inputs[0]->tensor.shape);
      return absl::OkStatus();
    }
    case OperationType::TRANSPOSE: {
      auto attr =
          absl::any_cast<TransposeAttributes>(node.operation.attributes);
      SelectTranspose(attr, op_def, gpu_op);
      return absl::OkStatus();
    }
    case OperationType::ABS:
    case OperationType::COPY:
    case OperationType::COS:
    case OperationType::ELU:
    case OperationType::EXP:
    case OperationType::HARD_SWISH:
    case OperationType::LOG:
    case OperationType::NEG:
    case OperationType::RSQRT:
    case OperationType::SIGMOID:
    case OperationType::SIN:
    case OperationType::SQRT:
    case OperationType::SQUARE:
    case OperationType::TANH: {
      GPUOperation operation =
          CreateElementwiseOneInput(gpu_info, op_def, op_type);
      *gpu_op = absl::make_unique<GPUOperation>(std::move(operation));
      return absl::OkStatus();
    }
    case OperationType::DIV:
    case OperationType::EQUAL:
    case OperationType::GREATER:
    case OperationType::GREATER_EQUAL:
    case OperationType::LESS:
    case OperationType::LESS_EQUAL:
    case OperationType::MAXIMUM:
    case OperationType::MINIMUM:
    case OperationType::MUL:
    case OperationType::NOT_EQUAL:
    case OperationType::POW:
    case OperationType::SQUARED_DIFF:
    case OperationType::SUB: {
      if (inputs.size() == 2) {
        GPUOperation operation =
            CreateElementwiseTwoInput(op_def, op_type, inputs[1]->tensor.shape);
        *gpu_op = absl::make_unique<GPUOperation>(std::move(operation));
        return absl::OkStatus();
      } else if (inputs.size() == 1 && node.operation.attributes.has_value()) {
        auto attr =
            absl::any_cast<ElementwiseAttributes>(node.operation.attributes);
        GPUOperation operation =
            CreateElementwise(gpu_info, op_def, op_type, attr);
        *gpu_op = absl::make_unique<GPUOperation>(std::move(operation));
        return absl::OkStatus();
      }
      return absl::UnimplementedError(absl::StrCat(
          "No support of ", node.operation.type, " with these parameters"));
    }
    case OperationType::REDUCE_MAXIMUM:
    case OperationType::REDUCE_MINIMUM:
    case OperationType::REDUCE_PRODUCT:
    case OperationType::REDUCE_SUM: {
      auto attr = absl::any_cast<ReduceAttributes>(node.operation.attributes);
      *gpu_op = SelectReduce(attr.dims, inputs[0]->tensor.shape, op_type,
                             op_def, gpu_info);
      return absl::OkStatus();
    }
    default:
      return SelectDefault(gpu_info, op_def, hints, inputs, outputs, node,
                           gpu_subgraph);
  }
}

}  // namespace gpu
}  // namespace tflite