/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/delegates/gpu/common/selectors/operation_selector.h"

#include <algorithm>
#include <memory>
#include <utility>
#include <vector>

#include "absl/memory/memory.h"
#include "absl/strings/str_cat.h"
#include "absl/types/any.h"
#include "tensorflow/lite/delegates/gpu/common/data_type.h"
#include "tensorflow/lite/delegates/gpu/common/gpu_info.h"
#include "tensorflow/lite/delegates/gpu/common/operations.h"
#include "tensorflow/lite/delegates/gpu/common/selectors/convolution_selector.h"
#include "tensorflow/lite/delegates/gpu/common/selectors/convolution_transposed_selector.h"
#include "tensorflow/lite/delegates/gpu/common/selectors/default_selector.h"
#include "tensorflow/lite/delegates/gpu/common/selectors/dw_convolution_selector.h"
#include "tensorflow/lite/delegates/gpu/common/selectors/fully_connected_selector.h"
#include "tensorflow/lite/delegates/gpu/common/selectors/simple_selectors.h"
#include "tensorflow/lite/delegates/gpu/common/shape.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
#include "tensorflow/lite/delegates/gpu/common/task/storage_type_util.h"
#include "tensorflow/lite/delegates/gpu/common/task/tensor_desc.h"
#include "tensorflow/lite/delegates/gpu/common/task/weights_conversion.h"
#include "tensorflow/lite/delegates/gpu/common/tasks/elementwise.h"
#include "tensorflow/lite/delegates/gpu/common/tasks/mean_stddev_normalization.h"
#include "tensorflow/lite/delegates/gpu/common/tasks/transpose.h"
#include "tensorflow/lite/delegates/gpu/common/tensor.h"
#include "tensorflow/lite/delegates/gpu/common/winograd_util.h"

namespace tflite {
namespace gpu {
namespace {
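// Heuristic for whether the Winograd 4x4-to-6x6 path is worth using for this
// convolution. It requires both enough 4x4 output tiles and enough
// input/output channel slices; the thresholds are tuned per vendor (Adreno and
// AMD need deeper tensors, Adreno 6xx needs more tiles), and the channel
// requirement is relaxed when the tile count is large.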
bool IsRecommendedForWinograd4x4To6x6(const Convolution2DAttributes& attr,
                                      const GpuInfo& gpu_info,
                                      const BHWC& dst_shape) {
  const int tiles_x = DivideRoundUp(dst_shape.w, 4);
  const int tiles_y = DivideRoundUp(dst_shape.h, 4);
  const int total_tiles = tiles_x * tiles_y;
  const int src_depth = DivideRoundUp(attr.weights.shape.i, 4);
  const int dst_depth = DivideRoundUp(attr.weights.shape.o, 4);
  int min_depth = 16;
  if (gpu_info.IsAdreno() || gpu_info.IsAMD()) {
    min_depth = 32;
  }
  int min_tiles = 32;
  if (gpu_info.IsAdreno()) {
    if (gpu_info.adreno_info.IsAdreno6xx()) {
      min_tiles = 128;
    } else {
      min_tiles = 64;
    }
  }
  if (gpu_info.IsAMD()) {
    min_tiles = 64;
  }
  if (total_tiles >= min_tiles * 8) {
    min_depth /= 4;
    min_depth = std::max(min_depth, 8);
  } else if (total_tiles >= min_tiles * 4) {
    min_depth /= 2;
    min_depth = std::max(min_depth, 8);
  }
  const bool recommended_channels =
      src_depth >= min_depth && dst_depth >= min_depth;
  const bool recommended_hw = total_tiles >= min_tiles;
  return recommended_channels && recommended_hw;
}

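// Builds a 3-operation subgraph that implements the convolution in the
// Winograd domain: (1) Winograd4x4To36 input transform, (2) the convolution on
// the transformed tiles, (3) Winograd36To4x4 inverse transform, which receives
// the bias. Intermediate tensors are referenced by negative ids (-1, -2) and
// allocated through gpu_subgraph->new_tensors.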
absl::Status WinogradFromNode(const GpuInfo& gpu_info,
                              const std::vector<Value*>& inputs,
                              const std::vector<Value*>& outputs,
                              const OperationDef& op_def, ModelHints hints,
                              const BHWC& input_shape, const BHWC& output_shape,
                              const Convolution2DAttributes& attr,
                              GPUOperationsSubgraph* gpu_subgraph) {
  if (!IsSuitableForWinograd4x4To6x6(attr)) {
    return absl::UnimplementedError("No implementation for this case.");
  }
  if (!IsRecommendedForWinograd4x4To6x6(attr, gpu_info, output_shape)) {
    return absl::UnimplementedError("Not recommended for this case.");
  }

  const int tiles_x = DivideRoundUp(output_shape.w, 4);
  const int tiles_y = DivideRoundUp(output_shape.h, 4);
  const BHWC shape_0{input_shape.b, 36, tiles_x * tiles_y, input_shape.c};
  const BHWC shape_1{input_shape.b, 36, tiles_x * tiles_y, output_shape.c};
  TensorDescriptor td_0;
  RETURN_IF_ERROR(SelectBestStorageType(
      gpu_info, shape_0, op_def.src_tensors[0].storage_type,
      op_def.src_tensors[0].data_type, op_def.src_tensors[0].layout,
      &td_0.storage_type));
  td_0.data_type = op_def.src_tensors[0].data_type;
  td_0.layout = op_def.src_tensors[0].layout;
  TensorDescriptor td_1;
  RETURN_IF_ERROR(SelectBestStorageType(
      gpu_info, shape_1, op_def.src_tensors[0].storage_type,
      op_def.src_tensors[0].data_type, op_def.src_tensors[0].layout,
      &td_1.storage_type));
  td_1.data_type = op_def.src_tensors[0].data_type;
  td_1.layout = op_def.src_tensors[0].layout;
  gpu_subgraph->new_tensors = {{shape_0, td_0}, {shape_1, td_1}};
  gpu_subgraph->operations.clear();
  gpu_subgraph->operations.resize(3);

  OperationDef winograd_up_def;
  winograd_up_def.precision = op_def.precision;
  winograd_up_def.src_tensors.push_back(op_def.src_tensors[0]);
  winograd_up_def.dst_tensors.push_back(td_0);
  auto& winograd_up = gpu_subgraph->operations[0];
  winograd_up.operation =
      SelectWinograd4x4To36(gpu_info, attr.padding, winograd_up_def);
  winograd_up.input_ids = {static_cast<int>(inputs[0]->id)};
  winograd_up.output_ids = {-1};

  OperationDef conv_def;
  conv_def.precision = op_def.precision;
  conv_def.src_tensors.push_back(td_0);
  conv_def.dst_tensors.push_back(td_1);
  auto& conv = gpu_subgraph->operations[1];
  conv.input_ids = {-1};
  conv.output_ids = {-2};
  conv.operation = SelectConvolutionForWinograd(attr, input_shape, gpu_info,
                                                conv_def, hints);

  OperationDef winograd_down_def;
  winograd_down_def.precision = op_def.precision;
  winograd_down_def.src_tensors.push_back(td_1);
  winograd_down_def.dst_tensors.push_back(op_def.dst_tensors[0]);
  auto& winograd_down = gpu_subgraph->operations[2];
  winograd_down.input_ids = {-2};
  winograd_down.output_ids = {static_cast<int>(outputs[0]->id)};
  auto bias_copy = attr.bias;
  if (bias_copy.shape.v < attr.weights.shape.o) {
    bias_copy.shape = Linear(attr.weights.shape.o);
    bias_copy.data.resize(attr.weights.shape.o);
  }
  winograd_down.operation =
      SelectWinograd36To4x4(gpu_info, winograd_down_def, bias_copy);
  return absl::OkStatus();
}

}  // namespace

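// Maps a single graph Node onto one or more GPUOperations. For most ops this
// fills the single-operation subgraph created by InitSingleOpSubgraph; ops
// that need auxiliary steps (Winograd, runtime weights conversion, chained
// concats) build a multi-operation subgraph whose intermediate tensors are
// referenced by negative ids.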
absl::Status GPUOperationFromNode(const GpuInfo& gpu_info,
                                  const OperationDef& op_def, ModelHints hints,
                                  const std::vector<Value*>& inputs,
                                  const std::vector<Value*>& outputs,
                                  const Node& node,
                                  GPUOperationsSubgraph* gpu_subgraph) {
  std::unique_ptr<GPUOperation>* gpu_op =
      InitSingleOpSubgraph(inputs, outputs, gpu_subgraph);
  auto op_type = OperationTypeFromString(node.operation.type);
  switch (op_type) {
    case OperationType::ADD: {
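      // Three supported forms: (1) two inputs where the second input's
      // channels either match the first or broadcast from 1, handled as a
      // generic two-input elementwise op; (2) N >= 2 inputs, handled by the
      // dedicated Add kernel; (3) one input plus a constant scalar/tensor
      // stored in the attributes.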
      if (inputs.size() == 2 &&
          (inputs[0]->tensor.shape.c == inputs[1]->tensor.shape.c ||
           inputs[1]->tensor.shape.c == 1)) {
        GPUOperation operation =
            CreateElementwiseTwoInput(op_def, op_type, inputs[1]->tensor.shape);
        *gpu_op = absl::make_unique<GPUOperation>(std::move(operation));
        return absl::OkStatus();
      } else if (inputs.size() >= 2) {
        auto output = outputs[0];
        std::vector<int> channels(inputs.size());
        for (int i = 0; i < inputs.size(); ++i) {
          channels[i] = inputs[i]->tensor.shape.c;
        }
        SelectAdd(op_def, channels, output->tensor.shape.c, gpu_op);
        return absl::OkStatus();
      } else if (inputs.size() == 1 && node.operation.attributes.has_value()) {
        auto attr =
            absl::any_cast<ElementwiseAttributes>(node.operation.attributes);
        GPUOperation operation =
            CreateElementwise(gpu_info, op_def, op_type, attr);
        *gpu_op = absl::make_unique<GPUOperation>(std::move(operation));
        return absl::OkStatus();
      }
      return absl::UnimplementedError(absl::StrCat(
          "No support of ", node.operation.type, " with these parameters"));
    }
    case OperationType::BATCHED_MATMUL: {
      // Currently only batch = 1 is supported.
      // MatMul is replaced with the following sequence:
      //   1) Transpose the second tensor (weights): (1x1xHxW) -> (Wx1x1xH).
      //   2) Convert the transposed tensor from 1) to convolution weights.
      //   3) Run a regular convolution with those weights.
      auto second_shape = inputs[1]->tensor.shape;
      auto dst_shape = outputs[0]->tensor.shape;
      if (dst_shape.b != 1) {
        return absl::UnimplementedError(
            "Currently only batch = 1 is supported for BATCHED_MATMUL.");
      }
      BHWC weights_shape(second_shape.c, 1, 1, second_shape.w);
      Convolution2DAttributes attr;
      attr.strides = HW(1, 1);
      attr.dilations = HW(1, 1);
      attr.padding.appended = HW(0, 0);
      attr.padding.prepended = HW(0, 0);
      attr.bias.shape = Linear(weights_shape.b);
      attr.bias.data.resize(weights_shape.b, 0.0f);

      TensorDescriptor transposed_desc = {op_def.src_tensors[1].data_type,
                                          op_def.src_tensors[1].storage_type,
                                          Layout::BHWC};
      RETURN_IF_ERROR(SelectBestStorageType(
          gpu_info, weights_shape, transposed_desc.storage_type,
          transposed_desc.data_type, transposed_desc.layout,
          &transposed_desc.storage_type));
      TensorDescriptor weights_desc = {op_def.src_tensors[1].data_type,
                                       TensorStorageType::BUFFER, Layout::BHWC};
      gpu_subgraph->operations.clear();
      gpu_subgraph->operations.resize(3);
      auto& transpose_op = gpu_subgraph->operations[0];
      auto& converter_op = gpu_subgraph->operations[1];
      auto& conv_op = gpu_subgraph->operations[2];
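      // Intermediate tensor id convention for this subgraph: the transpose
      // writes its result as id -2, the converter reads -2 and writes the
      // convolution weights as id -1, and the convolution consumes -1 next to
      // the first graph input.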
      conv_op.input_ids = {static_cast<int>(inputs[0]->id), -1};
      conv_op.output_ids = {static_cast<int>(outputs[0]->id)};
      OperationDef conv_def = op_def;
      conv_def.src_tensors[1] = weights_desc;
      WeightsDescription conv_weights_desc;
      conv_op.operation = SelectConvolutionWithDynamicWeights(
          attr, weights_shape, dst_shape, gpu_info, conv_def, hints,
          &conv_weights_desc);

      int aligned_output =
          AlignByN(weights_shape.b, conv_weights_desc.GetOutputGroupSize() * 4);
      int aligned_input = AlignByN(weights_shape.c, 4);
      gpu_subgraph->new_tensors = {{BHWC(1, 1, 1,
                                         aligned_output * aligned_input *
                                             weights_shape.h * weights_shape.w),
                                    weights_desc},
                                   {weights_shape, transposed_desc}};
      OperationDef converter_def;
      converter_def.precision = op_def.precision;
      converter_def.src_tensors.push_back(transposed_desc);
      converter_def.dst_tensors.push_back(weights_desc);

      converter_op.input_ids = {-2};
      converter_op.output_ids = {-1};
      converter_op.operation =
          SelectConverterToConvWeights(conv_weights_desc, converter_def, hints);

      OperationDef transpose_def;
      transpose_def.precision = op_def.precision;
      transpose_def.src_tensors.push_back(op_def.src_tensors[1]);
      transpose_def.dst_tensors.push_back(transposed_desc);

      transpose_op.input_ids = {static_cast<int>(inputs[1]->id)};
      transpose_op.output_ids = {-2};
      TransposeAttributes transpose_attr;
      transpose_attr.perm = BHWC(3, 0, 1, 2);
      transpose_op.operation = absl::make_unique<GPUOperation>(
          CreateTranspose(transpose_def, transpose_attr));
      return absl::OkStatus();
    }
    case OperationType::CONCAT: {
      auto attr = absl::any_cast<ConcatAttributes>(node.operation.attributes);
      const int max_inputs = gpu_info.GetMaxImageArguments() - 8;
      if (inputs.size() >= max_inputs) {
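        // Too many inputs to bind to a single kernel: concatenate in groups of
        // at most max_inputs, storing each partial result in an intermediate
        // tensor (negative id) that is prepended to the next group.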
        int groups = DivideRoundUp(inputs.size(), max_inputs);
        gpu_subgraph->operations.clear();
        gpu_subgraph->operations.resize(groups);
        BHWC concatenated_shape = inputs[0]->tensor.shape;
        concatenated_shape.set(attr.axis, 0);
        for (int g = 0; g < groups; ++g) {
          std::vector<int> channels;
          auto& concat_op = gpu_subgraph->operations[g];
          OperationDef new_def;
          new_def.precision = op_def.precision;
          if (g != 0) {
            // Concatenated tensor from the previous concats.
            new_def.src_tensors.push_back(op_def.dst_tensors[0]);
            concat_op.input_ids = {-g};
            channels.push_back(concatenated_shape.c);
          }
          for (int i = 0; i < max_inputs; ++i) {
            int src_index = g * max_inputs + i;
            if (src_index >= op_def.src_tensors.size()) {
              break;
            }
            new_def.src_tensors.push_back(op_def.src_tensors[src_index]);
            concat_op.input_ids.push_back(inputs[src_index]->id);
            channels.push_back(inputs[src_index]->tensor.shape.c);
            int current_size = concatenated_shape.get(attr.axis);
            concatenated_shape.set(
                attr.axis,
                current_size + inputs[src_index]->tensor.shape.get(attr.axis));
          }
          new_def.dst_tensors.push_back(op_def.dst_tensors[0]);
          if (g == groups - 1) {
            // The last concat writes to the node output.
            concat_op.output_ids = {static_cast<int>(outputs[0]->id)};
          } else {
            // Intermediate concat; create a new tensor for it.
            concat_op.output_ids = {-(g + 1)};
            gpu_subgraph->new_tensors.push_back(
                {concatenated_shape, op_def.dst_tensors[0]});
          }
          RETURN_IF_ERROR(SelectConcat(attr, channels, new_def, gpu_info,
                                       &concat_op.operation));
        }
        return absl::OkStatus();
      } else {
        std::vector<int> channels(inputs.size());
        for (int i = 0; i < inputs.size(); ++i) {
          channels[i] = inputs[i]->tensor.shape.c;
        }
        return SelectConcat(attr, channels, op_def, gpu_info, gpu_op);
      }
    }
    case OperationType::CONVOLUTION_2D: {
      auto attr =
          absl::any_cast<Convolution2DAttributes>(node.operation.attributes);
      auto input_shape = inputs[0]->tensor.shape;
      auto output_shape = outputs[0]->tensor.shape;
      if (inputs.size() == 1) {
        if (!hints.Check(ModelHints::kNoWinogradOptimizations) &&
            WinogradFromNode(gpu_info, inputs, outputs, op_def, hints,
                             input_shape, output_shape, attr, gpu_subgraph)
                .ok()) {
          return absl::OkStatus();
        } else {
          gpu_op = InitSingleOpSubgraph(inputs, outputs, gpu_subgraph);
          *gpu_op =
              SelectConvolution(attr, output_shape, gpu_info, op_def, hints);
          return absl::OkStatus();
        }
      } else {
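        // CONVOLUTION_2D with runtime (non-constant) weights: convert the
        // second input into the weights layout expected by the selected
        // convolution kernel, then run the convolution with dynamic weights.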
        auto weights_shape = inputs[1]->tensor.shape;
        if (attr.bias.data.empty()) {
          attr.bias.shape = Linear(weights_shape.b);
          attr.bias.data.resize(weights_shape.b, 0.0f);
        }
        TensorDescriptor weights_desc = {op_def.src_tensors[1].data_type,
                                         TensorStorageType::BUFFER,
                                         Layout::BHWC};
        gpu_subgraph->operations.clear();
        gpu_subgraph->operations.resize(2);
        auto& converter_op = gpu_subgraph->operations[0];
        auto& conv_op = gpu_subgraph->operations[1];
        conv_op.input_ids = {static_cast<int>(inputs[0]->id), -1};
        conv_op.output_ids = {static_cast<int>(outputs[0]->id)};
        OperationDef conv_def = op_def;
        conv_def.src_tensors[1] = weights_desc;
        WeightsDescription conv_weights_desc;
        conv_op.operation = SelectConvolutionWithDynamicWeights(
            attr, weights_shape, output_shape, gpu_info, conv_def, hints,
            &conv_weights_desc);

        int aligned_output = AlignByN(
            weights_shape.b, conv_weights_desc.GetOutputGroupSize() * 4);
        int aligned_input = AlignByN(weights_shape.c, 4);
        gpu_subgraph->new_tensors = {
            {BHWC(1, 1, 1,
                  aligned_output * aligned_input * weights_shape.h *
                      weights_shape.w),
             weights_desc}};
        OperationDef converter_def;
        converter_def.precision = op_def.precision;
        converter_def.src_tensors.push_back(op_def.src_tensors[1]);
        converter_def.dst_tensors.push_back(weights_desc);

        converter_op.input_ids = {static_cast<int>(inputs[1]->id)};
        converter_op.output_ids = {-1};
        converter_op.operation = SelectConverterToConvWeights(
            conv_weights_desc, converter_def, hints);
        return absl::OkStatus();
      }
    }
    case OperationType::CONVOLUTION_TRANSPOSED: {
      auto attr = absl::any_cast<ConvolutionTransposedAttributes>(
          node.operation.attributes);
      if (inputs.size() == 1) {
        *gpu_op = SelectConvolutionTransposed(attr, gpu_info, op_def);
        return absl::OkStatus();
      } else {
        // CONVOLUTION_TRANSPOSED with runtime weights.
        OHWI weights_shape =
            OHWI(inputs[1]->tensor.shape.b, inputs[1]->tensor.shape.h,
                 inputs[1]->tensor.shape.w, inputs[1]->tensor.shape.c);
        if (attr.bias.data.empty()) {
          attr.bias.shape = Linear(weights_shape.o);
          attr.bias.data.resize(weights_shape.o, 0.0f);
        }
        gpu_subgraph->operations.clear();
        gpu_subgraph->operations.resize(2);
        auto& converter_op = gpu_subgraph->operations[0];
        auto& conv_op = gpu_subgraph->operations[1];
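        // The selected transposed-convolution kernel dictates the weights
        // layout; depending on it, the converter below fills either four 2D
        // textures or a single buffer of rearranged weights.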
        WeightsDescription weights_desc;
        conv_op.operation = SelectConvolutionTransposedWithDynamicWeights(
            attr, gpu_info, op_def, &weights_desc);
        conv_op.output_ids = {static_cast<int>(outputs[0]->id)};

        const int dst_depth = AlignByN(DivideRoundUp(weights_shape.o, 4),
                                       weights_desc.GetOutputGroupSize());
        const int src_depth = DivideRoundUp(weights_shape.i, 4);
        const int kernel_x = weights_shape.w;
        const int kernel_y = weights_shape.h;
        if (weights_desc.layout ==
                WeightsLayout::k2DX4I4YIsSpatialIAndXIsOOGroupO4 ||
            weights_desc.layout ==
                WeightsLayout::k2DX4O4YIsSpatialIAndXIsOOGroupI4) {
          // Weights are four 2D textures.
          conv_op.input_ids = {static_cast<int>(inputs[0]->id), -1, -2, -3, -4};
          int texture_width = dst_depth;
          int texture_height = src_depth * kernel_x * kernel_y;
          for (int i = 0; i < 4; ++i) {
            gpu_subgraph->new_tensors.push_back(
                {BHWC(1, texture_height, texture_width, 4),
                 TensorDescriptor(op_def.GetDataType(),
                                  TensorStorageType::TEXTURE_2D, Layout::HWC)});
          }
        } else {
          // Weights are a single buffer.
          conv_op.input_ids = {static_cast<int>(inputs[0]->id), -1};
          gpu_subgraph->new_tensors = {
              {BHWC(
                   1, 1, 1,
                   GetTotalElementsCountForLayout(weights_desc, weights_shape)),
               TensorDescriptor(op_def.GetDataType(), TensorStorageType::BUFFER,
                                Layout::HWC)}};
        }
        OperationDef conv_def = conv_op.operation->GetDefinition();
        OperationDef converter_def;
        converter_def.precision = op_def.precision;
        converter_def.src_tensors.push_back(op_def.src_tensors[1]);
        for (int i = 1; i < conv_def.src_tensors.size(); ++i) {
          converter_def.dst_tensors.push_back(conv_def.src_tensors[i]);
          converter_op.output_ids.push_back(-i);
        }

        converter_op.input_ids = {static_cast<int>(inputs[1]->id)};
        converter_op.operation =
            SelectConverterToConvWeights(weights_desc, converter_def, hints);
        return absl::OkStatus();
      }
    }
    case OperationType::DEPTHWISE_CONVOLUTION: {
      auto attr = absl::any_cast<DepthwiseConvolution2DAttributes>(
          node.operation.attributes);
      if (inputs.size() == 1) {
        *gpu_op = SelectDWConvolution(attr, gpu_info, op_def);
      } else {
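        // Runtime depthwise weights: the batch dimension of the weights tensor
        // carries the channel multiplier, and only multiplier == 1 is
        // supported here.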
        if (inputs[1]->tensor.shape.b != 1) {
          return absl::UnimplementedError(
              "No support of depthwise runtime weights with channel multiplier "
              "!= 1");
        }
        *gpu_op = SelectDWConvolutionDynamicWeights(attr, gpu_info, op_def);
      }
      return absl::OkStatus();
    }
    case OperationType::DEPTH_TO_SPACE: {
      auto attr =
          absl::any_cast<SpaceToDepthAttributes>(node.operation.attributes);
      SelectDepthToSpace(attr, op_def, gpu_op);
      return absl::OkStatus();
    }
    case OperationType::FULLY_CONNECTED: {
      auto attr =
          absl::any_cast<FullyConnectedAttributes>(node.operation.attributes);
      *gpu_op = SelectFullyConnected(attr, gpu_info, op_def,
                                     inputs[0]->tensor.shape.b);
      return absl::OkStatus();
    }
    case OperationType::FULLY_CONNECTED_INT8: {
      auto attr = absl::any_cast<FullyConnectedInt8Attributes>(
          node.operation.attributes);
      *gpu_op = SelectFullyConnected(attr, gpu_info, op_def);
      return absl::OkStatus();
    }
    case OperationType::GATHER: {
      auto attr = absl::any_cast<GatherAttributes>(node.operation.attributes);
      RETURN_IF_ERROR(SelectGather(attr, op_def, gpu_op));
      return absl::OkStatus();
    }
    case OperationType::LSTM: {
      *gpu_op = SelectLSTM(op_def, gpu_info);
      return absl::OkStatus();
    }
    case OperationType::MAX_UNPOOLING_2D: {
      auto attr =
          absl::any_cast<MaxUnpooling2DAttributes>(node.operation.attributes);
      *gpu_op = SelectMaxUnpooling(attr, op_def);
      return absl::OkStatus();
    }
    case OperationType::MEAN: {
      auto attr = absl::any_cast<MeanAttributes>(node.operation.attributes);
      *gpu_op = SelectReduce(attr.dims, inputs[0]->tensor.shape, op_type,
                             op_def, gpu_info);
      return absl::OkStatus();
    }
    case OperationType::MEAN_STDDEV_NORMALIZATION: {
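      // The last argument is the number of 4-channel slices,
      // i.e. DivideRoundUp(channels, 4).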
      MeanStdDevNormalization operation = CreateMeanStdDevNormalization(
          op_def, gpu_info, (inputs[0]->tensor.shape.c + 3) / 4);
      *gpu_op =
          absl::make_unique<MeanStdDevNormalization>(std::move(operation));
      return absl::OkStatus();
    }
    case OperationType::PAD: {
      auto attr = absl::any_cast<PadAttributes>(node.operation.attributes);
      SelectPadding(attr, op_def, gpu_op);
      return absl::OkStatus();
    }
    case OperationType::POOLING_2D: {
      auto attr =
          absl::any_cast<Pooling2DAttributes>(node.operation.attributes);
      *gpu_op = SelectPooling(attr, op_def);
      return absl::OkStatus();
    }
    case OperationType::PRELU: {
      auto attr = absl::any_cast<PReLUAttributes>(node.operation.attributes);
      *gpu_op = SelectPReLU(attr, gpu_info, op_def);
      return absl::OkStatus();
    }
    case OperationType::QUANTIZE_AND_DEQUANTIZE: {
      auto attr = absl::any_cast<QuantizeAndDequantizeAttributes>(
          node.operation.attributes);
      *gpu_op = SelectQuantizeAndDequantize(attr, op_def);
      return absl::OkStatus();
    }
    case OperationType::RELU: {
      auto attr = absl::any_cast<ReLUAttributes>(node.operation.attributes);
      *gpu_op = SelectReLU(attr, op_def);
      return absl::OkStatus();
    }
    case OperationType::RESAMPLER: {
      *gpu_op = SelectResampler(op_def);
      return absl::OkStatus();
    }
    case OperationType::RESHAPE: {
      const int src_channels = inputs[0]->tensor.shape.c;
      auto attr = absl::any_cast<ReshapeAttributes>(node.operation.attributes);
      SelectReshape(src_channels, attr.new_shape.c, op_def, gpu_op);
      return absl::OkStatus();
    }
    case OperationType::RESIZE: {
      auto attr = absl::any_cast<Resize2DAttributes>(node.operation.attributes);
      return SelectResize(attr, op_def, gpu_op);
    }
    case OperationType::SLICE: {
      auto attr = absl::any_cast<SliceAttributes>(node.operation.attributes);
      SelectStridedSlice(attr, op_def, gpu_op);
      return absl::OkStatus();
    }
    case OperationType::SOFTMAX: {
      SelectSoftmax(inputs[0]->tensor.shape, op_def, gpu_op);
      return absl::OkStatus();
    }
    case OperationType::SPACE_TO_DEPTH: {
      auto attr =
          absl::any_cast<SpaceToDepthAttributes>(node.operation.attributes);
      SelectSpaceToDepth(attr, op_def, gpu_op);
      return absl::OkStatus();
    }
    case OperationType::SPLIT: {
      auto attr = absl::any_cast<SplitAttributes>(node.operation.attributes);
      SelectSplit(attr, op_def, gpu_op);
      return absl::OkStatus();
    }
    case OperationType::TILE: {
      *gpu_op = SelectTile(op_def, inputs[0]->tensor.shape);
      return absl::OkStatus();
    }
    case OperationType::TRANSPOSE: {
      auto attr =
          absl::any_cast<TransposeAttributes>(node.operation.attributes);
      SelectTranspose(attr, op_def, gpu_op);
      return absl::OkStatus();
    }
    case OperationType::ABS:
    case OperationType::COPY:
    case OperationType::COS:
    case OperationType::ELU:
    case OperationType::EXP:
    case OperationType::HARD_SWISH:
    case OperationType::LOG:
    case OperationType::NEG:
    case OperationType::RSQRT:
    case OperationType::SIGMOID:
    case OperationType::SIN:
    case OperationType::SQRT:
    case OperationType::SQUARE:
    case OperationType::TANH: {
      GPUOperation operation =
          CreateElementwiseOneInput(gpu_info, op_def, op_type);
      *gpu_op = absl::make_unique<GPUOperation>(std::move(operation));
      return absl::OkStatus();
    }
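    // Binary elementwise ops: either two runtime inputs, or one runtime input
    // combined with a constant scalar/tensor stored in the attributes.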
    case OperationType::DIV:
    case OperationType::EQUAL:
    case OperationType::GREATER:
    case OperationType::GREATER_EQUAL:
    case OperationType::LESS:
    case OperationType::LESS_EQUAL:
    case OperationType::MAXIMUM:
    case OperationType::MINIMUM:
    case OperationType::MUL:
    case OperationType::NOT_EQUAL:
    case OperationType::POW:
    case OperationType::SQUARED_DIFF:
    case OperationType::SUB: {
      if (inputs.size() == 2) {
        GPUOperation operation =
            CreateElementwiseTwoInput(op_def, op_type, inputs[1]->tensor.shape);
        *gpu_op = absl::make_unique<GPUOperation>(std::move(operation));
        return absl::OkStatus();
      } else if (inputs.size() == 1 && node.operation.attributes.has_value()) {
        auto attr =
            absl::any_cast<ElementwiseAttributes>(node.operation.attributes);
        GPUOperation operation =
            CreateElementwise(gpu_info, op_def, op_type, attr);
        *gpu_op = absl::make_unique<GPUOperation>(std::move(operation));
        return absl::OkStatus();
      }
      return absl::UnimplementedError(absl::StrCat(
          "No support of ", node.operation.type, " with these parameters"));
    }
    case OperationType::REDUCE_MAXIMUM:
    case OperationType::REDUCE_MINIMUM:
    case OperationType::REDUCE_PRODUCT:
    case OperationType::REDUCE_SUM: {
      auto attr = absl::any_cast<ReduceAttributes>(node.operation.attributes);
      *gpu_op = SelectReduce(attr.dims, inputs[0]->tensor.shape, op_type,
                             op_def, gpu_info);
      return absl::OkStatus();
    }
    default:
      return SelectDefault(gpu_info, op_def, hints, inputs, outputs, node,
                           gpu_subgraph);
  }
}

}  // namespace gpu
}  // namespace tflite