/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/delegates/gpu/common/selectors/special_selector.h"

#include "absl/types/any.h"
#include "tensorflow/lite/delegates/gpu/common/data_type.h"
#include "tensorflow/lite/delegates/gpu/common/operations.h"
#include "tensorflow/lite/delegates/gpu/common/shape.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
#include "tensorflow/lite/delegates/gpu/common/task/tensor_desc.h"
#include "tensorflow/lite/delegates/gpu/common/tasks/special/depthwise_conv_plus_1x1_conv.h"
#include "tensorflow/lite/delegates/gpu/common/tasks/special/fc_fc_add.h"
#include "tensorflow/lite/delegates/gpu/common/tensor.h"

namespace tflite {
namespace gpu {
namespace {
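// Tries to recognize the pattern DEPTHWISE_CONVOLUTION -> CONVOLUTION_2D
// starting at `first_node_id`, where the depthwise output feeds exactly one
// consumer, and to replace both nodes with a single fused GPU operation.
// Returns NotFoundError when the pattern does not match or is unsupported.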
absl::Status TryDepthwiseConvPlus1x1Conv(
    const GpuInfo& gpu_info, CalculationsPrecision precision,
    const GraphFloat32& graph, NodeId first_node_id,
    const std::map<ValueId, TensorDescriptor>& tensor_descriptors,
    std::set<NodeId>* consumed_nodes, GPUOperationsSubgraph* gpu_subgraph) {
  auto* dw_node = graph.GetNode(first_node_id);
  if (dw_node == nullptr) {
    return absl::NotFoundError("DepthwiseConvPlus1x1Conv not suitable.");
  }
  if (OperationTypeFromString(dw_node->operation.type) !=
      OperationType::DEPTHWISE_CONVOLUTION) {
    return absl::NotFoundError("DepthwiseConvPlus1x1Conv not suitable.");
  }
  auto dw_inputs = graph.FindInputs(dw_node->id);
  if (dw_inputs.size() != 1) {
    return absl::NotFoundError("DepthwiseConvPlus1x1Conv not suitable.");
  }
  auto dw_outputs = graph.FindOutputs(dw_node->id);
  auto consumers = graph.FindConsumers(dw_outputs[0]->id);
  if (consumers.size() != 1) {
    return absl::NotFoundError("DepthwiseConvPlus1x1Conv not suitable.");
  }
  auto* conv_node = consumers[0];
  if (conv_node == nullptr) {
    return absl::NotFoundError("DepthwiseConvPlus1x1Conv not suitable.");
  }
  if (consumed_nodes->find(conv_node->id) != consumed_nodes->end()) {
    return absl::NotFoundError("DepthwiseConvPlus1x1Conv not suitable.");
  }
  if (OperationTypeFromString(conv_node->operation.type) !=
      OperationType::CONVOLUTION_2D) {
    return absl::NotFoundError("DepthwiseConvPlus1x1Conv not suitable.");
  }
  if (graph.FindInputs(conv_node->id).size() != 1) {
    return absl::NotFoundError("DepthwiseConvPlus1x1Conv not suitable.");
  }
  auto dw_attr = absl::any_cast<DepthwiseConvolution2DAttributes>(
      dw_node->operation.attributes);
  auto conv_attr =
      absl::any_cast<Convolution2DAttributes>(conv_node->operation.attributes);
  auto conv_outputs = graph.FindOutputs(conv_node->id);
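  // The fused kernel has one source tensor (the depthwise input) and one
  // destination tensor (the 1x1 conv output).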
  OperationDef op_def;
  op_def.precision = precision;
  auto it = tensor_descriptors.find(dw_inputs[0]->id);
  if (it != tensor_descriptors.end()) {
    op_def.src_tensors.push_back(it->second);
  }
  it = tensor_descriptors.find(conv_outputs[0]->id);
  if (it != tensor_descriptors.end()) {
    op_def.dst_tensors.push_back(it->second);
  }
  if (!IsDepthwiseConvPlus1x1ConvSupported(op_def, gpu_info, dw_attr,
                                           conv_attr)) {
    return absl::NotFoundError("DepthwiseConvPlus1x1Conv not suitable.");
  }
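  // Pattern matched: emit one fused operation and mark both original nodes as
  // consumed so they are not lowered again.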
  std::unique_ptr<GPUOperation>* gpu_op =
      InitSingleOpSubgraph(dw_inputs, conv_outputs, gpu_subgraph);
  auto operation = CreateDepthwiseConvPlus1x1Conv(op_def, dw_attr, conv_attr);
  *gpu_op = absl::make_unique<GPUOperation>(std::move(operation));
  consumed_nodes->insert(dw_node->id);
  consumed_nodes->insert(conv_node->id);
  return absl::OkStatus();
}

// Tries to recognize the pattern fully connected + fully connected + add,
// where the outputs of two FULLY_CONNECTED (or FULLY_CONNECTED_INT8) nodes
// are combined by a single ADD node, and to fuse all three into one FCFCAdd
// operation.
absl::Status TryFCFCAdd(
    const GpuInfo& gpu_info, CalculationsPrecision precision,
    const GraphFloat32& graph, NodeId first_node_id,
    const std::map<ValueId, TensorDescriptor>& tensor_descriptors,
    std::set<NodeId>* consumed_nodes, GPUOperationsSubgraph* gpu_subgraph) {
  auto* fc0_node = graph.GetNode(first_node_id);
  if (fc0_node == nullptr) {
    return absl::NotFoundError("FCFCAdd not suitable.");
  }
  auto first_op_type = OperationTypeFromString(fc0_node->operation.type);
  if (first_op_type != OperationType::FULLY_CONNECTED &&
      first_op_type != OperationType::FULLY_CONNECTED_INT8) {
    return absl::NotFoundError("FCFCAdd not suitable.");
  }
  const bool first_quantized =
      first_op_type == OperationType::FULLY_CONNECTED_INT8;
  auto fc0_inputs = graph.FindInputs(fc0_node->id);
  if (fc0_inputs.size() != 1) {
    return absl::NotFoundError("FCFCAdd not suitable.");
  }
  auto fc0_output_id = graph.FindOutputs(fc0_node->id)[0]->id;
  auto consumers = graph.FindConsumers(fc0_output_id);
  if (consumers.size() != 1) {
    return absl::NotFoundError("FCFCAdd not suitable.");
  }
  auto* add_node = consumers[0];
  if (add_node == nullptr) {
    return absl::NotFoundError("FCFCAdd not suitable.");
  }
  if (consumed_nodes->find(add_node->id) != consumed_nodes->end()) {
    return absl::NotFoundError("FCFCAdd not suitable.");
  }
  if (OperationTypeFromString(add_node->operation.type) != OperationType::ADD) {
    return absl::NotFoundError("FCFCAdd not suitable.");
  }
  auto add_inputs = graph.FindInputs(add_node->id);
  if (add_inputs.size() != 2) {
    return absl::NotFoundError("FCFCAdd not suitable.");
  }
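  // The ADD has exactly two inputs and one of them is fc0's output, so the
  // other branch's output id is the sum of both input ids minus the known one.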
  auto fc1_output_id = add_inputs[0]->id + add_inputs[1]->id - fc0_output_id;
  auto* fc1_node = graph.FindProducer(fc1_output_id);
  if (fc1_node == nullptr) {
    return absl::NotFoundError("FCFCAdd not suitable.");
  }
  auto second_op_type = OperationTypeFromString(fc1_node->operation.type);
  if (second_op_type != OperationType::FULLY_CONNECTED &&
      second_op_type != OperationType::FULLY_CONNECTED_INT8) {
    return absl::NotFoundError("FCFCAdd not suitable.");
  }
  const bool second_quantized =
      second_op_type == OperationType::FULLY_CONNECTED_INT8;
  const bool both_quantized = first_quantized && second_quantized;
  const bool both_not_quantized = !first_quantized && !second_quantized;
  if (!(both_quantized || both_not_quantized)) {
    return absl::NotFoundError("FCFCAdd not suitable.");
  }
  if (consumed_nodes->find(fc1_node->id) != consumed_nodes->end()) {
    return absl::NotFoundError("FCFCAdd not suitable.");
  }
  auto fc1_inputs = graph.FindInputs(fc1_node->id);
  if (fc1_inputs.size() != 1) {
    return absl::NotFoundError("FCFCAdd not suitable.");
  }
  auto add_outputs = graph.FindOutputs(add_node->id);

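  // The fused operation reads two source tensors (one per FC branch) and
  // writes the ADD output.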
  OperationDef op_def;
  op_def.precision = precision;
  auto it = tensor_descriptors.find(fc0_inputs[0]->id);
  if (it != tensor_descriptors.end()) {
    op_def.src_tensors.push_back(it->second);
  }
  it = tensor_descriptors.find(fc1_inputs[0]->id);
  if (it != tensor_descriptors.end()) {
    op_def.src_tensors.push_back(it->second);
  }
  it = tensor_descriptors.find(add_outputs[0]->id);
  if (it != tensor_descriptors.end()) {
    op_def.dst_tensors.push_back(it->second);
  }

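  // Merge the input lists so the fused op sees fc0's input followed by fc1's.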
  for (int i = 0; i < fc1_inputs.size(); ++i) {
    fc0_inputs.push_back(fc1_inputs[i]);
  }
  std::unique_ptr<GPUOperation>* gpu_op =
      InitSingleOpSubgraph(fc0_inputs, add_outputs, gpu_subgraph);
  FCFCAdd fc;
  if (both_not_quantized) {
    auto fc0_attr = absl::any_cast<FullyConnectedAttributes>(
        fc0_node->operation.attributes);
    auto fc1_attr = absl::any_cast<FullyConnectedAttributes>(
        fc1_node->operation.attributes);
    if (fc0_attr.weights.shape.o != fc1_attr.weights.shape.o) {
      return absl::NotFoundError("FCFCAdd not suitable.");
    }
    fc = CreateFCFCAdd(gpu_info, op_def, fc0_attr, fc1_attr);
  } else {
    // both_quantized
    auto fc0_attr = absl::any_cast<FullyConnectedInt8Attributes>(
        fc0_node->operation.attributes);
    auto fc1_attr = absl::any_cast<FullyConnectedInt8Attributes>(
        fc1_node->operation.attributes);
    if (fc0_attr.weights.shape.o != fc1_attr.weights.shape.o) {
      return absl::NotFoundError("FCFCAdd not suitable.");
    }
    fc = CreateFCFCAdd(gpu_info, op_def, fc0_attr, fc1_attr);
  }
  *gpu_op = absl::make_unique<FCFCAdd>(std::move(fc));
  consumed_nodes->insert(fc0_node->id);
  consumed_nodes->insert(fc1_node->id);
  consumed_nodes->insert(add_node->id);
  return absl::OkStatus();
}
}  // namespace

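// Attempts to match one of the special fused patterns above starting at
// `first_node_id`. On success, fills `gpu_subgraph`, records the consumed
// nodes, and reports the fusion name in `name`.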
absl::Status GPUSubgraphFromGraph(
    const GpuInfo& gpu_info, CalculationsPrecision precision,
    const GraphFloat32& graph, NodeId first_node_id,
    const std::map<ValueId, TensorDescriptor>& tensor_descriptors,
    std::set<NodeId>* consumed_nodes, GPUOperationsSubgraph* gpu_subgraph,
    std::string* name) {
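  // Each fusion is attempted only on the GPU vendors it is enabled for.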
  if ((gpu_info.IsAdreno() || gpu_info.IsNvidia() ||
       (gpu_info.IsApple() && gpu_info.apple_info.IsBionic())) &&
      TryDepthwiseConvPlus1x1Conv(gpu_info, precision, graph, first_node_id,
                                  tensor_descriptors, consumed_nodes,
                                  gpu_subgraph)
          .ok()) {
    *name = "depthwise_conv_plus_1x1_conv";
    return absl::OkStatus();
  }
  if ((gpu_info.IsIntel() || gpu_info.IsNvidia()) &&
      TryFCFCAdd(gpu_info, precision, graph, first_node_id, tensor_descriptors,
                 consumed_nodes, gpu_subgraph)
          .ok()) {
    *name = "fully_connected_x2_and_add";
    return absl::OkStatus();
  }
  return absl::NotFoundError("No special combination.");
}

}  // namespace gpu
}  // namespace tflite