/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/delegates/gpu/common/selectors/special_selector.h"

#include <map>
#include <memory>
#include <set>
#include <string>
#include <utility>

#include "absl/memory/memory.h"
#include "absl/types/any.h"
#include "tensorflow/lite/delegates/gpu/common/data_type.h"
#include "tensorflow/lite/delegates/gpu/common/operations.h"
#include "tensorflow/lite/delegates/gpu/common/shape.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
#include "tensorflow/lite/delegates/gpu/common/task/tensor_desc.h"
#include "tensorflow/lite/delegates/gpu/common/tasks/special/depthwise_conv_plus_1x1_conv.h"
#include "tensorflow/lite/delegates/gpu/common/tasks/special/fc_fc_add.h"
#include "tensorflow/lite/delegates/gpu/common/tensor.h"

namespace tflite {
namespace gpu {
namespace {
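// Tries to recognize a DEPTHWISE_CONVOLUTION node followed by a single
// CONVOLUTION_2D consumer and replace the pair with one fused GPU operation.
// Returns NotFoundError whenever the pattern starting at first_node_id does
// not match or the fusion is unsupported on this GPU.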
absl::Status TryDepthwiseConvPlus1x1Conv(
    const GpuInfo& gpu_info, CalculationsPrecision precision,
    const GraphFloat32& graph, NodeId first_node_id,
    const std::map<ValueId, TensorDescriptor>& tensor_descriptors,
    std::set<NodeId>* consumed_nodes, GPUOperationsSubgraph* gpu_subgraph) {
  auto* dw_node = graph.GetNode(first_node_id);
  if (dw_node == nullptr) {
    return absl::NotFoundError("DepthwiseConvPlus1x1Conv not suitable.");
  }
  if (OperationTypeFromString(dw_node->operation.type) !=
      OperationType::DEPTHWISE_CONVOLUTION) {
    return absl::NotFoundError("DepthwiseConvPlus1x1Conv not suitable.");
  }
  auto dw_inputs = graph.FindInputs(dw_node->id);
  if (dw_inputs.size() != 1) {
    return absl::NotFoundError("DepthwiseConvPlus1x1Conv not suitable.");
  }
  auto dw_outputs = graph.FindOutputs(dw_node->id);
  auto consumers = graph.FindConsumers(dw_outputs[0]->id);
  if (consumers.size() != 1) {
    return absl::NotFoundError("DepthwiseConvPlus1x1Conv not suitable.");
  }
  auto* conv_node = consumers[0];
  if (conv_node == nullptr) {
    return absl::NotFoundError("DepthwiseConvPlus1x1Conv not suitable.");
  }
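  // A node already fused into another subgraph cannot be fused again.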
  if (consumed_nodes->find(conv_node->id) != consumed_nodes->end()) {
    return absl::NotFoundError("DepthwiseConvPlus1x1Conv not suitable.");
  }
  if (OperationTypeFromString(conv_node->operation.type) !=
      OperationType::CONVOLUTION_2D) {
    return absl::NotFoundError("DepthwiseConvPlus1x1Conv not suitable.");
  }
  if (graph.FindInputs(conv_node->id).size() != 1) {
    return absl::NotFoundError("DepthwiseConvPlus1x1Conv not suitable.");
  }
  auto dw_attr = absl::any_cast<DepthwiseConvolution2DAttributes>(
      dw_node->operation.attributes);
  auto conv_attr =
      absl::any_cast<Convolution2DAttributes>(conv_node->operation.attributes);
  auto conv_outputs = graph.FindOutputs(conv_node->id);
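  // Build the fused operation definition: the source tensor is the depthwise
  // input and the destination tensor is the 1x1 convolution output.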
  OperationDef op_def;
  op_def.precision = precision;
  auto it = tensor_descriptors.find(dw_inputs[0]->id);
  if (it != tensor_descriptors.end()) {
    op_def.src_tensors.push_back(it->second);
  }
  it = tensor_descriptors.find(conv_outputs[0]->id);
  if (it != tensor_descriptors.end()) {
    op_def.dst_tensors.push_back(it->second);
  }
  if (!IsDepthwiseConvPlus1x1ConvSupported(op_def, gpu_info, dw_attr,
                                           conv_attr)) {
    return absl::NotFoundError("DepthwiseConvPlus1x1Conv not suitable.");
  }
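  // The pattern matched and is supported: emit the fused operation and mark
  // both original nodes as consumed.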
  std::unique_ptr<GPUOperation>* gpu_op =
      InitSingleOpSubgraph(dw_inputs, conv_outputs, gpu_subgraph);
  auto operation = CreateDepthwiseConvPlus1x1Conv(op_def, dw_attr, conv_attr);
  *gpu_op = absl::make_unique<GPUOperation>(std::move(operation));
  consumed_nodes->insert(dw_node->id);
  consumed_nodes->insert(conv_node->id);
  return absl::OkStatus();
}

// fully connected + fully connected + add
absl::Status TryFCFCAdd(
    const GpuInfo& gpu_info, CalculationsPrecision precision,
    const GraphFloat32& graph, NodeId first_node_id,
    const std::map<ValueId, TensorDescriptor>& tensor_descriptors,
    std::set<NodeId>* consumed_nodes, GPUOperationsSubgraph* gpu_subgraph) {
  auto* fc0_node = graph.GetNode(first_node_id);
  if (fc0_node == nullptr) {
    return absl::NotFoundError("FCFCAdd not suitable.");
  }
  auto first_op_type = OperationTypeFromString(fc0_node->operation.type);
  if (first_op_type != OperationType::FULLY_CONNECTED &&
      first_op_type != OperationType::FULLY_CONNECTED_INT8) {
    return absl::NotFoundError("FCFCAdd not suitable.");
  }
  const bool first_quantized =
      first_op_type == OperationType::FULLY_CONNECTED_INT8;
  auto fc0_inputs = graph.FindInputs(fc0_node->id);
  if (fc0_inputs.size() != 1) {
    return absl::NotFoundError("FCFCAdd not suitable.");
  }
  auto fc0_output_id = graph.FindOutputs(fc0_node->id)[0]->id;
  auto consumers = graph.FindConsumers(fc0_output_id);
  if (consumers.size() != 1) {
    return absl::NotFoundError("FCFCAdd not suitable.");
  }
  auto* add_node = consumers[0];
  if (add_node == nullptr) {
    return absl::NotFoundError("FCFCAdd not suitable.");
  }
  if (consumed_nodes->find(add_node->id) != consumed_nodes->end()) {
    return absl::NotFoundError("FCFCAdd not suitable.");
  }
  if (OperationTypeFromString(add_node->operation.type) != OperationType::ADD) {
    return absl::NotFoundError("FCFCAdd not suitable.");
  }
  auto add_inputs = graph.FindInputs(add_node->id);
  if (add_inputs.size() != 2) {
    return absl::NotFoundError("FCFCAdd not suitable.");
  }
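  // The ADD node has exactly two inputs, and one of them is fc0's output.
  // Since ids are plain integers, the other input's id is the sum of both
  // input ids minus fc0's output id.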
  auto fc1_output_id = add_inputs[0]->id + add_inputs[1]->id - fc0_output_id;
  auto* fc1_node = graph.FindProducer(fc1_output_id);
  if (fc1_node == nullptr) {
    return absl::NotFoundError("FCFCAdd not suitable.");
  }
  auto second_op_type = OperationTypeFromString(fc1_node->operation.type);
  if (second_op_type != OperationType::FULLY_CONNECTED &&
      second_op_type != OperationType::FULLY_CONNECTED_INT8) {
    return absl::NotFoundError("FCFCAdd not suitable.");
  }
  const bool second_quantized =
      second_op_type == OperationType::FULLY_CONNECTED_INT8;
  const bool both_quantized = first_quantized && second_quantized;
  const bool both_not_quantized = !first_quantized && !second_quantized;
  if (!(both_quantized || both_not_quantized)) {
    return absl::NotFoundError("FCFCAdd not suitable.");
  }
  if (consumed_nodes->find(fc1_node->id) != consumed_nodes->end()) {
    return absl::NotFoundError("FCFCAdd not suitable.");
  }
  auto fc1_inputs = graph.FindInputs(fc1_node->id);
  if (fc1_inputs.size() != 1) {
    return absl::NotFoundError("FCFCAdd not suitable.");
  }
  auto add_outputs = graph.FindOutputs(add_node->id);

  OperationDef op_def;
  op_def.precision = precision;
  auto it = tensor_descriptors.find(fc0_inputs[0]->id);
  if (it != tensor_descriptors.end()) {
    op_def.src_tensors.push_back(it->second);
  }
  it = tensor_descriptors.find(fc1_inputs[0]->id);
  if (it != tensor_descriptors.end()) {
    op_def.src_tensors.push_back(it->second);
  }
  it = tensor_descriptors.find(add_outputs[0]->id);
  if (it != tensor_descriptors.end()) {
    op_def.dst_tensors.push_back(it->second);
  }

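  // The fused operation reads both FC inputs, so merge them into one list.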
  for (int i = 0; i < fc1_inputs.size(); ++i) {
    fc0_inputs.push_back(fc1_inputs[i]);
  }
  std::unique_ptr<GPUOperation>* gpu_op =
      InitSingleOpSubgraph(fc0_inputs, add_outputs, gpu_subgraph);
  FCFCAdd fc;
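  // Both FC nodes were verified above to be of the same kind, so their
  // attributes share one type; the fusion also requires matching output
  // channel counts.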
  if (both_not_quantized) {
    auto fc0_attr = absl::any_cast<FullyConnectedAttributes>(
        fc0_node->operation.attributes);
    auto fc1_attr = absl::any_cast<FullyConnectedAttributes>(
        fc1_node->operation.attributes);
    if (fc0_attr.weights.shape.o != fc1_attr.weights.shape.o) {
      return absl::NotFoundError("FCFCAdd not suitable.");
    }
    fc = CreateFCFCAdd(gpu_info, op_def, fc0_attr, fc1_attr);
  } else {
    // both_quantized
    auto fc0_attr = absl::any_cast<FullyConnectedInt8Attributes>(
        fc0_node->operation.attributes);
    auto fc1_attr = absl::any_cast<FullyConnectedInt8Attributes>(
        fc1_node->operation.attributes);
    if (fc0_attr.weights.shape.o != fc1_attr.weights.shape.o) {
      return absl::NotFoundError("FCFCAdd not suitable.");
    }
    fc = CreateFCFCAdd(gpu_info, op_def, fc0_attr, fc1_attr);
  }
  *gpu_op = absl::make_unique<FCFCAdd>(std::move(fc));
  consumed_nodes->insert(fc0_node->id);
  consumed_nodes->insert(fc1_node->id);
  consumed_nodes->insert(add_node->id);
  return absl::OkStatus();
}
}  // namespace

absl::Status GPUSubgraphFromGraph(
    const GpuInfo& gpu_info, CalculationsPrecision precision,
    const GraphFloat32& graph, NodeId first_node_id,
    const std::map<ValueId, TensorDescriptor>& tensor_descriptors,
    std::set<NodeId>* consumed_nodes, GPUOperationsSubgraph* gpu_subgraph,
    std::string* name) {
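  // Each fusion is attempted only on GPUs for which the specialized kernel
  // is enabled; the first pattern that matches wins.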
  if ((gpu_info.IsAdreno() || gpu_info.IsNvidia() ||
       (gpu_info.IsApple() && gpu_info.apple_info.IsBionic())) &&
      TryDepthwiseConvPlus1x1Conv(gpu_info, precision, graph, first_node_id,
                                  tensor_descriptors, consumed_nodes,
                                  gpu_subgraph)
          .ok()) {
    *name = "depthwise_conv_plus_1x1_conv";
    return absl::OkStatus();
  }
  if ((gpu_info.IsIntel() || gpu_info.IsNvidia()) &&
      TryFCFCAdd(gpu_info, precision, graph, first_node_id, tensor_descriptors,
                 consumed_nodes, gpu_subgraph)
          .ok()) {
    *name = "fully_connected_x2_and_add";
    return absl::OkStatus();
  }
  return absl::NotFoundError("No special combination.");
}

}  // namespace gpu
}  // namespace tflite