1 /*
2 * Copyright (c) 2018-2020 Arm Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24 #include "arm_compute/graph/backends/CL/CLFunctionFactory.h"
25
26 #include "arm_compute/graph/Graph.h"
27 #include "arm_compute/graph/GraphContext.h"
28 #include "arm_compute/graph/backends/FunctionHelpers.h"
29 #include "arm_compute/runtime/CL/CLFunctions.h"
30 #include "arm_compute/runtime/CPP/CPPFunctions.h"
31 #include "src/core/CL/CLKernels.h"
32 #include "support/Cast.h"
33
34 using namespace arm_compute::utils::cast;
35
36 namespace arm_compute
37 {
38 namespace graph
39 {
40 namespace backends
41 {
42 /** Target specific information structure used to pass information to the layer templates */
43 struct CLTargetInfo
44 {
45 using TensorType = arm_compute::ICLTensor;
46 using SrcTensorType = const arm_compute::ICLTensor;
47 using TensorConcreteType = CLTensor;
48 static Target TargetType;
49 };
50
51 Target CLTargetInfo::TargetType = Target::CL;
52
53 /** Collection of CL convolution functions */
54 struct CLConvolutionLayerFunctions
55 {
56 using GenericConvolutionLayer = CLConvolutionLayer;
57 using GEMMConvolutionLayer = CLGEMMConvolutionLayer;
58 using DirectConvolutionLayer = CLDirectConvolutionLayer;
59 using WinogradConvolutionLayer = CLWinogradConvolutionLayer;
60 };
61
62 /** Collection of CL element-wise functions */
63 struct CLEltwiseFunctions
64 {
65 using Addition = CLArithmeticAddition;
66 using Subtraction = CLArithmeticSubtraction;
67 using Multiplication = CLPixelWiseMultiplication;
68 using Maximum = CLElementwiseMax;
69 };
70
71 /** Collection of CL unary element-wise functions */
72 struct CLUnaryEltwiseFunctions
73 {
74 using Exp = CLExpLayer;
75 };
76
77 /** Function and tensor types to be used inside a CL fused convolution/batch normalization layer */
78 struct CLFusedLayerTypes
79 {
80 using ConvolutionLayer = CLConvolutionLayer;
81 using DepthwiseConvolutionLayer = CLDepthwiseConvolutionLayer;
82 using FuseBatchNormalization = CLFuseBatchNormalization;
83 };
84
85 // TODO (isagot01): Remove once we support heterogeneous scheduling at function level
86 /** Wrapper for the CPP Function in the OpenCL backend **/
87 class CPPWrapperFunction : public IFunction
88 {
89 public:
90 /* Default constructor */
CPPWrapperFunction()91 CPPWrapperFunction()
92 : _tensors(), _func(nullptr)
93 {
94 }
95
run()96 void run() override
97 {
98 for(auto &tensor : _tensors)
99 {
100 tensor->map(CLScheduler::get().queue());
101 }
102 _func->run();
103
104 for(auto &tensor : _tensors)
105 {
106 tensor->unmap(CLScheduler::get().queue());
107 }
108 }
109
register_tensor(ICLTensor * tensor)110 void register_tensor(ICLTensor *tensor)
111 {
112 _tensors.push_back(tensor);
113 }
114
register_function(std::unique_ptr<IFunction> function)115 void register_function(std::unique_ptr<IFunction> function)
116 {
117 _func = std::move(function);
118 }
119
120 private:
121 std::vector<arm_compute::ICLTensor *> _tensors;
122 std::unique_ptr<IFunction> _func;
123 };
124
125 namespace detail
126 {
127 // Specialized functions
128 template <>
create_detection_output_layer(DetectionOutputLayerNode & node)129 std::unique_ptr<IFunction> create_detection_output_layer<CPPDetectionOutputLayer, CLTargetInfo>(DetectionOutputLayerNode &node)
130 {
131 validate_node<CLTargetInfo>(node, 3 /* expected inputs */, 1 /* expected outputs */);
132
133 // Extract IO and info
134 CLTargetInfo::TensorType *input0 = get_backing_tensor<CLTargetInfo>(node.input(0));
135 CLTargetInfo::TensorType *input1 = get_backing_tensor<CLTargetInfo>(node.input(1));
136 CLTargetInfo::TensorType *input2 = get_backing_tensor<CLTargetInfo>(node.input(2));
137 CLTargetInfo::TensorType *output = get_backing_tensor<CLTargetInfo>(node.output(0));
138 const DetectionOutputLayerInfo detect_info = node.detection_output_info();
139
140 ARM_COMPUTE_ERROR_ON(input0 == nullptr);
141 ARM_COMPUTE_ERROR_ON(input1 == nullptr);
142 ARM_COMPUTE_ERROR_ON(input2 == nullptr);
143 ARM_COMPUTE_ERROR_ON(output == nullptr);
144
145 // Create and configure function
146 auto func = support::cpp14::make_unique<CPPDetectionOutputLayer>();
147 func->configure(input0, input1, input2, output, detect_info);
148
149 // Log info
150 ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated "
151 << node.name()
152 << " Type: " << node.type()
153 << " Target: " << CLTargetInfo::TargetType
154 << " Data Type: " << input0->info()->data_type()
155 << " Input0 shape: " << input0->info()->tensor_shape()
156 << " Input1 shape: " << input1->info()->tensor_shape()
157 << " Input2 shape: " << input2->info()->tensor_shape()
158 << " Output shape: " << output->info()->tensor_shape()
159 << " DetectionOutputLayer info: " << detect_info
160 << std::endl);
161
162 auto wrap_function = support::cpp14::make_unique<CPPWrapperFunction>();
163
164 wrap_function->register_function(std::move(func));
165 wrap_function->register_tensor(input0);
166 wrap_function->register_tensor(input1);
167 wrap_function->register_tensor(input2);
168 wrap_function->register_tensor(output);
169
170 return RETURN_UNIQUE_PTR(wrap_function);
171 }
172 template <>
create_detection_post_process_layer(DetectionPostProcessLayerNode & node)173 std::unique_ptr<IFunction> create_detection_post_process_layer<CPPDetectionPostProcessLayer, CLTargetInfo>(DetectionPostProcessLayerNode &node)
174 {
175 validate_node<CLTargetInfo>(node, 3 /* expected inputs */, 4 /* expected outputs */);
176
177 // Extract IO and info
178 CLTargetInfo::TensorType *input0 = get_backing_tensor<CLTargetInfo>(node.input(0));
179 CLTargetInfo::TensorType *input1 = get_backing_tensor<CLTargetInfo>(node.input(1));
180 CLTargetInfo::TensorType *input2 = get_backing_tensor<CLTargetInfo>(node.input(2));
181 CLTargetInfo::TensorType *output0 = get_backing_tensor<CLTargetInfo>(node.output(0));
182 CLTargetInfo::TensorType *output1 = get_backing_tensor<CLTargetInfo>(node.output(1));
183 CLTargetInfo::TensorType *output2 = get_backing_tensor<CLTargetInfo>(node.output(2));
184 CLTargetInfo::TensorType *output3 = get_backing_tensor<CLTargetInfo>(node.output(3));
185 const DetectionPostProcessLayerInfo detect_info = node.detection_post_process_info();
186
187 ARM_COMPUTE_ERROR_ON(input0 == nullptr);
188 ARM_COMPUTE_ERROR_ON(input1 == nullptr);
189 ARM_COMPUTE_ERROR_ON(input2 == nullptr);
190 ARM_COMPUTE_ERROR_ON(output0 == nullptr);
191 ARM_COMPUTE_ERROR_ON(output1 == nullptr);
192 ARM_COMPUTE_ERROR_ON(output2 == nullptr);
193 ARM_COMPUTE_ERROR_ON(output3 == nullptr);
194
195 // Create and configure function
196 auto func = support::cpp14::make_unique<CPPDetectionPostProcessLayer>();
197 func->configure(input0, input1, input2, output0, output1, output2, output3, detect_info);
198
199 // Log info
200 ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated "
201 << node.name()
202 << " Type: " << node.type()
203 << " Target: " << CLTargetInfo::TargetType
204 << " Data Type: " << input0->info()->data_type()
205 << " Input0 shape: " << input0->info()->tensor_shape()
206 << " Input1 shape: " << input1->info()->tensor_shape()
207 << " Input2 shape: " << input2->info()->tensor_shape()
208 << " Output0 shape: " << output0->info()->tensor_shape()
209 << " Output1 shape: " << output1->info()->tensor_shape()
210 << " Output2 shape: " << output2->info()->tensor_shape()
211 << " Output3 shape: " << output3->info()->tensor_shape()
212 << " DetectionPostProcessLayer info: " << detect_info
213 << std::endl);
214
215 auto wrap_function = support::cpp14::make_unique<CPPWrapperFunction>();
216
217 wrap_function->register_function(std::move(func));
218 wrap_function->register_tensor(input0);
219 wrap_function->register_tensor(input1);
220 wrap_function->register_tensor(input2);
221 wrap_function->register_tensor(output0);
222 wrap_function->register_tensor(output1);
223 wrap_function->register_tensor(output2);
224 wrap_function->register_tensor(output3);
225
226 return RETURN_UNIQUE_PTR(wrap_function);
227 }
228 } // namespace detail
229
create(INode * node,GraphContext & ctx)230 std::unique_ptr<IFunction> CLFunctionFactory::create(INode *node, GraphContext &ctx)
231 {
232 if(node == nullptr)
233 {
234 return nullptr;
235 }
236
237 NodeType type = node->type();
238 switch(type)
239 {
240 case NodeType::ActivationLayer:
241 return detail::create_activation_layer<CLActivationLayer, CLTargetInfo>(*polymorphic_downcast<ActivationLayerNode *>(node));
242 case NodeType::ArgMinMaxLayer:
243 return detail::create_arg_min_max_layer<CLArgMinMaxLayer, CLTargetInfo>(*polymorphic_downcast<ArgMinMaxLayerNode *>(node));
244 case NodeType::BatchNormalizationLayer:
245 return detail::create_batch_normalization_layer<CLBatchNormalizationLayer, CLTargetInfo>(*polymorphic_downcast<BatchNormalizationLayerNode *>(node));
246 case NodeType::BoundingBoxTransformLayer:
247 return detail::create_bounding_box_transform_layer<CLBoundingBoxTransform, CLTargetInfo>(*polymorphic_downcast<BoundingBoxTransformLayerNode *>(node));
248 case NodeType::ChannelShuffleLayer:
249 return detail::create_channel_shuffle_layer<CLChannelShuffleLayer, CLTargetInfo>(*polymorphic_downcast<ChannelShuffleLayerNode *>(node));
250 case NodeType::ConvolutionLayer:
251 return detail::create_convolution_layer<CLConvolutionLayerFunctions, CLTargetInfo>(*polymorphic_downcast<ConvolutionLayerNode *>(node), ctx);
252 case NodeType::DeconvolutionLayer:
253 return detail::create_deconvolution_layer<CLDeconvolutionLayer, CLTargetInfo>(*polymorphic_downcast<DeconvolutionLayerNode *>(node), ctx);
254 case NodeType::ConcatenateLayer:
255 return detail::create_concatenate_layer<CLConcatenateLayer, CLTargetInfo>(*polymorphic_downcast<ConcatenateLayerNode *>(node));
256 case NodeType::DepthToSpaceLayer:
257 return detail::create_depth_to_space_layer<CLDepthToSpaceLayer, CLTargetInfo>(*polymorphic_downcast<DepthToSpaceLayerNode *>(node));
258 case NodeType::DepthwiseConvolutionLayer:
259 return detail::create_depthwise_convolution_layer<CLDepthwiseConvolutionLayer, CLTargetInfo>(*polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node));
260 case NodeType::DequantizationLayer:
261 return detail::create_dequantization_layer<CLDequantizationLayer, CLTargetInfo>(*polymorphic_downcast<DequantizationLayerNode *>(node));
262 case NodeType::DetectionOutputLayer:
263 return detail::create_detection_output_layer<CPPDetectionOutputLayer, CLTargetInfo>(*polymorphic_downcast<DetectionOutputLayerNode *>(node));
264 case NodeType::DetectionPostProcessLayer:
265 return detail::create_detection_post_process_layer<CPPDetectionPostProcessLayer, CLTargetInfo>(*polymorphic_downcast<DetectionPostProcessLayerNode *>(node));
266 case NodeType::EltwiseLayer:
267 return detail::create_eltwise_layer<CLEltwiseFunctions, CLTargetInfo>(*polymorphic_downcast<EltwiseLayerNode *>(node));
268 case NodeType::UnaryEltwiseLayer:
269 return detail::create_unary_eltwise_layer<CLUnaryEltwiseFunctions, CLTargetInfo>(*polymorphic_downcast<UnaryEltwiseLayerNode *>(node));
270 case NodeType::FlattenLayer:
271 return detail::create_flatten_layer<CLFlattenLayer, CLTargetInfo>(*polymorphic_downcast<FlattenLayerNode *>(node));
272 case NodeType::FullyConnectedLayer:
273 return detail::create_fully_connected_layer<CLFullyConnectedLayer, CLTargetInfo>(*polymorphic_downcast<FullyConnectedLayerNode *>(node), ctx);
274 case NodeType::FusedConvolutionBatchNormalizationLayer:
275 return detail::create_fused_convolution_batch_normalization_layer<CLFusedLayerTypes, CLTargetInfo>(*polymorphic_downcast<FusedConvolutionBatchNormalizationNode *>(node), ctx);
276 case NodeType::FusedDepthwiseConvolutionBatchNormalizationLayer:
277 return detail::create_fused_depthwise_convolution_batch_normalization_layer<CLFusedLayerTypes, CLTargetInfo>(*polymorphic_downcast<FusedDepthwiseConvolutionBatchNormalizationNode *>(node), ctx);
278 case NodeType::GenerateProposalsLayer:
279 return detail::create_generate_proposals_layer<CLGenerateProposalsLayer, CLTargetInfo>(*polymorphic_downcast<GenerateProposalsLayerNode *>(node), ctx);
280 case NodeType::L2NormalizeLayer:
281 return detail::create_l2_normalize_layer<CLL2NormalizeLayer, CLTargetInfo>(*polymorphic_downcast<L2NormalizeLayerNode *>(node), ctx);
282 case NodeType::NormalizationLayer:
283 return detail::create_normalization_layer<CLNormalizationLayer, CLTargetInfo>(*polymorphic_downcast<NormalizationLayerNode *>(node), ctx);
284 case NodeType::NormalizePlanarYUVLayer:
285 return detail::create_normalize_planar_yuv_layer<CLNormalizePlanarYUVLayer, CLTargetInfo>(*polymorphic_downcast<NormalizePlanarYUVLayerNode *>(node));
286 case NodeType::PadLayer:
287 return detail::create_pad_layer<CLPadLayer, CLTargetInfo>(*polymorphic_downcast<PadLayerNode *>(node));
288 case NodeType::PermuteLayer:
289 return detail::create_permute_layer<CLPermute, CLTargetInfo>(*polymorphic_downcast<PermuteLayerNode *>(node));
290 case NodeType::PoolingLayer:
291 return detail::create_pooling_layer<CLPoolingLayer, CLTargetInfo>(*polymorphic_downcast<PoolingLayerNode *>(node));
292 case NodeType::PReluLayer:
293 return detail::create_prelu_layer<CLPReluLayer, CLTargetInfo>(*polymorphic_downcast<PReluLayerNode *>(node));
294 case NodeType::PrintLayer:
295 return detail::create_print_layer<CLTargetInfo>(*polymorphic_downcast<PrintLayerNode *>(node));
296 case NodeType::PriorBoxLayer:
297 return detail::create_priorbox_layer<CLPriorBoxLayer, CLTargetInfo>(*polymorphic_downcast<PriorBoxLayerNode *>(node));
298 case NodeType::QuantizationLayer:
299 return detail::create_quantization_layer<CLQuantizationLayer, CLTargetInfo>(*polymorphic_downcast<QuantizationLayerNode *>(node));
300 case NodeType::ReductionOperationLayer:
301 return detail::create_reduction_operation_layer<CLReductionOperation, CLTargetInfo>(*polymorphic_downcast<ReductionLayerNode *>(node), ctx);
302 case NodeType::ReorgLayer:
303 return detail::create_reorg_layer<CLReorgLayer, CLTargetInfo>(*polymorphic_downcast<ReorgLayerNode *>(node));
304 case NodeType::ReshapeLayer:
305 return detail::create_reshape_layer<CLReshapeLayer, CLTargetInfo>(*polymorphic_downcast<ReshapeLayerNode *>(node));
306 case NodeType::ResizeLayer:
307 return detail::create_resize_layer<CLScale, CLTargetInfo>(*polymorphic_downcast<ResizeLayerNode *>(node));
308 case NodeType::ROIAlignLayer:
309 return detail::create_roi_align_layer<CLROIAlignLayer, CLTargetInfo>(*polymorphic_downcast<ROIAlignLayerNode *>(node));
310 case NodeType::SliceLayer:
311 return detail::create_slice_layer<CLSlice, CLTargetInfo>(*polymorphic_downcast<SliceLayerNode *>(node));
312 case NodeType::SoftmaxLayer:
313 return detail::create_softmax_layer<CLSoftmaxLayer, CLTargetInfo>(*polymorphic_downcast<SoftmaxLayerNode *>(node), ctx);
314 case NodeType::StackLayer:
315 return detail::create_stack_layer<CLStackLayer, CLTargetInfo>(*polymorphic_downcast<StackLayerNode *>(node));
316 case NodeType::StridedSliceLayer:
317 return detail::create_strided_slice_layer<CLStridedSlice, CLTargetInfo>(*polymorphic_downcast<StridedSliceLayerNode *>(node));
318 case NodeType::UpsampleLayer:
319 return detail::create_upsample_layer<CLUpsampleLayer, CLTargetInfo>(*polymorphic_downcast<UpsampleLayerNode *>(node), ctx);
320 case NodeType::YOLOLayer:
321 return detail::create_yolo_layer<CLYOLOLayer, CLTargetInfo>(*polymorphic_downcast<YOLOLayerNode *>(node), ctx);
322 default:
323 return nullptr;
324 }
325 }
326 } // namespace backends
327 } // namespace graph
328 } // namespace arm_compute
329