• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2018-2020 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #include "arm_compute/graph/backends/CL/CLFunctionFactory.h"
25 
26 #include "arm_compute/graph/Graph.h"
27 #include "arm_compute/graph/GraphContext.h"
28 #include "arm_compute/graph/backends/FunctionHelpers.h"
29 #include "arm_compute/runtime/CL/CLFunctions.h"
30 #include "arm_compute/runtime/CPP/CPPFunctions.h"
31 #include "src/core/CL/CLKernels.h"
32 #include "support/Cast.h"
33 
34 using namespace arm_compute::utils::cast;
35 
36 namespace arm_compute
37 {
38 namespace graph
39 {
40 namespace backends
41 {
42 /** Target specific information structure used to pass information to the layer templates */
43 struct CLTargetInfo
44 {
45     using TensorType         = arm_compute::ICLTensor;
46     using SrcTensorType      = const arm_compute::ICLTensor;
47     using TensorConcreteType = CLTensor;
48     static Target TargetType;
49 };
50 
51 Target CLTargetInfo::TargetType = Target::CL;
52 
53 /** Collection of CL convolution functions */
54 struct CLConvolutionLayerFunctions
55 {
56     using GenericConvolutionLayer  = CLConvolutionLayer;
57     using GEMMConvolutionLayer     = CLGEMMConvolutionLayer;
58     using DirectConvolutionLayer   = CLDirectConvolutionLayer;
59     using WinogradConvolutionLayer = CLWinogradConvolutionLayer;
60 };
61 
62 /** Collection of CL element-wise functions */
63 struct CLEltwiseFunctions
64 {
65     using Addition       = CLArithmeticAddition;
66     using Subtraction    = CLArithmeticSubtraction;
67     using Multiplication = CLPixelWiseMultiplication;
68     using Maximum        = CLElementwiseMax;
69 };
70 
71 /** Collection of CL unary element-wise functions */
72 struct CLUnaryEltwiseFunctions
73 {
74     using Exp = CLExpLayer;
75 };
76 
77 /** Function and tensor types to be used inside a CL fused convolution/batch normalization layer */
78 struct CLFusedLayerTypes
79 {
80     using ConvolutionLayer          = CLConvolutionLayer;
81     using DepthwiseConvolutionLayer = CLDepthwiseConvolutionLayer;
82     using FuseBatchNormalization    = CLFuseBatchNormalization;
83 };
84 
85 // TODO (isagot01): Remove once we support heterogeneous scheduling at function level
86 /** Wrapper for the CPP Function in the OpenCL backend **/
87 class CPPWrapperFunction : public IFunction
88 {
89 public:
90     /* Default constructor */
CPPWrapperFunction()91     CPPWrapperFunction()
92         : _tensors(), _func(nullptr)
93     {
94     }
95 
run()96     void run() override
97     {
98         for(auto &tensor : _tensors)
99         {
100             tensor->map(CLScheduler::get().queue());
101         }
102         _func->run();
103 
104         for(auto &tensor : _tensors)
105         {
106             tensor->unmap(CLScheduler::get().queue());
107         }
108     }
109 
register_tensor(ICLTensor * tensor)110     void register_tensor(ICLTensor *tensor)
111     {
112         _tensors.push_back(tensor);
113     }
114 
register_function(std::unique_ptr<IFunction> function)115     void register_function(std::unique_ptr<IFunction> function)
116     {
117         _func = std::move(function);
118     }
119 
120 private:
121     std::vector<arm_compute::ICLTensor *> _tensors;
122     std::unique_ptr<IFunction>            _func;
123 };
124 
125 namespace detail
126 {
127 // Specialized functions
128 template <>
create_detection_output_layer(DetectionOutputLayerNode & node)129 std::unique_ptr<IFunction> create_detection_output_layer<CPPDetectionOutputLayer, CLTargetInfo>(DetectionOutputLayerNode &node)
130 {
131     validate_node<CLTargetInfo>(node, 3 /* expected inputs */, 1 /* expected outputs */);
132 
133     // Extract IO and info
134     CLTargetInfo::TensorType      *input0      = get_backing_tensor<CLTargetInfo>(node.input(0));
135     CLTargetInfo::TensorType      *input1      = get_backing_tensor<CLTargetInfo>(node.input(1));
136     CLTargetInfo::TensorType      *input2      = get_backing_tensor<CLTargetInfo>(node.input(2));
137     CLTargetInfo::TensorType      *output      = get_backing_tensor<CLTargetInfo>(node.output(0));
138     const DetectionOutputLayerInfo detect_info = node.detection_output_info();
139 
140     ARM_COMPUTE_ERROR_ON(input0 == nullptr);
141     ARM_COMPUTE_ERROR_ON(input1 == nullptr);
142     ARM_COMPUTE_ERROR_ON(input2 == nullptr);
143     ARM_COMPUTE_ERROR_ON(output == nullptr);
144 
145     // Create and configure function
146     auto func = support::cpp14::make_unique<CPPDetectionOutputLayer>();
147     func->configure(input0, input1, input2, output, detect_info);
148 
149     // Log info
150     ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated "
151                                << node.name()
152                                << " Type: " << node.type()
153                                << " Target: " << CLTargetInfo::TargetType
154                                << " Data Type: " << input0->info()->data_type()
155                                << " Input0 shape: " << input0->info()->tensor_shape()
156                                << " Input1 shape: " << input1->info()->tensor_shape()
157                                << " Input2 shape: " << input2->info()->tensor_shape()
158                                << " Output shape: " << output->info()->tensor_shape()
159                                << " DetectionOutputLayer info: " << detect_info
160                                << std::endl);
161 
162     auto wrap_function = support::cpp14::make_unique<CPPWrapperFunction>();
163 
164     wrap_function->register_function(std::move(func));
165     wrap_function->register_tensor(input0);
166     wrap_function->register_tensor(input1);
167     wrap_function->register_tensor(input2);
168     wrap_function->register_tensor(output);
169 
170     return RETURN_UNIQUE_PTR(wrap_function);
171 }
172 template <>
create_detection_post_process_layer(DetectionPostProcessLayerNode & node)173 std::unique_ptr<IFunction> create_detection_post_process_layer<CPPDetectionPostProcessLayer, CLTargetInfo>(DetectionPostProcessLayerNode &node)
174 {
175     validate_node<CLTargetInfo>(node, 3 /* expected inputs */, 4 /* expected outputs */);
176 
177     // Extract IO and info
178     CLTargetInfo::TensorType           *input0      = get_backing_tensor<CLTargetInfo>(node.input(0));
179     CLTargetInfo::TensorType           *input1      = get_backing_tensor<CLTargetInfo>(node.input(1));
180     CLTargetInfo::TensorType           *input2      = get_backing_tensor<CLTargetInfo>(node.input(2));
181     CLTargetInfo::TensorType           *output0     = get_backing_tensor<CLTargetInfo>(node.output(0));
182     CLTargetInfo::TensorType           *output1     = get_backing_tensor<CLTargetInfo>(node.output(1));
183     CLTargetInfo::TensorType           *output2     = get_backing_tensor<CLTargetInfo>(node.output(2));
184     CLTargetInfo::TensorType           *output3     = get_backing_tensor<CLTargetInfo>(node.output(3));
185     const DetectionPostProcessLayerInfo detect_info = node.detection_post_process_info();
186 
187     ARM_COMPUTE_ERROR_ON(input0 == nullptr);
188     ARM_COMPUTE_ERROR_ON(input1 == nullptr);
189     ARM_COMPUTE_ERROR_ON(input2 == nullptr);
190     ARM_COMPUTE_ERROR_ON(output0 == nullptr);
191     ARM_COMPUTE_ERROR_ON(output1 == nullptr);
192     ARM_COMPUTE_ERROR_ON(output2 == nullptr);
193     ARM_COMPUTE_ERROR_ON(output3 == nullptr);
194 
195     // Create and configure function
196     auto func = support::cpp14::make_unique<CPPDetectionPostProcessLayer>();
197     func->configure(input0, input1, input2, output0, output1, output2, output3, detect_info);
198 
199     // Log info
200     ARM_COMPUTE_LOG_GRAPH_INFO("Instantiated "
201                                << node.name()
202                                << " Type: " << node.type()
203                                << " Target: " << CLTargetInfo::TargetType
204                                << " Data Type: " << input0->info()->data_type()
205                                << " Input0 shape: " << input0->info()->tensor_shape()
206                                << " Input1 shape: " << input1->info()->tensor_shape()
207                                << " Input2 shape: " << input2->info()->tensor_shape()
208                                << " Output0 shape: " << output0->info()->tensor_shape()
209                                << " Output1 shape: " << output1->info()->tensor_shape()
210                                << " Output2 shape: " << output2->info()->tensor_shape()
211                                << " Output3 shape: " << output3->info()->tensor_shape()
212                                << " DetectionPostProcessLayer info: " << detect_info
213                                << std::endl);
214 
215     auto wrap_function = support::cpp14::make_unique<CPPWrapperFunction>();
216 
217     wrap_function->register_function(std::move(func));
218     wrap_function->register_tensor(input0);
219     wrap_function->register_tensor(input1);
220     wrap_function->register_tensor(input2);
221     wrap_function->register_tensor(output0);
222     wrap_function->register_tensor(output1);
223     wrap_function->register_tensor(output2);
224     wrap_function->register_tensor(output3);
225 
226     return RETURN_UNIQUE_PTR(wrap_function);
227 }
228 } // namespace detail
229 
create(INode * node,GraphContext & ctx)230 std::unique_ptr<IFunction> CLFunctionFactory::create(INode *node, GraphContext &ctx)
231 {
232     if(node == nullptr)
233     {
234         return nullptr;
235     }
236 
237     NodeType type = node->type();
238     switch(type)
239     {
240         case NodeType::ActivationLayer:
241             return detail::create_activation_layer<CLActivationLayer, CLTargetInfo>(*polymorphic_downcast<ActivationLayerNode *>(node));
242         case NodeType::ArgMinMaxLayer:
243             return detail::create_arg_min_max_layer<CLArgMinMaxLayer, CLTargetInfo>(*polymorphic_downcast<ArgMinMaxLayerNode *>(node));
244         case NodeType::BatchNormalizationLayer:
245             return detail::create_batch_normalization_layer<CLBatchNormalizationLayer, CLTargetInfo>(*polymorphic_downcast<BatchNormalizationLayerNode *>(node));
246         case NodeType::BoundingBoxTransformLayer:
247             return detail::create_bounding_box_transform_layer<CLBoundingBoxTransform, CLTargetInfo>(*polymorphic_downcast<BoundingBoxTransformLayerNode *>(node));
248         case NodeType::ChannelShuffleLayer:
249             return detail::create_channel_shuffle_layer<CLChannelShuffleLayer, CLTargetInfo>(*polymorphic_downcast<ChannelShuffleLayerNode *>(node));
250         case NodeType::ConvolutionLayer:
251             return detail::create_convolution_layer<CLConvolutionLayerFunctions, CLTargetInfo>(*polymorphic_downcast<ConvolutionLayerNode *>(node), ctx);
252         case NodeType::DeconvolutionLayer:
253             return detail::create_deconvolution_layer<CLDeconvolutionLayer, CLTargetInfo>(*polymorphic_downcast<DeconvolutionLayerNode *>(node), ctx);
254         case NodeType::ConcatenateLayer:
255             return detail::create_concatenate_layer<CLConcatenateLayer, CLTargetInfo>(*polymorphic_downcast<ConcatenateLayerNode *>(node));
256         case NodeType::DepthToSpaceLayer:
257             return detail::create_depth_to_space_layer<CLDepthToSpaceLayer, CLTargetInfo>(*polymorphic_downcast<DepthToSpaceLayerNode *>(node));
258         case NodeType::DepthwiseConvolutionLayer:
259             return detail::create_depthwise_convolution_layer<CLDepthwiseConvolutionLayer, CLTargetInfo>(*polymorphic_downcast<DepthwiseConvolutionLayerNode *>(node));
260         case NodeType::DequantizationLayer:
261             return detail::create_dequantization_layer<CLDequantizationLayer, CLTargetInfo>(*polymorphic_downcast<DequantizationLayerNode *>(node));
262         case NodeType::DetectionOutputLayer:
263             return detail::create_detection_output_layer<CPPDetectionOutputLayer, CLTargetInfo>(*polymorphic_downcast<DetectionOutputLayerNode *>(node));
264         case NodeType::DetectionPostProcessLayer:
265             return detail::create_detection_post_process_layer<CPPDetectionPostProcessLayer, CLTargetInfo>(*polymorphic_downcast<DetectionPostProcessLayerNode *>(node));
266         case NodeType::EltwiseLayer:
267             return detail::create_eltwise_layer<CLEltwiseFunctions, CLTargetInfo>(*polymorphic_downcast<EltwiseLayerNode *>(node));
268         case NodeType::UnaryEltwiseLayer:
269             return detail::create_unary_eltwise_layer<CLUnaryEltwiseFunctions, CLTargetInfo>(*polymorphic_downcast<UnaryEltwiseLayerNode *>(node));
270         case NodeType::FlattenLayer:
271             return detail::create_flatten_layer<CLFlattenLayer, CLTargetInfo>(*polymorphic_downcast<FlattenLayerNode *>(node));
272         case NodeType::FullyConnectedLayer:
273             return detail::create_fully_connected_layer<CLFullyConnectedLayer, CLTargetInfo>(*polymorphic_downcast<FullyConnectedLayerNode *>(node), ctx);
274         case NodeType::FusedConvolutionBatchNormalizationLayer:
275             return detail::create_fused_convolution_batch_normalization_layer<CLFusedLayerTypes, CLTargetInfo>(*polymorphic_downcast<FusedConvolutionBatchNormalizationNode *>(node), ctx);
276         case NodeType::FusedDepthwiseConvolutionBatchNormalizationLayer:
277             return detail::create_fused_depthwise_convolution_batch_normalization_layer<CLFusedLayerTypes, CLTargetInfo>(*polymorphic_downcast<FusedDepthwiseConvolutionBatchNormalizationNode *>(node), ctx);
278         case NodeType::GenerateProposalsLayer:
279             return detail::create_generate_proposals_layer<CLGenerateProposalsLayer, CLTargetInfo>(*polymorphic_downcast<GenerateProposalsLayerNode *>(node), ctx);
280         case NodeType::L2NormalizeLayer:
281             return detail::create_l2_normalize_layer<CLL2NormalizeLayer, CLTargetInfo>(*polymorphic_downcast<L2NormalizeLayerNode *>(node), ctx);
282         case NodeType::NormalizationLayer:
283             return detail::create_normalization_layer<CLNormalizationLayer, CLTargetInfo>(*polymorphic_downcast<NormalizationLayerNode *>(node), ctx);
284         case NodeType::NormalizePlanarYUVLayer:
285             return detail::create_normalize_planar_yuv_layer<CLNormalizePlanarYUVLayer, CLTargetInfo>(*polymorphic_downcast<NormalizePlanarYUVLayerNode *>(node));
286         case NodeType::PadLayer:
287             return detail::create_pad_layer<CLPadLayer, CLTargetInfo>(*polymorphic_downcast<PadLayerNode *>(node));
288         case NodeType::PermuteLayer:
289             return detail::create_permute_layer<CLPermute, CLTargetInfo>(*polymorphic_downcast<PermuteLayerNode *>(node));
290         case NodeType::PoolingLayer:
291             return detail::create_pooling_layer<CLPoolingLayer, CLTargetInfo>(*polymorphic_downcast<PoolingLayerNode *>(node));
292         case NodeType::PReluLayer:
293             return detail::create_prelu_layer<CLPReluLayer, CLTargetInfo>(*polymorphic_downcast<PReluLayerNode *>(node));
294         case NodeType::PrintLayer:
295             return detail::create_print_layer<CLTargetInfo>(*polymorphic_downcast<PrintLayerNode *>(node));
296         case NodeType::PriorBoxLayer:
297             return detail::create_priorbox_layer<CLPriorBoxLayer, CLTargetInfo>(*polymorphic_downcast<PriorBoxLayerNode *>(node));
298         case NodeType::QuantizationLayer:
299             return detail::create_quantization_layer<CLQuantizationLayer, CLTargetInfo>(*polymorphic_downcast<QuantizationLayerNode *>(node));
300         case NodeType::ReductionOperationLayer:
301             return detail::create_reduction_operation_layer<CLReductionOperation, CLTargetInfo>(*polymorphic_downcast<ReductionLayerNode *>(node), ctx);
302         case NodeType::ReorgLayer:
303             return detail::create_reorg_layer<CLReorgLayer, CLTargetInfo>(*polymorphic_downcast<ReorgLayerNode *>(node));
304         case NodeType::ReshapeLayer:
305             return detail::create_reshape_layer<CLReshapeLayer, CLTargetInfo>(*polymorphic_downcast<ReshapeLayerNode *>(node));
306         case NodeType::ResizeLayer:
307             return detail::create_resize_layer<CLScale, CLTargetInfo>(*polymorphic_downcast<ResizeLayerNode *>(node));
308         case NodeType::ROIAlignLayer:
309             return detail::create_roi_align_layer<CLROIAlignLayer, CLTargetInfo>(*polymorphic_downcast<ROIAlignLayerNode *>(node));
310         case NodeType::SliceLayer:
311             return detail::create_slice_layer<CLSlice, CLTargetInfo>(*polymorphic_downcast<SliceLayerNode *>(node));
312         case NodeType::SoftmaxLayer:
313             return detail::create_softmax_layer<CLSoftmaxLayer, CLTargetInfo>(*polymorphic_downcast<SoftmaxLayerNode *>(node), ctx);
314         case NodeType::StackLayer:
315             return detail::create_stack_layer<CLStackLayer, CLTargetInfo>(*polymorphic_downcast<StackLayerNode *>(node));
316         case NodeType::StridedSliceLayer:
317             return detail::create_strided_slice_layer<CLStridedSlice, CLTargetInfo>(*polymorphic_downcast<StridedSliceLayerNode *>(node));
318         case NodeType::UpsampleLayer:
319             return detail::create_upsample_layer<CLUpsampleLayer, CLTargetInfo>(*polymorphic_downcast<UpsampleLayerNode *>(node), ctx);
320         case NodeType::YOLOLayer:
321             return detail::create_yolo_layer<CLYOLOLayer, CLTargetInfo>(*polymorphic_downcast<YOLOLayerNode *>(node), ctx);
322         default:
323             return nullptr;
324     }
325 }
326 } // namespace backends
327 } // namespace graph
328 } // namespace arm_compute
329