//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once

#include <armnn/Descriptors.hpp>
#include <armnn/Tensor.hpp>
#include <armnn/utility/Assert.hpp>
#include <armnn/utility/NumericCast.hpp>
#include <armnn/backends/WorkloadData.hpp>

#include <arm_compute/core/Types.h>
#include <arm_compute/runtime/FunctionDescriptors.h>

#if defined(ARMCOMPUTENEON_ENABLED)
#include "neon/workloads/NeonReduceWorkload.hpp"
#endif

#if defined(ARMCOMPUTECL_ENABLED)
#include "cl/workloads/ClReduceWorkload.hpp"
#endif

namespace armnn
{

inline arm_compute::NormalizationLayerInfo
CreateAclNormalizationLayerInfoForL2Normalization(const armnn::TensorInfo& tensorInfo,
                                                  armnn::DataLayout dataLayout)
{
    unsigned int depthDimension = dataLayout == armnn::DataLayout::NCHW ? 1 : 3;
    const unsigned int depth = tensorInfo.GetShape()[depthDimension];

    // At the time of writing, {CL|Neon}L2Normalization performs the reduction only along dimension 0. This version of
    // L2 Normalization always performs the reduction along the depth axis, though. Thus, we repurpose
    // {CL|Neon}NormalizationLayers to act as depthwise L2 normalizations by carefully choosing the normalization
    // parameters.
    //
    // Please refer to both the reference implementation of the normalization layer and the implementation of
    // {CL|Neon}NormalizationLayer when checking the derivations for the parameter values below.

    // Make sure normalization covers the entire depth range. ACL requires the normalization size to be odd.
    // CL: This does not result in extra kernel threads not doing any work: See usage of the RADIUS parameter in
    // ACL's normalization_layer_cross_map() CL function.
    const uint32_t normSize = depth * 2u + 1u;

    // See ACL's NormalizationLayerInfo::scale_coeff() definition.
    // For the reference implementation, to make alpha_ become 1, we'd have to use alpha = normSize instead.
    const float alpha = 1.0f;

    // Don't offset the reduction.
    const float kappa = 0.0f;

    // pow(reduction, -0.5) = 1 / sqrt(reduction)
    const float beta = 0.5f;

    return arm_compute::NormalizationLayerInfo(arm_compute::NormType::CROSS_MAP, normSize, alpha, beta, kappa, false);
}
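
// A minimal usage sketch (illustrative; the tensor shape below is hypothetical): for an NCHW
// tensor with 8 channels, depth = 8, so normSize = 2 * 8 + 1 = 17 and the resulting info
// normalizes across the full channel range.
//
//     armnn::TensorInfo info({ 1, 8, 16, 16 }, armnn::DataType::Float32);
//     arm_compute::NormalizationLayerInfo normInfo =
//         CreateAclNormalizationLayerInfoForL2Normalization(info, armnn::DataLayout::NCHW);
//     // normInfo.norm_size() == 17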

inline arm_compute::ActivationLayerInfo::ActivationFunction
ConvertActivationFunctionToAclActivationFunction(ActivationFunction armnnFunction)
{
    using AclActivationFunction = arm_compute::ActivationLayerInfo::ActivationFunction;

    switch (armnnFunction)
    {
        case ActivationFunction::Linear:        return AclActivationFunction::LINEAR;
        // Arm compute's 'logistic' function is non-parameterized, so it is exactly a sigmoid function.
        case ActivationFunction::Sigmoid:       return AclActivationFunction::LOGISTIC;
        case ActivationFunction::ReLu:          return AclActivationFunction::RELU;
        case ActivationFunction::BoundedReLu:   return AclActivationFunction::LU_BOUNDED_RELU;
        case ActivationFunction::SoftReLu:      return AclActivationFunction::SOFT_RELU;
        case ActivationFunction::LeakyReLu:     return AclActivationFunction::LEAKY_RELU;
        case ActivationFunction::Abs:           return AclActivationFunction::ABS;
        case ActivationFunction::Sqrt:          return AclActivationFunction::SQRT;
        case ActivationFunction::Square:        return AclActivationFunction::SQUARE;
        case ActivationFunction::TanH:          return AclActivationFunction::TANH;
        case ActivationFunction::Elu:           return AclActivationFunction::ELU;
        case ActivationFunction::HardSwish:     return AclActivationFunction::HARD_SWISH;
        default:                                throw InvalidArgumentException("Unsupported activation function");
    }
}

inline arm_compute::ActivationLayerInfo
ConvertActivationDescriptorToAclActivationLayerInfo(const ActivationDescriptor& actDesc)
{
    return arm_compute::ActivationLayerInfo(ConvertActivationFunctionToAclActivationFunction(actDesc.m_Function),
        actDesc.m_A, actDesc.m_B);
}
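
// Illustrative example: a BoundedReLu descriptor with m_A = 6.0f maps to ACL's LU_BOUNDED_RELU
// with upper bound a = 6.0f and lower bound b = 0.0f, i.e. ReLU6.
//
//     armnn::ActivationDescriptor desc;
//     desc.m_Function = armnn::ActivationFunction::BoundedReLu;
//     desc.m_A = 6.0f; // upper bound
//     desc.m_B = 0.0f; // lower bound
//     arm_compute::ActivationLayerInfo info = ConvertActivationDescriptorToAclActivationLayerInfo(desc);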

inline arm_compute::ActivationLayerInfo
ConvertActivationDescriptorToAclActivationLayerInfo(const ActivationDescriptor* activationDescPtr)
{
    if (activationDescPtr != nullptr)
    {
        return ConvertActivationDescriptorToAclActivationLayerInfo(static_cast<ActivationDescriptor>(
                                                                           *activationDescPtr));
    }
    return arm_compute::ActivationLayerInfo();
}

inline arm_compute::ActivationLayerInfo
ConvertAdditionalInfoToAclActivationLayerInfo(const QueueDescriptor& queueDescriptor)
{
    const ActivationDescriptor* activationDescPtr = queueDescriptor.GetAdditionalInformation<ActivationDescriptor>();

    if (activationDescPtr != nullptr)
    {
        return ConvertActivationDescriptorToAclActivationLayerInfo(static_cast<ActivationDescriptor>(
                *activationDescPtr));
    }
    return arm_compute::ActivationLayerInfo();
}

inline arm_compute::ActivationLayerInfo
ConvertLstmActivationFuncToAclLayerInfo(uint32_t activationFunction)
{
    // Five cases need to be handled when constructing the ActivationLayerInfo object.
    switch (activationFunction)
    {
        case 0:
            return arm_compute::ActivationLayerInfo(); // no activation, do nothing
        case 1:
            return arm_compute::ActivationLayerInfo(arm_compute::ActivationLayerInfo::ActivationFunction::RELU);
        case 3:
            return arm_compute::ActivationLayerInfo(
                arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0);
        case 4:
            return arm_compute::ActivationLayerInfo(
                arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0, 1.0);
        case 6:
            return arm_compute::ActivationLayerInfo(
                arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC);
        default:
            throw armnn::Exception("Wrong Type of Activation Function!");
    }
}
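
// For reference (an assumption based on the common fused-activation encoding used by
// Android NN / TensorFlow Lite LSTM descriptors): 0 = none, 1 = ReLU, 3 = ReLU6,
// 4 = TanH, 6 = Sigmoid. For example:
//
//     // Maps to BOUNDED_RELU with upper bound 6.0 (ReLU6).
//     arm_compute::ActivationLayerInfo info = ConvertLstmActivationFuncToAclLayerInfo(3u);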

inline arm_compute::ComparisonOperation ConvertComparisonOperationToAcl(const ComparisonDescriptor& descriptor)
{
    switch (descriptor.m_Operation)
    {
        case ComparisonOperation::Greater:         return arm_compute::ComparisonOperation::Greater;
        case ComparisonOperation::GreaterOrEqual:  return arm_compute::ComparisonOperation::GreaterEqual;
        case ComparisonOperation::Less:            return arm_compute::ComparisonOperation::Less;
        case ComparisonOperation::LessOrEqual:     return arm_compute::ComparisonOperation::LessEqual;
        case ComparisonOperation::Equal:           return arm_compute::ComparisonOperation::Equal;
        case ComparisonOperation::NotEqual:        return arm_compute::ComparisonOperation::NotEqual;
        default:                                   throw InvalidArgumentException("Unsupported comparison function");
    }
}

inline arm_compute::PoolingType ConvertPoolingAlgorithmToAclPoolingType(PoolingAlgorithm poolingAlgorithm)
{
    using arm_compute::PoolingType;

    switch (poolingAlgorithm)
    {
        case PoolingAlgorithm::Max:             return PoolingType::MAX;
        case PoolingAlgorithm::Average:         return PoolingType::AVG;
        case PoolingAlgorithm::L2:              return PoolingType::L2;
        default:                                throw InvalidArgumentException("Unsupported pooling algorithm");
    }
}

inline arm_compute::DimensionRoundingType
ConvertOutputShapeRoundingToAclDimensionRoundingType(OutputShapeRounding rounding)
{
    using arm_compute::DimensionRoundingType;

    switch (rounding)
    {
        case OutputShapeRounding::Ceiling:  return DimensionRoundingType::CEIL;
        case OutputShapeRounding::Floor:    return DimensionRoundingType::FLOOR;
        default:                            throw InvalidArgumentException("Unsupported Output Shape Rounding type");
    }
}

inline arm_compute::NormType
ConvertNormalizationAlgorithmChannelToAclNormType(NormalizationAlgorithmChannel channelType)
{
    using arm_compute::NormType;
    switch (channelType)
    {
        case NormalizationAlgorithmChannel::Across: return NormType::CROSS_MAP;
        case NormalizationAlgorithmChannel::Within: return NormType::IN_MAP_2D;
        default:    throw InvalidArgumentException("Unsupported normalization algorithm channel type");
    }
}

inline arm_compute::FullyConnectedLayerInfo
ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(const FullyConnectedDescriptor& fullyConnectedDesc,
                                                            const ActivationDescriptor* activationDesc)
{
    arm_compute::FullyConnectedLayerInfo fc_info;
    fc_info.transpose_weights = fullyConnectedDesc.m_TransposeWeightMatrix;
    fc_info.activation_info = ConvertActivationDescriptorToAclActivationLayerInfo(activationDesc);
    return fc_info;
}

inline arm_compute::FullyConnectedLayerInfo
ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(const FullyConnectedDescriptor& fullyConnectedDesc,
                                                            arm_compute::ActivationLayerInfo activationLayerInfo)
{
    arm_compute::FullyConnectedLayerInfo fc_info;
    fc_info.transpose_weights = fullyConnectedDesc.m_TransposeWeightMatrix;
    fc_info.activation_info = activationLayerInfo;
    return fc_info;
}

inline arm_compute::InterpolationPolicy ConvertResizeMethodToAclInterpolationPolicy(ResizeMethod resizeMethod)
{
    switch (resizeMethod)
    {
        case ResizeMethod::Bilinear:
            return arm_compute::InterpolationPolicy::BILINEAR;
        case ResizeMethod::NearestNeighbor:
            return arm_compute::InterpolationPolicy::NEAREST_NEIGHBOR;
        default:
            throw InvalidArgumentException("Unsupported resize method");
    }
}

template<typename T>
inline T ComputeSoftmaxAclAxis(const SoftmaxDescriptor& softmaxDesc, const armnn::TensorInfo& tensor)
{
    // Detect the Android default value of -1 and return the ACL default value of 0.
    if (softmaxDesc.m_Axis == -1)
    {
        return 0;
    }

    unsigned int dim = tensor.GetNumDimensions();

    ARMNN_ASSERT(dim != 0);

    // Currently Arm NN supports axis 1 only.
    auto aclAxis = (static_cast<T>(dim) - 1);
    aclAxis = aclAxis > 0 ? aclAxis - 1 : aclAxis;

    return aclAxis;
}
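
// Worked example (illustrative): for a 4D tensor, dim = 4, so aclAxis starts at 3 and is then
// decremented to 2, i.e. the ACL axis corresponding to Arm NN axis 1 under ACL's reversed
// (right-to-left) dimension ordering. For a 1D tensor, aclAxis stays at 0.
//
//     armnn::SoftmaxDescriptor desc;
//     desc.m_Axis = 1;
//     armnn::TensorInfo info({ 2, 10, 4, 4 }, armnn::DataType::Float32); // hypothetical shape
//     int aclAxis = ComputeSoftmaxAclAxis<int>(desc, info); // == 2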

inline std::set<unsigned int> ComputeSplitAxis(const armnn::SplitterDescriptor& desc, const TensorShape& input)
{
    unsigned int numSplit = desc.GetNumViews();
    unsigned int numDimensions = desc.GetNumDimensions();
    std::set<unsigned int> splitAxis;

    for (unsigned int i = 0; i < numSplit; ++i)
    {
        for (unsigned int dimIdx = 0; dimIdx < numDimensions; ++dimIdx)
        {
            if (desc.GetViewSizes(i)[dimIdx] != input[dimIdx])
            {
                splitAxis.insert(dimIdx);
            }
        }
    }
    return splitAxis;
}
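
// Illustrative example: a dimension is reported as a split axis whenever any view's size
// differs from the input's size in that dimension. E.g. splitting shape [2, 6, 4] into two
// views of shape [2, 3, 4] yields the set { 1 }. A minimal sketch (assuming the standard
// ViewsDescriptor setters):
//
//     armnn::SplitterDescriptor desc(2, 3);   // 2 views over a 3D input
//     for (unsigned int view = 0; view < 2; ++view)
//     {
//         desc.SetViewSize(view, 0, 2);
//         desc.SetViewSize(view, 1, 3);       // differs from input size 6
//         desc.SetViewSize(view, 2, 4);
//     }
//     std::set<unsigned int> axes = ComputeSplitAxis(desc, armnn::TensorShape({ 2, 6, 4 }));
//     // axes == { 1 }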

/// Function to convert ArmNN axis (left to right) to ACL axis (right to left) ranging from [-rank, rank)
inline int ComputeAclAxis(const int& armnnAxis, const armnn::TensorInfo& tensor)
{
    int rank = static_cast<int>(tensor.GetNumDimensions());

    ARMNN_ASSERT(rank != 0);
    ARMNN_ASSERT((-1 * rank) <= armnnAxis);
    ARMNN_ASSERT(armnnAxis < rank);

    int sign = (armnnAxis < 0) ? -1 : 1;
    int aclAxis = sign * rank - 1 - armnnAxis;

    return aclAxis;
}
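
// Worked example (illustrative): for a rank-4 tensor, a non-negative Arm NN axis k maps to
// ACL axis rank - 1 - k, so axis 0 -> 3, 1 -> 2, 2 -> 1, 3 -> 0. A negative axis keeps its
// sign: axis -1 maps to -rank - 1 - (-1) = -4, which is the same outermost dimension seen
// from ACL's right-to-left ordering.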

/// Function to convert axis to its positive equivalent value.
/// [-rank, rank) --> [0, rank)
inline unsigned int ComputePositiveAxis(const int& axis, const armnn::TensorInfo& tensor)
{
    int rank = static_cast<int>(tensor.GetNumDimensions());

    ARMNN_ASSERT(rank != 0);
    ARMNN_ASSERT((-1 * rank) <= axis);
    ARMNN_ASSERT(axis < rank);

    int positiveAxis = (axis < 0) ? rank + axis : axis;
    return static_cast<unsigned int>(positiveAxis);
}

/// Utility function used to set up an arm_compute::Conv3dInfo object from a Convolution3d descriptor.
inline arm_compute::Conv3dInfo ComputeConv3DInfo(const armnn::Convolution3dDescriptor descriptor,
                                                 bool isFastMathEnabled,
                                                 const ActivationDescriptor* activationDescriptor)
{
    const arm_compute::Size3D    stride{descriptor.m_StrideX, descriptor.m_StrideY, descriptor.m_StrideZ};
    const arm_compute::Padding3D padding{descriptor.m_PadLeft, descriptor.m_PadRight,
                                         descriptor.m_PadTop, descriptor.m_PadBottom,
                                         descriptor.m_PadFront, descriptor.m_PadBack};
    const arm_compute::Size3D    dilation{descriptor.m_DilationX, descriptor.m_DilationY, descriptor.m_DilationZ};

    const arm_compute::ActivationLayerInfo activationInfo =
            ConvertActivationDescriptorToAclActivationLayerInfo(activationDescriptor);
    const auto roundType = arm_compute::DimensionRoundingType::FLOOR;

    return arm_compute::Conv3dInfo{stride, padding, activationInfo, dilation, roundType, isFastMathEnabled};
}

inline arm_compute::Conv3dInfo ComputeConv3DInfo(const armnn::Convolution3dQueueDescriptor queueDescriptor,
                                                 bool isFastMathEnabled)
{
    auto descriptor = queueDescriptor.m_Parameters;
    const arm_compute::Size3D    stride{descriptor.m_StrideX, descriptor.m_StrideY, descriptor.m_StrideZ};
    const arm_compute::Padding3D padding{descriptor.m_PadLeft, descriptor.m_PadRight,
                                         descriptor.m_PadTop, descriptor.m_PadBottom,
                                         descriptor.m_PadFront, descriptor.m_PadBack};
    const arm_compute::Size3D    dilation{descriptor.m_DilationX, descriptor.m_DilationY, descriptor.m_DilationZ};

    const arm_compute::ActivationLayerInfo activationInfo =
            ConvertAdditionalInfoToAclActivationLayerInfo(queueDescriptor);
    const auto roundType = arm_compute::DimensionRoundingType::FLOOR;

    return arm_compute::Conv3dInfo{stride, padding, activationInfo, dilation, roundType, isFastMathEnabled};
}

inline arm_compute::PaddingMode ConvertPaddingModeToAcl(const PaddingMode& paddingMode)
{
    switch (paddingMode)
    {
        case PaddingMode::Constant:   return arm_compute::PaddingMode::CONSTANT;
        case PaddingMode::Reflect:    return arm_compute::PaddingMode::REFLECT;
        case PaddingMode::Symmetric:  return arm_compute::PaddingMode::SYMMETRIC;
        default:                      throw InvalidArgumentException("Unsupported Padding Mode");
    }
}

inline arm_compute::ReductionOperation ConvertReductionOperationToAcl(const ReduceDescriptor& descriptor)
{
    switch (descriptor.m_ReduceOperation)
    {
        case ReduceOperation::Sum:    return arm_compute::ReductionOperation::SUM;
        case ReduceOperation::Mean:   return arm_compute::ReductionOperation::MEAN_SUM;
        case ReduceOperation::Max:    return arm_compute::ReductionOperation::MAX;
        case ReduceOperation::Min:    return arm_compute::ReductionOperation::MIN;
        case ReduceOperation::Prod:   return arm_compute::ReductionOperation::PROD;
        default:                      throw InvalidArgumentException("Unsupported Reduction operation");
    }
}

/// Function to compute the output tensor shape based on the axes and whether keepDims is set.
inline const TensorInfo ComputeReductionTensorShape(const armnn::TensorInfo& input,
                                                    const std::vector<uint32_t>& vAxis,
                                                    const bool keepDims)
{
    auto reducedTensorInfo = input;
    unsigned int rank = reducedTensorInfo.GetNumDimensions();
    unsigned int outputRank = 0;
    // Calculate the output rank.
    if (keepDims)
    {
        outputRank = rank;
    }
    else if (vAxis.empty())
    {
        outputRank = 1;
    }
    else if (vAxis.size() > reducedTensorInfo.GetNumDimensions())
    {
        throw LayerValidationException("ReduceLayer: Dimensions to reduce cannot exceed the number of input dimensions");
    }
    else
    {
        outputRank = reducedTensorInfo.GetNumDimensions() - armnn::numeric_cast<unsigned int>(vAxis.size());
        if (outputRank == 0)
        {
            outputRank = 1;
        }
    }
    std::vector<unsigned int> dimSizes(outputRank, 1);
    if (!vAxis.empty())
    {
        // Skip each reduced dimension unless keepDims is true.
        unsigned int outputIndex = 0;
        for (unsigned int i = 0; i < reducedTensorInfo.GetNumDimensions(); ++i)
        {
            if (std::find(vAxis.begin(), vAxis.end(), i) == vAxis.end())
            {
                dimSizes[outputIndex] = armnn::numeric_cast<unsigned int>(reducedTensorInfo.GetShape()[i]);
                ++outputIndex;
            }
            else if (keepDims)
            {
                dimSizes[outputIndex] = 1;
                ++outputIndex;
            }
        }
    }
    const TensorShape inferredShape = TensorShape(outputRank, dimSizes.data());
    reducedTensorInfo.SetShape(inferredShape);
    return reducedTensorInfo;
}
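
// Worked example (illustrative shapes): reducing an input of shape [2, 3, 4] over axis 1 gives
// shape [2, 4] when keepDims is false, and [2, 1, 4] when keepDims is true. With an empty axis
// list and keepDims false, everything is reduced and the output collapses to [1].
//
//     armnn::TensorInfo input({ 2, 3, 4 }, armnn::DataType::Float32);
//     armnn::TensorInfo reduced = ComputeReductionTensorShape(input, { 1 }, /*keepDims=*/false);
//     // reduced.GetShape() == armnn::TensorShape({ 2, 4 })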

/// Macro to check whether a reduce layer with multiple axes is supported on each backend,
/// by validating the reduction one axis at a time.
#define IS_MULTI_AXES_REDUCE_SUPPORTED(func, input, desc, status)                 \
    armnn::TensorInfo inputTensorInfo = input;                                    \
    unsigned int recalculatedAxis = 0;                                            \
    std::vector<uint32_t> axes;                                                   \
                                                                                  \
    for (unsigned int i = 0; i != desc.m_vAxis.size(); ++i)                       \
    {                                                                             \
        axes.emplace_back(desc.m_vAxis[i]);                                       \
                                                                                  \
        const armnn::TensorInfo& reducedTensorInfo =                              \
            ComputeReductionTensorShape(input, axes, desc.m_KeepDims);            \
                                                                                  \
        std::vector<uint32_t> singleAxis(1, desc.m_vAxis[i] - recalculatedAxis);  \
                                                                                  \
        armnn::ReduceDescriptor newReduceDescriptor = desc;                       \
        newReduceDescriptor.m_vAxis.assign(singleAxis.begin(), singleAxis.end()); \
                                                                                  \
        status = func(inputTensorInfo, reducedTensorInfo, newReduceDescriptor);   \
        if (!status)                                                              \
        {                                                                         \
            break;                                                                \
        }                                                                         \
                                                                                  \
        if (!desc.m_KeepDims)                                                     \
        {                                                                         \
            recalculatedAxis++;                                                   \
        }                                                                         \
                                                                                  \
        inputTensorInfo = reducedTensorInfo;                                      \
    }
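
// A minimal usage sketch (illustrative; backendFunc stands in for a backend
// IsReduceSupported-style validation callable and the shapes are hypothetical). Note the macro
// declares its working variables in the enclosing scope, so invoke it inside its own block:
//
//     bool supported = false;
//     {
//         armnn::ReduceDescriptor desc;
//         desc.m_vAxis = { 1, 2 };
//         desc.m_KeepDims = false;
//         armnn::TensorInfo input({ 2, 3, 4, 5 }, armnn::DataType::Float32);
//         IS_MULTI_AXES_REDUCE_SUPPORTED(backendFunc, input, desc, supported);
//     }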

} // namespace armnn