//
// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#include <aclCommon/ArmComputeTensorUtils.hpp>
#include <aclCommon/ArmComputeUtils.hpp>

#include "armnn/Exceptions.hpp"
#include "ArmComputeUtils.hpp"
#include <armnn/Descriptors.hpp>

#include <fmt/format.h>

namespace armnn
{
namespace armcomputetensorutils
{

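// Maps an armnn::DataType onto the corresponding arm_compute::DataType.
// QSymmS8 maps to QSYMM8_PER_CHANNEL when multiScales is true, and to QSYMM8 otherwise.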
arm_compute::DataType GetArmComputeDataType(armnn::DataType dataType, bool multiScales)
{
    switch(dataType)
    {
        case armnn::DataType::BFloat16:
            return arm_compute::DataType::BFLOAT16;
        case armnn::DataType::Boolean:
            return arm_compute::DataType::U8;
        case armnn::DataType::Float16:
            return arm_compute::DataType::F16;
        case armnn::DataType::Float32:
            return arm_compute::DataType::F32;
        case armnn::DataType::QAsymmS8:
            return arm_compute::DataType::QASYMM8_SIGNED;
        case armnn::DataType::QAsymmU8:
            return arm_compute::DataType::QASYMM8;
        case armnn::DataType::QSymmS16:
            return arm_compute::DataType::QSYMM16;
        case armnn::DataType::Signed64:
            return arm_compute::DataType::S64;
        case armnn::DataType::QSymmS8:
        {
            return multiScales ? arm_compute::DataType::QSYMM8_PER_CHANNEL : arm_compute::DataType::QSYMM8;
        }
        case armnn::DataType::Signed32:
            return arm_compute::DataType::S32;
        default:
            ARMNN_ASSERT_MSG(false, "Unknown data type");
            return arm_compute::DataType::UNKNOWN;
    }
}

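// Maps an arm_compute::DataType back onto an armnn::DataType.
// Note that U8 maps to Boolean, and both QSYMM8 and QSYMM8_PER_CHANNEL map to QSymmS8.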
armnn::DataType GetArmNNDataType(arm_compute::DataType dataType)
{
    switch(dataType)
    {
        case arm_compute::DataType::BFLOAT16:
            return armnn::DataType::BFloat16;
        case arm_compute::DataType::U8:
            return armnn::DataType::Boolean;
        case arm_compute::DataType::F16:
            return armnn::DataType::Float16;
        case arm_compute::DataType::F32:
            return armnn::DataType::Float32;
        case arm_compute::DataType::QASYMM8_SIGNED:
            return armnn::DataType::QAsymmS8;
        case arm_compute::DataType::QASYMM8:
            return armnn::DataType::QAsymmU8;
        case arm_compute::DataType::QSYMM16:
            return armnn::DataType::QSymmS16;
        case arm_compute::DataType::S64:
            return armnn::DataType::Signed64;
        case arm_compute::DataType::QSYMM8_PER_CHANNEL:
            return armnn::DataType::QSymmS8;
        case arm_compute::DataType::QSYMM8:
            return armnn::DataType::QSymmS8;
        case arm_compute::DataType::S32:
            return armnn::DataType::Signed32;
        default:
            ARMNN_ASSERT_MSG(false, "Unknown data type");
            return armnn::DataType::Float32;
    }
}

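// Builds ACL reduction coordinates from a set of Arm NN reduction axes, converting from
// Arm NN (left-to-right) to ACL (right-to-left) axis indexing.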
arm_compute::Coordinates BuildArmComputeReductionCoordinates(size_t inputDimensions,
                                                             unsigned int originalInputRank,
                                                             const std::vector<unsigned int>& armnnAxes)
{
    arm_compute::Coordinates outAclCoords;

    if (armnnAxes.empty())
    {
        // If no reduction axes were provided, then the input must be reduced along all dimensions.
        // Since Compute Library does not accept an empty vector as the reduction dimensions, we then
        // manually create a vector including all the input dimensions (in reversed order) as:
        //
        // { inputDimensions - 1, inputDimensions - 2, ..., 1, 0 }
        //
        outAclCoords.set_num_dimensions(inputDimensions);
        std::generate(outAclCoords.begin(), outAclCoords.end(), [d = inputDimensions - 1] () mutable { return d--; });
    }
    else
    {
        // Create a vector of reduction dimensions (in reversed order) with the given reduction axes.
        //
        // Adjust the given reduction axes according to the original rank of the input tensor (before ACL applied any
        // dimension correction).
        // For example, if the input tensor originally had 4 dimensions, and one of the reduction axes was 2, then the
        // new value for that reduction axis should be 1.
        //
        // Example:
        // ArmNN input shape = { 1, 1, 3, 2 } -> ACL input shape = { 2, 3 }
        // ArmNN reduction axis = { 2 }       -> ACL reduction axis = { 1 }
        // ArmNN reduction axis = { 3 }       -> ACL reduction axis = { 0 }
        //
        // The transformation: ACL reduction axis index = original rank - ArmNN reduction axis index - 1
        //
        outAclCoords.set_num_dimensions(armnnAxes.size());
        std::transform(armnnAxes.begin(), armnnAxes.end(),
                       outAclCoords.begin(),
                       [originalInputRank](unsigned int i){ return originalInputRank - i - 1; });
    }

    return outAclCoords;
}

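// Converts an armnn::TensorShape into an arm_compute::TensorShape by reversing the dimension order.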
arm_compute::TensorShape BuildArmComputeTensorShape(const armnn::TensorShape& tensorShape)
{
    arm_compute::TensorShape shape;

    // armnn tensors are (batch, channels, height, width).
    // arm_compute tensors are (width, height, channels, batch).
    for (unsigned int i = 0; i < tensorShape.GetNumDimensions(); i++)
    {
        // Note that our dimensions are stored in the opposite order to ACL's.
        shape.set(tensorShape.GetNumDimensions() - i - 1, tensorShape[i], false);

        // Note: TensorShape::set() flattens leading dimensions of size 1, so a batch size of 1
        // is dropped; this is the behaviour arm_compute tensors expect.
    }

    // prevent arm_compute issue where tensor is flattened to nothing
    if (shape.num_dimensions() == 0)
    {
        shape.set_num_dimensions(1);
    }

    return shape;
}

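// Reduces an Arm NN shape to at most 'dimensions' dimensions by skipping leading dimensions
// of size 1, returning the remaining dimensions in reversed (ACL) order.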
std::vector<unsigned int> ReduceDimsForACL(const armnn::TensorShape tensorShape, unsigned int dimensions)
{
    std::vector<unsigned int> newShape;

    unsigned int dimsToSkip = 0;

    if (tensorShape.GetNumDimensions() > dimensions)
    {
        dimsToSkip = tensorShape.GetNumDimensions() - dimensions;
    }
    unsigned int dimsSkipped = 0;
    bool insertRemainder = false;

    for (unsigned int i = 0; i < tensorShape.GetNumDimensions(); ++i)
    {
        if (tensorShape[i] == 1 && dimsSkipped < dimsToSkip && !insertRemainder)
        {
            ++dimsSkipped;
            continue;
        }
        newShape.insert(newShape.begin(), tensorShape[i]);
        // Once we insert the first dimension we can't skip any more
        insertRemainder = true;
    }
    return newShape;
}

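// Converts an armnn::TensorShape into an arm_compute::TensorShape that has at most
// 'dimensions' dimensions, dropping leading dimensions of size 1 where necessary.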
arm_compute::TensorShape BuildArmComputeTensorShape(const armnn::TensorShape& tensorShape, unsigned int dimensions)
{
    arm_compute::TensorShape shape;
    std::vector<unsigned int> strippedShape = ReduceDimsForACL(tensorShape, dimensions);

    for (unsigned int i = 0; i < strippedShape.size(); i++)
    {
        shape.set(i, strippedShape[i], false);
    }

    // prevent arm_compute issue where tensor is flattened to nothing
    if (shape.num_dimensions() == 0)
    {
        shape.set_num_dimensions(1);
    }
    return shape;
}

// Utility function used to build a TensorInfo object that can be used to initialise
// ARM Compute Tensor and CLTensor allocators.
// Note: this utility ignores the value of armnn::TensorInfo.IsConstant(). ACL tensors
// default to constant but Arm NN ones default to non-constant. In the cases where
// we expect ACL to treat a tensor as constant, that value must be set after this
// utility has been called.
arm_compute::TensorInfo BuildArmComputeTensorInfo(const armnn::TensorInfo& tensorInfo)
{
    bool multiScales = tensorInfo.HasMultipleQuantizationScales();
    const arm_compute::TensorShape aclTensorShape = BuildArmComputeTensorShape(tensorInfo.GetShape());
    const arm_compute::DataType aclDataType       = GetArmComputeDataType(tensorInfo.GetDataType(), multiScales);

    const arm_compute::QuantizationInfo aclQuantizationInfo = multiScales ?
        arm_compute::QuantizationInfo(tensorInfo.GetQuantizationScales()) :
        arm_compute::QuantizationInfo(tensorInfo.GetQuantizationScale(), tensorInfo.GetQuantizationOffset());

    return arm_compute::TensorInfo(aclTensorShape, 1, aclDataType, aclQuantizationInfo);
}

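// As above, but additionally sets the data layout on the resulting arm_compute::TensorInfo.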
arm_compute::TensorInfo BuildArmComputeTensorInfo(const armnn::TensorInfo& tensorInfo,
                                                  armnn::DataLayout dataLayout)
{
    arm_compute::TensorInfo aclTensorInfo = BuildArmComputeTensorInfo(tensorInfo);
    aclTensorInfo.set_data_layout(ConvertDataLayout(dataLayout));

    return aclTensorInfo;
}

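// As above, but the tensor shape is reduced to at most 'dimensions' dimensions
// (see ReduceDimsForACL).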
arm_compute::TensorInfo BuildArmComputeTensorInfo(const armnn::TensorInfo& tensorInfo, unsigned int dimensions)
{
    bool multiScales = tensorInfo.HasMultipleQuantizationScales();
    const arm_compute::TensorShape aclTensorShape = BuildArmComputeTensorShape(tensorInfo.GetShape(), dimensions);
    const arm_compute::DataType aclDataType       = GetArmComputeDataType(tensorInfo.GetDataType(), multiScales);

    const arm_compute::QuantizationInfo aclQuantizationInfo = multiScales ?
              arm_compute::QuantizationInfo(tensorInfo.GetQuantizationScales()) :
              arm_compute::QuantizationInfo(tensorInfo.GetQuantizationScale(), tensorInfo.GetQuantizationOffset());

    return arm_compute::TensorInfo(aclTensorShape, 1, aclDataType, aclQuantizationInfo);
}
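
// Combines the two overloads above: reduces the shape to at most 'dimensions' dimensions
// and sets the requested data layout.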
arm_compute::TensorInfo BuildArmComputeTensorInfo(const armnn::TensorInfo& tensorInfo,
                                                  armnn::DataLayout dataLayout, unsigned int dimensions)
{
    arm_compute::TensorInfo aclTensorInfo = BuildArmComputeTensorInfo(tensorInfo, dimensions);
    aclTensorInfo.set_data_layout(ConvertDataLayout(dataLayout));

    return aclTensorInfo;
}

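// Converts an armnn::DataLayout into the equivalent arm_compute::DataLayout, throwing
// InvalidArgumentException for unrecognised layouts.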
arm_compute::DataLayout ConvertDataLayout(armnn::DataLayout dataLayout)
{
    switch(dataLayout)
    {
        case armnn::DataLayout::NHWC : return arm_compute::DataLayout::NHWC;

        case armnn::DataLayout::NCHW : return arm_compute::DataLayout::NCHW;

        case armnn::DataLayout::NDHWC : return arm_compute::DataLayout::NDHWC;

        case armnn::DataLayout::NCDHW : return arm_compute::DataLayout::NCDHW;

        default: throw InvalidArgumentException("Unknown armnn::DataLayout: [" +
                                                std::to_string(static_cast<int>(dataLayout)) + "]");
    }
}

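// Builds an ACL PoolingLayerInfo from an Arm NN Pooling2dDescriptor. Global pooling
// (stride 0 in both dimensions) uses the dedicated ACL constructor.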
arm_compute::PoolingLayerInfo BuildArmComputePoolingLayerInfo(const Pooling2dDescriptor& descriptor,
                                                              bool fpMixedPrecision)
{
    // Resolve ARM Compute layer parameters.
    const arm_compute::PoolingType poolingType = ConvertPoolingAlgorithmToAclPoolingType(descriptor.m_PoolType);

    const arm_compute::DataLayout dataLayout = ConvertDataLayout(descriptor.m_DataLayout);

    bool isGlobalPooling = (descriptor.m_StrideX==0 && descriptor.m_StrideY==0);
    //use specific constructor if global pooling
    if(isGlobalPooling)
    {
        return arm_compute::PoolingLayerInfo(poolingType, dataLayout);
    }

    const arm_compute::DimensionRoundingType rounding = ConvertOutputShapeRoundingToAclDimensionRoundingType(
                                                                                    descriptor.m_OutputShapeRounding);
    const arm_compute::PadStrideInfo padStrideInfo(descriptor.m_StrideX,
                                      descriptor.m_StrideY,
                                      descriptor.m_PadLeft,
                                      descriptor.m_PadRight,
                                      descriptor.m_PadTop,
                                      descriptor.m_PadBottom,
                                      rounding);

    const bool excludePadding = (descriptor.m_PaddingMethod == PaddingMethod::Exclude);

    const arm_compute::Size2D poolSize(descriptor.m_PoolWidth, descriptor.m_PoolHeight);

    return arm_compute::PoolingLayerInfo(poolingType, poolSize, dataLayout, padStrideInfo, excludePadding,
                                         fpMixedPrecision);
}

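// Builds an ACL Pooling3dLayerInfo from an Arm NN Pooling3dDescriptor. Global pooling
// (stride 0 in all three dimensions) uses the dedicated ACL constructor.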
arm_compute::Pooling3dLayerInfo BuildArmComputePooling3dLayerInfo(const Pooling3dDescriptor& descriptor,
                                                                  bool fpMixedPrecision)
{
    const arm_compute::PoolingType poolingType = ConvertPoolingAlgorithmToAclPoolingType(descriptor.m_PoolType);

    bool isGlobalPooling = (descriptor.m_StrideX==0 && descriptor.m_StrideY==0 && descriptor.m_StrideZ==0);
    //use specific constructor if global pooling
    if(isGlobalPooling)
    {
        return arm_compute::Pooling3dLayerInfo(poolingType);
    }

    const arm_compute::Size3D poolSize(descriptor.m_PoolWidth, descriptor.m_PoolHeight, descriptor.m_PoolDepth);

    const arm_compute::Size3D stride(descriptor.m_StrideX,
                        descriptor.m_StrideY,
                        descriptor.m_StrideZ);

    const arm_compute::Padding3D padding(descriptor.m_PadLeft,
                            descriptor.m_PadRight,
                            descriptor.m_PadTop,
                            descriptor.m_PadBottom,
                            descriptor.m_PadFront,
                            descriptor.m_PadBack);

    const bool excludePadding = (descriptor.m_PaddingMethod == PaddingMethod::Exclude);

    const arm_compute::DimensionRoundingType rounding = ConvertOutputShapeRoundingToAclDimensionRoundingType(
            descriptor.m_OutputShapeRounding);

    return arm_compute::Pooling3dLayerInfo(poolingType,
                                           poolSize,
                                           stride,
                                           padding,
                                           excludePadding,
                                           fpMixedPrecision,
                                           rounding);
}

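// Builds an ACL NormalizationLayerInfo from an Arm NN NormalizationDescriptor.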
arm_compute::NormalizationLayerInfo BuildArmComputeNormalizationLayerInfo(const NormalizationDescriptor& descriptor)
{
    const arm_compute::NormType normType =
        ConvertNormalizationAlgorithmChannelToAclNormType(descriptor.m_NormChannelType);
    return arm_compute::NormalizationLayerInfo(normType,
                                               descriptor.m_NormSize,
                                               descriptor.m_Alpha,
                                               descriptor.m_Beta,
                                               descriptor.m_K,
                                               false);
}

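// Converts an Arm NN PermutationVector into an ACL PermutationVector, dropping the leading
// identity mappings and rebasing the remaining indices accordingly.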
arm_compute::PermutationVector BuildArmComputePermutationVector(const armnn::PermutationVector& perm)
{
    arm_compute::PermutationVector aclPerm;

    unsigned int start = 0;
    while ((start < perm.GetSize()) && (start == perm[start]))
    {
        ++start;
    }

    for (unsigned int i = start; i < perm.GetSize(); ++i)
    {
        aclPerm.set(i - start, perm[i] - start);
    }
    return aclPerm;
}

arm_compute::PermutationVector BuildArmComputeTransposeVector(const armnn::PermutationVector& perm)
{
    // As ArmNN indexes are left to right and ACL indexes are right to left,
    // the permutation vector has to be reversed and then translated into ACL axes.
    // i.e. {1, 0, 2, 3} --> {3, 2, 0, 1} --> {0, 1, 3, 2}

    // Below is an example of how the ArmNN and ACL index formats work:
    // ArmNN Format:
    // Input Shape        {1, 10, 20, 30}
    // Permutation Vector {1,  0,  2,  3}
    // Output Shape       {10, 1, 20, 30}
    // dim "1" of input goes into index 0 of the output ([ 10, X, X, X])
    // dim "0" of input goes into index 1 of the output ([ 10, 1, X, X ])
    // dim "2" of input goes into index 2 of the output ([ 10, 1, 20, X ])
    // dim "3" of input goes into index 3 of the output ([ 10, 1, 20, 30 ])
    // ACL Format:
    // Input Shape        {30, 20, 10, 1}
    // Permutation Vector {0,  1,  3,  2}
    // Output Shape       {30, 20, 1, 10}
    // dim "0" of input goes into index 0 of the output ([ 30,  X, X, X])
    // dim "1" of input goes into index 1 of the output ([ 30, 20, X, X ])
    // dim "3" of input goes into index 2 of the output ([ 30, 20, 1, X ])
    // dim "2" of input goes into index 3 of the output ([ 30, 20, 1, 10 ])

    arm_compute::PermutationVector aclPerm;
    auto rank = perm.GetSize();

    // Reverse the order. i.e. {1, 0, 2, 3} --> {3, 2, 0, 1}
    std::vector<unsigned int> reversedPerm;
    reversedPerm.reserve(rank);
    for (unsigned int i = rank; i > 0; --i)
    {
        reversedPerm.push_back(perm[i-1]);
    }

    // Translate from Arm NN axis to ACL axis. i.e. {3, 2, 0, 1} --> {0, 1, 3, 2}
    for (unsigned int i = 0; i < rank; ++i)
    {
        auto aclAxis = rank - 1 - reversedPerm[i];
        aclPerm.set(i, aclAxis);
    }
    return aclPerm;
}

arm_compute::Size2D BuildArmComputeSize2D(const unsigned int width, const unsigned int height)
{
    return arm_compute::Size2D(width, height);
}

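// Wraps a float value in an arm_compute::PixelValue of the given tensor's data type, throwing
// if the value cannot be represented (e.g. overflow when converting to FP16).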
arm_compute::PixelValue GetPixelValue(const arm_compute::ITensorInfo* tensorInfo, float value)
{
    switch (tensorInfo->data_type())
    {
        case arm_compute::DataType::F16:
        {
            arm_compute::PixelValue pixelValue = arm_compute::PixelValue(static_cast<Half>(value));
            if (isinf(pixelValue.get<Half>()))
            {
                throw InvalidArgumentException("Under/Overflow converting float value [" + std::to_string(value) +
                    "] to fp16: [" + std::to_string(pixelValue.get<Half>()) + "]");
            }
            return pixelValue;
        }
        case arm_compute::DataType::F32:
            return arm_compute::PixelValue(value);
        case arm_compute::DataType::QASYMM8:
            return arm_compute::PixelValue(static_cast<uint8_t>(value));
        case arm_compute::DataType::QSYMM16:
            return arm_compute::PixelValue(static_cast<int16_t>(value));
        case arm_compute::DataType::QSYMM8:
        case arm_compute::DataType::QASYMM8_SIGNED:
        case arm_compute::DataType::QSYMM8_PER_CHANNEL:
            return arm_compute::PixelValue(static_cast<int8_t>(value));
        case arm_compute::DataType::S32:
            return arm_compute::PixelValue(static_cast<int32_t>(value));
        default:
            throw InvalidArgumentException("Unsupported DataType: [" +
                                           std::to_string(static_cast<int>(tensorInfo->data_type())) + "]");
    }
}

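// Computes the depth multiplier of a depthwise convolution as the ratio between the channel
// dimension of the (ACL-ordered) weights shape and that of the input shape.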
unsigned int ComputeDepthwiseConv2dDepthMultiplier(armnn::DataLayout layout,
                                                   const arm_compute::TensorShape& weightsShape,
                                                   const arm_compute::TensorShape& inputShape)
{
    unsigned int depthMultiplier;
    if (layout == armnn::DataLayout::NHWC)
    {
        depthMultiplier = static_cast<uint32_t>(weightsShape[0]) / static_cast<uint32_t>(inputShape[0]);
    }
    else if (layout == armnn::DataLayout::NCHW)
    {
        depthMultiplier = static_cast<uint32_t>(weightsShape[2]) / static_cast<uint32_t>(inputShape[2]);
    }
    else
    {
        throw InvalidArgumentException(fmt::format("Unknown data layout for tensor conversion: {}",
                                                   GetDataLayoutName(layout)));
    }
    return depthMultiplier;
}

} // namespace armcomputetensorutils
} // namespace armnn