//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//

#pragma once

#include "CpuTensorHandle.hpp"

#include <armnn/backends/ITensorHandle.hpp>
#include <armnn/Tensor.hpp>
#include <armnn/utility/Assert.hpp>
#include <armnn/utility/IgnoreUnused.hpp>
#include <armnn/utility/PolymorphicDowncast.hpp>
#include <armnnUtils/Permute.hpp>

#include <Half.hpp>
#include <Profiling.hpp>

#include <algorithm>
#include <utility>
#include <vector>

namespace armnn
{
namespace
{

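// Recursively assigns the elements of 'array' to the supplied output arguments.
// Elements are taken from the end of the array backwards (index (num - 1) - idx), so the
// last array element is written to the first argument; assignment stops once 'num'
// elements have been consumed.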
template <typename ArrayType, typename Arg>
void AssignValues(unsigned int num, unsigned int& idx, const ArrayType& array, Arg& arg)
{
    if (idx >= num)
    {
        return;
    }

    arg = array[(num - 1) - idx];
    idx++;
}

template <typename T, typename ArrayType, typename... Args>
void AssignValues(unsigned int num, unsigned int idx, const ArrayType& array, T& assignee, Args&... args)
{
    AssignValues(num, idx, array, assignee);

    AssignValues(num, idx, array, args...);
}

}    // anonymous namespace

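// Copies the contents of srcTensor into dstTensor, honouring the strides of both
// tensors so that padded or differently-shaped layouts are handled correctly. The
// actual byte transfer is delegated to the supplied 'copy' functor.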
template <typename CopyFunc>
void CopyTensorContentsGeneric(const ITensorHandle* srcTensor, ITensorHandle* dstTensor, CopyFunc copy)
{
    // For ease of understanding, names are assigned to the dimensions of the tensor
    // as if the layout were NHWC; however, this routine works with any 5D tensor.
    static_assert(MaxNumOfTensorDimensions == 5, "Please update CopyTensorContents");

    TensorShape srcStrides      = srcTensor->GetStrides();
    const TensorShape& srcShape = srcTensor->GetShape();
    const auto srcSize          = srcTensor->GetStrides()[0] * srcShape[0];
    IgnoreUnused(srcSize);  // Only used for asserts
    TensorShape dstStrides      = dstTensor->GetStrides();
    const TensorShape& dstShape = dstTensor->GetShape();
    const auto dstSize          = dstTensor->GetStrides()[0] * dstShape[0];
    IgnoreUnused(dstSize);  // Only used for asserts

    size_t srcDepth    = 1;
    size_t srcBatches  = 1;
    size_t srcHeight   = 1;
    size_t srcWidth    = 1;
    size_t srcChannels = 1;
    AssignValues(srcShape.GetNumDimensions(),
                 0,
                 srcShape,
                 srcChannels,
                 srcWidth,
                 srcHeight,
                 srcBatches,
                 srcDepth);

    size_t srcDepthStride   = 0;
    size_t srcBatchStride   = 0;
    size_t srcHeightStride  = 0;
    size_t srcWidthStride   = 0;
    size_t srcChannelStride = 0;
    AssignValues(srcStrides.GetNumDimensions(),
                 0,
                 srcStrides,
                 srcChannelStride,
                 srcWidthStride,
                 srcHeightStride,
                 srcBatchStride,
                 srcDepthStride);

    size_t dstDepth    = 1;
    size_t dstBatches  = 1;
    size_t dstHeight   = 1;
    size_t dstWidth    = 1;
    size_t dstChannels = 1;
    AssignValues(dstShape.GetNumDimensions(),
                 0,
                 dstShape,
                 dstChannels,
                 dstWidth,
                 dstHeight,
                 dstBatches,
                 dstDepth);

    size_t dstDepthStride   = 0;
    size_t dstBatchStride   = 0;
    size_t dstHeightStride  = 0;
    size_t dstWidthStride   = 0;
    size_t dstChannelStride = 0;
    AssignValues(dstStrides.GetNumDimensions(),
                 0,
                 dstStrides,
                 dstChannelStride,
                 dstWidthStride,
                 dstHeightStride,
                 dstBatchStride,
                 dstDepthStride);

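    // Map both tensors for CPU access; for some backends this can involve synchronising
    // device memory, hence the "Synchronize buffers" profiling event below.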
    const unsigned char* srcDataStart;
    unsigned char* dstDataStart;
    {
        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Synchronize buffers");
        srcDataStart = static_cast<const uint8_t*>(srcTensor->Map());
        dstDataStart = static_cast<uint8_t*>(dstTensor->Map());
    }

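    // Copy only the region common to both tensors: for each dimension the copy
    // extent is the smaller of the source and destination extents.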
    size_t copyLength  = std::min(srcChannels * srcChannelStride, dstChannels * dstChannelStride);
    size_t copyWidth   = std::min(srcWidth, dstWidth);
    size_t copyHeight  = std::min(srcHeight, dstHeight);
    size_t copyBatches = std::min(srcBatches, dstBatches);
    size_t copyDepth   = std::min(srcDepth, dstDepth);

    // Coalesce inner dimensions where possible
    // to reduce the overhead of calling copy() and to
    // allow for memory bandwidth optimisations
    if (copyLength == srcWidthStride &&
        copyLength == dstWidthStride)
    {
        // There is no special padding between rows,
        // and sizes are compatible, so copy whole rows
        copyLength *= copyWidth;
        copyWidth = 1;

        if (copyLength == srcHeightStride &&
            copyLength == dstHeightStride)
        {
            // There is no special padding between batches
            // and sizes are compatible, so copy whole batches
            copyLength *= copyHeight;
            copyHeight = 1;
        }
    }

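    // Walk the tensors from the outermost to the innermost dimension. After each inner
    // loop completes, the pointers are advanced to the start of the next slice by adding
    // the parent stride minus the distance already covered within the current slice.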
    const unsigned char* srcData = srcDataStart;
    unsigned char* dstData = dstDataStart;
    for (unsigned int d = 0; d < copyDepth; ++d)
    {
        auto srcPtrDepth = srcData;
        auto dstPtrDepth = dstData;
        for (unsigned int b = 0; b < copyBatches; ++b)
        {
            auto srcPtrBatch = srcData;
            auto dstPtrBatch = dstData;
            for (unsigned int h = 0; h < copyHeight; ++h)
            {
                auto srcPtrChannel = srcData;
                auto dstPtrChannel = dstData;
                for (unsigned int w = 0; w < copyWidth; ++w)
                {
                    ARMNN_ASSERT(srcData >= srcDataStart && srcData + copyLength <= srcDataStart + srcSize);
                    ARMNN_ASSERT(dstData >= dstDataStart && dstData + copyLength <= dstDataStart + dstSize);
                    copy(dstData, srcData, copyLength);
                    dstData += dstWidthStride;
                    srcData += srcWidthStride;
                }
                dstData += (static_cast<long>(dstHeightStride) - (dstData - dstPtrChannel));
                srcData += (static_cast<long>(srcHeightStride) - (srcData - srcPtrChannel));
            }
            dstData += (static_cast<long>(dstBatchStride) - (dstData - dstPtrBatch));
            srcData += (static_cast<long>(srcBatchStride) - (srcData - srcPtrBatch));
        }
        dstData += (static_cast<long>(dstDepthStride) - (dstData - dstPtrDepth));
        srcData += (static_cast<long>(srcDepthStride) - (srcData - srcPtrDepth));
    }

    srcTensor->Unmap();
    dstTensor->Unmap();
}

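// A minimal usage sketch (handle names are illustrative): a plain byte-for-byte copy
// can be expressed by passing a memcpy-style functor, since the functor is invoked as
// copy(dstPtr, srcPtr, lengthInBytes):
//
//     CopyTensorContentsGeneric(srcHandle, dstHandle,
//                               [](void* dst, const void* src, size_t size)
//                               { std::memcpy(dst, src, size); });

// Collects the input/output tensor handles of a workload descriptor into
// (source, destination) pairs, downcasting each handle to the requested type.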
template <typename SrcTensorHandleType, typename DstTensorHandleType, typename DescriptorType>
void GatherTensorHandlePairs(const DescriptorType& descriptor,
                             std::vector<std::pair<SrcTensorHandleType*, DstTensorHandleType*>>& tensorHandlePairs)
{
    const unsigned int numInputs = static_cast<unsigned int>(descriptor.m_Inputs.size());
    tensorHandlePairs.reserve(numInputs);

    for (unsigned int i = 0; i < numInputs; ++i)
    {
        SrcTensorHandleType* const srcTensorHandle =
            PolymorphicDowncast<SrcTensorHandleType*>(descriptor.m_Inputs[i]);
        DstTensorHandleType* const dstTensorHandle =
            PolymorphicDowncast<DstTensorHandleType*>(descriptor.m_Outputs[i]);

        tensorHandlePairs.emplace_back(srcTensorHandle, dstTensorHandle);
    }
}

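// Helpers for converting masks and weight tensors from the Arm NN representation into
// the layout expected by the Arm Compute Library (ACL) backends.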
int32_t ConvertMaskToACLFormat(int32_t mask, int32_t numDim);

armnn::ConstTensor PermuteTensor(const ConstCpuTensorHandle* tensor,
                                 const PermutationVector& permutationVector,
                                 void* permuteBuffer);

void ReshapeWeightsForAcl(TensorInfo& weightInfo, DataLayout dataLayout);

TensorInfo ConvertWeightTensorInfoFromArmnnToAcl(const TensorInfo& weightInfo, DataLayout dataLayout);

armnn::ConstTensor ConvertWeightTensorFromArmnnToAcl(const ConstCpuTensorHandle* weightTensor,
                                                     DataLayout dataLayout,
                                                     void* permuteBuffer);

}  // namespace armnn