//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//

#pragma once

#include "CpuTensorHandle.hpp"

#include <armnn/backends/ITensorHandle.hpp>
#include <armnn/Tensor.hpp>
#include <armnn/utility/Assert.hpp>
#include <armnn/utility/IgnoreUnused.hpp>
#include <armnn/utility/PolymorphicDowncast.hpp>
#include <armnnUtils/Permute.hpp>

#include <Half.hpp>
#include <Profiling.hpp>

#include <algorithm>
#include <cstddef>

namespace armnn
{
namespace
{

// Assigns the elements of `array` to the trailing output arguments, reading
// from the last (innermost) dimension backwards. Output arguments beyond the
// first `num` keep their initial values.
// Base case: assign one element and advance idx.
template <typename ArrayType, typename Arg>
void AssignValues(unsigned int num, unsigned int& idx, const ArrayType& array, Arg& arg)
{
    if (idx >= num)
    {
        return;
    }

    arg = array[(num - 1) - idx];
    idx++;
}

// Recursive case: assign the first output argument, then recurse over the rest.
template <typename T, typename ArrayType, typename... Args>
void AssignValues(unsigned int num, unsigned int idx, const ArrayType& array, T& assignee, Args&... args)
{
    AssignValues(num, idx, array, assignee);

    AssignValues(num, idx, array, args...);
}
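
// For example, given a 4D NHWC shape, the call
//     AssignValues(4, 0, shape, channels, width, height, batches, depth);
// yields channels = shape[3], width = shape[2], height = shape[1] and
// batches = shape[0], while depth keeps its initial value.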

} // anonymous namespace

// Generic tensor-to-tensor copy: copies the overlapping region of srcTensor
// into dstTensor using the supplied copy functor, honouring the strides (and
// therefore any padding) of both tensors.
template <typename CopyFunc>
void CopyTensorContentsGeneric(const ITensorHandle* srcTensor, ITensorHandle* dstTensor, CopyFunc copy)
{
    // For ease of understanding, names are assigned to the dimensions
    // of the tensor as if NHWC; however, this routine works with any 5D tensor.
    static_assert(MaxNumOfTensorDimensions == 5, "Please update CopyTensorContents");

    TensorShape srcStrides = srcTensor->GetStrides();
    const TensorShape& srcShape = srcTensor->GetShape();
    const auto srcSize = srcStrides[0] * srcShape[0];
    IgnoreUnused(srcSize); // Only used for asserts
    TensorShape dstStrides = dstTensor->GetStrides();
    const TensorShape& dstShape = dstTensor->GetShape();
    const auto dstSize = dstStrides[0] * dstShape[0];
    IgnoreUnused(dstSize); // Only used for asserts

    size_t srcDepth = 1;
    size_t srcBatches = 1;
    size_t srcHeight = 1;
    size_t srcWidth = 1;
    size_t srcChannels = 1;
    AssignValues(srcShape.GetNumDimensions(),
                 0,
                 srcShape,
                 srcChannels,
                 srcWidth,
                 srcHeight,
                 srcBatches,
                 srcDepth);

    size_t srcDepthStride = 0;
    size_t srcBatchStride = 0;
    size_t srcHeightStride = 0;
    size_t srcWidthStride = 0;
    size_t srcChannelStride = 0;
    AssignValues(srcStrides.GetNumDimensions(),
                 0,
                 srcStrides,
                 srcChannelStride,
                 srcWidthStride,
                 srcHeightStride,
                 srcBatchStride,
                 srcDepthStride);

    size_t dstDepth = 1;
    size_t dstBatches = 1;
    size_t dstHeight = 1;
    size_t dstWidth = 1;
    size_t dstChannels = 1;
    AssignValues(dstShape.GetNumDimensions(),
                 0,
                 dstShape,
                 dstChannels,
                 dstWidth,
                 dstHeight,
                 dstBatches,
                 dstDepth);

    size_t dstDepthStride = 0;
    size_t dstBatchStride = 0;
    size_t dstHeightStride = 0;
    size_t dstWidthStride = 0;
    size_t dstChannelStride = 0;
    AssignValues(dstStrides.GetNumDimensions(),
                 0,
                 dstStrides,
                 dstChannelStride,
                 dstWidthStride,
                 dstHeightStride,
                 dstBatchStride,
                 dstDepthStride);

    const unsigned char* srcDataStart;
    unsigned char* dstDataStart;
    {
        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Synchronize buffers");
        srcDataStart = static_cast<const unsigned char*>(srcTensor->Map());
        dstDataStart = static_cast<unsigned char*>(dstTensor->Map());
    }
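    // Map() may block until the underlying buffers have been synchronised;
    // the scope above attributes that wait to the profiling event.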

    size_t copyLength = std::min(srcChannels * srcChannelStride, dstChannels * dstChannelStride);
    size_t copyWidth = std::min(srcWidth, dstWidth);
    size_t copyHeight = std::min(srcHeight, dstHeight);
    size_t copyBatches = std::min(srcBatches, dstBatches);
    size_t copyDepth = std::min(srcDepth, dstDepth);

    // Coalesce inner dimensions where possible to reduce the overhead of
    // calling copy() and to allow for memory bandwidth optimisations.
    if (copyLength == srcWidthStride &&
        copyLength == dstWidthStride)
    {
        // There is no special padding between rows
        // and the sizes are compatible, so copy whole rows.
        copyLength *= copyWidth;
        copyWidth = 1;

        if (copyLength == srcHeightStride &&
            copyLength == dstHeightStride)
        {
            // There is no special padding between batches
            // and the sizes are compatible, so copy whole batches.
            copyLength *= copyHeight;
            copyHeight = 1;
        }
    }
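
    // For fully packed tensors of equal shape, both folds above succeed and
    // each copy() call below then moves one whole [height, width, channels]
    // block per batch element.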
    const unsigned char* srcData = srcDataStart;
    unsigned char* dstData = dstDataStart;
    for (unsigned int d = 0; d < copyDepth; ++d)
    {
        auto srcPtrDepth = srcData;
        auto dstPtrDepth = dstData;
        for (unsigned int b = 0; b < copyBatches; ++b)
        {
            auto srcPtrBatch = srcData;
            auto dstPtrBatch = dstData;
            for (unsigned int h = 0; h < copyHeight; ++h)
            {
                auto srcPtrChannel = srcData;
                auto dstPtrChannel = dstData;
                for (unsigned int w = 0; w < copyWidth; ++w)
                {
                    ARMNN_ASSERT(srcData >= srcDataStart && srcData + copyLength <= srcDataStart + srcSize);
                    ARMNN_ASSERT(dstData >= dstDataStart && dstData + copyLength <= dstDataStart + dstSize);
                    copy(dstData, srcData, copyLength);
                    dstData += dstWidthStride;
                    srcData += srcWidthStride;
                }
                // Rewind to the start of the dimension, then advance by one stride.
                dstData += (static_cast<std::ptrdiff_t>(dstHeightStride) - (dstData - dstPtrChannel));
                srcData += (static_cast<std::ptrdiff_t>(srcHeightStride) - (srcData - srcPtrChannel));
            }
            dstData += (static_cast<std::ptrdiff_t>(dstBatchStride) - (dstData - dstPtrBatch));
            srcData += (static_cast<std::ptrdiff_t>(srcBatchStride) - (srcData - srcPtrBatch));
        }
        dstData += (static_cast<std::ptrdiff_t>(dstDepthStride) - (dstData - dstPtrDepth));
        srcData += (static_cast<std::ptrdiff_t>(srcDepthStride) - (srcData - srcPtrDepth));
    }

    srcTensor->Unmap();
    dstTensor->Unmap();
}
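
// Example (illustrative): a plain memcpy-based copy between two mapped
// tensor handles, e.g.
//     CopyTensorContentsGeneric(srcHandle, dstHandle,
//         [](void* dst, const void* src, size_t size) { memcpy(dst, src, size); });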

// Pairs up the i-th input and i-th output tensor handles of a workload
// descriptor, downcasting them to the requested handle types.
template <typename SrcTensorHandleType, typename DstTensorHandleType, typename DescriptorType>
void GatherTensorHandlePairs(const DescriptorType& descriptor,
                             std::vector<std::pair<SrcTensorHandleType*, DstTensorHandleType*>>& tensorHandlePairs)
{
    const unsigned int numInputs = static_cast<unsigned int>(descriptor.m_Inputs.size());
    tensorHandlePairs.reserve(numInputs);

    for (unsigned int i = 0; i < numInputs; ++i)
    {
        SrcTensorHandleType* const srcTensorHandle =
            PolymorphicDowncast<SrcTensorHandleType*>(descriptor.m_Inputs[i]);
        DstTensorHandleType* const dstTensorHandle =
            PolymorphicDowncast<DstTensorHandleType*>(descriptor.m_Outputs[i]);

        tensorHandlePairs.emplace_back(srcTensorHandle, dstTensorHandle);
    }
}
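
// Typical usage (illustrative), e.g. from a mem-copy workload constructor:
//     std::vector<std::pair<ITensorHandle*, ITensorHandle*>> pairs;
//     GatherTensorHandlePairs(descriptor, pairs);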

// Converts a mask (e.g. a StridedSlice begin/end mask) from Arm NN to ACL
// format by reversing its bit order over numDim bits, since ACL numbers
// tensor dimensions in the opposite order to Arm NN.
int32_t ConvertMaskToACLFormat(int32_t mask, int32_t numDim);
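// For instance (assuming the bit reversal described above), with numDim = 4
// a mask of 0b0001 (dimension 0 set) maps to 0b1000.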

// Permutes the data of a constant tensor into permuteBuffer according to
// permutationVector and returns a ConstTensor wrapping that buffer.
armnn::ConstTensor PermuteTensor(const ConstCpuTensorHandle* tensor,
                                 const PermutationVector& permutationVector,
                                 void* permuteBuffer);

// Adjusts weightInfo to the shape ACL expects for the given data layout.
void ReshapeWeightsForAcl(TensorInfo& weightInfo, DataLayout dataLayout);

// Returns the TensorInfo a weight tensor will have after conversion from the
// Arm NN layout to the corresponding ACL layout.
TensorInfo ConvertWeightTensorInfoFromArmnnToAcl(const TensorInfo& weightInfo, DataLayout dataLayout);

// Permutes and reshapes a constant weight tensor from the Arm NN layout to
// the ACL layout, using permuteBuffer as scratch storage, and returns a
// ConstTensor wrapping that buffer.
armnn::ConstTensor ConvertWeightTensorFromArmnnToAcl(const ConstCpuTensorHandle* weightTensor,
                                                     DataLayout dataLayout,
                                                     void* permuteBuffer);

} // namespace armnn