//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "ClBackend.hpp"
#include "ClBackendId.hpp"
#include "ClBackendModelContext.hpp"
#include "ClWorkloadFactory.hpp"
#include "ClBackendContext.hpp"
#include "ClLayerSupport.hpp"
#include "ClTensorHandleFactory.hpp"

#include <armnn/BackendRegistry.hpp>
#include <armnn/Descriptors.hpp>

#include <aclCommon/ArmComputeSubgraphUtils.hpp>
#include <aclCommon/ArmComputeUtils.hpp>
#include <aclCommon/BaseMemoryManager.hpp>

#include <armnn/backends/IBackendContext.hpp>
#include <armnn/backends/IMemoryManager.hpp>
#include <armnn/utility/PolymorphicDowncast.hpp>

#include "workloads/ClAdditionWorkload.hpp"
#include "workloads/ClBatchNormalizationFloatWorkload.hpp"
#include "workloads/ClConvolution2dWorkload.hpp"
#include "workloads/ClDepthwiseConvolutionWorkload.hpp"
#include "workloads/ClDivisionFloatWorkload.hpp"
#include "workloads/ClFullyConnectedWorkload.hpp"
#include "workloads/ClMultiplicationWorkload.hpp"
#include "workloads/ClSubtractionWorkload.hpp"

#include <Optimizer.hpp>

#include <arm_compute/core/Types.h>
#include <arm_compute/runtime/CL/CLBufferAllocator.h>

namespace armnn
{

const BackendId& ClBackend::GetIdStatic()
{
    static const BackendId s_Id{ClBackendId()};
    return s_Id;
}

IBackendInternal::IMemoryManagerUniquePtr ClBackend::CreateMemoryManager() const
{
    return std::make_unique<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
}

IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager) const
{
    return std::make_unique<ClWorkloadFactory>(
        PolymorphicPointerDowncast<ClMemoryManager>(memoryManager));
}

IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const ModelOptions& modelOptions) const
{
    return std::make_unique<ClWorkloadFactory>(
        PolymorphicPointerDowncast<ClMemoryManager>(memoryManager), CreateBackendSpecificModelContext(modelOptions));
}

IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
    TensorHandleFactoryRegistry& registry) const
{
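    // Register a CL memory manager and a matching tensor handle factory with the
    // registry, so CL tensor handles created for this backend share one memory manager.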
    auto memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());

    registry.RegisterMemoryManager(memoryManager);
    registry.RegisterFactory(std::make_unique<ClTensorHandleFactory>(memoryManager));

    return std::make_unique<ClWorkloadFactory>(
        PolymorphicPointerDowncast<ClMemoryManager>(memoryManager));
}

IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
    TensorHandleFactoryRegistry& registry, const ModelOptions& modelOptions) const
{
    auto memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());

    registry.RegisterMemoryManager(memoryManager);
    registry.RegisterFactory(std::make_unique<ClTensorHandleFactory>(memoryManager));

    return std::make_unique<ClWorkloadFactory>(
        PolymorphicPointerDowncast<ClMemoryManager>(memoryManager), CreateBackendSpecificModelContext(modelOptions));
}

std::vector<ITensorHandleFactory::FactoryId> ClBackend::GetHandleFactoryPreferences() const
{
    return std::vector<ITensorHandleFactory::FactoryId> {ClTensorHandleFactory::GetIdStatic()};
}

void ClBackend::RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry)
{
    auto mgr = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());

    registry.RegisterMemoryManager(mgr);
    registry.RegisterFactory(std::make_unique<ClTensorHandleFactory>(mgr));
}

IBackendInternal::IBackendContextPtr ClBackend::CreateBackendContext(const IRuntime::CreationOptions& options) const
{
    return IBackendContextPtr{new ClBackendContext{options}};
}

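// The CL backend does not currently provide a backend profiling context, so an empty
// pointer is returned.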
IBackendInternal::IBackendProfilingContextPtr ClBackend::CreateBackendProfilingContext(
    const IRuntime::CreationOptions&, IBackendProfilingPtr&)
{
    return IBackendProfilingContextPtr{};
}

IBackendInternal::Optimizations ClBackend::GetOptimizations() const
{
    return Optimizations{};
}

IBackendInternal::IBackendSpecificModelContextPtr ClBackend::CreateBackendSpecificModelContext(
    const ModelOptions& modelOptions) const
{
    return IBackendSpecificModelContextPtr{new ClBackendModelContext{modelOptions}};
}

IBackendInternal::ILayerSupportSharedPtr ClBackend::GetLayerSupport() const
{
    static ILayerSupportSharedPtr layerSupport
    {
        new ClLayerSupport(IBackendInternal::IBackendSpecificModelContextPtr{})
    };
    return layerSupport;
}

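// Note: the static local below means the ClLayerSupport instance is constructed from
// the ModelOptions of the first call only; subsequent calls reuse the same shared object.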
IBackendInternal::ILayerSupportSharedPtr ClBackend::GetLayerSupport(const ModelOptions& modelOptions) const
{
    static ILayerSupportSharedPtr layerSupport
    {
        new ClLayerSupport(CreateBackendSpecificModelContext(modelOptions))
    };
    return layerSupport;
}

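// Walks the subgraph looking for layer/activation pairs and fuses each activation into
// the preceding layer wherever the corresponding CL workload validation accepts the
// fused configuration. Layers not involved in any fusion are reported back untouched.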
OptimizationViews ClBackend::OptimizeSubgraphView(const SubgraphView& subgraph,
                                                  const ModelOptions& modelOptions) const
{
    OptimizationViews optimizationViews;

    auto it = subgraph.end();
    bool isFastMathEnabled = false;
    std::map<LayerGuid, Layer*> untouched;

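    // Start by recording every layer in the subgraph as untouched; fused layers are
    // erased from this map as substitutions are made below.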
    while (it != subgraph.begin())
    {
        --it;
        Layer& base = **it;
        untouched.insert({base.GetGuid(), &base});
    }

    it = subgraph.end();
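    // When the CL backend is compiled in, read the FastMath option from the
    // backend-specific model context so convolution validation can take it into account.
    // A user would typically request this via BackendOptions when optimizing the network,
    // e.g. (sketch): BackendOptions("GpuAcc", {{"FastMathEnabled", true}}).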
#if defined(ARMCOMPUTECL_ENABLED)
    IBackendInternal::IBackendSpecificModelContextPtr modelContextPtr = CreateBackendSpecificModelContext(modelOptions);

    if (modelContextPtr)
    {
        auto clModelOptions = dynamic_cast<ClBackendModelContext*>(modelContextPtr.get());
        if (clModelOptions)
        {
            isFastMathEnabled = clModelOptions->IsFastMathEnabled();
        }
    }
#endif
    while (it != subgraph.begin())
    {
        --it;
        Layer& base = **it;

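        // Only layer types with a fused-activation CL implementation are candidates,
        // and only if no activation has already been fused into the layer.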
        if ((base.GetType() == LayerType::DepthwiseConvolution2d || base.GetType() == LayerType::Convolution2d
            || base.GetType() == LayerType::BatchNormalization || base.GetType() == LayerType::FullyConnected
            || base.GetType() == LayerType::Addition || base.GetType() == LayerType::Multiplication
            || base.GetType() == LayerType::Subtraction || base.GetType() == LayerType::Division)
            && (base.GetAdditionalInformation<ActivationDescriptor>() == nullptr))
        {
            for (auto output = base.BeginOutputSlots(); output != base.EndOutputSlots(); ++output)
            {
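                // Fusion is only safe when the output feeds a single consumer; otherwise
                // other consumers would also receive the activated result.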
                if (output->GetNumConnections() == 1)
                {
                    for (auto&& childInput : output->GetConnections())
                    {
                        if (childInput->GetOwningLayer().GetType() == LayerType::Activation)
                        {
                            Layer& child = childInput->GetOwningLayer();

                            auto* activationLayer = PolymorphicDowncast<ActivationLayer*>(&child);

                            const std::string name = std::string("fused-") + child.GetName() + std::string("-into-") +
                                                     base.GetName();

                            // Get params from activation layer
                            ActivationDescriptor activationDesc = activationLayer->GetParameters();

                            if (base.GetType() == LayerType::Convolution2d)
                            {
                                Convolution2dLayer* baseLayer = PolymorphicDowncast<Convolution2dLayer*>(&base);

                                Optional<TensorInfo> biases;

                                if (baseLayer->GetParameters().m_BiasEnabled)
                                {
                                    biases = baseLayer->m_Bias->GetTensorInfo();
                                }

                                arm_compute::Status status = ClConvolution2dWorkloadValidate(
                                    baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                    activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                    baseLayer->GetParameters(),
                                    baseLayer->m_Weight->GetTensorInfo(),
                                    biases,
                                    isFastMathEnabled,
                                    &activationDesc);

                                if (status)
                                {
                                    FuseLayerWithWeightsAndBiases<Convolution2dLayer>(optimizationViews,
                                                                                      baseLayer,
                                                                                      activationLayer,
                                                                                      activationDesc,
                                                                                      name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::DepthwiseConvolution2d)
                            {
                                DepthwiseConvolution2dLayer* baseLayer =
                                    PolymorphicDowncast<DepthwiseConvolution2dLayer*>(&base);

                                Optional<TensorInfo> biases;

                                if (baseLayer->GetParameters().m_BiasEnabled)
                                {
                                    biases = baseLayer->m_Bias->GetTensorInfo();
                                }

                                arm_compute::Status status = ClDepthwiseConvolutionWorkloadValidate(
                                    baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                    activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                    baseLayer->GetParameters(),
                                    baseLayer->m_Weight->GetTensorInfo(),
                                    biases,
                                    &activationDesc);

                                if (status)
                                {
                                    FuseLayerWithWeightsAndBiases<DepthwiseConvolution2dLayer>(optimizationViews,
                                                                                               baseLayer,
                                                                                               activationLayer,
                                                                                               activationDesc,
                                                                                               name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::FullyConnected)
                            {
                                FullyConnectedLayer* baseLayer = PolymorphicDowncast<FullyConnectedLayer*>(&base);

                                arm_compute::Status status = ClFullyConnectedWorkloadValidate(
                                    baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                    activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                    baseLayer->m_Weight->GetTensorInfo(),
                                    baseLayer->m_Bias->GetTensorInfo(),
                                    baseLayer->GetParameters(),
                                    &activationDesc);

                                if (status)
                                {
                                    FuseLayerWithWeightsAndBiases<FullyConnectedLayer>(optimizationViews,
                                                                                       baseLayer,
                                                                                       activationLayer,
                                                                                       activationDesc,
                                                                                       name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::BatchNormalization)
                            {
                                BatchNormalizationLayer* baseLayer =
                                    PolymorphicDowncast<BatchNormalizationLayer*>(&base);

                                arm_compute::Status status = ClBatchNormalizationValidate(
                                    baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                    activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                    baseLayer->m_Mean->GetTensorInfo(),
                                    baseLayer->m_Variance->GetTensorInfo(),
                                    baseLayer->m_Beta->GetTensorInfo(),
                                    baseLayer->m_Gamma->GetTensorInfo(),
                                    baseLayer->GetParameters(),
                                    &activationDesc);

                                if (status)
                                {
                                    BatchNormalizationLayer* replacementLayer =
                                        FuseLayerWithParameters<BatchNormalizationLayer>(optimizationViews,
                                                                                         baseLayer,
                                                                                         activationLayer,
                                                                                         activationDesc,
                                                                                         name);

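                                    // Transfer ownership of the batch norm statistics
                                    // and parameters to the fused replacement layer.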
                                    replacementLayer->m_Beta = std::move(baseLayer->m_Beta);
                                    replacementLayer->m_Gamma = std::move(baseLayer->m_Gamma);
                                    replacementLayer->m_Mean = std::move(baseLayer->m_Mean);
                                    replacementLayer->m_Variance = std::move(baseLayer->m_Variance);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::Addition)
                            {
                                AdditionLayer* baseLayer = PolymorphicDowncast<AdditionLayer*>(&base);

                                arm_compute::Status status = ClAdditionValidate(
                                    baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                    baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                    activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                    &activationDesc);

                                if (status)
                                {
                                    FuseLayerWithoutParameters<AdditionLayer>(optimizationViews,
                                                                              baseLayer,
                                                                              activationLayer,
                                                                              activationDesc,
                                                                              name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::Division)
                            {
                                DivisionLayer* baseLayer = PolymorphicDowncast<DivisionLayer*>(&base);

                                arm_compute::Status status = ClDivisionWorkloadValidate(
                                    baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                    baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                    activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                    &activationDesc);

                                if (status)
                                {
                                    FuseLayerWithoutParameters<DivisionLayer>(optimizationViews,
                                                                              baseLayer,
                                                                              activationLayer,
                                                                              activationDesc,
                                                                              name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::Multiplication)
                            {
                                MultiplicationLayer* baseLayer = PolymorphicDowncast<MultiplicationLayer*>(&base);

                                arm_compute::Status status = ClMultiplicationWorkloadValidate(
                                    baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                    baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                    activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                    &activationDesc);

                                if (status)
                                {
                                    FuseLayerWithoutParameters<MultiplicationLayer>(optimizationViews,
                                                                                    baseLayer,
                                                                                    activationLayer,
                                                                                    activationDesc,
                                                                                    name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::Subtraction)
                            {
                                SubtractionLayer* baseLayer = PolymorphicDowncast<SubtractionLayer*>(&base);

                                arm_compute::Status status = ClSubtractionValidate(
                                    baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                    baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                    activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                    &activationDesc);

                                if (status)
                                {
                                    FuseLayerWithoutParameters<SubtractionLayer>(optimizationViews,
                                                                                 baseLayer,
                                                                                 activationLayer,
                                                                                 activationDesc,
                                                                                 name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                        }
                    }
                }
            }
        }
    }

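    // If no fusions were made, hand the whole subgraph back untouched; otherwise report
    // the layers that were not part of any substitution.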
    if (optimizationViews.GetSubstitutions().empty())
    {
        optimizationViews.AddUntouchedSubgraph(SubgraphView(subgraph));
    }
    else
    {
        ReportUntouchedLayers(optimizationViews, untouched);
    }

    return optimizationViews;
}

} // namespace armnn