//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "NeonBackend.hpp"
#include "NeonBackendId.hpp"
#include "NeonBackendModelContext.hpp"
#include "NeonWorkloadFactory.hpp"
#include "NeonLayerSupport.hpp"
#include "NeonTensorHandleFactory.hpp"

#include <armnn/BackendRegistry.hpp>
#include <armnn/Descriptors.hpp>

#include <aclCommon/ArmComputeSubgraphUtils.hpp>
#include <aclCommon/ArmComputeUtils.hpp>
#include <aclCommon/BaseMemoryManager.hpp>

#include <armnn/backends/IBackendContext.hpp>
#include <armnn/backends/IMemoryManager.hpp>

#include <armnn/utility/PolymorphicDowncast.hpp>

#include "workloads/NeonAdditionWorkload.hpp"
#include "workloads/NeonBatchNormalizationWorkload.hpp"
#include "workloads/NeonConvolution2dWorkload.hpp"
#include "workloads/NeonDepthwiseConvolutionWorkload.hpp"
#include "workloads/NeonDivisionWorkload.hpp"
#include "workloads/NeonFullyConnectedWorkload.hpp"
#include "workloads/NeonMultiplicationWorkload.hpp"
#include "workloads/NeonSubtractionWorkload.hpp"

#include <Optimizer.hpp>

#include <arm_compute/core/Types.h>
#include <arm_compute/runtime/Allocator.h>

namespace armnn
{

const BackendId& NeonBackend::GetIdStatic()
{
    static const BackendId s_Id{NeonBackendId()};
    return s_Id;
}

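// Tensor memory for this backend is pooled through an arm_compute::Allocator
// using offset-based memory affinity (BaseMemoryManager::MemoryAffinity::Offset).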
IBackendInternal::IMemoryManagerUniquePtr NeonBackend::CreateMemoryManager() const
{
    return std::make_unique<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(),
                                               BaseMemoryManager::MemoryAffinity::Offset);
}

IBackendInternal::IWorkloadFactoryPtr NeonBackend::CreateWorkloadFactory(
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager) const
{
    return std::make_unique<NeonWorkloadFactory>(
        PolymorphicPointerDowncast<NeonMemoryManager>(memoryManager));
}

IBackendInternal::IWorkloadFactoryPtr NeonBackend::CreateWorkloadFactory(
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const ModelOptions& modelOptions) const
{
    return std::make_unique<NeonWorkloadFactory>(
        PolymorphicPointerDowncast<NeonMemoryManager>(memoryManager), CreateBackendSpecificModelContext(modelOptions));
}

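// The registry-based overloads also register the shared NeonMemoryManager and a
// NeonTensorHandleFactory, so tensor handles and workloads use the same memory manager.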
IBackendInternal::IWorkloadFactoryPtr NeonBackend::CreateWorkloadFactory(
    class TensorHandleFactoryRegistry& tensorHandleFactoryRegistry) const
{
    auto memoryManager = std::make_shared<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(),
                                                             BaseMemoryManager::MemoryAffinity::Offset);

    tensorHandleFactoryRegistry.RegisterMemoryManager(memoryManager);
    tensorHandleFactoryRegistry.RegisterFactory(std::make_unique<NeonTensorHandleFactory>(memoryManager));

    return std::make_unique<NeonWorkloadFactory>(
        PolymorphicPointerDowncast<NeonMemoryManager>(memoryManager));
}

IBackendInternal::IWorkloadFactoryPtr NeonBackend::CreateWorkloadFactory(
    TensorHandleFactoryRegistry& tensorHandleFactoryRegistry, const ModelOptions& modelOptions) const
{
    auto memoryManager = std::make_shared<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(),
                                                             BaseMemoryManager::MemoryAffinity::Offset);

    tensorHandleFactoryRegistry.RegisterMemoryManager(memoryManager);
    tensorHandleFactoryRegistry.RegisterFactory(std::make_unique<NeonTensorHandleFactory>(memoryManager));

    return std::make_unique<NeonWorkloadFactory>(
        PolymorphicPointerDowncast<NeonMemoryManager>(memoryManager), CreateBackendSpecificModelContext(modelOptions));
}

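// Neither a backend context nor a profiling context is needed for Neon, so empty
// pointers are returned.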
IBackendInternal::IBackendContextPtr NeonBackend::CreateBackendContext(const IRuntime::CreationOptions&) const
{
    return IBackendContextPtr{};
}

IBackendInternal::IBackendProfilingContextPtr NeonBackend::CreateBackendProfilingContext(
    const IRuntime::CreationOptions&, IBackendProfilingPtr&)
{
    return IBackendProfilingContextPtr{};
}

IBackendInternal::Optimizations NeonBackend::GetOptimizations() const
{
    return Optimizations{};
}

IBackendInternal::IBackendSpecificModelContextPtr NeonBackend::CreateBackendSpecificModelContext(
    const ModelOptions& modelOptions) const
{
    return IBackendSpecificModelContextPtr{new NeonBackendModelContext{modelOptions}};
}

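// Layer support is cached in a function-local static, so it is constructed once
// and reused on subsequent calls.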
IBackendInternal::ILayerSupportSharedPtr NeonBackend::GetLayerSupport() const
{
    static ILayerSupportSharedPtr layerSupport
        {
            new NeonLayerSupport(IBackendInternal::IBackendSpecificModelContextPtr{})
        };
    return layerSupport;
}

IBackendInternal::ILayerSupportSharedPtr NeonBackend::GetLayerSupport(const ModelOptions& modelOptions) const
{
    static ILayerSupportSharedPtr layerSupport
        {
            new NeonLayerSupport(CreateBackendSpecificModelContext(modelOptions))
        };
    return layerSupport;
}

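// Fuses an Activation layer into a preceding Convolution2d, DepthwiseConvolution2d,
// FullyConnected, BatchNormalization, Addition, Subtraction, Multiplication or
// Division layer, but only when the corresponding Neon workload validation accepts
// the combined activation descriptor. Any layers that are not substituted are
// reported back as untouched.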
OptimizationViews NeonBackend::OptimizeSubgraphView(const SubgraphView& subgraph) const
{
    OptimizationViews optimizationViews;

    auto it = subgraph.end();
    std::map<LayerGuid, Layer*> untouched;

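    // First pass: record every layer in the subgraph so that anything left
    // unfused can later be reported as untouched.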
    while (it != subgraph.begin())
    {
        --it;
        Layer& base = **it;
        untouched.insert({base.GetGuid(), &base});
    }

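    // Second pass: where a supported layer's only output connection feeds an
    // Activation layer, try to fuse that activation into the base layer.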
    it = subgraph.end();
    while (it != subgraph.begin())
    {
        --it;
        Layer& base = **it;

        if ((base.GetType() == LayerType::DepthwiseConvolution2d || base.GetType() == LayerType::Convolution2d
             || base.GetType() == LayerType::BatchNormalization || base.GetType() == LayerType::FullyConnected
             || base.GetType() == LayerType::Addition || base.GetType() == LayerType::Multiplication
             || base.GetType() == LayerType::Subtraction || base.GetType() == LayerType::Division)
            && (base.GetAdditionalInformation<ActivationDescriptor>() == nullptr))
        {
            for (auto output = base.BeginOutputSlots(); output != base.EndOutputSlots(); ++output)
            {
                if (output->GetNumConnections() == 1)
                {
                    for (auto&& childInput : output->GetConnections())
                    {
                        if (childInput->GetOwningLayer().GetType() == LayerType::Activation)
                        {
                            Layer& child = childInput->GetOwningLayer();

                            auto* activationLayer = PolymorphicDowncast<ActivationLayer*>(&child);

                            const std::string name = std::string("fused-") + child.GetName() + std::string("-into-") +
                                                     base.GetName();

                            // Get params from activation layer
                            ActivationDescriptor activationDesc = activationLayer->GetParameters();

                            if (base.GetType() == LayerType::Convolution2d)
                            {
                                Convolution2dLayer* baseLayer = PolymorphicDowncast<Convolution2dLayer*>(&base);

                                Optional<TensorInfo> biases;

                                if (baseLayer->GetParameters().m_BiasEnabled)
                                {
                                    biases = baseLayer->m_Bias->GetTensorInfo();
                                }

                                arm_compute::Status status = NeonConvolution2dWorkloadValidate(
                                        baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        baseLayer->GetParameters(),
                                        baseLayer->m_Weight->GetTensorInfo(),
                                        biases,
                                        false,
                                        &activationDesc);

                                if (status)
                                {
                                    FuseLayerWithWeightsAndBiases<Convolution2dLayer>(optimizationViews,
                                                                                      baseLayer,
                                                                                      activationLayer,
                                                                                      activationDesc,
                                                                                      name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::DepthwiseConvolution2d)
                            {
                                DepthwiseConvolution2dLayer* baseLayer =
                                        PolymorphicDowncast<DepthwiseConvolution2dLayer*>(&base);

                                Optional<TensorInfo> biases;

                                if (baseLayer->GetParameters().m_BiasEnabled)
                                {
                                    biases = baseLayer->m_Bias->GetTensorInfo();
                                }

                                arm_compute::Status status = NeonDepthwiseConvolutionWorkloadValidate(
                                        baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        baseLayer->GetParameters(),
                                        baseLayer->m_Weight->GetTensorInfo(),
                                        biases,
                                        &activationDesc);

                                if (status)
                                {
                                    FuseLayerWithWeightsAndBiases<DepthwiseConvolution2dLayer>(optimizationViews,
                                                                                               baseLayer,
                                                                                               activationLayer,
                                                                                               activationDesc,
                                                                                               name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::FullyConnected)
                            {
                                FullyConnectedLayer* baseLayer = PolymorphicDowncast<FullyConnectedLayer*>(&base);

                                arm_compute::Status status = NeonFullyConnectedWorkloadValidate(
                                        baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        baseLayer->m_Weight->GetTensorInfo(),
                                        baseLayer->m_Bias->GetTensorInfo(),
                                        baseLayer->GetParameters(),
                                        &activationDesc);

                                if (status)
                                {
                                    FuseLayerWithWeightsAndBiases<FullyConnectedLayer>(optimizationViews,
                                                                                       baseLayer,
                                                                                       activationLayer,
                                                                                       activationDesc,
                                                                                       name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::BatchNormalization)
                            {
                                BatchNormalizationLayer* baseLayer =
                                        PolymorphicDowncast<BatchNormalizationLayer*>(&base);

                                arm_compute::Status status = NeonBatchNormalizationValidate(
                                        baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        baseLayer->m_Mean->GetTensorInfo(),
                                        baseLayer->m_Variance->GetTensorInfo(),
                                        baseLayer->m_Beta->GetTensorInfo(),
                                        baseLayer->m_Gamma->GetTensorInfo(),
                                        baseLayer->GetParameters(),
                                        &activationDesc);

                                if (status)
                                {
                                    BatchNormalizationLayer* replacementLayer =
                                            FuseLayerWithParameters<BatchNormalizationLayer>(
                                                    optimizationViews,
                                                    baseLayer,
                                                    activationLayer,
                                                    activationDesc,
                                                    name);

                                    replacementLayer->m_Beta     = std::move(baseLayer->m_Beta);
                                    replacementLayer->m_Gamma    = std::move(baseLayer->m_Gamma);
                                    replacementLayer->m_Mean     = std::move(baseLayer->m_Mean);
                                    replacementLayer->m_Variance = std::move(baseLayer->m_Variance);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::Addition)
                            {
                                AdditionLayer* baseLayer = PolymorphicDowncast<AdditionLayer*>(&base);

                                arm_compute::Status status = NeonAdditionWorkloadValidate(
                                        baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                        activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        &activationDesc);

                                if (status)
                                {
                                    FuseLayerWithoutParameters<AdditionLayer>(optimizationViews,
                                                                              baseLayer,
                                                                              activationLayer,
                                                                              activationDesc,
                                                                              name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::Division)
                            {
                                DivisionLayer* baseLayer = PolymorphicDowncast<DivisionLayer*>(&base);

                                arm_compute::Status status = NeonDivisionWorkloadValidate(
                                        baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                        activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        &activationDesc);

                                if (status)
                                {
                                    FuseLayerWithoutParameters<DivisionLayer>(optimizationViews,
                                                                              baseLayer,
                                                                              activationLayer,
                                                                              activationDesc,
                                                                              name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::Multiplication)
                            {
                                MultiplicationLayer* baseLayer = PolymorphicDowncast<MultiplicationLayer*>(&base);

                                arm_compute::Status status = NeonMultiplicationWorkloadValidate(
                                        baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                        activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        &activationDesc);

                                if (status)
                                {
                                    FuseLayerWithoutParameters<MultiplicationLayer>(optimizationViews,
                                                                                    baseLayer,
                                                                                    activationLayer,
                                                                                    activationDesc,
                                                                                    name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::Subtraction)
                            {
                                SubtractionLayer* baseLayer = PolymorphicDowncast<SubtractionLayer*>(&base);

                                arm_compute::Status status = NeonSubtractionWorkloadValidate(
                                        baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                        activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        &activationDesc);

                                if (status)
                                {
                                    FuseLayerWithoutParameters<SubtractionLayer>(optimizationViews,
                                                                                 baseLayer,
                                                                                 activationLayer,
                                                                                 activationDesc,
                                                                                 name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                        }
                    }
                }
            }
        }
    }

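    // If nothing was fused, hand the whole subgraph back untouched; otherwise
    // report whichever layers were not substituted.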
    if (optimizationViews.GetSubstitutions().empty())
    {
        optimizationViews.AddUntouchedSubgraph(SubgraphView(subgraph));
    }
    else
    {
        ReportUntouchedLayers(optimizationViews, untouched);
    }

    return optimizationViews;
}

std::vector<ITensorHandleFactory::FactoryId> NeonBackend::GetHandleFactoryPreferences() const
{
    return std::vector<ITensorHandleFactory::FactoryId>{ NeonTensorHandleFactory::GetIdStatic() };
}

void NeonBackend::RegisterTensorHandleFactories(class TensorHandleFactoryRegistry& registry)
{
    auto memoryManager = std::make_shared<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(),
                                                             BaseMemoryManager::MemoryAffinity::Offset);

    registry.RegisterMemoryManager(memoryManager);
    registry.RegisterFactory(std::make_unique<NeonTensorHandleFactory>(memoryManager));
}

} // namespace armnn